From 28578f966459fcf84294419d259cf7d25de49b2c Mon Sep 17 00:00:00 2001 From: Yasuhiro Matsumoto Date: Tue, 27 Jul 2021 06:18:35 +0000 Subject: [PATCH 001/200] all: fix typos Change-Id: I0c64540bc7848773955a517c20c7dc9d45cbe618 GitHub-Last-Rev: a97430e47c17183bebaf28f0f5bd3d10f91bbf5b GitHub-Pull-Request: golang/arch#5 Reviewed-on: https://go-review.googlesource.com/c/arch/+/337169 Reviewed-by: Ian Lance Taylor Reviewed-by: Cherry Mui Run-TryBot: Ian Lance Taylor TryBot-Result: Go Bot --- arm64/arm64asm/plan9x.go | 2 +- x86/x86csv/reader.go | 2 +- x86/x86spec/spec.go | 2 +- x86/xeddata/operand.go | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arm64/arm64asm/plan9x.go b/arm64/arm64asm/plan9x.go index 3aaf0b2a..f4eef8c0 100644 --- a/arm64/arm64asm/plan9x.go +++ b/arm64/arm64asm/plan9x.go @@ -500,7 +500,7 @@ SHA256SU0 SHA256SU1 `) -// floating point instrcutions without "F" prefix. +// floating point instructions without "F" prefix. var fOpsWithoutFPrefix = map[Op]bool{ LDP: true, STP: true, diff --git a/x86/x86csv/reader.go b/x86/x86csv/reader.go index ed59e31a..5c48ae9b 100644 --- a/x86/x86csv/reader.go +++ b/x86/x86csv/reader.go @@ -24,7 +24,7 @@ func NewReader(r io.Reader) *Reader { // ReadAll reads all remaining rows from r. // -// If error is occured, still returns all rows +// If error has occurred, still returns all rows // that have been read during method execution. // // A successful call returns err == nil, not err == io.EOF. diff --git a/x86/x86spec/spec.go b/x86/x86spec/spec.go index b49e006f..25267941 100644 --- a/x86/x86spec/spec.go +++ b/x86/x86spec/spec.go @@ -36,7 +36,7 @@ // // 4. The instruction encoding. For example, "C1 /4 ib". // -// 5. The validity of the instruction in 32-bit (aka compatiblity, legacy) mode. +// 5. The validity of the instruction in 32-bit (aka compatibility, legacy) mode. // // 6. The validity of the instruction in 64-bit mode. 
// diff --git a/x86/xeddata/operand.go b/x86/xeddata/operand.go index e934ed73..1632828d 100644 --- a/x86/xeddata/operand.go +++ b/x86/xeddata/operand.go @@ -66,7 +66,7 @@ type Operand struct { // Possible values: // EVEX.b context { // TXT=ZEROSTR - zeroing - // TXT=SAESTR - surpress all exceptions + // TXT=SAESTR - suppress all exceptions // TXT=ROUNDC - rounding // TXT=BCASTSTR - broadcasting // } From ebb09ed340f18f7e2a2200f1adf792992c448346 Mon Sep 17 00:00:00 2001 From: "Paul E. Murphy" Date: Mon, 17 May 2021 14:15:56 -0500 Subject: [PATCH 002/200] ppc64: add ISA level information to pp64.csv Re-purpose the metadata field to hold the ISA level. This field is currently unused. Note, lq/stq/scv/rfscv appear out-of-order in appendix F of ISA 3.1. Change-Id: I96211bf75305a29c0805d95489eee132444d919c Reviewed-on: https://go-review.googlesource.com/c/arch/+/342969 Reviewed-by: Lynn Boger Reviewed-by: Carlos Eduardo Seo Trust: Carlos Eduardo Seo --- ppc64/pp64.csv | 2323 ++++++++++++++++++++++++------------------------ 1 file changed, 1162 insertions(+), 1161 deletions(-) diff --git a/ppc64/pp64.csv b/ppc64/pp64.csv index 6f7b7fcf..3150cade 100644 --- a/ppc64/pp64.csv +++ b/ppc64/pp64.csv @@ -5,1171 +5,1172 @@ # # Each line in the CSV section contains 4 fields: # -# instruction mnemonic encoding tags +# instruction mnemonic encoding isa-level # # The instruction is the headline from the manual. # The mnemonic is the instruction mnemonics, separated by | characters. # The encoding is the encoding, a sequence of name@startbit| describing each bit field in turn or # a list of sequences of the form (,sequence)+. A leading comma is used to signify an # instruction encoding requiring multiple instruction words. -# The tags are additional metadata, currently always empty. +# The fourth field represents the ISA version where the instruction was introduced as +# stated in Appendix F. 
of ISA 3.1 # -"Byte-Reverse Doubleword X-form","brd RA,RS","31@0|RS@6|RA@11|///@16|187@21|/@31|","" -"Byte-Reverse Halfword X-form","brh RA,RS","31@0|RS@6|RA@11|///@16|219@21|/@31|","" -"Byte-Reverse Word X-form","brw RA,RS","31@0|RS@6|RA@11|///@16|155@21|/@31|","" -"Centrifuge Doubleword X-form","cfuged RA,RS,RB","31@0|RS@6|RA@11|RB@16|220@21|/@31|","" -"Count Leading Zeros Doubleword under bit Mask X-form","cntlzdm RA,RS,RB","31@0|RS@6|RA@11|RB@16|59@21|/@31|","" -"Count Trailing Zeros Doubleword under bit Mask X-form","cnttzdm RA,RS,RB","31@0|RS@6|RA@11|RB@16|571@21|/@31|","" -"DFP Convert From Fixed Quadword Quad X-form","dcffixqq FRTp,VRB","63@0|FRTp@6|0@11|VRB@16|994@21|/@31|","" -"DFP Convert To Fixed Quadword Quad X-form","dctfixqq VRT,FRBp","63@0|VRT@6|1@11|FRBp@16|994@21|/@31|","" -"Load VSX Vector Special Value Quadword X-form","lxvkq XT,UIM","60@0|T@6|31@11|UIM@16|360@21|TX@31|","" -"Load VSX Vector Paired DQ-form","lxvp XTp,DQ(RA)","6@0|Tp@6|TX@10|RA@11|DQ@16|0@28|","" -"Load VSX Vector Paired Indexed X-form","lxvpx XTp,RA,RB","31@0|Tp@6|TX@10|RA@11|RB@16|333@21|/@31|","" -"Load VSX Vector Rightmost Byte Indexed X-form","lxvrbx XT,RA,RB","31@0|T@6|RA@11|RB@16|13@21|TX@31|","" -"Load VSX Vector Rightmost Doubleword Indexed X-form","lxvrdx XT,RA,RB","31@0|T@6|RA@11|RB@16|109@21|TX@31|","" -"Load VSX Vector Rightmost Halfword Indexed X-form","lxvrhx XT,RA,RB","31@0|T@6|RA@11|RB@16|45@21|TX@31|","" -"Load VSX Vector Rightmost Word Indexed X-form","lxvrwx XT,RA,RB","31@0|T@6|RA@11|RB@16|77@21|TX@31|","" -"Move to VSR Byte Mask VX-form","mtvsrbm VRT,RB","4@0|VRT@6|16@11|RB@16|1602@21|","" -"Move To VSR Byte Mask Immediate DX-form","mtvsrbmi VRT,bm","4@0|VRT@6|b1@11|b0@16|10@26|b2@31|","" -"Move to VSR Doubleword Mask VX-form","mtvsrdm VRT,RB","4@0|VRT@6|19@11|RB@16|1602@21|","" -"Move to VSR Halfword Mask VX-form","mtvsrhm VRT,RB","4@0|VRT@6|17@11|RB@16|1602@21|","" -"Move to VSR Quadword Mask VX-form","mtvsrqm VRT,RB","4@0|VRT@6|20@11|RB@16|1602@21|","" 
-"Move to VSR Word Mask VX-form","mtvsrwm VRT,RB","4@0|VRT@6|18@11|RB@16|1602@21|","" -"Prefixed Add Immediate MLS:D-form","paddi RT,RA,SI,R",",1@0|2@6|0@8|//@9|R@11|//@12|si0@14|,14@0|RT@6|RA@11|si1@16|","" -"Parallel Bits Deposit Doubleword X-form","pdepd RA,RS,RB","31@0|RS@6|RA@11|RB@16|156@21|/@31|","" -"Parallel Bits Extract Doubleword X-form","pextd RA,RS,RB","31@0|RS@6|RA@11|RB@16|188@21|/@31|","" -"Prefixed Load Byte and Zero MLS:D-form","plbz RT,D(RA),R",",1@0|2@6|0@8|//@9|R@11|//@12|d0@14|,34@0|RT@6|RA@11|d1@16|","" -"Prefixed Load Doubleword 8LS:D-form","pld RT,D(RA),R",",1@0|0@6|0@8|//@9|R@11|//@12|d0@14|,57@0|RT@6|RA@11|d1@16|","" -"Prefixed Load Floating-Point Double MLS:D-form","plfd FRT,D(RA),R",",1@0|2@6|0@8|//@9|R@11|//@12|d0@14|,50@0|FRT@6|RA@11|d1@16|","" -"Prefixed Load Floating-Point Single MLS:D-form","plfs FRT,D(RA),R",",1@0|2@6|0@8|//@9|R@11|//@12|d0@14|,48@0|FRT@6|RA@11|d1@16|","" -"Prefixed Load Halfword Algebraic MLS:D-form","plha RT,D(RA),R",",1@0|2@6|0@8|//@9|R@11|//@12|d0@14|,42@0|RT@6|RA@11|d1@16|","" -"Prefixed Load Halfword and Zero MLS:D-form","plhz RT,D(RA),R",",1@0|2@6|0@8|//@9|R@11|//@12|d0@14|,40@0|RT@6|RA@11|d1@16|","" -"Prefixed Load Quadword 8LS:D-form","plq RTp,D(RA),R",",1@0|0@6|0@8|//@9|R@11|//@12|d0@14|,56@0|RTp@6|RA@11|d1@16|","" -"Prefixed Load Word Algebraic 8LS:D-form","plwa RT,D(RA),R",",1@0|0@6|0@8|//@9|R@11|//@12|d0@14|,41@0|RT@6|RA@11|d1@16|","" -"Prefixed Load Word and Zero MLS:D-form","plwz RT,D(RA),R",",1@0|2@6|0@8|//@9|R@11|//@12|d0@14|,32@0|RT@6|RA@11|d1@16|","" -"Prefixed Load VSX Scalar Doubleword 8LS:D-form","plxsd VRT,D(RA),R",",1@0|0@6|0@8|//@9|R@11|//@12|d0@14|,42@0|VRT@6|RA@11|d1@16|","" -"Prefixed Load VSX Scalar Single-Precision 8LS:D-form","plxssp VRT,D(RA),R",",1@0|0@6|0@8|//@9|R@11|//@12|d0@14|,43@0|VRT@6|RA@11|d1@16|","" -"Prefixed Load VSX Vector 8LS:D-form","plxv XT,D(RA),R",",1@0|0@6|0@8|//@9|R@11|//@12|d0@14|,25@0|TX@5|T@6|RA@11|d1@16|","" -"Prefixed Load VSX Vector Paired 
8LS:D-form","plxvp XTp,D(RA),R",",1@0|0@6|0@8|//@9|R@11|//@12|d0@14|,58@0|Tp@6|TX@10|RA@11|d1@16|","" -"Prefixed Masked VSX Vector bfloat16 GER (Rank-2 Update) MMIRR:XX3-form","pmxvbf16ger2 AT,XA,XB,XMSK,YMSK,PMSK",",1@0|3@6|9@8|//@12|/@14|/@15|PMSK@16|///@18|XMSK@24|YMSK@28|,59@0|AT@6|//@9|A@11|B@16|51@21|AX@29|BX@30|/@31|","" -"Prefixed Masked VSX Vector bfloat16 GER (Rank-2 Update) Negative multiply, Negative accumulate MMIRR:XX3-form","pmxvbf16ger2nn AT,XA,XB,XMSK,YMSK,PMSK",",1@0|3@6|9@8|//@12|/@14|/@15|PMSK@16|///@18|XMSK@24|YMSK@28|,59@0|AT@6|//@9|A@11|B@16|242@21|AX@29|BX@30|/@31|","" -"Prefixed Masked VSX Vector bfloat16 GER (Rank-2 Update) Negative multiply, Positive accumulate MMIRR:XX3-form","pmxvbf16ger2np AT,XA,XB,XMSK,YMSK,PMSK",",1@0|3@6|9@8|//@12|/@14|/@15|PMSK@16|///@18|XMSK@24|YMSK@28|,59@0|AT@6|//@9|A@11|B@16|114@21|AX@29|BX@30|/@31|","" -"Prefixed Masked VSX Vector bfloat16 GER (Rank-2 Update) Positive multiply, Negative accumulate MMIRR:XX3-form","pmxvbf16ger2pn AT,XA,XB,XMSK,YMSK,PMSK",",1@0|3@6|9@8|//@12|/@14|/@15|PMSK@16|///@18|XMSK@24|YMSK@28|,59@0|AT@6|//@9|A@11|B@16|178@21|AX@29|BX@30|/@31|","" -"Prefixed Masked VSX Vector bfloat16 GER (Rank-2 Update) Positive multiply, Positive accumulate MMIRR:XX3-form","pmxvbf16ger2pp AT,XA,XB,XMSK,YMSK,PMSK",",1@0|3@6|9@8|//@12|/@14|/@15|PMSK@16|///@18|XMSK@24|YMSK@28|,59@0|AT@6|//@9|A@11|B@16|50@21|AX@29|BX@30|/@31|","" -"Prefixed Masked VSX Vector 16-bit Floating-Point GER (rank-2 update) MMIRR:XX3-form","pmxvf16ger2 AT,XA,XB,XMSK,YMSK,PMSK",",1@0|3@6|9@8|//@12|/@14|/@15|PMSK@16|///@18|XMSK@24|YMSK@28|,59@0|AT@6|//@9|A@11|B@16|19@21|AX@29|BX@30|/@31|","" -"Prefixed Masked VSX Vector 16-bit Floating-Point GER (rank-2 update) Negative multiply, Negative accumulate MMIRR:XX3-form","pmxvf16ger2nn AT,XA,XB,XMSK,YMSK,PMSK",",1@0|3@6|9@8|//@12|/@14|/@15|PMSK@16|///@18|XMSK@24|YMSK@28|,59@0|AT@6|//@9|A@11|B@16|210@21|AX@29|BX@30|/@31|","" -"Prefixed Masked VSX Vector 16-bit Floating-Point GER (rank-2 
update) Negative multiply, Positive accumulate MMIRR:XX3-form","pmxvf16ger2np AT,XA,XB,XMSK,YMSK,PMSK",",1@0|3@6|9@8|//@12|/@14|/@15|PMSK@16|///@18|XMSK@24|YMSK@28|,59@0|AT@6|//@9|A@11|B@16|82@21|AX@29|BX@30|/@31|","" -"Prefixed Masked VSX Vector 16-bit Floating-Point GER (rank-2 update) Positive multiply, Negative accumulate MMIRR:XX3-form","pmxvf16ger2pn AT,XA,XB,XMSK,YMSK,PMSK",",1@0|3@6|9@8|//@12|/@14|/@15|PMSK@16|///@18|XMSK@24|YMSK@28|,59@0|AT@6|//@9|A@11|B@16|146@21|AX@29|BX@30|/@31|","" -"Prefixed Masked VSX Vector 16-bit Floating-Point GER (rank-2 update) Positive multiply, Positive accumulate MMIRR:XX3-form","pmxvf16ger2pp AT,XA,XB,XMSK,YMSK,PMSK",",1@0|3@6|9@8|//@12|/@14|/@15|PMSK@16|///@18|XMSK@24|YMSK@28|,59@0|AT@6|//@9|A@11|B@16|18@21|AX@29|BX@30|/@31|","" -"Prefixed Masked VSX Vector 32-bit Floating-Point GER (rank-1 update) MMIRR:XX3-form","pmxvf32ger AT,XA,XB,XMSK,YMSK",",1@0|3@6|9@8|//@12|/@14|/@15|///@16|XMSK@24|YMSK@28|,59@0|AT@6|//@9|A@11|B@16|27@21|AX@29|BX@30|/@31|","" -"Prefixed Masked VSX Vector 32-bit Floating-Point GER (rank-1 update) Negative multiply, Negative accumulate MMIRR:XX3-form","pmxvf32gernn AT,XA,XB,XMSK,YMSK",",1@0|3@6|9@8|//@12|/@14|/@15|///@16|XMSK@24|YMSK@28|,59@0|AT@6|//@9|A@11|B@16|218@21|AX@29|BX@30|/@31|","" -"Prefixed Masked VSX Vector 32-bit Floating-Point GER (rank-1 update) Negative multiply, Positive accumulate MMIRR:XX3-form","pmxvf32gernp AT,XA,XB,XMSK,YMSK",",1@0|3@6|9@8|//@12|/@14|/@15|///@16|XMSK@24|YMSK@28|,59@0|AT@6|//@9|A@11|B@16|90@21|AX@29|BX@30|/@31|","" -"Prefixed Masked VSX Vector 32-bit Floating-Point GER (rank-1 update) Positive multiply, Negative accumulate MMIRR:XX3-form","pmxvf32gerpn AT,XA,XB,XMSK,YMSK",",1@0|3@6|9@8|//@12|/@14|/@15|///@16|XMSK@24|YMSK@28|,59@0|AT@6|//@9|A@11|B@16|154@21|AX@29|BX@30|/@31|","" -"Prefixed Masked VSX Vector 32-bit Floating-Point GER (rank-1 update) Positive multiply, Positive accumulate MMIRR:XX3-form","pmxvf32gerpp 
AT,XA,XB,XMSK,YMSK",",1@0|3@6|9@8|//@12|/@14|/@15|///@16|XMSK@24|YMSK@28|,59@0|AT@6|//@9|A@11|B@16|26@21|AX@29|BX@30|/@31|","" -"Prefixed Masked VSX Vector 64-bit Floating-Point GER (rank-1 update) MMIRR:XX3-form","pmxvf64ger AT,XAp,XB,XMSK,YMSK",",1@0|3@6|9@8|//@12|/@14|/@15|///@16|XMSK@24|YMSK@28|//@30|,59@0|AT@6|//@9|Ap@11|B@16|59@21|AX@29|BX@30|/@31|","" -"Prefixed Masked VSX Vector 64-bit Floating-Point GER (rank-1 update) Negative multiply, Negative accumulate MMIRR:XX3-form","pmxvf64gernn AT,XAp,XB,XMSK,YMSK",",1@0|3@6|9@8|//@12|/@14|/@15|///@16|XMSK@24|YMSK@28|//@30|,59@0|AT@6|//@9|Ap@11|B@16|250@21|AX@29|BX@30|/@31|","" -"Prefixed Masked VSX Vector 64-bit Floating-Point GER (rank-1 update) Negative multiply, Positive accumulate MMIRR:XX3-form","pmxvf64gernp AT,XAp,XB,XMSK,YMSK",",1@0|3@6|9@8|//@12|/@14|/@15|///@16|XMSK@24|YMSK@28|//@30|,59@0|AT@6|//@9|Ap@11|B@16|122@21|AX@29|BX@30|/@31|","" -"Prefixed Masked VSX Vector 64-bit Floating-Point GER (rank-1 update) Positive multiply, Negative accumulate MMIRR:XX3-form","pmxvf64gerpn AT,XAp,XB,XMSK,YMSK",",1@0|3@6|9@8|//@12|/@14|/@15|///@16|XMSK@24|YMSK@28|//@30|,59@0|AT@6|//@9|Ap@11|B@16|186@21|AX@29|BX@30|/@31|","" -"Prefixed Masked VSX Vector 64-bit Floating-Point GER (rank-1 update) Positive multiply, Positive accumulate MMIRR:XX3-form","pmxvf64gerpp AT,XAp,XB,XMSK,YMSK",",1@0|3@6|9@8|//@12|/@14|/@15|///@16|XMSK@24|YMSK@28|//@30|,59@0|AT@6|//@9|Ap@11|B@16|58@21|AX@29|BX@30|/@31|","" -"Prefixed Masked VSX Vector 16-bit Signed Integer GER (rank-2 update) MMIRR:XX3-form","pmxvi16ger2 AT,XA,XB,XMSK,YMSK,PMSK",",1@0|3@6|9@8|//@12|/@14|/@15|PMSK@16|///@18|XMSK@24|YMSK@28|,59@0|AT@6|//@9|A@11|B@16|75@21|AX@29|BX@30|/@31|","" -"Prefixed Masked VSX Vector 16-bit Signed Integer GER (rank-2 update) Positive multiply, Positive accumulate MMIRR:XX3-form","pmxvi16ger2pp AT,XA,XB,XMSK,YMSK,PMSK",",1@0|3@6|9@8|//@12|/@14|/@15|PMSK@16|///@18|XMSK@24|YMSK@28|,59@0|AT@6|//@9|A@11|B@16|107@21|AX@29|BX@30|/@31|","" -"Prefixed 
Masked VSX Vector 16-bit Signed Integer GER (rank-2 update) with Saturation MMIRR:XX3-form","pmxvi16ger2s AT,XA,XB,XMSK,YMSK,PMSK",",1@0|3@6|9@8|//@12|/@14|/@15|PMSK@16|///@18|XMSK@24|YMSK@28|,59@0|AT@6|//@9|A@11|B@16|43@21|AX@29|BX@30|/@31|","" -"Prefixed Masked VSX Vector 16-bit Signed Integer GER (rank-2 update) with Saturation Positive multiply, Positive accumulate MMIRR:XX3-form","pmxvi16ger2spp AT,XA,XB,XMSK,YMSK,PMSK",",1@0|3@6|9@8|//@12|/@14|/@15|PMSK@16|///@18|XMSK@24|YMSK@28|,59@0|AT@6|//@9|A@11|B@16|42@21|AX@29|BX@30|/@31|","" -"Prefixed Masked VSX Vector 4-bit Signed Integer GER (rank-8 update) MMIRR:XX3-form","pmxvi4ger8 AT,XA,XB,XMSK,YMSK,PMSK",",1@0|3@6|9@8|//@12|/@14|/@15|PMSK@16|XMSK@24|YMSK@28|,59@0|AT@6|//@9|A@11|B@16|35@21|AX@29|BX@30|/@31|","" -"Prefixed Masked VSX Vector 4-bit Signed Integer GER (rank-8 update) Positive multiply, Positive accumulate MMIRR:XX3-form","pmxvi4ger8pp AT,XA,XB,XMSK,YMSK,PMSK",",1@0|3@6|9@8|//@12|/@14|/@15|PMSK@16|XMSK@24|YMSK@28|,59@0|AT@6|//@9|A@11|B@16|34@21|AX@29|BX@30|/@31|","" -"Prefixed Masked VSX Vector 8-bit Signed/Unsigned Integer GER (rank-4 update) MMIRR:XX3-form","pmxvi8ger4 AT,XA,XB,XMSK,YMSK,PMSK",",1@0|3@6|9@8|//@12|/@14|/@15|PMSK@16|///@20|XMSK@24|YMSK@28|,59@0|AT@6|//@9|A@11|B@16|3@21|AX@29|BX@30|/@31|","" -"Prefixed Masked VSX Vector 8-bit Signed/Unsigned Integer GER (rank-4 update) Positive multiply, Positive accumulate MMIRR:XX3-form","pmxvi8ger4pp AT,XA,XB,XMSK,YMSK,PMSK",",1@0|3@6|9@8|//@12|/@14|/@15|PMSK@16|///@20|XMSK@24|YMSK@28|,59@0|AT@6|//@9|A@11|B@16|2@21|AX@29|BX@30|/@31|","" -"Prefixed Masked VSX Vector 8-bit Signed/Unsigned Integer GER (rank-4 update) with Saturate Positive multiply, Positive accumulate MMIRR:XX3-form","pmxvi8ger4spp AT,XA,XB,XMSK,YMSK,PMSK",",1@0|3@6|9@8|//@12|/@14|/@15|PMSK@16|///@20|XMSK@24|YMSK@28|,59@0|AT@6|//@9|A@11|B@16|99@21|AX@29|BX@30|/@31|","" -"Prefixed Nop MRR:*-form","pnop",",1@0|3@6|0@8|///@12|0@14|//@31|,///@0|","" -"Prefixed Store Byte 
MLS:D-form","pstb RS,D(RA),R",",1@0|2@6|0@8|//@9|R@11|//@12|d0@14|,38@0|RS@6|RA@11|d1@16|","" -"Prefixed Store Doubleword 8LS:D-form","pstd RS,D(RA),R",",1@0|0@6|0@8|//@9|R@11|//@12|d0@14|,61@0|RS@6|RA@11|d1@16|","" -"Prefixed Store Floating-Point Double MLS:D-form","pstfd FRS,D(RA),R",",1@0|2@6|0@8|//@9|R@11|//@12|d0@14|,54@0|FRS@6|RA@11|d1@16|","" -"Prefixed Store Floating-Point Single MLS:D-form","pstfs FRS,D(RA),R",",1@0|2@6|0@8|//@9|R@11|//@12|d0@14|,52@0|FRS@6|RA@11|d1@16|","" -"Prefixed Store Halfword MLS:D-form","psth RS,D(RA),R",",1@0|2@6|0@8|//@9|R@11|//@12|d0@14|,44@0|RS@6|RA@11|d1@16|","" -"Prefixed Store Quadword 8LS:D-form","pstq RSp,D(RA),R",",1@0|0@6|0@8|//@9|R@11|//@12|d0@14|,60@0|RSp@6|RA@11|d1@16|","" -"Prefixed Store Word MLS:D-form","pstw RS,D(RA),R",",1@0|2@6|0@8|//@9|R@11|//@12|d0@14|,36@0|RS@6|RA@11|d1@16|","" -"Prefixed Store VSX Scalar Doubleword 8LS:D-form","pstxsd VRS,D(RA),R",",1@0|0@6|0@8|//@9|R@11|//@12|d0@14|,46@0|VRS@6|RA@11|d1@16|","" -"Prefixed Store VSX Scalar Single-Precision 8LS:D-form","pstxssp VRS,D(RA),R",",1@0|0@6|0@8|//@9|R@11|//@12|d0@14|,47@0|VRS@6|RA@11|d1@16|","" -"Prefixed Store VSX Vector 8LS:D-form","pstxv XS,D(RA),R",",1@0|0@6|0@8|//@9|R@11|//@12|d0@14|,27@0|SX@5|S@6|RA@11|d1@16|","" -"Prefixed Store VSX Vector Paired 8LS:D-form","pstxvp XSp,D(RA),R",",1@0|0@6|0@8|//@9|R@11|//@12|d0@14|,62@0|Sp@6|SX@10|RA@11|d1@16|","" -"Set Boolean Condition X-form","setbc RT,BI","31@0|RT@6|BI@11|///@16|384@21|/@31|","" -"Set Boolean Condition Reverse X-form","setbcr RT,BI","31@0|RT@6|BI@11|///@16|416@21|/@31|","" -"Set Negative Boolean Condition X-form","setnbc RT,BI","31@0|RT@6|BI@11|///@16|448@21|/@31|","" -"Set Negative Boolean Condition Reverse X-form","setnbcr RT,BI","31@0|RT@6|BI@11|///@16|480@21|/@31|","" -"Store VSX Vector Paired DQ-form","stxvp XSp,DQ(RA)","6@0|Sp@6|SX@10|RA@11|DQ@16|1@28|","" -"Store VSX Vector Paired Indexed X-form","stxvpx XSp,RA,RB","31@0|Sp@6|SX@10|RA@11|RB@16|461@21|/@31|","" -"Store VSX Vector 
Rightmost Byte Indexed X-form","stxvrbx XS,RA,RB","31@0|S@6|RA@11|RB@16|141@21|SX@31|","" -"Store VSX Vector Rightmost Doubleword Indexed X-form","stxvrdx XS,RA,RB","31@0|S@6|RA@11|RB@16|237@21|SX@31|","" -"Store VSX Vector Rightmost Halfword Indexed X-form","stxvrhx XS,RA,RB","31@0|S@6|RA@11|RB@16|173@21|SX@31|","" -"Store VSX Vector Rightmost Word Indexed X-form","stxvrwx XS,RA,RB","31@0|S@6|RA@11|RB@16|205@21|SX@31|","" -"Vector Centrifuge Doubleword VX-form","vcfuged VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1357@21|","" -"Vector Clear Leftmost Bytes VX-form","vclrlb VRT,VRA,RB","4@0|VRT@6|VRA@11|RB@16|397@21|","" -"Vector Clear Rightmost Bytes VX-form","vclrrb VRT,VRA,RB","4@0|VRT@6|VRA@11|RB@16|461@21|","" -"Vector Count Leading Zeros Doubleword under bit Mask VX-form","vclzdm VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1924@21|","" -"Vector Compare Equal Quadword VC-form","vcmpequq VRT,VRA,VRB (Rc=0)|vcmpequq. VRT,VRA,VRB (Rc=1)","4@0|VRT@6|VRA@11|VRB@16|Rc@21|455@22|","" -"Vector Compare Greater Than Signed Quadword VC-form","vcmpgtsq VRT,VRA,VRB (Rc=0)|vcmpgtsq. VRT,VRA,VRB (Rc=1)","4@0|VRT@6|VRA@11|VRB@16|Rc@21|903@22|","" -"Vector Compare Greater Than Unsigned Quadword VC-form","vcmpgtuq VRT,VRA,VRB (Rc=0)|vcmpgtuq. 
VRT,VRA,VRB (Rc=1)","4@0|VRT@6|VRA@11|VRB@16|Rc@21|647@22|","" -"Vector Compare Signed Quadword VX-form","vcmpsq BF,VRA,VRB","4@0|BF@6|//@9|VRA@11|VRB@16|321@21|","" -"Vector Compare Unsigned Quadword VX-form","vcmpuq BF,VRA,VRB","4@0|BF@6|//@9|VRA@11|VRB@16|257@21|","" -"Vector Count Mask Bits Byte VX-form","vcntmbb RT,VRB,MP","4@0|RT@6|12@11|MP@15|VRB@16|1602@21|","" -"Vector Count Mask Bits Doubleword VX-form","vcntmbd RT,VRB,MP","4@0|RT@6|15@11|MP@15|VRB@16|1602@21|","" -"Vector Count Mask Bits Halfword VX-form","vcntmbh RT,VRB,MP","4@0|RT@6|13@11|MP@15|VRB@16|1602@21|","" -"Vector Count Mask Bits Word VX-form","vcntmbw RT,VRB,MP","4@0|RT@6|14@11|MP@15|VRB@16|1602@21|","" -"Vector Count Trailing Zeros Doubleword under bit Mask VX-form","vctzdm VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1988@21|","" -"Vector Divide Extended Signed Doubleword VX-form","vdivesd VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|971@21|","" -"Vector Divide Extended Signed Quadword VX-form","vdivesq VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|779@21|","" -"Vector Divide Extended Signed Word VX-form","vdivesw VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|907@21|","" -"Vector Divide Extended Unsigned Doubleword VX-form","vdiveud VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|715@21|","" -"Vector Divide Extended Unsigned Quadword VX-form","vdiveuq VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|523@21|","" -"Vector Divide Extended Unsigned Word VX-form","vdiveuw VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|651@21|","" -"Vector Divide Signed Doubleword VX-form","vdivsd VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|459@21|","" -"Vector Divide Signed Quadword VX-form","vdivsq VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|267@21|","" -"Vector Divide Signed Word VX-form","vdivsw VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|395@21|","" -"Vector Divide Unsigned Doubleword VX-form","vdivud VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|203@21|","" -"Vector Divide Unsigned Quadword VX-form","vdivuq VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|11@21|","" -"Vector Divide Unsigned Word 
VX-form","vdivuw VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|139@21|","" -"Vector Expand Byte Mask VX-form","vexpandbm VRT,VRB","4@0|VRT@6|0@11|VRB@16|1602@21|","" -"Vector Expand Doubleword Mask VX-form","vexpanddm VRT,VRB","4@0|VRT@6|3@11|VRB@16|1602@21|","" -"Vector Expand Halfword Mask VX-form","vexpandhm VRT,VRB","4@0|VRT@6|1@11|VRB@16|1602@21|","" -"Vector Expand Quadword Mask VX-form","vexpandqm VRT,VRB","4@0|VRT@6|4@11|VRB@16|1602@21|","" -"Vector Expand Word Mask VX-form","vexpandwm VRT,VRB","4@0|VRT@6|2@11|VRB@16|1602@21|","" -"Vector Extract Double Doubleword to VSR using GPR-specified Left-Index VA-form","vextddvlx VRT,VRA,VRB,RC","4@0|VRT@6|VRA@11|VRB@16|RC@21|30@26|","" -"Vector Extract Double Doubleword to VSR using GPR-specified Right-Index VA-form","vextddvrx VRT,VRA,VRB,RC","4@0|VRT@6|VRA@11|VRB@16|RC@21|31@26|","" -"Vector Extract Double Unsigned Byte to VSR using GPR-specified Left-Index VA-form","vextdubvlx VRT,VRA,VRB,RC","4@0|VRT@6|VRA@11|VRB@16|RC@21|24@26|","" -"Vector Extract Double Unsigned Byte to VSR using GPR-specified Right-Index VA-form","vextdubvrx VRT,VRA,VRB,RC","4@0|VRT@6|VRA@11|VRB@16|RC@21|25@26|","" -"Vector Extract Double Unsigned Halfword to VSR using GPR-specified Left-Index VA-form","vextduhvlx VRT,VRA,VRB,RC","4@0|VRT@6|VRA@11|VRB@16|RC@21|26@26|","" -"Vector Extract Double Unsigned Halfword to VSR using GPR-specified Right-Index VA-form","vextduhvrx VRT,VRA,VRB,RC","4@0|VRT@6|VRA@11|VRB@16|RC@21|27@26|","" -"Vector Extract Double Unsigned Word to VSR using GPR-specified Left-Index VA-form","vextduwvlx VRT,VRA,VRB,RC","4@0|VRT@6|VRA@11|VRB@16|RC@21|28@26|","" -"Vector Extract Double Unsigned Word to VSR using GPR-specified Right-Index VA-form","vextduwvrx VRT,VRA,VRB,RC","4@0|VRT@6|VRA@11|VRB@16|RC@21|29@26|","" -"Vector Extract Byte Mask VX-form","vextractbm RT,VRB","4@0|RT@6|8@11|VRB@16|1602@21|","" -"Vector Extract Doubleword Mask VX-form","vextractdm RT,VRB","4@0|RT@6|11@11|VRB@16|1602@21|","" -"Vector Extract Halfword Mask 
VX-form","vextracthm RT,VRB","4@0|RT@6|9@11|VRB@16|1602@21|","" -"Vector Extract Quadword Mask VX-form","vextractqm RT,VRB","4@0|RT@6|12@11|VRB@16|1602@21|","" -"Vector Extract Word Mask VX-form","vextractwm RT,VRB","4@0|RT@6|10@11|VRB@16|1602@21|","" -"Vector Extend Sign Doubleword to Quadword VX-form","vextsd2q VRT,VRB","4@0|VRT@6|27@11|VRB@16|1538@21|","" -"Vector Gather every Nth Bit VX-form","vgnb RT,VRB,N","4@0|RT@6|//@11|N@13|VRB@16|1228@21|","" -"Vector Insert Byte from GPR using GPR-specified Left-Index VX-form","vinsblx VRT,RA,RB","4@0|VRT@6|RA@11|RB@16|527@21|","" -"Vector Insert Byte from GPR using GPR-specified Right-Index VX-form","vinsbrx VRT,RA,RB","4@0|VRT@6|RA@11|RB@16|783@21|","" -"Vector Insert Byte from VSR using GPR-specified Left-Index VX-form","vinsbvlx VRT,RA,VRB","4@0|VRT@6|RA@11|VRB@16|15@21|","" -"Vector Insert Byte from VSR using GPR-specified Right-Index VX-form","vinsbvrx VRT,RA,VRB","4@0|VRT@6|RA@11|VRB@16|271@21|","" -"Vector Insert Doubleword from GPR using immediate-specified index VX-form","vinsd VRT,RB,UIM","4@0|VRT@6|/@11|UIM@12|RB@16|463@21|","" -"Vector Insert Doubleword from GPR using GPR-specified Left-Index VX-form","vinsdlx VRT,RA,RB","4@0|VRT@6|RA@11|RB@16|719@21|","" -"Vector Insert Doubleword from GPR using GPR-specified Right-Index VX-form","vinsdrx VRT,RA,RB","4@0|VRT@6|RA@11|RB@16|975@21|","" -"Vector Insert Halfword from GPR using GPR-specified Left-Index VX-form","vinshlx VRT,RA,RB","4@0|VRT@6|RA@11|RB@16|591@21|","" -"Vector Insert Halfword from GPR using GPR-specified Right-Index VX-form","vinshrx VRT,RA,RB","4@0|VRT@6|RA@11|RB@16|847@21|","" -"Vector Insert Halfword from VSR using GPR-specified Left-Index VX-form","vinshvlx VRT,RA,VRB","4@0|VRT@6|RA@11|VRB@16|79@21|","" -"Vector Insert Halfword from VSR using GPR-specified Right-Index VX-form","vinshvrx VRT,RA,VRB","4@0|VRT@6|RA@11|VRB@16|335@21|","" -"Vector Insert Word from GPR using immediate-specified index VX-form","vinsw 
VRT,RB,UIM","4@0|VRT@6|/@11|UIM@12|RB@16|207@21|","" -"Vector Insert Word from GPR using GPR-specified Left-Index VX-form","vinswlx VRT,RA,RB","4@0|VRT@6|RA@11|RB@16|655@21|","" -"Vector Insert Word from GPR using GPR-specified Right-Index VX-form","vinswrx VRT,RA,RB","4@0|VRT@6|RA@11|RB@16|911@21|","" -"Vector Insert Word from VSR using GPR-specified Left-Index VX-form","vinswvlx VRT,RA,VRB","4@0|VRT@6|RA@11|VRB@16|143@21|","" -"Vector Insert Word from VSR using GPR-specified Left-Index VX-form","vinswvrx VRT,RA,VRB","4@0|VRT@6|RA@11|VRB@16|399@21|","" -"Vector Modulo Signed Doubleword VX-form","vmodsd VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1995@21|","" -"Vector Modulo Signed Quadword VX-form","vmodsq VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1803@21|","" -"Vector Modulo Signed Word VX-form","vmodsw VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1931@21|","" -"Vector Modulo Unsigned Doubleword VX-form","vmodud VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1739@21|","" -"Vector Modulo Unsigned Quadword VX-form","vmoduq VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1547@21|","" -"Vector Modulo Unsigned Word VX-form","vmoduw VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1675@21|","" -"Vector Multiply-Sum & write Carry-out Unsigned Doubleword VA-form","vmsumcud VRT,VRA,VRB,VRC","4@0|VRT@6|VRA@11|VRB@16|VRC@21|23@26|","" -"Vector Multiply Even Signed Doubleword VX-form","vmulesd VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|968@21|","" -"Vector Multiply Even Unsigned Doubleword VX-form","vmuleud VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|712@21|","" -"Vector Multiply High Signed Doubleword VX-form","vmulhsd VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|969@21|","" -"Vector Multiply High Signed Word VX-form","vmulhsw VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|905@21|","" -"Vector Multiply High Unsigned Doubleword VX-form","vmulhud VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|713@21|","" -"Vector Multiply High Unsigned Word VX-form","vmulhuw VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|649@21|","" -"Vector Multiply Low Doubleword VX-form","vmulld 
VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|457@21|","" -"Vector Multiply Odd Signed Doubleword VX-form","vmulosd VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|456@21|","" -"Vector Multiply Odd Unsigned Doubleword VX-form","vmuloud VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|200@21|","" -"Vector Parallel Bits Deposit Doubleword VX-form","vpdepd VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1485@21|","" -"Vector Parallel Bits Extract Doubleword VX-form","vpextd VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1421@21|","" -"Vector Rotate Left Quadword VX-form","vrlq VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|5@21|","" -"Vector Rotate Left Quadword then Mask Insert VX-form","vrlqmi VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|69@21|","" -"Vector Rotate Left Quadword then AND with Mask VX-form","vrlqnm VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|325@21|","" -"Vector Shift Left Double by Bit Immediate VN-form","vsldbi VRT,VRA,VRB,SH","4@0|VRT@6|VRA@11|VRB@16|0@21|SH@23|22@26|","" -"Vector Shift Left Quadword VX-form","vslq VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|261@21|","" -"Vector Shift Right Algebraic Quadword VX-form","vsraq VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|773@21|","" -"Vector Shift Right Double by Bit Immediate VN-form","vsrdbi VRT,VRA,VRB,SH","4@0|VRT@6|VRA@11|VRB@16|1@21|SH@23|22@26|","" -"Vector Shift Right Quadword VX-form","vsrq VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|517@21|","" -"Vector String Isolate Byte Left-justified VX-form","vstribl VRT,VRB (Rc=0)|vstribl. VRT,VRB (Rc=1)","4@0|VRT@6|0@11|VRB@16|Rc@21|13@22|","" -"Vector String Isolate Byte Right-justified VX-form","vstribr VRT,VRB (Rc=0)|vstribr. VRT,VRB (Rc=1)","4@0|VRT@6|1@11|VRB@16|Rc@21|13@22|","" -"Vector String Isolate Halfword Left-justified VX-form","vstrihl VRT,VRB (Rc=0)|vstrihl. VRT,VRB (Rc=1)","4@0|VRT@6|2@11|VRB@16|Rc@21|13@22|","" -"Vector String Isolate Halfword Right-justified VX-form","vstrihr VRT,VRB (Rc=0)|vstrihr. 
VRT,VRB (Rc=1)","4@0|VRT@6|3@11|VRB@16|Rc@21|13@22|","" -"VSX Scalar Compare Equal Quad-Precision X-form","xscmpeqqp VRT,VRA,VRB","63@0|VRT@6|VRA@11|VRB@16|68@21|/@31|","" -"VSX Scalar Compare Greater Than or Equal Quad-Precision X-form","xscmpgeqp VRT,VRA,VRB","63@0|VRT@6|VRA@11|VRB@16|196@21|/@31|","" -"VSX Scalar Compare Greater Than Quad-Precision X-form","xscmpgtqp VRT,VRA,VRB","63@0|VRT@6|VRA@11|VRB@16|228@21|/@31|","" -"VSX Scalar Convert with round to zero Quad-Precision to Signed Quadword X-form","xscvqpsqz VRT,VRB","63@0|VRT@6|8@11|VRB@16|836@21|/@31|","" -"VSX Scalar Convert with round to zero Quad-Precision to Unsigned Quadword X-form","xscvqpuqz VRT,VRB","63@0|VRT@6|0@11|VRB@16|836@21|/@31|","" -"VSX Scalar Convert with round Signed Quadword to Quad-Precision X-form","xscvsqqp VRT,VRB","63@0|VRT@6|11@11|VRB@16|836@21|/@31|","" -"VSX Scalar Convert with round Unsigned Quadword to Quad-Precision X-form","xscvuqqp VRT,VRB","63@0|VRT@6|3@11|VRB@16|836@21|/@31|","" -"VSX Scalar Maximum Type-C Quad-Precision X-form","xsmaxcqp VRT,VRA,VRB","63@0|VRT@6|VRA@11|VRB@16|676@21|/@31|","" -"VSX Scalar Minimum Type-C Quad-Precision X-form","xsmincqp VRT,VRA,VRB","63@0|VRT@6|VRA@11|VRB@16|740@21|/@31|","" -"VSX Vector bfloat16 GER (Rank-2 Update) XX3-form","xvbf16ger2 AT,XA,XB","59@0|AT@6|//@9|A@11|B@16|51@21|AX@29|BX@30|/@31|","" -"VSX Vector bfloat16 GER (Rank-2 Update) Negative multiply, Negative accumulate XX3-form","xvbf16ger2nn AT,XA,XB","59@0|AT@6|//@9|A@11|B@16|242@21|AX@29|BX@30|/@31|","" -"VSX Vector bfloat16 GER (Rank-2 Update) Negative multiply, Positive accumulate XX3-form","xvbf16ger2np AT,XA,XB","59@0|AT@6|//@9|A@11|B@16|114@21|AX@29|BX@30|/@31|","" -"VSX Vector bfloat16 GER (Rank-2 Update) Positive multiply, Negative accumulate XX3-form","xvbf16ger2pn AT,XA,XB","59@0|AT@6|//@9|A@11|B@16|178@21|AX@29|BX@30|/@31|","" -"VSX Vector bfloat16 GER (Rank-2 Update) Positive multiply, Positive accumulate XX3-form","xvbf16ger2pp 
AT,XA,XB","59@0|AT@6|//@9|A@11|B@16|50@21|AX@29|BX@30|/@31|","" -"VSX Vector Convert bfloat16 to Single-Precision format XX2-form","xvcvbf16spn XT,XB","60@0|T@6|16@11|B@16|475@21|BX@30|TX@31|","" -"VSX Vector Convert with round Single-Precision to bfloat16 format XX2-form","xvcvspbf16 XT,XB","60@0|T@6|17@11|B@16|475@21|BX@30|TX@31|","" -"VSX Vector 16-bit Floating-Point GER (rank-2 update) XX3-form","xvf16ger2 AT,XA,XB","59@0|AT@6|//@9|A@11|B@16|19@21|AX@29|BX@30|/@31|","" -"VSX Vector 16-bit Floating-Point GER (rank-2 update) Negative multiply, Negative accumulate XX3-form","xvf16ger2nn AT,XA,XB","59@0|AT@6|//@9|A@11|B@16|210@21|AX@29|BX@30|/@31|","" -"VSX Vector 16-bit Floating-Point GER (rank-2 update) Negative multiply, Positive accumulate XX3-form","xvf16ger2np AT,XA,XB","59@0|AT@6|//@9|A@11|B@16|82@21|AX@29|BX@30|/@31|","" -"VSX Vector 16-bit Floating-Point GER (rank-2 update) Positive multiply, Negative accumulate XX3-form","xvf16ger2pn AT,XA,XB","59@0|AT@6|//@9|A@11|B@16|146@21|AX@29|BX@30|/@31|","" -"VSX Vector 16-bit Floating-Point GER (rank-2 update) Positive multiply, Positive accumulate XX3-form","xvf16ger2pp AT,XA,XB","59@0|AT@6|//@9|A@11|B@16|18@21|AX@29|BX@30|/@31|","" -"VSX Vector 32-bit Floating-Point GER (rank-1 update) XX3-form","xvf32ger AT,XA,XB","59@0|AT@6|//@9|A@11|B@16|27@21|AX@29|BX@30|/@31|","" -"VSX Vector 32-bit Floating-Point GER (rank-1 update) Negative multiply, Negative accumulate XX3-form","xvf32gernn AT,XA,XB","59@0|AT@6|//@9|A@11|B@16|218@21|AX@29|BX@30|/@31|","" -"VSX Vector 32-bit Floating-Point GER (rank-1 update) Negative multiply, Positive accumulate XX3-form","xvf32gernp AT,XA,XB","59@0|AT@6|//@9|A@11|B@16|90@21|AX@29|BX@30|/@31|","" -"VSX Vector 32-bit Floating-Point GER (rank-1 update) Positive multiply, Negative accumulate XX3-form","xvf32gerpn AT,XA,XB","59@0|AT@6|//@9|A@11|B@16|154@21|AX@29|BX@30|/@31|","" -"VSX Vector 32-bit Floating-Point GER (rank-1 update) Positive multiply, Positive accumulate 
XX3-form","xvf32gerpp AT,XA,XB","59@0|AT@6|//@9|A@11|B@16|26@21|AX@29|BX@30|/@31|","" -"VSX Vector 64-bit Floating-Point GER (rank-1 update) XX3-form","xvf64ger AT,XAp,XB","59@0|AT@6|//@9|Ap@11|B@16|59@21|AX@29|BX@30|/@31|","" -"VSX Vector 64-bit Floating-Point GER (rank-1 update) Negative multiply, Negative accumulate XX3-form","xvf64gernn AT,XAp,XB","59@0|AT@6|//@9|Ap@11|B@16|250@21|AX@29|BX@30|/@31|","" -"VSX Vector 64-bit Floating-Point GER (rank-1 update) Negative multiply, Positive accumulate XX3-form","xvf64gernp AT,XAp,XB","59@0|AT@6|//@9|Ap@11|B@16|122@21|AX@29|BX@30|/@31|","" -"VSX Vector 64-bit Floating-Point GER (rank-1 update) Positive multiply, Negative accumulate XX3-form","xvf64gerpn AT,XAp,XB","59@0|AT@6|//@9|Ap@11|B@16|186@21|AX@29|BX@30|/@31|","" -"VSX Vector 64-bit Floating-Point GER (rank-1 update) Positive multiply, Positive accumulate XX3-form","xvf64gerpp AT,XAp,XB","59@0|AT@6|//@9|Ap@11|B@16|58@21|AX@29|BX@30|/@31|","" -"VSX Vector 16-bit Signed Integer GER (rank-2 update) XX3-form","xvi16ger2 AT,XA,XB","59@0|AT@6|//@9|A@11|B@16|75@21|AX@29|BX@30|/@31|","" -"VSX Vector 16-bit Signed Integer GER (rank-2 update) Positive multiply, Positive accumulate XX3-form","xvi16ger2pp AT,XA,XB","59@0|AT@6|//@9|A@11|B@16|107@21|AX@29|BX@30|/@31|","" -"VSX Vector 16-bit Signed Integer GER (rank-2 update) with Saturation XX3-form","xvi16ger2s AT,XA,XB","59@0|AT@6|//@9|A@11|B@16|43@21|AX@29|BX@30|/@31|","" -"VSX Vector 16-bit Signed Integer GER (rank-2 update) with Saturation Positive multiply, Positive accumulate XX3-form","xvi16ger2spp AT,XA,XB","59@0|AT@6|//@9|A@11|B@16|42@21|AX@29|BX@30|/@31|","" -"VSX Vector 4-bit Signed Integer GER (rank-8 update) XX3-form","xvi4ger8 AT,XA,XB","59@0|AT@6|//@9|A@11|B@16|35@21|AX@29|BX@30|/@31|","" -"VSX Vector 4-bit Signed Integer GER (rank-8 update) Positive multiply, Positive accumulate XX3-form","xvi4ger8pp AT,XA,XB","59@0|AT@6|//@9|A@11|B@16|34@21|AX@29|BX@30|/@31|","" -"VSX Vector 8-bit Signed/Unsigned Integer GER 
(rank-4 update) XX3-form","xvi8ger4 AT,XA,XB","59@0|AT@6|//@9|A@11|B@16|3@21|AX@29|BX@30|/@31|","" -"VSX Vector 8-bit Signed/Unsigned Integer GER (rank-4 update) Positive multiply, Positive accumulate XX3-form","xvi8ger4pp AT,XA,XB","59@0|AT@6|//@9|A@11|B@16|2@21|AX@29|BX@30|/@31|","" -"VSX Vector 8-bit Signed/Unsigned Integer GER (rank-4 update) with Saturate Positive multiply, Positive accumulate XX3-form","xvi8ger4spp AT,XA,XB","59@0|AT@6|//@9|A@11|B@16|99@21|AX@29|BX@30|/@31|","" -"VSX Vector Test Least-Significant Bit by Byte XX2-form","xvtlsbb BF,XB","60@0|BF@6|//@9|2@11|B@16|475@21|BX@30|/@31|","" -"VSX Vector Blend Variable Byte 8RR:XX4-form","xxblendvb XT,XA,XB,XC",",1@0|1@6|0@8|//@12|///@14|,33@0|T@6|A@11|B@16|C@21|0@26|CX@28|AX@29|BX@30|TX@31|","" -"VSX Vector Blend Variable Doubleword 8RR:XX4-form","xxblendvd XT,XA,XB,XC",",1@0|1@6|0@8|//@12|///@14|,33@0|T@6|A@11|B@16|C@21|3@26|CX@28|AX@29|BX@30|TX@31|","" -"VSX Vector Blend Variable Halfword 8RR:XX4-form","xxblendvh XT,XA,XB,XC",",1@0|1@6|0@8|//@12|///@14|,33@0|T@6|A@11|B@16|C@21|1@26|CX@28|AX@29|BX@30|TX@31|","" -"VSX Vector Blend Variable Word 8RR:XX4-form","xxblendvw XT,XA,XB,XC",",1@0|1@6|0@8|//@12|///@14|,33@0|T@6|A@11|B@16|C@21|2@26|CX@28|AX@29|BX@30|TX@31|","" -"VSX Vector Evaluate 8RR-XX4-form","xxeval XT,XA,XB,XC,IMM",",1@0|1@6|0@8|//@12|///@14|IMM@24|,34@0|T@6|A@11|B@16|C@21|1@26|CX@28|AX@29|BX@30|TX@31|","" -"VSX Vector Generate PCV from Byte Mask X-form","xxgenpcvbm XT,VRB,IMM","60@0|T@6|IMM@11|VRB@16|916@21|TX@31|","" -"VSX Vector Generate PCV from Doubleword Mask X-form","xxgenpcvdm XT,VRB,IMM","60@0|T@6|IMM@11|VRB@16|949@21|TX@31|","" -"VSX Vector Generate PCV from Halfword Mask X-form","xxgenpcvhm XT,VRB,IMM","60@0|T@6|IMM@11|VRB@16|917@21|TX@31|","" -"VSX Vector Generate PCV from Word Mask X-form","xxgenpcvwm XT,VRB,IMM","60@0|T@6|IMM@11|VRB@16|948@21|TX@31|","" -"VSX Move From Accumulator X-form","xxmfacc AS","31@0|AS@6|//@9|0@11|///@16|177@21|/@31|","" -"VSX Move To Accumulator 
X-form","xxmtacc AT","31@0|AT@6|//@9|1@11|///@16|177@21|/@31|","" -"VSX Vector Permute Extended 8RR:XX4-form","xxpermx XT,XA,XB,XC,UIM",",1@0|1@6|0@8|//@12|///@14|UIM@29|,34@0|T@6|A@11|B@16|C@21|0@26|CX@28|AX@29|BX@30|TX@31|","" -"VSX Set Accumulator to Zero X-form","xxsetaccz AT","31@0|AT@6|//@9|3@11|///@16|177@21|/@31|","" -"VSX Vector Splat Immediate32 Doubleword Indexed 8RR:D-form","xxsplti32dx XT,IX,IMM32",",1@0|1@6|0@8|//@12|//@14|imm0@16|,32@0|T@6|0@11|IX@14|TX@15|imm1@16|","" -"VSX Vector Splat Immediate Double-Precision 8RR:D-form","xxspltidp XT,IMM32",",1@0|1@6|0@8|//@12|//@14|imm0@16|,32@0|T@6|2@11|TX@15|imm1@16|","" -"VSX Vector Splat Immediate Word 8RR:D-form","xxspltiw XT,IMM32",",1@0|1@6|0@8|//@12|//@14|imm0@16|,32@0|T@6|3@11|TX@15|imm1@16|","" -"Ultravisor Message Clear X-form","msgclru RB","31@0|///@6|///@11|RB@16|110@21|/@31|","" -"Ultravisor Message SendX-form","msgsndu RB","31@0|///@6|///@11|RB@16|78@21|/@31|","" -"Ultravisor Return From Interrupt Doubleword XL-form","urfid","19@0|///@6|///@11|///@16|306@21|/@31|","" -"Add Extended using alternate carry bit Z23-form","addex RT,RA,RB,CY","31@0|RT@6|RA@11|RB@16|CY@21|170@23|/@31|","" -"Move From FPSCR Control & Set DRN X-form","mffscdrn FRT,FRB","63@0|FRT@6|20@11|FRB@16|583@21|/@31|","" -"Move From FPSCR Control & Set DRN Immediate X-form","mffscdrni FRT,DRM","63@0|FRT@6|21@11|//@16|DRM@18|583@21|/@31|","" -"Move From FPSCR & Clear Enables X-form","mffsce FRT","63@0|FRT@6|1@11|///@16|583@21|/@31|","" -"Move From FPSCR Control & Set RN X-form","mffscrn FRT,FRB","63@0|FRT@6|22@11|FRB@16|583@21|/@31|","" -"Move From FPSCR Control & Set RN Immediate X-form","mffscrni FRT,RM","63@0|FRT@6|23@11|///@16|RM@19|583@21|/@31|","" -"Move From FPSCR Lightweight X-form","mffsl FRT","63@0|FRT@6|24@11|///@16|583@21|/@31|","" -"SLB Invalidate All Global X-form","slbiag RS, L","31@0|RS@6|///@11|L@15|///@16|850@21|/@31|","" -"Vector Multiply-Sum Unsigned Doubleword Modulo VA-form","vmsumudm 
VRT,VRA,VRB,VRC","4@0|VRT@6|VRA@11|VRB@16|VRC@21|35@26|","" -"Add PC Immediate Shifted DX-form","addpcis RT,D","19@0|RT@6|d1@11|d0@16|2@26|d2@31|","" -"Decimal Convert From National VX-form","bcdcfn. VRT,VRB,PS","4@0|VRT@6|7@11|VRB@16|1@21|PS@22|385@23|","" -"Decimal Convert From Signed Quadword VX-form","bcdcfsq. VRT,VRB,PS","4@0|VRT@6|2@11|VRB@16|1@21|PS@22|385@23|","" -"Decimal Convert From Zoned VX-form","bcdcfz. VRT,VRB,PS","4@0|VRT@6|6@11|VRB@16|1@21|PS@22|385@23|","" -"Decimal Copy Sign VX-form","bcdcpsgn. VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|833@21|","" -"Decimal Convert To National VX-form","bcdctn. VRT,VRB","4@0|VRT@6|5@11|VRB@16|1@21|/@22|385@23|","" -"Decimal Convert To Signed Quadword VX-form","bcdctsq. VRT,VRB","4@0|VRT@6|0@11|VRB@16|1@21|/@22|385@23|","" -"Decimal Convert To Zoned VX-form","bcdctz. VRT,VRB,PS","4@0|VRT@6|4@11|VRB@16|1@21|PS@22|385@23|","" -"Decimal Shift VX-form","bcds. VRT,VRA,VRB,PS","4@0|VRT@6|VRA@11|VRB@16|1@21|PS@22|193@23|","" -"Decimal Set Sign VX-form","bcdsetsgn. VRT,VRB,PS","4@0|VRT@6|31@11|VRB@16|1@21|PS@22|385@23|","" -"Decimal Shift and Round VX-form","bcdsr. VRT,VRA,VRB,PS","4@0|VRT@6|VRA@11|VRB@16|1@21|PS@22|449@23|","" -"Decimal Truncate VX-form","bcdtrunc. VRT,VRA,VRB,PS","4@0|VRT@6|VRA@11|VRB@16|1@21|PS@22|257@23|","" -"Decimal Unsigned Shift VX-form","bcdus. VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1@21|/@22|129@23|","" -"Decimal Unsigned Truncate VX-form","bcdutrunc. VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1@21|/@22|321@23|","" -"Compare Equal Byte X-form","cmpeqb BF,RA,RB","31@0|BF@6|//@9|RA@11|RB@16|224@21|/@31|","" -"Compare Ranged Byte X-form","cmprb BF,L,RA,RB","31@0|BF@6|/@9|L@10|RA@11|RB@16|192@21|/@31|","" -"Count Trailing Zeros Doubleword X-form","cnttzd RA,RS (Rc=0)|cnttzd. RA,RS (Rc=1)","31@0|RS@6|RA@11|///@16|570@21|Rc@31|","" -"Count Trailing Zeros Word X-form","cnttzw RA,RS (Rc=0)|cnttzw. 
RA,RS (Rc=1)","31@0|RS@6|RA@11|///@16|538@21|Rc@31|","" -"Copy X-form","copy RA,RB","31@0|///@6|1@10|RA@11|RB@16|774@21|/@31|","" -"Copy-Paste Abort X-form","cpabort","31@0|///@6|///@11|///@16|838@21|/@31|","" -"Deliver A Random Number X-form","darn RT,L","31@0|RT@6|///@11|L@14|///@16|755@21|/@31|","" -"DFP Test Significance Immediate X-form","dtstsfi BF,UIM,FRB","59@0|BF@6|/@9|UIM@10|FRB@16|675@21|/@31|","" -"DFP Test Significance Immediate Quad X-form","dtstsfiq BF,UIM,FRBp","63@0|BF@6|/@9|UIM@10|FRBp@16|675@21|/@31|","" -"Extend Sign Word and Shift Left Immediate XS-form","extswsli RA,RS,SH (Rc=0)|extswsli. RA,RS,SH (Rc=1)","31@0|RS@6|RA@11|sh@16|445@21|sh@30|Rc@31|","" -"Load Doubleword ATomic X-form","ldat RT,RA,FC","31@0|RT@6|RA@11|FC@16|614@21|/@31|","" -"Load Word ATomic X-form","lwat RT,RA,FC","31@0|RT@6|RA@11|FC@16|582@21|/@31|","" -"Load VSX Scalar Doubleword DS-form","lxsd VRT,DS(RA)","57@0|VRT@6|RA@11|DS@16|2@30|","" -"Load VSX Scalar as Integer Byte & Zero Indexed X-form","lxsibzx XT,RA,RB","31@0|T@6|RA@11|RB@16|781@21|TX@31|","" -"Load VSX Scalar as Integer Halfword & Zero Indexed X-form","lxsihzx XT,RA,RB","31@0|T@6|RA@11|RB@16|813@21|TX@31|","" -"Load VSX Scalar Single-Precision DS-form","lxssp VRT,DS(RA)","57@0|VRT@6|RA@11|DS@16|3@30|","" -"Load VSX Vector DQ-form","lxv XT,DQ(RA)","61@0|T@6|RA@11|DQ@16|TX@28|1@29|","" -"Load VSX Vector Byte*16 Indexed X-form","lxvb16x XT,RA,RB","31@0|T@6|RA@11|RB@16|876@21|TX@31|","" -"Load VSX Vector Halfword*8 Indexed X-form","lxvh8x XT,RA,RB","31@0|T@6|RA@11|RB@16|812@21|TX@31|","" -"Load VSX Vector with Length X-form","lxvl XT,RA,RB","31@0|T@6|RA@11|RB@16|269@21|TX@31|","" -"Load VSX Vector with Length Left-justified X-form","lxvll XT,RA,RB","31@0|T@6|RA@11|RB@16|301@21|TX@31|","" -"Load VSX Vector Word & Splat Indexed X-form","lxvwsx XT,RA,RB","31@0|T@6|RA@11|RB@16|364@21|TX@31|","" -"Load VSX Vector Indexed X-form","lxvx XT,RA,RB","31@0|T@6|RA@11|RB@16|4@21|/@25|12@26|TX@31|","" -"Multiply-Add High 
Doubleword VA-form","maddhd RT,RA,RB,RC","4@0|RT@6|RA@11|RB@16|RC@21|48@26|","" -"Multiply-Add High Doubleword Unsigned VA-form","maddhdu RT,RA,RB,RC","4@0|RT@6|RA@11|RB@16|RC@21|49@26|","" -"Multiply-Add Low Doubleword VA-form","maddld RT,RA,RB,RC","4@0|RT@6|RA@11|RB@16|RC@21|51@26|","" -"Move to CR from XER Extended X-form","mcrxrx BF","31@0|BF@6|//@9|///@11|///@16|576@21|/@31|","" -"Move From VSR Lower Doubleword X-form","mfvsrld RA,XS","31@0|S@6|RA@11|///@16|307@21|SX@31|","" -"Modulo Signed Doubleword X-form","modsd RT,RA,RB","31@0|RT@6|RA@11|RB@16|777@21|/@31|","" -"Modulo Signed Word X-form","modsw RT,RA,RB","31@0|RT@6|RA@11|RB@16|779@21|/@31|","" -"Modulo Unsigned Doubleword X-form","modud RT,RA,RB","31@0|RT@6|RA@11|RB@16|265@21|/@31|","" -"Modulo Unsigned Word X-form","moduw RT,RA,RB","31@0|RT@6|RA@11|RB@16|267@21|/@31|","" -"Message Synchronize X-form","msgsync","31@0|///@6|///@11|///@16|886@21|/@31|","" -"Move To VSR Double Doubleword X-form","mtvsrdd XT,RA,RB","31@0|T@6|RA@11|RB@16|435@21|TX@31|","" -"Move To VSR Word & Splat X-form","mtvsrws XT,RA","31@0|T@6|RA@11|///@16|403@21|TX@31|","" -"Paste X-form","paste. 
RA,RB,L","31@0|///@6|L@10|RA@11|RB@16|902@21|1@31|","" -"Set Boolean X-form","setb RT,BFA","31@0|RT@6|BFA@11|//@14|///@16|128@21|/@31|","" -"SLB Invalidate Entry Global X-form","slbieg RS,RB","31@0|RS@6|///@11|RB@16|466@21|/@31|","" -"SLB Synchronize X-form","slbsync","31@0|///@6|///@11|///@16|338@21|/@31|","" -"Store Doubleword ATomic X-form","stdat RS,RA,FC","31@0|RS@6|RA@11|FC@16|742@21|/@31|","" -"Stop XL-form","stop","19@0|///@6|///@11|///@16|370@21|/@31|","" -"Store Word ATomic X-form","stwat RS,RA,FC","31@0|RS@6|RA@11|FC@16|710@21|/@31|","" -"Store VSX Scalar Doubleword DS-form","stxsd VRS,DS(RA)","61@0|VRS@6|RA@11|DS@16|2@30|","" -"Store VSX Scalar as Integer Byte Indexed X-form","stxsibx XS,RA,RB","31@0|S@6|RA@11|RB@16|909@21|SX@31|","" -"Store VSX Scalar as Integer Halfword Indexed X-form","stxsihx XS,RA,RB","31@0|S@6|RA@11|RB@16|941@21|SX@31|","" -"Store VSX Scalar Single DS-form","stxssp VRS,DS(RA)","61@0|VRS@6|RA@11|DS@16|3@30|","" -"Store VSX Vector DQ-form","stxv XS,DQ(RA)","61@0|S@6|RA@11|DQ@16|SX@28|5@29|","" -"Store VSX Vector Byte*16 Indexed X-form","stxvb16x XS,RA,RB","31@0|S@6|RA@11|RB@16|1004@21|SX@31|","" -"Store VSX Vector Halfword*8 Indexed X-form","stxvh8x XS,RA,RB","31@0|S@6|RA@11|RB@16|940@21|SX@31|","" -"Store VSX Vector with Length X-form","stxvl XS,RA,RB","31@0|S@6|RA@11|RB@16|397@21|SX@31|","" -"Store VSX Vector with Length Left-justified X-form","stxvll XS,RA,RB","31@0|S@6|RA@11|RB@16|429@21|SX@31|","" -"Store VSX Vector Indexed X-form","stxvx XS,RA,RB","31@0|S@6|RA@11|RB@16|396@21|SX@31|","" -"Vector Absolute Difference Unsigned Byte VX-form","vabsdub VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1027@21|","" -"Vector Absolute Difference Unsigned Halfword VX-form","vabsduh VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1091@21|","" -"Vector Absolute Difference Unsigned Word VX-form","vabsduw VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1155@21|","" -"Vector Bit Permute Doubleword VX-form","vbpermd VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1484@21|","" 
-"Vector Count Leading Zero Least-Significant Bits Byte VX-form","vclzlsbb RT,VRB","4@0|RT@6|0@11|VRB@16|1538@21|","" -"Vector Compare Not Equal Byte VC-form","vcmpneb VRT,VRA,VRB (Rc=0)|vcmpneb. VRT,VRA,VRB (Rc=1)","4@0|VRT@6|VRA@11|VRB@16|Rc@21|7@22|","" -"Vector Compare Not Equal Halfword VC-form","vcmpneh VRT,VRA,VRB (Rc=0)|vcmpneh. VRT,VRA,VRB (Rc=1)","4@0|VRT@6|VRA@11|VRB@16|Rc@21|71@22|","" -"Vector Compare Not Equal Word VC-form","vcmpnew VRT,VRA,VRB (Rc=0)|vcmpnew. VRT,VRA,VRB (Rc=1)","4@0|VRT@6|VRA@11|VRB@16|Rc@21|135@22|","" -"Vector Compare Not Equal or Zero Byte VC-form","vcmpnezb VRT,VRA,VRB (Rc=0)|vcmpnezb. VRT,VRA,VRB (Rc=1)","4@0|VRT@6|VRA@11|VRB@16|Rc@21|263@22|","" -"Vector Compare Not Equal or Zero Halfword VC-form","vcmpnezh VRT,VRA,VRB (Rc=0)|vcmpnezh. VRT,VRA,VRB (Rc=1)","4@0|VRT@6|VRA@11|VRB@16|Rc@21|327@22|","" -"Vector Compare Not Equal or Zero Word VC-form","vcmpnezw VRT,VRA,VRB (Rc=0)|vcmpnezw. VRT,VRA,VRB (Rc=1)","4@0|VRT@6|VRA@11|VRB@16|Rc@21|391@22|","" -"Vector Count Trailing Zeros Byte VX-form","vctzb VRT,VRB","4@0|VRT@6|28@11|VRB@16|1538@21|","" -"Vector Count Trailing Zeros Doubleword VX-form","vctzd VRT,VRB","4@0|VRT@6|31@11|VRB@16|1538@21|","" -"Vector Count Trailing Zeros Halfword VX-form","vctzh VRT,VRB","4@0|VRT@6|29@11|VRB@16|1538@21|","" -"Vector Count Trailing Zero Least-Significant Bits Byte VX-form","vctzlsbb RT,VRB","4@0|RT@6|1@11|VRB@16|1538@21|","" -"Vector Count Trailing Zeros Word VX-form","vctzw VRT,VRB","4@0|VRT@6|30@11|VRB@16|1538@21|","" -"Vector Extract Doubleword to VSR using immediate-specified index VX-form","vextractd VRT,VRB,UIM","4@0|VRT@6|/@11|UIM@12|VRB@16|717@21|","" -"Vector Extract Unsigned Byte to VSR using immediate-specified index VX-form","vextractub VRT,VRB,UIM","4@0|VRT@6|/@11|UIM@12|VRB@16|525@21|","" -"Vector Extract Unsigned Halfword to VSR using immediate-specified index VX-form","vextractuh VRT,VRB,UIM","4@0|VRT@6|/@11|UIM@12|VRB@16|589@21|","" -"Vector Extract Unsigned Word to VSR using 
immediate-specified index VX-form","vextractuw VRT,VRB,UIM","4@0|VRT@6|/@11|UIM@12|VRB@16|653@21|","" -"Vector Extend Sign Byte To Doubleword VX-form","vextsb2d VRT,VRB","4@0|VRT@6|24@11|VRB@16|1538@21|","" -"Vector Extend Sign Byte To Word VX-form","vextsb2w VRT,VRB","4@0|VRT@6|16@11|VRB@16|1538@21|","" -"Vector Extend Sign Halfword To Doubleword VX-form","vextsh2d VRT,VRB","4@0|VRT@6|25@11|VRB@16|1538@21|","" -"Vector Extend Sign Halfword To Word VX-form","vextsh2w VRT,VRB","4@0|VRT@6|17@11|VRB@16|1538@21|","" -"Vector Extend Sign Word To Doubleword VX-form","vextsw2d VRT,VRB","4@0|VRT@6|26@11|VRB@16|1538@21|","" -"Vector Extract Unsigned Byte to GPR using GPR-specified Left-Index VX-form","vextublx RT,RA,VRB","4@0|RT@6|RA@11|VRB@16|1549@21|","" -"Vector Extract Unsigned Byte to GPR using GPR-specified Right-Index VX-form","vextubrx RT,RA,VRB","4@0|RT@6|RA@11|VRB@16|1805@21|","" -"Vector Extract Unsigned Halfword to GPR using GPR-specified Left-Index VX-form","vextuhlx RT,RA,VRB","4@0|RT@6|RA@11|VRB@16|1613@21|","" -"Vector Extract Unsigned Halfword to GPR using GPR-specified Right-Index VX-form","vextuhrx RT,RA,VRB","4@0|RT@6|RA@11|VRB@16|1869@21|","" -"Vector Extract Unsigned Word to GPR using GPR-specified Left-Index VX-form","vextuwlx RT,RA,VRB","4@0|RT@6|RA@11|VRB@16|1677@21|","" -"Vector Extract Unsigned Word to GPR using GPR-specified Right-Index VX-form","vextuwrx RT,RA,VRB","4@0|RT@6|RA@11|VRB@16|1933@21|","" -"Vector Insert Byte from VSR using immediate-specified index VX-form","vinsertb VRT,VRB,UIM","4@0|VRT@6|/@11|UIM@12|VRB@16|781@21|","" -"Vector Insert Doubleword from VSR using immediate-specified index VX-form","vinsertd VRT,VRB,UIM","4@0|VRT@6|/@11|UIM@12|VRB@16|973@21|","" -"Vector Insert Halfword from VSR using immediate-specified index VX-form","vinserth VRT,VRB,UIM","4@0|VRT@6|/@11|UIM@12|VRB@16|845@21|","" -"Vector Insert Word from VSR using immediate-specified index VX-form","vinsertw VRT,VRB,UIM","4@0|VRT@6|/@11|UIM@12|VRB@16|909@21|","" 
-"Vector Multiply-by-10 & write Carry-out Unsigned Quadword VX-form","vmul10cuq VRT,VRA","4@0|VRT@6|VRA@11|///@16|1@21|","" -"Vector Multiply-by-10 Extended & write Carry-out Unsigned Quadword VX-form","vmul10ecuq VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|65@21|","" -"Vector Multiply-by-10 Extended Unsigned Quadword VX-form","vmul10euq VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|577@21|","" -"Vector Multiply-by-10 Unsigned Quadword VX-form","vmul10uq VRT,VRA","4@0|VRT@6|VRA@11|///@16|513@21|","" -"Vector Negate Doubleword VX-form","vnegd VRT,VRB","4@0|VRT@6|7@11|VRB@16|1538@21|","" -"Vector Negate Word VX-form","vnegw VRT,VRB","4@0|VRT@6|6@11|VRB@16|1538@21|","" -"Vector Permute Right-indexed VA-form","vpermr VRT,VRA,VRB,VRC","4@0|VRT@6|VRA@11|VRB@16|VRC@21|59@26|","" -"Vector Parity Byte Doubleword VX-form","vprtybd VRT,VRB","4@0|VRT@6|9@11|VRB@16|1538@21|","" -"Vector Parity Byte Quadword VX-form","vprtybq VRT,VRB","4@0|VRT@6|10@11|VRB@16|1538@21|","" -"Vector Parity Byte Word VX-form","vprtybw VRT,VRB","4@0|VRT@6|8@11|VRB@16|1538@21|","" -"Vector Rotate Left Doubleword then Mask Insert VX-form","vrldmi VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|197@21|","" -"Vector Rotate Left Doubleword then AND with Mask VX-form","vrldnm VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|453@21|","" -"Vector Rotate Left Word then Mask Insert VX-form","vrlwmi VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|133@21|","" -"Vector Rotate Left Word then AND with Mask VX-form","vrlwnm VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|389@21|","" -"Vector Shift Left Variable VX-form","vslv VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1860@21|","" -"Vector Shift Right Variable VX-form","vsrv VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1796@21|","" -"Wait X-form","wait WC,PL","31@0|??@6|/@8|WC@9|///@11|PL@14|///@16|30@21|/@31|","" -"VSX Scalar Absolute Quad-Precision X-form","xsabsqp VRT,VRB","63@0|VRT@6|0@11|VRB@16|804@21|/@31|","" -"VSX Scalar Add Quad-Precision [using round to Odd] X-form","xsaddqp VRT,VRA,VRB (RO=0)|xsaddqpo VRT,VRA,VRB 
(RO=1)","63@0|VRT@6|VRA@11|VRB@16|4@21|RO@31|","" -"VSX Scalar Compare Equal Double-Precision XX3-form","xscmpeqdp XT,XA,XB","60@0|T@6|A@11|B@16|3@21|AX@29|BX@30|TX@31|","" -"VSX Scalar Compare Exponents Double-Precision XX3-form","xscmpexpdp BF,XA,XB","60@0|BF@6|//@9|A@11|B@16|59@21|AX@29|BX@30|/@31|","" -"VSX Scalar Compare Exponents Quad-Precision X-form","xscmpexpqp BF,VRA,VRB","63@0|BF@6|//@9|VRA@11|VRB@16|164@21|/@31|","" -"VSX Scalar Compare Greater Than or Equal Double-Precision XX3-form","xscmpgedp XT,XA,XB","60@0|T@6|A@11|B@16|19@21|AX@29|BX@30|TX@31|","" -"VSX Scalar Compare Greater Than Double-Precision XX3-form","xscmpgtdp XT,XA,XB","60@0|T@6|A@11|B@16|11@21|AX@29|BX@30|TX@31|","" -"VSX Scalar Compare Ordered Quad-Precision X-form","xscmpoqp BF,VRA,VRB","63@0|BF@6|//@9|VRA@11|VRB@16|132@21|/@31|","" -"VSX Scalar Compare Unordered Quad-Precision X-form","xscmpuqp BF,VRA,VRB","63@0|BF@6|//@9|VRA@11|VRB@16|644@21|/@31|","" -"VSX Scalar Copy Sign Quad-Precision X-form","xscpsgnqp VRT,VRA,VRB","63@0|VRT@6|VRA@11|VRB@16|100@21|/@31|","" -"VSX Scalar Convert with round Double-Precision to Half-Precision format XX2-form","xscvdphp XT,XB","60@0|T@6|17@11|B@16|347@21|BX@30|TX@31|","" -"VSX Scalar Convert Double-Precision to Quad-Precision format X-form","xscvdpqp VRT,VRB","63@0|VRT@6|22@11|VRB@16|836@21|/@31|","" -"VSX Scalar Convert Half-Precision to Double-Precision format XX2-form","xscvhpdp XT,XB","60@0|T@6|16@11|B@16|347@21|BX@30|TX@31|","" -"VSX Scalar Convert with round Quad-Precision to Double-Precision format [using round to Odd] X-form","xscvqpdp VRT,VRB (RO=0)|xscvqpdpo VRT,VRB (RO=1)","63@0|VRT@6|20@11|VRB@16|836@21|RO@31|","" -"VSX Scalar Convert with round to zero Quad-Precision to Signed Doubleword format X-form","xscvqpsdz VRT,VRB","63@0|VRT@6|25@11|VRB@16|836@21|/@31|","" -"VSX Scalar Convert with round to zero Quad-Precision to Signed Word format X-form","xscvqpswz VRT,VRB","63@0|VRT@6|9@11|VRB@16|836@21|/@31|","" -"VSX Scalar Convert with 
round to zero Quad-Precision to Unsigned Doubleword format X-form","xscvqpudz VRT,VRB","63@0|VRT@6|17@11|VRB@16|836@21|/@31|","" -"VSX Scalar Convert with round to zero Quad-Precision to Unsigned Word format X-form","xscvqpuwz VRT,VRB","63@0|VRT@6|1@11|VRB@16|836@21|/@31|","" -"VSX Scalar Convert Signed Doubleword to Quad-Precision format X-form","xscvsdqp VRT,VRB","63@0|VRT@6|10@11|VRB@16|836@21|/@31|","" -"VSX Scalar Convert Unsigned Doubleword to Quad-Precision format X-form","xscvudqp VRT,VRB","63@0|VRT@6|2@11|VRB@16|836@21|/@31|","" -"VSX Scalar Divide Quad-Precision [using round to Odd] X-form","xsdivqp VRT,VRA,VRB (RO=0)|xsdivqpo VRT,VRA,VRB (RO=1)","63@0|VRT@6|VRA@11|VRB@16|548@21|RO@31|","" -"VSX Scalar Insert Exponent Double-Precision X-form","xsiexpdp XT,RA,RB","60@0|T@6|RA@11|RB@16|918@21|TX@31|","" -"VSX Scalar Insert Exponent Quad-Precision X-form","xsiexpqp VRT,VRA,VRB","63@0|VRT@6|VRA@11|VRB@16|868@21|/@31|","" -"VSX Scalar Multiply-Add Quad-Precision [using round to Odd] X-form","xsmaddqp VRT,VRA,VRB (RO=0)|xsmaddqpo VRT,VRA,VRB (RO=1)","63@0|VRT@6|VRA@11|VRB@16|388@21|RO@31|","" -"VSX Scalar Maximum Type-C Double-Precision XX3-form","xsmaxcdp XT,XA,XB","60@0|T@6|A@11|B@16|128@21|AX@29|BX@30|TX@31|","" -"VSX Scalar Maximum Type-J Double-Precision XX3-form","xsmaxjdp XT,XA,XB","60@0|T@6|A@11|B@16|144@21|AX@29|BX@30|TX@31|","" -"VSX Scalar Minimum Type-C Double-Precision XX3-form","xsmincdp XT,XA,XB","60@0|T@6|A@11|B@16|136@21|AX@29|BX@30|TX@31|","" -"VSX Scalar Minimum Type-J Double-Precision XX3-form","xsminjdp XT,XA,XB","60@0|T@6|A@11|B@16|152@21|AX@29|BX@30|TX@31|","" -"VSX Scalar Multiply-Subtract Quad-Precision [using round to Odd] X-form","xsmsubqp VRT,VRA,VRB (RO=0)|xsmsubqpo VRT,VRA,VRB (RO=1)","63@0|VRT@6|VRA@11|VRB@16|420@21|RO@31|","" -"VSX Scalar Multiply Quad-Precision [using round to Odd] X-form","xsmulqp VRT,VRA,VRB (RO=0)|xsmulqpo VRT,VRA,VRB (RO=1)","63@0|VRT@6|VRA@11|VRB@16|36@21|RO@31|","" -"VSX Scalar Negative Absolute 
Quad-Precision X-form","xsnabsqp VRT,VRB","63@0|VRT@6|8@11|VRB@16|804@21|TX@31|","" -"VSX Scalar Negate Quad-Precision X-form","xsnegqp VRT,VRB","63@0|VRT@6|16@11|VRB@16|804@21|/@31|","" -"VSX Scalar Negative Multiply-Add Quad-Precision [using round to Odd] X-form","xsnmaddqp VRT,VRA,VRB (RO=0)|xsnmaddqpo VRT,VRA,VRB (RO=1)","63@0|VRT@6|VRA@11|VRB@16|452@21|RO@31|","" -"VSX Scalar Negative Multiply-Subtract Quad-Precision [using round to Odd] X-form","xsnmsubqp VRT,VRA,VRB (RO=0)|xsnmsubqpo VRT,VRA,VRB (RO=1)","63@0|VRT@6|VRA@11|VRB@16|484@21|RO@31|","" -"VSX Scalar Round to Quad-Precision Integer [with Inexact] Z23-form","xsrqpi R,VRT,VRB,RMC (EX=0)|xsrqpix R,VRT,VRB,RMC (EX=1)","63@0|VRT@6|///@11|R@15|VRB@16|RMC@21|5@23|EX@31|","" -"VSX Scalar Round Quad-Precision to Double-Extended Precision Z23-form","xsrqpxp R,VRT,VRB,RMC","63@0|VRT@6|///@11|R@15|VRB@16|RMC@21|37@23|/@31|","" -"VSX Scalar Square Root Quad-Precision [using round to Odd] X-form","xssqrtqp VRT,VRB (RO=0)|xssqrtqpo VRT,VRB (RO=1)","63@0|VRT@6|27@11|VRB@16|804@21|RO@31|","" -"VSX Scalar Subtract Quad-Precision [using round to Odd] X-form","xssubqp VRT,VRA,VRB (RO=0)|xssubqpo VRT,VRA,VRB (RO=1)","63@0|VRT@6|VRA@11|VRB@16|516@21|RO@31|","" -"VSX Scalar Test Data Class Double-Precision XX2-form","xststdcdp BF,XB,DCMX","60@0|BF@6|DCMX@9|B@16|362@21|BX@30|/@31|","" -"VSX Scalar Test Data Class Quad-Precision X-form","xststdcqp BF,VRB,DCMX","63@0|BF@6|DCMX@9|VRB@16|708@21|/@31|","" -"VSX Scalar Test Data Class Single-Precision XX2-form","xststdcsp BF,XB,DCMX","60@0|BF@6|DCMX@9|B@16|298@21|BX@30|/@31|","" -"VSX Scalar Extract Exponent Double-Precision XX2-form","xsxexpdp RT,XB","60@0|RT@6|0@11|B@16|347@21|BX@30|/@31|","" -"VSX Scalar Extract Exponent Quad-Precision X-form","xsxexpqp VRT,VRB","63@0|VRT@6|2@11|VRB@16|804@21|/@31|","" -"VSX Scalar Extract Significand Double-Precision XX2-form","xsxsigdp RT,XB","60@0|RT@6|1@11|B@16|347@21|BX@30|/@31|","" -"VSX Scalar Extract Significand Quad-Precision 
X-form","xsxsigqp VRT,VRB","63@0|VRT@6|18@11|VRB@16|804@21|/@31|","" -"VSX Vector Convert Half-Precision to Single-Precision format XX2-form","xvcvhpsp XT,XB","60@0|T@6|24@11|B@16|475@21|BX@30|TX@31|","" -"VSX Vector Convert with round Single-Precision to Half-Precision format XX2-form","xvcvsphp XT,XB","60@0|T@6|25@11|B@16|475@21|BX@30|TX@31|","" -"VSX Vector Insert Exponent Double-Precision XX3-form","xviexpdp XT,XA,XB","60@0|T@6|A@11|B@16|248@21|AX@29|BX@30|TX@31|","" -"VSX Vector Insert Exponent Single-Precision XX3-form","xviexpsp XT,XA,XB","60@0|T@6|A@11|B@16|216@21|AX@29|BX@30|TX@31|","" -"VSX Vector Test Data Class Double-Precision XX2-form","xvtstdcdp XT,XB,DCMX","60@0|T@6|dx@11|B@16|15@21|dc@25|5@26|dm@29|BX@30|TX@31|","" -"VSX Vector Test Data Class Single-Precision XX2-form","xvtstdcsp XT,XB,DCMX","60@0|T@6|dx@11|B@16|13@21|dc@25|5@26|dm@29|BX@30|TX@31|","" -"VSX Vector Extract Exponent Double-Precision XX2-form","xvxexpdp XT,XB","60@0|T@6|0@11|B@16|475@21|BX@30|TX@31|","" -"VSX Vector Extract Exponent Single-Precision XX2-form","xvxexpsp XT,XB","60@0|T@6|8@11|B@16|475@21|BX@30|TX@31|","" -"VSX Vector Extract Significand Double-Precision XX2-form","xvxsigdp XT,XB","60@0|T@6|1@11|B@16|475@21|BX@30|TX@31|","" -"VSX Vector Extract Significand Single-Precision XX2-form","xvxsigsp XT,XB","60@0|T@6|9@11|B@16|475@21|BX@30|TX@31|","" -"VSX Vector Byte-Reverse Doubleword XX2-form","xxbrd XT,XB","60@0|T@6|23@11|B@16|475@21|BX@30|TX@31|","" -"VSX Vector Byte-Reverse Halfword XX2-form","xxbrh XT,XB","60@0|T@6|7@11|B@16|475@21|BX@30|TX@31|","" -"VSX Vector Byte-Reverse Quadword XX2-form","xxbrq XT,XB","60@0|T@6|31@11|B@16|475@21|BX@30|TX@31|","" -"VSX Vector Byte-Reverse Word XX2-form","xxbrw XT,XB","60@0|T@6|15@11|B@16|475@21|BX@30|TX@31|","" -"VSX Vector Extract Unsigned Word XX2-form","xxextractuw XT,XB,UIM","60@0|T@6|/@11|UIM@12|B@16|165@21|BX@30|TX@31|","" -"VSX Vector Insert Word XX2-form","xxinsertw 
XT,XB,UIM","60@0|T@6|/@11|UIM@12|B@16|181@21|BX@30|TX@31|","" -"VSX Vector Permute XX3-form","xxperm XT,XA,XB","60@0|T@6|A@11|B@16|26@21|AX@29|BX@30|TX@31|","" -"VSX Vector Permute Right-indexed XX3-form","xxpermr XT,XA,XB","60@0|T@6|A@11|B@16|58@21|AX@29|BX@30|TX@31|","" -"VSX Vector Splat Immediate Byte X-form","xxspltib XT,IMM8","60@0|T@6|0@11|IMM8@13|360@21|TX@31|","" -"Decimal Add Modulo VX-form","bcdadd. VRT,VRA,VRB,PS","4@0|VRT@6|VRA@11|VRB@16|1@21|PS@22|1@23|","" -"Decimal Subtract Modulo VX-form","bcdsub. VRT,VRA,VRB,PS","4@0|VRT@6|VRA@11|VRB@16|1@21|PS@22|65@23|","" -"Branch Conditional to Branch Target Address Register XL-form","bctar BO,BI,BH (LK=0)|bctarl BO,BI,BH (LK=1)","19@0|BO@6|BI@11|///@16|BH@19|560@21|LK@31|","" -"Clear BHRB X-form","clrbhrb","31@0|///@6|///@11|///@16|430@21|/@31|","" -"Floating Merge Even Word X-form","fmrgew FRT,FRA,FRB","63@0|FRT@6|FRA@11|FRB@16|966@21|/@31|","" -"Floating Merge Odd Word X-form","fmrgow FRT,FRA,FRB","63@0|FRT@6|FRA@11|FRB@16|838@21|/@31|","" -"Instruction Cache Block Touch X-form","icbt CT, RA, RB","31@0|/@6|CT@7|RA@11|RB@16|22@21|/@31|","" -"Load Quadword And Reserve Indexed X-form","lqarx RTp,RA,RB,EH","31@0|RTp@6|RA@11|RB@16|276@21|EH@31|","" -"Load VSX Scalar as Integer Word Algebraic Indexed X-form","lxsiwax XT,RA,RB","31@0|T@6|RA@11|RB@16|76@21|TX@31|","" -"Load VSX Scalar as Integer Word & Zero Indexed X-form","lxsiwzx XT,RA,RB","31@0|T@6|RA@11|RB@16|12@21|TX@31|","" -"Load VSX Scalar Single-Precision Indexed X-form","lxsspx XT,RA,RB","31@0|T@6|RA@11|RB@16|524@21|TX@31|","" -"Move From BHRB XFX-form","mfbhrbe RT,BHRBE","31@0|RT@6|BHRBE@11|302@21|/@31|","" -"Move From VSR Doubleword X-form","mfvsrd RA,XS","31@0|S@6|RA@11|///@16|51@21|SX@31|","" -"Move From VSR Word and Zero X-form","mfvsrwz RA,XS","31@0|S@6|RA@11|///@16|115@21|SX@31|","" -"Message Clear X-form","msgclr RB","31@0|///@6|///@11|RB@16|238@21|/@31|","" -"Message Clear Privileged X-form","msgclrp RB","31@0|///@6|///@11|RB@16|174@21|/@31|","" 
-"Message Send X-form","msgsnd RB","31@0|///@6|///@11|RB@16|206@21|/@31|","" -"Message Send Privileged X-form","msgsndp RB","31@0|///@6|///@11|RB@16|142@21|/@31|","" -"Move To VSR Doubleword X-form","mtvsrd XT,RA","31@0|T@6|RA@11|///@16|179@21|TX@31|","" -"Move To VSR Word Algebraic X-form","mtvsrwa XT,RA","31@0|T@6|RA@11|///@16|211@21|TX@31|","" -"Move To VSR Word and Zero X-form","mtvsrwz XT,RA","31@0|T@6|RA@11|///@16|243@21|TX@31|","" -"Return from Event Based Branch XL-form","rfebb S","19@0|///@6|///@11|///@16|S@20|146@21|/@31|","" -"Store Quadword Conditional Indexed X-form","stqcx. RSp,RA,RB","31@0|RSp@6|RA@11|RB@16|182@21|1@31|","" -"Store VSX Scalar as Integer Word Indexed X-form","stxsiwx XS,RA,RB","31@0|S@6|RA@11|RB@16|140@21|SX@31|","" -"Store VSX Scalar Single-Precision Indexed X-form","stxsspx XS,RA,RB","31@0|S@6|RA@11|RB@16|652@21|SX@31|","" -"Vector Add & write Carry Unsigned Quadword VX-form","vaddcuq VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|320@21|","" -"Vector Add Extended & write Carry Unsigned Quadword VA-form","vaddecuq VRT,VRA,VRB,VRC","4@0|VRT@6|VRA@11|VRB@16|VRC@21|61@26|","" -"Vector Add Extended Unsigned Quadword Modulo VA-form","vaddeuqm VRT,VRA,VRB,VRC","4@0|VRT@6|VRA@11|VRB@16|VRC@21|60@26|","" -"Vector Add Unsigned Doubleword Modulo VX-form","vaddudm VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|192@21|","" -"Vector Add Unsigned Quadword Modulo VX-form","vadduqm VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|256@21|","" -"Vector Bit Permute Quadword VX-form","vbpermq VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1356@21|","" -"Vector AES Cipher VX-form","vcipher VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1288@21|","" -"Vector AES Cipher Last VX-form","vcipherlast VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1289@21|","" -"Vector Count Leading Zeros Byte VX-form","vclzb VRT,VRB","4@0|VRT@6|///@11|VRB@16|1794@21|","" -"Vector Count Leading Zeros Doubleword VX-form","vclzd VRT,VRB","4@0|VRT@6|///@11|VRB@16|1986@21|","" -"Vector Count Leading Zeros Halfword VX-form","vclzh 
VRT,VRB","4@0|VRT@6|///@11|VRB@16|1858@21|","" -"Vector Count Leading Zeros Word VX-form","vclzw VRT,VRB","4@0|VRT@6|///@11|VRB@16|1922@21|","" -"Vector Compare Equal Unsigned Doubleword VC-form","vcmpequd VRT,VRA,VRB (Rc=0)|vcmpequd. VRT,VRA,VRB (Rc=1)","4@0|VRT@6|VRA@11|VRB@16|Rc@21|199@22|","" -"Vector Compare Greater Than Signed Doubleword VC-form","vcmpgtsd VRT,VRA,VRB (Rc=0)|vcmpgtsd. VRT,VRA,VRB (Rc=1)","4@0|VRT@6|VRA@11|VRB@16|Rc@21|967@22|","" -"Vector Compare Greater Than Unsigned Doubleword VC-form","vcmpgtud VRT,VRA,VRB (Rc=0)|vcmpgtud. VRT,VRA,VRB (Rc=1)","4@0|VRT@6|VRA@11|VRB@16|Rc@21|711@22|","" -"Vector Logical Equivalence VX-form","veqv VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1668@21|","" -"Vector Gather Bits by Bytes by Doubleword VX-form","vgbbd VRT,VRB","4@0|VRT@6|///@11|VRB@16|1292@21|","" -"Vector Maximum Signed Doubleword VX-form","vmaxsd VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|450@21|","" -"Vector Maximum Unsigned Doubleword VX-form","vmaxud VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|194@21|","" -"Vector Minimum Signed Doubleword VX-form","vminsd VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|962@21|","" -"Vector Minimum Unsigned Doubleword VX-form","vminud VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|706@21|","" -"Vector Merge Even Word VX-form","vmrgew VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1932@21|","" -"Vector Merge Odd Word VX-form","vmrgow VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1676@21|","" -"Vector Multiply Even Signed Word VX-form","vmulesw VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|904@21|","" -"Vector Multiply Even Unsigned Word VX-form","vmuleuw VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|648@21|","" -"Vector Multiply Odd Signed Word VX-form","vmulosw VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|392@21|","" -"Vector Multiply Odd Unsigned Word VX-form","vmulouw VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|136@21|","" -"Vector Multiply Unsigned Word Modulo VX-form","vmuluwm VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|137@21|","" -"Vector Logical NAND VX-form","vnand 
VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1412@21|","" -"Vector AES Inverse Cipher VX-form","vncipher VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1352@21|","" -"Vector AES Inverse Cipher Last VX-form","vncipherlast VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1353@21|","" -"Vector Logical OR with Complement VX-form","vorc VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1348@21|","" -"Vector Permute & Exclusive-OR VA-form","vpermxor VRT,VRA,VRB,VRC","4@0|VRT@6|VRA@11|VRB@16|VRC@21|45@26|","" -"Vector Pack Signed Doubleword Signed Saturate VX-form","vpksdss VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1486@21|","" -"Vector Pack Signed Doubleword Unsigned Saturate VX-form","vpksdus VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1358@21|","" -"Vector Pack Unsigned Doubleword Unsigned Modulo VX-form","vpkudum VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1102@21|","" -"Vector Pack Unsigned Doubleword Unsigned Saturate VX-form","vpkudus VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1230@21|","" -"Vector Polynomial Multiply-Sum Byte VX-form","vpmsumb VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1032@21|","" -"Vector Polynomial Multiply-Sum Doubleword VX-form","vpmsumd VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1224@21|","" -"Vector Polynomial Multiply-Sum Halfword VX-form","vpmsumh VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1096@21|","" -"Vector Polynomial Multiply-Sum Word VX-form","vpmsumw VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1160@21|","" -"Vector Population Count Byte VX-form","vpopcntb VRT,VRB","4@0|VRT@6|///@11|VRB@16|1795@21|","" -"Vector Population Count Doubleword VX-form","vpopcntd VRT,VRB","4@0|VRT@6|///@11|VRB@16|1987@21|","" -"Vector Population Count Halfword VX-form","vpopcnth VRT,VRB","4@0|VRT@6|///@11|VRB@16|1859@21|","" -"Vector Population Count Word VX-form","vpopcntw VRT,VRB","4@0|VRT@6|///@11|VRB@16|1923@21|","" -"Vector Rotate Left Doubleword VX-form","vrld VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|196@21|","" -"Vector AES SubBytes VX-form","vsbox VRT,VRA","4@0|VRT@6|VRA@11|///@16|1480@21|","" -"Vector SHA-512 Sigma Doubleword 
VX-form","vshasigmad VRT,VRA,ST,SIX","4@0|VRT@6|VRA@11|ST@16|SIX@17|1730@21|","" -"Vector SHA-256 Sigma Word VX-form","vshasigmaw VRT,VRA,ST,SIX","4@0|VRT@6|VRA@11|ST@16|SIX@17|1666@21|","" -"Vector Shift Left Doubleword VX-form","vsld VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1476@21|","" -"Vector Shift Right Algebraic Doubleword VX-form","vsrad VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|964@21|","" -"Vector Shift Right Doubleword VX-form","vsrd VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1732@21|","" -"Vector Subtract & write Carry-out Unsigned Quadword VX-form","vsubcuq VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1344@21|","" -"Vector Subtract Extended & write Carry-out Unsigned Quadword VA-form","vsubecuq VRT,VRA,VRB,VRC","4@0|VRT@6|VRA@11|VRB@16|VRC@21|63@26|","" -"Vector Subtract Extended Unsigned Quadword Modulo VA-form","vsubeuqm VRT,VRA,VRB,VRC","4@0|VRT@6|VRA@11|VRB@16|VRC@21|62@26|","" -"Vector Subtract Unsigned Doubleword Modulo VX-form","vsubudm VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1216@21|","" -"Vector Subtract Unsigned Quadword Modulo VX-form","vsubuqm VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1280@21|","" -"Vector Unpack High Signed Word VX-form","vupkhsw VRT,VRB","4@0|VRT@6|///@11|VRB@16|1614@21|","" -"Vector Unpack Low Signed Word VX-form","vupklsw VRT,VRB","4@0|VRT@6|///@11|VRB@16|1742@21|","" -"VSX Scalar Add Single-Precision XX3-form","xsaddsp XT,XA,XB","60@0|T@6|A@11|B@16|0@21|AX@29|BX@30|TX@31|","" -"VSX Scalar Convert Scalar Single-Precision to Vector Single-Precision format Non-signalling XX2-form","xscvdpspn XT,XB","60@0|T@6|///@11|B@16|267@21|BX@30|TX@31|","" -"VSX Scalar Convert Single-Precision to Double-Precision format Non-signalling XX2-form","xscvspdpn XT,XB","60@0|T@6|///@11|B@16|331@21|BX@30|TX@31|","" -"VSX Scalar Convert with round Signed Doubleword to Single-Precision format XX2-form","xscvsxdsp XT,XB","60@0|T@6|///@11|B@16|312@21|BX@30|TX@31|","" -"VSX Scalar Convert with round Unsigned Doubleword to Single-Precision XX2-form","xscvuxdsp 
XT,XB","60@0|T@6|///@11|B@16|296@21|BX@30|TX@31|","" -"VSX Scalar Divide Single-Precision XX3-form","xsdivsp XT,XA,XB","60@0|T@6|A@11|B@16|24@21|AX@29|BX@30|TX@31|","" -"VSX Scalar Multiply-Add Type-A Single-Precision XX3-form","xsmaddasp XT,XA,XB","60@0|T@6|A@11|B@16|1@21|AX@29|BX@30|TX@31|","" -"VSX Scalar Multiply-Add Type-M Single-Precision XX3-form","xsmaddmsp XT,XA,XB","60@0|T@6|A@11|B@16|9@21|AX@29|BX@30|TX@31|","" -"VSX Scalar Multiply-Subtract Type-A Single-Precision XX3-form","xsmsubasp XT,XA,XB","60@0|T@6|A@11|B@16|17@21|AX@29|BX@30|TX@31|","" -"VSX Scalar Multiply-Subtract Type-M Single-Precision XX3-form","xsmsubmsp XT,XA,XB","60@0|T@6|A@11|B@16|25@21|AX@29|BX@30|TX@31|","" -"VSX Scalar Multiply Single-Precision XX3-form","xsmulsp XT,XA,XB","60@0|T@6|A@11|B@16|16@21|AX@29|BX@30|TX@31|","" -"VSX Scalar Negative Multiply-Add Type-A Single-Precision XX3-form","xsnmaddasp XT,XA,XB","60@0|T@6|A@11|B@16|129@21|AX@29|BX@30|TX@31|","" -"VSX Scalar Negative Multiply-Add Type-M Single-Precision XX3-form","xsnmaddmsp XT,XA,XB","60@0|T@6|A@11|B@16|137@21|AX@29|BX@30|TX@31|","" -"VSX Scalar Negative Multiply-Subtract Type-A Single-Precision XX3-form","xsnmsubasp XT,XA,XB","60@0|T@6|A@11|B@16|145@21|AX@29|BX@30|TX@31|","" -"VSX Scalar Negative Multiply-Subtract Type-M Single-Precision XX3-form","xsnmsubmsp XT,XA,XB","60@0|T@6|A@11|B@16|153@21|AX@29|BX@30|TX@31|","" -"VSX Scalar Reciprocal Estimate Single-Precision XX2-form","xsresp XT,XB","60@0|T@6|///@11|B@16|26@21|BX@30|TX@31|","" -"VSX Scalar Round to Single-Precision XX2-form","xsrsp XT,XB","60@0|T@6|///@11|B@16|281@21|BX@30|TX@31|","" -"VSX Scalar Reciprocal Square Root Estimate Single-Precision XX2-form","xsrsqrtesp XT,XB","60@0|T@6|///@11|B@16|10@21|BX@30|TX@31|","" -"VSX Scalar Square Root Single-Precision XX2-form","xssqrtsp XT,XB","60@0|T@6|///@11|B@16|11@21|BX@30|TX@31|","" -"VSX Scalar Subtract Single-Precision XX3-form","xssubsp XT,XA,XB","60@0|T@6|A@11|B@16|8@21|AX@29|BX@30|TX@31|","" -"VSX Vector 
Logical Equivalence XX3-form","xxleqv XT,XA,XB","60@0|T@6|A@11|B@16|186@21|AX@29|BX@30|TX@31|","" -"VSX Vector Logical NAND XX3-form","xxlnand XT,XA,XB","60@0|T@6|A@11|B@16|178@21|AX@29|BX@30|TX@31|","" -"VSX Vector Logical OR with Complement XX3-form","xxlorc XT,XA,XB","60@0|T@6|A@11|B@16|170@21|AX@29|BX@30|TX@31|","" -"Add and Generate Sixes XO-form","addg6s RT,RA,RB","31@0|RT@6|RA@11|RB@16|/@21|74@22|/@31|","" -"Bit Permute Doubleword X-form","bpermd RA,RS,RB","31@0|RS@6|RA@11|RB@16|252@21|/@31|","" -"Convert Binary Coded Decimal To Declets X-form","cbcdtd RA, RS","31@0|RS@6|RA@11|///@16|314@21|/@31|","" -"Convert Declets To Binary Coded Decimal X-form","cdtbcd RA, RS","31@0|RS@6|RA@11|///@16|282@21|/@31|","" -"DFP Convert From Fixed X-form","dcffix FRT,FRB (Rc=0)|dcffix. FRT,FRB (Rc=1)","59@0|FRT@6|///@11|FRB@16|802@21|Rc@31|","" -"Divide Doubleword Extended XO-form","divde RT,RA,RB (OE=0 Rc=0)|divde. RT,RA,RB (OE=0 Rc=1)|divdeo RT,RA,RB (OE=1 Rc=0)|divdeo. RT,RA,RB (OE=1 Rc=1)","31@0|RT@6|RA@11|RB@16|OE@21|425@22|Rc@31|","" -"Divide Doubleword Extended Unsigned XO-form","divdeu RT,RA,RB (OE=0 Rc=0)|divdeu. RT,RA,RB (OE=0 Rc=1)|divdeuo RT,RA,RB (OE=1 Rc=0)|divdeuo. RT,RA,RB (OE=1 Rc=1)","31@0|RT@6|RA@11|RB@16|OE@21|393@22|Rc@31|","" -"Divide Word Extended XO-form","divwe RT,RA,RB (OE=0 Rc=0)|divwe. RT,RA,RB (OE=0 Rc=1)|divweo RT,RA,RB (OE=1 Rc=0)|divweo. RT,RA,RB (OE=1 Rc=1)","31@0|RT@6|RA@11|RB@16|OE@21|427@22|Rc@31|","" -"Divide Word Extended Unsigned XO-form","divweu RT,RA,RB (OE=0 Rc=0)|divweu. RT,RA,RB (OE=0 Rc=1)|divweuo RT,RA,RB (OE=1 Rc=0)|divweuo. RT,RA,RB (OE=1 Rc=1)","31@0|RT@6|RA@11|RB@16|OE@21|395@22|Rc@31|","" -"Floating Convert with round Signed Doubleword to Single-Precision format X-form","fcfids FRT,FRB (Rc=0)|fcfids. FRT,FRB (Rc=1)","59@0|FRT@6|///@11|FRB@16|846@21|Rc@31|","" -"Floating Convert with round Unsigned Doubleword to Double-Precision format X-form","fcfidu FRT,FRB (Rc=0)|fcfidu. 
FRT,FRB (Rc=1)","63@0|FRT@6|///@11|FRB@16|974@21|Rc@31|","" -"Floating Convert with round Unsigned Doubleword to Single-Precision format X-form","fcfidus FRT,FRB (Rc=0)|fcfidus. FRT,FRB (Rc=1)","59@0|FRT@6|///@11|FRB@16|974@21|Rc@31|","" -"Floating Convert with round Double-Precision To Unsigned Doubleword format X-form","fctidu FRT,FRB (Rc=0)|fctidu. FRT,FRB (Rc=1)","63@0|FRT@6|///@11|FRB@16|942@21|Rc@31|","" -"Floating Convert with truncate Double-Precision To Unsigned Doubleword format X-form","fctiduz FRT,FRB (Rc=0)|fctiduz. FRT,FRB (Rc=1)","63@0|FRT@6|///@11|FRB@16|943@21|Rc@31|","" -"Floating Convert with round Double-Precision To Unsigned Word format X-form","fctiwu FRT,FRB (Rc=0)|fctiwu. FRT,FRB (Rc=1)","63@0|FRT@6|///@11|FRB@16|142@21|Rc@31|","" -"Floating Convert with truncate Double-Precision To Unsigned Word format X-form","fctiwuz FRT,FRB (Rc=0)|fctiwuz. FRT,FRB (Rc=1)","63@0|FRT@6|///@11|FRB@16|143@21|Rc@31|","" -"Floating Test for software Divide X-form","ftdiv BF,FRA,FRB","63@0|BF@6|//@9|FRA@11|FRB@16|128@21|/@31|","" -"Floating Test for software Square Root X-form","ftsqrt BF,FRB","63@0|BF@6|//@9|///@11|FRB@16|160@21|/@31|","" -"Load Byte And Reserve Indexed X-form","lbarx RT,RA,RB,EH","31@0|RT@6|RA@11|RB@16|52@21|EH@31|","" -"Load Doubleword Byte-Reverse Indexed X-form","ldbrx RT,RA,RB","31@0|RT@6|RA@11|RB@16|532@21|/@31|","" -"Load Floating-Point as Integer Word & Zero Indexed X-form","lfiwzx FRT,RA,RB","31@0|FRT@6|RA@11|RB@16|887@21|/@31|","" -"Load Halfword And Reserve Indexed Xform","lharx RT,RA,RB,EH","31@0|RT@6|RA@11|RB@16|116@21|EH@31|","" -"Load VSX Scalar Doubleword Indexed X-form","lxsdx XT,RA,RB","31@0|T@6|RA@11|RB@16|588@21|TX@31|","" -"Load VSX Vector Doubleword*2 Indexed X-form","lxvd2x XT,RA,RB","31@0|T@6|RA@11|RB@16|844@21|TX@31|","" -"Load VSX Vector Doubleword & Splat Indexed X-form","lxvdsx XT,RA,RB","31@0|T@6|RA@11|RB@16|332@21|TX@31|","" -"Load VSX Vector Word*4 Indexed X-form","lxvw4x 
XT,RA,RB","31@0|T@6|RA@11|RB@16|780@21|TX@31|","" -"Population Count Doubleword X-form","popcntd RA, RS","31@0|RS@6|RA@11|///@16|506@21|/@31|","" -"Population Count Words X-form","popcntw RA, RS","31@0|RS@6|RA@11|///@16|378@21|/@31|","" -"Store Byte Conditional Indexed X-form","stbcx. RS,RA,RB","31@0|RS@6|RA@11|RB@16|694@21|1@31|","" -"Store Doubleword Byte-Reverse Indexed X-form","stdbrx RS,RA,RB","31@0|RS@6|RA@11|RB@16|660@21|/@31|","" -"Store Halfword Conditional Indexed X-form","sthcx. RS,RA,RB","31@0|RS@6|RA@11|RB@16|726@21|1@31|","" -"Store VSX Scalar Doubleword Indexed X-form","stxsdx XS,RA,RB","31@0|S@6|RA@11|RB@16|716@21|SX@31|","" -"Store VSX Vector Doubleword*2 Indexed X-form","stxvd2x XS,RA,RB","31@0|S@6|RA@11|RB@16|972@21|SX@31|","" -"Store VSX Vector Word*4 Indexed X-form","stxvw4x XS,RA,RB","31@0|S@6|RA@11|RB@16|908@21|SX@31|","" -"VSX Scalar Absolute Double-Precision XX2-form","xsabsdp XT,XB","60@0|T@6|///@11|B@16|345@21|BX@30|TX@31|","" -"VSX Scalar Add Double-Precision XX3-form","xsadddp XT,XA,XB","60@0|T@6|A@11|B@16|32@21|AX@29|BX@30|TX@31|","" -"VSX Scalar Compare Ordered Double-Precision XX3-form","xscmpodp BF,XA,XB","60@0|BF@6|//@9|A@11|B@16|43@21|AX@29|BX@30|/@31|","" -"VSX Scalar Compare Unordered Double-Precision XX3-form","xscmpudp BF,XA,XB","60@0|BF@6|//@9|A@11|B@16|35@21|AX@29|BX@30|/@31|","" -"VSX Scalar Copy Sign Double-Precision XX3-form","xscpsgndp XT,XA,XB","60@0|T@6|A@11|B@16|176@21|AX@29|BX@30|TX@31|","" -"VSX Scalar Convert with round Double-Precision to Single-Precision format XX2-form","xscvdpsp XT,XB","60@0|T@6|///@11|B@16|265@21|BX@30|TX@31|","" -"VSX Scalar Convert with round to zero Double-Precision to Signed Doubleword format XX2-form","xscvdpsxds XT,XB","60@0|T@6|///@11|B@16|344@21|BX@30|TX@31|","" -"VSX Scalar Convert with round to zero Double-Precision to Signed Word format XX2-form","xscvdpsxws XT,XB","60@0|T@6|///@11|B@16|88@21|BX@30|TX@31|","" -"VSX Scalar Convert with round to zero Double-Precision to Unsigned 
Doubleword format XX2-form","xscvdpuxds XT,XB","60@0|T@6|///@11|B@16|328@21|BX@30|TX@31|","" -"VSX Scalar Convert with round to zero Double-Precision to Unsigned Word format XX2-form","xscvdpuxws XT,XB","60@0|T@6|///@11|B@16|72@21|BX@30|TX@31|","" -"VSX Scalar Convert Single-Precision to Double-Precision format XX2-form","xscvspdp XT,XB","60@0|T@6|///@11|B@16|329@21|BX@30|TX@31|","" -"VSX Scalar Convert with round Signed Doubleword to Double-Precision format XX2-form","xscvsxddp XT,XB","60@0|T@6|///@11|B@16|376@21|BX@30|TX@31|","" -"VSX Scalar Convert with round Unsigned Doubleword to Double-Precision format XX2-form","xscvuxddp XT,XB","60@0|T@6|///@11|B@16|360@21|BX@30|TX@31|","" -"VSX Scalar Divide Double-Precision XX3-form","xsdivdp XT,XA,XB","60@0|T@6|A@11|B@16|56@21|AX@29|BX@30|TX@31|","" -"VSX Scalar Multiply-Add Type-A Double-Precision XX3-form","xsmaddadp XT,XA,XB","60@0|T@6|A@11|B@16|33@21|AX@29|BX@30|TX@31|","" -"VSX Scalar Multiply-Add Type-M Double-Precision XX3-form","xsmaddmdp XT,XA,XB","60@0|T@6|A@11|B@16|41@21|AX@29|BX@30|TX@31|","" -"VSX Scalar Maximum Double-Precision XX3-form","xsmaxdp XT,XA,XB","60@0|T@6|A@11|B@16|160@21|AX@29|BX@30|TX@31|","" -"VSX Scalar Minimum Double-Precision XX3-form","xsmindp XT,XA,XB","60@0|T@6|A@11|B@16|168@21|AX@29|BX@30|TX@31|","" -"VSX Scalar Multiply-Subtract Type-A Double-Precision XX3-form","xsmsubadp XT,XA,XB","60@0|T@6|A@11|B@16|49@21|AX@29|BX@30|TX@31|","" -"VSX Scalar Multiply-Subtract Type-M Double-Precision XX3-form","xsmsubmdp XT,XA,XB","60@0|T@6|A@11|B@16|57@21|AX@29|BX@30|TX@31|","" -"VSX Scalar Multiply Double-Precision XX3-form","xsmuldp XT,XA,XB","60@0|T@6|A@11|B@16|48@21|AX@29|BX@30|TX@31|","" -"VSX Scalar Negative Absolute Double-Precision XX2-form","xsnabsdp XT,XB","60@0|T@6|///@11|B@16|361@21|BX@30|TX@31|","" -"VSX Scalar Negate Double-Precision XX2-form","xsnegdp XT,XB","60@0|T@6|///@11|B@16|377@21|BX@30|TX@31|","" -"VSX Scalar Negative Multiply-Add Type-A Double-Precision XX3-form","xsnmaddadp 
XT,XA,XB","60@0|T@6|A@11|B@16|161@21|AX@29|BX@30|TX@31|","" -"VSX Scalar Negative Multiply-Add Type-M Double-Precision XX3-form","xsnmaddmdp XT,XA,XB","60@0|T@6|A@11|B@16|169@21|AX@29|BX@30|TX@31|","" -"VSX Scalar Negative Multiply-Subtract Type-A Double-Precision XX3-form","xsnmsubadp XT,XA,XB","60@0|T@6|A@11|B@16|177@21|AX@29|BX@30|TX@31|","" -"VSX Scalar Negative Multiply-Subtract Type-M Double-Precision XX3-form","xsnmsubmdp XT,XA,XB","60@0|T@6|A@11|B@16|185@21|AX@29|BX@30|TX@31|","" -"VSX Scalar Round to Double-Precision Integer using round to Nearest Away XX2-form","xsrdpi XT,XB","60@0|T@6|///@11|B@16|73@21|BX@30|TX@31|","" -"VSX Scalar Round to Double-Precision Integer exact using Current rounding mode XX2-form","xsrdpic XT,XB","60@0|T@6|///@11|B@16|107@21|BX@30|TX@31|","" -"VSX Scalar Round to Double-Precision Integer using round toward -Infinity XX2-form","xsrdpim XT,XB","60@0|T@6|///@11|B@16|121@21|BX@30|TX@31|","" -"VSX Scalar Round to Double-Precision Integer using round toward +Infinity XX2-form","xsrdpip XT,XB","60@0|T@6|///@11|B@16|105@21|BX@30|TX@31|","" -"VSX Scalar Round to Double-Precision Integer using round toward Zero XX2-form","xsrdpiz XT,XB","60@0|T@6|///@11|B@16|89@21|BX@30|TX@31|","" -"VSX Scalar Reciprocal Estimate Double-Precision XX2-form","xsredp XT,XB","60@0|T@6|///@11|B@16|90@21|BX@30|TX@31|","" -"VSX Scalar Reciprocal Square Root Estimate Double-Precision XX2-form","xsrsqrtedp XT,XB","60@0|T@6|///@11|B@16|74@21|BX@30|TX@31|","" -"VSX Scalar Square Root Double-Precision XX2-form","xssqrtdp XT,XB","60@0|T@6|///@11|B@16|75@21|BX@30|TX@31|","" -"VSX Scalar Subtract Double-Precision XX3-form","xssubdp XT,XA,XB","60@0|T@6|A@11|B@16|40@21|AX@29|BX@30|TX@31|","" -"VSX Scalar Test for software Divide Double-Precision XX3-form","xstdivdp BF,XA,XB","60@0|BF@6|//@9|A@11|B@16|61@21|AX@29|BX@30|/@31|","" -"VSX Scalar Test for software Square Root Double-Precision XX2-form","xstsqrtdp BF,XB","60@0|BF@6|//@9|///@11|B@16|106@21|BX@30|/@31|","" -"VSX 
Vector Absolute Value Double-Precision XX2-form","xvabsdp XT,XB","60@0|T@6|///@11|B@16|473@21|BX@30|TX@31|","" -"VSX Vector Absolute Value Single-Precision XX2-form","xvabssp XT,XB","60@0|T@6|///@11|B@16|409@21|BX@30|TX@31|","" -"VSX Vector Add Double-Precision XX3-form","xvadddp XT,XA,XB","60@0|T@6|A@11|B@16|96@21|AX@29|BX@30|TX@31|","" -"VSX Vector Add Single-Precision XX3-form","xvaddsp XT,XA,XB","60@0|T@6|A@11|B@16|64@21|AX@29|BX@30|TX@31|","" -"VSX Vector Compare Equal To Double-Precision XX3-form","xvcmpeqdp XT,XA,XB (Rc=0)|xvcmpeqdp. XT,XA,XB (Rc=1)","60@0|T@6|A@11|B@16|Rc@21|99@22|AX@29|BX@30|TX@31|","" -"VSX Vector Compare Equal To Single-Precision XX3-form","xvcmpeqsp XT,XA,XB (Rc=0)|xvcmpeqsp. XT,XA,XB (Rc=1)","60@0|T@6|A@11|B@16|Rc@21|67@22|AX@29|BX@30|TX@31|","" -"VSX Vector Compare Greater Than or Equal To Double-Precision XX3-form","xvcmpgedp XT,XA,XB (Rc=0)|xvcmpgedp. XT,XA,XB (Rc=1)","60@0|T@6|A@11|B@16|Rc@21|115@22|AX@29|BX@30|TX@31|","" -"VSX Vector Compare Greater Than or Equal To Single-Precision XX3-form","xvcmpgesp XT,XA,XB (Rc=0)|xvcmpgesp. XT,XA,XB (Rc=1)","60@0|T@6|A@11|B@16|Rc@21|83@22|AX@29|BX@30|TX@31|","" -"VSX Vector Compare Greater Than Double-Precision XX3-form","xvcmpgtdp XT,XA,XB (Rc=0)|xvcmpgtdp. XT,XA,XB (Rc=1)","60@0|T@6|A@11|B@16|Rc@21|107@22|AX@29|BX@30|TX@31|","" -"VSX Vector Compare Greater Than Single-Precision XX3-form","xvcmpgtsp XT,XA,XB (Rc=0)|xvcmpgtsp. 
XT,XA,XB (Rc=1)","60@0|T@6|A@11|B@16|Rc@21|75@22|AX@29|BX@30|TX@31|","" -"VSX Vector Copy Sign Double-Precision XX3-form","xvcpsgndp XT,XA,XB","60@0|T@6|A@11|B@16|240@21|AX@29|BX@30|TX@31|","" -"VSX Vector Copy Sign Single-Precision XX3-form","xvcpsgnsp XT,XA,XB","60@0|T@6|A@11|B@16|208@21|AX@29|BX@30|TX@31|","" -"VSX Vector Convert with round Double-Precision to Single-Precision format XX2-form","xvcvdpsp XT,XB","60@0|T@6|///@11|B@16|393@21|BX@30|TX@31|","" -"VSX Vector Convert with round to zero Double-Precision to Signed Doubleword format XX2-form","xvcvdpsxds XT,XB","60@0|T@6|///@11|B@16|472@21|BX@30|TX@31|","" -"VSX Vector Convert with round to zero Double-Precision to Signed Word format XX2-form","xvcvdpsxws XT,XB","60@0|T@6|///@11|B@16|216@21|BX@30|TX@31|","" -"VSX Vector Convert with round to zero Double-Precision to Unsigned Doubleword format XX2-form","xvcvdpuxds XT,XB","60@0|T@6|///@11|B@16|456@21|BX@30|TX@31|","" -"VSX Vector Convert with round to zero Double-Precision to Unsigned Word format XX2-form","xvcvdpuxws XT,XB","60@0|T@6|///@11|B@16|200@21|BX@30|TX@31|","" -"VSX Vector Convert Single-Precision to Double-Precision format XX2-form","xvcvspdp XT,XB","60@0|T@6|///@11|B@16|457@21|BX@30|TX@31|","" -"VSX Vector Convert with round to zero Single-Precision to Signed Doubleword format XX2-form","xvcvspsxds XT,XB","60@0|T@6|///@11|B@16|408@21|BX@30|TX@31|","" -"VSX Vector Convert with round to zero Single-Precision to Signed Word format XX2-form","xvcvspsxws XT,XB","60@0|T@6|///@11|B@16|152@21|BX@30|TX@31|","" -"VSX Vector Convert with round to zero Single-Precision to Unsigned Doubleword format XX2-form","xvcvspuxds XT,XB","60@0|T@6|///@11|B@16|392@21|BX@30|TX@31|","" -"VSX Vector Convert with round to zero Single-Precision to Unsigned Word format XX2-form","xvcvspuxws XT,XB","60@0|T@6|///@11|B@16|136@21|BX@30|TX@31|","" -"VSX Vector Convert with round Signed Doubleword to Double-Precision format XX2-form","xvcvsxddp 
XT,XB","60@0|T@6|///@11|B@16|504@21|BX@30|TX@31|","" -"VSX Vector Convert with round Signed Doubleword to Single-Precision format XX2-form","xvcvsxdsp XT,XB","60@0|T@6|///@11|B@16|440@21|BX@30|TX@31|","" -"VSX Vector Convert Signed Word to Double-Precision format XX2-form","xvcvsxwdp XT,XB","60@0|T@6|///@11|B@16|248@21|BX@30|TX@31|","" -"VSX Vector Convert with round Signed Word to Single-Precision format XX2-form","xvcvsxwsp XT,XB","60@0|T@6|///@11|B@16|184@21|BX@30|TX@31|","" -"VSX Vector Convert with round Unsigned Doubleword to Double-Precision format XX2-form","xvcvuxddp XT,XB","60@0|T@6|///@11|B@16|488@21|BX@30|TX@31|","" -"VSX Vector Convert with round Unsigned Doubleword to Single-Precision format XX2-form","xvcvuxdsp XT,XB","60@0|T@6|///@11|B@16|424@21|BX@30|TX@31|","" -"VSX Vector Convert Unsigned Word to Double-Precision format XX2-form","xvcvuxwdp XT,XB","60@0|T@6|///@11|B@16|232@21|BX@30|TX@31|","" -"VSX Vector Convert with round Unsigned Word to Single-Precision format XX2-form","xvcvuxwsp XT,XB","60@0|T@6|///@11|B@16|168@21|BX@30|TX@31|","" -"VSX Vector Divide Double-Precision XX3-form","xvdivdp XT,XA,XB","60@0|T@6|A@11|B@16|120@21|AX@29|BX@30|TX@31|","" -"VSX Vector Divide Single-Precision XX3-form","xvdivsp XT,XA,XB","60@0|T@6|A@11|B@16|88@21|AX@29|BX@30|TX@31|","" -"VSX Vector Multiply-Add Type-A Double-Precision XX3-form","xvmaddadp XT,XA,XB","60@0|T@6|A@11|B@16|97@21|AX@29|BX@30|TX@31|","" -"VSX Vector Multiply-Add Type-A Single-Precision XX3-form","xvmaddasp XT,XA,XB","60@0|T@6|A@11|B@16|65@21|AX@29|BX@30|TX@31|","" -"VSX Vector Multiply-Add Type-M Double-Precision XX3-form","xvmaddmdp XT,XA,XB","60@0|T@6|A@11|B@16|105@21|AX@29|BX@30|TX@31|","" -"VSX Vector Multiply-Add Type-M Single-Precision XX3-form","xvmaddmsp XT,XA,XB","60@0|T@6|A@11|B@16|73@21|AX@29|BX@30|TX@31|","" -"VSX Vector Maximum Double-Precision XX3-form","xvmaxdp XT,XA,XB","60@0|T@6|A@11|B@16|224@21|AX@29|BX@30|TX@31|","" -"VSX Vector Maximum Single-Precision XX3-form","xvmaxsp 
XT,XA,XB","60@0|T@6|A@11|B@16|192@21|AX@29|BX@30|TX@31|","" -"VSX Vector Minimum Double-Precision XX3-form","xvmindp XT,XA,XB","60@0|T@6|A@11|B@16|232@21|AX@29|BX@30|TX@31|","" -"VSX Vector Minimum Single-Precision XX3-form","xvminsp XT,XA,XB","60@0|T@6|A@11|B@16|200@21|AX@29|BX@30|TX@31|","" -"VSX Vector Multiply-Subtract Type-A Double-Precision XX3-form","xvmsubadp XT,XA,XB","60@0|T@6|A@11|B@16|113@21|AX@29|BX@30|TX@31|","" -"VSX Vector Multiply-Subtract Type-A Single-Precision XX3-form","xvmsubasp XT,XA,XB","60@0|T@6|A@11|B@16|81@21|AX@29|BX@30|TX@31|","" -"VSX Vector Multiply-Subtract Type-M Double-Precision XX3-form","xvmsubmdp XT,XA,XB","60@0|T@6|A@11|B@16|121@21|AX@29|BX@30|TX@31|","" -"VSX Vector Multiply-Subtract Type-M Single-Precision XX3-form","xvmsubmsp XT,XA,XB","60@0|T@6|A@11|B@16|89@21|AX@29|BX@30|TX@31|","" -"VSX Vector Multiply Double-Precision XX3-form","xvmuldp XT,XA,XB","60@0|T@6|A@11|B@16|112@21|AX@29|BX@30|TX@31|","" -"VSX Vector Multiply Single-Precision XX3-form","xvmulsp XT,XA,XB","60@0|T@6|A@11|B@16|80@21|AX@29|BX@30|TX@31|","" -"VSX Vector Negative Absolute Double-Precision XX2-form","xvnabsdp XT,XB","60@0|T@6|///@11|B@16|489@21|BX@30|TX@31|","" -"VSX Vector Negative Absolute Single-Precision XX2-form","xvnabssp XT,XB","60@0|T@6|///@11|B@16|425@21|BX@30|TX@31|","" -"VSX Vector Negate Double-Precision XX2-form","xvnegdp XT,XB","60@0|T@6|///@11|B@16|505@21|BX@30|TX@31|","" -"VSX Vector Negate Single-Precision XX2-form","xvnegsp XT,XB","60@0|T@6|///@11|B@16|441@21|BX@30|TX@31|","" -"VSX Vector Negative Multiply-Add Type-A Double-Precision XX3-form","xvnmaddadp XT,XA,XB","60@0|T@6|A@11|B@16|225@21|AX@29|BX@30|TX@31|","" -"VSX Vector Negative Multiply-Add Type-A Single-Precision XX3-form","xvnmaddasp XT,XA,XB","60@0|T@6|A@11|B@16|193@21|AX@29|BX@30|TX@31|","" -"VSX Vector Negative Multiply-Add Type-M Double-Precision XX3-form","xvnmaddmdp XT,XA,XB","60@0|T@6|A@11|B@16|233@21|AX@29|BX@30|TX@31|","" -"VSX Vector Negative Multiply-Add Type-M 
Single-Precision XX3-form","xvnmaddmsp XT,XA,XB","60@0|T@6|A@11|B@16|201@21|AX@29|BX@30|TX@31|","" -"VSX Vector Negative Multiply-Subtract Type-A Double-Precision XX3-form","xvnmsubadp XT,XA,XB","60@0|T@6|A@11|B@16|241@21|AX@29|BX@30|TX@31|","" -"VSX Vector Negative Multiply-Subtract Type-A Single-Precision XX3-form","xvnmsubasp XT,XA,XB","60@0|T@6|A@11|B@16|209@21|AX@29|BX@30|TX@31|","" -"VSX Vector Negative Multiply-Subtract Type-M Double-Precision XX3-form","xvnmsubmdp XT,XA,XB","60@0|T@6|A@11|B@16|249@21|AX@29|BX@30|TX@31|","" -"VSX Vector Negative Multiply-Subtract Type-M Single-Precision XX3-form","xvnmsubmsp XT,XA,XB","60@0|T@6|A@11|B@16|217@21|AX@29|BX@30|TX@31|","" -"VSX Vector Round to Double-Precision Integer using round to Nearest Away XX2-form","xvrdpi XT,XB","60@0|T@6|///@11|B@16|201@21|BX@30|TX@31|","" -"VSX Vector Round to Double-Precision Integer Exact using Current rounding mode XX2-form","xvrdpic XT,XB","60@0|T@6|///@11|B@16|235@21|BX@30|TX@31|","" -"VSX Vector Round to Double-Precision Integer using round toward -Infinity XX2-form","xvrdpim XT,XB","60@0|T@6|///@11|B@16|249@21|BX@30|TX@31|","" -"VSX Vector Round to Double-Precision Integer using round toward +Infinity XX2-form","xvrdpip XT,XB","60@0|T@6|///@11|B@16|233@21|BX@30|TX@31|","" -"VSX Vector Round to Double-Precision Integer using round toward Zero XX2-form","xvrdpiz XT,XB","60@0|T@6|///@11|B@16|217@21|BX@30|TX@31|","" -"VSX Vector Reciprocal Estimate Double-Precision XX2-form","xvredp XT,XB","60@0|T@6|///@11|B@16|218@21|BX@30|TX@31|","" -"VSX Vector Reciprocal Estimate Single-Precision XX2-form","xvresp XT,XB","60@0|T@6|///@11|B@16|154@21|BX@30|TX@31|","" -"VSX Vector Round to Single-Precision Integer using round to Nearest Away XX2-form","xvrspi XT,XB","60@0|T@6|///@11|B@16|137@21|BX@30|TX@31|","" -"VSX Vector Round to Single-Precision Integer Exact using Current rounding mode XX2-form","xvrspic XT,XB","60@0|T@6|///@11|B@16|171@21|BX@30|TX@31|","" -"VSX Vector Round to 
Single-Precision Integer using round toward -Infinity XX2-form","xvrspim XT,XB","60@0|T@6|///@11|B@16|185@21|BX@30|TX@31|","" -"VSX Vector Round to Single-Precision Integer using round toward +Infinity XX2-form","xvrspip XT,XB","60@0|T@6|///@11|B@16|169@21|BX@30|TX@31|","" -"VSX Vector Round to Single-Precision Integer using round toward Zero XX2-form","xvrspiz XT,XB","60@0|T@6|///@11|B@16|153@21|BX@30|TX@31|","" -"VSX Vector Reciprocal Square Root Estimate Double-Precision XX2-form","xvrsqrtedp XT,XB","60@0|T@6|///@11|B@16|202@21|BX@30|TX@31|","" -"VSX Vector Reciprocal Square Root Estimate Single-Precision XX2-form","xvrsqrtesp XT,XB","60@0|T@6|///@11|B@16|138@21|BX@30|TX@31|","" -"VSX Vector Square Root Double-Precision XX2-form","xvsqrtdp XT,XB","60@0|T@6|///@11|B@16|203@21|BX@30|TX@31|","" -"VSX Vector Square Root Single-Precision XX2-form","xvsqrtsp XT,XB","60@0|T@6|///@11|B@16|139@21|BX@30|TX@31|","" -"VSX Vector Subtract Double-Precision XX3-form","xvsubdp XT,XA,XB","60@0|T@6|A@11|B@16|104@21|AX@29|BX@30|TX@31|","" -"VSX Vector Subtract Single-Precision XX3-form","xvsubsp XT,XA,XB","60@0|T@6|A@11|B@16|72@21|AX@29|BX@30|TX@31|","" -"VSX Vector Test for software Divide Double-Precision XX3-form","xvtdivdp BF,XA,XB","60@0|BF@6|//@9|A@11|B@16|125@21|AX@29|BX@30|/@31|","" -"VSX Vector Test for software Divide Single-Precision XX3-form","xvtdivsp BF,XA,XB","60@0|BF@6|//@9|A@11|B@16|93@21|AX@29|BX@30|/@31|","" -"VSX Vector Test for software Square Root Double-Precision XX2-form","xvtsqrtdp BF,XB","60@0|BF@6|//@9|///@11|B@16|234@21|BX@30|/@31|","" -"VSX Vector Test for software Square Root Single-Precision XX2-form","xvtsqrtsp BF,XB","60@0|BF@6|//@9|///@11|B@16|170@21|BX@30|/@31|","" -"VSX Vector Logical AND XX3-form","xxland XT,XA,XB","60@0|T@6|A@11|B@16|130@21|AX@29|BX@30|TX@31|","" -"VSX Vector Logical AND with Complement XX3-form","xxlandc XT,XA,XB","60@0|T@6|A@11|B@16|138@21|AX@29|BX@30|TX@31|","" -"VSX Vector Logical NOR XX3-form","xxlnor 
XT,XA,XB","60@0|T@6|A@11|B@16|162@21|AX@29|BX@30|TX@31|","" -"VSX Vector Logical OR XX3-form","xxlor XT,XA,XB","60@0|T@6|A@11|B@16|146@21|AX@29|BX@30|TX@31|","" -"VSX Vector Logical XOR XX3-form","xxlxor XT,XA,XB","60@0|T@6|A@11|B@16|154@21|AX@29|BX@30|TX@31|","" -"VSX Vector Merge High Word XX3-form","xxmrghw XT,XA,XB","60@0|T@6|A@11|B@16|18@21|AX@29|BX@30|TX@31|","" -"VSX Vector Merge Low Word XX3-form","xxmrglw XT,XA,XB","60@0|T@6|A@11|B@16|50@21|AX@29|BX@30|TX@31|","" -"VSX Vector Permute Doubleword Immediate XX3-form","xxpermdi XT,XA,XB,DM","60@0|T@6|A@11|B@16|0@21|DM@22|10@24|AX@29|BX@30|TX@31|","" -"VSX Vector Select XX4-form","xxsel XT,XA,XB,XC","60@0|T@6|A@11|B@16|C@21|3@26|CX@28|AX@29|BX@30|TX@31|","" -"VSX Vector Shift Left Double by Word Immediate XX3-form","xxsldwi XT,XA,XB,SHW","60@0|T@6|A@11|B@16|0@21|SHW@22|2@24|AX@29|BX@30|TX@31|","" -"VSX Vector Splat Word XX2-form","xxspltw XT,XB,UIM","60@0|T@6|///@11|UIM@14|B@16|164@21|BX@30|TX@31|","" -"Compare Bytes X-form","cmpb RA,RS,RB","31@0|RS@6|RA@11|RB@16|508@21|/@31|","" -"DFP Add X-form","dadd FRT,FRA,FRB (Rc=0)|dadd. FRT,FRA,FRB (Rc=1)","59@0|FRT@6|FRA@11|FRB@16|2@21|Rc@31|","" -"DFP Add Quad X-form","daddq FRTp,FRAp,FRBp (Rc=0)|daddq. FRTp,FRAp,FRBp (Rc=1)","63@0|FRTp@6|FRAp@11|FRBp@16|2@21|Rc@31|","" -"DFP Convert From Fixed Quad X-form","dcffixq FRTp,FRB (Rc=0)|dcffixq. FRTp,FRB (Rc=1)","63@0|FRTp@6|///@11|FRB@16|802@21|Rc@31|","" -"DFP Compare Ordered X-form","dcmpo BF,FRA,FRB","59@0|BF@6|//@9|FRA@11|FRB@16|130@21|/@31|","" -"DFP Compare Ordered Quad X-form","dcmpoq BF,FRAp,FRBp","63@0|BF@6|//@9|FRAp@11|FRBp@16|130@21|/@31|","" -"DFP Compare Unordered X-form","dcmpu BF,FRA,FRB","59@0|BF@6|//@9|FRA@11|FRB@16|642@21|/@31|","" -"DFP Compare Unordered Quad X-form","dcmpuq BF,FRAp,FRBp","63@0|BF@6|//@9|FRAp@11|FRBp@16|642@21|/@31|","" -"DFP Convert To DFP Long X-form","dctdp FRT,FRB (Rc=0)|dctdp. 
FRT,FRB (Rc=1)","59@0|FRT@6|///@11|FRB@16|258@21|Rc@31|","" -"DFP Convert To Fixed X-form","dctfix FRT,FRB (Rc=0)|dctfix. FRT,FRB (Rc=1)","59@0|FRT@6|///@11|FRB@16|290@21|Rc@31|","" -"DFP Convert To Fixed Quad X-form","dctfixq FRT,FRBp (Rc=0)|dctfixq. FRT,FRBp (Rc=1)","63@0|FRT@6|///@11|FRBp@16|290@21|Rc@31|","" -"DFP Convert To DFP Extended X-form","dctqpq FRTp,FRB (Rc=0)|dctqpq. FRTp,FRB (Rc=1)","63@0|FRTp@6|///@11|FRB@16|258@21|Rc@31|","" -"DFP Decode DPD To BCD X-form","ddedpd SP,FRT,FRB (Rc=0)|ddedpd. SP,FRT,FRB (Rc=1)","59@0|FRT@6|SP@11|///@13|FRB@16|322@21|Rc@31|","" -"DFP Decode DPD To BCD Quad X-form","ddedpdq SP,FRTp,FRBp (Rc=0)|ddedpdq. SP,FRTp,FRBp (Rc=1)","63@0|FRTp@6|SP@11|///@13|FRBp@16|322@21|Rc@31|","" -"DFP Divide X-form","ddiv FRT,FRA,FRB (Rc=0)|ddiv. FRT,FRA,FRB (Rc=1)","59@0|FRT@6|FRA@11|FRB@16|546@21|Rc@31|","" -"DFP Divide Quad X-form","ddivq FRTp,FRAp,FRBp (Rc=0)|ddivq. FRTp,FRAp,FRBp (Rc=1)","63@0|FRTp@6|FRAp@11|FRBp@16|546@21|Rc@31|","" -"DFP Encode BCD To DPD X-form","denbcd S,FRT,FRB (Rc=0)|denbcd. S,FRT,FRB (Rc=1)","59@0|FRT@6|S@11|///@12|FRB@16|834@21|Rc@31|","" -"DFP Encode BCD To DPD Quad X-form","denbcdq S,FRTp,FRBp (Rc=0)|denbcdq. S,FRTp,FRBp (Rc=1)","63@0|FRTp@6|S@11|///@12|FRBp@16|834@21|Rc@31|","" -"DFP Insert Biased Exponent X-form","diex FRT,FRA,FRB (Rc=0)|diex. FRT,FRA,FRB (Rc=1)","59@0|FRT@6|FRA@11|FRB@16|866@21|Rc@31|","" -"DFP Insert Biased Exponent Quad X-form","diexq FRTp,FRA,FRBp|diexq. FRTp,FRA,FRBp (Rc=1)","63@0|FRTp@6|FRA@11|FRBp@16|866@21|Rc@31|","" -"DFP Multiply X-form","dmul FRT,FRA,FRB (Rc=0)|dmul. FRT,FRA,FRB (Rc=1)","59@0|FRT@6|FRA@11|FRB@16|34@21|Rc@31|","" -"DFP Multiply Quad X-form","dmulq FRTp,FRAp,FRBp (Rc=0)|dmulq. FRTp,FRAp,FRBp (Rc=1)","63@0|FRTp@6|FRAp@11|FRBp@16|34@21|Rc@31|","" -"DFP Quantize Z23-form","dqua FRT,FRA,FRB,RMC (Rc=0)|dqua. FRT,FRA,FRB,RMC (Rc=1)","59@0|FRT@6|FRA@11|FRB@16|RMC@21|3@23|Rc@31|","" -"DFP Quantize Immediate Z23-form","dquai TE,FRT,FRB,RMC (Rc=0)|dquai. 
TE,FRT,FRB,RMC (Rc=1)","59@0|FRT@6|TE@11|FRB@16|RMC@21|67@23|Rc@31|","" -"DFP Quantize Immediate Quad Z23-form","dquaiq TE,FRTp,FRBp,RMC (Rc=0)|dquaiq. TE,FRTp,FRBp,RMC (Rc=1)","63@0|FRTp@6|TE@11|FRBp@16|RMC@21|67@23|Rc@31|","" -"DFP Quantize Quad Z23-form","dquaq FRTp,FRAp,FRBp,RMC (Rc=0)|dquaq. FRTp,FRAp,FRBp,RMC (Rc=1)","63@0|FRTp@6|FRAp@11|FRBp@16|RMC@21|3@23|Rc@31|","" -"DFP Round To DFP Long X-form","drdpq FRTp,FRBp (Rc=0)|drdpq. FRTp,FRBp (Rc=1)","63@0|FRTp@6|///@11|FRBp@16|770@21|Rc@31|","" -"DFP Round To FP Integer Without Inexact Z23-form","drintn R,FRT,FRB,RMC (Rc=0)|drintn. R,FRT,FRB,RMC (Rc=1)","59@0|FRT@6|///@11|R@15|FRB@16|RMC@21|227@23|Rc@31|","" -"DFP Round To FP Integer Without Inexact Quad Z23-form","drintnq R,FRTp,FRBp,RMC (Rc=0)|drintnq. R,FRTp,FRBp,RMC (Rc=1)","63@0|FRTp@6|///@11|R@15|FRBp@16|RMC@21|227@23|Rc@31|","" -"DFP Round To FP Integer With Inexact Z23-form","drintx R,FRT,FRB,RMC (Rc=0)|drintx. R,FRT,FRB,RMC (Rc=1)","59@0|FRT@6|///@11|R@15|FRB@16|RMC@21|99@23|Rc@31|","" -"DFP Round To FP Integer With Inexact Quad Z23-form","drintxq R,FRTp,FRBp,RMC (Rc=0)|drintxq. R,FRTp,FRBp,RMC (Rc=1)","63@0|FRTp@6|///@11|R@15|FRBp@16|RMC@21|99@23|Rc@31|","" -"DFP Reround Z23-form","drrnd FRT,FRA,FRB,RMC (Rc=0)|drrnd. FRT,FRA,FRB,RMC (Rc=1)","59@0|FRT@6|FRA@11|FRB@16|RMC@21|35@23|Rc@31|","" -"DFP Reround Quad Z23-form","drrndq FRTp,FRA,FRBp,RMC (Rc=0)|drrndq. FRTp,FRA,FRBp,RMC (Rc=1)","63@0|FRTp@6|FRA@11|FRBp@16|RMC@21|35@23|Rc@31|","" -"DFP Round To DFP Short X-form","drsp FRT,FRB (Rc=0)|drsp. FRT,FRB (Rc=1)","59@0|FRT@6|///@11|FRB@16|770@21|Rc@31|","" -"DFP Shift Significand Left Immediate Z22-form","dscli FRT,FRA,SH (Rc=0)|dscli. FRT,FRA,SH (Rc=1)","59@0|FRT@6|FRA@11|SH@16|66@22|Rc@31|","" -"DFP Shift Significand Left Immediate Quad Z22-form","dscliq FRTp,FRAp,SH (Rc=0)|dscliq. FRTp,FRAp,SH (Rc=1)","63@0|FRTp@6|FRAp@11|SH@16|66@22|Rc@31|","" -"DFP Shift Significand Right Immediate Z22-form","dscri FRT,FRA,SH (Rc=0)|dscri. 
FRT,FRA,SH (Rc=1)","59@0|FRT@6|FRA@11|SH@16|98@22|Rc@31|","" -"DFP Shift Significand Right Immediate Quad Z22-form","dscriq FRTp,FRAp,SH (Rc=0)|dscriq. FRTp,FRAp,SH (Rc=1)","63@0|FRTp@6|FRAp@11|SH@16|98@22|Rc@31|","" -"DFP Subtract X-form","dsub FRT,FRA,FRB (Rc=0)|dsub. FRT,FRA,FRB (Rc=1)","59@0|FRT@6|FRA@11|FRB@16|514@21|Rc@31|","" -"DFP Subtract Quad X-form","dsubq FRTp,FRAp,FRBp (Rc=0)|dsubq. FRTp,FRAp,FRBp (Rc=1)","63@0|FRTp@6|FRAp@11|FRBp@16|514@21|Rc@31|","" -"DFP Test Data Class Z22-form","dtstdc BF,FRA,DCM","59@0|BF@6|//@9|FRA@11|DCM@16|194@22|/@31|","" -"DFP Test Data Class Quad Z22-form","dtstdcq BF,FRAp,DCM","63@0|BF@6|//@9|FRAp@11|DCM@16|194@22|/@31|","" -"DFP Test Data Group Z22-form","dtstdg BF,FRA,DGM","59@0|BF@6|//@9|FRA@11|DGM@16|226@22|/@31|","" -"DFP Test Data Group Quad Z22-form","dtstdgq BF,FRAp,DGM","63@0|BF@6|//@9|FRAp@11|DGM@16|226@22|/@31|","" -"DFP Test Exponent X-form","dtstex BF,FRA,FRB","59@0|BF@6|//@9|FRA@11|FRB@16|162@21|/@31|","" -"DFP Test Exponent Quad X-form","dtstexq BF,FRAp,FRBp","63@0|BF@6|//@9|FRAp@11|FRBp@16|162@21|/@31|","" -"DFP Test Significance X-form","dtstsf BF,FRA,FRB","59@0|BF@6|//@9|FRA@11|FRB@16|674@21|/@31|","" -"DFP Test Significance Quad X-form","dtstsfq BF,FRA,FRBp","63@0|BF@6|//@9|FRA@11|FRBp@16|674@21|/@31|","" -"DFP Extract Biased Exponent X-form","dxex FRT,FRB (Rc=0)|dxex. FRT,FRB (Rc=1)","59@0|FRT@6|///@11|FRB@16|354@21|Rc@31|","" -"DFP Extract Biased Exponent Quad X-form","dxexq FRT,FRBp (Rc=0)|dxexq. FRT,FRBp (Rc=1)","63@0|FRT@6|///@11|FRBp@16|354@21|Rc@31|","" -"Floating Copy Sign X-form","fcpsgn FRT, FRA, FRB (Rc=0)|fcpsgn. 
FRT, FRA, FRB (Rc=1)","63@0|FRT@6|FRA@11|FRB@16|8@21|Rc@31|","" -"Load Byte & Zero Caching Inhibited Indexed X-form","lbzcix RT,RA,RB","31@0|RT@6|RA@11|RB@16|853@21|/@31|","" -"Load Doubleword Caching Inhibited Indexed X-form","ldcix RT,RA,RB","31@0|RT@6|RA@11|RB@16|885@21|/@31|","" -"Load Floating-Point Double Pair DS-form","lfdp FRTp,DS(RA)","57@0|FRTp@6|RA@11|DS@16|0@30|","" -"Load Floating-Point Double Pair Indexed X-form","lfdpx FRTp,RA,RB","31@0|FRTp@6|RA@11|RB@16|791@21|/@31|","" -"Load Floating-Point as Integer Word Algebraic Indexed X-form","lfiwax FRT,RA,RB","31@0|FRT@6|RA@11|RB@16|855@21|/@31|","" -"Load Halfword & Zero Caching Inhibited Indexed X-form","lhzcix RT,RA,RB","31@0|RT@6|RA@11|RB@16|821@21|/@31|","" -"Load Word & Zero Caching Inhibited Indexed X-form","lwzcix RT,RA,RB","31@0|RT@6|RA@11|RB@16|789@21|/@31|","" -"Parity Doubleword X-form","prtyd RA,RS","31@0|RS@6|RA@11|///@16|186@21|/@31|","" -"Parity Word X-form","prtyw RA,RS","31@0|RS@6|RA@11|///@16|154@21|/@31|","" -"SLB Find Entry ESID X-form","slbfee. 
RT,RB","31@0|RT@6|///@11|RB@16|979@21|1@31|","" -"Store Byte Caching Inhibited Indexed X-form","stbcix RS,RA,RB","31@0|RS@6|RA@11|RB@16|981@21|/@31|","" -"Store Doubleword Caching Inhibited Indexed X-form","stdcix RS,RA,RB","31@0|RS@6|RA@11|RB@16|1013@21|/@31|","" -"Store Floating-Point Double Pair DS-form","stfdp FRSp,DS(RA)","61@0|FRSp@6|RA@11|DS@16|0@30|","" -"Store Floating-Point Double Pair Indexed X-form","stfdpx FRSp,RA,RB","31@0|FRSp@6|RA@11|RB@16|919@21|/@31|","" -"Store Halfword Caching Inhibited Indexed X-form","sthcix RS,RA,RB","31@0|RS@6|RA@11|RB@16|949@21|/@31|","" -"Store Word Caching Inhibited Indexed X-form","stwcix RS,RA,RB","31@0|RS@6|RA@11|RB@16|917@21|/@31|","" -"Integer Select A-form","isel RT,RA,RB,BC","31@0|RT@6|RA@11|RB@16|BC@21|15@26|/@31|","" -"Load Vector Element Byte Indexed X-form","lvebx VRT,RA,RB","31@0|VRT@6|RA@11|RB@16|7@21|/@31|","" -"Load Vector Element Halfword Indexed X-form","lvehx VRT,RA,RB","31@0|VRT@6|RA@11|RB@16|39@21|/@31|","" -"Load Vector Element Word Indexed X-form","lvewx VRT,RA,RB","31@0|VRT@6|RA@11|RB@16|71@21|/@31|","" -"Load Vector for Shift Left Indexed X-form","lvsl VRT,RA,RB","31@0|VRT@6|RA@11|RB@16|6@21|/@31|","" -"Load Vector for Shift Right Indexed X-form","lvsr VRT,RA,RB","31@0|VRT@6|RA@11|RB@16|38@21|/@31|","" -"Load Vector Indexed X-form","lvx VRT,RA,RB","31@0|VRT@6|RA@11|RB@16|103@21|/@31|","" -"Load Vector Indexed Last X-form","lvxl VRT,RA,RB","31@0|VRT@6|RA@11|RB@16|359@21|/@31|","" -"Move From Vector Status and Control Register VX-form","mfvscr VRT","4@0|VRT@6|///@11|///@16|1540@21|","" -"Move To Vector Status and Control Register VX-form","mtvscr VRB","4@0|///@6|///@11|VRB@16|1604@21|","" -"Store Vector Element Byte Indexed X-form","stvebx VRS,RA,RB","31@0|VRS@6|RA@11|RB@16|135@21|/@31|","" -"Store Vector Element Halfword Indexed X-form","stvehx VRS,RA,RB","31@0|VRS@6|RA@11|RB@16|167@21|/@31|","" -"Store Vector Element Word Indexed X-form","stvewx VRS,RA,RB","31@0|VRS@6|RA@11|RB@16|199@21|/@31|","" 
-"Store Vector Indexed X-form","stvx VRS,RA,RB","31@0|VRS@6|RA@11|RB@16|231@21|/@31|","" -"Store Vector Indexed Last X-form","stvxl VRS,RA,RB","31@0|VRS@6|RA@11|RB@16|487@21|/@31|","" -"TLB Invalidate Entry Local X-form","tlbiel RB,RS,RIC,PRS,R","31@0|RS@6|/@11|RIC@12|PRS@14|R@15|RB@16|274@21|/@31|","" -"Vector Add & write Carry Unsigned Word VX-form","vaddcuw VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|384@21|","" -"Vector Add Floating-Point VX-form","vaddfp VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|10@21|","" -"Vector Add Signed Byte Saturate VX-form","vaddsbs VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|768@21|","" -"Vector Add Signed Halfword Saturate VX-form","vaddshs VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|832@21|","" -"Vector Add Signed Word Saturate VX-form","vaddsws VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|896@21|","" -"Vector Add Unsigned Byte Modulo VX-form","vaddubm VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|0@21|","" -"Vector Add Unsigned Byte Saturate VX-form","vaddubs VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|512@21|","" -"Vector Add Unsigned Halfword Modulo VX-form","vadduhm VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|64@21|","" -"Vector Add Unsigned Halfword Saturate VX-form","vadduhs VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|576@21|","" -"Vector Add Unsigned Word Modulo VX-form","vadduwm VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|128@21|","" -"Vector Add Unsigned Word Saturate VX-form","vadduws VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|640@21|","" -"Vector Logical AND VX-form","vand VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1028@21|","" -"Vector Logical AND with Complement VX-form","vandc VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1092@21|","" -"Vector Average Signed Byte VX-form","vavgsb VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1282@21|","" -"Vector Average Signed Halfword VX-form","vavgsh VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1346@21|","" -"Vector Average Signed Word VX-form","vavgsw VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1410@21|","" -"Vector Average Unsigned Byte VX-form","vavgub 
VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1026@21|","" -"Vector Average Unsigned Halfword VX-form","vavguh VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1090@21|","" -"Vector Average Unsigned Word VX-form","vavguw VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1154@21|","" -"Vector Convert with round to nearest From Signed Word to floating-point format VX-form","vcfsx VRT,VRB,UIM","4@0|VRT@6|UIM@11|VRB@16|842@21|","" -"Vector Convert with round to nearest From Unsigned Word to floating-point format VX-form","vcfux VRT,VRB,UIM","4@0|VRT@6|UIM@11|VRB@16|778@21|","" -"Vector Compare Bounds Floating-Point VC-form","vcmpbfp VRT,VRA,VRB (Rc=0)|vcmpbfp. VRT,VRA,VRB (Rc=1)","4@0|VRT@6|VRA@11|VRB@16|Rc@21|966@22|","" -"Vector Compare Equal Floating-Point VC-form","vcmpeqfp VRT,VRA,VRB (Rc=0)|vcmpeqfp. VRT,VRA,VRB (Rc=1)","4@0|VRT@6|VRA@11|VRB@16|Rc@21|198@22|","" -"Vector Compare Equal Unsigned Byte VC-form","vcmpequb VRT,VRA,VRB (Rc=0)|vcmpequb. VRT,VRA,VRB (Rc=1)","4@0|VRT@6|VRA@11|VRB@16|Rc@21|6@22|","" -"Vector Compare Equal Unsigned Halfword VC-form","vcmpequh VRT,VRA,VRB (Rc=0)|vcmpequh. VRT,VRA,VRB (Rc=1)","4@0|VRT@6|VRA@11|VRB@16|Rc@21|70@22|","" -"Vector Compare Equal Unsigned Word VC-form","vcmpequw VRT,VRA,VRB (Rc=0)|vcmpequw. VRT,VRA,VRB (Rc=1)","4@0|VRT@6|VRA@11|VRB@16|Rc@21|134@22|","" -"Vector Compare Greater Than or Equal Floating-Point VC-form","vcmpgefp VRT,VRA,VRB (Rc=0)|vcmpgefp. VRT,VRA,VRB (Rc=1)","4@0|VRT@6|VRA@11|VRB@16|Rc@21|454@22|","" -"Vector Compare Greater Than Floating-Point VC-form","vcmpgtfp VRT,VRA,VRB (Rc=0)|vcmpgtfp. VRT,VRA,VRB (Rc=1)","4@0|VRT@6|VRA@11|VRB@16|Rc@21|710@22|","" -"Vector Compare Greater Than Signed Byte VC-form","vcmpgtsb VRT,VRA,VRB (Rc=0)|vcmpgtsb. VRT,VRA,VRB (Rc=1)","4@0|VRT@6|VRA@11|VRB@16|Rc@21|774@22|","" -"Vector Compare Greater Than Signed Halfword VC-form","vcmpgtsh VRT,VRA,VRB (Rc=0)|vcmpgtsh. 
VRT,VRA,VRB (Rc=1)","4@0|VRT@6|VRA@11|VRB@16|Rc@21|838@22|","" -"Vector Compare Greater Than Signed Word VC-form","vcmpgtsw VRT,VRA,VRB (Rc=0)|vcmpgtsw. VRT,VRA,VRB (Rc=1)","4@0|VRT@6|VRA@11|VRB@16|Rc@21|902@22|","" -"Vector Compare Greater Than Unsigned Byte VC-form","vcmpgtub VRT,VRA,VRB (Rc=0)|vcmpgtub. VRT,VRA,VRB (Rc=1)","4@0|VRT@6|VRA@11|VRB@16|Rc@21|518@22|","" -"Vector Compare Greater Than Unsigned Halfword VC-form","vcmpgtuh VRT,VRA,VRB (Rc=0)|vcmpgtuh. VRT,VRA,VRB (Rc=1)","4@0|VRT@6|VRA@11|VRB@16|Rc@21|582@22|","" -"Vector Compare Greater Than Unsigned Word VC-form","vcmpgtuw VRT,VRA,VRB (Rc=0)|vcmpgtuw. VRT,VRA,VRB (Rc=1)","4@0|VRT@6|VRA@11|VRB@16|Rc@21|646@22|","" -"Vector Convert with round to zero from floating-point To Signed Word format Saturate VX-form","vctsxs VRT,VRB,UIM","4@0|VRT@6|UIM@11|VRB@16|970@21|","" -"Vector Convert with round to zero from floating-point To Unsigned Word format Saturate VX-form","vctuxs VRT,VRB,UIM","4@0|VRT@6|UIM@11|VRB@16|906@21|","" -"Vector 2 Raised to the Exponent Estimate Floating-Point VX-form","vexptefp VRT,VRB","4@0|VRT@6|///@11|VRB@16|394@21|","" -"Vector Log Base 2 Estimate Floating-Point VX-form","vlogefp VRT,VRB","4@0|VRT@6|///@11|VRB@16|458@21|","" -"Vector Multiply-Add Floating-Point VA-form","vmaddfp VRT,VRA,VRC,VRB","4@0|VRT@6|VRA@11|VRB@16|VRC@21|46@26|","" -"Vector Maximum Floating-Point VX-form","vmaxfp VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1034@21|","" -"Vector Maximum Signed Byte VX-form","vmaxsb VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|258@21|","" -"Vector Maximum Signed Halfword VX-form","vmaxsh VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|322@21|","" -"Vector Maximum Signed Word VX-form","vmaxsw VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|386@21|","" -"Vector Maximum Unsigned Byte VX-form","vmaxub VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|2@21|","" -"Vector Maximum Unsigned Halfword VX-form","vmaxuh VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|66@21|","" -"Vector Maximum Unsigned Word VX-form","vmaxuw 
VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|130@21|","" -"Vector Multiply-High-Add Signed Halfword Saturate VA-form","vmhaddshs VRT,VRA,VRB,VRC","4@0|VRT@6|VRA@11|VRB@16|VRC@21|32@26|","" -"Vector Multiply-High-Round-Add Signed Halfword Saturate VA-form","vmhraddshs VRT,VRA,VRB,VRC","4@0|VRT@6|VRA@11|VRB@16|VRC@21|33@26|","" -"Vector Minimum Floating-Point VX-form","vminfp VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1098@21|","" -"Vector Minimum Signed Byte VX-form","vminsb VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|770@21|","" -"Vector Minimum Signed Halfword VX-form","vminsh VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|834@21|","" -"Vector Minimum Signed Word VX-form","vminsw VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|898@21|","" -"Vector Minimum Unsigned Byte VX-form","vminub VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|514@21|","" -"Vector Minimum Unsigned Halfword VX-form","vminuh VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|578@21|","" -"Vector Minimum Unsigned Word VX-form","vminuw VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|642@21|","" -"Vector Multiply-Low-Add Unsigned Halfword Modulo VA-form","vmladduhm VRT,VRA,VRB,VRC","4@0|VRT@6|VRA@11|VRB@16|VRC@21|34@26|","" -"Vector Merge High Byte VX-form","vmrghb VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|12@21|","" -"Vector Merge High Halfword VX-form","vmrghh VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|76@21|","" -"Vector Merge High Word VX-form","vmrghw VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|140@21|","" -"Vector Merge Low Byte VX-form","vmrglb VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|268@21|","" -"Vector Merge Low Halfword VX-form","vmrglh VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|332@21|","" -"Vector Merge Low Word VX-form","vmrglw VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|396@21|","" -"Vector Multiply-Sum Mixed Byte Modulo VA-form","vmsummbm VRT,VRA,VRB,VRC","4@0|VRT@6|VRA@11|VRB@16|VRC@21|37@26|","" -"Vector Multiply-Sum Signed Halfword Modulo VA-form","vmsumshm VRT,VRA,VRB,VRC","4@0|VRT@6|VRA@11|VRB@16|VRC@21|40@26|","" -"Vector Multiply-Sum Signed Halfword Saturate 
VA-form","vmsumshs VRT,VRA,VRB,VRC","4@0|VRT@6|VRA@11|VRB@16|VRC@21|41@26|","" -"Vector Multiply-Sum Unsigned Byte Modulo VA-form","vmsumubm VRT,VRA,VRB,VRC","4@0|VRT@6|VRA@11|VRB@16|VRC@21|36@26|","" -"Vector Multiply-Sum Unsigned Halfword Modulo VA-form","vmsumuhm VRT,VRA,VRB,VRC","4@0|VRT@6|VRA@11|VRB@16|VRC@21|38@26|","" -"Vector Multiply-Sum Unsigned Halfword Saturate VA-form","vmsumuhs VRT,VRA,VRB,VRC","4@0|VRT@6|VRA@11|VRB@16|VRC@21|39@26|","" -"Vector Multiply Even Signed Byte VX-form","vmulesb VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|776@21|","" -"Vector Multiply Even Signed Halfword VX-form","vmulesh VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|840@21|","" -"Vector Multiply Even Unsigned Byte VX-form","vmuleub VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|520@21|","" -"Vector Multiply Even Unsigned Halfword VX-form","vmuleuh VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|584@21|","" -"Vector Multiply Odd Signed Byte VX-form","vmulosb VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|264@21|","" -"Vector Multiply Odd Signed Halfword VX-form","vmulosh VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|328@21|","" -"Vector Multiply Odd Unsigned Byte VX-form","vmuloub VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|8@21|","" -"Vector Multiply Odd Unsigned Halfword VX-form","vmulouh VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|72@21|","" -"Vector Negative Multiply-Subtract Floating-Point VA-form","vnmsubfp VRT,VRA,VRC,VRB","4@0|VRT@6|VRA@11|VRB@16|VRC@21|47@26|","" -"Vector Logical NOR VX-form","vnor VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1284@21|","" -"Vector Logical OR VX-form","vor VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1156@21|","" -"Vector Permute VA-form","vperm VRT,VRA,VRB,VRC","4@0|VRT@6|VRA@11|VRB@16|VRC@21|43@26|","" -"Vector Pack Pixel VX-form","vpkpx VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|782@21|","" -"Vector Pack Signed Halfword Signed Saturate VX-form","vpkshss VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|398@21|","" -"Vector Pack Signed Halfword Unsigned Saturate VX-form","vpkshus 
VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|270@21|","" -"Vector Pack Signed Word Signed Saturate VX-form","vpkswss VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|462@21|","" -"Vector Pack Signed Word Unsigned Saturate VX-form","vpkswus VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|334@21|","" -"Vector Pack Unsigned Halfword Unsigned Modulo VX-form","vpkuhum VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|14@21|","" -"Vector Pack Unsigned Halfword Unsigned Saturate VX-form","vpkuhus VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|142@21|","" -"Vector Pack Unsigned Word Unsigned Modulo VX-form","vpkuwum VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|78@21|","" -"Vector Pack Unsigned Word Unsigned Saturate VX-form","vpkuwus VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|206@21|","" -"Vector Reciprocal Estimate Floating-Point VX-form","vrefp VRT,VRB","4@0|VRT@6|///@11|VRB@16|266@21|","" -"Vector Round to Floating-Point Integer toward -Infinity VX-form","vrfim VRT,VRB","4@0|VRT@6|///@11|VRB@16|714@21|","" -"Vector Round to Floating-Point Integer Nearest VX-form","vrfin VRT,VRB","4@0|VRT@6|///@11|VRB@16|522@21|","" -"Vector Round to Floating-Point Integer toward +Infinity VX-form","vrfip VRT,VRB","4@0|VRT@6|///@11|VRB@16|650@21|","" -"Vector Round to Floating-Point Integer toward Zero VX-form","vrfiz VRT,VRB","4@0|VRT@6|///@11|VRB@16|586@21|","" -"Vector Rotate Left Byte VX-form","vrlb VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|4@21|","" -"Vector Rotate Left Halfword VX-form","vrlh VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|68@21|","" -"Vector Rotate Left Word VX-form","vrlw VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|132@21|","" -"Vector Reciprocal Square Root Estimate Floating-Point VX-form","vrsqrtefp VRT,VRB","4@0|VRT@6|///@11|VRB@16|330@21|","" -"Vector Select VA-form","vsel VRT,VRA,VRB,VRC","4@0|VRT@6|VRA@11|VRB@16|VRC@21|42@26|","" -"Vector Shift Left VX-form","vsl VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|452@21|","" -"Vector Shift Left Byte VX-form","vslb VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|260@21|","" -"Vector Shift Left Double by Octet 
Immediate VA-form","vsldoi VRT,VRA,VRB,SHB","4@0|VRT@6|VRA@11|VRB@16|/@21|SHB@22|44@26|","" -"Vector Shift Left Halfword VX-form","vslh VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|324@21|","" -"Vector Shift Left by Octet VX-form","vslo VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1036@21|","" -"Vector Shift Left Word VX-form","vslw VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|388@21|","" -"Vector Splat Byte VX-form","vspltb VRT,VRB,UIM","4@0|VRT@6|/@11|UIM@12|VRB@16|524@21|","" -"Vector Splat Halfword VX-form","vsplth VRT,VRB,UIM","4@0|VRT@6|//@11|UIM@13|VRB@16|588@21|","" -"Vector Splat Immediate Signed Byte VX-form","vspltisb VRT,SIM","4@0|VRT@6|SIM@11|///@16|780@21|","" -"Vector Splat Immediate Signed Halfword VX-form","vspltish VRT,SIM","4@0|VRT@6|SIM@11|///@16|844@21|","" -"Vector Splat Immediate Signed Word VX-form","vspltisw VRT,SIM","4@0|VRT@6|SIM@11|///@16|908@21|","" -"Vector Splat Word VX-form","vspltw VRT,VRB,UIM","4@0|VRT@6|///@11|UIM@14|VRB@16|652@21|","" -"Vector Shift Right VX-form","vsr VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|708@21|","" -"Vector Shift Right Algebraic Byte VX-form","vsrab VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|772@21|","" -"Vector Shift Right Algebraic Halfword VX-form","vsrah VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|836@21|","" -"Vector Shift Right Algebraic Word VX-form","vsraw VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|900@21|","" -"Vector Shift Right Byte VX-form","vsrb VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|516@21|","" -"Vector Shift Right Halfword VX-form","vsrh VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|580@21|","" -"Vector Shift Right by Octet VX-form","vsro VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1100@21|","" -"Vector Shift Right Word VX-form","vsrw VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|644@21|","" -"Vector Subtract & Write Carry-out Unsigned Word VX-form","vsubcuw VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1408@21|","" -"Vector Subtract Floating-Point VX-form","vsubfp VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|74@21|","" -"Vector Subtract Signed Byte Saturate 
VX-form","vsubsbs VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1792@21|","" -"Vector Subtract Signed Halfword Saturate VX-form","vsubshs VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1856@21|","" -"Vector Subtract Signed Word Saturate VX-form","vsubsws VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1920@21|","" -"Vector Subtract Unsigned Byte Modulo VX-form","vsububm VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1024@21|","" -"Vector Subtract Unsigned Byte Saturate VX-form","vsububs VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1536@21|","" -"Vector Subtract Unsigned Halfword Modulo VX-form","vsubuhm VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1088@21|","" -"Vector Subtract Unsigned Halfword Saturate VX-form","vsubuhs VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1600@21|","" -"Vector Subtract Unsigned Word Modulo VX-form","vsubuwm VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1152@21|","" -"Vector Subtract Unsigned Word Saturate VX-form","vsubuws VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1664@21|","" -"Vector Sum across Half Signed Word Saturate VX-form","vsum2sws VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1672@21|","" -"Vector Sum across Quarter Signed Byte Saturate VX-form","vsum4sbs VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1800@21|","" -"Vector Sum across Quarter Signed Halfword Saturate VX-form","vsum4shs VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1608@21|","" -"Vector Sum across Quarter Unsigned Byte Saturate VX-form","vsum4ubs VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1544@21|","" -"Vector Sum across Signed Word Saturate VX-form","vsumsws VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1928@21|","" -"Vector Unpack High Pixel VX-form","vupkhpx VRT,VRB","4@0|VRT@6|///@11|VRB@16|846@21|","" -"Vector Unpack High Signed Byte VX-form","vupkhsb VRT,VRB","4@0|VRT@6|///@11|VRB@16|526@21|","" -"Vector Unpack High Signed Halfword VX-form","vupkhsh VRT,VRB","4@0|VRT@6|///@11|VRB@16|590@21|","" -"Vector Unpack Low Pixel VX-form","vupklpx VRT,VRB","4@0|VRT@6|///@11|VRB@16|974@21|","" -"Vector Unpack Low Signed Byte VX-form","vupklsb 
VRT,VRB","4@0|VRT@6|///@11|VRB@16|654@21|","" -"Vector Unpack Low Signed Halfword VX-form","vupklsh VRT,VRB","4@0|VRT@6|///@11|VRB@16|718@21|","" -"Vector Logical XOR VX-form","vxor VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1220@21|","" -"Floating Reciprocal Estimate A-form","fre FRT,FRB (Rc=0)|fre. FRT,FRB (Rc=1)","63@0|FRT@6|///@11|FRB@16|///@21|24@26|Rc@31|","" -"Floating Round to Integer Minus X-form","frim FRT,FRB (Rc=0)|frim. FRT,FRB (Rc=1)","63@0|FRT@6|///@11|FRB@16|488@21|Rc@31|","" -"Floating Round to Integer Nearest X-form","frin FRT,FRB (Rc=0)|frin. FRT,FRB (Rc=1)","63@0|FRT@6|///@11|FRB@16|392@21|Rc@31|","" -"Floating Round to Integer Plus X-form","frip FRT,FRB (Rc=0)|frip. FRT,FRB (Rc=1)","63@0|FRT@6|///@11|FRB@16|456@21|Rc@31|","" -"Floating Round to Integer Toward Zero X-form","friz FRT,FRB (Rc=0)|friz. FRT,FRB (Rc=1)","63@0|FRT@6|///@11|FRB@16|424@21|Rc@31|","" -"Floating Reciprocal Square Root Estimate Single A-form","frsqrtes FRT,FRB (Rc=0)|frsqrtes. FRT,FRB (Rc=1)","59@0|FRT@6|///@11|FRB@16|///@21|26@26|Rc@31|","" -"Return From Interrupt Doubleword Hypervisor XL-form","hrfid","19@0|///@6|///@11|///@16|274@21|/@31|","" -"Population Count Bytes X-form","popcntb RA, RS","31@0|RS@6|RA@11|///@16|122@21|/@31|","" -"Move From One Condition Register Field XFX-form","mfocrf RT,FXM","31@0|RT@6|1@11|FXM@12|/@20|19@21|/@31|","" -"Move To One Condition Register Field XFX-form","mtocrf FXM,RS","31@0|RS@6|1@11|FXM@12|/@20|144@21|/@31|","" -"SLB Move From Entry ESID X-form","slbmfee RT,RB","31@0|RT@6|///@11|L@15|RB@16|915@21|/@31|","" -"SLB Move From Entry VSID X-form","slbmfev RT,RB","31@0|RT@6|///@11|L@15|RB@16|851@21|/@31|","" -"SLB Move To Entry X-form","slbmte RS,RB","31@0|RS@6|///@11|RB@16|402@21|/@31|","" -"Return From System Call Vectored XL-form","rfscv","19@0|///@6|///@11|///@16|82@21|/@31|","" -"System Call Vectored SC-form","scv LEV","17@0|///@6|///@11|///@16|LEV@20|///@27|0@30|1@31|","" -"Load Quadword DQ-form","lq 
RTp,DQ(RA)","56@0|RTp@6|RA@11|DQ@16|///@28|","" -"Store Quadword DS-form","stq RSp,DS(RA)","62@0|RSp@6|RA@11|DS@16|2@30|","" -"Count Leading Zeros Doubleword X-form","cntlzd RA,RS (Rc=0)|cntlzd. RA,RS (Rc=1)","31@0|RS@6|RA@11|///@16|58@21|Rc@31|","" -"Data Cache Block Flush X-form","dcbf RA,RB,L","31@0|//@6|L@8|RA@11|RB@16|86@21|/@31|","" -"Data Cache Block Store X-form","dcbst RA,RB","31@0|///@6|RA@11|RB@16|54@21|/@31|","" -"Data Cache Block Touch X-form","dcbt RA,RB,TH","31@0|TH@6|RA@11|RB@16|278@21|/@31|","" -"Data Cache Block Touch for Store X-form","dcbtst RA,RB,TH","31@0|TH@6|RA@11|RB@16|246@21|/@31|","" -"Divide Doubleword XO-form","divd RT,RA,RB (OE=0 Rc=0)|divd. RT,RA,RB (OE=0 Rc=1)|divdo RT,RA,RB (OE=1 Rc=0)|divdo. RT,RA,RB (OE=1 Rc=1)","31@0|RT@6|RA@11|RB@16|OE@21|489@22|Rc@31|","" -"Divide Doubleword Unsigned XO-form","divdu RT,RA,RB (OE=0 Rc=0)|divdu. RT,RA,RB (OE=0 Rc=1)|divduo RT,RA,RB (OE=1 Rc=0)|divduo. RT,RA,RB (OE=1 Rc=1)","31@0|RT@6|RA@11|RB@16|OE@21|457@22|Rc@31|","" -"Divide Word XO-form","divw RT,RA,RB (OE=0 Rc=0)|divw. RT,RA,RB (OE=0 Rc=1)|divwo RT,RA,RB (OE=1 Rc=0)|divwo. RT,RA,RB (OE=1 Rc=1)","31@0|RT@6|RA@11|RB@16|OE@21|491@22|Rc@31|","" -"Divide Word Unsigned XO-form","divwu RT,RA,RB (OE=0 Rc=0)|divwu. RT,RA,RB (OE=0 Rc=1)|divwuo RT,RA,RB (OE=1 Rc=0)|divwuo. RT,RA,RB (OE=1 Rc=1)","31@0|RT@6|RA@11|RB@16|OE@21|459@22|Rc@31|","" -"Enforce In-order Execution of I/O X-form","eieio","31@0|///@6|///@11|///@16|854@21|/@31|","" -"Extend Sign Byte X-form","extsb RA,RS (Rc=0)|extsb. RA,RS (Rc=1)","31@0|RS@6|RA@11|///@16|954@21|Rc@31|","" -"Extend Sign Word X-form","extsw RA,RS (Rc=0)|extsw. RA,RS (Rc=1)","31@0|RS@6|RA@11|///@16|986@21|Rc@31|","" -"Floating Add Single A-form","fadds FRT,FRA,FRB (Rc=0)|fadds. FRT,FRA,FRB (Rc=1)","59@0|FRT@6|FRA@11|FRB@16|///@21|21@26|Rc@31|","" -"Floating Convert with round Signed Doubleword to Double-Precision format X-form","fcfid FRT,FRB (Rc=0)|fcfid. 
FRT,FRB (Rc=1)","63@0|FRT@6|///@11|FRB@16|846@21|Rc@31|","" -"Floating Convert with round Double-Precision To Signed Doubleword format X-form","fctid FRT,FRB (Rc=0)|fctid. FRT,FRB (Rc=1)","63@0|FRT@6|///@11|FRB@16|814@21|Rc@31|","" -"Floating Convert with truncate Double-Precision To Signed Doubleword format X-form","fctidz FRT,FRB (Rc=0)|fctidz. FRT,FRB (Rc=1)","63@0|FRT@6|///@11|FRB@16|815@21|Rc@31|","" -"Floating Divide Single A-form","fdivs FRT,FRA,FRB (Rc=0)|fdivs. FRT,FRA,FRB (Rc=1)","59@0|FRT@6|FRA@11|FRB@16|///@21|18@26|Rc@31|","" -"Floating Multiply-Add Single A-form","fmadds FRT,FRA,FRC,FRB (Rc=0)|fmadds. FRT,FRA,FRC,FRB (Rc=1)","59@0|FRT@6|FRA@11|FRB@16|FRC@21|29@26|Rc@31|","" -"Floating Multiply-Subtract Single A-form","fmsubs FRT,FRA,FRC,FRB (Rc=0)|fmsubs. FRT,FRA,FRC,FRB (Rc=1)","59@0|FRT@6|FRA@11|FRB@16|FRC@21|28@26|Rc@31|","" -"Floating Multiply Single A-form","fmuls FRT,FRA,FRC (Rc=0)|fmuls. FRT,FRA,FRC (Rc=1)","59@0|FRT@6|FRA@11|///@16|FRC@21|25@26|Rc@31|","" -"Floating Negative Multiply-Add Single A-form","fnmadds FRT,FRA,FRC,FRB (Rc=0)|fnmadds. FRT,FRA,FRC,FRB (Rc=1)","59@0|FRT@6|FRA@11|FRB@16|FRC@21|31@26|Rc@31|","" -"Floating Negative Multiply-Subtract Single A-form","fnmsubs FRT,FRA,FRC,FRB (Rc=0)|fnmsubs. FRT,FRA,FRC,FRB (Rc=1)","59@0|FRT@6|FRA@11|FRB@16|FRC@21|30@26|Rc@31|","" -"Floating Reciprocal Estimate Single A-form","fres FRT,FRB (Rc=0)|fres. FRT,FRB (Rc=1)","59@0|FRT@6|///@11|FRB@16|///@21|24@26|Rc@31|","" -"Floating Reciprocal Square Root Estimate A-form","frsqrte FRT,FRB (Rc=0)|frsqrte. FRT,FRB (Rc=1)","63@0|FRT@6|///@11|FRB@16|///@21|26@26|Rc@31|","" -"Floating Select A-form","fsel FRT,FRA,FRC,FRB (Rc=0)|fsel. FRT,FRA,FRC,FRB (Rc=1)","63@0|FRT@6|FRA@11|FRB@16|FRC@21|23@26|Rc@31|","" -"Floating Square Root Single A-form","fsqrts FRT,FRB (Rc=0)|fsqrts. FRT,FRB (Rc=1)","59@0|FRT@6|///@11|FRB@16|///@21|22@26|Rc@31|","" -"Floating Subtract Single A-form","fsubs FRT,FRA,FRB (Rc=0)|fsubs. 
FRT,FRA,FRB (Rc=1)","59@0|FRT@6|FRA@11|FRB@16|///@21|20@26|Rc@31|","" -"Instruction Cache Block Invalidate X-form","icbi RA,RB","31@0|///@6|RA@11|RB@16|982@21|/@31|","" -"Load Doubleword DS-form","ld RT,DS(RA)","58@0|RT@6|RA@11|DS@16|0@30|","" -"Load Doubleword And Reserve Indexed X-form","ldarx RT,RA,RB,EH","31@0|RT@6|RA@11|RB@16|84@21|EH@31|","" -"Load Doubleword with Update DS-form","ldu RT,DS(RA)","58@0|RT@6|RA@11|DS@16|1@30|","" -"Load Doubleword with Update Indexed X-form","ldux RT,RA,RB","31@0|RT@6|RA@11|RB@16|53@21|/@31|","" -"Load Doubleword Indexed X-form","ldx RT,RA,RB","31@0|RT@6|RA@11|RB@16|21@21|/@31|","" -"Load Word Algebraic DS-form","lwa RT,DS(RA)","58@0|RT@6|RA@11|DS@16|2@30|","" -"Load Word & Reserve Indexed X-form","lwarx RT,RA,RB,EH","31@0|RT@6|RA@11|RB@16|20@21|EH@31|","" -"Load Word Algebraic with Update Indexed X-form","lwaux RT,RA,RB","31@0|RT@6|RA@11|RB@16|373@21|/@31|","" -"Load Word Algebraic Indexed X-form","lwax RT,RA,RB","31@0|RT@6|RA@11|RB@16|341@21|/@31|","" -"Move From Time Base XFX-form","mftb RT,TBR","31@0|RT@6|tbr@11|371@21|/@31|","" -"Move To MSR Doubleword X-form","mtmsrd RS,L","31@0|RS@6|///@11|L@15|///@16|178@21|/@31|","" -"Multiply High Doubleword XO-form","mulhd RT,RA,RB (Rc=0)|mulhd. RT,RA,RB (Rc=1)","31@0|RT@6|RA@11|RB@16|/@21|73@22|Rc@31|","" -"Multiply High Doubleword Unsigned XO-form","mulhdu RT,RA,RB (Rc=0)|mulhdu. RT,RA,RB (Rc=1)","31@0|RT@6|RA@11|RB@16|/@21|9@22|Rc@31|","" -"Multiply High Word XO-form","mulhw RT,RA,RB (Rc=0)|mulhw. RT,RA,RB (Rc=1)","31@0|RT@6|RA@11|RB@16|/@21|75@22|Rc@31|","" -"Multiply High Word Unsigned XO-form","mulhwu RT,RA,RB (Rc=0)|mulhwu. RT,RA,RB (Rc=1)","31@0|RT@6|RA@11|RB@16|/@21|11@22|Rc@31|","" -"Multiply Low Doubleword XO-form","mulld RT,RA,RB (OE=0 Rc=0)|mulld. RT,RA,RB (OE=0 Rc=1)|mulldo RT,RA,RB (OE=1 Rc=0)|mulldo. 
RT,RA,RB (OE=1 Rc=1)","31@0|RT@6|RA@11|RB@16|OE@21|233@22|Rc@31|","" -"Return from Interrupt Doubleword XL-form","rfid","19@0|///@6|///@11|///@16|18@21|/@31|","" -"Rotate Left Doubleword then Clear Left MDS-form","rldcl RA,RS,RB,MB (Rc=0)|rldcl. RA,RS,RB,MB (Rc=1)","30@0|RS@6|RA@11|RB@16|mb@21|8@27|Rc@31|","" -"Rotate Left Doubleword then Clear Right MDS-form","rldcr RA,RS,RB,ME (Rc=0)|rldcr. RA,RS,RB,ME (Rc=1)","30@0|RS@6|RA@11|RB@16|me@21|9@27|Rc@31|","" -"Rotate Left Doubleword Immediate then Clear MD-form","rldic RA,RS,SH,MB (Rc=0)|rldic. RA,RS,SH,MB (Rc=1)","30@0|RS@6|RA@11|sh@16|mb@21|2@27|sh@30|Rc@31|","" -"Rotate Left Doubleword Immediate then Clear Left MD-form","rldicl RA,RS,SH,MB (Rc=0)|rldicl. RA,RS,SH,MB (Rc=1)","30@0|RS@6|RA@11|sh@16|mb@21|0@27|sh@30|Rc@31|","" -"Rotate Left Doubleword Immediate then Clear Right MD-form","rldicr RA,RS,SH,ME (Rc=0)|rldicr. RA,RS,SH,ME (Rc=1)","30@0|RS@6|RA@11|sh@16|me@21|1@27|sh@30|Rc@31|","" -"Rotate Left Doubleword Immediate then Mask Insert MD-form","rldimi RA,RS,SH,MB (Rc=0)|rldimi. RA,RS,SH,MB (Rc=1)","30@0|RS@6|RA@11|sh@16|mb@21|3@27|sh@30|Rc@31|","" -"System Call SC-form","sc LEV","17@0|///@6|///@11|///@16|LEV@20|///@27|1@30|/@31|","" -"SLB Invalidate All X-form","slbia IH","31@0|//@6|IH@8|///@11|///@16|498@21|/@31|","" -"SLB Invalidate Entry X-form","slbie RB","31@0|///@6|///@11|RB@16|434@21|/@31|","" -"Shift Left Doubleword X-form","sld RA,RS,RB (Rc=0)|sld. RA,RS,RB (Rc=1)","31@0|RS@6|RA@11|RB@16|27@21|Rc@31|","" -"Shift Right Algebraic Doubleword X-form","srad RA,RS,RB (Rc=0)|srad. RA,RS,RB (Rc=1)","31@0|RS@6|RA@11|RB@16|794@21|Rc@31|","" -"Shift Right Algebraic Doubleword Immediate XS-form","sradi RA,RS,SH (Rc=0)|sradi. RA,RS,SH (Rc=1)","31@0|RS@6|RA@11|sh@16|413@21|sh@30|Rc@31|","" -"Shift Right Doubleword X-form","srd RA,RS,RB (Rc=0)|srd. 
RA,RS,RB (Rc=1)","31@0|RS@6|RA@11|RB@16|539@21|Rc@31|","" -"Store Doubleword DS-form","std RS,DS(RA)","62@0|RS@6|RA@11|DS@16|0@30|","" -"Store Doubleword Conditional Indexed X-form","stdcx. RS,RA,RB","31@0|RS@6|RA@11|RB@16|214@21|1@31|","" -"Store Doubleword with Update DS-form","stdu RS,DS(RA)","62@0|RS@6|RA@11|DS@16|1@30|","" -"Store Doubleword with Update Indexed X-form","stdux RS,RA,RB","31@0|RS@6|RA@11|RB@16|181@21|/@31|","" -"Store Doubleword Indexed X-form","stdx RS,RA,RB","31@0|RS@6|RA@11|RB@16|149@21|/@31|","" -"Store Floating-Point as Integer Word Indexed X-form","stfiwx FRS,RA,RB","31@0|FRS@6|RA@11|RB@16|983@21|/@31|","" -"Store Word Conditional Indexed X-form","stwcx. RS,RA,RB","31@0|RS@6|RA@11|RB@16|150@21|1@31|","" -"Subtract From XO-form","subf RT,RA,RB (OE=0 Rc=0)|subf. RT,RA,RB (OE=0 Rc=1)|subfo RT,RA,RB (OE=1 Rc=0)|subfo. RT,RA,RB (OE=1 Rc=1)","31@0|RT@6|RA@11|RB@16|OE@21|40@22|Rc@31|","" -"Trap Doubleword X-form","td TO,RA,RB","31@0|TO@6|RA@11|RB@16|68@21|/@31|","" -"Trap Doubleword Immediate D-form","tdi TO,RA,SI","2@0|TO@6|RA@11|SI@16|","" -"TLB Synchronize X-form","tlbsync","31@0|///@6|///@11|///@16|566@21|/@31|","" -"Floating Convert with round Double-Precision To Signed Word format X-form","fctiw FRT,FRB (Rc=0)|fctiw. FRT,FRB (Rc=1)","63@0|FRT@6|///@11|FRB@16|14@21|Rc@31|","" -"Floating Convert with truncate Double-Precision To Signed Word fomat X-form","fctiwz FRT,FRB (Rc=0)|fctiwz. FRT,FRB (Rc=1)","63@0|FRT@6|///@11|FRB@16|15@21|Rc@31|","" -"Floating Square Root A-form","fsqrt FRT,FRB (Rc=0)|fsqrt. FRT,FRB (Rc=1)","63@0|FRT@6|///@11|FRB@16|///@21|22@26|Rc@31|","" -"Add XO-form","add RT,RA,RB (OE=0 Rc=0)|add. RT,RA,RB (OE=0 Rc=1)|addo RT,RA,RB (OE=1 Rc=0)|addo. RT,RA,RB (OE=1 Rc=1)","31@0|RT@6|RA@11|RB@16|OE@21|266@22|Rc@31|","" -"Add Carrying XO-form","addc RT,RA,RB (OE=0 Rc=0)|addc. RT,RA,RB (OE=0 Rc=1)|addco RT,RA,RB (OE=1 Rc=0)|addco. 
RT,RA,RB (OE=1 Rc=1)","31@0|RT@6|RA@11|RB@16|OE@21|10@22|Rc@31|","" -"Add Extended XO-form","adde RT,RA,RB (OE=0 Rc=0)|adde. RT,RA,RB (OE=0 Rc=1)|addeo RT,RA,RB (OE=1 Rc=0)|addeo. RT,RA,RB (OE=1 Rc=1)","31@0|RT@6|RA@11|RB@16|OE@21|138@22|Rc@31|","" -"Add Immediate D-form","addi RT,RA,SI|li RT,SI (RA=0)","14@0|RT@6|RA@11|SI@16|","" -"Add Immediate Carrying D-formy","addic RT,RA,SI","12@0|RT@6|RA@11|SI@16|","" -"Add Immediate Carrying and Record D-form","addic. RT,RA,SI","13@0|RT@6|RA@11|SI@16|","" -"Add Immediate Shifted D-form","addis RT,RA,SI|lis RT,SI (RA=0)","15@0|RT@6|RA@11|SI@16|","" -"Add to Minus One Extended XO-form","addme RT,RA (OE=0 Rc=0)|addme. RT,RA (OE=0 Rc=1)|addmeo RT,RA (OE=1 Rc=0)|addmeo. RT,RA (OE=1 Rc=1)","31@0|RT@6|RA@11|///@16|OE@21|234@22|Rc@31|","" -"Add to Zero Extended XO-form","addze RT,RA (OE=0 Rc=0)|addze. RT,RA (OE=0 Rc=1)|addzeo RT,RA (OE=1 Rc=0)|addzeo. RT,RA (OE=1 Rc=1)","31@0|RT@6|RA@11|///@16|OE@21|202@22|Rc@31|","" -"AND X-form","and RA,RS,RB (Rc=0)|and. RA,RS,RB (Rc=1)","31@0|RS@6|RA@11|RB@16|28@21|Rc@31|","" -"AND with Complement X-form","andc RA,RS,RB (Rc=0)|andc. RA,RS,RB (Rc=1)","31@0|RS@6|RA@11|RB@16|60@21|Rc@31|","" -"AND Immediate D-form","andi. RA,RS,UI","28@0|RS@6|RA@11|UI@16|","" -"AND Immediate Shifted D-form","andis. 
RA,RS,UI","29@0|RS@6|RA@11|UI@16|","" -"Branch I-form","b target_addr (AA=0 LK=0)|ba target_addr (AA=1 LK=0)|bl target_addr (AA=0 LK=1)|bla target_addr (AA=1 LK=1)","18@0|LI@6|AA@30|LK@31|","" -"Branch Conditional B-form","bc BO,BI,target_addr (AA=0 LK=0)|bca BO,BI,target_addr (AA=1 LK=0)|bcl BO,BI,target_addr (AA=0 LK=1)|bcla BO,BI,target_addr (AA=1 LK=1)","16@0|BO@6|BI@11|BD@16|AA@30|LK@31|","" -"Branch Conditional to Count Register XL-form","bcctr BO,BI,BH (LK=0)|bcctrl BO,BI,BH (LK=1)","19@0|BO@6|BI@11|///@16|BH@19|528@21|LK@31|","" -"Branch Conditional to Link Register XL-form","bclr BO,BI,BH (LK=0)|bclrl BO,BI,BH (LK=1)","19@0|BO@6|BI@11|///@16|BH@19|16@21|LK@31|","" -"Compare X-form","cmp BF,L,RA,RB|cmpw BF,RA,RB (L=0)|cmpd BF,RA,RB (L=1)","31@0|BF@6|/@9|L@10|RA@11|RB@16|0@21|/@31|","" -"Compare Immediate D-form","cmpi BF,L,RA,SI|cmpwi BF,RA,SI (L=0)|cmpdi BF,RA,SI (L=1)","11@0|BF@6|/@9|L@10|RA@11|SI@16|","" -"Compare Logical X-form","cmpl BF,L,RA,RB|cmplw BF,RA,RB (L=0)|cmpld BF,RA,RB (L=1)","31@0|BF@6|/@9|L@10|RA@11|RB@16|32@21|/@31|","" -"Compare Logical Immediate D-form","cmpli BF,L,RA,UI|cmplwi BF,RA,UI (L=0)|cmpldi BF,RA,UI (L=1)","10@0|BF@6|/@9|L@10|RA@11|UI@16|","" -"Count Leading Zeros Word X-form","cntlzw RA,RS (Rc=0)|cntlzw. 
RA,RS (Rc=1)","31@0|RS@6|RA@11|///@16|26@21|Rc@31|","" -"Condition Register AND XL-form","crand BT,BA,BB","19@0|BT@6|BA@11|BB@16|257@21|/@31|","" -"Condition Register AND with Complement XL-form","crandc BT,BA,BB","19@0|BT@6|BA@11|BB@16|129@21|/@31|","" -"Condition Register Equivalent XL-form","creqv BT,BA,BB","19@0|BT@6|BA@11|BB@16|289@21|/@31|","" -"Condition Register NAND XL-form","crnand BT,BA,BB","19@0|BT@6|BA@11|BB@16|225@21|/@31|","" -"Condition Register NOR XL-form","crnor BT,BA,BB","19@0|BT@6|BA@11|BB@16|33@21|/@31|","" -"Condition Register OR XL-form","cror BT,BA,BB","19@0|BT@6|BA@11|BB@16|449@21|/@31|","" -"Condition Register OR with Complement XL-form","crorc BT,BA,BB","19@0|BT@6|BA@11|BB@16|417@21|/@31|","" -"Condition Register XOR XL-form","crxor BT,BA,BB","19@0|BT@6|BA@11|BB@16|193@21|/@31|","" -"Data Cache Block set to Zero X-form","dcbz RA,RB","31@0|///@6|RA@11|RB@16|1014@21|/@31|","" -"Equivalent X-form","eqv RA,RS,RB (Rc=0)|eqv. RA,RS,RB (Rc=1)","31@0|RS@6|RA@11|RB@16|284@21|Rc@31|","" -"Extend Sign Halfword X-form","extsh RA,RS (Rc=0)|extsh. RA,RS (Rc=1)","31@0|RS@6|RA@11|///@16|922@21|Rc@31|","" -"Floating Absolute Value X-form","fabs FRT,FRB (Rc=0)|fabs. FRT,FRB (Rc=1)","63@0|FRT@6|///@11|FRB@16|264@21|Rc@31|","" -"Floating Add A-form","fadd FRT,FRA,FRB (Rc=0)|fadd. FRT,FRA,FRB (Rc=1)","63@0|FRT@6|FRA@11|FRB@16|///@21|21@26|Rc@31|","" -"Floating Compare Ordered X-form","fcmpo BF,FRA,FRB","63@0|BF@6|//@9|FRA@11|FRB@16|32@21|/@31|","" -"Floating Compare Unordered X-form","fcmpu BF,FRA,FRB","63@0|BF@6|//@9|FRA@11|FRB@16|0@21|/@31|","" -"Floating Divide A-form","fdiv FRT,FRA,FRB (Rc=0)|fdiv. FRT,FRA,FRB (Rc=1)","63@0|FRT@6|FRA@11|FRB@16|///@21|18@26|Rc@31|","" -"Floating Multiply-Add A-form","fmadd FRT,FRA,FRC,FRB (Rc=0)|fmadd. FRT,FRA,FRC,FRB (Rc=1)","63@0|FRT@6|FRA@11|FRB@16|FRC@21|29@26|Rc@31|","" -"Floating Move Register X-form","fmr FRT,FRB (Rc=0)|fmr. 
FRT,FRB (Rc=1)","63@0|FRT@6|///@11|FRB@16|72@21|Rc@31|","" -"Floating Multiply-Subtract A-form","fmsub FRT,FRA,FRC,FRB (Rc=0)|fmsub. FRT,FRA,FRC,FRB (Rc=1)","63@0|FRT@6|FRA@11|FRB@16|FRC@21|28@26|Rc@31|","" -"Floating Multiply A-form","fmul FRT,FRA,FRC (Rc=0)|fmul. FRT,FRA,FRC (Rc=1)","63@0|FRT@6|FRA@11|///@16|FRC@21|25@26|Rc@31|","" -"Floating Negative Absolute Value X-form","fnabs FRT,FRB (Rc=0)|fnabs. FRT,FRB (Rc=1)","63@0|FRT@6|///@11|FRB@16|136@21|Rc@31|","" -"Floating Negate X-form","fneg FRT,FRB (Rc=0)|fneg. FRT,FRB (Rc=1)","63@0|FRT@6|///@11|FRB@16|40@21|Rc@31|","" -"Floating Negative Multiply-Add A-form","fnmadd FRT,FRA,FRC,FRB (Rc=0)|fnmadd. FRT,FRA,FRC,FRB (Rc=1)","63@0|FRT@6|FRA@11|FRB@16|FRC@21|31@26|Rc@31|","" -"Floating Negative Multiply-Subtract A-form","fnmsub FRT,FRA,FRC,FRB (Rc=0)|fnmsub. FRT,FRA,FRC,FRB (Rc=1)","63@0|FRT@6|FRA@11|FRB@16|FRC@21|30@26|Rc@31|","" -"Floating Round to Single-Precision X-form","frsp FRT,FRB (Rc=0)|frsp. FRT,FRB (Rc=1)","63@0|FRT@6|///@11|FRB@16|12@21|Rc@31|","" -"Floating Subtract A-form","fsub FRT,FRA,FRB (Rc=0)|fsub. 
FRT,FRA,FRB (Rc=1)","63@0|FRT@6|FRA@11|FRB@16|///@21|20@26|Rc@31|","" -"Instruction Synchronize XL-form","isync","19@0|///@6|///@11|///@16|150@21|/@31|","" -"Load Byte and Zero D-form","lbz RT,D(RA)","34@0|RT@6|RA@11|D@16|","" -"Load Byte and Zero with Update D-form","lbzu RT,D(RA)","35@0|RT@6|RA@11|D@16|","" -"Load Byte and Zero with Update Indexed X-form","lbzux RT,RA,RB","31@0|RT@6|RA@11|RB@16|119@21|/@31|","" -"Load Byte and Zero Indexed X-form","lbzx RT,RA,RB","31@0|RT@6|RA@11|RB@16|87@21|/@31|","" -"Load Floating-Point Double D-form","lfd FRT,D(RA)","50@0|FRT@6|RA@11|D@16|","" -"Load Floating-Point Double with Update D-form","lfdu FRT,D(RA)","51@0|FRT@6|RA@11|D@16|","" -"Load Floating-Point Double with Update Indexed X-form","lfdux FRT,RA,RB","31@0|FRT@6|RA@11|RB@16|631@21|/@31|","" -"Load Floating-Point Double Indexed X-form","lfdx FRT,RA,RB","31@0|FRT@6|RA@11|RB@16|599@21|/@31|","" -"Load Floating-Point Single D-form","lfs FRT,D(RA)","48@0|FRT@6|RA@11|D@16|","" -"Load Floating-Point Single with Update D-form","lfsu FRT,D(RA)","49@0|FRT@6|RA@11|D@16|","" -"Load Floating-Point Single with Update Indexed X-form","lfsux FRT,RA,RB","31@0|FRT@6|RA@11|RB@16|567@21|/@31|","" -"Load Floating-Point Single Indexed X-form","lfsx FRT,RA,RB","31@0|FRT@6|RA@11|RB@16|535@21|/@31|","" -"Load Halfword Algebraic D-form","lha RT,D(RA)","42@0|RT@6|RA@11|D@16|","" -"Load Halfword Algebraic with Update D-form","lhau RT,D(RA)","43@0|RT@6|RA@11|D@16|","" -"Load Halfword Algebraic with Update Indexed X-form","lhaux RT,RA,RB","31@0|RT@6|RA@11|RB@16|375@21|/@31|","" -"Load Halfword Algebraic Indexed X-form","lhax RT,RA,RB","31@0|RT@6|RA@11|RB@16|343@21|/@31|","" -"Load Halfword Byte-Reverse Indexed X-form","lhbrx RT,RA,RB","31@0|RT@6|RA@11|RB@16|790@21|/@31|","" -"Load Halfword and Zero D-form","lhz RT,D(RA)","40@0|RT@6|RA@11|D@16|","" -"Load Halfword and Zero with Update D-form","lhzu RT,D(RA)","41@0|RT@6|RA@11|D@16|","" -"Load Halfword and Zero with Update Indexed X-form","lhzux 
RT,RA,RB","31@0|RT@6|RA@11|RB@16|311@21|/@31|","" -"Load Halfword and Zero Indexed X-form","lhzx RT,RA,RB","31@0|RT@6|RA@11|RB@16|279@21|/@31|","" -"Load Multiple Word D-form","lmw RT,D(RA)","46@0|RT@6|RA@11|D@16|","" -"Load String Word Immediate X-form","lswi RT,RA,NB","31@0|RT@6|RA@11|NB@16|597@21|/@31|","" -"Load String Word Indexed X-form","lswx RT,RA,RB","31@0|RT@6|RA@11|RB@16|533@21|/@31|","" -"Load Word Byte-Reverse Indexed X-form","lwbrx RT,RA,RB","31@0|RT@6|RA@11|RB@16|534@21|/@31|","" -"Load Word and Zero D-form","lwz RT,D(RA)","32@0|RT@6|RA@11|D@16|","" -"Load Word and Zero with Update D-form","lwzu RT,D(RA)","33@0|RT@6|RA@11|D@16|","" -"Load Word and Zero with Update Indexed X-form","lwzux RT,RA,RB","31@0|RT@6|RA@11|RB@16|55@21|/@31|","" -"Load Word and Zero Indexed X-form","lwzx RT,RA,RB","31@0|RT@6|RA@11|RB@16|23@21|/@31|","" -"Move Condition Register Field XL-form","mcrf BF,BFA","19@0|BF@6|//@9|BFA@11|//@14|///@16|0@21|/@31|","" -"Move to Condition Register from FPSCR X-form","mcrfs BF,BFA","63@0|BF@6|//@9|BFA@11|//@14|///@16|64@21|/@31|","" -"Move From Condition Register XFX-form","mfcr RT","31@0|RT@6|0@11|///@12|/@20|19@21|/@31|","" -"Move From FPSCR X-form","mffs FRT (Rc=0)|mffs. FRT (Rc=1)","63@0|FRT@6|0@11|///@16|583@21|Rc@31|","" -"Move From MSR X-form","mfmsr RT","31@0|RT@6|///@11|///@16|83@21|/@31|","" -"Move From Special Purpose Register XFX-form","mfspr RT,SPR","31@0|RT@6|spr@11|339@21|/@31|","" -"Move To Condition Register Fields XFX-form","mtcrf FXM,RS","31@0|RS@6|0@11|FXM@12|/@20|144@21|/@31|","" -"Move To FPSCR Bit 0 X-form","mtfsb0 BT (Rc=0)|mtfsb0. BT (Rc=1)","63@0|BT@6|///@11|///@16|70@21|Rc@31|","" -"Move To FPSCR Bit 1 X-form","mtfsb1 BT (Rc=0)|mtfsb1. BT (Rc=1)","63@0|BT@6|///@11|///@16|38@21|Rc@31|","" -"Move To FPSCR Fields XFL-form","mtfsf FLM,FRB,L,W (Rc=0)|mtfsf. FLM,FRB,L,W (Rc=1)","63@0|L@6|FLM@7|W@15|FRB@16|711@21|Rc@31|","" -"Move To FPSCR Field Immediate X-form","mtfsfi BF,U,W (Rc=0)|mtfsfi. 
BF,U,W (Rc=1)","63@0|BF@6|//@9|///@11|W@15|U@16|/@20|134@21|Rc@31|","" -"Move To MSR X-form","mtmsr RS,L","31@0|RS@6|///@11|L@15|///@16|146@21|/@31|","" -"Move To Special Purpose Register XFX-form","mtspr SPR,RS","31@0|RS@6|spr@11|467@21|/@31|","" -"Multiply Low Immediate D-form","mulli RT,RA,SI","7@0|RT@6|RA@11|SI@16|","" -"Multiply Low Word XO-form","mullw RT,RA,RB (OE=0 Rc=0)|mullw. RT,RA,RB (OE=0 Rc=1)|mullwo RT,RA,RB (OE=1 Rc=0)|mullwo. RT,RA,RB (OE=1 Rc=1)","31@0|RT@6|RA@11|RB@16|OE@21|235@22|Rc@31|","" -"NAND X-form","nand RA,RS,RB (Rc=0)|nand. RA,RS,RB (Rc=1)","31@0|RS@6|RA@11|RB@16|476@21|Rc@31|","" -"Negate XO-form","neg RT,RA (OE=0 Rc=0)|neg. RT,RA (OE=0 Rc=1)|nego RT,RA (OE=1 Rc=0)|nego. RT,RA (OE=1 Rc=1)","31@0|RT@6|RA@11|///@16|OE@21|104@22|Rc@31|","" -"NOR X-form","nor RA,RS,RB (Rc=0)|nor. RA,RS,RB (Rc=1)","31@0|RS@6|RA@11|RB@16|124@21|Rc@31|","" -"OR X-form","or RA,RS,RB (Rc=0)|or. RA,RS,RB (Rc=1)","31@0|RS@6|RA@11|RB@16|444@21|Rc@31|","" -"OR with Complement X-form","orc RA,RS,RB (Rc=0)|orc. RA,RS,RB (Rc=1)","31@0|RS@6|RA@11|RB@16|412@21|Rc@31|","" -"OR Immediate D-form","ori RA,RS,UI|nop (RA=0 RS=0 UI=0)","24@0|RS@6|RA@11|UI@16|","" -"OR Immediate Shifted D-form","oris RA,RS,UI","25@0|RS@6|RA@11|UI@16|","" -"Rotate Left Word Immediate then Mask Insert M-form","rlwimi RA,RS,SH,MB,ME (Rc=0)|rlwimi. RA,RS,SH,MB,ME (Rc=1)","20@0|RS@6|RA@11|SH@16|MB@21|ME@26|Rc@31|","" -"Rotate Left Word Immediate then AND with Mask M-form","rlwinm RA,RS,SH,MB,ME (Rc=0)|rlwinm. RA,RS,SH,MB,ME (Rc=1)","21@0|RS@6|RA@11|SH@16|MB@21|ME@26|Rc@31|","" -"Rotate Left Word then AND with Mask M-form","rlwnm RA,RS,RB,MB,ME (Rc=0)|rlwnm. RA,RS,RB,MB,ME (Rc=1)","23@0|RS@6|RA@11|RB@16|MB@21|ME@26|Rc@31|","" -"Shift Left Word X-form","slw RA,RS,RB (Rc=0)|slw. RA,RS,RB (Rc=1)","31@0|RS@6|RA@11|RB@16|24@21|Rc@31|","" -"Shift Right Algebraic Word X-form","sraw RA,RS,RB (Rc=0)|sraw. 
RA,RS,RB (Rc=1)","31@0|RS@6|RA@11|RB@16|792@21|Rc@31|","" -"Shift Right Algebraic Word Immediate X-form","srawi RA,RS,SH (Rc=0)|srawi. RA,RS,SH (Rc=1)","31@0|RS@6|RA@11|SH@16|824@21|Rc@31|","" -"Shift Right Word X-form","srw RA,RS,RB (Rc=0)|srw. RA,RS,RB (Rc=1)","31@0|RS@6|RA@11|RB@16|536@21|Rc@31|","" -"Store Byte D-form","stb RS,D(RA)","38@0|RS@6|RA@11|D@16|","" -"Store Byte with Update D-form","stbu RS,D(RA)","39@0|RS@6|RA@11|D@16|","" -"Store Byte with Update Indexed X-form","stbux RS,RA,RB","31@0|RS@6|RA@11|RB@16|247@21|/@31|","" -"Store Byte Indexed X-form","stbx RS,RA,RB","31@0|RS@6|RA@11|RB@16|215@21|/@31|","" -"Store Floating-Point Double D-form","stfd FRS,D(RA)","54@0|FRS@6|RA@11|D@16|","" -"Store Floating-Point Double with Update D-form","stfdu FRS,D(RA)","55@0|FRS@6|RA@11|D@16|","" -"Store Floating-Point Double with Update Indexed X-form","stfdux FRS,RA,RB","31@0|FRS@6|RA@11|RB@16|759@21|/@31|","" -"Store Floating-Point Double Indexed X-form","stfdx FRS,RA,RB","31@0|FRS@6|RA@11|RB@16|727@21|/@31|","" -"Store Floating-Point Single D-form","stfs FRS,D(RA)","52@0|FRS@6|RA@11|D@16|","" -"Store Floating-Point Single with Update D-form","stfsu FRS,D(RA)","53@0|FRS@6|RA@11|D@16|","" -"Store Floating-Point Single with Update Indexed X-form","stfsux FRS,RA,RB","31@0|FRS@6|RA@11|RB@16|695@21|/@31|","" -"Store Floating-Point Single Indexed X-form","stfsx FRS,RA,RB","31@0|FRS@6|RA@11|RB@16|663@21|/@31|","" -"Store Halfword D-form","sth RS,D(RA)","44@0|RS@6|RA@11|D@16|","" -"Store Halfword Byte-Reverse Indexed X-form","sthbrx RS,RA,RB","31@0|RS@6|RA@11|RB@16|918@21|/@31|","" -"Store Halfword with Update D-form","sthu RS,D(RA)","45@0|RS@6|RA@11|D@16|","" -"Store Halfword with Update Indexed X-form","sthux RS,RA,RB","31@0|RS@6|RA@11|RB@16|439@21|/@31|","" -"Store Halfword Indexed X-form","sthx RS,RA,RB","31@0|RS@6|RA@11|RB@16|407@21|/@31|","" -"Store Multiple Word D-form","stmw RS,D(RA)","47@0|RS@6|RA@11|D@16|","" -"Store String Word Immediate X-form","stswi 
RS,RA,NB","31@0|RS@6|RA@11|NB@16|725@21|/@31|","" -"Store String Word Indexed X-form","stswx RS,RA,RB","31@0|RS@6|RA@11|RB@16|661@21|/@31|","" -"Store Word D-form","stw RS,D(RA)","36@0|RS@6|RA@11|D@16|","" -"Store Word Byte-Reverse Indexed X-form","stwbrx RS,RA,RB","31@0|RS@6|RA@11|RB@16|662@21|/@31|","" -"Store Word with Update D-form","stwu RS,D(RA)","37@0|RS@6|RA@11|D@16|","" -"Store Word with Update Indexed X-form","stwux RS,RA,RB","31@0|RS@6|RA@11|RB@16|183@21|/@31|","" -"Store Word Indexed X-form","stwx RS,RA,RB","31@0|RS@6|RA@11|RB@16|151@21|/@31|","" -"Subtract From Carrying XO-form","subfc RT,RA,RB (OE=0 Rc=0)|subfc. RT,RA,RB (OE=0 Rc=1)|subfco RT,RA,RB (OE=1 Rc=0)|subfco. RT,RA,RB (OE=1 Rc=1)","31@0|RT@6|RA@11|RB@16|OE@21|8@22|Rc@31|","" -"Subtract From Extended XO-form","subfe RT,RA,RB (OE=0 Rc=0)|subfe. RT,RA,RB (OE=0 Rc=1)|subfeo RT,RA,RB (OE=1 Rc=0)|subfeo. RT,RA,RB (OE=1 Rc=1)","31@0|RT@6|RA@11|RB@16|OE@21|136@22|Rc@31|","" -"Subtract From Immediate Carrying D-form","subfic RT,RA,SI","8@0|RT@6|RA@11|SI@16|","" -"Subtract From Minus One Extended XO-form","subfme RT,RA (OE=0 Rc=0)|subfme. RT,RA (OE=0 Rc=1)|subfmeo RT,RA (OE=1 Rc=0)|subfmeo. RT,RA (OE=1 Rc=1)","31@0|RT@6|RA@11|///@16|OE@21|232@22|Rc@31|","" -"Subtract From Zero Extended XO-form","subfze RT,RA (OE=0 Rc=0)|subfze. RT,RA (OE=0 Rc=1)|subfzeo RT,RA (OE=1 Rc=0)|subfzeo. RT,RA (OE=1 Rc=1)","31@0|RT@6|RA@11|///@16|OE@21|200@22|Rc@31|","" -"Synchronize X-form","sync L,SC","31@0|//@6|L@8|///@11|SC@14|///@16|598@21|/@31|","" -"TLB Invalidate Entry X-form","tlbie RB,RS,RIC,PRS,R","31@0|RS@6|/@11|RIC@12|PRS@14|R@15|RB@16|306@21|/@31|","" -"Trap Word X-form","tw TO,RA,RB","31@0|TO@6|RA@11|RB@16|4@21|/@31|","" -"Trap Word Immediate D-form","twi TO,RA,SI","3@0|TO@6|RA@11|SI@16|","" -"XOR X-form","xor RA,RS,RB (Rc=0)|xor. 
RA,RS,RB (Rc=1)","31@0|RS@6|RA@11|RB@16|316@21|Rc@31|","" -"XOR Immediate D-form","xori RA,RS,UI","26@0|RS@6|RA@11|UI@16|","" -"XOR Immediate Shifted D-form","xoris RA,RS,UI","27@0|RS@6|RA@11|UI@16|","" +"Byte-Reverse Doubleword X-form","brd RA,RS","31@0|RS@6|RA@11|///@16|187@21|/@31|","v3.1" +"Byte-Reverse Halfword X-form","brh RA,RS","31@0|RS@6|RA@11|///@16|219@21|/@31|","v3.1" +"Byte-Reverse Word X-form","brw RA,RS","31@0|RS@6|RA@11|///@16|155@21|/@31|","v3.1" +"Centrifuge Doubleword X-form","cfuged RA,RS,RB","31@0|RS@6|RA@11|RB@16|220@21|/@31|","v3.1" +"Count Leading Zeros Doubleword under bit Mask X-form","cntlzdm RA,RS,RB","31@0|RS@6|RA@11|RB@16|59@21|/@31|","v3.1" +"Count Trailing Zeros Doubleword under bit Mask X-form","cnttzdm RA,RS,RB","31@0|RS@6|RA@11|RB@16|571@21|/@31|","v3.1" +"DFP Convert From Fixed Quadword Quad X-form","dcffixqq FRTp,VRB","63@0|FRTp@6|0@11|VRB@16|994@21|/@31|","v3.1" +"DFP Convert To Fixed Quadword Quad X-form","dctfixqq VRT,FRBp","63@0|VRT@6|1@11|FRBp@16|994@21|/@31|","v3.1" +"Load VSX Vector Special Value Quadword X-form","lxvkq XT,UIM","60@0|T@6|31@11|UIM@16|360@21|TX@31|","v3.1" +"Load VSX Vector Paired DQ-form","lxvp XTp,DQ(RA)","6@0|Tp@6|TX@10|RA@11|DQ@16|0@28|","v3.1" +"Load VSX Vector Paired Indexed X-form","lxvpx XTp,RA,RB","31@0|Tp@6|TX@10|RA@11|RB@16|333@21|/@31|","v3.1" +"Load VSX Vector Rightmost Byte Indexed X-form","lxvrbx XT,RA,RB","31@0|T@6|RA@11|RB@16|13@21|TX@31|","v3.1" +"Load VSX Vector Rightmost Doubleword Indexed X-form","lxvrdx XT,RA,RB","31@0|T@6|RA@11|RB@16|109@21|TX@31|","v3.1" +"Load VSX Vector Rightmost Halfword Indexed X-form","lxvrhx XT,RA,RB","31@0|T@6|RA@11|RB@16|45@21|TX@31|","v3.1" +"Load VSX Vector Rightmost Word Indexed X-form","lxvrwx XT,RA,RB","31@0|T@6|RA@11|RB@16|77@21|TX@31|","v3.1" +"Move to VSR Byte Mask VX-form","mtvsrbm VRT,RB","4@0|VRT@6|16@11|RB@16|1602@21|","v3.1" +"Move To VSR Byte Mask Immediate DX-form","mtvsrbmi VRT,bm","4@0|VRT@6|b1@11|b0@16|10@26|b2@31|","v3.1" +"Move to VSR 
Doubleword Mask VX-form","mtvsrdm VRT,RB","4@0|VRT@6|19@11|RB@16|1602@21|","v3.1" +"Move to VSR Halfword Mask VX-form","mtvsrhm VRT,RB","4@0|VRT@6|17@11|RB@16|1602@21|","v3.1" +"Move to VSR Quadword Mask VX-form","mtvsrqm VRT,RB","4@0|VRT@6|20@11|RB@16|1602@21|","v3.1" +"Move to VSR Word Mask VX-form","mtvsrwm VRT,RB","4@0|VRT@6|18@11|RB@16|1602@21|","v3.1" +"Prefixed Add Immediate MLS:D-form","paddi RT,RA,SI,R",",1@0|2@6|0@8|//@9|R@11|//@12|si0@14|,14@0|RT@6|RA@11|si1@16|","v3.1" +"Parallel Bits Deposit Doubleword X-form","pdepd RA,RS,RB","31@0|RS@6|RA@11|RB@16|156@21|/@31|","v3.1" +"Parallel Bits Extract Doubleword X-form","pextd RA,RS,RB","31@0|RS@6|RA@11|RB@16|188@21|/@31|","v3.1" +"Prefixed Load Byte and Zero MLS:D-form","plbz RT,D(RA),R",",1@0|2@6|0@8|//@9|R@11|//@12|d0@14|,34@0|RT@6|RA@11|d1@16|","v3.1" +"Prefixed Load Doubleword 8LS:D-form","pld RT,D(RA),R",",1@0|0@6|0@8|//@9|R@11|//@12|d0@14|,57@0|RT@6|RA@11|d1@16|","v3.1" +"Prefixed Load Floating-Point Double MLS:D-form","plfd FRT,D(RA),R",",1@0|2@6|0@8|//@9|R@11|//@12|d0@14|,50@0|FRT@6|RA@11|d1@16|","v3.1" +"Prefixed Load Floating-Point Single MLS:D-form","plfs FRT,D(RA),R",",1@0|2@6|0@8|//@9|R@11|//@12|d0@14|,48@0|FRT@6|RA@11|d1@16|","v3.1" +"Prefixed Load Halfword Algebraic MLS:D-form","plha RT,D(RA),R",",1@0|2@6|0@8|//@9|R@11|//@12|d0@14|,42@0|RT@6|RA@11|d1@16|","v3.1" +"Prefixed Load Halfword and Zero MLS:D-form","plhz RT,D(RA),R",",1@0|2@6|0@8|//@9|R@11|//@12|d0@14|,40@0|RT@6|RA@11|d1@16|","v3.1" +"Prefixed Load Quadword 8LS:D-form","plq RTp,D(RA),R",",1@0|0@6|0@8|//@9|R@11|//@12|d0@14|,56@0|RTp@6|RA@11|d1@16|","v3.1" +"Prefixed Load Word Algebraic 8LS:D-form","plwa RT,D(RA),R",",1@0|0@6|0@8|//@9|R@11|//@12|d0@14|,41@0|RT@6|RA@11|d1@16|","v3.1" +"Prefixed Load Word and Zero MLS:D-form","plwz RT,D(RA),R",",1@0|2@6|0@8|//@9|R@11|//@12|d0@14|,32@0|RT@6|RA@11|d1@16|","v3.1" +"Prefixed Load VSX Scalar Doubleword 8LS:D-form","plxsd 
VRT,D(RA),R",",1@0|0@6|0@8|//@9|R@11|//@12|d0@14|,42@0|VRT@6|RA@11|d1@16|","v3.1" +"Prefixed Load VSX Scalar Single-Precision 8LS:D-form","plxssp VRT,D(RA),R",",1@0|0@6|0@8|//@9|R@11|//@12|d0@14|,43@0|VRT@6|RA@11|d1@16|","v3.1" +"Prefixed Load VSX Vector 8LS:D-form","plxv XT,D(RA),R",",1@0|0@6|0@8|//@9|R@11|//@12|d0@14|,25@0|TX@5|T@6|RA@11|d1@16|","v3.1" +"Prefixed Load VSX Vector Paired 8LS:D-form","plxvp XTp,D(RA),R",",1@0|0@6|0@8|//@9|R@11|//@12|d0@14|,58@0|Tp@6|TX@10|RA@11|d1@16|","v3.1" +"Prefixed Masked VSX Vector bfloat16 GER (Rank-2 Update) MMIRR:XX3-form","pmxvbf16ger2 AT,XA,XB,XMSK,YMSK,PMSK",",1@0|3@6|9@8|//@12|/@14|/@15|PMSK@16|///@18|XMSK@24|YMSK@28|,59@0|AT@6|//@9|A@11|B@16|51@21|AX@29|BX@30|/@31|","v3.1" +"Prefixed Masked VSX Vector bfloat16 GER (Rank-2 Update) Negative multiply, Negative accumulate MMIRR:XX3-form","pmxvbf16ger2nn AT,XA,XB,XMSK,YMSK,PMSK",",1@0|3@6|9@8|//@12|/@14|/@15|PMSK@16|///@18|XMSK@24|YMSK@28|,59@0|AT@6|//@9|A@11|B@16|242@21|AX@29|BX@30|/@31|","v3.1" +"Prefixed Masked VSX Vector bfloat16 GER (Rank-2 Update) Negative multiply, Positive accumulate MMIRR:XX3-form","pmxvbf16ger2np AT,XA,XB,XMSK,YMSK,PMSK",",1@0|3@6|9@8|//@12|/@14|/@15|PMSK@16|///@18|XMSK@24|YMSK@28|,59@0|AT@6|//@9|A@11|B@16|114@21|AX@29|BX@30|/@31|","v3.1" +"Prefixed Masked VSX Vector bfloat16 GER (Rank-2 Update) Positive multiply, Negative accumulate MMIRR:XX3-form","pmxvbf16ger2pn AT,XA,XB,XMSK,YMSK,PMSK",",1@0|3@6|9@8|//@12|/@14|/@15|PMSK@16|///@18|XMSK@24|YMSK@28|,59@0|AT@6|//@9|A@11|B@16|178@21|AX@29|BX@30|/@31|","v3.1" +"Prefixed Masked VSX Vector bfloat16 GER (Rank-2 Update) Positive multiply, Positive accumulate MMIRR:XX3-form","pmxvbf16ger2pp AT,XA,XB,XMSK,YMSK,PMSK",",1@0|3@6|9@8|//@12|/@14|/@15|PMSK@16|///@18|XMSK@24|YMSK@28|,59@0|AT@6|//@9|A@11|B@16|50@21|AX@29|BX@30|/@31|","v3.1" +"Prefixed Masked VSX Vector 16-bit Floating-Point GER (rank-2 update) MMIRR:XX3-form","pmxvf16ger2 
AT,XA,XB,XMSK,YMSK,PMSK",",1@0|3@6|9@8|//@12|/@14|/@15|PMSK@16|///@18|XMSK@24|YMSK@28|,59@0|AT@6|//@9|A@11|B@16|19@21|AX@29|BX@30|/@31|","v3.1" +"Prefixed Masked VSX Vector 16-bit Floating-Point GER (rank-2 update) Negative multiply, Negative accumulate MMIRR:XX3-form","pmxvf16ger2nn AT,XA,XB,XMSK,YMSK,PMSK",",1@0|3@6|9@8|//@12|/@14|/@15|PMSK@16|///@18|XMSK@24|YMSK@28|,59@0|AT@6|//@9|A@11|B@16|210@21|AX@29|BX@30|/@31|","v3.1" +"Prefixed Masked VSX Vector 16-bit Floating-Point GER (rank-2 update) Negative multiply, Positive accumulate MMIRR:XX3-form","pmxvf16ger2np AT,XA,XB,XMSK,YMSK,PMSK",",1@0|3@6|9@8|//@12|/@14|/@15|PMSK@16|///@18|XMSK@24|YMSK@28|,59@0|AT@6|//@9|A@11|B@16|82@21|AX@29|BX@30|/@31|","v3.1" +"Prefixed Masked VSX Vector 16-bit Floating-Point GER (rank-2 update) Positive multiply, Negative accumulate MMIRR:XX3-form","pmxvf16ger2pn AT,XA,XB,XMSK,YMSK,PMSK",",1@0|3@6|9@8|//@12|/@14|/@15|PMSK@16|///@18|XMSK@24|YMSK@28|,59@0|AT@6|//@9|A@11|B@16|146@21|AX@29|BX@30|/@31|","v3.1" +"Prefixed Masked VSX Vector 16-bit Floating-Point GER (rank-2 update) Positive multiply, Positive accumulate MMIRR:XX3-form","pmxvf16ger2pp AT,XA,XB,XMSK,YMSK,PMSK",",1@0|3@6|9@8|//@12|/@14|/@15|PMSK@16|///@18|XMSK@24|YMSK@28|,59@0|AT@6|//@9|A@11|B@16|18@21|AX@29|BX@30|/@31|","v3.1" +"Prefixed Masked VSX Vector 32-bit Floating-Point GER (rank-1 update) MMIRR:XX3-form","pmxvf32ger AT,XA,XB,XMSK,YMSK",",1@0|3@6|9@8|//@12|/@14|/@15|///@16|XMSK@24|YMSK@28|,59@0|AT@6|//@9|A@11|B@16|27@21|AX@29|BX@30|/@31|","v3.1" +"Prefixed Masked VSX Vector 32-bit Floating-Point GER (rank-1 update) Negative multiply, Negative accumulate MMIRR:XX3-form","pmxvf32gernn AT,XA,XB,XMSK,YMSK",",1@0|3@6|9@8|//@12|/@14|/@15|///@16|XMSK@24|YMSK@28|,59@0|AT@6|//@9|A@11|B@16|218@21|AX@29|BX@30|/@31|","v3.1" +"Prefixed Masked VSX Vector 32-bit Floating-Point GER (rank-1 update) Negative multiply, Positive accumulate MMIRR:XX3-form","pmxvf32gernp 
AT,XA,XB,XMSK,YMSK",",1@0|3@6|9@8|//@12|/@14|/@15|///@16|XMSK@24|YMSK@28|,59@0|AT@6|//@9|A@11|B@16|90@21|AX@29|BX@30|/@31|","v3.1" +"Prefixed Masked VSX Vector 32-bit Floating-Point GER (rank-1 update) Positive multiply, Negative accumulate MMIRR:XX3-form","pmxvf32gerpn AT,XA,XB,XMSK,YMSK",",1@0|3@6|9@8|//@12|/@14|/@15|///@16|XMSK@24|YMSK@28|,59@0|AT@6|//@9|A@11|B@16|154@21|AX@29|BX@30|/@31|","v3.1" +"Prefixed Masked VSX Vector 32-bit Floating-Point GER (rank-1 update) Positive multiply, Positive accumulate MMIRR:XX3-form","pmxvf32gerpp AT,XA,XB,XMSK,YMSK",",1@0|3@6|9@8|//@12|/@14|/@15|///@16|XMSK@24|YMSK@28|,59@0|AT@6|//@9|A@11|B@16|26@21|AX@29|BX@30|/@31|","v3.1" +"Prefixed Masked VSX Vector 64-bit Floating-Point GER (rank-1 update) MMIRR:XX3-form","pmxvf64ger AT,XAp,XB,XMSK,YMSK",",1@0|3@6|9@8|//@12|/@14|/@15|///@16|XMSK@24|YMSK@28|//@30|,59@0|AT@6|//@9|Ap@11|B@16|59@21|AX@29|BX@30|/@31|","v3.1" +"Prefixed Masked VSX Vector 64-bit Floating-Point GER (rank-1 update) Negative multiply, Negative accumulate MMIRR:XX3-form","pmxvf64gernn AT,XAp,XB,XMSK,YMSK",",1@0|3@6|9@8|//@12|/@14|/@15|///@16|XMSK@24|YMSK@28|//@30|,59@0|AT@6|//@9|Ap@11|B@16|250@21|AX@29|BX@30|/@31|","v3.1" +"Prefixed Masked VSX Vector 64-bit Floating-Point GER (rank-1 update) Negative multiply, Positive accumulate MMIRR:XX3-form","pmxvf64gernp AT,XAp,XB,XMSK,YMSK",",1@0|3@6|9@8|//@12|/@14|/@15|///@16|XMSK@24|YMSK@28|//@30|,59@0|AT@6|//@9|Ap@11|B@16|122@21|AX@29|BX@30|/@31|","v3.1" +"Prefixed Masked VSX Vector 64-bit Floating-Point GER (rank-1 update) Positive multiply, Negative accumulate MMIRR:XX3-form","pmxvf64gerpn AT,XAp,XB,XMSK,YMSK",",1@0|3@6|9@8|//@12|/@14|/@15|///@16|XMSK@24|YMSK@28|//@30|,59@0|AT@6|//@9|Ap@11|B@16|186@21|AX@29|BX@30|/@31|","v3.1" +"Prefixed Masked VSX Vector 64-bit Floating-Point GER (rank-1 update) Positive multiply, Positive accumulate MMIRR:XX3-form","pmxvf64gerpp 
AT,XAp,XB,XMSK,YMSK",",1@0|3@6|9@8|//@12|/@14|/@15|///@16|XMSK@24|YMSK@28|//@30|,59@0|AT@6|//@9|Ap@11|B@16|58@21|AX@29|BX@30|/@31|","v3.1" +"Prefixed Masked VSX Vector 16-bit Signed Integer GER (rank-2 update) MMIRR:XX3-form","pmxvi16ger2 AT,XA,XB,XMSK,YMSK,PMSK",",1@0|3@6|9@8|//@12|/@14|/@15|PMSK@16|///@18|XMSK@24|YMSK@28|,59@0|AT@6|//@9|A@11|B@16|75@21|AX@29|BX@30|/@31|","v3.1" +"Prefixed Masked VSX Vector 16-bit Signed Integer GER (rank-2 update) Positive multiply, Positive accumulate MMIRR:XX3-form","pmxvi16ger2pp AT,XA,XB,XMSK,YMSK,PMSK",",1@0|3@6|9@8|//@12|/@14|/@15|PMSK@16|///@18|XMSK@24|YMSK@28|,59@0|AT@6|//@9|A@11|B@16|107@21|AX@29|BX@30|/@31|","v3.1" +"Prefixed Masked VSX Vector 16-bit Signed Integer GER (rank-2 update) with Saturation MMIRR:XX3-form","pmxvi16ger2s AT,XA,XB,XMSK,YMSK,PMSK",",1@0|3@6|9@8|//@12|/@14|/@15|PMSK@16|///@18|XMSK@24|YMSK@28|,59@0|AT@6|//@9|A@11|B@16|43@21|AX@29|BX@30|/@31|","v3.1" +"Prefixed Masked VSX Vector 16-bit Signed Integer GER (rank-2 update) with Saturation Positive multiply, Positive accumulate MMIRR:XX3-form","pmxvi16ger2spp AT,XA,XB,XMSK,YMSK,PMSK",",1@0|3@6|9@8|//@12|/@14|/@15|PMSK@16|///@18|XMSK@24|YMSK@28|,59@0|AT@6|//@9|A@11|B@16|42@21|AX@29|BX@30|/@31|","v3.1" +"Prefixed Masked VSX Vector 4-bit Signed Integer GER (rank-8 update) MMIRR:XX3-form","pmxvi4ger8 AT,XA,XB,XMSK,YMSK,PMSK",",1@0|3@6|9@8|//@12|/@14|/@15|PMSK@16|XMSK@24|YMSK@28|,59@0|AT@6|//@9|A@11|B@16|35@21|AX@29|BX@30|/@31|","v3.1" +"Prefixed Masked VSX Vector 4-bit Signed Integer GER (rank-8 update) Positive multiply, Positive accumulate MMIRR:XX3-form","pmxvi4ger8pp AT,XA,XB,XMSK,YMSK,PMSK",",1@0|3@6|9@8|//@12|/@14|/@15|PMSK@16|XMSK@24|YMSK@28|,59@0|AT@6|//@9|A@11|B@16|34@21|AX@29|BX@30|/@31|","v3.1" +"Prefixed Masked VSX Vector 8-bit Signed/Unsigned Integer GER (rank-4 update) MMIRR:XX3-form","pmxvi8ger4 AT,XA,XB,XMSK,YMSK,PMSK",",1@0|3@6|9@8|//@12|/@14|/@15|PMSK@16|///@20|XMSK@24|YMSK@28|,59@0|AT@6|//@9|A@11|B@16|3@21|AX@29|BX@30|/@31|","v3.1" 
+"Prefixed Masked VSX Vector 8-bit Signed/Unsigned Integer GER (rank-4 update) Positive multiply, Positive accumulate MMIRR:XX3-form","pmxvi8ger4pp AT,XA,XB,XMSK,YMSK,PMSK",",1@0|3@6|9@8|//@12|/@14|/@15|PMSK@16|///@20|XMSK@24|YMSK@28|,59@0|AT@6|//@9|A@11|B@16|2@21|AX@29|BX@30|/@31|","v3.1" +"Prefixed Masked VSX Vector 8-bit Signed/Unsigned Integer GER (rank-4 update) with Saturate Positive multiply, Positive accumulate MMIRR:XX3-form","pmxvi8ger4spp AT,XA,XB,XMSK,YMSK,PMSK",",1@0|3@6|9@8|//@12|/@14|/@15|PMSK@16|///@20|XMSK@24|YMSK@28|,59@0|AT@6|//@9|A@11|B@16|99@21|AX@29|BX@30|/@31|","v3.1" +"Prefixed Nop MRR:*-form","pnop",",1@0|3@6|0@8|///@12|0@14|//@31|,///@0|","v3.1" +"Prefixed Store Byte MLS:D-form","pstb RS,D(RA),R",",1@0|2@6|0@8|//@9|R@11|//@12|d0@14|,38@0|RS@6|RA@11|d1@16|","v3.1" +"Prefixed Store Doubleword 8LS:D-form","pstd RS,D(RA),R",",1@0|0@6|0@8|//@9|R@11|//@12|d0@14|,61@0|RS@6|RA@11|d1@16|","v3.1" +"Prefixed Store Floating-Point Double MLS:D-form","pstfd FRS,D(RA),R",",1@0|2@6|0@8|//@9|R@11|//@12|d0@14|,54@0|FRS@6|RA@11|d1@16|","v3.1" +"Prefixed Store Floating-Point Single MLS:D-form","pstfs FRS,D(RA),R",",1@0|2@6|0@8|//@9|R@11|//@12|d0@14|,52@0|FRS@6|RA@11|d1@16|","v3.1" +"Prefixed Store Halfword MLS:D-form","psth RS,D(RA),R",",1@0|2@6|0@8|//@9|R@11|//@12|d0@14|,44@0|RS@6|RA@11|d1@16|","v3.1" +"Prefixed Store Quadword 8LS:D-form","pstq RSp,D(RA),R",",1@0|0@6|0@8|//@9|R@11|//@12|d0@14|,60@0|RSp@6|RA@11|d1@16|","v3.1" +"Prefixed Store Word MLS:D-form","pstw RS,D(RA),R",",1@0|2@6|0@8|//@9|R@11|//@12|d0@14|,36@0|RS@6|RA@11|d1@16|","v3.1" +"Prefixed Store VSX Scalar Doubleword 8LS:D-form","pstxsd VRS,D(RA),R",",1@0|0@6|0@8|//@9|R@11|//@12|d0@14|,46@0|VRS@6|RA@11|d1@16|","v3.1" +"Prefixed Store VSX Scalar Single-Precision 8LS:D-form","pstxssp VRS,D(RA),R",",1@0|0@6|0@8|//@9|R@11|//@12|d0@14|,47@0|VRS@6|RA@11|d1@16|","v3.1" +"Prefixed Store VSX Vector 8LS:D-form","pstxv XS,D(RA),R",",1@0|0@6|0@8|//@9|R@11|//@12|d0@14|,27@0|SX@5|S@6|RA@11|d1@16|","v3.1" 
+"Prefixed Store VSX Vector Paired 8LS:D-form","pstxvp XSp,D(RA),R",",1@0|0@6|0@8|//@9|R@11|//@12|d0@14|,62@0|Sp@6|SX@10|RA@11|d1@16|","v3.1" +"Set Boolean Condition X-form","setbc RT,BI","31@0|RT@6|BI@11|///@16|384@21|/@31|","v3.1" +"Set Boolean Condition Reverse X-form","setbcr RT,BI","31@0|RT@6|BI@11|///@16|416@21|/@31|","v3.1" +"Set Negative Boolean Condition X-form","setnbc RT,BI","31@0|RT@6|BI@11|///@16|448@21|/@31|","v3.1" +"Set Negative Boolean Condition Reverse X-form","setnbcr RT,BI","31@0|RT@6|BI@11|///@16|480@21|/@31|","v3.1" +"Store VSX Vector Paired DQ-form","stxvp XSp,DQ(RA)","6@0|Sp@6|SX@10|RA@11|DQ@16|1@28|","v3.1" +"Store VSX Vector Paired Indexed X-form","stxvpx XSp,RA,RB","31@0|Sp@6|SX@10|RA@11|RB@16|461@21|/@31|","v3.1" +"Store VSX Vector Rightmost Byte Indexed X-form","stxvrbx XS,RA,RB","31@0|S@6|RA@11|RB@16|141@21|SX@31|","v3.1" +"Store VSX Vector Rightmost Doubleword Indexed X-form","stxvrdx XS,RA,RB","31@0|S@6|RA@11|RB@16|237@21|SX@31|","v3.1" +"Store VSX Vector Rightmost Halfword Indexed X-form","stxvrhx XS,RA,RB","31@0|S@6|RA@11|RB@16|173@21|SX@31|","v3.1" +"Store VSX Vector Rightmost Word Indexed X-form","stxvrwx XS,RA,RB","31@0|S@6|RA@11|RB@16|205@21|SX@31|","v3.1" +"Vector Centrifuge Doubleword VX-form","vcfuged VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1357@21|","v3.1" +"Vector Clear Leftmost Bytes VX-form","vclrlb VRT,VRA,RB","4@0|VRT@6|VRA@11|RB@16|397@21|","v3.1" +"Vector Clear Rightmost Bytes VX-form","vclrrb VRT,VRA,RB","4@0|VRT@6|VRA@11|RB@16|461@21|","v3.1" +"Vector Count Leading Zeros Doubleword under bit Mask VX-form","vclzdm VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1924@21|","v3.1" +"Vector Compare Equal Quadword VC-form","vcmpequq VRT,VRA,VRB (Rc=0)|vcmpequq. VRT,VRA,VRB (Rc=1)","4@0|VRT@6|VRA@11|VRB@16|Rc@21|455@22|","v3.1" +"Vector Compare Greater Than Signed Quadword VC-form","vcmpgtsq VRT,VRA,VRB (Rc=0)|vcmpgtsq. 
VRT,VRA,VRB (Rc=1)","4@0|VRT@6|VRA@11|VRB@16|Rc@21|903@22|","v3.1" +"Vector Compare Greater Than Unsigned Quadword VC-form","vcmpgtuq VRT,VRA,VRB (Rc=0)|vcmpgtuq. VRT,VRA,VRB (Rc=1)","4@0|VRT@6|VRA@11|VRB@16|Rc@21|647@22|","v3.1" +"Vector Compare Signed Quadword VX-form","vcmpsq BF,VRA,VRB","4@0|BF@6|//@9|VRA@11|VRB@16|321@21|","v3.1" +"Vector Compare Unsigned Quadword VX-form","vcmpuq BF,VRA,VRB","4@0|BF@6|//@9|VRA@11|VRB@16|257@21|","v3.1" +"Vector Count Mask Bits Byte VX-form","vcntmbb RT,VRB,MP","4@0|RT@6|12@11|MP@15|VRB@16|1602@21|","v3.1" +"Vector Count Mask Bits Doubleword VX-form","vcntmbd RT,VRB,MP","4@0|RT@6|15@11|MP@15|VRB@16|1602@21|","v3.1" +"Vector Count Mask Bits Halfword VX-form","vcntmbh RT,VRB,MP","4@0|RT@6|13@11|MP@15|VRB@16|1602@21|","v3.1" +"Vector Count Mask Bits Word VX-form","vcntmbw RT,VRB,MP","4@0|RT@6|14@11|MP@15|VRB@16|1602@21|","v3.1" +"Vector Count Trailing Zeros Doubleword under bit Mask VX-form","vctzdm VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1988@21|","v3.1" +"Vector Divide Extended Signed Doubleword VX-form","vdivesd VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|971@21|","v3.1" +"Vector Divide Extended Signed Quadword VX-form","vdivesq VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|779@21|","v3.1" +"Vector Divide Extended Signed Word VX-form","vdivesw VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|907@21|","v3.1" +"Vector Divide Extended Unsigned Doubleword VX-form","vdiveud VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|715@21|","v3.1" +"Vector Divide Extended Unsigned Quadword VX-form","vdiveuq VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|523@21|","v3.1" +"Vector Divide Extended Unsigned Word VX-form","vdiveuw VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|651@21|","v3.1" +"Vector Divide Signed Doubleword VX-form","vdivsd VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|459@21|","v3.1" +"Vector Divide Signed Quadword VX-form","vdivsq VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|267@21|","v3.1" +"Vector Divide Signed Word VX-form","vdivsw VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|395@21|","v3.1" +"Vector 
Divide Unsigned Doubleword VX-form","vdivud VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|203@21|","v3.1" +"Vector Divide Unsigned Quadword VX-form","vdivuq VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|11@21|","v3.1" +"Vector Divide Unsigned Word VX-form","vdivuw VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|139@21|","v3.1" +"Vector Expand Byte Mask VX-form","vexpandbm VRT,VRB","4@0|VRT@6|0@11|VRB@16|1602@21|","v3.1" +"Vector Expand Doubleword Mask VX-form","vexpanddm VRT,VRB","4@0|VRT@6|3@11|VRB@16|1602@21|","v3.1" +"Vector Expand Halfword Mask VX-form","vexpandhm VRT,VRB","4@0|VRT@6|1@11|VRB@16|1602@21|","v3.1" +"Vector Expand Quadword Mask VX-form","vexpandqm VRT,VRB","4@0|VRT@6|4@11|VRB@16|1602@21|","v3.1" +"Vector Expand Word Mask VX-form","vexpandwm VRT,VRB","4@0|VRT@6|2@11|VRB@16|1602@21|","v3.1" +"Vector Extract Double Doubleword to VSR using GPR-specified Left-Index VA-form","vextddvlx VRT,VRA,VRB,RC","4@0|VRT@6|VRA@11|VRB@16|RC@21|30@26|","v3.1" +"Vector Extract Double Doubleword to VSR using GPR-specified Right-Index VA-form","vextddvrx VRT,VRA,VRB,RC","4@0|VRT@6|VRA@11|VRB@16|RC@21|31@26|","v3.1" +"Vector Extract Double Unsigned Byte to VSR using GPR-specified Left-Index VA-form","vextdubvlx VRT,VRA,VRB,RC","4@0|VRT@6|VRA@11|VRB@16|RC@21|24@26|","v3.1" +"Vector Extract Double Unsigned Byte to VSR using GPR-specified Right-Index VA-form","vextdubvrx VRT,VRA,VRB,RC","4@0|VRT@6|VRA@11|VRB@16|RC@21|25@26|","v3.1" +"Vector Extract Double Unsigned Halfword to VSR using GPR-specified Left-Index VA-form","vextduhvlx VRT,VRA,VRB,RC","4@0|VRT@6|VRA@11|VRB@16|RC@21|26@26|","v3.1" +"Vector Extract Double Unsigned Halfword to VSR using GPR-specified Right-Index VA-form","vextduhvrx VRT,VRA,VRB,RC","4@0|VRT@6|VRA@11|VRB@16|RC@21|27@26|","v3.1" +"Vector Extract Double Unsigned Word to VSR using GPR-specified Left-Index VA-form","vextduwvlx VRT,VRA,VRB,RC","4@0|VRT@6|VRA@11|VRB@16|RC@21|28@26|","v3.1" +"Vector Extract Double Unsigned Word to VSR using GPR-specified Right-Index 
VA-form","vextduwvrx VRT,VRA,VRB,RC","4@0|VRT@6|VRA@11|VRB@16|RC@21|29@26|","v3.1" +"Vector Extract Byte Mask VX-form","vextractbm RT,VRB","4@0|RT@6|8@11|VRB@16|1602@21|","v3.1" +"Vector Extract Doubleword Mask VX-form","vextractdm RT,VRB","4@0|RT@6|11@11|VRB@16|1602@21|","v3.1" +"Vector Extract Halfword Mask VX-form","vextracthm RT,VRB","4@0|RT@6|9@11|VRB@16|1602@21|","v3.1" +"Vector Extract Quadword Mask VX-form","vextractqm RT,VRB","4@0|RT@6|12@11|VRB@16|1602@21|","v3.1" +"Vector Extract Word Mask VX-form","vextractwm RT,VRB","4@0|RT@6|10@11|VRB@16|1602@21|","v3.1" +"Vector Extend Sign Doubleword to Quadword VX-form","vextsd2q VRT,VRB","4@0|VRT@6|27@11|VRB@16|1538@21|","v3.1" +"Vector Gather every Nth Bit VX-form","vgnb RT,VRB,N","4@0|RT@6|//@11|N@13|VRB@16|1228@21|","v3.1" +"Vector Insert Byte from GPR using GPR-specified Left-Index VX-form","vinsblx VRT,RA,RB","4@0|VRT@6|RA@11|RB@16|527@21|","v3.1" +"Vector Insert Byte from GPR using GPR-specified Right-Index VX-form","vinsbrx VRT,RA,RB","4@0|VRT@6|RA@11|RB@16|783@21|","v3.1" +"Vector Insert Byte from VSR using GPR-specified Left-Index VX-form","vinsbvlx VRT,RA,VRB","4@0|VRT@6|RA@11|VRB@16|15@21|","v3.1" +"Vector Insert Byte from VSR using GPR-specified Right-Index VX-form","vinsbvrx VRT,RA,VRB","4@0|VRT@6|RA@11|VRB@16|271@21|","v3.1" +"Vector Insert Doubleword from GPR using immediate-specified index VX-form","vinsd VRT,RB,UIM","4@0|VRT@6|/@11|UIM@12|RB@16|463@21|","v3.1" +"Vector Insert Doubleword from GPR using GPR-specified Left-Index VX-form","vinsdlx VRT,RA,RB","4@0|VRT@6|RA@11|RB@16|719@21|","v3.1" +"Vector Insert Doubleword from GPR using GPR-specified Right-Index VX-form","vinsdrx VRT,RA,RB","4@0|VRT@6|RA@11|RB@16|975@21|","v3.1" +"Vector Insert Halfword from GPR using GPR-specified Left-Index VX-form","vinshlx VRT,RA,RB","4@0|VRT@6|RA@11|RB@16|591@21|","v3.1" +"Vector Insert Halfword from GPR using GPR-specified Right-Index VX-form","vinshrx VRT,RA,RB","4@0|VRT@6|RA@11|RB@16|847@21|","v3.1" +"Vector 
Insert Halfword from VSR using GPR-specified Left-Index VX-form","vinshvlx VRT,RA,VRB","4@0|VRT@6|RA@11|VRB@16|79@21|","v3.1" +"Vector Insert Halfword from VSR using GPR-specified Right-Index VX-form","vinshvrx VRT,RA,VRB","4@0|VRT@6|RA@11|VRB@16|335@21|","v3.1" +"Vector Insert Word from GPR using immediate-specified index VX-form","vinsw VRT,RB,UIM","4@0|VRT@6|/@11|UIM@12|RB@16|207@21|","v3.1" +"Vector Insert Word from GPR using GPR-specified Left-Index VX-form","vinswlx VRT,RA,RB","4@0|VRT@6|RA@11|RB@16|655@21|","v3.1" +"Vector Insert Word from GPR using GPR-specified Right-Index VX-form","vinswrx VRT,RA,RB","4@0|VRT@6|RA@11|RB@16|911@21|","v3.1" +"Vector Insert Word from VSR using GPR-specified Left-Index VX-form","vinswvlx VRT,RA,VRB","4@0|VRT@6|RA@11|VRB@16|143@21|","v3.1" +"Vector Insert Word from VSR using GPR-specified Left-Index VX-form","vinswvrx VRT,RA,VRB","4@0|VRT@6|RA@11|VRB@16|399@21|","v3.1" +"Vector Modulo Signed Doubleword VX-form","vmodsd VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1995@21|","v3.1" +"Vector Modulo Signed Quadword VX-form","vmodsq VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1803@21|","v3.1" +"Vector Modulo Signed Word VX-form","vmodsw VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1931@21|","v3.1" +"Vector Modulo Unsigned Doubleword VX-form","vmodud VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1739@21|","v3.1" +"Vector Modulo Unsigned Quadword VX-form","vmoduq VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1547@21|","v3.1" +"Vector Modulo Unsigned Word VX-form","vmoduw VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1675@21|","v3.1" +"Vector Multiply-Sum & write Carry-out Unsigned Doubleword VA-form","vmsumcud VRT,VRA,VRB,VRC","4@0|VRT@6|VRA@11|VRB@16|VRC@21|23@26|","v3.1" +"Vector Multiply Even Signed Doubleword VX-form","vmulesd VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|968@21|","v3.1" +"Vector Multiply Even Unsigned Doubleword VX-form","vmuleud VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|712@21|","v3.1" +"Vector Multiply High Signed Doubleword VX-form","vmulhsd 
VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|969@21|","v3.1" +"Vector Multiply High Signed Word VX-form","vmulhsw VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|905@21|","v3.1" +"Vector Multiply High Unsigned Doubleword VX-form","vmulhud VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|713@21|","v3.1" +"Vector Multiply High Unsigned Word VX-form","vmulhuw VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|649@21|","v3.1" +"Vector Multiply Low Doubleword VX-form","vmulld VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|457@21|","v3.1" +"Vector Multiply Odd Signed Doubleword VX-form","vmulosd VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|456@21|","v3.1" +"Vector Multiply Odd Unsigned Doubleword VX-form","vmuloud VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|200@21|","v3.1" +"Vector Parallel Bits Deposit Doubleword VX-form","vpdepd VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1485@21|","v3.1" +"Vector Parallel Bits Extract Doubleword VX-form","vpextd VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1421@21|","v3.1" +"Vector Rotate Left Quadword VX-form","vrlq VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|5@21|","v3.1" +"Vector Rotate Left Quadword then Mask Insert VX-form","vrlqmi VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|69@21|","v3.1" +"Vector Rotate Left Quadword then AND with Mask VX-form","vrlqnm VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|325@21|","v3.1" +"Vector Shift Left Double by Bit Immediate VN-form","vsldbi VRT,VRA,VRB,SH","4@0|VRT@6|VRA@11|VRB@16|0@21|SH@23|22@26|","v3.1" +"Vector Shift Left Quadword VX-form","vslq VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|261@21|","v3.1" +"Vector Shift Right Algebraic Quadword VX-form","vsraq VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|773@21|","v3.1" +"Vector Shift Right Double by Bit Immediate VN-form","vsrdbi VRT,VRA,VRB,SH","4@0|VRT@6|VRA@11|VRB@16|1@21|SH@23|22@26|","v3.1" +"Vector Shift Right Quadword VX-form","vsrq VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|517@21|","v3.1" +"Vector String Isolate Byte Left-justified VX-form","vstribl VRT,VRB (Rc=0)|vstribl. 
VRT,VRB (Rc=1)","4@0|VRT@6|0@11|VRB@16|Rc@21|13@22|","v3.1" +"Vector String Isolate Byte Right-justified VX-form","vstribr VRT,VRB (Rc=0)|vstribr. VRT,VRB (Rc=1)","4@0|VRT@6|1@11|VRB@16|Rc@21|13@22|","v3.1" +"Vector String Isolate Halfword Left-justified VX-form","vstrihl VRT,VRB (Rc=0)|vstrihl. VRT,VRB (Rc=1)","4@0|VRT@6|2@11|VRB@16|Rc@21|13@22|","v3.1" +"Vector String Isolate Halfword Right-justified VX-form","vstrihr VRT,VRB (Rc=0)|vstrihr. VRT,VRB (Rc=1)","4@0|VRT@6|3@11|VRB@16|Rc@21|13@22|","v3.1" +"VSX Scalar Compare Equal Quad-Precision X-form","xscmpeqqp VRT,VRA,VRB","63@0|VRT@6|VRA@11|VRB@16|68@21|/@31|","v3.1" +"VSX Scalar Compare Greater Than or Equal Quad-Precision X-form","xscmpgeqp VRT,VRA,VRB","63@0|VRT@6|VRA@11|VRB@16|196@21|/@31|","v3.1" +"VSX Scalar Compare Greater Than Quad-Precision X-form","xscmpgtqp VRT,VRA,VRB","63@0|VRT@6|VRA@11|VRB@16|228@21|/@31|","v3.1" +"VSX Scalar Convert with round to zero Quad-Precision to Signed Quadword X-form","xscvqpsqz VRT,VRB","63@0|VRT@6|8@11|VRB@16|836@21|/@31|","v3.1" +"VSX Scalar Convert with round to zero Quad-Precision to Unsigned Quadword X-form","xscvqpuqz VRT,VRB","63@0|VRT@6|0@11|VRB@16|836@21|/@31|","v3.1" +"VSX Scalar Convert with round Signed Quadword to Quad-Precision X-form","xscvsqqp VRT,VRB","63@0|VRT@6|11@11|VRB@16|836@21|/@31|","v3.1" +"VSX Scalar Convert with round Unsigned Quadword to Quad-Precision X-form","xscvuqqp VRT,VRB","63@0|VRT@6|3@11|VRB@16|836@21|/@31|","v3.1" +"VSX Scalar Maximum Type-C Quad-Precision X-form","xsmaxcqp VRT,VRA,VRB","63@0|VRT@6|VRA@11|VRB@16|676@21|/@31|","v3.1" +"VSX Scalar Minimum Type-C Quad-Precision X-form","xsmincqp VRT,VRA,VRB","63@0|VRT@6|VRA@11|VRB@16|740@21|/@31|","v3.1" +"VSX Vector bfloat16 GER (Rank-2 Update) XX3-form","xvbf16ger2 AT,XA,XB","59@0|AT@6|//@9|A@11|B@16|51@21|AX@29|BX@30|/@31|","v3.1" +"VSX Vector bfloat16 GER (Rank-2 Update) Negative multiply, Negative accumulate XX3-form","xvbf16ger2nn 
AT,XA,XB","59@0|AT@6|//@9|A@11|B@16|242@21|AX@29|BX@30|/@31|","v3.1" +"VSX Vector bfloat16 GER (Rank-2 Update) Negative multiply, Positive accumulate XX3-form","xvbf16ger2np AT,XA,XB","59@0|AT@6|//@9|A@11|B@16|114@21|AX@29|BX@30|/@31|","v3.1" +"VSX Vector bfloat16 GER (Rank-2 Update) Positive multiply, Negative accumulate XX3-form","xvbf16ger2pn AT,XA,XB","59@0|AT@6|//@9|A@11|B@16|178@21|AX@29|BX@30|/@31|","v3.1" +"VSX Vector bfloat16 GER (Rank-2 Update) Positive multiply, Positive accumulate XX3-form","xvbf16ger2pp AT,XA,XB","59@0|AT@6|//@9|A@11|B@16|50@21|AX@29|BX@30|/@31|","v3.1" +"VSX Vector Convert bfloat16 to Single-Precision format XX2-form","xvcvbf16spn XT,XB","60@0|T@6|16@11|B@16|475@21|BX@30|TX@31|","v3.1" +"VSX Vector Convert with round Single-Precision to bfloat16 format XX2-form","xvcvspbf16 XT,XB","60@0|T@6|17@11|B@16|475@21|BX@30|TX@31|","v3.1" +"VSX Vector 16-bit Floating-Point GER (rank-2 update) XX3-form","xvf16ger2 AT,XA,XB","59@0|AT@6|//@9|A@11|B@16|19@21|AX@29|BX@30|/@31|","v3.1" +"VSX Vector 16-bit Floating-Point GER (rank-2 update) Negative multiply, Negative accumulate XX3-form","xvf16ger2nn AT,XA,XB","59@0|AT@6|//@9|A@11|B@16|210@21|AX@29|BX@30|/@31|","v3.1" +"VSX Vector 16-bit Floating-Point GER (rank-2 update) Negative multiply, Positive accumulate XX3-form","xvf16ger2np AT,XA,XB","59@0|AT@6|//@9|A@11|B@16|82@21|AX@29|BX@30|/@31|","v3.1" +"VSX Vector 16-bit Floating-Point GER (rank-2 update) Positive multiply, Negative accumulate XX3-form","xvf16ger2pn AT,XA,XB","59@0|AT@6|//@9|A@11|B@16|146@21|AX@29|BX@30|/@31|","v3.1" +"VSX Vector 16-bit Floating-Point GER (rank-2 update) Positive multiply, Positive accumulate XX3-form","xvf16ger2pp AT,XA,XB","59@0|AT@6|//@9|A@11|B@16|18@21|AX@29|BX@30|/@31|","v3.1" +"VSX Vector 32-bit Floating-Point GER (rank-1 update) XX3-form","xvf32ger AT,XA,XB","59@0|AT@6|//@9|A@11|B@16|27@21|AX@29|BX@30|/@31|","v3.1" +"VSX Vector 32-bit Floating-Point GER (rank-1 update) Negative multiply, Negative accumulate 
XX3-form","xvf32gernn AT,XA,XB","59@0|AT@6|//@9|A@11|B@16|218@21|AX@29|BX@30|/@31|","v3.1" +"VSX Vector 32-bit Floating-Point GER (rank-1 update) Negative multiply, Positive accumulate XX3-form","xvf32gernp AT,XA,XB","59@0|AT@6|//@9|A@11|B@16|90@21|AX@29|BX@30|/@31|","v3.1" +"VSX Vector 32-bit Floating-Point GER (rank-1 update) Positive multiply, Negative accumulate XX3-form","xvf32gerpn AT,XA,XB","59@0|AT@6|//@9|A@11|B@16|154@21|AX@29|BX@30|/@31|","v3.1" +"VSX Vector 32-bit Floating-Point GER (rank-1 update) Positive multiply, Positive accumulate XX3-form","xvf32gerpp AT,XA,XB","59@0|AT@6|//@9|A@11|B@16|26@21|AX@29|BX@30|/@31|","v3.1" +"VSX Vector 64-bit Floating-Point GER (rank-1 update) XX3-form","xvf64ger AT,XAp,XB","59@0|AT@6|//@9|Ap@11|B@16|59@21|AX@29|BX@30|/@31|","v3.1" +"VSX Vector 64-bit Floating-Point GER (rank-1 update) Negative multiply, Negative accumulate XX3-form","xvf64gernn AT,XAp,XB","59@0|AT@6|//@9|Ap@11|B@16|250@21|AX@29|BX@30|/@31|","v3.1" +"VSX Vector 64-bit Floating-Point GER (rank-1 update) Negative multiply, Positive accumulate XX3-form","xvf64gernp AT,XAp,XB","59@0|AT@6|//@9|Ap@11|B@16|122@21|AX@29|BX@30|/@31|","v3.1" +"VSX Vector 64-bit Floating-Point GER (rank-1 update) Positive multiply, Negative accumulate XX3-form","xvf64gerpn AT,XAp,XB","59@0|AT@6|//@9|Ap@11|B@16|186@21|AX@29|BX@30|/@31|","v3.1" +"VSX Vector 64-bit Floating-Point GER (rank-1 update) Positive multiply, Positive accumulate XX3-form","xvf64gerpp AT,XAp,XB","59@0|AT@6|//@9|Ap@11|B@16|58@21|AX@29|BX@30|/@31|","v3.1" +"VSX Vector 16-bit Signed Integer GER (rank-2 update) XX3-form","xvi16ger2 AT,XA,XB","59@0|AT@6|//@9|A@11|B@16|75@21|AX@29|BX@30|/@31|","v3.1" +"VSX Vector 16-bit Signed Integer GER (rank-2 update) Positive multiply, Positive accumulate XX3-form","xvi16ger2pp AT,XA,XB","59@0|AT@6|//@9|A@11|B@16|107@21|AX@29|BX@30|/@31|","v3.1" +"VSX Vector 16-bit Signed Integer GER (rank-2 update) with Saturation XX3-form","xvi16ger2s 
AT,XA,XB","59@0|AT@6|//@9|A@11|B@16|43@21|AX@29|BX@30|/@31|","v3.1" +"VSX Vector 16-bit Signed Integer GER (rank-2 update) with Saturation Positive multiply, Positive accumulate XX3-form","xvi16ger2spp AT,XA,XB","59@0|AT@6|//@9|A@11|B@16|42@21|AX@29|BX@30|/@31|","v3.1" +"VSX Vector 4-bit Signed Integer GER (rank-8 update) XX3-form","xvi4ger8 AT,XA,XB","59@0|AT@6|//@9|A@11|B@16|35@21|AX@29|BX@30|/@31|","v3.1" +"VSX Vector 4-bit Signed Integer GER (rank-8 update) Positive multiply, Positive accumulate XX3-form","xvi4ger8pp AT,XA,XB","59@0|AT@6|//@9|A@11|B@16|34@21|AX@29|BX@30|/@31|","v3.1" +"VSX Vector 8-bit Signed/Unsigned Integer GER (rank-4 update) XX3-form","xvi8ger4 AT,XA,XB","59@0|AT@6|//@9|A@11|B@16|3@21|AX@29|BX@30|/@31|","v3.1" +"VSX Vector 8-bit Signed/Unsigned Integer GER (rank-4 update) Positive multiply, Positive accumulate XX3-form","xvi8ger4pp AT,XA,XB","59@0|AT@6|//@9|A@11|B@16|2@21|AX@29|BX@30|/@31|","v3.1" +"VSX Vector 8-bit Signed/Unsigned Integer GER (rank-4 update) with Saturate Positive multiply, Positive accumulate XX3-form","xvi8ger4spp AT,XA,XB","59@0|AT@6|//@9|A@11|B@16|99@21|AX@29|BX@30|/@31|","v3.1" +"VSX Vector Test Least-Significant Bit by Byte XX2-form","xvtlsbb BF,XB","60@0|BF@6|//@9|2@11|B@16|475@21|BX@30|/@31|","v3.1" +"VSX Vector Blend Variable Byte 8RR:XX4-form","xxblendvb XT,XA,XB,XC",",1@0|1@6|0@8|//@12|///@14|,33@0|T@6|A@11|B@16|C@21|0@26|CX@28|AX@29|BX@30|TX@31|","v3.1" +"VSX Vector Blend Variable Doubleword 8RR:XX4-form","xxblendvd XT,XA,XB,XC",",1@0|1@6|0@8|//@12|///@14|,33@0|T@6|A@11|B@16|C@21|3@26|CX@28|AX@29|BX@30|TX@31|","v3.1" +"VSX Vector Blend Variable Halfword 8RR:XX4-form","xxblendvh XT,XA,XB,XC",",1@0|1@6|0@8|//@12|///@14|,33@0|T@6|A@11|B@16|C@21|1@26|CX@28|AX@29|BX@30|TX@31|","v3.1" +"VSX Vector Blend Variable Word 8RR:XX4-form","xxblendvw XT,XA,XB,XC",",1@0|1@6|0@8|//@12|///@14|,33@0|T@6|A@11|B@16|C@21|2@26|CX@28|AX@29|BX@30|TX@31|","v3.1" +"VSX Vector Evaluate 8RR-XX4-form","xxeval 
XT,XA,XB,XC,IMM",",1@0|1@6|0@8|//@12|///@14|IMM@24|,34@0|T@6|A@11|B@16|C@21|1@26|CX@28|AX@29|BX@30|TX@31|","v3.1" +"VSX Vector Generate PCV from Byte Mask X-form","xxgenpcvbm XT,VRB,IMM","60@0|T@6|IMM@11|VRB@16|916@21|TX@31|","v3.1" +"VSX Vector Generate PCV from Doubleword Mask X-form","xxgenpcvdm XT,VRB,IMM","60@0|T@6|IMM@11|VRB@16|949@21|TX@31|","v3.1" +"VSX Vector Generate PCV from Halfword Mask X-form","xxgenpcvhm XT,VRB,IMM","60@0|T@6|IMM@11|VRB@16|917@21|TX@31|","v3.1" +"VSX Vector Generate PCV from Word Mask X-form","xxgenpcvwm XT,VRB,IMM","60@0|T@6|IMM@11|VRB@16|948@21|TX@31|","v3.1" +"VSX Move From Accumulator X-form","xxmfacc AS","31@0|AS@6|//@9|0@11|///@16|177@21|/@31|","v3.1" +"VSX Move To Accumulator X-form","xxmtacc AT","31@0|AT@6|//@9|1@11|///@16|177@21|/@31|","v3.1" +"VSX Vector Permute Extended 8RR:XX4-form","xxpermx XT,XA,XB,XC,UIM",",1@0|1@6|0@8|//@12|///@14|UIM@29|,34@0|T@6|A@11|B@16|C@21|0@26|CX@28|AX@29|BX@30|TX@31|","v3.1" +"VSX Set Accumulator to Zero X-form","xxsetaccz AT","31@0|AT@6|//@9|3@11|///@16|177@21|/@31|","v3.1" +"VSX Vector Splat Immediate32 Doubleword Indexed 8RR:D-form","xxsplti32dx XT,IX,IMM32",",1@0|1@6|0@8|//@12|//@14|imm0@16|,32@0|T@6|0@11|IX@14|TX@15|imm1@16|","v3.1" +"VSX Vector Splat Immediate Double-Precision 8RR:D-form","xxspltidp XT,IMM32",",1@0|1@6|0@8|//@12|//@14|imm0@16|,32@0|T@6|2@11|TX@15|imm1@16|","v3.1" +"VSX Vector Splat Immediate Word 8RR:D-form","xxspltiw XT,IMM32",",1@0|1@6|0@8|//@12|//@14|imm0@16|,32@0|T@6|3@11|TX@15|imm1@16|","v3.1" +"Ultravisor Message Clear X-form","msgclru RB","31@0|///@6|///@11|RB@16|110@21|/@31|","v3.0C" +"Ultravisor Message SendX-form","msgsndu RB","31@0|///@6|///@11|RB@16|78@21|/@31|","v3.0C" +"Ultravisor Return From Interrupt Doubleword XL-form","urfid","19@0|///@6|///@11|///@16|306@21|/@31|","v3.0C" +"Add Extended using alternate carry bit Z23-form","addex RT,RA,RB,CY","31@0|RT@6|RA@11|RB@16|CY@21|170@23|/@31|","v3.0B" +"Move From FPSCR Control & Set DRN X-form","mffscdrn 
FRT,FRB","63@0|FRT@6|20@11|FRB@16|583@21|/@31|","v3.0B" +"Move From FPSCR Control & Set DRN Immediate X-form","mffscdrni FRT,DRM","63@0|FRT@6|21@11|//@16|DRM@18|583@21|/@31|","v3.0B" +"Move From FPSCR & Clear Enables X-form","mffsce FRT","63@0|FRT@6|1@11|///@16|583@21|/@31|","v3.0B" +"Move From FPSCR Control & Set RN X-form","mffscrn FRT,FRB","63@0|FRT@6|22@11|FRB@16|583@21|/@31|","v3.0B" +"Move From FPSCR Control & Set RN Immediate X-form","mffscrni FRT,RM","63@0|FRT@6|23@11|///@16|RM@19|583@21|/@31|","v3.0B" +"Move From FPSCR Lightweight X-form","mffsl FRT","63@0|FRT@6|24@11|///@16|583@21|/@31|","v3.0B" +"SLB Invalidate All Global X-form","slbiag RS, L","31@0|RS@6|///@11|L@15|///@16|850@21|/@31|","v3.0B" +"Vector Multiply-Sum Unsigned Doubleword Modulo VA-form","vmsumudm VRT,VRA,VRB,VRC","4@0|VRT@6|VRA@11|VRB@16|VRC@21|35@26|","v3.0B" +"Add PC Immediate Shifted DX-form","addpcis RT,D","19@0|RT@6|d1@11|d0@16|2@26|d2@31|","v3.0" +"Decimal Convert From National VX-form","bcdcfn. VRT,VRB,PS","4@0|VRT@6|7@11|VRB@16|1@21|PS@22|385@23|","v3.0" +"Decimal Convert From Signed Quadword VX-form","bcdcfsq. VRT,VRB,PS","4@0|VRT@6|2@11|VRB@16|1@21|PS@22|385@23|","v3.0" +"Decimal Convert From Zoned VX-form","bcdcfz. VRT,VRB,PS","4@0|VRT@6|6@11|VRB@16|1@21|PS@22|385@23|","v3.0" +"Decimal Copy Sign VX-form","bcdcpsgn. VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|833@21|","v3.0" +"Decimal Convert To National VX-form","bcdctn. VRT,VRB","4@0|VRT@6|5@11|VRB@16|1@21|/@22|385@23|","v3.0" +"Decimal Convert To Signed Quadword VX-form","bcdctsq. VRT,VRB","4@0|VRT@6|0@11|VRB@16|1@21|/@22|385@23|","v3.0" +"Decimal Convert To Zoned VX-form","bcdctz. VRT,VRB,PS","4@0|VRT@6|4@11|VRB@16|1@21|PS@22|385@23|","v3.0" +"Decimal Shift VX-form","bcds. VRT,VRA,VRB,PS","4@0|VRT@6|VRA@11|VRB@16|1@21|PS@22|193@23|","v3.0" +"Decimal Set Sign VX-form","bcdsetsgn. VRT,VRB,PS","4@0|VRT@6|31@11|VRB@16|1@21|PS@22|385@23|","v3.0" +"Decimal Shift and Round VX-form","bcdsr. 
VRT,VRA,VRB,PS","4@0|VRT@6|VRA@11|VRB@16|1@21|PS@22|449@23|","v3.0" +"Decimal Truncate VX-form","bcdtrunc. VRT,VRA,VRB,PS","4@0|VRT@6|VRA@11|VRB@16|1@21|PS@22|257@23|","v3.0" +"Decimal Unsigned Shift VX-form","bcdus. VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1@21|/@22|129@23|","v3.0" +"Decimal Unsigned Truncate VX-form","bcdutrunc. VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1@21|/@22|321@23|","v3.0" +"Compare Equal Byte X-form","cmpeqb BF,RA,RB","31@0|BF@6|//@9|RA@11|RB@16|224@21|/@31|","v3.0" +"Compare Ranged Byte X-form","cmprb BF,L,RA,RB","31@0|BF@6|/@9|L@10|RA@11|RB@16|192@21|/@31|","v3.0" +"Count Trailing Zeros Doubleword X-form","cnttzd RA,RS (Rc=0)|cnttzd. RA,RS (Rc=1)","31@0|RS@6|RA@11|///@16|570@21|Rc@31|","v3.0" +"Count Trailing Zeros Word X-form","cnttzw RA,RS (Rc=0)|cnttzw. RA,RS (Rc=1)","31@0|RS@6|RA@11|///@16|538@21|Rc@31|","v3.0" +"Copy X-form","copy RA,RB","31@0|///@6|1@10|RA@11|RB@16|774@21|/@31|","v3.0" +"Copy-Paste Abort X-form","cpabort","31@0|///@6|///@11|///@16|838@21|/@31|","v3.0" +"Deliver A Random Number X-form","darn RT,L","31@0|RT@6|///@11|L@14|///@16|755@21|/@31|","v3.0" +"DFP Test Significance Immediate X-form","dtstsfi BF,UIM,FRB","59@0|BF@6|/@9|UIM@10|FRB@16|675@21|/@31|","v3.0" +"DFP Test Significance Immediate Quad X-form","dtstsfiq BF,UIM,FRBp","63@0|BF@6|/@9|UIM@10|FRBp@16|675@21|/@31|","v3.0" +"Extend Sign Word and Shift Left Immediate XS-form","extswsli RA,RS,SH (Rc=0)|extswsli. 
RA,RS,SH (Rc=1)","31@0|RS@6|RA@11|sh@16|445@21|sh@30|Rc@31|","v3.0" +"Load Doubleword ATomic X-form","ldat RT,RA,FC","31@0|RT@6|RA@11|FC@16|614@21|/@31|","v3.0" +"Load Word ATomic X-form","lwat RT,RA,FC","31@0|RT@6|RA@11|FC@16|582@21|/@31|","v3.0" +"Load VSX Scalar Doubleword DS-form","lxsd VRT,DS(RA)","57@0|VRT@6|RA@11|DS@16|2@30|","v3.0" +"Load VSX Scalar as Integer Byte & Zero Indexed X-form","lxsibzx XT,RA,RB","31@0|T@6|RA@11|RB@16|781@21|TX@31|","v3.0" +"Load VSX Scalar as Integer Halfword & Zero Indexed X-form","lxsihzx XT,RA,RB","31@0|T@6|RA@11|RB@16|813@21|TX@31|","v3.0" +"Load VSX Scalar Single-Precision DS-form","lxssp VRT,DS(RA)","57@0|VRT@6|RA@11|DS@16|3@30|","v3.0" +"Load VSX Vector DQ-form","lxv XT,DQ(RA)","61@0|T@6|RA@11|DQ@16|TX@28|1@29|","v3.0" +"Load VSX Vector Byte*16 Indexed X-form","lxvb16x XT,RA,RB","31@0|T@6|RA@11|RB@16|876@21|TX@31|","v3.0" +"Load VSX Vector Halfword*8 Indexed X-form","lxvh8x XT,RA,RB","31@0|T@6|RA@11|RB@16|812@21|TX@31|","v3.0" +"Load VSX Vector with Length X-form","lxvl XT,RA,RB","31@0|T@6|RA@11|RB@16|269@21|TX@31|","v3.0" +"Load VSX Vector with Length Left-justified X-form","lxvll XT,RA,RB","31@0|T@6|RA@11|RB@16|301@21|TX@31|","v3.0" +"Load VSX Vector Word & Splat Indexed X-form","lxvwsx XT,RA,RB","31@0|T@6|RA@11|RB@16|364@21|TX@31|","v3.0" +"Load VSX Vector Indexed X-form","lxvx XT,RA,RB","31@0|T@6|RA@11|RB@16|4@21|/@25|12@26|TX@31|","v3.0" +"Multiply-Add High Doubleword VA-form","maddhd RT,RA,RB,RC","4@0|RT@6|RA@11|RB@16|RC@21|48@26|","v3.0" +"Multiply-Add High Doubleword Unsigned VA-form","maddhdu RT,RA,RB,RC","4@0|RT@6|RA@11|RB@16|RC@21|49@26|","v3.0" +"Multiply-Add Low Doubleword VA-form","maddld RT,RA,RB,RC","4@0|RT@6|RA@11|RB@16|RC@21|51@26|","v3.0" +"Move to CR from XER Extended X-form","mcrxrx BF","31@0|BF@6|//@9|///@11|///@16|576@21|/@31|","v3.0" +"Move From VSR Lower Doubleword X-form","mfvsrld RA,XS","31@0|S@6|RA@11|///@16|307@21|SX@31|","v3.0" +"Modulo Signed Doubleword X-form","modsd 
RT,RA,RB","31@0|RT@6|RA@11|RB@16|777@21|/@31|","v3.0" +"Modulo Signed Word X-form","modsw RT,RA,RB","31@0|RT@6|RA@11|RB@16|779@21|/@31|","v3.0" +"Modulo Unsigned Doubleword X-form","modud RT,RA,RB","31@0|RT@6|RA@11|RB@16|265@21|/@31|","v3.0" +"Modulo Unsigned Word X-form","moduw RT,RA,RB","31@0|RT@6|RA@11|RB@16|267@21|/@31|","v3.0" +"Message Synchronize X-form","msgsync","31@0|///@6|///@11|///@16|886@21|/@31|","v3.0" +"Move To VSR Double Doubleword X-form","mtvsrdd XT,RA,RB","31@0|T@6|RA@11|RB@16|435@21|TX@31|","v3.0" +"Move To VSR Word & Splat X-form","mtvsrws XT,RA","31@0|T@6|RA@11|///@16|403@21|TX@31|","v3.0" +"Paste X-form","paste. RA,RB,L","31@0|///@6|L@10|RA@11|RB@16|902@21|1@31|","v3.0" +"Set Boolean X-form","setb RT,BFA","31@0|RT@6|BFA@11|//@14|///@16|128@21|/@31|","v3.0" +"SLB Invalidate Entry Global X-form","slbieg RS,RB","31@0|RS@6|///@11|RB@16|466@21|/@31|","v3.0" +"SLB Synchronize X-form","slbsync","31@0|///@6|///@11|///@16|338@21|/@31|","v3.0" +"Store Doubleword ATomic X-form","stdat RS,RA,FC","31@0|RS@6|RA@11|FC@16|742@21|/@31|","v3.0" +"Stop XL-form","stop","19@0|///@6|///@11|///@16|370@21|/@31|","v3.0" +"Store Word ATomic X-form","stwat RS,RA,FC","31@0|RS@6|RA@11|FC@16|710@21|/@31|","v3.0" +"Store VSX Scalar Doubleword DS-form","stxsd VRS,DS(RA)","61@0|VRS@6|RA@11|DS@16|2@30|","v3.0" +"Store VSX Scalar as Integer Byte Indexed X-form","stxsibx XS,RA,RB","31@0|S@6|RA@11|RB@16|909@21|SX@31|","v3.0" +"Store VSX Scalar as Integer Halfword Indexed X-form","stxsihx XS,RA,RB","31@0|S@6|RA@11|RB@16|941@21|SX@31|","v3.0" +"Store VSX Scalar Single DS-form","stxssp VRS,DS(RA)","61@0|VRS@6|RA@11|DS@16|3@30|","v3.0" +"Store VSX Vector DQ-form","stxv XS,DQ(RA)","61@0|S@6|RA@11|DQ@16|SX@28|5@29|","v3.0" +"Store VSX Vector Byte*16 Indexed X-form","stxvb16x XS,RA,RB","31@0|S@6|RA@11|RB@16|1004@21|SX@31|","v3.0" +"Store VSX Vector Halfword*8 Indexed X-form","stxvh8x XS,RA,RB","31@0|S@6|RA@11|RB@16|940@21|SX@31|","v3.0" +"Store VSX Vector with Length X-form","stxvl 
XS,RA,RB","31@0|S@6|RA@11|RB@16|397@21|SX@31|","v3.0" +"Store VSX Vector with Length Left-justified X-form","stxvll XS,RA,RB","31@0|S@6|RA@11|RB@16|429@21|SX@31|","v3.0" +"Store VSX Vector Indexed X-form","stxvx XS,RA,RB","31@0|S@6|RA@11|RB@16|396@21|SX@31|","v3.0" +"Vector Absolute Difference Unsigned Byte VX-form","vabsdub VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1027@21|","v3.0" +"Vector Absolute Difference Unsigned Halfword VX-form","vabsduh VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1091@21|","v3.0" +"Vector Absolute Difference Unsigned Word VX-form","vabsduw VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1155@21|","v3.0" +"Vector Bit Permute Doubleword VX-form","vbpermd VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1484@21|","v3.0" +"Vector Count Leading Zero Least-Significant Bits Byte VX-form","vclzlsbb RT,VRB","4@0|RT@6|0@11|VRB@16|1538@21|","v3.0" +"Vector Compare Not Equal Byte VC-form","vcmpneb VRT,VRA,VRB (Rc=0)|vcmpneb. VRT,VRA,VRB (Rc=1)","4@0|VRT@6|VRA@11|VRB@16|Rc@21|7@22|","v3.0" +"Vector Compare Not Equal Halfword VC-form","vcmpneh VRT,VRA,VRB (Rc=0)|vcmpneh. VRT,VRA,VRB (Rc=1)","4@0|VRT@6|VRA@11|VRB@16|Rc@21|71@22|","v3.0" +"Vector Compare Not Equal Word VC-form","vcmpnew VRT,VRA,VRB (Rc=0)|vcmpnew. VRT,VRA,VRB (Rc=1)","4@0|VRT@6|VRA@11|VRB@16|Rc@21|135@22|","v3.0" +"Vector Compare Not Equal or Zero Byte VC-form","vcmpnezb VRT,VRA,VRB (Rc=0)|vcmpnezb. VRT,VRA,VRB (Rc=1)","4@0|VRT@6|VRA@11|VRB@16|Rc@21|263@22|","v3.0" +"Vector Compare Not Equal or Zero Halfword VC-form","vcmpnezh VRT,VRA,VRB (Rc=0)|vcmpnezh. VRT,VRA,VRB (Rc=1)","4@0|VRT@6|VRA@11|VRB@16|Rc@21|327@22|","v3.0" +"Vector Compare Not Equal or Zero Word VC-form","vcmpnezw VRT,VRA,VRB (Rc=0)|vcmpnezw. 
VRT,VRA,VRB (Rc=1)","4@0|VRT@6|VRA@11|VRB@16|Rc@21|391@22|","v3.0" +"Vector Count Trailing Zeros Byte VX-form","vctzb VRT,VRB","4@0|VRT@6|28@11|VRB@16|1538@21|","v3.0" +"Vector Count Trailing Zeros Doubleword VX-form","vctzd VRT,VRB","4@0|VRT@6|31@11|VRB@16|1538@21|","v3.0" +"Vector Count Trailing Zeros Halfword VX-form","vctzh VRT,VRB","4@0|VRT@6|29@11|VRB@16|1538@21|","v3.0" +"Vector Count Trailing Zero Least-Significant Bits Byte VX-form","vctzlsbb RT,VRB","4@0|RT@6|1@11|VRB@16|1538@21|","v3.0" +"Vector Count Trailing Zeros Word VX-form","vctzw VRT,VRB","4@0|VRT@6|30@11|VRB@16|1538@21|","v3.0" +"Vector Extract Doubleword to VSR using immediate-specified index VX-form","vextractd VRT,VRB,UIM","4@0|VRT@6|/@11|UIM@12|VRB@16|717@21|","v3.0" +"Vector Extract Unsigned Byte to VSR using immediate-specified index VX-form","vextractub VRT,VRB,UIM","4@0|VRT@6|/@11|UIM@12|VRB@16|525@21|","v3.0" +"Vector Extract Unsigned Halfword to VSR using immediate-specified index VX-form","vextractuh VRT,VRB,UIM","4@0|VRT@6|/@11|UIM@12|VRB@16|589@21|","v3.0" +"Vector Extract Unsigned Word to VSR using immediate-specified index VX-form","vextractuw VRT,VRB,UIM","4@0|VRT@6|/@11|UIM@12|VRB@16|653@21|","v3.0" +"Vector Extend Sign Byte To Doubleword VX-form","vextsb2d VRT,VRB","4@0|VRT@6|24@11|VRB@16|1538@21|","v3.0" +"Vector Extend Sign Byte To Word VX-form","vextsb2w VRT,VRB","4@0|VRT@6|16@11|VRB@16|1538@21|","v3.0" +"Vector Extend Sign Halfword To Doubleword VX-form","vextsh2d VRT,VRB","4@0|VRT@6|25@11|VRB@16|1538@21|","v3.0" +"Vector Extend Sign Halfword To Word VX-form","vextsh2w VRT,VRB","4@0|VRT@6|17@11|VRB@16|1538@21|","v3.0" +"Vector Extend Sign Word To Doubleword VX-form","vextsw2d VRT,VRB","4@0|VRT@6|26@11|VRB@16|1538@21|","v3.0" +"Vector Extract Unsigned Byte to GPR using GPR-specified Left-Index VX-form","vextublx RT,RA,VRB","4@0|RT@6|RA@11|VRB@16|1549@21|","v3.0" +"Vector Extract Unsigned Byte to GPR using GPR-specified Right-Index VX-form","vextubrx 
RT,RA,VRB","4@0|RT@6|RA@11|VRB@16|1805@21|","v3.0" +"Vector Extract Unsigned Halfword to GPR using GPR-specified Left-Index VX-form","vextuhlx RT,RA,VRB","4@0|RT@6|RA@11|VRB@16|1613@21|","v3.0" +"Vector Extract Unsigned Halfword to GPR using GPR-specified Right-Index VX-form","vextuhrx RT,RA,VRB","4@0|RT@6|RA@11|VRB@16|1869@21|","v3.0" +"Vector Extract Unsigned Word to GPR using GPR-specified Left-Index VX-form","vextuwlx RT,RA,VRB","4@0|RT@6|RA@11|VRB@16|1677@21|","v3.0" +"Vector Extract Unsigned Word to GPR using GPR-specified Right-Index VX-form","vextuwrx RT,RA,VRB","4@0|RT@6|RA@11|VRB@16|1933@21|","v3.0" +"Vector Insert Byte from VSR using immediate-specified index VX-form","vinsertb VRT,VRB,UIM","4@0|VRT@6|/@11|UIM@12|VRB@16|781@21|","v3.0" +"Vector Insert Doubleword from VSR using immediate-specified index VX-form","vinsertd VRT,VRB,UIM","4@0|VRT@6|/@11|UIM@12|VRB@16|973@21|","v3.0" +"Vector Insert Halfword from VSR using immediate-specified index VX-form","vinserth VRT,VRB,UIM","4@0|VRT@6|/@11|UIM@12|VRB@16|845@21|","v3.0" +"Vector Insert Word from VSR using immediate-specified index VX-form","vinsertw VRT,VRB,UIM","4@0|VRT@6|/@11|UIM@12|VRB@16|909@21|","v3.0" +"Vector Multiply-by-10 & write Carry-out Unsigned Quadword VX-form","vmul10cuq VRT,VRA","4@0|VRT@6|VRA@11|///@16|1@21|","v3.0" +"Vector Multiply-by-10 Extended & write Carry-out Unsigned Quadword VX-form","vmul10ecuq VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|65@21|","v3.0" +"Vector Multiply-by-10 Extended Unsigned Quadword VX-form","vmul10euq VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|577@21|","v3.0" +"Vector Multiply-by-10 Unsigned Quadword VX-form","vmul10uq VRT,VRA","4@0|VRT@6|VRA@11|///@16|513@21|","v3.0" +"Vector Negate Doubleword VX-form","vnegd VRT,VRB","4@0|VRT@6|7@11|VRB@16|1538@21|","v3.0" +"Vector Negate Word VX-form","vnegw VRT,VRB","4@0|VRT@6|6@11|VRB@16|1538@21|","v3.0" +"Vector Permute Right-indexed VA-form","vpermr VRT,VRA,VRB,VRC","4@0|VRT@6|VRA@11|VRB@16|VRC@21|59@26|","v3.0" +"Vector Parity 
Byte Doubleword VX-form","vprtybd VRT,VRB","4@0|VRT@6|9@11|VRB@16|1538@21|","v3.0" +"Vector Parity Byte Quadword VX-form","vprtybq VRT,VRB","4@0|VRT@6|10@11|VRB@16|1538@21|","v3.0" +"Vector Parity Byte Word VX-form","vprtybw VRT,VRB","4@0|VRT@6|8@11|VRB@16|1538@21|","v3.0" +"Vector Rotate Left Doubleword then Mask Insert VX-form","vrldmi VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|197@21|","v3.0" +"Vector Rotate Left Doubleword then AND with Mask VX-form","vrldnm VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|453@21|","v3.0" +"Vector Rotate Left Word then Mask Insert VX-form","vrlwmi VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|133@21|","v3.0" +"Vector Rotate Left Word then AND with Mask VX-form","vrlwnm VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|389@21|","v3.0" +"Vector Shift Left Variable VX-form","vslv VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1860@21|","v3.0" +"Vector Shift Right Variable VX-form","vsrv VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1796@21|","v3.0" +"Wait X-form","wait WC,PL","31@0|??@6|/@8|WC@9|///@11|PL@14|///@16|30@21|/@31|","v3.0" +"VSX Scalar Absolute Quad-Precision X-form","xsabsqp VRT,VRB","63@0|VRT@6|0@11|VRB@16|804@21|/@31|","v3.0" +"VSX Scalar Add Quad-Precision [using round to Odd] X-form","xsaddqp VRT,VRA,VRB (RO=0)|xsaddqpo VRT,VRA,VRB (RO=1)","63@0|VRT@6|VRA@11|VRB@16|4@21|RO@31|","v3.0" +"VSX Scalar Compare Equal Double-Precision XX3-form","xscmpeqdp XT,XA,XB","60@0|T@6|A@11|B@16|3@21|AX@29|BX@30|TX@31|","v3.0" +"VSX Scalar Compare Exponents Double-Precision XX3-form","xscmpexpdp BF,XA,XB","60@0|BF@6|//@9|A@11|B@16|59@21|AX@29|BX@30|/@31|","v3.0" +"VSX Scalar Compare Exponents Quad-Precision X-form","xscmpexpqp BF,VRA,VRB","63@0|BF@6|//@9|VRA@11|VRB@16|164@21|/@31|","v3.0" +"VSX Scalar Compare Greater Than or Equal Double-Precision XX3-form","xscmpgedp XT,XA,XB","60@0|T@6|A@11|B@16|19@21|AX@29|BX@30|TX@31|","v3.0" +"VSX Scalar Compare Greater Than Double-Precision XX3-form","xscmpgtdp XT,XA,XB","60@0|T@6|A@11|B@16|11@21|AX@29|BX@30|TX@31|","v3.0" +"VSX Scalar Compare 
Ordered Quad-Precision X-form","xscmpoqp BF,VRA,VRB","63@0|BF@6|//@9|VRA@11|VRB@16|132@21|/@31|","v3.0" +"VSX Scalar Compare Unordered Quad-Precision X-form","xscmpuqp BF,VRA,VRB","63@0|BF@6|//@9|VRA@11|VRB@16|644@21|/@31|","v3.0" +"VSX Scalar Copy Sign Quad-Precision X-form","xscpsgnqp VRT,VRA,VRB","63@0|VRT@6|VRA@11|VRB@16|100@21|/@31|","v3.0" +"VSX Scalar Convert with round Double-Precision to Half-Precision format XX2-form","xscvdphp XT,XB","60@0|T@6|17@11|B@16|347@21|BX@30|TX@31|","v3.0" +"VSX Scalar Convert Double-Precision to Quad-Precision format X-form","xscvdpqp VRT,VRB","63@0|VRT@6|22@11|VRB@16|836@21|/@31|","v3.0" +"VSX Scalar Convert Half-Precision to Double-Precision format XX2-form","xscvhpdp XT,XB","60@0|T@6|16@11|B@16|347@21|BX@30|TX@31|","v3.0" +"VSX Scalar Convert with round Quad-Precision to Double-Precision format [using round to Odd] X-form","xscvqpdp VRT,VRB (RO=0)|xscvqpdpo VRT,VRB (RO=1)","63@0|VRT@6|20@11|VRB@16|836@21|RO@31|","v3.0" +"VSX Scalar Convert with round to zero Quad-Precision to Signed Doubleword format X-form","xscvqpsdz VRT,VRB","63@0|VRT@6|25@11|VRB@16|836@21|/@31|","v3.0" +"VSX Scalar Convert with round to zero Quad-Precision to Signed Word format X-form","xscvqpswz VRT,VRB","63@0|VRT@6|9@11|VRB@16|836@21|/@31|","v3.0" +"VSX Scalar Convert with round to zero Quad-Precision to Unsigned Doubleword format X-form","xscvqpudz VRT,VRB","63@0|VRT@6|17@11|VRB@16|836@21|/@31|","v3.0" +"VSX Scalar Convert with round to zero Quad-Precision to Unsigned Word format X-form","xscvqpuwz VRT,VRB","63@0|VRT@6|1@11|VRB@16|836@21|/@31|","v3.0" +"VSX Scalar Convert Signed Doubleword to Quad-Precision format X-form","xscvsdqp VRT,VRB","63@0|VRT@6|10@11|VRB@16|836@21|/@31|","v3.0" +"VSX Scalar Convert Unsigned Doubleword to Quad-Precision format X-form","xscvudqp VRT,VRB","63@0|VRT@6|2@11|VRB@16|836@21|/@31|","v3.0" +"VSX Scalar Divide Quad-Precision [using round to Odd] X-form","xsdivqp VRT,VRA,VRB (RO=0)|xsdivqpo VRT,VRA,VRB 
(RO=1)","63@0|VRT@6|VRA@11|VRB@16|548@21|RO@31|","v3.0" +"VSX Scalar Insert Exponent Double-Precision X-form","xsiexpdp XT,RA,RB","60@0|T@6|RA@11|RB@16|918@21|TX@31|","v3.0" +"VSX Scalar Insert Exponent Quad-Precision X-form","xsiexpqp VRT,VRA,VRB","63@0|VRT@6|VRA@11|VRB@16|868@21|/@31|","v3.0" +"VSX Scalar Multiply-Add Quad-Precision [using round to Odd] X-form","xsmaddqp VRT,VRA,VRB (RO=0)|xsmaddqpo VRT,VRA,VRB (RO=1)","63@0|VRT@6|VRA@11|VRB@16|388@21|RO@31|","v3.0" +"VSX Scalar Maximum Type-C Double-Precision XX3-form","xsmaxcdp XT,XA,XB","60@0|T@6|A@11|B@16|128@21|AX@29|BX@30|TX@31|","v3.0" +"VSX Scalar Maximum Type-J Double-Precision XX3-form","xsmaxjdp XT,XA,XB","60@0|T@6|A@11|B@16|144@21|AX@29|BX@30|TX@31|","v3.0" +"VSX Scalar Minimum Type-C Double-Precision XX3-form","xsmincdp XT,XA,XB","60@0|T@6|A@11|B@16|136@21|AX@29|BX@30|TX@31|","v3.0" +"VSX Scalar Minimum Type-J Double-Precision XX3-form","xsminjdp XT,XA,XB","60@0|T@6|A@11|B@16|152@21|AX@29|BX@30|TX@31|","v3.0" +"VSX Scalar Multiply-Subtract Quad-Precision [using round to Odd] X-form","xsmsubqp VRT,VRA,VRB (RO=0)|xsmsubqpo VRT,VRA,VRB (RO=1)","63@0|VRT@6|VRA@11|VRB@16|420@21|RO@31|","v3.0" +"VSX Scalar Multiply Quad-Precision [using round to Odd] X-form","xsmulqp VRT,VRA,VRB (RO=0)|xsmulqpo VRT,VRA,VRB (RO=1)","63@0|VRT@6|VRA@11|VRB@16|36@21|RO@31|","v3.0" +"VSX Scalar Negative Absolute Quad-Precision X-form","xsnabsqp VRT,VRB","63@0|VRT@6|8@11|VRB@16|804@21|TX@31|","v3.0" +"VSX Scalar Negate Quad-Precision X-form","xsnegqp VRT,VRB","63@0|VRT@6|16@11|VRB@16|804@21|/@31|","v3.0" +"VSX Scalar Negative Multiply-Add Quad-Precision [using round to Odd] X-form","xsnmaddqp VRT,VRA,VRB (RO=0)|xsnmaddqpo VRT,VRA,VRB (RO=1)","63@0|VRT@6|VRA@11|VRB@16|452@21|RO@31|","v3.0" +"VSX Scalar Negative Multiply-Subtract Quad-Precision [using round to Odd] X-form","xsnmsubqp VRT,VRA,VRB (RO=0)|xsnmsubqpo VRT,VRA,VRB (RO=1)","63@0|VRT@6|VRA@11|VRB@16|484@21|RO@31|","v3.0" +"VSX Scalar Round to Quad-Precision Integer [with 
Inexact] Z23-form","xsrqpi R,VRT,VRB,RMC (EX=0)|xsrqpix R,VRT,VRB,RMC (EX=1)","63@0|VRT@6|///@11|R@15|VRB@16|RMC@21|5@23|EX@31|","v3.0" +"VSX Scalar Round Quad-Precision to Double-Extended Precision Z23-form","xsrqpxp R,VRT,VRB,RMC","63@0|VRT@6|///@11|R@15|VRB@16|RMC@21|37@23|/@31|","v3.0" +"VSX Scalar Square Root Quad-Precision [using round to Odd] X-form","xssqrtqp VRT,VRB (RO=0)|xssqrtqpo VRT,VRB (RO=1)","63@0|VRT@6|27@11|VRB@16|804@21|RO@31|","v3.0" +"VSX Scalar Subtract Quad-Precision [using round to Odd] X-form","xssubqp VRT,VRA,VRB (RO=0)|xssubqpo VRT,VRA,VRB (RO=1)","63@0|VRT@6|VRA@11|VRB@16|516@21|RO@31|","v3.0" +"VSX Scalar Test Data Class Double-Precision XX2-form","xststdcdp BF,XB,DCMX","60@0|BF@6|DCMX@9|B@16|362@21|BX@30|/@31|","v3.0" +"VSX Scalar Test Data Class Quad-Precision X-form","xststdcqp BF,VRB,DCMX","63@0|BF@6|DCMX@9|VRB@16|708@21|/@31|","v3.0" +"VSX Scalar Test Data Class Single-Precision XX2-form","xststdcsp BF,XB,DCMX","60@0|BF@6|DCMX@9|B@16|298@21|BX@30|/@31|","v3.0" +"VSX Scalar Extract Exponent Double-Precision XX2-form","xsxexpdp RT,XB","60@0|RT@6|0@11|B@16|347@21|BX@30|/@31|","v3.0" +"VSX Scalar Extract Exponent Quad-Precision X-form","xsxexpqp VRT,VRB","63@0|VRT@6|2@11|VRB@16|804@21|/@31|","v3.0" +"VSX Scalar Extract Significand Double-Precision XX2-form","xsxsigdp RT,XB","60@0|RT@6|1@11|B@16|347@21|BX@30|/@31|","v3.0" +"VSX Scalar Extract Significand Quad-Precision X-form","xsxsigqp VRT,VRB","63@0|VRT@6|18@11|VRB@16|804@21|/@31|","v3.0" +"VSX Vector Convert Half-Precision to Single-Precision format XX2-form","xvcvhpsp XT,XB","60@0|T@6|24@11|B@16|475@21|BX@30|TX@31|","v3.0" +"VSX Vector Convert with round Single-Precision to Half-Precision format XX2-form","xvcvsphp XT,XB","60@0|T@6|25@11|B@16|475@21|BX@30|TX@31|","v3.0" +"VSX Vector Insert Exponent Double-Precision XX3-form","xviexpdp XT,XA,XB","60@0|T@6|A@11|B@16|248@21|AX@29|BX@30|TX@31|","v3.0" +"VSX Vector Insert Exponent Single-Precision XX3-form","xviexpsp 
XT,XA,XB","60@0|T@6|A@11|B@16|216@21|AX@29|BX@30|TX@31|","v3.0" +"VSX Vector Test Data Class Double-Precision XX2-form","xvtstdcdp XT,XB,DCMX","60@0|T@6|dx@11|B@16|15@21|dc@25|5@26|dm@29|BX@30|TX@31|","v3.0" +"VSX Vector Test Data Class Single-Precision XX2-form","xvtstdcsp XT,XB,DCMX","60@0|T@6|dx@11|B@16|13@21|dc@25|5@26|dm@29|BX@30|TX@31|","v3.0" +"VSX Vector Extract Exponent Double-Precision XX2-form","xvxexpdp XT,XB","60@0|T@6|0@11|B@16|475@21|BX@30|TX@31|","v3.0" +"VSX Vector Extract Exponent Single-Precision XX2-form","xvxexpsp XT,XB","60@0|T@6|8@11|B@16|475@21|BX@30|TX@31|","v3.0" +"VSX Vector Extract Significand Double-Precision XX2-form","xvxsigdp XT,XB","60@0|T@6|1@11|B@16|475@21|BX@30|TX@31|","v3.0" +"VSX Vector Extract Significand Single-Precision XX2-form","xvxsigsp XT,XB","60@0|T@6|9@11|B@16|475@21|BX@30|TX@31|","v3.0" +"VSX Vector Byte-Reverse Doubleword XX2-form","xxbrd XT,XB","60@0|T@6|23@11|B@16|475@21|BX@30|TX@31|","v3.0" +"VSX Vector Byte-Reverse Halfword XX2-form","xxbrh XT,XB","60@0|T@6|7@11|B@16|475@21|BX@30|TX@31|","v3.0" +"VSX Vector Byte-Reverse Quadword XX2-form","xxbrq XT,XB","60@0|T@6|31@11|B@16|475@21|BX@30|TX@31|","v3.0" +"VSX Vector Byte-Reverse Word XX2-form","xxbrw XT,XB","60@0|T@6|15@11|B@16|475@21|BX@30|TX@31|","v3.0" +"VSX Vector Extract Unsigned Word XX2-form","xxextractuw XT,XB,UIM","60@0|T@6|/@11|UIM@12|B@16|165@21|BX@30|TX@31|","v3.0" +"VSX Vector Insert Word XX2-form","xxinsertw XT,XB,UIM","60@0|T@6|/@11|UIM@12|B@16|181@21|BX@30|TX@31|","v3.0" +"VSX Vector Permute XX3-form","xxperm XT,XA,XB","60@0|T@6|A@11|B@16|26@21|AX@29|BX@30|TX@31|","v3.0" +"VSX Vector Permute Right-indexed XX3-form","xxpermr XT,XA,XB","60@0|T@6|A@11|B@16|58@21|AX@29|BX@30|TX@31|","v3.0" +"VSX Vector Splat Immediate Byte X-form","xxspltib XT,IMM8","60@0|T@6|0@11|IMM8@13|360@21|TX@31|","v3.0" +"Decimal Add Modulo VX-form","bcdadd. VRT,VRA,VRB,PS","4@0|VRT@6|VRA@11|VRB@16|1@21|PS@22|1@23|","v2.07" +"Decimal Subtract Modulo VX-form","bcdsub. 
VRT,VRA,VRB,PS","4@0|VRT@6|VRA@11|VRB@16|1@21|PS@22|65@23|","v2.07" +"Branch Conditional to Branch Target Address Register XL-form","bctar BO,BI,BH (LK=0)|bctarl BO,BI,BH (LK=1)","19@0|BO@6|BI@11|///@16|BH@19|560@21|LK@31|","v2.07" +"Clear BHRB X-form","clrbhrb","31@0|///@6|///@11|///@16|430@21|/@31|","v2.07" +"Floating Merge Even Word X-form","fmrgew FRT,FRA,FRB","63@0|FRT@6|FRA@11|FRB@16|966@21|/@31|","v2.07" +"Floating Merge Odd Word X-form","fmrgow FRT,FRA,FRB","63@0|FRT@6|FRA@11|FRB@16|838@21|/@31|","v2.07" +"Instruction Cache Block Touch X-form","icbt CT, RA, RB","31@0|/@6|CT@7|RA@11|RB@16|22@21|/@31|","v2.07" +"Load Quadword And Reserve Indexed X-form","lqarx RTp,RA,RB,EH","31@0|RTp@6|RA@11|RB@16|276@21|EH@31|","v2.07" +"Load VSX Scalar as Integer Word Algebraic Indexed X-form","lxsiwax XT,RA,RB","31@0|T@6|RA@11|RB@16|76@21|TX@31|","v2.07" +"Load VSX Scalar as Integer Word & Zero Indexed X-form","lxsiwzx XT,RA,RB","31@0|T@6|RA@11|RB@16|12@21|TX@31|","v2.07" +"Load VSX Scalar Single-Precision Indexed X-form","lxsspx XT,RA,RB","31@0|T@6|RA@11|RB@16|524@21|TX@31|","v2.07" +"Move From BHRB XFX-form","mfbhrbe RT,BHRBE","31@0|RT@6|BHRBE@11|302@21|/@31|","v2.07" +"Move From VSR Doubleword X-form","mfvsrd RA,XS","31@0|S@6|RA@11|///@16|51@21|SX@31|","v2.07" +"Move From VSR Word and Zero X-form","mfvsrwz RA,XS","31@0|S@6|RA@11|///@16|115@21|SX@31|","v2.07" +"Message Clear X-form","msgclr RB","31@0|///@6|///@11|RB@16|238@21|/@31|","v2.07" +"Message Clear Privileged X-form","msgclrp RB","31@0|///@6|///@11|RB@16|174@21|/@31|","v2.07" +"Message Send X-form","msgsnd RB","31@0|///@6|///@11|RB@16|206@21|/@31|","v2.07" +"Message Send Privileged X-form","msgsndp RB","31@0|///@6|///@11|RB@16|142@21|/@31|","v2.07" +"Move To VSR Doubleword X-form","mtvsrd XT,RA","31@0|T@6|RA@11|///@16|179@21|TX@31|","v2.07" +"Move To VSR Word Algebraic X-form","mtvsrwa XT,RA","31@0|T@6|RA@11|///@16|211@21|TX@31|","v2.07" +"Move To VSR Word and Zero X-form","mtvsrwz 
XT,RA","31@0|T@6|RA@11|///@16|243@21|TX@31|","v2.07" +"Return from Event Based Branch XL-form","rfebb S","19@0|///@6|///@11|///@16|S@20|146@21|/@31|","v2.07" +"Store Quadword Conditional Indexed X-form","stqcx. RSp,RA,RB","31@0|RSp@6|RA@11|RB@16|182@21|1@31|","v2.07" +"Store VSX Scalar as Integer Word Indexed X-form","stxsiwx XS,RA,RB","31@0|S@6|RA@11|RB@16|140@21|SX@31|","v2.07" +"Store VSX Scalar Single-Precision Indexed X-form","stxsspx XS,RA,RB","31@0|S@6|RA@11|RB@16|652@21|SX@31|","v2.07" +"Vector Add & write Carry Unsigned Quadword VX-form","vaddcuq VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|320@21|","v2.07" +"Vector Add Extended & write Carry Unsigned Quadword VA-form","vaddecuq VRT,VRA,VRB,VRC","4@0|VRT@6|VRA@11|VRB@16|VRC@21|61@26|","v2.07" +"Vector Add Extended Unsigned Quadword Modulo VA-form","vaddeuqm VRT,VRA,VRB,VRC","4@0|VRT@6|VRA@11|VRB@16|VRC@21|60@26|","v2.07" +"Vector Add Unsigned Doubleword Modulo VX-form","vaddudm VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|192@21|","v2.07" +"Vector Add Unsigned Quadword Modulo VX-form","vadduqm VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|256@21|","v2.07" +"Vector Bit Permute Quadword VX-form","vbpermq VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1356@21|","v2.07" +"Vector AES Cipher VX-form","vcipher VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1288@21|","v2.07" +"Vector AES Cipher Last VX-form","vcipherlast VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1289@21|","v2.07" +"Vector Count Leading Zeros Byte VX-form","vclzb VRT,VRB","4@0|VRT@6|///@11|VRB@16|1794@21|","v2.07" +"Vector Count Leading Zeros Doubleword VX-form","vclzd VRT,VRB","4@0|VRT@6|///@11|VRB@16|1986@21|","v2.07" +"Vector Count Leading Zeros Halfword VX-form","vclzh VRT,VRB","4@0|VRT@6|///@11|VRB@16|1858@21|","v2.07" +"Vector Count Leading Zeros Word VX-form","vclzw VRT,VRB","4@0|VRT@6|///@11|VRB@16|1922@21|","v2.07" +"Vector Compare Equal Unsigned Doubleword VC-form","vcmpequd VRT,VRA,VRB (Rc=0)|vcmpequd. 
VRT,VRA,VRB (Rc=1)","4@0|VRT@6|VRA@11|VRB@16|Rc@21|199@22|","v2.07" +"Vector Compare Greater Than Signed Doubleword VC-form","vcmpgtsd VRT,VRA,VRB (Rc=0)|vcmpgtsd. VRT,VRA,VRB (Rc=1)","4@0|VRT@6|VRA@11|VRB@16|Rc@21|967@22|","v2.07" +"Vector Compare Greater Than Unsigned Doubleword VC-form","vcmpgtud VRT,VRA,VRB (Rc=0)|vcmpgtud. VRT,VRA,VRB (Rc=1)","4@0|VRT@6|VRA@11|VRB@16|Rc@21|711@22|","v2.07" +"Vector Logical Equivalence VX-form","veqv VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1668@21|","v2.07" +"Vector Gather Bits by Bytes by Doubleword VX-form","vgbbd VRT,VRB","4@0|VRT@6|///@11|VRB@16|1292@21|","v2.07" +"Vector Maximum Signed Doubleword VX-form","vmaxsd VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|450@21|","v2.07" +"Vector Maximum Unsigned Doubleword VX-form","vmaxud VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|194@21|","v2.07" +"Vector Minimum Signed Doubleword VX-form","vminsd VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|962@21|","v2.07" +"Vector Minimum Unsigned Doubleword VX-form","vminud VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|706@21|","v2.07" +"Vector Merge Even Word VX-form","vmrgew VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1932@21|","v2.07" +"Vector Merge Odd Word VX-form","vmrgow VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1676@21|","v2.07" +"Vector Multiply Even Signed Word VX-form","vmulesw VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|904@21|","v2.07" +"Vector Multiply Even Unsigned Word VX-form","vmuleuw VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|648@21|","v2.07" +"Vector Multiply Odd Signed Word VX-form","vmulosw VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|392@21|","v2.07" +"Vector Multiply Odd Unsigned Word VX-form","vmulouw VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|136@21|","v2.07" +"Vector Multiply Unsigned Word Modulo VX-form","vmuluwm VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|137@21|","v2.07" +"Vector Logical NAND VX-form","vnand VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1412@21|","v2.07" +"Vector AES Inverse Cipher VX-form","vncipher VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1352@21|","v2.07" +"Vector AES 
Inverse Cipher Last VX-form","vncipherlast VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1353@21|","v2.07" +"Vector Logical OR with Complement VX-form","vorc VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1348@21|","v2.07" +"Vector Permute & Exclusive-OR VA-form","vpermxor VRT,VRA,VRB,VRC","4@0|VRT@6|VRA@11|VRB@16|VRC@21|45@26|","v2.07" +"Vector Pack Signed Doubleword Signed Saturate VX-form","vpksdss VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1486@21|","v2.07" +"Vector Pack Signed Doubleword Unsigned Saturate VX-form","vpksdus VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1358@21|","v2.07" +"Vector Pack Unsigned Doubleword Unsigned Modulo VX-form","vpkudum VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1102@21|","v2.07" +"Vector Pack Unsigned Doubleword Unsigned Saturate VX-form","vpkudus VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1230@21|","v2.07" +"Vector Polynomial Multiply-Sum Byte VX-form","vpmsumb VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1032@21|","v2.07" +"Vector Polynomial Multiply-Sum Doubleword VX-form","vpmsumd VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1224@21|","v2.07" +"Vector Polynomial Multiply-Sum Halfword VX-form","vpmsumh VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1096@21|","v2.07" +"Vector Polynomial Multiply-Sum Word VX-form","vpmsumw VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1160@21|","v2.07" +"Vector Population Count Byte VX-form","vpopcntb VRT,VRB","4@0|VRT@6|///@11|VRB@16|1795@21|","v2.07" +"Vector Population Count Doubleword VX-form","vpopcntd VRT,VRB","4@0|VRT@6|///@11|VRB@16|1987@21|","v2.07" +"Vector Population Count Halfword VX-form","vpopcnth VRT,VRB","4@0|VRT@6|///@11|VRB@16|1859@21|","v2.07" +"Vector Population Count Word VX-form","vpopcntw VRT,VRB","4@0|VRT@6|///@11|VRB@16|1923@21|","v2.07" +"Vector Rotate Left Doubleword VX-form","vrld VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|196@21|","v2.07" +"Vector AES SubBytes VX-form","vsbox VRT,VRA","4@0|VRT@6|VRA@11|///@16|1480@21|","v2.07" +"Vector SHA-512 Sigma Doubleword VX-form","vshasigmad 
VRT,VRA,ST,SIX","4@0|VRT@6|VRA@11|ST@16|SIX@17|1730@21|","v2.07" +"Vector SHA-256 Sigma Word VX-form","vshasigmaw VRT,VRA,ST,SIX","4@0|VRT@6|VRA@11|ST@16|SIX@17|1666@21|","v2.07" +"Vector Shift Left Doubleword VX-form","vsld VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1476@21|","v2.07" +"Vector Shift Right Algebraic Doubleword VX-form","vsrad VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|964@21|","v2.07" +"Vector Shift Right Doubleword VX-form","vsrd VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1732@21|","v2.07" +"Vector Subtract & write Carry-out Unsigned Quadword VX-form","vsubcuq VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1344@21|","v2.07" +"Vector Subtract Extended & write Carry-out Unsigned Quadword VA-form","vsubecuq VRT,VRA,VRB,VRC","4@0|VRT@6|VRA@11|VRB@16|VRC@21|63@26|","v2.07" +"Vector Subtract Extended Unsigned Quadword Modulo VA-form","vsubeuqm VRT,VRA,VRB,VRC","4@0|VRT@6|VRA@11|VRB@16|VRC@21|62@26|","v2.07" +"Vector Subtract Unsigned Doubleword Modulo VX-form","vsubudm VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1216@21|","v2.07" +"Vector Subtract Unsigned Quadword Modulo VX-form","vsubuqm VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1280@21|","v2.07" +"Vector Unpack High Signed Word VX-form","vupkhsw VRT,VRB","4@0|VRT@6|///@11|VRB@16|1614@21|","v2.07" +"Vector Unpack Low Signed Word VX-form","vupklsw VRT,VRB","4@0|VRT@6|///@11|VRB@16|1742@21|","v2.07" +"VSX Scalar Add Single-Precision XX3-form","xsaddsp XT,XA,XB","60@0|T@6|A@11|B@16|0@21|AX@29|BX@30|TX@31|","v2.07" +"VSX Scalar Convert Scalar Single-Precision to Vector Single-Precision format Non-signalling XX2-form","xscvdpspn XT,XB","60@0|T@6|///@11|B@16|267@21|BX@30|TX@31|","v2.07" +"VSX Scalar Convert Single-Precision to Double-Precision format Non-signalling XX2-form","xscvspdpn XT,XB","60@0|T@6|///@11|B@16|331@21|BX@30|TX@31|","v2.07" +"VSX Scalar Convert with round Signed Doubleword to Single-Precision format XX2-form","xscvsxdsp XT,XB","60@0|T@6|///@11|B@16|312@21|BX@30|TX@31|","v2.07" +"VSX Scalar Convert with round Unsigned 
Doubleword to Single-Precision XX2-form","xscvuxdsp XT,XB","60@0|T@6|///@11|B@16|296@21|BX@30|TX@31|","v2.07" +"VSX Scalar Divide Single-Precision XX3-form","xsdivsp XT,XA,XB","60@0|T@6|A@11|B@16|24@21|AX@29|BX@30|TX@31|","v2.07" +"VSX Scalar Multiply-Add Type-A Single-Precision XX3-form","xsmaddasp XT,XA,XB","60@0|T@6|A@11|B@16|1@21|AX@29|BX@30|TX@31|","v2.07" +"VSX Scalar Multiply-Add Type-M Single-Precision XX3-form","xsmaddmsp XT,XA,XB","60@0|T@6|A@11|B@16|9@21|AX@29|BX@30|TX@31|","v2.07" +"VSX Scalar Multiply-Subtract Type-A Single-Precision XX3-form","xsmsubasp XT,XA,XB","60@0|T@6|A@11|B@16|17@21|AX@29|BX@30|TX@31|","v2.07" +"VSX Scalar Multiply-Subtract Type-M Single-Precision XX3-form","xsmsubmsp XT,XA,XB","60@0|T@6|A@11|B@16|25@21|AX@29|BX@30|TX@31|","v2.07" +"VSX Scalar Multiply Single-Precision XX3-form","xsmulsp XT,XA,XB","60@0|T@6|A@11|B@16|16@21|AX@29|BX@30|TX@31|","v2.07" +"VSX Scalar Negative Multiply-Add Type-A Single-Precision XX3-form","xsnmaddasp XT,XA,XB","60@0|T@6|A@11|B@16|129@21|AX@29|BX@30|TX@31|","v2.07" +"VSX Scalar Negative Multiply-Add Type-M Single-Precision XX3-form","xsnmaddmsp XT,XA,XB","60@0|T@6|A@11|B@16|137@21|AX@29|BX@30|TX@31|","v2.07" +"VSX Scalar Negative Multiply-Subtract Type-A Single-Precision XX3-form","xsnmsubasp XT,XA,XB","60@0|T@6|A@11|B@16|145@21|AX@29|BX@30|TX@31|","v2.07" +"VSX Scalar Negative Multiply-Subtract Type-M Single-Precision XX3-form","xsnmsubmsp XT,XA,XB","60@0|T@6|A@11|B@16|153@21|AX@29|BX@30|TX@31|","v2.07" +"VSX Scalar Reciprocal Estimate Single-Precision XX2-form","xsresp XT,XB","60@0|T@6|///@11|B@16|26@21|BX@30|TX@31|","v2.07" +"VSX Scalar Round to Single-Precision XX2-form","xsrsp XT,XB","60@0|T@6|///@11|B@16|281@21|BX@30|TX@31|","v2.07" +"VSX Scalar Reciprocal Square Root Estimate Single-Precision XX2-form","xsrsqrtesp XT,XB","60@0|T@6|///@11|B@16|10@21|BX@30|TX@31|","v2.07" +"VSX Scalar Square Root Single-Precision XX2-form","xssqrtsp XT,XB","60@0|T@6|///@11|B@16|11@21|BX@30|TX@31|","v2.07" +"VSX 
Scalar Subtract Single-Precision XX3-form","xssubsp XT,XA,XB","60@0|T@6|A@11|B@16|8@21|AX@29|BX@30|TX@31|","v2.07" +"VSX Vector Logical Equivalence XX3-form","xxleqv XT,XA,XB","60@0|T@6|A@11|B@16|186@21|AX@29|BX@30|TX@31|","v2.07" +"VSX Vector Logical NAND XX3-form","xxlnand XT,XA,XB","60@0|T@6|A@11|B@16|178@21|AX@29|BX@30|TX@31|","v2.07" +"VSX Vector Logical OR with Complement XX3-form","xxlorc XT,XA,XB","60@0|T@6|A@11|B@16|170@21|AX@29|BX@30|TX@31|","v2.07" +"Add and Generate Sixes XO-form","addg6s RT,RA,RB","31@0|RT@6|RA@11|RB@16|/@21|74@22|/@31|","v2.06" +"Bit Permute Doubleword X-form","bpermd RA,RS,RB","31@0|RS@6|RA@11|RB@16|252@21|/@31|","v2.06" +"Convert Binary Coded Decimal To Declets X-form","cbcdtd RA, RS","31@0|RS@6|RA@11|///@16|314@21|/@31|","v2.06" +"Convert Declets To Binary Coded Decimal X-form","cdtbcd RA, RS","31@0|RS@6|RA@11|///@16|282@21|/@31|","v2.06" +"DFP Convert From Fixed X-form","dcffix FRT,FRB (Rc=0)|dcffix. FRT,FRB (Rc=1)","59@0|FRT@6|///@11|FRB@16|802@21|Rc@31|","v2.06" +"Divide Doubleword Extended XO-form","divde RT,RA,RB (OE=0 Rc=0)|divde. RT,RA,RB (OE=0 Rc=1)|divdeo RT,RA,RB (OE=1 Rc=0)|divdeo. RT,RA,RB (OE=1 Rc=1)","31@0|RT@6|RA@11|RB@16|OE@21|425@22|Rc@31|","v2.06" +"Divide Doubleword Extended Unsigned XO-form","divdeu RT,RA,RB (OE=0 Rc=0)|divdeu. RT,RA,RB (OE=0 Rc=1)|divdeuo RT,RA,RB (OE=1 Rc=0)|divdeuo. RT,RA,RB (OE=1 Rc=1)","31@0|RT@6|RA@11|RB@16|OE@21|393@22|Rc@31|","v2.06" +"Divide Word Extended XO-form","divwe RT,RA,RB (OE=0 Rc=0)|divwe. RT,RA,RB (OE=0 Rc=1)|divweo RT,RA,RB (OE=1 Rc=0)|divweo. RT,RA,RB (OE=1 Rc=1)","31@0|RT@6|RA@11|RB@16|OE@21|427@22|Rc@31|","v2.06" +"Divide Word Extended Unsigned XO-form","divweu RT,RA,RB (OE=0 Rc=0)|divweu. RT,RA,RB (OE=0 Rc=1)|divweuo RT,RA,RB (OE=1 Rc=0)|divweuo. RT,RA,RB (OE=1 Rc=1)","31@0|RT@6|RA@11|RB@16|OE@21|395@22|Rc@31|","v2.06" +"Floating Convert with round Signed Doubleword to Single-Precision format X-form","fcfids FRT,FRB (Rc=0)|fcfids. 
FRT,FRB (Rc=1)","59@0|FRT@6|///@11|FRB@16|846@21|Rc@31|","v2.06" +"Floating Convert with round Unsigned Doubleword to Double-Precision format X-form","fcfidu FRT,FRB (Rc=0)|fcfidu. FRT,FRB (Rc=1)","63@0|FRT@6|///@11|FRB@16|974@21|Rc@31|","v2.06" +"Floating Convert with round Unsigned Doubleword to Single-Precision format X-form","fcfidus FRT,FRB (Rc=0)|fcfidus. FRT,FRB (Rc=1)","59@0|FRT@6|///@11|FRB@16|974@21|Rc@31|","v2.06" +"Floating Convert with round Double-Precision To Unsigned Doubleword format X-form","fctidu FRT,FRB (Rc=0)|fctidu. FRT,FRB (Rc=1)","63@0|FRT@6|///@11|FRB@16|942@21|Rc@31|","v2.06" +"Floating Convert with truncate Double-Precision To Unsigned Doubleword format X-form","fctiduz FRT,FRB (Rc=0)|fctiduz. FRT,FRB (Rc=1)","63@0|FRT@6|///@11|FRB@16|943@21|Rc@31|","v2.06" +"Floating Convert with round Double-Precision To Unsigned Word format X-form","fctiwu FRT,FRB (Rc=0)|fctiwu. FRT,FRB (Rc=1)","63@0|FRT@6|///@11|FRB@16|142@21|Rc@31|","v2.06" +"Floating Convert with truncate Double-Precision To Unsigned Word format X-form","fctiwuz FRT,FRB (Rc=0)|fctiwuz. 
FRT,FRB (Rc=1)","63@0|FRT@6|///@11|FRB@16|143@21|Rc@31|","v2.06" +"Floating Test for software Divide X-form","ftdiv BF,FRA,FRB","63@0|BF@6|//@9|FRA@11|FRB@16|128@21|/@31|","v2.06" +"Floating Test for software Square Root X-form","ftsqrt BF,FRB","63@0|BF@6|//@9|///@11|FRB@16|160@21|/@31|","v2.06" +"Load Byte And Reserve Indexed X-form","lbarx RT,RA,RB,EH","31@0|RT@6|RA@11|RB@16|52@21|EH@31|","v2.06" +"Load Doubleword Byte-Reverse Indexed X-form","ldbrx RT,RA,RB","31@0|RT@6|RA@11|RB@16|532@21|/@31|","v2.06" +"Load Floating-Point as Integer Word & Zero Indexed X-form","lfiwzx FRT,RA,RB","31@0|FRT@6|RA@11|RB@16|887@21|/@31|","v2.06" +"Load Halfword And Reserve Indexed Xform","lharx RT,RA,RB,EH","31@0|RT@6|RA@11|RB@16|116@21|EH@31|","v2.06" +"Load VSX Scalar Doubleword Indexed X-form","lxsdx XT,RA,RB","31@0|T@6|RA@11|RB@16|588@21|TX@31|","v2.06" +"Load VSX Vector Doubleword*2 Indexed X-form","lxvd2x XT,RA,RB","31@0|T@6|RA@11|RB@16|844@21|TX@31|","v2.06" +"Load VSX Vector Doubleword & Splat Indexed X-form","lxvdsx XT,RA,RB","31@0|T@6|RA@11|RB@16|332@21|TX@31|","v2.06" +"Load VSX Vector Word*4 Indexed X-form","lxvw4x XT,RA,RB","31@0|T@6|RA@11|RB@16|780@21|TX@31|","v2.06" +"Population Count Doubleword X-form","popcntd RA, RS","31@0|RS@6|RA@11|///@16|506@21|/@31|","v2.06" +"Population Count Words X-form","popcntw RA, RS","31@0|RS@6|RA@11|///@16|378@21|/@31|","v2.06" +"Store Byte Conditional Indexed X-form","stbcx. RS,RA,RB","31@0|RS@6|RA@11|RB@16|694@21|1@31|","v2.06" +"Store Doubleword Byte-Reverse Indexed X-form","stdbrx RS,RA,RB","31@0|RS@6|RA@11|RB@16|660@21|/@31|","v2.06" +"Store Halfword Conditional Indexed X-form","sthcx. 
RS,RA,RB","31@0|RS@6|RA@11|RB@16|726@21|1@31|","v2.06" +"Store VSX Scalar Doubleword Indexed X-form","stxsdx XS,RA,RB","31@0|S@6|RA@11|RB@16|716@21|SX@31|","v2.06" +"Store VSX Vector Doubleword*2 Indexed X-form","stxvd2x XS,RA,RB","31@0|S@6|RA@11|RB@16|972@21|SX@31|","v2.06" +"Store VSX Vector Word*4 Indexed X-form","stxvw4x XS,RA,RB","31@0|S@6|RA@11|RB@16|908@21|SX@31|","v2.06" +"VSX Scalar Absolute Double-Precision XX2-form","xsabsdp XT,XB","60@0|T@6|///@11|B@16|345@21|BX@30|TX@31|","v2.06" +"VSX Scalar Add Double-Precision XX3-form","xsadddp XT,XA,XB","60@0|T@6|A@11|B@16|32@21|AX@29|BX@30|TX@31|","v2.06" +"VSX Scalar Compare Ordered Double-Precision XX3-form","xscmpodp BF,XA,XB","60@0|BF@6|//@9|A@11|B@16|43@21|AX@29|BX@30|/@31|","v2.06" +"VSX Scalar Compare Unordered Double-Precision XX3-form","xscmpudp BF,XA,XB","60@0|BF@6|//@9|A@11|B@16|35@21|AX@29|BX@30|/@31|","v2.06" +"VSX Scalar Copy Sign Double-Precision XX3-form","xscpsgndp XT,XA,XB","60@0|T@6|A@11|B@16|176@21|AX@29|BX@30|TX@31|","v2.06" +"VSX Scalar Convert with round Double-Precision to Single-Precision format XX2-form","xscvdpsp XT,XB","60@0|T@6|///@11|B@16|265@21|BX@30|TX@31|","v2.06" +"VSX Scalar Convert with round to zero Double-Precision to Signed Doubleword format XX2-form","xscvdpsxds XT,XB","60@0|T@6|///@11|B@16|344@21|BX@30|TX@31|","v2.06" +"VSX Scalar Convert with round to zero Double-Precision to Signed Word format XX2-form","xscvdpsxws XT,XB","60@0|T@6|///@11|B@16|88@21|BX@30|TX@31|","v2.06" +"VSX Scalar Convert with round to zero Double-Precision to Unsigned Doubleword format XX2-form","xscvdpuxds XT,XB","60@0|T@6|///@11|B@16|328@21|BX@30|TX@31|","v2.06" +"VSX Scalar Convert with round to zero Double-Precision to Unsigned Word format XX2-form","xscvdpuxws XT,XB","60@0|T@6|///@11|B@16|72@21|BX@30|TX@31|","v2.06" +"VSX Scalar Convert Single-Precision to Double-Precision format XX2-form","xscvspdp XT,XB","60@0|T@6|///@11|B@16|329@21|BX@30|TX@31|","v2.06" +"VSX Scalar Convert with round Signed 
Doubleword to Double-Precision format XX2-form","xscvsxddp XT,XB","60@0|T@6|///@11|B@16|376@21|BX@30|TX@31|","v2.06" +"VSX Scalar Convert with round Unsigned Doubleword to Double-Precision format XX2-form","xscvuxddp XT,XB","60@0|T@6|///@11|B@16|360@21|BX@30|TX@31|","v2.06" +"VSX Scalar Divide Double-Precision XX3-form","xsdivdp XT,XA,XB","60@0|T@6|A@11|B@16|56@21|AX@29|BX@30|TX@31|","v2.06" +"VSX Scalar Multiply-Add Type-A Double-Precision XX3-form","xsmaddadp XT,XA,XB","60@0|T@6|A@11|B@16|33@21|AX@29|BX@30|TX@31|","v2.06" +"VSX Scalar Multiply-Add Type-M Double-Precision XX3-form","xsmaddmdp XT,XA,XB","60@0|T@6|A@11|B@16|41@21|AX@29|BX@30|TX@31|","v2.06" +"VSX Scalar Maximum Double-Precision XX3-form","xsmaxdp XT,XA,XB","60@0|T@6|A@11|B@16|160@21|AX@29|BX@30|TX@31|","v2.06" +"VSX Scalar Minimum Double-Precision XX3-form","xsmindp XT,XA,XB","60@0|T@6|A@11|B@16|168@21|AX@29|BX@30|TX@31|","v2.06" +"VSX Scalar Multiply-Subtract Type-A Double-Precision XX3-form","xsmsubadp XT,XA,XB","60@0|T@6|A@11|B@16|49@21|AX@29|BX@30|TX@31|","v2.06" +"VSX Scalar Multiply-Subtract Type-M Double-Precision XX3-form","xsmsubmdp XT,XA,XB","60@0|T@6|A@11|B@16|57@21|AX@29|BX@30|TX@31|","v2.06" +"VSX Scalar Multiply Double-Precision XX3-form","xsmuldp XT,XA,XB","60@0|T@6|A@11|B@16|48@21|AX@29|BX@30|TX@31|","v2.06" +"VSX Scalar Negative Absolute Double-Precision XX2-form","xsnabsdp XT,XB","60@0|T@6|///@11|B@16|361@21|BX@30|TX@31|","v2.06" +"VSX Scalar Negate Double-Precision XX2-form","xsnegdp XT,XB","60@0|T@6|///@11|B@16|377@21|BX@30|TX@31|","v2.06" +"VSX Scalar Negative Multiply-Add Type-A Double-Precision XX3-form","xsnmaddadp XT,XA,XB","60@0|T@6|A@11|B@16|161@21|AX@29|BX@30|TX@31|","v2.06" +"VSX Scalar Negative Multiply-Add Type-M Double-Precision XX3-form","xsnmaddmdp XT,XA,XB","60@0|T@6|A@11|B@16|169@21|AX@29|BX@30|TX@31|","v2.06" +"VSX Scalar Negative Multiply-Subtract Type-A Double-Precision XX3-form","xsnmsubadp XT,XA,XB","60@0|T@6|A@11|B@16|177@21|AX@29|BX@30|TX@31|","v2.06" +"VSX 
Scalar Negative Multiply-Subtract Type-M Double-Precision XX3-form","xsnmsubmdp XT,XA,XB","60@0|T@6|A@11|B@16|185@21|AX@29|BX@30|TX@31|","v2.06" +"VSX Scalar Round to Double-Precision Integer using round to Nearest Away XX2-form","xsrdpi XT,XB","60@0|T@6|///@11|B@16|73@21|BX@30|TX@31|","v2.06" +"VSX Scalar Round to Double-Precision Integer exact using Current rounding mode XX2-form","xsrdpic XT,XB","60@0|T@6|///@11|B@16|107@21|BX@30|TX@31|","v2.06" +"VSX Scalar Round to Double-Precision Integer using round toward -Infinity XX2-form","xsrdpim XT,XB","60@0|T@6|///@11|B@16|121@21|BX@30|TX@31|","v2.06" +"VSX Scalar Round to Double-Precision Integer using round toward +Infinity XX2-form","xsrdpip XT,XB","60@0|T@6|///@11|B@16|105@21|BX@30|TX@31|","v2.06" +"VSX Scalar Round to Double-Precision Integer using round toward Zero XX2-form","xsrdpiz XT,XB","60@0|T@6|///@11|B@16|89@21|BX@30|TX@31|","v2.06" +"VSX Scalar Reciprocal Estimate Double-Precision XX2-form","xsredp XT,XB","60@0|T@6|///@11|B@16|90@21|BX@30|TX@31|","v2.06" +"VSX Scalar Reciprocal Square Root Estimate Double-Precision XX2-form","xsrsqrtedp XT,XB","60@0|T@6|///@11|B@16|74@21|BX@30|TX@31|","v2.06" +"VSX Scalar Square Root Double-Precision XX2-form","xssqrtdp XT,XB","60@0|T@6|///@11|B@16|75@21|BX@30|TX@31|","v2.06" +"VSX Scalar Subtract Double-Precision XX3-form","xssubdp XT,XA,XB","60@0|T@6|A@11|B@16|40@21|AX@29|BX@30|TX@31|","v2.06" +"VSX Scalar Test for software Divide Double-Precision XX3-form","xstdivdp BF,XA,XB","60@0|BF@6|//@9|A@11|B@16|61@21|AX@29|BX@30|/@31|","v2.06" +"VSX Scalar Test for software Square Root Double-Precision XX2-form","xstsqrtdp BF,XB","60@0|BF@6|//@9|///@11|B@16|106@21|BX@30|/@31|","v2.06" +"VSX Vector Absolute Value Double-Precision XX2-form","xvabsdp XT,XB","60@0|T@6|///@11|B@16|473@21|BX@30|TX@31|","v2.06" +"VSX Vector Absolute Value Single-Precision XX2-form","xvabssp XT,XB","60@0|T@6|///@11|B@16|409@21|BX@30|TX@31|","v2.06" +"VSX Vector Add Double-Precision XX3-form","xvadddp 
XT,XA,XB","60@0|T@6|A@11|B@16|96@21|AX@29|BX@30|TX@31|","v2.06" +"VSX Vector Add Single-Precision XX3-form","xvaddsp XT,XA,XB","60@0|T@6|A@11|B@16|64@21|AX@29|BX@30|TX@31|","v2.06" +"VSX Vector Compare Equal To Double-Precision XX3-form","xvcmpeqdp XT,XA,XB (Rc=0)|xvcmpeqdp. XT,XA,XB (Rc=1)","60@0|T@6|A@11|B@16|Rc@21|99@22|AX@29|BX@30|TX@31|","v2.06" +"VSX Vector Compare Equal To Single-Precision XX3-form","xvcmpeqsp XT,XA,XB (Rc=0)|xvcmpeqsp. XT,XA,XB (Rc=1)","60@0|T@6|A@11|B@16|Rc@21|67@22|AX@29|BX@30|TX@31|","v2.06" +"VSX Vector Compare Greater Than or Equal To Double-Precision XX3-form","xvcmpgedp XT,XA,XB (Rc=0)|xvcmpgedp. XT,XA,XB (Rc=1)","60@0|T@6|A@11|B@16|Rc@21|115@22|AX@29|BX@30|TX@31|","v2.06" +"VSX Vector Compare Greater Than or Equal To Single-Precision XX3-form","xvcmpgesp XT,XA,XB (Rc=0)|xvcmpgesp. XT,XA,XB (Rc=1)","60@0|T@6|A@11|B@16|Rc@21|83@22|AX@29|BX@30|TX@31|","v2.06" +"VSX Vector Compare Greater Than Double-Precision XX3-form","xvcmpgtdp XT,XA,XB (Rc=0)|xvcmpgtdp. XT,XA,XB (Rc=1)","60@0|T@6|A@11|B@16|Rc@21|107@22|AX@29|BX@30|TX@31|","v2.06" +"VSX Vector Compare Greater Than Single-Precision XX3-form","xvcmpgtsp XT,XA,XB (Rc=0)|xvcmpgtsp. 
XT,XA,XB (Rc=1)","60@0|T@6|A@11|B@16|Rc@21|75@22|AX@29|BX@30|TX@31|","v2.06" +"VSX Vector Copy Sign Double-Precision XX3-form","xvcpsgndp XT,XA,XB","60@0|T@6|A@11|B@16|240@21|AX@29|BX@30|TX@31|","v2.06" +"VSX Vector Copy Sign Single-Precision XX3-form","xvcpsgnsp XT,XA,XB","60@0|T@6|A@11|B@16|208@21|AX@29|BX@30|TX@31|","v2.06" +"VSX Vector Convert with round Double-Precision to Single-Precision format XX2-form","xvcvdpsp XT,XB","60@0|T@6|///@11|B@16|393@21|BX@30|TX@31|","v2.06" +"VSX Vector Convert with round to zero Double-Precision to Signed Doubleword format XX2-form","xvcvdpsxds XT,XB","60@0|T@6|///@11|B@16|472@21|BX@30|TX@31|","v2.06" +"VSX Vector Convert with round to zero Double-Precision to Signed Word format XX2-form","xvcvdpsxws XT,XB","60@0|T@6|///@11|B@16|216@21|BX@30|TX@31|","v2.06" +"VSX Vector Convert with round to zero Double-Precision to Unsigned Doubleword format XX2-form","xvcvdpuxds XT,XB","60@0|T@6|///@11|B@16|456@21|BX@30|TX@31|","v2.06" +"VSX Vector Convert with round to zero Double-Precision to Unsigned Word format XX2-form","xvcvdpuxws XT,XB","60@0|T@6|///@11|B@16|200@21|BX@30|TX@31|","v2.06" +"VSX Vector Convert Single-Precision to Double-Precision format XX2-form","xvcvspdp XT,XB","60@0|T@6|///@11|B@16|457@21|BX@30|TX@31|","v2.06" +"VSX Vector Convert with round to zero Single-Precision to Signed Doubleword format XX2-form","xvcvspsxds XT,XB","60@0|T@6|///@11|B@16|408@21|BX@30|TX@31|","v2.06" +"VSX Vector Convert with round to zero Single-Precision to Signed Word format XX2-form","xvcvspsxws XT,XB","60@0|T@6|///@11|B@16|152@21|BX@30|TX@31|","v2.06" +"VSX Vector Convert with round to zero Single-Precision to Unsigned Doubleword format XX2-form","xvcvspuxds XT,XB","60@0|T@6|///@11|B@16|392@21|BX@30|TX@31|","v2.06" +"VSX Vector Convert with round to zero Single-Precision to Unsigned Word format XX2-form","xvcvspuxws XT,XB","60@0|T@6|///@11|B@16|136@21|BX@30|TX@31|","v2.06" +"VSX Vector Convert with round Signed Doubleword to Double-Precision 
format XX2-form","xvcvsxddp XT,XB","60@0|T@6|///@11|B@16|504@21|BX@30|TX@31|","v2.06" +"VSX Vector Convert with round Signed Doubleword to Single-Precision format XX2-form","xvcvsxdsp XT,XB","60@0|T@6|///@11|B@16|440@21|BX@30|TX@31|","v2.06" +"VSX Vector Convert Signed Word to Double-Precision format XX2-form","xvcvsxwdp XT,XB","60@0|T@6|///@11|B@16|248@21|BX@30|TX@31|","v2.06" +"VSX Vector Convert with round Signed Word to Single-Precision format XX2-form","xvcvsxwsp XT,XB","60@0|T@6|///@11|B@16|184@21|BX@30|TX@31|","v2.06" +"VSX Vector Convert with round Unsigned Doubleword to Double-Precision format XX2-form","xvcvuxddp XT,XB","60@0|T@6|///@11|B@16|488@21|BX@30|TX@31|","v2.06" +"VSX Vector Convert with round Unsigned Doubleword to Single-Precision format XX2-form","xvcvuxdsp XT,XB","60@0|T@6|///@11|B@16|424@21|BX@30|TX@31|","v2.06" +"VSX Vector Convert Unsigned Word to Double-Precision format XX2-form","xvcvuxwdp XT,XB","60@0|T@6|///@11|B@16|232@21|BX@30|TX@31|","v2.06" +"VSX Vector Convert with round Unsigned Word to Single-Precision format XX2-form","xvcvuxwsp XT,XB","60@0|T@6|///@11|B@16|168@21|BX@30|TX@31|","v2.06" +"VSX Vector Divide Double-Precision XX3-form","xvdivdp XT,XA,XB","60@0|T@6|A@11|B@16|120@21|AX@29|BX@30|TX@31|","v2.06" +"VSX Vector Divide Single-Precision XX3-form","xvdivsp XT,XA,XB","60@0|T@6|A@11|B@16|88@21|AX@29|BX@30|TX@31|","v2.06" +"VSX Vector Multiply-Add Type-A Double-Precision XX3-form","xvmaddadp XT,XA,XB","60@0|T@6|A@11|B@16|97@21|AX@29|BX@30|TX@31|","v2.06" +"VSX Vector Multiply-Add Type-A Single-Precision XX3-form","xvmaddasp XT,XA,XB","60@0|T@6|A@11|B@16|65@21|AX@29|BX@30|TX@31|","v2.06" +"VSX Vector Multiply-Add Type-M Double-Precision XX3-form","xvmaddmdp XT,XA,XB","60@0|T@6|A@11|B@16|105@21|AX@29|BX@30|TX@31|","v2.06" +"VSX Vector Multiply-Add Type-M Single-Precision XX3-form","xvmaddmsp XT,XA,XB","60@0|T@6|A@11|B@16|73@21|AX@29|BX@30|TX@31|","v2.06" +"VSX Vector Maximum Double-Precision XX3-form","xvmaxdp 
XT,XA,XB","60@0|T@6|A@11|B@16|224@21|AX@29|BX@30|TX@31|","v2.06" +"VSX Vector Maximum Single-Precision XX3-form","xvmaxsp XT,XA,XB","60@0|T@6|A@11|B@16|192@21|AX@29|BX@30|TX@31|","v2.06" +"VSX Vector Minimum Double-Precision XX3-form","xvmindp XT,XA,XB","60@0|T@6|A@11|B@16|232@21|AX@29|BX@30|TX@31|","v2.06" +"VSX Vector Minimum Single-Precision XX3-form","xvminsp XT,XA,XB","60@0|T@6|A@11|B@16|200@21|AX@29|BX@30|TX@31|","v2.06" +"VSX Vector Multiply-Subtract Type-A Double-Precision XX3-form","xvmsubadp XT,XA,XB","60@0|T@6|A@11|B@16|113@21|AX@29|BX@30|TX@31|","v2.06" +"VSX Vector Multiply-Subtract Type-A Single-Precision XX3-form","xvmsubasp XT,XA,XB","60@0|T@6|A@11|B@16|81@21|AX@29|BX@30|TX@31|","v2.06" +"VSX Vector Multiply-Subtract Type-M Double-Precision XX3-form","xvmsubmdp XT,XA,XB","60@0|T@6|A@11|B@16|121@21|AX@29|BX@30|TX@31|","v2.06" +"VSX Vector Multiply-Subtract Type-M Single-Precision XX3-form","xvmsubmsp XT,XA,XB","60@0|T@6|A@11|B@16|89@21|AX@29|BX@30|TX@31|","v2.06" +"VSX Vector Multiply Double-Precision XX3-form","xvmuldp XT,XA,XB","60@0|T@6|A@11|B@16|112@21|AX@29|BX@30|TX@31|","v2.06" +"VSX Vector Multiply Single-Precision XX3-form","xvmulsp XT,XA,XB","60@0|T@6|A@11|B@16|80@21|AX@29|BX@30|TX@31|","v2.06" +"VSX Vector Negative Absolute Double-Precision XX2-form","xvnabsdp XT,XB","60@0|T@6|///@11|B@16|489@21|BX@30|TX@31|","v2.06" +"VSX Vector Negative Absolute Single-Precision XX2-form","xvnabssp XT,XB","60@0|T@6|///@11|B@16|425@21|BX@30|TX@31|","v2.06" +"VSX Vector Negate Double-Precision XX2-form","xvnegdp XT,XB","60@0|T@6|///@11|B@16|505@21|BX@30|TX@31|","v2.06" +"VSX Vector Negate Single-Precision XX2-form","xvnegsp XT,XB","60@0|T@6|///@11|B@16|441@21|BX@30|TX@31|","v2.06" +"VSX Vector Negative Multiply-Add Type-A Double-Precision XX3-form","xvnmaddadp XT,XA,XB","60@0|T@6|A@11|B@16|225@21|AX@29|BX@30|TX@31|","v2.06" +"VSX Vector Negative Multiply-Add Type-A Single-Precision XX3-form","xvnmaddasp 
XT,XA,XB","60@0|T@6|A@11|B@16|193@21|AX@29|BX@30|TX@31|","v2.06" +"VSX Vector Negative Multiply-Add Type-M Double-Precision XX3-form","xvnmaddmdp XT,XA,XB","60@0|T@6|A@11|B@16|233@21|AX@29|BX@30|TX@31|","v2.06" +"VSX Vector Negative Multiply-Add Type-M Single-Precision XX3-form","xvnmaddmsp XT,XA,XB","60@0|T@6|A@11|B@16|201@21|AX@29|BX@30|TX@31|","v2.06" +"VSX Vector Negative Multiply-Subtract Type-A Double-Precision XX3-form","xvnmsubadp XT,XA,XB","60@0|T@6|A@11|B@16|241@21|AX@29|BX@30|TX@31|","v2.06" +"VSX Vector Negative Multiply-Subtract Type-A Single-Precision XX3-form","xvnmsubasp XT,XA,XB","60@0|T@6|A@11|B@16|209@21|AX@29|BX@30|TX@31|","v2.06" +"VSX Vector Negative Multiply-Subtract Type-M Double-Precision XX3-form","xvnmsubmdp XT,XA,XB","60@0|T@6|A@11|B@16|249@21|AX@29|BX@30|TX@31|","v2.06" +"VSX Vector Negative Multiply-Subtract Type-M Single-Precision XX3-form","xvnmsubmsp XT,XA,XB","60@0|T@6|A@11|B@16|217@21|AX@29|BX@30|TX@31|","v2.06" +"VSX Vector Round to Double-Precision Integer using round to Nearest Away XX2-form","xvrdpi XT,XB","60@0|T@6|///@11|B@16|201@21|BX@30|TX@31|","v2.06" +"VSX Vector Round to Double-Precision Integer Exact using Current rounding mode XX2-form","xvrdpic XT,XB","60@0|T@6|///@11|B@16|235@21|BX@30|TX@31|","v2.06" +"VSX Vector Round to Double-Precision Integer using round toward -Infinity XX2-form","xvrdpim XT,XB","60@0|T@6|///@11|B@16|249@21|BX@30|TX@31|","v2.06" +"VSX Vector Round to Double-Precision Integer using round toward +Infinity XX2-form","xvrdpip XT,XB","60@0|T@6|///@11|B@16|233@21|BX@30|TX@31|","v2.06" +"VSX Vector Round to Double-Precision Integer using round toward Zero XX2-form","xvrdpiz XT,XB","60@0|T@6|///@11|B@16|217@21|BX@30|TX@31|","v2.06" +"VSX Vector Reciprocal Estimate Double-Precision XX2-form","xvredp XT,XB","60@0|T@6|///@11|B@16|218@21|BX@30|TX@31|","v2.06" +"VSX Vector Reciprocal Estimate Single-Precision XX2-form","xvresp XT,XB","60@0|T@6|///@11|B@16|154@21|BX@30|TX@31|","v2.06" +"VSX Vector Round to 
Single-Precision Integer using round to Nearest Away XX2-form","xvrspi XT,XB","60@0|T@6|///@11|B@16|137@21|BX@30|TX@31|","v2.06" +"VSX Vector Round to Single-Precision Integer Exact using Current rounding mode XX2-form","xvrspic XT,XB","60@0|T@6|///@11|B@16|171@21|BX@30|TX@31|","v2.06" +"VSX Vector Round to Single-Precision Integer using round toward -Infinity XX2-form","xvrspim XT,XB","60@0|T@6|///@11|B@16|185@21|BX@30|TX@31|","v2.06" +"VSX Vector Round to Single-Precision Integer using round toward +Infinity XX2-form","xvrspip XT,XB","60@0|T@6|///@11|B@16|169@21|BX@30|TX@31|","v2.06" +"VSX Vector Round to Single-Precision Integer using round toward Zero XX2-form","xvrspiz XT,XB","60@0|T@6|///@11|B@16|153@21|BX@30|TX@31|","v2.06" +"VSX Vector Reciprocal Square Root Estimate Double-Precision XX2-form","xvrsqrtedp XT,XB","60@0|T@6|///@11|B@16|202@21|BX@30|TX@31|","v2.06" +"VSX Vector Reciprocal Square Root Estimate Single-Precision XX2-form","xvrsqrtesp XT,XB","60@0|T@6|///@11|B@16|138@21|BX@30|TX@31|","v2.06" +"VSX Vector Square Root Double-Precision XX2-form","xvsqrtdp XT,XB","60@0|T@6|///@11|B@16|203@21|BX@30|TX@31|","v2.06" +"VSX Vector Square Root Single-Precision XX2-form","xvsqrtsp XT,XB","60@0|T@6|///@11|B@16|139@21|BX@30|TX@31|","v2.06" +"VSX Vector Subtract Double-Precision XX3-form","xvsubdp XT,XA,XB","60@0|T@6|A@11|B@16|104@21|AX@29|BX@30|TX@31|","v2.06" +"VSX Vector Subtract Single-Precision XX3-form","xvsubsp XT,XA,XB","60@0|T@6|A@11|B@16|72@21|AX@29|BX@30|TX@31|","v2.06" +"VSX Vector Test for software Divide Double-Precision XX3-form","xvtdivdp BF,XA,XB","60@0|BF@6|//@9|A@11|B@16|125@21|AX@29|BX@30|/@31|","v2.06" +"VSX Vector Test for software Divide Single-Precision XX3-form","xvtdivsp BF,XA,XB","60@0|BF@6|//@9|A@11|B@16|93@21|AX@29|BX@30|/@31|","v2.06" +"VSX Vector Test for software Square Root Double-Precision XX2-form","xvtsqrtdp BF,XB","60@0|BF@6|//@9|///@11|B@16|234@21|BX@30|/@31|","v2.06" +"VSX Vector Test for software Square Root 
Single-Precision XX2-form","xvtsqrtsp BF,XB","60@0|BF@6|//@9|///@11|B@16|170@21|BX@30|/@31|","v2.06" +"VSX Vector Logical AND XX3-form","xxland XT,XA,XB","60@0|T@6|A@11|B@16|130@21|AX@29|BX@30|TX@31|","v2.06" +"VSX Vector Logical AND with Complement XX3-form","xxlandc XT,XA,XB","60@0|T@6|A@11|B@16|138@21|AX@29|BX@30|TX@31|","v2.06" +"VSX Vector Logical NOR XX3-form","xxlnor XT,XA,XB","60@0|T@6|A@11|B@16|162@21|AX@29|BX@30|TX@31|","v2.06" +"VSX Vector Logical OR XX3-form","xxlor XT,XA,XB","60@0|T@6|A@11|B@16|146@21|AX@29|BX@30|TX@31|","v2.06" +"VSX Vector Logical XOR XX3-form","xxlxor XT,XA,XB","60@0|T@6|A@11|B@16|154@21|AX@29|BX@30|TX@31|","v2.06" +"VSX Vector Merge High Word XX3-form","xxmrghw XT,XA,XB","60@0|T@6|A@11|B@16|18@21|AX@29|BX@30|TX@31|","v2.06" +"VSX Vector Merge Low Word XX3-form","xxmrglw XT,XA,XB","60@0|T@6|A@11|B@16|50@21|AX@29|BX@30|TX@31|","v2.06" +"VSX Vector Permute Doubleword Immediate XX3-form","xxpermdi XT,XA,XB,DM","60@0|T@6|A@11|B@16|0@21|DM@22|10@24|AX@29|BX@30|TX@31|","v2.06" +"VSX Vector Select XX4-form","xxsel XT,XA,XB,XC","60@0|T@6|A@11|B@16|C@21|3@26|CX@28|AX@29|BX@30|TX@31|","v2.06" +"VSX Vector Shift Left Double by Word Immediate XX3-form","xxsldwi XT,XA,XB,SHW","60@0|T@6|A@11|B@16|0@21|SHW@22|2@24|AX@29|BX@30|TX@31|","v2.06" +"VSX Vector Splat Word XX2-form","xxspltw XT,XB,UIM","60@0|T@6|///@11|UIM@14|B@16|164@21|BX@30|TX@31|","v2.06" +"Compare Bytes X-form","cmpb RA,RS,RB","31@0|RS@6|RA@11|RB@16|508@21|/@31|","v2.05" +"DFP Add X-form","dadd FRT,FRA,FRB (Rc=0)|dadd. FRT,FRA,FRB (Rc=1)","59@0|FRT@6|FRA@11|FRB@16|2@21|Rc@31|","v2.05" +"DFP Add Quad X-form","daddq FRTp,FRAp,FRBp (Rc=0)|daddq. FRTp,FRAp,FRBp (Rc=1)","63@0|FRTp@6|FRAp@11|FRBp@16|2@21|Rc@31|","v2.05" +"DFP Convert From Fixed Quad X-form","dcffixq FRTp,FRB (Rc=0)|dcffixq. 
FRTp,FRB (Rc=1)","63@0|FRTp@6|///@11|FRB@16|802@21|Rc@31|","v2.05" +"DFP Compare Ordered X-form","dcmpo BF,FRA,FRB","59@0|BF@6|//@9|FRA@11|FRB@16|130@21|/@31|","v2.05" +"DFP Compare Ordered Quad X-form","dcmpoq BF,FRAp,FRBp","63@0|BF@6|//@9|FRAp@11|FRBp@16|130@21|/@31|","v2.05" +"DFP Compare Unordered X-form","dcmpu BF,FRA,FRB","59@0|BF@6|//@9|FRA@11|FRB@16|642@21|/@31|","v2.05" +"DFP Compare Unordered Quad X-form","dcmpuq BF,FRAp,FRBp","63@0|BF@6|//@9|FRAp@11|FRBp@16|642@21|/@31|","v2.05" +"DFP Convert To DFP Long X-form","dctdp FRT,FRB (Rc=0)|dctdp. FRT,FRB (Rc=1)","59@0|FRT@6|///@11|FRB@16|258@21|Rc@31|","v2.05" +"DFP Convert To Fixed X-form","dctfix FRT,FRB (Rc=0)|dctfix. FRT,FRB (Rc=1)","59@0|FRT@6|///@11|FRB@16|290@21|Rc@31|","v2.05" +"DFP Convert To Fixed Quad X-form","dctfixq FRT,FRBp (Rc=0)|dctfixq. FRT,FRBp (Rc=1)","63@0|FRT@6|///@11|FRBp@16|290@21|Rc@31|","v2.05" +"DFP Convert To DFP Extended X-form","dctqpq FRTp,FRB (Rc=0)|dctqpq. FRTp,FRB (Rc=1)","63@0|FRTp@6|///@11|FRB@16|258@21|Rc@31|","v2.05" +"DFP Decode DPD To BCD X-form","ddedpd SP,FRT,FRB (Rc=0)|ddedpd. SP,FRT,FRB (Rc=1)","59@0|FRT@6|SP@11|///@13|FRB@16|322@21|Rc@31|","v2.05" +"DFP Decode DPD To BCD Quad X-form","ddedpdq SP,FRTp,FRBp (Rc=0)|ddedpdq. SP,FRTp,FRBp (Rc=1)","63@0|FRTp@6|SP@11|///@13|FRBp@16|322@21|Rc@31|","v2.05" +"DFP Divide X-form","ddiv FRT,FRA,FRB (Rc=0)|ddiv. FRT,FRA,FRB (Rc=1)","59@0|FRT@6|FRA@11|FRB@16|546@21|Rc@31|","v2.05" +"DFP Divide Quad X-form","ddivq FRTp,FRAp,FRBp (Rc=0)|ddivq. FRTp,FRAp,FRBp (Rc=1)","63@0|FRTp@6|FRAp@11|FRBp@16|546@21|Rc@31|","v2.05" +"DFP Encode BCD To DPD X-form","denbcd S,FRT,FRB (Rc=0)|denbcd. S,FRT,FRB (Rc=1)","59@0|FRT@6|S@11|///@12|FRB@16|834@21|Rc@31|","v2.05" +"DFP Encode BCD To DPD Quad X-form","denbcdq S,FRTp,FRBp (Rc=0)|denbcdq. S,FRTp,FRBp (Rc=1)","63@0|FRTp@6|S@11|///@12|FRBp@16|834@21|Rc@31|","v2.05" +"DFP Insert Biased Exponent X-form","diex FRT,FRA,FRB (Rc=0)|diex. 
FRT,FRA,FRB (Rc=1)","59@0|FRT@6|FRA@11|FRB@16|866@21|Rc@31|","v2.05" +"DFP Insert Biased Exponent Quad X-form","diexq FRTp,FRA,FRBp|diexq. FRTp,FRA,FRBp (Rc=1)","63@0|FRTp@6|FRA@11|FRBp@16|866@21|Rc@31|","v2.05" +"DFP Multiply X-form","dmul FRT,FRA,FRB (Rc=0)|dmul. FRT,FRA,FRB (Rc=1)","59@0|FRT@6|FRA@11|FRB@16|34@21|Rc@31|","v2.05" +"DFP Multiply Quad X-form","dmulq FRTp,FRAp,FRBp (Rc=0)|dmulq. FRTp,FRAp,FRBp (Rc=1)","63@0|FRTp@6|FRAp@11|FRBp@16|34@21|Rc@31|","v2.05" +"DFP Quantize Z23-form","dqua FRT,FRA,FRB,RMC (Rc=0)|dqua. FRT,FRA,FRB,RMC (Rc=1)","59@0|FRT@6|FRA@11|FRB@16|RMC@21|3@23|Rc@31|","v2.05" +"DFP Quantize Immediate Z23-form","dquai TE,FRT,FRB,RMC (Rc=0)|dquai. TE,FRT,FRB,RMC (Rc=1)","59@0|FRT@6|TE@11|FRB@16|RMC@21|67@23|Rc@31|","v2.05" +"DFP Quantize Immediate Quad Z23-form","dquaiq TE,FRTp,FRBp,RMC (Rc=0)|dquaiq. TE,FRTp,FRBp,RMC (Rc=1)","63@0|FRTp@6|TE@11|FRBp@16|RMC@21|67@23|Rc@31|","v2.05" +"DFP Quantize Quad Z23-form","dquaq FRTp,FRAp,FRBp,RMC (Rc=0)|dquaq. FRTp,FRAp,FRBp,RMC (Rc=1)","63@0|FRTp@6|FRAp@11|FRBp@16|RMC@21|3@23|Rc@31|","v2.05" +"DFP Round To DFP Long X-form","drdpq FRTp,FRBp (Rc=0)|drdpq. FRTp,FRBp (Rc=1)","63@0|FRTp@6|///@11|FRBp@16|770@21|Rc@31|","v2.05" +"DFP Round To FP Integer Without Inexact Z23-form","drintn R,FRT,FRB,RMC (Rc=0)|drintn. R,FRT,FRB,RMC (Rc=1)","59@0|FRT@6|///@11|R@15|FRB@16|RMC@21|227@23|Rc@31|","v2.05" +"DFP Round To FP Integer Without Inexact Quad Z23-form","drintnq R,FRTp,FRBp,RMC (Rc=0)|drintnq. R,FRTp,FRBp,RMC (Rc=1)","63@0|FRTp@6|///@11|R@15|FRBp@16|RMC@21|227@23|Rc@31|","v2.05" +"DFP Round To FP Integer With Inexact Z23-form","drintx R,FRT,FRB,RMC (Rc=0)|drintx. R,FRT,FRB,RMC (Rc=1)","59@0|FRT@6|///@11|R@15|FRB@16|RMC@21|99@23|Rc@31|","v2.05" +"DFP Round To FP Integer With Inexact Quad Z23-form","drintxq R,FRTp,FRBp,RMC (Rc=0)|drintxq. R,FRTp,FRBp,RMC (Rc=1)","63@0|FRTp@6|///@11|R@15|FRBp@16|RMC@21|99@23|Rc@31|","v2.05" +"DFP Reround Z23-form","drrnd FRT,FRA,FRB,RMC (Rc=0)|drrnd. 
FRT,FRA,FRB,RMC (Rc=1)","59@0|FRT@6|FRA@11|FRB@16|RMC@21|35@23|Rc@31|","v2.05" +"DFP Reround Quad Z23-form","drrndq FRTp,FRA,FRBp,RMC (Rc=0)|drrndq. FRTp,FRA,FRBp,RMC (Rc=1)","63@0|FRTp@6|FRA@11|FRBp@16|RMC@21|35@23|Rc@31|","v2.05" +"DFP Round To DFP Short X-form","drsp FRT,FRB (Rc=0)|drsp. FRT,FRB (Rc=1)","59@0|FRT@6|///@11|FRB@16|770@21|Rc@31|","v2.05" +"DFP Shift Significand Left Immediate Z22-form","dscli FRT,FRA,SH (Rc=0)|dscli. FRT,FRA,SH (Rc=1)","59@0|FRT@6|FRA@11|SH@16|66@22|Rc@31|","v2.05" +"DFP Shift Significand Left Immediate Quad Z22-form","dscliq FRTp,FRAp,SH (Rc=0)|dscliq. FRTp,FRAp,SH (Rc=1)","63@0|FRTp@6|FRAp@11|SH@16|66@22|Rc@31|","v2.05" +"DFP Shift Significand Right Immediate Z22-form","dscri FRT,FRA,SH (Rc=0)|dscri. FRT,FRA,SH (Rc=1)","59@0|FRT@6|FRA@11|SH@16|98@22|Rc@31|","v2.05" +"DFP Shift Significand Right Immediate Quad Z22-form","dscriq FRTp,FRAp,SH (Rc=0)|dscriq. FRTp,FRAp,SH (Rc=1)","63@0|FRTp@6|FRAp@11|SH@16|98@22|Rc@31|","v2.05" +"DFP Subtract X-form","dsub FRT,FRA,FRB (Rc=0)|dsub. FRT,FRA,FRB (Rc=1)","59@0|FRT@6|FRA@11|FRB@16|514@21|Rc@31|","v2.05" +"DFP Subtract Quad X-form","dsubq FRTp,FRAp,FRBp (Rc=0)|dsubq. 
FRTp,FRAp,FRBp (Rc=1)","63@0|FRTp@6|FRAp@11|FRBp@16|514@21|Rc@31|","v2.05" +"DFP Test Data Class Z22-form","dtstdc BF,FRA,DCM","59@0|BF@6|//@9|FRA@11|DCM@16|194@22|/@31|","v2.05" +"DFP Test Data Class Quad Z22-form","dtstdcq BF,FRAp,DCM","63@0|BF@6|//@9|FRAp@11|DCM@16|194@22|/@31|","v2.05" +"DFP Test Data Group Z22-form","dtstdg BF,FRA,DGM","59@0|BF@6|//@9|FRA@11|DGM@16|226@22|/@31|","v2.05" +"DFP Test Data Group Quad Z22-form","dtstdgq BF,FRAp,DGM","63@0|BF@6|//@9|FRAp@11|DGM@16|226@22|/@31|","v2.05" +"DFP Test Exponent X-form","dtstex BF,FRA,FRB","59@0|BF@6|//@9|FRA@11|FRB@16|162@21|/@31|","v2.05" +"DFP Test Exponent Quad X-form","dtstexq BF,FRAp,FRBp","63@0|BF@6|//@9|FRAp@11|FRBp@16|162@21|/@31|","v2.05" +"DFP Test Significance X-form","dtstsf BF,FRA,FRB","59@0|BF@6|//@9|FRA@11|FRB@16|674@21|/@31|","v2.05" +"DFP Test Significance Quad X-form","dtstsfq BF,FRA,FRBp","63@0|BF@6|//@9|FRA@11|FRBp@16|674@21|/@31|","v2.05" +"DFP Extract Biased Exponent X-form","dxex FRT,FRB (Rc=0)|dxex. FRT,FRB (Rc=1)","59@0|FRT@6|///@11|FRB@16|354@21|Rc@31|","v2.05" +"DFP Extract Biased Exponent Quad X-form","dxexq FRT,FRBp (Rc=0)|dxexq. FRT,FRBp (Rc=1)","63@0|FRT@6|///@11|FRBp@16|354@21|Rc@31|","v2.05" +"Floating Copy Sign X-form","fcpsgn FRT, FRA, FRB (Rc=0)|fcpsgn. 
FRT, FRA, FRB (Rc=1)","63@0|FRT@6|FRA@11|FRB@16|8@21|Rc@31|","v2.05" +"Load Byte & Zero Caching Inhibited Indexed X-form","lbzcix RT,RA,RB","31@0|RT@6|RA@11|RB@16|853@21|/@31|","v2.05" +"Load Doubleword Caching Inhibited Indexed X-form","ldcix RT,RA,RB","31@0|RT@6|RA@11|RB@16|885@21|/@31|","v2.05" +"Load Floating-Point Double Pair DS-form","lfdp FRTp,DS(RA)","57@0|FRTp@6|RA@11|DS@16|0@30|","v2.05" +"Load Floating-Point Double Pair Indexed X-form","lfdpx FRTp,RA,RB","31@0|FRTp@6|RA@11|RB@16|791@21|/@31|","v2.05" +"Load Floating-Point as Integer Word Algebraic Indexed X-form","lfiwax FRT,RA,RB","31@0|FRT@6|RA@11|RB@16|855@21|/@31|","v2.05" +"Load Halfword & Zero Caching Inhibited Indexed X-form","lhzcix RT,RA,RB","31@0|RT@6|RA@11|RB@16|821@21|/@31|","v2.05" +"Load Word & Zero Caching Inhibited Indexed X-form","lwzcix RT,RA,RB","31@0|RT@6|RA@11|RB@16|789@21|/@31|","v2.05" +"Parity Doubleword X-form","prtyd RA,RS","31@0|RS@6|RA@11|///@16|186@21|/@31|","v2.05" +"Parity Word X-form","prtyw RA,RS","31@0|RS@6|RA@11|///@16|154@21|/@31|","v2.05" +"SLB Find Entry ESID X-form","slbfee. 
RT,RB","31@0|RT@6|///@11|RB@16|979@21|1@31|","v2.05" +"Store Byte Caching Inhibited Indexed X-form","stbcix RS,RA,RB","31@0|RS@6|RA@11|RB@16|981@21|/@31|","v2.05" +"Store Doubleword Caching Inhibited Indexed X-form","stdcix RS,RA,RB","31@0|RS@6|RA@11|RB@16|1013@21|/@31|","v2.05" +"Store Floating-Point Double Pair DS-form","stfdp FRSp,DS(RA)","61@0|FRSp@6|RA@11|DS@16|0@30|","v2.05" +"Store Floating-Point Double Pair Indexed X-form","stfdpx FRSp,RA,RB","31@0|FRSp@6|RA@11|RB@16|919@21|/@31|","v2.05" +"Store Halfword Caching Inhibited Indexed X-form","sthcix RS,RA,RB","31@0|RS@6|RA@11|RB@16|949@21|/@31|","v2.05" +"Store Word Caching Inhibited Indexed X-form","stwcix RS,RA,RB","31@0|RS@6|RA@11|RB@16|917@21|/@31|","v2.05" +"Integer Select A-form","isel RT,RA,RB,BC","31@0|RT@6|RA@11|RB@16|BC@21|15@26|/@31|","v2.03" +"Load Vector Element Byte Indexed X-form","lvebx VRT,RA,RB","31@0|VRT@6|RA@11|RB@16|7@21|/@31|","v2.03" +"Load Vector Element Halfword Indexed X-form","lvehx VRT,RA,RB","31@0|VRT@6|RA@11|RB@16|39@21|/@31|","v2.03" +"Load Vector Element Word Indexed X-form","lvewx VRT,RA,RB","31@0|VRT@6|RA@11|RB@16|71@21|/@31|","v2.03" +"Load Vector for Shift Left Indexed X-form","lvsl VRT,RA,RB","31@0|VRT@6|RA@11|RB@16|6@21|/@31|","v2.03" +"Load Vector for Shift Right Indexed X-form","lvsr VRT,RA,RB","31@0|VRT@6|RA@11|RB@16|38@21|/@31|","v2.03" +"Load Vector Indexed X-form","lvx VRT,RA,RB","31@0|VRT@6|RA@11|RB@16|103@21|/@31|","v2.03" +"Load Vector Indexed Last X-form","lvxl VRT,RA,RB","31@0|VRT@6|RA@11|RB@16|359@21|/@31|","v2.03" +"Move From Vector Status and Control Register VX-form","mfvscr VRT","4@0|VRT@6|///@11|///@16|1540@21|","v2.03" +"Move To Vector Status and Control Register VX-form","mtvscr VRB","4@0|///@6|///@11|VRB@16|1604@21|","v2.03" +"Store Vector Element Byte Indexed X-form","stvebx VRS,RA,RB","31@0|VRS@6|RA@11|RB@16|135@21|/@31|","v2.03" +"Store Vector Element Halfword Indexed X-form","stvehx VRS,RA,RB","31@0|VRS@6|RA@11|RB@16|167@21|/@31|","v2.03" +"Store 
Vector Element Word Indexed X-form","stvewx VRS,RA,RB","31@0|VRS@6|RA@11|RB@16|199@21|/@31|","v2.03" +"Store Vector Indexed X-form","stvx VRS,RA,RB","31@0|VRS@6|RA@11|RB@16|231@21|/@31|","v2.03" +"Store Vector Indexed Last X-form","stvxl VRS,RA,RB","31@0|VRS@6|RA@11|RB@16|487@21|/@31|","v2.03" +"TLB Invalidate Entry Local X-form","tlbiel RB,RS,RIC,PRS,R","31@0|RS@6|/@11|RIC@12|PRS@14|R@15|RB@16|274@21|/@31|","v2.03" +"Vector Add & write Carry Unsigned Word VX-form","vaddcuw VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|384@21|","v2.03" +"Vector Add Floating-Point VX-form","vaddfp VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|10@21|","v2.03" +"Vector Add Signed Byte Saturate VX-form","vaddsbs VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|768@21|","v2.03" +"Vector Add Signed Halfword Saturate VX-form","vaddshs VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|832@21|","v2.03" +"Vector Add Signed Word Saturate VX-form","vaddsws VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|896@21|","v2.03" +"Vector Add Unsigned Byte Modulo VX-form","vaddubm VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|0@21|","v2.03" +"Vector Add Unsigned Byte Saturate VX-form","vaddubs VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|512@21|","v2.03" +"Vector Add Unsigned Halfword Modulo VX-form","vadduhm VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|64@21|","v2.03" +"Vector Add Unsigned Halfword Saturate VX-form","vadduhs VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|576@21|","v2.03" +"Vector Add Unsigned Word Modulo VX-form","vadduwm VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|128@21|","v2.03" +"Vector Add Unsigned Word Saturate VX-form","vadduws VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|640@21|","v2.03" +"Vector Logical AND VX-form","vand VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1028@21|","v2.03" +"Vector Logical AND with Complement VX-form","vandc VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1092@21|","v2.03" +"Vector Average Signed Byte VX-form","vavgsb VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1282@21|","v2.03" +"Vector Average Signed Halfword VX-form","vavgsh 
VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1346@21|","v2.03" +"Vector Average Signed Word VX-form","vavgsw VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1410@21|","v2.03" +"Vector Average Unsigned Byte VX-form","vavgub VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1026@21|","v2.03" +"Vector Average Unsigned Halfword VX-form","vavguh VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1090@21|","v2.03" +"Vector Average Unsigned Word VX-form","vavguw VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1154@21|","v2.03" +"Vector Convert with round to nearest From Signed Word to floating-point format VX-form","vcfsx VRT,VRB,UIM","4@0|VRT@6|UIM@11|VRB@16|842@21|","v2.03" +"Vector Convert with round to nearest From Unsigned Word to floating-point format VX-form","vcfux VRT,VRB,UIM","4@0|VRT@6|UIM@11|VRB@16|778@21|","v2.03" +"Vector Compare Bounds Floating-Point VC-form","vcmpbfp VRT,VRA,VRB (Rc=0)|vcmpbfp. VRT,VRA,VRB (Rc=1)","4@0|VRT@6|VRA@11|VRB@16|Rc@21|966@22|","v2.03" +"Vector Compare Equal Floating-Point VC-form","vcmpeqfp VRT,VRA,VRB (Rc=0)|vcmpeqfp. VRT,VRA,VRB (Rc=1)","4@0|VRT@6|VRA@11|VRB@16|Rc@21|198@22|","v2.03" +"Vector Compare Equal Unsigned Byte VC-form","vcmpequb VRT,VRA,VRB (Rc=0)|vcmpequb. VRT,VRA,VRB (Rc=1)","4@0|VRT@6|VRA@11|VRB@16|Rc@21|6@22|","v2.03" +"Vector Compare Equal Unsigned Halfword VC-form","vcmpequh VRT,VRA,VRB (Rc=0)|vcmpequh. VRT,VRA,VRB (Rc=1)","4@0|VRT@6|VRA@11|VRB@16|Rc@21|70@22|","v2.03" +"Vector Compare Equal Unsigned Word VC-form","vcmpequw VRT,VRA,VRB (Rc=0)|vcmpequw. VRT,VRA,VRB (Rc=1)","4@0|VRT@6|VRA@11|VRB@16|Rc@21|134@22|","v2.03" +"Vector Compare Greater Than or Equal Floating-Point VC-form","vcmpgefp VRT,VRA,VRB (Rc=0)|vcmpgefp. VRT,VRA,VRB (Rc=1)","4@0|VRT@6|VRA@11|VRB@16|Rc@21|454@22|","v2.03" +"Vector Compare Greater Than Floating-Point VC-form","vcmpgtfp VRT,VRA,VRB (Rc=0)|vcmpgtfp. VRT,VRA,VRB (Rc=1)","4@0|VRT@6|VRA@11|VRB@16|Rc@21|710@22|","v2.03" +"Vector Compare Greater Than Signed Byte VC-form","vcmpgtsb VRT,VRA,VRB (Rc=0)|vcmpgtsb. 
VRT,VRA,VRB (Rc=1)","4@0|VRT@6|VRA@11|VRB@16|Rc@21|774@22|","v2.03" +"Vector Compare Greater Than Signed Halfword VC-form","vcmpgtsh VRT,VRA,VRB (Rc=0)|vcmpgtsh. VRT,VRA,VRB (Rc=1)","4@0|VRT@6|VRA@11|VRB@16|Rc@21|838@22|","v2.03" +"Vector Compare Greater Than Signed Word VC-form","vcmpgtsw VRT,VRA,VRB (Rc=0)|vcmpgtsw. VRT,VRA,VRB (Rc=1)","4@0|VRT@6|VRA@11|VRB@16|Rc@21|902@22|","v2.03" +"Vector Compare Greater Than Unsigned Byte VC-form","vcmpgtub VRT,VRA,VRB (Rc=0)|vcmpgtub. VRT,VRA,VRB (Rc=1)","4@0|VRT@6|VRA@11|VRB@16|Rc@21|518@22|","v2.03" +"Vector Compare Greater Than Unsigned Halfword VC-form","vcmpgtuh VRT,VRA,VRB (Rc=0)|vcmpgtuh. VRT,VRA,VRB (Rc=1)","4@0|VRT@6|VRA@11|VRB@16|Rc@21|582@22|","v2.03" +"Vector Compare Greater Than Unsigned Word VC-form","vcmpgtuw VRT,VRA,VRB (Rc=0)|vcmpgtuw. VRT,VRA,VRB (Rc=1)","4@0|VRT@6|VRA@11|VRB@16|Rc@21|646@22|","v2.03" +"Vector Convert with round to zero from floating-point To Signed Word format Saturate VX-form","vctsxs VRT,VRB,UIM","4@0|VRT@6|UIM@11|VRB@16|970@21|","v2.03" +"Vector Convert with round to zero from floating-point To Unsigned Word format Saturate VX-form","vctuxs VRT,VRB,UIM","4@0|VRT@6|UIM@11|VRB@16|906@21|","v2.03" +"Vector 2 Raised to the Exponent Estimate Floating-Point VX-form","vexptefp VRT,VRB","4@0|VRT@6|///@11|VRB@16|394@21|","v2.03" +"Vector Log Base 2 Estimate Floating-Point VX-form","vlogefp VRT,VRB","4@0|VRT@6|///@11|VRB@16|458@21|","v2.03" +"Vector Multiply-Add Floating-Point VA-form","vmaddfp VRT,VRA,VRC,VRB","4@0|VRT@6|VRA@11|VRB@16|VRC@21|46@26|","v2.03" +"Vector Maximum Floating-Point VX-form","vmaxfp VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1034@21|","v2.03" +"Vector Maximum Signed Byte VX-form","vmaxsb VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|258@21|","v2.03" +"Vector Maximum Signed Halfword VX-form","vmaxsh VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|322@21|","v2.03" +"Vector Maximum Signed Word VX-form","vmaxsw VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|386@21|","v2.03" +"Vector Maximum Unsigned Byte 
VX-form","vmaxub VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|2@21|","v2.03" +"Vector Maximum Unsigned Halfword VX-form","vmaxuh VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|66@21|","v2.03" +"Vector Maximum Unsigned Word VX-form","vmaxuw VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|130@21|","v2.03" +"Vector Multiply-High-Add Signed Halfword Saturate VA-form","vmhaddshs VRT,VRA,VRB,VRC","4@0|VRT@6|VRA@11|VRB@16|VRC@21|32@26|","v2.03" +"Vector Multiply-High-Round-Add Signed Halfword Saturate VA-form","vmhraddshs VRT,VRA,VRB,VRC","4@0|VRT@6|VRA@11|VRB@16|VRC@21|33@26|","v2.03" +"Vector Minimum Floating-Point VX-form","vminfp VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1098@21|","v2.03" +"Vector Minimum Signed Byte VX-form","vminsb VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|770@21|","v2.03" +"Vector Minimum Signed Halfword VX-form","vminsh VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|834@21|","v2.03" +"Vector Minimum Signed Word VX-form","vminsw VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|898@21|","v2.03" +"Vector Minimum Unsigned Byte VX-form","vminub VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|514@21|","v2.03" +"Vector Minimum Unsigned Halfword VX-form","vminuh VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|578@21|","v2.03" +"Vector Minimum Unsigned Word VX-form","vminuw VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|642@21|","v2.03" +"Vector Multiply-Low-Add Unsigned Halfword Modulo VA-form","vmladduhm VRT,VRA,VRB,VRC","4@0|VRT@6|VRA@11|VRB@16|VRC@21|34@26|","v2.03" +"Vector Merge High Byte VX-form","vmrghb VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|12@21|","v2.03" +"Vector Merge High Halfword VX-form","vmrghh VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|76@21|","v2.03" +"Vector Merge High Word VX-form","vmrghw VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|140@21|","v2.03" +"Vector Merge Low Byte VX-form","vmrglb VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|268@21|","v2.03" +"Vector Merge Low Halfword VX-form","vmrglh VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|332@21|","v2.03" +"Vector Merge Low Word VX-form","vmrglw 
VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|396@21|","v2.03" +"Vector Multiply-Sum Mixed Byte Modulo VA-form","vmsummbm VRT,VRA,VRB,VRC","4@0|VRT@6|VRA@11|VRB@16|VRC@21|37@26|","v2.03" +"Vector Multiply-Sum Signed Halfword Modulo VA-form","vmsumshm VRT,VRA,VRB,VRC","4@0|VRT@6|VRA@11|VRB@16|VRC@21|40@26|","v2.03" +"Vector Multiply-Sum Signed Halfword Saturate VA-form","vmsumshs VRT,VRA,VRB,VRC","4@0|VRT@6|VRA@11|VRB@16|VRC@21|41@26|","v2.03" +"Vector Multiply-Sum Unsigned Byte Modulo VA-form","vmsumubm VRT,VRA,VRB,VRC","4@0|VRT@6|VRA@11|VRB@16|VRC@21|36@26|","v2.03" +"Vector Multiply-Sum Unsigned Halfword Modulo VA-form","vmsumuhm VRT,VRA,VRB,VRC","4@0|VRT@6|VRA@11|VRB@16|VRC@21|38@26|","v2.03" +"Vector Multiply-Sum Unsigned Halfword Saturate VA-form","vmsumuhs VRT,VRA,VRB,VRC","4@0|VRT@6|VRA@11|VRB@16|VRC@21|39@26|","v2.03" +"Vector Multiply Even Signed Byte VX-form","vmulesb VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|776@21|","v2.03" +"Vector Multiply Even Signed Halfword VX-form","vmulesh VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|840@21|","v2.03" +"Vector Multiply Even Unsigned Byte VX-form","vmuleub VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|520@21|","v2.03" +"Vector Multiply Even Unsigned Halfword VX-form","vmuleuh VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|584@21|","v2.03" +"Vector Multiply Odd Signed Byte VX-form","vmulosb VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|264@21|","v2.03" +"Vector Multiply Odd Signed Halfword VX-form","vmulosh VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|328@21|","v2.03" +"Vector Multiply Odd Unsigned Byte VX-form","vmuloub VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|8@21|","v2.03" +"Vector Multiply Odd Unsigned Halfword VX-form","vmulouh VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|72@21|","v2.03" +"Vector Negative Multiply-Subtract Floating-Point VA-form","vnmsubfp VRT,VRA,VRC,VRB","4@0|VRT@6|VRA@11|VRB@16|VRC@21|47@26|","v2.03" +"Vector Logical NOR VX-form","vnor VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1284@21|","v2.03" +"Vector Logical OR VX-form","vor 
VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1156@21|","v2.03" +"Vector Permute VA-form","vperm VRT,VRA,VRB,VRC","4@0|VRT@6|VRA@11|VRB@16|VRC@21|43@26|","v2.03" +"Vector Pack Pixel VX-form","vpkpx VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|782@21|","v2.03" +"Vector Pack Signed Halfword Signed Saturate VX-form","vpkshss VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|398@21|","v2.03" +"Vector Pack Signed Halfword Unsigned Saturate VX-form","vpkshus VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|270@21|","v2.03" +"Vector Pack Signed Word Signed Saturate VX-form","vpkswss VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|462@21|","v2.03" +"Vector Pack Signed Word Unsigned Saturate VX-form","vpkswus VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|334@21|","v2.03" +"Vector Pack Unsigned Halfword Unsigned Modulo VX-form","vpkuhum VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|14@21|","v2.03" +"Vector Pack Unsigned Halfword Unsigned Saturate VX-form","vpkuhus VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|142@21|","v2.03" +"Vector Pack Unsigned Word Unsigned Modulo VX-form","vpkuwum VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|78@21|","v2.03" +"Vector Pack Unsigned Word Unsigned Saturate VX-form","vpkuwus VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|206@21|","v2.03" +"Vector Reciprocal Estimate Floating-Point VX-form","vrefp VRT,VRB","4@0|VRT@6|///@11|VRB@16|266@21|","v2.03" +"Vector Round to Floating-Point Integer toward -Infinity VX-form","vrfim VRT,VRB","4@0|VRT@6|///@11|VRB@16|714@21|","v2.03" +"Vector Round to Floating-Point Integer Nearest VX-form","vrfin VRT,VRB","4@0|VRT@6|///@11|VRB@16|522@21|","v2.03" +"Vector Round to Floating-Point Integer toward +Infinity VX-form","vrfip VRT,VRB","4@0|VRT@6|///@11|VRB@16|650@21|","v2.03" +"Vector Round to Floating-Point Integer toward Zero VX-form","vrfiz VRT,VRB","4@0|VRT@6|///@11|VRB@16|586@21|","v2.03" +"Vector Rotate Left Byte VX-form","vrlb VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|4@21|","v2.03" +"Vector Rotate Left Halfword VX-form","vrlh VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|68@21|","v2.03" +"Vector 
Rotate Left Word VX-form","vrlw VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|132@21|","v2.03" +"Vector Reciprocal Square Root Estimate Floating-Point VX-form","vrsqrtefp VRT,VRB","4@0|VRT@6|///@11|VRB@16|330@21|","v2.03" +"Vector Select VA-form","vsel VRT,VRA,VRB,VRC","4@0|VRT@6|VRA@11|VRB@16|VRC@21|42@26|","v2.03" +"Vector Shift Left VX-form","vsl VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|452@21|","v2.03" +"Vector Shift Left Byte VX-form","vslb VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|260@21|","v2.03" +"Vector Shift Left Double by Octet Immediate VA-form","vsldoi VRT,VRA,VRB,SHB","4@0|VRT@6|VRA@11|VRB@16|/@21|SHB@22|44@26|","v2.03" +"Vector Shift Left Halfword VX-form","vslh VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|324@21|","v2.03" +"Vector Shift Left by Octet VX-form","vslo VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1036@21|","v2.03" +"Vector Shift Left Word VX-form","vslw VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|388@21|","v2.03" +"Vector Splat Byte VX-form","vspltb VRT,VRB,UIM","4@0|VRT@6|/@11|UIM@12|VRB@16|524@21|","v2.03" +"Vector Splat Halfword VX-form","vsplth VRT,VRB,UIM","4@0|VRT@6|//@11|UIM@13|VRB@16|588@21|","v2.03" +"Vector Splat Immediate Signed Byte VX-form","vspltisb VRT,SIM","4@0|VRT@6|SIM@11|///@16|780@21|","v2.03" +"Vector Splat Immediate Signed Halfword VX-form","vspltish VRT,SIM","4@0|VRT@6|SIM@11|///@16|844@21|","v2.03" +"Vector Splat Immediate Signed Word VX-form","vspltisw VRT,SIM","4@0|VRT@6|SIM@11|///@16|908@21|","v2.03" +"Vector Splat Word VX-form","vspltw VRT,VRB,UIM","4@0|VRT@6|///@11|UIM@14|VRB@16|652@21|","v2.03" +"Vector Shift Right VX-form","vsr VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|708@21|","v2.03" +"Vector Shift Right Algebraic Byte VX-form","vsrab VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|772@21|","v2.03" +"Vector Shift Right Algebraic Halfword VX-form","vsrah VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|836@21|","v2.03" +"Vector Shift Right Algebraic Word VX-form","vsraw VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|900@21|","v2.03" +"Vector Shift Right Byte VX-form","vsrb 
VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|516@21|","v2.03" +"Vector Shift Right Halfword VX-form","vsrh VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|580@21|","v2.03" +"Vector Shift Right by Octet VX-form","vsro VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1100@21|","v2.03" +"Vector Shift Right Word VX-form","vsrw VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|644@21|","v2.03" +"Vector Subtract & Write Carry-out Unsigned Word VX-form","vsubcuw VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1408@21|","v2.03" +"Vector Subtract Floating-Point VX-form","vsubfp VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|74@21|","v2.03" +"Vector Subtract Signed Byte Saturate VX-form","vsubsbs VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1792@21|","v2.03" +"Vector Subtract Signed Halfword Saturate VX-form","vsubshs VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1856@21|","v2.03" +"Vector Subtract Signed Word Saturate VX-form","vsubsws VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1920@21|","v2.03" +"Vector Subtract Unsigned Byte Modulo VX-form","vsububm VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1024@21|","v2.03" +"Vector Subtract Unsigned Byte Saturate VX-form","vsububs VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1536@21|","v2.03" +"Vector Subtract Unsigned Halfword Modulo VX-form","vsubuhm VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1088@21|","v2.03" +"Vector Subtract Unsigned Halfword Saturate VX-form","vsubuhs VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1600@21|","v2.03" +"Vector Subtract Unsigned Word Modulo VX-form","vsubuwm VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1152@21|","v2.03" +"Vector Subtract Unsigned Word Saturate VX-form","vsubuws VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1664@21|","v2.03" +"Vector Sum across Half Signed Word Saturate VX-form","vsum2sws VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1672@21|","v2.03" +"Vector Sum across Quarter Signed Byte Saturate VX-form","vsum4sbs VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1800@21|","v2.03" +"Vector Sum across Quarter Signed Halfword Saturate VX-form","vsum4shs VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1608@21|","v2.03" +"Vector 
Sum across Quarter Unsigned Byte Saturate VX-form","vsum4ubs VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1544@21|","v2.03" +"Vector Sum across Signed Word Saturate VX-form","vsumsws VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1928@21|","v2.03" +"Vector Unpack High Pixel VX-form","vupkhpx VRT,VRB","4@0|VRT@6|///@11|VRB@16|846@21|","v2.03" +"Vector Unpack High Signed Byte VX-form","vupkhsb VRT,VRB","4@0|VRT@6|///@11|VRB@16|526@21|","v2.03" +"Vector Unpack High Signed Halfword VX-form","vupkhsh VRT,VRB","4@0|VRT@6|///@11|VRB@16|590@21|","v2.03" +"Vector Unpack Low Pixel VX-form","vupklpx VRT,VRB","4@0|VRT@6|///@11|VRB@16|974@21|","v2.03" +"Vector Unpack Low Signed Byte VX-form","vupklsb VRT,VRB","4@0|VRT@6|///@11|VRB@16|654@21|","v2.03" +"Vector Unpack Low Signed Halfword VX-form","vupklsh VRT,VRB","4@0|VRT@6|///@11|VRB@16|718@21|","v2.03" +"Vector Logical XOR VX-form","vxor VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1220@21|","v2.03" +"Floating Reciprocal Estimate A-form","fre FRT,FRB (Rc=0)|fre. FRT,FRB (Rc=1)","63@0|FRT@6|///@11|FRB@16|///@21|24@26|Rc@31|","v2.02" +"Floating Round to Integer Minus X-form","frim FRT,FRB (Rc=0)|frim. FRT,FRB (Rc=1)","63@0|FRT@6|///@11|FRB@16|488@21|Rc@31|","v2.02" +"Floating Round to Integer Nearest X-form","frin FRT,FRB (Rc=0)|frin. FRT,FRB (Rc=1)","63@0|FRT@6|///@11|FRB@16|392@21|Rc@31|","v2.02" +"Floating Round to Integer Plus X-form","frip FRT,FRB (Rc=0)|frip. FRT,FRB (Rc=1)","63@0|FRT@6|///@11|FRB@16|456@21|Rc@31|","v2.02" +"Floating Round to Integer Toward Zero X-form","friz FRT,FRB (Rc=0)|friz. FRT,FRB (Rc=1)","63@0|FRT@6|///@11|FRB@16|424@21|Rc@31|","v2.02" +"Floating Reciprocal Square Root Estimate Single A-form","frsqrtes FRT,FRB (Rc=0)|frsqrtes. 
FRT,FRB (Rc=1)","59@0|FRT@6|///@11|FRB@16|///@21|26@26|Rc@31|","v2.02" +"Return From Interrupt Doubleword Hypervisor XL-form","hrfid","19@0|///@6|///@11|///@16|274@21|/@31|","v2.02" +"Population Count Bytes X-form","popcntb RA, RS","31@0|RS@6|RA@11|///@16|122@21|/@31|","v2.02" +"Move From One Condition Register Field XFX-form","mfocrf RT,FXM","31@0|RT@6|1@11|FXM@12|/@20|19@21|/@31|","v2.01" +"Move To One Condition Register Field XFX-form","mtocrf FXM,RS","31@0|RS@6|1@11|FXM@12|/@20|144@21|/@31|","v2.01" +"SLB Move From Entry ESID X-form","slbmfee RT,RB","31@0|RT@6|///@11|L@15|RB@16|915@21|/@31|","v2.00" +"SLB Move From Entry VSID X-form","slbmfev RT,RB","31@0|RT@6|///@11|L@15|RB@16|851@21|/@31|","v2.00" +"SLB Move To Entry X-form","slbmte RS,RB","31@0|RS@6|///@11|RB@16|402@21|/@31|","v2.00" +"Return From System Call Vectored XL-form","rfscv","19@0|///@6|///@11|///@16|82@21|/@31|","v3.0" +"System Call Vectored SC-form","scv LEV","17@0|///@6|///@11|///@16|LEV@20|///@27|0@30|1@31|","v3.0" +"Load Quadword DQ-form","lq RTp,DQ(RA)","56@0|RTp@6|RA@11|DQ@16|///@28|","v2.03" +"Store Quadword DS-form","stq RSp,DS(RA)","62@0|RSp@6|RA@11|DS@16|2@30|","v2.03" +"Count Leading Zeros Doubleword X-form","cntlzd RA,RS (Rc=0)|cntlzd. RA,RS (Rc=1)","31@0|RS@6|RA@11|///@16|58@21|Rc@31|","PPC" +"Data Cache Block Flush X-form","dcbf RA,RB,L","31@0|//@6|L@8|RA@11|RB@16|86@21|/@31|","PPC" +"Data Cache Block Store X-form","dcbst RA,RB","31@0|///@6|RA@11|RB@16|54@21|/@31|","PPC" +"Data Cache Block Touch X-form","dcbt RA,RB,TH","31@0|TH@6|RA@11|RB@16|278@21|/@31|","PPC" +"Data Cache Block Touch for Store X-form","dcbtst RA,RB,TH","31@0|TH@6|RA@11|RB@16|246@21|/@31|","PPC" +"Divide Doubleword XO-form","divd RT,RA,RB (OE=0 Rc=0)|divd. RT,RA,RB (OE=0 Rc=1)|divdo RT,RA,RB (OE=1 Rc=0)|divdo. RT,RA,RB (OE=1 Rc=1)","31@0|RT@6|RA@11|RB@16|OE@21|489@22|Rc@31|","PPC" +"Divide Doubleword Unsigned XO-form","divdu RT,RA,RB (OE=0 Rc=0)|divdu. RT,RA,RB (OE=0 Rc=1)|divduo RT,RA,RB (OE=1 Rc=0)|divduo. 
RT,RA,RB (OE=1 Rc=1)","31@0|RT@6|RA@11|RB@16|OE@21|457@22|Rc@31|","PPC" +"Divide Word XO-form","divw RT,RA,RB (OE=0 Rc=0)|divw. RT,RA,RB (OE=0 Rc=1)|divwo RT,RA,RB (OE=1 Rc=0)|divwo. RT,RA,RB (OE=1 Rc=1)","31@0|RT@6|RA@11|RB@16|OE@21|491@22|Rc@31|","PPC" +"Divide Word Unsigned XO-form","divwu RT,RA,RB (OE=0 Rc=0)|divwu. RT,RA,RB (OE=0 Rc=1)|divwuo RT,RA,RB (OE=1 Rc=0)|divwuo. RT,RA,RB (OE=1 Rc=1)","31@0|RT@6|RA@11|RB@16|OE@21|459@22|Rc@31|","PPC" +"Enforce In-order Execution of I/O X-form","eieio","31@0|///@6|///@11|///@16|854@21|/@31|","PPC" +"Extend Sign Byte X-form","extsb RA,RS (Rc=0)|extsb. RA,RS (Rc=1)","31@0|RS@6|RA@11|///@16|954@21|Rc@31|","PPC" +"Extend Sign Word X-form","extsw RA,RS (Rc=0)|extsw. RA,RS (Rc=1)","31@0|RS@6|RA@11|///@16|986@21|Rc@31|","PPC" +"Floating Add Single A-form","fadds FRT,FRA,FRB (Rc=0)|fadds. FRT,FRA,FRB (Rc=1)","59@0|FRT@6|FRA@11|FRB@16|///@21|21@26|Rc@31|","PPC" +"Floating Convert with round Signed Doubleword to Double-Precision format X-form","fcfid FRT,FRB (Rc=0)|fcfid. FRT,FRB (Rc=1)","63@0|FRT@6|///@11|FRB@16|846@21|Rc@31|","PPC" +"Floating Convert with round Double-Precision To Signed Doubleword format X-form","fctid FRT,FRB (Rc=0)|fctid. FRT,FRB (Rc=1)","63@0|FRT@6|///@11|FRB@16|814@21|Rc@31|","PPC" +"Floating Convert with truncate Double-Precision To Signed Doubleword format X-form","fctidz FRT,FRB (Rc=0)|fctidz. FRT,FRB (Rc=1)","63@0|FRT@6|///@11|FRB@16|815@21|Rc@31|","PPC" +"Floating Divide Single A-form","fdivs FRT,FRA,FRB (Rc=0)|fdivs. FRT,FRA,FRB (Rc=1)","59@0|FRT@6|FRA@11|FRB@16|///@21|18@26|Rc@31|","PPC" +"Floating Multiply-Add Single A-form","fmadds FRT,FRA,FRC,FRB (Rc=0)|fmadds. FRT,FRA,FRC,FRB (Rc=1)","59@0|FRT@6|FRA@11|FRB@16|FRC@21|29@26|Rc@31|","PPC" +"Floating Multiply-Subtract Single A-form","fmsubs FRT,FRA,FRC,FRB (Rc=0)|fmsubs. FRT,FRA,FRC,FRB (Rc=1)","59@0|FRT@6|FRA@11|FRB@16|FRC@21|28@26|Rc@31|","PPC" +"Floating Multiply Single A-form","fmuls FRT,FRA,FRC (Rc=0)|fmuls. 
FRT,FRA,FRC (Rc=1)","59@0|FRT@6|FRA@11|///@16|FRC@21|25@26|Rc@31|","PPC" +"Floating Negative Multiply-Add Single A-form","fnmadds FRT,FRA,FRC,FRB (Rc=0)|fnmadds. FRT,FRA,FRC,FRB (Rc=1)","59@0|FRT@6|FRA@11|FRB@16|FRC@21|31@26|Rc@31|","PPC" +"Floating Negative Multiply-Subtract Single A-form","fnmsubs FRT,FRA,FRC,FRB (Rc=0)|fnmsubs. FRT,FRA,FRC,FRB (Rc=1)","59@0|FRT@6|FRA@11|FRB@16|FRC@21|30@26|Rc@31|","PPC" +"Floating Reciprocal Estimate Single A-form","fres FRT,FRB (Rc=0)|fres. FRT,FRB (Rc=1)","59@0|FRT@6|///@11|FRB@16|///@21|24@26|Rc@31|","PPC" +"Floating Reciprocal Square Root Estimate A-form","frsqrte FRT,FRB (Rc=0)|frsqrte. FRT,FRB (Rc=1)","63@0|FRT@6|///@11|FRB@16|///@21|26@26|Rc@31|","PPC" +"Floating Select A-form","fsel FRT,FRA,FRC,FRB (Rc=0)|fsel. FRT,FRA,FRC,FRB (Rc=1)","63@0|FRT@6|FRA@11|FRB@16|FRC@21|23@26|Rc@31|","PPC" +"Floating Square Root Single A-form","fsqrts FRT,FRB (Rc=0)|fsqrts. FRT,FRB (Rc=1)","59@0|FRT@6|///@11|FRB@16|///@21|22@26|Rc@31|","PPC" +"Floating Subtract Single A-form","fsubs FRT,FRA,FRB (Rc=0)|fsubs. 
FRT,FRA,FRB (Rc=1)","59@0|FRT@6|FRA@11|FRB@16|///@21|20@26|Rc@31|","PPC" +"Instruction Cache Block Invalidate X-form","icbi RA,RB","31@0|///@6|RA@11|RB@16|982@21|/@31|","PPC" +"Load Doubleword DS-form","ld RT,DS(RA)","58@0|RT@6|RA@11|DS@16|0@30|","PPC" +"Load Doubleword And Reserve Indexed X-form","ldarx RT,RA,RB,EH","31@0|RT@6|RA@11|RB@16|84@21|EH@31|","PPC" +"Load Doubleword with Update DS-form","ldu RT,DS(RA)","58@0|RT@6|RA@11|DS@16|1@30|","PPC" +"Load Doubleword with Update Indexed X-form","ldux RT,RA,RB","31@0|RT@6|RA@11|RB@16|53@21|/@31|","PPC" +"Load Doubleword Indexed X-form","ldx RT,RA,RB","31@0|RT@6|RA@11|RB@16|21@21|/@31|","PPC" +"Load Word Algebraic DS-form","lwa RT,DS(RA)","58@0|RT@6|RA@11|DS@16|2@30|","PPC" +"Load Word & Reserve Indexed X-form","lwarx RT,RA,RB,EH","31@0|RT@6|RA@11|RB@16|20@21|EH@31|","PPC" +"Load Word Algebraic with Update Indexed X-form","lwaux RT,RA,RB","31@0|RT@6|RA@11|RB@16|373@21|/@31|","PPC" +"Load Word Algebraic Indexed X-form","lwax RT,RA,RB","31@0|RT@6|RA@11|RB@16|341@21|/@31|","PPC" +"Move From Time Base XFX-form","mftb RT,TBR","31@0|RT@6|tbr@11|371@21|/@31|","PPC" +"Move To MSR Doubleword X-form","mtmsrd RS,L","31@0|RS@6|///@11|L@15|///@16|178@21|/@31|","PPC" +"Multiply High Doubleword XO-form","mulhd RT,RA,RB (Rc=0)|mulhd. RT,RA,RB (Rc=1)","31@0|RT@6|RA@11|RB@16|/@21|73@22|Rc@31|","PPC" +"Multiply High Doubleword Unsigned XO-form","mulhdu RT,RA,RB (Rc=0)|mulhdu. RT,RA,RB (Rc=1)","31@0|RT@6|RA@11|RB@16|/@21|9@22|Rc@31|","PPC" +"Multiply High Word XO-form","mulhw RT,RA,RB (Rc=0)|mulhw. RT,RA,RB (Rc=1)","31@0|RT@6|RA@11|RB@16|/@21|75@22|Rc@31|","PPC" +"Multiply High Word Unsigned XO-form","mulhwu RT,RA,RB (Rc=0)|mulhwu. RT,RA,RB (Rc=1)","31@0|RT@6|RA@11|RB@16|/@21|11@22|Rc@31|","PPC" +"Multiply Low Doubleword XO-form","mulld RT,RA,RB (OE=0 Rc=0)|mulld. RT,RA,RB (OE=0 Rc=1)|mulldo RT,RA,RB (OE=1 Rc=0)|mulldo. 
RT,RA,RB (OE=1 Rc=1)","31@0|RT@6|RA@11|RB@16|OE@21|233@22|Rc@31|","PPC" +"Return from Interrupt Doubleword XL-form","rfid","19@0|///@6|///@11|///@16|18@21|/@31|","PPC" +"Rotate Left Doubleword then Clear Left MDS-form","rldcl RA,RS,RB,MB (Rc=0)|rldcl. RA,RS,RB,MB (Rc=1)","30@0|RS@6|RA@11|RB@16|mb@21|8@27|Rc@31|","PPC" +"Rotate Left Doubleword then Clear Right MDS-form","rldcr RA,RS,RB,ME (Rc=0)|rldcr. RA,RS,RB,ME (Rc=1)","30@0|RS@6|RA@11|RB@16|me@21|9@27|Rc@31|","PPC" +"Rotate Left Doubleword Immediate then Clear MD-form","rldic RA,RS,SH,MB (Rc=0)|rldic. RA,RS,SH,MB (Rc=1)","30@0|RS@6|RA@11|sh@16|mb@21|2@27|sh@30|Rc@31|","PPC" +"Rotate Left Doubleword Immediate then Clear Left MD-form","rldicl RA,RS,SH,MB (Rc=0)|rldicl. RA,RS,SH,MB (Rc=1)","30@0|RS@6|RA@11|sh@16|mb@21|0@27|sh@30|Rc@31|","PPC" +"Rotate Left Doubleword Immediate then Clear Right MD-form","rldicr RA,RS,SH,ME (Rc=0)|rldicr. RA,RS,SH,ME (Rc=1)","30@0|RS@6|RA@11|sh@16|me@21|1@27|sh@30|Rc@31|","PPC" +"Rotate Left Doubleword Immediate then Mask Insert MD-form","rldimi RA,RS,SH,MB (Rc=0)|rldimi. RA,RS,SH,MB (Rc=1)","30@0|RS@6|RA@11|sh@16|mb@21|3@27|sh@30|Rc@31|","PPC" +"System Call SC-form","sc LEV","17@0|///@6|///@11|///@16|LEV@20|///@27|1@30|/@31|","PPC" +"SLB Invalidate All X-form","slbia IH","31@0|//@6|IH@8|///@11|///@16|498@21|/@31|","PPC" +"SLB Invalidate Entry X-form","slbie RB","31@0|///@6|///@11|RB@16|434@21|/@31|","PPC" +"Shift Left Doubleword X-form","sld RA,RS,RB (Rc=0)|sld. RA,RS,RB (Rc=1)","31@0|RS@6|RA@11|RB@16|27@21|Rc@31|","PPC" +"Shift Right Algebraic Doubleword X-form","srad RA,RS,RB (Rc=0)|srad. RA,RS,RB (Rc=1)","31@0|RS@6|RA@11|RB@16|794@21|Rc@31|","PPC" +"Shift Right Algebraic Doubleword Immediate XS-form","sradi RA,RS,SH (Rc=0)|sradi. RA,RS,SH (Rc=1)","31@0|RS@6|RA@11|sh@16|413@21|sh@30|Rc@31|","PPC" +"Shift Right Doubleword X-form","srd RA,RS,RB (Rc=0)|srd. 
RA,RS,RB (Rc=1)","31@0|RS@6|RA@11|RB@16|539@21|Rc@31|","PPC" +"Store Doubleword DS-form","std RS,DS(RA)","62@0|RS@6|RA@11|DS@16|0@30|","PPC" +"Store Doubleword Conditional Indexed X-form","stdcx. RS,RA,RB","31@0|RS@6|RA@11|RB@16|214@21|1@31|","PPC" +"Store Doubleword with Update DS-form","stdu RS,DS(RA)","62@0|RS@6|RA@11|DS@16|1@30|","PPC" +"Store Doubleword with Update Indexed X-form","stdux RS,RA,RB","31@0|RS@6|RA@11|RB@16|181@21|/@31|","PPC" +"Store Doubleword Indexed X-form","stdx RS,RA,RB","31@0|RS@6|RA@11|RB@16|149@21|/@31|","PPC" +"Store Floating-Point as Integer Word Indexed X-form","stfiwx FRS,RA,RB","31@0|FRS@6|RA@11|RB@16|983@21|/@31|","PPC" +"Store Word Conditional Indexed X-form","stwcx. RS,RA,RB","31@0|RS@6|RA@11|RB@16|150@21|1@31|","PPC" +"Subtract From XO-form","subf RT,RA,RB (OE=0 Rc=0)|subf. RT,RA,RB (OE=0 Rc=1)|subfo RT,RA,RB (OE=1 Rc=0)|subfo. RT,RA,RB (OE=1 Rc=1)","31@0|RT@6|RA@11|RB@16|OE@21|40@22|Rc@31|","PPC" +"Trap Doubleword X-form","td TO,RA,RB","31@0|TO@6|RA@11|RB@16|68@21|/@31|","PPC" +"Trap Doubleword Immediate D-form","tdi TO,RA,SI","2@0|TO@6|RA@11|SI@16|","PPC" +"TLB Synchronize X-form","tlbsync","31@0|///@6|///@11|///@16|566@21|/@31|","PPC" +"Floating Convert with round Double-Precision To Signed Word format X-form","fctiw FRT,FRB (Rc=0)|fctiw. FRT,FRB (Rc=1)","63@0|FRT@6|///@11|FRB@16|14@21|Rc@31|","P2" +"Floating Convert with truncate Double-Precision To Signed Word format X-form","fctiwz FRT,FRB (Rc=0)|fctiwz. FRT,FRB (Rc=1)","63@0|FRT@6|///@11|FRB@16|15@21|Rc@31|","P2" +"Floating Square Root A-form","fsqrt FRT,FRB (Rc=0)|fsqrt. FRT,FRB (Rc=1)","63@0|FRT@6|///@11|FRB@16|///@21|22@26|Rc@31|","P2" +"Add XO-form","add RT,RA,RB (OE=0 Rc=0)|add. RT,RA,RB (OE=0 Rc=1)|addo RT,RA,RB (OE=1 Rc=0)|addo. RT,RA,RB (OE=1 Rc=1)","31@0|RT@6|RA@11|RB@16|OE@21|266@22|Rc@31|","P1" +"Add Carrying XO-form","addc RT,RA,RB (OE=0 Rc=0)|addc. RT,RA,RB (OE=0 Rc=1)|addco RT,RA,RB (OE=1 Rc=0)|addco. 
RT,RA,RB (OE=1 Rc=1)","31@0|RT@6|RA@11|RB@16|OE@21|10@22|Rc@31|","P1" +"Add Extended XO-form","adde RT,RA,RB (OE=0 Rc=0)|adde. RT,RA,RB (OE=0 Rc=1)|addeo RT,RA,RB (OE=1 Rc=0)|addeo. RT,RA,RB (OE=1 Rc=1)","31@0|RT@6|RA@11|RB@16|OE@21|138@22|Rc@31|","P1" +"Add Immediate D-form","addi RT,RA,SI|li RT,SI (RA=0)","14@0|RT@6|RA@11|SI@16|","P1" +"Add Immediate Carrying D-form","addic RT,RA,SI","12@0|RT@6|RA@11|SI@16|","P1" +"Add Immediate Carrying and Record D-form","addic. RT,RA,SI","13@0|RT@6|RA@11|SI@16|","P1" +"Add Immediate Shifted D-form","addis RT,RA,SI|lis RT,SI (RA=0)","15@0|RT@6|RA@11|SI@16|","P1" +"Add to Minus One Extended XO-form","addme RT,RA (OE=0 Rc=0)|addme. RT,RA (OE=0 Rc=1)|addmeo RT,RA (OE=1 Rc=0)|addmeo. RT,RA (OE=1 Rc=1)","31@0|RT@6|RA@11|///@16|OE@21|234@22|Rc@31|","P1" +"Add to Zero Extended XO-form","addze RT,RA (OE=0 Rc=0)|addze. RT,RA (OE=0 Rc=1)|addzeo RT,RA (OE=1 Rc=0)|addzeo. RT,RA (OE=1 Rc=1)","31@0|RT@6|RA@11|///@16|OE@21|202@22|Rc@31|","P1" +"AND X-form","and RA,RS,RB (Rc=0)|and. RA,RS,RB (Rc=1)","31@0|RS@6|RA@11|RB@16|28@21|Rc@31|","P1" +"AND with Complement X-form","andc RA,RS,RB (Rc=0)|andc. RA,RS,RB (Rc=1)","31@0|RS@6|RA@11|RB@16|60@21|Rc@31|","P1" +"AND Immediate D-form","andi. RA,RS,UI","28@0|RS@6|RA@11|UI@16|","P1" +"AND Immediate Shifted D-form","andis. 
RA,RS,UI","29@0|RS@6|RA@11|UI@16|","P1" +"Branch I-form","b target_addr (AA=0 LK=0)|ba target_addr (AA=1 LK=0)|bl target_addr (AA=0 LK=1)|bla target_addr (AA=1 LK=1)","18@0|LI@6|AA@30|LK@31|","P1" +"Branch Conditional B-form","bc BO,BI,target_addr (AA=0 LK=0)|bca BO,BI,target_addr (AA=1 LK=0)|bcl BO,BI,target_addr (AA=0 LK=1)|bcla BO,BI,target_addr (AA=1 LK=1)","16@0|BO@6|BI@11|BD@16|AA@30|LK@31|","P1" +"Branch Conditional to Count Register XL-form","bcctr BO,BI,BH (LK=0)|bcctrl BO,BI,BH (LK=1)","19@0|BO@6|BI@11|///@16|BH@19|528@21|LK@31|","P1" +"Branch Conditional to Link Register XL-form","bclr BO,BI,BH (LK=0)|bclrl BO,BI,BH (LK=1)","19@0|BO@6|BI@11|///@16|BH@19|16@21|LK@31|","P1" +"Compare X-form","cmp BF,L,RA,RB|cmpw BF,RA,RB (L=0)|cmpd BF,RA,RB (L=1)","31@0|BF@6|/@9|L@10|RA@11|RB@16|0@21|/@31|","P1" +"Compare Immediate D-form","cmpi BF,L,RA,SI|cmpwi BF,RA,SI (L=0)|cmpdi BF,RA,SI (L=1)","11@0|BF@6|/@9|L@10|RA@11|SI@16|","P1" +"Compare Logical X-form","cmpl BF,L,RA,RB|cmplw BF,RA,RB (L=0)|cmpld BF,RA,RB (L=1)","31@0|BF@6|/@9|L@10|RA@11|RB@16|32@21|/@31|","P1" +"Compare Logical Immediate D-form","cmpli BF,L,RA,UI|cmplwi BF,RA,UI (L=0)|cmpldi BF,RA,UI (L=1)","10@0|BF@6|/@9|L@10|RA@11|UI@16|","P1" +"Count Leading Zeros Word X-form","cntlzw RA,RS (Rc=0)|cntlzw. 
RA,RS (Rc=1)","31@0|RS@6|RA@11|///@16|26@21|Rc@31|","P1" +"Condition Register AND XL-form","crand BT,BA,BB","19@0|BT@6|BA@11|BB@16|257@21|/@31|","P1" +"Condition Register AND with Complement XL-form","crandc BT,BA,BB","19@0|BT@6|BA@11|BB@16|129@21|/@31|","P1" +"Condition Register Equivalent XL-form","creqv BT,BA,BB","19@0|BT@6|BA@11|BB@16|289@21|/@31|","P1" +"Condition Register NAND XL-form","crnand BT,BA,BB","19@0|BT@6|BA@11|BB@16|225@21|/@31|","P1" +"Condition Register NOR XL-form","crnor BT,BA,BB","19@0|BT@6|BA@11|BB@16|33@21|/@31|","P1" +"Condition Register OR XL-form","cror BT,BA,BB","19@0|BT@6|BA@11|BB@16|449@21|/@31|","P1" +"Condition Register OR with Complement XL-form","crorc BT,BA,BB","19@0|BT@6|BA@11|BB@16|417@21|/@31|","P1" +"Condition Register XOR XL-form","crxor BT,BA,BB","19@0|BT@6|BA@11|BB@16|193@21|/@31|","P1" +"Data Cache Block set to Zero X-form","dcbz RA,RB","31@0|///@6|RA@11|RB@16|1014@21|/@31|","P1" +"Equivalent X-form","eqv RA,RS,RB (Rc=0)|eqv. RA,RS,RB (Rc=1)","31@0|RS@6|RA@11|RB@16|284@21|Rc@31|","P1" +"Extend Sign Halfword X-form","extsh RA,RS (Rc=0)|extsh. RA,RS (Rc=1)","31@0|RS@6|RA@11|///@16|922@21|Rc@31|","P1" +"Floating Absolute Value X-form","fabs FRT,FRB (Rc=0)|fabs. FRT,FRB (Rc=1)","63@0|FRT@6|///@11|FRB@16|264@21|Rc@31|","P1" +"Floating Add A-form","fadd FRT,FRA,FRB (Rc=0)|fadd. FRT,FRA,FRB (Rc=1)","63@0|FRT@6|FRA@11|FRB@16|///@21|21@26|Rc@31|","P1" +"Floating Compare Ordered X-form","fcmpo BF,FRA,FRB","63@0|BF@6|//@9|FRA@11|FRB@16|32@21|/@31|","P1" +"Floating Compare Unordered X-form","fcmpu BF,FRA,FRB","63@0|BF@6|//@9|FRA@11|FRB@16|0@21|/@31|","P1" +"Floating Divide A-form","fdiv FRT,FRA,FRB (Rc=0)|fdiv. FRT,FRA,FRB (Rc=1)","63@0|FRT@6|FRA@11|FRB@16|///@21|18@26|Rc@31|","P1" +"Floating Multiply-Add A-form","fmadd FRT,FRA,FRC,FRB (Rc=0)|fmadd. FRT,FRA,FRC,FRB (Rc=1)","63@0|FRT@6|FRA@11|FRB@16|FRC@21|29@26|Rc@31|","P1" +"Floating Move Register X-form","fmr FRT,FRB (Rc=0)|fmr. 
FRT,FRB (Rc=1)","63@0|FRT@6|///@11|FRB@16|72@21|Rc@31|","P1" +"Floating Multiply-Subtract A-form","fmsub FRT,FRA,FRC,FRB (Rc=0)|fmsub. FRT,FRA,FRC,FRB (Rc=1)","63@0|FRT@6|FRA@11|FRB@16|FRC@21|28@26|Rc@31|","P1" +"Floating Multiply A-form","fmul FRT,FRA,FRC (Rc=0)|fmul. FRT,FRA,FRC (Rc=1)","63@0|FRT@6|FRA@11|///@16|FRC@21|25@26|Rc@31|","P1" +"Floating Negative Absolute Value X-form","fnabs FRT,FRB (Rc=0)|fnabs. FRT,FRB (Rc=1)","63@0|FRT@6|///@11|FRB@16|136@21|Rc@31|","P1" +"Floating Negate X-form","fneg FRT,FRB (Rc=0)|fneg. FRT,FRB (Rc=1)","63@0|FRT@6|///@11|FRB@16|40@21|Rc@31|","P1" +"Floating Negative Multiply-Add A-form","fnmadd FRT,FRA,FRC,FRB (Rc=0)|fnmadd. FRT,FRA,FRC,FRB (Rc=1)","63@0|FRT@6|FRA@11|FRB@16|FRC@21|31@26|Rc@31|","P1" +"Floating Negative Multiply-Subtract A-form","fnmsub FRT,FRA,FRC,FRB (Rc=0)|fnmsub. FRT,FRA,FRC,FRB (Rc=1)","63@0|FRT@6|FRA@11|FRB@16|FRC@21|30@26|Rc@31|","P1" +"Floating Round to Single-Precision X-form","frsp FRT,FRB (Rc=0)|frsp. FRT,FRB (Rc=1)","63@0|FRT@6|///@11|FRB@16|12@21|Rc@31|","P1" +"Floating Subtract A-form","fsub FRT,FRA,FRB (Rc=0)|fsub. 
FRT,FRA,FRB (Rc=1)","63@0|FRT@6|FRA@11|FRB@16|///@21|20@26|Rc@31|","P1" +"Instruction Synchronize XL-form","isync","19@0|///@6|///@11|///@16|150@21|/@31|","P1" +"Load Byte and Zero D-form","lbz RT,D(RA)","34@0|RT@6|RA@11|D@16|","P1" +"Load Byte and Zero with Update D-form","lbzu RT,D(RA)","35@0|RT@6|RA@11|D@16|","P1" +"Load Byte and Zero with Update Indexed X-form","lbzux RT,RA,RB","31@0|RT@6|RA@11|RB@16|119@21|/@31|","P1" +"Load Byte and Zero Indexed X-form","lbzx RT,RA,RB","31@0|RT@6|RA@11|RB@16|87@21|/@31|","P1" +"Load Floating-Point Double D-form","lfd FRT,D(RA)","50@0|FRT@6|RA@11|D@16|","P1" +"Load Floating-Point Double with Update D-form","lfdu FRT,D(RA)","51@0|FRT@6|RA@11|D@16|","P1" +"Load Floating-Point Double with Update Indexed X-form","lfdux FRT,RA,RB","31@0|FRT@6|RA@11|RB@16|631@21|/@31|","P1" +"Load Floating-Point Double Indexed X-form","lfdx FRT,RA,RB","31@0|FRT@6|RA@11|RB@16|599@21|/@31|","P1" +"Load Floating-Point Single D-form","lfs FRT,D(RA)","48@0|FRT@6|RA@11|D@16|","P1" +"Load Floating-Point Single with Update D-form","lfsu FRT,D(RA)","49@0|FRT@6|RA@11|D@16|","P1" +"Load Floating-Point Single with Update Indexed X-form","lfsux FRT,RA,RB","31@0|FRT@6|RA@11|RB@16|567@21|/@31|","P1" +"Load Floating-Point Single Indexed X-form","lfsx FRT,RA,RB","31@0|FRT@6|RA@11|RB@16|535@21|/@31|","P1" +"Load Halfword Algebraic D-form","lha RT,D(RA)","42@0|RT@6|RA@11|D@16|","P1" +"Load Halfword Algebraic with Update D-form","lhau RT,D(RA)","43@0|RT@6|RA@11|D@16|","P1" +"Load Halfword Algebraic with Update Indexed X-form","lhaux RT,RA,RB","31@0|RT@6|RA@11|RB@16|375@21|/@31|","P1" +"Load Halfword Algebraic Indexed X-form","lhax RT,RA,RB","31@0|RT@6|RA@11|RB@16|343@21|/@31|","P1" +"Load Halfword Byte-Reverse Indexed X-form","lhbrx RT,RA,RB","31@0|RT@6|RA@11|RB@16|790@21|/@31|","P1" +"Load Halfword and Zero D-form","lhz RT,D(RA)","40@0|RT@6|RA@11|D@16|","P1" +"Load Halfword and Zero with Update D-form","lhzu RT,D(RA)","41@0|RT@6|RA@11|D@16|","P1" +"Load Halfword and 
Zero with Update Indexed X-form","lhzux RT,RA,RB","31@0|RT@6|RA@11|RB@16|311@21|/@31|","P1" +"Load Halfword and Zero Indexed X-form","lhzx RT,RA,RB","31@0|RT@6|RA@11|RB@16|279@21|/@31|","P1" +"Load Multiple Word D-form","lmw RT,D(RA)","46@0|RT@6|RA@11|D@16|","P1" +"Load String Word Immediate X-form","lswi RT,RA,NB","31@0|RT@6|RA@11|NB@16|597@21|/@31|","P1" +"Load String Word Indexed X-form","lswx RT,RA,RB","31@0|RT@6|RA@11|RB@16|533@21|/@31|","P1" +"Load Word Byte-Reverse Indexed X-form","lwbrx RT,RA,RB","31@0|RT@6|RA@11|RB@16|534@21|/@31|","P1" +"Load Word and Zero D-form","lwz RT,D(RA)","32@0|RT@6|RA@11|D@16|","P1" +"Load Word and Zero with Update D-form","lwzu RT,D(RA)","33@0|RT@6|RA@11|D@16|","P1" +"Load Word and Zero with Update Indexed X-form","lwzux RT,RA,RB","31@0|RT@6|RA@11|RB@16|55@21|/@31|","P1" +"Load Word and Zero Indexed X-form","lwzx RT,RA,RB","31@0|RT@6|RA@11|RB@16|23@21|/@31|","P1" +"Move Condition Register Field XL-form","mcrf BF,BFA","19@0|BF@6|//@9|BFA@11|//@14|///@16|0@21|/@31|","P1" +"Move to Condition Register from FPSCR X-form","mcrfs BF,BFA","63@0|BF@6|//@9|BFA@11|//@14|///@16|64@21|/@31|","P1" +"Move From Condition Register XFX-form","mfcr RT","31@0|RT@6|0@11|///@12|/@20|19@21|/@31|","P1" +"Move From FPSCR X-form","mffs FRT (Rc=0)|mffs. FRT (Rc=1)","63@0|FRT@6|0@11|///@16|583@21|Rc@31|","P1" +"Move From MSR X-form","mfmsr RT","31@0|RT@6|///@11|///@16|83@21|/@31|","P1" +"Move From Special Purpose Register XFX-form","mfspr RT,SPR","31@0|RT@6|spr@11|339@21|/@31|","P1" +"Move To Condition Register Fields XFX-form","mtcrf FXM,RS","31@0|RS@6|0@11|FXM@12|/@20|144@21|/@31|","P1" +"Move To FPSCR Bit 0 X-form","mtfsb0 BT (Rc=0)|mtfsb0. BT (Rc=1)","63@0|BT@6|///@11|///@16|70@21|Rc@31|","P1" +"Move To FPSCR Bit 1 X-form","mtfsb1 BT (Rc=0)|mtfsb1. BT (Rc=1)","63@0|BT@6|///@11|///@16|38@21|Rc@31|","P1" +"Move To FPSCR Fields XFL-form","mtfsf FLM,FRB,L,W (Rc=0)|mtfsf. 
FLM,FRB,L,W (Rc=1)","63@0|L@6|FLM@7|W@15|FRB@16|711@21|Rc@31|","P1" +"Move To FPSCR Field Immediate X-form","mtfsfi BF,U,W (Rc=0)|mtfsfi. BF,U,W (Rc=1)","63@0|BF@6|//@9|///@11|W@15|U@16|/@20|134@21|Rc@31|","P1" +"Move To MSR X-form","mtmsr RS,L","31@0|RS@6|///@11|L@15|///@16|146@21|/@31|","P1" +"Move To Special Purpose Register XFX-form","mtspr SPR,RS","31@0|RS@6|spr@11|467@21|/@31|","P1" +"Multiply Low Immediate D-form","mulli RT,RA,SI","7@0|RT@6|RA@11|SI@16|","P1" +"Multiply Low Word XO-form","mullw RT,RA,RB (OE=0 Rc=0)|mullw. RT,RA,RB (OE=0 Rc=1)|mullwo RT,RA,RB (OE=1 Rc=0)|mullwo. RT,RA,RB (OE=1 Rc=1)","31@0|RT@6|RA@11|RB@16|OE@21|235@22|Rc@31|","P1" +"NAND X-form","nand RA,RS,RB (Rc=0)|nand. RA,RS,RB (Rc=1)","31@0|RS@6|RA@11|RB@16|476@21|Rc@31|","P1" +"Negate XO-form","neg RT,RA (OE=0 Rc=0)|neg. RT,RA (OE=0 Rc=1)|nego RT,RA (OE=1 Rc=0)|nego. RT,RA (OE=1 Rc=1)","31@0|RT@6|RA@11|///@16|OE@21|104@22|Rc@31|","P1" +"NOR X-form","nor RA,RS,RB (Rc=0)|nor. RA,RS,RB (Rc=1)","31@0|RS@6|RA@11|RB@16|124@21|Rc@31|","P1" +"OR X-form","or RA,RS,RB (Rc=0)|or. RA,RS,RB (Rc=1)","31@0|RS@6|RA@11|RB@16|444@21|Rc@31|","P1" +"OR with Complement X-form","orc RA,RS,RB (Rc=0)|orc. RA,RS,RB (Rc=1)","31@0|RS@6|RA@11|RB@16|412@21|Rc@31|","P1" +"OR Immediate D-form","ori RA,RS,UI|nop (RA=0 RS=0 UI=0)","24@0|RS@6|RA@11|UI@16|","P1" +"OR Immediate Shifted D-form","oris RA,RS,UI","25@0|RS@6|RA@11|UI@16|","P1" +"Rotate Left Word Immediate then Mask Insert M-form","rlwimi RA,RS,SH,MB,ME (Rc=0)|rlwimi. RA,RS,SH,MB,ME (Rc=1)","20@0|RS@6|RA@11|SH@16|MB@21|ME@26|Rc@31|","P1" +"Rotate Left Word Immediate then AND with Mask M-form","rlwinm RA,RS,SH,MB,ME (Rc=0)|rlwinm. RA,RS,SH,MB,ME (Rc=1)","21@0|RS@6|RA@11|SH@16|MB@21|ME@26|Rc@31|","P1" +"Rotate Left Word then AND with Mask M-form","rlwnm RA,RS,RB,MB,ME (Rc=0)|rlwnm. RA,RS,RB,MB,ME (Rc=1)","23@0|RS@6|RA@11|RB@16|MB@21|ME@26|Rc@31|","P1" +"Shift Left Word X-form","slw RA,RS,RB (Rc=0)|slw. 
RA,RS,RB (Rc=1)","31@0|RS@6|RA@11|RB@16|24@21|Rc@31|","P1" +"Shift Right Algebraic Word X-form","sraw RA,RS,RB (Rc=0)|sraw. RA,RS,RB (Rc=1)","31@0|RS@6|RA@11|RB@16|792@21|Rc@31|","P1" +"Shift Right Algebraic Word Immediate X-form","srawi RA,RS,SH (Rc=0)|srawi. RA,RS,SH (Rc=1)","31@0|RS@6|RA@11|SH@16|824@21|Rc@31|","P1" +"Shift Right Word X-form","srw RA,RS,RB (Rc=0)|srw. RA,RS,RB (Rc=1)","31@0|RS@6|RA@11|RB@16|536@21|Rc@31|","P1" +"Store Byte D-form","stb RS,D(RA)","38@0|RS@6|RA@11|D@16|","P1" +"Store Byte with Update D-form","stbu RS,D(RA)","39@0|RS@6|RA@11|D@16|","P1" +"Store Byte with Update Indexed X-form","stbux RS,RA,RB","31@0|RS@6|RA@11|RB@16|247@21|/@31|","P1" +"Store Byte Indexed X-form","stbx RS,RA,RB","31@0|RS@6|RA@11|RB@16|215@21|/@31|","P1" +"Store Floating-Point Double D-form","stfd FRS,D(RA)","54@0|FRS@6|RA@11|D@16|","P1" +"Store Floating-Point Double with Update D-form","stfdu FRS,D(RA)","55@0|FRS@6|RA@11|D@16|","P1" +"Store Floating-Point Double with Update Indexed X-form","stfdux FRS,RA,RB","31@0|FRS@6|RA@11|RB@16|759@21|/@31|","P1" +"Store Floating-Point Double Indexed X-form","stfdx FRS,RA,RB","31@0|FRS@6|RA@11|RB@16|727@21|/@31|","P1" +"Store Floating-Point Single D-form","stfs FRS,D(RA)","52@0|FRS@6|RA@11|D@16|","P1" +"Store Floating-Point Single with Update D-form","stfsu FRS,D(RA)","53@0|FRS@6|RA@11|D@16|","P1" +"Store Floating-Point Single with Update Indexed X-form","stfsux FRS,RA,RB","31@0|FRS@6|RA@11|RB@16|695@21|/@31|","P1" +"Store Floating-Point Single Indexed X-form","stfsx FRS,RA,RB","31@0|FRS@6|RA@11|RB@16|663@21|/@31|","P1" +"Store Halfword D-form","sth RS,D(RA)","44@0|RS@6|RA@11|D@16|","P1" +"Store Halfword Byte-Reverse Indexed X-form","sthbrx RS,RA,RB","31@0|RS@6|RA@11|RB@16|918@21|/@31|","P1" +"Store Halfword with Update D-form","sthu RS,D(RA)","45@0|RS@6|RA@11|D@16|","P1" +"Store Halfword with Update Indexed X-form","sthux RS,RA,RB","31@0|RS@6|RA@11|RB@16|439@21|/@31|","P1" +"Store Halfword Indexed X-form","sthx 
RS,RA,RB","31@0|RS@6|RA@11|RB@16|407@21|/@31|","P1" +"Store Multiple Word D-form","stmw RS,D(RA)","47@0|RS@6|RA@11|D@16|","P1" +"Store String Word Immediate X-form","stswi RS,RA,NB","31@0|RS@6|RA@11|NB@16|725@21|/@31|","P1" +"Store String Word Indexed X-form","stswx RS,RA,RB","31@0|RS@6|RA@11|RB@16|661@21|/@31|","P1" +"Store Word D-form","stw RS,D(RA)","36@0|RS@6|RA@11|D@16|","P1" +"Store Word Byte-Reverse Indexed X-form","stwbrx RS,RA,RB","31@0|RS@6|RA@11|RB@16|662@21|/@31|","P1" +"Store Word with Update D-form","stwu RS,D(RA)","37@0|RS@6|RA@11|D@16|","P1" +"Store Word with Update Indexed X-form","stwux RS,RA,RB","31@0|RS@6|RA@11|RB@16|183@21|/@31|","P1" +"Store Word Indexed X-form","stwx RS,RA,RB","31@0|RS@6|RA@11|RB@16|151@21|/@31|","P1" +"Subtract From Carrying XO-form","subfc RT,RA,RB (OE=0 Rc=0)|subfc. RT,RA,RB (OE=0 Rc=1)|subfco RT,RA,RB (OE=1 Rc=0)|subfco. RT,RA,RB (OE=1 Rc=1)","31@0|RT@6|RA@11|RB@16|OE@21|8@22|Rc@31|","P1" +"Subtract From Extended XO-form","subfe RT,RA,RB (OE=0 Rc=0)|subfe. RT,RA,RB (OE=0 Rc=1)|subfeo RT,RA,RB (OE=1 Rc=0)|subfeo. RT,RA,RB (OE=1 Rc=1)","31@0|RT@6|RA@11|RB@16|OE@21|136@22|Rc@31|","P1" +"Subtract From Immediate Carrying D-form","subfic RT,RA,SI","8@0|RT@6|RA@11|SI@16|","P1" +"Subtract From Minus One Extended XO-form","subfme RT,RA (OE=0 Rc=0)|subfme. RT,RA (OE=0 Rc=1)|subfmeo RT,RA (OE=1 Rc=0)|subfmeo. RT,RA (OE=1 Rc=1)","31@0|RT@6|RA@11|///@16|OE@21|232@22|Rc@31|","P1" +"Subtract From Zero Extended XO-form","subfze RT,RA (OE=0 Rc=0)|subfze. RT,RA (OE=0 Rc=1)|subfzeo RT,RA (OE=1 Rc=0)|subfzeo. 
RT,RA (OE=1 Rc=1)","31@0|RT@6|RA@11|///@16|OE@21|200@22|Rc@31|","P1" +"Synchronize X-form","sync L,SC","31@0|//@6|L@8|///@11|SC@14|///@16|598@21|/@31|","P1" +"TLB Invalidate Entry X-form","tlbie RB,RS,RIC,PRS,R","31@0|RS@6|/@11|RIC@12|PRS@14|R@15|RB@16|306@21|/@31|","P1" +"Trap Word X-form","tw TO,RA,RB","31@0|TO@6|RA@11|RB@16|4@21|/@31|","P1" +"Trap Word Immediate D-form","twi TO,RA,SI","3@0|TO@6|RA@11|SI@16|","P1" +"XOR X-form","xor RA,RS,RB (Rc=0)|xor. RA,RS,RB (Rc=1)","31@0|RS@6|RA@11|RB@16|316@21|Rc@31|","P1" +"XOR Immediate D-form","xori RA,RS,UI","26@0|RS@6|RA@11|UI@16|","P1" +"XOR Immediate Shifted D-form","xoris RA,RS,UI","27@0|RS@6|RA@11|UI@16|","P1" From 5e4c51d3ff484467c6ff1b51b6bacc8b70221058 Mon Sep 17 00:00:00 2001 From: "Paul E. Murphy" Date: Tue, 17 Aug 2021 14:11:25 -0500 Subject: [PATCH 003/200] ppc64asm: fix plan9 style decoding issues This reworks the decoding of CR bit fields to correctly decode the fcmp/cmp/setbc families of instructions. Comparison instructions always produce a result in a CR field, thus it should be listed last if not implied to be CR0. Furthermore, remove the context sensitive decoding of CR field and CR bit type arguments from plan9Arg. These edge cases are better handled during the per-instruction combining of decoded arguments. This allows setbc like instructions to decode correctly without special handling. 
Change-Id: I264a600034b5abb8901b0c2e6bffe2887200ac27 Reviewed-on: https://go-review.googlesource.com/c/arch/+/347569 Run-TryBot: Paul Murphy TryBot-Result: Go Bot Reviewed-by: Carlos Eduardo Seo Trust: Lynn Boger Trust: Cherry Mui --- ppc64/ppc64asm/plan9.go | 71 ++++++++++++++---------------- ppc64/ppc64asm/testdata/decode.txt | 8 +++- 2 files changed, 41 insertions(+), 38 deletions(-) diff --git a/ppc64/ppc64asm/plan9.go b/ppc64/ppc64asm/plan9.go index 89b91732..88e8e1c7 100644 --- a/ppc64/ppc64asm/plan9.go +++ b/ppc64/ppc64asm/plan9.go @@ -30,18 +30,7 @@ func GoSyntax(inst Inst, pc uint64, symname func(uint64) (string, uint64)) strin break } if s := plan9Arg(&inst, i, pc, a, symname); s != "" { - // In the case for some BC instructions, a CondReg arg has - // both the CR and the branch condition encoded in its value. - // plan9Arg will return a string with the string representation - // of these values separated by a blank that will be treated - // as 2 args from this point on. - if strings.IndexByte(s, ' ') > 0 { - t := strings.Split(s, " ") - args = append(args, t[0]) - args = append(args, t[1]) - } else { - args = append(args, s) - } + args = append(args, s) } } var op string @@ -61,7 +50,7 @@ func GoSyntax(inst Inst, pc uint64, symname func(uint64) (string, uint64)) strin case 1: return fmt.Sprintf("%s %s", op, args[0]) case 2: - if inst.Op == COPY || inst.Op == PASTECC || inst.Op == FCMPO || inst.Op == FCMPU { + if inst.Op == COPY || inst.Op == PASTECC { return op + " " + args[0] + "," + args[1] } return op + " " + args[1] + "," + args[0] @@ -97,13 +86,13 @@ func GoSyntax(inst Inst, pc uint64, symname func(uint64) (string, uint64)) strin STQ, STFD, STFDU, STFS, STFSU: return op + " " + strings.Join(args, ",") - case CMPD, CMPDI, CMPLD, CMPLDI, CMPW, CMPWI, CMPLW, CMPLWI: - if len(args) == 2 { - return op + " " + args[0] + "," + args[1] - } else if len(args) == 3 { - return op + " " + args[0] + "," + args[1] + "," + args[2] + case FCMPU, FCMPO, CMPD, CMPDI, 
CMPLD, CMPLDI, CMPW, CMPWI, CMPLW, CMPLWI: + crf := int(inst.Args[0].(CondReg) - CR0) + cmpstr := op + " " + args[1] + "," + args[2] + if crf != 0 { // print CRx as the final operand if not implied (i.e BF != 0) + cmpstr += "," + args[0] } - return op + " " + args[0] + " ??" + return cmpstr case LIS: return "ADDIS $0," + args[1] + "," + args[0] @@ -152,16 +141,15 @@ func GoSyntax(inst Inst, pc uint64, symname func(uint64) (string, uint64)) strin } return op + " " + strings.Join(args, ", ") case BC: - if int(inst.Args[0].(Imm))&0x1c == 12 { // jump on cond bit set - if len(args) == 4 { - return fmt.Sprintf("B%s %s,%s", args[1], args[2], args[3]) - } - return fmt.Sprintf("B%s %s", args[1], args[2]) - } else if int(inst.Args[0].(Imm))&0x1c == 4 && revCondMap[args[1]] != "" { // jump on cond bit not set - if len(args) == 4 { - return fmt.Sprintf("B%s %s,%s", revCondMap[args[1]], args[2], args[3]) + bo := int(inst.Args[0].(Imm)) + bi := int(inst.Args[1].(CondReg) - Cond0LT) + bcname := condName[((bo&0x8)>>1)|(bi&0x3)] + if bo&0x17 == 4 { // jump only a CR bit set/unset, no hints (at bits) set. 
+ if bi >= 4 { + return fmt.Sprintf("B%s CR%d,%s", bcname, bi>>2, args[2]) + } else { + return fmt.Sprintf("B%s %s", bcname, args[2]) } - return fmt.Sprintf("B%s %s", revCondMap[args[1]], args[2]) } return op + " " + strings.Join(args, ",") case BCCTR: @@ -203,19 +191,14 @@ func plan9Arg(inst *Inst, argIndex int, pc uint64, arg Arg, symname func(uint64) if inst.Op == ISEL { return fmt.Sprintf("$%d", (arg - Cond0LT)) } - if arg == CR0 && (strings.HasPrefix(inst.Op.String(), "cmp") || strings.HasPrefix(inst.Op.String(), "fcmp")) { - return "" // don't show cr0 for cmp instructions - } else if arg >= CR0 { - return fmt.Sprintf("CR%d", int(arg-CR0)) - } bit := [4]string{"LT", "GT", "EQ", "SO"}[(arg-Cond0LT)%4] - if strings.HasPrefix(inst.Op.String(), "cr") { - return fmt.Sprintf("CR%d%s", int(arg-Cond0LT)/4, bit) - } if arg <= Cond0SO { return bit + } else if arg > Cond0SO && arg <= Cond7SO { + return fmt.Sprintf("CR%d%s", int(arg-Cond0LT)/4, bit) + } else { + return fmt.Sprintf("CR%d", int(arg-CR0)) } - return fmt.Sprintf("%s CR%d", bit, int(arg-Cond0LT)/4) case Imm: return fmt.Sprintf("$%d", arg) case SpReg: @@ -281,6 +264,20 @@ var revCondMap = map[string]string{ "LT": "GE", "GT": "LE", "EQ": "NE", } +// Lookup table to map BI[0:1] and BO[3] to an extended mnemonic for CR ops. +// Bits 0-1 map to a bit with a CR field, and bit 2 selects the inverted (0) +// or regular (1) extended mnemonic. +var condName = []string{ + "GE", + "LE", + "NE", + "NSO", + "LT", + "GT", + "EQ", + "SO", +} + // plan9OpMap maps an Op to its Plan 9 mnemonics, if different than its GNU mnemonics. 
var plan9OpMap = map[Op]string{ LWARX: "LWAR", diff --git a/ppc64/ppc64asm/testdata/decode.txt b/ppc64/ppc64asm/testdata/decode.txt index 11c37aa0..a1f8fb33 100644 --- a/ppc64/ppc64asm/testdata/decode.txt +++ b/ppc64/ppc64asm/testdata/decode.txt @@ -41,7 +41,7 @@ e8610032| plan9 MOVW 48(R1),R3 7c00422c| gnu dcbt r0,r8,0 7c00422c| plan9 DCBT (R8) 7fab3040| gnu cmpld cr7,r11,r6 -7fab3040| plan9 CMPU CR7,R11,R6 +7fab3040| plan9 CMPU R11,R6,CR7 2c030001| gnu cmpwi r3,1 2c030001| plan9 CMPW R3,$1 7c2b4840| gnu cmpld r11,r9 @@ -855,3 +855,9 @@ f0400fe0| gnu xvcvsxddp vs2,vs1 7c20003c| gnu wait 1,0 4c000924| gnu rfebb 1 0602000138800007| gnu pli r4,-8589869049 +7c5b03c0| plan9 SETNBCR CR6SO,R2 +fc811000| plan9 FCMPU F1,F2,CR1 +7c220176| plan9 BRD R1,R2 +7c2201b6| plan9 BRH R1,R2 +7c220136| plan9 BRW R1,R2 +7c2311b8| plan9 CFUGED R1,R2,R3 From 6544aa4a77f5e21f3aabb1fda78a38bbf05c7869 Mon Sep 17 00:00:00 2001 From: "Paul E. Murphy" Date: Tue, 17 Aug 2021 11:21:54 -0500 Subject: [PATCH 004/200] ppc64asm: don't print invalid encodings of pst*/pl*/paddi insn Change-Id: I5a01b89c96eba94f0eac3d4db65f98d0c2fc1166 Reviewed-on: https://go-review.googlesource.com/c/arch/+/347570 Reviewed-by: Cherry Mui Reviewed-by: Carlos Eduardo Seo --- ppc64/ppc64asm/gnu.go | 14 ++++++++++++-- ppc64/ppc64asm/testdata/decode.txt | 2 ++ 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/ppc64/ppc64asm/gnu.go b/ppc64/ppc64asm/gnu.go index 225ef4fb..b4c9bf8d 100644 --- a/ppc64/ppc64asm/gnu.go +++ b/ppc64/ppc64asm/gnu.go @@ -297,12 +297,17 @@ func GNUSyntax(inst Inst, pc uint64) string { gnuArg(&inst, 0, inst.Args[0], PC), gnuArg(&inst, 2, inst.Args[2], PC)) startArg = 4 - } else if r == 0 { + } else { str = fmt.Sprintf("%s %s,%s,%s", opName, gnuArg(&inst, 0, inst.Args[0], PC), gnuArg(&inst, 1, inst.Args[1], PC), gnuArg(&inst, 2, inst.Args[2], PC)) startArg = 4 + if r == 1 { + // This is an illegal encoding (ra != 0 && r == 1) on ISA 3.1. 
+ v := uint64(inst.Enc)<<32 | uint64(inst.SuffixEnc) + return fmt.Sprintf(".quad 0x%x", v) + } } buf.WriteString(str) @@ -317,11 +322,16 @@ func GNUSyntax(inst Inst, pc uint64) string { str := fmt.Sprintf("%s %s,%d", opName, gnuArg(&inst, 0, inst.Args[0], PC), d) buf.WriteString(str) startArg = 4 - } else if r == 0 { + } else { str := fmt.Sprintf("%s %s,%d(%s)", opName, gnuArg(&inst, 0, inst.Args[0], PC), d, gnuArg(&inst, 2, inst.Args[2], PC)) + if r == 1 { + // This is an invalid encoding (ra != 0 && r == 1) on ISA 3.1. + v := uint64(inst.Enc)<<32 | uint64(inst.SuffixEnc) + return fmt.Sprintf(".quad 0x%x", v) + } buf.WriteString(str) startArg = 4 } diff --git a/ppc64/ppc64asm/testdata/decode.txt b/ppc64/ppc64asm/testdata/decode.txt index a1f8fb33..3213903d 100644 --- a/ppc64/ppc64asm/testdata/decode.txt +++ b/ppc64/ppc64asm/testdata/decode.txt @@ -861,3 +861,5 @@ fc811000| plan9 FCMPU F1,F2,CR1 7c2201b6| plan9 BRH R1,R2 7c220136| plan9 BRW R1,R2 7c2311b8| plan9 CFUGED R1,R2,R3 +04100016e4820032| gnu .quad 0x4100016e4820032 +0612000138820007| gnu .quad 0x612000138820007 From b76863e36670e165c85261bc41fabaf345376022 Mon Sep 17 00:00:00 2001 From: Keith Randall Date: Thu, 23 Sep 2021 10:44:22 -0700 Subject: [PATCH 005/200] x86asm: include size suffix on popcnt instruction Update golang/go#48584 Change-Id: I2c770aaf88e15f8987dc8f1d974127b02b220777 Reviewed-on: https://go-review.googlesource.com/c/arch/+/351889 Trust: Keith Randall Run-TryBot: Keith Randall TryBot-Result: Go Bot Reviewed-by: Cherry Mui --- x86/x86asm/plan9x.go | 1 + x86/x86asm/testdata/decode.txt | 10 +++++----- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/x86/x86asm/plan9x.go b/x86/x86asm/plan9x.go index a93bffd4..59d8f977 100644 --- a/x86/x86asm/plan9x.go +++ b/x86/x86asm/plan9x.go @@ -204,6 +204,7 @@ var plan9Suffix = [maxOp + 1]bool{ OUT: true, POP: true, POPA: true, + POPCNT: true, PUSH: true, PUSHA: true, RCL: true, diff --git a/x86/x86asm/testdata/decode.txt 
b/x86/x86asm/testdata/decode.txt index 9832dcd3..604123f8 100644 --- a/x86/x86asm/testdata/decode.txt +++ b/x86/x86asm/testdata/decode.txt @@ -6518,10 +6518,10 @@ f30faec8|11223344556677885f5f5f5f 64 gnu rdgsbase %eax f30faec8|11223344556677885f5f5f5f 64 intel rdgsbase eax f30faec8|11223344556677885f5f5f5f 64 plan9 RDGSBASE AX f30fb811|223344556677885f5f5f5f5f 32 intel popcnt edx, dword ptr [ecx] -f30fb811|223344556677885f5f5f5f5f 32 plan9 POPCNT 0(CX), DX +f30fb811|223344556677885f5f5f5f5f 32 plan9 POPCNTL 0(CX), DX f30fb811|223344556677885f5f5f5f5f 64 gnu popcnt (%rcx),%edx f30fb811|223344556677885f5f5f5f5f 64 intel popcnt edx, dword ptr [rcx] -f30fb811|223344556677885f5f5f5f5f 64 plan9 POPCNT 0(CX), DX +f30fb811|223344556677885f5f5f5f5f 64 plan9 POPCNTL 0(CX), DX f30fbc11|223344556677885f5f5f5f5f 32 intel tzcnt edx, dword ptr [ecx] f30fbc11|223344556677885f5f5f5f5f 32 plan9 TZCNT 0(CX), DX f30fbc11|223344556677885f5f5f5f5f 64 gnu tzcnt (%rcx),%edx @@ -6565,7 +6565,7 @@ f3480faec8|11223344556677885f5f5f 64 intel rdgsbase rax f3480faec8|11223344556677885f5f5f 64 plan9 RDGSBASE AX f3480fb811|223344556677885f5f5f5f 64 gnu popcnt (%rcx),%rdx f3480fb811|223344556677885f5f5f5f 64 intel popcnt rdx, qword ptr [rcx] -f3480fb811|223344556677885f5f5f5f 64 plan9 POPCNT 0(CX), DX +f3480fb811|223344556677885f5f5f5f 64 plan9 POPCNTQ 0(CX), DX f3480fbc11|223344556677885f5f5f5f 64 gnu tzcnt (%rcx),%rdx f3480fbc11|223344556677885f5f5f5f 64 intel tzcnt rdx, qword ptr [rcx] f3480fbc11|223344556677885f5f5f5f 64 plan9 TZCNT 0(CX), DX @@ -6573,10 +6573,10 @@ f3480fbd11|223344556677885f5f5f5f 64 gnu lzcnt (%rcx),%rdx f3480fbd11|223344556677885f5f5f5f 64 intel lzcnt rdx, qword ptr [rcx] f3480fbd11|223344556677885f5f5f5f 64 plan9 LZCNT 0(CX), DX f3660fb811|223344556677885f5f5f5f 32 intel popcnt dx, word ptr [ecx] -f3660fb811|223344556677885f5f5f5f 32 plan9 POPCNT 0(CX), DX +f3660fb811|223344556677885f5f5f5f 32 plan9 POPCNTW 0(CX), DX f3660fb811|223344556677885f5f5f5f 64 gnu popcnt 
(%rcx),%dx f3660fb811|223344556677885f5f5f5f 64 intel popcnt dx, word ptr [rcx] -f3660fb811|223344556677885f5f5f5f 64 plan9 POPCNT 0(CX), DX +f3660fb811|223344556677885f5f5f5f 64 plan9 POPCNTW 0(CX), DX f3660fbc11|223344556677885f5f5f5f 32 intel tzcnt dx, word ptr [ecx] f3660fbc11|223344556677885f5f5f5f 32 plan9 TZCNT 0(CX), DX f3660fbc11|223344556677885f5f5f5f 64 gnu tzcnt (%rcx),%dx From 5424468ecbacebb9cadd2cfdcc6726e39c619b0e Mon Sep 17 00:00:00 2001 From: fanzha02 Date: Thu, 10 Sep 2020 12:06:57 +0800 Subject: [PATCH 006/200] arm64/arm64asm: add support for TLBI and DC The format of TLBI is TLBI {}, where is an optional field. But there is no field for in the instruction format table. This CL adds a new Arg type sysOp to handle this case. This patch is a copy of CL 256197. Co-authored-by: JunchenLi Change-Id: I6e12f49a8614ca80fd60eef5b63755323824f5fa Reviewed-on: https://go-review.googlesource.com/c/arch/+/302889 Trust: Fannie Zhang Run-TryBot: Fannie Zhang TryBot-Result: Gopher Robot Reviewed-by: Cherry Mui --- arm64/arm64asm/condition.go | 8 +- arm64/arm64asm/condition_util.go | 18 +-- arm64/arm64asm/decode.go | 23 ++-- arm64/arm64asm/decode_test.go | 2 - arm64/arm64asm/inst.go | 158 +++++++++++++++++++++++++ arm64/arm64asm/objdump_test.go | 2 - arm64/arm64asm/plan9x.go | 36 +++--- arm64/arm64asm/testdata/gnucases.txt | 106 +++++++++++++++++ arm64/arm64asm/testdata/plan9cases.txt | 106 +++++++++++++++++ 9 files changed, 417 insertions(+), 42 deletions(-) diff --git a/arm64/arm64asm/condition.go b/arm64/arm64asm/condition.go index d6738572..37ad8eed 100644 --- a/arm64/arm64asm/condition.go +++ b/arm64/arm64asm/condition.go @@ -11,7 +11,7 @@ package arm64asm // Refer to instFormat inside decode.go for more details func at_sys_cr_system_cond(instr uint32) bool { - return sys_op_4((instr>>16)&0x7, 0x7, 0x8, (instr>>5)&0x7) == Sys_AT + return sys_op_4((instr>>16)&0x7, 0x7, 0x8, (instr>>5)&0x7) == sys_AT } func bfi_bfm_32m_bitfield_cond(instr uint32) bool { @@ 
-61,11 +61,11 @@ func csinv_general_cond(instr uint32) bool { return instr&0xe000 != 0xe000 } func dc_sys_cr_system_cond(instr uint32) bool { - return sys_op_4((instr>>16)&0x7, 0x7, (instr>>8)&0xf, (instr>>5)&0x7) == Sys_DC + return sys_op_4((instr>>16)&0x7, 0x7, (instr>>8)&0xf, (instr>>5)&0x7) == sys_DC } func ic_sys_cr_system_cond(instr uint32) bool { - return sys_op_4((instr>>16)&0x7, 0x7, (instr>>8)&0xf, (instr>>5)&0x7) == Sys_IC + return sys_op_4((instr>>16)&0x7, 0x7, (instr>>8)&0xf, (instr>>5)&0x7) == sys_IC } func lsl_ubfm_32m_bitfield_cond(instr uint32) bool { @@ -133,7 +133,7 @@ func sbfx_sbfm_64m_bitfield_cond(instr uint32) bool { } func tlbi_sys_cr_system_cond(instr uint32) bool { - return sys_op_4((instr>>16)&0x7, 0x8, (instr>>8)&0xf, (instr>>5)&0x7) == Sys_TLBI + return sys_op_4((instr>>16)&0x7, 0x8, (instr>>8)&0xf, (instr>>5)&0x7) == sys_TLBI } func ubfiz_ubfm_32m_bitfield_cond(instr uint32) bool { diff --git a/arm64/arm64asm/condition_util.go b/arm64/arm64asm/condition_util.go index 62c0c3b0..f2fa11b9 100644 --- a/arm64/arm64asm/condition_util.go +++ b/arm64/arm64asm/condition_util.go @@ -47,19 +47,19 @@ func move_wide_preferred_4(sf, N, imms, immr uint32) bool { return false } -type Sys uint8 +type sys uint8 const ( - Sys_AT Sys = iota - Sys_DC - Sys_IC - Sys_TLBI - Sys_SYS + sys_AT sys = iota + sys_DC + sys_IC + sys_TLBI + sys_SYS ) -func sys_op_4(op1, crn, crm, op2 uint32) Sys { - // TODO: system instruction - return Sys_SYS +func sys_op_4(op1, crn, crm, op2 uint32) sys { + sysInst := sysInstFields{uint8(op1), uint8(crn), uint8(crm), uint8(op2)} + return sysInst.getType() } func is_zero(x uint32) bool { diff --git a/arm64/arm64asm/decode.go b/arm64/arm64asm/decode.go index 5e29c476..b1c4f5ec 100644 --- a/arm64/arm64asm/decode.go +++ b/arm64/arm64asm/decode.go @@ -684,17 +684,26 @@ func decodeArg(aop instArg, x uint32) Arg { //TODO: system instruction return nil - case arg_sysop_DC_SYS_CR_system: - //TODO: system instruction - return nil - case 
arg_sysop_SYS_CR_system: //TODO: system instruction return nil - case arg_sysop_TLBI_SYS_CR_system: - //TODO: system instruction - return nil + case arg_sysop_DC_SYS_CR_system, arg_sysop_TLBI_SYS_CR_system: + op1 := (x >> 16) & 7 + cn := (x >> 12) & 15 + cm := (x >> 8) & 15 + op2 := (x >> 5) & 7 + sysInst := sysInstFields{uint8(op1), uint8(cn), uint8(cm), uint8(op2)} + attrs := sysInst.getAttrs() + reg := int(x & 31) + if !attrs.hasOperand2 { + if reg == 31 { + return sysOp{sysInst, 0, false} + } + // This instruction is undefined if the Rt field is not set to 31. + return nil + } + return sysOp{sysInst, X0 + Reg(reg), true} case arg_Bt: return B0 + Reg(x&(1<<5-1)) diff --git a/arm64/arm64asm/decode_test.go b/arm64/arm64asm/decode_test.go index 9c7d2b62..26eb6ae9 100644 --- a/arm64/arm64asm/decode_test.go +++ b/arm64/arm64asm/decode_test.go @@ -63,9 +63,7 @@ func testDecode(t *testing.T, syntax string) { // TODO: system instruction. var Todo = strings.Fields(` sys - dc at - tlbi ic hvc smc diff --git a/arm64/arm64asm/inst.go b/arm64/arm64asm/inst.go index afeb9a3c..8c633fef 100644 --- a/arm64/arm64asm/inst.go +++ b/arm64/arm64asm/inst.go @@ -968,3 +968,161 @@ func (r RegisterWithArrangementAndIndex) String() string { } return fmt.Sprintf("%s[%d]", result, r.index) } + +type sysOp struct { + op sysInstFields + r Reg + hasOperand2 bool +} + +func (s sysOp) isArg() {} + +func (s sysOp) String() string { + result := s.op.String() + // If s.hasOperand2 is false, the value in the register + // specified by s.r is ignored. 
+ if s.hasOperand2 { + result += ", " + s.r.String() + } + return result +} + +type sysInstFields struct { + op1 uint8 + cn uint8 + cm uint8 + op2 uint8 +} + +type sysInstAttrs struct { + typ sys + name string + hasOperand2 bool +} + +func (s sysInstFields) isArg() {} + +func (s sysInstFields) getAttrs() sysInstAttrs { + attrs, ok := sysInstsAttrs[sysInstFields{s.op1, s.cn, s.cm, s.op2}] + if !ok { + return sysInstAttrs{typ: sys_SYS} + } + return attrs +} + +func (s sysInstFields) String() string { + return s.getAttrs().name +} + +func (s sysInstFields) getType() sys { + return s.getAttrs().typ +} + +var sysInstsAttrs = map[sysInstFields]sysInstAttrs{ + sysInstFields{0, 8, 3, 0}: {sys_TLBI, "VMALLE1IS", false}, + sysInstFields{0, 8, 3, 1}: {sys_TLBI, "VAE1IS", true}, + sysInstFields{0, 8, 3, 2}: {sys_TLBI, "ASIDE1IS", true}, + sysInstFields{0, 8, 3, 3}: {sys_TLBI, "VAAE1IS", true}, + sysInstFields{0, 8, 3, 5}: {sys_TLBI, "VALE1IS", true}, + sysInstFields{0, 8, 3, 7}: {sys_TLBI, "VAALE1IS", true}, + sysInstFields{0, 8, 7, 0}: {sys_TLBI, "VMALLE1", false}, + sysInstFields{0, 8, 7, 1}: {sys_TLBI, "VAE1", true}, + sysInstFields{0, 8, 7, 2}: {sys_TLBI, "ASIDE1", true}, + sysInstFields{0, 8, 7, 3}: {sys_TLBI, "VAAE1", true}, + sysInstFields{0, 8, 7, 5}: {sys_TLBI, "VALE1", true}, + sysInstFields{0, 8, 7, 7}: {sys_TLBI, "VAALE1", true}, + sysInstFields{4, 8, 0, 1}: {sys_TLBI, "IPAS2E1IS", true}, + sysInstFields{4, 8, 0, 5}: {sys_TLBI, "IPAS2LE1IS", true}, + sysInstFields{4, 8, 3, 0}: {sys_TLBI, "ALLE2IS", false}, + sysInstFields{4, 8, 3, 1}: {sys_TLBI, "VAE2IS", true}, + sysInstFields{4, 8, 3, 4}: {sys_TLBI, "ALLE1IS", false}, + sysInstFields{4, 8, 3, 5}: {sys_TLBI, "VALE2IS", true}, + sysInstFields{4, 8, 3, 6}: {sys_TLBI, "VMALLS12E1IS", false}, + sysInstFields{4, 8, 4, 1}: {sys_TLBI, "IPAS2E1", true}, + sysInstFields{4, 8, 4, 5}: {sys_TLBI, "IPAS2LE1", true}, + sysInstFields{4, 8, 7, 0}: {sys_TLBI, "ALLE2", false}, + sysInstFields{4, 8, 7, 1}: {sys_TLBI, "VAE2", true}, 
+ sysInstFields{4, 8, 7, 4}: {sys_TLBI, "ALLE1", false}, + sysInstFields{4, 8, 7, 5}: {sys_TLBI, "VALE2", true}, + sysInstFields{4, 8, 7, 6}: {sys_TLBI, "VMALLS12E1", false}, + sysInstFields{6, 8, 3, 0}: {sys_TLBI, "ALLE3IS", false}, + sysInstFields{6, 8, 3, 1}: {sys_TLBI, "VAE3IS", true}, + sysInstFields{6, 8, 3, 5}: {sys_TLBI, "VALE3IS", true}, + sysInstFields{6, 8, 7, 0}: {sys_TLBI, "ALLE3", false}, + sysInstFields{6, 8, 7, 1}: {sys_TLBI, "VAE3", true}, + sysInstFields{6, 8, 7, 5}: {sys_TLBI, "VALE3", true}, + sysInstFields{0, 8, 1, 0}: {sys_TLBI, "VMALLE1OS", false}, + sysInstFields{0, 8, 1, 1}: {sys_TLBI, "VAE1OS", true}, + sysInstFields{0, 8, 1, 2}: {sys_TLBI, "ASIDE1OS", true}, + sysInstFields{0, 8, 1, 3}: {sys_TLBI, "VAAE1OS", true}, + sysInstFields{0, 8, 1, 5}: {sys_TLBI, "VALE1OS", true}, + sysInstFields{0, 8, 1, 7}: {sys_TLBI, "VAALE1OS", true}, + sysInstFields{0, 8, 2, 1}: {sys_TLBI, "RVAE1IS", true}, + sysInstFields{0, 8, 2, 3}: {sys_TLBI, "RVAAE1IS", true}, + sysInstFields{0, 8, 2, 5}: {sys_TLBI, "RVALE1IS", true}, + sysInstFields{0, 8, 2, 7}: {sys_TLBI, "RVAALE1IS", true}, + sysInstFields{0, 8, 5, 1}: {sys_TLBI, "RVAE1OS", true}, + sysInstFields{0, 8, 5, 3}: {sys_TLBI, "RVAAE1OS", true}, + sysInstFields{0, 8, 5, 5}: {sys_TLBI, "RVALE1OS", true}, + sysInstFields{0, 8, 5, 7}: {sys_TLBI, "RVAALE1OS", true}, + sysInstFields{0, 8, 6, 1}: {sys_TLBI, "RVAE1", true}, + sysInstFields{0, 8, 6, 3}: {sys_TLBI, "RVAAE1", true}, + sysInstFields{0, 8, 6, 5}: {sys_TLBI, "RVALE1", true}, + sysInstFields{0, 8, 6, 7}: {sys_TLBI, "RVAALE1", true}, + sysInstFields{4, 8, 0, 2}: {sys_TLBI, "RIPAS2E1IS", true}, + sysInstFields{4, 8, 0, 6}: {sys_TLBI, "RIPAS2LE1IS", true}, + sysInstFields{4, 8, 1, 0}: {sys_TLBI, "ALLE2OS", false}, + sysInstFields{4, 8, 1, 1}: {sys_TLBI, "VAE2OS", true}, + sysInstFields{4, 8, 1, 4}: {sys_TLBI, "ALLE1OS", false}, + sysInstFields{4, 8, 1, 5}: {sys_TLBI, "VALE2OS", true}, + sysInstFields{4, 8, 1, 6}: {sys_TLBI, "VMALLS12E1OS", false}, + 
sysInstFields{4, 8, 2, 1}: {sys_TLBI, "RVAE2IS", true}, + sysInstFields{4, 8, 2, 5}: {sys_TLBI, "RVALE2IS", true}, + sysInstFields{4, 8, 4, 0}: {sys_TLBI, "IPAS2E1OS", true}, + sysInstFields{4, 8, 4, 2}: {sys_TLBI, "RIPAS2E1", true}, + sysInstFields{4, 8, 4, 3}: {sys_TLBI, "RIPAS2E1OS", true}, + sysInstFields{4, 8, 4, 4}: {sys_TLBI, "IPAS2LE1OS", true}, + sysInstFields{4, 8, 4, 6}: {sys_TLBI, "RIPAS2LE1", true}, + sysInstFields{4, 8, 4, 7}: {sys_TLBI, "RIPAS2LE1OS", true}, + sysInstFields{4, 8, 5, 1}: {sys_TLBI, "RVAE2OS", true}, + sysInstFields{4, 8, 5, 5}: {sys_TLBI, "RVALE2OS", true}, + sysInstFields{4, 8, 6, 1}: {sys_TLBI, "RVAE2", true}, + sysInstFields{4, 8, 6, 5}: {sys_TLBI, "RVALE2", true}, + sysInstFields{6, 8, 1, 0}: {sys_TLBI, "ALLE3OS", false}, + sysInstFields{6, 8, 1, 1}: {sys_TLBI, "VAE3OS", true}, + sysInstFields{6, 8, 1, 5}: {sys_TLBI, "VALE3OS", true}, + sysInstFields{6, 8, 2, 1}: {sys_TLBI, "RVAE3IS", true}, + sysInstFields{6, 8, 2, 5}: {sys_TLBI, "RVALE3IS", true}, + sysInstFields{6, 8, 5, 1}: {sys_TLBI, "RVAE3OS", true}, + sysInstFields{6, 8, 5, 5}: {sys_TLBI, "RVALE3OS", true}, + sysInstFields{6, 8, 6, 1}: {sys_TLBI, "RVAE3", true}, + sysInstFields{6, 8, 6, 5}: {sys_TLBI, "RVALE3", true}, + sysInstFields{0, 7, 6, 1}: {sys_DC, "IVAC", true}, + sysInstFields{0, 7, 6, 2}: {sys_DC, "ISW", true}, + sysInstFields{0, 7, 10, 2}: {sys_DC, "CSW", true}, + sysInstFields{0, 7, 14, 2}: {sys_DC, "CISW", true}, + sysInstFields{3, 7, 4, 1}: {sys_DC, "ZVA", true}, + sysInstFields{3, 7, 10, 1}: {sys_DC, "CVAC", true}, + sysInstFields{3, 7, 11, 1}: {sys_DC, "CVAU", true}, + sysInstFields{3, 7, 14, 1}: {sys_DC, "CIVAC", true}, + sysInstFields{0, 7, 6, 3}: {sys_DC, "IGVAC", true}, + sysInstFields{0, 7, 6, 4}: {sys_DC, "IGSW", true}, + sysInstFields{0, 7, 6, 5}: {sys_DC, "IGDVAC", true}, + sysInstFields{0, 7, 6, 6}: {sys_DC, "IGDSW", true}, + sysInstFields{0, 7, 10, 4}: {sys_DC, "CGSW", true}, + sysInstFields{0, 7, 10, 6}: {sys_DC, "CGDSW", true}, + sysInstFields{0, 
7, 14, 4}: {sys_DC, "CIGSW", true}, + sysInstFields{0, 7, 14, 6}: {sys_DC, "CIGDSW", true}, + sysInstFields{3, 7, 4, 3}: {sys_DC, "GVA", true}, + sysInstFields{3, 7, 4, 4}: {sys_DC, "GZVA", true}, + sysInstFields{3, 7, 10, 3}: {sys_DC, "CGVAC", true}, + sysInstFields{3, 7, 10, 5}: {sys_DC, "CGDVAC", true}, + sysInstFields{3, 7, 12, 3}: {sys_DC, "CGVAP", true}, + sysInstFields{3, 7, 12, 5}: {sys_DC, "CGDVAP", true}, + sysInstFields{3, 7, 13, 3}: {sys_DC, "CGVADP", true}, + sysInstFields{3, 7, 13, 5}: {sys_DC, "CGDVADP", true}, + sysInstFields{3, 7, 14, 3}: {sys_DC, "CIGVAC", true}, + sysInstFields{3, 7, 14, 5}: {sys_DC, "CIGDVAC", true}, + sysInstFields{3, 7, 12, 1}: {sys_DC, "CVAP", true}, + sysInstFields{3, 7, 13, 1}: {sys_DC, "CVADP", true}, +} diff --git a/arm64/arm64asm/objdump_test.go b/arm64/arm64asm/objdump_test.go index 3baf8a19..a096dcec 100644 --- a/arm64/arm64asm/objdump_test.go +++ b/arm64/arm64asm/objdump_test.go @@ -120,9 +120,7 @@ func allowedMismatchObjdump(text string, inst *Inst, dec ExtInst) bool { // TODO: system instruction. 
var todo = strings.Fields(` sys - dc at - tlbi ic hvc smc diff --git a/arm64/arm64asm/plan9x.go b/arm64/arm64asm/plan9x.go index f4eef8c0..ea5139cb 100644 --- a/arm64/arm64asm/plan9x.go +++ b/arm64/arm64asm/plan9x.go @@ -542,10 +542,7 @@ func plan9Arg(inst *Inst, pc uint64, symname func(uint64) (string, uint64), arg } } - if regno == 31 { - return "ZR" - } - return fmt.Sprintf("R%d", regno) + return plan9gpr(a) case RegSP: regno := uint16(a) & 31 @@ -555,13 +552,7 @@ func plan9Arg(inst *Inst, pc uint64, symname func(uint64) (string, uint64), arg return fmt.Sprintf("R%d", regno) case RegExtshiftAmount: - reg := "" - regno := uint16(a.reg) & 31 - if regno == 31 { - reg = "ZR" - } else { - reg = fmt.Sprintf("R%d", uint16(a.reg)&31) - } + reg := plan9gpr(a.reg) extshift := "" amount := "" if a.extShift != ExtShift(0) { @@ -614,19 +605,13 @@ func plan9Arg(inst *Inst, pc uint64, symname func(uint64) (string, uint64), arg case MemExtend: base := "" index := "" - indexreg := "" regno := uint16(a.Base) & 31 if regno == 31 { base = "(RSP)" } else { base = fmt.Sprintf("(R%d)", regno) } - regno = uint16(a.Index) & 31 - if regno == 31 { - indexreg = "ZR" - } else { - indexreg = fmt.Sprintf("R%d", regno) - } + indexreg := plan9gpr(a.Index) if a.Extend == lsl { // Refer to ARM reference manual, for byte load/store(register), the index @@ -736,7 +721,22 @@ func plan9Arg(inst *Inst, pc uint64, symname func(uint64) (string, uint64), arg if strings.Contains(a.String(), "#") { return fmt.Sprintf("$%d", a) } + case sysOp: + result := a.op.String() + if a.r != 0 { + result += ", " + plan9gpr(a.r) + } + return result } return strings.ToUpper(arg.String()) } + +// Convert a general-purpose register to plan9 assembly format. 
+func plan9gpr(r Reg) string { + regno := uint16(r) & 31 + if regno == 31 { + return "ZR" + } + return fmt.Sprintf("R%d", regno) +} diff --git a/arm64/arm64asm/testdata/gnucases.txt b/arm64/arm64asm/testdata/gnucases.txt index 21542099..3ea6941d 100644 --- a/arm64/arm64asm/testdata/gnucases.txt +++ b/arm64/arm64asm/testdata/gnucases.txt @@ -4649,3 +4649,109 @@ cd5a202e| mvn v13.8b, v22.8b 743d0a0e| umov w20, v11.h[2] 743d0c0e| mov w20, v11.s[1] 743d084e| mov x20, v11.d[0] +1f8308d5| tlbi vmalle1is +1f8708d5| tlbi vmalle1 +1f830cd5| tlbi alle2is +9f830cd5| tlbi alle1is +df830cd5| tlbi vmalls12e1is +1f870cd5| tlbi alle2 +9f870cd5| tlbi alle1 +df870cd5| tlbi vmalls12e1 +1f830ed5| tlbi alle3is +1f870ed5| tlbi alle3 +1f8108d5| tlbi vmalle1os +1f810cd5| tlbi alle2os +9f810cd5| tlbi alle1os +df810cd5| tlbi vmalls12e1os +1f810ed5| tlbi alle3os +208308d5| tlbi vae1is, x0 +418308d5| tlbi aside1is, x1 +628308d5| tlbi vaae1is, x2 +a38308d5| tlbi vale1is, x3 +e48308d5| tlbi vaale1is, x4 +258708d5| tlbi vae1, x5 +468708d5| tlbi aside1, x6 +678708d5| tlbi vaae1, x7 +a88708d5| tlbi vale1, x8 +e98708d5| tlbi vaale1, x9 +2a800cd5| tlbi ipas2e1is, x10 +ab800cd5| tlbi ipas2le1is, x11 +2c830cd5| tlbi vae2is, x12 +ad830cd5| tlbi vale2is, x13 +2e840cd5| tlbi ipas2e1, x14 +af840cd5| tlbi ipas2le1, x15 +30870cd5| tlbi vae2, x16 +b1870cd5| tlbi vale2, x17 +3f830ed5| tlbi vae3is, xzr +b3830ed5| tlbi vale3is, x19 +34870ed5| tlbi vae3, x20 +b5870ed5| tlbi vale3, x21 +368108d5| tlbi vae1os, x22 +578108d5| tlbi aside1os, x23 +788108d5| tlbi vaae1os, x24 +b98108d5| tlbi vale1os, x25 +fa8108d5| tlbi vaale1os, x26 +3b8208d5| tlbi rvae1is, x27 +7f8208d5| tlbi rvaae1is, xzr +bd8208d5| tlbi rvale1is, x29 +fe8208d5| tlbi rvaale1is, x30 +3f8508d5| tlbi rvae1os, xzr +608508d5| tlbi rvaae1os, x0 +a18508d5| tlbi rvale1os, x1 +e28508d5| tlbi rvaale1os, x2 +238608d5| tlbi rvae1, x3 +648608d5| tlbi rvaae1, x4 +a58608d5| tlbi rvale1, x5 +e68608d5| tlbi rvaale1, x6 +47800cd5| tlbi ripas2e1is, x7 +c8800cd5| tlbi 
ripas2le1is, x8 +29810cd5| tlbi vae2os, x9 +aa810cd5| tlbi vale2os, x10 +2b820cd5| tlbi rvae2is, x11 +ac820cd5| tlbi rvale2is, x12 +0d840cd5| tlbi ipas2e1os, x13 +4e840cd5| tlbi ripas2e1, x14 +6f840cd5| tlbi ripas2e1os, x15 +90840cd5| tlbi ipas2le1os, x16 +d1840cd5| tlbi ripas2le1, x17 +ff840cd5| tlbi ripas2le1os, xzr +33850cd5| tlbi rvae2os, x19 +b4850cd5| tlbi rvale2os, x20 +35860cd5| tlbi rvae2, x21 +b6860cd5| tlbi rvale2, x22 +37810ed5| tlbi vae3os, x23 +b8810ed5| tlbi vale3os, x24 +39820ed5| tlbi rvae3is, x25 +ba820ed5| tlbi rvale3is, x26 +3b850ed5| tlbi rvae3os, x27 +bf850ed5| tlbi rvale3os, xzr +3d860ed5| tlbi rvae3, x29 +be860ed5| tlbi rvale3, x30 +207608d5| dc ivac, x0 +417608d5| dc isw, x1 +427a08d5| dc csw, x2 +437e08d5| dc cisw, x3 +24740bd5| dc zva, x4 +257a0bd5| dc cvac, x5 +267b0bd5| dc cvau, x6 +277e0bd5| dc civac, x7 +687608d5| dc igvac, x8 +897608d5| dc igsw, x9 +aa7608d5| dc igdvac, x10 +cb7608d5| dc igdsw, x11 +8c7a08d5| dc cgsw, x12 +cd7a08d5| dc cgdsw, x13 +8e7e08d5| dc cigsw, x14 +cf7e08d5| dc cigdsw, x15 +70740bd5| dc gva, x16 +91740bd5| dc gzva, x17 +7f7a0bd5| dc cgvac, xzr +b37a0bd5| dc cgdvac, x19 +747c0bd5| dc cgvap, x20 +b57c0bd5| dc cgdvap, x21 +767d0bd5| dc cgvadp, x22 +b77d0bd5| dc cgdvadp, x23 +787e0bd5| dc cigvac, x24 +b97e0bd5| dc cigdvac, x25 +3a7c0bd5| dc cvap, x26 +3b7d0bd5| dc cvadp, x27 diff --git a/arm64/arm64asm/testdata/plan9cases.txt b/arm64/arm64asm/testdata/plan9cases.txt index 1bbb2386..a1da4f87 100644 --- a/arm64/arm64asm/testdata/plan9cases.txt +++ b/arm64/arm64asm/testdata/plan9cases.txt @@ -4576,3 +4576,109 @@ d7061a6f| VUSHR $6, V22.H8, V23.H8 44786638| MOVBU (R2)(R6<<0), R4 ae7bbe38| MOVB (R29)(R30<<0), R14 ae6bbe38| MOVB (R29)(R30), R14 +1f8308d5| TLBI VMALLE1IS +1f8708d5| TLBI VMALLE1 +1f830cd5| TLBI ALLE2IS +9f830cd5| TLBI ALLE1IS +df830cd5| TLBI VMALLS12E1IS +1f870cd5| TLBI ALLE2 +9f870cd5| TLBI ALLE1 +df870cd5| TLBI VMALLS12E1 +1f830ed5| TLBI ALLE3IS +1f870ed5| TLBI ALLE3 +1f8108d5| TLBI VMALLE1OS +1f810cd5| 
TLBI ALLE2OS +9f810cd5| TLBI ALLE1OS +df810cd5| TLBI VMALLS12E1OS +1f810ed5| TLBI ALLE3OS +208308d5| TLBI VAE1IS, R0 +418308d5| TLBI ASIDE1IS, R1 +628308d5| TLBI VAAE1IS, R2 +a38308d5| TLBI VALE1IS, R3 +e48308d5| TLBI VAALE1IS, R4 +258708d5| TLBI VAE1, R5 +468708d5| TLBI ASIDE1, R6 +678708d5| TLBI VAAE1, R7 +a88708d5| TLBI VALE1, R8 +e98708d5| TLBI VAALE1, R9 +2a800cd5| TLBI IPAS2E1IS, R10 +ab800cd5| TLBI IPAS2LE1IS, R11 +2c830cd5| TLBI VAE2IS, R12 +ad830cd5| TLBI VALE2IS, R13 +2e840cd5| TLBI IPAS2E1, R14 +af840cd5| TLBI IPAS2LE1, R15 +30870cd5| TLBI VAE2, R16 +b1870cd5| TLBI VALE2, R17 +3f830ed5| TLBI VAE3IS, ZR +b3830ed5| TLBI VALE3IS, R19 +34870ed5| TLBI VAE3, R20 +b5870ed5| TLBI VALE3, R21 +368108d5| TLBI VAE1OS, R22 +578108d5| TLBI ASIDE1OS, R23 +788108d5| TLBI VAAE1OS, R24 +b98108d5| TLBI VALE1OS, R25 +fa8108d5| TLBI VAALE1OS, R26 +3b8208d5| TLBI RVAE1IS, R27 +7f8208d5| TLBI RVAAE1IS, ZR +bd8208d5| TLBI RVALE1IS, R29 +fe8208d5| TLBI RVAALE1IS, R30 +3f8508d5| TLBI RVAE1OS, ZR +608508d5| TLBI RVAAE1OS, R0 +a18508d5| TLBI RVALE1OS, R1 +e28508d5| TLBI RVAALE1OS, R2 +238608d5| TLBI RVAE1, R3 +648608d5| TLBI RVAAE1, R4 +a58608d5| TLBI RVALE1, R5 +e68608d5| TLBI RVAALE1, R6 +47800cd5| TLBI RIPAS2E1IS, R7 +c8800cd5| TLBI RIPAS2LE1IS, R8 +29810cd5| TLBI VAE2OS, R9 +aa810cd5| TLBI VALE2OS, R10 +2b820cd5| TLBI RVAE2IS, R11 +ac820cd5| TLBI RVALE2IS, R12 +0d840cd5| TLBI IPAS2E1OS, R13 +4e840cd5| TLBI RIPAS2E1, R14 +6f840cd5| TLBI RIPAS2E1OS, R15 +90840cd5| TLBI IPAS2LE1OS, R16 +d1840cd5| TLBI RIPAS2LE1, R17 +ff840cd5| TLBI RIPAS2LE1OS, ZR +33850cd5| TLBI RVAE2OS, R19 +b4850cd5| TLBI RVALE2OS, R20 +35860cd5| TLBI RVAE2, R21 +b6860cd5| TLBI RVALE2, R22 +37810ed5| TLBI VAE3OS, R23 +b8810ed5| TLBI VALE3OS, R24 +39820ed5| TLBI RVAE3IS, R25 +ba820ed5| TLBI RVALE3IS, R26 +3b850ed5| TLBI RVAE3OS, R27 +bf850ed5| TLBI RVALE3OS, ZR +3d860ed5| TLBI RVAE3, R29 +be860ed5| TLBI RVALE3, R30 +207608d5| DC IVAC, R0 +417608d5| DC ISW, R1 +427a08d5| DC CSW, R2 +437e08d5| DC CISW, R3 
+24740bd5| DC ZVA, R4 +257a0bd5| DC CVAC, R5 +267b0bd5| DC CVAU, R6 +277e0bd5| DC CIVAC, R7 +687608d5| DC IGVAC, R8 +897608d5| DC IGSW, R9 +aa7608d5| DC IGDVAC, R10 +cb7608d5| DC IGDSW, R11 +8c7a08d5| DC CGSW, R12 +cd7a08d5| DC CGDSW, R13 +8e7e08d5| DC CIGSW, R14 +cf7e08d5| DC CIGDSW, R15 +70740bd5| DC GVA, R16 +91740bd5| DC GZVA, R17 +7f7a0bd5| DC CGVAC, ZR +b37a0bd5| DC CGDVAC, R19 +747c0bd5| DC CGVAP, R20 +b57c0bd5| DC CGDVAP, R21 +767d0bd5| DC CGVADP, R22 +b77d0bd5| DC CGDVADP, R23 +787e0bd5| DC CIGVAC, R24 +b97e0bd5| DC CIGDVAC, R25 +3a7c0bd5| DC CVAP, R26 +3b7d0bd5| DC CVADP, R27 From cd2ec6f1ddf14e3a42861a19f145293ea4181414 Mon Sep 17 00:00:00 2001 From: Russ Cox Date: Mon, 13 Sep 2021 16:42:27 -0400 Subject: [PATCH 007/200] x86/x86asm: fix plan9 print of PUSHQ/POPQ The 5x instructions decode correctly and print correctly in the GNU and Intel modes, but it was using the wrong suffix in the Plan 9 mode. Fix that. Change-Id: I8242d142ef56bf3e16e7535d59034c3932f5bbda Reviewed-on: https://go-review.googlesource.com/c/arch/+/349689 Run-TryBot: Russ Cox TryBot-Result: Gopher Robot Reviewed-by: Austin Clements Auto-Submit: Russ Cox --- x86/x86asm/plan9x.go | 4 ++++ x86/x86asm/testdata/decode.txt | 4 ++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/x86/x86asm/plan9x.go b/x86/x86asm/plan9x.go index 59d8f977..de417946 100644 --- a/x86/x86asm/plan9x.go +++ b/x86/x86asm/plan9x.go @@ -66,6 +66,10 @@ func GoSyntax(inst Inst, pc uint64, symname SymLookup) string { s := inst.DataSize if inst.MemBytes != 0 { s = inst.MemBytes * 8 + } else if inst.Args[1] == nil { // look for register-only 64-bit instruction, like PUSHQ AX + if r, ok := inst.Args[0].(Reg); ok && RAX <= r && r <= R15 { + s = 64 + } } switch s { case 8: diff --git a/x86/x86asm/testdata/decode.txt b/x86/x86asm/testdata/decode.txt index 604123f8..cbd536a8 100644 --- a/x86/x86asm/testdata/decode.txt +++ b/x86/x86asm/testdata/decode.txt @@ -2772,12 +2772,12 @@ 50|11223344556677885f5f5f5f5f5f5f 32 
plan9 PUSHL AX 50|11223344556677885f5f5f5f5f5f5f 64 gnu push %rax 50|11223344556677885f5f5f5f5f5f5f 64 intel push rax -50|11223344556677885f5f5f5f5f5f5f 64 plan9 PUSHL AX +50|11223344556677885f5f5f5f5f5f5f 64 plan9 PUSHQ AX 58|11223344556677885f5f5f5f5f5f5f 32 intel pop eax 58|11223344556677885f5f5f5f5f5f5f 32 plan9 POPL AX 58|11223344556677885f5f5f5f5f5f5f 64 gnu pop %rax 58|11223344556677885f5f5f5f5f5f5f 64 intel pop rax -58|11223344556677885f5f5f5f5f5f5f 64 plan9 POPL AX +58|11223344556677885f5f5f5f5f5f5f 64 plan9 POPQ AX 60|11223344556677885f5f5f5f5f5f5f 32 intel pushad 60|11223344556677885f5f5f5f5f5f5f 32 plan9 PUSHAD 60|11223344556677885f5f5f5f5f5f5f 64 gnu error: unrecognized instruction From fc48f9fe4c157e3ed95b38adbda9b9fe5a31cf03 Mon Sep 17 00:00:00 2001 From: Dan Kortschak Date: Sat, 9 Apr 2022 08:19:57 +0930 Subject: [PATCH 008/200] ppc64,x86: fix code generation notice Change-Id: I6de117af0ae5f9ccb0dbecad53bebf6241a13e38 Reviewed-on: https://go-review.googlesource.com/c/arch/+/399274 Reviewed-by: Ian Lance Taylor Run-TryBot: Ian Lance Taylor TryBot-Result: Gopher Robot Reviewed-by: Cherry Mui Reviewed-by: Ian Lance Taylor Auto-Submit: Ian Lance Taylor --- ppc64/ppc64asm/tables.go | 3 +-- ppc64/ppc64map/map.go | 3 +-- x86/x86asm/tables.go | 3 +-- x86/x86map/map.go | 3 +-- 4 files changed, 4 insertions(+), 8 deletions(-) diff --git a/ppc64/ppc64asm/tables.go b/ppc64/ppc64asm/tables.go index d42ab0f2..9e3b26f1 100644 --- a/ppc64/ppc64asm/tables.go +++ b/ppc64/ppc64asm/tables.go @@ -1,5 +1,4 @@ -// DO NOT EDIT -// generated by: ppc64map -fmt=decoder ../pp64.csv +// Code generated by ppc64map -fmt=decoder pp64.csv DO NOT EDIT. 
package ppc64asm diff --git a/ppc64/ppc64map/map.go b/ppc64/ppc64map/map.go index 0acad44d..8d345533 100644 --- a/ppc64/ppc64map/map.go +++ b/ppc64/ppc64map/map.go @@ -704,8 +704,7 @@ var funcBodyTmpl = template.Must(template.New("funcBody").Parse(``)) func printDecoder(p *Prog) { var buf bytes.Buffer - fmt.Fprintf(&buf, "// DO NOT EDIT\n") - fmt.Fprintf(&buf, "// generated by: ppc64map -fmt=decoder %s\n", inputFile) + fmt.Fprintf(&buf, "// Code generated by ppc64map -fmt=decoder %s DO NOT EDIT.\n", inputFile) fmt.Fprintf(&buf, "\n") fmt.Fprintf(&buf, "package ppc64asm\n\n") diff --git a/x86/x86asm/tables.go b/x86/x86asm/tables.go index af3fb73c..6f57c70b 100644 --- a/x86/x86asm/tables.go +++ b/x86/x86asm/tables.go @@ -1,5 +1,4 @@ -// DO NOT EDIT -// generated by: x86map -fmt=decoder ../x86.csv +// Code generated by x86map -fmt=decoder x86.csv DO NOT EDIT. package x86asm diff --git a/x86/x86map/map.go b/x86/x86map/map.go index 4f64c0c8..df8c68e5 100644 --- a/x86/x86map/map.go +++ b/x86/x86map/map.go @@ -665,8 +665,7 @@ func printDecoder(p *Prog) { "PAUSE": true, } printDecoderPass(p, 1, false, opMap) - fmt.Printf("// DO NOT EDIT\n") - fmt.Printf("// generated by: x86map -fmt=decoder %s\n", inputFile) + fmt.Printf("// Code generated by x86map -fmt=decoder %s DO NOT EDIT.\n", inputFile) fmt.Printf("\n") fmt.Printf("package x86asm\n\n") fmt.Printf("var decoder = [...]uint16{\n\tuint16(xFail),\n") From 00200b7164a7c6d68f74efd99f51b6100ea0a97d Mon Sep 17 00:00:00 2001 From: Russ Cox Date: Fri, 22 Jul 2022 11:10:03 -0400 Subject: [PATCH 009/200] A+C: delete AUTHORS and CONTRIBUTORS In 2009, Google's open-source lawyers asked us to create the AUTHORS file to define "The Go Authors", and the CONTRIBUTORS file was in keeping with open source best practices of the time. 
Re-reviewing our repos now in 2022, the open-source lawyers are comfortable with source control history taking the place of the AUTHORS file, and most open source projects no longer maintain CONTRIBUTORS files. To ease maintenance, remove AUTHORS and CONTRIBUTORS from all repos. For golang/go#53961. Change-Id: I6aadbd7aaeee54c143dcbd5de6db48731d48c178 Reviewed-on: https://go-review.googlesource.com/c/arch/+/418900 Run-TryBot: Russ Cox TryBot-Result: Gopher Robot Reviewed-by: David Chase --- AUTHORS | 3 --- CONTRIBUTORS | 3 --- 2 files changed, 6 deletions(-) delete mode 100644 AUTHORS delete mode 100644 CONTRIBUTORS diff --git a/AUTHORS b/AUTHORS deleted file mode 100644 index 2b00ddba..00000000 --- a/AUTHORS +++ /dev/null @@ -1,3 +0,0 @@ -# This source code refers to The Go Authors for copyright purposes. -# The master list of authors is in the main Go distribution, -# visible at https://tip.golang.org/AUTHORS. diff --git a/CONTRIBUTORS b/CONTRIBUTORS deleted file mode 100644 index 1fbd3e97..00000000 --- a/CONTRIBUTORS +++ /dev/null @@ -1,3 +0,0 @@ -# This source code was written by the Go contributors. -# The master list of contributors is in the main Go distribution, -# visible at https://tip.golang.org/CONTRIBUTORS. From 13eedde4113cee87b1eaaf9e20cd6d4f812bf18b Mon Sep 17 00:00:00 2001 From: "Paul E. Murphy" Date: Wed, 20 Jul 2022 13:26:21 -0500 Subject: [PATCH 010/200] ppc64/ppc64asm: fix objdump tests In short, these tests create an object file from a list of opcodes, and expect objdump to generate exactly as many decoded opcodes. Unfortunately, objdump generates two opcode entries for each invalid prefixed instruction, which causes the the testing code to deadlock itself. For example, objdump decodes an invalid form of paddi like: .long ... addi ... instead of something like: .quadword ... Work around this by examing the primary opcode of any entry which objdump reports as ".long", and skip over the next word if the primary opcode is "1" (the prefix opcode). 
The test skips over ".long" entries, so it will continue to work as expected. Change-Id: I9dd0fda10683f666aace4140b63e81fc0fea2ad0 Reviewed-on: https://go-review.googlesource.com/c/arch/+/418857 TryBot-Result: Gopher Robot Run-TryBot: Paul Murphy Reviewed-by: Bryan Mills Reviewed-by: Lynn Boger Reviewed-by: Cherry Mui Reviewed-by: Ian Lance Taylor --- ppc64/ppc64asm/ext_test.go | 19 ++++++++++++------- ppc64/ppc64asm/objdumpext_test.go | 18 +++++++++++------- 2 files changed, 23 insertions(+), 14 deletions(-) diff --git a/ppc64/ppc64asm/ext_test.go b/ppc64/ppc64asm/ext_test.go index f9242e1d..806701b2 100644 --- a/ppc64/ppc64asm/ext_test.go +++ b/ppc64/ppc64asm/ext_test.go @@ -40,7 +40,7 @@ var ( // from an external disassembler's output. type ExtInst struct { addr uint32 - enc [4]byte + enc [8]byte nenc int text string } @@ -200,20 +200,25 @@ func writeInst(generate func(func([]byte))) (file string, f *os.File, size int, defer w.Flush() size = 0 generate(func(x []byte) { - if len(x) > 4 { - x = x[:4] + if len(x) != 4 && len(x) != 8 { + panic(fmt.Sprintf("Unexpected instruction %v\n", x)) + } + izeros := zeros + if len(x) == 4 { + // Only pad to 4 bytes for a 4 byte instruction word. + izeros = izeros[4:] } if debug { - fmt.Printf("%#x: %x%x\n", start+size, x, zeros[len(x):]) + fmt.Printf("%#x: %x%x\n", start+size, x, izeros[len(x):]) } w.Write(x) - w.Write(zeros[len(x):]) - size += len(zeros) + w.Write(izeros[len(x):]) + size += len(izeros) }) return file, f, size, nil } -var zeros = []byte{0, 0, 0, 0} +var zeros = []byte{0, 0, 0, 0, 0, 0, 0, 0} // pad pads the code sequence with pops. 
func pad(enc []byte) []byte { diff --git a/ppc64/ppc64asm/objdumpext_test.go b/ppc64/ppc64asm/objdumpext_test.go index 37aa2573..033f6708 100644 --- a/ppc64/ppc64asm/objdumpext_test.go +++ b/ppc64/ppc64asm/objdumpext_test.go @@ -64,7 +64,7 @@ func objdump(ext *ExtDis) error { reading bool next uint32 = start addr uint32 - encbuf [4]byte + encbuf [8]byte enc []byte text string ) @@ -88,15 +88,19 @@ func objdump(ext *ExtDis) error { text = "error: unknown instruction" enc = nil } - if len(enc) == 4 { - // prints as word but we want to record bytes - enc[0], enc[3] = enc[3], enc[0] - enc[1], enc[2] = enc[2], enc[1] + // Prefixed instructions may not decode as expected if + // they are an invalid form. Some are tested in decode.txt. + // objdump treats these like two instructions. + // + // Look for primary opcode 1 and advance an exta 4 bytes if + // this failed to decode. + if strings.HasPrefix(text, ".long") && enc[0]>>2 == 1 { + next += 4 } ext.Dec <- ExtInst{addr, encbuf, len(enc), text} - encbuf = [4]byte{} + encbuf = [8]byte{} + next += uint32(len(enc)) enc = nil - next += 4 } } var textangle = []byte("<.text>:") From ada1728cebaa682942b88353968347233f495ce7 Mon Sep 17 00:00:00 2001 From: "Paul E. Murphy" Date: Wed, 20 Jul 2022 13:47:02 -0500 Subject: [PATCH 011/200] ppc64/ppc64asm,ppc64map: fix BHRBE argument translation BHRBE stands for "branch history rolling buffer entry". This is not an SPR. Treat it as an unsigned immediate type argument. Similarly, DCRN, SR, TMR, PMRN fields are no longer present in ISA 3.1, they can be removed and nearby code simplified. Fix ppc64map and update tables.go. 
Change-Id: Ie779d24ae9d24541db6565ea169be0d80b893ff8 Reviewed-on: https://go-review.googlesource.com/c/arch/+/418858 Reviewed-by: Ian Lance Taylor Reviewed-by: Cherry Mui TryBot-Result: Gopher Robot Reviewed-by: Lynn Boger Run-TryBot: Paul Murphy --- ppc64/ppc64asm/tables.go | 4 ++-- ppc64/ppc64map/map.go | 13 +++++-------- 2 files changed, 7 insertions(+), 10 deletions(-) diff --git a/ppc64/ppc64asm/tables.go b/ppc64/ppc64asm/tables.go index 9e3b26f1..8d0a2431 100644 --- a/ppc64/ppc64asm/tables.go +++ b/ppc64/ppc64asm/tables.go @@ -2906,7 +2906,7 @@ var ( ap_FPReg_11_15 = &argField{Type: TypeFPReg, Shift: 0, BitFields: BitFields{{11, 5, 0}}} ap_ImmUnsigned_7_10 = &argField{Type: TypeImmUnsigned, Shift: 0, BitFields: BitFields{{7, 4, 0}}} ap_ImmUnsigned_31_31 = &argField{Type: TypeImmUnsigned, Shift: 0, BitFields: BitFields{{31, 1, 0}}} - ap_SpReg_11_20 = &argField{Type: TypeSpReg, Shift: 0, BitFields: BitFields{{11, 10, 0}}} + ap_ImmUnsigned_11_20 = &argField{Type: TypeImmUnsigned, Shift: 0, BitFields: BitFields{{11, 10, 0}}} ap_ImmUnsigned_20_20 = &argField{Type: TypeImmUnsigned, Shift: 0, BitFields: BitFields{{20, 1, 0}}} ap_ImmUnsigned_16_16 = &argField{Type: TypeImmUnsigned, Shift: 0, BitFields: BitFields{{16, 1, 0}}} ap_ImmUnsigned_17_20 = &argField{Type: TypeImmUnsigned, Shift: 0, BitFields: BitFields{{17, 4, 0}}} @@ -3883,7 +3883,7 @@ var instFormats = [...]instFormat{ {LXSSPX, 0xfc0007fe00000000, 0x7c00041800000000, 0x0, // Load VSX Scalar Single-Precision Indexed X-form (lxsspx XT,RA,RB) [6]*argField{ap_VecSReg_31_31_6_10, ap_Reg_11_15, ap_Reg_16_20}}, {MFBHRBE, 0xfc0007fe00000000, 0x7c00025c00000000, 0x100000000, // Move From BHRB XFX-form (mfbhrbe RT,BHRBE) - [6]*argField{ap_Reg_6_10, ap_SpReg_11_20}}, + [6]*argField{ap_Reg_6_10, ap_ImmUnsigned_11_20}}, {MFVSRD, 0xfc0007fe00000000, 0x7c00006600000000, 0xf80000000000, // Move From VSR Doubleword X-form (mfvsrd RA,XS) [6]*argField{ap_Reg_11_15, ap_VecSReg_31_31_6_10}}, {MFVSRWZ, 0xfc0007fe00000000, 
0x7c0000e600000000, 0xf80000000000, // Move From VSR Word and Zero X-form (mfvsrwz RA,XS) diff --git a/ppc64/ppc64map/map.go b/ppc64/ppc64map/map.go index 8d345533..4f90ed90 100644 --- a/ppc64/ppc64map/map.go +++ b/ppc64/ppc64map/map.go @@ -5,12 +5,13 @@ // ppc64map constructs the ppc64 opcode map from the instruction set CSV file. // // Usage: +// // ppc64map [-fmt=format] ppc64.csv // // The known output formats are: // -// text (default) - print decoding tree in text form -// decoder - print decoding tables for the ppc64asm package +// text (default) - print decoding tree in text form +// decoder - print decoding tables for the ppc64asm package package main import ( @@ -423,7 +424,7 @@ func add(p *Prog, text, mnemonics, encoding, tags string) { opr = "BD" } - case "XMSK", "YMSK", "PMSK", "IX": + case "XMSK", "YMSK", "PMSK", "IX", "BHRBE": typ = asm.TypeImmUnsigned case "IMM32": @@ -559,12 +560,8 @@ func add(p *Prog, text, mnemonics, encoding, tags string) { case "VRA", "VRB", "VRC", "VRS", "VRT": typ = asm.TypeVecReg - case "SPR", "DCRN", "BHRBE", "TBR", "SR", "TMR", "PMRN": // Note: if you add to this list and the register field needs special handling, add it to switch statement below + case "SPR", "TBR": typ = asm.TypeSpReg - switch opr { - case "DCRN": - opr = "DCR" - } if n := strings.ToLower(opr); n != opr && args.Find(n) >= 0 { opr = n // spr[5:9] || spr[0:4] } From 44deed04936c31acd3d9306129a66498fb79d2ef Mon Sep 17 00:00:00 2001 From: "Paul E. Murphy" Date: Mon, 24 May 2021 16:22:57 -0500 Subject: [PATCH 012/200] ppc64/ppc64map: add encoder functionality Use the ISA information to generate bits for supporting ISA 3.1 (POWER10) instructions. This creates a new file asm9_gtables.go to allow assembly of instructions defined in pp64.csv. This uses the input pp64.csv file to generate an encoding function for each "type" of instruction. Some encoder functions can be shared (e.x fpr/gpr/vsr opcodes which share similar encoding). 
These are named based on the oldest instruction which uses the function, like "type_xxspltiw". All functions share two tables which store the fixed bits of an instruction. Non-prefixed instructions use GenOpcodes exclusively, prefixed opcodes use the GenPfxOpcodes table to hold the suffix instruction word bits. These are used to populate the instruction specific encoding bits for a particular type. Likewise, the function opsetGen is created to map opcodes which share identical argument types. This plugs into the buildop function in asm9.go. Change-Id: I50cddfcec86b667774af858fb8efe8910dfe80b8 Reviewed-on: https://go-review.googlesource.com/c/arch/+/350609 Reviewed-by: Lynn Boger TryBot-Result: Gopher Robot Run-TryBot: Paul Murphy Reviewed-by: Michael Knyszek Reviewed-by: Heschi Kreinick --- ppc64/ppc64map/map.go | 538 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 528 insertions(+), 10 deletions(-) diff --git a/ppc64/ppc64map/map.go b/ppc64/ppc64map/map.go index 4f90ed90..31d692d4 100644 --- a/ppc64/ppc64map/map.go +++ b/ppc64/ppc64map/map.go @@ -12,6 +12,10 @@ // // text (default) - print decoding tree in text form // decoder - print decoding tables for the ppc64asm package +// encoder - generate a self-contained file which can be used to encode +// go obj.Progs into machine code +// asm - generate a gnu asm file which can be compiled by gcc containing +// all opcodes discovered in ppc64.csv using macro friendly arguments. package main import ( @@ -20,6 +24,7 @@ import ( "flag" "fmt" gofmt "go/format" + asm "golang.org/x/arch/ppc64/ppc64asm" "log" "math/bits" "os" @@ -28,8 +33,6 @@ import ( "strconv" "strings" "text/template" - - asm "golang.org/x/arch/ppc64/ppc64asm" ) var format = flag.String("fmt", "text", "output format: text, decoder, asm") @@ -37,6 +40,45 @@ var debug = flag.Bool("debug", false, "enable debugging output") var inputFile string +type isaversion uint32 + +const ( + // Sort as supersets of each other. 
Generally speaking, each newer ISA + // supports a superset of the previous instructions with a few exceptions + // throughout. + ISA_P1 isaversion = iota + ISA_P2 + ISA_PPC + ISA_V200 + ISA_V201 + ISA_V202 + ISA_V203 + ISA_V205 + ISA_V206 + ISA_V207 + ISA_V30 + ISA_V30B + ISA_V30C + ISA_V31 +) + +var isaToISA = map[string]isaversion{ + "P1": ISA_P1, + "P2": ISA_P2, + "PPC": ISA_PPC, + "v2.00": ISA_V200, + "v2.01": ISA_V201, + "v2.02": ISA_V202, + "v2.03": ISA_V203, + "v2.05": ISA_V205, + "v2.06": ISA_V206, + "v2.07": ISA_V207, + "v3.0": ISA_V30, + "v3.0B": ISA_V30B, + "v3.0C": ISA_V30C, + "v3.1": ISA_V31, +} + func usage() { fmt.Fprintf(os.Stderr, "usage: ppc64map [-fmt=format] ppc64.csv\n") os.Exit(2) @@ -64,6 +106,8 @@ func main() { print = printDecoder case "asm": print = printASM + case "encoder": + print = printEncoder } p, err := readCSV(flag.Arg(0)) @@ -104,15 +148,17 @@ func readCSV(file string) (*Prog, error) { } type Prog struct { - Insts []Inst - OpRanges map[string]string + Insts []Inst + OpRanges map[string]string + nextOrder int // Next position value (used for Insts[x].order) } type Field struct { - Name string - BitFields asm.BitFields - Type asm.ArgType - Shift uint8 + Name string + BitFields asm.BitFields + BitFieldNames []string + Type asm.ArgType + Shift uint8 } func (f Field) String() string { @@ -130,6 +176,12 @@ type Inst struct { SValue uint32 // Likewise for the Value SDontCare uint32 // Likewise for the DontCare bits Fields []Field + Words int // Number of words instruction encodes to. + Isa isaversion + memOp bool // Is this a memory operation? + memOpX bool // Is this an x-form memory operation? + memOpSt bool // Is this a store memory operations? + order int // Position in pp64.csv. } func (i Inst) String() string { @@ -330,7 +382,7 @@ func computeMaskValueReserved(args Args, text string) (mask, value, reserved uin // detected instructions into p. 
One entry may generate multiple instruction // entries as each extended mnemonic listed in text is treated like a unique // instruction. -func add(p *Prog, text, mnemonics, encoding, tags string) { +func add(p *Prog, text, mnemonics, encoding, isa string) { // Parse encoding, building size and offset of each field. // The first field in the encoding is the smallest offset. // And note the MSB is bit 0, not bit 31. @@ -340,12 +392,18 @@ func add(p *Prog, text, mnemonics, encoding, tags string) { iword := int8(0) ispfx := false + isaLevel, fnd := isaToISA[isa] + if !fnd { + log.Fatalf("%s: ISA level '%s' is unknown\n", text, isa) + return + } + // Is this a prefixed instruction? if encoding[0] == ',' { pfields := strings.Split(encoding, ",")[1:] if len(pfields) != 2 { - fmt.Fprintf(os.Stderr, "%s: Prefixed instruction must be 2 words long.\n", text) + log.Fatalf("%s: Prefixed instruction must be 2 words long.\n", text) return } pargs = parseFields(pfields[0], text, iword) @@ -617,17 +675,30 @@ func add(p *Prog, text, mnemonics, encoding, tags string) { f1.Offs, f1.Bits, f1.Word = uint8(args[i].Offs), uint8(args[i].Bits), uint8(args[i].Word) } field.BitFields.Append(f1) + field.BitFieldNames = append(field.BitFieldNames, opr) if f2.Bits > 0 { field.BitFields.Append(f2) + field.BitFieldNames = append(field.BitFieldNames, opr2) } if f3.Bits > 0 { field.BitFields.Append(f3) + field.BitFieldNames = append(field.BitFieldNames, opr3) } inst.Fields = append(inst.Fields, field) } if *debug { fmt.Printf("%v\n", inst) } + inst.Isa = isaLevel + inst.memOp = hasMemoryArg(&inst) + inst.memOpX = inst.memOp && inst.Op[len(inst.Op)-1] == 'x' + inst.memOpSt = inst.memOp && strings.Contains(inst.Text, "Store") + inst.Words = 1 + inst.order = p.nextOrder + p.nextOrder++ + if ispfx { + inst.Words = 2 + } foundInst = append(foundInst, inst) } @@ -658,6 +729,453 @@ func printText(p *Prog) { log.Fatal("-fmt=text not implemented") } +// Some ISA instructions look like memory ops, but are not.
+var isNotMemopMap = map[string]bool{ + "lxvkq": true, + "lvsl": true, + "lvsr": true, +} + +// Some ISA instructions are memops, but are not described like "Load ..." or "Store ..." +var isMemopMap = map[string]bool{} + +// Does this instruction contain a memory argument (e.g x-form load or d-form store) +func hasMemoryArg(insn *Inst) bool { + return ((strings.HasPrefix(insn.Text, "Load") || strings.HasPrefix(insn.Text, "Store") || + strings.HasPrefix(insn.Text, "Prefixed Load") || strings.HasPrefix(insn.Text, "Prefixed Store")) && !isNotMemopMap[insn.Op]) || + isMemopMap[insn.Op] +} + +// Generate a function which takes an obj.Proj and convert it into +// machine code in the supplied buffer. These functions are used +// by asm9.go. +func insnEncFuncStr(insn *Inst, firstName [2]string) string { + buf := new(bytes.Buffer) + // Argument packing order. + // Note, if a2 is not a register type, it is skipped. + argOrder := []string{ + "p.To", // a6 + "p.From", // a1 + "p", // a2 + "p.RestArgs[0].Addr", // a3 + "p.RestArgs[1].Addr", // a4 + "p.RestArgs[2].Addr", // a5 + } + if len(insn.Fields) > len(argOrder) { + log.Fatalf("cannot handle %v. Only %d args supported.", insn, len(argOrder)) + } + + // Does this field require an obj.Addr.Offset? + isImmediate := func(t asm.ArgType) bool { + return t == asm.TypeImmUnsigned || t == asm.TypeSpReg || t == asm.TypeImmSigned || t == asm.TypeOffset + } + + if insn.memOp { + // Swap to/from arguments if we are generating + // for a store operation. + if insn.memOpSt { + // Otherwise, order first three args as: p.From, p.To, p.To + argOrder[0], argOrder[1] = argOrder[1], argOrder[0] + } + argOrder[2] = argOrder[1] // p.Reg is either an Index or Offset (X or D-form) + } else if len(insn.Fields) > 2 && isImmediate(insn.Fields[2].Type) { + // Delete the a2 argument if it is not a register type. + argOrder = append(argOrder[0:2], argOrder[3:]...) 
+ } + + fmt.Fprintf(buf, "// %s\n", insn.Encoding) + fmt.Fprintf(buf, "func type_%s(c *ctxt9, p *obj.Prog, t *Optab, out *[5]uint32) {\n", insn.Op) + if insn.Words > 1 { + fmt.Fprintf(buf, "o0 := GenPfxOpcodes[p.As - A%s]\n", firstName[1]) + } + fmt.Fprintf(buf, "o%d := GenOpcodes[p.As - A%s]\n", insn.Words-1, firstName[0]) + + errCheck := "" + for j, atype := range insn.Fields { + itype := ".Reg" + if isImmediate(atype.Type) { + itype = ".Offset" + } else if insn.memOpX && atype.Name == "RA" { + // X-form memory operations encode RA as the index register of memory type arg. + itype = ".Index" + } + + bitPos := uint64(0) + // VecSpReg is encoded as an even numbered VSR. It is implicitly shifted by 1. + if atype.Type == asm.TypeVecSpReg { + bitPos += 1 + } + // Count the total number of bits to work backwards when shifting + for _, f := range atype.BitFields { + bitPos += uint64(f.Bits) + } + // Adjust for any shifting (e.g DQ/DS shifted instructions) + bitPos += uint64(atype.Shift) + bits := bitPos + + // Generate code to twirl the respective bits into the correct position, and mask off extras. + for i, f := range atype.BitFields { + bitPos -= uint64(f.Bits) + argStr := argOrder[j] + itype + if bitPos != 0 { + argStr = fmt.Sprintf("(%s>>%d)", argStr, bitPos) + } + mask := (1 << uint64(f.Bits)) - 1 + shift := 32 - uint64(f.Offs) - uint64(f.Bits) + fmt.Fprintf(buf, "o%d |= uint32(%s&0x%x)<<%d // %s\n", f.Word, argStr, mask, shift, atype.BitFieldNames[i]) + } + + // Generate a check to verify shifted inputs satisfy their constraints. + // For historical reasons this is not needed for 16 bit values shifted by 16. 
(i.e SI/UI constants in addis/xoris) + if atype.Shift != 0 && atype.Shift != 16 && bits != 32 { + arg := argOrder[j] + itype + mod := (1 << atype.Shift) - 1 + errCheck += fmt.Sprintf("if %s & 0x%x != 0 {\n", arg, mod) + errCheck += fmt.Sprintf("c.ctxt.Diag(\"Constant 0x%%x (%%d) is not a multiple of %d\\n%%v\",%s,%s,p)\n", mod+1, arg, arg) + errCheck += fmt.Sprintf("}\n") + } + j++ + } + buf.WriteString(errCheck) + if insn.Words > 1 { + fmt.Fprintf(buf, "out[1] = o1\n") + } + fmt.Fprintf(buf, "out[0] = o0\n") + fmt.Fprintf(buf, "}\n") + return buf.String() +} + +// Generate a stringed name representing the type of arguments ISA +// instruction needs to be encoded into a usable machine instruction +func insnTypeStr(insn *Inst, uniqueRegTypes bool) string { + if len(insn.Fields) == 0 { + return "type_none" + } + + ret := "type_" + + // Tag store opcodes to give special treatment when generating + // assembler function. They encode similarly to their load analogues. + if insn.memOp { + if insn.memOpSt { + ret += "st_" + } else { + ret += "ld_" + } + } + + // TODO: this is only sufficient for ISA3.1. + for _, atype := range insn.Fields { + switch atype.Type { + // Simple, register like 5 bit field (CR bit, FPR, GPR, VR) + case asm.TypeReg, asm.TypeFPReg, asm.TypeVecReg, asm.TypeCondRegBit: + if uniqueRegTypes { + ret += map[asm.ArgType]string{asm.TypeReg: "R", asm.TypeFPReg: "F", asm.TypeVecReg: "V", asm.TypeCondRegBit: "C"}[atype.Type] + // Handle even/odd pairs in FPR/GPR args. They encode as 5 bits too, but odd values are invalid. 
+ if atype.Name[len(atype.Name)-1] == 'p' { + ret += "p" + } + } else { + ret += "R" + } + case asm.TypeMMAReg, asm.TypeCondRegField: // 3 bit register fields (MMA or CR field) + ret += "M" + case asm.TypeSpReg: + ret += "P" + case asm.TypeVecSReg: // VSX register (6 bits, usually split into 2 fields) + ret += "X" + case asm.TypeVecSpReg: // VSX register pair (5 bits, maybe split fields) + ret += "Y" + case asm.TypeImmSigned, asm.TypeOffset, asm.TypeImmUnsigned: + if atype.Type == asm.TypeImmUnsigned { + ret += "I" + } else { + ret += "S" + } + if atype.Shift != 0 { + ret += fmt.Sprintf("%d", atype.Shift) + } + default: + log.Fatalf("Unhandled type in insnTypeStr: %v\n", atype) + } + + // And add bit packing info + for _, bf := range atype.BitFields { + ret += fmt.Sprintf("_%d_%d", bf.Word*32+bf.Offs, bf.Bits) + } + } + return ret +} + +type AggInfo struct { + Insns []*Inst // List of instructions sharing this type + Typef string // The generated function name matching this +} + +// Generate an Optab entry for a set of instructions with identical argument types +// and write it to buf. 
+func genOptabEntry(ta *AggInfo, typeMap map[string]*Inst) string { + buf := new(bytes.Buffer) + fitArg := func(f *Field, i *Inst) string { + argToRegType := map[asm.ArgType]string{ + // TODO: only complete for ISA 3.1 + asm.TypeReg: "C_REG", + asm.TypeCondRegField: "C_CREG", + asm.TypeCondRegBit: "C_CRBIT", + asm.TypeFPReg: "C_FREG", + asm.TypeVecReg: "C_VREG", + asm.TypeVecSReg: "C_VSREG", + asm.TypeVecSpReg: "C_VSREG", + asm.TypeMMAReg: "C_AREG", + asm.TypeSpReg: "C_SPR", + } + if t, fnd := argToRegType[f.Type]; fnd { + if f.Name[len(f.Name)-1] == 'p' { + return t + "P" + } + return t + } + bits := f.Shift + for _, sf := range f.BitFields { + bits += sf.Bits + } + shift := "" + if f.Shift != 0 { + shift = fmt.Sprintf("S%d", f.Shift) + } + sign := "U" + if f.Type == asm.TypeImmSigned || f.Type == asm.TypeOffset { + sign = "S" + // DS/DQ offsets should explicitly test their offsets to ensure + // they are aligned correctly. This makes tracking down bad offset + // passed to the compiler more straightfoward. + if f.Type == asm.TypeOffset { + shift = "" + } + } + return fmt.Sprintf("C_%s%d%sCON", sign, bits, shift) + } + insn := ta.Insns[0] + args := [6]string{} + // Note, a2 is skipped if the second input argument does not map to a reg. + argOrder := []int{ + 5, + 0, + 1, + 2, + 3, + 4} + + i := 0 + for _, j := range insn.Fields { + // skip a2 if it isn't a reg type. + at := fitArg(&j, insn) + if argOrder[i] == 1 && !strings.HasSuffix(at, "REG") { + i++ + } + args[argOrder[i]] = at + i++ + } + + // Likewise, fixup memory operations. Combine imm + reg, reg + reg + // operations into memory type arguments. 
+ if insn.memOp { + switch args[0] + " " + args[1] { + case "C_REG C_REG": + args[0] = "C_XOREG" + case "C_S16CON C_REG": + args[0] = "C_SOREG" + case "C_S34CON C_REG": + args[0] = "C_LOREG" + } + args[1] = "" + // Finally, fixup store operand ordering to match golang + if insn.memOpSt { + args[0], args[5] = args[5], args[0] + } + + } + fmt.Fprintf(buf, "{as: A%s,", opName(insn.Op)) + for i, s := range args { + if len(s) <= 0 { + continue + } + fmt.Fprintf(buf, "a%d: %s, ", i+1, s) + } + typef := typeMap[ta.Typef].Op + + pfx := "" + if insn.Words > 1 { + pfx = " ispfx: true," + } + fmt.Fprintf(buf, "asmout: type_%s,%s size: %d},\n", typef, pfx, insn.Words*4) + return buf.String() +} + +// printEncoder implements the -fmt=encoder mode. This generates a go file named +// asm9_gtables.go.new. It is self-contained and is called into by the PPC64 +// assembler routines. +// +// For now it is restricted to generating code for ISA 3.1 and newer, but it could +// support older ISA versions with some work, and integration effort. +func printEncoder(p *Prog) { + const minISA = ISA_V31 + + // The type map separates based on obj.Addr to a bit field. Register types + // for GPR, FPR, VR pack identically, but are classified differently. + typeMap := map[string]*Inst{} + typeAggMap := map[string]*AggInfo{} + var oplistBuf bytes.Buffer + var opnameBuf bytes.Buffer + + // The first opcode of 32 or 64 bits to appear in the opcode tables. + firstInsn := [2]string{} + + // Sort the instructions by word size, then by ISA version, oldest to newest. + sort.Slice(p.Insts, func(i, j int) bool { + if p.Insts[i].Words != p.Insts[j].Words { + return p.Insts[i].Words < p.Insts[j].Words + } + return p.Insts[i].order > p.Insts[j].order + }) + + // Classify each opcode and it's arguments, and generate opcode name/enum values. 
+ for i, insn := range p.Insts { + if insn.Isa < minISA { + continue + } + extra := "" + if firstInsn[insn.Words-1] == "" { + firstInsn[insn.Words-1] = opName(insn.Op) + if insn.Words == 1 { + extra = " = ALASTAOUT + iota" + } + } + opType := insnTypeStr(&insn, false) + opTypeOptab := insnTypeStr(&insn, true) + fmt.Fprintf(&oplistBuf, "A%s%s\n", opName(insn.Op), extra) + fmt.Fprintf(&opnameBuf, "\"%s\",\n", opName(insn.Op)) + // Use the oldest instruction to name the encoder function. Some names + // may change if minISA is lowered. + if _, fnd := typeMap[opType]; !fnd { + typeMap[opType] = &p.Insts[i] + } + at, fnd := typeAggMap[opTypeOptab] + if !fnd { + typeAggMap[opTypeOptab] = &AggInfo{[]*Inst{&p.Insts[i]}, opType} + } else { + at.Insns = append(at.Insns, &p.Insts[i]) + } + } + fmt.Fprintf(&oplistBuf, "ALASTGEN\n") + fmt.Fprintf(&oplistBuf, "AFIRSTGEN = A%s\n", firstInsn[0]) + + // Sort type information before outputing to ensure stable ordering + targ := struct { + InputFile string + Insts []Inst + MinISA isaversion + TypeAggList []*AggInfo + TypeList []*Inst + FirstInsn [2]string + TypeMap map[string]*Inst + Oplist string + Opnames string + }{InputFile: inputFile, Insts: p.Insts, MinISA: minISA, FirstInsn: firstInsn, TypeMap: typeMap, Oplist: oplistBuf.String(), Opnames: opnameBuf.String()} + for _, v := range typeAggMap { + targ.TypeAggList = append(targ.TypeAggList, v) + } + for _, v := range typeMap { + targ.TypeList = append(targ.TypeList, v) + } + sort.Slice(targ.TypeAggList, func(i, j int) bool { + // Sort based on the first entry, it is the last to appear in Appendix F. + return targ.TypeAggList[i].Insns[0].Op < targ.TypeAggList[j].Insns[0].Op + }) + sort.Slice(targ.TypeList, func(i, j int) bool { + return targ.TypeList[i].Op < targ.TypeList[j].Op + }) + + // Generate asm9_gtable.go from the following template. 
+ asm9_gtable_go := ` + // DO NOT EDIT + // generated by: ppc64map -fmt=encoder {{.InputFile}} + + package ppc64 + + import ( + "cmd/internal/obj" + ) + + const ( + {{print $.Oplist -}} + ) + + var GenAnames = []string { + {{print $.Opnames -}} + } + + var GenOpcodes = [...]uint32 { + {{range $v := .Insts}}{{if ge $v.Isa $.MinISA -}} + {{if (eq $v.Words 1)}}{{printf "0x%08x, // A%s" $v.Value (opname $v.Op)}} + {{else}} {{printf "0x%08x, // A%s" $v.SValue (opname $v.Op)}} + {{end}}{{end}}{{end -}} + } + + var GenPfxOpcodes = [...]uint32 { + {{range $v := .Insts}}{{if and (ge $v.Isa $.MinISA) (eq $v.Words 2) -}} + {{printf "0x%08x, // A%s" $v.Value (opname $v.Op)}} + {{end}}{{end -}} + } + + var optabGen = []Optab { + {{range $v := .TypeAggList -}} + {{genoptabentry $v $.TypeMap -}} + {{end -}} + } + + {{range $v := .TypeList}} + {{genencoderfunc $v $.FirstInsn}} + {{end}} + + func opsetGen(from obj.As) bool { + r0 := from & obj.AMask + switch from { + {{range $v := .TypeAggList -}} + case A{{opname (index $v.Insns 0).Op}}: + {{range $w := (slice $v.Insns 1) -}} + opset(A{{opname $w.Op}},r0) + {{end -}} + {{end -}} + default: + return false + } + return true + } + ` + tmpl := template.New("asm9_gtable.go") + tmpl.Funcs(template.FuncMap{ + "opname": opName, + "genencoderfunc": insnEncFuncStr, + "genoptabentry": genOptabEntry, + }) + tmpl.Parse(asm9_gtable_go) + + // Write and gofmt the new file. + var tbuf bytes.Buffer + if err := tmpl.Execute(&tbuf, targ); err != nil { + log.Fatal(err) + } + tout, err := gofmt.Source(tbuf.Bytes()) + if err != nil { + fmt.Printf("%s", tbuf.Bytes()) + log.Fatalf("gofmt error: %v", err) + } + if err := os.WriteFile("asm9_gtables.go.new", tout, 0666); err != nil { + log.Fatalf("Failed to create asm9_gtables.new: %v", err) + } +} + // printASM implements the -fmt=asm mode. This prints out a gnu assembler file // which can be used to used to generate test output to verify the golang // disassembler's gnu output matches gnu binutils. 
This is used as an input to From 2926576b28c0567946e1a16de13155f56d9790ea Mon Sep 17 00:00:00 2001 From: "Paul E. Murphy" Date: Fri, 16 Sep 2022 16:11:33 -0500 Subject: [PATCH 013/200] ppc64/ppc64asm: fix decoding of several CC opcodes A few of them decoded to invalid plan9 opcodes, and some did not swap their input arguments similar to their non-CC variants. Change-Id: I26b2b4e318891a75d9c4973bb88efaff8b188bbf Reviewed-on: https://go-review.googlesource.com/c/arch/+/431475 Reviewed-by: Lynn Boger Reviewed-by: Bryan Mills Reviewed-by: Cherry Mui Run-TryBot: Paul Murphy TryBot-Result: Gopher Robot --- ppc64/ppc64asm/plan9.go | 8 ++++++-- ppc64/ppc64asm/testdata/decode.txt | 11 +++++++++-- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/ppc64/ppc64asm/plan9.go b/ppc64/ppc64asm/plan9.go index 88e8e1c7..5fe4077c 100644 --- a/ppc64/ppc64asm/plan9.go +++ b/ppc64/ppc64asm/plan9.go @@ -251,7 +251,7 @@ func reverseOperandOrder(op Op) bool { return true case FADDCC, FADDSCC, FSUBCC, FMULCC, FDIVCC, FDIVSCC: return true - case OR, ORC, AND, ANDC, XOR, NAND, EQV, NOR, ANDCC, ORCC, XORCC, EQVCC, NORCC, NANDCC: + case OR, ORCC, ORC, ORCCC, AND, ANDCC, ANDC, ANDCCC, XOR, XORCC, NAND, NANDCC, EQV, EQVCC, NOR, NORCC: return true case SLW, SLWCC, SLD, SLDCC, SRW, SRAW, SRWCC, SRAWCC, SRD, SRDCC, SRAD, SRADCC: return true @@ -305,6 +305,7 @@ var plan9OpMap = map[Op]string{ ORI: "OR", ANDICC: "ANDCC", ANDC: "ANDN", + ANDCCC: "ANDNCC", ADDEO: "ADDEV", ADDEOCC: "ADDEVCC", ADDO: "ADDV", @@ -321,8 +322,12 @@ var plan9OpMap = map[Op]string{ SUBFZECC: "SUBZECC", SUBFZEO: "SUBZEV", SUBFZEOCC: "SUBZEVCC", + SUBF: "SUB", SUBFC: "SUBC", + SUBFCC: "SUBCC", + SUBFCCC: "SUBCCC", ORC: "ORN", + ORCCC: "ORNCC", MULLWO: "MULLWV", MULLWOCC: "MULLWVCC", MULLDO: "MULLDV", @@ -334,7 +339,6 @@ var plan9OpMap = map[Op]string{ ADDI: "ADD", MULLI: "MULLD", SRADI: "SRAD", - SUBF: "SUB", STBCXCC: "STBCCC", STWCXCC: "STWCCC", STDCXCC: "STDCCC", diff --git a/ppc64/ppc64asm/testdata/decode.txt
b/ppc64/ppc64asm/testdata/decode.txt index 3213903d..54fcafd6 100644 --- a/ppc64/ppc64asm/testdata/decode.txt +++ b/ppc64/ppc64asm/testdata/decode.txt @@ -126,24 +126,31 @@ b4830002| plan9 MOVHU R4,2(R3) 7c032000| plan9 CMPW R3,R4 7c032040| plan9 CMPWU R3,R4 7ca41a14| plan9 ADD R3,R4,R5 +7ca41a15| plan9 ADDCC R3,R4,R5 7ca41814| plan9 ADDC R3,R4,R5 7ca41815| plan9 ADDCCC R3,R4,R5 7c851838| plan9 AND R3,R4,R5 -7c851878| plan9 ANDN R3,R4,R5 7c851839| plan9 ANDCC R3,R4,R5 +7c851878| plan9 ANDN R3,R4,R5 +7c851879| plan9 ANDNCC R3,R4,R5 7c851b78| plan9 OR R3,R4,R5 -7c851b38| plan9 ORN R3,R4,R5 7c851b79| plan9 ORCC R3,R4,R5 +7c851b38| plan9 ORN R3,R4,R5 +7c851b39| plan9 ORNCC R3,R4,R5 7c851a78| plan9 XOR R3,R4,R5 7c851a79| plan9 XORCC R3,R4,R5 7c851bb8| plan9 NAND R3,R4,R5 7c851bb9| plan9 NANDCC R3,R4,R5 7c851a38| plan9 EQV R3,R4,R5 7c851a39| plan9 EQVCC R3,R4,R5 +7c8300d0| plan9 NEG R3,R4 +7c8300d1| plan9 NEGCC R3,R4 7c8518f8| plan9 NOR R3,R4,R5 7c8518f9| plan9 NORCC R3,R4,R5 7ca32050| plan9 SUB R3,R4,R5 +7ca32051| plan9 SUBCC R3,R4,R5 7ca32010| plan9 SUBC R3,R4,R5 +7ca32011| plan9 SUBCCC R3,R4,R5 7ca419d6| plan9 MULLW R3,R4,R5 7ca419d7| plan9 MULLWCC R3,R4,R5 7ca41896| plan9 MULHW R3,R4,R5 From 6a65923eb7420206543da015f2de19bf506b164a Mon Sep 17 00:00:00 2001 From: "Paul E. Murphy" Date: Tue, 20 Sep 2022 16:52:35 -0500 Subject: [PATCH 014/200] ppc64/ppc64asm: improve PCrel argument decoding If an object is built for PIE, CALL opcodes will target the symbol's local entry point. When disassembling, we should print the symbol name if the target is the symbol+8. The local entry offset on PPC64 is almost always 0 or 8. For pure go, it is always 0 or 8 today. If a call looks like it targets a local entry, print it as "CALL symbol+8(SB)". 
Change-Id: I72a2f1eaafd226ed5466384c63040d2f375a541f Reviewed-on: https://go-review.googlesource.com/c/arch/+/432166 Reviewed-by: Cherry Mui Reviewed-by: Dmitri Shuralyov Run-TryBot: Paul Murphy TryBot-Result: Gopher Robot --- ppc64/ppc64asm/decode_test.go | 18 +++++++++++++++++- ppc64/ppc64asm/objdump_test.go | 4 ++++ ppc64/ppc64asm/plan9.go | 12 ++++++++++-- ppc64/ppc64asm/testdata/decode.txt | 3 +++ 4 files changed, 34 insertions(+), 3 deletions(-) diff --git a/ppc64/ppc64asm/decode_test.go b/ppc64/ppc64asm/decode_test.go index 3337312a..83a3acd3 100644 --- a/ppc64/ppc64asm/decode_test.go +++ b/ppc64/ppc64asm/decode_test.go @@ -31,6 +31,15 @@ func TestDecode(t *testing.T) { } } +// Provide a fake symbol to verify PCrel argument decoding. +func symlookup(pc uint64) (string, uint64) { + foopc := uint64(0x100000) + if pc >= foopc && pc < foopc+0x10 { + return "foo", foopc + } + return "", 0 +} + func decode(data []byte, t *testing.T, filename string) { all := string(data) // Simulate PC based on number of instructions found in the test file. @@ -68,7 +77,14 @@ func decode(data []byte, t *testing.T, filename string) { case "gnu": out = GNUSyntax(inst, pc) case "plan9": - out = GoSyntax(inst, pc, nil) + pc := pc + // Hack: Setting PC to 0 effectively transforms the PC relative address + // of CALL (bl) into an absolute address when decoding in GoSyntax. This + // simplifies the testing of symbol lookups via symlookup above. + if inst.Op == BL { + pc = 0 + } + out = GoSyntax(inst, pc, symlookup) default: t.Errorf("unknown syntax %q", syntax) continue diff --git a/ppc64/ppc64asm/objdump_test.go b/ppc64/ppc64asm/objdump_test.go index e89146e5..414fada0 100644 --- a/ppc64/ppc64asm/objdump_test.go +++ b/ppc64/ppc64asm/objdump_test.go @@ -47,6 +47,10 @@ func allowedMismatchObjdump(text string, size int, inst *Inst, dec ExtInst) bool return true case SYNC, WAIT, RFEBB: // ISA 3.1 adds more bits and extended mnemonics for these book ii instructions. 
return true + case BL: + // TODO: Ignore these for now. The output format from gnu objdump is dependent on more than the + // instruction itself e.g: decode(48100009) = "bl 0x100008", 4, want "bl .+0x100008", 4 + return true } if len(dec.enc) >= 4 { diff --git a/ppc64/ppc64asm/plan9.go b/ppc64/ppc64asm/plan9.go index 5fe4077c..4bd1c7fc 100644 --- a/ppc64/ppc64asm/plan9.go +++ b/ppc64/ppc64asm/plan9.go @@ -168,8 +168,9 @@ func GoSyntax(inst Inst, pc uint64, symname func(uint64) (string, uint64)) strin } // plan9Arg formats arg (which is the argIndex's arg in inst) according to Plan 9 rules. +// // NOTE: because Plan9Syntax is the only caller of this func, and it receives a copy -// of inst, it's ok to modify inst.Args here. +// of inst, it's ok to modify inst.Args here. func plan9Arg(inst *Inst, argIndex int, pc uint64, arg Arg, symname func(uint64) (string, uint64)) string { // special cases for load/store instructions if _, ok := arg.(Offset); ok { @@ -211,9 +212,16 @@ func plan9Arg(inst *Inst, argIndex int, pc uint64, arg Arg, symname func(uint64) return fmt.Sprintf("SPR(%d)", int(arg)) case PCRel: addr := pc + uint64(int64(arg)) - if s, base := symname(addr); s != "" && base == addr { + s, base := symname(addr) + if s != "" && addr == base { return fmt.Sprintf("%s(SB)", s) } + if inst.Op == BL && s != "" && (addr-base) == 8 { + // When decoding an object built for PIE, a CALL targeting + // a global entry point will be adjusted to the local entry + // if any. For now, assume any symname+8 PC is a local call. 
+ return fmt.Sprintf("%s+%d(SB)", s, addr-base) + } return fmt.Sprintf("%#x", addr) case Label: return fmt.Sprintf("%#x", int(arg)) diff --git a/ppc64/ppc64asm/testdata/decode.txt b/ppc64/ppc64asm/testdata/decode.txt index 54fcafd6..7bf4355e 100644 --- a/ppc64/ppc64asm/testdata/decode.txt +++ b/ppc64/ppc64asm/testdata/decode.txt @@ -469,6 +469,9 @@ f0400fe0| plan9 XVCVSXDDP VS1,VS2 7c6802a6| plan9 MOVD LR,R3 7c6902a6| plan9 MOVD CTR,R3 4c8c0000| plan9 MOVFL CR3,CR1 +48100001| plan9 CALL foo(SB) +48100009| plan9 CALL foo+8(SB) +4810000d| plan9 CALL 0x10000c 7c6803a6| gnu mtlr r3 7c6802a6| gnu mflr r3 7c6903a6| gnu mtctr r3 From e1262b008e86e4edcd9b1cafa48f5a7c113905e0 Mon Sep 17 00:00:00 2001 From: cui fliter Date: Wed, 7 Sep 2022 13:19:54 +0000 Subject: [PATCH 015/200] all: remove redundant type conversion Change-Id: I1bff578bdcacac6ea471ed9effb9d9ade573d813 GitHub-Last-Rev: 43904f8dd8f08028af8870fb8de4c1662594887e GitHub-Pull-Request: golang/arch#6 Reviewed-on: https://go-review.googlesource.com/c/arch/+/428983 Run-TryBot: Cherry Mui TryBot-Result: Gopher Robot Reviewed-by: Michael Knyszek Reviewed-by: Cherry Mui --- arm/armasm/decode.go | 8 +++++--- ppc64/ppc64asm/field.go | 2 +- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/arm/armasm/decode.go b/arm/armasm/decode.go index 6b4d7384..f61ac124 100644 --- a/arm/armasm/decode.go +++ b/arm/armasm/decode.go @@ -17,7 +17,9 @@ import ( // If x matches the format, then the rest of the fields describe how to interpret x. // The opBits describe bits that should be extracted from x and added to the opcode. // For example opBits = 0x1234 means that the value +// // (2 bits at offset 1) followed by (4 bits at offset 3) +// // should be added to op. // Finally the args describe how to decode the instruction arguments. 
// args is stored as a fixed-size array; if there are fewer than len(args) arguments, @@ -233,9 +235,9 @@ func decodeArg(aop instArg, x uint32) Arg { typ, count := decodeShift(x) // ROR #0 here means ROR #0, but decodeShift rewrites to RRX #1. if typ == RotateRightExt { - return Reg(Rm) + return Rm } - return RegShift{Rm, typ, uint8(count)} + return RegShift{Rm, typ, count} case arg_R_shift_R: Rm := Reg(x & (1<<4 - 1)) @@ -249,7 +251,7 @@ func decodeArg(aop instArg, x uint32) Arg { if typ == ShiftLeft && count == 0 { return Reg(Rm) } - return RegShift{Rm, typ, uint8(count)} + return RegShift{Rm, typ, count} case arg_R1_0: return Reg((x & (1<<4 - 1))) diff --git a/ppc64/ppc64asm/field.go b/ppc64/ppc64asm/field.go index 882c91ae..13df063b 100644 --- a/ppc64/ppc64asm/field.go +++ b/ppc64/ppc64asm/field.go @@ -67,7 +67,7 @@ func (bs *BitFields) Append(b BitField) { // the sequence of bitfields is reasonable. func (bs BitFields) parse(i [2]uint32) (u uint64, Bits uint8) { for _, b := range bs { - u = (uint64(u) << b.Bits) | uint64(b.Parse(i)) + u = (u << b.Bits) | uint64(b.Parse(i)) Bits += b.Bits } return u, Bits From 1bb480fc256aacee6555e668dedebd1f8225c946 Mon Sep 17 00:00:00 2001 From: "Paul E. Murphy" Date: Wed, 6 Jul 2022 15:19:45 -0500 Subject: [PATCH 016/200] ppc64/ppc64asm: add ISA 3.1B support The new ISA fixes a couple typos, and adds special hashing instructions to help mitigate ROP exploitation. The hash instructions encode a negative offset in a novel way which requires a bit of special handling. 
Change-Id: I9491e10ac87efe37d93b6efaf7f108ae3a4402fd Reviewed-on: https://go-review.googlesource.com/c/arch/+/418859 Reviewed-by: Cherry Mui Reviewed-by: Lynn Boger Reviewed-by: Joedian Reid TryBot-Result: Gopher Robot Run-TryBot: Paul Murphy Reviewed-by: Bryan Mills --- ppc64/pp64.csv | 12 +++-- ppc64/ppc64asm/decode.go | 16 ++++++- ppc64/ppc64asm/field.go | 9 ++++ ppc64/ppc64asm/field_test.go | 32 +++++++------ ppc64/ppc64asm/plan9.go | 4 +- ppc64/ppc64asm/tables.go | 25 ++++++++-- ppc64/ppc64asm/testdata/decode.txt | 10 ++++ ppc64/ppc64asm/testdata/decode_generated.txt | 15 +++--- ppc64/ppc64map/map.go | 48 ++++++++++++++++++-- ppc64/ppc64util/hack.h | 3 ++ 10 files changed, 141 insertions(+), 33 deletions(-) diff --git a/ppc64/pp64.csv b/ppc64/pp64.csv index 3150cade..b2aa6b37 100644 --- a/ppc64/pp64.csv +++ b/ppc64/pp64.csv @@ -1,4 +1,4 @@ -# POWER ISA 3.1 instruction description. +# POWER ISA 3.1B instruction description. # # This file contains comment lines, each beginning with #, # followed by entries in CSV format. @@ -13,8 +13,12 @@ # a list of sequences of the form (,sequence)+. A leading comma is used to signify an # instruction encoding requiring multiple instruction words. # The fourth field represents the ISA version where the instruction was introduced as -# stated in Appendix F. of ISA 3.1 +# stated in Appendix F. 
of ISA 3.1B # +"Hash Check X-form","hashchk RB,offset(RA)","31@0|D@6|RA@11|RB@16|754@21|DX@31|","v3.1B" +"Hash Check Privileged X-form","hashchkp RB,offset(RA)","31@0|D@6|RA@11|RB@16|690@21|DX@31|","v3.1B" +"Hash Store X-form","hashst RB,offset(RA)","31@0|D@6|RA@11|RB@16|722@21|DX@31|","v3.1B" +"Hash Store Privileged X-form","hashstp RB,offset(RA)","31@0|D@6|RA@11|RB@16|658@21|DX@31|","v3.1B" "Byte-Reverse Doubleword X-form","brd RA,RS","31@0|RS@6|RA@11|///@16|187@21|/@31|","v3.1" "Byte-Reverse Halfword X-form","brh RA,RS","31@0|RS@6|RA@11|///@16|219@21|/@31|","v3.1" "Byte-Reverse Word X-form","brw RA,RS","31@0|RS@6|RA@11|///@16|155@21|/@31|","v3.1" @@ -209,7 +213,7 @@ "VSX Vector bfloat16 GER (Rank-2 Update) Negative multiply, Positive accumulate XX3-form","xvbf16ger2np AT,XA,XB","59@0|AT@6|//@9|A@11|B@16|114@21|AX@29|BX@30|/@31|","v3.1" "VSX Vector bfloat16 GER (Rank-2 Update) Positive multiply, Negative accumulate XX3-form","xvbf16ger2pn AT,XA,XB","59@0|AT@6|//@9|A@11|B@16|178@21|AX@29|BX@30|/@31|","v3.1" "VSX Vector bfloat16 GER (Rank-2 Update) Positive multiply, Positive accumulate XX3-form","xvbf16ger2pp AT,XA,XB","59@0|AT@6|//@9|A@11|B@16|50@21|AX@29|BX@30|/@31|","v3.1" -"VSX Vector Convert bfloat16 to Single-Precision format XX2-form","xvcvbf16spn XT,XB","60@0|T@6|16@11|B@16|475@21|BX@30|TX@31|","v3.1" +"VSX Vector Convert bfloat16 to Single-Precision format Non-signaling XX2-form","xvcvbf16spn XT,XB","60@0|T@6|16@11|B@16|475@21|BX@30|TX@31|","v3.1" "VSX Vector Convert with round Single-Precision to bfloat16 format XX2-form","xvcvspbf16 XT,XB","60@0|T@6|17@11|B@16|475@21|BX@30|TX@31|","v3.1" "VSX Vector 16-bit Floating-Point GER (rank-2 update) XX3-form","xvf16ger2 AT,XA,XB","59@0|AT@6|//@9|A@11|B@16|19@21|AX@29|BX@30|/@31|","v3.1" "VSX Vector 16-bit Floating-Point GER (rank-2 update) Negative multiply, Negative accumulate XX3-form","xvf16ger2nn AT,XA,XB","59@0|AT@6|//@9|A@11|B@16|210@21|AX@29|BX@30|/@31|","v3.1" @@ -1034,7 +1038,7 @@ "Add Carrying 
XO-form","addc RT,RA,RB (OE=0 Rc=0)|addc. RT,RA,RB (OE=0 Rc=1)|addco RT,RA,RB (OE=1 Rc=0)|addco. RT,RA,RB (OE=1 Rc=1)","31@0|RT@6|RA@11|RB@16|OE@21|10@22|Rc@31|","P1" "Add Extended XO-form","adde RT,RA,RB (OE=0 Rc=0)|adde. RT,RA,RB (OE=0 Rc=1)|addeo RT,RA,RB (OE=1 Rc=0)|addeo. RT,RA,RB (OE=1 Rc=1)","31@0|RT@6|RA@11|RB@16|OE@21|138@22|Rc@31|","P1" "Add Immediate D-form","addi RT,RA,SI|li RT,SI (RA=0)","14@0|RT@6|RA@11|SI@16|","P1" -"Add Immediate Carrying D-formy","addic RT,RA,SI","12@0|RT@6|RA@11|SI@16|","P1" +"Add Immediate Carrying D-form","addic RT,RA,SI","12@0|RT@6|RA@11|SI@16|","P1" "Add Immediate Carrying and Record D-form","addic. RT,RA,SI","13@0|RT@6|RA@11|SI@16|","P1" "Add Immediate Shifted D-form","addis RT,RA,SI|lis RT,SI (RA=0)","15@0|RT@6|RA@11|SI@16|","P1" "Add to Minus One Extended XO-form","addme RT,RA (OE=0 Rc=0)|addme. RT,RA (OE=0 Rc=1)|addmeo RT,RA (OE=1 Rc=0)|addmeo. RT,RA (OE=1 Rc=1)","31@0|RT@6|RA@11|///@16|OE@21|234@22|Rc@31|","P1" diff --git a/ppc64/ppc64asm/decode.go b/ppc64/ppc64asm/decode.go index 59bd3258..b8d857c6 100644 --- a/ppc64/ppc64asm/decode.go +++ b/ppc64/ppc64asm/decode.go @@ -22,9 +22,12 @@ const prefixOpcode = 1 // The Args are stored in the same order as the instruction manual. // // Prefixed instructions are stored as: -// prefix << 32 | suffix, +// +// prefix << 32 | suffix, +// // Regular instructions are: -// inst << 32 +// +// inst << 32 type instFormat struct { Op Op Mask uint64 @@ -77,6 +80,12 @@ func (a argField) Parse(i [2]uint32) Arg { return Label(a.BitFields.ParseSigned(i) << a.Shift) case TypeOffset: return Offset(a.BitFields.ParseSigned(i) << a.Shift) + case TypeNegOffset: + // An oddball encoding of offset for hashchk and similar. 
+ // e.g hashchk offset is 0b1111111000000000 | DX << 8 | D << 3 + off := a.BitFields.ParseSigned(i) << a.Shift + neg := int64(-1) << (int(a.Shift) + a.BitFields.NumBits()) + return Offset(neg | off) } } @@ -98,6 +107,7 @@ const ( TypeImmSigned // signed immediate TypeImmUnsigned // unsigned immediate/flag/mask, this is the catch-all type TypeOffset // signed offset in load/store + TypeNegOffset // A negative 16 bit value 0b1111111xxxxx000 encoded as 0bxxxxx (e.g in the hashchk instruction) TypeLast // must be the last one ) @@ -135,6 +145,8 @@ func (t ArgType) String() string { return "Label" case TypeOffset: return "Offset" + case TypeNegOffset: + return "NegOffset" } } diff --git a/ppc64/ppc64asm/field.go b/ppc64/ppc64asm/field.go index 13df063b..37794460 100644 --- a/ppc64/ppc64asm/field.go +++ b/ppc64/ppc64asm/field.go @@ -86,3 +86,12 @@ func (bs BitFields) ParseSigned(i [2]uint32) int64 { u, l := bs.parse(i) return int64(u) << (64 - l) >> (64 - l) } + +// Count the number of bits in the aggregate BitFields +func (bs BitFields) NumBits() int { + num := 0 + for _, b := range bs { + num += int(b.Bits) + } + return num +} diff --git a/ppc64/ppc64asm/field_test.go b/ppc64/ppc64asm/field_test.go index 01402b59..ce18ad50 100644 --- a/ppc64/ppc64asm/field_test.go +++ b/ppc64/ppc64asm/field_test.go @@ -65,26 +65,29 @@ func TestBitFields(t *testing.T) { i [2]uint32 // input u uint64 // unsigned output s int64 // signed output + nb int // Total number of bits in BitField fail bool // if the check should panic }{ - {BitFields{{0, 0, 1}}, [2]uint32{0, 0}, 0, 0, true}, - {BitFields{{31, 2, 1}}, [2]uint32{0, 0}, 0, 0, true}, - {BitFields{{31, 1, 1}}, [2]uint32{0, 1}, 1, -1, false}, - {BitFields{{29, 2, 1}}, [2]uint32{0, 0 << 1}, 0, 0, false}, - {BitFields{{29, 2, 1}}, [2]uint32{0, 1 << 1}, 1, 1, false}, - {BitFields{{29, 2, 1}}, [2]uint32{0, 2 << 1}, 2, -2, false}, - {BitFields{{29, 2, 1}}, [2]uint32{0, 3 << 1}, 3, -1, false}, - {BitFields{{0, 32, 1}}, [2]uint32{0, 1<<32 - 
1}, 1<<32 - 1, -1, false}, - {BitFields{{16, 3, 1}}, [2]uint32{0, 1 << 15}, 4, -4, false}, - {BitFields{{16, 16, 0}, {16, 16, 1}}, [2]uint32{0x8016, 0x32}, 0x80160032, -0x7FE9FFCE, false}, - {BitFields{{14, 18, 0}, {16, 16, 1}}, [2]uint32{0x38016, 0x32}, 0x380160032, -0x07FE9FFCE, false}, + {BitFields{{0, 0, 1}}, [2]uint32{0, 0}, 0, 0, 0, true}, + {BitFields{{31, 2, 1}}, [2]uint32{0, 0}, 0, 0, 2, true}, + {BitFields{{31, 1, 1}}, [2]uint32{0, 1}, 1, -1, 1, false}, + {BitFields{{29, 2, 1}}, [2]uint32{0, 0 << 1}, 0, 0, 2, false}, + {BitFields{{29, 2, 1}}, [2]uint32{0, 1 << 1}, 1, 1, 2, false}, + {BitFields{{29, 2, 1}}, [2]uint32{0, 2 << 1}, 2, -2, 2, false}, + {BitFields{{29, 2, 1}}, [2]uint32{0, 3 << 1}, 3, -1, 2, false}, + {BitFields{{0, 32, 1}}, [2]uint32{0, 1<<32 - 1}, 1<<32 - 1, -1, 32, false}, + {BitFields{{16, 3, 1}}, [2]uint32{0, 1 << 15}, 4, -4, 3, false}, + {BitFields{{16, 16, 0}, {16, 16, 1}}, [2]uint32{0x8016, 0x32}, 0x80160032, -0x7FE9FFCE, 32, false}, + {BitFields{{14, 18, 0}, {16, 16, 1}}, [2]uint32{0x38016, 0x32}, 0x380160032, -0x07FE9FFCE, 34, false}, } for i, tst := range tests { var ( - ou uint64 - os int64 + ou uint64 + os int64 + onb int ) failed := panicOrNot(func() { + onb = tst.b.NumBits() ou = tst.b.Parse(tst.i) os = tst.b.ParseSigned(tst.i) }) @@ -99,5 +102,8 @@ func TestBitFields(t *testing.T) { if os != tst.s { t.Errorf("case %d: %v.ParseSigned(%d) returned %d, expected %d", i, tst.b, tst.i, os, tst.s) } + if onb != tst.nb { + t.Errorf("case %d: %v.NumBits() returned %d, expected %d", i, tst.b, onb, tst.nb) + } } } diff --git a/ppc64/ppc64asm/plan9.go b/ppc64/ppc64asm/plan9.go index 4bd1c7fc..fcb2a128 100644 --- a/ppc64/ppc64asm/plan9.go +++ b/ppc64/ppc64asm/plan9.go @@ -83,7 +83,9 @@ func GoSyntax(inst Inst, pc uint64, symname func(uint64) (string, uint64)) strin STH, STHU, STW, STWU, STD, STDU, - STQ, STFD, STFDU, STFS, STFSU: + STFD, STFDU, + STFS, STFSU, + STQ, HASHST, HASHSTP: return op + " " + strings.Join(args, ",") case FCMPU, 
FCMPO, CMPD, CMPDI, CMPLD, CMPLDI, CMPW, CMPWI, CMPLW, CMPLWI: diff --git a/ppc64/ppc64asm/tables.go b/ppc64/ppc64asm/tables.go index 8d0a2431..8705077b 100644 --- a/ppc64/ppc64asm/tables.go +++ b/ppc64/ppc64asm/tables.go @@ -1,9 +1,13 @@ -// Code generated by ppc64map -fmt=decoder pp64.csv DO NOT EDIT. +// Code generated by ppc64map -fmt=decoder ../pp64.csv DO NOT EDIT. package ppc64asm const ( _ Op = iota + HASHCHK + HASHCHKP + HASHST + HASHSTP BRD BRH BRW @@ -1420,6 +1424,10 @@ const ( ) var opstr = [...]string{ + HASHCHK: "hashchk", + HASHCHKP: "hashchkp", + HASHST: "hashst", + HASHSTP: "hashstp", BRD: "brd", BRH: "brh", BRW: "brw", @@ -2836,9 +2844,10 @@ var opstr = [...]string{ } var ( + ap_Reg_16_20 = &argField{Type: TypeReg, Shift: 0, BitFields: BitFields{{16, 5, 0}}} + ap_NegOffset_31_31_6_10_shift3 = &argField{Type: TypeNegOffset, Shift: 3, BitFields: BitFields{{31, 1, 0}, {6, 5, 0}}} ap_Reg_11_15 = &argField{Type: TypeReg, Shift: 0, BitFields: BitFields{{11, 5, 0}}} ap_Reg_6_10 = &argField{Type: TypeReg, Shift: 0, BitFields: BitFields{{6, 5, 0}}} - ap_Reg_16_20 = &argField{Type: TypeReg, Shift: 0, BitFields: BitFields{{16, 5, 0}}} ap_FPReg_6_10 = &argField{Type: TypeFPReg, Shift: 0, BitFields: BitFields{{6, 5, 0}}} ap_VecReg_16_20 = &argField{Type: TypeVecReg, Shift: 0, BitFields: BitFields{{16, 5, 0}}} ap_VecReg_6_10 = &argField{Type: TypeVecReg, Shift: 0, BitFields: BitFields{{6, 5, 0}}} @@ -2942,6 +2951,14 @@ var ( ) var instFormats = [...]instFormat{ + {HASHCHK, 0xfc0007fe00000000, 0x7c0005e400000000, 0x0, // Hash Check X-form (hashchk RB,offset(RA)) + [6]*argField{ap_Reg_16_20, ap_NegOffset_31_31_6_10_shift3, ap_Reg_11_15}}, + {HASHCHKP, 0xfc0007fe00000000, 0x7c00056400000000, 0x0, // Hash Check Privileged X-form (hashchkp RB,offset(RA)) + [6]*argField{ap_Reg_16_20, ap_NegOffset_31_31_6_10_shift3, ap_Reg_11_15}}, + {HASHST, 0xfc0007fe00000000, 0x7c0005a400000000, 0x0, // Hash Store X-form (hashst RB,offset(RA)) + [6]*argField{ap_Reg_16_20, 
ap_NegOffset_31_31_6_10_shift3, ap_Reg_11_15}}, + {HASHSTP, 0xfc0007fe00000000, 0x7c00052400000000, 0x0, // Hash Store Privileged X-form (hashstp RB,offset(RA)) + [6]*argField{ap_Reg_16_20, ap_NegOffset_31_31_6_10_shift3, ap_Reg_11_15}}, {BRD, 0xfc0007fe00000000, 0x7c00017600000000, 0xf80100000000, // Byte-Reverse Doubleword X-form (brd RA,RS) [6]*argField{ap_Reg_11_15, ap_Reg_6_10}}, {BRH, 0xfc0007fe00000000, 0x7c0001b600000000, 0xf80100000000, // Byte-Reverse Halfword X-form (brh RA,RS) @@ -3344,7 +3361,7 @@ var instFormats = [...]instFormat{ [6]*argField{ap_MMAReg_6_8, ap_VecSReg_29_29_11_15, ap_VecSReg_30_30_16_20}}, {XVBF16GER2PP, 0xfc0007f800000000, 0xec00019000000000, 0x60000100000000, // VSX Vector bfloat16 GER (Rank-2 Update) Positive multiply, Positive accumulate XX3-form (xvbf16ger2pp AT,XA,XB) [6]*argField{ap_MMAReg_6_8, ap_VecSReg_29_29_11_15, ap_VecSReg_30_30_16_20}}, - {XVCVBF16SPN, 0xfc1f07fc00000000, 0xf010076c00000000, 0x0, // VSX Vector Convert bfloat16 to Single-Precision format XX2-form (xvcvbf16spn XT,XB) + {XVCVBF16SPN, 0xfc1f07fc00000000, 0xf010076c00000000, 0x0, // VSX Vector Convert bfloat16 to Single-Precision format Non-signaling XX2-form (xvcvbf16spn XT,XB) [6]*argField{ap_VecSReg_31_31_6_10, ap_VecSReg_30_30_16_20}}, {XVCVSPBF16, 0xfc1f07fc00000000, 0xf011076c00000000, 0x0, // VSX Vector Convert with round Single-Precision to bfloat16 format XX2-form (xvcvspbf16 XT,XB) [6]*argField{ap_VecSReg_31_31_6_10, ap_VecSReg_30_30_16_20}}, @@ -5334,7 +5351,7 @@ var instFormats = [...]instFormat{ [6]*argField{ap_Reg_6_10, ap_ImmSigned_16_31}}, {ADDI, 0xfc00000000000000, 0x3800000000000000, 0x0, // Add Immediate D-form (addi RT,RA,SI) [6]*argField{ap_Reg_6_10, ap_Reg_11_15, ap_ImmSigned_16_31}}, - {ADDIC, 0xfc00000000000000, 0x3000000000000000, 0x0, // Add Immediate Carrying D-formy (addic RT,RA,SI) + {ADDIC, 0xfc00000000000000, 0x3000000000000000, 0x0, // Add Immediate Carrying D-form (addic RT,RA,SI) [6]*argField{ap_Reg_6_10, ap_Reg_11_15, 
ap_ImmSigned_16_31}}, {ADDICCC, 0xfc00000000000000, 0x3400000000000000, 0x0, // Add Immediate Carrying and Record D-form (addic. RT,RA,SI) [6]*argField{ap_Reg_6_10, ap_Reg_11_15, ap_ImmSigned_16_31}}, diff --git a/ppc64/ppc64asm/testdata/decode.txt b/ppc64/ppc64asm/testdata/decode.txt index 7bf4355e..ef5c90e0 100644 --- a/ppc64/ppc64asm/testdata/decode.txt +++ b/ppc64/ppc64asm/testdata/decode.txt @@ -873,3 +873,13 @@ fc811000| plan9 FCMPU F1,F2,CR1 7c2311b8| plan9 CFUGED R1,R2,R3 04100016e4820032| gnu .quad 0x4100016e4820032 0612000138820007| gnu .quad 0x612000138820007 +7fe20de5| plan9 HASHCHK -8(R2),R1 +7fe20da5| plan9 HASHST R1,-8(R2) +7c020de4| plan9 HASHCHK -512(R2),R1 +7c020da4| plan9 HASHST R1,-512(R2) +7c020de5| plan9 HASHCHK -256(R2),R1 +7c020da5| plan9 HASHST R1,-256(R2) +7fe115a5| plan9 HASHST R2,-8(R1) +7fe11525| plan9 HASHSTP R2,-8(R1) +7fe115e5| plan9 HASHCHK -8(R1),R2 +7fe11565| plan9 HASHCHKP -8(R1),R2 diff --git a/ppc64/ppc64asm/testdata/decode_generated.txt b/ppc64/ppc64asm/testdata/decode_generated.txt index d8619d7f..13345e99 100644 --- a/ppc64/ppc64asm/testdata/decode_generated.txt +++ b/ppc64/ppc64asm/testdata/decode_generated.txt @@ -1,3 +1,7 @@ +7e0115e5| gnu hashchk r2,-128(r1) +7e011565| gnu hashchkp r2,-128(r1) +7e0115a5| gnu hashst r2,-128(r1) +7e011525| gnu hashstp r2,-128(r1) 7c610176| gnu brd r1,r3 7c6101b6| gnu brh r1,r3 7c610136| gnu brw r1,r3 @@ -23,9 +27,9 @@ f03f0ad1| gnu lxvkq vs33,1 7c611138| gnu pdepd r1,r3,r2 7c611178| gnu pextd r1,r3,r2 0610001688800032| gnu plbz r4,1441842 +60000000| gnu nop 04100016e4800032| gnu pld r4,1441842 06100016c8600032| gnu plfd f3,1441842 -60000000| gnu nop 06100016c0600032| gnu plfs f3,1441842 06100016a8800032| gnu plha r4,1441842 06100016a0800032| gnu plhz r4,1441842 @@ -240,7 +244,6 @@ f02d1769| gnu xxgenpcvwm vs33,v2,13 7f810162| gnu xxmtacc a7 0500000188232a4f| gnu xxpermx vs33,vs35,vs37,vs41,1 7f830162| gnu xxsetaccz a7 -60000000| gnu nop 0500012380234567| gnu xxsplti32dx vs33,1,19088743 
0500012380254567| gnu xxspltidp vs33,19088743 0500012380274567| gnu xxspltiw vs33,19088743 @@ -1214,13 +1217,13 @@ fc60382d| gnu fsqrt. f3,f7 7c611079| gnu andc. r1,r3,r2 70610000| gnu andi. r1,r3,0 74610000| gnu andis. r1,r3,0 -48000690| gnu b 0x1a90 +48000690| gnu b 0x1a9c 48000692| gnu ba 0x690 -48000691| gnu bl 0x1a98 +48000691| gnu bl 0x1aa4 48000693| gnu bla 0x690 -40860690| gnu bne cr1,0x1aa0 +40860690| gnu bne cr1,0x1aac 40860692| gnu bnea cr1,0x690 -40860691| gnu bnel cr1,0x1aa8 +40860691| gnu bnel cr1,0x1ab4 40860693| gnu bnela cr1,0x690 4c860420| gnu bnectr cr1 4c860421| gnu bnectrl cr1 diff --git a/ppc64/ppc64map/map.go b/ppc64/ppc64map/map.go index 31d692d4..1e3b1b6e 100644 --- a/ppc64/ppc64map/map.go +++ b/ppc64/ppc64map/map.go @@ -60,6 +60,7 @@ const ( ISA_V30B ISA_V30C ISA_V31 + ISA_V31B ) var isaToISA = map[string]isaversion{ @@ -77,6 +78,7 @@ var isaToISA = map[string]isaversion{ "v3.0B": ISA_V30B, "v3.0C": ISA_V30C, "v3.1": ISA_V31, + "v3.1B": ISA_V31B, } func usage() { @@ -482,6 +484,18 @@ func add(p *Prog, text, mnemonics, encoding, isa string) { opr = "BD" } + case "offset": + switch inst.Op { + // These encode a 6 bit displacement in the format of an X-form opcode. + // Allowable displaments are -8 to -8*64 in 8B increments. + case "hashchk", "hashchkp", "hashst", "hashstp": + typ = asm.TypeNegOffset + opr = "DX" + opr2 = "D" + shift = 3 + + } + case "XMSK", "YMSK", "PMSK", "IX", "BHRBE": typ = asm.TypeImmUnsigned @@ -737,7 +751,12 @@ var isNotMemopMap = map[string]bool{ } // Some ISA instructions are memops, but are not described like "Load ..." or "Store ..." 
-var isMemopMap = map[string]bool{} +var isMemopMap = map[string]bool{ + "hashst": true, + "hashstp": true, + "hashchk": true, + "hashchkp": true, +} // Does this instruction contain a memory argument (e.g x-form load or d-form store) func hasMemoryArg(insn *Inst) bool { @@ -767,7 +786,7 @@ func insnEncFuncStr(insn *Inst, firstName [2]string) string { // Does this field require an obj.Addr.Offset? isImmediate := func(t asm.ArgType) bool { - return t == asm.TypeImmUnsigned || t == asm.TypeSpReg || t == asm.TypeImmSigned || t == asm.TypeOffset + return t == asm.TypeImmUnsigned || t == asm.TypeSpReg || t == asm.TypeImmSigned || t == asm.TypeOffset || t == asm.TypeNegOffset } if insn.memOp { @@ -827,13 +846,26 @@ func insnEncFuncStr(insn *Inst, firstName [2]string) string { // Generate a check to verify shifted inputs satisfy their constraints. // For historical reasons this is not needed for 16 bit values shifted by 16. (i.e SI/UI constants in addis/xoris) - if atype.Shift != 0 && atype.Shift != 16 && bits != 32 { + if atype.Type != asm.TypeNegOffset && atype.Shift != 0 && atype.Shift != 16 && bits != 32 { arg := argOrder[j] + itype mod := (1 << atype.Shift) - 1 errCheck += fmt.Sprintf("if %s & 0x%x != 0 {\n", arg, mod) errCheck += fmt.Sprintf("c.ctxt.Diag(\"Constant 0x%%x (%%d) is not a multiple of %d\\n%%v\",%s,%s,p)\n", mod+1, arg, arg) errCheck += fmt.Sprintf("}\n") } + // NegOffset requires a stronger offset check + if atype.Type == asm.TypeNegOffset { + arg := argOrder[j] + itype + mask := -1 << (atype.BitFields.NumBits() + int(atype.Shift)) + maskl := mask // Sign bits are implied in this type. 
+ mask |= (1 << atype.Shift) - 1 + min := maskl + max := maskl | (^mask) + step := 1 << atype.Shift + errCheck += fmt.Sprintf("if %s & 0x%x != 0x%x {\n", arg, uint32(mask), uint32(maskl)) + errCheck += fmt.Sprintf("c.ctxt.Diag(\"Constant(%%d) must within the range of [%d,%d] in steps of %d\\n%%v\",%s,p)\n", min, max, step, arg) + errCheck += fmt.Sprintf("}\n") + } j++ } buf.WriteString(errCheck) @@ -895,6 +927,8 @@ func insnTypeStr(insn *Inst, uniqueRegTypes bool) string { if atype.Shift != 0 { ret += fmt.Sprintf("%d", atype.Shift) } + case asm.TypeNegOffset: // e.g offset in hashst rb, offset(ra) + ret += "N" default: log.Fatalf("Unhandled type in insnTypeStr: %v\n", atype) } @@ -953,6 +987,14 @@ func genOptabEntry(ta *AggInfo, typeMap map[string]*Inst) string { shift = "" } } + if f.Type == asm.TypeNegOffset { + // This is a hack, but allows hashchk and like to correctly + // merge there argument into a C_SOREG memory location type + // argument a little later. + sign = "S" + bits = 16 + shift = "" + } return fmt.Sprintf("C_%s%d%sCON", sign, bits, shift) } insn := ta.Insns[0] diff --git a/ppc64/ppc64util/hack.h b/ppc64/ppc64util/hack.h index e7dada22..3fd9f314 100644 --- a/ppc64/ppc64util/hack.h +++ b/ppc64/ppc64util/hack.h @@ -129,6 +129,9 @@ #define Rpfx 1 #define SIpfx 0xFFFFFFFE00010007 +// A valid displacement value for the hash check and hash store instructions. +#define offset -128 + // These decode as m.fpr* or m.vr*. This is a matter of preference. We // don't support these mnemonics, and I don't think they improve reading // disassembled code in most cases. so ignore. 
From 40c19ba4a7c5ffc92baaef5977ffc2b82dfb0e47 Mon Sep 17 00:00:00 2001 From: cui fliter Date: Tue, 7 Feb 2023 22:40:17 +0800 Subject: [PATCH 017/200] all: fix some comments Change-Id: Ic196f7a97e423708d0a86ff8da99871e966dc2fc Reviewed-on: https://go-review.googlesource.com/c/arch/+/466000 Run-TryBot: Ian Lance Taylor Reviewed-by: Ian Lance Taylor Reviewed-by: David Chase Auto-Submit: Ian Lance Taylor TryBot-Result: Gopher Robot --- ppc64/ppc64asm/field.go | 2 +- x86/x86asm/decode.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ppc64/ppc64asm/field.go b/ppc64/ppc64asm/field.go index 37794460..b47f672d 100644 --- a/ppc64/ppc64asm/field.go +++ b/ppc64/ppc64asm/field.go @@ -80,7 +80,7 @@ func (bs BitFields) Parse(i [2]uint32) uint64 { return u } -// Parse extracts the bitfields from i, concatenate them and return the result +// ParseSigned extracts the bitfields from i, concatenate them and return the result // as a signed integer. Parse will panic if any bitfield in b is invalid. func (bs BitFields) ParseSigned(i [2]uint32) int64 { u, l := bs.parse(i) diff --git a/x86/x86asm/decode.go b/x86/x86asm/decode.go index 8c984970..059b73d3 100644 --- a/x86/x86asm/decode.go +++ b/x86/x86asm/decode.go @@ -1550,7 +1550,7 @@ var addr16 = [8]Mem{ {Base: BX}, } -// baseReg returns the base register for a given register size in bits. +// baseRegForBits returns the base register for a given register size in bits. func baseRegForBits(bits int) Reg { switch bits { case 8: From 060bf14d30f8a6b2e19c8aab764c104725b1682f Mon Sep 17 00:00:00 2001 From: Keith Randall Date: Thu, 22 Jun 2023 15:18:25 -0700 Subject: [PATCH 018/200] x86asm: disassemble CMP instruction's arguments in the opposite order That way it matches what the compiler's -S flag generates, and what we write in assembly. CMP AX, $16 JLE foo should get to foo if AX <= 16. 
Without this CL, the disassembly looks like CMP $16, AX JLE foo which reads like we should get to foo if 16 <= AX, which is not what these two instructions actually do. It was originally this way because the CMP instruction parallels the SUB instruction, except it throws away the non-flags result. We write that subtraction as SUB $16, AX // AX <- AX-16 but we don't need to match the SUB's disassembly order, as CMP is not writing to a register output. Update golang/go#60920 (This fixes the underlying issue, but the actual "fixes" comment needs to go on the CL that vendors x/arch containing this CL into the main branch.) Change-Id: Ifa8d3878453d6e33ae144bfdb01b34171c2106a1 Reviewed-on: https://go-review.googlesource.com/c/arch/+/505375 Reviewed-by: Cherry Mui Run-TryBot: Keith Randall TryBot-Result: Gopher Robot Reviewed-by: Keith Randall --- x86/x86asm/plan9x.go | 6 ++++ x86/x86asm/testdata/decode.txt | 66 +++++++++++++++++----------------- 2 files changed, 39 insertions(+), 33 deletions(-) diff --git a/x86/x86asm/plan9x.go b/x86/x86asm/plan9x.go index de417946..9e866d87 100644 --- a/x86/x86asm/plan9x.go +++ b/x86/x86asm/plan9x.go @@ -83,6 +83,12 @@ func GoSyntax(inst Inst, pc uint64, symname SymLookup) string { } } + if inst.Op == CMP { + // Use reads-left-to-right ordering for comparisons. + // See issue 60920. 
+ args[0], args[1] = args[1], args[0] + } + if args != nil { op += " " + strings.Join(args, ", ") } diff --git a/x86/x86asm/testdata/decode.txt b/x86/x86asm/testdata/decode.txt index cbd536a8..af840c2f 100644 --- a/x86/x86asm/testdata/decode.txt +++ b/x86/x86asm/testdata/decode.txt @@ -2125,35 +2125,35 @@ 37|11223344556677885f5f5f5f5f5f5f 64 intel error: unrecognized instruction 37|11223344556677885f5f5f5f5f5f5f 64 plan9 error: unrecognized instruction 3811|223344556677885f5f5f5f5f5f5f 32 intel cmp byte ptr [ecx], dl -3811|223344556677885f5f5f5f5f5f5f 32 plan9 CMPB DL, 0(CX) +3811|223344556677885f5f5f5f5f5f5f 32 plan9 CMPB 0(CX), DL 3811|223344556677885f5f5f5f5f5f5f 64 gnu cmp %dl,(%rcx) 3811|223344556677885f5f5f5f5f5f5f 64 intel cmp byte ptr [rcx], dl -3811|223344556677885f5f5f5f5f5f5f 64 plan9 CMPB DL, 0(CX) +3811|223344556677885f5f5f5f5f5f5f 64 plan9 CMPB 0(CX), DL 3911|223344556677885f5f5f5f5f5f5f 32 intel cmp dword ptr [ecx], edx -3911|223344556677885f5f5f5f5f5f5f 32 plan9 CMPL DX, 0(CX) +3911|223344556677885f5f5f5f5f5f5f 32 plan9 CMPL 0(CX), DX 3911|223344556677885f5f5f5f5f5f5f 64 gnu cmp %edx,(%rcx) 3911|223344556677885f5f5f5f5f5f5f 64 intel cmp dword ptr [rcx], edx -3911|223344556677885f5f5f5f5f5f5f 64 plan9 CMPL DX, 0(CX) +3911|223344556677885f5f5f5f5f5f5f 64 plan9 CMPL 0(CX), DX 3a11|223344556677885f5f5f5f5f5f5f 32 intel cmp dl, byte ptr [ecx] -3a11|223344556677885f5f5f5f5f5f5f 32 plan9 CMPB 0(CX), DL +3a11|223344556677885f5f5f5f5f5f5f 32 plan9 CMPB DL, 0(CX) 3a11|223344556677885f5f5f5f5f5f5f 64 gnu cmp (%rcx),%dl 3a11|223344556677885f5f5f5f5f5f5f 64 intel cmp dl, byte ptr [rcx] -3a11|223344556677885f5f5f5f5f5f5f 64 plan9 CMPB 0(CX), DL +3a11|223344556677885f5f5f5f5f5f5f 64 plan9 CMPB DL, 0(CX) 3b11|223344556677885f5f5f5f5f5f5f 32 intel cmp edx, dword ptr [ecx] -3b11|223344556677885f5f5f5f5f5f5f 32 plan9 CMPL 0(CX), DX +3b11|223344556677885f5f5f5f5f5f5f 32 plan9 CMPL DX, 0(CX) 3b11|223344556677885f5f5f5f5f5f5f 64 gnu cmp (%rcx),%edx 
3b11|223344556677885f5f5f5f5f5f5f 64 intel cmp edx, dword ptr [rcx] -3b11|223344556677885f5f5f5f5f5f5f 64 plan9 CMPL 0(CX), DX +3b11|223344556677885f5f5f5f5f5f5f 64 plan9 CMPL DX, 0(CX) 3c11|223344556677885f5f5f5f5f5f5f 32 intel cmp al, 0x11 -3c11|223344556677885f5f5f5f5f5f5f 32 plan9 CMPL $0x11, AL +3c11|223344556677885f5f5f5f5f5f5f 32 plan9 CMPL AL, $0x11 3c11|223344556677885f5f5f5f5f5f5f 64 gnu cmp $0x11,%al 3c11|223344556677885f5f5f5f5f5f5f 64 intel cmp al, 0x11 -3c11|223344556677885f5f5f5f5f5f5f 64 plan9 CMPL $0x11, AL +3c11|223344556677885f5f5f5f5f5f5f 64 plan9 CMPL AL, $0x11 3d11223344|556677885f5f5f5f5f5f5f 32 intel cmp eax, 0x44332211 -3d11223344|556677885f5f5f5f5f5f5f 32 plan9 CMPL $0x44332211, AX +3d11223344|556677885f5f5f5f5f5f5f 32 plan9 CMPL AX, $0x44332211 3d11223344|556677885f5f5f5f5f5f5f 64 gnu cmp $0x44332211,%eax 3d11223344|556677885f5f5f5f5f5f5f 64 intel cmp eax, 0x44332211 -3d11223344|556677885f5f5f5f5f5f5f 64 plan9 CMPL $0x44332211, AX +3d11223344|556677885f5f5f5f5f5f5f 64 plan9 CMPL AX, $0x44332211 3e67e011|223344556677885f5f5f5f5f 32 intel addr16 loopne .+0x11 3e67e011|223344556677885f5f5f5f5f 32 plan9 LOOPNE .+17 3e67e011|223344556677885f5f5f5f5f 64 gnu loopne,pt .+0x11 @@ -2482,13 +2482,13 @@ 483511223344|556677885f5f5f5f5f5f 64 plan9 XORQ $0x44332211, AX 483911|223344556677885f5f5f5f5f5f 64 gnu cmp %rdx,(%rcx) 483911|223344556677885f5f5f5f5f5f 64 intel cmp qword ptr [rcx], rdx -483911|223344556677885f5f5f5f5f5f 64 plan9 CMPQ DX, 0(CX) +483911|223344556677885f5f5f5f5f5f 64 plan9 CMPQ 0(CX), DX 483b11|223344556677885f5f5f5f5f5f 64 gnu cmp (%rcx),%rdx 483b11|223344556677885f5f5f5f5f5f 64 intel cmp rdx, qword ptr [rcx] -483b11|223344556677885f5f5f5f5f5f 64 plan9 CMPQ 0(CX), DX +483b11|223344556677885f5f5f5f5f5f 64 plan9 CMPQ DX, 0(CX) 483d11223344|556677885f5f5f5f5f5f 64 gnu cmp $0x44332211,%rax 483d11223344|556677885f5f5f5f5f5f 64 intel cmp rax, 0x44332211 -483d11223344|556677885f5f5f5f5f5f 64 plan9 CMPQ $0x44332211, AX 
+483d11223344|556677885f5f5f5f5f5f 64 plan9 CMPQ AX, $0x44332211 4850|11223344556677885f5f5f5f5f5f 64 gnu push %rax 4850|11223344556677885f5f5f5f5f5f 64 intel push rax 4850|11223344556677885f5f5f5f5f5f 64 plan9 PUSHQ AX @@ -2536,7 +2536,7 @@ 48813011223344|556677885f5f5f5f5f 64 plan9 XORQ $0x44332211, 0(AX) 48813811223344|556677885f5f5f5f5f 64 gnu cmpq $0x44332211,(%rax) 48813811223344|556677885f5f5f5f5f 64 intel cmp qword ptr [rax], 0x44332211 -48813811223344|556677885f5f5f5f5f 64 plan9 CMPQ $0x44332211, 0(AX) +48813811223344|556677885f5f5f5f5f 64 plan9 CMPQ 0(AX), $0x44332211 48830011|223344556677885f5f5f5f5f 64 gnu addq $0x11,(%rax) 48830011|223344556677885f5f5f5f5f 64 intel add qword ptr [rax], 0x11 48830011|223344556677885f5f5f5f5f 64 plan9 ADDQ $0x11, 0(AX) @@ -2560,7 +2560,7 @@ 48833011|223344556677885f5f5f5f5f 64 plan9 XORQ $0x11, 0(AX) 48833811|223344556677885f5f5f5f5f 64 gnu cmpq $0x11,(%rax) 48833811|223344556677885f5f5f5f5f 64 intel cmp qword ptr [rax], 0x11 -48833811|223344556677885f5f5f5f5f 64 plan9 CMPQ $0x11, 0(AX) +48833811|223344556677885f5f5f5f5f 64 plan9 CMPQ 0(AX), $0x11 488511|223344556677885f5f5f5f5f5f 64 gnu test %rdx,(%rcx) 488511|223344556677885f5f5f5f5f5f 64 intel test qword ptr [rcx], rdx 488511|223344556677885f5f5f5f5f5f 64 plan9 TESTQ DX, 0(CX) @@ -4233,20 +4233,20 @@ 66351122|3344556677885f5f5f5f5f5f 64 intel xor ax, 0x2211 66351122|3344556677885f5f5f5f5f5f 64 plan9 XORW $0x2211, AX 663911|223344556677885f5f5f5f5f5f 32 intel cmp word ptr [ecx], dx -663911|223344556677885f5f5f5f5f5f 32 plan9 CMPW DX, 0(CX) +663911|223344556677885f5f5f5f5f5f 32 plan9 CMPW 0(CX), DX 663911|223344556677885f5f5f5f5f5f 64 gnu cmp %dx,(%rcx) 663911|223344556677885f5f5f5f5f5f 64 intel cmp word ptr [rcx], dx -663911|223344556677885f5f5f5f5f5f 64 plan9 CMPW DX, 0(CX) +663911|223344556677885f5f5f5f5f5f 64 plan9 CMPW 0(CX), DX 663b11|223344556677885f5f5f5f5f5f 32 intel cmp dx, word ptr [ecx] -663b11|223344556677885f5f5f5f5f5f 32 plan9 CMPW 0(CX), DX 
+663b11|223344556677885f5f5f5f5f5f 32 plan9 CMPW DX, 0(CX) 663b11|223344556677885f5f5f5f5f5f 64 gnu cmp (%rcx),%dx 663b11|223344556677885f5f5f5f5f5f 64 intel cmp dx, word ptr [rcx] -663b11|223344556677885f5f5f5f5f5f 64 plan9 CMPW 0(CX), DX +663b11|223344556677885f5f5f5f5f5f 64 plan9 CMPW DX, 0(CX) 663d1122|3344556677885f5f5f5f5f5f 32 intel cmp ax, 0x2211 -663d1122|3344556677885f5f5f5f5f5f 32 plan9 CMPW $0x2211, AX +663d1122|3344556677885f5f5f5f5f5f 32 plan9 CMPW AX, $0x2211 663d1122|3344556677885f5f5f5f5f5f 64 gnu cmp $0x2211,%ax 663d1122|3344556677885f5f5f5f5f5f 64 intel cmp ax, 0x2211 -663d1122|3344556677885f5f5f5f5f5f 64 plan9 CMPW $0x2211, AX +663d1122|3344556677885f5f5f5f5f5f 64 plan9 CMPW AX, $0x2211 6640|11223344556677885f5f5f5f5f5f 32 intel inc ax 6640|11223344556677885f5f5f5f5f5f 32 plan9 INCW AX 66480f3a161122|3344556677885f5f5f 64 gnu pextrq $0x22,%xmm2,(%rcx) @@ -4343,10 +4343,10 @@ 6681301122|3344556677885f5f5f5f5f 64 intel xor word ptr [rax], 0x2211 6681301122|3344556677885f5f5f5f5f 64 plan9 XORW $0x2211, 0(AX) 6681381122|3344556677885f5f5f5f5f 32 intel cmp word ptr [eax], 0x2211 -6681381122|3344556677885f5f5f5f5f 32 plan9 CMPW $0x2211, 0(AX) +6681381122|3344556677885f5f5f5f5f 32 plan9 CMPW 0(AX), $0x2211 6681381122|3344556677885f5f5f5f5f 64 gnu cmpw $0x2211,(%rax) 6681381122|3344556677885f5f5f5f5f 64 intel cmp word ptr [rax], 0x2211 -6681381122|3344556677885f5f5f5f5f 64 plan9 CMPW $0x2211, 0(AX) +6681381122|3344556677885f5f5f5f5f 64 plan9 CMPW 0(AX), $0x2211 66830011|223344556677885f5f5f5f5f 32 intel add word ptr [eax], 0x11 66830011|223344556677885f5f5f5f5f 32 plan9 ADDW $0x11, 0(AX) 66830011|223344556677885f5f5f5f5f 64 gnu addw $0x11,(%rax) @@ -4383,10 +4383,10 @@ 66833011|223344556677885f5f5f5f5f 64 intel xor word ptr [rax], 0x11 66833011|223344556677885f5f5f5f5f 64 plan9 XORW $0x11, 0(AX) 66833811|223344556677885f5f5f5f5f 32 intel cmp word ptr [eax], 0x11 -66833811|223344556677885f5f5f5f5f 32 plan9 CMPW $0x11, 0(AX) 
+66833811|223344556677885f5f5f5f5f 32 plan9 CMPW 0(AX), $0x11 66833811|223344556677885f5f5f5f5f 64 gnu cmpw $0x11,(%rax) 66833811|223344556677885f5f5f5f5f 64 intel cmp word ptr [rax], 0x11 -66833811|223344556677885f5f5f5f5f 64 plan9 CMPW $0x11, 0(AX) +66833811|223344556677885f5f5f5f5f 64 plan9 CMPW 0(AX), $0x11 668511|223344556677885f5f5f5f5f5f 32 intel test word ptr [ecx], dx 668511|223344556677885f5f5f5f5f5f 32 plan9 TESTW DX, 0(CX) 668511|223344556677885f5f5f5f5f5f 64 gnu test %dx,(%rcx) @@ -4959,10 +4959,10 @@ 803011|223344556677885f5f5f5f5f5f 64 intel xor byte ptr [rax], 0x11 803011|223344556677885f5f5f5f5f5f 64 plan9 XORB $0x11, 0(AX) 803811|223344556677885f5f5f5f5f5f 32 intel cmp byte ptr [eax], 0x11 -803811|223344556677885f5f5f5f5f5f 32 plan9 CMPB $0x11, 0(AX) +803811|223344556677885f5f5f5f5f5f 32 plan9 CMPB 0(AX), $0x11 803811|223344556677885f5f5f5f5f5f 64 gnu cmpb $0x11,(%rax) 803811|223344556677885f5f5f5f5f5f 64 intel cmp byte ptr [rax], 0x11 -803811|223344556677885f5f5f5f5f5f 64 plan9 CMPB $0x11, 0(AX) +803811|223344556677885f5f5f5f5f5f 64 plan9 CMPB 0(AX), $0x11 810011223344|556677885f5f5f5f5f5f 32 intel add dword ptr [eax], 0x44332211 810011223344|556677885f5f5f5f5f5f 32 plan9 ADDL $0x44332211, 0(AX) 810011223344|556677885f5f5f5f5f5f 64 gnu addl $0x44332211,(%rax) @@ -4999,10 +4999,10 @@ 813011223344|556677885f5f5f5f5f5f 64 intel xor dword ptr [rax], 0x44332211 813011223344|556677885f5f5f5f5f5f 64 plan9 XORL $0x44332211, 0(AX) 813811223344|556677885f5f5f5f5f5f 32 intel cmp dword ptr [eax], 0x44332211 -813811223344|556677885f5f5f5f5f5f 32 plan9 CMPL $0x44332211, 0(AX) +813811223344|556677885f5f5f5f5f5f 32 plan9 CMPL 0(AX), $0x44332211 813811223344|556677885f5f5f5f5f5f 64 gnu cmpl $0x44332211,(%rax) 813811223344|556677885f5f5f5f5f5f 64 intel cmp dword ptr [rax], 0x44332211 -813811223344|556677885f5f5f5f5f5f 64 plan9 CMPL $0x44332211, 0(AX) +813811223344|556677885f5f5f5f5f5f 64 plan9 CMPL 0(AX), $0x44332211 830011|223344556677885f5f5f5f5f5f 32 intel add 
dword ptr [eax], 0x11 830011|223344556677885f5f5f5f5f5f 32 plan9 ADDL $0x11, 0(AX) 830011|223344556677885f5f5f5f5f5f 64 gnu addl $0x11,(%rax) @@ -5039,10 +5039,10 @@ 833011|223344556677885f5f5f5f5f5f 64 intel xor dword ptr [rax], 0x11 833011|223344556677885f5f5f5f5f5f 64 plan9 XORL $0x11, 0(AX) 833811|223344556677885f5f5f5f5f5f 32 intel cmp dword ptr [eax], 0x11 -833811|223344556677885f5f5f5f5f5f 32 plan9 CMPL $0x11, 0(AX) +833811|223344556677885f5f5f5f5f5f 32 plan9 CMPL 0(AX), $0x11 833811|223344556677885f5f5f5f5f5f 64 gnu cmpl $0x11,(%rax) 833811|223344556677885f5f5f5f5f5f 64 intel cmp dword ptr [rax], 0x11 -833811|223344556677885f5f5f5f5f5f 64 plan9 CMPL $0x11, 0(AX) +833811|223344556677885f5f5f5f5f5f 64 plan9 CMPL 0(AX), $0x11 8411|223344556677885f5f5f5f5f5f5f 32 intel test byte ptr [ecx], dl 8411|223344556677885f5f5f5f5f5f5f 32 plan9 TESTB DL, 0(CX) 8411|223344556677885f5f5f5f5f5f5f 64 gnu test %dl,(%rcx) From b6e875325b9240a588005b57569516391c352e78 Mon Sep 17 00:00:00 2001 From: cui fliter Date: Fri, 18 Aug 2023 10:29:34 +0800 Subject: [PATCH 019/200] all: gofmt format Change-Id: Iaea9ce0d3b237123cdb4315790960aeee1b13a80 Reviewed-on: https://go-review.googlesource.com/c/arch/+/520577 Run-TryBot: shuang cui Auto-Submit: Ian Lance Taylor Reviewed-by: Cherry Mui TryBot-Result: Gopher Robot Reviewed-by: Bryan Mills --- arm/armasm/plan9x.go | 9 ++++++--- arm/armmap/map.go | 5 +++-- arm/armspec/spec.go | 13 ++++++------- arm64/arm64asm/inst.go | 6 ++++-- ppc64/ppc64asm/gnu.go | 2 +- ppc64/ppc64spec/spec.go | 12 ++++++------ x86/x86asm/gnu.go | 2 +- x86/x86asm/inst.go | 2 +- x86/x86avxgen/generate.go | 4 ++-- x86/x86avxgen/main.go | 20 ++++++++++---------- x86/x86csv/x86csv.go | 2 +- x86/x86map/map.go | 7 ++++--- x86/x86spec/spec.go | 19 +++++++++---------- x86/xeddata/database.go | 2 ++ x86/xeddata/doc.go | 23 ++++++++++++----------- x86/xeddata/reader.go | 1 + 16 files changed, 69 insertions(+), 60 deletions(-) diff --git a/arm/armasm/plan9x.go 
b/arm/armasm/plan9x.go index a143d2ef..842ab980 100644 --- a/arm/armasm/plan9x.go +++ b/arm/armasm/plan9x.go @@ -253,10 +253,13 @@ func plan9Arg(inst *Inst, pc uint64, symname func(uint64) (string, uint64), arg // [r2, r0, ror #1] -> (R2)(R0@>1) // inst [r2, -r0, ror #1] -> INST.U (R2)(R0@>1) // input: -// a memory operand +// +// a memory operand +// // return values: -// corresponding memory operand in Plan 9 syntax -// .W/.P/.U suffix +// +// corresponding memory operand in Plan 9 syntax +// .W/.P/.U suffix func memOpTrans(mem Mem) (string, string) { suffix := "" switch mem.Mode { diff --git a/arm/armmap/map.go b/arm/armmap/map.go index f5053052..3e8f3072 100644 --- a/arm/armmap/map.go +++ b/arm/armmap/map.go @@ -5,12 +5,13 @@ // Armmap constructs the ARM opcode map from the instruction set CSV file. // // Usage: +// // armmap [-fmt=format] arm.csv // // The known output formats are: // -// text (default) - print decoding tree in text form -// decoder - print decoding tables for the armasm package +// text (default) - print decoding tree in text form +// decoder - print decoding tables for the armasm package package main import ( diff --git a/arm/armspec/spec.go b/arm/armspec/spec.go index f5bad801..108f7ed3 100644 --- a/arm/armspec/spec.go +++ b/arm/armspec/spec.go @@ -8,15 +8,14 @@ // ... see golang.org/issue/12840 -// Armspec reads the ``ARM Architecture Reference Manual'' +// Armspec reads the “ARM Architecture Reference Manual” // to collect instruction encoding details and writes those details to standard output // in JSON format. // -// Warning Warning Warning +// # Warning Warning Warning // // This program is unfinished. It is being published in this incomplete form // for interested readers, but do not expect it to be runnable or useful. 
-// package main import ( @@ -606,10 +605,10 @@ func sameFont(f1, f2 string) bool { } var jsFix = strings.NewReplacer( -// `\u003c`, `<`, -// `\u003e`, `>`, -// `\u0026`, `&`, -// `\u0009`, `\t`, +// `\u003c`, `<`, +// `\u003e`, `>`, +// `\u0026`, `&`, +// `\u0009`, `\t`, ) func printTable(name string, table []Inst) { diff --git a/arm64/arm64asm/inst.go b/arm64/arm64asm/inst.go index 8c633fef..866e399c 100644 --- a/arm64/arm64asm/inst.go +++ b/arm64/arm64asm/inst.go @@ -934,8 +934,10 @@ func (r RegisterWithArrangement) String() string { return result } -// Register with arrangement and index: .[], -// { .B, .B }[]. +// Register with arrangement and index: +// +// .[], +// { .B, .B }[]. type RegisterWithArrangementAndIndex struct { r Reg a Arrangement diff --git a/ppc64/ppc64asm/gnu.go b/ppc64/ppc64asm/gnu.go index b4c9bf8d..367acdd4 100644 --- a/ppc64/ppc64asm/gnu.go +++ b/ppc64/ppc64asm/gnu.go @@ -359,7 +359,7 @@ func GNUSyntax(inst Inst, pc uint64) string { // gnuArg formats arg (which is the argIndex's arg in inst) according to GNU rules. // NOTE: because GNUSyntax is the only caller of this func, and it receives a copy -// of inst, it's ok to modify inst.Args here. +// of inst, it's ok to modify inst.Args here. func gnuArg(inst *Inst, argIndex int, arg Arg, pc uint64) string { // special cases for load/store instructions if _, ok := arg.(Offset); ok { diff --git a/ppc64/ppc64spec/spec.go b/ppc64/ppc64spec/spec.go index 54e05353..55cb2756 100644 --- a/ppc64/ppc64spec/spec.go +++ b/ppc64/ppc64spec/spec.go @@ -5,11 +5,12 @@ //go:build (go1.6 && amd64) || go1.8 // +build go1.6,amd64 go1.8 -// Power64spec reads the ``Power ISA V2.07'' Manual +// Power64spec reads the “Power ISA V2.07” Manual // to collect instruction encoding details and writes those details to standard output // in CSV format. // // Usage: +// // ppc64spec PowerISA_V2.07_PUBLIC.pdf >ppc64.csv // // Each CSV line contains four fields: @@ -24,7 +25,6 @@ // For now, empty. 
// // For more on the exact meaning of these fields, see the Power manual. -// package main import ( @@ -495,10 +495,10 @@ func sameFont(f1, f2 string) bool { } var jsFix = strings.NewReplacer( -// `\u003c`, `<`, -// `\u003e`, `>`, -// `\u0026`, `&`, -// `\u0009`, `\t`, +// `\u003c`, `<`, +// `\u003e`, `>`, +// `\u0026`, `&`, +// `\u0009`, `\t`, ) func printTable(name string, table []Inst) { diff --git a/x86/x86asm/gnu.go b/x86/x86asm/gnu.go index 75cff72b..8eba1fd0 100644 --- a/x86/x86asm/gnu.go +++ b/x86/x86asm/gnu.go @@ -10,7 +10,7 @@ import ( ) // GNUSyntax returns the GNU assembler syntax for the instruction, as defined by GNU binutils. -// This general form is often called ``AT&T syntax'' as a reference to AT&T System V Unix. +// This general form is often called “AT&T syntax” as a reference to AT&T System V Unix. func GNUSyntax(inst Inst, pc uint64, symname SymLookup) string { // Rewrite instruction to mimic GNU peculiarities. // Note that inst has been passed by value and contains diff --git a/x86/x86asm/inst.go b/x86/x86asm/inst.go index 4632b506..e98f1a84 100644 --- a/x86/x86asm/inst.go +++ b/x86/x86asm/inst.go @@ -144,7 +144,7 @@ type Arg interface { // the interface value instead of requiring an allocation. // A Reg is a single register. -// The zero Reg value has no name but indicates ``no register.'' +// The zero Reg value has no name but indicates “no register.” type Reg uint8 const ( diff --git a/x86/x86avxgen/generate.go b/x86/x86avxgen/generate.go index 14985cb3..da7fd318 100644 --- a/x86/x86avxgen/generate.go +++ b/x86/x86avxgen/generate.go @@ -230,8 +230,8 @@ func (gen *generator) makeYtab(zoffset int, zform string, args []*argument) ytab // // This is required due to how masking is implemented in asm6. // Single MASK1() instruction produces 2 ytabs, for example: -// 1. OP xmm, mem | Yxr, Yxm | Does not permit K arguments (K0 implied) -// 2. OP xmm, K2, mem | Yxr, Yknot0, Yxm | Does not permit K0 argument +// 1. 
OP xmm, mem | Yxr, Yxm | Does not permit K arguments (K0 implied) +// 2. OP xmm, K2, mem | Yxr, Yknot0, Yxm | Does not permit K0 argument // // This function also exploits that both ytab entries have same opbytes, // hence it is efficient to emit only one opbytes line and 0 Z-offset diff --git a/x86/x86avxgen/main.go b/x86/x86avxgen/main.go index 9fdf262e..b759c505 100644 --- a/x86/x86avxgen/main.go +++ b/x86/x86avxgen/main.go @@ -224,16 +224,16 @@ func assignZforms(ctx *context) { // elements order inside ytabList. // // We want these rules to be satisfied: -// - EVEX-encoded entries go after VEX-encoded entries. -// This way, VEX forms are selected over EVEX variants. -// - EVEX forms with SAE/RC must go before forms without them. -// This helps to avoid problems with reg-reg instructions -// that encode either of them in ModRM.R/M which causes -// ambiguity in ytabList (more than 1 ytab can match args). -// If first matching ytab has SAE/RC, problem will not occur. -// - Memory argument position affects order. -// Required to be in sync with XED encoder when there -// are multiple choices of how to encode instruction. +// - EVEX-encoded entries go after VEX-encoded entries. +// This way, VEX forms are selected over EVEX variants. +// - EVEX forms with SAE/RC must go before forms without them. +// This helps to avoid problems with reg-reg instructions +// that encode either of them in ModRM.R/M which causes +// ambiguity in ytabList (more than 1 ytab can match args). +// If first matching ytab has SAE/RC, problem will not occur. +// - Memory argument position affects order. +// Required to be in sync with XED encoder when there +// are multiple choices of how to encode instruction. 
func sortGroups(ctx *context) { sort.SliceStable(ctx.groups, func(i, j int) bool { return ctx.groups[i].opcode < ctx.groups[j].opcode diff --git a/x86/x86csv/x86csv.go b/x86/x86csv/x86csv.go index e205c1b4..6f6b68c7 100644 --- a/x86/x86csv/x86csv.go +++ b/x86/x86csv/x86csv.go @@ -6,7 +6,7 @@ // Only latest version of "x86.csv" format is supported. // // Terminology: -// given "OPCODE [ARGS...]" line; +// given "OPCODE [ARGS...]" line; // Opcode - instruction name/mnemonic/class. // Args - instruction operands. // Syntax - Opcode with Args. diff --git a/x86/x86map/map.go b/x86/x86map/map.go index df8c68e5..9d45a704 100644 --- a/x86/x86map/map.go +++ b/x86/x86map/map.go @@ -5,13 +5,14 @@ // X86map constructs the x86 opcode map from the instruction set CSV file. // // Usage: +// // x86map [-fmt=format] x86.csv // // The known output formats are: // -// text (default) - print decoding tree in text form -// decoder - print decoding tables for the x86asm package -// scanner - print scanning tables for x86scan package +// text (default) - print decoding tree in text form +// decoder - print decoding tables for the x86asm package +// scanner - print scanning tables for x86scan package package main import ( diff --git a/x86/x86spec/spec.go b/x86/x86spec/spec.go index 25267941..57f3276c 100644 --- a/x86/x86spec/spec.go +++ b/x86/x86spec/spec.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// X86spec reads the ``Intel® 64 and IA-32 Architectures Software Developer's Manual'' +// X86spec reads the “Intel® 64 and IA-32 Architectures Software Developer's Manual” // to collect instruction encoding details and writes those details to standard output // in CSV format. // @@ -20,7 +20,7 @@ // // There are additional debugging flags, not shown. Run x86spec -help for the list. // -// File Format +// # File Format // // TODO: Mention comments at top of file. 
// TODO: Mention that this is version 0.2 of the file. @@ -59,7 +59,7 @@ // // "SHR r/m32, imm8","SHRL imm8, r/m32","shrl imm8, r/m32","C1 /5 ib","V","V","","operand32","rw,r","Y","32" // -// Mnemonics +// # Mnemonics // // The instruction mnemonics are as used in the Intel manual, with a few exceptions. // @@ -109,7 +109,7 @@ // moffs8, moffs16, moffs32, moffs64, vm32x, vm32y, vm64x, and vm64y // are all as in the Intel manual. // -// Encodings +// # Encodings // // The encodings are also as used in the Intel manual, with automated corrections. // For example, the Intel manual sometimes omits the modrm /r indicator or other trailing bytes, @@ -118,7 +118,7 @@ // tools for processing x86 machine code. // See https://golang.org/x/arch/x86/x86map for one such generator. // -// Valid32 and Valid64 +// # Valid32 and Valid64 // // These columns hold validity abbreviations as defined in the Intel manual: // V, I, N.E., N.P., N.S., or N.I. @@ -128,7 +128,7 @@ // For example, the manual lists many instruction forms using REX bytes // with an incorrect "V" in the Valid32 column. // -// CPUID Feature Flags +// # CPUID Feature Flags // // This column specifies CPUID feature flags that must be present in order // to use the instruction. If multiple flags are required, @@ -136,7 +136,7 @@ // The column can also list one of the values 486, Pentium, PentiumII, and P6, // indicating that the instruction was introduced on that architecture version. // -// Tags +// # Tags // // The tag column does not correspond to a traditional column in the Intel manual tables. // Instead, it is itself a comma-separated list of tags or hints derived by analysis @@ -169,7 +169,7 @@ // Since most decoders will handle the REX byte separately, the form with the // unnecessary REX is tagged pseudo64. // -// Corrections and Additions +// # Corrections and Additions // // The x86spec program makes various corrections to the Intel manual data // as part of extracting the information. 
Those corrections are described above. @@ -177,7 +177,7 @@ // The x86spec program also adds a few well-known undocumented instructions, // such as UD1 and FFREEP. // -// Examples +// # Examples // // The latest version of the CSV file is available in this Git repository and also // online at https://golang.org/s/x86.csv. It is meant to be human-readable for @@ -193,7 +193,6 @@ // reads the CSV file and generates an x86 instruction decoder in the form // of a simple byte-code program. This decoder is the core of the disassembler // in the x86asm package (https://golang.org/x/arch/x86/x86asm). -// package main import ( diff --git a/x86/xeddata/database.go b/x86/xeddata/database.go index 35d86d98..94d21de0 100644 --- a/x86/xeddata/database.go +++ b/x86/xeddata/database.go @@ -104,9 +104,11 @@ type xtype struct { // parsing of found file is. // // Lookup: +// // "$xedPath/all-state.txt" => db.LoadStates() // "$xedPath/all-widths.txt" => db.LoadWidths() // "$xedPath/all-element-types.txt" => db.LoadXtypes() +// // $xedPath is the interpolated value of function argument. // // The call NewDatabase("") is valid and returns empty database. diff --git a/x86/xeddata/doc.go b/x86/xeddata/doc.go index bb1a96af..23d51dc5 100644 --- a/x86/xeddata/doc.go +++ b/x86/xeddata/doc.go @@ -5,17 +5,18 @@ // Package xeddata provides utilities to work with XED datafiles. // // Main features: -// * Fundamental XED enumerations (CPU modes, operand sizes, ...) -// * XED objects and their components -// * XED datafiles reader (see below) -// * Utility functions like ExpandStates +// - Fundamental XED enumerations (CPU modes, operand sizes, ...) 
+// - XED objects and their components +// - XED datafiles reader (see below) +// - Utility functions like ExpandStates // // The amount of file formats that is understood is a minimal // set required to generate x86.csv from XED tables: -// * states - simple macro substitutions used in patterns -// * widths - mappings from width names to their size -// * element-types - XED xtype information -// * objects - XED objects that constitute "the tables" +// - states - simple macro substitutions used in patterns +// - widths - mappings from width names to their size +// - element-types - XED xtype information +// - objects - XED objects that constitute "the tables" +// // Collectively, those files are called "datafiles". // // Terminology is borrowed from XED itself, @@ -26,9 +27,9 @@ // file under local XED source repository folder. // // The default usage scheme: -// 1. Open "XED database" to load required metadata. -// 2. Read XED file with objects definitions. -// 3. Operate on XED objects. +// 1. Open "XED database" to load required metadata. +// 2. Read XED file with objects definitions. +// 3. Operate on XED objects. // // See example_test.go for complete examples. // diff --git a/x86/xeddata/reader.go b/x86/xeddata/reader.go index 4176b66e..fc8aa94b 100644 --- a/x86/xeddata/reader.go +++ b/x86/xeddata/reader.go @@ -92,6 +92,7 @@ func (r *Reader) ReadAll() ([]*Object, error) { // It expects lines that are joined by '\' to be concatenated. // // The format can be described as: +// // unquoted field name "[A-Z_]+" (captured) // field value delimiter ":" // field value string (captured) From 05c9512268b810910595e592c68436f27594f3c1 Mon Sep 17 00:00:00 2001 From: Dmitri Shuralyov Date: Tue, 10 Oct 2023 18:35:22 -0400 Subject: [PATCH 020/200] all: update go directive to 1.18 Done with: go get go@1.18 go mod tidy go fix ./... Using go1.21.3. Also delete the build constraints that are always satisfied when using supported Go versions. For golang/go#60268. 
Change-Id: Iab4a7237a368b1ac05bb72a646501defb51503f1 Reviewed-on: https://go-review.googlesource.com/c/arch/+/534197 Reviewed-by: Cherry Mui LUCI-TryBot-Result: Go LUCI Reviewed-by: Dmitri Shuralyov Auto-Submit: Dmitri Shuralyov --- arm/armspec/spec.go | 4 ---- arm/armspec/specmap.go | 1 - arm64/arm64spec/spec.go | 3 --- go.mod | 2 +- ppc64/ppc64spec/spec.go | 3 --- ppc64/ppc64util/util.go | 1 - 6 files changed, 1 insertion(+), 13 deletions(-) diff --git a/arm/armspec/spec.go b/arm/armspec/spec.go index 108f7ed3..f755579b 100644 --- a/arm/armspec/spec.go +++ b/arm/armspec/spec.go @@ -2,10 +2,6 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -//go:build go1.6 && (!386 || go1.8) -// +build go1.6 -// +build !386 go1.8 - // ... see golang.org/issue/12840 // Armspec reads the “ARM Architecture Reference Manual” diff --git a/arm/armspec/specmap.go b/arm/armspec/specmap.go index b881082a..973030f9 100644 --- a/arm/armspec/specmap.go +++ b/arm/armspec/specmap.go @@ -3,7 +3,6 @@ // license that can be found in the LICENSE file. //go:build ignore -// +build ignore package main diff --git a/arm64/arm64spec/spec.go b/arm64/arm64spec/spec.go index 08b487cd..ee784e55 100644 --- a/arm64/arm64spec/spec.go +++ b/arm64/arm64spec/spec.go @@ -2,9 +2,6 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -//go:build (go1.6 && amd64) || go1.8 -// +build go1.6,amd64 go1.8 - // arm64spec reads the ``ARMv8-A Reference Manual'' // to collect instruction encoding details and writes those // details to standard output in JSON format. 
diff --git a/go.mod b/go.mod index d29c9298..355098da 100644 --- a/go.mod +++ b/go.mod @@ -1,5 +1,5 @@ module golang.org/x/arch -go 1.17 +go 1.18 require rsc.io/pdf v0.1.1 diff --git a/ppc64/ppc64spec/spec.go b/ppc64/ppc64spec/spec.go index 55cb2756..4167d6dc 100644 --- a/ppc64/ppc64spec/spec.go +++ b/ppc64/ppc64spec/spec.go @@ -2,9 +2,6 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -//go:build (go1.6 && amd64) || go1.8 -// +build go1.6,amd64 go1.8 - // Power64spec reads the “Power ISA V2.07” Manual // to collect instruction encoding details and writes those details to standard output // in CSV format. diff --git a/ppc64/ppc64util/util.go b/ppc64/ppc64util/util.go index b2f19103..dcb8e428 100644 --- a/ppc64/ppc64util/util.go +++ b/ppc64/ppc64util/util.go @@ -3,7 +3,6 @@ // license that can be found in the LICENSE file. //go:build ignore -// +build ignore // Generate interesting test cases from ppc64 objdump via // go run util.go From a6bdeed4930798f0aa566beb7883ab0d88dc9646 Mon Sep 17 00:00:00 2001 From: Dmitri Shuralyov Date: Wed, 11 Oct 2023 00:55:13 +0000 Subject: [PATCH 021/200] arm/armspec: remove obsolete comment I didn't realize it at the time, but this comment was referring to the build constraint that was removed in CL 534197. Updates golang/go#12840. Change-Id: I1e6694a6c05f4b2e6dbffe6488c69f9d530bdaf4 Reviewed-on: https://go-review.googlesource.com/c/arch/+/534221 Auto-Submit: Dmitri Shuralyov LUCI-TryBot-Result: Go LUCI Reviewed-by: Dmitri Shuralyov Reviewed-by: Cherry Mui --- arm/armspec/spec.go | 2 -- 1 file changed, 2 deletions(-) diff --git a/arm/armspec/spec.go b/arm/armspec/spec.go index f755579b..60579a05 100644 --- a/arm/armspec/spec.go +++ b/arm/armspec/spec.go @@ -2,8 +2,6 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// ... 
see golang.org/issue/12840 - // Armspec reads the “ARM Architecture Reference Manual” // to collect instruction encoding details and writes those details to standard output // in JSON format. From a85057043824df19248fb9070bc44f3403f8876e Mon Sep 17 00:00:00 2001 From: Russ Cox Date: Tue, 5 Dec 2023 18:00:47 -0500 Subject: [PATCH 022/200] x86avxgen/testdata/xedpath: replace "INTEL CONFIDENTIAL" files These files are not really confidential - they were released in github.com/intelxed/xed with incorrect copyright notices. The copyright notices were updated in https://github.com/intelxed/xed/commit/5c538047876feecf080d9441110f81d0e67b5de8 but the files had also changed a bit by then. Replace the two mislabeled files with the latest versions, bringing in the updated Apache license as well as assorted other changes. The tests still pass, so these changes must not matter too much. Fixes golang/go#64315. [git-generate] cd x86/x86avxgen/testdata/xedpath rm -rf _xed git clone https://github.com/intelxed/xed _xed cd _xed git checkout d41e876 # "2019 copyright" cd .. 
echo ' e all-dec-instructions.txt /^###FILE:.*avx512-foundation-isa.xed.txt/+1;/^###FILE/-3 d /^###FILE:.*avx512-foundation-isa.xed.txt/+1r _xed/datafiles/avx512f/avx512-foundation-isa.xed.txt /^###FILE:.*skx-isa.xed.txt/+1;/^###FILE/-3 d /^###FILE:.*skx-isa.xed.txt/+1r _xed/datafiles/avx512-skx/skx-isa.xed.txt ,s/ +$//g w q ' | sam -d Change-Id: I60fb4b9a420b8962fbbdd026cb6229d55144908d Reviewed-on: https://go-review.googlesource.com/c/arch/+/547775 LUCI-TryBot-Result: Go LUCI Reviewed-by: Cherry Mui --- .../testdata/xedpath/all-dec-instructions.txt | 634 +++++++++++------- 1 file changed, 385 insertions(+), 249 deletions(-) diff --git a/x86/x86avxgen/testdata/xedpath/all-dec-instructions.txt b/x86/x86avxgen/testdata/xedpath/all-dec-instructions.txt index 07cbc41b..aad0b816 100644 --- a/x86/x86avxgen/testdata/xedpath/all-dec-instructions.txt +++ b/x86/x86avxgen/testdata/xedpath/all-dec-instructions.txt @@ -11873,29 +11873,22 @@ IFORM: VPOPCNTQ_ZMMu64_MASKmskw_MEMu64_AVX512 ###FILE: ./datafiles/avx512f/avx512-foundation-isa.xed.txt - #BEGIN_LEGAL -#INTEL CONFIDENTIAL -# -#Copyright (c) 2017, Intel Corporation. All rights reserved. -# -#The source code contained or described herein and all documents -#related to the source code ("Material") are owned by Intel Corporation -#or its suppliers or licensors. Title to the Material remains with -#Intel Corporation or its suppliers and licensors. The Material -#contains trade secrets and proprietary and confidential information of -#Intel or its suppliers and licensors. The Material is protected by -#worldwide copyright and trade secret laws and treaty provisions. No -#part of the Material may be used, copied, reproduced, modified, -#published, uploaded, posted, transmitted, distributed, or disclosed in -#any way without Intel's prior express written permission. 
-# -#No license under any patent, copyright, trade secret or other -#intellectual property right is granted to or conferred upon you by -#disclosure or delivery of the Materials, either expressly, by -#implication, inducement, estoppel or otherwise. Any license under such -#intellectual property rights must be express and approved by Intel in -#writing. +# +#Copyright (c) 2019 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# #END_LEGAL # # @@ -13103,7 +13096,10 @@ ISA_SET: AVX512F_SCALAR EXCEPTIONS: AVX512-E3NF REAL_OPCODE: Y ATTRIBUTES: MXCSR SIMD_SCALAR -PATTERN: EVV 0x2D VF2 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] W0 NOEVSR ZEROING=0 MASK=0 +PATTERN: EVV 0x2D VF2 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] not64 NOEVSR ZEROING=0 MASK=0 +OPERANDS: REG0=GPR32_R():w:d:i32 REG1=XMM_B3():r:dq:f64 +IFORM: VCVTSD2SI_GPR32i32_XMMf64_AVX512 +PATTERN: EVV 0x2D VF2 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] mode64 W0 NOEVSR ZEROING=0 MASK=0 EVEXRR_ONE OPERANDS: REG0=GPR32_R():w:d:i32 REG1=XMM_B3():r:dq:f64 IFORM: VCVTSD2SI_GPR32i32_XMMf64_AVX512 } @@ -13117,7 +13113,10 @@ ISA_SET: AVX512F_SCALAR EXCEPTIONS: AVX512-E3NF REAL_OPCODE: Y ATTRIBUTES: MXCSR SIMD_SCALAR -PATTERN: EVV 0x2D VF2 V0F MOD[0b11] MOD=3 BCRC=1 REG[rrr] RM[nnn] FIX_ROUND_LEN128() AVX512_ROUND() W0 NOEVSR ZEROING=0 MASK=0 +PATTERN: EVV 0x2D VF2 V0F MOD[0b11] MOD=3 BCRC=1 REG[rrr] RM[nnn] FIX_ROUND_LEN128() AVX512_ROUND() not64 NOEVSR ZEROING=0 MASK=0 +OPERANDS: 
REG0=GPR32_R():w:d:i32:TXT=ROUNDC REG1=XMM_B3():r:dq:f64 +IFORM: VCVTSD2SI_GPR32i32_XMMf64_AVX512 +PATTERN: EVV 0x2D VF2 V0F MOD[0b11] MOD=3 BCRC=1 REG[rrr] RM[nnn] FIX_ROUND_LEN128() AVX512_ROUND() mode64 W0 NOEVSR ZEROING=0 MASK=0 EVEXRR_ONE OPERANDS: REG0=GPR32_R():w:d:i32:TXT=ROUNDC REG1=XMM_B3():r:dq:f64 IFORM: VCVTSD2SI_GPR32i32_XMMf64_AVX512 } @@ -13131,7 +13130,10 @@ ISA_SET: AVX512F_SCALAR EXCEPTIONS: AVX512-E3NF REAL_OPCODE: Y ATTRIBUTES: MXCSR SIMD_SCALAR DISP8_GPR_WRITER_LDOP_Q -PATTERN: EVV 0x2D VF2 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() W0 NOEVSR ZEROING=0 MASK=0 ESIZE_64_BITS() NELEM_GPR_WRITER_LDOP_Q() +PATTERN: EVV 0x2D VF2 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() not64 NOEVSR ZEROING=0 MASK=0 ESIZE_64_BITS() NELEM_GPR_WRITER_LDOP_Q() +OPERANDS: REG0=GPR32_R():w:d:i32 MEM0:r:q:f64 +IFORM: VCVTSD2SI_GPR32i32_MEMf64_AVX512 +PATTERN: EVV 0x2D VF2 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() mode64 W0 NOEVSR ZEROING=0 MASK=0 ESIZE_64_BITS() NELEM_GPR_WRITER_LDOP_Q() EVEXRR_ONE OPERANDS: REG0=GPR32_R():w:d:i32 MEM0:r:q:f64 IFORM: VCVTSD2SI_GPR32i32_MEMf64_AVX512 } @@ -13147,7 +13149,7 @@ ISA_SET: AVX512F_SCALAR EXCEPTIONS: AVX512-E3NF REAL_OPCODE: Y ATTRIBUTES: MXCSR SIMD_SCALAR -PATTERN: EVV 0x2D VF2 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] W1 mode64 NOEVSR ZEROING=0 MASK=0 +PATTERN: EVV 0x2D VF2 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] mode64 W1 NOEVSR ZEROING=0 MASK=0 EVEXRR_ONE OPERANDS: REG0=GPR64_R():w:q:i64 REG1=XMM_B3():r:dq:f64 IFORM: VCVTSD2SI_GPR64i64_XMMf64_AVX512 } @@ -13161,7 +13163,7 @@ ISA_SET: AVX512F_SCALAR EXCEPTIONS: AVX512-E3NF REAL_OPCODE: Y ATTRIBUTES: MXCSR SIMD_SCALAR -PATTERN: EVV 0x2D VF2 V0F MOD[0b11] MOD=3 BCRC=1 REG[rrr] RM[nnn] FIX_ROUND_LEN128() AVX512_ROUND() W1 mode64 NOEVSR ZEROING=0 MASK=0 +PATTERN: EVV 0x2D VF2 V0F MOD[0b11] MOD=3 BCRC=1 REG[rrr] RM[nnn] FIX_ROUND_LEN128() AVX512_ROUND() mode64 W1 NOEVSR ZEROING=0 MASK=0 EVEXRR_ONE OPERANDS: REG0=GPR64_R():w:q:i64:TXT=ROUNDC 
REG1=XMM_B3():r:dq:f64 IFORM: VCVTSD2SI_GPR64i64_XMMf64_AVX512 } @@ -13175,7 +13177,7 @@ ISA_SET: AVX512F_SCALAR EXCEPTIONS: AVX512-E3NF REAL_OPCODE: Y ATTRIBUTES: MXCSR SIMD_SCALAR DISP8_GPR_WRITER_LDOP_Q -PATTERN: EVV 0x2D VF2 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() W1 mode64 NOEVSR ZEROING=0 MASK=0 ESIZE_64_BITS() NELEM_GPR_WRITER_LDOP_Q() +PATTERN: EVV 0x2D VF2 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() mode64 W1 NOEVSR ZEROING=0 MASK=0 ESIZE_64_BITS() NELEM_GPR_WRITER_LDOP_Q() EVEXRR_ONE OPERANDS: REG0=GPR64_R():w:q:i64 MEM0:r:q:f64 IFORM: VCVTSD2SI_GPR64i64_MEMf64_AVX512 } @@ -13235,7 +13237,10 @@ ISA_SET: AVX512F_SCALAR EXCEPTIONS: AVX512-E3NF REAL_OPCODE: Y ATTRIBUTES: MXCSR SIMD_SCALAR -PATTERN: EVV 0x79 VF2 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] W0 NOEVSR ZEROING=0 MASK=0 +PATTERN: EVV 0x79 VF2 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] not64 NOEVSR ZEROING=0 MASK=0 +OPERANDS: REG0=GPR32_R():w:d:u32 REG1=XMM_B3():r:dq:f64 +IFORM: VCVTSD2USI_GPR32u32_XMMf64_AVX512 +PATTERN: EVV 0x79 VF2 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] mode64 W0 NOEVSR ZEROING=0 MASK=0 EVEXRR_ONE OPERANDS: REG0=GPR32_R():w:d:u32 REG1=XMM_B3():r:dq:f64 IFORM: VCVTSD2USI_GPR32u32_XMMf64_AVX512 } @@ -13249,7 +13254,10 @@ ISA_SET: AVX512F_SCALAR EXCEPTIONS: AVX512-E3NF REAL_OPCODE: Y ATTRIBUTES: MXCSR SIMD_SCALAR -PATTERN: EVV 0x79 VF2 V0F MOD[0b11] MOD=3 BCRC=1 REG[rrr] RM[nnn] FIX_ROUND_LEN128() AVX512_ROUND() W0 NOEVSR ZEROING=0 MASK=0 +PATTERN: EVV 0x79 VF2 V0F MOD[0b11] MOD=3 BCRC=1 REG[rrr] RM[nnn] FIX_ROUND_LEN128() AVX512_ROUND() not64 NOEVSR ZEROING=0 MASK=0 +OPERANDS: REG0=GPR32_R():w:d:u32:TXT=ROUNDC REG1=XMM_B3():r:dq:f64 +IFORM: VCVTSD2USI_GPR32u32_XMMf64_AVX512 +PATTERN: EVV 0x79 VF2 V0F MOD[0b11] MOD=3 BCRC=1 REG[rrr] RM[nnn] FIX_ROUND_LEN128() AVX512_ROUND() mode64 W0 NOEVSR ZEROING=0 MASK=0 EVEXRR_ONE OPERANDS: REG0=GPR32_R():w:d:u32:TXT=ROUNDC REG1=XMM_B3():r:dq:f64 IFORM: VCVTSD2USI_GPR32u32_XMMf64_AVX512 } @@ -13263,7 +13271,10 @@ 
ISA_SET: AVX512F_SCALAR EXCEPTIONS: AVX512-E3NF REAL_OPCODE: Y ATTRIBUTES: MXCSR SIMD_SCALAR DISP8_GPR_WRITER_LDOP_Q -PATTERN: EVV 0x79 VF2 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() W0 NOEVSR ZEROING=0 MASK=0 ESIZE_64_BITS() NELEM_GPR_WRITER_LDOP_Q() +PATTERN: EVV 0x79 VF2 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() not64 NOEVSR ZEROING=0 MASK=0 ESIZE_64_BITS() NELEM_GPR_WRITER_LDOP_Q() +OPERANDS: REG0=GPR32_R():w:d:u32 MEM0:r:q:f64 +IFORM: VCVTSD2USI_GPR32u32_MEMf64_AVX512 +PATTERN: EVV 0x79 VF2 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() mode64 W0 NOEVSR ZEROING=0 MASK=0 ESIZE_64_BITS() NELEM_GPR_WRITER_LDOP_Q() EVEXRR_ONE OPERANDS: REG0=GPR32_R():w:d:u32 MEM0:r:q:f64 IFORM: VCVTSD2USI_GPR32u32_MEMf64_AVX512 } @@ -13279,7 +13290,7 @@ ISA_SET: AVX512F_SCALAR EXCEPTIONS: AVX512-E3NF REAL_OPCODE: Y ATTRIBUTES: MXCSR SIMD_SCALAR -PATTERN: EVV 0x79 VF2 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] W1 mode64 NOEVSR ZEROING=0 MASK=0 +PATTERN: EVV 0x79 VF2 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] mode64 W1 NOEVSR ZEROING=0 MASK=0 EVEXRR_ONE OPERANDS: REG0=GPR64_R():w:q:u64 REG1=XMM_B3():r:dq:f64 IFORM: VCVTSD2USI_GPR64u64_XMMf64_AVX512 } @@ -13293,7 +13304,7 @@ ISA_SET: AVX512F_SCALAR EXCEPTIONS: AVX512-E3NF REAL_OPCODE: Y ATTRIBUTES: MXCSR SIMD_SCALAR -PATTERN: EVV 0x79 VF2 V0F MOD[0b11] MOD=3 BCRC=1 REG[rrr] RM[nnn] FIX_ROUND_LEN128() AVX512_ROUND() W1 mode64 NOEVSR ZEROING=0 MASK=0 +PATTERN: EVV 0x79 VF2 V0F MOD[0b11] MOD=3 BCRC=1 REG[rrr] RM[nnn] FIX_ROUND_LEN128() AVX512_ROUND() mode64 W1 NOEVSR ZEROING=0 MASK=0 EVEXRR_ONE OPERANDS: REG0=GPR64_R():w:q:u64:TXT=ROUNDC REG1=XMM_B3():r:dq:f64 IFORM: VCVTSD2USI_GPR64u64_XMMf64_AVX512 } @@ -13307,7 +13318,7 @@ ISA_SET: AVX512F_SCALAR EXCEPTIONS: AVX512-E3NF REAL_OPCODE: Y ATTRIBUTES: MXCSR SIMD_SCALAR DISP8_GPR_WRITER_LDOP_Q -PATTERN: EVV 0x79 VF2 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() W1 mode64 NOEVSR ZEROING=0 MASK=0 ESIZE_64_BITS() NELEM_GPR_WRITER_LDOP_Q() +PATTERN: EVV 0x79 VF2 V0F 
MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() mode64 W1 NOEVSR ZEROING=0 MASK=0 ESIZE_64_BITS() NELEM_GPR_WRITER_LDOP_Q() EVEXRR_ONE OPERANDS: REG0=GPR64_R():w:q:u64 MEM0:r:q:f64 IFORM: VCVTSD2USI_GPR64u64_MEMf64_AVX512 } @@ -13323,7 +13334,11 @@ ISA_SET: AVX512F_SCALAR EXCEPTIONS: AVX512-E10NF REAL_OPCODE: Y ATTRIBUTES: SIMD_SCALAR -PATTERN: EVV 0x2A VF2 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] W0 ZEROING=0 MASK=0 +COMMENT: Ignores rounding controls: 32b-INT-to-FP64 does not need rounding +PATTERN: EVV 0x2A VF2 V0F MOD[0b11] MOD=3 REG[rrr] RM[nnn] not64 ZEROING=0 MASK=0 +OPERANDS: REG0=XMM_R3():w:dq:f64 REG1=XMM_N3():r:dq:f64 REG2=GPR32_B():r:d:i32 +IFORM: VCVTSI2SD_XMMf64_XMMf64_GPR32i32_AVX512 +PATTERN: EVV 0x2A VF2 V0F MOD[0b11] MOD=3 REG[rrr] RM[nnn] mode64 W0 ZEROING=0 MASK=0 OPERANDS: REG0=XMM_R3():w:dq:f64 REG1=XMM_N3():r:dq:f64 REG2=GPR32_B():r:d:i32 IFORM: VCVTSI2SD_XMMf64_XMMf64_GPR32i32_AVX512 } @@ -13337,7 +13352,10 @@ ISA_SET: AVX512F_SCALAR EXCEPTIONS: AVX512-E10NF REAL_OPCODE: Y ATTRIBUTES: SIMD_SCALAR DISP8_GPR_READER -PATTERN: EVV 0x2A VF2 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() W0 ZEROING=0 MASK=0 ESIZE_32_BITS() NELEM_GPR_READER() +PATTERN: EVV 0x2A VF2 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] MODRM() not64 ZEROING=0 MASK=0 BCRC=0 ESIZE_32_BITS() NELEM_GPR_READER() +OPERANDS: REG0=XMM_R3():w:dq:f64 REG1=XMM_N3():r:dq:f64 MEM0:r:d:i32 +IFORM: VCVTSI2SD_XMMf64_XMMf64_MEMi32_AVX512 +PATTERN: EVV 0x2A VF2 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] MODRM() mode64 W0 ZEROING=0 MASK=0 BCRC=0 ESIZE_32_BITS() NELEM_GPR_READER() OPERANDS: REG0=XMM_R3():w:dq:f64 REG1=XMM_N3():r:dq:f64 MEM0:r:d:i32 IFORM: VCVTSI2SD_XMMf64_XMMf64_MEMi32_AVX512 } @@ -13353,7 +13371,7 @@ ISA_SET: AVX512F_SCALAR EXCEPTIONS: AVX512-E3NF REAL_OPCODE: Y ATTRIBUTES: MXCSR SIMD_SCALAR -PATTERN: EVV 0x2A VF2 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] W1 mode64 ZEROING=0 MASK=0 +PATTERN: EVV 0x2A VF2 V0F MOD[0b11] MOD=3 REG[rrr] RM[nnn] mode64 W1 ZEROING=0 MASK=0 BCRC=0 OPERANDS: 
REG0=XMM_R3():w:dq:f64 REG1=XMM_N3():r:dq:f64 REG2=GPR64_B():r:q:i64 IFORM: VCVTSI2SD_XMMf64_XMMf64_GPR64i64_AVX512 } @@ -13367,7 +13385,7 @@ ISA_SET: AVX512F_SCALAR EXCEPTIONS: AVX512-E3NF REAL_OPCODE: Y ATTRIBUTES: MXCSR SIMD_SCALAR -PATTERN: EVV 0x2A VF2 V0F MOD[0b11] MOD=3 BCRC=1 REG[rrr] RM[nnn] FIX_ROUND_LEN128() AVX512_ROUND() W1 mode64 ZEROING=0 MASK=0 +PATTERN: EVV 0x2A VF2 V0F MOD[0b11] MOD=3 REG[rrr] RM[nnn] mode64 W1 ZEROING=0 MASK=0 BCRC=1 FIX_ROUND_LEN128() AVX512_ROUND() OPERANDS: REG0=XMM_R3():w:dq:f64:TXT=ROUNDC REG1=XMM_N3():r:dq:f64 REG2=GPR64_B():r:q:i64 IFORM: VCVTSI2SD_XMMf64_XMMf64_GPR64i64_AVX512 } @@ -13381,7 +13399,7 @@ ISA_SET: AVX512F_SCALAR EXCEPTIONS: AVX512-E3NF REAL_OPCODE: Y ATTRIBUTES: MXCSR SIMD_SCALAR DISP8_GPR_READER -PATTERN: EVV 0x2A VF2 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() W1 mode64 ZEROING=0 MASK=0 ESIZE_64_BITS() NELEM_GPR_READER() +PATTERN: EVV 0x2A VF2 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] MODRM() mode64 W1 ZEROING=0 MASK=0 BCRC=0 ESIZE_64_BITS() NELEM_GPR_READER() OPERANDS: REG0=XMM_R3():w:dq:f64 REG1=XMM_N3():r:dq:f64 MEM0:r:q:i64 IFORM: VCVTSI2SD_XMMf64_XMMf64_MEMi64_AVX512 } @@ -13397,7 +13415,10 @@ ISA_SET: AVX512F_SCALAR EXCEPTIONS: AVX512-E3NF REAL_OPCODE: Y ATTRIBUTES: MXCSR SIMD_SCALAR -PATTERN: EVV 0x2A VF3 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] W0 ZEROING=0 MASK=0 +PATTERN: EVV 0x2A VF3 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] not64 ZEROING=0 MASK=0 +OPERANDS: REG0=XMM_R3():w:dq:f32 REG1=XMM_N3():r:dq:f32 REG2=GPR32_B():r:d:i32 +IFORM: VCVTSI2SS_XMMf32_XMMf32_GPR32i32_AVX512 +PATTERN: EVV 0x2A VF3 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] mode64 W0 ZEROING=0 MASK=0 OPERANDS: REG0=XMM_R3():w:dq:f32 REG1=XMM_N3():r:dq:f32 REG2=GPR32_B():r:d:i32 IFORM: VCVTSI2SS_XMMf32_XMMf32_GPR32i32_AVX512 } @@ -13411,7 +13432,10 @@ ISA_SET: AVX512F_SCALAR EXCEPTIONS: AVX512-E3NF REAL_OPCODE: Y ATTRIBUTES: MXCSR SIMD_SCALAR -PATTERN: EVV 0x2A VF3 V0F MOD[0b11] MOD=3 BCRC=1 REG[rrr] RM[nnn] 
FIX_ROUND_LEN128() AVX512_ROUND() W0 ZEROING=0 MASK=0 +PATTERN: EVV 0x2A VF3 V0F MOD[0b11] MOD=3 BCRC=1 REG[rrr] RM[nnn] FIX_ROUND_LEN128() AVX512_ROUND() not64 ZEROING=0 MASK=0 +OPERANDS: REG0=XMM_R3():w:dq:f32:TXT=ROUNDC REG1=XMM_N3():r:dq:f32 REG2=GPR32_B():r:d:i32 +IFORM: VCVTSI2SS_XMMf32_XMMf32_GPR32i32_AVX512 +PATTERN: EVV 0x2A VF3 V0F MOD[0b11] MOD=3 BCRC=1 REG[rrr] RM[nnn] FIX_ROUND_LEN128() AVX512_ROUND() mode64 W0 ZEROING=0 MASK=0 OPERANDS: REG0=XMM_R3():w:dq:f32:TXT=ROUNDC REG1=XMM_N3():r:dq:f32 REG2=GPR32_B():r:d:i32 IFORM: VCVTSI2SS_XMMf32_XMMf32_GPR32i32_AVX512 } @@ -13425,7 +13449,10 @@ ISA_SET: AVX512F_SCALAR EXCEPTIONS: AVX512-E3NF REAL_OPCODE: Y ATTRIBUTES: MXCSR SIMD_SCALAR DISP8_GPR_READER -PATTERN: EVV 0x2A VF3 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() W0 ZEROING=0 MASK=0 ESIZE_32_BITS() NELEM_GPR_READER() +PATTERN: EVV 0x2A VF3 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() not64 ZEROING=0 MASK=0 ESIZE_32_BITS() NELEM_GPR_READER() +OPERANDS: REG0=XMM_R3():w:dq:f32 REG1=XMM_N3():r:dq:f32 MEM0:r:d:i32 +IFORM: VCVTSI2SS_XMMf32_XMMf32_MEMi32_AVX512 +PATTERN: EVV 0x2A VF3 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() mode64 W0 ZEROING=0 MASK=0 ESIZE_32_BITS() NELEM_GPR_READER() OPERANDS: REG0=XMM_R3():w:dq:f32 REG1=XMM_N3():r:dq:f32 MEM0:r:d:i32 IFORM: VCVTSI2SS_XMMf32_XMMf32_MEMi32_AVX512 } @@ -13441,7 +13468,7 @@ ISA_SET: AVX512F_SCALAR EXCEPTIONS: AVX512-E3NF REAL_OPCODE: Y ATTRIBUTES: MXCSR SIMD_SCALAR -PATTERN: EVV 0x2A VF3 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] W1 mode64 ZEROING=0 MASK=0 +PATTERN: EVV 0x2A VF3 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] mode64 W1 ZEROING=0 MASK=0 OPERANDS: REG0=XMM_R3():w:dq:f32 REG1=XMM_N3():r:dq:f32 REG2=GPR64_B():r:q:i64 IFORM: VCVTSI2SS_XMMf32_XMMf32_GPR64i64_AVX512 } @@ -13455,7 +13482,7 @@ ISA_SET: AVX512F_SCALAR EXCEPTIONS: AVX512-E3NF REAL_OPCODE: Y ATTRIBUTES: MXCSR SIMD_SCALAR -PATTERN: EVV 0x2A VF3 V0F MOD[0b11] MOD=3 BCRC=1 REG[rrr] RM[nnn] FIX_ROUND_LEN128() AVX512_ROUND() 
W1 mode64 ZEROING=0 MASK=0 +PATTERN: EVV 0x2A VF3 V0F MOD[0b11] MOD=3 BCRC=1 REG[rrr] RM[nnn] FIX_ROUND_LEN128() AVX512_ROUND() mode64 W1 ZEROING=0 MASK=0 OPERANDS: REG0=XMM_R3():w:dq:f32:TXT=ROUNDC REG1=XMM_N3():r:dq:f32 REG2=GPR64_B():r:q:i64 IFORM: VCVTSI2SS_XMMf32_XMMf32_GPR64i64_AVX512 } @@ -13469,12 +13496,10 @@ ISA_SET: AVX512F_SCALAR EXCEPTIONS: AVX512-E3NF REAL_OPCODE: Y ATTRIBUTES: MXCSR SIMD_SCALAR DISP8_GPR_READER -PATTERN: EVV 0x2A VF3 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() W1 mode64 ZEROING=0 MASK=0 ESIZE_64_BITS() NELEM_GPR_READER() +PATTERN: EVV 0x2A VF3 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() mode64 W1 ZEROING=0 MASK=0 ESIZE_64_BITS() NELEM_GPR_READER() OPERANDS: REG0=XMM_R3():w:dq:f32 REG1=XMM_N3():r:dq:f32 MEM0:r:q:i64 IFORM: VCVTSI2SS_XMMf32_XMMf32_MEMi64_AVX512 } - - # EMITTING VCVTSS2SD (VCVTSS2SD-128-1) { ICLASS: VCVTSS2SD @@ -13529,7 +13554,10 @@ ISA_SET: AVX512F_SCALAR EXCEPTIONS: AVX512-E3NF REAL_OPCODE: Y ATTRIBUTES: MXCSR SIMD_SCALAR -PATTERN: EVV 0x2D VF3 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] W0 NOEVSR ZEROING=0 MASK=0 +PATTERN: EVV 0x2D VF3 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] not64 NOEVSR ZEROING=0 MASK=0 +OPERANDS: REG0=GPR32_R():w:d:i32 REG1=XMM_B3():r:dq:f32 +IFORM: VCVTSS2SI_GPR32i32_XMMf32_AVX512 +PATTERN: EVV 0x2D VF3 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] mode64 W0 NOEVSR ZEROING=0 MASK=0 EVEXRR_ONE OPERANDS: REG0=GPR32_R():w:d:i32 REG1=XMM_B3():r:dq:f32 IFORM: VCVTSS2SI_GPR32i32_XMMf32_AVX512 } @@ -13543,7 +13571,10 @@ ISA_SET: AVX512F_SCALAR EXCEPTIONS: AVX512-E3NF REAL_OPCODE: Y ATTRIBUTES: MXCSR SIMD_SCALAR -PATTERN: EVV 0x2D VF3 V0F MOD[0b11] MOD=3 BCRC=1 REG[rrr] RM[nnn] FIX_ROUND_LEN128() AVX512_ROUND() W0 NOEVSR ZEROING=0 MASK=0 +PATTERN: EVV 0x2D VF3 V0F MOD[0b11] MOD=3 BCRC=1 REG[rrr] RM[nnn] FIX_ROUND_LEN128() AVX512_ROUND() not64 NOEVSR ZEROING=0 MASK=0 +OPERANDS: REG0=GPR32_R():w:d:i32:TXT=ROUNDC REG1=XMM_B3():r:dq:f32 +IFORM: VCVTSS2SI_GPR32i32_XMMf32_AVX512 +PATTERN: EVV 
0x2D VF3 V0F MOD[0b11] MOD=3 BCRC=1 REG[rrr] RM[nnn] FIX_ROUND_LEN128() AVX512_ROUND() mode64 W0 NOEVSR ZEROING=0 MASK=0 EVEXRR_ONE OPERANDS: REG0=GPR32_R():w:d:i32:TXT=ROUNDC REG1=XMM_B3():r:dq:f32 IFORM: VCVTSS2SI_GPR32i32_XMMf32_AVX512 } @@ -13557,7 +13588,10 @@ ISA_SET: AVX512F_SCALAR EXCEPTIONS: AVX512-E3NF REAL_OPCODE: Y ATTRIBUTES: DISP8_GPR_WRITER_LDOP_D MXCSR SIMD_SCALAR -PATTERN: EVV 0x2D VF3 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() W0 NOEVSR ZEROING=0 MASK=0 ESIZE_32_BITS() NELEM_GPR_WRITER_LDOP_D() +PATTERN: EVV 0x2D VF3 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() not64 NOEVSR ZEROING=0 MASK=0 ESIZE_32_BITS() NELEM_GPR_WRITER_LDOP_D() +OPERANDS: REG0=GPR32_R():w:d:i32 MEM0:r:d:f32 +IFORM: VCVTSS2SI_GPR32i32_MEMf32_AVX512 +PATTERN: EVV 0x2D VF3 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() mode64 W0 NOEVSR ZEROING=0 MASK=0 ESIZE_32_BITS() NELEM_GPR_WRITER_LDOP_D() EVEXRR_ONE OPERANDS: REG0=GPR32_R():w:d:i32 MEM0:r:d:f32 IFORM: VCVTSS2SI_GPR32i32_MEMf32_AVX512 } @@ -13573,7 +13607,7 @@ ISA_SET: AVX512F_SCALAR EXCEPTIONS: AVX512-E3NF REAL_OPCODE: Y ATTRIBUTES: MXCSR SIMD_SCALAR -PATTERN: EVV 0x2D VF3 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] W1 mode64 NOEVSR ZEROING=0 MASK=0 +PATTERN: EVV 0x2D VF3 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] mode64 W1 NOEVSR ZEROING=0 MASK=0 EVEXRR_ONE OPERANDS: REG0=GPR64_R():w:q:i64 REG1=XMM_B3():r:dq:f32 IFORM: VCVTSS2SI_GPR64i64_XMMf32_AVX512 } @@ -13587,7 +13621,7 @@ ISA_SET: AVX512F_SCALAR EXCEPTIONS: AVX512-E3NF REAL_OPCODE: Y ATTRIBUTES: MXCSR SIMD_SCALAR -PATTERN: EVV 0x2D VF3 V0F MOD[0b11] MOD=3 BCRC=1 REG[rrr] RM[nnn] FIX_ROUND_LEN128() AVX512_ROUND() W1 mode64 NOEVSR ZEROING=0 MASK=0 +PATTERN: EVV 0x2D VF3 V0F MOD[0b11] MOD=3 BCRC=1 REG[rrr] RM[nnn] FIX_ROUND_LEN128() AVX512_ROUND() mode64 W1 NOEVSR ZEROING=0 MASK=0 EVEXRR_ONE OPERANDS: REG0=GPR64_R():w:q:i64:TXT=ROUNDC REG1=XMM_B3():r:dq:f32 IFORM: VCVTSS2SI_GPR64i64_XMMf32_AVX512 } @@ -13601,7 +13635,7 @@ ISA_SET: AVX512F_SCALAR 
EXCEPTIONS: AVX512-E3NF REAL_OPCODE: Y ATTRIBUTES: DISP8_GPR_WRITER_LDOP_D MXCSR SIMD_SCALAR -PATTERN: EVV 0x2D VF3 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() W1 mode64 NOEVSR ZEROING=0 MASK=0 ESIZE_32_BITS() NELEM_GPR_WRITER_LDOP_D() +PATTERN: EVV 0x2D VF3 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() mode64 W1 NOEVSR ZEROING=0 MASK=0 ESIZE_32_BITS() NELEM_GPR_WRITER_LDOP_D() EVEXRR_ONE OPERANDS: REG0=GPR64_R():w:q:i64 MEM0:r:d:f32 IFORM: VCVTSS2SI_GPR64i64_MEMf32_AVX512 } @@ -13617,7 +13651,10 @@ ISA_SET: AVX512F_SCALAR EXCEPTIONS: AVX512-E3NF REAL_OPCODE: Y ATTRIBUTES: MXCSR SIMD_SCALAR -PATTERN: EVV 0x79 VF3 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] W0 NOEVSR ZEROING=0 MASK=0 +PATTERN: EVV 0x79 VF3 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] not64 NOEVSR ZEROING=0 MASK=0 +OPERANDS: REG0=GPR32_R():w:d:u32 REG1=XMM_B3():r:dq:f32 +IFORM: VCVTSS2USI_GPR32u32_XMMf32_AVX512 +PATTERN: EVV 0x79 VF3 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] mode64 W0 NOEVSR ZEROING=0 MASK=0 EVEXRR_ONE OPERANDS: REG0=GPR32_R():w:d:u32 REG1=XMM_B3():r:dq:f32 IFORM: VCVTSS2USI_GPR32u32_XMMf32_AVX512 } @@ -13631,7 +13668,10 @@ ISA_SET: AVX512F_SCALAR EXCEPTIONS: AVX512-E3NF REAL_OPCODE: Y ATTRIBUTES: MXCSR SIMD_SCALAR -PATTERN: EVV 0x79 VF3 V0F MOD[0b11] MOD=3 BCRC=1 REG[rrr] RM[nnn] FIX_ROUND_LEN128() AVX512_ROUND() W0 NOEVSR ZEROING=0 MASK=0 +PATTERN: EVV 0x79 VF3 V0F MOD[0b11] MOD=3 BCRC=1 REG[rrr] RM[nnn] FIX_ROUND_LEN128() AVX512_ROUND() not64 NOEVSR ZEROING=0 MASK=0 +OPERANDS: REG0=GPR32_R():w:d:u32:TXT=ROUNDC REG1=XMM_B3():r:dq:f32 +IFORM: VCVTSS2USI_GPR32u32_XMMf32_AVX512 +PATTERN: EVV 0x79 VF3 V0F MOD[0b11] MOD=3 BCRC=1 REG[rrr] RM[nnn] FIX_ROUND_LEN128() AVX512_ROUND() mode64 W0 NOEVSR ZEROING=0 MASK=0 EVEXRR_ONE OPERANDS: REG0=GPR32_R():w:d:u32:TXT=ROUNDC REG1=XMM_B3():r:dq:f32 IFORM: VCVTSS2USI_GPR32u32_XMMf32_AVX512 } @@ -13645,7 +13685,10 @@ ISA_SET: AVX512F_SCALAR EXCEPTIONS: AVX512-E3NF REAL_OPCODE: Y ATTRIBUTES: DISP8_GPR_WRITER_LDOP_D MXCSR SIMD_SCALAR 
-PATTERN: EVV 0x79 VF3 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() W0 NOEVSR ZEROING=0 MASK=0 ESIZE_32_BITS() NELEM_GPR_WRITER_LDOP_D() +PATTERN: EVV 0x79 VF3 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() not64 NOEVSR ZEROING=0 MASK=0 ESIZE_32_BITS() NELEM_GPR_WRITER_LDOP_D() +OPERANDS: REG0=GPR32_R():w:d:u32 MEM0:r:d:f32 +IFORM: VCVTSS2USI_GPR32u32_MEMf32_AVX512 +PATTERN: EVV 0x79 VF3 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() mode64 W0 NOEVSR ZEROING=0 MASK=0 ESIZE_32_BITS() NELEM_GPR_WRITER_LDOP_D() EVEXRR_ONE OPERANDS: REG0=GPR32_R():w:d:u32 MEM0:r:d:f32 IFORM: VCVTSS2USI_GPR32u32_MEMf32_AVX512 } @@ -13661,7 +13704,7 @@ ISA_SET: AVX512F_SCALAR EXCEPTIONS: AVX512-E3NF REAL_OPCODE: Y ATTRIBUTES: MXCSR SIMD_SCALAR -PATTERN: EVV 0x79 VF3 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] W1 mode64 NOEVSR ZEROING=0 MASK=0 +PATTERN: EVV 0x79 VF3 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] mode64 W1 NOEVSR ZEROING=0 MASK=0 EVEXRR_ONE OPERANDS: REG0=GPR64_R():w:q:u64 REG1=XMM_B3():r:dq:f32 IFORM: VCVTSS2USI_GPR64u64_XMMf32_AVX512 } @@ -13675,7 +13718,7 @@ ISA_SET: AVX512F_SCALAR EXCEPTIONS: AVX512-E3NF REAL_OPCODE: Y ATTRIBUTES: MXCSR SIMD_SCALAR -PATTERN: EVV 0x79 VF3 V0F MOD[0b11] MOD=3 BCRC=1 REG[rrr] RM[nnn] FIX_ROUND_LEN128() AVX512_ROUND() W1 mode64 NOEVSR ZEROING=0 MASK=0 +PATTERN: EVV 0x79 VF3 V0F MOD[0b11] MOD=3 BCRC=1 REG[rrr] RM[nnn] FIX_ROUND_LEN128() AVX512_ROUND() mode64 W1 NOEVSR ZEROING=0 MASK=0 EVEXRR_ONE OPERANDS: REG0=GPR64_R():w:q:u64:TXT=ROUNDC REG1=XMM_B3():r:dq:f32 IFORM: VCVTSS2USI_GPR64u64_XMMf32_AVX512 } @@ -13689,7 +13732,7 @@ ISA_SET: AVX512F_SCALAR EXCEPTIONS: AVX512-E3NF REAL_OPCODE: Y ATTRIBUTES: DISP8_GPR_WRITER_LDOP_D MXCSR SIMD_SCALAR -PATTERN: EVV 0x79 VF3 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() W1 mode64 NOEVSR ZEROING=0 MASK=0 ESIZE_32_BITS() NELEM_GPR_WRITER_LDOP_D() +PATTERN: EVV 0x79 VF3 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() mode64 W1 NOEVSR ZEROING=0 MASK=0 ESIZE_32_BITS() 
NELEM_GPR_WRITER_LDOP_D() EVEXRR_ONE OPERANDS: REG0=GPR64_R():w:q:u64 MEM0:r:d:f32 IFORM: VCVTSS2USI_GPR64u64_MEMf32_AVX512 } @@ -13881,7 +13924,10 @@ ISA_SET: AVX512F_SCALAR EXCEPTIONS: AVX512-E3NF REAL_OPCODE: Y ATTRIBUTES: MXCSR SIMD_SCALAR -PATTERN: EVV 0x2C VF2 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] W0 NOEVSR ZEROING=0 MASK=0 +PATTERN: EVV 0x2C VF2 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] not64 NOEVSR ZEROING=0 MASK=0 +OPERANDS: REG0=GPR32_R():w:d:i32 REG1=XMM_B3():r:dq:f64 +IFORM: VCVTTSD2SI_GPR32i32_XMMf64_AVX512 +PATTERN: EVV 0x2C VF2 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] mode64 W0 NOEVSR ZEROING=0 MASK=0 EVEXRR_ONE OPERANDS: REG0=GPR32_R():w:d:i32 REG1=XMM_B3():r:dq:f64 IFORM: VCVTTSD2SI_GPR32i32_XMMf64_AVX512 } @@ -13895,7 +13941,10 @@ ISA_SET: AVX512F_SCALAR EXCEPTIONS: AVX512-E3NF REAL_OPCODE: Y ATTRIBUTES: MXCSR SIMD_SCALAR -PATTERN: EVV 0x2C VF2 V0F MOD[0b11] MOD=3 BCRC=1 REG[rrr] RM[nnn] FIX_ROUND_LEN128() SAE() W0 NOEVSR ZEROING=0 MASK=0 +PATTERN: EVV 0x2C VF2 V0F MOD[0b11] MOD=3 BCRC=1 REG[rrr] RM[nnn] FIX_ROUND_LEN128() SAE() not64 NOEVSR ZEROING=0 MASK=0 +OPERANDS: REG0=GPR32_R():w:d:i32:TXT=SAESTR REG1=XMM_B3():r:dq:f64 +IFORM: VCVTTSD2SI_GPR32i32_XMMf64_AVX512 +PATTERN: EVV 0x2C VF2 V0F MOD[0b11] MOD=3 BCRC=1 REG[rrr] RM[nnn] FIX_ROUND_LEN128() SAE() mode64 W0 NOEVSR ZEROING=0 MASK=0 EVEXRR_ONE OPERANDS: REG0=GPR32_R():w:d:i32:TXT=SAESTR REG1=XMM_B3():r:dq:f64 IFORM: VCVTTSD2SI_GPR32i32_XMMf64_AVX512 } @@ -13909,7 +13958,10 @@ ISA_SET: AVX512F_SCALAR EXCEPTIONS: AVX512-E3NF REAL_OPCODE: Y ATTRIBUTES: MXCSR SIMD_SCALAR DISP8_GPR_WRITER_LDOP_Q -PATTERN: EVV 0x2C VF2 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() W0 NOEVSR ZEROING=0 MASK=0 ESIZE_64_BITS() NELEM_GPR_WRITER_LDOP_Q() +PATTERN: EVV 0x2C VF2 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() not64 NOEVSR ZEROING=0 MASK=0 ESIZE_64_BITS() NELEM_GPR_WRITER_LDOP_Q() +OPERANDS: REG0=GPR32_R():w:d:i32 MEM0:r:q:f64 +IFORM: VCVTTSD2SI_GPR32i32_MEMf64_AVX512 +PATTERN: EVV 0x2C 
VF2 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() mode64 W0 NOEVSR ZEROING=0 MASK=0 ESIZE_64_BITS() NELEM_GPR_WRITER_LDOP_Q() EVEXRR_ONE OPERANDS: REG0=GPR32_R():w:d:i32 MEM0:r:q:f64 IFORM: VCVTTSD2SI_GPR32i32_MEMf64_AVX512 } @@ -13925,7 +13977,7 @@ ISA_SET: AVX512F_SCALAR EXCEPTIONS: AVX512-E3NF REAL_OPCODE: Y ATTRIBUTES: MXCSR SIMD_SCALAR -PATTERN: EVV 0x2C VF2 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] W1 mode64 NOEVSR ZEROING=0 MASK=0 +PATTERN: EVV 0x2C VF2 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] mode64 W1 NOEVSR ZEROING=0 MASK=0 EVEXRR_ONE OPERANDS: REG0=GPR64_R():w:q:i64 REG1=XMM_B3():r:dq:f64 IFORM: VCVTTSD2SI_GPR64i64_XMMf64_AVX512 } @@ -13939,7 +13991,7 @@ ISA_SET: AVX512F_SCALAR EXCEPTIONS: AVX512-E3NF REAL_OPCODE: Y ATTRIBUTES: MXCSR SIMD_SCALAR -PATTERN: EVV 0x2C VF2 V0F MOD[0b11] MOD=3 BCRC=1 REG[rrr] RM[nnn] FIX_ROUND_LEN128() SAE() W1 mode64 NOEVSR ZEROING=0 MASK=0 +PATTERN: EVV 0x2C VF2 V0F MOD[0b11] MOD=3 BCRC=1 REG[rrr] RM[nnn] FIX_ROUND_LEN128() SAE() mode64 W1 NOEVSR ZEROING=0 MASK=0 EVEXRR_ONE OPERANDS: REG0=GPR64_R():w:q:i64:TXT=SAESTR REG1=XMM_B3():r:dq:f64 IFORM: VCVTTSD2SI_GPR64i64_XMMf64_AVX512 } @@ -13953,7 +14005,7 @@ ISA_SET: AVX512F_SCALAR EXCEPTIONS: AVX512-E3NF REAL_OPCODE: Y ATTRIBUTES: MXCSR SIMD_SCALAR DISP8_GPR_WRITER_LDOP_Q -PATTERN: EVV 0x2C VF2 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() W1 mode64 NOEVSR ZEROING=0 MASK=0 ESIZE_64_BITS() NELEM_GPR_WRITER_LDOP_Q() +PATTERN: EVV 0x2C VF2 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() mode64 W1 NOEVSR ZEROING=0 MASK=0 ESIZE_64_BITS() NELEM_GPR_WRITER_LDOP_Q() EVEXRR_ONE OPERANDS: REG0=GPR64_R():w:q:i64 MEM0:r:q:f64 IFORM: VCVTTSD2SI_GPR64i64_MEMf64_AVX512 } @@ -13969,7 +14021,10 @@ ISA_SET: AVX512F_SCALAR EXCEPTIONS: AVX512-E3NF REAL_OPCODE: Y ATTRIBUTES: MXCSR SIMD_SCALAR -PATTERN: EVV 0x78 VF2 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] W0 NOEVSR ZEROING=0 MASK=0 +PATTERN: EVV 0x78 VF2 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] not64 NOEVSR ZEROING=0 MASK=0 
+OPERANDS: REG0=GPR32_R():w:d:u32 REG1=XMM_B3():r:dq:f64 +IFORM: VCVTTSD2USI_GPR32u32_XMMf64_AVX512 +PATTERN: EVV 0x78 VF2 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] mode64 W0 NOEVSR ZEROING=0 MASK=0 EVEXRR_ONE OPERANDS: REG0=GPR32_R():w:d:u32 REG1=XMM_B3():r:dq:f64 IFORM: VCVTTSD2USI_GPR32u32_XMMf64_AVX512 } @@ -13983,7 +14038,10 @@ ISA_SET: AVX512F_SCALAR EXCEPTIONS: AVX512-E3NF REAL_OPCODE: Y ATTRIBUTES: MXCSR SIMD_SCALAR -PATTERN: EVV 0x78 VF2 V0F MOD[0b11] MOD=3 BCRC=1 REG[rrr] RM[nnn] FIX_ROUND_LEN128() SAE() W0 NOEVSR ZEROING=0 MASK=0 +PATTERN: EVV 0x78 VF2 V0F MOD[0b11] MOD=3 BCRC=1 REG[rrr] RM[nnn] FIX_ROUND_LEN128() SAE() not64 NOEVSR ZEROING=0 MASK=0 +OPERANDS: REG0=GPR32_R():w:d:u32:TXT=SAESTR REG1=XMM_B3():r:dq:f64 +IFORM: VCVTTSD2USI_GPR32u32_XMMf64_AVX512 +PATTERN: EVV 0x78 VF2 V0F MOD[0b11] MOD=3 BCRC=1 REG[rrr] RM[nnn] FIX_ROUND_LEN128() SAE() mode64 W0 NOEVSR ZEROING=0 MASK=0 EVEXRR_ONE OPERANDS: REG0=GPR32_R():w:d:u32:TXT=SAESTR REG1=XMM_B3():r:dq:f64 IFORM: VCVTTSD2USI_GPR32u32_XMMf64_AVX512 } @@ -13997,7 +14055,10 @@ ISA_SET: AVX512F_SCALAR EXCEPTIONS: AVX512-E3NF REAL_OPCODE: Y ATTRIBUTES: MXCSR SIMD_SCALAR DISP8_GPR_WRITER_LDOP_Q -PATTERN: EVV 0x78 VF2 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() W0 NOEVSR ZEROING=0 MASK=0 ESIZE_64_BITS() NELEM_GPR_WRITER_LDOP_Q() +PATTERN: EVV 0x78 VF2 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() not64 NOEVSR ZEROING=0 MASK=0 ESIZE_64_BITS() NELEM_GPR_WRITER_LDOP_Q() +OPERANDS: REG0=GPR32_R():w:d:u32 MEM0:r:q:f64 +IFORM: VCVTTSD2USI_GPR32u32_MEMf64_AVX512 +PATTERN: EVV 0x78 VF2 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() mode64 W0 NOEVSR ZEROING=0 MASK=0 ESIZE_64_BITS() NELEM_GPR_WRITER_LDOP_Q() EVEXRR_ONE OPERANDS: REG0=GPR32_R():w:d:u32 MEM0:r:q:f64 IFORM: VCVTTSD2USI_GPR32u32_MEMf64_AVX512 } @@ -14013,7 +14074,7 @@ ISA_SET: AVX512F_SCALAR EXCEPTIONS: AVX512-E3NF REAL_OPCODE: Y ATTRIBUTES: MXCSR SIMD_SCALAR -PATTERN: EVV 0x78 VF2 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] W1 mode64 
NOEVSR ZEROING=0 MASK=0 +PATTERN: EVV 0x78 VF2 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] mode64 W1 NOEVSR ZEROING=0 MASK=0 EVEXRR_ONE OPERANDS: REG0=GPR64_R():w:q:u64 REG1=XMM_B3():r:dq:f64 IFORM: VCVTTSD2USI_GPR64u64_XMMf64_AVX512 } @@ -14027,7 +14088,7 @@ ISA_SET: AVX512F_SCALAR EXCEPTIONS: AVX512-E3NF REAL_OPCODE: Y ATTRIBUTES: MXCSR SIMD_SCALAR -PATTERN: EVV 0x78 VF2 V0F MOD[0b11] MOD=3 BCRC=1 REG[rrr] RM[nnn] FIX_ROUND_LEN128() SAE() W1 mode64 NOEVSR ZEROING=0 MASK=0 +PATTERN: EVV 0x78 VF2 V0F MOD[0b11] MOD=3 BCRC=1 REG[rrr] RM[nnn] FIX_ROUND_LEN128() SAE() mode64 W1 NOEVSR ZEROING=0 MASK=0 EVEXRR_ONE OPERANDS: REG0=GPR64_R():w:q:u64:TXT=SAESTR REG1=XMM_B3():r:dq:f64 IFORM: VCVTTSD2USI_GPR64u64_XMMf64_AVX512 } @@ -14041,7 +14102,7 @@ ISA_SET: AVX512F_SCALAR EXCEPTIONS: AVX512-E3NF REAL_OPCODE: Y ATTRIBUTES: MXCSR SIMD_SCALAR DISP8_GPR_WRITER_LDOP_Q -PATTERN: EVV 0x78 VF2 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() W1 mode64 NOEVSR ZEROING=0 MASK=0 ESIZE_64_BITS() NELEM_GPR_WRITER_LDOP_Q() +PATTERN: EVV 0x78 VF2 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() mode64 W1 NOEVSR ZEROING=0 MASK=0 ESIZE_64_BITS() NELEM_GPR_WRITER_LDOP_Q() EVEXRR_ONE OPERANDS: REG0=GPR64_R():w:q:u64 MEM0:r:q:f64 IFORM: VCVTTSD2USI_GPR64u64_MEMf64_AVX512 } @@ -14057,7 +14118,10 @@ ISA_SET: AVX512F_SCALAR EXCEPTIONS: AVX512-E3NF REAL_OPCODE: Y ATTRIBUTES: MXCSR SIMD_SCALAR -PATTERN: EVV 0x2C VF3 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] W0 NOEVSR ZEROING=0 MASK=0 +PATTERN: EVV 0x2C VF3 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] not64 NOEVSR ZEROING=0 MASK=0 +OPERANDS: REG0=GPR32_R():w:d:i32 REG1=XMM_B3():r:dq:f32 +IFORM: VCVTTSS2SI_GPR32i32_XMMf32_AVX512 +PATTERN: EVV 0x2C VF3 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] mode64 W0 NOEVSR ZEROING=0 MASK=0 EVEXRR_ONE OPERANDS: REG0=GPR32_R():w:d:i32 REG1=XMM_B3():r:dq:f32 IFORM: VCVTTSS2SI_GPR32i32_XMMf32_AVX512 } @@ -14071,7 +14135,10 @@ ISA_SET: AVX512F_SCALAR EXCEPTIONS: AVX512-E3NF REAL_OPCODE: Y ATTRIBUTES: MXCSR 
SIMD_SCALAR -PATTERN: EVV 0x2C VF3 V0F MOD[0b11] MOD=3 BCRC=1 REG[rrr] RM[nnn] FIX_ROUND_LEN128() SAE() W0 NOEVSR ZEROING=0 MASK=0 +PATTERN: EVV 0x2C VF3 V0F MOD[0b11] MOD=3 BCRC=1 REG[rrr] RM[nnn] FIX_ROUND_LEN128() SAE() not64 NOEVSR ZEROING=0 MASK=0 +OPERANDS: REG0=GPR32_R():w:d:i32:TXT=SAESTR REG1=XMM_B3():r:dq:f32 +IFORM: VCVTTSS2SI_GPR32i32_XMMf32_AVX512 +PATTERN: EVV 0x2C VF3 V0F MOD[0b11] MOD=3 BCRC=1 REG[rrr] RM[nnn] FIX_ROUND_LEN128() SAE() mode64 W0 NOEVSR ZEROING=0 MASK=0 EVEXRR_ONE OPERANDS: REG0=GPR32_R():w:d:i32:TXT=SAESTR REG1=XMM_B3():r:dq:f32 IFORM: VCVTTSS2SI_GPR32i32_XMMf32_AVX512 } @@ -14085,7 +14152,10 @@ ISA_SET: AVX512F_SCALAR EXCEPTIONS: AVX512-E3NF REAL_OPCODE: Y ATTRIBUTES: DISP8_GPR_WRITER_LDOP_D MXCSR SIMD_SCALAR -PATTERN: EVV 0x2C VF3 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() W0 NOEVSR ZEROING=0 MASK=0 ESIZE_32_BITS() NELEM_GPR_WRITER_LDOP_D() +PATTERN: EVV 0x2C VF3 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() not64 NOEVSR ZEROING=0 MASK=0 ESIZE_32_BITS() NELEM_GPR_WRITER_LDOP_D() +OPERANDS: REG0=GPR32_R():w:d:i32 MEM0:r:d:f32 +IFORM: VCVTTSS2SI_GPR32i32_MEMf32_AVX512 +PATTERN: EVV 0x2C VF3 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() mode64 W0 NOEVSR ZEROING=0 MASK=0 ESIZE_32_BITS() NELEM_GPR_WRITER_LDOP_D() EVEXRR_ONE OPERANDS: REG0=GPR32_R():w:d:i32 MEM0:r:d:f32 IFORM: VCVTTSS2SI_GPR32i32_MEMf32_AVX512 } @@ -14101,7 +14171,7 @@ ISA_SET: AVX512F_SCALAR EXCEPTIONS: AVX512-E3NF REAL_OPCODE: Y ATTRIBUTES: MXCSR SIMD_SCALAR -PATTERN: EVV 0x2C VF3 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] W1 mode64 NOEVSR ZEROING=0 MASK=0 +PATTERN: EVV 0x2C VF3 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] mode64 W1 NOEVSR ZEROING=0 MASK=0 EVEXRR_ONE OPERANDS: REG0=GPR64_R():w:q:i64 REG1=XMM_B3():r:dq:f32 IFORM: VCVTTSS2SI_GPR64i64_XMMf32_AVX512 } @@ -14115,7 +14185,7 @@ ISA_SET: AVX512F_SCALAR EXCEPTIONS: AVX512-E3NF REAL_OPCODE: Y ATTRIBUTES: MXCSR SIMD_SCALAR -PATTERN: EVV 0x2C VF3 V0F MOD[0b11] MOD=3 BCRC=1 REG[rrr] RM[nnn] 
FIX_ROUND_LEN128() SAE() W1 mode64 NOEVSR ZEROING=0 MASK=0 +PATTERN: EVV 0x2C VF3 V0F MOD[0b11] MOD=3 BCRC=1 REG[rrr] RM[nnn] FIX_ROUND_LEN128() SAE() mode64 W1 NOEVSR ZEROING=0 MASK=0 EVEXRR_ONE OPERANDS: REG0=GPR64_R():w:q:i64:TXT=SAESTR REG1=XMM_B3():r:dq:f32 IFORM: VCVTTSS2SI_GPR64i64_XMMf32_AVX512 } @@ -14129,7 +14199,7 @@ ISA_SET: AVX512F_SCALAR EXCEPTIONS: AVX512-E3NF REAL_OPCODE: Y ATTRIBUTES: DISP8_GPR_WRITER_LDOP_D MXCSR SIMD_SCALAR -PATTERN: EVV 0x2C VF3 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() W1 mode64 NOEVSR ZEROING=0 MASK=0 ESIZE_32_BITS() NELEM_GPR_WRITER_LDOP_D() +PATTERN: EVV 0x2C VF3 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() mode64 W1 NOEVSR ZEROING=0 MASK=0 ESIZE_32_BITS() NELEM_GPR_WRITER_LDOP_D() EVEXRR_ONE OPERANDS: REG0=GPR64_R():w:q:i64 MEM0:r:d:f32 IFORM: VCVTTSS2SI_GPR64i64_MEMf32_AVX512 } @@ -14145,7 +14215,10 @@ ISA_SET: AVX512F_SCALAR EXCEPTIONS: AVX512-E3NF REAL_OPCODE: Y ATTRIBUTES: MXCSR SIMD_SCALAR -PATTERN: EVV 0x78 VF3 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] W0 NOEVSR ZEROING=0 MASK=0 +PATTERN: EVV 0x78 VF3 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] not64 NOEVSR ZEROING=0 MASK=0 +OPERANDS: REG0=GPR32_R():w:d:u32 REG1=XMM_B3():r:dq:f32 +IFORM: VCVTTSS2USI_GPR32u32_XMMf32_AVX512 +PATTERN: EVV 0x78 VF3 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] mode64 W0 NOEVSR ZEROING=0 MASK=0 EVEXRR_ONE OPERANDS: REG0=GPR32_R():w:d:u32 REG1=XMM_B3():r:dq:f32 IFORM: VCVTTSS2USI_GPR32u32_XMMf32_AVX512 } @@ -14159,7 +14232,10 @@ ISA_SET: AVX512F_SCALAR EXCEPTIONS: AVX512-E3NF REAL_OPCODE: Y ATTRIBUTES: MXCSR SIMD_SCALAR -PATTERN: EVV 0x78 VF3 V0F MOD[0b11] MOD=3 BCRC=1 REG[rrr] RM[nnn] FIX_ROUND_LEN128() SAE() W0 NOEVSR ZEROING=0 MASK=0 +PATTERN: EVV 0x78 VF3 V0F MOD[0b11] MOD=3 BCRC=1 REG[rrr] RM[nnn] FIX_ROUND_LEN128() SAE() not64 NOEVSR ZEROING=0 MASK=0 +OPERANDS: REG0=GPR32_R():w:d:u32:TXT=SAESTR REG1=XMM_B3():r:dq:f32 +IFORM: VCVTTSS2USI_GPR32u32_XMMf32_AVX512 +PATTERN: EVV 0x78 VF3 V0F MOD[0b11] MOD=3 BCRC=1 REG[rrr] 
RM[nnn] FIX_ROUND_LEN128() SAE() mode64 W0 NOEVSR ZEROING=0 MASK=0 EVEXRR_ONE OPERANDS: REG0=GPR32_R():w:d:u32:TXT=SAESTR REG1=XMM_B3():r:dq:f32 IFORM: VCVTTSS2USI_GPR32u32_XMMf32_AVX512 } @@ -14173,7 +14249,10 @@ ISA_SET: AVX512F_SCALAR EXCEPTIONS: AVX512-E3NF REAL_OPCODE: Y ATTRIBUTES: DISP8_GPR_WRITER_LDOP_D MXCSR SIMD_SCALAR -PATTERN: EVV 0x78 VF3 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() W0 NOEVSR ZEROING=0 MASK=0 ESIZE_32_BITS() NELEM_GPR_WRITER_LDOP_D() +PATTERN: EVV 0x78 VF3 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() not64 NOEVSR ZEROING=0 MASK=0 ESIZE_32_BITS() NELEM_GPR_WRITER_LDOP_D() +OPERANDS: REG0=GPR32_R():w:d:u32 MEM0:r:d:f32 +IFORM: VCVTTSS2USI_GPR32u32_MEMf32_AVX512 +PATTERN: EVV 0x78 VF3 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() mode64 W0 NOEVSR ZEROING=0 MASK=0 ESIZE_32_BITS() NELEM_GPR_WRITER_LDOP_D() EVEXRR_ONE OPERANDS: REG0=GPR32_R():w:d:u32 MEM0:r:d:f32 IFORM: VCVTTSS2USI_GPR32u32_MEMf32_AVX512 } @@ -14189,7 +14268,7 @@ ISA_SET: AVX512F_SCALAR EXCEPTIONS: AVX512-E3NF REAL_OPCODE: Y ATTRIBUTES: MXCSR SIMD_SCALAR -PATTERN: EVV 0x78 VF3 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] W1 mode64 NOEVSR ZEROING=0 MASK=0 +PATTERN: EVV 0x78 VF3 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] mode64 W1 NOEVSR ZEROING=0 MASK=0 EVEXRR_ONE OPERANDS: REG0=GPR64_R():w:q:u64 REG1=XMM_B3():r:dq:f32 IFORM: VCVTTSS2USI_GPR64u64_XMMf32_AVX512 } @@ -14203,7 +14282,7 @@ ISA_SET: AVX512F_SCALAR EXCEPTIONS: AVX512-E3NF REAL_OPCODE: Y ATTRIBUTES: MXCSR SIMD_SCALAR -PATTERN: EVV 0x78 VF3 V0F MOD[0b11] MOD=3 BCRC=1 REG[rrr] RM[nnn] FIX_ROUND_LEN128() SAE() W1 mode64 NOEVSR ZEROING=0 MASK=0 +PATTERN: EVV 0x78 VF3 V0F MOD[0b11] MOD=3 BCRC=1 REG[rrr] RM[nnn] FIX_ROUND_LEN128() SAE() mode64 W1 NOEVSR ZEROING=0 MASK=0 EVEXRR_ONE OPERANDS: REG0=GPR64_R():w:q:u64:TXT=SAESTR REG1=XMM_B3():r:dq:f32 IFORM: VCVTTSS2USI_GPR64u64_XMMf32_AVX512 } @@ -14217,7 +14296,7 @@ ISA_SET: AVX512F_SCALAR EXCEPTIONS: AVX512-E3NF REAL_OPCODE: Y ATTRIBUTES: 
DISP8_GPR_WRITER_LDOP_D MXCSR SIMD_SCALAR -PATTERN: EVV 0x78 VF3 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() W1 mode64 NOEVSR ZEROING=0 MASK=0 ESIZE_32_BITS() NELEM_GPR_WRITER_LDOP_D() +PATTERN: EVV 0x78 VF3 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() mode64 W1 NOEVSR ZEROING=0 MASK=0 ESIZE_32_BITS() NELEM_GPR_WRITER_LDOP_D() EVEXRR_ONE OPERANDS: REG0=GPR64_R():w:q:u64 MEM0:r:d:f32 IFORM: VCVTTSS2USI_GPR64u64_MEMf32_AVX512 } @@ -14307,7 +14386,10 @@ ISA_SET: AVX512F_SCALAR EXCEPTIONS: AVX512-E10NF REAL_OPCODE: Y ATTRIBUTES: SIMD_SCALAR -PATTERN: EVV 0x7B VF2 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] W0 ZEROING=0 MASK=0 +PATTERN: EVV 0x7B VF2 V0F MOD[0b11] MOD=3 REG[rrr] RM[nnn] not64 ZEROING=0 MASK=0 +OPERANDS: REG0=XMM_R3():w:dq:f64 REG1=XMM_N3():r:dq:f64 REG2=GPR32_B():r:d:u32 +IFORM: VCVTUSI2SD_XMMf64_XMMf64_GPR32u32_AVX512 +PATTERN: EVV 0x7B VF2 V0F MOD[0b11] MOD=3 REG[rrr] RM[nnn] mode64 W0 ZEROING=0 MASK=0 OPERANDS: REG0=XMM_R3():w:dq:f64 REG1=XMM_N3():r:dq:f64 REG2=GPR32_B():r:d:u32 IFORM: VCVTUSI2SD_XMMf64_XMMf64_GPR32u32_AVX512 } @@ -14321,7 +14403,10 @@ ISA_SET: AVX512F_SCALAR EXCEPTIONS: AVX512-E10NF REAL_OPCODE: Y ATTRIBUTES: SIMD_SCALAR DISP8_GPR_READER -PATTERN: EVV 0x7B VF2 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() W0 ZEROING=0 MASK=0 ESIZE_32_BITS() NELEM_GPR_READER() +PATTERN: EVV 0x7B VF2 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] MODRM() not64 ZEROING=0 MASK=0 BCRC=0 ESIZE_32_BITS() NELEM_GPR_READER() +OPERANDS: REG0=XMM_R3():w:dq:f64 REG1=XMM_N3():r:dq:f64 MEM0:r:d:u32 +IFORM: VCVTUSI2SD_XMMf64_XMMf64_MEMu32_AVX512 +PATTERN: EVV 0x7B VF2 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] MODRM() mode64 W0 ZEROING=0 MASK=0 BCRC=0 ESIZE_32_BITS() NELEM_GPR_READER() OPERANDS: REG0=XMM_R3():w:dq:f64 REG1=XMM_N3():r:dq:f64 MEM0:r:d:u32 IFORM: VCVTUSI2SD_XMMf64_XMMf64_MEMu32_AVX512 } @@ -14337,7 +14422,7 @@ ISA_SET: AVX512F_SCALAR EXCEPTIONS: AVX512-E3NF REAL_OPCODE: Y ATTRIBUTES: MXCSR SIMD_SCALAR -PATTERN: EVV 0x7B VF2 V0F MOD[0b11] MOD=3 BCRC=0 
REG[rrr] RM[nnn] W1 mode64 ZEROING=0 MASK=0 +PATTERN: EVV 0x7B VF2 V0F MOD[0b11] MOD=3 REG[rrr] RM[nnn] mode64 W1 ZEROING=0 MASK=0 BCRC=0 OPERANDS: REG0=XMM_R3():w:dq:f64 REG1=XMM_N3():r:dq:f64 REG2=GPR64_B():r:q:u64 IFORM: VCVTUSI2SD_XMMf64_XMMf64_GPR64u64_AVX512 } @@ -14351,7 +14436,7 @@ ISA_SET: AVX512F_SCALAR EXCEPTIONS: AVX512-E3NF REAL_OPCODE: Y ATTRIBUTES: MXCSR SIMD_SCALAR -PATTERN: EVV 0x7B VF2 V0F MOD[0b11] MOD=3 BCRC=1 REG[rrr] RM[nnn] FIX_ROUND_LEN128() AVX512_ROUND() W1 mode64 ZEROING=0 MASK=0 +PATTERN: EVV 0x7B VF2 V0F MOD[0b11] MOD=3 REG[rrr] RM[nnn] mode64 W1 ZEROING=0 MASK=0 BCRC=1 FIX_ROUND_LEN128() AVX512_ROUND() OPERANDS: REG0=XMM_R3():w:dq:f64:TXT=ROUNDC REG1=XMM_N3():r:dq:f64 REG2=GPR64_B():r:q:u64 IFORM: VCVTUSI2SD_XMMf64_XMMf64_GPR64u64_AVX512 } @@ -14365,7 +14450,7 @@ ISA_SET: AVX512F_SCALAR EXCEPTIONS: AVX512-E3NF REAL_OPCODE: Y ATTRIBUTES: MXCSR SIMD_SCALAR DISP8_GPR_READER -PATTERN: EVV 0x7B VF2 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() W1 mode64 ZEROING=0 MASK=0 ESIZE_64_BITS() NELEM_GPR_READER() +PATTERN: EVV 0x7B VF2 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] MODRM() mode64 W1 ZEROING=0 MASK=0 BCRC=0 ESIZE_64_BITS() NELEM_GPR_READER() OPERANDS: REG0=XMM_R3():w:dq:f64 REG1=XMM_N3():r:dq:f64 MEM0:r:q:u64 IFORM: VCVTUSI2SD_XMMf64_XMMf64_MEMu64_AVX512 } @@ -14381,7 +14466,10 @@ ISA_SET: AVX512F_SCALAR EXCEPTIONS: AVX512-E3NF REAL_OPCODE: Y ATTRIBUTES: MXCSR SIMD_SCALAR -PATTERN: EVV 0x7B VF3 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] W0 ZEROING=0 MASK=0 +PATTERN: EVV 0x7B VF3 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] not64 ZEROING=0 MASK=0 +OPERANDS: REG0=XMM_R3():w:dq:f32 REG1=XMM_N3():r:dq:f32 REG2=GPR32_B():r:d:u32 +IFORM: VCVTUSI2SS_XMMf32_XMMf32_GPR32u32_AVX512 +PATTERN: EVV 0x7B VF3 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] mode64 W0 ZEROING=0 MASK=0 OPERANDS: REG0=XMM_R3():w:dq:f32 REG1=XMM_N3():r:dq:f32 REG2=GPR32_B():r:d:u32 IFORM: VCVTUSI2SS_XMMf32_XMMf32_GPR32u32_AVX512 } @@ -14395,7 +14483,10 @@ ISA_SET: AVX512F_SCALAR 
EXCEPTIONS: AVX512-E3NF REAL_OPCODE: Y ATTRIBUTES: MXCSR SIMD_SCALAR -PATTERN: EVV 0x7B VF3 V0F MOD[0b11] MOD=3 BCRC=1 REG[rrr] RM[nnn] FIX_ROUND_LEN128() AVX512_ROUND() W0 ZEROING=0 MASK=0 +PATTERN: EVV 0x7B VF3 V0F MOD[0b11] MOD=3 BCRC=1 REG[rrr] RM[nnn] FIX_ROUND_LEN128() AVX512_ROUND() not64 ZEROING=0 MASK=0 +OPERANDS: REG0=XMM_R3():w:dq:f32:TXT=ROUNDC REG1=XMM_N3():r:dq:f32 REG2=GPR32_B():r:d:u32 +IFORM: VCVTUSI2SS_XMMf32_XMMf32_GPR32u32_AVX512 +PATTERN: EVV 0x7B VF3 V0F MOD[0b11] MOD=3 BCRC=1 REG[rrr] RM[nnn] FIX_ROUND_LEN128() AVX512_ROUND() mode64 W0 ZEROING=0 MASK=0 OPERANDS: REG0=XMM_R3():w:dq:f32:TXT=ROUNDC REG1=XMM_N3():r:dq:f32 REG2=GPR32_B():r:d:u32 IFORM: VCVTUSI2SS_XMMf32_XMMf32_GPR32u32_AVX512 } @@ -14409,7 +14500,10 @@ ISA_SET: AVX512F_SCALAR EXCEPTIONS: AVX512-E3NF REAL_OPCODE: Y ATTRIBUTES: MXCSR SIMD_SCALAR DISP8_GPR_READER -PATTERN: EVV 0x7B VF3 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() W0 ZEROING=0 MASK=0 ESIZE_32_BITS() NELEM_GPR_READER() +PATTERN: EVV 0x7B VF3 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() not64 ZEROING=0 MASK=0 ESIZE_32_BITS() NELEM_GPR_READER() +OPERANDS: REG0=XMM_R3():w:dq:f32 REG1=XMM_N3():r:dq:f32 MEM0:r:d:u32 +IFORM: VCVTUSI2SS_XMMf32_XMMf32_MEMu32_AVX512 +PATTERN: EVV 0x7B VF3 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() mode64 W0 ZEROING=0 MASK=0 ESIZE_32_BITS() NELEM_GPR_READER() OPERANDS: REG0=XMM_R3():w:dq:f32 REG1=XMM_N3():r:dq:f32 MEM0:r:d:u32 IFORM: VCVTUSI2SS_XMMf32_XMMf32_MEMu32_AVX512 } @@ -14425,7 +14519,7 @@ ISA_SET: AVX512F_SCALAR EXCEPTIONS: AVX512-E3NF REAL_OPCODE: Y ATTRIBUTES: MXCSR SIMD_SCALAR -PATTERN: EVV 0x7B VF3 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] W1 mode64 ZEROING=0 MASK=0 +PATTERN: EVV 0x7B VF3 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] mode64 W1 ZEROING=0 MASK=0 OPERANDS: REG0=XMM_R3():w:dq:f32 REG1=XMM_N3():r:dq:f32 REG2=GPR64_B():r:q:u64 IFORM: VCVTUSI2SS_XMMf32_XMMf32_GPR64u64_AVX512 } @@ -14439,7 +14533,7 @@ ISA_SET: AVX512F_SCALAR EXCEPTIONS: AVX512-E3NF 
REAL_OPCODE: Y ATTRIBUTES: MXCSR SIMD_SCALAR -PATTERN: EVV 0x7B VF3 V0F MOD[0b11] MOD=3 BCRC=1 REG[rrr] RM[nnn] FIX_ROUND_LEN128() AVX512_ROUND() W1 mode64 ZEROING=0 MASK=0 +PATTERN: EVV 0x7B VF3 V0F MOD[0b11] MOD=3 BCRC=1 REG[rrr] RM[nnn] FIX_ROUND_LEN128() AVX512_ROUND() mode64 W1 ZEROING=0 MASK=0 OPERANDS: REG0=XMM_R3():w:dq:f32:TXT=ROUNDC REG1=XMM_N3():r:dq:f32 REG2=GPR64_B():r:q:u64 IFORM: VCVTUSI2SS_XMMf32_XMMf32_GPR64u64_AVX512 } @@ -14453,7 +14547,7 @@ ISA_SET: AVX512F_SCALAR EXCEPTIONS: AVX512-E3NF REAL_OPCODE: Y ATTRIBUTES: MXCSR SIMD_SCALAR DISP8_GPR_READER -PATTERN: EVV 0x7B VF3 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() W1 mode64 ZEROING=0 MASK=0 ESIZE_64_BITS() NELEM_GPR_READER() +PATTERN: EVV 0x7B VF3 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() mode64 W1 ZEROING=0 MASK=0 ESIZE_64_BITS() NELEM_GPR_READER() OPERANDS: REG0=XMM_R3():w:dq:f32 REG1=XMM_N3():r:dq:f32 MEM0:r:q:u64 IFORM: VCVTUSI2SS_XMMf32_XMMf32_MEMu64_AVX512 } @@ -18722,7 +18816,10 @@ EXTENSION: AVX512EVEX ISA_SET: AVX512F_128N EXCEPTIONS: AVX512-E9NF REAL_OPCODE: Y -PATTERN: EVV 0x6E V66 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] VL128 W0 NOEVSR ZEROING=0 MASK=0 +PATTERN: EVV 0x6E V66 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] VL128 not64 NOEVSR ZEROING=0 MASK=0 +OPERANDS: REG0=XMM_R3():w:dq:u32 REG1=GPR32_B():r:d:u32 +IFORM: VMOVD_XMMu32_GPR32u32_AVX512 +PATTERN: EVV 0x6E V66 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] VL128 mode64 W0 NOEVSR ZEROING=0 MASK=0 OPERANDS: REG0=XMM_R3():w:dq:u32 REG1=GPR32_B():r:d:u32 IFORM: VMOVD_XMMu32_GPR32u32_AVX512 } @@ -18736,7 +18833,10 @@ ISA_SET: AVX512F_128N EXCEPTIONS: AVX512-E9NF REAL_OPCODE: Y ATTRIBUTES: DISP8_GPR_READER -PATTERN: EVV 0x6E V66 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() VL128 W0 NOEVSR ZEROING=0 MASK=0 ESIZE_32_BITS() NELEM_GPR_READER() +PATTERN: EVV 0x6E V66 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() VL128 not64 NOEVSR ZEROING=0 MASK=0 ESIZE_32_BITS() NELEM_GPR_READER() +OPERANDS: 
REG0=XMM_R3():w:dq:u32 MEM0:r:d:u32 +IFORM: VMOVD_XMMu32_MEMu32_AVX512 +PATTERN: EVV 0x6E V66 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() VL128 mode64 W0 NOEVSR ZEROING=0 MASK=0 ESIZE_32_BITS() NELEM_GPR_READER() OPERANDS: REG0=XMM_R3():w:dq:u32 MEM0:r:d:u32 IFORM: VMOVD_XMMu32_MEMu32_AVX512 } @@ -18751,7 +18851,10 @@ EXTENSION: AVX512EVEX ISA_SET: AVX512F_128N EXCEPTIONS: AVX512-E9NF REAL_OPCODE: Y -PATTERN: EVV 0x7E V66 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] VL128 W0 NOEVSR ZEROING=0 MASK=0 +PATTERN: EVV 0x7E V66 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] VL128 not64 NOEVSR ZEROING=0 MASK=0 +OPERANDS: REG0=GPR32_B():w:d:u32 REG1=XMM_R3():r:dq:u32 +IFORM: VMOVD_GPR32u32_XMMu32_AVX512 +PATTERN: EVV 0x7E V66 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] VL128 mode64 W0 NOEVSR ZEROING=0 MASK=0 OPERANDS: REG0=GPR32_B():w:d:u32 REG1=XMM_R3():r:dq:u32 IFORM: VMOVD_GPR32u32_XMMu32_AVX512 } @@ -18765,7 +18868,10 @@ ISA_SET: AVX512F_128N EXCEPTIONS: AVX512-E9NF REAL_OPCODE: Y ATTRIBUTES: DISP8_GPR_WRITER_STORE -PATTERN: EVV 0x7E V66 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() VL128 W0 NOEVSR ZEROING=0 MASK=0 ESIZE_32_BITS() NELEM_GPR_WRITER_STORE() +PATTERN: EVV 0x7E V66 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() VL128 not64 NOEVSR ZEROING=0 MASK=0 ESIZE_32_BITS() NELEM_GPR_WRITER_STORE() +OPERANDS: MEM0:w:d:u32 REG0=XMM_R3():r:dq:u32 +IFORM: VMOVD_MEMu32_XMMu32_AVX512 +PATTERN: EVV 0x7E V66 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() VL128 mode64 W0 NOEVSR ZEROING=0 MASK=0 ESIZE_32_BITS() NELEM_GPR_WRITER_STORE() OPERANDS: MEM0:w:d:u32 REG0=XMM_R3():r:dq:u32 IFORM: VMOVD_MEMu32_XMMu32_AVX512 } @@ -19074,7 +19180,7 @@ ISA_SET: AVX512F_128N EXCEPTIONS: AVX512-E9NF REAL_OPCODE: Y ATTRIBUTES: DISP8_SCALAR -PATTERN: EVV 0x16 V66 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() W1 ZEROING=0 MASK=0 ESIZE_64_BITS() NELEM_SCALAR() +PATTERN: EVV 0x16 V66 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() VL128 W1 ZEROING=0 MASK=0 ESIZE_64_BITS() 
NELEM_SCALAR() OPERANDS: REG0=XMM_R3():w:dq:f64 REG1=XMM_N3():r:q:f64 MEM0:r:q:f64 IFORM: VMOVHPD_XMMf64_XMMf64_MEMf64_AVX512 } @@ -19090,7 +19196,7 @@ ISA_SET: AVX512F_128N EXCEPTIONS: AVX512-E9NF REAL_OPCODE: Y ATTRIBUTES: DISP8_SCALAR -PATTERN: EVV 0x17 V66 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() W1 NOEVSR ZEROING=0 MASK=0 ESIZE_64_BITS() NELEM_SCALAR() +PATTERN: EVV 0x17 V66 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() VL128 W1 NOEVSR ZEROING=0 MASK=0 ESIZE_64_BITS() NELEM_SCALAR() OPERANDS: MEM0:w:q:f64 REG0=XMM_R3():r:dq:f64 IFORM: VMOVHPD_MEMf64_XMMf64_AVX512 } @@ -19153,7 +19259,7 @@ ISA_SET: AVX512F_128N EXCEPTIONS: AVX512-E9NF REAL_OPCODE: Y ATTRIBUTES: DISP8_SCALAR -PATTERN: EVV 0x12 V66 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() W1 ZEROING=0 MASK=0 ESIZE_64_BITS() NELEM_SCALAR() +PATTERN: EVV 0x12 V66 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() VL128 W1 ZEROING=0 MASK=0 ESIZE_64_BITS() NELEM_SCALAR() OPERANDS: REG0=XMM_R3():w:dq:f64 REG1=XMM_N3():r:dq:f64 MEM0:r:q:f64 IFORM: VMOVLPD_XMMf64_XMMf64_MEMf64_AVX512 } @@ -19169,7 +19275,7 @@ ISA_SET: AVX512F_128N EXCEPTIONS: AVX512-E9NF REAL_OPCODE: Y ATTRIBUTES: DISP8_SCALAR -PATTERN: EVV 0x13 V66 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() W1 NOEVSR ZEROING=0 MASK=0 ESIZE_64_BITS() NELEM_SCALAR() +PATTERN: EVV 0x13 V66 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() VL128 W1 NOEVSR ZEROING=0 MASK=0 ESIZE_64_BITS() NELEM_SCALAR() OPERANDS: MEM0:w:q:f64 REG0=XMM_R3():r:q:f64 IFORM: VMOVLPD_MEMf64_XMMf64_AVX512 } @@ -19338,7 +19444,7 @@ EXTENSION: AVX512EVEX ISA_SET: AVX512F_128N EXCEPTIONS: AVX512-E9NF REAL_OPCODE: Y -PATTERN: EVV 0x7E VF3 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] W1 NOEVSR ZEROING=0 MASK=0 +PATTERN: EVV 0x7E VF3 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] VL128 W1 NOEVSR ZEROING=0 MASK=0 OPERANDS: REG0=XMM_R3():w:dq:u64 REG1=XMM_B3():r:dq:u64 IFORM: VMOVQ_XMMu64_XMMu64_AVX512 } @@ -19352,7 +19458,7 @@ ISA_SET: AVX512F_128N EXCEPTIONS: AVX512-E9NF 
REAL_OPCODE: Y ATTRIBUTES: DISP8_SCALAR -PATTERN: EVV 0x7E VF3 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() W1 NOEVSR ZEROING=0 MASK=0 ESIZE_64_BITS() NELEM_SCALAR() +PATTERN: EVV 0x7E VF3 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() VL128 W1 NOEVSR ZEROING=0 MASK=0 ESIZE_64_BITS() NELEM_SCALAR() OPERANDS: REG0=XMM_R3():w:dq:u64 MEM0:r:q:u64 IFORM: VMOVQ_XMMu64_MEMu64_AVX512 } @@ -19367,7 +19473,7 @@ EXTENSION: AVX512EVEX ISA_SET: AVX512F_128N EXCEPTIONS: AVX512-E9NF REAL_OPCODE: Y -PATTERN: EVV 0xD6 V66 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] W1 NOEVSR ZEROING=0 MASK=0 +PATTERN: EVV 0xD6 V66 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] VL128 W1 NOEVSR ZEROING=0 MASK=0 OPERANDS: REG0=XMM_B3():w:dq:u64 REG1=XMM_R3():r:dq:u64 IFORM: VMOVQ_XMMu64_XMMu64_AVX512 } @@ -19381,7 +19487,7 @@ ISA_SET: AVX512F_128N EXCEPTIONS: AVX512-E9NF REAL_OPCODE: Y ATTRIBUTES: DISP8_SCALAR -PATTERN: EVV 0xD6 V66 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() W1 NOEVSR ZEROING=0 MASK=0 ESIZE_64_BITS() NELEM_SCALAR() +PATTERN: EVV 0xD6 V66 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() VL128 W1 NOEVSR ZEROING=0 MASK=0 ESIZE_64_BITS() NELEM_SCALAR() OPERANDS: MEM0:w:q:u64 REG0=XMM_R3():r:dq:u64 IFORM: VMOVQ_MEMu64_XMMu64_AVX512 } @@ -20217,7 +20323,10 @@ ISA_SET: AVX512F_512 EXCEPTIONS: AVX512-E7NM REAL_OPCODE: Y ATTRIBUTES: MASKOP_EVEX -PATTERN: EVV 0x7C V66 V0F38 MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] VL512 W0 NOEVSR +PATTERN: EVV 0x7C V66 V0F38 MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] VL512 not64 NOEVSR +OPERANDS: REG0=ZMM_R3():w:zu32 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=GPR32_B():r:d:u32 EMX_BROADCAST_1TO16_32 +IFORM: VPBROADCASTD_ZMMu32_MASKmskw_GPR32u32_AVX512 +PATTERN: EVV 0x7C V66 V0F38 MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] VL512 mode64 W0 NOEVSR OPERANDS: REG0=ZMM_R3():w:zu32 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=GPR32_B():r:d:u32 EMX_BROADCAST_1TO16_32 IFORM: VPBROADCASTD_ZMMu32_MASKmskw_GPR32u32_AVX512 } @@ -20265,7 +20374,7 @@ ISA_SET: AVX512F_512 
EXCEPTIONS: AVX512-E7NM REAL_OPCODE: Y ATTRIBUTES: MASKOP_EVEX -PATTERN: EVV 0x7C V66 V0F38 MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] VL512 W1 mode64 NOEVSR +PATTERN: EVV 0x7C V66 V0F38 MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] VL512 mode64 W1 NOEVSR OPERANDS: REG0=ZMM_R3():w:zu64 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=GPR64_B():r:q:u64 EMX_BROADCAST_1TO8_64 IFORM: VPBROADCASTQ_ZMMu64_MASKmskw_GPR64u64_AVX512 } @@ -22272,9 +22381,10 @@ EXTENSION: AVX512EVEX ISA_SET: AVX512F_512 EXCEPTIONS: AVX512-E4 REAL_OPCODE: Y +COMMENT: Strange instruction that uses 32b of each 64b input element ATTRIBUTES: MASKOP_EVEX PATTERN: EVV 0x28 V66 V0F38 MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] VL512 W1 -OPERANDS: REG0=ZMM_R3():w:zi64 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=ZMM_N3():r:zi32 REG3=ZMM_B3():r:zi32 +OPERANDS: REG0=ZMM_R3():w:zi64 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=ZMM_N3():r:zi64 REG3=ZMM_B3():r:zi64 IFORM: VPMULDQ_ZMMi64_MASKmskw_ZMMi32_ZMMi32_AVX512 } @@ -22286,9 +22396,10 @@ EXTENSION: AVX512EVEX ISA_SET: AVX512F_512 EXCEPTIONS: AVX512-E4 REAL_OPCODE: Y +COMMENT: Strange instruction that uses 32b of each 64b input element ATTRIBUTES: MEMORY_FAULT_SUPPRESSION DOUBLE_WIDE_MEMOP DISP8_FULL BROADCAST_ENABLED MASKOP_EVEX PATTERN: EVV 0x28 V66 V0F38 MOD[mm] MOD!=3 REG[rrr] RM[nnn] MODRM() VL512 W1 ESIZE_64_BITS() NELEM_FULL() -OPERANDS: REG0=ZMM_R3():w:zi64 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=ZMM_N3():r:zi32 MEM0:r:vv:i32:TXT=BCASTSTR +OPERANDS: REG0=ZMM_R3():w:zi64 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=ZMM_N3():r:zi64 MEM0:r:vv:i64:TXT=BCASTSTR IFORM: VPMULDQ_ZMMi64_MASKmskw_ZMMi32_MEMi32_AVX512 } @@ -22332,9 +22443,10 @@ EXTENSION: AVX512EVEX ISA_SET: AVX512F_512 EXCEPTIONS: AVX512-E4 REAL_OPCODE: Y +COMMENT: Strange instruction that uses 32b of each 64b input element ATTRIBUTES: MASKOP_EVEX PATTERN: EVV 0xF4 V66 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] VL512 W1 -OPERANDS: REG0=ZMM_R3():w:zu64 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=ZMM_N3():r:zu32 REG3=ZMM_B3():r:zu32 +OPERANDS: 
REG0=ZMM_R3():w:zu64 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=ZMM_N3():r:zu64 REG3=ZMM_B3():r:zu64 IFORM: VPMULUDQ_ZMMu64_MASKmskw_ZMMu32_ZMMu32_AVX512 } @@ -22346,9 +22458,10 @@ EXTENSION: AVX512EVEX ISA_SET: AVX512F_512 EXCEPTIONS: AVX512-E4 REAL_OPCODE: Y +COMMENT: Strange instruction that uses 32b of each 64b input element ATTRIBUTES: MEMORY_FAULT_SUPPRESSION DOUBLE_WIDE_MEMOP DISP8_FULL BROADCAST_ENABLED MASKOP_EVEX PATTERN: EVV 0xF4 V66 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] MODRM() VL512 W1 ESIZE_64_BITS() NELEM_FULL() -OPERANDS: REG0=ZMM_R3():w:zu64 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=ZMM_N3():r:zu32 MEM0:r:vv:u32:TXT=BCASTSTR +OPERANDS: REG0=ZMM_R3():w:zu64 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=ZMM_N3():r:zu64 MEM0:r:vv:u64:TXT=BCASTSTR IFORM: VPMULUDQ_ZMMu64_MASKmskw_ZMMu32_MEMu32_AVX512 } @@ -25350,7 +25463,6 @@ IFORM: KXORW_MASKmskw_MASKmskw_MASKmskw_AVX512 - ###FILE: ./datafiles/avx512cd/vconflict-isa.xed.txt #BEGIN_LEGAL @@ -25533,29 +25645,22 @@ IFORM: VPLZCNTQ_ZMMu64_MASKmskw_MEMu64_AVX512CD ###FILE: ./datafiles/avx512-skx/skx-isa.xed.txt - #BEGIN_LEGAL -#INTEL CONFIDENTIAL -# -#Copyright (c) 2017, Intel Corporation. All rights reserved. -# -#The source code contained or described herein and all documents -#related to the source code ("Material") are owned by Intel Corporation -#or its suppliers or licensors. Title to the Material remains with -#Intel Corporation or its suppliers and licensors. The Material -#contains trade secrets and proprietary and confidential information of -#Intel or its suppliers and licensors. The Material is protected by -#worldwide copyright and trade secret laws and treaty provisions. No -#part of the Material may be used, copied, reproduced, modified, -#published, uploaded, posted, transmitted, distributed, or disclosed in -#any way without Intel's prior express written permission. 
-# -#No license under any patent, copyright, trade secret or other -#intellectual property right is granted to or conferred upon you by -#disclosure or delivery of the Materials, either expressly, by -#implication, inducement, estoppel or otherwise. Any license under such -#intellectual property rights must be express and approved by Intel in -#writing. +# +#Copyright (c) 2019 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# #END_LEGAL # # @@ -25818,8 +25923,8 @@ EXCEPTIONS: AVX512-E4 REAL_OPCODE: Y ATTRIBUTES: MASKOP_EVEX PATTERN: EVV 0x55 V66 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] VL128 W1 -OPERANDS: REG0=XMM_R3():w:dq:f64 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=XMM_N3():r:dq:f64 REG3=XMM_B3():r:dq:f64 -IFORM: VANDNPD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512 +OPERANDS: REG0=XMM_R3():w:dq:u64 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=XMM_N3():r:dq:u64 REG3=XMM_B3():r:dq:u64 +IFORM: VANDNPD_XMMu64_MASKmskw_XMMu64_XMMu64_AVX512 } { @@ -25832,8 +25937,8 @@ EXCEPTIONS: AVX512-E4 REAL_OPCODE: Y ATTRIBUTES: MEMORY_FAULT_SUPPRESSION MASKOP_EVEX DISP8_FULL BROADCAST_ENABLED PATTERN: EVV 0x55 V66 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] MODRM() VL128 W1 ESIZE_64_BITS() NELEM_FULL() -OPERANDS: REG0=XMM_R3():w:dq:f64 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=XMM_N3():r:dq:f64 MEM0:r:vv:f64:TXT=BCASTSTR -IFORM: VANDNPD_XMMf64_MASKmskw_XMMf64_MEMf64_AVX512 +OPERANDS: REG0=XMM_R3():w:dq:u64 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=XMM_N3():r:dq:u64 MEM0:r:vv:u64:TXT=BCASTSTR +IFORM: 
VANDNPD_XMMu64_MASKmskw_XMMu64_MEMu64_AVX512 } @@ -25848,8 +25953,8 @@ EXCEPTIONS: AVX512-E4 REAL_OPCODE: Y ATTRIBUTES: MASKOP_EVEX PATTERN: EVV 0x55 V66 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] VL256 W1 -OPERANDS: REG0=YMM_R3():w:qq:f64 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=YMM_N3():r:qq:f64 REG3=YMM_B3():r:qq:f64 -IFORM: VANDNPD_YMMf64_MASKmskw_YMMf64_YMMf64_AVX512 +OPERANDS: REG0=YMM_R3():w:qq:u64 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=YMM_N3():r:qq:u64 REG3=YMM_B3():r:qq:u64 +IFORM: VANDNPD_YMMu64_MASKmskw_YMMu64_YMMu64_AVX512 } { @@ -25862,8 +25967,8 @@ EXCEPTIONS: AVX512-E4 REAL_OPCODE: Y ATTRIBUTES: MEMORY_FAULT_SUPPRESSION MASKOP_EVEX DISP8_FULL BROADCAST_ENABLED PATTERN: EVV 0x55 V66 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] MODRM() VL256 W1 ESIZE_64_BITS() NELEM_FULL() -OPERANDS: REG0=YMM_R3():w:qq:f64 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=YMM_N3():r:qq:f64 MEM0:r:vv:f64:TXT=BCASTSTR -IFORM: VANDNPD_YMMf64_MASKmskw_YMMf64_MEMf64_AVX512 +OPERANDS: REG0=YMM_R3():w:qq:u64 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=YMM_N3():r:qq:u64 MEM0:r:vv:u64:TXT=BCASTSTR +IFORM: VANDNPD_YMMu64_MASKmskw_YMMu64_MEMu64_AVX512 } @@ -25878,8 +25983,8 @@ EXCEPTIONS: AVX512-E4 REAL_OPCODE: Y ATTRIBUTES: MASKOP_EVEX PATTERN: EVV 0x55 V66 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] VL512 W1 -OPERANDS: REG0=ZMM_R3():w:zf64 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=ZMM_N3():r:zf64 REG3=ZMM_B3():r:zf64 -IFORM: VANDNPD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512 +OPERANDS: REG0=ZMM_R3():w:zu64 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=ZMM_N3():r:zu64 REG3=ZMM_B3():r:zu64 +IFORM: VANDNPD_ZMMu64_MASKmskw_ZMMu64_ZMMu64_AVX512 } { @@ -25892,8 +25997,8 @@ EXCEPTIONS: AVX512-E4 REAL_OPCODE: Y ATTRIBUTES: MEMORY_FAULT_SUPPRESSION MASKOP_EVEX DISP8_FULL BROADCAST_ENABLED PATTERN: EVV 0x55 V66 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] MODRM() VL512 W1 ESIZE_64_BITS() NELEM_FULL() -OPERANDS: REG0=ZMM_R3():w:zf64 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=ZMM_N3():r:zf64 MEM0:r:vv:f64:TXT=BCASTSTR -IFORM: 
VANDNPD_ZMMf64_MASKmskw_ZMMf64_MEMf64_AVX512 +OPERANDS: REG0=ZMM_R3():w:zu64 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=ZMM_N3():r:zu64 MEM0:r:vv:u64:TXT=BCASTSTR +IFORM: VANDNPD_ZMMu64_MASKmskw_ZMMu64_MEMu64_AVX512 } @@ -25908,8 +26013,8 @@ EXCEPTIONS: AVX512-E4 REAL_OPCODE: Y ATTRIBUTES: MASKOP_EVEX PATTERN: EVV 0x55 VNP V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] VL128 W0 -OPERANDS: REG0=XMM_R3():w:dq:f32 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=XMM_N3():r:dq:f32 REG3=XMM_B3():r:dq:f32 -IFORM: VANDNPS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512 +OPERANDS: REG0=XMM_R3():w:dq:u32 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=XMM_N3():r:dq:u32 REG3=XMM_B3():r:dq:u32 +IFORM: VANDNPS_XMMu32_MASKmskw_XMMu32_XMMu32_AVX512 } { @@ -25922,8 +26027,8 @@ EXCEPTIONS: AVX512-E4 REAL_OPCODE: Y ATTRIBUTES: MEMORY_FAULT_SUPPRESSION MASKOP_EVEX DISP8_FULL BROADCAST_ENABLED PATTERN: EVV 0x55 VNP V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] MODRM() VL128 W0 ESIZE_32_BITS() NELEM_FULL() -OPERANDS: REG0=XMM_R3():w:dq:f32 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=XMM_N3():r:dq:f32 MEM0:r:vv:f32:TXT=BCASTSTR -IFORM: VANDNPS_XMMf32_MASKmskw_XMMf32_MEMf32_AVX512 +OPERANDS: REG0=XMM_R3():w:dq:u32 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=XMM_N3():r:dq:u32 MEM0:r:vv:u32:TXT=BCASTSTR +IFORM: VANDNPS_XMMu32_MASKmskw_XMMu32_MEMu32_AVX512 } @@ -25938,8 +26043,8 @@ EXCEPTIONS: AVX512-E4 REAL_OPCODE: Y ATTRIBUTES: MASKOP_EVEX PATTERN: EVV 0x55 VNP V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] VL256 W0 -OPERANDS: REG0=YMM_R3():w:qq:f32 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=YMM_N3():r:qq:f32 REG3=YMM_B3():r:qq:f32 -IFORM: VANDNPS_YMMf32_MASKmskw_YMMf32_YMMf32_AVX512 +OPERANDS: REG0=YMM_R3():w:qq:u32 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=YMM_N3():r:qq:u32 REG3=YMM_B3():r:qq:u32 +IFORM: VANDNPS_YMMu32_MASKmskw_YMMu32_YMMu32_AVX512 } { @@ -25952,8 +26057,8 @@ EXCEPTIONS: AVX512-E4 REAL_OPCODE: Y ATTRIBUTES: MEMORY_FAULT_SUPPRESSION MASKOP_EVEX DISP8_FULL BROADCAST_ENABLED PATTERN: EVV 0x55 VNP V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] MODRM() VL256 W0 
ESIZE_32_BITS() NELEM_FULL() -OPERANDS: REG0=YMM_R3():w:qq:f32 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=YMM_N3():r:qq:f32 MEM0:r:vv:f32:TXT=BCASTSTR -IFORM: VANDNPS_YMMf32_MASKmskw_YMMf32_MEMf32_AVX512 +OPERANDS: REG0=YMM_R3():w:qq:u32 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=YMM_N3():r:qq:u32 MEM0:r:vv:u32:TXT=BCASTSTR +IFORM: VANDNPS_YMMu32_MASKmskw_YMMu32_MEMu32_AVX512 } @@ -25968,8 +26073,8 @@ EXCEPTIONS: AVX512-E4 REAL_OPCODE: Y ATTRIBUTES: MASKOP_EVEX PATTERN: EVV 0x55 VNP V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] VL512 W0 -OPERANDS: REG0=ZMM_R3():w:zf32 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=ZMM_N3():r:zf32 REG3=ZMM_B3():r:zf32 -IFORM: VANDNPS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512 +OPERANDS: REG0=ZMM_R3():w:zu32 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=ZMM_N3():r:zu32 REG3=ZMM_B3():r:zu32 +IFORM: VANDNPS_ZMMu32_MASKmskw_ZMMu32_ZMMu32_AVX512 } { @@ -25982,8 +26087,8 @@ EXCEPTIONS: AVX512-E4 REAL_OPCODE: Y ATTRIBUTES: MEMORY_FAULT_SUPPRESSION MASKOP_EVEX DISP8_FULL BROADCAST_ENABLED PATTERN: EVV 0x55 VNP V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] MODRM() VL512 W0 ESIZE_32_BITS() NELEM_FULL() -OPERANDS: REG0=ZMM_R3():w:zf32 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=ZMM_N3():r:zf32 MEM0:r:vv:f32:TXT=BCASTSTR -IFORM: VANDNPS_ZMMf32_MASKmskw_ZMMf32_MEMf32_AVX512 +OPERANDS: REG0=ZMM_R3():w:zu32 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=ZMM_N3():r:zu32 MEM0:r:vv:u32:TXT=BCASTSTR +IFORM: VANDNPS_ZMMu32_MASKmskw_ZMMu32_MEMu32_AVX512 } @@ -25998,8 +26103,8 @@ EXCEPTIONS: AVX512-E4 REAL_OPCODE: Y ATTRIBUTES: MASKOP_EVEX PATTERN: EVV 0x54 V66 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] VL128 W1 -OPERANDS: REG0=XMM_R3():w:dq:f64 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=XMM_N3():r:dq:f64 REG3=XMM_B3():r:dq:f64 -IFORM: VANDPD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512 +OPERANDS: REG0=XMM_R3():w:dq:u64 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=XMM_N3():r:dq:u64 REG3=XMM_B3():r:dq:u64 +IFORM: VANDPD_XMMu64_MASKmskw_XMMu64_XMMu64_AVX512 } { @@ -26012,8 +26117,8 @@ EXCEPTIONS: AVX512-E4 REAL_OPCODE: Y ATTRIBUTES: 
MEMORY_FAULT_SUPPRESSION MASKOP_EVEX DISP8_FULL BROADCAST_ENABLED PATTERN: EVV 0x54 V66 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] MODRM() VL128 W1 ESIZE_64_BITS() NELEM_FULL() -OPERANDS: REG0=XMM_R3():w:dq:f64 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=XMM_N3():r:dq:f64 MEM0:r:vv:f64:TXT=BCASTSTR -IFORM: VANDPD_XMMf64_MASKmskw_XMMf64_MEMf64_AVX512 +OPERANDS: REG0=XMM_R3():w:dq:u64 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=XMM_N3():r:dq:u64 MEM0:r:vv:u64:TXT=BCASTSTR +IFORM: VANDPD_XMMu64_MASKmskw_XMMu64_MEMu64_AVX512 } @@ -26028,8 +26133,8 @@ EXCEPTIONS: AVX512-E4 REAL_OPCODE: Y ATTRIBUTES: MASKOP_EVEX PATTERN: EVV 0x54 V66 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] VL256 W1 -OPERANDS: REG0=YMM_R3():w:qq:f64 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=YMM_N3():r:qq:f64 REG3=YMM_B3():r:qq:f64 -IFORM: VANDPD_YMMf64_MASKmskw_YMMf64_YMMf64_AVX512 +OPERANDS: REG0=YMM_R3():w:qq:u64 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=YMM_N3():r:qq:u64 REG3=YMM_B3():r:qq:u64 +IFORM: VANDPD_YMMu64_MASKmskw_YMMu64_YMMu64_AVX512 } { @@ -26042,8 +26147,8 @@ EXCEPTIONS: AVX512-E4 REAL_OPCODE: Y ATTRIBUTES: MEMORY_FAULT_SUPPRESSION MASKOP_EVEX DISP8_FULL BROADCAST_ENABLED PATTERN: EVV 0x54 V66 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] MODRM() VL256 W1 ESIZE_64_BITS() NELEM_FULL() -OPERANDS: REG0=YMM_R3():w:qq:f64 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=YMM_N3():r:qq:f64 MEM0:r:vv:f64:TXT=BCASTSTR -IFORM: VANDPD_YMMf64_MASKmskw_YMMf64_MEMf64_AVX512 +OPERANDS: REG0=YMM_R3():w:qq:u64 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=YMM_N3():r:qq:u64 MEM0:r:vv:u64:TXT=BCASTSTR +IFORM: VANDPD_YMMu64_MASKmskw_YMMu64_MEMu64_AVX512 } @@ -26058,8 +26163,8 @@ EXCEPTIONS: AVX512-E4 REAL_OPCODE: Y ATTRIBUTES: MASKOP_EVEX PATTERN: EVV 0x54 V66 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] VL512 W1 -OPERANDS: REG0=ZMM_R3():w:zf64 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=ZMM_N3():r:zf64 REG3=ZMM_B3():r:zf64 -IFORM: VANDPD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512 +OPERANDS: REG0=ZMM_R3():w:zu64 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=ZMM_N3():r:zu64 
REG3=ZMM_B3():r:zu64 +IFORM: VANDPD_ZMMu64_MASKmskw_ZMMu64_ZMMu64_AVX512 } { @@ -26072,8 +26177,8 @@ EXCEPTIONS: AVX512-E4 REAL_OPCODE: Y ATTRIBUTES: MEMORY_FAULT_SUPPRESSION MASKOP_EVEX DISP8_FULL BROADCAST_ENABLED PATTERN: EVV 0x54 V66 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] MODRM() VL512 W1 ESIZE_64_BITS() NELEM_FULL() -OPERANDS: REG0=ZMM_R3():w:zf64 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=ZMM_N3():r:zf64 MEM0:r:vv:f64:TXT=BCASTSTR -IFORM: VANDPD_ZMMf64_MASKmskw_ZMMf64_MEMf64_AVX512 +OPERANDS: REG0=ZMM_R3():w:zu64 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=ZMM_N3():r:zu64 MEM0:r:vv:u64:TXT=BCASTSTR +IFORM: VANDPD_ZMMu64_MASKmskw_ZMMu64_MEMu64_AVX512 } @@ -26088,8 +26193,8 @@ EXCEPTIONS: AVX512-E4 REAL_OPCODE: Y ATTRIBUTES: MASKOP_EVEX PATTERN: EVV 0x54 VNP V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] VL128 W0 -OPERANDS: REG0=XMM_R3():w:dq:f32 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=XMM_N3():r:dq:f32 REG3=XMM_B3():r:dq:f32 -IFORM: VANDPS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512 +OPERANDS: REG0=XMM_R3():w:dq:u32 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=XMM_N3():r:dq:u32 REG3=XMM_B3():r:dq:u32 +IFORM: VANDPS_XMMu32_MASKmskw_XMMu32_XMMu32_AVX512 } { @@ -26102,8 +26207,8 @@ EXCEPTIONS: AVX512-E4 REAL_OPCODE: Y ATTRIBUTES: MEMORY_FAULT_SUPPRESSION MASKOP_EVEX DISP8_FULL BROADCAST_ENABLED PATTERN: EVV 0x54 VNP V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] MODRM() VL128 W0 ESIZE_32_BITS() NELEM_FULL() -OPERANDS: REG0=XMM_R3():w:dq:f32 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=XMM_N3():r:dq:f32 MEM0:r:vv:f32:TXT=BCASTSTR -IFORM: VANDPS_XMMf32_MASKmskw_XMMf32_MEMf32_AVX512 +OPERANDS: REG0=XMM_R3():w:dq:u32 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=XMM_N3():r:dq:u32 MEM0:r:vv:u32:TXT=BCASTSTR +IFORM: VANDPS_XMMu32_MASKmskw_XMMu32_MEMu32_AVX512 } @@ -26118,8 +26223,8 @@ EXCEPTIONS: AVX512-E4 REAL_OPCODE: Y ATTRIBUTES: MASKOP_EVEX PATTERN: EVV 0x54 VNP V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] VL256 W0 -OPERANDS: REG0=YMM_R3():w:qq:f32 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=YMM_N3():r:qq:f32 REG3=YMM_B3():r:qq:f32 
-IFORM: VANDPS_YMMf32_MASKmskw_YMMf32_YMMf32_AVX512 +OPERANDS: REG0=YMM_R3():w:qq:u32 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=YMM_N3():r:qq:u32 REG3=YMM_B3():r:qq:u32 +IFORM: VANDPS_YMMu32_MASKmskw_YMMu32_YMMu32_AVX512 } { @@ -26132,8 +26237,8 @@ EXCEPTIONS: AVX512-E4 REAL_OPCODE: Y ATTRIBUTES: MEMORY_FAULT_SUPPRESSION MASKOP_EVEX DISP8_FULL BROADCAST_ENABLED PATTERN: EVV 0x54 VNP V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] MODRM() VL256 W0 ESIZE_32_BITS() NELEM_FULL() -OPERANDS: REG0=YMM_R3():w:qq:f32 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=YMM_N3():r:qq:f32 MEM0:r:vv:f32:TXT=BCASTSTR -IFORM: VANDPS_YMMf32_MASKmskw_YMMf32_MEMf32_AVX512 +OPERANDS: REG0=YMM_R3():w:qq:u32 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=YMM_N3():r:qq:u32 MEM0:r:vv:u32:TXT=BCASTSTR +IFORM: VANDPS_YMMu32_MASKmskw_YMMu32_MEMu32_AVX512 } @@ -26148,8 +26253,8 @@ EXCEPTIONS: AVX512-E4 REAL_OPCODE: Y ATTRIBUTES: MASKOP_EVEX PATTERN: EVV 0x54 VNP V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] VL512 W0 -OPERANDS: REG0=ZMM_R3():w:zf32 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=ZMM_N3():r:zf32 REG3=ZMM_B3():r:zf32 -IFORM: VANDPS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512 +OPERANDS: REG0=ZMM_R3():w:zu32 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=ZMM_N3():r:zu32 REG3=ZMM_B3():r:zu32 +IFORM: VANDPS_ZMMu32_MASKmskw_ZMMu32_ZMMu32_AVX512 } { @@ -26162,8 +26267,8 @@ EXCEPTIONS: AVX512-E4 REAL_OPCODE: Y ATTRIBUTES: MEMORY_FAULT_SUPPRESSION MASKOP_EVEX DISP8_FULL BROADCAST_ENABLED PATTERN: EVV 0x54 VNP V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] MODRM() VL512 W0 ESIZE_32_BITS() NELEM_FULL() -OPERANDS: REG0=ZMM_R3():w:zf32 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=ZMM_N3():r:zf32 MEM0:r:vv:f32:TXT=BCASTSTR -IFORM: VANDPS_ZMMf32_MASKmskw_ZMMf32_MEMf32_AVX512 +OPERANDS: REG0=ZMM_R3():w:zu32 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=ZMM_N3():r:zu32 MEM0:r:vv:u32:TXT=BCASTSTR +IFORM: VANDPS_ZMMu32_MASKmskw_ZMMu32_MEMu32_AVX512 } @@ -34886,8 +34991,8 @@ EXCEPTIONS: AVX512-E4 REAL_OPCODE: Y ATTRIBUTES: MASKOP_EVEX PATTERN: EVV 0x56 V66 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] 
RM[nnn] VL128 W1 -OPERANDS: REG0=XMM_R3():w:dq:f64 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=XMM_N3():r:dq:f64 REG3=XMM_B3():r:dq:f64 -IFORM: VORPD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512 +OPERANDS: REG0=XMM_R3():w:dq:u64 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=XMM_N3():r:dq:u64 REG3=XMM_B3():r:dq:u64 +IFORM: VORPD_XMMu64_MASKmskw_XMMu64_XMMu64_AVX512 } { @@ -34900,8 +35005,8 @@ EXCEPTIONS: AVX512-E4 REAL_OPCODE: Y ATTRIBUTES: MEMORY_FAULT_SUPPRESSION MASKOP_EVEX DISP8_FULL BROADCAST_ENABLED PATTERN: EVV 0x56 V66 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] MODRM() VL128 W1 ESIZE_64_BITS() NELEM_FULL() -OPERANDS: REG0=XMM_R3():w:dq:f64 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=XMM_N3():r:dq:f64 MEM0:r:vv:f64:TXT=BCASTSTR -IFORM: VORPD_XMMf64_MASKmskw_XMMf64_MEMf64_AVX512 +OPERANDS: REG0=XMM_R3():w:dq:u64 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=XMM_N3():r:dq:u64 MEM0:r:vv:u64:TXT=BCASTSTR +IFORM: VORPD_XMMu64_MASKmskw_XMMu64_MEMu64_AVX512 } @@ -34916,8 +35021,8 @@ EXCEPTIONS: AVX512-E4 REAL_OPCODE: Y ATTRIBUTES: MASKOP_EVEX PATTERN: EVV 0x56 V66 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] VL256 W1 -OPERANDS: REG0=YMM_R3():w:qq:f64 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=YMM_N3():r:qq:f64 REG3=YMM_B3():r:qq:f64 -IFORM: VORPD_YMMf64_MASKmskw_YMMf64_YMMf64_AVX512 +OPERANDS: REG0=YMM_R3():w:qq:u64 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=YMM_N3():r:qq:u64 REG3=YMM_B3():r:qq:u64 +IFORM: VORPD_YMMu64_MASKmskw_YMMu64_YMMu64_AVX512 } { @@ -34930,8 +35035,8 @@ EXCEPTIONS: AVX512-E4 REAL_OPCODE: Y ATTRIBUTES: MEMORY_FAULT_SUPPRESSION MASKOP_EVEX DISP8_FULL BROADCAST_ENABLED PATTERN: EVV 0x56 V66 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] MODRM() VL256 W1 ESIZE_64_BITS() NELEM_FULL() -OPERANDS: REG0=YMM_R3():w:qq:f64 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=YMM_N3():r:qq:f64 MEM0:r:vv:f64:TXT=BCASTSTR -IFORM: VORPD_YMMf64_MASKmskw_YMMf64_MEMf64_AVX512 +OPERANDS: REG0=YMM_R3():w:qq:u64 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=YMM_N3():r:qq:u64 MEM0:r:vv:u64:TXT=BCASTSTR +IFORM: VORPD_YMMu64_MASKmskw_YMMu64_MEMu64_AVX512 } @@ 
-34946,8 +35051,8 @@ EXCEPTIONS: AVX512-E4 REAL_OPCODE: Y ATTRIBUTES: MASKOP_EVEX PATTERN: EVV 0x56 V66 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] VL512 W1 -OPERANDS: REG0=ZMM_R3():w:zf64 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=ZMM_N3():r:zf64 REG3=ZMM_B3():r:zf64 -IFORM: VORPD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512 +OPERANDS: REG0=ZMM_R3():w:zu64 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=ZMM_N3():r:zu64 REG3=ZMM_B3():r:zu64 +IFORM: VORPD_ZMMu64_MASKmskw_ZMMu64_ZMMu64_AVX512 } { @@ -34960,8 +35065,8 @@ EXCEPTIONS: AVX512-E4 REAL_OPCODE: Y ATTRIBUTES: MEMORY_FAULT_SUPPRESSION MASKOP_EVEX DISP8_FULL BROADCAST_ENABLED PATTERN: EVV 0x56 V66 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] MODRM() VL512 W1 ESIZE_64_BITS() NELEM_FULL() -OPERANDS: REG0=ZMM_R3():w:zf64 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=ZMM_N3():r:zf64 MEM0:r:vv:f64:TXT=BCASTSTR -IFORM: VORPD_ZMMf64_MASKmskw_ZMMf64_MEMf64_AVX512 +OPERANDS: REG0=ZMM_R3():w:zu64 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=ZMM_N3():r:zu64 MEM0:r:vv:u64:TXT=BCASTSTR +IFORM: VORPD_ZMMu64_MASKmskw_ZMMu64_MEMu64_AVX512 } @@ -34976,8 +35081,8 @@ EXCEPTIONS: AVX512-E4 REAL_OPCODE: Y ATTRIBUTES: MASKOP_EVEX PATTERN: EVV 0x56 VNP V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] VL128 W0 -OPERANDS: REG0=XMM_R3():w:dq:f32 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=XMM_N3():r:dq:f32 REG3=XMM_B3():r:dq:f32 -IFORM: VORPS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512 +OPERANDS: REG0=XMM_R3():w:dq:u32 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=XMM_N3():r:dq:u32 REG3=XMM_B3():r:dq:u32 +IFORM: VORPS_XMMu32_MASKmskw_XMMu32_XMMu32_AVX512 } { @@ -34990,8 +35095,8 @@ EXCEPTIONS: AVX512-E4 REAL_OPCODE: Y ATTRIBUTES: MEMORY_FAULT_SUPPRESSION MASKOP_EVEX DISP8_FULL BROADCAST_ENABLED PATTERN: EVV 0x56 VNP V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] MODRM() VL128 W0 ESIZE_32_BITS() NELEM_FULL() -OPERANDS: REG0=XMM_R3():w:dq:f32 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=XMM_N3():r:dq:f32 MEM0:r:vv:f32:TXT=BCASTSTR -IFORM: VORPS_XMMf32_MASKmskw_XMMf32_MEMf32_AVX512 +OPERANDS: REG0=XMM_R3():w:dq:u32 
REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=XMM_N3():r:dq:u32 MEM0:r:vv:u32:TXT=BCASTSTR +IFORM: VORPS_XMMu32_MASKmskw_XMMu32_MEMu32_AVX512 } @@ -35006,8 +35111,8 @@ EXCEPTIONS: AVX512-E4 REAL_OPCODE: Y ATTRIBUTES: MASKOP_EVEX PATTERN: EVV 0x56 VNP V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] VL256 W0 -OPERANDS: REG0=YMM_R3():w:qq:f32 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=YMM_N3():r:qq:f32 REG3=YMM_B3():r:qq:f32 -IFORM: VORPS_YMMf32_MASKmskw_YMMf32_YMMf32_AVX512 +OPERANDS: REG0=YMM_R3():w:qq:u32 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=YMM_N3():r:qq:u32 REG3=YMM_B3():r:qq:u32 +IFORM: VORPS_YMMu32_MASKmskw_YMMu32_YMMu32_AVX512 } { @@ -35020,8 +35125,8 @@ EXCEPTIONS: AVX512-E4 REAL_OPCODE: Y ATTRIBUTES: MEMORY_FAULT_SUPPRESSION MASKOP_EVEX DISP8_FULL BROADCAST_ENABLED PATTERN: EVV 0x56 VNP V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] MODRM() VL256 W0 ESIZE_32_BITS() NELEM_FULL() -OPERANDS: REG0=YMM_R3():w:qq:f32 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=YMM_N3():r:qq:f32 MEM0:r:vv:f32:TXT=BCASTSTR -IFORM: VORPS_YMMf32_MASKmskw_YMMf32_MEMf32_AVX512 +OPERANDS: REG0=YMM_R3():w:qq:u32 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=YMM_N3():r:qq:u32 MEM0:r:vv:u32:TXT=BCASTSTR +IFORM: VORPS_YMMu32_MASKmskw_YMMu32_MEMu32_AVX512 } @@ -35036,8 +35141,8 @@ EXCEPTIONS: AVX512-E4 REAL_OPCODE: Y ATTRIBUTES: MASKOP_EVEX PATTERN: EVV 0x56 VNP V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] VL512 W0 -OPERANDS: REG0=ZMM_R3():w:zf32 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=ZMM_N3():r:zf32 REG3=ZMM_B3():r:zf32 -IFORM: VORPS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512 +OPERANDS: REG0=ZMM_R3():w:zu32 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=ZMM_N3():r:zu32 REG3=ZMM_B3():r:zu32 +IFORM: VORPS_ZMMu32_MASKmskw_ZMMu32_ZMMu32_AVX512 } { @@ -35050,8 +35155,8 @@ EXCEPTIONS: AVX512-E4 REAL_OPCODE: Y ATTRIBUTES: MEMORY_FAULT_SUPPRESSION MASKOP_EVEX DISP8_FULL BROADCAST_ENABLED PATTERN: EVV 0x56 VNP V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] MODRM() VL512 W0 ESIZE_32_BITS() NELEM_FULL() -OPERANDS: REG0=ZMM_R3():w:zf32 REG1=MASK1():r:mskw:TXT=ZEROSTR 
REG2=ZMM_N3():r:zf32 MEM0:r:vv:f32:TXT=BCASTSTR -IFORM: VORPS_ZMMf32_MASKmskw_ZMMf32_MEMf32_AVX512 +OPERANDS: REG0=ZMM_R3():w:zu32 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=ZMM_N3():r:zu32 MEM0:r:vv:u32:TXT=BCASTSTR +IFORM: VORPS_ZMMu32_MASKmskw_ZMMu32_MEMu32_AVX512 } @@ -37365,7 +37470,10 @@ ISA_SET: AVX512F_128 EXCEPTIONS: AVX512-E7NM REAL_OPCODE: Y ATTRIBUTES: MASKOP_EVEX -PATTERN: EVV 0x7C V66 V0F38 MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] VL128 W0 NOEVSR +PATTERN: EVV 0x7C V66 V0F38 MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] VL128 not64 NOEVSR +OPERANDS: REG0=XMM_R3():w:dq:u32 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=GPR32_B():r:d:u32 EMX_BROADCAST_1TO4_32 +IFORM: VPBROADCASTD_XMMu32_MASKmskw_GPR32u32_AVX512 +PATTERN: EVV 0x7C V66 V0F38 MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] VL128 mode64 W0 NOEVSR OPERANDS: REG0=XMM_R3():w:dq:u32 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=GPR32_B():r:d:u32 EMX_BROADCAST_1TO4_32 IFORM: VPBROADCASTD_XMMu32_MASKmskw_GPR32u32_AVX512 } @@ -37413,7 +37521,10 @@ ISA_SET: AVX512F_256 EXCEPTIONS: AVX512-E7NM REAL_OPCODE: Y ATTRIBUTES: MASKOP_EVEX -PATTERN: EVV 0x7C V66 V0F38 MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] VL256 W0 NOEVSR +PATTERN: EVV 0x7C V66 V0F38 MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] VL256 not64 NOEVSR +OPERANDS: REG0=YMM_R3():w:qq:u32 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=GPR32_B():r:d:u32 EMX_BROADCAST_1TO8_32 +IFORM: VPBROADCASTD_YMMu32_MASKmskw_GPR32u32_AVX512 +PATTERN: EVV 0x7C V66 V0F38 MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] VL256 mode64 W0 NOEVSR OPERANDS: REG0=YMM_R3():w:qq:u32 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=GPR32_B():r:d:u32 EMX_BROADCAST_1TO8_32 IFORM: VPBROADCASTD_YMMu32_MASKmskw_GPR32u32_AVX512 } @@ -40497,7 +40608,10 @@ EXTENSION: AVX512EVEX ISA_SET: AVX512DQ_128N EXCEPTIONS: AVX512-E9NF REAL_OPCODE: Y -PATTERN: EVV 0x16 V66 V0F3A MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] VL128 W0 NOEVSR ZEROING=0 MASK=0 UIMM8() +PATTERN: EVV 0x16 V66 V0F3A MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] VL128 not64 NOEVSR ZEROING=0 MASK=0 UIMM8() 
+OPERANDS: REG0=GPR32_B():w:d:u32 REG1=XMM_R3():r:dq:u32 IMM0:r:b +IFORM: VPEXTRD_GPR32u32_XMMu32_IMM8_AVX512 +PATTERN: EVV 0x16 V66 V0F3A MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] VL128 mode64 W0 NOEVSR ZEROING=0 MASK=0 UIMM8() OPERANDS: REG0=GPR32_B():w:d:u32 REG1=XMM_R3():r:dq:u32 IMM0:r:b IFORM: VPEXTRD_GPR32u32_XMMu32_IMM8_AVX512 } @@ -40511,7 +40625,10 @@ ISA_SET: AVX512DQ_128N EXCEPTIONS: AVX512-E9NF REAL_OPCODE: Y ATTRIBUTES: DISP8_GPR_WRITER_STORE -PATTERN: EVV 0x16 V66 V0F3A MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() VL128 W0 NOEVSR ZEROING=0 MASK=0 UIMM8() ESIZE_32_BITS() NELEM_GPR_WRITER_STORE() +PATTERN: EVV 0x16 V66 V0F3A MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() VL128 not64 NOEVSR ZEROING=0 MASK=0 UIMM8() ESIZE_32_BITS() NELEM_GPR_WRITER_STORE() +OPERANDS: MEM0:w:d:u32 REG0=XMM_R3():r:dq:u32 IMM0:r:b +IFORM: VPEXTRD_MEMu32_XMMu32_IMM8_AVX512 +PATTERN: EVV 0x16 V66 V0F3A MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() VL128 mode64 W0 NOEVSR ZEROING=0 MASK=0 UIMM8() ESIZE_32_BITS() NELEM_GPR_WRITER_STORE() OPERANDS: MEM0:w:d:u32 REG0=XMM_R3():r:dq:u32 IMM0:r:b IFORM: VPEXTRD_MEMu32_XMMu32_IMM8_AVX512 } @@ -40577,16 +40694,22 @@ IFORM: VPEXTRW_MEMu16_XMMu16_IMM8_AVX512 # EMITTING VPEXTRW (VPEXTRW-128-2) { -ICLASS: VPEXTRW +ICLASS: VPEXTRW_C5 +DISASM: vpextrw CPL: 3 CATEGORY: AVX512 EXTENSION: AVX512EVEX ISA_SET: AVX512BW_128N EXCEPTIONS: AVX512-E9NF REAL_OPCODE: Y -PATTERN: EVV 0xC5 V66 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] VL128 NOEVSR ZEROING=0 MASK=0 UIMM8() + +PATTERN: EVV 0xC5 V66 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] VL128 NOEVSR ZEROING=0 MASK=0 UIMM8() not64 OPERANDS: REG0=GPR32_R():w:d:u16 REG1=XMM_B3():r:dq:u16 IMM0:r:b -IFORM: VPEXTRW_GPR32u16_XMMu16_IMM8_AVX512 +IFORM: VPEXTRW_GPR32u16_XMMu16_IMM8_AVX512_C5 + +PATTERN: EVV 0xC5 V66 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] VL128 NOEVSR ZEROING=0 MASK=0 UIMM8() mode64 EVEXRR_ONE +OPERANDS: REG0=GPR32_R():w:d:u16 REG1=XMM_B3():r:dq:u16 IMM0:r:b +IFORM: 
VPEXTRW_GPR32u16_XMMu16_IMM8_AVX512_C5 } @@ -40756,7 +40879,10 @@ EXTENSION: AVX512EVEX ISA_SET: AVX512DQ_128N EXCEPTIONS: AVX512-E9NF REAL_OPCODE: Y -PATTERN: EVV 0x22 V66 V0F3A MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] VL128 W0 ZEROING=0 MASK=0 UIMM8() +PATTERN: EVV 0x22 V66 V0F3A MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] VL128 not64 ZEROING=0 MASK=0 UIMM8() +OPERANDS: REG0=XMM_R3():w:dq:u32 REG1=XMM_N3():r:dq:u32 REG2=GPR32_B():r:d:u32 IMM0:r:b +IFORM: VPINSRD_XMMu32_XMMu32_GPR32u32_IMM8_AVX512 +PATTERN: EVV 0x22 V66 V0F3A MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] VL128 mode64 W0 ZEROING=0 MASK=0 UIMM8() OPERANDS: REG0=XMM_R3():w:dq:u32 REG1=XMM_N3():r:dq:u32 REG2=GPR32_B():r:d:u32 IMM0:r:b IFORM: VPINSRD_XMMu32_XMMu32_GPR32u32_IMM8_AVX512 } @@ -40770,7 +40896,10 @@ ISA_SET: AVX512DQ_128N EXCEPTIONS: AVX512-E9NF REAL_OPCODE: Y ATTRIBUTES: DISP8_GPR_READER -PATTERN: EVV 0x22 V66 V0F3A MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() VL128 W0 ZEROING=0 MASK=0 UIMM8() ESIZE_32_BITS() NELEM_GPR_READER() +PATTERN: EVV 0x22 V66 V0F3A MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() VL128 not64 ZEROING=0 MASK=0 UIMM8() ESIZE_32_BITS() NELEM_GPR_READER() +OPERANDS: REG0=XMM_R3():w:dq:u32 REG1=XMM_N3():r:dq:u32 MEM0:r:d:u32 IMM0:r:b +IFORM: VPINSRD_XMMu32_XMMu32_MEMu32_IMM8_AVX512 +PATTERN: EVV 0x22 V66 V0F3A MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() VL128 mode64 W0 ZEROING=0 MASK=0 UIMM8() ESIZE_32_BITS() NELEM_GPR_READER() OPERANDS: REG0=XMM_R3():w:dq:u32 REG1=XMM_N3():r:dq:u32 MEM0:r:d:u32 IMM0:r:b IFORM: VPINSRD_XMMu32_XMMu32_MEMu32_IMM8_AVX512 } @@ -44731,9 +44860,10 @@ EXTENSION: AVX512EVEX ISA_SET: AVX512F_128 EXCEPTIONS: AVX512-E4 REAL_OPCODE: Y +COMMENT: Strange instruction that uses 32b of each 64b input element ATTRIBUTES: MASKOP_EVEX PATTERN: EVV 0x28 V66 V0F38 MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] VL128 W1 -OPERANDS: REG0=XMM_R3():w:dq:i64 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=XMM_N3():r:dq:i32 REG3=XMM_B3():r:dq:i32 +OPERANDS: REG0=XMM_R3():w:dq:i64 
REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=XMM_N3():r:dq:i64 REG3=XMM_B3():r:dq:i64 IFORM: VPMULDQ_XMMi64_MASKmskw_XMMi32_XMMi32_AVX512 } @@ -44745,9 +44875,10 @@ EXTENSION: AVX512EVEX ISA_SET: AVX512F_128 EXCEPTIONS: AVX512-E4 REAL_OPCODE: Y +COMMENT: Strange instruction that uses 32b of each 64b input element ATTRIBUTES: MEMORY_FAULT_SUPPRESSION DOUBLE_WIDE_MEMOP DISP8_FULL BROADCAST_ENABLED MASKOP_EVEX PATTERN: EVV 0x28 V66 V0F38 MOD[mm] MOD!=3 REG[rrr] RM[nnn] MODRM() VL128 W1 ESIZE_64_BITS() NELEM_FULL() -OPERANDS: REG0=XMM_R3():w:dq:i64 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=XMM_N3():r:dq:i32 MEM0:r:vv:i32:TXT=BCASTSTR +OPERANDS: REG0=XMM_R3():w:dq:i64 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=XMM_N3():r:dq:i64 MEM0:r:vv:i64:TXT=BCASTSTR IFORM: VPMULDQ_XMMi64_MASKmskw_XMMi32_MEMi32_AVX512 } @@ -44761,9 +44892,10 @@ EXTENSION: AVX512EVEX ISA_SET: AVX512F_256 EXCEPTIONS: AVX512-E4 REAL_OPCODE: Y +COMMENT: Strange instruction that uses 32b of each 64b input element ATTRIBUTES: MASKOP_EVEX PATTERN: EVV 0x28 V66 V0F38 MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] VL256 W1 -OPERANDS: REG0=YMM_R3():w:qq:i64 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=YMM_N3():r:qq:i32 REG3=YMM_B3():r:qq:i32 +OPERANDS: REG0=YMM_R3():w:qq:i64 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=YMM_N3():r:qq:i64 REG3=YMM_B3():r:qq:i64 IFORM: VPMULDQ_YMMi64_MASKmskw_YMMi32_YMMi32_AVX512 } @@ -44775,9 +44907,10 @@ EXTENSION: AVX512EVEX ISA_SET: AVX512F_256 EXCEPTIONS: AVX512-E4 REAL_OPCODE: Y +COMMENT: Strange instruction that uses 32b of each 64b input element ATTRIBUTES: MEMORY_FAULT_SUPPRESSION DOUBLE_WIDE_MEMOP DISP8_FULL BROADCAST_ENABLED MASKOP_EVEX PATTERN: EVV 0x28 V66 V0F38 MOD[mm] MOD!=3 REG[rrr] RM[nnn] MODRM() VL256 W1 ESIZE_64_BITS() NELEM_FULL() -OPERANDS: REG0=YMM_R3():w:qq:i64 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=YMM_N3():r:qq:i32 MEM0:r:vv:i32:TXT=BCASTSTR +OPERANDS: REG0=YMM_R3():w:qq:i64 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=YMM_N3():r:qq:i64 MEM0:r:vv:i64:TXT=BCASTSTR IFORM: 
VPMULDQ_YMMi64_MASKmskw_YMMi32_MEMi32_AVX512 } @@ -45301,9 +45434,10 @@ EXTENSION: AVX512EVEX ISA_SET: AVX512F_128 EXCEPTIONS: AVX512-E4 REAL_OPCODE: Y +COMMENT: Strange instruction that uses 32b of each 64b input element ATTRIBUTES: MASKOP_EVEX PATTERN: EVV 0xF4 V66 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] VL128 W1 -OPERANDS: REG0=XMM_R3():w:dq:u64 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=XMM_N3():r:dq:u32 REG3=XMM_B3():r:dq:u32 +OPERANDS: REG0=XMM_R3():w:dq:u64 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=XMM_N3():r:dq:u64 REG3=XMM_B3():r:dq:u64 IFORM: VPMULUDQ_XMMu64_MASKmskw_XMMu32_XMMu32_AVX512 } @@ -45315,9 +45449,10 @@ EXTENSION: AVX512EVEX ISA_SET: AVX512F_128 EXCEPTIONS: AVX512-E4 REAL_OPCODE: Y +COMMENT: Strange instruction that uses 32b of each 64b input element ATTRIBUTES: MEMORY_FAULT_SUPPRESSION DOUBLE_WIDE_MEMOP DISP8_FULL BROADCAST_ENABLED MASKOP_EVEX PATTERN: EVV 0xF4 V66 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] MODRM() VL128 W1 ESIZE_64_BITS() NELEM_FULL() -OPERANDS: REG0=XMM_R3():w:dq:u64 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=XMM_N3():r:dq:u32 MEM0:r:vv:u32:TXT=BCASTSTR +OPERANDS: REG0=XMM_R3():w:dq:u64 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=XMM_N3():r:dq:u64 MEM0:r:vv:u64:TXT=BCASTSTR IFORM: VPMULUDQ_XMMu64_MASKmskw_XMMu32_MEMu32_AVX512 } @@ -45331,9 +45466,10 @@ EXTENSION: AVX512EVEX ISA_SET: AVX512F_256 EXCEPTIONS: AVX512-E4 REAL_OPCODE: Y +COMMENT: Strange instruction that uses 32b of each 64b input element ATTRIBUTES: MASKOP_EVEX PATTERN: EVV 0xF4 V66 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] VL256 W1 -OPERANDS: REG0=YMM_R3():w:qq:u64 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=YMM_N3():r:qq:u32 REG3=YMM_B3():r:qq:u32 +OPERANDS: REG0=YMM_R3():w:qq:u64 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=YMM_N3():r:qq:u64 REG3=YMM_B3():r:qq:u64 IFORM: VPMULUDQ_YMMu64_MASKmskw_YMMu32_YMMu32_AVX512 } @@ -45345,9 +45481,10 @@ EXTENSION: AVX512EVEX ISA_SET: AVX512F_256 EXCEPTIONS: AVX512-E4 REAL_OPCODE: Y +COMMENT: Strange instruction that uses 32b of each 64b input element 
ATTRIBUTES: MEMORY_FAULT_SUPPRESSION DOUBLE_WIDE_MEMOP DISP8_FULL BROADCAST_ENABLED MASKOP_EVEX PATTERN: EVV 0xF4 V66 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] MODRM() VL256 W1 ESIZE_64_BITS() NELEM_FULL() -OPERANDS: REG0=YMM_R3():w:qq:u64 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=YMM_N3():r:qq:u32 MEM0:r:vv:u32:TXT=BCASTSTR +OPERANDS: REG0=YMM_R3():w:qq:u64 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=YMM_N3():r:qq:u64 MEM0:r:vv:u64:TXT=BCASTSTR IFORM: VPMULUDQ_YMMu64_MASKmskw_YMMu32_MEMu32_AVX512 } @@ -52592,8 +52729,8 @@ EXCEPTIONS: AVX512-E4 REAL_OPCODE: Y ATTRIBUTES: MASKOP_EVEX PATTERN: EVV 0x57 V66 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] VL128 W1 -OPERANDS: REG0=XMM_R3():w:dq:f64 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=XMM_N3():r:dq:f64 REG3=XMM_B3():r:dq:f64 -IFORM: VXORPD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512 +OPERANDS: REG0=XMM_R3():w:dq:u64 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=XMM_N3():r:dq:u64 REG3=XMM_B3():r:dq:u64 +IFORM: VXORPD_XMMu64_MASKmskw_XMMu64_XMMu64_AVX512 } { @@ -52606,8 +52743,8 @@ EXCEPTIONS: AVX512-E4 REAL_OPCODE: Y ATTRIBUTES: MEMORY_FAULT_SUPPRESSION MASKOP_EVEX DISP8_FULL BROADCAST_ENABLED PATTERN: EVV 0x57 V66 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] MODRM() VL128 W1 ESIZE_64_BITS() NELEM_FULL() -OPERANDS: REG0=XMM_R3():w:dq:f64 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=XMM_N3():r:dq:f64 MEM0:r:vv:f64:TXT=BCASTSTR -IFORM: VXORPD_XMMf64_MASKmskw_XMMf64_MEMf64_AVX512 +OPERANDS: REG0=XMM_R3():w:dq:u64 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=XMM_N3():r:dq:u64 MEM0:r:vv:u64:TXT=BCASTSTR +IFORM: VXORPD_XMMu64_MASKmskw_XMMu64_MEMu64_AVX512 } @@ -52622,8 +52759,8 @@ EXCEPTIONS: AVX512-E4 REAL_OPCODE: Y ATTRIBUTES: MASKOP_EVEX PATTERN: EVV 0x57 V66 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] VL256 W1 -OPERANDS: REG0=YMM_R3():w:qq:f64 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=YMM_N3():r:qq:f64 REG3=YMM_B3():r:qq:f64 -IFORM: VXORPD_YMMf64_MASKmskw_YMMf64_YMMf64_AVX512 +OPERANDS: REG0=YMM_R3():w:qq:u64 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=YMM_N3():r:qq:u64 REG3=YMM_B3():r:qq:u64 
+IFORM: VXORPD_YMMu64_MASKmskw_YMMu64_YMMu64_AVX512 } { @@ -52636,8 +52773,8 @@ EXCEPTIONS: AVX512-E4 REAL_OPCODE: Y ATTRIBUTES: MEMORY_FAULT_SUPPRESSION MASKOP_EVEX DISP8_FULL BROADCAST_ENABLED PATTERN: EVV 0x57 V66 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] MODRM() VL256 W1 ESIZE_64_BITS() NELEM_FULL() -OPERANDS: REG0=YMM_R3():w:qq:f64 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=YMM_N3():r:qq:f64 MEM0:r:vv:f64:TXT=BCASTSTR -IFORM: VXORPD_YMMf64_MASKmskw_YMMf64_MEMf64_AVX512 +OPERANDS: REG0=YMM_R3():w:qq:u64 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=YMM_N3():r:qq:u64 MEM0:r:vv:u64:TXT=BCASTSTR +IFORM: VXORPD_YMMu64_MASKmskw_YMMu64_MEMu64_AVX512 } @@ -52652,8 +52789,8 @@ EXCEPTIONS: AVX512-E4 REAL_OPCODE: Y ATTRIBUTES: MASKOP_EVEX PATTERN: EVV 0x57 V66 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] VL512 W1 -OPERANDS: REG0=ZMM_R3():w:zf64 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=ZMM_N3():r:zf64 REG3=ZMM_B3():r:zf64 -IFORM: VXORPD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512 +OPERANDS: REG0=ZMM_R3():w:zu64 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=ZMM_N3():r:zu64 REG3=ZMM_B3():r:zu64 +IFORM: VXORPD_ZMMu64_MASKmskw_ZMMu64_ZMMu64_AVX512 } { @@ -52666,8 +52803,8 @@ EXCEPTIONS: AVX512-E4 REAL_OPCODE: Y ATTRIBUTES: MEMORY_FAULT_SUPPRESSION MASKOP_EVEX DISP8_FULL BROADCAST_ENABLED PATTERN: EVV 0x57 V66 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] MODRM() VL512 W1 ESIZE_64_BITS() NELEM_FULL() -OPERANDS: REG0=ZMM_R3():w:zf64 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=ZMM_N3():r:zf64 MEM0:r:vv:f64:TXT=BCASTSTR -IFORM: VXORPD_ZMMf64_MASKmskw_ZMMf64_MEMf64_AVX512 +OPERANDS: REG0=ZMM_R3():w:zu64 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=ZMM_N3():r:zu64 MEM0:r:vv:u64:TXT=BCASTSTR +IFORM: VXORPD_ZMMu64_MASKmskw_ZMMu64_MEMu64_AVX512 } @@ -52682,8 +52819,8 @@ EXCEPTIONS: AVX512-E4 REAL_OPCODE: Y ATTRIBUTES: MASKOP_EVEX PATTERN: EVV 0x57 VNP V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] VL128 W0 -OPERANDS: REG0=XMM_R3():w:dq:f32 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=XMM_N3():r:dq:f32 REG3=XMM_B3():r:dq:f32 -IFORM: 
VXORPS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512 +OPERANDS: REG0=XMM_R3():w:dq:u32 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=XMM_N3():r:dq:u32 REG3=XMM_B3():r:dq:u32 +IFORM: VXORPS_XMMu32_MASKmskw_XMMu32_XMMu32_AVX512 } { @@ -52696,8 +52833,8 @@ EXCEPTIONS: AVX512-E4 REAL_OPCODE: Y ATTRIBUTES: MEMORY_FAULT_SUPPRESSION MASKOP_EVEX DISP8_FULL BROADCAST_ENABLED PATTERN: EVV 0x57 VNP V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] MODRM() VL128 W0 ESIZE_32_BITS() NELEM_FULL() -OPERANDS: REG0=XMM_R3():w:dq:f32 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=XMM_N3():r:dq:f32 MEM0:r:vv:f32:TXT=BCASTSTR -IFORM: VXORPS_XMMf32_MASKmskw_XMMf32_MEMf32_AVX512 +OPERANDS: REG0=XMM_R3():w:dq:u32 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=XMM_N3():r:dq:u32 MEM0:r:vv:u32:TXT=BCASTSTR +IFORM: VXORPS_XMMu32_MASKmskw_XMMu32_MEMu32_AVX512 } @@ -52712,8 +52849,8 @@ EXCEPTIONS: AVX512-E4 REAL_OPCODE: Y ATTRIBUTES: MASKOP_EVEX PATTERN: EVV 0x57 VNP V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] VL256 W0 -OPERANDS: REG0=YMM_R3():w:qq:f32 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=YMM_N3():r:qq:f32 REG3=YMM_B3():r:qq:f32 -IFORM: VXORPS_YMMf32_MASKmskw_YMMf32_YMMf32_AVX512 +OPERANDS: REG0=YMM_R3():w:qq:u32 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=YMM_N3():r:qq:u32 REG3=YMM_B3():r:qq:u32 +IFORM: VXORPS_YMMu32_MASKmskw_YMMu32_YMMu32_AVX512 } { @@ -52726,8 +52863,8 @@ EXCEPTIONS: AVX512-E4 REAL_OPCODE: Y ATTRIBUTES: MEMORY_FAULT_SUPPRESSION MASKOP_EVEX DISP8_FULL BROADCAST_ENABLED PATTERN: EVV 0x57 VNP V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] MODRM() VL256 W0 ESIZE_32_BITS() NELEM_FULL() -OPERANDS: REG0=YMM_R3():w:qq:f32 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=YMM_N3():r:qq:f32 MEM0:r:vv:f32:TXT=BCASTSTR -IFORM: VXORPS_YMMf32_MASKmskw_YMMf32_MEMf32_AVX512 +OPERANDS: REG0=YMM_R3():w:qq:u32 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=YMM_N3():r:qq:u32 MEM0:r:vv:u32:TXT=BCASTSTR +IFORM: VXORPS_YMMu32_MASKmskw_YMMu32_MEMu32_AVX512 } @@ -52742,8 +52879,8 @@ EXCEPTIONS: AVX512-E4 REAL_OPCODE: Y ATTRIBUTES: MASKOP_EVEX PATTERN: EVV 0x57 VNP V0F MOD[0b11] MOD=3 BCRC=0 
REG[rrr] RM[nnn] VL512 W0 -OPERANDS: REG0=ZMM_R3():w:zf32 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=ZMM_N3():r:zf32 REG3=ZMM_B3():r:zf32 -IFORM: VXORPS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512 +OPERANDS: REG0=ZMM_R3():w:zu32 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=ZMM_N3():r:zu32 REG3=ZMM_B3():r:zu32 +IFORM: VXORPS_ZMMu32_MASKmskw_ZMMu32_ZMMu32_AVX512 } { @@ -52756,8 +52893,8 @@ EXCEPTIONS: AVX512-E4 REAL_OPCODE: Y ATTRIBUTES: MEMORY_FAULT_SUPPRESSION MASKOP_EVEX DISP8_FULL BROADCAST_ENABLED PATTERN: EVV 0x57 VNP V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] MODRM() VL512 W0 ESIZE_32_BITS() NELEM_FULL() -OPERANDS: REG0=ZMM_R3():w:zf32 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=ZMM_N3():r:zf32 MEM0:r:vv:f32:TXT=BCASTSTR -IFORM: VXORPS_ZMMf32_MASKmskw_ZMMf32_MEMf32_AVX512 +OPERANDS: REG0=ZMM_R3():w:zu32 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=ZMM_N3():r:zu32 MEM0:r:vv:u32:TXT=BCASTSTR +IFORM: VXORPS_ZMMu32_MASKmskw_ZMMu32_MEMu32_AVX512 } @@ -53606,7 +53743,6 @@ IFORM: KXORQ_MASKmskw_MASKmskw_MASKmskw_AVX512 - ###FILE: ./datafiles/avx512ifma/ifma-isa.xed.txt #BEGIN_LEGAL From 5d0b1102bee052ce00bf47f9cfef8f73d2ac6d46 Mon Sep 17 00:00:00 2001 From: cuishuang Date: Tue, 16 Apr 2024 18:07:52 +0800 Subject: [PATCH 023/200] x86/x86asm: fix function name in comment Change-Id: Ie70c842161c96948098082d3c0ff1b026bcfd8de Reviewed-on: https://go-review.googlesource.com/c/arch/+/579198 Reviewed-by: qiu laidongfeng2 <2645477756@qq.com> LUCI-TryBot-Result: Go LUCI Reviewed-by: Ian Lance Taylor TryBot-Result: Gopher Robot Reviewed-by: Cherry Mui Auto-Submit: Ian Lance Taylor Commit-Queue: Ian Lance Taylor Run-TryBot: shuang cui --- x86/x86asm/ext_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/x86/x86asm/ext_test.go b/x86/x86asm/ext_test.go index e63f1138..2e31dd30 100644 --- a/x86/x86asm/ext_test.go +++ b/x86/x86asm/ext_test.go @@ -653,7 +653,7 @@ func enum8bit(try func([]byte)) { } } -// enum8bit generates all possible 2-byte sequences, followed by distinctive padding. 
+// enum16bit generates all possible 2-byte sequences, followed by distinctive padding. func enum16bit(try func([]byte)) { for i := 0; i < 1<<16; i++ { try([]byte{byte(i), byte(i >> 8), 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88}) From b863392466ea228f6359643b2e2b4c658761ba39 Mon Sep 17 00:00:00 2001 From: Russ Cox Date: Tue, 16 Jul 2024 11:35:16 -0400 Subject: [PATCH 024/200] LICENSE: update per Google Legal Very minor tweaks: - Remove (c) pseudosymbol. - Remove "All Rights Reserved." - Change "Google Inc." (no longer exists) to "Google LLC". [git-generate] echo ' ,s/\(c\) // ,s/ All rights reserved.// ,s/Google Inc./Google LLC/ w q ' | sam -d LICENSE Change-Id: I44ceee02758453e6afee1e63518aa275f53429d8 Reviewed-on: https://go-review.googlesource.com/c/arch/+/598518 Auto-Submit: Russ Cox LUCI-TryBot-Result: Go LUCI Reviewed-by: Ian Lance Taylor --- LICENSE | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/LICENSE b/LICENSE index d29b3726..686d8a91 100644 --- a/LICENSE +++ b/LICENSE @@ -1,4 +1,4 @@ -Copyright (c) 2015 The Go Authors. All rights reserved. +Copyright 2015 The Go Authors. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -10,7 +10,7 @@ notice, this list of conditions and the following disclaimer. copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - * Neither the name of Google Inc. nor the names of its + * Neither the name of Google LLC nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
From 9d90945922a772f53487baa5b1b03f061aebb164 Mon Sep 17 00:00:00 2001 From: Vishwanatha HD Date: Sat, 13 Jul 2024 19:52:41 +0000 Subject: [PATCH 025/200] s390x: add s390x disassembler support, GNU syntax Change-Id: Idd91cc89510ce117e49db541fd68b0fa113b92fa Reviewed-on: https://go-review.googlesource.com/c/arch/+/575675 Auto-Submit: Cherry Mui Reviewed-by: Cherry Mui Reviewed-by: David Chase Reviewed-by: Bill O'Farrell Reviewed-by: Srinivas Pokala LUCI-TryBot-Result: Go LUCI --- s390x/s390x.csv | 1277 +++++ s390x/s390xasm/Makefile | 2 + s390x/s390xasm/decode.go | 241 + s390x/s390xasm/decode_test.go | 88 + s390x/s390xasm/field.go | 98 + s390x/s390xasm/gnu.go | 1018 ++++ s390x/s390xasm/inst.go | 399 ++ s390x/s390xasm/tables.go | 5046 ++++++++++++++++++ s390x/s390xasm/testdata/decode_generated.txt | 1245 +++++ s390x/s390xmap/map.go | 636 +++ s390x/s390xspec/spec.go | 1059 ++++ s390x/s390xutil/hack.h | 56 + s390x/s390xutil/util.go | 90 + 13 files changed, 11255 insertions(+) create mode 100644 s390x/s390x.csv create mode 100644 s390x/s390xasm/Makefile create mode 100644 s390x/s390xasm/decode.go create mode 100644 s390x/s390xasm/decode_test.go create mode 100644 s390x/s390xasm/field.go create mode 100644 s390x/s390xasm/gnu.go create mode 100644 s390x/s390xasm/inst.go create mode 100644 s390x/s390xasm/tables.go create mode 100644 s390x/s390xasm/testdata/decode_generated.txt create mode 100644 s390x/s390xmap/map.go create mode 100644 s390x/s390xspec/spec.go create mode 100644 s390x/s390xutil/hack.h create mode 100644 s390x/s390xutil/util.go diff --git a/s390x/s390x.csv b/s390x/s390x.csv new file mode 100644 index 00000000..a53942d8 --- /dev/null +++ b/s390x/s390x.csv @@ -0,0 +1,1277 @@ +# Copyright 2024 The Go Authors. All rights reserved. +# Use of this source code is governed by a BSD-style +# license that can be found in the LICENSE file. +# +# This file is generated by the s390xspec program. 
+# +# Command to generate this file is: +# ./s390xspec > s390x.csv +# +# For eg: ./s390xspec z_Architecture_Principles_of_Operation.pdf > s390x.csv +# +# Specific Edition of the PDF manual used (Publication No): SA22-7832-13 +# Document link: https://www.ibm.com/docs/en/module_1678991624569/pdf/SA22-7832-13.pdf +# +# IBM Z-ISA Principles of Operation PDF instruction description. +# +# This file contains comment lines, each beginning with #, +# followed by entries in CSV format. +# +# Each line in the CSV section contains 4 fields: +# +# instruction mnemonic encoding isa-level +# +# The instruction is list of instructions picked from the Appendix-B "Lists of Instructions" heading. +# The mnemonic is the instruction mnemonics, separated by | characters. +# The encoding is the encoding, a sequence of name@startbit| describing each bit field in turn or +# a list of sequences of the form (,sequence)+. A leading comma is used to signify an +# instruction encoding requiring multiple instruction words. 
+# The fourth field represents instruction characteristics string +# + + +"ADD (32)","A R1,D2(X2,B2)","90@0|R1@8|X2@12|B2@16|D2@20|??@32","B" +"ADD (32)","AR R1,R2","26@0|R1@8|R2@12|??@16","1A" +"ADD (32)","ARK R1,R2,R3","47608@0|R3@16|//@20|R1@24|R2@28|??@32","B9F8" +"ADD (32)","AY R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|90@40|??@48","B" +"ADD (64)","AG R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|8@40|??@48","B" +"ADD (64)","AGR R1,R2","47368@0|//@16|R1@24|R2@28|??@32","B908" +"ADD (64)","AGRK R1,R2,R3","47592@0|R3@16|//@20|R1@24|R2@28|??@32","B9E8" +"ADD (64←32)","AGF R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|24@40|??@48","B" +"ADD (64←32)","AGFR R1,R2","47384@0|//@16|R1@24|R2@28|??@32","B918" +"ADD (extended BFP)","AXBR R1,R2","45898@0|//@16|R1@24|R2@28|??@32","SP Db" +"ADD (extended DFP)","AXTR R1,R2,R3","46042@0|R3@16|//@20|R1@24|R2@28|??@32","SP Dt" +"ADD (extended DFP)","AXTRA R1,R2,R3,M4","46042@0|R3@16|M4@20|R1@24|R2@28|??@32","SP Dt" +"ADD (long BFP)","ADB R1,D2(X2,B2)","237@0|R1@8|X2@12|B2@16|D2@20|//@32|26@40|??@48","Db" +"ADD (long BFP)","ADBR R1,R2","45850@0|//@16|R1@24|R2@28|??@32","Db" +"ADD (long DFP)","ADTR R1,R2,R3","46034@0|R3@16|//@20|R1@24|R2@28|??@32","Dt" +"ADD (long DFP)","ADTRA R1,R2,R3,M4","46034@0|R3@16|M4@20|R1@24|R2@28|??@32","Dt" +"ADD (short BFP)","AEB R1,D2(X2,B2)","237@0|R1@8|X2@12|B2@16|D2@20|//@32|10@40|??@48","Db" +"ADD (short BFP)","AEBR R1,R2","45834@0|//@16|R1@24|R2@28|??@32","Db" +"ADD DECIMAL","AP D1(L1,B1),D2(L2,B2)","250@0|L1@8|L2@12|B1@16|D1@20|B2@32|D2@36|??@48","Dg" +"ADD HALFWORD (32←16)","AH R1,D2(X2,B2)","74@0|R1@8|X2@12|B2@16|D2@20|??@32","B" +"ADD HALFWORD (32←16)","AHY R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|122@40|??@48","B" +"ADD HALFWORD (64→16)","AGH R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|56@40|??@48","B" +"ADD HALFWORD IMMEDIATE (32←16)","AHI R1,I2","167@0|R1@8|10@12|I2@16|??@32","A7A" +"ADD HALFWORD IMMEDIATE (64←16)","AGHI R1,I2","167@0|R1@8|11@12|I2@16|??@32","A7B" +"ADD HIGH (32)","AHHHR 
R1,R2,R3","47560@0|R3@16|//@20|R1@24|R2@28|??@32","B9C8" +"ADD HIGH (32)","AHHLR R1,R2,R3","47576@0|R3@16|//@20|R1@24|R2@28|??@32","B9D8" +"ADD IMMEDIATE (32)","AFI R1,I2","194@0|R1@8|9@12|I2@16|??@48","C29" +"ADD IMMEDIATE (32←16)","AHIK R1,R3,I2","236@0|R1@8|R3@12|I2@16|//@32|216@40|??@48","ECD8" +"ADD IMMEDIATE (32←8)","ASI D1(B1),I2","235@0|I2@8|B1@16|D1@20|106@40|??@48","ST" +"ADD IMMEDIATE (64←16)","AGHIK R1,R3,I2","236@0|R1@8|R3@12|I2@16|//@32|217@40|??@48","ECD9" +"ADD IMMEDIATE (64←32)","AGFI R1,I2","194@0|R1@8|8@12|I2@16|??@48","C28" +"ADD IMMEDIATE (64←8)","AGSI D1(B1),I2","235@0|I2@8|B1@16|D1@20|122@40|??@48","ST" +"ADD IMMEDIATE HIGH (32)","AIH R1,I2","204@0|R1@8|8@12|I2@16|??@48","CC8" +"ADD LOGICAL (32)","AL R1,D2(X2,B2)","94@0|R1@8|X2@12|B2@16|D2@20|??@32","B" +"ADD LOGICAL (32)","ALR R1,R2","30@0|R1@8|R2@12|??@16","1E" +"ADD LOGICAL (32)","ALRK R1,R2,R3","47610@0|R3@16|//@20|R1@24|R2@28|??@32","B9FA" +"ADD LOGICAL (32)","ALY R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|94@40|??@48","B" +"ADD LOGICAL (64)","ALG R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|10@40|??@48","B" +"ADD LOGICAL (64)","ALGR R1,R2","47370@0|//@16|R1@24|R2@28|??@32","B90A" +"ADD LOGICAL (64)","ALGRK R1,R2,R3","47594@0|R3@16|//@20|R1@24|R2@28|??@32","B9EA" +"ADD LOGICAL (64←32)","ALGF R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|26@40|??@48","B" +"ADD LOGICAL (64←32)","ALGFR R1,R2","47386@0|//@16|R1@24|R2@28|??@32","B91A" +"ADD LOGICAL HIGH (32)","ALHHHR R1,R2,R3","47562@0|R3@16|//@20|R1@24|R2@28|??@32","B9CA" +"ADD LOGICAL HIGH (32)","ALHHLR R1,R2,R3","47578@0|R3@16|//@20|R1@24|R2@28|??@32","B9DA" +"ADD LOGICAL IMMEDIATE (32)","ALFI R1,I2","194@0|R1@8|11@12|I2@16|??@48","C2B" +"ADD LOGICAL IMMEDIATE (64←32)","ALGFI R1,I2","194@0|R1@8|10@12|I2@16|??@48","C2A" +"ADD LOGICAL WITH CARRY (32)","ALC R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|152@40|??@48","B" +"ADD LOGICAL WITH CARRY (32)","ALCR R1,R2","47512@0|//@16|R1@24|R2@28|??@32","B998" +"ADD LOGICAL WITH CARRY (64)","ALCG 
R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|136@40|??@48","B" +"ADD LOGICAL WITH CARRY (64)","ALCGR R1,R2","47496@0|//@16|R1@24|R2@28|??@32","B988" +"ADD LOGICAL WITH SIGNED IMMEDIATE(32→16)","ALHSIK R1,R3,I2","236@0|R1@8|R3@12|I2@16|//@32|218@40|??@48","ECDA 7-31" +"ADD LOGICAL WITH SIGNED IMMEDIATE (32←8)","ALSI D1(B1),I2","235@0|I2@8|B1@16|D1@20|110@40|??@48","ST" +"ADD LOGICAL WITH SIGNED IMMEDIATE(64→16)","ALGHSIK R1,R3,I2","236@0|R1@8|R3@12|I2@16|//@32|219@40|??@48","ECDB 7-31" +"ADD LOGICAL WITH SIGNED IMMEDIATE (64→8)","ALGSI D1(B1),I2","235@0|I2@8|B1@16|D1@20|126@40|??@48","ST" +"ADD LOGICAL WITH SIGNED IMMEDIATE HIGH(32)","ALSIH R1,I2","204@0|R1@8|10@12|I2@16|??@48","CCA" +"ADD LOGICAL WITH SIGNED IMMEDIATE HIGH(32)","ALSIHN R1,I2","204@0|R1@8|11@12|I2@16|??@48","CCB" +"ADD NORMALIZED (extended HFP)","AXR R1,R2","54@0|R1@8|R2@12|??@16","SP Da" +"ADD NORMALIZED (long HFP)","AD R1,D2(X2,B2)","106@0|R1@8|X2@12|B2@16|D2@20|??@32","Da" +"ADD NORMALIZED (long HFP)","ADR R1,R2","42@0|R1@8|R2@12|??@16","Da" +"ADD NORMALIZED (short HFP)","AE R1,D2(X2,B2)","122@0|R1@8|X2@12|B2@16|D2@20|??@32","Da" +"ADD NORMALIZED (short HFP)","AER R1,R2","58@0|R1@8|R2@12|??@16","Da" +"ADD UNNORMALIZED (long HFP)","AW R1,D2(X2,B2)","110@0|R1@8|X2@12|B2@16|D2@20|??@32","Da" +"ADD UNNORMALIZED (long HFP)","AWR R1,R2","46@0|R1@8|R2@12|??@16","Da" +"ADD UNNORMALIZED (short HFP)","AU R1,D2(X2,B2)","126@0|R1@8|X2@12|B2@16|D2@20|??@32","Da" +"ADD UNNORMALIZED (short HFP)","AUR R1,R2","62@0|R1@8|R2@12|??@16","Da" +"AND (32)","N R1,D2(X2,B2)","84@0|R1@8|X2@12|B2@16|D2@20|??@32","B" +"AND (32)","NR R1,R2","20@0|R1@8|R2@12|??@16","14" +"AND (32)","NRK R1,R2,R3","47604@0|R3@16|//@20|R1@24|R2@28|??@32","B9F4" +"AND (32)","NY R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|84@40|??@48","B" +"AND (64)","NG R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|128@40|??@48","B" +"AND (64)","NGR R1,R2","47488@0|//@16|R1@24|R2@28|??@32","B980" +"AND (64)","NGRK 
R1,R2,R3","47588@0|R3@16|//@20|R1@24|R2@28|??@32","B9E4" +"AND (character)","NC D1(L1,B1),D2(B2)","212@0|L1@8|B1@16|D1@20|B2@32|D2@36|??@48","ST" +"AND (immediate)","NI D1(B1),I2","148@0|I2@8|B1@16|D1@20|??@32","ST" +"AND (immediate)","NIY D1(B1),I2","235@0|I2@8|B1@16|D1@20|84@40|??@48","ST" +"AND IMMEDIATE (high high)","NIHH R1,I2","165@0|R1@8|4@12|I2@16|??@32","A54" +"AND IMMEDIATE (high low)","NIHL R1,I2","165@0|R1@8|5@12|I2@16|??@32","A55" +"AND IMMEDIATE (high)","NIHF R1,I2","192@0|R1@8|10@12|I2@16|??@48","C0A" +"AND IMMEDIATE (low high)","NILH R1,I2","165@0|R1@8|6@12|I2@16|??@32","A56" +"AND IMMEDIATE (low low)","NILL R1,I2","165@0|R1@8|7@12|I2@16|??@32","A57" +"AND IMMEDIATE (low)","NILF R1,I2","192@0|R1@8|11@12|I2@16|??@48","C0B" +"AND WITH COMPLEMENT(32)","NCRK R1,R2,R3","47605@0|R3@16|//@20|R1@24|R2@28|??@32","B9F5" +"AND WITH COMPLEMENT(64)","NCGRK R1,R2,R3","47589@0|R3@16|//@20|R1@24|R2@28|??@32","B9E5" +"BRANCH AND LINK","BAL R1,D2(X2,B2)","69@0|R1@8|X2@12|B2@16|D2@20|??@32","B" +"BRANCH AND LINK","BALR R1,R2","5@0|R1@8|R2@12|??@16","B" +"BRANCH AND SAVE","BAS R1,D2(X2,B2)","77@0|R1@8|X2@12|B2@16|D2@20|??@32","B" +"BRANCH AND SAVE","BASR R1,R2","13@0|R1@8|R2@12|??@16","B" +"BRANCH AND SAVE AND SET MODE","BASSM R1,R2","12@0|R1@8|R2@12|??@16","B" +"BRANCH AND SET AUTHORITY","BSA R1,R2","45658@0|//@16|R1@24|R2@28|??@32","SO" +"BRANCH AND SET MODE","BSM R1,R2","11@0|R1@8|R2@12|??@16","B" +"BRANCH AND STACK","BAKR R1,R2","45632@0|//@16|R1@24|R2@28|??@32","Z" +"BRANCH IN SUBSPACE GROUP","BSG R1,R2","45656@0|//@16|R1@24|R2@28|??@32","SO" +"BRANCH INDIRECT ON CONDITION","BIC M1,D2(X2,B2)","227@0|M1@8|X2@12|B2@16|D2@20|71@40|??@48","B" +"BRANCH ON CONDITION","BC M1,D2(X2,B2)","71@0|M1@8|X2@12|B2@16|D2@20|??@32","B" +"BRANCH ON CONDITION","BCR M1,R2","7@0|M1@8|R2@12|??@16","B" +"BRANCH ON COUNT (32)","BCT R1,D2(X2,B2)","70@0|R1@8|X2@12|B2@16|D2@20|??@32","B" +"BRANCH ON COUNT (32)","BCTR R1,R2","6@0|R1@8|R2@12|??@16","B" +"BRANCH ON COUNT (64)","BCTG 
R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|70@40|??@48","B" +"BRANCH ON COUNT (64)","BCTGR R1,R2","47430@0|//@16|R1@24|R2@28|??@32","B" +"BRANCH ON INDEX HIGH (32)","BXH R1,R3,D2(B2)","134@0|R1@8|R3@12|B2@16|D2@20|??@32","B" +"BRANCH ON INDEX HIGH (64)","BXHG R1,R3,D2(B2)","235@0|R1@8|R3@12|B2@16|D2@20|68@40|??@48","B" +"BRANCH ON INDEX LOW OR EQUAL (32)","BXLE R1,R3,D2(B2)","135@0|R1@8|R3@12|B2@16|D2@20|??@32","B" +"BRANCH ON INDEX LOW OR EQUAL (64)","BXLEG R1,R3,D2(B2)","235@0|R1@8|R3@12|B2@16|D2@20|69@40|??@48","B" +"BRANCH PREDICTION PRELOAD","BPP M1,RI2,D3(B3)","199@0|M1@8|//@12|B3@16|D3@20|RI2@32|??@48","C7" +"BRANCH PREDICTION RELATIVE PRELOAD","BPRP M1,RI2,RI3","197@0|M1@8|RI2@12|RI3@24|??@48","C5" +"BRANCH RELATIVE AND SAVE","BRAS R1,RI2","167@0|R1@8|5@12|RI2@16|??@32","B" +"BRANCH RELATIVE AND SAVE LONG","BRASL R1,RI2","192@0|R1@8|5@12|RI2@16|??@48","B" +"BRANCH RELATIVE ON CONDITION","BRC M1,RI2","167@0|M1@8|4@12|RI2@16|??@32","B" +"BRANCH RELATIVE ON CONDITION LONG","BRCL M1,RI2","192@0|M1@8|4@12|RI2@16|??@48","B" +"BRANCH RELATIVE ON COUNT (32)","BRCT R1,RI2","167@0|R1@8|6@12|RI2@16|??@32","B" +"BRANCH RELATIVE ON COUNT (64)","BRCTG R1,RI2","167@0|R1@8|7@12|RI2@16|??@32","B" +"BRANCH RELATIVE ON COUNT HIGH (32)","BRCTH R1,RI2","204@0|R1@8|6@12|RI2@16|??@48","B" +"BRANCH RELATIVE ON INDEX HIGH (32)","BRXH R1,R3,RI2","132@0|R1@8|R3@12|RI2@16|??@32","B" +"BRANCH RELATIVE ON INDEX HIGH (64)","BRXHG R1,R3,RI2","236@0|R1@8|R3@12|RI2@16|//@32|68@40|??@48","B" +"BRANCH RELATIVE ON INDEX LOW OR EQ. (32)","BRXLE R1,R3,RI2","133@0|R1@8|R3@12|RI2@16|??@32","B" +"BRANCH RELATIVE ON INDEX LOW OR EQ. 
(64)","BRXLG R1,R3,RI2","236@0|R1@8|R3@12|RI2@16|//@32|69@40|??@48","B" +"CANCEL SUBCHANNEL","XSCH","45686@0|//@16|??@32","OP" +"CHECKSUM","CKSM R1,R2","45633@0|//@16|R1@24|R2@28|??@32","SP IC" +"CIPHER MESSAGE","KM R1,R2","47406@0|//@16|R1@24|R2@28|??@32","SP IC" +"CIPHER MESSAGE WITH AUTHENTICATION","KMA R1,R3,R2","47401@0|R3@16|//@20|R1@24|R2@28|??@32","SP IC" +"CIPHER MESSAGE WITH CHAINING","KMC R1,R2","47407@0|//@16|R1@24|R2@28|??@32","SP IC" +"CIPHER MESSAGE WITH CIPHER FEEDBACK","KMF R1,R2","47402@0|//@16|R1@24|R2@28|??@32","SP IC" +"CIPHER MESSAGE WITH COUNTER","KMCTR R1,R3,R2","47405@0|R3@16|//@20|R1@24|R2@28|??@32","SP IC" +"CIPHER MESSAGE WITH OUTPUT FEEDBACK","KMO R1,R2","47403@0|//@16|R1@24|R2@28|??@32","SP IC" +"CLEAR SUBCHANNEL","CSCH","45616@0|//@16|??@32","OP" +"COMPARE (32)","C R1,D2(X2,B2)","89@0|R1@8|X2@12|B2@16|D2@20|??@32","B" +"COMPARE (32)","CR R1,R2","25@0|R1@8|R2@12|??@16","19" +"COMPARE (32)","CY R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|89@40|??@48","B" +"COMPARE (64)","CG R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|32@40|??@48","B" +"COMPARE (64)","CGR R1,R2","47392@0|//@16|R1@24|R2@28|??@32","B920" +"COMPARE (64←32)","CGF R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|48@40|??@48","B" +"COMPARE (64←32)","CGFR R1,R2","47408@0|//@16|R1@24|R2@28|??@32","B930" +"COMPARE (extended BFP)","CXBR R1,R2","45897@0|//@16|R1@24|R2@28|??@32","SP Db" +"COMPARE (extended DFP)","CXTR R1,R2","46060@0|//@16|R1@24|R2@28|??@32","SP Dt" +"COMPARE (extended HFP)","CXR R1,R2","45929@0|//@16|R1@24|R2@28|??@32","SP Da" +"COMPARE (long BFP)","CDB R1,D2(X2,B2)","237@0|R1@8|X2@12|B2@16|D2@20|//@32|25@40|??@48","Db" +"COMPARE (long BFP)","CDBR R1,R2","45849@0|//@16|R1@24|R2@28|??@32","Db" +"COMPARE (long DFP)","CDTR R1,R2","46052@0|//@16|R1@24|R2@28|??@32","Dt" +"COMPARE (long HFP)","CD R1,D2(X2,B2)","105@0|R1@8|X2@12|B2@16|D2@20|??@32","Da" +"COMPARE (long HFP)","CDR R1,R2","41@0|R1@8|R2@12|??@16","Da" +"COMPARE (short BFP)","CEB 
R1,D2(X2,B2)","237@0|R1@8|X2@12|B2@16|D2@20|//@32|9@40|??@48","Db" +"COMPARE (short BFP)","CEBR R1,R2","45833@0|//@16|R1@24|R2@28|??@32","Db" +"COMPARE (short HFP)","CE R1,D2(X2,B2)","121@0|R1@8|X2@12|B2@16|D2@20|??@32","Da" +"COMPARE (short HFP)","CER R1,R2","57@0|R1@8|R2@12|??@16","Da" +"COMPARE AND BRANCH (32)","CRB R1,R2,M3,D4(B4)","236@0|R1@8|R2@12|B4@16|D4@20|M3@32|//@36|246@40|??@48","B" +"COMPARE AND BRANCH (64)","CGRB R1,R2,M3,D4(B4)","236@0|R1@8|R2@12|B4@16|D4@20|M3@32|//@36|228@40|??@48","B" +"COMPARE AND BRANCH RELATIVE (32)","CRJ R1,R2,M3,RI4","236@0|R1@8|R2@12|RI4@16|M3@32|//@36|118@40|??@48","B" +"COMPARE AND BRANCH RELATIVE (64)","CGRJ R1,R2,M3,RI4","236@0|R1@8|R2@12|RI4@16|M3@32|//@36|100@40|??@48","B" +"COMPARE AND FORM CODEWORD","CFC D2(B2)","45594@0|B2@16|D2@20|??@32","SP II" +"COMPARE AND REPLACE DAT TABLE ENTRY","CRDTE R1,R3,R2,M4","47503@0|R3@16|M4@20|R1@24|R2@28|??@32","SP" +"COMPARE AND SIGNAL (extended BFP)","KXBR R1,R2","45896@0|//@16|R1@24|R2@28|??@32","SP Db" +"COMPARE AND SIGNAL (extended DFP)","KXTR R1,R2","46056@0|//@16|R1@24|R2@28|??@32","SP Dt" +"COMPARE AND SIGNAL (long BFP)","KDB R1,D2(X2,B2)","237@0|R1@8|X2@12|B2@16|D2@20|//@32|24@40|??@48","Db" +"COMPARE AND SIGNAL (long BFP)","KDBR R1,R2","45848@0|//@16|R1@24|R2@28|??@32","Db" +"COMPARE AND SIGNAL (long DFP)","KDTR R1,R2","46048@0|//@16|R1@24|R2@28|??@32","Dt" +"COMPARE AND SIGNAL (short BFP)","KEB R1,D2(X2,B2)","237@0|R1@8|X2@12|B2@16|D2@20|//@32|8@40|??@48","Db" +"COMPARE AND SIGNAL (short BFP)","KEBR R1,R2","45832@0|//@16|R1@24|R2@28|??@32","Db" +"COMPARE AND SWAP (32)","CS R1,R3,D2(B2)","186@0|R1@8|R3@12|B2@16|D2@20|??@32","SP" +"COMPARE AND SWAP (32)","CSY R1,R3,D2(B2)","235@0|R1@8|R3@12|B2@16|D2@20|20@40|??@48","SP" +"COMPARE AND SWAP (64)","CSG R1,R3,D2(B2)","235@0|R1@8|R3@12|B2@16|D2@20|48@40|??@48","SP" +"COMPARE AND SWAP AND PURGE (32)","CSP R1,R2","45648@0|//@16|R1@24|R2@28|??@32","SP" +"COMPARE AND SWAP AND PURGE (64)","CSPG 
R1,R2","47498@0|//@16|R1@24|R2@28|??@32","SP" +"COMPARE AND SWAP AND STORE","CSST D1(B1),D2(B2),R3","200@0|R3@8|2@12|B1@16|D1@20|B2@32|D2@36|??@48","SP" +"COMPARE AND TRAP (32)","CRT R1,R2,M3","47474@0|M3@16|//@20|R1@24|R2@28|??@32","B972" +"COMPARE AND TRAP (64)","CGRT R1,R2,M3","47456@0|M3@16|//@20|R1@24|R2@28|??@32","B960" +"COMPARE BIASED EXPONENT (extended DFP)","CEXTR R1,R2","46076@0|//@16|R1@24|R2@28|??@32","SP Dt" +"COMPARE BIASED EXPONENT (long DFP)","CEDTR R1,R2","46068@0|//@16|R1@24|R2@28|??@32","Dt" +"COMPARE DECIMAL","CP D1(L1,B1),D2(L2,B2)","249@0|L1@8|L2@12|B1@16|D1@20|B2@32|D2@36|??@48","Dg" +"COMPARE DOUBLE AND SWAP (32)","CDS R1,R3,D2(B2)","187@0|R1@8|R3@12|B2@16|D2@20|??@32","SP" +"COMPARE DOUBLE AND SWAP (32)","CDSY R1,R3,D2(B2)","235@0|R1@8|R3@12|B2@16|D2@20|49@40|??@48","SP" +"COMPARE DOUBLE AND SWAP (64)","CDSG R1,R3,D2(B2)","235@0|R1@8|R3@12|B2@16|D2@20|62@40|??@48","SP" +"COMPARE HALFWORD (32→16)","CH R1,D2(X2,B2)","73@0|R1@8|X2@12|B2@16|D2@20|??@32","B" +"COMPARE HALFWORD (32→16)","CHY R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|121@40|??@48","B" +"COMPARE HALFWORD (64←16)","CGH R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|52@40|??@48","B" +"COMPARE HALFWORD IMMEDIATE (16→16)","CHHSI D1(B1),I2","58708@0|B1@16|D1@20|I2@32|??@48","B" +"COMPARE HALFWORD IMMEDIATE (32←16)","CHI R1,I2","167@0|R1@8|14@12|I2@16|??@32","A7E" +"COMPARE HALFWORD IMMEDIATE (32←16)","CHSI D1(B1),I2","58716@0|B1@16|D1@20|I2@32|??@48","B" +"COMPARE HALFWORD IMMEDIATE (64←16)","CGHI R1,I2","167@0|R1@8|15@12|I2@16|??@32","A7F" +"COMPARE HALFWORD IMMEDIATE (64←16)","CGHSI D1(B1),I2","58712@0|B1@16|D1@20|I2@32|??@48","B" +"COMPAREHALFWORDRELATIVE LONG (32→16)","CHRL R1,RI2","198@0|R1@8|5@12|RI2@16|??@48","C65" +"COMPAREHALFWORDRELATIVE LONG (64←16)","CGHRL R1,RI2","198@0|R1@8|4@12|RI2@16|??@48","C64" +"COMPARE HIGH (32)","CHF R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|205@40|??@48","B" +"COMPARE HIGH (32)","CHHR R1,R2","47565@0|//@16|R1@24|R2@28|??@32","B9CD" +"COMPARE HIGH 
(32)","CHLR R1,R2","47581@0|//@16|R1@24|R2@28|??@32","B9DD" +"COMPARE IMMEDIATE (32)","CFI R1,I2","194@0|R1@8|13@12|I2@16|??@48","C2D" +"COMPARE IMMEDIATE (64←32)","CGFI R1,I2","194@0|R1@8|12@12|I2@16|??@48","C2C" +"COMPARE IMMEDIATE AND BRANCH (32←8)","CIB R1,I2,M3,D4(B4)","236@0|R1@8|M3@12|B4@16|D4@20|I2@32|254@40|??@48","B" +"COMPARE IMMEDIATE AND BRANCH (64←8)","CGIB R1,I2,M3,D4(B4)","236@0|R1@8|M3@12|B4@16|D4@20|I2@32|252@40|??@48","B" +"COMPARE IMMEDIATE AND BRANCH RELATIVE(32→8)","CIJ R1,I2,M3,RI4","236@0|R1@8|M3@12|RI4@16|I2@32|126@40|??@48","B" +"COMPARE IMMEDIATE AND BRANCH RELATIVE(64→8)","CGIJ R1,I2,M3,RI4","236@0|R1@8|M3@12|RI4@16|I2@32|124@40|??@48","B" +"COMPARE IMMEDIATE AND TRAP (32→16)","CIT R1,I2,M3","236@0|R1@8|//@12|I2@16|M3@32|//@36|114@40|??@48","EC72" +"COMPARE IMMEDIATE AND TRAP (64←16)","CGIT R1,I2,M3","236@0|R1@8|//@12|I2@16|M3@32|//@36|112@40|??@48","EC70" +"COMPARE IMMEDIATE HIGH (32)","CIH R1,I2","204@0|R1@8|13@12|I2@16|??@48","CCD" +"COMPARE LOGICAL (32)","CL R1,D2(X2,B2)","85@0|R1@8|X2@12|B2@16|D2@20|??@32","B" +"COMPARE LOGICAL (32)","CLR R1,R2","21@0|R1@8|R2@12|??@16","15" +"COMPARE LOGICAL (32)","CLY R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|85@40|??@48","B" +"COMPARE LOGICAL (64)","CLG R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|33@40|??@48","B" +"COMPARE LOGICAL (64)","CLGR R1,R2","47393@0|//@16|R1@24|R2@28|??@32","B921" +"COMPARE LOGICAL (64→32)","CLGF R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|49@40|??@48","B" +"COMPARE LOGICAL (64→32)","CLGFR R1,R2","47409@0|//@16|R1@24|R2@28|??@32","B931" +"COMPARE LOGICAL (character)","CLC D1(L1,B1),D2(B2)","213@0|L1@8|B1@16|D1@20|B2@32|D2@36|??@48","B" +"COMPARE LOGICAL (immediate)","CLI D1(B1),I2","149@0|I2@8|B1@16|D1@20|??@32","B" +"COMPARE LOGICAL (immediate)","CLIY D1(B1),I2","235@0|I2@8|B1@16|D1@20|85@40|??@48","B" +"COMPARE LOGICAL AND BRANCH (32)","CLRB R1,R2,M3,D4(B4)","236@0|R1@8|R2@12|B4@16|D4@20|M3@32|//@36|247@40|??@48","B" +"COMPARE LOGICAL AND BRANCH (64)","CLGRB 
R1,R2,M3,D4(B4)","236@0|R1@8|R2@12|B4@16|D4@20|M3@32|//@36|229@40|??@48","B" +"COMPARE LOGICAL AND BRANCH RELATIVE(32)","CLRJ R1,R2,M3,RI4","236@0|R1@8|R2@12|RI4@16|M3@32|//@36|119@40|??@48","B" +"COMPARE LOGICAL AND BRANCH RELATIVE(64)","CLGRJ R1,R2,M3,RI4","236@0|R1@8|R2@12|RI4@16|M3@32|//@36|101@40|??@48","B" +"COMPARE LOGICAL AND TRAP (32)","CLRT R1,R2,M3","47475@0|M3@16|//@20|R1@24|R2@28|??@32","B973" +"COMPARE LOGICAL AND TRAP (32)","CLT R1,M3,D2(B2)","235@0|R1@8|M3@12|B2@16|D2@20|35@40|??@48","B" +"COMPARE LOGICAL AND TRAP (64)","CLGRT R1,R2,M3","47457@0|M3@16|//@20|R1@24|R2@28|??@32","B961" +"COMPARE LOGICAL AND TRAP (64)","CLGT R1,M3,D2(B2)","235@0|R1@8|M3@12|B2@16|D2@20|43@40|??@48","B" +"COMPARE LOGICAL CHAR. UNDER MASK (high)","CLMH R1,M3,D2(B2)","235@0|R1@8|M3@12|B2@16|D2@20|32@40|??@48","B" +"COMPARE LOGICAL CHAR. UNDER MASK (low)","CLM R1,M3,D2(B2)","189@0|R1@8|M3@12|B2@16|D2@20|??@32","B" +"COMPARE LOGICAL CHAR. UNDER MASK (low)","CLMY R1,M3,D2(B2)","235@0|R1@8|M3@12|B2@16|D2@20|33@40|??@48","B" +"COMPARE LOGICAL HIGH (32)","CLHF R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|207@40|??@48","B" +"COMPARE LOGICAL HIGH (32)","CLHHR R1,R2","47567@0|//@16|R1@24|R2@28|??@32","B9CF" +"COMPARE LOGICAL HIGH (32)","CLHLR R1,R2","47583@0|//@16|R1@24|R2@28|??@32","B9DF" +"COMPARE LOGICAL IMMEDIATE (16←16)","CLHHSI D1(B1),I2","58709@0|B1@16|D1@20|I2@32|??@48","B" +"COMPARE LOGICAL IMMEDIATE (32)","CLFI R1,I2","194@0|R1@8|15@12|I2@16|??@48","C2F" +"COMPARE LOGICAL IMMEDIATE (32←16)","CLFHSI D1(B1),I2","58717@0|B1@16|D1@20|I2@32|??@48","B" +"COMPARE LOGICAL IMMEDIATE (64←16)","CLGHSI D1(B1),I2","58713@0|B1@16|D1@20|I2@32|??@48","B" +"COMPARE LOGICAL IMMEDIATE (64←32)","CLGFI R1,I2","194@0|R1@8|14@12|I2@16|??@48","C2E" +"COMPARE LOGICAL IMMEDIATE AND BRANCH(32←8)","CLIB R1,I2,M3,D4(B4)","236@0|R1@8|M3@12|B4@16|D4@20|I2@32|255@40|??@48","B" +"COMPARE LOGICAL IMMEDIATE AND BRANCH(64→8)","CLGIB R1,I2,M3,D4(B4)","236@0|R1@8|M3@12|B4@16|D4@20|I2@32|253@40|??@48","B" 
+"COMPARE LOGICAL IMMEDIATE AND BRANCH RELATIVE (32→8)","CLIJ R1,I2,M3,RI4","236@0|R1@8|M3@12|RI4@16|I2@32|127@40|??@48","B" +"COMPARE LOGICAL IMMEDIATE AND BRANCH RELATIVE (64→8)","CLGIJ R1,I2,M3,RI4","236@0|R1@8|M3@12|RI4@16|I2@32|125@40|??@48","B" +"COMPARE LOGICAL IMMEDIATE AND TRAP(32→16)","CLFIT R1,I2,M3","236@0|R1@8|//@12|I2@16|M3@32|//@36|115@40|??@48","EC73" +"COMPARE LOGICAL IMMEDIATE AND TRAP(64←16)","CLGIT R1,I2,M3","236@0|R1@8|//@12|I2@16|M3@32|//@36|113@40|??@48","EC71" +"COMPARE LOGICAL IMMEDIATE HIGH (32)","CLIH R1,I2","204@0|R1@8|15@12|I2@16|??@48","CCF" +"COMPARE LOGICAL LONG","CLCL R1,R2","15@0|R1@8|R2@12|??@16","SP II" +"COMPARE LOGICAL LONG EXTENDED","CLCLE R1,R3,D2(B2)","169@0|R1@8|R3@12|B2@16|D2@20|??@32","SP IC" +"COMPARE LOGICAL LONG UNICODE","CLCLU R1,R3,D2(B2)","235@0|R1@8|R3@12|B2@16|D2@20|143@40|??@48","SP IC" +"COMPARE LOGICAL RELATIVE LONG (32)","CLRL R1,RI2","198@0|R1@8|15@12|RI2@16|??@48","SP" +"COMPARE LOGICAL RELATIVE LONG (32→16)","CLHRL R1,RI2","198@0|R1@8|7@12|RI2@16|??@48","C67" +"COMPARE LOGICAL RELATIVE LONG (64)","CLGRL R1,RI2","198@0|R1@8|10@12|RI2@16|??@48","SP" +"COMPARE LOGICAL RELATIVE LONG (64→16)","CLGHRL R1,RI2","198@0|R1@8|6@12|RI2@16|??@48","C66" +"COMPARE LOGICAL RELATIVE LONG (64→32)","CLGFRL R1,RI2","198@0|R1@8|14@12|RI2@16|??@48","SP" +"COMPARE LOGICAL STRING","CLST R1,R2","45661@0|//@16|R1@24|R2@28|??@32","SP IC" +"COMPARE RELATIVE LONG (32)","CRL R1,RI2","198@0|R1@8|13@12|RI2@16|??@48","SP" +"COMPARE RELATIVE LONG (64)","CGRL R1,RI2","198@0|R1@8|8@12|RI2@16|??@48","SP" +"COMPARE RELATIVE LONG (64←32)","CGFRL R1,RI2","198@0|R1@8|12@12|RI2@16|??@48","SP" +"COMPARE UNTIL SUBSTRING EQUAL","CUSE R1,R2","45655@0|//@16|R1@24|R2@28|??@32","SP II" +"COMPRESSION CALL","CMPSC R1,R2","45667@0|//@16|R1@24|R2@28|??@32","SP II" +"COMPUTE DIGITAL SIGNATURE AUTHENTICATION","KDSA R1,R2","47418@0|//@16|R1@24|R2@28|??@32","SP IC" +"COMPUTE INTERMEDIATE MESSAGE DIGEST","KIMD R1,R2","47422@0|//@16|R1@24|R2@28|??@32","SP IC" 
+"COMPUTE LAST MESSAGE DIGEST","KLMD R1,R2","47423@0|//@16|R1@24|R2@28|??@32","SP IC" +"COMPUTE MESSAGE AUTHENTICATION CODE","KMAC R1,R2","47390@0|//@16|R1@24|R2@28|??@32","SP IC" +"CONVERT BFP TO HFP (long)","THDR R1,R2","45913@0|//@16|R1@24|R2@28|??@32","Da" +"CONVERT BFP TO HFP (short to long)","THDER R1,R2","45912@0|//@16|R1@24|R2@28|??@32","Da" +"CONVERT FROM FIXED (32 to extended BFP)","CXFBR R1,R2","45974@0|//@16|R1@24|R2@28|??@32","SP Db" +"CONVERT FROM FIXED (32 to extended BFP)","CXFBRA R1,M3,R2,M4","45974@0|M3@16|M4@20|R1@24|R2@28|??@32","SP Db" +"CONVERT FROM FIXED (32 to extended DFP)","CXFTR R1,M3,R2,M4","47449@0|M3@16|M4@20|R1@24|R2@28|??@32","SP Dt" +"CONVERT FROM FIXED (32 to extended HFP)","CXFR R1,R2","46006@0|//@16|R1@24|R2@28|??@32","SP Da" +"CONVERT FROM FIXED (32 to long BFP)","CDFBR R1,R2","45973@0|//@16|R1@24|R2@28|??@32","Db" +"CONVERT FROM FIXED (32 to long BFP)","CDFBRA R1,M3,R2,M4","45973@0|M3@16|M4@20|R1@24|R2@28|??@32","SP Db" +"CONVERT FROM FIXED (32 to long DFP)","CDFTR R1,M3,R2,M4","47441@0|M3@16|M4@20|R1@24|R2@28|??@32","Dt" +"CONVERT FROM FIXED (32 to long HFP)","CDFR R1,R2","46005@0|//@16|R1@24|R2@28|??@32","Da" +"CONVERT FROM FIXED (32 to short BFP)","CEFBR R1,R2","45972@0|//@16|R1@24|R2@28|??@32","Db" +"CONVERT FROM FIXED (32 to short BFP)","CEFBRA R1,M3,R2,M4","45972@0|M3@16|M4@20|R1@24|R2@28|??@32","SP Db" +"CONVERT FROM FIXED (32 to short HFP)","CEFR R1,R2","46004@0|//@16|R1@24|R2@28|??@32","Da" +"CONVERT FROM FIXED (64 to extended BFP)","CXGBR R1,R2","45990@0|//@16|R1@24|R2@28|??@32","SP Db" +"CONVERT FROM FIXED (64 to extended BFP)","CXGBRA R1,M3,R2,M4","45990@0|M3@16|M4@20|R1@24|R2@28|??@32","SP Db" +"CONVERT FROM FIXED (64 to extended DFP)","CXGTR R1,R2","46073@0|//@16|R1@24|R2@28|??@32","SP Dt" +"CONVERT FROM FIXED (64 to extended DFP)","CXGTRA R1,M3,R2,M4","46073@0|M3@16|M4@20|R1@24|R2@28|??@32","SP Dt" +"CONVERT FROM FIXED (64 to extended HFP)","CXGR R1,R2","46022@0|//@16|R1@24|R2@28|??@32","SP Da" +"CONVERT FROM 
FIXED (64 to long BFP)","CDGBR R1,R2","45989@0|//@16|R1@24|R2@28|??@32","Db" +"CONVERT FROM FIXED (64 to long BFP)","CDGBRA R1,M3,R2,M4","45989@0|M3@16|M4@20|R1@24|R2@28|??@32","SP Db" +"CONVERT FROM FIXED (64 to long DFP)","CDGTR R1,R2","46065@0|//@16|R1@24|R2@28|??@32","Dt" +"CONVERT FROM FIXED (64 to long DFP)","CDGTRA R1,M3,R2,M4","46065@0|M3@16|M4@20|R1@24|R2@28|??@32","Dt" +"CONVERT FROM FIXED (64 to long HFP)","CDGR R1,R2","46021@0|//@16|R1@24|R2@28|??@32","Da" +"CONVERT FROM FIXED (64 to short BFP)","CEGBR R1,R2","45988@0|//@16|R1@24|R2@28|??@32","Db" +"CONVERT FROM FIXED (64 to short BFP)","CEGBRA R1,M3,R2,M4","45988@0|M3@16|M4@20|R1@24|R2@28|??@32","SP Db" +"CONVERT FROM FIXED (64 to short HFP)","CEGR R1,R2","46020@0|//@16|R1@24|R2@28|??@32","Da" +"CONVERT FROM LOGICAL (32 to extended BFP)","CXLFBR R1,M3,R2,M4","45970@0|M3@16|M4@20|R1@24|R2@28|??@32","SP Db" +"CONVERT FROM LOGICAL (32 to extended DFP)","CXLFTR R1,M3,R2,M4","47451@0|M3@16|M4@20|R1@24|R2@28|??@32","SP Dt" +"CONVERT FROM LOGICAL (32 to long BFP)","CDLFBR R1,M3,R2,M4","45969@0|M3@16|M4@20|R1@24|R2@28|??@32","SP Db" +"CONVERT FROM LOGICAL (32 to long DFP)","CDLFTR R1,M3,R2,M4","47443@0|M3@16|M4@20|R1@24|R2@28|??@32","Dt" +"CONVERT FROM LOGICAL (32 to short BFP)","CELFBR R1,M3,R2,M4","45968@0|M3@16|M4@20|R1@24|R2@28|??@32","SP Db" +"CONVERT FROM LOGICAL (64 to extended BFP)","CXLGBR R1,M3,R2,M4","45986@0|M3@16|M4@20|R1@24|R2@28|??@32","SP Db" +"CONVERT FROM LOGICAL (64 to extended DFP)","CXLGTR R1,M3,R2,M4","47450@0|M3@16|M4@20|R1@24|R2@28|??@32","SP Dt" +"CONVERT FROM LOGICAL (64 to long BFP)","CDLGBR R1,M3,R2,M4","45985@0|M3@16|M4@20|R1@24|R2@28|??@32","SP Db" +"CONVERT FROM LOGICAL (64 to long DFP)","CDLGTR R1,M3,R2,M4","47442@0|M3@16|M4@20|R1@24|R2@28|??@32","Dt" +"CONVERT FROM LOGICAL (64 to short BFP)","CELGBR R1,M3,R2,M4","45984@0|M3@16|M4@20|R1@24|R2@28|??@32","SP Db" +"CONVERT FROM PACKED (to extended DFP)","CXPT R1,D2(L2,B2),M3","237@0|L2@8|B2@16|D2@20|R1@32|M3@36|175@40|??@48","SP 
Dt" +"CONVERT FROM PACKED (to long DFP)","CDPT R1,D2(L2,B2),M3","237@0|L2@8|B2@16|D2@20|R1@32|M3@36|174@40|??@48","SP Dt" +"CONVERT FROM SIGNED PACKED (128 to extended DFP)","CXSTR R1,R2","46075@0|//@16|R1@24|R2@28|??@32","SP Dt" +"CONVERT FROM SIGNED PACKED (64 to long DFP)","CDSTR R1,R2","46067@0|//@16|R1@24|R2@28|??@32","Dt" +"CONVERT FROM UNSIGNED PACKED (128 to ext. DFP)","CXUTR R1,R2","46074@0|//@16|R1@24|R2@28|??@32","SP Dt" +"CONVERT FROM UNSIGNED PACKED (64 to long DFP)","CDUTR R1,R2","46066@0|//@16|R1@24|R2@28|??@32","Dt" +"CONVERT FROM ZONED (to extended DFP)","CXZT R1,D2(L2,B2),M3","237@0|L2@8|B2@16|D2@20|R1@32|M3@36|171@40|??@48","SP Dt" +"CONVERT FROM ZONED (to long DFP)","CDZT R1,D2(L2,B2),M3","237@0|L2@8|B2@16|D2@20|R1@32|M3@36|170@40|??@48","SP Dt" +"CONVERT HFP TO BFP (long to short)","TBEDR R1,M3,R2","45904@0|M3@16|//@20|R1@24|R2@28|??@32","SP Da" +"CONVERT HFP TO BFP (long)","TBDR R1,M3,R2","45905@0|M3@16|//@20|R1@24|R2@28|??@32","SP Da" +"CONVERT TO BINARY (32)","CVB R1,D2(X2,B2)","79@0|R1@8|X2@12|B2@16|D2@20|??@32","B" +"CONVERT TO BINARY (32)","CVBY R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|6@40|??@48","B" +"CONVERT TO BINARY (64)","CVBG R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|14@40|??@48","B" +"CONVERT TO DECIMAL (32)","CVD R1,D2(X2,B2)","78@0|R1@8|X2@12|B2@16|D2@20|??@32","ST" +"CONVERT TO DECIMAL (32)","CVDY R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|38@40|??@48","ST" +"CONVERT TO DECIMAL (64)","CVDG R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|46@40|??@48","ST" +"CONVERT TO FIXED (extended BFP to 32)","CFXBR R1,M3,R2","45978@0|M3@16|//@20|R1@24|R2@28|??@32","SP Db" +"CONVERT TO FIXED (extended BFP to 32)","CFXBRA R1,M3,R2,M4","45978@0|M3@16|M4@20|R1@24|R2@28|??@32","SP Db" +"CONVERT TO FIXED (extended BFP to 64)","CGXBR R1,M3,R2","45994@0|M3@16|//@20|R1@24|R2@28|??@32","SP Db" +"CONVERT TO FIXED (extended BFP to 64)","CGXBRA R1,M3,R2,M4","45994@0|M3@16|M4@20|R1@24|R2@28|??@32","SP Db" +"CONVERT TO FIXED (extended DFP to 32)","CFXTR 
R1,M3,R2,M4","47433@0|M3@16|M4@20|R1@24|R2@28|??@32","SP Dt" +"CONVERT TO FIXED (extended DFP to 64)","CGXTR R1,M3,R2","46057@0|M3@16|//@20|R1@24|R2@28|??@32","SP Dt" +"CONVERT TO FIXED (extended DFP to 64)","CGXTRA R1,M3,R2,M4","46057@0|M3@16|M4@20|R1@24|R2@28|??@32","SP Dt" +"CONVERT TO FIXED (extended HFP to 32)","CFXR R1,M3,R2","46010@0|M3@16|//@20|R1@24|R2@28|??@32","SP Da" +"CONVERT TO FIXED (extended HFP to 64)","CGXR R1,M3,R2","46026@0|M3@16|//@20|R1@24|R2@28|??@32","SP Da" +"CONVERT TO FIXED (long BFP to 32)","CFDBR R1,M3,R2","45977@0|M3@16|//@20|R1@24|R2@28|??@32","SP Db" +"CONVERT TO FIXED (long BFP to 32)","CFDBRA R1,M3,R2,M4","45977@0|M3@16|M4@20|R1@24|R2@28|??@32","SP Db" +"CONVERT TO FIXED (long BFP to 64)","CGDBR R1,M3,R2","45993@0|M3@16|//@20|R1@24|R2@28|??@32","SP Db" +"CONVERT TO FIXED (long BFP to 64)","CGDBRA R1,M3,R2,M4","45993@0|M3@16|M4@20|R1@24|R2@28|??@32","SP Db" +"CONVERT TO FIXED (long DFP to 32)","CFDTR R1,M3,R2,M4","47425@0|M3@16|M4@20|R1@24|R2@28|??@32","Dt" +"CONVERT TO FIXED (long DFP to 64)","CGDTR R1,M3,R2","46049@0|M3@16|//@20|R1@24|R2@28|??@32","Dt" +"CONVERT TO FIXED (long DFP to 64)","CGDTRA R1,M3,R2,M4","46049@0|M3@16|M4@20|R1@24|R2@28|??@32","Dt" +"CONVERT TO FIXED (long HFP to 32)","CFDR R1,M3,R2","46009@0|M3@16|//@20|R1@24|R2@28|??@32","SP Da" +"CONVERT TO FIXED (long HFP to 64)","CGDR R1,M3,R2","46025@0|M3@16|//@20|R1@24|R2@28|??@32","SP Da" +"CONVERT TO FIXED (short BFP to 32)","CFEBR R1,M3,R2","45976@0|M3@16|//@20|R1@24|R2@28|??@32","SP Db" +"CONVERT TO FIXED (short BFP to 32)","CFEBRA R1,M3,R2,M4","45976@0|M3@16|M4@20|R1@24|R2@28|??@32","SP Db" +"CONVERT TO FIXED (short BFP to 64)","CGEBR R1,M3,R2","45992@0|M3@16|//@20|R1@24|R2@28|??@32","SP Db" +"CONVERT TO FIXED (short BFP to 64)","CGEBRA R1,M3,R2,M4","45992@0|M3@16|M4@20|R1@24|R2@28|??@32","SP Db" +"CONVERT TO FIXED (short HFP to 32)","CFER R1,M3,R2","46008@0|M3@16|//@20|R1@24|R2@28|??@32","SP Da" +"CONVERT TO FIXED (short HFP to 64)","CGER 
R1,M3,R2","46024@0|M3@16|//@20|R1@24|R2@28|??@32","SP Da" +"CONVERT TO LOGICAL (extended BFP to 32)","CLFXBR R1,M3,R2,M4","45982@0|M3@16|M4@20|R1@24|R2@28|??@32","SP Db" +"CONVERT TO LOGICAL (extended BFP to 64)","CLGXBR R1,M3,R2,M4","45998@0|M3@16|M4@20|R1@24|R2@28|??@32","SP Db" +"CONVERT TO LOGICAL (extended DFP to 32)","CLFXTR R1,M3,R2,M4","47435@0|M3@16|M4@20|R1@24|R2@28|??@32","SP Dt" +"CONVERT TO LOGICAL (extended DFP to 64)","CLGXTR R1,M3,R2,M4","47434@0|M3@16|M4@20|R1@24|R2@28|??@32","SP Dt" +"CONVERT TO LOGICAL (long BFP to 32)","CLFDBR R1,M3,R2,M4","45981@0|M3@16|M4@20|R1@24|R2@28|??@32","SP Db" +"CONVERT TO LOGICAL (long BFP to 64)","CLGDBR R1,M3,R2,M4","45997@0|M3@16|M4@20|R1@24|R2@28|??@32","SP Db" +"CONVERT TO LOGICAL (long DFP to 32)","CLFDTR R1,M3,R2,M4","47427@0|M3@16|M4@20|R1@24|R2@28|??@32","Dt" +"CONVERT TO LOGICAL (long DFP to 64)","CLGDTR R1,M3,R2,M4","47426@0|M3@16|M4@20|R1@24|R2@28|??@32","Dt" +"CONVERT TO LOGICAL (short BFP to 32)","CLFEBR R1,M3,R2,M4","45980@0|M3@16|M4@20|R1@24|R2@28|??@32","SP Db" +"CONVERT TO LOGICAL (short BFP to 64)","CLGEBR R1,M3,R2,M4","45996@0|M3@16|M4@20|R1@24|R2@28|??@32","SP Db" +"CONVERT TO PACKED (from extended DFP)","CPXT R1,D2(L2,B2),M3","237@0|L2@8|B2@16|D2@20|R1@32|M3@36|173@40|??@48","SP Dt" +"CONVERT TO PACKED (from long DFP)","CPDT R1,D2(L2,B2),M3","237@0|L2@8|B2@16|D2@20|R1@32|M3@36|172@40|??@48","SP Dt" +"CONVERT TO SIGNED PACKED (extended DFP to 128)","CSXTR R1,R2,M4","46059@0|//@16|M4@20|R1@24|R2@28|??@32","SP Dt" +"CONVERT TO SIGNED PACKED (long DFP to 64)","CSDTR R1,R2,M4","46051@0|//@16|M4@20|R1@24|R2@28|??@32","Dt" +"CONVERTTOUNSIGNEDPACKED(extendedDFP to 128)","CUXTR R1,R2","46058@0|//@16|R1@24|R2@28|??@32","SP Dt" +"CONVERT TO UNSIGNED PACKED (long DFP to 64)","CUDTR R1,R2","46050@0|//@16|R1@24|R2@28|??@32","Dt" +"CONVERT TO ZONED (from extended DFP)","CZXT R1,D2(L2,B2),M3","237@0|L2@8|B2@16|D2@20|R1@32|M3@36|169@40|??@48","SP" +"CONVERT TO ZONED (from long DFP)","CZDT 
R1,D2(L2,B2),M3","237@0|L2@8|B2@16|D2@20|R1@32|M3@36|168@40|??@48","SP" +"CONVERT UNICODE TO UTF-8","CUUTF R1,R2,M3","45734@0|M3@16|//@20|R1@24|R2@28|??@32","SP IC" +"CONVERT UTF-16 TO UTF-32","CU24 R1,R2,M3","47537@0|M3@16|//@20|R1@24|R2@28|??@32","SP IC" +"CONVERT UTF-16 TO UTF-8","CU21 R1,R2,M3","45734@0|M3@16|//@20|R1@24|R2@28|??@32","SP IC" +"CONVERT UTF-8 TO UNICODE","CUTFU R1,R2,M3","45735@0|M3@16|//@20|R1@24|R2@28|??@32","SP IC" +"CONVERT UTF-8 TO UTF-16","CU12 R1,R2,M3","45735@0|M3@16|//@20|R1@24|R2@28|??@32","SP IC" +"CONVERT UTF-8 TO UTF-32","CU14 R1,R2,M3","47536@0|M3@16|//@20|R1@24|R2@28|??@32","SP IC" +"CONVERT UTF-32 TO UTF-16","CU42 R1,R2","47539@0|//@16|R1@24|R2@28|??@32","SP IC" +"CONVERT UTF-32 TO UTF-8","CU41 R1,R2","47538@0|//@16|R1@24|R2@28|??@32","SP IC" +"COPY ACCESS","CPYA R1,R2","45645@0|//@16|R1@24|R2@28|??@32","U" +"COPY SIGN (long)","CPSDR R1,R3,R2","45938@0|R3@16|//@20|R1@24|R2@28|??@32","Da" +"DECIMAL SCALE AND CONVERT AND SPLIT TO HFP","VSCSHP V1,V2,V3","230@0|V1@8|V2@12|V3@16|//@20|RXB@36|124@40|??@48","Dv" +"DECIMAL SCALE AND CONVERT TO HFP","VSCHP V1,V2,V3,M4,M5","230@0|V1@8|V2@12|V3@16|//@20|M5@24|//@28|M4@32|RXB@36|116@40|??@48","SP Dv" +"DEFLATE CONVERSION CALL","DFLTCC R1,R2,R3","47417@0|R3@16|//@20|R1@24|R2@28|??@32","SP IC" +"DIVIDE (32→64)","D R1,D2(X2,B2)","93@0|R1@8|X2@12|B2@16|D2@20|??@32","SP" +"DIVIDE (32←64)","DR R1,R2","29@0|R1@8|R2@12|??@16","SP" +"DIVIDE (extended BFP)","DXBR R1,R2","45901@0|//@16|R1@24|R2@28|??@32","SP Db" +"DIVIDE (extended DFP)","DXTR R1,R2,R3","46041@0|R3@16|//@20|R1@24|R2@28|??@32","SP Dt" +"DIVIDE (extended DFP)","DXTRA R1,R2,R3,M4","46041@0|R3@16|M4@20|R1@24|R2@28|??@32","SP Dt" +"DIVIDE (extended HFP)","DXR R1,R2","45613@0|//@16|R1@24|R2@28|??@32","SP Da" +"DIVIDE (long BFP)","DDB R1,D2(X2,B2)","237@0|R1@8|X2@12|B2@16|D2@20|//@32|29@40|??@48","Db" +"DIVIDE (long BFP)","DDBR R1,R2","45853@0|//@16|R1@24|R2@28|??@32","Db" +"DIVIDE (long DFP)","DDTR 
R1,R2,R3","46033@0|R3@16|//@20|R1@24|R2@28|??@32","Dt" +"DIVIDE (long DFP)","DDTRA R1,R2,R3,M4","46033@0|R3@16|M4@20|R1@24|R2@28|??@32","Dt" +"DIVIDE (long HFP)","DD R1,D2(X2,B2)","109@0|R1@8|X2@12|B2@16|D2@20|??@32","Da" +"DIVIDE (long HFP)","DDR R1,R2","45@0|R1@8|R2@12|??@16","Da" +"DIVIDE (short BFP)","DEB R1,D2(X2,B2)","237@0|R1@8|X2@12|B2@16|D2@20|//@32|13@40|??@48","Db" +"DIVIDE (short BFP)","DEBR R1,R2","45837@0|//@16|R1@24|R2@28|??@32","Db" +"DIVIDE (short HFP)","DE R1,D2(X2,B2)","125@0|R1@8|X2@12|B2@16|D2@20|??@32","Da" +"DIVIDE (short HFP)","DER R1,R2","61@0|R1@8|R2@12|??@16","Da" +"DIVIDE DECIMAL","DP D1(L1,B1),D2(L2,B2)","253@0|L1@8|L2@12|B1@16|D1@20|B2@32|D2@36|??@48","SP Dg" +"DIVIDE LOGICAL (32→64)","DL R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|151@40|??@48","SP" +"DIVIDE LOGICAL (32←64)","DLR R1,R2","47511@0|//@16|R1@24|R2@28|??@32","SP" +"DIVIDE LOGICAL (64←128)","DLG R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|135@40|??@48","SP" +"DIVIDE LOGICAL (64→128)","DLGR R1,R2","47495@0|//@16|R1@24|R2@28|??@32","SP" +"DIVIDE SINGLE (64)","DSG R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|13@40|??@48","SP" +"DIVIDE SINGLE (64)","DSGR R1,R2","47373@0|//@16|R1@24|R2@28|??@32","SP" +"DIVIDE SINGLE (64←32)","DSGF R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|29@40|??@48","SP" +"DIVIDE SINGLE (64→32)","DSGFR R1,R2","47389@0|//@16|R1@24|R2@28|??@32","SP" +"DIVIDE TO INTEGER (long BFP)","DIDBR R1,R3,R2,M4","45915@0|R3@16|M4@20|R1@24|R2@28|??@32","SP Db" +"DIVIDE TO INTEGER (short BFP)","DIEBR R1,R3,R2,M4","45907@0|R3@16|M4@20|R1@24|R2@28|??@32","SP Db" +"EDIT","ED D1(L1,B1),D2(B2)","222@0|L1@8|B1@16|D1@20|B2@32|D2@36|??@48","Dg" +"EDIT AND MARK","EDMK D1(L1,B1),D2(B2)","223@0|L1@8|B1@16|D1@20|B2@32|D2@36|??@48","Dg" +"EXCLUSIVE OR (32)","X R1,D2(X2,B2)","87@0|R1@8|X2@12|B2@16|D2@20|??@32","B" +"EXCLUSIVE OR (32)","XR R1,R2","23@0|R1@8|R2@12|??@16","17" +"EXCLUSIVE OR (32)","XRK R1,R2,R3","47607@0|R3@16|//@20|R1@24|R2@28|??@32","B9F7" +"EXCLUSIVE OR (32)","XY 
R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|87@40|??@48","B" +"EXCLUSIVE OR (64)","XG R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|130@40|??@48","B" +"EXCLUSIVE OR (64)","XGR R1,R2","47490@0|//@16|R1@24|R2@28|??@32","B982" +"EXCLUSIVE OR (64)","XGRK R1,R2,R3","47591@0|R3@16|//@20|R1@24|R2@28|??@32","B9E7" +"EXCLUSIVE OR (character)","XC D1(L1,B1),D2(B2)","215@0|L1@8|B1@16|D1@20|B2@32|D2@36|??@48","ST" +"EXCLUSIVE OR (immediate)","XI D1(B1),I2","151@0|I2@8|B1@16|D1@20|??@32","ST" +"EXCLUSIVE OR (immediate)","XIY D1(B1),I2","235@0|I2@8|B1@16|D1@20|87@40|??@48","ST" +"EXCLUSIVE OR IMMEDIATE (high)","XIHF R1,I2","192@0|R1@8|6@12|I2@16|??@48","C06" +"EXCLUSIVE OR IMMEDIATE (low)","XILF R1,I2","192@0|R1@8|7@12|I2@16|??@48","C07" +"EXECUTE","EX R1,D2(X2,B2)","68@0|R1@8|X2@12|B2@16|D2@20|??@32","SP" +"EXECUTE RELATIVE LONG","EXRL R1,RI2","198@0|R1@8|0@12|RI2@16|??@48","C60" +"EXTRACT ACCESS","EAR R1,R2","45647@0|//@16|R1@24|R2@28|??@32","U" +"EXTRACT AND SET EXTENDED AUTHORITY","ESEA R1","47517@0|//@16|R1@24|//@28|??@32","B99D" +"EXTRACT BIASED EXPONENT (extended DFP to 64)","EEXTR R1,R2","46061@0|//@16|R1@24|R2@28|??@32","SP Dt" +"EXTRACT BIASED EXPONENT (long DFP to 64)","EEDTR R1,R2","46053@0|//@16|R1@24|R2@28|??@32","Dt" +"EXTRACT CPU ATTRIBUTE","ECAG R1,R3,D2(B2)","235@0|R1@8|R3@12|B2@16|D2@20|76@40|??@48","EB4C" +"EXTRACT CPU TIME","ECTG D1(B1),D2(B2),R3","200@0|R3@8|1@12|B1@16|D1@20|B2@32|D2@36|??@48","R" +"EXTRACT FPC","EFPC R1","45964@0|//@16|R1@24|//@28|??@32","Db" +"EXTRACT PRIMARY ASN","EPAR R1","45606@0|//@16|R1@24|//@28|??@32","SO" +"EXTRACT PRIMARY ASN AND INSTANCE","EPAIR R1","47514@0|//@16|R1@24|//@28|??@32","SO" +"EXTRACT PSW","EPSW R1,R2","47501@0|//@16|R1@24|R2@28|??@32","B98D" +"EXTRACT SECONDARY ASN","ESAR R1","45607@0|//@16|R1@24|//@28|??@32","SO" +"EXTRACT SECONDARY ASN AND INSTANCE","ESAIR R1","47515@0|//@16|R1@24|//@28|??@32","SO" +"EXTRACT SIGNIFICANCE (extended DFP to 64)","ESXTR R1,R2","46063@0|//@16|R1@24|R2@28|??@32","SP Dt" +"EXTRACT 
SIGNIFICANCE (long DFP to 64)","ESDTR R1,R2","46055@0|//@16|R1@24|R2@28|??@32","Dt" +"EXTRACT STACKED REGISTERS (32)","EREG R1,R2","45641@0|//@16|R1@24|R2@28|??@32","SE" +"EXTRACT STACKED REGISTERS (64)","EREGG R1,R2","47374@0|//@16|R1@24|R2@28|??@32","SE" +"EXTRACT STACKED STATE","ESTA R1,R2","45642@0|//@16|R1@24|R2@28|??@32","B24A" +"EXTRACT TRANSACTION NESTING DEPTH","ETND R1","45804@0|//@16|R1@24|//@28|??@32","SO" +"FIND LEFTMOST ONE","FLOGR R1,R2","47491@0|//@16|R1@24|R2@28|??@32","SP" +"HALT SUBCHANNEL","HSCH","45617@0|//@16|??@32","OP" +"HALVE (long HFP)","HDR R1,R2","36@0|R1@8|R2@12|??@16","Da" +"HALVE (short HFP)","HER R1,R2","52@0|R1@8|R2@12|??@16","Da" +"INSERT ADDRESS SPACE CONTROL","IAC R1","45604@0|//@16|R1@24|//@28|??@32","SO" +"INSERT BIASED EXPONENT (64 to extended DFP)","IEXTR R1,R3,R2","46078@0|R3@16|//@20|R1@24|R2@28|??@32","SP Dt" +"INSERT BIASED EXPONENT (64 to long DFP)","IEDTR R1,R3,R2","46070@0|R3@16|//@20|R1@24|R2@28|??@32","Dt" +"INSERT CHARACTER","IC R1,D2(X2,B2)","67@0|R1@8|X2@12|B2@16|D2@20|??@32","B" +"INSERT CHARACTER","ICY R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|115@40|??@48","B" +"INSERT CHARACTERS UNDER MASK (high)","ICMH R1,M3,D2(B2)","235@0|R1@8|M3@12|B2@16|D2@20|128@40|??@48","B" +"INSERT CHARACTERS UNDER MASK (low)","ICM R1,M3,D2(B2)","191@0|R1@8|M3@12|B2@16|D2@20|??@32","B" +"INSERT CHARACTERS UNDER MASK (low)","ICMY R1,M3,D2(B2)","235@0|R1@8|M3@12|B2@16|D2@20|129@40|??@48","B" +"INSERT IMMEDIATE (high high)","IIHH R1,I2","165@0|R1@8|0@12|I2@16|??@32","A50" +"INSERT IMMEDIATE (high low)","IIHL R1,I2","165@0|R1@8|1@12|I2@16|??@32","A51" +"INSERT IMMEDIATE (high)","IIHF R1,I2","192@0|R1@8|8@12|I2@16|??@48","C08" +"INSERT IMMEDIATE (low high)","IILH R1,I2","165@0|R1@8|2@12|I2@16|??@32","A52" +"INSERT IMMEDIATE (low low)","IILL R1,I2","165@0|R1@8|3@12|I2@16|??@32","A53" +"INSERT IMMEDIATE (low)","IILF R1,I2","192@0|R1@8|9@12|I2@16|??@48","C09" +"INSERT PROGRAM MASK","IPM R1","45602@0|//@16|R1@24|//@28|??@32","B222" +"INSERT 
PSW KEY","IPK","45579@0|//@16|??@32","G2" +"INSERT REFERENCE BITS MULTIPLE","IRBM R1,R2","47532@0|//@16|R1@24|R2@28|??@32","B9AC" +"INSERT STORAGE KEY EXTENDED","ISKE R1,R2","45609@0|//@16|R1@24|R2@28|??@32","SO" +"INSERT VIRTUAL STORAGE KEY","IVSK R1,R2","45603@0|//@16|R1@24|R2@28|??@32","SO" +"INVALIDATE DAT TABLE ENTRY","IDTE R1,R3,R2,M4","47502@0|R3@16|M4@20|R1@24|R2@28|??@32","SP" +"INVALIDATE PAGE TABLE ENTRY","IPTE R1,R2,R3,M4","45601@0|R3@16|M4@20|R1@24|R2@28|??@32","SP II" +"LOAD (32)","L R1,D2(X2,B2)","88@0|R1@8|X2@12|B2@16|D2@20|??@32","B" +"LOAD (32)","LR R1,R2","24@0|R1@8|R2@12|??@16","18" +"LOAD (32)","LY R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|88@40|??@48","B" +"LOAD (64)","LG R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|4@40|??@48","B" +"LOAD (64)","LGR R1,R2","47364@0|//@16|R1@24|R2@28|??@32","B904" +"LOAD (64←32)","LGF R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|20@40|??@48","B" +"LOAD (64←32)","LGFR R1,R2","47380@0|//@16|R1@24|R2@28|??@32","B914" +"LOAD (extended)","LXR R1,R2","45925@0|//@16|R1@24|R2@28|??@32","SP Da" +"LOAD (long)","LD R1,D2(X2,B2)","104@0|R1@8|X2@12|B2@16|D2@20|??@32","Da" +"LOAD (long)","LDR R1,R2","40@0|R1@8|R2@12|??@16","Da" +"LOAD (long)","LDY R1,D2(X2,B2)","237@0|R1@8|X2@12|B2@16|D2@20|101@40|??@48","Da" +"LOAD (short)","LE R1,D2(X2,B2)","120@0|R1@8|X2@12|B2@16|D2@20|??@32","Da" +"LOAD (short)","LER R1,R2","56@0|R1@8|R2@12|??@16","Da" +"LOAD (short)","LEY R1,D2(X2,B2)","237@0|R1@8|X2@12|B2@16|D2@20|100@40|??@48","Da" +"LOAD ACCESS MULTIPLE 7-268","LAM R1,R3,D2(B2)","154@0|R1@8|R3@12|B2@16|D2@20|??@32","SP" +"LOAD ACCESS MULTIPLE 7-268","LAMY R1,R3,D2(B2)","235@0|R1@8|R3@12|B2@16|D2@20|154@40|??@48","SP" +"LOAD ADDRESS","LA R1,D2(X2,B2)","65@0|R1@8|X2@12|B2@16|D2@20|??@32","41" +"LOAD ADDRESS","LAY R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|113@40|??@48","E371" +"LOAD ADDRESS EXTENDED","LAE R1,D2(X2,B2)","81@0|R1@8|X2@12|B2@16|D2@20|??@32","U" +"LOAD ADDRESS EXTENDED","LAEY 
R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|117@40|??@48","U" +"LOAD ADDRESS RELATIVE LONG","LARL R1,RI2","192@0|R1@8|0@12|RI2@16|??@48","C00" +"LOAD ADDRESS SPACE PARAMETERS","LASP D1(B1),D2(B2)","58624@0|B1@16|D1@20|B2@32|D2@36|??@48","SP SO" +"LOAD AND ADD (32)","LAA R1,R3,D2(B2)","235@0|R1@8|R3@12|B2@16|D2@20|248@40|??@48","SP" +"LOAD AND ADD (64)","LAAG R1,R3,D2(B2)","235@0|R1@8|R3@12|B2@16|D2@20|232@40|??@48","SP" +"LOAD AND ADD LOGICAL (32)","LAAL R1,R3,D2(B2)","235@0|R1@8|R3@12|B2@16|D2@20|250@40|??@48","SP" +"LOAD AND ADD LOGICAL (64)","LAALG R1,R3,D2(B2)","235@0|R1@8|R3@12|B2@16|D2@20|234@40|??@48","SP" +"LOAD AND AND (32)","LAN R1,R3,D2(B2)","235@0|R1@8|R3@12|B2@16|D2@20|244@40|??@48","SP" +"LOAD AND AND (64)","LANG R1,R3,D2(B2)","235@0|R1@8|R3@12|B2@16|D2@20|228@40|??@48","SP" +"LOAD AND EXCLUSIVE OR (32)","LAX R1,R3,D2(B2)","235@0|R1@8|R3@12|B2@16|D2@20|247@40|??@48","SP" +"LOAD AND EXCLUSIVE OR (64)","LAXG R1,R3,D2(B2)","235@0|R1@8|R3@12|B2@16|D2@20|231@40|??@48","SP" +"LOAD AND OR (32)","LAO R1,R3,D2(B2)","235@0|R1@8|R3@12|B2@16|D2@20|246@40|??@48","SP" +"LOAD AND OR (64)","LAOG R1,R3,D2(B2)","235@0|R1@8|R3@12|B2@16|D2@20|230@40|??@48","SP" +"LOAD AND TEST (32)","LT R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|18@40|??@48","B" +"LOAD AND TEST (32)","LTR R1,R2","18@0|R1@8|R2@12|??@16","12" +"LOAD AND TEST (64)","LTG R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|2@40|??@48","B" +"LOAD AND TEST (64)","LTGR R1,R2","47362@0|//@16|R1@24|R2@28|??@32","B902" +"LOAD AND TEST (64→32)","LTGF R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|50@40|??@48","B" +"LOAD AND TEST (64→32)","LTGFR R1,R2","47378@0|//@16|R1@24|R2@28|??@32","B912" +"LOAD AND TEST (extended BFP)","LTXBR R1,R2","45890@0|//@16|R1@24|R2@28|??@32","SP Db" +"LOAD AND TEST (extended DFP)","LTXTR R1,R2","46046@0|//@16|R1@24|R2@28|??@32","SP Dt" +"LOAD AND TEST (extended HFP)","LTXR R1,R2","45922@0|//@16|R1@24|R2@28|??@32","SP Da" +"LOAD AND TEST (long BFP)","LTDBR 
R1,R2","45842@0|//@16|R1@24|R2@28|??@32","Db" +"LOAD AND TEST (long DFP)","LTDTR R1,R2","46038@0|//@16|R1@24|R2@28|??@32","Dt" +"LOAD AND TEST (long HFP)","LTDR R1,R2","34@0|R1@8|R2@12|??@16","Da" +"LOAD AND TEST (short BFP)","LTEBR R1,R2","45826@0|//@16|R1@24|R2@28|??@32","Db" +"LOAD AND TEST (short HFP)","LTER R1,R2","50@0|R1@8|R2@12|??@16","Da" +"LOAD AND TRAP (32L→32)","LAT R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|159@40|??@48","B" +"LOAD AND TRAP (64)","LGAT R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|133@40|??@48","B" +"LOAD AND ZERO RIGHTMOST BYTE (32)","LZRF R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|59@40|??@48","B" +"LOAD AND ZERO RIGHTMOST BYTE (64)","LZRG R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|42@40|??@48","B" +"LOAD BEAR","LBEAR D2(B2)","45568@0|B2@16|D2@20|??@32","SP" +"LOAD BYTE (32→8)","LB R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|118@40|??@48","E376" +"LOAD BYTE (32←8)","LBR R1,R2","47398@0|//@16|R1@24|R2@28|??@32","B926" +"LOAD BYTE (64→8)","LGB R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|119@40|??@48","E377" +"LOAD BYTE (64←8)","LGBR R1,R2","47366@0|//@16|R1@24|R2@28|??@32","B906" +"LOAD BYTE HIGH (32←8)","LBH R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|192@40|??@48","B" +"LOAD COMPLEMENT (32)","LCR R1,R2","19@0|R1@8|R2@12|??@16","13" +"LOAD COMPLEMENT (64)","LCGR R1,R2","47363@0|//@16|R1@24|R2@28|??@32","B903" +"LOAD COMPLEMENT (64←32)","LCGFR R1,R2","47379@0|//@16|R1@24|R2@28|??@32","B913" +"LOAD COMPLEMENT (extended BFP)","LCXBR R1,R2","45891@0|//@16|R1@24|R2@28|??@32","SP Db" +"LOAD COMPLEMENT (extended HFP)","LCXR R1,R2","45923@0|//@16|R1@24|R2@28|??@32","SP Da" +"LOAD COMPLEMENT (long BFP)","LCDBR R1,R2","45843@0|//@16|R1@24|R2@28|??@32","Db" +"LOAD COMPLEMENT (long HFP)","LCDR R1,R2","35@0|R1@8|R2@12|??@16","Da" +"LOAD COMPLEMENT (long)","LCDFR R1,R2","45939@0|//@16|R1@24|R2@28|??@32","Da" +"LOAD COMPLEMENT (short BFP)","LCEBR R1,R2","45827@0|//@16|R1@24|R2@28|??@32","Db" +"LOAD COMPLEMENT (short HFP)","LCER 
R1,R2","51@0|R1@8|R2@12|??@16","Da" +"LOAD CONTROL (32)","LCTL R1,R3,D2(B2)","183@0|R1@8|R3@12|B2@16|D2@20|??@32","SP" +"LOAD CONTROL (64)","LCTLG R1,R3,D2(B2)","235@0|R1@8|R3@12|B2@16|D2@20|47@40|??@48","SP" +"LOAD COUNT TO BLOCK BOUNDARY","LCBB R1,D2(X2,B2),M3","231@0|R1@8|X2@12|B2@16|D2@20|M3@32|//@36|39@40|??@48","SP" +"LOAD FP INTEGER (extended BFP)","FIXBR R1,M3,R2","45895@0|M3@16|//@20|R1@24|R2@28|??@32","SP Db" +"LOAD FP INTEGER (extended BFP)","FIXBRA R1,M3,R2,M4","45895@0|M3@16|M4@20|R1@24|R2@28|??@32","SP Db" +"LOAD FP INTEGER (extended DFP)","FIXTR R1,M3,R2,M4","46047@0|M3@16|M4@20|R1@24|R2@28|??@32","SP Dt" +"LOAD FP INTEGER (extended HFP)","FIXR R1,R2","45927@0|//@16|R1@24|R2@28|??@32","SP Da" +"LOAD FP INTEGER (long BFP)","FIDBR R1,M3,R2","45919@0|M3@16|//@20|R1@24|R2@28|??@32","SP Db" +"LOAD FP INTEGER (long BFP)","FIDBRA R1,M3,R2,M4","45919@0|M3@16|M4@20|R1@24|R2@28|??@32","SP Db" +"LOAD FP INTEGER (long DFP)","FIDTR R1,M3,R2,M4","46039@0|M3@16|M4@20|R1@24|R2@28|??@32","Dt" +"LOAD FP INTEGER (long HFP)","FIDR R1,R2","45951@0|//@16|R1@24|R2@28|??@32","Da" +"LOAD FP INTEGER (short BFP)","FIEBR R1,M3,R2","45911@0|M3@16|//@20|R1@24|R2@28|??@32","SP Db" +"LOAD FP INTEGER (short BFP)","FIEBRA R1,M3,R2,M4","45911@0|M3@16|M4@20|R1@24|R2@28|??@32","SP Db" +"LOAD FP INTEGER (short HFP)","FIER R1,R2","45943@0|//@16|R1@24|R2@28|??@32","Da" +"LOAD FPC","LFPC D2(B2)","45725@0|B2@16|D2@20|??@32","SP Db" +"LOAD FPC AND SIGNAL","LFAS D2(B2)","45757@0|B2@16|D2@20|??@32","SP Dt" +"LOAD FPR FROM GR (64 to long)","LDGR R1,R2","46017@0|//@16|R1@24|R2@28|??@32","Da" +"LOAD GR FROM FPR (long to 64)","LGDR R1,R2","46029@0|//@16|R1@24|R2@28|??@32","Da" +"LOAD GUARDED (64)","LGG R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|76@40|??@48","SP" +"LOAD GUARDED STORAGE CONTROLS","LGSC R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|77@40|??@48","SO" +"LOAD HALFWORD (32→16)","LH R1,D2(X2,B2)","72@0|R1@8|X2@12|B2@16|D2@20|??@32","B" +"LOAD HALFWORD (32←16)","LHR 
R1,R2","47399@0|//@16|R1@24|R2@28|??@32","B927" +"LOAD HALFWORD (32←16)","LHY R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|120@40|??@48","B" +"LOAD HALFWORD (64←16)","LGH R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|21@40|??@48","B" +"LOAD HALFWORD (64←16)","LGHR R1,R2","47367@0|//@16|R1@24|R2@28|??@32","B907" +"LOAD HALFWORD HIGH (32→16)","LHH R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|196@40|??@48","B" +"LOAD HALFWORD HIGH IMMEDIATE ON","LOCHHI R1,I2,M3","236@0|R1@8|M3@12|I2@16|//@32|78@40|??@48","EC4E" +"CONDITION (32←16)LOAD HALFWORD IMMEDIATE (32)←16","LHI R1,I2","167@0|R1@8|8@12|I2@16|??@32","A78" +"LOAD HALFWORD IMMEDIATE (64→16)","LGHI R1,I2","167@0|R1@8|9@12|I2@16|??@32","A79" +"LOAD HALFWORD IMMEDIATE ON CONDITION(32←16)","LOCHI R1,I2,M3","236@0|R1@8|M3@12|I2@16|//@32|66@40|??@48","EC42" +"LOAD HALFWORD IMMEDIATE ON CONDITION(64→16)","LOCGHI R1,I2,M3","236@0|R1@8|M3@12|I2@16|//@32|70@40|??@48","EC46" +"LOAD HALFWORD RELATIVE LONG (32←16)","LHRL R1,RI2","196@0|R1@8|5@12|RI2@16|??@48","C45" +"LOAD HALFWORD RELATIVE LONG (64←16)","LGHRL R1,RI2","196@0|R1@8|4@12|RI2@16|??@48","C44" +"LOAD HIGH (32)","LFH R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|202@40|??@48","B" +"LOAD HIGH AND TRAP (32H←32)","LFHAT R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|200@40|??@48","B" +"LOAD HIGH ON CONDITION (32)","LOCFH R1,D2(B2),M3","235@0|R1@8|M3@12|B2@16|D2@20|224@40|??@48","B" +"LOAD HIGH ON CONDITION (32)","LOCFHR R1,R2,M3","47584@0|M3@16|//@20|R1@24|R2@28|??@32","B9E0" +"LOAD IMMEDIATE (64→32)","LGFI R1,I2","192@0|R1@8|1@12|I2@16|??@48","C01" +"LOAD LENGTHENED (long to extended BFP)","LXDB R1,D2(X2,B2)","237@0|R1@8|X2@12|B2@16|D2@20|//@32|5@40|??@48","SP Db" +"LOAD LENGTHENED (long to extended BFP)","LXDBR R1,R2","45829@0|//@16|R1@24|R2@28|??@32","SP Db" +"LOAD LENGTHENED (long to extended DFP)","LXDTR R1,R2,M4","46044@0|//@16|M4@20|R1@24|R2@28|??@32","SP Dt" +"LOAD LENGTHENED (long to extended HFP)","LXD R1,D2(X2,B2)","237@0|R1@8|X2@12|B2@16|D2@20|//@32|37@40|??@48","SP 
Da" +"LOAD LENGTHENED (long to extended HFP)","LXDR R1,R2","45861@0|//@16|R1@24|R2@28|??@32","SP Da" +"LOAD LENGTHENED (short to extended BFP)","LXEB R1,D2(X2,B2)","237@0|R1@8|X2@12|B2@16|D2@20|//@32|6@40|??@48","SP Db" +"LOAD LENGTHENED (short to extended BFP)","LXEBR R1,R2","45830@0|//@16|R1@24|R2@28|??@32","SP Db" +"LOAD LENGTHENED (short to extended HFP)","LXE R1,D2(X2,B2)","237@0|R1@8|X2@12|B2@16|D2@20|//@32|38@40|??@48","SP Da" +"LOAD LENGTHENED (short to extended HFP)","LXER R1,R2","45862@0|//@16|R1@24|R2@28|??@32","SP Da" +"LOAD LENGTHENED (short to long BFP)","LDEB R1,D2(X2,B2)","237@0|R1@8|X2@12|B2@16|D2@20|//@32|4@40|??@48","Db" +"LOAD LENGTHENED (short to long BFP)","LDEBR R1,R2","45828@0|//@16|R1@24|R2@28|??@32","Db" +"LOAD LENGTHENED (short to long DFP)","LDETR R1,R2,M4","46036@0|//@16|M4@20|R1@24|R2@28|??@32","Dt" +"LOAD LENGTHENED (short to long HFP)","LDE R1,D2(X2,B2)","237@0|R1@8|X2@12|B2@16|D2@20|//@32|36@40|??@48","Da" +"LOAD LENGTHENED (short to long HFP)","LDER R1,R2","45860@0|//@16|R1@24|R2@28|??@32","Da" +"LOAD LOGICAL (64←32)","LLGF R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|22@40|??@48","B" +"LOAD LOGICAL (64←32)","LLGFR R1,R2","47382@0|//@16|R1@24|R2@28|??@32","B916" +"LOAD LOGICAL AND SHIFT GUARDED (64←32)","LLGFSG R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|72@40|??@48","SP" +"LOAD LOGICAL AND TRAP (64→32)","LLGFAT R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|157@40|??@48","B" +"LOAD LOGICAL AND ZERO RIGHTMOST BYTE(64→32)","LLZRGF R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|58@40|??@48","B" +"LOAD LOGICAL CHARACTER (32→8)","LLC R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|148@40|??@48","B" +"LOAD LOGICAL CHARACTER (32←8)","LLCR R1,R2","47508@0|//@16|R1@24|R2@28|??@32","B994" +"LOAD LOGICAL CHARACTER (64←8)","LLGC R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|144@40|??@48","B" +"LOAD LOGICAL CHARACTER (64←8)","LLGCR R1,R2","47492@0|//@16|R1@24|R2@28|??@32","B984" +"LOAD LOGICAL CHARACTER HIGH (32←8)","LLCH 
R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|194@40|??@48","B" +"LOAD LOGICAL HALFWORD (32←16)","LLH R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|149@40|??@48","B" +"LOAD LOGICAL HALFWORD (32←16)","LLHR R1,R2","47509@0|//@16|R1@24|R2@28|??@32","B995" +"LOAD LOGICAL HALFWORD (64→16)","LLGH R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|145@40|??@48","B" +"LOAD LOGICAL HALFWORD (64←16)","LLGHR R1,R2","47493@0|//@16|R1@24|R2@28|??@32","B985" +"LOAD LOGICAL HALFWORD HIGH (32→16)","LLHH R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|198@40|??@48","B" +"LOAD LOGICAL HALFWORD RELATIVE LONG(32←16)","LLHRL R1,RI2","196@0|R1@8|2@12|RI2@16|??@48","C42" +"LOAD LOGICAL HALFWORD RELATIVE LONG(64→16)","LLGHRL R1,RI2","196@0|R1@8|6@12|RI2@16|??@48","C46" +"LOAD LOGICAL IMMEDIATE (high high)","LLIHH R1,I2","165@0|R1@8|12@12|I2@16|??@32","A5C" +"LOAD LOGICAL IMMEDIATE (high low)","LLIHL R1,I2","165@0|R1@8|13@12|I2@16|??@32","A5D" +"LOAD LOGICAL IMMEDIATE (high)","LLIHF R1,I2","192@0|R1@8|14@12|I2@16|??@48","C0E" +"LOAD LOGICAL IMMEDIATE (low high)","LLILH R1,I2","165@0|R1@8|14@12|I2@16|??@32","A5E" +"LOAD LOGICAL IMMEDIATE (low low)","LLILL R1,I2","165@0|R1@8|15@12|I2@16|??@32","A5F" +"LOAD LOGICAL IMMEDIATE (low)","LLILF R1,I2","192@0|R1@8|15@12|I2@16|??@48","C0F" +"LOAD LOGICAL RELATIVE LONG (64→32)","LLGFRL R1,RI2","196@0|R1@8|14@12|RI2@16|??@48","SP" +"LOAD LOGICAL THIRTY ONE BITS (64→31)","LLGT R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|23@40|??@48","B" +"LOAD LOGICAL THIRTY ONE BITS (64→31)","LLGTR R1,R2","47383@0|//@16|R1@24|R2@28|??@32","B917" +"LOAD LOGICAL THIRTY ONE BITS AND TRAP(64←31)","LLGTAT R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|156@40|??@48","B" +"LOAD MULTIPLE (32)","LM R1,R3,D2(B2)","152@0|R1@8|R3@12|B2@16|D2@20|??@32","B" +"LOAD MULTIPLE (32)","LMY R1,R3,D2(B2)","235@0|R1@8|R3@12|B2@16|D2@20|152@40|??@48","B" +"LOAD MULTIPLE (64)","LMG R1,R3,D2(B2)","235@0|R1@8|R3@12|B2@16|D2@20|4@40|??@48","B" +"LOAD MULTIPLE DISJOINT (64→32&32)","LMD 
R1,R3,D2(B2),D4(B4)","239@0|R1@8|R3@12|B2@16|D2@20|B4@32|D4@36|??@48","B" +"LOAD MULTIPLE HIGH (32)","LMH R1,R3,D2(B2)","235@0|R1@8|R3@12|B2@16|D2@20|150@40|??@48","B" +"LOAD NEGATIVE (32)","LNR R1,R2","17@0|R1@8|R2@12|??@16","11" +"LOAD NEGATIVE (64)","LNGR R1,R2","47361@0|//@16|R1@24|R2@28|??@32","B901" +"LOAD NEGATIVE (64→32)","LNGFR R1,R2","47377@0|//@16|R1@24|R2@28|??@32","B911" +"LOAD NEGATIVE (extended BFP)","LNXBR R1,R2","45889@0|//@16|R1@24|R2@28|??@32","SP Db" +"LOAD NEGATIVE (extended HFP)","LNXR R1,R2","45921@0|//@16|R1@24|R2@28|??@32","SP Da" +"LOAD NEGATIVE (long BFP)","LNDBR R1,R2","45841@0|//@16|R1@24|R2@28|??@32","Db" +"LOAD NEGATIVE (long HFP)","LNDR R1,R2","33@0|R1@8|R2@12|??@16","Da" +"LOAD NEGATIVE (long)","LNDFR R1,R2","45937@0|//@16|R1@24|R2@28|??@32","Da" +"LOAD NEGATIVE (short BFP)","LNEBR R1,R2","45825@0|//@16|R1@24|R2@28|??@32","Db" +"LOAD NEGATIVE (short HFP)","LNER R1,R2","49@0|R1@8|R2@12|??@16","Da" +"LOAD ON CONDITION (32)","LOC R1,D2(B2),M3","235@0|R1@8|M3@12|B2@16|D2@20|242@40|??@48","B" +"LOAD ON CONDITION (32)","LOCR R1,R2,M3","47602@0|M3@16|//@20|R1@24|R2@28|??@32","B9F2" +"LOAD ON CONDITION (64)","LOCG R1,D2(B2),M3","235@0|R1@8|M3@12|B2@16|D2@20|226@40|??@48","B" +"LOAD ON CONDITION (64)","LOCGR R1,R2,M3","47586@0|M3@16|//@20|R1@24|R2@28|??@32","B9E2" +"LOAD PAGE TABLE ENTRY ADDRESS","LPTEA R1,R3,R2,M4","47530@0|R3@16|M4@20|R1@24|R2@28|??@32","R" +"LOAD PAIR DISJOINT (32)","LPD R3,D1(B1),D2(B2)","200@0|R3@8|4@12|B1@16|D1@20|B2@32|D2@36|??@48","SP" +"LOAD PAIR DISJOINT (64)","LPDG R3,D1(B1),D2(B2)","200@0|R3@8|5@12|B1@16|D1@20|B2@32|D2@36|??@48","SP" +"LOAD PAIR FROM QUADWORD (64&64←128)","LPQ R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|143@40|??@48","SP" +"LOAD POSITIVE (32)","LPR R1,R2","16@0|R1@8|R2@12|??@16","10" +"LOAD POSITIVE (64)","LPGR R1,R2","47360@0|//@16|R1@24|R2@28|??@32","B900" +"LOAD POSITIVE (64→32)","LPGFR R1,R2","47376@0|//@16|R1@24|R2@28|??@32","B910" +"LOAD POSITIVE (extended BFP)","LPXBR 
R1,R2","45888@0|//@16|R1@24|R2@28|??@32","SP Db" +"LOAD POSITIVE (extended HFP)","LPXR R1,R2","45920@0|//@16|R1@24|R2@28|??@32","SP Da" +"LOAD POSITIVE (long BFP)","LPDBR R1,R2","45840@0|//@16|R1@24|R2@28|??@32","Db" +"LOAD POSITIVE (long HFP)","LPDR R1,R2","32@0|R1@8|R2@12|??@16","Da" +"LOAD POSITIVE (long)","LPDFR R1,R2","45936@0|//@16|R1@24|R2@28|??@32","Da" +"LOAD POSITIVE (short BFP)","LPEBR R1,R2","45824@0|//@16|R1@24|R2@28|??@32","Db" +"LOAD POSITIVE (short HFP)","LPER R1,R2","48@0|R1@8|R2@12|??@16","Da" +"LOAD PSW","LPSW D1(B1)","130@0|I2@8|B1@16|D1@20|??@32","SP SO" +"LOAD PSW EXTENDED","LPSWE D2(B2)","45746@0|B2@16|D2@20|??@32","SP SO" +"LOAD PSW EXTENDED","LPSWEY D1(B1)","235@0|//@8|B1@16|D1@20|113@40|??@48","SP SO" +"LOAD REAL ADDRESS (32)","LRA R1,D2(X2,B2)","177@0|R1@8|X2@12|B2@16|D2@20|??@32","SO" +"LOAD REAL ADDRESS (32)","LRAY R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|19@40|??@48","SO" +"LOAD REAL ADDRESS (64)","LRAG R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|3@40|??@48","BP" +"LOAD RELATIVE LONG (32)","LRL R1,RI2","196@0|R1@8|13@12|RI2@16|??@48","SP" +"LOAD RELATIVE LONG (64)","LGRL R1,RI2","196@0|R1@8|8@12|RI2@16|??@48","SP" +"LOAD RELATIVE LONG (64→32)","LGFRL R1,RI2","196@0|R1@8|12@12|RI2@16|??@48","SP" +"LOAD REVERSED (16)","LRVH R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|31@40|??@48","B" +"LOAD REVERSED (32)","LRV R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|30@40|??@48","B" +"LOAD REVERSED (32)","LRVR R1,R2","47391@0|//@16|R1@24|R2@28|??@32","B91F" +"LOAD REVERSED (64)","LRVG R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|15@40|??@48","B" +"LOAD REVERSED (64)","LRVGR R1,R2","47375@0|//@16|R1@24|R2@28|??@32","B90F" +"LOAD ROUNDED (extended to long BFP)","LDXBR R1,R2","45893@0|//@16|R1@24|R2@28|??@32","SP Db" +"LOAD ROUNDED (extended to long BFP)","LDXBRA R1,M3,R2,M4","45893@0|M3@16|M4@20|R1@24|R2@28|??@32","SP Db" +"LOAD ROUNDED (extended to long DFP)","LDXTR R1,M3,R2,M4","46045@0|M3@16|M4@20|R1@24|R2@28|??@32","SP Dt" +"LOAD ROUNDED 
(extended to long HFP)","LDXR R1,R2","37@0|R1@8|R2@12|??@16","SP Da" +"LOAD ROUNDED (extended to long HFP)","LRDR R1,R2","37@0|R1@8|R2@12|??@16","SP Da" +"LOAD ROUNDED (extended to short BFP)","LEXBR R1,R2","45894@0|//@16|R1@24|R2@28|??@32","SP Db" +"LOAD ROUNDED (extended to short BFP)","LEXBRA R1,M3,R2,M4","45894@0|M3@16|M4@20|R1@24|R2@28|??@32","SP Db" +"LOAD ROUNDED (extended to short HFP)","LEXR R1,R2","45926@0|//@16|R1@24|R2@28|??@32","SP Da" +"LOAD ROUNDED (long to short BFP)","LEDBR R1,R2","45892@0|//@16|R1@24|R2@28|??@32","Db" +"LOAD ROUNDED (long to short BFP)","LEDBRA R1,M3,R2,M4","45892@0|M3@16|M4@20|R1@24|R2@28|??@32","SP Db" +"LOAD ROUNDED (long to short DFP)","LEDTR R1,M3,R2,M4","46037@0|M3@16|M4@20|R1@24|R2@28|??@32","Dt" +"LOAD ROUNDED (long to short HFP)","LEDR R1,R2","53@0|R1@8|R2@12|??@16","Da" +"LOAD ROUNDED (long to short HFP)","LRER R1,R2","53@0|R1@8|R2@12|??@16","Da" +"LOAD USING REAL ADDRESS (32)","LURA R1,R2","45643@0|//@16|R1@24|R2@28|??@32","SP" +"LOAD USING REAL ADDRESS (64)","LURAG R1,R2","47365@0|//@16|R1@24|R2@28|??@32","SP" +"LOAD ZERO (extended)","LZXR R1","45942@0|//@16|R1@24|//@28|??@32","SP Da" +"LOAD ZERO (long)","LZDR R1","45941@0|//@16|R1@24|//@28|??@32","Da" +"LOAD ZERO (short)","LZER R1","45940@0|//@16|R1@24|//@28|??@32","Da" +"MODIFY STACKED STATE","MSTA R1","45639@0|//@16|R1@24|//@28|??@32","ST" +"MODIFY SUBCHANNEL","MSCH D2(B2)","45618@0|B2@16|D2@20|??@32","SP OP" +"MONITOR CALL","MC D1(B1),I2","175@0|I2@8|B1@16|D1@20|??@32","SP" +"MOVE (16←16)","MVHHI D1(B1),I2","58692@0|B1@16|D1@20|I2@32|??@48","ST" +"MOVE (32→16)","MVHI D1(B1),I2","58700@0|B1@16|D1@20|I2@32|??@48","ST" +"MOVE (64←16)","MVGHI D1(B1),I2","58696@0|B1@16|D1@20|I2@32|??@48","ST" +"MOVE (character)","MVC D1(L1,B1),D2(B2)","210@0|L1@8|B1@16|D1@20|B2@32|D2@36|??@48","ST" +"MOVE (immediate)","MVI D1(B1),I2","146@0|I2@8|B1@16|D1@20|??@32","ST" +"MOVE (immediate)","MVIY D1(B1),I2","235@0|I2@8|B1@16|D1@20|82@40|??@48","ST" +"MOVE INVERSE","MVCIN 
D1(L1,B1),D2(B2)","232@0|L1@8|B1@16|D1@20|B2@32|D2@36|??@48","ST" +"MOVE LONG","MVCL R1,R2","14@0|R1@8|R2@12|??@16","SP II" +"MOVE LONG EXTENDED","MVCLE R1,R3,D2(B2)","168@0|R1@8|R3@12|B2@16|D2@20|??@32","SP IC" +"MOVE LONG UNICODE","MVCLU R1,R3,D2(B2)","235@0|R1@8|R3@12|B2@16|D2@20|142@40|??@48","SP IC" +"MOVE NUMERICS","MVN D1(L1,B1),D2(B2)","209@0|L1@8|B1@16|D1@20|B2@32|D2@36|??@48","ST" +"MOVE PAGE","MVPG R1,R2","45652@0|//@16|R1@24|R2@28|??@32","SP SO" +"MOVE RIGHT TO LEFT","MVCRL D1(B1),D2(B2)","58634@0|B1@16|D1@20|B2@32|D2@36|??@48","G0" +"MOVE STRING","MVST R1,R2","45653@0|//@16|R1@24|R2@28|??@32","SP IC" +"MOVE TO PRIMARY","MVCP D1(R1,B1),D2(B2),R3","218@0|R1@8|R3@12|B1@16|D1@20|B2@32|D2@36|??@48","SO" +"MOVE TO SECONDARY","MVCS D1(R1,B1),D2(B2),R3","219@0|R1@8|R3@12|B1@16|D1@20|B2@32|D2@36|??@48","SO" +"MOVE WITH DESTINATION KEY","MVCDK D1(B1),D2(B2)","58639@0|B1@16|D1@20|B2@32|D2@36|??@48","SO" +"MOVE WITH KEY","MVCK D1(R1,B1),D2(B2),R3","217@0|R1@8|R3@12|B1@16|D1@20|B2@32|D2@36|??@48","SO" +"MOVE WITH OFFSET","MVO D1(L1,B1),D2(L2,B2)","241@0|L1@8|L2@12|B1@16|D1@20|B2@32|D2@36|??@48","ST" +"MOVE WITH OPTIONAL SPECIFICATIONS","MVCOS D1(B1),D2(B2),R3","200@0|R3@8|0@12|B1@16|D1@20|B2@32|D2@36|??@48","SO" +"MOVE WITH SOURCE KEY","MVCSK D1(B1),D2(B2)","58638@0|B1@16|D1@20|B2@32|D2@36|??@48","SO" +"MOVE ZONES","MVZ D1(L1,B1),D2(B2)","211@0|L1@8|B1@16|D1@20|B2@32|D2@36|??@48","ST" +"MULTIPLY (128←64)","MG R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|132@40|??@48","SP" +"MULTIPLY (128←64)","MGRK R1,R2,R3","47596@0|R3@16|//@20|R1@24|R2@28|??@32","SP" +"MULTIPLY (64←32)","M R1,D2(X2,B2)","92@0|R1@8|X2@12|B2@16|D2@20|??@32","SP" +"MULTIPLY (64←32)","MFY R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|92@40|??@48","SP" +"MULTIPLY (64←32)","MR R1,R2","28@0|R1@8|R2@12|??@16","SP" +"MULTIPLY (extended BFP)","MXBR R1,R2","45900@0|//@16|R1@24|R2@28|??@32","SP Db" +"MULTIPLY (extended DFP)","MXTR R1,R2,R3","46040@0|R3@16|//@20|R1@24|R2@28|??@32","SP Dt" +"MULTIPLY (extended 
DFP)","MXTRA R1,R2,R3,M4","46040@0|R3@16|M4@20|R1@24|R2@28|??@32","SP Dt" +"MULTIPLY (extended HFP)","MXR R1,R2","38@0|R1@8|R2@12|??@16","SP Da" +"MULTIPLY (long BFP)","MDB R1,D2(X2,B2)","237@0|R1@8|X2@12|B2@16|D2@20|//@32|28@40|??@48","Db" +"MULTIPLY (long BFP)","MDBR R1,R2","45852@0|//@16|R1@24|R2@28|??@32","Db" +"MULTIPLY (long DFP)","MDTR R1,R2,R3","46032@0|R3@16|//@20|R1@24|R2@28|??@32","Dt" +"MULTIPLY (long DFP)","MDTRA R1,R2,R3,M4","46032@0|R3@16|M4@20|R1@24|R2@28|??@32","Dt" +"MULTIPLY (long HFP)","MD R1,D2(X2,B2)","108@0|R1@8|X2@12|B2@16|D2@20|??@32","Da" +"MULTIPLY (long HFP)","MDR R1,R2","44@0|R1@8|R2@12|??@16","Da" +"MULTIPLY (long to extended BFP)","MXDB R1,D2(X2,B2)","237@0|R1@8|X2@12|B2@16|D2@20|//@32|7@40|??@48","SP Db" +"MULTIPLY (long to extended BFP)","MXDBR R1,R2","45831@0|//@16|R1@24|R2@28|??@32","SP Db" +"MULTIPLY (long to extended HFP)","MXD R1,D2(X2,B2)","103@0|R1@8|X2@12|B2@16|D2@20|??@32","SP Da" +"MULTIPLY (long to extended HFP)","MXDR R1,R2","39@0|R1@8|R2@12|??@16","SP Da" +"MULTIPLY (short BFP)","MEEB R1,D2(X2,B2)","237@0|R1@8|X2@12|B2@16|D2@20|//@32|23@40|??@48","Db" +"MULTIPLY (short BFP)","MEEBR R1,R2","45847@0|//@16|R1@24|R2@28|??@32","Db" +"MULTIPLY (short HFP)","MEE R1,D2(X2,B2)","237@0|R1@8|X2@12|B2@16|D2@20|//@32|55@40|??@48","Da" +"MULTIPLY (short HFP)","MEER R1,R2","45879@0|//@16|R1@24|R2@28|??@32","Da" +"MULTIPLY (short to long BFP)","MDEB R1,D2(X2,B2)","237@0|R1@8|X2@12|B2@16|D2@20|//@32|12@40|??@48","Db" +"MULTIPLY (short to long BFP)","MDEBR R1,R2","45836@0|//@16|R1@24|R2@28|??@32","Db" +"MULTIPLY (short to long HFP)","MDE R1,D2(X2,B2)","124@0|R1@8|X2@12|B2@16|D2@20|??@32","Da" +"MULTIPLY (short to long HFP)","MDER R1,R2","60@0|R1@8|R2@12|??@16","Da" +"MULTIPLY (short to long HFP)","ME R1,D2(X2,B2)","124@0|R1@8|X2@12|B2@16|D2@20|??@32","Da" +"MULTIPLY (short to long HFP)","MER R1,R2","60@0|R1@8|R2@12|??@16","Da" +"MULTIPLY & ADD UNNORMALIZED (long to ext. 
HFP)","MAY R1,R3,D2(X2,B2)","237@0|R3@8|X2@12|B2@16|D2@20|R1@32|//@36|58@40|??@48","Da" +"MULTIPLY & ADD UNNORMALIZED (long to ext. HFP)","MAYR R1,R3,R2","45882@0|R1@16|//@20|R3@24|R2@28|??@32","Da" +"MULTIPLY AND ADD (long BFP)","MADB R1,R3,D2(X2,B2)","237@0|R3@8|X2@12|B2@16|D2@20|R1@32|//@36|30@40|??@48","Db" +"MULTIPLY AND ADD (long BFP)","MADBR R1,R3,R2","45854@0|R1@16|//@20|R3@24|R2@28|??@32","Db" +"MULTIPLY AND ADD (long HFP)","MAD R1,R3,D2(X2,B2)","237@0|R3@8|X2@12|B2@16|D2@20|R1@32|//@36|62@40|??@48","Da" +"MULTIPLY AND ADD (long HFP)","MADR R1,R3,R2","45886@0|R1@16|//@20|R3@24|R2@28|??@32","Da" +"MULTIPLY AND ADD (short BFP)","MAEB R1,R3,D2(X2,B2)","237@0|R3@8|X2@12|B2@16|D2@20|R1@32|//@36|14@40|??@48","Db" +"MULTIPLY AND ADD (short BFP)","MAEBR R1,R3,R2","45838@0|R1@16|//@20|R3@24|R2@28|??@32","Db" +"MULTIPLY AND ADD (short HFP)","MAE R1,R3,D2(X2,B2)","237@0|R3@8|X2@12|B2@16|D2@20|R1@32|//@36|46@40|??@48","Da" +"MULTIPLY AND ADD (short HFP)","MAER R1,R3,R2","45870@0|R1@16|//@20|R3@24|R2@28|??@32","Da" +"MULTIPLY AND ADD UNNRM. (long to ext. high HFP)","MAYH R1,R3,D2(X2,B2)","237@0|R3@8|X2@12|B2@16|D2@20|R1@32|//@36|60@40|??@48","Da" +"MULTIPLY AND ADD UNNRM. (long to ext. high HFP)","MAYHR R1,R3,R2","45884@0|R1@16|//@20|R3@24|R2@28|??@32","Da" +"MULTIPLY AND ADD UNNRM. (long to ext. low HFP)","MAYL R1,R3,D2(X2,B2)","237@0|R3@8|X2@12|B2@16|D2@20|R1@32|//@36|56@40|??@48","Da" +"MULTIPLY AND ADD UNNRM. (long to ext. 
low HFP)","MAYLR R1,R3,R2","45880@0|R1@16|//@20|R3@24|R2@28|??@32","Da" +"MULTIPLY AND SUBTRACT (long BFP)","MSDB R1,R3,D2(X2,B2)","237@0|R3@8|X2@12|B2@16|D2@20|R1@32|//@36|31@40|??@48","Db" +"MULTIPLY AND SUBTRACT (long BFP)","MSDBR R1,R3,R2","45855@0|R1@16|//@20|R3@24|R2@28|??@32","Db" +"MULTIPLY AND SUBTRACT (long HFP)","MSD R1,R3,D2(X2,B2)","237@0|R3@8|X2@12|B2@16|D2@20|R1@32|//@36|63@40|??@48","Da" +"MULTIPLY AND SUBTRACT (long HFP)","MSDR R1,R3,R2","45887@0|R1@16|//@20|R3@24|R2@28|??@32","Da" +"MULTIPLY AND SUBTRACT (short BFP)","MSEB R1,R3,D2(X2,B2)","237@0|R3@8|X2@12|B2@16|D2@20|R1@32|//@36|15@40|??@48","Db" +"MULTIPLY AND SUBTRACT (short BFP)","MSEBR R1,R3,R2","45839@0|R1@16|//@20|R3@24|R2@28|??@32","Db" +"MULTIPLY AND SUBTRACT (short HFP)","MSE R1,R3,D2(X2,B2)","237@0|R3@8|X2@12|B2@16|D2@20|R1@32|//@36|47@40|??@48","Da" +"MULTIPLY AND SUBTRACT (short HFP)","MSER R1,R3,R2","45871@0|R1@16|//@20|R3@24|R2@28|??@32","Da" +"MULTIPLY DECIMAL","MP D1(L1,B1),D2(L2,B2)","252@0|L1@8|L2@12|B1@16|D1@20|B2@32|D2@36|??@48","SP Dg" +"MULTIPLY HALFWORD (32←16)","MH R1,D2(X2,B2)","76@0|R1@8|X2@12|B2@16|D2@20|??@32","B" +"MULTIPLY HALFWORD (32←16)","MHY R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|124@40|??@48","B" +"MULTIPLY HALFWORD (64→16)","MGH R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|60@40|??@48","B" +"MULTIPLY HALFWORD IMMEDIATE (32→16)","MHI R1,I2","167@0|R1@8|12@12|I2@16|??@32","A7C" +"MULTIPLY HALFWORD IMMEDIATE (64→16)","MGHI R1,I2","167@0|R1@8|13@12|I2@16|??@32","A7D" +"MULTIPLY LOGICAL (128→64)","MLG R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|134@40|??@48","SP" +"MULTIPLY LOGICAL (128→64)","MLGR R1,R2","47494@0|//@16|R1@24|R2@28|??@32","SP" +"MULTIPLY LOGICAL (64←32)","ML R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|150@40|??@48","SP" +"MULTIPLY LOGICAL (64←32)","MLR R1,R2","47510@0|//@16|R1@24|R2@28|??@32","SP" +"MULTIPLY SINGLE (32)","MS R1,D2(X2,B2)","113@0|R1@8|X2@12|B2@16|D2@20|??@32","B" +"MULTIPLY SINGLE (32)","MSC 
R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|83@40|??@48","B" +"MULTIPLY SINGLE (32)","MSR R1,R2","45650@0|//@16|R1@24|R2@28|??@32","B252" +"MULTIPLY SINGLE (32)","MSRKC R1,R2,R3","47613@0|R3@16|//@20|R1@24|R2@28|??@32","B9FD" +"MULTIPLY SINGLE (32)","MSY R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|81@40|??@48","B" +"MULTIPLY SINGLE (64)","MSG R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|12@40|??@48","B" +"MULTIPLY SINGLE (64)","MSGC R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|131@40|??@48","B" +"MULTIPLY SINGLE (64)","MSGR R1,R2","47372@0|//@16|R1@24|R2@28|??@32","B90C" +"MULTIPLY SINGLE (64)","MSGRKC R1,R2,R3","47597@0|R3@16|//@20|R1@24|R2@28|??@32","B9ED" +"MULTIPLY SINGLE (64←32)","MSGF R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|28@40|??@48","B" +"MULTIPLY SINGLE (64←32)","MSGFR R1,R2","47388@0|//@16|R1@24|R2@28|??@32","B91C" +"MULTIPLY SINGLE IMMEDIATE (32)","MSFI R1,I2","194@0|R1@8|1@12|I2@16|??@48","C21" +"MULTIPLY SINGLE IMMEDIATE (64←32)","MSGFI R1,I2","194@0|R1@8|0@12|I2@16|??@48","C20" +"MULTIPLY UNNORM. (long to ext. high HFP)","MYH R1,R3,D2(X2,B2)","237@0|R3@8|X2@12|B2@16|D2@20|R1@32|//@36|61@40|??@48","Da" +"MULTIPLY UNNORM. (long to ext. high HFP)","MYHR R1,R3,R2","45885@0|R1@16|//@20|R3@24|R2@28|??@32","Da" +"MULTIPLY UNNORM. (long to ext. low HFP)","MYL R1,R3,D2(X2,B2)","237@0|R3@8|X2@12|B2@16|D2@20|R1@32|//@36|57@40|??@48","Da" +"MULTIPLY UNNORM. (long to ext. low HFP)","MYLR R1,R3,R2","45881@0|R1@16|//@20|R3@24|R2@28|??@32","Da" +"MULTIPLY UNNORMALIZED (long to ext. HFP)","MY R1,R3,D2(X2,B2)","237@0|R3@8|X2@12|B2@16|D2@20|R1@32|//@36|59@40|??@48","SP Da" +"MULTIPLY UNNORMALIZED (long to ext. 
HFP)","MYR R1,R3,R2","45883@0|R1@16|//@20|R3@24|R2@28|??@32","SP Da" +"NAND (32)","NNRK R1,R2,R3","47476@0|R3@16|//@20|R1@24|R2@28|??@32","B974" +"NAND (64)","NNGRK R1,R2,R3","47460@0|R3@16|//@20|R1@24|R2@28|??@32","B964" +"NEURAL NETWORK PROCESSING ASSIST","NNPA","47419@0|//@16|??@32","SP IC" +"NEXT INSTRUCTION ACCESS INTENT","NIAI I1,I2","45818@0|//@16|I1@24|I2@28|??@32","B2FA" +"NONTRANSACTIONAL STORE (64)","NTSTG R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|37@40|??@48","SP" +"NOR (32)","NORK R1,R2,R3","47478@0|R3@16|//@20|R1@24|R2@28|??@32","B976" +"NOR (64)","NOGRK R1,R2,R3","47462@0|R3@16|//@20|R1@24|R2@28|??@32","B966" +"NOT EXCLUSIVE OR (32)","NXRK R1,R2,R3","47479@0|R3@16|//@20|R1@24|R2@28|??@32","B977" +"NOT EXCLUSIVE OR (64)","NXGRK R1,R2,R3","47463@0|R3@16|//@20|R1@24|R2@28|??@32","B967" +"OR (32)","O R1,D2(X2,B2)","86@0|R1@8|X2@12|B2@16|D2@20|??@32","B" +"OR (32)","OR R1,R2","22@0|R1@8|R2@12|??@16","16" +"OR (32)","ORK R1,R2,R3","47606@0|R3@16|//@20|R1@24|R2@28|??@32","B9F6" +"OR (32)","OY R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|86@40|??@48","B" +"OR (64)","OG R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|129@40|??@48","B" +"OR (64)","OGR R1,R2","47489@0|//@16|R1@24|R2@28|??@32","B981" +"OR (64)","OGRK R1,R2,R3","47590@0|R3@16|//@20|R1@24|R2@28|??@32","B9E6" +"OR (character)","OC D1(L1,B1),D2(B2)","214@0|L1@8|B1@16|D1@20|B2@32|D2@36|??@48","ST" +"OR (immediate)","OI D1(B1),I2","150@0|I2@8|B1@16|D1@20|??@32","ST" +"OR (immediate)","OIY D1(B1),I2","235@0|I2@8|B1@16|D1@20|86@40|??@48","ST" +"OR IMMEDIATE (high high)","OIHH R1,I2","165@0|R1@8|8@12|I2@16|??@32","A58" +"OR IMMEDIATE (high low)","OIHL R1,I2","165@0|R1@8|9@12|I2@16|??@32","A59" +"OR IMMEDIATE (high)","OIHF R1,I2","192@0|R1@8|12@12|I2@16|??@48","C0C" +"OR IMMEDIATE (low high)","OILH R1,I2","165@0|R1@8|10@12|I2@16|??@32","A5A" +"OR IMMEDIATE (low low)","OILL R1,I2","165@0|R1@8|11@12|I2@16|??@32","A5B" +"OR IMMEDIATE (low)","OILF R1,I2","192@0|R1@8|13@12|I2@16|??@48","C0D" +"OR WITH COMPLEMENT 
(32)","OCRK R1,R2,R3","47477@0|R3@16|//@20|R1@24|R2@28|??@32","B975" +"OR WITH COMPLEMENT (64)","OCGRK R1,R2,R3","47461@0|R3@16|//@20|R1@24|R2@28|??@32","B965" +"PACK","PACK D1(L1,B1),D2(L2,B2)","242@0|L1@8|L2@12|B1@16|D1@20|B2@32|D2@36|??@48","ST" +"PACK ASCII","PKA D1(B1),D2(L2,B2)","233@0|L2@8|B1@16|D1@20|B2@32|D2@36|??@48","SP" +"PACK UNICODE","PKU D1(B1),D2(L2,B2)","225@0|L2@8|B1@16|D1@20|B2@32|D2@36|??@48","SP" +"PAGE IN","PGIN R1,R2","45614@0|//@16|R1@24|R2@28|??@32","B22E" +"PAGE OUT","PGOUT R1,R2","45615@0|//@16|R1@24|R2@28|??@32","B22F" +"PERFORM CRYPTOGRAPHIC COMPUTATION","PCC","47404@0|//@16|??@32","SP IC" +"PERFORM CRYPTOGRAPHIC KEY MGMT. OPERATIONS","PCKMO","47400@0|//@16|??@32","SP" +"PERFORM FLOATING-POINT OPERATION","PFPO","266@0|??@16","SP Da" +"PERFORM FRAME MANAGEMENT FUNCTION","PFMF R1,R2","47535@0|//@16|R1@24|R2@28|??@32","SP IS" +"PERFORM LOCKED OPERATION","PLO R1,D2(B2),R3,D4(B4)","238@0|R1@8|R3@12|B2@16|D2@20|B4@32|D4@36|??@48","SP" +"PERFORM PROCESSOR ASSIST","PPA R1,R2,M3","45800@0|M3@16|//@20|R1@24|R2@28|??@32","B2E8" +"PERFORM RANDOM NUMBER OPERATION","PPNO R1,R2","47420@0|//@16|R1@24|R2@28|??@32","SP IC" +"PERFORM RANDOM NUMBER OPERATION","PRNO R1,R2","47420@0|//@16|R1@24|R2@28|??@32","SP IC" +"PERFORM TIMING FACILITY FUNCTION","PTFF","260@0|??@16","SP" +"PERFORM TOPOLOGY FUNCTION","PTF R1","47522@0|//@16|R1@24|//@28|??@32","SP" +"POPULATION COUNT","POPCNT R1,R2,M3","47585@0|M3@16|//@20|R1@24|R2@28|??@32","B9E1" +"PREFETCH DATA","PFD M1,D2(X2,B2)","227@0|M1@8|X2@12|B2@16|D2@20|54@40|??@48","B" +"PREFETCH DATA RELATIVE LONG","PFDRL M1,RI2","198@0|M1@8|2@12|RI2@16|??@48","C62" +"PROGRAM CALL","PC D2(B2)","45592@0|B2@16|D2@20|??@32","Z" +"PROGRAM RETURN","PR","257@0|??@16","B ST" +"PROGRAM TRANSFER","PT R1,R2","45608@0|//@16|R1@24|R2@28|??@32","B" +"PROGRAM TRANSFER WITH INSTANCE","PTI R1,R2","47518@0|//@16|R1@24|R2@28|??@32","B" +"PURGE ALB","PALB","45640@0|//@16|??@32","B248" +"PURGE TLB","PTLB","45581@0|//@16|??@32","B20D" +"QUANTIZE 
(extended DFP)","QAXTR R1,R3,R2,M4","46077@0|R3@16|M4@20|R1@24|R2@28|??@32","SP Dt" +"QUANTIZE (long DFP)","QADTR R1,R3,R2,M4","46069@0|R3@16|M4@20|R1@24|R2@28|??@32","Dt" +"QUERY PROCESSOR ACTIVITY COUNTER INFORMATION","QPACI D2(B2)","45711@0|B2@16|D2@20|??@32","ST" +"REROUND (extended DFP)","RRXTR R1,R3,R2,M4","46079@0|R3@16|M4@20|R1@24|R2@28|??@32","SP Dt" +"REROUND (long DFP)","RRDTR R1,R3,R2,M4","46071@0|R3@16|M4@20|R1@24|R2@28|??@32","Dt" +"RESET CHANNEL PATH","RCHP","45627@0|//@16|??@32","B23B" +"RESET DAT PROTECTION","RDP R1,R3,R2,M4","47499@0|R3@16|M4@20|R1@24|R2@28|??@32","B98B" +"RESET REFERENCE BIT EXTENDED","RRBE R1,R2","45610@0|//@16|R1@24|R2@28|??@32","SO" +"RESET REFERENCE BITS MULTIPLE","RRBM R1,R2","47534@0|//@16|R1@24|R2@28|??@32","B9AE" +"RESUME PROGRAM","RP D2(B2)","45687@0|B2@16|D2@20|??@32","SP WE T" +"RESUME SUBCHANNEL","RSCH","45624@0|//@16|??@32","OP" +"ROTATE LEFT SINGLE LOGICAL (32)","RLL R1,R3,D2(B2)","235@0|R1@8|R3@12|B2@16|D2@20|29@40|??@48","EB1D" +"ROTATE LEFT SINGLE LOGICAL (64)","RLLG R1,R3,D2(B2)","235@0|R1@8|R3@12|B2@16|D2@20|28@40|??@48","EB1C" +"ROTATE THEN AND SELECTED BITS (64)","RNSBG R1,R2,I3,I4,I5","236@0|R1@8|R2@12|I3@16|I4@24|I5@32|84@40|??@48","EC54" +"ROTATETHENEXCLUSIVEORSELECT.BITS(64)","RXSBG R1,R2,I3,I4,I5","236@0|R1@8|R2@12|I3@16|I4@24|I5@32|87@40|??@48","EC57" +"ROTATE THEN INSERT SELECTED BITS (64)","RISBG R1,R2,I3,I4,I5","236@0|R1@8|R2@12|I3@16|I4@24|I5@32|85@40|??@48","EC55" +"ROTATE THEN INSERT SELECTED BITS (64)","RISBGN R1,R2,I3,I4,I5","236@0|R1@8|R2@12|I3@16|I4@24|I5@32|89@40|??@48","EC59" +"ROTATE THEN INSERT SELECTED BITS HIGH(64)","RISBHG R1,R2,I3,I4,I5","236@0|R1@8|R2@12|I3@16|I4@24|I5@32|93@40|??@48","EC5D" +"ROTATE THEN INSERT SELECTED BITS LOW (64)","RISBLG R1,R2,I3,I4,I5","236@0|R1@8|R2@12|I3@16|I4@24|I5@32|81@40|??@48","EC51" +"ROTATE THEN OR SELECTED BITS (64)","ROSBG R1,R2,I3,I4,I5","236@0|R1@8|R2@12|I3@16|I4@24|I5@32|86@40|??@48","EC56" +"SEARCH STRING","SRST 
R1,R2","45662@0|//@16|R1@24|R2@28|??@32","SP IC" +"SEARCH STRING UNICODE","SRSTU R1,R2","47550@0|//@16|R1@24|R2@28|??@32","SP IC" +"SELECT (32)","SELR R1,R2,R3,M4","47600@0|R3@16|M4@20|R1@24|R2@28|??@32","B9F0" +"SELECT (64)","SELGR R1,R2,R3,M4","47587@0|R3@16|M4@20|R1@24|R2@28|??@32","B9E3" +"SELECT HIGH (32)","SELFHR R1,R2,R3,M4","47552@0|R3@16|M4@20|R1@24|R2@28|??@32","B9C0" +"SET ACCESS","SAR R1,R2","45646@0|//@16|R1@24|R2@28|??@32","U" +"SET ADDRESS LIMIT","SAL","45623@0|//@16|??@32","OP" +"SET ADDRESS SPACE CONTROL","SAC D2(B2)","45593@0|B2@16|D2@20|??@32","SP SW" +"SET ADDRESS SPACE CONTROL FAST","SACF D2(B2)","45689@0|B2@16|D2@20|??@32","SP SW" +"SET ADDRESSING MODE (24)","SAM24","268@0|??@16","SP" +"SET ADDRESSING MODE (31)","SAM31","269@0|??@16","SP" +"SET ADDRESSING MODE (64)","SAM64","270@0|??@16","010E" +"SET BFP ROUNDING MODE (2 bit)","SRNM D2(B2)","45721@0|B2@16|D2@20|??@32","Db" +"SET BFP ROUNDING MODE (3 bit)","SRNMB D2(B2)","45752@0|B2@16|D2@20|??@32","SP Db" +"SET CHANNEL MONITOR","SCHM","45628@0|//@16|??@32","OP" +"SET CLOCK","SCK D2(B2)","45572@0|B2@16|D2@20|??@32","SP" +"SET CLOCK COMPARATOR","SCKC D2(B2)","45574@0|B2@16|D2@20|??@32","SP" +"SET CLOCK PROGRAMMABLE FIELD","SCKPF","263@0|??@16","SP" +"SET CPU TIMER","SPT D2(B2)","45576@0|B2@16|D2@20|??@32","SP" +"SET DFP ROUNDING MODE","SRNMT D2(B2)","45753@0|B2@16|D2@20|??@32","Dt" +"SET FPC","SFPC R1","45956@0|//@16|R1@24|//@28|??@32","SP Db" +"SET FPC AND SIGNAL","SFASR R1","45957@0|//@16|R1@24|//@28|??@32","SP Dt" +"SET PREFIX","SPX D2(B2)","45584@0|B2@16|D2@20|??@32","SP" +"SET PROGRAM MASK","SPM R1","4@0|R1@8|//@12|??@16","04" +"SET PSW KEY FROM ADDRESS","SPKA D2(B2)","45578@0|B2@16|D2@20|??@32","SO" +"SET SECONDARY ASN","SSAR R1","45605@0|//@16|R1@24|//@28|??@32","Z" +"SET SECONDARY ASN WITH INSTANCE","SSAIR R1","47519@0|//@16|R1@24|//@28|??@32","Z" +"SET STORAGE KEY EXTENDED","SSKE R1,R2,M3","45611@0|M3@16|//@20|R1@24|R2@28|??@32","IS" +"SET SYSTEM MASK","SSM 
D1(B1)","128@0|I2@8|B1@16|D1@20|??@32","SP SO" +"SHIFT AND ROUND DECIMAL","SRP D1(L1,B1),D2(B2),I3","240@0|L1@8|I3@12|B1@16|D1@20|B2@32|D2@36|??@48","Dg" +"SHIFT LEFT DOUBLE (64)","SLDA R1,D2(B2)","143@0|R1@8|//@12|B2@16|D2@20|??@32","SP" +"SHIFT LEFT DOUBLE LOGICAL (64)","SLDL R1,D2(B2)","141@0|R1@8|//@12|B2@16|D2@20|??@32","SP" +"SHIFT LEFT SINGLE (32)","SLA R1,D2(B2)","139@0|R1@8|//@12|B2@16|D2@20|??@32","8B" +"SHIFT LEFT SINGLE (32)","SLAK R1,R3,D2(B2)","235@0|R1@8|R3@12|B2@16|D2@20|221@40|??@48","EBDD 7-383" +"SHIFT LEFT SINGLE (64)","SLAG R1,R3,D2(B2)","235@0|R1@8|R3@12|B2@16|D2@20|11@40|??@48","EB0B" +"SHIFT LEFT SINGLE LOGICAL (32)","SLL R1,D2(B2)","137@0|R1@8|//@12|B2@16|D2@20|??@32","89" +"SHIFT LEFT SINGLE LOGICAL (32)","SLLK R1,R3,D2(B2)","235@0|R1@8|R3@12|B2@16|D2@20|223@40|??@48","EBDF" +"SHIFT LEFT SINGLE LOGICAL (64)","SLLG R1,R3,D2(B2)","235@0|R1@8|R3@12|B2@16|D2@20|13@40|??@48","EB0D" +"SHIFT RIGHT DOUBLE (64)","SRDA R1,D2(B2)","142@0|R1@8|//@12|B2@16|D2@20|??@32","SP" +"SHIFT RIGHT DOUBLE LOGICAL (64)","SRDL R1,D2(B2)","140@0|R1@8|//@12|B2@16|D2@20|??@32","SP" +"SHIFT RIGHT SINGLE (32)","SRA R1,D2(B2)","138@0|R1@8|//@12|B2@16|D2@20|??@32","8A" +"SHIFT RIGHT SINGLE (32)","SRAK R1,R3,D2(B2)","235@0|R1@8|R3@12|B2@16|D2@20|220@40|??@48","EBDC 7-386" +"SHIFT RIGHT SINGLE (64)","SRAG R1,R3,D2(B2)","235@0|R1@8|R3@12|B2@16|D2@20|10@40|??@48","EB0A" +"SHIFT RIGHT SINGLE LOGICAL (32)","SRL R1,D2(B2)","136@0|R1@8|//@12|B2@16|D2@20|??@32","88" +"SHIFT RIGHT SINGLE LOGICAL (32)","SRLK R1,R3,D2(B2)","235@0|R1@8|R3@12|B2@16|D2@20|222@40|??@48","EBDE 7-387" +"SHIFT RIGHT SINGLE LOGICAL (64)","SRLG R1,R3,D2(B2)","235@0|R1@8|R3@12|B2@16|D2@20|12@40|??@48","EB0C" +"SHIFT SIGNIFICAND LEFT (extended DFP)","SLXT R1,R3,D2(X2,B2)","237@0|R3@8|X2@12|B2@16|D2@20|R1@32|//@36|72@40|??@48","SP Dt" +"SHIFT SIGNIFICAND LEFT (long DFP)","SLDT R1,R3,D2(X2,B2)","237@0|R3@8|X2@12|B2@16|D2@20|R1@32|//@36|64@40|??@48","Dt" +"SHIFT SIGNIFICAND RIGHT (extended DFP)","SRXT 
R1,R3,D2(X2,B2)","237@0|R3@8|X2@12|B2@16|D2@20|R1@32|//@36|73@40|??@48","SP Dt" +"SHIFT SIGNIFICAND RIGHT (long DFP)","SRDT R1,R3,D2(X2,B2)","237@0|R3@8|X2@12|B2@16|D2@20|R1@32|//@36|65@40|??@48","Dt" +"SIGNAL PROCESSOR","SIGP R1,R3,D2(B2)","174@0|R1@8|R3@12|B2@16|D2@20|??@32","AE" +"SORT LISTS","SORTL R1,R2","47416@0|//@16|R1@24|R2@28|??@32","SP IC" +"SQUARE ROOT (extended BFP)","SQXBR R1,R2","45846@0|//@16|R1@24|R2@28|??@32","SP Db" +"SQUARE ROOT (extended HFP)","SQXR R1,R2","45878@0|//@16|R1@24|R2@28|??@32","SP Da" +"SQUARE ROOT (long BFP)","SQDB R1,D2(X2,B2)","237@0|R1@8|X2@12|B2@16|D2@20|//@32|21@40|??@48","Db" +"SQUARE ROOT (long BFP)","SQDBR R1,R2","45845@0|//@16|R1@24|R2@28|??@32","Db" +"SQUARE ROOT (long HFP)","SQD R1,D2(X2,B2)","237@0|R1@8|X2@12|B2@16|D2@20|//@32|53@40|??@48","Da" +"SQUARE ROOT (long HFP)","SQDR R1,R2","45636@0|//@16|R1@24|R2@28|??@32","Da" +"SQUARE ROOT (short BFP)","SQEB R1,D2(X2,B2)","237@0|R1@8|X2@12|B2@16|D2@20|//@32|20@40|??@48","Db" +"SQUARE ROOT (short BFP)","SQEBR R1,R2","45844@0|//@16|R1@24|R2@28|??@32","Db" +"SQUARE ROOT (short HFP)","SQE R1,D2(X2,B2)","237@0|R1@8|X2@12|B2@16|D2@20|//@32|52@40|??@48","Da" +"SQUARE ROOT (short HFP)","SQER R1,R2","45637@0|//@16|R1@24|R2@28|??@32","Da" +"START SUBCHANNEL","SSCH D2(B2)","45619@0|B2@16|D2@20|??@32","SP OP" +"STORE (32)","ST R1,D2(X2,B2)","80@0|R1@8|X2@12|B2@16|D2@20|??@32","ST" +"STORE (32)","STY R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|80@40|??@48","ST" +"STORE (64)","STG R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|36@40|??@48","ST" +"STORE (long)","STD R1,D2(X2,B2)","96@0|R1@8|X2@12|B2@16|D2@20|??@32","Da" +"STORE (long)","STDY R1,D2(X2,B2)","237@0|R1@8|X2@12|B2@16|D2@20|103@40|??@48","Da" +"STORE (short)","STE R1,D2(X2,B2)","112@0|R1@8|X2@12|B2@16|D2@20|??@32","Da" +"STORE (short)","STEY R1,D2(X2,B2)","237@0|R1@8|X2@12|B2@16|D2@20|102@40|??@48","Da" +"STORE ACCESS MULTIPLE 7-389","STAM R1,R3,D2(B2)","155@0|R1@8|R3@12|B2@16|D2@20|??@32","SP" +"STORE ACCESS MULTIPLE 
7-389","STAMY R1,R3,D2(B2)","235@0|R1@8|R3@12|B2@16|D2@20|155@40|??@48","SP" +"STORE BEAR","STBEAR D2(B2)","45569@0|B2@16|D2@20|??@32","SP" +"STORE CHANNEL PATH STATUS","STCPS D2(B2)","45626@0|B2@16|D2@20|??@32","SP" +"STORE CHANNEL REPORT WORD","STCRW D2(B2)","45625@0|B2@16|D2@20|??@32","SP" +"STORE CHARACTER","STC R1,D2(X2,B2)","66@0|R1@8|X2@12|B2@16|D2@20|??@32","ST" +"STORE CHARACTER","STCY R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|114@40|??@48","ST" +"STORE CHARACTER HIGH (8)","STCH R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|195@40|??@48","ST" +"STORE CHARACTERS UNDER MASK (high)","STCMH R1,M3,D2(B2)","235@0|R1@8|M3@12|B2@16|D2@20|44@40|??@48","ST" +"STORE CHARACTERS UNDER MASK (low)","STCM R1,M3,D2(B2)","190@0|R1@8|M3@12|B2@16|D2@20|??@32","ST" +"STORE CHARACTERS UNDER MASK (low)","STCMY R1,M3,D2(B2)","235@0|R1@8|M3@12|B2@16|D2@20|45@40|??@48","ST" +"STORE CLOCK","STCK D2(B2)","45573@0|B2@16|D2@20|??@32","ST" +"STORE CLOCK COMPARATOR","STCKC D2(B2)","45575@0|B2@16|D2@20|??@32","SP" +"STORE CLOCK EXTENDED","STCKE D2(B2)","45688@0|B2@16|D2@20|??@32","ST" +"STORE CLOCK FAST","STCKF D2(B2)","45692@0|B2@16|D2@20|??@32","ST" +"STORE CONTROL (32)","STCTL R1,R3,D2(B2)","182@0|R1@8|R3@12|B2@16|D2@20|??@32","SP" +"STORE CONTROL (64)","STCTG R1,R3,D2(B2)","235@0|R1@8|R3@12|B2@16|D2@20|37@40|??@48","SP" +"STORE CPU ADDRESS","STAP D2(B2)","45586@0|B2@16|D2@20|??@32","SP" +"STORE CPU ID","STIDP D2(B2)","45570@0|B2@16|D2@20|??@32","SP" +"STORE CPU TIMER","STPT D2(B2)","45577@0|B2@16|D2@20|??@32","SP" +"STORE FACILITY LIST","STFL D2(B2)","45745@0|B2@16|D2@20|??@32","B2B1" +"STORE FACILITY LIST EXTENDED","STFLE D2(B2)","45744@0|B2@16|D2@20|??@32","SP" +"STORE FPC","STFPC D2(B2)","45724@0|B2@16|D2@20|??@32","Db" +"STORE GUARDED STORAGE CONTROLS","STGSC R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|73@40|??@48","SO" +"STORE HALFWORD (16)","STH R1,D2(X2,B2)","64@0|R1@8|X2@12|B2@16|D2@20|??@32","ST" +"STORE HALFWORD (16)","STHY 
R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|112@40|??@48","ST" +"STORE HALFWORD HIGH (16)","STHH R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|199@40|??@48","ST" +"STORE HALFWORD RELATIVE LONG (16)","STHRL R1,RI2","196@0|R1@8|7@12|RI2@16|??@48","ST" +"STORE HIGH (32)","STFH R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|203@40|??@48","ST" +"STORE HIGH ON CONDITION","STOCFH R1,D2(B2),M3","235@0|R1@8|M3@12|B2@16|D2@20|225@40|??@48","ST" +"STORE MULTIPLE (32)","STM R1,R3,D2(B2)","144@0|R1@8|R3@12|B2@16|D2@20|??@32","ST" +"STORE MULTIPLE (32)","STMY R1,R3,D2(B2)","235@0|R1@8|R3@12|B2@16|D2@20|144@40|??@48","ST" +"STORE MULTIPLE (64)","STMG R1,R3,D2(B2)","235@0|R1@8|R3@12|B2@16|D2@20|36@40|??@48","ST" +"STORE MULTIPLE HIGH (32)","STMH R1,R3,D2(B2)","235@0|R1@8|R3@12|B2@16|D2@20|38@40|??@48","ST" +"STORE ON CONDITION (32)","STOC R1,D2(B2),M3","235@0|R1@8|M3@12|B2@16|D2@20|243@40|??@48","ST" +"STORE ON CONDITION (64)","STOCG R1,D2(B2),M3","235@0|R1@8|M3@12|B2@16|D2@20|227@40|??@48","ST" +"STORE PAIR TO QUADWORD","STPQ R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|142@40|??@48","SP" +"STORE PREFIX","STPX D2(B2)","45585@0|B2@16|D2@20|??@32","SP" +"STORE REAL ADDRESS","STRAG D1(B1),D2(B2)","58626@0|B1@16|D1@20|B2@32|D2@36|??@48","SP" +"STORE RELATIVE LONG (32)","STRL R1,RI2","196@0|R1@8|15@12|RI2@16|??@48","SP" +"STORE RELATIVE LONG (64)","STGRL R1,RI2","196@0|R1@8|11@12|RI2@16|??@48","SP" +"STORE REVERSED (16)","STRVH R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|63@40|??@48","ST" +"STORE REVERSED (32)","STRV R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|62@40|??@48","ST" +"STORE REVERSED (64)","STRVG R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|47@40|??@48","ST" +"STORE SUBCHANNEL","STSCH D2(B2)","45620@0|B2@16|D2@20|??@32","SP OP" +"STORE SYSTEM INFORMATION","STSI D2(B2)","45693@0|B2@16|D2@20|??@32","SP" +"STORE THEN AND SYSTEM MASK","STNSM D1(B1),I2","172@0|I2@8|B1@16|D1@20|??@32","ST" +"STORE THEN OR SYSTEM MASK","STOSM D1(B1),I2","173@0|I2@8|B1@16|D1@20|??@32","SP" +"STORE USING 
REAL ADDRESS (32)","STURA R1,R2","45638@0|//@16|R1@24|R2@28|??@32","SP" +"STORE USING REAL ADDRESS (64)","STURG R1,R2","47397@0|//@16|R1@24|R2@28|??@32","SP" +"SUBTRACT (32)","S R1,D2(X2,B2)","91@0|R1@8|X2@12|B2@16|D2@20|??@32","B" +"SUBTRACT (32)","SR R1,R2","27@0|R1@8|R2@12|??@16","1B" +"SUBTRACT (32)","SRK R1,R2,R3","47609@0|R3@16|//@20|R1@24|R2@28|??@32","B9F9" +"SUBTRACT (32)","SY R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|91@40|??@48","B" +"SUBTRACT (64)","SG R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|9@40|??@48","B" +"SUBTRACT (64)","SGR R1,R2","47369@0|//@16|R1@24|R2@28|??@32","B909" +"SUBTRACT (64)","SGRK R1,R2,R3","47593@0|R3@16|//@20|R1@24|R2@28|??@32","B9E9" +"SUBTRACT (64←32)","SGF R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|25@40|??@48","B" +"SUBTRACT (64→32)","SGFR R1,R2","47385@0|//@16|R1@24|R2@28|??@32","B919" +"SUBTRACT (extended BFP)","SXBR R1,R2","45899@0|//@16|R1@24|R2@28|??@32","SP Db" +"SUBTRACT (extended DFP)","SXTR R1,R2,R3","46043@0|R3@16|//@20|R1@24|R2@28|??@32","SP Dt" +"SUBTRACT (extended DFP)","SXTRA R1,R2,R3,M4","46043@0|R3@16|M4@20|R1@24|R2@28|??@32","SP Dt" +"SUBTRACT (long BFP)","SDB R1,D2(X2,B2)","237@0|R1@8|X2@12|B2@16|D2@20|//@32|27@40|??@48","Db" +"SUBTRACT (long BFP)","SDBR R1,R2","45851@0|//@16|R1@24|R2@28|??@32","Db" +"SUBTRACT (long DFP)","SDTR R1,R2,R3","46035@0|R3@16|//@20|R1@24|R2@28|??@32","Dt" +"SUBTRACT (long DFP)","SDTRA R1,R2,R3,M4","46035@0|R3@16|M4@20|R1@24|R2@28|??@32","Dt" +"SUBTRACT (short BFP)","SEB R1,D2(X2,B2)","237@0|R1@8|X2@12|B2@16|D2@20|//@32|11@40|??@48","Db" +"SUBTRACT (short BFP)","SEBR R1,R2","45835@0|//@16|R1@24|R2@28|??@32","Db" +"SUBTRACT DECIMAL","SP D1(L1,B1),D2(L2,B2)","251@0|L1@8|L2@12|B1@16|D1@20|B2@32|D2@36|??@48","Dg" +"SUBTRACT HALFWORD (32←16)","SH R1,D2(X2,B2)","75@0|R1@8|X2@12|B2@16|D2@20|??@32","B" +"SUBTRACT HALFWORD (32→16)","SHY R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|123@40|??@48","B" +"SUBTRACT HALFWORD (64→16)","SGH 
R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|57@40|??@48","B" +"SUBTRACT HIGH (32)","SHHHR R1,R2,R3","47561@0|R3@16|//@20|R1@24|R2@28|??@32","B9C9" +"SUBTRACT HIGH (32)","SHHLR R1,R2,R3","47577@0|R3@16|//@20|R1@24|R2@28|??@32","B9D9" +"SUBTRACT LOGICAL (32)","SL R1,D2(X2,B2)","95@0|R1@8|X2@12|B2@16|D2@20|??@32","B" +"SUBTRACT LOGICAL (32)","SLR R1,R2","31@0|R1@8|R2@12|??@16","1F" +"SUBTRACT LOGICAL (32)","SLRK R1,R2,R3","47611@0|R3@16|//@20|R1@24|R2@28|??@32","B9FB" +"SUBTRACT LOGICAL (32)","SLY R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|95@40|??@48","B" +"SUBTRACT LOGICAL (64)","SLG R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|11@40|??@48","B" +"SUBTRACT LOGICAL (64)","SLGR R1,R2","47371@0|//@16|R1@24|R2@28|??@32","B90B" +"SUBTRACT LOGICAL (64)","SLGRK R1,R2,R3","47595@0|R3@16|//@20|R1@24|R2@28|??@32","B9EB" +"SUBTRACT LOGICAL (64←32)","SLGF R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|27@40|??@48","B" +"SUBTRACT LOGICAL (64←32)","SLGFR R1,R2","47387@0|//@16|R1@24|R2@28|??@32","B91B" +"SUBTRACT LOGICAL HIGH (32)","SLHHHR R1,R2,R3","47563@0|R3@16|//@20|R1@24|R2@28|??@32","B9CB" +"SUBTRACT LOGICAL HIGH (32)","SLHHLR R1,R2,R3","47579@0|R3@16|//@20|R1@24|R2@28|??@32","B9DB" +"SUBTRACT LOGICAL IMMEDIATE (32)","SLFI R1,I2","194@0|R1@8|5@12|I2@16|??@48","C25" +"SUBTRACT LOGICAL IMMEDIATE (64→32)","SLGFI R1,I2","194@0|R1@8|4@12|I2@16|??@48","C24" +"SUBTRACT LOGICAL WITH BORROW (32)","SLB R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|153@40|??@48","B" +"SUBTRACT LOGICAL WITH BORROW (32)","SLBR R1,R2","47513@0|//@16|R1@24|R2@28|??@32","B999" +"SUBTRACT LOGICAL WITH BORROW (64)","SLBG R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|137@40|??@48","B" +"SUBTRACT LOGICAL WITH BORROW (64)","SLBGR R1,R2","47497@0|//@16|R1@24|R2@28|??@32","B989" +"SUBTRACT NORMALIZED (extended HFP)","SXR R1,R2","55@0|R1@8|R2@12|??@16","SP Da" +"SUBTRACT NORMALIZED (long HFP)","SD R1,D2(X2,B2)","107@0|R1@8|X2@12|B2@16|D2@20|??@32","Da" +"SUBTRACT NORMALIZED (long HFP)","SDR 
R1,R2","43@0|R1@8|R2@12|??@16","Da" +"SUBTRACT NORMALIZED (short HFP)","SE R1,D2(X2,B2)","123@0|R1@8|X2@12|B2@16|D2@20|??@32","Da" +"SUBTRACT NORMALIZED (short HFP)","SER R1,R2","59@0|R1@8|R2@12|??@16","Da" +"SUBTRACT UNNORMALIZED (long HFP)","SW R1,D2(X2,B2)","111@0|R1@8|X2@12|B2@16|D2@20|??@32","Da" +"SUBTRACT UNNORMALIZED (long HFP)","SWR R1,R2","47@0|R1@8|R2@12|??@16","Da" +"SUBTRACT UNNORMALIZED (short HFP)","SU R1,D2(X2,B2)","127@0|R1@8|X2@12|B2@16|D2@20|??@32","Da" +"SUBTRACT UNNORMALIZED (short HFP)","SUR R1,R2","63@0|R1@8|R2@12|??@16","Da" +"SUPERVISOR CALL","SVC I","10@0|I@8|??@16","0A" +"TEST ACCESS","TAR R1,R2","45644@0|//@16|R1@24|R2@28|??@32","U" +"TEST ADDRESSING MODE","TAM","267@0|??@16","010B" +"TEST AND SET","TS D1(B1)","147@0|I2@8|B1@16|D1@20|??@32","ST" +"TEST BLOCK","TB R1,R2","45612@0|//@16|R1@24|R2@28|??@32","II" +"TEST DATA CLASS (extended BFP)","TCXB R1,D2(X2,B2)","237@0|R1@8|X2@12|B2@16|D2@20|//@32|18@40|??@48","SP Db" +"TEST DATA CLASS (extended DFP)","TDCXT R1,D2(X2,B2)","237@0|R1@8|X2@12|B2@16|D2@20|//@32|88@40|??@48","SP Dt" +"TEST DATA CLASS (long BFP)","TCDB R1,D2(X2,B2)","237@0|R1@8|X2@12|B2@16|D2@20|//@32|17@40|??@48","Db" +"TEST DATA CLASS (long DFP)","TDCDT R1,D2(X2,B2)","237@0|R1@8|X2@12|B2@16|D2@20|//@32|84@40|??@48","Dt" +"TEST DATA CLASS (short BFP)","TCEB R1,D2(X2,B2)","237@0|R1@8|X2@12|B2@16|D2@20|//@32|16@40|??@48","Db" +"TEST DATA CLASS (short DFP)","TDCET R1,D2(X2,B2)","237@0|R1@8|X2@12|B2@16|D2@20|//@32|80@40|??@48","Dt" +"TEST DATA GROUP (extended DFP)","TDGXT R1,D2(X2,B2)","237@0|R1@8|X2@12|B2@16|D2@20|//@32|89@40|??@48","SP Dt" +"TEST DATA GROUP (long DFP)","TDGDT R1,D2(X2,B2)","237@0|R1@8|X2@12|B2@16|D2@20|//@32|85@40|??@48","Dt" +"TEST DATA GROUP (short DFP)","TDGET R1,D2(X2,B2)","237@0|R1@8|X2@12|B2@16|D2@20|//@32|81@40|??@48","Dt" +"TEST DECIMAL","TP D1(L1,B1)","235@0|L1@8|//@12|B1@16|D1@20|//@32|192@40|??@48","B" +"TEST PENDING EXTERNAL INTERRUPTION","TPEI R1,R2","47521@0|//@16|R1@24|R2@28|??@32","B9A1" +"TEST 
PENDING INTERRUPTION","TPI D2(B2)","45622@0|B2@16|D2@20|??@32","ST" +"TEST PROTECTION","TPROT D1(B1),D2(B2)","58625@0|B1@16|D1@20|B2@32|D2@36|??@48","SO" +"TEST SUBCHANNEL","TSCH D2(B2)","45621@0|B2@16|D2@20|??@32","SP OP" +"TEST UNDER MASK","TM D1(B1),I2","145@0|I2@8|B1@16|D1@20|??@32","B" +"TEST UNDER MASK","TMY D1(B1),I2","235@0|I2@8|B1@16|D1@20|81@40|??@48","B" +"TEST UNDER MASK (high high)","TMHH R1,I2","167@0|R1@8|2@12|I2@16|??@32","A72" +"TEST UNDER MASK (high low)","TMHL R1,I2","167@0|R1@8|3@12|I2@16|??@32","A73" +"TEST UNDER MASK (low high)","TMLH R1,I2","167@0|R1@8|0@12|I2@16|??@32","A70" +"TEST UNDER MASK (low low)","TMLL R1,I2","167@0|R1@8|1@12|I2@16|??@32","A71" +"TEST UNDER MASK HIGH","TMH R1,I2","167@0|R1@8|0@12|I2@16|??@32","A70" +"TEST UNDER MASK LOW","TML R1,I2","167@0|R1@8|1@12|I2@16|??@32","A71" +"TRACE (32)","TRACE R1,R3,D2(B2)","153@0|R1@8|R3@12|B2@16|D2@20|??@32","SP" +"TRACE (64)","TRACG R1,R3,D2(B2)","235@0|R1@8|R3@12|B2@16|D2@20|15@40|??@48","SP" +"TRANSACTION ABORT","TABORT D2(B2)","45820@0|B2@16|D2@20|??@32","SP SO" +"TRANSACTION BEGIN (constrained)","TBEGINC D1(B1),I2","58721@0|B1@16|D1@20|I2@32|??@48","SP SO" +"TRANSACTION BEGIN (nonconstrained)","TBEGIN D1(B1),I2","58720@0|B1@16|D1@20|I2@32|??@48","SP SO" +"TRANSACTION END","TEND","45816@0|//@16|??@32","SO" +"TRANSLATE","TR D1(L1,B1),D2(B2)","220@0|L1@8|B1@16|D1@20|B2@32|D2@36|??@48","ST" +"TRANSLATE AND TEST","TRT D1(L1,B1),D2(B2)","221@0|L1@8|B1@16|D1@20|B2@32|D2@36|??@48","B" +"TRANSLATE AND TEST EXTENDED","TRTE R1,R2,M3","47551@0|M3@16|//@20|R1@24|R2@28|??@32","SP IC" +"TRANSLATE AND TEST REVERSE","TRTR D1(L1,B1),D2(B2)","208@0|L1@8|B1@16|D1@20|B2@32|D2@36|??@48","B" +"TRANSLATE AND TEST REVERSE EXTENDED","TRTRE R1,R2,M3","47549@0|M3@16|//@20|R1@24|R2@28|??@32","SP IC" +"TRANSLATE EXTENDED","TRE R1,R2","45733@0|//@16|R1@24|R2@28|??@32","SP IC" +"TRANSLATE ONE TO ONE","TROO R1,R2,M3","47507@0|M3@16|//@20|R1@24|R2@28|??@32","SP IC" +"TRANSLATE ONE TO TWO","TROT 
R1,R2,M3","47506@0|M3@16|//@20|R1@24|R2@28|??@32","SP IC" +"TRANSLATE TWO TO ONE","TRTO R1,R2,M3","47505@0|M3@16|//@20|R1@24|R2@28|??@32","SP IC" +"TRANSLATE TWO TO TWO","TRTT R1,R2,M3","47504@0|M3@16|//@20|R1@24|R2@28|??@32","SP IC" +"TRAP","TRAP2","511@0|??@16","SO" +"TRAP","TRAP4 D2(B2)","45823@0|B2@16|D2@20|??@32","SO" +"UNPACK","UNPK D1(L1,B1),D2(L2,B2)","243@0|L1@8|L2@12|B1@16|D1@20|B2@32|D2@36|??@48","ST" +"UNPACK ASCII","UNPKA D1(L1,B1),D2(B2)","234@0|L1@8|B1@16|D1@20|B2@32|D2@36|??@48","SP" +"UNPACK UNICODE","UNPKU D1(L1,B1),D2(B2)","226@0|L1@8|B1@16|D1@20|B2@32|D2@36|??@48","SP" +"UPDATE TREE","UPT","258@0|??@16","SP II" +"VECTOR ADD","VA V1,V2,V3,M4","231@0|V1@8|V2@12|V3@16|//@20|M4@32|RXB@36|243@40|??@48","SP Dv" +"VECTOR ADD COMPUTE CARRY","VACC V1,V2,V3,M4","231@0|V1@8|V2@12|V3@16|//@20|M4@32|RXB@36|241@40|??@48","SP Dv" +"VECTOR ADD DECIMAL","VAP V1,V2,V3,I4,M5","230@0|V1@8|V2@12|V3@16|//@20|M5@24|I4@28|RXB@36|113@40|??@48","SP Dv" +"VECTOR ADD WITH CARRY","VAC V1,V2,V3,V4,M5","231@0|V1@8|V2@12|V3@16|M5@20|//@24|V4@32|RXB@36|187@40|??@48","SP Dv" +"VECTOR ADD WITH CARRY COMPUTE CARRY","VACCC V1,V2,V3,V4,M5","231@0|V1@8|V2@12|V3@16|M5@20|//@24|V4@32|RXB@36|185@40|??@48","SP Dv" +"VECTOR AND","VN V1,V2,V3","231@0|V1@8|V2@12|V3@16|//@20|RXB@36|104@40|??@48","Dv" +"VECTOR AND WITH COMPLEMENT","VNC V1,V2,V3","231@0|V1@8|V2@12|V3@16|//@20|RXB@36|105@40|??@48","Dv" +"VECTOR AVERAGE","VAVG V1,V2,V3,M4","231@0|V1@8|V2@12|V3@16|//@20|M4@32|RXB@36|242@40|??@48","SP Dv" +"VECTOR AVERAGE LOGICAL","VAVGL V1,V2,V3,M4","231@0|V1@8|V2@12|V3@16|//@20|M4@32|RXB@36|240@40|??@48","SP Dv" +"VECTOR BIT PERMUTE","VBPERM V1,V2,V3","231@0|V1@8|V2@12|V3@16|//@20|RXB@36|133@40|??@48","Dv" +"VECTOR CHECKSUM","VCKSM V1,V2,V3","231@0|V1@8|V2@12|V3@16|//@20|RXB@36|102@40|??@48","Dv" +"VECTOR COMPARE DECIMAL","VCP V1,V2,M3","230@0|//@8|V1@12|V2@16|//@20|M3@24|//@28|RXB@36|119@40|??@48","Dv" +"VECTOR COMPARE EQUAL","VCEQ 
V1,V2,V3,M4,M5","231@0|V1@8|V2@12|V3@16|//@20|M5@24|//@28|M4@32|RXB@36|248@40|??@48","SP Dv" +"VECTOR COMPARE HIGH","VCH V1,V2,V3,M4,M5","231@0|V1@8|V2@12|V3@16|//@20|M5@24|//@28|M4@32|RXB@36|251@40|??@48","SP Dv" +"VECTOR COMPARE HIGH LOGICAL","VCHL V1,V2,V3,M4,M5","231@0|V1@8|V2@12|V3@16|//@20|M5@24|//@28|M4@32|RXB@36|249@40|??@48","SP Dv" +"VECTOR CONVERT HFP TO SCALED DECIMAL","VCSPH V1,V2,V3,M4","230@0|V1@8|V2@12|V3@16|//@20|M4@24|//@28|RXB@36|125@40|??@48","Dv" +"VECTOR CONVERT TO BINARY","VCVB R1,V2,M3,M4","230@0|R1@8|V2@12|//@16|M3@24|M4@28|//@32|RXB@36|80@40|??@48","Dv" +"VECTOR CONVERT TO BINARY","VCVBG R1,V2,M3,M4","230@0|R1@8|V2@12|//@16|M3@24|M4@28|//@32|RXB@36|82@40|??@48","Dv" +"VECTOR CONVERT TO DECIMAL","VCVD V1,R2,I3,M4","230@0|V1@8|R2@12|//@16|M4@24|I3@28|RXB@36|88@40|??@48","SP Dv" +"VECTOR CONVERT TO DECIMAL","VCVDG V1,R2,I3,M4","230@0|V1@8|R2@12|//@16|M4@24|I3@28|RXB@36|90@40|??@48","SP Dv" +"VECTOR COUNT LEADING ZERO DIGITS","VCLZDP V1,V2,M3","230@0|V1@8|V2@12|//@16|M3@24|//@28|RXB@36|81@40|??@48","Dv" +"VECTOR COUNT LEADING ZEROS","VCLZ V1,V2,M3","231@0|V1@8|V2@12|//@16|M3@32|RXB@36|83@40|??@48","SP Dv" +"VECTOR COUNT TRAILING ZEROS","VCTZ V1,V2,M3","231@0|V1@8|V2@12|//@16|M3@32|RXB@36|82@40|??@48","SP Dv" +"VECTOR DIVIDE DECIMAL","VDP V1,V2,V3,I4,M5","230@0|V1@8|V2@12|V3@16|//@20|M5@24|I4@28|RXB@36|122@40|??@48","SP Dv" +"VECTOR ELEMENT COMPARE","VEC V1,V2,M3","231@0|V1@8|V2@12|//@16|M3@32|RXB@36|219@40|??@48","SP Dv" +"VECTOR ELEMENT COMPARE LOGICAL","VECL V1,V2,M3","231@0|V1@8|V2@12|//@16|M3@32|RXB@36|217@40|??@48","SP Dv" +"VECTORELEMENTROTATEANDINSERTUNDER MASK","VERIM V1,V2,V3,I4,M5","231@0|V1@8|V2@12|V3@16|//@20|I4@24|M5@32|RXB@36|114@40|??@48","SP Dv" +"VECTOR ELEMENT ROTATE LEFT LOGICAL","VERLL V1,V3,D2(B2),M4","231@0|V1@8|V3@12|B2@16|D2@20|M4@32|RXB@36|51@40|??@48","SP Dv" +"VECTOR ELEMENT ROTATE LEFT LOGICAL","VERLLV V1,V2,V3,M4","231@0|V1@8|V2@12|V3@16|//@20|M4@32|RXB@36|115@40|??@48","SP Dv" +"VECTOR ELEMENT SHIFT LEFT","VESLV 
V1,V2,V3,M4","231@0|V1@8|V2@12|V3@16|//@20|M4@32|RXB@36|112@40|??@48","SP Dv" +"VECTOR ELEMENT SHIFT LEFT","VESL V1,V3,D2(B2),M4","231@0|V1@8|V3@12|B2@16|D2@20|M4@32|RXB@36|48@40|??@48","SP Dv" +"VECTOR ELEMENT SHIFT RIGHT ARITHMETIC","VESRA V1,V3,D2(B2),M4","231@0|V1@8|V3@12|B2@16|D2@20|M4@32|RXB@36|58@40|??@48","SP Dv" +"VECTOR ELEMENT SHIFT RIGHT ARITHMETIC","VESRAV V1,V2,V3,M4","231@0|V1@8|V2@12|V3@16|//@20|M4@32|RXB@36|122@40|??@48","SP Dv" +"VECTOR ELEMENT SHIFT RIGHT LOGICAL","VESRL V1,V3,D2(B2),M4","231@0|V1@8|V3@12|B2@16|D2@20|M4@32|RXB@36|56@40|??@48","SP Dv" +"VECTOR ELEMENT SHIFT RIGHT LOGICAL","VESRLV V1,V2,V3,M4","231@0|V1@8|V2@12|V3@16|//@20|M4@32|RXB@36|120@40|??@48","SP Dv" +"VECTOR EXCLUSIVE OR","VX V1,V2,V3","231@0|V1@8|V2@12|V3@16|//@20|RXB@36|109@40|??@48","Dv" +"VECTOR FIND ANY ELEMENT EQUAL","VFAE V1,V2,V3,M4,M5","231@0|V1@8|V2@12|V3@16|//@20|M5@24|//@28|M4@32|RXB@36|130@40|??@48","SP Dv" +"VECTOR FIND ELEMENT EQUAL","VFEE V1,V2,V3,M4,M5","231@0|V1@8|V2@12|V3@16|//@20|M5@24|//@28|M4@32|RXB@36|128@40|??@48","SP Dv" +"VECTOR FIND ELEMENT NOT EQUAL","VFENE V1,V2,V3,M4,M5","231@0|V1@8|V2@12|V3@16|//@20|M5@24|//@28|M4@32|RXB@36|129@40|??@48","SP Dv" +"VECTOR FP ADD","VFA V1,V2,V3,M4,M5","231@0|V1@8|V2@12|V3@16|//@20|M5@28|M4@32|RXB@36|227@40|??@48","SP Dv" +"VECTOR FP COMPARE AND SIGNAL SCALAR","WFK V1,V2,M3,M4","231@0|V1@8|V2@12|//@16|M4@28|M3@32|RXB@36|202@40|??@48","SP Dv" +"VECTOR FP COMPARE EQUAL","VFCE V1,V2,V3,M4,M5,M6","231@0|V1@8|V2@12|V3@16|//@20|M6@24|M5@28|M4@32|RXB@36|232@40|??@48","SP Dv" +"VECTOR FP COMPARE HIGH","VFCH V1,V2,V3,M4,M5,M6","231@0|V1@8|V2@12|V3@16|//@20|M6@24|M5@28|M4@32|RXB@36|235@40|??@48","SP Dv" +"VECTOR FP COMPARE HIGH OR EQUAL","VFCHE V1,V2,V3,M4,M5,M6","231@0|V1@8|V2@12|V3@16|//@20|M6@24|M5@28|M4@32|RXB@36|234@40|??@48","SP Dv" +"VECTOR FP COMPARE SCALAR","WFC V1,V2,M3,M4","231@0|V1@8|V2@12|//@16|M4@28|M3@32|RXB@36|203@40|??@48","SP Dv" +"VECTOR FP CONVERT AND LENGTHEN FROM NNP HIGH","VCLFNH 
V1,V2,M3,M4","230@0|V1@8|V2@12|//@16|M4@28|M3@32|RXB@36|86@40|??@48","Dv" +"VECTOR FP CONVERT AND LENGTHEN FROM NNP LOW","VCLFNL V1,V2,M3,M4","230@0|V1@8|V2@12|//@16|M4@28|M3@32|RXB@36|94@40|??@48","Dv" +"VECTOR FP CONVERT AND ROUND TO NNP","VCRNF V1,V2,V3,M4,M5","230@0|V1@8|V2@12|V3@16|//@20|M5@28|M4@32|RXB@36|117@40|??@48","Dv" +"VECTOR FP CONVERT FROM FIXED","VCFPS V1,V2,M3,M4,M5","231@0|V1@8|V2@12|//@16|M5@24|M4@28|M3@32|RXB@36|195@40|??@48","SP Dv" +"VECTOR FP CONVERT FROM FIXED 64-BIT","VCDG V1,V2,M3,M4,M5","231@0|V1@8|V2@12|//@16|M5@24|M4@28|M3@32|RXB@36|195@40|??@48","SP Dv" +"VECTOR FP CONVERT FROM LOGICAL","VCFPL V1,V2,M3,M4,M5","231@0|V1@8|V2@12|//@16|M5@24|M4@28|M3@32|RXB@36|193@40|??@48","SP Dv" +"VECTOR FP CONVERT FROM LOGICAL 64-BIT","VCDLG V1,V2,M3,M4,M5","231@0|V1@8|V2@12|//@16|M5@24|M4@28|M3@32|RXB@36|193@40|??@48","SP Dv" +"VECTOR FP CONVERT FROM NNP","VCFN V1,V2,M3,M4","230@0|V1@8|V2@12|//@16|M4@28|M3@32|RXB@36|93@40|??@48","Dv" +"VECTOR FP CONVERT TO FIXED","VCSFP V1,V2,M3,M4,M5","231@0|V1@8|V2@12|//@16|M5@24|M4@28|M3@32|RXB@36|194@40|??@48","SP Dv" +"VECTOR FP CONVERT TO FIXED 64-BIT","VCGD V1,V2,M3,M4,M5","231@0|V1@8|V2@12|//@16|M5@24|M4@28|M3@32|RXB@36|194@40|??@48","SP Dv" +"VECTOR FP CONVERT TO LOGICAL","VCLFP V1,V2,M3,M4,M5","231@0|V1@8|V2@12|//@16|M5@24|M4@28|M3@32|RXB@36|192@40|??@48","SP Dv" +"VECTOR FP CONVERT TO LOGICAL 64-BIT","VCLGD V1,V2,M3,M4,M5","231@0|V1@8|V2@12|//@16|M5@24|M4@28|M3@32|RXB@36|192@40|??@48","SP Dv" +"VECTOR FP CONVERT TO NNP","VCNF V1,V2,M3,M4","230@0|V1@8|V2@12|//@16|M4@28|M3@32|RXB@36|85@40|??@48","Dv" +"VECTOR FP DIVIDE","VFD V1,V2,V3,M4,M5","231@0|V1@8|V2@12|V3@16|//@20|M5@28|M4@32|RXB@36|229@40|??@48","SP Dv" +"VECTOR FP LOAD LENGTHENED","VFLL V1,V2,M3,M4","231@0|V1@8|V2@12|//@16|M4@28|M3@32|RXB@36|196@40|??@48","SP Dv" +"VECTOR FP LOAD ROUNDED","VFLR V1,V2,M3,M4,M5","231@0|V1@8|V2@12|//@16|M5@24|M4@28|M3@32|RXB@36|197@40|??@48","SP Dv" +"VECTOR FP MAXIMUM","VFMAX 
V1,V2,V3,M4,M5,M6","231@0|V1@8|V2@12|V3@16|//@20|M6@24|M5@28|M4@32|RXB@36|239@40|??@48","SP Dv" +"VECTOR FP MINIMUM","VFMIN V1,V2,V3,M4,M5,M6","231@0|V1@8|V2@12|V3@16|//@20|M6@24|M5@28|M4@32|RXB@36|238@40|??@48","SP Dv" +"VECTOR FP MULTIPLY","VFM V1,V2,V3,M4,M5","231@0|V1@8|V2@12|V3@16|//@20|M5@28|M4@32|RXB@36|231@40|??@48","SP Dv" +"VECTOR FP MULTIPLY AND ADD","VFMA V1,V2,V3,V4,M5,M6","231@0|V1@8|V2@12|V3@16|M6@20|//@24|M5@28|V4@32|RXB@36|143@40|??@48","SP Dv" +"VECTOR FP MULTIPLY AND SUBTRACT","VFMS V1,V2,V3,V4,M5,M6","231@0|V1@8|V2@12|V3@16|M6@20|//@24|M5@28|V4@32|RXB@36|142@40|??@48","SP Dv" +"VECTOR FP NEGATIVE MULTIPLY AND ADD","VFNMA V1,V2,V3,V4,M5,M6","231@0|V1@8|V2@12|V3@16|M6@20|//@24|M5@28|V4@32|RXB@36|159@40|??@48","SP Dv" +"VECTOR FP NEGATIVE MULTIPLY AND SUBTRACT","VFNMS V1,V2,V3,V4,M5,M6","231@0|V1@8|V2@12|V3@16|M6@20|//@24|M5@28|V4@32|RXB@36|158@40|??@48","SP Dv" +"VECTOR FP PERFORM SIGN OPERATION","VFPSO V1,V2,M3,M4,M5","231@0|V1@8|V2@12|//@16|M5@24|M4@28|M3@32|RXB@36|204@40|??@48","SP Dv" +"VECTOR FP SQUARE ROOT","VFSQ V1,V2,M3,M4","231@0|V1@8|V2@12|//@16|M4@28|M3@32|RXB@36|206@40|??@48","SP Dv" +"VECTOR FP SUBTRACT","VFS V1,V2,V3,M4,M5","231@0|V1@8|V2@12|V3@16|//@20|M5@28|M4@32|RXB@36|226@40|??@48","SP Dv" +"VECTOR FP TEST DATA CLASS IMMEDIATE","VFTCI V1,V2,I3,M4,M5","231@0|V1@8|V2@12|I3@16|M5@28|M4@32|RXB@36|74@40|??@48","SP Dv" +"VECTOR GALOIS FIELD MULTIPLY SUM","VGFM V1,V2,V3,M4","231@0|V1@8|V2@12|V3@16|//@20|M4@32|RXB@36|180@40|??@48","SP Dv" +"VECTOR GALOIS FIELD MULTIPLY SUM AND ACCUMULATE","VGFMA V1,V2,V3,V4,M5","231@0|V1@8|V2@12|V3@16|M5@20|//@24|V4@32|RXB@36|188@40|??@48","SP Dv" +"VECTOR GATHER ELEMENT (32)","VGEF V1,D2(V2,B2),M3","231@0|V1@8|V2@12|B2@16|D2@20|M3@32|RXB@36|19@40|??@48","SP Dv" +"VECTOR GATHER ELEMENT (64)","VGEG V1,D2(V2,B2),M3","231@0|V1@8|V2@12|B2@16|D2@20|M3@32|RXB@36|18@40|??@48","SP Dv" +"VECTOR GENERATE BYTE MASK","VGBM V1,I2","231@0|V1@8|//@12|I2@16|//@32|RXB@36|68@40|??@48","Dv" +"VECTOR GENERATE MASK","VGM 
V1,I2,I3,M4","231@0|V1@8|//@12|I2@16|I3@24|M4@32|RXB@36|70@40|??@48","SP Dv" +"VECTOR ISOLATE STRING","VISTR V1,V2,M3,M5","231@0|V1@8|V2@12|//@16|M5@24|//@28|M3@32|RXB@36|92@40|??@48","SP Dv" +"VECTOR LOAD","VL V1,D2(X2,B2),M3","231@0|V1@8|X2@12|B2@16|D2@20|M3@32|RXB@36|6@40|??@48","Dv" +"VECTOR LOAD","VLR V1,V2","231@0|V1@8|V2@12|//@16|RXB@36|86@40|??@48","Dv" +"VECTOR LOAD AND REPLICATE","VLREP V1,D2(X2,B2),M3","231@0|V1@8|X2@12|B2@16|D2@20|M3@32|RXB@36|5@40|??@48","SP Dv" +"VECTOR LOAD BYTE REVERSED ELEMENT (16)","VLEBRH V1,D2(X2,B2),M3","230@0|V1@8|X2@12|B2@16|D2@20|M3@32|RXB@36|1@40|??@48","SP Dv" +"VECTOR LOAD BYTE REVERSED ELEMENT (32)","VLEBRF V1,D2(X2,B2),M3","230@0|V1@8|X2@12|B2@16|D2@20|M3@32|RXB@36|3@40|??@48","SP Dv" +"VECTOR LOAD BYTE REVERSED ELEMENT (64)","VLEBRG V1,D2(X2,B2),M3","230@0|V1@8|X2@12|B2@16|D2@20|M3@32|RXB@36|2@40|??@48","SP Dv" +"VECTOR LOAD BYTE REVERSED ELEMENT AND REPLICATE","VLBRREP V1,D2(X2,B2),M3","230@0|V1@8|X2@12|B2@16|D2@20|M3@32|RXB@36|5@40|??@48","SP Dv" +"VECTOR LOAD BYTE REVERSED ELEMENT AND ZERO","VLLEBRZ V1,D2(X2,B2),M3","230@0|V1@8|X2@12|B2@16|D2@20|M3@32|RXB@36|4@40|??@48","SP Dv" +"VECTOR LOAD BYTE REVERSED ELEMENTS","VLBR V1,D2(X2,B2),M3","230@0|V1@8|X2@12|B2@16|D2@20|M3@32|RXB@36|6@40|??@48","SP Dv" +"VECTOR LOAD COMPLEMENT","VLC V1,V2,M3","231@0|V1@8|V2@12|//@16|M3@32|RXB@36|222@40|??@48","SP Dv" +"VECTOR LOAD ELEMENT (16)","VLEH V1,D2(X2,B2),M3","231@0|V1@8|X2@12|B2@16|D2@20|M3@32|RXB@36|1@40|??@48","SP Dv" +"VECTOR LOAD ELEMENT (32)","VLEF V1,D2(X2,B2),M3","231@0|V1@8|X2@12|B2@16|D2@20|M3@32|RXB@36|3@40|??@48","SP Dv" +"VECTOR LOAD ELEMENT (64)","VLEG V1,D2(X2,B2),M3","231@0|V1@8|X2@12|B2@16|D2@20|M3@32|RXB@36|2@40|??@48","SP Dv" +"VECTOR LOAD ELEMENT (8)","VLEB V1,D2(X2,B2),M3","231@0|V1@8|X2@12|B2@16|D2@20|M3@32|RXB@36|0@40|??@48","SP Dv" +"VECTOR LOAD ELEMENT IMMEDIATE (16)","VLEIH V1,I2,M3","231@0|V1@8|//@12|I2@16|M3@32|RXB@36|65@40|??@48","SP Dv" +"VECTOR LOAD ELEMENT IMMEDIATE (32)","VLEIF 
V1,I2,M3","231@0|V1@8|//@12|I2@16|M3@32|RXB@36|67@40|??@48","SP Dv" +"VECTOR LOAD ELEMENT IMMEDIATE (64)","VLEIG V1,I2,M3","231@0|V1@8|//@12|I2@16|M3@32|RXB@36|66@40|??@48","SP Dv" +"VECTOR LOAD ELEMENT IMMEDIATE (8)","VLEIB V1,I2,M3","231@0|V1@8|//@12|I2@16|M3@32|RXB@36|64@40|??@48","SP Dv" +"VECTOR LOAD ELEMENTS REVERSED","VLER V1,D2(X2,B2),M3","230@0|V1@8|X2@12|B2@16|D2@20|M3@32|RXB@36|7@40|??@48","SP Dv" +"VECTOR LOAD FP INTEGER","VFI V1,V2,M3,M4,M5","231@0|V1@8|V2@12|//@16|M5@24|M4@28|M3@32|RXB@36|199@40|??@48","SP Dv" +"VECTOR LOAD GR FROM VR ELEMENT","VLGV R1,V3,D2(B2),M4","231@0|R1@8|V3@12|B2@16|D2@20|M4@32|RXB@36|33@40|??@48","SP Dv" +"VECTOR LOAD IMMEDIATE DECIMAL","VLIP V1,I2,I3","230@0|V1@8|//@12|I2@16|I3@32|RXB@36|73@40|??@48","Dv" +"VECTOR LOAD LOGICAL ELEMENT AND ZERO","VLLEZ V1,D2(X2,B2),M3","231@0|V1@8|X2@12|B2@16|D2@20|M3@32|RXB@36|4@40|??@48","SP Dv" +"VECTOR LOAD MULTIPLE","VLM V1,V3,D2(B2),M4","231@0|V1@8|V3@12|B2@16|D2@20|M4@32|RXB@36|54@40|??@48","SP Dv" +"VECTOR LOAD POSITIVE","VLP V1,V2,M3","231@0|V1@8|V2@12|//@16|M3@32|RXB@36|223@40|??@48","SP Dv" +"VECTOR LOAD RIGHTMOST WITH LENGTH","VLRL V1,D2(B2),I3","230@0|I3@8|B2@16|D2@20|V1@32|RXB@36|53@40|??@48","SP Dv" +"VECTOR LOAD RIGHTMOST WITH LENGTH","VLRLR V1,R3,D2(B2)","230@0|//@8|R3@12|B2@16|D2@20|V1@32|RXB@36|55@40|??@48","Dv" +"VECTOR LOAD TO BLOCK BOUNDARY","VLBB V1,D2(X2,B2),M3","231@0|V1@8|X2@12|B2@16|D2@20|M3@32|RXB@36|7@40|??@48","SP Dv" +"VECTOR LOAD VR ELEMENT FROM GR","VLVG V1,R3,D2(B2),M4","231@0|V1@8|R3@12|B2@16|D2@20|M4@32|RXB@36|34@40|??@48","SP Dv" +"VECTOR LOAD VR FROM GRS DISJOINT","VLVGP V1,R2,R3","231@0|V1@8|R2@12|R3@16|//@20|RXB@36|98@40|??@48","Dv" +"VECTOR LOAD WITH LENGTH","VLL V1,R3,D2(B2)","231@0|V1@8|R3@12|B2@16|D2@20|//@32|RXB@36|55@40|??@48","Dv" +"VECTOR MAXIMUM","VMX V1,V2,V3,M4","231@0|V1@8|V2@12|V3@16|//@20|M4@32|RXB@36|255@40|??@48","SP Dv" +"VECTOR MAXIMUM LOGICAL","VMXL V1,V2,V3,M4","231@0|V1@8|V2@12|V3@16|//@20|M4@32|RXB@36|253@40|??@48","SP Dv" +"VECTOR 
MERGE HIGH","VMRH V1,V2,V3,M4","231@0|V1@8|V2@12|V3@16|//@20|M4@32|RXB@36|97@40|??@48","SP Dv" +"VECTOR MERGE LOW","VMRL V1,V2,V3,M4","231@0|V1@8|V2@12|V3@16|//@20|M4@32|RXB@36|96@40|??@48","SP Dv" +"VECTOR MINIMUM","VMN V1,V2,V3,M4","231@0|V1@8|V2@12|V3@16|//@20|M4@32|RXB@36|254@40|??@48","SP Dv" +"VECTOR MINIMUM LOGICAL","VMNL V1,V2,V3,M4","231@0|V1@8|V2@12|V3@16|//@20|M4@32|RXB@36|252@40|??@48","SP Dv" +"VECTOR MULTIPLY AND ADD EVEN","VMAE V1,V2,V3,V4,M5","231@0|V1@8|V2@12|V3@16|M5@20|//@24|V4@32|RXB@36|174@40|??@48","SP Dv" +"VECTOR MULTIPLY AND ADD HIGH","VMAH V1,V2,V3,V4,M5","231@0|V1@8|V2@12|V3@16|M5@20|//@24|V4@32|RXB@36|171@40|??@48","SP Dv" +"VECTOR MULTIPLY AND ADD LOGICAL EVEN","VMALE V1,V2,V3,V4,M5","231@0|V1@8|V2@12|V3@16|M5@20|//@24|V4@32|RXB@36|172@40|??@48","SP Dv" +"VECTOR MULTIPLY AND ADD LOGICAL HIGH","VMALH V1,V2,V3,V4,M5","231@0|V1@8|V2@12|V3@16|M5@20|//@24|V4@32|RXB@36|169@40|??@48","SP Dv" +"VECTOR MULTIPLY AND ADD LOGICAL ODD","VMALO V1,V2,V3,V4,M5","231@0|V1@8|V2@12|V3@16|M5@20|//@24|V4@32|RXB@36|173@40|??@48","SP Dv" +"VECTOR MULTIPLY AND ADD LOW","VMAL V1,V2,V3,V4,M5","231@0|V1@8|V2@12|V3@16|M5@20|//@24|V4@32|RXB@36|170@40|??@48","SP Dv" +"VECTOR MULTIPLY AND ADD ODD","VMAO V1,V2,V3,V4,M5","231@0|V1@8|V2@12|V3@16|M5@20|//@24|V4@32|RXB@36|175@40|??@48","SP Dv" +"VECTOR MULTIPLY AND SHIFT DECIMAL","VMSP V1,V2,V3,I4,M5","230@0|V1@8|V2@12|V3@16|//@20|M5@24|I4@28|RXB@36|121@40|??@48","SP Dv" +"VECTOR MULTIPLY DECIMAL","VMP V1,V2,V3,I4,M5","230@0|V1@8|V2@12|V3@16|//@20|M5@24|I4@28|RXB@36|120@40|??@48","SP Dv" +"VECTOR MULTIPLY EVEN","VME V1,V2,V3,M4","231@0|V1@8|V2@12|V3@16|//@20|M4@32|RXB@36|166@40|??@48","SP Dv" +"VECTOR MULTIPLY HIGH","VMH V1,V2,V3,M4","231@0|V1@8|V2@12|V3@16|//@20|M4@32|RXB@36|163@40|??@48","SP Dv" +"VECTOR MULTIPLY LOGICAL EVEN","VMLE V1,V2,V3,M4","231@0|V1@8|V2@12|V3@16|//@20|M4@32|RXB@36|164@40|??@48","SP Dv" +"VECTOR MULTIPLY LOGICAL HIGH","VMLH V1,V2,V3,M4","231@0|V1@8|V2@12|V3@16|//@20|M4@32|RXB@36|161@40|??@48","SP 
Dv" +"VECTOR MULTIPLY LOGICAL ODD","VMLO V1,V2,V3,M4","231@0|V1@8|V2@12|V3@16|//@20|M4@32|RXB@36|165@40|??@48","SP Dv" +"VECTOR MULTIPLY LOW","VML V1,V2,V3,M4","231@0|V1@8|V2@12|V3@16|//@20|M4@32|RXB@36|162@40|??@48","SP Dv" +"VECTOR MULTIPLY ODD","VMO V1,V2,V3,M4","231@0|V1@8|V2@12|V3@16|//@20|M4@32|RXB@36|167@40|??@48","SP Dv" +"VECTOR MULTIPLY SUM LOGICAL","VMSL V1,V2,V3,V4,M5,M6","231@0|V1@8|V2@12|V3@16|M5@20|M6@24|//@28|V4@32|RXB@36|184@40|??@48","SP Dv" +"VECTOR NAND","VNN V1,V2,V3","231@0|V1@8|V2@12|V3@16|//@20|RXB@36|110@40|??@48","DV" +"VECTOR NOR","VNO V1,V2,V3","231@0|V1@8|V2@12|V3@16|//@20|RXB@36|107@40|??@48","Dv" +"VECTOR NOT EXCLUSIVE OR","VNX V1,V2,V3","231@0|V1@8|V2@12|V3@16|//@20|RXB@36|108@40|??@48","Dv" +"VECTOR OR","VO V1,V2,V3","231@0|V1@8|V2@12|V3@16|//@20|RXB@36|106@40|??@48","Dv" +"VECTOR OR WITH COMPLEMENT","VOC V1,V2,V3","231@0|V1@8|V2@12|V3@16|//@20|RXB@36|111@40|??@48","Dv" +"VECTOR PACK","VPK V1,V2,V3,M4","231@0|V1@8|V2@12|V3@16|//@20|M4@32|RXB@36|148@40|??@48","SP Dv" +"VECTOR PACK LOGICAL SATURATE","VPKLS V1,V2,V3,M4,M5","231@0|V1@8|V2@12|V3@16|//@20|M5@24|//@28|M4@32|RXB@36|149@40|??@48","SP Dv" +"VECTOR PACK SATURATE","VPKS V1,V2,V3,M4,M5","231@0|V1@8|V2@12|V3@16|//@20|M5@24|//@28|M4@32|RXB@36|151@40|??@48","SP Dv" +"VECTOR PACK ZONED","VPKZ V1,D2(B2),I3","230@0|I3@8|B2@16|D2@20|V1@32|RXB@36|52@40|??@48","SP Dv" +"VECTOR PACK ZONED REGISTER","VPKZR V1,V2,V3,I4,M5","230@0|V1@8|V2@12|V3@16|//@20|M5@24|I4@28|RXB@36|112@40|??@48","SP Dv" +"VECTOR PERFORM SIGN OPERATION DECIMAL","VPSOP V1,V2,I3,I4,M5","230@0|V1@8|V2@12|I4@16|M5@24|I3@28|RXB@36|91@40|??@48","SP Dv" +"VECTOR PERMUTE","VPERM V1,V2,V3,V4","231@0|V1@8|V2@12|V3@16|//@20|V4@32|RXB@36|140@40|??@48","Dv" +"VECTOR PERMUTE DOUBLEWORD IMMEDIATE","VPDI V1,V2,V3,M4","231@0|V1@8|V2@12|V3@16|//@20|M4@32|RXB@36|132@40|??@48","Dv" +"VECTOR POPULATION COUNT","VPOPCT V1,V2,M3","231@0|V1@8|V2@12|//@16|M3@32|RXB@36|80@40|??@48","SP Dv" +"VECTOR REMAINDER DECIMAL","VRP 
V1,V2,V3,I4,M5","230@0|V1@8|V2@12|V3@16|//@20|M5@24|I4@28|RXB@36|123@40|??@48","SP Dv" +"VECTOR REPLICATE","VREP V1,V3,I2,M4","231@0|V1@8|V3@12|I2@16|M4@32|RXB@36|77@40|??@48","SP Dv" +"VECTOR REPLICATE IMMEDIATE","VREPI V1,I2,M3","231@0|V1@8|//@12|I2@16|M3@32|RXB@36|69@40|??@48","SP Dv" +"VECTOR SCATTER ELEMENT (32)","VSCEF V1,D2(V2,B2),M3","231@0|V1@8|V2@12|B2@16|D2@20|M3@32|RXB@36|27@40|??@48","SP Dv" +"VECTOR SCATTER ELEMENT (64)","VSCEG V1,D2(V2,B2),M3","231@0|V1@8|V2@12|B2@16|D2@20|M3@32|RXB@36|26@40|??@48","SP Dv" +"VECTOR SELECT","VSEL V1,V2,V3,V4","231@0|V1@8|V2@12|V3@16|//@20|V4@32|RXB@36|141@40|??@48","Dv" +"VECTOR SHIFT AND DIVIDE DECIMAL","VSDP V1,V2,V3,I4,M5","230@0|V1@8|V2@12|V3@16|//@20|M5@24|I4@28|RXB@36|126@40|??@48","SP Dv" +"VECTOR SHIFT AND ROUND DECIMAL","VSRP V1,V2,I3,I4,M5","230@0|V1@8|V2@12|I4@16|M5@24|I3@28|RXB@36|89@40|??@48","SP Dv" +"VECTOR SHIFT AND ROUND DECIMAL REGISTER","VSRPR V1,V2,V3,I4,M5","230@0|V1@8|V2@12|V3@16|//@20|M5@24|I4@28|RXB@36|114@40|??@48","SP Dv" +"VECTOR SHIFT LEFT","VSL V1,V2,V3","231@0|V1@8|V2@12|V3@16|//@20|RXB@36|116@40|??@48","Dv" +"VECTOR SHIFT LEFT BY BYTE","VSLB V1,V2,V3","231@0|V1@8|V2@12|V3@16|//@20|RXB@36|117@40|??@48","Dv" +"VECTOR SHIFT LEFT DOUBLE BY BIT","VSLD V1,V2,V3,I4","231@0|V1@8|V2@12|V3@16|//@20|I4@24|//@32|RXB@36|134@40|??@48","SP Dv" +"VECTOR SHIFT LEFT DOUBLE BY BYTE","VSLDB V1,V2,V3,I4","231@0|V1@8|V2@12|V3@16|//@20|I4@24|//@32|RXB@36|119@40|??@48","Dv" +"VECTOR SHIFT RIGHT ARITHMETIC","VSRA V1,V2,V3","231@0|V1@8|V2@12|V3@16|//@20|RXB@36|126@40|??@48","Dv" +"VECTOR SHIFT RIGHT ARITHMETIC BY BYTE","VSRAB V1,V2,V3","231@0|V1@8|V2@12|V3@16|//@20|RXB@36|127@40|??@48","Dv" +"VECTOR SHIFT RIGHT DOUBLE BY BIT","VSRD V1,V2,V3,I4","231@0|V1@8|V2@12|V3@16|//@20|I4@24|//@32|RXB@36|135@40|??@48","SP Dv" +"VECTOR SHIFT RIGHT LOGICAL","VSRL V1,V2,V3","231@0|V1@8|V2@12|V3@16|//@20|RXB@36|124@40|??@48","Dv" +"VECTOR SHIFT RIGHT LOGICAL BY BYTE","VSRLB 
V1,V2,V3","231@0|V1@8|V2@12|V3@16|//@20|RXB@36|125@40|??@48","Dv" +"VECTOR SIGN EXTEND TO DOUBLEWORD","VSEG V1,V2,M3","231@0|V1@8|V2@12|//@16|M3@32|RXB@36|95@40|??@48","SP Dv" +"VECTOR STORE","VST V1,D2(X2,B2),M3","231@0|V1@8|X2@12|B2@16|D2@20|M3@32|RXB@36|14@40|??@48","Dv" +"VECTOR STORE BYTE REVERSED ELEMENT(16)","VSTEBRH V1,D2(X2,B2),M3","230@0|V1@8|X2@12|B2@16|D2@20|M3@32|RXB@36|9@40|??@48","SP Dv" +"VECTOR STORE BYTE REVERSED ELEMENT(32)","VSTEBRF V1,D2(X2,B2),M3","230@0|V1@8|X2@12|B2@16|D2@20|M3@32|RXB@36|11@40|??@48","SP Dv" +"VECTOR STORE BYTE REVERSED ELEMENT(64)","VSTEBRG V1,D2(X2,B2),M3","230@0|V1@8|X2@12|B2@16|D2@20|M3@32|RXB@36|10@40|??@48","SP Dv" +"VECTOR STORE BYTE REVERSED ELEMENTS","VSTBR V1,D2(X2,B2),M3","230@0|V1@8|X2@12|B2@16|D2@20|M3@32|RXB@36|14@40|??@48","SP Dv" +"VECTOR STORE ELEMENT (16)","VSTEH V1,D2(X2,B2),M3","231@0|V1@8|X2@12|B2@16|D2@20|M3@32|RXB@36|9@40|??@48","SP Dv" +"VECTOR STORE ELEMENT (32)","VSTEF V1,D2(X2,B2),M3","231@0|V1@8|X2@12|B2@16|D2@20|M3@32|RXB@36|11@40|??@48","SP Dv" +"VECTOR STORE ELEMENT (64)","VSTEG V1,D2(X2,B2),M3","231@0|V1@8|X2@12|B2@16|D2@20|M3@32|RXB@36|10@40|??@48","SP Dv" +"VECTOR STORE ELEMENT (8)","VSTEB V1,D2(X2,B2),M3","231@0|V1@8|X2@12|B2@16|D2@20|M3@32|RXB@36|8@40|??@48","SP Dv" +"VECTOR STORE ELEMENTS REVERSED","VSTER V1,D2(X2,B2),M3","230@0|V1@8|X2@12|B2@16|D2@20|M3@32|RXB@36|15@40|??@48","SP Dv" +"VECTOR STORE MULTIPLE","VSTM V1,V3,D2(B2),M4","231@0|V1@8|V3@12|B2@16|D2@20|M4@32|RXB@36|62@40|??@48","SP Dv" +"VECTOR STORE RIGHTMOST WITH LENGTH","VSTRL V1,D2(B2),I3","230@0|I3@8|B2@16|D2@20|V1@32|RXB@36|61@40|??@48","SP Dv" +"VECTOR STORE RIGHTMOST WITH LENGTH","VSTRLR V1,R3,D2(B2)","230@0|//@8|R3@12|B2@16|D2@20|V1@32|RXB@36|63@40|??@48","Dv" +"VECTOR STORE WITH LENGTH","VSTL V1,R3,D2(B2)","231@0|V1@8|R3@12|B2@16|D2@20|//@32|RXB@36|63@40|??@48","Dv" +"VECTOR STRING RANGE COMPARE","VSTRC V1,V2,V3,V4,M5,M6","231@0|V1@8|V2@12|V3@16|M5@20|M6@24|//@28|V4@32|RXB@36|138@40|??@48","SP Dv" +"VECTOR STRING 
SEARCH","VSTRS V1,V2,V3,V4,M5,M6","231@0|V1@8|V2@12|V3@16|M5@20|M6@24|//@28|V4@32|RXB@36|139@40|??@48","SP Dv" +"VECTOR SUBTRACT","VS V1,V2,V3,M4","231@0|V1@8|V2@12|V3@16|//@20|M4@32|RXB@36|247@40|??@48","SP Dv" +"VECTOR SUBTRACT COMPUTE BORROW INDICATION","VSCBI V1,V2,V3,M4","231@0|V1@8|V2@12|V3@16|//@20|M4@32|RXB@36|245@40|??@48","SP Dv" +"VECTOR SUBTRACT DECIMAL","VSP V1,V2,V3,I4,M5","230@0|V1@8|V2@12|V3@16|//@20|M5@24|I4@28|RXB@36|115@40|??@48","SP Dv" +"VECTOR SUBTRACT WITH BORROW COMPUTE BORROW INDICATION","VSBCBI V1,V2,V3,V4,M5","231@0|V1@8|V2@12|V3@16|M5@20|//@24|V4@32|RXB@36|189@40|??@48","SP Dv" +"VECTOR SUBTRACT WITH BORROW INDICATION","VSBI V1,V2,V3,V4,M5","231@0|V1@8|V2@12|V3@16|M5@20|//@24|V4@32|RXB@36|191@40|??@48","SP Dv" +"VECTOR SUM ACROSS DOUBLEWORD","VSUMG V1,V2,V3,M4","231@0|V1@8|V2@12|V3@16|//@20|M4@32|RXB@36|101@40|??@48","SP Dv" +"VECTOR SUM ACROSS QUADWORD","VSUMQ V1,V2,V3,M4","231@0|V1@8|V2@12|V3@16|//@20|M4@32|RXB@36|103@40|??@48","SP Dv" +"VECTOR SUM ACROSS WORD","VSUM V1,V2,V3,M4","231@0|V1@8|V2@12|V3@16|//@20|M4@32|RXB@36|100@40|??@48","SP Dv" +"VECTOR TEST DECIMAL","VTP V1","230@0|//@8|V1@12|//@16|RXB@36|95@40|??@48","Dv" +"VECTOR TEST UNDER MASK","VTM V1,V2","231@0|V1@8|V2@12|//@16|RXB@36|216@40|??@48","Dv" +"VECTOR UNPACK HIGH","VUPH V1,V2,M3","231@0|V1@8|V2@12|//@16|M3@32|RXB@36|215@40|??@48","SP Dv" +"VECTOR UNPACK LOGICAL HIGH","VUPLH V1,V2,M3","231@0|V1@8|V2@12|//@16|M3@32|RXB@36|213@40|??@48","SP Dv" +"VECTOR UNPACK LOGICAL LOW","VUPLL V1,V2,M3","231@0|V1@8|V2@12|//@16|M3@32|RXB@36|212@40|??@48","SP Dv" +"VECTOR UNPACK LOW","VUPL V1,V2,M3","231@0|V1@8|V2@12|//@16|M3@32|RXB@36|214@40|??@48","SP Dv" +"VECTOR UNPACK ZONED","VUPKZ V1,D2(B2),I3","230@0|I3@8|B2@16|D2@20|V1@32|RXB@36|60@40|??@48","SP Dv" +"VECTOR UNPACK ZONED HIGH","VUPKZH V1,V2,M3","230@0|V1@8|V2@12|//@16|M3@24|//@28|RXB@36|84@40|??@48","Dv" +"VECTOR UNPACK ZONED LOW","VUPKZL V1,V2,M3","230@0|V1@8|V2@12|//@16|M3@24|//@28|RXB@36|92@40|??@48","Dv" +"ZERO AND ADD","ZAP 
D1(L1,B1),D2(L2,B2)","248@0|L1@8|L2@12|B1@16|D1@20|B2@32|D2@36|??@48","Dg" diff --git a/s390x/s390xasm/Makefile b/s390x/s390xasm/Makefile new file mode 100644 index 00000000..6d02dac2 --- /dev/null +++ b/s390x/s390xasm/Makefile @@ -0,0 +1,2 @@ +tables.go: ../s390xmap/map.go ../s390x.csv + go run ../s390xmap/map.go -fmt=decoder ../s390x.csv >_tables.go && gofmt _tables.go >tables.go && rm _tables.go diff --git a/s390x/s390xasm/decode.go b/s390x/s390xasm/decode.go new file mode 100644 index 00000000..823fe591 --- /dev/null +++ b/s390x/s390xasm/decode.go @@ -0,0 +1,241 @@ +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package s390xasm + +import ( + "encoding/binary" + "fmt" +) + +// instFormat is a decoding rule for one specific instruction form. +// An instruction ins matches the rule if ins&Mask == Value. +// DontCare bits are mainly used for finding the same instruction +// name differing with the number of argument fields. +// The Args are stored in the same order as the instruction manual. +type instFormat struct { + Op Op + Mask uint64 + Value uint64 + DontCare uint64 + Args [8]*argField +} + +// argField indicate how to decode an argument to an instruction. +// First parse the value from the BitFields, shift it left by Shift +// bits to get the actual numerical value. +type argField struct { + Type ArgType + flags uint16 + BitField +} + +// Parse parses the Arg out from the given binary instruction i. 
+func (a argField) Parse(i uint64) Arg { + switch a.Type { + default: + return nil + case TypeUnknown: + return nil + case TypeReg: + return R0 + Reg(a.BitField.Parse(i)) + case TypeFPReg: + return F0 + Reg(a.BitField.Parse(i)) + case TypeCReg: + return C0 + Reg(a.BitField.Parse(i)) + case TypeACReg: + return A0 + Reg(a.BitField.Parse(i)) + case TypeBaseReg: + return B0 + Base(a.BitField.Parse(i)) + case TypeIndexReg: + return X0 + Index(a.BitField.Parse(i)) + case TypeDispUnsigned: + return Disp12(a.BitField.Parse(i)) + case TypeDispSigned20: + return Disp20(a.BitField.ParseSigned(i)) + case TypeVecReg: + m := i >> 24 // Handling RXB field(bits 36 to 39) + if ((m>>3)&0x1 == 1) && (a.BitField.Offs == 8) { + return V0 + VReg(a.BitField.Parse(i)) + VReg(16) + } else if ((m>>2)&0x1 == 1) && (a.BitField.Offs == 12) { + return V0 + VReg(a.BitField.Parse(i)) + VReg(16) + } else if ((m>>1)&0x1 == 1) && (a.BitField.Offs == 16) { + return V0 + VReg(a.BitField.Parse(i)) + VReg(16) + } else if ((m)&0x1 == 1) && (a.BitField.Offs == 32) { + return V0 + VReg(a.BitField.Parse(i)) + VReg(16) + } else { + return V0 + VReg(a.BitField.Parse(i)) + } + case TypeImmSigned8: + return Sign8(a.BitField.ParseSigned(i)) + case TypeImmSigned16: + return Sign16(a.BitField.ParseSigned(i)) + case TypeImmSigned32: + return Sign32(a.BitField.ParseSigned(i)) + case TypeImmUnsigned: + return Imm(a.BitField.Parse(i)) + case TypeRegImSigned12: + return RegIm12(a.BitField.ParseSigned(i)) + case TypeRegImSigned16: + return RegIm16(a.BitField.ParseSigned(i)) + case TypeRegImSigned24: + return RegIm24(a.BitField.ParseSigned(i)) + case TypeRegImSigned32: + return RegIm32(a.BitField.ParseSigned(i)) + case TypeMask: + return Mask(a.BitField.Parse(i)) + case TypeLen: + return Len(a.BitField.Parse(i)) + } +} + +type ArgType int8 + +const ( + TypeUnknown ArgType = iota + TypeReg // integer register + TypeFPReg // floating point register + TypeACReg // access register + TypeCReg // control register + TypeVecReg 
// vector register
+	TypeImmUnsigned   // unsigned immediate/flag/mask, this is the catch-all type
+	TypeImmSigned8    // Signed 8-bit Immediate
+	TypeImmSigned16   // Signed 16-bit Immediate
+	TypeImmSigned32   // Signed 32-bit Immediate
+	TypeBaseReg       // Base Register for accessing memory
+	TypeIndexReg      // Index Register
+	TypeDispUnsigned  // Displacement 12-bit unsigned for memory address
+	TypeDispSigned20  // Displacement 20-bit signed for memory address
+	TypeRegImSigned12 // RegisterImmediate 12-bit signed data
+	TypeRegImSigned16 // RegisterImmediate 16-bit signed data
+	TypeRegImSigned24 // RegisterImmediate 24-bit signed data
+	TypeRegImSigned32 // RegisterImmediate 32-bit signed data
+	TypeMask          // 4-bit Mask
+	TypeLen           // Length of Memory Operand
+	TypeLast
+)
+
+func (t ArgType) String() string {
+	switch t {
+	default:
+		return fmt.Sprintf("ArgType(%d)", int(t))
+	case TypeUnknown:
+		return "Unknown"
+	case TypeReg:
+		return "Reg"
+	case TypeFPReg:
+		return "FPReg"
+	case TypeACReg:
+		return "ACReg"
+	case TypeCReg:
+		return "CReg"
+	case TypeDispUnsigned:
+		return "DispUnsigned"
+	case TypeDispSigned20:
+		return "DispSigned20"
+	case TypeBaseReg:
+		return "BaseReg"
+	case TypeIndexReg:
+		return "IndexReg"
+	case TypeVecReg:
+		return "VecReg"
+	case TypeImmSigned8:
+		return "ImmSigned8"
+	case TypeImmSigned16:
+		return "ImmSigned16"
+	case TypeImmSigned32:
+		return "ImmSigned32"
+	case TypeImmUnsigned:
+		return "ImmUnsigned"
+	case TypeRegImSigned12:
+		return "RegImSigned12"
+	case TypeRegImSigned16:
+		return "RegImSigned16"
+	case TypeRegImSigned24:
+		return "RegImSigned24"
+	case TypeRegImSigned32:
+		return "RegImSigned32"
+	case TypeMask:
+		return "Mask"
+	case TypeLen:
+		return "Len"
+	}
+}
+
+func (t ArgType) GoString() string {
+	s := t.String()
+	if t > 0 && t < TypeLast {
+		return "Type" + s
+	}
+	return s
+}
+
+var (
+	// Errors
+	errShort   = fmt.Errorf("truncated instruction")
+	errUnknown = fmt.Errorf("unknown instruction")
+)
+
+var decoderCover
[]bool + +// Decode decodes the leading bytes in src as a single instruction using +// byte order ord. +func Decode(src []byte) (inst Inst, err error) { + if len(src) < 2 { + return inst, errShort + } + if decoderCover == nil { + decoderCover = make([]bool, len(instFormats)) + } + bit_check := binary.BigEndian.Uint16(src[:2]) + bit_check = bit_check >> 14 + l := int(0) + if (bit_check & 0x03) == 0 { + l = 2 + } else if bit_check&0x03 == 3 { + l = 6 + } else if (bit_check&0x01 == 1) || (bit_check&0x02 == 2) { + l = 4 + } + inst.Len = l + ui_extn := uint64(0) + switch l { + case 2: + ui_extn = uint64(binary.BigEndian.Uint16(src[:inst.Len])) + inst.Enc = ui_extn + ui_extn = ui_extn << 48 + case 4: + ui_extn = uint64(binary.BigEndian.Uint32(src[:inst.Len])) + inst.Enc = ui_extn + ui_extn = ui_extn << 32 + case 6: + u1 := binary.BigEndian.Uint32(src[:(inst.Len - 2)]) + u2 := binary.BigEndian.Uint16(src[(inst.Len - 2):inst.Len]) + ui_extn = uint64(u1)<<16 | uint64(u2) + ui_extn = ui_extn << 16 + inst.Enc = ui_extn + default: + return inst, errShort + } + for _, iform := range instFormats { + if ui_extn&iform.Mask != iform.Value { + continue + } + if (iform.DontCare & ^(ui_extn)) != iform.DontCare { + continue + } + for j, argfield := range iform.Args { + if argfield == nil { + break + } + inst.Args[j] = argfield.Parse(ui_extn) + } + inst.Op = iform.Op + break + } + if inst.Op == 0 && inst.Enc != 0 { + return inst, errUnknown + } + return inst, nil +} diff --git a/s390x/s390xasm/decode_test.go b/s390x/s390xasm/decode_test.go new file mode 100644 index 00000000..5ca0b741 --- /dev/null +++ b/s390x/s390xasm/decode_test.go @@ -0,0 +1,88 @@ +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package s390xasm + +import ( + "encoding/hex" + "io/ioutil" + "path" + "strings" + "testing" +) + +func TestDecode(t *testing.T) { + files, err := ioutil.ReadDir("testdata") + if err != nil { + t.Fatal(err) + } + for _, f := range files { + if !strings.HasPrefix(f.Name(), "decode") { + continue + } + filename := path.Join("testdata", f.Name()) + data, err := ioutil.ReadFile(filename) + if err != nil { + t.Fatal(err) + } + decode(data, t, filename) + } +} + +// Provide a fake symbol to verify PCrel argument decoding. +func symlookup(pc uint64) (string, uint64) { + foopc := uint64(0x100000) + if pc >= foopc && pc < foopc+0x10 { + return "foo", foopc + } + return "", 0 +} + +func decode(data []byte, t *testing.T, filename string) { + all := string(data) + // Simulate PC based on number of instructions found in the test file. + pc := uint64(0) + for strings.Contains(all, "\t\t") { + all = strings.Replace(all, "\t\t", "\t", -1) + } + for _, line := range strings.Split(all, "\n") { + line = strings.TrimSpace(line) + if line == "" || strings.HasPrefix(line, "#") { + continue + } + f := strings.SplitN(line, "\t", 3) + i := strings.Index(f[0], "|") + if i < 0 { + t.Errorf("%s: parsing %q: missing | separator", filename, f[0]) + continue + } + if i%2 != 0 { + t.Errorf("%s: parsing %q: misaligned | separator", filename, f[0]) + } + size := i / 2 + code, err := hex.DecodeString(f[0][:i] + f[0][i+1:]) + if err != nil { + t.Errorf("%s: parsing %q: %v", filename, f[0], err) + continue + } + syntax, asm := f[1], f[2] + inst, err := Decode(code) + var out string + if err != nil { + out = "error: " + err.Error() + } else { + switch syntax { + case "gnu": + out = GNUSyntax(inst, pc) + default: + t.Errorf("unknown syntax %q", syntax) + continue + } + } + pc += uint64(size) + if out != asm || inst.Len != size { + t.Errorf("%s: Decode(%s) [%s] = %s want %s", filename, f[0], syntax, out, asm) + } + } +} diff --git a/s390x/s390xasm/field.go b/s390x/s390xasm/field.go new file mode 
100644 index 00000000..e00415fc
--- /dev/null
+++ b/s390x/s390xasm/field.go
@@ -0,0 +1,98 @@
+// Copyright 2024 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package s390xasm
+
+import (
+	"fmt"
+	"strings"
+)
+
+// A BitField is a bit-field in a 64-bit double word.
+// Bits are counted from 0 from the MSB to 63 as the LSB.
+type BitField struct {
+	Offs uint8 // the offset of the left-most bit.
+	Bits uint8 // length in bits.
+}
+
+func (b BitField) String() string {
+	if b.Bits > 1 {
+		return fmt.Sprintf("[%d:%d]", b.Offs, int(b.Offs+b.Bits)-1)
+	} else if b.Bits == 1 {
+		return fmt.Sprintf("[%d]", b.Offs)
+	} else {
+		return fmt.Sprintf("[%d, len=0]", b.Offs)
+	}
+}
+
+// Parse extracts the bitfield b from i, and return it as an unsigned integer.
+// Parse will panic if b is invalid.
+func (b BitField) Parse(i uint64) uint64 {
+	if b.Bits > 64 || b.Bits == 0 || b.Offs > 63 || b.Offs+b.Bits > 64 {
+		panic(fmt.Sprintf("invalid bitfield %v", b))
+	}
+	if b.Bits == 20 {
+		return ((((i >> (64 - b.Offs - b.Bits)) & ((1 << 8) - 1)) << 12) | ((i >> (64 - b.Offs - b.Bits + 8)) & 0xFFF))
+
+	} else {
+		return (i >> (64 - b.Offs - b.Bits)) & ((1 << b.Bits) - 1)
+	}
+}
+
+// ParseSigned extracts the bitfield b from i, and return it as a signed integer.
+// ParseSigned will panic if b is invalid.
+func (b BitField) ParseSigned(i uint64) int64 {
+	u := int64(b.Parse(i))
+	return u << (64 - b.Bits) >> (64 - b.Bits)
+}
+
+// BitFields is a series of BitFields representing a single number.
+type BitFields []BitField + +func (bs BitFields) String() string { + ss := make([]string, len(bs)) + for i, bf := range bs { + ss[i] = bf.String() + } + return fmt.Sprintf("<%s>", strings.Join(ss, "|")) +} + +func (bs *BitFields) Append(b BitField) { + *bs = append(*bs, b) +} + +// parse extracts the bitfields from i, concatenate them and return the result +// as an unsigned integer and the total length of all the bitfields. +// parse will panic if any bitfield in b is invalid, but it doesn't check if +// the sequence of bitfields is reasonable. +func (bs BitFields) parse(i uint64) (u uint64, Bits uint8) { + for _, b := range bs { + u = (u << b.Bits) | uint64(b.Parse(i)) + Bits += b.Bits + } + return u, Bits +} + +// Parse extracts the bitfields from i, concatenate them and return the result +// as an unsigned integer. Parse will panic if any bitfield in b is invalid. +func (bs BitFields) Parse(i uint64) uint64 { + u, _ := bs.parse(i) + return u +} + +// ParseSigned extracts the bitfields from i, concatenate them and return the result +// as a signed integer. Parse will panic if any bitfield in b is invalid. +func (bs BitFields) ParseSigned(i uint64) int64 { + u, l := bs.parse(i) + return int64(u) << (64 - l) >> (64 - l) +} + +// Count the number of bits in the aggregate BitFields +func (bs BitFields) NumBits() int { + num := 0 + for _, b := range bs { + num += int(b.Bits) + } + return num +} diff --git a/s390x/s390xasm/gnu.go b/s390x/s390xasm/gnu.go new file mode 100644 index 00000000..5755b354 --- /dev/null +++ b/s390x/s390xasm/gnu.go @@ -0,0 +1,1018 @@ +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package s390xasm + +// Instructions with extended mnemonics fall under various categories. +// To handle each of them in one single function, various different +// structure types are defined as below. 
Corresponding instruction +// structures are created with the help of these base structures. +// Different instruction types are as below: + +// Typ1 - Instructions having different base and extended mnemonic strings. +// +// These instructions have single M-field value and single offset. +type typ1ExtndMnics struct { + BaseOpStr string + Value uint8 + Offset uint8 + ExtnOpStr string +} + +// Typ2 - Instructions having couple of extra strings added to the base mnemonic string, +// +// depending on the condition code evaluation. +// These instructions have single M-field value and single offset. +type typ2ExtndMnics struct { + Value uint8 + Offset uint8 + ExtnOpStr string +} + +// Typ3 - Instructions having couple of extra strings added to the base mnemonic string, +// +// depending on the condition code evaluation. +// These instructions have two M-field values and two offsets. +type typ3ExtndMnics struct { + Value1 uint8 + Value2 uint8 + Offset1 uint8 + Offset2 uint8 + ExtnOpStr string +} + +// Typ4 - Instructions having different base and extended mnemonic strings. +// +// These instructions have two M-field values and two offsets. +type typ4ExtndMnics struct { + BaseOpStr string + Value1 uint8 + Value2 uint8 + Offset1 uint8 + Offset2 uint8 + ExtnOpStr string +} + +// Typ5 - Instructions having different base and extended mnemonic strings. +// +// These instructions have three M-field values and three offsets. +type typ5ExtndMnics struct { + BaseOpStr string + Value1 uint8 + Value2 uint8 + Value3 uint8 + Offset1 uint8 + Offset2 uint8 + Offset3 uint8 + ExtnOpStr string +} + +// "func Handleextndmnemonic" - This is the function where the extended mnemonic logic +// is implemented. This function defines various structures to keep a list of base +// instructions and their extended mnemonic strings. These structure will also have +// M-field values and offset values defined, based on their type. +// HandleExtndMnemonic takes "inst" structure as the input variable. 
+// Inst structure will have all the details related to an instruction. Based on the +// opcode base string, a switch-case statement is executed. In that, based on the +// M-field value and the offset value of that particular M-field, extended mnemonic +// string is either searched or constructed by adding couple of extra strings to the base +// opcode string from one of the structure defined below. +func HandleExtndMnemonic(inst *Inst) string { + + brnchInstrExtndMnics := []typ1ExtndMnics{ + //BIC - BRANCH INDIRECT ON CONDITION instruction + typ1ExtndMnics{BaseOpStr: "bic", Value: 1, Offset: 0, ExtnOpStr: "bio"}, + typ1ExtndMnics{BaseOpStr: "bic", Value: 2, Offset: 0, ExtnOpStr: "bih"}, + typ1ExtndMnics{BaseOpStr: "bic", Value: 4, Offset: 0, ExtnOpStr: "bil"}, + typ1ExtndMnics{BaseOpStr: "bic", Value: 7, Offset: 0, ExtnOpStr: "bine"}, + typ1ExtndMnics{BaseOpStr: "bic", Value: 8, Offset: 0, ExtnOpStr: "bie"}, + typ1ExtndMnics{BaseOpStr: "bic", Value: 11, Offset: 0, ExtnOpStr: "binl"}, + typ1ExtndMnics{BaseOpStr: "bic", Value: 13, Offset: 0, ExtnOpStr: "binh"}, + typ1ExtndMnics{BaseOpStr: "bic", Value: 14, Offset: 0, ExtnOpStr: "bino"}, + typ1ExtndMnics{BaseOpStr: "bic", Value: 15, Offset: 0, ExtnOpStr: "bi"}, + + //BCR - BRANCH ON CONDITION instruction + typ1ExtndMnics{BaseOpStr: "bcr", Value: 0, Offset: 0, ExtnOpStr: "nopr"}, + typ1ExtndMnics{BaseOpStr: "bcr", Value: 1, Offset: 0, ExtnOpStr: "bor"}, + typ1ExtndMnics{BaseOpStr: "bcr", Value: 2, Offset: 0, ExtnOpStr: "bhr"}, + typ1ExtndMnics{BaseOpStr: "bcr", Value: 4, Offset: 0, ExtnOpStr: "blr"}, + typ1ExtndMnics{BaseOpStr: "bcr", Value: 7, Offset: 0, ExtnOpStr: "bner"}, + typ1ExtndMnics{BaseOpStr: "bcr", Value: 8, Offset: 0, ExtnOpStr: "ber"}, + typ1ExtndMnics{BaseOpStr: "bcr", Value: 11, Offset: 0, ExtnOpStr: "bnlr"}, + typ1ExtndMnics{BaseOpStr: "bcr", Value: 13, Offset: 0, ExtnOpStr: "bnhr"}, + typ1ExtndMnics{BaseOpStr: "bcr", Value: 14, Offset: 0, ExtnOpStr: "bnor"}, + typ1ExtndMnics{BaseOpStr: "bcr", Value: 
15, Offset: 0, ExtnOpStr: "br"},
+
+		//BC - BRANCH ON CONDITION instruction
+		typ1ExtndMnics{BaseOpStr: "bc", Value: 0, Offset: 0, ExtnOpStr: "nop"},
+		typ1ExtndMnics{BaseOpStr: "bc", Value: 1, Offset: 0, ExtnOpStr: "bo"},
+		typ1ExtndMnics{BaseOpStr: "bc", Value: 2, Offset: 0, ExtnOpStr: "bh"},
+		typ1ExtndMnics{BaseOpStr: "bc", Value: 4, Offset: 0, ExtnOpStr: "bl"},
+		typ1ExtndMnics{BaseOpStr: "bc", Value: 7, Offset: 0, ExtnOpStr: "bne"},
+		typ1ExtndMnics{BaseOpStr: "bc", Value: 8, Offset: 0, ExtnOpStr: "be"},
+		typ1ExtndMnics{BaseOpStr: "bc", Value: 11, Offset: 0, ExtnOpStr: "bnl"},
+		typ1ExtndMnics{BaseOpStr: "bc", Value: 13, Offset: 0, ExtnOpStr: "bnh"},
+		typ1ExtndMnics{BaseOpStr: "bc", Value: 14, Offset: 0, ExtnOpStr: "bno"},
+		typ1ExtndMnics{BaseOpStr: "bc", Value: 15, Offset: 0, ExtnOpStr: "b"},
+
+		//BRC - BRANCH RELATIVE ON CONDITION instruction
+		typ1ExtndMnics{BaseOpStr: "brc", Value: 0, Offset: 0, ExtnOpStr: "jnop"},
+		typ1ExtndMnics{BaseOpStr: "brc", Value: 1, Offset: 0, ExtnOpStr: "jo"},
+		typ1ExtndMnics{BaseOpStr: "brc", Value: 2, Offset: 0, ExtnOpStr: "jh"},
+		typ1ExtndMnics{BaseOpStr: "brc", Value: 4, Offset: 0, ExtnOpStr: "jl"},
+		typ1ExtndMnics{BaseOpStr: "brc", Value: 7, Offset: 0, ExtnOpStr: "jne"},
+		typ1ExtndMnics{BaseOpStr: "brc", Value: 8, Offset: 0, ExtnOpStr: "je"},
+		typ1ExtndMnics{BaseOpStr: "brc", Value: 11, Offset: 0, ExtnOpStr: "jnl"},
+		typ1ExtndMnics{BaseOpStr: "brc", Value: 13, Offset: 0, ExtnOpStr: "jnh"},
+		typ1ExtndMnics{BaseOpStr: "brc", Value: 14, Offset: 0, ExtnOpStr: "jno"},
+		typ1ExtndMnics{BaseOpStr: "brc", Value: 15, Offset: 0, ExtnOpStr: "j"},
+
+		//BRCL - BRANCH RELATIVE ON CONDITION LONG instruction
+		typ1ExtndMnics{BaseOpStr: "brcl", Value: 0, Offset: 0, ExtnOpStr: "jgnop"},
+		typ1ExtndMnics{BaseOpStr: "brcl", Value: 1, Offset: 0, ExtnOpStr: "jgo"},
+		typ1ExtndMnics{BaseOpStr: "brcl", Value: 2, Offset: 0, ExtnOpStr: "jgh"},
+		typ1ExtndMnics{BaseOpStr: "brcl", Value: 4, Offset: 0, ExtnOpStr: "jgl"},
+
typ1ExtndMnics{BaseOpStr: "brcl", Value: 7, Offset: 0, ExtnOpStr: "jgne"}, + typ1ExtndMnics{BaseOpStr: "brcl", Value: 8, Offset: 0, ExtnOpStr: "jge"}, + typ1ExtndMnics{BaseOpStr: "brcl", Value: 11, Offset: 0, ExtnOpStr: "jgnl"}, + typ1ExtndMnics{BaseOpStr: "brcl", Value: 13, Offset: 0, ExtnOpStr: "jgnh"}, + typ1ExtndMnics{BaseOpStr: "brcl", Value: 14, Offset: 0, ExtnOpStr: "jgno"}, + typ1ExtndMnics{BaseOpStr: "brcl", Value: 15, Offset: 0, ExtnOpStr: "jg"}, + } + + //Compare instructions + cmpInstrExtndMnics := []typ2ExtndMnics{ + typ2ExtndMnics{Value: 2, Offset: 2, ExtnOpStr: "h"}, + typ2ExtndMnics{Value: 4, Offset: 2, ExtnOpStr: "l"}, + typ2ExtndMnics{Value: 6, Offset: 2, ExtnOpStr: "ne"}, + typ2ExtndMnics{Value: 8, Offset: 2, ExtnOpStr: "e"}, + typ2ExtndMnics{Value: 10, Offset: 2, ExtnOpStr: "nl"}, + typ2ExtndMnics{Value: 12, Offset: 2, ExtnOpStr: "nh"}, + } + + //Load and Store instructions + ldSt_InstrExtndMnics := []typ2ExtndMnics{ + typ2ExtndMnics{Value: 1, Offset: 2, ExtnOpStr: "o"}, + typ2ExtndMnics{Value: 2, Offset: 2, ExtnOpStr: "h"}, + typ2ExtndMnics{Value: 3, Offset: 2, ExtnOpStr: "nle"}, + typ2ExtndMnics{Value: 4, Offset: 2, ExtnOpStr: "l"}, + typ2ExtndMnics{Value: 5, Offset: 2, ExtnOpStr: "nhe"}, + typ2ExtndMnics{Value: 6, Offset: 2, ExtnOpStr: "lh"}, + typ2ExtndMnics{Value: 7, Offset: 2, ExtnOpStr: "ne"}, + typ2ExtndMnics{Value: 8, Offset: 2, ExtnOpStr: "e"}, + typ2ExtndMnics{Value: 9, Offset: 2, ExtnOpStr: "nlh"}, + typ2ExtndMnics{Value: 10, Offset: 2, ExtnOpStr: "he"}, + typ2ExtndMnics{Value: 11, Offset: 2, ExtnOpStr: "nl"}, + typ2ExtndMnics{Value: 12, Offset: 2, ExtnOpStr: "le"}, + typ2ExtndMnics{Value: 13, Offset: 2, ExtnOpStr: "nh"}, + typ2ExtndMnics{Value: 14, Offset: 2, ExtnOpStr: "no"}, + } + + vecInstrExtndMnics := []typ2ExtndMnics{ + typ2ExtndMnics{Value: 0, Offset: 3, ExtnOpStr: "b"}, + typ2ExtndMnics{Value: 1, Offset: 3, ExtnOpStr: "h"}, + typ2ExtndMnics{Value: 2, Offset: 3, ExtnOpStr: "f"}, + typ2ExtndMnics{Value: 3, Offset: 3, 
ExtnOpStr: "g"}, + typ2ExtndMnics{Value: 4, Offset: 3, ExtnOpStr: "q"}, + typ2ExtndMnics{Value: 6, Offset: 3, ExtnOpStr: "lf"}, + } + + //VCEQ, VCH, VCHL + vec2InstrExtndMnics := []typ3ExtndMnics{ + typ3ExtndMnics{Value1: 0, Value2: 0, Offset1: 3, Offset2: 4, ExtnOpStr: "b"}, + typ3ExtndMnics{Value1: 1, Value2: 0, Offset1: 3, Offset2: 4, ExtnOpStr: "h"}, + typ3ExtndMnics{Value1: 2, Value2: 0, Offset1: 3, Offset2: 4, ExtnOpStr: "f"}, + typ3ExtndMnics{Value1: 3, Value2: 0, Offset1: 3, Offset2: 4, ExtnOpStr: "g"}, + typ3ExtndMnics{Value1: 0, Value2: 1, Offset1: 3, Offset2: 4, ExtnOpStr: "bs"}, + typ3ExtndMnics{Value1: 1, Value2: 1, Offset1: 3, Offset2: 4, ExtnOpStr: "hs"}, + typ3ExtndMnics{Value1: 2, Value2: 1, Offset1: 3, Offset2: 4, ExtnOpStr: "fs"}, + typ3ExtndMnics{Value1: 3, Value2: 1, Offset1: 3, Offset2: 4, ExtnOpStr: "gs"}, + } + + //VFAE, VFEE, VFENE + vec21InstrExtndMnics := []typ3ExtndMnics{ + typ3ExtndMnics{Value1: 0, Value2: 0, Offset1: 3, Offset2: 4, ExtnOpStr: "b"}, + typ3ExtndMnics{Value1: 1, Value2: 0, Offset1: 3, Offset2: 4, ExtnOpStr: "h"}, + typ3ExtndMnics{Value1: 2, Value2: 0, Offset1: 3, Offset2: 4, ExtnOpStr: "f"}, + typ3ExtndMnics{Value1: 0, Value2: 1, Offset1: 3, Offset2: 4, ExtnOpStr: "bs"}, + typ3ExtndMnics{Value1: 1, Value2: 1, Offset1: 3, Offset2: 4, ExtnOpStr: "hs"}, + typ3ExtndMnics{Value1: 2, Value2: 1, Offset1: 3, Offset2: 4, ExtnOpStr: "fs"}, + typ3ExtndMnics{Value1: 0, Value2: 2, Offset1: 3, Offset2: 4, ExtnOpStr: "zb"}, + typ3ExtndMnics{Value1: 1, Value2: 2, Offset1: 3, Offset2: 4, ExtnOpStr: "zh"}, + typ3ExtndMnics{Value1: 2, Value2: 2, Offset1: 3, Offset2: 4, ExtnOpStr: "zf"}, + typ3ExtndMnics{Value1: 0, Value2: 3, Offset1: 3, Offset2: 4, ExtnOpStr: "zbs"}, + typ3ExtndMnics{Value1: 1, Value2: 3, Offset1: 3, Offset2: 4, ExtnOpStr: "zhs"}, + typ3ExtndMnics{Value1: 2, Value2: 3, Offset1: 3, Offset2: 4, ExtnOpStr: "zfs"}, + } + + vec3InstrExtndMnics := []typ3ExtndMnics{ + typ3ExtndMnics{Value1: 2, Value2: 0, Offset1: 2, Offset2: 3, 
ExtnOpStr: "sb"}, + typ3ExtndMnics{Value1: 3, Value2: 0, Offset1: 2, Offset2: 3, ExtnOpStr: "db"}, + typ3ExtndMnics{Value1: 4, Value2: 0, Offset1: 2, Offset2: 3, ExtnOpStr: "xb"}, + } + + vec4InstrExtndMnics := []typ4ExtndMnics{ + // VFA - VECTOR FP ADD + typ4ExtndMnics{BaseOpStr: "vfa", Value1: 2, Value2: 0, Offset1: 3, Offset2: 4, ExtnOpStr: "vfasb"}, + typ4ExtndMnics{BaseOpStr: "vfa", Value1: 3, Value2: 0, Offset1: 3, Offset2: 4, ExtnOpStr: "vfadb"}, + typ4ExtndMnics{BaseOpStr: "vfa", Value1: 2, Value2: 8, Offset1: 3, Offset2: 4, ExtnOpStr: "wfasb"}, + typ4ExtndMnics{BaseOpStr: "vfa", Value1: 3, Value2: 8, Offset1: 3, Offset2: 4, ExtnOpStr: "wfadb"}, + typ4ExtndMnics{BaseOpStr: "vfa", Value1: 4, Value2: 8, Offset1: 3, Offset2: 4, ExtnOpStr: "wfaxb"}, + + // VFD - VECTOR FP DIVIDE + typ4ExtndMnics{BaseOpStr: "vfd", Value1: 2, Value2: 0, Offset1: 3, Offset2: 4, ExtnOpStr: "vfdsb"}, + typ4ExtndMnics{BaseOpStr: "vfd", Value1: 3, Value2: 0, Offset1: 3, Offset2: 4, ExtnOpStr: "vfddb"}, + typ4ExtndMnics{BaseOpStr: "vfd", Value1: 2, Value2: 8, Offset1: 3, Offset2: 4, ExtnOpStr: "wfdsb"}, + typ4ExtndMnics{BaseOpStr: "vfd", Value1: 3, Value2: 8, Offset1: 3, Offset2: 4, ExtnOpStr: "wfddb"}, + typ4ExtndMnics{BaseOpStr: "vfd", Value1: 4, Value2: 8, Offset1: 3, Offset2: 4, ExtnOpStr: "wfdxb"}, + + // VFLL - VECTOR FP LOAD LENGTHENED + typ4ExtndMnics{BaseOpStr: "vfll", Value1: 2, Value2: 0, Offset1: 2, Offset2: 3, ExtnOpStr: "vflfs"}, + typ4ExtndMnics{BaseOpStr: "vfll", Value1: 2, Value2: 8, Offset1: 2, Offset2: 3, ExtnOpStr: "wflls"}, + typ4ExtndMnics{BaseOpStr: "vfll", Value1: 3, Value2: 8, Offset1: 2, Offset2: 3, ExtnOpStr: "wflld"}, + + // VFMAX - VECTOR FP MAXIMUM + typ4ExtndMnics{BaseOpStr: "vfmax", Value1: 2, Value2: 0, Offset1: 3, Offset2: 4, ExtnOpStr: "vfmaxsb"}, + typ4ExtndMnics{BaseOpStr: "vfmax", Value1: 3, Value2: 0, Offset1: 3, Offset2: 4, ExtnOpStr: "vfmaxdb"}, + typ4ExtndMnics{BaseOpStr: "vfmax", Value1: 2, Value2: 8, Offset1: 3, Offset2: 4, ExtnOpStr: 
"wfmaxsb"}, + typ4ExtndMnics{BaseOpStr: "vfmax", Value1: 3, Value2: 8, Offset1: 3, Offset2: 4, ExtnOpStr: "wfmaxdb"}, + typ4ExtndMnics{BaseOpStr: "vfmax", Value1: 4, Value2: 8, Offset1: 3, Offset2: 4, ExtnOpStr: "wfmaxxb"}, + + // VFMIN - VECTOR FP MINIMUM + typ4ExtndMnics{BaseOpStr: "vfmin", Value1: 2, Value2: 0, Offset1: 3, Offset2: 4, ExtnOpStr: "vfminsb"}, + typ4ExtndMnics{BaseOpStr: "vfmin", Value1: 3, Value2: 0, Offset1: 3, Offset2: 4, ExtnOpStr: "vfmindb"}, + typ4ExtndMnics{BaseOpStr: "vfmin", Value1: 2, Value2: 8, Offset1: 3, Offset2: 4, ExtnOpStr: "wfminsb"}, + typ4ExtndMnics{BaseOpStr: "vfmin", Value1: 3, Value2: 8, Offset1: 3, Offset2: 4, ExtnOpStr: "wfmindb"}, + typ4ExtndMnics{BaseOpStr: "vfmin", Value1: 4, Value2: 8, Offset1: 3, Offset2: 4, ExtnOpStr: "wfminxb"}, + + // VFM - VECTOR FP MULTIPLY + typ4ExtndMnics{BaseOpStr: "vfm", Value1: 2, Value2: 0, Offset1: 3, Offset2: 4, ExtnOpStr: "vfmsb"}, + typ4ExtndMnics{BaseOpStr: "vfm", Value1: 3, Value2: 0, Offset1: 3, Offset2: 4, ExtnOpStr: "vfmdb"}, + typ4ExtndMnics{BaseOpStr: "vfm", Value1: 2, Value2: 8, Offset1: 3, Offset2: 4, ExtnOpStr: "wfmsb"}, + typ4ExtndMnics{BaseOpStr: "vfm", Value1: 3, Value2: 8, Offset1: 3, Offset2: 4, ExtnOpStr: "wfmdb"}, + typ4ExtndMnics{BaseOpStr: "vfm", Value1: 4, Value2: 8, Offset1: 3, Offset2: 4, ExtnOpStr: "wfmxb"}, + + // VFSQ - VECTOR FP SQUARE ROOT + typ4ExtndMnics{BaseOpStr: "vfsq", Value1: 2, Value2: 0, Offset1: 3, Offset2: 4, ExtnOpStr: "vfsqsb"}, + typ4ExtndMnics{BaseOpStr: "vfsq", Value1: 3, Value2: 0, Offset1: 3, Offset2: 4, ExtnOpStr: "vfsqdb"}, + typ4ExtndMnics{BaseOpStr: "vfsq", Value1: 2, Value2: 8, Offset1: 3, Offset2: 4, ExtnOpStr: "wfsqsb"}, + typ4ExtndMnics{BaseOpStr: "vfsq", Value1: 3, Value2: 8, Offset1: 3, Offset2: 4, ExtnOpStr: "wfsqdb"}, + typ4ExtndMnics{BaseOpStr: "vfsq", Value1: 4, Value2: 8, Offset1: 3, Offset2: 4, ExtnOpStr: "wfsqxb"}, + + // VFS - VECTOR FP SUBTRACT + typ4ExtndMnics{BaseOpStr: "vfs", Value1: 2, Value2: 0, Offset1: 3, Offset2: 4, 
ExtnOpStr: "vfssb"}, + typ4ExtndMnics{BaseOpStr: "vfs", Value1: 3, Value2: 0, Offset1: 3, Offset2: 4, ExtnOpStr: "vfsdb"}, + typ4ExtndMnics{BaseOpStr: "vfs", Value1: 2, Value2: 8, Offset1: 3, Offset2: 4, ExtnOpStr: "wfssb"}, + typ4ExtndMnics{BaseOpStr: "vfs", Value1: 3, Value2: 8, Offset1: 3, Offset2: 4, ExtnOpStr: "wfsdb"}, + typ4ExtndMnics{BaseOpStr: "vfs", Value1: 4, Value2: 8, Offset1: 3, Offset2: 4, ExtnOpStr: "wfsxb"}, + + // VFTCI - VECTOR FP TEST DATA CLASS IMMEDIATE + typ4ExtndMnics{BaseOpStr: "vftci", Value1: 2, Value2: 0, Offset1: 3, Offset2: 4, ExtnOpStr: "vftcisb"}, + typ4ExtndMnics{BaseOpStr: "vftci", Value1: 3, Value2: 0, Offset1: 3, Offset2: 4, ExtnOpStr: "vftcidb"}, + typ4ExtndMnics{BaseOpStr: "vftci", Value1: 2, Value2: 8, Offset1: 3, Offset2: 4, ExtnOpStr: "wftcisb"}, + typ4ExtndMnics{BaseOpStr: "vftci", Value1: 3, Value2: 8, Offset1: 3, Offset2: 4, ExtnOpStr: "wftcidb"}, + typ4ExtndMnics{BaseOpStr: "vftci", Value1: 4, Value2: 8, Offset1: 3, Offset2: 4, ExtnOpStr: "wftcixb"}, + } + + vec6InstrExtndMnics := []typ5ExtndMnics{ + // VFCE - VECTOR FP COMPARE EQUAL + typ5ExtndMnics{BaseOpStr: "vfce", Value1: 2, Value2: 0, Value3: 0, Offset1: 3, Offset2: 4, Offset3: 5, ExtnOpStr: "vfcesb"}, + typ5ExtndMnics{BaseOpStr: "vfce", Value1: 2, Value2: 0, Value3: 1, Offset1: 3, Offset2: 4, Offset3: 5, ExtnOpStr: "vfcesbs"}, + typ5ExtndMnics{BaseOpStr: "vfce", Value1: 3, Value2: 0, Value3: 0, Offset1: 3, Offset2: 4, Offset3: 5, ExtnOpStr: "vfcedb"}, + typ5ExtndMnics{BaseOpStr: "vfce", Value1: 2, Value2: 8, Value3: 0, Offset1: 3, Offset2: 4, Offset3: 5, ExtnOpStr: "wfcesb"}, + typ5ExtndMnics{BaseOpStr: "vfce", Value1: 2, Value2: 8, Value3: 1, Offset1: 3, Offset2: 4, Offset3: 5, ExtnOpStr: "wfcesbs"}, + typ5ExtndMnics{BaseOpStr: "vfce", Value1: 3, Value2: 8, Value3: 0, Offset1: 3, Offset2: 4, Offset3: 5, ExtnOpStr: "wfcedb"}, + typ5ExtndMnics{BaseOpStr: "vfce", Value1: 3, Value2: 8, Value3: 1, Offset1: 3, Offset2: 4, Offset3: 5, ExtnOpStr: "wfcedbs"}, + 
typ5ExtndMnics{BaseOpStr: "vfce", Value1: 4, Value2: 8, Value3: 0, Offset1: 3, Offset2: 4, Offset3: 5, ExtnOpStr: "wfcexb"}, + typ5ExtndMnics{BaseOpStr: "vfce", Value1: 4, Value2: 8, Value3: 1, Offset1: 3, Offset2: 4, Offset3: 5, ExtnOpStr: "wfcexbs"}, + typ5ExtndMnics{BaseOpStr: "vfce", Value1: 2, Value2: 4, Value3: 0, Offset1: 3, Offset2: 4, Offset3: 5, ExtnOpStr: "vfkesb"}, + typ5ExtndMnics{BaseOpStr: "vfce", Value1: 2, Value2: 4, Value3: 1, Offset1: 3, Offset2: 4, Offset3: 5, ExtnOpStr: "vfkesbs"}, + typ5ExtndMnics{BaseOpStr: "vfce", Value1: 3, Value2: 4, Value3: 0, Offset1: 3, Offset2: 4, Offset3: 5, ExtnOpStr: "vfkedb"}, + typ5ExtndMnics{BaseOpStr: "vfce", Value1: 3, Value2: 4, Value3: 1, Offset1: 3, Offset2: 4, Offset3: 5, ExtnOpStr: "vfkedbs"}, + typ5ExtndMnics{BaseOpStr: "vfce", Value1: 2, Value2: 12, Value3: 0, Offset1: 3, Offset2: 4, Offset3: 5, ExtnOpStr: "wfkesb"}, + typ5ExtndMnics{BaseOpStr: "vfce", Value1: 2, Value2: 12, Value3: 1, Offset1: 3, Offset2: 4, Offset3: 5, ExtnOpStr: "wfkesbs"}, + typ5ExtndMnics{BaseOpStr: "vfce", Value1: 3, Value2: 12, Value3: 0, Offset1: 3, Offset2: 4, Offset3: 5, ExtnOpStr: "wfkedb"}, + typ5ExtndMnics{BaseOpStr: "vfce", Value1: 3, Value2: 12, Value3: 1, Offset1: 3, Offset2: 4, Offset3: 5, ExtnOpStr: "wfkedbs"}, + typ5ExtndMnics{BaseOpStr: "vfce", Value1: 4, Value2: 12, Value3: 0, Offset1: 3, Offset2: 4, Offset3: 5, ExtnOpStr: "wfkexb"}, + typ5ExtndMnics{BaseOpStr: "vfce", Value1: 4, Value2: 12, Value3: 1, Offset1: 3, Offset2: 4, Offset3: 5, ExtnOpStr: "wfkexbs"}, + + // VFCH - VECTOR FP COMPARE HIGH + typ5ExtndMnics{BaseOpStr: "vfch", Value1: 2, Value2: 0, Value3: 0, Offset1: 3, Offset2: 4, Offset3: 5, ExtnOpStr: "vfchsb"}, + typ5ExtndMnics{BaseOpStr: "vfch", Value1: 2, Value2: 0, Value3: 1, Offset1: 3, Offset2: 4, Offset3: 5, ExtnOpStr: "vfchsbs"}, + typ5ExtndMnics{BaseOpStr: "vfch", Value1: 3, Value2: 0, Value3: 0, Offset1: 3, Offset2: 4, Offset3: 5, ExtnOpStr: "vfchdb"}, + typ5ExtndMnics{BaseOpStr: "vfch", Value1: 3, 
Value2: 0, Value3: 1, Offset1: 3, Offset2: 4, Offset3: 5, ExtnOpStr: "vfchdbs"}, + typ5ExtndMnics{BaseOpStr: "vfch", Value1: 2, Value2: 8, Value3: 0, Offset1: 3, Offset2: 4, Offset3: 5, ExtnOpStr: "wfchsb"}, + typ5ExtndMnics{BaseOpStr: "vfch", Value1: 2, Value2: 8, Value3: 1, Offset1: 3, Offset2: 4, Offset3: 5, ExtnOpStr: "wfchsbs"}, + typ5ExtndMnics{BaseOpStr: "vfch", Value1: 3, Value2: 8, Value3: 0, Offset1: 3, Offset2: 4, Offset3: 5, ExtnOpStr: "wfchdb"}, + typ5ExtndMnics{BaseOpStr: "vfch", Value1: 3, Value2: 8, Value3: 1, Offset1: 3, Offset2: 4, Offset3: 5, ExtnOpStr: "wfchdbs"}, + typ5ExtndMnics{BaseOpStr: "vfch", Value1: 4, Value2: 8, Value3: 0, Offset1: 3, Offset2: 4, Offset3: 5, ExtnOpStr: "wfchxb"}, + typ5ExtndMnics{BaseOpStr: "vfch", Value1: 4, Value2: 8, Value3: 1, Offset1: 3, Offset2: 4, Offset3: 5, ExtnOpStr: "wfchxbs"}, + typ5ExtndMnics{BaseOpStr: "vfch", Value1: 2, Value2: 4, Value3: 0, Offset1: 3, Offset2: 4, Offset3: 5, ExtnOpStr: "vfkhsb"}, + typ5ExtndMnics{BaseOpStr: "vfch", Value1: 2, Value2: 4, Value3: 1, Offset1: 3, Offset2: 4, Offset3: 5, ExtnOpStr: "vfkhsbs"}, + typ5ExtndMnics{BaseOpStr: "vfch", Value1: 3, Value2: 4, Value3: 0, Offset1: 3, Offset2: 4, Offset3: 5, ExtnOpStr: "vfkhdb"}, + typ5ExtndMnics{BaseOpStr: "vfch", Value1: 3, Value2: 4, Value3: 1, Offset1: 3, Offset2: 4, Offset3: 5, ExtnOpStr: "vfkhdbs"}, + typ5ExtndMnics{BaseOpStr: "vfch", Value1: 2, Value2: 12, Value3: 0, Offset1: 3, Offset2: 4, Offset3: 5, ExtnOpStr: "wfkhsb"}, + typ5ExtndMnics{BaseOpStr: "vfch", Value1: 2, Value2: 12, Value3: 1, Offset1: 3, Offset2: 4, Offset3: 5, ExtnOpStr: "wfkhsbs"}, + typ5ExtndMnics{BaseOpStr: "vfch", Value1: 3, Value2: 12, Value3: 0, Offset1: 3, Offset2: 4, Offset3: 5, ExtnOpStr: "wfkhdb"}, + typ5ExtndMnics{BaseOpStr: "vfch", Value1: 3, Value2: 12, Value3: 1, Offset1: 3, Offset2: 4, Offset3: 5, ExtnOpStr: "wfkhdbs"}, + typ5ExtndMnics{BaseOpStr: "vfch", Value1: 4, Value2: 12, Value3: 0, Offset1: 3, Offset2: 4, Offset3: 5, ExtnOpStr: "wfkhxb"}, + 
typ5ExtndMnics{BaseOpStr: "vfch", Value1: 4, Value2: 12, Value3: 1, Offset1: 3, Offset2: 4, Offset3: 5, ExtnOpStr: "wfkhxbs"}, + + // VFCHE - VECTOR FP COMPARE HIGH OR EQUAL + typ5ExtndMnics{BaseOpStr: "vfche", Value1: 2, Value2: 0, Value3: 0, Offset1: 3, Offset2: 4, Offset3: 5, ExtnOpStr: "vfchesb"}, + typ5ExtndMnics{BaseOpStr: "vfche", Value1: 2, Value2: 0, Value3: 1, Offset1: 3, Offset2: 4, Offset3: 5, ExtnOpStr: "vfchesbs"}, + typ5ExtndMnics{BaseOpStr: "vfche", Value1: 3, Value2: 0, Value3: 0, Offset1: 3, Offset2: 4, Offset3: 5, ExtnOpStr: "vfchedb"}, + typ5ExtndMnics{BaseOpStr: "vfche", Value1: 3, Value2: 0, Value3: 1, Offset1: 3, Offset2: 4, Offset3: 5, ExtnOpStr: "vfchedbs"}, + typ5ExtndMnics{BaseOpStr: "vfche", Value1: 2, Value2: 8, Value3: 0, Offset1: 3, Offset2: 4, Offset3: 5, ExtnOpStr: "wfchesb"}, + typ5ExtndMnics{BaseOpStr: "vfche", Value1: 2, Value2: 8, Value3: 1, Offset1: 3, Offset2: 4, Offset3: 5, ExtnOpStr: "wfchesbs"}, + typ5ExtndMnics{BaseOpStr: "vfche", Value1: 3, Value2: 8, Value3: 0, Offset1: 3, Offset2: 4, Offset3: 5, ExtnOpStr: "wfchedb"}, + typ5ExtndMnics{BaseOpStr: "vfche", Value1: 3, Value2: 8, Value3: 1, Offset1: 3, Offset2: 4, Offset3: 5, ExtnOpStr: "wfchedbs"}, + typ5ExtndMnics{BaseOpStr: "vfche", Value1: 4, Value2: 8, Value3: 0, Offset1: 3, Offset2: 4, Offset3: 5, ExtnOpStr: "wfchexb"}, + typ5ExtndMnics{BaseOpStr: "vfche", Value1: 4, Value2: 8, Value3: 1, Offset1: 3, Offset2: 4, Offset3: 5, ExtnOpStr: "wfchexbs"}, + typ5ExtndMnics{BaseOpStr: "vfche", Value1: 2, Value2: 4, Value3: 0, Offset1: 3, Offset2: 4, Offset3: 5, ExtnOpStr: "vfkhesb"}, + typ5ExtndMnics{BaseOpStr: "vfche", Value1: 2, Value2: 4, Value3: 1, Offset1: 3, Offset2: 4, Offset3: 5, ExtnOpStr: "vfkhesbs"}, + typ5ExtndMnics{BaseOpStr: "vfche", Value1: 3, Value2: 4, Value3: 0, Offset1: 3, Offset2: 4, Offset3: 5, ExtnOpStr: "vfkhedb"}, + typ5ExtndMnics{BaseOpStr: "vfche", Value1: 3, Value2: 4, Value3: 1, Offset1: 3, Offset2: 4, Offset3: 5, ExtnOpStr: "vfkhedbs"}, + 
typ5ExtndMnics{BaseOpStr: "vfche", Value1: 2, Value2: 12, Value3: 0, Offset1: 3, Offset2: 4, Offset3: 5, ExtnOpStr: "wfkhesb"}, + typ5ExtndMnics{BaseOpStr: "vfche", Value1: 2, Value2: 12, Value3: 1, Offset1: 3, Offset2: 4, Offset3: 5, ExtnOpStr: "wfkhesbs"}, + typ5ExtndMnics{BaseOpStr: "vfche", Value1: 3, Value2: 12, Value3: 0, Offset1: 3, Offset2: 4, Offset3: 5, ExtnOpStr: "wfkhedb"}, + typ5ExtndMnics{BaseOpStr: "vfche", Value1: 3, Value2: 12, Value3: 1, Offset1: 3, Offset2: 4, Offset3: 5, ExtnOpStr: "wfkhedbs"}, + typ5ExtndMnics{BaseOpStr: "vfche", Value1: 4, Value2: 12, Value3: 0, Offset1: 3, Offset2: 4, Offset3: 5, ExtnOpStr: "wfkhexb"}, + typ5ExtndMnics{BaseOpStr: "vfche", Value1: 4, Value2: 12, Value3: 1, Offset1: 3, Offset2: 4, Offset3: 5, ExtnOpStr: "wfkhexbs"}, + + // VFPSO - VECTOR FP PERFORM SIGN OPERATION + typ5ExtndMnics{BaseOpStr: "vfpso", Value1: 2, Value2: 0, Value3: 0, Offset1: 2, Offset2: 3, Offset3: 4, ExtnOpStr: "vflcsb"}, + typ5ExtndMnics{BaseOpStr: "vfpso", Value1: 2, Value2: 8, Value3: 0, Offset1: 2, Offset2: 3, Offset3: 4, ExtnOpStr: "wflcsb"}, + typ5ExtndMnics{BaseOpStr: "vfpso", Value1: 2, Value2: 0, Value3: 1, Offset1: 2, Offset2: 3, Offset3: 4, ExtnOpStr: "vflnsb"}, + typ5ExtndMnics{BaseOpStr: "vfpso", Value1: 2, Value2: 8, Value3: 1, Offset1: 2, Offset2: 3, Offset3: 4, ExtnOpStr: "wflnsb"}, + typ5ExtndMnics{BaseOpStr: "vfpso", Value1: 2, Value2: 0, Value3: 2, Offset1: 2, Offset2: 3, Offset3: 4, ExtnOpStr: "vflpsb"}, + typ5ExtndMnics{BaseOpStr: "vfpso", Value1: 2, Value2: 8, Value3: 2, Offset1: 2, Offset2: 3, Offset3: 4, ExtnOpStr: "wflpsb"}, + typ5ExtndMnics{BaseOpStr: "vfpso", Value1: 3, Value2: 0, Value3: 0, Offset1: 2, Offset2: 3, Offset3: 4, ExtnOpStr: "vflcdb"}, + typ5ExtndMnics{BaseOpStr: "vfpso", Value1: 3, Value2: 8, Value3: 0, Offset1: 2, Offset2: 3, Offset3: 4, ExtnOpStr: "wflcdb"}, + typ5ExtndMnics{BaseOpStr: "vfpso", Value1: 3, Value2: 0, Value3: 1, Offset1: 2, Offset2: 3, Offset3: 4, ExtnOpStr: "vflndb"}, + 
typ5ExtndMnics{BaseOpStr: "vfpso", Value1: 3, Value2: 8, Value3: 1, Offset1: 2, Offset2: 3, Offset3: 4, ExtnOpStr: "wflndb"}, + typ5ExtndMnics{BaseOpStr: "vfpso", Value1: 3, Value2: 0, Value3: 2, Offset1: 2, Offset2: 3, Offset3: 4, ExtnOpStr: "vflpdb"}, + typ5ExtndMnics{BaseOpStr: "vfpso", Value1: 3, Value2: 8, Value3: 2, Offset1: 2, Offset2: 3, Offset3: 4, ExtnOpStr: "wflpdb"}, + typ5ExtndMnics{BaseOpStr: "vfpso", Value1: 4, Value2: 8, Value3: 0, Offset1: 2, Offset2: 3, Offset3: 4, ExtnOpStr: "wflcxb"}, + typ5ExtndMnics{BaseOpStr: "vfpso", Value1: 4, Value2: 8, Value3: 1, Offset1: 2, Offset2: 3, Offset3: 4, ExtnOpStr: "wflnxb"}, + typ5ExtndMnics{BaseOpStr: "vfpso", Value1: 4, Value2: 8, Value3: 2, Offset1: 2, Offset2: 3, Offset3: 4, ExtnOpStr: "wflpxb"}, + } + + vec7InstrExtndMnics := []typ4ExtndMnics{ + // VFMA - VECTOR FP MULTIPLY AND ADD + typ4ExtndMnics{BaseOpStr: "vfma", Value1: 0, Value2: 2, Offset1: 4, Offset2: 5, ExtnOpStr: "vfmasb"}, + typ4ExtndMnics{BaseOpStr: "vfma", Value1: 0, Value2: 3, Offset1: 4, Offset2: 5, ExtnOpStr: "vfmadb"}, + typ4ExtndMnics{BaseOpStr: "vfma", Value1: 8, Value2: 2, Offset1: 4, Offset2: 5, ExtnOpStr: "wfmasb"}, + typ4ExtndMnics{BaseOpStr: "vfma", Value1: 8, Value2: 3, Offset1: 4, Offset2: 5, ExtnOpStr: "wfmadb"}, + typ4ExtndMnics{BaseOpStr: "vfma", Value1: 8, Value2: 4, Offset1: 4, Offset2: 5, ExtnOpStr: "wfmaxb"}, + + // VFMS - VECTOR FP MULTIPLY AND SUBTRACT + typ4ExtndMnics{BaseOpStr: "vfms", Value1: 0, Value2: 2, Offset1: 4, Offset2: 5, ExtnOpStr: "vfmssb"}, + typ4ExtndMnics{BaseOpStr: "vfms", Value1: 0, Value2: 3, Offset1: 4, Offset2: 5, ExtnOpStr: "vfmsdb"}, + typ4ExtndMnics{BaseOpStr: "vfms", Value1: 8, Value2: 2, Offset1: 4, Offset2: 5, ExtnOpStr: "wfmssb"}, + typ4ExtndMnics{BaseOpStr: "vfms", Value1: 8, Value2: 3, Offset1: 4, Offset2: 5, ExtnOpStr: "wfmsdb"}, + typ4ExtndMnics{BaseOpStr: "vfms", Value1: 8, Value2: 4, Offset1: 4, Offset2: 5, ExtnOpStr: "wfmsxb"}, + + // VFNMA - VECTOR FP NEGATIVE MULTIPLY AND ADD + 
typ4ExtndMnics{BaseOpStr: "vfnma", Value1: 0, Value2: 2, Offset1: 4, Offset2: 5, ExtnOpStr: "vfnmasb"}, + typ4ExtndMnics{BaseOpStr: "vfnma", Value1: 0, Value2: 3, Offset1: 4, Offset2: 5, ExtnOpStr: "vfnmadb"}, + typ4ExtndMnics{BaseOpStr: "vfnma", Value1: 8, Value2: 2, Offset1: 4, Offset2: 5, ExtnOpStr: "wfnmasb"}, + typ4ExtndMnics{BaseOpStr: "vfnma", Value1: 8, Value2: 3, Offset1: 4, Offset2: 5, ExtnOpStr: "wfnmadb"}, + typ4ExtndMnics{BaseOpStr: "vfnma", Value1: 8, Value2: 4, Offset1: 4, Offset2: 5, ExtnOpStr: "wfnmaxb"}, + + // VFNMS - VECTOR FP NEGATIVE MULTIPLY AND SUBTRACT + typ4ExtndMnics{BaseOpStr: "vfnms", Value1: 0, Value2: 2, Offset1: 4, Offset2: 5, ExtnOpStr: "vfnmssb"}, + typ4ExtndMnics{BaseOpStr: "vfnms", Value1: 0, Value2: 3, Offset1: 4, Offset2: 5, ExtnOpStr: "vfnmsdb"}, + typ4ExtndMnics{BaseOpStr: "vfnms", Value1: 8, Value2: 2, Offset1: 4, Offset2: 5, ExtnOpStr: "wfnmssb"}, + typ4ExtndMnics{BaseOpStr: "vfnms", Value1: 8, Value2: 3, Offset1: 4, Offset2: 5, ExtnOpStr: "wfnmsdb"}, + typ4ExtndMnics{BaseOpStr: "vfnms", Value1: 8, Value2: 4, Offset1: 4, Offset2: 5, ExtnOpStr: "wfnmsxb"}, + } + + opString := inst.Op.String() + newOpStr := opString + + if inst.Enc == 0 { + return ".long 0x0" + } else if inst.Op == 0 { + return "error: unknown instruction" + } + + switch opString { + // Case to handle all "branch" instructions with one M-field operand + case "bic", "bcr", "bc", "brc", "brcl": + + for i := 0; i < len(brnchInstrExtndMnics); i++ { + if opString == brnchInstrExtndMnics[i].BaseOpStr && + uint8(inst.Args[brnchInstrExtndMnics[i].Offset].(Mask)) == brnchInstrExtndMnics[i].Value { + newOpStr = brnchInstrExtndMnics[i].ExtnOpStr + removeArg(inst, int8(brnchInstrExtndMnics[i].Offset)) + break + } + } + + // Case to handle all "compare" instructions with one M-field operand + case "crb", "cgrb", "crj", "cgrj", "crt", "cgrt", "cib", "cgib", "cij", "cgij", "cit", "cgit", "clrb", "clgrb", + "clrj", "clgrj", "clrt", "clgrt", "clt", "clgt", "clib", "clgib", 
"clij", "clgij", "clfit", "clgit": + + for i := 0; i < len(cmpInstrExtndMnics); i++ { + //For CLT and CLGT instructions, M-value is the second operand. + //Hence, set the offset to "1" + if opString == "clt" || opString == "clgt" { + cmpInstrExtndMnics[i].Offset = 1 + } + + if uint8(inst.Args[cmpInstrExtndMnics[i].Offset].(Mask)) == cmpInstrExtndMnics[i].Value { + newOpStr = opString + cmpInstrExtndMnics[i].ExtnOpStr + removeArg(inst, int8(cmpInstrExtndMnics[i].Offset)) + break + } + } + + // Case to handle all "load" and "store" instructions with one M-field operand + case "lochhi", "lochi", "locghi", "locfhr", "locfh", "locr", "locgr", "loc", + "locg", "selr", "selgr", "selfhr", "stocfh", "stoc", "stocg": + + for i := 0; i < len(ldSt_InstrExtndMnics); i++ { + + //For LOCFH, LOC, LOCG, SELR, SELGR, SELFHR, STOCFH, STOC, STOCG instructions, + //M-value is the forth operand. Hence, set the offset to "3" + if opString == "locfh" || opString == "loc" || opString == "locg" || opString == "selr" || opString == "selgr" || + opString == "selfhr" || opString == "stocfh" || opString == "stoc" || opString == "stocg" { + ldSt_InstrExtndMnics[i].Offset = 3 + } + + if uint8(inst.Args[ldSt_InstrExtndMnics[i].Offset].(Mask)) == ldSt_InstrExtndMnics[i].Value { + newOpStr = opString + ldSt_InstrExtndMnics[i].ExtnOpStr + removeArg(inst, int8(ldSt_InstrExtndMnics[i].Offset)) + break + } + } + + // Case to handle all "vector" instructions with one M-field operand + case "vavg", "vavgl", "verllv", "veslv", "vesrav", "vesrlv", "vgfm", "vgm", "vmx", "vmxl", "vmrh", "vmrl", "vmn", "vmnl", "vrep", + "vclz", "vctz", "vec", "vecl", "vlc", "vlp", "vpopct", "vrepi", "verim", "verll", "vesl", "vesra", "vesrl", "vgfma", "vlrep", + "vlgv", "vlvg", "vlbrrep", "vler", "vlbr", "vstbr", "vster", "vpk", "vme", "vmh", "vmle", "vmlh", "vmlo", "vml", "vmo", "vmae", + "vmale", "vmalo", "vmal", "vmah", "vmalh", "vmao", "vmph", "vmplh", "vupl", "vupll", "vscbi", "vs", "vsum", "vsumg", "vsumq", + "va", 
"vacc": + + switch opString { + + case "vavg", "vavgl", "verllv", "veslv", "vesrav", "vesrlv", "vgfm", "vgm", "vmx", "vmxl", "vmrh", "vmrl", "vmn", "vmnl", "vrep": + //M-field is 3rd arg for all these instructions. Hence, set the offset to "2" + for i := 0; i < len(vecInstrExtndMnics)-2; i++ { // 0,1,2,3 + if uint8(inst.Args[vecInstrExtndMnics[i].Offset].(Mask)) == vecInstrExtndMnics[i].Value { + newOpStr = opString + vecInstrExtndMnics[i].ExtnOpStr + removeArg(inst, int8(vecInstrExtndMnics[i].Offset)) + break + } + } + + case "vclz", "vctz", "vec", "vecl", "vlc", "vlp", "vpopct", "vrepi": + for i := 0; i < len(vecInstrExtndMnics)-2; i++ { //0,1,2,3 + if uint8(inst.Args[vecInstrExtndMnics[i].Offset-1].(Mask)) == vecInstrExtndMnics[i].Value { + newOpStr = opString + vecInstrExtndMnics[i].ExtnOpStr + removeArg(inst, int8(vecInstrExtndMnics[i].Offset-1)) + break + } + } + + case "verim", "verll", "vesl", "vesra", "vesrl", "vgfma", "vlrep": + for i := 0; i < len(vecInstrExtndMnics)-2; i++ { //0,1,2,3 + if uint8(inst.Args[vecInstrExtndMnics[i].Offset+1].(Mask)) == vecInstrExtndMnics[i].Value { + newOpStr = opString + vecInstrExtndMnics[i].ExtnOpStr + removeArg(inst, int8(vecInstrExtndMnics[i].Offset+1)) + break + } + } + + case "vlgv", "vlvg": + for i := 0; i < len(vecInstrExtndMnics)-2; i++ { + if uint8(inst.Args[vecInstrExtndMnics[i].Offset+1].(Mask)) == vecInstrExtndMnics[i].Value { + newOpStr = opString + vecInstrExtndMnics[i].ExtnOpStr + removeArg(inst, int8(vecInstrExtndMnics[i].Offset+1)) + break + } + } + + case "vlbrrep", "vler", "vster": + for i := 1; i < len(vecInstrExtndMnics)-2; i++ { + if uint8(inst.Args[vecInstrExtndMnics[i].Offset+1].(Mask)) == vecInstrExtndMnics[i].Value { + newOpStr = opString + vecInstrExtndMnics[i].ExtnOpStr + removeArg(inst, int8(vecInstrExtndMnics[i].Offset+1)) + break + } + } + + case "vpk": + for i := 1; i < len(vecInstrExtndMnics)-2; i++ { + if uint8(inst.Args[vecInstrExtndMnics[i].Offset].(Mask)) == vecInstrExtndMnics[i].Value 
{ + newOpStr = opString + vecInstrExtndMnics[i].ExtnOpStr + removeArg(inst, int8(vecInstrExtndMnics[i].Offset)) + break + } + } + + case "vlbr", "vstbr": + for i := 1; i < len(vecInstrExtndMnics)-1; i++ { + if uint8(inst.Args[vecInstrExtndMnics[i].Offset+1].(Mask)) == vecInstrExtndMnics[i].Value { + newOpStr = opString + vecInstrExtndMnics[i].ExtnOpStr + removeArg(inst, int8(vecInstrExtndMnics[i].Offset+1)) + break + } + } + case "vme", "vmh", "vmle", "vmlh", "vmlo", "vmo": + for i := 0; i < len(vecInstrExtndMnics)-3; i++ { //0,1,2 + if uint8(inst.Args[vecInstrExtndMnics[i].Offset].(Mask)) == vecInstrExtndMnics[i].Value { + newOpStr = opString + vecInstrExtndMnics[i].ExtnOpStr + removeArg(inst, int8(vecInstrExtndMnics[i].Offset)) + break + } + } + + case "vml": + for i := 0; i < len(vecInstrExtndMnics)-3; i++ { //0,1,2 + if uint8(inst.Args[vecInstrExtndMnics[i].Offset].(Mask)) == vecInstrExtndMnics[i].Value { + if uint8(inst.Args[vecInstrExtndMnics[i].Offset].(Mask)) == 1 { + newOpStr = opString + string("hw") + } else { + newOpStr = opString + vecInstrExtndMnics[i].ExtnOpStr + } + removeArg(inst, int8(vecInstrExtndMnics[i].Offset)) + break + } + } + + case "vmae", "vmale", "vmalo", "vmal", "vmah", "vmalh", "vmao": + for i := 0; i < len(vecInstrExtndMnics)-3; i++ { //0,1,2 + if uint8(inst.Args[vecInstrExtndMnics[i].Offset+1].(Mask)) == vecInstrExtndMnics[i].Value { + newOpStr = opString + vecInstrExtndMnics[i].ExtnOpStr + removeArg(inst, int8(vecInstrExtndMnics[i].Offset+1)) + break + } + } + + case "vmph", "vmplh", "vupl", "vupll": //0,1,2 + for i := 0; i < len(vecInstrExtndMnics)-3; i++ { + if uint8(inst.Args[vecInstrExtndMnics[i].Offset-1].(Mask)) == vecInstrExtndMnics[i].Value { + newOpStr = opString + vecInstrExtndMnics[i].ExtnOpStr + removeArg(inst, int8(vecInstrExtndMnics[i].Offset-1)) + break + } + } + + case "vscbi", "vs", "va", "vacc": // 0,1,2,3,4 + for i := 0; i < len(vecInstrExtndMnics)-1; i++ { + if 
uint8(inst.Args[vecInstrExtndMnics[i].Offset].(Mask)) == vecInstrExtndMnics[i].Value { + newOpStr = opString + vecInstrExtndMnics[i].ExtnOpStr + removeArg(inst, int8(vecInstrExtndMnics[i].Offset)) + break + } + } + case "vsum", "vsumg": + for i := 1; i < len(vecInstrExtndMnics)-4; i++ { + if uint8(inst.Args[vecInstrExtndMnics[i].Offset].(Mask)) == vecInstrExtndMnics[i].Value { + newOpStr = opString + vecInstrExtndMnics[i].ExtnOpStr + removeArg(inst, int8(vecInstrExtndMnics[i].Offset)) + break + } + } + case "vsumq": + for i := 2; i < len(vecInstrExtndMnics)-2; i++ { + if uint8(inst.Args[vecInstrExtndMnics[i].Offset].(Mask)) == vecInstrExtndMnics[i].Value { + newOpStr = opString + vecInstrExtndMnics[i].ExtnOpStr + removeArg(inst, int8(vecInstrExtndMnics[i].Offset)) + break + } + } + } + + case "vllez": + for i := 0; i < len(vecInstrExtndMnics); i++ { + if i == 4 { + continue + } + if uint8(inst.Args[vecInstrExtndMnics[i].Offset+1].(Mask)) == vecInstrExtndMnics[i].Value { + newOpStr = opString + vecInstrExtndMnics[i].ExtnOpStr + removeArg(inst, int8(vecInstrExtndMnics[i].Offset+1)) + break + } + } + + case "vgbm": + if uint16(inst.Args[1].(Imm)) == uint16(0) { + newOpStr = "vzeo" + removeArg(inst, int8(1)) + } else if uint16(inst.Args[1].(Imm)) == uint16(0xFFFF) { + newOpStr = "vone" + removeArg(inst, int8(1)) + } + case "vno": + if uint8(inst.Args[1].(VReg)) == uint8(inst.Args[2].(VReg)) { //Bitwise Not instruction(VNOT) if V2 equal to v3 + newOpStr = opString + "t" + removeArg(inst, int8(2)) + } + + case "vmsl": + if uint8(inst.Args[4].(Mask)) == uint8(3) { + newOpStr = opString + "g" + removeArg(inst, int8(4)) + } + + case "vflr": + if uint8(inst.Args[2].(Mask)) == uint8(3) && ((inst.Args[3].(Mask)>>3)&0x1 == 0x1) { + inst.Args[3] = (inst.Args[3].(Mask) ^ 0x8) + newOpStr = "wflrd" + removeArg(inst, int8(2)) + } else if uint8(inst.Args[2].(Mask)) == uint8(4) && ((inst.Args[3].(Mask)>>3)&0x1 == 0x1) { + inst.Args[3] = (inst.Args[3].(Mask) ^ 0x8) + newOpStr = "wflrx" 
+ removeArg(inst, int8(2)) + } else if uint8(inst.Args[2].(Mask)) == uint8(3) { + newOpStr = "vflrd" + removeArg(inst, int8(2)) + } + + case "vllebrz": + if uint8(inst.Args[4].(Mask)) == uint8(1) { + newOpStr = opString + "h" + removeArg(inst, int8(4)) + } else if uint8(inst.Args[4].(Mask)) == uint8(2) { + newOpStr = opString + "f" + removeArg(inst, int8(4)) + } else if uint8(inst.Args[4].(Mask)) == uint8(3) { + newOpStr = "ldrv" + removeArg(inst, int8(4)) + } else if uint8(inst.Args[4].(Mask)) == uint8(6) { + newOpStr = "lerv" + removeArg(inst, int8(4)) + } + + case "vschp": + if uint8(inst.Args[3].(Mask)) == uint8(2) { + newOpStr = "vschsp" + removeArg(inst, int8(3)) + } else if uint8(inst.Args[3].(Mask)) == uint8(3) { + newOpStr = "vschdp" + removeArg(inst, int8(3)) + } else if uint8(inst.Args[3].(Mask)) == uint8(4) { + newOpStr = "vschxp" + removeArg(inst, int8(3)) + } + + case "vsbcbi", "vsbi": + if uint8(inst.Args[4].(Mask)) == uint8(4) { + newOpStr = opString + vecInstrExtndMnics[4].ExtnOpStr + removeArg(inst, int8(4)) + } + + case "vac", "vaccc": + if uint8(inst.Args[4].(Mask)) == uint8(4) { + newOpStr = opString + vecInstrExtndMnics[3].ExtnOpStr + removeArg(inst, int8(3)) + } + + case "vceq", "vch", "vchl": + for i := 0; i < len(vec2InstrExtndMnics)-6; i++ { + if uint8(inst.Args[vec2InstrExtndMnics[i].Offset1].(Mask)) == vec2InstrExtndMnics[i].Value1 && + uint8(inst.Args[vec2InstrExtndMnics[i].Offset2].(Mask)) == vec2InstrExtndMnics[i].Value2 { + newOpStr = opString + vec2InstrExtndMnics[i].ExtnOpStr + removeArg(inst, int8(vec2InstrExtndMnics[i].Offset1)) + removeArg(inst, int8(vec2InstrExtndMnics[i].Offset2-1)) + break + } + } + + case "vpks", "vpkls": + for i := 1; i < len(vec2InstrExtndMnics)-6; i++ { + if i == 4 { + continue + } + if uint8(inst.Args[vec2InstrExtndMnics[i].Offset1].(Mask)) == vec2InstrExtndMnics[i].Value1 && + uint8(inst.Args[vec2InstrExtndMnics[i].Offset2].(Mask)) == vec2InstrExtndMnics[i].Value2 { + newOpStr = opString + 
vec2InstrExtndMnics[i].ExtnOpStr + removeArg(inst, int8(vec2InstrExtndMnics[i].Offset1)) + removeArg(inst, int8(vec2InstrExtndMnics[i].Offset2-1)) + break + } + } + case "vfee", "vfene": + var check bool + for i := 0; i < len(vec21InstrExtndMnics); i++ { + if uint8(inst.Args[vec21InstrExtndMnics[i].Offset1].(Mask)) == vec21InstrExtndMnics[i].Value1 && + uint8(inst.Args[vec21InstrExtndMnics[i].Offset2].(Mask)) == vec21InstrExtndMnics[i].Value2 { + newOpStr = opString + vec21InstrExtndMnics[i].ExtnOpStr + removeArg(inst, int8(vec21InstrExtndMnics[i].Offset1)) + removeArg(inst, int8(vec21InstrExtndMnics[i].Offset2-1)) + check = true + break + } + } + if !check { + if uint8(inst.Args[3].(Mask)) == 0 && (uint8(inst.Args[4].(Mask)) != uint8(0)) { + newOpStr = opString + vec21InstrExtndMnics[0].ExtnOpStr + removeArg(inst, int8(vec21InstrExtndMnics[0].Offset1)) + } else if uint8(inst.Args[3].(Mask)) == 1 && (uint8(inst.Args[4].(Mask)) != uint8(0)) { + newOpStr = opString + vec21InstrExtndMnics[1].ExtnOpStr + removeArg(inst, int8(vec21InstrExtndMnics[1].Offset1)) + } else if uint8(inst.Args[3].(Mask)) == 2 && (uint8(inst.Args[4].(Mask)) != uint8(0)) { + newOpStr = opString + vec21InstrExtndMnics[2].ExtnOpStr + removeArg(inst, int8(vec21InstrExtndMnics[2].Offset1)) + } else if uint8(inst.Args[4].(Mask)) == 0 { + removeArg(inst, int8(vec21InstrExtndMnics[2].Offset2)) + } + } + + case "vfae", "vstrc": + off := uint8(0) + var check bool + if opString == "vstrc" { + off = uint8(1) + } + for i := 0; i < len(vec21InstrExtndMnics)-9; i++ { + if uint8(inst.Args[vec21InstrExtndMnics[i].Offset1+off].(Mask)) == vec21InstrExtndMnics[i].Value1 && + uint8(inst.Args[vec21InstrExtndMnics[i].Offset2+off].(Mask)) == vec21InstrExtndMnics[i].Value2 { + newOpStr = opString + vec21InstrExtndMnics[i].ExtnOpStr + removeArg(inst, int8(vec21InstrExtndMnics[i].Offset1+off)) + removeArg(inst, int8(vec21InstrExtndMnics[i].Offset2+off-1)) + check = true + break + } + } + + for i := 0; !(check) && (i < 
len(vec21InstrExtndMnics)-9); i++ { + if uint8(inst.Args[vec21InstrExtndMnics[i].Offset1+off].(Mask)) == vec21InstrExtndMnics[i].Value1 && + uint8(inst.Args[vec21InstrExtndMnics[i].Offset2+off].(Mask)) == vec21InstrExtndMnics[i].Value2 { + newOpStr = opString + vec21InstrExtndMnics[i].ExtnOpStr + removeArg(inst, int8(vec21InstrExtndMnics[i].Offset1+off)) + removeArg(inst, int8(vec21InstrExtndMnics[i].Offset2+off-1)) + check = true + break + } + } + //for i := 3; !(check) && (i < len(vec21InstrExtndMnics)); i++ { + for i := len(vec21InstrExtndMnics) - 1; !(check) && (i > 2); i-- { + if uint8(inst.Args[vec21InstrExtndMnics[i].Offset1+off].(Mask)) == vec21InstrExtndMnics[i].Value1 && + uint8(inst.Args[vec21InstrExtndMnics[i].Offset2+off].(Mask))&(vec21InstrExtndMnics[i].Value2) == vec21InstrExtndMnics[i].Value2 { + x := uint8(inst.Args[vec21InstrExtndMnics[i].Offset2+off].(Mask)) ^ (vec21InstrExtndMnics[i].Value2) + newOpStr = opString + vec21InstrExtndMnics[i].ExtnOpStr + if x != 0 { + inst.Args[vec21InstrExtndMnics[i].Offset2+off] = Mask(x) + removeArg(inst, int8(vec21InstrExtndMnics[i].Offset1+off)) + check = true + break + } else { + removeArg(inst, int8(vec21InstrExtndMnics[i].Offset1+off)) + removeArg(inst, int8(vec21InstrExtndMnics[i].Offset2+off-1)) + check = true + break + } + } + } + if !check && inst.Args[4+off].(Mask) == Mask(0) { + removeArg(inst, int8(4+off)) + break + } + + case "vstrs": + var check bool + for i := 0; i < len(vec21InstrExtndMnics)-3; i++ { + if uint8(inst.Args[vec21InstrExtndMnics[i].Offset1+1].(Mask)) == vec21InstrExtndMnics[i].Value1 && + uint8(inst.Args[vec21InstrExtndMnics[i].Offset2+1].(Mask)) == vec21InstrExtndMnics[i].Value2 { + newOpStr = opString + vec21InstrExtndMnics[i].ExtnOpStr + removeArg(inst, int8(vec21InstrExtndMnics[i].Offset1+1)) + removeArg(inst, int8(vec21InstrExtndMnics[i].Offset2)) + check = true + break + } + if i == 2 { + i = i + 3 + } + } + + for i := 0; !(check) && (i < len(vec21InstrExtndMnics)-9); i++ { + if 
uint8(inst.Args[vec21InstrExtndMnics[i].Offset1+1].(Mask)) == vec21InstrExtndMnics[i].Value1 && + uint8(inst.Args[vec21InstrExtndMnics[i].Offset2+1].(Mask)) != 0 { + newOpStr = opString + vec21InstrExtndMnics[i].ExtnOpStr + removeArg(inst, int8(vec21InstrExtndMnics[i].Offset1+1)) + break + } + } + + case "vistr": + var check bool + for i := 0; i < len(vec21InstrExtndMnics)-6; i++ { + if uint8(inst.Args[vec21InstrExtndMnics[i].Offset1-1].(Mask)) == vec21InstrExtndMnics[i].Value1 && + uint8(inst.Args[vec21InstrExtndMnics[i].Offset2-1].(Mask)) == vec21InstrExtndMnics[i].Value2 { + newOpStr = opString + vec21InstrExtndMnics[i].ExtnOpStr + removeArg(inst, int8(vec21InstrExtndMnics[i].Offset1-1)) + removeArg(inst, int8(vec21InstrExtndMnics[i].Offset2-2)) + check = true + break + } + } + + for i := 0; !(check) && (i < len(vec21InstrExtndMnics)-9); i++ { + if uint8(inst.Args[vec21InstrExtndMnics[i].Offset1-1].(Mask)) == vec21InstrExtndMnics[i].Value1 && + uint8(inst.Args[vec21InstrExtndMnics[i].Offset2-1].(Mask)) != 0 { + newOpStr = opString + vec21InstrExtndMnics[i].ExtnOpStr + removeArg(inst, int8(vec21InstrExtndMnics[i].Offset1-1)) + break + } + } + + if uint8(inst.Args[3].(Mask)) == 0 { + removeArg(inst, int8(3)) + break + } + + case "vcfps": + if inst.Args[2].(Mask) == Mask(2) && ((inst.Args[3].(Mask)>>3)&(0x1) == 1) { + inst.Args[3] = Mask((inst.Args[3].(Mask)) ^ (0x8)) + newOpStr = "wcefb" + removeArg(inst, int8(2)) + break + } else if inst.Args[2].(Mask) == Mask(3) && ((inst.Args[3].(Mask)>>3)&(0x1) == 1) { + inst.Args[3] = Mask((inst.Args[3].(Mask)) ^ (0x8)) + newOpStr = "wcdgb" + removeArg(inst, int8(2)) + break + } else if uint8(inst.Args[2].(Mask)) == uint8(2) { + newOpStr = "vcefb" + removeArg(inst, int8(2)) + break + } else if uint8(inst.Args[2].(Mask)) == uint8(3) { + newOpStr = "vcdgb" + removeArg(inst, int8(2)) + break + } + + case "vcfpl": + if inst.Args[2].(Mask) == Mask(2) && ((inst.Args[3].(Mask)>>3)&(0x1) == 1) { + inst.Args[3] = 
Mask((inst.Args[3].(Mask)) ^ (0x8)) + newOpStr = "wcelfb" + removeArg(inst, int8(2)) + break + } else if inst.Args[2].(Mask) == Mask(3) && ((inst.Args[3].(Mask)>>3)&(0x1) == 1) { + inst.Args[3] = Mask((inst.Args[3].(Mask)) ^ (0x8)) + newOpStr = "wcdlgb" + removeArg(inst, int8(2)) + break + } else if inst.Args[2].(Mask) == Mask(2) { + newOpStr = "vcelfb" + removeArg(inst, int8(2)) + break + } else if inst.Args[2].(Mask) == Mask(3) { + newOpStr = "vcdlgb" + removeArg(inst, int8(2)) + break + } + + case "vcsfp": + if inst.Args[2].(Mask) == Mask(2) && ((inst.Args[3].(Mask)>>3)&(0x1) == 1) { + inst.Args[3] = Mask((inst.Args[3].(Mask)) ^ (0x8)) + newOpStr = "wcfeb" + removeArg(inst, int8(2)) + break + } else if inst.Args[2].(Mask) == Mask(3) && ((inst.Args[3].(Mask)>>3)&(0x1) == 1) { + inst.Args[3] = Mask((inst.Args[3].(Mask)) ^ (0x8)) + newOpStr = "wcgdb" + removeArg(inst, int8(2)) + break + } else if inst.Args[2].(Mask) == Mask(2) { + newOpStr = "vcfeb" + removeArg(inst, int8(2)) + break + } else if inst.Args[2].(Mask) == Mask(3) { + newOpStr = "vcgdb" + removeArg(inst, int8(2)) + break + } + + case "vclfp": + if inst.Args[2].(Mask) == Mask(2) && ((inst.Args[3].(Mask)>>3)&(0x1) == 1) { + inst.Args[3] = Mask((inst.Args[3].(Mask)) ^ (0x8)) + newOpStr = "wclfeb" + removeArg(inst, int8(2)) + break + } else if inst.Args[2].(Mask) == Mask(3) && ((inst.Args[3].(Mask)>>3)&(0x1) == 1) { + inst.Args[3] = Mask((inst.Args[3].(Mask)) ^ (0x8)) + newOpStr = "wclgdb" + removeArg(inst, int8(2)) + break + } else if inst.Args[2].(Mask) == Mask(2) { + newOpStr = "vclfeb" + removeArg(inst, int8(2)) + break + } else if inst.Args[2].(Mask) == Mask(3) { + newOpStr = "vclgdb" + removeArg(inst, int8(2)) + break + } + + case "vfi": + if inst.Args[2].(Mask) == Mask(2) && ((inst.Args[3].(Mask)>>3)&(0x1) == 1) { + newOpStr = "wfisb" + removeArg(inst, int8(2)) + inst.Args[2] = Mask((inst.Args[2].(Mask)) ^ (0x8)) + break + } else if inst.Args[2].(Mask) == Mask(3) && ((inst.Args[3].(Mask)>>3)&(0x3) == 
1) { + newOpStr = "wfidb" + removeArg(inst, int8(2)) + inst.Args[2] = Mask((inst.Args[2].(Mask)) ^ (0x8)) + break + } else if inst.Args[2].(Mask) == Mask(4) && ((inst.Args[3].(Mask)>>3)&(0x1) == 1) { + newOpStr = "wfixb" + removeArg(inst, int8(2)) + inst.Args[2] = Mask((inst.Args[2].(Mask)) ^ (0x8)) + break + } else if inst.Args[2].(Mask) == Mask(2) { + newOpStr = "vfisb" + removeArg(inst, int8(2)) + break + } else if inst.Args[2].(Mask) == Mask(3) { + newOpStr = "vfidb" + removeArg(inst, int8(2)) + break + } + + // Case to handle few vector instructions with 2 M-field operands + case "vfa", "vfd", "vfll", "vfmax", "vfmin", "vfm": + for i := 0; i < len(vec4InstrExtndMnics); i++ { + if opString == vec4InstrExtndMnics[i].BaseOpStr && + uint8(inst.Args[vec4InstrExtndMnics[i].Offset1].(Mask)) == vec4InstrExtndMnics[i].Value1 && + uint8(inst.Args[vec4InstrExtndMnics[i].Offset2].(Mask)) == vec4InstrExtndMnics[i].Value2 { + newOpStr = vec4InstrExtndMnics[i].ExtnOpStr + removeArg(inst, int8(vec4InstrExtndMnics[i].Offset1)) + removeArg(inst, int8(vec4InstrExtndMnics[i].Offset2-1)) + break + } + } + + // Case to handle few special "vector" instructions with 2 M-field operands + case "wfc", "wfk": + for i := 0; i < len(vec3InstrExtndMnics); i++ { + if uint8(inst.Args[vec3InstrExtndMnics[i].Offset1].(Mask)) == vec3InstrExtndMnics[i].Value1 && + uint8(inst.Args[vec3InstrExtndMnics[i].Offset2].(Mask)) == vec3InstrExtndMnics[i].Value2 { + newOpStr = opString + vec3InstrExtndMnics[i].ExtnOpStr + removeArg(inst, int8(vec3InstrExtndMnics[i].Offset1)) + removeArg(inst, int8(vec3InstrExtndMnics[i].Offset2-1)) + break + } + } + + // Case to handle few vector instructions with 2 M-field operands + case "vfma", "vfms", "vfnma", "vfnms": + for i := 0; i < len(vec7InstrExtndMnics); i++ { + if opString == vec7InstrExtndMnics[i].BaseOpStr && + uint8(inst.Args[vec7InstrExtndMnics[i].Offset1].(Mask)) == vec7InstrExtndMnics[i].Value1 && + uint8(inst.Args[vec7InstrExtndMnics[i].Offset2].(Mask)) 
== vec7InstrExtndMnics[i].Value2 { + newOpStr = vec7InstrExtndMnics[i].ExtnOpStr + removeArg(inst, int8(vec7InstrExtndMnics[i].Offset1)) + removeArg(inst, int8(vec7InstrExtndMnics[i].Offset2-1)) + break + } + } + + // List of instructions with 3 M-field operands. + case "vfce", "vfch", "vfche", "vfpso": + for i := 0; i < len(vec6InstrExtndMnics); i++ { + if opString == vec6InstrExtndMnics[i].BaseOpStr && + uint8(inst.Args[vec6InstrExtndMnics[i].Offset1].(Mask)) == vec6InstrExtndMnics[i].Value1 && + uint8(inst.Args[vec6InstrExtndMnics[i].Offset2].(Mask)) == vec6InstrExtndMnics[i].Value2 && + uint8(inst.Args[vec6InstrExtndMnics[i].Offset3].(Mask)) == vec6InstrExtndMnics[i].Value3 { + newOpStr = vec6InstrExtndMnics[i].ExtnOpStr + removeArg(inst, int8(vec6InstrExtndMnics[i].Offset1)) + removeArg(inst, int8(vec6InstrExtndMnics[i].Offset2-1)) + removeArg(inst, int8(vec6InstrExtndMnics[i].Offset3-2)) + break + } + } + + default: + return opString + } + return newOpStr +} + +// This is the function that is called to print the disassembled instruction +// in the GNU (AT&T) syntax form. +func GNUSyntax(inst Inst, pc uint64) string { + if inst.Enc == 0 { + return ".long 0x0" + } else if inst.Op == 0 { + return "error: unknown instruction" + } + return inst.String(pc) +} + +// removeArg removes the arg in inst.Args[index]. +func removeArg(inst *Inst, index int8) { + for i := int(index); i < len(inst.Args); i++ { + if i+1 < len(inst.Args) { + inst.Args[i] = inst.Args[i+1] + } else { + inst.Args[i] = nil + } + } +} diff --git a/s390x/s390xasm/inst.go b/s390x/s390xasm/inst.go new file mode 100644 index 00000000..19d70156 --- /dev/null +++ b/s390x/s390xasm/inst.go @@ -0,0 +1,399 @@ +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package s390xasm + +import ( + "bytes" + "fmt" + "strings" +) + +type Inst struct { + Op Op // Opcode mnemonic + Enc uint64 // Raw encoding bits (if Len == 8, this is the prefix word) + Len int // Length of encoding in bytes. + Args Args // Instruction arguments, in Power ISA manual order. +} + +func (i Inst) String(pc uint64) string { + var buf bytes.Buffer + var rxb_check bool + m := i.Op.String() + if strings.HasPrefix(m, "v") || strings.Contains(m, "wfc") || strings.Contains(m, "wfk") { + rxb_check = true + } + mnemonic := HandleExtndMnemonic(&i) + buf.WriteString(fmt.Sprintf("%s", mnemonic)) + for j, arg := range i.Args { + if arg == nil { + break + } + if j == 0 { + buf.WriteString(" ") + } else { + switch arg.(type) { + case VReg, Reg: + if _, ok := i.Args[j-1].(Disp12); ok { + buf.WriteString("") + } else if _, ok := i.Args[j-1].(Disp20); ok { + buf.WriteString("") + } else { + buf.WriteString(",") + } + case Base: + if _, ok := i.Args[j-1].(VReg); ok { + buf.WriteString(",") + } else if _, ok := i.Args[j-1].(Reg); ok { + buf.WriteString(",") + } + case Index, Len: + default: + buf.WriteString(",") + } + } + buf.WriteString(arg.String(pc)) + if rxb_check && i.Args[j+2] == nil { + break + } + } + return buf.String() +} + +// An Op is an instruction operation. +type Op uint16 + +func (o Op) String() string { + if int(o) >= len(opstr) || opstr[o] == "" { + return fmt.Sprintf("Op(%d)", int(o)) + } + return opstr[o] +} + +// An Arg is a single instruction argument. +// One of these types: Reg, Base, Index, Disp20, Disp12, Len, Mask, Sign8, Sign16, Sign32, RegIm12, RegIm16, RegIm24, RegIm32. +type Arg interface { + IsArg() + String(pc uint64) string +} + +// An Args holds the instruction arguments. +// If an instruction has fewer than 6 arguments, +// the final elements in the array are nil. 
+type Args [8]Arg + +// Base represents an 4-bit Base Register field +type Base uint8 + +const ( + B0 Base = iota + B1 + B2 + B3 + B4 + B5 + B6 + B7 + B8 + B9 + B10 + B11 + B12 + B13 + B14 + B15 +) + +func (Base) IsArg() {} +func (r Base) String(pc uint64) string { + switch { + case B1 <= r && r <= B15: + s := "%" + return fmt.Sprintf("%sr%d)", s, int(r-B0)) + case B0 == r: + return fmt.Sprintf("") + default: + return fmt.Sprintf("Base(%d)", int(r)) + } +} + +// Index represents an 4-bit Index Register field +type Index uint8 + +const ( + X0 Index = iota + X1 + X2 + X3 + X4 + X5 + X6 + X7 + X8 + X9 + X10 + X11 + X12 + X13 + X14 + X15 +) + +func (Index) IsArg() {} +func (r Index) String(pc uint64) string { + switch { + case X1 <= r && r <= X15: + s := "%" + return fmt.Sprintf("%sr%d,", s, int(r-X0)) + case X0 == r: + return fmt.Sprintf("") + default: + return fmt.Sprintf("Base(%d)", int(r)) + } +} + +// Disp20 represents an 20-bit Unsigned Displacement +type Disp20 uint32 + +func (Disp20) IsArg() {} +func (r Disp20) String(pc uint64) string { + if (r>>19)&0x01 == 1 { + return fmt.Sprintf("%d(", int32(r|0xfff<<20)) + } else { + return fmt.Sprintf("%d(", int32(r)) + } +} + +// Disp12 represents an 12-bit Unsigned Displacement +type Disp12 uint16 + +func (Disp12) IsArg() {} +func (r Disp12) String(pc uint64) string { + return fmt.Sprintf("%d(", r) +} + +// RegIm12 represents an 12-bit Register immediate number. +type RegIm12 uint16 + +func (RegIm12) IsArg() {} +func (r RegIm12) String(pc uint64) string { + if (r>>11)&0x01 == 1 { + return fmt.Sprintf("%#x", pc+(2*uint64(int16(r|0xf<<12)))) + } else { + return fmt.Sprintf("%#x", pc+(2*uint64(int16(r)))) + } +} + +// RegIm16 represents an 16-bit Register immediate number. +type RegIm16 uint16 + +func (RegIm16) IsArg() {} +func (r RegIm16) String(pc uint64) string { + return fmt.Sprintf("%#x", pc+(2*uint64(int16(r)))) +} + +// RegIm24 represents an 24-bit Register immediate number. 
+type RegIm24 uint32 + +func (RegIm24) IsArg() {} +func (r RegIm24) String(pc uint64) string { + if (r>>23)&0x01 == 1 { + return fmt.Sprintf("%#x", pc+(2*uint64(int32(r|0xff<<24)))) + } else { + return fmt.Sprintf("%#x", pc+(2*uint64(int32(r)))) + } +} + +// RegIm32 represents an 32-bit Register immediate number. +type RegIm32 uint32 + +func (RegIm32) IsArg() {} +func (r RegIm32) String(pc uint64) string { + return fmt.Sprintf("%#x", pc+(2*uint64(int32(r)))) +} + +// A Reg is a single register. The zero value means R0, not the absence of a register. +// It also includes special registers. +type Reg uint16 + +const ( + R0 Reg = iota + R1 + R2 + R3 + R4 + R5 + R6 + R7 + R8 + R9 + R10 + R11 + R12 + R13 + R14 + R15 + F0 + F1 + F2 + F3 + F4 + F5 + F6 + F7 + F8 + F9 + F10 + F11 + F12 + F13 + F14 + F15 + A0 + A1 + A2 + A3 + A4 + A5 + A6 + A7 + A8 + A9 + A10 + A11 + A12 + A13 + A14 + A15 + C0 + C1 + C2 + C3 + C4 + C5 + C6 + C7 + C8 + C9 + C10 + C11 + C12 + C13 + C14 + C15 +) + +func (Reg) IsArg() {} +func (r Reg) String(pc uint64) string { + s := "%" + switch { + case R0 <= r && r <= R15: + return fmt.Sprintf("%sr%d", s, int(r-R0)) + case F0 <= r && r <= F15: + return fmt.Sprintf("%sf%d", s, int(r-F0)) + case A0 <= r && r <= A15: + return fmt.Sprintf("%sa%d", s, int(r-A0)) + case C0 <= r && r <= C15: + return fmt.Sprintf("%sc%d", s, int(r-C0)) + default: + return fmt.Sprintf("Reg(%d)", int(r)) + } +} + +// VReg is a vector register. The zero value means V0, not the absence of a register. + +type VReg uint8 + +const ( + V0 VReg = iota + V1 + V2 + V3 + V4 + V5 + V6 + V7 + V8 + V9 + V10 + V11 + V12 + V13 + V14 + V15 + V16 + V17 + V18 + V19 + V20 + V21 + V22 + V23 + V24 + V25 + V26 + V27 + V28 + V29 + V30 + V31 +) + +func (VReg) IsArg() {} +func (r VReg) String(pc uint64) string { + s := "%" + if V0 <= r && r <= V31 { + return fmt.Sprintf("%sv%d", s, int(r-V0)) + } else { + return fmt.Sprintf("VReg(%d)", int(r)) + } +} + +// Imm represents an immediate number. 
+type Imm uint32 + +func (Imm) IsArg() {} +func (i Imm) String(pc uint64) string { + return fmt.Sprintf("%d", uint32(i)) +} + +// Sign8 represents an 8-bit signed immediate number. +type Sign8 int8 + +func (Sign8) IsArg() {} +func (i Sign8) String(pc uint64) string { + return fmt.Sprintf("%d", i) +} + +// Sign16 represents an 16-bit signed immediate number. +type Sign16 int16 + +func (Sign16) IsArg() {} +func (i Sign16) String(pc uint64) string { + return fmt.Sprintf("%d", i) +} + +// Sign32 represents an 32-bit signed immediate number. +type Sign32 int32 + +func (Sign32) IsArg() {} +func (i Sign32) String(pc uint64) string { + return fmt.Sprintf("%d", i) +} + +// Mask represents an 4-bit mask value +type Mask uint8 + +func (Mask) IsArg() {} +func (i Mask) String(pc uint64) string { + return fmt.Sprintf("%d", i) +} + +// Len represents an 8-bit type holds 4/8-bit Len argument +type Len uint8 + +func (Len) IsArg() {} +func (i Len) String(pc uint64) string { + return fmt.Sprintf("%d,", uint16(i)+1) +} diff --git a/s390x/s390xasm/tables.go b/s390x/s390xasm/tables.go new file mode 100644 index 00000000..f0db5e90 --- /dev/null +++ b/s390x/s390xasm/tables.go @@ -0,0 +1,5046 @@ +// Code generated by s390xmap -fmt=decoder ../s390x.csv DO NOT EDIT. 
+ +package s390xasm + +const ( + _ Op = iota + A + AR + ARK + AY + AG + AGR + AGRK + AGF + AGFR + AXBR + AXTR + AXTRA + ADB + ADBR + ADTR + ADTRA + AEB + AEBR + AP + AH + AHY + AGH + AHI + AGHI + AHHHR + AHHLR + AFI + AHIK + ASI + AGHIK + AGFI + AGSI + AIH + AL + ALR + ALRK + ALY + ALG + ALGR + ALGRK + ALGF + ALGFR + ALHHHR + ALHHLR + ALFI + ALGFI + ALC + ALCR + ALCG + ALCGR + ALHSIK + ALSI + ALGHSIK + ALGSI + ALSIH + ALSIHN + AXR + AD + ADR + AE + AER + AW + AWR + AU + AUR + N + NR + NRK + NY + NG + NGR + NGRK + NC + NI + NIY + NIHH + NIHL + NIHF + NILH + NILL + NILF + NCRK + NCGRK + BAL + BALR + BAS + BASR + BASSM + BSA + BSM + BAKR + BSG + BIC + BC + BCR + BCT + BCTR + BCTG + BCTGR + BXH + BXHG + BXLE + BXLEG + BPP + BPRP + BRAS + BRASL + BRC + BRCL + BRCT + BRCTG + BRCTH + BRXH + BRXHG + BRXLE + BRXLG + XSCH + CKSM + KM + KMA + KMC + KMF + KMCTR + KMO + CSCH + C + CR + CY + CG + CGR + CGF + CGFR + CXBR + CXTR + CXR + CDB + CDBR + CDTR + CD + CDR + CEB + CEBR + CE + CER + CRB + CGRB + CRJ + CGRJ + CFC + CRDTE + KXBR + KXTR + KDB + KDBR + KDTR + KEB + KEBR + CS + CSY + CSG + CSP + CSPG + CSST + CRT + CGRT + CEXTR + CEDTR + CP + CDS + CDSY + CDSG + CH + CHY + CGH + CHHSI + CHI + CHSI + CGHI + CGHSI + CHRL + CGHRL + CHF + CHHR + CHLR + CFI + CGFI + CIB + CGIB + CIJ + CGIJ + CIT + CGIT + CIH + CL + CLR + CLY + CLG + CLGR + CLGF + CLGFR + CLC + CLI + CLIY + CLRB + CLGRB + CLRJ + CLGRJ + CLRT + CLT + CLGRT + CLGT + CLMH + CLM + CLMY + CLHF + CLHHR + CLHLR + CLHHSI + CLFI + CLFHSI + CLGHSI + CLGFI + CLIB + CLGIB + CLIJ + CLGIJ + CLFIT + CLGIT + CLIH + CLCL + CLCLE + CLCLU + CLRL + CLHRL + CLGRL + CLGHRL + CLGFRL + CLST + CRL + CGRL + CGFRL + CUSE + CMPSC + KDSA + KIMD + KLMD + KMAC + THDR + THDER + CXFBR + CXFBRA + CXFTR + CXFR + CDFBR + CDFBRA + CDFTR + CDFR + CEFBR + CEFBRA + CEFR + CXGBR + CXGBRA + CXGTR + CXGTRA + CXGR + CDGBR + CDGBRA + CDGTR + CDGTRA + CDGR + CEGBR + CEGBRA + CEGR + CXLFBR + CXLFTR + CDLFBR + CDLFTR + CELFBR + CXLGBR + CXLGTR + CDLGBR + CDLGTR + 
CELGBR + CXPT + CDPT + CXSTR + CDSTR + CXUTR + CDUTR + CXZT + CDZT + TBEDR + TBDR + CVB + CVBY + CVBG + CVD + CVDY + CVDG + CFXBR + CFXBRA + CGXBR + CGXBRA + CFXTR + CGXTR + CGXTRA + CFXR + CGXR + CFDBR + CFDBRA + CGDBR + CGDBRA + CFDTR + CGDTR + CGDTRA + CFDR + CGDR + CFEBR + CFEBRA + CGEBR + CGEBRA + CFER + CGER + CLFXBR + CLGXBR + CLFXTR + CLGXTR + CLFDBR + CLGDBR + CLFDTR + CLGDTR + CLFEBR + CLGEBR + CPXT + CPDT + CSXTR + CSDTR + CUXTR + CUDTR + CZXT + CZDT + CU24 + CU21 + CU12 + CU14 + CU42 + CU41 + CPYA + CPSDR + VSCSHP + VSCHP + DFLTCC + D + DR + DXBR + DXTR + DXTRA + DXR + DDB + DDBR + DDTR + DDTRA + DD + DDR + DEB + DEBR + DE + DER + DP + DL + DLR + DLG + DLGR + DSG + DSGR + DSGF + DSGFR + DIDBR + DIEBR + ED + EDMK + X + XR + XRK + XY + XG + XGR + XGRK + XC + XI + XIY + XIHF + XILF + EX + EXRL + EAR + ESEA + EEXTR + EEDTR + ECAG + ECTG + EFPC + EPAR + EPAIR + EPSW + ESAR + ESAIR + ESXTR + ESDTR + EREG + EREGG + ESTA + ETND + FLOGR + HSCH + HDR + HER + IAC + IEXTR + IEDTR + IC + ICY + ICMH + ICM + ICMY + IIHH + IIHL + IIHF + IILH + IILL + IILF + IPM + IPK + IRBM + ISKE + IVSK + IDTE + IPTE + L + LR + LY + LG + LGR + LGF + LGFR + LXR + LD + LDR + LDY + LE + LER + LEY + LAM + LAMY + LA + LAY + LAE + LAEY + LARL + LASP + LAA + LAAG + LAAL + LAALG + LAN + LANG + LAX + LAXG + LAO + LAOG + LT + LTR + LTG + LTGR + LTGF + LTGFR + LTXBR + LTXTR + LTXR + LTDBR + LTDTR + LTDR + LTEBR + LTER + LAT + LGAT + LZRF + LZRG + LBEAR + LB + LBR + LGB + LGBR + LBH + LCR + LCGR + LCGFR + LCXBR + LCXR + LCDBR + LCDR + LCDFR + LCEBR + LCER + LCTL + LCTLG + LCBB + FIXBR + FIXBRA + FIXTR + FIXR + FIDBR + FIDBRA + FIDTR + FIDR + FIEBR + FIEBRA + FIER + LFPC + LFAS + LDGR + LGDR + LGG + LGSC + LH + LHR + LHY + LGH + LGHR + LHH + LOCHHI + LHI + LGHI + LOCHI + LOCGHI + LHRL + LGHRL + LFH + LFHAT + LOCFH + LOCFHR + LGFI + LXDB + LXDBR + LXDTR + LXD + LXDR + LXEB + LXEBR + LXE + LXER + LDEB + LDEBR + LDETR + LDE + LDER + LLGF + LLGFR + LLGFSG + LLGFAT + LLZRGF + LLC + LLCR + LLGC + LLGCR 
+ LLCH + LLH + LLHR + LLGH + LLGHR + LLHH + LLHRL + LLGHRL + LLIHH + LLIHL + LLIHF + LLILH + LLILL + LLILF + LLGFRL + LLGT + LLGTR + LLGTAT + LM + LMY + LMG + LMD + LMH + LNR + LNGR + LNGFR + LNXBR + LNXR + LNDBR + LNDR + LNDFR + LNEBR + LNER + LOC + LOCR + LOCG + LOCGR + LPTEA + LPD + LPDG + LPQ + LPR + LPGR + LPGFR + LPXBR + LPXR + LPDBR + LPDR + LPDFR + LPEBR + LPER + LPSW + LPSWE + LPSWEY + LRA + LRAY + LRAG + LRL + LGRL + LGFRL + LRVH + LRV + LRVR + LRVG + LRVGR + LDXBR + LDXBRA + LDXTR + LDXR + LRDR + LEXBR + LEXBRA + LEXR + LEDBR + LEDBRA + LEDTR + LEDR + LRER + LURA + LURAG + LZXR + LZDR + LZER + MSTA + MSCH + MC + MVHHI + MVHI + MVGHI + MVC + MVI + MVIY + MVCIN + MVCL + MVCLE + MVCLU + MVN + MVPG + MVCRL + MVST + MVCP + MVCS + MVCDK + MVCK + MVO + MVCOS + MVCSK + MVZ + MG + MGRK + M + MFY + MR + MXBR + MXTR + MXTRA + MXR + MDB + MDBR + MDTR + MDTRA + MD + MDR + MXDB + MXDBR + MXD + MXDR + MEEB + MEEBR + MEE + MEER + MDEB + MDEBR + MDE + MDER + ME + MER + MAY + MAYR + MADB + MADBR + MAD + MADR + MAEB + MAEBR + MAE + MAER + MAYH + MAYHR + MAYL + MAYLR + MSDB + MSDBR + MSD + MSDR + MSEB + MSEBR + MSE + MSER + MP + MH + MHY + MGH + MHI + MGHI + MLG + MLGR + ML + MLR + MS + MSC + MSR + MSRKC + MSY + MSG + MSGC + MSGR + MSGRKC + MSGF + MSGFR + MSFI + MSGFI + MYH + MYHR + MYL + MYLR + MY + MYR + NNRK + NNGRK + NNPA + NIAI + NTSTG + NORK + NOGRK + NXRK + NXGRK + O + OR + ORK + OY + OG + OGR + OGRK + OC + OI + OIY + OIHH + OIHL + OIHF + OILH + OILL + OILF + OCRK + OCGRK + PACK + PKA + PKU + PGIN + PGOUT + PCC + PCKMO + PFPO + PFMF + PLO + PPA + PRNO + PTFF + PTF + POPCNT + PFD + PFDRL + PC + PR + PT + PTI + PALB + PTLB + QAXTR + QADTR + QPACI + RRXTR + RRDTR + RCHP + RDP + RRBE + RRBM + RP + RSCH + RLL + RLLG + RNSBG + RXSBG + RISBG + RISBGN + RISBHG + RISBLG + ROSBG + SRST + SRSTU + SELR + SELGR + SELFHR + SAR + SAL + SAC + SACF + SAM24 + SAM31 + SAM64 + SRNM + SRNMB + SCHM + SCK + SCKC + SCKPF + SPT + SRNMT + SFPC + SFASR + SPX + SPM + SPKA + SSAR + SSAIR + SSKE 
+ SSM + SRP + SLDA + SLDL + SLA + SLAK + SLAG + SLL + SLLK + SLLG + SRDA + SRDL + SRA + SRAK + SRAG + SRL + SRLK + SRLG + SLXT + SLDT + SRXT + SRDT + SIGP + SORTL + SQXBR + SQXR + SQDB + SQDBR + SQD + SQDR + SQEB + SQEBR + SQE + SQER + SSCH + ST + STY + STG + STD + STDY + STE + STEY + STAM + STAMY + STBEAR + STCPS + STCRW + STC + STCY + STCH + STCMH + STCM + STCMY + STCK + STCKC + STCKE + STCKF + STCTL + STCTG + STAP + STIDP + STPT + STFL + STFLE + STFPC + STGSC + STH + STHY + STHH + STHRL + STFH + STOCFH + STM + STMY + STMG + STMH + STOC + STOCG + STPQ + STPX + STRAG + STRL + STGRL + STRVH + STRV + STRVG + STSCH + STSI + STNSM + STOSM + STURA + STURG + S + SR + SRK + SY + SG + SGR + SGRK + SGF + SGFR + SXBR + SXTR + SXTRA + SDB + SDBR + SDTR + SDTRA + SEB + SEBR + SP + SH + SHY + SGH + SHHHR + SHHLR + SL + SLR + SLRK + SLY + SLG + SLGR + SLGRK + SLGF + SLGFR + SLHHHR + SLHHLR + SLFI + SLGFI + SLB + SLBR + SLBG + SLBGR + SXR + SD + SDR + SE + SER + SW + SWR + SU + SUR + SVC + TAR + TAM + TS + TB + TCXB + TDCXT + TCDB + TDCDT + TCEB + TDCET + TDGXT + TDGDT + TDGET + TP + TPEI + TPI + TPROT + TSCH + TM + TMY + TMHH + TMHL + TMLH + TMLL + TMH + TML + TRACE + TRACG + TABORT + TBEGINC + TBEGIN + TEND + TR + TRT + TRTE + TRTR + TRTRE + TRE + TROO + TROT + TRTO + TRTT + TRAP2 + TRAP4 + UNPK + UNPKA + UNPKU + UPT + VA + VACC + VAP + VAC + VACCC + VN + VNC + VAVG + VAVGL + VBPERM + VCKSM + VCP + VCEQ + VCH + VCHL + VCSPH + VCVB + VCVBG + VCVD + VCVDG + VCLZDP + VCLZ + VCTZ + VDP + VEC + VECL + VERIM + VERLL + VERLLV + VESLV + VESL + VESRA + VESRAV + VESRL + VESRLV + VX + VFAE + VFEE + VFENE + VFA + WFK + VFCE + VFCH + VFCHE + WFC + VCLFNH + VCLFNL + VCRNF + VCFPS + VCDG + VCFPL + VCDLG + VCFN + VCSFP + VCGD + VCLFP + VCLGD + VCNF + VFD + VFLL + VFLR + VFMAX + VFMIN + VFM + VFMA + VFMS + VFNMA + VFNMS + VFPSO + VFSQ + VFS + VFTCI + VGFM + VGFMA + VGEF + VGEG + VGBM + VGM + VISTR + VL + VLR + VLREP + VLEBRH + VLEBRF + VLEBRG + VLBRREP + VLLEBRZ + VLBR + VLC + VLEH + VLEF + 
VLEG + VLEB + VLEIH + VLEIF + VLEIG + VLEIB + VLER + VFI + VLGV + VLIP + VLLEZ + VLM + VLP + VLRL + VLRLR + VLBB + VLVG + VLVGP + VLL + VMX + VMXL + VMRH + VMRL + VMN + VMNL + VMAE + VMAH + VMALE + VMALH + VMALO + VMAL + VMAO + VMSP + VMP + VME + VMH + VMLE + VMLH + VMLO + VML + VMO + VMSL + VNN + VNO + VNX + VO + VOC + VPK + VPKLS + VPKS + VPKZ + VPKZR + VPSOP + VPERM + VPDI + VPOPCT + VRP + VREP + VREPI + VSCEF + VSCEG + VSEL + VSDP + VSRP + VSRPR + VSL + VSLB + VSLD + VSLDB + VSRA + VSRAB + VSRD + VSRL + VSRLB + VSEG + VST + VSTEBRH + VSTEBRF + VSTEBRG + VSTBR + VSTEH + VSTEF + VSTEG + VSTEB + VSTER + VSTM + VSTRL + VSTRLR + VSTL + VSTRC + VSTRS + VS + VSCBI + VSP + VSBCBI + VSBI + VSUMG + VSUMQ + VSUM + VTP + VTM + VUPH + VUPLH + VUPLL + VUPL + VUPKZ + VUPKZH + VUPKZL + ZAP +) + +var opstr = [...]string{ + A: "a", + AR: "ar", + ARK: "ark", + AY: "ay", + AG: "ag", + AGR: "agr", + AGRK: "agrk", + AGF: "agf", + AGFR: "agfr", + AXBR: "axbr", + AXTR: "axtr", + AXTRA: "axtra", + ADB: "adb", + ADBR: "adbr", + ADTR: "adtr", + ADTRA: "adtra", + AEB: "aeb", + AEBR: "aebr", + AP: "ap", + AH: "ah", + AHY: "ahy", + AGH: "agh", + AHI: "ahi", + AGHI: "aghi", + AHHHR: "ahhhr", + AHHLR: "ahhlr", + AFI: "afi", + AHIK: "ahik", + ASI: "asi", + AGHIK: "aghik", + AGFI: "agfi", + AGSI: "agsi", + AIH: "aih", + AL: "al", + ALR: "alr", + ALRK: "alrk", + ALY: "aly", + ALG: "alg", + ALGR: "algr", + ALGRK: "algrk", + ALGF: "algf", + ALGFR: "algfr", + ALHHHR: "alhhhr", + ALHHLR: "alhhlr", + ALFI: "alfi", + ALGFI: "algfi", + ALC: "alc", + ALCR: "alcr", + ALCG: "alcg", + ALCGR: "alcgr", + ALHSIK: "alhsik", + ALSI: "alsi", + ALGHSIK: "alghsik", + ALGSI: "algsi", + ALSIH: "alsih", + ALSIHN: "alsihn", + AXR: "axr", + AD: "ad", + ADR: "adr", + AE: "ae", + AER: "aer", + AW: "aw", + AWR: "awr", + AU: "au", + AUR: "aur", + N: "n", + NR: "nr", + NRK: "nrk", + NY: "ny", + NG: "ng", + NGR: "ngr", + NGRK: "ngrk", + NC: "nc", + NI: "ni", + NIY: "niy", + NIHH: "nihh", + NIHL: "nihl", + NIHF: "nihf", + 
NILH: "nilh", + NILL: "nill", + NILF: "nilf", + NCRK: "ncrk", + NCGRK: "ncgrk", + BAL: "bal", + BALR: "balr", + BAS: "bas", + BASR: "basr", + BASSM: "bassm", + BSA: "bsa", + BSM: "bsm", + BAKR: "bakr", + BSG: "bsg", + BIC: "bic", + BC: "bc", + BCR: "bcr", + BCT: "bct", + BCTR: "bctr", + BCTG: "bctg", + BCTGR: "bctgr", + BXH: "bxh", + BXHG: "bxhg", + BXLE: "bxle", + BXLEG: "bxleg", + BPP: "bpp", + BPRP: "bprp", + BRAS: "bras", + BRASL: "brasl", + BRC: "brc", + BRCL: "brcl", + BRCT: "brct", + BRCTG: "brctg", + BRCTH: "brcth", + BRXH: "brxh", + BRXHG: "brxhg", + BRXLE: "brxle", + BRXLG: "brxlg", + XSCH: "xsch", + CKSM: "cksm", + KM: "km", + KMA: "kma", + KMC: "kmc", + KMF: "kmf", + KMCTR: "kmctr", + KMO: "kmo", + CSCH: "csch", + C: "c", + CR: "cr", + CY: "cy", + CG: "cg", + CGR: "cgr", + CGF: "cgf", + CGFR: "cgfr", + CXBR: "cxbr", + CXTR: "cxtr", + CXR: "cxr", + CDB: "cdb", + CDBR: "cdbr", + CDTR: "cdtr", + CD: "cd", + CDR: "cdr", + CEB: "ceb", + CEBR: "cebr", + CE: "ce", + CER: "cer", + CRB: "crb", + CGRB: "cgrb", + CRJ: "crj", + CGRJ: "cgrj", + CFC: "cfc", + CRDTE: "crdte", + KXBR: "kxbr", + KXTR: "kxtr", + KDB: "kdb", + KDBR: "kdbr", + KDTR: "kdtr", + KEB: "keb", + KEBR: "kebr", + CS: "cs", + CSY: "csy", + CSG: "csg", + CSP: "csp", + CSPG: "cspg", + CSST: "csst", + CRT: "crt", + CGRT: "cgrt", + CEXTR: "cextr", + CEDTR: "cedtr", + CP: "cp", + CDS: "cds", + CDSY: "cdsy", + CDSG: "cdsg", + CH: "ch", + CHY: "chy", + CGH: "cgh", + CHHSI: "chhsi", + CHI: "chi", + CHSI: "chsi", + CGHI: "cghi", + CGHSI: "cghsi", + CHRL: "chrl", + CGHRL: "cghrl", + CHF: "chf", + CHHR: "chhr", + CHLR: "chlr", + CFI: "cfi", + CGFI: "cgfi", + CIB: "cib", + CGIB: "cgib", + CIJ: "cij", + CGIJ: "cgij", + CIT: "cit", + CGIT: "cgit", + CIH: "cih", + CL: "cl", + CLR: "clr", + CLY: "cly", + CLG: "clg", + CLGR: "clgr", + CLGF: "clgf", + CLGFR: "clgfr", + CLC: "clc", + CLI: "cli", + CLIY: "cliy", + CLRB: "clrb", + CLGRB: "clgrb", + CLRJ: "clrj", + CLGRJ: "clgrj", + CLRT: "clrt", + CLT: "clt", + CLGRT: 
"clgrt", + CLGT: "clgt", + CLMH: "clmh", + CLM: "clm", + CLMY: "clmy", + CLHF: "clhf", + CLHHR: "clhhr", + CLHLR: "clhlr", + CLHHSI: "clhhsi", + CLFI: "clfi", + CLFHSI: "clfhsi", + CLGHSI: "clghsi", + CLGFI: "clgfi", + CLIB: "clib", + CLGIB: "clgib", + CLIJ: "clij", + CLGIJ: "clgij", + CLFIT: "clfit", + CLGIT: "clgit", + CLIH: "clih", + CLCL: "clcl", + CLCLE: "clcle", + CLCLU: "clclu", + CLRL: "clrl", + CLHRL: "clhrl", + CLGRL: "clgrl", + CLGHRL: "clghrl", + CLGFRL: "clgfrl", + CLST: "clst", + CRL: "crl", + CGRL: "cgrl", + CGFRL: "cgfrl", + CUSE: "cuse", + CMPSC: "cmpsc", + KDSA: "kdsa", + KIMD: "kimd", + KLMD: "klmd", + KMAC: "kmac", + THDR: "thdr", + THDER: "thder", + CXFBR: "cxfbr", + CXFBRA: "cxfbra", + CXFTR: "cxftr", + CXFR: "cxfr", + CDFBR: "cdfbr", + CDFBRA: "cdfbra", + CDFTR: "cdftr", + CDFR: "cdfr", + CEFBR: "cefbr", + CEFBRA: "cefbra", + CEFR: "cefr", + CXGBR: "cxgbr", + CXGBRA: "cxgbra", + CXGTR: "cxgtr", + CXGTRA: "cxgtra", + CXGR: "cxgr", + CDGBR: "cdgbr", + CDGBRA: "cdgbra", + CDGTR: "cdgtr", + CDGTRA: "cdgtra", + CDGR: "cdgr", + CEGBR: "cegbr", + CEGBRA: "cegbra", + CEGR: "cegr", + CXLFBR: "cxlfbr", + CXLFTR: "cxlftr", + CDLFBR: "cdlfbr", + CDLFTR: "cdlftr", + CELFBR: "celfbr", + CXLGBR: "cxlgbr", + CXLGTR: "cxlgtr", + CDLGBR: "cdlgbr", + CDLGTR: "cdlgtr", + CELGBR: "celgbr", + CXPT: "cxpt", + CDPT: "cdpt", + CXSTR: "cxstr", + CDSTR: "cdstr", + CXUTR: "cxutr", + CDUTR: "cdutr", + CXZT: "cxzt", + CDZT: "cdzt", + TBEDR: "tbedr", + TBDR: "tbdr", + CVB: "cvb", + CVBY: "cvby", + CVBG: "cvbg", + CVD: "cvd", + CVDY: "cvdy", + CVDG: "cvdg", + CFXBR: "cfxbr", + CFXBRA: "cfxbra", + CGXBR: "cgxbr", + CGXBRA: "cgxbra", + CFXTR: "cfxtr", + CGXTR: "cgxtr", + CGXTRA: "cgxtra", + CFXR: "cfxr", + CGXR: "cgxr", + CFDBR: "cfdbr", + CFDBRA: "cfdbra", + CGDBR: "cgdbr", + CGDBRA: "cgdbra", + CFDTR: "cfdtr", + CGDTR: "cgdtr", + CGDTRA: "cgdtra", + CFDR: "cfdr", + CGDR: "cgdr", + CFEBR: "cfebr", + CFEBRA: "cfebra", + CGEBR: "cgebr", + CGEBRA: "cgebra", + CFER: "cfer", + 
CGER: "cger", + CLFXBR: "clfxbr", + CLGXBR: "clgxbr", + CLFXTR: "clfxtr", + CLGXTR: "clgxtr", + CLFDBR: "clfdbr", + CLGDBR: "clgdbr", + CLFDTR: "clfdtr", + CLGDTR: "clgdtr", + CLFEBR: "clfebr", + CLGEBR: "clgebr", + CPXT: "cpxt", + CPDT: "cpdt", + CSXTR: "csxtr", + CSDTR: "csdtr", + CUXTR: "cuxtr", + CUDTR: "cudtr", + CZXT: "czxt", + CZDT: "czdt", + CU24: "cu24", + CU21: "cu21", + CU12: "cu12", + CU14: "cu14", + CU42: "cu42", + CU41: "cu41", + CPYA: "cpya", + CPSDR: "cpsdr", + VSCSHP: "vscshp", + VSCHP: "vschp", + DFLTCC: "dfltcc", + D: "d", + DR: "dr", + DXBR: "dxbr", + DXTR: "dxtr", + DXTRA: "dxtra", + DXR: "dxr", + DDB: "ddb", + DDBR: "ddbr", + DDTR: "ddtr", + DDTRA: "ddtra", + DD: "dd", + DDR: "ddr", + DEB: "deb", + DEBR: "debr", + DE: "de", + DER: "der", + DP: "dp", + DL: "dl", + DLR: "dlr", + DLG: "dlg", + DLGR: "dlgr", + DSG: "dsg", + DSGR: "dsgr", + DSGF: "dsgf", + DSGFR: "dsgfr", + DIDBR: "didbr", + DIEBR: "diebr", + ED: "ed", + EDMK: "edmk", + X: "x", + XR: "xr", + XRK: "xrk", + XY: "xy", + XG: "xg", + XGR: "xgr", + XGRK: "xgrk", + XC: "xc", + XI: "xi", + XIY: "xiy", + XIHF: "xihf", + XILF: "xilf", + EX: "ex", + EXRL: "exrl", + EAR: "ear", + ESEA: "esea", + EEXTR: "eextr", + EEDTR: "eedtr", + ECAG: "ecag", + ECTG: "ectg", + EFPC: "efpc", + EPAR: "epar", + EPAIR: "epair", + EPSW: "epsw", + ESAR: "esar", + ESAIR: "esair", + ESXTR: "esxtr", + ESDTR: "esdtr", + EREG: "ereg", + EREGG: "eregg", + ESTA: "esta", + ETND: "etnd", + FLOGR: "flogr", + HSCH: "hsch", + HDR: "hdr", + HER: "her", + IAC: "iac", + IEXTR: "iextr", + IEDTR: "iedtr", + IC: "ic", + ICY: "icy", + ICMH: "icmh", + ICM: "icm", + ICMY: "icmy", + IIHH: "iihh", + IIHL: "iihl", + IIHF: "iihf", + IILH: "iilh", + IILL: "iill", + IILF: "iilf", + IPM: "ipm", + IPK: "ipk", + IRBM: "irbm", + ISKE: "iske", + IVSK: "ivsk", + IDTE: "idte", + IPTE: "ipte", + L: "l", + LR: "lr", + LY: "ly", + LG: "lg", + LGR: "lgr", + LGF: "lgf", + LGFR: "lgfr", + LXR: "lxr", + LD: "ld", + LDR: "ldr", + LDY: "ldy", + LE: "le", + 
LER: "ler", + LEY: "ley", + LAM: "lam", + LAMY: "lamy", + LA: "la", + LAY: "lay", + LAE: "lae", + LAEY: "laey", + LARL: "larl", + LASP: "lasp", + LAA: "laa", + LAAG: "laag", + LAAL: "laal", + LAALG: "laalg", + LAN: "lan", + LANG: "lang", + LAX: "lax", + LAXG: "laxg", + LAO: "lao", + LAOG: "laog", + LT: "lt", + LTR: "ltr", + LTG: "ltg", + LTGR: "ltgr", + LTGF: "ltgf", + LTGFR: "ltgfr", + LTXBR: "ltxbr", + LTXTR: "ltxtr", + LTXR: "ltxr", + LTDBR: "ltdbr", + LTDTR: "ltdtr", + LTDR: "ltdr", + LTEBR: "ltebr", + LTER: "lter", + LAT: "lat", + LGAT: "lgat", + LZRF: "lzrf", + LZRG: "lzrg", + LBEAR: "lbear", + LB: "lb", + LBR: "lbr", + LGB: "lgb", + LGBR: "lgbr", + LBH: "lbh", + LCR: "lcr", + LCGR: "lcgr", + LCGFR: "lcgfr", + LCXBR: "lcxbr", + LCXR: "lcxr", + LCDBR: "lcdbr", + LCDR: "lcdr", + LCDFR: "lcdfr", + LCEBR: "lcebr", + LCER: "lcer", + LCTL: "lctl", + LCTLG: "lctlg", + LCBB: "lcbb", + FIXBR: "fixbr", + FIXBRA: "fixbra", + FIXTR: "fixtr", + FIXR: "fixr", + FIDBR: "fidbr", + FIDBRA: "fidbra", + FIDTR: "fidtr", + FIDR: "fidr", + FIEBR: "fiebr", + FIEBRA: "fiebra", + FIER: "fier", + LFPC: "lfpc", + LFAS: "lfas", + LDGR: "ldgr", + LGDR: "lgdr", + LGG: "lgg", + LGSC: "lgsc", + LH: "lh", + LHR: "lhr", + LHY: "lhy", + LGH: "lgh", + LGHR: "lghr", + LHH: "lhh", + LOCHHI: "lochhi", + LHI: "lhi", + LGHI: "lghi", + LOCHI: "lochi", + LOCGHI: "locghi", + LHRL: "lhrl", + LGHRL: "lghrl", + LFH: "lfh", + LFHAT: "lfhat", + LOCFH: "locfh", + LOCFHR: "locfhr", + LGFI: "lgfi", + LXDB: "lxdb", + LXDBR: "lxdbr", + LXDTR: "lxdtr", + LXD: "lxd", + LXDR: "lxdr", + LXEB: "lxeb", + LXEBR: "lxebr", + LXE: "lxe", + LXER: "lxer", + LDEB: "ldeb", + LDEBR: "ldebr", + LDETR: "ldetr", + LDE: "lde", + LDER: "lder", + LLGF: "llgf", + LLGFR: "llgfr", + LLGFSG: "llgfsg", + LLGFAT: "llgfat", + LLZRGF: "llzrgf", + LLC: "llc", + LLCR: "llcr", + LLGC: "llgc", + LLGCR: "llgcr", + LLCH: "llch", + LLH: "llh", + LLHR: "llhr", + LLGH: "llgh", + LLGHR: "llghr", + LLHH: "llhh", + LLHRL: "llhrl", + LLGHRL: "llghrl", + 
LLIHH: "llihh", + LLIHL: "llihl", + LLIHF: "llihf", + LLILH: "llilh", + LLILL: "llill", + LLILF: "llilf", + LLGFRL: "llgfrl", + LLGT: "llgt", + LLGTR: "llgtr", + LLGTAT: "llgtat", + LM: "lm", + LMY: "lmy", + LMG: "lmg", + LMD: "lmd", + LMH: "lmh", + LNR: "lnr", + LNGR: "lngr", + LNGFR: "lngfr", + LNXBR: "lnxbr", + LNXR: "lnxr", + LNDBR: "lndbr", + LNDR: "lndr", + LNDFR: "lndfr", + LNEBR: "lnebr", + LNER: "lner", + LOC: "loc", + LOCR: "locr", + LOCG: "locg", + LOCGR: "locgr", + LPTEA: "lptea", + LPD: "lpd", + LPDG: "lpdg", + LPQ: "lpq", + LPR: "lpr", + LPGR: "lpgr", + LPGFR: "lpgfr", + LPXBR: "lpxbr", + LPXR: "lpxr", + LPDBR: "lpdbr", + LPDR: "lpdr", + LPDFR: "lpdfr", + LPEBR: "lpebr", + LPER: "lper", + LPSW: "lpsw", + LPSWE: "lpswe", + LPSWEY: "lpswey", + LRA: "lra", + LRAY: "lray", + LRAG: "lrag", + LRL: "lrl", + LGRL: "lgrl", + LGFRL: "lgfrl", + LRVH: "lrvh", + LRV: "lrv", + LRVR: "lrvr", + LRVG: "lrvg", + LRVGR: "lrvgr", + LDXBR: "ldxbr", + LDXBRA: "ldxbra", + LDXTR: "ldxtr", + LDXR: "ldxr", + LRDR: "lrdr", + LEXBR: "lexbr", + LEXBRA: "lexbra", + LEXR: "lexr", + LEDBR: "ledbr", + LEDBRA: "ledbra", + LEDTR: "ledtr", + LEDR: "ledr", + LRER: "lrer", + LURA: "lura", + LURAG: "lurag", + LZXR: "lzxr", + LZDR: "lzdr", + LZER: "lzer", + MSTA: "msta", + MSCH: "msch", + MC: "mc", + MVHHI: "mvhhi", + MVHI: "mvhi", + MVGHI: "mvghi", + MVC: "mvc", + MVI: "mvi", + MVIY: "mviy", + MVCIN: "mvcin", + MVCL: "mvcl", + MVCLE: "mvcle", + MVCLU: "mvclu", + MVN: "mvn", + MVPG: "mvpg", + MVCRL: "mvcrl", + MVST: "mvst", + MVCP: "mvcp", + MVCS: "mvcs", + MVCDK: "mvcdk", + MVCK: "mvck", + MVO: "mvo", + MVCOS: "mvcos", + MVCSK: "mvcsk", + MVZ: "mvz", + MG: "mg", + MGRK: "mgrk", + M: "m", + MFY: "mfy", + MR: "mr", + MXBR: "mxbr", + MXTR: "mxtr", + MXTRA: "mxtra", + MXR: "mxr", + MDB: "mdb", + MDBR: "mdbr", + MDTR: "mdtr", + MDTRA: "mdtra", + MD: "md", + MDR: "mdr", + MXDB: "mxdb", + MXDBR: "mxdbr", + MXD: "mxd", + MXDR: "mxdr", + MEEB: "meeb", + MEEBR: "meebr", + MEE: "mee", + MEER: "meer", 
+ MDEB: "mdeb", + MDEBR: "mdebr", + MDE: "mde", + MDER: "mder", + ME: "me", + MER: "mer", + MAY: "may", + MAYR: "mayr", + MADB: "madb", + MADBR: "madbr", + MAD: "mad", + MADR: "madr", + MAEB: "maeb", + MAEBR: "maebr", + MAE: "mae", + MAER: "maer", + MAYH: "mayh", + MAYHR: "mayhr", + MAYL: "mayl", + MAYLR: "maylr", + MSDB: "msdb", + MSDBR: "msdbr", + MSD: "msd", + MSDR: "msdr", + MSEB: "mseb", + MSEBR: "msebr", + MSE: "mse", + MSER: "mser", + MP: "mp", + MH: "mh", + MHY: "mhy", + MGH: "mgh", + MHI: "mhi", + MGHI: "mghi", + MLG: "mlg", + MLGR: "mlgr", + ML: "ml", + MLR: "mlr", + MS: "ms", + MSC: "msc", + MSR: "msr", + MSRKC: "msrkc", + MSY: "msy", + MSG: "msg", + MSGC: "msgc", + MSGR: "msgr", + MSGRKC: "msgrkc", + MSGF: "msgf", + MSGFR: "msgfr", + MSFI: "msfi", + MSGFI: "msgfi", + MYH: "myh", + MYHR: "myhr", + MYL: "myl", + MYLR: "mylr", + MY: "my", + MYR: "myr", + NNRK: "nnrk", + NNGRK: "nngrk", + NNPA: "nnpa", + NIAI: "niai", + NTSTG: "ntstg", + NORK: "nork", + NOGRK: "nogrk", + NXRK: "nxrk", + NXGRK: "nxgrk", + O: "o", + OR: "or", + ORK: "ork", + OY: "oy", + OG: "og", + OGR: "ogr", + OGRK: "ogrk", + OC: "oc", + OI: "oi", + OIY: "oiy", + OIHH: "oihh", + OIHL: "oihl", + OIHF: "oihf", + OILH: "oilh", + OILL: "oill", + OILF: "oilf", + OCRK: "ocrk", + OCGRK: "ocgrk", + PACK: "pack", + PKA: "pka", + PKU: "pku", + PGIN: "pgin", + PGOUT: "pgout", + PCC: "pcc", + PCKMO: "pckmo", + PFPO: "pfpo", + PFMF: "pfmf", + PLO: "plo", + PPA: "ppa", + PRNO: "prno", + PTFF: "ptff", + PTF: "ptf", + POPCNT: "popcnt", + PFD: "pfd", + PFDRL: "pfdrl", + PC: "pc", + PR: "pr", + PT: "pt", + PTI: "pti", + PALB: "palb", + PTLB: "ptlb", + QAXTR: "qaxtr", + QADTR: "qadtr", + QPACI: "qpaci", + RRXTR: "rrxtr", + RRDTR: "rrdtr", + RCHP: "rchp", + RDP: "rdp", + RRBE: "rrbe", + RRBM: "rrbm", + RP: "rp", + RSCH: "rsch", + RLL: "rll", + RLLG: "rllg", + RNSBG: "rnsbg", + RXSBG: "rxsbg", + RISBG: "risbg", + RISBGN: "risbgn", + RISBHG: "risbhg", + RISBLG: "risblg", + ROSBG: "rosbg", + SRST: "srst", + 
SRSTU: "srstu", + SELR: "selr", + SELGR: "selgr", + SELFHR: "selfhr", + SAR: "sar", + SAL: "sal", + SAC: "sac", + SACF: "sacf", + SAM24: "sam24", + SAM31: "sam31", + SAM64: "sam64", + SRNM: "srnm", + SRNMB: "srnmb", + SCHM: "schm", + SCK: "sck", + SCKC: "sckc", + SCKPF: "sckpf", + SPT: "spt", + SRNMT: "srnmt", + SFPC: "sfpc", + SFASR: "sfasr", + SPX: "spx", + SPM: "spm", + SPKA: "spka", + SSAR: "ssar", + SSAIR: "ssair", + SSKE: "sske", + SSM: "ssm", + SRP: "srp", + SLDA: "slda", + SLDL: "sldl", + SLA: "sla", + SLAK: "slak", + SLAG: "slag", + SLL: "sll", + SLLK: "sllk", + SLLG: "sllg", + SRDA: "srda", + SRDL: "srdl", + SRA: "sra", + SRAK: "srak", + SRAG: "srag", + SRL: "srl", + SRLK: "srlk", + SRLG: "srlg", + SLXT: "slxt", + SLDT: "sldt", + SRXT: "srxt", + SRDT: "srdt", + SIGP: "sigp", + SORTL: "sortl", + SQXBR: "sqxbr", + SQXR: "sqxr", + SQDB: "sqdb", + SQDBR: "sqdbr", + SQD: "sqd", + SQDR: "sqdr", + SQEB: "sqeb", + SQEBR: "sqebr", + SQE: "sqe", + SQER: "sqer", + SSCH: "ssch", + ST: "st", + STY: "sty", + STG: "stg", + STD: "std", + STDY: "stdy", + STE: "ste", + STEY: "stey", + STAM: "stam", + STAMY: "stamy", + STBEAR: "stbear", + STCPS: "stcps", + STCRW: "stcrw", + STC: "stc", + STCY: "stcy", + STCH: "stch", + STCMH: "stcmh", + STCM: "stcm", + STCMY: "stcmy", + STCK: "stck", + STCKC: "stckc", + STCKE: "stcke", + STCKF: "stckf", + STCTL: "stctl", + STCTG: "stctg", + STAP: "stap", + STIDP: "stidp", + STPT: "stpt", + STFL: "stfl", + STFLE: "stfle", + STFPC: "stfpc", + STGSC: "stgsc", + STH: "sth", + STHY: "sthy", + STHH: "sthh", + STHRL: "sthrl", + STFH: "stfh", + STOCFH: "stocfh", + STM: "stm", + STMY: "stmy", + STMG: "stmg", + STMH: "stmh", + STOC: "stoc", + STOCG: "stocg", + STPQ: "stpq", + STPX: "stpx", + STRAG: "strag", + STRL: "strl", + STGRL: "stgrl", + STRVH: "strvh", + STRV: "strv", + STRVG: "strvg", + STSCH: "stsch", + STSI: "stsi", + STNSM: "stnsm", + STOSM: "stosm", + STURA: "stura", + STURG: "sturg", + S: "s", + SR: "sr", + SRK: "srk", + SY: "sy", + SG: 
"sg", + SGR: "sgr", + SGRK: "sgrk", + SGF: "sgf", + SGFR: "sgfr", + SXBR: "sxbr", + SXTR: "sxtr", + SXTRA: "sxtra", + SDB: "sdb", + SDBR: "sdbr", + SDTR: "sdtr", + SDTRA: "sdtra", + SEB: "seb", + SEBR: "sebr", + SP: "sp", + SH: "sh", + SHY: "shy", + SGH: "sgh", + SHHHR: "shhhr", + SHHLR: "shhlr", + SL: "sl", + SLR: "slr", + SLRK: "slrk", + SLY: "sly", + SLG: "slg", + SLGR: "slgr", + SLGRK: "slgrk", + SLGF: "slgf", + SLGFR: "slgfr", + SLHHHR: "slhhhr", + SLHHLR: "slhhlr", + SLFI: "slfi", + SLGFI: "slgfi", + SLB: "slb", + SLBR: "slbr", + SLBG: "slbg", + SLBGR: "slbgr", + SXR: "sxr", + SD: "sd", + SDR: "sdr", + SE: "se", + SER: "ser", + SW: "sw", + SWR: "swr", + SU: "su", + SUR: "sur", + SVC: "svc", + TAR: "tar", + TAM: "tam", + TS: "ts", + TB: "tb", + TCXB: "tcxb", + TDCXT: "tdcxt", + TCDB: "tcdb", + TDCDT: "tdcdt", + TCEB: "tceb", + TDCET: "tdcet", + TDGXT: "tdgxt", + TDGDT: "tdgdt", + TDGET: "tdget", + TP: "tp", + TPEI: "tpei", + TPI: "tpi", + TPROT: "tprot", + TSCH: "tsch", + TM: "tm", + TMY: "tmy", + TMHH: "tmhh", + TMHL: "tmhl", + TMLH: "tmlh", + TMLL: "tmll", + TMH: "tmh", + TML: "tml", + TRACE: "trace", + TRACG: "tracg", + TABORT: "tabort", + TBEGINC: "tbeginc", + TBEGIN: "tbegin", + TEND: "tend", + TR: "tr", + TRT: "trt", + TRTE: "trte", + TRTR: "trtr", + TRTRE: "trtre", + TRE: "tre", + TROO: "troo", + TROT: "trot", + TRTO: "trto", + TRTT: "trtt", + TRAP2: "trap2", + TRAP4: "trap4", + UNPK: "unpk", + UNPKA: "unpka", + UNPKU: "unpku", + UPT: "upt", + VA: "va", + VACC: "vacc", + VAP: "vap", + VAC: "vac", + VACCC: "vaccc", + VN: "vn", + VNC: "vnc", + VAVG: "vavg", + VAVGL: "vavgl", + VBPERM: "vbperm", + VCKSM: "vcksm", + VCP: "vcp", + VCEQ: "vceq", + VCH: "vch", + VCHL: "vchl", + VCSPH: "vcsph", + VCVB: "vcvb", + VCVBG: "vcvbg", + VCVD: "vcvd", + VCVDG: "vcvdg", + VCLZDP: "vclzdp", + VCLZ: "vclz", + VCTZ: "vctz", + VDP: "vdp", + VEC: "vec", + VECL: "vecl", + VERIM: "verim", + VERLL: "verll", + VERLLV: "verllv", + VESLV: "veslv", + VESL: "vesl", + VESRA: "vesra", 
+ VESRAV: "vesrav", + VESRL: "vesrl", + VESRLV: "vesrlv", + VX: "vx", + VFAE: "vfae", + VFEE: "vfee", + VFENE: "vfene", + VFA: "vfa", + WFK: "wfk", + VFCE: "vfce", + VFCH: "vfch", + VFCHE: "vfche", + WFC: "wfc", + VCLFNH: "vclfnh", + VCLFNL: "vclfnl", + VCRNF: "vcrnf", + VCFPS: "vcfps", + VCDG: "vcdg", + VCFPL: "vcfpl", + VCDLG: "vcdlg", + VCFN: "vcfn", + VCSFP: "vcsfp", + VCGD: "vcgd", + VCLFP: "vclfp", + VCLGD: "vclgd", + VCNF: "vcnf", + VFD: "vfd", + VFLL: "vfll", + VFLR: "vflr", + VFMAX: "vfmax", + VFMIN: "vfmin", + VFM: "vfm", + VFMA: "vfma", + VFMS: "vfms", + VFNMA: "vfnma", + VFNMS: "vfnms", + VFPSO: "vfpso", + VFSQ: "vfsq", + VFS: "vfs", + VFTCI: "vftci", + VGFM: "vgfm", + VGFMA: "vgfma", + VGEF: "vgef", + VGEG: "vgeg", + VGBM: "vgbm", + VGM: "vgm", + VISTR: "vistr", + VL: "vl", + VLR: "vlr", + VLREP: "vlrep", + VLEBRH: "vlebrh", + VLEBRF: "vlebrf", + VLEBRG: "vlebrg", + VLBRREP: "vlbrrep", + VLLEBRZ: "vllebrz", + VLBR: "vlbr", + VLC: "vlc", + VLEH: "vleh", + VLEF: "vlef", + VLEG: "vleg", + VLEB: "vleb", + VLEIH: "vleih", + VLEIF: "vleif", + VLEIG: "vleig", + VLEIB: "vleib", + VLER: "vler", + VFI: "vfi", + VLGV: "vlgv", + VLIP: "vlip", + VLLEZ: "vllez", + VLM: "vlm", + VLP: "vlp", + VLRL: "vlrl", + VLRLR: "vlrlr", + VLBB: "vlbb", + VLVG: "vlvg", + VLVGP: "vlvgp", + VLL: "vll", + VMX: "vmx", + VMXL: "vmxl", + VMRH: "vmrh", + VMRL: "vmrl", + VMN: "vmn", + VMNL: "vmnl", + VMAE: "vmae", + VMAH: "vmah", + VMALE: "vmale", + VMALH: "vmalh", + VMALO: "vmalo", + VMAL: "vmal", + VMAO: "vmao", + VMSP: "vmsp", + VMP: "vmp", + VME: "vme", + VMH: "vmh", + VMLE: "vmle", + VMLH: "vmlh", + VMLO: "vmlo", + VML: "vml", + VMO: "vmo", + VMSL: "vmsl", + VNN: "vnn", + VNO: "vno", + VNX: "vnx", + VO: "vo", + VOC: "voc", + VPK: "vpk", + VPKLS: "vpkls", + VPKS: "vpks", + VPKZ: "vpkz", + VPKZR: "vpkzr", + VPSOP: "vpsop", + VPERM: "vperm", + VPDI: "vpdi", + VPOPCT: "vpopct", + VRP: "vrp", + VREP: "vrep", + VREPI: "vrepi", + VSCEF: "vscef", + VSCEG: "vsceg", + VSEL: "vsel", + VSDP: 
"vsdp", + VSRP: "vsrp", + VSRPR: "vsrpr", + VSL: "vsl", + VSLB: "vslb", + VSLD: "vsld", + VSLDB: "vsldb", + VSRA: "vsra", + VSRAB: "vsrab", + VSRD: "vsrd", + VSRL: "vsrl", + VSRLB: "vsrlb", + VSEG: "vseg", + VST: "vst", + VSTEBRH: "vstebrh", + VSTEBRF: "vstebrf", + VSTEBRG: "vstebrg", + VSTBR: "vstbr", + VSTEH: "vsteh", + VSTEF: "vstef", + VSTEG: "vsteg", + VSTEB: "vsteb", + VSTER: "vster", + VSTM: "vstm", + VSTRL: "vstrl", + VSTRLR: "vstrlr", + VSTL: "vstl", + VSTRC: "vstrc", + VSTRS: "vstrs", + VS: "vs", + VSCBI: "vscbi", + VSP: "vsp", + VSBCBI: "vsbcbi", + VSBI: "vsbi", + VSUMG: "vsumg", + VSUMQ: "vsumq", + VSUM: "vsum", + VTP: "vtp", + VTM: "vtm", + VUPH: "vuph", + VUPLH: "vuplh", + VUPLL: "vupll", + VUPL: "vupl", + VUPKZ: "vupkz", + VUPKZH: "vupkzh", + VUPKZL: "vupkzl", + ZAP: "zap", +} + +var ( + ap_Reg_8_11 = &argField{Type: TypeReg, flags: 0x1, BitField: BitField{8, 4}} + ap_DispUnsigned_20_31 = &argField{Type: TypeDispUnsigned, flags: 0x10, BitField: BitField{20, 12}} + ap_IndexReg_12_15 = &argField{Type: TypeIndexReg, flags: 0x41, BitField: BitField{12, 4}} + ap_BaseReg_16_19 = &argField{Type: TypeBaseReg, flags: 0x21, BitField: BitField{16, 4}} + ap_Reg_12_15 = &argField{Type: TypeReg, flags: 0x1, BitField: BitField{12, 4}} + ap_Reg_24_27 = &argField{Type: TypeReg, flags: 0x1, BitField: BitField{24, 4}} + ap_Reg_28_31 = &argField{Type: TypeReg, flags: 0x1, BitField: BitField{28, 4}} + ap_Reg_16_19 = &argField{Type: TypeReg, flags: 0x1, BitField: BitField{16, 4}} + ap_DispSigned20_20_39 = &argField{Type: TypeDispSigned20, flags: 0x10, BitField: BitField{20, 20}} + ap_FPReg_24_27 = &argField{Type: TypeFPReg, flags: 0x2, BitField: BitField{24, 4}} + ap_FPReg_28_31 = &argField{Type: TypeFPReg, flags: 0x2, BitField: BitField{28, 4}} + ap_FPReg_16_19 = &argField{Type: TypeFPReg, flags: 0x2, BitField: BitField{16, 4}} + ap_Mask_20_23 = &argField{Type: TypeMask, flags: 0x800, BitField: BitField{20, 4}} + ap_FPReg_8_11 = &argField{Type: TypeFPReg, flags: 0x2, 
BitField: BitField{8, 4}} + ap_Len_8_11 = &argField{Type: TypeLen, flags: 0x10, BitField: BitField{8, 4}} + ap_DispUnsigned_36_47 = &argField{Type: TypeDispUnsigned, flags: 0x10, BitField: BitField{36, 12}} + ap_Len_12_15 = &argField{Type: TypeLen, flags: 0x10, BitField: BitField{12, 4}} + ap_BaseReg_32_35 = &argField{Type: TypeBaseReg, flags: 0x21, BitField: BitField{32, 4}} + ap_ImmSigned16_16_31 = &argField{Type: TypeImmSigned16, flags: 0x0, BitField: BitField{16, 16}} + ap_ImmSigned32_16_47 = &argField{Type: TypeImmSigned32, flags: 0x0, BitField: BitField{16, 32}} + ap_ImmSigned8_8_15 = &argField{Type: TypeImmSigned8, flags: 0x0, BitField: BitField{8, 8}} + ap_ImmUnsigned_16_47 = &argField{Type: TypeImmUnsigned, flags: 0x0, BitField: BitField{16, 32}} + ap_FPReg_12_15 = &argField{Type: TypeFPReg, flags: 0x2, BitField: BitField{12, 4}} + ap_Len_8_15 = &argField{Type: TypeLen, flags: 0x10, BitField: BitField{8, 8}} + ap_ImmUnsigned_8_15 = &argField{Type: TypeImmUnsigned, flags: 0x0, BitField: BitField{8, 8}} + ap_ImmUnsigned_16_31 = &argField{Type: TypeImmUnsigned, flags: 0x0, BitField: BitField{16, 16}} + ap_Mask_8_11 = &argField{Type: TypeMask, flags: 0x800, BitField: BitField{8, 4}} + ap_RegImSigned16_32_47 = &argField{Type: TypeRegImSigned16, flags: 0x80, BitField: BitField{32, 16}} + ap_RegImSigned12_12_23 = &argField{Type: TypeRegImSigned12, flags: 0x80, BitField: BitField{12, 12}} + ap_RegImSigned24_24_47 = &argField{Type: TypeRegImSigned24, flags: 0x80, BitField: BitField{24, 24}} + ap_RegImSigned16_16_31 = &argField{Type: TypeRegImSigned16, flags: 0x80, BitField: BitField{16, 16}} + ap_RegImSigned32_16_47 = &argField{Type: TypeRegImSigned32, flags: 0x80, BitField: BitField{16, 32}} + ap_Mask_32_35 = &argField{Type: TypeMask, flags: 0x800, BitField: BitField{32, 4}} + ap_Mask_16_19 = &argField{Type: TypeMask, flags: 0x800, BitField: BitField{16, 4}} + ap_ImmSigned16_32_47 = &argField{Type: TypeImmSigned16, flags: 0x0, BitField: BitField{32, 16}} + 
ap_ImmSigned8_32_39 = &argField{Type: TypeImmSigned8, flags: 0x0, BitField: BitField{32, 8}} + ap_Mask_12_15 = &argField{Type: TypeMask, flags: 0x800, BitField: BitField{12, 4}} + ap_ImmUnsigned_32_47 = &argField{Type: TypeImmUnsigned, flags: 0x0, BitField: BitField{32, 16}} + ap_ImmUnsigned_32_39 = &argField{Type: TypeImmUnsigned, flags: 0x0, BitField: BitField{32, 8}} + ap_FPReg_32_35 = &argField{Type: TypeFPReg, flags: 0x2, BitField: BitField{32, 4}} + ap_Mask_36_39 = &argField{Type: TypeMask, flags: 0x800, BitField: BitField{36, 4}} + ap_ACReg_24_27 = &argField{Type: TypeACReg, flags: 0x3, BitField: BitField{24, 4}} + ap_ACReg_28_31 = &argField{Type: TypeACReg, flags: 0x3, BitField: BitField{28, 4}} + ap_VecReg_8_11 = &argField{Type: TypeVecReg, flags: 0x8, BitField: BitField{8, 4}} + ap_VecReg_12_15 = &argField{Type: TypeVecReg, flags: 0x8, BitField: BitField{12, 4}} + ap_VecReg_16_19 = &argField{Type: TypeVecReg, flags: 0x8, BitField: BitField{16, 4}} + ap_ImmUnsigned_36_39 = &argField{Type: TypeImmUnsigned, flags: 0xc00, BitField: BitField{36, 4}} + ap_Mask_24_27 = &argField{Type: TypeMask, flags: 0x800, BitField: BitField{24, 4}} + ap_ACReg_8_11 = &argField{Type: TypeACReg, flags: 0x3, BitField: BitField{8, 4}} + ap_ACReg_12_15 = &argField{Type: TypeACReg, flags: 0x3, BitField: BitField{12, 4}} + ap_CReg_8_11 = &argField{Type: TypeCReg, flags: 0x4, BitField: BitField{8, 4}} + ap_CReg_12_15 = &argField{Type: TypeCReg, flags: 0x4, BitField: BitField{12, 4}} + ap_ImmUnsigned_24_27 = &argField{Type: TypeImmUnsigned, flags: 0x0, BitField: BitField{24, 4}} + ap_ImmUnsigned_28_31 = &argField{Type: TypeImmUnsigned, flags: 0x0, BitField: BitField{28, 4}} + ap_ImmUnsigned_16_23 = &argField{Type: TypeImmUnsigned, flags: 0x0, BitField: BitField{16, 8}} + ap_ImmUnsigned_24_31 = &argField{Type: TypeImmUnsigned, flags: 0x0, BitField: BitField{24, 8}} + ap_ImmUnsigned_12_15 = &argField{Type: TypeImmUnsigned, flags: 0x0, BitField: BitField{12, 4}} + ap_ImmUnsigned_28_35 = 
&argField{Type: TypeImmUnsigned, flags: 0x0, BitField: BitField{28, 8}} + ap_VecReg_32_35 = &argField{Type: TypeVecReg, flags: 0x8, BitField: BitField{32, 4}} + ap_Mask_28_31 = &argField{Type: TypeMask, flags: 0x800, BitField: BitField{28, 4}} + ap_ImmUnsigned_16_27 = &argField{Type: TypeImmUnsigned, flags: 0x0, BitField: BitField{16, 12}} + ap_ImmUnsigned_32_35 = &argField{Type: TypeImmUnsigned, flags: 0x0, BitField: BitField{32, 4}} +) + +var instFormats = [...]instFormat{ + {A, 0xff00000000000000, 0x5a00000000000000, 0x0, // ADD (32) (A R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {AR, 0xff00000000000000, 0x1a00000000000000, 0x0, // ADD (32) (AR R1,R2) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15}}, + {ARK, 0xffff000000000000, 0xb9f8000000000000, 0xf0000000000, // ADD (32) (ARK R1,R2,R3) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31, ap_Reg_16_19}}, + {AY, 0xff00000000ff0000, 0xe3000000005a0000, 0x0, // ADD (32) (AY R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {AG, 0xff00000000ff0000, 0xe300000000080000, 0x0, // ADD (64) (AG R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {AGR, 0xffff000000000000, 0xb908000000000000, 0xff0000000000, // ADD (64) (AGR R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {AGRK, 0xffff000000000000, 0xb9e8000000000000, 0xf0000000000, // ADD (64) (AGRK R1,R2,R3) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31, ap_Reg_16_19}}, + {AGF, 0xff00000000ff0000, 0xe300000000180000, 0x0, // ADD (64←32) (AGF R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {AGFR, 0xffff000000000000, 0xb918000000000000, 0xff0000000000, // ADD (64←32) (AGFR R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {AXBR, 0xffff000000000000, 0xb34a000000000000, 0xff0000000000, // ADD (extended BFP) (AXBR R1,R2) + [8]*argField{ap_FPReg_24_27, 
ap_FPReg_28_31}}, + {AXTR, 0xffff000000000000, 0xb3da000000000000, 0xf0000000000, // ADD (extended DFP) (AXTR R1,R2,R3) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31, ap_FPReg_16_19}}, + {AXTRA, 0xffff000000000000, 0xb3da000000000000, 0x0, // ADD (extended DFP) (AXTRA R1,R2,R3,M4) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31, ap_FPReg_16_19, ap_Mask_20_23}}, + {ADB, 0xff00000000ff0000, 0xed000000001a0000, 0xff000000, // ADD (long BFP) (ADB R1,D2(X2,B2)) + [8]*argField{ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {ADBR, 0xffff000000000000, 0xb31a000000000000, 0xff0000000000, // ADD (long BFP) (ADBR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31}}, + {ADTR, 0xffff000000000000, 0xb3d2000000000000, 0xf0000000000, // ADD (long DFP) (ADTR R1,R2,R3) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31, ap_FPReg_16_19}}, + {ADTRA, 0xffff000000000000, 0xb3d2000000000000, 0x0, // ADD (long DFP) (ADTRA R1,R2,R3,M4) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31, ap_FPReg_16_19, ap_Mask_20_23}}, + {AEB, 0xff00000000ff0000, 0xed000000000a0000, 0xff000000, // ADD (short BFP) (AEB R1,D2(X2,B2)) + [8]*argField{ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {AEBR, 0xffff000000000000, 0xb30a000000000000, 0xff0000000000, // ADD (short BFP) (AEBR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31}}, + {AP, 0xff00000000000000, 0xfa00000000000000, 0x0, // ADD DECIMAL (AP D1(L1,B1),D2(L2,B2)) + [8]*argField{ap_DispUnsigned_20_31, ap_Len_8_11, ap_BaseReg_16_19, ap_DispUnsigned_36_47, ap_Len_12_15, ap_BaseReg_32_35}}, + {AH, 0xff00000000000000, 0x4a00000000000000, 0x0, // ADD HALFWORD (32←16) (AH R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {AHY, 0xff00000000ff0000, 0xe3000000007a0000, 0x0, // ADD HALFWORD (32←16) (AHY R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {AGH, 0xff00000000ff0000, 
0xe300000000380000, 0x0, // ADD HALFWORD (64→16) (AGH R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {AHI, 0xff0f000000000000, 0xa70a000000000000, 0x0, // ADD HALFWORD IMMEDIATE (32←16) (AHI R1,I2) + [8]*argField{ap_Reg_8_11, ap_ImmSigned16_16_31}}, + {AGHI, 0xff0f000000000000, 0xa70b000000000000, 0x0, // ADD HALFWORD IMMEDIATE (64←16) (AGHI R1,I2) + [8]*argField{ap_Reg_8_11, ap_ImmSigned16_16_31}}, + {AHHHR, 0xffff000000000000, 0xb9c8000000000000, 0xf0000000000, // ADD HIGH (32) (AHHHR R1,R2,R3) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31, ap_Reg_16_19}}, + {AHHLR, 0xffff000000000000, 0xb9d8000000000000, 0xf0000000000, // ADD HIGH (32) (AHHLR R1,R2,R3) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31, ap_Reg_16_19}}, + {AFI, 0xff0f000000000000, 0xc209000000000000, 0x0, // ADD IMMEDIATE (32) (AFI R1,I2) + [8]*argField{ap_Reg_8_11, ap_ImmSigned32_16_47}}, + {AHIK, 0xff00000000ff0000, 0xec00000000d80000, 0xff000000, // ADD IMMEDIATE (32←16) (AHIK R1,R3,I2) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15, ap_ImmSigned16_16_31}}, + {ASI, 0xff00000000ff0000, 0xeb000000006a0000, 0x0, // ADD IMMEDIATE (32←8) (ASI D1(B1),I2) + [8]*argField{ap_DispSigned20_20_39, ap_BaseReg_16_19, ap_ImmSigned8_8_15}}, + {AGHIK, 0xff00000000ff0000, 0xec00000000d90000, 0xff000000, // ADD IMMEDIATE (64←16) (AGHIK R1,R3,I2) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15, ap_ImmSigned16_16_31}}, + {AGFI, 0xff0f000000000000, 0xc208000000000000, 0x0, // ADD IMMEDIATE (64←32) (AGFI R1,I2) + [8]*argField{ap_Reg_8_11, ap_ImmSigned32_16_47}}, + {AGSI, 0xff00000000ff0000, 0xeb000000007a0000, 0x0, // ADD IMMEDIATE (64←8) (AGSI D1(B1),I2) + [8]*argField{ap_DispSigned20_20_39, ap_BaseReg_16_19, ap_ImmSigned8_8_15}}, + {AIH, 0xff0f000000000000, 0xcc08000000000000, 0x0, // ADD IMMEDIATE HIGH (32) (AIH R1,I2) + [8]*argField{ap_Reg_8_11, ap_ImmSigned32_16_47}}, + {AL, 0xff00000000000000, 0x5e00000000000000, 0x0, // ADD LOGICAL (32) (AL R1,D2(X2,B2)) + 
[8]*argField{ap_Reg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {ALR, 0xff00000000000000, 0x1e00000000000000, 0x0, // ADD LOGICAL (32) (ALR R1,R2) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15}}, + {ALRK, 0xffff000000000000, 0xb9fa000000000000, 0xf0000000000, // ADD LOGICAL (32) (ALRK R1,R2,R3) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31, ap_Reg_16_19}}, + {ALY, 0xff00000000ff0000, 0xe3000000005e0000, 0x0, // ADD LOGICAL (32) (ALY R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {ALG, 0xff00000000ff0000, 0xe3000000000a0000, 0x0, // ADD LOGICAL (64) (ALG R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {ALGR, 0xffff000000000000, 0xb90a000000000000, 0xff0000000000, // ADD LOGICAL (64) (ALGR R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {ALGRK, 0xffff000000000000, 0xb9ea000000000000, 0xf0000000000, // ADD LOGICAL (64) (ALGRK R1,R2,R3) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31, ap_Reg_16_19}}, + {ALGF, 0xff00000000ff0000, 0xe3000000001a0000, 0x0, // ADD LOGICAL (64←32) (ALGF R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {ALGFR, 0xffff000000000000, 0xb91a000000000000, 0xff0000000000, // ADD LOGICAL (64←32) (ALGFR R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {ALHHHR, 0xffff000000000000, 0xb9ca000000000000, 0xf0000000000, // ADD LOGICAL HIGH (32) (ALHHHR R1,R2,R3) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31, ap_Reg_16_19}}, + {ALHHLR, 0xffff000000000000, 0xb9da000000000000, 0xf0000000000, // ADD LOGICAL HIGH (32) (ALHHLR R1,R2,R3) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31, ap_Reg_16_19}}, + {ALFI, 0xff0f000000000000, 0xc20b000000000000, 0x0, // ADD LOGICAL IMMEDIATE (32) (ALFI R1,I2) + [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_47}}, + {ALGFI, 0xff0f000000000000, 0xc20a000000000000, 0x0, // ADD LOGICAL IMMEDIATE (64←32) (ALGFI R1,I2) + [8]*argField{ap_Reg_8_11, 
ap_ImmUnsigned_16_47}}, + {ALC, 0xff00000000ff0000, 0xe300000000980000, 0x0, // ADD LOGICAL WITH CARRY (32) (ALC R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {ALCR, 0xffff000000000000, 0xb998000000000000, 0xff0000000000, // ADD LOGICAL WITH CARRY (32) (ALCR R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {ALCG, 0xff00000000ff0000, 0xe300000000880000, 0x0, // ADD LOGICAL WITH CARRY (64) (ALCG R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {ALCGR, 0xffff000000000000, 0xb988000000000000, 0xff0000000000, // ADD LOGICAL WITH CARRY (64) (ALCGR R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {ALHSIK, 0xff00000000ff0000, 0xec00000000da0000, 0xff000000, // ADD LOGICAL WITH SIGNED IMMEDIATE(32→16) (ALHSIK R1,R3,I2) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15, ap_ImmSigned16_16_31}}, + {ALSI, 0xff00000000ff0000, 0xeb000000006e0000, 0x0, // ADD LOGICAL WITH SIGNED IMMEDIATE (32←8) (ALSI D1(B1),I2) + [8]*argField{ap_DispSigned20_20_39, ap_BaseReg_16_19, ap_ImmSigned8_8_15}}, + {ALGHSIK, 0xff00000000ff0000, 0xec00000000db0000, 0xff000000, // ADD LOGICAL WITH SIGNED IMMEDIATE(64→16) (ALGHSIK R1,R3,I2) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15, ap_ImmSigned16_16_31}}, + {ALGSI, 0xff00000000ff0000, 0xeb000000007e0000, 0x0, // ADD LOGICAL WITH SIGNED IMMEDIATE (64→8) (ALGSI D1(B1),I2) + [8]*argField{ap_DispSigned20_20_39, ap_BaseReg_16_19, ap_ImmSigned8_8_15}}, + {ALSIH, 0xff0f000000000000, 0xcc0a000000000000, 0x0, // ADD LOGICAL WITH SIGNED IMMEDIATE HIGH(32) (ALSIH R1,I2) + [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_47}}, + {ALSIHN, 0xff0f000000000000, 0xcc0b000000000000, 0x0, // ADD LOGICAL WITH SIGNED IMMEDIATE HIGH(32) (ALSIHN R1,I2) + [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_47}}, + {AXR, 0xff00000000000000, 0x3600000000000000, 0x0, // ADD NORMALIZED (extended HFP) (AXR R1,R2) + [8]*argField{ap_FPReg_8_11, ap_FPReg_12_15}}, + {AD, 
0xff00000000000000, 0x6a00000000000000, 0x0, // ADD NORMALIZED (long HFP) (AD R1,D2(X2,B2)) + [8]*argField{ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {ADR, 0xff00000000000000, 0x2a00000000000000, 0x0, // ADD NORMALIZED (long HFP) (ADR R1,R2) + [8]*argField{ap_FPReg_8_11, ap_FPReg_12_15}}, + {AE, 0xff00000000000000, 0x7a00000000000000, 0x0, // ADD NORMALIZED (short HFP) (AE R1,D2(X2,B2)) + [8]*argField{ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {AER, 0xff00000000000000, 0x3a00000000000000, 0x0, // ADD NORMALIZED (short HFP) (AER R1,R2) + [8]*argField{ap_FPReg_8_11, ap_FPReg_12_15}}, + {AW, 0xff00000000000000, 0x6e00000000000000, 0x0, // ADD UNNORMALIZED (long HFP) (AW R1,D2(X2,B2)) + [8]*argField{ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {AWR, 0xff00000000000000, 0x2e00000000000000, 0x0, // ADD UNNORMALIZED (long HFP) (AWR R1,R2) + [8]*argField{ap_FPReg_8_11, ap_FPReg_12_15}}, + {AU, 0xff00000000000000, 0x7e00000000000000, 0x0, // ADD UNNORMALIZED (short HFP) (AU R1,D2(X2,B2)) + [8]*argField{ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {AUR, 0xff00000000000000, 0x3e00000000000000, 0x0, // ADD UNNORMALIZED (short HFP) (AUR R1,R2) + [8]*argField{ap_FPReg_8_11, ap_FPReg_12_15}}, + {N, 0xff00000000000000, 0x5400000000000000, 0x0, // AND (32) (N R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {NR, 0xff00000000000000, 0x1400000000000000, 0x0, // AND (32) (NR R1,R2) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15}}, + {NRK, 0xffff000000000000, 0xb9f4000000000000, 0xf0000000000, // AND (32) (NRK R1,R2,R3) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31, ap_Reg_16_19}}, + {NY, 0xff00000000ff0000, 0xe300000000540000, 0x0, // AND (32) (NY R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {NG, 0xff00000000ff0000, 0xe300000000800000, 0x0, 
// AND (64) (NG R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {NGR, 0xffff000000000000, 0xb980000000000000, 0xff0000000000, // AND (64) (NGR R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {NGRK, 0xffff000000000000, 0xb9e4000000000000, 0xf0000000000, // AND (64) (NGRK R1,R2,R3) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31, ap_Reg_16_19}}, + {NC, 0xff00000000000000, 0xd400000000000000, 0x0, // AND (character) (NC D1(L1,B1),D2(B2)) + [8]*argField{ap_DispUnsigned_20_31, ap_Len_8_15, ap_BaseReg_16_19, ap_DispUnsigned_36_47, ap_BaseReg_32_35}}, + {NI, 0xff00000000000000, 0x9400000000000000, 0x0, // AND (immediate) (NI D1(B1),I2) + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19, ap_ImmUnsigned_8_15}}, + {NIY, 0xff00000000ff0000, 0xeb00000000540000, 0x0, // AND (immediate) (NIY D1(B1),I2) + [8]*argField{ap_DispSigned20_20_39, ap_BaseReg_16_19, ap_ImmUnsigned_8_15}}, + {NIHH, 0xff0f000000000000, 0xa504000000000000, 0x0, // AND IMMEDIATE (high high) (NIHH R1,I2) + [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_31}}, + {NIHL, 0xff0f000000000000, 0xa505000000000000, 0x0, // AND IMMEDIATE (high low) (NIHL R1,I2) + [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_31}}, + {NIHF, 0xff0f000000000000, 0xc00a000000000000, 0x0, // AND IMMEDIATE (high) (NIHF R1,I2) + [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_47}}, + {NILH, 0xff0f000000000000, 0xa506000000000000, 0x0, // AND IMMEDIATE (low high) (NILH R1,I2) + [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_31}}, + {NILL, 0xff0f000000000000, 0xa507000000000000, 0x0, // AND IMMEDIATE (low low) (NILL R1,I2) + [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_31}}, + {NILF, 0xff0f000000000000, 0xc00b000000000000, 0x0, // AND IMMEDIATE (low) (NILF R1,I2) + [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_47}}, + {NCRK, 0xffff000000000000, 0xb9f5000000000000, 0xf0000000000, // AND WITH COMPLEMENT(32) (NCRK R1,R2,R3) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31, ap_Reg_16_19}}, + {NCGRK, 
0xffff000000000000, 0xb9e5000000000000, 0xf0000000000, // AND WITH COMPLEMENT(64) (NCGRK R1,R2,R3) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31, ap_Reg_16_19}}, + {BAL, 0xff00000000000000, 0x4500000000000000, 0x0, // BRANCH AND LINK (BAL R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {BALR, 0xff00000000000000, 0x500000000000000, 0x0, // BRANCH AND LINK (BALR R1,R2) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15}}, + {BAS, 0xff00000000000000, 0x4d00000000000000, 0x0, // BRANCH AND SAVE (BAS R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {BASR, 0xff00000000000000, 0xd00000000000000, 0x0, // BRANCH AND SAVE (BASR R1,R2) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15}}, + {BASSM, 0xff00000000000000, 0xc00000000000000, 0x0, // BRANCH AND SAVE AND SET MODE (BASSM R1,R2) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15}}, + {BSA, 0xffff000000000000, 0xb25a000000000000, 0xff0000000000, // BRANCH AND SET AUTHORITY (BSA R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {BSM, 0xff00000000000000, 0xb00000000000000, 0x0, // BRANCH AND SET MODE (BSM R1,R2) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15}}, + {BAKR, 0xffff000000000000, 0xb240000000000000, 0xff0000000000, // BRANCH AND STACK (BAKR R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {BSG, 0xffff000000000000, 0xb258000000000000, 0xff0000000000, // BRANCH IN SUBSPACE GROUP (BSG R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {BIC, 0xff00000000ff0000, 0xe300000000470000, 0x0, // BRANCH INDIRECT ON CONDITION (BIC M1,D2(X2,B2)) + [8]*argField{ap_Mask_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {BC, 0xff00000000000000, 0x4700000000000000, 0x0, // BRANCH ON CONDITION (BC M1,D2(X2,B2)) + [8]*argField{ap_Mask_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {BCR, 0xff00000000000000, 0x700000000000000, 0x0, // BRANCH ON CONDITION (BCR M1,R2) + [8]*argField{ap_Mask_8_11, 
ap_Reg_12_15}}, + {BCT, 0xff00000000000000, 0x4600000000000000, 0x0, // BRANCH ON COUNT (32) (BCT R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {BCTR, 0xff00000000000000, 0x600000000000000, 0x0, // BRANCH ON COUNT (32) (BCTR R1,R2) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15}}, + {BCTG, 0xff00000000ff0000, 0xe300000000460000, 0x0, // BRANCH ON COUNT (64) (BCTG R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {BCTGR, 0xffff000000000000, 0xb946000000000000, 0xff0000000000, // BRANCH ON COUNT (64) (BCTGR R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {BXH, 0xff00000000000000, 0x8600000000000000, 0x0, // BRANCH ON INDEX HIGH (32) (BXH R1,R3,D2(B2)) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15, ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {BXHG, 0xff00000000ff0000, 0xeb00000000440000, 0x0, // BRANCH ON INDEX HIGH (64) (BXHG R1,R3,D2(B2)) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15, ap_DispSigned20_20_39, ap_BaseReg_16_19}}, + {BXLE, 0xff00000000000000, 0x8700000000000000, 0x0, // BRANCH ON INDEX LOW OR EQUAL (32) (BXLE R1,R3,D2(B2)) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15, ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {BXLEG, 0xff00000000ff0000, 0xeb00000000450000, 0x0, // BRANCH ON INDEX LOW OR EQUAL (64) (BXLEG R1,R3,D2(B2)) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15, ap_DispSigned20_20_39, ap_BaseReg_16_19}}, + {BPP, 0xff00000000000000, 0xc700000000000000, 0xf000000000000, // BRANCH PREDICTION PRELOAD (BPP M1,RI2,D3(B3)) + [8]*argField{ap_Mask_8_11, ap_RegImSigned16_32_47, ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {BPRP, 0xff00000000000000, 0xc500000000000000, 0x0, // BRANCH PREDICTION RELATIVE PRELOAD (BPRP M1,RI2,RI3) + [8]*argField{ap_Mask_8_11, ap_RegImSigned12_12_23, ap_RegImSigned24_24_47}}, + {BRAS, 0xff0f000000000000, 0xa705000000000000, 0x0, // BRANCH RELATIVE AND SAVE (BRAS R1,RI2) + [8]*argField{ap_Reg_8_11, ap_RegImSigned16_16_31}}, + {BRASL, 
0xff0f000000000000, 0xc005000000000000, 0x0, // BRANCH RELATIVE AND SAVE LONG (BRASL R1,RI2) + [8]*argField{ap_Reg_8_11, ap_RegImSigned32_16_47}}, + {BRC, 0xff0f000000000000, 0xa704000000000000, 0x0, // BRANCH RELATIVE ON CONDITION (BRC M1,RI2) + [8]*argField{ap_Mask_8_11, ap_RegImSigned16_16_31}}, + {BRCL, 0xff0f000000000000, 0xc004000000000000, 0x0, // BRANCH RELATIVE ON CONDITION LONG (BRCL M1,RI2) + [8]*argField{ap_Mask_8_11, ap_RegImSigned32_16_47}}, + {BRCT, 0xff0f000000000000, 0xa706000000000000, 0x0, // BRANCH RELATIVE ON COUNT (32) (BRCT R1,RI2) + [8]*argField{ap_Reg_8_11, ap_RegImSigned16_16_31}}, + {BRCTG, 0xff0f000000000000, 0xa707000000000000, 0x0, // BRANCH RELATIVE ON COUNT (64) (BRCTG R1,RI2) + [8]*argField{ap_Reg_8_11, ap_RegImSigned16_16_31}}, + {BRCTH, 0xff0f000000000000, 0xcc06000000000000, 0x0, // BRANCH RELATIVE ON COUNT HIGH (32) (BRCTH R1,RI2) + [8]*argField{ap_Reg_8_11, ap_RegImSigned32_16_47}}, + {BRXH, 0xff00000000000000, 0x8400000000000000, 0x0, // BRANCH RELATIVE ON INDEX HIGH (32) (BRXH R1,R3,RI2) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15, ap_RegImSigned16_16_31}}, + {BRXHG, 0xff00000000ff0000, 0xec00000000440000, 0xff000000, // BRANCH RELATIVE ON INDEX HIGH (64) (BRXHG R1,R3,RI2) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15, ap_RegImSigned16_16_31}}, + {BRXLE, 0xff00000000000000, 0x8500000000000000, 0x0, // BRANCH RELATIVE ON INDEX LOW OR EQ. (32) (BRXLE R1,R3,RI2) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15, ap_RegImSigned16_16_31}}, + {BRXLG, 0xff00000000ff0000, 0xec00000000450000, 0xff000000, // BRANCH RELATIVE ON INDEX LOW OR EQ. 
(64) (BRXLG R1,R3,RI2) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15, ap_RegImSigned16_16_31}}, + {XSCH, 0xffff000000000000, 0xb276000000000000, 0xffff00000000, // CANCEL SUBCHANNEL (XSCH) + [8]*argField{}}, + {CKSM, 0xffff000000000000, 0xb241000000000000, 0xff0000000000, // CHECKSUM (CKSM R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {KM, 0xffff000000000000, 0xb92e000000000000, 0xff0000000000, // CIPHER MESSAGE (KM R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {KMA, 0xffff000000000000, 0xb929000000000000, 0xf0000000000, // CIPHER MESSAGE WITH AUTHENTICATION (KMA R1,R3,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_16_19, ap_Reg_28_31}}, + {KMC, 0xffff000000000000, 0xb92f000000000000, 0xff0000000000, // CIPHER MESSAGE WITH CHAINING (KMC R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {KMF, 0xffff000000000000, 0xb92a000000000000, 0xff0000000000, // CIPHER MESSAGE WITH CIPHER FEEDBACK (KMF R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {KMCTR, 0xffff000000000000, 0xb92d000000000000, 0xf0000000000, // CIPHER MESSAGE WITH COUNTER (KMCTR R1,R3,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_16_19, ap_Reg_28_31}}, + {KMO, 0xffff000000000000, 0xb92b000000000000, 0xff0000000000, // CIPHER MESSAGE WITH OUTPUT FEEDBACK (KMO R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {CSCH, 0xffff000000000000, 0xb230000000000000, 0xffff00000000, // CLEAR SUBCHANNEL (CSCH) + [8]*argField{}}, + {C, 0xff00000000000000, 0x5900000000000000, 0x0, // COMPARE (32) (C R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {CR, 0xff00000000000000, 0x1900000000000000, 0x0, // COMPARE (32) (CR R1,R2) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15}}, + {CY, 0xff00000000ff0000, 0xe300000000590000, 0x0, // COMPARE (32) (CY R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {CG, 0xff00000000ff0000, 0xe300000000200000, 0x0, // COMPARE (64) (CG R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, 
ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {CGR, 0xffff000000000000, 0xb920000000000000, 0xff0000000000, // COMPARE (64) (CGR R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {CGF, 0xff00000000ff0000, 0xe300000000300000, 0x0, // COMPARE (64←32) (CGF R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {CGFR, 0xffff000000000000, 0xb930000000000000, 0xff0000000000, // COMPARE (64←32) (CGFR R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {CXBR, 0xffff000000000000, 0xb349000000000000, 0xff0000000000, // COMPARE (extended BFP) (CXBR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31}}, + {CXTR, 0xffff000000000000, 0xb3ec000000000000, 0xff0000000000, // COMPARE (extended DFP) (CXTR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31}}, + {CXR, 0xffff000000000000, 0xb369000000000000, 0xff0000000000, // COMPARE (extended HFP) (CXR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31}}, + {CDB, 0xff00000000ff0000, 0xed00000000190000, 0xff000000, // COMPARE (long BFP) (CDB R1,D2(X2,B2)) + [8]*argField{ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {CDBR, 0xffff000000000000, 0xb319000000000000, 0xff0000000000, // COMPARE (long BFP) (CDBR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31}}, + {CDTR, 0xffff000000000000, 0xb3e4000000000000, 0xff0000000000, // COMPARE (long DFP) (CDTR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31}}, + {CD, 0xff00000000000000, 0x6900000000000000, 0x0, // COMPARE (long HFP) (CD R1,D2(X2,B2)) + [8]*argField{ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {CDR, 0xff00000000000000, 0x2900000000000000, 0x0, // COMPARE (long HFP) (CDR R1,R2) + [8]*argField{ap_FPReg_8_11, ap_FPReg_12_15}}, + {CEB, 0xff00000000ff0000, 0xed00000000090000, 0xff000000, // COMPARE (short BFP) (CEB R1,D2(X2,B2)) + [8]*argField{ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {CEBR, 
0xffff000000000000, 0xb309000000000000, 0xff0000000000, // COMPARE (short BFP) (CEBR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31}}, + {CE, 0xff00000000000000, 0x7900000000000000, 0x0, // COMPARE (short HFP) (CE R1,D2(X2,B2)) + [8]*argField{ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {CER, 0xff00000000000000, 0x3900000000000000, 0x0, // COMPARE (short HFP) (CER R1,R2) + [8]*argField{ap_FPReg_8_11, ap_FPReg_12_15}}, + {CRB, 0xff00000000ff0000, 0xec00000000f60000, 0xf000000, // COMPARE AND BRANCH (32) (CRB R1,R2,M3,D4(B4)) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15, ap_Mask_32_35, ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {CGRB, 0xff00000000ff0000, 0xec00000000e40000, 0xf000000, // COMPARE AND BRANCH (64) (CGRB R1,R2,M3,D4(B4)) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15, ap_Mask_32_35, ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {CRJ, 0xff00000000ff0000, 0xec00000000760000, 0xf000000, // COMPARE AND BRANCH RELATIVE (32) (CRJ R1,R2,M3,RI4) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15, ap_Mask_32_35, ap_RegImSigned16_16_31}}, + {CGRJ, 0xff00000000ff0000, 0xec00000000640000, 0xf000000, // COMPARE AND BRANCH RELATIVE (64) (CGRJ R1,R2,M3,RI4) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15, ap_Mask_32_35, ap_RegImSigned16_16_31}}, + {CFC, 0xffff000000000000, 0xb21a000000000000, 0x0, // COMPARE AND FORM CODEWORD (CFC D2(B2)) + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {CRDTE, 0xffff000000000000, 0xb98f000000000000, 0x0, // COMPARE AND REPLACE DAT TABLE ENTRY (CRDTE R1,R3,R2,M4) + [8]*argField{ap_Reg_24_27, ap_Reg_16_19, ap_Reg_28_31, ap_Mask_20_23}}, + {KXBR, 0xffff000000000000, 0xb348000000000000, 0xff0000000000, // COMPARE AND SIGNAL (extended BFP) (KXBR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31}}, + {KXTR, 0xffff000000000000, 0xb3e8000000000000, 0xff0000000000, // COMPARE AND SIGNAL (extended DFP) (KXTR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31}}, + {KDB, 0xff00000000ff0000, 0xed00000000180000, 
0xff000000, // COMPARE AND SIGNAL (long BFP) (KDB R1,D2(X2,B2)) + [8]*argField{ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {KDBR, 0xffff000000000000, 0xb318000000000000, 0xff0000000000, // COMPARE AND SIGNAL (long BFP) (KDBR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31}}, + {KDTR, 0xffff000000000000, 0xb3e0000000000000, 0xff0000000000, // COMPARE AND SIGNAL (long DFP) (KDTR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31}}, + {KEB, 0xff00000000ff0000, 0xed00000000080000, 0xff000000, // COMPARE AND SIGNAL (short BFP) (KEB R1,D2(X2,B2)) + [8]*argField{ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {KEBR, 0xffff000000000000, 0xb308000000000000, 0xff0000000000, // COMPARE AND SIGNAL (short BFP) (KEBR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31}}, + {CS, 0xff00000000000000, 0xba00000000000000, 0x0, // COMPARE AND SWAP (32) (CS R1,R3,D2(B2)) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15, ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {CSY, 0xff00000000ff0000, 0xeb00000000140000, 0x0, // COMPARE AND SWAP (32) (CSY R1,R3,D2(B2)) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15, ap_DispSigned20_20_39, ap_BaseReg_16_19}}, + {CSG, 0xff00000000ff0000, 0xeb00000000300000, 0x0, // COMPARE AND SWAP (64) (CSG R1,R3,D2(B2)) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15, ap_DispSigned20_20_39, ap_BaseReg_16_19}}, + {CSP, 0xffff000000000000, 0xb250000000000000, 0xff0000000000, // COMPARE AND SWAP AND PURGE (32) (CSP R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {CSPG, 0xffff000000000000, 0xb98a000000000000, 0xff0000000000, // COMPARE AND SWAP AND PURGE (64) (CSPG R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {CSST, 0xff0f000000000000, 0xc802000000000000, 0x0, // COMPARE AND SWAP AND STORE (CSST D1(B1),D2(B2),R3) + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19, ap_DispUnsigned_36_47, ap_BaseReg_32_35, ap_Reg_8_11}}, + {CRT, 0xffff000000000000, 0xb972000000000000, 0xf0000000000, // COMPARE AND 
TRAP (32) (CRT R1,R2,M3) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31, ap_Mask_16_19}}, + {CGRT, 0xffff000000000000, 0xb960000000000000, 0xf0000000000, // COMPARE AND TRAP (64) (CGRT R1,R2,M3) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31, ap_Mask_16_19}}, + {CEXTR, 0xffff000000000000, 0xb3fc000000000000, 0xff0000000000, // COMPARE BIASED EXPONENT (extended DFP) (CEXTR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31}}, + {CEDTR, 0xffff000000000000, 0xb3f4000000000000, 0xff0000000000, // COMPARE BIASED EXPONENT (long DFP) (CEDTR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31}}, + {CP, 0xff00000000000000, 0xf900000000000000, 0x0, // COMPARE DECIMAL (CP D1(L1,B1),D2(L2,B2)) + [8]*argField{ap_DispUnsigned_20_31, ap_Len_8_11, ap_BaseReg_16_19, ap_DispUnsigned_36_47, ap_Len_12_15, ap_BaseReg_32_35}}, + {CDS, 0xff00000000000000, 0xbb00000000000000, 0x0, // COMPARE DOUBLE AND SWAP (32) (CDS R1,R3,D2(B2)) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15, ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {CDSY, 0xff00000000ff0000, 0xeb00000000310000, 0x0, // COMPARE DOUBLE AND SWAP (32) (CDSY R1,R3,D2(B2)) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15, ap_DispSigned20_20_39, ap_BaseReg_16_19}}, + {CDSG, 0xff00000000ff0000, 0xeb000000003e0000, 0x0, // COMPARE DOUBLE AND SWAP (64) (CDSG R1,R3,D2(B2)) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15, ap_DispSigned20_20_39, ap_BaseReg_16_19}}, + {CH, 0xff00000000000000, 0x4900000000000000, 0x0, // COMPARE HALFWORD (32→16) (CH R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {CHY, 0xff00000000ff0000, 0xe300000000790000, 0x0, // COMPARE HALFWORD (32→16) (CHY R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {CGH, 0xff00000000ff0000, 0xe300000000340000, 0x0, // COMPARE HALFWORD (64←16) (CGH R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {CHHSI, 0xffff000000000000, 0xe554000000000000, 
0x0, // COMPARE HALFWORD IMMEDIATE (16→16) (CHHSI D1(B1),I2) + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19, ap_ImmSigned16_32_47}}, + {CHI, 0xff0f000000000000, 0xa70e000000000000, 0x0, // COMPARE HALFWORD IMMEDIATE (32←16) (CHI R1,I2) + [8]*argField{ap_Reg_8_11, ap_ImmSigned16_16_31}}, + {CHSI, 0xffff000000000000, 0xe55c000000000000, 0x0, // COMPARE HALFWORD IMMEDIATE (32←16) (CHSI D1(B1),I2) + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19, ap_ImmSigned16_32_47}}, + {CGHI, 0xff0f000000000000, 0xa70f000000000000, 0x0, // COMPARE HALFWORD IMMEDIATE (64←16) (CGHI R1,I2) + [8]*argField{ap_Reg_8_11, ap_ImmSigned16_16_31}}, + {CGHSI, 0xffff000000000000, 0xe558000000000000, 0x0, // COMPARE HALFWORD IMMEDIATE (64←16) (CGHSI D1(B1),I2) + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19, ap_ImmSigned16_32_47}}, + {CHRL, 0xff0f000000000000, 0xc605000000000000, 0x0, // COMPAREHALFWORDRELATIVE LONG (32→16) (CHRL R1,RI2) + [8]*argField{ap_Reg_8_11, ap_RegImSigned32_16_47}}, + {CGHRL, 0xff0f000000000000, 0xc604000000000000, 0x0, // COMPAREHALFWORDRELATIVE LONG (64←16) (CGHRL R1,RI2) + [8]*argField{ap_Reg_8_11, ap_RegImSigned32_16_47}}, + {CHF, 0xff00000000ff0000, 0xe300000000cd0000, 0x0, // COMPARE HIGH (32) (CHF R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {CHHR, 0xffff000000000000, 0xb9cd000000000000, 0xff0000000000, // COMPARE HIGH (32) (CHHR R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {CHLR, 0xffff000000000000, 0xb9dd000000000000, 0xff0000000000, // COMPARE HIGH (32) (CHLR R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {CFI, 0xff0f000000000000, 0xc20d000000000000, 0x0, // COMPARE IMMEDIATE (32) (CFI R1,I2) + [8]*argField{ap_Reg_8_11, ap_ImmSigned32_16_47}}, + {CGFI, 0xff0f000000000000, 0xc20c000000000000, 0x0, // COMPARE IMMEDIATE (64←32) (CGFI R1,I2) + [8]*argField{ap_Reg_8_11, ap_ImmSigned32_16_47}}, + {CIB, 0xff00000000ff0000, 0xec00000000fe0000, 0x0, // COMPARE IMMEDIATE AND 
BRANCH (32←8) (CIB R1,I2,M3,D4(B4)) + [8]*argField{ap_Reg_8_11, ap_ImmSigned8_32_39, ap_Mask_12_15, ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {CGIB, 0xff00000000ff0000, 0xec00000000fc0000, 0x0, // COMPARE IMMEDIATE AND BRANCH (64←8) (CGIB R1,I2,M3,D4(B4)) + [8]*argField{ap_Reg_8_11, ap_ImmSigned8_32_39, ap_Mask_12_15, ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {CIJ, 0xff00000000ff0000, 0xec000000007e0000, 0x0, // COMPARE IMMEDIATE AND BRANCH RELATIVE(32→8) (CIJ R1,I2,M3,RI4) + [8]*argField{ap_Reg_8_11, ap_ImmSigned8_32_39, ap_Mask_12_15, ap_RegImSigned16_16_31}}, + {CGIJ, 0xff00000000ff0000, 0xec000000007c0000, 0x0, // COMPARE IMMEDIATE AND BRANCH RELATIVE(64→8) (CGIJ R1,I2,M3,RI4) + [8]*argField{ap_Reg_8_11, ap_ImmSigned8_32_39, ap_Mask_12_15, ap_RegImSigned16_16_31}}, + {CIT, 0xff00000000ff0000, 0xec00000000720000, 0xf00000f000000, // COMPARE IMMEDIATE AND TRAP (32→16) (CIT R1,I2,M3) + [8]*argField{ap_Reg_8_11, ap_ImmSigned16_16_31, ap_Mask_32_35}}, + {CGIT, 0xff00000000ff0000, 0xec00000000700000, 0xf00000f000000, // COMPARE IMMEDIATE AND TRAP (64←16) (CGIT R1,I2,M3) + [8]*argField{ap_Reg_8_11, ap_ImmSigned16_16_31, ap_Mask_32_35}}, + {CIH, 0xff0f000000000000, 0xcc0d000000000000, 0x0, // COMPARE IMMEDIATE HIGH (32) (CIH R1,I2) + [8]*argField{ap_Reg_8_11, ap_ImmSigned32_16_47}}, + {CL, 0xff00000000000000, 0x5500000000000000, 0x0, // COMPARE LOGICAL (32) (CL R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {CLR, 0xff00000000000000, 0x1500000000000000, 0x0, // COMPARE LOGICAL (32) (CLR R1,R2) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15}}, + {CLY, 0xff00000000ff0000, 0xe300000000550000, 0x0, // COMPARE LOGICAL (32) (CLY R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {CLG, 0xff00000000ff0000, 0xe300000000210000, 0x0, // COMPARE LOGICAL (64) (CLG R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + 
{CLGR, 0xffff000000000000, 0xb921000000000000, 0xff0000000000, // COMPARE LOGICAL (64) (CLGR R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {CLGF, 0xff00000000ff0000, 0xe300000000310000, 0x0, // COMPARE LOGICAL (64→32) (CLGF R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {CLGFR, 0xffff000000000000, 0xb931000000000000, 0xff0000000000, // COMPARE LOGICAL (64→32) (CLGFR R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {CLC, 0xff00000000000000, 0xd500000000000000, 0x0, // COMPARE LOGICAL (character) (CLC D1(L1,B1),D2(B2)) + [8]*argField{ap_DispUnsigned_20_31, ap_Len_8_15, ap_BaseReg_16_19, ap_DispUnsigned_36_47, ap_BaseReg_32_35}}, + {CLI, 0xff00000000000000, 0x9500000000000000, 0x0, // COMPARE LOGICAL (immediate) (CLI D1(B1),I2) + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19, ap_ImmUnsigned_8_15}}, + {CLIY, 0xff00000000ff0000, 0xeb00000000550000, 0x0, // COMPARE LOGICAL (immediate) (CLIY D1(B1),I2) + [8]*argField{ap_DispSigned20_20_39, ap_BaseReg_16_19, ap_ImmUnsigned_8_15}}, + {CLRB, 0xff00000000ff0000, 0xec00000000f70000, 0xf000000, // COMPARE LOGICAL AND BRANCH (32) (CLRB R1,R2,M3,D4(B4)) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15, ap_Mask_32_35, ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {CLGRB, 0xff00000000ff0000, 0xec00000000e50000, 0xf000000, // COMPARE LOGICAL AND BRANCH (64) (CLGRB R1,R2,M3,D4(B4)) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15, ap_Mask_32_35, ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {CLRJ, 0xff00000000ff0000, 0xec00000000770000, 0xf000000, // COMPARE LOGICAL AND BRANCH RELATIVE(32) (CLRJ R1,R2,M3,RI4) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15, ap_Mask_32_35, ap_RegImSigned16_16_31}}, + {CLGRJ, 0xff00000000ff0000, 0xec00000000650000, 0xf000000, // COMPARE LOGICAL AND BRANCH RELATIVE(64) (CLGRJ R1,R2,M3,RI4) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15, ap_Mask_32_35, ap_RegImSigned16_16_31}}, + {CLRT, 0xffff000000000000, 0xb973000000000000, 0xf0000000000, // COMPARE 
LOGICAL AND TRAP (32) (CLRT R1,R2,M3) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31, ap_Mask_16_19}}, + {CLT, 0xff00000000ff0000, 0xeb00000000230000, 0x0, // COMPARE LOGICAL AND TRAP (32) (CLT R1,M3,D2(B2)) + [8]*argField{ap_Reg_8_11, ap_Mask_12_15, ap_DispSigned20_20_39, ap_BaseReg_16_19}}, + {CLGRT, 0xffff000000000000, 0xb961000000000000, 0xf0000000000, // COMPARE LOGICAL AND TRAP (64) (CLGRT R1,R2,M3) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31, ap_Mask_16_19}}, + {CLGT, 0xff00000000ff0000, 0xeb000000002b0000, 0x0, // COMPARE LOGICAL AND TRAP (64) (CLGT R1,M3,D2(B2)) + [8]*argField{ap_Reg_8_11, ap_Mask_12_15, ap_DispSigned20_20_39, ap_BaseReg_16_19}}, + {CLMH, 0xff00000000ff0000, 0xeb00000000200000, 0x0, // COMPARE LOGICAL CHAR. UNDER MASK (high) (CLMH R1,M3,D2(B2)) + [8]*argField{ap_Reg_8_11, ap_Mask_12_15, ap_DispSigned20_20_39, ap_BaseReg_16_19}}, + {CLM, 0xff00000000000000, 0xbd00000000000000, 0x0, // COMPARE LOGICAL CHAR. UNDER MASK (low) (CLM R1,M3,D2(B2)) + [8]*argField{ap_Reg_8_11, ap_Mask_12_15, ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {CLMY, 0xff00000000ff0000, 0xeb00000000210000, 0x0, // COMPARE LOGICAL CHAR. 
UNDER MASK (low) (CLMY R1,M3,D2(B2)) + [8]*argField{ap_Reg_8_11, ap_Mask_12_15, ap_DispSigned20_20_39, ap_BaseReg_16_19}}, + {CLHF, 0xff00000000ff0000, 0xe300000000cf0000, 0x0, // COMPARE LOGICAL HIGH (32) (CLHF R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {CLHHR, 0xffff000000000000, 0xb9cf000000000000, 0xff0000000000, // COMPARE LOGICAL HIGH (32) (CLHHR R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {CLHLR, 0xffff000000000000, 0xb9df000000000000, 0xff0000000000, // COMPARE LOGICAL HIGH (32) (CLHLR R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {CLHHSI, 0xffff000000000000, 0xe555000000000000, 0x0, // COMPARE LOGICAL IMMEDIATE (16←16) (CLHHSI D1(B1),I2) + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19, ap_ImmUnsigned_32_47}}, + {CLFI, 0xff0f000000000000, 0xc20f000000000000, 0x0, // COMPARE LOGICAL IMMEDIATE (32) (CLFI R1,I2) + [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_47}}, + {CLFHSI, 0xffff000000000000, 0xe55d000000000000, 0x0, // COMPARE LOGICAL IMMEDIATE (32←16) (CLFHSI D1(B1),I2) + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19, ap_ImmUnsigned_32_47}}, + {CLGHSI, 0xffff000000000000, 0xe559000000000000, 0x0, // COMPARE LOGICAL IMMEDIATE (64←16) (CLGHSI D1(B1),I2) + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19, ap_ImmUnsigned_32_47}}, + {CLGFI, 0xff0f000000000000, 0xc20e000000000000, 0x0, // COMPARE LOGICAL IMMEDIATE (64←32) (CLGFI R1,I2) + [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_47}}, + {CLIB, 0xff00000000ff0000, 0xec00000000ff0000, 0x0, // COMPARE LOGICAL IMMEDIATE AND BRANCH(32←8) (CLIB R1,I2,M3,D4(B4)) + [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_32_39, ap_Mask_12_15, ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {CLGIB, 0xff00000000ff0000, 0xec00000000fd0000, 0x0, // COMPARE LOGICAL IMMEDIATE AND BRANCH(64→8) (CLGIB R1,I2,M3,D4(B4)) + [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_32_39, ap_Mask_12_15, ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {CLIJ, 
0xff00000000ff0000, 0xec000000007f0000, 0x0, // COMPARE LOGICAL IMMEDIATE AND BRANCH (CLIJ R1,I2,M3,RI4) + [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_32_39, ap_Mask_12_15, ap_RegImSigned16_16_31}}, + {CLGIJ, 0xff00000000ff0000, 0xec000000007d0000, 0x0, // RELATIVE (32→8)10COMPARE LOGICAL IMMEDIATE AND BRANCH (CLGIJ R1,I2,M3,RI4) + [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_32_39, ap_Mask_12_15, ap_RegImSigned16_16_31}}, + {CLFIT, 0xff00000000ff0000, 0xec00000000730000, 0xf00000f000000, // RELATIVE (64→8)COMPARE LOGICAL IMMEDIATE AND TRAP(32→16) (CLFIT R1,I2,M3) + [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_31, ap_Mask_32_35}}, + {CLGIT, 0xff00000000ff0000, 0xec00000000710000, 0xf00000f000000, // COMPARE LOGICAL IMMEDIATE AND TRAP(64←16) (CLGIT R1,I2,M3) + [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_31, ap_Mask_32_35}}, + {CLIH, 0xff0f000000000000, 0xcc0f000000000000, 0x0, // COMPARE LOGICAL IMMEDIATE HIGH (32) (CLIH R1,I2) + [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_47}}, + {CLCL, 0xff00000000000000, 0xf00000000000000, 0x0, // COMPARE LOGICAL LONG (CLCL R1,R2) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15}}, + {CLCLE, 0xff00000000000000, 0xa900000000000000, 0x0, // COMPARE LOGICAL LONG EXTENDED (CLCLE R1,R3,D2(B2)) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15, ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {CLCLU, 0xff00000000ff0000, 0xeb000000008f0000, 0x0, // COMPARE LOGICAL LONG UNICODE (CLCLU R1,R3,D2(B2)) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15, ap_DispSigned20_20_39, ap_BaseReg_16_19}}, + {CLRL, 0xff0f000000000000, 0xc60f000000000000, 0x0, // COMPARE LOGICAL RELATIVE LONG (32) (CLRL R1,RI2) + [8]*argField{ap_Reg_8_11, ap_RegImSigned32_16_47}}, + {CLHRL, 0xff0f000000000000, 0xc607000000000000, 0x0, // COMPARE LOGICAL RELATIVE LONG (32→16) (CLHRL R1,RI2) + [8]*argField{ap_Reg_8_11, ap_RegImSigned32_16_47}}, + {CLGRL, 0xff0f000000000000, 0xc60a000000000000, 0x0, // COMPARE LOGICAL RELATIVE LONG (64) (CLGRL R1,RI2) + [8]*argField{ap_Reg_8_11, ap_RegImSigned32_16_47}}, + 
{CLGHRL, 0xff0f000000000000, 0xc606000000000000, 0x0, // COMPARE LOGICAL RELATIVE LONG (64→16) (CLGHRL R1,RI2) + [8]*argField{ap_Reg_8_11, ap_RegImSigned32_16_47}}, + {CLGFRL, 0xff0f000000000000, 0xc60e000000000000, 0x0, // COMPARE LOGICAL RELATIVE LONG (64→32) (CLGFRL R1,RI2) + [8]*argField{ap_Reg_8_11, ap_RegImSigned32_16_47}}, + {CLST, 0xffff000000000000, 0xb25d000000000000, 0xff0000000000, // COMPARE LOGICAL STRING (CLST R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {CRL, 0xff0f000000000000, 0xc60d000000000000, 0x0, // COMPARE RELATIVE LONG (32) (CRL R1,RI2) + [8]*argField{ap_Reg_8_11, ap_RegImSigned32_16_47}}, + {CGRL, 0xff0f000000000000, 0xc608000000000000, 0x0, // COMPARE RELATIVE LONG (64) (CGRL R1,RI2) + [8]*argField{ap_Reg_8_11, ap_RegImSigned32_16_47}}, + {CGFRL, 0xff0f000000000000, 0xc60c000000000000, 0x0, // COMPARE RELATIVE LONG (64←32) (CGFRL R1,RI2) + [8]*argField{ap_Reg_8_11, ap_RegImSigned32_16_47}}, + {CUSE, 0xffff000000000000, 0xb257000000000000, 0xff0000000000, // COMPARE UNTIL SUBSTRING EQUAL (CUSE R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {CMPSC, 0xffff000000000000, 0xb263000000000000, 0xff0000000000, // COMPRESSION CALL (CMPSC R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {KDSA, 0xffff000000000000, 0xb93a000000000000, 0xff0000000000, // COMPUTE DIGITAL SIGNATURE AUTHENTICATION (KDSA R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {KIMD, 0xffff000000000000, 0xb93e000000000000, 0xff0000000000, // COMPUTE INTERMEDIATE MESSAGE DIGEST (KIMD R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {KLMD, 0xffff000000000000, 0xb93f000000000000, 0xff0000000000, // COMPUTE LAST MESSAGE DIGEST (KLMD R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {KMAC, 0xffff000000000000, 0xb91e000000000000, 0xff0000000000, // COMPUTE MESSAGE AUTHENTICATION CODE (KMAC R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {THDR, 0xffff000000000000, 0xb359000000000000, 0xff0000000000, // CONVERT BFP TO HFP (long) (THDR R1,R2) + 
[8]*argField{ap_FPReg_24_27, ap_FPReg_28_31}}, + {THDER, 0xffff000000000000, 0xb358000000000000, 0xff0000000000, // CONVERT BFP TO HFP (short to long) (THDER R1,R2) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31}}, + {CXFBR, 0xffff000000000000, 0xb396000000000000, 0xff0000000000, // CONVERT FROM FIXED (32 to extended BFP) (CXFBR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_Reg_28_31}}, + {CXFBRA, 0xffff000000000000, 0xb396000000000000, 0x0, // CONVERT FROM FIXED (32 to extended BFP) (CXFBRA R1,M3,R2,M4) + [8]*argField{ap_FPReg_24_27, ap_Mask_16_19, ap_Reg_28_31, ap_Mask_20_23}}, + {CXFTR, 0xffff000000000000, 0xb959000000000000, 0x0, // CONVERT FROM FIXED (32 to extended DFP) (CXFTR R1,M3,R2,M4) + [8]*argField{ap_FPReg_24_27, ap_Mask_16_19, ap_Reg_28_31, ap_Mask_20_23}}, + {CXFR, 0xffff000000000000, 0xb3b6000000000000, 0xff0000000000, // CONVERT FROM FIXED (32 to extended HFP) (CXFR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_Reg_28_31}}, + {CDFBR, 0xffff000000000000, 0xb395000000000000, 0xff0000000000, // CONVERT FROM FIXED (32 to long BFP) (CDFBR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_Reg_28_31}}, + {CDFBRA, 0xffff000000000000, 0xb395000000000000, 0x0, // CONVERT FROM FIXED (32 to long BFP) (CDFBRA R1,M3,R2,M4) + [8]*argField{ap_FPReg_24_27, ap_Mask_16_19, ap_Reg_28_31, ap_Mask_20_23}}, + {CDFTR, 0xffff000000000000, 0xb951000000000000, 0x0, // CONVERT FROM FIXED (32 to long DFP) (CDFTR R1,M3,R2,M4) + [8]*argField{ap_FPReg_24_27, ap_Mask_16_19, ap_Reg_28_31, ap_Mask_20_23}}, + {CDFR, 0xffff000000000000, 0xb3b5000000000000, 0xff0000000000, // CONVERT FROM FIXED (32 to long HFP) (CDFR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_Reg_28_31}}, + {CEFBR, 0xffff000000000000, 0xb394000000000000, 0xff0000000000, // CONVERT FROM FIXED (32 to short BFP) (CEFBR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_Reg_28_31}}, + {CEFBRA, 0xffff000000000000, 0xb394000000000000, 0x0, // CONVERT FROM FIXED (32 to short BFP) (CEFBRA R1,M3,R2,M4) + [8]*argField{ap_FPReg_24_27, ap_Mask_16_19, 
ap_Reg_28_31, ap_Mask_20_23}}, + {CEFR, 0xffff000000000000, 0xb3b4000000000000, 0xff0000000000, // CONVERT FROM FIXED (32 to short HFP) (CEFR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_Reg_28_31}}, + {CXGBR, 0xffff000000000000, 0xb3a6000000000000, 0xff0000000000, // CONVERT FROM FIXED (64 to extended BFP) (CXGBR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_Reg_28_31}}, + {CXGBRA, 0xffff000000000000, 0xb3a6000000000000, 0x0, // CONVERT FROM FIXED (64 to extended BFP) (CXGBRA R1,M3,R2,M4) + [8]*argField{ap_FPReg_24_27, ap_Mask_16_19, ap_Reg_28_31, ap_Mask_20_23}}, + {CXGTR, 0xffff000000000000, 0xb3f9000000000000, 0xff0000000000, // CONVERT FROM FIXED (64 to extended DFP) (CXGTR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_Reg_28_31}}, + {CXGTRA, 0xffff000000000000, 0xb3f9000000000000, 0x0, // CONVERT FROM FIXED (64 to extended DFP) (CXGTRA R1,M3,R2,M4) + [8]*argField{ap_FPReg_24_27, ap_Mask_16_19, ap_Reg_28_31, ap_Mask_20_23}}, + {CXGR, 0xffff000000000000, 0xb3c6000000000000, 0xff0000000000, // CONVERT FROM FIXED (64 to extended HFP) (CXGR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_Reg_28_31}}, + {CDGBR, 0xffff000000000000, 0xb3a5000000000000, 0xff0000000000, // CONVERT FROM FIXED (64 to long BFP) (CDGBR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_Reg_28_31}}, + {CDGBRA, 0xffff000000000000, 0xb3a5000000000000, 0x0, // CONVERT FROM FIXED (64 to long BFP) (CDGBRA R1,M3,R2,M4) + [8]*argField{ap_FPReg_24_27, ap_Mask_16_19, ap_Reg_28_31, ap_Mask_20_23}}, + {CDGTR, 0xffff000000000000, 0xb3f1000000000000, 0xff0000000000, // CONVERT FROM FIXED (64 to long DFP) (CDGTR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_Reg_28_31}}, + {CDGTRA, 0xffff000000000000, 0xb3f1000000000000, 0x0, // CONVERT FROM FIXED (64 to long DFP) (CDGTRA R1,M3,R2,M4) + [8]*argField{ap_FPReg_24_27, ap_Mask_16_19, ap_Reg_28_31, ap_Mask_20_23}}, + {CDGR, 0xffff000000000000, 0xb3c5000000000000, 0xff0000000000, // CONVERT FROM FIXED (64 to long HFP) (CDGR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_Reg_28_31}}, + {CEGBR, 
0xffff000000000000, 0xb3a4000000000000, 0xff0000000000, // CONVERT FROM FIXED (64 to short BFP) (CEGBR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_Reg_28_31}}, + {CEGBRA, 0xffff000000000000, 0xb3a4000000000000, 0x0, // CONVERT FROM FIXED (64 to short BFP) (CEGBRA R1,M3,R2,M4) + [8]*argField{ap_FPReg_24_27, ap_Mask_16_19, ap_Reg_28_31, ap_Mask_20_23}}, + {CEGR, 0xffff000000000000, 0xb3c4000000000000, 0xff0000000000, // CONVERT FROM FIXED (64 to short HFP) (CEGR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_Reg_28_31}}, + {CXLFBR, 0xffff000000000000, 0xb392000000000000, 0x0, // CONVERT FROM LOGICAL (32 to extended BFP) (CXLFBR R1,M3,R2,M4) + [8]*argField{ap_FPReg_24_27, ap_Mask_16_19, ap_Reg_28_31, ap_Mask_20_23}}, + {CXLFTR, 0xffff000000000000, 0xb95b000000000000, 0x0, // CONVERT FROM LOGICAL (32 to extended DFP) (CXLFTR R1,M3,R2,M4) + [8]*argField{ap_FPReg_24_27, ap_Mask_16_19, ap_Reg_28_31, ap_Mask_20_23}}, + {CDLFBR, 0xffff000000000000, 0xb391000000000000, 0x0, // CONVERT FROM LOGICAL (32 to long BFP) (CDLFBR R1,M3,R2,M4) + [8]*argField{ap_FPReg_24_27, ap_Mask_16_19, ap_Reg_28_31, ap_Mask_20_23}}, + {CDLFTR, 0xffff000000000000, 0xb953000000000000, 0x0, // CONVERT FROM LOGICAL (32 to long DFP) (CDLFTR R1,M3,R2,M4) + [8]*argField{ap_FPReg_24_27, ap_Mask_16_19, ap_Reg_28_31, ap_Mask_20_23}}, + {CELFBR, 0xffff000000000000, 0xb390000000000000, 0x0, // CONVERT FROM LOGICAL (32 to short BFP) (CELFBR R1,M3,R2,M4) + [8]*argField{ap_FPReg_24_27, ap_Mask_16_19, ap_Reg_28_31, ap_Mask_20_23}}, + {CXLGBR, 0xffff000000000000, 0xb3a2000000000000, 0x0, // CONVERT FROM LOGICAL (64 to extended BFP) (CXLGBR R1,M3,R2,M4) + [8]*argField{ap_FPReg_24_27, ap_Mask_16_19, ap_Reg_28_31, ap_Mask_20_23}}, + {CXLGTR, 0xffff000000000000, 0xb95a000000000000, 0x0, // CONVERT FROM LOGICAL (64 to extended DFP) (CXLGTR R1,M3,R2,M4) + [8]*argField{ap_FPReg_24_27, ap_Mask_16_19, ap_Reg_28_31, ap_Mask_20_23}}, + {CDLGBR, 0xffff000000000000, 0xb3a1000000000000, 0x0, // CONVERT FROM LOGICAL (64 to long BFP) 
(CDLGBR R1,M3,R2,M4) + [8]*argField{ap_FPReg_24_27, ap_Mask_16_19, ap_Reg_28_31, ap_Mask_20_23}}, + {CDLGTR, 0xffff000000000000, 0xb952000000000000, 0x0, // CONVERT FROM LOGICAL (64 to long DFP) (CDLGTR R1,M3,R2,M4) + [8]*argField{ap_FPReg_24_27, ap_Mask_16_19, ap_Reg_28_31, ap_Mask_20_23}}, + {CELGBR, 0xffff000000000000, 0xb3a0000000000000, 0x0, // CONVERT FROM LOGICAL (64 to short BFP) (CELGBR R1,M3,R2,M4) + [8]*argField{ap_FPReg_24_27, ap_Mask_16_19, ap_Reg_28_31, ap_Mask_20_23}}, + {CXPT, 0xff00000000ff0000, 0xed00000000af0000, 0x0, // CONVERT FROM PACKED (to extended DFP) (CXPT R1,D2(L2,B2),M3) + [8]*argField{ap_FPReg_32_35, ap_DispUnsigned_20_31, ap_Len_8_15, ap_BaseReg_16_19, ap_Mask_36_39}}, + {CDPT, 0xff00000000ff0000, 0xed00000000ae0000, 0x0, // CONVERT FROM PACKED (to long DFP) (CDPT R1,D2(L2,B2),M3) + [8]*argField{ap_FPReg_32_35, ap_DispUnsigned_20_31, ap_Len_8_15, ap_BaseReg_16_19, ap_Mask_36_39}}, + {CXSTR, 0xffff000000000000, 0xb3fb000000000000, 0xff0000000000, // CONVERT FROM SIGNED PACKED (128 to extended DFP) (CXSTR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_Reg_28_31}}, + {CDSTR, 0xffff000000000000, 0xb3f3000000000000, 0xff0000000000, // CONVERT FROM SIGNED PACKED (64 to long DFP) (CDSTR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_Reg_28_31}}, + {CXUTR, 0xffff000000000000, 0xb3fa000000000000, 0xff0000000000, // CONVERT FROM UNSIGNED PACKED (128 to ext. 
DFP) (CXUTR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_Reg_28_31}}, + {CDUTR, 0xffff000000000000, 0xb3f2000000000000, 0xff0000000000, // CONVERT FROM UNSIGNED PACKED (64 to long DFP) (CDUTR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_Reg_28_31}}, + {CXZT, 0xff00000000ff0000, 0xed00000000ab0000, 0x0, // CONVERT FROM ZONED (to extended DFP) (CXZT R1,D2(L2,B2),M3) + [8]*argField{ap_FPReg_32_35, ap_DispUnsigned_20_31, ap_Len_8_15, ap_BaseReg_16_19, ap_Mask_36_39}}, + {CDZT, 0xff00000000ff0000, 0xed00000000aa0000, 0x0, // CONVERT FROM ZONED (to long DFP) (CDZT R1,D2(L2,B2),M3) + [8]*argField{ap_FPReg_32_35, ap_DispUnsigned_20_31, ap_Len_8_15, ap_BaseReg_16_19, ap_Mask_36_39}}, + {TBEDR, 0xffff000000000000, 0xb350000000000000, 0xf0000000000, // CONVERT HFP TO BFP (long to short) (TBEDR R1,M3,R2) + [8]*argField{ap_FPReg_24_27, ap_Mask_16_19, ap_FPReg_28_31}}, + {TBDR, 0xffff000000000000, 0xb351000000000000, 0xf0000000000, // CONVERT HFP TO BFP (long) (TBDR R1,M3,R2) + [8]*argField{ap_FPReg_24_27, ap_Mask_16_19, ap_FPReg_28_31}}, + {CVB, 0xff00000000000000, 0x4f00000000000000, 0x0, // CONVERT TO BINARY (32) (CVB R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {CVBY, 0xff00000000ff0000, 0xe300000000060000, 0x0, // CONVERT TO BINARY (32) (CVBY R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {CVBG, 0xff00000000ff0000, 0xe3000000000e0000, 0x0, // CONVERT TO BINARY (64) (CVBG R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {CVD, 0xff00000000000000, 0x4e00000000000000, 0x0, // CONVERT TO DECIMAL (32) (CVD R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {CVDY, 0xff00000000ff0000, 0xe300000000260000, 0x0, // CONVERT TO DECIMAL (32) (CVDY R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {CVDG, 
0xff00000000ff0000, 0xe3000000002e0000, 0x0, // CONVERT TO DECIMAL (64) (CVDG R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {CFXBR, 0xffff000000000000, 0xb39a000000000000, 0xf0000000000, // CONVERT TO FIXED (extended BFP to 32) (CFXBR R1,M3,R2) + [8]*argField{ap_Reg_24_27, ap_Mask_16_19, ap_FPReg_28_31}}, + {CFXBRA, 0xffff000000000000, 0xb39a000000000000, 0x0, // CONVERT TO FIXED (extended BFP to 32) (CFXBRA R1,M3,R2,M4) + [8]*argField{ap_Reg_24_27, ap_Mask_16_19, ap_FPReg_28_31, ap_Mask_20_23}}, + {CGXBR, 0xffff000000000000, 0xb3aa000000000000, 0xf0000000000, // CONVERT TO FIXED (extended BFP to 64) (CGXBR R1,M3,R2) + [8]*argField{ap_Reg_24_27, ap_Mask_16_19, ap_FPReg_28_31}}, + {CGXBRA, 0xffff000000000000, 0xb3aa000000000000, 0x0, // CONVERT TO FIXED (extended BFP to 64) (CGXBRA R1,M3,R2,M4) + [8]*argField{ap_Reg_24_27, ap_Mask_16_19, ap_FPReg_28_31, ap_Mask_20_23}}, + {CFXTR, 0xffff000000000000, 0xb949000000000000, 0x0, // CONVERT TO FIXED (extended DFP to 32) (CFXTR R1,M3,R2,M4) + [8]*argField{ap_Reg_24_27, ap_Mask_16_19, ap_FPReg_28_31, ap_Mask_20_23}}, + {CGXTR, 0xffff000000000000, 0xb3e9000000000000, 0xf0000000000, // CONVERT TO FIXED (extended DFP to 64) (CGXTR R1,M3,R2) + [8]*argField{ap_Reg_24_27, ap_Mask_16_19, ap_FPReg_28_31}}, + {CGXTRA, 0xffff000000000000, 0xb3e9000000000000, 0x0, // CONVERT TO FIXED (extended DFP to 64) (CGXTRA R1,M3,R2,M4) + [8]*argField{ap_Reg_24_27, ap_Mask_16_19, ap_FPReg_28_31, ap_Mask_20_23}}, + {CFXR, 0xffff000000000000, 0xb3ba000000000000, 0xf0000000000, // CONVERT TO FIXED (extended HFP to 32) (CFXR R1,M3,R2) + [8]*argField{ap_Reg_24_27, ap_Mask_16_19, ap_FPReg_28_31}}, + {CGXR, 0xffff000000000000, 0xb3ca000000000000, 0xf0000000000, // CONVERT TO FIXED (extended HFP to 64) (CGXR R1,M3,R2) + [8]*argField{ap_Reg_24_27, ap_Mask_16_19, ap_FPReg_28_31}}, + {CFDBR, 0xffff000000000000, 0xb399000000000000, 0xf0000000000, // CONVERT TO FIXED (long BFP to 32) (CFDBR R1,M3,R2) 
+ [8]*argField{ap_Reg_24_27, ap_Mask_16_19, ap_FPReg_28_31}}, + {CFDBRA, 0xffff000000000000, 0xb399000000000000, 0x0, // CONVERT TO FIXED (long BFP to 32) (CFDBRA R1,M3,R2,M4) + [8]*argField{ap_Reg_24_27, ap_Mask_16_19, ap_FPReg_28_31, ap_Mask_20_23}}, + {CGDBR, 0xffff000000000000, 0xb3a9000000000000, 0xf0000000000, // CONVERT TO FIXED (long BFP to 64) (CGDBR R1,M3,R2) + [8]*argField{ap_Reg_24_27, ap_Mask_16_19, ap_FPReg_28_31}}, + {CGDBRA, 0xffff000000000000, 0xb3a9000000000000, 0x0, // CONVERT TO FIXED (long BFP to 64) (CGDBRA R1,M3,R2,M4) + [8]*argField{ap_Reg_24_27, ap_Mask_16_19, ap_FPReg_28_31, ap_Mask_20_23}}, + {CFDTR, 0xffff000000000000, 0xb941000000000000, 0x0, // CONVERT TO FIXED (long DFP to 32) (CFDTR R1,M3,R2,M4) + [8]*argField{ap_Reg_24_27, ap_Mask_16_19, ap_FPReg_28_31, ap_Mask_20_23}}, + {CGDTR, 0xffff000000000000, 0xb3e1000000000000, 0xf0000000000, // CONVERT TO FIXED (long DFP to 64) (CGDTR R1,M3,R2) + [8]*argField{ap_Reg_24_27, ap_Mask_16_19, ap_FPReg_28_31}}, + {CGDTRA, 0xffff000000000000, 0xb3e1000000000000, 0x0, // CONVERT TO FIXED (long DFP to 64) (CGDTRA R1,M3,R2,M4) + [8]*argField{ap_Reg_24_27, ap_Mask_16_19, ap_FPReg_28_31, ap_Mask_20_23}}, + {CFDR, 0xffff000000000000, 0xb3b9000000000000, 0xf0000000000, // CONVERT TO FIXED (long HFP to 32) (CFDR R1,M3,R2) + [8]*argField{ap_Reg_24_27, ap_Mask_16_19, ap_FPReg_28_31}}, + {CGDR, 0xffff000000000000, 0xb3c9000000000000, 0xf0000000000, // CONVERT TO FIXED (long HFP to 64) (CGDR R1,M3,R2) + [8]*argField{ap_Reg_24_27, ap_Mask_16_19, ap_FPReg_28_31}}, + {CFEBR, 0xffff000000000000, 0xb398000000000000, 0xf0000000000, // CONVERT TO FIXED (short BFP to 32) (CFEBR R1,M3,R2) + [8]*argField{ap_Reg_24_27, ap_Mask_16_19, ap_FPReg_28_31}}, + {CFEBRA, 0xffff000000000000, 0xb398000000000000, 0x0, // CONVERT TO FIXED (short BFP to 32) (CFEBRA R1,M3,R2,M4) + [8]*argField{ap_Reg_24_27, ap_Mask_16_19, ap_FPReg_28_31, ap_Mask_20_23}}, + {CGEBR, 0xffff000000000000, 0xb3a8000000000000, 0xf0000000000, // CONVERT TO 
FIXED (short BFP to 64) (CGEBR R1,M3,R2) + [8]*argField{ap_Reg_24_27, ap_Mask_16_19, ap_FPReg_28_31}}, + {CGEBRA, 0xffff000000000000, 0xb3a8000000000000, 0x0, // CONVERT TO FIXED (short BFP to 64) (CGEBRA R1,M3,R2,M4) + [8]*argField{ap_Reg_24_27, ap_Mask_16_19, ap_FPReg_28_31, ap_Mask_20_23}}, + {CFER, 0xffff000000000000, 0xb3b8000000000000, 0xf0000000000, // CONVERT TO FIXED (short HFP to 32) (CFER R1,M3,R2) + [8]*argField{ap_Reg_24_27, ap_Mask_16_19, ap_FPReg_28_31}}, + {CGER, 0xffff000000000000, 0xb3c8000000000000, 0xf0000000000, // CONVERT TO FIXED (short HFP to 64) (CGER R1,M3,R2) + [8]*argField{ap_Reg_24_27, ap_Mask_16_19, ap_FPReg_28_31}}, + {CLFXBR, 0xffff000000000000, 0xb39e000000000000, 0x0, // CONVERT TO LOGICAL (extended BFP to 32) (CLFXBR R1,M3,R2,M4) + [8]*argField{ap_Reg_24_27, ap_Mask_16_19, ap_FPReg_28_31, ap_Mask_20_23}}, + {CLGXBR, 0xffff000000000000, 0xb3ae000000000000, 0x0, // CONVERT TO LOGICAL (extended BFP to 64) (CLGXBR R1,M3,R2,M4) + [8]*argField{ap_Reg_24_27, ap_Mask_16_19, ap_FPReg_28_31, ap_Mask_20_23}}, + {CLFXTR, 0xffff000000000000, 0xb94b000000000000, 0x0, // CONVERT TO LOGICAL (extended DFP to 32) (CLFXTR R1,M3,R2,M4) + [8]*argField{ap_Reg_24_27, ap_Mask_16_19, ap_FPReg_28_31, ap_Mask_20_23}}, + {CLGXTR, 0xffff000000000000, 0xb94a000000000000, 0x0, // CONVERT TO LOGICAL (extended DFP to 64) (CLGXTR R1,M3,R2,M4) + [8]*argField{ap_Reg_24_27, ap_Mask_16_19, ap_FPReg_28_31, ap_Mask_20_23}}, + {CLFDBR, 0xffff000000000000, 0xb39d000000000000, 0x0, // CONVERT TO LOGICAL (long BFP to 32) (CLFDBR R1,M3,R2,M4) + [8]*argField{ap_Reg_24_27, ap_Mask_16_19, ap_FPReg_28_31, ap_Mask_20_23}}, + {CLGDBR, 0xffff000000000000, 0xb3ad000000000000, 0x0, // CONVERT TO LOGICAL (long BFP to 64) (CLGDBR R1,M3,R2,M4) + [8]*argField{ap_Reg_24_27, ap_Mask_16_19, ap_FPReg_28_31, ap_Mask_20_23}}, + {CLFDTR, 0xffff000000000000, 0xb943000000000000, 0x0, // CONVERT TO LOGICAL (long DFP to 32) (CLFDTR R1,M3,R2,M4) + [8]*argField{ap_Reg_24_27, ap_Mask_16_19, 
ap_FPReg_28_31, ap_Mask_20_23}}, + {CLGDTR, 0xffff000000000000, 0xb942000000000000, 0x0, // CONVERT TO LOGICAL (long DFP to 64) (CLGDTR R1,M3,R2,M4) + [8]*argField{ap_Reg_24_27, ap_Mask_16_19, ap_FPReg_28_31, ap_Mask_20_23}}, + {CLFEBR, 0xffff000000000000, 0xb39c000000000000, 0x0, // CONVERT TO LOGICAL (short BFP to 32) (CLFEBR R1,M3,R2,M4) + [8]*argField{ap_Reg_24_27, ap_Mask_16_19, ap_FPReg_28_31, ap_Mask_20_23}}, + {CLGEBR, 0xffff000000000000, 0xb3ac000000000000, 0x0, // CONVERT TO LOGICAL (short BFP to 64) (CLGEBR R1,M3,R2,M4) + [8]*argField{ap_Reg_24_27, ap_Mask_16_19, ap_FPReg_28_31, ap_Mask_20_23}}, + {CPXT, 0xff00000000ff0000, 0xed00000000ad0000, 0x0, // CONVERT TO PACKED (from extended DFP) (CPXT R1,D2(L2,B2),M3) + [8]*argField{ap_FPReg_32_35, ap_DispUnsigned_20_31, ap_Len_8_15, ap_BaseReg_16_19, ap_Mask_36_39}}, + {CPDT, 0xff00000000ff0000, 0xed00000000ac0000, 0x0, // CONVERT TO PACKED (from long DFP) (CPDT R1,D2(L2,B2),M3) + [8]*argField{ap_FPReg_32_35, ap_DispUnsigned_20_31, ap_Len_8_15, ap_BaseReg_16_19, ap_Mask_36_39}}, + {CSXTR, 0xffff000000000000, 0xb3eb000000000000, 0xf00000000000, // CONVERT TO SIGNED PACKED (extended DFP to 128) (CSXTR R1,R2,M4) + [8]*argField{ap_Reg_24_27, ap_FPReg_28_31, ap_Mask_20_23}}, + {CSDTR, 0xffff000000000000, 0xb3e3000000000000, 0xf00000000000, // CONVERT TO SIGNED PACKED (long DFP to 64) (CSDTR R1,R2,M4) + [8]*argField{ap_Reg_24_27, ap_FPReg_28_31, ap_Mask_20_23}}, + {CUXTR, 0xffff000000000000, 0xb3ea000000000000, 0xff0000000000, // CONVERTTOUNSIGNEDPACKED(extendedDFP to 128) (CUXTR R1,R2) + [8]*argField{ap_Reg_24_27, ap_FPReg_28_31}}, + {CUDTR, 0xffff000000000000, 0xb3e2000000000000, 0xff0000000000, // CONVERT TO UNSIGNED PACKED (long DFP to 64) (CUDTR R1,R2) + [8]*argField{ap_Reg_24_27, ap_FPReg_28_31}}, + {CZXT, 0xff00000000ff0000, 0xed00000000a90000, 0x0, // CONVERT TO ZONED (from extended DFP) (CZXT R1,D2(L2,B2),M3) + [8]*argField{ap_FPReg_32_35, ap_DispUnsigned_20_31, ap_Len_8_15, ap_BaseReg_16_19, 
ap_Mask_36_39}}, + {CZDT, 0xff00000000ff0000, 0xed00000000a80000, 0x0, // CONVERT TO ZONED (from long DFP) (CZDT R1,D2(L2,B2),M3) + [8]*argField{ap_FPReg_32_35, ap_DispUnsigned_20_31, ap_Len_8_15, ap_BaseReg_16_19, ap_Mask_36_39}}, + {CU24, 0xffff000000000000, 0xb9b1000000000000, 0xf0000000000, // CONVERT UTF-16 TO UTF-32 (CU24 R1,R2,M3) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31, ap_Mask_16_19}}, + {CU21, 0xffff000000000000, 0xb2a6000000000000, 0xf0000000000, // CONVERT UTF-16 TO UTF-8 (CU21 R1,R2,M3) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31, ap_Mask_16_19}}, + {CU12, 0xffff000000000000, 0xb2a7000000000000, 0xf0000000000, // CONVERT UTF-8 TO UTF-16 (CU12 R1,R2,M3) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31, ap_Mask_16_19}}, + {CU14, 0xffff000000000000, 0xb9b0000000000000, 0xf0000000000, // CONVERT UTF-8 TO UTF-32 (CU14 R1,R2,M3) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31, ap_Mask_16_19}}, + {CU42, 0xffff000000000000, 0xb9b3000000000000, 0xff0000000000, // CONVERT UTF-32 TO UTF-16 (CU42 R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {CU41, 0xffff000000000000, 0xb9b2000000000000, 0xff0000000000, // CONVERT UTF-32 TO UTF-8 (CU41 R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {CPYA, 0xffff000000000000, 0xb24d000000000000, 0xff0000000000, // COPY ACCESS (CPYA R1,R2) + [8]*argField{ap_ACReg_24_27, ap_ACReg_28_31}}, + {CPSDR, 0xffff000000000000, 0xb372000000000000, 0xf0000000000, // COPY SIGN (long) (CPSDR R1,R3,R2) + [8]*argField{ap_FPReg_24_27, ap_FPReg_16_19, ap_FPReg_28_31}}, + {VSCSHP, 0xff00000000ff0000, 0xe6000000007c0000, 0xffff0000000, // DECIMAL SCALE AND CONVERT AND SPLIT TO HFP (VSCSHP V1,V2,V3) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_ImmUnsigned_36_39}}, + {VSCHP, 0xff00000000ff0000, 0xe600000000740000, 0xf0f00000000, // DECIMAL SCALE AND CONVERT TO HFP (VSCHP V1,V2,V3,M4,M5) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_Mask_32_35, ap_Mask_24_27, ap_ImmUnsigned_36_39}}, + {DFLTCC, 
0xffff000000000000, 0xb939000000000000, 0xf0000000000, // DEFLATE CONVERSION CALL (DFLTCC R1,R2,R3) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31, ap_Reg_16_19}}, + {D, 0xff00000000000000, 0x5d00000000000000, 0x0, // DIVIDE (32→64) (D R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {DR, 0xff00000000000000, 0x1d00000000000000, 0x0, // DIVIDE (32←64) (DR R1,R2) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15}}, + {DXBR, 0xffff000000000000, 0xb34d000000000000, 0xff0000000000, // DIVIDE (extended BFP) (DXBR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31}}, + {DXTR, 0xffff000000000000, 0xb3d9000000000000, 0xf0000000000, // DIVIDE (extended DFP) (DXTR R1,R2,R3) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31, ap_FPReg_16_19}}, + {DXTRA, 0xffff000000000000, 0xb3d9000000000000, 0x0, // DIVIDE (extended DFP) (DXTRA R1,R2,R3,M4) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31, ap_FPReg_16_19, ap_Mask_20_23}}, + {DXR, 0xffff000000000000, 0xb22d000000000000, 0xff0000000000, // DIVIDE (extended HFP) (DXR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31}}, + {DDB, 0xff00000000ff0000, 0xed000000001d0000, 0xff000000, // DIVIDE (long BFP) (DDB R1,D2(X2,B2)) + [8]*argField{ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {DDBR, 0xffff000000000000, 0xb31d000000000000, 0xff0000000000, // DIVIDE (long BFP) (DDBR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31}}, + {DDTR, 0xffff000000000000, 0xb3d1000000000000, 0xf0000000000, // DIVIDE (long DFP) (DDTR R1,R2,R3) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31, ap_FPReg_16_19}}, + {DDTRA, 0xffff000000000000, 0xb3d1000000000000, 0x0, // DIVIDE (long DFP) (DDTRA R1,R2,R3,M4) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31, ap_FPReg_16_19, ap_Mask_20_23}}, + {DD, 0xff00000000000000, 0x6d00000000000000, 0x0, // DIVIDE (long HFP) (DD R1,D2(X2,B2)) + [8]*argField{ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {DDR, 
0xff00000000000000, 0x2d00000000000000, 0x0, // DIVIDE (long HFP) (DDR R1,R2) + [8]*argField{ap_FPReg_8_11, ap_FPReg_12_15}}, + {DEB, 0xff00000000ff0000, 0xed000000000d0000, 0xff000000, // DIVIDE (short BFP) (DEB R1,D2(X2,B2)) + [8]*argField{ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {DEBR, 0xffff000000000000, 0xb30d000000000000, 0xff0000000000, // DIVIDE (short BFP) (DEBR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31}}, + {DE, 0xff00000000000000, 0x7d00000000000000, 0x0, // DIVIDE (short HFP) (DE R1,D2(X2,B2)) + [8]*argField{ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {DER, 0xff00000000000000, 0x3d00000000000000, 0x0, // DIVIDE (short HFP) (DER R1,R2) + [8]*argField{ap_FPReg_8_11, ap_FPReg_12_15}}, + {DP, 0xff00000000000000, 0xfd00000000000000, 0x0, // DIVIDE DECIMAL (DP D1(L1,B1),D2(L2,B2)) + [8]*argField{ap_DispUnsigned_20_31, ap_Len_8_11, ap_BaseReg_16_19, ap_DispUnsigned_36_47, ap_Len_12_15, ap_BaseReg_32_35}}, + {DL, 0xff00000000ff0000, 0xe300000000970000, 0x0, // DIVIDE LOGICAL (32→64) (DL R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {DLR, 0xffff000000000000, 0xb997000000000000, 0xff0000000000, // DIVIDE LOGICAL (32←64) (DLR R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {DLG, 0xff00000000ff0000, 0xe300000000870000, 0x0, // DIVIDE LOGICAL (64←128) (DLG R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {DLGR, 0xffff000000000000, 0xb987000000000000, 0xff0000000000, // DIVIDE LOGICAL (64→128) (DLGR R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {DSG, 0xff00000000ff0000, 0xe3000000000d0000, 0x0, // DIVIDE SINGLE (64) (DSG R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {DSGR, 0xffff000000000000, 0xb90d000000000000, 0xff0000000000, // DIVIDE SINGLE (64) (DSGR R1,R2) + [8]*argField{ap_Reg_24_27, 
ap_Reg_28_31}}, + {DSGF, 0xff00000000ff0000, 0xe3000000001d0000, 0x0, // DIVIDE SINGLE (64←32) (DSGF R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {DSGFR, 0xffff000000000000, 0xb91d000000000000, 0xff0000000000, // DIVIDE SINGLE (64→32) (DSGFR R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {DIDBR, 0xffff000000000000, 0xb35b000000000000, 0x0, // DIVIDE TO INTEGER (long BFP) (DIDBR R1,R3,R2,M4) + [8]*argField{ap_FPReg_24_27, ap_FPReg_16_19, ap_FPReg_28_31, ap_Mask_20_23}}, + {DIEBR, 0xffff000000000000, 0xb353000000000000, 0x0, // DIVIDE TO INTEGER (short BFP) (DIEBR R1,R3,R2,M4) + [8]*argField{ap_FPReg_24_27, ap_FPReg_16_19, ap_FPReg_28_31, ap_Mask_20_23}}, + {ED, 0xff00000000000000, 0xde00000000000000, 0x0, // EDIT (ED D1(L1,B1),D2(B2)) + [8]*argField{ap_DispUnsigned_20_31, ap_Len_8_15, ap_BaseReg_16_19, ap_DispUnsigned_36_47, ap_BaseReg_32_35}}, + {EDMK, 0xff00000000000000, 0xdf00000000000000, 0x0, // EDIT AND MARK (EDMK D1(L1,B1),D2(B2)) + [8]*argField{ap_DispUnsigned_20_31, ap_Len_8_15, ap_BaseReg_16_19, ap_DispUnsigned_36_47, ap_BaseReg_32_35}}, + {X, 0xff00000000000000, 0x5700000000000000, 0x0, // EXCLUSIVE OR (32) (X R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {XR, 0xff00000000000000, 0x1700000000000000, 0x0, // EXCLUSIVE OR (32) (XR R1,R2) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15}}, + {XRK, 0xffff000000000000, 0xb9f7000000000000, 0xf0000000000, // EXCLUSIVE OR (32) (XRK R1,R2,R3) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31, ap_Reg_16_19}}, + {XY, 0xff00000000ff0000, 0xe300000000570000, 0x0, // EXCLUSIVE OR (32) (XY R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {XG, 0xff00000000ff0000, 0xe300000000820000, 0x0, // EXCLUSIVE OR (64) (XG R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {XGR, 0xffff000000000000, 
0xb982000000000000, 0xff0000000000, // EXCLUSIVE OR (64) (XGR R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {XGRK, 0xffff000000000000, 0xb9e7000000000000, 0xf0000000000, // EXCLUSIVE OR (64) (XGRK R1,R2,R3) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31, ap_Reg_16_19}}, + {XC, 0xff00000000000000, 0xd700000000000000, 0x0, // EXCLUSIVE OR (character) (XC D1(L1,B1),D2(B2)) + [8]*argField{ap_DispUnsigned_20_31, ap_Len_8_15, ap_BaseReg_16_19, ap_DispUnsigned_36_47, ap_BaseReg_32_35}}, + {XI, 0xff00000000000000, 0x9700000000000000, 0x0, // EXCLUSIVE OR (immediate) (XI D1(B1),I2) + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19, ap_ImmUnsigned_8_15}}, + {XIY, 0xff00000000ff0000, 0xeb00000000570000, 0x0, // EXCLUSIVE OR (immediate) (XIY D1(B1),I2) + [8]*argField{ap_DispSigned20_20_39, ap_BaseReg_16_19, ap_ImmUnsigned_8_15}}, + {XIHF, 0xff0f000000000000, 0xc006000000000000, 0x0, // EXCLUSIVE OR IMMEDIATE (high) (XIHF R1,I2) + [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_47}}, + {XILF, 0xff0f000000000000, 0xc007000000000000, 0x0, // EXCLUSIVE OR IMMEDIATE (low) (XILF R1,I2) + [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_47}}, + {EX, 0xff00000000000000, 0x4400000000000000, 0x0, // EXECUTE (EX R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {EXRL, 0xff0f000000000000, 0xc600000000000000, 0x0, // EXECUTE RELATIVE LONG (EXRL R1,RI2) + [8]*argField{ap_Reg_8_11, ap_RegImSigned32_16_47}}, + {EAR, 0xffff000000000000, 0xb24f000000000000, 0xff0000000000, // EXTRACT ACCESS (EAR R1,R2) + [8]*argField{ap_Reg_24_27, ap_ACReg_28_31}}, + {ESEA, 0xffff000000000000, 0xb99d000000000000, 0xff0f00000000, // EXTRACT AND SET EXTENDED AUTHORITY (ESEA R1) + [8]*argField{ap_Reg_24_27}}, + {EEXTR, 0xffff000000000000, 0xb3ed000000000000, 0xff0000000000, // EXTRACT BIASED EXPONENT (extended DFP to 64) (EEXTR R1,R2) + [8]*argField{ap_Reg_24_27, ap_FPReg_28_31}}, + {EEDTR, 0xffff000000000000, 0xb3e5000000000000, 0xff0000000000, // EXTRACT 
BIASED EXPONENT (long DFP to 64) (EEDTR R1,R2) + [8]*argField{ap_Reg_24_27, ap_FPReg_28_31}}, + {ECAG, 0xff00000000ff0000, 0xeb000000004c0000, 0x0, // EXTRACT CPU ATTRIBUTE (ECAG R1,R3,D2(B2)) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15, ap_DispSigned20_20_39, ap_BaseReg_16_19}}, + {ECTG, 0xff0f000000000000, 0xc801000000000000, 0x0, // EXTRACT CPU TIME (ECTG D1(B1),D2(B2),R3) + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19, ap_DispUnsigned_36_47, ap_BaseReg_32_35, ap_Reg_8_11}}, + {EFPC, 0xffff000000000000, 0xb38c000000000000, 0xff0f00000000, // EXTRACT FPC (EFPC R1) + [8]*argField{ap_Reg_24_27}}, + {EPAR, 0xffff000000000000, 0xb226000000000000, 0xff0f00000000, // EXTRACT PRIMARY ASN (EPAR R1) + [8]*argField{ap_Reg_24_27}}, + {EPAIR, 0xffff000000000000, 0xb99a000000000000, 0xff0f00000000, // EXTRACT PRIMARY ASN AND INSTANCE (EPAIR R1) + [8]*argField{ap_Reg_24_27}}, + {EPSW, 0xffff000000000000, 0xb98d000000000000, 0xff0000000000, // EXTRACT PSW (EPSW R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {ESAR, 0xffff000000000000, 0xb227000000000000, 0xff0f00000000, // EXTRACT SECONDARY ASN (ESAR R1) + [8]*argField{ap_Reg_24_27}}, + {ESAIR, 0xffff000000000000, 0xb99b000000000000, 0xff0f00000000, // EXTRACT SECONDARY ASN AND INSTANCE (ESAIR R1) + [8]*argField{ap_Reg_24_27}}, + {ESXTR, 0xffff000000000000, 0xb3ef000000000000, 0xff0000000000, // EXTRACT SIGNIFICANCE (extended DFP to 64) (ESXTR R1,R2) + [8]*argField{ap_Reg_24_27, ap_FPReg_28_31}}, + {ESDTR, 0xffff000000000000, 0xb3e7000000000000, 0xff0000000000, // EXTRACT SIGNIFICANCE (long DFP to 64) (ESDTR R1,R2) + [8]*argField{ap_Reg_24_27, ap_FPReg_28_31}}, + {EREG, 0xffff000000000000, 0xb249000000000000, 0xff0000000000, // EXTRACT STACKED REGISTERS (32) (EREG R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {EREGG, 0xffff000000000000, 0xb90e000000000000, 0xff0000000000, // EXTRACT STACKED REGISTERS (64) (EREGG R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {ESTA, 0xffff000000000000, 
0xb24a000000000000, 0xff0000000000, // EXTRACT STACKED STATE (ESTA R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {ETND, 0xffff000000000000, 0xb2ec000000000000, 0xff0f00000000, // EXTRACT TRANSACTION NESTING DEPTH (ETND R1) + [8]*argField{ap_Reg_24_27}}, + {FLOGR, 0xffff000000000000, 0xb983000000000000, 0xff0000000000, // FIND LEFTMOST ONE (FLOGR R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {HSCH, 0xffff000000000000, 0xb231000000000000, 0xffff00000000, // HALT SUBCHANNEL (HSCH) + [8]*argField{}}, + {HDR, 0xff00000000000000, 0x2400000000000000, 0x0, // HALVE (long HFP) (HDR R1,R2) + [8]*argField{ap_FPReg_8_11, ap_FPReg_12_15}}, + {HER, 0xff00000000000000, 0x3400000000000000, 0x0, // HALVE (short HFP) (HER R1,R2) + [8]*argField{ap_FPReg_8_11, ap_FPReg_12_15}}, + {IAC, 0xffff000000000000, 0xb224000000000000, 0xff0f00000000, // INSERT ADDRESS SPACE CONTROL (IAC R1) + [8]*argField{ap_Reg_24_27}}, + {IEXTR, 0xffff000000000000, 0xb3fe000000000000, 0xf0000000000, // INSERT BIASED EXPONENT (64 to extended DFP) (IEXTR R1,R3,R2) + [8]*argField{ap_FPReg_24_27, ap_FPReg_16_19, ap_Reg_28_31}}, + {IEDTR, 0xffff000000000000, 0xb3f6000000000000, 0xf0000000000, // INSERT BIASED EXPONENT (64 to long DFP) (IEDTR R1,R3,R2) + [8]*argField{ap_FPReg_24_27, ap_FPReg_16_19, ap_Reg_28_31}}, + {IC, 0xff00000000000000, 0x4300000000000000, 0x0, // INSERT CHARACTER (IC R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {ICY, 0xff00000000ff0000, 0xe300000000730000, 0x0, // INSERT CHARACTER (ICY R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {ICMH, 0xff00000000ff0000, 0xeb00000000800000, 0x0, // INSERT CHARACTERS UNDER MASK (high) (ICMH R1,M3,D2(B2)) + [8]*argField{ap_Reg_8_11, ap_Mask_12_15, ap_DispSigned20_20_39, ap_BaseReg_16_19}}, + {ICM, 0xff00000000000000, 0xbf00000000000000, 0x0, // INSERT CHARACTERS UNDER MASK (low) (ICM R1,M3,D2(B2)) + [8]*argField{ap_Reg_8_11, 
ap_Mask_12_15, ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {ICMY, 0xff00000000ff0000, 0xeb00000000810000, 0x0, // INSERT CHARACTERS UNDER MASK (low) (ICMY R1,M3,D2(B2)) + [8]*argField{ap_Reg_8_11, ap_Mask_12_15, ap_DispSigned20_20_39, ap_BaseReg_16_19}}, + {IIHH, 0xff0f000000000000, 0xa500000000000000, 0x0, // INSERT IMMEDIATE (high high) (IIHH R1,I2) + [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_31}}, + {IIHL, 0xff0f000000000000, 0xa501000000000000, 0x0, // INSERT IMMEDIATE (high low) (IIHL R1,I2) + [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_31}}, + {IIHF, 0xff0f000000000000, 0xc008000000000000, 0x0, // INSERT IMMEDIATE (high) (IIHF R1,I2) + [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_47}}, + {IILH, 0xff0f000000000000, 0xa502000000000000, 0x0, // INSERT IMMEDIATE (low high) (IILH R1,I2) + [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_31}}, + {IILL, 0xff0f000000000000, 0xa503000000000000, 0x0, // INSERT IMMEDIATE (low low) (IILL R1,I2) + [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_31}}, + {IILF, 0xff0f000000000000, 0xc009000000000000, 0x0, // INSERT IMMEDIATE (low) (IILF R1,I2) + [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_47}}, + {IPM, 0xffff000000000000, 0xb222000000000000, 0xff0f00000000, // INSERT PROGRAM MASK (IPM R1) + [8]*argField{ap_Reg_24_27}}, + {IPK, 0xffff000000000000, 0xb20b000000000000, 0xffff00000000, // INSERT PSW KEY (IPK) + [8]*argField{}}, + {IRBM, 0xffff000000000000, 0xb9ac000000000000, 0xff0000000000, // INSERT REFERENCE BITS MULTIPLE (IRBM R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {ISKE, 0xffff000000000000, 0xb229000000000000, 0xff0000000000, // INSERT STORAGE KEY EXTENDED (ISKE R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {IVSK, 0xffff000000000000, 0xb223000000000000, 0xff0000000000, // INSERT VIRTUAL STORAGE KEY (IVSK R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {IDTE, 0xffff000000000000, 0xb98e000000000000, 0x0, // INVALIDATE DAT TABLE ENTRY (IDTE R1,R3,R2,M4) + [8]*argField{ap_Reg_24_27, ap_Reg_16_19, 
ap_Reg_28_31, ap_Mask_20_23}}, + {IPTE, 0xffff000000000000, 0xb221000000000000, 0x0, // INVALIDATE PAGE TABLE ENTRY (IPTE R1,R2,R3,M4) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31, ap_Reg_16_19, ap_Mask_20_23}}, + {L, 0xff00000000000000, 0x5800000000000000, 0x0, // LOAD (32) (L R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {LR, 0xff00000000000000, 0x1800000000000000, 0x0, // LOAD (32) (LR R1,R2) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15}}, + {LY, 0xff00000000ff0000, 0xe300000000580000, 0x0, // LOAD (32) (LY R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {LG, 0xff00000000ff0000, 0xe300000000040000, 0x0, // LOAD (64) (LG R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {LGR, 0xffff000000000000, 0xb904000000000000, 0xff0000000000, // LOAD (64) (LGR R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {LGF, 0xff00000000ff0000, 0xe300000000140000, 0x0, // LOAD (64←32) (LGF R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {LGFR, 0xffff000000000000, 0xb914000000000000, 0xff0000000000, // LOAD (64←32) (LGFR R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {LXR, 0xffff000000000000, 0xb365000000000000, 0xff0000000000, // LOAD (extended) (LXR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31}}, + {LD, 0xff00000000000000, 0x6800000000000000, 0x0, // LOAD (long) (LD R1,D2(X2,B2)) + [8]*argField{ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {LDR, 0xff00000000000000, 0x2800000000000000, 0x0, // LOAD (long) (LDR R1,R2) + [8]*argField{ap_FPReg_8_11, ap_FPReg_12_15}}, + {LDY, 0xff00000000ff0000, 0xed00000000650000, 0x0, // LOAD (long) (LDY R1,D2(X2,B2)) + [8]*argField{ap_FPReg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {LE, 0xff00000000000000, 0x7800000000000000, 0x0, // LOAD (short) (LE 
R1,D2(X2,B2)) + [8]*argField{ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {LER, 0xff00000000000000, 0x3800000000000000, 0x0, // LOAD (short) (LER R1,R2) + [8]*argField{ap_FPReg_8_11, ap_FPReg_12_15}}, + {LEY, 0xff00000000ff0000, 0xed00000000640000, 0x0, // LOAD (short) (LEY R1,D2(X2,B2)) + [8]*argField{ap_FPReg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {LAM, 0xff00000000000000, 0x9a00000000000000, 0x0, // LOAD ACCESS MULTIPLE 7-268 (LAM R1,R3,D2(B2)) + [8]*argField{ap_ACReg_8_11, ap_ACReg_12_15, ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {LAMY, 0xff00000000ff0000, 0xeb000000009a0000, 0x0, // LOAD ACCESS MULTIPLE 7-268 (LAMY R1,R3,D2(B2)) + [8]*argField{ap_ACReg_8_11, ap_ACReg_12_15, ap_DispSigned20_20_39, ap_BaseReg_16_19}}, + {LA, 0xff00000000000000, 0x4100000000000000, 0x0, // LOAD ADDRESS (LA R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {LAY, 0xff00000000ff0000, 0xe300000000710000, 0x0, // LOAD ADDRESS (LAY R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {LAE, 0xff00000000000000, 0x5100000000000000, 0x0, // LOAD ADDRESS EXTENDED (LAE R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {LAEY, 0xff00000000ff0000, 0xe300000000750000, 0x0, // LOAD ADDRESS EXTENDED (LAEY R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {LARL, 0xff0f000000000000, 0xc000000000000000, 0x0, // LOAD ADDRESS RELATIVE LONG (LARL R1,RI2) + [8]*argField{ap_Reg_8_11, ap_RegImSigned32_16_47}}, + {LASP, 0xffff000000000000, 0xe500000000000000, 0x0, // LOAD ADDRESS SPACE PARAMETERS (LASP D1(B1),D2(B2)) + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19, ap_DispUnsigned_36_47, ap_BaseReg_32_35}}, + {LAA, 0xff00000000ff0000, 0xeb00000000f80000, 0x0, // LOAD AND ADD (32) (LAA R1,R3,D2(B2)) + 
[8]*argField{ap_Reg_8_11, ap_Reg_12_15, ap_DispSigned20_20_39, ap_BaseReg_16_19}}, + {LAAG, 0xff00000000ff0000, 0xeb00000000e80000, 0x0, // LOAD AND ADD (64) (LAAG R1,R3,D2(B2)) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15, ap_DispSigned20_20_39, ap_BaseReg_16_19}}, + {LAAL, 0xff00000000ff0000, 0xeb00000000fa0000, 0x0, // LOAD AND ADD LOGICAL (32) (LAAL R1,R3,D2(B2)) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15, ap_DispSigned20_20_39, ap_BaseReg_16_19}}, + {LAALG, 0xff00000000ff0000, 0xeb00000000ea0000, 0x0, // LOAD AND ADD LOGICAL (64) (LAALG R1,R3,D2(B2)) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15, ap_DispSigned20_20_39, ap_BaseReg_16_19}}, + {LAN, 0xff00000000ff0000, 0xeb00000000f40000, 0x0, // LOAD AND AND (32) (LAN R1,R3,D2(B2)) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15, ap_DispSigned20_20_39, ap_BaseReg_16_19}}, + {LANG, 0xff00000000ff0000, 0xeb00000000e40000, 0x0, // LOAD AND AND (64) (LANG R1,R3,D2(B2)) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15, ap_DispSigned20_20_39, ap_BaseReg_16_19}}, + {LAX, 0xff00000000ff0000, 0xeb00000000f70000, 0x0, // LOAD AND EXCLUSIVE OR (32) (LAX R1,R3,D2(B2)) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15, ap_DispSigned20_20_39, ap_BaseReg_16_19}}, + {LAXG, 0xff00000000ff0000, 0xeb00000000e70000, 0x0, // LOAD AND EXCLUSIVE OR (64) (LAXG R1,R3,D2(B2)) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15, ap_DispSigned20_20_39, ap_BaseReg_16_19}}, + {LAO, 0xff00000000ff0000, 0xeb00000000f60000, 0x0, // LOAD AND OR (32) (LAO R1,R3,D2(B2)) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15, ap_DispSigned20_20_39, ap_BaseReg_16_19}}, + {LAOG, 0xff00000000ff0000, 0xeb00000000e60000, 0x0, // LOAD AND OR (64) (LAOG R1,R3,D2(B2)) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15, ap_DispSigned20_20_39, ap_BaseReg_16_19}}, + {LT, 0xff00000000ff0000, 0xe300000000120000, 0x0, // LOAD AND TEST (32) (LT R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {LTR, 0xff00000000000000, 0x1200000000000000, 0x0, // LOAD AND TEST (32) (LTR R1,R2) 
+ [8]*argField{ap_Reg_8_11, ap_Reg_12_15}}, + {LTG, 0xff00000000ff0000, 0xe300000000020000, 0x0, // LOAD AND TEST (64) (LTG R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {LTGR, 0xffff000000000000, 0xb902000000000000, 0xff0000000000, // LOAD AND TEST (64) (LTGR R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {LTGF, 0xff00000000ff0000, 0xe300000000320000, 0x0, // LOAD AND TEST (64→32) (LTGF R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {LTGFR, 0xffff000000000000, 0xb912000000000000, 0xff0000000000, // LOAD AND TEST (64→32) (LTGFR R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {LTXBR, 0xffff000000000000, 0xb342000000000000, 0xff0000000000, // LOAD AND TEST (extended BFP) (LTXBR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31}}, + {LTXTR, 0xffff000000000000, 0xb3de000000000000, 0xff0000000000, // LOAD AND TEST (extended DFP) (LTXTR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31}}, + {LTXR, 0xffff000000000000, 0xb362000000000000, 0xff0000000000, // LOAD AND TEST (extended HFP) (LTXR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31}}, + {LTDBR, 0xffff000000000000, 0xb312000000000000, 0xff0000000000, // LOAD AND TEST (long BFP) (LTDBR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31}}, + {LTDTR, 0xffff000000000000, 0xb3d6000000000000, 0xff0000000000, // LOAD AND TEST (long DFP) (LTDTR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31}}, + {LTDR, 0xff00000000000000, 0x2200000000000000, 0x0, // LOAD AND TEST (long HFP) (LTDR R1,R2) + [8]*argField{ap_FPReg_8_11, ap_FPReg_12_15}}, + {LTEBR, 0xffff000000000000, 0xb302000000000000, 0xff0000000000, // LOAD AND TEST (short BFP) (LTEBR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31}}, + {LTER, 0xff00000000000000, 0x3200000000000000, 0x0, // LOAD AND TEST (short HFP) (LTER R1,R2) + [8]*argField{ap_FPReg_8_11, ap_FPReg_12_15}}, + {LAT, 0xff00000000ff0000, 0xe3000000009f0000, 
0x0, // LOAD AND TRAP (32L→32) (LAT R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {LGAT, 0xff00000000ff0000, 0xe300000000850000, 0x0, // LOAD AND TRAP (64) (LGAT R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {LZRF, 0xff00000000ff0000, 0xe3000000003b0000, 0x0, // LOAD AND ZERO RIGHTMOST BYTE (32) (LZRF R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {LZRG, 0xff00000000ff0000, 0xe3000000002a0000, 0x0, // LOAD AND ZERO RIGHTMOST BYTE (64) (LZRG R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {LBEAR, 0xffff000000000000, 0xb200000000000000, 0x0, // LOAD BEAR (LBEAR D2(B2)) + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {LB, 0xff00000000ff0000, 0xe300000000760000, 0x0, // LOAD BYTE (32→8) (LB R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {LBR, 0xffff000000000000, 0xb926000000000000, 0xff0000000000, // LOAD BYTE (32←8) (LBR R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {LGB, 0xff00000000ff0000, 0xe300000000770000, 0x0, // LOAD BYTE (64→8) (LGB R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {LGBR, 0xffff000000000000, 0xb906000000000000, 0xff0000000000, // LOAD BYTE (64←8) (LGBR R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {LBH, 0xff00000000ff0000, 0xe300000000c00000, 0x0, // LOAD BYTE HIGH (32←8) (LBH R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {LCR, 0xff00000000000000, 0x1300000000000000, 0x0, // LOAD COMPLEMENT (32) (LCR R1,R2) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15}}, + {LCGR, 0xffff000000000000, 0xb903000000000000, 0xff0000000000, // LOAD COMPLEMENT (64) (LCGR R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {LCGFR, 
0xffff000000000000, 0xb913000000000000, 0xff0000000000, // LOAD COMPLEMENT (64←32) (LCGFR R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {LCXBR, 0xffff000000000000, 0xb343000000000000, 0xff0000000000, // LOAD COMPLEMENT (extended BFP) (LCXBR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31}}, + {LCXR, 0xffff000000000000, 0xb363000000000000, 0xff0000000000, // LOAD COMPLEMENT (extended HFP) (LCXR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31}}, + {LCDBR, 0xffff000000000000, 0xb313000000000000, 0xff0000000000, // LOAD COMPLEMENT (long BFP) (LCDBR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31}}, + {LCDR, 0xff00000000000000, 0x2300000000000000, 0x0, // LOAD COMPLEMENT (long HFP) (LCDR R1,R2) + [8]*argField{ap_FPReg_8_11, ap_FPReg_12_15}}, + {LCDFR, 0xffff000000000000, 0xb373000000000000, 0xff0000000000, // LOAD COMPLEMENT (long) (LCDFR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31}}, + {LCEBR, 0xffff000000000000, 0xb303000000000000, 0xff0000000000, // LOAD COMPLEMENT (short BFP) (LCEBR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31}}, + {LCER, 0xff00000000000000, 0x3300000000000000, 0x0, // LOAD COMPLEMENT (short HFP) (LCER R1,R2) + [8]*argField{ap_FPReg_8_11, ap_FPReg_12_15}}, + {LCTL, 0xff00000000000000, 0xb700000000000000, 0x0, // LOAD CONTROL (32) (LCTL R1,R3,D2(B2)) + [8]*argField{ap_CReg_8_11, ap_CReg_12_15, ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {LCTLG, 0xff00000000ff0000, 0xeb000000002f0000, 0x0, // LOAD CONTROL (64) (LCTLG R1,R3,D2(B2)) + [8]*argField{ap_CReg_8_11, ap_CReg_12_15, ap_DispSigned20_20_39, ap_BaseReg_16_19}}, + {LCBB, 0xff00000000ff0000, 0xe700000000270000, 0xf000000, // LOAD COUNT TO BLOCK BOUNDARY (LCBB R1,D2(X2,B2),M3) + [8]*argField{ap_Reg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19, ap_Mask_32_35}}, + {FIXBR, 0xffff000000000000, 0xb347000000000000, 0xf0000000000, // LOAD FP INTEGER (extended BFP) (FIXBR R1,M3,R2) + [8]*argField{ap_FPReg_24_27, ap_Mask_16_19, ap_FPReg_28_31}}, + 
{FIXBRA, 0xffff000000000000, 0xb347000000000000, 0x0, // LOAD FP INTEGER (extended BFP) (FIXBRA R1,M3,R2,M4) + [8]*argField{ap_FPReg_24_27, ap_Mask_16_19, ap_FPReg_28_31, ap_Mask_20_23}}, + {FIXTR, 0xffff000000000000, 0xb3df000000000000, 0x0, // LOAD FP INTEGER (extended DFP) (FIXTR R1,M3,R2,M4) + [8]*argField{ap_FPReg_24_27, ap_Mask_16_19, ap_FPReg_28_31, ap_Mask_20_23}}, + {FIXR, 0xffff000000000000, 0xb367000000000000, 0xff0000000000, // LOAD FP INTEGER (extended HFP) (FIXR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31}}, + {FIDBR, 0xffff000000000000, 0xb35f000000000000, 0xf0000000000, // LOAD FP INTEGER (long BFP) (FIDBR R1,M3,R2) + [8]*argField{ap_FPReg_24_27, ap_Mask_16_19, ap_FPReg_28_31}}, + {FIDBRA, 0xffff000000000000, 0xb35f000000000000, 0x0, // LOAD FP INTEGER (long BFP) (FIDBRA R1,M3,R2,M4) + [8]*argField{ap_FPReg_24_27, ap_Mask_16_19, ap_FPReg_28_31, ap_Mask_20_23}}, + {FIDTR, 0xffff000000000000, 0xb3d7000000000000, 0x0, // LOAD FP INTEGER (long DFP) (FIDTR R1,M3,R2,M4) + [8]*argField{ap_FPReg_24_27, ap_Mask_16_19, ap_FPReg_28_31, ap_Mask_20_23}}, + {FIDR, 0xffff000000000000, 0xb37f000000000000, 0xff0000000000, // LOAD FP INTEGER (long HFP) (FIDR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31}}, + {FIEBR, 0xffff000000000000, 0xb357000000000000, 0xf0000000000, // LOAD FP INTEGER (short BFP) (FIEBR R1,M3,R2) + [8]*argField{ap_FPReg_24_27, ap_Mask_16_19, ap_FPReg_28_31}}, + {FIEBRA, 0xffff000000000000, 0xb357000000000000, 0x0, // LOAD FP INTEGER (short BFP) (FIEBRA R1,M3,R2,M4) + [8]*argField{ap_FPReg_24_27, ap_Mask_16_19, ap_FPReg_28_31, ap_Mask_20_23}}, + {FIER, 0xffff000000000000, 0xb377000000000000, 0xff0000000000, // LOAD FP INTEGER (short HFP) (FIER R1,R2) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31}}, + {LFPC, 0xffff000000000000, 0xb29d000000000000, 0x0, // LOAD FPC (LFPC D2(B2)) + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {LFAS, 0xffff000000000000, 0xb2bd000000000000, 0x0, // LOAD FPC AND SIGNAL (LFAS D2(B2)) + 
[8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {LDGR, 0xffff000000000000, 0xb3c1000000000000, 0xff0000000000, // LOAD FPR FROM GR (64 to long) (LDGR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_Reg_28_31}}, + {LGDR, 0xffff000000000000, 0xb3cd000000000000, 0xff0000000000, // LOAD GR FROM FPR (long to 64) (LGDR R1,R2) + [8]*argField{ap_Reg_24_27, ap_FPReg_28_31}}, + {LGG, 0xff00000000ff0000, 0xe3000000004c0000, 0x0, // LOAD GUARDED (64) (LGG R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {LGSC, 0xff00000000ff0000, 0xe3000000004d0000, 0x0, // LOAD GUARDED STORAGE CONTROLS (LGSC R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {LH, 0xff00000000000000, 0x4800000000000000, 0x0, // LOAD HALFWORD (32→16) (LH R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {LHR, 0xffff000000000000, 0xb927000000000000, 0xff0000000000, // LOAD HALFWORD (32←16) (LHR R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {LHY, 0xff00000000ff0000, 0xe300000000780000, 0x0, // LOAD HALFWORD (32←16) (LHY R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {LGH, 0xff00000000ff0000, 0xe300000000150000, 0x0, // LOAD HALFWORD (64←16) (LGH R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {LGHR, 0xffff000000000000, 0xb907000000000000, 0xff0000000000, // LOAD HALFWORD (64←16) (LGHR R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {LHH, 0xff00000000ff0000, 0xe300000000c40000, 0x0, // LOAD HALFWORD HIGH (32→16) (LHH R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {LOCHHI, 0xff00000000ff0000, 0xec000000004e0000, 0xff000000, // LOAD HALFWORD HIGH IMMEDIATE ON (LOCHHI R1,I2,M3) + [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_31, ap_Mask_12_15}}, + {LHI, 
0xff0f000000000000, 0xa708000000000000, 0x0, // CONDITION (32←16)LOAD HALFWORD IMMEDIATE (32)←16 (LHI R1,I2) + [8]*argField{ap_Reg_8_11, ap_ImmSigned16_16_31}}, + {LGHI, 0xff0f000000000000, 0xa709000000000000, 0x0, // LOAD HALFWORD IMMEDIATE (64→16) (LGHI R1,I2) + [8]*argField{ap_Reg_8_11, ap_ImmSigned16_16_31}}, + {LOCHI, 0xff00000000ff0000, 0xec00000000420000, 0xff000000, // LOAD HALFWORD IMMEDIATE ON CONDITION(32←16) (LOCHI R1,I2,M3) + [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_31, ap_Mask_12_15}}, + {LOCGHI, 0xff00000000ff0000, 0xec00000000460000, 0xff000000, // LOAD HALFWORD IMMEDIATE ON CONDITION(64→16) (LOCGHI R1,I2,M3) + [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_31, ap_Mask_12_15}}, + {LHRL, 0xff0f000000000000, 0xc405000000000000, 0x0, // LOAD HALFWORD RELATIVE LONG (32←16) (LHRL R1,RI2) + [8]*argField{ap_Reg_8_11, ap_RegImSigned32_16_47}}, + {LGHRL, 0xff0f000000000000, 0xc404000000000000, 0x0, // LOAD HALFWORD RELATIVE LONG (64←16) (LGHRL R1,RI2) + [8]*argField{ap_Reg_8_11, ap_RegImSigned32_16_47}}, + {LFH, 0xff00000000ff0000, 0xe300000000ca0000, 0x0, // LOAD HIGH (32) (LFH R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {LFHAT, 0xff00000000ff0000, 0xe300000000c80000, 0x0, // LOAD HIGH AND TRAP (32H←32) (LFHAT R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {LOCFH, 0xff00000000ff0000, 0xeb00000000e00000, 0x0, // LOAD HIGH ON CONDITION (32) (LOCFH R1,D2(B2),M3) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_BaseReg_16_19, ap_Mask_12_15}}, + {LOCFHR, 0xffff000000000000, 0xb9e0000000000000, 0xf0000000000, // LOAD HIGH ON CONDITION (32) (LOCFHR R1,R2,M3) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31, ap_Mask_16_19}}, + {LGFI, 0xff0f000000000000, 0xc001000000000000, 0x0, // LOAD IMMEDIATE (64→32) (LGFI R1,I2) + [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_47}}, + {LXDB, 0xff00000000ff0000, 0xed00000000050000, 0xff000000, // LOAD LENGTHENED 
(long to extended BFP) (LXDB R1,D2(X2,B2)) + [8]*argField{ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {LXDBR, 0xffff000000000000, 0xb305000000000000, 0xff0000000000, // LOAD LENGTHENED (long to extended BFP) (LXDBR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31}}, + {LXDTR, 0xffff000000000000, 0xb3dc000000000000, 0xf00000000000, // LOAD LENGTHENED (long to extended DFP) (LXDTR R1,R2,M4) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31, ap_Mask_20_23}}, + {LXD, 0xff00000000ff0000, 0xed00000000250000, 0xff000000, // LOAD LENGTHENED (long to extended HFP) (LXD R1,D2(X2,B2)) + [8]*argField{ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {LXDR, 0xffff000000000000, 0xb325000000000000, 0xff0000000000, // LOAD LENGTHENED (long to extended HFP) (LXDR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31}}, + {LXEB, 0xff00000000ff0000, 0xed00000000060000, 0xff000000, // LOAD LENGTHENED (short to extended BFP) (LXEB R1,D2(X2,B2)) + [8]*argField{ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {LXEBR, 0xffff000000000000, 0xb306000000000000, 0xff0000000000, // LOAD LENGTHENED (short to extended BFP) (LXEBR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31}}, + {LXE, 0xff00000000ff0000, 0xed00000000260000, 0xff000000, // LOAD LENGTHENED (short to extended HFP) (LXE R1,D2(X2,B2)) + [8]*argField{ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {LXER, 0xffff000000000000, 0xb326000000000000, 0xff0000000000, // LOAD LENGTHENED (short to extended HFP) (LXER R1,R2) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31}}, + {LDEB, 0xff00000000ff0000, 0xed00000000040000, 0xff000000, // LOAD LENGTHENED (short to long BFP) (LDEB R1,D2(X2,B2)) + [8]*argField{ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {LDEBR, 0xffff000000000000, 0xb304000000000000, 0xff0000000000, // LOAD LENGTHENED (short to long BFP) (LDEBR R1,R2) + 
[8]*argField{ap_FPReg_24_27, ap_FPReg_28_31}}, + {LDETR, 0xffff000000000000, 0xb3d4000000000000, 0xf00000000000, // LOAD LENGTHENED (short to long DFP) (LDETR R1,R2,M4) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31, ap_Mask_20_23}}, + {LDE, 0xff00000000ff0000, 0xed00000000240000, 0xff000000, // LOAD LENGTHENED (short to long HFP) (LDE R1,D2(X2,B2)) + [8]*argField{ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {LDER, 0xffff000000000000, 0xb324000000000000, 0xff0000000000, // LOAD LENGTHENED (short to long HFP) (LDER R1,R2) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31}}, + {LLGF, 0xff00000000ff0000, 0xe300000000160000, 0x0, // LOAD LOGICAL (64←32) (LLGF R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {LLGFR, 0xffff000000000000, 0xb916000000000000, 0xff0000000000, // LOAD LOGICAL (64←32) (LLGFR R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {LLGFSG, 0xff00000000ff0000, 0xe300000000480000, 0x0, // LOAD LOGICAL AND SHIFT GUARDED (64←32) (LLGFSG R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {LLGFAT, 0xff00000000ff0000, 0xe3000000009d0000, 0x0, // LOAD LOGICAL AND TRAP (64→32) (LLGFAT R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {LLZRGF, 0xff00000000ff0000, 0xe3000000003a0000, 0x0, // LOAD LOGICAL AND ZERO RIGHTMOST BYTE(64→32) (LLZRGF R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {LLC, 0xff00000000ff0000, 0xe300000000940000, 0x0, // LOAD LOGICAL CHARACTER (32→8) (LLC R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {LLCR, 0xffff000000000000, 0xb994000000000000, 0xff0000000000, // LOAD LOGICAL CHARACTER (32←8) (LLCR R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {LLGC, 0xff00000000ff0000, 0xe300000000900000, 0x0, // LOAD LOGICAL 
CHARACTER (64←8) (LLGC R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {LLGCR, 0xffff000000000000, 0xb984000000000000, 0xff0000000000, // LOAD LOGICAL CHARACTER (64←8) (LLGCR R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {LLCH, 0xff00000000ff0000, 0xe300000000c20000, 0x0, // LOAD LOGICAL CHARACTER HIGH (32←8) (LLCH R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {LLH, 0xff00000000ff0000, 0xe300000000950000, 0x0, // LOAD LOGICAL HALFWORD (32←16) (LLH R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {LLHR, 0xffff000000000000, 0xb995000000000000, 0xff0000000000, // LOAD LOGICAL HALFWORD (32←16) (LLHR R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {LLGH, 0xff00000000ff0000, 0xe300000000910000, 0x0, // LOAD LOGICAL HALFWORD (64→16) (LLGH R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {LLGHR, 0xffff000000000000, 0xb985000000000000, 0xff0000000000, // LOAD LOGICAL HALFWORD (64←16) (LLGHR R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {LLHH, 0xff00000000ff0000, 0xe300000000c60000, 0x0, // LOAD LOGICAL HALFWORD HIGH (32→16) (LLHH R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {LLHRL, 0xff0f000000000000, 0xc402000000000000, 0x0, // LOAD LOGICAL HALFWORD RELATIVE LONG(32←16) (LLHRL R1,RI2) + [8]*argField{ap_Reg_8_11, ap_RegImSigned32_16_47}}, + {LLGHRL, 0xff0f000000000000, 0xc406000000000000, 0x0, // LOAD LOGICAL HALFWORD RELATIVE LONG(64→16) (LLGHRL R1,RI2) + [8]*argField{ap_Reg_8_11, ap_RegImSigned32_16_47}}, + {LLIHH, 0xff0f000000000000, 0xa50c000000000000, 0x0, // LOAD LOGICAL IMMEDIATE (high high) (LLIHH R1,I2) + [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_31}}, + {LLIHL, 0xff0f000000000000, 0xa50d000000000000, 0x0, // LOAD LOGICAL IMMEDIATE (high low) (LLIHL 
R1,I2) + [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_31}}, + {LLIHF, 0xff0f000000000000, 0xc00e000000000000, 0x0, // LOAD LOGICAL IMMEDIATE (high) (LLIHF R1,I2) + [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_47}}, + {LLILH, 0xff0f000000000000, 0xa50e000000000000, 0x0, // LOAD LOGICAL IMMEDIATE (low high) (LLILH R1,I2) + [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_31}}, + {LLILL, 0xff0f000000000000, 0xa50f000000000000, 0x0, // LOAD LOGICAL IMMEDIATE (low low) (LLILL R1,I2) + [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_31}}, + {LLILF, 0xff0f000000000000, 0xc00f000000000000, 0x0, // LOAD LOGICAL IMMEDIATE (low) (LLILF R1,I2) + [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_47}}, + {LLGFRL, 0xff0f000000000000, 0xc40e000000000000, 0x0, // LOAD LOGICAL RELATIVE LONG (64→32) (LLGFRL R1,RI2) + [8]*argField{ap_Reg_8_11, ap_RegImSigned32_16_47}}, + {LLGT, 0xff00000000ff0000, 0xe300000000170000, 0x0, // LOAD LOGICAL THIRTY ONE BITS (64→31) (LLGT R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {LLGTR, 0xffff000000000000, 0xb917000000000000, 0xff0000000000, // LOAD LOGICAL THIRTY ONE BITS (64→31) (LLGTR R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {LLGTAT, 0xff00000000ff0000, 0xe3000000009c0000, 0x0, // LOAD LOGICAL THIRTY ONE BITS AND TRAP(64←31) (LLGTAT R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {LM, 0xff00000000000000, 0x9800000000000000, 0x0, // LOAD MULTIPLE (32) (LM R1,R3,D2(B2)) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15, ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {LMY, 0xff00000000ff0000, 0xeb00000000980000, 0x0, // LOAD MULTIPLE (32) (LMY R1,R3,D2(B2)) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15, ap_DispSigned20_20_39, ap_BaseReg_16_19}}, + {LMG, 0xff00000000ff0000, 0xeb00000000040000, 0x0, // LOAD MULTIPLE (64) (LMG R1,R3,D2(B2)) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15, ap_DispSigned20_20_39, ap_BaseReg_16_19}}, + {LMD, 0xff00000000000000, 
0xef00000000000000, 0x0, // LOAD MULTIPLE DISJOINT (64→32&32) (LMD R1,R3,D2(B2),D4(B4)) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15, ap_DispUnsigned_20_31, ap_BaseReg_16_19, ap_DispUnsigned_36_47, ap_BaseReg_32_35}}, + {LMH, 0xff00000000ff0000, 0xeb00000000960000, 0x0, // LOAD MULTIPLE HIGH (32) (LMH R1,R3,D2(B2)) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15, ap_DispSigned20_20_39, ap_BaseReg_16_19}}, + {LNR, 0xff00000000000000, 0x1100000000000000, 0x0, // LOAD NEGATIVE (32) (LNR R1,R2) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15}}, + {LNGR, 0xffff000000000000, 0xb901000000000000, 0xff0000000000, // LOAD NEGATIVE (64) (LNGR R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {LNGFR, 0xffff000000000000, 0xb911000000000000, 0xff0000000000, // LOAD NEGATIVE (64→32) (LNGFR R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {LNXBR, 0xffff000000000000, 0xb341000000000000, 0xff0000000000, // LOAD NEGATIVE (extended BFP) (LNXBR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31}}, + {LNXR, 0xffff000000000000, 0xb361000000000000, 0xff0000000000, // LOAD NEGATIVE (extended HFP) (LNXR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31}}, + {LNDBR, 0xffff000000000000, 0xb311000000000000, 0xff0000000000, // LOAD NEGATIVE (long BFP) (LNDBR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31}}, + {LNDR, 0xff00000000000000, 0x2100000000000000, 0x0, // LOAD NEGATIVE (long HFP) (LNDR R1,R2) + [8]*argField{ap_FPReg_8_11, ap_FPReg_12_15}}, + {LNDFR, 0xffff000000000000, 0xb371000000000000, 0xff0000000000, // LOAD NEGATIVE (long) (LNDFR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31}}, + {LNEBR, 0xffff000000000000, 0xb301000000000000, 0xff0000000000, // LOAD NEGATIVE (short BFP) (LNEBR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31}}, + {LNER, 0xff00000000000000, 0x3100000000000000, 0x0, // LOAD NEGATIVE (short HFP) (LNER R1,R2) + [8]*argField{ap_FPReg_8_11, ap_FPReg_12_15}}, + {LOC, 0xff00000000ff0000, 0xeb00000000f20000, 0x0, // LOAD ON CONDITION (32) (LOC R1,D2(B2),M3) + 
[8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_BaseReg_16_19, ap_Mask_12_15}}, + {LOCR, 0xffff000000000000, 0xb9f2000000000000, 0xf0000000000, // LOAD ON CONDITION (32) (LOCR R1,R2,M3) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31, ap_Mask_16_19}}, + {LOCG, 0xff00000000ff0000, 0xeb00000000e20000, 0x0, // LOAD ON CONDITION (64) (LOCG R1,D2(B2),M3) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_BaseReg_16_19, ap_Mask_12_15}}, + {LOCGR, 0xffff000000000000, 0xb9e2000000000000, 0xf0000000000, // LOAD ON CONDITION (64) (LOCGR R1,R2,M3) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31, ap_Mask_16_19}}, + {LPTEA, 0xffff000000000000, 0xb9aa000000000000, 0x0, // LOAD PAGE TABLE ENTRY ADDRESS (LPTEA R1,R3,R2,M4) + [8]*argField{ap_Reg_24_27, ap_Reg_16_19, ap_Reg_28_31, ap_Mask_20_23}}, + {LPD, 0xff0f000000000000, 0xc804000000000000, 0x0, // LOAD PAIR DISJOINT (32) (LPD R3,D1(B1),D2(B2)) + [8]*argField{ap_Reg_8_11, ap_DispUnsigned_20_31, ap_BaseReg_16_19, ap_DispUnsigned_36_47, ap_BaseReg_32_35}}, + {LPDG, 0xff0f000000000000, 0xc805000000000000, 0x0, // LOAD PAIR DISJOINT (64) (LPDG R3,D1(B1),D2(B2)) + [8]*argField{ap_Reg_8_11, ap_DispUnsigned_20_31, ap_BaseReg_16_19, ap_DispUnsigned_36_47, ap_BaseReg_32_35}}, + {LPQ, 0xff00000000ff0000, 0xe3000000008f0000, 0x0, // LOAD PAIR FROM QUADWORD (64&64←128) (LPQ R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {LPR, 0xff00000000000000, 0x1000000000000000, 0x0, // LOAD POSITIVE (32) (LPR R1,R2) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15}}, + {LPGR, 0xffff000000000000, 0xb900000000000000, 0xff0000000000, // LOAD POSITIVE (64) (LPGR R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {LPGFR, 0xffff000000000000, 0xb910000000000000, 0xff0000000000, // LOAD POSITIVE (64→32) (LPGFR R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {LPXBR, 0xffff000000000000, 0xb340000000000000, 0xff0000000000, // LOAD POSITIVE (extended BFP) (LPXBR R1,R2) + [8]*argField{ap_FPReg_24_27, 
ap_FPReg_28_31}}, + {LPXR, 0xffff000000000000, 0xb360000000000000, 0xff0000000000, // LOAD POSITIVE (extended HFP) (LPXR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31}}, + {LPDBR, 0xffff000000000000, 0xb310000000000000, 0xff0000000000, // LOAD POSITIVE (long BFP) (LPDBR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31}}, + {LPDR, 0xff00000000000000, 0x2000000000000000, 0x0, // LOAD POSITIVE (long HFP) (LPDR R1,R2) + [8]*argField{ap_FPReg_8_11, ap_FPReg_12_15}}, + {LPDFR, 0xffff000000000000, 0xb370000000000000, 0xff0000000000, // LOAD POSITIVE (long) (LPDFR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31}}, + {LPEBR, 0xffff000000000000, 0xb300000000000000, 0xff0000000000, // LOAD POSITIVE (short BFP) (LPEBR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31}}, + {LPER, 0xff00000000000000, 0x3000000000000000, 0x0, // LOAD POSITIVE (short HFP) (LPER R1,R2) + [8]*argField{ap_FPReg_8_11, ap_FPReg_12_15}}, + {LPSW, 0xff00000000000000, 0x8200000000000000, 0x0, // LOAD PSW (LPSW D1(B1)) + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {LPSWE, 0xffff000000000000, 0xb2b2000000000000, 0x0, // LOAD PSW EXTENDED (LPSWE D2(B2)) + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {LPSWEY, 0xff00000000ff0000, 0xeb00000000710000, 0xff000000000000, // LOAD PSW EXTENDED (LPSWEY D1(B1)) + [8]*argField{ap_DispSigned20_20_39, ap_BaseReg_16_19}}, + {LRA, 0xff00000000000000, 0xb100000000000000, 0x0, // LOAD REAL ADDRESS (32) (LRA R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {LRAY, 0xff00000000ff0000, 0xe300000000130000, 0x0, // LOAD REAL ADDRESS (32) (LRAY R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {LRAG, 0xff00000000ff0000, 0xe300000000030000, 0x0, // LOAD REAL ADDRESS (64) (LRAG R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {LRL, 0xff0f000000000000, 0xc40d000000000000, 
0x0, // LOAD RELATIVE LONG (32) (LRL R1,RI2) + [8]*argField{ap_Reg_8_11, ap_RegImSigned32_16_47}}, + {LGRL, 0xff0f000000000000, 0xc408000000000000, 0x0, // LOAD RELATIVE LONG (64) (LGRL R1,RI2) + [8]*argField{ap_Reg_8_11, ap_RegImSigned32_16_47}}, + {LGFRL, 0xff0f000000000000, 0xc40c000000000000, 0x0, // LOAD RELATIVE LONG (64→32) (LGFRL R1,RI2) + [8]*argField{ap_Reg_8_11, ap_RegImSigned32_16_47}}, + {LRVH, 0xff00000000ff0000, 0xe3000000001f0000, 0x0, // LOAD REVERSED (16) (LRVH R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {LRV, 0xff00000000ff0000, 0xe3000000001e0000, 0x0, // LOAD REVERSED (32) (LRV R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {LRVR, 0xffff000000000000, 0xb91f000000000000, 0xff0000000000, // LOAD REVERSED (32) (LRVR R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {LRVG, 0xff00000000ff0000, 0xe3000000000f0000, 0x0, // LOAD REVERSED (64) (LRVG R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {LRVGR, 0xffff000000000000, 0xb90f000000000000, 0xff0000000000, // LOAD REVERSED (64) (LRVGR R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {LDXBR, 0xffff000000000000, 0xb345000000000000, 0xff0000000000, // LOAD ROUNDED (extended to long BFP) (LDXBR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31}}, + {LDXBRA, 0xffff000000000000, 0xb345000000000000, 0x0, // LOAD ROUNDED (extended to long BFP) (LDXBRA R1,M3,R2,M4) + [8]*argField{ap_FPReg_24_27, ap_Mask_16_19, ap_FPReg_28_31, ap_Mask_20_23}}, + {LDXTR, 0xffff000000000000, 0xb3dd000000000000, 0x0, // LOAD ROUNDED (extended to long DFP) (LDXTR R1,M3,R2,M4) + [8]*argField{ap_FPReg_24_27, ap_Mask_16_19, ap_FPReg_28_31, ap_Mask_20_23}}, + {LDXR, 0xff00000000000000, 0x2500000000000000, 0x0, // LOAD ROUNDED (extended to long HFP) (LDXR R1,R2) + [8]*argField{ap_FPReg_8_11, ap_FPReg_12_15}}, + {LRDR, 0xff00000000000000, 
0x2500000000000000, 0x0, // LOAD ROUNDED (extended to long HFP) (LRDR R1,R2) + [8]*argField{ap_FPReg_8_11, ap_FPReg_12_15}}, + {LEXBR, 0xffff000000000000, 0xb346000000000000, 0xff0000000000, // LOAD ROUNDED (extended to short BFP) (LEXBR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31}}, + {LEXBRA, 0xffff000000000000, 0xb346000000000000, 0x0, // LOAD ROUNDED (extended to short BFP) (LEXBRA R1,M3,R2,M4) + [8]*argField{ap_FPReg_24_27, ap_Mask_16_19, ap_FPReg_28_31, ap_Mask_20_23}}, + {LEXR, 0xffff000000000000, 0xb366000000000000, 0xff0000000000, // LOAD ROUNDED (extended to short HFP) (LEXR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31}}, + {LEDBR, 0xffff000000000000, 0xb344000000000000, 0xff0000000000, // LOAD ROUNDED (long to short BFP) (LEDBR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31}}, + {LEDBRA, 0xffff000000000000, 0xb344000000000000, 0x0, // LOAD ROUNDED (long to short BFP) (LEDBRA R1,M3,R2,M4) + [8]*argField{ap_FPReg_24_27, ap_Mask_16_19, ap_FPReg_28_31, ap_Mask_20_23}}, + {LEDTR, 0xffff000000000000, 0xb3d5000000000000, 0x0, // LOAD ROUNDED (long to short DFP) (LEDTR R1,M3,R2,M4) + [8]*argField{ap_FPReg_24_27, ap_Mask_16_19, ap_FPReg_28_31, ap_Mask_20_23}}, + {LEDR, 0xff00000000000000, 0x3500000000000000, 0x0, // LOAD ROUNDED (long to short HFP) (LEDR R1,R2) + [8]*argField{ap_FPReg_8_11, ap_FPReg_12_15}}, + {LRER, 0xff00000000000000, 0x3500000000000000, 0x0, // LOAD ROUNDED (long to short HFP) (LRER R1,R2) + [8]*argField{ap_FPReg_8_11, ap_FPReg_12_15}}, + {LURA, 0xffff000000000000, 0xb24b000000000000, 0xff0000000000, // LOAD USING REAL ADDRESS (32) (LURA R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {LURAG, 0xffff000000000000, 0xb905000000000000, 0xff0000000000, // LOAD USING REAL ADDRESS (64) (LURAG R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {LZXR, 0xffff000000000000, 0xb376000000000000, 0xff0f00000000, // LOAD ZERO (extended) (LZXR R1) + [8]*argField{ap_FPReg_24_27}}, + {LZDR, 0xffff000000000000, 0xb375000000000000, 
0xff0f00000000, // LOAD ZERO (long) (LZDR R1) + [8]*argField{ap_FPReg_24_27}}, + {LZER, 0xffff000000000000, 0xb374000000000000, 0xff0f00000000, // LOAD ZERO (short) (LZER R1) + [8]*argField{ap_FPReg_24_27}}, + {MSTA, 0xffff000000000000, 0xb247000000000000, 0xff0f00000000, // MODIFY STACKED STATE (MSTA R1) + [8]*argField{ap_Reg_24_27}}, + {MSCH, 0xffff000000000000, 0xb232000000000000, 0x0, // MODIFY SUBCHANNEL (MSCH D2(B2)) + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {MC, 0xff00000000000000, 0xaf00000000000000, 0x0, // MONITOR CALL (MC D1(B1),I2) + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19, ap_ImmUnsigned_8_15}}, + {MVHHI, 0xffff000000000000, 0xe544000000000000, 0x0, // MOVE (16←16) (MVHHI D1(B1),I2) + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19, ap_ImmUnsigned_32_47}}, + {MVHI, 0xffff000000000000, 0xe54c000000000000, 0x0, // MOVE (32→16) (MVHI D1(B1),I2) + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19, ap_ImmUnsigned_32_47}}, + {MVGHI, 0xffff000000000000, 0xe548000000000000, 0x0, // MOVE (64←16) (MVGHI D1(B1),I2) + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19, ap_ImmSigned16_32_47}}, + {MVC, 0xff00000000000000, 0xd200000000000000, 0x0, // MOVE (character) (MVC D1(L1,B1),D2(B2)) + [8]*argField{ap_DispUnsigned_20_31, ap_Len_8_15, ap_BaseReg_16_19, ap_DispUnsigned_36_47, ap_BaseReg_32_35}}, + {MVI, 0xff00000000000000, 0x9200000000000000, 0x0, // MOVE (immediate) (MVI D1(B1),I2) + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19, ap_ImmUnsigned_8_15}}, + {MVIY, 0xff00000000ff0000, 0xeb00000000520000, 0x0, // MOVE (immediate) (MVIY D1(B1),I2) + [8]*argField{ap_DispSigned20_20_39, ap_BaseReg_16_19, ap_ImmUnsigned_8_15}}, + {MVCIN, 0xff00000000000000, 0xe800000000000000, 0x0, // MOVE INVERSE (MVCIN D1(L1,B1),D2(B2)) + [8]*argField{ap_DispUnsigned_20_31, ap_Len_8_15, ap_BaseReg_16_19, ap_DispUnsigned_36_47, ap_BaseReg_32_35}}, + {MVCL, 0xff00000000000000, 0xe00000000000000, 0x0, // MOVE LONG (MVCL R1,R2) + 
[8]*argField{ap_Reg_8_11, ap_Reg_12_15}}, + {MVCLE, 0xff00000000000000, 0xa800000000000000, 0x0, // MOVE LONG EXTENDED (MVCLE R1,R3,D2(B2)) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15, ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {MVCLU, 0xff00000000ff0000, 0xeb000000008e0000, 0x0, // MOVE LONG UNICODE (MVCLU R1,R3,D2(B2)) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15, ap_DispSigned20_20_39, ap_BaseReg_16_19}}, + {MVN, 0xff00000000000000, 0xd100000000000000, 0x0, // MOVE NUMERICS (MVN D1(L1,B1),D2(B2)) + [8]*argField{ap_DispUnsigned_20_31, ap_Len_8_15, ap_BaseReg_16_19, ap_DispUnsigned_36_47, ap_BaseReg_32_35}}, + {MVPG, 0xffff000000000000, 0xb254000000000000, 0xff0000000000, // MOVE PAGE (MVPG R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {MVCRL, 0xffff000000000000, 0xe50a000000000000, 0x0, // MOVE RIGHT TO LEFT (MVCRL D1(B1),D2(B2)) + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19, ap_DispUnsigned_36_47, ap_BaseReg_32_35}}, + {MVST, 0xffff000000000000, 0xb255000000000000, 0xff0000000000, // MOVE STRING (MVST R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {MVCP, 0xff00000000000000, 0xda00000000000000, 0x0, // MOVE TO PRIMARY (MVCP D1(R1,B1),D2(B2),R3) + [8]*argField{ap_DispUnsigned_20_31, ap_Reg_8_11, ap_BaseReg_16_19, ap_DispUnsigned_36_47, ap_BaseReg_32_35, ap_Reg_12_15}}, + {MVCS, 0xff00000000000000, 0xdb00000000000000, 0x0, // MOVE TO SECONDARY (MVCS D1(R1,B1),D2(B2),R3) + [8]*argField{ap_DispUnsigned_20_31, ap_Reg_8_11, ap_BaseReg_16_19, ap_DispUnsigned_36_47, ap_BaseReg_32_35, ap_Reg_12_15}}, + {MVCDK, 0xffff000000000000, 0xe50f000000000000, 0x0, // MOVE WITH DESTINATION KEY (MVCDK D1(B1),D2(B2)) + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19, ap_DispUnsigned_36_47, ap_BaseReg_32_35}}, + {MVCK, 0xff00000000000000, 0xd900000000000000, 0x0, // MOVE WITH KEY (MVCK D1(R1,B1),D2(B2),R3) + [8]*argField{ap_DispUnsigned_20_31, ap_Reg_8_11, ap_BaseReg_16_19, ap_DispUnsigned_36_47, ap_BaseReg_32_35, ap_Reg_12_15}}, + {MVO, 0xff00000000000000, 
0xf100000000000000, 0x0, // MOVE WITH OFFSET (MVO D1(L1,B1),D2(L2,B2)) + [8]*argField{ap_DispUnsigned_20_31, ap_Len_8_11, ap_BaseReg_16_19, ap_DispUnsigned_36_47, ap_Len_12_15, ap_BaseReg_32_35}}, + {MVCOS, 0xff0f000000000000, 0xc800000000000000, 0x0, // MOVE WITH OPTIONAL SPECIFICATIONS (MVCOS D1(B1),D2(B2),R3) + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19, ap_DispUnsigned_36_47, ap_BaseReg_32_35, ap_Reg_8_11}}, + {MVCSK, 0xffff000000000000, 0xe50e000000000000, 0x0, // MOVE WITH SOURCE KEY (MVCSK D1(B1),D2(B2)) + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19, ap_DispUnsigned_36_47, ap_BaseReg_32_35}}, + {MVZ, 0xff00000000000000, 0xd300000000000000, 0x0, // MOVE ZONES (MVZ D1(L1,B1),D2(B2)) + [8]*argField{ap_DispUnsigned_20_31, ap_Len_8_15, ap_BaseReg_16_19, ap_DispUnsigned_36_47, ap_BaseReg_32_35}}, + {MG, 0xff00000000ff0000, 0xe300000000840000, 0x0, // MULTIPLY (128←64) (MG R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {MGRK, 0xffff000000000000, 0xb9ec000000000000, 0xf0000000000, // MULTIPLY (128←64) (MGRK R1,R2,R3) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31, ap_Reg_16_19}}, + {M, 0xff00000000000000, 0x5c00000000000000, 0x0, // MULTIPLY (64←32) (M R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {MFY, 0xff00000000ff0000, 0xe3000000005c0000, 0x0, // MULTIPLY (64←32) (MFY R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {MR, 0xff00000000000000, 0x1c00000000000000, 0x0, // MULTIPLY (64←32) (MR R1,R2) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15}}, + {MXBR, 0xffff000000000000, 0xb34c000000000000, 0xff0000000000, // MULTIPLY (extended BFP) (MXBR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31}}, + {MXTR, 0xffff000000000000, 0xb3d8000000000000, 0xf0000000000, // MULTIPLY (extended DFP) (MXTR R1,R2,R3) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31, ap_FPReg_16_19}}, + {MXTRA, 
0xffff000000000000, 0xb3d8000000000000, 0x0, // MULTIPLY (extended DFP) (MXTRA R1,R2,R3,M4) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31, ap_FPReg_16_19, ap_Mask_20_23}}, + {MXR, 0xff00000000000000, 0x2600000000000000, 0x0, // MULTIPLY (extended HFP) (MXR R1,R2) + [8]*argField{ap_FPReg_8_11, ap_FPReg_12_15}}, + {MDB, 0xff00000000ff0000, 0xed000000001c0000, 0xff000000, // MULTIPLY (long BFP) (MDB R1,D2(X2,B2)) + [8]*argField{ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {MDBR, 0xffff000000000000, 0xb31c000000000000, 0xff0000000000, // MULTIPLY (long BFP) (MDBR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31}}, + {MDTR, 0xffff000000000000, 0xb3d0000000000000, 0xf0000000000, // MULTIPLY (long DFP) (MDTR R1,R2,R3) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31, ap_FPReg_16_19}}, + {MDTRA, 0xffff000000000000, 0xb3d0000000000000, 0x0, // MULTIPLY (long DFP) (MDTRA R1,R2,R3,M4) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31, ap_FPReg_16_19, ap_Mask_20_23}}, + {MD, 0xff00000000000000, 0x6c00000000000000, 0x0, // MULTIPLY (long HFP) (MD R1,D2(X2,B2)) + [8]*argField{ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {MDR, 0xff00000000000000, 0x2c00000000000000, 0x0, // MULTIPLY (long HFP) (MDR R1,R2) + [8]*argField{ap_FPReg_8_11, ap_FPReg_12_15}}, + {MXDB, 0xff00000000ff0000, 0xed00000000070000, 0xff000000, // MULTIPLY (long to extended BFP) (MXDB R1,D2(X2,B2)) + [8]*argField{ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {MXDBR, 0xffff000000000000, 0xb307000000000000, 0xff0000000000, // MULTIPLY (long to extended BFP) (MXDBR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31}}, + {MXD, 0xff00000000000000, 0x6700000000000000, 0x0, // MULTIPLY (long to extended HFP) (MXD R1,D2(X2,B2)) + [8]*argField{ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {MXDR, 0xff00000000000000, 0x2700000000000000, 0x0, // MULTIPLY (long to extended HFP) (MXDR R1,R2) + 
[8]*argField{ap_FPReg_8_11, ap_FPReg_12_15}}, + {MEEB, 0xff00000000ff0000, 0xed00000000170000, 0xff000000, // MULTIPLY (short BFP) (MEEB R1,D2(X2,B2)) + [8]*argField{ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {MEEBR, 0xffff000000000000, 0xb317000000000000, 0xff0000000000, // MULTIPLY (short BFP) (MEEBR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31}}, + {MEE, 0xff00000000ff0000, 0xed00000000370000, 0xff000000, // MULTIPLY (short HFP) (MEE R1,D2(X2,B2)) + [8]*argField{ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {MEER, 0xffff000000000000, 0xb337000000000000, 0xff0000000000, // MULTIPLY (short HFP) (MEER R1,R2) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31}}, + {MDEB, 0xff00000000ff0000, 0xed000000000c0000, 0xff000000, // MULTIPLY (short to long BFP) (MDEB R1,D2(X2,B2)) + [8]*argField{ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {MDEBR, 0xffff000000000000, 0xb30c000000000000, 0xff0000000000, // MULTIPLY (short to long BFP) (MDEBR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31}}, + {MDE, 0xff00000000000000, 0x7c00000000000000, 0x0, // MULTIPLY (short to long HFP) (MDE R1,D2(X2,B2)) + [8]*argField{ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {MDER, 0xff00000000000000, 0x3c00000000000000, 0x0, // MULTIPLY (short to long HFP) (MDER R1,R2) + [8]*argField{ap_FPReg_8_11, ap_FPReg_12_15}}, + {ME, 0xff00000000000000, 0x7c00000000000000, 0x0, // MULTIPLY (short to long HFP) (ME R1,D2(X2,B2)) + [8]*argField{ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {MER, 0xff00000000000000, 0x3c00000000000000, 0x0, // MULTIPLY (short to long HFP) (MER R1,R2) + [8]*argField{ap_FPReg_8_11, ap_FPReg_12_15}}, + {MAY, 0xff00000000ff0000, 0xed000000003a0000, 0xf000000, // MULTIPLY & ADD UNNORMALIZED (long to ext. 
HFP) (MAY R1,R3,D2(X2,B2)) + [8]*argField{ap_FPReg_32_35, ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {MAYR, 0xffff000000000000, 0xb33a000000000000, 0xf0000000000, // MULTIPLY & ADD UNNORMALIZED (long to ext. HFP) (MAYR R1,R3,R2) + [8]*argField{ap_FPReg_16_19, ap_FPReg_24_27, ap_FPReg_28_31}}, + {MADB, 0xff00000000ff0000, 0xed000000001e0000, 0xf000000, // MULTIPLY AND ADD (long BFP) (MADB R1,R3,D2(X2,B2)) + [8]*argField{ap_FPReg_32_35, ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {MADBR, 0xffff000000000000, 0xb31e000000000000, 0xf0000000000, // MULTIPLY AND ADD (long BFP) (MADBR R1,R3,R2) + [8]*argField{ap_FPReg_16_19, ap_FPReg_24_27, ap_FPReg_28_31}}, + {MAD, 0xff00000000ff0000, 0xed000000003e0000, 0xf000000, // MULTIPLY AND ADD (long HFP) (MAD R1,R3,D2(X2,B2)) + [8]*argField{ap_FPReg_32_35, ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {MADR, 0xffff000000000000, 0xb33e000000000000, 0xf0000000000, // MULTIPLY AND ADD (long HFP) (MADR R1,R3,R2) + [8]*argField{ap_FPReg_16_19, ap_FPReg_24_27, ap_FPReg_28_31}}, + {MAEB, 0xff00000000ff0000, 0xed000000000e0000, 0xf000000, // MULTIPLY AND ADD (short BFP) (MAEB R1,R3,D2(X2,B2)) + [8]*argField{ap_FPReg_32_35, ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {MAEBR, 0xffff000000000000, 0xb30e000000000000, 0xf0000000000, // MULTIPLY AND ADD (short BFP) (MAEBR R1,R3,R2) + [8]*argField{ap_FPReg_16_19, ap_FPReg_24_27, ap_FPReg_28_31}}, + {MAE, 0xff00000000ff0000, 0xed000000002e0000, 0xf000000, // MULTIPLY AND ADD (short HFP) (MAE R1,R3,D2(X2,B2)) + [8]*argField{ap_FPReg_32_35, ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {MAER, 0xffff000000000000, 0xb32e000000000000, 0xf0000000000, // MULTIPLY AND ADD (short HFP) (MAER R1,R3,R2) + [8]*argField{ap_FPReg_16_19, ap_FPReg_24_27, ap_FPReg_28_31}}, + {MAYH, 0xff00000000ff0000, 0xed000000003c0000, 0xf000000, // MULTIPLY 
AND ADD UNNRM. (long to ext. high HFP) (MAYH R1,R3,D2(X2,B2)) + [8]*argField{ap_FPReg_32_35, ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {MAYHR, 0xffff000000000000, 0xb33c000000000000, 0xf0000000000, // MULTIPLY AND ADD UNNRM. (long to ext. high HFP) (MAYHR R1,R3,R2) + [8]*argField{ap_FPReg_16_19, ap_FPReg_24_27, ap_FPReg_28_31}}, + {MAYL, 0xff00000000ff0000, 0xed00000000380000, 0xf000000, // MULTIPLY AND ADD UNNRM. (long to ext. low HFP) (MAYL R1,R3,D2(X2,B2)) + [8]*argField{ap_FPReg_32_35, ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {MAYLR, 0xffff000000000000, 0xb338000000000000, 0xf0000000000, // MULTIPLY AND ADD UNNRM. (long to ext. low HFP) (MAYLR R1,R3,R2) + [8]*argField{ap_FPReg_16_19, ap_FPReg_24_27, ap_FPReg_28_31}}, + {MSDB, 0xff00000000ff0000, 0xed000000001f0000, 0xf000000, // MULTIPLY AND SUBTRACT (long BFP) (MSDB R1,R3,D2(X2,B2)) + [8]*argField{ap_FPReg_32_35, ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {MSDBR, 0xffff000000000000, 0xb31f000000000000, 0xf0000000000, // MULTIPLY AND SUBTRACT (long BFP) (MSDBR R1,R3,R2) + [8]*argField{ap_FPReg_16_19, ap_FPReg_24_27, ap_FPReg_28_31}}, + {MSD, 0xff00000000ff0000, 0xed000000003f0000, 0xf000000, // MULTIPLY AND SUBTRACT (long HFP) (MSD R1,R3,D2(X2,B2)) + [8]*argField{ap_FPReg_32_35, ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {MSDR, 0xffff000000000000, 0xb33f000000000000, 0xf0000000000, // MULTIPLY AND SUBTRACT (long HFP) (MSDR R1,R3,R2) + [8]*argField{ap_FPReg_16_19, ap_FPReg_24_27, ap_FPReg_28_31}}, + {MSEB, 0xff00000000ff0000, 0xed000000000f0000, 0xf000000, // MULTIPLY AND SUBTRACT (short BFP) (MSEB R1,R3,D2(X2,B2)) + [8]*argField{ap_FPReg_32_35, ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {MSEBR, 0xffff000000000000, 0xb30f000000000000, 0xf0000000000, // MULTIPLY AND SUBTRACT (short BFP) (MSEBR R1,R3,R2) + [8]*argField{ap_FPReg_16_19, 
ap_FPReg_24_27, ap_FPReg_28_31}}, + {MSE, 0xff00000000ff0000, 0xed000000002f0000, 0xf000000, // MULTIPLY AND SUBTRACT (short HFP) (MSE R1,R3,D2(X2,B2)) + [8]*argField{ap_FPReg_32_35, ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {MSER, 0xffff000000000000, 0xb32f000000000000, 0xf0000000000, // MULTIPLY AND SUBTRACT (short HFP) (MSER R1,R3,R2) + [8]*argField{ap_FPReg_16_19, ap_FPReg_24_27, ap_FPReg_28_31}}, + {MP, 0xff00000000000000, 0xfc00000000000000, 0x0, // MULTIPLY DECIMAL (MP D1(L1,B1),D2(L2,B2)) + [8]*argField{ap_DispUnsigned_20_31, ap_Len_8_11, ap_BaseReg_16_19, ap_DispUnsigned_36_47, ap_Len_12_15, ap_BaseReg_32_35}}, + {MH, 0xff00000000000000, 0x4c00000000000000, 0x0, // MULTIPLY HALFWORD (32←16) (MH R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {MHY, 0xff00000000ff0000, 0xe3000000007c0000, 0x0, // MULTIPLY HALFWORD (32←16) (MHY R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {MGH, 0xff00000000ff0000, 0xe3000000003c0000, 0x0, // MULTIPLY HALFWORD (64→16) (MGH R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {MHI, 0xff0f000000000000, 0xa70c000000000000, 0x0, // MULTIPLY HALFWORD IMMEDIATE (32→16) (MHI R1,I2) + [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_31}}, + {MGHI, 0xff0f000000000000, 0xa70d000000000000, 0x0, // MULTIPLY HALFWORD IMMEDIATE (64→16) (MGHI R1,I2) + [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_31}}, + {MLG, 0xff00000000ff0000, 0xe300000000860000, 0x0, // MULTIPLY LOGICAL (128→64) (MLG R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {MLGR, 0xffff000000000000, 0xb986000000000000, 0xff0000000000, // MULTIPLY LOGICAL (128→64) (MLGR R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {ML, 0xff00000000ff0000, 0xe300000000960000, 0x0, // MULTIPLY LOGICAL (64←32) (ML R1,D2(X2,B2)) + 
[8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {MLR, 0xffff000000000000, 0xb996000000000000, 0xff0000000000, // MULTIPLY LOGICAL (64←32) (MLR R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {MS, 0xff00000000000000, 0x7100000000000000, 0x0, // MULTIPLY SINGLE (32) (MS R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {MSC, 0xff00000000ff0000, 0xe300000000530000, 0x0, // MULTIPLY SINGLE (32) (MSC R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {MSR, 0xffff000000000000, 0xb252000000000000, 0xff0000000000, // MULTIPLY SINGLE (32) (MSR R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {MSRKC, 0xffff000000000000, 0xb9fd000000000000, 0xf0000000000, // MULTIPLY SINGLE (32) (MSRKC R1,R2,R3) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31, ap_Reg_16_19}}, + {MSY, 0xff00000000ff0000, 0xe300000000510000, 0x0, // MULTIPLY SINGLE (32) (MSY R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {MSG, 0xff00000000ff0000, 0xe3000000000c0000, 0x0, // MULTIPLY SINGLE (64) (MSG R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {MSGC, 0xff00000000ff0000, 0xe300000000830000, 0x0, // MULTIPLY SINGLE (64) (MSGC R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {MSGR, 0xffff000000000000, 0xb90c000000000000, 0xff0000000000, // MULTIPLY SINGLE (64) (MSGR R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {MSGRKC, 0xffff000000000000, 0xb9ed000000000000, 0xf0000000000, // MULTIPLY SINGLE (64) (MSGRKC R1,R2,R3) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31, ap_Reg_16_19}}, + {MSGF, 0xff00000000ff0000, 0xe3000000001c0000, 0x0, // MULTIPLY SINGLE (64←32) (MSGF R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {MSGFR, 
0xffff000000000000, 0xb91c000000000000, 0xff0000000000, // MULTIPLY SINGLE (64←32) (MSGFR R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {MSFI, 0xff0f000000000000, 0xc201000000000000, 0x0, // MULTIPLY SINGLE IMMEDIATE (32) (MSFI R1,I2) + [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_47}}, + {MSGFI, 0xff0f000000000000, 0xc200000000000000, 0x0, // MULTIPLY SINGLE IMMEDIATE (64←32) (MSGFI R1,I2) + [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_47}}, + {MYH, 0xff00000000ff0000, 0xed000000003d0000, 0xf000000, // MULTIPLY UNNORM. (long to ext. high HFP) (MYH R1,R3,D2(X2,B2)) + [8]*argField{ap_FPReg_32_35, ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {MYHR, 0xffff000000000000, 0xb33d000000000000, 0xf0000000000, // MULTIPLY UNNORM. (long to ext. high HFP) (MYHR R1,R3,R2) + [8]*argField{ap_FPReg_16_19, ap_FPReg_24_27, ap_FPReg_28_31}}, + {MYL, 0xff00000000ff0000, 0xed00000000390000, 0xf000000, // MULTIPLY UNNORM. (long to ext. low HFP) (MYL R1,R3,D2(X2,B2)) + [8]*argField{ap_FPReg_32_35, ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {MYLR, 0xffff000000000000, 0xb339000000000000, 0xf0000000000, // MULTIPLY UNNORM. (long to ext. low HFP) (MYLR R1,R3,R2) + [8]*argField{ap_FPReg_16_19, ap_FPReg_24_27, ap_FPReg_28_31}}, + {MY, 0xff00000000ff0000, 0xed000000003b0000, 0xf000000, // MULTIPLY UNNORMALIZED (long to ext. HFP) (MY R1,R3,D2(X2,B2)) + [8]*argField{ap_FPReg_32_35, ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {MYR, 0xffff000000000000, 0xb33b000000000000, 0xf0000000000, // MULTIPLY UNNORMALIZED (long to ext. 
HFP) (MYR R1,R3,R2) + [8]*argField{ap_FPReg_16_19, ap_FPReg_24_27, ap_FPReg_28_31}}, + {NNRK, 0xffff000000000000, 0xb974000000000000, 0xf0000000000, // NAND (32) (NNRK R1,R2,R3) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31, ap_Reg_16_19}}, + {NNGRK, 0xffff000000000000, 0xb964000000000000, 0xf0000000000, // NAND (64) (NNGRK R1,R2,R3) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31, ap_Reg_16_19}}, + {NNPA, 0xffff000000000000, 0xb93b000000000000, 0xffff00000000, // NEURAL NETWORK PROCESSING ASSIST (NNPA) + [8]*argField{}}, + {NIAI, 0xffff000000000000, 0xb2fa000000000000, 0xff0000000000, // NEXT INSTRUCTION ACCESS INTENT (NIAI I1,I2) + [8]*argField{ap_ImmUnsigned_24_27, ap_ImmUnsigned_28_31}}, + {NTSTG, 0xff00000000ff0000, 0xe300000000250000, 0x0, // NONTRANSACTIONAL STORE (64) (NTSTG R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {NORK, 0xffff000000000000, 0xb976000000000000, 0xf0000000000, // NOR (32) (NORK R1,R2,R3) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31, ap_Reg_16_19}}, + {NOGRK, 0xffff000000000000, 0xb966000000000000, 0xf0000000000, // NOR (64) (NOGRK R1,R2,R3) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31, ap_Reg_16_19}}, + {NXRK, 0xffff000000000000, 0xb977000000000000, 0xf0000000000, // NOT EXCLUSIVE OR (32) (NXRK R1,R2,R3) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31, ap_Reg_16_19}}, + {NXGRK, 0xffff000000000000, 0xb967000000000000, 0xf0000000000, // NOT EXCLUSIVE OR (64) (NXGRK R1,R2,R3) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31, ap_Reg_16_19}}, + {O, 0xff00000000000000, 0x5600000000000000, 0x0, // OR (32) (O R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {OR, 0xff00000000000000, 0x1600000000000000, 0x0, // OR (32) (OR R1,R2) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15}}, + {ORK, 0xffff000000000000, 0xb9f6000000000000, 0xf0000000000, // OR (32) (ORK R1,R2,R3) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31, ap_Reg_16_19}}, + {OY, 0xff00000000ff0000, 
0xe300000000560000, 0x0, // OR (32) (OY R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {OG, 0xff00000000ff0000, 0xe300000000810000, 0x0, // OR (64) (OG R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {OGR, 0xffff000000000000, 0xb981000000000000, 0xff0000000000, // OR (64) (OGR R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {OGRK, 0xffff000000000000, 0xb9e6000000000000, 0xf0000000000, // OR (64) (OGRK R1,R2,R3) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31, ap_Reg_16_19}}, + {OC, 0xff00000000000000, 0xd600000000000000, 0x0, // OR (character) (OC D1(L1,B1),D2(B2)) + [8]*argField{ap_DispUnsigned_20_31, ap_Len_8_15, ap_BaseReg_16_19, ap_DispUnsigned_36_47, ap_BaseReg_32_35}}, + {OI, 0xff00000000000000, 0x9600000000000000, 0x0, // OR (immediate) (OI D1(B1),I2) + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19, ap_ImmUnsigned_8_15}}, + {OIY, 0xff00000000ff0000, 0xeb00000000560000, 0x0, // OR (immediate) (OIY D1(B1),I2) + [8]*argField{ap_DispSigned20_20_39, ap_BaseReg_16_19, ap_ImmUnsigned_8_15}}, + {OIHH, 0xff0f000000000000, 0xa508000000000000, 0x0, // OR IMMEDIATE (high high) (OIHH R1,I2) + [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_31}}, + {OIHL, 0xff0f000000000000, 0xa509000000000000, 0x0, // OR IMMEDIATE (high low) (OIHL R1,I2) + [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_31}}, + {OIHF, 0xff0f000000000000, 0xc00c000000000000, 0x0, // OR IMMEDIATE (high) (OIHF R1,I2) + [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_47}}, + {OILH, 0xff0f000000000000, 0xa50a000000000000, 0x0, // OR IMMEDIATE (low high) (OILH R1,I2) + [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_31}}, + {OILL, 0xff0f000000000000, 0xa50b000000000000, 0x0, // OR IMMEDIATE (low low) (OILL R1,I2) + [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_31}}, + {OILF, 0xff0f000000000000, 0xc00d000000000000, 0x0, // OR IMMEDIATE (low) (OILF R1,I2) + [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_47}}, + 
{OCRK, 0xffff000000000000, 0xb975000000000000, 0xf0000000000, // OR WITH COMPLEMENT (32) (OCRK R1,R2,R3) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31, ap_Reg_16_19}}, + {OCGRK, 0xffff000000000000, 0xb965000000000000, 0xf0000000000, // OR WITH COMPLEMENT (64) (OCGRK R1,R2,R3) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31, ap_Reg_16_19}}, + {PACK, 0xff00000000000000, 0xf200000000000000, 0x0, // PACK (PACK D1(L1,B1),D2(L2,B2)) + [8]*argField{ap_DispUnsigned_20_31, ap_Len_8_11, ap_BaseReg_16_19, ap_DispUnsigned_36_47, ap_Len_12_15, ap_BaseReg_32_35}}, + {PKA, 0xff00000000000000, 0xe900000000000000, 0x0, // PACK ASCII (PKA D1(B1),D2(L2,B2)) + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19, ap_DispUnsigned_36_47, ap_Len_8_15, ap_BaseReg_32_35}}, + {PKU, 0xff00000000000000, 0xe100000000000000, 0x0, // PACK UNICODE (PKU D1(B1),D2(L2,B2)) + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19, ap_DispUnsigned_36_47, ap_Len_8_15, ap_BaseReg_32_35}}, + {PGIN, 0xffff000000000000, 0xb22e000000000000, 0xff0000000000, // PAGE IN (PGIN R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {PGOUT, 0xffff000000000000, 0xb22f000000000000, 0xff0000000000, // PAGE OUT (PGOUT R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {PCC, 0xffff000000000000, 0xb92c000000000000, 0xffff00000000, // PERFORM CRYPTOGRAPHIC COMPUTATION (PCC) + [8]*argField{}}, + {PCKMO, 0xffff000000000000, 0xb928000000000000, 0xffff00000000, // PERFORM CRYPTOGRAPHIC KEY MGMT. 
OPERATIONS (PCKMO) + [8]*argField{}}, + {PFPO, 0xffff000000000000, 0x10a000000000000, 0x0, // PERFORM FLOATING-POINT OPERATION (PFPO) + [8]*argField{}}, + {PFMF, 0xffff000000000000, 0xb9af000000000000, 0xff0000000000, // PERFORM FRAME MANAGEMENT FUNCTION (PFMF R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {PLO, 0xff00000000000000, 0xee00000000000000, 0x0, // PERFORM LOCKED OPERATION (PLO R1,D2(B2),R3,D4(B4)) + [8]*argField{ap_Reg_8_11, ap_DispUnsigned_20_31, ap_BaseReg_16_19, ap_Reg_12_15, ap_DispUnsigned_36_47, ap_BaseReg_32_35}}, + {PPA, 0xffff000000000000, 0xb2e8000000000000, 0xf0000000000, // PERFORM PROCESSOR ASSIST (PPA R1,R2,M3) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31, ap_Mask_16_19}}, + {PRNO, 0xffff000000000000, 0xb93c000000000000, 0xff0000000000, // PERFORM RANDOM NUMBER OPERATION (PRNO R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {PTFF, 0xffff000000000000, 0x104000000000000, 0x0, // PERFORM TIMING FACILITY FUNCTION (PTFF) + [8]*argField{}}, + {PTF, 0xffff000000000000, 0xb9a2000000000000, 0xff0f00000000, // PERFORM TOPOLOGY FUNCTION (PTF R1) + [8]*argField{ap_Reg_24_27}}, + {POPCNT, 0xffff000000000000, 0xb9e1000000000000, 0xf0000000000, // POPULATION COUNT (POPCNT R1,R2,M3) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31, ap_Mask_16_19}}, + {PFD, 0xff00000000ff0000, 0xe300000000360000, 0x0, // PREFETCH DATA (PFD M1,D2(X2,B2)) + [8]*argField{ap_Mask_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {PFDRL, 0xff0f000000000000, 0xc602000000000000, 0x0, // PREFETCH DATA RELATIVE LONG (PFDRL M1,RI2) + [8]*argField{ap_Mask_8_11, ap_RegImSigned32_16_47}}, + {PC, 0xffff000000000000, 0xb218000000000000, 0x0, // PROGRAM CALL (PC D2(B2)) + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {PR, 0xffff000000000000, 0x101000000000000, 0x0, // PROGRAM RETURN (PR) + [8]*argField{}}, + {PT, 0xffff000000000000, 0xb228000000000000, 0xff0000000000, // PROGRAM TRANSFER (PT R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {PTI, 
0xffff000000000000, 0xb99e000000000000, 0xff0000000000, // PROGRAM TRANSFER WITH INSTANCE (PTI R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {PALB, 0xffff000000000000, 0xb248000000000000, 0xffff00000000, // PURGE ALB (PALB) + [8]*argField{}}, + {PTLB, 0xffff000000000000, 0xb20d000000000000, 0xffff00000000, // PURGE TLB (PTLB) + [8]*argField{}}, + {QAXTR, 0xffff000000000000, 0xb3fd000000000000, 0x0, // QUANTIZE (extended DFP) (QAXTR R1,R3,R2,M4) + [8]*argField{ap_FPReg_24_27, ap_FPReg_16_19, ap_FPReg_28_31, ap_Mask_20_23}}, + {QADTR, 0xffff000000000000, 0xb3f5000000000000, 0x0, // QUANTIZE (long DFP) (QADTR R1,R3,R2,M4) + [8]*argField{ap_FPReg_24_27, ap_FPReg_16_19, ap_FPReg_28_31, ap_Mask_20_23}}, + {QPACI, 0xffff000000000000, 0xb28f000000000000, 0x0, // QUERY PROCESSOR ACTIVITY COUNTER INFORMATION (QPACI D2(B2)) + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {RRXTR, 0xffff000000000000, 0xb3ff000000000000, 0x0, // REROUND (extended DFP) (RRXTR R1,R3,R2,M4) + [8]*argField{ap_FPReg_24_27, ap_FPReg_16_19, ap_Reg_28_31, ap_Mask_20_23}}, + {RRDTR, 0xffff000000000000, 0xb3f7000000000000, 0x0, // REROUND (long DFP) (RRDTR R1,R3,R2,M4) + [8]*argField{ap_FPReg_24_27, ap_FPReg_16_19, ap_Reg_28_31, ap_Mask_20_23}}, + {RCHP, 0xffff000000000000, 0xb23b000000000000, 0xffff00000000, // RESET CHANNEL PATH (RCHP) + [8]*argField{}}, + {RDP, 0xffff000000000000, 0xb98b000000000000, 0x0, // RESET DAT PROTECTION (RDP R1,R3,R2,M4) + [8]*argField{ap_Reg_24_27, ap_Reg_16_19, ap_Reg_28_31, ap_Mask_20_23}}, + {RRBE, 0xffff000000000000, 0xb22a000000000000, 0xff0000000000, // RESET REFERENCE BIT EXTENDED (RRBE R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {RRBM, 0xffff000000000000, 0xb9ae000000000000, 0xff0000000000, // RESET REFERENCE BITS MULTIPLE (RRBM R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {RP, 0xffff000000000000, 0xb277000000000000, 0x0, // RESUME PROGRAM (RP D2(B2)) + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {RSCH, 
0xffff000000000000, 0xb238000000000000, 0xffff00000000, // RESUME SUBCHANNEL (RSCH) + [8]*argField{}}, + {RLL, 0xff00000000ff0000, 0xeb000000001d0000, 0x0, // ROTATE LEFT SINGLE LOGICAL (32) (RLL R1,R3,D2(B2)) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15, ap_DispSigned20_20_39, ap_BaseReg_16_19}}, + {RLLG, 0xff00000000ff0000, 0xeb000000001c0000, 0x0, // ROTATE LEFT SINGLE LOGICAL (64) (RLLG R1,R3,D2(B2)) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15, ap_DispSigned20_20_39, ap_BaseReg_16_19}}, + {RNSBG, 0xff00000000ff0000, 0xec00000000540000, 0x0, // ROTATE THEN AND SELECTED BITS (64) (RNSBG R1,R2,I3,I4,I5) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15, ap_ImmUnsigned_16_23, ap_ImmUnsigned_24_31, ap_ImmUnsigned_32_39}}, + {RXSBG, 0xff00000000ff0000, 0xec00000000570000, 0x0, // ROTATETHENEXCLUSIVEORSELECT.BITS(64) (RXSBG R1,R2,I3,I4,I5) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15, ap_ImmUnsigned_16_23, ap_ImmUnsigned_24_31, ap_ImmUnsigned_32_39}}, + {RISBG, 0xff00000000ff0000, 0xec00000000550000, 0x0, // ROTATE THEN INSERT SELECTED BITS (64) (RISBG R1,R2,I3,I4,I5) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15, ap_ImmUnsigned_16_23, ap_ImmUnsigned_24_31, ap_ImmUnsigned_32_39}}, + {RISBGN, 0xff00000000ff0000, 0xec00000000590000, 0x0, // ROTATE THEN INSERT SELECTED BITS (64) (RISBGN R1,R2,I3,I4,I5) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15, ap_ImmUnsigned_16_23, ap_ImmUnsigned_24_31, ap_ImmUnsigned_32_39}}, + {RISBHG, 0xff00000000ff0000, 0xec000000005d0000, 0x0, // ROTATE THEN INSERT SELECTED BITS HIGH(64) (RISBHG R1,R2,I3,I4,I5) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15, ap_ImmUnsigned_16_23, ap_ImmUnsigned_24_31, ap_ImmUnsigned_32_39}}, + {RISBLG, 0xff00000000ff0000, 0xec00000000510000, 0x0, // ROTATE THEN INSERT SELECTED BITS LOW (64) (RISBLG R1,R2,I3,I4,I5) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15, ap_ImmUnsigned_16_23, ap_ImmUnsigned_24_31, ap_ImmUnsigned_32_39}}, + {ROSBG, 0xff00000000ff0000, 0xec00000000560000, 0x0, // ROTATE THEN OR SELECTED BITS (64) (ROSBG R1,R2,I3,I4,I5) + 
[8]*argField{ap_Reg_8_11, ap_Reg_12_15, ap_ImmUnsigned_16_23, ap_ImmUnsigned_24_31, ap_ImmUnsigned_32_39}}, + {SRST, 0xffff000000000000, 0xb25e000000000000, 0xff0000000000, // SEARCH STRING (SRST R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {SRSTU, 0xffff000000000000, 0xb9be000000000000, 0xff0000000000, // SEARCH STRING UNICODE (SRSTU R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {SELR, 0xffff000000000000, 0xb9f0000000000000, 0x0, // SELECT (32) (SELR R1,R2,R3,M4) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31, ap_Reg_16_19, ap_Mask_20_23}}, + {SELGR, 0xffff000000000000, 0xb9e3000000000000, 0x0, // SELECT (64) (SELGR R1,R2,R3,M4) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31, ap_Reg_16_19, ap_Mask_20_23}}, + {SELFHR, 0xffff000000000000, 0xb9c0000000000000, 0x0, // SELECT HIGH (32) (SELFHR R1,R2,R3,M4) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31, ap_Reg_16_19, ap_Mask_20_23}}, + {SAR, 0xffff000000000000, 0xb24e000000000000, 0xff0000000000, // SET ACCESS (SAR R1,R2) + [8]*argField{ap_ACReg_24_27, ap_Reg_28_31}}, + {SAL, 0xffff000000000000, 0xb237000000000000, 0xffff00000000, // SET ADDRESS LIMIT (SAL) + [8]*argField{}}, + {SAC, 0xffff000000000000, 0xb219000000000000, 0x0, // SET ADDRESS SPACE CONTROL (SAC D2(B2)) + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {SACF, 0xffff000000000000, 0xb279000000000000, 0x0, // SET ADDRESS SPACE CONTROL FAST (SACF D2(B2)) + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {SAM24, 0xffff000000000000, 0x10c000000000000, 0x0, // SET ADDRESSING MODE (24) (SAM24) + [8]*argField{}}, + {SAM31, 0xffff000000000000, 0x10d000000000000, 0x0, // SET ADDRESSING MODE (31) (SAM31) + [8]*argField{}}, + {SAM64, 0xffff000000000000, 0x10e000000000000, 0x0, // SET ADDRESSING MODE (64) (SAM64) + [8]*argField{}}, + {SRNM, 0xffff000000000000, 0xb299000000000000, 0x0, // SET BFP ROUNDING MODE (2 bit) (SRNM D2(B2)) + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {SRNMB, 0xffff000000000000, 0xb2b8000000000000, 0x0, // 
SET BFP ROUNDING MODE (3 bit) (SRNMB D2(B2)) + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {SCHM, 0xffff000000000000, 0xb23c000000000000, 0xffff00000000, // SET CHANNEL MONITOR (SCHM) + [8]*argField{}}, + {SCK, 0xffff000000000000, 0xb204000000000000, 0x0, // SET CLOCK (SCK D2(B2)) + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {SCKC, 0xffff000000000000, 0xb206000000000000, 0x0, // SET CLOCK COMPARATOR (SCKC D2(B2)) + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {SCKPF, 0xffff000000000000, 0x107000000000000, 0x0, // SET CLOCK PROGRAMMABLE FIELD (SCKPF) + [8]*argField{}}, + {SPT, 0xffff000000000000, 0xb208000000000000, 0x0, // SET CPU TIMER (SPT D2(B2)) + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {SRNMT, 0xffff000000000000, 0xb2b9000000000000, 0x0, // SET DFP ROUNDING MODE (SRNMT D2(B2)) + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {SFPC, 0xffff000000000000, 0xb384000000000000, 0xff0f00000000, // SET FPC (SFPC R1) + [8]*argField{ap_Reg_24_27}}, + {SFASR, 0xffff000000000000, 0xb385000000000000, 0xff0f00000000, // SET FPC AND SIGNAL (SFASR R1) + [8]*argField{ap_Reg_24_27}}, + {SPX, 0xffff000000000000, 0xb210000000000000, 0x0, // SET PREFIX (SPX D2(B2)) + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {SPM, 0xff00000000000000, 0x400000000000000, 0xf000000000000, // SET PROGRAM MASK (SPM R1) + [8]*argField{ap_Reg_8_11}}, + {SPKA, 0xffff000000000000, 0xb20a000000000000, 0x0, // SET PSW KEY FROM ADDRESS (SPKA D2(B2)) + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {SSAR, 0xffff000000000000, 0xb225000000000000, 0xff0f00000000, // SET SECONDARY ASN (SSAR R1) + [8]*argField{ap_Reg_24_27}}, + {SSAIR, 0xffff000000000000, 0xb99f000000000000, 0xff0f00000000, // SET SECONDARY ASN WITH INSTANCE (SSAIR R1) + [8]*argField{ap_Reg_24_27}}, + {SSKE, 0xffff000000000000, 0xb22b000000000000, 0xf0000000000, // SET STORAGE KEY EXTENDED (SSKE R1,R2,M3) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31, 
ap_Mask_16_19}}, + {SSM, 0xff00000000000000, 0x8000000000000000, 0x0, // SET SYSTEM MASK (SSM D1(B1)) + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {SRP, 0xff00000000000000, 0xf000000000000000, 0x0, // SHIFT AND ROUND DECIMAL (SRP D1(L1,B1),D2(B2),I3) + [8]*argField{ap_DispUnsigned_20_31, ap_Len_8_11, ap_BaseReg_16_19, ap_DispUnsigned_36_47, ap_BaseReg_32_35, ap_ImmUnsigned_12_15}}, + {SLDA, 0xff00000000000000, 0x8f00000000000000, 0xf000000000000, // SHIFT LEFT DOUBLE (64) (SLDA R1,D2(B2)) + [8]*argField{ap_Reg_8_11, ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {SLDL, 0xff00000000000000, 0x8d00000000000000, 0xf000000000000, // SHIFT LEFT DOUBLE LOGICAL (64) (SLDL R1,D2(B2)) + [8]*argField{ap_Reg_8_11, ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {SLA, 0xff00000000000000, 0x8b00000000000000, 0xf000000000000, // SHIFT LEFT SINGLE (32) (SLA R1,D2(B2)) + [8]*argField{ap_Reg_8_11, ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {SLAK, 0xff00000000ff0000, 0xeb00000000dd0000, 0x0, // SHIFT LEFT SINGLE (32) (SLAK R1,R3,D2(B2)) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15, ap_DispSigned20_20_39, ap_BaseReg_16_19}}, + {SLAG, 0xff00000000ff0000, 0xeb000000000b0000, 0x0, // SHIFT LEFT SINGLE (64) (SLAG R1,R3,D2(B2)) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15, ap_DispSigned20_20_39, ap_BaseReg_16_19}}, + {SLL, 0xff00000000000000, 0x8900000000000000, 0xf000000000000, // SHIFT LEFT SINGLE LOGICAL (32) (SLL R1,D2(B2)) + [8]*argField{ap_Reg_8_11, ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {SLLK, 0xff00000000ff0000, 0xeb00000000df0000, 0x0, // SHIFT LEFT SINGLE LOGICAL (32) (SLLK R1,R3,D2(B2)) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15, ap_DispSigned20_20_39, ap_BaseReg_16_19}}, + {SLLG, 0xff00000000ff0000, 0xeb000000000d0000, 0x0, // SHIFT LEFT SINGLE LOGICAL (64) (SLLG R1,R3,D2(B2)) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15, ap_DispSigned20_20_39, ap_BaseReg_16_19}}, + {SRDA, 0xff00000000000000, 0x8e00000000000000, 0xf000000000000, // SHIFT RIGHT DOUBLE (64) (SRDA R1,D2(B2)) + 
[8]*argField{ap_Reg_8_11, ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {SRDL, 0xff00000000000000, 0x8c00000000000000, 0xf000000000000, // SHIFT RIGHT DOUBLE LOGICAL (64) (SRDL R1,D2(B2)) + [8]*argField{ap_Reg_8_11, ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {SRA, 0xff00000000000000, 0x8a00000000000000, 0xf000000000000, // SHIFT RIGHT SINGLE (32) (SRA R1,D2(B2)) + [8]*argField{ap_Reg_8_11, ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {SRAK, 0xff00000000ff0000, 0xeb00000000dc0000, 0x0, // SHIFT RIGHT SINGLE (32) (SRAK R1,R3,D2(B2)) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15, ap_DispSigned20_20_39, ap_BaseReg_16_19}}, + {SRAG, 0xff00000000ff0000, 0xeb000000000a0000, 0x0, // SHIFT RIGHT SINGLE (64) (SRAG R1,R3,D2(B2)) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15, ap_DispSigned20_20_39, ap_BaseReg_16_19}}, + {SRL, 0xff00000000000000, 0x8800000000000000, 0xf000000000000, // SHIFT RIGHT SINGLE LOGICAL (32) (SRL R1,D2(B2)) + [8]*argField{ap_Reg_8_11, ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {SRLK, 0xff00000000ff0000, 0xeb00000000de0000, 0x0, // SHIFT RIGHT SINGLE LOGICAL (32) (SRLK R1,R3,D2(B2)) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15, ap_DispSigned20_20_39, ap_BaseReg_16_19}}, + {SRLG, 0xff00000000ff0000, 0xeb000000000c0000, 0x0, // SHIFT RIGHT SINGLE LOGICAL (64) (SRLG R1,R3,D2(B2)) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15, ap_DispSigned20_20_39, ap_BaseReg_16_19}}, + {SLXT, 0xff00000000ff0000, 0xed00000000480000, 0xf000000, // SHIFT SIGNIFICAND LEFT (extended DFP) (SLXT R1,R3,D2(X2,B2)) + [8]*argField{ap_FPReg_32_35, ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {SLDT, 0xff00000000ff0000, 0xed00000000400000, 0xf000000, // SHIFT SIGNIFICAND LEFT (long DFP) (SLDT R1,R3,D2(X2,B2)) + [8]*argField{ap_FPReg_32_35, ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {SRXT, 0xff00000000ff0000, 0xed00000000490000, 0xf000000, // SHIFT SIGNIFICAND RIGHT (extended DFP) (SRXT R1,R3,D2(X2,B2)) + [8]*argField{ap_FPReg_32_35, 
ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {SRDT, 0xff00000000ff0000, 0xed00000000410000, 0xf000000, // SHIFT SIGNIFICAND RIGHT (long DFP) (SRDT R1,R3,D2(X2,B2)) + [8]*argField{ap_FPReg_32_35, ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {SIGP, 0xff00000000000000, 0xae00000000000000, 0x0, // SIGNAL PROCESSOR (SIGP R1,R3,D2(B2)) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15, ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {SORTL, 0xffff000000000000, 0xb938000000000000, 0xff0000000000, // SORT LISTS (SORTL R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {SQXBR, 0xffff000000000000, 0xb316000000000000, 0xff0000000000, // SQUARE ROOT (extended BFP) (SQXBR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31}}, + {SQXR, 0xffff000000000000, 0xb336000000000000, 0xff0000000000, // SQUARE ROOT (extended HFP) (SQXR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31}}, + {SQDB, 0xff00000000ff0000, 0xed00000000150000, 0xff000000, // SQUARE ROOT (long BFP) (SQDB R1,D2(X2,B2)) + [8]*argField{ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {SQDBR, 0xffff000000000000, 0xb315000000000000, 0xff0000000000, // SQUARE ROOT (long BFP) (SQDBR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31}}, + {SQD, 0xff00000000ff0000, 0xed00000000350000, 0xff000000, // SQUARE ROOT (long HFP) (SQD R1,D2(X2,B2)) + [8]*argField{ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {SQDR, 0xffff000000000000, 0xb244000000000000, 0xff0000000000, // SQUARE ROOT (long HFP) (SQDR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31}}, + {SQEB, 0xff00000000ff0000, 0xed00000000140000, 0xff000000, // SQUARE ROOT (short BFP) (SQEB R1,D2(X2,B2)) + [8]*argField{ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {SQEBR, 0xffff000000000000, 0xb314000000000000, 0xff0000000000, // SQUARE ROOT (short BFP) (SQEBR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31}}, 
+ {SQE, 0xff00000000ff0000, 0xed00000000340000, 0xff000000, // SQUARE ROOT (short HFP) (SQE R1,D2(X2,B2)) + [8]*argField{ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {SQER, 0xffff000000000000, 0xb245000000000000, 0xff0000000000, // SQUARE ROOT (short HFP) (SQER R1,R2) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31}}, + {SSCH, 0xffff000000000000, 0xb233000000000000, 0x0, // START SUBCHANNEL (SSCH D2(B2)) + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {ST, 0xff00000000000000, 0x5000000000000000, 0x0, // STORE (32) (ST R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {STY, 0xff00000000ff0000, 0xe300000000500000, 0x0, // STORE (32) (STY R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {STG, 0xff00000000ff0000, 0xe300000000240000, 0x0, // STORE (64) (STG R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {STD, 0xff00000000000000, 0x6000000000000000, 0x0, // STORE (long) (STD R1,D2(X2,B2)) + [8]*argField{ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {STDY, 0xff00000000ff0000, 0xed00000000670000, 0x0, // STORE (long) (STDY R1,D2(X2,B2)) + [8]*argField{ap_FPReg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {STE, 0xff00000000000000, 0x7000000000000000, 0x0, // STORE (short) (STE R1,D2(X2,B2)) + [8]*argField{ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {STEY, 0xff00000000ff0000, 0xed00000000660000, 0x0, // STORE (short) (STEY R1,D2(X2,B2)) + [8]*argField{ap_FPReg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {STAM, 0xff00000000000000, 0x9b00000000000000, 0x0, // STORE ACCESS MULTIPLE 7-389 (STAM R1,R3,D2(B2)) + [8]*argField{ap_ACReg_8_11, ap_ACReg_12_15, ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {STAMY, 0xff00000000ff0000, 0xeb000000009b0000, 0x0, 
// STORE ACCESS MULTIPLE 7-389 (STAMY R1,R3,D2(B2)) + [8]*argField{ap_ACReg_8_11, ap_ACReg_12_15, ap_DispSigned20_20_39, ap_BaseReg_16_19}}, + {STBEAR, 0xffff000000000000, 0xb201000000000000, 0x0, // STORE BEAR (STBEAR D2(B2)) + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {STCPS, 0xffff000000000000, 0xb23a000000000000, 0x0, // STORE CHANNEL PATH STATUS (STCPS D2(B2)) + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {STCRW, 0xffff000000000000, 0xb239000000000000, 0x0, // STORE CHANNEL REPORT WORD (STCRW D2(B2)) + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {STC, 0xff00000000000000, 0x4200000000000000, 0x0, // STORE CHARACTER (STC R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {STCY, 0xff00000000ff0000, 0xe300000000720000, 0x0, // STORE CHARACTER (STCY R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {STCH, 0xff00000000ff0000, 0xe300000000c30000, 0x0, // STORE CHARACTER HIGH (8) (STCH R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {STCMH, 0xff00000000ff0000, 0xeb000000002c0000, 0x0, // STORE CHARACTERS UNDER MASK (high) (STCMH R1,M3,D2(B2)) + [8]*argField{ap_Reg_8_11, ap_Mask_12_15, ap_DispSigned20_20_39, ap_BaseReg_16_19}}, + {STCM, 0xff00000000000000, 0xbe00000000000000, 0x0, // STORE CHARACTERS UNDER MASK (low) (STCM R1,M3,D2(B2)) + [8]*argField{ap_Reg_8_11, ap_Mask_12_15, ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {STCMY, 0xff00000000ff0000, 0xeb000000002d0000, 0x0, // STORE CHARACTERS UNDER MASK (low) (STCMY R1,M3,D2(B2)) + [8]*argField{ap_Reg_8_11, ap_Mask_12_15, ap_DispSigned20_20_39, ap_BaseReg_16_19}}, + {STCK, 0xffff000000000000, 0xb205000000000000, 0x0, // STORE CLOCK (STCK D2(B2)) + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {STCKC, 0xffff000000000000, 0xb207000000000000, 0x0, // STORE CLOCK COMPARATOR (STCKC D2(B2)) + 
[8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {STCKE, 0xffff000000000000, 0xb278000000000000, 0x0, // STORE CLOCK EXTENDED (STCKE D2(B2)) + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {STCKF, 0xffff000000000000, 0xb27c000000000000, 0x0, // STORE CLOCK FAST (STCKF D2(B2)) + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {STCTL, 0xff00000000000000, 0xb600000000000000, 0x0, // STORE CONTROL (32) (STCTL R1,R3,D2(B2)) + [8]*argField{ap_CReg_8_11, ap_CReg_12_15, ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {STCTG, 0xff00000000ff0000, 0xeb00000000250000, 0x0, // STORE CONTROL (64) (STCTG R1,R3,D2(B2)) + [8]*argField{ap_CReg_8_11, ap_CReg_12_15, ap_DispSigned20_20_39, ap_BaseReg_16_19}}, + {STAP, 0xffff000000000000, 0xb212000000000000, 0x0, // STORE CPU ADDRESS (STAP D2(B2)) + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {STIDP, 0xffff000000000000, 0xb202000000000000, 0x0, // STORE CPU ID (STIDP D2(B2)) + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {STPT, 0xffff000000000000, 0xb209000000000000, 0x0, // STORE CPU TIMER (STPT D2(B2)) + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {STFL, 0xffff000000000000, 0xb2b1000000000000, 0x0, // STORE FACILITY LIST (STFL D2(B2)) + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {STFLE, 0xffff000000000000, 0xb2b0000000000000, 0x0, // STORE FACILITY LIST EXTENDED (STFLE D2(B2)) + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {STFPC, 0xffff000000000000, 0xb29c000000000000, 0x0, // STORE FPC (STFPC D2(B2)) + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {STGSC, 0xff00000000ff0000, 0xe300000000490000, 0x0, // STORE GUARDED STORAGE CONTROLS (STGSC R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {STH, 0xff00000000000000, 0x4000000000000000, 0x0, // STORE HALFWORD (16) (STH R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + 
{STHY, 0xff00000000ff0000, 0xe300000000700000, 0x0, // STORE HALFWORD (16) (STHY R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {STHH, 0xff00000000ff0000, 0xe300000000c70000, 0x0, // STORE HALFWORD HIGH (16) (STHH R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {STHRL, 0xff0f000000000000, 0xc407000000000000, 0x0, // STORE HALFWORD RELATIVE LONG (16) (STHRL R1,RI2) + [8]*argField{ap_Reg_8_11, ap_RegImSigned32_16_47}}, + {STFH, 0xff00000000ff0000, 0xe300000000cb0000, 0x0, // STORE HIGH (32) (STFH R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {STOCFH, 0xff00000000ff0000, 0xeb00000000e10000, 0x0, // STORE HIGH ON CONDITION (STOCFH R1,D2(B2),M3) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_BaseReg_16_19, ap_Mask_12_15}}, + {STM, 0xff00000000000000, 0x9000000000000000, 0x0, // STORE MULTIPLE (32) (STM R1,R3,D2(B2)) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15, ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {STMY, 0xff00000000ff0000, 0xeb00000000900000, 0x0, // STORE MULTIPLE (32) (STMY R1,R3,D2(B2)) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15, ap_DispSigned20_20_39, ap_BaseReg_16_19}}, + {STMG, 0xff00000000ff0000, 0xeb00000000240000, 0x0, // STORE MULTIPLE (64) (STMG R1,R3,D2(B2)) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15, ap_DispSigned20_20_39, ap_BaseReg_16_19}}, + {STMH, 0xff00000000ff0000, 0xeb00000000260000, 0x0, // STORE MULTIPLE HIGH (32) (STMH R1,R3,D2(B2)) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15, ap_DispSigned20_20_39, ap_BaseReg_16_19}}, + {STOC, 0xff00000000ff0000, 0xeb00000000f30000, 0x0, // STORE ON CONDITION (32) (STOC R1,D2(B2),M3) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_BaseReg_16_19, ap_Mask_12_15}}, + {STOCG, 0xff00000000ff0000, 0xeb00000000e30000, 0x0, // STORE ON CONDITION (64) (STOCG R1,D2(B2),M3) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, 
ap_BaseReg_16_19, ap_Mask_12_15}}, + {STPQ, 0xff00000000ff0000, 0xe3000000008e0000, 0x0, // STORE PAIR TO QUADWORD (STPQ R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {STPX, 0xffff000000000000, 0xb211000000000000, 0x0, // STORE PREFIX (STPX D2(B2)) + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {STRAG, 0xffff000000000000, 0xe502000000000000, 0x0, // STORE REAL ADDRESS (STRAG D1(B1),D2(B2)) + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19, ap_DispUnsigned_36_47, ap_BaseReg_32_35}}, + {STRL, 0xff0f000000000000, 0xc40f000000000000, 0x0, // STORE RELATIVE LONG (32) (STRL R1,RI2) + [8]*argField{ap_Reg_8_11, ap_RegImSigned32_16_47}}, + {STGRL, 0xff0f000000000000, 0xc40b000000000000, 0x0, // STORE RELATIVE LONG (64) (STGRL R1,RI2) + [8]*argField{ap_Reg_8_11, ap_RegImSigned32_16_47}}, + {STRVH, 0xff00000000ff0000, 0xe3000000003f0000, 0x0, // STORE REVERSED (16) (STRVH R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {STRV, 0xff00000000ff0000, 0xe3000000003e0000, 0x0, // STORE REVERSED (32) (STRV R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {STRVG, 0xff00000000ff0000, 0xe3000000002f0000, 0x0, // STORE REVERSED (64) (STRVG R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {STSCH, 0xffff000000000000, 0xb234000000000000, 0x0, // STORE SUBCHANNEL (STSCH D2(B2)) + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {STSI, 0xffff000000000000, 0xb27d000000000000, 0x0, // STORE SYSTEM INFORMATION (STSI D2(B2)) + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {STNSM, 0xff00000000000000, 0xac00000000000000, 0x0, // STORE THEN AND SYSTEM MASK (STNSM D1(B1),I2) + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19, ap_ImmUnsigned_8_15}}, + {STOSM, 0xff00000000000000, 0xad00000000000000, 0x0, // STORE THEN OR SYSTEM MASK 
(STOSM D1(B1),I2) + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19, ap_ImmUnsigned_8_15}}, + {STURA, 0xffff000000000000, 0xb246000000000000, 0xff0000000000, // STORE USING REAL ADDRESS (32) (STURA R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {STURG, 0xffff000000000000, 0xb925000000000000, 0xff0000000000, // STORE USING REAL ADDRESS (64) (STURG R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {S, 0xff00000000000000, 0x5b00000000000000, 0x0, // SUBTRACT (32) (S R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {SR, 0xff00000000000000, 0x1b00000000000000, 0x0, // SUBTRACT (32) (SR R1,R2) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15}}, + {SRK, 0xffff000000000000, 0xb9f9000000000000, 0xf0000000000, // SUBTRACT (32) (SRK R1,R2,R3) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31, ap_Reg_16_19}}, + {SY, 0xff00000000ff0000, 0xe3000000005b0000, 0x0, // SUBTRACT (32) (SY R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {SG, 0xff00000000ff0000, 0xe300000000090000, 0x0, // SUBTRACT (64) (SG R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {SGR, 0xffff000000000000, 0xb909000000000000, 0xff0000000000, // SUBTRACT (64) (SGR R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {SGRK, 0xffff000000000000, 0xb9e9000000000000, 0xf0000000000, // SUBTRACT (64) (SGRK R1,R2,R3) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31, ap_Reg_16_19}}, + {SGF, 0xff00000000ff0000, 0xe300000000190000, 0x0, // SUBTRACT (64←32) (SGF R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {SGFR, 0xffff000000000000, 0xb919000000000000, 0xff0000000000, // SUBTRACT (64←32) (SGFR R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {SXBR, 0xffff000000000000, 0xb34b000000000000, 0xff0000000000, // SUBTRACT (extended BFP) (SXBR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31}}, + 
{SXTR, 0xffff000000000000, 0xb3db000000000000, 0xf0000000000, // SUBTRACT (extended DFP) (SXTR R1,R2,R3) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31, ap_FPReg_16_19}}, + {SXTRA, 0xffff000000000000, 0xb3db000000000000, 0x0, // SUBTRACT (extended DFP) (SXTRA R1,R2,R3,M4) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31, ap_FPReg_16_19, ap_Mask_20_23}}, + {SDB, 0xff00000000ff0000, 0xed000000001b0000, 0xff000000, // SUBTRACT (long BFP) (SDB R1,D2(X2,B2)) + [8]*argField{ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {SDBR, 0xffff000000000000, 0xb31b000000000000, 0xff0000000000, // SUBTRACT (long BFP) (SDBR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31}}, + {SDTR, 0xffff000000000000, 0xb3d3000000000000, 0xf0000000000, // SUBTRACT (long DFP) (SDTR R1,R2,R3) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31, ap_FPReg_16_19}}, + {SDTRA, 0xffff000000000000, 0xb3d3000000000000, 0x0, // SUBTRACT (long DFP) (SDTRA R1,R2,R3,M4) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31, ap_FPReg_16_19, ap_Mask_20_23}}, + {SEB, 0xff00000000ff0000, 0xed000000000b0000, 0xff000000, // SUBTRACT (short BFP) (SEB R1,D2(X2,B2)) + [8]*argField{ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {SEBR, 0xffff000000000000, 0xb30b000000000000, 0xff0000000000, // SUBTRACT (short BFP) (SEBR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31}}, + {SP, 0xff00000000000000, 0xfb00000000000000, 0x0, // SUBTRACT DECIMAL (SP D1(L1,B1),D2(L2,B2)) + [8]*argField{ap_DispUnsigned_20_31, ap_Len_8_11, ap_BaseReg_16_19, ap_DispUnsigned_36_47, ap_Len_12_15, ap_BaseReg_32_35}}, + {SH, 0xff00000000000000, 0x4b00000000000000, 0x0, // SUBTRACT HALFWORD (32←16) (SH R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {SHY, 0xff00000000ff0000, 0xe3000000007b0000, 0x0, // SUBTRACT HALFWORD (32←16) (SHY R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {SGH, 
0xff00000000ff0000, 0xe300000000390000, 0x0, // SUBTRACT HALFWORD (64←16) (SGH R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {SHHHR, 0xffff000000000000, 0xb9c9000000000000, 0xf0000000000, // SUBTRACT HIGH (32) (SHHHR R1,R2,R3) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31, ap_Reg_16_19}}, + {SHHLR, 0xffff000000000000, 0xb9d9000000000000, 0xf0000000000, // SUBTRACT HIGH (32) (SHHLR R1,R2,R3) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31, ap_Reg_16_19}}, + {SL, 0xff00000000000000, 0x5f00000000000000, 0x0, // SUBTRACT LOGICAL (32) (SL R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {SLR, 0xff00000000000000, 0x1f00000000000000, 0x0, // SUBTRACT LOGICAL (32) (SLR R1,R2) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15}}, + {SLRK, 0xffff000000000000, 0xb9fb000000000000, 0xf0000000000, // SUBTRACT LOGICAL (32) (SLRK R1,R2,R3) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31, ap_Reg_16_19}}, + {SLY, 0xff00000000ff0000, 0xe3000000005f0000, 0x0, // SUBTRACT LOGICAL (32) (SLY R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {SLG, 0xff00000000ff0000, 0xe3000000000b0000, 0x0, // SUBTRACT LOGICAL (64) (SLG R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {SLGR, 0xffff000000000000, 0xb90b000000000000, 0xff0000000000, // SUBTRACT LOGICAL (64) (SLGR R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {SLGRK, 0xffff000000000000, 0xb9eb000000000000, 0xf0000000000, // SUBTRACT LOGICAL (64) (SLGRK R1,R2,R3) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31, ap_Reg_16_19}}, + {SLGF, 0xff00000000ff0000, 0xe3000000001b0000, 0x0, // SUBTRACT LOGICAL (64←32) (SLGF R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {SLGFR, 0xffff000000000000, 0xb91b000000000000, 0xff0000000000, // SUBTRACT LOGICAL (64←32) (SLGFR R1,R2) + 
[8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {SLHHHR, 0xffff000000000000, 0xb9cb000000000000, 0xf0000000000, // SUBTRACT LOGICAL HIGH (32) (SLHHHR R1,R2,R3) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31, ap_Reg_16_19}}, + {SLHHLR, 0xffff000000000000, 0xb9db000000000000, 0xf0000000000, // SUBTRACT LOGICAL HIGH (32) (SLHHLR R1,R2,R3) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31, ap_Reg_16_19}}, + {SLFI, 0xff0f000000000000, 0xc205000000000000, 0x0, // SUBTRACT LOGICAL IMMEDIATE (32) (SLFI R1,I2) + [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_47}}, + {SLGFI, 0xff0f000000000000, 0xc204000000000000, 0x0, // SUBTRACT LOGICAL IMMEDIATE (64←32) (SLGFI R1,I2) + [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_47}}, + {SLB, 0xff00000000ff0000, 0xe300000000990000, 0x0, // SUBTRACT LOGICAL WITH BORROW (32) (SLB R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {SLBR, 0xffff000000000000, 0xb999000000000000, 0xff0000000000, // SUBTRACT LOGICAL WITH BORROW (32) (SLBR R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {SLBG, 0xff00000000ff0000, 0xe300000000890000, 0x0, // SUBTRACT LOGICAL WITH BORROW (64) (SLBG R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {SLBGR, 0xffff000000000000, 0xb989000000000000, 0xff0000000000, // SUBTRACT LOGICAL WITH BORROW (64) (SLBGR R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {SXR, 0xff00000000000000, 0x3700000000000000, 0x0, // SUBTRACT NORMALIZED (extended HFP) (SXR R1,R2) + [8]*argField{ap_FPReg_8_11, ap_FPReg_12_15}}, + {SD, 0xff00000000000000, 0x6b00000000000000, 0x0, // SUBTRACT NORMALIZED (long HFP) (SD R1,D2(X2,B2)) + [8]*argField{ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {SDR, 0xff00000000000000, 0x2b00000000000000, 0x0, // SUBTRACT NORMALIZED (long HFP) (SDR R1,R2) + [8]*argField{ap_FPReg_8_11, ap_FPReg_12_15}}, + {SE, 0xff00000000000000, 0x7b00000000000000, 0x0, // SUBTRACT NORMALIZED (short 
HFP) (SE R1,D2(X2,B2)) + [8]*argField{ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {SER, 0xff00000000000000, 0x3b00000000000000, 0x0, // SUBTRACT NORMALIZED (short HFP) (SER R1,R2) + [8]*argField{ap_FPReg_8_11, ap_FPReg_12_15}}, + {SW, 0xff00000000000000, 0x6f00000000000000, 0x0, // SUBTRACT UNNORMALIZED (long HFP) (SW R1,D2(X2,B2)) + [8]*argField{ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {SWR, 0xff00000000000000, 0x2f00000000000000, 0x0, // SUBTRACT UNNORMALIZED (long HFP) (SWR R1,R2) + [8]*argField{ap_FPReg_8_11, ap_FPReg_12_15}}, + {SU, 0xff00000000000000, 0x7f00000000000000, 0x0, // SUBTRACT UNNORMALIZED (short HFP) (SU R1,D2(X2,B2)) + [8]*argField{ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {SUR, 0xff00000000000000, 0x3f00000000000000, 0x0, // SUBTRACT UNNORMALIZED (short HFP) (SUR R1,R2) + [8]*argField{ap_FPReg_8_11, ap_FPReg_12_15}}, + {SVC, 0xff00000000000000, 0xa00000000000000, 0x0, // SUPERVISOR CALL (SVC I) + [8]*argField{ap_ImmUnsigned_8_15}}, + {TAR, 0xffff000000000000, 0xb24c000000000000, 0xff0000000000, // TEST ACCESS (TAR R1,R2) + [8]*argField{ap_ACReg_24_27, ap_Reg_28_31}}, + {TAM, 0xffff000000000000, 0x10b000000000000, 0x0, // TEST ADDRESSING MODE (TAM) + [8]*argField{}}, + {TS, 0xff00000000000000, 0x9300000000000000, 0x0, // TEST AND SET (TS D1(B1)) + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {TB, 0xffff000000000000, 0xb22c000000000000, 0xff0000000000, // TEST BLOCK (TB R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {TCXB, 0xff00000000ff0000, 0xed00000000120000, 0xff000000, // TEST DATA CLASS (extended BFP) (TCXB R1,D2(X2,B2)) + [8]*argField{ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {TDCXT, 0xff00000000ff0000, 0xed00000000580000, 0xff000000, // TEST DATA CLASS (extended DFP) (TDCXT R1,D2(X2,B2)) + [8]*argField{ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, 
ap_BaseReg_16_19}}, + {TCDB, 0xff00000000ff0000, 0xed00000000110000, 0xff000000, // TEST DATA CLASS (long BFP) (TCDB R1,D2(X2,B2)) + [8]*argField{ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {TDCDT, 0xff00000000ff0000, 0xed00000000540000, 0xff000000, // TEST DATA CLASS (long DFP) (TDCDT R1,D2(X2,B2)) + [8]*argField{ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {TCEB, 0xff00000000ff0000, 0xed00000000100000, 0xff000000, // TEST DATA CLASS (short BFP) (TCEB R1,D2(X2,B2)) + [8]*argField{ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {TDCET, 0xff00000000ff0000, 0xed00000000500000, 0xff000000, // TEST DATA CLASS (short DFP) (TDCET R1,D2(X2,B2)) + [8]*argField{ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {TDGXT, 0xff00000000ff0000, 0xed00000000590000, 0xff000000, // TEST DATA GROUP (extended DFP) (TDGXT R1,D2(X2,B2)) + [8]*argField{ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {TDGDT, 0xff00000000ff0000, 0xed00000000550000, 0xff000000, // TEST DATA GROUP (long DFP) (TDGDT R1,D2(X2,B2)) + [8]*argField{ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {TDGET, 0xff00000000ff0000, 0xed00000000510000, 0xff000000, // TEST DATA GROUP (short DFP) (TDGET R1,D2(X2,B2)) + [8]*argField{ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {TP, 0xff00000000ff0000, 0xeb00000000c00000, 0xf0000ff000000, // TEST DECIMAL (TP D1(L1,B1)) + [8]*argField{ap_DispUnsigned_20_31, ap_Len_8_11, ap_BaseReg_16_19}}, + {TPEI, 0xffff000000000000, 0xb9a1000000000000, 0xff0000000000, // TEST PENDING EXTERNAL INTERRUPTION (TPEI R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {TPI, 0xffff000000000000, 0xb236000000000000, 0x0, // TEST PENDING INTERRUPTION (TPI D2(B2)) + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {TPROT, 0xffff000000000000, 0xe501000000000000, 0x0, // 
TEST PROTECTION (TPROT D1(B1),D2(B2)) + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19, ap_DispUnsigned_36_47, ap_BaseReg_32_35}}, + {TSCH, 0xffff000000000000, 0xb235000000000000, 0x0, // TEST SUBCHANNEL (TSCH D2(B2)) + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {TM, 0xff00000000000000, 0x9100000000000000, 0x0, // TEST UNDER MASK (TM D1(B1),I2) + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19, ap_ImmUnsigned_8_15}}, + {TMY, 0xff00000000ff0000, 0xeb00000000510000, 0x0, // TEST UNDER MASK (TMY D1(B1),I2) + [8]*argField{ap_DispSigned20_20_39, ap_BaseReg_16_19, ap_ImmUnsigned_8_15}}, + {TMHH, 0xff0f000000000000, 0xa702000000000000, 0x0, // TEST UNDER MASK (high high) (TMHH R1,I2) + [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_31}}, + {TMHL, 0xff0f000000000000, 0xa703000000000000, 0x0, // TEST UNDER MASK (high low) (TMHL R1,I2) + [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_31}}, + {TMLH, 0xff0f000000000000, 0xa700000000000000, 0x0, // TEST UNDER MASK (low high) (TMLH R1,I2) + [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_31}}, + {TMLL, 0xff0f000000000000, 0xa701000000000000, 0x0, // TEST UNDER MASK (low low) (TMLL R1,I2) + [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_31}}, + {TMH, 0xff0f000000000000, 0xa700000000000000, 0x0, // TEST UNDER MASK HIGH (TMH R1,I2) + [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_31}}, + {TML, 0xff0f000000000000, 0xa701000000000000, 0x0, // TEST UNDER MASK LOW (TML R1,I2) + [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_31}}, + {TRACE, 0xff00000000000000, 0x9900000000000000, 0x0, // TRACE (32) (TRACE R1,R3,D2(B2)) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15, ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {TRACG, 0xff00000000ff0000, 0xeb000000000f0000, 0x0, // TRACE (64) (TRACG R1,R3,D2(B2)) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15, ap_DispSigned20_20_39, ap_BaseReg_16_19}}, + {TABORT, 0xffff000000000000, 0xb2fc000000000000, 0x0, // TRANSACTION ABORT (TABORT D2(B2)) + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + 
{TBEGINC, 0xffff000000000000, 0xe561000000000000, 0x0, // TRANSACTION BEGIN (constrained) (TBEGINC D1(B1),I2) + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19, ap_ImmUnsigned_32_47}}, + {TBEGIN, 0xffff000000000000, 0xe560000000000000, 0x0, // TRANSACTION BEGIN (nonconstrained) (TBEGIN D1(B1),I2) + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19, ap_ImmUnsigned_32_47}}, + {TEND, 0xffff000000000000, 0xb2f8000000000000, 0xffff00000000, // TRANSACTION END (TEND) + [8]*argField{}}, + {TR, 0xff00000000000000, 0xdc00000000000000, 0x0, // TRANSLATE (TR D1(L1,B1),D2(B2)) + [8]*argField{ap_DispUnsigned_20_31, ap_Len_8_15, ap_BaseReg_16_19, ap_DispUnsigned_36_47, ap_BaseReg_32_35}}, + {TRT, 0xff00000000000000, 0xdd00000000000000, 0x0, // TRANSLATE AND TEST (TRT D1(L1,B1),D2(B2)) + [8]*argField{ap_DispUnsigned_20_31, ap_Len_8_15, ap_BaseReg_16_19, ap_DispUnsigned_36_47, ap_BaseReg_32_35}}, + {TRTE, 0xffff000000000000, 0xb9bf000000000000, 0xf0000000000, // TRANSLATE AND TEST EXTENDED (TRTE R1,R2,M3) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31, ap_Mask_16_19}}, + {TRTR, 0xff00000000000000, 0xd000000000000000, 0x0, // TRANSLATE AND TEST REVERSE (TRTR D1(L1,B1),D2(B2)) + [8]*argField{ap_DispUnsigned_20_31, ap_Len_8_15, ap_BaseReg_16_19, ap_DispUnsigned_36_47, ap_BaseReg_32_35}}, + {TRTRE, 0xffff000000000000, 0xb9bd000000000000, 0xf0000000000, // TRANSLATE AND TEST REVERSE EXTENDED (TRTRE R1,R2,M3) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31, ap_Mask_16_19}}, + {TRE, 0xffff000000000000, 0xb2a5000000000000, 0xff0000000000, // TRANSLATE EXTENDED (TRE R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {TROO, 0xffff000000000000, 0xb993000000000000, 0xf0000000000, // TRANSLATE ONE TO ONE (TROO R1,R2,M3) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31, ap_Mask_16_19}}, + {TROT, 0xffff000000000000, 0xb992000000000000, 0xf0000000000, // TRANSLATE ONE TO TWO (TROT R1,R2,M3) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31, ap_Mask_16_19}}, + {TRTO, 0xffff000000000000, 0xb991000000000000, 
0xf0000000000, // TRANSLATE TWO TO ONE (TRTO R1,R2,M3) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31, ap_Mask_16_19}}, + {TRTT, 0xffff000000000000, 0xb990000000000000, 0xf0000000000, // TRANSLATE TWO TO TWO (TRTT R1,R2,M3) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31, ap_Mask_16_19}}, + {TRAP2, 0xffff000000000000, 0x1ff000000000000, 0x0, // TRAP (TRAP2) + [8]*argField{}}, + {TRAP4, 0xffff000000000000, 0xb2ff000000000000, 0x0, // TRAP (TRAP4 D2(B2)) + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {UNPK, 0xff00000000000000, 0xf300000000000000, 0x0, // UNPACK (UNPK D1(L1,B1),D2(L2,B2)) + [8]*argField{ap_DispUnsigned_20_31, ap_Len_8_11, ap_BaseReg_16_19, ap_DispUnsigned_36_47, ap_Len_12_15, ap_BaseReg_32_35}}, + {UNPKA, 0xff00000000000000, 0xea00000000000000, 0x0, // UNPACK ASCII (UNPKA D1(L1,B1),D2(B2)) + [8]*argField{ap_DispUnsigned_20_31, ap_Len_8_15, ap_BaseReg_16_19, ap_DispUnsigned_36_47, ap_BaseReg_32_35}}, + {UNPKU, 0xff00000000000000, 0xe200000000000000, 0x0, // UNPACK UNICODE (UNPKU D1(L1,B1),D2(B2)) + [8]*argField{ap_DispUnsigned_20_31, ap_Len_8_15, ap_BaseReg_16_19, ap_DispUnsigned_36_47, ap_BaseReg_32_35}}, + {UPT, 0xffff000000000000, 0x102000000000000, 0x0, // UPDATE TREE (UPT) + [8]*argField{}}, + {VA, 0xff00000000ff0000, 0xe700000000f30000, 0xfff00000000, // VECTOR ADD (VA V1,V2,V3,M4) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VACC, 0xff00000000ff0000, 0xe700000000f10000, 0xfff00000000, // VECTOR ADD COMPUTE CARRY (VACC V1,V2,V3,M4) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VAP, 0xff00000000ff0000, 0xe600000000710000, 0xf0000000000, // VECTOR ADD DECIMAL (VAP V1,V2,V3,I4,M5) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_ImmUnsigned_28_35, ap_Mask_24_27, ap_ImmUnsigned_36_39}}, + {VAC, 0xff00000000ff0000, 0xe700000000bb0000, 0xff00000000, // VECTOR ADD WITH CARRY (VAC V1,V2,V3,V4,M5) + 
[8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_VecReg_32_35, ap_Mask_20_23, ap_ImmUnsigned_36_39}}, + {VACCC, 0xff00000000ff0000, 0xe700000000b90000, 0xff00000000, // VECTOR ADD WITH CARRY COMPUTE CARRY (VACCC V1,V2,V3,V4,M5) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_VecReg_32_35, ap_Mask_20_23, ap_ImmUnsigned_36_39}}, + {VN, 0xff00000000ff0000, 0xe700000000680000, 0xffff0000000, // VECTOR AND (VN V1,V2,V3) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_ImmUnsigned_36_39}}, + {VNC, 0xff00000000ff0000, 0xe700000000690000, 0xffff0000000, // VECTOR AND WITH COMPLEMENT (VNC V1,V2,V3) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_ImmUnsigned_36_39}}, + {VAVG, 0xff00000000ff0000, 0xe700000000f20000, 0xfff00000000, // VECTOR AVERAGE (VAVG V1,V2,V3,M4) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VAVGL, 0xff00000000ff0000, 0xe700000000f00000, 0xfff00000000, // VECTOR AVERAGE LOGICAL (VAVGL V1,V2,V3,M4) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VBPERM, 0xff00000000ff0000, 0xe700000000850000, 0xffff0000000, // VECTOR BIT PERMUTE (VBPERM V1,V2,V3) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_ImmUnsigned_36_39}}, + {VCKSM, 0xff00000000ff0000, 0xe700000000660000, 0xffff0000000, // VECTOR CHECKSUM (VCKSM V1,V2,V3) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_ImmUnsigned_36_39}}, + {VCP, 0xff00000000ff0000, 0xe600000000770000, 0xf00f0ff0000000, // VECTOR COMPARE DECIMAL (VCP V1,V2,M3) + [8]*argField{ap_VecReg_12_15, ap_VecReg_16_19, ap_Mask_24_27, ap_ImmUnsigned_36_39}}, + {VCEQ, 0xff00000000ff0000, 0xe700000000f80000, 0xf0f00000000, // VECTOR COMPARE EQUAL (VCEQ V1,V2,V3,M4,M5) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_Mask_32_35, ap_Mask_24_27, ap_ImmUnsigned_36_39}}, + {VCH, 0xff00000000ff0000, 
0xe700000000fb0000, 0xf0f00000000, // VECTOR COMPARE HIGH (VCH V1,V2,V3,M4,M5) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_Mask_32_35, ap_Mask_24_27, ap_ImmUnsigned_36_39}}, + {VCHL, 0xff00000000ff0000, 0xe700000000f90000, 0xf0f00000000, // VECTOR COMPARE HIGH LOGICAL (VCHL V1,V2,V3,M4,M5) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_Mask_32_35, ap_Mask_24_27, ap_ImmUnsigned_36_39}}, + {VCSPH, 0xff00000000ff0000, 0xe6000000007d0000, 0xf0ff0000000, // VECTOR CONVERT HFP TO SCALED DECIMAL (VCSPH V1,V2,V3,M4) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_Mask_24_27, ap_ImmUnsigned_36_39}}, + {VCVB, 0xff00000000ff0000, 0xe600000000500000, 0xff00f0000000, // VECTOR CONVERT TO BINARY (VCVB R1,V2,M3,M4) + [8]*argField{ap_Reg_8_11, ap_VecReg_12_15, ap_Mask_24_27, ap_Mask_28_31, ap_ImmUnsigned_36_39}}, + {VCVBG, 0xff00000000ff0000, 0xe600000000520000, 0xff00f0000000, // VECTOR CONVERT TO BINARY (VCVBG R1,V2,M3,M4) + [8]*argField{ap_Reg_8_11, ap_VecReg_12_15, ap_Mask_24_27, ap_Mask_28_31, ap_ImmUnsigned_36_39}}, + {VCVD, 0xff00000000ff0000, 0xe600000000580000, 0xff0000000000, // VECTOR CONVERT TO DECIMAL (VCVD V1,R2,I3,M4) + [8]*argField{ap_VecReg_8_11, ap_Reg_12_15, ap_ImmUnsigned_28_35, ap_Mask_24_27, ap_ImmUnsigned_36_39}}, + {VCVDG, 0xff00000000ff0000, 0xe6000000005a0000, 0xff0000000000, // VECTOR CONVERT TO DECIMAL (VCVDG V1,R2,I3,M4) + [8]*argField{ap_VecReg_8_11, ap_Reg_12_15, ap_ImmUnsigned_28_35, ap_Mask_24_27, ap_ImmUnsigned_36_39}}, + {VCLZDP, 0xff00000000ff0000, 0xe600000000510000, 0xff0ff0000000, // VECTOR COUNT LEADING ZERO DIGITS (VCLZDP V1,V2,M3) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_Mask_24_27, ap_ImmUnsigned_36_39}}, + {VCLZ, 0xff00000000ff0000, 0xe700000000530000, 0xffff00000000, // VECTOR COUNT LEADING ZEROS (VCLZ V1,V2,M3) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VCTZ, 0xff00000000ff0000, 0xe700000000520000, 0xffff00000000, 
// VECTOR COUNT TRAILING ZEROS (VCTZ V1,V2,M3) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VDP, 0xff00000000ff0000, 0xe6000000007a0000, 0xf0000000000, // VECTOR DIVIDE DECIMAL (VDP V1,V2,V3,I4,M5) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_ImmUnsigned_28_35, ap_Mask_24_27, ap_ImmUnsigned_36_39}}, + {VEC, 0xff00000000ff0000, 0xe700000000db0000, 0xffff00000000, // VECTOR ELEMENT COMPARE (VEC V1,V2,M3) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VECL, 0xff00000000ff0000, 0xe700000000d90000, 0xffff00000000, // VECTOR ELEMENT COMPARE LOGICAL (VECL V1,V2,M3) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VERIM, 0xff00000000ff0000, 0xe700000000720000, 0xf0000000000, // VECTOR ELEMENT ROTATE AND INSERT UNDER MASK (VERIM V1,V2,V3,I4,M5) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_ImmUnsigned_24_31, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VERLL, 0xff00000000ff0000, 0xe700000000330000, 0x0, // VECTOR ELEMENT ROTATE LEFT LOGICAL (VERLL V1,V3,D2(B2),M4) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_DispUnsigned_20_31, ap_BaseReg_16_19, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VERLLV, 0xff00000000ff0000, 0xe700000000730000, 0xfff00000000, // VECTOR ELEMENT ROTATE LEFT LOGICAL (VERLLV V1,V2,V3,M4) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VESLV, 0xff00000000ff0000, 0xe700000000700000, 0xfff00000000, // VECTOR ELEMENT SHIFT LEFT (VESLV V1,V2,V3,M4) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VESL, 0xff00000000ff0000, 0xe700000000300000, 0x0, // VECTOR ELEMENT SHIFT LEFT (VESL V1,V3,D2(B2),M4) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_DispUnsigned_20_31, ap_BaseReg_16_19, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VESRA, 0xff00000000ff0000, 0xe7000000003a0000, 0x0, 
// VECTOR ELEMENT SHIFT RIGHT ARITHMETIC (VESRA V1,V3,D2(B2),M4) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_DispUnsigned_20_31, ap_BaseReg_16_19, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VESRAV, 0xff00000000ff0000, 0xe7000000007a0000, 0xfff00000000, // VECTOR ELEMENT SHIFT RIGHT ARITHMETIC (VESRAV V1,V2,V3,M4) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VESRL, 0xff00000000ff0000, 0xe700000000380000, 0x0, // VECTOR ELEMENT SHIFT RIGHT LOGICAL (VESRL V1,V3,D2(B2),M4) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_DispUnsigned_20_31, ap_BaseReg_16_19, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VESRLV, 0xff00000000ff0000, 0xe700000000780000, 0xfff00000000, // VECTOR ELEMENT SHIFT RIGHT LOGICAL (VESRLV V1,V2,V3,M4) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VX, 0xff00000000ff0000, 0xe7000000006d0000, 0xffff0000000, // VECTOR EXCLUSIVE OR (VX V1,V2,V3) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_ImmUnsigned_36_39}}, + {VFAE, 0xff00000000ff0000, 0xe700000000820000, 0xf0f00000000, // VECTOR FIND ANY ELEMENT EQUAL (VFAE V1,V2,V3,M4,M5) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_Mask_32_35, ap_Mask_24_27, ap_ImmUnsigned_36_39}}, + {VFEE, 0xff00000000ff0000, 0xe700000000800000, 0xf0f00000000, // VECTOR FIND ELEMENT EQUAL (VFEE V1,V2,V3,M4,M5) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_Mask_32_35, ap_Mask_24_27, ap_ImmUnsigned_36_39}}, + {VFENE, 0xff00000000ff0000, 0xe700000000810000, 0xf0f00000000, // VECTOR FIND ELEMENT NOT EQUAL (VFENE V1,V2,V3,M4,M5) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_Mask_32_35, ap_Mask_24_27, ap_ImmUnsigned_36_39}}, + {VFA, 0xff00000000ff0000, 0xe700000000e30000, 0xff000000000, // VECTOR FP ADD (VFA V1,V2,V3,M4,M5) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_Mask_32_35, ap_Mask_28_31, 
ap_ImmUnsigned_36_39}}, + {WFK, 0xff00000000ff0000, 0xe700000000ca0000, 0xfff000000000, // VECTOR FP COMPARE AND SIGNAL SCALAR (WFK V1,V2,M3,M4) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_Mask_32_35, ap_Mask_28_31, ap_ImmUnsigned_36_39}}, + {VFCE, 0xff00000000ff0000, 0xe700000000e80000, 0xf0000000000, // VECTOR FP COMPARE EQUAL (VFCE V1,V2,V3,M4,M5,M6) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_Mask_32_35, ap_Mask_28_31, ap_Mask_24_27, ap_ImmUnsigned_36_39}}, + {VFCH, 0xff00000000ff0000, 0xe700000000eb0000, 0xf0000000000, // VECTOR FP COMPARE HIGH (VFCH V1,V2,V3,M4,M5,M6) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_Mask_32_35, ap_Mask_28_31, ap_Mask_24_27, ap_ImmUnsigned_36_39}}, + {VFCHE, 0xff00000000ff0000, 0xe700000000ea0000, 0xf0000000000, // VECTOR FP COMPARE HIGH OR EQUAL (VFCHE V1,V2,V3,M4,M5,M6) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_Mask_32_35, ap_Mask_28_31, ap_Mask_24_27, ap_ImmUnsigned_36_39}}, + {WFC, 0xff00000000ff0000, 0xe700000000cb0000, 0xfff000000000, // VECTOR FP COMPARE SCALAR (WFC V1,V2,M3,M4) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_Mask_32_35, ap_Mask_28_31, ap_ImmUnsigned_36_39}}, + {VCLFNH, 0xff00000000ff0000, 0xe600000000560000, 0xfff000000000, // VECTOR FP CONVERT AND LENGTHEN FROM NNP HIGH (VCLFNH V1,V2,M3,M4) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_Mask_32_35, ap_Mask_28_31, ap_ImmUnsigned_36_39}}, + {VCLFNL, 0xff00000000ff0000, 0xe6000000005e0000, 0xfff000000000, // VECTOR FP CONVERT AND LENGTHEN FROM NNP LOW (VCLFNL V1,V2,M3,M4) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_Mask_32_35, ap_Mask_28_31, ap_ImmUnsigned_36_39}}, + {VCRNF, 0xff00000000ff0000, 0xe600000000750000, 0xff000000000, // VECTOR FP CONVERT AND ROUND TO NNP (VCRNF V1,V2,V3,M4,M5) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_Mask_32_35, ap_Mask_28_31, ap_ImmUnsigned_36_39}}, + {VCFPS, 0xff00000000ff0000, 0xe700000000c30000, 
0xff0000000000, // VECTOR FP CONVERT FROM FIXED (VCFPS V1,V2,M3,M4,M5) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_Mask_32_35, ap_Mask_28_31, ap_Mask_24_27, ap_ImmUnsigned_36_39}}, + {VCDG, 0xff00000000ff0000, 0xe700000000c30000, 0xff0000000000, // VECTOR FP CONVERT FROM FIXED 64-BIT (VCDG V1,V2,M3,M4,M5) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_Mask_32_35, ap_Mask_28_31, ap_Mask_24_27, ap_ImmUnsigned_36_39}}, + {VCFPL, 0xff00000000ff0000, 0xe700000000c10000, 0xff0000000000, // VECTOR FP CONVERT FROM LOGICAL (VCFPL V1,V2,M3,M4,M5) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_Mask_32_35, ap_Mask_28_31, ap_Mask_24_27, ap_ImmUnsigned_36_39}}, + {VCDLG, 0xff00000000ff0000, 0xe700000000c10000, 0xff0000000000, // VECTOR FP CONVERT FROM LOGICAL 64-BIT (VCDLG V1,V2,M3,M4,M5) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_Mask_32_35, ap_Mask_28_31, ap_Mask_24_27, ap_ImmUnsigned_36_39}}, + {VCFN, 0xff00000000ff0000, 0xe6000000005d0000, 0xfff000000000, // VECTOR FP CONVERT FROM NNP (VCFN V1,V2,M3,M4) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_Mask_32_35, ap_Mask_28_31, ap_ImmUnsigned_36_39}}, + {VCSFP, 0xff00000000ff0000, 0xe700000000c20000, 0xff0000000000, // VECTOR FP CONVERT TO FIXED (VCSFP V1,V2,M3,M4,M5) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_Mask_32_35, ap_Mask_28_31, ap_Mask_24_27, ap_ImmUnsigned_36_39}}, + {VCGD, 0xff00000000ff0000, 0xe700000000c20000, 0xff0000000000, // VECTOR FP CONVERT TO FIXED 64-BIT (VCGD V1,V2,M3,M4,M5) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_Mask_32_35, ap_Mask_28_31, ap_Mask_24_27, ap_ImmUnsigned_36_39}}, + {VCLFP, 0xff00000000ff0000, 0xe700000000c00000, 0xff0000000000, // VECTOR FP CONVERT TO LOGICAL (VCLFP V1,V2,M3,M4,M5) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_Mask_32_35, ap_Mask_28_31, ap_Mask_24_27, ap_ImmUnsigned_36_39}}, + {VCLGD, 0xff00000000ff0000, 0xe700000000c00000, 0xff0000000000, // VECTOR FP CONVERT TO LOGICAL 64-BIT (VCLGD V1,V2,M3,M4,M5) + 
[8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_Mask_32_35, ap_Mask_28_31, ap_Mask_24_27, ap_ImmUnsigned_36_39}}, + {VCNF, 0xff00000000ff0000, 0xe600000000550000, 0xfff000000000, // VECTOR FP CONVERT TO NNP (VCNF V1,V2,M3,M4) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_Mask_32_35, ap_Mask_28_31, ap_ImmUnsigned_36_39}}, + {VFD, 0xff00000000ff0000, 0xe700000000e50000, 0xff000000000, // VECTOR FP DIVIDE (VFD V1,V2,V3,M4,M5) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_Mask_32_35, ap_Mask_28_31, ap_ImmUnsigned_36_39}}, + {VFLL, 0xff00000000ff0000, 0xe700000000c40000, 0xfff000000000, // VECTOR FP LOAD LENGTHENED (VFLL V1,V2,M3,M4) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_Mask_32_35, ap_Mask_28_31, ap_ImmUnsigned_36_39}}, + {VFLR, 0xff00000000ff0000, 0xe700000000c50000, 0xff0000000000, // VECTOR FP LOAD ROUNDED (VFLR V1,V2,M3,M4,M5) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_Mask_32_35, ap_Mask_28_31, ap_Mask_24_27, ap_ImmUnsigned_36_39}}, + {VFMAX, 0xff00000000ff0000, 0xe700000000ef0000, 0xf0000000000, // VECTOR FP MAXIMUM (VFMAX V1,V2,V3,M4,M5,M6) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_Mask_32_35, ap_Mask_28_31, ap_Mask_24_27, ap_ImmUnsigned_36_39}}, + {VFMIN, 0xff00000000ff0000, 0xe700000000ee0000, 0xf0000000000, // VECTOR FP MINIMUM (VFMIN V1,V2,V3,M4,M5,M6) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_Mask_32_35, ap_Mask_28_31, ap_Mask_24_27, ap_ImmUnsigned_36_39}}, + {VFM, 0xff00000000ff0000, 0xe700000000e70000, 0xff000000000, // VECTOR FP MULTIPLY (VFM V1,V2,V3,M4,M5) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_Mask_32_35, ap_Mask_28_31, ap_ImmUnsigned_36_39}}, + {VFMA, 0xff00000000ff0000, 0xe7000000008f0000, 0xf000000000, // VECTOR FP MULTIPLY AND ADD (VFMA V1,V2,V3,V4,M5,M6) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_VecReg_32_35, ap_Mask_28_31, ap_Mask_20_23, ap_ImmUnsigned_36_39}}, + {VFMS, 0xff00000000ff0000, 
0xe7000000008e0000, 0xf000000000, // VECTOR FP MULTIPLY AND SUBTRACT (VFMS V1,V2,V3,V4,M5,M6) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_VecReg_32_35, ap_Mask_28_31, ap_Mask_20_23, ap_ImmUnsigned_36_39}}, + {VFNMA, 0xff00000000ff0000, 0xe7000000009f0000, 0xf000000000, // VECTOR FP NEGATIVE MULTIPLY AND ADD (VFNMA V1,V2,V3,V4,M5,M6) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_VecReg_32_35, ap_Mask_28_31, ap_Mask_20_23, ap_ImmUnsigned_36_39}}, + {VFNMS, 0xff00000000ff0000, 0xe7000000009e0000, 0xf000000000, // VECTOR FP NEGATIVE MULTIPLY AND SUBTRACT (VFNMS V1,V2,V3,V4,M5,M6) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_VecReg_32_35, ap_Mask_28_31, ap_Mask_20_23, ap_ImmUnsigned_36_39}}, + {VFPSO, 0xff00000000ff0000, 0xe700000000cc0000, 0xff0000000000, // VECTOR FP PERFORM SIGN OPERATION (VFPSO V1,V2,M3,M4,M5) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_Mask_32_35, ap_Mask_28_31, ap_Mask_24_27, ap_ImmUnsigned_36_39}}, + {VFSQ, 0xff00000000ff0000, 0xe700000000ce0000, 0xfff000000000, // VECTOR FP SQUARE ROOT (VFSQ V1,V2,M3,M4) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_Mask_32_35, ap_Mask_28_31, ap_ImmUnsigned_36_39}}, + {VFS, 0xff00000000ff0000, 0xe700000000e20000, 0xff000000000, // VECTOR FP SUBTRACT (VFS V1,V2,V3,M4,M5) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_Mask_32_35, ap_Mask_28_31, ap_ImmUnsigned_36_39}}, + {VFTCI, 0xff00000000ff0000, 0xe7000000004a0000, 0x0, // VECTOR FP TEST DATA CLASS IMMEDIATE (VFTCI V1,V2,I3,M4,M5) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_ImmUnsigned_16_27, ap_Mask_32_35, ap_Mask_28_31, ap_ImmUnsigned_36_39}}, + {VGFM, 0xff00000000ff0000, 0xe700000000b40000, 0xfff00000000, // VECTOR GALOIS FIELD MULTIPLY SUM (VGFM V1,V2,V3,M4) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VGFMA, 0xff00000000ff0000, 0xe700000000bc0000, 0xff00000000, // VECTOR GALOIS FIELD 
MULTIPLY SUM AND ACCUMULATE (VGFMA V1,V2,V3,V4,M5) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_VecReg_32_35, ap_Mask_20_23, ap_ImmUnsigned_36_39}}, + {VGEF, 0xff00000000ff0000, 0xe700000000130000, 0x0, // VECTOR GATHER ELEMENT (32) (VGEF V1,D2(V2,B2),M3) + [8]*argField{ap_VecReg_8_11, ap_DispUnsigned_20_31, ap_VecReg_12_15, ap_BaseReg_16_19, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VGEG, 0xff00000000ff0000, 0xe700000000120000, 0x0, // VECTOR GATHER ELEMENT (64) (VGEG V1,D2(V2,B2),M3) + [8]*argField{ap_VecReg_8_11, ap_DispUnsigned_20_31, ap_VecReg_12_15, ap_BaseReg_16_19, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VGBM, 0xff00000000ff0000, 0xe700000000440000, 0xf0000f0000000, // VECTOR GENERATE BYTE MASK (VGBM V1,I2) + [8]*argField{ap_VecReg_8_11, ap_ImmUnsigned_16_31, ap_ImmUnsigned_36_39}}, + {VGM, 0xff00000000ff0000, 0xe700000000460000, 0xf000000000000, // VECTOR GENERATE MASK (VGM V1,I2,I3,M4) + [8]*argField{ap_VecReg_8_11, ap_ImmUnsigned_16_23, ap_ImmUnsigned_24_31, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VISTR, 0xff00000000ff0000, 0xe7000000005c0000, 0xff0f00000000, // VECTOR ISOLATE STRING (VISTR V1,V2,M3,M5) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_Mask_32_35, ap_Mask_24_27, ap_ImmUnsigned_36_39}}, + {VL, 0xff00000000ff0000, 0xe700000000060000, 0x0, // VECTOR LOAD (VL V1,D2(X2,B2),M3) + [8]*argField{ap_VecReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VLR, 0xff00000000ff0000, 0xe700000000560000, 0xfffff0000000, // VECTOR LOAD (VLR V1,V2) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_ImmUnsigned_36_39}}, + {VLREP, 0xff00000000ff0000, 0xe700000000050000, 0x0, // VECTOR LOAD AND REPLICATE (VLREP V1,D2(X2,B2),M3) + [8]*argField{ap_VecReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VLEBRH, 0xff00000000ff0000, 0xe600000000010000, 0x0, // VECTOR LOAD BYTE REVERSED ELEMENT (16) (VLEBRH 
V1,D2(X2,B2),M3) + [8]*argField{ap_VecReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VLEBRF, 0xff00000000ff0000, 0xe600000000030000, 0x0, // VECTOR LOAD BYTE REVERSED ELEMENT (32) (VLEBRF V1,D2(X2,B2),M3) + [8]*argField{ap_VecReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VLEBRG, 0xff00000000ff0000, 0xe600000000020000, 0x0, // VECTOR LOAD BYTE REVERSED ELEMENT (64) (VLEBRG V1,D2(X2,B2),M3) + [8]*argField{ap_VecReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VLBRREP, 0xff00000000ff0000, 0xe600000000050000, 0x0, // VECTOR LOAD BYTE REVERSED ELEMENT AND REPLICATE (VLBRREP V1,D2(X2,B2),M3) + [8]*argField{ap_VecReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VLLEBRZ, 0xff00000000ff0000, 0xe600000000040000, 0x0, // VECTOR LOAD BYTE REVERSED ELEMENT AND ZERO (VLLEBRZ V1,D2(X2,B2),M3) + [8]*argField{ap_VecReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VLBR, 0xff00000000ff0000, 0xe600000000060000, 0x0, // VECTOR LOAD BYTE REVERSED ELEMENTS (VLBR V1,D2(X2,B2),M3) + [8]*argField{ap_VecReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VLC, 0xff00000000ff0000, 0xe700000000de0000, 0xffff00000000, // VECTOR LOAD COMPLEMENT (VLC V1,V2,M3) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VLEH, 0xff00000000ff0000, 0xe700000000010000, 0x0, // VECTOR LOAD ELEMENT (16) (VLEH V1,D2(X2,B2),M3) + [8]*argField{ap_VecReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VLEF, 0xff00000000ff0000, 0xe700000000030000, 0x0, // VECTOR LOAD ELEMENT (32) (VLEF V1,D2(X2,B2),M3) + [8]*argField{ap_VecReg_8_11, 
ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VLEG, 0xff00000000ff0000, 0xe700000000020000, 0x0, // VECTOR LOAD ELEMENT (64) (VLEG V1,D2(X2,B2),M3) + [8]*argField{ap_VecReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VLEB, 0xff00000000ff0000, 0xe700000000000000, 0x0, // VECTOR LOAD ELEMENT (8) (VLEB V1,D2(X2,B2),M3) + [8]*argField{ap_VecReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VLEIH, 0xff00000000ff0000, 0xe700000000410000, 0xf000000000000, // VECTOR LOAD ELEMENT IMMEDIATE (16) (VLEIH V1,I2,M3) + [8]*argField{ap_VecReg_8_11, ap_ImmUnsigned_16_31, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VLEIF, 0xff00000000ff0000, 0xe700000000430000, 0xf000000000000, // VECTOR LOAD ELEMENT IMMEDIATE (32) (VLEIF V1,I2,M3) + [8]*argField{ap_VecReg_8_11, ap_ImmUnsigned_16_31, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VLEIG, 0xff00000000ff0000, 0xe700000000420000, 0xf000000000000, // VECTOR LOAD ELEMENT IMMEDIATE (64) (VLEIG V1,I2,M3) + [8]*argField{ap_VecReg_8_11, ap_ImmUnsigned_16_31, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VLEIB, 0xff00000000ff0000, 0xe700000000400000, 0xf000000000000, // VECTOR LOAD ELEMENT IMMEDIATE (8) (VLEIB V1,I2,M3) + [8]*argField{ap_VecReg_8_11, ap_ImmUnsigned_16_31, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VLER, 0xff00000000ff0000, 0xe600000000070000, 0x0, // VECTOR LOAD ELEMENTS REVERSED (VLER V1,D2(X2,B2),M3) + [8]*argField{ap_VecReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VFI, 0xff00000000ff0000, 0xe700000000c70000, 0xff0000000000, // VECTOR LOAD FP INTEGER (VFI V1,V2,M3,M4,M5) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_Mask_32_35, ap_Mask_28_31, ap_Mask_24_27, ap_ImmUnsigned_36_39}}, + {VLGV, 0xff00000000ff0000, 0xe700000000210000, 0x0, // VECTOR LOAD GR FROM VR ELEMENT (VLGV 
R1,V3,D2(B2),M4) + [8]*argField{ap_Reg_8_11, ap_VecReg_12_15, ap_DispUnsigned_20_31, ap_BaseReg_16_19, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VLIP, 0xff00000000ff0000, 0xe600000000490000, 0xf000000000000, // VECTOR LOAD IMMEDIATE DECIMAL (VLIP V1,I2,I3) + [8]*argField{ap_VecReg_8_11, ap_ImmUnsigned_16_31, ap_ImmUnsigned_32_35, ap_ImmUnsigned_36_39}}, + {VLLEZ, 0xff00000000ff0000, 0xe700000000040000, 0x0, // VECTOR LOAD LOGICAL ELEMENT AND ZERO (VLLEZ V1,D2(X2,B2),M3) + [8]*argField{ap_VecReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VLM, 0xff00000000ff0000, 0xe700000000360000, 0x0, // VECTOR LOAD MULTIPLE (VLM V1,V3,D2(B2),M4) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_DispUnsigned_20_31, ap_BaseReg_16_19, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VLP, 0xff00000000ff0000, 0xe700000000df0000, 0xffff00000000, // VECTOR LOAD POSITIVE (VLP V1,V2,M3) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VLRL, 0xff00000000ff0000, 0xe600000000350000, 0x0, // VECTOR LOAD RIGHTMOST WITH LENGTH (VLRL V1,D2(B2),I3) + [8]*argField{ap_VecReg_32_35, ap_DispUnsigned_20_31, ap_BaseReg_16_19, ap_ImmUnsigned_8_15, ap_ImmUnsigned_36_39}}, + {VLRLR, 0xff00000000ff0000, 0xe600000000370000, 0xf0000000000000, // VECTOR LOAD RIGHTMOST WITH LENGTH (VLRLR V1,R3,D2(B2)) + [8]*argField{ap_VecReg_32_35, ap_Reg_12_15, ap_DispUnsigned_20_31, ap_BaseReg_16_19, ap_ImmUnsigned_36_39}}, + {VLBB, 0xff00000000ff0000, 0xe700000000070000, 0x0, // VECTOR LOAD TO BLOCK BOUNDARY (VLBB V1,D2(X2,B2),M3) + [8]*argField{ap_VecReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VLVG, 0xff00000000ff0000, 0xe700000000220000, 0x0, // VECTOR LOAD VR ELEMENT FROM GR (VLVG V1,R3,D2(B2),M4) + [8]*argField{ap_VecReg_8_11, ap_Reg_12_15, ap_DispUnsigned_20_31, ap_BaseReg_16_19, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VLVGP, 0xff00000000ff0000, 
0xe700000000620000, 0xffff0000000, // VECTOR LOAD VR FROM GRS DISJOINT (VLVGP V1,R2,R3) + [8]*argField{ap_VecReg_8_11, ap_Reg_12_15, ap_Reg_16_19, ap_ImmUnsigned_36_39}}, + {VLL, 0xff00000000ff0000, 0xe700000000370000, 0xf0000000, // VECTOR LOAD WITH LENGTH (VLL V1,R3,D2(B2)) + [8]*argField{ap_VecReg_8_11, ap_Reg_12_15, ap_DispUnsigned_20_31, ap_BaseReg_16_19, ap_ImmUnsigned_36_39}}, + {VMX, 0xff00000000ff0000, 0xe700000000ff0000, 0xfff00000000, // VECTOR MAXIMUM (VMX V1,V2,V3,M4) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VMXL, 0xff00000000ff0000, 0xe700000000fd0000, 0xfff00000000, // VECTOR MAXIMUM LOGICAL (VMXL V1,V2,V3,M4) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VMRH, 0xff00000000ff0000, 0xe700000000610000, 0xfff00000000, // VECTOR MERGE HIGH (VMRH V1,V2,V3,M4) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VMRL, 0xff00000000ff0000, 0xe700000000600000, 0xfff00000000, // VECTOR MERGE LOW (VMRL V1,V2,V3,M4) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VMN, 0xff00000000ff0000, 0xe700000000fe0000, 0xfff00000000, // VECTOR MINIMUM (VMN V1,V2,V3,M4) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VMNL, 0xff00000000ff0000, 0xe700000000fc0000, 0xfff00000000, // VECTOR MINIMUM LOGICAL (VMNL V1,V2,V3,M4) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VMAE, 0xff00000000ff0000, 0xe700000000ae0000, 0xff00000000, // VECTOR MULTIPLY AND ADD EVEN (VMAE V1,V2,V3,V4,M5) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_VecReg_32_35, ap_Mask_20_23, ap_ImmUnsigned_36_39}}, + {VMAH, 0xff00000000ff0000, 0xe700000000ab0000, 0xff00000000, // VECTOR MULTIPLY AND ADD HIGH (VMAH V1,V2,V3,V4,M5) + 
[8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_VecReg_32_35, ap_Mask_20_23, ap_ImmUnsigned_36_39}}, + {VMALE, 0xff00000000ff0000, 0xe700000000ac0000, 0xff00000000, // VECTOR MULTIPLY AND ADD LOGICAL EVEN (VMALE V1,V2,V3,V4,M5) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_VecReg_32_35, ap_Mask_20_23, ap_ImmUnsigned_36_39}}, + {VMALH, 0xff00000000ff0000, 0xe700000000a90000, 0xff00000000, // VECTOR MULTIPLY AND ADD LOGICAL HIGH (VMALH V1,V2,V3,V4,M5) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_VecReg_32_35, ap_Mask_20_23, ap_ImmUnsigned_36_39}}, + {VMALO, 0xff00000000ff0000, 0xe700000000ad0000, 0xff00000000, // VECTOR MULTIPLY AND ADD LOGICAL ODD (VMALO V1,V2,V3,V4,M5) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_VecReg_32_35, ap_Mask_20_23, ap_ImmUnsigned_36_39}}, + {VMAL, 0xff00000000ff0000, 0xe700000000aa0000, 0xff00000000, // VECTOR MULTIPLY AND ADD LOW (VMAL V1,V2,V3,V4,M5) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_VecReg_32_35, ap_Mask_20_23, ap_ImmUnsigned_36_39}}, + {VMAO, 0xff00000000ff0000, 0xe700000000af0000, 0xff00000000, // VECTOR MULTIPLY AND ADD ODD (VMAO V1,V2,V3,V4,M5) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_VecReg_32_35, ap_Mask_20_23, ap_ImmUnsigned_36_39}}, + {VMSP, 0xff00000000ff0000, 0xe600000000790000, 0xf0000000000, // VECTOR MULTIPLY AND SHIFT DECIMAL (VMSP V1,V2,V3,I4,M5) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_ImmUnsigned_28_35, ap_Mask_24_27, ap_ImmUnsigned_36_39}}, + {VMP, 0xff00000000ff0000, 0xe600000000780000, 0xf0000000000, // VECTOR MULTIPLY DECIMAL (VMP V1,V2,V3,I4,M5) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_ImmUnsigned_28_35, ap_Mask_24_27, ap_ImmUnsigned_36_39}}, + {VME, 0xff00000000ff0000, 0xe700000000a60000, 0xfff00000000, // VECTOR MULTIPLY EVEN (VME V1,V2,V3,M4) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, 
ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VMH, 0xff00000000ff0000, 0xe700000000a30000, 0xfff00000000, // VECTOR MULTIPLY HIGH (VMH V1,V2,V3,M4) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VMLE, 0xff00000000ff0000, 0xe700000000a40000, 0xfff00000000, // VECTOR MULTIPLY LOGICAL EVEN (VMLE V1,V2,V3,M4) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VMLH, 0xff00000000ff0000, 0xe700000000a10000, 0xfff00000000, // VECTOR MULTIPLY LOGICAL HIGH (VMLH V1,V2,V3,M4) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VMLO, 0xff00000000ff0000, 0xe700000000a50000, 0xfff00000000, // VECTOR MULTIPLY LOGICAL ODD (VMLO V1,V2,V3,M4) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VML, 0xff00000000ff0000, 0xe700000000a20000, 0xfff00000000, // VECTOR MULTIPLY LOW (VML V1,V2,V3,M4) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VMO, 0xff00000000ff0000, 0xe700000000a70000, 0xfff00000000, // VECTOR MULTIPLY ODD (VMO V1,V2,V3,M4) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VMSL, 0xff00000000ff0000, 0xe700000000b80000, 0xf00000000, // VECTOR MULTIPLY SUM LOGICAL (VMSL V1,V2,V3,V4,M5,M6) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_VecReg_32_35, ap_Mask_20_23, ap_Mask_24_27, ap_ImmUnsigned_36_39}}, + {VNN, 0xff00000000ff0000, 0xe7000000006e0000, 0xffff0000000, // VECTOR NAND (VNN V1,V2,V3) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_ImmUnsigned_36_39}}, + {VNO, 0xff00000000ff0000, 0xe7000000006b0000, 0xffff0000000, // VECTOR NOR (VNO V1,V2,V3) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_ImmUnsigned_36_39}}, + {VNX, 0xff00000000ff0000, 0xe7000000006c0000, 0xffff0000000, 
// VECTOR NOT EXCLUSIVE OR (VNX V1,V2,V3) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_ImmUnsigned_36_39}}, + {VO, 0xff00000000ff0000, 0xe7000000006a0000, 0xffff0000000, // VECTOR OR (VO V1,V2,V3) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_ImmUnsigned_36_39}}, + {VOC, 0xff00000000ff0000, 0xe7000000006f0000, 0xffff0000000, // VECTOR OR WITH COMPLEMENT (VOC V1,V2,V3) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_ImmUnsigned_36_39}}, + {VPK, 0xff00000000ff0000, 0xe700000000940000, 0xfff00000000, // VECTOR PACK (VPK V1,V2,V3,M4) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VPKLS, 0xff00000000ff0000, 0xe700000000950000, 0xf0f00000000, // VECTOR PACK LOGICAL SATURATE (VPKLS V1,V2,V3,M4,M5) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_Mask_32_35, ap_Mask_24_27, ap_ImmUnsigned_36_39}}, + {VPKS, 0xff00000000ff0000, 0xe700000000970000, 0xf0f00000000, // VECTOR PACK SATURATE (VPKS V1,V2,V3,M4,M5) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_Mask_32_35, ap_Mask_24_27, ap_ImmUnsigned_36_39}}, + {VPKZ, 0xff00000000ff0000, 0xe600000000340000, 0x0, // VECTOR PACK ZONED (VPKZ V1,D2(B2),I3) + [8]*argField{ap_VecReg_32_35, ap_DispUnsigned_20_31, ap_BaseReg_16_19, ap_ImmUnsigned_8_15, ap_ImmUnsigned_36_39}}, + {VPKZR, 0xff00000000ff0000, 0xe600000000700000, 0xf0000000000, // VECTOR PACK ZONED REGISTER (VPKZR V1,V2,V3,I4,M5) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_ImmUnsigned_28_35, ap_Mask_24_27, ap_ImmUnsigned_36_39}}, + {VPSOP, 0xff00000000ff0000, 0xe6000000005b0000, 0x0, // VECTOR PERFORM SIGN OPERATION DECIMAL (VPSOP V1,V2,I3,I4,M5) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_ImmUnsigned_28_35, ap_ImmUnsigned_16_23, ap_Mask_24_27, ap_ImmUnsigned_36_39}}, + {VPERM, 0xff00000000ff0000, 0xe7000000008c0000, 0xfff00000000, // VECTOR PERMUTE (VPERM V1,V2,V3,V4) + 
[8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_VecReg_32_35, ap_ImmUnsigned_36_39}}, + {VPDI, 0xff00000000ff0000, 0xe700000000840000, 0xfff00000000, // VECTOR PERMUTE DOUBLEWORD IMMEDIATE (VPDI V1,V2,V3,M4) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VPOPCT, 0xff00000000ff0000, 0xe700000000500000, 0xffff00000000, // VECTOR POPULATION COUNT (VPOPCT V1,V2,M3) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VRP, 0xff00000000ff0000, 0xe6000000007b0000, 0xf0000000000, // VECTOR REMAINDER DECIMAL (VRP V1,V2,V3,I4,M5) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_ImmUnsigned_28_35, ap_Mask_24_27, ap_ImmUnsigned_36_39}}, + {VREP, 0xff00000000ff0000, 0xe7000000004d0000, 0x0, // VECTOR REPLICATE (VREP V1,V3,I2,M4) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_ImmUnsigned_16_31, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VREPI, 0xff00000000ff0000, 0xe700000000450000, 0xf000000000000, // VECTOR REPLICATE IMMEDIATE (VREPI V1,I2,M3) + [8]*argField{ap_VecReg_8_11, ap_ImmUnsigned_16_31, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VSCEF, 0xff00000000ff0000, 0xe7000000001b0000, 0x0, // VECTOR SCATTER ELEMENT (32) (VSCEF V1,D2(V2,B2),M3) + [8]*argField{ap_VecReg_8_11, ap_DispUnsigned_20_31, ap_VecReg_12_15, ap_BaseReg_16_19, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VSCEG, 0xff00000000ff0000, 0xe7000000001a0000, 0x0, // VECTOR SCATTER ELEMENT (64) (VSCEG V1,D2(V2,B2),M3) + [8]*argField{ap_VecReg_8_11, ap_DispUnsigned_20_31, ap_VecReg_12_15, ap_BaseReg_16_19, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VSEL, 0xff00000000ff0000, 0xe7000000008d0000, 0xfff00000000, // VECTOR SELECT (VSEL V1,V2,V3,V4) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_VecReg_32_35, ap_ImmUnsigned_36_39}}, + {VSDP, 0xff00000000ff0000, 0xe6000000007e0000, 0xf0000000000, // VECTOR SHIFT AND DIVIDE DECIMAL (VSDP V1,V2,V3,I4,M5) + 
[8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_ImmUnsigned_28_35, ap_Mask_24_27, ap_ImmUnsigned_36_39}}, + {VSRP, 0xff00000000ff0000, 0xe600000000590000, 0x0, // VECTOR SHIFT AND ROUND DECIMAL (VSRP V1,V2,I3,I4,M5) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_ImmUnsigned_28_35, ap_ImmUnsigned_16_23, ap_Mask_24_27, ap_ImmUnsigned_36_39}}, + {VSRPR, 0xff00000000ff0000, 0xe600000000720000, 0xf0000000000, // VECTOR SHIFT AND ROUND DECIMAL REGISTER (VSRPR V1,V2,V3,I4,M5) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_ImmUnsigned_28_35, ap_Mask_24_27, ap_ImmUnsigned_36_39}}, + {VSL, 0xff00000000ff0000, 0xe700000000740000, 0xffff0000000, // VECTOR SHIFT LEFT (VSL V1,V2,V3) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_ImmUnsigned_36_39}}, + {VSLB, 0xff00000000ff0000, 0xe700000000750000, 0xffff0000000, // VECTOR SHIFT LEFT BY BYTE (VSLB V1,V2,V3) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_ImmUnsigned_36_39}}, + {VSLD, 0xff00000000ff0000, 0xe700000000860000, 0xf00f0000000, // VECTOR SHIFT LEFT DOUBLE BY BIT (VSLD V1,V2,V3,I4) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_ImmUnsigned_24_31, ap_ImmUnsigned_36_39}}, + {VSLDB, 0xff00000000ff0000, 0xe700000000770000, 0xf00f0000000, // VECTOR SHIFT LEFT DOUBLE BY BYTE (VSLDB V1,V2,V3,I4) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_ImmUnsigned_24_31, ap_ImmUnsigned_36_39}}, + {VSRA, 0xff00000000ff0000, 0xe7000000007e0000, 0xffff0000000, // VECTOR SHIFT RIGHT ARITHMETIC (VSRA V1,V2,V3) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_ImmUnsigned_36_39}}, + {VSRAB, 0xff00000000ff0000, 0xe7000000007f0000, 0xffff0000000, // VECTOR SHIFT RIGHT ARITHMETIC BY BYTE (VSRAB V1,V2,V3) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_ImmUnsigned_36_39}}, + {VSRD, 0xff00000000ff0000, 0xe700000000870000, 0xf00f0000000, // VECTOR SHIFT RIGHT DOUBLE BY BIT (VSRD 
V1,V2,V3,I4) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_ImmUnsigned_24_31, ap_ImmUnsigned_36_39}}, + {VSRL, 0xff00000000ff0000, 0xe7000000007c0000, 0xffff0000000, // VECTOR SHIFT RIGHT LOGICAL (VSRL V1,V2,V3) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_ImmUnsigned_36_39}}, + {VSRLB, 0xff00000000ff0000, 0xe7000000007d0000, 0xffff0000000, // VECTOR SHIFT RIGHT LOGICAL BY BYTE (VSRLB V1,V2,V3) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_ImmUnsigned_36_39}}, + {VSEG, 0xff00000000ff0000, 0xe7000000005f0000, 0xffff00000000, // VECTOR SIGN EXTEND TO DOUBLEWORD (VSEG V1,V2,M3) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VST, 0xff00000000ff0000, 0xe7000000000e0000, 0x0, // VECTOR STORE (VST V1,D2(X2,B2),M3) + [8]*argField{ap_VecReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VSTEBRH, 0xff00000000ff0000, 0xe600000000090000, 0x0, // VECTOR STORE BYTE REVERSED ELEMENT(16) (VSTEBRH V1,D2(X2,B2),M3) + [8]*argField{ap_VecReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VSTEBRF, 0xff00000000ff0000, 0xe6000000000b0000, 0x0, // VECTOR STORE BYTE REVERSED ELEMENT(32) (VSTEBRF V1,D2(X2,B2),M3) + [8]*argField{ap_VecReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VSTEBRG, 0xff00000000ff0000, 0xe6000000000a0000, 0x0, // VECTOR STORE BYTE REVERSED ELEMENT(64) (VSTEBRG V1,D2(X2,B2),M3) + [8]*argField{ap_VecReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VSTBR, 0xff00000000ff0000, 0xe6000000000e0000, 0x0, // VECTOR STORE BYTE REVERSED ELEMENTS (VSTBR V1,D2(X2,B2),M3) + [8]*argField{ap_VecReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VSTEH, 0xff00000000ff0000, 
0xe700000000090000, 0x0, // VECTOR STORE ELEMENT (16) (VSTEH V1,D2(X2,B2),M3) + [8]*argField{ap_VecReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VSTEF, 0xff00000000ff0000, 0xe7000000000b0000, 0x0, // VECTOR STORE ELEMENT (32) (VSTEF V1,D2(X2,B2),M3) + [8]*argField{ap_VecReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VSTEG, 0xff00000000ff0000, 0xe7000000000a0000, 0x0, // VECTOR STORE ELEMENT (64) (VSTEG V1,D2(X2,B2),M3) + [8]*argField{ap_VecReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VSTEB, 0xff00000000ff0000, 0xe700000000080000, 0x0, // VECTOR STORE ELEMENT (8) (VSTEB V1,D2(X2,B2),M3) + [8]*argField{ap_VecReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VSTER, 0xff00000000ff0000, 0xe6000000000f0000, 0x0, // VECTOR STORE ELEMENTS REVERSED (VSTER V1,D2(X2,B2),M3) + [8]*argField{ap_VecReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VSTM, 0xff00000000ff0000, 0xe7000000003e0000, 0x0, // VECTOR STORE MULTIPLE (VSTM V1,V3,D2(B2),M4) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_DispUnsigned_20_31, ap_BaseReg_16_19, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VSTRL, 0xff00000000ff0000, 0xe6000000003d0000, 0x0, // VECTOR STORE RIGHTMOST WITH LENGTH (VSTRL V1,D2(B2),I3) + [8]*argField{ap_VecReg_32_35, ap_DispUnsigned_20_31, ap_BaseReg_16_19, ap_ImmUnsigned_8_15, ap_ImmUnsigned_36_39}}, + {VSTRLR, 0xff00000000ff0000, 0xe6000000003f0000, 0xf0000000000000, // VECTOR STORE RIGHTMOST WITH LENGTH (VSTRLR V1,R3,D2(B2)) + [8]*argField{ap_VecReg_32_35, ap_Reg_12_15, ap_DispUnsigned_20_31, ap_BaseReg_16_19, ap_ImmUnsigned_36_39}}, + {VSTL, 0xff00000000ff0000, 0xe7000000003f0000, 0xf0000000, // VECTOR STORE WITH LENGTH (VSTL V1,R3,D2(B2)) + 
[8]*argField{ap_VecReg_8_11, ap_Reg_12_15, ap_DispUnsigned_20_31, ap_BaseReg_16_19, ap_ImmUnsigned_36_39}}, + {VSTRC, 0xff00000000ff0000, 0xe7000000008a0000, 0xf00000000, // VECTOR STRING RANGE COMPARE (VSTRC V1,V2,V3,V4,M5,M6) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_VecReg_32_35, ap_Mask_20_23, ap_Mask_24_27, ap_ImmUnsigned_36_39}}, + {VSTRS, 0xff00000000ff0000, 0xe7000000008b0000, 0xf00000000, // VECTOR STRING SEARCH (VSTRS V1,V2,V3,V4,M5,M6) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_VecReg_32_35, ap_Mask_20_23, ap_Mask_24_27, ap_ImmUnsigned_36_39}}, + {VS, 0xff00000000ff0000, 0xe700000000f70000, 0xfff00000000, // VECTOR SUBTRACT (VS V1,V2,V3,M4) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VSCBI, 0xff00000000ff0000, 0xe700000000f50000, 0xfff00000000, // VECTOR SUBTRACT COMPUTE BORROW INDICATION (VSCBI V1,V2,V3,M4) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VSP, 0xff00000000ff0000, 0xe600000000730000, 0xf0000000000, // VECTOR SUBTRACT DECIMAL (VSP V1,V2,V3,I4,M5) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_ImmUnsigned_28_35, ap_Mask_24_27, ap_ImmUnsigned_36_39}}, + {VSBCBI, 0xff00000000ff0000, 0xe700000000bd0000, 0xff00000000, // VECTOR SUBTRACT WITH BORROW COMPUTE BORROW INDICATION (VSBCBI V1,V2,V3,V4,M5) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_VecReg_32_35, ap_Mask_20_23, ap_ImmUnsigned_36_39}}, + {VSBI, 0xff00000000ff0000, 0xe700000000bf0000, 0xff00000000, // VECTOR SUBTRACT WITH BORROW INDICATION (VSBI V1,V2,V3,V4,M5) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_VecReg_32_35, ap_Mask_20_23, ap_ImmUnsigned_36_39}}, + {VSUMG, 0xff00000000ff0000, 0xe700000000650000, 0xfff00000000, // VECTOR SUM ACROSS DOUBLEWORD (VSUMG V1,V2,V3,M4) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_Mask_32_35, 
ap_ImmUnsigned_36_39}}, + {VSUMQ, 0xff00000000ff0000, 0xe700000000670000, 0xfff00000000, // VECTOR SUM ACROSS QUADWORD (VSUMQ V1,V2,V3,M4) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VSUM, 0xff00000000ff0000, 0xe700000000640000, 0xfff00000000, // VECTOR SUM ACROSS WORD (VSUM V1,V2,V3,M4) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VTP, 0xff00000000ff0000, 0xe6000000005f0000, 0xf0fffff0000000, // VECTOR TEST DECIMAL (VTP V1) + [8]*argField{ap_VecReg_12_15, ap_ImmUnsigned_36_39}}, + {VTM, 0xff00000000ff0000, 0xe700000000d80000, 0xfffff0000000, // VECTOR TEST UNDER MASK (VTM V1,V2) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_ImmUnsigned_36_39}}, + {VUPH, 0xff00000000ff0000, 0xe700000000d70000, 0xffff00000000, // VECTOR UNPACK HIGH (VUPH V1,V2,M3) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VUPLH, 0xff00000000ff0000, 0xe700000000d50000, 0xffff00000000, // VECTOR UNPACK LOGICAL HIGH (VUPLH V1,V2,M3) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VUPLL, 0xff00000000ff0000, 0xe700000000d40000, 0xffff00000000, // VECTOR UNPACK LOGICAL LOW (VUPLL V1,V2,M3) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VUPL, 0xff00000000ff0000, 0xe700000000d60000, 0xffff00000000, // VECTOR UNPACK LOW (VUPL V1,V2,M3) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VUPKZ, 0xff00000000ff0000, 0xe6000000003c0000, 0x0, // VECTOR UNPACK ZONED (VUPKZ V1,D2(B2),I3) + [8]*argField{ap_VecReg_32_35, ap_DispUnsigned_20_31, ap_BaseReg_16_19, ap_ImmUnsigned_8_15, ap_ImmUnsigned_36_39}}, + {VUPKZH, 0xff00000000ff0000, 0xe600000000540000, 0xff0ff0000000, // VECTOR UNPACK ZONED HIGH (VUPKZH V1,V2,M3) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_Mask_24_27, ap_ImmUnsigned_36_39}}, + {VUPKZL, 
0xff00000000ff0000, 0xe6000000005c0000, 0xff0ff0000000, // VECTOR UNPACK ZONED LOW (VUPKZL V1,V2,M3) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_Mask_24_27, ap_ImmUnsigned_36_39}}, + {ZAP, 0xff00000000000000, 0xf800000000000000, 0x0, // ZERO AND ADD (ZAP D1(L1,B1),D2(L2,B2)) + [8]*argField{ap_DispUnsigned_20_31, ap_Len_8_11, ap_BaseReg_16_19, ap_DispUnsigned_36_47, ap_Len_12_15, ap_BaseReg_32_35}}, +} diff --git a/s390x/s390xasm/testdata/decode_generated.txt b/s390x/s390xasm/testdata/decode_generated.txt new file mode 100644 index 00000000..b7b3f09f --- /dev/null +++ b/s390x/s390xasm/testdata/decode_generated.txt @@ -0,0 +1,1245 @@ + 5a82100b| gnu a %r8,11(%r2,%r1) + 1a80| gnu ar %r8,%r0 + b9f80080| gnu ark %r8,%r0,%r0 +e382100b005a| gnu ay %r8,11(%r2,%r1) +e382100b0008| gnu ag %r8,11(%r2,%r1) + b9080080| gnu agr %r8,%r0 + b9e80080| gnu agrk %r8,%r0,%r0 +e382100b0018| gnu agf %r8,11(%r2,%r1) + b9180080| gnu agfr %r8,%r0 + b34a0080| gnu axbr %f8,%f0 + b3da0080| gnu axtr %f8,%f0,%f0 + b3da0180| gnu axtra %f8,%f0,%f0,1 +ed82100b001a| gnu adb %f8,11(%r2,%r1) + b31a0080| gnu adbr %f8,%f0 + b3d20080| gnu adtr %f8,%f0,%f0 + b3d20180| gnu adtra %f8,%f0,%f0,1 +ed82100b000a| gnu aeb %f8,11(%r2,%r1) + b30a0080| gnu aebr %f8,%f0 +fa332006100b| gnu ap 6(4,%r2),11(4,%r1) + 4a82100b| gnu ah %r8,11(%r2,%r1) +e382100b007a| gnu ahy %r8,11(%r2,%r1) +e382100b0038| gnu agh %r8,11(%r2,%r1) + a78a0008| gnu ahi %r8,8 + a78b0008| gnu aghi %r8,8 + b9c80080| gnu ahhhr %r8,%r0,%r0 + b9d80080| gnu ahhlr %r8,%r0,%r0 +c28900000008| gnu afi %r8,8 +ec80000800d8| gnu ahik %r8,%r0,8 +eb082006006a| gnu asi 6(%r2),8 +ec80000800d9| gnu aghik %r8,%r0,8 +c28800000008| gnu agfi %r8,8 +eb082006007a| gnu agsi 6(%r2),8 +cc8800000008| gnu aih %r8,8 + 5e82100b| gnu al %r8,11(%r2,%r1) + 1e80| gnu alr %r8,%r0 + b9fa0080| gnu alrk %r8,%r0,%r0 +e382100b005e| gnu aly %r8,11(%r2,%r1) +e382100b000a| gnu alg %r8,11(%r2,%r1) + b90a0080| gnu algr %r8,%r0 + b9ea0080| gnu algrk %r8,%r0,%r0 +e382100b001a| gnu algf 
%r8,11(%r2,%r1) + b91a0080| gnu algfr %r8,%r0 + b9ca0080| gnu alhhhr %r8,%r0,%r0 + b9da0080| gnu alhhlr %r8,%r0,%r0 +c28b00000008| gnu alfi %r8,8 +c28a00000008| gnu algfi %r8,8 +e382100b0098| gnu alc %r8,11(%r2,%r1) + b9980080| gnu alcr %r8,%r0 +e382100b0088| gnu alcg %r8,11(%r2,%r1) + b9880080| gnu alcgr %r8,%r0 +ec80000800da| gnu alhsik %r8,%r0,8 +eb082006006e| gnu alsi 6(%r2),8 +ec80000800db| gnu alghsik %r8,%r0,8 +eb082006007e| gnu algsi 6(%r2),8 +cc8a00000008| gnu alsih %r8,8 +cc8b00000008| gnu alsihn %r8,8 + 3680| gnu axr %f8,%f0 + 6a82100b| gnu ad %f8,11(%r2,%r1) + 2a80| gnu adr %f8,%f0 + 7a82100b| gnu ae %f8,11(%r2,%r1) + 3a80| gnu aer %f8,%f0 + 6e82100b| gnu aw %f8,11(%r2,%r1) + 2e80| gnu awr %f8,%f0 + 7e82100b| gnu au %f8,11(%r2,%r1) + 3e80| gnu aur %f8,%f0 + 5482100b| gnu n %r8,11(%r2,%r1) + 1480| gnu nr %r8,%r0 + b9f40080| gnu nrk %r8,%r0,%r0 +e382100b0054| gnu ny %r8,11(%r2,%r1) +e382100b0080| gnu ng %r8,11(%r2,%r1) + b9800080| gnu ngr %r8,%r0 + b9e40080| gnu ngrk %r8,%r0,%r0 +d4032006100b| gnu nc 6(4,%r2),11(%r1) + 94082006| gnu ni 6(%r2),8 +eb0820060054| gnu niy 6(%r2),8 + a5840008| gnu nihh %r8,8 + a5850008| gnu nihl %r8,8 +c08a00000008| gnu nihf %r8,8 + a5860008| gnu nilh %r8,8 + a5870008| gnu nill %r8,8 +c08b00000008| gnu nilf %r8,8 + b9f50080| gnu ncrk %r8,%r0,%r0 + b9e50080| gnu ncgrk %r8,%r0,%r0 + 4582100b| gnu bal %r8,11(%r2,%r1) + 0580| gnu balr %r8,%r0 + 4d82100b| gnu bas %r8,11(%r2,%r1) + 0d80| gnu basr %r8,%r0 + 0c80| gnu bassm %r8,%r0 + b25a0080| gnu bsa %r8,%r0 + 0b80| gnu bsm %r8,%r0 + b2400080| gnu bakr %r8,%r0 + b2580080| gnu bsg %r8,%r0 +e372100b0047| gnu bine 11(%r2,%r1) + 4772100b| gnu bne 11(%r2,%r1) + 0770| gnu bner %r0 + 4682100b| gnu bct %r8,11(%r2,%r1) + 0680| gnu bctr %r8,%r0 +e382100b0046| gnu bctg %r8,11(%r2,%r1) + b9460080| gnu bctgr %r8,%r0 + 8680100b| gnu bxh %r8,%r0,11(%r1) +eb80100b0044| gnu bxhg %r8,%r0,11(%r1) + 8780100b| gnu bxle %r8,%r0,11(%r1) +eb80100b0045| gnu bxleg %r8,%r0,11(%r1) +c77060b60000| gnu bpp 
7,0x1cc,182(%r6) +c57000000093| gnu bprp 7,0x1d2,0x2f8 + a7850000| gnu bras %r8,0x1d8 +c08500000000| gnu brasl %r8,0x1dc + a7740000| gnu jne 0x1e2 +c07400000000| gnu jgne 0x1e6 + a7860000| gnu brct %r8,0x1ec + a7870000| gnu brctg %r8,0x1f0 +cc8600000000| gnu brcth %r8,0x1f4 + 84800000| gnu brxh %r8,%r0,0x1fa +ec8000000044| gnu brxhg %r8,%r0,0x1fe + 85800000| gnu brxle %r8,%r0,0x204 +ec8000000045| gnu brxlg %r8,%r0,0x208 + b2760000| gnu xsch + b2410080| gnu cksm %r8,%r0 + b92e0080| gnu km %r8,%r0 + b9290080| gnu kma %r8,%r0,%r0 + b92f0080| gnu kmc %r8,%r0 + b92a0080| gnu kmf %r8,%r0 + b92d0080| gnu kmctr %r8,%r0,%r0 + b92b0080| gnu kmo %r8,%r0 + b2300000| gnu csch + 5982100b| gnu c %r8,11(%r2,%r1) + 1980| gnu cr %r8,%r0 +e382100b0059| gnu cy %r8,11(%r2,%r1) +e382100b0020| gnu cg %r8,11(%r2,%r1) + b9200080| gnu cgr %r8,%r0 +e382100b0030| gnu cgf %r8,11(%r2,%r1) + b9300080| gnu cgfr %r8,%r0 + b3490080| gnu cxbr %f8,%f0 + b3ec0080| gnu cxtr %f8,%f0 + b3690080| gnu cxr %f8,%f0 +ed82100b0019| gnu cdb %f8,11(%r2,%r1) + b3190080| gnu cdbr %f8,%f0 + b3e40080| gnu cdtr %f8,%f0 + 6982100b| gnu cd %f8,11(%r2,%r1) + 2980| gnu cdr %f8,%f0 +ed82100b0009| gnu ceb %f8,11(%r2,%r1) + b3090080| gnu cebr %f8,%f0 + 7982100b| gnu ce %f8,11(%r2,%r1) + 3980| gnu cer %f8,%f0 +ec8080cd30f6| gnu crb %r8,%r0,3,205(%r8) +ec8080cd30e4| gnu cgrb %r8,%r0,3,205(%r8) +ec80ffac3076| gnu crj %r8,%r0,3,0x1e6 +ec80ffac3064| gnu cgrj %r8,%r0,3,0x1ec + b21a100b| gnu cfc 11(%r1) + b98f0180| gnu crdte %r8,%r0,%r0,1 + b3480080| gnu kxbr %f8,%f0 + b3e80080| gnu kxtr %f8,%f0 +ed82100b0018| gnu kdb %f8,11(%r2,%r1) + b3180080| gnu kdbr %f8,%f0 + b3e00080| gnu kdtr %f8,%f0 +ed82100b0008| gnu keb %f8,11(%r2,%r1) + b3080080| gnu kebr %f8,%f0 + ba80100b| gnu cs %r8,%r0,11(%r1) +eb80100b0014| gnu csy %r8,%r0,11(%r1) +eb80100b0030| gnu csg %r8,%r0,11(%r1) + b2500080| gnu csp %r8,%r0 + b98a0080| gnu cspg %r8,%r0 +c8022006100b| gnu csst 6(%r2),11(%r1),%r0 + b9723080| gnu crt %r8,%r0,3 + b9603080| gnu cgrt %r8,%r0,3 + 
b3fc0080| gnu cextr %f8,%f0 + b3f40080| gnu cedtr %f8,%f0 +f9332006100b| gnu cp 6(4,%r2),11(4,%r1) + bb80100b| gnu cds %r8,%r0,11(%r1) +eb80100b0031| gnu cdsy %r8,%r0,11(%r1) +eb80100b003e| gnu cdsg %r8,%r0,11(%r1) + 4982100b| gnu ch %r8,11(%r2,%r1) +e382100b0079| gnu chy %r8,11(%r2,%r1) +e382100b0034| gnu cgh %r8,11(%r2,%r1) +e55420060008| gnu chhsi 6(%r2),8 + a78e0008| gnu chi %r8,8 +e55c20060008| gnu chsi 6(%r2),8 + a78f0008| gnu cghi %r8,8 +e55820060008| gnu cghsi 6(%r2),8 +c68500000000| gnu chrl %r8,0x330 +c68400000000| gnu cghrl %r8,0x336 +e382100b00cd| gnu chf %r8,11(%r2,%r1) + b9cd0080| gnu chhr %r8,%r0 + b9dd0080| gnu chlr %r8,%r0 +c28d00000008| gnu cfi %r8,8 +c28c00000008| gnu cgfi %r8,8 +ec8380cd08fe| gnu cib %r8,8,3,205(%r8) +ec8380cd08fc| gnu cgib %r8,8,3,205(%r8) +ec83ffac087e| gnu cij %r8,8,3,0x2ba +ec83ffac087c| gnu cgij %r8,8,3,0x2c0 +ec8000083072| gnu cit %r8,8,3 +ec8000083070| gnu cgit %r8,8,3 +cc8d00000008| gnu cih %r8,8 + 5582100b| gnu cl %r8,11(%r2,%r1) + 1580| gnu clr %r8,%r0 +e382100b0055| gnu cly %r8,11(%r2,%r1) +e382100b0021| gnu clg %r8,11(%r2,%r1) + b9210080| gnu clgr %r8,%r0 +e382100b0031| gnu clgf %r8,11(%r2,%r1) + b9310080| gnu clgfr %r8,%r0 +d5032006100b| gnu clc 6(4,%r2),11(%r1) + 95082006| gnu cli 6(%r2),8 +eb0820060055| gnu cliy 6(%r2),8 +ec8080cd30f7| gnu clrb %r8,%r0,3,205(%r8) +ec8080cd30e5| gnu clgrb %r8,%r0,3,205(%r8) +ec80ffac3077| gnu clrj %r8,%r0,3,0x314 +ec80ffac3065| gnu clgrj %r8,%r0,3,0x31a + b9733080| gnu clrt %r8,%r0,3 +eb83100b0023| gnu clt %r8,3,11(%r1) + b9613080| gnu clgrt %r8,%r0,3 +eb83100b002b| gnu clgt %r8,3,11(%r1) +eb83100b0020| gnu clmh %r8,3,11(%r1) + bd83100b| gnu clm %r8,3,11(%r1) +eb83100b0021| gnu clmy %r8,3,11(%r1) +e382100b00cf| gnu clhf %r8,11(%r2,%r1) + b9cf0080| gnu clhhr %r8,%r0 + b9df0080| gnu clhlr %r8,%r0 +e55520060008| gnu clhhsi 6(%r2),8 +c28f00000008| gnu clfi %r8,8 +e55d20060008| gnu clfhsi 6(%r2),8 +e55920060008| gnu clghsi 6(%r2),8 +c28e00000008| gnu clgfi %r8,8 +ec8380cd08ff| gnu clib 
%r8,8,3,205(%r8) +ec8380cd08fd| gnu clgib %r8,8,3,205(%r8) +ec83ffac087f| gnu clij %r8,8,3,0x37c +ec83ffac087d| gnu clgij %r8,8,3,0x382 +ec8000083073| gnu clfit %r8,8,3 +ec8000083071| gnu clgit %r8,8,3 +cc8f00000008| gnu clih %r8,8 + 0f80| gnu clcl %r8,%r0 + a980100b| gnu clcle %r8,%r0,11(%r1) +eb80100b008f| gnu clclu %r8,%r0,11(%r1) +c68f00000000| gnu clrl %r8,0x44e +c68700000000| gnu clhrl %r8,0x454 +c68a00000000| gnu clgrl %r8,0x45a +c68600000000| gnu clghrl %r8,0x460 +c68e00000000| gnu clgfrl %r8,0x466 + b25d0080| gnu clst %r8,%r0 +c68d00000000| gnu crl %r8,0x470 +c68800000000| gnu cgrl %r8,0x476 +c68c00000000| gnu cgfrl %r8,0x47c + b2570080| gnu cuse %r8,%r0 + b2630080| gnu cmpsc %r8,%r0 + b93a0080| gnu kdsa %r8,%r0 + b93e0080| gnu kimd %r8,%r0 + b93f0080| gnu klmd %r8,%r0 + b91e0080| gnu kmac %r8,%r0 + b3590080| gnu thdr %f8,%f0 + b3580080| gnu thder %f8,%f0 + b3960080| gnu cxfbr %f8,%r0 + b3963180| gnu cxfbra %f8,3,%r0,1 + b9593180| gnu cxftr %f8,3,%r0,1 + b3b60080| gnu cxfr %f8,%r0 + b3950080| gnu cdfbr %f8,%r0 + b3953180| gnu cdfbra %f8,3,%r0,1 + b9513180| gnu cdftr %f8,3,%r0,1 + b3b50080| gnu cdfr %f8,%r0 + b3940080| gnu cefbr %f8,%r0 + b3943180| gnu cefbra %f8,3,%r0,1 + b3b40080| gnu cefr %f8,%r0 + b3a60080| gnu cxgbr %f8,%r0 + b3a63180| gnu cxgbra %f8,3,%r0,1 + b3f90080| gnu cxgtr %f8,%r0 + b3f93180| gnu cxgtra %f8,3,%r0,1 + b3c60080| gnu cxgr %f8,%r0 + b3a50080| gnu cdgbr %f8,%r0 + b3a53180| gnu cdgbra %f8,3,%r0,1 + b3f10080| gnu cdgtr %f8,%r0 + b3f13180| gnu cdgtra %f8,3,%r0,1 + b3c50080| gnu cdgr %f8,%r0 + b3a40080| gnu cegbr %f8,%r0 + b3a43180| gnu cegbra %f8,3,%r0,1 + b3c40080| gnu cegr %f8,%r0 + b3923180| gnu cxlfbr %f8,3,%r0,1 + b95b3180| gnu cxlftr %f8,3,%r0,1 + b3913180| gnu cdlfbr %f8,3,%r0,1 + b9533180| gnu cdlftr %f8,3,%r0,1 + b3903180| gnu celfbr %f8,3,%r0,1 + b3a23180| gnu cxlgbr %f8,3,%r0,1 + b95a3180| gnu cxlgtr %f8,3,%r0,1 + b3a13180| gnu cdlgbr %f8,3,%r0,1 + b9523180| gnu cdlgtr %f8,3,%r0,1 + b3a03180| gnu celgbr %f8,3,%r0,1 
+ed03100b83af| gnu cxpt %f8,11(4,%r1),3 +ed03100b83ae| gnu cdpt %f8,11(4,%r1),3 + b3fb0080| gnu cxstr %f8,%r0 + b3f30080| gnu cdstr %f8,%r0 + b3fa0080| gnu cxutr %f8,%r0 + b3f20080| gnu cdutr %f8,%r0 +ed03100b83ab| gnu cxzt %f8,11(4,%r1),3 +ed03100b83aa| gnu cdzt %f8,11(4,%r1),3 + b3503080| gnu tbedr %f8,3,%f0 + b3513080| gnu tbdr %f8,3,%f0 + 4f82100b| gnu cvb %r8,11(%r2,%r1) +e382100b0006| gnu cvby %r8,11(%r2,%r1) +e382100b000e| gnu cvbg %r8,11(%r2,%r1) + 4e82100b| gnu cvd %r8,11(%r2,%r1) +e382100b0026| gnu cvdy %r8,11(%r2,%r1) +e382100b002e| gnu cvdg %r8,11(%r2,%r1) + b39a3080| gnu cfxbr %r8,3,%f0 + b39a3180| gnu cfxbra %r8,3,%f0,1 + b3aa3080| gnu cgxbr %r8,3,%f0 + b3aa3180| gnu cgxbra %r8,3,%f0,1 + b9493180| gnu cfxtr %r8,3,%f0,1 + b3e93080| gnu cgxtr %r8,3,%f0 + b3e93180| gnu cgxtra %r8,3,%f0,1 + b3ba3080| gnu cfxr %r8,3,%f0 + b3ca3080| gnu cgxr %r8,3,%f0 + b3993080| gnu cfdbr %r8,3,%f0 + b3993180| gnu cfdbra %r8,3,%f0,1 + b3a93080| gnu cgdbr %r8,3,%f0 + b3a93180| gnu cgdbra %r8,3,%f0,1 + b9413180| gnu cfdtr %r8,3,%f0,1 + b3e13080| gnu cgdtr %r8,3,%f0 + b3e13180| gnu cgdtra %r8,3,%f0,1 + b3b93080| gnu cfdr %r8,3,%f0 + b3c93080| gnu cgdr %r8,3,%f0 + b3983080| gnu cfebr %r8,3,%f0 + b3983180| gnu cfebra %r8,3,%f0,1 + b3a83080| gnu cgebr %r8,3,%f0 + b3a83180| gnu cgebra %r8,3,%f0,1 + b3b83080| gnu cfer %r8,3,%f0 + b3c83080| gnu cger %r8,3,%f0 + b39e3180| gnu clfxbr %r8,3,%f0,1 + b3ae3180| gnu clgxbr %r8,3,%f0,1 + b94b3180| gnu clfxtr %r8,3,%f0,1 + b94a3180| gnu clgxtr %r8,3,%f0,1 + b39d3180| gnu clfdbr %r8,3,%f0,1 + b3ad3180| gnu clgdbr %r8,3,%f0,1 + b9433180| gnu clfdtr %r8,3,%f0,1 + b9423180| gnu clgdtr %r8,3,%f0,1 + b39c3180| gnu clfebr %r8,3,%f0,1 + b3ac3180| gnu clgebr %r8,3,%f0,1 +ed03100b83ad| gnu cpxt %f8,11(4,%r1),3 +ed03100b83ac| gnu cpdt %f8,11(4,%r1),3 + b3eb0180| gnu csxtr %r8,%f0,1 + b3e30180| gnu csdtr %r8,%f0,1 + b3ea0080| gnu cuxtr %r8,%f0 + b3e20080| gnu cudtr %r8,%f0 +ed03100b83a9| gnu czxt %f8,11(4,%r1),3 +ed03100b83a8| gnu czdt %f8,11(4,%r1),3 
+ b2a63080| gnu cu21 %r8,%r0,3 + b9b13080| gnu cu24 %r8,%r0,3 + b2a63080| gnu cu21 %r8,%r0,3 + b2a73080| gnu cu12 %r8,%r0,3 + b2a73080| gnu cu12 %r8,%r0,3 + b9b03080| gnu cu14 %r8,%r0,3 + b9b30080| gnu cu42 %r8,%r0 + b9b20080| gnu cu41 %r8,%r0 + b24d0080| gnu cpya %a8,%a0 + b3720080| gnu cpsdr %f8,%f0,%f0 +e6235000087c| gnu vscshp %v18,%v3,%v5 +e62350901874| gnu vschp %v18,%v3,%v5,1,9 + b9390080| gnu dfltcc %r8,%r0,%r0 + 5d82100b| gnu d %r8,11(%r2,%r1) + 1d80| gnu dr %r8,%r0 + b34d0080| gnu dxbr %f8,%f0 + b3d90080| gnu dxtr %f8,%f0,%f0 + b3d90180| gnu dxtra %f8,%f0,%f0,1 + b22d0080| gnu dxr %f8,%f0 +ed82100b001d| gnu ddb %f8,11(%r2,%r1) + b31d0080| gnu ddbr %f8,%f0 + b3d10080| gnu ddtr %f8,%f0,%f0 + b3d10180| gnu ddtra %f8,%f0,%f0,1 + 6d82100b| gnu dd %f8,11(%r2,%r1) + 2d80| gnu ddr %f8,%f0 +ed82100b000d| gnu deb %f8,11(%r2,%r1) + b30d0080| gnu debr %f8,%f0 + 7d82100b| gnu de %f8,11(%r2,%r1) + 3d80| gnu der %f8,%f0 +fd332006100b| gnu dp 6(4,%r2),11(4,%r1) +e382100b0097| gnu dl %r8,11(%r2,%r1) + b9970080| gnu dlr %r8,%r0 +e382100b0087| gnu dlg %r8,11(%r2,%r1) + b9870080| gnu dlgr %r8,%r0 +e382100b000d| gnu dsg %r8,11(%r2,%r1) + b90d0080| gnu dsgr %r8,%r0 +e382100b001d| gnu dsgf %r8,11(%r2,%r1) + b91d0080| gnu dsgfr %r8,%r0 + b35b0180| gnu didbr %f8,%f0,%f0,1 + b3530180| gnu diebr %f8,%f0,%f0,1 +de032006100b| gnu ed 6(4,%r2),11(%r1) +df032006100b| gnu edmk 6(4,%r2),11(%r1) + 5782100b| gnu x %r8,11(%r2,%r1) + 1780| gnu xr %r8,%r0 + b9f70080| gnu xrk %r8,%r0,%r0 +e382100b0057| gnu xy %r8,11(%r2,%r1) +e382100b0082| gnu xg %r8,11(%r2,%r1) + b9820080| gnu xgr %r8,%r0 + b9e70080| gnu xgrk %r8,%r0,%r0 +d7032006100b| gnu xc 6(4,%r2),11(%r1) + 97082006| gnu xi 6(%r2),8 +eb0820060057| gnu xiy 6(%r2),8 +c08600000008| gnu xihf %r8,8 +c08700000008| gnu xilf %r8,8 + 4482100b| gnu ex %r8,11(%r2,%r1) +c68000000000| gnu exrl %r8,0x720 + b24f0080| gnu ear %r8,%a0 + b99d0080| gnu esea %r8 + b3ed0080| gnu eextr %r8,%f0 + b3e50080| gnu eedtr %r8,%f0 +eb80100b004c| gnu ecag 
%r8,%r0,11(%r1) +c8012006100b| gnu ectg 6(%r2),11(%r1),%r0 + b38c0080| gnu efpc %r8 + b2260080| gnu epar %r8 + b99a0080| gnu epair %r8 + b98d0080| gnu epsw %r8,%r0 + b2270080| gnu esar %r8 + b99b0080| gnu esair %r8 + b3ef0080| gnu esxtr %r8,%f0 + b3e70080| gnu esdtr %r8,%f0 + b2490080| gnu ereg %r8,%r0 + b90e0080| gnu eregg %r8,%r0 + b24a0080| gnu esta %r8,%r0 + b2ec0080| gnu etnd %r8 + b9830080| gnu flogr %r8,%r0 + b2310000| gnu hsch + 2480| gnu hdr %f8,%f0 + 3480| gnu her %f8,%f0 + b2240080| gnu iac %r8 + b3fe0080| gnu iextr %f8,%f0,%r0 + b3f60080| gnu iedtr %f8,%f0,%r0 + 4382100b| gnu ic %r8,11(%r2,%r1) +e382100b0073| gnu icy %r8,11(%r2,%r1) +eb83100b0080| gnu icmh %r8,3,11(%r1) + bf83100b| gnu icm %r8,3,11(%r1) +eb83100b0081| gnu icmy %r8,3,11(%r1) + a5800008| gnu iihh %r8,8 + a5810008| gnu iihl %r8,8 +c08800000008| gnu iihf %r8,8 + a5820008| gnu iilh %r8,8 + a5830008| gnu iill %r8,8 +c08900000008| gnu iilf %r8,8 + b2220080| gnu ipm %r8 + b20b0000| gnu ipk + b9ac0080| gnu irbm %r8,%r0 + b2290080| gnu iske %r8,%r0 + b2230080| gnu ivsk %r8,%r0 + b98e0180| gnu idte %r8,%r0,%r0,1 + b2210180| gnu ipte %r8,%r0,%r0,1 + 5882100b| gnu l %r8,11(%r2,%r1) + 1880| gnu lr %r8,%r0 +e382100b0058| gnu ly %r8,11(%r2,%r1) +e382100b0004| gnu lg %r8,11(%r2,%r1) + b9040080| gnu lgr %r8,%r0 +e382100b0014| gnu lgf %r8,11(%r2,%r1) + b9140080| gnu lgfr %r8,%r0 + b3650080| gnu lxr %f8,%f0 + 6882100b| gnu ld %f8,11(%r2,%r1) + 2880| gnu ldr %f8,%f0 +ed82100b0065| gnu ldy %f8,11(%r2,%r1) + 7882100b| gnu le %f8,11(%r2,%r1) + 3880| gnu ler %f8,%f0 +ed82100b0064| gnu ley %f8,11(%r2,%r1) + 9a80100b| gnu lam %a8,%a0,11(%r1) +eb80100b009a| gnu lamy %a8,%a0,11(%r1) + 4182100b| gnu la %r8,11(%r2,%r1) +e382100b0071| gnu lay %r8,11(%r2,%r1) + 5182100b| gnu lae %r8,11(%r2,%r1) +e382100b0075| gnu laey %r8,11(%r2,%r1) +c08000000000| gnu larl %r8,0x836 +e5002006100b| gnu lasp 6(%r2),11(%r1) +eb80100b00f8| gnu laa %r8,%r0,11(%r1) +eb80100b00e8| gnu laag %r8,%r0,11(%r1) +eb80100b00fa| gnu laal 
%r8,%r0,11(%r1) +eb80100b00ea| gnu laalg %r8,%r0,11(%r1) +eb80100b00f4| gnu lan %r8,%r0,11(%r1) +eb80100b00e4| gnu lang %r8,%r0,11(%r1) +eb80100b00f7| gnu lax %r8,%r0,11(%r1) +eb80100b00e7| gnu laxg %r8,%r0,11(%r1) +eb80100b00f6| gnu lao %r8,%r0,11(%r1) +eb80100b00e6| gnu laog %r8,%r0,11(%r1) +e382100b0012| gnu lt %r8,11(%r2,%r1) + 1280| gnu ltr %r8,%r0 +e382100b0002| gnu ltg %r8,11(%r2,%r1) + b9020080| gnu ltgr %r8,%r0 +e382100b0032| gnu ltgf %r8,11(%r2,%r1) + b9120080| gnu ltgfr %r8,%r0 + b3420080| gnu ltxbr %f8,%f0 + b3de0080| gnu ltxtr %f8,%f0 + b3620080| gnu ltxr %f8,%f0 + b3120080| gnu ltdbr %f8,%f0 + b3d60080| gnu ltdtr %f8,%f0 + 2280| gnu ltdr %f8,%f0 + b3020080| gnu ltebr %f8,%f0 + 3280| gnu lter %f8,%f0 +e382100b009f| gnu lat %r8,11(%r2,%r1) +e382100b0085| gnu lgat %r8,11(%r2,%r1) +e382100b003b| gnu lzrf %r8,11(%r2,%r1) +e382100b002a| gnu lzrg %r8,11(%r2,%r1) + b200100b| gnu lbear 11(%r1) +e382100b0076| gnu lb %r8,11(%r2,%r1) + b9260080| gnu lbr %r8,%r0 +e382100b0077| gnu lgb %r8,11(%r2,%r1) + b9060080| gnu lgbr %r8,%r0 +e382100b00c0| gnu lbh %r8,11(%r2,%r1) + 1380| gnu lcr %r8,%r0 + b9030080| gnu lcgr %r8,%r0 + b9130080| gnu lcgfr %r8,%r0 + b3430080| gnu lcxbr %f8,%f0 + b3630080| gnu lcxr %f8,%f0 + b3130080| gnu lcdbr %f8,%f0 + 2380| gnu lcdr %f8,%f0 + b3730080| gnu lcdfr %f8,%f0 + b3030080| gnu lcebr %f8,%f0 + 3380| gnu lcer %f8,%f0 + b780100b| gnu lctl %c8,%c0,11(%r1) +eb80100b002f| gnu lctlg %c8,%c0,11(%r1) +e782100b3027| gnu lcbb %r8,11(%r2,%r1),3 + b3473080| gnu fixbr %f8,3,%f0 + b3473180| gnu fixbra %f8,3,%f0,1 + b3df3180| gnu fixtr %f8,3,%f0,1 + b3670080| gnu fixr %f8,%f0 + b35f3080| gnu fidbr %f8,3,%f0 + b35f3180| gnu fidbra %f8,3,%f0,1 + b3d73180| gnu fidtr %f8,3,%f0,1 + b37f0080| gnu fidr %f8,%f0 + b3573080| gnu fiebr %f8,3,%f0 + b3573180| gnu fiebra %f8,3,%f0,1 + b3770080| gnu fier %f8,%f0 + b29d100b| gnu lfpc 11(%r1) + b2bd100b| gnu lfas 11(%r1) + b3c10080| gnu ldgr %f8,%r0 + b3cd0080| gnu lgdr %r8,%f0 +e382100b004c| gnu lgg %r8,11(%r2,%r1) 
+e382100b004d| gnu lgsc %r8,11(%r2,%r1) + 4882100b| gnu lh %r8,11(%r2,%r1) + b9270080| gnu lhr %r8,%r0 +e382100b0078| gnu lhy %r8,11(%r2,%r1) +e382100b0015| gnu lgh %r8,11(%r2,%r1) + b9070080| gnu lghr %r8,%r0 +e382100b00c4| gnu lhh %r8,11(%r2,%r1) +ec830008004e| gnu lochhinle %r8,8 + a7880008| gnu lhi %r8,8 + a7890008| gnu lghi %r8,8 +ec8300080042| gnu lochinle %r8,8 +ec8300080046| gnu locghinle %r8,8 +c48500000000| gnu lhrl %r8,0x99e +c48400000000| gnu lghrl %r8,0x9a4 +e382100b00ca| gnu lfh %r8,11(%r2,%r1) +e382100b00c8| gnu lfhat %r8,11(%r2,%r1) +eb83100b00e0| gnu locfhnle %r8,11(%r1) + b9e03080| gnu locfhrnle %r8,%r0 +c08100000008| gnu lgfi %r8,8 +ed82100b0005| gnu lxdb %f8,11(%r2,%r1) + b3050080| gnu lxdbr %f8,%f0 + b3dc0180| gnu lxdtr %f8,%f0,1 +ed82100b0025| gnu lxd %f8,11(%r2,%r1) + b3250080| gnu lxdr %f8,%f0 +ed82100b0006| gnu lxeb %f8,11(%r2,%r1) + b3060080| gnu lxebr %f8,%f0 +ed82100b0026| gnu lxe %f8,11(%r2,%r1) + b3260080| gnu lxer %f8,%f0 +ed82100b0004| gnu ldeb %f8,11(%r2,%r1) + b3040080| gnu ldebr %f8,%f0 + b3d40180| gnu ldetr %f8,%f0,1 +ed82100b0024| gnu lde %f8,11(%r2,%r1) + b3240080| gnu lder %f8,%f0 +e382100b0016| gnu llgf %r8,11(%r2,%r1) + b9160080| gnu llgfr %r8,%r0 +e382100b0048| gnu llgfsg %r8,11(%r2,%r1) +e382100b009d| gnu llgfat %r8,11(%r2,%r1) +e382100b003a| gnu llzrgf %r8,11(%r2,%r1) +e382100b0094| gnu llc %r8,11(%r2,%r1) + b9940080| gnu llcr %r8,%r0 +e382100b0090| gnu llgc %r8,11(%r2,%r1) + b9840080| gnu llgcr %r8,%r0 +e382100b00c2| gnu llch %r8,11(%r2,%r1) +e382100b0095| gnu llh %r8,11(%r2,%r1) + b9950080| gnu llhr %r8,%r0 +e382100b0091| gnu llgh %r8,11(%r2,%r1) + b9850080| gnu llghr %r8,%r0 +e382100b00c6| gnu llhh %r8,11(%r2,%r1) +c48200000000| gnu llhrl %r8,0xa5a +c48600000000| gnu llghrl %r8,0xa60 + a58c0008| gnu llihh %r8,8 + a58d0008| gnu llihl %r8,8 +c08e00000008| gnu llihf %r8,8 + a58e0008| gnu llilh %r8,8 + a58f0008| gnu llill %r8,8 +c08f00000008| gnu llilf %r8,8 +c48e00000000| gnu llgfrl %r8,0xa82 +e382100b0017| gnu llgt 
%r8,11(%r2,%r1) + b9170080| gnu llgtr %r8,%r0 +e382100b009c| gnu llgtat %r8,11(%r2,%r1) + 9880100b| gnu lm %r8,%r0,11(%r1) +eb80100b0098| gnu lmy %r8,%r0,11(%r1) +eb80100b0004| gnu lmg %r8,%r0,11(%r1) +ef80100b80cd| gnu lmd %r8,%r0,11(%r1),205(%r8) +eb80100b0096| gnu lmh %r8,%r0,11(%r1) + 1180| gnu lnr %r8,%r0 + b9010080| gnu lngr %r8,%r0 + b9110080| gnu lngfr %r8,%r0 + b3410080| gnu lnxbr %f8,%f0 + b3610080| gnu lnxr %f8,%f0 + b3110080| gnu lndbr %f8,%f0 + 2180| gnu lndr %f8,%f0 + b3710080| gnu lndfr %f8,%f0 + b3010080| gnu lnebr %f8,%f0 + 3180| gnu lner %f8,%f0 +eb83100b00f2| gnu locnle %r8,11(%r1) + b9f23080| gnu locrnle %r8,%r0 +eb83100b00e2| gnu locgnle %r8,11(%r1) + b9e23080| gnu locgrnle %r8,%r0 + b9aa0180| gnu lptea %r8,%r0,%r0,1 +c8042006100b| gnu lpd %r0,6(%r2),11(%r1) +c8052006100b| gnu lpdg %r0,6(%r2),11(%r1) +e382100b008f| gnu lpq %r8,11(%r2,%r1) + 1080| gnu lpr %r8,%r0 + b9000080| gnu lpgr %r8,%r0 + b9100080| gnu lpgfr %r8,%r0 + b3400080| gnu lpxbr %f8,%f0 + b3600080| gnu lpxr %f8,%f0 + b3100080| gnu lpdbr %f8,%f0 + 2080| gnu lpdr %f8,%f0 + b3700080| gnu lpdfr %f8,%f0 + b3000080| gnu lpebr %f8,%f0 + 3080| gnu lper %f8,%f0 + 82002006| gnu lpsw 6(%r2) + b2b2100b| gnu lpswe 11(%r1) +eb0020060071| gnu lpswey 6(%r2) + b182100b| gnu lra %r8,11(%r2,%r1) +e382100b0013| gnu lray %r8,11(%r2,%r1) +e382100b0003| gnu lrag %r8,11(%r2,%r1) +c48d00000000| gnu lrl %r8,0xb40 +c48800000000| gnu lgrl %r8,0xb46 +c48c00000000| gnu lgfrl %r8,0xb4c +e382100b001f| gnu lrvh %r8,11(%r2,%r1) +e382100b001e| gnu lrv %r8,11(%r2,%r1) + b91f0080| gnu lrvr %r8,%r0 +e382100b000f| gnu lrvg %r8,11(%r2,%r1) + b90f0080| gnu lrvgr %r8,%r0 + b3450080| gnu ldxbr %f8,%f0 + b3453180| gnu ldxbra %f8,3,%f0,1 + b3dd3180| gnu ldxtr %f8,3,%f0,1 + 2580| gnu ldxr %f8,%f0 + 2580| gnu ldxr %f8,%f0 + b3460080| gnu lexbr %f8,%f0 + b3463180| gnu lexbra %f8,3,%f0,1 + b3660080| gnu lexr %f8,%f0 + b3440080| gnu ledbr %f8,%f0 + b3443180| gnu ledbra %f8,3,%f0,1 + b3d53180| gnu ledtr %f8,3,%f0,1 + 3580| gnu ledr 
%f8,%f0 + 3580| gnu ledr %f8,%f0 + b24b0080| gnu lura %r8,%r0 + b9050080| gnu lurag %r8,%r0 + b3760080| gnu lzxr %f8 + b3750080| gnu lzdr %f8 + b3740080| gnu lzer %f8 + b2470080| gnu msta %r8 + b232100b| gnu msch 11(%r1) + af082006| gnu mc 6(%r2),8 +e54420060008| gnu mvhhi 6(%r2),8 +e54c20060008| gnu mvhi 6(%r2),8 +e54820060008| gnu mvghi 6(%r2),8 +d2032006100b| gnu mvc 6(4,%r2),11(%r1) + 92082006| gnu mvi 6(%r2),8 +eb0820060052| gnu mviy 6(%r2),8 +e8032006100b| gnu mvcin 6(4,%r2),11(%r1) + 0e80| gnu mvcl %r8,%r0 + a880100b| gnu mvcle %r8,%r0,11(%r1) +eb80100b008e| gnu mvclu %r8,%r0,11(%r1) +d1032006100b| gnu mvn 6(4,%r2),11(%r1) + b2540080| gnu mvpg %r8,%r0 +e50a2006100b| gnu mvcrl 6(%r2),11(%r1) + b2550080| gnu mvst %r8,%r0 +da802006100b| gnu mvcp 6(%r8,%r2),11(%r1),%r0 +db802006100b| gnu mvcs 6(%r8,%r2),11(%r1),%r0 +e50f2006100b| gnu mvcdk 6(%r2),11(%r1) +d9802006100b| gnu mvck 6(%r8,%r2),11(%r1),%r0 +f1332006100b| gnu mvo 6(4,%r2),11(4,%r1) +c8002006100b| gnu mvcos 6(%r2),11(%r1),%r0 +e50e2006100b| gnu mvcsk 6(%r2),11(%r1) +d3032006100b| gnu mvz 6(4,%r2),11(%r1) +e382100b0084| gnu mg %r8,11(%r2,%r1) + b9ec0080| gnu mgrk %r8,%r0,%r0 + 5c82100b| gnu m %r8,11(%r2,%r1) +e382100b005c| gnu mfy %r8,11(%r2,%r1) + 1c80| gnu mr %r8,%r0 + b34c0080| gnu mxbr %f8,%f0 + b3d80080| gnu mxtr %f8,%f0,%f0 + b3d80180| gnu mxtra %f8,%f0,%f0,1 + 2680| gnu mxr %f8,%f0 +ed82100b001c| gnu mdb %f8,11(%r2,%r1) + b31c0080| gnu mdbr %f8,%f0 + b3d00080| gnu mdtr %f8,%f0,%f0 + b3d00180| gnu mdtra %f8,%f0,%f0,1 + 6c82100b| gnu md %f8,11(%r2,%r1) + 2c80| gnu mdr %f8,%f0 +ed82100b0007| gnu mxdb %f8,11(%r2,%r1) + b3070080| gnu mxdbr %f8,%f0 + 6782100b| gnu mxd %f8,11(%r2,%r1) + 2780| gnu mxdr %f8,%f0 +ed82100b0017| gnu meeb %f8,11(%r2,%r1) + b3170080| gnu meebr %f8,%f0 +ed82100b0037| gnu mee %f8,11(%r2,%r1) + b3370080| gnu meer %f8,%f0 +ed82100b000c| gnu mdeb %f8,11(%r2,%r1) + b30c0080| gnu mdebr %f8,%f0 + 7c82100b| gnu mde %f8,11(%r2,%r1) + 3c80| gnu mder %f8,%f0 + 7c82100b| gnu mde 
%f8,11(%r2,%r1) + 3c80| gnu mder %f8,%f0 +ed02100b803a| gnu may %f8,%f0,11(%r2,%r1) + b33a8000| gnu mayr %f8,%f0,%f0 +ed02100b801e| gnu madb %f8,%f0,11(%r2,%r1) + b31e8000| gnu madbr %f8,%f0,%f0 +ed02100b803e| gnu mad %f8,%f0,11(%r2,%r1) + b33e8000| gnu madr %f8,%f0,%f0 +ed02100b800e| gnu maeb %f8,%f0,11(%r2,%r1) + b30e8000| gnu maebr %f8,%f0,%f0 +ed02100b802e| gnu mae %f8,%f0,11(%r2,%r1) + b32e8000| gnu maer %f8,%f0,%f0 +ed02100b803c| gnu mayh %f8,%f0,11(%r2,%r1) + b33c8000| gnu mayhr %f8,%f0,%f0 +ed02100b8038| gnu mayl %f8,%f0,11(%r2,%r1) + b3388000| gnu maylr %f8,%f0,%f0 +ed02100b801f| gnu msdb %f8,%f0,11(%r2,%r1) + b31f8000| gnu msdbr %f8,%f0,%f0 +ed02100b803f| gnu msd %f8,%f0,11(%r2,%r1) + b33f8000| gnu msdr %f8,%f0,%f0 +ed02100b800f| gnu mseb %f8,%f0,11(%r2,%r1) + b30f8000| gnu msebr %f8,%f0,%f0 +ed02100b802f| gnu mse %f8,%f0,11(%r2,%r1) + b32f8000| gnu mser %f8,%f0,%f0 +fc332006100b| gnu mp 6(4,%r2),11(4,%r1) + 4c82100b| gnu mh %r8,11(%r2,%r1) +e382100b007c| gnu mhy %r8,11(%r2,%r1) +e382100b003c| gnu mgh %r8,11(%r2,%r1) + a78c0008| gnu mhi %r8,8 + a78d0008| gnu mghi %r8,8 +e382100b0086| gnu mlg %r8,11(%r2,%r1) + b9860080| gnu mlgr %r8,%r0 +e382100b0096| gnu ml %r8,11(%r2,%r1) + b9960080| gnu mlr %r8,%r0 + 7182100b| gnu ms %r8,11(%r2,%r1) +e382100b0053| gnu msc %r8,11(%r2,%r1) + b2520080| gnu msr %r8,%r0 + b9fd0080| gnu msrkc %r8,%r0,%r0 +e382100b0051| gnu msy %r8,11(%r2,%r1) +e382100b000c| gnu msg %r8,11(%r2,%r1) +e382100b0083| gnu msgc %r8,11(%r2,%r1) + b90c0080| gnu msgr %r8,%r0 + b9ed0080| gnu msgrkc %r8,%r0,%r0 +e382100b001c| gnu msgf %r8,11(%r2,%r1) + b91c0080| gnu msgfr %r8,%r0 +c28100000008| gnu msfi %r8,8 +c28000000008| gnu msgfi %r8,8 +ed02100b803d| gnu myh %f8,%f0,11(%r2,%r1) + b33d8000| gnu myhr %f8,%f0,%f0 +ed02100b8039| gnu myl %f8,%f0,11(%r2,%r1) + b3398000| gnu mylr %f8,%f0,%f0 +ed02100b803b| gnu my %f8,%f0,11(%r2,%r1) + b33b8000| gnu myr %f8,%f0,%f0 + b9740080| gnu nnrk %r8,%r0,%r0 + b9640080| gnu nngrk %r8,%r0,%r0 + b93b0000| gnu nnpa + 
b2fa00c8| gnu niai 12,8 +e382100b0025| gnu ntstg %r8,11(%r2,%r1) + b9760080| gnu nork %r8,%r0,%r0 + b9660080| gnu nogrk %r8,%r0,%r0 + b9770080| gnu nxrk %r8,%r0,%r0 + b9670080| gnu nxgrk %r8,%r0,%r0 + 5682100b| gnu o %r8,11(%r2,%r1) + 1680| gnu or %r8,%r0 + b9f60080| gnu ork %r8,%r0,%r0 +e382100b0056| gnu oy %r8,11(%r2,%r1) +e382100b0081| gnu og %r8,11(%r2,%r1) + b9810080| gnu ogr %r8,%r0 + b9e60080| gnu ogrk %r8,%r0,%r0 +d6032006100b| gnu oc 6(4,%r2),11(%r1) + 96082006| gnu oi 6(%r2),8 +eb0820060056| gnu oiy 6(%r2),8 + a5880008| gnu oihh %r8,8 + a5890008| gnu oihl %r8,8 +c08c00000008| gnu oihf %r8,8 + a58a0008| gnu oilh %r8,8 + a58b0008| gnu oill %r8,8 +c08d00000008| gnu oilf %r8,8 + b9750080| gnu ocrk %r8,%r0,%r0 + b9650080| gnu ocgrk %r8,%r0,%r0 +f2332006100b| gnu pack 6(4,%r2),11(4,%r1) +e9032006100b| gnu pka 6(%r2),11(4,%r1) +e1032006100b| gnu pku 6(%r2),11(4,%r1) + b22e0080| gnu pgin %r8,%r0 + b22f0080| gnu pgout %r8,%r0 + b92c0000| gnu pcc + b9280000| gnu pckmo + 010a| gnu pfpo + b9af0080| gnu pfmf %r8,%r0 +ee80100b80cd| gnu plo %r8,11(%r1),%r0,205(%r8) + b2e83080| gnu ppa %r8,%r0,3 + b93c0080| gnu prno %r8,%r0 + b93c0080| gnu prno %r8,%r0 + 0104| gnu ptff + b9a20080| gnu ptf %r8 + b9e13080| gnu popcnt %r8,%r0,3 +e372100b0036| gnu pfd 7,11(%r2,%r1) +c67200000000| gnu pfdrl 7,0xe68 + b218100b| gnu pc 11(%r1) + 0101| gnu pr + b2280080| gnu pt %r8,%r0 + b99e0080| gnu pti %r8,%r0 + b2480000| gnu palb + b20d0000| gnu ptlb + b3fd0180| gnu qaxtr %f8,%f0,%f0,1 + b3f50180| gnu qadtr %f8,%f0,%f0,1 + b28f100b| gnu qpaci 11(%r1) + b3ff0180| gnu rrxtr %f8,%f0,%r0,1 + b3f70180| gnu rrdtr %f8,%f0,%r0,1 + b23b0000| gnu rchp + b98b0180| gnu rdp %r8,%r0,%r0,1 + b22a0080| gnu rrbe %r8,%r0 + b9ae0080| gnu rrbm %r8,%r0 + b277100b| gnu rp 11(%r1) + b2380000| gnu rsch +eb80100b001d| gnu rll %r8,%r0,11(%r1) +eb80100b001c| gnu rllg %r8,%r0,11(%r1) +ec8009691254| gnu rnsbg %r8,%r0,9,105,18 +ec8009691257| gnu rxsbg %r8,%r0,9,105,18 +ec8009691255| gnu risbg %r8,%r0,9,105,18 
+ec8009691259| gnu risbgn %r8,%r0,9,105,18 +ec800969125d| gnu risbhg %r8,%r0,9,105,18 +ec8009691251| gnu risblg %r8,%r0,9,105,18 +ec8009691256| gnu rosbg %r8,%r0,9,105,18 + b25e0080| gnu srst %r8,%r0 + b9be0080| gnu srstu %r8,%r0 + b9f00180| gnu selro %r8,%r0,%r0 + b9e30180| gnu selgro %r8,%r0,%r0 + b9c00180| gnu selfhro %r8,%r0,%r0 + b24e0080| gnu sar %a8,%r0 + b2370000| gnu sal + b219100b| gnu sac 11(%r1) + b279100b| gnu sacf 11(%r1) + 010c| gnu sam24 + 010d| gnu sam31 + 010e| gnu sam64 + b299100b| gnu srnm 11(%r1) + b2b8100b| gnu srnmb 11(%r1) + b23c0000| gnu schm + b204100b| gnu sck 11(%r1) + b206100b| gnu sckc 11(%r1) + 0107| gnu sckpf + b208100b| gnu spt 11(%r1) + b2b9100b| gnu srnmt 11(%r1) + b3840080| gnu sfpc %r8 + b3850080| gnu sfasr %r8 + b210100b| gnu spx 11(%r1) + 0480| gnu spm %r8 + b20a100b| gnu spka 11(%r1) + b2250080| gnu ssar %r8 + b99f0080| gnu ssair %r8 + b22b3080| gnu sske %r8,%r0,3 + 80002006| gnu ssm 6(%r2) +f0392006100b| gnu srp 6(4,%r2),11(%r1),9 + 8f80100b| gnu slda %r8,11(%r1) + 8d80100b| gnu sldl %r8,11(%r1) + 8b80100b| gnu sla %r8,11(%r1) +eb80100b00dd| gnu slak %r8,%r0,11(%r1) +eb80100b000b| gnu slag %r8,%r0,11(%r1) + 8980100b| gnu sll %r8,11(%r1) +eb80100b00df| gnu sllk %r8,%r0,11(%r1) +eb80100b000d| gnu sllg %r8,%r0,11(%r1) + 8e80100b| gnu srda %r8,11(%r1) + 8c80100b| gnu srdl %r8,11(%r1) + 8a80100b| gnu sra %r8,11(%r1) +eb80100b00dc| gnu srak %r8,%r0,11(%r1) +eb80100b000a| gnu srag %r8,%r0,11(%r1) + 8880100b| gnu srl %r8,11(%r1) +eb80100b00de| gnu srlk %r8,%r0,11(%r1) +eb80100b000c| gnu srlg %r8,%r0,11(%r1) +ed02100b8048| gnu slxt %f8,%f0,11(%r2,%r1) +ed02100b8040| gnu sldt %f8,%f0,11(%r2,%r1) +ed02100b8049| gnu srxt %f8,%f0,11(%r2,%r1) +ed02100b8041| gnu srdt %f8,%f0,11(%r2,%r1) + ae80100b| gnu sigp %r8,%r0,11(%r1) + b9380080| gnu sortl %r8,%r0 + b3160080| gnu sqxbr %f8,%f0 + b3360080| gnu sqxr %f8,%f0 +ed82100b0015| gnu sqdb %f8,11(%r2,%r1) + b3150080| gnu sqdbr %f8,%f0 +ed82100b0035| gnu sqd %f8,11(%r2,%r1) + b2440080| gnu sqdr 
%f8,%f0 +ed82100b0014| gnu sqeb %f8,11(%r2,%r1) + b3140080| gnu sqebr %f8,%f0 +ed82100b0034| gnu sqe %f8,11(%r2,%r1) + b2450080| gnu sqer %f8,%f0 + b233100b| gnu ssch 11(%r1) + 5082100b| gnu st %r8,11(%r2,%r1) +e382100b0050| gnu sty %r8,11(%r2,%r1) +e382100b0024| gnu stg %r8,11(%r2,%r1) + 6082100b| gnu std %f8,11(%r2,%r1) +ed82100b0067| gnu stdy %f8,11(%r2,%r1) + 7082100b| gnu ste %f8,11(%r2,%r1) +ed82100b0066| gnu stey %f8,11(%r2,%r1) + 9b80100b| gnu stam %a8,%a0,11(%r1) +eb80100b009b| gnu stamy %a8,%a0,11(%r1) + b201100b| gnu stbear 11(%r1) + b23a100b| gnu stcps 11(%r1) + b239100b| gnu stcrw 11(%r1) + 4282100b| gnu stc %r8,11(%r2,%r1) +e382100b0072| gnu stcy %r8,11(%r2,%r1) +e382100b00c3| gnu stch %r8,11(%r2,%r1) +eb83100b002c| gnu stcmh %r8,3,11(%r1) + be83100b| gnu stcm %r8,3,11(%r1) +eb83100b002d| gnu stcmy %r8,3,11(%r1) + b205100b| gnu stck 11(%r1) + b207100b| gnu stckc 11(%r1) + b278100b| gnu stcke 11(%r1) + b27c100b| gnu stckf 11(%r1) + b680100b| gnu stctl %c8,%c0,11(%r1) +eb80100b0025| gnu stctg %c8,%c0,11(%r1) + b212100b| gnu stap 11(%r1) + b202100b| gnu stidp 11(%r1) + b209100b| gnu stpt 11(%r1) + b2b1100b| gnu stfl 11(%r1) + b2b0100b| gnu stfle 11(%r1) + b29c100b| gnu stfpc 11(%r1) +e382100b0049| gnu stgsc %r8,11(%r2,%r1) + 4082100b| gnu sth %r8,11(%r2,%r1) +e382100b0070| gnu sthy %r8,11(%r2,%r1) +e382100b00c7| gnu sthh %r8,11(%r2,%r1) +c48700000000| gnu sthrl %r8,0x109c +e382100b00cb| gnu stfh %r8,11(%r2,%r1) +eb83100b00e1| gnu stocfhnle %r8,11(%r1) + 9080100b| gnu stm %r8,%r0,11(%r1) +eb80100b0090| gnu stmy %r8,%r0,11(%r1) +eb80100b0024| gnu stmg %r8,%r0,11(%r1) +eb80100b0026| gnu stmh %r8,%r0,11(%r1) +eb83100b00f3| gnu stocnle %r8,11(%r1) +eb83100b00e3| gnu stocgnle %r8,11(%r1) +e382100b008e| gnu stpq %r8,11(%r2,%r1) + b211100b| gnu stpx 11(%r1) +e5022006100b| gnu strag 6(%r2),11(%r1) +c48f00000000| gnu strl %r8,0x10e0 +c48b00000000| gnu stgrl %r8,0x10e6 +e382100b003f| gnu strvh %r8,11(%r2,%r1) +e382100b003e| gnu strv %r8,11(%r2,%r1) +e382100b002f| 
gnu strvg %r8,11(%r2,%r1) + b234100b| gnu stsch 11(%r1) + b27d100b| gnu stsi 11(%r1) + ac082006| gnu stnsm 6(%r2),8 + ad082006| gnu stosm 6(%r2),8 + b2460080| gnu stura %r8,%r0 + b9250080| gnu sturg %r8,%r0 + 5b82100b| gnu s %r8,11(%r2,%r1) + 1b80| gnu sr %r8,%r0 + b9f90080| gnu srk %r8,%r0,%r0 +e382100b005b| gnu sy %r8,11(%r2,%r1) +e382100b0009| gnu sg %r8,11(%r2,%r1) + b9090080| gnu sgr %r8,%r0 + b9e90080| gnu sgrk %r8,%r0,%r0 +e382100b0019| gnu sgf %r8,11(%r2,%r1) + b9190080| gnu sgfr %r8,%r0 + b34b0080| gnu sxbr %f8,%f0 + b3db0080| gnu sxtr %f8,%f0,%f0 + b3db0180| gnu sxtra %f8,%f0,%f0,1 +ed82100b001b| gnu sdb %f8,11(%r2,%r1) + b31b0080| gnu sdbr %f8,%f0 + b3d30080| gnu sdtr %f8,%f0,%f0 + b3d30180| gnu sdtra %f8,%f0,%f0,1 +ed82100b000b| gnu seb %f8,11(%r2,%r1) + b30b0080| gnu sebr %f8,%f0 +fb332006100b| gnu sp 6(4,%r2),11(4,%r1) + 4b82100b| gnu sh %r8,11(%r2,%r1) +e382100b007b| gnu shy %r8,11(%r2,%r1) +e382100b0039| gnu sgh %r8,11(%r2,%r1) + b9c90080| gnu shhhr %r8,%r0,%r0 + b9d90080| gnu shhlr %r8,%r0,%r0 + 5f82100b| gnu sl %r8,11(%r2,%r1) + 1f80| gnu slr %r8,%r0 + b9fb0080| gnu slrk %r8,%r0,%r0 +e382100b005f| gnu sly %r8,11(%r2,%r1) +e382100b000b| gnu slg %r8,11(%r2,%r1) + b90b0080| gnu slgr %r8,%r0 + b9eb0080| gnu slgrk %r8,%r0,%r0 +e382100b001b| gnu slgf %r8,11(%r2,%r1) + b91b0080| gnu slgfr %r8,%r0 + b9cb0080| gnu slhhhr %r8,%r0,%r0 + b9db0080| gnu slhhlr %r8,%r0,%r0 +c28500000008| gnu slfi %r8,8 +c28400000008| gnu slgfi %r8,8 +e382100b0099| gnu slb %r8,11(%r2,%r1) + b9990080| gnu slbr %r8,%r0 +e382100b0089| gnu slbg %r8,11(%r2,%r1) + b9890080| gnu slbgr %r8,%r0 + 3780| gnu sxr %f8,%f0 + 6b82100b| gnu sd %f8,11(%r2,%r1) + 2b80| gnu sdr %f8,%f0 + 7b82100b| gnu se %f8,11(%r2,%r1) + 3b80| gnu ser %f8,%f0 + 6f82100b| gnu sw %f8,11(%r2,%r1) + 2f80| gnu swr %f8,%f0 + 7f82100b| gnu su %f8,11(%r2,%r1) + 3f80| gnu sur %f8,%f0 + 0a7c| gnu svc 124 + b24c0080| gnu tar %a8,%r0 + 010b| gnu tam + 93002006| gnu ts 6(%r2) + b22c0080| gnu tb %r8,%r0 +ed82100b0012| gnu tcxb 
%f8,11(%r2,%r1) +ed82100b0058| gnu tdcxt %f8,11(%r2,%r1) +ed82100b0011| gnu tcdb %f8,11(%r2,%r1) +ed82100b0054| gnu tdcdt %f8,11(%r2,%r1) +ed82100b0010| gnu tceb %f8,11(%r2,%r1) +ed82100b0050| gnu tdcet %f8,11(%r2,%r1) +ed82100b0059| gnu tdgxt %f8,11(%r2,%r1) +ed82100b0055| gnu tdgdt %f8,11(%r2,%r1) +ed82100b0051| gnu tdget %f8,11(%r2,%r1) +eb30200600c0| gnu tp 6(4,%r2) + b9a10080| gnu tpei %r8,%r0 + b236100b| gnu tpi 11(%r1) +e5012006100b| gnu tprot 6(%r2),11(%r1) + b235100b| gnu tsch 11(%r1) + 91082006| gnu tm 6(%r2),8 +eb0820060051| gnu tmy 6(%r2),8 + a7820008| gnu tmhh %r8,8 + a7830008| gnu tmhl %r8,8 + a7800008| gnu tmlh %r8,8 + a7810008| gnu tmll %r8,8 + a7800008| gnu tmlh %r8,8 + a7810008| gnu tmll %r8,8 + 9980100b| gnu trace %r8,%r0,11(%r1) +eb80100b000f| gnu tracg %r8,%r0,11(%r1) + b2fc100b| gnu tabort 11(%r1) +e56120060008| gnu tbeginc 6(%r2),8 +e56020060008| gnu tbegin 6(%r2),8 + b2f80000| gnu tend +dc032006100b| gnu tr 6(4,%r2),11(%r1) +dd032006100b| gnu trt 6(4,%r2),11(%r1) + b9bf3080| gnu trte %r8,%r0,3 +d0032006100b| gnu trtr 6(4,%r2),11(%r1) + b9bd3080| gnu trtre %r8,%r0,3 + b2a50080| gnu tre %r8,%r0 + b9933080| gnu troo %r8,%r0,3 + b9923080| gnu trot %r8,%r0,3 + b9913080| gnu trto %r8,%r0,3 + b9903080| gnu trtt %r8,%r0,3 + 01ff| gnu trap2 + b2ff100b| gnu trap4 11(%r1) +f3332006100b| gnu unpk 6(4,%r2),11(4,%r1) +ea032006100b| gnu unpka 6(4,%r2),11(%r1) +e2032006100b| gnu unpku 6(4,%r2),11(%r1) + 0102| gnu upt +e723500018f3| gnu vah %v18,%v3,%v5 +e723500018f1| gnu vacch %v18,%v3,%v5 +e62350969871| gnu vap %v18,%v3,%v5,105,9 +e723590088bb| gnu vac %v18,%v3,%v5,%v8,9 +e723590088b9| gnu vaccc %v18,%v3,%v5,%v8,9 +e72350000868| gnu vn %v18,%v3,%v5 +e72350000869| gnu vnc %v18,%v3,%v5 +e723500018f2| gnu vavgh %v18,%v3,%v5 +e723500018f0| gnu vavglh %v18,%v3,%v5 +e72350000885| gnu vbperm %v18,%v3,%v5 +e72350000866| gnu vcksm %v18,%v3,%v5 +e60230300477| gnu vcp %v18,%v3,3 +e723509018f8| gnu vceq %v18,%v3,%v5,1,9 +e723509018fb| gnu vch %v18,%v3,%v5,1,9 
+e723509018f9| gnu vchl %v18,%v3,%v5,1,9 +e6235010087d| gnu vcsph %v18,%v3,%v5,1 +e68300310050| gnu vcvb %r8,%v3,3,1 +e68300310052| gnu vcvbg %r8,%v3,3,1 +e62000109858| gnu vcvd %v18,%r0,9,1 +e6200010985a| gnu vcvdg %v18,%r0,9,1 +e62300300851| gnu vclzdp %v18,%v3,3 +e72300003853| gnu vclzg %v18,%v3 +e72300003852| gnu vctzg %v18,%v3 +e6235096987a| gnu vdp %v18,%v3,%v5,105,9 +e723000038db| gnu vecg %v18,%v3 +e723000038d9| gnu veclg %v18,%v3 +e72350699872| gnu verim %v18,%v3,%v5,105,9 +e725100b1833| gnu verllh %v18,%v5,11(%r1) +e72350001873| gnu verllvh %v18,%v3,%v5 +e72350001870| gnu veslvh %v18,%v3,%v5 +e725100b1830| gnu veslh %v18,%v5,11(%r1) +e725100b183a| gnu vesrah %v18,%v5,11(%r1) +e7235000187a| gnu vesravh %v18,%v3,%v5 +e725100b1838| gnu vesrlh %v18,%v5,11(%r1) +e72350001878| gnu vesrlvh %v18,%v3,%v5 +e7235000086d| gnu vx %v18,%v3,%v5 +e72350901882| gnu vfaehs %v18,%v3,%v5,8 +e72350901880| gnu vfeeh %v18,%v3,%v5,9 +e72350901881| gnu vfeneh %v18,%v3,%v5,9 +e723500918e3| gnu vfa %v18,%v3,%v5,1,9 +e723000138ca| gnu wfk %v18,%v3,3,1 +e72350b918e8| gnu vfce %v18,%v3,%v5,1,9,11 +e72350b918eb| gnu vfch %v18,%v3,%v5,1,9,11 +e72350b918ea| gnu vfche %v18,%v3,%v5,1,9,11 +e723000138cb| gnu wfc %v18,%v3,3,1 +e62300013856| gnu vclfnh %v18,%v3,3,1 +e6230001385e| gnu vclfnl %v18,%v3,3,1 +e62350091875| gnu vcrnf %v18,%v3,%v5,1,9 +e723009138c3| gnu vcdgb %v18,%v3,1,9 +e723009138c3| gnu vcdgb %v18,%v3,1,9 +e723009138c1| gnu vcdlgb %v18,%v3,1,9 +e723009138c1| gnu vcdlgb %v18,%v3,1,9 +e6230001385d| gnu vcfn %v18,%v3,3,1 +e723009138c2| gnu vcgdb %v18,%v3,1,9 +e723009138c2| gnu vcgdb %v18,%v3,1,9 +e723009138c0| gnu vclgdb %v18,%v3,1,9 +e723009138c0| gnu vclgdb %v18,%v3,1,9 +e62300013855| gnu vcnf %v18,%v3,3,1 +e723500918e5| gnu vfd %v18,%v3,%v5,1,9 +e723000138c4| gnu vfll %v18,%v3,3,1 +e723009138c5| gnu vflrd %v18,%v3,1,9 +e72350b918ef| gnu vfmax %v18,%v3,%v5,1,9,11 +e72350b918ee| gnu vfmin %v18,%v3,%v5,1,9,11 +e723500918e7| gnu vfm %v18,%v3,%v5,1,9 +e7235b09888f| gnu vfma 
%v18,%v3,%v5,%v8,9,11 +e7235b09888e| gnu vfms %v18,%v3,%v5,%v8,9,11 +e7235b09889f| gnu vfnma %v18,%v3,%v5,%v8,9,11 +e7235b09889e| gnu vfnms %v18,%v3,%v5,%v8,9,11 +e723009138cc| gnu vfpso %v18,%v3,3,1,9 +e723000138ce| gnu vfsq %v18,%v3,3,1 +e723500918e2| gnu vfs %v18,%v3,%v5,1,9 +e7230099184a| gnu vftci %v18,%v3,9,1,9 +e723500018b4| gnu vgfmh %v18,%v3,%v5 +e723590088bc| gnu vgfma %v18,%v3,%v5,%v8,9 +e723100b3813| gnu vgef %v18,11(%v3,%r1),3 +e723100b3812| gnu vgeg %v18,11(%v3,%r1),3 +e72000080844| gnu vgbm %v18,8 +e72008091846| gnu vgmh %v18,8,9 +e7230090385c| gnu vistr %v18,%v3,3,9 +e722100b3806| gnu vl %v18,11(%r2,%r1),3 +e72300000856| gnu vlr %v18,%v3 +e722100b3805| gnu vlrepg %v18,11(%r2,%r1) +e622100b3801| gnu vlebrh %v18,11(%r2,%r1),3 +e622100b3803| gnu vlebrf %v18,11(%r2,%r1),3 +e622100b3802| gnu vlebrg %v18,11(%r2,%r1),3 +e622100b3805| gnu vlbrrepg %v18,11(%r2,%r1) +e622100b3804| gnu ldrv %v18,11(%r2,%r1) +e622100b3806| gnu vlbrg %v18,11(%r2,%r1) +e723000038de| gnu vlcg %v18,%v3 +e722100b3801| gnu vleh %v18,11(%r2,%r1),3 +e722100b3803| gnu vlef %v18,11(%r2,%r1),3 +e722100b3802| gnu vleg %v18,11(%r2,%r1),3 +e722100b3800| gnu vleb %v18,11(%r2,%r1),3 +e72000083841| gnu vleih %v18,8,3 +e72000083843| gnu vleif %v18,8,3 +e72000083842| gnu vleig %v18,8,3 +e72000083840| gnu vleib %v18,8,3 +e622100b3807| gnu vlerg %v18,11(%r2,%r1) +e723009138c7| gnu vfidb %v18,%v3,1,9 +e785100b1021| gnu vlgvh %r8,%v5,11(%r1) +e62000089849| gnu vlip %v18,8,9 +e722100b3804| gnu vllezg %v18,11(%r2,%r1) +e725100b1836| gnu vlm %v18,%v5,11(%r1),1 +e723000038df| gnu vlpg %v18,%v3 +e609100b2135| gnu vlrl %v18,11(%r1),9 +e600100b2137| gnu vlrlr %v18,%r0,11(%r1) +e722100b3807| gnu vlbb %v18,11(%r2,%r1),3 +e720100b1822| gnu vlvgh %v18,%r0,11(%r1) +e72000000862| gnu vlvgp %v18,%r0,%r0 +e720100b0837| gnu vll %v18,%r0,11(%r1) +e723500018ff| gnu vmxh %v18,%v3,%v5 +e723500018fd| gnu vmxlh %v18,%v3,%v5 +e72350001861| gnu vmrhh %v18,%v3,%v5 +e72350001860| gnu vmrlh %v18,%v3,%v5 +e723500018fe| gnu vmnh 
%v18,%v3,%v5 +e723500018fc| gnu vmnlh %v18,%v3,%v5 +e723590088ae| gnu vmae %v18,%v3,%v5,%v8,9 +e723590088ab| gnu vmah %v18,%v3,%v5,%v8,9 +e723590088ac| gnu vmale %v18,%v3,%v5,%v8,9 +e723590088a9| gnu vmalh %v18,%v3,%v5,%v8,9 +e723590088ad| gnu vmalo %v18,%v3,%v5,%v8,9 +e723590088aa| gnu vmal %v18,%v3,%v5,%v8,9 +e723590088af| gnu vmao %v18,%v3,%v5,%v8,9 +e62350969879| gnu vmsp %v18,%v3,%v5,105,9 +e62350969878| gnu vmp %v18,%v3,%v5,105,9 +e723500018a6| gnu vmeh %v18,%v3,%v5 +e723500018a3| gnu vmhh %v18,%v3,%v5 +e723500018a4| gnu vmleh %v18,%v3,%v5 +e723500018a1| gnu vmlhh %v18,%v3,%v5 +e723500018a5| gnu vmloh %v18,%v3,%v5 +e723500018a2| gnu vmlhw %v18,%v3,%v5 +e723500018a7| gnu vmoh %v18,%v3,%v5 +e72359b088b8| gnu vmsl %v18,%v3,%v5,%v8,9,11 +e7235000086e| gnu vnn %v18,%v3,%v5 +e7235000086b| gnu vno %v18,%v3,%v5 +e7235000086c| gnu vnx %v18,%v3,%v5 +e7235000086a| gnu vo %v18,%v3,%v5 +e7235000086f| gnu voc %v18,%v3,%v5 +e72350001894| gnu vpkh %v18,%v3,%v5 +e72350901895| gnu vpkls %v18,%v3,%v5,1,9 +e72350901897| gnu vpks %v18,%v3,%v5,1,9 +e609100b2134| gnu vpkz %v18,11(%r1),9 +e62350969870| gnu vpkzr %v18,%v3,%v5,105,9 +e6236990985b| gnu vpsop %v18,%v3,9,105,9 +e7235000888c| gnu vperm %v18,%v3,%v5,%v8 +e72350001884| gnu vpdi %v18,%v3,%v5,1 +e72300003850| gnu vpopctg %v18,%v3 +e6235096987b| gnu vrp %v18,%v3,%v5,105,9 +e7250008184d| gnu vreph %v18,%v5,8 +e72000083845| gnu vrepig %v18,8 +e723100b381b| gnu vscef %v18,11(%v3,%r1),3 +e723100b381a| gnu vsceg %v18,11(%v3,%r1),3 +e7235000888d| gnu vsel %v18,%v3,%v5,%v8 +e6235096987e| gnu vsdp %v18,%v3,%v5,105,9 +e62369909859| gnu vsrp %v18,%v3,9,105,9 +e62350969872| gnu vsrpr %v18,%v3,%v5,105,9 +e72350000874| gnu vsl %v18,%v3,%v5 +e72350000875| gnu vslb %v18,%v3,%v5 +e72350690886| gnu vsld %v18,%v3,%v5,105 +e72350690877| gnu vsldb %v18,%v3,%v5,105 +e7235000087e| gnu vsra %v18,%v3,%v5 +e7235000087f| gnu vsrab %v18,%v3,%v5 +e72350690887| gnu vsrd %v18,%v3,%v5,105 +e7235000087c| gnu vsrl %v18,%v3,%v5 +e7235000087d| gnu vsrlb 
%v18,%v3,%v5 +e7230000385f| gnu vseg %v18,%v3,3 +e722100b380e| gnu vst %v18,11(%r2,%r1),3 +e622100b3809| gnu vstebrh %v18,11(%r2,%r1),3 +e622100b380b| gnu vstebrf %v18,11(%r2,%r1),3 +e622100b380a| gnu vstebrg %v18,11(%r2,%r1),3 +e622100b380e| gnu vstbrg %v18,11(%r2,%r1) +e722100b3809| gnu vsteh %v18,11(%r2,%r1),3 +e722100b380b| gnu vstef %v18,11(%r2,%r1),3 +e722100b380a| gnu vsteg %v18,11(%r2,%r1),3 +e722100b3808| gnu vsteb %v18,11(%r2,%r1),3 +e622100b380f| gnu vsterg %v18,11(%r2,%r1) +e725100b183e| gnu vstm %v18,%v5,11(%r1),1 +e609100b213d| gnu vstrl %v18,11(%r1),9 +e600100b213f| gnu vstrlr %v18,%r0,11(%r1) +e720100b083f| gnu vstl %v18,%r0,11(%r1) +e72359b0888a| gnu vstrc %v18,%v3,%v5,%v8,9,11 +e72359b0888b| gnu vstrs %v18,%v3,%v5,%v8,9,11 +e723500018f7| gnu vsh %v18,%v3,%v5 +e723500018f5| gnu vscbih %v18,%v3,%v5 +e62350969873| gnu vsp %v18,%v3,%v5,105,9 +e723590088bd| gnu vsbcbi %v18,%v3,%v5,%v8,9 +e723590088bf| gnu vsbi %v18,%v3,%v5,%v8,9 +e72350001865| gnu vsumgh %v18,%v3,%v5 +e72350001867| gnu vsumq %v18,%v3,%v5,1 +e72350001864| gnu vsumh %v18,%v3,%v5 +e6020000045f| gnu vtp %v18 +e723000008d8| gnu vtm %v18,%v3 +e723000038d7| gnu vuph %v18,%v3,3 +e723000038d5| gnu vuplh %v18,%v3,3 +e723000038d4| gnu vupll %v18,%v3,3 +e723000038d6| gnu vupl %v18,%v3,3 +e609100b213c| gnu vupkz %v18,11(%r1),9 +e62300300854| gnu vupkzh %v18,%v3,3 +e6230030085c| gnu vupkzl %v18,%v3,3 +f8332006100b| gnu zap 6(4,%r2),11(4,%r1) diff --git a/s390x/s390xmap/map.go b/s390x/s390xmap/map.go new file mode 100644 index 00000000..9ba698f4 --- /dev/null +++ b/s390x/s390xmap/map.go @@ -0,0 +1,636 @@ +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// s390xmap constructs the s390x opcode map from the instruction set CSV file. 
+// +// Usage: +// +// s390map [-fmt=format] s390x.csv +// +// The known output formats are: +// +// text (default) - print decoding tree in text form +// decoder - print decoding tables for the s390xasm package +// encoder - generate a self-contained file which can be used to encode +// go obj.Progs into machine code +// asm - generate a GNU asm file which can be compiled by gcc containing +// all opcodes discovered in s390x.csv using macro friendly arguments. +package main + +import ( + "bytes" + "encoding/csv" + "flag" + "fmt" + gofmt "go/format" + asm "golang.org/x/arch/s390x/s390xasm" + "log" + "os" + "regexp" + "strconv" + "strings" +) + +var format = flag.String("fmt", "text", "output format: text, decoder, asm") +var debug = flag.Bool("debug", false, "enable debugging output") + +var inputFile string + +func usage() { + fmt.Fprintf(os.Stderr, "usage: s390xmap [-fmt=format] s390x.csv\n") + os.Exit(2) +} + +func main() { + log.SetFlags(0) + log.SetPrefix("s390xmap: ") + + flag.Usage = usage + flag.Parse() + if flag.NArg() != 1 { + usage() + } + + inputFile = flag.Arg(0) + + var printTyp func(*Prog) + switch *format { + default: + log.Fatalf("unknown output format %q", *format) + case "text": + printTyp = printText + case "decoder": + printTyp = printDecoder + case "asm": + printTyp = printASM + case "encoder": + printTyp = printEncoder + } + + p, err := readCSV(flag.Arg(0)) + if err != nil { + log.Fatal(err) + } + log.Printf("Parsed %d instruction forms.", len(p.Insts)) + printTyp(p) +} + +// readCSV reads the CSV file and returns the corresponding Prog. +// It may print details about problems to standard error using the log package. +func readCSV(file string) (*Prog, error) { + // Read input. + // Skip leading blank and # comment lines. 
+ f, err := os.Open(file) + if err != nil { + return nil, err + } + csvReader := csv.NewReader(f) + csvReader.Comment = '#' + table, err := csvReader.ReadAll() + if err != nil { + return nil, fmt.Errorf("parsing %s: %v", file, err) + } + if len(table) == 0 { + return nil, fmt.Errorf("empty csv input") + } + if len(table[0]) < 3 { + return nil, fmt.Errorf("csv too narrow: need at least four columns") + } + + p := &Prog{} + for _, row := range table { + add(p, row[0], row[1], row[2], row[3]) + } + return p, nil +} + +type Prog struct { + Insts []Inst + OpRanges map[string]string + nextOrder int // Next position value (used for Insts[x].order) +} + +type Field struct { + Name string + BitField asm.BitField + Type asm.ArgType + flags uint16 +} + +func (f Field) String() string { + return fmt.Sprintf("%v(%s%v)", f.Type, f.Name, f.BitField) +} + +type Inst struct { + Text string + Encoding string + Op string + Mask uint64 + Value uint64 + DontCare uint64 + Len uint16 + Fields []Field +} + +func (i Inst) String() string { + return fmt.Sprintf("%s (%s) %08x/%08x %v (%s)", i.Op, i.Encoding, i.Value, i.Mask, i.Fields, i.Text) +} + +type Arg struct { + Name string + Bits int8 + Offs int8 +} + +func (a Arg) String() string { + return fmt.Sprintf("%s[%d:%d]", a.Name, a.Offs, a.Offs+a.Bits-1) +} + +func (a Arg) Maximum() int { + return 1< 0 { + args[len(args)-1].Bits += int8(off) + } + if name != "" && name != "??" 
{ + arg := Arg{Name: name, Offs: int8(off), Bits: int8(-off)} + args.Append(arg) + } + } + return args +} + +// Compute the Mask (usually Opcode + secondary Opcode bitfields), +// the Value (the expected value under the mask), and +// reserved bits (i.e the // fields which should be set to 0) +func computeMaskValueReserved(args Args, text string) (mask, value, reserved uint64) { + for i := 0; i < len(args); i++ { + arg := args[i] + v, err := strconv.Atoi(arg.Name) + switch { + case err == nil && v >= 0: // is a numbered field + if v < 0 || v > arg.Maximum() { + fmt.Fprintf(os.Stderr, "%s: field %s value (%d) is out of range (%d-bit)\n", text, arg, v, arg.Bits) + } + mask |= arg.BitMask() + value |= uint64(v) << arg.Shift() + args.Delete(i) + i-- + case arg.Name[0] == '/': // don't care + if arg.Name != strings.Repeat("/", len(arg.Name)) { + log.Fatalf("%s: arg %v named like a don't care bit, but it's not", text, arg) + } + reserved |= arg.BitMask() + args.Delete(i) + i-- + default: + continue + } + } + // sanity checks + if mask&reserved != 0 { + log.Fatalf("%s: mask (%08x) and don't care (%08x) collide", text, mask, reserved) + } + if value&^mask != 0 { + log.Fatalf("%s: value (%08x) out of range of mask (%08x)", text, value, mask) + } + return +} + +func Imm_signed_8bit_check(op string) bool { + imm_8 := []string{"ASI", "AGSI", "ALSI", "ALGSI", "CIB", "CGIB", "CIJ", "CGIJ"} + var ret bool + ret = false + for _, str := range imm_8 { + if strings.Compare(op, str) == 0 { + ret = true + break + } + } + return ret +} + +func Imm_signed_16bit_check(op string) bool { + imm_16 := []string{"AHI", "AGHI", "ALHSIK", "ALGHSIK", "AHIK", "AGHIK", "LHI", "LGHI", "MVGHI", "CIT", "CGIT", "CGHI", "CGHSI", "CHHSI", "CHI", "CHSI", "CRJ", "CGRJ"} + var ret bool + ret = false + for _, str := range imm_16 { + if strings.Compare(op, str) == 0 { + ret = true + break + } + } + return ret +} + +func Imm_signed_32bit_check(op string) bool { + imm_32 := []string{"AFI", "AGFI", "AIH", "CIH", 
"CFI", "CGFI", "CRL", "STRL", "STGRL"} + var ret bool + ret = false + for _, str := range imm_32 { + if strings.Compare(op, str) == 0 { + ret = true + break + } + } + return ret +} + +func check_flags(flags string) bool { + if strings.Contains(flags, "Da") { + return true + } else if strings.Contains(flags, "Db") { + return true + } else if strings.Contains(flags, "Dt") { + return true + } else { + return false + } +} + +// Parse a row from the CSV describing the instructions, and place the +// detected instructions into p. One entry may generate multiple intruction +// entries as each extended mnemonic listed in text is treated like a unique +// instruction. +// func add(p *Prog, text, mnemonics, encoding, format string) { +func add(p *Prog, text, mnemonics, encoding, flags string) { + // Parse encoding, building size and offset of each field. + // The first field in the encoding is the smallest offset. + // And note the MSB is bit 0, not bit 31. + // Example: "31@0|RS@6|RA@11|///@16|26@21|Rc@31|" + var args Args + + args = parseFields(encoding, text) + mask, value, dontCare := computeMaskValueReserved(args, text) + + // split mnemonics into individual instructions + // example: "b target_addr (AA=0 LK=0)|ba target_addr (AA=1 LK=0)|bl target_addr (AA=0 LK=1)|bla target_addr (AA=1 LK=1)" + inst := Inst{Text: text, Encoding: mnemonics, Value: value, Mask: mask, DontCare: dontCare} + + // order inst.Args according to mnemonics order + for i, opr := range operandRe.FindAllString(mnemonics, -1) { + if i == 0 { // operation + inst.Op = opr + continue + } + field := Field{Name: opr} + typ := asm.TypeUnknown + flag := uint16(0) + switch opr { + case "R1", "R2", "R3": + s := strings.Split(mnemonics, " ") + switch opr { + case "R1": + switch s[0] { + case "CPDT", "CPXT", "CDXT", "CZXT", "CZDT": + typ = asm.TypeFPReg + flag = 0x2 + case "CUXTR", "EEXTR", "EEDTR", "EFPC", "ESXTR", "ESDTR", "LGDR", "SFPC", "SFASR": + typ = asm.TypeReg + flag = 0x1 + case "CPYA", "LAM", "LAMY", 
"STAM", "STAMY", "SAR", "TAR": + typ = asm.TypeACReg + flag = 0x3 + case "LCTL", "LCTLG", "STCTL", "STCTG": + typ = asm.TypeCReg + flag = 0x4 + default: + if check_flags(flags) { + if strings.Contains(text, "CONVERT TO") { + typ = asm.TypeReg + flag = 0x1 + } else { + typ = asm.TypeFPReg + flag = 0x2 + } + } else { + typ = asm.TypeReg + flag = 0x1 + } + } + case "R2": + switch s[0] { + case "IEXTR", "IEDTR", "LDGR", "RRXTR", "RRDTR": + typ = asm.TypeReg + flag = 0x1 + case "CPYA", "EAR": + typ = asm.TypeACReg + flag = 0x3 + default: + if check_flags(flags) { + if strings.Contains(text, "CONVERT FROM") { + typ = asm.TypeReg + flag = 0x1 + } else { + typ = asm.TypeFPReg + flag = 0x2 + } + } else { + typ = asm.TypeReg + flag = 0x1 + } + } + case "R3": + switch s[0] { + case "LAM", "LAMY", "STAM", "STAMY": + typ = asm.TypeACReg + flag = 0x3 + case "LCTL", "LCTLG", "STCTL", "STCTG": + typ = asm.TypeCReg + flag = 0x4 + default: + if check_flags(flags) { + typ = asm.TypeFPReg + flag = 0x2 + } else { + typ = asm.TypeReg + flag = 0x1 + } + } + } + + case "I", "I1", "I2", "I3", "I4", "I5": + flag = 0x0 + switch opr { + case "I", "I1": + typ = asm.TypeImmUnsigned + + case "I2": + if Imm_signed_8bit_check(inst.Op) { + typ = asm.TypeImmSigned8 + break + } else if Imm_signed_16bit_check(inst.Op) { // "ASI", "AGSI", "ALSI", "ALGSI" + typ = asm.TypeImmSigned16 + break + } else if Imm_signed_32bit_check(inst.Op) { // "AHI", "AGHI", "AHIK", "AGHIK", "LHI", "LGHI" + typ = asm.TypeImmSigned32 + break + } else { + typ = asm.TypeImmUnsigned + break + } + + case "I3", "I4", "I5": + typ = asm.TypeImmUnsigned + + } + + case "RI2", "RI3", "RI4": + flag = 0x80 + i := args.Find(opr) + count := uint8(args[i].Bits) + if count == 12 { + typ = asm.TypeRegImSigned12 + break + } else if count == 16 { + typ = asm.TypeRegImSigned16 + break + } else if count == 24 { + typ = asm.TypeRegImSigned24 + break + } else if count == 32 { + typ = asm.TypeRegImSigned32 + break + } + + case "M1", "M3", "M4", 
"M5", "M6": + flag = 0x800 + typ = asm.TypeMask + + case "B1", "B2", "B3", "B4": + typ = asm.TypeBaseReg + flag = 0x20 | 0x01 + + case "X2": + typ = asm.TypeIndexReg + flag = 0x40 | 0x01 + + case "D1", "D2", "D3", "D4": + flag = 0x10 + i := args.Find(opr) + if uint8(args[i].Bits) == 20 { + typ = asm.TypeDispSigned20 + break + } else { + typ = asm.TypeDispUnsigned + break + } + + case "L1", "L2": + typ = asm.TypeLen + flag = 0x10 + case "V1", "V2", "V3", "V4", "V5", "V6": + typ = asm.TypeVecReg + flag = 0x08 + } + + if typ == asm.TypeUnknown { + log.Fatalf("%s %s unknown type for opr %s", text, inst, opr) + } + field.Type = typ + field.flags = flag + var f1 asm.BitField + i := args.Find(opr) + if i < 0 { + log.Fatalf("%s: couldn't find %s in %s", text, opr, args) + } + f1.Offs, f1.Bits = uint8(args[i].Offs), uint8(args[i].Bits) + field.BitField = f1 + inst.Fields = append(inst.Fields, field) + } + if strings.HasPrefix(inst.Op, "V") || strings.Contains(inst.Op, "WFC") || strings.Contains(inst.Op, "WFK") { //Check Vector Instructions + Bits := asm.BitField{Offs: 36, Bits: 4} + field := Field{Name: "RXB", BitField: Bits, Type: asm.TypeImmUnsigned, flags: 0xC00} + inst.Fields = append(inst.Fields, field) + } + if *debug { + fmt.Printf("%v\n", inst) + } + p.Insts = append(p.Insts, inst) +} + +// operandRe matches each operand (including opcode) in instruction mnemonics +var operandRe = regexp.MustCompile(`([[:alpha:]][[:alnum:]_]*\.?)`) + +// printText implements the -fmt=text mode, which is not implemented (yet?). +func printText(p *Prog) { + log.Fatal("-fmt=text not implemented") +} + +// printEncoder implements the -fmt=encoder mode. which is not implemented (yet?). 
+func printEncoder(p *Prog) { + log.Fatal("-fmt=encoder not implemented") +} + +func printASM(p *Prog) { + fmt.Printf("#include \"hack.h\"\n") + fmt.Printf(".text\n") + for _, inst := range p.Insts { + fmt.Printf("\t%s\n", inst.Encoding) + } +} + +// argFieldName constructs a name for the argField +func argFieldName(f Field) string { + ns := []string{"ap", f.Type.String()} + b := f.BitField + ns = append(ns, fmt.Sprintf("%d_%d", b.Offs, b.Offs+b.Bits-1)) + return strings.Join(ns, "_") +} + +// printDecoder implements the -fmt=decoder mode. +// It emits the tables.go for package armasm's decoder. +func printDecoder(p *Prog) { + var buf bytes.Buffer + + fmt.Fprintf(&buf, "// Code generated by s390xmap -fmt=decoder %s DO NOT EDIT.\n", inputFile) + fmt.Fprintf(&buf, "\n") + + fmt.Fprintf(&buf, "package s390xasm\n\n") + + // Build list of opcodes, using the csv order (which corresponds to ISA docs order) + m := map[string]bool{} + fmt.Fprintf(&buf, "const (\n\t_ Op = iota\n") + for i := 0; i < len(p.Insts); i++ { + name := p.Insts[i].Op + switch name { + case "CUUTF", "CUTFU", "PPNO": + m[name] = false + p.Insts = append(p.Insts[:i], p.Insts[i+1:]...) + i-- + default: + m[name] = true + } + if ok := m[name]; !ok { + continue + } + fmt.Fprintf(&buf, "\t%s\n", name) + } + fmt.Fprint(&buf, ")\n\n\n") + + // Emit slice mapping opcode number to name string. 
+ m = map[string]bool{} + fmt.Fprintf(&buf, "var opstr = [...]string{\n") + for _, inst := range p.Insts { + name := inst.Op + if ok := m[name]; ok { + continue + } + m[name] = true + fmt.Fprintf(&buf, "\t%s: %q,\n", inst.Op, strings.ToLower(inst.Op)) + } + fmt.Fprint(&buf, "}\n\n\n") + + // print out argFields + fmt.Fprintf(&buf, "var (\n") + m = map[string]bool{} + for _, inst := range p.Insts { + for _, f := range inst.Fields { + name := argFieldName(f) + if ok := m[name]; ok { + continue + } + m[name] = true + fmt.Fprintf(&buf, "\t%s = &argField{Type: %#v, flags: %#x, BitField: BitField", name, f.Type, f.flags) + b := f.BitField + fmt.Fprintf(&buf, "{%d, %d }", b.Offs, b.Bits) + fmt.Fprintf(&buf, "}\n") + } + } + fmt.Fprint(&buf, ")\n\n\n") + + // Emit decoding table. + fmt.Fprintf(&buf, "var instFormats = [...]instFormat{\n") + for _, inst := range p.Insts { + m, v, dc := inst.Mask, inst.Value, inst.DontCare + fmt.Fprintf(&buf, "\t{ %s, %#x, %#x, %#x,", inst.Op, m, v, dc) + fmt.Fprintf(&buf, " // %s (%s)\n\t\t[8]*argField{", inst.Text, inst.Encoding) + for _, f := range inst.Fields { + fmt.Fprintf(&buf, "%s, ", argFieldName(f)) + } + fmt.Fprintf(&buf, "}},\n") + } + fmt.Fprint(&buf, "}\n\n") + + out, err := gofmt.Source(buf.Bytes()) + if err != nil { + log.Fatalf("gofmt error: %v", err) + fmt.Printf("%s", buf.Bytes()) + } else { + fmt.Printf("%s", out) + } +} diff --git a/s390x/s390xspec/spec.go b/s390x/s390xspec/spec.go new file mode 100644 index 00000000..cc0ebade --- /dev/null +++ b/s390x/s390xspec/spec.go @@ -0,0 +1,1059 @@ +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// S390xspec reads the Principles of Operation PDF Manual +// to collect instruction encoding details and writes those details to standard output +// in CSV format. 
+// +// Usage: +// +// s390xspec z_Architecture_Principles_of_Operation.pdf > s390x.csv +// +// Each CSV line contains three fields: +// +// instruction +// The instruction heading, such as "BRANCH AND LINK". +// mnemonic +// The instruction mnemonics, such as "BAL R1,D2(X2,B2)". +// encoding +// The instruction encoding, a sequence of opcode and operands encoding in respective bit positions +// such as operand@bitposition each separated by | +// Ex: "45@0|R1@8|X2@12|B2@16|D2@20|" +// +// For more on the exact meaning of these fields, see the Principle of Operations IBM-Z Architecture PDF Manual. +package main + +import ( + "bufio" + "fmt" + "log" + "math" + "os" + "rsc.io/pdf" + "sort" + "strconv" + "strings" +) + +type Inst struct { + Name string + Text string + Enc string + Flags string +} + +var stdout *bufio.Writer + +func main() { + log.SetFlags(0) + log.SetPrefix("s390xspec: ") + + if len(os.Args) != 2 { + fmt.Fprintf(os.Stderr, "usage: s390xspec file.pdf\n") + os.Exit(2) + } + + f, err := pdf.Open(os.Args[1]) + if err != nil { + log.Fatal(err) + } + + // Split across multiple columns and pages! + var all = []Inst{} + + // Scan document looking for instructions. + // Must find exactly the ones in the outline. + n := f.NumPage() + for pageNum := 1; pageNum <= n; pageNum++ { + page := f.Page(pageNum) + t1 := getPageContent(page) + if len(t1) > 0 && match(t1[0], "Helvetica-Bold", 13.98, "Instructions Arranged by Name") { + for n := pageNum; n < pageNum+24; n++ { + page := f.Page(n) + table := parsePage(n, page) + all = append(all, table...) 
+ } + break + } else { + continue + } + } + stdout = bufio.NewWriter(os.Stdout) + for _, inst := range all { + if strings.Contains(inst.Name, "\x00I") { + r := rune(0x2190) + inst.Name = strings.Replace(inst.Name, "\x00I", string(r), -1) + } else if strings.Contains(inst.Name, "I\x00") { + r := rune(0x2192) + inst.Name = strings.Replace(inst.Name, "I\x00", string(r), -1) + } + fmt.Fprintf(stdout, "%q,%q,%q,%q\n", inst.Name, inst.Text, inst.Enc, inst.Flags) + } + stdout.Flush() + +} + +// getPageContent gets the page content of a single PDF page +func getPageContent(p pdf.Page) []pdf.Text { + var text []pdf.Text + + content := p.Content() + for _, t := range content.Text { + text = append(text, t) + } + + text = findWords(text) + return text +} + +// parsePage parses single PDF page and returns the instructions content +func parsePage(num int, p pdf.Page) []Inst { + var insts []Inst + text := getPageContent(p) + + for { + var heading, mnemonic, format string + // The float numbers below are the horizontal X-coordinate values to be parsed out of the Z-ISA PDF book. + for len(text) > 0 && !(match(text[0], "Helvetica-Narrow", 8, "") && (matchXCord(text[0], 73.9) || matchXCord(text[0], 55.9))) { + text = text[1:] + } + if len(text) == 0 { + break + } + heading = text[0].S + text = text[1:] + // The float numbers below are the horizontal X-coordinate values to be parsed out of the Z-ISA PDF book. 
+ for !(matchXCord(text[0], 212.2) || matchXCord(text[0], 230.1) || matchXCord(text[0], 246.2) || matchXCord(text[0], 264.2)) { + heading += text[0].S + if match(text[0], "Wingdings3", 0, "") { + heading += text[1].S + text = text[1:] + } + text = text[1:] + } + if strings.Compare(heading, "DIAGNOSE") == 0 { + text = text[1:] + continue + } + heading, check, m := checkHeading(heading) + if check { + mnemonic = m + } else { + mnemonic = text[0].S + text = text[1:] + } + index := strings.Index(mnemonic, " ") + if index != -1 { + format = mnemonic[index+1:] + mnemonic = mnemonic[:index] + } else { + format = text[0].S + } + text = text[1:] + if strings.Compare(format, "SS") == 0 { + format += text[0].S + } + before, _, _ := strings.Cut(format, " ") + format = before + // The float numbers below are the horizontal X-coordinate values to be parsed out of the Z-ISA PDF book. + for len(text) > 0 && !(match(text[0], "Helvetica-Narrow", 8, "") && (matchXCord(text[0], 350.82) || matchXCord(text[0], 363.84) || matchXCord(text[0], 332.82) || matchXCord(text[0], 345.84))) { + if text[0].X > 405.48 { + break + } + text = text[1:] + } + flags := text[0].S + // The float numbers below are the horizontal X-coordinate values to be parsed out of the Z-ISA PDF book. 
+ for len(text) > 0 && !(match(text[0], "Helvetica-Narrow", 8, "") && ((matchXCord(text[0], 481.7) && (!matchXCord(text[1], 496.1))) || matchXCord(text[0], 496.1) || (matchXCord(text[0], 499.6) && (!matchXCord(text[1], 514))) || (matchXCord(text[0], 514)))) { + text = text[1:] + } + if len(text) == 0 { + break + } + opcode := text[0].S + b1, b2, _ := strings.Cut(opcode, " ") + if matchXCord(text[0], 481.7) || matchXCord(text[0], 499.6) { + opcode = b2 + } else { + opcode = b1 + } + if strings.Compare(text[0].S, b1) == 0 { + text = text[2:] + } else { + text = text[1:] + } + mnemonic1, encoding := frameMnemonic(mnemonic, format, opcode) + for match(text[0], "Helvetica-Narrow", 5.1, "") { + text = text[1:] + } + if match(text[0], "Helvetica-Oblique", 9, "") { + text = text[2:] + insts = append(insts, Inst{heading, mnemonic1, encoding, flags}) + continue + } + if strings.HasPrefix(text[0].S, "(") { + y123 := text[0].Y + for text[0].Y == y123 && !matchXCord(text[0], 5.1) { + heading += text[0].S + text = text[1:] + } + } else if !(math.Abs(text[0].Y-text[1].Y) < 0.3) { + heading += " " + text[0].S + text = text[1:] + } + insts = append(insts, Inst{heading, mnemonic1, encoding, flags}) + if match(text[0], "Helvetica-Oblique", 9, "") { + break + } + } + return insts +} + +func checkHeading(heading string) (string, bool, string) { + substr := []string{"ALSI", "ALGSI", "CHRL", "CGHRL", "CUXTR", "IEXTR", "RXSBG", "RISBLG", "VERIM", "VPSOP"} + b := false + for _, s := range substr { + r1 := strings.Index(heading, s) + if r1 != -1 { + heading = heading[:r1-1] + b = true + return heading, b, s + } + } + return heading, b, "" +} + +func frameMnemonic(mnemonic, format, opcode string) (string, string) { + + var mn, enc string + + switch format { + case "E": + mn, enc = mnemonic_E(mnemonic, opcode) + case "I": + mn, enc = mnemonic_I(mnemonic, opcode) + case "IE": + mn, enc = mnemonic_IE(mnemonic, opcode) + case "MII": + mn, enc = mnemonic_MII(mnemonic, opcode) + case "RI-a", 
"RI-b", "RI-c": + mn, enc = mnemonic_RI(mnemonic, format, opcode) + case "RIE-a", "RIE-b", "RIE-c", "RIE-d", "RIE-e", "RIE-f", "RIE-g": + mn, enc = mnemonic_RIE(mnemonic, format, opcode) + case "RIL-a", "RIL-b", "RIL-c": + mn, enc = mnemonic_RIL(mnemonic, format, opcode) + case "RIS": + mn, enc = mnemonic_RIS(mnemonic, opcode) + case "RR": + mn, enc = mnemonic_RR(mnemonic, opcode) + case "RRD": + mn, enc = mnemonic_RRD(mnemonic, opcode) + case "RRE": + mn, enc = mnemonic_RRE(mnemonic, opcode) + case "RRF-a", "RRF-b", "RRF-c", "RRF-d", "RRF-e": + mn, enc = mnemonic_RRF(mnemonic, format, opcode) + case "RRS": + mn, enc = mnemonic_RRS(mnemonic, opcode) + case "RS-a", "RS-b": + mn, enc = mnemonic_RS(mnemonic, format, opcode) + case "RSI": + mn, enc = mnemonic_RSI(mnemonic, opcode) + case "RSL-a", "RSL-b": + mn, enc = mnemonic_RSL(mnemonic, format, opcode) + case "RSY-a", "RSY-b": + mn, enc = mnemonic_RSY(mnemonic, format, opcode) + case "RX-a", "RX-b": + mn, enc = mnemonic_RX(mnemonic, format, opcode) + case "RXE": + mn, enc = mnemonic_RXE(mnemonic, opcode) + case "RXF": + mn, enc = mnemonic_RXF(mnemonic, opcode) + case "RXY-a", "RXY-b": + mn, enc = mnemonic_RXY(mnemonic, format, opcode) + case "S": + mn, enc = mnemonic_S(mnemonic, opcode) + case "SI": + mn, enc = mnemonic_SI(mnemonic, opcode) + case "SIL": + mn, enc = mnemonic_SIL(mnemonic, opcode) + case "SIY": + mn, enc = mnemonic_SIY(mnemonic, opcode) + case "SMI": + mn, enc = mnemonic_SMI(mnemonic, opcode) + case "SS-a", "SS-b", "SS-c", "SS-d", "SS-e", "SS-f": + mn, enc = mnemonic_SS(mnemonic, format, opcode) + case "SSE": + mn, enc = mnemonic_SSE(mnemonic, opcode) + case "SSF": + mn, enc = mnemonic_SSF(mnemonic, opcode) + case "VRI-a", "VRI-b", "VRI-c", "VRI-d", "VRI-e", "VRI-f", "VRI-g", "VRI-h", "VRI-i": + mn, enc = mnemonic_VRI(mnemonic, format, opcode) + case "VRR-a", "VRR-b", "VRR-c", "VRR-d", "VRR-e", "VRR-f", "VRR-g", "VRR-h", "VRR-i", "VRR-j", "VRR-k": + mn, enc = mnemonic_VRR(mnemonic, format, opcode) + 
case "VRS-a", "VRS-b", "VRS-c", "VRS-d": + mn, enc = mnemonic_VRS(mnemonic, format, opcode) + case "VRV": + mn, enc = mnemonic_VRV(mnemonic, opcode) + case "VRX": + mn, enc = mnemonic_VRX(mnemonic, opcode) + case "VSI": + mn, enc = mnemonic_VSI(mnemonic, opcode) + default: + mn = mnemonic + } + return mn, enc +} + +func mnemonic_E(mnemonic, opcode string) (string, string) { + var enc string + val, _ := strconv.ParseUint(opcode, 16, 16) + str := strconv.Itoa(int(val)) + enc = str + "@0|??@16" + return mnemonic, enc +} + +func mnemonic_I(mnemonic, opcode string) (string, string) { + var enc string + mnemonic += " I" + val, _ := strconv.ParseUint(opcode, 16, 16) + str := strconv.Itoa(int(val)) + enc = str + "@0|I@8|??@16" + return mnemonic, enc +} + +func mnemonic_IE(mnemonic, opcode string) (string, string) { + var enc string + mnemonic += " I1,I2" + val, _ := strconv.ParseUint(opcode, 16, 16) + str := strconv.Itoa(int(val)) + enc = str + "@0|//@16|I1@24|I2@28|??@32" + return mnemonic, enc +} + +func mnemonic_MII(mnemonic, opcode string) (string, string) { + var enc string + mnemonic += " M1,RI2,RI3" + val, _ := strconv.ParseUint(opcode, 16, 16) + str := strconv.Itoa(int(val)) + enc = str + "@0|M1@8|RI2@12|RI3@24|??@48" + return mnemonic, enc +} + +func mnemonic_RI(mnemonic, format, opcode string) (string, string) { + var enc string + val1, _ := strconv.ParseUint(opcode[:2], 16, 16) + str1 := strconv.Itoa(int(val1)) + val2, _ := strconv.ParseUint(opcode[2:3], 16, 16) + str2 := strconv.Itoa(int(val2)) + switch format { + case "RI-a": + mnemonic += " R1,I2" + enc = str1 + "@0|R1@8|" + str2 + "@12|I2@16|??@32" + case "RI-b": + mnemonic += " R1,RI2" + enc = str1 + "@0|R1@8|" + str2 + "@12|RI2@16|??@32" + case "RI-c": + mnemonic += " M1,RI2" + enc = str1 + "@0|M1@8|" + str2 + "@12|RI2@16|??@32" + } + return mnemonic, enc +} + +func mnemonic_RIE(mnemonic, format, opcode string) (string, string) { + var enc string + val1, _ := strconv.ParseUint(opcode[:2], 16, 16) + str1 := 
strconv.Itoa(int(val1)) + val2, _ := strconv.ParseUint(opcode[2:], 16, 16) + str2 := strconv.Itoa(int(val2)) + switch format { + case "RIE-a": + mnemonic += " R1,I2,M3" + enc = str1 + "@0|R1@8|//@12|I2@16|M3@32|//@36|" + str2 + "@40|??@48" + case "RIE-b": + mnemonic += " R1,R2,M3,RI4" + enc = str1 + "@0|R1@8|R2@12|RI4@16|M3@32|//@36|" + str2 + "@40|??@48" + case "RIE-c": + mnemonic += " R1,I2,M3,RI4" + enc = str1 + "@0|R1@8|M3@12|RI4@16|I2@32|" + str2 + "@40|??@48" + case "RIE-d": + mnemonic += " R1,R3,I2" + enc = str1 + "@0|R1@8|R3@12|I2@16|//@32|" + str2 + "@40|??@48" + case "RIE-e": + mnemonic += " R1,R3,RI2" + enc = str1 + "@0|R1@8|R3@12|RI2@16|//@32|" + str2 + "@40|??@48" + case "RIE-f": + mnemonic += " R1,R2,I3,I4,I5" + enc = str1 + "@0|R1@8|R2@12|I3@16|I4@24|I5@32|" + str2 + "@40|??@48" + case "RIE-g": + mnemonic += " R1,I2,M3" + enc = str1 + "@0|R1@8|M3@12|I2@16|//@32|" + str2 + "@40|??@48" + } + return mnemonic, enc +} + +func mnemonic_RIL(mnemonic, format, opcode string) (string, string) { + var enc string + val1, _ := strconv.ParseUint(opcode[:2], 16, 16) + str1 := strconv.Itoa(int(val1)) + val2, _ := strconv.ParseUint(opcode[2:], 16, 16) + str2 := strconv.Itoa(int(val2)) + switch format { + case "RIL-a": + mnemonic += " R1,I2" + enc = str1 + "@0|R1@8|" + str2 + "@12|I2@16|??@48" + case "RIL-b": + mnemonic += " R1,RI2" + enc = str1 + "@0|R1@8|" + str2 + "@12|RI2@16|??@48" + case "RIL-c": + mnemonic += " M1,RI2" + enc = str1 + "@0|M1@8|" + str2 + "@12|RI2@16|??@48" + } + return mnemonic, enc +} + +func mnemonic_RIS(mnemonic, opcode string) (string, string) { + var enc string + val1, _ := strconv.ParseUint(opcode[:2], 16, 16) + str1 := strconv.Itoa(int(val1)) + val2, _ := strconv.ParseUint(opcode[2:], 16, 16) + str2 := strconv.Itoa(int(val2)) + mnemonic += " R1,I2,M3,D4(B4)" + enc = str1 + "@0|R1@8|M3@12|B4@16|D4@20|I2@32|" + str2 + "@40|??@48" + return mnemonic, enc +} + +func mnemonic_RR(mnemonic, opcode string) (string, string) { + var enc string + val, 
_ := strconv.ParseUint(opcode, 16, 16) + str := strconv.Itoa(int(val)) + switch mnemonic { + case "BCR": + mnemonic += " M1,R2" + enc = str + "@0|M1@8|R2@12|??@16" + case "SPM": + mnemonic += " R1" + enc = str + "@0|R1@8|//@12|??@16" + default: + mnemonic += " R1,R2" + enc = str + "@0|R1@8|R2@12|??@16" + } + return mnemonic, enc +} + +func mnemonic_RRD(mnemonic, opcode string) (string, string) { + var enc string + mnemonic += " R1,R3,R2" + val, _ := strconv.ParseUint(opcode, 16, 16) + str := strconv.Itoa(int(val)) + enc = str + "@0|R1@16|//@20|R3@24|R2@28|??@32" + return mnemonic, enc +} + +func mnemonic_RRE(mnemonic, opcode string) (string, string) { + var enc string + val, _ := strconv.ParseUint(opcode, 16, 16) + str := strconv.Itoa(int(val)) + switch mnemonic { + case "LZER", "LZDR", "LZXR", "EFPC", "EPAR", "EPAIR", "ESEA", "ESAIR", "ESAR", "ETND", "IAC", "IPM", "MSTA", "PTF", "SFASR", "SFPC", "SSAR", "SSAIR": + mnemonic += " R1" + enc = str + "@0|//@16|R1@24|//@28|??@32" + case "NNPA", "PALB", "PCC", "PCKMO": + enc = str + "@0|//@16|??@32" + default: + mnemonic += " R1,R2" + enc = str + "@0|//@16|R1@24|R2@28|??@32" + } + return mnemonic, enc +} + +func mnemonic_RRF(mnemonic, format, opcode string) (string, string) { + var enc string + val, _ := strconv.ParseUint(opcode, 16, 16) + str := strconv.Itoa(int(val)) + switch format { + case "RRF-a": + switch mnemonic { + case "SELR", "SELGR", "SELFHR", "IPTE", "AXTRA", "ADTRA", + "DDTRA", "DXTRA", "MDTRA", "MXTRA", "SDTRA", "SXTRA": + mnemonic += " R1,R2,R3,M4" + enc = str + "@0|R3@16|M4@20|R1@24|R2@28|??@32" + default: + mnemonic += " R1,R2,R3" + enc = str + "@0|R3@16|//@20|R1@24|R2@28|??@32" + } + case "RRF-b": + switch mnemonic { + case "CRDTE", "IDTE", "LPTEA", "RDP", "DIEBR", "DIDBR", + "QADTR", "QAXTR", "RRDTR", "RRXTR": + mnemonic += " R1,R3,R2,M4" + enc = str + "@0|R3@16|M4@20|R1@24|R2@28|??@32" + default: + mnemonic += " R1,R3,R2" + enc = str + "@0|R3@16|//@20|R1@24|R2@28|??@32" + } + case "RRF-c": + mnemonic 
+= " R1,R2,M3" + enc = str + "@0|M3@16|//@20|R1@24|R2@28|??@32" + case "RRF-d": + mnemonic += " R1,R2,M4" + enc = str + "@0|//@16|M4@20|R1@24|R2@28|??@32" + case "RRF-e": + switch mnemonic { + case "CXFBRA", "CXFTR", "CDFBRA", "CDFTR", "CEFBRA", "CXGBRA", "CXGTRA", "CDGBRA", "CDGTRA", "CEGBRA", "CXLFBR", "CXLFTR", "CDLFBR", "CDLFTR", "CELFBR", + "CXLGBR", "CXLGTR", "CDLGBR", "CDLGTR", "CELGBR", "CFXBRA", "CGXBRA", "CFXTR", "CGXTRA", "CFDBRA", "CGDBRA", "CFDTR", "CGDTRA", "CFEBRA", "CGEBRA", + "CLFEBR", "CLFDBR", "CLFXBR", "CLGEBR", "CLGDBR", "CLGXBR", "CLFXTR", "CLFDTR", "CLGXTR", "CLGDTR", "FIEBRA", "FIDBRA", "FIXBRA", "FIDTR", "FIXTR", + "LDXBRA", "LEDBRA", "LEXBRA", "LEDTR", "LDXTR": + mnemonic += " R1,M3,R2,M4" + enc = str + "@0|M3@16|M4@20|R1@24|R2@28|??@32" + default: + mnemonic += " R1,M3,R2" + enc = str + "@0|M3@16|//@20|R1@24|R2@28|??@32" + } + } + return mnemonic, enc +} + +func mnemonic_RRS(mnemonic, opcode string) (string, string) { + var enc string + mnemonic += " R1,R2,M3,D4(B4)" + val1, _ := strconv.ParseUint(opcode[:2], 16, 16) + str1 := strconv.Itoa(int(val1)) + val2, _ := strconv.ParseUint(opcode[2:], 16, 16) + str2 := strconv.Itoa(int(val2)) + enc = str1 + "@0|R1@8|R2@12|B4@16|D4@20|M3@32|//@36|" + str2 + "@40|??@48" + return mnemonic, enc +} + +func mnemonic_RS(mnemonic, format, opcode string) (string, string) { + var enc string + val, _ := strconv.ParseUint(opcode, 16, 16) + str := strconv.Itoa(int(val)) + switch format { + case "RS-a": + switch mnemonic { + case "SLDA", "SLDL", "SLA", "SLL", "SRA", "SRDA", "SRDL", "SRL": + mnemonic += " R1,D2(B2)" + enc = str + "@0|R1@8|//@12|B2@16|D2@20|??@32" + default: + mnemonic += " R1,R3,D2(B2)" + enc = str + "@0|R1@8|R3@12|B2@16|D2@20|??@32" + } + case "RS-b": + mnemonic += " R1,M3,D2(B2)" + enc = str + "@0|R1@8|M3@12|B2@16|D2@20|??@32" + } + return mnemonic, enc +} + +func mnemonic_RSI(mnemonic, opcode string) (string, string) { + var enc string + mnemonic += " R1,R3,RI2" + val, _ := 
strconv.ParseUint(opcode, 16, 16) + str := strconv.Itoa(int(val)) + enc = str + "@0|R1@8|R3@12|RI2@16|??@32" + return mnemonic, enc +} + +func mnemonic_RSL(mnemonic, format, opcode string) (string, string) { + var enc string + val1, _ := strconv.ParseUint(opcode[:2], 16, 16) + str1 := strconv.Itoa(int(val1)) + val2, _ := strconv.ParseUint(opcode[2:], 16, 16) + str2 := strconv.Itoa(int(val2)) + switch format { + case "RSL-a": + mnemonic += " D1(L1,B1)" + enc = str1 + "@0|L1@8|//@12|B1@16|D1@20|//@32|" + str2 + "@40|??@48" + case "RSL-b": + mnemonic += " R1,D2(L2,B2),M3" + enc = str1 + "@0|L2@8|B2@16|D2@20|R1@32|M3@36|" + str2 + "@40|??@48" + } + return mnemonic, enc +} + +func mnemonic_RSY(mnemonic, format, opcode string) (string, string) { + var enc string + val1, _ := strconv.ParseUint(opcode[:2], 16, 16) + str1 := strconv.Itoa(int(val1)) + val2, _ := strconv.ParseUint(opcode[2:], 16, 16) + str2 := strconv.Itoa(int(val2)) + switch format { + case "RSY-a": + mnemonic += " R1,R3,D2(B2)" + enc = str1 + "@0|R1@8|R3@12|B2@16|D2@20|" + str2 + "@40|??@48" + case "RSY-b": + switch mnemonic { + case "LOC", "LOCFH", "LOCG", "STOCFH", "STOC", "STOCG": + mnemonic += " R1,D2(B2),M3" + default: + mnemonic += " R1,M3,D2(B2)" + } + enc = str1 + "@0|R1@8|M3@12|B2@16|D2@20|" + str2 + "@40|??@48" + } + return mnemonic, enc +} + +func mnemonic_RX(mnemonic, format, opcode string) (string, string) { + var enc string + val, _ := strconv.ParseInt(opcode, 16, 16) + str := strconv.Itoa(int(val)) + switch format { + case "RX-a": + mnemonic += " R1,D2(X2,B2)" + enc = str + "@0|R1@8|X2@12|B2@16|D2@20|??@32" + case "RX-b": + mnemonic += " M1,D2(X2,B2)" + enc = str + "@0|M1@8|X2@12|B2@16|D2@20|??@32" + } + return mnemonic, enc +} + +func mnemonic_RXE(mnemonic, opcode string) (string, string) { + var enc string + val1, _ := strconv.ParseUint(opcode[:2], 16, 16) + str1 := strconv.Itoa(int(val1)) + val2, _ := strconv.ParseUint(opcode[2:], 16, 16) + str2 := strconv.Itoa(int(val2)) + switch mnemonic 
{ + case "LCBB": + mnemonic += " R1,D2(X2,B2),M3" + enc = str1 + "@0|R1@8|X2@12|B2@16|D2@20|M3@32|//@36|" + str2 + "@40|??@48" + default: + mnemonic += " R1,D2(X2,B2)" + enc = str1 + "@0|R1@8|X2@12|B2@16|D2@20|//@32|" + str2 + "@40|??@48" + } + return mnemonic, enc +} + +func mnemonic_RXF(mnemonic, opcode string) (string, string) { + var enc string + mnemonic += " R1,R3,D2(X2,B2)" + val1, _ := strconv.ParseUint(opcode[:2], 16, 16) + str1 := strconv.Itoa(int(val1)) + val2, _ := strconv.ParseUint(opcode[2:], 16, 16) + str2 := strconv.Itoa(int(val2)) + enc = str1 + "@0|R3@8|X2@12|B2@16|D2@20|R1@32|//@36|" + str2 + "@40|??@48" + return mnemonic, enc +} + +func mnemonic_RXY(mnemonic, format, opcode string) (string, string) { + var enc string + val1, _ := strconv.ParseUint(opcode[:2], 16, 16) + str1 := strconv.Itoa(int(val1)) + val2, _ := strconv.ParseUint(opcode[2:], 16, 16) + str2 := strconv.Itoa(int(val2)) + switch format { + case "RXY-a": + mnemonic += " R1,D2(X2,B2)" + enc = str1 + "@0|R1@8|X2@12|B2@16|D2@20|" + str2 + "@40|??@48" + case "RXY-b": + mnemonic += " M1,D2(X2,B2)" + enc = str1 + "@0|M1@8|X2@12|B2@16|D2@20|" + str2 + "@40|??@48" + } + return mnemonic, enc +} + +func mnemonic_S(mnemonic, opcode string) (string, string) { + var enc string + val, _ := strconv.ParseUint(opcode, 16, 16) + str := strconv.Itoa(int(val)) + switch mnemonic { + case "PTLB", "TEND", "XSCH", "CSCH", "HSCH", "IPK", "RCHP", "RSCH", "SAL", "SCHM": + enc = str + "@0|//@16|??@32" + default: + mnemonic += " D2(B2)" + enc = str + "@0|B2@16|D2@20|??@32" + } + return mnemonic, enc +} + +func mnemonic_SI(mnemonic, opcode string) (string, string) { + var enc string + val, _ := strconv.ParseUint(opcode, 16, 16) + str := strconv.Itoa(int(val)) + switch mnemonic { + case "TS", "SSM", "LPSW": + mnemonic += " D1(B1)" + default: + mnemonic += " D1(B1),I2" + } + enc = str + "@0|I2@8|B1@16|D1@20|??@32" + return mnemonic, enc +} + +func mnemonic_SIL(mnemonic, opcode string) (string, string) { + var enc 
string + mnemonic += " D1(B1),I2" + val, _ := strconv.ParseUint(opcode, 16, 16) + str := strconv.Itoa(int(val)) + enc = str + "@0|B1@16|D1@20|I2@32|??@48" + return mnemonic, enc +} + +func mnemonic_SIY(mnemonic, opcode string) (string, string) { + var enc string + val1, _ := strconv.ParseUint(opcode[:2], 16, 16) + str1 := strconv.Itoa(int(val1)) + val2, _ := strconv.ParseUint(opcode[2:], 16, 16) + str2 := strconv.Itoa(int(val2)) + switch mnemonic { + case "LPSWEY": + mnemonic += " D1(B1)" + enc = str1 + "@0|//@8|B1@16|D1@20|" + str2 + "@40|??@48" + default: + mnemonic += " D1(B1),I2" + enc = str1 + "@0|I2@8|B1@16|D1@20|" + str2 + "@40|??@48" + } + return mnemonic, enc +} + +func mnemonic_SMI(mnemonic, opcode string) (string, string) { + var enc string + mnemonic += " M1,RI2,D3(B3)" + val, _ := strconv.ParseUint(opcode, 16, 16) + str := strconv.Itoa(int(val)) + enc = str + "@0|M1@8|//@12|B3@16|D3@20|RI2@32|??@48" + return mnemonic, enc +} + +func mnemonic_SS(mnemonic, format, opcode string) (string, string) { + var enc string + val, _ := strconv.ParseUint(opcode, 16, 16) + str := strconv.Itoa(int(val)) + switch format { + case "SS-a": + mnemonic += " D1(L1,B1),D2(B2)" + enc = str + "@0|L1@8|B1@16|D1@20|B2@32|D2@36|??@48" + case "SS-b": + mnemonic += " D1(L1,B1),D2(L2,B2)" + enc = str + "@0|L1@8|L2@12|B1@16|D1@20|B2@32|D2@36|??@48" + case "SS-c": + mnemonic += " D1(L1,B1),D2(B2),I3" + enc = str + "@0|L1@8|I3@12|B1@16|D1@20|B2@32|D2@36|??@48" + case "SS-d": + mnemonic += " D1(R1,B1),D2(B2),R3" + enc = str + "@0|R1@8|R3@12|B1@16|D1@20|B2@32|D2@36|??@48" + case "SS-e": + switch mnemonic { + case "LMD": + mnemonic += " R1,R3,D2(B2),D4(B4)" + default: + mnemonic += " R1,D2(B2),R3,D4(B4)" + } + enc = str + "@0|R1@8|R3@12|B2@16|D2@20|B4@32|D4@36|??@48" + case "SS-f": + mnemonic += " D1(B1),D2(L2,B2)" + enc = str + "@0|L2@8|B1@16|D1@20|B2@32|D2@36|??@48" + } + return mnemonic, enc + +} + +func mnemonic_SSE(mnemonic, opcode string) (string, string) { + var enc string + 
mnemonic += " D1(B1),D2(B2)" + val, _ := strconv.ParseUint(opcode, 16, 16) + str := strconv.Itoa(int(val)) + enc = str + "@0|B1@16|D1@20|B2@32|D2@36|??@48" + return mnemonic, enc +} + +func mnemonic_SSF(mnemonic, opcode string) (string, string) { + var enc string + val1, _ := strconv.ParseUint(opcode[:2], 16, 16) + str1 := strconv.Itoa(int(val1)) + val2, _ := strconv.ParseUint(opcode[2:], 16, 16) + str2 := strconv.Itoa(int(val2)) + switch mnemonic { + case "LPD", "LPDG": + mnemonic += " R3,D1(B1),D2(B2)" + default: + mnemonic += " D1(B1),D2(B2),R3" + } + enc = str1 + "@0|R3@8|" + str2 + "@12|B1@16|D1@20|B2@32|D2@36|??@48" + return mnemonic, enc +} + +func mnemonic_VRI(mnemonic, format, opcode string) (string, string) { + var enc string + val1, _ := strconv.ParseUint(opcode[:2], 16, 16) + str1 := strconv.Itoa(int(val1)) + val2, _ := strconv.ParseUint(opcode[2:], 16, 16) + str2 := strconv.Itoa(int(val2)) + switch format { + case "VRI-a": + if strings.Contains(mnemonic, "VGBM") { // Check for M3 field + mnemonic += " V1,I2" + enc = str1 + "@0|V1@8|//@12|I2@16|//@32|RXB@36|" + str2 + "@40|??@48" + } else { + mnemonic += " V1,I2,M3" + enc = str1 + "@0|V1@8|//@12|I2@16|M3@32|RXB@36|" + str2 + "@40|??@48" + } + case "VRI-b": + mnemonic += " V1,I2,I3,M4" + enc = str1 + "@0|V1@8|//@12|I2@16|I3@24|M4@32|RXB@36|" + str2 + "@40|??@48" + case "VRI-c": + mnemonic += " V1,V3,I2,M4" + enc = str1 + "@0|V1@8|V3@12|I2@16|M4@32|RXB@36|" + str2 + "@40|??@48" + case "VRI-d": + if strings.Contains(mnemonic, "VERIM") { // Check for M5 field + mnemonic += " V1,V2,V3,I4,M5" + enc = str1 + "@0|V1@8|V2@12|V3@16|//@20|I4@24|M5@32|RXB@36|" + str2 + "@40|??@48" + } else { + mnemonic += " V1,V2,V3,I4" + enc = str1 + "@0|V1@8|V2@12|V3@16|//@20|I4@24|//@32|RXB@36|" + str2 + "@40|??@48" + } + case "VRI-e": + mnemonic += " V1,V2,I3,M4,M5" + enc = str1 + "@0|V1@8|V2@12|I3@16|M5@28|M4@32|RXB@36|" + str2 + "@40|??@48" + case "VRI-f": + mnemonic += " V1,V2,V3,I4,M5" + enc = str1 + 
"@0|V1@8|V2@12|V3@16|//@20|M5@24|I4@28|RXB@36|" + str2 + "@40|??@48" + case "VRI-g": + mnemonic += " V1,V2,I3,I4,M5" + enc = str1 + "@0|V1@8|V2@12|I4@16|M5@24|I3@28|RXB@36|" + str2 + "@40|??@48" + case "VRI-h": + mnemonic += " V1,I2,I3" + enc = str1 + "@0|V1@8|//@12|I2@16|I3@32|RXB@36|" + str2 + "@40|??@48" + case "VRI-i": + mnemonic += " V1,R2,I3,M4" + enc = str1 + "@0|V1@8|R2@12|//@16|M4@24|I3@28|RXB@36|" + str2 + "@40|??@48" + } + return mnemonic, enc +} + +func mnemonic_VRR(mnemonic, format, opcode string) (string, string) { + var enc string + val1, _ := strconv.ParseUint(opcode[:2], 16, 16) + str1 := strconv.Itoa(int(val1)) + val2, _ := strconv.ParseUint(opcode[2:], 16, 16) + str2 := strconv.Itoa(int(val2)) + switch format { + case "VRR-a": + switch mnemonic { + case "VLR", "VTM": // V1,V2 + mnemonic += " V1,V2" + enc = str1 + "@0|V1@8|V2@12|//@16|RXB@36|" + str2 + "@40|??@48" + + case "VSEG", "VUPH", "VUPLH", "VUPL", "VUPLL", "VCLZ", "VCTZ", "VEC", "VECL", "VLC", "VLP", "VPOPCT": // V1,V2,M3 + mnemonic += " V1,V2,M3" + enc = str1 + "@0|V1@8|V2@12|//@16|M3@32|RXB@36|" + str2 + "@40|??@48" + + case "VISTR": // V1,V2,M3,M5 + mnemonic += " V1,V2,M3,M5" + enc = str1 + "@0|V1@8|V2@12|//@16|M5@24|//@28|M3@32|RXB@36|" + str2 + "@40|??@48" + + case "WFC", "WFK", "VFLL", "VFSQ", "VCLFNH", "VCLFNL", "VCFN", "VCNF": // V1,V2,M3,M4 + mnemonic += " V1,V2,M3,M4" + enc = str1 + "@0|V1@8|V2@12|//@16|M4@28|M3@32|RXB@36|" + str2 + "@40|??@48" + + case "VCFPS", "VCDG", "VCDLG", "VCGD", "VCFPL", "VCSFP", "VCLFP", "VCLGD", "VFI", "VFLR", "VFPSO": // V1,V2,M3,M4,M5 + mnemonic += " V1,V2,M3,M4,M5" + enc = str1 + "@0|V1@8|V2@12|//@16|M5@24|M4@28|M3@32|RXB@36|" + str2 + "@40|??@48" + } + case "VRR-b": + switch mnemonic { + case "VSCSHP": + mnemonic += " V1,V2,V3" + enc = str1 + "@0|V1@8|V2@12|V3@16|//@20|RXB@36|" + str2 + "@40|??@48" + default: + mnemonic += " V1,V2,V3,M4,M5" + enc = str1 + "@0|V1@8|V2@12|V3@16|//@20|M5@24|//@28|M4@32|RXB@36|" + str2 + "@40|??@48" + } + case "VRR-c": 
+ switch mnemonic { + case "VFA", "VFD", "VFM", "VFS", "VCRNF": // V1,V2,V3,M4,M5 + mnemonic += " V1,V2,V3,M4,M5" + enc = str1 + "@0|V1@8|V2@12|V3@16|//@20|M5@28|M4@32|RXB@36|" + str2 + "@40|??@48" + + case "VFCE", "VFCH", "VFCHE", "VFMAX", "VFMIN": // V1,V2,V3,M4,M5,M6 + mnemonic += " V1,V2,V3,M4,M5,M6" + enc = str1 + "@0|V1@8|V2@12|V3@16|//@20|M6@24|M5@28|M4@32|RXB@36|" + str2 + "@40|??@48" + + case "VBPERM", "VN", "VNC", "VCKSM", "VX", "VNN", "VNO", "VNX", + "VO", "VOC", "VSL", "VSLB", "VSRA", "VSRAB", "VSRL", "VSRLB": // V1,V2,V3 + mnemonic += " V1,V2,V3" + enc = str1 + "@0|V1@8|V2@12|V3@16|//@20|RXB@36|" + str2 + "@40|??@48" + default: // V1,V2,V3,M4 + mnemonic += " V1,V2,V3,M4" + enc = str1 + "@0|V1@8|V2@12|V3@16|//@20|M4@32|RXB@36|" + str2 + "@40|??@48" + } + case "VRR-d": + switch mnemonic { + case "VMSL", "VSTRC", "VSTRS": // V1,V2,V3,V4,M5,M6 + mnemonic += " V1,V2,V3,V4,M5,M6" + enc = str1 + "@0|V1@8|V2@12|V3@16|M5@20|M6@24|//@28|V4@32|RXB@36|" + str2 + "@40|??@48" + default: + mnemonic += " V1,V2,V3,V4,M5" + enc = str1 + "@0|V1@8|V2@12|V3@16|M5@20|//@24|V4@32|RXB@36|" + str2 + "@40|??@48" + } + case "VRR-e": + switch mnemonic { + case "VPERM", "VSEL": + mnemonic += " V1,V2,V3,V4" + enc = str1 + "@0|V1@8|V2@12|V3@16|//@20|V4@32|RXB@36|" + str2 + "@40|??@48" + default: + mnemonic += " V1,V2,V3,V4,M5,M6" + enc = str1 + "@0|V1@8|V2@12|V3@16|M6@20|//@24|M5@28|V4@32|RXB@36|" + str2 + "@40|??@48" + } + case "VRR-f": + mnemonic += " V1,R2,R3" + enc = str1 + "@0|V1@8|R2@12|R3@16|//@20|RXB@36|" + str2 + "@40|??@48" + case "VRR-g": + mnemonic += " V1" + enc = str1 + "@0|//@8|V1@12|//@16|RXB@36|" + str2 + "@40|??@48" + case "VRR-h": + mnemonic += " V1,V2,M3" + enc = str1 + "@0|//@8|V1@12|V2@16|//@20|M3@24|//@28|RXB@36|" + str2 + "@40|??@48" + case "VRR-i": + mnemonic += " R1,V2,M3,M4" + enc = str1 + "@0|R1@8|V2@12|//@16|M3@24|M4@28|//@32|RXB@36|" + str2 + "@40|??@48" + case "VRR-j": + mnemonic += " V1,V2,V3,M4" + enc = str1 + 
"@0|V1@8|V2@12|V3@16|//@20|M4@24|//@28|RXB@36|" + str2 + "@40|??@48" + case "VRR-k": + mnemonic += " V1,V2,M3" + enc = str1 + "@0|V1@8|V2@12|//@16|M3@24|//@28|RXB@36|" + str2 + "@40|??@48" + } + return mnemonic, enc +} + +func mnemonic_VRS(mnemonic, format, opcode string) (string, string) { + var enc string + val1, _ := strconv.ParseUint(opcode[:2], 16, 16) + str1 := strconv.Itoa(int(val1)) + val2, _ := strconv.ParseUint(opcode[2:], 16, 16) + str2 := strconv.Itoa(int(val2)) + switch format { + case "VRS-a": + mnemonic += " V1,V3,D2(B2),M4" + enc = str1 + "@0|V1@8|V3@12|B2@16|D2@20|M4@32|RXB@36|" + str2 + "@40|??@48" + case "VRS-b": + if strings.Contains(mnemonic, "VLVG") { + mnemonic += " V1,R3,D2(B2),M4" + enc = str1 + "@0|V1@8|R3@12|B2@16|D2@20|M4@32|RXB@36|" + str2 + "@40|??@48" + } else { + mnemonic += " V1,R3,D2(B2)" + enc = str1 + "@0|V1@8|R3@12|B2@16|D2@20|//@32|RXB@36|" + str2 + "@40|??@48" + } + case "VRS-c": + mnemonic += " R1,V3,D2(B2),M4" + enc = str1 + "@0|R1@8|V3@12|B2@16|D2@20|M4@32|RXB@36|" + str2 + "@40|??@48" + case "VRS-d": + mnemonic += " V1,R3,D2(B2)" + enc = str1 + "@0|//@8|R3@12|B2@16|D2@20|V1@32|RXB@36|" + str2 + "@40|??@48" + } + return mnemonic, enc +} + +func mnemonic_VRV(mnemonic, opcode string) (string, string) { + var enc string + val1, _ := strconv.ParseUint(opcode[:2], 16, 16) + str1 := strconv.Itoa(int(val1)) + val2, _ := strconv.ParseUint(opcode[2:], 16, 16) + str2 := strconv.Itoa(int(val2)) + mnemonic += " V1,D2(V2,B2),M3" + enc = str1 + "@0|V1@8|V2@12|B2@16|D2@20|M3@32|RXB@36|" + str2 + "@40|??@48" + return mnemonic, enc +} + +func mnemonic_VRX(mnemonic, opcode string) (string, string) { + var enc string + val1, _ := strconv.ParseUint(opcode[:2], 16, 16) + str1 := strconv.Itoa(int(val1)) + val2, _ := strconv.ParseUint(opcode[2:], 16, 16) + str2 := strconv.Itoa(int(val2)) + mnemonic += " V1,D2(X2,B2),M3" + enc = str1 + "@0|V1@8|X2@12|B2@16|D2@20|M3@32|RXB@36|" + str2 + "@40|??@48" + return mnemonic, enc +} + +func 
mnemonic_VSI(mnemonic, opcode string) (string, string) { + var enc string + mnemonic += " V1,D2(B2),I3" + val1, _ := strconv.ParseUint(opcode[:2], 16, 16) + str1 := strconv.Itoa(int(val1)) + val2, _ := strconv.ParseUint(opcode[2:], 16, 16) + str2 := strconv.Itoa(int(val2)) + enc = str1 + "@0|I3@8|B2@16|D2@20|V1@32|RXB@36|" + str2 + "@40|??@48" + return mnemonic, enc +} + +func matchXCord(t pdf.Text, Xcord float64) bool { + return math.Abs(t.X-Xcord) < 0.9 +} + +func match(t pdf.Text, font string, size float64, substr string) bool { + return t.Font == font && (size == 0 || math.Abs(t.FontSize-size) < 0.2) && strings.Contains(t.S, substr) +} + +func findWords(chars []pdf.Text) (words []pdf.Text) { + // Sort by Y coordinate and normalize. + const nudge = 1.5 + sort.Sort(pdf.TextVertical(chars)) + old := -100000.0 + for i, c := range chars { + if c.Y != old && math.Abs(old-c.Y) < nudge { + chars[i].Y = old + } else { + old = c.Y + } + } + + // Sort by Y coordinate, breaking ties with X. + // This will bring letters in a single word together. + sort.Sort(pdf.TextVertical(chars)) + + // Loop over chars. + for i := 0; i < len(chars); { + // Find all chars on line. + j := i + 1 + for j < len(chars) && chars[j].Y == chars[i].Y { + j++ + } + var end float64 + // Split line into words (really, phrases). + for k := i; k < j; { + ck := &chars[k] + s := ck.S + end = ck.X + ck.W + charSpace := ck.FontSize / 6 + wordSpace := ck.FontSize * 2 / 3 + l := k + 1 + for l < j { + // Grow word. + cl := &chars[l] + if sameFont(cl.Font, ck.Font) && math.Abs(cl.FontSize-ck.FontSize) < 0.1 && cl.X <= end+charSpace { + s += cl.S + end = cl.X + cl.W + l++ + continue + } + // Add space to phrase before next word. 
+				if sameFont(cl.Font, ck.Font) && math.Abs(cl.FontSize-ck.FontSize) < 0.1 && cl.X <= end+wordSpace {
+					s += " " + cl.S
+					end = cl.X + cl.W
+					l++
+					continue
+				}
+				break
+			}
+			f := ck.Font
+			f = strings.TrimSuffix(f, ",Italic")
+			f = strings.TrimSuffix(f, "-Italic")
+			words = append(words, pdf.Text{f, ck.FontSize, ck.X, ck.Y, end - ck.X, s})
+			k = l
+		}
+		i = j
+	}
+	return words
+}
+
+func sameFont(f1, f2 string) bool {
+	f1 = strings.TrimSuffix(f1, ",Italic")
+	f1 = strings.TrimSuffix(f1, "-Italic")
+	f2 = strings.TrimSuffix(f2, ",Italic")
+	f2 = strings.TrimSuffix(f2, "-Italic")
+	return strings.TrimSuffix(f1, ",Italic") == strings.TrimSuffix(f2, ",Italic") || f1 == "Symbol" || f2 == "Symbol" || f1 == "TimesNewRoman" || f2 == "TimesNewRoman"
+}
diff --git a/s390x/s390xutil/hack.h b/s390x/s390xutil/hack.h
new file mode 100644
index 00000000..22ef049f
--- /dev/null
+++ b/s390x/s390xutil/hack.h
@@ -0,0 +1,56 @@
+// Copyright 2024 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+//
+// This file requires gcc and binutils with -march=z16 support.
+// s390xutil runs a series of commands like:
+// go run map.go -fmt=asm ../s390x.csv > asm.S
+// /usr/bin/gcc -c asm.S -march=z16
+// /usr/bin/objdump -d asm.o
+// to create the file decode_generated.txt used to verify the disassembler.
+//
+// Note, the Go disassembler is not expected to support every extended
+// mnemonic, but it should support those which frequently show up in object
+// files compiled by the Go toolchain.
+ + +#define R1 8 +#define R2 0 +#define R3 0 + +#define X2 2 + +#define L1 4 +#define L2 4 + +#define B1 2 +#define B2 1 +#define B3 6 +#define B4 8 + +#define D1 6 +#define D2 11 +#define D3 182 +#define D4 205 + +#define V1 18 +#define V2 3 +#define V3 5 +#define V4 8 + +#define I 124 +#define I1 12 +#define I2 8 +#define I3 9 +#define I4 105 +#define I5 18 + +#define RI2 0 +#define RI3 294 +#define RI4 -168 + +#define M1 7 +#define M3 3 +#define M4 1 +#define M5 9 +#define M6 11 diff --git a/s390x/s390xutil/util.go b/s390x/s390xutil/util.go new file mode 100644 index 00000000..003ce5df --- /dev/null +++ b/s390x/s390xutil/util.go @@ -0,0 +1,90 @@ +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build ignore + +// Generate interesting test cases from s390x objdump via +// go run util.go +// +// This requires "/usr/bin/gcc" and "objdump" be in the PATH this command is run. +// +// These tools can be acquired from the IBM advance toolchain for amd64 hosts too. + +package main + +import ( + "bufio" + "fmt" + "io" + "os" + "os/exec" + "regexp" + "strconv" + "strings" +) + +// Emit a test file using the generator called name.txt. This requires +// a GCC toolchain which supports -march=z16. 
+func genOutput(name, tcPfx string, generator func(io.Writer)) { + // Generate object code from gcc + cmd := exec.Command(tcPfx+"gcc", "-c", "-march=z16", "-x", "assembler-with-cpp", "-o", name+".o", "-") + input, _ := cmd.StdinPipe() + cmd.Stderr = os.Stderr + go func() { + defer input.Close() + generator(input.(io.Writer)) + }() + if cmd.Run() != nil { + fmt.Printf("Failed running gcc for: %s\n", name) + return + } + defer os.Remove(name + ".o") + cmd = exec.Command(tcPfx+"objdump", "-d", name+".o") + + // Run objdump and parse output into test format + output, _ := cmd.StdoutPipe() + defer output.Close() + scanner := bufio.NewScanner(output) + spacere := regexp.MustCompile("[[:space:]]+") + outf, _ := os.Create(name + ".txt") + defer outf.Close() + if cmd.Start() != nil { + fmt.Printf("Failed running objdump for: %s\n", name) + return + } + + for scanner.Scan() { + ln := spacere.Split(scanner.Text(), -1) + var cnt int16 + if len(ln) >= 5 { + v, _ := strconv.ParseInt(ln[2], 16, 16) + if (v >> 6 & 0x3) == 0 { + cnt = 2 + } else if v>>6&0x3 == 1 || v>>6&0x3 == 2 { + cnt = 4 + } else { + cnt = 6 + } + opc := strings.Join(ln[2:cnt+2], "") + dec := strings.Join(ln[cnt+2:], " ") + fmt.Fprintf(outf, "%12s|\tgnu\t%-18s\n", opc, dec) + } + } + cmd.Wait() +} + +// Generate representative instructions for all[1] instructions in s390x.csv. +// +// [1] See hack.h for a few minor, exceptional workarounds. +func emitGenerated(out io.Writer) { + cmd := exec.Command("go", "run", "../s390xmap/map.go", "-fmt=asm", "../s390x.csv") + cmdout, _ := cmd.Output() + out.Write(cmdout) +} + +// Produce generated test outputs. This should be run every so often with +// new versions of objdump to ensure we stay up to date. 
+func main() { + genOutput("decode_generated", "/usr/bin/", emitGenerated) +} From 655f7a06f2d1c86bad93313e6431199c99c2daf2 Mon Sep 17 00:00:00 2001 From: Lin Runze Date: Sun, 4 Aug 2024 18:27:19 +0800 Subject: [PATCH 026/200] riscv64: implement riscv64spec for instruction table generation Support generate all riscv extensions in $GOROOT/src/src/cmd/internal/obj/riscv/inst.go, also including "C" Standard Extension for Compressed Instructions, used to support instruction decoding on riscv64 target. riscv64spec relies on the riscv-opcodes project: https://github.com/riscv/riscv-opcodes Change-Id: Ib0589a87d1ba31fe431162d1f2d44a42bdb2ae06 Reviewed-on: https://go-review.googlesource.com/c/arch/+/602875 Reviewed-by: Mark Ryan LUCI-TryBot-Result: Go LUCI Reviewed-by: Carlos Amedee Reviewed-by: Cherry Mui Reviewed-by: Joel Sing Reviewed-by: Meng Zhuo --- riscv64/riscv64spec/spec.go | 476 ++++++++++++++++++++++++++++++++++++ 1 file changed, 476 insertions(+) create mode 100644 riscv64/riscv64spec/spec.go diff --git a/riscv64/riscv64spec/spec.go b/riscv64/riscv64spec/spec.go new file mode 100644 index 00000000..53c0f1de --- /dev/null +++ b/riscv64/riscv64spec/spec.go @@ -0,0 +1,476 @@ +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// riscv64spec reads the files contained in riscv-opcodes repo +// to collect instruction encoding details. 
+// repo url: https://github.com/riscv/riscv-opcodes +// usage: go run spec.go + +package main + +import ( + "bufio" + "fmt" + "log" + "os" + "path/filepath" + "sort" + "strconv" + "strings" +) + +// RV64GC_zba_zbb_zbs Extensions Listing +// Reference: $GOROOT/src/src/cmd/internal/obj/riscv/inst.go +var extensions = []string{ + "rv_a", + "rv_c", + "rv_c_d", + "rv_d", + "rv_f", + "rv_i", + "rv_m", + "rv_q", + "rv_zba", + "rv_zbb", + "rv_zbs", + "rv_zfh", + "rv_zicsr", + "rv_zifencei", + "rv64_a", + "rv64_c", + "rv64_d", + "rv64_f", + "rv64_i", + "rv64_m", + "rv64_q", + "rv64_zba", + "rv64_zbb", + "rv64_zbs", + "rv64_zfh", +} + +const ( + prologueSec = "// Generated by riscv64spec riscv-opcodes\n// DO NOT EDIT\n\n// Copyright 2024 The Go Authors. All rights reserved.\n// Use of this source code is governed by a BSD-style\n// license that can be found in the LICENSE file.\n\npackage riscv64asm\n\n" + opSec = "const (\n\t_ Op = iota\n" + opstrSec = "var opstr = [...]string{\n" + instFormatsSec = "var instFormats = [...]instFormat{\n" +) + +var ( + ops []string + opstrs = make(map[string]string) + instFormatComments = make(map[string]string) + instFormats = make(map[string]string) +) + +func main() { + log.SetFlags(0) + log.SetPrefix("riscv64spec: ") + + var repoPath string + if len(os.Args) < 1 { + log.Fatal("usage: go run spec.go ") + } + repoPath = os.Args[1] + + fileTables, err := os.Create("tables.go") + if err != nil { + log.Fatal(err) + } + + buf := bufio.NewWriter(fileTables) + _, err = buf.Write([]byte(prologueSec)) + if err != nil { + log.Fatal(err) + } + + for _, ext := range extensions { + f, err := os.Open(filepath.Join(repoPath, ext)) + if err != nil { + log.Fatal(err) + } + defer f.Close() + + buf := bufio.NewScanner(f) + for buf.Scan() { + line := buf.Text() + if len(line) == 0 { + continue + } + words := strings.Fields(line) + if len(words) == 0 || words[0][0] == '#' { + continue + } + + // skip $pseudo_op except rv_zbb/rv64_zbb + if words[0][0] == '$' 
{ + if ext != "rv_zbb" && ext != "rv64_zbb" { + continue + } + words = words[2:] + } + + genInst(words) + } + } + + // c.unimp wasn't in riscv-opcodes, so add it there + c_unimp := "c.unimp 15..0=0" + genInst(strings.Fields(c_unimp)) + + sort.Strings(ops) + + // 1. write op + if _, err := buf.Write([]byte(opSec)); err != nil { + log.Fatal(err) + } + for _, op := range ops { + if _, err := fmt.Fprintf(buf, "\t%s\n", op); err != nil { + log.Fatal(err) + } + } + if _, err := buf.Write([]byte(")\n\n")); err != nil { + log.Fatal(err) + } + + // 2. write opstr + if _, err := buf.Write([]byte(opstrSec)); err != nil { + log.Fatal(err) + } + for _, op := range ops { + if _, err := fmt.Fprintf(buf, "\t%s\n", opstrs[op]); err != nil { + log.Fatal(err) + } + } + if _, err := buf.Write([]byte("}\n\n")); err != nil { + log.Fatal(err) + } + + // 3. write instFormatComment and instFormat + if _, err := buf.Write([]byte(instFormatsSec)); err != nil { + log.Fatal(err) + } + for _, op := range ops { + if _, err := fmt.Fprintf(buf, "\t%s\n\t%s\n", instFormatComments[op], instFormats[op]); err != nil { + log.Fatal(err) + } + } + if _, err = buf.Write([]byte("}\n")); err != nil { + log.Fatal(err) + } + + if err := buf.Flush(); err != nil { + log.Fatal(err) + } + + if err := fileTables.Close(); err != nil { + log.Fatal(err) + } +} + +func genInst(words []string) { + op := strings.ToUpper(strings.Replace(words[0], ".", "_", -1)) + opstr := fmt.Sprintf("%s:\t\"%s\",", op, strings.ToUpper(words[0])) + + var value uint32 + var mask uint32 + var instArgs []string + + for i := 1; i < len(words); i++ { + if strings.Contains(words[i], "=") { + val := strings.Split(words[i], "=") + sec := strings.Split(val[0], "..") + if len(sec) < 2 { + sec[0] = val[0] + } + subval, submsk := genValueAndMask(val, sec) + value |= subval + mask |= submsk + } else if len(words[i]) > 0 { + instArgs = append(instArgs, words[i]) + } + } + + instArgsStr := inferFormats(instArgs, op) + instFormatComment := "// " + 
strings.Replace(op, "_", ".", -1) + " " + strings.Replace(instArgsStr, "arg_", "", -1) + instFormat := fmt.Sprintf("{mask: %#08x, value: %#08x, op: %s, args: instArgs{%s}},", mask, value, op, instArgsStr) + + // Handle the suffix of atomic instruction. + if isAtomic(op) { + suffix := []string{"", ".RL", ".AQ", ".AQRL"} + // Re-generate the opcode string, opcode value and mask. + for i, suf := range suffix { + aop := op + strings.Replace(suf, ".", "_", -1) + aopstr := fmt.Sprintf("%s:\t\"%s\",", aop, strings.ToUpper(words[0])+suf) + avalue := value | (uint32(i) << 25) + amask := mask | 0x06000000 + ainstFormatComment := "// " + strings.Replace(aop, "_", ".", -1) + " " + strings.Replace(instArgsStr, "arg_", "", -1) + ainstFormat := fmt.Sprintf("{mask: %#08x, value: %#08x, op: %s, args: instArgs{%s}},", amask, avalue, aop, instArgsStr) + ops = append(ops, aop) + opstrs[aop] = aopstr + instFormats[aop] = ainstFormat + instFormatComments[aop] = ainstFormatComment + } + } else { + ops = append(ops, op) + opstrs[op] = opstr + instFormats[op] = instFormat + instFormatComments[op] = instFormatComment + } +} + +// inferFormats identifies inst format: +// R-Type (inst rd, rs1, rs2), +// I-Type (inst rd, rs1, imm / inst rd, offset(rs1)), +// UJ-Type (inst rd, imm), +// U-Type (inst rd, imm), +// SB-Type (inst rs1, rs2, offset) +// S-Type (inst rs2, offset(rs1)) +func inferFormats(instArgs []string, op string) string { + switch { + case strings.Contains(op, "AMO") || strings.Contains(op, "SC_"): + return "arg_rd, arg_rs2, arg_rs1_amo" + + case strings.Contains(op, "LR_"): + return "arg_rd, arg_rs1_amo" + + case op == "LB" || op == "LBU" || op == "LD" || + op == "LH" || op == "LHU" || op == "LW" || op == "LWU": + return "arg_rd, arg_rs1_mem" + + case op == "FLD" || op == "FLW" || op == "FLH" || op == "FLQ": + return "arg_fd, arg_rs1_mem" + + case op == "FSD" || op == "FSW" || op == "FSH" || op == "FSQ": + return "arg_fs2, arg_rs1_store" + + case op == "SD" || op == "SB" || op == 
"SW" || op == "SH": + return "arg_rs2, arg_rs1_store" + + case op == "CSRRW" || op == "CSRRS" || op == "CSRRC": + return "arg_rd, arg_csr, arg_rs1" + + case op == "CSRRWI" || op == "CSRRSI" || op == "CSRRCI": + return "arg_rd, arg_csr, arg_zimm" + + case op == "JALR": + return "arg_rd, arg_rs1_mem" + + case op == "FENCE_I": + return "" + + case op == "FENCE": + return "arg_pred, arg_succ" + + default: + var instStr []string + for _, arg := range instArgs { + if decodeArgs(arg, op) != "" { + instStr = append(instStr, decodeArgs(arg, op)) + } + } + return strings.Join(instStr, ", ") + } +} + +// decodeArgs turns the args into formats defined in arg.go +func decodeArgs(arg string, op string) string { + switch { + case strings.Contains("arg_rd", arg): + if isFloatReg(op, "rd") || strings.Contains(op, "C_FLDSP") { + return "arg_fd" + } + return "arg_rd" + + case strings.Contains("arg_rs1", arg): + if isFloatReg(op, "rs") { + return "arg_fs1" + } + return "arg_rs1" + + case strings.Contains("arg_rs2", arg): + if isFloatReg(op, "rs") { + return "arg_fs2" + } + return "arg_rs2" + + case strings.Contains("arg_rs3", arg): + if isFloatReg(op, "rs") { + return "arg_fs3" + } + return "arg_rs3" + + case arg == "imm12": + return "arg_imm12" + + case arg == "imm20": + return "arg_imm20" + + case arg == "jimm20": + return "arg_jimm20" + + case arg == "bimm12lo": + return "arg_bimm12" + + case arg == "imm12lo": + return "arg_simm12" + + case arg == "shamtw": + return "arg_shamt5" + + case arg == "shamtd": + return "arg_shamt6" + + case arg == "rd_p": + if strings.Contains(op, "C_FLD") { + return "arg_fd_p" + } + return "arg_rd_p" + + case arg == "rs1_p": + return "arg_rs1_p" + + case arg == "rd_rs1_p": + return "arg_rd_rs1_p" + + case arg == "rs2_p": + if strings.Contains(op, "C_FSD") { + return "arg_fs2_p" + } + return "arg_rs2_p" + + case arg == "rd_n0": + return "arg_rd_n0" + + case arg == "rs1_n0": + return "arg_rs1_n0" + + case arg == "rd_rs1_n0": + return "arg_rd_rs1_n0" + + 
case arg == "c_rs1_n0": + return "arg_c_rs1_n0" + + case arg == "c_rs2_n0": + return "arg_c_rs2_n0" + + case arg == "c_rs2": + if strings.Contains(op, "C_FSDSP") { + return "arg_c_fs2" + } + return "arg_c_rs2" + + case arg == "rd_n2": + return "arg_rd_n2" + + case arg == "c_imm6lo": + return "arg_c_imm6" + + case arg == "c_nzimm6lo": + return "arg_c_nzimm6" + + case arg == "c_nzuimm6lo": + return "arg_c_nzuimm6" + + case arg == "c_uimm7lo": + return "arg_c_uimm7" + + case arg == "c_uimm8lo": + return "arg_c_uimm8" + + case arg == "c_uimm8sp_s": + return "arg_c_uimm8sp_s" + + case arg == "c_uimm8splo": + return "arg_c_uimm8sp" + + case arg == "c_uimm9sp_s": + return "arg_c_uimm9sp_s" + + case arg == "c_uimm9splo": + return "arg_c_uimm9sp" + + case arg == "c_bimm9lo": + return "arg_c_bimm9" + + case arg == "c_nzimm10lo": + return "arg_c_nzimm10" + + case arg == "c_nzuimm10": + return "arg_c_nzuimm10" + + case arg == "c_imm12": + return "arg_c_imm12" + + case arg == "c_nzimm18lo": + return "arg_c_nzimm18" + } + return "" +} + +// genValueAndMask generates instruction value and relative mask. +func genValueAndMask(valStr []string, secStr []string) (uint32, uint32) { + var val int64 + + val, err := strconv.ParseInt(valStr[1], 0, 32) + if err != nil { + log.Fatal(err) + } + + l, err := strconv.Atoi(secStr[0]) + if err != nil { + log.Fatal(err) + } + var r int + if len(secStr) == 1 { + r = l + } else { + r, err = strconv.Atoi(secStr[1]) + if err != nil { + log.Fatal(err) + } + } + + subval := uint32(val << r) + submsk := ^uint32(0) << (31 - l) >> (31 - l + r) << r + return subval, submsk +} + +// isAtomic reports whether the instruction is atomic. +func isAtomic(op string) bool { + return strings.HasPrefix(op, "AMO") || strings.HasPrefix(op, "LR_") || strings.HasPrefix(op, "SC_") +} + +// isFloatReg reports whether the register of a floating point instruction is a floating point register. 
+func isFloatReg(op string, reg string) bool { + switch { + case strings.Contains(op, "FADD") || strings.Contains(op, "FSUB") || + strings.Contains(op, "FDIV") || strings.Contains(op, "FMUL") || + strings.Contains(op, "FMIN") || strings.Contains(op, "FMAX") || + strings.Contains(op, "FMADD") || strings.Contains(op, "FMSUB") || + strings.Contains(op, "FCVT_D_S") || strings.Contains(op, "FCVT_S_D") || + strings.Contains(op, "FCVT_D_Q") || strings.Contains(op, "FCVT_Q_D") || + strings.Contains(op, "FCVT_S_Q") || strings.Contains(op, "FCVT_Q_S") || + strings.Contains(op, "FCVT_H_S") || strings.Contains(op, "FCVT_S_H") || + strings.Contains(op, "FNM") || strings.Contains(op, "FNEG") || + strings.Contains(op, "FSQRT") || strings.Contains(op, "FSGNJ"): + return true + + case strings.Contains(op, "FCLASS") || strings.Contains(op, "FCVT_L") || + strings.Contains(op, "FCVT_W") || strings.Contains(op, "FEQ") || + strings.Contains(op, "FLE") || strings.Contains(op, "FLT") || + strings.Contains(op, "FMV_X_H") || strings.Contains(op, "FMV_X_D") || + strings.Contains(op, "FMV_X_W"): + return reg != "rd" + + case strings.Contains(op, "FCVT_D") || strings.Contains(op, "FCVT_S") || + strings.Contains(op, "FCVT_H") || strings.Contains(op, "FCVT_Q") || + strings.Contains(op, "FMV_H_X") || strings.Contains(op, "FMV_D_X") || + strings.Contains(op, "FMV_W_X"): + return reg != "rs" + + default: + return false + } +} From 93cb9f839e50a5d87c483533133abe1488cc1c7a Mon Sep 17 00:00:00 2001 From: limeidan Date: Sat, 16 Oct 2021 15:24:32 +0800 Subject: [PATCH 027/200] loong64: Implement loong64 GNU and plan9 format disassembler Loong64 documentation: https://github.com/loongson/LoongArch-Documentation.git Change-Id: Iff47bdcfc787f69361be510bc4784fe91e10431c Co-authored-by: huangqiqi Co-authored-by: chenguoqi Reviewed-on: https://go-review.googlesource.com/c/arch/+/358854 LUCI-TryBot-Result: Go LUCI Reviewed-by: sophie zhao Reviewed-by: abner chenc Reviewed-by: Michael Pratt Reviewed-by: Qiqi 
Huang Reviewed-by: Zxilly Chou Reviewed-by: Cherry Mui --- loong64/loong64asm/arg.go | 93 ++ loong64/loong64asm/decode.go | 269 ++++ loong64/loong64asm/decode_test.go | 76 + loong64/loong64asm/ext_test.go | 405 +++++ loong64/loong64asm/gnu.go | 16 + loong64/loong64asm/inst.go | 298 ++++ loong64/loong64asm/objdump_test.go | 145 ++ loong64/loong64asm/objdumpext_test.go | 249 +++ loong64/loong64asm/plan9x.go | 536 +++++++ loong64/loong64asm/tables.go | 1613 ++++++++++++++++++++ loong64/loong64asm/testdata/gnucases.txt | 415 +++++ loong64/loong64asm/testdata/plan9cases.txt | 365 +++++ loong64/loong64spec/spec.go | 528 +++++++ 13 files changed, 5008 insertions(+) create mode 100644 loong64/loong64asm/arg.go create mode 100644 loong64/loong64asm/decode.go create mode 100644 loong64/loong64asm/decode_test.go create mode 100644 loong64/loong64asm/ext_test.go create mode 100644 loong64/loong64asm/gnu.go create mode 100644 loong64/loong64asm/inst.go create mode 100644 loong64/loong64asm/objdump_test.go create mode 100644 loong64/loong64asm/objdumpext_test.go create mode 100644 loong64/loong64asm/plan9x.go create mode 100644 loong64/loong64asm/tables.go create mode 100644 loong64/loong64asm/testdata/gnucases.txt create mode 100644 loong64/loong64asm/testdata/plan9cases.txt create mode 100644 loong64/loong64spec/spec.go diff --git a/loong64/loong64asm/arg.go b/loong64/loong64asm/arg.go new file mode 100644 index 00000000..460af3d1 --- /dev/null +++ b/loong64/loong64asm/arg.go @@ -0,0 +1,93 @@ +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package loong64asm + +// Naming for Go decoder arguments: +// +// - arg_fd: a Floating Point operand register fd encoded in the fd[4:0] field +// +// - arg_fj: a Floating Point operand register fj encoded in the fj[9:5] field +// +// - arg_fk: a Floating Point operand register fk encoded in the fk[14:10] field +// +// - arg_fa: a Floating Point operand register fa encoded in the fa[19:15] field +// +// - arg_rd: a general-purpose register rd encoded in the rd[4:0] field +// +// - arg_rj: a general-purpose register rj encoded in the rj[9:5] field +// +// - arg_rk: a general-purpose register rk encoded in the rk[14:10] field +// +// - arg_fcsr_4_0: float control status register encoded in [4:0] field +// +// - arg_cd_2_0: condition flag register encoded in [2:0] field +// +// - arg_sa2_16_15: shift bits constant encoded in [16:15] field +// +// - arg_code_14_0: arg for exception process routine encoded in [14:0] field +// +// - arg_ui5_14_10: 5bits unsigned immediate +// +// - arg_lsbw: For details, please refer to chapter 2.2.3.8 of instruction manual +// +// - arg_msbw: For details, please refer to chapter 2.2.3.9 of instruction manual +// +// - arg_hint_4_0: hint field implied the prefetch type and the data should fetch to cache's level +// 0: load to data cache level 1 +// 8: store to data cache level 1 +// other: no define +// +// - arg_si12_21_10: 12bits signed immediate + +type instArg uint16 + +const ( + _ instArg = iota + // 1-5 + arg_fd + arg_fj + arg_fk + arg_fa + arg_rd + // 6-10 + arg_rj + arg_rk + arg_op_4_0 + arg_fcsr_4_0 + arg_fcsr_9_5 + // 11-15 + arg_csr_23_10 + arg_cd + arg_cj + arg_ca + arg_sa2_16_15 + // 16-20 + arg_sa3_17_15 + arg_code_4_0 + arg_code_14_0 + arg_ui5_14_10 + arg_ui6_15_10 + // 21-25 + arg_ui12_21_10 + arg_lsbw + arg_msbw + arg_lsbd + arg_msbd + // 26-30 + arg_hint_4_0 + arg_hint_14_0 + arg_level_14_0 + arg_level_17_10 + arg_seq_17_10 + // 31-35 + arg_si12_21_10 + arg_si14_23_10 + arg_si16_25_10 + arg_si20_24_5 + arg_offset_20_0 
+ // 36~ + arg_offset_25_0 + arg_offset_15_0 +) diff --git a/loong64/loong64asm/decode.go b/loong64/loong64asm/decode.go new file mode 100644 index 00000000..3aca0074 --- /dev/null +++ b/loong64/loong64asm/decode.go @@ -0,0 +1,269 @@ +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package loong64asm + +import ( + "encoding/binary" + "fmt" +) + +type instArgs [5]instArg + +// An instFormat describes the format of an instruction encoding. +type instFormat struct { + mask uint32 + value uint32 + op Op + // args describe how to decode the instruction arguments. + // args is stored as a fixed-size array. + // if there are fewer than len(args) arguments, args[i] == 0 marks + // the end of the argument list. + args instArgs +} + +var ( + errShort = fmt.Errorf("truncated instruction") + errUnknown = fmt.Errorf("unknown instruction") +) + +var decoderCover []bool + +func init() { + decoderCover = make([]bool, len(instFormats)) +} + +// Decode decodes the 4 bytes in src as a single instruction. +func Decode(src []byte) (inst Inst, err error) { + if len(src) < 4 { + return Inst{}, errShort + } + + x := binary.LittleEndian.Uint32(src) + +Search: + for i := range instFormats { + f := &instFormats[i] + + if (x & f.mask) != f.value { + continue + } + + // Decode args. + var args Args + for j, aop := range f.args { + if aop == 0 { + break + } + + arg := decodeArg(aop, x, i) + if arg == nil { + // Cannot decode argument + continue Search + } + + args[j] = arg + } + + decoderCover[i] = true + inst = Inst{ + Op: f.op, + Args: args, + Enc: x, + } + return inst, nil + } + + return Inst{}, errUnknown +} + +// decodeArg decodes the arg described by aop from the instruction bits x. +// It returns nil if x cannot be decoded according to aop. 
+func decodeArg(aop instArg, x uint32, index int) Arg { + switch aop { + case arg_fd: + return F0 + Reg(x&((1<<5)-1)) + + case arg_fj: + return F0 + Reg((x>>5)&((1<<5)-1)) + + case arg_fk: + return F0 + Reg((x>>10)&((1<<5)-1)) + + case arg_fa: + return F0 + Reg((x>>15)&((1<<5)-1)) + + case arg_rd: + return R0 + Reg(x&((1<<5)-1)) + + case arg_rj: + return R0 + Reg((x>>5)&((1<<5)-1)) + + case arg_rk: + return R0 + Reg((x>>10)&((1<<5)-1)) + + case arg_fcsr_4_0: + return FCSR0 + Fcsr(x&((1<<5)-1)) + + case arg_fcsr_9_5: + return FCSR0 + Fcsr((x>>5)&((1<<5)-1)) + + case arg_cd: + return FCC0 + Fcc(x&((1<<3)-1)) + + case arg_cj: + return FCC0 + Fcc((x>>5)&((1<<3)-1)) + + case arg_ca: + return FCC0 + Fcc((x>>15)&((1<<3)-1)) + + case arg_op_4_0: + tmp := x & ((1 << 5) - 1) + return Uimm{tmp, false} + + case arg_csr_23_10: + tmp := (x >> 10) & ((1 << 14) - 1) + return Uimm{tmp, false} + + case arg_sa2_16_15: + f := &instFormats[index] + tmp := SaSimm((x >> 15) & ((1 << 2) - 1)) + if (f.op == ALSL_D) || (f.op == ALSL_W) || (f.op == ALSL_WU) { + return tmp + 1 + } else { + return tmp + 0 + } + + case arg_sa3_17_15: + return SaSimm((x >> 15) & ((1 << 3) - 1)) + + case arg_code_4_0: + return CodeSimm(x & ((1 << 5) - 1)) + + case arg_code_14_0: + return CodeSimm(x & ((1 << 15) - 1)) + + case arg_ui5_14_10: + tmp := (x >> 10) & ((1 << 5) - 1) + return Uimm{tmp, false} + + case arg_ui6_15_10: + tmp := (x >> 10) & ((1 << 6) - 1) + return Uimm{tmp, false} + + case arg_ui12_21_10: + tmp := ((x >> 10) & ((1 << 12) - 1) & 0xfff) + return Uimm{tmp, false} + + case arg_lsbw: + tmp := (x >> 10) & ((1 << 5) - 1) + return Uimm{tmp, false} + + case arg_msbw: + tmp := (x >> 16) & ((1 << 5) - 1) + return Uimm{tmp, false} + + case arg_lsbd: + tmp := (x >> 10) & ((1 << 6) - 1) + return Uimm{tmp, false} + + case arg_msbd: + tmp := (x >> 16) & ((1 << 6) - 1) + return Uimm{tmp, false} + + case arg_hint_4_0: + tmp := x & ((1 << 5) - 1) + return Uimm{tmp, false} + + case arg_hint_14_0: + tmp := x & 
((1 << 15) - 1) + return Uimm{tmp, false} + + case arg_level_14_0: + tmp := x & ((1 << 15) - 1) + return Uimm{tmp, false} + + case arg_level_17_10: + tmp := (x >> 10) & ((1 << 8) - 1) + return Uimm{tmp, false} + + case arg_seq_17_10: + tmp := (x >> 10) & ((1 << 8) - 1) + return Uimm{tmp, false} + + case arg_si12_21_10: + var tmp int16 + + // no int12, so sign-extend a 12-bit signed to 16-bit signed + if (x & 0x200000) == 0x200000 { + tmp = int16(((x >> 10) & ((1 << 12) - 1)) | 0xf000) + } else { + tmp = int16(((x >> 10) & ((1 << 12) - 1)) | 0x0000) + } + return Simm16{tmp, 12} + + case arg_si14_23_10: + var tmp int32 + if (x & 0x800000) == 0x800000 { + tmp = int32((((x >> 10) & ((1 << 14) - 1)) << 2) | 0xffff0000) + } else { + tmp = int32((((x >> 10) & ((1 << 14) - 1)) << 2) | 0x00000000) + } + return Simm32{tmp, 14} + + case arg_si16_25_10: + var tmp int32 + + if (x & 0x2000000) == 0x2000000 { + tmp = int32(((x >> 10) & ((1 << 16) - 1)) | 0xffff0000) + } else { + tmp = int32(((x >> 10) & ((1 << 16) - 1)) | 0x00000000) + } + + return Simm32{tmp, 16} + + case arg_si20_24_5: + var tmp int32 + if (x & 0x1000000) == 0x1000000 { + tmp = int32(((x >> 5) & ((1 << 20) - 1)) | 0xfff00000) + } else { + tmp = int32(((x >> 5) & ((1 << 20) - 1)) | 0x00000000) + } + return Simm32{tmp, 20} + + case arg_offset_20_0: + var tmp int32 + + if (x & 0x10) == 0x10 { + tmp = int32(((((x << 16) | ((x >> 10) & ((1 << 16) - 1))) & ((1 << 21) - 1)) << 2) | 0xff800000) + } else { + tmp = int32((((x << 16) | ((x >> 10) & ((1 << 16) - 1))) & ((1 << 21) - 1)) << 2) + } + + return OffsetSimm{tmp, 21} + + case arg_offset_15_0: + var tmp int32 + if (x & 0x2000000) == 0x2000000 { + tmp = int32((((x >> 10) & ((1 << 16) - 1)) << 2) | 0xfffc0000) + } else { + tmp = int32((((x >> 10) & ((1 << 16) - 1)) << 2) | 0x00000000) + } + + return OffsetSimm{tmp, 16} + + case arg_offset_25_0: + var tmp int32 + + if (x & 0x200) == 0x200 { + tmp = int32(((((x << 16) | ((x >> 10) & ((1 << 16) - 1))) & ((1 << 26) - 1)) 
<< 2) | 0xf0000000) + } else { + tmp = int32(((((x << 16) | ((x >> 10) & ((1 << 16) - 1))) & ((1 << 26) - 1)) << 2) | 0x00000000) + } + + return OffsetSimm{tmp, 26} + default: + return nil + } +} diff --git a/loong64/loong64asm/decode_test.go b/loong64/loong64asm/decode_test.go new file mode 100644 index 00000000..74a32773 --- /dev/null +++ b/loong64/loong64asm/decode_test.go @@ -0,0 +1,76 @@ +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package loong64asm + +import ( + "encoding/hex" + "io/ioutil" + "path/filepath" + "strings" + "testing" +) + +func testDecode(t *testing.T, syntax string) { + input := filepath.Join("testdata", syntax+"cases.txt") + data, err := ioutil.ReadFile(input) + if err != nil { + t.Fatal(err) + } + all := string(data) + for strings.Contains(all, "\t\t") { + all = strings.Replace(all, "\t\t", "\t", -1) + } + for _, line := range strings.Split(all, "\n") { + line = strings.TrimSpace(line) + if line == "" || strings.HasPrefix(line, "#") { + continue + } + f := strings.SplitN(line, "\t", 2) + i := strings.Index(f[0], "|") + if i < 0 { + t.Errorf("parsing %q: missing | separator", f[0]) + continue + } + if i%2 != 0 { + t.Errorf("parsing %q: misaligned | separator", f[0]) + } + code, err := hex.DecodeString(f[0][:i] + f[0][i+1:]) + if err != nil { + t.Errorf("parsing %q: %v", f[0], err) + continue + } + asm := f[1] + inst, decodeErr := Decode(code) + if decodeErr != nil && decodeErr != errUnknown { + // Some rarely used system instructions are not supported + // Following logicals will filter such unknown instructions + t.Errorf("parsing %x: %s", code, decodeErr) + continue + } + var out string + switch syntax { + case "gnu": + out = GNUSyntax(inst) + case "plan9": + out = GoSyntax(inst, 0, nil) + default: + t.Errorf("unknown syntax %q", syntax) + continue + } + + // var out string + if asm != out || len(asm) != len(out) { + 
t.Errorf("Decode(%s) [%s] = %s want %s", f[0], syntax, out, asm) + } + } +} + +func TestDecodeGNUSyntax(t *testing.T) { + testDecode(t, "gnu") +} + +func TestDecodeGoSyntax(t *testing.T) { + testDecode(t, "plan9") +} diff --git a/loong64/loong64asm/ext_test.go b/loong64/loong64asm/ext_test.go new file mode 100644 index 00000000..5c90586e --- /dev/null +++ b/loong64/loong64asm/ext_test.go @@ -0,0 +1,405 @@ +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Support for testing against external disassembler program. + +package loong64asm + +import ( + "bufio" + "bytes" + "encoding/hex" + "flag" + "fmt" + "io" + "io/ioutil" + "log" + "math/rand" + "os" + "os/exec" + "path/filepath" + "strings" + "testing" + "time" +) + +var ( + dumpTest = flag.Bool("dump", false, "dump all encodings") + mismatch = flag.Bool("mismatch", false, "log allowed mismatches") + keep = flag.Bool("keep", false, "keep object files around") + debug = false +) + +// An ExtInst represents a single decoded instruction parsed +// from an external disassembler's output. +type ExtInst struct { + addr uint64 + enc [4]byte + nenc int + text string +} + +func (r ExtInst) String() string { + return fmt.Sprintf("%#x: % x: %s", r.addr, r.enc, r.text) +} + +// An ExtDis is a connection between an external disassembler and a test. +type ExtDis struct { + Dec chan ExtInst + File *os.File + Size int + Cmd *exec.Cmd +} + +// Run runs the given command - the external disassembler - and returns +// a buffered reader of its standard output. +func (ext *ExtDis) Run(cmd ...string) (*bufio.Reader, error) { + if *keep { + log.Printf("%s\n", strings.Join(cmd, " ")) + } + ext.Cmd = exec.Command(cmd[0], cmd[1:]...) 
+ out, err := ext.Cmd.StdoutPipe() + if err != nil { + return nil, fmt.Errorf("stdoutpipe: %v", err) + } + if err := ext.Cmd.Start(); err != nil { + return nil, fmt.Errorf("exec: %v", err) + } + + b := bufio.NewReaderSize(out, 1<<20) + return b, nil +} + +// Wait waits for the command started with Run to exit. +func (ext *ExtDis) Wait() error { + return ext.Cmd.Wait() +} + +// testExtDis tests a set of byte sequences against an external disassembler. +// The disassembler is expected to produce the given syntax and run +// in the given architecture mode (16, 32, or 64-bit). +// The extdis function must start the external disassembler +// and then parse its output, sending the parsed instructions on ext.Dec. +// The generate function calls its argument f once for each byte sequence +// to be tested. The generate function itself will be called twice, and it must +// make the same sequence of calls to f each time. +// When a disassembly does not match the internal decoding, +// allowedMismatch determines whether this mismatch should be +// allowed, or else considered an error. +func testExtDis( + t *testing.T, + syntax string, + extdis func(ext *ExtDis) error, + generate func(f func([]byte)), + allowedMismatch func(text string, inst *Inst, dec ExtInst) bool, +) { + start := time.Now() + ext := &ExtDis{ + Dec: make(chan ExtInst), + } + errc := make(chan error) + + // First pass: write instructions to input file for external disassembler. + file, f, size, err := writeInst(generate) + if err != nil { + t.Fatal(err) + } + ext.Size = size + ext.File = f + defer func() { + f.Close() + if !*keep { + os.Remove(file) + } + }() + + // Second pass: compare disassembly against our decodings. 
+ var ( + totalTests = 0 + totalSkips = 0 + totalErrors = 0 + + errors = make([]string, 0, 100) // Sampled errors, at most cap + ) + go func() { + errc <- extdis(ext) + }() + + generate(func(enc []byte) { + dec, ok := <-ext.Dec + if !ok { + t.Errorf("decoding stream ended early") + return + } + inst, text := disasm(syntax, pad(enc)) + + totalTests++ + if *dumpTest { + fmt.Printf("%x -> %s [%d]\n", enc[:len(enc)], dec.text, dec.nenc) + } + + if text != dec.text && !strings.Contains(dec.text, "unknown") && syntax == "gnu" { + suffix := "" + if allowedMismatch(text, &inst, dec) { + totalSkips++ + if !*mismatch { + return + } + suffix += " (allowed mismatch)" + } + totalErrors++ + cmp := fmt.Sprintf("decode(%x) = %q, %d, want %q, %d%s\n", enc, text, len(enc), dec.text, dec.nenc, suffix) + + if len(errors) >= cap(errors) { + j := rand.Intn(totalErrors) + if j >= cap(errors) { + return + } + errors = append(errors[:j], errors[j+1:]...) + } + errors = append(errors, cmp) + } + }) + + if *mismatch { + totalErrors -= totalSkips + } + + fmt.Printf("totalTest: %d total skip: %d total error: %d\n", totalTests, totalSkips, totalErrors) + + // Here are some errors about mismatches(44) + for _, b := range errors { + t.Log(b) + } + + if totalErrors > 0 { + t.Fail() + } + t.Logf("%d test cases, %d expected mismatches, %d failures; %.0f cases/second", totalTests, totalSkips, totalErrors, float64(totalTests)/time.Since(start).Seconds()) + t.Logf("decoder coverage: %.1f%%;\n", decodeCoverage()) +} + +// Start address of text. +const start = 0x8000 + +// writeInst writes the generated byte sequences to a new file +// starting at offset start. That file is intended to be the input to +// the external disassembler. 
+func writeInst(generate func(func([]byte))) (file string, f *os.File, size int, err error) { + f, err = ioutil.TempFile("", "loong64asm") + if err != nil { + return + } + + file = f.Name() + + f.Seek(start, io.SeekStart) + w := bufio.NewWriter(f) + defer w.Flush() + size = 0 + generate(func(x []byte) { + if debug { + fmt.Printf("%#x: %x%x\n", start+size, x, zeros[len(x):]) + } + w.Write(x) + w.Write(zeros[len(x):]) + size += len(zeros) + }) + return file, f, size, nil +} + +var zeros = []byte{0, 0, 0, 0} + +// pad pads the code sequence with pops. +func pad(enc []byte) []byte { + if len(enc) < 4 { + enc = append(enc[:len(enc):len(enc)], zeros[:4-len(enc)]...) + } + return enc +} + +// disasm returns the decoded instruction and text +// for the given source bytes, using the given syntax and mode. +func disasm(syntax string, src []byte) (inst Inst, text string) { + var err error + inst, err = Decode(src) + if err != nil { + text = "error: " + err.Error() + return + } + text = inst.String() + switch syntax { + case "gnu": + text = GNUSyntax(inst) + case "plan9": // [sic] + text = GoSyntax(inst, 0, nil) + default: + text = "error: unknown syntax " + syntax + } + return +} + +// decodecoverage returns a floating point number denoting the +// decoder coverage. +func decodeCoverage() float64 { + n := 0 + for _, t := range decoderCover { + if t { + n++ + } + } + return 100 * float64(1+n) / float64(1+len(decoderCover)) +} + +// Helpers for writing disassembler output parsers. + +// isHex reports whether b is a hexadecimal character (0-9a-fA-F). +func isHex(b byte) bool { + return ('0' <= b && b <= '9') || ('a' <= b && b <= 'f') || ('A' <= b && b <= 'F') +} + +// parseHex parses the hexadecimal byte dump in hex, +// appending the parsed bytes to raw and returning the updated slice. +// The returned bool reports whether any invalid hex was found. +// Spaces and tabs between bytes are okay but any other non-hex is not. 
+func parseHex(hex []byte, raw []byte) ([]byte, bool) { + hex = bytes.TrimSpace(hex) + for j := 0; j < len(hex); { + for hex[j] == ' ' || hex[j] == '\t' { + j++ + } + if j >= len(hex) { + break + } + if j+2 > len(hex) || !isHex(hex[j]) || !isHex(hex[j+1]) { + return nil, false + } + raw = append(raw, unhex(hex[j])<<4|unhex(hex[j+1])) + j += 2 + } + return raw, true +} + +func unhex(b byte) byte { + if '0' <= b && b <= '9' { + return b - '0' + } else if 'A' <= b && b <= 'F' { + return b - 'A' + 10 + } else if 'a' <= b && b <= 'f' { + return b - 'a' + 10 + } + return 0 +} + +// index is like bytes.Index(s, []byte(t)) but avoids the allocation. +func index(s []byte, t string) int { + i := 0 + for { + j := bytes.IndexByte(s[i:], t[0]) + if j < 0 { + return -1 + } + i = i + j + if i+len(t) > len(s) { + return -1 + } + for k := 1; k < len(t); k++ { + if s[i+k] != t[k] { + goto nomatch + } + } + return i + nomatch: + i++ + } +} + +// fixSpace rewrites runs of spaces, tabs, and newline characters into single spaces in s. +// If s must be rewritten, it is rewritten in place. +func fixSpace(s []byte) []byte { + s = bytes.TrimSpace(s) + for i := 0; i < len(s); i++ { + if s[i] == '\t' || s[i] == '\n' || i > 0 && s[i] == ' ' && s[i-1] == ' ' { + goto Fix + } + } + return s + +Fix: + b := s + w := 0 + for i := 0; i < len(s); i++ { + c := s[i] + if c == '\t' || c == '\n' { + c = ' ' + } + if c == ' ' && w > 0 && b[w-1] == ' ' { + continue + } + b[w] = c + w++ + } + if w > 0 && b[w-1] == ' ' { + w-- + } + return b[:w] +} + +// Generators. +// +// The test cases are described as functions that invoke a callback repeatedly, +// with a new input sequence each time. These helpers make writing those +// a little easier. + +// hexCases generates the cases written in hexadecimal in the encoded string. +// Spaces in 'encoded' separate entire test cases, not individual bytes. 
+func hexCases(t *testing.T, encoded string) func(func([]byte)) { + return func(try func([]byte)) { + for _, x := range strings.Fields(encoded) { + src, err := hex.DecodeString(x) + if err != nil { + t.Errorf("parsing %q: %v", x, err) + } + try(src) + } + } +} + +// testdataCases generates the test cases recorded in testdata/cases.txt. +// It only uses the inputs; it ignores the answers recorded in that file. +func testdataCases(t *testing.T, syntax string) func(func([]byte)) { + var codes [][]byte + input := filepath.Join("testdata", syntax+"cases.txt") + data, err := ioutil.ReadFile(input) + if err != nil { + t.Fatal(err) + } + for _, line := range strings.Split(string(data), "\n") { + line = strings.TrimSpace(line) + if line == "" || strings.HasPrefix(line, "#") { + continue + } + f := strings.Fields(line)[0] + i := strings.Index(f, "|") + if i < 0 { + t.Errorf("parsing %q: missing | separator", f) + continue + } + if i%2 != 0 { + t.Errorf("parsing %q: misaligned | separator", f) + } + code, err := hex.DecodeString(f[:i] + f[i+1:]) + if err != nil { + t.Errorf("parsing %q: %v", f, err) + continue + } + codes = append(codes, code) + } + + return func(try func([]byte)) { + for _, code := range codes { + try(code) + } + } +} diff --git a/loong64/loong64asm/gnu.go b/loong64/loong64asm/gnu.go new file mode 100644 index 00000000..4807abc5 --- /dev/null +++ b/loong64/loong64asm/gnu.go @@ -0,0 +1,16 @@ +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package loong64asm + +import ( + "strings" +) + +// GNUSyntax returns the GNU assembler syntax for the instruction, as defined by GNU binutils. +// This form typically matches the syntax defined in the Loong64 Reference Manual. 
See +// https://loongson.github.io/LoongArch-Documentation/LoongArch-Vol1-EN.html +func GNUSyntax(inst Inst) string { + return strings.ToLower(inst.String()) +} diff --git a/loong64/loong64asm/inst.go b/loong64/loong64asm/inst.go new file mode 100644 index 00000000..1ac5c797 --- /dev/null +++ b/loong64/loong64asm/inst.go @@ -0,0 +1,298 @@ +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package loong64asm + +import ( + "fmt" + "strings" +) + +// An Inst is a single instruction. +type Inst struct { + Op Op // Opcode mnemonic + Enc uint32 // Raw encoding bits. + Args Args // Instruction arguments, in Loong64 manual order. +} + +func (i Inst) String() string { + var op string = i.Op.String() + var args []string + + for _, arg := range i.Args { + if arg == nil { + break + } + args = append(args, arg.String()) + } + + switch i.Op { + case OR: + if i.Args[2].(Reg) == R0 { + op = "move" + args = args[0:2] + } + + case ANDI: + if i.Args[0].(Reg) == R0 && i.Args[1].(Reg) == R0 { + return "nop" + } + + case JIRL: + if i.Args[0].(Reg) == R0 && i.Args[2].(OffsetSimm).Imm == 0 { + return "jr " + args[1] + } + + case BLT: + if i.Args[0].(Reg) == R0 { + op = "bgtz" + args = args[1:] + } else if i.Args[1].(Reg) == R0 { + op = "bltz" + args = append(args[:1], args[2:]...) + } + + case BGE: + if i.Args[0].(Reg) == R0 { + op = "blez" + args = args[1:] + } else if i.Args[1].(Reg) == R0 { + op = "bgez" + args = append(args[:1], args[2:]...) + } + } + + if len(args) == 0 { + return op + } else { + return op + " " + strings.Join(args, ", ") + } +} + +// An Op is an Loong64 opcode. +type Op uint16 + +// NOTE: The actual Op values are defined in tables.go. +// They are chosen to simplify instruction decoding and +// are not a dense packing from 0 to N, although the +// density is high, probably at least 90%. 
+func (op Op) String() string { + if (op >= Op(len(opstr))) || (opstr[op] == "") { + return fmt.Sprintf("Op(%d)", int(op)) + } + + return opstr[op] +} + +// An Args holds the instruction arguments. +// If an instruction has fewer than 5 arguments, +// the final elements in the array are nil. +type Args [5]Arg + +// An Arg is a single instruction argument +type Arg interface { + String() string +} + +// A Reg is a single register. +// The zero value denotes R0, not the absence of a register. +type Reg uint16 + +const ( + // General-purpose register + R0 Reg = iota + R1 + R2 + R3 + R4 + R5 + R6 + R7 + R8 + R9 + R10 + R11 + R12 + R13 + R14 + R15 + R16 + R17 + R18 + R19 + R20 + R21 + R22 + R23 + R24 + R25 + R26 + R27 + R28 + R29 + R30 + R31 + + // Float point register + F0 + F1 + F2 + F3 + F4 + F5 + F6 + F7 + F8 + F9 + F10 + F11 + F12 + F13 + F14 + F15 + F16 + F17 + F18 + F19 + F20 + F21 + F22 + F23 + F24 + F25 + F26 + F27 + F28 + F29 + F30 + F31 +) + +func (r Reg) String() string { + switch { + case r == R0: + return "$zero" + + case r == R1: + return "$ra" + + case r == R2: + return "$tp" + + case r == R3: + return "$sp" + + case (r >= R4) && (r <= R11): + return fmt.Sprintf("$a%d", int(r-R4)) + + case (r >= R12) && (r <= R20): + return fmt.Sprintf("$t%d", int(r-R12)) + + case r == R21: + return "$r21" + + case r == R22: + return "$fp" + + case (r >= R23) && (r <= R31): + return fmt.Sprintf("$s%d", int(r-R23)) + + case (r >= F0) && (r <= F7): + return fmt.Sprintf("$fa%d", int(r-F0)) + + case (r >= F8) && (r <= F23): + return fmt.Sprintf("$ft%d", int(r-F8)) + + case (r >= F24) && (r <= F31): + return fmt.Sprintf("$fs%d", int(r-F24)) + + default: + return fmt.Sprintf("Unknown(%d)", int(r)) + } +} + +// float control status register +type Fcsr uint8 + +const ( + FCSR0 Fcsr = iota + FCSR1 + FCSR2 + FCSR3 +) + +func (f Fcsr) String() string { + return fmt.Sprintf("$fcsr%d", uint8(f)) +} + +// float condition flags register +type Fcc uint8 + +const ( + FCC0 Fcc = iota + 
FCC1 + FCC2 + FCC3 + FCC4 + FCC5 + FCC6 + FCC7 +) + +func (f Fcc) String() string { + return fmt.Sprintf("$fcc%d", uint8(f)) +} + +// An Imm is an integer constant. +type Uimm struct { + Imm uint32 + Decimal bool +} + +func (i Uimm) String() string { + if i.Decimal == true { + return fmt.Sprintf("%d", i.Imm) + } else { + return fmt.Sprintf("%#x", i.Imm) + } +} + +type Simm16 struct { + Imm int16 + Width uint8 +} + +func (si Simm16) String() string { + return fmt.Sprintf("%d", int32(si.Imm)) +} + +type Simm32 struct { + Imm int32 + Width uint8 +} + +func (si Simm32) String() string { + return fmt.Sprintf("%d", int32(si.Imm)) +} + +type OffsetSimm struct { + Imm int32 + Width uint8 +} + +func (o OffsetSimm) String() string { + return fmt.Sprintf("%d", int32(o.Imm)) +} + +type SaSimm int16 + +func (s SaSimm) String() string { + return fmt.Sprintf("%#x", int(s)) +} + +type CodeSimm int16 + +func (c CodeSimm) String() string { + return fmt.Sprintf("%#x", int(c)) +} diff --git a/loong64/loong64asm/objdump_test.go b/loong64/loong64asm/objdump_test.go new file mode 100644 index 00000000..04766557 --- /dev/null +++ b/loong64/loong64asm/objdump_test.go @@ -0,0 +1,145 @@ +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package loong64asm + +import ( + "strconv" + "strings" + "testing" +) + +func TestObjdumpLoong64TestDecodeGNUSyntaxdata(t *testing.T) { + testObjdumpLoong64(t, testdataCases(t, "gnu")) +} + +func TestObjdumpLoong64TestDecodeGoSyntaxdata(t *testing.T) { + testObjdumpLoong64(t, testdataCases(t, "plan9")) +} + +func TestObjdumpLoong64Manual(t *testing.T) { + testObjdumpLoong64(t, hexCases(t, objdumpManualTests)) +} + +// objdumpManualTests holds test cases that will be run by TestObjdumpLoong64Manual. 
+// If you are debugging a few cases that turned up in a longer run, it can be useful +// to list them here and then use -run=Manual, particularly with tracing enabled. +// Note that these are byte sequences, so they must be reversed from the usual +// word presentation. +var objdumpManualTests = ` +00007238 +00807238 +00004003 +00100050 +ac410028 +ac41002a +ac41c028 +ac414028 +ac41402a +ac418028 +ac41802a +ac397838 +acb97938 +acb97838 +ac397938 +ac397a38 +acb97b38 +acb97a38 +ac397b38 +ac110026 +ac110024 +ac390038 +ac392038 +ac390c38 +ac390438 +ac392438 +ac390838 +ac392838 +ac391600 +ac391400 +ac391500 +ac418003 +` + +// allowedMismatchObjdump reports whether the mismatch between text and dec +// should be allowed by the test. +func allowedMismatchObjdump(text string, inst *Inst, dec ExtInst) bool { + // GNU objdump use register, decode use alias of register, so corrected it in here + var dec_text = strings.Replace(dec.text, " ", ",", -1) + var decsp []string = strings.Split(dec_text, ",") + var num int = cap(decsp) + for i := 0; i < num; i++ { + dex := strings.Index(decsp[i], "$r") + fdex := strings.Index(decsp[i], "$f") + ddex := strings.Index(decsp[i], "(") + if ddex > 0 { + // ldptr.w $r12,$r13,16(0x10) + decsp[i] = decsp[i][0:ddex] + } + xdex := strings.Index(decsp[i], "0x") + // convert registers to registers aliases + if dex >= 0 { + reg, _ := strconv.Atoi(decsp[i][dex+2:]) + // r12~r20 $t0~t8 + if reg >= 12 && reg <= 20 { + decsp[i] = strings.Join([]string{"t", strconv.Itoa(reg - 12)}, "") + } + // r4~r11 $a0~a7 + if reg >= 4 && reg <= 11 { + decsp[i] = strings.Join([]string{"a", strconv.Itoa(reg - 4)}, "") + } + // r23~r31 $s0~s8 + if reg >= 23 && reg <= 31 { + decsp[i] = strings.Join([]string{"s", strconv.Itoa(reg - 23)}, "") + } + // r0 zero + if reg == 0 { + decsp[i] = strings.Join([]string{"zero"}, "") + } + // r1 ra + if reg == 1 { + decsp[i] = strings.Join([]string{"ra"}, "") + } + // r2 tp + if reg == 2 { + decsp[i] = strings.Join([]string{"tp"}, 
"") + } + // r3 sp + if reg == 3 { + decsp[i] = strings.Join([]string{"sp"}, "") + } + // r21 x + if reg == 21 { + decsp[i] = strings.Join([]string{"x"}, "") + } + // r22 fp + if reg == 22 { + decsp[i] = strings.Join([]string{"fp"}, "") + } + } + // convert hexadecimal to decimal + if xdex >= 0 { + parseint, _ := strconv.ParseInt(decsp[i][xdex+2:], 16, 32) + decsp[i] = strings.Join([]string{strconv.Itoa(int(parseint))}, "") + } + // convert floating-point registers to floating-point aliases + if fdex >= 0 && !strings.Contains(decsp[i], "$fcc") { + freg, _ := strconv.Atoi(decsp[i][fdex+2:]) + // f0~f7 fa0~fa7 + if freg >= 0 && freg <= 7 { + decsp[i] = strings.Join([]string{"fa", strconv.Itoa(freg - 0)}, "") + } + // f8~f23 ft0~ft15 + if freg >= 8 && freg <= 23 { + decsp[i] = strings.Join([]string{"ft", strconv.Itoa(freg - 8)}, "") + } + // f24~f31 fs0~fs7 + if freg >= 24 && freg <= 31 { + decsp[i] = strings.Join([]string{"fs", strconv.Itoa(freg - 24)}, "") + } + } + } + + return false +} diff --git a/loong64/loong64asm/objdumpext_test.go b/loong64/loong64asm/objdumpext_test.go new file mode 100644 index 00000000..80396d99 --- /dev/null +++ b/loong64/loong64asm/objdumpext_test.go @@ -0,0 +1,249 @@ +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package loong64asm + +import ( + "bytes" + "debug/elf" + "encoding/binary" + "fmt" + "io" + "log" + "os" + "os/exec" + "strconv" + "strings" + "testing" +) + +const objdumpPath = "/usr/bin/objdump" + +func testObjdumpLoong64(t *testing.T, generate func(func([]byte))) { + testObjdumpArch(t, generate) +} + +func testObjdumpArch(t *testing.T, generate func(func([]byte))) { + checkObjdumpLoong64(t) + testExtDis(t, "gnu", objdump, generate, allowedMismatchObjdump) + testExtDis(t, "plan9", objdump, generate, allowedMismatchObjdump) +} + +func checkObjdumpLoong64(t *testing.T) { + out, err := exec.Command(objdumpPath, "-i").Output() + if err != nil { + t.Skipf("cannot run objdump: %v\n%s", err, out) + } + if !strings.Contains(string(out), "Loongarch64") { + t.Skip("objdump does not have loong64 support") + } +} + +func objdump(ext *ExtDis) error { + // File already written with instructions; add ELF header. + if err := writeELF64(ext.File, ext.Size); err != nil { + return err + } + + b, err := ext.Run(objdumpPath, "-d", "-z", ext.File.Name()) + if err != nil { + return err + } + + var ( + nmatch int + reading bool + next uint64 = start + addr uint64 + encbuf [4]byte + enc []byte + text string + ) + flush := func() { + if addr == next { + // PC-relative addresses are translated to absolute addresses based on PC by GNU objdump + // Following logical rewrites the absolute addresses back to PC-relative ones for comparing + // with our disassembler output which are PC-relative + if text == "undefined" && len(enc) == 4 { + text = "error: unknown instruction" + enc = nil + } + if len(enc) == 4 { + // prints as word but we want to record bytes + enc[0], enc[3] = enc[3], enc[0] + enc[1], enc[2] = enc[2], enc[1] + } + ext.Dec <- ExtInst{addr, encbuf, len(enc), text} + encbuf = [4]byte{} + enc = nil + next += 4 + } + } + var textangle = []byte("<.text>:") + for { + line, err := b.ReadSlice('\n') + if err != nil { + if err == io.EOF { + break + } + return fmt.Errorf("reading 
objdump output: %v", err) + } + if bytes.Contains(line, textangle) { + reading = true + continue + } + if !reading { + continue + } + if debug { + os.Stdout.Write(line) + } + if enc1 := parseContinuation(line, encbuf[:len(enc)]); enc1 != nil { + enc = enc1 + continue + } + flush() + nmatch++ + addr, enc, text = parseLine(line, encbuf[:0]) + if addr > next { + return fmt.Errorf("address out of sync expected <= %#x at %q in:\n%s", next, line, line) + } + } + flush() + if next != start+uint64(ext.Size) { + return fmt.Errorf("not enough results found [%d %d]", next, start+ext.Size) + } + if err := ext.Wait(); err != nil { + return fmt.Errorf("exec: %v", err) + } + + return nil +} + +var ( + undefined = []byte("undefined") + unpredictable = []byte("unpredictable") + slashslash = []byte("//") +) + +func parseLine(line []byte, encstart []byte) (addr uint64, enc []byte, text string) { + ok := false + oline := line + i := index(line, ":\t") + if i < 0 { + log.Fatalf("cannot parse disassembly: %q", oline) + } + x, err := strconv.ParseUint(string(bytes.TrimSpace(line[:i])), 16, 32) + if err != nil { + log.Fatalf("cannot parse disassembly: %q", oline) + } + addr = uint64(x) + line = line[i+2:] + i = bytes.IndexByte(line, '\t') + if i < 0 { + log.Fatalf("cannot parse disassembly: %q", oline) + } + enc, ok = parseHex(line[:i], encstart) + if !ok { + log.Fatalf("cannot parse disassembly: %q", oline) + } + line = bytes.TrimSpace(line[i:]) + if bytes.Contains(line, undefined) { + text = "undefined" + return + } + if false && bytes.Contains(line, unpredictable) { + text = "unpredictable" + return + } + // Strip trailing comment starting with '#' + if i := bytes.IndexByte(line, '#'); i >= 0 { + line = bytes.TrimSpace(line[:i]) + } + // Strip trailing comment starting with "//" + if i := bytes.Index(line, slashslash); i >= 0 { + line = bytes.TrimSpace(line[:i]) + } + text = string(fixSpace(line)) + return +} + +func parseContinuation(line []byte, enc []byte) []byte { + i := 
index(line, ":\t") + if i < 0 { + return nil + } + line = line[i+1:] + enc, _ = parseHex(line, enc) + return enc +} + +// writeELF64 writes an ELF64 header to the file, describing a text +// segment that starts at start (0x8000) and extends for size bytes. +func writeELF64(f *os.File, size int) error { + f.Seek(0, io.SeekStart) + var hdr elf.Header64 + var prog elf.Prog64 + var sect elf.Section64 + var buf bytes.Buffer + binary.Write(&buf, binary.LittleEndian, &hdr) + off1 := buf.Len() + binary.Write(&buf, binary.LittleEndian, &prog) + off2 := buf.Len() + binary.Write(&buf, binary.LittleEndian, §) + off3 := buf.Len() + buf.Reset() + data := byte(elf.ELFDATA2LSB) + hdr = elf.Header64{ + Ident: [16]byte{0x7F, 'E', 'L', 'F', 2, data, 1}, + Type: 2, + Machine: uint16(elf.EM_LOONGARCH), + Version: 1, + Entry: start, + Phoff: uint64(off1), + Shoff: uint64(off2), + Flags: 0x3, + Ehsize: uint16(off1), + Phentsize: uint16(off2 - off1), + Phnum: 1, + Shentsize: uint16(off3 - off2), + Shnum: 3, + Shstrndx: 2, + } + binary.Write(&buf, binary.LittleEndian, &hdr) + prog = elf.Prog64{ + Type: 1, + Off: start, + Vaddr: start, + Paddr: start, + Filesz: uint64(size), + Memsz: uint64(size), + Flags: 5, + Align: start, + } + binary.Write(&buf, binary.LittleEndian, &prog) + binary.Write(&buf, binary.LittleEndian, §) // NULL section + sect = elf.Section64{ + Name: 1, + Type: uint32(elf.SHT_PROGBITS), + Addr: start, + Off: start, + Size: uint64(size), + Flags: uint64(elf.SHF_ALLOC | elf.SHF_EXECINSTR), + Addralign: 4, + } + binary.Write(&buf, binary.LittleEndian, §) // .text + sect = elf.Section64{ + Name: uint32(len("\x00.text\x00")), + Type: uint32(elf.SHT_STRTAB), + Addr: 0, + Off: uint64(off2 + (off3-off2)*3), + Size: uint64(len("\x00.text\x00.shstrtab\x00")), + Addralign: 1, + } + binary.Write(&buf, binary.LittleEndian, §) + buf.WriteString("\x00.text\x00.shstrtab\x00") + f.Write(buf.Bytes()) + return nil +} diff --git a/loong64/loong64asm/plan9x.go b/loong64/loong64asm/plan9x.go 
new file mode 100644 index 00000000..5db32903 --- /dev/null +++ b/loong64/loong64asm/plan9x.go @@ -0,0 +1,536 @@ +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package loong64asm + +import ( + "fmt" + "strings" +) + +// GoSyntax returns the Go assembler syntax for the instruction. +// The syntax was originally defined by Plan 9. +// The pc is the program counter of the instruction, used for +// expanding PC-relative addresses into absolute ones. +// The symname function queries the symbol table for the program +// being disassembled. Given a target address it returns the name +// and base address of the symbol containing the target, if any; +// otherwise it returns "", 0. +func GoSyntax(inst Inst, pc uint64, symname func(uint64) (string, uint64)) string { + if symname == nil { + symname = func(uint64) (string, uint64) { return "", 0 } + } + if inst.Op == 0 && inst.Enc == 0 { + return "WORD $0" + } else if inst.Op == 0 { + return "?" + } + + var args []string + for _, a := range inst.Args { + if a == nil { + break + } + args = append(args, plan9Arg(&inst, pc, symname, a)) + } + + var op string = plan9OpMap[inst.Op] + if op == "" { + op = "Unknown " + inst.Op.String() + } + + switch inst.Op { + case BSTRPICK_W, BSTRPICK_D, BSTRINS_W, BSTRINS_D: + msbw, lsbw := inst.Args[2].(Uimm), inst.Args[3].(Uimm) + if inst.Op == BSTRPICK_D && msbw.Imm == 15 && lsbw.Imm == 0 { + op = "MOVHU" + args = append(args[1:2], args[0:1]...) + } else { + args[0], args[1], args[2], args[3] = args[2], args[1], args[3], args[0] + } + + case BCNEZ, BCEQZ: + args = args[1:2] + + case BEQ, BNE: + rj := inst.Args[0].(Reg) + rd := inst.Args[1].(Reg) + if rj == rd && inst.Op == BEQ { + op = "JMP" + args = args[2:] + } else if rj == R0 { + args = args[1:] + } else if rd == R0 { + args = append(args[:1], args[2:]...) 
+ } + + case BEQZ, BNEZ: + if inst.Args[0].(Reg) == R0 && inst.Op == BEQ { + op = "JMP" + args = args[1:] + } + + case BLT, BLTU, BGE, BGEU: + rj := inst.Args[0].(Reg) + rd := inst.Args[1].(Reg) + if rj == rd && (inst.Op == BGE || inst.Op == BGEU) { + op = "JMP" + args = args[2:] + } else if rj == R0 { + switch inst.Op { + case BGE: + op = "BLEZ" + case BLT: + op = "BGTZ" + } + args = args[1:] + } else if rd == R0 { + if !strings.HasSuffix(op, "U") { + op += "Z" + } + args = append(args[:1], args[2:]...) + } + + case JIRL: + rd := inst.Args[0].(Reg) + rj := inst.Args[1].(Reg) + regno := uint16(rj) & 31 + if rd == R0 { + return fmt.Sprintf("JMP (R%d)", regno) + } + return fmt.Sprintf("CALL (R%d)", regno) + + case LD_B, LD_H, LD_W, LD_D, LD_BU, LD_HU, LD_WU, LL_W, LL_D, + ST_B, ST_H, ST_W, ST_D, SC_W, SC_D, FLD_S, FLD_D, FST_S, FST_D: + var off int32 + switch a := inst.Args[2].(type) { + case Simm16: + off = signumConvInt32(int32(a.Imm), a.Width) + case Simm32: + off = signumConvInt32(int32(a.Imm), a.Width) >> 2 + } + Iop := strings.ToUpper(inst.Op.String()) + if strings.HasPrefix(Iop, "L") || strings.HasPrefix(Iop, "FL") { + return fmt.Sprintf("%s %d(%s), %s", op, off, args[1], args[0]) + } + return fmt.Sprintf("%s %s, %d(%s)", op, args[0], off, args[1]) + + case LDX_B, LDX_H, LDX_W, LDX_D, LDX_BU, LDX_HU, LDX_WU, FLDX_S, FLDX_D, + STX_B, STX_H, STX_W, STX_D, FSTX_S, FSTX_D: + Iop := strings.ToUpper(inst.Op.String()) + if strings.HasPrefix(Iop, "L") || strings.HasPrefix(Iop, "FL") { + return fmt.Sprintf("%s (%s)(%s), %s", op, args[1], args[2], args[0]) + } + return fmt.Sprintf("%s %s, (%s)(%s)", op, args[0], args[1], args[2]) + + case AMADD_B, AMADD_D, AMADD_DB_B, AMADD_DB_D, AMADD_DB_H, AMADD_DB_W, AMADD_H, + AMADD_W, AMAND_D, AMAND_DB_D, AMAND_DB_W, AMAND_W, AMCAS_B, AMCAS_D, AMCAS_DB_B, + AMCAS_DB_D, AMCAS_DB_H, AMCAS_DB_W, AMCAS_H, AMCAS_W, AMMAX_D, AMMAX_DB_D, + AMMAX_DB_DU, AMMAX_DB_W, AMMAX_DB_WU, AMMAX_DU, AMMAX_W, AMMAX_WU, AMMIN_D, + AMMIN_DB_D, 
AMMIN_DB_DU, AMMIN_DB_W, AMMIN_DB_WU, AMMIN_DU, AMMIN_W, AMMIN_WU, + AMOR_D, AMOR_DB_D, AMOR_DB_W, AMOR_W, AMSWAP_B, AMSWAP_D, AMSWAP_DB_B, AMSWAP_DB_D, + AMSWAP_DB_H, AMSWAP_DB_W, AMSWAP_H, AMSWAP_W, AMXOR_D, AMXOR_DB_D, AMXOR_DB_W, AMXOR_W: + return fmt.Sprintf("%s %s, (%s), %s", op, args[1], args[2], args[0]) + + default: + // Reverse args, placing dest last + for i, j := 0, len(args)-1; i < j; i, j = i+1, j-1 { + args[i], args[j] = args[j], args[i] + } + switch len(args) { // Special use cases + case 0, 1: + if inst.Op != B && inst.Op != BL { + return op + } + + case 3: + switch a0 := inst.Args[0].(type) { + case Reg: + rj := inst.Args[1].(Reg) + if a0 == rj && a0 != R0 { + args = args[0:2] + } + } + switch inst.Op { + case SUB_W, SUB_D, ADDI_W, ADDI_D, ORI: + rj := inst.Args[1].(Reg) + if rj == R0 { + args = append(args[0:1], args[2:]...) + if inst.Op == SUB_W { + op = "NEGW" + } else if inst.Op == SUB_D { + op = "NEGV" + } else { + op = "MOVW" + } + } + + case ANDI: + ui12 := inst.Args[2].(Uimm) + if ui12.Imm == uint32(0xff) { + op = "MOVBU" + args = args[1:] + } else if ui12.Imm == 0 && inst.Args[0].(Reg) == R0 && inst.Args[1].(Reg) == R0 { + return "NOOP" + } + + case SLL_W, OR: + rk := inst.Args[2].(Reg) + if rk == R0 { + args = args[1:] + if inst.Op == SLL_W { + op = "MOVW" + } else { + op = "MOVV" + } + } + } + } + } + + if args != nil { + op += " " + strings.Join(args, ", ") + } + return op +} + +func plan9Arg(inst *Inst, pc uint64, symname func(uint64) (string, uint64), arg Arg) string { + // Reg: gpr[0, 31] and fpr[0, 31] + // Fcsr: fcsr[0, 3] + // Fcc: fcc[0, 7] + // Uimm: unsigned integer constant + // Simm16: si16 + // Simm32: si32 + // OffsetSimm: si32 + switch a := arg.(type) { + case Reg: + regenum := uint16(a) + regno := uint16(a) & 0x1f + // General-purpose register + if regenum >= uint16(R0) && regenum <= uint16(R31) { + return fmt.Sprintf("R%d", regno) + } else { // Float point register + return fmt.Sprintf("F%d", regno) + } + + case Fcsr: + 
regno := uint8(a) & 0x1f + return fmt.Sprintf("FCSR%d", regno) + + case Fcc: + regno := uint8(a) & 0x1f + return fmt.Sprintf("FCC%d", regno) + + case Uimm: + return fmt.Sprintf("$%d", a.Imm) + + case Simm16: + si16 := signumConvInt32(int32(a.Imm), a.Width) + return fmt.Sprintf("$%d", si16) + + case Simm32: + si32 := signumConvInt32(a.Imm, a.Width) + return fmt.Sprintf("$%d", si32) + + case OffsetSimm: + offs := offsConvInt32(a.Imm, a.Width) + if inst.Op == B || inst.Op == BL { + addr := int64(pc) + int64(a.Imm) + if s, base := symname(uint64(addr)); s != "" && uint64(addr) == base { + return fmt.Sprintf("%s(SB)", s) + } + } + return fmt.Sprintf("%d(PC)", offs>>2) + + case SaSimm: + return fmt.Sprintf("$%d", a) + + case CodeSimm: + return fmt.Sprintf("$%d", a) + + } + return strings.ToUpper(arg.String()) +} + +func signumConvInt32(imm int32, width uint8) int32 { + active := uint32(1<> (width - 1)) & 0x1) == 1 { + signum |= ^active + } + return int32(signum) +} + +func offsConvInt32(imm int32, width uint8) int32 { + relWidth := width + 2 + return signumConvInt32(imm, relWidth) +} + +var plan9OpMap = map[Op]string{ + ADD_W: "ADD", + ADD_D: "ADDV", + SUB_W: "SUB", + SUB_D: "SUBV", + ADDI_W: "ADD", + ADDI_D: "ADDV", + LU12I_W: "LU12IW", + LU32I_D: "LU32ID", + LU52I_D: "LU52ID", + SLT: "SGT", + SLTU: "SGTU", + SLTI: "SGT", + SLTUI: "SGTU", + PCADDU12I: "PCADDU12I", + PCALAU12I: "PCALAU12I", + AND: "AND", + OR: "OR", + NOR: "NOR", + XOR: "XOR", + ANDI: "AND", + ORI: "OR", + XORI: "XOR", + MUL_W: "MUL", + MULH_W: "MULH", + MULH_WU: "MULHU", + MUL_D: "MULV", + MULH_D: "MULHV", + MULH_DU: "MULHVU", + DIV_W: "DIV", + DIV_WU: "DIVU", + DIV_D: "DIVV", + DIV_DU: "DIVVU", + MOD_W: "REM", + MOD_WU: "REMU", + MOD_D: "REMV", + MOD_DU: "REMVU", + SLL_W: "SLL", + SRL_W: "SRL", + SRA_W: "SRA", + ROTR_W: "ROTR", + SLL_D: "SLLV", + SRL_D: "SRLV", + SRA_D: "SRAV", + ROTR_D: "ROTRV", + SLLI_W: "SLL", + SRLI_W: "SRL", + SRAI_W: "SRA", + ROTRI_W: "ROTR", + SLLI_D: "SLLV", + SRLI_D: "SRLV", + 
SRAI_D: "SRAV", + ROTRI_D: "ROTRV", + EXT_W_B: "?", + EXT_W_H: "?", + BITREV_W: "BITREVW", + BITREV_D: "BITREVV", + CLO_W: "CLOW", + CLO_D: "CLOV", + CLZ_W: "CLZW", + CLZ_D: "CLZV", + CTO_W: "CTOW", + CTO_D: "CTOV", + CTZ_W: "CTZW", + CTZ_D: "CTZV", + REVB_2H: "REVB2H", + REVB_2W: "REVB2W", + REVB_4H: "REVB4H", + REVB_D: "REVBV", + BSTRPICK_W: "BSTRPICKW", + BSTRPICK_D: "BSTRPICKV", + BSTRINS_W: "BSTRINSW", + BSTRINS_D: "BSTRINSV", + MASKEQZ: "MASKEQZ", + MASKNEZ: "MASKNEZ", + BCNEZ: "BFPT", + BCEQZ: "BFPF", + BEQ: "BEQ", + BNE: "BNE", + BEQZ: "BEQ", + BNEZ: "BNE", + BLT: "BLT", + BLTU: "BLTU", + BGE: "BGE", + BGEU: "BGEU", + B: "JMP", + BL: "CALL", + LD_B: "MOVB", + LD_H: "MOVH", + LD_W: "MOVW", + LD_D: "MOVV", + LD_BU: "MOVBU", + LD_HU: "MOVHU", + LD_WU: "MOVWU", + ST_B: "MOVB", + ST_H: "MOVH", + ST_W: "MOVW", + ST_D: "MOVV", + LDX_B: "MOVB", + LDX_BU: "MOVBU", + LDX_D: "MOVV", + LDX_H: "MOVH", + LDX_HU: "MOVHU", + LDX_W: "MOVW", + LDX_WU: "MOVWU", + STX_B: "MOVB", + STX_D: "MOVV", + STX_H: "MOVH", + STX_W: "MOVW", + AMADD_B: "AMADDB", + AMADD_D: "AMADDV", + AMADD_DB_B: "AMADDDBB", + AMADD_DB_D: "AMADDDBV", + AMADD_DB_H: "AMADDDBH", + AMADD_DB_W: "AMADDDBW", + AMADD_H: "AMADDH", + AMADD_W: "AMADDW", + AMAND_D: "AMANDV", + AMAND_DB_D: "AMANDDBV", + AMAND_DB_W: "AMANDDBW", + AMAND_W: "AMANDW", + AMCAS_B: "AMCASB", + AMCAS_D: "AMCASV", + AMCAS_DB_B: "AMCASDBB", + AMCAS_DB_D: "AMCASDBV", + AMCAS_DB_H: "AMCASDBH", + AMCAS_DB_W: "AMCASDBW", + AMCAS_H: "AMCASH", + AMCAS_W: "AMCASW", + AMMAX_D: "AMMAXV", + AMMAX_DB_D: "AMMAXDBV", + AMMAX_DB_DU: "AMMAXDBVU", + AMMAX_DB_W: "AMMAXDBW", + AMMAX_DB_WU: "AMMAXDBWU", + AMMAX_DU: "AMMAXVU", + AMMAX_W: "AMMAXW", + AMMAX_WU: "AMMAXWU", + AMMIN_D: "AMMINV", + AMMIN_DB_D: "AMMINDBV", + AMMIN_DB_DU: "AMMINDBVU", + AMMIN_DB_W: "AMMINDBW", + AMMIN_DB_WU: "AMMINDBWU", + AMMIN_DU: "AMMINVU", + AMMIN_W: "AMMINW", + AMMIN_WU: "AMMINWU", + AMOR_D: "AMORV", + AMOR_DB_D: "AMORDBV", + AMOR_DB_W: "AMORDBW", + AMOR_W: "AMORW", + AMSWAP_B: 
"AMSWAPB", + AMSWAP_D: "AMSWAPV", + AMSWAP_DB_B: "AMSWAPDBB", + AMSWAP_DB_D: "AMSWAPDBV", + AMSWAP_DB_H: "AMSWAPDBH", + AMSWAP_DB_W: "AMSWAPDBW", + AMSWAP_H: "AMSWAPH", + AMSWAP_W: "AMSWAPW", + AMXOR_D: "AMXORV", + AMXOR_DB_D: "AMXORDBV", + AMXOR_DB_W: "AMXORDBW", + AMXOR_W: "AMXORW", + LL_W: "LL", + LL_D: "LLV", + SC_W: "SC", + SC_D: "SCV", + CRCC_W_B_W: "CRCCWBW", + CRCC_W_D_W: "CRCCWVW", + CRCC_W_H_W: "CRCCWHW", + CRCC_W_W_W: "CRCCWWW", + CRC_W_B_W: "CRCWBW", + CRC_W_D_W: "CRCWVW", + CRC_W_H_W: "CRCWHW", + CRC_W_W_W: "CRCWWW", + DBAR: "DBAR", + SYSCALL: "SYSCALL", + BREAK: "BREAK", + RDTIMEL_W: "RDTIMELW", + RDTIMEH_W: "RDTIMEHW", + RDTIME_D: "RDTIMED", + CPUCFG: "CPUCFG", + + // Floating-point instructions + FADD_S: "ADDF", + FADD_D: "ADDD", + FSUB_S: "SUBF", + FSUB_D: "SUBD", + FMUL_S: "MULF", + FMUL_D: "MULD", + FDIV_S: "DIVF", + FDIV_D: "DIVD", + FMSUB_S: "FMSUBF", + FMSUB_D: "FMSUBD", + FMADD_S: "FMADDF", + FMADD_D: "FMADDD", + FNMADD_S: "FNMADDF", + FNMADD_D: "FNMADDD", + FNMSUB_S: "FNMSUBF", + FNMSUB_D: "FNMSUBD", + FABS_S: "ABSF", + FABS_D: "ABSD", + FNEG_S: "NEGF", + FNEG_D: "NEGD", + FSQRT_S: "SQRTF", + FSQRT_D: "SQRTD", + FCOPYSIGN_S: "FCOPYSGF", + FCOPYSIGN_D: "FCOPYSGD", + FMAX_S: "FMAXF", + FMAX_D: "FMAXD", + FMIN_S: "FMINF", + FMIN_D: "FMIND", + FCLASS_S: "FCLASSF", + FCLASS_D: "FCLASSD", + FCMP_CEQ_S: "CMPEQF", + FCMP_CEQ_D: "CMPEQD", + FCMP_SLE_S: "CMPGEF", + FCMP_SLE_D: "CMPGED", + FCMP_SLT_S: "CMPGTF", + FCMP_SLT_D: "CMPGTD", + FCVT_D_S: "MOVFD", + FCVT_S_D: "MOVDF", + FFINT_S_W: "FFINTFW", + FFINT_S_L: "FFINTFV", + FFINT_D_W: "FFINTDW", + FFINT_D_L: "FFINTDV", + FTINTRM_L_D: "FTINTRMVD", + FTINTRM_L_S: "FTINTRMVF", + FTINTRM_W_D: "FTINTRMWD", + FTINTRM_W_S: "FTINTRMWF", + FTINTRNE_L_D: "FTINTRNEVD", + FTINTRNE_L_S: "FTINTRNEVF", + FTINTRNE_W_D: "FTINTRNEWD", + FTINTRNE_W_S: "FTINTRNEWF", + FTINTRP_L_D: "FTINTRPVD", + FTINTRP_L_S: "FTINTRPVF", + FTINTRP_W_D: "FTINTRPWD", + FTINTRP_W_S: "FTINTRPWF", + FTINTRZ_L_D: "FTINTRZVD", + FTINTRZ_L_S: 
"FTINTRZVF", + FTINTRZ_W_D: "FTINTRZWD", + FTINTRZ_W_S: "FTINTRZWF", + FTINT_L_D: "FTINTVD", + FTINT_L_S: "FTINTVF", + FTINT_W_D: "FTINTWD", + FTINT_W_S: "FTINTWF", + FRINT_S: "FRINTS", + FRINT_D: "FRINTD", + FMOV_S: "MOVF", + FMOV_D: "MOVD", + MOVGR2FR_W: "MOVW", + MOVGR2FR_D: "MOVV", + MOVFR2GR_S: "MOVW", + MOVFR2GR_D: "MOVV", + MOVGR2CF: "MOVV", + MOVCF2GR: "MOVV", + MOVFCSR2GR: "MOVV", + MOVGR2FCSR: "MOVV", + MOVFR2CF: "MOVV", + MOVCF2FR: "MOVV", + FLD_S: "MOVF", + FLD_D: "MOVD", + FST_S: "MOVF", + FST_D: "MOVD", + FLDX_S: "MOVF", + FLDX_D: "MOVD", + FSTX_S: "MOVF", + FSTX_D: "MOVD", +} diff --git a/loong64/loong64asm/tables.go b/loong64/loong64asm/tables.go new file mode 100644 index 00000000..c85d47c2 --- /dev/null +++ b/loong64/loong64asm/tables.go @@ -0,0 +1,1613 @@ +// Generated by loong64spec LoongArch-Vol1-EN.pdf, DO NOT EDIT. + +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package loong64asm + +const ( + _ Op = iota + ADDI_D + ADDI_W + ADDU16I_D + ADD_D + ADD_W + ALSL_D + ALSL_W + ALSL_WU + AMADD_B + AMADD_D + AMADD_DB_B + AMADD_DB_D + AMADD_DB_H + AMADD_DB_W + AMADD_H + AMADD_W + AMAND_D + AMAND_DB_D + AMAND_DB_W + AMAND_W + AMCAS_B + AMCAS_D + AMCAS_DB_B + AMCAS_DB_D + AMCAS_DB_H + AMCAS_DB_W + AMCAS_H + AMCAS_W + AMMAX_D + AMMAX_DB_D + AMMAX_DB_DU + AMMAX_DB_W + AMMAX_DB_WU + AMMAX_DU + AMMAX_W + AMMAX_WU + AMMIN_D + AMMIN_DB_D + AMMIN_DB_DU + AMMIN_DB_W + AMMIN_DB_WU + AMMIN_DU + AMMIN_W + AMMIN_WU + AMOR_D + AMOR_DB_D + AMOR_DB_W + AMOR_W + AMSWAP_B + AMSWAP_D + AMSWAP_DB_B + AMSWAP_DB_D + AMSWAP_DB_H + AMSWAP_DB_W + AMSWAP_H + AMSWAP_W + AMXOR_D + AMXOR_DB_D + AMXOR_DB_W + AMXOR_W + AND + ANDI + ANDN + ASRTGT_D + ASRTLE_D + B + BCEQZ + BCNEZ + BEQ + BEQZ + BGE + BGEU + BITREV_4B + BITREV_8B + BITREV_D + BITREV_W + BL + BLT + BLTU + BNE + BNEZ + BREAK + BSTRINS_D + BSTRINS_W + BSTRPICK_D + BSTRPICK_W + BYTEPICK_D + BYTEPICK_W + CACOP + CLO_D + CLO_W + CLZ_D + CLZ_W + CPUCFG + CRCC_W_B_W + CRCC_W_D_W + CRCC_W_H_W + CRCC_W_W_W + CRC_W_B_W + CRC_W_D_W + CRC_W_H_W + CRC_W_W_W + CSRRD + CSRWR + CSRXCHG + CTO_D + CTO_W + CTZ_D + CTZ_W + DBAR + DBCL + DIV_D + DIV_DU + DIV_W + DIV_WU + ERTN + EXT_W_B + EXT_W_H + FABS_D + FABS_S + FADD_D + FADD_S + FCLASS_D + FCLASS_S + FCMP_CAF_D + FCMP_CAF_S + FCMP_CEQ_D + FCMP_CEQ_S + FCMP_CLE_D + FCMP_CLE_S + FCMP_CLT_D + FCMP_CLT_S + FCMP_CNE_D + FCMP_CNE_S + FCMP_COR_D + FCMP_COR_S + FCMP_CUEQ_D + FCMP_CUEQ_S + FCMP_CULE_D + FCMP_CULE_S + FCMP_CULT_D + FCMP_CULT_S + FCMP_CUNE_D + FCMP_CUNE_S + FCMP_CUN_D + FCMP_CUN_S + FCMP_SAF_D + FCMP_SAF_S + FCMP_SEQ_D + FCMP_SEQ_S + FCMP_SLE_D + FCMP_SLE_S + FCMP_SLT_D + FCMP_SLT_S + FCMP_SNE_D + FCMP_SNE_S + FCMP_SOR_D + FCMP_SOR_S + FCMP_SUEQ_D + FCMP_SUEQ_S + FCMP_SULE_D + FCMP_SULE_S + FCMP_SULT_D + FCMP_SULT_S + FCMP_SUNE_D + FCMP_SUNE_S + FCMP_SUN_D + FCMP_SUN_S + FCOPYSIGN_D + FCOPYSIGN_S + FCVT_D_S + FCVT_S_D + FDIV_D + FDIV_S + FFINT_D_L + FFINT_D_W 
+ FFINT_S_L + FFINT_S_W + FLDGT_D + FLDGT_S + FLDLE_D + FLDLE_S + FLDX_D + FLDX_S + FLD_D + FLD_S + FLOGB_D + FLOGB_S + FMADD_D + FMADD_S + FMAXA_D + FMAXA_S + FMAX_D + FMAX_S + FMINA_D + FMINA_S + FMIN_D + FMIN_S + FMOV_D + FMOV_S + FMSUB_D + FMSUB_S + FMUL_D + FMUL_S + FNEG_D + FNEG_S + FNMADD_D + FNMADD_S + FNMSUB_D + FNMSUB_S + FRECIPE_D + FRECIPE_S + FRECIP_D + FRECIP_S + FRINT_D + FRINT_S + FRSQRTE_D + FRSQRTE_S + FRSQRT_D + FRSQRT_S + FSCALEB_D + FSCALEB_S + FSEL + FSQRT_D + FSQRT_S + FSTGT_D + FSTGT_S + FSTLE_D + FSTLE_S + FSTX_D + FSTX_S + FST_D + FST_S + FSUB_D + FSUB_S + FTINTRM_L_D + FTINTRM_L_S + FTINTRM_W_D + FTINTRM_W_S + FTINTRNE_L_D + FTINTRNE_L_S + FTINTRNE_W_D + FTINTRNE_W_S + FTINTRP_L_D + FTINTRP_L_S + FTINTRP_W_D + FTINTRP_W_S + FTINTRZ_L_D + FTINTRZ_L_S + FTINTRZ_W_D + FTINTRZ_W_S + FTINT_L_D + FTINT_L_S + FTINT_W_D + FTINT_W_S + IBAR + IDLE + INVTLB + IOCSRRD_B + IOCSRRD_D + IOCSRRD_H + IOCSRRD_W + IOCSRWR_B + IOCSRWR_D + IOCSRWR_H + IOCSRWR_W + JIRL + LDDIR + LDGT_B + LDGT_D + LDGT_H + LDGT_W + LDLE_B + LDLE_D + LDLE_H + LDLE_W + LDPTE + LDPTR_D + LDPTR_W + LDX_B + LDX_BU + LDX_D + LDX_H + LDX_HU + LDX_W + LDX_WU + LD_B + LD_BU + LD_D + LD_H + LD_HU + LD_W + LD_WU + LLACQ_D + LLACQ_W + LL_D + LL_W + LU12I_W + LU32I_D + LU52I_D + MASKEQZ + MASKNEZ + MOD_D + MOD_DU + MOD_W + MOD_WU + MOVCF2FR + MOVCF2GR + MOVFCSR2GR + MOVFR2CF + MOVFR2GR_D + MOVFR2GR_S + MOVFRH2GR_S + MOVGR2CF + MOVGR2FCSR + MOVGR2FRH_W + MOVGR2FR_D + MOVGR2FR_W + MULH_D + MULH_DU + MULH_W + MULH_WU + MULW_D_W + MULW_D_WU + MUL_D + MUL_W + NOR + OR + ORI + ORN + PCADDI + PCADDU12I + PCADDU18I + PCALAU12I + PRELD + PRELDX + RDTIMEH_W + RDTIMEL_W + RDTIME_D + REVB_2H + REVB_2W + REVB_4H + REVB_D + REVH_2W + REVH_D + ROTRI_D + ROTRI_W + ROTR_D + ROTR_W + SCREL_D + SCREL_W + SC_D + SC_Q + SC_W + SLLI_D + SLLI_W + SLL_D + SLL_W + SLT + SLTI + SLTU + SLTUI + SRAI_D + SRAI_W + SRA_D + SRA_W + SRLI_D + SRLI_W + SRL_D + SRL_W + STGT_B + STGT_D + STGT_H + STGT_W + STLE_B + STLE_D + 
STLE_H + STLE_W + STPTR_D + STPTR_W + STX_B + STX_D + STX_H + STX_W + ST_B + ST_D + ST_H + ST_W + SUB_D + SUB_W + SYSCALL + TLBCLR + TLBFILL + TLBFLUSH + TLBRD + TLBSRCH + TLBWR + XOR + XORI +) + +var opstr = [...]string{ + ADDI_D: "ADDI.D", + ADDI_W: "ADDI.W", + ADDU16I_D: "ADDU16I.D", + ADD_D: "ADD.D", + ADD_W: "ADD.W", + ALSL_D: "ALSL.D", + ALSL_W: "ALSL.W", + ALSL_WU: "ALSL.WU", + AMADD_B: "AMADD.B", + AMADD_D: "AMADD.D", + AMADD_DB_B: "AMADD_DB.B", + AMADD_DB_D: "AMADD_DB.D", + AMADD_DB_H: "AMADD_DB.H", + AMADD_DB_W: "AMADD_DB.W", + AMADD_H: "AMADD.H", + AMADD_W: "AMADD.W", + AMAND_D: "AMAND.D", + AMAND_DB_D: "AMAND_DB.D", + AMAND_DB_W: "AMAND_DB.W", + AMAND_W: "AMAND.W", + AMCAS_B: "AMCAS.B", + AMCAS_D: "AMCAS.D", + AMCAS_DB_B: "AMCAS_DB.B", + AMCAS_DB_D: "AMCAS_DB.D", + AMCAS_DB_H: "AMCAS_DB.H", + AMCAS_DB_W: "AMCAS_DB.W", + AMCAS_H: "AMCAS.H", + AMCAS_W: "AMCAS.W", + AMMAX_D: "AMMAX.D", + AMMAX_DB_D: "AMMAX_DB.D", + AMMAX_DB_DU: "AMMAX_DB.DU", + AMMAX_DB_W: "AMMAX_DB.W", + AMMAX_DB_WU: "AMMAX_DB.WU", + AMMAX_DU: "AMMAX.DU", + AMMAX_W: "AMMAX.W", + AMMAX_WU: "AMMAX.WU", + AMMIN_D: "AMMIN.D", + AMMIN_DB_D: "AMMIN_DB.D", + AMMIN_DB_DU: "AMMIN_DB.DU", + AMMIN_DB_W: "AMMIN_DB.W", + AMMIN_DB_WU: "AMMIN_DB.WU", + AMMIN_DU: "AMMIN.DU", + AMMIN_W: "AMMIN.W", + AMMIN_WU: "AMMIN.WU", + AMOR_D: "AMOR.D", + AMOR_DB_D: "AMOR_DB.D", + AMOR_DB_W: "AMOR_DB.W", + AMOR_W: "AMOR.W", + AMSWAP_B: "AMSWAP.B", + AMSWAP_D: "AMSWAP.D", + AMSWAP_DB_B: "AMSWAP_DB.B", + AMSWAP_DB_D: "AMSWAP_DB.D", + AMSWAP_DB_H: "AMSWAP_DB.H", + AMSWAP_DB_W: "AMSWAP_DB.W", + AMSWAP_H: "AMSWAP.H", + AMSWAP_W: "AMSWAP.W", + AMXOR_D: "AMXOR.D", + AMXOR_DB_D: "AMXOR_DB.D", + AMXOR_DB_W: "AMXOR_DB.W", + AMXOR_W: "AMXOR.W", + AND: "AND", + ANDI: "ANDI", + ANDN: "ANDN", + ASRTGT_D: "ASRTGT.D", + ASRTLE_D: "ASRTLE.D", + B: "B", + BCEQZ: "BCEQZ", + BCNEZ: "BCNEZ", + BEQ: "BEQ", + BEQZ: "BEQZ", + BGE: "BGE", + BGEU: "BGEU", + BITREV_4B: "BITREV.4B", + BITREV_8B: "BITREV.8B", + BITREV_D: "BITREV.D", + BITREV_W: 
"BITREV.W", + BL: "BL", + BLT: "BLT", + BLTU: "BLTU", + BNE: "BNE", + BNEZ: "BNEZ", + BREAK: "BREAK", + BSTRINS_D: "BSTRINS.D", + BSTRINS_W: "BSTRINS.W", + BSTRPICK_D: "BSTRPICK.D", + BSTRPICK_W: "BSTRPICK.W", + BYTEPICK_D: "BYTEPICK.D", + BYTEPICK_W: "BYTEPICK.W", + CACOP: "CACOP", + CLO_D: "CLO.D", + CLO_W: "CLO.W", + CLZ_D: "CLZ.D", + CLZ_W: "CLZ.W", + CPUCFG: "CPUCFG", + CRCC_W_B_W: "CRCC.W.B.W", + CRCC_W_D_W: "CRCC.W.D.W", + CRCC_W_H_W: "CRCC.W.H.W", + CRCC_W_W_W: "CRCC.W.W.W", + CRC_W_B_W: "CRC.W.B.W", + CRC_W_D_W: "CRC.W.D.W", + CRC_W_H_W: "CRC.W.H.W", + CRC_W_W_W: "CRC.W.W.W", + CSRRD: "CSRRD", + CSRWR: "CSRWR", + CSRXCHG: "CSRXCHG", + CTO_D: "CTO.D", + CTO_W: "CTO.W", + CTZ_D: "CTZ.D", + CTZ_W: "CTZ.W", + DBAR: "DBAR", + DBCL: "DBCL", + DIV_D: "DIV.D", + DIV_DU: "DIV.DU", + DIV_W: "DIV.W", + DIV_WU: "DIV.WU", + ERTN: "ERTN", + EXT_W_B: "EXT.W.B", + EXT_W_H: "EXT.W.H", + FABS_D: "FABS.D", + FABS_S: "FABS.S", + FADD_D: "FADD.D", + FADD_S: "FADD.S", + FCLASS_D: "FCLASS.D", + FCLASS_S: "FCLASS.S", + FCMP_CAF_D: "FCMP.CAF.D", + FCMP_CAF_S: "FCMP.CAF.S", + FCMP_CEQ_D: "FCMP.CEQ.D", + FCMP_CEQ_S: "FCMP.CEQ.S", + FCMP_CLE_D: "FCMP.CLE.D", + FCMP_CLE_S: "FCMP.CLE.S", + FCMP_CLT_D: "FCMP.CLT.D", + FCMP_CLT_S: "FCMP.CLT.S", + FCMP_CNE_D: "FCMP.CNE.D", + FCMP_CNE_S: "FCMP.CNE.S", + FCMP_COR_D: "FCMP.COR.D", + FCMP_COR_S: "FCMP.COR.S", + FCMP_CUEQ_D: "FCMP.CUEQ.D", + FCMP_CUEQ_S: "FCMP.CUEQ.S", + FCMP_CULE_D: "FCMP.CULE.D", + FCMP_CULE_S: "FCMP.CULE.S", + FCMP_CULT_D: "FCMP.CULT.D", + FCMP_CULT_S: "FCMP.CULT.S", + FCMP_CUNE_D: "FCMP.CUNE.D", + FCMP_CUNE_S: "FCMP.CUNE.S", + FCMP_CUN_D: "FCMP.CUN.D", + FCMP_CUN_S: "FCMP.CUN.S", + FCMP_SAF_D: "FCMP.SAF.D", + FCMP_SAF_S: "FCMP.SAF.S", + FCMP_SEQ_D: "FCMP.SEQ.D", + FCMP_SEQ_S: "FCMP.SEQ.S", + FCMP_SLE_D: "FCMP.SLE.D", + FCMP_SLE_S: "FCMP.SLE.S", + FCMP_SLT_D: "FCMP.SLT.D", + FCMP_SLT_S: "FCMP.SLT.S", + FCMP_SNE_D: "FCMP.SNE.D", + FCMP_SNE_S: "FCMP.SNE.S", + FCMP_SOR_D: "FCMP.SOR.D", + FCMP_SOR_S: "FCMP.SOR.S", + 
FCMP_SUEQ_D: "FCMP.SUEQ.D", + FCMP_SUEQ_S: "FCMP.SUEQ.S", + FCMP_SULE_D: "FCMP.SULE.D", + FCMP_SULE_S: "FCMP.SULE.S", + FCMP_SULT_D: "FCMP.SULT.D", + FCMP_SULT_S: "FCMP.SULT.S", + FCMP_SUNE_D: "FCMP.SUNE.D", + FCMP_SUNE_S: "FCMP.SUNE.S", + FCMP_SUN_D: "FCMP.SUN.D", + FCMP_SUN_S: "FCMP.SUN.S", + FCOPYSIGN_D: "FCOPYSIGN.D", + FCOPYSIGN_S: "FCOPYSIGN.S", + FCVT_D_S: "FCVT.D.S", + FCVT_S_D: "FCVT.S.D", + FDIV_D: "FDIV.D", + FDIV_S: "FDIV.S", + FFINT_D_L: "FFINT.D.L", + FFINT_D_W: "FFINT.D.W", + FFINT_S_L: "FFINT.S.L", + FFINT_S_W: "FFINT.S.W", + FLDGT_D: "FLDGT.D", + FLDGT_S: "FLDGT.S", + FLDLE_D: "FLDLE.D", + FLDLE_S: "FLDLE.S", + FLDX_D: "FLDX.D", + FLDX_S: "FLDX.S", + FLD_D: "FLD.D", + FLD_S: "FLD.S", + FLOGB_D: "FLOGB.D", + FLOGB_S: "FLOGB.S", + FMADD_D: "FMADD.D", + FMADD_S: "FMADD.S", + FMAXA_D: "FMAXA.D", + FMAXA_S: "FMAXA.S", + FMAX_D: "FMAX.D", + FMAX_S: "FMAX.S", + FMINA_D: "FMINA.D", + FMINA_S: "FMINA.S", + FMIN_D: "FMIN.D", + FMIN_S: "FMIN.S", + FMOV_D: "FMOV.D", + FMOV_S: "FMOV.S", + FMSUB_D: "FMSUB.D", + FMSUB_S: "FMSUB.S", + FMUL_D: "FMUL.D", + FMUL_S: "FMUL.S", + FNEG_D: "FNEG.D", + FNEG_S: "FNEG.S", + FNMADD_D: "FNMADD.D", + FNMADD_S: "FNMADD.S", + FNMSUB_D: "FNMSUB.D", + FNMSUB_S: "FNMSUB.S", + FRECIPE_D: "FRECIPE.D", + FRECIPE_S: "FRECIPE.S", + FRECIP_D: "FRECIP.D", + FRECIP_S: "FRECIP.S", + FRINT_D: "FRINT.D", + FRINT_S: "FRINT.S", + FRSQRTE_D: "FRSQRTE.D", + FRSQRTE_S: "FRSQRTE.S", + FRSQRT_D: "FRSQRT.D", + FRSQRT_S: "FRSQRT.S", + FSCALEB_D: "FSCALEB.D", + FSCALEB_S: "FSCALEB.S", + FSEL: "FSEL", + FSQRT_D: "FSQRT.D", + FSQRT_S: "FSQRT.S", + FSTGT_D: "FSTGT.D", + FSTGT_S: "FSTGT.S", + FSTLE_D: "FSTLE.D", + FSTLE_S: "FSTLE.S", + FSTX_D: "FSTX.D", + FSTX_S: "FSTX.S", + FST_D: "FST.D", + FST_S: "FST.S", + FSUB_D: "FSUB.D", + FSUB_S: "FSUB.S", + FTINTRM_L_D: "FTINTRM.L.D", + FTINTRM_L_S: "FTINTRM.L.S", + FTINTRM_W_D: "FTINTRM.W.D", + FTINTRM_W_S: "FTINTRM.W.S", + FTINTRNE_L_D: "FTINTRNE.L.D", + FTINTRNE_L_S: "FTINTRNE.L.S", + FTINTRNE_W_D: 
"FTINTRNE.W.D", + FTINTRNE_W_S: "FTINTRNE.W.S", + FTINTRP_L_D: "FTINTRP.L.D", + FTINTRP_L_S: "FTINTRP.L.S", + FTINTRP_W_D: "FTINTRP.W.D", + FTINTRP_W_S: "FTINTRP.W.S", + FTINTRZ_L_D: "FTINTRZ.L.D", + FTINTRZ_L_S: "FTINTRZ.L.S", + FTINTRZ_W_D: "FTINTRZ.W.D", + FTINTRZ_W_S: "FTINTRZ.W.S", + FTINT_L_D: "FTINT.L.D", + FTINT_L_S: "FTINT.L.S", + FTINT_W_D: "FTINT.W.D", + FTINT_W_S: "FTINT.W.S", + IBAR: "IBAR", + IDLE: "IDLE", + INVTLB: "INVTLB", + IOCSRRD_B: "IOCSRRD.B", + IOCSRRD_D: "IOCSRRD.D", + IOCSRRD_H: "IOCSRRD.H", + IOCSRRD_W: "IOCSRRD.W", + IOCSRWR_B: "IOCSRWR.B", + IOCSRWR_D: "IOCSRWR.D", + IOCSRWR_H: "IOCSRWR.H", + IOCSRWR_W: "IOCSRWR.W", + JIRL: "JIRL", + LDDIR: "LDDIR", + LDGT_B: "LDGT.B", + LDGT_D: "LDGT.D", + LDGT_H: "LDGT.H", + LDGT_W: "LDGT.W", + LDLE_B: "LDLE.B", + LDLE_D: "LDLE.D", + LDLE_H: "LDLE.H", + LDLE_W: "LDLE.W", + LDPTE: "LDPTE", + LDPTR_D: "LDPTR.D", + LDPTR_W: "LDPTR.W", + LDX_B: "LDX.B", + LDX_BU: "LDX.BU", + LDX_D: "LDX.D", + LDX_H: "LDX.H", + LDX_HU: "LDX.HU", + LDX_W: "LDX.W", + LDX_WU: "LDX.WU", + LD_B: "LD.B", + LD_BU: "LD.BU", + LD_D: "LD.D", + LD_H: "LD.H", + LD_HU: "LD.HU", + LD_W: "LD.W", + LD_WU: "LD.WU", + LLACQ_D: "LLACQ.D", + LLACQ_W: "LLACQ.W", + LL_D: "LL.D", + LL_W: "LL.W", + LU12I_W: "LU12I.W", + LU32I_D: "LU32I.D", + LU52I_D: "LU52I.D", + MASKEQZ: "MASKEQZ", + MASKNEZ: "MASKNEZ", + MOD_D: "MOD.D", + MOD_DU: "MOD.DU", + MOD_W: "MOD.W", + MOD_WU: "MOD.WU", + MOVCF2FR: "MOVCF2FR", + MOVCF2GR: "MOVCF2GR", + MOVFCSR2GR: "MOVFCSR2GR", + MOVFR2CF: "MOVFR2CF", + MOVFR2GR_D: "MOVFR2GR.D", + MOVFR2GR_S: "MOVFR2GR.S", + MOVFRH2GR_S: "MOVFRH2GR.S", + MOVGR2CF: "MOVGR2CF", + MOVGR2FCSR: "MOVGR2FCSR", + MOVGR2FRH_W: "MOVGR2FRH.W", + MOVGR2FR_D: "MOVGR2FR.D", + MOVGR2FR_W: "MOVGR2FR.W", + MULH_D: "MULH.D", + MULH_DU: "MULH.DU", + MULH_W: "MULH.W", + MULH_WU: "MULH.WU", + MULW_D_W: "MULW.D.W", + MULW_D_WU: "MULW.D.WU", + MUL_D: "MUL.D", + MUL_W: "MUL.W", + NOR: "NOR", + OR: "OR", + ORI: "ORI", + ORN: "ORN", + PCADDI: "PCADDI", + 
PCADDU12I: "PCADDU12I", + PCADDU18I: "PCADDU18I", + PCALAU12I: "PCALAU12I", + PRELD: "PRELD", + PRELDX: "PRELDX", + RDTIMEH_W: "RDTIMEH.W", + RDTIMEL_W: "RDTIMEL.W", + RDTIME_D: "RDTIME.D", + REVB_2H: "REVB.2H", + REVB_2W: "REVB.2W", + REVB_4H: "REVB.4H", + REVB_D: "REVB.D", + REVH_2W: "REVH.2W", + REVH_D: "REVH.D", + ROTRI_D: "ROTRI.D", + ROTRI_W: "ROTRI.W", + ROTR_D: "ROTR.D", + ROTR_W: "ROTR.W", + SCREL_D: "SCREL.D", + SCREL_W: "SCREL.W", + SC_D: "SC.D", + SC_Q: "SC.Q", + SC_W: "SC.W", + SLLI_D: "SLLI.D", + SLLI_W: "SLLI.W", + SLL_D: "SLL.D", + SLL_W: "SLL.W", + SLT: "SLT", + SLTI: "SLTI", + SLTU: "SLTU", + SLTUI: "SLTUI", + SRAI_D: "SRAI.D", + SRAI_W: "SRAI.W", + SRA_D: "SRA.D", + SRA_W: "SRA.W", + SRLI_D: "SRLI.D", + SRLI_W: "SRLI.W", + SRL_D: "SRL.D", + SRL_W: "SRL.W", + STGT_B: "STGT.B", + STGT_D: "STGT.D", + STGT_H: "STGT.H", + STGT_W: "STGT.W", + STLE_B: "STLE.B", + STLE_D: "STLE.D", + STLE_H: "STLE.H", + STLE_W: "STLE.W", + STPTR_D: "STPTR.D", + STPTR_W: "STPTR.W", + STX_B: "STX.B", + STX_D: "STX.D", + STX_H: "STX.H", + STX_W: "STX.W", + ST_B: "ST.B", + ST_D: "ST.D", + ST_H: "ST.H", + ST_W: "ST.W", + SUB_D: "SUB.D", + SUB_W: "SUB.W", + SYSCALL: "SYSCALL", + TLBCLR: "TLBCLR", + TLBFILL: "TLBFILL", + TLBFLUSH: "TLBFLUSH", + TLBRD: "TLBRD", + TLBSRCH: "TLBSRCH", + TLBWR: "TLBWR", + XOR: "XOR", + XORI: "XORI", +} + +var instFormats = [...]instFormat{ + // ADDI.D rd, rj, si12 + {mask: 0xffc00000, value: 0x02c00000, op: ADDI_D, args: instArgs{arg_rd, arg_rj, arg_si12_21_10}}, + // ADDI.W rd, rj, si12 + {mask: 0xffc00000, value: 0x02800000, op: ADDI_W, args: instArgs{arg_rd, arg_rj, arg_si12_21_10}}, + // ADDU16I.D rd, rj, si16 + {mask: 0xfc000000, value: 0x10000000, op: ADDU16I_D, args: instArgs{arg_rd, arg_rj, arg_si16_25_10}}, + // ADD.D rd, rj, rk + {mask: 0xffff8000, value: 0x00108000, op: ADD_D, args: instArgs{arg_rd, arg_rj, arg_rk}}, + // ADD.W rd, rj, rk + {mask: 0xffff8000, value: 0x00100000, op: ADD_W, args: instArgs{arg_rd, arg_rj, arg_rk}}, + // 
ALSL.D rd, rj, rk, sa2 + {mask: 0xfffe0000, value: 0x002c0000, op: ALSL_D, args: instArgs{arg_rd, arg_rj, arg_rk, arg_sa2_16_15}}, + // ALSL.W rd, rj, rk, sa2 + {mask: 0xfffe0000, value: 0x00040000, op: ALSL_W, args: instArgs{arg_rd, arg_rj, arg_rk, arg_sa2_16_15}}, + // ALSL.WU rd, rj, rk, sa2 + {mask: 0xfffe0000, value: 0x00060000, op: ALSL_WU, args: instArgs{arg_rd, arg_rj, arg_rk, arg_sa2_16_15}}, + // AMADD.B rd, rk, rj + {mask: 0xffff8000, value: 0x385d0000, op: AMADD_B, args: instArgs{arg_rd, arg_rk, arg_rj}}, + // AMADD.D rd, rk, rj + {mask: 0xffff8000, value: 0x38618000, op: AMADD_D, args: instArgs{arg_rd, arg_rk, arg_rj}}, + // AMADD_DB.B rd, rk, rj + {mask: 0xffff8000, value: 0x385f0000, op: AMADD_DB_B, args: instArgs{arg_rd, arg_rk, arg_rj}}, + // AMADD_DB.D rd, rk, rj + {mask: 0xffff8000, value: 0x386a8000, op: AMADD_DB_D, args: instArgs{arg_rd, arg_rk, arg_rj}}, + // AMADD_DB.H rd, rk, rj + {mask: 0xffff8000, value: 0x385f8000, op: AMADD_DB_H, args: instArgs{arg_rd, arg_rk, arg_rj}}, + // AMADD_DB.W rd, rk, rj + {mask: 0xffff8000, value: 0x386a0000, op: AMADD_DB_W, args: instArgs{arg_rd, arg_rk, arg_rj}}, + // AMADD.H rd, rk, rj + {mask: 0xffff8000, value: 0x385d8000, op: AMADD_H, args: instArgs{arg_rd, arg_rk, arg_rj}}, + // AMADD.W rd, rk, rj + {mask: 0xffff8000, value: 0x38610000, op: AMADD_W, args: instArgs{arg_rd, arg_rk, arg_rj}}, + // AMAND.D rd, rk, rj + {mask: 0xffff8000, value: 0x38628000, op: AMAND_D, args: instArgs{arg_rd, arg_rk, arg_rj}}, + // AMAND_DB.D rd, rk, rj + {mask: 0xffff8000, value: 0x386b8000, op: AMAND_DB_D, args: instArgs{arg_rd, arg_rk, arg_rj}}, + // AMAND_DB.W rd, rk, rj + {mask: 0xffff8000, value: 0x386b0000, op: AMAND_DB_W, args: instArgs{arg_rd, arg_rk, arg_rj}}, + // AMAND.W rd, rk, rj + {mask: 0xffff8000, value: 0x38620000, op: AMAND_W, args: instArgs{arg_rd, arg_rk, arg_rj}}, + // AMCAS.B rd, rk, rj + {mask: 0xffff8000, value: 0x38580000, op: AMCAS_B, args: instArgs{arg_rd, arg_rk, arg_rj}}, + // AMCAS.D rd, rk, rj 
+ {mask: 0xffff8000, value: 0x38598000, op: AMCAS_D, args: instArgs{arg_rd, arg_rk, arg_rj}}, + // AMCAS_DB.B rd, rk, rj + {mask: 0xffff8000, value: 0x385a0000, op: AMCAS_DB_B, args: instArgs{arg_rd, arg_rk, arg_rj}}, + // AMCAS_DB.D rd, rk, rj + {mask: 0xffff8000, value: 0x385b8000, op: AMCAS_DB_D, args: instArgs{arg_rd, arg_rk, arg_rj}}, + // AMCAS_DB.H rd, rk, rj + {mask: 0xffff8000, value: 0x385a8000, op: AMCAS_DB_H, args: instArgs{arg_rd, arg_rk, arg_rj}}, + // AMCAS_DB.W rd, rk, rj + {mask: 0xffff8000, value: 0x385b0000, op: AMCAS_DB_W, args: instArgs{arg_rd, arg_rk, arg_rj}}, + // AMCAS.H rd, rk, rj + {mask: 0xffff8000, value: 0x38588000, op: AMCAS_H, args: instArgs{arg_rd, arg_rk, arg_rj}}, + // AMCAS.W rd, rk, rj + {mask: 0xffff8000, value: 0x38590000, op: AMCAS_W, args: instArgs{arg_rd, arg_rk, arg_rj}}, + // AMMAX.D rd, rk, rj + {mask: 0xffff8000, value: 0x38658000, op: AMMAX_D, args: instArgs{arg_rd, arg_rk, arg_rj}}, + // AMMAX_DB.D rd, rk, rj + {mask: 0xffff8000, value: 0x386e8000, op: AMMAX_DB_D, args: instArgs{arg_rd, arg_rk, arg_rj}}, + // AMMAX_DB.DU rd, rk, rj + {mask: 0xffff8000, value: 0x38708000, op: AMMAX_DB_DU, args: instArgs{arg_rd, arg_rk, arg_rj}}, + // AMMAX_DB.W rd, rk, rj + {mask: 0xffff8000, value: 0x386e0000, op: AMMAX_DB_W, args: instArgs{arg_rd, arg_rk, arg_rj}}, + // AMMAX_DB.WU rd, rk, rj + {mask: 0xffff8000, value: 0x38700000, op: AMMAX_DB_WU, args: instArgs{arg_rd, arg_rk, arg_rj}}, + // AMMAX.DU rd, rk, rj + {mask: 0xffff8000, value: 0x38678000, op: AMMAX_DU, args: instArgs{arg_rd, arg_rk, arg_rj}}, + // AMMAX.W rd, rk, rj + {mask: 0xffff8000, value: 0x38650000, op: AMMAX_W, args: instArgs{arg_rd, arg_rk, arg_rj}}, + // AMMAX.WU rd, rk, rj + {mask: 0xffff8000, value: 0x38670000, op: AMMAX_WU, args: instArgs{arg_rd, arg_rk, arg_rj}}, + // AMMIN.D rd, rk, rj + {mask: 0xffff8000, value: 0x38668000, op: AMMIN_D, args: instArgs{arg_rd, arg_rk, arg_rj}}, + // AMMIN_DB.D rd, rk, rj + {mask: 0xffff8000, value: 0x386f8000, op: 
AMMIN_DB_D, args: instArgs{arg_rd, arg_rk, arg_rj}}, + // AMMIN_DB.DU rd, rk, rj + {mask: 0xffff8000, value: 0x38718000, op: AMMIN_DB_DU, args: instArgs{arg_rd, arg_rk, arg_rj}}, + // AMMIN_DB.W rd, rk, rj + {mask: 0xffff8000, value: 0x386f0000, op: AMMIN_DB_W, args: instArgs{arg_rd, arg_rk, arg_rj}}, + // AMMIN_DB.WU rd, rk, rj + {mask: 0xffff8000, value: 0x38710000, op: AMMIN_DB_WU, args: instArgs{arg_rd, arg_rk, arg_rj}}, + // AMMIN.DU rd, rk, rj + {mask: 0xffff8000, value: 0x38688000, op: AMMIN_DU, args: instArgs{arg_rd, arg_rk, arg_rj}}, + // AMMIN.W rd, rk, rj + {mask: 0xffff8000, value: 0x38660000, op: AMMIN_W, args: instArgs{arg_rd, arg_rk, arg_rj}}, + // AMMIN.WU rd, rk, rj + {mask: 0xffff8000, value: 0x38680000, op: AMMIN_WU, args: instArgs{arg_rd, arg_rk, arg_rj}}, + // AMOR.D rd, rk, rj + {mask: 0xffff8000, value: 0x38638000, op: AMOR_D, args: instArgs{arg_rd, arg_rk, arg_rj}}, + // AMOR_DB.D rd, rk, rj + {mask: 0xffff8000, value: 0x386c8000, op: AMOR_DB_D, args: instArgs{arg_rd, arg_rk, arg_rj}}, + // AMOR_DB.W rd, rk, rj + {mask: 0xffff8000, value: 0x386c0000, op: AMOR_DB_W, args: instArgs{arg_rd, arg_rk, arg_rj}}, + // AMOR.W rd, rk, rj + {mask: 0xffff8000, value: 0x38630000, op: AMOR_W, args: instArgs{arg_rd, arg_rk, arg_rj}}, + // AMSWAP.B rd, rk, rj + {mask: 0xffff8000, value: 0x385c0000, op: AMSWAP_B, args: instArgs{arg_rd, arg_rk, arg_rj}}, + // AMSWAP.D rd, rk, rj + {mask: 0xffff8000, value: 0x38608000, op: AMSWAP_D, args: instArgs{arg_rd, arg_rk, arg_rj}}, + // AMSWAP_DB.B rd, rk, rj + {mask: 0xffff8000, value: 0x385e0000, op: AMSWAP_DB_B, args: instArgs{arg_rd, arg_rk, arg_rj}}, + // AMSWAP_DB.D rd, rk, rj + {mask: 0xffff8000, value: 0x38698000, op: AMSWAP_DB_D, args: instArgs{arg_rd, arg_rk, arg_rj}}, + // AMSWAP_DB.H rd, rk, rj + {mask: 0xffff8000, value: 0x385e8000, op: AMSWAP_DB_H, args: instArgs{arg_rd, arg_rk, arg_rj}}, + // AMSWAP_DB.W rd, rk, rj + {mask: 0xffff8000, value: 0x38690000, op: AMSWAP_DB_W, args: instArgs{arg_rd, arg_rk, 
arg_rj}}, + // AMSWAP.H rd, rk, rj + {mask: 0xffff8000, value: 0x385c8000, op: AMSWAP_H, args: instArgs{arg_rd, arg_rk, arg_rj}}, + // AMSWAP.W rd, rk, rj + {mask: 0xffff8000, value: 0x38600000, op: AMSWAP_W, args: instArgs{arg_rd, arg_rk, arg_rj}}, + // AMXOR.D rd, rk, rj + {mask: 0xffff8000, value: 0x38648000, op: AMXOR_D, args: instArgs{arg_rd, arg_rk, arg_rj}}, + // AMXOR_DB.D rd, rk, rj + {mask: 0xffff8000, value: 0x386d8000, op: AMXOR_DB_D, args: instArgs{arg_rd, arg_rk, arg_rj}}, + // AMXOR_DB.W rd, rk, rj + {mask: 0xffff8000, value: 0x386d0000, op: AMXOR_DB_W, args: instArgs{arg_rd, arg_rk, arg_rj}}, + // AMXOR.W rd, rk, rj + {mask: 0xffff8000, value: 0x38640000, op: AMXOR_W, args: instArgs{arg_rd, arg_rk, arg_rj}}, + // AND rd, rj, rk + {mask: 0xffff8000, value: 0x00148000, op: AND, args: instArgs{arg_rd, arg_rj, arg_rk}}, + // ANDI rd, rj, ui12 + {mask: 0xffc00000, value: 0x03400000, op: ANDI, args: instArgs{arg_rd, arg_rj, arg_ui12_21_10}}, + // ANDN rd, rj, rk + {mask: 0xffff8000, value: 0x00168000, op: ANDN, args: instArgs{arg_rd, arg_rj, arg_rk}}, + // ASRTGT.D rj, rk + {mask: 0xffff801f, value: 0x00018000, op: ASRTGT_D, args: instArgs{arg_rj, arg_rk}}, + // ASRTLE.D rj, rk + {mask: 0xffff801f, value: 0x00010000, op: ASRTLE_D, args: instArgs{arg_rj, arg_rk}}, + // B offs + {mask: 0xfc000000, value: 0x50000000, op: B, args: instArgs{arg_offset_25_0}}, + // BCEQZ cj, offs + {mask: 0xfc000300, value: 0x48000000, op: BCEQZ, args: instArgs{arg_cj, arg_offset_20_0}}, + // BCNEZ cj, offs + {mask: 0xfc000300, value: 0x48000100, op: BCNEZ, args: instArgs{arg_cj, arg_offset_20_0}}, + // BEQ rj, rd, offs + {mask: 0xfc000000, value: 0x58000000, op: BEQ, args: instArgs{arg_rj, arg_rd, arg_offset_15_0}}, + // BEQZ rj, offs + {mask: 0xfc000000, value: 0x40000000, op: BEQZ, args: instArgs{arg_rj, arg_offset_20_0}}, + // BGE rj, rd, offs + {mask: 0xfc000000, value: 0x64000000, op: BGE, args: instArgs{arg_rj, arg_rd, arg_offset_15_0}}, + // BGEU rj, rd, offs + {mask: 
0xfc000000, value: 0x6c000000, op: BGEU, args: instArgs{arg_rj, arg_rd, arg_offset_15_0}}, + // BITREV.4B rd, rj + {mask: 0xfffffc00, value: 0x00004800, op: BITREV_4B, args: instArgs{arg_rd, arg_rj}}, + // BITREV.8B rd, rj + {mask: 0xfffffc00, value: 0x00004c00, op: BITREV_8B, args: instArgs{arg_rd, arg_rj}}, + // BITREV.D rd, rj + {mask: 0xfffffc00, value: 0x00005400, op: BITREV_D, args: instArgs{arg_rd, arg_rj}}, + // BITREV.W rd, rj + {mask: 0xfffffc00, value: 0x00005000, op: BITREV_W, args: instArgs{arg_rd, arg_rj}}, + // BL offs + {mask: 0xfc000000, value: 0x54000000, op: BL, args: instArgs{arg_offset_25_0}}, + // BLT rj, rd, offs + {mask: 0xfc000000, value: 0x60000000, op: BLT, args: instArgs{arg_rj, arg_rd, arg_offset_15_0}}, + // BLTU rj, rd, offs + {mask: 0xfc000000, value: 0x68000000, op: BLTU, args: instArgs{arg_rj, arg_rd, arg_offset_15_0}}, + // BNE rj, rd, offs + {mask: 0xfc000000, value: 0x5c000000, op: BNE, args: instArgs{arg_rj, arg_rd, arg_offset_15_0}}, + // BNEZ rj, offs + {mask: 0xfc000000, value: 0x44000000, op: BNEZ, args: instArgs{arg_rj, arg_offset_20_0}}, + // BREAK code + {mask: 0xffff8000, value: 0x002a0000, op: BREAK, args: instArgs{arg_code_14_0}}, + // BSTRINS.D rd, rj, msbd, lsbd + {mask: 0xffc00000, value: 0x00800000, op: BSTRINS_D, args: instArgs{arg_rd, arg_rj, arg_msbd, arg_lsbd}}, + // BSTRINS.W rd, rj, msbw, lsbw + {mask: 0xffe08000, value: 0x00600000, op: BSTRINS_W, args: instArgs{arg_rd, arg_rj, arg_msbw, arg_lsbw}}, + // BSTRPICK.D rd, rj, msbd, lsbd + {mask: 0xffc00000, value: 0x00c00000, op: BSTRPICK_D, args: instArgs{arg_rd, arg_rj, arg_msbd, arg_lsbd}}, + // BSTRPICK.W rd, rj, msbw, lsbw + {mask: 0xffe08000, value: 0x00608000, op: BSTRPICK_W, args: instArgs{arg_rd, arg_rj, arg_msbw, arg_lsbw}}, + // BYTEPICK.D rd, rj, rk, sa3 + {mask: 0xfffc0000, value: 0x000c0000, op: BYTEPICK_D, args: instArgs{arg_rd, arg_rj, arg_rk, arg_sa3_17_15}}, + // BYTEPICK.W rd, rj, rk, sa2 + {mask: 0xfffe0000, value: 0x00080000, op: 
BYTEPICK_W, args: instArgs{arg_rd, arg_rj, arg_rk, arg_sa2_16_15}}, + // CACOP code, rj, si12 + {mask: 0xffc00000, value: 0x06000000, op: CACOP, args: instArgs{arg_code_4_0, arg_rj, arg_si12_21_10}}, + // CLO.D rd, rj + {mask: 0xfffffc00, value: 0x00002000, op: CLO_D, args: instArgs{arg_rd, arg_rj}}, + // CLO.W rd, rj + {mask: 0xfffffc00, value: 0x00001000, op: CLO_W, args: instArgs{arg_rd, arg_rj}}, + // CLZ.D rd, rj + {mask: 0xfffffc00, value: 0x00002400, op: CLZ_D, args: instArgs{arg_rd, arg_rj}}, + // CLZ.W rd, rj + {mask: 0xfffffc00, value: 0x00001400, op: CLZ_W, args: instArgs{arg_rd, arg_rj}}, + // CPUCFG rd, rj + {mask: 0xfffffc00, value: 0x00006c00, op: CPUCFG, args: instArgs{arg_rd, arg_rj}}, + // CRCC.W.B.W rd, rj, rk + {mask: 0xffff8000, value: 0x00260000, op: CRCC_W_B_W, args: instArgs{arg_rd, arg_rj, arg_rk}}, + // CRCC.W.D.W rd, rj, rk + {mask: 0xffff8000, value: 0x00278000, op: CRCC_W_D_W, args: instArgs{arg_rd, arg_rj, arg_rk}}, + // CRCC.W.H.W rd, rj, rk + {mask: 0xffff8000, value: 0x00268000, op: CRCC_W_H_W, args: instArgs{arg_rd, arg_rj, arg_rk}}, + // CRCC.W.W.W rd, rj, rk + {mask: 0xffff8000, value: 0x00270000, op: CRCC_W_W_W, args: instArgs{arg_rd, arg_rj, arg_rk}}, + // CRC.W.B.W rd, rj, rk + {mask: 0xffff8000, value: 0x00240000, op: CRC_W_B_W, args: instArgs{arg_rd, arg_rj, arg_rk}}, + // CRC.W.D.W rd, rj, rk + {mask: 0xffff8000, value: 0x00258000, op: CRC_W_D_W, args: instArgs{arg_rd, arg_rj, arg_rk}}, + // CRC.W.H.W rd, rj, rk + {mask: 0xffff8000, value: 0x00248000, op: CRC_W_H_W, args: instArgs{arg_rd, arg_rj, arg_rk}}, + // CRC.W.W.W rd, rj, rk + {mask: 0xffff8000, value: 0x00250000, op: CRC_W_W_W, args: instArgs{arg_rd, arg_rj, arg_rk}}, + // CSRRD rd, csr + {mask: 0xff0003e0, value: 0x04000000, op: CSRRD, args: instArgs{arg_rd, arg_csr_23_10}}, + // CSRWR rd, csr + {mask: 0xff0003e0, value: 0x04000020, op: CSRWR, args: instArgs{arg_rd, arg_csr_23_10}}, + // CSRXCHG rd, rj, csr + {mask: 0xff000000, value: 0x04000000, op: CSRXCHG, args: 
instArgs{arg_rd, arg_rj, arg_csr_23_10}}, + // CTO.D rd, rj + {mask: 0xfffffc00, value: 0x00002800, op: CTO_D, args: instArgs{arg_rd, arg_rj}}, + // CTO.W rd, rj + {mask: 0xfffffc00, value: 0x00001800, op: CTO_W, args: instArgs{arg_rd, arg_rj}}, + // CTZ.D rd, rj + {mask: 0xfffffc00, value: 0x00002c00, op: CTZ_D, args: instArgs{arg_rd, arg_rj}}, + // CTZ.W rd, rj + {mask: 0xfffffc00, value: 0x00001c00, op: CTZ_W, args: instArgs{arg_rd, arg_rj}}, + // DBAR hint + {mask: 0xffff8000, value: 0x38720000, op: DBAR, args: instArgs{arg_hint_14_0}}, + // DBCL code + {mask: 0xffff8000, value: 0x002a8000, op: DBCL, args: instArgs{arg_code_14_0}}, + // DIV.D rd, rj, rk + {mask: 0xffff8000, value: 0x00220000, op: DIV_D, args: instArgs{arg_rd, arg_rj, arg_rk}}, + // DIV.DU rd, rj, rk + {mask: 0xffff8000, value: 0x00230000, op: DIV_DU, args: instArgs{arg_rd, arg_rj, arg_rk}}, + // DIV.W rd, rj, rk + {mask: 0xffff8000, value: 0x00200000, op: DIV_W, args: instArgs{arg_rd, arg_rj, arg_rk}}, + // DIV.WU rd, rj, rk + {mask: 0xffff8000, value: 0x00210000, op: DIV_WU, args: instArgs{arg_rd, arg_rj, arg_rk}}, + // ERTN + {mask: 0xffffffff, value: 0x06483800, op: ERTN, args: instArgs{}}, + // EXT.W.B rd, rj + {mask: 0xfffffc00, value: 0x00005c00, op: EXT_W_B, args: instArgs{arg_rd, arg_rj}}, + // EXT.W.H rd, rj + {mask: 0xfffffc00, value: 0x00005800, op: EXT_W_H, args: instArgs{arg_rd, arg_rj}}, + // FABS.D fd, fj + {mask: 0xfffffc00, value: 0x01140800, op: FABS_D, args: instArgs{arg_fd, arg_fj}}, + // FABS.S fd, fj + {mask: 0xfffffc00, value: 0x01140400, op: FABS_S, args: instArgs{arg_fd, arg_fj}}, + // FADD.D fd, fj, fk + {mask: 0xffff8000, value: 0x01010000, op: FADD_D, args: instArgs{arg_fd, arg_fj, arg_fk}}, + // FADD.S fd, fj, fk + {mask: 0xffff8000, value: 0x01008000, op: FADD_S, args: instArgs{arg_fd, arg_fj, arg_fk}}, + // FCLASS.D fd, fj + {mask: 0xfffffc00, value: 0x01143800, op: FCLASS_D, args: instArgs{arg_fd, arg_fj}}, + // FCLASS.S fd, fj + {mask: 0xfffffc00, value: 
0x01143400, op: FCLASS_S, args: instArgs{arg_fd, arg_fj}}, + // FCMP.CAF.D cd, fj, fk + {mask: 0xffff8018, value: 0x0c200000, op: FCMP_CAF_D, args: instArgs{arg_cd, arg_fj, arg_fk}}, + // FCMP.CAF.S cd, fj, fk + {mask: 0xffff8018, value: 0x0c100000, op: FCMP_CAF_S, args: instArgs{arg_cd, arg_fj, arg_fk}}, + // FCMP.CEQ.D cd, fj, fk + {mask: 0xffff8018, value: 0x0c220000, op: FCMP_CEQ_D, args: instArgs{arg_cd, arg_fj, arg_fk}}, + // FCMP.CEQ.S cd, fj, fk + {mask: 0xffff8018, value: 0x0c120000, op: FCMP_CEQ_S, args: instArgs{arg_cd, arg_fj, arg_fk}}, + // FCMP.CLE.D cd, fj, fk + {mask: 0xffff8018, value: 0x0c230000, op: FCMP_CLE_D, args: instArgs{arg_cd, arg_fj, arg_fk}}, + // FCMP.CLE.S cd, fj, fk + {mask: 0xffff8018, value: 0x0c130000, op: FCMP_CLE_S, args: instArgs{arg_cd, arg_fj, arg_fk}}, + // FCMP.CLT.D cd, fj, fk + {mask: 0xffff8018, value: 0x0c210000, op: FCMP_CLT_D, args: instArgs{arg_cd, arg_fj, arg_fk}}, + // FCMP.CLT.S cd, fj, fk + {mask: 0xffff8018, value: 0x0c110000, op: FCMP_CLT_S, args: instArgs{arg_cd, arg_fj, arg_fk}}, + // FCMP.CNE.D cd, fj, fk + {mask: 0xffff8018, value: 0x0c280000, op: FCMP_CNE_D, args: instArgs{arg_cd, arg_fj, arg_fk}}, + // FCMP.CNE.S cd, fj, fk + {mask: 0xffff8018, value: 0x0c180000, op: FCMP_CNE_S, args: instArgs{arg_cd, arg_fj, arg_fk}}, + // FCMP.COR.D cd, fj, fk + {mask: 0xffff8018, value: 0x0c2a0000, op: FCMP_COR_D, args: instArgs{arg_cd, arg_fj, arg_fk}}, + // FCMP.COR.S cd, fj, fk + {mask: 0xffff8018, value: 0x0c1a0000, op: FCMP_COR_S, args: instArgs{arg_cd, arg_fj, arg_fk}}, + // FCMP.CUEQ.D cd, fj, fk + {mask: 0xffff8018, value: 0x0c260000, op: FCMP_CUEQ_D, args: instArgs{arg_cd, arg_fj, arg_fk}}, + // FCMP.CUEQ.S cd, fj, fk + {mask: 0xffff8018, value: 0x0c160000, op: FCMP_CUEQ_S, args: instArgs{arg_cd, arg_fj, arg_fk}}, + // FCMP.CULE.D cd, fj, fk + {mask: 0xffff8018, value: 0x0c270000, op: FCMP_CULE_D, args: instArgs{arg_cd, arg_fj, arg_fk}}, + // FCMP.CULE.S cd, fj, fk + {mask: 0xffff8018, value: 0x0c170000, op: 
FCMP_CULE_S, args: instArgs{arg_cd, arg_fj, arg_fk}}, + // FCMP.CULT.D cd, fj, fk + {mask: 0xffff8018, value: 0x0c250000, op: FCMP_CULT_D, args: instArgs{arg_cd, arg_fj, arg_fk}}, + // FCMP.CULT.S cd, fj, fk + {mask: 0xffff8018, value: 0x0c150000, op: FCMP_CULT_S, args: instArgs{arg_cd, arg_fj, arg_fk}}, + // FCMP.CUNE.D cd, fj, fk + {mask: 0xffff8018, value: 0x0c2c0000, op: FCMP_CUNE_D, args: instArgs{arg_cd, arg_fj, arg_fk}}, + // FCMP.CUNE.S cd, fj, fk + {mask: 0xffff8018, value: 0x0c1c0000, op: FCMP_CUNE_S, args: instArgs{arg_cd, arg_fj, arg_fk}}, + // FCMP.CUN.D cd, fj, fk + {mask: 0xffff8018, value: 0x0c240000, op: FCMP_CUN_D, args: instArgs{arg_cd, arg_fj, arg_fk}}, + // FCMP.CUN.S cd, fj, fk + {mask: 0xffff8018, value: 0x0c140000, op: FCMP_CUN_S, args: instArgs{arg_cd, arg_fj, arg_fk}}, + // FCMP.SAF.D cd, fj, fk + {mask: 0xffff8018, value: 0x0c208000, op: FCMP_SAF_D, args: instArgs{arg_cd, arg_fj, arg_fk}}, + // FCMP.SAF.S cd, fj, fk + {mask: 0xffff8018, value: 0x0c108000, op: FCMP_SAF_S, args: instArgs{arg_cd, arg_fj, arg_fk}}, + // FCMP.SEQ.D cd, fj, fk + {mask: 0xffff8018, value: 0x0c228000, op: FCMP_SEQ_D, args: instArgs{arg_cd, arg_fj, arg_fk}}, + // FCMP.SEQ.S cd, fj, fk + {mask: 0xffff8018, value: 0x0c128000, op: FCMP_SEQ_S, args: instArgs{arg_cd, arg_fj, arg_fk}}, + // FCMP.SLE.D cd, fj, fk + {mask: 0xffff8018, value: 0x0c238000, op: FCMP_SLE_D, args: instArgs{arg_cd, arg_fj, arg_fk}}, + // FCMP.SLE.S cd, fj, fk + {mask: 0xffff8018, value: 0x0c138000, op: FCMP_SLE_S, args: instArgs{arg_cd, arg_fj, arg_fk}}, + // FCMP.SLT.D cd, fj, fk + {mask: 0xffff8018, value: 0x0c218000, op: FCMP_SLT_D, args: instArgs{arg_cd, arg_fj, arg_fk}}, + // FCMP.SLT.S cd, fj, fk + {mask: 0xffff8018, value: 0x0c118000, op: FCMP_SLT_S, args: instArgs{arg_cd, arg_fj, arg_fk}}, + // FCMP.SNE.D cd, fj, fk + {mask: 0xffff8018, value: 0x0c288000, op: FCMP_SNE_D, args: instArgs{arg_cd, arg_fj, arg_fk}}, + // FCMP.SNE.S cd, fj, fk + {mask: 0xffff8018, value: 0x0c188000, op: 
FCMP_SNE_S, args: instArgs{arg_cd, arg_fj, arg_fk}}, + // FCMP.SOR.D cd, fj, fk + {mask: 0xffff8018, value: 0x0c2a8000, op: FCMP_SOR_D, args: instArgs{arg_cd, arg_fj, arg_fk}}, + // FCMP.SOR.S cd, fj, fk + {mask: 0xffff8018, value: 0x0c1a8000, op: FCMP_SOR_S, args: instArgs{arg_cd, arg_fj, arg_fk}}, + // FCMP.SUEQ.D cd, fj, fk + {mask: 0xffff8018, value: 0x0c268000, op: FCMP_SUEQ_D, args: instArgs{arg_cd, arg_fj, arg_fk}}, + // FCMP.SUEQ.S cd, fj, fk + {mask: 0xffff8018, value: 0x0c168000, op: FCMP_SUEQ_S, args: instArgs{arg_cd, arg_fj, arg_fk}}, + // FCMP.SULE.D cd, fj, fk + {mask: 0xffff8018, value: 0x0c278000, op: FCMP_SULE_D, args: instArgs{arg_cd, arg_fj, arg_fk}}, + // FCMP.SULE.S cd, fj, fk + {mask: 0xffff8018, value: 0x0c178000, op: FCMP_SULE_S, args: instArgs{arg_cd, arg_fj, arg_fk}}, + // FCMP.SULT.D cd, fj, fk + {mask: 0xffff8018, value: 0x0c258000, op: FCMP_SULT_D, args: instArgs{arg_cd, arg_fj, arg_fk}}, + // FCMP.SULT.S cd, fj, fk + {mask: 0xffff8018, value: 0x0c158000, op: FCMP_SULT_S, args: instArgs{arg_cd, arg_fj, arg_fk}}, + // FCMP.SUNE.D cd, fj, fk + {mask: 0xffff8018, value: 0x0c2c8000, op: FCMP_SUNE_D, args: instArgs{arg_cd, arg_fj, arg_fk}}, + // FCMP.SUNE.S cd, fj, fk + {mask: 0xffff8018, value: 0x0c1c8000, op: FCMP_SUNE_S, args: instArgs{arg_cd, arg_fj, arg_fk}}, + // FCMP.SUN.D cd, fj, fk + {mask: 0xffff8018, value: 0x0c248000, op: FCMP_SUN_D, args: instArgs{arg_cd, arg_fj, arg_fk}}, + // FCMP.SUN.S cd, fj, fk + {mask: 0xffff8018, value: 0x0c148000, op: FCMP_SUN_S, args: instArgs{arg_cd, arg_fj, arg_fk}}, + // FCOPYSIGN.D fd, fj, fk + {mask: 0xffff8000, value: 0x01130000, op: FCOPYSIGN_D, args: instArgs{arg_fd, arg_fj, arg_fk}}, + // FCOPYSIGN.S fd, fj, fk + {mask: 0xffff8000, value: 0x01128000, op: FCOPYSIGN_S, args: instArgs{arg_fd, arg_fj, arg_fk}}, + // FCVT.D.S fd, fj + {mask: 0xfffffc00, value: 0x01192400, op: FCVT_D_S, args: instArgs{arg_fd, arg_fj}}, + // FCVT.S.D fd, fj + {mask: 0xfffffc00, value: 0x01191800, op: FCVT_S_D, args: 
instArgs{arg_fd, arg_fj}}, + // FDIV.D fd, fj, fk + {mask: 0xffff8000, value: 0x01070000, op: FDIV_D, args: instArgs{arg_fd, arg_fj, arg_fk}}, + // FDIV.S fd, fj, fk + {mask: 0xffff8000, value: 0x01068000, op: FDIV_S, args: instArgs{arg_fd, arg_fj, arg_fk}}, + // FFINT.D.L fd, fj + {mask: 0xfffffc00, value: 0x011d2800, op: FFINT_D_L, args: instArgs{arg_fd, arg_fj}}, + // FFINT.D.W fd, fj + {mask: 0xfffffc00, value: 0x011d2000, op: FFINT_D_W, args: instArgs{arg_fd, arg_fj}}, + // FFINT.S.L fd, fj + {mask: 0xfffffc00, value: 0x011d1800, op: FFINT_S_L, args: instArgs{arg_fd, arg_fj}}, + // FFINT.S.W fd, fj + {mask: 0xfffffc00, value: 0x011d1000, op: FFINT_S_W, args: instArgs{arg_fd, arg_fj}}, + // FLDGT.D fd, rj, rk + {mask: 0xffff8000, value: 0x38748000, op: FLDGT_D, args: instArgs{arg_fd, arg_rj, arg_rk}}, + // FLDGT.S fd, rj, rk + {mask: 0xffff8000, value: 0x38740000, op: FLDGT_S, args: instArgs{arg_fd, arg_rj, arg_rk}}, + // FLDLE.D fd, rj, rk + {mask: 0xffff8000, value: 0x38758000, op: FLDLE_D, args: instArgs{arg_fd, arg_rj, arg_rk}}, + // FLDLE.S fd, rj, rk + {mask: 0xffff8000, value: 0x38750000, op: FLDLE_S, args: instArgs{arg_fd, arg_rj, arg_rk}}, + // FLDX.D fd, rj, rk + {mask: 0xffff8000, value: 0x38340000, op: FLDX_D, args: instArgs{arg_fd, arg_rj, arg_rk}}, + // FLDX.S fd, rj, rk + {mask: 0xffff8000, value: 0x38300000, op: FLDX_S, args: instArgs{arg_fd, arg_rj, arg_rk}}, + // FLD.D fd, rj, si12 + {mask: 0xffc00000, value: 0x2b800000, op: FLD_D, args: instArgs{arg_fd, arg_rj, arg_si12_21_10}}, + // FLD.S fd, rj, si12 + {mask: 0xffc00000, value: 0x2b000000, op: FLD_S, args: instArgs{arg_fd, arg_rj, arg_si12_21_10}}, + // FLOGB.D fd, fj + {mask: 0xfffffc00, value: 0x01142800, op: FLOGB_D, args: instArgs{arg_fd, arg_fj}}, + // FLOGB.S fd, fj + {mask: 0xfffffc00, value: 0x01142400, op: FLOGB_S, args: instArgs{arg_fd, arg_fj}}, + // FMADD.D fd, fj, fk, fa + {mask: 0xfff00000, value: 0x08200000, op: FMADD_D, args: instArgs{arg_fd, arg_fj, arg_fk, arg_fa}}, + // 
FMADD.S fd, fj, fk, fa + {mask: 0xfff00000, value: 0x08100000, op: FMADD_S, args: instArgs{arg_fd, arg_fj, arg_fk, arg_fa}}, + // FMAXA.D fd, fj, fk + {mask: 0xffff8000, value: 0x010d0000, op: FMAXA_D, args: instArgs{arg_fd, arg_fj, arg_fk}}, + // FMAXA.S fd, fj, fk + {mask: 0xffff8000, value: 0x010c8000, op: FMAXA_S, args: instArgs{arg_fd, arg_fj, arg_fk}}, + // FMAX.D fd, fj, fk + {mask: 0xffff8000, value: 0x01090000, op: FMAX_D, args: instArgs{arg_fd, arg_fj, arg_fk}}, + // FMAX.S fd, fj, fk + {mask: 0xffff8000, value: 0x01088000, op: FMAX_S, args: instArgs{arg_fd, arg_fj, arg_fk}}, + // FMINA.D fd, fj, fk + {mask: 0xffff8000, value: 0x010f0000, op: FMINA_D, args: instArgs{arg_fd, arg_fj, arg_fk}}, + // FMINA.S fd, fj, fk + {mask: 0xffff8000, value: 0x010e8000, op: FMINA_S, args: instArgs{arg_fd, arg_fj, arg_fk}}, + // FMIN.D fd, fj, fk + {mask: 0xffff8000, value: 0x010b0000, op: FMIN_D, args: instArgs{arg_fd, arg_fj, arg_fk}}, + // FMIN.S fd, fj, fk + {mask: 0xffff8000, value: 0x010a8000, op: FMIN_S, args: instArgs{arg_fd, arg_fj, arg_fk}}, + // FMOV.D fd, fj + {mask: 0xfffffc00, value: 0x01149800, op: FMOV_D, args: instArgs{arg_fd, arg_fj}}, + // FMOV.S fd, fj + {mask: 0xfffffc00, value: 0x01149400, op: FMOV_S, args: instArgs{arg_fd, arg_fj}}, + // FMSUB.D fd, fj, fk, fa + {mask: 0xfff00000, value: 0x08600000, op: FMSUB_D, args: instArgs{arg_fd, arg_fj, arg_fk, arg_fa}}, + // FMSUB.S fd, fj, fk, fa + {mask: 0xfff00000, value: 0x08500000, op: FMSUB_S, args: instArgs{arg_fd, arg_fj, arg_fk, arg_fa}}, + // FMUL.D fd, fj, fk + {mask: 0xffff8000, value: 0x01050000, op: FMUL_D, args: instArgs{arg_fd, arg_fj, arg_fk}}, + // FMUL.S fd, fj, fk + {mask: 0xffff8000, value: 0x01048000, op: FMUL_S, args: instArgs{arg_fd, arg_fj, arg_fk}}, + // FNEG.D fd, fj + {mask: 0xfffffc00, value: 0x01141800, op: FNEG_D, args: instArgs{arg_fd, arg_fj}}, + // FNEG.S fd, fj + {mask: 0xfffffc00, value: 0x01141400, op: FNEG_S, args: instArgs{arg_fd, arg_fj}}, + // FNMADD.D fd, fj, fk, fa + 
{mask: 0xfff00000, value: 0x08a00000, op: FNMADD_D, args: instArgs{arg_fd, arg_fj, arg_fk, arg_fa}}, + // FNMADD.S fd, fj, fk, fa + {mask: 0xfff00000, value: 0x08900000, op: FNMADD_S, args: instArgs{arg_fd, arg_fj, arg_fk, arg_fa}}, + // FNMSUB.D fd, fj, fk, fa + {mask: 0xfff00000, value: 0x08e00000, op: FNMSUB_D, args: instArgs{arg_fd, arg_fj, arg_fk, arg_fa}}, + // FNMSUB.S fd, fj, fk, fa + {mask: 0xfff00000, value: 0x08d00000, op: FNMSUB_S, args: instArgs{arg_fd, arg_fj, arg_fk, arg_fa}}, + // FRECIPE.D fd, fj + {mask: 0xfffffc00, value: 0x01147800, op: FRECIPE_D, args: instArgs{arg_fd, arg_fj}}, + // FRECIPE.S fd, fj + {mask: 0xfffffc00, value: 0x01147400, op: FRECIPE_S, args: instArgs{arg_fd, arg_fj}}, + // FRECIP.D fd, fj + {mask: 0xfffffc00, value: 0x01145800, op: FRECIP_D, args: instArgs{arg_fd, arg_fj}}, + // FRECIP.S fd, fj + {mask: 0xfffffc00, value: 0x01145400, op: FRECIP_S, args: instArgs{arg_fd, arg_fj}}, + // FRINT.D fd, fj + {mask: 0xfffffc00, value: 0x011e4800, op: FRINT_D, args: instArgs{arg_fd, arg_fj}}, + // FRINT.S fd, fj + {mask: 0xfffffc00, value: 0x011e4400, op: FRINT_S, args: instArgs{arg_fd, arg_fj}}, + // FRSQRTE.D fd, fj + {mask: 0xfffffc00, value: 0x01148800, op: FRSQRTE_D, args: instArgs{arg_fd, arg_fj}}, + // FRSQRTE.S fd, fj + {mask: 0xfffffc00, value: 0x01148400, op: FRSQRTE_S, args: instArgs{arg_fd, arg_fj}}, + // FRSQRT.D fd, fj + {mask: 0xfffffc00, value: 0x01146800, op: FRSQRT_D, args: instArgs{arg_fd, arg_fj}}, + // FRSQRT.S fd, fj + {mask: 0xfffffc00, value: 0x01146400, op: FRSQRT_S, args: instArgs{arg_fd, arg_fj}}, + // FSCALEB.D fd, fj, fk + {mask: 0xffff8000, value: 0x01110000, op: FSCALEB_D, args: instArgs{arg_fd, arg_fj, arg_fk}}, + // FSCALEB.S fd, fj, fk + {mask: 0xffff8000, value: 0x01108000, op: FSCALEB_S, args: instArgs{arg_fd, arg_fj, arg_fk}}, + // FSEL fd, fj, fk, ca + {mask: 0xfffc0000, value: 0x0d000000, op: FSEL, args: instArgs{arg_fd, arg_fj, arg_fk, arg_ca}}, + // FSQRT.D fd, fj + {mask: 0xfffffc00, value: 
0x01144800, op: FSQRT_D, args: instArgs{arg_fd, arg_fj}}, + // FSQRT.S fd, fj + {mask: 0xfffffc00, value: 0x01144400, op: FSQRT_S, args: instArgs{arg_fd, arg_fj}}, + // FSTGT.D fd, rj, rk + {mask: 0xffff8000, value: 0x38768000, op: FSTGT_D, args: instArgs{arg_fd, arg_rj, arg_rk}}, + // FSTGT.S fd, rj, rk + {mask: 0xffff8000, value: 0x38760000, op: FSTGT_S, args: instArgs{arg_fd, arg_rj, arg_rk}}, + // FSTLE.D fd, rj, rk + {mask: 0xffff8000, value: 0x38778000, op: FSTLE_D, args: instArgs{arg_fd, arg_rj, arg_rk}}, + // FSTLE.S fd, rj, rk + {mask: 0xffff8000, value: 0x38770000, op: FSTLE_S, args: instArgs{arg_fd, arg_rj, arg_rk}}, + // FSTX.D fd, rj, rk + {mask: 0xffff8000, value: 0x383c0000, op: FSTX_D, args: instArgs{arg_fd, arg_rj, arg_rk}}, + // FSTX.S fd, rj, rk + {mask: 0xffff8000, value: 0x38380000, op: FSTX_S, args: instArgs{arg_fd, arg_rj, arg_rk}}, + // FST.D fd, rj, si12 + {mask: 0xffc00000, value: 0x2bc00000, op: FST_D, args: instArgs{arg_fd, arg_rj, arg_si12_21_10}}, + // FST.S fd, rj, si12 + {mask: 0xffc00000, value: 0x2b400000, op: FST_S, args: instArgs{arg_fd, arg_rj, arg_si12_21_10}}, + // FSUB.D fd, fj, fk + {mask: 0xffff8000, value: 0x01030000, op: FSUB_D, args: instArgs{arg_fd, arg_fj, arg_fk}}, + // FSUB.S fd, fj, fk + {mask: 0xffff8000, value: 0x01028000, op: FSUB_S, args: instArgs{arg_fd, arg_fj, arg_fk}}, + // FTINTRM.L.D fd, fj + {mask: 0xfffffc00, value: 0x011a2800, op: FTINTRM_L_D, args: instArgs{arg_fd, arg_fj}}, + // FTINTRM.L.S fd, fj + {mask: 0xfffffc00, value: 0x011a2400, op: FTINTRM_L_S, args: instArgs{arg_fd, arg_fj}}, + // FTINTRM.W.D fd, fj + {mask: 0xfffffc00, value: 0x011a0800, op: FTINTRM_W_D, args: instArgs{arg_fd, arg_fj}}, + // FTINTRM.W.S fd, fj + {mask: 0xfffffc00, value: 0x011a0400, op: FTINTRM_W_S, args: instArgs{arg_fd, arg_fj}}, + // FTINTRNE.L.D fd, fj + {mask: 0xfffffc00, value: 0x011ae800, op: FTINTRNE_L_D, args: instArgs{arg_fd, arg_fj}}, + // FTINTRNE.L.S fd, fj + {mask: 0xfffffc00, value: 0x011ae400, op: 
FTINTRNE_L_S, args: instArgs{arg_fd, arg_fj}}, + // FTINTRNE.W.D fd, fj + {mask: 0xfffffc00, value: 0x011ac800, op: FTINTRNE_W_D, args: instArgs{arg_fd, arg_fj}}, + // FTINTRNE.W.S fd, fj + {mask: 0xfffffc00, value: 0x011ac400, op: FTINTRNE_W_S, args: instArgs{arg_fd, arg_fj}}, + // FTINTRP.L.D fd, fj + {mask: 0xfffffc00, value: 0x011a6800, op: FTINTRP_L_D, args: instArgs{arg_fd, arg_fj}}, + // FTINTRP.L.S fd, fj + {mask: 0xfffffc00, value: 0x011a6400, op: FTINTRP_L_S, args: instArgs{arg_fd, arg_fj}}, + // FTINTRP.W.D fd, fj + {mask: 0xfffffc00, value: 0x011a4800, op: FTINTRP_W_D, args: instArgs{arg_fd, arg_fj}}, + // FTINTRP.W.S fd, fj + {mask: 0xfffffc00, value: 0x011a4400, op: FTINTRP_W_S, args: instArgs{arg_fd, arg_fj}}, + // FTINTRZ.L.D fd, fj + {mask: 0xfffffc00, value: 0x011aa800, op: FTINTRZ_L_D, args: instArgs{arg_fd, arg_fj}}, + // FTINTRZ.L.S fd, fj + {mask: 0xfffffc00, value: 0x011aa400, op: FTINTRZ_L_S, args: instArgs{arg_fd, arg_fj}}, + // FTINTRZ.W.D fd, fj + {mask: 0xfffffc00, value: 0x011a8800, op: FTINTRZ_W_D, args: instArgs{arg_fd, arg_fj}}, + // FTINTRZ.W.S fd, fj + {mask: 0xfffffc00, value: 0x011a8400, op: FTINTRZ_W_S, args: instArgs{arg_fd, arg_fj}}, + // FTINT.L.D fd, fj + {mask: 0xfffffc00, value: 0x011b2800, op: FTINT_L_D, args: instArgs{arg_fd, arg_fj}}, + // FTINT.L.S fd, fj + {mask: 0xfffffc00, value: 0x011b2400, op: FTINT_L_S, args: instArgs{arg_fd, arg_fj}}, + // FTINT.W.D fd, fj + {mask: 0xfffffc00, value: 0x011b0800, op: FTINT_W_D, args: instArgs{arg_fd, arg_fj}}, + // FTINT.W.S fd, fj + {mask: 0xfffffc00, value: 0x011b0400, op: FTINT_W_S, args: instArgs{arg_fd, arg_fj}}, + // IBAR hint + {mask: 0xffff8000, value: 0x38728000, op: IBAR, args: instArgs{arg_hint_14_0}}, + // IDLE level + {mask: 0xffff8000, value: 0x06488000, op: IDLE, args: instArgs{arg_level_14_0}}, + // INVTLB op, rj, rk + {mask: 0xffff8000, value: 0x06498000, op: INVTLB, args: instArgs{arg_op_4_0, arg_rj, arg_rk}}, + // IOCSRRD.B rd, rj + {mask: 0xfffffc00, value: 
0x06480000, op: IOCSRRD_B, args: instArgs{arg_rd, arg_rj}}, + // IOCSRRD.D rd, rj + {mask: 0xfffffc00, value: 0x06480c00, op: IOCSRRD_D, args: instArgs{arg_rd, arg_rj}}, + // IOCSRRD.H rd, rj + {mask: 0xfffffc00, value: 0x06480400, op: IOCSRRD_H, args: instArgs{arg_rd, arg_rj}}, + // IOCSRRD.W rd, rj + {mask: 0xfffffc00, value: 0x06480800, op: IOCSRRD_W, args: instArgs{arg_rd, arg_rj}}, + // IOCSRWR.B rd, rj + {mask: 0xfffffc00, value: 0x06481000, op: IOCSRWR_B, args: instArgs{arg_rd, arg_rj}}, + // IOCSRWR.D rd, rj + {mask: 0xfffffc00, value: 0x06481c00, op: IOCSRWR_D, args: instArgs{arg_rd, arg_rj}}, + // IOCSRWR.H rd, rj + {mask: 0xfffffc00, value: 0x06481400, op: IOCSRWR_H, args: instArgs{arg_rd, arg_rj}}, + // IOCSRWR.W rd, rj + {mask: 0xfffffc00, value: 0x06481800, op: IOCSRWR_W, args: instArgs{arg_rd, arg_rj}}, + // JIRL rd, rj, offs + {mask: 0xfc000000, value: 0x4c000000, op: JIRL, args: instArgs{arg_rd, arg_rj, arg_offset_15_0}}, + // LDDIR rd, rj, level + {mask: 0xfffc0000, value: 0x06400000, op: LDDIR, args: instArgs{arg_rd, arg_rj, arg_level_17_10}}, + // LDGT.B rd, rj, rk + {mask: 0xffff8000, value: 0x38780000, op: LDGT_B, args: instArgs{arg_rd, arg_rj, arg_rk}}, + // LDGT.D rd, rj, rk + {mask: 0xffff8000, value: 0x38798000, op: LDGT_D, args: instArgs{arg_rd, arg_rj, arg_rk}}, + // LDGT.H rd, rj, rk + {mask: 0xffff8000, value: 0x38788000, op: LDGT_H, args: instArgs{arg_rd, arg_rj, arg_rk}}, + // LDGT.W rd, rj, rk + {mask: 0xffff8000, value: 0x38790000, op: LDGT_W, args: instArgs{arg_rd, arg_rj, arg_rk}}, + // LDLE.B rd, rj, rk + {mask: 0xffff8000, value: 0x387a0000, op: LDLE_B, args: instArgs{arg_rd, arg_rj, arg_rk}}, + // LDLE.D rd, rj, rk + {mask: 0xffff8000, value: 0x387b8000, op: LDLE_D, args: instArgs{arg_rd, arg_rj, arg_rk}}, + // LDLE.H rd, rj, rk + {mask: 0xffff8000, value: 0x387a8000, op: LDLE_H, args: instArgs{arg_rd, arg_rj, arg_rk}}, + // LDLE.W rd, rj, rk + {mask: 0xffff8000, value: 0x387b0000, op: LDLE_W, args: instArgs{arg_rd, arg_rj, 
arg_rk}}, + // LDPTE rj, seq + {mask: 0xfffc001f, value: 0x06440000, op: LDPTE, args: instArgs{arg_rj, arg_seq_17_10}}, + // LDPTR.D rd, rj, si14 + {mask: 0xff000000, value: 0x26000000, op: LDPTR_D, args: instArgs{arg_rd, arg_rj, arg_si14_23_10}}, + // LDPTR.W rd, rj, si14 + {mask: 0xff000000, value: 0x24000000, op: LDPTR_W, args: instArgs{arg_rd, arg_rj, arg_si14_23_10}}, + // LDX.B rd, rj, rk + {mask: 0xffff8000, value: 0x38000000, op: LDX_B, args: instArgs{arg_rd, arg_rj, arg_rk}}, + // LDX.BU rd, rj, rk + {mask: 0xffff8000, value: 0x38200000, op: LDX_BU, args: instArgs{arg_rd, arg_rj, arg_rk}}, + // LDX.D rd, rj, rk + {mask: 0xffff8000, value: 0x380c0000, op: LDX_D, args: instArgs{arg_rd, arg_rj, arg_rk}}, + // LDX.H rd, rj, rk + {mask: 0xffff8000, value: 0x38040000, op: LDX_H, args: instArgs{arg_rd, arg_rj, arg_rk}}, + // LDX.HU rd, rj, rk + {mask: 0xffff8000, value: 0x38240000, op: LDX_HU, args: instArgs{arg_rd, arg_rj, arg_rk}}, + // LDX.W rd, rj, rk + {mask: 0xffff8000, value: 0x38080000, op: LDX_W, args: instArgs{arg_rd, arg_rj, arg_rk}}, + // LDX.WU rd, rj, rk + {mask: 0xffff8000, value: 0x38280000, op: LDX_WU, args: instArgs{arg_rd, arg_rj, arg_rk}}, + // LD.B rd, rj, si12 + {mask: 0xffc00000, value: 0x28000000, op: LD_B, args: instArgs{arg_rd, arg_rj, arg_si12_21_10}}, + // LD.BU rd, rj, si12 + {mask: 0xffc00000, value: 0x2a000000, op: LD_BU, args: instArgs{arg_rd, arg_rj, arg_si12_21_10}}, + // LD.D rd, rj, si12 + {mask: 0xffc00000, value: 0x28c00000, op: LD_D, args: instArgs{arg_rd, arg_rj, arg_si12_21_10}}, + // LD.H rd, rj, si12 + {mask: 0xffc00000, value: 0x28400000, op: LD_H, args: instArgs{arg_rd, arg_rj, arg_si12_21_10}}, + // LD.HU rd, rj, si12 + {mask: 0xffc00000, value: 0x2a400000, op: LD_HU, args: instArgs{arg_rd, arg_rj, arg_si12_21_10}}, + // LD.W rd, rj, si12 + {mask: 0xffc00000, value: 0x28800000, op: LD_W, args: instArgs{arg_rd, arg_rj, arg_si12_21_10}}, + // LD.WU rd, rj, si12 + {mask: 0xffc00000, value: 0x2a800000, op: LD_WU, args: 
instArgs{arg_rd, arg_rj, arg_si12_21_10}}, + // LLACQ.D rd, rj + {mask: 0xfffffc00, value: 0x38578800, op: LLACQ_D, args: instArgs{arg_rd, arg_rj}}, + // LLACQ.W rd, rj + {mask: 0xfffffc00, value: 0x38578000, op: LLACQ_W, args: instArgs{arg_rd, arg_rj}}, + // LL.D rd, rj, si14 + {mask: 0xff000000, value: 0x22000000, op: LL_D, args: instArgs{arg_rd, arg_rj, arg_si14_23_10}}, + // LL.W rd, rj, si14 + {mask: 0xff000000, value: 0x20000000, op: LL_W, args: instArgs{arg_rd, arg_rj, arg_si14_23_10}}, + // LU12I.W rd, si20 + {mask: 0xfe000000, value: 0x14000000, op: LU12I_W, args: instArgs{arg_rd, arg_si20_24_5}}, + // LU32I.D rd, si20 + {mask: 0xfe000000, value: 0x16000000, op: LU32I_D, args: instArgs{arg_rd, arg_si20_24_5}}, + // LU52I.D rd, rj, si12 + {mask: 0xffc00000, value: 0x03000000, op: LU52I_D, args: instArgs{arg_rd, arg_rj, arg_si12_21_10}}, + // MASKEQZ rd, rj, rk + {mask: 0xffff8000, value: 0x00130000, op: MASKEQZ, args: instArgs{arg_rd, arg_rj, arg_rk}}, + // MASKNEZ rd, rj, rk + {mask: 0xffff8000, value: 0x00138000, op: MASKNEZ, args: instArgs{arg_rd, arg_rj, arg_rk}}, + // MOD.D rd, rj, rk + {mask: 0xffff8000, value: 0x00228000, op: MOD_D, args: instArgs{arg_rd, arg_rj, arg_rk}}, + // MOD.DU rd, rj, rk + {mask: 0xffff8000, value: 0x00238000, op: MOD_DU, args: instArgs{arg_rd, arg_rj, arg_rk}}, + // MOD.W rd, rj, rk + {mask: 0xffff8000, value: 0x00208000, op: MOD_W, args: instArgs{arg_rd, arg_rj, arg_rk}}, + // MOD.WU rd, rj, rk + {mask: 0xffff8000, value: 0x00218000, op: MOD_WU, args: instArgs{arg_rd, arg_rj, arg_rk}}, + // MOVCF2FR fd, cj + {mask: 0xffffff00, value: 0x0114d400, op: MOVCF2FR, args: instArgs{arg_fd, arg_cj}}, + // MOVCF2GR rd, cj + {mask: 0xffffff00, value: 0x0114dc00, op: MOVCF2GR, args: instArgs{arg_rd, arg_cj}}, + // MOVFCSR2GR rd, fcsr + {mask: 0xfffffc00, value: 0x0114c800, op: MOVFCSR2GR, args: instArgs{arg_rd, arg_fcsr_9_5}}, + // MOVFR2CF cd, fj + {mask: 0xfffffc18, value: 0x0114d000, op: MOVFR2CF, args: instArgs{arg_cd, arg_fj}}, + 
// MOVFR2GR.D rd, fj + {mask: 0xfffffc00, value: 0x0114b800, op: MOVFR2GR_D, args: instArgs{arg_rd, arg_fj}}, + // MOVFR2GR.S rd, fj + {mask: 0xfffffc00, value: 0x0114b400, op: MOVFR2GR_S, args: instArgs{arg_rd, arg_fj}}, + // MOVFRH2GR.S rd, fj + {mask: 0xfffffc00, value: 0x0114bc00, op: MOVFRH2GR_S, args: instArgs{arg_rd, arg_fj}}, + // MOVGR2CF cd, rj + {mask: 0xfffffc18, value: 0x0114d800, op: MOVGR2CF, args: instArgs{arg_cd, arg_rj}}, + // MOVGR2FCSR fcsr, rj + {mask: 0xfffffc00, value: 0x0114c000, op: MOVGR2FCSR, args: instArgs{arg_fcsr_4_0, arg_rj}}, + // MOVGR2FRH.W fd, rj + {mask: 0xfffffc00, value: 0x0114ac00, op: MOVGR2FRH_W, args: instArgs{arg_fd, arg_rj}}, + // MOVGR2FR.D fd, rj + {mask: 0xfffffc00, value: 0x0114a800, op: MOVGR2FR_D, args: instArgs{arg_fd, arg_rj}}, + // MOVGR2FR.W fd, rj + {mask: 0xfffffc00, value: 0x0114a400, op: MOVGR2FR_W, args: instArgs{arg_fd, arg_rj}}, + // MULH.D rd, rj, rk + {mask: 0xffff8000, value: 0x001e0000, op: MULH_D, args: instArgs{arg_rd, arg_rj, arg_rk}}, + // MULH.DU rd, rj, rk + {mask: 0xffff8000, value: 0x001e8000, op: MULH_DU, args: instArgs{arg_rd, arg_rj, arg_rk}}, + // MULH.W rd, rj, rk + {mask: 0xffff8000, value: 0x001c8000, op: MULH_W, args: instArgs{arg_rd, arg_rj, arg_rk}}, + // MULH.WU rd, rj, rk + {mask: 0xffff8000, value: 0x001d0000, op: MULH_WU, args: instArgs{arg_rd, arg_rj, arg_rk}}, + // MULW.D.W rd, rj, rk + {mask: 0xffff8000, value: 0x001f0000, op: MULW_D_W, args: instArgs{arg_rd, arg_rj, arg_rk}}, + // MULW.D.WU rd, rj, rk + {mask: 0xffff8000, value: 0x001f8000, op: MULW_D_WU, args: instArgs{arg_rd, arg_rj, arg_rk}}, + // MUL.D rd, rj, rk + {mask: 0xffff8000, value: 0x001d8000, op: MUL_D, args: instArgs{arg_rd, arg_rj, arg_rk}}, + // MUL.W rd, rj, rk + {mask: 0xffff8000, value: 0x001c0000, op: MUL_W, args: instArgs{arg_rd, arg_rj, arg_rk}}, + // NOR rd, rj, rk + {mask: 0xffff8000, value: 0x00140000, op: NOR, args: instArgs{arg_rd, arg_rj, arg_rk}}, + // OR rd, rj, rk + {mask: 0xffff8000, value: 
0x00150000, op: OR, args: instArgs{arg_rd, arg_rj, arg_rk}}, + // ORI rd, rj, ui12 + {mask: 0xffc00000, value: 0x03800000, op: ORI, args: instArgs{arg_rd, arg_rj, arg_ui12_21_10}}, + // ORN rd, rj, rk + {mask: 0xffff8000, value: 0x00160000, op: ORN, args: instArgs{arg_rd, arg_rj, arg_rk}}, + // PCADDI rd, si20 + {mask: 0xfe000000, value: 0x18000000, op: PCADDI, args: instArgs{arg_rd, arg_si20_24_5}}, + // PCADDU12I rd, si20 + {mask: 0xfe000000, value: 0x1c000000, op: PCADDU12I, args: instArgs{arg_rd, arg_si20_24_5}}, + // PCADDU18I rd, si20 + {mask: 0xfe000000, value: 0x1e000000, op: PCADDU18I, args: instArgs{arg_rd, arg_si20_24_5}}, + // PCALAU12I rd, si20 + {mask: 0xfe000000, value: 0x1a000000, op: PCALAU12I, args: instArgs{arg_rd, arg_si20_24_5}}, + // PRELD hint, rj, si12 + {mask: 0xffc00000, value: 0x2ac00000, op: PRELD, args: instArgs{arg_hint_4_0, arg_rj, arg_si12_21_10}}, + // PRELDX hint, rj, rk + {mask: 0xffff8000, value: 0x382c0000, op: PRELDX, args: instArgs{arg_hint_4_0, arg_rj, arg_rk}}, + // RDTIMEH.W rd, rj + {mask: 0xfffffc00, value: 0x00006400, op: RDTIMEH_W, args: instArgs{arg_rd, arg_rj}}, + // RDTIMEL.W rd, rj + {mask: 0xfffffc00, value: 0x00006000, op: RDTIMEL_W, args: instArgs{arg_rd, arg_rj}}, + // RDTIME.D rd, rj + {mask: 0xfffffc00, value: 0x00006800, op: RDTIME_D, args: instArgs{arg_rd, arg_rj}}, + // REVB.2H rd, rj + {mask: 0xfffffc00, value: 0x00003000, op: REVB_2H, args: instArgs{arg_rd, arg_rj}}, + // REVB.2W rd, rj + {mask: 0xfffffc00, value: 0x00003800, op: REVB_2W, args: instArgs{arg_rd, arg_rj}}, + // REVB.4H rd, rj + {mask: 0xfffffc00, value: 0x00003400, op: REVB_4H, args: instArgs{arg_rd, arg_rj}}, + // REVB.D rd, rj + {mask: 0xfffffc00, value: 0x00003c00, op: REVB_D, args: instArgs{arg_rd, arg_rj}}, + // REVH.2W rd, rj + {mask: 0xfffffc00, value: 0x00004000, op: REVH_2W, args: instArgs{arg_rd, arg_rj}}, + // REVH.D rd, rj + {mask: 0xfffffc00, value: 0x00004400, op: REVH_D, args: instArgs{arg_rd, arg_rj}}, + // ROTRI.D rd, rj, 
ui6 + {mask: 0xffff0000, value: 0x004d0000, op: ROTRI_D, args: instArgs{arg_rd, arg_rj, arg_ui6_15_10}}, + // ROTRI.W rd, rj, ui5 + {mask: 0xffff8000, value: 0x004c8000, op: ROTRI_W, args: instArgs{arg_rd, arg_rj, arg_ui5_14_10}}, + // ROTR.D rd, rj, rk + {mask: 0xffff8000, value: 0x001b8000, op: ROTR_D, args: instArgs{arg_rd, arg_rj, arg_rk}}, + // ROTR.W rd, rj, rk + {mask: 0xffff8000, value: 0x001b0000, op: ROTR_W, args: instArgs{arg_rd, arg_rj, arg_rk}}, + // SCREL.D rd, rj + {mask: 0xfffffc00, value: 0x38578c00, op: SCREL_D, args: instArgs{arg_rd, arg_rj}}, + // SCREL.W rd, rj + {mask: 0xfffffc00, value: 0x38578400, op: SCREL_W, args: instArgs{arg_rd, arg_rj}}, + // SC.D rd, rj, si14 + {mask: 0xff000000, value: 0x23000000, op: SC_D, args: instArgs{arg_rd, arg_rj, arg_si14_23_10}}, + // SC.Q rd, rk, rj + {mask: 0xffff8000, value: 0x38570000, op: SC_Q, args: instArgs{arg_rd, arg_rk, arg_rj}}, + // SC.W rd, rj, si14 + {mask: 0xff000000, value: 0x21000000, op: SC_W, args: instArgs{arg_rd, arg_rj, arg_si14_23_10}}, + // SLLI.D rd, rj, ui6 + {mask: 0xffff0000, value: 0x00410000, op: SLLI_D, args: instArgs{arg_rd, arg_rj, arg_ui6_15_10}}, + // SLLI.W rd, rj, ui5 + {mask: 0xffff8000, value: 0x00408000, op: SLLI_W, args: instArgs{arg_rd, arg_rj, arg_ui5_14_10}}, + // SLL.D rd, rj, rk + {mask: 0xffff8000, value: 0x00188000, op: SLL_D, args: instArgs{arg_rd, arg_rj, arg_rk}}, + // SLL.W rd, rj, rk + {mask: 0xffff8000, value: 0x00170000, op: SLL_W, args: instArgs{arg_rd, arg_rj, arg_rk}}, + // SLT rd, rj, rk + {mask: 0xffff8000, value: 0x00120000, op: SLT, args: instArgs{arg_rd, arg_rj, arg_rk}}, + // SLTI rd, rj, si12 + {mask: 0xffc00000, value: 0x02000000, op: SLTI, args: instArgs{arg_rd, arg_rj, arg_si12_21_10}}, + // SLTU rd, rj, rk + {mask: 0xffff8000, value: 0x00128000, op: SLTU, args: instArgs{arg_rd, arg_rj, arg_rk}}, + // SLTUI rd, rj, si12 + {mask: 0xffc00000, value: 0x02400000, op: SLTUI, args: instArgs{arg_rd, arg_rj, arg_si12_21_10}}, + // SRAI.D rd, rj, ui6 
+ {mask: 0xffff0000, value: 0x00490000, op: SRAI_D, args: instArgs{arg_rd, arg_rj, arg_ui6_15_10}}, + // SRAI.W rd, rj, ui5 + {mask: 0xffff8000, value: 0x00488000, op: SRAI_W, args: instArgs{arg_rd, arg_rj, arg_ui5_14_10}}, + // SRA.D rd, rj, rk + {mask: 0xffff8000, value: 0x00198000, op: SRA_D, args: instArgs{arg_rd, arg_rj, arg_rk}}, + // SRA.W rd, rj, rk + {mask: 0xffff8000, value: 0x00180000, op: SRA_W, args: instArgs{arg_rd, arg_rj, arg_rk}}, + // SRLI.D rd, rj, ui6 + {mask: 0xffff0000, value: 0x00450000, op: SRLI_D, args: instArgs{arg_rd, arg_rj, arg_ui6_15_10}}, + // SRLI.W rd, rj, ui5 + {mask: 0xffff8000, value: 0x00448000, op: SRLI_W, args: instArgs{arg_rd, arg_rj, arg_ui5_14_10}}, + // SRL.D rd, rj, rk + {mask: 0xffff8000, value: 0x00190000, op: SRL_D, args: instArgs{arg_rd, arg_rj, arg_rk}}, + // SRL.W rd, rj, rk + {mask: 0xffff8000, value: 0x00178000, op: SRL_W, args: instArgs{arg_rd, arg_rj, arg_rk}}, + // STGT.B rd, rj, rk + {mask: 0xffff8000, value: 0x387c0000, op: STGT_B, args: instArgs{arg_rd, arg_rj, arg_rk}}, + // STGT.D rd, rj, rk + {mask: 0xffff8000, value: 0x387d8000, op: STGT_D, args: instArgs{arg_rd, arg_rj, arg_rk}}, + // STGT.H rd, rj, rk + {mask: 0xffff8000, value: 0x387c8000, op: STGT_H, args: instArgs{arg_rd, arg_rj, arg_rk}}, + // STGT.W rd, rj, rk + {mask: 0xffff8000, value: 0x387d0000, op: STGT_W, args: instArgs{arg_rd, arg_rj, arg_rk}}, + // STLE.B rd, rj, rk + {mask: 0xffff8000, value: 0x387e0000, op: STLE_B, args: instArgs{arg_rd, arg_rj, arg_rk}}, + // STLE.D rd, rj, rk + {mask: 0xffff8000, value: 0x387f8000, op: STLE_D, args: instArgs{arg_rd, arg_rj, arg_rk}}, + // STLE.H rd, rj, rk + {mask: 0xffff8000, value: 0x387e8000, op: STLE_H, args: instArgs{arg_rd, arg_rj, arg_rk}}, + // STLE.W rd, rj, rk + {mask: 0xffff8000, value: 0x387f0000, op: STLE_W, args: instArgs{arg_rd, arg_rj, arg_rk}}, + // STPTR.D rd, rj, si14 + {mask: 0xff000000, value: 0x27000000, op: STPTR_D, args: instArgs{arg_rd, arg_rj, arg_si14_23_10}}, + // STPTR.W 
rd, rj, si14 + {mask: 0xff000000, value: 0x25000000, op: STPTR_W, args: instArgs{arg_rd, arg_rj, arg_si14_23_10}}, + // STX.B rd, rj, rk + {mask: 0xffff8000, value: 0x38100000, op: STX_B, args: instArgs{arg_rd, arg_rj, arg_rk}}, + // STX.D rd, rj, rk + {mask: 0xffff8000, value: 0x381c0000, op: STX_D, args: instArgs{arg_rd, arg_rj, arg_rk}}, + // STX.H rd, rj, rk + {mask: 0xffff8000, value: 0x38140000, op: STX_H, args: instArgs{arg_rd, arg_rj, arg_rk}}, + // STX.W rd, rj, rk + {mask: 0xffff8000, value: 0x38180000, op: STX_W, args: instArgs{arg_rd, arg_rj, arg_rk}}, + // ST.B rd, rj, si12 + {mask: 0xffc00000, value: 0x29000000, op: ST_B, args: instArgs{arg_rd, arg_rj, arg_si12_21_10}}, + // ST.D rd, rj, si12 + {mask: 0xffc00000, value: 0x29c00000, op: ST_D, args: instArgs{arg_rd, arg_rj, arg_si12_21_10}}, + // ST.H rd, rj, si12 + {mask: 0xffc00000, value: 0x29400000, op: ST_H, args: instArgs{arg_rd, arg_rj, arg_si12_21_10}}, + // ST.W rd, rj, si12 + {mask: 0xffc00000, value: 0x29800000, op: ST_W, args: instArgs{arg_rd, arg_rj, arg_si12_21_10}}, + // SUB.D rd, rj, rk + {mask: 0xffff8000, value: 0x00118000, op: SUB_D, args: instArgs{arg_rd, arg_rj, arg_rk}}, + // SUB.W rd, rj, rk + {mask: 0xffff8000, value: 0x00110000, op: SUB_W, args: instArgs{arg_rd, arg_rj, arg_rk}}, + // SYSCALL code + {mask: 0xffff8000, value: 0x002b0000, op: SYSCALL, args: instArgs{arg_code_14_0}}, + // TLBCLR + {mask: 0xffffffff, value: 0x06482000, op: TLBCLR, args: instArgs{}}, + // TLBFILL + {mask: 0xffffffff, value: 0x06483400, op: TLBFILL, args: instArgs{}}, + // TLBFLUSH + {mask: 0xffffffff, value: 0x06482400, op: TLBFLUSH, args: instArgs{}}, + // TLBRD + {mask: 0xffffffff, value: 0x06482c00, op: TLBRD, args: instArgs{}}, + // TLBSRCH + {mask: 0xffffffff, value: 0x06482800, op: TLBSRCH, args: instArgs{}}, + // TLBWR + {mask: 0xffffffff, value: 0x06483000, op: TLBWR, args: instArgs{}}, + // XOR rd, rj, rk + {mask: 0xffff8000, value: 0x00158000, op: XOR, args: instArgs{arg_rd, arg_rj, 
arg_rk}}, + // XORI rd, rj, ui12 + {mask: 0xffc00000, value: 0x03c00000, op: XORI, args: instArgs{arg_rd, arg_rj, arg_ui12_21_10}}, +} diff --git a/loong64/loong64asm/testdata/gnucases.txt b/loong64/loong64asm/testdata/gnucases.txt new file mode 100644 index 00000000..2cfd5ea1 --- /dev/null +++ b/loong64/loong64asm/testdata/gnucases.txt @@ -0,0 +1,415 @@ +ac391000| add.w $t0, $t1, $t2 +acb91000| add.d $t0, $t1, $t2 +ac41c002| addi.d $t0, $t1, 16 +ac01e002| addi.d $t0, $t1, -2048 +acfdff02| addi.d $t0, $t1, -1 +ac05e002| addi.d $t0, $t1, -2047 +acf9ff02| addi.d $t0, $t1, -2 +acfdff02| addi.d $t0, $t1, -1 +ac418002| addi.w $t0, $t1, 16 +ac410010| addu16i.d $t0, $t1, 16 +a4fcff13| addu16i.d $a0, $a1, -1 +acb92c00| alsl.d $t0, $t1, $t2, 0x2 +acb90400| alsl.w $t0, $t1, $t2, 0x2 +acb90600| alsl.wu $t0, $t1, $t2, 0x2 +ccb56138| amadd.d $t0, $t1, $t2 +cc356138| amadd.w $t0, $t1, $t2 +ccb56a38| amadd_db.d $t0, $t1, $t2 +cc356a38| amadd_db.w $t0, $t1, $t2 +ccb56238| amand.d $t0, $t1, $t2 +cc356238| amand.w $t0, $t1, $t2 +ccb56b38| amand_db.d $t0, $t1, $t2 +cc356b38| amand_db.w $t0, $t1, $t2 +ccb56538| ammax.d $t0, $t1, $t2 +ccb56738| ammax.du $t0, $t1, $t2 +cc356538| ammax.w $t0, $t1, $t2 +cc356738| ammax.wu $t0, $t1, $t2 +ccb56e38| ammax_db.d $t0, $t1, $t2 +ccb57038| ammax_db.du $t0, $t1, $t2 +cc356e38| ammax_db.w $t0, $t1, $t2 +cc357038| ammax_db.wu $t0, $t1, $t2 +ccb56638| ammin.d $t0, $t1, $t2 +ccb56838| ammin.du $t0, $t1, $t2 +cc356638| ammin.w $t0, $t1, $t2 +cc356838| ammin.wu $t0, $t1, $t2 +ccb56f38| ammin_db.d $t0, $t1, $t2 +ccb57138| ammin_db.du $t0, $t1, $t2 +cc356f38| ammin_db.w $t0, $t1, $t2 +cc357138| ammin_db.wu $t0, $t1, $t2 +ccb56338| amor.d $t0, $t1, $t2 +cc356338| amor.w $t0, $t1, $t2 +ccb56c38| amor_db.d $t0, $t1, $t2 +cc356c38| amor_db.w $t0, $t1, $t2 +ccb56038| amswap.d $t0, $t1, $t2 +cc356038| amswap.w $t0, $t1, $t2 +ccb56938| amswap_db.d $t0, $t1, $t2 +cc356938| amswap_db.w $t0, $t1, $t2 +ccb56438| amxor.d $t0, $t1, $t2 +cc356438| amxor.w $t0, $t1, $t2 
+ccb56d38| amxor_db.d $t0, $t1, $t2 +cc356d38| amxor_db.w $t0, $t1, $t2 +acb91400| and $t0, $t1, $t2 +ac414003| andi $t0, $t1, 0x10 +acb91600| andn $t0, $t1, $t2 +00004003| nop +a0b90100| asrtgt.d $t1, $t2 +a0390100| asrtle.d $t1, $t2 +00100050| b 16 +20100048| bceqz $fcc1, 16 +20110048| bcnez $fcc1, 16 +1ff1ff4b| bcnez $fcc0, -16 +8d110058| beq $t0, $t1, 16 +a0110040| beqz $t1, 16 +9ff1ff43| beqz $t0, -16 +8d110064| bge $t0, $t1, 16 +8d11006c| bgeu $t0, $t1, 16 +ac490000| bitrev.4b $t0, $t1 +ac4d0000| bitrev.8b $t0, $t1 +ac550000| bitrev.d $t0, $t1 +ac510000| bitrev.w $t0, $t1 +00100054| bl 16 +8d110060| blt $t0, $t1, 16 +8d110068| bltu $t0, $t1, 16 +8d11005c| bne $t0, $t1, 16 +a0110044| bnez $t1, 16 +00002a00| break 0x0 +ac158a00| bstrins.d $t0, $t1, 0xa, 0x5 +ac156a00| bstrins.w $t0, $t1, 0xa, 0x5 +ac15ca00| bstrpick.d $t0, $t1, 0xa, 0x5 +ac956a00| bstrpick.w $t0, $t1, 0xa, 0x5 +ac390d00| bytepick.d $t0, $t1, $t2, 0x2 +ac390900| bytepick.w $t0, $t1, $t2, 0x2 +84010406| cacop 0x4, $t0, 256 +ac210000| clo.d $t0, $t1 +ac110000| clo.w $t0, $t1 +ac250000| clz.d $t0, $t1 +ac150000| clz.w $t0, $t1 +ac6d0000| cpucfg $t0, $t1 +ac392400| crc.w.b.w $t0, $t1, $t2 +acb92500| crc.w.d.w $t0, $t1, $t2 +acb92400| crc.w.h.w $t0, $t1, $t2 +ac392500| crc.w.w.w $t0, $t1, $t2 +ac392600| crcc.w.b.w $t0, $t1, $t2 +acb92700| crcc.w.d.w $t0, $t1, $t2 +acb92600| crcc.w.h.w $t0, $t1, $t2 +ac392700| crcc.w.w.w $t0, $t1, $t2 +0c040004| csrrd $t0, 0x1 +2c040004| csrwr $t0, 0x1 +ac050004| csrxchg $t0, $t1, 0x1 +ac290000| cto.d $t0, $t1 +ac190000| cto.w $t0, $t1 +ac2d0000| ctz.d $t0, $t1 +ac1d0000| ctz.w $t0, $t1 +00007238| dbar 0x0 +10802a00| dbcl 0x10 +ac392200| div.d $t0, $t1, $t2 +ac392300| div.du $t0, $t1, $t2 +ac392000| div.w $t0, $t1, $t2 +ac392100| div.wu $t0, $t1, $t2 +00384806| ertn +ac5d0000| ext.w.b $t0, $t1 +ac590000| ext.w.h $t0, $t1 +28091401| fabs.d $ft0, $ft1 +28051401| fabs.s $ft0, $ft1 +28250101| fadd.d $ft0, $ft1, $ft1 +28a50001| fadd.s $ft0, $ft1, $ft1 +28391401| fclass.d 
$ft0, $ft1 +28351401| fclass.s $ft0, $ft1 +2029200c| fcmp.caf.d $fcc0, $ft1, $ft2 +2029100c| fcmp.caf.s $fcc0, $ft1, $ft2 +2029220c| fcmp.ceq.d $fcc0, $ft1, $ft2 +2029120c| fcmp.ceq.s $fcc0, $ft1, $ft2 +2029230c| fcmp.cle.d $fcc0, $ft1, $ft2 +2029130c| fcmp.cle.s $fcc0, $ft1, $ft2 +2029210c| fcmp.clt.d $fcc0, $ft1, $ft2 +2029110c| fcmp.clt.s $fcc0, $ft1, $ft2 +2029280c| fcmp.cne.d $fcc0, $ft1, $ft2 +2029180c| fcmp.cne.s $fcc0, $ft1, $ft2 +20292a0c| fcmp.cor.d $fcc0, $ft1, $ft2 +20291a0c| fcmp.cor.s $fcc0, $ft1, $ft2 +2029260c| fcmp.cueq.d $fcc0, $ft1, $ft2 +2029160c| fcmp.cueq.s $fcc0, $ft1, $ft2 +2029270c| fcmp.cule.d $fcc0, $ft1, $ft2 +2029170c| fcmp.cule.s $fcc0, $ft1, $ft2 +2029250c| fcmp.cult.d $fcc0, $ft1, $ft2 +2029150c| fcmp.cult.s $fcc0, $ft1, $ft2 +20292c0c| fcmp.cune.d $fcc0, $ft1, $ft2 +20291c0c| fcmp.cune.s $fcc0, $ft1, $ft2 +2029240c| fcmp.cun.d $fcc0, $ft1, $ft2 +2029140c| fcmp.cun.s $fcc0, $ft1, $ft2 +20a9200c| fcmp.saf.d $fcc0, $ft1, $ft2 +20a9100c| fcmp.saf.s $fcc0, $ft1, $ft2 +20a9220c| fcmp.seq.d $fcc0, $ft1, $ft2 +20a9120c| fcmp.seq.s $fcc0, $ft1, $ft2 +20a9230c| fcmp.sle.d $fcc0, $ft1, $ft2 +20a9130c| fcmp.sle.s $fcc0, $ft1, $ft2 +20a9210c| fcmp.slt.d $fcc0, $ft1, $ft2 +20a9110c| fcmp.slt.s $fcc0, $ft1, $ft2 +20a9280c| fcmp.sne.d $fcc0, $ft1, $ft2 +20a9180c| fcmp.sne.s $fcc0, $ft1, $ft2 +20a92a0c| fcmp.sor.d $fcc0, $ft1, $ft2 +20a91a0c| fcmp.sor.s $fcc0, $ft1, $ft2 +20a9260c| fcmp.sueq.d $fcc0, $ft1, $ft2 +20a9160c| fcmp.sueq.s $fcc0, $ft1, $ft2 +20a9270c| fcmp.sule.d $fcc0, $ft1, $ft2 +20a9170c| fcmp.sule.s $fcc0, $ft1, $ft2 +20a9250c| fcmp.sult.d $fcc0, $ft1, $ft2 +20a9150c| fcmp.sult.s $fcc0, $ft1, $ft2 +20a92c0c| fcmp.sune.d $fcc0, $ft1, $ft2 +20a91c0c| fcmp.sune.s $fcc0, $ft1, $ft2 +20a9240c| fcmp.sun.d $fcc0, $ft1, $ft2 +20a9140c| fcmp.sun.s $fcc0, $ft1, $ft2 +28291301| fcopysign.d $ft0, $ft1, $ft2 +28a91201| fcopysign.s $ft0, $ft1, $ft2 +28251901| fcvt.d.s $ft0, $ft1 +28191901| fcvt.s.d $ft0, $ft1 +28290701| fdiv.d $ft0, $ft1, $ft2 
+28a90601| fdiv.s $ft0, $ft1, $ft2 +28291d01| ffint.d.l $ft0, $ft1 +28211d01| ffint.d.w $ft0, $ft1 +28191d01| ffint.s.l $ft0, $ft1 +28111d01| ffint.s.w $ft0, $ft1 +a841802b| fld.d $ft0, $t1, 16 +a841002b| fld.s $ft0, $t1, 16 +a8b97438| fldgt.d $ft0, $t1, $t2 +a8397438| fldgt.s $ft0, $t1, $t2 +a8b97538| fldle.d $ft0, $t1, $t2 +a8397538| fldle.s $ft0, $t1, $t2 +a8393438| fldx.d $ft0, $t1, $t2 +a8393038| fldx.s $ft0, $t1, $t2 +28291401| flogb.d $ft0, $ft1 +28251401| flogb.s $ft0, $ft1 +28a92508| fmadd.d $ft0, $ft1, $ft2, $ft3 +28a91508| fmadd.s $ft0, $ft1, $ft2, $ft3 +28290901| fmax.d $ft0, $ft1, $ft2 +28a90801| fmax.s $ft0, $ft1, $ft2 +28290d01| fmaxa.d $ft0, $ft1, $ft2 +28a90c01| fmaxa.s $ft0, $ft1, $ft2 +28290b01| fmin.d $ft0, $ft1, $ft2 +28a90a01| fmin.s $ft0, $ft1, $ft2 +28290f01| fmina.d $ft0, $ft1, $ft2 +28a90e01| fmina.s $ft0, $ft1, $ft2 +48991401| fmov.d $ft0, $ft2 +48951401| fmov.s $ft0, $ft2 +28a96508| fmsub.d $ft0, $ft1, $ft2, $ft3 +28a95508| fmsub.s $ft0, $ft1, $ft2, $ft3 +28290501| fmul.d $ft0, $ft1, $ft2 +28a90401| fmul.s $ft0, $ft1, $ft2 +28191401| fneg.d $ft0, $ft1 +28151401| fneg.s $ft0, $ft1 +28a9a508| fnmadd.d $ft0, $ft1, $ft2, $ft3 +28a99508| fnmadd.s $ft0, $ft1, $ft2, $ft3 +28a9e508| fnmsub.d $ft0, $ft1, $ft2, $ft3 +28a9d508| fnmsub.s $ft0, $ft1, $ft2, $ft3 +28591401| frecip.d $ft0, $ft1 +28551401| frecip.s $ft0, $ft1 +28491e01| frint.d $ft0, $ft1 +28451e01| frint.s $ft0, $ft1 +28691401| frsqrt.d $ft0, $ft1 +28651401| frsqrt.s $ft0, $ft1 +28291101| fscaleb.d $ft0, $ft1, $ft2 +28a91001| fscaleb.s $ft0, $ft1, $ft2 +28a9000d| fsel $ft0, $ft1, $ft2, $fcc1 +28491401| fsqrt.d $ft0, $ft1 +28451401| fsqrt.s $ft0, $ft1 +a841c02b| fst.d $ft0, $t1, 16 +a841402b| fst.s $ft0, $t1, 16 +a8b97638| fstgt.d $ft0, $t1, $t2 +a8397638| fstgt.s $ft0, $t1, $t2 +a8b97738| fstle.d $ft0, $t1, $t2 +a8397738| fstle.s $ft0, $t1, $t2 +a8393c38| fstx.d $ft0, $t1, $t2 +a8393838| fstx.s $ft0, $t1, $t2 +28290301| fsub.d $ft0, $ft1, $ft2 +28a90201| fsub.s $ft0, $ft1, $ft2 
+28291b01| ftint.l.d $ft0, $ft1 +28251b01| ftint.l.s $ft0, $ft1 +28091b01| ftint.w.d $ft0, $ft1 +28051b01| ftint.w.s $ft0, $ft1 +28291a01| ftintrm.l.d $ft0, $ft1 +28251a01| ftintrm.l.s $ft0, $ft1 +28091a01| ftintrm.w.d $ft0, $ft1 +28051a01| ftintrm.w.s $ft0, $ft1 +28e91a01| ftintrne.l.d $ft0, $ft1 +28e51a01| ftintrne.l.s $ft0, $ft1 +28c91a01| ftintrne.w.d $ft0, $ft1 +28c51a01| ftintrne.w.s $ft0, $ft1 +28691a01| ftintrp.l.d $ft0, $ft1 +28651a01| ftintrp.l.s $ft0, $ft1 +28491a01| ftintrp.w.d $ft0, $ft1 +28451a01| ftintrp.w.s $ft0, $ft1 +28a91a01| ftintrz.l.d $ft0, $ft1 +28a51a01| ftintrz.l.s $ft0, $ft1 +28891a01| ftintrz.w.d $ft0, $ft1 +28851a01| ftintrz.w.s $ft0, $ft1 +00807238| ibar 0x0 +10804806| idle 0x10 +ac014806| iocsrrd.b $t0, $t1 +ac054806| iocsrrd.h $t0, $t1 +ac094806| iocsrrd.w $t0, $t1 +ac0d4806| iocsrrd.d $t0, $t1 +ac114806| iocsrwr.b $t0, $t1 +ac154806| iocsrwr.h $t0, $t1 +ac194806| iocsrwr.w $t0, $t1 +ac1d4806| iocsrwr.d $t0, $t1 +82b54906| invtlb 0x2, $t0, $t1 +ac11004c| jirl $t0, $t1, 16 +ac410028| ld.b $t0, $t1, 16 +ac41002a| ld.bu $t0, $t1, 16 +ac41c028| ld.d $t0, $t1, 16 +ac414028| ld.h $t0, $t1, 16 +ac41402a| ld.hu $t0, $t1, 16 +ac418028| ld.w $t0, $t1, 16 +ac41802a| ld.wu $t0, $t1, 16 +ac414006| lddir $t0, $t1, 0x10 +ac397838| ldgt.b $t0, $t1, $t2 +acb97938| ldgt.d $t0, $t1, $t2 +acb97838| ldgt.h $t0, $t1, $t2 +ac397938| ldgt.w $t0, $t1, $t2 +ac397a38| ldle.b $t0, $t1, $t2 +acb97b38| ldle.d $t0, $t1, $t2 +acb97a38| ldle.h $t0, $t1, $t2 +ac397b38| ldle.w $t0, $t1, $t2 +ac110026| ldptr.d $t0, $t1, 16 +ac01e024| ldptr.w $t0, $t1, -8192 +ac05f024| ldptr.w $t0, $t1, -4092 +acfd1f24| ldptr.w $t0, $t1, 8188 +acfdff24| ldptr.w $t0, $t1, -4 +ac050024| ldptr.w $t0, $t1, 4 +ac110024| ldptr.w $t0, $t1, 16 +80094406| ldpte $t0, 0x2 +ac390038| ldx.b $t0, $t1, $t2 +ac392038| ldx.bu $t0, $t1, $t2 +ac390c38| ldx.d $t0, $t1, $t2 +ac390438| ldx.h $t0, $t1, $t2 +ac392438| ldx.hu $t0, $t1, $t2 +ac390838| ldx.w $t0, $t1, $t2 +ac392838| ldx.wu $t0, $t1, $t2 +ac110022| 
ll.d $t0, $t1, 16 +ac110020| ll.w $t0, $t1, 16 +0c020014| lu12i.w $t0, 16 +0c000015| lu12i.w $t0, -524288 +ecffff15| lu12i.w $t0, -1 +ecffff14| lu12i.w $t0, 524287 +0c020016| lu32i.d $t0, 16 +ac410003| lu52i.d $t0, $t1, 16 +ac391300| maskeqz $t0, $t1, $t2 +acb91300| masknez $t0, $t1, $t2 +acb92200| mod.d $t0, $t1, $t2 +acb92300| mod.du $t0, $t1, $t2 +acb92000| mod.w $t0, $t1, $t2 +acb92100| mod.wu $t0, $t1, $t2 +28d41401| movcf2fr $ft0, $fcc1 +2cdc1401| movcf2gr $t0, $fcc1 +0cc81401| movfcsr2gr $t0, $fcsr0 +20d11401| movfr2cf $fcc0, $ft1 +2cb91401| movfr2gr.d $t0, $ft1 +2cb51401| movfr2gr.s $t0, $ft1 +2cbd1401| movfrh2gr.s $t0, $ft1 +a0d91401| movgr2cf $fcc0, $t1 +80c11401| movgr2fcsr $fcsr0, $t0 +a8a91401| movgr2fr.d $ft0, $t1 +a8a51401| movgr2fr.w $ft0, $t1 +a8ad1401| movgr2frh.w $ft0, $t1 +acb91d00| mul.d $t0, $t1, $t2 +ac391c00| mul.w $t0, $t1, $t2 +ac391e00| mulh.d $t0, $t1, $t2 +acb91e00| mulh.du $t0, $t1, $t2 +acb91c00| mulh.w $t0, $t1, $t2 +ac391d00| mulh.wu $t0, $t1, $t2 +ac391f00| mulw.d.w $t0, $t1, $t2 +acb91f00| mulw.d.wu $t0, $t1, $t2 +ac391400| nor $t0, $t1, $t2 +ac391500| or $t0, $t1, $t2 +ac418003| ori $t0, $t1, 0x10 +ac391600| orn $t0, $t1, $t2 +0c020018| pcaddi $t0, 16 +0c02001c| pcaddu12i $t0, 16 +0c02001e| pcaddu18i $t0, 16 +0c02001a| pcalau12i $t0, 16 +a041c02a| preld 0x0, $t1, 16 +a0392c38| preldx 0x0, $t1, $t2 +ac690000| rdtime.d $t0, $t1 +ac650000| rdtimeh.w $t0, $t1 +ac610000| rdtimel.w $t0, $t1 +ac310000| revb.2h $t0, $t1 +ac390000| revb.2w $t0, $t1 +ac350000| revb.4h $t0, $t1 +ac3d0000| revb.d $t0, $t1 +ac410000| revh.2w $t0, $t1 +ac450000| revh.d $t0, $t1 +acb91b00| rotr.d $t0, $t1, $t2 +ac391b00| rotr.w $t0, $t1, $t2 +ac414d00| rotri.d $t0, $t1, 0x10 +acc14c00| rotri.w $t0, $t1, 0x10 +ac110023| sc.d $t0, $t1, 16 +ac110021| sc.w $t0, $t1, 16 +acb91800| sll.d $t0, $t1, $t2 +ac391700| sll.w $t0, $t1, $t2 +ac414100| slli.d $t0, $t1, 0x10 +acc14000| slli.w $t0, $t1, 0x10 +ac391200| slt $t0, $t1, $t2 +ac410002| slti $t0, $t1, 16 +acb91200| 
sltu $t0, $t1, $t2 +ac414002| sltui $t0, $t1, 16 +acb91900| sra.d $t0, $t1, $t2 +ac391800| sra.w $t0, $t1, $t2 +ac414900| srai.d $t0, $t1, 0x10 +acc14800| srai.w $t0, $t1, 0x10 +ac391900| srl.d $t0, $t1, $t2 +acb91700| srl.w $t0, $t1, $t2 +ac414500| srli.d $t0, $t1, 0x10 +acc14400| srli.w $t0, $t1, 0x10 +ac410029| st.b $t0, $t1, 16 +ac41c029| st.d $t0, $t1, 16 +ac414029| st.h $t0, $t1, 16 +ac418029| st.w $t0, $t1, 16 +ac397c38| stgt.b $t0, $t1, $t2 +acb97d38| stgt.d $t0, $t1, $t2 +acb97c38| stgt.h $t0, $t1, $t2 +ac397d38| stgt.w $t0, $t1, $t2 +ac397e38| stle.b $t0, $t1, $t2 +acb97f38| stle.d $t0, $t1, $t2 +acb97e38| stle.h $t0, $t1, $t2 +ac397f38| stle.w $t0, $t1, $t2 +ac110027| stptr.d $t0, $t1, 16 +ac110025| stptr.w $t0, $t1, 16 +ac391038| stx.b $t0, $t1, $t2 +ac391c38| stx.d $t0, $t1, $t2 +ac391438| stx.h $t0, $t1, $t2 +ac391838| stx.w $t0, $t1, $t2 +acb91100| sub.d $t0, $t1, $t2 +ac391100| sub.w $t0, $t1, $t2 +00002b00| syscall 0x0 +00204806| tlbclr +00344806| tlbfill +00244806| tlbflush +002c4806| tlbrd +00284806| tlbsrch +00304806| tlbwr +acb91500| xor $t0, $t1, $t2 +ac41c003| xori $t0, $t1, 0x10 +cc355d38| amadd.b $t0, $t1, $t2 +cc355f38| amadd_db.b $t0, $t1, $t2 +ccb55f38| amadd_db.h $t0, $t1, $t2 +ccb55d38| amadd.h $t0, $t1, $t2 +cc355838| amcas.b $t0, $t1, $t2 +ccb55938| amcas.d $t0, $t1, $t2 +cc355a38| amcas_db.b $t0, $t1, $t2 +ccb55b38| amcas_db.d $t0, $t1, $t2 +ccb55a38| amcas_db.h $t0, $t1, $t2 +cc355b38| amcas_db.w $t0, $t1, $t2 +ccb55838| amcas.h $t0, $t1, $t2 +cc355938| amcas.w $t0, $t1, $t2 +cc355c38| amswap.b $t0, $t1, $t2 +cc355e38| amswap_db.b $t0, $t1, $t2 +ccb55e38| amswap_db.h $t0, $t1, $t2 +ccb55c38| amswap.h $t0, $t1, $t2 +28791401| frecipe.d $ft0, $ft1 +28751401| frecipe.s $ft0, $ft1 +28891401| frsqrte.d $ft0, $ft1 +28851401| frsqrte.s $ft0, $ft1 +ac895738| llacq.d $t0, $t1 +ac815738| llacq.w $t0, $t1 +ac8d5738| screl.d $t0, $t1 +ac855738| screl.w $t0, $t1 diff --git a/loong64/loong64asm/testdata/plan9cases.txt 
b/loong64/loong64asm/testdata/plan9cases.txt new file mode 100644 index 00000000..53f5d450 --- /dev/null +++ b/loong64/loong64asm/testdata/plan9cases.txt @@ -0,0 +1,365 @@ +a6101000| ADD R4, R5, R6 +a6901000| ADDV R4, R5, R6 +a5101000| ADD R4, R5 +a5901000| ADDV R4, R5 +85fcbf02| ADD $-1, R4, R5 +84fcbf02| ADD $-1, R4 +85fcff02| ADDV $-1, R4, R5 +84fcff02| ADDV $-1, R4 +ac391000| ADD R14, R13, R12 +acb91000| ADDV R14, R13, R12 +ac41c002| ADDV $16, R13, R12 +ac01e002| ADDV $-2048, R13, R12 +acfdff02| ADDV $-1, R13, R12 +ac05e002| ADDV $-2047, R13, R12 +acf9ff02| ADDV $-2, R13, R12 +ac418002| ADD $16, R13, R12 +a6101100| SUB R4, R5, R6 +a6901100| SUBV R4, R5, R6 +a5101100| SUB R4, R5 +a5901100| SUBV R4, R5 +05101100| NEGW R4, R5 +05901100| NEGV R4, R5 +84781200| SGT R30, R4 +85781200| SGT R30, R4, R5 +84f81200| SGTU R30, R4 +85f81200| SGTU R30, R4, R5 +a6901400| AND R4, R5, R6 +a5901400| AND R4, R5 +85044003| AND $1, R4, R5 +84044003| AND $1, R4 +a5101c00| MUL R4, R5 +a6101c00| MUL R4, R5, R6 +a5901d00| MULV R4, R5 +a6901d00| MULV R4, R5, R6 +a5101e00| MULHV R4, R5 +a6101e00| MULHV R4, R5, R6 +a5901e00| MULHVU R4, R5 +a6901e00| MULHVU R4, R5, R6 +28290501| MULD F10, F9, F8 +28a90401| MULF F10, F9, F8 +a5102000| DIV R4, R5 +a6102000| DIV R4, R5, R6 +a5102100| DIVU R4, R5 +a6102100| DIVU R4, R5, R6 +a5102200| DIVV R4, R5 +a6102200| DIVV R4, R5, R6 +a5102300| DIVVU R4, R5 +a6102300| DIVVU R4, R5, R6 +28290701| DIVD F10, F9, F8 +28a90601| DIVF F10, F9, F8 +a5902000| REM R4, R5 +a6902000| REM R4, R5, R6 +a5902100| REMU R4, R5 +a6902100| REMU R4, R5, R6 +a5902200| REMV R4, R5 +a6902200| REMV R4, R5, R6 +a5902300| REMVU R4, R5 +a6902300| REMVU R4, R5, R6 +04020014| LU12IW $16, R4 +24000014| LU12IW $1, R4 +85001700| MOVW R4, R5 +85001500| MOVV R4, R5 +85fc4303| MOVBU R4, R5 +1e020014| LU12IW $16, R30 +85781000| ADD R30, R4, R5 +de038003| OR $0, R30 +ac391400| NOR R14, R13, R12 +acb91500| XOR R14, R13, R12 +ac41c003| XOR $16, R13, R12 +85f81000| ADDV R30, R4, R5 +8500cf00| 
MOVHU R4, R5 +a5101700| SLL R4, R5 +a6101700| SLL R4, R5, R6 +a5901700| SRL R4, R5 +a6901700| SRL R4, R5, R6 +a5101800| SRA R4, R5 +a6101800| SRA R4, R5, R6 +a5101b00| ROTR R4, R5 +a6101b00| ROTR R4, R5, R6 +a5901800| SLLV R4, R5 +a6901800| SLLV R4, R5, R6 +a5901b00| ROTRV R4, R5 +a6901b00| ROTRV R4, R5, R6 +85904000| SLL $4, R4, R5 +84904000| SLL $4, R4 +85904400| SRL $4, R4, R5 +84904400| SRL $4, R4 +85904800| SRA $4, R4, R5 +84904800| SRA $4, R4 +85904c00| ROTR $4, R4, R5 +84904c00| ROTR $4, R4 +85104100| SLLV $4, R4, R5 +84104100| SLLV $4, R4 +85104d00| ROTRV $4, R4, R5 +84104d00| ROTRV $4, R4 +a6101300| MASKEQZ R4, R5, R6 +a6901300| MASKNEZ R4, R5, R6 +00050048| BFPT 1(PC) +00040048| BFPF 1(PC) +80040058| BEQ R4, 1(PC) +8d110058| BEQ R12, R13, 4(PC) +a0110040| BEQ R13, 4(PC) +9ff1ff43| BEQ R12, -4(PC) +8504005c| BNE R4, R5, 1(PC) +8004005c| BNE R4, 1(PC) +8004005c| BNE R4, 1(PC) +a0140058| BEQ R5, 5(PC) +a0110058| BEQ R13, 4(PC) +00ebff5f| BNE R24, -6(PC) +801d005c| BNE R12, 7(PC) +85040060| BLT R4, R5, 1(PC) +80fcff63| BLTZ R4, -1(PC) +05040060| BGTZ R5, 1(PC) +80040060| BLTZ R4, 1(PC) +47d5ff6b| BLTU R10, R7, -11(PC) +802c0068| BLTU R4, 11(PC) +85040064| BGE R4, R5, 1(PC) +80fcff67| BGEZ R4, -1(PC) +47d5ff6f| BGEU R10, R7, -11(PC) +802c006c| BGEU R4, 11(PC) +04d8ff67| BLEZ R4, -10(PC) +00040058| JMP 1(PC) +8000004c| JMP (R4) +00340050| JMP 13(PC) +00100050| JMP 4(PC) +00100054| CALL 4(PC) +8100004c| CALL (R4) +00140054| CALL 5(PC) +a4048029| MOVW R4, 1(R5) +a404c029| MOVV R4, 1(R5) +a4040029| MOVB R4, 1(R5) +ac410029| MOVB R12, 16(R13) +a4040021| SC R4, 1(R5) +a4040023| SCV R4, 1(R5) +a4040028| MOVB 1(R5), R4 +a404002a| MOVBU 1(R5), R4 +a4044028| MOVH 1(R5), R4 +a404402a| MOVHU 1(R5), R4 +a4048028| MOVW 1(R5), R4 +a404802a| MOVWU 1(R5), R4 +a404c028| MOVV 1(R5), R4 +a4040020| LL 1(R5), R4 +a4040022| LLV 1(R5), R4 +a4fc3f20| LL -1(R5), R4 +a4fc3f22| LLV -1(R5), R4 +00002a00| BREAK +00007238| DBAR +00002b00| SYSCALL +00004003| NOOP +a5900001| ADDF F4, F5 
+a6900001| ADDF F4, F5, F6 +28a50001| ADDF F9, F9, F8 +28250101| ADDD F9, F9, F8 +85041401| ABSF F4, F5 +85081401| ABSD F4, F5 +85141401| NEGF F4, F5 +85181401| NEGD F4, F5 +85441401| SQRTF F4, F5 +85481401| SQRTD F4, F5 +a090110c| CMPGTF F4, F5, FCC0 +a090210c| CMPGTD F4, F5, FCC0 +a090130c| CMPGEF F4, F5, FCC0 +a090230c| CMPGED F4, F5, FCC0 +a010220c| CMPEQD F4, F5, FCC0 +2029120c| CMPEQF F10, F9, FCC0 +85241901| MOVFD F4, F5 +85181901| MOVDF F4, F5 +85101d01| FFINTFW F4, F5 +85201d01| FFINTDW F4, F5 +85181d01| FFINTFV F4, F5 +85281d01| FFINTDV F4, F5 +85041b01| FTINTWF F4, F5 +85241b01| FTINTVF F4, F5 +85081b01| FTINTWD F4, F5 +85281b01| FTINTVD F4, F5 +85881a01| FTINTRZWD F4, F5 +85841a01| FTINTRZWF F4, F5 +85a81a01| FTINTRZVD F4, F5 +85a41a01| FTINTRZVF F4, F5 +85941401| MOVF F4, F5 +85981401| MOVD F4, F5 +85a41401| MOVW R4, F5 +85b41401| MOVW F4, R5 +85a81401| MOVV R4, F5 +85b81401| MOVV F4, R5 +04dc1401| MOVV FCC0, R4 +80d81401| MOVV R4, FCC0 +6440002b| MOVF 16(R3), F4 +6440802b| MOVD 16(R3), F4 +a404002b| MOVF 1(R5), F4 +a404802b| MOVD 1(R5), F4 +6460402b| MOVF F4, 24(R3) +6460c02b| MOVD F4, 24(R3) +a404402b| MOVF F4, 1(R5) +a404c02b| MOVD F4, 1(R5) +5e020014| LU12IW $18, R30 +64a95214| LU12IW $169291, R4 +84d08703| OR $500, R4 +84428614| LU12IW $274964, R4 +84848c03| OR $801, R4 +24640816| LU32ID $17185, R4 +84000003| LU52ID $0, R4 +24a93615| LU12IW $-412343, R4 +84849003| OR $1057, R4 +8464c817| LU32ID $-113884, R4 +848c0203| LU52ID $163, R4 +80600000| RDTIMELW R4, R0 +80640000| RDTIMEHW R4, R0 +85680000| RDTIMED R4, R5 +1e00001a| PCALAU12I $0, R30 +0c02001c| PCADDU12I $16, R12 +c4038029| MOVW R4, 0(R30) +ac395c38| AMSWAPB R14, (R13), R12 +acb95c38| AMSWAPH R14, (R13), R12 +ac396038| AMSWAPW R14, (R13), R12 +acb96038| AMSWAPV R14, (R13), R12 +ac395838| AMCASB R14, (R13), R12 +acb95838| AMCASH R14, (R13), R12 +ac395938| AMCASW R14, (R13), R12 +acb95938| AMCASV R14, (R13), R12 +ac396138| AMADDW R14, (R13), R12 +acb96138| AMADDV R14, (R13), R12 +ac396238| 
AMANDW R14, (R13), R12 +acb96238| AMANDV R14, (R13), R12 +ac396338| AMORW R14, (R13), R12 +acb96338| AMORV R14, (R13), R12 +ac396438| AMXORW R14, (R13), R12 +acb96438| AMXORV R14, (R13), R12 +ac396538| AMMAXW R14, (R13), R12 +acb96538| AMMAXV R14, (R13), R12 +ac396638| AMMINW R14, (R13), R12 +acb96638| AMMINV R14, (R13), R12 +ac396738| AMMAXWU R14, (R13), R12 +acb96738| AMMAXVU R14, (R13), R12 +ac396838| AMMINWU R14, (R13), R12 +acb96838| AMMINVU R14, (R13), R12 +ac395e38| AMSWAPDBB R14, (R13), R12 +acb95e38| AMSWAPDBH R14, (R13), R12 +ac396938| AMSWAPDBW R14, (R13), R12 +acb96938| AMSWAPDBV R14, (R13), R12 +ac395a38| AMCASDBB R14, (R13), R12 +acb95a38| AMCASDBH R14, (R13), R12 +ac395b38| AMCASDBW R14, (R13), R12 +acb95b38| AMCASDBV R14, (R13), R12 +ac396a38| AMADDDBW R14, (R13), R12 +acb96a38| AMADDDBV R14, (R13), R12 +ac396b38| AMANDDBW R14, (R13), R12 +acb96b38| AMANDDBV R14, (R13), R12 +ac396c38| AMORDBW R14, (R13), R12 +acb96c38| AMORDBV R14, (R13), R12 +ac396d38| AMXORDBW R14, (R13), R12 +acb96d38| AMXORDBV R14, (R13), R12 +ac396e38| AMMAXDBW R14, (R13), R12 +acb96e38| AMMAXDBV R14, (R13), R12 +ac396f38| AMMINDBW R14, (R13), R12 +acb96f38| AMMINDBV R14, (R13), R12 +ac397038| AMMAXDBWU R14, (R13), R12 +acb97038| AMMAXDBVU R14, (R13), R12 +ac397138| AMMINDBWU R14, (R13), R12 +acb97138| AMMINDBVU R14, (R13), R12 +856c0000| CPUCFG R4, R5 +85481e01| FRINTD F4, F5 +83c01401| MOVV R4, FCSR3 +64c81401| MOVV FCSR3, R4 +80d01401| MOVV F4, FCC0 +04d41401| MOVV FCC0, F4 +a6901201| FCOPYSGF F4, F5, F6 +a6101301| FCOPYSGD F4, F5, F6 +85500000| BITREVW R4, R5 +85540000| BITREVV R4, R5 +a6102400| CRCWBW R4, R5, R6 +a6902400| CRCWHW R4, R5, R6 +a6102500| CRCWWW R4, R5, R6 +a6902500| CRCWVW R4, R5, R6 +a6102600| CRCCWBW R4, R5, R6 +a6902600| CRCCWHW R4, R5, R6 +a6102700| CRCCWWW R4, R5, R6 +a6902700| CRCCWVW R4, R5, R6 +cc350038| MOVB (R14)(R13), R12 +cc352038| MOVBU (R14)(R13), R12 +cc350438| MOVH (R14)(R13), R12 +cc352438| MOVHU (R14)(R13), R12 +cc350838| MOVW (R14)(R13), 
R12 +cc352838| MOVWU (R14)(R13), R12 +cc350c38| MOVV (R14)(R13), R12 +cc351038| MOVB R12, (R14)(R13) +cc351438| MOVH R12, (R14)(R13) +cc351838| MOVW R12, (R14)(R13) +cc351c38| MOVV R12, (R14)(R13) +c2353038| MOVF (R14)(R13), F2 +c2353438| MOVD (R14)(R13), F2 +c2353838| MOVF F2, (R14)(R13) +c2353c38| MOVD F2, (R14)(R13) +a010120c| CMPEQF F4, F5, FCC0 +a190110c| CMPGTF F4, F5, FCC1 +a290210c| CMPGTD F4, F5, FCC2 +a390130c| CMPGEF F4, F5, FCC3 +a490230c| CMPGED F4, F5, FCC4 +a510220c| CMPEQD F4, F5, FCC5 +85100000| CLOW R4, R5 +85200000| CLOV R4, R5 +85140000| CLZW R4, R5 +85240000| CLZV R4, R5 +85180000| CTOW R4, R5 +85280000| CTOV R4, R5 +851c0000| CTZW R4, R5 +852c0000| CTZV R4, R5 +853c0000| REVBV R4, R5 +85380000| REVB2W R4, R5 +85340000| REVB4H R4, R5 +85300000| REVB2H R4, R5 +a6900a01| FMINF F4, F5, F6 +a5900a01| FMINF F4, F5 +a6100b01| FMIND F4, F5, F6 +a5100b01| FMIND F4, F5 +a6900801| FMAXF F4, F5, F6 +a5900801| FMAXF F4, F5 +a6100901| FMAXD F4, F5, F6 +a5100901| FMAXD F4, F5 +85341401| FCLASSF F4, F5 +85381401| FCLASSD F4, F5 +02041a01| FTINTRMWF F0, F2 +02081a01| FTINTRMWD F0, F2 +02241a01| FTINTRMVF F0, F2 +02281a01| FTINTRMVD F0, F2 +02441a01| FTINTRPWF F0, F2 +02481a01| FTINTRPWD F0, F2 +02641a01| FTINTRPVF F0, F2 +02681a01| FTINTRPVD F0, F2 +02841a01| FTINTRZWF F0, F2 +02881a01| FTINTRZWD F0, F2 +02a41a01| FTINTRZVF F0, F2 +02a81a01| FTINTRZVD F0, F2 +02c41a01| FTINTRNEWF F0, F2 +02c81a01| FTINTRNEWD F0, F2 +02e41a01| FTINTRNEVF F0, F2 +02e81a01| FTINTRNEVD F0, F2 +01101d01| FFINTFW F0, F1 +01181d01| FFINTFV F0, F1 +01201d01| FFINTDW F0, F1 +01281d01| FFINTDV F0, F1 +01041b01| FTINTWF F0, F1 +01081b01| FTINTWD F0, F1 +01241b01| FTINTVF F0, F1 +01281b01| FTINTVD F0, F1 +c7901208| FMADDF F5, F4, F6, F7 +c7902208| FMADDD F5, F4, F6, F7 +c7905208| FMSUBF F5, F4, F6, F7 +c7906208| FMSUBD F5, F4, F6, F7 +c7909208| FNMADDF F5, F4, F6, F7 +c790a208| FNMADDD F5, F4, F6, F7 +c790d208| FNMSUBF F5, F4, F6, F7 +c790e208| FNMSUBD F5, F4, F6, F7 +85806000| BSTRPICKW 
$0, R4, $0, R5 +85807f00| BSTRPICKW $31, R4, $0, R5 +85986f00| BSTRPICKW $15, R4, $6, R5 +8500c000| BSTRPICKV $0, R4, $0, R5 +8500ff00| BSTRPICKV $63, R4, $0, R5 +8518cf00| BSTRPICKV $15, R4, $6, R5 +85006000| BSTRINSW $0, R4, $0, R5 +85007f00| BSTRINSW $31, R4, $0, R5 +85186f00| BSTRINSW $15, R4, $6, R5 +85008000| BSTRINSV $0, R4, $0, R5 +8500bf00| BSTRINSV $63, R4, $0, R5 +85188f00| BSTRINSV $15, R4, $6, R5 diff --git a/loong64/loong64spec/spec.go b/loong64/loong64spec/spec.go new file mode 100644 index 00000000..4c32961c --- /dev/null +++ b/loong64/loong64spec/spec.go @@ -0,0 +1,528 @@ +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// loong64spec reads the "LoongArch-Vol1-EN.pdf" [1] to collect instruction +// encoding details and output to tables.go. +// +// usage: go run spec.go LoongArch-Vol1-EN.pdf +// +// [1]: https://loongson.github.io/LoongArch-Documentation/LoongArch-Vol1-EN.pdf + +package main + +import ( + "bytes" + "fmt" + "log" + "math" + "os" + "regexp" + "sort" + "strconv" + "strings" + + "rsc.io/pdf" +) + +func mergeMap(m1 map[string]string, m2 map[string]string) { + for k := range m2 { + m1[k] = m2[k] + } +} + +func main() { + log.SetFlags(0) + log.SetPrefix("loong64spec: ") + + if len(os.Args) != 2 { + fmt.Fprintf(os.Stderr, "usage: loong64spec LoongArch-Vol1-EN.pdf\n") + os.Exit(2) + } + f, err := pdf.Open(os.Args[1]) + if err != nil { + log.Fatal(err) + } + var prologue bytes.Buffer + prologue.Write([]byte("// Generated by loong64spec LoongArch-Vol1-EN.pdf, DO NOT EDIT.\n\n// Copyright 2024 The Go Authors. 
All rights reserved.\n// Use of this source code is governed by a BSD-style\n// license that can be found in the LICENSE file.\n\npackage loong64asm\n\n")) + + var op_f bytes.Buffer + op_f.Write([]byte("const (\n\t_ Op = iota\n")) + + var opstr_f bytes.Buffer + opstr_f.Write([]byte("var opstr = [...]string{\n")) + + var instFormats_f bytes.Buffer + instFormats_f.Write([]byte("var instFormats = [...]instFormat{\n")) + + // Scan document looking for instructions. + n := f.NumPage() + var ops []string + opstrs := map[string]string{} + instFormatComments := map[string]string{} + instFormats := map[string]string{} + var fp int + for pageNum := 1; pageNum <= n; pageNum++ { + p := f.Page(pageNum) + if fp == 0 { + if !isFirstPage(p) { + continue + } + fp = pageNum + } + cPageOps, cPageOpstrs, cPageInstFormatComments, cPageInstFormats := parsePage(pageNum, p, fp == pageNum) + ops = append(ops, cPageOps...) + mergeMap(opstrs, cPageOpstrs) + mergeMap(instFormatComments, cPageInstFormatComments) + mergeMap(instFormats, cPageInstFormats) + } + + sort.Strings(ops) + + for _, op := range ops { + // 1. write op + op_f.Write([]byte(fmt.Sprintf("\t%s\n", op))) + // 2. write opstr + opstr_f.Write([]byte(fmt.Sprintf("\t%s\n", opstrs[op]))) + // 3. 
write instFormat + instFormats_f.Write([]byte(fmt.Sprintf("\t%s\n\t%s\n", instFormatComments[op], instFormats[op]))) + } + + op_f.Write([]byte(")\n\n")) + opstr_f.Write([]byte("}\n\n")) + instFormats_f.Write([]byte("}\n")) + + fileTables, err := os.Create("tables.go") + defer fileTables.Close() + + fileTables.Write(prologue.Bytes()) + fileTables.Write(op_f.Bytes()) + fileTables.Write(opstr_f.Bytes()) + fileTables.Write(instFormats_f.Bytes()) + + fileTables.Close() +} + +func isFirstPage(page pdf.Page) bool { + content := page.Content() + appendixb := "AppendixB" + ct := "" + for _, t := range content.Text { + ct += t.S + if ct == "AppendixB" { + return true + } + if strings.HasPrefix(appendixb, ct) { + continue + } else { + return false + } + } + return false +} + +func getArg(name string) (length int, argName string) { + switch { + case strings.Contains("arg_fd", name): + return 5, "arg_fd" + case strings.Contains("arg_fj", name): + return 5, "arg_fj" + case strings.Contains("arg_fk", name): + return 5, "arg_fk" + case strings.Contains("arg_fa", name): + return 5, "arg_fa" + case strings.Contains("arg_rd", name): + return 5, "arg_rd" + case strings.Contains("arg_rj", name) || name == "rj!=0,1": + return 5, "arg_rj" + case strings.Contains("arg_rk", name): + return 5, "arg_rk" + case name == "csr": + return 14, "arg_csr_23_10" + case strings.Contains("arg_cd", name): + return 5, "arg_cd" + case strings.Contains("arg_cj", name): + return 5, "arg_cj" + case strings.Contains("arg_ca", name): + return 5, "arg_ca" + case strings.Contains(name, "sa"): + length, _ := strconv.Atoi(strings.Split(name, "sa")[1]) + if length == 2 { + argName = "arg_sa2_16_15" + } else { + argName = "arg_sa3_17_15" + } + return length, argName + case strings.Contains("arg_seq_17_10", name): + return 8, "arg_seq_17_10" + case strings.Contains("arg_op_4_0", name): + return 5, "arg_op_4_0" + case strings.Contains(name, "ui"): + length, _ := strconv.Atoi(strings.Split(name, "ui")[1]) + if length 
== 5 { + argName = "arg_ui5_14_10" + } else if length == 6 { + argName = "arg_ui6_15_10" + } else { + argName = "arg_ui12_21_10" + } + return length, argName + case strings.Contains("arg_lsbw", name): + return 5, "arg_lsbw" + case strings.Contains("arg_msbw", name): + return 5, "arg_msbw" + case strings.Contains("arg_lsbd", name): + return 6, "arg_lsbd" + case strings.Contains("arg_msbd", name): + return 6, "arg_msbd" + case strings.Contains(name, "si"): + length, _ := strconv.Atoi(strings.Split(name, "si")[1]) + if length == 12 { + argName = "arg_si12_21_10" + } else if length == 14 { + argName = "arg_si14_23_10" + } else if length == 16 { + argName = "arg_si16_25_10" + } else { + argName = "arg_si20_24_5" + } + return length, argName + case strings.Contains(name, "offs"): + splitName := strings.Split(name, ":") + left, _ := strconv.Atoi(strings.Split(splitName[0], "[")[1]) + right, _ := strconv.Atoi(strings.Split(splitName[1], "]")[0]) + return left - right + 1, "offs" + default: + return 0, "" + } +} + +func binstrToHex(str string) string { + rst := 0 + hex := "0x" + charArray := []byte(str) + for i := 0; i < 32; { + rst = 1*(int(charArray[i+3])-48) + 2*(int(charArray[i+2])-48) + 4*(int(charArray[i+1])-48) + 8*(int(charArray[i])-48) + switch rst { + case 10: + hex = hex + "a" + case 11: + hex = hex + "b" + case 12: + hex = hex + "c" + case 13: + hex = hex + "d" + case 14: + hex = hex + "e" + case 15: + hex = hex + "f" + default: + hex += strconv.Itoa(rst) + } + + i = i + 4 + } + return hex +} + +/* +Here we deal with the instruction FCMP.cond.S/D, which has the following format: + + | 31 - 20 | 19 - 15 | 14 - 10 | 9 - 5 | 4 | 3 | 2 - 0 | + |---------|---------|---------|-------|---|---|-------| + | op | cond | fk | fj | 0 | 0 | cd | + +The `cond` field has these possible values: + + "CAF": "00", + "CUN": "08", + "CEQ": "04", + "CUEQ": "0c", + "CLT": "02", + "CULT": "0a", + "CLE": "06", + "CULE": "0e", + "CNE": "10", + "COR": "14", + "CUNE": "18", + "SAF": "01", 
+ "SUN": "09", + "SEQ": "05", + "SUEQ": "0d", + "SLT": "03", + "SULT": "0b", + "SLE": "07", + "SULE": "0f", + "SNE": "11", + "SOR": "15", + "SUNE": "19", + +These values are the hexadecimal numbers of bits 19 to 15, the same as +described in the instruction set manual. + +The following code defines a map, the values in it represent the hexadecimal +encoding of the cond field in the entire instruction. In this case, the upper +4 bits and the lowest 1 bit are encoded separately, so the encoding is +different from the encoding described above. +*/ +func dealWithFcmp(ds string) (fcmpConditions map[string]map[string]string) { + conds := map[string]string{ + "CAF": "00", + "CUN": "40", + "CEQ": "20", + "CUEQ": "60", + "CLT": "10", + "CULT": "50", + "CLE": "30", + "CULE": "70", + "CNE": "80", + "COR": "a0", + "CUNE": "c0", + "SAF": "08", + "SUN": "48", + "SEQ": "28", + "SUEQ": "68", + "SLT": "18", + "SULT": "58", + "SLE": "38", + "SULE": "78", + "SNE": "88", + "SOR": "a8", + "SUNE": "c8", + } + fcmpConditions = make(map[string]map[string]string) + for k, v := range conds { + op := fmt.Sprintf("FCMP_%s_%s", k, ds) + opstr := fmt.Sprintf("FCMP_%s_%s:\t\"FCMP.%s.%s\",", k, ds, k, ds) + instFormatComment := fmt.Sprintf("// FCMP.%s.%s cd, fj, fk", k, ds) + var instFormat string + if ds == "D" { + instFormat = fmt.Sprintf("{mask: 0xffff8018, value: 0x0c2%s000, op: FCMP_%s_%s, args: instArgs{arg_cd, arg_fj, arg_fk}},", v, k, ds) + } else { + instFormat = fmt.Sprintf("{mask: 0xffff8018, value: 0x0c1%s000, op: FCMP_%s_%s, args: instArgs{arg_cd, arg_fj, arg_fk}},", v, k, ds) + } + + fcmpConditions[op] = make(map[string]string) + fcmpConditions[op]["op"] = op + fcmpConditions[op]["opstr"] = opstr + fcmpConditions[op]["instFormatComment"] = instFormatComment + fcmpConditions[op]["instFormat"] = instFormat + } + return +} + +func findWords(chars []pdf.Text) (words []pdf.Text) { + for i := 0; i < len(chars); { + xRange := []float64{chars[i].X, chars[i].X} + j := i + 1 + + // Find all 
chars on one line. + for j < len(chars) && chars[j].Y == chars[i].Y { + xRange[1] = chars[j].X + j++ + } + + // we need to note that the word may change line(Y) but belong to one cell. So, after loop over all continued + // chars whose Y are same, check if the next char's X belong to the range of xRange, if true, means it should + // be contact to current word, because the next word's X should bigger than current one. + for j < len(chars) && chars[j].X >= xRange[0] && chars[j].X <= xRange[1] { + j++ + } + + var end float64 + // Split line into words (really, phrases). + for k := i; k < j; { + ck := &chars[k] + s := ck.S + end = ck.X + ck.W + charSpace := ck.FontSize / 6 + wordSpace := ck.FontSize * 2 / 3 + l := k + 1 + for l < j { + // Grow word. + cl := &chars[l] + + if math.Abs(cl.FontSize-ck.FontSize) < 0.1 && cl.X <= end+charSpace { + s += cl.S + end = cl.X + cl.W + l++ + continue + } + // Add space to phrase before next word. + if math.Abs(cl.FontSize-ck.FontSize) < 0.1 && cl.X <= end+wordSpace { + s += " " + cl.S + end = cl.X + cl.W + l++ + continue + } + break + } + f := ck.Font + words = append(words, pdf.Text{f, ck.FontSize, ck.X, ck.Y, end - ck.X, s}) + k = l + } + i = j + } + + return words +} + +func parsePage(num int, p pdf.Page, isFP bool) (ops []string, opstrs map[string]string, instFormatComments map[string]string, instFormats map[string]string) { + opstrs = make(map[string]string) + instFormatComments = make(map[string]string) + instFormats = make(map[string]string) + + content := p.Content() + + var text []pdf.Text + for _, t := range content.Text { + text = append(text, t) + } + + // table name(70), table header(64), page num(3) + if isFP { + text = text[134 : len(text)-3] + } else { + text = text[64 : len(text)-3] + } + + text = findWords(text) + + for i := 0; i < len(text); { + var fcmpConditions map[string]map[string]string + if strings.HasPrefix(text[i].S, "FCMP") { + fcmpConditions = dealWithFcmp(strings.Split(text[i].S, ".")[2]) + + for fc, 
inst := range fcmpConditions { + ops = append(ops, inst["op"]) + opstrs[fc] = inst["opstr"] + instFormatComments[fc] = inst["instFormatComment"] + instFormats[fc] = inst["instFormat"] + } + t := i + 1 + for ; text[t].Y == text[i].Y; t++ { + continue + } + i = t + continue + } + + op := strings.Replace(text[i].S, ".", "_", -1) + opstr := fmt.Sprintf("%s:\t\"%s\",", op, text[i].S) + instFormatComment := "" + binValue := "" + binMask := "" + instArgs := "" + offs := false + var offArgs []string + + j := i + 1 + for ; j < len(text) && text[j].Y == text[i].Y; j++ { + + // Some instruction has no arguments, so the next word(text[j].S) is not the arguments string but 0/1 bit, it shouldn't be skipped. + if res, _ := regexp.MatchString("^\\d+$", text[j].S); j == i+1 && res == false { + instFormatComment = fmt.Sprintf("// %s %s", text[i].S, strings.Replace(text[j].S, ",", ", ", -1)) + continue + } + if text[j].S == "0" || text[j].S == "1" { + binValue += text[j].S + binMask += "1" + } else { + argLen, argName := getArg(text[j].S) + + // Get argument's length failed, compute it by other arguments. 
+ if argLen == 0 { + left := 31 - len(binValue) + right := 0 + l := j + 1 + if l < len(text) && text[l].Y == text[j].Y { + for ; text[l].Y == text[j].Y; l++ { + if text[l].S == "0" || text[l].S == "1" { + right += 1 + } else { + tArgLen, _ := getArg(text[l].S) + if tArgLen == 0 { + fmt.Fprintf(os.Stderr, "there are more than two args whose length is unknown.\n") + } + right += tArgLen + } + } + } + argLen = left - right + 1 + argName = "arg_" + text[j].S + "_" + strconv.FormatInt(int64(left), 10) + "_" + strconv.FormatInt(int64(right), 10) + } + + for k := 0; k < argLen; k++ { + binValue += "0" + binMask += "0" + } + + if argName != "offs" { + if instArgs != "" { + instArgs = ", " + instArgs + } + instArgs = argName + instArgs + } else { + offs = true + offArgs = append(offArgs, text[j].S) + } + } + } + + // The real offset is a combination of two offsets in the binary code of the instruction, for example: BEQZ + if offs && offArgs != nil { + var left int + var right int + if len(offArgs) == 1 { + left, _ = strconv.Atoi(strings.Split(strings.Split(offArgs[0], ":")[0], "[")[1]) + right, _ = strconv.Atoi(strings.Split(strings.Split(offArgs[0], ":")[1], "]")[0]) + } else if len(offArgs) == 2 { + left, _ = strconv.Atoi(strings.Split(strings.Split(offArgs[1], ":")[0], "[")[1]) + right, _ = strconv.Atoi(strings.Split(strings.Split(offArgs[0], ":")[1], "]")[0]) + } + + if instArgs == "" { + instArgs = fmt.Sprintf("arg_offset_%d_%d", left, right) + } else { + instArgs += fmt.Sprintf(", arg_offset_%d_%d", left, right) + } + } + + ops = append(ops, op) + opstrs[op] = opstr + if instFormatComment == "" { + instFormatComment = "// " + text[i].S + } else if strings.HasPrefix(op, "AM") { + instFormatComment = fmt.Sprintf("// %s rd, rk, rj", text[i].S) + } + instFormatComments[op] = instFormatComment + // The parameter order of some instructions is inconsistent in encoding and syntax, such as BSTRINS.* + if instArgs != "" { + args := strings.Split(instFormatComment, " ")[2:] + 
tInstArgs := strings.Split(instArgs, ", ") + newOrderedInstArgs := []string{} + for _, a := range args { + a = strings.Split(a, ",")[0] + for _, aa := range tInstArgs { + if strings.Contains(aa, a) { + newOrderedInstArgs = append(newOrderedInstArgs, aa) + break + } else if a == "rd" && aa == "arg_fd" { + newOrderedInstArgs = append(newOrderedInstArgs, "arg_rd") + break + } + } + } + instArgs = strings.Join(newOrderedInstArgs, ", ") + } + if strings.HasPrefix(op, "AM") { + instArgs = "arg_rd, arg_rk, arg_rj" + } + instFormat := fmt.Sprintf("{mask: %s, value: %s, op: %s, args: instArgs{%s}},", binstrToHex(binMask), binstrToHex(binValue), op, instArgs) + instFormats[op] = instFormat + + i = j // next instruction + } + + return +} From b3635f5142e10850f935740529702e766bb68fd6 Mon Sep 17 00:00:00 2001 From: limeidan Date: Thu, 29 Aug 2024 10:33:42 +0800 Subject: [PATCH 028/200] loong64: fix the expression of code generate line Change-Id: Id4615a28320c2acdc41fc1fc21a19943fec3b23f Reviewed-on: https://go-review.googlesource.com/c/arch/+/609475 Auto-Submit: Dmitri Shuralyov Reviewed-by: Dmitri Shuralyov Reviewed-by: Dmitri Shuralyov LUCI-TryBot-Result: Go LUCI Reviewed-by: abner chenc Reviewed-by: sophie zhao Reviewed-by: Cherry Mui --- loong64/loong64asm/tables.go | 2 +- loong64/loong64spec/spec.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/loong64/loong64asm/tables.go b/loong64/loong64asm/tables.go index c85d47c2..ad34195b 100644 --- a/loong64/loong64asm/tables.go +++ b/loong64/loong64asm/tables.go @@ -1,4 +1,4 @@ -// Generated by loong64spec LoongArch-Vol1-EN.pdf, DO NOT EDIT. +// Code generated by loong64spec LoongArch-Vol1-EN.pdf, DO NOT EDIT. // Copyright 2024 The Go Authors. All rights reserved. 
// Use of this source code is governed by a BSD-style diff --git a/loong64/loong64spec/spec.go b/loong64/loong64spec/spec.go index 4c32961c..3e69a24e 100644 --- a/loong64/loong64spec/spec.go +++ b/loong64/loong64spec/spec.go @@ -44,7 +44,7 @@ func main() { log.Fatal(err) } var prologue bytes.Buffer - prologue.Write([]byte("// Generated by loong64spec LoongArch-Vol1-EN.pdf, DO NOT EDIT.\n\n// Copyright 2024 The Go Authors. All rights reserved.\n// Use of this source code is governed by a BSD-style\n// license that can be found in the LICENSE file.\n\npackage loong64asm\n\n")) + prologue.Write([]byte("// Code generated by loong64spec LoongArch-Vol1-EN.pdf, DO NOT EDIT.\n\n// Copyright 2024 The Go Authors. All rights reserved.\n// Use of this source code is governed by a BSD-style\n// license that can be found in the LICENSE file.\n\npackage loong64asm\n\n")) var op_f bytes.Buffer op_f.Write([]byte("const (\n\t_ Op = iota\n")) From 8644b45fb7514c947c3fdacee68c67801accee22 Mon Sep 17 00:00:00 2001 From: limeidan Date: Thu, 29 Aug 2024 10:48:03 +0800 Subject: [PATCH 029/200] loong64: add the mapping of platform instruction JIRL to plan9 instruction RET Change-Id: Ifec777b10bc9a5a8e5e9b4fd6bd2077205ad4151 Reviewed-on: https://go-review.googlesource.com/c/arch/+/609495 Reviewed-by: sophie zhao Reviewed-by: abner chenc LUCI-TryBot-Result: Go LUCI Reviewed-by: Qiqi Huang Reviewed-by: Michael Pratt Reviewed-by: Dmitri Shuralyov --- loong64/loong64asm/inst.go | 4 +++- loong64/loong64asm/plan9x.go | 7 ++++++- loong64/loong64asm/testdata/gnucases.txt | 2 ++ loong64/loong64asm/testdata/plan9cases.txt | 2 ++ 4 files changed, 13 insertions(+), 2 deletions(-) diff --git a/loong64/loong64asm/inst.go b/loong64/loong64asm/inst.go index 1ac5c797..362d73ba 100644 --- a/loong64/loong64asm/inst.go +++ b/loong64/loong64asm/inst.go @@ -40,7 +40,9 @@ func (i Inst) String() string { } case JIRL: - if i.Args[0].(Reg) == R0 && i.Args[2].(OffsetSimm).Imm == 0 { + if i.Args[0].(Reg) == R0 && 
i.Args[1].(Reg) == R1 && i.Args[2].(OffsetSimm).Imm == 0 { + return "ret" + } else if i.Args[0].(Reg) == R0 && i.Args[2].(OffsetSimm).Imm == 0 { return "jr " + args[1] } diff --git a/loong64/loong64asm/plan9x.go b/loong64/loong64asm/plan9x.go index 5db32903..c18e217c 100644 --- a/loong64/loong64asm/plan9x.go +++ b/loong64/loong64asm/plan9x.go @@ -96,8 +96,13 @@ func GoSyntax(inst Inst, pc uint64, symname func(uint64) (string, uint64)) strin rd := inst.Args[0].(Reg) rj := inst.Args[1].(Reg) regno := uint16(rj) & 31 - if rd == R0 { + off := inst.Args[2].(OffsetSimm).Imm + if rd == R0 && rj == R1 && off == 0 { + return fmt.Sprintf("RET") + } else if rd == R0 && off == 0 { return fmt.Sprintf("JMP (R%d)", regno) + } else if rd == R0 { + return fmt.Sprintf("JMP %d(R%d)", off, regno) } return fmt.Sprintf("CALL (R%d)", regno) diff --git a/loong64/loong64asm/testdata/gnucases.txt b/loong64/loong64asm/testdata/gnucases.txt index 2cfd5ea1..90ad9b7e 100644 --- a/loong64/loong64asm/testdata/gnucases.txt +++ b/loong64/loong64asm/testdata/gnucases.txt @@ -253,6 +253,8 @@ ac154806| iocsrwr.h $t0, $t1 ac194806| iocsrwr.w $t0, $t1 ac1d4806| iocsrwr.d $t0, $t1 82b54906| invtlb 0x2, $t0, $t1 +2000004c| ret +2008004c| jirl $zero, $ra, 8 ac11004c| jirl $t0, $t1, 16 ac410028| ld.b $t0, $t1, 16 ac41002a| ld.bu $t0, $t1, 16 diff --git a/loong64/loong64asm/testdata/plan9cases.txt b/loong64/loong64asm/testdata/plan9cases.txt index 53f5d450..c6a5bd57 100644 --- a/loong64/loong64asm/testdata/plan9cases.txt +++ b/loong64/loong64asm/testdata/plan9cases.txt @@ -119,6 +119,8 @@ a0110058| BEQ R13, 4(PC) 47d5ff6f| BGEU R10, R7, -11(PC) 802c006c| BGEU R4, 11(PC) 04d8ff67| BLEZ R4, -10(PC) +2000004c| RET +2008004c| JMP 8(R1) 00040058| JMP 1(PC) 8000004c| JMP (R4) 00340050| JMP 13(PC) From 292026d483a88b3bc4d859f7a2d5df3794cf6da5 Mon Sep 17 00:00:00 2001 From: limeidan Date: Thu, 5 Sep 2024 20:24:51 +0800 Subject: [PATCH 030/200] loong64: fix self-assignment error Change-Id: 
Icdc21032f37ebd56f5bbbbe058637efbc7cef1f0 Reviewed-on: https://go-review.googlesource.com/c/arch/+/611135 Reviewed-by: Qiqi Huang Reviewed-by: Michael Pratt Reviewed-by: sophie zhao Reviewed-by: Dmitri Shuralyov Reviewed-by: abner chenc LUCI-TryBot-Result: Go LUCI --- loong64/loong64asm/plan9x.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/loong64/loong64asm/plan9x.go b/loong64/loong64asm/plan9x.go index c18e217c..4e3c4f1e 100644 --- a/loong64/loong64asm/plan9x.go +++ b/loong64/loong64asm/plan9x.go @@ -47,7 +47,7 @@ func GoSyntax(inst Inst, pc uint64, symname func(uint64) (string, uint64)) strin op = "MOVHU" args = append(args[1:2], args[0:1]...) } else { - args[0], args[1], args[2], args[3] = args[2], args[1], args[3], args[0] + args[0], args[2], args[3] = args[2], args[3], args[0] } case BCNEZ, BCEQZ: From 7874f23b9c060aa1d0aaa13b7352d59335c30184 Mon Sep 17 00:00:00 2001 From: Lin Runze Date: Sun, 4 Aug 2024 19:19:12 +0800 Subject: [PATCH 031/200] riscv64: implement RV64GC_zba_zbb_zbs GNU/Plan9 format disassembler Support decoding RV64GC_zba_zbb_zbs instructions as GNU & Plan9 format, relies on riscv64spec/spec.go to generate instruction tables Change-Id: I3b2793a7dd9faa3ac18d85361a8627eba0923068 Reviewed-on: https://go-review.googlesource.com/c/arch/+/602915 Reviewed-by: Meng Zhuo LUCI-TryBot-Result: Go LUCI Reviewed-by: Dmitri Shuralyov Reviewed-by: Cherry Mui Reviewed-by: Joel Sing --- riscv64/riscv64asm/arg.go | 116 +++ riscv64/riscv64asm/csr_string.go | 577 ++++++++++++ riscv64/riscv64asm/decode.go | 550 +++++++++++ riscv64/riscv64asm/gnu.go | 328 +++++++ riscv64/riscv64asm/inst.go | 495 ++++++++++ riscv64/riscv64asm/plan9x.go | 377 ++++++++ riscv64/riscv64asm/tables.go | 1474 ++++++++++++++++++++++++++++++ riscv64/riscv64spec/spec.go | 16 +- 8 files changed, 3925 insertions(+), 8 deletions(-) create mode 100644 riscv64/riscv64asm/arg.go create mode 100644 riscv64/riscv64asm/csr_string.go create mode 100644 riscv64/riscv64asm/decode.go 
create mode 100644 riscv64/riscv64asm/gnu.go create mode 100644 riscv64/riscv64asm/inst.go create mode 100644 riscv64/riscv64asm/plan9x.go create mode 100644 riscv64/riscv64asm/tables.go diff --git a/riscv64/riscv64asm/arg.go b/riscv64/riscv64asm/arg.go new file mode 100644 index 00000000..7898c273 --- /dev/null +++ b/riscv64/riscv64asm/arg.go @@ -0,0 +1,116 @@ +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package riscv64asm + +// Naming for Go decoder arguments: +// +// - arg_rd: a general purpose register rd encoded in rd[11:7] field +// +// - arg_rs1: a general purpose register rs1 encoded in rs1[19:15] field +// +// - arg_rs2: a general purpose register rs2 encoded in rs2[24:20] field +// +// - arg_rs3: a general purpose register rs3 encoded in rs3[31:27] field +// +// - arg_fd: a floating point register rd encoded in rd[11:7] field +// +// - arg_fs1: a floating point register rs1 encoded in rs1[19:15] field +// +// - arg_fs2: a floating point register rs2 encoded in rs2[24:20] field +// +// - arg_fs3: a floating point register rs3 encoded in rs3[31:27] field +// +// - arg_csr: a control status register encoded in csr[31:20] field +// +// - arg_rs1_mem: source register with offset in load commands +// +// - arg_rs1_store: source register with offset in store commands +// +// - arg_rs1_amo: source register with offset in atomic commands +// +// - arg_pred: predecessor memory ordering information encoded in pred[27:24] field +// For details, please refer to chapter 2.7 of ISA manual volume 1 +// +// - arg_succ: successor memory ordering information encoded in succ[23:20] field +// For details, please refer to chapter 2.7 of ISA manual volume 1 +// +// - arg_zimm: a unsigned immediate encoded in zimm[19:15] field +// +// - arg_imm12: an I-type immediate encoded in imm12[31:20] field +// +// - arg_simm12: a S-type immediate encoded in 
simm12[31:25|11:7] field +// +// - arg_bimm12: a B-type immediate encoded in bimm12[31:25|11:7] field +// +// - arg_imm20: an U-type immediate encoded in imm20[31:12] field +// +// - arg_jimm20: a J-type immediate encoded in jimm20[31:12] field +// +// - arg_shamt5: a shift amount encoded in shamt5[24:20] field +// +// - arg_shamt6: a shift amount encoded in shamt6[25:20] field +// + +type argType uint16 + +const ( + _ argType = iota + arg_rd + arg_rs1 + arg_rs2 + arg_rs3 + arg_fd + arg_fs1 + arg_fs2 + arg_fs3 + arg_csr + + arg_rs1_amo + arg_rs1_mem + arg_rs1_store + + arg_pred + arg_succ + + arg_zimm + arg_imm12 + arg_simm12 + arg_bimm12 + arg_imm20 + arg_jimm20 + arg_shamt5 + arg_shamt6 + + // RISC-V Compressed Extension Args + arg_rd_p + arg_fd_p + arg_rs1_p + arg_rd_rs1_p + arg_fs2_p + arg_rs2_p + arg_rd_n0 + arg_rs1_n0 + arg_rd_rs1_n0 + arg_c_rs1_n0 + arg_c_rs2_n0 + arg_c_fs2 + arg_c_rs2 + arg_rd_n2 + + arg_c_imm6 + arg_c_nzimm6 + arg_c_nzuimm6 + arg_c_uimm7 + arg_c_uimm8 + arg_c_uimm8sp_s + arg_c_uimm8sp + arg_c_uimm9sp_s + arg_c_uimm9sp + arg_c_bimm9 + arg_c_nzimm10 + arg_c_nzuimm10 + arg_c_imm12 + arg_c_nzimm18 +) diff --git a/riscv64/riscv64asm/csr_string.go b/riscv64/riscv64asm/csr_string.go new file mode 100644 index 00000000..addf91aa --- /dev/null +++ b/riscv64/riscv64asm/csr_string.go @@ -0,0 +1,577 @@ +// Code generated by "stringer -type=CSR"; DO NOT EDIT. + +package riscv64asm + +import "strconv" + +func _() { + // An "invalid array index" compiler error signifies that the constant values have changed. + // Re-run the stringer command to generate them again. 
+ var x [1]struct{} + _ = x[USTATUS-0] + _ = x[FFLAGS-1] + _ = x[FRM-2] + _ = x[FCSR-3] + _ = x[UIE-4] + _ = x[UTVEC-5] + _ = x[UTVT-7] + _ = x[VSTART-8] + _ = x[VXSAT-9] + _ = x[VXRM-10] + _ = x[VCSR-15] + _ = x[USCRATCH-64] + _ = x[UEPC-65] + _ = x[UCAUSE-66] + _ = x[UTVAL-67] + _ = x[UIP-68] + _ = x[UNXTI-69] + _ = x[UINTSTATUS-70] + _ = x[USCRATCHCSW-72] + _ = x[USCRATCHCSWL-73] + _ = x[SSTATUS-256] + _ = x[SEDELEG-258] + _ = x[SIDELEG-259] + _ = x[SIE-260] + _ = x[STVEC-261] + _ = x[SCOUNTEREN-262] + _ = x[STVT-263] + _ = x[SSCRATCH-320] + _ = x[SEPC-321] + _ = x[SCAUSE-322] + _ = x[STVAL-323] + _ = x[SIP-324] + _ = x[SNXTI-325] + _ = x[SINTSTATUS-326] + _ = x[SSCRATCHCSW-328] + _ = x[SSCRATCHCSWL-329] + _ = x[SATP-384] + _ = x[VSSTATUS-512] + _ = x[VSIE-516] + _ = x[VSTVEC-517] + _ = x[VSSCRATCH-576] + _ = x[VSEPC-577] + _ = x[VSCAUSE-578] + _ = x[VSTVAL-579] + _ = x[VSIP-580] + _ = x[VSATP-640] + _ = x[MSTATUS-768] + _ = x[MISA-769] + _ = x[MEDELEG-770] + _ = x[MIDELEG-771] + _ = x[MIE-772] + _ = x[MTVEC-773] + _ = x[MCOUNTEREN-774] + _ = x[MTVT-775] + _ = x[MSTATUSH-784] + _ = x[MCOUNTINHIBIT-800] + _ = x[MHPMEVENT3-803] + _ = x[MHPMEVENT4-804] + _ = x[MHPMEVENT5-805] + _ = x[MHPMEVENT6-806] + _ = x[MHPMEVENT7-807] + _ = x[MHPMEVENT8-808] + _ = x[MHPMEVENT9-809] + _ = x[MHPMEVENT10-810] + _ = x[MHPMEVENT11-811] + _ = x[MHPMEVENT12-812] + _ = x[MHPMEVENT13-813] + _ = x[MHPMEVENT14-814] + _ = x[MHPMEVENT15-815] + _ = x[MHPMEVENT16-816] + _ = x[MHPMEVENT17-817] + _ = x[MHPMEVENT18-818] + _ = x[MHPMEVENT19-819] + _ = x[MHPMEVENT20-820] + _ = x[MHPMEVENT21-821] + _ = x[MHPMEVENT22-822] + _ = x[MHPMEVENT23-823] + _ = x[MHPMEVENT24-824] + _ = x[MHPMEVENT25-825] + _ = x[MHPMEVENT26-826] + _ = x[MHPMEVENT27-827] + _ = x[MHPMEVENT28-828] + _ = x[MHPMEVENT29-829] + _ = x[MHPMEVENT30-830] + _ = x[MHPMEVENT31-831] + _ = x[MSCRATCH-832] + _ = x[MEPC-833] + _ = x[MCAUSE-834] + _ = x[MTVAL-835] + _ = x[MIP-836] + _ = x[MNXTI-837] + _ = x[MINTSTATUS-838] + _ = 
x[MSCRATCHCSW-840] + _ = x[MSCRATCHCSWL-841] + _ = x[MTINST-842] + _ = x[MTVAL2-843] + _ = x[PMPCFG0-928] + _ = x[PMPCFG1-929] + _ = x[PMPCFG2-930] + _ = x[PMPCFG3-931] + _ = x[PMPADDR0-944] + _ = x[PMPADDR1-945] + _ = x[PMPADDR2-946] + _ = x[PMPADDR3-947] + _ = x[PMPADDR4-948] + _ = x[PMPADDR5-949] + _ = x[PMPADDR6-950] + _ = x[PMPADDR7-951] + _ = x[PMPADDR8-952] + _ = x[PMPADDR9-953] + _ = x[PMPADDR10-954] + _ = x[PMPADDR11-955] + _ = x[PMPADDR12-956] + _ = x[PMPADDR13-957] + _ = x[PMPADDR14-958] + _ = x[PMPADDR15-959] + _ = x[HSTATUS-1536] + _ = x[HEDELEG-1538] + _ = x[HIDELEG-1539] + _ = x[HIE-1540] + _ = x[HTIMEDELTA-1541] + _ = x[HCOUNTEREN-1542] + _ = x[HGEIE-1543] + _ = x[HTIMEDELTAH-1557] + _ = x[HTVAL-1603] + _ = x[HIP-1604] + _ = x[HVIP-1605] + _ = x[HTINST-1610] + _ = x[HGATP-1664] + _ = x[TSELECT-1952] + _ = x[TDATA1-1953] + _ = x[TDATA2-1954] + _ = x[TDATA3-1955] + _ = x[TINFO-1956] + _ = x[TCONTROL-1957] + _ = x[MCONTEXT-1960] + _ = x[MNOISE-1961] + _ = x[SCONTEXT-1962] + _ = x[DCSR-1968] + _ = x[DPC-1969] + _ = x[DSCRATCH0-1970] + _ = x[DSCRATCH1-1971] + _ = x[MCYCLE-2816] + _ = x[MINSTRET-2818] + _ = x[MHPMCOUNTER3-2819] + _ = x[MHPMCOUNTER4-2820] + _ = x[MHPMCOUNTER5-2821] + _ = x[MHPMCOUNTER6-2822] + _ = x[MHPMCOUNTER7-2823] + _ = x[MHPMCOUNTER8-2824] + _ = x[MHPMCOUNTER9-2825] + _ = x[MHPMCOUNTER10-2826] + _ = x[MHPMCOUNTER11-2827] + _ = x[MHPMCOUNTER12-2828] + _ = x[MHPMCOUNTER13-2829] + _ = x[MHPMCOUNTER14-2830] + _ = x[MHPMCOUNTER15-2831] + _ = x[MHPMCOUNTER16-2832] + _ = x[MHPMCOUNTER17-2833] + _ = x[MHPMCOUNTER18-2834] + _ = x[MHPMCOUNTER19-2835] + _ = x[MHPMCOUNTER20-2836] + _ = x[MHPMCOUNTER21-2837] + _ = x[MHPMCOUNTER22-2838] + _ = x[MHPMCOUNTER23-2839] + _ = x[MHPMCOUNTER24-2840] + _ = x[MHPMCOUNTER25-2841] + _ = x[MHPMCOUNTER26-2842] + _ = x[MHPMCOUNTER27-2843] + _ = x[MHPMCOUNTER28-2844] + _ = x[MHPMCOUNTER29-2845] + _ = x[MHPMCOUNTER30-2846] + _ = x[MHPMCOUNTER31-2847] + _ = x[MCYCLEH-2944] + _ = x[MINSTRETH-2946] + _ = 
x[MHPMCOUNTER3H-2947] + _ = x[MHPMCOUNTER4H-2948] + _ = x[MHPMCOUNTER5H-2949] + _ = x[MHPMCOUNTER6H-2950] + _ = x[MHPMCOUNTER7H-2951] + _ = x[MHPMCOUNTER8H-2952] + _ = x[MHPMCOUNTER9H-2953] + _ = x[MHPMCOUNTER10H-2954] + _ = x[MHPMCOUNTER11H-2955] + _ = x[MHPMCOUNTER12H-2956] + _ = x[MHPMCOUNTER13H-2957] + _ = x[MHPMCOUNTER14H-2958] + _ = x[MHPMCOUNTER15H-2959] + _ = x[MHPMCOUNTER16H-2960] + _ = x[MHPMCOUNTER17H-2961] + _ = x[MHPMCOUNTER18H-2962] + _ = x[MHPMCOUNTER19H-2963] + _ = x[MHPMCOUNTER20H-2964] + _ = x[MHPMCOUNTER21H-2965] + _ = x[MHPMCOUNTER22H-2966] + _ = x[MHPMCOUNTER23H-2967] + _ = x[MHPMCOUNTER24H-2968] + _ = x[MHPMCOUNTER25H-2969] + _ = x[MHPMCOUNTER26H-2970] + _ = x[MHPMCOUNTER27H-2971] + _ = x[MHPMCOUNTER28H-2972] + _ = x[MHPMCOUNTER29H-2973] + _ = x[MHPMCOUNTER30H-2974] + _ = x[MHPMCOUNTER31H-2975] + _ = x[CYCLE-3072] + _ = x[TIME-3073] + _ = x[INSTRET-3074] + _ = x[HPMCOUNTER3-3075] + _ = x[HPMCOUNTER4-3076] + _ = x[HPMCOUNTER5-3077] + _ = x[HPMCOUNTER6-3078] + _ = x[HPMCOUNTER7-3079] + _ = x[HPMCOUNTER8-3080] + _ = x[HPMCOUNTER9-3081] + _ = x[HPMCOUNTER10-3082] + _ = x[HPMCOUNTER11-3083] + _ = x[HPMCOUNTER12-3084] + _ = x[HPMCOUNTER13-3085] + _ = x[HPMCOUNTER14-3086] + _ = x[HPMCOUNTER15-3087] + _ = x[HPMCOUNTER16-3088] + _ = x[HPMCOUNTER17-3089] + _ = x[HPMCOUNTER18-3090] + _ = x[HPMCOUNTER19-3091] + _ = x[HPMCOUNTER20-3092] + _ = x[HPMCOUNTER21-3093] + _ = x[HPMCOUNTER22-3094] + _ = x[HPMCOUNTER23-3095] + _ = x[HPMCOUNTER24-3096] + _ = x[HPMCOUNTER25-3097] + _ = x[HPMCOUNTER26-3098] + _ = x[HPMCOUNTER27-3099] + _ = x[HPMCOUNTER28-3100] + _ = x[HPMCOUNTER29-3101] + _ = x[HPMCOUNTER30-3102] + _ = x[HPMCOUNTER31-3103] + _ = x[VL-3104] + _ = x[VTYPE-3105] + _ = x[VLENB-3106] + _ = x[CYCLEH-3200] + _ = x[TIMEH-3201] + _ = x[INSTRETH-3202] + _ = x[HPMCOUNTER3H-3203] + _ = x[HPMCOUNTER4H-3204] + _ = x[HPMCOUNTER5H-3205] + _ = x[HPMCOUNTER6H-3206] + _ = x[HPMCOUNTER7H-3207] + _ = x[HPMCOUNTER8H-3208] + _ = x[HPMCOUNTER9H-3209] + _ = 
x[HPMCOUNTER10H-3210] + _ = x[HPMCOUNTER11H-3211] + _ = x[HPMCOUNTER12H-3212] + _ = x[HPMCOUNTER13H-3213] + _ = x[HPMCOUNTER14H-3214] + _ = x[HPMCOUNTER15H-3215] + _ = x[HPMCOUNTER16H-3216] + _ = x[HPMCOUNTER17H-3217] + _ = x[HPMCOUNTER18H-3218] + _ = x[HPMCOUNTER19H-3219] + _ = x[HPMCOUNTER20H-3220] + _ = x[HPMCOUNTER21H-3221] + _ = x[HPMCOUNTER22H-3222] + _ = x[HPMCOUNTER23H-3223] + _ = x[HPMCOUNTER24H-3224] + _ = x[HPMCOUNTER25H-3225] + _ = x[HPMCOUNTER26H-3226] + _ = x[HPMCOUNTER27H-3227] + _ = x[HPMCOUNTER28H-3228] + _ = x[HPMCOUNTER29H-3229] + _ = x[HPMCOUNTER30H-3230] + _ = x[HPMCOUNTER31H-3231] + _ = x[HGEIP-3602] + _ = x[MVENDORID-3857] + _ = x[MARCHID-3858] + _ = x[MIMPID-3859] + _ = x[MHARTID-3860] + _ = x[MENTROPY-3861] +} + +const _CSR_name = "USTATUSFFLAGSFRMFCSRUIEUTVECUTVTVSTARTVXSATVXRMVCSRUSCRATCHUEPCUCAUSEUTVALUIPUNXTIUINTSTATUSUSCRATCHCSWUSCRATCHCSWLSSTATUSSEDELEGSIDELEGSIESTVECSCOUNTERENSTVTSSCRATCHSEPCSCAUSESTVALSIPSNXTISINTSTATUSSSCRATCHCSWSSCRATCHCSWLSATPVSSTATUSVSIEVSTVECVSSCRATCHVSEPCVSCAUSEVSTVALVSIPVSATPMSTATUSMISAMEDELEGMIDELEGMIEMTVECMCOUNTERENMTVTMSTATUSHMCOUNTINHIBITMHPMEVENT3MHPMEVENT4MHPMEVENT5MHPMEVENT6MHPMEVENT7MHPMEVENT8MHPMEVENT9MHPMEVENT10MHPMEVENT11MHPMEVENT12MHPMEVENT13MHPMEVENT14MHPMEVENT15MHPMEVENT16MHPMEVENT17MHPMEVENT18MHPMEVENT19MHPMEVENT20MHPMEVENT21MHPMEVENT22MHPMEVENT23MHPMEVENT24MHPMEVENT25MHPMEVENT26MHPMEVENT27MHPMEVENT28MHPMEVENT29MHPMEVENT30MHPMEVENT31MSCRATCHMEPCMCAUSEMTVALMIPMNXTIMINTSTATUSMSCRATCHCSWMSCRATCHCSWLMTINSTMTVAL2PMPCFG0PMPCFG1PMPCFG2PMPCFG3PMPADDR0PMPADDR1PMPADDR2PMPADDR3PMPADDR4PMPADDR5PMPADDR6PMPADDR7PMPADDR8PMPADDR9PMPADDR10PMPADDR11PMPADDR12PMPADDR13PMPADDR14PMPADDR15HSTATUSHEDELEGHIDELEGHIEHTIMEDELTAHCOUNTERENHGEIEHTIMEDELTAHHTVALHIPHVIPHTINSTHGATPTSELECTTDATA1TDATA2TDATA3TINFOTCONTROLMCONTEXTMNOISESCONTEXTDCSRDPCDSCRATCH0DSCRATCH1MCYCLEMINSTRETMHPMCOUNTER3MHPMCOUNTER4MHPMCOUNTER5MHPMCOUNTER6MHPMCOUNTER7MHPMCOUNTER8MHPMCOUNTER9MHPMCOUNTER10MHPMCOUNTER11MHPMCOUNTER12MHPMCOUNTER13MHPMCOUNTER14MHPM
COUNTER15MHPMCOUNTER16MHPMCOUNTER17MHPMCOUNTER18MHPMCOUNTER19MHPMCOUNTER20MHPMCOUNTER21MHPMCOUNTER22MHPMCOUNTER23MHPMCOUNTER24MHPMCOUNTER25MHPMCOUNTER26MHPMCOUNTER27MHPMCOUNTER28MHPMCOUNTER29MHPMCOUNTER30MHPMCOUNTER31MCYCLEHMINSTRETHMHPMCOUNTER3HMHPMCOUNTER4HMHPMCOUNTER5HMHPMCOUNTER6HMHPMCOUNTER7HMHPMCOUNTER8HMHPMCOUNTER9HMHPMCOUNTER10HMHPMCOUNTER11HMHPMCOUNTER12HMHPMCOUNTER13HMHPMCOUNTER14HMHPMCOUNTER15HMHPMCOUNTER16HMHPMCOUNTER17HMHPMCOUNTER18HMHPMCOUNTER19HMHPMCOUNTER20HMHPMCOUNTER21HMHPMCOUNTER22HMHPMCOUNTER23HMHPMCOUNTER24HMHPMCOUNTER25HMHPMCOUNTER26HMHPMCOUNTER27HMHPMCOUNTER28HMHPMCOUNTER29HMHPMCOUNTER30HMHPMCOUNTER31HCYCLETIMEINSTRETHPMCOUNTER3HPMCOUNTER4HPMCOUNTER5HPMCOUNTER6HPMCOUNTER7HPMCOUNTER8HPMCOUNTER9HPMCOUNTER10HPMCOUNTER11HPMCOUNTER12HPMCOUNTER13HPMCOUNTER14HPMCOUNTER15HPMCOUNTER16HPMCOUNTER17HPMCOUNTER18HPMCOUNTER19HPMCOUNTER20HPMCOUNTER21HPMCOUNTER22HPMCOUNTER23HPMCOUNTER24HPMCOUNTER25HPMCOUNTER26HPMCOUNTER27HPMCOUNTER28HPMCOUNTER29HPMCOUNTER30HPMCOUNTER31VLVTYPEVLENBCYCLEHTIMEHINSTRETHHPMCOUNTER3HHPMCOUNTER4HHPMCOUNTER5HHPMCOUNTER6HHPMCOUNTER7HHPMCOUNTER8HHPMCOUNTER9HHPMCOUNTER10HHPMCOUNTER11HHPMCOUNTER12HHPMCOUNTER13HHPMCOUNTER14HHPMCOUNTER15HHPMCOUNTER16HHPMCOUNTER17HHPMCOUNTER18HHPMCOUNTER19HHPMCOUNTER20HHPMCOUNTER21HHPMCOUNTER22HHPMCOUNTER23HHPMCOUNTER24HHPMCOUNTER25HHPMCOUNTER26HHPMCOUNTER27HHPMCOUNTER28HHPMCOUNTER29HHPMCOUNTER30HHPMCOUNTER31HHGEIPMVENDORIDMARCHIDMIMPIDMHARTIDMENTROPY" + +var _CSR_map = map[CSR]string{ + 0: _CSR_name[0:7], + 1: _CSR_name[7:13], + 2: _CSR_name[13:16], + 3: _CSR_name[16:20], + 4: _CSR_name[20:23], + 5: _CSR_name[23:28], + 7: _CSR_name[28:32], + 8: _CSR_name[32:38], + 9: _CSR_name[38:43], + 10: _CSR_name[43:47], + 15: _CSR_name[47:51], + 64: _CSR_name[51:59], + 65: _CSR_name[59:63], + 66: _CSR_name[63:69], + 67: _CSR_name[69:74], + 68: _CSR_name[74:77], + 69: _CSR_name[77:82], + 70: _CSR_name[82:92], + 72: _CSR_name[92:103], + 73: _CSR_name[103:115], + 256: _CSR_name[115:122], + 258: _CSR_name[122:129], + 259: 
_CSR_name[129:136], + 260: _CSR_name[136:139], + 261: _CSR_name[139:144], + 262: _CSR_name[144:154], + 263: _CSR_name[154:158], + 320: _CSR_name[158:166], + 321: _CSR_name[166:170], + 322: _CSR_name[170:176], + 323: _CSR_name[176:181], + 324: _CSR_name[181:184], + 325: _CSR_name[184:189], + 326: _CSR_name[189:199], + 328: _CSR_name[199:210], + 329: _CSR_name[210:222], + 384: _CSR_name[222:226], + 512: _CSR_name[226:234], + 516: _CSR_name[234:238], + 517: _CSR_name[238:244], + 576: _CSR_name[244:253], + 577: _CSR_name[253:258], + 578: _CSR_name[258:265], + 579: _CSR_name[265:271], + 580: _CSR_name[271:275], + 640: _CSR_name[275:280], + 768: _CSR_name[280:287], + 769: _CSR_name[287:291], + 770: _CSR_name[291:298], + 771: _CSR_name[298:305], + 772: _CSR_name[305:308], + 773: _CSR_name[308:313], + 774: _CSR_name[313:323], + 775: _CSR_name[323:327], + 784: _CSR_name[327:335], + 800: _CSR_name[335:348], + 803: _CSR_name[348:358], + 804: _CSR_name[358:368], + 805: _CSR_name[368:378], + 806: _CSR_name[378:388], + 807: _CSR_name[388:398], + 808: _CSR_name[398:408], + 809: _CSR_name[408:418], + 810: _CSR_name[418:429], + 811: _CSR_name[429:440], + 812: _CSR_name[440:451], + 813: _CSR_name[451:462], + 814: _CSR_name[462:473], + 815: _CSR_name[473:484], + 816: _CSR_name[484:495], + 817: _CSR_name[495:506], + 818: _CSR_name[506:517], + 819: _CSR_name[517:528], + 820: _CSR_name[528:539], + 821: _CSR_name[539:550], + 822: _CSR_name[550:561], + 823: _CSR_name[561:572], + 824: _CSR_name[572:583], + 825: _CSR_name[583:594], + 826: _CSR_name[594:605], + 827: _CSR_name[605:616], + 828: _CSR_name[616:627], + 829: _CSR_name[627:638], + 830: _CSR_name[638:649], + 831: _CSR_name[649:660], + 832: _CSR_name[660:668], + 833: _CSR_name[668:672], + 834: _CSR_name[672:678], + 835: _CSR_name[678:683], + 836: _CSR_name[683:686], + 837: _CSR_name[686:691], + 838: _CSR_name[691:701], + 840: _CSR_name[701:712], + 841: _CSR_name[712:724], + 842: _CSR_name[724:730], + 843: _CSR_name[730:736], + 928: 
_CSR_name[736:743], + 929: _CSR_name[743:750], + 930: _CSR_name[750:757], + 931: _CSR_name[757:764], + 944: _CSR_name[764:772], + 945: _CSR_name[772:780], + 946: _CSR_name[780:788], + 947: _CSR_name[788:796], + 948: _CSR_name[796:804], + 949: _CSR_name[804:812], + 950: _CSR_name[812:820], + 951: _CSR_name[820:828], + 952: _CSR_name[828:836], + 953: _CSR_name[836:844], + 954: _CSR_name[844:853], + 955: _CSR_name[853:862], + 956: _CSR_name[862:871], + 957: _CSR_name[871:880], + 958: _CSR_name[880:889], + 959: _CSR_name[889:898], + 1536: _CSR_name[898:905], + 1538: _CSR_name[905:912], + 1539: _CSR_name[912:919], + 1540: _CSR_name[919:922], + 1541: _CSR_name[922:932], + 1542: _CSR_name[932:942], + 1543: _CSR_name[942:947], + 1557: _CSR_name[947:958], + 1603: _CSR_name[958:963], + 1604: _CSR_name[963:966], + 1605: _CSR_name[966:970], + 1610: _CSR_name[970:976], + 1664: _CSR_name[976:981], + 1952: _CSR_name[981:988], + 1953: _CSR_name[988:994], + 1954: _CSR_name[994:1000], + 1955: _CSR_name[1000:1006], + 1956: _CSR_name[1006:1011], + 1957: _CSR_name[1011:1019], + 1960: _CSR_name[1019:1027], + 1961: _CSR_name[1027:1033], + 1962: _CSR_name[1033:1041], + 1968: _CSR_name[1041:1045], + 1969: _CSR_name[1045:1048], + 1970: _CSR_name[1048:1057], + 1971: _CSR_name[1057:1066], + 2816: _CSR_name[1066:1072], + 2818: _CSR_name[1072:1080], + 2819: _CSR_name[1080:1092], + 2820: _CSR_name[1092:1104], + 2821: _CSR_name[1104:1116], + 2822: _CSR_name[1116:1128], + 2823: _CSR_name[1128:1140], + 2824: _CSR_name[1140:1152], + 2825: _CSR_name[1152:1164], + 2826: _CSR_name[1164:1177], + 2827: _CSR_name[1177:1190], + 2828: _CSR_name[1190:1203], + 2829: _CSR_name[1203:1216], + 2830: _CSR_name[1216:1229], + 2831: _CSR_name[1229:1242], + 2832: _CSR_name[1242:1255], + 2833: _CSR_name[1255:1268], + 2834: _CSR_name[1268:1281], + 2835: _CSR_name[1281:1294], + 2836: _CSR_name[1294:1307], + 2837: _CSR_name[1307:1320], + 2838: _CSR_name[1320:1333], + 2839: _CSR_name[1333:1346], + 2840: 
_CSR_name[1346:1359], + 2841: _CSR_name[1359:1372], + 2842: _CSR_name[1372:1385], + 2843: _CSR_name[1385:1398], + 2844: _CSR_name[1398:1411], + 2845: _CSR_name[1411:1424], + 2846: _CSR_name[1424:1437], + 2847: _CSR_name[1437:1450], + 2944: _CSR_name[1450:1457], + 2946: _CSR_name[1457:1466], + 2947: _CSR_name[1466:1479], + 2948: _CSR_name[1479:1492], + 2949: _CSR_name[1492:1505], + 2950: _CSR_name[1505:1518], + 2951: _CSR_name[1518:1531], + 2952: _CSR_name[1531:1544], + 2953: _CSR_name[1544:1557], + 2954: _CSR_name[1557:1571], + 2955: _CSR_name[1571:1585], + 2956: _CSR_name[1585:1599], + 2957: _CSR_name[1599:1613], + 2958: _CSR_name[1613:1627], + 2959: _CSR_name[1627:1641], + 2960: _CSR_name[1641:1655], + 2961: _CSR_name[1655:1669], + 2962: _CSR_name[1669:1683], + 2963: _CSR_name[1683:1697], + 2964: _CSR_name[1697:1711], + 2965: _CSR_name[1711:1725], + 2966: _CSR_name[1725:1739], + 2967: _CSR_name[1739:1753], + 2968: _CSR_name[1753:1767], + 2969: _CSR_name[1767:1781], + 2970: _CSR_name[1781:1795], + 2971: _CSR_name[1795:1809], + 2972: _CSR_name[1809:1823], + 2973: _CSR_name[1823:1837], + 2974: _CSR_name[1837:1851], + 2975: _CSR_name[1851:1865], + 3072: _CSR_name[1865:1870], + 3073: _CSR_name[1870:1874], + 3074: _CSR_name[1874:1881], + 3075: _CSR_name[1881:1892], + 3076: _CSR_name[1892:1903], + 3077: _CSR_name[1903:1914], + 3078: _CSR_name[1914:1925], + 3079: _CSR_name[1925:1936], + 3080: _CSR_name[1936:1947], + 3081: _CSR_name[1947:1958], + 3082: _CSR_name[1958:1970], + 3083: _CSR_name[1970:1982], + 3084: _CSR_name[1982:1994], + 3085: _CSR_name[1994:2006], + 3086: _CSR_name[2006:2018], + 3087: _CSR_name[2018:2030], + 3088: _CSR_name[2030:2042], + 3089: _CSR_name[2042:2054], + 3090: _CSR_name[2054:2066], + 3091: _CSR_name[2066:2078], + 3092: _CSR_name[2078:2090], + 3093: _CSR_name[2090:2102], + 3094: _CSR_name[2102:2114], + 3095: _CSR_name[2114:2126], + 3096: _CSR_name[2126:2138], + 3097: _CSR_name[2138:2150], + 3098: _CSR_name[2150:2162], + 3099: 
_CSR_name[2162:2174], + 3100: _CSR_name[2174:2186], + 3101: _CSR_name[2186:2198], + 3102: _CSR_name[2198:2210], + 3103: _CSR_name[2210:2222], + 3104: _CSR_name[2222:2224], + 3105: _CSR_name[2224:2229], + 3106: _CSR_name[2229:2234], + 3200: _CSR_name[2234:2240], + 3201: _CSR_name[2240:2245], + 3202: _CSR_name[2245:2253], + 3203: _CSR_name[2253:2265], + 3204: _CSR_name[2265:2277], + 3205: _CSR_name[2277:2289], + 3206: _CSR_name[2289:2301], + 3207: _CSR_name[2301:2313], + 3208: _CSR_name[2313:2325], + 3209: _CSR_name[2325:2337], + 3210: _CSR_name[2337:2350], + 3211: _CSR_name[2350:2363], + 3212: _CSR_name[2363:2376], + 3213: _CSR_name[2376:2389], + 3214: _CSR_name[2389:2402], + 3215: _CSR_name[2402:2415], + 3216: _CSR_name[2415:2428], + 3217: _CSR_name[2428:2441], + 3218: _CSR_name[2441:2454], + 3219: _CSR_name[2454:2467], + 3220: _CSR_name[2467:2480], + 3221: _CSR_name[2480:2493], + 3222: _CSR_name[2493:2506], + 3223: _CSR_name[2506:2519], + 3224: _CSR_name[2519:2532], + 3225: _CSR_name[2532:2545], + 3226: _CSR_name[2545:2558], + 3227: _CSR_name[2558:2571], + 3228: _CSR_name[2571:2584], + 3229: _CSR_name[2584:2597], + 3230: _CSR_name[2597:2610], + 3231: _CSR_name[2610:2623], + 3602: _CSR_name[2623:2628], + 3857: _CSR_name[2628:2637], + 3858: _CSR_name[2637:2644], + 3859: _CSR_name[2644:2650], + 3860: _CSR_name[2650:2657], + 3861: _CSR_name[2657:2665], +} + +func (i CSR) String() string { + if str, ok := _CSR_map[i]; ok { + return str + } + return "CSR(" + strconv.FormatInt(int64(i), 10) + ")" +} diff --git a/riscv64/riscv64asm/decode.go b/riscv64/riscv64asm/decode.go new file mode 100644 index 00000000..d78fef9e --- /dev/null +++ b/riscv64/riscv64asm/decode.go @@ -0,0 +1,550 @@ +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package riscv64asm + +import ( + "encoding/binary" + "errors" +) + +type argTypeList [6]argType + +// An instFormat describes the format of an instruction encoding. +type instFormat struct { + mask uint32 + value uint32 + op Op + // args describe how to decode the instruction arguments. + // args is stored as a fixed-size array. + // if there are fewer than len(args) arguments, args[i] == 0 marks + // the end of the argument list. + args argTypeList +} + +var ( + errShort = errors.New("truncated instruction") + errUnknown = errors.New("unknown instruction") +) + +var decoderCover []bool + +func init() { + decoderCover = make([]bool, len(instFormats)) +} + +// Decode decodes the 4 bytes in src as a single instruction. +func Decode(src []byte) (Inst, error) { + length := len(src) + if length < 2 { + return Inst{}, errShort + } + + var x uint32 + // Non-RVC instructions always starts with 0x11 + // So check whether src[0] & 3 == 3 + if src[0]&3 == 3 { + if length < 4 { + return Inst{}, errShort + } + length = 4 + x = binary.LittleEndian.Uint32(src) + } else { + length = 2 + x = uint32(binary.LittleEndian.Uint16(src)) + } + +Search: + for i, f := range instFormats { + if (x & f.mask) != f.value { + continue + } + + // Decode args. + var args Args + for j, aop := range f.args { + if aop == 0 { + break + } + arg := decodeArg(aop, x, i) + if arg == nil && f.op != C_NOP { + // Cannot decode argument. + continue Search + } + args[j] = arg + } + + if length == 2 { + args = convertCompressedIns(&f, args) + } + + decoderCover[i] = true + inst := Inst{ + Op: f.op, + Args: args, + Enc: x, + Len: length, + } + return inst, nil + } + return Inst{}, errUnknown +} + +// decodeArg decodes the arg described by aop from the instruction bits x. +// It returns nil if x cannot be decoded according to aop. 
+func decodeArg(aop argType, x uint32, index int) Arg { + switch aop { + case arg_rd: + return X0 + Reg((x>>7)&((1<<5)-1)) + + case arg_rs1: + return X0 + Reg((x>>15)&((1<<5)-1)) + + case arg_rs2: + return X0 + Reg((x>>20)&((1<<5)-1)) + + case arg_rs3: + return X0 + Reg((x>>27)&((1<<5)-1)) + + case arg_fd: + return F0 + Reg((x>>7)&((1<<5)-1)) + + case arg_fs1: + return F0 + Reg((x>>15)&((1<<5)-1)) + + case arg_fs2: + return F0 + Reg((x>>20)&((1<<5)-1)) + + case arg_fs3: + return F0 + Reg((x>>27)&((1<<5)-1)) + + case arg_rs1_amo: + return AmoReg{X0 + Reg((x>>15)&((1<<5)-1))} + + case arg_rs1_mem: + imm := x >> 20 + // Sign-extend + if imm>>uint32(12-1) == 1 { + imm |= 0xfffff << 12 + } + return RegOffset{X0 + Reg((x>>15)&((1<<5)-1)), Simm{int32(imm), true, 12}} + + case arg_rs1_store: + imm := (x<<20)>>27 | (x>>25)<<5 + // Sign-extend + if imm>>uint32(12-1) == 1 { + imm |= 0xfffff << 12 + } + return RegOffset{X0 + Reg((x>>15)&((1<<5)-1)), Simm{int32(imm), true, 12}} + + case arg_pred: + imm := x << 4 >> 28 + return MemOrder(uint8(imm)) + + case arg_succ: + imm := x << 8 >> 28 + return MemOrder(uint8(imm)) + + case arg_csr: + imm := x >> 20 + return CSR(imm) + + case arg_zimm: + imm := x << 12 >> 27 + return Uimm{imm, true} + + case arg_shamt5: + imm := x << 7 >> 27 + return Uimm{imm, false} + + case arg_shamt6: + imm := x << 6 >> 26 + return Uimm{imm, false} + + case arg_imm12: + imm := x >> 20 + // Sign-extend + if imm>>uint32(12-1) == 1 { + imm |= 0xfffff << 12 + } + return Simm{int32(imm), true, 12} + + case arg_imm20: + imm := x >> 12 + return Uimm{imm, false} + + case arg_jimm20: + imm := (x>>31)<<20 | (x<<1)>>22<<1 | (x<<11)>>31<<11 | (x<<12)>>24<<12 + // Sign-extend + if imm>>uint32(21-1) == 1 { + imm |= 0x7ff << 21 + } + return Simm{int32(imm), true, 21} + + case arg_simm12: + imm := (x<<20)>>27 | (x>>25)<<5 + // Sign-extend + if imm>>uint32(12-1) == 1 { + imm |= 0xfffff << 12 + } + return Simm{int32(imm), true, 12} + + case arg_bimm12: + imm := 
(x<<20)>>28<<1 | (x<<1)>>26<<5 | (x<<24)>>31<<11 | (x>>31)<<12 + // Sign-extend + if imm>>uint32(13-1) == 1 { + imm |= 0x7ffff << 13 + } + return Simm{int32(imm), true, 13} + + case arg_rd_p, arg_rs2_p: + return X8 + Reg((x>>2)&((1<<3)-1)) + + case arg_fd_p, arg_fs2_p: + return F8 + Reg((x>>2)&((1<<3)-1)) + + case arg_rs1_p, arg_rd_rs1_p: + return X8 + Reg((x>>7)&((1<<3)-1)) + + case arg_rd_n0, arg_rs1_n0, arg_rd_rs1_n0, arg_c_rs1_n0: + if X0+Reg((x>>7)&((1<<5)-1)) == X0 { + return nil + } + return X0 + Reg((x>>7)&((1<<5)-1)) + + case arg_c_rs2_n0: + if X0+Reg((x>>2)&((1<<5)-1)) == X0 { + return nil + } + return X0 + Reg((x>>2)&((1<<5)-1)) + + case arg_c_fs2: + return F0 + Reg((x>>2)&((1<<5)-1)) + + case arg_c_rs2: + return X0 + Reg((x>>2)&((1<<5)-1)) + + case arg_rd_n2: + if X0+Reg((x>>7)&((1<<5)-1)) == X0 || X0+Reg((x>>7)&((1<<5)-1)) == X2 { + return nil + } + return X0 + Reg((x>>7)&((1<<5)-1)) + + case arg_c_imm6: + imm := (x<<25)>>27 | (x<<19)>>31<<5 + // Sign-extend + if imm>>uint32(6-1) == 1 { + imm |= 0x3ffffff << 6 + } + return Simm{int32(imm), true, 6} + + case arg_c_nzimm6: + imm := (x<<25)>>27 | (x<<19)>>31<<5 + // Sign-extend + if imm>>uint32(6-1) == 1 { + imm |= 0x3ffffff << 6 + } + if int32(imm) == 0 { + return nil + } + return Simm{int32(imm), true, 6} + + case arg_c_nzuimm6: + imm := (x<<25)>>27 | (x<<19)>>31<<5 + if int32(imm) == 0 { + return nil + } + return Uimm{imm, false} + + case arg_c_uimm7: + imm := (x<<26)>>31<<6 | (x<<25)>>31<<2 | (x<<19)>>29<<3 + return Uimm{imm, false} + + case arg_c_uimm8: + imm := (x<<25)>>30<<6 | (x<<19)>>29<<3 + return Uimm{imm, false} + + case arg_c_uimm8sp_s: + imm := (x<<23)>>30<<6 | (x<<19)>>28<<2 + return Uimm{imm, false} + + case arg_c_uimm8sp: + imm := (x<<25)>>29<<2 | (x<<19)>>31<<5 | (x<<28)>>30<<6 + return Uimm{imm, false} + + case arg_c_uimm9sp_s: + imm := (x<<22)>>29<<6 | (x<<19)>>29<<3 + return Uimm{imm, false} + + case arg_c_uimm9sp: + imm := (x<<25)>>30<<3 | (x<<19)>>31<<5 | (x<<27)>>29<<6 + return 
Uimm{imm, false} + + case arg_c_bimm9: + imm := (x<<29)>>31<<5 | (x<<27)>>30<<1 | (x<<25)>>30<<6 | (x<<19)>>31<<8 | (x<<20)>>30<<3 + // Sign-extend + if imm>>uint32(9-1) == 1 { + imm |= 0x7fffff << 9 + } + return Simm{int32(imm), true, 9} + + case arg_c_nzimm10: + imm := (x<<29)>>31<<5 | (x<<27)>>30<<7 | (x<<26)>>31<<6 | (x<<25)>>31<<4 | (x<<19)>>31<<9 + // Sign-extend + if imm>>uint32(10-1) == 1 { + imm |= 0x3fffff << 10 + } + if int32(imm) == 0 { + return nil + } + return Simm{int32(imm), true, 10} + + case arg_c_nzuimm10: + imm := (x<<26)>>31<<3 | (x<<25)>>31<<2 | (x<<21)>>28<<6 | (x<<19)>>30<<4 + if int32(imm) == 0 { + return nil + } + return Uimm{imm, false} + + case arg_c_imm12: + imm := (x<<29)>>31<<5 | (x<<26)>>28<<1 | (x<<25)>>31<<7 | (x<<24)>>31<<6 | (x<<23)>>31<<10 | (x<<21)>>30<<8 | (x<<20)>>31<<4 | (x<<19)>>31<<11 + // Sign-extend + if imm>>uint32(12-1) == 1 { + imm |= 0xfffff << 12 + } + return Simm{int32(imm), true, 12} + + case arg_c_nzimm18: + imm := (x<<25)>>27<<12 | (x<<19)>>31<<17 + // Sign-extend + if imm>>uint32(18-1) == 1 { + imm |= 0x3fff << 18 + } + if int32(imm) == 0 { + return nil + } + return Simm{int32(imm), true, 18} + + default: + return nil + } +} + +// convertCompressedIns rewrites the RVC Instruction to regular Instructions +func convertCompressedIns(f *instFormat, args Args) Args { + var newargs Args + switch f.op { + case C_ADDI4SPN: + f.op = ADDI + newargs[0] = args[0] + newargs[1] = Reg(X2) + newargs[2] = Simm{int32(args[1].(Uimm).Imm), true, 12} + + case C_LW: + f.op = LW + newargs[0] = args[0] + newargs[1] = RegOffset{args[1].(Reg), Simm{int32(args[2].(Uimm).Imm), true, 12}} + + case C_SW: + f.op = SW + newargs[0] = args[1] + newargs[1] = RegOffset{args[0].(Reg), Simm{int32(args[2].(Uimm).Imm), true, 12}} + + case C_NOP: + f.op = ADDI + newargs[0] = X0 + newargs[1] = X0 + newargs[2] = Simm{0, true, 12} + + case C_ADDI: + f.op = ADDI + newargs[0] = args[0] + newargs[1] = args[0] + newargs[2] = Simm{args[1].(Simm).Imm, true, 
12} + + case C_LI: + f.op = ADDI + newargs[0] = args[0] + newargs[1] = Reg(X0) + newargs[2] = Simm{args[1].(Simm).Imm, true, 12} + + case C_ADDI16SP: + f.op = ADDI + newargs[0] = Reg(X2) + newargs[1] = Reg(X2) + newargs[2] = Simm{args[0].(Simm).Imm, true, 12} + + case C_LUI: + f.op = LUI + newargs[0] = args[0] + newargs[1] = Uimm{uint32(args[1].(Simm).Imm >> 12), false} + + case C_ANDI: + f.op = ANDI + newargs[0] = args[0] + newargs[1] = args[0] + newargs[2] = Simm{args[1].(Simm).Imm, true, 12} + + case C_SUB: + f.op = SUB + newargs[0] = args[0] + newargs[1] = args[0] + newargs[2] = args[1] + + case C_XOR: + f.op = XOR + newargs[0] = args[0] + newargs[1] = args[0] + newargs[2] = args[1] + + case C_OR: + f.op = OR + newargs[0] = args[0] + newargs[1] = args[0] + newargs[2] = args[1] + + case C_AND: + f.op = AND + newargs[0] = args[0] + newargs[1] = args[0] + newargs[2] = args[1] + + case C_J: + f.op = JAL + newargs[0] = Reg(X0) + newargs[1] = Simm{args[0].(Simm).Imm, true, 21} + + case C_BEQZ: + f.op = BEQ + newargs[0] = args[0] + newargs[1] = Reg(X0) + newargs[2] = Simm{args[1].(Simm).Imm, true, 13} + + case C_BNEZ: + f.op = BNE + newargs[0] = args[0] + newargs[1] = Reg(X0) + newargs[2] = Simm{args[1].(Simm).Imm, true, 13} + + case C_LWSP: + f.op = LW + newargs[0] = args[0] + newargs[1] = RegOffset{Reg(X2), Simm{int32(args[1].(Uimm).Imm), true, 12}} + + case C_JR: + f.op = JALR + newargs[0] = Reg(X0) + newargs[1] = RegOffset{args[0].(Reg), Simm{0, true, 12}} + + case C_MV: + f.op = ADD + newargs[0] = args[0] + newargs[1] = Reg(X0) + newargs[2] = args[1] + + case C_EBREAK: + f.op = EBREAK + + case C_JALR: + f.op = JALR + newargs[0] = Reg(X1) + newargs[1] = RegOffset{args[0].(Reg), Simm{0, true, 12}} + + case C_ADD: + f.op = ADD + newargs[0] = args[0] + newargs[1] = args[0] + newargs[2] = args[1] + + case C_SWSP: + f.op = SW + newargs[0] = args[0] + newargs[1] = RegOffset{Reg(X2), Simm{int32(args[1].(Uimm).Imm), true, 12}} + + // riscv64 compressed instructions + case 
C_LD: + f.op = LD + newargs[0] = args[0] + newargs[1] = RegOffset{args[1].(Reg), Simm{int32(args[2].(Uimm).Imm), true, 12}} + + case C_SD: + f.op = SD + newargs[0] = args[1] + newargs[1] = RegOffset{args[0].(Reg), Simm{int32(args[2].(Uimm).Imm), true, 12}} + + case C_ADDIW: + f.op = ADDIW + newargs[0] = args[0] + newargs[1] = args[0] + newargs[2] = Simm{args[1].(Simm).Imm, true, 12} + + case C_SRLI: + f.op = SRLI + newargs[0] = args[0] + newargs[1] = args[0] + newargs[2] = args[1] + + case C_SRAI: + f.op = SRAI + newargs[0] = args[0] + newargs[1] = args[0] + newargs[2] = args[1] + + case C_SUBW: + f.op = SUBW + newargs[0] = args[0] + newargs[1] = args[0] + newargs[2] = args[1] + + case C_ADDW: + f.op = ADDW + newargs[0] = args[0] + newargs[1] = args[0] + newargs[2] = args[1] + + case C_SLLI: + f.op = SLLI + newargs[0] = args[0] + newargs[1] = args[0] + newargs[2] = args[1] + + case C_LDSP: + f.op = LD + newargs[0] = args[0] + newargs[1] = RegOffset{Reg(X2), Simm{int32(args[1].(Uimm).Imm), true, 12}} + + case C_SDSP: + f.op = SD + newargs[0] = args[0] + newargs[1] = RegOffset{Reg(X2), Simm{int32(args[1].(Uimm).Imm), true, 12}} + + // riscv double precision floating point compressed instructions + case C_FLD: + f.op = FLD + newargs[0] = args[0] + newargs[1] = RegOffset{args[1].(Reg), Simm{int32(args[2].(Uimm).Imm), true, 12}} + + case C_FSD: + f.op = FSD + newargs[0] = args[1] + newargs[1] = RegOffset{args[0].(Reg), Simm{int32(args[2].(Uimm).Imm), true, 12}} + + case C_FLDSP: + f.op = FLD + newargs[0] = args[0] + newargs[1] = RegOffset{Reg(X2), Simm{int32(args[1].(Uimm).Imm), true, 12}} + + case C_FSDSP: + f.op = FSD + newargs[0] = args[0] + newargs[1] = RegOffset{Reg(X2), Simm{int32(args[1].(Uimm).Imm), true, 12}} + + case C_UNIMP: + f.op = CSRRW + newargs[0] = Reg(X0) + newargs[1] = CSR(CYCLE) + newargs[2] = Reg(X0) + } + return newargs +} diff --git a/riscv64/riscv64asm/gnu.go b/riscv64/riscv64asm/gnu.go new file mode 100644 index 00000000..d6b3dc04 --- /dev/null 
+++ b/riscv64/riscv64asm/gnu.go @@ -0,0 +1,328 @@ +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package riscv64asm + +import ( + "strings" +) + +// GNUSyntax returns the GNU assembler syntax for the instruction, as defined by GNU binutils. +// This form typically matches the syntax defined in the RISC-V Instruction Set Manual. See +// https://github.com/riscv/riscv-isa-manual/releases/download/Ratified-IMAFDQC/riscv-spec-20191213.pdf +func GNUSyntax(inst Inst) string { + op := strings.ToLower(inst.Op.String()) + var args []string + for _, a := range inst.Args { + if a == nil { + break + } + args = append(args, strings.ToLower(a.String())) + } + + switch inst.Op { + case ADDI, ADDIW, ANDI, ORI, SLLI, SLLIW, SRAI, SRAIW, SRLI, SRLIW, XORI: + if inst.Op == ADDI { + if inst.Args[1].(Reg) == X0 && inst.Args[0].(Reg) != X0 { + op = "li" + args[1] = args[2] + args = args[:len(args)-1] + break + } + + if inst.Args[2].(Simm).Imm == 0 { + if inst.Args[0].(Reg) == X0 && inst.Args[1].(Reg) == X0 { + op = "nop" + args = nil + } else { + op = "mv" + args = args[:len(args)-1] + } + } + } + + if inst.Op == ADDIW && inst.Args[2].(Simm).Imm == 0 { + op = "sext.w" + args = args[:len(args)-1] + } + + if inst.Op == XORI && inst.Args[2].(Simm).String() == "-1" { + op = "not" + args = args[:len(args)-1] + } + + case ADD: + if inst.Args[1].(Reg) == X0 { + op = "mv" + args[1] = args[2] + args = args[:len(args)-1] + } + + case BEQ: + if inst.Args[1].(Reg) == X0 { + op = "beqz" + args[1] = args[2] + args = args[:len(args)-1] + } + + case BGE: + if inst.Args[1].(Reg) == X0 { + op = "bgez" + args[1] = args[2] + args = args[:len(args)-1] + } else if inst.Args[0].(Reg) == X0 { + op = "blez" + args[0], args[1] = args[1], args[2] + args = args[:len(args)-1] + } + + case BLT: + if inst.Args[1].(Reg) == X0 { + op = "bltz" + args[1] = args[2] + args = args[:len(args)-1] + } else if 
inst.Args[0].(Reg) == X0 { + op = "bgtz" + args[0], args[1] = args[1], args[2] + args = args[:len(args)-1] + } + + case BNE: + if inst.Args[1].(Reg) == X0 { + op = "bnez" + args[1] = args[2] + args = args[:len(args)-1] + } + + case CSRRC: + if inst.Args[0].(Reg) == X0 { + op = "csrc" + args[0], args[1] = args[1], args[2] + args = args[:len(args)-1] + } + + case CSRRCI: + if inst.Args[0].(Reg) == X0 { + op = "csrci" + args[0], args[1] = args[1], args[2] + args = args[:len(args)-1] + } + + case CSRRS: + if inst.Args[2].(Reg) == X0 { + switch inst.Args[1].(CSR) { + case FCSR: + op = "frcsr" + args = args[:len(args)-2] + + case FFLAGS: + op = "frflags" + args = args[:len(args)-2] + + case FRM: + op = "frrm" + args = args[:len(args)-2] + + // rdcycleh, rdinstreth and rdtimeh are RV-32 only instructions. + // So not included there. + case CYCLE: + op = "rdcycle" + args = args[:len(args)-2] + + case INSTRET: + op = "rdinstret" + args = args[:len(args)-2] + + case TIME: + op = "rdtime" + args = args[:len(args)-2] + + default: + op = "csrr" + args = args[:len(args)-1] + } + } else if inst.Args[0].(Reg) == X0 { + op = "csrs" + args[0], args[1] = args[1], args[2] + args = args[:len(args)-1] + } + + case CSRRSI: + if inst.Args[0].(Reg) == X0 { + op = "csrsi" + args[0], args[1] = args[1], args[2] + args = args[:len(args)-1] + } + + case CSRRW: + switch inst.Args[1].(CSR) { + case FCSR: + op = "fscsr" + if inst.Args[0].(Reg) == X0 { + args[0] = args[2] + args = args[:len(args)-2] + } else { + args[1] = args[2] + args = args[:len(args)-1] + } + + case FFLAGS: + op = "fsflags" + if inst.Args[0].(Reg) == X0 { + args[0] = args[2] + args = args[:len(args)-2] + } else { + args[1] = args[2] + args = args[:len(args)-1] + } + + case FRM: + op = "fsrm" + if inst.Args[0].(Reg) == X0 { + args[0] = args[2] + args = args[:len(args)-2] + } else { + args[1] = args[2] + args = args[:len(args)-1] + } + + case CYCLE: + if inst.Args[0].(Reg) == X0 && inst.Args[2].(Reg) == X0 { + op = "unimp" + args 
= nil + } + + default: + if inst.Args[0].(Reg) == X0 { + op = "csrw" + args[0], args[1] = args[1], args[2] + args = args[:len(args)-1] + } + } + + case CSRRWI: + if inst.Args[0].(Reg) == X0 { + op = "csrwi" + args[0], args[1] = args[1], args[2] + args = args[:len(args)-1] + } + + // When both pred and succ equals to iorw, the GNU objdump will omit them. + case FENCE: + if inst.Args[0].(MemOrder).String() == "iorw" && + inst.Args[1].(MemOrder).String() == "iorw" { + args = nil + } + + case FSGNJX_D: + if inst.Args[1].(Reg) == inst.Args[2].(Reg) { + op = "fabs.d" + args = args[:len(args)-1] + } + + case FSGNJX_S: + if inst.Args[1].(Reg) == inst.Args[2].(Reg) { + op = "fabs.s" + args = args[:len(args)-1] + } + + case FSGNJ_D: + if inst.Args[1].(Reg) == inst.Args[2].(Reg) { + op = "fmv.d" + args = args[:len(args)-1] + } + + case FSGNJ_S: + if inst.Args[1].(Reg) == inst.Args[2].(Reg) { + op = "fmv.s" + args = args[:len(args)-1] + } + + case FSGNJN_D: + if inst.Args[1].(Reg) == inst.Args[2].(Reg) { + op = "fneg.d" + args = args[:len(args)-1] + } + + case FSGNJN_S: + if inst.Args[1].(Reg) == inst.Args[2].(Reg) { + op = "fneg.s" + args = args[:len(args)-1] + } + + case JAL: + if inst.Args[0].(Reg) == X0 { + op = "j" + args[0] = args[1] + args = args[:len(args)-1] + } else if inst.Args[0].(Reg) == X1 { + op = "jal" + args[0] = args[1] + args = args[:len(args)-1] + } + + case JALR: + if inst.Args[0].(Reg) == X1 && inst.Args[1].(RegOffset).Ofs.Imm == 0 { + args[0] = inst.Args[1].(RegOffset).OfsReg.String() + args = args[:len(args)-1] + } + + if inst.Args[0].(Reg) == X0 { + if inst.Args[1].(RegOffset).OfsReg == X1 && inst.Args[1].(RegOffset).Ofs.Imm == 0 { + op = "ret" + args = nil + } else if inst.Args[1].(RegOffset).Ofs.Imm == 0 { + op = "jr" + args[0] = inst.Args[1].(RegOffset).OfsReg.String() + args = args[:len(args)-1] + } else { + op = "jr" + args[0] = inst.Args[1].(RegOffset).String() + args = args[:len(args)-1] + } + } + + case SLTIU: + if inst.Args[2].(Simm).String() 
== "1" { + op = "seqz" + args = args[:len(args)-1] + } + + case SLT: + if inst.Args[1].(Reg) == X0 { + op = "sgtz" + args[1] = args[2] + args = args[:len(args)-1] + } else if inst.Args[2].(Reg) == X0 { + op = "sltz" + args = args[:len(args)-1] + } + + case SLTU: + if inst.Args[1].(Reg) == X0 { + op = "snez" + args[1] = args[2] + args = args[:len(args)-1] + } + + case SUB: + if inst.Args[1].(Reg) == X0 { + op = "neg" + args[1] = args[2] + args = args[:len(args)-1] + } + + case SUBW: + if inst.Args[1].(Reg) == X0 { + op = "negw" + args[1] = args[2] + args = args[:len(args)-1] + } + } + + if args != nil { + op += " " + strings.Join(args, ",") + } + return op +} diff --git a/riscv64/riscv64asm/inst.go b/riscv64/riscv64asm/inst.go new file mode 100644 index 00000000..3c13567c --- /dev/null +++ b/riscv64/riscv64asm/inst.go @@ -0,0 +1,495 @@ +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package riscv64asm + +import ( + "fmt" + "strings" +) + +// An Op is a RISC-V opcode. +type Op uint16 + +// NOTE: The actual Op values are defined in tables.go. +func (op Op) String() string { + if op >= Op(len(opstr)) || opstr[op] == "" { + return fmt.Sprintf("Op(%d)", op) + } + + return opstr[op] +} + +// An Arg is a single instruction argument. +type Arg interface { + String() string +} + +// An Args holds the instruction arguments. +// If an instruction has fewer than 6 arguments, +// the final elements in the array are nil. +type Args [6]Arg + +// An Inst is a single instruction. +type Inst struct { + Op Op // Opcode mnemonic. + Enc uint32 // Raw encoding bits. + Args Args // Instruction arguments, in RISC-V mamual order. 
+ Len int // Length of encoded instruction in bytes +} + +func (i Inst) String() string { + var args []string + for _, arg := range i.Args { + if arg == nil { + break + } + args = append(args, arg.String()) + } + + if len(args) == 0 { + return i.Op.String() + } + return i.Op.String() + " " + strings.Join(args, ",") +} + +// A Reg is a single register. +// The zero value denotes X0, not the absence of a register. +type Reg uint16 + +const ( + // General-purpose register + X0 Reg = iota + X1 + X2 + X3 + X4 + X5 + X6 + X7 + X8 + X9 + X10 + X11 + X12 + X13 + X14 + X15 + X16 + X17 + X18 + X19 + X20 + X21 + X22 + X23 + X24 + X25 + X26 + X27 + X28 + X29 + X30 + X31 + + //Float point register + F0 + F1 + F2 + F3 + F4 + F5 + F6 + F7 + F8 + F9 + F10 + F11 + F12 + F13 + F14 + F15 + F16 + F17 + F18 + F19 + F20 + F21 + F22 + F23 + F24 + F25 + F26 + F27 + F28 + F29 + F30 + F31 +) + +func (r Reg) String() string { + switch { + case r >= X0 && r <= X31: + return fmt.Sprintf("x%d", r) + + case r >= F0 && r <= F31: + return fmt.Sprintf("f%d", r-F0) + + default: + return fmt.Sprintf("Unknown(%d)", r) + } +} + +// A CSR is a single control and status register. +// Use stringer to generate CSR match table. 
+// +//go:generate stringer -type=CSR +type CSR uint16 + +const ( + // Control status register + USTATUS CSR = 0x0000 + FFLAGS CSR = 0x0001 + FRM CSR = 0x0002 + FCSR CSR = 0x0003 + UIE CSR = 0x0004 + UTVEC CSR = 0x0005 + UTVT CSR = 0x0007 + VSTART CSR = 0x0008 + VXSAT CSR = 0x0009 + VXRM CSR = 0x000a + VCSR CSR = 0x000f + USCRATCH CSR = 0x0040 + UEPC CSR = 0x0041 + UCAUSE CSR = 0x0042 + UTVAL CSR = 0x0043 + UIP CSR = 0x0044 + UNXTI CSR = 0x0045 + UINTSTATUS CSR = 0x0046 + USCRATCHCSW CSR = 0x0048 + USCRATCHCSWL CSR = 0x0049 + SSTATUS CSR = 0x0100 + SEDELEG CSR = 0x0102 + SIDELEG CSR = 0x0103 + SIE CSR = 0x0104 + STVEC CSR = 0x0105 + SCOUNTEREN CSR = 0x0106 + STVT CSR = 0x0107 + SSCRATCH CSR = 0x0140 + SEPC CSR = 0x0141 + SCAUSE CSR = 0x0142 + STVAL CSR = 0x0143 + SIP CSR = 0x0144 + SNXTI CSR = 0x0145 + SINTSTATUS CSR = 0x0146 + SSCRATCHCSW CSR = 0x0148 + SSCRATCHCSWL CSR = 0x0149 + SATP CSR = 0x0180 + VSSTATUS CSR = 0x0200 + VSIE CSR = 0x0204 + VSTVEC CSR = 0x0205 + VSSCRATCH CSR = 0x0240 + VSEPC CSR = 0x0241 + VSCAUSE CSR = 0x0242 + VSTVAL CSR = 0x0243 + VSIP CSR = 0x0244 + VSATP CSR = 0x0280 + MSTATUS CSR = 0x0300 + MISA CSR = 0x0301 + MEDELEG CSR = 0x0302 + MIDELEG CSR = 0x0303 + MIE CSR = 0x0304 + MTVEC CSR = 0x0305 + MCOUNTEREN CSR = 0x0306 + MTVT CSR = 0x0307 + MSTATUSH CSR = 0x0310 + MCOUNTINHIBIT CSR = 0x0320 + MHPMEVENT3 CSR = 0x0323 + MHPMEVENT4 CSR = 0x0324 + MHPMEVENT5 CSR = 0x0325 + MHPMEVENT6 CSR = 0x0326 + MHPMEVENT7 CSR = 0x0327 + MHPMEVENT8 CSR = 0x0328 + MHPMEVENT9 CSR = 0x0329 + MHPMEVENT10 CSR = 0x032a + MHPMEVENT11 CSR = 0x032b + MHPMEVENT12 CSR = 0x032c + MHPMEVENT13 CSR = 0x032d + MHPMEVENT14 CSR = 0x032e + MHPMEVENT15 CSR = 0x032f + MHPMEVENT16 CSR = 0x0330 + MHPMEVENT17 CSR = 0x0331 + MHPMEVENT18 CSR = 0x0332 + MHPMEVENT19 CSR = 0x0333 + MHPMEVENT20 CSR = 0x0334 + MHPMEVENT21 CSR = 0x0335 + MHPMEVENT22 CSR = 0x0336 + MHPMEVENT23 CSR = 0x0337 + MHPMEVENT24 CSR = 0x0338 + MHPMEVENT25 CSR = 0x0339 + MHPMEVENT26 CSR = 0x033a + MHPMEVENT27 CSR = 
0x033b + MHPMEVENT28 CSR = 0x033c + MHPMEVENT29 CSR = 0x033d + MHPMEVENT30 CSR = 0x033e + MHPMEVENT31 CSR = 0x033f + MSCRATCH CSR = 0x0340 + MEPC CSR = 0x0341 + MCAUSE CSR = 0x0342 + MTVAL CSR = 0x0343 + MIP CSR = 0x0344 + MNXTI CSR = 0x0345 + MINTSTATUS CSR = 0x0346 + MSCRATCHCSW CSR = 0x0348 + MSCRATCHCSWL CSR = 0x0349 + MTINST CSR = 0x034a + MTVAL2 CSR = 0x034b + PMPCFG0 CSR = 0x03a0 + PMPCFG1 CSR = 0x03a1 + PMPCFG2 CSR = 0x03a2 + PMPCFG3 CSR = 0x03a3 + PMPADDR0 CSR = 0x03b0 + PMPADDR1 CSR = 0x03b1 + PMPADDR2 CSR = 0x03b2 + PMPADDR3 CSR = 0x03b3 + PMPADDR4 CSR = 0x03b4 + PMPADDR5 CSR = 0x03b5 + PMPADDR6 CSR = 0x03b6 + PMPADDR7 CSR = 0x03b7 + PMPADDR8 CSR = 0x03b8 + PMPADDR9 CSR = 0x03b9 + PMPADDR10 CSR = 0x03ba + PMPADDR11 CSR = 0x03bb + PMPADDR12 CSR = 0x03bc + PMPADDR13 CSR = 0x03bd + PMPADDR14 CSR = 0x03be + PMPADDR15 CSR = 0x03bf + HSTATUS CSR = 0x0600 + HEDELEG CSR = 0x0602 + HIDELEG CSR = 0x0603 + HIE CSR = 0x0604 + HTIMEDELTA CSR = 0x0605 + HCOUNTEREN CSR = 0x0606 + HGEIE CSR = 0x0607 + HTIMEDELTAH CSR = 0x0615 + HTVAL CSR = 0x0643 + HIP CSR = 0x0644 + HVIP CSR = 0x0645 + HTINST CSR = 0x064a + HGATP CSR = 0x0680 + TSELECT CSR = 0x07a0 + TDATA1 CSR = 0x07a1 + TDATA2 CSR = 0x07a2 + TDATA3 CSR = 0x07a3 + TINFO CSR = 0x07a4 + TCONTROL CSR = 0x07a5 + MCONTEXT CSR = 0x07a8 + MNOISE CSR = 0x07a9 + SCONTEXT CSR = 0x07aa + DCSR CSR = 0x07b0 + DPC CSR = 0x07b1 + DSCRATCH0 CSR = 0x07b2 + DSCRATCH1 CSR = 0x07b3 + MCYCLE CSR = 0x0b00 + MINSTRET CSR = 0x0b02 + MHPMCOUNTER3 CSR = 0x0b03 + MHPMCOUNTER4 CSR = 0x0b04 + MHPMCOUNTER5 CSR = 0x0b05 + MHPMCOUNTER6 CSR = 0x0b06 + MHPMCOUNTER7 CSR = 0x0b07 + MHPMCOUNTER8 CSR = 0x0b08 + MHPMCOUNTER9 CSR = 0x0b09 + MHPMCOUNTER10 CSR = 0x0b0a + MHPMCOUNTER11 CSR = 0x0b0b + MHPMCOUNTER12 CSR = 0x0b0c + MHPMCOUNTER13 CSR = 0x0b0d + MHPMCOUNTER14 CSR = 0x0b0e + MHPMCOUNTER15 CSR = 0x0b0f + MHPMCOUNTER16 CSR = 0x0b10 + MHPMCOUNTER17 CSR = 0x0b11 + MHPMCOUNTER18 CSR = 0x0b12 + MHPMCOUNTER19 CSR = 0x0b13 + MHPMCOUNTER20 CSR = 0x0b14 + 
MHPMCOUNTER21 CSR = 0x0b15 + MHPMCOUNTER22 CSR = 0x0b16 + MHPMCOUNTER23 CSR = 0x0b17 + MHPMCOUNTER24 CSR = 0x0b18 + MHPMCOUNTER25 CSR = 0x0b19 + MHPMCOUNTER26 CSR = 0x0b1a + MHPMCOUNTER27 CSR = 0x0b1b + MHPMCOUNTER28 CSR = 0x0b1c + MHPMCOUNTER29 CSR = 0x0b1d + MHPMCOUNTER30 CSR = 0x0b1e + MHPMCOUNTER31 CSR = 0x0b1f + MCYCLEH CSR = 0x0b80 + MINSTRETH CSR = 0x0b82 + MHPMCOUNTER3H CSR = 0x0b83 + MHPMCOUNTER4H CSR = 0x0b84 + MHPMCOUNTER5H CSR = 0x0b85 + MHPMCOUNTER6H CSR = 0x0b86 + MHPMCOUNTER7H CSR = 0x0b87 + MHPMCOUNTER8H CSR = 0x0b88 + MHPMCOUNTER9H CSR = 0x0b89 + MHPMCOUNTER10H CSR = 0x0b8a + MHPMCOUNTER11H CSR = 0x0b8b + MHPMCOUNTER12H CSR = 0x0b8c + MHPMCOUNTER13H CSR = 0x0b8d + MHPMCOUNTER14H CSR = 0x0b8e + MHPMCOUNTER15H CSR = 0x0b8f + MHPMCOUNTER16H CSR = 0x0b90 + MHPMCOUNTER17H CSR = 0x0b91 + MHPMCOUNTER18H CSR = 0x0b92 + MHPMCOUNTER19H CSR = 0x0b93 + MHPMCOUNTER20H CSR = 0x0b94 + MHPMCOUNTER21H CSR = 0x0b95 + MHPMCOUNTER22H CSR = 0x0b96 + MHPMCOUNTER23H CSR = 0x0b97 + MHPMCOUNTER24H CSR = 0x0b98 + MHPMCOUNTER25H CSR = 0x0b99 + MHPMCOUNTER26H CSR = 0x0b9a + MHPMCOUNTER27H CSR = 0x0b9b + MHPMCOUNTER28H CSR = 0x0b9c + MHPMCOUNTER29H CSR = 0x0b9d + MHPMCOUNTER30H CSR = 0x0b9e + MHPMCOUNTER31H CSR = 0x0b9f + CYCLE CSR = 0x0c00 + TIME CSR = 0x0c01 + INSTRET CSR = 0x0c02 + HPMCOUNTER3 CSR = 0x0c03 + HPMCOUNTER4 CSR = 0x0c04 + HPMCOUNTER5 CSR = 0x0c05 + HPMCOUNTER6 CSR = 0x0c06 + HPMCOUNTER7 CSR = 0x0c07 + HPMCOUNTER8 CSR = 0x0c08 + HPMCOUNTER9 CSR = 0x0c09 + HPMCOUNTER10 CSR = 0x0c0a + HPMCOUNTER11 CSR = 0x0c0b + HPMCOUNTER12 CSR = 0x0c0c + HPMCOUNTER13 CSR = 0x0c0d + HPMCOUNTER14 CSR = 0x0c0e + HPMCOUNTER15 CSR = 0x0c0f + HPMCOUNTER16 CSR = 0x0c10 + HPMCOUNTER17 CSR = 0x0c11 + HPMCOUNTER18 CSR = 0x0c12 + HPMCOUNTER19 CSR = 0x0c13 + HPMCOUNTER20 CSR = 0x0c14 + HPMCOUNTER21 CSR = 0x0c15 + HPMCOUNTER22 CSR = 0x0c16 + HPMCOUNTER23 CSR = 0x0c17 + HPMCOUNTER24 CSR = 0x0c18 + HPMCOUNTER25 CSR = 0x0c19 + HPMCOUNTER26 CSR = 0x0c1a + HPMCOUNTER27 CSR = 0x0c1b + HPMCOUNTER28 
CSR = 0x0c1c + HPMCOUNTER29 CSR = 0x0c1d + HPMCOUNTER30 CSR = 0x0c1e + HPMCOUNTER31 CSR = 0x0c1f + VL CSR = 0x0c20 + VTYPE CSR = 0x0c21 + VLENB CSR = 0x0c22 + CYCLEH CSR = 0x0c80 + TIMEH CSR = 0x0c81 + INSTRETH CSR = 0x0c82 + HPMCOUNTER3H CSR = 0x0c83 + HPMCOUNTER4H CSR = 0x0c84 + HPMCOUNTER5H CSR = 0x0c85 + HPMCOUNTER6H CSR = 0x0c86 + HPMCOUNTER7H CSR = 0x0c87 + HPMCOUNTER8H CSR = 0x0c88 + HPMCOUNTER9H CSR = 0x0c89 + HPMCOUNTER10H CSR = 0x0c8a + HPMCOUNTER11H CSR = 0x0c8b + HPMCOUNTER12H CSR = 0x0c8c + HPMCOUNTER13H CSR = 0x0c8d + HPMCOUNTER14H CSR = 0x0c8e + HPMCOUNTER15H CSR = 0x0c8f + HPMCOUNTER16H CSR = 0x0c90 + HPMCOUNTER17H CSR = 0x0c91 + HPMCOUNTER18H CSR = 0x0c92 + HPMCOUNTER19H CSR = 0x0c93 + HPMCOUNTER20H CSR = 0x0c94 + HPMCOUNTER21H CSR = 0x0c95 + HPMCOUNTER22H CSR = 0x0c96 + HPMCOUNTER23H CSR = 0x0c97 + HPMCOUNTER24H CSR = 0x0c98 + HPMCOUNTER25H CSR = 0x0c99 + HPMCOUNTER26H CSR = 0x0c9a + HPMCOUNTER27H CSR = 0x0c9b + HPMCOUNTER28H CSR = 0x0c9c + HPMCOUNTER29H CSR = 0x0c9d + HPMCOUNTER30H CSR = 0x0c9e + HPMCOUNTER31H CSR = 0x0c9f + HGEIP CSR = 0x0e12 + MVENDORID CSR = 0x0f11 + MARCHID CSR = 0x0f12 + MIMPID CSR = 0x0f13 + MHARTID CSR = 0x0f14 + MENTROPY CSR = 0x0f15 +) + +// An Uimm is an unsigned immediate number +type Uimm struct { + Imm uint32 // 32-bit unsigned integer + Decimal bool // Print format of the immediate, either decimal or hexadecimal +} + +func (ui Uimm) String() string { + if ui.Decimal { + return fmt.Sprintf("%d", ui.Imm) + } + return fmt.Sprintf("%#x", ui.Imm) +} + +// A Simm is a signed immediate number +type Simm struct { + Imm int32 // 32-bit signed integer + Decimal bool // Print format of the immediate, either decimal or hexadecimal + Width uint8 // Actual width of the Simm +} + +func (si Simm) String() string { + if si.Decimal { + return fmt.Sprintf("%d", si.Imm) + } + return fmt.Sprintf("%#x", si.Imm) +} + +// An AmoReg is an atomic address register used in AMO instructions +type AmoReg struct { + reg Reg // Avoid promoted 
String method +} + +func (amoReg AmoReg) String() string { + return fmt.Sprintf("(%s)", amoReg.reg) +} + +// A RegOffset is a register with offset value +type RegOffset struct { + OfsReg Reg + Ofs Simm +} + +func (regofs RegOffset) String() string { + return fmt.Sprintf("%s(%s)", regofs.Ofs, regofs.OfsReg) +} + +// A MemOrder is a memory order hint in fence instruction +type MemOrder uint8 + +func (memOrder MemOrder) String() string { + var str string + if memOrder<<7>>7 == 1 { + str += "i" + } + if memOrder>>1<<7>>7 == 1 { + str += "o" + } + if memOrder>>2<<7>>7 == 1 { + str += "r" + } + if memOrder>>3<<7>>7 == 1 { + str += "w" + } + return str +} diff --git a/riscv64/riscv64asm/plan9x.go b/riscv64/riscv64asm/plan9x.go new file mode 100644 index 00000000..367122d9 --- /dev/null +++ b/riscv64/riscv64asm/plan9x.go @@ -0,0 +1,377 @@ +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package riscv64asm + +import ( + "fmt" + "io" + "strconv" + "strings" +) + +// GoSyntax returns the Go assembler syntax for the instruction. +// The syntax was originally defined by Plan 9. +// The pc is the program counter of the instruction, used for +// expanding PC-relative addresses into absolute ones. +// The symname function queries the symbol table for the program +// being disassembled. Given a target address it returns the name +// and base address of the symbol containing the target, if any; +// otherwise it returns "", 0. +// The reader text should read from the text segment using text addresses +// as offsets; it is used to display pc-relative loads as constant loads. 
+func GoSyntax(inst Inst, pc uint64, symname func(uint64) (string, uint64), text io.ReaderAt) string { + if symname == nil { + symname = func(uint64) (string, uint64) { return "", 0 } + } + + var args []string + for _, a := range inst.Args { + if a == nil { + break + } + args = append(args, plan9Arg(&inst, pc, symname, a)) + } + + op := inst.Op.String() + + switch inst.Op { + + case AMOADD_D, AMOADD_D_AQ, AMOADD_D_RL, AMOADD_D_AQRL, AMOADD_W, AMOADD_W_AQ, + AMOADD_W_RL, AMOADD_W_AQRL, AMOAND_D, AMOAND_D_AQ, AMOAND_D_RL, AMOAND_D_AQRL, + AMOAND_W, AMOAND_W_AQ, AMOAND_W_RL, AMOAND_W_AQRL, AMOMAXU_D, AMOMAXU_D_AQ, + AMOMAXU_D_RL, AMOMAXU_D_AQRL, AMOMAXU_W, AMOMAXU_W_AQ, AMOMAXU_W_RL, AMOMAXU_W_AQRL, + AMOMAX_D, AMOMAX_D_AQ, AMOMAX_D_RL, AMOMAX_D_AQRL, AMOMAX_W, AMOMAX_W_AQ, AMOMAX_W_RL, + AMOMAX_W_AQRL, AMOMINU_D, AMOMINU_D_AQ, AMOMINU_D_RL, AMOMINU_D_AQRL, AMOMINU_W, + AMOMINU_W_AQ, AMOMINU_W_RL, AMOMINU_W_AQRL, AMOMIN_D, AMOMIN_D_AQ, AMOMIN_D_RL, + AMOMIN_D_AQRL, AMOMIN_W, AMOMIN_W_AQ, AMOMIN_W_RL, AMOMIN_W_AQRL, AMOOR_D, AMOOR_D_AQ, + AMOOR_D_RL, AMOOR_D_AQRL, AMOOR_W, AMOOR_W_AQ, AMOOR_W_RL, AMOOR_W_AQRL, AMOSWAP_D, + AMOSWAP_D_AQ, AMOSWAP_D_RL, AMOSWAP_D_AQRL, AMOSWAP_W, AMOSWAP_W_AQ, AMOSWAP_W_RL, + AMOSWAP_W_AQRL, AMOXOR_D, AMOXOR_D_AQ, AMOXOR_D_RL, AMOXOR_D_AQRL, AMOXOR_W, + AMOXOR_W_AQ, AMOXOR_W_RL, AMOXOR_W_AQRL, SC_D, SC_D_AQ, SC_D_RL, SC_D_AQRL, + SC_W, SC_W_AQ, SC_W_RL, SC_W_AQRL: + // Atomic instructions have special operand order. 
+ args[2], args[1] = args[1], args[2] + + case ADDI: + if inst.Args[2].(Simm).Imm == 0 { + op = "MOV" + args = args[:len(args)-1] + } + + case ADDIW: + if inst.Args[2].(Simm).Imm == 0 { + op = "MOVW" + args = args[:len(args)-1] + } + + case ANDI: + if inst.Args[2].(Simm).Imm == 255 { + op = "MOVBU" + args = args[:len(args)-1] + } + + case BEQ: + if inst.Args[1].(Reg) == X0 { + op = "BEQZ" + args[1] = args[2] + args = args[:len(args)-1] + } + for i, j := 0, len(args)-1; i < j; i, j = i+1, j-1 { + args[i], args[j] = args[j], args[i] + } + + case BGE: + if inst.Args[1].(Reg) == X0 { + op = "BGEZ" + args[1] = args[2] + args = args[:len(args)-1] + } + for i, j := 0, len(args)-1; i < j; i, j = i+1, j-1 { + args[i], args[j] = args[j], args[i] + } + + case BLT: + if inst.Args[1].(Reg) == X0 { + op = "BLTZ" + args[1] = args[2] + args = args[:len(args)-1] + } + for i, j := 0, len(args)-1; i < j; i, j = i+1, j-1 { + args[i], args[j] = args[j], args[i] + } + + case BNE: + if inst.Args[1].(Reg) == X0 { + op = "BNEZ" + args[1] = args[2] + args = args[:len(args)-1] + } + for i, j := 0, len(args)-1; i < j; i, j = i+1, j-1 { + args[i], args[j] = args[j], args[i] + } + + case BLTU, BGEU: + for i, j := 0, len(args)-1; i < j; i, j = i+1, j-1 { + args[i], args[j] = args[j], args[i] + } + + case CSRRW: + switch inst.Args[1].(CSR) { + case FCSR: + op = "FSCSR" + args[1] = args[2] + args = args[:len(args)-1] + case FFLAGS: + op = "FSFLAGS" + args[1] = args[2] + args = args[:len(args)-1] + case FRM: + op = "FSRM" + args[1] = args[2] + args = args[:len(args)-1] + case CYCLE: + if inst.Args[0].(Reg) == X0 && inst.Args[2].(Reg) == X0 { + op = "UNIMP" + args = nil + } + } + + case CSRRS: + if inst.Args[2].(Reg) == X0 { + switch inst.Args[1].(CSR) { + case FCSR: + op = "FRCSR" + args = args[:len(args)-2] + case FFLAGS: + op = "FRFLAGS" + args = args[:len(args)-2] + case FRM: + op = "FRRM" + args = args[:len(args)-2] + case CYCLE: + op = "RDCYCLE" + args = args[:len(args)-2] + case CYCLEH: + op 
= "RDCYCLEH" + args = args[:len(args)-2] + case INSTRET: + op = "RDINSTRET" + args = args[:len(args)-2] + case INSTRETH: + op = "RDINSTRETH" + args = args[:len(args)-2] + case TIME: + op = "RDTIME" + args = args[:len(args)-2] + case TIMEH: + op = "RDTIMEH" + args = args[:len(args)-2] + } + } + + // Fence instruction in plan9 doesn't have any operands. + case FENCE: + args = nil + + case FMADD_D, FMADD_H, FMADD_Q, FMADD_S, FMSUB_D, FMSUB_H, + FMSUB_Q, FMSUB_S, FNMADD_D, FNMADD_H, FNMADD_Q, FNMADD_S, + FNMSUB_D, FNMSUB_H, FNMSUB_Q, FNMSUB_S: + args[1], args[3] = args[3], args[1] + + case FSGNJ_S: + if inst.Args[2] == inst.Args[1] { + op = "MOVF" + args = args[:len(args)-1] + } + + case FSGNJ_D: + if inst.Args[2] == inst.Args[1] { + op = "MOVD" + args = args[:len(args)-1] + } + + case FSGNJX_S: + if inst.Args[2] == inst.Args[1] { + op = "FABSS" + args = args[:len(args)-1] + } + + case FSGNJX_D: + if inst.Args[2] == inst.Args[1] { + op = "FABSD" + args = args[:len(args)-1] + } + + case FSGNJN_S: + if inst.Args[2] == inst.Args[1] { + op = "FNEGS" + args = args[:len(args)-1] + } + + case FSGNJN_D: + if inst.Args[2] == inst.Args[1] { + op = "FNESD" + args = args[:len(args)-1] + } + + case LD, SD: + op = "MOV" + if inst.Op == SD { + args[0], args[1] = args[1], args[0] + } + + case LB, SB: + op = "MOVB" + if inst.Op == SB { + args[0], args[1] = args[1], args[0] + } + + case LH, SH: + op = "MOVH" + if inst.Op == SH { + args[0], args[1] = args[1], args[0] + } + + case LW, SW: + op = "MOVW" + if inst.Op == SW { + args[0], args[1] = args[1], args[0] + } + + case LBU: + op = "MOVBU" + + case LHU: + op = "MOVHU" + + case LWU: + op = "MOVWU" + + case FLW, FSW: + op = "MOVF" + if inst.Op == FLW { + args[0], args[1] = args[1], args[0] + } + + case FLD, FSD: + op = "MOVD" + if inst.Op == FLD { + args[0], args[1] = args[1], args[0] + } + + case SUB: + if inst.Args[1].(Reg) == X0 { + op = "NEG" + args[1] = args[2] + args = args[:len(args)-1] + } + + case XORI: + if 
inst.Args[2].(Simm).String() == "-1" { + op = "NOT" + args = args[:len(args)-1] + } + + case SLTIU: + if inst.Args[2].(Simm).Imm == 1 { + op = "SEQZ" + args = args[:len(args)-1] + } + + case SLTU: + if inst.Args[1].(Reg) == X0 { + op = "SNEZ" + args[1] = args[2] + args = args[:len(args)-1] + } + + case JAL: + if inst.Args[0].(Reg) == X0 { + op = "JMP" + args[0] = args[1] + args = args[:len(args)-1] + } else if inst.Args[0].(Reg) == X1 { + op = "CALL" + args[0] = args[1] + args = args[:len(args)-1] + } else { + args[0], args[1] = args[1], args[0] + } + + case JALR: + if inst.Args[0].(Reg) == X0 { + if inst.Args[1].(RegOffset).OfsReg == X1 && inst.Args[1].(RegOffset).Ofs.Imm == 0 { + op = "RET" + args = nil + break + } + op = "JMP" + args[0] = args[1] + args = args[:len(args)-1] + } else if inst.Args[0].(Reg) == X1 { + op = "CALL" + args[0] = args[1] + args = args[:len(args)-1] + } else { + args[0], args[1] = args[1], args[0] + } + } + + // Reverse args, placing dest last. + for i, j := 0, len(args)-1; i < j; i, j = i+1, j-1 { + args[i], args[j] = args[j], args[i] + } + + // Change to plan9 opcode format + // Atomic instructions do not have reorder suffix, so remove them + op = strings.Replace(op, ".AQRL", "", -1) + op = strings.Replace(op, ".AQ", "", -1) + op = strings.Replace(op, ".RL", "", -1) + op = strings.Replace(op, ".", "", -1) + + if args != nil { + op += " " + strings.Join(args, ", ") + } + + return op +} + +func plan9Arg(inst *Inst, pc uint64, symname func(uint64) (string, uint64), arg Arg) string { + switch a := arg.(type) { + case Uimm: + return fmt.Sprintf("$%d", uint32(a.Imm)) + + case Simm: + imm, _ := strconv.Atoi(a.String()) + if a.Width == 13 || a.Width == 21 { + addr := int64(pc) + int64(imm) + if s, base := symname(uint64(addr)); s != "" && uint64(addr) == base { + return fmt.Sprintf("%s(SB)", s) + } + return fmt.Sprintf("%d(PC)", imm/4) + } + return fmt.Sprintf("$%d", int32(imm)) + + case Reg: + if a <= 31 { + return fmt.Sprintf("X%d", a) + } 
else { + return fmt.Sprintf("F%d", a-32) + } + + case RegOffset: + if a.Ofs.Imm == 0 { + return fmt.Sprintf("(X%d)", a.OfsReg) + } else { + return fmt.Sprintf("%s(X%d)", a.Ofs.String(), a.OfsReg) + } + + case AmoReg: + return fmt.Sprintf("(X%d)", a.reg) + + default: + return strings.ToUpper(arg.String()) + } +} diff --git a/riscv64/riscv64asm/tables.go b/riscv64/riscv64asm/tables.go new file mode 100644 index 00000000..3e5db415 --- /dev/null +++ b/riscv64/riscv64asm/tables.go @@ -0,0 +1,1474 @@ +// Code generated by riscv64spec riscv-opcodes +// DO NOT EDIT + +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package riscv64asm + +const ( + _ Op = iota + ADD + ADDI + ADDIW + ADDW + ADD_UW + AMOADD_D + AMOADD_D_AQ + AMOADD_D_AQRL + AMOADD_D_RL + AMOADD_W + AMOADD_W_AQ + AMOADD_W_AQRL + AMOADD_W_RL + AMOAND_D + AMOAND_D_AQ + AMOAND_D_AQRL + AMOAND_D_RL + AMOAND_W + AMOAND_W_AQ + AMOAND_W_AQRL + AMOAND_W_RL + AMOMAXU_D + AMOMAXU_D_AQ + AMOMAXU_D_AQRL + AMOMAXU_D_RL + AMOMAXU_W + AMOMAXU_W_AQ + AMOMAXU_W_AQRL + AMOMAXU_W_RL + AMOMAX_D + AMOMAX_D_AQ + AMOMAX_D_AQRL + AMOMAX_D_RL + AMOMAX_W + AMOMAX_W_AQ + AMOMAX_W_AQRL + AMOMAX_W_RL + AMOMINU_D + AMOMINU_D_AQ + AMOMINU_D_AQRL + AMOMINU_D_RL + AMOMINU_W + AMOMINU_W_AQ + AMOMINU_W_AQRL + AMOMINU_W_RL + AMOMIN_D + AMOMIN_D_AQ + AMOMIN_D_AQRL + AMOMIN_D_RL + AMOMIN_W + AMOMIN_W_AQ + AMOMIN_W_AQRL + AMOMIN_W_RL + AMOOR_D + AMOOR_D_AQ + AMOOR_D_AQRL + AMOOR_D_RL + AMOOR_W + AMOOR_W_AQ + AMOOR_W_AQRL + AMOOR_W_RL + AMOSWAP_D + AMOSWAP_D_AQ + AMOSWAP_D_AQRL + AMOSWAP_D_RL + AMOSWAP_W + AMOSWAP_W_AQ + AMOSWAP_W_AQRL + AMOSWAP_W_RL + AMOXOR_D + AMOXOR_D_AQ + AMOXOR_D_AQRL + AMOXOR_D_RL + AMOXOR_W + AMOXOR_W_AQ + AMOXOR_W_AQRL + AMOXOR_W_RL + AND + ANDI + ANDN + AUIPC + BCLR + BCLRI + BEQ + BEXT + BEXTI + BGE + BGEU + BINV + BINVI + BLT + BLTU + BNE + BSET + BSETI + CLZ + CLZW + CPOP + CPOPW + CSRRC + CSRRCI + CSRRS + 
CSRRSI + CSRRW + CSRRWI + CTZ + CTZW + C_ADD + C_ADDI + C_ADDI16SP + C_ADDI4SPN + C_ADDIW + C_ADDW + C_AND + C_ANDI + C_BEQZ + C_BNEZ + C_EBREAK + C_FLD + C_FLDSP + C_FSD + C_FSDSP + C_J + C_JALR + C_JR + C_LD + C_LDSP + C_LI + C_LUI + C_LW + C_LWSP + C_MV + C_NOP + C_OR + C_SD + C_SDSP + C_SLLI + C_SRAI + C_SRLI + C_SUB + C_SUBW + C_SW + C_SWSP + C_UNIMP + C_XOR + DIV + DIVU + DIVUW + DIVW + EBREAK + ECALL + FADD_D + FADD_H + FADD_Q + FADD_S + FCLASS_D + FCLASS_H + FCLASS_Q + FCLASS_S + FCVT_D_L + FCVT_D_LU + FCVT_D_Q + FCVT_D_S + FCVT_D_W + FCVT_D_WU + FCVT_H_L + FCVT_H_LU + FCVT_H_S + FCVT_H_W + FCVT_H_WU + FCVT_LU_D + FCVT_LU_H + FCVT_LU_Q + FCVT_LU_S + FCVT_L_D + FCVT_L_H + FCVT_L_Q + FCVT_L_S + FCVT_Q_D + FCVT_Q_L + FCVT_Q_LU + FCVT_Q_S + FCVT_Q_W + FCVT_Q_WU + FCVT_S_D + FCVT_S_H + FCVT_S_L + FCVT_S_LU + FCVT_S_Q + FCVT_S_W + FCVT_S_WU + FCVT_WU_D + FCVT_WU_H + FCVT_WU_Q + FCVT_WU_S + FCVT_W_D + FCVT_W_H + FCVT_W_Q + FCVT_W_S + FDIV_D + FDIV_H + FDIV_Q + FDIV_S + FENCE + FENCE_I + FEQ_D + FEQ_H + FEQ_Q + FEQ_S + FLD + FLE_D + FLE_H + FLE_Q + FLE_S + FLH + FLQ + FLT_D + FLT_H + FLT_Q + FLT_S + FLW + FMADD_D + FMADD_H + FMADD_Q + FMADD_S + FMAX_D + FMAX_H + FMAX_Q + FMAX_S + FMIN_D + FMIN_H + FMIN_Q + FMIN_S + FMSUB_D + FMSUB_H + FMSUB_Q + FMSUB_S + FMUL_D + FMUL_H + FMUL_Q + FMUL_S + FMV_D_X + FMV_H_X + FMV_W_X + FMV_X_D + FMV_X_H + FMV_X_W + FNMADD_D + FNMADD_H + FNMADD_Q + FNMADD_S + FNMSUB_D + FNMSUB_H + FNMSUB_Q + FNMSUB_S + FSD + FSGNJN_D + FSGNJN_H + FSGNJN_Q + FSGNJN_S + FSGNJX_D + FSGNJX_H + FSGNJX_Q + FSGNJX_S + FSGNJ_D + FSGNJ_H + FSGNJ_Q + FSGNJ_S + FSH + FSQ + FSQRT_D + FSQRT_H + FSQRT_Q + FSQRT_S + FSUB_D + FSUB_H + FSUB_Q + FSUB_S + FSW + JAL + JALR + LB + LBU + LD + LH + LHU + LR_D + LR_D_AQ + LR_D_AQRL + LR_D_RL + LR_W + LR_W_AQ + LR_W_AQRL + LR_W_RL + LUI + LW + LWU + MAX + MAXU + MIN + MINU + MUL + MULH + MULHSU + MULHU + MULW + OR + ORC_B + ORI + ORN + REM + REMU + REMUW + REMW + REV8 + ROL + ROLW + ROR + RORI + RORIW + RORW + SB + SC_D + 
SC_D_AQ + SC_D_AQRL + SC_D_RL + SC_W + SC_W_AQ + SC_W_AQRL + SC_W_RL + SD + SEXT_B + SEXT_H + SH + SH1ADD + SH1ADD_UW + SH2ADD + SH2ADD_UW + SH3ADD + SH3ADD_UW + SLL + SLLI + SLLIW + SLLI_UW + SLLW + SLT + SLTI + SLTIU + SLTU + SRA + SRAI + SRAIW + SRAW + SRL + SRLI + SRLIW + SRLW + SUB + SUBW + SW + XNOR + XOR + XORI + ZEXT_H +) + +var opstr = [...]string{ + ADD: "ADD", + ADDI: "ADDI", + ADDIW: "ADDIW", + ADDW: "ADDW", + ADD_UW: "ADD.UW", + AMOADD_D: "AMOADD.D", + AMOADD_D_AQ: "AMOADD.D.AQ", + AMOADD_D_AQRL: "AMOADD.D.AQRL", + AMOADD_D_RL: "AMOADD.D.RL", + AMOADD_W: "AMOADD.W", + AMOADD_W_AQ: "AMOADD.W.AQ", + AMOADD_W_AQRL: "AMOADD.W.AQRL", + AMOADD_W_RL: "AMOADD.W.RL", + AMOAND_D: "AMOAND.D", + AMOAND_D_AQ: "AMOAND.D.AQ", + AMOAND_D_AQRL: "AMOAND.D.AQRL", + AMOAND_D_RL: "AMOAND.D.RL", + AMOAND_W: "AMOAND.W", + AMOAND_W_AQ: "AMOAND.W.AQ", + AMOAND_W_AQRL: "AMOAND.W.AQRL", + AMOAND_W_RL: "AMOAND.W.RL", + AMOMAXU_D: "AMOMAXU.D", + AMOMAXU_D_AQ: "AMOMAXU.D.AQ", + AMOMAXU_D_AQRL: "AMOMAXU.D.AQRL", + AMOMAXU_D_RL: "AMOMAXU.D.RL", + AMOMAXU_W: "AMOMAXU.W", + AMOMAXU_W_AQ: "AMOMAXU.W.AQ", + AMOMAXU_W_AQRL: "AMOMAXU.W.AQRL", + AMOMAXU_W_RL: "AMOMAXU.W.RL", + AMOMAX_D: "AMOMAX.D", + AMOMAX_D_AQ: "AMOMAX.D.AQ", + AMOMAX_D_AQRL: "AMOMAX.D.AQRL", + AMOMAX_D_RL: "AMOMAX.D.RL", + AMOMAX_W: "AMOMAX.W", + AMOMAX_W_AQ: "AMOMAX.W.AQ", + AMOMAX_W_AQRL: "AMOMAX.W.AQRL", + AMOMAX_W_RL: "AMOMAX.W.RL", + AMOMINU_D: "AMOMINU.D", + AMOMINU_D_AQ: "AMOMINU.D.AQ", + AMOMINU_D_AQRL: "AMOMINU.D.AQRL", + AMOMINU_D_RL: "AMOMINU.D.RL", + AMOMINU_W: "AMOMINU.W", + AMOMINU_W_AQ: "AMOMINU.W.AQ", + AMOMINU_W_AQRL: "AMOMINU.W.AQRL", + AMOMINU_W_RL: "AMOMINU.W.RL", + AMOMIN_D: "AMOMIN.D", + AMOMIN_D_AQ: "AMOMIN.D.AQ", + AMOMIN_D_AQRL: "AMOMIN.D.AQRL", + AMOMIN_D_RL: "AMOMIN.D.RL", + AMOMIN_W: "AMOMIN.W", + AMOMIN_W_AQ: "AMOMIN.W.AQ", + AMOMIN_W_AQRL: "AMOMIN.W.AQRL", + AMOMIN_W_RL: "AMOMIN.W.RL", + AMOOR_D: "AMOOR.D", + AMOOR_D_AQ: "AMOOR.D.AQ", + AMOOR_D_AQRL: "AMOOR.D.AQRL", + AMOOR_D_RL: 
"AMOOR.D.RL", + AMOOR_W: "AMOOR.W", + AMOOR_W_AQ: "AMOOR.W.AQ", + AMOOR_W_AQRL: "AMOOR.W.AQRL", + AMOOR_W_RL: "AMOOR.W.RL", + AMOSWAP_D: "AMOSWAP.D", + AMOSWAP_D_AQ: "AMOSWAP.D.AQ", + AMOSWAP_D_AQRL: "AMOSWAP.D.AQRL", + AMOSWAP_D_RL: "AMOSWAP.D.RL", + AMOSWAP_W: "AMOSWAP.W", + AMOSWAP_W_AQ: "AMOSWAP.W.AQ", + AMOSWAP_W_AQRL: "AMOSWAP.W.AQRL", + AMOSWAP_W_RL: "AMOSWAP.W.RL", + AMOXOR_D: "AMOXOR.D", + AMOXOR_D_AQ: "AMOXOR.D.AQ", + AMOXOR_D_AQRL: "AMOXOR.D.AQRL", + AMOXOR_D_RL: "AMOXOR.D.RL", + AMOXOR_W: "AMOXOR.W", + AMOXOR_W_AQ: "AMOXOR.W.AQ", + AMOXOR_W_AQRL: "AMOXOR.W.AQRL", + AMOXOR_W_RL: "AMOXOR.W.RL", + AND: "AND", + ANDI: "ANDI", + ANDN: "ANDN", + AUIPC: "AUIPC", + BCLR: "BCLR", + BCLRI: "BCLRI", + BEQ: "BEQ", + BEXT: "BEXT", + BEXTI: "BEXTI", + BGE: "BGE", + BGEU: "BGEU", + BINV: "BINV", + BINVI: "BINVI", + BLT: "BLT", + BLTU: "BLTU", + BNE: "BNE", + BSET: "BSET", + BSETI: "BSETI", + CLZ: "CLZ", + CLZW: "CLZW", + CPOP: "CPOP", + CPOPW: "CPOPW", + CSRRC: "CSRRC", + CSRRCI: "CSRRCI", + CSRRS: "CSRRS", + CSRRSI: "CSRRSI", + CSRRW: "CSRRW", + CSRRWI: "CSRRWI", + CTZ: "CTZ", + CTZW: "CTZW", + C_ADD: "C.ADD", + C_ADDI: "C.ADDI", + C_ADDI16SP: "C.ADDI16SP", + C_ADDI4SPN: "C.ADDI4SPN", + C_ADDIW: "C.ADDIW", + C_ADDW: "C.ADDW", + C_AND: "C.AND", + C_ANDI: "C.ANDI", + C_BEQZ: "C.BEQZ", + C_BNEZ: "C.BNEZ", + C_EBREAK: "C.EBREAK", + C_FLD: "C.FLD", + C_FLDSP: "C.FLDSP", + C_FSD: "C.FSD", + C_FSDSP: "C.FSDSP", + C_J: "C.J", + C_JALR: "C.JALR", + C_JR: "C.JR", + C_LD: "C.LD", + C_LDSP: "C.LDSP", + C_LI: "C.LI", + C_LUI: "C.LUI", + C_LW: "C.LW", + C_LWSP: "C.LWSP", + C_MV: "C.MV", + C_NOP: "C.NOP", + C_OR: "C.OR", + C_SD: "C.SD", + C_SDSP: "C.SDSP", + C_SLLI: "C.SLLI", + C_SRAI: "C.SRAI", + C_SRLI: "C.SRLI", + C_SUB: "C.SUB", + C_SUBW: "C.SUBW", + C_SW: "C.SW", + C_SWSP: "C.SWSP", + C_UNIMP: "C.UNIMP", + C_XOR: "C.XOR", + DIV: "DIV", + DIVU: "DIVU", + DIVUW: "DIVUW", + DIVW: "DIVW", + EBREAK: "EBREAK", + ECALL: "ECALL", + FADD_D: "FADD.D", + FADD_H: "FADD.H", + FADD_Q: 
"FADD.Q", + FADD_S: "FADD.S", + FCLASS_D: "FCLASS.D", + FCLASS_H: "FCLASS.H", + FCLASS_Q: "FCLASS.Q", + FCLASS_S: "FCLASS.S", + FCVT_D_L: "FCVT.D.L", + FCVT_D_LU: "FCVT.D.LU", + FCVT_D_Q: "FCVT.D.Q", + FCVT_D_S: "FCVT.D.S", + FCVT_D_W: "FCVT.D.W", + FCVT_D_WU: "FCVT.D.WU", + FCVT_H_L: "FCVT.H.L", + FCVT_H_LU: "FCVT.H.LU", + FCVT_H_S: "FCVT.H.S", + FCVT_H_W: "FCVT.H.W", + FCVT_H_WU: "FCVT.H.WU", + FCVT_LU_D: "FCVT.LU.D", + FCVT_LU_H: "FCVT.LU.H", + FCVT_LU_Q: "FCVT.LU.Q", + FCVT_LU_S: "FCVT.LU.S", + FCVT_L_D: "FCVT.L.D", + FCVT_L_H: "FCVT.L.H", + FCVT_L_Q: "FCVT.L.Q", + FCVT_L_S: "FCVT.L.S", + FCVT_Q_D: "FCVT.Q.D", + FCVT_Q_L: "FCVT.Q.L", + FCVT_Q_LU: "FCVT.Q.LU", + FCVT_Q_S: "FCVT.Q.S", + FCVT_Q_W: "FCVT.Q.W", + FCVT_Q_WU: "FCVT.Q.WU", + FCVT_S_D: "FCVT.S.D", + FCVT_S_H: "FCVT.S.H", + FCVT_S_L: "FCVT.S.L", + FCVT_S_LU: "FCVT.S.LU", + FCVT_S_Q: "FCVT.S.Q", + FCVT_S_W: "FCVT.S.W", + FCVT_S_WU: "FCVT.S.WU", + FCVT_WU_D: "FCVT.WU.D", + FCVT_WU_H: "FCVT.WU.H", + FCVT_WU_Q: "FCVT.WU.Q", + FCVT_WU_S: "FCVT.WU.S", + FCVT_W_D: "FCVT.W.D", + FCVT_W_H: "FCVT.W.H", + FCVT_W_Q: "FCVT.W.Q", + FCVT_W_S: "FCVT.W.S", + FDIV_D: "FDIV.D", + FDIV_H: "FDIV.H", + FDIV_Q: "FDIV.Q", + FDIV_S: "FDIV.S", + FENCE: "FENCE", + FENCE_I: "FENCE.I", + FEQ_D: "FEQ.D", + FEQ_H: "FEQ.H", + FEQ_Q: "FEQ.Q", + FEQ_S: "FEQ.S", + FLD: "FLD", + FLE_D: "FLE.D", + FLE_H: "FLE.H", + FLE_Q: "FLE.Q", + FLE_S: "FLE.S", + FLH: "FLH", + FLQ: "FLQ", + FLT_D: "FLT.D", + FLT_H: "FLT.H", + FLT_Q: "FLT.Q", + FLT_S: "FLT.S", + FLW: "FLW", + FMADD_D: "FMADD.D", + FMADD_H: "FMADD.H", + FMADD_Q: "FMADD.Q", + FMADD_S: "FMADD.S", + FMAX_D: "FMAX.D", + FMAX_H: "FMAX.H", + FMAX_Q: "FMAX.Q", + FMAX_S: "FMAX.S", + FMIN_D: "FMIN.D", + FMIN_H: "FMIN.H", + FMIN_Q: "FMIN.Q", + FMIN_S: "FMIN.S", + FMSUB_D: "FMSUB.D", + FMSUB_H: "FMSUB.H", + FMSUB_Q: "FMSUB.Q", + FMSUB_S: "FMSUB.S", + FMUL_D: "FMUL.D", + FMUL_H: "FMUL.H", + FMUL_Q: "FMUL.Q", + FMUL_S: "FMUL.S", + FMV_D_X: "FMV.D.X", + FMV_H_X: "FMV.H.X", + FMV_W_X: "FMV.W.X", + 
FMV_X_D: "FMV.X.D", + FMV_X_H: "FMV.X.H", + FMV_X_W: "FMV.X.W", + FNMADD_D: "FNMADD.D", + FNMADD_H: "FNMADD.H", + FNMADD_Q: "FNMADD.Q", + FNMADD_S: "FNMADD.S", + FNMSUB_D: "FNMSUB.D", + FNMSUB_H: "FNMSUB.H", + FNMSUB_Q: "FNMSUB.Q", + FNMSUB_S: "FNMSUB.S", + FSD: "FSD", + FSGNJN_D: "FSGNJN.D", + FSGNJN_H: "FSGNJN.H", + FSGNJN_Q: "FSGNJN.Q", + FSGNJN_S: "FSGNJN.S", + FSGNJX_D: "FSGNJX.D", + FSGNJX_H: "FSGNJX.H", + FSGNJX_Q: "FSGNJX.Q", + FSGNJX_S: "FSGNJX.S", + FSGNJ_D: "FSGNJ.D", + FSGNJ_H: "FSGNJ.H", + FSGNJ_Q: "FSGNJ.Q", + FSGNJ_S: "FSGNJ.S", + FSH: "FSH", + FSQ: "FSQ", + FSQRT_D: "FSQRT.D", + FSQRT_H: "FSQRT.H", + FSQRT_Q: "FSQRT.Q", + FSQRT_S: "FSQRT.S", + FSUB_D: "FSUB.D", + FSUB_H: "FSUB.H", + FSUB_Q: "FSUB.Q", + FSUB_S: "FSUB.S", + FSW: "FSW", + JAL: "JAL", + JALR: "JALR", + LB: "LB", + LBU: "LBU", + LD: "LD", + LH: "LH", + LHU: "LHU", + LR_D: "LR.D", + LR_D_AQ: "LR.D.AQ", + LR_D_AQRL: "LR.D.AQRL", + LR_D_RL: "LR.D.RL", + LR_W: "LR.W", + LR_W_AQ: "LR.W.AQ", + LR_W_AQRL: "LR.W.AQRL", + LR_W_RL: "LR.W.RL", + LUI: "LUI", + LW: "LW", + LWU: "LWU", + MAX: "MAX", + MAXU: "MAXU", + MIN: "MIN", + MINU: "MINU", + MUL: "MUL", + MULH: "MULH", + MULHSU: "MULHSU", + MULHU: "MULHU", + MULW: "MULW", + OR: "OR", + ORC_B: "ORC.B", + ORI: "ORI", + ORN: "ORN", + REM: "REM", + REMU: "REMU", + REMUW: "REMUW", + REMW: "REMW", + REV8: "REV8", + ROL: "ROL", + ROLW: "ROLW", + ROR: "ROR", + RORI: "RORI", + RORIW: "RORIW", + RORW: "RORW", + SB: "SB", + SC_D: "SC.D", + SC_D_AQ: "SC.D.AQ", + SC_D_AQRL: "SC.D.AQRL", + SC_D_RL: "SC.D.RL", + SC_W: "SC.W", + SC_W_AQ: "SC.W.AQ", + SC_W_AQRL: "SC.W.AQRL", + SC_W_RL: "SC.W.RL", + SD: "SD", + SEXT_B: "SEXT.B", + SEXT_H: "SEXT.H", + SH: "SH", + SH1ADD: "SH1ADD", + SH1ADD_UW: "SH1ADD.UW", + SH2ADD: "SH2ADD", + SH2ADD_UW: "SH2ADD.UW", + SH3ADD: "SH3ADD", + SH3ADD_UW: "SH3ADD.UW", + SLL: "SLL", + SLLI: "SLLI", + SLLIW: "SLLIW", + SLLI_UW: "SLLI.UW", + SLLW: "SLLW", + SLT: "SLT", + SLTI: "SLTI", + SLTIU: "SLTIU", + SLTU: "SLTU", + SRA: "SRA", + SRAI: 
"SRAI", + SRAIW: "SRAIW", + SRAW: "SRAW", + SRL: "SRL", + SRLI: "SRLI", + SRLIW: "SRLIW", + SRLW: "SRLW", + SUB: "SUB", + SUBW: "SUBW", + SW: "SW", + XNOR: "XNOR", + XOR: "XOR", + XORI: "XORI", + ZEXT_H: "ZEXT.H", +} + +var instFormats = [...]instFormat{ + // ADD rd, rs1, rs2 + {mask: 0xfe00707f, value: 0x00000033, op: ADD, args: argTypeList{arg_rd, arg_rs1, arg_rs2}}, + // ADDI rd, rs1, imm12 + {mask: 0x0000707f, value: 0x00000013, op: ADDI, args: argTypeList{arg_rd, arg_rs1, arg_imm12}}, + // ADDIW rd, rs1, imm12 + {mask: 0x0000707f, value: 0x0000001b, op: ADDIW, args: argTypeList{arg_rd, arg_rs1, arg_imm12}}, + // ADDW rd, rs1, rs2 + {mask: 0xfe00707f, value: 0x0000003b, op: ADDW, args: argTypeList{arg_rd, arg_rs1, arg_rs2}}, + // ADD.UW rd, rs1, rs2 + {mask: 0xfe00707f, value: 0x0800003b, op: ADD_UW, args: argTypeList{arg_rd, arg_rs1, arg_rs2}}, + // AMOADD.D rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0x0000302f, op: AMOADD_D, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // AMOADD.D.AQ rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0x0400302f, op: AMOADD_D_AQ, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // AMOADD.D.AQRL rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0x0600302f, op: AMOADD_D_AQRL, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // AMOADD.D.RL rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0x0200302f, op: AMOADD_D_RL, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // AMOADD.W rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0x0000202f, op: AMOADD_W, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // AMOADD.W.AQ rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0x0400202f, op: AMOADD_W_AQ, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // AMOADD.W.AQRL rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0x0600202f, op: AMOADD_W_AQRL, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // AMOADD.W.RL rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0x0200202f, op: AMOADD_W_RL, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // AMOAND.D 
rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0x6000302f, op: AMOAND_D, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // AMOAND.D.AQ rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0x6400302f, op: AMOAND_D_AQ, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // AMOAND.D.AQRL rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0x6600302f, op: AMOAND_D_AQRL, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // AMOAND.D.RL rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0x6200302f, op: AMOAND_D_RL, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // AMOAND.W rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0x6000202f, op: AMOAND_W, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // AMOAND.W.AQ rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0x6400202f, op: AMOAND_W_AQ, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // AMOAND.W.AQRL rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0x6600202f, op: AMOAND_W_AQRL, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // AMOAND.W.RL rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0x6200202f, op: AMOAND_W_RL, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // AMOMAXU.D rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0xe000302f, op: AMOMAXU_D, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // AMOMAXU.D.AQ rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0xe400302f, op: AMOMAXU_D_AQ, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // AMOMAXU.D.AQRL rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0xe600302f, op: AMOMAXU_D_AQRL, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // AMOMAXU.D.RL rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0xe200302f, op: AMOMAXU_D_RL, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // AMOMAXU.W rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0xe000202f, op: AMOMAXU_W, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // AMOMAXU.W.AQ rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0xe400202f, op: AMOMAXU_W_AQ, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // AMOMAXU.W.AQRL rd, rs2, rs1_amo + 
{mask: 0xfe00707f, value: 0xe600202f, op: AMOMAXU_W_AQRL, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // AMOMAXU.W.RL rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0xe200202f, op: AMOMAXU_W_RL, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // AMOMAX.D rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0xa000302f, op: AMOMAX_D, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // AMOMAX.D.AQ rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0xa400302f, op: AMOMAX_D_AQ, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // AMOMAX.D.AQRL rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0xa600302f, op: AMOMAX_D_AQRL, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // AMOMAX.D.RL rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0xa200302f, op: AMOMAX_D_RL, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // AMOMAX.W rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0xa000202f, op: AMOMAX_W, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // AMOMAX.W.AQ rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0xa400202f, op: AMOMAX_W_AQ, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // AMOMAX.W.AQRL rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0xa600202f, op: AMOMAX_W_AQRL, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // AMOMAX.W.RL rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0xa200202f, op: AMOMAX_W_RL, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // AMOMINU.D rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0xc000302f, op: AMOMINU_D, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // AMOMINU.D.AQ rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0xc400302f, op: AMOMINU_D_AQ, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // AMOMINU.D.AQRL rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0xc600302f, op: AMOMINU_D_AQRL, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // AMOMINU.D.RL rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0xc200302f, op: AMOMINU_D_RL, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // AMOMINU.W rd, rs2, rs1_amo + {mask: 0xfe00707f, 
value: 0xc000202f, op: AMOMINU_W, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // AMOMINU.W.AQ rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0xc400202f, op: AMOMINU_W_AQ, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // AMOMINU.W.AQRL rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0xc600202f, op: AMOMINU_W_AQRL, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // AMOMINU.W.RL rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0xc200202f, op: AMOMINU_W_RL, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // AMOMIN.D rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0x8000302f, op: AMOMIN_D, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // AMOMIN.D.AQ rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0x8400302f, op: AMOMIN_D_AQ, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // AMOMIN.D.AQRL rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0x8600302f, op: AMOMIN_D_AQRL, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // AMOMIN.D.RL rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0x8200302f, op: AMOMIN_D_RL, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // AMOMIN.W rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0x8000202f, op: AMOMIN_W, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // AMOMIN.W.AQ rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0x8400202f, op: AMOMIN_W_AQ, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // AMOMIN.W.AQRL rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0x8600202f, op: AMOMIN_W_AQRL, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // AMOMIN.W.RL rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0x8200202f, op: AMOMIN_W_RL, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // AMOOR.D rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0x4000302f, op: AMOOR_D, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // AMOOR.D.AQ rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0x4400302f, op: AMOOR_D_AQ, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // AMOOR.D.AQRL rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0x4600302f, op: 
AMOOR_D_AQRL, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // AMOOR.D.RL rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0x4200302f, op: AMOOR_D_RL, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // AMOOR.W rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0x4000202f, op: AMOOR_W, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // AMOOR.W.AQ rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0x4400202f, op: AMOOR_W_AQ, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // AMOOR.W.AQRL rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0x4600202f, op: AMOOR_W_AQRL, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // AMOOR.W.RL rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0x4200202f, op: AMOOR_W_RL, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // AMOSWAP.D rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0x0800302f, op: AMOSWAP_D, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // AMOSWAP.D.AQ rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0x0c00302f, op: AMOSWAP_D_AQ, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // AMOSWAP.D.AQRL rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0x0e00302f, op: AMOSWAP_D_AQRL, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // AMOSWAP.D.RL rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0x0a00302f, op: AMOSWAP_D_RL, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // AMOSWAP.W rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0x0800202f, op: AMOSWAP_W, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // AMOSWAP.W.AQ rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0x0c00202f, op: AMOSWAP_W_AQ, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // AMOSWAP.W.AQRL rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0x0e00202f, op: AMOSWAP_W_AQRL, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // AMOSWAP.W.RL rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0x0a00202f, op: AMOSWAP_W_RL, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // AMOXOR.D rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0x2000302f, op: AMOXOR_D, args: 
argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // AMOXOR.D.AQ rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0x2400302f, op: AMOXOR_D_AQ, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // AMOXOR.D.AQRL rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0x2600302f, op: AMOXOR_D_AQRL, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // AMOXOR.D.RL rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0x2200302f, op: AMOXOR_D_RL, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // AMOXOR.W rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0x2000202f, op: AMOXOR_W, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // AMOXOR.W.AQ rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0x2400202f, op: AMOXOR_W_AQ, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // AMOXOR.W.AQRL rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0x2600202f, op: AMOXOR_W_AQRL, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // AMOXOR.W.RL rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0x2200202f, op: AMOXOR_W_RL, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // AND rd, rs1, rs2 + {mask: 0xfe00707f, value: 0x00007033, op: AND, args: argTypeList{arg_rd, arg_rs1, arg_rs2}}, + // ANDI rd, rs1, imm12 + {mask: 0x0000707f, value: 0x00007013, op: ANDI, args: argTypeList{arg_rd, arg_rs1, arg_imm12}}, + // ANDN rd, rs1, rs2 + {mask: 0xfe00707f, value: 0x40007033, op: ANDN, args: argTypeList{arg_rd, arg_rs1, arg_rs2}}, + // AUIPC rd, imm20 + {mask: 0x0000007f, value: 0x00000017, op: AUIPC, args: argTypeList{arg_rd, arg_imm20}}, + // BCLR rd, rs1, rs2 + {mask: 0xfe00707f, value: 0x48001033, op: BCLR, args: argTypeList{arg_rd, arg_rs1, arg_rs2}}, + // BCLRI rd, rs1, shamt6 + {mask: 0xfc00707f, value: 0x48001013, op: BCLRI, args: argTypeList{arg_rd, arg_rs1, arg_shamt6}}, + // BEQ rs1, rs2, bimm12 + {mask: 0x0000707f, value: 0x00000063, op: BEQ, args: argTypeList{arg_rs1, arg_rs2, arg_bimm12}}, + // BEXT rd, rs1, rs2 + {mask: 0xfe00707f, value: 0x48005033, op: BEXT, args: argTypeList{arg_rd, arg_rs1, arg_rs2}}, + // 
BEXTI rd, rs1, shamt6 + {mask: 0xfc00707f, value: 0x48005013, op: BEXTI, args: argTypeList{arg_rd, arg_rs1, arg_shamt6}}, + // BGE rs1, rs2, bimm12 + {mask: 0x0000707f, value: 0x00005063, op: BGE, args: argTypeList{arg_rs1, arg_rs2, arg_bimm12}}, + // BGEU rs1, rs2, bimm12 + {mask: 0x0000707f, value: 0x00007063, op: BGEU, args: argTypeList{arg_rs1, arg_rs2, arg_bimm12}}, + // BINV rd, rs1, rs2 + {mask: 0xfe00707f, value: 0x68001033, op: BINV, args: argTypeList{arg_rd, arg_rs1, arg_rs2}}, + // BINVI rd, rs1, shamt6 + {mask: 0xfc00707f, value: 0x68001013, op: BINVI, args: argTypeList{arg_rd, arg_rs1, arg_shamt6}}, + // BLT rs1, rs2, bimm12 + {mask: 0x0000707f, value: 0x00004063, op: BLT, args: argTypeList{arg_rs1, arg_rs2, arg_bimm12}}, + // BLTU rs1, rs2, bimm12 + {mask: 0x0000707f, value: 0x00006063, op: BLTU, args: argTypeList{arg_rs1, arg_rs2, arg_bimm12}}, + // BNE rs1, rs2, bimm12 + {mask: 0x0000707f, value: 0x00001063, op: BNE, args: argTypeList{arg_rs1, arg_rs2, arg_bimm12}}, + // BSET rd, rs1, rs2 + {mask: 0xfe00707f, value: 0x28001033, op: BSET, args: argTypeList{arg_rd, arg_rs1, arg_rs2}}, + // BSETI rd, rs1, shamt6 + {mask: 0xfc00707f, value: 0x28001013, op: BSETI, args: argTypeList{arg_rd, arg_rs1, arg_shamt6}}, + // CLZ rd, rs1 + {mask: 0xfff0707f, value: 0x60001013, op: CLZ, args: argTypeList{arg_rd, arg_rs1}}, + // CLZW rd, rs1 + {mask: 0xfff0707f, value: 0x6000101b, op: CLZW, args: argTypeList{arg_rd, arg_rs1}}, + // CPOP rd, rs1 + {mask: 0xfff0707f, value: 0x60201013, op: CPOP, args: argTypeList{arg_rd, arg_rs1}}, + // CPOPW rd, rs1 + {mask: 0xfff0707f, value: 0x6020101b, op: CPOPW, args: argTypeList{arg_rd, arg_rs1}}, + // CSRRC rd, csr, rs1 + {mask: 0x0000707f, value: 0x00003073, op: CSRRC, args: argTypeList{arg_rd, arg_csr, arg_rs1}}, + // CSRRCI rd, csr, zimm + {mask: 0x0000707f, value: 0x00007073, op: CSRRCI, args: argTypeList{arg_rd, arg_csr, arg_zimm}}, + // CSRRS rd, csr, rs1 + {mask: 0x0000707f, value: 0x00002073, op: CSRRS, args: 
argTypeList{arg_rd, arg_csr, arg_rs1}}, + // CSRRSI rd, csr, zimm + {mask: 0x0000707f, value: 0x00006073, op: CSRRSI, args: argTypeList{arg_rd, arg_csr, arg_zimm}}, + // CSRRW rd, csr, rs1 + {mask: 0x0000707f, value: 0x00001073, op: CSRRW, args: argTypeList{arg_rd, arg_csr, arg_rs1}}, + // CSRRWI rd, csr, zimm + {mask: 0x0000707f, value: 0x00005073, op: CSRRWI, args: argTypeList{arg_rd, arg_csr, arg_zimm}}, + // CTZ rd, rs1 + {mask: 0xfff0707f, value: 0x60101013, op: CTZ, args: argTypeList{arg_rd, arg_rs1}}, + // CTZW rd, rs1 + {mask: 0xfff0707f, value: 0x6010101b, op: CTZW, args: argTypeList{arg_rd, arg_rs1}}, + // C.ADD rd_rs1_n0, c_rs2_n0 + {mask: 0x0000f003, value: 0x00009002, op: C_ADD, args: argTypeList{arg_rd_rs1_n0, arg_c_rs2_n0}}, + // C.ADDI rd_rs1_n0, c_nzimm6 + {mask: 0x0000e003, value: 0x00000001, op: C_ADDI, args: argTypeList{arg_rd_rs1_n0, arg_c_nzimm6}}, + // C.ADDI16SP c_nzimm10 + {mask: 0x0000ef83, value: 0x00006101, op: C_ADDI16SP, args: argTypeList{arg_c_nzimm10}}, + // C.ADDI4SPN rd_p, c_nzuimm10 + {mask: 0x0000e003, value: 0x00000000, op: C_ADDI4SPN, args: argTypeList{arg_rd_p, arg_c_nzuimm10}}, + // C.ADDIW rd_rs1_n0, c_imm6 + {mask: 0x0000e003, value: 0x00002001, op: C_ADDIW, args: argTypeList{arg_rd_rs1_n0, arg_c_imm6}}, + // C.ADDW rd_rs1_p, rs2_p + {mask: 0x0000fc63, value: 0x00009c21, op: C_ADDW, args: argTypeList{arg_rd_rs1_p, arg_rs2_p}}, + // C.AND rd_rs1_p, rs2_p + {mask: 0x0000fc63, value: 0x00008c61, op: C_AND, args: argTypeList{arg_rd_rs1_p, arg_rs2_p}}, + // C.ANDI rd_rs1_p, c_imm6 + {mask: 0x0000ec03, value: 0x00008801, op: C_ANDI, args: argTypeList{arg_rd_rs1_p, arg_c_imm6}}, + // C.BEQZ rs1_p, c_bimm9 + {mask: 0x0000e003, value: 0x0000c001, op: C_BEQZ, args: argTypeList{arg_rs1_p, arg_c_bimm9}}, + // C.BNEZ rs1_p, c_bimm9 + {mask: 0x0000e003, value: 0x0000e001, op: C_BNEZ, args: argTypeList{arg_rs1_p, arg_c_bimm9}}, + // C.EBREAK + {mask: 0x0000ffff, value: 0x00009002, op: C_EBREAK, args: argTypeList{}}, + // C.FLD fd_p, 
rs1_p, c_uimm8 + {mask: 0x0000e003, value: 0x00002000, op: C_FLD, args: argTypeList{arg_fd_p, arg_rs1_p, arg_c_uimm8}}, + // C.FLDSP fd, c_uimm9sp + {mask: 0x0000e003, value: 0x00002002, op: C_FLDSP, args: argTypeList{arg_fd, arg_c_uimm9sp}}, + // C.FSD rs1_p, fs2_p, c_uimm8 + {mask: 0x0000e003, value: 0x0000a000, op: C_FSD, args: argTypeList{arg_rs1_p, arg_fs2_p, arg_c_uimm8}}, + // C.FSDSP c_fs2, c_uimm9sp_s + {mask: 0x0000e003, value: 0x0000a002, op: C_FSDSP, args: argTypeList{arg_c_fs2, arg_c_uimm9sp_s}}, + // C.J c_imm12 + {mask: 0x0000e003, value: 0x0000a001, op: C_J, args: argTypeList{arg_c_imm12}}, + // C.JALR c_rs1_n0 + {mask: 0x0000f07f, value: 0x00009002, op: C_JALR, args: argTypeList{arg_c_rs1_n0}}, + // C.JR rs1_n0 + {mask: 0x0000f07f, value: 0x00008002, op: C_JR, args: argTypeList{arg_rs1_n0}}, + // C.LD rd_p, rs1_p, c_uimm8 + {mask: 0x0000e003, value: 0x00006000, op: C_LD, args: argTypeList{arg_rd_p, arg_rs1_p, arg_c_uimm8}}, + // C.LDSP rd_n0, c_uimm9sp + {mask: 0x0000e003, value: 0x00006002, op: C_LDSP, args: argTypeList{arg_rd_n0, arg_c_uimm9sp}}, + // C.LI rd_n0, c_imm6 + {mask: 0x0000e003, value: 0x00004001, op: C_LI, args: argTypeList{arg_rd_n0, arg_c_imm6}}, + // C.LUI rd_n2, c_nzimm18 + {mask: 0x0000e003, value: 0x00006001, op: C_LUI, args: argTypeList{arg_rd_n2, arg_c_nzimm18}}, + // C.LW rd_p, rs1_p, c_uimm7 + {mask: 0x0000e003, value: 0x00004000, op: C_LW, args: argTypeList{arg_rd_p, arg_rs1_p, arg_c_uimm7}}, + // C.LWSP rd_n0, c_uimm8sp + {mask: 0x0000e003, value: 0x00004002, op: C_LWSP, args: argTypeList{arg_rd_n0, arg_c_uimm8sp}}, + // C.MV rd_n0, c_rs2_n0 + {mask: 0x0000f003, value: 0x00008002, op: C_MV, args: argTypeList{arg_rd_n0, arg_c_rs2_n0}}, + // C.NOP c_nzimm6 + {mask: 0x0000ef83, value: 0x00000001, op: C_NOP, args: argTypeList{arg_c_nzimm6}}, + // C.OR rd_rs1_p, rs2_p + {mask: 0x0000fc63, value: 0x00008c41, op: C_OR, args: argTypeList{arg_rd_rs1_p, arg_rs2_p}}, + // C.SD rs1_p, rs2_p, c_uimm8 + {mask: 0x0000e003, value: 
0x0000e000, op: C_SD, args: argTypeList{arg_rs1_p, arg_rs2_p, arg_c_uimm8}}, + // C.SDSP c_rs2, c_uimm9sp_s + {mask: 0x0000e003, value: 0x0000e002, op: C_SDSP, args: argTypeList{arg_c_rs2, arg_c_uimm9sp_s}}, + // C.SLLI rd_rs1_n0, c_nzuimm6 + {mask: 0x0000e003, value: 0x00000002, op: C_SLLI, args: argTypeList{arg_rd_rs1_n0, arg_c_nzuimm6}}, + // C.SRAI rd_rs1_p, c_nzuimm6 + {mask: 0x0000ec03, value: 0x00008401, op: C_SRAI, args: argTypeList{arg_rd_rs1_p, arg_c_nzuimm6}}, + // C.SRLI rd_rs1_p, c_nzuimm6 + {mask: 0x0000ec03, value: 0x00008001, op: C_SRLI, args: argTypeList{arg_rd_rs1_p, arg_c_nzuimm6}}, + // C.SUB rd_rs1_p, rs2_p + {mask: 0x0000fc63, value: 0x00008c01, op: C_SUB, args: argTypeList{arg_rd_rs1_p, arg_rs2_p}}, + // C.SUBW rd_rs1_p, rs2_p + {mask: 0x0000fc63, value: 0x00009c01, op: C_SUBW, args: argTypeList{arg_rd_rs1_p, arg_rs2_p}}, + // C.SW rs1_p, rs2_p, c_uimm7 + {mask: 0x0000e003, value: 0x0000c000, op: C_SW, args: argTypeList{arg_rs1_p, arg_rs2_p, arg_c_uimm7}}, + // C.SWSP c_rs2, c_uimm8sp_s + {mask: 0x0000e003, value: 0x0000c002, op: C_SWSP, args: argTypeList{arg_c_rs2, arg_c_uimm8sp_s}}, + // C.UNIMP + {mask: 0x0000ffff, value: 0x00000000, op: C_UNIMP, args: argTypeList{}}, + // C.XOR rd_rs1_p, rs2_p + {mask: 0x0000fc63, value: 0x00008c21, op: C_XOR, args: argTypeList{arg_rd_rs1_p, arg_rs2_p}}, + // DIV rd, rs1, rs2 + {mask: 0xfe00707f, value: 0x02004033, op: DIV, args: argTypeList{arg_rd, arg_rs1, arg_rs2}}, + // DIVU rd, rs1, rs2 + {mask: 0xfe00707f, value: 0x02005033, op: DIVU, args: argTypeList{arg_rd, arg_rs1, arg_rs2}}, + // DIVUW rd, rs1, rs2 + {mask: 0xfe00707f, value: 0x0200503b, op: DIVUW, args: argTypeList{arg_rd, arg_rs1, arg_rs2}}, + // DIVW rd, rs1, rs2 + {mask: 0xfe00707f, value: 0x0200403b, op: DIVW, args: argTypeList{arg_rd, arg_rs1, arg_rs2}}, + // EBREAK + {mask: 0xffffffff, value: 0x00100073, op: EBREAK, args: argTypeList{}}, + // ECALL + {mask: 0xffffffff, value: 0x00000073, op: ECALL, args: argTypeList{}}, + // FADD.D fd, 
fs1, fs2 + {mask: 0xfe00007f, value: 0x02000053, op: FADD_D, args: argTypeList{arg_fd, arg_fs1, arg_fs2}}, + // FADD.H fd, fs1, fs2 + {mask: 0xfe00007f, value: 0x04000053, op: FADD_H, args: argTypeList{arg_fd, arg_fs1, arg_fs2}}, + // FADD.Q fd, fs1, fs2 + {mask: 0xfe00007f, value: 0x06000053, op: FADD_Q, args: argTypeList{arg_fd, arg_fs1, arg_fs2}}, + // FADD.S fd, fs1, fs2 + {mask: 0xfe00007f, value: 0x00000053, op: FADD_S, args: argTypeList{arg_fd, arg_fs1, arg_fs2}}, + // FCLASS.D rd, fs1 + {mask: 0xfff0707f, value: 0xe2001053, op: FCLASS_D, args: argTypeList{arg_rd, arg_fs1}}, + // FCLASS.H rd, fs1 + {mask: 0xfff0707f, value: 0xe4001053, op: FCLASS_H, args: argTypeList{arg_rd, arg_fs1}}, + // FCLASS.Q rd, fs1 + {mask: 0xfff0707f, value: 0xe6001053, op: FCLASS_Q, args: argTypeList{arg_rd, arg_fs1}}, + // FCLASS.S rd, fs1 + {mask: 0xfff0707f, value: 0xe0001053, op: FCLASS_S, args: argTypeList{arg_rd, arg_fs1}}, + // FCVT.D.L fd, rs1 + {mask: 0xfff0007f, value: 0xd2200053, op: FCVT_D_L, args: argTypeList{arg_fd, arg_rs1}}, + // FCVT.D.LU fd, rs1 + {mask: 0xfff0007f, value: 0xd2300053, op: FCVT_D_LU, args: argTypeList{arg_fd, arg_rs1}}, + // FCVT.D.Q fd, fs1 + {mask: 0xfff0007f, value: 0x42300053, op: FCVT_D_Q, args: argTypeList{arg_fd, arg_fs1}}, + // FCVT.D.S fd, fs1 + {mask: 0xfff0007f, value: 0x42000053, op: FCVT_D_S, args: argTypeList{arg_fd, arg_fs1}}, + // FCVT.D.W fd, rs1 + {mask: 0xfff0007f, value: 0xd2000053, op: FCVT_D_W, args: argTypeList{arg_fd, arg_rs1}}, + // FCVT.D.WU fd, rs1 + {mask: 0xfff0007f, value: 0xd2100053, op: FCVT_D_WU, args: argTypeList{arg_fd, arg_rs1}}, + // FCVT.H.L fd, rs1 + {mask: 0xfff0007f, value: 0xd4200053, op: FCVT_H_L, args: argTypeList{arg_fd, arg_rs1}}, + // FCVT.H.LU fd, rs1 + {mask: 0xfff0007f, value: 0xd4300053, op: FCVT_H_LU, args: argTypeList{arg_fd, arg_rs1}}, + // FCVT.H.S fd, fs1 + {mask: 0xfff0007f, value: 0x44000053, op: FCVT_H_S, args: argTypeList{arg_fd, arg_fs1}}, + // FCVT.H.W fd, rs1 + {mask: 0xfff0007f, 
value: 0xd4000053, op: FCVT_H_W, args: argTypeList{arg_fd, arg_rs1}}, + // FCVT.H.WU fd, rs1 + {mask: 0xfff0007f, value: 0xd4100053, op: FCVT_H_WU, args: argTypeList{arg_fd, arg_rs1}}, + // FCVT.LU.D rd, fs1 + {mask: 0xfff0007f, value: 0xc2300053, op: FCVT_LU_D, args: argTypeList{arg_rd, arg_fs1}}, + // FCVT.LU.H rd, fs1 + {mask: 0xfff0007f, value: 0xc4300053, op: FCVT_LU_H, args: argTypeList{arg_rd, arg_fs1}}, + // FCVT.LU.Q rd, fs1 + {mask: 0xfff0007f, value: 0xc6300053, op: FCVT_LU_Q, args: argTypeList{arg_rd, arg_fs1}}, + // FCVT.LU.S rd, fs1 + {mask: 0xfff0007f, value: 0xc0300053, op: FCVT_LU_S, args: argTypeList{arg_rd, arg_fs1}}, + // FCVT.L.D rd, fs1 + {mask: 0xfff0007f, value: 0xc2200053, op: FCVT_L_D, args: argTypeList{arg_rd, arg_fs1}}, + // FCVT.L.H rd, fs1 + {mask: 0xfff0007f, value: 0xc4200053, op: FCVT_L_H, args: argTypeList{arg_rd, arg_fs1}}, + // FCVT.L.Q rd, fs1 + {mask: 0xfff0007f, value: 0xc6200053, op: FCVT_L_Q, args: argTypeList{arg_rd, arg_fs1}}, + // FCVT.L.S rd, fs1 + {mask: 0xfff0007f, value: 0xc0200053, op: FCVT_L_S, args: argTypeList{arg_rd, arg_fs1}}, + // FCVT.Q.D fd, fs1 + {mask: 0xfff0007f, value: 0x46100053, op: FCVT_Q_D, args: argTypeList{arg_fd, arg_fs1}}, + // FCVT.Q.L fd, rs1 + {mask: 0xfff0007f, value: 0xd6200053, op: FCVT_Q_L, args: argTypeList{arg_fd, arg_rs1}}, + // FCVT.Q.LU fd, rs1 + {mask: 0xfff0007f, value: 0xd6300053, op: FCVT_Q_LU, args: argTypeList{arg_fd, arg_rs1}}, + // FCVT.Q.S fd, fs1 + {mask: 0xfff0007f, value: 0x46000053, op: FCVT_Q_S, args: argTypeList{arg_fd, arg_fs1}}, + // FCVT.Q.W fd, rs1 + {mask: 0xfff0007f, value: 0xd6000053, op: FCVT_Q_W, args: argTypeList{arg_fd, arg_rs1}}, + // FCVT.Q.WU fd, rs1 + {mask: 0xfff0007f, value: 0xd6100053, op: FCVT_Q_WU, args: argTypeList{arg_fd, arg_rs1}}, + // FCVT.S.D fd, fs1 + {mask: 0xfff0007f, value: 0x40100053, op: FCVT_S_D, args: argTypeList{arg_fd, arg_fs1}}, + // FCVT.S.H fd, fs1 + {mask: 0xfff0007f, value: 0x40200053, op: FCVT_S_H, args: argTypeList{arg_fd, 
arg_fs1}}, + // FCVT.S.L fd, rs1 + {mask: 0xfff0007f, value: 0xd0200053, op: FCVT_S_L, args: argTypeList{arg_fd, arg_rs1}}, + // FCVT.S.LU fd, rs1 + {mask: 0xfff0007f, value: 0xd0300053, op: FCVT_S_LU, args: argTypeList{arg_fd, arg_rs1}}, + // FCVT.S.Q fd, fs1 + {mask: 0xfff0007f, value: 0x40300053, op: FCVT_S_Q, args: argTypeList{arg_fd, arg_fs1}}, + // FCVT.S.W fd, rs1 + {mask: 0xfff0007f, value: 0xd0000053, op: FCVT_S_W, args: argTypeList{arg_fd, arg_rs1}}, + // FCVT.S.WU fd, rs1 + {mask: 0xfff0007f, value: 0xd0100053, op: FCVT_S_WU, args: argTypeList{arg_fd, arg_rs1}}, + // FCVT.WU.D rd, fs1 + {mask: 0xfff0007f, value: 0xc2100053, op: FCVT_WU_D, args: argTypeList{arg_rd, arg_fs1}}, + // FCVT.WU.H rd, fs1 + {mask: 0xfff0007f, value: 0xc4100053, op: FCVT_WU_H, args: argTypeList{arg_rd, arg_fs1}}, + // FCVT.WU.Q rd, fs1 + {mask: 0xfff0007f, value: 0xc6100053, op: FCVT_WU_Q, args: argTypeList{arg_rd, arg_fs1}}, + // FCVT.WU.S rd, fs1 + {mask: 0xfff0007f, value: 0xc0100053, op: FCVT_WU_S, args: argTypeList{arg_rd, arg_fs1}}, + // FCVT.W.D rd, fs1 + {mask: 0xfff0007f, value: 0xc2000053, op: FCVT_W_D, args: argTypeList{arg_rd, arg_fs1}}, + // FCVT.W.H rd, fs1 + {mask: 0xfff0007f, value: 0xc4000053, op: FCVT_W_H, args: argTypeList{arg_rd, arg_fs1}}, + // FCVT.W.Q rd, fs1 + {mask: 0xfff0007f, value: 0xc6000053, op: FCVT_W_Q, args: argTypeList{arg_rd, arg_fs1}}, + // FCVT.W.S rd, fs1 + {mask: 0xfff0007f, value: 0xc0000053, op: FCVT_W_S, args: argTypeList{arg_rd, arg_fs1}}, + // FDIV.D fd, fs1, fs2 + {mask: 0xfe00007f, value: 0x1a000053, op: FDIV_D, args: argTypeList{arg_fd, arg_fs1, arg_fs2}}, + // FDIV.H fd, fs1, fs2 + {mask: 0xfe00007f, value: 0x1c000053, op: FDIV_H, args: argTypeList{arg_fd, arg_fs1, arg_fs2}}, + // FDIV.Q fd, fs1, fs2 + {mask: 0xfe00007f, value: 0x1e000053, op: FDIV_Q, args: argTypeList{arg_fd, arg_fs1, arg_fs2}}, + // FDIV.S fd, fs1, fs2 + {mask: 0xfe00007f, value: 0x18000053, op: FDIV_S, args: argTypeList{arg_fd, arg_fs1, arg_fs2}}, + // FENCE 
pred, succ + {mask: 0x0000707f, value: 0x0000000f, op: FENCE, args: argTypeList{arg_pred, arg_succ}}, + // FENCE.I + {mask: 0x0000707f, value: 0x0000100f, op: FENCE_I, args: argTypeList{}}, + // FEQ.D rd, fs1, fs2 + {mask: 0xfe00707f, value: 0xa2002053, op: FEQ_D, args: argTypeList{arg_rd, arg_fs1, arg_fs2}}, + // FEQ.H rd, fs1, fs2 + {mask: 0xfe00707f, value: 0xa4002053, op: FEQ_H, args: argTypeList{arg_rd, arg_fs1, arg_fs2}}, + // FEQ.Q rd, fs1, fs2 + {mask: 0xfe00707f, value: 0xa6002053, op: FEQ_Q, args: argTypeList{arg_rd, arg_fs1, arg_fs2}}, + // FEQ.S rd, fs1, fs2 + {mask: 0xfe00707f, value: 0xa0002053, op: FEQ_S, args: argTypeList{arg_rd, arg_fs1, arg_fs2}}, + // FLD fd, rs1_mem + {mask: 0x0000707f, value: 0x00003007, op: FLD, args: argTypeList{arg_fd, arg_rs1_mem}}, + // FLE.D rd, fs1, fs2 + {mask: 0xfe00707f, value: 0xa2000053, op: FLE_D, args: argTypeList{arg_rd, arg_fs1, arg_fs2}}, + // FLE.H rd, fs1, fs2 + {mask: 0xfe00707f, value: 0xa4000053, op: FLE_H, args: argTypeList{arg_rd, arg_fs1, arg_fs2}}, + // FLE.Q rd, fs1, fs2 + {mask: 0xfe00707f, value: 0xa6000053, op: FLE_Q, args: argTypeList{arg_rd, arg_fs1, arg_fs2}}, + // FLE.S rd, fs1, fs2 + {mask: 0xfe00707f, value: 0xa0000053, op: FLE_S, args: argTypeList{arg_rd, arg_fs1, arg_fs2}}, + // FLH fd, rs1_mem + {mask: 0x0000707f, value: 0x00001007, op: FLH, args: argTypeList{arg_fd, arg_rs1_mem}}, + // FLQ fd, rs1_mem + {mask: 0x0000707f, value: 0x00004007, op: FLQ, args: argTypeList{arg_fd, arg_rs1_mem}}, + // FLT.D rd, fs1, fs2 + {mask: 0xfe00707f, value: 0xa2001053, op: FLT_D, args: argTypeList{arg_rd, arg_fs1, arg_fs2}}, + // FLT.H rd, fs1, fs2 + {mask: 0xfe00707f, value: 0xa4001053, op: FLT_H, args: argTypeList{arg_rd, arg_fs1, arg_fs2}}, + // FLT.Q rd, fs1, fs2 + {mask: 0xfe00707f, value: 0xa6001053, op: FLT_Q, args: argTypeList{arg_rd, arg_fs1, arg_fs2}}, + // FLT.S rd, fs1, fs2 + {mask: 0xfe00707f, value: 0xa0001053, op: FLT_S, args: argTypeList{arg_rd, arg_fs1, arg_fs2}}, + // FLW fd, rs1_mem + 
{mask: 0x0000707f, value: 0x00002007, op: FLW, args: argTypeList{arg_fd, arg_rs1_mem}}, + // FMADD.D fd, fs1, fs2, fs3 + {mask: 0x0600007f, value: 0x02000043, op: FMADD_D, args: argTypeList{arg_fd, arg_fs1, arg_fs2, arg_fs3}}, + // FMADD.H fd, fs1, fs2, fs3 + {mask: 0x0600007f, value: 0x04000043, op: FMADD_H, args: argTypeList{arg_fd, arg_fs1, arg_fs2, arg_fs3}}, + // FMADD.Q fd, fs1, fs2, fs3 + {mask: 0x0600007f, value: 0x06000043, op: FMADD_Q, args: argTypeList{arg_fd, arg_fs1, arg_fs2, arg_fs3}}, + // FMADD.S fd, fs1, fs2, fs3 + {mask: 0x0600007f, value: 0x00000043, op: FMADD_S, args: argTypeList{arg_fd, arg_fs1, arg_fs2, arg_fs3}}, + // FMAX.D fd, fs1, fs2 + {mask: 0xfe00707f, value: 0x2a001053, op: FMAX_D, args: argTypeList{arg_fd, arg_fs1, arg_fs2}}, + // FMAX.H fd, fs1, fs2 + {mask: 0xfe00707f, value: 0x2c001053, op: FMAX_H, args: argTypeList{arg_fd, arg_fs1, arg_fs2}}, + // FMAX.Q fd, fs1, fs2 + {mask: 0xfe00707f, value: 0x2e001053, op: FMAX_Q, args: argTypeList{arg_fd, arg_fs1, arg_fs2}}, + // FMAX.S fd, fs1, fs2 + {mask: 0xfe00707f, value: 0x28001053, op: FMAX_S, args: argTypeList{arg_fd, arg_fs1, arg_fs2}}, + // FMIN.D fd, fs1, fs2 + {mask: 0xfe00707f, value: 0x2a000053, op: FMIN_D, args: argTypeList{arg_fd, arg_fs1, arg_fs2}}, + // FMIN.H fd, fs1, fs2 + {mask: 0xfe00707f, value: 0x2c000053, op: FMIN_H, args: argTypeList{arg_fd, arg_fs1, arg_fs2}}, + // FMIN.Q fd, fs1, fs2 + {mask: 0xfe00707f, value: 0x2e000053, op: FMIN_Q, args: argTypeList{arg_fd, arg_fs1, arg_fs2}}, + // FMIN.S fd, fs1, fs2 + {mask: 0xfe00707f, value: 0x28000053, op: FMIN_S, args: argTypeList{arg_fd, arg_fs1, arg_fs2}}, + // FMSUB.D fd, fs1, fs2, fs3 + {mask: 0x0600007f, value: 0x02000047, op: FMSUB_D, args: argTypeList{arg_fd, arg_fs1, arg_fs2, arg_fs3}}, + // FMSUB.H fd, fs1, fs2, fs3 + {mask: 0x0600007f, value: 0x04000047, op: FMSUB_H, args: argTypeList{arg_fd, arg_fs1, arg_fs2, arg_fs3}}, + // FMSUB.Q fd, fs1, fs2, fs3 + {mask: 0x0600007f, value: 0x06000047, op: FMSUB_Q, args: 
argTypeList{arg_fd, arg_fs1, arg_fs2, arg_fs3}}, + // FMSUB.S fd, fs1, fs2, fs3 + {mask: 0x0600007f, value: 0x00000047, op: FMSUB_S, args: argTypeList{arg_fd, arg_fs1, arg_fs2, arg_fs3}}, + // FMUL.D fd, fs1, fs2 + {mask: 0xfe00007f, value: 0x12000053, op: FMUL_D, args: argTypeList{arg_fd, arg_fs1, arg_fs2}}, + // FMUL.H fd, fs1, fs2 + {mask: 0xfe00007f, value: 0x14000053, op: FMUL_H, args: argTypeList{arg_fd, arg_fs1, arg_fs2}}, + // FMUL.Q fd, fs1, fs2 + {mask: 0xfe00007f, value: 0x16000053, op: FMUL_Q, args: argTypeList{arg_fd, arg_fs1, arg_fs2}}, + // FMUL.S fd, fs1, fs2 + {mask: 0xfe00007f, value: 0x10000053, op: FMUL_S, args: argTypeList{arg_fd, arg_fs1, arg_fs2}}, + // FMV.D.X fd, rs1 + {mask: 0xfff0707f, value: 0xf2000053, op: FMV_D_X, args: argTypeList{arg_fd, arg_rs1}}, + // FMV.H.X fd, rs1 + {mask: 0xfff0707f, value: 0xf4000053, op: FMV_H_X, args: argTypeList{arg_fd, arg_rs1}}, + // FMV.W.X fd, rs1 + {mask: 0xfff0707f, value: 0xf0000053, op: FMV_W_X, args: argTypeList{arg_fd, arg_rs1}}, + // FMV.X.D rd, fs1 + {mask: 0xfff0707f, value: 0xe2000053, op: FMV_X_D, args: argTypeList{arg_rd, arg_fs1}}, + // FMV.X.H rd, fs1 + {mask: 0xfff0707f, value: 0xe4000053, op: FMV_X_H, args: argTypeList{arg_rd, arg_fs1}}, + // FMV.X.W rd, fs1 + {mask: 0xfff0707f, value: 0xe0000053, op: FMV_X_W, args: argTypeList{arg_rd, arg_fs1}}, + // FNMADD.D fd, fs1, fs2, fs3 + {mask: 0x0600007f, value: 0x0200004f, op: FNMADD_D, args: argTypeList{arg_fd, arg_fs1, arg_fs2, arg_fs3}}, + // FNMADD.H fd, fs1, fs2, fs3 + {mask: 0x0600007f, value: 0x0400004f, op: FNMADD_H, args: argTypeList{arg_fd, arg_fs1, arg_fs2, arg_fs3}}, + // FNMADD.Q fd, fs1, fs2, fs3 + {mask: 0x0600007f, value: 0x0600004f, op: FNMADD_Q, args: argTypeList{arg_fd, arg_fs1, arg_fs2, arg_fs3}}, + // FNMADD.S fd, fs1, fs2, fs3 + {mask: 0x0600007f, value: 0x0000004f, op: FNMADD_S, args: argTypeList{arg_fd, arg_fs1, arg_fs2, arg_fs3}}, + // FNMSUB.D fd, fs1, fs2, fs3 + {mask: 0x0600007f, value: 0x0200004b, op: FNMSUB_D, 
args: argTypeList{arg_fd, arg_fs1, arg_fs2, arg_fs3}}, + // FNMSUB.H fd, fs1, fs2, fs3 + {mask: 0x0600007f, value: 0x0400004b, op: FNMSUB_H, args: argTypeList{arg_fd, arg_fs1, arg_fs2, arg_fs3}}, + // FNMSUB.Q fd, fs1, fs2, fs3 + {mask: 0x0600007f, value: 0x0600004b, op: FNMSUB_Q, args: argTypeList{arg_fd, arg_fs1, arg_fs2, arg_fs3}}, + // FNMSUB.S fd, fs1, fs2, fs3 + {mask: 0x0600007f, value: 0x0000004b, op: FNMSUB_S, args: argTypeList{arg_fd, arg_fs1, arg_fs2, arg_fs3}}, + // FSD fs2, rs1_store + {mask: 0x0000707f, value: 0x00003027, op: FSD, args: argTypeList{arg_fs2, arg_rs1_store}}, + // FSGNJN.D fd, fs1, fs2 + {mask: 0xfe00707f, value: 0x22001053, op: FSGNJN_D, args: argTypeList{arg_fd, arg_fs1, arg_fs2}}, + // FSGNJN.H fd, fs1, fs2 + {mask: 0xfe00707f, value: 0x24001053, op: FSGNJN_H, args: argTypeList{arg_fd, arg_fs1, arg_fs2}}, + // FSGNJN.Q fd, fs1, fs2 + {mask: 0xfe00707f, value: 0x26001053, op: FSGNJN_Q, args: argTypeList{arg_fd, arg_fs1, arg_fs2}}, + // FSGNJN.S fd, fs1, fs2 + {mask: 0xfe00707f, value: 0x20001053, op: FSGNJN_S, args: argTypeList{arg_fd, arg_fs1, arg_fs2}}, + // FSGNJX.D fd, fs1, fs2 + {mask: 0xfe00707f, value: 0x22002053, op: FSGNJX_D, args: argTypeList{arg_fd, arg_fs1, arg_fs2}}, + // FSGNJX.H fd, fs1, fs2 + {mask: 0xfe00707f, value: 0x24002053, op: FSGNJX_H, args: argTypeList{arg_fd, arg_fs1, arg_fs2}}, + // FSGNJX.Q fd, fs1, fs2 + {mask: 0xfe00707f, value: 0x26002053, op: FSGNJX_Q, args: argTypeList{arg_fd, arg_fs1, arg_fs2}}, + // FSGNJX.S fd, fs1, fs2 + {mask: 0xfe00707f, value: 0x20002053, op: FSGNJX_S, args: argTypeList{arg_fd, arg_fs1, arg_fs2}}, + // FSGNJ.D fd, fs1, fs2 + {mask: 0xfe00707f, value: 0x22000053, op: FSGNJ_D, args: argTypeList{arg_fd, arg_fs1, arg_fs2}}, + // FSGNJ.H fd, fs1, fs2 + {mask: 0xfe00707f, value: 0x24000053, op: FSGNJ_H, args: argTypeList{arg_fd, arg_fs1, arg_fs2}}, + // FSGNJ.Q fd, fs1, fs2 + {mask: 0xfe00707f, value: 0x26000053, op: FSGNJ_Q, args: argTypeList{arg_fd, arg_fs1, arg_fs2}}, + // FSGNJ.S 
fd, fs1, fs2 + {mask: 0xfe00707f, value: 0x20000053, op: FSGNJ_S, args: argTypeList{arg_fd, arg_fs1, arg_fs2}}, + // FSH fs2, rs1_store + {mask: 0x0000707f, value: 0x00001027, op: FSH, args: argTypeList{arg_fs2, arg_rs1_store}}, + // FSQ fs2, rs1_store + {mask: 0x0000707f, value: 0x00004027, op: FSQ, args: argTypeList{arg_fs2, arg_rs1_store}}, + // FSQRT.D fd, fs1 + {mask: 0xfff0007f, value: 0x5a000053, op: FSQRT_D, args: argTypeList{arg_fd, arg_fs1}}, + // FSQRT.H fd, fs1 + {mask: 0xfff0007f, value: 0x5c000053, op: FSQRT_H, args: argTypeList{arg_fd, arg_fs1}}, + // FSQRT.Q fd, fs1 + {mask: 0xfff0007f, value: 0x5e000053, op: FSQRT_Q, args: argTypeList{arg_fd, arg_fs1}}, + // FSQRT.S fd, fs1 + {mask: 0xfff0007f, value: 0x58000053, op: FSQRT_S, args: argTypeList{arg_fd, arg_fs1}}, + // FSUB.D fd, fs1, fs2 + {mask: 0xfe00007f, value: 0x0a000053, op: FSUB_D, args: argTypeList{arg_fd, arg_fs1, arg_fs2}}, + // FSUB.H fd, fs1, fs2 + {mask: 0xfe00007f, value: 0x0c000053, op: FSUB_H, args: argTypeList{arg_fd, arg_fs1, arg_fs2}}, + // FSUB.Q fd, fs1, fs2 + {mask: 0xfe00007f, value: 0x0e000053, op: FSUB_Q, args: argTypeList{arg_fd, arg_fs1, arg_fs2}}, + // FSUB.S fd, fs1, fs2 + {mask: 0xfe00007f, value: 0x08000053, op: FSUB_S, args: argTypeList{arg_fd, arg_fs1, arg_fs2}}, + // FSW fs2, rs1_store + {mask: 0x0000707f, value: 0x00002027, op: FSW, args: argTypeList{arg_fs2, arg_rs1_store}}, + // JAL rd, jimm20 + {mask: 0x0000007f, value: 0x0000006f, op: JAL, args: argTypeList{arg_rd, arg_jimm20}}, + // JALR rd, rs1_mem + {mask: 0x0000707f, value: 0x00000067, op: JALR, args: argTypeList{arg_rd, arg_rs1_mem}}, + // LB rd, rs1_mem + {mask: 0x0000707f, value: 0x00000003, op: LB, args: argTypeList{arg_rd, arg_rs1_mem}}, + // LBU rd, rs1_mem + {mask: 0x0000707f, value: 0x00004003, op: LBU, args: argTypeList{arg_rd, arg_rs1_mem}}, + // LD rd, rs1_mem + {mask: 0x0000707f, value: 0x00003003, op: LD, args: argTypeList{arg_rd, arg_rs1_mem}}, + // LH rd, rs1_mem + {mask: 0x0000707f, value: 
0x00001003, op: LH, args: argTypeList{arg_rd, arg_rs1_mem}}, + // LHU rd, rs1_mem + {mask: 0x0000707f, value: 0x00005003, op: LHU, args: argTypeList{arg_rd, arg_rs1_mem}}, + // LR.D rd, rs1_amo + {mask: 0xfff0707f, value: 0x1000302f, op: LR_D, args: argTypeList{arg_rd, arg_rs1_amo}}, + // LR.D.AQ rd, rs1_amo + {mask: 0xfff0707f, value: 0x1400302f, op: LR_D_AQ, args: argTypeList{arg_rd, arg_rs1_amo}}, + // LR.D.AQRL rd, rs1_amo + {mask: 0xfff0707f, value: 0x1600302f, op: LR_D_AQRL, args: argTypeList{arg_rd, arg_rs1_amo}}, + // LR.D.RL rd, rs1_amo + {mask: 0xfff0707f, value: 0x1200302f, op: LR_D_RL, args: argTypeList{arg_rd, arg_rs1_amo}}, + // LR.W rd, rs1_amo + {mask: 0xfff0707f, value: 0x1000202f, op: LR_W, args: argTypeList{arg_rd, arg_rs1_amo}}, + // LR.W.AQ rd, rs1_amo + {mask: 0xfff0707f, value: 0x1400202f, op: LR_W_AQ, args: argTypeList{arg_rd, arg_rs1_amo}}, + // LR.W.AQRL rd, rs1_amo + {mask: 0xfff0707f, value: 0x1600202f, op: LR_W_AQRL, args: argTypeList{arg_rd, arg_rs1_amo}}, + // LR.W.RL rd, rs1_amo + {mask: 0xfff0707f, value: 0x1200202f, op: LR_W_RL, args: argTypeList{arg_rd, arg_rs1_amo}}, + // LUI rd, imm20 + {mask: 0x0000007f, value: 0x00000037, op: LUI, args: argTypeList{arg_rd, arg_imm20}}, + // LW rd, rs1_mem + {mask: 0x0000707f, value: 0x00002003, op: LW, args: argTypeList{arg_rd, arg_rs1_mem}}, + // LWU rd, rs1_mem + {mask: 0x0000707f, value: 0x00006003, op: LWU, args: argTypeList{arg_rd, arg_rs1_mem}}, + // MAX rd, rs1, rs2 + {mask: 0xfe00707f, value: 0x0a006033, op: MAX, args: argTypeList{arg_rd, arg_rs1, arg_rs2}}, + // MAXU rd, rs1, rs2 + {mask: 0xfe00707f, value: 0x0a007033, op: MAXU, args: argTypeList{arg_rd, arg_rs1, arg_rs2}}, + // MIN rd, rs1, rs2 + {mask: 0xfe00707f, value: 0x0a004033, op: MIN, args: argTypeList{arg_rd, arg_rs1, arg_rs2}}, + // MINU rd, rs1, rs2 + {mask: 0xfe00707f, value: 0x0a005033, op: MINU, args: argTypeList{arg_rd, arg_rs1, arg_rs2}}, + // MUL rd, rs1, rs2 + {mask: 0xfe00707f, value: 0x02000033, op: MUL, args: 
argTypeList{arg_rd, arg_rs1, arg_rs2}}, + // MULH rd, rs1, rs2 + {mask: 0xfe00707f, value: 0x02001033, op: MULH, args: argTypeList{arg_rd, arg_rs1, arg_rs2}}, + // MULHSU rd, rs1, rs2 + {mask: 0xfe00707f, value: 0x02002033, op: MULHSU, args: argTypeList{arg_rd, arg_rs1, arg_rs2}}, + // MULHU rd, rs1, rs2 + {mask: 0xfe00707f, value: 0x02003033, op: MULHU, args: argTypeList{arg_rd, arg_rs1, arg_rs2}}, + // MULW rd, rs1, rs2 + {mask: 0xfe00707f, value: 0x0200003b, op: MULW, args: argTypeList{arg_rd, arg_rs1, arg_rs2}}, + // OR rd, rs1, rs2 + {mask: 0xfe00707f, value: 0x00006033, op: OR, args: argTypeList{arg_rd, arg_rs1, arg_rs2}}, + // ORC.B rd, rs1 + {mask: 0xfff0707f, value: 0x28705013, op: ORC_B, args: argTypeList{arg_rd, arg_rs1}}, + // ORI rd, rs1, imm12 + {mask: 0x0000707f, value: 0x00006013, op: ORI, args: argTypeList{arg_rd, arg_rs1, arg_imm12}}, + // ORN rd, rs1, rs2 + {mask: 0xfe00707f, value: 0x40006033, op: ORN, args: argTypeList{arg_rd, arg_rs1, arg_rs2}}, + // REM rd, rs1, rs2 + {mask: 0xfe00707f, value: 0x02006033, op: REM, args: argTypeList{arg_rd, arg_rs1, arg_rs2}}, + // REMU rd, rs1, rs2 + {mask: 0xfe00707f, value: 0x02007033, op: REMU, args: argTypeList{arg_rd, arg_rs1, arg_rs2}}, + // REMUW rd, rs1, rs2 + {mask: 0xfe00707f, value: 0x0200703b, op: REMUW, args: argTypeList{arg_rd, arg_rs1, arg_rs2}}, + // REMW rd, rs1, rs2 + {mask: 0xfe00707f, value: 0x0200603b, op: REMW, args: argTypeList{arg_rd, arg_rs1, arg_rs2}}, + // REV8 rd, rs1 + {mask: 0xfff0707f, value: 0x6b805013, op: REV8, args: argTypeList{arg_rd, arg_rs1}}, + // ROL rd, rs1, rs2 + {mask: 0xfe00707f, value: 0x60001033, op: ROL, args: argTypeList{arg_rd, arg_rs1, arg_rs2}}, + // ROLW rd, rs1, rs2 + {mask: 0xfe00707f, value: 0x6000103b, op: ROLW, args: argTypeList{arg_rd, arg_rs1, arg_rs2}}, + // ROR rd, rs1, rs2 + {mask: 0xfe00707f, value: 0x60005033, op: ROR, args: argTypeList{arg_rd, arg_rs1, arg_rs2}}, + // RORI rd, rs1, shamt6 + {mask: 0xfc00707f, value: 0x60005013, op: RORI, args: 
argTypeList{arg_rd, arg_rs1, arg_shamt6}}, + // RORIW rd, rs1, shamt5 + {mask: 0xfe00707f, value: 0x6000501b, op: RORIW, args: argTypeList{arg_rd, arg_rs1, arg_shamt5}}, + // RORW rd, rs1, rs2 + {mask: 0xfe00707f, value: 0x6000503b, op: RORW, args: argTypeList{arg_rd, arg_rs1, arg_rs2}}, + // SB rs2, rs1_store + {mask: 0x0000707f, value: 0x00000023, op: SB, args: argTypeList{arg_rs2, arg_rs1_store}}, + // SC.D rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0x1800302f, op: SC_D, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // SC.D.AQ rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0x1c00302f, op: SC_D_AQ, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // SC.D.AQRL rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0x1e00302f, op: SC_D_AQRL, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // SC.D.RL rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0x1a00302f, op: SC_D_RL, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // SC.W rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0x1800202f, op: SC_W, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // SC.W.AQ rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0x1c00202f, op: SC_W_AQ, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // SC.W.AQRL rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0x1e00202f, op: SC_W_AQRL, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // SC.W.RL rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0x1a00202f, op: SC_W_RL, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // SD rs2, rs1_store + {mask: 0x0000707f, value: 0x00003023, op: SD, args: argTypeList{arg_rs2, arg_rs1_store}}, + // SEXT.B rd, rs1 + {mask: 0xfff0707f, value: 0x60401013, op: SEXT_B, args: argTypeList{arg_rd, arg_rs1}}, + // SEXT.H rd, rs1 + {mask: 0xfff0707f, value: 0x60501013, op: SEXT_H, args: argTypeList{arg_rd, arg_rs1}}, + // SH rs2, rs1_store + {mask: 0x0000707f, value: 0x00001023, op: SH, args: argTypeList{arg_rs2, arg_rs1_store}}, + // SH1ADD rd, rs1, rs2 + {mask: 0xfe00707f, value: 0x20002033, op: SH1ADD, args: 
argTypeList{arg_rd, arg_rs1, arg_rs2}}, + // SH1ADD.UW rd, rs1, rs2 + {mask: 0xfe00707f, value: 0x2000203b, op: SH1ADD_UW, args: argTypeList{arg_rd, arg_rs1, arg_rs2}}, + // SH2ADD rd, rs1, rs2 + {mask: 0xfe00707f, value: 0x20004033, op: SH2ADD, args: argTypeList{arg_rd, arg_rs1, arg_rs2}}, + // SH2ADD.UW rd, rs1, rs2 + {mask: 0xfe00707f, value: 0x2000403b, op: SH2ADD_UW, args: argTypeList{arg_rd, arg_rs1, arg_rs2}}, + // SH3ADD rd, rs1, rs2 + {mask: 0xfe00707f, value: 0x20006033, op: SH3ADD, args: argTypeList{arg_rd, arg_rs1, arg_rs2}}, + // SH3ADD.UW rd, rs1, rs2 + {mask: 0xfe00707f, value: 0x2000603b, op: SH3ADD_UW, args: argTypeList{arg_rd, arg_rs1, arg_rs2}}, + // SLL rd, rs1, rs2 + {mask: 0xfe00707f, value: 0x00001033, op: SLL, args: argTypeList{arg_rd, arg_rs1, arg_rs2}}, + // SLLI rd, rs1, shamt6 + {mask: 0xfc00707f, value: 0x00001013, op: SLLI, args: argTypeList{arg_rd, arg_rs1, arg_shamt6}}, + // SLLIW rd, rs1, shamt5 + {mask: 0xfe00707f, value: 0x0000101b, op: SLLIW, args: argTypeList{arg_rd, arg_rs1, arg_shamt5}}, + // SLLI.UW rd, rs1, shamt6 + {mask: 0xfc00707f, value: 0x0800101b, op: SLLI_UW, args: argTypeList{arg_rd, arg_rs1, arg_shamt6}}, + // SLLW rd, rs1, rs2 + {mask: 0xfe00707f, value: 0x0000103b, op: SLLW, args: argTypeList{arg_rd, arg_rs1, arg_rs2}}, + // SLT rd, rs1, rs2 + {mask: 0xfe00707f, value: 0x00002033, op: SLT, args: argTypeList{arg_rd, arg_rs1, arg_rs2}}, + // SLTI rd, rs1, imm12 + {mask: 0x0000707f, value: 0x00002013, op: SLTI, args: argTypeList{arg_rd, arg_rs1, arg_imm12}}, + // SLTIU rd, rs1, imm12 + {mask: 0x0000707f, value: 0x00003013, op: SLTIU, args: argTypeList{arg_rd, arg_rs1, arg_imm12}}, + // SLTU rd, rs1, rs2 + {mask: 0xfe00707f, value: 0x00003033, op: SLTU, args: argTypeList{arg_rd, arg_rs1, arg_rs2}}, + // SRA rd, rs1, rs2 + {mask: 0xfe00707f, value: 0x40005033, op: SRA, args: argTypeList{arg_rd, arg_rs1, arg_rs2}}, + // SRAI rd, rs1, shamt6 + {mask: 0xfc00707f, value: 0x40005013, op: SRAI, args: argTypeList{arg_rd, 
arg_rs1, arg_shamt6}}, + // SRAIW rd, rs1, shamt5 + {mask: 0xfe00707f, value: 0x4000501b, op: SRAIW, args: argTypeList{arg_rd, arg_rs1, arg_shamt5}}, + // SRAW rd, rs1, rs2 + {mask: 0xfe00707f, value: 0x4000503b, op: SRAW, args: argTypeList{arg_rd, arg_rs1, arg_rs2}}, + // SRL rd, rs1, rs2 + {mask: 0xfe00707f, value: 0x00005033, op: SRL, args: argTypeList{arg_rd, arg_rs1, arg_rs2}}, + // SRLI rd, rs1, shamt6 + {mask: 0xfc00707f, value: 0x00005013, op: SRLI, args: argTypeList{arg_rd, arg_rs1, arg_shamt6}}, + // SRLIW rd, rs1, shamt5 + {mask: 0xfe00707f, value: 0x0000501b, op: SRLIW, args: argTypeList{arg_rd, arg_rs1, arg_shamt5}}, + // SRLW rd, rs1, rs2 + {mask: 0xfe00707f, value: 0x0000503b, op: SRLW, args: argTypeList{arg_rd, arg_rs1, arg_rs2}}, + // SUB rd, rs1, rs2 + {mask: 0xfe00707f, value: 0x40000033, op: SUB, args: argTypeList{arg_rd, arg_rs1, arg_rs2}}, + // SUBW rd, rs1, rs2 + {mask: 0xfe00707f, value: 0x4000003b, op: SUBW, args: argTypeList{arg_rd, arg_rs1, arg_rs2}}, + // SW rs2, rs1_store + {mask: 0x0000707f, value: 0x00002023, op: SW, args: argTypeList{arg_rs2, arg_rs1_store}}, + // XNOR rd, rs1, rs2 + {mask: 0xfe00707f, value: 0x40004033, op: XNOR, args: argTypeList{arg_rd, arg_rs1, arg_rs2}}, + // XOR rd, rs1, rs2 + {mask: 0xfe00707f, value: 0x00004033, op: XOR, args: argTypeList{arg_rd, arg_rs1, arg_rs2}}, + // XORI rd, rs1, imm12 + {mask: 0x0000707f, value: 0x00004013, op: XORI, args: argTypeList{arg_rd, arg_rs1, arg_imm12}}, + // ZEXT.H rd, rs1 + {mask: 0xfff0707f, value: 0x0800403b, op: ZEXT_H, args: argTypeList{arg_rd, arg_rs1}}, +} diff --git a/riscv64/riscv64spec/spec.go b/riscv64/riscv64spec/spec.go index 53c0f1de..55c498a0 100644 --- a/riscv64/riscv64spec/spec.go +++ b/riscv64/riscv64spec/spec.go @@ -51,7 +51,7 @@ var extensions = []string{ } const ( - prologueSec = "// Generated by riscv64spec riscv-opcodes\n// DO NOT EDIT\n\n// Copyright 2024 The Go Authors. 
All rights reserved.\n// Use of this source code is governed by a BSD-style\n// license that can be found in the LICENSE file.\n\npackage riscv64asm\n\n" + prologueSec = "// Code generated by riscv64spec riscv-opcodes\n// DO NOT EDIT\n\n// Copyright 2024 The Go Authors. All rights reserved.\n// Use of this source code is governed by a BSD-style\n// license that can be found in the LICENSE file.\n\npackage riscv64asm\n\n" opSec = "const (\n\t_ Op = iota\n" opstrSec = "var opstr = [...]string{\n" instFormatsSec = "var instFormats = [...]instFormat{\n" @@ -175,7 +175,7 @@ func genInst(words []string) { var value uint32 var mask uint32 - var instArgs []string + var argTypeList []string for i := 1; i < len(words); i++ { if strings.Contains(words[i], "=") { @@ -188,13 +188,13 @@ func genInst(words []string) { value |= subval mask |= submsk } else if len(words[i]) > 0 { - instArgs = append(instArgs, words[i]) + argTypeList = append(argTypeList, words[i]) } } - instArgsStr := inferFormats(instArgs, op) + instArgsStr := inferFormats(argTypeList, op) instFormatComment := "// " + strings.Replace(op, "_", ".", -1) + " " + strings.Replace(instArgsStr, "arg_", "", -1) - instFormat := fmt.Sprintf("{mask: %#08x, value: %#08x, op: %s, args: instArgs{%s}},", mask, value, op, instArgsStr) + instFormat := fmt.Sprintf("{mask: %#08x, value: %#08x, op: %s, args: argTypeList{%s}},", mask, value, op, instArgsStr) // Handle the suffix of atomic instruction. 
if isAtomic(op) { @@ -206,7 +206,7 @@ func genInst(words []string) { avalue := value | (uint32(i) << 25) amask := mask | 0x06000000 ainstFormatComment := "// " + strings.Replace(aop, "_", ".", -1) + " " + strings.Replace(instArgsStr, "arg_", "", -1) - ainstFormat := fmt.Sprintf("{mask: %#08x, value: %#08x, op: %s, args: instArgs{%s}},", amask, avalue, aop, instArgsStr) + ainstFormat := fmt.Sprintf("{mask: %#08x, value: %#08x, op: %s, args: argTypeList{%s}},", amask, avalue, aop, instArgsStr) ops = append(ops, aop) opstrs[aop] = aopstr instFormats[aop] = ainstFormat @@ -227,7 +227,7 @@ func genInst(words []string) { // U-Type (inst rd, imm), // SB-Type (inst rs1, rs2, offset) // S-Type (inst rs2, offset(rs1)) -func inferFormats(instArgs []string, op string) string { +func inferFormats(argTypeList []string, op string) string { switch { case strings.Contains(op, "AMO") || strings.Contains(op, "SC_"): return "arg_rd, arg_rs2, arg_rs1_amo" @@ -265,7 +265,7 @@ func inferFormats(instArgs []string, op string) string { default: var instStr []string - for _, arg := range instArgs { + for _, arg := range argTypeList { if decodeArgs(arg, op) != "" { instStr = append(instStr, decodeArgs(arg, op)) } From bc8e2b9ab6c676219fde0a71f3cdfc902daf0204 Mon Sep 17 00:00:00 2001 From: "Paul E. Murphy" Date: Wed, 29 May 2024 16:57:21 -0500 Subject: [PATCH 032/200] ppc64/ppc64asm: speed up PPC64 instruction decoding It's really slow to iterate every instruction until a match is found. This turns decoding PPC64 binaries into a seemingly quick operation instead of a seconds long process for go toolchain sized binaries. Use the primary opcode to map each instruction into a list of viable masks, and group instructions with identical masks into a map to speed up decoding. 
Change-Id: Id0d0eefbb77244c379832d8a602662e551a7568a Reviewed-on: https://go-review.googlesource.com/c/arch/+/602717 Reviewed-by: Archana Ravindar Reviewed-by: Cherry Mui LUCI-TryBot-Result: Go LUCI Reviewed-by: Dmitri Shuralyov --- ppc64/ppc64asm/decode.go | 50 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 48 insertions(+), 2 deletions(-) diff --git a/ppc64/ppc64asm/decode.go b/ppc64/ppc64asm/decode.go index b8d857c6..6c25c5c3 100644 --- a/ppc64/ppc64asm/decode.go +++ b/ppc64/ppc64asm/decode.go @@ -8,6 +8,8 @@ import ( "encoding/binary" "fmt" "log" + "sort" + "sync" ) const debugDecode = false @@ -111,6 +113,47 @@ const ( TypeLast // must be the last one ) +type InstMaskMap struct { + mask uint64 + insn map[uint64]*instFormat +} + +// Note, plxv/pstxv have a 5 bit opcode in the second instruction word. Only match the most significant 5 of 6 bits of the second primary opcode. +const lookupOpcodeMask = uint64(0xFC000000F8000000) + +// Three level lookup for any instruction: +// 1. Primary opcode map to a list of secondary opcode maps. +// 2. A list of opcodes with distinct masks, sorted by largest to smallest mask. +// 3. A map to a specific opcodes with a given mask. 
+var getLookupMap = sync.OnceValue(func() map[uint64][]InstMaskMap { + lMap := make(map[uint64][]InstMaskMap) + for idx, _ := range instFormats { + i := &instFormats[idx] + pop := i.Value & lookupOpcodeMask + var me *InstMaskMap + masks := lMap[pop] + for im, m := range masks { + if m.mask == i.Mask { + me = &masks[im] + break + } + } + if me == nil { + me = &InstMaskMap{i.Mask, map[uint64]*instFormat{}} + masks = append(masks, *me) + } + me.insn[i.Value] = i + lMap[pop] = masks + } + // Reverse sort masks to ensure extended mnemonics match before more generic forms of an opcode (e.x nop over ori 0,0,0) + for _, v := range lMap { + sort.Slice(v, func(i, j int) bool { + return v[i].mask > v[j].mask + }) + } + return lMap +}) + func (t ArgType) String() string { switch t { default: @@ -191,10 +234,13 @@ func Decode(src []byte, ord binary.ByteOrder) (inst Inst, err error) { ui |= uint64(ui_extn[1]) inst.SuffixEnc = ui_extn[1] } - for i, iform := range instFormats { - if ui&iform.Mask != iform.Value { + + fmts := getLookupMap()[ui&lookupOpcodeMask] + for i, masks := range fmts { + if _, fnd := masks.insn[masks.mask&ui]; !fnd { continue } + iform := masks.insn[masks.mask&ui] if ui&iform.DontCare != 0 { if debugDecode { log.Printf("Decode(%#x): unused bit is 1 for Op %s", ui, iform.Op) From 76fb3b0a5d488e2e7a5fbbc6fef71483b973c723 Mon Sep 17 00:00:00 2001 From: Srinivas Pokala Date: Tue, 15 Oct 2024 11:12:59 +0200 Subject: [PATCH 033/200] s390x: fix decoding several gnu instructions Some of the GNU instructions decoding incorrectly due to incorrect Immediate field type and added testcases for the same. Also, done the code clean up and reformat/Indentation. 
Change-Id: I21df05c240d918f9de48b825123c7eaa2181c259 Reviewed-on: https://go-review.googlesource.com/c/arch/+/620335 Reviewed-by: Vishwanatha HD Reviewed-by: Michael Pratt Reviewed-by: Cherry Mui LUCI-TryBot-Result: Go LUCI --- s390x/s390xasm/field.go | 51 - s390x/s390xasm/gnu.go | 28 +- s390x/s390xasm/inst.go | 62 +- s390x/s390xasm/tables.go | 75 +- s390x/s390xasm/testdata/decode_generated.txt | 1186 +++++++++--------- s390x/s390xmap/map.go | 8 +- 6 files changed, 697 insertions(+), 713 deletions(-) diff --git a/s390x/s390xasm/field.go b/s390x/s390xasm/field.go index e00415fc..29adc821 100644 --- a/s390x/s390xasm/field.go +++ b/s390x/s390xasm/field.go @@ -6,7 +6,6 @@ package s390xasm import ( "fmt" - "strings" ) // A BitField is a bit-field in a 64-bit double word. @@ -46,53 +45,3 @@ func (b BitField) ParseSigned(i uint64) int64 { u := int64(b.Parse(i)) return u << (64 - b.Bits) >> (64 - b.Bits) } - -// BitFields is a series of BitFields representing a single number. -type BitFields []BitField - -func (bs BitFields) String() string { - ss := make([]string, len(bs)) - for i, bf := range bs { - ss[i] = bf.String() - } - return fmt.Sprintf("<%s>", strings.Join(ss, "|")) -} - -func (bs *BitFields) Append(b BitField) { - *bs = append(*bs, b) -} - -// parse extracts the bitfields from i, concatenate them and return the result -// as an unsigned integer and the total length of all the bitfields. -// parse will panic if any bitfield in b is invalid, but it doesn't check if -// the sequence of bitfields is reasonable. -func (bs BitFields) parse(i uint64) (u uint64, Bits uint8) { - for _, b := range bs { - u = (u << b.Bits) | uint64(b.Parse(i)) - Bits += b.Bits - } - return u, Bits -} - -// Parse extracts the bitfields from i, concatenate them and return the result -// as an unsigned integer. Parse will panic if any bitfield in b is invalid. 
-func (bs BitFields) Parse(i uint64) uint64 { - u, _ := bs.parse(i) - return u -} - -// ParseSigned extracts the bitfields from i, concatenate them and return the result -// as a signed integer. Parse will panic if any bitfield in b is invalid. -func (bs BitFields) ParseSigned(i uint64) int64 { - u, l := bs.parse(i) - return int64(u) << (64 - l) >> (64 - l) -} - -// Count the number of bits in the aggregate BitFields -func (bs BitFields) NumBits() int { - num := 0 - for _, b := range bs { - num += int(b.Bits) - } - return num -} diff --git a/s390x/s390xasm/gnu.go b/s390x/s390xasm/gnu.go index 5755b354..71b9f138 100644 --- a/s390x/s390xasm/gnu.go +++ b/s390x/s390xasm/gnu.go @@ -280,6 +280,7 @@ func HandleExtndMnemonic(inst *Inst) string { typ5ExtndMnics{BaseOpStr: "vfce", Value1: 2, Value2: 0, Value3: 0, Offset1: 3, Offset2: 4, Offset3: 5, ExtnOpStr: "vfcesb"}, typ5ExtndMnics{BaseOpStr: "vfce", Value1: 2, Value2: 0, Value3: 1, Offset1: 3, Offset2: 4, Offset3: 5, ExtnOpStr: "vfcesbs"}, typ5ExtndMnics{BaseOpStr: "vfce", Value1: 3, Value2: 0, Value3: 0, Offset1: 3, Offset2: 4, Offset3: 5, ExtnOpStr: "vfcedb"}, + typ5ExtndMnics{BaseOpStr: "vfce", Value1: 3, Value2: 0, Value3: 1, Offset1: 3, Offset2: 4, Offset3: 5, ExtnOpStr: "vfcedbs"}, typ5ExtndMnics{BaseOpStr: "vfce", Value1: 2, Value2: 8, Value3: 0, Offset1: 3, Offset2: 4, Offset3: 5, ExtnOpStr: "wfcesb"}, typ5ExtndMnics{BaseOpStr: "vfce", Value1: 2, Value2: 8, Value3: 1, Offset1: 3, Offset2: 4, Offset3: 5, ExtnOpStr: "wfcesbs"}, typ5ExtndMnics{BaseOpStr: "vfce", Value1: 3, Value2: 8, Value3: 0, Offset1: 3, Offset2: 4, Offset3: 5, ExtnOpStr: "wfcedb"}, @@ -453,8 +454,7 @@ func HandleExtndMnemonic(inst *Inst) string { case "vavg", "vavgl", "verllv", "veslv", "vesrav", "vesrlv", "vgfm", "vgm", "vmx", "vmxl", "vmrh", "vmrl", "vmn", "vmnl", "vrep", "vclz", "vctz", "vec", "vecl", "vlc", "vlp", "vpopct", "vrepi", "verim", "verll", "vesl", "vesra", "vesrl", "vgfma", "vlrep", "vlgv", "vlvg", "vlbrrep", "vler", "vlbr", 
"vstbr", "vster", "vpk", "vme", "vmh", "vmle", "vmlh", "vmlo", "vml", "vmo", "vmae", - "vmale", "vmalo", "vmal", "vmah", "vmalh", "vmao", "vmph", "vmplh", "vupl", "vupll", "vscbi", "vs", "vsum", "vsumg", "vsumq", - "va", "vacc": + "vmale", "vmalo", "vmal", "vmah", "vmalh", "vmao", "vmph", "vmplh", "vupl", "vupll", "vscbi", "vs", "vsum", "vsumg", "vsumq", "va", "vacc": switch opString { @@ -569,16 +569,18 @@ func HandleExtndMnemonic(inst *Inst) string { break } } - case "vsum", "vsumg": - for i := 1; i < len(vecInstrExtndMnics)-4; i++ { - if uint8(inst.Args[vecInstrExtndMnics[i].Offset].(Mask)) == vecInstrExtndMnics[i].Value { - newOpStr = opString + vecInstrExtndMnics[i].ExtnOpStr - removeArg(inst, int8(vecInstrExtndMnics[i].Offset)) - break - } + case "vsum", "vsumg", "vsumq": + var off int + switch opString { + case "vsum": + off = 0 + case "vsumg": + off = 1 + case "vsumq": + off = 2 + } - case "vsumq": - for i := 2; i < len(vecInstrExtndMnics)-2; i++ { + for i := off; i < len(vecInstrExtndMnics)-4+off; i++ { if uint8(inst.Args[vecInstrExtndMnics[i].Offset].(Mask)) == vecInstrExtndMnics[i].Value { newOpStr = opString + vecInstrExtndMnics[i].ExtnOpStr removeArg(inst, int8(vecInstrExtndMnics[i].Offset)) @@ -668,8 +670,8 @@ func HandleExtndMnemonic(inst *Inst) string { case "vac", "vaccc": if uint8(inst.Args[4].(Mask)) == uint8(4) { - newOpStr = opString + vecInstrExtndMnics[3].ExtnOpStr - removeArg(inst, int8(3)) + newOpStr = opString + vecInstrExtndMnics[4].ExtnOpStr + removeArg(inst, int8(4)) } case "vceq", "vch", "vchl": diff --git a/s390x/s390xasm/inst.go b/s390x/s390xasm/inst.go index 19d70156..e1fde847 100644 --- a/s390x/s390xasm/inst.go +++ b/s390x/s390xasm/inst.go @@ -12,9 +12,9 @@ import ( type Inst struct { Op Op // Opcode mnemonic - Enc uint64 // Raw encoding bits (if Len == 8, this is the prefix word) + Enc uint64 // Raw encoding bits Len int // Length of encoding in bytes. - Args Args // Instruction arguments, in Power ISA manual order. 
+ Args Args // Instruction arguments, in s390x ISA manual order. } func (i Inst) String(pc uint64) string { @@ -26,19 +26,32 @@ func (i Inst) String(pc uint64) string { } mnemonic := HandleExtndMnemonic(&i) buf.WriteString(fmt.Sprintf("%s", mnemonic)) - for j, arg := range i.Args { - if arg == nil { + for j := 0; j < len(i.Args); j++ { + if i.Args[j] == nil { break } + str := i.Args[j].String(pc) if j == 0 { buf.WriteString(" ") } else { - switch arg.(type) { - case VReg, Reg: + switch i.Args[j].(type) { + case VReg: if _, ok := i.Args[j-1].(Disp12); ok { - buf.WriteString("") + buf.WriteString("(") } else if _, ok := i.Args[j-1].(Disp20); ok { - buf.WriteString("") + buf.WriteString("(") + } else { + buf.WriteString(",") + } + case Reg: + if _, ok := i.Args[j-1].(Disp12); ok { + if str != "" { + buf.WriteString("(") + } + } else if _, ok := i.Args[j-1].(Disp20); ok { + if str != "" { + buf.WriteString("(") + } } else { buf.WriteString(",") } @@ -47,13 +60,34 @@ func (i Inst) String(pc uint64) string { buf.WriteString(",") } else if _, ok := i.Args[j-1].(Reg); ok { buf.WriteString(",") + } else if _, ok := i.Args[j-1].(Disp12); ok { + if str != "" { + buf.WriteString("(") + } + } else if _, ok := i.Args[j-1].(Disp20); ok { + if str != "" { + buf.WriteString("(") + } + } else if _, ok := i.Args[j-1].(Len); ok { + buf.WriteString(",") + } else if _, ok := i.Args[j-1].(Index); ok { + if ((i.Args[j-1].String(pc)) != "") && str != "" { + str = "," + str + } else if str == "" { + str = ")" + } } case Index, Len: + if str != "" || (i.Args[j+1].String(pc)) != "" { + buf.WriteString("(") + } else { + j = j + 1 + } default: buf.WriteString(",") } } - buf.WriteString(arg.String(pc)) + buf.WriteString(str) if rxb_check && i.Args[j+2] == nil { break } @@ -145,7 +179,7 @@ func (r Index) String(pc uint64) string { switch { case X1 <= r && r <= X15: s := "%" - return fmt.Sprintf("%sr%d,", s, int(r-X0)) + return fmt.Sprintf("%sr%d", s, int(r-X0)) case X0 == r: return 
fmt.Sprintf("") default: @@ -159,9 +193,9 @@ type Disp20 uint32 func (Disp20) IsArg() {} func (r Disp20) String(pc uint64) string { if (r>>19)&0x01 == 1 { - return fmt.Sprintf("%d(", int32(r|0xfff<<20)) + return fmt.Sprintf("%d", int32(r|0xfff<<20)) } else { - return fmt.Sprintf("%d(", int32(r)) + return fmt.Sprintf("%d", int32(r)) } } @@ -170,7 +204,7 @@ type Disp12 uint16 func (Disp12) IsArg() {} func (r Disp12) String(pc uint64) string { - return fmt.Sprintf("%d(", r) + return fmt.Sprintf("%d", r) } // RegIm12 represents an 12-bit Register immediate number. @@ -395,5 +429,5 @@ type Len uint8 func (Len) IsArg() {} func (i Len) String(pc uint64) string { - return fmt.Sprintf("%d,", uint16(i)+1) + return fmt.Sprintf("%d", uint16(i)+1) } diff --git a/s390x/s390xasm/tables.go b/s390x/s390xasm/tables.go index f0db5e90..5a66c1fe 100644 --- a/s390x/s390xasm/tables.go +++ b/s390x/s390xasm/tables.go @@ -2518,8 +2518,6 @@ var ( ap_ImmUnsigned_16_47 = &argField{Type: TypeImmUnsigned, flags: 0x0, BitField: BitField{16, 32}} ap_FPReg_12_15 = &argField{Type: TypeFPReg, flags: 0x2, BitField: BitField{12, 4}} ap_Len_8_15 = &argField{Type: TypeLen, flags: 0x10, BitField: BitField{8, 8}} - ap_ImmUnsigned_8_15 = &argField{Type: TypeImmUnsigned, flags: 0x0, BitField: BitField{8, 8}} - ap_ImmUnsigned_16_31 = &argField{Type: TypeImmUnsigned, flags: 0x0, BitField: BitField{16, 16}} ap_Mask_8_11 = &argField{Type: TypeMask, flags: 0x800, BitField: BitField{8, 4}} ap_RegImSigned16_32_47 = &argField{Type: TypeRegImSigned16, flags: 0x80, BitField: BitField{32, 16}} ap_RegImSigned12_12_23 = &argField{Type: TypeRegImSigned12, flags: 0x80, BitField: BitField{12, 12}} @@ -2531,8 +2529,10 @@ var ( ap_ImmSigned16_32_47 = &argField{Type: TypeImmSigned16, flags: 0x0, BitField: BitField{32, 16}} ap_ImmSigned8_32_39 = &argField{Type: TypeImmSigned8, flags: 0x0, BitField: BitField{32, 8}} ap_Mask_12_15 = &argField{Type: TypeMask, flags: 0x800, BitField: BitField{12, 4}} + ap_ImmUnsigned_8_15 = 
&argField{Type: TypeImmUnsigned, flags: 0x0, BitField: BitField{8, 8}} ap_ImmUnsigned_32_47 = &argField{Type: TypeImmUnsigned, flags: 0x0, BitField: BitField{32, 16}} ap_ImmUnsigned_32_39 = &argField{Type: TypeImmUnsigned, flags: 0x0, BitField: BitField{32, 8}} + ap_ImmUnsigned_16_31 = &argField{Type: TypeImmUnsigned, flags: 0x0, BitField: BitField{16, 16}} ap_FPReg_32_35 = &argField{Type: TypeFPReg, flags: 0x2, BitField: BitField{32, 4}} ap_Mask_36_39 = &argField{Type: TypeMask, flags: 0x800, BitField: BitField{36, 4}} ap_ACReg_24_27 = &argField{Type: TypeACReg, flags: 0x3, BitField: BitField{24, 4}} @@ -2546,6 +2546,7 @@ var ( ap_ACReg_12_15 = &argField{Type: TypeACReg, flags: 0x3, BitField: BitField{12, 4}} ap_CReg_8_11 = &argField{Type: TypeCReg, flags: 0x4, BitField: BitField{8, 4}} ap_CReg_12_15 = &argField{Type: TypeCReg, flags: 0x4, BitField: BitField{12, 4}} + ap_ImmSigned32_16_31 = &argField{Type: TypeImmSigned32, flags: 0x0, BitField: BitField{16, 16}} ap_ImmUnsigned_24_27 = &argField{Type: TypeImmUnsigned, flags: 0x0, BitField: BitField{24, 4}} ap_ImmUnsigned_28_31 = &argField{Type: TypeImmUnsigned, flags: 0x0, BitField: BitField{28, 4}} ap_ImmUnsigned_16_23 = &argField{Type: TypeImmUnsigned, flags: 0x0, BitField: BitField{16, 8}} @@ -2706,21 +2707,21 @@ var instFormats = [...]instFormat{ {NC, 0xff00000000000000, 0xd400000000000000, 0x0, // AND (character) (NC D1(L1,B1),D2(B2)) [8]*argField{ap_DispUnsigned_20_31, ap_Len_8_15, ap_BaseReg_16_19, ap_DispUnsigned_36_47, ap_BaseReg_32_35}}, {NI, 0xff00000000000000, 0x9400000000000000, 0x0, // AND (immediate) (NI D1(B1),I2) - [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19, ap_ImmUnsigned_8_15}}, + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19, ap_ImmSigned8_8_15}}, {NIY, 0xff00000000ff0000, 0xeb00000000540000, 0x0, // AND (immediate) (NIY D1(B1),I2) - [8]*argField{ap_DispSigned20_20_39, ap_BaseReg_16_19, ap_ImmUnsigned_8_15}}, + [8]*argField{ap_DispSigned20_20_39, ap_BaseReg_16_19, 
ap_ImmSigned8_8_15}}, {NIHH, 0xff0f000000000000, 0xa504000000000000, 0x0, // AND IMMEDIATE (high high) (NIHH R1,I2) - [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_31}}, + [8]*argField{ap_Reg_8_11, ap_ImmSigned16_16_31}}, {NIHL, 0xff0f000000000000, 0xa505000000000000, 0x0, // AND IMMEDIATE (high low) (NIHL R1,I2) - [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_31}}, + [8]*argField{ap_Reg_8_11, ap_ImmSigned16_16_31}}, {NIHF, 0xff0f000000000000, 0xc00a000000000000, 0x0, // AND IMMEDIATE (high) (NIHF R1,I2) - [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_47}}, + [8]*argField{ap_Reg_8_11, ap_ImmSigned32_16_47}}, {NILH, 0xff0f000000000000, 0xa506000000000000, 0x0, // AND IMMEDIATE (low high) (NILH R1,I2) - [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_31}}, + [8]*argField{ap_Reg_8_11, ap_ImmSigned16_16_31}}, {NILL, 0xff0f000000000000, 0xa507000000000000, 0x0, // AND IMMEDIATE (low low) (NILL R1,I2) - [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_31}}, + [8]*argField{ap_Reg_8_11, ap_ImmSigned16_16_31}}, {NILF, 0xff0f000000000000, 0xc00b000000000000, 0x0, // AND IMMEDIATE (low) (NILF R1,I2) - [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_47}}, + [8]*argField{ap_Reg_8_11, ap_ImmSigned32_16_47}}, {NCRK, 0xffff000000000000, 0xb9f5000000000000, 0xf0000000000, // AND WITH COMPLEMENT(32) (NCRK R1,R2,R3) [8]*argField{ap_Reg_24_27, ap_Reg_28_31, ap_Reg_16_19}}, {NCGRK, 0xffff000000000000, 0xb9e5000000000000, 0xf0000000000, // AND WITH COMPLEMENT(64) (NCGRK R1,R2,R3) @@ -3338,13 +3339,13 @@ var instFormats = [...]instFormat{ {XC, 0xff00000000000000, 0xd700000000000000, 0x0, // EXCLUSIVE OR (character) (XC D1(L1,B1),D2(B2)) [8]*argField{ap_DispUnsigned_20_31, ap_Len_8_15, ap_BaseReg_16_19, ap_DispUnsigned_36_47, ap_BaseReg_32_35}}, {XI, 0xff00000000000000, 0x9700000000000000, 0x0, // EXCLUSIVE OR (immediate) (XI D1(B1),I2) - [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19, ap_ImmUnsigned_8_15}}, + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19, ap_ImmSigned8_8_15}}, {XIY, 
0xff00000000ff0000, 0xeb00000000570000, 0x0, // EXCLUSIVE OR (immediate) (XIY D1(B1),I2) - [8]*argField{ap_DispSigned20_20_39, ap_BaseReg_16_19, ap_ImmUnsigned_8_15}}, + [8]*argField{ap_DispSigned20_20_39, ap_BaseReg_16_19, ap_ImmSigned8_8_15}}, {XIHF, 0xff0f000000000000, 0xc006000000000000, 0x0, // EXCLUSIVE OR IMMEDIATE (high) (XIHF R1,I2) - [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_47}}, + [8]*argField{ap_Reg_8_11, ap_ImmSigned32_16_47}}, {XILF, 0xff0f000000000000, 0xc007000000000000, 0x0, // EXCLUSIVE OR IMMEDIATE (low) (XILF R1,I2) - [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_47}}, + [8]*argField{ap_Reg_8_11, ap_ImmSigned32_16_47}}, {EX, 0xff00000000000000, 0x4400000000000000, 0x0, // EXECUTE (EX R1,D2(X2,B2)) [8]*argField{ap_Reg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, {EXRL, 0xff0f000000000000, 0xc600000000000000, 0x0, // EXECUTE RELATIVE LONG (EXRL R1,RI2) @@ -3642,7 +3643,7 @@ var instFormats = [...]instFormat{ {LOCFHR, 0xffff000000000000, 0xb9e0000000000000, 0xf0000000000, // LOAD HIGH ON CONDITION (32) (LOCFHR R1,R2,M3) [8]*argField{ap_Reg_24_27, ap_Reg_28_31, ap_Mask_16_19}}, {LGFI, 0xff0f000000000000, 0xc001000000000000, 0x0, // LOAD IMMEDIATE (64→32) (LGFI R1,I2) - [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_47}}, + [8]*argField{ap_Reg_8_11, ap_ImmSigned32_16_47}}, {LXDB, 0xff00000000ff0000, 0xed00000000050000, 0xff000000, // LOAD LENGTHENED (long to extended BFP) (LXDB R1,D2(X2,B2)) [8]*argField{ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, {LXDBR, 0xffff000000000000, 0xb305000000000000, 0xff0000000000, // LOAD LENGTHENED (long to extended BFP) (LXDBR R1,R2) @@ -3706,17 +3707,17 @@ var instFormats = [...]instFormat{ {LLGHRL, 0xff0f000000000000, 0xc406000000000000, 0x0, // LOAD LOGICAL HALFWORD RELATIVE LONG(64→16) (LLGHRL R1,RI2) [8]*argField{ap_Reg_8_11, ap_RegImSigned32_16_47}}, {LLIHH, 0xff0f000000000000, 0xa50c000000000000, 0x0, // LOAD LOGICAL IMMEDIATE (high high) (LLIHH R1,I2) - 
[8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_31}}, + [8]*argField{ap_Reg_8_11, ap_ImmSigned16_16_31}}, {LLIHL, 0xff0f000000000000, 0xa50d000000000000, 0x0, // LOAD LOGICAL IMMEDIATE (high low) (LLIHL R1,I2) - [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_31}}, + [8]*argField{ap_Reg_8_11, ap_ImmSigned16_16_31}}, {LLIHF, 0xff0f000000000000, 0xc00e000000000000, 0x0, // LOAD LOGICAL IMMEDIATE (high) (LLIHF R1,I2) - [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_47}}, + [8]*argField{ap_Reg_8_11, ap_ImmSigned32_16_47}}, {LLILH, 0xff0f000000000000, 0xa50e000000000000, 0x0, // LOAD LOGICAL IMMEDIATE (low high) (LLILH R1,I2) - [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_31}}, + [8]*argField{ap_Reg_8_11, ap_ImmSigned16_16_31}}, {LLILL, 0xff0f000000000000, 0xa50f000000000000, 0x0, // LOAD LOGICAL IMMEDIATE (low low) (LLILL R1,I2) - [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_31}}, + [8]*argField{ap_Reg_8_11, ap_ImmSigned16_16_31}}, {LLILF, 0xff0f000000000000, 0xc00f000000000000, 0x0, // LOAD LOGICAL IMMEDIATE (low) (LLILF R1,I2) - [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_47}}, + [8]*argField{ap_Reg_8_11, ap_ImmSigned32_16_47}}, {LLGFRL, 0xff0f000000000000, 0xc40e000000000000, 0x0, // LOAD LOGICAL RELATIVE LONG (64→32) (LLGFRL R1,RI2) [8]*argField{ap_Reg_8_11, ap_RegImSigned32_16_47}}, {LLGT, 0xff00000000ff0000, 0xe300000000170000, 0x0, // LOAD LOGICAL THIRTY ONE BITS (64→31) (LLGT R1,D2(X2,B2)) @@ -4016,9 +4017,9 @@ var instFormats = [...]instFormat{ {MGH, 0xff00000000ff0000, 0xe3000000003c0000, 0x0, // MULTIPLY HALFWORD (64→16) (MGH R1,D2(X2,B2)) [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, {MHI, 0xff0f000000000000, 0xa70c000000000000, 0x0, // MULTIPLY HALFWORD IMMEDIATE (32→16) (MHI R1,I2) - [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_31}}, + [8]*argField{ap_Reg_8_11, ap_ImmSigned32_16_31}}, {MGHI, 0xff0f000000000000, 0xa70d000000000000, 0x0, // MULTIPLY HALFWORD IMMEDIATE (64→16) (MGHI R1,I2) - [8]*argField{ap_Reg_8_11, 
ap_ImmUnsigned_16_31}}, + [8]*argField{ap_Reg_8_11, ap_ImmSigned32_16_31}}, {MLG, 0xff00000000ff0000, 0xe300000000860000, 0x0, // MULTIPLY LOGICAL (128→64) (MLG R1,D2(X2,B2)) [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, {MLGR, 0xffff000000000000, 0xb986000000000000, 0xff0000000000, // MULTIPLY LOGICAL (128→64) (MLGR R1,R2) @@ -4050,9 +4051,9 @@ var instFormats = [...]instFormat{ {MSGFR, 0xffff000000000000, 0xb91c000000000000, 0xff0000000000, // MULTIPLY SINGLE (64←32) (MSGFR R1,R2) [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, {MSFI, 0xff0f000000000000, 0xc201000000000000, 0x0, // MULTIPLY SINGLE IMMEDIATE (32) (MSFI R1,I2) - [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_47}}, + [8]*argField{ap_Reg_8_11, ap_ImmSigned32_16_47}}, {MSGFI, 0xff0f000000000000, 0xc200000000000000, 0x0, // MULTIPLY SINGLE IMMEDIATE (64←32) (MSGFI R1,I2) - [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_47}}, + [8]*argField{ap_Reg_8_11, ap_ImmSigned32_16_47}}, {MYH, 0xff00000000ff0000, 0xed000000003d0000, 0xf000000, // MULTIPLY UNNORM. (long to ext. high HFP) (MYH R1,R3,D2(X2,B2)) [8]*argField{ap_FPReg_32_35, ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, {MYHR, 0xffff000000000000, 0xb33d000000000000, 0xf0000000000, // MULTIPLY UNNORM. (long to ext. 
high HFP) (MYHR R1,R3,R2) @@ -4100,21 +4101,21 @@ var instFormats = [...]instFormat{ {OC, 0xff00000000000000, 0xd600000000000000, 0x0, // OR (character) (OC D1(L1,B1),D2(B2)) [8]*argField{ap_DispUnsigned_20_31, ap_Len_8_15, ap_BaseReg_16_19, ap_DispUnsigned_36_47, ap_BaseReg_32_35}}, {OI, 0xff00000000000000, 0x9600000000000000, 0x0, // OR (immediate) (OI D1(B1),I2) - [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19, ap_ImmUnsigned_8_15}}, + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19, ap_ImmSigned8_8_15}}, {OIY, 0xff00000000ff0000, 0xeb00000000560000, 0x0, // OR (immediate) (OIY D1(B1),I2) - [8]*argField{ap_DispSigned20_20_39, ap_BaseReg_16_19, ap_ImmUnsigned_8_15}}, + [8]*argField{ap_DispSigned20_20_39, ap_BaseReg_16_19, ap_ImmSigned8_8_15}}, {OIHH, 0xff0f000000000000, 0xa508000000000000, 0x0, // OR IMMEDIATE (high high) (OIHH R1,I2) - [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_31}}, + [8]*argField{ap_Reg_8_11, ap_ImmSigned16_16_31}}, {OIHL, 0xff0f000000000000, 0xa509000000000000, 0x0, // OR IMMEDIATE (high low) (OIHL R1,I2) - [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_31}}, + [8]*argField{ap_Reg_8_11, ap_ImmSigned16_16_31}}, {OIHF, 0xff0f000000000000, 0xc00c000000000000, 0x0, // OR IMMEDIATE (high) (OIHF R1,I2) - [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_47}}, + [8]*argField{ap_Reg_8_11, ap_ImmSigned32_16_47}}, {OILH, 0xff0f000000000000, 0xa50a000000000000, 0x0, // OR IMMEDIATE (low high) (OILH R1,I2) - [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_31}}, + [8]*argField{ap_Reg_8_11, ap_ImmSigned16_16_31}}, {OILL, 0xff0f000000000000, 0xa50b000000000000, 0x0, // OR IMMEDIATE (low low) (OILL R1,I2) - [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_31}}, + [8]*argField{ap_Reg_8_11, ap_ImmSigned16_16_31}}, {OILF, 0xff0f000000000000, 0xc00d000000000000, 0x0, // OR IMMEDIATE (low) (OILF R1,I2) - [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_47}}, + [8]*argField{ap_Reg_8_11, ap_ImmSigned32_16_47}}, {OCRK, 0xffff000000000000, 0xb975000000000000, 0xf0000000000, // 
OR WITH COMPLEMENT (32) (OCRK R1,R2,R3) [8]*argField{ap_Reg_24_27, ap_Reg_28_31, ap_Reg_16_19}}, {OCGRK, 0xffff000000000000, 0xb965000000000000, 0xf0000000000, // OR WITH COMPLEMENT (64) (OCGRK R1,R2,R3) @@ -4830,13 +4831,13 @@ var instFormats = [...]instFormat{ {VLEB, 0xff00000000ff0000, 0xe700000000000000, 0x0, // VECTOR LOAD ELEMENT (8) (VLEB V1,D2(X2,B2),M3) [8]*argField{ap_VecReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, {VLEIH, 0xff00000000ff0000, 0xe700000000410000, 0xf000000000000, // VECTOR LOAD ELEMENT IMMEDIATE (16) (VLEIH V1,I2,M3) - [8]*argField{ap_VecReg_8_11, ap_ImmUnsigned_16_31, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + [8]*argField{ap_VecReg_8_11, ap_ImmSigned16_16_31, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, {VLEIF, 0xff00000000ff0000, 0xe700000000430000, 0xf000000000000, // VECTOR LOAD ELEMENT IMMEDIATE (32) (VLEIF V1,I2,M3) - [8]*argField{ap_VecReg_8_11, ap_ImmUnsigned_16_31, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + [8]*argField{ap_VecReg_8_11, ap_ImmSigned16_16_31, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, {VLEIG, 0xff00000000ff0000, 0xe700000000420000, 0xf000000000000, // VECTOR LOAD ELEMENT IMMEDIATE (64) (VLEIG V1,I2,M3) - [8]*argField{ap_VecReg_8_11, ap_ImmUnsigned_16_31, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + [8]*argField{ap_VecReg_8_11, ap_ImmSigned16_16_31, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, {VLEIB, 0xff00000000ff0000, 0xe700000000400000, 0xf000000000000, // VECTOR LOAD ELEMENT IMMEDIATE (8) (VLEIB V1,I2,M3) - [8]*argField{ap_VecReg_8_11, ap_ImmUnsigned_16_31, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + [8]*argField{ap_VecReg_8_11, ap_ImmSigned16_16_31, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, {VLER, 0xff00000000ff0000, 0xe600000000070000, 0x0, // VECTOR LOAD ELEMENTS REVERSED (VLER V1,D2(X2,B2),M3) [8]*argField{ap_VecReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, {VFI, 0xff00000000ff0000, 0xe700000000c70000, 
0xff0000000000, // VECTOR LOAD FP INTEGER (VFI V1,V2,M3,M4,M5) diff --git a/s390x/s390xasm/testdata/decode_generated.txt b/s390x/s390xasm/testdata/decode_generated.txt index b7b3f09f..738a76e0 100644 --- a/s390x/s390xasm/testdata/decode_generated.txt +++ b/s390x/s390xasm/testdata/decode_generated.txt @@ -1,170 +1,170 @@ - 5a82100b| gnu a %r8,11(%r2,%r1) - 1a80| gnu ar %r8,%r0 - b9f80080| gnu ark %r8,%r0,%r0 + 5a82100b| gnu a %r8,11(%r2,%r1) + 1a80| gnu ar %r8,%r0 + b9f80080| gnu ark %r8,%r0,%r0 e382100b005a| gnu ay %r8,11(%r2,%r1) e382100b0008| gnu ag %r8,11(%r2,%r1) - b9080080| gnu agr %r8,%r0 - b9e80080| gnu agrk %r8,%r0,%r0 + b9080080| gnu agr %r8,%r0 + b9e80080| gnu agrk %r8,%r0,%r0 e382100b0018| gnu agf %r8,11(%r2,%r1) - b9180080| gnu agfr %r8,%r0 - b34a0080| gnu axbr %f8,%f0 - b3da0080| gnu axtr %f8,%f0,%f0 + b9180080| gnu agfr %r8,%r0 + b34a0080| gnu axbr %f8,%f0 + b3da0080| gnu axtr %f8,%f0,%f0 b3da0180| gnu axtra %f8,%f0,%f0,1 ed82100b001a| gnu adb %f8,11(%r2,%r1) - b31a0080| gnu adbr %f8,%f0 - b3d20080| gnu adtr %f8,%f0,%f0 + b31a0080| gnu adbr %f8,%f0 + b3d20080| gnu adtr %f8,%f0,%f0 b3d20180| gnu adtra %f8,%f0,%f0,1 ed82100b000a| gnu aeb %f8,11(%r2,%r1) - b30a0080| gnu aebr %f8,%f0 + b30a0080| gnu aebr %f8,%f0 fa332006100b| gnu ap 6(4,%r2),11(4,%r1) 4a82100b| gnu ah %r8,11(%r2,%r1) e382100b007a| gnu ahy %r8,11(%r2,%r1) e382100b0038| gnu agh %r8,11(%r2,%r1) - a78a0008| gnu ahi %r8,8 - a78b0008| gnu aghi %r8,8 - b9c80080| gnu ahhhr %r8,%r0,%r0 - b9d80080| gnu ahhlr %r8,%r0,%r0 -c28900000008| gnu afi %r8,8 -ec80000800d8| gnu ahik %r8,%r0,8 -eb082006006a| gnu asi 6(%r2),8 -ec80000800d9| gnu aghik %r8,%r0,8 -c28800000008| gnu agfi %r8,8 -eb082006007a| gnu agsi 6(%r2),8 -cc8800000008| gnu aih %r8,8 + a78a0008| gnu ahi %r8,8 + a78b0008| gnu aghi %r8,8 + b9c80080| gnu ahhhr %r8,%r0,%r0 + b9d80080| gnu ahhlr %r8,%r0,%r0 +c28900000008| gnu afi %r8,8 +ec80000800d8| gnu ahik %r8,%r0,8 +eb082006006a| gnu asi 6(%r2),8 +ec80000800d9| gnu aghik %r8,%r0,8 +c28800000008| 
gnu agfi %r8,8 +eb082006007a| gnu agsi 6(%r2),8 +cc8800000008| gnu aih %r8,8 5e82100b| gnu al %r8,11(%r2,%r1) - 1e80| gnu alr %r8,%r0 - b9fa0080| gnu alrk %r8,%r0,%r0 + 1e80| gnu alr %r8,%r0 + b9fa0080| gnu alrk %r8,%r0,%r0 e382100b005e| gnu aly %r8,11(%r2,%r1) e382100b000a| gnu alg %r8,11(%r2,%r1) - b90a0080| gnu algr %r8,%r0 - b9ea0080| gnu algrk %r8,%r0,%r0 + b90a0080| gnu algr %r8,%r0 + b9ea0080| gnu algrk %r8,%r0,%r0 e382100b001a| gnu algf %r8,11(%r2,%r1) - b91a0080| gnu algfr %r8,%r0 + b91a0080| gnu algfr %r8,%r0 b9ca0080| gnu alhhhr %r8,%r0,%r0 b9da0080| gnu alhhlr %r8,%r0,%r0 -c28b00000008| gnu alfi %r8,8 -c28a00000008| gnu algfi %r8,8 +c28b00000008| gnu alfi %r8,8 +c28a00000008| gnu algfi %r8,8 e382100b0098| gnu alc %r8,11(%r2,%r1) - b9980080| gnu alcr %r8,%r0 + b9980080| gnu alcr %r8,%r0 e382100b0088| gnu alcg %r8,11(%r2,%r1) - b9880080| gnu alcgr %r8,%r0 -ec80000800da| gnu alhsik %r8,%r0,8 -eb082006006e| gnu alsi 6(%r2),8 -ec80000800db| gnu alghsik %r8,%r0,8 -eb082006007e| gnu algsi 6(%r2),8 -cc8a00000008| gnu alsih %r8,8 -cc8b00000008| gnu alsihn %r8,8 - 3680| gnu axr %f8,%f0 + b9880080| gnu alcgr %r8,%r0 +ec80000800da| gnu alhsik %r8,%r0,8 +eb082006006e| gnu alsi 6(%r2),8 +ec80000800db| gnu alghsik %r8,%r0,8 +eb082006007e| gnu algsi 6(%r2),8 +cc8a00000008| gnu alsih %r8,8 +cc8b00000008| gnu alsihn %r8,8 + 3680| gnu axr %f8,%f0 6a82100b| gnu ad %f8,11(%r2,%r1) - 2a80| gnu adr %f8,%f0 + 2a80| gnu adr %f8,%f0 7a82100b| gnu ae %f8,11(%r2,%r1) - 3a80| gnu aer %f8,%f0 + 3a80| gnu aer %f8,%f0 6e82100b| gnu aw %f8,11(%r2,%r1) - 2e80| gnu awr %f8,%f0 + 2e80| gnu awr %f8,%f0 7e82100b| gnu au %f8,11(%r2,%r1) - 3e80| gnu aur %f8,%f0 - 5482100b| gnu n %r8,11(%r2,%r1) - 1480| gnu nr %r8,%r0 - b9f40080| gnu nrk %r8,%r0,%r0 + 3e80| gnu aur %f8,%f0 + 5482100b| gnu n %r8,11(%r2,%r1) + 1480| gnu nr %r8,%r0 + b9f40080| gnu nrk %r8,%r0,%r0 e382100b0054| gnu ny %r8,11(%r2,%r1) e382100b0080| gnu ng %r8,11(%r2,%r1) - b9800080| gnu ngr %r8,%r0 - b9e40080| gnu ngrk %r8,%r0,%r0 
+ b9800080| gnu ngr %r8,%r0 + b9e40080| gnu ngrk %r8,%r0,%r0 d4032006100b| gnu nc 6(4,%r2),11(%r1) - 94082006| gnu ni 6(%r2),8 -eb0820060054| gnu niy 6(%r2),8 - a5840008| gnu nihh %r8,8 - a5850008| gnu nihl %r8,8 -c08a00000008| gnu nihf %r8,8 - a5860008| gnu nilh %r8,8 - a5870008| gnu nill %r8,8 -c08b00000008| gnu nilf %r8,8 - b9f50080| gnu ncrk %r8,%r0,%r0 - b9e50080| gnu ncgrk %r8,%r0,%r0 + 94ff2006| gnu ni 6(%r2),-1 +ebff20060054| gnu niy 6(%r2),-1 + a584fffe| gnu nihh %r8,-2 + a585fffe| gnu nihl %r8,-2 +c08afffffffe| gnu nihf %r8,-2 + a586fffe| gnu nilh %r8,-2 + a587fffe| gnu nill %r8,-2 +c08bfffffffe| gnu nilf %r8,-2 + b9f50080| gnu ncrk %r8,%r0,%r0 + b9e50080| gnu ncgrk %r8,%r0,%r0 4582100b| gnu bal %r8,11(%r2,%r1) - 0580| gnu balr %r8,%r0 + 0580| gnu balr %r8,%r0 4d82100b| gnu bas %r8,11(%r2,%r1) - 0d80| gnu basr %r8,%r0 - 0c80| gnu bassm %r8,%r0 - b25a0080| gnu bsa %r8,%r0 - 0b80| gnu bsm %r8,%r0 - b2400080| gnu bakr %r8,%r0 - b2580080| gnu bsg %r8,%r0 -e372100b0047| gnu bine 11(%r2,%r1) - 4772100b| gnu bne 11(%r2,%r1) - 0770| gnu bner %r0 + 0d80| gnu basr %r8,%r0 + 0c80| gnu bassm %r8,%r0 + b25a0080| gnu bsa %r8,%r0 + 0b80| gnu bsm %r8,%r0 + b2400080| gnu bakr %r8,%r0 + b2580080| gnu bsg %r8,%r0 +e372100b0047| gnu bine 11(%r2,%r1) + 4772100b| gnu bne 11(%r2,%r1) + 0770| gnu bner %r0 4682100b| gnu bct %r8,11(%r2,%r1) - 0680| gnu bctr %r8,%r0 + 0680| gnu bctr %r8,%r0 e382100b0046| gnu bctg %r8,11(%r2,%r1) - b9460080| gnu bctgr %r8,%r0 + b9460080| gnu bctgr %r8,%r0 8680100b| gnu bxh %r8,%r0,11(%r1) eb80100b0044| gnu bxhg %r8,%r0,11(%r1) 8780100b| gnu bxle %r8,%r0,11(%r1) eb80100b0045| gnu bxleg %r8,%r0,11(%r1) c77060b60000| gnu bpp 7,0x1cc,182(%r6) c57000000093| gnu bprp 7,0x1d2,0x2f8 - a7850000| gnu bras %r8,0x1d8 -c08500000000| gnu brasl %r8,0x1dc - a7740000| gnu jne 0x1e2 -c07400000000| gnu jgne 0x1e6 - a7860000| gnu brct %r8,0x1ec - a7870000| gnu brctg %r8,0x1f0 -cc8600000000| gnu brcth %r8,0x1f4 + a7850000| gnu bras %r8,0x1d8 +c08500000000| gnu brasl 
%r8,0x1dc + a7740000| gnu jne 0x1e2 +c07400000000| gnu jgne 0x1e6 + a7860000| gnu brct %r8,0x1ec + a7870000| gnu brctg %r8,0x1f0 +cc8600000000| gnu brcth %r8,0x1f4 84800000| gnu brxh %r8,%r0,0x1fa ec8000000044| gnu brxhg %r8,%r0,0x1fe 85800000| gnu brxle %r8,%r0,0x204 ec8000000045| gnu brxlg %r8,%r0,0x208 - b2760000| gnu xsch - b2410080| gnu cksm %r8,%r0 - b92e0080| gnu km %r8,%r0 - b9290080| gnu kma %r8,%r0,%r0 - b92f0080| gnu kmc %r8,%r0 - b92a0080| gnu kmf %r8,%r0 - b92d0080| gnu kmctr %r8,%r0,%r0 - b92b0080| gnu kmo %r8,%r0 - b2300000| gnu csch - 5982100b| gnu c %r8,11(%r2,%r1) - 1980| gnu cr %r8,%r0 + b2760000| gnu xsch + b2410080| gnu cksm %r8,%r0 + b92e0080| gnu km %r8,%r0 + b9290080| gnu kma %r8,%r0,%r0 + b92f0080| gnu kmc %r8,%r0 + b92a0080| gnu kmf %r8,%r0 + b92d0080| gnu kmctr %r8,%r0,%r0 + b92b0080| gnu kmo %r8,%r0 + b2300000| gnu csch + 5982100b| gnu c %r8,11(%r2,%r1) + 1980| gnu cr %r8,%r0 e382100b0059| gnu cy %r8,11(%r2,%r1) e382100b0020| gnu cg %r8,11(%r2,%r1) - b9200080| gnu cgr %r8,%r0 + b9200080| gnu cgr %r8,%r0 e382100b0030| gnu cgf %r8,11(%r2,%r1) - b9300080| gnu cgfr %r8,%r0 - b3490080| gnu cxbr %f8,%f0 - b3ec0080| gnu cxtr %f8,%f0 - b3690080| gnu cxr %f8,%f0 + b9300080| gnu cgfr %r8,%r0 + b3490080| gnu cxbr %f8,%f0 + b3ec0080| gnu cxtr %f8,%f0 + b3690080| gnu cxr %f8,%f0 ed82100b0019| gnu cdb %f8,11(%r2,%r1) - b3190080| gnu cdbr %f8,%f0 - b3e40080| gnu cdtr %f8,%f0 + b3190080| gnu cdbr %f8,%f0 + b3e40080| gnu cdtr %f8,%f0 6982100b| gnu cd %f8,11(%r2,%r1) - 2980| gnu cdr %f8,%f0 + 2980| gnu cdr %f8,%f0 ed82100b0009| gnu ceb %f8,11(%r2,%r1) - b3090080| gnu cebr %f8,%f0 + b3090080| gnu cebr %f8,%f0 7982100b| gnu ce %f8,11(%r2,%r1) - 3980| gnu cer %f8,%f0 + 3980| gnu cer %f8,%f0 ec8080cd30f6| gnu crb %r8,%r0,3,205(%r8) ec8080cd30e4| gnu cgrb %r8,%r0,3,205(%r8) ec80ffac3076| gnu crj %r8,%r0,3,0x1e6 ec80ffac3064| gnu cgrj %r8,%r0,3,0x1ec - b21a100b| gnu cfc 11(%r1) + b21a100b| gnu cfc 11(%r1) b98f0180| gnu crdte %r8,%r0,%r0,1 - b3480080| gnu kxbr 
%f8,%f0 - b3e80080| gnu kxtr %f8,%f0 + b3480080| gnu kxbr %f8,%f0 + b3e80080| gnu kxtr %f8,%f0 ed82100b0018| gnu kdb %f8,11(%r2,%r1) - b3180080| gnu kdbr %f8,%f0 - b3e00080| gnu kdtr %f8,%f0 + b3180080| gnu kdbr %f8,%f0 + b3e00080| gnu kdtr %f8,%f0 ed82100b0008| gnu keb %f8,11(%r2,%r1) - b3080080| gnu kebr %f8,%f0 + b3080080| gnu kebr %f8,%f0 ba80100b| gnu cs %r8,%r0,11(%r1) eb80100b0014| gnu csy %r8,%r0,11(%r1) eb80100b0030| gnu csg %r8,%r0,11(%r1) - b2500080| gnu csp %r8,%r0 - b98a0080| gnu cspg %r8,%r0 + b2500080| gnu csp %r8,%r0 + b98a0080| gnu cspg %r8,%r0 c8022006100b| gnu csst 6(%r2),11(%r1),%r0 - b9723080| gnu crt %r8,%r0,3 - b9603080| gnu cgrt %r8,%r0,3 - b3fc0080| gnu cextr %f8,%f0 - b3f40080| gnu cedtr %f8,%f0 + b9723080| gnu crt %r8,%r0,3 + b9603080| gnu cgrt %r8,%r0,3 + b3fc0080| gnu cextr %f8,%f0 + b3f40080| gnu cedtr %f8,%f0 f9332006100b| gnu cp 6(4,%r2),11(4,%r1) bb80100b| gnu cds %r8,%r0,11(%r1) eb80100b0031| gnu cdsy %r8,%r0,11(%r1) @@ -172,105 +172,105 @@ eb80100b003e| gnu cdsg %r8,%r0,11(%r1) 4982100b| gnu ch %r8,11(%r2,%r1) e382100b0079| gnu chy %r8,11(%r2,%r1) e382100b0034| gnu cgh %r8,11(%r2,%r1) -e55420060008| gnu chhsi 6(%r2),8 - a78e0008| gnu chi %r8,8 -e55c20060008| gnu chsi 6(%r2),8 - a78f0008| gnu cghi %r8,8 -e55820060008| gnu cghsi 6(%r2),8 -c68500000000| gnu chrl %r8,0x330 -c68400000000| gnu cghrl %r8,0x336 +e55420060008| gnu chhsi 6(%r2),8 + a78e0008| gnu chi %r8,8 +e55c20060008| gnu chsi 6(%r2),8 + a78f0008| gnu cghi %r8,8 +e55820060008| gnu cghsi 6(%r2),8 +c68500000000| gnu chrl %r8,0x330 +c68400000000| gnu cghrl %r8,0x336 e382100b00cd| gnu chf %r8,11(%r2,%r1) - b9cd0080| gnu chhr %r8,%r0 - b9dd0080| gnu chlr %r8,%r0 -c28d00000008| gnu cfi %r8,8 -c28c00000008| gnu cgfi %r8,8 + b9cd0080| gnu chhr %r8,%r0 + b9dd0080| gnu chlr %r8,%r0 +c28d00000008| gnu cfi %r8,8 +c28c00000008| gnu cgfi %r8,8 ec8380cd08fe| gnu cib %r8,8,3,205(%r8) ec8380cd08fc| gnu cgib %r8,8,3,205(%r8) -ec83ffac087e| gnu cij %r8,8,3,0x2ba +ec83ffac087e| gnu cij 
%r8,8,3,0x2ba ec83ffac087c| gnu cgij %r8,8,3,0x2c0 -ec8000083072| gnu cit %r8,8,3 -ec8000083070| gnu cgit %r8,8,3 -cc8d00000008| gnu cih %r8,8 +ec8000083072| gnu cit %r8,8,3 +ec8000083070| gnu cgit %r8,8,3 +cc8d00000008| gnu cih %r8,8 5582100b| gnu cl %r8,11(%r2,%r1) - 1580| gnu clr %r8,%r0 + 1580| gnu clr %r8,%r0 e382100b0055| gnu cly %r8,11(%r2,%r1) e382100b0021| gnu clg %r8,11(%r2,%r1) - b9210080| gnu clgr %r8,%r0 + b9210080| gnu clgr %r8,%r0 e382100b0031| gnu clgf %r8,11(%r2,%r1) - b9310080| gnu clgfr %r8,%r0 + b9310080| gnu clgfr %r8,%r0 d5032006100b| gnu clc 6(4,%r2),11(%r1) - 95082006| gnu cli 6(%r2),8 -eb0820060055| gnu cliy 6(%r2),8 + 95082006| gnu cli 6(%r2),8 +eb0820060055| gnu cliy 6(%r2),8 ec8080cd30f7| gnu clrb %r8,%r0,3,205(%r8) ec8080cd30e5| gnu clgrb %r8,%r0,3,205(%r8) ec80ffac3077| gnu clrj %r8,%r0,3,0x314 ec80ffac3065| gnu clgrj %r8,%r0,3,0x31a - b9733080| gnu clrt %r8,%r0,3 -eb83100b0023| gnu clt %r8,3,11(%r1) - b9613080| gnu clgrt %r8,%r0,3 + b9733080| gnu clrt %r8,%r0,3 +eb83100b0023| gnu clt %r8,3,11(%r1) + b9613080| gnu clgrt %r8,%r0,3 eb83100b002b| gnu clgt %r8,3,11(%r1) eb83100b0020| gnu clmh %r8,3,11(%r1) - bd83100b| gnu clm %r8,3,11(%r1) + bd83100b| gnu clm %r8,3,11(%r1) eb83100b0021| gnu clmy %r8,3,11(%r1) e382100b00cf| gnu clhf %r8,11(%r2,%r1) - b9cf0080| gnu clhhr %r8,%r0 - b9df0080| gnu clhlr %r8,%r0 -e55520060008| gnu clhhsi 6(%r2),8 -c28f00000008| gnu clfi %r8,8 -e55d20060008| gnu clfhsi 6(%r2),8 -e55920060008| gnu clghsi 6(%r2),8 -c28e00000008| gnu clgfi %r8,8 + b9cf0080| gnu clhhr %r8,%r0 + b9df0080| gnu clhlr %r8,%r0 +e55520060008| gnu clhhsi 6(%r2),8 +c28f00000008| gnu clfi %r8,8 +e55d20060008| gnu clfhsi 6(%r2),8 +e55920060008| gnu clghsi 6(%r2),8 +c28e00000008| gnu clgfi %r8,8 ec8380cd08ff| gnu clib %r8,8,3,205(%r8) ec8380cd08fd| gnu clgib %r8,8,3,205(%r8) ec83ffac087f| gnu clij %r8,8,3,0x37c ec83ffac087d| gnu clgij %r8,8,3,0x382 -ec8000083073| gnu clfit %r8,8,3 -ec8000083071| gnu clgit %r8,8,3 -cc8f00000008| gnu clih %r8,8 - 
0f80| gnu clcl %r8,%r0 +ec8000083073| gnu clfit %r8,8,3 +ec8000083071| gnu clgit %r8,8,3 +cc8f00000008| gnu clih %r8,8 + 0f80| gnu clcl %r8,%r0 a980100b| gnu clcle %r8,%r0,11(%r1) eb80100b008f| gnu clclu %r8,%r0,11(%r1) -c68f00000000| gnu clrl %r8,0x44e -c68700000000| gnu clhrl %r8,0x454 -c68a00000000| gnu clgrl %r8,0x45a -c68600000000| gnu clghrl %r8,0x460 -c68e00000000| gnu clgfrl %r8,0x466 - b25d0080| gnu clst %r8,%r0 -c68d00000000| gnu crl %r8,0x470 -c68800000000| gnu cgrl %r8,0x476 -c68c00000000| gnu cgfrl %r8,0x47c - b2570080| gnu cuse %r8,%r0 - b2630080| gnu cmpsc %r8,%r0 - b93a0080| gnu kdsa %r8,%r0 - b93e0080| gnu kimd %r8,%r0 - b93f0080| gnu klmd %r8,%r0 - b91e0080| gnu kmac %r8,%r0 - b3590080| gnu thdr %f8,%f0 - b3580080| gnu thder %f8,%f0 - b3960080| gnu cxfbr %f8,%r0 +c68f00000000| gnu clrl %r8,0x44e +c68700000000| gnu clhrl %r8,0x454 +c68a00000000| gnu clgrl %r8,0x45a +c68600000000| gnu clghrl %r8,0x460 +c68e00000000| gnu clgfrl %r8,0x466 + b25d0080| gnu clst %r8,%r0 +c68d00000000| gnu crl %r8,0x470 +c68800000000| gnu cgrl %r8,0x476 +c68c00000000| gnu cgfrl %r8,0x47c + b2570080| gnu cuse %r8,%r0 + b2630080| gnu cmpsc %r8,%r0 + b93a0080| gnu kdsa %r8,%r0 + b93e0080| gnu kimd %r8,%r0 + b93f0080| gnu klmd %r8,%r0 + b91e0080| gnu kmac %r8,%r0 + b3590080| gnu thdr %f8,%f0 + b3580080| gnu thder %f8,%f0 + b3960080| gnu cxfbr %f8,%r0 b3963180| gnu cxfbra %f8,3,%r0,1 - b9593180| gnu cxftr %f8,3,%r0,1 - b3b60080| gnu cxfr %f8,%r0 - b3950080| gnu cdfbr %f8,%r0 + b9593180| gnu cxftr %f8,3,%r0,1 + b3b60080| gnu cxfr %f8,%r0 + b3950080| gnu cdfbr %f8,%r0 b3953180| gnu cdfbra %f8,3,%r0,1 - b9513180| gnu cdftr %f8,3,%r0,1 - b3b50080| gnu cdfr %f8,%r0 - b3940080| gnu cefbr %f8,%r0 + b9513180| gnu cdftr %f8,3,%r0,1 + b3b50080| gnu cdfr %f8,%r0 + b3940080| gnu cefbr %f8,%r0 b3943180| gnu cefbra %f8,3,%r0,1 - b3b40080| gnu cefr %f8,%r0 - b3a60080| gnu cxgbr %f8,%r0 + b3b40080| gnu cefr %f8,%r0 + b3a60080| gnu cxgbr %f8,%r0 b3a63180| gnu cxgbra %f8,3,%r0,1 - b3f90080| gnu 
cxgtr %f8,%r0 + b3f90080| gnu cxgtr %f8,%r0 b3f93180| gnu cxgtra %f8,3,%r0,1 - b3c60080| gnu cxgr %f8,%r0 - b3a50080| gnu cdgbr %f8,%r0 + b3c60080| gnu cxgr %f8,%r0 + b3a50080| gnu cdgbr %f8,%r0 b3a53180| gnu cdgbra %f8,3,%r0,1 - b3f10080| gnu cdgtr %f8,%r0 + b3f10080| gnu cdgtr %f8,%r0 b3f13180| gnu cdgtra %f8,3,%r0,1 - b3c50080| gnu cdgr %f8,%r0 - b3a40080| gnu cegbr %f8,%r0 + b3c50080| gnu cdgr %f8,%r0 + b3a40080| gnu cegbr %f8,%r0 b3a43180| gnu cegbra %f8,3,%r0,1 - b3c40080| gnu cegr %f8,%r0 + b3c40080| gnu cegr %f8,%r0 b3923180| gnu cxlfbr %f8,3,%r0,1 b95b3180| gnu cxlftr %f8,3,%r0,1 b3913180| gnu cdlfbr %f8,3,%r0,1 @@ -283,44 +283,44 @@ c68c00000000| gnu cgfrl %r8,0x47c b3a03180| gnu celgbr %f8,3,%r0,1 ed03100b83af| gnu cxpt %f8,11(4,%r1),3 ed03100b83ae| gnu cdpt %f8,11(4,%r1),3 - b3fb0080| gnu cxstr %f8,%r0 - b3f30080| gnu cdstr %f8,%r0 - b3fa0080| gnu cxutr %f8,%r0 - b3f20080| gnu cdutr %f8,%r0 + b3fb0080| gnu cxstr %f8,%r0 + b3f30080| gnu cdstr %f8,%r0 + b3fa0080| gnu cxutr %f8,%r0 + b3f20080| gnu cdutr %f8,%r0 ed03100b83ab| gnu cxzt %f8,11(4,%r1),3 ed03100b83aa| gnu cdzt %f8,11(4,%r1),3 - b3503080| gnu tbedr %f8,3,%f0 - b3513080| gnu tbdr %f8,3,%f0 + b3503080| gnu tbedr %f8,3,%f0 + b3513080| gnu tbdr %f8,3,%f0 4f82100b| gnu cvb %r8,11(%r2,%r1) e382100b0006| gnu cvby %r8,11(%r2,%r1) e382100b000e| gnu cvbg %r8,11(%r2,%r1) 4e82100b| gnu cvd %r8,11(%r2,%r1) e382100b0026| gnu cvdy %r8,11(%r2,%r1) e382100b002e| gnu cvdg %r8,11(%r2,%r1) - b39a3080| gnu cfxbr %r8,3,%f0 + b39a3080| gnu cfxbr %r8,3,%f0 b39a3180| gnu cfxbra %r8,3,%f0,1 - b3aa3080| gnu cgxbr %r8,3,%f0 + b3aa3080| gnu cgxbr %r8,3,%f0 b3aa3180| gnu cgxbra %r8,3,%f0,1 - b9493180| gnu cfxtr %r8,3,%f0,1 - b3e93080| gnu cgxtr %r8,3,%f0 + b9493180| gnu cfxtr %r8,3,%f0,1 + b3e93080| gnu cgxtr %r8,3,%f0 b3e93180| gnu cgxtra %r8,3,%f0,1 - b3ba3080| gnu cfxr %r8,3,%f0 - b3ca3080| gnu cgxr %r8,3,%f0 - b3993080| gnu cfdbr %r8,3,%f0 + b3ba3080| gnu cfxr %r8,3,%f0 + b3ca3080| gnu cgxr %r8,3,%f0 + b3993080| gnu 
cfdbr %r8,3,%f0 b3993180| gnu cfdbra %r8,3,%f0,1 - b3a93080| gnu cgdbr %r8,3,%f0 + b3a93080| gnu cgdbr %r8,3,%f0 b3a93180| gnu cgdbra %r8,3,%f0,1 - b9413180| gnu cfdtr %r8,3,%f0,1 - b3e13080| gnu cgdtr %r8,3,%f0 + b9413180| gnu cfdtr %r8,3,%f0,1 + b3e13080| gnu cgdtr %r8,3,%f0 b3e13180| gnu cgdtra %r8,3,%f0,1 - b3b93080| gnu cfdr %r8,3,%f0 - b3c93080| gnu cgdr %r8,3,%f0 - b3983080| gnu cfebr %r8,3,%f0 + b3b93080| gnu cfdr %r8,3,%f0 + b3c93080| gnu cgdr %r8,3,%f0 + b3983080| gnu cfebr %r8,3,%f0 b3983180| gnu cfebra %r8,3,%f0,1 - b3a83080| gnu cgebr %r8,3,%f0 + b3a83080| gnu cgebr %r8,3,%f0 b3a83180| gnu cgebra %r8,3,%f0,1 - b3b83080| gnu cfer %r8,3,%f0 - b3c83080| gnu cger %r8,3,%f0 + b3b83080| gnu cfer %r8,3,%f0 + b3c83080| gnu cger %r8,3,%f0 b39e3180| gnu clfxbr %r8,3,%f0,1 b3ae3180| gnu clgxbr %r8,3,%f0,1 b94b3180| gnu clfxtr %r8,3,%f0,1 @@ -333,124 +333,124 @@ e382100b002e| gnu cvdg %r8,11(%r2,%r1) b3ac3180| gnu clgebr %r8,3,%f0,1 ed03100b83ad| gnu cpxt %f8,11(4,%r1),3 ed03100b83ac| gnu cpdt %f8,11(4,%r1),3 - b3eb0180| gnu csxtr %r8,%f0,1 - b3e30180| gnu csdtr %r8,%f0,1 - b3ea0080| gnu cuxtr %r8,%f0 - b3e20080| gnu cudtr %r8,%f0 + b3eb0180| gnu csxtr %r8,%f0,1 + b3e30180| gnu csdtr %r8,%f0,1 + b3ea0080| gnu cuxtr %r8,%f0 + b3e20080| gnu cudtr %r8,%f0 ed03100b83a9| gnu czxt %f8,11(4,%r1),3 ed03100b83a8| gnu czdt %f8,11(4,%r1),3 - b2a63080| gnu cu21 %r8,%r0,3 - b9b13080| gnu cu24 %r8,%r0,3 - b2a63080| gnu cu21 %r8,%r0,3 - b2a73080| gnu cu12 %r8,%r0,3 - b2a73080| gnu cu12 %r8,%r0,3 - b9b03080| gnu cu14 %r8,%r0,3 - b9b30080| gnu cu42 %r8,%r0 - b9b20080| gnu cu41 %r8,%r0 - b24d0080| gnu cpya %a8,%a0 - b3720080| gnu cpsdr %f8,%f0,%f0 + b2a63080| gnu cu21 %r8,%r0,3 + b9b13080| gnu cu24 %r8,%r0,3 + b2a63080| gnu cu21 %r8,%r0,3 + b2a73080| gnu cu12 %r8,%r0,3 + b2a73080| gnu cu12 %r8,%r0,3 + b9b03080| gnu cu14 %r8,%r0,3 + b9b30080| gnu cu42 %r8,%r0 + b9b20080| gnu cu41 %r8,%r0 + b24d0080| gnu cpya %a8,%a0 + b3720080| gnu cpsdr %f8,%f0,%f0 e6235000087c| gnu vscshp 
%v18,%v3,%v5 e62350901874| gnu vschp %v18,%v3,%v5,1,9 b9390080| gnu dfltcc %r8,%r0,%r0 - 5d82100b| gnu d %r8,11(%r2,%r1) - 1d80| gnu dr %r8,%r0 - b34d0080| gnu dxbr %f8,%f0 - b3d90080| gnu dxtr %f8,%f0,%f0 + 5d82100b| gnu d %r8,11(%r2,%r1) + 1d80| gnu dr %r8,%r0 + b34d0080| gnu dxbr %f8,%f0 + b3d90080| gnu dxtr %f8,%f0,%f0 b3d90180| gnu dxtra %f8,%f0,%f0,1 - b22d0080| gnu dxr %f8,%f0 + b22d0080| gnu dxr %f8,%f0 ed82100b001d| gnu ddb %f8,11(%r2,%r1) - b31d0080| gnu ddbr %f8,%f0 - b3d10080| gnu ddtr %f8,%f0,%f0 + b31d0080| gnu ddbr %f8,%f0 + b3d10080| gnu ddtr %f8,%f0,%f0 b3d10180| gnu ddtra %f8,%f0,%f0,1 6d82100b| gnu dd %f8,11(%r2,%r1) - 2d80| gnu ddr %f8,%f0 + 2d80| gnu ddr %f8,%f0 ed82100b000d| gnu deb %f8,11(%r2,%r1) - b30d0080| gnu debr %f8,%f0 + b30d0080| gnu debr %f8,%f0 7d82100b| gnu de %f8,11(%r2,%r1) - 3d80| gnu der %f8,%f0 + 3d80| gnu der %f8,%f0 fd332006100b| gnu dp 6(4,%r2),11(4,%r1) e382100b0097| gnu dl %r8,11(%r2,%r1) - b9970080| gnu dlr %r8,%r0 + b9970080| gnu dlr %r8,%r0 e382100b0087| gnu dlg %r8,11(%r2,%r1) - b9870080| gnu dlgr %r8,%r0 + b9870080| gnu dlgr %r8,%r0 e382100b000d| gnu dsg %r8,11(%r2,%r1) - b90d0080| gnu dsgr %r8,%r0 + b90d0080| gnu dsgr %r8,%r0 e382100b001d| gnu dsgf %r8,11(%r2,%r1) - b91d0080| gnu dsgfr %r8,%r0 + b91d0080| gnu dsgfr %r8,%r0 b35b0180| gnu didbr %f8,%f0,%f0,1 b3530180| gnu diebr %f8,%f0,%f0,1 de032006100b| gnu ed 6(4,%r2),11(%r1) df032006100b| gnu edmk 6(4,%r2),11(%r1) - 5782100b| gnu x %r8,11(%r2,%r1) - 1780| gnu xr %r8,%r0 - b9f70080| gnu xrk %r8,%r0,%r0 + 5782100b| gnu x %r8,11(%r2,%r1) + 1780| gnu xr %r8,%r0 + b9f70080| gnu xrk %r8,%r0,%r0 e382100b0057| gnu xy %r8,11(%r2,%r1) e382100b0082| gnu xg %r8,11(%r2,%r1) - b9820080| gnu xgr %r8,%r0 - b9e70080| gnu xgrk %r8,%r0,%r0 + b9820080| gnu xgr %r8,%r0 + b9e70080| gnu xgrk %r8,%r0,%r0 d7032006100b| gnu xc 6(4,%r2),11(%r1) - 97082006| gnu xi 6(%r2),8 -eb0820060057| gnu xiy 6(%r2),8 -c08600000008| gnu xihf %r8,8 -c08700000008| gnu xilf %r8,8 + 97ff2006| gnu xi 6(%r2),-1 
+ebff20060057| gnu xiy 6(%r2),-1 +c086ffffffff| gnu xihf %r8,-1 +c087ffffffff| gnu xilf %r8,-1 4482100b| gnu ex %r8,11(%r2,%r1) -c68000000000| gnu exrl %r8,0x720 - b24f0080| gnu ear %r8,%a0 - b99d0080| gnu esea %r8 - b3ed0080| gnu eextr %r8,%f0 - b3e50080| gnu eedtr %r8,%f0 +c68000000000| gnu exrl %r8,0x720 + b24f0080| gnu ear %r8,%a0 + b99d0080| gnu esea %r8 + b3ed0080| gnu eextr %r8,%f0 + b3e50080| gnu eedtr %r8,%f0 eb80100b004c| gnu ecag %r8,%r0,11(%r1) c8012006100b| gnu ectg 6(%r2),11(%r1),%r0 - b38c0080| gnu efpc %r8 - b2260080| gnu epar %r8 - b99a0080| gnu epair %r8 - b98d0080| gnu epsw %r8,%r0 - b2270080| gnu esar %r8 - b99b0080| gnu esair %r8 - b3ef0080| gnu esxtr %r8,%f0 - b3e70080| gnu esdtr %r8,%f0 - b2490080| gnu ereg %r8,%r0 - b90e0080| gnu eregg %r8,%r0 - b24a0080| gnu esta %r8,%r0 - b2ec0080| gnu etnd %r8 - b9830080| gnu flogr %r8,%r0 - b2310000| gnu hsch - 2480| gnu hdr %f8,%f0 - 3480| gnu her %f8,%f0 - b2240080| gnu iac %r8 - b3fe0080| gnu iextr %f8,%f0,%r0 - b3f60080| gnu iedtr %f8,%f0,%r0 + b38c0080| gnu efpc %r8 + b2260080| gnu epar %r8 + b99a0080| gnu epair %r8 + b98d0080| gnu epsw %r8,%r0 + b2270080| gnu esar %r8 + b99b0080| gnu esair %r8 + b3ef0080| gnu esxtr %r8,%f0 + b3e70080| gnu esdtr %r8,%f0 + b2490080| gnu ereg %r8,%r0 + b90e0080| gnu eregg %r8,%r0 + b24a0080| gnu esta %r8,%r0 + b2ec0080| gnu etnd %r8 + b9830080| gnu flogr %r8,%r0 + b2310000| gnu hsch + 2480| gnu hdr %f8,%f0 + 3480| gnu her %f8,%f0 + b2240080| gnu iac %r8 + b3fe0080| gnu iextr %f8,%f0,%r0 + b3f60080| gnu iedtr %f8,%f0,%r0 4382100b| gnu ic %r8,11(%r2,%r1) e382100b0073| gnu icy %r8,11(%r2,%r1) eb83100b0080| gnu icmh %r8,3,11(%r1) - bf83100b| gnu icm %r8,3,11(%r1) + bf83100b| gnu icm %r8,3,11(%r1) eb83100b0081| gnu icmy %r8,3,11(%r1) - a5800008| gnu iihh %r8,8 - a5810008| gnu iihl %r8,8 -c08800000008| gnu iihf %r8,8 - a5820008| gnu iilh %r8,8 - a5830008| gnu iill %r8,8 -c08900000008| gnu iilf %r8,8 - b2220080| gnu ipm %r8 - b20b0000| gnu ipk - b9ac0080| gnu irbm %r8,%r0 - 
b2290080| gnu iske %r8,%r0 - b2230080| gnu ivsk %r8,%r0 + a5800008| gnu iihh %r8,8 + a5810008| gnu iihl %r8,8 +c08800000008| gnu iihf %r8,8 + a5820008| gnu iilh %r8,8 + a5830008| gnu iill %r8,8 +c08900000008| gnu iilf %r8,8 + b2220080| gnu ipm %r8 + b20b0000| gnu ipk + b9ac0080| gnu irbm %r8,%r0 + b2290080| gnu iske %r8,%r0 + b2230080| gnu ivsk %r8,%r0 b98e0180| gnu idte %r8,%r0,%r0,1 b2210180| gnu ipte %r8,%r0,%r0,1 - 5882100b| gnu l %r8,11(%r2,%r1) - 1880| gnu lr %r8,%r0 + 5882100b| gnu l %r8,11(%r2,%r1) + 1880| gnu lr %r8,%r0 e382100b0058| gnu ly %r8,11(%r2,%r1) e382100b0004| gnu lg %r8,11(%r2,%r1) - b9040080| gnu lgr %r8,%r0 + b9040080| gnu lgr %r8,%r0 e382100b0014| gnu lgf %r8,11(%r2,%r1) - b9140080| gnu lgfr %r8,%r0 - b3650080| gnu lxr %f8,%f0 + b9140080| gnu lgfr %r8,%r0 + b3650080| gnu lxr %f8,%f0 6882100b| gnu ld %f8,11(%r2,%r1) - 2880| gnu ldr %f8,%f0 + 2880| gnu ldr %f8,%f0 ed82100b0065| gnu ldy %f8,11(%r2,%r1) 7882100b| gnu le %f8,11(%r2,%r1) - 3880| gnu ler %f8,%f0 + 3880| gnu ler %f8,%f0 ed82100b0064| gnu ley %f8,11(%r2,%r1) 9a80100b| gnu lam %a8,%a0,11(%r1) eb80100b009a| gnu lamy %a8,%a0,11(%r1) @@ -458,7 +458,7 @@ eb80100b009a| gnu lamy %a8,%a0,11(%r1) e382100b0071| gnu lay %r8,11(%r2,%r1) 5182100b| gnu lae %r8,11(%r2,%r1) e382100b0075| gnu laey %r8,11(%r2,%r1) -c08000000000| gnu larl %r8,0x836 +c08000000000| gnu larl %r8,0x836 e5002006100b| gnu lasp 6(%r2),11(%r1) eb80100b00f8| gnu laa %r8,%r0,11(%r1) eb80100b00e8| gnu laag %r8,%r0,11(%r1) @@ -471,200 +471,200 @@ eb80100b00e7| gnu laxg %r8,%r0,11(%r1) eb80100b00f6| gnu lao %r8,%r0,11(%r1) eb80100b00e6| gnu laog %r8,%r0,11(%r1) e382100b0012| gnu lt %r8,11(%r2,%r1) - 1280| gnu ltr %r8,%r0 + 1280| gnu ltr %r8,%r0 e382100b0002| gnu ltg %r8,11(%r2,%r1) - b9020080| gnu ltgr %r8,%r0 + b9020080| gnu ltgr %r8,%r0 e382100b0032| gnu ltgf %r8,11(%r2,%r1) - b9120080| gnu ltgfr %r8,%r0 - b3420080| gnu ltxbr %f8,%f0 - b3de0080| gnu ltxtr %f8,%f0 - b3620080| gnu ltxr %f8,%f0 - b3120080| gnu ltdbr %f8,%f0 - 
b3d60080| gnu ltdtr %f8,%f0 - 2280| gnu ltdr %f8,%f0 - b3020080| gnu ltebr %f8,%f0 - 3280| gnu lter %f8,%f0 + b9120080| gnu ltgfr %r8,%r0 + b3420080| gnu ltxbr %f8,%f0 + b3de0080| gnu ltxtr %f8,%f0 + b3620080| gnu ltxr %f8,%f0 + b3120080| gnu ltdbr %f8,%f0 + b3d60080| gnu ltdtr %f8,%f0 + 2280| gnu ltdr %f8,%f0 + b3020080| gnu ltebr %f8,%f0 + 3280| gnu lter %f8,%f0 e382100b009f| gnu lat %r8,11(%r2,%r1) e382100b0085| gnu lgat %r8,11(%r2,%r1) e382100b003b| gnu lzrf %r8,11(%r2,%r1) e382100b002a| gnu lzrg %r8,11(%r2,%r1) - b200100b| gnu lbear 11(%r1) + b200100b| gnu lbear 11(%r1) e382100b0076| gnu lb %r8,11(%r2,%r1) - b9260080| gnu lbr %r8,%r0 + b9260080| gnu lbr %r8,%r0 e382100b0077| gnu lgb %r8,11(%r2,%r1) - b9060080| gnu lgbr %r8,%r0 + b9060080| gnu lgbr %r8,%r0 e382100b00c0| gnu lbh %r8,11(%r2,%r1) - 1380| gnu lcr %r8,%r0 - b9030080| gnu lcgr %r8,%r0 - b9130080| gnu lcgfr %r8,%r0 - b3430080| gnu lcxbr %f8,%f0 - b3630080| gnu lcxr %f8,%f0 - b3130080| gnu lcdbr %f8,%f0 - 2380| gnu lcdr %f8,%f0 - b3730080| gnu lcdfr %f8,%f0 - b3030080| gnu lcebr %f8,%f0 - 3380| gnu lcer %f8,%f0 + 1380| gnu lcr %r8,%r0 + b9030080| gnu lcgr %r8,%r0 + b9130080| gnu lcgfr %r8,%r0 + b3430080| gnu lcxbr %f8,%f0 + b3630080| gnu lcxr %f8,%f0 + b3130080| gnu lcdbr %f8,%f0 + 2380| gnu lcdr %f8,%f0 + b3730080| gnu lcdfr %f8,%f0 + b3030080| gnu lcebr %f8,%f0 + 3380| gnu lcer %f8,%f0 b780100b| gnu lctl %c8,%c0,11(%r1) eb80100b002f| gnu lctlg %c8,%c0,11(%r1) e782100b3027| gnu lcbb %r8,11(%r2,%r1),3 - b3473080| gnu fixbr %f8,3,%f0 + b3473080| gnu fixbr %f8,3,%f0 b3473180| gnu fixbra %f8,3,%f0,1 - b3df3180| gnu fixtr %f8,3,%f0,1 - b3670080| gnu fixr %f8,%f0 - b35f3080| gnu fidbr %f8,3,%f0 + b3df3180| gnu fixtr %f8,3,%f0,1 + b3670080| gnu fixr %f8,%f0 + b35f3080| gnu fidbr %f8,3,%f0 b35f3180| gnu fidbra %f8,3,%f0,1 - b3d73180| gnu fidtr %f8,3,%f0,1 - b37f0080| gnu fidr %f8,%f0 - b3573080| gnu fiebr %f8,3,%f0 + b3d73180| gnu fidtr %f8,3,%f0,1 + b37f0080| gnu fidr %f8,%f0 + b3573080| gnu fiebr %f8,3,%f0 
b3573180| gnu fiebra %f8,3,%f0,1 - b3770080| gnu fier %f8,%f0 - b29d100b| gnu lfpc 11(%r1) - b2bd100b| gnu lfas 11(%r1) - b3c10080| gnu ldgr %f8,%r0 - b3cd0080| gnu lgdr %r8,%f0 + b3770080| gnu fier %f8,%f0 + b29d100b| gnu lfpc 11(%r1) + b2bd100b| gnu lfas 11(%r1) + b3c10080| gnu ldgr %f8,%r0 + b3cd0080| gnu lgdr %r8,%f0 e382100b004c| gnu lgg %r8,11(%r2,%r1) e382100b004d| gnu lgsc %r8,11(%r2,%r1) 4882100b| gnu lh %r8,11(%r2,%r1) - b9270080| gnu lhr %r8,%r0 + b9270080| gnu lhr %r8,%r0 e382100b0078| gnu lhy %r8,11(%r2,%r1) e382100b0015| gnu lgh %r8,11(%r2,%r1) - b9070080| gnu lghr %r8,%r0 + b9070080| gnu lghr %r8,%r0 e382100b00c4| gnu lhh %r8,11(%r2,%r1) -ec830008004e| gnu lochhinle %r8,8 - a7880008| gnu lhi %r8,8 - a7890008| gnu lghi %r8,8 -ec8300080042| gnu lochinle %r8,8 -ec8300080046| gnu locghinle %r8,8 -c48500000000| gnu lhrl %r8,0x99e -c48400000000| gnu lghrl %r8,0x9a4 +ec830008004e| gnu lochhinle %r8,8 + a7880008| gnu lhi %r8,8 + a7890008| gnu lghi %r8,8 +ec8300080042| gnu lochinle %r8,8 +ec8300080046| gnu locghinle %r8,8 +c48500000000| gnu lhrl %r8,0x99e +c48400000000| gnu lghrl %r8,0x9a4 e382100b00ca| gnu lfh %r8,11(%r2,%r1) e382100b00c8| gnu lfhat %r8,11(%r2,%r1) eb83100b00e0| gnu locfhnle %r8,11(%r1) - b9e03080| gnu locfhrnle %r8,%r0 -c08100000008| gnu lgfi %r8,8 + b9e03080| gnu locfhrnle %r8,%r0 +c081fffffffe| gnu lgfi %r8,-2 ed82100b0005| gnu lxdb %f8,11(%r2,%r1) - b3050080| gnu lxdbr %f8,%f0 - b3dc0180| gnu lxdtr %f8,%f0,1 + b3050080| gnu lxdbr %f8,%f0 + b3dc0180| gnu lxdtr %f8,%f0,1 ed82100b0025| gnu lxd %f8,11(%r2,%r1) - b3250080| gnu lxdr %f8,%f0 + b3250080| gnu lxdr %f8,%f0 ed82100b0006| gnu lxeb %f8,11(%r2,%r1) - b3060080| gnu lxebr %f8,%f0 + b3060080| gnu lxebr %f8,%f0 ed82100b0026| gnu lxe %f8,11(%r2,%r1) - b3260080| gnu lxer %f8,%f0 + b3260080| gnu lxer %f8,%f0 ed82100b0004| gnu ldeb %f8,11(%r2,%r1) - b3040080| gnu ldebr %f8,%f0 - b3d40180| gnu ldetr %f8,%f0,1 + b3040080| gnu ldebr %f8,%f0 + b3d40180| gnu ldetr %f8,%f0,1 ed82100b0024| gnu lde 
%f8,11(%r2,%r1) - b3240080| gnu lder %f8,%f0 + b3240080| gnu lder %f8,%f0 e382100b0016| gnu llgf %r8,11(%r2,%r1) - b9160080| gnu llgfr %r8,%r0 + b9160080| gnu llgfr %r8,%r0 e382100b0048| gnu llgfsg %r8,11(%r2,%r1) e382100b009d| gnu llgfat %r8,11(%r2,%r1) e382100b003a| gnu llzrgf %r8,11(%r2,%r1) e382100b0094| gnu llc %r8,11(%r2,%r1) - b9940080| gnu llcr %r8,%r0 + b9940080| gnu llcr %r8,%r0 e382100b0090| gnu llgc %r8,11(%r2,%r1) - b9840080| gnu llgcr %r8,%r0 + b9840080| gnu llgcr %r8,%r0 e382100b00c2| gnu llch %r8,11(%r2,%r1) e382100b0095| gnu llh %r8,11(%r2,%r1) - b9950080| gnu llhr %r8,%r0 + b9950080| gnu llhr %r8,%r0 e382100b0091| gnu llgh %r8,11(%r2,%r1) - b9850080| gnu llghr %r8,%r0 + b9850080| gnu llghr %r8,%r0 e382100b00c6| gnu llhh %r8,11(%r2,%r1) -c48200000000| gnu llhrl %r8,0xa5a -c48600000000| gnu llghrl %r8,0xa60 - a58c0008| gnu llihh %r8,8 - a58d0008| gnu llihl %r8,8 -c08e00000008| gnu llihf %r8,8 - a58e0008| gnu llilh %r8,8 - a58f0008| gnu llill %r8,8 -c08f00000008| gnu llilf %r8,8 -c48e00000000| gnu llgfrl %r8,0xa82 +c48200000000| gnu llhrl %r8,0xa5a +c48600000000| gnu llghrl %r8,0xa60 + a58cffff| gnu llihh %r8,-1 + a58dffff| gnu llihl %r8,-1 +c08efffffffe| gnu llihf %r8,-2 + a58effff| gnu llilh %r8,-1 + a58fffff| gnu llill %r8,-1 +c08ffffffffe| gnu llilf %r8,-2 +c48e00000000| gnu llgfrl %r8,0xa82 e382100b0017| gnu llgt %r8,11(%r2,%r1) - b9170080| gnu llgtr %r8,%r0 + b9170080| gnu llgtr %r8,%r0 e382100b009c| gnu llgtat %r8,11(%r2,%r1) 9880100b| gnu lm %r8,%r0,11(%r1) eb80100b0098| gnu lmy %r8,%r0,11(%r1) eb80100b0004| gnu lmg %r8,%r0,11(%r1) ef80100b80cd| gnu lmd %r8,%r0,11(%r1),205(%r8) eb80100b0096| gnu lmh %r8,%r0,11(%r1) - 1180| gnu lnr %r8,%r0 - b9010080| gnu lngr %r8,%r0 - b9110080| gnu lngfr %r8,%r0 - b3410080| gnu lnxbr %f8,%f0 - b3610080| gnu lnxr %f8,%f0 - b3110080| gnu lndbr %f8,%f0 - 2180| gnu lndr %f8,%f0 - b3710080| gnu lndfr %f8,%f0 - b3010080| gnu lnebr %f8,%f0 - 3180| gnu lner %f8,%f0 + 1180| gnu lnr %r8,%r0 + b9010080| gnu lngr 
%r8,%r0 + b9110080| gnu lngfr %r8,%r0 + b3410080| gnu lnxbr %f8,%f0 + b3610080| gnu lnxr %f8,%f0 + b3110080| gnu lndbr %f8,%f0 + 2180| gnu lndr %f8,%f0 + b3710080| gnu lndfr %f8,%f0 + b3010080| gnu lnebr %f8,%f0 + 3180| gnu lner %f8,%f0 eb83100b00f2| gnu locnle %r8,11(%r1) - b9f23080| gnu locrnle %r8,%r0 + b9f23080| gnu locrnle %r8,%r0 eb83100b00e2| gnu locgnle %r8,11(%r1) - b9e23080| gnu locgrnle %r8,%r0 + b9e23080| gnu locgrnle %r8,%r0 b9aa0180| gnu lptea %r8,%r0,%r0,1 c8042006100b| gnu lpd %r0,6(%r2),11(%r1) c8052006100b| gnu lpdg %r0,6(%r2),11(%r1) e382100b008f| gnu lpq %r8,11(%r2,%r1) - 1080| gnu lpr %r8,%r0 - b9000080| gnu lpgr %r8,%r0 - b9100080| gnu lpgfr %r8,%r0 - b3400080| gnu lpxbr %f8,%f0 - b3600080| gnu lpxr %f8,%f0 - b3100080| gnu lpdbr %f8,%f0 - 2080| gnu lpdr %f8,%f0 - b3700080| gnu lpdfr %f8,%f0 - b3000080| gnu lpebr %f8,%f0 - 3080| gnu lper %f8,%f0 - 82002006| gnu lpsw 6(%r2) - b2b2100b| gnu lpswe 11(%r1) -eb0020060071| gnu lpswey 6(%r2) + 1080| gnu lpr %r8,%r0 + b9000080| gnu lpgr %r8,%r0 + b9100080| gnu lpgfr %r8,%r0 + b3400080| gnu lpxbr %f8,%f0 + b3600080| gnu lpxr %f8,%f0 + b3100080| gnu lpdbr %f8,%f0 + 2080| gnu lpdr %f8,%f0 + b3700080| gnu lpdfr %f8,%f0 + b3000080| gnu lpebr %f8,%f0 + 3080| gnu lper %f8,%f0 + 82002006| gnu lpsw 6(%r2) + b2b2100b| gnu lpswe 11(%r1) +eb0020060071| gnu lpswey 6(%r2) b182100b| gnu lra %r8,11(%r2,%r1) e382100b0013| gnu lray %r8,11(%r2,%r1) e382100b0003| gnu lrag %r8,11(%r2,%r1) -c48d00000000| gnu lrl %r8,0xb40 -c48800000000| gnu lgrl %r8,0xb46 -c48c00000000| gnu lgfrl %r8,0xb4c +c48d00000000| gnu lrl %r8,0xb40 +c48800000000| gnu lgrl %r8,0xb46 +c48c00000000| gnu lgfrl %r8,0xb4c e382100b001f| gnu lrvh %r8,11(%r2,%r1) e382100b001e| gnu lrv %r8,11(%r2,%r1) - b91f0080| gnu lrvr %r8,%r0 + b91f0080| gnu lrvr %r8,%r0 e382100b000f| gnu lrvg %r8,11(%r2,%r1) - b90f0080| gnu lrvgr %r8,%r0 - b3450080| gnu ldxbr %f8,%f0 + b90f0080| gnu lrvgr %r8,%r0 + b3450080| gnu ldxbr %f8,%f0 b3453180| gnu ldxbra %f8,3,%f0,1 - b3dd3180| 
gnu ldxtr %f8,3,%f0,1 - 2580| gnu ldxr %f8,%f0 - 2580| gnu ldxr %f8,%f0 - b3460080| gnu lexbr %f8,%f0 + b3dd3180| gnu ldxtr %f8,3,%f0,1 + 2580| gnu ldxr %f8,%f0 + 2580| gnu ldxr %f8,%f0 + b3460080| gnu lexbr %f8,%f0 b3463180| gnu lexbra %f8,3,%f0,1 - b3660080| gnu lexr %f8,%f0 - b3440080| gnu ledbr %f8,%f0 + b3660080| gnu lexr %f8,%f0 + b3440080| gnu ledbr %f8,%f0 b3443180| gnu ledbra %f8,3,%f0,1 - b3d53180| gnu ledtr %f8,3,%f0,1 - 3580| gnu ledr %f8,%f0 - 3580| gnu ledr %f8,%f0 - b24b0080| gnu lura %r8,%r0 - b9050080| gnu lurag %r8,%r0 - b3760080| gnu lzxr %f8 - b3750080| gnu lzdr %f8 - b3740080| gnu lzer %f8 - b2470080| gnu msta %r8 - b232100b| gnu msch 11(%r1) - af082006| gnu mc 6(%r2),8 -e54420060008| gnu mvhhi 6(%r2),8 -e54c20060008| gnu mvhi 6(%r2),8 -e54820060008| gnu mvghi 6(%r2),8 + b3d53180| gnu ledtr %f8,3,%f0,1 + 3580| gnu ledr %f8,%f0 + 3580| gnu ledr %f8,%f0 + b24b0080| gnu lura %r8,%r0 + b9050080| gnu lurag %r8,%r0 + b3760080| gnu lzxr %f8 + b3750080| gnu lzdr %f8 + b3740080| gnu lzer %f8 + b2470080| gnu msta %r8 + b232100b| gnu msch 11(%r1) + af082006| gnu mc 6(%r2),8 +e54420060008| gnu mvhhi 6(%r2),8 +e54c20060008| gnu mvhi 6(%r2),8 +e54820060008| gnu mvghi 6(%r2),8 d2032006100b| gnu mvc 6(4,%r2),11(%r1) - 92082006| gnu mvi 6(%r2),8 -eb0820060052| gnu mviy 6(%r2),8 + 92082006| gnu mvi 6(%r2),8 +eb0820060052| gnu mviy 6(%r2),8 e8032006100b| gnu mvcin 6(4,%r2),11(%r1) - 0e80| gnu mvcl %r8,%r0 + 0e80| gnu mvcl %r8,%r0 a880100b| gnu mvcle %r8,%r0,11(%r1) eb80100b008e| gnu mvclu %r8,%r0,11(%r1) d1032006100b| gnu mvn 6(4,%r2),11(%r1) - b2540080| gnu mvpg %r8,%r0 + b2540080| gnu mvpg %r8,%r0 e50a2006100b| gnu mvcrl 6(%r2),11(%r1) - b2550080| gnu mvst %r8,%r0 + b2550080| gnu mvst %r8,%r0 da802006100b| gnu mvcp 6(%r8,%r2),11(%r1),%r0 db802006100b| gnu mvcs 6(%r8,%r2),11(%r1),%r0 e50f2006100b| gnu mvcdk 6(%r2),11(%r1) @@ -674,147 +674,147 @@ c8002006100b| gnu mvcos 6(%r2),11(%r1),%r0 e50e2006100b| gnu mvcsk 6(%r2),11(%r1) d3032006100b| gnu mvz 
6(4,%r2),11(%r1) e382100b0084| gnu mg %r8,11(%r2,%r1) - b9ec0080| gnu mgrk %r8,%r0,%r0 - 5c82100b| gnu m %r8,11(%r2,%r1) + b9ec0080| gnu mgrk %r8,%r0,%r0 + 5c82100b| gnu m %r8,11(%r2,%r1) e382100b005c| gnu mfy %r8,11(%r2,%r1) - 1c80| gnu mr %r8,%r0 - b34c0080| gnu mxbr %f8,%f0 - b3d80080| gnu mxtr %f8,%f0,%f0 + 1c80| gnu mr %r8,%r0 + b34c0080| gnu mxbr %f8,%f0 + b3d80080| gnu mxtr %f8,%f0,%f0 b3d80180| gnu mxtra %f8,%f0,%f0,1 - 2680| gnu mxr %f8,%f0 + 2680| gnu mxr %f8,%f0 ed82100b001c| gnu mdb %f8,11(%r2,%r1) - b31c0080| gnu mdbr %f8,%f0 - b3d00080| gnu mdtr %f8,%f0,%f0 + b31c0080| gnu mdbr %f8,%f0 + b3d00080| gnu mdtr %f8,%f0,%f0 b3d00180| gnu mdtra %f8,%f0,%f0,1 6c82100b| gnu md %f8,11(%r2,%r1) - 2c80| gnu mdr %f8,%f0 + 2c80| gnu mdr %f8,%f0 ed82100b0007| gnu mxdb %f8,11(%r2,%r1) - b3070080| gnu mxdbr %f8,%f0 + b3070080| gnu mxdbr %f8,%f0 6782100b| gnu mxd %f8,11(%r2,%r1) - 2780| gnu mxdr %f8,%f0 + 2780| gnu mxdr %f8,%f0 ed82100b0017| gnu meeb %f8,11(%r2,%r1) - b3170080| gnu meebr %f8,%f0 + b3170080| gnu meebr %f8,%f0 ed82100b0037| gnu mee %f8,11(%r2,%r1) - b3370080| gnu meer %f8,%f0 + b3370080| gnu meer %f8,%f0 ed82100b000c| gnu mdeb %f8,11(%r2,%r1) - b30c0080| gnu mdebr %f8,%f0 + b30c0080| gnu mdebr %f8,%f0 7c82100b| gnu mde %f8,11(%r2,%r1) - 3c80| gnu mder %f8,%f0 + 3c80| gnu mder %f8,%f0 7c82100b| gnu mde %f8,11(%r2,%r1) - 3c80| gnu mder %f8,%f0 + 3c80| gnu mder %f8,%f0 ed02100b803a| gnu may %f8,%f0,11(%r2,%r1) - b33a8000| gnu mayr %f8,%f0,%f0 + b33a8000| gnu mayr %f8,%f0,%f0 ed02100b801e| gnu madb %f8,%f0,11(%r2,%r1) - b31e8000| gnu madbr %f8,%f0,%f0 + b31e8000| gnu madbr %f8,%f0,%f0 ed02100b803e| gnu mad %f8,%f0,11(%r2,%r1) - b33e8000| gnu madr %f8,%f0,%f0 + b33e8000| gnu madr %f8,%f0,%f0 ed02100b800e| gnu maeb %f8,%f0,11(%r2,%r1) - b30e8000| gnu maebr %f8,%f0,%f0 + b30e8000| gnu maebr %f8,%f0,%f0 ed02100b802e| gnu mae %f8,%f0,11(%r2,%r1) - b32e8000| gnu maer %f8,%f0,%f0 + b32e8000| gnu maer %f8,%f0,%f0 ed02100b803c| gnu mayh %f8,%f0,11(%r2,%r1) - 
b33c8000| gnu mayhr %f8,%f0,%f0 + b33c8000| gnu mayhr %f8,%f0,%f0 ed02100b8038| gnu mayl %f8,%f0,11(%r2,%r1) - b3388000| gnu maylr %f8,%f0,%f0 + b3388000| gnu maylr %f8,%f0,%f0 ed02100b801f| gnu msdb %f8,%f0,11(%r2,%r1) - b31f8000| gnu msdbr %f8,%f0,%f0 + b31f8000| gnu msdbr %f8,%f0,%f0 ed02100b803f| gnu msd %f8,%f0,11(%r2,%r1) - b33f8000| gnu msdr %f8,%f0,%f0 + b33f8000| gnu msdr %f8,%f0,%f0 ed02100b800f| gnu mseb %f8,%f0,11(%r2,%r1) - b30f8000| gnu msebr %f8,%f0,%f0 + b30f8000| gnu msebr %f8,%f0,%f0 ed02100b802f| gnu mse %f8,%f0,11(%r2,%r1) - b32f8000| gnu mser %f8,%f0,%f0 + b32f8000| gnu mser %f8,%f0,%f0 fc332006100b| gnu mp 6(4,%r2),11(4,%r1) 4c82100b| gnu mh %r8,11(%r2,%r1) e382100b007c| gnu mhy %r8,11(%r2,%r1) e382100b003c| gnu mgh %r8,11(%r2,%r1) - a78c0008| gnu mhi %r8,8 - a78d0008| gnu mghi %r8,8 + a78cfffd| gnu mhi %r8,-3 + a78dfffd| gnu mghi %r8,-3 e382100b0086| gnu mlg %r8,11(%r2,%r1) - b9860080| gnu mlgr %r8,%r0 + b9860080| gnu mlgr %r8,%r0 e382100b0096| gnu ml %r8,11(%r2,%r1) - b9960080| gnu mlr %r8,%r0 + b9960080| gnu mlr %r8,%r0 7182100b| gnu ms %r8,11(%r2,%r1) e382100b0053| gnu msc %r8,11(%r2,%r1) - b2520080| gnu msr %r8,%r0 - b9fd0080| gnu msrkc %r8,%r0,%r0 + b2520080| gnu msr %r8,%r0 + b9fd0080| gnu msrkc %r8,%r0,%r0 e382100b0051| gnu msy %r8,11(%r2,%r1) e382100b000c| gnu msg %r8,11(%r2,%r1) e382100b0083| gnu msgc %r8,11(%r2,%r1) - b90c0080| gnu msgr %r8,%r0 + b90c0080| gnu msgr %r8,%r0 b9ed0080| gnu msgrkc %r8,%r0,%r0 e382100b001c| gnu msgf %r8,11(%r2,%r1) - b91c0080| gnu msgfr %r8,%r0 -c28100000008| gnu msfi %r8,8 -c28000000008| gnu msgfi %r8,8 + b91c0080| gnu msgfr %r8,%r0 +c281ffffffff| gnu msfi %r8,-1 +c280ffffffff| gnu msgfi %r8,-1 ed02100b803d| gnu myh %f8,%f0,11(%r2,%r1) - b33d8000| gnu myhr %f8,%f0,%f0 + b33d8000| gnu myhr %f8,%f0,%f0 ed02100b8039| gnu myl %f8,%f0,11(%r2,%r1) - b3398000| gnu mylr %f8,%f0,%f0 + b3398000| gnu mylr %f8,%f0,%f0 ed02100b803b| gnu my %f8,%f0,11(%r2,%r1) - b33b8000| gnu myr %f8,%f0,%f0 - b9740080| gnu nnrk 
%r8,%r0,%r0 - b9640080| gnu nngrk %r8,%r0,%r0 - b93b0000| gnu nnpa - b2fa00c8| gnu niai 12,8 + b33b8000| gnu myr %f8,%f0,%f0 + b9740080| gnu nnrk %r8,%r0,%r0 + b9640080| gnu nngrk %r8,%r0,%r0 + b93b0000| gnu nnpa + b2fa00c8| gnu niai 12,8 e382100b0025| gnu ntstg %r8,11(%r2,%r1) - b9760080| gnu nork %r8,%r0,%r0 - b9660080| gnu nogrk %r8,%r0,%r0 - b9770080| gnu nxrk %r8,%r0,%r0 - b9670080| gnu nxgrk %r8,%r0,%r0 - 5682100b| gnu o %r8,11(%r2,%r1) - 1680| gnu or %r8,%r0 - b9f60080| gnu ork %r8,%r0,%r0 + b9760080| gnu nork %r8,%r0,%r0 + b9660080| gnu nogrk %r8,%r0,%r0 + b9770080| gnu nxrk %r8,%r0,%r0 + b9670080| gnu nxgrk %r8,%r0,%r0 + 5682100b| gnu o %r8,11(%r2,%r1) + 1680| gnu or %r8,%r0 + b9f60080| gnu ork %r8,%r0,%r0 e382100b0056| gnu oy %r8,11(%r2,%r1) e382100b0081| gnu og %r8,11(%r2,%r1) - b9810080| gnu ogr %r8,%r0 - b9e60080| gnu ogrk %r8,%r0,%r0 + b9810080| gnu ogr %r8,%r0 + b9e60080| gnu ogrk %r8,%r0,%r0 d6032006100b| gnu oc 6(4,%r2),11(%r1) - 96082006| gnu oi 6(%r2),8 -eb0820060056| gnu oiy 6(%r2),8 - a5880008| gnu oihh %r8,8 - a5890008| gnu oihl %r8,8 -c08c00000008| gnu oihf %r8,8 - a58a0008| gnu oilh %r8,8 - a58b0008| gnu oill %r8,8 -c08d00000008| gnu oilf %r8,8 - b9750080| gnu ocrk %r8,%r0,%r0 - b9650080| gnu ocgrk %r8,%r0,%r0 + 96ff2006| gnu oi 6(%r2),-1 +ebff20060056| gnu oiy 6(%r2),-1 + a588ffff| gnu oihh %r8,-1 + a589ffff| gnu oihl %r8,-1 +c08cffffffff| gnu oihf %r8,-1 + a58affff| gnu oilh %r8,-1 + a58bffff| gnu oill %r8,-1 +c08dffffffff| gnu oilf %r8,-1 + b9750080| gnu ocrk %r8,%r0,%r0 + b9650080| gnu ocgrk %r8,%r0,%r0 f2332006100b| gnu pack 6(4,%r2),11(4,%r1) e9032006100b| gnu pka 6(%r2),11(4,%r1) e1032006100b| gnu pku 6(%r2),11(4,%r1) - b22e0080| gnu pgin %r8,%r0 - b22f0080| gnu pgout %r8,%r0 - b92c0000| gnu pcc - b9280000| gnu pckmo - 010a| gnu pfpo - b9af0080| gnu pfmf %r8,%r0 + b22e0080| gnu pgin %r8,%r0 + b22f0080| gnu pgout %r8,%r0 + b92c0000| gnu pcc + b9280000| gnu pckmo + 010a| gnu pfpo + b9af0080| gnu pfmf %r8,%r0 ee80100b80cd| gnu plo 
%r8,11(%r1),%r0,205(%r8) - b2e83080| gnu ppa %r8,%r0,3 - b93c0080| gnu prno %r8,%r0 - b93c0080| gnu prno %r8,%r0 - 0104| gnu ptff - b9a20080| gnu ptf %r8 - b9e13080| gnu popcnt %r8,%r0,3 -e372100b0036| gnu pfd 7,11(%r2,%r1) -c67200000000| gnu pfdrl 7,0xe68 - b218100b| gnu pc 11(%r1) - 0101| gnu pr - b2280080| gnu pt %r8,%r0 - b99e0080| gnu pti %r8,%r0 - b2480000| gnu palb - b20d0000| gnu ptlb + b2e83080| gnu ppa %r8,%r0,3 + b93c0080| gnu prno %r8,%r0 + b93c0080| gnu prno %r8,%r0 + 0104| gnu ptff + b9a20080| gnu ptf %r8 + b9e13080| gnu popcnt %r8,%r0,3 +e372100b0036| gnu pfd 7,11(%r2,%r1) +c67200000000| gnu pfdrl 7,0xe68 + b218100b| gnu pc 11(%r1) + 0101| gnu pr + b2280080| gnu pt %r8,%r0 + b99e0080| gnu pti %r8,%r0 + b2480000| gnu palb + b20d0000| gnu ptlb b3fd0180| gnu qaxtr %f8,%f0,%f0,1 b3f50180| gnu qadtr %f8,%f0,%f0,1 - b28f100b| gnu qpaci 11(%r1) + b28f100b| gnu qpaci 11(%r1) b3ff0180| gnu rrxtr %f8,%f0,%r0,1 b3f70180| gnu rrdtr %f8,%f0,%r0,1 - b23b0000| gnu rchp - b98b0180| gnu rdp %r8,%r0,%r0,1 - b22a0080| gnu rrbe %r8,%r0 - b9ae0080| gnu rrbm %r8,%r0 - b277100b| gnu rp 11(%r1) - b2380000| gnu rsch + b23b0000| gnu rchp + b98b0180| gnu rdp %r8,%r0,%r0,1 + b22a0080| gnu rrbe %r8,%r0 + b9ae0080| gnu rrbm %r8,%r0 + b277100b| gnu rp 11(%r1) + b2380000| gnu rsch eb80100b001d| gnu rll %r8,%r0,11(%r1) eb80100b001c| gnu rllg %r8,%r0,11(%r1) ec8009691254| gnu rnsbg %r8,%r0,9,105,18 @@ -824,50 +824,50 @@ ec8009691259| gnu risbgn %r8,%r0,9,105,18 ec800969125d| gnu risbhg %r8,%r0,9,105,18 ec8009691251| gnu risblg %r8,%r0,9,105,18 ec8009691256| gnu rosbg %r8,%r0,9,105,18 - b25e0080| gnu srst %r8,%r0 - b9be0080| gnu srstu %r8,%r0 - b9f00180| gnu selro %r8,%r0,%r0 + b25e0080| gnu srst %r8,%r0 + b9be0080| gnu srstu %r8,%r0 + b9f00180| gnu selro %r8,%r0,%r0 b9e30180| gnu selgro %r8,%r0,%r0 b9c00180| gnu selfhro %r8,%r0,%r0 - b24e0080| gnu sar %a8,%r0 - b2370000| gnu sal - b219100b| gnu sac 11(%r1) - b279100b| gnu sacf 11(%r1) - 010c| gnu sam24 - 010d| gnu sam31 - 010e| gnu 
sam64 - b299100b| gnu srnm 11(%r1) - b2b8100b| gnu srnmb 11(%r1) - b23c0000| gnu schm - b204100b| gnu sck 11(%r1) - b206100b| gnu sckc 11(%r1) - 0107| gnu sckpf - b208100b| gnu spt 11(%r1) - b2b9100b| gnu srnmt 11(%r1) - b3840080| gnu sfpc %r8 - b3850080| gnu sfasr %r8 - b210100b| gnu spx 11(%r1) - 0480| gnu spm %r8 - b20a100b| gnu spka 11(%r1) - b2250080| gnu ssar %r8 - b99f0080| gnu ssair %r8 - b22b3080| gnu sske %r8,%r0,3 - 80002006| gnu ssm 6(%r2) + b24e0080| gnu sar %a8,%r0 + b2370000| gnu sal + b219100b| gnu sac 11(%r1) + b279100b| gnu sacf 11(%r1) + 010c| gnu sam24 + 010d| gnu sam31 + 010e| gnu sam64 + b299100b| gnu srnm 11(%r1) + b2b8100b| gnu srnmb 11(%r1) + b23c0000| gnu schm + b204100b| gnu sck 11(%r1) + b206100b| gnu sckc 11(%r1) + 0107| gnu sckpf + b208100b| gnu spt 11(%r1) + b2b9100b| gnu srnmt 11(%r1) + b3840080| gnu sfpc %r8 + b3850080| gnu sfasr %r8 + b210100b| gnu spx 11(%r1) + 0480| gnu spm %r8 + b20a100b| gnu spka 11(%r1) + b2250080| gnu ssar %r8 + b99f0080| gnu ssair %r8 + b22b3080| gnu sske %r8,%r0,3 + 80002006| gnu ssm 6(%r2) f0392006100b| gnu srp 6(4,%r2),11(%r1),9 - 8f80100b| gnu slda %r8,11(%r1) - 8d80100b| gnu sldl %r8,11(%r1) - 8b80100b| gnu sla %r8,11(%r1) + 8f80100b| gnu slda %r8,11(%r1) + 8d80100b| gnu sldl %r8,11(%r1) + 8b80100b| gnu sla %r8,11(%r1) eb80100b00dd| gnu slak %r8,%r0,11(%r1) eb80100b000b| gnu slag %r8,%r0,11(%r1) - 8980100b| gnu sll %r8,11(%r1) + 8980100b| gnu sll %r8,11(%r1) eb80100b00df| gnu sllk %r8,%r0,11(%r1) eb80100b000d| gnu sllg %r8,%r0,11(%r1) - 8e80100b| gnu srda %r8,11(%r1) - 8c80100b| gnu srdl %r8,11(%r1) - 8a80100b| gnu sra %r8,11(%r1) + 8e80100b| gnu srda %r8,11(%r1) + 8c80100b| gnu srdl %r8,11(%r1) + 8a80100b| gnu sra %r8,11(%r1) eb80100b00dc| gnu srak %r8,%r0,11(%r1) eb80100b000a| gnu srag %r8,%r0,11(%r1) - 8880100b| gnu srl %r8,11(%r1) + 8880100b| gnu srl %r8,11(%r1) eb80100b00de| gnu srlk %r8,%r0,11(%r1) eb80100b000c| gnu srlg %r8,%r0,11(%r1) ed02100b8048| gnu slxt %f8,%f0,11(%r2,%r1) @@ -875,18 +875,18 
@@ ed02100b8040| gnu sldt %f8,%f0,11(%r2,%r1) ed02100b8049| gnu srxt %f8,%f0,11(%r2,%r1) ed02100b8041| gnu srdt %f8,%f0,11(%r2,%r1) ae80100b| gnu sigp %r8,%r0,11(%r1) - b9380080| gnu sortl %r8,%r0 - b3160080| gnu sqxbr %f8,%f0 - b3360080| gnu sqxr %f8,%f0 + b9380080| gnu sortl %r8,%r0 + b3160080| gnu sqxbr %f8,%f0 + b3360080| gnu sqxr %f8,%f0 ed82100b0015| gnu sqdb %f8,11(%r2,%r1) - b3150080| gnu sqdbr %f8,%f0 + b3150080| gnu sqdbr %f8,%f0 ed82100b0035| gnu sqd %f8,11(%r2,%r1) - b2440080| gnu sqdr %f8,%f0 + b2440080| gnu sqdr %f8,%f0 ed82100b0014| gnu sqeb %f8,11(%r2,%r1) - b3140080| gnu sqebr %f8,%f0 + b3140080| gnu sqebr %f8,%f0 ed82100b0034| gnu sqe %f8,11(%r2,%r1) - b2450080| gnu sqer %f8,%f0 - b233100b| gnu ssch 11(%r1) + b2450080| gnu sqer %f8,%f0 + b233100b| gnu ssch 11(%r1) 5082100b| gnu st %r8,11(%r2,%r1) e382100b0050| gnu sty %r8,11(%r2,%r1) e382100b0024| gnu stg %r8,11(%r2,%r1) @@ -896,32 +896,32 @@ ed82100b0067| gnu stdy %f8,11(%r2,%r1) ed82100b0066| gnu stey %f8,11(%r2,%r1) 9b80100b| gnu stam %a8,%a0,11(%r1) eb80100b009b| gnu stamy %a8,%a0,11(%r1) - b201100b| gnu stbear 11(%r1) - b23a100b| gnu stcps 11(%r1) - b239100b| gnu stcrw 11(%r1) + b201100b| gnu stbear 11(%r1) + b23a100b| gnu stcps 11(%r1) + b239100b| gnu stcrw 11(%r1) 4282100b| gnu stc %r8,11(%r2,%r1) e382100b0072| gnu stcy %r8,11(%r2,%r1) e382100b00c3| gnu stch %r8,11(%r2,%r1) eb83100b002c| gnu stcmh %r8,3,11(%r1) be83100b| gnu stcm %r8,3,11(%r1) eb83100b002d| gnu stcmy %r8,3,11(%r1) - b205100b| gnu stck 11(%r1) - b207100b| gnu stckc 11(%r1) - b278100b| gnu stcke 11(%r1) - b27c100b| gnu stckf 11(%r1) + b205100b| gnu stck 11(%r1) + b207100b| gnu stckc 11(%r1) + b278100b| gnu stcke 11(%r1) + b27c100b| gnu stckf 11(%r1) b680100b| gnu stctl %c8,%c0,11(%r1) eb80100b0025| gnu stctg %c8,%c0,11(%r1) - b212100b| gnu stap 11(%r1) - b202100b| gnu stidp 11(%r1) - b209100b| gnu stpt 11(%r1) - b2b1100b| gnu stfl 11(%r1) - b2b0100b| gnu stfle 11(%r1) - b29c100b| gnu stfpc 11(%r1) + b212100b| gnu stap 11(%r1) 
+ b202100b| gnu stidp 11(%r1) + b209100b| gnu stpt 11(%r1) + b2b1100b| gnu stfl 11(%r1) + b2b0100b| gnu stfle 11(%r1) + b29c100b| gnu stfpc 11(%r1) e382100b0049| gnu stgsc %r8,11(%r2,%r1) 4082100b| gnu sth %r8,11(%r2,%r1) e382100b0070| gnu sthy %r8,11(%r2,%r1) e382100b00c7| gnu sthh %r8,11(%r2,%r1) -c48700000000| gnu sthrl %r8,0x109c +c48700000000| gnu sthrl %r8,0x109c e382100b00cb| gnu stfh %r8,11(%r2,%r1) eb83100b00e1| gnu stocfhnle %r8,11(%r1) 9080100b| gnu stm %r8,%r0,11(%r1) @@ -931,67 +931,67 @@ eb80100b0026| gnu stmh %r8,%r0,11(%r1) eb83100b00f3| gnu stocnle %r8,11(%r1) eb83100b00e3| gnu stocgnle %r8,11(%r1) e382100b008e| gnu stpq %r8,11(%r2,%r1) - b211100b| gnu stpx 11(%r1) + b211100b| gnu stpx 11(%r1) e5022006100b| gnu strag 6(%r2),11(%r1) -c48f00000000| gnu strl %r8,0x10e0 -c48b00000000| gnu stgrl %r8,0x10e6 +c48f00000000| gnu strl %r8,0x10e0 +c48b00000000| gnu stgrl %r8,0x10e6 e382100b003f| gnu strvh %r8,11(%r2,%r1) e382100b003e| gnu strv %r8,11(%r2,%r1) e382100b002f| gnu strvg %r8,11(%r2,%r1) - b234100b| gnu stsch 11(%r1) - b27d100b| gnu stsi 11(%r1) - ac082006| gnu stnsm 6(%r2),8 - ad082006| gnu stosm 6(%r2),8 - b2460080| gnu stura %r8,%r0 - b9250080| gnu sturg %r8,%r0 - 5b82100b| gnu s %r8,11(%r2,%r1) - 1b80| gnu sr %r8,%r0 - b9f90080| gnu srk %r8,%r0,%r0 + b234100b| gnu stsch 11(%r1) + b27d100b| gnu stsi 11(%r1) + ac082006| gnu stnsm 6(%r2),8 + ad082006| gnu stosm 6(%r2),8 + b2460080| gnu stura %r8,%r0 + b9250080| gnu sturg %r8,%r0 + 5b82100b| gnu s %r8,11(%r2,%r1) + 1b80| gnu sr %r8,%r0 + b9f90080| gnu srk %r8,%r0,%r0 e382100b005b| gnu sy %r8,11(%r2,%r1) e382100b0009| gnu sg %r8,11(%r2,%r1) - b9090080| gnu sgr %r8,%r0 - b9e90080| gnu sgrk %r8,%r0,%r0 + b9090080| gnu sgr %r8,%r0 + b9e90080| gnu sgrk %r8,%r0,%r0 e382100b0019| gnu sgf %r8,11(%r2,%r1) - b9190080| gnu sgfr %r8,%r0 - b34b0080| gnu sxbr %f8,%f0 - b3db0080| gnu sxtr %f8,%f0,%f0 + b9190080| gnu sgfr %r8,%r0 + b34b0080| gnu sxbr %f8,%f0 + b3db0080| gnu sxtr %f8,%f0,%f0 b3db0180| gnu sxtra 
%f8,%f0,%f0,1 ed82100b001b| gnu sdb %f8,11(%r2,%r1) - b31b0080| gnu sdbr %f8,%f0 - b3d30080| gnu sdtr %f8,%f0,%f0 + b31b0080| gnu sdbr %f8,%f0 + b3d30080| gnu sdtr %f8,%f0,%f0 b3d30180| gnu sdtra %f8,%f0,%f0,1 ed82100b000b| gnu seb %f8,11(%r2,%r1) - b30b0080| gnu sebr %f8,%f0 + b30b0080| gnu sebr %f8,%f0 fb332006100b| gnu sp 6(4,%r2),11(4,%r1) 4b82100b| gnu sh %r8,11(%r2,%r1) e382100b007b| gnu shy %r8,11(%r2,%r1) e382100b0039| gnu sgh %r8,11(%r2,%r1) - b9c90080| gnu shhhr %r8,%r0,%r0 - b9d90080| gnu shhlr %r8,%r0,%r0 + b9c90080| gnu shhhr %r8,%r0,%r0 + b9d90080| gnu shhlr %r8,%r0,%r0 5f82100b| gnu sl %r8,11(%r2,%r1) - 1f80| gnu slr %r8,%r0 - b9fb0080| gnu slrk %r8,%r0,%r0 + 1f80| gnu slr %r8,%r0 + b9fb0080| gnu slrk %r8,%r0,%r0 e382100b005f| gnu sly %r8,11(%r2,%r1) e382100b000b| gnu slg %r8,11(%r2,%r1) - b90b0080| gnu slgr %r8,%r0 - b9eb0080| gnu slgrk %r8,%r0,%r0 + b90b0080| gnu slgr %r8,%r0 + b9eb0080| gnu slgrk %r8,%r0,%r0 e382100b001b| gnu slgf %r8,11(%r2,%r1) - b91b0080| gnu slgfr %r8,%r0 + b91b0080| gnu slgfr %r8,%r0 b9cb0080| gnu slhhhr %r8,%r0,%r0 b9db0080| gnu slhhlr %r8,%r0,%r0 -c28500000008| gnu slfi %r8,8 -c28400000008| gnu slgfi %r8,8 +c28500000008| gnu slfi %r8,8 +c28400000008| gnu slgfi %r8,8 e382100b0099| gnu slb %r8,11(%r2,%r1) - b9990080| gnu slbr %r8,%r0 + b9990080| gnu slbr %r8,%r0 e382100b0089| gnu slbg %r8,11(%r2,%r1) - b9890080| gnu slbgr %r8,%r0 - 3780| gnu sxr %f8,%f0 + b9890080| gnu slbgr %r8,%r0 + 3780| gnu sxr %f8,%f0 6b82100b| gnu sd %f8,11(%r2,%r1) - 2b80| gnu sdr %f8,%f0 + 2b80| gnu sdr %f8,%f0 7b82100b| gnu se %f8,11(%r2,%r1) - 3b80| gnu ser %f8,%f0 + 3b80| gnu ser %f8,%f0 6f82100b| gnu sw %f8,11(%r2,%r1) - 2f80| gnu swr %f8,%f0 + 2f80| gnu swr %f8,%f0 7f82100b| gnu su %f8,11(%r2,%r1) 3f80| gnu sur %f8,%f0 0a7c| gnu svc 124 @@ -1032,7 +1032,7 @@ dd032006100b| gnu trt 6(4,%r2),11(%r1) b9bf3080| gnu trte %r8,%r0,3 d0032006100b| gnu trtr 6(4,%r2),11(%r1) b9bd3080| gnu trtre %r8,%r0,3 - b2a50080| gnu tre %r8,%r0 + b2a50080| gnu tre 
%r8,%r0 b9933080| gnu troo %r8,%r0,3 b9923080| gnu trot %r8,%r0,3 b9913080| gnu trto %r8,%r0,3 @@ -1136,10 +1136,10 @@ e722100b3801| gnu vleh %v18,11(%r2,%r1),3 e722100b3803| gnu vlef %v18,11(%r2,%r1),3 e722100b3802| gnu vleg %v18,11(%r2,%r1),3 e722100b3800| gnu vleb %v18,11(%r2,%r1),3 -e72000083841| gnu vleih %v18,8,3 -e72000083843| gnu vleif %v18,8,3 -e72000083842| gnu vleig %v18,8,3 -e72000083840| gnu vleib %v18,8,3 +e720ffff3841| gnu vleih %v18,-1,3 +e720ffff3843| gnu vleif %v18,-1,3 +e720ffff3842| gnu vleig %v18,-1,3 +e720ffff3840| gnu vleib %v18,-1,3 e622100b3807| gnu vlerg %v18,11(%r2,%r1) e723009138c7| gnu vfidb %v18,%v3,1,9 e785100b1021| gnu vlgvh %r8,%v5,11(%r1) diff --git a/s390x/s390xmap/map.go b/s390x/s390xmap/map.go index 9ba698f4..1adfdfbe 100644 --- a/s390x/s390xmap/map.go +++ b/s390x/s390xmap/map.go @@ -272,7 +272,7 @@ func computeMaskValueReserved(args Args, text string) (mask, value, reserved uin } func Imm_signed_8bit_check(op string) bool { - imm_8 := []string{"ASI", "AGSI", "ALSI", "ALGSI", "CIB", "CGIB", "CIJ", "CGIJ"} + imm_8 := []string{"ASI", "AGSI", "ALSI", "ALGSI", "CIB", "CGIB", "CIJ", "CGIJ", "NI", "NIY", "OI", "OIY", "XI", "XIY"} var ret bool ret = false for _, str := range imm_8 { @@ -285,7 +285,7 @@ func Imm_signed_8bit_check(op string) bool { } func Imm_signed_16bit_check(op string) bool { - imm_16 := []string{"AHI", "AGHI", "ALHSIK", "ALGHSIK", "AHIK", "AGHIK", "LHI", "LGHI", "MVGHI", "CIT", "CGIT", "CGHI", "CGHSI", "CHHSI", "CHI", "CHSI", "CRJ", "CGRJ"} + imm_16 := []string{"AHI", "AGHI", "ALHSIK", "ALGHSIK", "AHIK", "AGHIK", "LHI", "LGHI", "MVGHI", "CIT", "CGIT", "CGHI", "CGHSI", "CHHSI", "CHI", "CHSI", "CRJ", "CGRJ", "NIHH", "NILL", "NIHL", "NILH", "LLIHH", "LLILL", "LLIHL", "LLILH", "OIHH", "OILL", "OIHL", "OILH", "VLEIB", "VLEIH", "VLEIF", "VLEIG"} var ret bool ret = false for _, str := range imm_16 { @@ -298,7 +298,7 @@ func Imm_signed_16bit_check(op string) bool { } func Imm_signed_32bit_check(op string) bool { - imm_32 := 
[]string{"AFI", "AGFI", "AIH", "CIH", "CFI", "CGFI", "CRL", "STRL", "STGRL"} + imm_32 := []string{"AFI", "AGFI", "AIH", "CIH", "CFI", "CGFI", "CRL", "STRL", "STGRL", "LGFI", "LLIHF", "LLILF", "MSFI", "MSGFI", "MGHI", "MHI", "NIHF", "NILF", "OILF", "OIHF", "XILF", "XIHF"} var ret bool ret = false for _, str := range imm_32 { @@ -326,7 +326,6 @@ func check_flags(flags string) bool { // detected instructions into p. One entry may generate multiple intruction // entries as each extended mnemonic listed in text is treated like a unique // instruction. -// func add(p *Prog, text, mnemonics, encoding, format string) { func add(p *Prog, text, mnemonics, encoding, flags string) { // Parse encoding, building size and offset of each field. // The first field in the encoding is the smallest offset. @@ -338,7 +337,6 @@ func add(p *Prog, text, mnemonics, encoding, flags string) { mask, value, dontCare := computeMaskValueReserved(args, text) // split mnemonics into individual instructions - // example: "b target_addr (AA=0 LK=0)|ba target_addr (AA=1 LK=0)|bl target_addr (AA=0 LK=1)|bla target_addr (AA=1 LK=1)" inst := Inst{Text: text, Encoding: mnemonics, Value: value, Mask: mask, DontCare: dontCare} // order inst.Args according to mnemonics order From 153a480e3389d31cbec417e0df8ae6a72cba4702 Mon Sep 17 00:00:00 2001 From: Srinivas Pokala Date: Wed, 16 Oct 2024 08:15:10 +0200 Subject: [PATCH 034/200] s390x: add s390x disassembler support for the plan9 Change-Id: I0eb9a10535175bb70dbab5737c4e02e68fd44c94 Reviewed-on: https://go-review.googlesource.com/c/arch/+/620475 Reviewed-by: Cherry Mui LUCI-TryBot-Result: Go LUCI Reviewed-by: Michael Pratt Reviewed-by: Vishwanatha HD --- s390x/s390xasm/decode_test.go | 2 + s390x/s390xasm/plan9.go | 1282 ++++++++++++++++++++++++++++ s390x/s390xasm/testdata/decode.txt | 162 ++++ 3 files changed, 1446 insertions(+) create mode 100644 s390x/s390xasm/plan9.go create mode 100644 s390x/s390xasm/testdata/decode.txt diff --git 
a/s390x/s390xasm/decode_test.go b/s390x/s390xasm/decode_test.go index 5ca0b741..29bce8e8 100644 --- a/s390x/s390xasm/decode_test.go +++ b/s390x/s390xasm/decode_test.go @@ -75,6 +75,8 @@ func decode(data []byte, t *testing.T, filename string) { switch syntax { case "gnu": out = GNUSyntax(inst, pc) + case "plan9": + out = GoSyntax(inst, pc, nil) default: t.Errorf("unknown syntax %q", syntax) continue diff --git a/s390x/s390xasm/plan9.go b/s390x/s390xasm/plan9.go new file mode 100644 index 00000000..b4df0b89 --- /dev/null +++ b/s390x/s390xasm/plan9.go @@ -0,0 +1,1282 @@ +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package s390xasm + +import ( + "fmt" + "strconv" + "strings" +) + +var vectorSize = map[int]string{0: "B", 1: "H", 2: "F", 3: "G", 4: "Q"} +var vectorCS = map[int]string{0: "BS", 1: "HS", 2: "FS", 3: "GS"} + +// GoSyntax returns the Go assembler syntax for the instruction. +// The syntax was originally defined by Plan 9. +// The inst relates to single instruction. +// The pc is the program counter of the instruction, used for +// expanding PC-relative addresses into absolute ones. +// The symname function queries the symbol table for the program +// being disassembled. Given a target address it returns the name +// and base address of the symbol containing the target, if any; +// otherwise it returns "", 0. 
+func GoSyntax(inst Inst, pc uint64, symname func(uint64) (string, uint64)) string { + if symname == nil { + symname = func(uint64) (string, uint64) { return "", 0 } + } + + var args []string + opString := inst.Op.String() + op := strings.ToUpper(opString) + for i := 0; i < len(inst.Args); i++ { + if inst.Args[i] == nil { + break + } + switch inst.Args[i].(type) { + case Disp12, Disp20: + var temp []string + switch inst.Args[i+1].(type) { + case Index: // D(X,B) + for j := 0; j < 3; j++ { + temp = append(temp, plan9Arg(&inst, pc, symname, inst.Args[i+j])) + } + args = append(args, mem_operandx(temp)) + i = i + 2 + case Base: // D(B) + for j := 0; j < 2; j++ { + temp = append(temp, plan9Arg(&inst, pc, symname, inst.Args[i+j])) + } + args = append(args, mem_operand(temp)) + i = i + 1 + case VReg: // D(B) + for j := 0; j < 3; j++ { + temp = append(temp, plan9Arg(&inst, pc, symname, inst.Args[i+j])) + } + args = append(args, mem_operandv(temp)) + i = i + 2 + case Len: // D(L,B) + for j := 0; j < 3; j++ { + temp = append(temp, plan9Arg(&inst, pc, symname, inst.Args[i+j])) + } + ar1, ar2 := mem_operandl(temp) + args = append(args, ar1, ar2) + i = i + 2 + default: // D(R,B) + for j := 0; j < 3; j++ { + temp = append(temp, plan9Arg(&inst, pc, symname, inst.Args[i+j])) + } + args = append(args, mem_operandx(temp)) + i = i + 2 + } + default: + args = append(args, plan9Arg(&inst, pc, symname, inst.Args[i])) + } + } + if strings.HasPrefix(op, "V") || strings.Contains(op, "WFC") || strings.Contains(op, "WFK") { + args = args[:len(args)-1] + } + + switch inst.Op { + default: + switch len(args) { + case 0: + return op + case 1: + return fmt.Sprintf("%s %s", op, args[0]) + case 2: + if reverseOperandOrder(inst.Op) { + args[0], args[1] = args[1], args[0] + } + case 3: + if reverseOperandOrder(inst.Op) { + args[0], args[2] = args[2], args[0] + } else if reverseAllOperands(inst.Op) { + args[0], args[1], args[2] = args[1], args[2], args[0] + } + case 4: + if 
reverseOperandOrder(inst.Op) { + args[0], args[3] = args[3], args[0] + } else if reverseAllOperands(inst.Op) { + args[0], args[1], args[2], args[3] = args[1], args[2], args[3], args[0] + } + } + case LCGR, LCGFR: + switch inst.Op { + case LCGR: + op = "NEG" + case LCGFR: + op = "NEGW" + } + if args[0] == args[1] { + args = args[:1] + } else { + args[0], args[1] = args[1], args[0] + } + case LD, LE, LG, LGF, LLGF, LGH, LLGH, LGB, LLGC, LDY, LEY, LRVG, LRV, LRVH: + args[0], args[1] = args[1], args[0] + switch inst.Op { + case LG: + op = "MOVD" + case LGF: + op = "MOVW" + case LLGF: + op = "MOVWZ" + case LGH: + op = "MOVH" + case LLGH: + op = "MOVHZ" + case LGB: + op = "MOVB" + case LLGC: + op = "MOVBZ" + case LDY, LD: + op = "FMOVD" + case LEY, LE: + op = "FMOVS" + case LRVG: + op = "MOVDBR" + case LRV: + op = "MOVWBR" + case LRVH: + op = "MOVHBR" + } + case LA, LAY: + args[0], args[1] = args[1], args[0] + op = "MOVD" + + case LAA, LAAG, LAAL, LAALG, LAN, LANG, LAX, LAXG, LAO, LAOG: + args[0], args[1] = args[1], args[0] + case LM, LMY, LMG: // Load Multiple + switch inst.Op { + case LM, LMY: + op = "LMY" + } + args[0], args[1], args[2] = args[2], args[0], args[1] + + case STM, STMY, STMG: // Store Multiple + switch inst.Op { + case STM, STMY: + op = "STMY" + } + case ST, STY, STG, STHY, STCY, STRVG, STRV: + switch inst.Op { + case ST, STY: + op = "MOVW" + case STHY: + op = "MOVH" + case STCY: + op = "MOVB" + case STG: + op = "MOVD" + case STRVG: + op = "MOVDBR" + case STRV: + op = "MOVWBR" + } + case LGR, LGFR, LGHR, LGBR, LLGFR, LLGHR, LLGCR, LRVGR, LRVR, LDR: + switch inst.Op { + case LGR: + op = "MOVD" + case LGFR: + op = "MOVW" + case LGHR: + op = "MOVH" + case LGBR: + op = "MOVB" + case LLGFR: + op = "MOVWZ" + case LLGHR: + op = "MOVHZ" + case LLGCR: + op = "MOVBZ" + case LRVGR: + op = "MOVDBR" + case LRVR: + op = "MOVWBR" + case LDR: + op = "FMOVD" + } + args[0], args[1] = args[1], args[0] + case LZDR: + op = "FMOVD" + return op + " " + "$0" + ", " + args[0] + 
case LZER: + op = "FMOVS" + return op + " " + "$0" + ", " + args[0] + case STD, STDY, STE, STEY: + switch inst.Op { + case STD, STDY: + op = "FMOVD" + case STE, STEY: + op = "FMOVS" + } + + case LGHI, LLILH, LLIHL, LLIHH, LGFI, LLILF, LLIHF: + switch inst.Op { + case LGFI: + op = "MOVW" + case LGHI: + num, err := strconv.ParseInt(args[1][1:], 10, 16) + if err != nil { + return fmt.Sprintf("plan9Arg: error in converting ParseInt:%s", err) + } + if num == int64(int8(num)) { + op = "MOVB" + } else { + op = "MOVH" + } + default: + op = "MOVD" + } + args[0], args[1] = args[1], args[0] + case ARK, AGRK, ALGRK: + switch inst.Op { + case ARK: + op = "ADDW" + case AGRK: + op = "ADD" + case ALGRK: + op = "ADDC" + } + if args[0] == args[1] { + args[0], args[1] = args[2], args[0] + args = args[:2] + } else { + args[0], args[1], args[2] = args[2], args[1], args[0] + } + case AGHIK, AHIK, ALGHSIK: + num, err := strconv.ParseInt(args[2][1:], 10, 32) + if err != nil { + return fmt.Sprintf("plan9Arg: error in converting ParseInt:%s", err) + } + switch inst.Op { + case AGHIK: + if num < 0 { + op = "SUB" + args[2] = args[2][:1] + args[2][2:] + } else { + op = "ADD" + } + case AHIK: + op = "ADDW" + case ALGHSIK: + if num < 0 { + op = "SUBC" + args[2] = args[2][:1] + args[2][2:] + } else { + op = "ADDC" + } + } + args[0], args[1], args[2] = args[2], args[1], args[0] + case AGHI, AHI, AGFI, AFI, AR, ALCGR: + num, err := strconv.ParseInt(args[1][1:], 10, 32) + if err != nil { + return fmt.Sprintf("plan9Arg: error in converting ParseInt:%s", err) + } + switch inst.Op { + case AGHI, AGFI: + if num < 0 { + op = "SUB" + args[1] = args[1][:1] + args[1][2:] + } else { + op = "ADD" + } + case AHI, AFI, AR: + op = "ADDW" + case ALCGR: + op = "ADDE" + } + args[0], args[1] = args[1], args[0] + case AEBR, ADBR, DDBR, DEBR, MDBR, MEEBR, SDBR, SEBR, LPDBR, LNDBR, LPDFR, LNDFR, LCDFR, LCEBR, LEDBR, LDEBR, SQDBR, SQEBR: + switch inst.Op { + case AEBR: + op = "FADDS" + case ADBR: + op = "FADD" + case 
DDBR: + op = "FDIV" + case DEBR: + op = "FDIVS" + case MDBR: + op = "FMUL" + case MEEBR: + op = "FMULS" + case SDBR: + op = "FSUB" + case SEBR: + op = "FSUBS" + case LPDBR: + op = "FABS" + case LNDBR: + op = "FNABS" + case LCDFR: + op = "FNEG" + case LCEBR: + op = "FNEGS" + case SQDBR: + op = "FSQRT" + case SQEBR: + op = "FSQRTS" + } + args[0], args[1] = args[1], args[0] + case SR, SGR, SLGR, SLFI: + switch inst.Op { + case SR, SLFI: + op = "SUBW" + case SGR: + op = "SUB" + case SLGR: + op = "SUBC" + } + args[0], args[1] = args[1], args[0] + case SGRK, SLGRK, SRK: + switch inst.Op { + case SGRK: + op = "SUB" + case SLGRK: + op = "SUBC" + case SRK: + op = "SUBW" + } + if args[0] == args[1] { + args[0], args[1] = args[2], args[0] + args = args[:2] + } else { + args[0], args[1], args[2] = args[2], args[1], args[0] + } + case SLBGR: + op = "SUBE" + args[0], args[1] = args[1], args[0] + case MSGFR, MHI, MSFI, MSGFI: + switch inst.Op { + case MSGFR, MHI, MSFI: + op = "MULLW" + case MSGFI: + op = "MULLD" + } + args[0], args[1] = args[1], args[0] + + case NGR, NR, NILL, NILF, NILH, OGR, OR, OILL, OILF, OILH, XGR, XR, XILF: + op = bitwise_op(inst.Op) + args[0], args[1] = args[1], args[0] + switch inst.Op { + case NILL: + if int(inst.Args[1].(Sign16)) < 0 { + op = "ANDW" + } + + case NILF: + if int(inst.Args[1].(Sign32)) < 0 { + op = "AND" + } + case OILF: + if int(inst.Args[1].(Sign32)) < 0 { + op = "ORW" + } + case XILF: + if int(inst.Args[1].(Sign32)) < 0 { + op = "XORW" + } + } + + case NGRK, NRK, OGRK, ORK, XGRK, XRK: // opcode R1, R2, R3 + op = bitwise_op(inst.Op) + args[0], args[1], args[2] = args[1], args[2], args[0] + case SLLG, SRLG, SLLK, SRLK, RLL, RLLG, SRAK, SRAG: + switch inst.Op { + case SLLG: + op = "SLD" + case SRLG: + op = "SRD" + case SLLK: + op = "SLW" + case SRLK: + op = "SRW" + case SRAK: + op = "SRAW" + case SRAG: + op = "SRAD" + } + args[0], args[1], args[2] = args[2], args[1], args[0] + case TRAP2, SVC: + op = "SYSALL" + case CR, CLR, CGR, CLGR, 
KDBR, CDBR, CEBR, CGHI, CHI, CGFI, CLGFI, CFI, CLFI: + switch inst.Op { + case CGHI, CGFI, CGR: + op = "CMP" + case CHI, CFI, CR: + op = "CMPW" + case CLGFI, CLGR: + op = "CMPU" + case CLFI, CLR: + op = "CMPWU" + case CDBR: + op = "FCMPU" + case KDBR: + op = "FCMPO" + } + case CEFBRA, CDFBRA, CEGBRA, CDGBRA, CELFBR, CDLFBR, CELGBR, CDLGBR, CFEBRA, CFDBRA, CGEBRA, CGDBRA, CLFEBR, CLFDBR, CLGEBR, CLGDBR: + args[0], args[1] = args[2], args[0] + args = args[:2] + case CGRJ, CGIJ: + mask, err := strconv.Atoi(args[2][1:]) + if err != nil { + return fmt.Sprintf("GoSyntax: error in converting Atoi:%s", err) + } + var check bool + switch mask & 0xf { + case 2: + op = "CMPBGT" + check = true + case 4: + op = "CMPBLT" + check = true + case 6: + op = "CMPBNE" + check = true + case 8: + op = "CMPBEQ" + check = true + case 10: + op = "CMPBGE" + check = true + case 12: + op = "CMPBLE" + check = true + } + if check { + args[2] = args[3] + args = args[:3] + } + case CLGRJ, CLGIJ: + mask, err := strconv.Atoi(args[2][1:]) + if err != nil { + return fmt.Sprintf("GoSyntax: error in converting Atoi:%s", err) + } + var check bool + switch mask & 0xf { + case 2: + op = "CMPUBGT" + check = true + case 4: + op = "CMPUBLT" + check = true + case 7: + op = "CMPUBNE" + check = true + case 8: + op = "CMPUBEQ" + check = true + case 10: + op = "CMPUBGE" + check = true + case 12: + op = "CMPUBLE" + check = true + } + if check { + args[2] = args[3] + args = args[:3] + } + case CLRJ, CRJ, CIJ, CLIJ: + args[0], args[1], args[2], args[3] = args[2], args[0], args[1], args[3] + case BRC, BRCL: + mask, err := strconv.Atoi(args[0][1:]) + if err != nil { + return fmt.Sprintf("GoSyntax: error in converting Atoi:%s", err) + } + opStr, check := branch_relative_op(mask, inst.Op) + if opStr != "" { + op = opStr + } + if check { + args[0] = args[1] + args = args[:1] + } + case BCR: + mask, err := strconv.Atoi(args[0][1:]) + if err != nil { + return fmt.Sprintf("GoSyntax: error in converting Atoi:%s", err) + } + 
opStr, check := branchOnConditionOp(mask, inst.Op) + if opStr != "" { + op = opStr + } + if op == "SYNC" || op == "NOPH" { + return op + } + if check { + args[0] = args[1] + args = args[:1] + } + case LOCGR: + mask, err := strconv.Atoi(args[2][1:]) + if err != nil { + return fmt.Sprintf("GoSyntax: error in converting Atoi:%s", err) + } + var check bool + switch mask & 0xf { + case 2: //Greaterthan (M=2) + op = "MOVDGT" + check = true + case 4: //Lessthan (M=4) + op = "MOVDLT" + check = true + case 7: // Not Equal (M=7) + op = "MOVDNE" + check = true + case 8: // Equal (M=8) + op = "MOVDEQ" + check = true + case 10: // Greaterthan or Equal (M=10) + op = "MOVDGE" + check = true + case 12: // Lessthan or Equal (M=12) + op = "MOVDLE" + check = true + } + if check { + args[0], args[1] = args[1], args[0] + args = args[:2] + } else { + args[0], args[1], args[2] = args[2], args[1], args[0] + } + case BRASL: + op = "CALL" // BL + args[0] = args[1] + args = args[:1] + case X, XY, XG: + switch inst.Op { + case X, XY: + op = "XORW" + case XG: + op = "XOR" + } + case N, NY, NG, O, OY, OG, XC, NC, OC, MVC, MVCIN, CLC: + switch inst.Op { + case N, NY: + op = "ANDW" + case NG: + op = "AND" + case O, OY: + op = "ORW" + case OG: + op = "OR" + } + args[0], args[1] = args[1], args[0] + case S, SY, SLBG, SLG, SG: + switch inst.Op { + case S, SY: + op = "SUBW" + case SLBG: + op = "SUBE" + case SLG: + op = "SUBC" + case SG: + op = "SUB" + } + args[0], args[1] = args[1], args[0] + case MSG, MSY, MS: + switch inst.Op { + case MSG: + op = "MULLD" + case MSY, MS: + op = "MULLW" + } + case A, AY, ALCG, ALG, AG: + switch inst.Op { + case A, AY: + op = "ADDW" + case ALCG: + op = "ADDE" + case ALG: + op = "ADDC" + case AG: + op = "ADD" + } + args[0], args[1] = args[1], args[0] + case RISBG, RISBGN, RISBHG, RISBLG, RNSBG, RXSBG, ROSBG: + switch inst.Op { + case RNSBG, RXSBG, ROSBG: + num, err := strconv.Atoi(args[2][1:]) + if err != nil { + return fmt.Sprintf("GoSyntax: error in converting 
Atoi:%s", err) + } + if ((num >> 7) & 0x1) != 0 { + op = op + "T" + } + case RISBG, RISBGN, RISBHG, RISBLG: + num, err := strconv.Atoi(args[3][1:]) + if err != nil { + return fmt.Sprintf("GoSyntax: error in converting Atoi:%s", err) + } + if ((num >> 7) & 0x1) != 0 { + op = op + "Z" + } + } + if len(args) == 5 { + args[0], args[1], args[2], args[3], args[4] = args[2], args[3], args[4], args[1], args[0] + } else { + args[0], args[1], args[2], args[3] = args[2], args[3], args[1], args[0] + } + + case VEC, VECL, VCLZ, VCTZ, VREPI, VPOPCT: //mnemonic V1, V2, M3 + mask, err := strconv.Atoi(args[2][1:]) + if err != nil { + return fmt.Sprintf("GoSyntax: error in converting Atoi for %q:%s", op, err) + } + val := mask & 0x7 + if val >= 0 && val < 4 { + op = op + vectorSize[val] + args = args[:2] + } else { + return fmt.Sprintf("specification exception is recognized for %q with mask value: %v \n", op, mask) + } + switch inst.Op { + case VCLZ, VCTZ, VREPI, VPOPCT: + args[0], args[1] = args[1], args[0] + default: + } + //Mnemonic V1, V2, V3, M4 or Mnemonic V1, I2, I3, M4 or Mnemonic V1, V3, I2, M4 + case VA, VS, VACC, VAVG, VAVGL, VMX, VMXL, VMN, VMNL, VGFM, VGM, VREP, VERLLV, VESLV, VSCBI, VSUM, VSUMG, VSUMQ, VMH, VMLH, VML, VME, VMLE, VMO, VMLO: + mask, err := strconv.Atoi(args[3][1:]) + if err != nil { + return fmt.Sprintf("GoSyntax: error in converting Atoi:%s", err) + } + val := mask & 0x7 + switch inst.Op { + case VA, VS, VACC, VSCBI: + if val >= 0 && val < 5 { + if args[0] == args[2] { + args[0], args[1] = args[1], args[0] + args = args[:2] + } else if inst.Op == VS { + if args[0] == args[1] { + args[0] = args[2] + args = args[:2] + } else { + args[0], args[2] = args[2], args[0] + args = args[:3] + } + } else { + args[0], args[1], args[2] = args[1], args[2], args[0] + args = args[:3] + } + op = op + vectorSize[val] + } else { + return fmt.Sprintf("specification exception is recognized for %q with mask value: %v \n", op, mask) + } + case VAVG, VAVGL, VMX, VMXL, VMN, 
VMNL, VGFM, VGM: + if val >= 0 && val < 4 { + op = op + vectorSize[val] + args[0], args[1], args[2] = args[1], args[2], args[0] + args = args[:3] + } else { + return fmt.Sprintf("specification exception is recognized for %q with mask value: %v \n", op, mask) + } + case VREP, VERLLV, VESLV: + if val >= 0 && val < 4 { + op = op + vectorSize[val] + args[0], args[1], args[2] = args[2], args[1], args[0] + args = args[:3] + } else { + return fmt.Sprintf("specification exception is recognized for %q with mask value: %v \n", op, mask) + } + case VSUM, VSUMG, VSUMQ: + var off int + switch inst.Op { + case VSUM: + off = 0 + case VSUMG: + off = 1 + case VSUMQ: + off = 2 + } + if (val > (-1 + off)) && (val < (2 + off)) { + op = op + vectorSize[val] + } else { + return fmt.Sprintf("specification exception is recognized for %q with mask value: %v \n", op, mask) + } + args = args[:3] + case VML, VMH, VMLH, VME, VMLE, VMO, VMLO: + if val >= 0 && val < 3 { + op = op + vectorSize[val] + } + if op == "VML" && val == 2 { + op = op + "W" + } + if args[0] == args[2] { + args[0], args[1] = args[1], args[0] + args = args[:2] + } else { + args[0], args[1], args[2] = args[1], args[2], args[0] + args = args[:3] + } + } + + case VGFMA, VERIM, VMAH, VMALH: // Mnemonic V1, V2, V3, V4/I4, M5 + mask, err := strconv.Atoi(args[4][1:]) + if err != nil { + return fmt.Sprintf("GoSyntax: error in converting Atoi:%s", err) + } + val := mask & 0x7 + args = args[:4] + var off int + switch inst.Op { + case VMAH, VMALH: + off = -1 + } + + if val >= 0 && val < (4+off) { + op = op + vectorSize[val] + } else { + return fmt.Sprintf("specification exception is recognized for %q with mask value: %v \n", op, mask) + } + switch inst.Op { + case VGFMA, VMAH, VMALH: + args[0], args[1], args[2], args[3] = args[1], args[2], args[3], args[0] + default: + args[0], args[3] = args[3], args[0] + } + case VSTRC, VFAE, VFEE, VFENE: + var off uint8 + switch inst.Op { + case VSTRC: + off = uint8(1) + default: + off = uint8(0) + 
} + m1, err := strconv.Atoi(args[3+off][1:]) + if err != nil { + return fmt.Sprintf("GoSyntax: error in converting Atoi:%s", err) + } + m2, err := strconv.Atoi(args[4+off][1:]) + if err != nil { + return fmt.Sprintf("GoSyntax: error in converting Atoi:%s", err) + } + index := m1 & 0x3 + if index < 0 || index > 2 { + return fmt.Sprintf("specification exception is recognized for %q with mask values: %v, %v \n", op, m1, m2) + } + switch m2 { + case 0: + op = op + vectorSize[index] + case 1: + op = op + vectorCS[index] + case 2: + op = op + "Z" + vectorSize[index] + case 3: + op = op + "Z" + vectorCS[index] + default: + return fmt.Sprintf("specification exception is recognized for %q with mask values: %v, %v \n", op, m1, m2) + } + switch inst.Op { + case VSTRC: + args[0], args[1], args[2], args[3] = args[1], args[2], args[3], args[0] + default: + args[0], args[1], args[2] = args[1], args[2], args[0] + } + args = args[:3+off] + + case VCEQ, VCH, VCHL: // Mnemonic V1, V2, V3, M4, M5 + m4, err := strconv.Atoi(args[3][1:]) + if err != nil { + return fmt.Sprintf("GoSyntax: %q error in converting Atoi:%s", op, err) + } + m5, err := strconv.Atoi(args[4][1:]) + if err != nil { + return fmt.Sprintf("GoSyntax: %q error in converting Atoi:%s", op, err) + } + val := (m4 & 0x7) + if m5 == 0 { + if val >= 0 && val < 4 { + op = op + vectorSize[val] + args[0], args[1], args[2] = args[1], args[2], args[0] + args = args[:3] + } else { + return fmt.Sprintf("specification exception is recognized for %q with mask(m4) value: %v \n", op, m4) + } + } else if m5 == 1 { + if val >= 0 && val < 4 { + op = op + vectorCS[val] + args[0], args[1], args[2] = args[1], args[2], args[0] + args = args[:3] + } else { + return fmt.Sprintf("specification exception is recognized for %q with mask(m4) value: %v \n", op, m4) + } + } else { + return fmt.Sprintf("specification exception is recognized for %q with mask(m5) value: %v \n", op, m5) + } + case VFMA, VFMS, VMSL: //Mnemonic V1, V2, V3, V4, M5, M6 + m5, 
err := strconv.Atoi(args[4][1:]) + if err != nil { + return fmt.Sprintf("GoSyntax: %q error in converting Atoi:%s", op, err) + } + m6, err := strconv.Atoi(args[5][1:]) + if err != nil { + return fmt.Sprintf("GoSyntax: %q error in converting Atoi:%s", op, err) + } + switch inst.Op { + case VMSL: + if m5 == 3 && m6 == 8 { + op = op + "EG" + } else if m5 == 3 && m6 == 4 { + op = op + "OG" + } else if m5 == 3 && m6 == 12 { + op = op + "EOG" + } else if m5 == 3 { + op = op + "G" + } + default: + if m5 == 0 && m6 == 3 { + op = op + "DB" + } else if m5 == 8 && m6 == 3 { + op = "W" + op[1:] + "DB" + } else { + return fmt.Sprintf("specification exception is recognized for %q with m5: %v m6: %v \n", op, m5, m6) + } + } + args[0], args[1], args[2], args[3] = args[1], args[2], args[3], args[0] + args = args[:4] + + case VFCE, VFCH, VFCHE: //Mnemonic V1,V2,V3,M4,M5,M6 + m4, err := strconv.Atoi(args[3][1:]) + if err != nil { + return fmt.Sprintf("GoSyntax: %q error in converting Atoi:%s", op, err) + } + m5, err := strconv.Atoi(args[4][1:]) + if err != nil { + return fmt.Sprintf("GoSyntax: %q error in converting Atoi:%s", op, err) + } + m6, err := strconv.Atoi(args[5][1:]) + if err != nil { + return fmt.Sprintf("GoSyntax: %q error in converting Atoi:%s", op, err) + } + if m5 == 0 { + if m4 == 3 && m6 == 0 { + op = op + "DB" + } else if m4 == 3 && m6 == 1 { + op = op + "DBS" + } else { + return fmt.Sprintf("specification exception is recognized for %q with m4: %v, m6: %v \n", op, m4, m6) + } + + } else if m5 == 8 { + if m4 == 3 && m6 == 0 { + op = "W" + op[1:] + "DB" + } else if m4 == 3 && m6 == 1 { + op = "W" + op[1:] + "DBS" + } else { + return fmt.Sprintf("specification exception is recognized for %q with m4: %v, m6: %v \n", op, m4, m6) + } + } else { + return fmt.Sprintf("specification exception is recognized for %q with m5: %v \n", op, m5) + } + args[0], args[1], args[2] = args[1], args[2], args[0] + args = args[:3] + + case VFTCI: + m4, err := strconv.Atoi(args[3][1:]) + if 
err != nil { + return fmt.Sprintf("GoSyntax: %q error in converting Atoi:%s", op, err) + } + m5, err := strconv.Atoi(args[4][1:]) + if err != nil { + return fmt.Sprintf("GoSyntax: %q error in converting Atoi:%s", op, err) + } + val := (m4 & 0x7) + if m5 == 0 { + switch val { + case 2: + op = op + "SB" + case 3: + op = op + "DB" + default: + return fmt.Sprintf("specification exception is recognized for %q with mask(m4) value: %v \n", op, m4) + } + } else if m5 == 8 { + switch val { + case 2: + op = "W" + op[1:] + "SB" + case 3: + op = "W" + op[1:] + "DB" + case 4: + op = "W" + op[1:] + "XB" + default: + return fmt.Sprintf("specification exception is recognized for %q with mask(m4) value: %v \n", op, m4) + } + } else { + return fmt.Sprintf("specification exception is recognized for %q with mask(m5) value: %v \n", op, m5) + } + args[0], args[1], args[2] = args[2], args[1], args[0] + args = args[:3] + case VAC, VACCC: + mask, err := strconv.Atoi(args[4][1:]) + if err != nil { + return fmt.Sprintf("GoSyntax: error in converting Atoi:%s", err) + } + if mask&0x04 == 0 { + return fmt.Sprintf("specification exception is recognized for %q with mask value: %v \n", op, mask) + } + op = op + "Q" + args[0], args[1], args[2], args[3] = args[1], args[2], args[3], args[0] + args = args[:4] + case VL, VLREP: + switch inst.Op { + case VL: + args[0], args[1] = args[1], args[0] + case VLREP: + args[0], args[1] = args[1], args[0] + mask, err := strconv.Atoi(args[2][1:]) + if err != nil { + return fmt.Sprintf("GoSyntax: error in converting Atoi:%s", err) + } + if mask >= 0 && mask < 4 { + op = op + vectorSize[mask] + } + } + args = args[:2] + case VST, VSTEB, VSTEH, VSTEF, VSTEG, VLEB, VLEH, VLEF, VLEG: //Mnemonic V1, D2(X2,B2), M3 + m, err := strconv.Atoi(args[2][1:]) + if err != nil { + return fmt.Sprintf("GoSyntax: error in converting Atoi:%s", err) + } + switch inst.Op { + case VST: + if m == 0 || (m > 2 && m < 5) { + args = args[:2] + } else { + return fmt.Sprintf("specification 
exception is recognized for %q with mask value: %v \n", op, m) + } + case VLEB, VLEH, VLEF, VLEG: + args[0], args[2] = args[2], args[0] + default: + args[0], args[1], args[2] = args[2], args[0], args[1] + } + case VSTM, VSTL, VESL, VESRA, VLM, VERLL, VLVG: //Mnemonic V1, V3, D2(B2)[,M4] or V1, R3,D2(B2) + switch inst.Op { + case VSTM, VLM: + m, err := strconv.Atoi(args[3][1:]) + if err != nil { + return fmt.Sprintf("GoSyntax: error in converting Atoi:%s", err) + } + if !(m == 0 || (m > 2 && m < 5)) { + return fmt.Sprintf("specification exception is recognized for %q with mask value: %v \n", op, m) + } + if inst.Op == VLM { + args[0], args[1], args[2] = args[2], args[0], args[1] + } + args = args[:3] + case VESL, VESRA, VERLL, VLVG: + m, err := strconv.Atoi(args[3][1:]) + if err != nil { + return fmt.Sprintf("GoSyntax: error in converting Atoi:%s", err) + } + if m >= 0 && m < 4 { + op = op + vectorSize[m] + } else { + return fmt.Sprintf("specification exception is recognized for %q with mask value: %v \n", op, m) + } + switch inst.Op { + case VLVG: + args[0], args[2] = args[2], args[0] + args = args[:3] + default: + if args[0] == args[1] { + args[0], args[1] = args[2], args[1] + args = args[:2] + break + } + args[0], args[2] = args[2], args[0] + args = args[:3] + } + case VSTL: + args[0], args[1] = args[1], args[0] + args = args[:3] + } + case VGBM: + val, err := strconv.Atoi(args[1][1:]) + if err != nil { + return fmt.Sprintf("GoSyntax: error in converting Atoi:%s", err) + } + if val == 0 { + op = "VZERO" + args = args[:1] + } else if val == 0xffff { + op = "VONE" + args = args[:1] + } else { + args[0], args[1] = args[1], args[0] + args = args[:2] + } + case VN, VNC, VO, VX, VNO: //mnemonic V1, V2, V3 + if args[0] == args[2] { + args = args[:2] + args[0], args[1] = args[1], args[0] + } else { + args[0], args[1], args[2] = args[1], args[2], args[0] + } + if op == "VNO" { + op = op + "T" + } + case VGEG, VGEF, VSCEG, VSCEF: //Mnemonic V1, D2(V2, B2), M3 + args[0], 
args[2] = args[2], args[0] + + } + if args != nil { + op += " " + strings.Join(args, ", ") + } + + return op +} + +// This function returns corresponding extended mnemonic for the given +// branch on relative mnemonic. +func branch_relative_op(mask int, opconst Op) (op string, check bool) { + switch mask & 0xf { + case 2: + op = "BGT" + check = true + case 4: + op = "BLT" + check = true + case 5: + op = "BLTU" + check = true + case 7: + op = "BNE" + check = true + case 8: + op = "BEQ" + check = true + case 10: + op = "BGE" + check = true + case 12: + op = "BLE" + check = true + case 13: + op = "BLEU" + check = true + case 15: + op = "JMP" // BR + check = true + } + return op, check +} + +// This function returns corresponding extended mnemonic for the given +// branch on condition mnemonic. +func branchOnConditionOp(mask int, opconst Op) (op string, check bool) { + switch mask & 0xf { + case 0: + op = "NOPH" + case 14: + op = "SYNC" + case 15: + op = "JMP" + check = true + } + return op, check +} + +// This function returns corresponding plan9 mnemonic for the native bitwise mnemonic. 
+func bitwise_op(op Op) string { + var ret string + switch op { + case NGR, NGRK, NILL: + ret = "AND" + case NR, NRK, NILH, NILF: + ret = "ANDW" + case OGR, OGRK, OILF: + ret = "OR" + case OR, ORK, OILH, OILL: + ret = "ORW" + case XGR, XGRK, XILF: + ret = "XOR" + case XR, XRK: + ret = "XORW" + } + return ret +} + +// This function parses memory operand of type D(B) +func mem_operand(args []string) string { + if args[0] != "" && args[1] != "" { + args[0] = fmt.Sprintf("%s(%s)", args[0], args[1]) + } else if args[0] != "" { + args[0] = fmt.Sprintf("$%s", args[0]) + } else if args[1] != "" { + args[0] = fmt.Sprintf("(%s)", args[1]) + } else { + args[0] = "" + } + return args[0] +} + +// This function parses memory operand of type D(X,B) +func mem_operandx(args []string) string { + if args[1] != "" && args[2] != "" { + args[1] = fmt.Sprintf("(%s)(%s*1)", args[2], args[1]) + } else if args[1] != "" { + args[1] = fmt.Sprintf("(%s)", args[1]) + } else if args[2] != "" { + args[1] = fmt.Sprintf("(%s)", args[2]) + } else if args[0] != "" { + args[1] = "" + } + if args[0] != "" && args[1] != "" { + args[0] = fmt.Sprintf("%s%s", args[0], args[1]) + } else if args[0] != "" { + args[0] = fmt.Sprintf("$%s", args[0]) + } else if args[1] != "" { + args[0] = fmt.Sprintf("%s", args[1]) + } else { + args[0] = "" + } + return args[0] +} + +// This function parses memory operand of type D(V,B) +func mem_operandv(args []string) string { + if args[1] != "" && args[2] != "" { + args[1] = fmt.Sprintf("(%s)(%s*1)", args[2], args[1]) + } else if args[1] != "" { + args[1] = fmt.Sprintf("(%s*1)", args[1]) + } else if args[2] != "" { + args[1] = fmt.Sprintf("(%s)", args[2]) + } else if args[0] != "" { + args[1] = "" + } + if args[0] != "" && args[1] != "" { + args[0] = fmt.Sprintf("%s%s", args[0], args[1]) + } else if args[0] != "" { + args[0] = fmt.Sprintf("$%s", args[0]) + } else if args[1] != "" { + args[0] = fmt.Sprintf("%s", args[1]) + } else { + args[0] = "" + } + return args[0] +} + +// 
This function parses memory operand of type D(L,B) +func mem_operandl(args []string) (string, string) { + if args[0] != "" && args[2] != "" { + args[0] = fmt.Sprintf("%s(%s)", args[0], args[2]) + } else if args[2] != "" { + args[0] = fmt.Sprintf("(%s)", args[2]) + } else { + args[0] = fmt.Sprintf("%s", args[0]) + } + return args[0], args[1] +} + +// plan9Arg formats arg (which is the argIndex's arg in inst) according to Plan 9 rules. +// NOTE: because Plan9Syntax is the only caller of this func, and it receives a copy +// of inst, it's ok to modify inst.Args here. +func plan9Arg(inst *Inst, pc uint64, symname func(uint64) (string, uint64), arg Arg) string { + switch arg.(type) { + case Reg: + if arg == R13 { + return "g" + } + return strings.ToUpper(arg.String(pc)[1:]) + case Base: + if arg == R13 { + return "g" + } + s := arg.String(pc) + if s != "" { + return strings.ToUpper(s[1 : len(s)-1]) + } + return "R0" + case Index: + if arg == R13 { + return "g" + } + s := arg.String(pc) + if s != "" { + return strings.ToUpper(s[1:]) + } + return "" + case VReg: + return strings.ToUpper(arg.String(pc)[1:]) + case Disp20, Disp12: + numstr := arg.String(pc) + num, err := strconv.Atoi(numstr[:len(numstr)]) + if err != nil { + return fmt.Sprintf("plan9Arg: error in converting Atoi:%s", err) + } + if num == 0 { + return "" + } else { + return strconv.Itoa(num) + } + case RegIm12, RegIm16, RegIm24, RegIm32: + addr, err := strconv.ParseUint(arg.String(pc)[2:], 16, 64) + if err != nil { + return fmt.Sprintf("plan9Arg: error in converting ParseUint:%s", err) + } + off := int(addr - pc) + s, base := symname(addr) + if s != "" && addr == base { + return fmt.Sprintf("%s(SB)", s) + } + off = off / inst.Len + return fmt.Sprintf("%v(PC)", off) + case Imm, Sign8, Sign16, Sign32: + numImm := arg.String(pc) + switch arg.(type) { + case Sign32, Sign16, Imm: + num, err := strconv.ParseInt(numImm, 10, 64) + if err != nil { + return fmt.Sprintf("plan9Arg: error in converting ParseInt:%s", err) 
+ } + switch inst.Op { + case LLIHF: + num = num << 32 + case LLILH: + num = num << 16 + case NILH: + num = (num << 16) | int64(0xFFFF) + case OILH: + num = num << 16 + } + numImm = fmt.Sprintf("%d", num) + } + return fmt.Sprintf("$%s", numImm) + case Mask, Len: + num := arg.String(pc) + return fmt.Sprintf("$%s", num) + } + return fmt.Sprintf("???(%v)", arg) +} + +// It checks any 2 args of given instructions to swap or not +func reverseOperandOrder(op Op) bool { + switch op { + case LOCR, MLGR: + return true + case LTEBR, LTDBR: + return true + case VLEIB, VLEIH, VLEIF, VLEIG, VPDI: + return true + case VSLDB: + return true + } + return false +} + +// It checks whether to reverse all the args of given mnemonic or not +func reverseAllOperands(op Op) bool { + switch op { + case VLVGP: //3-operand list + return true + case VSEL, VPERM: //4-Operand list + return true + } + return false +} diff --git a/s390x/s390xasm/testdata/decode.txt b/s390x/s390xasm/testdata/decode.txt new file mode 100644 index 00000000..f04715b2 --- /dev/null +++ b/s390x/s390xasm/testdata/decode.txt @@ -0,0 +1,162 @@ + b9040021| plan9 MOVD R1, R2 + b9140043| plan9 MOVW R3, R4 + b9070065| plan9 MOVH R5, R6 + b9060087| plan9 MOVB R7, R8 + b9160021| plan9 MOVWZ R1, R2 + b9850032| plan9 MOVHZ R2, R3 + b9840054| plan9 MOVBZ R4, R5 + b90f0021| plan9 MOVDBR R1, R2 + b91f0043| plan9 MOVWBR R3, R4 + b9e28010| plan9 MOVDEQ R0, R1 + b9e2a032| plan9 MOVDGE R2, R3 + b9e22054| plan9 MOVDGT R4, R5 + b9e2c076| plan9 MOVDLE R6, R7 + b9e24098| plan9 MOVDLT R8, R9 + b9e270ba| plan9 MOVDNE R10, R11 + b9f23012| plan9 LOCR $3, R2, R1 + b9e27065| plan9 MOVDNE R5, R6 +e310f0000004| plan9 MOVD (R15), R1 +e320f0000014| plan9 MOVW (R15), R2 +e330f0000015| plan9 MOVH (R15), R3 +e340f0000077| plan9 MOVB (R15), R4 +e350f0000016| plan9 MOVWZ (R15), R5 +e360f0000091| plan9 MOVHZ (R15), R6 +e370f0000090| plan9 MOVBZ (R15), R7 +e380f000000f| plan9 MOVDBR (R15), R8 +e390f000001e| plan9 MOVWBR (R15), R9 +e310fff8ff24| plan9 MOVD 
R1, -8(R15) +e320fff8ff50| plan9 MOVW R2, -8(R15) +e330fff8ff70| plan9 MOVH R3, -8(R15) +e340fff8ff72| plan9 MOVB R4, -8(R15) +e350fff8ff2f| plan9 MOVDBR R5, -8(R15) +e360fff8ff3e| plan9 MOVWBR R6, -8(R15) +c01efffffffe| plan9 MOVD $-8589934592, R1 +c021fffe0000| plan9 MOVW $-131072, R2 + a739fe00| plan9 MOVH $-512, R3 + a749ffff| plan9 MOVB $-1, R4 + b9e81022| plan9 ADD R1, R2 + b9e81032| plan9 ADD R1, R2, R3 + a71b2000| plan9 ADD $8192, R1 +ec21200000d9| plan9 ADD $8192, R1, R2 +c21800008000| plan9 ADD $32768, R1 + b9ea1022| plan9 ADDC R1, R2 + b9ea1032| plan9 ADDC R1, R2, R3 +ec21000100db| plan9 ADDC $1, R1, R2 +ec21ffff00db| plan9 SUBC $1, R1, R2 + 1a21| plan9 ADDW R1, R2 + b9f81032| plan9 ADDW R1, R2, R3 + a71a2000| plan9 ADDW $8192, R1 +ec21200000d8| plan9 ADDW $8192, R1, R2 + b9880021| plan9 ADDE R1, R2 +e3201000000a| plan9 ADDC (R1), R2 + 5a605000| plan9 ADDW (R5), R6 + 5a807fff| plan9 ADDW 4095(R7), R8 +e3201fffff5a| plan9 ADDW -1(R1), R2 +e34030000188| plan9 ADDE 4096(R3), R4 +e34230000188| plan9 ADDE 4096(R3)(R2*1), R4 + b9090043| plan9 SUB R3, R4 + b9e93054| plan9 SUB R3, R4, R5 + a73be000| plan9 SUB $8192, R3 +ec43e00000d9| plan9 SUB $8192, R3, R4 + b90b0021| plan9 SUBC R1, R2 +ec43ffff00db| plan9 SUBC $1, R3, R4 + b9eb2043| plan9 SUBC R2, R3, R4 + 1b43| plan9 SUBW R3, R4 + b9f93054| plan9 SUBW R3, R4, R5 +c21500002000| plan9 SUBW $8192, R1 +e320400f0089| plan9 SUBE 15(R4), R2 +e32040080009| plan9 SUB 8(R4), R2 + 5b204000| plan9 SUBW (R4), R2 +e3204fffff5b| plan9 SUBW -1(R4), R2 + b91c0076| plan9 MULLW R6, R7 + a76c2000| plan9 MULLW $8192, R6 +c2810000000f| plan9 MULLW $15, R8 +c281ffff7fff| plan9 MULLW $-32769, R8 +c21080000000| plan9 MULLD $-2147483648, R1 + b9860021| plan9 MLGR R1, R2 + b9030011| plan9 NEG R1 + b9030021| plan9 NEG R1, R2 + b9130011| plan9 NEGW R1 + b9130021| plan9 NEGW R1, R2 + b9830022| plan9 FLOGR R2, R2 + b9800021| plan9 AND R1, R2 + b9e42031| plan9 AND R1, R2, R3 + a517ffff| plan9 ANDW $-1, R1 +c01bffff0000| plan9 AND $-65536, 
R1 + 1421| plan9 ANDW R1, R2 + b9f42031| plan9 ANDW R1, R2, R3 +c01b00000001| plan9 ANDW $1, R1 + a5160001| plan9 ANDW $131071, R1 +c01b00010000| plan9 ANDW $65536, R1 + a517fffe| plan9 ANDW $-2, R1 + a517000f| plan9 AND $15, R1 +e32010000080| plan9 AND (R1), R2 + 54201000| plan9 ANDW (R1), R2 +e32010000154| plan9 ANDW 4096(R1), R2 + b9810021| plan9 OR R1, R2 + b9e62031| plan9 OR R1, R2, R3 + a51a0001| plan9 ORW $65536, R1 + a51bffff| plan9 ORW $-1, R1 + a51b0001| plan9 ORW $1, R1 + 1621| plan9 ORW R1, R2 +c01d0001ffff| plan9 OR $131071, R1 +c01dffffffff| plan9 ORW $-1, R1 + b9f62031| plan9 ORW R1, R2, R3 +e32010000081| plan9 OR (R1), R2 + 56201000| plan9 ORW (R1), R2 +e3201fffff56| plan9 ORW -1(R1), R2 + b9820021| plan9 XOR R1, R2 + b9e72031| plan9 XOR R1, R2, R3 +c01700000001| plan9 XOR $1, R1 +c0170001ffff| plan9 XOR $131071, R1 +c01700010000| plan9 XOR $65536, R1 + 1721| plan9 XORW R1, R2 + b9f72031| plan9 XORW R1, R2, R3 +c017fffffffe| plan9 XORW $-2, R1 + 0700| plan9 NOPH + 07e0| plan9 SYNC + b92e0024| plan9 KM R2, R4 + b92f0026| plan9 KMC R2, R6 + b93f0028| plan9 KLMD R2, R8 + b93e0004| plan9 KIMD R0, R4 + b93a0008| plan9 KDSA R0, R8 + b9296024| plan9 KMA R2, R6, R4 + b92d6024| plan9 KMCTR R2, R6, R4 +e743400000f3| plan9 VAB V3, V4 +e743600000f3| plan9 VAB V3, V6, V4 +e743400010f3| plan9 VAH V3, V4 +e743600010f3| plan9 VAH V3, V6, V4 +e743400020f3| plan9 VAF V3, V4 +e743600020f3| plan9 VAF V3, V6, V4 +e743400030f3| plan9 VAG V3, V4 +e743600030f3| plan9 VAG V3, V6, V4 +e743400040f3| plan9 VAQ V3, V4 +e743600040f3| plan9 VAQ V3, V6, V4 +e734600000f7| plan9 VSB V6, V4, V3 +e722100000f7| plan9 VSB V1, V2 +e734600010f7| plan9 VSH V6, V4, V3 +e722100010f7| plan9 VSH V1, V2 +e734600020f7| plan9 VSF V6, V4, V3 +e722100020f7| plan9 VSF V1, V2 +e734600030f7| plan9 VSG V6, V4, V3 +e722100030f7| plan9 VSG V1, V2 +e734600040f7| plan9 VSQ V6, V4, V3 +e722100040f7| plan9 VSQ V1, V2 +e7824000608a| plan9 VSTRCB V2, V4, V6, V8 +e7824100608a| plan9 VSTRCH V2, V4, V6, V8 
+e7824200608a| plan9 VSTRCF V2, V4, V6, V8 +e7824010608a| plan9 VSTRCBS V2, V4, V6, V8 +e7824110608a| plan9 VSTRCHS V2, V4, V6, V8 +e7824210608a| plan9 VSTRCFS V2, V4, V6, V8 +e710ffff0044| plan9 VONE V1 +e70000000844| plan9 VZERO V16 +e70210000068| plan9 VN V2, V1, V0 +e71010000468| plan9 VN V16, V1 +e70210000069| plan9 VNC V2, V1, V0 +e71010000469| plan9 VNC V16, V1 +e7021000006a| plan9 VO V2, V1, V0 +e7101000046a| plan9 VO V16, V1 +e7021000006d| plan9 VX V2, V1, V0 +e7101000046d| plan9 VX V16, V1 +e7101000046b| plan9 VNOT V16, V1 +e78340000062| plan9 VLVGP R3, R4, V8 From ec82f99f1e535a0f28111fed3c35e456e2dad2c5 Mon Sep 17 00:00:00 2001 From: Ian Lance Taylor Date: Tue, 5 Nov 2024 11:29:31 -0800 Subject: [PATCH 035/200] README: mention the git repo Change-Id: Ib185a246393d19c57c5a42d0c59e92b0cb4a724d Reviewed-on: https://go-review.googlesource.com/c/arch/+/625655 Commit-Queue: Ian Lance Taylor LUCI-TryBot-Result: Go LUCI Reviewed-by: Cherry Mui Reviewed-by: Ian Lance Taylor Auto-Submit: Ian Lance Taylor --- README.md | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index d2351348..9995b5e9 100644 --- a/README.md +++ b/README.md @@ -8,8 +8,10 @@ The parts needed in the main Go repository are copied in. ## Report Issues / Send Patches This repository uses Gerrit for code changes. To learn how to submit changes to -this repository, see https://golang.org/doc/contribute.html. +this repository, see https://go.dev/doc/contribute. + +The git repository is https://go.googlesource.com/arch. The main issue tracker for the arch repository is located at -https://github.com/golang/go/issues. Prefix your issue with "x/arch:" in the +https://go.dev/issues. Prefix your issue with "x/arch:" in the subject line, so it is easy to find. 
From f977c2e4e3f4a03cfac4ffe5a928d04a3e933b64 Mon Sep 17 00:00:00 2001 From: Srinivas Pokala Date: Wed, 6 Nov 2024 09:31:05 +0100 Subject: [PATCH 036/200] s390x/s390xasm: fix self-assignment error It removes all the argument self-assignments. Change-Id: Ifabd0629a205211d5aaf8fc00847b70806e0ddc8 Reviewed-on: https://go-review.googlesource.com/c/arch/+/625875 LUCI-TryBot-Result: Go LUCI Reviewed-by: Vishwanatha HD Reviewed-by: Cherry Mui Reviewed-by: David Chase --- s390x/s390xasm/plan9.go | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/s390x/s390xasm/plan9.go b/s390x/s390xasm/plan9.go index b4df0b89..95464294 100644 --- a/s390x/s390xasm/plan9.go +++ b/s390x/s390xasm/plan9.go @@ -244,7 +244,7 @@ func GoSyntax(inst Inst, pc uint64, symname func(uint64) (string, uint64)) strin args[0], args[1] = args[2], args[0] args = args[:2] } else { - args[0], args[1], args[2] = args[2], args[1], args[0] + args[0], args[2] = args[2], args[0] } case AGHIK, AHIK, ALGHSIK: num, err := strconv.ParseInt(args[2][1:], 10, 32) @@ -269,7 +269,7 @@ func GoSyntax(inst Inst, pc uint64, symname func(uint64) (string, uint64)) strin op = "ADDC" } } - args[0], args[1], args[2] = args[2], args[1], args[0] + args[0], args[2] = args[2], args[0] case AGHI, AHI, AGFI, AFI, AR, ALCGR: num, err := strconv.ParseInt(args[1][1:], 10, 32) if err != nil { @@ -344,7 +344,7 @@ func GoSyntax(inst Inst, pc uint64, symname func(uint64) (string, uint64)) strin args[0], args[1] = args[2], args[0] args = args[:2] } else { - args[0], args[1], args[2] = args[2], args[1], args[0] + args[0], args[2] = args[2], args[0] } case SLBGR: op = "SUBE" @@ -399,7 +399,7 @@ func GoSyntax(inst Inst, pc uint64, symname func(uint64) (string, uint64)) strin case SRAG: op = "SRAD" } - args[0], args[1], args[2] = args[2], args[1], args[0] + args[0], args[2] = args[2], args[0] case TRAP2, SVC: op = "SYSALL" case CR, CLR, CGR, CLGR, KDBR, CDBR, CEBR, CGHI, CHI, CGFI, CLGFI, CFI, CLFI: @@ -481,7 +481,7 
@@ func GoSyntax(inst Inst, pc uint64, symname func(uint64) (string, uint64)) strin args = args[:3] } case CLRJ, CRJ, CIJ, CLIJ: - args[0], args[1], args[2], args[3] = args[2], args[0], args[1], args[3] + args[0], args[1], args[2] = args[2], args[0], args[1] case BRC, BRCL: mask, err := strconv.Atoi(args[0][1:]) if err != nil { @@ -541,7 +541,7 @@ func GoSyntax(inst Inst, pc uint64, symname func(uint64) (string, uint64)) strin args[0], args[1] = args[1], args[0] args = args[:2] } else { - args[0], args[1], args[2] = args[2], args[1], args[0] + args[0], args[2] = args[2], args[0] } case BRASL: op = "CALL" // BL @@ -679,7 +679,7 @@ func GoSyntax(inst Inst, pc uint64, symname func(uint64) (string, uint64)) strin case VREP, VERLLV, VESLV: if val >= 0 && val < 4 { op = op + vectorSize[val] - args[0], args[1], args[2] = args[2], args[1], args[0] + args[0], args[2] = args[2], args[0] args = args[:3] } else { return fmt.Sprintf("specification exception is recognized for %q with mask value: %v \n", op, mask) @@ -910,7 +910,7 @@ func GoSyntax(inst Inst, pc uint64, symname func(uint64) (string, uint64)) strin } else { return fmt.Sprintf("specification exception is recognized for %q with mask(m5) value: %v \n", op, m5) } - args[0], args[1], args[2] = args[2], args[1], args[0] + args[0], args[2] = args[2], args[0] args = args[:3] case VAC, VACCC: mask, err := strconv.Atoi(args[4][1:]) @@ -985,7 +985,7 @@ func GoSyntax(inst Inst, pc uint64, symname func(uint64) (string, uint64)) strin args = args[:3] default: if args[0] == args[1] { - args[0], args[1] = args[2], args[1] + args[0] = args[2] args = args[:2] break } From f23035dd65d1c4767490f13c554b34562902d91f Mon Sep 17 00:00:00 2001 From: Lin Runze Date: Sun, 4 Aug 2024 19:23:00 +0800 Subject: [PATCH 037/200] riscv64: add tests for riscv64asm Add validation tests for riscv64asm GNU/Plan9 decoder, including objdump test and external test. 
Change-Id: Id7442704ea7e10c22ca4a799cdfc9f7d043f85c3 Reviewed-on: https://go-review.googlesource.com/c/arch/+/602916 Reviewed-by: Meng Zhuo LUCI-TryBot-Result: Go LUCI Reviewed-by: David Chase Reviewed-by: Mark Ryan Reviewed-by: Cherry Mui --- riscv64/riscv64asm/decode_test.go | 80 +++++ riscv64/riscv64asm/ext_test.go | 330 +++++++++++++++++ riscv64/riscv64asm/objdump_test.go | 86 +++++ riscv64/riscv64asm/objdumpext_test.go | 299 ++++++++++++++++ riscv64/riscv64asm/testdata/Makefile | 9 + riscv64/riscv64asm/testdata/gnucases.txt | 390 +++++++++++++++++++++ riscv64/riscv64asm/testdata/plan9cases.txt | 336 ++++++++++++++++++ 7 files changed, 1530 insertions(+) create mode 100644 riscv64/riscv64asm/decode_test.go create mode 100644 riscv64/riscv64asm/ext_test.go create mode 100644 riscv64/riscv64asm/objdump_test.go create mode 100644 riscv64/riscv64asm/objdumpext_test.go create mode 100644 riscv64/riscv64asm/testdata/Makefile create mode 100644 riscv64/riscv64asm/testdata/gnucases.txt create mode 100644 riscv64/riscv64asm/testdata/plan9cases.txt diff --git a/riscv64/riscv64asm/decode_test.go b/riscv64/riscv64asm/decode_test.go new file mode 100644 index 00000000..1590aaac --- /dev/null +++ b/riscv64/riscv64asm/decode_test.go @@ -0,0 +1,80 @@ +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package riscv64asm + +import ( + "bufio" + "encoding/hex" + "os" + "path/filepath" + "strings" + "testing" +) + +func testDecode(t *testing.T, syntax string) { + input := filepath.Join("testdata", syntax+"cases.txt") + f, err := os.Open(input) + if err != nil { + t.Fatal(err) + } + defer f.Close() + scanner := bufio.NewScanner(f) + for scanner.Scan() { + line := strings.TrimSpace(scanner.Text()) + if line == "" || strings.HasPrefix(line, "#") { + continue + } + f := strings.SplitN(line, "\t", 2) + i := strings.Index(f[0], "|") + + if i < 0 { + t.Errorf("parsing %q: missing | separator", f[0]) + continue + } + if i%2 != 0 { + t.Errorf("parsing %q: misaligned | separator", f[0]) + } + code, err := hex.DecodeString(f[0][:i] + f[0][i+1:]) + if err != nil { + t.Errorf("parsing %q: %v", f[0], err) + continue + } + asm0 := strings.Replace(f[1], " ", " ", -1) + asm := strings.TrimSpace(asm0) + inst, decodeErr := Decode(code) + if decodeErr != nil && decodeErr != errUnknown { + if asm == "illegalins" && decodeErr == errShort { + continue + } + // Some rarely used system instructions are not supported + // Following logicals will filter such unknown instructions + t.Errorf("parsing %x: %s", code, decodeErr) + continue + } + + var out string + switch syntax { + case "gnu": + out = GNUSyntax(inst) + case "plan9": + out = GoSyntax(inst, 0, nil, nil) + default: + t.Errorf("unknown syntax %q", syntax) + continue + } + + if asm != out { + t.Errorf("Decode(%s) [%s] = %s want %s", f[0], syntax, out, asm) + } + } +} + +func TestDecodeGNUSyntax(t *testing.T) { + testDecode(t, "gnu") +} + +func TestDecodeGoSyntax(t *testing.T) { + testDecode(t, "plan9") +} diff --git a/riscv64/riscv64asm/ext_test.go b/riscv64/riscv64asm/ext_test.go new file mode 100644 index 00000000..fa6961f2 --- /dev/null +++ b/riscv64/riscv64asm/ext_test.go @@ -0,0 +1,330 @@ +// Copyright 2024 The Go Authors. All rights reserved. 
+// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Support for testing against external disassembler program. + +package riscv64asm + +import ( + "bufio" + "bytes" + "encoding/hex" + "flag" + "fmt" + "io" + "io/ioutil" + "log" + "math/rand" + "os" + "os/exec" + "path/filepath" + "strings" + "testing" + "time" +) + +var ( + dumpTest = flag.Bool("dump", false, "dump all encodings") + mismatch = flag.Bool("mismatch", false, "log allowed mismatches") + keep = flag.Bool("keep", false, "keep object files around") + debug = false +) + +// An ExtInst represents a single decoded instruction parsed +// from an external disassembler's output. +type ExtInst struct { + addr uint64 + enc [4]byte + nenc int + text string +} + +func (r ExtInst) String() string { + return fmt.Sprintf("%#x: % x: %s", r.addr, r.enc, r.text) +} + +// An ExtDis is a connection between an external disassembler and a test. +type ExtDis struct { + Dec chan ExtInst + File *os.File + Size int + Cmd *exec.Cmd +} + +// Run runs the given command - the external disassembler - and returns +// a buffered reader of its standard output. +func (ext *ExtDis) Run(cmd ...string) (*bufio.Reader, error) { + if *keep { + log.Printf("%s\n", strings.Join(cmd, " ")) + } + ext.Cmd = exec.Command(cmd[0], cmd[1:]...) + out, err := ext.Cmd.StdoutPipe() + if err != nil { + return nil, fmt.Errorf("stdoutpipe: %v", err) + } + if err := ext.Cmd.Start(); err != nil { + return nil, fmt.Errorf("exec: %v", err) + } + + b := bufio.NewReaderSize(out, 1<<20) + return b, nil +} + +// Wait waits for the command started with Run to exit. +func (ext *ExtDis) Wait() error { + return ext.Cmd.Wait() +} + +// testExtDis tests a set of byte sequences against an external disassembler. +// The disassembler is expected to produce the given syntax and run +// in the given architecture mode (16, 32, or 64-bit). 
+// The extdis function must start the external disassembler +// and then parse its output, sending the parsed instructions on ext.Dec. +// The generate function calls its argument f once for each byte sequence +// to be tested. The generate function itself will be called twice, and it must +// make the same sequence of calls to f each time. +// When a disassembly does not match the internal decoding, +// allowedMismatch determines whether this mismatch should be +// allowed, or else considered an error. +func testExtDis( + t *testing.T, + syntax string, + extdis func(ext *ExtDis) error, + generate func(f func([]byte)), + allowedMismatch func(text string, inst *Inst, dec ExtInst) bool, +) { + start := time.Now() + ext := &ExtDis{ + Dec: make(chan ExtInst), + } + errc := make(chan error) + + // First pass: write instructions to input file for external disassembler. + file, f, size, err := writeInst(generate) + if err != nil { + t.Fatal(err) + } + ext.Size = size + ext.File = f + defer func() { + f.Close() + if !*keep { + os.Remove(file) + } + }() + + // Second pass: compare disassembly against our decodings. 
+ var ( + totalTests = 0 + totalSkips = 0 + totalErrors = 0 + + errors = make([]string, 0, 100) // Sampled errors, at most cap + ) + go func() { + errc <- extdis(ext) + }() + + generate(func(enc []byte) { + dec, ok := <-ext.Dec + if !ok { + t.Errorf("decoding stream ended early") + return + } + inst, text := disasm(syntax, pad(enc)) + + totalTests++ + if *dumpTest { + fmt.Printf("%x -> %s [%d]\n", enc, dec.text, dec.nenc) + } + + if text != dec.text && !strings.Contains(dec.text, "unknown") && syntax == "gnu" { + suffix := "" + if allowedMismatch(text, &inst, dec) { + totalSkips++ + if !*mismatch { + return + } + suffix += " (allowed mismatch)" + } + totalErrors++ + cmp := fmt.Sprintf("decode(%x) = %q, %d, want %q, %d%s\n", enc, text, len(enc), dec.text, dec.nenc, suffix) + + if len(errors) >= cap(errors) { + j := rand.Intn(totalErrors) + if j >= cap(errors) { + return + } + errors = append(errors[:j], errors[j+1:]...) + } + errors = append(errors, cmp) + } + }) + + if *mismatch { + totalErrors -= totalSkips + } + + fmt.Printf("totalTest: %d total skip: %d total error: %d\n", totalTests, totalSkips, totalErrors) + // Here are some errors about mismatches(44) + for _, b := range errors { + t.Log(b) + } + + if totalErrors > 0 { + t.Fail() + } + t.Logf("%d test cases, %d expected mismatches, %d failures; %.0f cases/second", totalTests, totalSkips, totalErrors, float64(totalTests)/time.Since(start).Seconds()) + t.Logf("decoder coverage: %.1f%%;\n", decodeCoverage()) +} + +// Start address of text. +const start = 0x8000 + +// writeInst writes the generated byte sequences to a new file +// starting at offset start. That file is intended to be the input to +// the external disassembler. 
+func writeInst(generate func(func([]byte))) (file string, f *os.File, size int, err error) { + f, err = ioutil.TempFile("", "riscv64asm") + if err != nil { + return + } + + file = f.Name() + + f.Seek(start, io.SeekStart) + w := bufio.NewWriter(f) + defer w.Flush() + size = 0 + generate(func(x []byte) { + if debug { + fmt.Printf("%#x: %x%x\n", start+size, x, zeros[len(x):]) + } + w.Write(x) + w.Write(zeros[len(x):]) + size += len(zeros) + }) + return file, f, size, nil +} + +var zeros = []byte{0, 0, 0, 0} + +// pad pads the code sequence with pops. +func pad(enc []byte) []byte { + if len(enc) < 4 { + enc = append(enc[:len(enc):len(enc)], zeros[:4-len(enc)]...) + } + return enc +} + +// disasm returns the decoded instruction and text +// for the given source bytes, using the given syntax and mode. +func disasm(syntax string, src []byte) (inst Inst, text string) { + var err error + inst, err = Decode(src) + if err != nil { + text = "error: " + err.Error() + return + } + text = inst.String() + switch syntax { + case "gnu": + text = GNUSyntax(inst) + case "plan9": // [sic] + text = GoSyntax(inst, 0, nil, nil) + default: + text = "error: unknown syntax " + syntax + } + return +} + +// decodecoverage returns a floating point number denoting the +// decoder coverage. +func decodeCoverage() float64 { + n := 0 + for _, t := range decoderCover { + if t { + n++ + } + } + return 100 * float64(1+n) / float64(1+len(decoderCover)) +} + +// Helpers for writing disassembler output parsers. + +// isHex reports whether b is a hexadecimal character (0-9a-fA-F). +func isHex(b byte) bool { + return ('0' <= b && b <= '9') || ('a' <= b && b <= 'f') || ('A' <= b && b <= 'F') +} + +// parseHex parses the hexadecimal byte dump in src, +// appending the parsed bytes to raw and returning the updated slice. +// The returned bool reports whether any invalid hex was found. +// Spaces and tabs between bytes are okay but any other non-hex is not. 
+func parseHex(src []byte, raw []byte) ([]byte, bool) { + src = bytes.TrimSpace(src) + raw, err := hex.AppendDecode(raw, src) + if err != nil { + return nil, false + } + return raw, true +} + +// Generators. +// +// The test cases are described as functions that invoke a callback repeatedly, +// with a new input sequence each time. These helpers make writing those +// a little easier. + +// hexCases generates the cases written in hexadecimal in the encoded string. +// Spaces in 'encoded' separate entire test cases, not individual bytes. +func hexCases(t *testing.T, encoded string) func(func([]byte)) { + return func(try func([]byte)) { + for _, x := range strings.Fields(encoded) { + src, err := hex.DecodeString(x) + if err != nil { + t.Errorf("parsing %q: %v", x, err) + } + try(src) + } + } +} + +// testdataCases generates the test cases recorded in testdata/cases.txt. +// It only uses the inputs; it ignores the answers recorded in that file. +func testdataCases(t *testing.T, syntax string) func(func([]byte)) { + var codes [][]byte + input := filepath.Join("testdata", syntax+"cases.txt") + data, err := ioutil.ReadFile(input) + if err != nil { + t.Fatal(err) + } + for _, line := range strings.Split(string(data), "\n") { + line = strings.TrimSpace(line) + if line == "" || strings.HasPrefix(line, "#") { + continue + } + f := strings.Fields(line)[0] + i := strings.Index(f, "|") + if i < 0 { + t.Errorf("parsing %q: missing | separator", f) + continue + } + if i%2 != 0 { + t.Errorf("parsing %q: misaligned | separator", f) + } + code, err := hex.DecodeString(f[:i] + f[i+1:]) + if err != nil { + t.Errorf("parsing %q: %v", f, err) + continue + } + codes = append(codes, code) + } + + return func(try func([]byte)) { + for _, code := range codes { + try(code) + } + } +} diff --git a/riscv64/riscv64asm/objdump_test.go b/riscv64/riscv64asm/objdump_test.go new file mode 100644 index 00000000..479301bb --- /dev/null +++ b/riscv64/riscv64asm/objdump_test.go @@ -0,0 +1,86 @@ +// 
Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package riscv64asm + +import ( + "strings" + "testing" +) + +func TestObjdumpRISCV64TestDecodeGNUSyntaxdata(t *testing.T) { + testObjdumpRISCV64(t, testdataCases(t, "gnu")) +} +func TestObjdumpRISCV64TestDecodeGoSyntaxdata(t *testing.T) { + testObjdumpRISCV64(t, testdataCases(t, "plan9")) +} + +func TestObjdumpRISCV64Manual(t *testing.T) { + testObjdumpRISCV64(t, hexCases(t, objdumpManualTests)) +} + +// objdumpManualTests holds test cases that will be run by TestObjdumpRISCV64Manual. +// If you are debugging a few cases that turned up in a longer run, it can be useful +// to list them here and then use -run=Manual, particularly with tracing enabled. +// Note that these are byte sequences, so they must be reversed from the usual +// word presentation. +var objdumpManualTests = ` +93020300 +13000000 +9b020300 +afb5b50e +73b012c0 +73f01fc0 +73a012c0 +73e01fc0 +f3223000 +f3221000 +f3222000 +f3123300 +f3121300 +f3122300 +739012c0 +73d01fc0 +53a01022 +53a01020 +53801022 +53801020 +53901022 +53901020 +67800000 +67800200 +b3026040 +bb026040 +9342f3ff +f32200c0 +f32200c8 +f32220c0 +f32220c8 +f32210c0 +f32210c8 +` + +// allowedMismatchObjdump reports whether the mismatch between text and dec +// should be allowed by the test. +func allowedMismatchObjdump(text string, inst *Inst, dec ExtInst) bool { + // Allow the mismatch of Branch/Jump instruction's offset. 
+ decsp := strings.Split(dec.text, ",") + + switch inst.Op { + case BEQ, BGE, BGEU, BLT, BLTU, BNE: + if inst.Args[2].(Simm).String() != decsp[len(decsp)-1] { + return true + } + case JAL: + if inst.Args[1].(Simm).String() != decsp[len(decsp)-1] { + return true + } + case JALR: + if inst.Args[1].(RegOffset).Ofs.String() != decsp[len(decsp)-1] { + return true + } + } + + return false +} diff --git a/riscv64/riscv64asm/objdumpext_test.go b/riscv64/riscv64asm/objdumpext_test.go new file mode 100644 index 00000000..4f1f21a5 --- /dev/null +++ b/riscv64/riscv64asm/objdumpext_test.go @@ -0,0 +1,299 @@ +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package riscv64asm + +import ( + "bytes" + "debug/elf" + "encoding/binary" + "fmt" + "io" + "log" + "os" + "os/exec" + "strconv" + "strings" + "testing" +) + +var objdumpPath = "riscv64-linux-gnu-objdump" + +func testObjdumpRISCV64(t *testing.T, generate func(func([]byte))) { + testObjdumpArch(t, generate) +} + +func testObjdumpArch(t *testing.T, generate func(func([]byte))) { + checkObjdumpRISCV64(t) + testExtDis(t, "gnu", objdump, generate, allowedMismatchObjdump) + testExtDis(t, "plan9", objdump, generate, allowedMismatchObjdump) +} + +func checkObjdumpRISCV64(t *testing.T) { + objdumpPath, err := exec.LookPath(objdumpPath) + if err != nil { + objdumpPath = "objdump" + } + out, err := exec.Command(objdumpPath, "-i").Output() + if err != nil { + t.Skipf("cannot run objdump: %v\n%s", err, out) + } + if !strings.Contains(string(out), "riscv") { + t.Skip("objdump does not have RISC-V support") + } +} + +func objdump(ext *ExtDis) error { + // File already written with instructions; add ELF header. 
+ if err := writeELF64(ext.File, ext.Size); err != nil { + return err + } + + b, err := ext.Run(objdumpPath, "-M numeric", "-d", "-z", ext.File.Name()) + if err != nil { + return err + } + + var ( + nmatch int + reading bool + next uint64 = start + addr uint64 + encbuf [4]byte + enc []byte + text string + ) + flush := func() { + if addr == next { + // PC-relative addresses are translated to absolute addresses based on PC by GNU objdump + // Following logical rewrites the absolute addresses back to PC-relative ones for comparing + // with our disassembler output which are PC-relative + if text == "undefined" && len(enc) == 4 { + text = "error: unknown instruction" + enc = nil + } + if len(enc) == 4 { + // prints as word but we want to record bytes + enc[0], enc[3] = enc[3], enc[0] + enc[1], enc[2] = enc[2], enc[1] + } + ext.Dec <- ExtInst{addr, encbuf, len(enc), text} + encbuf = [4]byte{} + enc = nil + next += 4 + } + } + var textangle = []byte("<.text>:") + for { + line, err := b.ReadSlice('\n') + if err != nil { + if err == io.EOF { + break + } + return fmt.Errorf("reading objdump output: %v", err) + } + if bytes.Contains(line, textangle) { + reading = true + continue + } + if !reading { + continue + } + if debug { + os.Stdout.Write(line) + } + if enc1 := parseContinuation(line, encbuf[:len(enc)]); enc1 != nil { + enc = enc1 + continue + } + flush() + nmatch++ + addr, enc, text = parseLine(line, encbuf[:0]) + if addr > next { + return fmt.Errorf("address out of sync expected <= %#x at %q in:\n%s", next, line, line) + } + } + flush() + if next != start+uint64(ext.Size) { + return fmt.Errorf("not enough results found [%d %d]", next, start+ext.Size) + } + if err := ext.Wait(); err != nil { + return fmt.Errorf("exec: %v", err) + } + + return nil +} + +var ( + undefined = []byte("undefined") + unpredictable = []byte("unpredictable") + slashslash = []byte("//") +) + +func parseLine(line []byte, encstart []byte) (addr uint64, enc []byte, text string) { + ok := false + 
oline := line + i := bytes.Index(line, []byte(":\t")) + if i < 0 { + log.Fatalf("cannot parse disassembly: %q", oline) + } + x, err := strconv.ParseUint(string(bytes.TrimSpace(line[:i])), 16, 32) + if err != nil { + log.Fatalf("cannot parse disassembly: %q", oline) + } + addr = uint64(x) + line = line[i+2:] + i = bytes.IndexByte(line, '\t') + if i < 0 { + log.Fatalf("cannot parse disassembly: %q", oline) + } + enc, ok = parseHex(line[:i], encstart) + if !ok { + log.Fatalf("cannot parse disassembly: %q", oline) + } + line = bytes.TrimSpace(line[i:]) + if bytes.Contains(line, undefined) { + text = "undefined" + return + } + if false && bytes.Contains(line, unpredictable) { + text = "unpredictable" + return + } + // Strip trailing comment starting with '#' + if i := bytes.IndexByte(line, '#'); i >= 0 { + line = bytes.TrimSpace(line[:i]) + } + // Strip trailing comment starting with "//" + if i := bytes.Index(line, slashslash); i >= 0 { + line = bytes.TrimSpace(line[:i]) + } + text = string(fixSpace(line)) + return +} + +// fixSpace rewrites runs of spaces, tabs, and newline characters into single spaces in s. +// If s must be rewritten, it is rewritten in place. +func fixSpace(s []byte) []byte { + s = bytes.TrimSpace(s) + for i := 0; i < len(s); i++ { + if s[i] == '\t' || s[i] == '\n' || i > 0 && s[i] == ' ' && s[i-1] == ' ' { + goto Fix + } + } + return s + +Fix: + b := s + w := 0 + for i := 0; i < len(s); i++ { + c := s[i] + if c == '\t' || c == '\n' { + c = ' ' + } + if c == ' ' && w > 0 && b[w-1] == ' ' { + continue + } + b[w] = c + w++ + } + if w > 0 && b[w-1] == ' ' { + w-- + } + return b[:w] +} + +func parseContinuation(line []byte, enc []byte) []byte { + i := bytes.Index(line, []byte(":\t")) + if i < 0 { + return nil + } + line = line[i+1:] + enc, _ = parseHex(line, enc) + return enc +} + +// writeELF64 writes an ELF64 header to the file, describing a text +// segment that starts at start (0x8000) and extends for size bytes. 
+func writeELF64(f *os.File, size int) error { + f.Seek(0, io.SeekStart) + var hdr elf.Header64 + var prog elf.Prog64 + var sect elf.Section64 + var buf bytes.Buffer + binary.Write(&buf, binary.LittleEndian, &hdr) + off1 := buf.Len() + binary.Write(&buf, binary.LittleEndian, &prog) + off2 := buf.Len() + binary.Write(&buf, binary.LittleEndian, §) + off3 := buf.Len() + buf.Reset() + data := byte(elf.ELFDATA2LSB) + hdr = elf.Header64{ + Ident: [16]byte{0x7F, 'E', 'L', 'F', 2, data, 1}, + Type: 2, + Machine: uint16(elf.EM_RISCV), + Version: 1, + Entry: start, + Phoff: uint64(off1), + Shoff: uint64(off2), + Flags: 0x5, + Ehsize: uint16(off1), + Phentsize: uint16(off2 - off1), + Phnum: 1, + Shentsize: uint16(off3 - off2), + Shnum: 4, + Shstrndx: 3, + } + binary.Write(&buf, binary.LittleEndian, &hdr) + prog = elf.Prog64{ + Type: 1, + Off: start, + Vaddr: start, + Paddr: start, + Filesz: uint64(size), + Memsz: uint64(size), + Flags: 5, + Align: start, + } + binary.Write(&buf, binary.LittleEndian, &prog) + binary.Write(&buf, binary.LittleEndian, §) // NULL section + sect = elf.Section64{ + Name: 1, + Type: uint32(elf.SHT_PROGBITS), + Addr: start, + Off: start, + Size: uint64(size), + Flags: uint64(elf.SHF_ALLOC | elf.SHF_EXECINSTR), + Addralign: 4, + } + binary.Write(&buf, binary.LittleEndian, §) // .text + strtabsize := len("\x00.text\x00.riscv.attributes\x00.shstrtab\x00") + // RISC-V objdump needs the .riscv.attributes section to identify + // the RV64G (not include compressed) extensions. 
+	sect = elf.Section64{
+		Name:      uint32(len("\x00.text\x00")),
+		Type:      uint32(0x70000003), // SHT_RISCV_ATTRIBUTES
+		Addr:      0,
+		Off:       uint64(off2 + (off3-off2)*4 + strtabsize),
+		Size:      102,
+		Addralign: 1,
+	}
+	binary.Write(&buf, binary.LittleEndian, &sect)
+	sect = elf.Section64{
+		Name:      uint32(len("\x00.text\x00.riscv.attributes\x00")),
+		Type:      uint32(elf.SHT_STRTAB),
+		Addr:      0,
+		Off:       uint64(off2 + (off3-off2)*4),
+		Size:      uint64(strtabsize),
+		Addralign: 1,
+	}
+	binary.Write(&buf, binary.LittleEndian, &sect)
+	buf.WriteString("\x00.text\x00.riscv.attributes\x00.shstrtab\x00")
+	// Contents of .riscv.attributes section
+	// which specify the extension and priv spec version. (1.11)
+	buf.WriteString("Ae\x00\x00\x00riscv\x00\x01[\x00\x00\x00\x05rv64i2p1_m2pp_a2p1_f2p2_d2p2_q2p2_zibsr2p0_zifencei2p0_zmmul1p0_zfh1p0_zfhmin1p0\x00\x08\x01\x0a\x0b")
+	f.Write(buf.Bytes())
+	return nil
+}
diff --git a/riscv64/riscv64asm/testdata/Makefile b/riscv64/riscv64asm/testdata/Makefile
new file mode 100644
index 00000000..8ac7835b
--- /dev/null
+++ b/riscv64/riscv64asm/testdata/Makefile
@@ -0,0 +1,9 @@
+go test command:
+	cd ..; go generate
+	cd ..; go test -run 'ObjdumpRISCV64TestGNUSyntaxdata' -v -timeout 10h 2>&1 | tee -a log
+	cd ..; go test -run 'ObjdumpRISCV64TestGoSyntaxdata' -v -timeout 10h 2>&1 | tee -a log
+	cd ..; go test -run 'ObjdumpRISCV64' -v -timeout 10h 2>&1 | tee -a log
+	cd ..; go test -run 'ObjdumpRISCV64Manual' -v -timeout 10h 2>&1 | tee -a log
+	cd ..; go test -run 'TestDecodeGNUSyntax'
+	cd ..; go test -run 'TestDecodeGoSyntax'
+	cd ..; go test -run '.*'
diff --git a/riscv64/riscv64asm/testdata/gnucases.txt b/riscv64/riscv64asm/testdata/gnucases.txt
new file mode 100644
index 00000000..dad05a7e
--- /dev/null
+++ b/riscv64/riscv64asm/testdata/gnucases.txt
@@ -0,0 +1,390 @@
+b3027300| add x5,x6,x7
+9302f3ff| addi x5,x6,-1
+9302f37f| addi x5,x6,2047
+1305a000| li x10,10
+13000000| nop
+93870900| mv x15,x19
+9b02f37f| addiw x5,x6,2047
+1b830a00| sext.w x6,x21
+bb027300| addw x5,x6,x7 +afb26300| amoadd.d x5,x6,(x7) +afb26304| amoadd.d.aq x5,x6,(x7) +afb26302| amoadd.d.rl x5,x6,(x7) +afa26300| amoadd.w x5,x6,(x7) +afa26304| amoadd.w.aq x5,x6,(x7) +afa26302| amoadd.w.rl x5,x6,(x7) +afb26360| amoand.d x5,x6,(x7) +afb26364| amoand.d.aq x5,x6,(x7) +afb26362| amoand.d.rl x5,x6,(x7) +afa26360| amoand.w x5,x6,(x7) +afa26364| amoand.w.aq x5,x6,(x7) +afa26362| amoand.w.rl x5,x6,(x7) +afb263e0| amomaxu.d x5,x6,(x7) +afb263e4| amomaxu.d.aq x5,x6,(x7) +afb263e2| amomaxu.d.rl x5,x6,(x7) +afa263e0| amomaxu.w x5,x6,(x7) +afa263e4| amomaxu.w.aq x5,x6,(x7) +afa263e2| amomaxu.w.rl x5,x6,(x7) +afb263a0| amomax.d x5,x6,(x7) +afb263a4| amomax.d.aq x5,x6,(x7) +afb263a2| amomax.d.rl x5,x6,(x7) +afa263a0| amomax.w x5,x6,(x7) +afa263a4| amomax.w.aq x5,x6,(x7) +afa263a2| amomax.w.rl x5,x6,(x7) +afb263c0| amominu.d x5,x6,(x7) +afb263c4| amominu.d.aq x5,x6,(x7) +afb263c2| amominu.d.rl x5,x6,(x7) +afa263c0| amominu.w x5,x6,(x7) +afa263c4| amominu.w.aq x5,x6,(x7) +afa263c2| amominu.w.rl x5,x6,(x7) +afb26380| amomin.d x5,x6,(x7) +afb26384| amomin.d.aq x5,x6,(x7) +afb26382| amomin.d.rl x5,x6,(x7) +afa26380| amomin.w x5,x6,(x7) +afa26384| amomin.w.aq x5,x6,(x7) +afa26382| amomin.w.rl x5,x6,(x7) +afb26340| amoor.d x5,x6,(x7) +afb26344| amoor.d.aq x5,x6,(x7) +afb26342| amoor.d.rl x5,x6,(x7) +afa26340| amoor.w x5,x6,(x7) +afa26344| amoor.w.aq x5,x6,(x7) +afa26342| amoor.w.rl x5,x6,(x7) +afb26308| amoswap.d x5,x6,(x7) +afb2630c| amoswap.d.aq x5,x6,(x7) +afb2630a| amoswap.d.rl x5,x6,(x7) +afa26308| amoswap.w x5,x6,(x7) +afa2630c| amoswap.w.aq x5,x6,(x7) +afa2630a| amoswap.w.rl x5,x6,(x7) +afb26320| amoxor.d x5,x6,(x7) +afb26324| amoxor.d.aq x5,x6,(x7) +afb26322| amoxor.d.rl x5,x6,(x7) +afa26320| amoxor.w x5,x6,(x7) +afa26324| amoxor.w.aq x5,x6,(x7) +afa26322| amoxor.w.rl x5,x6,(x7) +b3727300| and x5,x6,x7 +9372f3ff| andi x5,x6,-1 +9372f37f| andi x5,x6,2047 +97020000| auipc x5,0x0 +97028000| auipc x5,0x800 +e38062f0| beq x5,x6,-256 +e3de62ee| bge x5,x6,-260 
+e3fc62ee| bgeu x5,x6,-264 +e3ca62ee| blt x5,x6,-268 +e3e862ee| bltu x5,x6,-272 +e39662ee| bne x5,x6,-276 +63940200| bnez x5,8 +63c40400| bltz x9,8 +63447000| bgtz x7,8 +63d40900| bgez x19,8 +6354d001| blez x29,8 +63040800| beqz x16,8 +f33213c0| csrrc x5,time,x6 +f3f21fc0| csrrci x5,time,31 +f32213c0| csrrs x5,time,x6 +f3e21fc0| csrrsi x5,time,31 +f31213c0| csrrw x5,time,x6 +f3d21fc0| csrrwi x5,time,31 +733015c0| csrc time,x10 +73f010c0| csrci time,1 +73253000| frcsr x10 +f3251000| frflags x11 +73262000| frrm x12 +f32400c0| rdcycle x9 +732920c0| rdinstret x18 +f32910c0| rdtime x19 +f3224014| csrr x5,sip +73201bc0| csrs time,x22 +736014c8| csrsi timeh,8 +73903700| fscsr x15 +73101800| fsflags x16 +73902800| fsrm x17 +731014c0| csrw time,x8 +735016c8| csrwi timeh,12 +b3427302| div x5,x6,x7 +b3527302| divu x5,x6,x7 +bb527302| divuw x5,x6,x7 +bb427302| divw x5,x6,x7 +73001000| ebreak +73000000| ecall +0f00f00f| fence +53f02002| fadd.d f0,f1,f2 +53f02004| fadd.h f0,f1,f2 +53f02006| fadd.q f0,f1,f2 +53f02000| fadd.s f0,f1,f2 +d31200e2| fclass.d x5,f0 +d31200e4| fclass.h x5,f0 +d31200e6| fclass.q x5,f0 +d31200e0| fclass.s x5,f0 +53f022d2| fcvt.d.l f0,x5 +53f032d2| fcvt.d.lu f0,x5 +53f03042| fcvt.d.q f0,f1 +53800042| fcvt.d.s f0,f1 +538002d2| fcvt.d.w f0,x5 +538012d2| fcvt.d.wu f0,x5 +53f022d4| fcvt.h.l f0,x5 +53f032d4| fcvt.h.lu f0,x5 +53f00044| fcvt.h.s f0,f1 +53f002d4| fcvt.h.w f0,x5 +53f012d4| fcvt.h.wu f0,x5 +d37230c2| fcvt.lu.d x5,f0 +d37230c4| fcvt.lu.h x5,f0 +d37230c6| fcvt.lu.q x5,f0 +d37230c0| fcvt.lu.s x5,f0 +d37220c2| fcvt.l.d x5,f0 +d37220c4| fcvt.l.h x5,f0 +d37220c6| fcvt.l.q x5,f0 +d37220c0| fcvt.l.s x5,f0 +53801046| fcvt.q.d f0,f1 +538022d6| fcvt.q.l f0,x5 +538032d6| fcvt.q.lu f0,x5 +53800046| fcvt.q.s f0,f1 +538002d6| fcvt.q.w f0,x5 +538012d6| fcvt.q.wu f0,x5 +53f01040| fcvt.s.d f0,f1 +53802040| fcvt.s.h f0,f1 +53f022d0| fcvt.s.l f0,x5 +53f032d0| fcvt.s.lu f0,x5 +53f03040| fcvt.s.q f0,f1 +53f002d0| fcvt.s.w f0,x5 +53f012d0| fcvt.s.wu f0,x5 +d37210c2| 
fcvt.wu.d x5,f0 +d37210c4| fcvt.wu.h x5,f0 +d37210c6| fcvt.wu.q x5,f0 +d37210c0| fcvt.wu.s x5,f0 +d37200c2| fcvt.w.d x5,f0 +d37200c4| fcvt.w.h x5,f0 +d37200c6| fcvt.w.q x5,f0 +d37200c0| fcvt.w.s x5,f0 +53f0201a| fdiv.d f0,f1,f2 +53f0201c| fdiv.h f0,f1,f2 +53f0201e| fdiv.q f0,f1,f2 +53f02018| fdiv.s f0,f1,f2 +0f00f00f| fence +0f100000| fence.i +d32210a2| feq.d x5,f0,f1 +d32210a4| feq.h x5,f0,f1 +d32210a6| feq.q x5,f0,f1 +d32210a0| feq.s x5,f0,f1 +07b0f27f| fld f0,2047(x5) +d30210a2| fle.d x5,f0,f1 +d30210a4| fle.h x5,f0,f1 +d30210a6| fle.q x5,f0,f1 +d30210a0| fle.s x5,f0,f1 +0790f27f| flh f0,2047(x5) +07c0f27f| flq f0,2047(x5) +d31210a2| flt.d x5,f0,f1 +d31210a4| flt.h x5,f0,f1 +d31210a6| flt.q x5,f0,f1 +d31210a0| flt.s x5,f0,f1 +07a0f27f| flw f0,2047(x5) +43f0201a| fmadd.d f0,f1,f2,f3 +43f0201c| fmadd.h f0,f1,f2,f3 +43f0201e| fmadd.q f0,f1,f2,f3 +43f02018| fmadd.s f0,f1,f2,f3 +5390202a| fmax.d f0,f1,f2 +5390202c| fmax.h f0,f1,f2 +5390202e| fmax.q f0,f1,f2 +53902028| fmax.s f0,f1,f2 +5380202a| fmin.d f0,f1,f2 +5380202c| fmin.h f0,f1,f2 +5380202e| fmin.q f0,f1,f2 +53802028| fmin.s f0,f1,f2 +47f0201a| fmsub.d f0,f1,f2,f3 +47f0201c| fmsub.h f0,f1,f2,f3 +47f0201e| fmsub.q f0,f1,f2,f3 +47f02018| fmsub.s f0,f1,f2,f3 +53f02012| fmul.d f0,f1,f2 +53f02014| fmul.h f0,f1,f2 +53f02016| fmul.q f0,f1,f2 +53f02010| fmul.s f0,f1,f2 +538002f2| fmv.d.x f0,x5 +538002f4| fmv.h.x f0,x5 +d30200e2| fmv.x.d x5,f0 +d30200e4| fmv.x.h x5,f0 +d30200e0| fmv.x.w x5,f0 +4ff0201a| fnmadd.d f0,f1,f2,f3 +4ff0201c| fnmadd.h f0,f1,f2,f3 +4ff0201e| fnmadd.q f0,f1,f2,f3 +4ff02018| fnmadd.s f0,f1,f2,f3 +4bf0201a| fnmsub.d f0,f1,f2,f3 +4bf0201c| fnmsub.h f0,f1,f2,f3 +4bf0201e| fnmsub.q f0,f1,f2,f3 +4bf02018| fnmsub.s f0,f1,f2,f3 +a7bf027e| fsd f0,2047(x5) +53902022| fsgnjn.d f0,f1,f2 +53902024| fsgnjn.h f0,f1,f2 +53902026| fsgnjn.q f0,f1,f2 +53902020| fsgnjn.s f0,f1,f2 +53a02022| fsgnjx.d f0,f1,f2 +53a02024| fsgnjx.h f0,f1,f2 +53a02026| fsgnjx.q f0,f1,f2 +53a02020| fsgnjx.s f0,f1,f2 +53802022| fsgnj.d 
f0,f1,f2 +53802024| fsgnj.h f0,f1,f2 +53802026| fsgnj.q f0,f1,f2 +53802020| fsgnj.s f0,f1,f2 +53a01022| fabs.d f0,f1 +53a49420| fabs.s f8,f9 +d305c622| fmv.d f11,f12 +d306e720| fmv.s f13,f14 +d3170823| fneg.d f15,f16 +d398f720| fneg.s f17,f15 +a79f027e| fsh f0,2047(x5) +a7cf027e| fsq f0,2047(x5) +53f0005a| fsqrt.d f0,f1 +53f0005c| fsqrt.h f0,f1 +53f0005e| fsqrt.q f0,f1 +53f00058| fsqrt.s f0,f1 +53f0200a| fsub.d f0,f1,f2 +53f0200c| fsub.h f0,f1,f2 +53f0200e| fsub.q f0,f1,f2 +53f02008| fsub.s f0,f1,f2 +a7af027e| fsw f0,2047(x5) +6ff0dfcb| j -836 +eff09fcb| jal -840 +eff25fcb| jal x5,-844 +67800200| jr x5 +e7800202| jalr x1,32(x5) +67800000| ret +6700a500| jr 10(x10) +8302f37f| lb x5,2047(x6) +8342f37f| lbu x5,2047(x6) +af320310| lr.d x5,(x6) +af320314| lr.d.aq x5,(x6) +af320312| lr.d.rl x5,(x6) +af220310| lr.w x5,(x6) +af220314| lr.w.aq x5,(x6) +af220312| lr.w.rl x5,(x6) +b7829102| lui x5,0x2918 +8322f37f| lw x5,2047(x6) +8362f37f| lwu x5,2047(x6) +b3027302| mul x5,x6,x7 +b3127302| mulh x5,x6,x7 +b3227302| mulhsu x5,x6,x7 +b3327302| mulhu x5,x6,x7 +bb027302| mulw x5,x6,x7 +b3627300| or x5,x6,x7 +93620380| ori x5,x6,-2048 +b3627302| rem x5,x6,x7 +b3727302| remu x5,x6,x7 +bb627302| remw x5,x6,x7 +a30f537e| sb x5,2047(x6) +afb26318| sc.d x5,x6,(x7) +afb2631c| sc.d.aq x5,x6,(x7) +afb2631a| sc.d.rl x5,x6,(x7) +afa26318| sc.w x5,x6,(x7) +afa2631c| sc.w.aq x5,x6,(x7) +afa2631a| sc.w.rl x5,x6,(x7) +a33f537e| sd x5,2047(x6) +23105380| sh x5,-2048(x6) +b3127300| sll x5,x6,x7 +93124303| slli x5,x6,0x34 +9b127301| slliw x5,x6,0x17 +b3227300| slt x5,x6,x7 +b3226000| sgtz x5,x6 +b32e0f00| sltz x29,x30 +9322f37f| slti x5,x6,2047 +93320380| sltiu x5,x6,-2048 +93321300| seqz x5,x6 +b3327300| sltu x5,x6,x7 +33394001| snez x18,x20 +b3527340| sra x5,x6,x7 +93524343| srai x5,x6,0x34 +9b526341| sraiw x5,x6,0x16 +bb527340| sraw x5,x6,x7 +b3527300| srl x5,x6,x7 +93524303| srli x5,x6,0x34 +9b526301| srliw x5,x6,0x16 +bb527300| srlw x5,x6,x7 +b3027340| sub x5,x6,x7 +b3026040| neg x5,x6 
+bb027340| subw x5,x6,x7 +3b0ff041| negw x30,x31 +a32f537e| sw x5,2047(x6) +b3427300| xor x5,x6,x7 +9342f37f| xori x5,x6,2047 +93c2ffff| not x5,x31 +bb003108| add.uw x1,x2,x3 +33a26220| sh1add x4,x5,x6 +bb239420| sh1add.uw x7,x8,x9 +33c5c520| sh2add x10,x11,x12 +bb46f720| sh2add.uw x13,x14,x15 +33e82821| sh3add x16,x17,x18 +bb695a21| sh3add.uw x19,x20,x21 +1b9b7b09| slli.uw x22,x23,0x17 +33fcac41| andn x24,x25,x26 +b36dde41| orn x27,x28,x29 +33cf1f40| xnor x30,x31,x1 +13910160| clz x2,x3 +1b920260| clzw x4,x5 +13931360| ctz x6,x7 +1b941460| ctzw x8,x9 +13952560| cpop x10,x11 +1b962660| cpopw x12,x13 +33e7070b| max x14,x15,x16 +b378390b| maxu x17,x18,x19 +33ca6a0b| min x20,x21,x22 +b35b9c0b| minu x23,x24,x25 +139d4d60| sext.b x26,x27 +139e5e60| sext.h x28,x29 +3bcf0f08| zext.h x30,x31 +b3102060| rol x1,x0,x2 +bb115260| rolw x3,x4,x5 +33d38360| ror x6,x7,x8 +9354a560| rori x9,x10,0xa +9b55e660| roriw x11,x12,0xe +bb56f760| rorw x13,x14,x15 +13d87828| orc.b x16,x17 +13d9896b| rev8 x18,x19 +339a6a49| bclr x20,x21,x22 +931bfc48| bclri x23,x24,0xf +b35cbd49| bext x25,x26,x27 +13de8e48| bexti x28,x29,0x8 +339f0f68| binv x30,x31,x0 +9310016a| binvi x1,x2,0x20 +b3115228| bset x3,x4,x5 +1393f32b| bseti x6,x7,0x3f +4000| addi x8,x2,4 +2041| lw x8,64(x10) +94d0| sw x13,32(x9) +0100| nop +811f| addi x31,x31,-32 +4111| addi x2,x2,-16 +8158| li x17,-32 +4161| addi x2,x2,16 +4163| lui x6,0x10 +819b| andi x15,x15,-32 +0d8c| sub x8,x8,x11 +b18c| xor x9,x9,x12 +558c| or x8,x8,x13 +f98c| and x9,x9,x14 +01a8| j 16 +99c5| beqz x11,14 +85e3| bnez x15,32 +c248| lw x17,16(x2) +8283| jr x7 +fa88| mv x17,x30 +0290| ebreak +0295| jalr x10 +c297| add x15,x15,x16 +76c4| sw x29,8(x2) +8873| ld x10,32(x15) +00ea| sd x8,16(x12) +3d31| addiw x2,x2,-17 +2180| srli x8,x8,0x8 +c184| srai x9,x9,0x10 +919d| subw x11,x11,x12 +b99e| addw x13,x13,x14 +4a01| slli x2,x2,0x12 +027d| ld x26,32(x2) +a260| ld x1,8(x2) +864d| lw x27,64(x2) +2021| fld f8,64(x10) +8cb0| fsd f11,32(x9) +8624| fld f9,64(x2) +3eb0| 
fsd f15,32(x2) +0000| unimp +ab| illegalins +f3| illegalins +abc3| illegalins +abcde3| illegalins diff --git a/riscv64/riscv64asm/testdata/plan9cases.txt b/riscv64/riscv64asm/testdata/plan9cases.txt new file mode 100644 index 00000000..d38c5eba --- /dev/null +++ b/riscv64/riscv64asm/testdata/plan9cases.txt @@ -0,0 +1,336 @@ +b3027300| ADD X7, X6, X5 +9302f3ff| ADDI $-1, X6, X5 +9302f37f| ADDI $2047, X6, X5 +93870900| MOV X19, X15 +93070100| MOV X2, X15 +9b02f37f| ADDIW $2047, X6, X5 +1b830a00| MOVW X21, X6 +1b810a00| MOVW X21, X2 +bb027300| ADDW X7, X6, X5 +afb26300| AMOADDD X6, (X7), X5 +afb26304| AMOADDD X6, (X7), X5 +afb26302| AMOADDD X6, (X7), X5 +afa26300| AMOADDW X6, (X7), X5 +afa26304| AMOADDW X6, (X7), X5 +afa26302| AMOADDW X6, (X7), X5 +afb26360| AMOANDD X6, (X7), X5 +afb26364| AMOANDD X6, (X7), X5 +afb26362| AMOANDD X6, (X7), X5 +afa26360| AMOANDW X6, (X7), X5 +afa26364| AMOANDW X6, (X7), X5 +afa26362| AMOANDW X6, (X7), X5 +afb263e0| AMOMAXUD X6, (X7), X5 +afb263e4| AMOMAXUD X6, (X7), X5 +afb263e2| AMOMAXUD X6, (X7), X5 +afa263e0| AMOMAXUW X6, (X7), X5 +afa263e4| AMOMAXUW X6, (X7), X5 +afa263e2| AMOMAXUW X6, (X7), X5 +afb263a0| AMOMAXD X6, (X7), X5 +afb263a4| AMOMAXD X6, (X7), X5 +afb263a2| AMOMAXD X6, (X7), X5 +afa263a0| AMOMAXW X6, (X7), X5 +afa263a4| AMOMAXW X6, (X7), X5 +afa263a2| AMOMAXW X6, (X7), X5 +afb263c0| AMOMINUD X6, (X7), X5 +afb263c4| AMOMINUD X6, (X7), X5 +afb263c2| AMOMINUD X6, (X7), X5 +afa263c0| AMOMINUW X6, (X7), X5 +afa263c4| AMOMINUW X6, (X7), X5 +afa263c2| AMOMINUW X6, (X7), X5 +afb26380| AMOMIND X6, (X7), X5 +afb26384| AMOMIND X6, (X7), X5 +afb26382| AMOMIND X6, (X7), X5 +afa26380| AMOMINW X6, (X7), X5 +afa26384| AMOMINW X6, (X7), X5 +afa26382| AMOMINW X6, (X7), X5 +afb26340| AMOORD X6, (X7), X5 +afb26344| AMOORD X6, (X7), X5 +afb26342| AMOORD X6, (X7), X5 +afa26340| AMOORW X6, (X7), X5 +afa26344| AMOORW X6, (X7), X5 +afa26342| AMOORW X6, (X7), X5 +afb26308| AMOSWAPD X6, (X7), X5 +afb2630c| AMOSWAPD X6, (X7), X5 +afb2630a| AMOSWAPD 
X6, (X7), X5 +afa26308| AMOSWAPW X6, (X7), X5 +afa2630c| AMOSWAPW X6, (X7), X5 +afa2630a| AMOSWAPW X6, (X7), X5 +afb26320| AMOXORD X6, (X7), X5 +afb26324| AMOXORD X6, (X7), X5 +afb26322| AMOXORD X6, (X7), X5 +afa26320| AMOXORW X6, (X7), X5 +afa26324| AMOXORW X6, (X7), X5 +afa26322| AMOXORW X6, (X7), X5 +b3727300| AND X7, X6, X5 +9372f3ff| ANDI $-1, X6, X5 +9372f37f| ANDI $2047, X6, X5 +9372f30f| MOVBU X6, X5 +97020000| AUIPC $0, X5 +97028000| AUIPC $2048, X5 +e38062f0| BEQ X5, X6, -64(PC) +e3de62ee| BGE X5, X6, -65(PC) +e3fc62ee| BGEU X5, X6, -66(PC) +e3ca62ee| BLT X5, X6, -67(PC) +e3e862ee| BLTU X5, X6, -68(PC) +e39662ee| BNE X5, X6, -69(PC) +e30403ee| BEQZ X6, -70(PC) +e35203ee| BGEZ X6, -71(PC) +e34003ee| BLTZ X6, -72(PC) +e31e03ec| BNEZ X6, -73(PC) +f33213c0| CSRRC X6, TIME, X5 +f3f21fc0| CSRRCI $31, TIME, X5 +f32213c0| CSRRS X6, TIME, X5 +f3e21fc0| CSRRSI $31, TIME, X5 +f31213c0| CSRRW X6, TIME, X5 +f3d21fc0| CSRRWI $31, TIME, X5 +733015c0| CSRRC X10, TIME, X0 +73f010c0| CSRRCI $1, TIME, X0 +73253000| FRCSR X10 +f3251000| FRFLAGS X11 +73262000| FRRM X12 +f32400c0| RDCYCLE X9 +732920c0| RDINSTRET X18 +f32910c0| RDTIME X19 +f3224014| CSRRS X0, SIP, X5 +73201bc0| CSRRS X22, TIME, X0 +736014c8| CSRRSI $8, TIMEH, X0 +73903700| FSCSR X15, X0 +73101800| FSFLAGS X16, X0 +73902800| FSRM X17, X0 +731014c0| CSRRW X8, TIME, X0 +735016c8| CSRRWI $12, TIMEH, X0 +b3427302| DIV X7, X6, X5 +b3527302| DIVU X7, X6, X5 +bb527302| DIVUW X7, X6, X5 +bb427302| DIVW X7, X6, X5 +73001000| EBREAK +73000000| ECALL +53f02002| FADDD F2, F1, F0 +53f02004| FADDH F2, F1, F0 +53f02006| FADDQ F2, F1, F0 +53f02000| FADDS F2, F1, F0 +d31200e2| FCLASSD F0, X5 +d31200e4| FCLASSH F0, X5 +d31200e6| FCLASSQ F0, X5 +d31200e0| FCLASSS F0, X5 +53f022d2| FCVTDL X5, F0 +53f032d2| FCVTDLU X5, F0 +53f03042| FCVTDQ F1, F0 +53800042| FCVTDS F1, F0 +538002d2| FCVTDW X5, F0 +538012d2| FCVTDWU X5, F0 +53f022d4| FCVTHL X5, F0 +53f032d4| FCVTHLU X5, F0 +53f00044| FCVTHS F1, F0 +53f002d4| FCVTHW X5, F0 +53f012d4| 
FCVTHWU X5, F0 +d37230c2| FCVTLUD F0, X5 +d37230c4| FCVTLUH F0, X5 +d37230c6| FCVTLUQ F0, X5 +d37230c0| FCVTLUS F0, X5 +d37220c2| FCVTLD F0, X5 +d37220c4| FCVTLH F0, X5 +d37220c6| FCVTLQ F0, X5 +d37220c0| FCVTLS F0, X5 +53801046| FCVTQD F1, F0 +538022d6| FCVTQL X5, F0 +538032d6| FCVTQLU X5, F0 +53800046| FCVTQS F1, F0 +538002d6| FCVTQW X5, F0 +538012d6| FCVTQWU X5, F0 +53f01040| FCVTSD F1, F0 +53802040| FCVTSH F1, F0 +53f022d0| FCVTSL X5, F0 +53f032d0| FCVTSLU X5, F0 +53f03040| FCVTSQ F1, F0 +53f002d0| FCVTSW X5, F0 +53f012d0| FCVTSWU X5, F0 +d37210c2| FCVTWUD F0, X5 +d37210c4| FCVTWUH F0, X5 +d37210c6| FCVTWUQ F0, X5 +d37210c0| FCVTWUS F0, X5 +d37200c2| FCVTWD F0, X5 +d37200c4| FCVTWH F0, X5 +d37200c6| FCVTWQ F0, X5 +d37200c0| FCVTWS F0, X5 +53f0201a| FDIVD F2, F1, F0 +53f0201c| FDIVH F2, F1, F0 +53f0201e| FDIVQ F2, F1, F0 +53f02018| FDIVS F2, F1, F0 +0f00f00f| FENCE +0f100000| FENCEI +d32210a2| FEQD F1, F0, X5 +d32210a4| FEQH F1, F0, X5 +d32210a6| FEQQ F1, F0, X5 +d32210a0| FEQS F1, F0, X5 +07b0f27f| MOVD F0, 2047(X5) +d30210a2| FLED F1, F0, X5 +d30210a4| FLEH F1, F0, X5 +d30210a6| FLEQ F1, F0, X5 +d30210a0| FLES F1, F0, X5 +0790f27f| FLH 2047(X5), F0 +07c0f27f| FLQ 2047(X5), F0 +d31210a2| FLTD F1, F0, X5 +d31210a4| FLTH F1, F0, X5 +d31210a6| FLTQ F1, F0, X5 +d31210a0| FLTS F1, F0, X5 +07a0f27f| MOVF F0, 2047(X5) +43f0201a| FMADDD F1, F2, F3, F0 +43f0201c| FMADDH F1, F2, F3, F0 +43f0201e| FMADDQ F1, F2, F3, F0 +43f02018| FMADDS F1, F2, F3, F0 +5390202a| FMAXD F2, F1, F0 +5390202c| FMAXH F2, F1, F0 +5390202e| FMAXQ F2, F1, F0 +53902028| FMAXS F2, F1, F0 +5380202a| FMIND F2, F1, F0 +5380202c| FMINH F2, F1, F0 +5380202e| FMINQ F2, F1, F0 +53802028| FMINS F2, F1, F0 +47f0201a| FMSUBD F1, F2, F3, F0 +47f0201c| FMSUBH F1, F2, F3, F0 +47f0201e| FMSUBQ F1, F2, F3, F0 +47f02018| FMSUBS F1, F2, F3, F0 +53f02012| FMULD F2, F1, F0 +53f02014| FMULH F2, F1, F0 +53f02016| FMULQ F2, F1, F0 +53f02010| FMULS F2, F1, F0 +538002f2| FMVDX X5, F0 +538002f4| FMVHX X5, F0 +d30200e2| 
FMVXD F0, X5 +d30200e4| FMVXH F0, X5 +d30200e0| FMVXW F0, X5 +4ff0201a| FNMADDD F1, F2, F3, F0 +4ff0201c| FNMADDH F1, F2, F3, F0 +4ff0201e| FNMADDQ F1, F2, F3, F0 +4ff02018| FNMADDS F1, F2, F3, F0 +4bf0201a| FNMSUBD F1, F2, F3, F0 +4bf0201c| FNMSUBH F1, F2, F3, F0 +4bf0201e| FNMSUBQ F1, F2, F3, F0 +4bf02018| FNMSUBS F1, F2, F3, F0 +a7bf027e| MOVD 2047(X5), F0 +53902022| FSGNJND F2, F1, F0 +53902024| FSGNJNH F2, F1, F0 +53902026| FSGNJNQ F2, F1, F0 +53902020| FSGNJNS F2, F1, F0 +53a02022| FSGNJXD F2, F1, F0 +53a02024| FSGNJXH F2, F1, F0 +53a02026| FSGNJXQ F2, F1, F0 +53a02020| FSGNJXS F2, F1, F0 +53802022| FSGNJD F2, F1, F0 +53802024| FSGNJH F2, F1, F0 +53802026| FSGNJQ F2, F1, F0 +53802020| FSGNJS F2, F1, F0 +a79f027e| FSH 2047(X5), F0 +a7cf027e| FSQ 2047(X5), F0 +53f0005a| FSQRTD F1, F0 +53f0005c| FSQRTH F1, F0 +53f0005e| FSQRTQ F1, F0 +53f00058| FSQRTS F1, F0 +53f0200a| FSUBD F2, F1, F0 +53f0200c| FSUBH F2, F1, F0 +53f0200e| FSUBQ F2, F1, F0 +53f02008| FSUBS F2, F1, F0 +a7af027e| MOVF 2047(X5), F0 +6ff0dfcb| JMP -209(PC) +eff09fcb| CALL -210(PC) +eff25fcb| JAL X5, -211(PC) +67800202| JMP 32(X5) +e7800202| CALL 32(X5) +e7820202| JALR X5, 32(X5) +67800000| RET +8302f37f| MOVB 2047(X6), X5 +8342f37f| MOVBU 2047(X6), X5 +af320310| LRD (X6), X5 +af320314| LRD (X6), X5 +af320312| LRD (X6), X5 +af220310| LRW (X6), X5 +af220314| LRW (X6), X5 +af220312| LRW (X6), X5 +b7829102| LUI $10520, X5 +8322f37f| MOVW 2047(X6), X5 +8362f37f| MOVWU 2047(X6), X5 +b3027302| MUL X7, X6, X5 +b3127302| MULH X7, X6, X5 +b3227302| MULHSU X7, X6, X5 +b3327302| MULHU X7, X6, X5 +bb027302| MULW X7, X6, X5 +b3627300| OR X7, X6, X5 +93620380| ORI $-2048, X6, X5 +b3627302| REM X7, X6, X5 +b3727302| REMU X7, X6, X5 +bb627302| REMW X7, X6, X5 +a30f537e| MOVB X5, 2047(X6) +afb26318| SCD X6, (X7), X5 +afb2631c| SCD X6, (X7), X5 +afb2631a| SCD X6, (X7), X5 +afa26318| SCW X6, (X7), X5 +afa2631c| SCW X6, (X7), X5 +afa2631a| SCW X6, (X7), X5 +a33f537e| MOV X5, 2047(X6) +23105380| MOVH X5, -2048(X6) 
+b3127300| SLL X7, X6, X5 +93124303| SLLI $52, X6, X5 +9b127301| SLLIW $23, X6, X5 +b3227300| SLT X7, X6, X5 +9322f37f| SLTI $2047, X6, X5 +93320380| SLTIU $-2048, X6, X5 +93321300| SEQZ X6, X5 +b3327300| SLTU X7, X6, X5 +33394001| SNEZ X20, X18 +b3527340| SRA X7, X6, X5 +93524343| SRAI $52, X6, X5 +9b526341| SRAIW $22, X6, X5 +bb527340| SRAW X7, X6, X5 +b3527300| SRL X7, X6, X5 +93524303| SRLI $52, X6, X5 +9b526301| SRLIW $22, X6, X5 +bb527300| SRLW X7, X6, X5 +b3027340| SUB X7, X6, X5 +b3026040| NEG X6, X5 +bb027340| SUBW X7, X6, X5 +a32f537e| MOVW X5, 2047(X6) +b3427300| XOR X7, X6, X5 +9342f37f| XORI $2047, X6, X5 +93c2ffff| NOT X31, X5 +bb003108| ADDUW X3, X2, X1 +33a26220| SH1ADD X6, X5, X4 +bb239420| SH1ADDUW X9, X8, X7 +33c5c520| SH2ADD X12, X11, X10 +bb46f720| SH2ADDUW X15, X14, X13 +33e82821| SH3ADD X18, X17, X16 +bb695a21| SH3ADDUW X21, X20, X19 +1b9b7b09| SLLIUW $23, X23, X22 +33fcac41| ANDN X26, X25, X24 +b36dde41| ORN X29, X28, X27 +33cf1f40| XNOR X1, X31, X30 +13910160| CLZ X3, X2 +1b920260| CLZW X5, X4 +13931360| CTZ X7, X6 +1b941460| CTZW X9, X8 +13952560| CPOP X11, X10 +1b962660| CPOPW X13, X12 +33e7070b| MAX X16, X15, X14 +b378390b| MAXU X19, X18, X17 +33ca6a0b| MIN X22, X21, X20 +b35b9c0b| MINU X25, X24, X23 +139d4d60| SEXTB X27, X26 +139e5e60| SEXTH X29, X28 +3bcf0f08| ZEXTH X31, X30 +b3102060| ROL X2, X0, X1 +bb115260| ROLW X5, X4, X3 +33d38360| ROR X8, X7, X6 +9354a560| RORI $10, X10, X9 +9b55e660| RORIW $14, X12, X11 +bb56f760| RORW X15, X14, X13 +13d87828| ORCB X17, X16 +13d9896b| REV8 X19, X18 +339a6a49| BCLR X22, X21, X20 +931bfc48| BCLRI $15, X24, X23 +b35cbd49| BEXT X27, X26, X25 +13de8e48| BEXTI $8, X29, X28 +339f0f68| BINV X0, X31, X30 +9310016a| BINVI $32, X2, X1 +b3115228| BSET X5, X4, X3 +1393f32b| BSETI $63, X7, X6 +0000| UNIMP From 5f2ba6d460c9ac55abe4b3ed2eedee937561d025 Mon Sep 17 00:00:00 2001 From: Lin Runze Date: Tue, 17 Dec 2024 01:43:00 +0800 Subject: [PATCH 038/200] riscv64: fix objdump related tests The 
TestObjdumpRISCV64TestDecodeGNUSyntaxdata and
TestObjdumpRISCV64TestDecodeGoSyntaxdata tests failed due to the lack of a
.riscv.attributes section in the ELF file; this patch adds these extensions'
attributes and allows `inst` output mismatches.

Fix: golang/go#70864

Change-Id: I6bcac3e6a3bd0ec3cc66356998914a1aa8d7c468
Reviewed-on: https://go-review.googlesource.com/c/arch/+/636735
Reviewed-by: David Chase
Reviewed-by: Meng Zhuo
Reviewed-by: Carlos Amedee
LUCI-TryBot-Result: Go LUCI
---
 riscv64/riscv64asm/ext_test.go        | 3 +++
 riscv64/riscv64asm/gnu.go             | 5 +++++
 riscv64/riscv64asm/objdumpext_test.go | 4 ++--
 3 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/riscv64/riscv64asm/ext_test.go b/riscv64/riscv64asm/ext_test.go
index fa6961f2..25e2bf75 100644
--- a/riscv64/riscv64asm/ext_test.go
+++ b/riscv64/riscv64asm/ext_test.go
@@ -148,6 +148,9 @@ func testExtDis(
 			}
 			suffix += " (allowed mismatch)"
 		}
+		if strings.Contains(text, "unknown") && strings.Contains(dec.text, ".insn") {
+			return
+		}
 		totalErrors++
 		cmp := fmt.Sprintf("decode(%x) = %q, %d, want %q, %d%s\n",
 			enc, text, len(enc), dec.text, dec.nenc, suffix)
diff --git a/riscv64/riscv64asm/gnu.go b/riscv64/riscv64asm/gnu.go
index d6b3dc04..3ee04496 100644
--- a/riscv64/riscv64asm/gnu.go
+++ b/riscv64/riscv64asm/gnu.go
@@ -42,6 +42,11 @@ func GNUSyntax(inst Inst) string {
 		}
 	}
 
+	if inst.Op == ANDI && inst.Args[2].(Simm).Imm == 255 {
+		op = "zext.b"
+		args = args[:len(args)-1]
+	}
+
 	if inst.Op == ADDIW && inst.Args[2].(Simm).Imm == 0 {
 		op = "sext.w"
 		args = args[:len(args)-1]
diff --git a/riscv64/riscv64asm/objdumpext_test.go b/riscv64/riscv64asm/objdumpext_test.go
index 4f1f21a5..1dc09641 100644
--- a/riscv64/riscv64asm/objdumpext_test.go
+++ b/riscv64/riscv64asm/objdumpext_test.go
@@ -277,7 +277,7 @@ func writeELF64(f *os.File, size int) error {
 		Type:      uint32(0x70000003), // SHT_RISCV_ATTRIBUTES
 		Addr:      0,
 		Off:       uint64(off2 + (off3-off2)*4 + strtabsize),
-		Size:      102,
+		Size:      114,
 		Addralign: 1,
 	}
 	binary.Write(&buf, binary.LittleEndian, 
§) @@ -293,7 +293,7 @@ func writeELF64(f *os.File, size int) error { buf.WriteString("\x00.text\x00.riscv.attributes\x00.shstrtab\x00") // Contents of .riscv.attributes section // which specify the extension and priv spec version. (1.11) - buf.WriteString("Ae\x00\x00\x00riscv\x00\x01[\x00\x00\x00\x05rv64i2p1_m2pp_a2p1_f2p2_d2p2_q2p2_zibsr2p0_zifencei2p0_zmmul1p0_zfh1p0_zfhmin1p0\x00\x08\x01\x0a\x0b") + buf.WriteString("Aq\x00\x00\x00riscv\x00\x01g\x00\x00\x00\x05rv64i2p0_m2p0_a2p0_f2p0_d2p0_q2p0_c2p0_zmmul1p0_zfh1p0_zfhmin1p0_zba1p0_zbb1p0_zbc1p0_zbs1p0\x00\x08\x01\x0a\x0b") f.Write(buf.Bytes()) return nil } From bde81be39b9efbf5b80719bb91ad1a0ebc5186b5 Mon Sep 17 00:00:00 2001 From: cuishuang Date: Wed, 1 Jan 2025 09:32:57 +0800 Subject: [PATCH 039/200] all: make function comments match function names Change-Id: I279c95d1d1e5f07e0c7a0c4416bf3613d85bb950 Reviewed-on: https://go-review.googlesource.com/c/arch/+/639476 Reviewed-by: Ian Lance Taylor Auto-Submit: Ian Lance Taylor Reviewed-by: Cherry Mui LUCI-TryBot-Result: Go LUCI --- arm64/arm64asm/ext_test.go | 2 +- loong64/loong64asm/ext_test.go | 2 +- riscv64/riscv64asm/ext_test.go | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arm64/arm64asm/ext_test.go b/arm64/arm64asm/ext_test.go index f432203e..e8a79de2 100644 --- a/arm64/arm64asm/ext_test.go +++ b/arm64/arm64asm/ext_test.go @@ -268,7 +268,7 @@ func disasm(syntax string, src []byte) (inst Inst, text string) { return } -// decodecoverage returns a floating point number denoting the +// decodeCoverage returns a floating point number denoting the // decoder coverage. 
func decodeCoverage() float64 { n := 0 diff --git a/loong64/loong64asm/ext_test.go b/loong64/loong64asm/ext_test.go index 5c90586e..5e73c80d 100644 --- a/loong64/loong64asm/ext_test.go +++ b/loong64/loong64asm/ext_test.go @@ -240,7 +240,7 @@ func disasm(syntax string, src []byte) (inst Inst, text string) { return } -// decodecoverage returns a floating point number denoting the +// decodeCoverage returns a floating point number denoting the // decoder coverage. func decodeCoverage() float64 { n := 0 diff --git a/riscv64/riscv64asm/ext_test.go b/riscv64/riscv64asm/ext_test.go index 25e2bf75..556cd015 100644 --- a/riscv64/riscv64asm/ext_test.go +++ b/riscv64/riscv64asm/ext_test.go @@ -242,7 +242,7 @@ func disasm(syntax string, src []byte) (inst Inst, text string) { return } -// decodecoverage returns a floating point number denoting the +// decodeCoverage returns a floating point number denoting the // decoder coverage. func decodeCoverage() float64 { n := 0 From ccff9d40e363f5ec4b7580073c04f1e04841295d Mon Sep 17 00:00:00 2001 From: alirezaarzehgar Date: Fri, 17 Jan 2025 18:50:14 +0330 Subject: [PATCH 040/200] x/arch: silent go vet warnings go vet -all ./... 
arm/armspec/spec.go:584:26: rsc.io/pdf.Text struct literal uses unkeyed fields arm64/arm64spec/spec.go:688:26: rsc.io/pdf.Text struct literal uses unkeyed fields loong64/loong64spec/spec.go:361:26: rsc.io/pdf.Text struct literal uses unkeyed fields ppc64/ppc64map/map.go:290:5: unreachable code ppc64/ppc64spec/spec.go:468:26: rsc.io/pdf.Text struct literal uses unkeyed fields s390x/s390xmap/map.go:218:5: unreachable code s390x/s390xspec/spec.go:1045:26: rsc.io/pdf.Text struct literal uses unkeyed fields x86/x86spec/parse.go:513:26: rsc.io/pdf.Text struct literal uses unkeyed fields Change-Id: I4f7e29c80231ebfc5287e0d5bb57edae5b712603 Reviewed-on: https://go-review.googlesource.com/c/arch/+/642738 Reviewed-by: Ian Lance Taylor Auto-Submit: Ian Lance Taylor Reviewed-by: Cherry Mui LUCI-TryBot-Result: Go LUCI Commit-Queue: Ian Lance Taylor --- arm/armspec/spec.go | 9 ++++++++- arm64/arm64spec/spec.go | 9 ++++++++- loong64/loong64spec/spec.go | 9 ++++++++- ppc64/ppc64map/map.go | 4 ++-- ppc64/ppc64spec/spec.go | 9 ++++++++- s390x/s390xmap/map.go | 4 ++-- s390x/s390xspec/spec.go | 12 ++++++++++-- x86/x86asm/tables.go | 2 +- x86/x86spec/parse.go | 9 ++++++++- 9 files changed, 55 insertions(+), 12 deletions(-) diff --git a/arm/armspec/spec.go b/arm/armspec/spec.go index 60579a05..5b458e1e 100644 --- a/arm/armspec/spec.go +++ b/arm/armspec/spec.go @@ -581,7 +581,14 @@ func findWords(chars []pdf.Text) (words []pdf.Text) { f := ck.Font f = strings.TrimSuffix(f, ",Italic") f = strings.TrimSuffix(f, "-Italic") - words = append(words, pdf.Text{f, ck.FontSize, ck.X, ck.Y, end - ck.X, s}) + words = append(words, pdf.Text{ + Font: f, + FontSize: ck.FontSize, + X: ck.X, + Y: ck.Y, + W: end - ck.X, + S: s, + }) k = l } i = j diff --git a/arm64/arm64spec/spec.go b/arm64/arm64spec/spec.go index ee784e55..feedf83c 100644 --- a/arm64/arm64spec/spec.go +++ b/arm64/arm64spec/spec.go @@ -685,7 +685,14 @@ func findWords(chars []pdf.Text) (words []pdf.Text) { f := ck.Font f = 
strings.TrimSuffix(f, ",Italic") f = strings.TrimSuffix(f, "-Italic") - words = append(words, pdf.Text{f, ck.FontSize, ck.X, ck.Y, end - ck.X, s}) + words = append(words, pdf.Text{ + Font: f, + FontSize: ck.FontSize, + X: ck.X, + Y: ck.Y, + W: end - ck.X, + S: s, + }) k = l } i = j diff --git a/loong64/loong64spec/spec.go b/loong64/loong64spec/spec.go index 3e69a24e..177df41b 100644 --- a/loong64/loong64spec/spec.go +++ b/loong64/loong64spec/spec.go @@ -358,7 +358,14 @@ func findWords(chars []pdf.Text) (words []pdf.Text) { break } f := ck.Font - words = append(words, pdf.Text{f, ck.FontSize, ck.X, ck.Y, end - ck.X, s}) + words = append(words, pdf.Text{ + Font: f, + FontSize: ck.FontSize, + X: ck.X, + Y: ck.Y, + W: end - ck.X, + S: s, + }) k = l } i = j diff --git a/ppc64/ppc64map/map.go b/ppc64/ppc64map/map.go index 1e3b1b6e..4e50843c 100644 --- a/ppc64/ppc64map/map.go +++ b/ppc64/ppc64map/map.go @@ -24,7 +24,6 @@ import ( "flag" "fmt" gofmt "go/format" - asm "golang.org/x/arch/ppc64/ppc64asm" "log" "math/bits" "os" @@ -33,6 +32,8 @@ import ( "strconv" "strings" "text/template" + + asm "golang.org/x/arch/ppc64/ppc64asm" ) var format = flag.String("fmt", "text", "output format: text, decoder, asm") @@ -287,7 +288,6 @@ func parseFields(encoding, text string, word int8) Args { if j < 0 { fmt.Fprintf(os.Stderr, "%s: wrong %d-th encoding field: %q\n", text, i, f) panic("Invalid encoding entry.") - continue } k := strings.Index(f[j+1:], " ") if k >= 0 { diff --git a/ppc64/ppc64spec/spec.go b/ppc64/ppc64spec/spec.go index 4167d6dc..ad9411f1 100644 --- a/ppc64/ppc64spec/spec.go +++ b/ppc64/ppc64spec/spec.go @@ -465,7 +465,14 @@ func findWords(chars []pdf.Text) (words []pdf.Text) { f := ck.Font f = strings.TrimSuffix(f, ",Italic") f = strings.TrimSuffix(f, "-Italic") - words = append(words, pdf.Text{f, ck.FontSize, ck.X, ck.Y, end - ck.X, s}) + words = append(words, pdf.Text{ + Font: f, + FontSize: ck.FontSize, + X: ck.X, + Y: ck.Y, + W: end - ck.X, + S: s, + }) k = l } i = 
j diff --git a/s390x/s390xmap/map.go b/s390x/s390xmap/map.go index 1adfdfbe..3fc89f11 100644 --- a/s390x/s390xmap/map.go +++ b/s390x/s390xmap/map.go @@ -24,12 +24,13 @@ import ( "flag" "fmt" gofmt "go/format" - asm "golang.org/x/arch/s390x/s390xasm" "log" "os" "regexp" "strconv" "strings" + + asm "golang.org/x/arch/s390x/s390xasm" ) var format = flag.String("fmt", "text", "output format: text, decoder, asm") @@ -215,7 +216,6 @@ func parseFields(encoding, text string) Args { if j < 0 { fmt.Fprintf(os.Stderr, "%s: wrong %d-th encoding field: %q\n", text, i, f) panic("Invalid encoding entry.") - continue } off, err = strconv.Atoi(f[j+1:]) if err != nil { diff --git a/s390x/s390xspec/spec.go b/s390x/s390xspec/spec.go index cc0ebade..1b24be85 100644 --- a/s390x/s390xspec/spec.go +++ b/s390x/s390xspec/spec.go @@ -30,10 +30,11 @@ import ( "log" "math" "os" - "rsc.io/pdf" "sort" "strconv" "strings" + + "rsc.io/pdf" ) type Inst struct { @@ -1042,7 +1043,14 @@ func findWords(chars []pdf.Text) (words []pdf.Text) { f := ck.Font f = strings.TrimSuffix(f, ",Italic") f = strings.TrimSuffix(f, "-Italic") - words = append(words, pdf.Text{f, ck.FontSize, ck.X, ck.Y, end - ck.X, s}) + words = append(words, pdf.Text{ + Font: f, + FontSize: ck.FontSize, + X: ck.X, + Y: ck.Y, + W: end - ck.X, + S: s, + }) k = l } i = j diff --git a/x86/x86asm/tables.go b/x86/x86asm/tables.go index 6f57c70b..9710bbd8 100644 --- a/x86/x86asm/tables.go +++ b/x86/x86asm/tables.go @@ -1,4 +1,4 @@ -// Code generated by x86map -fmt=decoder x86.csv DO NOT EDIT. +// Code generated by x86map -fmt=decoder ../x86.csv DO NOT EDIT. 
package x86asm diff --git a/x86/x86spec/parse.go b/x86/x86spec/parse.go index e5324bea..8a9adc98 100644 --- a/x86/x86spec/parse.go +++ b/x86/x86spec/parse.go @@ -510,7 +510,14 @@ func findWords(chars []pdf.Text) (words []pdf.Text) { f := ck.Font f = strings.TrimSuffix(f, ",Italic") f = strings.TrimSuffix(f, "-Italic") - words = append(words, pdf.Text{f, ck.FontSize, ck.X, ck.Y, end, s}) + words = append(words, pdf.Text{ + Font: f, + FontSize: ck.FontSize, + X: ck.X, + Y: ck.Y, + W: end, + S: s, + }) k = l } i = j From fa4651658ac77df0a02d29474ffe1140e058b1de Mon Sep 17 00:00:00 2001 From: Roland Shoemaker Date: Tue, 28 Jan 2025 08:57:26 -0800 Subject: [PATCH 041/200] x86: add SHA-512 instructions to all-dec-instructions.txt Unclear if this is the "correct" way to go about this. XED taken from https://github.com/intelxed/xed/blob/main/datafiles/sha512/sha512-isa.xed.txt. Change-Id: If5d7026f3020698727964e9838a6dff1653c846f Reviewed-on: https://go-review.googlesource.com/c/arch/+/645035 Reviewed-by: Cherry Mui LUCI-TryBot-Result: Go LUCI --- .../testdata/xedpath/all-dec-instructions.txt | 76 +++++++++++++++++++ 1 file changed, 76 insertions(+) diff --git a/x86/x86avxgen/testdata/xedpath/all-dec-instructions.txt b/x86/x86avxgen/testdata/xedpath/all-dec-instructions.txt index aad0b816..70bda8dd 100644 --- a/x86/x86avxgen/testdata/xedpath/all-dec-instructions.txt +++ b/x86/x86avxgen/testdata/xedpath/all-dec-instructions.txt @@ -58027,3 +58027,79 @@ PATTERN : 0x0F 0xAE MOD[mm] MOD!=3 REG[0b100] RM[nnn] f3_refining_prefix no6 OPERANDS : MEM0:r:y } + + + + +###FILE: ./datafiles/sha512/sha512-isa.xed.txt + +#BEGIN_LEGAL +# +#Copyright (c) 2023 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +#END_LEGAL +# +# +# +# ***** GENERATED FILE -- DO NOT EDIT! ***** +# ***** GENERATED FILE -- DO NOT EDIT! ***** +# ***** GENERATED FILE -- DO NOT EDIT! ***** +# +# +# +AVX_INSTRUCTIONS():: +# EMITTING VSHA512MSG1 (VSHA512MSG1-256-1) +{ +ICLASS: VSHA512MSG1 +CPL: 3 +CATEGORY: SHA512 +EXTENSION: SHA512 +ISA_SET: SHA512 +EXCEPTIONS: avx-type-6 +REAL_OPCODE: Y +PATTERN: VV1 0xCC VF2 V0F38 MOD[0b11] MOD=3 REG[rrr] RM[nnn] W0 VL256 NOVSR +OPERANDS: REG0=YMM_R():rw:qq:u64 REG1=XMM_B():r:dq:u64 +IFORM: VSHA512MSG1_YMMu64_XMMu64 +} + + +# EMITTING VSHA512MSG2 (VSHA512MSG2-256-1) +{ +ICLASS: VSHA512MSG2 +CPL: 3 +CATEGORY: SHA512 +EXTENSION: SHA512 +ISA_SET: SHA512 +EXCEPTIONS: avx-type-6 +REAL_OPCODE: Y +PATTERN: VV1 0xCD VF2 V0F38 MOD[0b11] MOD=3 REG[rrr] RM[nnn] W0 VL256 NOVSR +OPERANDS: REG0=YMM_R():rw:qq:u64 REG1=YMM_B():r:qq:u64 +IFORM: VSHA512MSG2_YMMu64_YMMu64 +} + + +# EMITTING VSHA512RNDS2 (VSHA512RNDS2-256-1) +{ +ICLASS: VSHA512RNDS2 +CPL: 3 +CATEGORY: SHA512 +EXTENSION: SHA512 +ISA_SET: SHA512 +EXCEPTIONS: avx-type-6 +REAL_OPCODE: Y +PATTERN: VV1 0xCB VF2 V0F38 MOD[0b11] MOD=3 REG[rrr] RM[nnn] W0 VL256 +OPERANDS: REG0=YMM_R():rw:qq:u64 REG1=YMM_N():r:qq:u64 REG2=XMM_B():r:dq:u64 +IFORM: VSHA512RNDS2_YMMu64_YMMu64_XMMu64 +} \ No newline at end of file From 1ae429ed9af22ab5442b13b72e44386be2f9f92e Mon Sep 17 00:00:00 2001 From: Gopher Robot Date: Fri, 14 Feb 2025 18:57:42 +0000 Subject: [PATCH 042/200] all: upgrade go directive to at least 1.23.0 [generated] By now Go 1.24.0 has been released, and Go 1.22 is no longer supported per the Go Release Policy 
(https://go.dev/doc/devel/release#policy). For golang/go#69095. [git-generate] (cd . && go get go@1.23.0 && go mod tidy && go fix ./... && go mod edit -toolchain=none) Change-Id: I4dfe2096905891f3a5c7d34c2e82c6119cdd2448 Reviewed-on: https://go-review.googlesource.com/c/arch/+/649695 Auto-Submit: Gopher Robot LUCI-TryBot-Result: Go LUCI Reviewed-by: Cherry Mui Reviewed-by: Dmitri Shuralyov --- go.mod | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/go.mod b/go.mod index 355098da..b72ba1a5 100644 --- a/go.mod +++ b/go.mod @@ -1,5 +1,5 @@ module golang.org/x/arch -go 1.18 +go 1.23.0 require rsc.io/pdf v0.1.1 From c375763cfd40d3f71b0348622a4d2c635527b2c5 Mon Sep 17 00:00:00 2001 From: Jonathan Davies Date: Mon, 23 Sep 2024 13:49:49 +0000 Subject: [PATCH 043/200] arm64: Fix arm64asm warnings Fix warnings of the following types. - go-staticcheck - (U1000) var longTest is unused - (SA4006) this value of text is never used - (S1011) should replace loop with instsN = append(instsN, insts...) 
- (S1002) should omit comparison to bool constant, can be simplified to rea.show_zero - (S1039) unnecessary use of fmt.Sprintf - simplifycompositelit - simplifyslicedefault Change-Id: I84cb4f867bf6f923ffa21d6e0a2072641299eaf5 Reviewed-on: https://go-review.googlesource.com/c/arch/+/615055 LUCI-TryBot-Result: Go LUCI Reviewed-by: Michael Knyszek Reviewed-by: Cherry Mui --- arm64/arm64asm/ext_test.go | 11 +- arm64/arm64asm/inst.go | 216 ++++++++++++++++++------------------- 2 files changed, 111 insertions(+), 116 deletions(-) diff --git a/arm64/arm64asm/ext_test.go b/arm64/arm64asm/ext_test.go index e8a79de2..f0d18e93 100644 --- a/arm64/arm64asm/ext_test.go +++ b/arm64/arm64asm/ext_test.go @@ -31,7 +31,6 @@ import ( var ( dumpTest = flag.Bool("dump", false, "dump all encodings") mismatch = flag.Bool("mismatch", false, "log allowed mismatches") - longTest = flag.Bool("long", false, "long test") keep = flag.Bool("keep", false, "keep object files around") debug = false ) @@ -164,7 +163,7 @@ func testExtDis( totalTests++ if *dumpTest { - fmt.Printf("%x -> %s [%d]\n", enc[:len(enc)], dec.text, dec.nenc) + fmt.Printf("%x -> %s [%d]\n", enc, dec.text, dec.nenc) } if text != dec.text && !strings.Contains(dec.text, "unknown") && syntax == "gnu" { suffix := "" @@ -256,7 +255,6 @@ func disasm(syntax string, src []byte) (inst Inst, text string) { text = "error: " + err.Error() return } - text = inst.String() switch syntax { case "gnu": text = GNUSyntax(inst) @@ -518,11 +516,8 @@ func JSONCases(t *testing.T) func(func([]byte)) { t.Fatal(err) } // Append instructions to get more test cases. - for i := 0; i < N; { - for _, inst := range insts { - instsN = append(instsN, inst) - } - i++ + for i := 0; i < N; i++ { + instsN = append(instsN, insts...) 
} Round = 0 for i := range instsN { diff --git a/arm64/arm64asm/inst.go b/arm64/arm64asm/inst.go index 866e399c..39ddaf75 100644 --- a/arm64/arm64asm/inst.go +++ b/arm64/arm64asm/inst.go @@ -469,7 +469,7 @@ func (rea RegExtshiftAmount) String() string { if rea.amount != 0 { buf += fmt.Sprintf(" #%d", rea.amount) } else { - if rea.show_zero == true { + if rea.show_zero { buf += fmt.Sprintf(" #%d", rea.amount) } } @@ -527,7 +527,7 @@ func (m MemImmediate) String() string { postR := post.String() return fmt.Sprintf("[%s], %s", R, postR) } - return fmt.Sprintf("unimplemented!") + return "unimplemented!" } // A MemExtend is a memory reference made up of a base R and index expression X. @@ -1021,110 +1021,110 @@ func (s sysInstFields) getType() sys { } var sysInstsAttrs = map[sysInstFields]sysInstAttrs{ - sysInstFields{0, 8, 3, 0}: {sys_TLBI, "VMALLE1IS", false}, - sysInstFields{0, 8, 3, 1}: {sys_TLBI, "VAE1IS", true}, - sysInstFields{0, 8, 3, 2}: {sys_TLBI, "ASIDE1IS", true}, - sysInstFields{0, 8, 3, 3}: {sys_TLBI, "VAAE1IS", true}, - sysInstFields{0, 8, 3, 5}: {sys_TLBI, "VALE1IS", true}, - sysInstFields{0, 8, 3, 7}: {sys_TLBI, "VAALE1IS", true}, - sysInstFields{0, 8, 7, 0}: {sys_TLBI, "VMALLE1", false}, - sysInstFields{0, 8, 7, 1}: {sys_TLBI, "VAE1", true}, - sysInstFields{0, 8, 7, 2}: {sys_TLBI, "ASIDE1", true}, - sysInstFields{0, 8, 7, 3}: {sys_TLBI, "VAAE1", true}, - sysInstFields{0, 8, 7, 5}: {sys_TLBI, "VALE1", true}, - sysInstFields{0, 8, 7, 7}: {sys_TLBI, "VAALE1", true}, - sysInstFields{4, 8, 0, 1}: {sys_TLBI, "IPAS2E1IS", true}, - sysInstFields{4, 8, 0, 5}: {sys_TLBI, "IPAS2LE1IS", true}, - sysInstFields{4, 8, 3, 0}: {sys_TLBI, "ALLE2IS", false}, - sysInstFields{4, 8, 3, 1}: {sys_TLBI, "VAE2IS", true}, - sysInstFields{4, 8, 3, 4}: {sys_TLBI, "ALLE1IS", false}, - sysInstFields{4, 8, 3, 5}: {sys_TLBI, "VALE2IS", true}, - sysInstFields{4, 8, 3, 6}: {sys_TLBI, "VMALLS12E1IS", false}, - sysInstFields{4, 8, 4, 1}: {sys_TLBI, "IPAS2E1", true}, - sysInstFields{4, 8, 
4, 5}: {sys_TLBI, "IPAS2LE1", true}, - sysInstFields{4, 8, 7, 0}: {sys_TLBI, "ALLE2", false}, - sysInstFields{4, 8, 7, 1}: {sys_TLBI, "VAE2", true}, - sysInstFields{4, 8, 7, 4}: {sys_TLBI, "ALLE1", false}, - sysInstFields{4, 8, 7, 5}: {sys_TLBI, "VALE2", true}, - sysInstFields{4, 8, 7, 6}: {sys_TLBI, "VMALLS12E1", false}, - sysInstFields{6, 8, 3, 0}: {sys_TLBI, "ALLE3IS", false}, - sysInstFields{6, 8, 3, 1}: {sys_TLBI, "VAE3IS", true}, - sysInstFields{6, 8, 3, 5}: {sys_TLBI, "VALE3IS", true}, - sysInstFields{6, 8, 7, 0}: {sys_TLBI, "ALLE3", false}, - sysInstFields{6, 8, 7, 1}: {sys_TLBI, "VAE3", true}, - sysInstFields{6, 8, 7, 5}: {sys_TLBI, "VALE3", true}, - sysInstFields{0, 8, 1, 0}: {sys_TLBI, "VMALLE1OS", false}, - sysInstFields{0, 8, 1, 1}: {sys_TLBI, "VAE1OS", true}, - sysInstFields{0, 8, 1, 2}: {sys_TLBI, "ASIDE1OS", true}, - sysInstFields{0, 8, 1, 3}: {sys_TLBI, "VAAE1OS", true}, - sysInstFields{0, 8, 1, 5}: {sys_TLBI, "VALE1OS", true}, - sysInstFields{0, 8, 1, 7}: {sys_TLBI, "VAALE1OS", true}, - sysInstFields{0, 8, 2, 1}: {sys_TLBI, "RVAE1IS", true}, - sysInstFields{0, 8, 2, 3}: {sys_TLBI, "RVAAE1IS", true}, - sysInstFields{0, 8, 2, 5}: {sys_TLBI, "RVALE1IS", true}, - sysInstFields{0, 8, 2, 7}: {sys_TLBI, "RVAALE1IS", true}, - sysInstFields{0, 8, 5, 1}: {sys_TLBI, "RVAE1OS", true}, - sysInstFields{0, 8, 5, 3}: {sys_TLBI, "RVAAE1OS", true}, - sysInstFields{0, 8, 5, 5}: {sys_TLBI, "RVALE1OS", true}, - sysInstFields{0, 8, 5, 7}: {sys_TLBI, "RVAALE1OS", true}, - sysInstFields{0, 8, 6, 1}: {sys_TLBI, "RVAE1", true}, - sysInstFields{0, 8, 6, 3}: {sys_TLBI, "RVAAE1", true}, - sysInstFields{0, 8, 6, 5}: {sys_TLBI, "RVALE1", true}, - sysInstFields{0, 8, 6, 7}: {sys_TLBI, "RVAALE1", true}, - sysInstFields{4, 8, 0, 2}: {sys_TLBI, "RIPAS2E1IS", true}, - sysInstFields{4, 8, 0, 6}: {sys_TLBI, "RIPAS2LE1IS", true}, - sysInstFields{4, 8, 1, 0}: {sys_TLBI, "ALLE2OS", false}, - sysInstFields{4, 8, 1, 1}: {sys_TLBI, "VAE2OS", true}, - sysInstFields{4, 8, 1, 4}: {sys_TLBI, 
"ALLE1OS", false}, - sysInstFields{4, 8, 1, 5}: {sys_TLBI, "VALE2OS", true}, - sysInstFields{4, 8, 1, 6}: {sys_TLBI, "VMALLS12E1OS", false}, - sysInstFields{4, 8, 2, 1}: {sys_TLBI, "RVAE2IS", true}, - sysInstFields{4, 8, 2, 5}: {sys_TLBI, "RVALE2IS", true}, - sysInstFields{4, 8, 4, 0}: {sys_TLBI, "IPAS2E1OS", true}, - sysInstFields{4, 8, 4, 2}: {sys_TLBI, "RIPAS2E1", true}, - sysInstFields{4, 8, 4, 3}: {sys_TLBI, "RIPAS2E1OS", true}, - sysInstFields{4, 8, 4, 4}: {sys_TLBI, "IPAS2LE1OS", true}, - sysInstFields{4, 8, 4, 6}: {sys_TLBI, "RIPAS2LE1", true}, - sysInstFields{4, 8, 4, 7}: {sys_TLBI, "RIPAS2LE1OS", true}, - sysInstFields{4, 8, 5, 1}: {sys_TLBI, "RVAE2OS", true}, - sysInstFields{4, 8, 5, 5}: {sys_TLBI, "RVALE2OS", true}, - sysInstFields{4, 8, 6, 1}: {sys_TLBI, "RVAE2", true}, - sysInstFields{4, 8, 6, 5}: {sys_TLBI, "RVALE2", true}, - sysInstFields{6, 8, 1, 0}: {sys_TLBI, "ALLE3OS", false}, - sysInstFields{6, 8, 1, 1}: {sys_TLBI, "VAE3OS", true}, - sysInstFields{6, 8, 1, 5}: {sys_TLBI, "VALE3OS", true}, - sysInstFields{6, 8, 2, 1}: {sys_TLBI, "RVAE3IS", true}, - sysInstFields{6, 8, 2, 5}: {sys_TLBI, "RVALE3IS", true}, - sysInstFields{6, 8, 5, 1}: {sys_TLBI, "RVAE3OS", true}, - sysInstFields{6, 8, 5, 5}: {sys_TLBI, "RVALE3OS", true}, - sysInstFields{6, 8, 6, 1}: {sys_TLBI, "RVAE3", true}, - sysInstFields{6, 8, 6, 5}: {sys_TLBI, "RVALE3", true}, - sysInstFields{0, 7, 6, 1}: {sys_DC, "IVAC", true}, - sysInstFields{0, 7, 6, 2}: {sys_DC, "ISW", true}, - sysInstFields{0, 7, 10, 2}: {sys_DC, "CSW", true}, - sysInstFields{0, 7, 14, 2}: {sys_DC, "CISW", true}, - sysInstFields{3, 7, 4, 1}: {sys_DC, "ZVA", true}, - sysInstFields{3, 7, 10, 1}: {sys_DC, "CVAC", true}, - sysInstFields{3, 7, 11, 1}: {sys_DC, "CVAU", true}, - sysInstFields{3, 7, 14, 1}: {sys_DC, "CIVAC", true}, - sysInstFields{0, 7, 6, 3}: {sys_DC, "IGVAC", true}, - sysInstFields{0, 7, 6, 4}: {sys_DC, "IGSW", true}, - sysInstFields{0, 7, 6, 5}: {sys_DC, "IGDVAC", true}, - sysInstFields{0, 7, 6, 6}: {sys_DC, 
"IGDSW", true}, - sysInstFields{0, 7, 10, 4}: {sys_DC, "CGSW", true}, - sysInstFields{0, 7, 10, 6}: {sys_DC, "CGDSW", true}, - sysInstFields{0, 7, 14, 4}: {sys_DC, "CIGSW", true}, - sysInstFields{0, 7, 14, 6}: {sys_DC, "CIGDSW", true}, - sysInstFields{3, 7, 4, 3}: {sys_DC, "GVA", true}, - sysInstFields{3, 7, 4, 4}: {sys_DC, "GZVA", true}, - sysInstFields{3, 7, 10, 3}: {sys_DC, "CGVAC", true}, - sysInstFields{3, 7, 10, 5}: {sys_DC, "CGDVAC", true}, - sysInstFields{3, 7, 12, 3}: {sys_DC, "CGVAP", true}, - sysInstFields{3, 7, 12, 5}: {sys_DC, "CGDVAP", true}, - sysInstFields{3, 7, 13, 3}: {sys_DC, "CGVADP", true}, - sysInstFields{3, 7, 13, 5}: {sys_DC, "CGDVADP", true}, - sysInstFields{3, 7, 14, 3}: {sys_DC, "CIGVAC", true}, - sysInstFields{3, 7, 14, 5}: {sys_DC, "CIGDVAC", true}, - sysInstFields{3, 7, 12, 1}: {sys_DC, "CVAP", true}, - sysInstFields{3, 7, 13, 1}: {sys_DC, "CVADP", true}, + {0, 8, 3, 0}: {sys_TLBI, "VMALLE1IS", false}, + {0, 8, 3, 1}: {sys_TLBI, "VAE1IS", true}, + {0, 8, 3, 2}: {sys_TLBI, "ASIDE1IS", true}, + {0, 8, 3, 3}: {sys_TLBI, "VAAE1IS", true}, + {0, 8, 3, 5}: {sys_TLBI, "VALE1IS", true}, + {0, 8, 3, 7}: {sys_TLBI, "VAALE1IS", true}, + {0, 8, 7, 0}: {sys_TLBI, "VMALLE1", false}, + {0, 8, 7, 1}: {sys_TLBI, "VAE1", true}, + {0, 8, 7, 2}: {sys_TLBI, "ASIDE1", true}, + {0, 8, 7, 3}: {sys_TLBI, "VAAE1", true}, + {0, 8, 7, 5}: {sys_TLBI, "VALE1", true}, + {0, 8, 7, 7}: {sys_TLBI, "VAALE1", true}, + {4, 8, 0, 1}: {sys_TLBI, "IPAS2E1IS", true}, + {4, 8, 0, 5}: {sys_TLBI, "IPAS2LE1IS", true}, + {4, 8, 3, 0}: {sys_TLBI, "ALLE2IS", false}, + {4, 8, 3, 1}: {sys_TLBI, "VAE2IS", true}, + {4, 8, 3, 4}: {sys_TLBI, "ALLE1IS", false}, + {4, 8, 3, 5}: {sys_TLBI, "VALE2IS", true}, + {4, 8, 3, 6}: {sys_TLBI, "VMALLS12E1IS", false}, + {4, 8, 4, 1}: {sys_TLBI, "IPAS2E1", true}, + {4, 8, 4, 5}: {sys_TLBI, "IPAS2LE1", true}, + {4, 8, 7, 0}: {sys_TLBI, "ALLE2", false}, + {4, 8, 7, 1}: {sys_TLBI, "VAE2", true}, + {4, 8, 7, 4}: {sys_TLBI, "ALLE1", false}, + {4, 8, 7, 5}: 
{sys_TLBI, "VALE2", true}, + {4, 8, 7, 6}: {sys_TLBI, "VMALLS12E1", false}, + {6, 8, 3, 0}: {sys_TLBI, "ALLE3IS", false}, + {6, 8, 3, 1}: {sys_TLBI, "VAE3IS", true}, + {6, 8, 3, 5}: {sys_TLBI, "VALE3IS", true}, + {6, 8, 7, 0}: {sys_TLBI, "ALLE3", false}, + {6, 8, 7, 1}: {sys_TLBI, "VAE3", true}, + {6, 8, 7, 5}: {sys_TLBI, "VALE3", true}, + {0, 8, 1, 0}: {sys_TLBI, "VMALLE1OS", false}, + {0, 8, 1, 1}: {sys_TLBI, "VAE1OS", true}, + {0, 8, 1, 2}: {sys_TLBI, "ASIDE1OS", true}, + {0, 8, 1, 3}: {sys_TLBI, "VAAE1OS", true}, + {0, 8, 1, 5}: {sys_TLBI, "VALE1OS", true}, + {0, 8, 1, 7}: {sys_TLBI, "VAALE1OS", true}, + {0, 8, 2, 1}: {sys_TLBI, "RVAE1IS", true}, + {0, 8, 2, 3}: {sys_TLBI, "RVAAE1IS", true}, + {0, 8, 2, 5}: {sys_TLBI, "RVALE1IS", true}, + {0, 8, 2, 7}: {sys_TLBI, "RVAALE1IS", true}, + {0, 8, 5, 1}: {sys_TLBI, "RVAE1OS", true}, + {0, 8, 5, 3}: {sys_TLBI, "RVAAE1OS", true}, + {0, 8, 5, 5}: {sys_TLBI, "RVALE1OS", true}, + {0, 8, 5, 7}: {sys_TLBI, "RVAALE1OS", true}, + {0, 8, 6, 1}: {sys_TLBI, "RVAE1", true}, + {0, 8, 6, 3}: {sys_TLBI, "RVAAE1", true}, + {0, 8, 6, 5}: {sys_TLBI, "RVALE1", true}, + {0, 8, 6, 7}: {sys_TLBI, "RVAALE1", true}, + {4, 8, 0, 2}: {sys_TLBI, "RIPAS2E1IS", true}, + {4, 8, 0, 6}: {sys_TLBI, "RIPAS2LE1IS", true}, + {4, 8, 1, 0}: {sys_TLBI, "ALLE2OS", false}, + {4, 8, 1, 1}: {sys_TLBI, "VAE2OS", true}, + {4, 8, 1, 4}: {sys_TLBI, "ALLE1OS", false}, + {4, 8, 1, 5}: {sys_TLBI, "VALE2OS", true}, + {4, 8, 1, 6}: {sys_TLBI, "VMALLS12E1OS", false}, + {4, 8, 2, 1}: {sys_TLBI, "RVAE2IS", true}, + {4, 8, 2, 5}: {sys_TLBI, "RVALE2IS", true}, + {4, 8, 4, 0}: {sys_TLBI, "IPAS2E1OS", true}, + {4, 8, 4, 2}: {sys_TLBI, "RIPAS2E1", true}, + {4, 8, 4, 3}: {sys_TLBI, "RIPAS2E1OS", true}, + {4, 8, 4, 4}: {sys_TLBI, "IPAS2LE1OS", true}, + {4, 8, 4, 6}: {sys_TLBI, "RIPAS2LE1", true}, + {4, 8, 4, 7}: {sys_TLBI, "RIPAS2LE1OS", true}, + {4, 8, 5, 1}: {sys_TLBI, "RVAE2OS", true}, + {4, 8, 5, 5}: {sys_TLBI, "RVALE2OS", true}, + {4, 8, 6, 1}: {sys_TLBI, "RVAE2", true}, + 
{4, 8, 6, 5}: {sys_TLBI, "RVALE2", true}, + {6, 8, 1, 0}: {sys_TLBI, "ALLE3OS", false}, + {6, 8, 1, 1}: {sys_TLBI, "VAE3OS", true}, + {6, 8, 1, 5}: {sys_TLBI, "VALE3OS", true}, + {6, 8, 2, 1}: {sys_TLBI, "RVAE3IS", true}, + {6, 8, 2, 5}: {sys_TLBI, "RVALE3IS", true}, + {6, 8, 5, 1}: {sys_TLBI, "RVAE3OS", true}, + {6, 8, 5, 5}: {sys_TLBI, "RVALE3OS", true}, + {6, 8, 6, 1}: {sys_TLBI, "RVAE3", true}, + {6, 8, 6, 5}: {sys_TLBI, "RVALE3", true}, + {0, 7, 6, 1}: {sys_DC, "IVAC", true}, + {0, 7, 6, 2}: {sys_DC, "ISW", true}, + {0, 7, 10, 2}: {sys_DC, "CSW", true}, + {0, 7, 14, 2}: {sys_DC, "CISW", true}, + {3, 7, 4, 1}: {sys_DC, "ZVA", true}, + {3, 7, 10, 1}: {sys_DC, "CVAC", true}, + {3, 7, 11, 1}: {sys_DC, "CVAU", true}, + {3, 7, 14, 1}: {sys_DC, "CIVAC", true}, + {0, 7, 6, 3}: {sys_DC, "IGVAC", true}, + {0, 7, 6, 4}: {sys_DC, "IGSW", true}, + {0, 7, 6, 5}: {sys_DC, "IGDVAC", true}, + {0, 7, 6, 6}: {sys_DC, "IGDSW", true}, + {0, 7, 10, 4}: {sys_DC, "CGSW", true}, + {0, 7, 10, 6}: {sys_DC, "CGDSW", true}, + {0, 7, 14, 4}: {sys_DC, "CIGSW", true}, + {0, 7, 14, 6}: {sys_DC, "CIGDSW", true}, + {3, 7, 4, 3}: {sys_DC, "GVA", true}, + {3, 7, 4, 4}: {sys_DC, "GZVA", true}, + {3, 7, 10, 3}: {sys_DC, "CGVAC", true}, + {3, 7, 10, 5}: {sys_DC, "CGDVAC", true}, + {3, 7, 12, 3}: {sys_DC, "CGVAP", true}, + {3, 7, 12, 5}: {sys_DC, "CGDVAP", true}, + {3, 7, 13, 3}: {sys_DC, "CGVADP", true}, + {3, 7, 13, 5}: {sys_DC, "CGDVADP", true}, + {3, 7, 14, 3}: {sys_DC, "CIGVAC", true}, + {3, 7, 14, 5}: {sys_DC, "CIGDVAC", true}, + {3, 7, 12, 1}: {sys_DC, "CVAP", true}, + {3, 7, 13, 1}: {sys_DC, "CVADP", true}, } From eba65f7f1ad807647a71227220f0854b4f0acc5d Mon Sep 17 00:00:00 2001 From: Cherry Mui Date: Wed, 5 Mar 2025 22:35:14 -0500 Subject: [PATCH 044/200] x86/xeddata: document how to obtain XED data files Change-Id: I6ec052a15caaeeb0c896af641db07c390321e249 Reviewed-on: https://go-review.googlesource.com/c/arch/+/655315 Reviewed-by: Austin Clements LUCI-TryBot-Result: Go LUCI --- 
x86/xeddata/doc.go | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/x86/xeddata/doc.go b/x86/xeddata/doc.go index 23d51dc5..4439552b 100644 --- a/x86/xeddata/doc.go +++ b/x86/xeddata/doc.go @@ -23,6 +23,9 @@ // where appropriate, x86csv names are provided // as an alternative. // +// Suppose $XED is the path of a checkout of the +// https://github.com/intelxed/xed repo. +// // "$XED/foo/bar.txt" notation is used to specify a path to "foo/bar.txt" // file under local XED source repository folder. // @@ -32,14 +35,20 @@ // 3. Operate on XED objects. // // See example_test.go for complete examples. +// See testdata/xed_objects.txt for examples of "XED objects". +// +// # Obtain XED datafiles // // It is required to build Intel XED before attempting to use -// its datafiles, as this package expects "all" versions that +// its datafiles, as this package expects the "all" versions that // are a concatenated final versions of datafiles. +// To build it, follow the instruction on https://github.com/intelxed/xed. +// +// Once built, the "all" versions of data files are in "$XED/obj/dgen/". // If "$XED/obj/dgen/" does not contain relevant files, // then either this documentation is stale or your XED is not built. -// -// To see examples of "XED objects" see "testdata/xed_objects.txt". +// Pass $XED/obj/dgen (or a copy of it) as the "xedPath" to [NewDatabase] +// or to x86avxgen -xedPath. // // Intel XED https://github.com/intelxed/xed provides all documentation // that can be required to understand datafiles. From f6424c2b3339190600843af381c630228a803dd4 Mon Sep 17 00:00:00 2001 From: Cherry Mui Date: Thu, 6 Mar 2025 09:37:06 -0500 Subject: [PATCH 045/200] x86/xeddata: trim leading space in parsing The new version of XED data file contains some comment lines with a leading space, e.g. " # EOSZ=2 not64". The current parser doesn't recognize it. Trim the leading space. 
Change-Id: Ia5aa244aece7a1cee2d7842d69c60c9d5335dcce Reviewed-on: https://go-review.googlesource.com/c/arch/+/655435 Reviewed-by: Austin Clements LUCI-TryBot-Result: Go LUCI --- x86/xeddata/reader.go | 1 + x86/xeddata/testdata/xed_objects.txt | 1 + 2 files changed, 2 insertions(+) diff --git a/x86/xeddata/reader.go b/x86/xeddata/reader.go index fc8aa94b..cd235100 100644 --- a/x86/xeddata/reader.go +++ b/x86/xeddata/reader.go @@ -113,6 +113,7 @@ func (r *Reader) parseLines(lines []string) (*Object, error) { ) for _, l := range lines { + l = strings.TrimLeft(l, " ") if l[0] == '#' { // Skip comment lines. continue } diff --git a/x86/xeddata/testdata/xed_objects.txt b/x86/xeddata/testdata/xed_objects.txt index 9d0c52e5..c252acfd 100644 --- a/x86/xeddata/testdata/xed_objects.txt +++ b/x86/xeddata/testdata/xed_objects.txt @@ -13,6 +13,7 @@ # { # ICLASS : ADD # } + # comment with leading space ==== [] From 2c5af0f6833f7fa307cb6e9e2f3e5ad8ebd810ec Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Sun, 9 Mar 2025 10:58:35 -0400 Subject: [PATCH 046/200] x86/xeddata: strings.Split -> strings.Cut Change-Id: If015414f94680a753deef3ce6ff400b06c207f45 Reviewed-on: https://go-review.googlesource.com/c/arch/+/656236 LUCI-TryBot-Result: Go LUCI Reviewed-by: Cherry Mui Auto-Submit: Austin Clements --- x86/xeddata/operand.go | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/x86/xeddata/operand.go b/x86/xeddata/operand.go index 1632828d..90614165 100644 --- a/x86/xeddata/operand.go +++ b/x86/xeddata/operand.go @@ -141,13 +141,15 @@ func (op *Operand) NonterminalName() bool { // NameLHS returns left hand side part of the non-terminal name. // Example: NameLHS("REG0=GPRv()") => "REG0". func (op *Operand) NameLHS() string { - return strings.Split(op.Name, "=")[0] + lhs, _, _ := strings.Cut(op.Name, "=") + return lhs } // NameRHS returns right hand side part of the non-terminal name. // Example: NameLHS("REG0=GPRv()") => "GPRv()". 
func (op *Operand) NameRHS() string { - return strings.Split(op.Name, "=")[1] + _, rhs, _ := strings.Cut(op.Name, "=") + return rhs } // IsVisible returns true for operands that are usually From 813340cc1fdc52a372ae3250c2a8df379435f73b Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Fri, 7 Mar 2025 21:16:18 -0500 Subject: [PATCH 047/200] x86/xeddata: fix some operand width issues The XED data has various ways of specifying defaults for operand widths that we hadn't implemented. Specifically, - Some operands don't specify a width code at all and it's implied from the operand type by the extra-widths table. We weren't parsing this table at all, so this adds a parser and uses this table to populate the width if it's missing. - Many width codes have a default xtype. We were already parsing this from the widths table, but not populating it in operands. Change-Id: I5dc9e33e072fe076624ee8695d6627196a30c7f5 Reviewed-on: https://go-review.googlesource.com/c/arch/+/656237 Reviewed-by: Cherry Mui Auto-Submit: Austin Clements LUCI-TryBot-Result: Go LUCI --- .../testdata/xedpath/all-extra-widths.txt | 3 + x86/xeddata/database.go | 48 ++++++++- x86/xeddata/operand.go | 42 +++++++- x86/xeddata/readlines.go | 101 ++++++++++++++++++ .../testdata/xedpath/all-extra-widths.txt | 3 + x86/xeddata/xeddata_test.go | 66 +++++++++--- 6 files changed, 246 insertions(+), 17 deletions(-) create mode 100644 x86/x86avxgen/testdata/xedpath/all-extra-widths.txt create mode 100644 x86/xeddata/readlines.go create mode 100644 x86/xeddata/testdata/xedpath/all-extra-widths.txt diff --git a/x86/x86avxgen/testdata/xedpath/all-extra-widths.txt b/x86/x86avxgen/testdata/xedpath/all-extra-widths.txt new file mode 100644 index 00000000..30a004e6 --- /dev/null +++ b/x86/x86avxgen/testdata/xedpath/all-extra-widths.txt @@ -0,0 +1,3 @@ +# Copyright 2025 The Go Authors. All rights reserved. +# Use of this source code is governed by a BSD-style +# license that can be found in the LICENSE file. 
diff --git a/x86/xeddata/database.go b/x86/xeddata/database.go index 94d21de0..a6ec760d 100644 --- a/x86/xeddata/database.go +++ b/x86/xeddata/database.go @@ -74,9 +74,19 @@ type Database struct { widths map[string]*width // all-widths.txt states map[string]string // all-state.txt xtypes map[string]*xtype // all-element-types.txt + + // extraWidth is a "all-extra-widths.txt" record. + // + // It provides a default mapping from an operand type to a width code. + // + // The key is one of three things: + // - "XED_REG_" for a register (e.g., "XED_REG_EAX") + // - "()" for a non-terminal (e.g., "GPR32_R()"") + // - "" for an immediate const (e.g., "AGEN") + extraWidths map[string]string // all-extra-widths.txt } -// width is a "all-width.txt" record. +// width is a "all-widths.txt" record. type width struct { // Default xtype name (examples: int, i8, f32). xtype string @@ -140,6 +150,14 @@ func NewDatabase(xedPath string) (*Database, error) { } } + extraWidths, err := os.Open(filepath.Join(xedPath, "all-extra-widths.txt")) + if err == nil { + db.extraWidths, err = parseExtraWidths(extraWidths) + if err != nil { + return &db, err + } + } + xtypes, err := os.Open(filepath.Join(xedPath, "all-element-types.txt")) if err == nil { err = db.LoadXtypes(xtypes) @@ -181,6 +199,10 @@ func (db *Database) LoadXtypes(r io.Reader) error { // WidthSize translates width string to size string using desired // SizeMode m. For some widths output is the same for any valid value of m. +// +// The size string may be a decimal number of bytes, like "8". It may of the +// form "%dbits" to indicate a bit width. Or in some cases it's "0" for +// "unusual" registers. 
func (db *Database) WidthSize(width string, m OperandSizeMode) string { info := db.widths[width] if info == nil { @@ -235,6 +257,30 @@ func parseWidths(r io.Reader) (map[string]*width, error) { return widths, nil } +func parseExtraWidths(r io.Reader) (map[string]string, error) { + extraWidths := make(map[string]string) + for line, err := range readLines(r) { + if err != nil { + return nil, err + } + f := bytes.Fields(line.data) + if len(f) != 3 { + return nil, fmt.Errorf("want 3 fields, got %d", len(f)) + } + switch string(f[0]) { + default: + return nil, fmt.Errorf("unknown extra width type %s", f[0]) + case "imm_const": + extraWidths[string(f[1])] = string(f[2]) + case "reg": + extraWidths["XED_REG_"+string(f[1])] = string(f[2]) + case "nt": + extraWidths[string(f[1])+"()"] = string(f[2]) + } + } + return extraWidths, nil +} + func parseStates(r io.Reader) (map[string]string, error) { data, err := ioutil.ReadAll(r) if err != nil { diff --git a/x86/xeddata/operand.go b/x86/xeddata/operand.go index 90614165..8de99d67 100644 --- a/x86/xeddata/operand.go +++ b/x86/xeddata/operand.go @@ -51,6 +51,11 @@ type Operand struct { // Width descriptor. It can express simple width like "w" (word, 16bit) // or meta-width like "v", which corresponds to {16, 32, 64} bits. // + // The first column in all-widths.txt lists all possible widths. + // + // To deterine the size given a width string and a mode, use + // [Database.WidthSize]. + // // Possible values: "", "q", "ds", "dq", ... // Optional. Width string @@ -91,11 +96,14 @@ var xedVisibilities = map[string]OperandVisibility{ // See "$XED/pysrc/opnds.py" to learn about fields format // and valid combinations. // -// Requires database with xtypes and widths info. +// Requires database with xtypes, widths, and extraWidths info. 
func NewOperand(db *Database, s string) (*Operand, error) { if db.widths == nil { return nil, errors.New("Database.widths is nil") } + if db.extraWidths == nil { + return nil, errors.New("Database.extraWidths is nil") + } fields := strings.Split(s, ":") switch len(fields) { @@ -111,9 +119,10 @@ func NewOperand(db *Database, s string) (*Operand, error) { op.Action = fields[1] // Optional fields. + var w string for _, f := range fields[2:] { - if db.widths[f] != nil && op.Width == "" { - op.Width = f + if db.widths[f] != nil && w == "" { + w = f } else if vis, ok := xedVisibilities[f]; ok { op.Visibility = vis } else if xtype := db.xtypes[f]; xtype != nil { @@ -126,6 +135,33 @@ func NewOperand(db *Database, s string) (*Operand, error) { } } + // Get default width from operand type. + if w == "" { + if op.NonterminalName() { + if strings.HasPrefix(op.NameLHS(), "REG") { + rhs := op.NameRHS() + if strings.HasPrefix(rhs, "XED_REG_") { + // Register + w = db.extraWidths[rhs] + } else if strings.HasSuffix(rhs, "()") { + // Non-terminal + w = db.extraWidths[rhs] + } + } + } else { + // Try as an immediate. + w = db.extraWidths[op.Name] + } + } + + if w != "" { + op.Width = w + // If operand did not specify an xtype, get the default from the width + if op.Xtype == "" && db.widths[w] != nil { + op.Xtype = db.widths[w].xtype + } + } + return &op, nil } diff --git a/x86/xeddata/readlines.go b/x86/xeddata/readlines.go new file mode 100644 index 00000000..610bb834 --- /dev/null +++ b/x86/xeddata/readlines.go @@ -0,0 +1,101 @@ +// Copyright 2025 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package xeddata + +import ( + "bufio" + "bytes" + "fmt" + "io" + "iter" + "path/filepath" + "strings" +) + +type lineInfo struct { + Pos + data []byte +} + +type Pos struct { + Path string + Line int +} + +func (p Pos) String() string { + if p.Line == 0 { + if p.Path == "" { + return "?:?" 
+ } + return p.Path + } else if p.Path == "" { + return fmt.Sprintf("?:%d", p.Line) + } + return fmt.Sprintf("%s:%d", p.Path, p.Line) +} + +func (p Pos) ShortString() string { + p2 := p + p2.Path = filepath.Base(p.Path) + return p2.String() +} + +// readLines yields lines from r, with continuation lines folded, comments and +// trailing whitespace removed, and blank lines omitted. +// +// The returned lineInfo.data buffer may be reused between yields. +// +// If r has a Name() string method, this is used to populate lineInfo.Path. +// +// TODO: Rewrite Reader to use this. +func readLines(r io.Reader) iter.Seq2[lineInfo, error] { + type Named interface { + Name() string // Matches os.File + } + path := "" + if f, ok := r.(Named); ok { + path = f.Name() + } + + s := bufio.NewScanner(r) + return func(yield func(lineInfo, error) bool) { + var info lineInfo + info.Path = path + var lineBuf []byte + for s.Scan() { + info.Line++ + + lineBuf = append(lineBuf, s.Bytes()...) + if len(lineBuf) > 0 && lineBuf[len(lineBuf)-1] == '\\' { + // Continuation line. Drop the \ and keep reading. + lineBuf = lineBuf[:len(lineBuf)-1] + continue + } + // Remove comments and trailing whitespace + if i := strings.IndexByte(string(lineBuf), '#'); i >= 0 { + lineBuf = lineBuf[:i] + } + lineBuf = bytes.TrimRight(lineBuf, " \t") + // Don't yield blank lines + if len(lineBuf) == 0 { + continue + } + + info.data = lineBuf + if !yield(info, nil) { + return + } + lineBuf = lineBuf[:0] + } + + if err := s.Err(); err != nil { + yield(lineInfo{}, err) + return + } + if len(lineBuf) > 0 { + yield(lineInfo{}, fmt.Errorf("continuation line at EOF")) + } + } +} diff --git a/x86/xeddata/testdata/xedpath/all-extra-widths.txt b/x86/xeddata/testdata/xedpath/all-extra-widths.txt new file mode 100644 index 00000000..30a004e6 --- /dev/null +++ b/x86/xeddata/testdata/xedpath/all-extra-widths.txt @@ -0,0 +1,3 @@ +# Copyright 2025 The Go Authors. All rights reserved. 
+# Use of this source code is governed by a BSD-style +# license that can be found in the LICENSE file. diff --git a/x86/xeddata/xeddata_test.go b/x86/xeddata/xeddata_test.go index ad0de1ec..fc98d86c 100644 --- a/x86/xeddata/xeddata_test.go +++ b/x86/xeddata/xeddata_test.go @@ -69,6 +69,12 @@ var ( "mem80real": {xtype: "f80", sizes: [3]string{"10", "10", "10"}}, "mfpxenv": {xtype: "struct", sizes: [3]string{"512", "512", "512"}}, } + + extraWidthsMap = map[string]string{ + "AGEN": "pseudo", + "XED_REG_EAX": "d", + "GPR32_R()": "d", + } ) // newStatesSource returns a reader that mocks "all-state.txt" file. @@ -118,6 +124,22 @@ func newWidthsSource() io.Reader { return &buf } +func newExtraWidthsSource() io.Reader { + var buf bytes.Buffer + for name, width := range extraWidthsMap { + buf.WriteString("# Line comment\n") + buf.WriteString("#\n\n\n") + if reg, ok := strings.CutPrefix(name, "XED_REG_"); ok { + fmt.Fprintf(&buf, "reg %s %s\n", reg, width) + } else if nt, ok := strings.CutSuffix(name, "()"); ok { + fmt.Fprintf(&buf, "nt %s %s\n", nt, width) + } else { + fmt.Fprintf(&buf, "imm_const %s %s\n", name, width) + } + } + return &buf +} + // newXtypesSource returns a reader that mocks "all-element-types.txt" file. // Input content is generated based on xtypesMap. 
func newXtypesSource() io.Reader { @@ -150,6 +172,10 @@ func newTestDatabase(t *testing.T) *Database { if err != nil { t.Fatal(err) } + db.extraWidths, err = parseExtraWidths(newExtraWidthsSource()) + if err != nil { + t.Fatal(err) + } err = db.LoadXtypes(newXtypesSource()) if err != nil { t.Fatal(err) @@ -258,7 +284,7 @@ func TestNewOperand(t *testing.T) { }, { "MEM0:rw:q", - Operand{Name: "MEM0", Action: "rw", Width: "q"}, + Operand{Name: "MEM0", Action: "rw", Width: "q", Xtype: "i64"}, }, { "REG0=XMM_R():rcw:ps:f32", @@ -266,13 +292,27 @@ func TestNewOperand(t *testing.T) { }, { "IMM0:r:z", - Operand{Name: "IMM0", Action: "r", Width: "z"}, + Operand{Name: "IMM0", Action: "r", Width: "z", Xtype: "int"}, }, { "IMM1:cw:b:i8", Operand{Name: "IMM1", Action: "cw", Width: "b", Xtype: "i8"}, }, + // Implied width code + { + "AGEN:r", + Operand{Name: "AGEN", Action: "r", Width: "pseudo"}, + }, + { + "REG0=XED_REG_EAX:r", + Operand{Name: "REG0=XED_REG_EAX", Action: "r", Width: "d", Xtype: "i32"}, + }, + { + "REG0=GPR32_R():r", + Operand{Name: "REG0=GPR32_R()", Action: "r", Width: "d", Xtype: "i32"}, + }, + // Optional fields and visibility. 
{ "REG2:r:EXPL", @@ -280,19 +320,19 @@ func TestNewOperand(t *testing.T) { }, { "MEM1:w:d:IMPL", - Operand{Name: "MEM1", Action: "w", Width: "d", Visibility: VisImplicit}, + Operand{Name: "MEM1", Action: "w", Width: "d", Xtype: "i32", Visibility: VisImplicit}, }, { "MEM1:w:IMPL:d", - Operand{Name: "MEM1", Action: "w", Width: "d", Visibility: VisImplicit}, + Operand{Name: "MEM1", Action: "w", Width: "d", Xtype: "i32", Visibility: VisImplicit}, }, { - "MEM1:w:d:SUPP:i32", - Operand{Name: "MEM1", Action: "w", Width: "d", Visibility: VisSuppressed, Xtype: "i32"}, + "MEM1:w:d:SUPP:f32", + Operand{Name: "MEM1", Action: "w", Width: "d", Visibility: VisSuppressed, Xtype: "f32"}, }, { - "MEM1:w:SUPP:d:i32", - Operand{Name: "MEM1", Action: "w", Width: "d", Visibility: VisSuppressed, Xtype: "i32"}, + "MEM1:w:SUPP:d:f32", + Operand{Name: "MEM1", Action: "w", Width: "d", Visibility: VisSuppressed, Xtype: "f32"}, }, // Ambiguity: xtypes that look like widths. @@ -304,7 +344,7 @@ func TestNewOperand(t *testing.T) { // TXT=X field. { "REG1=MASK1():r:mskw:TXT=ZEROSTR", - Operand{Name: "REG1=MASK1()", Action: "r", Width: "mskw", + Operand{Name: "REG1=MASK1()", Action: "r", Width: "mskw", Xtype: "i1", Attributes: map[string]bool{"TXT=ZEROSTR": true}}, }, { @@ -314,26 +354,26 @@ func TestNewOperand(t *testing.T) { }, { "REG0=ZMM_R3():w:zf32:TXT=SAESTR", - Operand{Name: "REG0=ZMM_R3()", Action: "w", Width: "zf32", + Operand{Name: "REG0=ZMM_R3()", Action: "w", Width: "zf32", Xtype: "f32", Attributes: map[string]bool{"TXT=SAESTR": true}}, }, { "REG0=ZMM_R3():w:zf64:TXT=ROUNDC", - Operand{Name: "REG0=ZMM_R3()", Action: "w", Width: "zf64", + Operand{Name: "REG0=ZMM_R3()", Action: "w", Width: "zf64", Xtype: "f64", Attributes: map[string]bool{"TXT=ROUNDC": true}}, }, // Multi-source. 
{ "REG2=ZMM_N3():r:zf32:MULTISOURCE4", - Operand{Name: "REG2=ZMM_N3()", Action: "r", Width: "zf32", + Operand{Name: "REG2=ZMM_N3()", Action: "r", Width: "zf32", Xtype: "f32", Attributes: map[string]bool{"MULTISOURCE4": true}}, }, // Multi-source + EVEX.b context. { "REG2=ZMM_N3():r:zf32:MULTISOURCE4:TXT=SAESTR", - Operand{Name: "REG2=ZMM_N3()", Action: "r", Width: "zf32", + Operand{Name: "REG2=ZMM_N3()", Action: "r", Width: "zf32", Xtype: "f32", Attributes: map[string]bool{"MULTISOURCE4": true, "TXT=SAESTR": true}}, }, } From 913b04b6e4a46a6b638203803966d86204608c75 Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Sun, 9 Mar 2025 12:11:38 -0400 Subject: [PATCH 048/200] x86/xeddata: rewrite Reader to use new readLines iterator The parsing in readLines is simpler and more robust. It also includes line information (which would have been pretty annoying to add to the old parser), so this is a step toward adding position information to Inst. One downside of this is that Reader.Read has to use a pull iterator. However, as far as I can tell, the only callers of this were Reader.ReadAll and WalkInsts, both of which we rewrite to use readObjects directly. Change-Id: I4ca58c877fbfd5295209aea31999c8abd6876f17 Reviewed-on: https://go-review.googlesource.com/c/arch/+/656238 Auto-Submit: Austin Clements LUCI-TryBot-Result: Go LUCI Reviewed-by: Cherry Mui --- x86/xeddata/reader.go | 127 ++++++++++++++++++--------------------- x86/xeddata/readlines.go | 2 - x86/xeddata/xeddata.go | 11 +--- 3 files changed, 61 insertions(+), 79 deletions(-) diff --git a/x86/xeddata/reader.go b/x86/xeddata/reader.go index cd235100..c1a0aa7e 100644 --- a/x86/xeddata/reader.go +++ b/x86/xeddata/reader.go @@ -5,47 +5,27 @@ package xeddata import ( - "bufio" "errors" "fmt" "io" + "iter" "regexp" "strings" ) // Reader reads enc/dec-instruction objects from XED datafile. 
type Reader struct { - scanner *bufio.Scanner + r io.Reader - lines []string // Re-used between Read calls - - // True if last line ends with newline escape (backslash). - joinLines bool + // Initialized on first call to Read + next func() (*Object, error, bool) + stop func() + err error } // NewReader returns a new Reader that reads from r. func NewReader(r io.Reader) *Reader { - return newReader(bufio.NewScanner(r)) -} - -func newReader(scanner *bufio.Scanner) *Reader { - r := &Reader{ - lines: make([]string, 0, 64), - scanner: scanner, - } - scanner.Split(r.split) - return r -} - -// split implements bufio.SplitFunc for Reader. -func (r *Reader) split(data []byte, atEOF bool) (int, []byte, error) { - // Wrapping bufio.ScanLines to handle \-style newline escapes. - // joinLines flag affects Reader.scanLine behavior. - advance, tok, err := bufio.ScanLines(data, atEOF) - if err == nil && len(tok) >= 1 { - r.joinLines = tok[len(tok)-1] == '\\' - } - return advance, tok, err + return &Reader{r: r} } // Read reads single XED instruction object from @@ -54,37 +34,65 @@ func (r *Reader) split(data []byte, atEOF bool) (int, []byte, error) { // If there is no data left to be read, // returned error is io.EOF. func (r *Reader) Read() (*Object, error) { - for line := r.scanLine(); line != ""; line = r.scanLine() { - if line[0] != '{' { - continue - } - lines := r.lines[:0] // Object lines - for line := r.scanLine(); line != ""; line = r.scanLine() { - if line[0] == '}' { - return r.parseLines(lines) - } - lines = append(lines, line) - } - return nil, errors.New("no matching '}' found") + if r.err != nil { + return nil, r.err } - - return nil, io.EOF + if r.next == nil { + r.next, r.stop = iter.Pull2(readObjects(r.r)) + } + obj, err, end := r.next() + if end { + err = io.EOF + } + if err != nil { + r.stop() + r.err, r.next, r.stop = err, nil, nil + return nil, err + } + return obj, nil } // ReadAll reads all the remaining objects from r. 
// A successful call returns err == nil, not err == io.EOF, // just like csv.Reader.ReadAll(). func (r *Reader) ReadAll() ([]*Object, error) { - objects := []*Object{} - for { - o, err := r.Read() - if err == io.EOF { - return objects, nil - } + var objects []*Object + for obj, err := range readObjects(r.r) { if err != nil { return objects, err } - objects = append(objects, o) + objects = append(objects, obj) + } + return objects, nil +} + +// readObjects yields all of the objects from r. +func readObjects(r io.Reader) iter.Seq2[*Object, error] { + iterLines := readLines(r) + return func(yield func(*Object, error) bool) { + var block []string // Reused on each iteration + inBlock := false + for line, err := range iterLines { + if err != nil { + yield(nil, err) + return + } + if !inBlock { + inBlock = line.data[0] == '{' + } else if line.data[0] == '}' { + inBlock = false + obj, err := parseObjectLines(block) + if !yield(obj, err) { + return + } + block = block[:0] + } else { + block = append(block, string(line.data)) + } + } + if inBlock { + yield(nil, errors.New("no matching '}' found")) + } } } @@ -96,11 +104,10 @@ func (r *Reader) ReadAll() ([]*Object, error) { // unquoted field name "[A-Z_]+" (captured) // field value delimiter ":" // field value string (captured) -// optional trailing comment that is ignored "[^#]*" -var instLineRE = regexp.MustCompile(`^([A-Z_]+)\s*:\s*([^#]*)`) +var instLineRE = regexp.MustCompile(`^([A-Z_]+)\s*:\s*(.*)`) // parseLines turns collected object lines into Object. -func (r *Reader) parseLines(lines []string) (*Object, error) { +func parseObjectLines(lines []string) (*Object, error) { o := &Object{} // Repeatable tokens. @@ -192,21 +199,3 @@ func (r *Reader) parseLines(lines []string) (*Object, error) { return o, nil } - -// scanLine tries to fetch non-empty line from scanner. -// -// Returns empty line when scanner.Scan() returns false -// before non-empty line is found. 
-func (r *Reader) scanLine() string { - for r.scanner.Scan() { - line := r.scanner.Text() - if line == "" { - continue - } - if r.joinLines { - return line[:len(line)-len("\\")] + r.scanLine() - } - return line - } - return "" -} diff --git a/x86/xeddata/readlines.go b/x86/xeddata/readlines.go index 610bb834..d9638973 100644 --- a/x86/xeddata/readlines.go +++ b/x86/xeddata/readlines.go @@ -48,8 +48,6 @@ func (p Pos) ShortString() string { // The returned lineInfo.data buffer may be reused between yields. // // If r has a Name() string method, this is used to populate lineInfo.Path. -// -// TODO: Rewrite Reader to use this. func readLines(r io.Reader) iter.Seq2[lineInfo, error] { type Named interface { Name() string // Matches os.File diff --git a/x86/xeddata/xeddata.go b/x86/xeddata/xeddata.go index 7cc7a087..4d2b2012 100644 --- a/x86/xeddata/xeddata.go +++ b/x86/xeddata/xeddata.go @@ -5,7 +5,6 @@ package xeddata import ( - "io" "os" "path/filepath" ) @@ -16,17 +15,13 @@ func WalkInsts(xedPath string, visit func(*Inst)) error { if err != nil { return err } - r := NewReader(f) - for { - o, err := r.Read() - if err == io.EOF { - return nil - } + for obj, err := range readObjects(f) { if err != nil { return err } - for _, inst := range o.Insts { + for _, inst := range obj.Insts { visit(inst) } } + return nil } From adeecab5f209ca3732e844b3b59716281155fbf8 Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Sun, 9 Mar 2025 13:38:47 -0400 Subject: [PATCH 049/200] x86/xeddata: add Pos to Object and Inst Change-Id: Ib9ad5e2c4bbd005b7fad15b7d0dc8943f2747689 Reviewed-on: https://go-review.googlesource.com/c/arch/+/656239 Auto-Submit: Austin Clements Reviewed-by: Cherry Mui LUCI-TryBot-Result: Go LUCI --- x86/xeddata/object.go | 6 ++++++ x86/xeddata/reader.go | 16 ++++++++++++---- x86/xeddata/xeddata_test.go | 36 ++++++++++++++++++++++++++++++++++++ 3 files changed, 54 insertions(+), 4 deletions(-) diff --git a/x86/xeddata/object.go b/x86/xeddata/object.go index 
4a739739..662aa694 100644 --- a/x86/xeddata/object.go +++ b/x86/xeddata/object.go @@ -21,6 +21,9 @@ import ( // Object contains multiple Inst elements that represent concrete // instruction with encoding pattern and operands description. type Object struct { + // Pos is the file position of the start of this object. + Pos Pos + // Iclass is instruction class name (opcode). // Iclass alone is not enough to uniquely identify machine instructions. // Example: "PSRLW". @@ -128,6 +131,9 @@ type Inst struct { // Inst objects. *Object + // Pos is the file position of this Inst's PATTERN. + Pos Pos + // Index is the position inside XED object. // Object.Insts[Index] returns this inst. Index int diff --git a/x86/xeddata/reader.go b/x86/xeddata/reader.go index c1a0aa7e..02fd9c81 100644 --- a/x86/xeddata/reader.go +++ b/x86/xeddata/reader.go @@ -70,7 +70,9 @@ func (r *Reader) ReadAll() ([]*Object, error) { func readObjects(r io.Reader) iter.Seq2[*Object, error] { iterLines := readLines(r) return func(yield func(*Object, error) bool) { + var blockPos Pos var block []string // Reused on each iteration + var linePos []Pos inBlock := false for line, err := range iterLines { if err != nil { @@ -79,15 +81,17 @@ func readObjects(r io.Reader) iter.Seq2[*Object, error] { } if !inBlock { inBlock = line.data[0] == '{' + blockPos = line.Pos } else if line.data[0] == '}' { inBlock = false - obj, err := parseObjectLines(block) + obj, err := parseObjectLines(blockPos, block, linePos) if !yield(obj, err) { return } - block = block[:0] + block, linePos = block[:0], linePos[:0] } else { block = append(block, string(line.data)) + linePos = append(linePos, line.Pos) } } if inBlock { @@ -107,8 +111,9 @@ func readObjects(r io.Reader) iter.Seq2[*Object, error] { var instLineRE = regexp.MustCompile(`^([A-Z_]+)\s*:\s*(.*)`) // parseLines turns collected object lines into Object. 
-func parseObjectLines(lines []string) (*Object, error) { +func parseObjectLines(blockPos Pos, lines []string, linePos []Pos) (*Object, error) { o := &Object{} + o.Pos = blockPos // Repeatable tokens. // We can not assign them eagerly, because these fields @@ -117,9 +122,10 @@ func parseObjectLines(lines []string) (*Object, error) { operands []string iforms []string patterns []string + poses []Pos ) - for _, l := range lines { + for i, l := range lines { l = strings.TrimLeft(l, " ") if l[0] == '#' { // Skip comment lines. continue @@ -167,6 +173,7 @@ func parseObjectLines(lines []string) (*Object, error) { operands = append(operands, val) case "PATTERN": patterns = append(patterns, val) + poses = append(poses, linePos[i]) case "IFORM": iforms = append(iforms, val) @@ -188,6 +195,7 @@ func parseObjectLines(lines []string) (*Object, error) { Object: o, Index: i, Pattern: patterns[i], + Pos: poses[i], Operands: operands[i], } // There can be less IFORMs than insts. diff --git a/x86/xeddata/xeddata_test.go b/x86/xeddata/xeddata_test.go index fc98d86c..8b64be4a 100644 --- a/x86/xeddata/xeddata_test.go +++ b/x86/xeddata/xeddata_test.go @@ -470,6 +470,42 @@ func TestReader(t *testing.T) { } } +func TestReaderPos(t *testing.T) { + const data = `# Comment +{ +ICLASS: iclass1 +DISASM: disasm1 + +PATTERN: pat1 pat1 +OPERANDS: ops1 ops1 +}` + r := NewReader(namedReader{strings.NewReader(data), "test"}) + objects, err := r.ReadAll() + if err != nil { + t.Fatal(err) + } + + if want := "test:2"; objects[0].Pos.String() != want { + t.Errorf("object Pos: got %q, want %q", objects[0].Pos, want) + } + if want := "test:6"; objects[0].Insts[0].Pos.String() != want { + t.Errorf("inst Pos: got %q, want %q", objects[0].Insts[0].Pos, want) + } +} + +type namedReader struct { + r io.Reader + name string +} + +func (n namedReader) Read(p []byte) (int, error) { + return n.r.Read(p) +} + +func (n namedReader) Name() string { + return n.name +} + func TestMacroExpand(t *testing.T) { tests := 
[...]struct { input string From f2e9665ba3565e2dd3b57b260d5f46b952e6b8c3 Mon Sep 17 00:00:00 2001 From: Keith Randall Date: Thu, 20 Mar 2025 11:14:42 -0700 Subject: [PATCH 050/200] x86/x86asm: don't symbolize immediate constants, mostly Fixes golang/go#72942 Change-Id: Ib3bfef301fa8502f2c2d692f91d38acd2df20275 Reviewed-on: https://go-review.googlesource.com/c/arch/+/659675 Reviewed-by: Cherry Mui Reviewed-by: Keith Randall LUCI-TryBot-Result: Go LUCI --- x86/x86asm/gnu.go | 12 +++++++----- x86/x86asm/intel.go | 12 +++++++----- x86/x86asm/plan9x.go | 21 ++++++++++++++++----- 3 files changed, 30 insertions(+), 15 deletions(-) diff --git a/x86/x86asm/gnu.go b/x86/x86asm/gnu.go index 8eba1fd0..864f32c1 100644 --- a/x86/x86asm/gnu.go +++ b/x86/x86asm/gnu.go @@ -667,12 +667,14 @@ func gnuArg(inst *Inst, pc uint64, symname SymLookup, x Arg, usedPrefixes *bool) } } case Imm: - if s, base := symname(uint64(x)); s != "" { - suffix := "" - if uint64(x) != base { - suffix = fmt.Sprintf("%+d", uint64(x)-base) + if (inst.Op == MOV || inst.Op == PUSH) && inst.DataSize == 32 { // See comment in plan9x.go. + if s, base := symname(uint64(x)); s != "" { + suffix := "" + if uint64(x) != base { + suffix = fmt.Sprintf("%+d", uint64(x)-base) + } + return fmt.Sprintf("$%s%s", s, suffix) } - return fmt.Sprintf("$%s%s", s, suffix) } if inst.Mode == 32 { return fmt.Sprintf("$%#x", uint32(x)) diff --git a/x86/x86asm/intel.go b/x86/x86asm/intel.go index 472eabda..a0622998 100644 --- a/x86/x86asm/intel.go +++ b/x86/x86asm/intel.go @@ -341,12 +341,14 @@ func IntelSyntax(inst Inst, pc uint64, symname SymLookup) string { func intelArg(inst *Inst, pc uint64, symname SymLookup, arg Arg) string { switch a := arg.(type) { case Imm: - if s, base := symname(uint64(a)); s != "" { - suffix := "" - if uint64(a) != base { - suffix = fmt.Sprintf("%+d", uint64(a)-base) + if (inst.Op == MOV || inst.Op == PUSH) && inst.DataSize == 32 { // See comment in plan9x.go. 
+ if s, base := symname(uint64(a)); s != "" { + suffix := "" + if uint64(a) != base { + suffix = fmt.Sprintf("%+d", uint64(a)-base) + } + return fmt.Sprintf("$%s%s", s, suffix) } - return fmt.Sprintf("$%s%s", s, suffix) } if inst.Mode == 32 { return fmt.Sprintf("%#x", uint32(a)) diff --git a/x86/x86asm/plan9x.go b/x86/x86asm/plan9x.go index 9e866d87..e82349ce 100644 --- a/x86/x86asm/plan9x.go +++ b/x86/x86asm/plan9x.go @@ -116,12 +116,23 @@ func plan9Arg(inst *Inst, pc uint64, symname func(uint64) (string, uint64), arg return fmt.Sprintf("%#x", addr) case Imm: - if s, base := symname(uint64(a)); s != "" { - suffix := "" - if uint64(a) != base { - suffix = fmt.Sprintf("%+d", uint64(a)-base) + if (inst.Op == MOV || inst.Op == PUSH) && inst.DataSize == 32 { + // Only try to convert an immediate to a symbol in certain + // special circumstances. See issue 72942. + // + // On 64-bit, symbol addresses always hit the Mem case below. + // Particularly, we use LEAQ to materialize the address of + // a global or function. + // + // On 32-bit, we sometimes use MOVL. Still try to symbolize + // those immediates. + if s, base := symname(uint64(a)); s != "" { + suffix := "" + if uint64(a) != base { + suffix = fmt.Sprintf("%+d", uint64(a)-base) + } + return fmt.Sprintf("$%s%s(SB)", s, suffix) } - return fmt.Sprintf("$%s%s(SB)", s, suffix) } if inst.Mode == 32 { return fmt.Sprintf("$%#x", uint32(a)) From ad2912dbb8f1d4d5715569c985dee84d51562f06 Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Thu, 17 Apr 2025 22:33:09 -0400 Subject: [PATCH 051/200] internal/unify: new package for structured value unification The plan is to use this package as part of generating SIMD mappings. 
Change-Id: Ie67bf7fe87222b8dffdbb12a99729c0fe0f7bc38 Reviewed-on: https://go-review.googlesource.com/c/arch/+/666515 LUCI-TryBot-Result: Go LUCI Reviewed-by: Junyang Shao --- go.mod | 2 + go.sum | 4 + internal/unify/closure.go | 148 +++++++++ internal/unify/domain.go | 311 ++++++++++++++++++ internal/unify/dot.go | 183 +++++++++++ internal/unify/env.go | 500 +++++++++++++++++++++++++++++ internal/unify/html.go | 190 +++++++++++ internal/unify/pos.go | 33 ++ internal/unify/testdata/unify.yaml | 174 ++++++++++ internal/unify/testdata/vars.yaml | 175 ++++++++++ internal/unify/trace.go | 168 ++++++++++ internal/unify/unify.go | 322 +++++++++++++++++++ internal/unify/unify_test.go | 154 +++++++++ internal/unify/value.go | 129 ++++++++ internal/unify/value_test.go | 36 +++ internal/unify/yaml.go | 475 +++++++++++++++++++++++++++ internal/unify/yaml_test.go | 91 ++++++ 17 files changed, 3095 insertions(+) create mode 100644 internal/unify/closure.go create mode 100644 internal/unify/domain.go create mode 100644 internal/unify/dot.go create mode 100644 internal/unify/env.go create mode 100644 internal/unify/html.go create mode 100644 internal/unify/pos.go create mode 100644 internal/unify/testdata/unify.yaml create mode 100644 internal/unify/testdata/vars.yaml create mode 100644 internal/unify/trace.go create mode 100644 internal/unify/unify.go create mode 100644 internal/unify/unify_test.go create mode 100644 internal/unify/value.go create mode 100644 internal/unify/value_test.go create mode 100644 internal/unify/yaml.go create mode 100644 internal/unify/yaml_test.go diff --git a/go.mod b/go.mod index b72ba1a5..72642f75 100644 --- a/go.mod +++ b/go.mod @@ -3,3 +3,5 @@ module golang.org/x/arch go 1.23.0 require rsc.io/pdf v0.1.1 + +require gopkg.in/yaml.v3 v3.0.1 diff --git a/go.sum b/go.sum index e854d25c..cf7dae80 100644 --- a/go.sum +++ b/go.sum @@ -1,2 +1,6 @@ +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= 
+gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= rsc.io/pdf v0.1.1 h1:k1MczvYDUvJBe93bYd7wrZLLUEcLZAuF824/I4e5Xr4= rsc.io/pdf v0.1.1/go.mod h1:n8OzWcQ6Sp37PL01nO98y4iUCRdTGarVfzxY20ICaU4= diff --git a/internal/unify/closure.go b/internal/unify/closure.go new file mode 100644 index 00000000..8a1636de --- /dev/null +++ b/internal/unify/closure.go @@ -0,0 +1,148 @@ +// Copyright 2025 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package unify + +import ( + "fmt" + "iter" + "maps" + "slices" +) + +type Closure struct { + val *Value + env nonDetEnv +} + +func NewSum(vs ...*Value) Closure { + id := &ident{name: "sum"} + return Closure{NewValue(Var{id}), topEnv.bind(id, vs...)} +} + +// IsBottom returns whether c consists of no values. +func (c Closure) IsBottom() bool { + return c.val.Domain == nil +} + +// Summands returns the top-level Values of c. This assumes the top-level of c +// was constructed as a sum, and is mostly useful for debugging. +func (c Closure) Summands() iter.Seq[*Value] { + if v, ok := c.val.Domain.(Var); ok { + parts := c.env.partitionBy(v.id) + return func(yield func(*Value) bool) { + for _, part := range parts { + if !yield(part.value) { + return + } + } + } + } + return func(yield func(*Value) bool) { + yield(c.val) + } +} + +// All enumerates all possible concrete values of c by substituting variables +// from the environment. 
+// +// E.g., enumerating this Value +// +// a: !sum [1, 2] +// b: !sum [3, 4] +// +// results in +// +// - {a: 1, b: 3} +// - {a: 1, b: 4} +// - {a: 2, b: 3} +// - {a: 2, b: 4} +func (c Closure) All() iter.Seq[*Value] { + // In order to enumerate all concrete values under all possible variable + // bindings, we use a "non-deterministic continuation passing style" to + // implement this. We use CPS to traverse the Value tree, threading the + // (possibly narrowing) environment through that CPS following an Euler + // tour. Where the environment permits multiple choices, we invoke the same + // continuation for each choice. Similar to a yield function, the + // continuation can return false to stop the non-deterministic walk. + return func(yield func(*Value) bool) { + c.val.all1(c.env, func(v *Value, e nonDetEnv) bool { + return yield(v) + }) + } +} + +func (v *Value) all1(e nonDetEnv, cont func(*Value, nonDetEnv) bool) bool { + switch d := v.Domain.(type) { + default: + panic(fmt.Sprintf("unknown domain type %T", d)) + + case nil: + return true + + case Top, String: + return cont(v, e) + + case Def: + fields := d.keys() + // We can reuse this parts slice because we're doing a DFS through the + // state space. (Otherwise, we'd have to do some messy threading of an + // immutable slice-like value through allElt.) + parts := make(map[string]*Value, len(fields)) + + // TODO: If there are no Vars or Sums under this Def, then nothing can + // change the Value or env, so we could just cont(v, e). + var allElt func(elt int, e nonDetEnv) bool + allElt = func(elt int, e nonDetEnv) bool { + if elt == len(fields) { + // Build a new Def from the concrete parts. Clone parts because + // we may reuse it on other non-deterministic branches. 
+ nVal := newValueFrom(Def{maps.Clone(parts)}, v) + return cont(nVal, e) + } + + return d.fields[fields[elt]].all1(e, func(v *Value, e nonDetEnv) bool { + parts[fields[elt]] = v + return allElt(elt+1, e) + }) + } + return allElt(0, e) + + case Tuple: + // Essentially the same as Def. + if d.repeat != nil { + // There's nothing we can do with this. + return cont(v, e) + } + parts := make([]*Value, len(d.vs)) + var allElt func(elt int, e nonDetEnv) bool + allElt = func(elt int, e nonDetEnv) bool { + if elt == len(d.vs) { + // Build a new tuple from the concrete parts. Clone parts because + // we may reuse it on other non-deterministic branches. + nVal := newValueFrom(Tuple{vs: slices.Clone(parts)}, v) + return cont(nVal, e) + } + + return d.vs[elt].all1(e, func(v *Value, e nonDetEnv) bool { + parts[elt] = v + return allElt(elt+1, e) + }) + } + return allElt(0, e) + + case Var: + // Go each way this variable can be bound. + for _, ePart := range e.partitionBy(d.id) { + // d.id is no longer bound in this environment partition. We'll may + // need it later in the Euler tour, so bind it back to this single + // value. + env := ePart.env.bind(d.id, ePart.value) + if !ePart.value.all1(env, cont) { + return false + } + } + return true + } +} diff --git a/internal/unify/domain.go b/internal/unify/domain.go new file mode 100644 index 00000000..c59bd621 --- /dev/null +++ b/internal/unify/domain.go @@ -0,0 +1,311 @@ +// Copyright 2025 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package unify + +import ( + "fmt" + "iter" + "maps" + "reflect" + "regexp" + "slices" + "strconv" + "strings" +) + +// A Domain is a non-empty set of values, all of the same kind. +// +// Domain may be a scalar: +// +// - [String] - Represents string-typed values. +// +// Or a composite: +// +// - [Def] - A mapping from fixed keys to [Domain]s. 
+// +// - [Tuple] - A fixed-length sequence of [Domain]s or +// all possible lengths repeating a [Domain]. +// +// Or top or bottom: +// +// - [Top] - Represents all possible values of all kinds. +// +// - nil - Represents no values. +// +// Or a variable: +// +// - [Var] - A value captured in the environment. +type Domain interface { + Exact() bool + + // decode stores this value in a Go value. If this value is not exact, this + // returns a potentially wrapped *inexactError. + decode(reflect.Value) error +} + +type inexactError struct { + valueType string + goType string +} + +func (e *inexactError) Error() string { + return fmt.Sprintf("cannot store inexact %s value in %s", e.valueType, e.goType) +} + +type decodeError struct { + path string + err error +} + +func newDecodeError(path string, err error) *decodeError { + if err, ok := err.(*decodeError); ok { + return &decodeError{path: path + "." + err.path, err: err.err} + } + return &decodeError{path: path, err: err} +} + +func (e *decodeError) Unwrap() error { + return e.err +} + +func (e *decodeError) Error() string { + return fmt.Sprintf("%s: %s", e.path, e.err) +} + +// Top represents all possible values of all possible types. +type Top struct{} + +func (t Top) Exact() bool { return false } + +func (t Top) decode(rv reflect.Value) error { + // We can decode Top into a pointer-typed value as nil. + if rv.Kind() != reflect.Pointer { + return &inexactError{"top", rv.Type().String()} + } + rv.SetZero() + return nil +} + +// A Def is a mapping from field names to [Value]s. Any fields not explicitly +// listed have [Value] [Top]. +type Def struct { + fields map[string]*Value +} + +// NewDef creates a new [Def]. +// +// The fields and values slices must have the same length. 
+func NewDef(fields []string, values []*Value) Def { + if len(fields) != len(values) { + panic("fields and values must have the same length") + } + m := make(map[string]*Value, len(fields)) + for i := range fields { + if _, ok := m[fields[i]]; ok { + panic(fmt.Sprintf("duplicate field %q", fields[i])) + } + m[fields[i]] = values[i] + } + return Def{m} +} + +// Exact returns true if all field Values are exact. +func (d Def) Exact() bool { + for _, v := range d.fields { + if !v.Exact() { + return false + } + } + return true +} + +func (d Def) decode(rv reflect.Value) error { + rv, err := preDecode(rv, reflect.Struct, "Def") + if err != nil { + return err + } + var lowered map[string]string // Lower case -> canonical for d.fields. + rt := rv.Type() + for fi := range rv.NumField() { + fType := rt.Field(fi) + if fType.PkgPath != "" { + continue + } + v := d.fields[fType.Name] + if v == nil { + v = topValue + + // Try a case-insensitive match + canon, ok := d.fields[strings.ToLower(fType.Name)] + if ok { + v = canon + } else { + if lowered == nil { + lowered = make(map[string]string, len(d.fields)) + for k := range d.fields { + l := strings.ToLower(k) + if k != l { + lowered[l] = k + } + } + } + canon, ok := lowered[strings.ToLower(fType.Name)] + if ok { + v = d.fields[canon] + } + } + } + if err := v.Domain.decode(rv.Field(fi)); err != nil { + return newDecodeError(fType.Name, err) + } + } + return nil +} + +func (d Def) keys() []string { + return slices.Sorted(maps.Keys(d.fields)) +} + +func (d Def) All() iter.Seq2[string, *Value] { + // TODO: We call All fairly often. It's probably bad to sort this every + // time. + keys := slices.Sorted(maps.Keys(d.fields)) + return func(yield func(string, *Value) bool) { + for _, k := range keys { + if !yield(k, d.fields[k]) { + return + } + } + } +} + +// A Tuple is a sequence of Values in one of two forms: 1. a fixed-length tuple, +// where each Value can be different or 2. 
a "repeated tuple", which is a Value +// repeated 0 or more times. +type Tuple struct { + vs []*Value + + // repeat, if non-nil, means this Tuple consists of an element repeated 0 or + // more times. If repeat is non-nil, vs must be nil. This is a generator + // function because we don't necessarily want *exactly* the same Value + // repeated. For example, in YAML encoding, a !sum in a repeated tuple needs + // a fresh variable in each instance. + repeat []func(nonDetEnv) (*Value, nonDetEnv) +} + +func NewTuple(vs ...*Value) Tuple { + return Tuple{vs: vs} +} + +func NewRepeat(gens ...func(nonDetEnv) (*Value, nonDetEnv)) Tuple { + return Tuple{repeat: gens} +} + +func (d Tuple) Exact() bool { + if d.repeat != nil { + return false + } + for _, v := range d.vs { + if !v.Exact() { + return false + } + } + return true +} + +func (d Tuple) decode(rv reflect.Value) error { + if d.repeat != nil { + return &inexactError{"repeated tuple", rv.Type().String()} + } + // TODO: We could also do arrays. + rv, err := preDecode(rv, reflect.Slice, "Tuple") + if err != nil { + return err + } + if rv.IsNil() || rv.Cap() < len(d.vs) { + rv.Set(reflect.MakeSlice(rv.Type(), len(d.vs), len(d.vs))) + } else { + rv.SetLen(len(d.vs)) + } + for i, v := range d.vs { + if err := v.Domain.decode(rv.Index(i)); err != nil { + return newDecodeError(fmt.Sprintf("%d", i), err) + } + } + return nil +} + +// A String represents a set of strings. It can represent the intersection of a +// set of regexps, or a single exact string. In general, the domain of a String +// is non-empty, but we do not attempt to prove emptiness of a regexp value. 
+type String struct { + kind stringKind + re []*regexp.Regexp // Intersection of regexps + exact string +} + +type stringKind int + +const ( + stringRegex stringKind = iota + stringExact +) + +func NewStringRegex(exprs ...string) (String, error) { + if len(exprs) == 0 { + exprs = []string{""} + } + v := String{kind: -1} + for _, expr := range exprs { + re, err := regexp.Compile(`\A(?:` + expr + `)\z`) + if err != nil { + return String{}, fmt.Errorf("parsing value: %s", err) + } + + // An exact value narrows the whole domain to exact, so we're done, but + // should keep parsing. + if v.kind == stringExact { + continue + } + + if _, complete := re.LiteralPrefix(); complete { + v = String{kind: stringExact, exact: expr} + } else { + v.kind = stringRegex + v.re = append(v.re, re) + } + } + return v, nil +} + +func NewStringExact(s string) String { + return String{kind: stringExact, exact: s} +} + +// Exact returns whether this Value is known to consist of a single string. +func (d String) Exact() bool { + return d.kind == stringExact +} + +func (d String) decode(rv reflect.Value) error { + if d.kind != stringExact { + return &inexactError{"regex", rv.Type().String()} + } + rv2, err := preDecode(rv, reflect.String, "String") + if err == nil { + rv2.SetString(d.exact) + return nil + } + rv2, err = preDecode(rv, reflect.Int, "String") + if err == nil { + i, err := strconv.Atoi(d.exact) + if err != nil { + return fmt.Errorf("cannot decode String into %s: %s", rv.Type(), err) + } + rv2.SetInt(int64(i)) + return nil + } + return err +} diff --git a/internal/unify/dot.go b/internal/unify/dot.go new file mode 100644 index 00000000..143fa615 --- /dev/null +++ b/internal/unify/dot.go @@ -0,0 +1,183 @@ +// Copyright 2025 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package unify + +import ( + "bytes" + "fmt" + "html" + "io" + "os" + "os/exec" + "strings" +) + +const maxNodes = 30 + +type dotEncoder struct { + w *bytes.Buffer + + idGen int // Node name generation + valLimit int // Limit the number of Values in a subgraph + + idp identPrinter +} + +func newDotEncoder() *dotEncoder { + return &dotEncoder{ + w: new(bytes.Buffer), + } +} + +func (enc *dotEncoder) clear() { + enc.w.Reset() + enc.idGen = 0 +} + +func (enc *dotEncoder) writeTo(w io.Writer) { + fmt.Fprintln(w, "digraph {") + // Use the "new" ranking algorithm, which lets us put nodes from different + // clusters in the same rank. + fmt.Fprintln(w, "newrank=true;") + fmt.Fprintln(w, "node [shape=box, ordering=out];") + + w.Write(enc.w.Bytes()) + fmt.Fprintln(w, "}") +} + +func (enc *dotEncoder) writeSvg(w io.Writer) error { + cmd := exec.Command("dot", "-Tsvg") + in, err := cmd.StdinPipe() + if err != nil { + return err + } + var out bytes.Buffer + cmd.Stdout = &out + cmd.Stderr = os.Stderr + if err := cmd.Start(); err != nil { + return err + } + enc.writeTo(in) + in.Close() + if err := cmd.Wait(); err != nil { + return err + } + // Trim SVG header so the result can be embedded + // + // TODO: In Graphviz 10.0.1, we could use -Tsvg_inline. + svg := out.Bytes() + if i := bytes.Index(svg, []byte("= 0 { + svg = svg[i:] + } + _, err = w.Write(svg) + return err +} + +func (enc *dotEncoder) newID(f string) string { + id := fmt.Sprintf(f, enc.idGen) + enc.idGen++ + return id +} + +func (enc *dotEncoder) node(label, sublabel string) string { + id := enc.newID("n%d") + l := html.EscapeString(label) + if sublabel != "" { + l += fmt.Sprintf("
%s", html.EscapeString(sublabel)) + } + fmt.Fprintf(enc.w, "%s [label=<%s>];\n", id, l) + return id +} + +func (enc *dotEncoder) edge(from, to string, label string, args ...any) { + l := fmt.Sprintf(label, args...) + fmt.Fprintf(enc.w, "%s -> %s [label=%q];\n", from, to, l) +} + +func (enc *dotEncoder) subgraph(v *Value) (vID, cID string) { + enc.valLimit = maxNodes + cID = enc.newID("cluster_%d") + fmt.Fprintf(enc.w, "subgraph %s {\n", cID) + fmt.Fprintf(enc.w, "style=invis;") + vID = enc.value(v) + fmt.Fprintf(enc.w, "}\n") + return +} + +func (enc *dotEncoder) value(v *Value) string { + if enc.valLimit <= 0 { + id := enc.newID("n%d") + fmt.Fprintf(enc.w, "%s [label=\"...\", shape=triangle];\n", id) + return id + } + enc.valLimit-- + + switch vd := v.Domain.(type) { + default: + panic(fmt.Sprintf("unknown domain type %T", vd)) + + case nil: + return enc.node("_|_", "") + + case Top: + return enc.node("_", "") + + // TODO: Like in YAML, figure out if this is just a sum. In dot, we + // could say any unentangled variable is a sum, and if it has more than + // one reference just share the node. 
+ + // case Sum: + // node := enc.node("Sum", "") + // for i, elt := range vd.vs { + // enc.edge(node, enc.value(elt), "%d", i) + // if enc.valLimit <= 0 { + // break + // } + // } + // return node + + case Def: + node := enc.node("Def", "") + for k, v := range vd.All() { + enc.edge(node, enc.value(v), "%s", k) + if enc.valLimit <= 0 { + break + } + } + return node + + case Tuple: + if vd.repeat == nil { + label := "Tuple" + node := enc.node(label, "") + for i, elt := range vd.vs { + enc.edge(node, enc.value(elt), "%d", i) + if enc.valLimit <= 0 { + break + } + } + return node + } else { + // TODO + return enc.node("TODO: Repeat", "") + } + + case String: + switch vd.kind { + case stringExact: + return enc.node(fmt.Sprintf("%q", vd.exact), "") + case stringRegex: + var parts []string + for _, re := range vd.re { + parts = append(parts, fmt.Sprintf("%q", re)) + } + return enc.node(strings.Join(parts, "&"), "") + } + panic("bad String kind") + + case Var: + return enc.node(fmt.Sprintf("Var %s", enc.idp.unique(vd.id)), "") + } +} diff --git a/internal/unify/env.go b/internal/unify/env.go new file mode 100644 index 00000000..618887cd --- /dev/null +++ b/internal/unify/env.go @@ -0,0 +1,500 @@ +// Copyright 2025 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package unify + +import ( + "fmt" + "iter" + "reflect" + "slices" + "strings" +) + +// A nonDetEnv is a non-deterministic mapping from [ident]s to [Value]s. +// +// Logically, this is just a set of deterministic environments, where each +// deterministic environment is a complete mapping from each [ident]s to exactly +// one [Value]. In particular, [ident]s are NOT necessarily independent of each +// other. For example, an environment may have both {x: 1, y: 1} and {x: 2, y: +// 2}, but not {x: 1, y: 2}. +// +// A nonDetEnv is immutable. 
+//
+// Often [ident]s are independent of each other, so the representation optimizes
+// for this by using a cross-product of environment factors, where each factor
+// is a sum of deterministic environments. These operations obey the usual
+// distributional laws, so we can always canonicalize into this form. (It MAY be
+// worthwhile to allow more general expressions of sums and products.)
+//
+// For example, to represent {{x: 1, y: 1}, {x: 2, y: 2}}, in which the
+// variables x and y are dependent, we need a single factor that covers x and y
+// and consists of two terms: {x: 1, y: 1} + {x: 2, y: 2}.
+//
+// If we add a third variable z that can be 1 or 2, independent of x and y, we
+// get four logical environments:
+//
+//	{x: 1, y: 1, z: 1}
+//	{x: 2, y: 2, z: 1}
+//	{x: 1, y: 1, z: 2}
+//	{x: 2, y: 2, z: 2}
+//
+// This could be represented as a single factor that is the sum of these four
+// detEnvs, but because z is independent, it can be a separate factor. Hence,
+// the most compact representation of this environment is:
+//
+//	({x: 1, y: 1} + {x: 2, y: 2}) ⨯ ({z: 1} + {z: 2})
+//
+// That is, two factors, where each is the sum of two terms.
+type nonDetEnv struct {
+	// factors is a list of the multiplicative factors in this environment. The
+	// set of deterministic environments is the cross-product of these factors.
+	// All factors must have disjoint variables.
+	factors []*envSum
+}
+
+// envSum is a sum of deterministic environments, all with the same set of
+// variables.
+type envSum struct {
+	ids   []*ident // TODO: Do we ever use this as a slice? Should it be a map?
+	terms []detEnv
+}
+
+type detEnv struct {
+	vals []*Value // Indexes correspond to envSum.ids
+}
+
+var (
+	// zeroEnvFactor is the "0" value of an [envSum]. It's a factor with no
+	// sum terms. This is easiest to think of as: an empty sum must be the
+	// additive identity, 0.
+	zeroEnvFactor = &envSum{}
+
+	// topEnv is the algebraic one value of a [nonDetEnv].
It has no factors
+	// because the product of no factors is the multiplicative identity.
+	topEnv = nonDetEnv{}
+	// bottomEnv is the algebraic zero value of a [nonDetEnv]. The product of
+	// bottomEnv with x is bottomEnv, and the sum of bottomEnv with y is y.
+	bottomEnv = nonDetEnv{factors: []*envSum{zeroEnvFactor}}
+)
+
+// bind binds id to each of vals in e.
+//
+// It panics if id is already bound in e.
+//
+// Environments are typically initially constructed by starting with [topEnv]
+// and calling bind one or more times.
+func (e nonDetEnv) bind(id *ident, vals ...*Value) nonDetEnv {
+	if e.isBottom() {
+		return bottomEnv
+	}
+
+	// TODO: If any of vals are _, should we just not do anything? We're kind of
+	// inconsistent about whether an id missing from e means id is invalid or
+	// means id is _.
+
+	// Check that id isn't present in e.
+	for _, f := range e.factors {
+		if slices.Contains(f.ids, id) {
+			panic("id " + id.name + " already present in environment")
+		}
+	}
+
+	// Create the new sum term.
+	sum := &envSum{ids: []*ident{id}}
+	for _, val := range vals {
+		sum.terms = append(sum.terms, detEnv{vals: []*Value{val}})
+	}
+	// Multiply it in.
+	factors := append(e.factors[:len(e.factors):len(e.factors)], sum)
+	return nonDetEnv{factors}
+}
+
+func (e nonDetEnv) isBottom() bool {
+	if len(e.factors) == 0 {
+		// This is top.
+		return false
+	}
+	return len(e.factors[0].terms) == 0
+}
+
+func (e nonDetEnv) vars() iter.Seq[*ident] {
+	return func(yield func(*ident) bool) {
+		for _, t := range e.factors {
+			for _, id := range t.ids {
+				if !yield(id) {
+					return
+				}
+			}
+		}
+	}
+}
+
+// all enumerates all deterministic environments in e.
+//
+// The result slice is in the same order as the slice returned by
+// [nonDetEnv.vars]. The slice is reused between iterations.
+func (e nonDetEnv) all() iter.Seq[[]*Value] { + return func(yield func([]*Value) bool) { + var vals []*Value + var walk func(int) bool + walk = func(i int) bool { + if i == len(e.factors) { + return yield(vals) + } + start := len(vals) + for _, term := range e.factors[i].terms { + vals = append(vals[:start], term.vals...) + if !walk(i + 1) { + return false + } + } + return true + } + walk(0) + } +} + +// allOrdered is like all, but idOrder controls the order of the values in the +// resulting slice. Any [ident]s in idOrder that are missing from e are set to +// topValue. The values of idOrder must be a bijection with [0, n). +func (e nonDetEnv) allOrdered(idOrder map[*ident]int) iter.Seq[[]*Value] { + valsLen := 0 + for _, idx := range idOrder { + valsLen = max(valsLen, idx+1) + } + + return func(yield func([]*Value) bool) { + vals := make([]*Value, valsLen) + // e may not have all of the IDs in idOrder. Make sure any missing + // values are top. + for i := range vals { + vals[i] = topValue + } + var walk func(int) bool + walk = func(i int) bool { + if i == len(e.factors) { + return yield(vals) + } + for _, term := range e.factors[i].terms { + for j, id := range e.factors[i].ids { + vals[idOrder[id]] = term.vals[j] + } + if !walk(i + 1) { + return false + } + } + return true + } + walk(0) + } +} + +func crossEnvs(envs ...nonDetEnv) nonDetEnv { + // Combine the factors of envs + var factors []*envSum + haveIDs := map[*ident]struct{}{} + for _, e := range envs { + if e.isBottom() { + // The environment is bottom, so the whole product goes to + // bottom. + return bottomEnv + } + // Check that all ids are disjoint. + for _, f := range e.factors { + for _, id := range f.ids { + if _, ok := haveIDs[id]; ok { + panic("conflict on " + id.name) + } + haveIDs[id] = struct{}{} + } + } + // Everything checks out. Multiply the factors. + factors = append(factors, e.factors...) 
+	}
+	return nonDetEnv{factors: factors}
+}
+
+func sumEnvs(envs ...nonDetEnv) nonDetEnv {
+	// nonDetEnv is a product at the top level, so we implement summation using
+	// the distributive law. We also use associativity to keep as many top-level
+	// factors as we can, since those are what keep the environment compact.
+	//
+	//	a * b * c + a * d (where a, b, c, and d are factors)
+	//	(combine common factors)
+	//	= a * (b * c + d)
+	//	(expand factors into their sum terms)
+	//	= a * ((b_1 + b_2 + ...) * (c_1 + c_2 + ...) + d)
+	//	(where b_i and c_i are deterministic environments)
+	//	(FOIL)
+	//	= a * (b_1 * c_1 + b_1 * c_2 + b_2 * c_1 + b_2 * c_2 + d)
+	//	(all factors are now in canonical form)
+	//	= a * e
+	//
+	// The product of two deterministic environments is a deterministic
+	// environment, and the sum of deterministic environments is a factor, so
+	// this process results in the canonical product-of-sums form.
+	//
+	// TODO: This is a bit of a one-way process. We could try to factor the
+	// environment to reduce the number of sums. I'm not sure how to do this
+	// efficiently. It might be possible to guide it by gathering the
+	// distributions of each ID's bindings. E.g., if there are 12 deterministic
+	// environments in a sum and $x is bound to 4 different values, each 3
+	// times, then it *might* be possible to factor out $x into a 4-way sum of
+	// its own.
+
+	factors, toSum := commonFactors(envs)
+
+	if len(toSum) > 0 {
+		// Collect all IDs into a single order.
+		var ids []*ident
+		idOrder := make(map[*ident]int)
+		for _, e := range toSum {
+			for v := range e.vars() {
+				if _, ok := idOrder[v]; !ok {
+					idOrder[v] = len(ids)
+					ids = append(ids, v)
+				}
+			}
+		}
+
+		// Flatten out each term in the sum.
+ var summands []detEnv + for _, env := range toSum { + for vals := range env.allOrdered(idOrder) { + summands = append(summands, detEnv{vals: slices.Clone(vals)}) + } + } + factors = append(factors, &envSum{ids: ids, terms: summands}) + } + + return nonDetEnv{factors: factors} +} + +// commonFactors finds common factors that can be factored out of a summation of +// [nonDetEnv]s. +func commonFactors(envs []nonDetEnv) (common []*envSum, toSum []nonDetEnv) { + // Drop any bottom environments. They don't contribute to the sum and they + // would complicate some logic below. + envs = slices.DeleteFunc(envs, func(e nonDetEnv) bool { + return e.isBottom() + }) + if len(envs) == 0 { + return bottomEnv.factors, nil + } + + // It's very common that the exact same factor will appear across all envs. + // Keep those factored out. + // + // TODO: Is it also common to have vars that are bound to the same value + // across all envs? If so, we could also factor those into common terms. + counts := map[*envSum]int{} + for _, e := range envs { + for _, f := range e.factors { + counts[f]++ + } + } + for _, f := range envs[0].factors { + if counts[f] == len(envs) { + // Common factor + common = append(common, f) + } + } + + // Any other factors need to be multiplied out. + for _, env := range envs { + var newFactors []*envSum + for _, f := range env.factors { + if counts[f] != len(envs) { + newFactors = append(newFactors, f) + } + } + if len(newFactors) > 0 { + toSum = append(toSum, nonDetEnv{factors: newFactors}) + } + } + + return common, toSum +} + +// envPartition is a subset of an env where id is bound to value in all +// deterministic environments. +type envPartition struct { + id *ident + value *Value + env nonDetEnv +} + +func (e nonDetEnv) partitionBy(id *ident) []envPartition { + if e.isBottom() { + // Bottom contains all variables + return []envPartition{{id: id, value: bottomValue, env: e}} + } + + // Find the factor containing id and id's index in that factor. 
+	idFactor, idIndex := -1, -1
+	var newIDs []*ident
+	for factI, fact := range e.factors {
+		idI := slices.Index(fact.ids, id)
+		if idI < 0 {
+			continue
+		} else if idFactor != -1 {
+			panic("multiple factors containing id " + id.name)
+		} else {
+			idFactor, idIndex = factI, idI
+			// Drop id from this factor's IDs
+			newIDs = without(fact.ids, idI)
+		}
+	}
+	if idFactor == -1 {
+		panic("id " + id.name + " not found in environment")
+	}
+
+	// If id is the only variable in its factor, then dropping it is equivalent
+	// to making the factor be the unit value, so we can just drop the factor.
+	// (And if this is the only factor, we'll arrive at [topEnv], which is
+	// exactly what we want!). In this case we can use the same nonDetEnv in all
+	// of the partitions.
+	isUnit := len(newIDs) == 0
+	var unitFactors []*envSum
+	if isUnit {
+		unitFactors = without(e.factors, idFactor)
+	}
+
+	// Create a partition for each distinct value of id.
+	var parts []envPartition
+	partIndex := map[*Value]int{}
+	for _, det := range e.factors[idFactor].terms {
+		val := det.vals[idIndex]
+		i, ok := partIndex[val]
+		if !ok {
+			i = len(parts)
+			var factors []*envSum
+			if isUnit {
+				factors = unitFactors
+			} else {
+				// Copy all other factors
+				factors = slices.Clone(e.factors)
+				factors[idFactor] = &envSum{ids: newIDs}
+			}
+			parts = append(parts, envPartition{id: id, value: val, env: nonDetEnv{factors: factors}})
+			partIndex[val] = i
+		}
+
+		if !isUnit {
+			factor := parts[i].env.factors[idFactor]
+			newVals := without(det.vals, idIndex)
+			factor.terms = append(factor.terms, detEnv{vals: newVals})
+		}
+	}
+	return parts
+}
+
+type ident struct {
+	_    [0]func() // Not comparable (only compare *ident)
+	name string
+}
+
+type Var struct {
+	id *ident
+}
+
+func (d Var) Exact() bool {
+	// These can't appear in concrete Values.
+ panic("Exact called on non-concrete Value") +} + +func (d Var) decode(rv reflect.Value) error { + return &inexactError{"var", rv.Type().String()} +} + +func (d Var) unify(w *Value, e nonDetEnv, swap bool, uf *unifier) (Domain, nonDetEnv, error) { + // TODO: Vars from !sums in the input can have a huge number of values. + // Unifying these could be way more efficient with some indexes over any + // exact values we can pull out, like Def fields that are exact Strings. + // Maybe we try to produce an array of yes/no/maybe matches and then we only + // have to do deeper evaluation of the maybes. We could probably cache this + // on an envTerm. It may also help to special-case Var/Var unification to + // pick which one to index versus enumerate. + + if vd, ok := w.Domain.(Var); ok && d.id == vd.id { + // Unifying $x with $x results in $x. If we descend into this we'll have + // problems because we strip $x out of the environment to keep ourselves + // honest and then can't find it on the other side. + // + // TODO: I'm not positive this is the right fix. + return vd, e, nil + } + + // We need to unify w with the value of d in each possible environment. We + // can save some work by grouping environments by the value of d, since + // there will be a lot of redundancy here. + var nEnvs []nonDetEnv + envParts := e.partitionBy(d.id) + for i, envPart := range envParts { + exit := uf.enterVar(d.id, i) + // Each branch logically gets its own copy of the initial environment + // (narrowed down to just this binding of the variable), and each branch + // may result in different changes to that starting environment. + res, e2, err := w.unify(envPart.value, envPart.env, swap, uf) + exit.exit() + if err != nil { + return nil, nonDetEnv{}, err + } + if res.Domain == nil { + // This branch entirely failed to unify, so it's gone. 
+ continue + } + nEnv := e2.bind(d.id, res) + nEnvs = append(nEnvs, nEnv) + } + + if len(nEnvs) == 0 { + // All branches failed + return nil, bottomEnv, nil + } + + // The effect of this is entirely captured in the environment. We can return + // back the same Bind node. + return d, sumEnvs(nEnvs...), nil +} + +// An identPrinter maps [ident]s to unique string names. +type identPrinter struct { + ids map[*ident]string + idGen map[string]int +} + +func (p *identPrinter) unique(id *ident) string { + if p.ids == nil { + p.ids = make(map[*ident]string) + p.idGen = make(map[string]int) + } + + name, ok := p.ids[id] + if !ok { + gen := p.idGen[id.name] + p.idGen[id.name]++ + if gen == 0 { + name = id.name + } else { + name = fmt.Sprintf("%s#%d", id.name, gen) + } + p.ids[id] = name + } + + return name +} + +func (p *identPrinter) slice(ids []*ident) string { + var strs []string + for _, id := range ids { + strs = append(strs, p.unique(id)) + } + return fmt.Sprintf("[%s]", strings.Join(strs, ", ")) +} + +func without[Elt any](s []Elt, i int) []Elt { + return append(s[:i:i], s[i+1:]...) +} diff --git a/internal/unify/html.go b/internal/unify/html.go new file mode 100644 index 00000000..d2434fe4 --- /dev/null +++ b/internal/unify/html.go @@ -0,0 +1,190 @@ +// Copyright 2025 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package unify + +import ( + "fmt" + "html" + "io" + "strings" +) + +func (t *tracer) writeHTML(w io.Writer) { + if !t.saveTree { + panic("writeHTML called without tracer.saveTree") + } + + fmt.Fprintf(w, "", htmlCSS) + for _, root := range t.trees { + dot := newDotEncoder() + html := htmlTracer{w: w, dot: dot} + html.writeTree(root) + } + fmt.Fprintf(w, "\n") +} + +const htmlCSS = ` +.unify { + display: grid; + grid-auto-columns: min-content; + text-align: center; +} + +.header { + grid-row: 1; + font-weight: bold; + padding: 0.25em; + position: sticky; + top: 0; + background: white; +} + +.envFactor { + display: grid; + grid-auto-rows: min-content; + grid-template-columns: subgrid; + text-align: center; +} +` + +type htmlTracer struct { + w io.Writer + dot *dotEncoder + svgs map[*Value]string +} + +func (t *htmlTracer) writeTree(node *traceTree) { + // TODO: This could be really nice. + // + // - Put nodes that were unified on the same rank with {rank=same; a; b} + // + // - On hover, highlight nodes that node was unified with and the result. If + // it's a variable, highlight it in the environment, too. + // + // - On click, show the details of unifying that node. + // + // This could be the only way to navigate, without necessarily needing the + // whole nest of nodes. + + // TODO: It might be possible to write this out on the fly. + + t.emit([]*Value{node.v, node.w}, []string{"v", "w"}, node.envIn) + + // Render children. + for i, child := range node.children { + if i >= 10 { + fmt.Fprintf(t.w, `
...
`) + break + } + fmt.Fprintf(t.w, `
%s`, html.EscapeString(child.label)) + t.writeTree(child) + fmt.Fprintf(t.w, "
\n") + } + + // Render result. + if node.err != nil { + fmt.Fprintf(t.w, "Error: %s\n", html.EscapeString(node.err.Error())) + } else { + t.emit([]*Value{node.res}, []string{"res"}, node.env) + } +} + +func (t *htmlTracer) svg(v *Value) string { + if s, ok := t.svgs[v]; ok { + return s + } + var buf strings.Builder + t.dot.subgraph(v) + t.dot.writeSvg(&buf) + t.dot.clear() + svg := buf.String() + if t.svgs == nil { + t.svgs = make(map[*Value]string) + } + t.svgs[v] = svg + buf.Reset() + return svg +} + +func (t *htmlTracer) emit(vs []*Value, labels []string, env nonDetEnv) { + fmt.Fprintf(t.w, `
`) + for i, v := range vs { + fmt.Fprintf(t.w, `
%s
`, i+1, html.EscapeString(labels[i])) + fmt.Fprintf(t.w, `
%s
`, i+1, t.svg(v)) + } + + t.emitEnv(env, len(vs)) + + fmt.Fprintf(t.w, `
`) +} + +func (t *htmlTracer) emitEnv(env nonDetEnv, colStart int) { + if env.isBottom() { + fmt.Fprintf(t.w, `
_|_
`, colStart+1) + return + } + + colLimit := 10 + col := colStart + for i, f := range env.factors { + if i > 0 { + // Print * between each factor. + fmt.Fprintf(t.w, `
×
`, col+1) + col++ + } + + var idCols []int + for i, id := range f.ids { + var str string + if i == 0 && len(f.ids) > 1 { + str = "(" + } + if colLimit <= 0 { + str += "..." + } else { + str += html.EscapeString(t.dot.idp.unique(id)) + } + if (i == len(f.ids)-1 || colLimit <= 0) && len(f.ids) > 1 { + str += ")" + } + + fmt.Fprintf(t.w, `
%s
`, col+1, str) + idCols = append(idCols, col) + + col++ + if colLimit <= 0 { + break + } + colLimit-- + } + + fmt.Fprintf(t.w, `
`, idCols[0]+1, col+1) + rowLimit := 10 + row := 0 + for _, term := range f.terms { + // TODO: Print + between rows? With some horizontal something to + // make it clear what it applies across? + + for i, val := range term.vals { + fmt.Fprintf(t.w, `
`, row+1, idCols[i]-idCols[0]+1) + if i < len(term.vals)-1 && i == len(idCols)-1 { + fmt.Fprintf(t.w, `...
`) + break + } else if rowLimit <= 0 { + fmt.Fprintf(t.w, `...
`) + } else { + fmt.Fprintf(t.w, `%s`, t.svg(val)) + } + } + + row++ + if rowLimit <= 0 { + break + } + rowLimit-- + } + fmt.Fprintf(t.w, ``) + } +} diff --git a/internal/unify/pos.go b/internal/unify/pos.go new file mode 100644 index 00000000..4f7046a4 --- /dev/null +++ b/internal/unify/pos.go @@ -0,0 +1,33 @@ +// Copyright 2025 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package unify + +import ( + "fmt" +) + +type Pos struct { + Path string + Line int +} + +func (p Pos) String() string { + var b []byte + b, _ = p.AppendText(b) + return string(b) +} + +func (p Pos) AppendText(b []byte) ([]byte, error) { + if p.Line == 0 { + if p.Path == "" { + return append(b, "?:?"...), nil + } else { + return append(b, p.Path...), nil + } + } else if p.Path == "" { + return fmt.Appendf(b, "?:%d", p.Line), nil + } + return fmt.Appendf(b, "%s:%d", p.Path, p.Line), nil +} diff --git a/internal/unify/testdata/unify.yaml b/internal/unify/testdata/unify.yaml new file mode 100644 index 00000000..131e527c --- /dev/null +++ b/internal/unify/testdata/unify.yaml @@ -0,0 +1,174 @@ +# Basic tests of unification + +# +# Terminals +# + +unify: +- _ +- _ +want: + _ +--- +unify: +- _ +- test +want: + test +--- +unify: +- test +- t?est +want: + test +--- +unify: +- 1 +- 1 +want: + 1 +--- +unify: +- test +- foo +want: + _|_ + +# +# Tuple +# + +--- +unify: +- [a, b] +- [a, b] +want: + [a, b] +--- +unify: +- [a, _] +- [_, b] +want: + [a, b] +--- +unify: +- ["ab?c", "de?f"] +- [ac, def] +want: + [ac, def] + +# +# Repeats +# + +--- +unify: +- !repeat [a] +- [_] +want: + [a] +--- +unify: +- !repeat [a] +- [_, _] +want: + [a, a] +--- +unify: +- !repeat [a] +- [b] +want: + _|_ +--- +unify: +- !repeat [xy*] +- [x, xy, xyy] +want: + [x, xy, xyy] +--- +unify: +- !repeat [xy*] +- !repeat ["xz?y*"] +- [x, xy, xyy] +want: + [x, xy, xyy] +--- +unify: +- !repeat [!sum [a, b]] +- [a, b, a] +all: +- [a, b, a] 
+--- +unify: +- !repeat [!sum [a, b]] +- !repeat [!sum [b, c]] +- [b, b, b] +all: +- [b, b, b] +--- +unify: +- !repeat [!sum [a, b]] +- !repeat [!sum [b, c]] +- [a] +all: [] + +# +# Def +# + +--- +unify: +- {a: a, b: b} +- {a: a, b: b} +want: + {a: a, b: b} +--- +unify: +- {a: a} +- {b: b} +want: + {a: a, b: b} + +# +# Sum +# + +--- +unify: +- !sum [1, 2] +- !sum [2, 3] +all: +- 2 +--- +unify: +- !sum [{label: a, value: abc}, {label: b, value: def}] +- !sum [{value: "ab?c", extra: d}, {value: "def?", extra: g}] +all: +- {extra: d, label: a, value: abc} +- {extra: g, label: b, value: def} +--- +# A sum of repeats must deal with different dynamically-created variables in +# each branch. +unify: +- !sum [!repeat [a], !repeat [b]] +- [a, a, a] +all: +- [a, a, a] +--- +unify: +- !sum [!repeat [a], !repeat [b]] +- [a, a, b] +all: [] +--- +# Exercise sumEnvs with more than one result +unify: +- !sum + - [a|b, c|d] + - [e, g] +- [!sum [a, b, e, f], !sum [c, d, g, h]] +all: +- [a, c] +- [a, d] +- [b, c] +- [b, d] +- [e, g] diff --git a/internal/unify/testdata/vars.yaml b/internal/unify/testdata/vars.yaml new file mode 100644 index 00000000..fe8a57e4 --- /dev/null +++ b/internal/unify/testdata/vars.yaml @@ -0,0 +1,175 @@ +# +# Basic tests +# + +name: "basic string" +unify: +- $x +- test +all: +- test +--- +name: "basic tuple" +unify: +- [$x, $x] +- [test, test] +all: +- [test, test] +--- +name: "three tuples" +unify: +- [$x, $x] +- [test, _] +- [_, test] +all: +- [test, test] +--- +name: "basic def" +unify: +- {a: $x, b: $x} +- {a: test, b: test} +all: +- {a: test, b: test} +--- +name: "three defs" +unify: +- {a: $x, b: $x} +- {a: test} +- {b: test} +all: +- {a: test, b: test} + +# +# Bottom tests +# + +--- +name: "basic bottom" +unify: +- [$x, $x] +- [test, foo] +all: [] +--- +name: "three-way bottom" +unify: +- [$x, $x] +- [test, _] +- [_, foo] +all: [] + +# +# Basic sum tests +# + +--- +name: "basic sum" +unify: +- $x +- !sum [a, b] +all: +- a +- b +--- +name: "sum of 
tuples" +unify: +- [$x] +- !sum [[a], [b]] +all: +- [a] +- [b] +--- +name: "acausal sum" +unify: +- [_, !sum [a, b]] +- [$x, $x] +all: +- [a, a] +- [b, b] + +# +# Transitivity tests +# + +--- +name: "transitivity" +unify: +- [_, _, _, test] +- [$x, $x, _, _] +- [ _, $x, $x, _] +- [ _, _, $x, $x] +all: +- [test, test, test, test] + +# +# Multiple vars +# + +--- +name: "basic uncorrelated vars" +unify: +- - !sum [1, 2] + - !sum [3, 4] +- - $a + - $b +all: +- [1, 3] +- [1, 4] +- [2, 3] +- [2, 4] +--- +name: "uncorrelated vars" +unify: +- - !sum [1, 2] + - !sum [3, 4] + - !sum [1, 2] +- - $a + - $b + - $a +all: +- [1, 3, 1] +- [1, 4, 1] +- [2, 3, 2] +- [2, 4, 2] +--- +name: "entangled vars" +unify: +- - !sum [[1,2],[3,4]] + - !sum [[2,1],[3,4],[4,3]] +- - [$a, $b] + - [$b, $a] +all: +- - [1, 2] + - [2, 1] +- - [3, 4] + - [4, 3] + +# +# End-to-end examples +# + +--- +name: "end-to-end" +unify: +- go: Add + in: + - go: $t + - go: $t +- in: !repeat + - !sum + - go: Int32x4 + base: int + - go: Uint32x4 + base: uint +all: +- go: Add + in: + - base: int + go: Int32x4 + - base: int + go: Int32x4 +- go: Add + in: + - base: uint + go: Uint32x4 + - base: uint + go: Uint32x4 diff --git a/internal/unify/trace.go b/internal/unify/trace.go new file mode 100644 index 00000000..f1a7ea2c --- /dev/null +++ b/internal/unify/trace.go @@ -0,0 +1,168 @@ +// Copyright 2025 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package unify + +import ( + "fmt" + "io" + "strings" + + "gopkg.in/yaml.v3" +) + +// debugDotInHTML, if true, includes dot code for all graphs in the HTML. Useful +// for debugging the dot output itself. +const debugDotInHTML = false + +var Debug struct { + // UnifyLog, if non-nil, receives a streaming text trace of unification. + UnifyLog io.Writer + + // HTML, if non-nil, writes an HTML trace of unification to HTML. 
// tracer accumulates a trace of a unification run. A nil *tracer is valid
// and disables all tracing (every method nil-checks its receiver).
type tracer struct {
	logw io.Writer // streaming text log destination, or nil

	enc yamlEncoder // Print consistent idents throughout

	saveTree bool // if set, record tree; required for HTML output

	// path is the stack of labels from enter calls that have not yet been
	// exited; it prefixes every logf line.
	path []string

	node  *traceTree   // current position in the tree being recorded
	trees []*traceTree // roots of all recorded unification trees
}

// traceTree is one node of the recorded unification tree: a single
// Value-vs-Value unification step and its result.
type traceTree struct {
	label string // Identifies this node as a child of parent
	v, w  *Value // Unification inputs
	envIn nonDetEnv
	res   *Value // Unification result
	env   nonDetEnv
	err   error // or error

	parent   *traceTree
	children []*traceTree
}

// tracerExit undoes a matching tracer.enter; the zero value is a no-op.
type tracerExit struct {
	t    *tracer
	len  int        // length to truncate t.path back to
	node *traceTree // node to restore as t.node
}

// enter pushes a formatted label onto the trace path (and, when tree
// recording is on, descends into a new child tree node). The returned
// tracerExit must be used to pop the label again.
func (t *tracer) enter(pat string, vals ...any) tracerExit {
	if t == nil {
		return tracerExit{}
	}

	label := fmt.Sprintf(pat, vals...)

	var p *traceTree
	if t.saveTree {
		p = t.node
		if p != nil {
			t.node = &traceTree{label: label, parent: p}
			p.children = append(p.children, t.node)
		}
	}

	t.path = append(t.path, label)
	return tracerExit{t, len(t.path) - 1, p}
}

// enterVar is enter specialized for descending into one branch of a
// variable's bindings.
func (t *tracer) enterVar(id *ident, branch int) tracerExit {
	if t == nil {
		return tracerExit{}
	}

	// Use the tracer's ident printer
	return t.enter("Var %s br %d", t.enc.idp.unique(id), branch)
}

// exit restores the tracer to its state before the matching enter call.
func (te tracerExit) exit() {
	if te.t == nil {
		return
	}
	te.t.path = te.t.path[:te.len]
	te.t.node = te.node
}
+ indent = strings.Repeat(" ", len(prefix)) + } + return prefix + strings.ReplaceAll(s, "\n", "\n"+indent) +} + +func yamlf(prefix string, node *yaml.Node) string { + b, err := yaml.Marshal(node) + if err != nil { + return fmt.Sprintf("", err) + } + return strings.TrimRight(indentf(prefix, "%s", b), " \n") +} + +func (t *tracer) logf(pat string, vals ...any) { + if t == nil || t.logw == nil { + return + } + prefix := fmt.Sprintf("[%s] ", strings.Join(t.path, "/")) + s := indentf(prefix, pat, vals...) + s = strings.TrimRight(s, " \n") + fmt.Fprintf(t.logw, "%s\n", s) +} + +func (t *tracer) traceUnify(v, w *Value, e nonDetEnv) { + if t == nil { + return + } + + t.logf("Unify\n%s\nwith\n%s\nin\n%s", + yamlf(" ", t.enc.value(v)), + yamlf(" ", t.enc.value(w)), + yamlf(" ", t.enc.env(e))) + + if t.saveTree { + if t.node == nil { + t.node = &traceTree{} + t.trees = append(t.trees, t.node) + } + t.node.v, t.node.w, t.node.envIn = v, w, e + } +} + +func (t *tracer) traceDone(res *Value, e nonDetEnv, err error) { + if t == nil { + return + } + + if err != nil { + t.logf("==> %s", err) + } else { + t.logf("==>\n%s", yamlf(" ", t.enc.closure(Closure{res, e}))) + } + + if t.saveTree { + node := t.node + if node == nil { + panic("popped top of trace stack") + } + node.res, node.err = res, err + node.env = e + } +} diff --git a/internal/unify/unify.go b/internal/unify/unify.go new file mode 100644 index 00000000..6ebed7bd --- /dev/null +++ b/internal/unify/unify.go @@ -0,0 +1,322 @@ +// Copyright 2025 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package unify implements unification of structured values. +// +// A [Value] represents a possibly infinite set of concrete values, where a +// value is either a string ([String]), a tuple of values ([Tuple]), or a +// string-keyed map of values called a "def" ([Def]). These sets can be further +// constrained by variables ([Var]). 
A [Value] combined with bindings of +// variables is a [Closure]. +// +// [Unify] finds a [Closure] that satisfies two or more other [Closure]s. This +// can be thought of as intersecting the sets represented by these Closures' +// values, or as the greatest lower bound/infimum of these Closures. If no such +// Closure exists, the result of unification is "bottom", or the empty set. +// +// # Examples +// +// The regular expression "a*" is the infinite set of strings of zero or more +// "a"s. "a*" can be unified with "a" or "aa" or "aaa", and the result is just +// "a", "aa", or "aaa", respectively. However, unifying "a*" with "b" fails +// because there are no values that satisfy both. +// +// Sums express sets directly. For example, !sum [a, b] is the set consisting of +// "a" and "b". Unifying this with !sum [b, c] results in just "b". This also +// makes it easy to demonstrate that unification isn't necessarily a single +// concrete value. For example, unifying !sum [a, b, c] with !sum [b, c, d] +// results in two concrete values: "b" and "c". +// +// The special value _ or "top" represents all possible values. Unifying _ with +// any value x results in x. +// +// Unifying composite values—tuples and defs—unifies their elements. +// +// The value [a*, aa] is an infinite set of tuples. If we unify that with the +// value [aaa, a*], the only possible value that satisfies both is [aaa, aa]. +// Likewise, this is the intersection of the sets described by these two values. +// +// Defs are similar to tuples, but they are indexed by strings and don't have a +// fixed length. For example, {x: a, y: b} is a def with two fields. Any field +// not mentioned in a def is implicitly top. Thus, unifying this with {y: b, z: +// c} results in {x: a, y: b, z: c}. +// +// Variables constrain values. For example, the value [$x, $x] represents all +// tuples whose first and second values are the same, but doesn't otherwise +// constrain that value. 
// Unify computes a Closure that satisfies each input Closure. If no such
// Closure exists, it returns bottom.
//
// Unification is performed pairwise, left to right: the running result is
// unified with each subsequent closure in turn.
func Unify(closures ...Closure) (Closure, error) {
	if len(closures) == 0 {
		// Unifying nothing yields top: the set of all values.
		return Closure{topValue, topEnv}, nil
	}

	// Tracing is shared across all pairwise steps so the whole run appears
	// in one log/HTML trace.
	var trace *tracer
	if Debug.UnifyLog != nil || Debug.HTML != nil {
		trace = &tracer{
			logw:     Debug.UnifyLog,
			saveTree: Debug.HTML != nil,
		}
	}

	unified := closures[0]
	for _, c := range closures[1:] {
		var err error
		uf := newUnifier()
		uf.tracer = trace
		// Combine the two environments before unifying the values under
		// the combined environment.
		e := crossEnvs(unified.env, c.env)
		unified.val, unified.env, err = unified.val.unify(c.val, e, false, uf)
		if Debug.HTML != nil {
			uf.writeHTML(Debug.HTML)
		}
		if err != nil {
			return Closure{}, err
		}
	}

	return unified, nil
}
// unify1 attempts to unify v with w in environment e, dispatching on v's
// domain kind. It handles each (v, w) kind pairing in one direction only;
// if v's kind cannot drive the unification it returns the errDomains
// sentinel, and the caller (Value.unify) retries with the operands
// swapped before reporting a real kind-mismatch error.
//
// swap records whether the caller already swapped the operands, so that
// nested unifications can keep reporting positions in the user's order.
func (v *Value) unify1(w *Value, e nonDetEnv, swap bool, uf *unifier) (Domain, nonDetEnv, error) {
	// TODO: If there's an error, attach position information to it.

	vd, wd := v.Domain, w.Domain

	// Bottom returns bottom, and eliminates all possible environments.
	if vd == nil || wd == nil {
		return nil, bottomEnv, nil
	}

	// Top always returns the other.
	if _, ok := vd.(Top); ok {
		return wd, e, nil
	}

	// Variables
	if vd, ok := vd.(Var); ok {
		return vd.unify(w, e, swap, uf)
	}

	// Composite values
	if vd, ok := vd.(Def); ok {
		if wd, ok := wd.(Def); ok {
			return vd.unify(wd, e, swap, uf)
		}
	}
	if vd, ok := vd.(Tuple); ok {
		if wd, ok := wd.(Tuple); ok {
			return vd.unify(wd, e, swap, uf)
		}
	}

	// Scalar values
	if vd, ok := vd.(String); ok {
		if wd, ok := wd.(String); ok {
			res := vd.unify(wd)
			if res == nil {
				// String unification failed, so no environment survives.
				e = bottomEnv
			}
			return res, e, nil
		}
	}

	// v's kind doesn't know how to unify with w; let the caller try the
	// reverse direction.
	return nil, nonDetEnv{}, errDomains
}
// unify unifies two tuples. Fixed-length tuples unify element-wise;
// repeated (lazily generated) tuples are expanded to the other side's
// length first. If both sides are repeated, expansion is impossible, so
// their generators are concatenated for later unification.
func (v Tuple) unify(w Tuple, e nonDetEnv, swap bool, uf *unifier) (Domain, nonDetEnv, error) {
	if v.repeat != nil && w.repeat != nil {
		// Since we generate the content of these lazily, there's not much we
		// can do but just stick them on a list to unify later.
		return Tuple{repeat: concat(v.repeat, w.repeat)}, e, nil
	}

	// Expand any repeated tuples. At most one side is repeated here, so
	// the other side's fixed length tells us how many elements to generate.
	tuples := make([]Tuple, 0, 2)
	if v.repeat == nil {
		tuples = append(tuples, v)
	} else {
		v2, e2 := v.doRepeat(e, len(w.vs))
		tuples = append(tuples, v2...)
		e = e2
	}
	if w.repeat == nil {
		tuples = append(tuples, w)
	} else {
		w2, e2 := w.doRepeat(e, len(v.vs))
		tuples = append(tuples, w2...)
		e = e2
	}

	// Now unify all of the tuples (usually this will be just 2 tuples)
	out := tuples[0]
	for _, t := range tuples[1:] {
		if len(out.vs) != len(t.vs) {
			// Length mismatch means no tuple can satisfy both: bottom.
			uf.logf("tuple length mismatch")
			return nil, bottomEnv, nil
		}
		zs := make([]*Value, len(out.vs))
		for i, v1 := range out.vs {
			exit := uf.enter("%d", i)
			z, e2, err := v1.unify(t.vs[i], e, swap, uf)
			exit.exit()
			if err != nil {
				return nil, nonDetEnv{}, err
			} else if z.Domain == nil {
				// One bottom element makes the whole tuple bottom.
				return nil, bottomEnv, nil
			}
			zs[i] = z
			// Thread the narrowed environment through later elements.
			e = e2
		}
		out = Tuple{vs: zs}
	}

	return out, e, nil
}
// concat returns the concatenation of a and b. When a is empty it returns
// b unchanged (no allocation); otherwise it appends b to a copy-on-grow
// view of a whose capacity is clipped so the append can never clobber
// memory shared with other slices aliasing a's backing array.
func concat[T any](a, b []T) []T {
	if len(a) == 0 {
		return b
	}
	// Full slice expression: capacity == length forces append to allocate
	// if it needs room, instead of writing past a's end.
	clipped := a[:len(a):len(a)]
	return append(clipped, b...)
}
+ base := filepath.Base(path) + if base[0] == '_' { + continue + } + if !strings.HasSuffix(base, ".yaml") { + t.Errorf("non-.yaml file in testdata: %s", base) + continue + } + base = strings.TrimSuffix(base, ".yaml") + + t.Run(base, func(t *testing.T) { + testUnify(t, path) + }) + } +} + +func testUnify(t *testing.T, path string) { + f, err := os.Open(path) + if err != nil { + t.Fatal(err) + } + defer f.Close() + + type testCase struct { + Skip bool + Name string + Unify []Closure + Want yaml.Node + All yaml.Node + } + dec := yaml.NewDecoder(f) + + for i := 0; ; i++ { + var tc testCase + err := dec.Decode(&tc) + if err == io.EOF { + break + } + if err != nil { + t.Fatal(err) + } + + name := tc.Name + if name == "" { + name = fmt.Sprint(i) + } + + t.Run(name, func(t *testing.T) { + if tc.Skip { + t.Skip("skip: true set in test case") + } + + defer func() { + p := recover() + if p != nil || t.Failed() { + // Redo with a trace + // + // TODO: Use t.Output() in Go 1.25. + var buf bytes.Buffer + Debug.UnifyLog = &buf + func() { + defer func() { + // If the original unify panicked, the second one + // probably will, too. Ignore it and let the first panic + // bubble. + recover() + }() + Unify(tc.Unify...) + }() + Debug.UnifyLog = nil + t.Logf("Trace:\n%s", buf.String()) + } + if p != nil { + panic(p) + } + }() + + // Unify the test cases + // + // TODO: Try reordering the inputs also + c, err := Unify(tc.Unify...) + if err != nil { + // TODO: Tests of errors + t.Fatal(err) + } + + // Encode the result back to YAML so we can check if it's structurally + // equal. + clean := func(val any) *yaml.Node { + var node yaml.Node + node.Encode(val) + for n := range allYamlNodes(&node) { + // Canonicalize the style. There may be other style flags we need to + // muck with. 
+ n.Style &^= yaml.FlowStyle + n.HeadComment = "" + n.LineComment = "" + n.FootComment = "" + } + return &node + } + check := func(gotVal any, wantNode *yaml.Node) { + got, err := yaml.Marshal(clean(gotVal)) + if err != nil { + t.Fatalf("Encoding Value back to yaml failed: %s", err) + } + want, err := yaml.Marshal(clean(wantNode)) + if err != nil { + t.Fatalf("Encoding Want back to yaml failed: %s", err) + } + + if !bytes.Equal(got, want) { + t.Errorf("%s:%d:\nwant:\n%sgot\n%s", f.Name(), wantNode.Line, want, got) + } + } + if tc.Want.Kind != 0 { + check(c.val, &tc.Want) + } + if tc.All.Kind != 0 { + fVal := slices.Collect(c.All()) + check(fVal, &tc.All) + } + }) + } +} diff --git a/internal/unify/value.go b/internal/unify/value.go new file mode 100644 index 00000000..6bf121af --- /dev/null +++ b/internal/unify/value.go @@ -0,0 +1,129 @@ +// Copyright 2025 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package unify + +import ( + "fmt" + "iter" + "reflect" +) + +// A Value represents a structured, non-deterministic value consisting of +// strings, tuples of Values, and string-keyed maps of Values. A +// non-deterministic Value will also contain variables, which are resolved via +// an environment as part of a [Closure]. +// +// For debugging, a Value can also track the source position it was read from in +// an input file, and its provenance from other Values. +type Value struct { + Domain Domain + + // A Value has either a pos or parents (or neither). + pos *Pos + parents *[2]*Value +} + +var ( + topValue = &Value{Domain: Top{}} + bottomValue = &Value{Domain: nil} +) + +// NewValue returns a new [Value] with the given domain and no position +// information. +func NewValue(d Domain) *Value { + return &Value{Domain: d} +} + +// NewValuePos returns a new [Value] with the given domain at position p. 
+func NewValuePos(d Domain, p Pos) *Value { + return &Value{Domain: d, pos: &p} +} + +// newValueFrom returns a new [Value] with the given domain that copies the +// position information of p. +func newValueFrom(d Domain, p *Value) *Value { + return &Value{Domain: d, pos: p.pos, parents: p.parents} +} + +func unified(d Domain, p1, p2 *Value) *Value { + return &Value{Domain: d, parents: &[2]*Value{p1, p2}} +} + +func (v *Value) Pos() Pos { + if v.pos == nil { + return Pos{} + } + return *v.pos +} + +func (v *Value) PosString() string { + var b []byte + for root := range v.Provenance() { + if len(b) > 0 { + b = append(b, ' ') + } + b, _ = root.pos.AppendText(b) + } + return string(b) +} + +func (v *Value) Exact() bool { + if v.Domain == nil { + return false + } + return v.Domain.Exact() +} + +// Decode decodes v into a Go value. +// +// v must be exact, except that it can include Top. into must be a pointer. +// [Def]s are decoded into structs. [Tuple]s are decoded into slices. [String]s +// are decoded into strings or ints. Any field can itself be a pointer to one of +// these types. Top can be decoded into a pointer-typed field and will set the +// field to nil. Anything else will allocate a value if necessary. +func (v *Value) Decode(into any) error { + rv := reflect.ValueOf(into) + if rv.Kind() != reflect.Pointer { + return fmt.Errorf("cannot decode into non-pointer %T", into) + } + return v.Domain.decode(rv) +} + +func preDecode(rv reflect.Value, kind reflect.Kind, name string) (reflect.Value, error) { + if rv.Kind() == kind { + return rv, nil + } + if rv.Kind() == reflect.Pointer && rv.Type().Elem().Kind() == kind { + if rv.IsNil() { + rv.Set(reflect.New(rv.Type().Elem())) + } + return rv.Elem(), nil + } + return reflect.Value{}, fmt.Errorf("cannot decode %s into %s", name, rv.Type()) +} + +// Provenance iterates over all of the source Values that have contributed to +// this Value. 
// Provenance iterates over all of the source Values that have contributed to
// this Value.
//
// It walks v and, recursively, the parent pairs recorded by unification,
// yielding only Values that carry a source position (i.e. those read
// directly from an input file). If the consumer stops early, the false
// return from yield is propagated up the recursion to halt the walk.
func (v *Value) Provenance() iter.Seq[*Value] {
	return func(yield func(*Value) bool) {
		var rec func(d *Value) bool
		rec = func(d *Value) bool {
			if d.pos != nil {
				// Only positioned Values are sources worth reporting.
				if !yield(d) {
					return false
				}
			}
			if d.parents != nil {
				for _, p := range d.parents {
					if !rec(p) {
						return false
					}
				}
			}
			return true
		}
		rec(v)
	}
}
+ StringReplacer func(string) string +} + +// UnmarshalYAML unmarshals a YAML node into a Closure. +// +// This is how UnmarshalYAML maps YAML nodes into terminal Values: +// +// - "_" or !top _ is the top value ([Top]). +// +// - "_|_" or !bottom _ is the bottom value. This is an error during +// unmarshaling, but can appear in marshaled values. +// +// - "$" or !var is a variable ([Var]). Everywhere the same name +// appears within a single unmarshal operation, it is mapped to the same +// variable. Different unmarshal operations get different variables, even if +// they have the same string name. +// +// - !regex "x" is a regular expression ([String]), as is any string that +// doesn't match "_", "_|_", or "$...". Regular expressions are implicitly +// anchored at the beginning and end. If the string doesn't contain any +// meta-characters (that is, it's a "literal" regular expression), then it's +// treated as an exact string. +// +// - !string "x", or any int, float, bool, or binary value is an exact string +// ([String]). +// +// - !regex [x, y, ...] is an intersection of regular expressions ([String]). +// +// This is how UnmarshalYAML maps YAML nodes into non-terminal Values: +// +// - Sequence nodes like [x, y, z] are tuples ([Tuple]). +// +// - !repeat [x] is a repeated tuple ([Tuple]), which is 0 or more instances of +// x. There must be exactly one element in the list. +// +// - Mapping nodes like {a: x, b: y} are defs ([Def]). Any fields not listed are +// implicitly top. +// +// - !sum [x, y, z] is a sum of its children. This can be thought of as a union +// of the values x, y, and z, or as a non-deterministic choice between x, y, and +// z. If a variable appears both inside the sum and outside of it, only the +// non-deterministic choice view really works. The unifier does not directly +// implement sums; instead, this is decoded as a fresh variable that's +// simultaneously bound to x, y, and z. 
+func (c *Closure) UnmarshalYAML(node *yaml.Node) error { + return c.unmarshal(node, UnmarshalOpts{}) +} + +// Unmarshal is like [UnmarshalYAML], but accepts options and reads from r. If +// opts.Path is "" and r has a Name() string method, the result of r.Name() is +// used as the path for all [Value]s read from r. +func (c *Closure) Unmarshal(r io.Reader, opts UnmarshalOpts) error { + if opts.Path == "" { + type named interface{ Name() string } + if n, ok := r.(named); ok { + opts.Path = n.Name() + } + } + + var node yaml.Node + if err := yaml.NewDecoder(r).Decode(&node); err != nil { + return err + } + np := &node + if np.Kind == yaml.DocumentNode { + np = node.Content[0] + } + return c.unmarshal(np, opts) +} + +func (c *Closure) unmarshal(node *yaml.Node, opts UnmarshalOpts) error { + dec := &yamlDecoder{opts: opts, vars: make(map[string]*ident)} + val, err := dec.value(node) + if err != nil { + return err + } + vars := make(map[*ident]*Value) + for _, id := range dec.vars { + vars[id] = topValue + } + *c = Closure{val, dec.env} + return nil +} + +type yamlDecoder struct { + opts UnmarshalOpts + + vars map[string]*ident + nSums int + + env nonDetEnv +} + +func (dec *yamlDecoder) value(node *yaml.Node) (vOut *Value, errOut error) { + pos := &Pos{Path: dec.opts.Path, Line: node.Line} + + // Resolve alias nodes. + if node.Kind == yaml.AliasNode { + node = node.Alias + } + + mk := func(d Domain) (*Value, error) { + v := &Value{Domain: d, pos: pos} + return v, nil + } + mk2 := func(d Domain, err error) (*Value, error) { + if err != nil { + return nil, err + } + return mk(d) + } + + // is tests the kind and long tag of node. + is := func(kind yaml.Kind, tag string) bool { + return node.Kind == kind && node.LongTag() == tag + } + isExact := func() bool { + if node.Kind != yaml.ScalarNode { + return false + } + // We treat any string-ish YAML node as a string. 
+ switch node.LongTag() { + case "!string", "tag:yaml.org,2002:int", "tag:yaml.org,2002:float", "tag:yaml.org,2002:bool", "tag:yaml.org,2002:binary": + return true + } + return false + } + + // !!str nodes provide a short-hand syntax for several leaf domains that are + // also available under explicit tags. To simplify checking below, we set + // strVal to non-"" only for !!str nodes. + strVal := "" + isStr := is(yaml.ScalarNode, "tag:yaml.org,2002:str") + if isStr { + strVal = node.Value + } + + switch { + case is(yaml.ScalarNode, "!var"): + strVal = "$" + node.Value + fallthrough + case strings.HasPrefix(strVal, "$"): + id, ok := dec.vars[strVal] + if !ok { + // We encode different idents with the same string name by adding a + // #N suffix. Strip that off so it doesn't accumulate. This isn't + // meant to be used in user-written input, though nothing stops that. + name, _, _ := strings.Cut(strVal, "#") + id = &ident{name: name} + dec.vars[strVal] = id + dec.env = dec.env.bind(id, topValue) + } + return mk(Var{id: id}) + + case strVal == "_" || is(yaml.ScalarNode, "!top"): + return mk(Top{}) + + case strVal == "_|_" || is(yaml.ScalarNode, "!bottom"): + return nil, errors.New("found bottom") + + case isExact(): + val := node.Value + if dec.opts.StringReplacer != nil { + val = dec.opts.StringReplacer(val) + } + return mk(NewStringExact(val)) + + case isStr || is(yaml.ScalarNode, "!regex"): + // Any other string we treat as a regex. This will produce an exact + // string anyway if the regex is literal. 
+ val := node.Value + if dec.opts.StringReplacer != nil { + val = dec.opts.StringReplacer(val) + } + return mk2(NewStringRegex(val)) + + case is(yaml.SequenceNode, "!regex"): + var vals []string + if err := node.Decode(&vals); err != nil { + return nil, err + } + return mk2(NewStringRegex(vals...)) + + case is(yaml.MappingNode, "tag:yaml.org,2002:map"): + var fields []string + var vals []*Value + for i := 0; i < len(node.Content); i += 2 { + key := node.Content[i] + if key.Kind != yaml.ScalarNode { + return nil, fmt.Errorf("non-scalar key %q", key.Value) + } + val, err := dec.value(node.Content[i+1]) + if err != nil { + return nil, err + } + fields = append(fields, key.Value) + vals = append(vals, val) + } + return mk(NewDef(fields, vals)) + + case is(yaml.SequenceNode, "tag:yaml.org,2002:seq"): + elts := node.Content + vs := make([]*Value, 0, len(elts)) + for _, elt := range elts { + v, err := dec.value(elt) + if err != nil { + return nil, err + } + vs = append(vs, v) + } + return mk(NewTuple(vs...)) + + case is(yaml.SequenceNode, "!repeat") || is(yaml.SequenceNode, "!repeat-unify"): + // !repeat must have one child. !repeat-unify is used internally for + // delayed unification, and is the same, it's just allowed to have more + // than one child. + if node.LongTag() == "!repeat" && len(node.Content) != 1 { + return nil, fmt.Errorf("!repeat must have exactly one child") + } + + // Decode the children to make sure they're well-formed, but otherwise + // discard that decoding and do it again every time we need a new + // element. + var gen []func(e nonDetEnv) (*Value, nonDetEnv) + origEnv := dec.env + elts := node.Content + for i, elt := range elts { + _, err := dec.value(elt) + if err != nil { + return nil, err + } + // Undo any effects on the environment. We *do* keep any named + // variables that were added to the vars map in case they were + // introduced within the element. 
+ dec.env = origEnv + // Add a generator function + gen = append(gen, func(e nonDetEnv) (*Value, nonDetEnv) { + dec.env = e + // TODO: If this is in a sum, this tends to generate a ton of + // fresh variables that are different on each branch of the + // parent sum. Does it make sense to hold on to the i'th value + // of the tuple after we've generated it? + v, err := dec.value(elts[i]) + if err != nil { + // It worked the first time, so this really shouldn't hapen. + panic("decoding repeat element failed") + } + return v, dec.env + }) + } + return mk(NewRepeat(gen...)) + + case is(yaml.SequenceNode, "!sum"): + vs := make([]*Value, 0, len(node.Content)) + for _, elt := range node.Content { + v, err := dec.value(elt) + if err != nil { + return nil, err + } + vs = append(vs, v) + } + if len(vs) == 1 { + return vs[0], nil + } + + // A sum is implemented as a fresh variable that's simultaneously bound + // to each of the descendants. + id := &ident{name: fmt.Sprintf("sum%d", dec.nSums)} + dec.nSums++ + dec.env = dec.env.bind(id, vs...) + return mk(Var{id: id}) + } + + return nil, fmt.Errorf("unknown node kind %d %v", node.Kind, node.Tag) +} + +type yamlEncoder struct { + idp identPrinter + e nonDetEnv // We track the environment for !repeat nodes. +} + +// TODO: Switch some Value marshaling to Closure? + +func (c Closure) MarshalYAML() (any, error) { + // TODO: If the environment is trivial, just marshal the value. 
+ enc := &yamlEncoder{} + return enc.closure(c), nil +} + +func (c Closure) String() string { + b, err := yaml.Marshal(c) + if err != nil { + return fmt.Sprintf("marshal failed: %s", err) + } + return string(b) +} + +func (v *Value) MarshalYAML() (any, error) { + enc := &yamlEncoder{} + return enc.value(v), nil +} + +func (v *Value) String() string { + b, err := yaml.Marshal(v) + if err != nil { + return fmt.Sprintf("marshal failed: %s", err) + } + return string(b) +} + +func (enc *yamlEncoder) closure(c Closure) *yaml.Node { + enc.e = c.env + var n yaml.Node + n.Kind = yaml.MappingNode + n.Tag = "!closure" + n.Content = make([]*yaml.Node, 4) + n.Content[0] = new(yaml.Node) + n.Content[0].SetString("env") + n.Content[2] = new(yaml.Node) + n.Content[2].SetString("in") + n.Content[3] = enc.value(c.val) + // Fill in the env after we've written the value in case value encoding + // affects the env. + n.Content[1] = enc.env(enc.e) + enc.e = nonDetEnv{} // Allow GC'ing the env + return &n +} + +func (enc *yamlEncoder) env(e nonDetEnv) *yaml.Node { + var n yaml.Node + n.Kind = yaml.SequenceNode + n.Tag = "!env" + for _, term := range e.factors { + var nTerm yaml.Node + n.Content = append(n.Content, &nTerm) + nTerm.Kind = yaml.SequenceNode + for _, det := range term.terms { + var nDet yaml.Node + nTerm.Content = append(nTerm.Content, &nDet) + nDet.Kind = yaml.MappingNode + for i, val := range det.vals { + var nLabel yaml.Node + nLabel.SetString(enc.idp.unique(term.ids[i])) + nDet.Content = append(nDet.Content, &nLabel, enc.value(val)) + } + } + } + return &n +} + +var yamlIntRe = regexp.MustCompile(`^-?[0-9]+$`) + +func (enc *yamlEncoder) value(v *Value) *yaml.Node { + var n yaml.Node + switch d := v.Domain.(type) { + case nil: + // Not allowed by unmarshaler, but useful for understanding when + // something goes horribly wrong. + // + // TODO: We might be able to track useful provenance for this, which + // would really help with debugging unexpected bottoms. 
+ n.SetString("_|_") + return &n + + case Top: + n.SetString("_") + return &n + + case Def: + n.Kind = yaml.MappingNode + for k, elt := range d.All() { + var kn yaml.Node + kn.SetString(k) + n.Content = append(n.Content, &kn, enc.value(elt)) + } + n.HeadComment = v.PosString() + return &n + + case Tuple: + n.Kind = yaml.SequenceNode + if d.repeat == nil { + for _, elt := range d.vs { + n.Content = append(n.Content, enc.value(elt)) + } + } else { + if len(d.repeat) == 1 { + n.Tag = "!repeat" + } else { + n.Tag = "!repeat-unify" + } + // TODO: I'm not positive this will round-trip everything correctly. + for _, gen := range d.repeat { + v, e := gen(enc.e) + enc.e = e + n.Content = append(n.Content, enc.value(v)) + } + } + return &n + + case String: + switch d.kind { + case stringExact: + // Make this into a "nice" !!int node if I can. + if yamlIntRe.MatchString(d.exact) { + n.SetString(d.exact) + n.Tag = "tag:yaml.org,2002:int" + return &n + } + n.SetString(regexp.QuoteMeta(d.exact)) + return &n + case stringRegex: + o := make([]string, 0, 1) + for _, re := range d.re { + s := re.String() + s = strings.TrimSuffix(strings.TrimPrefix(s, `\A(?:`), `)\z`) + o = append(o, s) + } + if len(o) == 1 { + n.SetString(o[0]) + return &n + } + n.Encode(o) + n.Tag = "!regex" + return &n + } + panic("bad String kind") + + case Var: + // TODO: If Var only appears once in the whole Value and is independent + // in the environment (part of a term that is only over Var), then emit + // this as a !sum instead. + if false { + var vs []*Value // TODO: Get values of this var. 
+ if len(vs) == 1 { + return enc.value(vs[0]) + } + n.Kind = yaml.SequenceNode + n.Tag = "!sum" + for _, elt := range vs { + n.Content = append(n.Content, enc.value(elt)) + } + return &n + } + n.SetString(enc.idp.unique(d.id)) + if !strings.HasPrefix(d.id.name, "$") { + n.Tag = "!var" + } + return &n + } + panic(fmt.Sprintf("unknown domain type %T", v.Domain)) +} diff --git a/internal/unify/yaml_test.go b/internal/unify/yaml_test.go new file mode 100644 index 00000000..af73001d --- /dev/null +++ b/internal/unify/yaml_test.go @@ -0,0 +1,91 @@ +// Copyright 2025 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package unify + +import ( + "bytes" + "fmt" + "iter" + + "gopkg.in/yaml.v3" +) + +func mustParse(expr string) Closure { + var c Closure + if err := yaml.Unmarshal([]byte(expr), &c); err != nil { + panic(err) + } + return c +} + +func printYaml(val any) { + b, err := yaml.Marshal(val) + if err != nil { + panic(err) + } + var node yaml.Node + if err := yaml.Unmarshal(b, &node); err != nil { + panic(err) + } + + // Map lines to start offsets. We'll use this to figure out when nodes are + // "small" and should use inline style. + lines := []int{-1, 0} + for pos := 0; pos < len(b); { + next := bytes.IndexByte(b[pos:], '\n') + if next == -1 { + break + } + pos += next + 1 + lines = append(lines, pos) + } + lines = append(lines, len(b)) + + // Strip comments and switch small nodes to inline style + cleanYaml(&node, lines, len(b)) + + b, err = yaml.Marshal(&node) + if err != nil { + panic(err) + } + fmt.Println(string(b)) +} + +func cleanYaml(node *yaml.Node, lines []int, endPos int) { + node.HeadComment = "" + node.FootComment = "" + node.LineComment = "" + + for i, n2 := range node.Content { + end2 := endPos + if i < len(node.Content)-1 { + end2 = lines[node.Content[i+1].Line] + } + cleanYaml(n2, lines, end2) + } + + // Use inline style? 
+ switch node.Kind { + case yaml.MappingNode, yaml.SequenceNode: + if endPos-lines[node.Line] < 40 { + node.Style = yaml.FlowStyle + } + } +} + +func allYamlNodes(n *yaml.Node) iter.Seq[*yaml.Node] { + return func(yield func(*yaml.Node) bool) { + if !yield(n) { + return + } + for _, n2 := range n.Content { + for n3 := range allYamlNodes(n2) { + if !yield(n3) { + return + } + } + } + } +} From bd330f759f85522c5d585854ef9dcc217a26b712 Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Sun, 20 Apr 2025 17:09:40 -0400 Subject: [PATCH 052/200] internal/simdgen: initial work on Go<->SIMD generator This can parse XED data into a unifier structure, and unify it with hand-written definitions of SIMD-to-Go mappings. Change-Id: Ie89e328845cde5752ddb3013ebfccc167e85b0bf Reviewed-on: https://go-review.googlesource.com/c/arch/+/667035 Reviewed-by: David Chase LUCI-TryBot-Result: Go LUCI Reviewed-by: Junyang Shao --- internal/simdgen/asm.yaml.toy | 92 +++++++++++++ internal/simdgen/categories.yaml | 7 + internal/simdgen/go.yaml | 19 +++ internal/simdgen/godefs.go | 98 ++++++++++++++ internal/simdgen/main.go | 217 ++++++++++++++++++++++++++++++ internal/simdgen/types.yaml | 41 ++++++ internal/simdgen/xed.go | 221 +++++++++++++++++++++++++++++++ 7 files changed, 695 insertions(+) create mode 100644 internal/simdgen/asm.yaml.toy create mode 100644 internal/simdgen/categories.yaml create mode 100644 internal/simdgen/go.yaml create mode 100644 internal/simdgen/godefs.go create mode 100644 internal/simdgen/main.go create mode 100644 internal/simdgen/types.yaml create mode 100644 internal/simdgen/xed.go diff --git a/internal/simdgen/asm.yaml.toy b/internal/simdgen/asm.yaml.toy new file mode 100644 index 00000000..76970868 --- /dev/null +++ b/internal/simdgen/asm.yaml.toy @@ -0,0 +1,92 @@ +# Hand-written toy input like -xedPath would generate. +# This input can be substituted for -xedPath. 
+!sum +- asm: ADDPS + goarch: amd64 + feature: "SSE2" + in: + - asmPos: 0 + base: float + bits: 32 + w: 128 + - asmPos: 1 + base: float + bits: 32 + w: 128 + out: + - asmPos: 0 + base: float + bits: 32 + w: 128 + +- asm: ADDPD + goarch: amd64 + feature: "SSE2" + in: + - asmPos: 0 + base: float + bits: 64 + w: 128 + - asmPos: 1 + base: float + bits: 64 + w: 128 + out: + - asmPos: 0 + base: float + bits: 64 + w: 128 + +- asm: PADDB + goarch: amd64 + feature: "SSE2" + in: + - asmPos: 0 + base: int|uint + bits: 32 + w: 128 + - asmPos: 1 + base: int|uint + bits: 32 + w: 128 + out: + - asmPos: 0 + base: int|uint + bits: 32 + w: 128 + +- asm: VPADDB + goarch: amd64 + feature: "AVX" + in: + - asmPos: 1 + base: int|uint + bits: 8 + w: 128 + - asmPos: 2 + base: int|uint + bits: 8 + w: 128 + out: + - asmPos: 0 + base: int|uint + bits: 8 + w: 128 + +- asm: VPADDB + goarch: amd64 + feature: "AVX2" + in: + - asmPos: 1 + base: int|uint + bits: 8 + w: 256 + - asmPos: 2 + base: int|uint + bits: 8 + w: 256 + out: + - asmPos: 0 + base: int|uint + bits: 8 + w: 256 diff --git a/internal/simdgen/categories.yaml b/internal/simdgen/categories.yaml new file mode 100644 index 00000000..b7be71ce --- /dev/null +++ b/internal/simdgen/categories.yaml @@ -0,0 +1,7 @@ +!sum +- go: Add + category: binary +- go: AddSaturated + category: binary +- go: Sub + category: binary diff --git a/internal/simdgen/go.yaml b/internal/simdgen/go.yaml new file mode 100644 index 00000000..efa51303 --- /dev/null +++ b/internal/simdgen/go.yaml @@ -0,0 +1,19 @@ +!sum +# For binary operations, we constrain their two inputs and one output to the +# same Go type using a variable. 
+- go: Add + asm: "V?PADD$xi|V?ADDP$xf" + in: + - go: $t + - go: $t + out: + - go: $t + +- go: Sub + goarch: amd64 + asm: "V?PSUB$xi|V?SUBP$xf" + in: + - go: $t + - go: $t + out: + - go: $t diff --git a/internal/simdgen/godefs.go b/internal/simdgen/godefs.go new file mode 100644 index 00000000..6a6ff4f5 --- /dev/null +++ b/internal/simdgen/godefs.go @@ -0,0 +1,98 @@ +// Copyright 2025 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package main + +import ( + "fmt" + "io" + "log" + + "golang.org/x/arch/internal/unify" +) + +type Operation struct { + Go string // Go method name + Category *string // General operation category (optional) + + GoArch string // GOARCH for this definition + Asm string // Assembly mnemonic + + In []Operand // Arguments + Out []Operand // Results +} + +type Operand struct { + Go string // Go type of this operand + AsmPos int // Position of this operand in the assembly instruction + + Base string // Base Go type ("int", "uint", "float") + Bits int // Element bit width + W int // Total vector bit width +} + +func writeGoDefs(w io.Writer, cl unify.Closure) { + // TODO: Merge operations with the same signature but multiple + // implementations (e.g., SSE vs AVX) + + // TODO: This code is embarrassing, but I'm very tired. 
+ + var op Operation + for def := range cl.All() { + if !def.Exact() { + continue + } + if err := def.Decode(&op); err != nil { + log.Println(err.Error()) + continue + } + + fmt.Fprintf(w, "func (x %s) %s(", op.In[0].Go, op.Go) + for i, arg := range op.In[1:] { + if i > 0 { + fmt.Fprint(w, ", ") + } + fmt.Fprintf(w, "%c %s", 'y'+i, arg.Go) + } + fmt.Fprintf(w, ") (") + for i, res := range op.Out { + if i > 0 { + fmt.Fprint(w, ", ") + } + fmt.Fprintf(w, "%c %s", 'o'+i, res.Go) + } + fmt.Fprintf(w, ") {\n") + + asmPosToArg := make(map[int]byte) + asmPosToRes := make(map[int]byte) + for i, arg := range op.In { + asmPosToArg[arg.AsmPos] = 'x' + byte(i) + } + for i, res := range op.Out { + asmPosToRes[res.AsmPos] = 'o' + byte(i) + } + fmt.Fprintf(w, "\t// %s", op.Asm) + for i := 0; ; i++ { + arg, okArg := asmPosToArg[i] + if okArg { + fmt.Fprintf(w, " %c", arg) + } + res, okRes := asmPosToRes[i] + if okRes { + if okArg { + fmt.Fprintf(w, "/") + } else { + fmt.Fprintf(w, " ") + } + fmt.Fprintf(w, "%c", res) + } + if !okArg && !okRes { + break + } + } + fmt.Fprintf(w, "\n") + + fmt.Fprintf(w, "}\n") + } +} diff --git a/internal/simdgen/main.go b/internal/simdgen/main.go new file mode 100644 index 00000000..91aa07ce --- /dev/null +++ b/internal/simdgen/main.go @@ -0,0 +1,217 @@ +// Copyright 2025 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// simdgen is an experiment in generating Go <-> asm SIMD mappings. +// +// Usage: simdgen [-xedPath=path] [-q=query] input.yaml... +// +// If -xedPath is provided, one of the inputs is a sum of op-code definitions +// generated from the Intel XED data at path. +// +// If input YAML files are provided, each file is read as an input value. See +// [unify.Closure.UnmarshalYAML] or "go doc unify.Closure.UnmarshalYAML" for the +// format of these files. +// +// TODO: Example definitions and values. 
+// +// The command unifies across all of the inputs and prints all possible results +// of this unification. +// +// If the -q flag is provided, its string value is parsed as a value and treated +// as another input to unification. This is intended as a way to "query" the +// result, typically by narrowing it down to a small subset of results. +// +// Typical usage: +// +// go run . -xedPath $XEDPATH *.yaml +// +// To see just the definitions generated from XED, run: +// +// go run . -xedPath $XEDPATH +// +// (This works because if there's only one input, there's nothing to unify it +// with, so the result is simply itself.) +// +// To see just the definitions for VPADDQ: +// +// go run . -xedPath $XEDPATH -q '{asm: VPADDQ}' +package main + +// Big TODOs: +// +// - This can produce duplicates, which can also lead to less efficient +// environment merging. Add hashing and use it for deduplication. Be careful +// about how this shows up in debug traces, since it could make things +// confusing if we don't show it happening. +// +// - Do I need Closure, Value, and Domain? It feels like I should only need two +// types. + +import ( + "cmp" + "flag" + "fmt" + "log" + "maps" + "os" + "slices" + "strings" + + "golang.org/x/arch/internal/unify" + "gopkg.in/yaml.v3" +) + +var ( + xedPath = flag.String("xedPath", "", "load XED datafiles from `path`") + flagQ = flag.String("q", "", "query: read `def` as another input (skips final validation)") + flagO = flag.String("o", "yaml", "output type: yaml, godefs") + + flagDebugXED = flag.Bool("debug-xed", false, "show XED instructions") + flagDebugUnify = flag.Bool("debug-unify", false, "print unification trace") + flagDebugHTML = flag.String("debug-html", "", "write unification trace to `file.html`") +) + +var yamlSubs = strings.NewReplacer( + "$xi", "[BWDQ]", // x86 integer suffixes + "$xf", "[SD]", // x86 float suffixes +) + +func main() { + flag.Parse() + + var inputs []unify.Closure + + // Load XED into a defs set. 
+ if *xedPath != "" { + xedDefs := loadXED(*xedPath) + inputs = append(inputs, unify.NewSum(xedDefs...)) + } + + // Load query. + if *flagQ != "" { + r := strings.NewReader(*flagQ) + var def unify.Closure + if err := def.Unmarshal(r, unify.UnmarshalOpts{Path: "", StringReplacer: yamlSubs.Replace}); err != nil { + log.Fatalf("parsing -q: %s", err) + } + inputs = append(inputs, def) + } + + // Load defs files. + must := make(map[*unify.Value]struct{}) + for _, path := range flag.Args() { + defs, err := loadValue(path) + if err != nil { + log.Fatal(err) + } + inputs = append(inputs, defs) + + if path == "go.yaml" { + // These must all be used in the final result + for def := range defs.Summands() { + must[def] = struct{}{} + } + } + } + + // Prepare for unification + if *flagDebugUnify { + unify.Debug.UnifyLog = os.Stderr + } + if *flagDebugHTML != "" { + f, err := os.Create(*flagDebugHTML) + if err != nil { + log.Fatal(err) + } + unify.Debug.HTML = f + defer f.Close() + } + + // Unify! + unified, err := unify.Unify(inputs...) + if err != nil { + log.Fatal(err) + } + + // Print results. + switch *flagO { + case "yaml": + // Produce a result that looks like encoding a slice, but stream it. + var val1 [1]*unify.Value + for val := range unified.All() { + val1[0] = val + // We have to make a new encoder each time or it'll print a document + // separator between each object. + enc := yaml.NewEncoder(os.Stdout) + if err := enc.Encode(val1); err != nil { + log.Fatal(err) + } + enc.Close() + } + case "godefs": + writeGoDefs(os.Stdout, unified) + } + + // Validate results. + // + // Don't validate if this is a command-line query because that tends to + // eliminate lots of required defs and is used in cases where maybe defs + // aren't enumerable anyway. 
+ if *flagQ == "" && len(must) > 0 { + validate(unified, must) + } +} + +func loadValue(path string) (unify.Closure, error) { + f, err := os.Open(path) + if err != nil { + return unify.Closure{}, err + } + defer f.Close() + + var c unify.Closure + if err := c.Unmarshal(f, unify.UnmarshalOpts{StringReplacer: yamlSubs.Replace}); err != nil { + return unify.Closure{}, fmt.Errorf("%s: %v", path, err) + } + return c, nil +} + +func validate(cl unify.Closure, required map[*unify.Value]struct{}) { + // Validate that: + // 1. All final defs are exact + // 2. All required defs are used + for def := range cl.All() { + if _, ok := def.Domain.(unify.Def); !ok { + fmt.Fprintf(os.Stderr, "%s: expected Def, got %T\n", def.PosString(), def.Domain) + continue + } + + if !def.Exact() { + fmt.Fprintf(os.Stderr, "%s: def not reduced to an exact value:\n", def.PosString()) + fmt.Fprintf(os.Stderr, "\t%s\n", strings.ReplaceAll(def.String(), "\n", "\n\t")) + } + + for root := range def.Provenance() { + delete(required, root) + } + } + // Report unused defs + unused := slices.SortedFunc(maps.Keys(required), + func(a, b *unify.Value) int { + return cmp.Or( + cmp.Compare(a.Pos().Path, b.Pos().Path), + cmp.Compare(a.Pos().Line, b.Pos().Line), + ) + }) + for _, def := range unused { + // TODO: Can we say anything more actionable? This is always a problem + // with unification: if it fails, it's very hard to point a finger at + // any particular reason. We could go back and try unifying this again + // with each subset of the inputs (starting with individual inputs) to + // at least say "it doesn't unify with anything in x.yaml". That's a lot + // of work, but if we have trouble debugging unification failure it may + // be worth it. 
+ fmt.Fprintf(os.Stderr, "%s: def required, but did not unify\n", def.PosString()) + } +} diff --git a/internal/simdgen/types.yaml b/internal/simdgen/types.yaml new file mode 100644 index 00000000..9397888c --- /dev/null +++ b/internal/simdgen/types.yaml @@ -0,0 +1,41 @@ +# This file defines the possible types of each operand and result. +# +# In general, we're able to narrow this down on some attributes directly from +# the machine instruction descriptions, but the Go mappings need to further +# constrain them and how they relate. For example, on x86 we can't distinguish +# int and uint, though we can distinguish these from float. + +in: !repeat +- !sum &types + - {go: Int8x16, base: "int", bits: 8, w: 128} + - {go: Uint8x16, base: "uint", bits: 8, w: 128} + - {go: Int16x8, base: "int", bits: 16, w: 128} + - {go: Uint16x8, base: "uint", bits: 16, w: 128} + - {go: Int32x4, base: "int", bits: 32, w: 128} + - {go: Uint32x4, base: "uint", bits: 32, w: 128} + - {go: Int64x2, base: "int", bits: 64, w: 128} + - {go: Uint64x2, base: "uint", bits: 64, w: 128} + - {go: Float32x4, base: "float", bits: 32, w: 128} + - {go: Float64x2, base: "float", bits: 64, w: 128} + - {go: Int8x32, base: "int", bits: 8, w: 256} + - {go: Uint8x32, base: "uint", bits: 8, w: 256} + - {go: Int16x16, base: "int", bits: 16, w: 256} + - {go: Uint16x16, base: "uint", bits: 16, w: 256} + - {go: Int32x8, base: "int", bits: 32, w: 256} + - {go: Uint32x8, base: "uint", bits: 32, w: 256} + - {go: Int64x4, base: "int", bits: 64, w: 256} + - {go: Uint64x4, base: "uint", bits: 64, w: 256} + - {go: Float32x8, base: "float", bits: 32, w: 256} + - {go: Float64x4, base: "float", bits: 64, w: 256} + - {go: Int8x64, base: "int", bits: 8, w: 512} + - {go: Uint8x64, base: "uint", bits: 8, w: 512} + - {go: Int16x32, base: "int", bits: 16, w: 512} + - {go: Uint16x32, base: "uint", bits: 16, w: 512} + - {go: Int32x16, base: "int", bits: 32, w: 512} + - {go: Uint32x16, base: "uint", bits: 32, w: 512} + - {go: Int64x8, 
base: "int", bits: 64, w: 512}
+ - {go: Uint64x8, base: "uint", bits: 64, w: 512}
+ - {go: Float32x16, base: "float", bits: 32, w: 512}
+ - {go: Float64x8, base: "float", bits: 64, w: 512}
+out: !repeat
+- *types
diff --git a/internal/simdgen/xed.go b/internal/simdgen/xed.go
new file mode 100644
index 00000000..e500d713
--- /dev/null
+++ b/internal/simdgen/xed.go
@@ -0,0 +1,221 @@
+// Copyright 2025 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package main
+
+import (
+ "fmt"
+ "log"
+ "regexp"
+ "strconv"
+ "strings"
+
+ "golang.org/x/arch/internal/unify"
+ "golang.org/x/arch/x86/xeddata"
+ "gopkg.in/yaml.v3"
+)
+
+// TODO: Doc. Returns Values with Def domains.
+func loadXED(xedPath string) []*unify.Value {
+ // TODO: Obviously a bunch more to do here.
+
+ db, err := xeddata.NewDatabase(xedPath)
+ if err != nil {
+ log.Fatalf("open database: %v", err)
+ }
+
+ var defs []*unify.Value
+ err = xeddata.WalkInsts(xedPath, func(inst *xeddata.Inst) {
+ inst.Pattern = xeddata.ExpandStates(db, inst.Pattern)
+
+ switch {
+ case inst.RealOpcode == "N":
+ return // Skip unstable instructions
+ case !(strings.HasPrefix(inst.Extension, "SSE") || strings.HasPrefix(inst.Extension, "AVX")):
+ // We're only interested in SSE and AVX instructions. 
+ return // Skip non-AVX or SSE instructions + } + + if *flagDebugXED { + fmt.Printf("%s:\n%+v\n", inst.Pos, inst) + } + + ins, outs := decodeOperands(db, strings.Fields(inst.Operands)) + // TODO: "feature" + fields := []string{"goarch", "asm", "in", "out"} + values := []*unify.Value{ + unify.NewValue(unify.NewStringExact("amd64")), + unify.NewValue(unify.NewStringExact(inst.Opcode())), + unify.NewValue(ins), + unify.NewValue(outs), + } + pos := unify.Pos{Path: inst.Pos.Path, Line: inst.Pos.Line} + defs = append(defs, unify.NewValuePos(unify.NewDef(fields, values), pos)) + if *flagDebugXED { + y, _ := yaml.Marshal(defs[len(defs)-1]) + fmt.Printf("==>\n%s\n", y) + } + }) + if err != nil { + log.Fatalf("walk insts: %v", err) + } + return defs +} + +func decodeOperands(db *xeddata.Database, operands []string) (ins, outs unify.Tuple) { + var inVals, outVals []*unify.Value + for asmPos, o := range operands { + op, err := xeddata.NewOperand(db, o) + if err != nil { + log.Fatalf("parsing operand %q: %v", o, err) + } + if *flagDebugXED { + fmt.Printf(" %+v\n", op) + } + + // TODO: We should have a fixed set of fields once this gets more cleaned up. 
+ var fields []string + var values []*unify.Value + add := func(f string, v *unify.Value) { + fields = append(fields, f) + values = append(values, v) + } + + add("asmPos", unify.NewValue(unify.NewStringExact(fmt.Sprint(asmPos)))) + + var r, w bool + switch op.Action { + case "r": + r = true + case "w": + w = true + case "rw": + r, w = true, true + default: + continue + } + + lhs := op.NameLHS() + if strings.HasPrefix(lhs, "MEM") { + add("mem", unify.NewValue(unify.NewStringExact("true"))) + add("w", unify.NewValue(unify.NewStringExact("TODO"))) + add("base", unify.NewValue(unify.NewStringExact("TODO"))) + } else if strings.HasPrefix(lhs, "REG") { + if op.Width == "mskw" { + add("mask", unify.NewValue(unify.NewStringExact("true"))) + add("w", unify.NewValue(unify.NewStringExact("TODO"))) + add("base", unify.NewValue(unify.NewStringExact("TODO"))) + } else { + width, ok := decodeReg(op) + if !ok { + return + } + baseRe, bits, ok := decodeBits(op) + if !ok { + return + } + baseDomain, err := unify.NewStringRegex(baseRe) + if err != nil { + panic("parsing baseRe: " + err.Error()) + } + add("bits", unify.NewValue(unify.NewStringExact(fmt.Sprint(bits)))) + add("w", unify.NewValue(unify.NewStringExact(fmt.Sprint(width)))) + add("base", unify.NewValue(baseDomain)) + } + } else { + // TODO: Immediates + add("UNKNOWN", unify.NewValue(unify.NewStringExact(o))) + } + // dq => 128 bits (XMM) + // qq => 256 bits (YMM) + // mskw => K + // z[iuf?](8|16|32|...) => 512 bits (ZMM) + // + // Are these always XMM/YMM/ZMM or can other irregular things + // with large widths use these same codes? + // + // The only zi* is zi32. I don't understand the difference between + // zi32 and zu32 or why there are a bunch of zu* but only one zi. + // + // The xtype tells you the element type. i8, i16, i32, i64, etc. + // + // Things like AVX2 VPAND have an xtype of u256. + // I think we have to map that to all widths. + // There's no u512 (presumably those are all masked, so elem width matters). 
+ // These are all Category: LOGICAL. Maybe we use that info? + + if r { + inVal := unify.NewValue(unify.NewDef(fields, values)) + inVals = append(inVals, inVal) + } + if w { + outVal := unify.NewValue(unify.NewDef(fields, values)) + outVals = append(outVals, outVal) + } + } + + return unify.NewTuple(inVals...), unify.NewTuple(outVals...) +} + +func decodeReg(op *xeddata.Operand) (w int, ok bool) { + if !strings.HasPrefix(op.NameLHS(), "REG") { + return 0, false + } + // TODO: We shouldn't be relying on the macro naming conventions. We should + // use all-dec-patterns.txt, but xeddata doesn't support that table right now. + rhs := op.NameRHS() + if !strings.HasSuffix(rhs, "()") { + return 0, false + } + switch { + case strings.HasPrefix(rhs, "XMM_"): + return 128, true + case strings.HasPrefix(rhs, "YMM_"): + return 256, true + case strings.HasPrefix(rhs, "ZMM_"): + return 512, true + } + return 0, false +} + +var xtypeRe = regexp.MustCompile(`^([iuf])([0-9]+)$`) + +func decodeBits(op *xeddata.Operand) (baseRe string, bits int, ok bool) { + // Handle some weird ones. + switch op.Xtype { + // 8-bit float formats as defined by Open Compute Project "OCP 8-bit + // Floating Point Specification (OFP8)". + case "bf8", // E5M2 float + "hf8": // E4M3 float + return "", 0, false // TODO + case "bf16": // bfloat16 float + return "", 0, false // TODO + case "2f16": + // Complex consisting of 2 float16s. Doesn't exist in Go, but we can say + // what it would be. + return "complex", 32, true + case "2i8", "2I8": + // These just use the lower INT8 in each 16 bit field. + // As far as I can tell, "2I8" is a typo. + return "int", 8, true + } + + // The rest follow a simple pattern. 
+ m := xtypeRe.FindStringSubmatch(op.Xtype) + if m == nil { + // TODO: Report unrecognized xtype + return "", 0, false + } + bits, _ = strconv.Atoi(m[2]) + switch m[1] { + case "i", "u": + // XED is rather inconsistent about what's signed, unsigned, or doesn't + // matter, so merge them together and let the Go definitions narrow as + // appropriate. Maybe there's a better way to do this. + baseRe = "int|uint" + case "f": + baseRe = "float" + } + return baseRe, bits, true +} From ca84b2ccdbe04587d9f0bddda2d3b22e4ee9f31b Mon Sep 17 00:00:00 2001 From: Koichi Shiraishi Date: Sat, 26 Apr 2025 04:48:13 +0900 Subject: [PATCH 053/200] go.mod: format require section Change-Id: Ia54ecb1cd5f05c2502acd13f1cf3db5f66070d15 Reviewed-on: https://go-review.googlesource.com/c/arch/+/668335 Reviewed-by: Carlos Amedee LUCI-TryBot-Result: Go LUCI Reviewed-by: Junyang Shao Reviewed-by: Sean Liao --- go.mod | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index 72642f75..bcca36b6 100644 --- a/go.mod +++ b/go.mod @@ -2,6 +2,7 @@ module golang.org/x/arch go 1.23.0 -require rsc.io/pdf v0.1.1 - -require gopkg.in/yaml.v3 v3.0.1 +require ( + gopkg.in/yaml.v3 v3.0.1 + rsc.io/pdf v0.1.1 +) From 097aeb1e7accfa1038f6f6b6f31a1a6493e63d99 Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Mon, 19 May 2025 20:09:00 -0400 Subject: [PATCH 054/200] internal/ximdgen: drop $xi/$xf shorthands These were string shorthands for x86 integer and float suffixes, respectively, but in practice they were easy to confuse with unification variables (also $) and just required knowing more stuff in order to understand the unification rules. Drop them and just spell it out. 
Change-Id: I38a0446e428a92c9e89be4e90d1beff16e48e714 Reviewed-on: https://go-review.googlesource.com/c/arch/+/674177 Auto-Submit: Austin Clements Reviewed-by: David Chase LUCI-TryBot-Result: Go LUCI --- internal/simdgen/go.yaml | 4 ++-- internal/simdgen/main.go | 9 ++------- 2 files changed, 4 insertions(+), 9 deletions(-) diff --git a/internal/simdgen/go.yaml b/internal/simdgen/go.yaml index efa51303..a0665947 100644 --- a/internal/simdgen/go.yaml +++ b/internal/simdgen/go.yaml @@ -2,7 +2,7 @@ # For binary operations, we constrain their two inputs and one output to the # same Go type using a variable. - go: Add - asm: "V?PADD$xi|V?ADDP$xf" + asm: "V?PADD[BWDQ]|V?ADDP[SD]" in: - go: $t - go: $t @@ -11,7 +11,7 @@ - go: Sub goarch: amd64 - asm: "V?PSUB$xi|V?SUBP$xf" + asm: "V?PSUB[BWDQ]|V?SUBP[SD]" in: - go: $t - go: $t diff --git a/internal/simdgen/main.go b/internal/simdgen/main.go index 91aa07ce..82c31c06 100644 --- a/internal/simdgen/main.go +++ b/internal/simdgen/main.go @@ -72,11 +72,6 @@ var ( flagDebugHTML = flag.String("debug-html", "", "write unification trace to `file.html`") ) -var yamlSubs = strings.NewReplacer( - "$xi", "[BWDQ]", // x86 integer suffixes - "$xf", "[SD]", // x86 float suffixes -) - func main() { flag.Parse() @@ -92,7 +87,7 @@ func main() { if *flagQ != "" { r := strings.NewReader(*flagQ) var def unify.Closure - if err := def.Unmarshal(r, unify.UnmarshalOpts{Path: "", StringReplacer: yamlSubs.Replace}); err != nil { + if err := def.Unmarshal(r, unify.UnmarshalOpts{Path: ""}); err != nil { log.Fatalf("parsing -q: %s", err) } inputs = append(inputs, def) @@ -171,7 +166,7 @@ func loadValue(path string) (unify.Closure, error) { defer f.Close() var c unify.Closure - if err := c.Unmarshal(f, unify.UnmarshalOpts{StringReplacer: yamlSubs.Replace}); err != nil { + if err := c.Unmarshal(f, unify.UnmarshalOpts{}); err != nil { return unify.Closure{}, fmt.Errorf("%s: %v", path, err) } return c, nil From 4fbd317a2cd482e27515e5ae3e271b2a10918c13 Mon Sep 
17 00:00:00 2001 From: Austin Clements Date: Tue, 22 Apr 2025 20:33:28 -0400 Subject: [PATCH 055/200] internal/simdgen: support masks and immediates This significantly reworks the XED loader and tweaks the operand representation to support more than just vector registers. In particular, each operand now has a "class" string that determines the meaning of several other fields. We add AVX-512 == and < to demonstrate both masks and immediates. Change-Id: I6d025dbcb66e5914472b60697b3a7e4cc6174d78 Reviewed-on: https://go-review.googlesource.com/c/arch/+/667435 LUCI-TryBot-Result: Go LUCI Auto-Submit: Austin Clements Reviewed-by: Junyang Shao --- internal/simdgen/asm.yaml.toy | 75 +++--- internal/simdgen/go.yaml | 204 ++++++++++++++++ internal/simdgen/godefs.go | 87 +++++-- internal/simdgen/types.yaml | 73 +++--- internal/simdgen/xed.go | 421 +++++++++++++++++++++++++++------- x86/xeddata/operand.go | 4 +- 6 files changed, 691 insertions(+), 173 deletions(-) diff --git a/internal/simdgen/asm.yaml.toy b/internal/simdgen/asm.yaml.toy index 76970868..7885c776 100644 --- a/internal/simdgen/asm.yaml.toy +++ b/internal/simdgen/asm.yaml.toy @@ -6,87 +6,102 @@ feature: "SSE2" in: - asmPos: 0 + class: vreg base: float - bits: 32 - w: 128 + elemBits: 32 + bits: 128 - asmPos: 1 + class: vreg base: float - bits: 32 - w: 128 + elemBits: 32 + bits: 128 out: - asmPos: 0 + class: vreg base: float - bits: 32 - w: 128 + elemBits: 32 + bits: 128 - asm: ADDPD goarch: amd64 feature: "SSE2" in: - asmPos: 0 + class: vreg base: float - bits: 64 - w: 128 + elemBits: 64 + bits: 128 - asmPos: 1 + class: vreg base: float - bits: 64 - w: 128 + elemBits: 64 + bits: 128 out: - asmPos: 0 + class: vreg base: float - bits: 64 - w: 128 + elemBits: 64 + bits: 128 - asm: PADDB goarch: amd64 feature: "SSE2" in: - asmPos: 0 + class: vreg base: int|uint - bits: 32 - w: 128 + elemBits: 32 + bits: 128 - asmPos: 1 + class: vreg base: int|uint - bits: 32 - w: 128 + elemBits: 32 + bits: 128 out: - asmPos: 0 + 
class: vreg base: int|uint - bits: 32 - w: 128 + elemBits: 32 + bits: 128 - asm: VPADDB goarch: amd64 feature: "AVX" in: - asmPos: 1 + class: vreg base: int|uint - bits: 8 - w: 128 + elemBits: 8 + bits: 128 - asmPos: 2 + class: vreg base: int|uint - bits: 8 - w: 128 + elemBits: 8 + bits: 128 out: - asmPos: 0 + class: vreg base: int|uint - bits: 8 - w: 128 + elemBits: 8 + bits: 128 - asm: VPADDB goarch: amd64 feature: "AVX2" in: - asmPos: 1 + class: vreg base: int|uint - bits: 8 - w: 256 + elemBits: 8 + bits: 256 - asmPos: 2 + class: vreg base: int|uint - bits: 8 - w: 256 + elemBits: 8 + bits: 256 out: - asmPos: 0 + class: vreg base: int|uint - bits: 8 - w: 256 + elemBits: 8 + bits: 256 diff --git a/internal/simdgen/go.yaml b/internal/simdgen/go.yaml index a0665947..6a6ca1eb 100644 --- a/internal/simdgen/go.yaml +++ b/internal/simdgen/go.yaml @@ -17,3 +17,207 @@ - go: $t out: - go: $t + +# +# AVX-512 Comparisons +# + +# TODO(austin): I'm not happy with how much copy-pasting this requires. We could +# do a functional abstraction, but that feels bolted on. Unification is *almost* +# capable of collapsing all of this. +# +# One thing that might work is having a !let node that lets you extend the +# nonDetEnv with explicit values: +# +# !let +# - {$go: Equal, $imm: 0, $mask: K0} +# - {$go: EqualMasked, $imm: 0, $mask: _} +# - {$go: Less, $imm: 1, $mask: K0} +# - {$go: LessMasked, $imm: 1, $mask: _} +# - !let +# - {$asm: "VPCMP[BWDQ]", $base: int} +# - {$asm: "VPCMPU[BWDQ]", $base: uint} +# - go: $go +# asm: $asm +# in: +# - const: $mask +# - base: $base +# go: $t +# - base: $base +# go: $t +# - class: immediate +# const: $imm +# out: +# - class: mask +# +# That's not bad, but it's very hierachical. CUE has a "mixin" approach to this. 
+# +# - !unify +# # All AVX-512 comparisons have the same basic operand shape +# - {in: [_, {go: $t}, {go: $t}, _], out: [{class: mask}]} +# # There are signed and unsigned variants +# - !sum +# - {asm: "VPCMP[BWDQ]", in: [_, {base: int}, {base: int}, _]} +# - {asm: "VPCMPU[BWDQ]", in: [_, {base: uint}, {base: uint}, _]} +# # Finally, list out the operations. +# - !let +# - $equal: {in: [_, _, _, {class: immedate, const: 0}]} +# $less: {in: [_, _, _, {class: immedate, const: 1}]} +# $masked: _ +# $unmasked: {in: [const: K0, _, _, _]} +# - !sum +# - !unify [go: Equal, $equal, $unmasked] +# - !unify [go: EqualMasked, $equal, $masked] +# - !unify [go: Less, $less, $unmasked] +# - !unify [go: LessMasked, $less, $masked] +# +# Maybe !let is just a feature of !sum that introduces an environment factor for +# all following branches? That would let me do the above in-line with the big +# top-level !sum: +# +# - !sum +# ... +# - !let # Adds a factor that is the sum of the following terms: +# - {$go: Equal, $imm: 0, $mask: K0} +# - {$go: EqualMasked, $imm: 0, $mask: _} +# - {$go: Less, $imm: 1, $mask: K0} +# - {$go: LessMasked, $imm: 1, $mask: _} +# - !let # Adds another factor: +# - {$asm: "VPCMP[BWDQ]", $base: int} +# - {$asm: "VPCMPU[BWDQ]", $base: uint} +# - go: $go +# asm: $asm +# in: +# - const: $mask +# - base: $base +# go: $t +# - base: $base +# go: $t +# - class: immediate +# const: $imm +# out: +# - class: mask +# +# I may need to choose names more carefully in that case. This is a general +# problem with names being file-global. (This is less of a problem with the +# mixin style because those names tend to be more specific anyway.) Or maybe it +# makes sense for each !let to introduce fresh idents, even if the string names +# are the same? 
+ +- go: Equal + goarch: amd64 + asm: "VPCMP[BWDQ]" # Signed comparison + in: + - const: K0 + - base: int + go: $t + - base: int + go: $t + - class: immediate + const: 0 + out: + - class: mask + +- go: EqualMasked + goarch: amd64 + asm: "VPCMP[BWDQ]" # Signed comparison + in: + - _ + - base: int + go: $t + - base: int + go: $t + - class: immediate + const: 0 + out: + - class: mask + +- go: Equal + goarch: amd64 + asm: "VPCMPU[BWDQ]" # Unsigned comparison + in: + - const: K0 + - base: uint + go: $t + - base: uint + go: $t + - class: immediate + const: 0 + out: + - class: mask + +- go: EqualMasked + goarch: amd64 + asm: "VPCMPU[BWDQ]" # Unsigned comparison + in: + - _ + - base: uint + go: $t + - base: uint + go: $t + - class: immediate + const: 0 + out: + - class: mask + +- go: Less + goarch: amd64 + asm: "VPCMP[BWDQ]" # Signed comparison + in: + - const: K0 + - base: int + go: $t + - base: int + go: $t + - class: immediate + const: 1 + out: + - class: mask + +- go: LessMasked + goarch: amd64 + asm: "VPCMP[BWDQ]" # Signed comparison + in: + - _ + - base: int + go: $t + - base: int + go: $t + - class: immediate + const: 1 + out: + - class: mask + +- go: Less + goarch: amd64 + asm: "VPCMPU[BWDQ]" # Unsigned comparison + in: + - const: K0 + - base: uint + go: $t + - base: uint + go: $t + - class: immediate + const: 1 + out: + - class: mask + +- go: LessMasked + goarch: amd64 + asm: "VPCMPU[BWDQ]" # Unsigned comparison + in: + - _ + - base: uint + go: $t + - base: uint + go: $t + - class: immediate + const: 1 + out: + - class: mask + +# TODO: +# 2: OP := LE; +# 4: OP := NEQ; +# 5: OP := NLT; +# 6: OP := NLE; diff --git a/internal/simdgen/godefs.go b/internal/simdgen/godefs.go index 6a6ff4f5..037c11fa 100644 --- a/internal/simdgen/godefs.go +++ b/internal/simdgen/godefs.go @@ -8,6 +8,7 @@ import ( "fmt" "io" "log" + "slices" "golang.org/x/arch/internal/unify" ) @@ -24,14 +25,31 @@ type Operation struct { } type Operand struct { - Go string // Go type of this operand - 
AsmPos int // Position of this operand in the assembly instruction + Class string - Base string // Base Go type ("int", "uint", "float") - Bits int // Element bit width - W int // Total vector bit width + Go *string // Go type of this operand + AsmPos int // Position of this operand in the assembly instruction + + Base *string // Base Go type ("int", "uint", "float") + ElemBits *int // Element bit width + Bits int // Total vector bit width + + Const *string // Optional constant value +} + +func (o Operand) Compare(p Operand) int { + // Put mask operands after others + if o.Class != "mask" && p.Class == "mask" { + return -1 + } + if o.Class == "mask" && p.Class != "mask" { + return 1 + } + return 0 } +var argNames = []string{"x", "y", "z", "w"} + func writeGoDefs(w io.Writer, cl unify.Closure) { // TODO: Merge operations with the same signature but multiple // implementations (e.g., SSE vs AVX) @@ -45,39 +63,64 @@ func writeGoDefs(w io.Writer, cl unify.Closure) { } if err := def.Decode(&op); err != nil { log.Println(err.Error()) + log.Println(def) continue } - fmt.Fprintf(w, "func (x %s) %s(", op.In[0].Go, op.Go) - for i, arg := range op.In[1:] { - if i > 0 { - fmt.Fprint(w, ", ") + in := slices.Clone(op.In) + slices.SortStableFunc(in, Operand.Compare) + out := slices.Clone(op.Out) + slices.SortStableFunc(out, Operand.Compare) + + type argExtra struct { + *Operand + varName string + } + asmPosToArg := make(map[int]argExtra) + asmPosToRes := make(map[int]argExtra) + argNames := argNames + + fmt.Fprintf(w, "func (%s %s) %s(", argNames[0], *in[0].Go, op.Go) + asmPosToArg[in[0].AsmPos] = argExtra{&in[0], argNames[0]} + argNames = argNames[1:] + i := 0 + for _, arg := range in[1:] { + varName := "" + + // Drop operands with constant values + if arg.Const == nil { + if i > 0 { + fmt.Fprint(w, ", ") + } + i++ + varName = argNames[0] + fmt.Fprintf(w, "%s %s", varName, *arg.Go) + argNames = argNames[1:] } - fmt.Fprintf(w, "%c %s", 'y'+i, arg.Go) + asmPosToArg[arg.AsmPos] = 
argExtra{&arg, varName} } fmt.Fprintf(w, ") (") - for i, res := range op.Out { + for i, res := range out { if i > 0 { fmt.Fprint(w, ", ") } - fmt.Fprintf(w, "%c %s", 'o'+i, res.Go) + varName := string('o' + byte(i)) + fmt.Fprintf(w, "%s %s", varName, *res.Go) + asmPosToRes[res.AsmPos] = argExtra{&res, varName} } fmt.Fprintf(w, ") {\n") - asmPosToArg := make(map[int]byte) - asmPosToRes := make(map[int]byte) - for i, arg := range op.In { - asmPosToArg[arg.AsmPos] = 'x' + byte(i) - } - for i, res := range op.Out { - asmPosToRes[res.AsmPos] = 'o' + byte(i) - } fmt.Fprintf(w, "\t// %s", op.Asm) for i := 0; ; i++ { arg, okArg := asmPosToArg[i] if okArg { - fmt.Fprintf(w, " %c", arg) + if arg.Const != nil { + fmt.Fprintf(w, " %s", *arg.Const) + } else { + fmt.Fprintf(w, " %s", arg.varName) + } } + res, okRes := asmPosToRes[i] if okRes { if okArg { @@ -85,7 +128,7 @@ func writeGoDefs(w io.Writer, cl unify.Closure) { } else { fmt.Fprintf(w, " ") } - fmt.Fprintf(w, "%c", res) + fmt.Fprintf(w, "%s", res.varName) } if !okArg && !okRes { break diff --git a/internal/simdgen/types.yaml b/internal/simdgen/types.yaml index 9397888c..9092224e 100644 --- a/internal/simdgen/types.yaml +++ b/internal/simdgen/types.yaml @@ -7,35 +7,48 @@ in: !repeat - !sum &types - - {go: Int8x16, base: "int", bits: 8, w: 128} - - {go: Uint8x16, base: "uint", bits: 8, w: 128} - - {go: Int16x8, base: "int", bits: 16, w: 128} - - {go: Uint16x8, base: "uint", bits: 16, w: 128} - - {go: Int32x4, base: "int", bits: 32, w: 128} - - {go: Uint32x4, base: "uint", bits: 32, w: 128} - - {go: Int64x2, base: "int", bits: 64, w: 128} - - {go: Uint64x2, base: "uint", bits: 64, w: 128} - - {go: Float32x4, base: "float", bits: 32, w: 128} - - {go: Float64x2, base: "float", bits: 64, w: 128} - - {go: Int8x32, base: "int", bits: 8, w: 256} - - {go: Uint8x32, base: "uint", bits: 8, w: 256} - - {go: Int16x16, base: "int", bits: 16, w: 256} - - {go: Uint16x16, base: "uint", bits: 16, w: 256} - - {go: Int32x8, base: "int", 
bits: 32, w: 256} - - {go: Uint32x8, base: "uint", bits: 32, w: 256} - - {go: Int64x4, base: "int", bits: 64, w: 256} - - {go: Uint64x4, base: "uint", bits: 64, w: 256} - - {go: Float32x8, base: "float", bits: 32, w: 256} - - {go: Float64x4, base: "float", bits: 64, w: 256} - - {go: Int8x64, base: "int", bits: 8, w: 512} - - {go: Uint8x64, base: "uint", bits: 8, w: 512} - - {go: Int16x32, base: "int", bits: 16, w: 512} - - {go: Uint16x32, base: "uint", bits: 16, w: 512} - - {go: Int32x16, base: "int", bits: 32, w: 512} - - {go: Uint32x16, base: "uint", bits: 32, w: 512} - - {go: Int64x8, base: "int", bits: 64, w: 512} - - {go: Uint64x8, base: "uint", bits: 64, w: 512} - - {go: Float32x16, base: "float", bits: 32, w: 512} - - {go: Float64x8, base: "float", bits: 64, w: 512} + - {class: vreg, go: Int8x16, base: "int", elemBits: 8, bits: 128} + - {class: vreg, go: Uint8x16, base: "uint", elemBits: 8, bits: 128} + - {class: vreg, go: Int16x8, base: "int", elemBits: 16, bits: 128} + - {class: vreg, go: Uint16x8, base: "uint", elemBits: 16, bits: 128} + - {class: vreg, go: Int32x4, base: "int", elemBits: 32, bits: 128} + - {class: vreg, go: Uint32x4, base: "uint", elemBits: 32, bits: 128} + - {class: vreg, go: Int64x2, base: "int", elemBits: 64, bits: 128} + - {class: vreg, go: Uint64x2, base: "uint", elemBits: 64, bits: 128} + - {class: vreg, go: Float32x4, base: "float", elemBits: 32, bits: 128} + - {class: vreg, go: Float64x2, base: "float", elemBits: 64, bits: 128} + - {class: vreg, go: Int8x32, base: "int", elemBits: 8, bits: 256} + - {class: vreg, go: Uint8x32, base: "uint", elemBits: 8, bits: 256} + - {class: vreg, go: Int16x16, base: "int", elemBits: 16, bits: 256} + - {class: vreg, go: Uint16x16, base: "uint", elemBits: 16, bits: 256} + - {class: vreg, go: Int32x8, base: "int", elemBits: 32, bits: 256} + - {class: vreg, go: Uint32x8, base: "uint", elemBits: 32, bits: 256} + - {class: vreg, go: Int64x4, base: "int", elemBits: 64, bits: 256} + - {class: vreg, go: 
Uint64x4, base: "uint", elemBits: 64, bits: 256} + - {class: vreg, go: Float32x8, base: "float", elemBits: 32, bits: 256} + - {class: vreg, go: Float64x4, base: "float", elemBits: 64, bits: 256} + - {class: vreg, go: Int8x64, base: "int", elemBits: 8, bits: 512} + - {class: vreg, go: Uint8x64, base: "uint", elemBits: 8, bits: 512} + - {class: vreg, go: Int16x32, base: "int", elemBits: 16, bits: 512} + - {class: vreg, go: Uint16x32, base: "uint", elemBits: 16, bits: 512} + - {class: vreg, go: Int32x16, base: "int", elemBits: 32, bits: 512} + - {class: vreg, go: Uint32x16, base: "uint", elemBits: 32, bits: 512} + - {class: vreg, go: Int64x8, base: "int", elemBits: 64, bits: 512} + - {class: vreg, go: Uint64x8, base: "uint", elemBits: 64, bits: 512} + - {class: vreg, go: Float32x16, base: "float", elemBits: 32, bits: 512} + - {class: vreg, go: Float64x8, base: "float", elemBits: 64, bits: 512} + - {class: mask, go: Mask8x16, elemBits: 8, bits: 128} + - {class: mask, go: Mask16x8, elemBits: 16, bits: 128} + - {class: mask, go: Mask32x4, elemBits: 32, bits: 128} + - {class: mask, go: Mask64x2, elemBits: 64, bits: 128} + - {class: mask, go: Mask8x32, elemBits: 8, bits: 256} + - {class: mask, go: Mask16x16, elemBits: 16, bits: 256} + - {class: mask, go: Mask32x8, elemBits: 32, bits: 256} + - {class: mask, go: Mask64x4, elemBits: 64, bits: 256} + - {class: mask, go: Mask8x64, elemBits: 8, bits: 512} + - {class: mask, go: Mask16x32, elemBits: 16, bits: 512} + - {class: mask, go: Mask32x16, elemBits: 32, bits: 512} + - {class: mask, go: Mask64x8, elemBits: 64, bits: 512} + - {class: immediate} # TODO out: !repeat - *types diff --git a/internal/simdgen/xed.go b/internal/simdgen/xed.go index e500d713..292411cb 100644 --- a/internal/simdgen/xed.go +++ b/internal/simdgen/xed.go @@ -41,7 +41,11 @@ func loadXED(xedPath string) []*unify.Value { fmt.Printf("%s:\n%+v\n", inst.Pos, inst) } - ins, outs := decodeOperands(db, strings.Fields(inst.Operands)) + ins, outs, err := 
decodeOperands(db, strings.Fields(inst.Operands)) + if err != nil { + log.Printf("%s: [%s] %s", inst.Pos, inst.Opcode(), err) + return + } // TODO: "feature" fields := []string{"goarch", "asm", "in", "out"} values := []*unify.Value{ @@ -63,102 +67,296 @@ func loadXED(xedPath string) []*unify.Value { return defs } -func decodeOperands(db *xeddata.Database, operands []string) (ins, outs unify.Tuple) { - var inVals, outVals []*unify.Value - for asmPos, o := range operands { - op, err := xeddata.NewOperand(db, o) +type operandCommon struct { + action operandAction +} + +// operandAction defines whether this operand is read and/or written. +// +// TODO: Should this live in [xeddata.Operand]? +type operandAction struct { + r bool // Read + w bool // Written + cr bool // Read is conditional (implies r==true) + cw bool // Write is conditional (implies w==true) +} + +type operandMem struct { + operandCommon + // TODO +} + +type vecShape struct { + elemBits int // Element size in bits + bits int // Register width in bits (total vector bits) +} + +type operandVReg struct { // Vector register + operandCommon + vecShape + elemBaseType scalarBaseType +} + +// operandMask is a vector mask. +// +// Regardless of the actual mask representation, the [vecShape] of this operand +// corresponds to the "bit for bit" type of mask. That is, elemBits gives the +// element width covered by each mask element, and bits/elemBits gives the total +// number of mask elements. (bits gives the total number of bits as if this were +// a bit-for-bit mask, which may be meaningless on its own.) 
+type operandMask struct { + operandCommon + vecShape +} + +type operandImm struct { + operandCommon + bits int // Immediate size in bits +} + +type operand interface { + common() operandCommon + toValue() (fields []string, vals []*unify.Value) +} + +func strVal(s any) *unify.Value { + return unify.NewValue(unify.NewStringExact(fmt.Sprint(s))) +} + +func (o operandCommon) common() operandCommon { + return o +} + +func (o operandMem) toValue() (fields []string, vals []*unify.Value) { + // TODO: w, base + return []string{"class"}, []*unify.Value{strVal("memory")} +} + +func (o operandVReg) toValue() (fields []string, vals []*unify.Value) { + baseDomain, err := unify.NewStringRegex(o.elemBaseType.regex()) + if err != nil { + panic("parsing baseRe: " + err.Error()) + } + return []string{"class", "elemBits", "bits", "base"}, []*unify.Value{ + strVal("vreg"), + strVal(o.elemBits), + strVal(o.bits), + unify.NewValue(baseDomain)} +} + +func (o operandMask) toValue() (fields []string, vals []*unify.Value) { + return []string{"class", "elemBits", "bits"}, []*unify.Value{strVal("mask"), strVal(o.elemBits), strVal(o.bits)} +} + +func (o operandImm) toValue() (fields []string, vals []*unify.Value) { + return []string{"class", "bits"}, []*unify.Value{strVal("immediate"), strVal(o.bits)} +} + +var actionEncoding = map[string]operandAction{ + "r": {r: true}, + "cr": {r: true, cr: true}, + "w": {w: true}, + "cw": {w: true, cw: true}, + "rw": {r: true, w: true}, + "crw": {r: true, w: true, cr: true}, + "rcw": {r: true, w: true, cw: true}, +} + +func decodeOperand(db *xeddata.Database, operand string) (operand, error) { + op, err := xeddata.NewOperand(db, operand) + if err != nil { + log.Fatalf("parsing operand %q: %v", operand, err) + } + if *flagDebugXED { + fmt.Printf(" %+v\n", op) + } + + // TODO: See xed_decoded_inst_operand_action. This might need to be more + // complicated. 
+ action, ok := actionEncoding[op.Action] + if !ok { + return nil, fmt.Errorf("unknown action %q", op.Action) + } + common := operandCommon{action: action} + + lhs := op.NameLHS() + if strings.HasPrefix(lhs, "MEM") { + // TODO: Width, base type + return operandMem{ + operandCommon: common, + }, nil + } else if strings.HasPrefix(lhs, "REG") { + if op.Width == "mskw" { + // The mask operand doesn't specify a width. We have to infer it. + return operandMask{ + operandCommon: common, + }, nil + } else { + regBits, ok := decodeReg(op) + if !ok { + return nil, fmt.Errorf("failed to decode register %q", operand) + } + baseType, elemBits, ok := decodeType(op) + if !ok { + return nil, fmt.Errorf("failed to decode register width %q", operand) + } + shape := vecShape{elemBits: elemBits, bits: regBits} + return operandVReg{ + operandCommon: common, + vecShape: shape, + elemBaseType: baseType, + }, nil + } + } else if strings.HasPrefix(lhs, "IMM") { + _, bits, ok := decodeType(op) + if !ok { + return nil, fmt.Errorf("failed to decode register width %q", operand) + } + return operandImm{ + operandCommon: common, + bits: bits, + }, nil + } + + // TODO: BASE and SEG + return nil, fmt.Errorf("unknown operand LHS %q in %q", lhs, operand) +} + +func decodeOperands(db *xeddata.Database, operands []string) (ins, outs unify.Tuple, err error) { + fail := func(err error) (unify.Tuple, unify.Tuple, error) { + return unify.Tuple{}, unify.Tuple{}, err + } + + // Decode all of the operands. + var ops []operand + for _, o := range operands { + op, err := decodeOperand(db, o) if err != nil { - log.Fatalf("parsing operand %q: %v", o, err) + return unify.Tuple{}, unify.Tuple{}, err } - if *flagDebugXED { - fmt.Printf(" %+v\n", op) - } - - // TODO: We should have a fixed set of fields once this gets more cleaned up. 
- var fields []string - var values []*unify.Value - add := func(f string, v *unify.Value) { - fields = append(fields, f) - values = append(values, v) - } - - add("asmPos", unify.NewValue(unify.NewStringExact(fmt.Sprint(asmPos)))) - - var r, w bool - switch op.Action { - case "r": - r = true - case "w": - w = true - case "rw": - r, w = true, true - default: - continue - } - - lhs := op.NameLHS() - if strings.HasPrefix(lhs, "MEM") { - add("mem", unify.NewValue(unify.NewStringExact("true"))) - add("w", unify.NewValue(unify.NewStringExact("TODO"))) - add("base", unify.NewValue(unify.NewStringExact("TODO"))) - } else if strings.HasPrefix(lhs, "REG") { - if op.Width == "mskw" { - add("mask", unify.NewValue(unify.NewStringExact("true"))) - add("w", unify.NewValue(unify.NewStringExact("TODO"))) - add("base", unify.NewValue(unify.NewStringExact("TODO"))) - } else { - width, ok := decodeReg(op) - if !ok { - return + ops = append(ops, op) + } + + // XED doesn't encode the size of mask operands. If there are mask operands, + // try to infer their sizes from other operands. + // + // This is a heuristic and it falls apart in some cases: + // + // - Mask operations like KAND[BWDQ] have *nothing* in the XED to indicate + // mask size. + // + // - VINSERT*, VPSLL*, VPSRA*, and VPSRL* and some others naturally have + // mixed input sizes and the XED doesn't indicate which operands the mask + // applies to. + // + // - VPDP* and VP4DP* have really complex mixed operand patterns. + // + // I think for these we may just have to hand-write a table of which + // operands each mask applies to. 
+ inferMask := func(r, w bool) error { + var masks []int + var rSizes, wSizes, sizes []vecShape + for i, op := range ops { + action := op.common().action + if _, ok := op.(operandMask); ok { + if action.r && action.w { + return fmt.Errorf("unexpected rw mask") + } + if action.r == r || action.w == w { + masks = append(masks, i) } - baseRe, bits, ok := decodeBits(op) - if !ok { - return + } else if reg, ok := op.(operandVReg); ok { + if action.r { + rSizes = append(rSizes, reg.vecShape) } - baseDomain, err := unify.NewStringRegex(baseRe) - if err != nil { - panic("parsing baseRe: " + err.Error()) + if action.w { + wSizes = append(wSizes, reg.vecShape) } - add("bits", unify.NewValue(unify.NewStringExact(fmt.Sprint(bits)))) - add("w", unify.NewValue(unify.NewStringExact(fmt.Sprint(width)))) - add("base", unify.NewValue(baseDomain)) } - } else { - // TODO: Immediates - add("UNKNOWN", unify.NewValue(unify.NewStringExact(o))) - } - // dq => 128 bits (XMM) - // qq => 256 bits (YMM) - // mskw => K - // z[iuf?](8|16|32|...) => 512 bits (ZMM) - // - // Are these always XMM/YMM/ZMM or can other irregular things - // with large widths use these same codes? - // - // The only zi* is zi32. I don't understand the difference between - // zi32 and zu32 or why there are a bunch of zu* but only one zi. - // - // The xtype tells you the element type. i8, i16, i32, i64, etc. - // - // Things like AVX2 VPAND have an xtype of u256. - // I think we have to map that to all widths. - // There's no u512 (presumably those are all masked, so elem width matters). - // These are all Category: LOGICAL. Maybe we use that info? 
+ } + if len(masks) == 0 { + return nil + } if r { + sizes = rSizes + if len(sizes) == 0 { + sizes = wSizes + } + } + if w { + sizes = wSizes + if len(sizes) == 0 { + sizes = rSizes + } + } + + if len(sizes) == 0 { + return fmt.Errorf("cannot infer mask size: no register operands") + } + shape, ok := singular(sizes) + if !ok { + return fmt.Errorf("cannot infer mask size: multiple register sizes %v", sizes) + } + for _, i := range masks { + m := ops[i].(operandMask) + m.vecShape = shape + ops[i] = m + } + return nil + } + if err := inferMask(true, false); err != nil { + return fail(err) + } + if err := inferMask(false, true); err != nil { + return fail(err) + } + + var inVals, outVals []*unify.Value + for asmPos, op := range ops { + fields, values := op.toValue() + + fields = append(fields, "asmPos") + values = append(values, unify.NewValue(unify.NewStringExact(fmt.Sprint(asmPos)))) + + action := op.common().action + if action.r { inVal := unify.NewValue(unify.NewDef(fields, values)) inVals = append(inVals, inVal) } - if w { + if action.w { outVal := unify.NewValue(unify.NewDef(fields, values)) outVals = append(outVals, outVal) } } - return unify.NewTuple(inVals...), unify.NewTuple(outVals...) + return unify.NewTuple(inVals...), unify.NewTuple(outVals...), nil +} + +func singular[T comparable](xs []T) (T, bool) { + if len(xs) == 0 { + return *new(T), false + } + for _, x := range xs[1:] { + if x != xs[0] { + return *new(T), false + } + } + return xs[0], true } func decodeReg(op *xeddata.Operand) (w int, ok bool) { + // op.Width tells us the total width, e.g.,: + // + // dq => 128 bits (XMM) + // qq => 256 bits (YMM) + // mskw => K + // z[iuf?](8|16|32|...) => 512 bits (ZMM) + // + // But the encoding is really weird and it's not clear if these *always* + // mean XMM/YMM/ZMM or if other irregular things can use these large widths. + // Hence, we dig into the register sets themselves. 
+ if !strings.HasPrefix(op.NameLHS(), "REG") { return 0, false } @@ -181,31 +379,75 @@ func decodeReg(op *xeddata.Operand) (w int, ok bool) { var xtypeRe = regexp.MustCompile(`^([iuf])([0-9]+)$`) -func decodeBits(op *xeddata.Operand) (baseRe string, bits int, ok bool) { +// scalarBaseType describes the base type of a scalar element. This is a Go +// type, but without the bit width suffix (with the exception of +// scalarBaseIntOrUint). +type scalarBaseType int + +const ( + scalarBaseInt scalarBaseType = iota + scalarBaseUint + scalarBaseIntOrUint // Signed or unsigned is unspecified + scalarBaseFloat + scalarBaseComplex + scalarBaseBFloat + scalarBaseHFloat +) + +func (s scalarBaseType) regex() string { + switch s { + case scalarBaseInt: + return "int" + case scalarBaseUint: + return "uint" + case scalarBaseIntOrUint: + return "int|uint" + case scalarBaseFloat: + return "float" + case scalarBaseComplex: + return "complex" + case scalarBaseBFloat: + return "BFloat" + case scalarBaseHFloat: + return "HFloat" + } + panic(fmt.Sprintf("unknown scalar base type %d", s)) +} + +func decodeType(op *xeddata.Operand) (base scalarBaseType, bits int, ok bool) { + // The xtype tells you the element type. i8, i16, i32, i64, f32, etc. + // + // TODO: Things like AVX2 VPAND have an xtype of u256 because they're + // element-width agnostic. Do I map that to all widths, or just omit the + // element width and let unification flesh it out? There's no u512 + // (presumably those are all masked, so elem width matters). These are all + // Category: LOGICAL, so maybe we could use that info? + // Handle some weird ones. switch op.Xtype { // 8-bit float formats as defined by Open Compute Project "OCP 8-bit // Floating Point Specification (OFP8)". 
- case "bf8", // E5M2 float - "hf8": // E4M3 float - return "", 0, false // TODO + case "bf8": // E5M2 float + return scalarBaseBFloat, 8, true + case "hf8": // E4M3 float + return scalarBaseHFloat, 8, true case "bf16": // bfloat16 float - return "", 0, false // TODO + return scalarBaseBFloat, 16, true case "2f16": // Complex consisting of 2 float16s. Doesn't exist in Go, but we can say // what it would be. - return "complex", 32, true + return scalarBaseComplex, 32, true case "2i8", "2I8": // These just use the lower INT8 in each 16 bit field. // As far as I can tell, "2I8" is a typo. - return "int", 8, true + return scalarBaseInt, 8, true } // The rest follow a simple pattern. m := xtypeRe.FindStringSubmatch(op.Xtype) if m == nil { // TODO: Report unrecognized xtype - return "", 0, false + return 0, 0, false } bits, _ = strconv.Atoi(m[2]) switch m[1] { @@ -213,9 +455,10 @@ func decodeBits(op *xeddata.Operand) (baseRe string, bits int, ok bool) { // XED is rather inconsistent about what's signed, unsigned, or doesn't // matter, so merge them together and let the Go definitions narrow as // appropriate. Maybe there's a better way to do this. - baseRe = "int|uint" + return scalarBaseIntOrUint, bits, true case "f": - baseRe = "float" + return scalarBaseFloat, bits, true + default: + panic("unreachable") } - return baseRe, bits, true } diff --git a/x86/xeddata/operand.go b/x86/xeddata/operand.go index 8de99d67..33c5610f 100644 --- a/x86/xeddata/operand.go +++ b/x86/xeddata/operand.go @@ -44,8 +44,8 @@ type Operand struct { // Action describes argument types. // - // Possible values: "r", "w", "rw", "cr", "cw", "crw". - // Optional "c" prefix represents conditional access. + // Possible values: "r", "w", "rw", "cr", "cw", "crw", "rcw". + // The "c" prefix before "r" or "w" represents conditional read or write. Action string // Width descriptor. 
It can express simple width like "w" (word, 16bit) From 302262805d806c94f67ec5ee3585f3ddbb1caa84 Mon Sep 17 00:00:00 2001 From: Vishwanatha HD Date: Wed, 9 Apr 2025 12:17:20 +0000 Subject: [PATCH 056/200] s390x/s390xasm: fix plan9 disassembly regressions on s390x Regressions were seen in plan9 disassembled syntax on s390x machines. Raising a CL to fix all of them. Below are the plan9 syntax regressions noticed: 1) LARL was printed instead of MOVD. 2) Operands for LGDR and LCDBR were printed in a reverse order. 3) MADBR was printed instead of FMADD. 4) VFM was printed instead of WFMDB. Also the mask fields were unnecessarily getting printed. 5) VFS was printed instead of WFSDB. Also the mask fields were unnecessarily getting printed. 6) JMP was printed instead of BR. 7) JMP R14 was printed instead of RET. 8) BRC was printed instead of BVS. Change-Id: I9166f8ab51ad827bfeeed24a219ceb9b8c41c470 Reviewed-on: https://go-review.googlesource.com/c/arch/+/663756 Reviewed-by: Cherry Mui LUCI-TryBot-Result: Go LUCI Reviewed-by: Keith Randall Auto-Submit: Keith Randall --- s390x/s390xasm/decode_test.go | 9 -------- s390x/s390xasm/plan9.go | 35 ++++++++++++++++++++---------- s390x/s390xasm/testdata/decode.txt | 8 +++++++ 3 files changed, 32 insertions(+), 20 deletions(-) diff --git a/s390x/s390xasm/decode_test.go b/s390x/s390xasm/decode_test.go index 29bce8e8..047ebeed 100644 --- a/s390x/s390xasm/decode_test.go +++ b/s390x/s390xasm/decode_test.go @@ -30,15 +30,6 @@ func TestDecode(t *testing.T) { } } -// Provide a fake symbol to verify PCrel argument decoding. -func symlookup(pc uint64) (string, uint64) { - foopc := uint64(0x100000) - if pc >= foopc && pc < foopc+0x10 { - return "foo", foopc - } - return "", 0 -} - func decode(data []byte, t *testing.T, filename string) { all := string(data) // Simulate PC based on number of instructions found in the test file. 
diff --git a/s390x/s390xasm/plan9.go b/s390x/s390xasm/plan9.go index 95464294..482433b4 100644 --- a/s390x/s390xasm/plan9.go +++ b/s390x/s390xasm/plan9.go @@ -142,7 +142,7 @@ func GoSyntax(inst Inst, pc uint64, symname func(uint64) (string, uint64)) strin case LRVH: op = "MOVHBR" } - case LA, LAY: + case LA, LAY, LARL: args[0], args[1] = args[1], args[0] op = "MOVD" @@ -349,6 +349,17 @@ func GoSyntax(inst Inst, pc uint64, symname func(uint64) (string, uint64)) strin case SLBGR: op = "SUBE" args[0], args[1] = args[1], args[0] + case MADBR: + op = "FMADD" + args[0], args[1], args[2] = args[1], args[2], args[0] + case VFM: + op = "WFMDB" + args[0], args[1], args[2] = args[1], args[2], args[0] + args = args[0:3] + case VFS: + op = "WFSDB" + args[0], args[1], args[2] = args[2], args[1], args[0] + args = args[0:3] case MSGFR, MHI, MSFI, MSGFI: switch inst.Op { case MSGFR, MHI, MSFI: @@ -500,16 +511,16 @@ func GoSyntax(inst Inst, pc uint64, symname func(uint64) (string, uint64)) strin if err != nil { return fmt.Sprintf("GoSyntax: error in converting Atoi:%s", err) } - opStr, check := branchOnConditionOp(mask, inst.Op) + opStr := branchOnConditionOp(mask, inst.Op) if opStr != "" { op = opStr } if op == "SYNC" || op == "NOPH" { return op } - if check { - args[0] = args[1] - args = args[:1] + if op == "RET" { + args = args[:0] + return op } case LOCGR: mask, err := strconv.Atoi(args[2][1:]) @@ -1036,6 +1047,9 @@ func GoSyntax(inst Inst, pc uint64, symname func(uint64) (string, uint64)) strin // branch on relative mnemonic. 
func branch_relative_op(mask int, opconst Op) (op string, check bool) { switch mask & 0xf { + case 1: + op = "BVS" + check = true case 2: op = "BGT" check = true @@ -1061,7 +1075,7 @@ func branch_relative_op(mask int, opconst Op) (op string, check bool) { op = "BLEU" check = true case 15: - op = "JMP" // BR + op = "BR" check = true } return op, check @@ -1069,17 +1083,16 @@ func branch_relative_op(mask int, opconst Op) (op string, check bool) { // This function returns corresponding extended mnemonic for the given // brach on condition mnemonic. -func branchOnConditionOp(mask int, opconst Op) (op string, check bool) { +func branchOnConditionOp(mask int, opconst Op) (op string) { switch mask & 0xf { case 0: op = "NOPH" case 14: op = "SYNC" case 15: - op = "JMP" - check = true + op = "RET" } - return op, check + return op } // This function returns corresponding plan9 mnemonic for the native bitwise mnemonic. @@ -1260,7 +1273,7 @@ func reverseOperandOrder(op Op) bool { switch op { case LOCR, MLGR: return true - case LTEBR, LTDBR: + case LTEBR, LTDBR, LCDBR, LGDR: return true case VLEIB, VLEIH, VLEIF, VLEIG, VPDI: return true diff --git a/s390x/s390xasm/testdata/decode.txt b/s390x/s390xasm/testdata/decode.txt index f04715b2..29a5e699 100644 --- a/s390x/s390xasm/testdata/decode.txt +++ b/s390x/s390xasm/testdata/decode.txt @@ -14,6 +14,7 @@ b9e24098| plan9 MOVDLT R8, R9 b9e270ba| plan9 MOVDNE R10, R11 b9f23012| plan9 LOCR $3, R2, R1 + b3130020| plan9 LCDBR F0, F2 b9e27065| plan9 MOVDNE R5, R6 e310f0000004| plan9 MOVD (R15), R1 e320f0000014| plan9 MOVW (R15), R2 @@ -122,6 +123,13 @@ c017fffffffe| plan9 XORW $-2, R1 b93a0008| plan9 KDSA R0, R8 b9296024| plan9 KMA R2, R6, R4 b92d6024| plan9 KMCTR R2, R6, R4 + b31e0042| plan9 FMADD F4, F2, F0 +e748a00830e7| plan9 WFMDB V8, V10, V4 +e743000830e2| plan9 WFSDB V0, V3, V4 + b3cd0026| plan9 LGDR F6, R2 + a7f4008c| plan9 BR 70(PC) + a7140005| plan9 BVS 2(PC) + 07fe| plan9 RET e743400000f3| plan9 VAB V3, V4 e743600000f3| plan9 VAB 
V3, V6, V4 e743400010f3| plan9 VAH V3, V4 From b0f513ddd30597a7a8c8ec3e32df653a6b016054 Mon Sep 17 00:00:00 2001 From: Junyang Shao Date: Mon, 21 Apr 2025 19:48:03 +0000 Subject: [PATCH 057/200] internal/simdgen: complete the godef code gen This generator supports generating the godefs of SIMD instructions with some shape constraints, please check gen_utility.go for the details. Change-Id: I87151740e161919c67c3f20c0258de6611e4955e Reviewed-on: https://go-review.googlesource.com/c/arch/+/667155 LUCI-TryBot-Result: Go LUCI Auto-Submit: Junyang Shao Reviewed-by: Junyang Shao Reviewed-by: David Chase --- internal/simdgen/.gitignore | 1 + internal/simdgen/categories.yaml | 119 +++- internal/simdgen/gen_simdGenericOps.go | 57 ++ internal/simdgen/gen_simdIntrinsics.go | 109 ++++ internal/simdgen/gen_simdMachineOps.go | 130 ++++ internal/simdgen/gen_simdTypes.go | 227 +++++++ internal/simdgen/gen_simdrules.go | 146 +++++ internal/simdgen/gen_simdssa.go | 210 +++++++ internal/simdgen/gen_utility.go | 581 ++++++++++++++++++ internal/simdgen/go.yaml | 558 +++++++++++------ internal/simdgen/godefs.go | 170 +++-- internal/simdgen/main.go | 61 +- internal/simdgen/ops/AddSub/categories.yaml | 29 + internal/simdgen/ops/AddSub/go.yaml | 101 +++ .../simdgen/ops/BitwiseLogic/categories.yaml | 31 + internal/simdgen/ops/BitwiseLogic/go.yaml | 149 +++++ internal/simdgen/ops/Compares/categories.yaml | 19 + internal/simdgen/ops/Compares/go.yaml | 57 ++ .../simdgen/ops/FPonlyArith/categories.yaml | 8 + internal/simdgen/ops/FPonlyArith/go.yaml | 18 + internal/simdgen/ops/Mul/categories.yaml | 35 ++ internal/simdgen/ops/Mul/go.yaml | 116 ++++ internal/simdgen/ops/main.go | 75 +++ internal/simdgen/types.yaml | 86 +-- internal/simdgen/xed.go | 52 +- 25 files changed, 2817 insertions(+), 328 deletions(-) create mode 100644 internal/simdgen/.gitignore create mode 100644 internal/simdgen/gen_simdGenericOps.go create mode 100644 internal/simdgen/gen_simdIntrinsics.go create mode 100644 
internal/simdgen/gen_simdMachineOps.go create mode 100644 internal/simdgen/gen_simdTypes.go create mode 100644 internal/simdgen/gen_simdrules.go create mode 100644 internal/simdgen/gen_simdssa.go create mode 100644 internal/simdgen/gen_utility.go create mode 100644 internal/simdgen/ops/AddSub/categories.yaml create mode 100644 internal/simdgen/ops/AddSub/go.yaml create mode 100644 internal/simdgen/ops/BitwiseLogic/categories.yaml create mode 100644 internal/simdgen/ops/BitwiseLogic/go.yaml create mode 100644 internal/simdgen/ops/Compares/categories.yaml create mode 100644 internal/simdgen/ops/Compares/go.yaml create mode 100644 internal/simdgen/ops/FPonlyArith/categories.yaml create mode 100644 internal/simdgen/ops/FPonlyArith/go.yaml create mode 100644 internal/simdgen/ops/Mul/categories.yaml create mode 100644 internal/simdgen/ops/Mul/go.yaml create mode 100644 internal/simdgen/ops/main.go diff --git a/internal/simdgen/.gitignore b/internal/simdgen/.gitignore new file mode 100644 index 00000000..1cc9ae43 --- /dev/null +++ b/internal/simdgen/.gitignore @@ -0,0 +1 @@ +testdata/* diff --git a/internal/simdgen/categories.yaml b/internal/simdgen/categories.yaml index b7be71ce..1f2fb056 100644 --- a/internal/simdgen/categories.yaml +++ b/internal/simdgen/categories.yaml @@ -1,7 +1,118 @@ !sum - go: Add - category: binary -- go: AddSaturated - category: binary + commutative: "true" + extension: "AVX.*" +- go: SaturatedAdd + commutative: "true" + extension: "AVX.*" +- go: MaskedAdd + masked: "true" + commutative: "true" + extension: "AVX.*" +- go: MaskedSaturatedAdd + masked: "true" + commutative: "true" + extension: "AVX.*" - go: Sub - category: binary + commutative: "true" + extension: "AVX.*" +- go: SaturatedSub + commutative: "true" + extension: "AVX.*" +- go: MaskedSub + masked: "true" + commutative: "true" + extension: "AVX.*" +- go: MaskedSaturatedSub + masked: "true" + commutative: "true" + extension: "AVX.*" +- go: And + commutative: "true" + extension: "AVX.*" 
+- go: MaskedAnd + masked: "true" + commutative: "true" + extension: "AVX.*" +- go: Or + commutative: "true" + extension: "AVX.*" +- go: MaskedOr + masked: "true" + commutative: "true" + extension: "AVX.*" +- go: AndNot + commutative: "true" + extension: "AVX.*" +- go: MaskedAndNot + masked: "true" + commutative: "true" + extension: "AVX.*" +- go: Xor + commutative: "true" + extension: "AVX.*" +- go: MaskedXor + masked: "true" + commutative: "true" + extension: "AVX.*" +# We also have PTEST and VPTERNLOG, those should be hidden from the users +# and only appear in rewrite rules. +- go: Equal + constImm: 0 + commutative: "true" + extension: "AVX.*" +- go: Greater + constImm: 6 + commutative: "false" + extension: "AVX.*" +- go: MaskedEqual + constImm: 0 + masked: "true" + commutative: "true" + extension: "AVX.*" +- go: MaskedGreater + constImm: 6 + masked: "true" + commutative: "false" + extension: "AVX.*" +- go: Div + commutative: "true" + extension: "AVX.*" +- go: MaskedDiv + commutative: "true" + masked: "true" + extension: "AVX.*" +- go: Mul + commutative: "true" + extension: "AVX.*" +- go: MulEvenWiden + commutative: "true" + extension: "AVX.*" + documentation: "Multiplies the even index elements from the two sources of size X at index i, store the result of size 2X at index i/2" +- go: MulHigh + commutative: "true" + extension: "AVX.*" + documentation: "Multiplies the elements from the two sources of size X at index i, store the high X bits of the result of size 2X at index i" +- go: MulLow + commutative: "true" + extension: "AVX.*" + documentation: "Multiplies the elements from the two sources of size X at index i, store the low X bits of the result of size 2X at index i" +- go: MaskedMul + masked: "true" + commutative: "true" + extension: "AVX.*" +- go: MaskedMulEvenWiden + masked: "true" + commutative: "true" + extension: "AVX.*" + documentation: "Multiplies the even index elements from the two sources of size X at index i, store the result of size 2X at 
index i/2" +- go: MaskedMulHigh + masked: "true" + commutative: "true" + extension: "AVX.*" + documentation: "Multiplies the elements from the two sources of size X at index i, store the high X bits of the result of size 2X at index i" +- go: MaskedMulLow + masked: "true" + commutative: "true" + extension: "AVX.*" + documentation: "Multiplies the elements from the two sources of size X at index i, store the low X bits of the result of size 2X at index i" diff --git a/internal/simdgen/gen_simdGenericOps.go b/internal/simdgen/gen_simdGenericOps.go new file mode 100644 index 00000000..2b0fa008 --- /dev/null +++ b/internal/simdgen/gen_simdGenericOps.go @@ -0,0 +1,57 @@ +// Copyright 2025 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package main + +import ( + "fmt" + "sort" +) + +const simdGenericOpsTmpl = `// Code generated by x/arch/internal/simdgen using 'go run . -xedPath $XED_PATH -o godefs -goroot $GOROOT go.yaml types.yaml categories.yaml'; DO NOT EDIT. +package main + +func simdGenericOps() []opData { + return []opData{ +{{- range . }} + {name: "{{.OpName}}", argLength: {{.OpInLen}}, commutative: {{.Comm}}}, +{{- end }} + } +} +` + +// writeSIMDGenericOps generates the generic ops and writes it to simdAMD64ops.go +// within the specified directory. 
+func writeSIMDGenericOps(directory string, ops []Operation) error { + file, t, err := openFileAndPrepareTemplate(directory, "src/cmd/compile/internal/ssa/_gen/simdgenericOps.go", simdGenericOpsTmpl) + if err != nil { + return err + } + defer file.Close() + type genericOpsData struct { + sortKey string + OpName string + OpInLen int + Comm string + } + opsData := make([]genericOpsData, 0) + for _, op := range ops { + _, _, _, _, _, gOp, err := op.shape() + if err != nil { + return err + } + genericNames := gOp.Go + *gOp.In[0].Go + opsData = append(opsData, genericOpsData{*gOp.In[0].Go + gOp.Go, genericNames, len(gOp.In), op.Commutative}) + } + sort.Slice(opsData, func(i, j int) bool { + return opsData[i].sortKey < opsData[j].sortKey + }) + + err = t.Execute(file, opsData) + if err != nil { + return fmt.Errorf("failed to execute template: %w", err) + } + + return nil +} diff --git a/internal/simdgen/gen_simdIntrinsics.go b/internal/simdgen/gen_simdIntrinsics.go new file mode 100644 index 00000000..5d4a27f2 --- /dev/null +++ b/internal/simdgen/gen_simdIntrinsics.go @@ -0,0 +1,109 @@ +// Copyright 2025 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package main + +import ( + "fmt" +) + +const simdIntrinsicsTmpl = `// Code generated by x/arch/internal/simdgen using 'go run . -xedPath $XED_PATH -o godefs -goroot $GOROOT go.yaml types.yaml categories.yaml'; DO NOT EDIT. 
+package ssagen + +import ( + "cmd/compile/internal/ir" + "cmd/compile/internal/ssa" + "cmd/compile/internal/types" + "cmd/internal/sys" +) + +func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies ...sys.ArchFamily)) { +{{- range .OpsLen1}} + addF("internal/simd", "{{(index .In 0).Go}}.{{.Go}}", opLen1(ssa.Op{{.Go}}{{(index .In 0).Go}}, {{.GoArch}}), sys.AMD64) +{{- end}} +{{- range .OpsLen2}} + addF("internal/simd", "{{(index .In 0).Go}}.{{.Go}}", opLen2(ssa.Op{{.Go}}{{(index .In 0).Go}}, {{.GoArch}}), sys.AMD64) +{{- end}} +{{- range .OpsLen3}} + addF("internal/simd", "{{(index .In 0).Go}}.{{.Go}}", opLen3(ssa.Op{{.Go}}{{(index .In 0).Go}}, {{.GoArch}}), sys.AMD64) +{{- end}} + +{{- range .VectorConversions }} + addF("internal/simd", "{{.Tsrc.Name}}.As{{.Tdst.Name}}", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64) +{{- end}} + +{{- range $size, $ts := .TypeMap }} +{{- range $t := $ts }} + addF("internal/simd", "Load{{$t.Name}}", simdLoad(), sys.AMD64) + addF("internal/simd", "{{$t.Name}}.Store", simdStore(), sys.AMD64) +{{- end}} +{{- end}} +{{- range .Masks }} + addF("internal/simd", "{{.Name}}.As{{.VectorCounterpart}}", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64) + addF("internal/simd", "{{.VectorCounterpart}}.As{{.Name}}", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64) + addF("internal/simd", "{{.Name}}.And", opLen2(ssa.OpAnd{{.ReshapedVectorWithAndOr}}, types.TypeVec{{.Size}}), sys.AMD64) + addF("internal/simd", "{{.Name}}.Or", opLen2(ssa.OpOr{{.ReshapedVectorWithAndOr}}, types.TypeVec{{.Size}}), sys.AMD64) +{{- end}} +} + +func opLen1(op ssa.Op, t *types.Type) func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { + return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { + return s.newValue1(op, t, args[0]) + } +} + +func opLen2(op ssa.Op, t *types.Type) func(s *state, n 
*ir.CallExpr, args []*ssa.Value) *ssa.Value { + return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { + return s.newValue2(op, t, args[0], args[1]) + } +} + +func opLen3(op ssa.Op, t *types.Type) func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { + return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { + return s.newValue3(op, t, args[0], args[1], args[2]) + } +} + +func simdLoad() func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { + return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { + return s.newValue2(ssa.OpLoad, n.Type(), args[0], s.mem()) + } +} + +func simdStore() func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { + return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { + s.store(args[0].Type, args[1], args[0]) + return nil + } +} +` + +// writeSIMDIntrinsics generates the intrinsic mappings and writes it to simdintrinsics.go +// within the specified directory. +func writeSIMDIntrinsics(directory string, ops []Operation, typeMap simdTypeMap) error { + file, t, err := openFileAndPrepareTemplate(directory, "src/cmd/compile/internal/ssagen/simdintrinsics.go", simdIntrinsicsTmpl) + if err != nil { + return err + } + defer file.Close() + opsLen1, opsLen2, opsLen3, err := genericOpsByLen(ops) + if err != nil { + return err + } + + type templateData struct { + OpsLen1 []Operation + OpsLen2 []Operation + OpsLen3 []Operation + TypeMap simdTypeMap + VectorConversions []simdTypePair + Masks []simdType + } + err = t.Execute(file, templateData{opsLen1, opsLen2, opsLen3, typeMap, vConvertFromTypeMap(typeMap), masksFromTypeMap(typeMap)}) + if err != nil { + return fmt.Errorf("failed to execute template: %w", err) + } + + return nil +} diff --git a/internal/simdgen/gen_simdMachineOps.go b/internal/simdgen/gen_simdMachineOps.go new file mode 100644 index 00000000..0deec9c6 --- /dev/null +++ b/internal/simdgen/gen_simdMachineOps.go @@ -0,0 +1,130 @@ +// Copyright 2025 The Go 
Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package main + +import ( + "fmt" + "sort" +) + +const simdMachineOpsTmpl = `// Code generated by x/arch/internal/simdgen using 'go run . -xedPath $XED_PATH -o godefs -goroot $GOROOT go.yaml types.yaml categories.yaml'; DO NOT EDIT. +package main + +func simdAMD64Ops(fp1fp1, fp2fp1, fp2m1, fp2m1fp1, fp2m1m1 regInfo) []opData { + return []opData{ +{{- range .OpsData }} + {name: "{{.OpName}}", argLength: {{.OpInLen}}, reg: {{.RegInfo}}, asm: "{{.Asm}}", commutative: {{.Comm}}, typ: "{{.Type}}"}, +{{- end }} +{{- range .OpsDataImm }} + {name: "{{.OpName}}", argLength: {{.OpInLen}}, reg: {{.RegInfo}}, asm: "{{.Asm}}", aux: "Int8", commutative: {{.Comm}}, typ: "{{.Type}}"}, +{{- end }} + } +} +` + +// writeSIMDMachineOps generates the machine ops and writes it to simdAMD64ops.go +// within the specified directory. +func writeSIMDMachineOps(directory string, ops []Operation) error { + file, t, err := openFileAndPrepareTemplate(directory, "src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go", simdMachineOpsTmpl) + if err != nil { + return err + } + defer file.Close() + type opData struct { + sortKey string + OpName string + Asm string + OpInLen int + RegInfo string + Comm string + Type string + } + type machineOpsData struct { + OpsData []opData + OpsDataImm []opData + } + seen := map[string]struct{}{} + regInfoSet := map[string]bool{"fp1fp1": true, "fp2fp1": true, "fp2m1": true, "fp2m1fp1": true, "fp2m1m1": true} + opsData := make([]opData, 0) + opsDataImm := make([]opData, 0) + for _, op := range ops { + shapeIn, shapeOut, maskType, _, _, gOp, err := op.shape() + if err != nil { + return err + } + asm := gOp.Asm + if maskType == OneMask { + asm += "Masked" + } + asm = fmt.Sprintf("%s%d", asm, *gOp.Out[0].Bits) + // TODO: all our masked operations are now zeroing, we need to generate machine ops with merging masks, maybe copy + // one 
here with a name suffix "Merging". The rewrite rules will need them. + if _, ok := seen[asm]; ok { + continue + } + seen[asm] = struct{}{} + var regInfo string + // Process input reg shapes. + var vRegInCnt, kMaskInCnt, vRegOutCnt, kMaskOutCnt int + for _, in := range gOp.In { + if in.Class == "vreg" { + vRegInCnt++ + } else if in.Class == "mask" { + kMaskInCnt++ + } + } + for _, out := range gOp.Out { + // If class overwrite is happening, that's not really a mask but a vreg. + if out.Class == "vreg" || out.OverwriteClass != nil { + vRegOutCnt++ + } else if out.Class == "mask" { + kMaskOutCnt++ + } + } + var vRegInS, kMaskInS, vRegOutS, kMaskOutS string + if vRegInCnt > 0 { + vRegInS = fmt.Sprintf("fp%d", vRegInCnt) + } + if kMaskInCnt > 0 { + kMaskInS = fmt.Sprintf("m%d", kMaskInCnt) + } + if vRegOutCnt > 0 { + vRegOutS = fmt.Sprintf("fp%d", vRegOutCnt) + } + if kMaskOutCnt > 0 { + kMaskOutS = fmt.Sprintf("m%d", kMaskOutCnt) + } + regInfo = fmt.Sprintf("%s%s%s%s", vRegInS, kMaskInS, vRegOutS, kMaskOutS) + if _, ok := regInfoSet[regInfo]; !ok { + return fmt.Errorf("unsupported register constraint, please update the template and AMD64Ops.go: %s", regInfo) + } + var outType string + if shapeOut == OneVregOut || gOp.Out[0].OverwriteClass != nil { + // If class overwrite is happening, that's not really a mask but a vreg. 
+ outType = fmt.Sprintf("Vec%d", *gOp.Out[0].Bits) + } else if shapeOut == OneKmaskOut { + outType = "Mask" + } else { + return fmt.Errorf("simdgen does not recognize this output shape: %+v", shapeOut) + } + if shapeIn == OneConstImmIn || shapeIn == OneKmaskConstImmIn { + opsDataImm = append(opsDataImm, opData{*gOp.In[0].Go + gOp.Go, asm, gOp.Asm, len(gOp.In), regInfo, gOp.Commutative, outType}) + } else { + opsData = append(opsData, opData{*gOp.In[0].Go + gOp.Go, asm, gOp.Asm, len(gOp.In), regInfo, gOp.Commutative, outType}) + } + } + sort.Slice(opsData, func(i, j int) bool { + return opsData[i].sortKey < opsData[j].sortKey + }) + sort.Slice(opsDataImm, func(i, j int) bool { + return opsDataImm[i].sortKey < opsDataImm[j].sortKey + }) + err = t.Execute(file, machineOpsData{opsData, opsDataImm}) + if err != nil { + return fmt.Errorf("failed to execute template: %w", err) + } + + return nil +} diff --git a/internal/simdgen/gen_simdTypes.go b/internal/simdgen/gen_simdTypes.go new file mode 100644 index 00000000..14395010 --- /dev/null +++ b/internal/simdgen/gen_simdTypes.go @@ -0,0 +1,227 @@ +// Copyright 2025 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package main + +import ( + "fmt" + "strings" +) + +type simdType struct { + Name string // The go type name of this simd type, for example Int32x4. + Lanes int // The number of elements in this vector/mask. + Base string // The element's type, like for Int32x4 it will be int32. + Fields string // The struct fields, it should be right formatted. + Type string // Either "mask" or "vreg" + VectorCounterpart string // For mask use only: just replacing the "Mask" in [simdType.Name] with "Int" + ReshapedVectorWithAndOr string // For mask use only: vector AND and OR are only available in some shape with element width 32. 
+ Size int // The size of the type +} + +type simdTypeMap map[int][]simdType + +type simdTypePair struct { + Tsrc simdType + Tdst simdType +} + +const simdTypesTmpl = `// Code generated by x/arch/internal/simdgen using 'go run . -xedPath $XED_PATH -o godefs -goroot $GOROOT go.yaml types.yaml categories.yaml'; DO NOT EDIT. + +package simd + +{{- range $size, $ts := .TypeMap }} + +// v{{$size}} is a tag type that tells the compiler that this is really {{$size}}-bit SIMD +type v{{$size}} struct { + _{{$size}} struct{} +} + +{{- range $i, $tsrc := $ts }} + +// {{$tsrc.Name}} is a {{$size}}-bit SIMD vector of {{$tsrc.Lanes}} {{$tsrc.Base}} +type {{$tsrc.Name}} struct { +{{$tsrc.Fields}} +} + +// Len returns the number of elements in a {{$tsrc.Name}} +func (x {{$tsrc.Name}}) Len() int { return {{$tsrc.Lanes}} } + +// Load{{$tsrc.Name}} loads a {{$tsrc.Name}} from an array +// +//go:noescape +func Load{{$tsrc.Name}}(y *[{{$tsrc.Lanes}}]{{$tsrc.Base}}) {{$tsrc.Name}} + +// Store stores a {{$tsrc.Name}} to an array +// +//go:noescape +func (x {{$tsrc.Name}}) Store(y *[{{$tsrc.Lanes}}]{{$tsrc.Base}}) + +{{- end}} +{{- end}} +` + +const simdStubsTmpl = `// Code generated by x/arch/internal/simdgen using 'go run . -xedPath $XED_PATH -o godefs -goroot $GOROOT go.yaml types.yaml categories.yaml'; DO NOT EDIT. 
+ +package simd + +{{- range .OpsLen1}} + +// Asm: {{.Asm}}, Arch: {{.Extension}}{{if .Documentation}}, Doc: {{.Documentation}}{{end}} +func (x {{(index .In 0).Go}}) {{.Go}}() {{(index .Out 0).Go}} + +{{- end}} +{{- range .OpsLen2}} + +// Asm: {{.Asm}}, Arch: {{.Extension}}{{if .Documentation}}, Doc: {{.Documentation}}{{end}} +func (x {{(index .In 0).Go}}) {{.Go}}(y {{(index .In 1).Go}}) {{(index .Out 0).Go}} + +{{- end}} +{{- range .OpsLen3}} + +// Asm: {{.Asm}}, Arch: {{.Extension}}{{if .Documentation}}, Doc: {{.Documentation}}{{end}} +func (x {{(index .In 0).Go}}) {{.Go}}(y {{(index .In 1).Go}}, z {{(index .In 2).Go}}) {{(index .Out 0).Go}} + +{{- end}} +{{- range .VectorConversions }} + +// {{.Tdst.Name}} converts from {{.Tsrc.Name}} to {{.Tdst.Name}} +func (from {{.Tsrc.Name}}) As{{.Tdst.Name}}() (to {{.Tdst.Name}}) + +{{- end}} +{{- range .Masks }} + +// converts from {{.Name}} to {{.VectorCounterpart}} +func (from {{.Name}}) As{{.VectorCounterpart}}() (to {{.VectorCounterpart}}) + +// converts from {{.VectorCounterpart}} to {{.Name}} +func (from {{.VectorCounterpart}}) As{{.Name}}() (to {{.Name}}) + +func (x {{.Name}}) And(y {{.Name}}) {{.Name}} + +func (x {{.Name}}) Or(y {{.Name}}) {{.Name}} + +{{- end}} +` + +// parseSIMDTypes groups go simd types by their vector sizes, and +// returns a map whose key is the vector size, value is the simd type. +func parseSIMDTypes(ops []Operation) simdTypeMap { + // TODO: maybe instead of going over ops, let's try go over types.yaml. + ret := map[int][]simdType{} + seen := map[string]struct{}{} + processArg := func(arg Operand) { + if arg.Class == "immediate" { + // Immediates are not encoded as vector types. 
+ return + } + if _, ok := seen[*arg.Go]; ok { + return + } + seen[*arg.Go] = struct{}{} + lanes := *arg.Bits / *arg.ElemBits + base := fmt.Sprintf("%s%d", *arg.Base, *arg.ElemBits) + tagFieldNameS := fmt.Sprintf("%sx%d", base, lanes) + tagFieldS := fmt.Sprintf("%s v%d", tagFieldNameS, *arg.Bits) + valFieldS := fmt.Sprintf("vals%s[%d]%s", strings.Repeat(" ", len(tagFieldNameS)-3), lanes, base) + fields := fmt.Sprintf("\t%s\n\t%s", tagFieldS, valFieldS) + if arg.Class == "mask" { + vectorCounterpart := strings.ReplaceAll(*arg.Go, "Mask", "Int") + reshapedVectorWithAndOr := fmt.Sprintf("Int32x%d", *arg.Bits/32) + ret[*arg.Bits] = append(ret[*arg.Bits], simdType{*arg.Go, lanes, base, fields, arg.Class, vectorCounterpart, reshapedVectorWithAndOr, *arg.Bits}) + // In case the vector counterpart of a mask is not present, put its vector counterpart typedef into the map as well. + if _, ok := seen[vectorCounterpart]; !ok { + seen[vectorCounterpart] = struct{}{} + ret[*arg.Bits] = append(ret[*arg.Bits], simdType{vectorCounterpart, lanes, base, fields, "vreg", "", "", *arg.Bits}) + } + } else { + ret[*arg.Bits] = append(ret[*arg.Bits], simdType{*arg.Go, lanes, base, fields, arg.Class, "", "", *arg.Bits}) + } + } + for _, op := range ops { + for _, arg := range op.In { + processArg(arg) + } + for _, arg := range op.Out { + processArg(arg) + } + } + return ret +} + +func vConvertFromTypeMap(typeMap simdTypeMap) []simdTypePair { + v := []simdTypePair{} + for _, ts := range typeMap { + for i, tsrc := range ts { + for j, tdst := range ts { + if i != j && tsrc.Type == tdst.Type && tsrc.Type == "vreg" { + v = append(v, simdTypePair{tsrc, tdst}) + } + } + } + } + return v +} + +func masksFromTypeMap(typeMap simdTypeMap) []simdType { + m := []simdType{} + for _, ts := range typeMap { + for _, tsrc := range ts { + if tsrc.Type == "mask" { + m = append(m, tsrc) + } + } + } + return m +} + +// writeSIMDTypes generates the simd vector type and writes it to types_amd64.go +// within the 
specified directory. +func writeSIMDTypes(directory string, typeMap simdTypeMap) error { + file, t, err := openFileAndPrepareTemplate(directory, "src/internal/simd/types_amd64.go", simdTypesTmpl) + if err != nil { + return err + } + defer file.Close() + + type templateData struct { + TypeMap simdTypeMap + } + + err = t.Execute(file, templateData{typeMap}) + if err != nil { + return fmt.Errorf("failed to execute template: %w", err) + } + + return nil +} + +// writeSIMDStubs generates the simd vector intrinsic stubs and writes it to stubs_amd64.go +// within the specified directory. +func writeSIMDStubs(directory string, ops []Operation, typeMap simdTypeMap) error { + file, t, err := openFileAndPrepareTemplate(directory, "src/internal/simd/stubs_amd64.go", simdStubsTmpl) + if err != nil { + return err + } + defer file.Close() + opsLen1, opsLen2, opsLen3, err := genericOpsByLen(ops) + if err != nil { + return err + } + + type templateData struct { + OpsLen1 []Operation + OpsLen2 []Operation + OpsLen3 []Operation + VectorConversions []simdTypePair + Masks []simdType + } + + err = t.Execute(file, templateData{opsLen1, opsLen2, opsLen3, vConvertFromTypeMap(typeMap), masksFromTypeMap(typeMap)}) + if err != nil { + return fmt.Errorf("failed to execute template : %w", err) + } + + return nil +} diff --git a/internal/simdgen/gen_simdrules.go b/internal/simdgen/gen_simdrules.go new file mode 100644 index 00000000..c368c770 --- /dev/null +++ b/internal/simdgen/gen_simdrules.go @@ -0,0 +1,146 @@ +// Copyright 2025 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package main + +import ( + "fmt" + "sort" +) + +const simdrulesTmpl = `// Code generated by x/arch/internal/simdgen using 'go run . -xedPath $XED_PATH -o godefs -goroot $GOROOT go.yaml types.yaml categories.yaml'; DO NOT EDIT. 
+ +// The AVX instruction encodings orders vector register from right to left, for example: +// VSUBPS X Y Z means Z=Y-X +// The rules here swapped the order of such X and Y because the ssa to prog lowering in simdssa.go assumes a +// left to right order. +// TODO: we should offload the logic to simdssa.go, instead of here. +// +// Masks are always at the end, immediates always at the beginning. + +{{- range .Ops }} +{{if eq (len .In) 1}}({{.Go}}{{(index .In 0).Go}} x) => ({{.Asm}} x){{end}}{{if eq (len .In) 2}}({{.Go}}{{(index .In 0).Go}} x y) => ({{.Asm}} y x){{end}} +{{- end }} +{{- range .OpsImm }} +({{.Go}}{{(index .In 1).Go}} x y) => ({{.Asm}} [{{(index .In 0).Const}}] y x) +{{- end }} +{{- range .OpsMask}} +({{.Go}}{{(index .In 0).Go}} x y z) => ({{.Asm}} y x (VPMOVVec{{(index .In 0).ElemBits}}x{{(index .In 0).Lanes}}ToM z)) +{{- end }} +{{- range .OpsImmMask}} +({{.Go}}{{(index .In 1).Go}} x y z) => ({{.Asm}} [{{(index .In 0).Const}}] y x (VPMOVVec{{(index .In 1).ElemBits}}x{{(index .In 1).Lanes}}ToM z)) +{{- end }} +{{- range .OpsMaskOut}} +({{.Go}}{{(index .In 0).Go}} x y) => (VPMOVMToVec{{(index .In 0).ElemBits}}x{{(index .In 0).Lanes}} ({{.Asm}} y x)) +{{- end }} +{{- range .OpsImmInMaskOut}} +({{.Go}}{{(index .In 1).Go}} x y) => (VPMOVMToVec{{(index .In 1).ElemBits}}x{{(index .In 1).Lanes}} ({{.Asm}} [{{(index .In 0).Const}}] y x)) +{{- end }} +{{- range .OpsMaskInMaskOut}} +({{.Go}}{{(index .In 0).Go}} x y z) => (VPMOVMToVec{{(index .In 0).ElemBits}}x{{(index .In 0).Lanes}} ({{.Asm}} y x (VPMOVVec{{(index .In 0).ElemBits}}x{{(index .In 0).Lanes}}ToM z))) +{{- end }} +{{- range .OpsImmMaskInMaskOut}} +({{.Go}}{{(index .In 1).Go}} x y z) => (VPMOVMToVec{{(index .In 1).ElemBits}}x{{(index .In 1).Lanes}} ({{.Asm}} [{{(index .In 0).Const}}] y x (VPMOVVec{{(index .In 1).ElemBits}}x{{(index .In 1).Lanes}}ToM z))) +{{- end }} +` + +// writeSIMDRules generates the lowering and rewrite rules for ssa and writes it to simdAMD64.rules +// within the specified 
directory. +func writeSIMDRules(directory string, ops []Operation) error { + file, t, err := openFileAndPrepareTemplate(directory, "src/cmd/compile/internal/ssa/_gen/simdAMD64.rules", simdrulesTmpl) + if err != nil { + return err + } + defer file.Close() + Ops := make([]Operation, 0) + OpsImm := make([]Operation, 0) + OpsMask := make([]Operation, 0) + OpsImmMask := make([]Operation, 0) + OpsMaskOut := make([]Operation, 0) + OpsImmInMaskOut := make([]Operation, 0) + OpsMaskInMaskOut := make([]Operation, 0) + OpsImmMaskInMaskOut := make([]Operation, 0) + + for _, op := range ops { + opInShape, opOutShape, maskType, _, op, _, err := op.shape() + if err != nil { + return err + } + if maskType == OneMask { + op.Asm += "Masked" + } + op.Asm = fmt.Sprintf("%s%d", op.Asm, *op.Out[0].Bits) + // If class overwrite is happening, that's not really a mask but a vreg. + if opOutShape == OneVregOut || op.Out[0].OverwriteClass != nil { + switch opInShape { + case PureVregIn: + Ops = append(Ops, op) + case OneKmaskIn: + OpsMask = append(OpsMask, op) + case OneConstImmIn: + OpsImm = append(OpsImm, op) + case OneKmaskConstImmIn: + OpsImmMask = append(OpsImmMask, op) + case PureKmaskIn: + return fmt.Errorf("simdgen does not support pure k mask instructions, they should be generated by compiler optimizations") + } + } else { + // OneKmaskOut case + switch opInShape { + case PureVregIn: + OpsMaskOut = append(OpsMaskOut, op) + case OneKmaskIn: + OpsMaskInMaskOut = append(OpsMaskInMaskOut, op) + case OneConstImmIn: + OpsImmInMaskOut = append(OpsImmInMaskOut, op) + case OneKmaskConstImmIn: + OpsImmMaskInMaskOut = append(OpsImmMaskInMaskOut, op) + case PureKmaskIn: + return fmt.Errorf("simdgen does not support pure k mask instructions, they should be generated by compiler optimizations") + } + } + } + sortKey := func(op *Operation) string { + return *op.In[0].Go + op.Go + } + sortBySortKey := func(ops []Operation) { + sort.Slice(ops, func(i, j int) bool { + return sortKey(&ops[i]) < 
sortKey(&ops[j]) + }) + } + sortBySortKey(Ops) + sortBySortKey(OpsImm) + sortBySortKey(OpsMask) + sortBySortKey(OpsImmMask) + sortBySortKey(OpsMaskOut) + sortBySortKey(OpsImmInMaskOut) + sortBySortKey(OpsMaskInMaskOut) + sortBySortKey(OpsImmMaskInMaskOut) + + type templateData struct { + Ops []Operation + OpsImm []Operation + OpsMask []Operation + OpsImmMask []Operation + OpsMaskOut []Operation + OpsImmInMaskOut []Operation + OpsMaskInMaskOut []Operation + OpsImmMaskInMaskOut []Operation + } + + err = t.Execute(file, templateData{ + Ops, + OpsImm, + OpsMask, + OpsImmMask, + OpsMaskOut, + OpsImmInMaskOut, + OpsMaskInMaskOut, + OpsImmMaskInMaskOut}) + if err != nil { + return fmt.Errorf("failed to execute template: %w", err) + } + + return nil +} diff --git a/internal/simdgen/gen_simdssa.go b/internal/simdgen/gen_simdssa.go new file mode 100644 index 00000000..10222bc7 --- /dev/null +++ b/internal/simdgen/gen_simdssa.go @@ -0,0 +1,210 @@ +// Copyright 2025 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package main + +import ( + "fmt" + "strings" +) + +const simdssaTmpl = `// Code generated by x/arch/internal/simdgen using 'go run . -xedPath $XED_PATH -o godefs -goroot $GOROOT go.yaml types.yaml categories.yaml'; DO NOT EDIT. 
+ +package amd64 + +import ( + "cmd/compile/internal/ssa" + "cmd/compile/internal/ssagen" + "cmd/internal/obj" + "cmd/internal/obj/x86" +) + +func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { + p := s.Prog(v.Op.Asm()) + // First arg + switch v.Op {{"{"}}{{if gt (len .ImmFirst) 0}} + // Imm + case {{.ImmFirst}}: + imm := v.AuxInt + if imm < 0 || imm > 255 { + v.Fatalf("Invalid source selection immediate") + } + p.From.Offset = imm + p.From.Type = obj.TYPE_CONST +{{end}}{{if gt (len .VregFirst) 0}} + // vreg + case {{.VregFirst}}: + p.From.Type = obj.TYPE_REG + p.From.Reg = simdReg(v.Args[0]) +{{end}} + default: + // At least one arg is required. + return false + } + + // Second arg + switch v.Op {{"{"}}{{if gt (len .VregSecond) 0}} + // vreg + case {{.VregSecond}}: + if p.From.Type == obj.TYPE_CONST { + p.AddRestSourceReg(simdReg(v.Args[0])) + } else { + p.AddRestSourceReg(simdReg(v.Args[1])) + }{{end}} + } + + // Third arg + switch v.Op {{"{"}}{{if gt (len .VregThird) 0}} + // vreg + case {{.VregThird}}: + if p.From.Type == obj.TYPE_CONST { + p.AddRestSourceReg(simdReg(v.Args[1])) + } else { + p.AddRestSourceReg(simdReg(v.Args[2])) + } +{{end}}{{if gt (len .MaskThird) 0}} + // k mask + case {{.MaskThird}}: + if p.From.Type == obj.TYPE_CONST { + p.AddRestSourceReg(v.Args[1].Reg()) + } else { + p.AddRestSourceReg(v.Args[2].Reg()) + }{{end}} + } + + // Fourth arg + switch v.Op {{"{"}}{{if gt (len .MaskFourth) 0}} + case {{.MaskFourth}}: + if p.From.Type == obj.TYPE_CONST { + p.AddRestSourceReg(v.Args[2].Reg()) + } else { + p.AddRestSourceReg(v.Args[3].Reg()) + }{{end}} + } + + // Output + switch v.Op {{"{"}}{{if gt (len .VregOut) 0}} + case {{.VregOut}}: + p.To.Type = obj.TYPE_REG + p.To.Reg = simdReg(v) +{{end}}{{if gt (len .MaskOut) 0}} + case {{.MaskOut}}: + p.To.Type = obj.TYPE_REG + p.To.Reg = v.Reg() +{{end}} + default: + // One result is required. + return false + } +{{if gt (len .ZeroingMask) 0}} + // Masked operation are always compiled with zeroing. 
+ switch v.Op { + case {{.ZeroingMask}}: + x86.ParseSuffix(p, "Z") + } +{{end}} + return true +} +` + +// writeSIMDSSA generates the ssa to prog lowering codes and writes it to simdssa.go +// within the specified directory. +func writeSIMDSSA(directory string, ops []Operation) error { + var ImmFirst []string + var VregFirst []string + var VregSecond []string + var MaskThird []string + var VregThird []string + var MaskFourth []string + var VregOut []string + var MaskOut []string + var ZeroingMask []string + + seen := map[string]struct{}{} + for _, op := range ops { + asm := op.Asm + shapeIn, shapeOut, maskType, _, _, gOp, err := op.shape() + if err != nil { + return err + } + if maskType == 2 { + asm += "Masked" + } + asm = fmt.Sprintf("%s%d", asm, *gOp.Out[0].Bits) + if _, ok := seen[asm]; ok { + continue + } + seen[asm] = struct{}{} + caseStr := fmt.Sprintf("ssa.OpAMD64%s", asm) + if shapeIn == PureVregIn || shapeIn == PureKmaskIn { + // Masks and vreg are handled together by simdReg() + VregFirst = append(VregFirst, caseStr) + if len(gOp.In) > 1 { + VregSecond = append(VregSecond, caseStr) + } + } else if shapeIn == OneKmaskIn { + VregFirst = append(VregFirst, caseStr) + VregSecond = append(VregSecond, caseStr) + MaskThird = append(MaskThird, caseStr) + if gOp.Zeroing == nil { + ZeroingMask = append(ZeroingMask, caseStr) + } + } else if shapeIn == OneConstImmIn { + ImmFirst = append(ImmFirst, caseStr) + VregSecond = append(VregSecond, caseStr) + VregThird = append(VregThird, caseStr) + } else { + // OneKmaskConstImmIn case + ImmFirst = append(ImmFirst, caseStr) + VregSecond = append(VregSecond, caseStr) + VregThird = append(VregThird, caseStr) + MaskFourth = append(MaskFourth, caseStr) + if gOp.Zeroing == nil { + ZeroingMask = append(ZeroingMask, caseStr) + } + } + if shapeOut == OneVregOut || gOp.Out[0].OverwriteClass != nil { + // If class overwrite is happening, that's not really a mask but a vreg. 
+ VregOut = append(VregOut, caseStr) + } else { + // OneKmaskOut case + MaskOut = append(MaskOut, caseStr) + } + } + + data := struct { + ImmFirst string + VregFirst string + VregSecond string + MaskThird string + VregThird string + MaskFourth string + VregOut string + MaskOut string + ZeroingMask string + }{ + strings.Join(ImmFirst, ", "), + strings.Join(VregFirst, ", "), + strings.Join(VregSecond, ", "), + strings.Join(MaskThird, ", "), + strings.Join(VregThird, ", "), + strings.Join(MaskFourth, ", "), + strings.Join(VregOut, ", "), + strings.Join(MaskOut, ", "), + strings.Join(ZeroingMask, ", "), + } + + file, t, err := openFileAndPrepareTemplate(directory, "src/cmd/compile/internal/amd64/simdssa.go", simdssaTmpl) + if err != nil { + return err + } + defer file.Close() + + err = t.Execute(file, data) + if err != nil { + return fmt.Errorf("failed to execute template: %w", err) + } + + return nil +} diff --git a/internal/simdgen/gen_utility.go b/internal/simdgen/gen_utility.go new file mode 100644 index 00000000..1e822980 --- /dev/null +++ b/internal/simdgen/gen_utility.go @@ -0,0 +1,581 @@ +// Copyright 2025 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package main + +import ( + "fmt" + "log" + "os" + "path/filepath" + "reflect" + "slices" + "sort" + "strings" + "text/template" + "unicode" +) + +func openFileAndPrepareTemplate(goroot string, file string, temp string) (*os.File, *template.Template, error) { + fp := filepath.Join(goroot, file) + dir := filepath.Dir(fp) + err := os.MkdirAll(dir, 0755) + if err != nil { + return nil, nil, fmt.Errorf("failed to create directory %s: %w", dir, err) + } + f, err := os.Create(fp) + if err != nil { + return nil, nil, fmt.Errorf("failed to create file %s: %w", fp, err) + } + t, err := template.New(fp).Parse(temp) + if err != nil { + f.Close() + return nil, nil, fmt.Errorf("failed to parse template: %w", err) + } + return f, t, nil +} + +const ( + InvalidIn int = iota + PureVregIn + OneKmaskIn + OneConstImmIn + OneKmaskConstImmIn + PureKmaskIn +) + +const ( + InvalidOut int = iota + NoOut + OneVregOut + OneKmaskOut +) + +const ( + InvalidMask int = iota + NoMask + OneMask + OneConstMask + AllMasks +) + +// opShape returns the an int denoting the shape of the operation: +// +// shapeIn: +// InvalidIn: unknown, with err set to the error message +// PureVregIn: pure vreg operation +// OneKmaskIn: operation with one k mask input (TODO: verify if it's always opmask predicate) +// OneConstImmIn: operation with one const imm input +// OneKmaskConstImmIn: operation with one k mask input and one const imm input +// PureKmaskIn: it's a K mask instruction (which can use K0) +// +// shapeOut: +// InvalidOut: unknown, with err set to the error message +// NoOut: no outputs, this is invalid now. +// OneVregOut: one vreg output +// OneKmaskOut: one mask output +// +// maskType: +// InvalidMask: unknown, with err set to the error message +// NoMask: no mask +// OneMask: with mask (K1 to K7) +// OneConstMask: with const mask K0 +// AllMasks: it's a K mask instruction +// +// opNoImm is op with its inputs excluding the const imm. 
+// opNoConstMask is op with its inputs excluding the const mask. +// opNoConstImmMask is op with its inputs excluding the const imm and mask. +func (op *Operation) shape() (shapeIn, shapeOut, maskType int, opNoConstImm Operation, opNoConstMask Operation, opNoConstImmMask Operation, err error) { + if len(op.Out) > 1 { + err = fmt.Errorf("simdgen only supports 1 output: %s", op) + return + } + if len(op.Out) == 1 { + if op.Out[0].Class == "vreg" { + shapeOut = OneVregOut + } else if op.Out[0].Class == "mask" { + shapeOut = OneKmaskOut + } else { + err = fmt.Errorf("simdgen only supports output of class vreg or mask: %s", op) + return + } + } else { + shapeOut = NoOut + // TODO: are these only Load/Stores? + // We manually supported two Load and Store, are those enough? + err = fmt.Errorf("simdgen only supports 1 output: %s", op) + return + } + hasImm := false + maskCount := 0 + iConstMask := -1 + hasVreg := false + for i, in := range op.In { + if in.Class == "immediate" { + // A manual check on XED data found that AMD64 SIMD instructions at most + // have 1 immediates. So we don't need to check this here. + if in.Const == nil { + err = fmt.Errorf("simdgen doesn't support non-const immediates: %s", op) + return + } + if *in.Bits != 8 { + err = fmt.Errorf("simdgen only supports immediates of 8 bits: %s", op) + return + } + hasImm = true + } else if in.Class == "mask" { + if in.Const != nil { + if *in.Const == "K0" { + if iConstMask != -1 { + err = fmt.Errorf("simdgen only supports one const mask in inputs: %s", op) + return + } + iConstMask = i + // Const mask should be invisible in ssa and prog, so we don't treat it as a mask. + // More specifically in prog, it's optional: when missing the assembler will default it to K0). + // TODO: verify the above assumption is safe. 
+ } else { + err = fmt.Errorf("simdgen only supports const mask K0 in inputs: %s", op) + } + } else { + maskCount++ + } + } else { + hasVreg = true + } + } + opNoConstImm = *op + opNoConstMask = *op + opNoConstImmMask = *op + removeConstMask := func(o *Operation) { + o.In = append(o.In[:iConstMask], o.In[iConstMask+1:]...) + } + if iConstMask != -1 { + removeConstMask(&opNoConstMask) + removeConstMask(&opNoConstImmMask) + } + removeConstImm := func(o *Operation) { + o.In = o.In[1:] + } + if hasImm { + removeConstImm(&opNoConstImm) + removeConstImm(&opNoConstImmMask) + } + if maskCount == 0 { + if iConstMask == -1 { + maskType = NoMask + } else { + maskType = OneConstMask + } + } else { + maskType = OneMask + } + checkPureMask := func() bool { + if hasImm { + err = fmt.Errorf("simdgen does not support immediates in pure mask operations: %s", op) + return true + } + if iConstMask != -1 { + err = fmt.Errorf("simdgen does not support const mask in pure mask operations: %s", op) + return true + } + if hasVreg { + err = fmt.Errorf("simdgen does not support more than 1 masks in non-pure mask operations: %s", op) + return true + } + return false + } + if !hasImm && maskCount == 0 { + shapeIn = PureVregIn + } else if !hasImm && maskCount > 0 { + if maskCount == 1 { + shapeIn = OneKmaskIn + } else { + if checkPureMask() { + return + } + shapeIn = PureKmaskIn + maskType = AllMasks + } + } else if hasImm && maskCount == 0 { + shapeIn = OneConstImmIn + } else { + if maskCount == 1 { + shapeIn = OneKmaskConstImmIn + } else { + checkPureMask() + return + } + } + // Exclude some shape combination that are not yet supported in simdssa.go + if shapeIn == PureVregIn { + if len(opNoConstImmMask.In) > 2 { + err = fmt.Errorf("simdgen doesn't support more than 2 vreg args: %s", op) + return + } + } + if shapeIn == OneKmaskIn || shapeIn == OneKmaskConstImmIn { + if len(opNoConstImmMask.In) != 3 { + err = fmt.Errorf("simdgen only supports mask operations with 2 vreg args: %s", op) + return 
+ } + } + if shapeIn == OneConstImmIn { + if len(opNoConstImmMask.In) != 2 { + err = fmt.Errorf("simdgen only supports immediate operations with 2 vreg args: %s", op) + return + } + } + if shapeIn == PureKmaskIn { + if len(opNoConstImmMask.In) != 2 { + err = fmt.Errorf("simdgen only supports pure k mask operations with 2 vreg args: %s", op) + return + } + } + return +} + +// sortOperand sorts op.In by putting immediates first, then vreg, and mask the last. +// TODO: verify that this is a safe assumption of the prog strcture. +// from my observation looks like in asm, imms are always the first, masks are always the last, with +// vreg in betwee... +func (op *Operation) sortOperand() { + priority := map[string]int{"immediate": 2, "vreg": 1, "mask": 0} + sort.SliceStable(op.In, func(i, j int) bool { + return priority[op.In[i].Class]-priority[op.In[j].Class] > 0 + }) +} + +// genericOpsByLen returns the lists of generic ops aggregated by input length. +func genericOpsByLen(ops []Operation) (opsLen1, opsLen2, opsLen3 []Operation, e error) { + opsLen1 = make([]Operation, 0) + opsLen2 = make([]Operation, 0) + opsLen3 = make([]Operation, 0) + for _, op := range ops { + _, shapeOut, _, _, _, gOp, err := op.shape() + if err != nil { + e = err + return + } + // Put the go ssa type in Class field, simd intrinsics need it. 
+ if shapeOut == OneVregOut || shapeOut == OneKmaskOut { + gOp.GoArch = fmt.Sprintf("types.TypeVec%d", *gOp.Out[0].Bits) + } + if len(gOp.In) == 1 { + opsLen1 = append(opsLen1, gOp) + } else if len(gOp.In) == 2 { + opsLen2 = append(opsLen2, gOp) + } else if len(gOp.In) == 3 { + opsLen3 = append(opsLen3, gOp) + } + } + sortKey := func(op *Operation) string { + return *op.In[0].Go + op.Go + } + sortBySortKey := func(ops []Operation) { + sort.Slice(ops, func(i, j int) bool { + return sortKey(&ops[i]) < sortKey(&ops[j]) + }) + } + sortBySortKey(opsLen1) + sortBySortKey(opsLen2) + sortBySortKey(opsLen3) + return +} + +// dedup is deduping operations in the full structure level. +func dedup(ops []Operation) (deduped []Operation) { + for _, op := range ops { + seen := false + for _, dop := range deduped { + if reflect.DeepEqual(op, dop) { + seen = true + break + } + } + if !seen { + deduped = append(deduped, op) + } + } + return +} + +// splitMask splits operations with a single mask vreg input to be masked and unmasked(const: K0). +// It also remove the "Masked" keyword from the name. +func splitMask(ops []Operation) ([]Operation, error) { + splited := []Operation{} + for _, op := range ops { + splited = append(splited, op) + if op.Masked == nil || *op.Masked != "true" { + continue + } + shapeIn, _, _, _, _, _, err := op.shape() + if err != nil { + return nil, err + } + if shapeIn == OneKmaskIn || shapeIn == OneKmaskConstImmIn { + op2 := op + op2.In = slices.Clone(op.In) + constMask := "K0" + // The ops should be sorted when calling this function, the mask is in the end. 
+ op2.In[len(op2.In)-1].Const = &constMask + if !strings.HasPrefix(op2.Go, "Masked") { + return nil, fmt.Errorf("simdgen only recognizes masked operations with name starting with 'Masked': %s", op) + } + op2.Go = strings.ReplaceAll(op2.Go, "Masked", "") + splited = append(splited, op2) + } else { + return nil, fmt.Errorf("simdgen only recognizes masked operations with exactly one mask input: %s", op) + } + } + return splited, nil +} + +// dedupGodef is deduping operations in [Op.Go]+[*Op.In[0].Go] level. +// By deduping, it means picking the least advanced architecture that satisfy the requirement: +// AVX512 will be least preferred. +// If FlagNoDedup is set, it will report the duplicates to the console. +func dedupGodef(ops []Operation) ([]Operation, error) { + seen := map[string][]Operation{} + for _, op := range ops { + _, _, _, _, _, gOp, err := op.shape() + if err != nil { + return nil, err + } + genericNames := gOp.Go + *gOp.In[0].Go + seen[genericNames] = append(seen[genericNames], op) + } + if *FlagReportDup { + for gName, dup := range seen { + if len(dup) > 1 { + log.Printf("Duplicate for %s:\n", gName) + for _, op := range dup { + log.Printf("%s\n", op) + } + } + } + return ops, nil + } + isAVX512 := func(op Operation) bool { + return strings.Contains(op.Extension, "AVX512") + } + deduped := []Operation{} + for _, dup := range seen { + if len(dup) > 1 { + sort.Slice(dup, func(i, j int) bool { + // Put non-AVX512 candidates at the beginning + if !isAVX512(dup[i]) && isAVX512(dup[j]) { + return true + } + // TODO: make the sorting logic finer-grained. + return false + }) + } + deduped = append(deduped, dup[0]) + } + return deduped, nil +} + +// Copy op.ConstImm to op.In[0].Const +// This is a hack to reduce the size of defs we need for const imm operations. 
+func copyConstImm(ops []Operation) error { + for _, op := range ops { + if op.ConstImm == nil { + continue + } + shapeIn, _, _, _, _, _, err := op.shape() + if err != nil { + return err + } + if shapeIn == OneConstImmIn || shapeIn == OneKmaskConstImmIn { + op.In[0].Const = op.ConstImm + } + // Otherwise, just not port it - e.g. {VPCMP[BWDQ] imm=0} and {VPCMPEQ[BWDQ]} are + // the same operations "Equal", [dedupgodef] should be able to distinguish them. + } + return nil +} + +// overwrite corrects some errors due to: +// - The XED data is wrong +// - Go's SIMD API requirement, for example AVX2 compares should also produce masks. +// This rewrite has strict constraints, please see the error message. +// These constraints are also explointed in [writeSIMDRules], [writeSIMDMachineOps] +// and [writeSIMDSSA], please be careful when updating these constraints. +func overwrite(ops []Operation) error { + capitalizeFirst := func(s string) string { + if s == "" { + return "" + } + // Convert the string to a slice of runes to handle multi-byte characters correctly. 
+ r := []rune(s) + r[0] = unicode.ToUpper(r[0]) + return string(r) + } + hasClassOverwrite := false + overwrite := func(op []Operand, idx int) error { + if op[idx].OverwriteClass != nil { + if op[idx].OverwriteBase == nil { + return fmt.Errorf("simdgen: [OverwriteClass] must be set together with [OverwriteBase]: %s", op[idx]) + } + oBase := *op[idx].OverwriteBase + oClass := *op[idx].OverwriteClass + if oClass != "mask" { + return fmt.Errorf("simdgen: [Class] overwrite only supports overwritting to mask: %s", op[idx]) + } + if oBase != "uint" { + return fmt.Errorf("simdgen: [Class] overwrite must set [OverwriteBase] to uint: %s", op[idx]) + } + if op[idx].Class != "vreg" { + return fmt.Errorf("simdgen: [Class] overwrite must be overwriting [Class] from vreg: %s", op[idx]) + } + if *op[idx].Base != "uint" && *op[idx].Base != "int" { + return fmt.Errorf("simdgen: [Class] overwrite must be overwriting [Base] from int|uint: %s", op[idx]) + } + hasClassOverwrite = true + *op[idx].Base = oBase + op[idx].Class = oClass + *op[idx].Go = fmt.Sprintf("Mask%dx%d", *op[idx].ElemBits, *op[idx].Lanes) + } else if op[idx].OverwriteBase != nil { + oBase := *op[idx].OverwriteBase + *op[idx].Go = strings.ReplaceAll(*op[idx].Go, capitalizeFirst(*op[idx].Base), capitalizeFirst(oBase)) + *op[idx].Base = oBase + } + return nil + } + for i := range ops { + hasClassOverwrite = false + for j := range ops[i].In { + if err := overwrite(ops[i].In, j); err != nil { + return err + } + if hasClassOverwrite { + return fmt.Errorf("simdgen does not support [OverwriteClass] in inputs: %s", ops[i]) + } + } + for j := range ops[i].Out { + if err := overwrite(ops[i].Out, j); err != nil { + return err + } + } + if hasClassOverwrite { + for _, in := range ops[i].In { + if in.Class == "mask" { + return fmt.Errorf("simdgen only supports [OverwriteClass] for operations without mask inputs") + } + } + } + } + return nil +} + +func (o Operation) String() string { + var sb strings.Builder + 
sb.WriteString("Operation {\n") + sb.WriteString(fmt.Sprintf(" Go: %s\n", o.Go)) + sb.WriteString(fmt.Sprintf(" GoArch: %s\n", o.GoArch)) + sb.WriteString(fmt.Sprintf(" Asm: %s\n", o.Asm)) + + sb.WriteString(" In: [\n") + for _, op := range o.In { + sb.WriteString(fmt.Sprintf(" %s,\n", op.String())) + } + sb.WriteString(" ]\n") + + sb.WriteString(" Out: [\n") + for _, op := range o.Out { + sb.WriteString(fmt.Sprintf(" %s,\n", op.String())) + } + sb.WriteString(" ]\n") + + sb.WriteString(fmt.Sprintf(" Commutative: %s\n", o.Commutative)) + sb.WriteString(fmt.Sprintf(" Extension: %s\n", o.Extension)) + + if o.Zeroing != nil { + sb.WriteString(fmt.Sprintf(" Zeroing: %s\n", *o.Zeroing)) + } else { + sb.WriteString(" Zeroing: \n") + } + + if o.Documentation != nil { + sb.WriteString(fmt.Sprintf(" Documentation: %s\n", *o.Documentation)) + } else { + sb.WriteString(" Documentation: \n") + } + + if o.ConstImm != nil { + sb.WriteString(fmt.Sprintf(" ConstImm: %s\n", *o.ConstImm)) + } else { + sb.WriteString(" ConstImm: \n") + } + + if o.Masked != nil { + sb.WriteString(fmt.Sprintf(" Masked: %s\n", *o.Masked)) + } else { + sb.WriteString(" Masked: \n") + } + + sb.WriteString("}\n") + return sb.String() +} + +// String returns a string representation of the Operand. 
+func (op Operand) String() string { + var sb strings.Builder + sb.WriteString("Operand {\n") + sb.WriteString(fmt.Sprintf(" Class: %s\n", op.Class)) + + if op.Go != nil { + sb.WriteString(fmt.Sprintf(" Go: %s\n", *op.Go)) + } else { + sb.WriteString(" Go: \n") + } + + sb.WriteString(fmt.Sprintf(" AsmPos: %d\n", op.AsmPos)) + + if op.Base != nil { + sb.WriteString(fmt.Sprintf(" Base: %s\n", *op.Base)) + } else { + sb.WriteString(" Base: \n") + } + + if op.ElemBits != nil { + sb.WriteString(fmt.Sprintf(" ElemBits: %d\n", *op.ElemBits)) + } else { + sb.WriteString(" ElemBits: \n") + } + + if op.Bits != nil { + sb.WriteString(fmt.Sprintf(" Bits: %d\n", *op.Bits)) + } else { + sb.WriteString(" Bits: \n") + } + + if op.Const != nil { + sb.WriteString(fmt.Sprintf(" Const: %s\n", *op.Const)) + } else { + sb.WriteString(" Const: \n") + } + + if op.Lanes != nil { + sb.WriteString(fmt.Sprintf(" Lanes: %d\n", *op.Lanes)) + } else { + sb.WriteString(" Lanes: \n") + } + + if op.OverwriteClass != nil { + sb.WriteString(fmt.Sprintf(" OverwriteClass: %s\n", *op.OverwriteClass)) + } else { + sb.WriteString(" OverwriteClass: \n") + } + + if op.OverwriteBase != nil { + sb.WriteString(fmt.Sprintf(" OverwriteBase: %s\n", *op.OverwriteBase)) + } else { + sb.WriteString(" OverwriteBase: \n") + } + + sb.WriteString(" }\n") + return sb.String() +} diff --git a/internal/simdgen/go.yaml b/internal/simdgen/go.yaml index 6a6ca1eb..0ec1ee0d 100644 --- a/internal/simdgen/go.yaml +++ b/internal/simdgen/go.yaml @@ -1,223 +1,437 @@ !sum -# For binary operations, we constrain their two inputs and one output to the -# same Go type using a variable. 
+# Add - go: Add - asm: "V?PADD[BWDQ]|V?ADDP[SD]" + asm: "VPADD[BWDQ]|VADDP[SD]" + in: + - &any + go: $t + - *any + out: + - *any +- go: MaskedAdd + asm: "VPADD[BWDQ]|VADDP[SD]" + in: + - class: mask + - *any + - *any + out: + - *any +# Saturated Add +- go: SaturatedAdd + asm: "VPADDS[BWDQ]" + in: + - &int + go: $t + base: int + - *int + out: + - *int +- go: SaturatedAdd + asm: "VPADDS[BWDQ]" + in: + - &uint + go: $t + base: uint + - *uint + out: + - *uint +- go: MaskedSaturatedAdd + asm: "VPADDS[BWDQ]" + in: + - class: mask + - *int + - *int + out: + - *int +- go: MaskedSaturatedAdd + asm: "VPADDS[BWDQ]" in: - - go: $t - - go: $t + - class: mask + - *uint + - *uint out: - - go: $t + - *uint +# Sub - go: Sub - goarch: amd64 - asm: "V?PSUB[BWDQ]|V?SUBP[SD]" + asm: "VPSUB[BWDQ]|VADDP[SD]" + in: + - *any + - *any + out: + - *any +- go: MaskedSub + asm: "VPSUB[BWDQ]|VADDP[SD]" + in: + - class: mask + - *any + - *any + out: + - *any +# Saturated Sub +- go: SaturatedSub + asm: "VPSUBS[BWDQ]" + in: + - *int + - *int + out: + - *int +- go: SaturatedSub + asm: "VPSUBS[BWDQ]" in: - - go: $t - - go: $t + - *uint + - *uint out: - - go: $t + - *uint +- go: MaskedSaturatedSub + asm: "VPSUBS[BWDQ]" + in: + - class: mask + - *int + - *int + out: + - *int +- go: MaskedSaturatedSub + asm: "VPSUBS[BWDQ]" + in: + - class: mask + - *uint + - *uint + out: + - *uint +# In the XED data, *all* floating point bitwise logic operation has their +# operand type marked as uint. We are not trying to understand why Intel +# decided that they want FP bit-wise logic operations, but this irregularity +# has to be dealed with in separate rules with some overwrites. -# -# AVX-512 Comparisons -# +# Int/Uint operations. +# Non-masked for 128/256-bit vectors +# For binary operations, we constrain their two inputs and one output to the +# same Go type using a variable. This will map to instructions before AVX512. 
+- go: And + asm: "VPAND" + in: + - &any + go: $t + - *any + out: + - *any +# Masked +# Looks like VPAND$xi works only for 2 shapes for integer: +# Dword and Qword. +# TODO: should we wildcard other smaller elemBits to VPANDQ or +# VPANDD? Looks like elemBits doesn't really matter afterall in bitwise operations. +- go: MaskedAnd + asm: "VPAND[DQ]" + in: + - class: mask + - *any + - *any + out: + - *any -# TODO(austin): I'm not happy with how much copy-pasting this requires. We could -# do a functional abstraction, but that feels bolted on. Unification is *almost* -# capable of collapsing all of this. -# -# One thing that might work is having a !let node that lets you extend the -# nonDetEnv with explicit values: -# -# !let -# - {$go: Equal, $imm: 0, $mask: K0} -# - {$go: EqualMasked, $imm: 0, $mask: _} -# - {$go: Less, $imm: 1, $mask: K0} -# - {$go: LessMasked, $imm: 1, $mask: _} -# - !let -# - {$asm: "VPCMP[BWDQ]", $base: int} -# - {$asm: "VPCMPU[BWDQ]", $base: uint} -# - go: $go -# asm: $asm -# in: -# - const: $mask -# - base: $base -# go: $t -# - base: $base -# go: $t -# - class: immediate -# const: $imm -# out: -# - class: mask -# -# That's not bad, but it's very hierachical. CUE has a "mixin" approach to this. -# -# - !unify -# # All AVX-512 comparisons have the same basic operand shape -# - {in: [_, {go: $t}, {go: $t}, _], out: [{class: mask}]} -# # There are signed and unsigned variants -# - !sum -# - {asm: "VPCMP[BWDQ]", in: [_, {base: int}, {base: int}, _]} -# - {asm: "VPCMPU[BWDQ]", in: [_, {base: uint}, {base: uint}, _]} -# # Finally, list out the operations. 
-# - !let -# - $equal: {in: [_, _, _, {class: immedate, const: 0}]} -# $less: {in: [_, _, _, {class: immedate, const: 1}]} -# $masked: _ -# $unmasked: {in: [const: K0, _, _, _]} -# - !sum -# - !unify [go: Equal, $equal, $unmasked] -# - !unify [go: EqualMasked, $equal, $masked] -# - !unify [go: Less, $less, $unmasked] -# - !unify [go: LessMasked, $less, $masked] -# -# Maybe !let is just a feature of !sum that introduces an environment factor for -# all following branches? That would let me do the above in-line with the big -# top-level !sum: -# -# - !sum -# ... -# - !let # Adds a factor that is the sum of the following terms: -# - {$go: Equal, $imm: 0, $mask: K0} -# - {$go: EqualMasked, $imm: 0, $mask: _} -# - {$go: Less, $imm: 1, $mask: K0} -# - {$go: LessMasked, $imm: 1, $mask: _} -# - !let # Adds another factor: -# - {$asm: "VPCMP[BWDQ]", $base: int} -# - {$asm: "VPCMPU[BWDQ]", $base: uint} -# - go: $go -# asm: $asm -# in: -# - const: $mask -# - base: $base -# go: $t -# - base: $base -# go: $t -# - class: immediate -# const: $imm -# out: -# - class: mask -# -# I may need to choose names more carefully in that case. This is a general -# problem with names being file-global. (This is less of a problem with the -# mixin style because those names tend to be more specific anyway.) Or maybe it -# makes sense for each !let to introduce fresh idents, even if the string names -# are the same? 
+- go: AndNot + asm: "VPANDN" + in: + - *any + - *any + out: + - *any +- go: MaskedAndNot + asm: "VPANDN[DQ]" + in: + - class: mask + - *any + - *any + out: + - *any -- go: Equal - goarch: amd64 - asm: "VPCMP[BWDQ]" # Signed comparison +- go: Or + asm: "VPOR" in: - - const: K0 - - base: int - go: $t - - base: int - go: $t - - class: immediate - const: 0 + - *any + - *any out: + - *any +- go: MaskedOr + asm: "VPOR[DQ]" + in: - class: mask + - *any + - *any + out: + - *any -- go: EqualMasked - goarch: amd64 - asm: "VPCMP[BWDQ]" # Signed comparison +- go: Xor + asm: "VPXOR" in: - - _ - - base: int - go: $t - - base: int - go: $t - - class: immediate - const: 0 + - *any + - *any out: + - *any +- go: MaskedXor + asm: "VPXOR[DQ]" + in: - class: mask + - *any + - *any + out: + - *any -- go: Equal - goarch: amd64 - asm: "VPCMPU[BWDQ]" # Unsigned comparison +# FP operations. +# Set the [base] to be "int" to not include duplicates(excluding "uint"). +# [base] is not used when [overwriteBase] is present. 
+- go: And + asm: "VANDP[SD]" in: - - const: K0 - - base: uint + - &intToFloat go: $t - - base: uint - go: $t - - class: immediate - const: 0 + base: int + overwriteBase: float + - *intToFloat out: + - *intToFloat +- go: MaskedAnd + asm: "VANDP[SD]" + in: - class: mask + - *intToFloat + - *intToFloat + out: + - *intToFloat -- go: EqualMasked - goarch: amd64 - asm: "VPCMPU[BWDQ]" # Unsigned comparison +- go: AndNot + asm: "VANDNP[SD]" in: - - _ - - base: uint - go: $t - - base: uint - go: $t - - class: immediate - const: 0 + - *intToFloat + - *intToFloat + out: + - *intToFloat +- go: MaskedAndNot + asm: "VANDNP[SD]" + in: + - class: mask + - *intToFloat + - *intToFloat + out: + - *intToFloat + +- go: Or + asm: "VORP[SD]" + in: + - *intToFloat + - *intToFloat out: + - *intToFloat +- go: MaskedOr + asm: "VORP[SD]" + in: - class: mask + - *intToFloat + - *intToFloat + out: + - *intToFloat -- go: Less - goarch: amd64 - asm: "VPCMP[BWDQ]" # Signed comparison +- go: Xor + asm: "VXORP[SD]" + in: + - *intToFloat + - *intToFloat + out: + - *intToFloat +- go: MaskedXor + asm: "VXORP[SD]" in: - - const: K0 - - base: int + - class: mask + - *intToFloat + - *intToFloat + out: + - *intToFloat +- go: Equal + asm: "V?PCMPEQ[BWDQ]" + in: &int2 + - &int go: $t - - base: int + base: int # Looks like PCMP is on signed integers - but for equals does it really matters? + - *int + out: + - &anyvregToMask + go: $t # We still need the output to be the same shape as inputs. + overwriteBase: uint + overwriteClass: mask +- go: Greater + asm: "V?PCMPGT[BWDQ]" + in: *int2 + out: + - *anyvregToMask +- go: MaskedEqual + asm: "V?PCMPEQ[BWDQ]" + in: &maskint2 + - class: mask + - *int + - *int + out: + - class: mask +- go: MaskedGreater + asm: "V?PCMPGT[BWDQ]" + in: *maskint2 + out: + - class: mask +# The const imm predicated compares after AVX512, please see categories.yaml +# for const imm specification. 
+- go: Masked(Equal|Greater) + asm: "VPCMP[BWDQ]" + in: + - class: mask + - &int go: $t + base: int + - *int - class: immediate - const: 1 + const: 0 # Just a placeholder, will be overwritten by const imm porting. out: - class: mask - -- go: LessMasked - goarch: amd64 - asm: "VPCMP[BWDQ]" # Signed comparison +- go: Masked(Equal|Greater) + asm: "VPCMPU[BWDQ]" in: - - _ - - base: int - go: $t - - base: int + - class: mask + - &uint go: $t + base: uint + - *uint - class: immediate - const: 1 + const: 0 out: - class: mask - -- go: Less - goarch: amd64 - asm: "VPCMPU[BWDQ]" # Unsigned comparison +- go: Div + asm: "V?DIVP[SD]" in: - - const: K0 - - base: uint + - &fp go: $t - - base: uint + base: float + - *fp + out: + - *fp +- go: MaskedDiv + asm: "V?DIVP[SD]" + in: + - class: mask + - *fp + - *fp + out: + - *fp +# "Normal" multiplication is only available for floats. +# This only covers the single and double precision. +- go: Mul + asm: "VMULP[SD]" + in: + - &fp go: $t - - class: immediate - const: 1 + base: float + - *fp out: + - *fp +- go: MaskedMul + asm: "VMULP[SD]" + in: - class: mask + - *fp + - *fp + out: + - *fp -- go: LessMasked - goarch: amd64 - asm: "VPCMPU[BWDQ]" # Unsigned comparison +# Integer multiplications. + +# MulEvenWiden +# Dword only. +- go: MulEvenWiden + asm: "VPMULDQ" in: - - _ - - base: uint + - &int go: $t - - base: uint + base: int + - *int + out: + - &int2 + go: $t2 + base: int +- go: MulEvenWiden + asm: "VPMULUDQ" + in: + - &uint go: $t - - class: immediate - const: 1 + base: uint + - *uint + out: + - &uint2 + go: $t2 + base: uint +- go: MaskedMulEvenWiden + asm: "VPMULDQ" + in: + - class: mask + - *int + - *int out: + - *int2 +- go: MaskedMulEvenWiden + asm: "VPMULUDQ" + in: - class: mask + - *uint + - *uint + out: + - *uint2 -# TODO: -# 2: OP := LE; -# 4: OP := NEQ; -# 5: OP := NLT; -# 6: OP := NLE; +# MulHigh +# Word only. 
+# Non-masked +- go: MulHigh + asm: "VPMULHW" + in: + - *int + - *int + out: + - *int2 +- go: MulHigh + asm: "VPMULHUW" + in: + - *uint + - *uint + out: + - *uint2 +- go: MaskedMulHigh + asm: "VPMULHW" + in: + - class: mask + - *int + - *int + out: + - *int2 +- go: MaskedMulHigh + asm: "VPMULHUW" + in: + - class: mask + - *uint + - *uint + out: + - *uint2 + +# MulLow +# Signed int only. +# Non-masked +- go: MulLow + asm: "VPMULL[WDQ]" + in: + - *int + - *int + out: + - *int2 +- go: MaskedMulLow + asm: "VPMULL[WDQ]" + in: + - class: mask + - *int + - *int + out: + - *int2 diff --git a/internal/simdgen/godefs.go b/internal/simdgen/godefs.go index 037c11fa..2a611c9e 100644 --- a/internal/simdgen/godefs.go +++ b/internal/simdgen/godefs.go @@ -5,59 +5,57 @@ package main import ( - "fmt" - "io" "log" - "slices" "golang.org/x/arch/internal/unify" ) type Operation struct { - Go string // Go method name - Category *string // General operation category (optional) + Go string // Go method name GoArch string // GOARCH for this definition Asm string // Assembly mnemonic - In []Operand // Arguments - Out []Operand // Results + In []Operand // Arguments + Out []Operand // Results + Commutative string // Commutativity + Extension string // Extension + Zeroing *string // Zeroing is a flag for asm prefix "Z", if non-nil it will always be "false" + Documentation *string // Documentation will be appended to the stubs comments. + // ConstMask is a hack to reduce the size of defs the user writes for const-immediate + // If present, it will be copied to [In[0].Const]. + ConstImm *string + // Masked indicates that this is a masked operation, this field has to be set for masked operations + // otherwise simdgen won't recognize it in [splitMask]. 
+ Masked *string } type Operand struct { - Class string + Class string // One of "mask", "immediate", "vreg" and "mem" Go *string // Go type of this operand AsmPos int // Position of this operand in the assembly instruction Base *string // Base Go type ("int", "uint", "float") ElemBits *int // Element bit width - Bits int // Total vector bit width + Bits *int // Total vector bit width Const *string // Optional constant value + Lanes *int // Lanes should equal Bits/ElemBits + // If non-nil, it means the [Class] field is overwritten here, right now this is used to + // overwrite the results of AVX2 compares to masks. + OverwriteClass *string + // If non-nil, it means the [Base] field is overwritten here. This field exist solely + // because Intel's XED data is inconsistent. e.g. VANDNP[SD] marks its operand int. + OverwriteBase *string } -func (o Operand) Compare(p Operand) int { - // Put mask operands after others - if o.Class != "mask" && p.Class == "mask" { - return -1 - } - if o.Class == "mask" && p.Class != "mask" { - return 1 - } - return 0 -} - -var argNames = []string{"x", "y", "z", "w"} - -func writeGoDefs(w io.Writer, cl unify.Closure) { +func writeGoDefs(path string, cl unify.Closure) error { // TODO: Merge operations with the same signature but multiple // implementations (e.g., SSE vs AVX) - - // TODO: This code is embarrassing, but I'm very tired. 
- - var op Operation + var ops []Operation for def := range cl.All() { + var op Operation if !def.Exact() { continue } @@ -66,76 +64,58 @@ func writeGoDefs(w io.Writer, cl unify.Closure) { log.Println(def) continue } - - in := slices.Clone(op.In) - slices.SortStableFunc(in, Operand.Compare) - out := slices.Clone(op.Out) - slices.SortStableFunc(out, Operand.Compare) - - type argExtra struct { - *Operand - varName string - } - asmPosToArg := make(map[int]argExtra) - asmPosToRes := make(map[int]argExtra) - argNames := argNames - - fmt.Fprintf(w, "func (%s %s) %s(", argNames[0], *in[0].Go, op.Go) - asmPosToArg[in[0].AsmPos] = argExtra{&in[0], argNames[0]} - argNames = argNames[1:] - i := 0 - for _, arg := range in[1:] { - varName := "" - - // Drop operands with constant values - if arg.Const == nil { - if i > 0 { - fmt.Fprint(w, ", ") - } - i++ - varName = argNames[0] - fmt.Fprintf(w, "%s %s", varName, *arg.Go) - argNames = argNames[1:] - } - asmPosToArg[arg.AsmPos] = argExtra{&arg, varName} + // TODO: verify that this is safe. + op.sortOperand() + ops = append(ops, op) + } + // The parsed XED data might contain duplicates, like + // 512 bits VPADDP. 
+ deduped := dedup(ops) + log.Printf("dedup len: %d\n", len(ops)) + var err error + if err = overwrite(deduped); err != nil { + return err + } + log.Printf("dedup len: %d\n", len(deduped)) + if !*FlagNoSplitMask { + if deduped, err = splitMask(deduped); err != nil { + return err } - fmt.Fprintf(w, ") (") - for i, res := range out { - if i > 0 { - fmt.Fprint(w, ", ") - } - varName := string('o' + byte(i)) - fmt.Fprintf(w, "%s %s", varName, *res.Go) - asmPosToRes[res.AsmPos] = argExtra{&res, varName} + } + log.Printf("dedup len: %d\n", len(deduped)) + if !*FlagNoDedup { + if deduped, err = dedupGodef(deduped); err != nil { + return err } - fmt.Fprintf(w, ") {\n") - - fmt.Fprintf(w, "\t// %s", op.Asm) - for i := 0; ; i++ { - arg, okArg := asmPosToArg[i] - if okArg { - if arg.Const != nil { - fmt.Fprintf(w, " %s", *arg.Const) - } else { - fmt.Fprintf(w, " %s", arg.varName) - } - } - - res, okRes := asmPosToRes[i] - if okRes { - if okArg { - fmt.Fprintf(w, "/") - } else { - fmt.Fprintf(w, " ") - } - fmt.Fprintf(w, "%s", res.varName) - } - if !okArg && !okRes { - break - } + } + log.Printf("dedup len: %d\n", len(deduped)) + if !*FlagNoConstImmPorting { + if err = copyConstImm(deduped); err != nil { + return err } - fmt.Fprintf(w, "\n") - - fmt.Fprintf(w, "}\n") } + log.Printf("dedup len: %d\n", len(deduped)) + typeMap := parseSIMDTypes(deduped) + if err = writeSIMDTypes(path, typeMap); err != nil { + return err + } + if err = writeSIMDStubs(path, deduped, typeMap); err != nil { + return err + } + if err = writeSIMDIntrinsics(path, deduped, typeMap); err != nil { + return err + } + if err = writeSIMDGenericOps(path, deduped); err != nil { + return err + } + if err = writeSIMDMachineOps(path, deduped); err != nil { + return err + } + if err = writeSIMDRules(path, deduped); err != nil { + return err + } + if err = writeSIMDSSA(path, deduped); err != nil { + return err + } + return nil } diff --git a/internal/simdgen/main.go b/internal/simdgen/main.go index 
82c31c06..84d8a92f 100644 --- a/internal/simdgen/main.go +++ b/internal/simdgen/main.go @@ -36,6 +36,42 @@ // To see just the definitions for VPADDQ: // // go run . -xedPath $XEDPATH -q '{asm: VPADDQ}' +// +// simdgen can also generate Go definitions of SIMD mappings: +// To generate go files to the go root, run: +// +// go run . -xedPath $XEDPATH -godefroot $/PATH/TO/go go.yaml categories.yaml types.yaml +// +// types.yaml is already written, it specifies the shapes of vectors. +// categories.yaml and go.yaml contains definitions that unifies with types.yaml and XED +// data, you can find an example in ops/AddSub/. +// +// To produce an aggregation of go.yaml and categoris.yaml from ./ops/ to ./, run: +// +// go generate +// +// When generating Go definitions, simdgen do 3 "magic"s: +// - It splits masked operations(with op's [Masked] field set) to const and non const: +// - One is a normal masked operation, the original +// - The other has its mask operand's [Const] fields set to "K0". +// - This way the user does not need to provide a separate "K0"-masked operation def. +// +// - It deduplicates intrinsic names that have duplicates: +// - If there are two operations that shares the same signature, one is AVX512 the other +// is before AVX512, the other will be selected. +// - This happens often when some operations are defined both before AVX512 and after. +// This way the user does not need to provide a separate "K0" operation for the +// AVX512 counterpart. +// +// - It copies the op's [ConstImm] field to its immediate operand's [Const] field. +// - This way the user does not need to provide verbose op definition while only +// the const immediate field is different. This is useful to reduce verbosity of +// compares with imm control predicates. +// +// These 3 magics could be disabled by enabling -nosplitmask, -nodedup or +// -noconstimmporting flags. +// +// simdgen right now only supports amd64, -arch=$OTHERARCH will trigger a fatal error. 
package main // Big TODOs: @@ -48,6 +84,8 @@ package main // - Do I need Closure, Value, and Domain? It feels like I should only need two // types. +//go:generate go run ./ops/. + import ( "cmp" "flag" @@ -55,6 +93,7 @@ import ( "log" "maps" "os" + "path/filepath" "slices" "strings" @@ -63,13 +102,19 @@ import ( ) var ( - xedPath = flag.String("xedPath", "", "load XED datafiles from `path`") - flagQ = flag.String("q", "", "query: read `def` as another input (skips final validation)") - flagO = flag.String("o", "yaml", "output type: yaml, godefs") + xedPath = flag.String("xedPath", "", "load XED datafiles from `path`") + flagQ = flag.String("q", "", "query: read `def` as another input (skips final validation)") + flagO = flag.String("o", "yaml", "output type: yaml, godefs") + flagGoDefRoot = flag.String("godefroot", ".", "the path to the directory containing the generated godefs") + FlagNoDedup = flag.Bool("nodedup", false, "disable deduplicating godefs of 2 qualifying operations from different extensions") + FlagNoSplitMask = flag.Bool("nosplitmask", false, "disable splitting the masks to const and non const") + FlagNoConstImmPorting = flag.Bool("noconstimmporting", false, "disable const immediate porting from op to imm operand") + FlagArch = flag.String("arch", "amd64", "the target architecture") flagDebugXED = flag.Bool("debug-xed", false, "show XED instructions") flagDebugUnify = flag.Bool("debug-unify", false, "print unification trace") flagDebugHTML = flag.String("debug-html", "", "write unification trace to `file.html`") + FlagReportDup = flag.Bool("reportdup", false, "report the duplicate godefs") ) func main() { @@ -77,6 +122,10 @@ func main() { var inputs []unify.Closure + if *FlagArch != "amd64" { + log.Fatalf("simdgen only supports amd64") + } + // Load XED into a defs set. 
if *xedPath != "" { xedDefs := loadXED(*xedPath) @@ -102,7 +151,7 @@ func main() { } inputs = append(inputs, defs) - if path == "go.yaml" { + if filepath.Base(path) == "go.yaml" { // These must all be used in the final result for def := range defs.Summands() { must[def] = struct{}{} @@ -145,7 +194,9 @@ func main() { enc.Close() } case "godefs": - writeGoDefs(os.Stdout, unified) + if err := writeGoDefs(*flagGoDefRoot, unified); err != nil { + log.Fatalf("Failed writing godefs: %+v", err) + } } // Validate results. diff --git a/internal/simdgen/ops/AddSub/categories.yaml b/internal/simdgen/ops/AddSub/categories.yaml new file mode 100644 index 00000000..8da031f7 --- /dev/null +++ b/internal/simdgen/ops/AddSub/categories.yaml @@ -0,0 +1,29 @@ +!sum +- go: Add + commutative: "true" + extension: "AVX.*" +- go: SaturatedAdd + commutative: "true" + extension: "AVX.*" +- go: MaskedAdd + masked: "true" + commutative: "true" + extension: "AVX.*" +- go: MaskedSaturatedAdd + masked: "true" + commutative: "true" + extension: "AVX.*" +- go: Sub + commutative: "true" + extension: "AVX.*" +- go: SaturatedSub + commutative: "true" + extension: "AVX.*" +- go: MaskedSub + masked: "true" + commutative: "true" + extension: "AVX.*" +- go: MaskedSaturatedSub + masked: "true" + commutative: "true" + extension: "AVX.*" \ No newline at end of file diff --git a/internal/simdgen/ops/AddSub/go.yaml b/internal/simdgen/ops/AddSub/go.yaml new file mode 100644 index 00000000..9e8dc57d --- /dev/null +++ b/internal/simdgen/ops/AddSub/go.yaml @@ -0,0 +1,101 @@ +!sum +# Add +- go: Add + asm: "VPADD[BWDQ]|VADDP[SD]" + in: + - &any + go: $t + - *any + out: + - *any +- go: MaskedAdd + asm: "VPADD[BWDQ]|VADDP[SD]" + in: + - class: mask + - *any + - *any + out: + - *any +# Saturated Add +- go: SaturatedAdd + asm: "VPADDS[BWDQ]" + in: + - &int + go: $t + base: int + - *int + out: + - *int +- go: SaturatedAdd + asm: "VPADDS[BWDQ]" + in: + - &uint + go: $t + base: uint + - *uint + out: + - *uint +- go: 
MaskedSaturatedAdd + asm: "VPADDS[BWDQ]" + in: + - class: mask + - *int + - *int + out: + - *int +- go: MaskedSaturatedAdd + asm: "VPADDS[BWDQ]" + in: + - class: mask + - *uint + - *uint + out: + - *uint + +# Sub +- go: Sub + asm: "VPSUB[BWDQ]|VADDP[SD]" + in: + - *any + - *any + out: + - *any +- go: MaskedSub + asm: "VPSUB[BWDQ]|VADDP[SD]" + in: + - class: mask + - *any + - *any + out: + - *any +# Saturated Sub +- go: SaturatedSub + asm: "VPSUBS[BWDQ]" + in: + - *int + - *int + out: + - *int +- go: SaturatedSub + asm: "VPSUBS[BWDQ]" + in: + - *uint + - *uint + out: + - *uint +- go: MaskedSaturatedSub + asm: "VPSUBS[BWDQ]" + in: + - class: mask + - *int + - *int + out: + - *int +- go: MaskedSaturatedSub + asm: "VPSUBS[BWDQ]" + in: + - class: mask + - *uint + - *uint + out: + - *uint diff --git a/internal/simdgen/ops/BitwiseLogic/categories.yaml b/internal/simdgen/ops/BitwiseLogic/categories.yaml new file mode 100644 index 00000000..bc4eda74 --- /dev/null +++ b/internal/simdgen/ops/BitwiseLogic/categories.yaml @@ -0,0 +1,31 @@ +!sum +- go: And + commutative: "true" + extension: "AVX.*" +- go: MaskedAnd + masked: "true" + commutative: "true" + extension: "AVX.*" +- go: Or + commutative: "true" + extension: "AVX.*" +- go: MaskedOr + masked: "true" + commutative: "true" + extension: "AVX.*" +- go: AndNot + commutative: "true" + extension: "AVX.*" +- go: MaskedAndNot + masked: "true" + commutative: "true" + extension: "AVX.*" +- go: Xor + commutative: "true" + extension: "AVX.*" +- go: MaskedXor + masked: "true" + commutative: "true" + extension: "AVX.*" +# We also have PTEST and VPTERNLOG, those should be hidden from the users +# and only appear in rewrite rules. 
\ No newline at end of file diff --git a/internal/simdgen/ops/BitwiseLogic/go.yaml b/internal/simdgen/ops/BitwiseLogic/go.yaml new file mode 100644 index 00000000..7e7adf7a --- /dev/null +++ b/internal/simdgen/ops/BitwiseLogic/go.yaml @@ -0,0 +1,149 @@ +!sum +# In the XED data, *all* floating point bitwise logic operation has their +# operand type marked as uint. We are not trying to understand why Intel +# decided that they want FP bit-wise logic operations, but this irregularity +# has to be dealed with in separate rules with some overwrites. + +# Int/Uint operations. +# Non-masked for 128/256-bit vectors +# For binary operations, we constrain their two inputs and one output to the +# same Go type using a variable. This will map to instructions before AVX512. +- go: And + asm: "VPAND" + in: + - &any + go: $t + - *any + out: + - *any +# Masked +# Looks like VPAND$xi works only for 2 shapes for integer: +# Dword and Qword. +# TODO: should we wildcard other smaller elemBits to VPANDQ or +# VPANDD? Looks like elemBits doesn't really matter afterall in bitwise operations. +- go: MaskedAnd + asm: "VPAND[DQ]" + in: + - class: mask + - *any + - *any + out: + - *any + +- go: AndNot + asm: "VPANDN" + in: + - *any + - *any + out: + - *any +- go: MaskedAndNot + asm: "VPANDN[DQ]" + in: + - class: mask + - *any + - *any + out: + - *any + +- go: Or + asm: "VPOR" + in: + - *any + - *any + out: + - *any +- go: MaskedOr + asm: "VPOR[DQ]" + in: + - class: mask + - *any + - *any + out: + - *any + +- go: Xor + asm: "VPXOR" + in: + - *any + - *any + out: + - *any +- go: MaskedXor + asm: "VPXOR[DQ]" + in: + - class: mask + - *any + - *any + out: + - *any + +# FP operations. +# Set the [base] to be "int" to not include duplicates(excluding "uint"). +# [base] is not used when [overwriteBase] is present. 
+- go: And + asm: "VANDP[SD]" + in: + - &intToFloat + go: $t + base: int + overwriteBase: float + - *intToFloat + out: + - *intToFloat +- go: MaskedAnd + asm: "VANDP[SD]" + in: + - class: mask + - *intToFloat + - *intToFloat + out: + - *intToFloat + +- go: AndNot + asm: "VANDNP[SD]" + in: + - *intToFloat + - *intToFloat + out: + - *intToFloat +- go: MaskedAndNot + asm: "VANDNP[SD]" + in: + - class: mask + - *intToFloat + - *intToFloat + out: + - *intToFloat + +- go: Or + asm: "VORP[SD]" + in: + - *intToFloat + - *intToFloat + out: + - *intToFloat +- go: MaskedOr + asm: "VORP[SD]" + in: + - class: mask + - *intToFloat + - *intToFloat + out: + - *intToFloat + +- go: Xor + asm: "VXORP[SD]" + in: + - *intToFloat + - *intToFloat + out: + - *intToFloat +- go: MaskedXor + asm: "VXORP[SD]" + in: + - class: mask + - *intToFloat + - *intToFloat + out: + - *intToFloat \ No newline at end of file diff --git a/internal/simdgen/ops/Compares/categories.yaml b/internal/simdgen/ops/Compares/categories.yaml new file mode 100644 index 00000000..cac97d4e --- /dev/null +++ b/internal/simdgen/ops/Compares/categories.yaml @@ -0,0 +1,19 @@ +!sum +- go: Equal + constImm: 0 + commutative: "true" + extension: "AVX.*" +- go: Greater + constImm: 6 + commutative: "false" + extension: "AVX.*" +- go: MaskedEqual + constImm: 0 + masked: "true" + commutative: "true" + extension: "AVX.*" +- go: MaskedGreater + constImm: 6 + masked: "true" + commutative: "false" + extension: "AVX.*" \ No newline at end of file diff --git a/internal/simdgen/ops/Compares/go.yaml b/internal/simdgen/ops/Compares/go.yaml new file mode 100644 index 00000000..f0a8cab1 --- /dev/null +++ b/internal/simdgen/ops/Compares/go.yaml @@ -0,0 +1,57 @@ +!sum +- go: Equal + asm: "V?PCMPEQ[BWDQ]" + in: &int2 + - &int + go: $t + base: int # Looks like PCMP is on signed integers - but for equals does it really matters? + - *int + out: + - &anyvregToMask + go: $t # We still need the output to be the same shape as inputs. 
+ overwriteBase: uint + overwriteClass: mask +- go: Greater + asm: "V?PCMPGT[BWDQ]" + in: *int2 + out: + - *anyvregToMask +- go: MaskedEqual + asm: "V?PCMPEQ[BWDQ]" + in: &maskint2 + - class: mask + - *int + - *int + out: + - class: mask +- go: MaskedGreater + asm: "V?PCMPGT[BWDQ]" + in: *maskint2 + out: + - class: mask +# The const imm predicated compares after AVX512, please see categories.yaml +# for const imm specification. +- go: Masked(Equal|Greater) + asm: "VPCMP[BWDQ]" + in: + - class: mask + - &int + go: $t + base: int + - *int + - class: immediate + const: 0 # Just a placeholder, will be overwritten by const imm porting. + out: + - class: mask +- go: Masked(Equal|Greater) + asm: "VPCMPU[BWDQ]" + in: + - class: mask + - &uint + go: $t + base: uint + - *uint + - class: immediate + const: 0 + out: + - class: mask diff --git a/internal/simdgen/ops/FPonlyArith/categories.yaml b/internal/simdgen/ops/FPonlyArith/categories.yaml new file mode 100644 index 00000000..9166f1fa --- /dev/null +++ b/internal/simdgen/ops/FPonlyArith/categories.yaml @@ -0,0 +1,8 @@ +!sum +- go: Div + commutative: "true" + extension: "AVX.*" +- go: MaskedDiv + commutative: "true" + masked: "true" + extension: "AVX.*" \ No newline at end of file diff --git a/internal/simdgen/ops/FPonlyArith/go.yaml b/internal/simdgen/ops/FPonlyArith/go.yaml new file mode 100644 index 00000000..4c74d253 --- /dev/null +++ b/internal/simdgen/ops/FPonlyArith/go.yaml @@ -0,0 +1,18 @@ +!sum +- go: Div + asm: "V?DIVP[SD]" + in: + - &fp + go: $t + base: float + - *fp + out: + - *fp +- go: MaskedDiv + asm: "V?DIVP[SD]" + in: + - class: mask + - *fp + - *fp + out: + - *fp \ No newline at end of file diff --git a/internal/simdgen/ops/Mul/categories.yaml b/internal/simdgen/ops/Mul/categories.yaml new file mode 100644 index 00000000..0ef6cf57 --- /dev/null +++ b/internal/simdgen/ops/Mul/categories.yaml @@ -0,0 +1,35 @@ +!sum +- go: Mul + commutative: "true" + extension: "AVX.*" +- go: MulEvenWiden + commutative: "true" 
+ extension: "AVX.*" + documentation: "Multiplies the even index elements from the two sources of size X at index i, store the result of size 2X at index i/2" +- go: MulHigh + commutative: "true" + extension: "AVX.*" + documentation: "Multiplies the elements from the two sources of size X at index i, store the high X bits of the result of size 2X at index i" +- go: MulLow + commutative: "true" + extension: "AVX.*" + documentation: "Multiplies the elements from the two sources of size X at index i, store the low X bits of the result of size 2X at index i" +- go: MaskedMul + masked: "true" + commutative: "true" + extension: "AVX.*" +- go: MaskedMulEvenWiden + masked: "true" + commutative: "true" + extension: "AVX.*" + documentation: "Multiplies the even index elements from the two sources of size X at index i, store the result of size 2X at index i/2" +- go: MaskedMulHigh + masked: "true" + commutative: "true" + extension: "AVX.*" + documentation: "Multiplies the elements from the two sources of size X at index i, store the high X bits of the result of size 2X at index i" +- go: MaskedMulLow + masked: "true" + commutative: "true" + extension: "AVX.*" + documentation: "Multiplies the elements from the two sources of size X at index i, store the low X bits of the result of size 2X at index i" \ No newline at end of file diff --git a/internal/simdgen/ops/Mul/go.yaml b/internal/simdgen/ops/Mul/go.yaml new file mode 100644 index 00000000..a75f4188 --- /dev/null +++ b/internal/simdgen/ops/Mul/go.yaml @@ -0,0 +1,116 @@ +!sum +# "Normal" multiplication is only available for floats. +# This only covers the single and double precision. +- go: Mul + asm: "VMULP[SD]" + in: + - &fp + go: $t + base: float + - *fp + out: + - *fp +- go: MaskedMul + asm: "VMULP[SD]" + in: + - class: mask + - *fp + - *fp + out: + - *fp + +# Integer multiplications. + +# MulEvenWiden +# Dword only. 
+- go: MulEvenWiden + asm: "VPMULDQ" + in: + - &int + go: $t + base: int + - *int + out: + - &int2 + go: $t2 + base: int +- go: MulEvenWiden + asm: "VPMULUDQ" + in: + - &uint + go: $t + base: uint + - *uint + out: + - &uint2 + go: $t2 + base: uint +- go: MaskedMulEvenWiden + asm: "VPMULDQ" + in: + - class: mask + - *int + - *int + out: + - *int2 +- go: MaskedMulEvenWiden + asm: "VPMULUDQ" + in: + - class: mask + - *uint + - *uint + out: + - *uint2 + +# MulHigh +# Word only. +# Non-masked +- go: MulHigh + asm: "VPMULHW" + in: + - *int + - *int + out: + - *int2 +- go: MulHigh + asm: "VPMULHUW" + in: + - *uint + - *uint + out: + - *uint2 +- go: MaskedMulHigh + asm: "VPMULHW" + in: + - class: mask + - *int + - *int + out: + - *int2 +- go: MaskedMulHigh + asm: "VPMULHUW" + in: + - class: mask + - *uint + - *uint + out: + - *uint2 + +# MulLow +# Signed int only. +# Non-masked +- go: MulLow + asm: "VPMULL[WDQ]" + in: + - *int + - *int + out: + - *int2 +- go: MaskedMulLow + asm: "VPMULL[WDQ]" + in: + - class: mask + - *int + - *int + out: + - *int2 \ No newline at end of file diff --git a/internal/simdgen/ops/main.go b/internal/simdgen/ops/main.go new file mode 100644 index 00000000..7e462bf7 --- /dev/null +++ b/internal/simdgen/ops/main.go @@ -0,0 +1,75 @@ +package main + +import ( + "bufio" + "fmt" + "os" + "path/filepath" +) + +const baseDir = "ops" // The main directory containing A, B, C, etc. 
+ +func main() { + if err := mergeYamlFiles("categories.yaml"); err != nil { + fmt.Printf("Error processing categories.yaml: %v\n", err) + os.Exit(1) + } + if err := mergeYamlFiles("go.yaml"); err != nil { + fmt.Printf("Error processing go.yaml: %v\n", err) + os.Exit(1) + } +} + +func mergeYamlFiles(targetFileName string) error { + outputFile, err := os.Create(targetFileName) + if err != nil { + return fmt.Errorf("failed to create output file %s: %w", targetFileName, err) + } + defer outputFile.Close() + + writer := bufio.NewWriter(outputFile) + _, err = writer.WriteString("!sum\n") + if err != nil { + return fmt.Errorf("failed to write '!sum' to %s: %w", targetFileName, err) + } + + entries, err := os.ReadDir(baseDir) + if err != nil { + return fmt.Errorf("failed to read base directory %s: %w", baseDir, err) + } + for _, entry := range entries { + if !entry.IsDir() { + continue + } + + subdirPath := filepath.Join(baseDir, entry.Name()) + sourceFilePath := filepath.Join(subdirPath, targetFileName) + + sourceFile, err := os.Open(sourceFilePath) + if err != nil { + if os.IsNotExist(err) { + fmt.Printf("Skipping: %s not found in %s\n", targetFileName, subdirPath) + continue + } + return fmt.Errorf("failed to open source file %s: %w", sourceFilePath, err) + } + defer sourceFile.Close() + + scanner := bufio.NewScanner(sourceFile) + // Skip first line + scanner.Scan() + // Append the rest of the lines to the output file + for scanner.Scan() { + line := scanner.Text() + _, err = writer.WriteString(line + "\n") + if err != nil { + return fmt.Errorf("failed to write line from %s to %s: %w", sourceFilePath, targetFileName, err) + } + } + + if err := scanner.Err(); err != nil { + return fmt.Errorf("error reading lines from %s: %w", sourceFilePath, err) + } + } + return writer.Flush() +} diff --git a/internal/simdgen/types.yaml b/internal/simdgen/types.yaml index 9092224e..c8b3660e 100644 --- a/internal/simdgen/types.yaml +++ b/internal/simdgen/types.yaml @@ -7,48 +7,48 @@ in: 
!repeat - !sum &types - - {class: vreg, go: Int8x16, base: "int", elemBits: 8, bits: 128} - - {class: vreg, go: Uint8x16, base: "uint", elemBits: 8, bits: 128} - - {class: vreg, go: Int16x8, base: "int", elemBits: 16, bits: 128} - - {class: vreg, go: Uint16x8, base: "uint", elemBits: 16, bits: 128} - - {class: vreg, go: Int32x4, base: "int", elemBits: 32, bits: 128} - - {class: vreg, go: Uint32x4, base: "uint", elemBits: 32, bits: 128} - - {class: vreg, go: Int64x2, base: "int", elemBits: 64, bits: 128} - - {class: vreg, go: Uint64x2, base: "uint", elemBits: 64, bits: 128} - - {class: vreg, go: Float32x4, base: "float", elemBits: 32, bits: 128} - - {class: vreg, go: Float64x2, base: "float", elemBits: 64, bits: 128} - - {class: vreg, go: Int8x32, base: "int", elemBits: 8, bits: 256} - - {class: vreg, go: Uint8x32, base: "uint", elemBits: 8, bits: 256} - - {class: vreg, go: Int16x16, base: "int", elemBits: 16, bits: 256} - - {class: vreg, go: Uint16x16, base: "uint", elemBits: 16, bits: 256} - - {class: vreg, go: Int32x8, base: "int", elemBits: 32, bits: 256} - - {class: vreg, go: Uint32x8, base: "uint", elemBits: 32, bits: 256} - - {class: vreg, go: Int64x4, base: "int", elemBits: 64, bits: 256} - - {class: vreg, go: Uint64x4, base: "uint", elemBits: 64, bits: 256} - - {class: vreg, go: Float32x8, base: "float", elemBits: 32, bits: 256} - - {class: vreg, go: Float64x4, base: "float", elemBits: 64, bits: 256} - - {class: vreg, go: Int8x64, base: "int", elemBits: 8, bits: 512} - - {class: vreg, go: Uint8x64, base: "uint", elemBits: 8, bits: 512} - - {class: vreg, go: Int16x32, base: "int", elemBits: 16, bits: 512} - - {class: vreg, go: Uint16x32, base: "uint", elemBits: 16, bits: 512} - - {class: vreg, go: Int32x16, base: "int", elemBits: 32, bits: 512} - - {class: vreg, go: Uint32x16, base: "uint", elemBits: 32, bits: 512} - - {class: vreg, go: Int64x8, base: "int", elemBits: 64, bits: 512} - - {class: vreg, go: Uint64x8, base: "uint", elemBits: 64, bits: 512} - - 
{class: vreg, go: Float32x16, base: "float", elemBits: 32, bits: 512} - - {class: vreg, go: Float64x8, base: "float", elemBits: 64, bits: 512} - - {class: mask, go: Mask8x16, elemBits: 8, bits: 128} - - {class: mask, go: Mask16x8, elemBits: 16, bits: 128} - - {class: mask, go: Mask32x4, elemBits: 32, bits: 128} - - {class: mask, go: Mask64x2, elemBits: 64, bits: 128} - - {class: mask, go: Mask8x32, elemBits: 8, bits: 256} - - {class: mask, go: Mask16x16, elemBits: 16, bits: 256} - - {class: mask, go: Mask32x8, elemBits: 32, bits: 256} - - {class: mask, go: Mask64x4, elemBits: 64, bits: 256} - - {class: mask, go: Mask8x64, elemBits: 8, bits: 512} - - {class: mask, go: Mask16x32, elemBits: 16, bits: 512} - - {class: mask, go: Mask32x16, elemBits: 32, bits: 512} - - {class: mask, go: Mask64x8, elemBits: 64, bits: 512} - - {class: immediate} # TODO + - {class: vreg, go: Int8x16, base: "int", elemBits: 8, bits: 128, lanes: 16} + - {class: vreg, go: Uint8x16, base: "uint", elemBits: 8, bits: 128, lanes: 16} + - {class: vreg, go: Int16x8, base: "int", elemBits: 16, bits: 128, lanes: 8} + - {class: vreg, go: Uint16x8, base: "uint", elemBits: 16, bits: 128, lanes: 8} + - {class: vreg, go: Int32x4, base: "int", elemBits: 32, bits: 128, lanes: 4} + - {class: vreg, go: Uint32x4, base: "uint", elemBits: 32, bits: 128, lanes: 4} + - {class: vreg, go: Int64x2, base: "int", elemBits: 64, bits: 128, lanes: 2} + - {class: vreg, go: Uint64x2, base: "uint", elemBits: 64, bits: 128, lanes: 2} + - {class: vreg, go: Float32x4, base: "float", elemBits: 32, bits: 128, lanes: 4} + - {class: vreg, go: Float64x2, base: "float", elemBits: 64, bits: 128, lanes: 2} + - {class: vreg, go: Int8x32, base: "int", elemBits: 8, bits: 256, lanes: 32} + - {class: vreg, go: Uint8x32, base: "uint", elemBits: 8, bits: 256, lanes: 32} + - {class: vreg, go: Int16x16, base: "int", elemBits: 16, bits: 256, lanes: 16} + - {class: vreg, go: Uint16x16, base: "uint", elemBits: 16, bits: 256, lanes: 16} + - {class: 
vreg, go: Int32x8, base: "int", elemBits: 32, bits: 256, lanes: 8} + - {class: vreg, go: Uint32x8, base: "uint", elemBits: 32, bits: 256, lanes: 8} + - {class: vreg, go: Int64x4, base: "int", elemBits: 64, bits: 256, lanes: 4} + - {class: vreg, go: Uint64x4, base: "uint", elemBits: 64, bits: 256, lanes: 4} + - {class: vreg, go: Float32x8, base: "float", elemBits: 32, bits: 256, lanes: 8} + - {class: vreg, go: Float64x4, base: "float", elemBits: 64, bits: 256, lanes: 4} + - {class: vreg, go: Int8x64, base: "int", elemBits: 8, bits: 512, lanes: 64} + - {class: vreg, go: Uint8x64, base: "uint", elemBits: 8, bits: 512, lanes: 64} + - {class: vreg, go: Int16x32, base: "int", elemBits: 16, bits: 512, lanes: 32} + - {class: vreg, go: Uint16x32, base: "uint", elemBits: 16, bits: 512, lanes: 32} + - {class: vreg, go: Int32x16, base: "int", elemBits: 32, bits: 512, lanes: 16} + - {class: vreg, go: Uint32x16, base: "uint", elemBits: 32, bits: 512, lanes: 16} + - {class: vreg, go: Int64x8, base: "int", elemBits: 64, bits: 512, lanes: 8} + - {class: vreg, go: Uint64x8, base: "uint", elemBits: 64, bits: 512, lanes: 8} + - {class: vreg, go: Float32x16, base: "float", elemBits: 32, bits: 512, lanes: 16} + - {class: vreg, go: Float64x8, base: "float", elemBits: 64, bits: 512, lanes: 8} + - {class: mask, go: Mask8x16, base: "int", elemBits: 8, bits: 128, lanes: 16} + - {class: mask, go: Mask16x8, base: "int", elemBits: 16, bits: 128, lanes: 8} + - {class: mask, go: Mask32x4, base: "int", elemBits: 32, bits: 128, lanes: 4} + - {class: mask, go: Mask64x2, base: "int", elemBits: 64, bits: 128, lanes: 2} + - {class: mask, go: Mask8x32, base: "int", elemBits: 8, bits: 256, lanes: 32} + - {class: mask, go: Mask16x16, base: "int", elemBits: 16, bits: 256, lanes: 16} + - {class: mask, go: Mask32x8, base: "int", elemBits: 32, bits: 256, lanes: 8} + - {class: mask, go: Mask64x4, base: "int", elemBits: 64, bits: 256, lanes: 4} + - {class: mask, go: Mask8x64, base: "int", elemBits: 8, bits: 
512, lanes: 64} + - {class: mask, go: Mask16x32, base: "int", elemBits: 16, bits: 512, lanes: 32} + - {class: mask, go: Mask32x16, base: "int", elemBits: 32, bits: 512, lanes: 16} + - {class: mask, go: Mask64x8, base: "int", elemBits: 64, bits: 512, lanes: 8} + - {class: immediate, go: Immediate} # TODO: we only support imms that are not used as value -- usually as instruction semantic predicate like VPCMP as of now. out: !repeat - *types diff --git a/internal/simdgen/xed.go b/internal/simdgen/xed.go index 292411cb..004a815f 100644 --- a/internal/simdgen/xed.go +++ b/internal/simdgen/xed.go @@ -47,12 +47,17 @@ func loadXED(xedPath string) []*unify.Value { return } // TODO: "feature" - fields := []string{"goarch", "asm", "in", "out"} + fields := []string{"goarch", "asm", "in", "out", "extension"} values := []*unify.Value{ unify.NewValue(unify.NewStringExact("amd64")), unify.NewValue(unify.NewStringExact(inst.Opcode())), unify.NewValue(ins), unify.NewValue(outs), + unify.NewValue(unify.NewStringExact(inst.Extension)), + } + if strings.Contains(inst.Pattern, "ZEROING=0") { + fields = append(fields, "zeroing") + values = append(values, unify.NewValue(unify.NewStringExact("false"))) } pos := unify.Pos{Path: inst.Pos.Path, Line: inst.Pos.Line} defs = append(defs, unify.NewValuePos(unify.NewDef(fields, values), pos)) @@ -107,6 +112,8 @@ type operandVReg struct { // Vector register type operandMask struct { operandCommon vecShape + // Bits in the mask is w/bits. 
+ allMasks bool } type operandImm struct { @@ -137,17 +144,25 @@ func (o operandVReg) toValue() (fields []string, vals []*unify.Value) { if err != nil { panic("parsing baseRe: " + err.Error()) } - return []string{"class", "elemBits", "bits", "base"}, []*unify.Value{ + fields, vals = []string{"class", "bits", "base"}, []*unify.Value{ strVal("vreg"), - strVal(o.elemBits), strVal(o.bits), unify.NewValue(baseDomain)} + if o.elemBits != o.bits { + fields, vals = append(fields, "elemBits"), append(vals, strVal(o.elemBits)) + } + // otherwise it means the vector could be any shape. + return } func (o operandMask) toValue() (fields []string, vals []*unify.Value) { return []string{"class", "elemBits", "bits"}, []*unify.Value{strVal("mask"), strVal(o.elemBits), strVal(o.bits)} } +func (o operandMask) zeroMaskValue() (fields []string, vals []*unify.Value) { + return []string{"class"}, []*unify.Value{strVal("mask")} +} + func (o operandImm) toValue() (fields []string, vals []*unify.Value) { return []string{"class", "bits"}, []*unify.Value{strVal("immediate"), strVal(o.bits)} } @@ -256,6 +271,7 @@ func decodeOperands(db *xeddata.Database, operands []string) (ins, outs unify.Tu inferMask := func(r, w bool) error { var masks []int var rSizes, wSizes, sizes []vecShape + allMasks := true for i, op := range ops { action := op.common().action if _, ok := op.(operandMask); ok { @@ -265,12 +281,15 @@ func decodeOperands(db *xeddata.Database, operands []string) (ins, outs unify.Tu if action.r == r || action.w == w { masks = append(masks, i) } - } else if reg, ok := op.(operandVReg); ok { - if action.r { - rSizes = append(rSizes, reg.vecShape) - } - if action.w { - wSizes = append(wSizes, reg.vecShape) + } else { + allMasks = false + if reg, ok := op.(operandVReg); ok { + if action.r { + rSizes = append(rSizes, reg.vecShape) + } + if action.w { + wSizes = append(wSizes, reg.vecShape) + } } } } @@ -292,6 +311,15 @@ func decodeOperands(db *xeddata.Database, operands []string) (ins, outs 
unify.Tu } if len(sizes) == 0 { + // If all operands are masks, leave the mask inference to the users. + if allMasks { + for _, i := range masks { + m := ops[i].(operandMask) + m.allMasks = true + ops[i] = m + } + return nil + } return fmt.Errorf("cannot infer mask size: no register operands") } shape, ok := singular(sizes) @@ -315,6 +343,12 @@ func decodeOperands(db *xeddata.Database, operands []string) (ins, outs unify.Tu var inVals, outVals []*unify.Value for asmPos, op := range ops { fields, values := op.toValue() + if opm, ok := op.(operandMask); ok { + if opm.allMasks { + // If all operands are masks, leave the mask inference to the users. + fields, values = opm.zeroMaskValue() + } + } fields = append(fields, "asmPos") values = append(values, unify.NewValue(unify.NewStringExact(fmt.Sprint(asmPos)))) From 99cca1d98223c070eb9c1236e346e562af4fd505 Mon Sep 17 00:00:00 2001 From: Junyang Shao Date: Mon, 26 May 2025 21:06:00 +0000 Subject: [PATCH 058/200] internal/simdgen: complete defs for compares. This CL generates code that passed ./make.bash. Change-Id: I7e645edd4eeda6322a669a427fa6164e7db18315 Reviewed-on: https://go-review.googlesource.com/c/arch/+/676415 Reviewed-by: David Chase Auto-Submit: Junyang Shao Reviewed-by: Junyang Shao LUCI-TryBot-Result: Go LUCI --- internal/simdgen/categories.yaml | 67 ++++++++++++++++++ internal/simdgen/gen_utility.go | 3 - internal/simdgen/go.yaml | 30 +++++++- internal/simdgen/ops/Compares/categories.yaml | 69 ++++++++++++++++++- internal/simdgen/ops/Compares/go.yaml | 30 +++++++- 5 files changed, 191 insertions(+), 8 deletions(-) diff --git a/internal/simdgen/categories.yaml b/internal/simdgen/categories.yaml index 1f2fb056..80d4bf41 100644 --- a/internal/simdgen/categories.yaml +++ b/internal/simdgen/categories.yaml @@ -57,24 +57,91 @@ extension: "AVX.*" # We also have PTEST and VPTERNLOG, those should be hidden from the users # and only appear in rewrite rules. 
+# const imm predicate(holds for both float and int|uint): +# 0: Equal +# 1: Less +# 2: LessEqual +# 4: NotEqual +# 5: GreaterEqual +# 6: Greater - go: Equal constImm: 0 commutative: "true" extension: "AVX.*" + documentation: "Predicate immediate is 0 if it has;" +- go: Less + constImm: 1 + commutative: "false" + extension: "AVX.*" + documentation: "Predicate immediate is 1 if it has;" +- go: LessEqual + constImm: 2 + commutative: "false" + extension: "AVX.*" + documentation: "Predicate immediate is 2 if it has;" +- go: IsNan # For float only. + constImm: 3 + commutative: "true" + extension: "AVX.*" + documentation: "Predicate immediate is 3 if it has; Returns mask element True if either one of the input\\'s element is Nan; Please use this method as x\\.IsNan\\(x\\) to check x only;" +- go: NotEqual + constImm: 4 + commutative: "true" + extension: "AVX.*" + documentation: "Predicate immediate is 4 if it has;" +- go: GreaterEqual + constImm: 5 + commutative: "false" + extension: "AVX.*" + documentation: "Predicate immediate is 5 if it has;" - go: Greater constImm: 6 commutative: "false" extension: "AVX.*" + documentation: "Predicate immediate is 6 if it has;" + - go: MaskedEqual constImm: 0 masked: "true" commutative: "true" extension: "AVX.*" + documentation: "Predicate immediate is 0 if it has;" +- go: MaskedLess + constImm: 1 + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: "Predicate immediate is 1 if it has;" +- go: MaskedLessEqual + constImm: 2 + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: "Predicate immediate is 2 if it has;" +- go: MaskedIsNan # For float only. 
+ constImm: 3 + masked: "true" + commutative: "true" + extension: "AVX.*" + documentation: "Predicate immediate is 3 if it has; Returns mask element True if either one of the input\\'s element is Nan; Please use this method as x\\.IsNan\\(x\\) to check x only;" +- go: MaskedNotEqual + constImm: 4 + masked: "true" + commutative: "true" + extension: "AVX.*" + documentation: "Predicate immediate is 4 if it has;" +- go: MaskedGreaterEqual + constImm: 5 + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: "Predicate immediate is 5 if it has;" - go: MaskedGreater constImm: 6 masked: "true" commutative: "false" extension: "AVX.*" + documentation: "Predicate immediate is 6 if it has;" - go: Div commutative: "true" extension: "AVX.*" diff --git a/internal/simdgen/gen_utility.go b/internal/simdgen/gen_utility.go index 1e822980..74ab0e9f 100644 --- a/internal/simdgen/gen_utility.go +++ b/internal/simdgen/gen_utility.go @@ -430,9 +430,6 @@ func overwrite(ops []Operation) error { if op[idx].Class != "vreg" { return fmt.Errorf("simdgen: [Class] overwrite must be overwriting [Class] from vreg: %s", op[idx]) } - if *op[idx].Base != "uint" && *op[idx].Base != "int" { - return fmt.Errorf("simdgen: [Class] overwrite must be overwriting [Base] from int|uint: %s", op[idx]) - } hasClassOverwrite = true *op[idx].Base = oBase op[idx].Class = oClass diff --git a/internal/simdgen/go.yaml b/internal/simdgen/go.yaml index 0ec1ee0d..514f4540 100644 --- a/internal/simdgen/go.yaml +++ b/internal/simdgen/go.yaml @@ -247,6 +247,7 @@ - *intToFloat out: - *intToFloat +# Ints - go: Equal asm: "V?PCMPEQ[BWDQ]" in: &int2 @@ -279,7 +280,7 @@ - class: mask # The const imm predicated compares after AVX512, please see categories.yaml # for const imm specification. 
-- go: Masked(Equal|Greater) +- go: Masked(Equal|Greater|Less|LessEqual|GreaterEqual|NotEqual) asm: "VPCMP[BWDQ]" in: - class: mask @@ -291,7 +292,7 @@ const: 0 # Just a placeholder, will be overwritten by const imm porting. out: - class: mask -- go: Masked(Equal|Greater) +- go: Masked(Equal|Greater|Less|LessEqual|GreaterEqual|NotEqual) asm: "VPCMPU[BWDQ]" in: - class: mask @@ -303,6 +304,31 @@ const: 0 out: - class: mask + +# Floats +- go: Equal|Greater|Less|LessEqual|GreaterEqual|NotEqual|IsNan + asm: "VCMPP[SD]" + in: + - &float + go: $t + base: float + - *float + - class: immediate + const: 0 + out: + - go: $t # We still need the output to be the same shape as inputs. + overwriteBase: uint + overwriteClass: mask +- go: Masked(Equal|Greater|Less|LessEqual|GreaterEqual|NotEqual|IsNan) + asm: "VCMPP[SD]" + in: + - class: mask + - *float + - *float + - class: immediate + const: 0 + out: + - class: mask - go: Div asm: "V?DIVP[SD]" in: diff --git a/internal/simdgen/ops/Compares/categories.yaml b/internal/simdgen/ops/Compares/categories.yaml index cac97d4e..027c8e8d 100644 --- a/internal/simdgen/ops/Compares/categories.yaml +++ b/internal/simdgen/ops/Compares/categories.yaml @@ -1,19 +1,86 @@ !sum +# const imm predicate(holds for both float and int|uint): +# 0: Equal +# 1: Less +# 2: LessEqual +# 4: NotEqual +# 5: GreaterEqual +# 6: Greater - go: Equal constImm: 0 commutative: "true" extension: "AVX.*" + documentation: "Predicate immediate is 0 if it has;" +- go: Less + constImm: 1 + commutative: "false" + extension: "AVX.*" + documentation: "Predicate immediate is 1 if it has;" +- go: LessEqual + constImm: 2 + commutative: "false" + extension: "AVX.*" + documentation: "Predicate immediate is 2 if it has;" +- go: IsNan # For float only. 
+ constImm: 3 + commutative: "true" + extension: "AVX.*" + documentation: "Predicate immediate is 3 if it has; Returns mask element True if either one of the input\\'s element is Nan; Please use this method as x\\.IsNan\\(x\\) to check x only;" +- go: NotEqual + constImm: 4 + commutative: "true" + extension: "AVX.*" + documentation: "Predicate immediate is 4 if it has;" +- go: GreaterEqual + constImm: 5 + commutative: "false" + extension: "AVX.*" + documentation: "Predicate immediate is 5 if it has;" - go: Greater constImm: 6 commutative: "false" extension: "AVX.*" + documentation: "Predicate immediate is 6 if it has;" + - go: MaskedEqual constImm: 0 masked: "true" commutative: "true" extension: "AVX.*" + documentation: "Predicate immediate is 0 if it has;" +- go: MaskedLess + constImm: 1 + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: "Predicate immediate is 1 if it has;" +- go: MaskedLessEqual + constImm: 2 + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: "Predicate immediate is 2 if it has;" +- go: MaskedIsNan # For float only. 
+ constImm: 3 + masked: "true" + commutative: "true" + extension: "AVX.*" + documentation: "Predicate immediate is 3 if it has; Returns mask element True if either one of the input\\'s element is Nan; Please use this method as x\\.IsNan\\(x\\) to check x only;" +- go: MaskedNotEqual + constImm: 4 + masked: "true" + commutative: "true" + extension: "AVX.*" + documentation: "Predicate immediate is 4 if it has;" +- go: MaskedGreaterEqual + constImm: 5 + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: "Predicate immediate is 5 if it has;" - go: MaskedGreater constImm: 6 masked: "true" commutative: "false" - extension: "AVX.*" \ No newline at end of file + extension: "AVX.*" + documentation: "Predicate immediate is 6 if it has;" \ No newline at end of file diff --git a/internal/simdgen/ops/Compares/go.yaml b/internal/simdgen/ops/Compares/go.yaml index f0a8cab1..c3a52394 100644 --- a/internal/simdgen/ops/Compares/go.yaml +++ b/internal/simdgen/ops/Compares/go.yaml @@ -1,4 +1,5 @@ !sum +# Ints - go: Equal asm: "V?PCMPEQ[BWDQ]" in: &int2 @@ -31,7 +32,7 @@ - class: mask # The const imm predicated compares after AVX512, please see categories.yaml # for const imm specification. -- go: Masked(Equal|Greater) +- go: Masked(Equal|Greater|Less|LessEqual|GreaterEqual|NotEqual) asm: "VPCMP[BWDQ]" in: - class: mask @@ -43,7 +44,7 @@ const: 0 # Just a placeholder, will be overwritten by const imm porting. out: - class: mask -- go: Masked(Equal|Greater) +- go: Masked(Equal|Greater|Less|LessEqual|GreaterEqual|NotEqual) asm: "VPCMPU[BWDQ]" in: - class: mask @@ -55,3 +56,28 @@ const: 0 out: - class: mask + +# Floats +- go: Equal|Greater|Less|LessEqual|GreaterEqual|NotEqual|IsNan + asm: "VCMPP[SD]" + in: + - &float + go: $t + base: float + - *float + - class: immediate + const: 0 + out: + - go: $t # We still need the output to be the same shape as inputs. 
+ overwriteBase: uint + overwriteClass: mask +- go: Masked(Equal|Greater|Less|LessEqual|GreaterEqual|NotEqual|IsNan) + asm: "VCMPP[SD]" + in: + - class: mask + - *float + - *float + - class: immediate + const: 0 + out: + - class: mask \ No newline at end of file From 6a7b46808a5ff3b74d86db551fcfea7707676022 Mon Sep 17 00:00:00 2001 From: David Chase Date: Tue, 27 May 2025 15:59:22 -0400 Subject: [PATCH 059/200] internal/simd: tweak command line flags This change the command line flag -godefroot to -goroot so it matches the "Code generated by" comment at the top. Change-Id: Iee8044fe6573d4d87a53181ac635dadbee4a9843 Reviewed-on: https://go-review.googlesource.com/c/arch/+/676497 LUCI-TryBot-Result: Go LUCI Reviewed-by: Junyang Shao Auto-Submit: Junyang Shao --- internal/simdgen/main.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/internal/simdgen/main.go b/internal/simdgen/main.go index 84d8a92f..b0ae52cb 100644 --- a/internal/simdgen/main.go +++ b/internal/simdgen/main.go @@ -104,8 +104,8 @@ import ( var ( xedPath = flag.String("xedPath", "", "load XED datafiles from `path`") flagQ = flag.String("q", "", "query: read `def` as another input (skips final validation)") - flagO = flag.String("o", "yaml", "output type: yaml, godefs") - flagGoDefRoot = flag.String("godefroot", ".", "the path to the directory containing the generated godefs") + flagO = flag.String("o", "yaml", "output type: yaml, godefs (generate definitions into a Go source tree") + flagGoDefRoot = flag.String("goroot", ".", "the path to the Go dev directory that will receive the generated files") FlagNoDedup = flag.Bool("nodedup", false, "disable deduplicating godefs of 2 qualifying operations from different extensions") FlagNoSplitMask = flag.Bool("nosplitmask", false, "disable splitting the masks to const and non const") FlagNoConstImmPorting = flag.Bool("noconstimmporting", false, "disable const immediate porting from op to imm operand") From 
2a0236b5d14f8fb59383ca43b1c267e09088dfca Mon Sep 17 00:00:00 2001 From: David Chase Date: Wed, 28 May 2025 17:14:47 -0400 Subject: [PATCH 060/200] internal/simdgen: change simd package to "simd"; add test this puts the heavyweight end-to-end test in the repo, so we can be sure we agree on what the test is/does. Change-Id: I7f31835594bdd6571a6fa682cd8c2b22fb833e03 Reviewed-on: https://go-review.googlesource.com/c/arch/+/676757 LUCI-TryBot-Result: Go LUCI Reviewed-by: Junyang Shao --- internal/simdgen/etetest.sh | 21 +++++++++++++++++++++ internal/simdgen/gen_simdIntrinsics.go | 21 +++++++++++---------- internal/simdgen/gen_simdTypes.go | 4 ++-- internal/simdgen/main.go | 2 ++ 4 files changed, 36 insertions(+), 12 deletions(-) create mode 100755 internal/simdgen/etetest.sh diff --git a/internal/simdgen/etetest.sh b/internal/simdgen/etetest.sh new file mode 100755 index 00000000..f83b6b37 --- /dev/null +++ b/internal/simdgen/etetest.sh @@ -0,0 +1,21 @@ +#!/bin/bash -x + +cat <<\\EOF + +This is an end-to-end test of Go SIMD. It checks out a fresh Go +repository from the go.simd branch, then generates the SIMD input +files and runs simdgen writing into the fresh repository. + +After that it generates the modified ssa pattern matching files, then +builds the compiler. + +\EOF + +rm -rf go-test +git clone https://go.googlesource.com/go -b dev.simd go-test +go generate +go run . -xedPath xeddata -o godefs -goroot ./go-test go.yaml types.yaml categories.yaml +(cd go-test/src/cmd/compile/internal/ssa/_gen ; go run *.go ) +(cd go-test/src ; GOEXPERIMENT=simd ./make.bash ) +(cd go-test/bin; b=`pwd` ; cd ../src/simd/testdata; GOARCH=amd64 $b/go run .) 
+# next, add some tests of SIMD itself diff --git a/internal/simdgen/gen_simdIntrinsics.go b/internal/simdgen/gen_simdIntrinsics.go index 5d4a27f2..93174937 100644 --- a/internal/simdgen/gen_simdIntrinsics.go +++ b/internal/simdgen/gen_simdIntrinsics.go @@ -18,32 +18,33 @@ import ( "cmd/internal/sys" ) +const simdPackage = "` + simdPackage + `" func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies ...sys.ArchFamily)) { {{- range .OpsLen1}} - addF("internal/simd", "{{(index .In 0).Go}}.{{.Go}}", opLen1(ssa.Op{{.Go}}{{(index .In 0).Go}}, {{.GoArch}}), sys.AMD64) + addF(simdPackage, "{{(index .In 0).Go}}.{{.Go}}", opLen1(ssa.Op{{.Go}}{{(index .In 0).Go}}, {{.GoArch}}), sys.AMD64) {{- end}} {{- range .OpsLen2}} - addF("internal/simd", "{{(index .In 0).Go}}.{{.Go}}", opLen2(ssa.Op{{.Go}}{{(index .In 0).Go}}, {{.GoArch}}), sys.AMD64) + addF(simdPackage, "{{(index .In 0).Go}}.{{.Go}}", opLen2(ssa.Op{{.Go}}{{(index .In 0).Go}}, {{.GoArch}}), sys.AMD64) {{- end}} {{- range .OpsLen3}} - addF("internal/simd", "{{(index .In 0).Go}}.{{.Go}}", opLen3(ssa.Op{{.Go}}{{(index .In 0).Go}}, {{.GoArch}}), sys.AMD64) + addF(simdPackage, "{{(index .In 0).Go}}.{{.Go}}", opLen3(ssa.Op{{.Go}}{{(index .In 0).Go}}, {{.GoArch}}), sys.AMD64) {{- end}} {{- range .VectorConversions }} - addF("internal/simd", "{{.Tsrc.Name}}.As{{.Tdst.Name}}", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64) + addF(simdPackage, "{{.Tsrc.Name}}.As{{.Tdst.Name}}", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64) {{- end}} {{- range $size, $ts := .TypeMap }} {{- range $t := $ts }} - addF("internal/simd", "Load{{$t.Name}}", simdLoad(), sys.AMD64) - addF("internal/simd", "{{$t.Name}}.Store", simdStore(), sys.AMD64) + addF(simdPackage, "Load{{$t.Name}}", simdLoad(), sys.AMD64) + addF(simdPackage, "{{$t.Name}}.Store", simdStore(), sys.AMD64) {{- end}} {{- end}} {{- range .Masks }} - addF("internal/simd", 
"{{.Name}}.As{{.VectorCounterpart}}", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64) - addF("internal/simd", "{{.VectorCounterpart}}.As{{.Name}}", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64) - addF("internal/simd", "{{.Name}}.And", opLen2(ssa.OpAnd{{.ReshapedVectorWithAndOr}}, types.TypeVec{{.Size}}), sys.AMD64) - addF("internal/simd", "{{.Name}}.Or", opLen2(ssa.OpOr{{.ReshapedVectorWithAndOr}}, types.TypeVec{{.Size}}), sys.AMD64) + addF(simdPackage, "{{.Name}}.As{{.VectorCounterpart}}", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64) + addF(simdPackage, "{{.VectorCounterpart}}.As{{.Name}}", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64) + addF(simdPackage, "{{.Name}}.And", opLen2(ssa.OpAnd{{.ReshapedVectorWithAndOr}}, types.TypeVec{{.Size}}), sys.AMD64) + addF(simdPackage, "{{.Name}}.Or", opLen2(ssa.OpOr{{.ReshapedVectorWithAndOr}}, types.TypeVec{{.Size}}), sys.AMD64) {{- end}} } diff --git a/internal/simdgen/gen_simdTypes.go b/internal/simdgen/gen_simdTypes.go index 14395010..d06bb25a 100644 --- a/internal/simdgen/gen_simdTypes.go +++ b/internal/simdgen/gen_simdTypes.go @@ -179,7 +179,7 @@ func masksFromTypeMap(typeMap simdTypeMap) []simdType { // writeSIMDTypes generates the simd vector type and writes it to types_amd64.go // within the specified directory. func writeSIMDTypes(directory string, typeMap simdTypeMap) error { - file, t, err := openFileAndPrepareTemplate(directory, "src/internal/simd/types_amd64.go", simdTypesTmpl) + file, t, err := openFileAndPrepareTemplate(directory, "src/"+simdPackage+"/types_amd64.go", simdTypesTmpl) if err != nil { return err } @@ -200,7 +200,7 @@ func writeSIMDTypes(directory string, typeMap simdTypeMap) error { // writeSIMDStubs generates the simd vector intrinsic stubs and writes it to stubs_amd64.go // within the specified directory. 
func writeSIMDStubs(directory string, ops []Operation, typeMap simdTypeMap) error { - file, t, err := openFileAndPrepareTemplate(directory, "src/internal/simd/stubs_amd64.go", simdStubsTmpl) + file, t, err := openFileAndPrepareTemplate(directory, "src/"+simdPackage+"/stubs_amd64.go", simdStubsTmpl) if err != nil { return err } diff --git a/internal/simdgen/main.go b/internal/simdgen/main.go index b0ae52cb..14bf9b8f 100644 --- a/internal/simdgen/main.go +++ b/internal/simdgen/main.go @@ -117,6 +117,8 @@ var ( FlagReportDup = flag.Bool("reportdup", false, "report the duplicate godefs") ) +const simdPackage = "simd" + func main() { flag.Parse() From 88f5c58204869f4dcc4c6ca72feab854d1ed6b7e Mon Sep 17 00:00:00 2001 From: Junyang Shao Date: Wed, 28 May 2025 21:16:14 +0000 Subject: [PATCH 061/200] internal/simdgen: Support more op shapes, add more ops This CL tries to support FP only arithmetic operations. To facilitate this it also amends simdgen to support more op shapes. This CL also added logic to exclude input and output sharing the same register case. This will be a TODO for simdgen. 
Change-Id: Ied981bfb53663d060a117e3c3ff1b82494b743fb Reviewed-on: https://go-review.googlesource.com/c/arch/+/676996 Reviewed-by: David Chase Commit-Queue: Junyang Shao LUCI-TryBot-Result: Go LUCI --- internal/simdgen/categories.yaml | 29 +++- internal/simdgen/gen_simdMachineOps.go | 6 +- internal/simdgen/gen_simdrules.go | 89 +++++++----- internal/simdgen/gen_simdssa.go | 130 +++++++----------- internal/simdgen/gen_utility.go | 31 +---- internal/simdgen/go.yaml | 33 ++++- .../simdgen/ops/FPonlyArith/categories.yaml | 31 ++++- internal/simdgen/ops/FPonlyArith/go.yaml | 35 ++++- 8 files changed, 222 insertions(+), 162 deletions(-) diff --git a/internal/simdgen/categories.yaml b/internal/simdgen/categories.yaml index 80d4bf41..c28926c8 100644 --- a/internal/simdgen/categories.yaml +++ b/internal/simdgen/categories.yaml @@ -143,10 +143,35 @@ extension: "AVX.*" documentation: "Predicate immediate is 6 if it has;" - go: Div - commutative: "true" + commutative: "false" extension: "AVX.*" - go: MaskedDiv - commutative: "true" + commutative: "false" + masked: "true" + extension: "AVX.*" +- go: Sqrt + commutative: "false" + extension: "AVX.*" +- go: MaskedSqrt + commutative: "false" + masked: "true" + extension: "AVX.*" +- go: ApproximateReciprocal + commutative: "false" + extension: "AVX.*" +- go: MaskedApproximateReciprocal + commutative: "false" + masked: "true" + extension: "AVX.*" +- go: ApproximateReciprocalOfSqrt + commutative: "false" + extension: "AVX.*" +- go: MaskedApproximateReciprocalOfSqrt + commutative: "false" + masked: "true" + extension: "AVX.*" +- go: MaskedMulByPowOf2 # This operation is all after AVX512, the unmasked version will be generated. 
+ commutative: "false" masked: "true" extension: "AVX.*" - go: Mul diff --git a/internal/simdgen/gen_simdMachineOps.go b/internal/simdgen/gen_simdMachineOps.go index 0deec9c6..ec3eaba9 100644 --- a/internal/simdgen/gen_simdMachineOps.go +++ b/internal/simdgen/gen_simdMachineOps.go @@ -12,7 +12,7 @@ import ( const simdMachineOpsTmpl = `// Code generated by x/arch/internal/simdgen using 'go run . -xedPath $XED_PATH -o godefs -goroot $GOROOT go.yaml types.yaml categories.yaml'; DO NOT EDIT. package main -func simdAMD64Ops(fp1fp1, fp2fp1, fp2m1, fp2m1fp1, fp2m1m1 regInfo) []opData { +func simdAMD64Ops(fp1fp1, fp2fp1, fp2m1, fp1m1fp1, fp2m1fp1, fp2m1m1 regInfo) []opData { return []opData{ {{- range .OpsData }} {name: "{{.OpName}}", argLength: {{.OpInLen}}, reg: {{.RegInfo}}, asm: "{{.Asm}}", commutative: {{.Comm}}, typ: "{{.Type}}"}, @@ -46,7 +46,7 @@ func writeSIMDMachineOps(directory string, ops []Operation) error { OpsDataImm []opData } seen := map[string]struct{}{} - regInfoSet := map[string]bool{"fp1fp1": true, "fp2fp1": true, "fp2m1": true, "fp2m1fp1": true, "fp2m1m1": true} + regInfoSet := map[string]bool{"fp1fp1": true, "fp2fp1": true, "fp2m1": true, "fp2m1fp1": true, "fp2m1m1": true, "fp1m1fp1": true} opsData := make([]opData, 0) opsDataImm := make([]opData, 0) for _, op := range ops { @@ -107,7 +107,7 @@ func writeSIMDMachineOps(directory string, ops []Operation) error { } else if shapeOut == OneKmaskOut { outType = "Mask" } else { - return fmt.Errorf("simdgen does not recognize this output shape: %+v", shapeOut) + return fmt.Errorf("simdgen does not recognize this output shape: %d", shapeOut) } if shapeIn == OneConstImmIn || shapeIn == OneKmaskConstImmIn { opsDataImm = append(opsDataImm, opData{*gOp.In[0].Go + gOp.Go, asm, gOp.Asm, len(gOp.In), regInfo, gOp.Commutative, outType}) diff --git a/internal/simdgen/gen_simdrules.go b/internal/simdgen/gen_simdrules.go index c368c770..172282eb 100644 --- a/internal/simdgen/gen_simdrules.go +++ 
b/internal/simdgen/gen_simdrules.go @@ -20,28 +20,28 @@ const simdrulesTmpl = `// Code generated by x/arch/internal/simdgen using 'go ru // Masks are always at the end, immediates always at the beginning. {{- range .Ops }} -{{if eq (len .In) 1}}({{.Go}}{{(index .In 0).Go}} x) => ({{.Asm}} x){{end}}{{if eq (len .In) 2}}({{.Go}}{{(index .In 0).Go}} x y) => ({{.Asm}} y x){{end}} +({{.Op.Go}}{{(index .Op.In 0).Go}} {{.Args}}) => ({{.Op.Asm}} {{.ReverseArgs}}) {{- end }} {{- range .OpsImm }} -({{.Go}}{{(index .In 1).Go}} x y) => ({{.Asm}} [{{(index .In 0).Const}}] y x) +({{.Op.Go}}{{(index .Op.In 1).Go}} {{.Args}}) => ({{.Op.Asm}} [{{(index .Op.In 0).Const}}] {{.ReverseArgs}}) {{- end }} {{- range .OpsMask}} -({{.Go}}{{(index .In 0).Go}} x y z) => ({{.Asm}} y x (VPMOVVec{{(index .In 0).ElemBits}}x{{(index .In 0).Lanes}}ToM z)) +({{.Op.Go}}{{(index .Op.In 0).Go}} {{.Args}} mask) => ({{.Op.Asm}} {{.ReverseArgs}} (VPMOVVec{{(index .Op.In 0).ElemBits}}x{{(index .Op.In 0).Lanes}}ToM mask)) {{- end }} {{- range .OpsImmMask}} -({{.Go}}{{(index .In 1).Go}} x y z) => ({{.Asm}} [{{(index .In 0).Const}}] y x (VPMOVVec{{(index .In 1).ElemBits}}x{{(index .In 1).Lanes}}ToM z)) +({{.Op.Go}}{{(index .Op.In 1).Go}} {{.Args}} mask) => ({{.Op.Asm}} [{{(index .Op.In 0).Const}}] {{.ReverseArgs}} (VPMOVVec{{(index .Op.In 1).ElemBits}}x{{(index .Op.In 1).Lanes}}ToM mask)) {{- end }} {{- range .OpsMaskOut}} -({{.Go}}{{(index .In 0).Go}} x y) => (VPMOVMToVec{{(index .In 0).ElemBits}}x{{(index .In 0).Lanes}} ({{.Asm}} y x)) +({{.Op.Go}}{{(index .Op.In 0).Go}} {{.Args}}) => (VPMOVMToVec{{(index .Op.In 0).ElemBits}}x{{(index .Op.In 0).Lanes}} ({{.Op.Asm}} {{.ReverseArgs}})) {{- end }} {{- range .OpsImmInMaskOut}} -({{.Go}}{{(index .In 1).Go}} x y) => (VPMOVMToVec{{(index .In 1).ElemBits}}x{{(index .In 1).Lanes}} ({{.Asm}} [{{(index .In 0).Const}}] y x)) +({{.Op.Go}}{{(index .Op.In 1).Go}} {{.Args}}) => (VPMOVMToVec{{(index .Op.In 1).ElemBits}}x{{(index .Op.In 1).Lanes}} ({{.Op.Asm}} [{{(index 
.Op.In 0).Const}}] {{.ReverseArgs}})) {{- end }} {{- range .OpsMaskInMaskOut}} -({{.Go}}{{(index .In 0).Go}} x y z) => (VPMOVMToVec{{(index .In 0).ElemBits}}x{{(index .In 0).Lanes}} ({{.Asm}} y x (VPMOVVec{{(index .In 0).ElemBits}}x{{(index .In 0).Lanes}}ToM z))) +({{.Op.Go}}{{(index .Op.In 0).Go}} {{.Args}} mask) => (VPMOVMToVec{{(index .Op.In 0).ElemBits}}x{{(index .Op.In 0).Lanes}} ({{.Op.Asm}} {{.ReverseArgs}} (VPMOVVec{{(index .Op.In 0).ElemBits}}x{{(index .Op.In 0).Lanes}}ToM mask))) {{- end }} {{- range .OpsImmMaskInMaskOut}} -({{.Go}}{{(index .In 1).Go}} x y z) => (VPMOVMToVec{{(index .In 1).ElemBits}}x{{(index .In 1).Lanes}} ({{.Asm}} [{{(index .In 0).Const}}] y x (VPMOVVec{{(index .In 1).ElemBits}}x{{(index .In 1).Lanes}}ToM z))) +({{.Op.Go}}{{(index .Op.In 1).Go}} {{.Args}} mask) => (VPMOVMToVec{{(index .Op.In 1).ElemBits}}x{{(index .Op.In 1).Lanes}} ({{.Op.Asm}} [{{(index .Op.In 0).Const}}] {{.ReverseArgs}} (VPMOVVec{{(index .Op.In 1).ElemBits}}x{{(index .Op.In 1).Lanes}}ToM mask))) {{- end }} ` @@ -53,35 +53,52 @@ func writeSIMDRules(directory string, ops []Operation) error { return err } defer file.Close() - Ops := make([]Operation, 0) - OpsImm := make([]Operation, 0) - OpsMask := make([]Operation, 0) - OpsImmMask := make([]Operation, 0) - OpsMaskOut := make([]Operation, 0) - OpsImmInMaskOut := make([]Operation, 0) - OpsMaskInMaskOut := make([]Operation, 0) - OpsImmMaskInMaskOut := make([]Operation, 0) + type OpAndArgList struct { + Op Operation + Args string // "x y", does not include masks + ReverseArgs string // "y x", does not include masks + } + Ops := make([]OpAndArgList, 0) + OpsImm := make([]OpAndArgList, 0) + OpsMask := make([]OpAndArgList, 0) + OpsImmMask := make([]OpAndArgList, 0) + OpsMaskOut := make([]OpAndArgList, 0) + OpsImmInMaskOut := make([]OpAndArgList, 0) + OpsMaskInMaskOut := make([]OpAndArgList, 0) + OpsImmMaskInMaskOut := make([]OpAndArgList, 0) for _, op := range ops { - opInShape, opOutShape, maskType, _, op, _, err := 
op.shape() + opInShape, opOutShape, maskType, _, op, gOp, err := op.shape() if err != nil { return err } + vregInCnt := len(gOp.In) if maskType == OneMask { op.Asm += "Masked" + vregInCnt-- } op.Asm = fmt.Sprintf("%s%d", op.Asm, *op.Out[0].Bits) + opData := OpAndArgList{Op: op} + if vregInCnt == 1 { + opData.Args = "x" + opData.ReverseArgs = "x" + } else if vregInCnt == 2 { + opData.Args = "x y" + opData.ReverseArgs = "y x" + } else { + return fmt.Errorf("simdgen does not support more than 2 vreg in inputs") + } // If class overwrite is happening, that's not really a mask but a vreg. if opOutShape == OneVregOut || op.Out[0].OverwriteClass != nil { switch opInShape { case PureVregIn: - Ops = append(Ops, op) + Ops = append(Ops, opData) case OneKmaskIn: - OpsMask = append(OpsMask, op) + OpsMask = append(OpsMask, opData) case OneConstImmIn: - OpsImm = append(OpsImm, op) + OpsImm = append(OpsImm, opData) case OneKmaskConstImmIn: - OpsImmMask = append(OpsImmMask, op) + OpsImmMask = append(OpsImmMask, opData) case PureKmaskIn: return fmt.Errorf("simdgen does not support pure k mask instructions, they should be generated by compiler optimizations") } @@ -89,22 +106,22 @@ func writeSIMDRules(directory string, ops []Operation) error { // OneKmaskOut case switch opInShape { case PureVregIn: - OpsMaskOut = append(OpsMaskOut, op) + OpsMaskOut = append(OpsMaskOut, opData) case OneKmaskIn: - OpsMaskInMaskOut = append(OpsMaskInMaskOut, op) + OpsMaskInMaskOut = append(OpsMaskInMaskOut, opData) case OneConstImmIn: - OpsImmInMaskOut = append(OpsImmInMaskOut, op) + OpsImmInMaskOut = append(OpsImmInMaskOut, opData) case OneKmaskConstImmIn: - OpsImmMaskInMaskOut = append(OpsImmMaskInMaskOut, op) + OpsImmMaskInMaskOut = append(OpsImmMaskInMaskOut, opData) case PureKmaskIn: return fmt.Errorf("simdgen does not support pure k mask instructions, they should be generated by compiler optimizations") } } } - sortKey := func(op *Operation) string { - return *op.In[0].Go + op.Go + sortKey := 
func(op *OpAndArgList) string { + return *op.Op.In[0].Go + op.Op.Go } - sortBySortKey := func(ops []Operation) { + sortBySortKey := func(ops []OpAndArgList) { sort.Slice(ops, func(i, j int) bool { return sortKey(&ops[i]) < sortKey(&ops[j]) }) @@ -119,14 +136,14 @@ func writeSIMDRules(directory string, ops []Operation) error { sortBySortKey(OpsImmMaskInMaskOut) type templateData struct { - Ops []Operation - OpsImm []Operation - OpsMask []Operation - OpsImmMask []Operation - OpsMaskOut []Operation - OpsImmInMaskOut []Operation - OpsMaskInMaskOut []Operation - OpsImmMaskInMaskOut []Operation + Ops []OpAndArgList + OpsImm []OpAndArgList + OpsMask []OpAndArgList + OpsImmMask []OpAndArgList + OpsMaskOut []OpAndArgList + OpsImmInMaskOut []OpAndArgList + OpsMaskInMaskOut []OpAndArgList + OpsImmMaskInMaskOut []OpAndArgList } err = t.Execute(file, templateData{ diff --git a/internal/simdgen/gen_simdssa.go b/internal/simdgen/gen_simdssa.go index 10222bc7..92bfed79 100644 --- a/internal/simdgen/gen_simdssa.go +++ b/internal/simdgen/gen_simdssa.go @@ -23,18 +23,18 @@ import ( func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { p := s.Prog(v.Op.Asm()) // First arg - switch v.Op {{"{"}}{{if gt (len .ImmFirst) 0}} - // Imm - case {{.ImmFirst}}: + switch v.Op {{"{"}}{{if gt (len .Imms) 0}} + // Immediates + case {{.Imms}}: imm := v.AuxInt if imm < 0 || imm > 255 { v.Fatalf("Invalid source selection immediate") } p.From.Offset = imm p.From.Type = obj.TYPE_CONST -{{end}}{{if gt (len .VregFirst) 0}} - // vreg - case {{.VregFirst}}: +{{end}}{{if gt (len .Reg0) 0}} + // Registers + case {{.Reg0}}: p.From.Type = obj.TYPE_REG p.From.Reg = simdReg(v.Args[0]) {{end}} @@ -44,9 +44,9 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { } // Second arg - switch v.Op {{"{"}}{{if gt (len .VregSecond) 0}} - // vreg - case {{.VregSecond}}: + switch v.Op {{"{"}}{{if gt (len .Reg1) 0}} + // Registers + case {{.Reg1}}: if p.From.Type == obj.TYPE_CONST { 
p.AddRestSourceReg(simdReg(v.Args[0])) } else { @@ -55,43 +55,31 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { } // Third arg - switch v.Op {{"{"}}{{if gt (len .VregThird) 0}} - // vreg - case {{.VregThird}}: + switch v.Op {{"{"}}{{if gt (len .Reg2) 0}} + // Registers + case {{.Reg2}}: if p.From.Type == obj.TYPE_CONST { p.AddRestSourceReg(simdReg(v.Args[1])) } else { p.AddRestSourceReg(simdReg(v.Args[2])) - } -{{end}}{{if gt (len .MaskThird) 0}} - // k mask - case {{.MaskThird}}: - if p.From.Type == obj.TYPE_CONST { - p.AddRestSourceReg(v.Args[1].Reg()) - } else { - p.AddRestSourceReg(v.Args[2].Reg()) }{{end}} } // Fourth arg - switch v.Op {{"{"}}{{if gt (len .MaskFourth) 0}} - case {{.MaskFourth}}: + switch v.Op {{"{"}}{{if gt (len .Reg3) 0}} + case {{.Reg3}}: if p.From.Type == obj.TYPE_CONST { - p.AddRestSourceReg(v.Args[2].Reg()) + p.AddRestSourceReg(simdReg(v.Args[2])) } else { - p.AddRestSourceReg(v.Args[3].Reg()) + p.AddRestSourceReg(simdReg(v.Args[3])) }{{end}} } // Output - switch v.Op {{"{"}}{{if gt (len .VregOut) 0}} - case {{.VregOut}}: + switch v.Op {{"{"}}{{if gt (len .All) 0}} + case {{.All}}: p.To.Type = obj.TYPE_REG p.To.Reg = simdReg(v) -{{end}}{{if gt (len .MaskOut) 0}} - case {{.MaskOut}}: - p.To.Type = obj.TYPE_REG - p.To.Reg = v.Reg() {{end}} default: // One result is required. @@ -111,20 +99,15 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { // writeSIMDSSA generates the ssa to prog lowering codes and writes it to simdssa.go // within the specified directory. 
func writeSIMDSSA(directory string, ops []Operation) error { - var ImmFirst []string - var VregFirst []string - var VregSecond []string - var MaskThird []string - var VregThird []string - var MaskFourth []string - var VregOut []string - var MaskOut []string + var Imms []string + var All []string var ZeroingMask []string + Regs := map[int][]string{} seen := map[string]struct{}{} for _, op := range ops { asm := op.Asm - shapeIn, shapeOut, maskType, _, _, gOp, err := op.shape() + shapeIn, _, maskType, _, _, gOp, err := op.shape() if err != nil { return err } @@ -137,61 +120,40 @@ func writeSIMDSSA(directory string, ops []Operation) error { } seen[asm] = struct{}{} caseStr := fmt.Sprintf("ssa.OpAMD64%s", asm) - if shapeIn == PureVregIn || shapeIn == PureKmaskIn { - // Masks and vreg are handled together by simdReg() - VregFirst = append(VregFirst, caseStr) - if len(gOp.In) > 1 { - VregSecond = append(VregSecond, caseStr) - } - } else if shapeIn == OneKmaskIn { - VregFirst = append(VregFirst, caseStr) - VregSecond = append(VregSecond, caseStr) - MaskThird = append(MaskThird, caseStr) - if gOp.Zeroing == nil { - ZeroingMask = append(ZeroingMask, caseStr) - } - } else if shapeIn == OneConstImmIn { - ImmFirst = append(ImmFirst, caseStr) - VregSecond = append(VregSecond, caseStr) - VregThird = append(VregThird, caseStr) - } else { - // OneKmaskConstImmIn case - ImmFirst = append(ImmFirst, caseStr) - VregSecond = append(VregSecond, caseStr) - VregThird = append(VregThird, caseStr) - MaskFourth = append(MaskFourth, caseStr) + if shapeIn == OneKmaskIn || shapeIn == OneKmaskConstImmIn { if gOp.Zeroing == nil { ZeroingMask = append(ZeroingMask, caseStr) } } - if shapeOut == OneVregOut || gOp.Out[0].OverwriteClass != nil { - // If class overwrite is happening, that's not really a mask but a vreg. 
- VregOut = append(VregOut, caseStr) - } else { - // OneKmaskOut case - MaskOut = append(MaskOut, caseStr) + immCount := 0 + if shapeIn == OneConstImmIn || shapeIn == OneKmaskConstImmIn { + immCount++ + Imms = append(Imms, caseStr) + } + for i := range len(gOp.In) { + if i > 2 { + return fmt.Errorf("simdgen does not recognize more than 3 registers: %s", gOp) + } + Regs[i+immCount] = append(Regs[i+immCount], caseStr) } + All = append(All, caseStr) } data := struct { - ImmFirst string - VregFirst string - VregSecond string - MaskThird string - VregThird string - MaskFourth string - VregOut string - MaskOut string + Imms string + Reg0 string + Reg1 string + Reg2 string + Reg3 string + All string ZeroingMask string }{ - strings.Join(ImmFirst, ", "), - strings.Join(VregFirst, ", "), - strings.Join(VregSecond, ", "), - strings.Join(MaskThird, ", "), - strings.Join(VregThird, ", "), - strings.Join(MaskFourth, ", "), - strings.Join(VregOut, ", "), - strings.Join(MaskOut, ", "), + strings.Join(Imms, ", "), + strings.Join(Regs[0], ", "), + strings.Join(Regs[1], ", "), + strings.Join(Regs[2], ", "), + strings.Join(Regs[3], ", "), + strings.Join(All, ", "), strings.Join(ZeroingMask, ", "), } diff --git a/internal/simdgen/gen_utility.go b/internal/simdgen/gen_utility.go index 74ab0e9f..53362e61 100644 --- a/internal/simdgen/gen_utility.go +++ b/internal/simdgen/gen_utility.go @@ -91,7 +91,9 @@ func (op *Operation) shape() (shapeIn, shapeOut, maskType int, opNoConstImm Oper err = fmt.Errorf("simdgen only supports 1 output: %s", op) return } + var outputReg int if len(op.Out) == 1 { + outputReg = op.Out[0].AsmPos if op.Out[0].Class == "vreg" { shapeOut = OneVregOut } else if op.Out[0].Class == "mask" { @@ -112,6 +114,10 @@ func (op *Operation) shape() (shapeIn, shapeOut, maskType int, opNoConstImm Oper iConstMask := -1 hasVreg := false for i, in := range op.In { + if in.AsmPos == outputReg { + err = fmt.Errorf("simdgen doesn't support output and input sharing the same position: 
%s", op) + return + } if in.Class == "immediate" { // A manual check on XED data found that AMD64 SIMD instructions at most // have 1 immediates. So we don't need to check this here. @@ -208,31 +214,6 @@ func (op *Operation) shape() (shapeIn, shapeOut, maskType int, opNoConstImm Oper return } } - // Exclude some shape combination that are not yet supported in simdssa.go - if shapeIn == PureVregIn { - if len(opNoConstImmMask.In) > 2 { - err = fmt.Errorf("simdgen doesn't support more than 2 vreg args: %s", op) - return - } - } - if shapeIn == OneKmaskIn || shapeIn == OneKmaskConstImmIn { - if len(opNoConstImmMask.In) != 3 { - err = fmt.Errorf("simdgen only supports mask operations with 2 vreg args: %s", op) - return - } - } - if shapeIn == OneConstImmIn { - if len(opNoConstImmMask.In) != 2 { - err = fmt.Errorf("simdgen only supports immediate operations with 2 vreg args: %s", op) - return - } - } - if shapeIn == PureKmaskIn { - if len(opNoConstImmMask.In) != 2 { - err = fmt.Errorf("simdgen only supports pure k mask operations with 2 vreg args: %s", op) - return - } - } return } diff --git a/internal/simdgen/go.yaml b/internal/simdgen/go.yaml index 514f4540..157bc3ef 100644 --- a/internal/simdgen/go.yaml +++ b/internal/simdgen/go.yaml @@ -331,21 +331,46 @@ - class: mask - go: Div asm: "V?DIVP[SD]" - in: + in: &2fp - &fp go: $t base: float - *fp - out: + out: &1fp - *fp - go: MaskedDiv asm: "V?DIVP[SD]" - in: + in: &1mask2fp - class: mask - *fp - *fp - out: + out: *1fp +- go: Sqrt + asm: "V?SQRTP[SD]" + in: *1fp + out: *1fp +- go: MaskedSqrt + asm: "V?SQRTP[SD]" + in: &1mask1fp + - class: mask - *fp + out: *1fp +- go: MaskedApproximateReciprocal + asm: "VRCP14P[SD]" + in: *1mask1fp + out: *1fp +- go: ApproximateReciprocalOfSqrt + asm: "V?RSQRTPS" + in: *1fp + out: *1fp +- go: MaskedApproximateReciprocalOfSqrt + asm: "VRSQRT14P[SD]" + in: *1mask1fp + out: *1fp +- go: MaskedMulByPowOf2 + asm: "VSCALEFP[SD]" + in: *1mask2fp + out: *1fp # "Normal" multiplication is only 
available for floats. # This only covers the single and double precision. - go: Mul diff --git a/internal/simdgen/ops/FPonlyArith/categories.yaml b/internal/simdgen/ops/FPonlyArith/categories.yaml index 9166f1fa..3c46f1f4 100644 --- a/internal/simdgen/ops/FPonlyArith/categories.yaml +++ b/internal/simdgen/ops/FPonlyArith/categories.yaml @@ -1,8 +1,33 @@ !sum - go: Div - commutative: "true" + commutative: "false" extension: "AVX.*" - go: MaskedDiv - commutative: "true" + commutative: "false" masked: "true" - extension: "AVX.*" \ No newline at end of file + extension: "AVX.*" +- go: Sqrt + commutative: "false" + extension: "AVX.*" +- go: MaskedSqrt + commutative: "false" + masked: "true" + extension: "AVX.*" +- go: ApproximateReciprocal + commutative: "false" + extension: "AVX.*" +- go: MaskedApproximateReciprocal + commutative: "false" + masked: "true" + extension: "AVX.*" +- go: ApproximateReciprocalOfSqrt + commutative: "false" + extension: "AVX.*" +- go: MaskedApproximateReciprocalOfSqrt + commutative: "false" + masked: "true" + extension: "AVX.*" +- go: MaskedMulByPowOf2 # This operation is all after AVX512, the unmasked version will be generated. 
+ commutative: "false" + masked: "true" + extension: "AVX.*" diff --git a/internal/simdgen/ops/FPonlyArith/go.yaml b/internal/simdgen/ops/FPonlyArith/go.yaml index 4c74d253..bd774e1d 100644 --- a/internal/simdgen/ops/FPonlyArith/go.yaml +++ b/internal/simdgen/ops/FPonlyArith/go.yaml @@ -1,18 +1,43 @@ !sum - go: Div asm: "V?DIVP[SD]" - in: + in: &2fp - &fp go: $t base: float - *fp - out: + out: &1fp - *fp - go: MaskedDiv asm: "V?DIVP[SD]" - in: + in: &1mask2fp - class: mask - *fp - *fp - out: - - *fp \ No newline at end of file + out: *1fp +- go: Sqrt + asm: "V?SQRTP[SD]" + in: *1fp + out: *1fp +- go: MaskedSqrt + asm: "V?SQRTP[SD]" + in: &1mask1fp + - class: mask + - *fp + out: *1fp +- go: MaskedApproximateReciprocal + asm: "VRCP14P[SD]" + in: *1mask1fp + out: *1fp +- go: ApproximateReciprocalOfSqrt + asm: "V?RSQRTPS" + in: *1fp + out: *1fp +- go: MaskedApproximateReciprocalOfSqrt + asm: "VRSQRT14P[SD]" + in: *1mask1fp + out: *1fp +- go: MaskedMulByPowOf2 + asm: "VSCALEFP[SD]" + in: *1mask2fp + out: *1fp \ No newline at end of file From ff8bee83384752b8dc32f89c347d41f511f44ae8 Mon Sep 17 00:00:00 2001 From: Junyang Shao Date: Thu, 29 May 2025 16:55:50 +0000 Subject: [PATCH 062/200] internal/simd: fix gofmt issues in text template Add a newline to mute "gofmt" error when git committing the generated codes. 
Change-Id: I37eeab52ce32ba4badc9c9367ce8ae770acd44c9 Reviewed-on: https://go-review.googlesource.com/c/arch/+/677275 Reviewed-by: Junyang Shao LUCI-TryBot-Result: Go LUCI Reviewed-by: David Chase Auto-Submit: Junyang Shao --- internal/simdgen/gen_simdIntrinsics.go | 1 + 1 file changed, 1 insertion(+) diff --git a/internal/simdgen/gen_simdIntrinsics.go b/internal/simdgen/gen_simdIntrinsics.go index 93174937..377026b9 100644 --- a/internal/simdgen/gen_simdIntrinsics.go +++ b/internal/simdgen/gen_simdIntrinsics.go @@ -19,6 +19,7 @@ import ( ) const simdPackage = "` + simdPackage + `" + func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies ...sys.ArchFamily)) { {{- range .OpsLen1}} addF(simdPackage, "{{(index .In 0).Go}}.{{.Go}}", opLen1(ssa.Op{{.Go}}{{(index .In 0).Go}}, {{.GoArch}}), sys.AMD64) From 5a34366aa3f831bf652de26d641702aed58b7d78 Mon Sep 17 00:00:00 2001 From: Junyang Shao Date: Fri, 30 May 2025 02:58:12 +0000 Subject: [PATCH 063/200] internal/simdgen: add build tag to simd package Change-Id: I384d188b31a597177555e215e6e2827b802207c7 Reviewed-on: https://go-review.googlesource.com/c/arch/+/677279 Reviewed-by: David Chase LUCI-TryBot-Result: Go LUCI --- internal/simdgen/gen_simdTypes.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/internal/simdgen/gen_simdTypes.go b/internal/simdgen/gen_simdTypes.go index d06bb25a..2b1a5b2f 100644 --- a/internal/simdgen/gen_simdTypes.go +++ b/internal/simdgen/gen_simdTypes.go @@ -29,6 +29,8 @@ type simdTypePair struct { const simdTypesTmpl = `// Code generated by x/arch/internal/simdgen using 'go run . -xedPath $XED_PATH -o godefs -goroot $GOROOT go.yaml types.yaml categories.yaml'; DO NOT EDIT. +//go:build goexperiment.simd + package simd {{- range $size, $ts := .TypeMap }} @@ -64,6 +66,8 @@ func (x {{$tsrc.Name}}) Store(y *[{{$tsrc.Lanes}}]{{$tsrc.Base}}) const simdStubsTmpl = `// Code generated by x/arch/internal/simdgen using 'go run . 
-xedPath $XED_PATH -o godefs -goroot $GOROOT go.yaml types.yaml categories.yaml'; DO NOT EDIT. +//go:build goexperiment.simd + package simd {{- range .OpsLen1}} From e201ba44648b5eb5478e4061f5a3a9d2005f0d11 Mon Sep 17 00:00:00 2001 From: Junyang Shao Date: Fri, 30 May 2025 02:39:02 +0000 Subject: [PATCH 064/200] internal/simdgen: add min/max instructions support Change-Id: Ia784578d2b815f6b76fe3c165f2f4c4e88c0f1f2 Reviewed-on: https://go-review.googlesource.com/c/arch/+/677197 Auto-Submit: Junyang Shao Reviewed-by: David Chase LUCI-TryBot-Result: Go LUCI --- internal/simdgen/categories.yaml | 14 ++++ internal/simdgen/go.yaml | 74 ++++++++++++++++++++ internal/simdgen/ops/MinMax/categories.yaml | 15 +++++ internal/simdgen/ops/MinMax/go.yaml | 75 +++++++++++++++++++++ 4 files changed, 178 insertions(+) create mode 100644 internal/simdgen/ops/MinMax/categories.yaml create mode 100644 internal/simdgen/ops/MinMax/go.yaml diff --git a/internal/simdgen/categories.yaml b/internal/simdgen/categories.yaml index c28926c8..ca278805 100644 --- a/internal/simdgen/categories.yaml +++ b/internal/simdgen/categories.yaml @@ -174,6 +174,20 @@ commutative: "false" masked: "true" extension: "AVX.*" +- go: Max + commutative: "true" + extension: "AVX.*" +- go: MaskedMax + commutative: "true" + masked: "true" + extension: "AVX.*" +- go: Min + commutative: "true" + extension: "AVX.*" +- go: MaskedMin + commutative: "true" + masked: "true" + extension: "AVX.*" - go: Mul commutative: "true" extension: "AVX.*" diff --git a/internal/simdgen/go.yaml b/internal/simdgen/go.yaml index 157bc3ef..a78f3614 100644 --- a/internal/simdgen/go.yaml +++ b/internal/simdgen/go.yaml @@ -371,6 +371,80 @@ asm: "VSCALEFP[SD]" in: *1mask2fp out: *1fp +- go: Max + asm: "V?PMAXS[BWDQ]" + in: &2int + - &int + go: $t + base: int + - *int + out: &1int + - *int +- go: Max + asm: "V?PMAXU[BWDQ]" + in: &2uint + - &uint + go: $t + base: uint + - *uint + out: &1uint + - *uint +- go: MaskedMax + asm: "V?PMAXS[BWDQ]" + in: 
&1mask2int + - class: mask + - *int + - *int + out: *1int +- go: MaskedMax + asm: "V?PMAXU[BWDQ]" + in: &1mask2uint + - class: mask + - *uint + - *uint + out: *1uint + +- go: Min + asm: "V?PMINS[BWDQ]" + in: *2int + out: *1int +- go: Min + asm: "V?PMINU[BWDQ]" + in: *2uint + out: *1uint +- go: MaskedMin + asm: "V?PMINS[BWDQ]" + in: *1mask2int + out: *1int +- go: MaskedMin + asm: "V?PMINU[BWDQ]" + in: *1mask2uint + out: *1uint + +- go: Max + asm: "V?MAXP[SD]" + in: &2float + - &float + go: $t + base: float + - *float + out: &1float + - *float +- go: MaskedMax + asm: "V?MAXP[SD]" + in: &1mask2float + - class: mask + - *float + - *float + out: *1float +- go: Min + asm: "V?MINP[SD]" + in: *2float + out: *1float +- go: MaskedMin + asm: "V?MINP[SD]" + in: *1mask2float + out: *1float # "Normal" multiplication is only available for floats. # This only covers the single and double precision. - go: Mul diff --git a/internal/simdgen/ops/MinMax/categories.yaml b/internal/simdgen/ops/MinMax/categories.yaml new file mode 100644 index 00000000..d5131958 --- /dev/null +++ b/internal/simdgen/ops/MinMax/categories.yaml @@ -0,0 +1,15 @@ +!sum +- go: Max + commutative: "true" + extension: "AVX.*" +- go: MaskedMax + commutative: "true" + masked: "true" + extension: "AVX.*" +- go: Min + commutative: "true" + extension: "AVX.*" +- go: MaskedMin + commutative: "true" + masked: "true" + extension: "AVX.*" diff --git a/internal/simdgen/ops/MinMax/go.yaml b/internal/simdgen/ops/MinMax/go.yaml new file mode 100644 index 00000000..f307e6b6 --- /dev/null +++ b/internal/simdgen/ops/MinMax/go.yaml @@ -0,0 +1,75 @@ +!sum +- go: Max + asm: "V?PMAXS[BWDQ]" + in: &2int + - &int + go: $t + base: int + - *int + out: &1int + - *int +- go: Max + asm: "V?PMAXU[BWDQ]" + in: &2uint + - &uint + go: $t + base: uint + - *uint + out: &1uint + - *uint +- go: MaskedMax + asm: "V?PMAXS[BWDQ]" + in: &1mask2int + - class: mask + - *int + - *int + out: *1int +- go: MaskedMax + asm: "V?PMAXU[BWDQ]" + in: &1mask2uint + 
- class: mask + - *uint + - *uint + out: *1uint + +- go: Min + asm: "V?PMINS[BWDQ]" + in: *2int + out: *1int +- go: Min + asm: "V?PMINU[BWDQ]" + in: *2uint + out: *1uint +- go: MaskedMin + asm: "V?PMINS[BWDQ]" + in: *1mask2int + out: *1int +- go: MaskedMin + asm: "V?PMINU[BWDQ]" + in: *1mask2uint + out: *1uint + +- go: Max + asm: "V?MAXP[SD]" + in: &2float + - &float + go: $t + base: float + - *float + out: &1float + - *float +- go: MaskedMax + asm: "V?MAXP[SD]" + in: &1mask2float + - class: mask + - *float + - *float + out: *1float +- go: Min + asm: "V?MINP[SD]" + in: *2float + out: *1float +- go: MaskedMin + asm: "V?MINP[SD]" + in: *1mask2float + out: *1float \ No newline at end of file From 3c204e9c8b48dd0e918f2f70a37e32efd19d5160 Mon Sep 17 00:00:00 2001 From: Junyang Shao Date: Fri, 30 May 2025 17:42:35 +0000 Subject: [PATCH 065/200] internal/simdgen: fix bugs when overwriting class to mask The base type of mask should be int instead of uint. Change-Id: I48a4ba1bfc06a2ac7eabd4c5aee12223b910c5f2 Reviewed-on: https://go-review.googlesource.com/c/arch/+/677615 Reviewed-by: David Chase LUCI-TryBot-Result: Go LUCI --- internal/simdgen/gen_utility.go | 4 ++-- internal/simdgen/go.yaml | 4 ++-- internal/simdgen/ops/Compares/go.yaml | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/internal/simdgen/gen_utility.go b/internal/simdgen/gen_utility.go index 53362e61..28a451d3 100644 --- a/internal/simdgen/gen_utility.go +++ b/internal/simdgen/gen_utility.go @@ -405,8 +405,8 @@ func overwrite(ops []Operation) error { if oClass != "mask" { return fmt.Errorf("simdgen: [Class] overwrite only supports overwritting to mask: %s", op[idx]) } - if oBase != "uint" { - return fmt.Errorf("simdgen: [Class] overwrite must set [OverwriteBase] to uint: %s", op[idx]) + if oBase != "int" { + return fmt.Errorf("simdgen: [Class] overwrite must set [OverwriteBase] to int: %s", op[idx]) } if op[idx].Class != "vreg" { return fmt.Errorf("simdgen: [Class] overwrite must be 
overwriting [Class] from vreg: %s", op[idx]) diff --git a/internal/simdgen/go.yaml b/internal/simdgen/go.yaml index a78f3614..52ca7703 100644 --- a/internal/simdgen/go.yaml +++ b/internal/simdgen/go.yaml @@ -258,7 +258,7 @@ out: - &anyvregToMask go: $t # We still need the output to be the same shape as inputs. - overwriteBase: uint + overwriteBase: int overwriteClass: mask - go: Greater asm: "V?PCMPGT[BWDQ]" @@ -317,7 +317,7 @@ const: 0 out: - go: $t # We still need the output to be the same shape as inputs. - overwriteBase: uint + overwriteBase: int overwriteClass: mask - go: Masked(Equal|Greater|Less|LessEqual|GreaterEqual|NotEqual|IsNan) asm: "VCMPP[SD]" diff --git a/internal/simdgen/ops/Compares/go.yaml b/internal/simdgen/ops/Compares/go.yaml index c3a52394..2fc1f225 100644 --- a/internal/simdgen/ops/Compares/go.yaml +++ b/internal/simdgen/ops/Compares/go.yaml @@ -10,7 +10,7 @@ out: - &anyvregToMask go: $t # We still need the output to be the same shape as inputs. - overwriteBase: uint + overwriteBase: int overwriteClass: mask - go: Greater asm: "V?PCMPGT[BWDQ]" @@ -69,7 +69,7 @@ const: 0 out: - go: $t # We still need the output to be the same shape as inputs. - overwriteBase: uint + overwriteBase: int overwriteClass: mask - go: Masked(Equal|Greater|Less|LessEqual|GreaterEqual|NotEqual|IsNan) asm: "VCMPP[SD]" From 206ef99dc4c47f525e15660d8d461a56c2a0f559 Mon Sep 17 00:00:00 2001 From: Junyang Shao Date: Fri, 30 May 2025 20:24:01 +0000 Subject: [PATCH 066/200] internal/simdgen: add more int instructions This CL is partially generated by Gemini Code Assist, and I promise I eyeballed it :D!!! 
Change-Id: I8ad33c9ea4146bbbd5c606b01adcda60bd78eeca Reviewed-on: https://go-review.googlesource.com/c/arch/+/677715 LUCI-TryBot-Result: Go LUCI Reviewed-by: David Chase --- internal/simdgen/categories.yaml | 27 ++++++++ internal/simdgen/go.yaml | 60 ++++++++++++++++++ .../simdgen/ops/IntOnlyArith/categories.yaml | 28 +++++++++ internal/simdgen/ops/IntOnlyArith/go.yaml | 61 +++++++++++++++++++ 4 files changed, 176 insertions(+) create mode 100644 internal/simdgen/ops/IntOnlyArith/categories.yaml create mode 100644 internal/simdgen/ops/IntOnlyArith/go.yaml diff --git a/internal/simdgen/categories.yaml b/internal/simdgen/categories.yaml index ca278805..dfc65453 100644 --- a/internal/simdgen/categories.yaml +++ b/internal/simdgen/categories.yaml @@ -174,6 +174,33 @@ commutative: "false" masked: "true" extension: "AVX.*" +- go: Average + commutative: "true" + extension: "AVX.*" # VPAVGB/W are available across various AVX versions +- go: MaskedAverage + commutative: "true" + masked: "true" + extension: "AVX512.*" # Masked operations are typically AVX512 + +- go: Absolute + commutative: "false" + # Unary operation, not commutative + extension: "AVX.*" # VPABSB/W/D are AVX, VPABSQ is AVX512 +- go: MaskedAbsolute + commutative: "false" + masked: "true" + extension: "AVX512.*" + +- go: Sign + # Applies sign of second operand to first: sign(val, sign_src) + commutative: "false" + extension: "AVX.*" + # Sign does not have masked version + +- go: MaskedPopCount + commutative: "false" + masked: "true" + extension: "AVX512.*" # VPOPCNT instructions are AVX512 (BITALG or VPOPCNTDQ) - go: Max commutative: "true" extension: "AVX.*" diff --git a/internal/simdgen/go.yaml b/internal/simdgen/go.yaml index 52ca7703..ed7c7b1a 100644 --- a/internal/simdgen/go.yaml +++ b/internal/simdgen/go.yaml @@ -371,6 +371,66 @@ asm: "VSCALEFP[SD]" in: *1mask2fp out: *1fp +# Average (unsigned byte, unsigned word) +# Instructions: VPAVGB, VPAVGW +- go: Average + asm: "VPAVG[BW]" # Matches VPAVGB (byte) 
and VPAVGW (word) + in: + - &uint_t # $t will be Uint8xN for VPAVGB, Uint16xN for VPAVGW + go: $t + base: uint + - *uint_t + out: + - *uint_t +- go: MaskedAverage + asm: "VPAVG[BW]" + in: + - class: mask + - *uint_t + - *uint_t + out: + - *uint_t + +# Absolute Value (signed byte, word, dword, qword) +# Instructions: VPABSB, VPABSW, VPABSD, VPABSQ +- go: Absolute + asm: "VPABS[BWDQ]" # Matches VPABSB, VPABSW, VPABSD, VPABSQ + in: + - &int_t # $t will be Int8xN, Int16xN, Int32xN, Int64xN + go: $t + base: int + out: + - *int_t # Output is magnitude, fits in the same signed type +- go: MaskedAbsolute + asm: "VPABS[BWDQ]" + in: + - class: mask + - *int_t + out: + - *int_t + +# Sign Operation (signed byte, word, dword) +# Applies sign of second operand to the first. +# Instructions: VPSIGNB, VPSIGNW, VPSIGND +- go: Sign + asm: "VPSIGN[BWD]" # Matches VPSIGNB, VPSIGNW, VPSIGND + in: + - *int_t # value to apply sign to + - *int_t # value from which to take the sign + out: + - *int_t + +# Population Count (count set bits in each element) +# Instructions: VPOPCNTB, VPOPCNTW (AVX512_BITALG) +# VPOPCNTD, VPOPCNTQ (AVX512_VPOPCNTDQ) +- go: MaskedPopCount + asm: "VPOPCNT[BWDQ]" + in: + - class: mask + - &any + go: $t + out: + - *any - go: Max asm: "V?PMAXS[BWDQ]" in: &2int diff --git a/internal/simdgen/ops/IntOnlyArith/categories.yaml b/internal/simdgen/ops/IntOnlyArith/categories.yaml new file mode 100644 index 00000000..c74b57c4 --- /dev/null +++ b/internal/simdgen/ops/IntOnlyArith/categories.yaml @@ -0,0 +1,28 @@ +!sum +- go: Average + commutative: "true" + extension: "AVX.*" # VPAVGB/W are available across various AVX versions +- go: MaskedAverage + commutative: "true" + masked: "true" + extension: "AVX512.*" # Masked operations are typically AVX512 + +- go: Absolute + commutative: "false" + # Unary operation, not commutative + extension: "AVX.*" # VPABSB/W/D are AVX, VPABSQ is AVX512 +- go: MaskedAbsolute + commutative: "false" + masked: "true" + extension: "AVX512.*" + +- 
go: Sign + # Applies sign of second operand to first: sign(val, sign_src) + commutative: "false" + extension: "AVX.*" + # Sign does not have masked version + +- go: MaskedPopCount + commutative: "false" + masked: "true" + extension: "AVX512.*" # VPOPCNT instructions are AVX512 (BITALG or VPOPCNTDQ) \ No newline at end of file diff --git a/internal/simdgen/ops/IntOnlyArith/go.yaml b/internal/simdgen/ops/IntOnlyArith/go.yaml new file mode 100644 index 00000000..e8aca3c6 --- /dev/null +++ b/internal/simdgen/ops/IntOnlyArith/go.yaml @@ -0,0 +1,61 @@ +!sum +# Average (unsigned byte, unsigned word) +# Instructions: VPAVGB, VPAVGW +- go: Average + asm: "VPAVG[BW]" # Matches VPAVGB (byte) and VPAVGW (word) + in: + - &uint_t # $t will be Uint8xN for VPAVGB, Uint16xN for VPAVGW + go: $t + base: uint + - *uint_t + out: + - *uint_t +- go: MaskedAverage + asm: "VPAVG[BW]" + in: + - class: mask + - *uint_t + - *uint_t + out: + - *uint_t + +# Absolute Value (signed byte, word, dword, qword) +# Instructions: VPABSB, VPABSW, VPABSD, VPABSQ +- go: Absolute + asm: "VPABS[BWDQ]" # Matches VPABSB, VPABSW, VPABSD, VPABSQ + in: + - &int_t # $t will be Int8xN, Int16xN, Int32xN, Int64xN + go: $t + base: int + out: + - *int_t # Output is magnitude, fits in the same signed type +- go: MaskedAbsolute + asm: "VPABS[BWDQ]" + in: + - class: mask + - *int_t + out: + - *int_t + +# Sign Operation (signed byte, word, dword) +# Applies sign of second operand to the first. 
+# Instructions: VPSIGNB, VPSIGNW, VPSIGND +- go: Sign + asm: "VPSIGN[BWD]" # Matches VPSIGNB, VPSIGNW, VPSIGND + in: + - *int_t # value to apply sign to + - *int_t # value from which to take the sign + out: + - *int_t + +# Population Count (count set bits in each element) +# Instructions: VPOPCNTB, VPOPCNTW (AVX512_BITALG) +# VPOPCNTD, VPOPCNTQ (AVX512_VPOPCNTDQ) +- go: MaskedPopCount + asm: "VPOPCNT[BWDQ]" + in: + - class: mask + - &any + go: $t + out: + - *any \ No newline at end of file From b6e9ef6db54609e2b9363ec7c5c855953c577b35 Mon Sep 17 00:00:00 2001 From: David Chase Date: Fri, 30 May 2025 11:32:21 -0400 Subject: [PATCH 067/200] arch/internal: add more to the end-to-end test this adds some tests that were a pain to get right when importing simd to a Go repository. Change-Id: If94255105a1a601a4c92f6b7d0ce0369d18c26ee Reviewed-on: https://go-review.googlesource.com/c/arch/+/677535 Auto-Submit: Junyang Shao LUCI-TryBot-Result: Go LUCI Reviewed-by: Junyang Shao --- internal/simdgen/etetest.sh | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/internal/simdgen/etetest.sh b/internal/simdgen/etetest.sh index f83b6b37..a88776bf 100755 --- a/internal/simdgen/etetest.sh +++ b/internal/simdgen/etetest.sh @@ -18,4 +18,16 @@ go run . -xedPath xeddata -o godefs -goroot ./go-test go.yaml types.yaml categ (cd go-test/src/cmd/compile/internal/ssa/_gen ; go run *.go ) (cd go-test/src ; GOEXPERIMENT=simd ./make.bash ) (cd go-test/bin; b=`pwd` ; cd ../src/simd/testdata; GOARCH=amd64 $b/go run .) 
+(cd go-test/bin; b=`pwd` ; cd ../src ; +GOEXPERIMENT=simd $b/go test go/doc +GOEXPERIMENT=simd $b/go test go/build +GOEXPERIMENT=simd $b/go test cmd/api -v -check +$b/go test go/doc +$b/go test go/build +$b/go test cmd/api -v -check + +$b/go test cmd/compile/internal/ssagen -simd=0 +GOEXPERIMENT=simd $b/go test cmd/compile/internal/ssagen -simd=0 +) + # next, add some tests of SIMD itself From 441e8c15dcad485da09ed2ad0c68d0e57f1583b6 Mon Sep 17 00:00:00 2001 From: Junyang Shao Date: Tue, 3 Jun 2025 16:43:00 +0000 Subject: [PATCH 068/200] internal/simdgen: add pairwise add/sub Change-Id: Id0b678ec956e0c4ebdaae7f8b0a7ad01365f92df Reviewed-on: https://go-review.googlesource.com/c/arch/+/678376 Reviewed-by: David Chase LUCI-TryBot-Result: Go LUCI --- internal/simdgen/categories.yaml | 16 +++++++++++ internal/simdgen/go.yaml | 32 ++++++++++++++++++--- internal/simdgen/ops/AddSub/categories.yaml | 18 +++++++++++- internal/simdgen/ops/AddSub/go.yaml | 32 ++++++++++++++++++--- 4 files changed, 89 insertions(+), 9 deletions(-) diff --git a/internal/simdgen/categories.yaml b/internal/simdgen/categories.yaml index dfc65453..aae0cc9e 100644 --- a/internal/simdgen/categories.yaml +++ b/internal/simdgen/categories.yaml @@ -27,6 +27,22 @@ masked: "true" commutative: "true" extension: "AVX.*" +- go: PairwiseAdd + commutative: "false" + extension: "AVX.*" + documentation: "Add pairs of elements in vector x and store them in higher half of the target; Add pairs of elements in vector y and store them in lower half of the target" +- go: PairwiseSub + commutative: "false" + extension: "AVX.*" + documentation: "Sub pairs of elements in vector x and store them in higher half of the target; Sub pairs of elements in vector y and store them in lower half of the target" +- go: SaturatedPairwiseAdd + commutative: "false" + extension: "AVX.*" + documentation: "Add pairs of elements in vector x and store them in higher half of the target; Add pairs of elements in vector y and store them in 
lower half of the target; With saturation" +- go: SaturatedPairwiseSub + commutative: "false" + extension: "AVX.*" + documentation: "Sub pairs of elements in vector x and store them in higher half of the target; Sub pairs of elements in vector y and store them in lower half of the target; With saturation" - go: And commutative: "true" extension: "AVX.*" diff --git a/internal/simdgen/go.yaml b/internal/simdgen/go.yaml index ed7c7b1a..35f0bf75 100644 --- a/internal/simdgen/go.yaml +++ b/internal/simdgen/go.yaml @@ -55,10 +55,10 @@ # Sub - go: Sub asm: "VPSUB[BWDQ]|VADDP[SD]" - in: + in: &2any - *any - *any - out: + out: &1any - *any - go: MaskedSub asm: "VPSUB[BWDQ]|VADDP[SD]" @@ -71,10 +71,10 @@ # Saturated Sub - go: SaturatedSub asm: "VPSUBS[BWDQ]" - in: + in: &2int - *int - *int - out: + out: &1int - *int - go: SaturatedSub asm: "VPSUBS[BWDQ]" @@ -99,6 +99,30 @@ - *uint out: - *uint +- go: PairwiseAdd + asm: "VPHADD[DW]" + in: *2any + out: *1any +- go: PairwiseSub + asm: "VPHSUB[DW]" + in: *2any + out: *1any +- go: PairwiseAdd + asm: "VHADDP[SD]" # floats + in: *2any + out: *1any +- go: PairwiseSub + asm: "VHSUBP[SD]" # floats + in: *2any + out: *1any +- go: SaturatedPairwiseAdd + asm: "VPHADDS[DW]" + in: *2int + out: *1int +- go: SaturatedPairwiseSub + asm: "VPHSUBS[DW]" + in: *2int + out: *1int # In the XED data, *all* floating point bitwise logic operation has their # operand type marked as uint. 
We are not trying to understand why Intel # decided that they want FP bit-wise logic operations, but this irregularity diff --git a/internal/simdgen/ops/AddSub/categories.yaml b/internal/simdgen/ops/AddSub/categories.yaml index 8da031f7..e44412c2 100644 --- a/internal/simdgen/ops/AddSub/categories.yaml +++ b/internal/simdgen/ops/AddSub/categories.yaml @@ -26,4 +26,20 @@ - go: MaskedSaturatedSub masked: "true" commutative: "true" - extension: "AVX.*" \ No newline at end of file + extension: "AVX.*" +- go: PairwiseAdd + commutative: "false" + extension: "AVX.*" + documentation: "Add pairs of elements in vector x and store them in higher half of the target; Add pairs of elements in vector y and store them in lower half of the target" +- go: PairwiseSub + commutative: "false" + extension: "AVX.*" + documentation: "Sub pairs of elements in vector x and store them in higher half of the target; Sub pairs of elements in vector y and store them in lower half of the target" +- go: SaturatedPairwiseAdd + commutative: "false" + extension: "AVX.*" + documentation: "Add pairs of elements in vector x and store them in higher half of the target; Add pairs of elements in vector y and store them in lower half of the target; With saturation" +- go: SaturatedPairwiseSub + commutative: "false" + extension: "AVX.*" + documentation: "Sub pairs of elements in vector x and store them in higher half of the target; Sub pairs of elements in vector y and store them in lower half of the target; With saturation" \ No newline at end of file diff --git a/internal/simdgen/ops/AddSub/go.yaml b/internal/simdgen/ops/AddSub/go.yaml index 9e8dc57d..75222a1b 100644 --- a/internal/simdgen/ops/AddSub/go.yaml +++ b/internal/simdgen/ops/AddSub/go.yaml @@ -55,10 +55,10 @@ # Sub - go: Sub asm: "VPSUB[BWDQ]|VADDP[SD]" - in: + in: &2any - *any - *any - out: + out: &1any - *any - go: MaskedSub asm: "VPSUB[BWDQ]|VADDP[SD]" @@ -71,10 +71,10 @@ # Saturated Sub - go: SaturatedSub asm: "VPSUBS[BWDQ]" - in: + in: &2int 
- *int - *int - out: + out: &1int - *int - go: SaturatedSub asm: "VPSUBS[BWDQ]" @@ -99,3 +99,27 @@ - *uint out: - *uint +- go: PairwiseAdd + asm: "VPHADD[DW]" + in: *2any + out: *1any +- go: PairwiseSub + asm: "VPHSUB[DW]" + in: *2any + out: *1any +- go: PairwiseAdd + asm: "VHADDP[SD]" # floats + in: *2any + out: *1any +- go: PairwiseSub + asm: "VHSUBP[SD]" # floats + in: *2any + out: *1any +- go: SaturatedPairwiseAdd + asm: "VPHADDS[DW]" + in: *2int + out: *1int +- go: SaturatedPairwiseSub + asm: "VPHSUBS[DW]" + in: *2int + out: *1int From 61fd4bb3e16cea130005b525c80c82f85be7cb44 Mon Sep 17 00:00:00 2001 From: Carlos Amedee Date: Thu, 5 Jun 2025 14:00:28 -0400 Subject: [PATCH 069/200] s390x/s390xasm: fix failing vet check for self assigment This change removes a self assignment which causes a vet check to fail. Change-Id: I88bde9297f8f63b6552feb7b4dce204a6f9fa132 Reviewed-on: https://go-review.googlesource.com/c/arch/+/679237 LUCI-TryBot-Result: Go LUCI Reviewed-by: Dmitri Shuralyov Reviewed-by: Dmitri Shuralyov --- s390x/s390xasm/plan9.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/s390x/s390xasm/plan9.go b/s390x/s390xasm/plan9.go index 482433b4..fa5e3362 100644 --- a/s390x/s390xasm/plan9.go +++ b/s390x/s390xasm/plan9.go @@ -358,7 +358,7 @@ func GoSyntax(inst Inst, pc uint64, symname func(uint64) (string, uint64)) strin args = args[0:3] case VFS: op = "WFSDB" - args[0], args[1], args[2] = args[2], args[1], args[0] + args[0], args[2] = args[2], args[0] args = args[0:3] case MSGFR, MHI, MSFI, MSGFI: switch inst.Op { From b2f4e2807decde481fc811cb5da9655c84dbcdcc Mon Sep 17 00:00:00 2001 From: Junyang Shao Date: Thu, 5 Jun 2025 17:41:44 +0000 Subject: [PATCH 070/200] internal/simdgen: adjust type defs for masks and fix errors Change-Id: I88b970c754450080c5780b7223808072f72dd61f Reviewed-on: https://go-review.googlesource.com/c/arch/+/679275 LUCI-TryBot-Result: Go LUCI Reviewed-by: David Chase --- internal/simdgen/categories.yaml | 8 ++++---- 
internal/simdgen/gen_simdTypes.go | 3 +++ internal/simdgen/gen_simdssa.go | 14 +++++++------- internal/simdgen/ops/AddSub/categories.yaml | 8 ++++---- 4 files changed, 18 insertions(+), 15 deletions(-) diff --git a/internal/simdgen/categories.yaml b/internal/simdgen/categories.yaml index aae0cc9e..9d1fd5d7 100644 --- a/internal/simdgen/categories.yaml +++ b/internal/simdgen/categories.yaml @@ -14,18 +14,18 @@ commutative: "true" extension: "AVX.*" - go: Sub - commutative: "true" + commutative: "false" extension: "AVX.*" - go: SaturatedSub - commutative: "true" + commutative: "false" extension: "AVX.*" - go: MaskedSub masked: "true" - commutative: "true" + commutative: "false" extension: "AVX.*" - go: MaskedSaturatedSub masked: "true" - commutative: "true" + commutative: "false" extension: "AVX.*" - go: PairwiseAdd commutative: "false" diff --git a/internal/simdgen/gen_simdTypes.go b/internal/simdgen/gen_simdTypes.go index 2b1a5b2f..d19c8d2c 100644 --- a/internal/simdgen/gen_simdTypes.go +++ b/internal/simdgen/gen_simdTypes.go @@ -47,6 +47,8 @@ type {{$tsrc.Name}} struct { {{$tsrc.Fields}} } +{{- if ne $tsrc.Type "mask"}} + // Len returns the number of elements in a {{$tsrc.Name}} func (x {{$tsrc.Name}}) Len() int { return {{$tsrc.Lanes}} } @@ -60,6 +62,7 @@ func Load{{$tsrc.Name}}(y *[{{$tsrc.Lanes}}]{{$tsrc.Base}}) {{$tsrc.Name}} //go:noescape func (x {{$tsrc.Name}}) Store(y *[{{$tsrc.Lanes}}]{{$tsrc.Base}}) +{{- end}} {{- end}} {{- end}} ` diff --git a/internal/simdgen/gen_simdssa.go b/internal/simdgen/gen_simdssa.go index 92bfed79..1f61d071 100644 --- a/internal/simdgen/gen_simdssa.go +++ b/internal/simdgen/gen_simdssa.go @@ -148,13 +148,13 @@ func writeSIMDSSA(directory string, ops []Operation) error { All string ZeroingMask string }{ - strings.Join(Imms, ", "), - strings.Join(Regs[0], ", "), - strings.Join(Regs[1], ", "), - strings.Join(Regs[2], ", "), - strings.Join(Regs[3], ", "), - strings.Join(All, ", "), - strings.Join(ZeroingMask, ", "), + 
strings.Join(Imms, ",\n\t\t"), + strings.Join(Regs[0], ",\n\t\t"), + strings.Join(Regs[1], ",\n\t\t"), + strings.Join(Regs[2], ",\n\t\t"), + strings.Join(Regs[3], ",\n\t\t"), + strings.Join(All, ",\n\t\t"), + strings.Join(ZeroingMask, ",\n\t\t"), } file, t, err := openFileAndPrepareTemplate(directory, "src/cmd/compile/internal/amd64/simdssa.go", simdssaTmpl) diff --git a/internal/simdgen/ops/AddSub/categories.yaml b/internal/simdgen/ops/AddSub/categories.yaml index e44412c2..1d08a94b 100644 --- a/internal/simdgen/ops/AddSub/categories.yaml +++ b/internal/simdgen/ops/AddSub/categories.yaml @@ -14,18 +14,18 @@ commutative: "true" extension: "AVX.*" - go: Sub - commutative: "true" + commutative: "false" extension: "AVX.*" - go: SaturatedSub - commutative: "true" + commutative: "false" extension: "AVX.*" - go: MaskedSub masked: "true" - commutative: "true" + commutative: "false" extension: "AVX.*" - go: MaskedSaturatedSub masked: "true" - commutative: "true" + commutative: "false" extension: "AVX.*" - go: PairwiseAdd commutative: "false" From db8b269b3637e72e96359d969705eeb508f227b1 Mon Sep 17 00:00:00 2001 From: Junyang Shao Date: Fri, 6 Jun 2025 19:59:43 +0000 Subject: [PATCH 071/200] internal/simdgen: parse more register types This CL added more heuristics to parse more register types. This change is necessary to make VPDP* appear in the XED yaml. Change-Id: Ic502278edb798efe3e09deb8ea1165af3d774869 Reviewed-on: https://go-review.googlesource.com/c/arch/+/679735 LUCI-TryBot-Result: Go LUCI Reviewed-by: Junyang Shao Auto-Submit: Junyang Shao Reviewed-by: David Chase --- internal/simdgen/xed.go | 39 ++++++++++++++++++++++++++++++++++++--- 1 file changed, 36 insertions(+), 3 deletions(-) diff --git a/internal/simdgen/xed.go b/internal/simdgen/xed.go index 004a815f..44360435 100644 --- a/internal/simdgen/xed.go +++ b/internal/simdgen/xed.go @@ -190,6 +190,10 @@ func decodeOperand(db *xeddata.Database, operand string) (operand, error) { // complicated. 
action, ok := actionEncoding[op.Action] if !ok { + if strings.HasPrefix(op.Name, "EMX_BROADCAST") { + // BROADCAST looks like to contain an obsolete operand. + return nil, nil + } return nil, fmt.Errorf("unknown action %q", op.Action) } common := operandCommon{action: action} @@ -249,7 +253,9 @@ func decodeOperands(db *xeddata.Database, operands []string) (ins, outs unify.Tu if err != nil { return unify.Tuple{}, unify.Tuple{}, err } - ops = append(ops, op) + if op != nil { + ops = append(ops, op) + } } // XED doesn't encode the size of mask operands. If there are mask operands, @@ -272,6 +278,7 @@ func decodeOperands(db *xeddata.Database, operands []string) (ins, outs unify.Tu var masks []int var rSizes, wSizes, sizes []vecShape allMasks := true + hasWMask := false for i, op := range ops { action := op.common().action if _, ok := op.(operandMask); ok { @@ -281,6 +288,9 @@ func decodeOperands(db *xeddata.Database, operands []string) (ins, outs unify.Tu if action.r == r || action.w == w { masks = append(masks, i) } + if action.w { + hasWMask = true + } } else { allMasks = false if reg, ok := op.(operandVReg); ok { @@ -320,11 +330,17 @@ func decodeOperands(db *xeddata.Database, operands []string) (ins, outs unify.Tu } return nil } - return fmt.Errorf("cannot infer mask size: no register operands") + return fmt.Errorf("cannot infer mask size: no register operands: %+v", operands) } shape, ok := singular(sizes) if !ok { - return fmt.Errorf("cannot infer mask size: multiple register sizes %v", sizes) + if !hasWMask && len(wSizes) == 1 && len(masks) == 1 { + // This pattern looks like predicate mask, so its shape should align with the + // output. TODO: verify this is a safe assumption. 
+ shape = wSizes[0] + } else { + return fmt.Errorf("cannot infer mask size: multiple register sizes %v", sizes) + } } for _, i := range masks { m := ops[i].(operandMask) @@ -407,6 +423,10 @@ func decodeReg(op *xeddata.Operand) (w int, ok bool) { return 256, true case strings.HasPrefix(rhs, "ZMM_"): return 512, true + case strings.HasPrefix(rhs, "GPR64_"), strings.HasPrefix(rhs, "VGPR64_"): + return 64, true + case strings.HasPrefix(rhs, "GPR32_"), strings.HasPrefix(rhs, "VGPR32_"): + return 32, true } return 0, false } @@ -475,6 +495,19 @@ func decodeType(op *xeddata.Operand) (base scalarBaseType, bits int, ok bool) { // These just use the lower INT8 in each 16 bit field. // As far as I can tell, "2I8" is a typo. return scalarBaseInt, 8, true + case "2u16", "2U16": + // some VPDP* has it + // TODO: does "z" means it has zeroing? + return scalarBaseUint, 16, true + case "2i16", "2I16": + // some VPDP* has it + return scalarBaseInt, 16, true + case "4u8", "4U8": + // some VPDP* has it + return scalarBaseUint, 8, true + case "4i8", "4I8": + // some VPDP* has it + return scalarBaseInt, 8, true } // The rest follow a simple pattern. From 57d64c8c469c8a812603990690bac37d6493bbaf Mon Sep 17 00:00:00 2001 From: David Chase Date: Tue, 10 Jun 2025 11:53:17 -0400 Subject: [PATCH 072/200] internal/simdgen: add more register masks to simdAMD64Ops These were added in the call in the compiler, so these must change to match, else the end-to-end test fails. 
Change-Id: I215c188d5935d4589e0c1ee14e2c51def80a2e36 Reviewed-on: https://go-review.googlesource.com/c/arch/+/680438 Reviewed-by: Junyang Shao LUCI-TryBot-Result: Go LUCI --- internal/simdgen/gen_simdMachineOps.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/simdgen/gen_simdMachineOps.go b/internal/simdgen/gen_simdMachineOps.go index ec3eaba9..206c1665 100644 --- a/internal/simdgen/gen_simdMachineOps.go +++ b/internal/simdgen/gen_simdMachineOps.go @@ -12,7 +12,7 @@ import ( const simdMachineOpsTmpl = `// Code generated by x/arch/internal/simdgen using 'go run . -xedPath $XED_PATH -o godefs -goroot $GOROOT go.yaml types.yaml categories.yaml'; DO NOT EDIT. package main -func simdAMD64Ops(fp1fp1, fp2fp1, fp2m1, fp1m1fp1, fp2m1fp1, fp2m1m1 regInfo) []opData { +func simdAMD64Ops(fp1fp1, fp2fp1, fp2m1, fp1m1fp1, fp2m1fp1, fp2m1m1, fp3fp1, fp3m1fp1 regInfo) []opData { return []opData{ {{- range .OpsData }} {name: "{{.OpName}}", argLength: {{.OpInLen}}, reg: {{.RegInfo}}, asm: "{{.Asm}}", commutative: {{.Comm}}, typ: "{{.Type}}"}, From cd4b42487a5111fd5a984a2201e96d3c658db99e Mon Sep 17 00:00:00 2001 From: David Chase Date: Mon, 9 Jun 2025 15:52:06 -0400 Subject: [PATCH 073/200] internal/simdgen: remove map-iteration dependence from output This makes checking for (lack of) effects from changes much easier. 
Change-Id: I0b8c49381798d924541abb95bbfcbe8281d37950 Reviewed-on: https://go-review.googlesource.com/c/arch/+/680178 Reviewed-by: Cherry Mui LUCI-TryBot-Result: Go LUCI --- internal/simdgen/gen_simdTypes.go | 83 +++++++++++++++++++++---------- internal/simdgen/gen_utility.go | 1 + internal/simdgen/godefs.go | 44 ++++++++++++++++ 3 files changed, 102 insertions(+), 26 deletions(-) diff --git a/internal/simdgen/gen_simdTypes.go b/internal/simdgen/gen_simdTypes.go index d19c8d2c..0405c584 100644 --- a/internal/simdgen/gen_simdTypes.go +++ b/internal/simdgen/gen_simdTypes.go @@ -6,6 +6,8 @@ package main import ( "fmt" + "slices" + "sort" "strings" ) @@ -20,6 +22,14 @@ type simdType struct { Size int // The size of the type } +func compareSimdTypes(x, y simdType) int { + c := strings.Compare(x.Name, y.Name) + if c != 0 { + return c + } + return strings.Compare(x.Type, y.Type) +} + type simdTypeMap map[int][]simdType type simdTypePair struct { @@ -27,44 +37,51 @@ type simdTypePair struct { Tdst simdType } -const simdTypesTmpl = `// Code generated by x/arch/internal/simdgen using 'go run . -xedPath $XED_PATH -o godefs -goroot $GOROOT go.yaml types.yaml categories.yaml'; DO NOT EDIT. +func compareSimdTypePairs(x, y simdTypePair) int { + c := compareSimdTypes(x.Tsrc, y.Tsrc) + if c != 0 { + return c + } + return compareSimdTypes(x.Tdst, y.Tdst) +} + +const simdTypesTemplates = `{{define "fileHeader"}}// Code generated by x/arch/internal/simdgen using 'go run . -xedPath $XED_PATH -o godefs -goroot $GOROOT go.yaml types.yaml categories.yaml'; DO NOT EDIT. 
//go:build goexperiment.simd package simd +{{end}} -{{- range $size, $ts := .TypeMap }} - -// v{{$size}} is a tag type that tells the compiler that this is really {{$size}}-bit SIMD -type v{{$size}} struct { - _{{$size}} struct{} +{{define "sizeTmpl"}} +// v{{.}} is a tag type that tells the compiler that this is really {{.}}-bit SIMD +type v{{.}} struct { + _{{.}} struct{} } +{{end}} -{{- range $i, $tsrc := $ts }} - -// {{$tsrc.Name}} is a {{$size}}-bit SIMD vector of {{$tsrc.Lanes}} {{$tsrc.Base}} -type {{$tsrc.Name}} struct { -{{$tsrc.Fields}} +{{define "typeTmpl"}} +// {{.Name}} is a {{.Size}}-bit SIMD vector of {{.Lanes}} {{.Base}} +type {{.Name}} struct { +{{.Fields}} } -{{- if ne $tsrc.Type "mask"}} +{{- if ne .Type "mask"}} -// Len returns the number of elements in a {{$tsrc.Name}} -func (x {{$tsrc.Name}}) Len() int { return {{$tsrc.Lanes}} } +// Len returns the number of elements in a {{.Name}} +func (x {{.Name}}) Len() int { return {{.Lanes}} } -// Load{{$tsrc.Name}} loads a {{$tsrc.Name}} from an array +// Load{{.Name}} loads a {{.Name}} from an array // //go:noescape -func Load{{$tsrc.Name}}(y *[{{$tsrc.Lanes}}]{{$tsrc.Base}}) {{$tsrc.Name}} +func Load{{.Name}}(y *[{{.Lanes}}]{{.Base}}) {{.Name}} -// Store stores a {{$tsrc.Name}} to an array +// Store stores a {{.Name}} to an array // //go:noescape -func (x {{$tsrc.Name}}) Store(y *[{{$tsrc.Lanes}}]{{$tsrc.Base}}) +func (x {{.Name}}) Store(y *[{{.Lanes}}]{{.Base}}) {{- end}} -{{- end}} -{{- end}} +{{end}} ` const simdStubsTmpl = `// Code generated by x/arch/internal/simdgen using 'go run . -xedPath $XED_PATH -o godefs -goroot $GOROOT go.yaml types.yaml categories.yaml'; DO NOT EDIT. 
@@ -168,6 +185,7 @@ func vConvertFromTypeMap(typeMap simdTypeMap) []simdTypePair { } } } + slices.SortFunc(v, compareSimdTypePairs) return v } @@ -180,25 +198,38 @@ func masksFromTypeMap(typeMap simdTypeMap) []simdType { } } } + slices.SortFunc(m, compareSimdTypes) return m } // writeSIMDTypes generates the simd vector type and writes it to types_amd64.go // within the specified directory. func writeSIMDTypes(directory string, typeMap simdTypeMap) error { - file, t, err := openFileAndPrepareTemplate(directory, "src/"+simdPackage+"/types_amd64.go", simdTypesTmpl) + file, t, err := openFileAndPrepareTemplate(directory, "src/"+simdPackage+"/types_amd64.go", simdTypesTemplates) if err != nil { return err } defer file.Close() - type templateData struct { - TypeMap simdTypeMap + if err := t.ExecuteTemplate(file, "fileHeader", nil); err != nil { + return fmt.Errorf("failed to execute fileHeader template: %w", err) } - err = t.Execute(file, templateData{typeMap}) - if err != nil { - return fmt.Errorf("failed to execute template: %w", err) + sizes := make([]int, 0, len(typeMap)) + for size := range typeMap { + sizes = append(sizes, size) + } + sort.Ints(sizes) + + for _, size := range sizes { + if err := t.ExecuteTemplate(file, "sizeTmpl", size); err != nil { + return fmt.Errorf("failed to execute size template for size %d: %w", size, err) + } + for _, typeDef := range typeMap[size] { + if err := t.ExecuteTemplate(file, "typeTmpl", typeDef); err != nil { + return fmt.Errorf("failed to execute type template for type %s: %w", typeDef.Name, err) + } + } } return nil diff --git a/internal/simdgen/gen_utility.go b/internal/simdgen/gen_utility.go index 28a451d3..7fcade60 100644 --- a/internal/simdgen/gen_utility.go +++ b/internal/simdgen/gen_utility.go @@ -355,6 +355,7 @@ func dedupGodef(ops []Operation) ([]Operation, error) { } deduped = append(deduped, dup[0]) } + slices.SortFunc(deduped, compareOperations) return deduped, nil } diff --git a/internal/simdgen/godefs.go 
b/internal/simdgen/godefs.go index 2a611c9e..9309e0ce 100644 --- a/internal/simdgen/godefs.go +++ b/internal/simdgen/godefs.go @@ -6,6 +6,8 @@ package main import ( "log" + "slices" + "strings" "golang.org/x/arch/internal/unify" ) @@ -30,6 +32,47 @@ type Operation struct { Masked *string } +func compareStringPointers(x, y *string) int { + if x != nil && y != nil { + return strings.Compare(*x, *y) + } + if x == nil && y == nil { + return 0 + } + if x == nil { + return -1 + } + return 1 +} + +func compareOperations(x, y Operation) int { + if c := strings.Compare(x.Go, y.Go); c != 0 { + return c + } + if c := strings.Compare(x.GoArch, y.GoArch); c != 0 { + return c + } + if len(x.In) < len(y.In) { + return -1 + } + if len(x.In) > len(y.In) { + return 1 + } + if len(x.Out) < len(y.Out) { + return -1 + } + if len(x.Out) > len(y.Out) { + return 1 + } + for i := range x.In { + ox, oy := &x.In[i], y.In[i] + if c := compareStringPointers(ox.Go, oy.Go); c != 0 { + return c + } + } + return 0 +} + type Operand struct { Class string // One of "mask", "immediate", "vreg" and "mem" @@ -68,6 +111,7 @@ func writeGoDefs(path string, cl unify.Closure) error { op.sortOperand() ops = append(ops, op) } + slices.SortFunc(ops, compareOperations) // The parsed XED data might contain duplicates, like // 512 bits VPADDP. deduped := dedup(ops) From 43295e6b7043ce004e33ac281d107156bf8b2d58 Mon Sep 17 00:00:00 2001 From: David Chase Date: Fri, 6 Jun 2025 17:29:43 -0400 Subject: [PATCH 074/200] internal/simdgen: simplify gen_simdrules.go This gets the control flow out of the templates, simplifies the templates, and allows better sorting of the generated rules. 
Change-Id: Ic31f2554bf3d2aaf1d3efd27a8a5060c8904767f Reviewed-on: https://go-review.googlesource.com/c/arch/+/680275 Reviewed-by: Junyang Shao LUCI-TryBot-Result: Go LUCI --- internal/simdgen/gen_simdrules.go | 238 +++++++++++++++++------------- 1 file changed, 132 insertions(+), 106 deletions(-) diff --git a/internal/simdgen/gen_simdrules.go b/internal/simdgen/gen_simdrules.go index 172282eb..2f2178d6 100644 --- a/internal/simdgen/gen_simdrules.go +++ b/internal/simdgen/gen_simdrules.go @@ -6,99 +6,140 @@ package main import ( "fmt" - "sort" + "io" + "os" + "path/filepath" + "slices" + "strings" + "text/template" ) -const simdrulesTmpl = `// Code generated by x/arch/internal/simdgen using 'go run . -xedPath $XED_PATH -o godefs -goroot $GOROOT go.yaml types.yaml categories.yaml'; DO NOT EDIT. +var ( + ruleTemplates = template.Must(template.New("simdRules").Parse(` +{{define "pureVregInVregOut"}}({{.GoOp}}{{.GoType}} {{.Args}}) => ({{.Asm}} {{.ReverseArgs}}) +{{end}} +{{define "oneKmaskInVregOut"}}({{.GoOp}}{{.GoType}} {{.Args}} mask) => ({{.Asm}} {{.ReverseArgs}} (VPMOVVec{{.ElemBits}}x{{.Lanes}}ToM mask)) +{{end}} +{{define "oneConstImmInVregOut"}}({{.GoOp}}{{.GoType}} {{.Args}}) => ({{.Asm}} [{{.Const}}] {{.ReverseArgs}}) +{{end}} +{{define "oneKmaskConstImmInVregOut"}}({{.GoOp}}{{.GoType}} {{.Args}} mask) => ({{.Asm}} [{{.Const}}] {{.ReverseArgs}} (VPMOVVec{{.ElemBits}}x{{.Lanes}}ToM mask)) +{{end}} +{{define "pureVregInKmaskOut"}}({{.GoOp}}{{.GoType}} {{.Args}}) => (VPMOVMToVec{{.ElemBits}}x{{.Lanes}} ({{.Asm}} {{.ReverseArgs}})) +{{end}} +{{define "oneKmaskInKmaskOut"}}({{.GoOp}}{{.GoType}} {{.Args}} mask) => (VPMOVMToVec{{.ElemBits}}x{{.Lanes}} ({{.Asm}} {{.ReverseArgs}} (VPMOVVec{{.ElemBits}}x{{.Lanes}}ToM mask))) +{{end}} +{{define "oneConstImmInKmaskOut"}}({{.GoOp}}{{.GoType}} {{.Args}}) => (VPMOVMToVec{{.ElemBits}}x{{.Lanes}} ({{.Asm}} [{{.Const}}] {{.ReverseArgs}})) +{{end}} +{{define "oneKmaskConstImmInKmaskOut"}}({{.GoOp}}{{.GoType}} {{.Args}} mask) => 
(VPMOVMToVec{{.ElemBits}}x{{.Lanes}} ({{.Asm}} [{{.Const}}] {{.ReverseArgs}} (VPMOVVec{{.ElemBits}}x{{.Lanes}}ToM mask))) +{{end}} +`)) +) -// The AVX instruction encodings orders vector register from right to left, for example: -// VSUBPS X Y Z means Z=Y-X -// The rules here swapped the order of such X and Y because the ssa to prog lowering in simdssa.go assumes a -// left to right order. -// TODO: we should offload the logic to simdssa.go, instead of here. -// -// Masks are always at the end, immediates always at the beginning. - -{{- range .Ops }} -({{.Op.Go}}{{(index .Op.In 0).Go}} {{.Args}}) => ({{.Op.Asm}} {{.ReverseArgs}}) -{{- end }} -{{- range .OpsImm }} -({{.Op.Go}}{{(index .Op.In 1).Go}} {{.Args}}) => ({{.Op.Asm}} [{{(index .Op.In 0).Const}}] {{.ReverseArgs}}) -{{- end }} -{{- range .OpsMask}} -({{.Op.Go}}{{(index .Op.In 0).Go}} {{.Args}} mask) => ({{.Op.Asm}} {{.ReverseArgs}} (VPMOVVec{{(index .Op.In 0).ElemBits}}x{{(index .Op.In 0).Lanes}}ToM mask)) -{{- end }} -{{- range .OpsImmMask}} -({{.Op.Go}}{{(index .Op.In 1).Go}} {{.Args}} mask) => ({{.Op.Asm}} [{{(index .Op.In 0).Const}}] {{.ReverseArgs}} (VPMOVVec{{(index .Op.In 1).ElemBits}}x{{(index .Op.In 1).Lanes}}ToM mask)) -{{- end }} -{{- range .OpsMaskOut}} -({{.Op.Go}}{{(index .Op.In 0).Go}} {{.Args}}) => (VPMOVMToVec{{(index .Op.In 0).ElemBits}}x{{(index .Op.In 0).Lanes}} ({{.Op.Asm}} {{.ReverseArgs}})) -{{- end }} -{{- range .OpsImmInMaskOut}} -({{.Op.Go}}{{(index .Op.In 1).Go}} {{.Args}}) => (VPMOVMToVec{{(index .Op.In 1).ElemBits}}x{{(index .Op.In 1).Lanes}} ({{.Op.Asm}} [{{(index .Op.In 0).Const}}] {{.ReverseArgs}})) -{{- end }} -{{- range .OpsMaskInMaskOut}} -({{.Op.Go}}{{(index .Op.In 0).Go}} {{.Args}} mask) => (VPMOVMToVec{{(index .Op.In 0).ElemBits}}x{{(index .Op.In 0).Lanes}} ({{.Op.Asm}} {{.ReverseArgs}} (VPMOVVec{{(index .Op.In 0).ElemBits}}x{{(index .Op.In 0).Lanes}}ToM mask))) -{{- end }} -{{- range .OpsImmMaskInMaskOut}} -({{.Op.Go}}{{(index .Op.In 1).Go}} {{.Args}} mask) => 
(VPMOVMToVec{{(index .Op.In 1).ElemBits}}x{{(index .Op.In 1).Lanes}} ({{.Op.Asm}} [{{(index .Op.In 0).Const}}] {{.ReverseArgs}} (VPMOVVec{{(index .Op.In 1).ElemBits}}x{{(index .Op.In 1).Lanes}}ToM mask))) -{{- end }} -` +type tplRuleData struct { + tplName string + GoOp string + GoType string + Args string + Asm string + ReverseArgs string + ElemBits int + Lanes int + Const string +} + +func compareTplRuleData(x, y tplRuleData) int { + // TODO should MaskedXYZ compare just after XYZ? + if c := strings.Compare(x.GoOp, y.GoOp); c != 0 { + return c + } + if c := strings.Compare(x.GoType, y.GoType); c != 0 { + return c + } + if c := strings.Compare(x.Const, y.Const); c != 0 { + return c + } + return 0 +} // writeSIMDRules generates the lowering and rewrite rules for ssa and writes it to simdAMD64.rules // within the specified directory. func writeSIMDRules(directory string, ops []Operation) error { - file, t, err := openFileAndPrepareTemplate(directory, "src/cmd/compile/internal/ssa/_gen/simdAMD64.rules", simdrulesTmpl) + + outPath := filepath.Join(directory, "src/cmd/compile/internal/ssa/_gen/simdAMD64.rules") + if err := os.MkdirAll(filepath.Dir(outPath), 0755); err != nil { + return fmt.Errorf("failed to create directory for %s: %w", outPath, err) + } + file, err := os.Create(outPath) if err != nil { - return err + return fmt.Errorf("failed to create %s: %w", outPath, err) } defer file.Close() - type OpAndArgList struct { - Op Operation - Args string // "x y", does not include masks - ReverseArgs string // "y x", does not include masks + + header := `// Code generated by x/arch/internal/simdgen using 'go run . -xedPath $XED_PATH -o godefs -goroot $GOROOT go.yaml types.yaml categories.yaml'; DO NOT EDIT. + +// The AVX instruction encodings orders vector register from right to left, for example: +// VSUBPS X Y Z means Z=Y-X +// The rules here swapped the order of such X and Y because the ssa to prog lowering in simdssa.go assumes a +// left to right order. 
+// TODO: we should offload the logic to simdssa.go, instead of here. +// + +` + if _, err := io.WriteString(file, header); err != nil { + return fmt.Errorf("failed to write header to %s: %w", outPath, err) } - Ops := make([]OpAndArgList, 0) - OpsImm := make([]OpAndArgList, 0) - OpsMask := make([]OpAndArgList, 0) - OpsImmMask := make([]OpAndArgList, 0) - OpsMaskOut := make([]OpAndArgList, 0) - OpsImmInMaskOut := make([]OpAndArgList, 0) - OpsMaskInMaskOut := make([]OpAndArgList, 0) - OpsImmMaskInMaskOut := make([]OpAndArgList, 0) - - for _, op := range ops { - opInShape, opOutShape, maskType, _, op, gOp, err := op.shape() + + var allData []tplRuleData + + for _, opr := range ops { + opInShape, opOutShape, maskType, _, o, gOp, err := opr.shape() if err != nil { return err } vregInCnt := len(gOp.In) if maskType == OneMask { - op.Asm += "Masked" + o.Asm += "Masked" vregInCnt-- } - op.Asm = fmt.Sprintf("%s%d", op.Asm, *op.Out[0].Bits) - opData := OpAndArgList{Op: op} + o.Asm = fmt.Sprintf("%s%d", o.Asm, *o.Out[0].Bits) + + data := tplRuleData{ + GoOp: o.Go, + Asm: o.Asm, + } + if vregInCnt == 1 { - opData.Args = "x" - opData.ReverseArgs = "x" + data.Args = "x" + data.ReverseArgs = "x" } else if vregInCnt == 2 { - opData.Args = "x y" - opData.ReverseArgs = "y x" + data.Args = "x y" + data.ReverseArgs = "y x" } else { return fmt.Errorf("simdgen does not support more than 2 vreg in inputs") } + + var tplName string // If class overwrite is happening, that's not really a mask but a vreg. 
- if opOutShape == OneVregOut || op.Out[0].OverwriteClass != nil { + if opOutShape == OneVregOut || o.Out[0].OverwriteClass != nil { switch opInShape { case PureVregIn: - Ops = append(Ops, opData) + tplName = "pureVregInVregOut" + data.GoType = *o.In[0].Go case OneKmaskIn: - OpsMask = append(OpsMask, opData) + tplName = "oneKmaskInVregOut" + data.GoType = *o.In[0].Go + data.ElemBits = *o.In[0].ElemBits + data.Lanes = *o.In[0].Lanes case OneConstImmIn: - OpsImm = append(OpsImm, opData) + tplName = "oneConstImmInVregOut" + data.GoType = *o.In[1].Go + data.Const = *o.In[0].Const case OneKmaskConstImmIn: - OpsImmMask = append(OpsImmMask, opData) + tplName = "oneKmaskConstImmInVregOut" + data.GoType = *o.In[1].Go + data.Const = *o.In[0].Const + data.ElemBits = *o.In[1].ElemBits + data.Lanes = *o.In[1].Lanes case PureKmaskIn: return fmt.Errorf("simdgen does not support pure k mask instructions, they should be generated by compiler optimizations") } @@ -106,57 +147,42 @@ func writeSIMDRules(directory string, ops []Operation) error { // OneKmaskOut case switch opInShape { case PureVregIn: - OpsMaskOut = append(OpsMaskOut, opData) + tplName = "pureVregInKmaskOut" + data.GoType = *o.In[0].Go + data.ElemBits = *o.In[0].ElemBits + data.Lanes = *o.In[0].Lanes case OneKmaskIn: - OpsMaskInMaskOut = append(OpsMaskInMaskOut, opData) + tplName = "oneKmaskInKmaskOut" + data.GoType = *o.In[0].Go + data.ElemBits = *o.In[0].ElemBits + data.Lanes = *o.In[0].Lanes case OneConstImmIn: - OpsImmInMaskOut = append(OpsImmInMaskOut, opData) + tplName = "oneConstImmInKmaskOut" + data.GoType = *o.In[1].Go + data.Const = *o.In[0].Const + data.ElemBits = *o.In[1].ElemBits + data.Lanes = *o.In[1].Lanes case OneKmaskConstImmIn: - OpsImmMaskInMaskOut = append(OpsImmMaskInMaskOut, opData) + tplName = "oneKmaskConstImmInKmaskOut" + data.GoType = *o.In[1].Go + data.Const = *o.In[0].Const + data.ElemBits = *o.In[1].ElemBits + data.Lanes = *o.In[1].Lanes case PureKmaskIn: return fmt.Errorf("simdgen does 
not support pure k mask instructions, they should be generated by compiler optimizations") } } - } - sortKey := func(op *OpAndArgList) string { - return *op.Op.In[0].Go + op.Op.Go - } - sortBySortKey := func(ops []OpAndArgList) { - sort.Slice(ops, func(i, j int) bool { - return sortKey(&ops[i]) < sortKey(&ops[j]) - }) - } - sortBySortKey(Ops) - sortBySortKey(OpsImm) - sortBySortKey(OpsMask) - sortBySortKey(OpsImmMask) - sortBySortKey(OpsMaskOut) - sortBySortKey(OpsImmInMaskOut) - sortBySortKey(OpsMaskInMaskOut) - sortBySortKey(OpsImmMaskInMaskOut) - - type templateData struct { - Ops []OpAndArgList - OpsImm []OpAndArgList - OpsMask []OpAndArgList - OpsImmMask []OpAndArgList - OpsMaskOut []OpAndArgList - OpsImmInMaskOut []OpAndArgList - OpsMaskInMaskOut []OpAndArgList - OpsImmMaskInMaskOut []OpAndArgList + + data.tplName = tplName + allData = append(allData, data) } - err = t.Execute(file, templateData{ - Ops, - OpsImm, - OpsMask, - OpsImmMask, - OpsMaskOut, - OpsImmInMaskOut, - OpsMaskInMaskOut, - OpsImmMaskInMaskOut}) - if err != nil { - return fmt.Errorf("failed to execute template: %w", err) + slices.SortFunc(allData, compareTplRuleData) + + for _, data := range allData { + if err := ruleTemplates.ExecuteTemplate(file, data.tplName, data); err != nil { + return fmt.Errorf("failed to execute template %s for %s: %w", data.tplName, data.GoOp+data.GoType, err) + } } return nil From 53c00bdecf19ba032a082f4db4aad970536b373b Mon Sep 17 00:00:00 2001 From: Junyang Shao Date: Thu, 12 Jun 2025 02:18:30 +0000 Subject: [PATCH 075/200] internal/simdgen: refactor and support more shapes This CL refactors gen_simdrules.go and gen_simdssa.go: Instead of reversing the operand orders at lowering rules and maintain a state machine at prog writing, now machine ops and generic ops will have the same operand order and prog writing will adjust the register order. 
This CL supports operations with immediate args: During intrinsic, the compiler will check if the passed-in arg is a const or not; if not it will insert a runtime panic. This CL supports operations with result in arg0(same register). This CL supports more longer operations up to 4 register and 1 immediates. This CL also cleans up stubs documentation formats. This CL generates CL 681215. Change-Id: I3d14fbfafa5adc2ac189e27cd82b88623aa0150c Reviewed-on: https://go-review.googlesource.com/c/arch/+/681195 LUCI-TryBot-Result: Go LUCI Auto-Submit: Junyang Shao Reviewed-by: David Chase Reviewed-by: Junyang Shao --- internal/simdgen/categories.yaml | 48 ++-- internal/simdgen/gen_simdGenericOps.go | 27 +- internal/simdgen/gen_simdIntrinsics.go | 94 ++++++- internal/simdgen/gen_simdMachineOps.go | 71 ++--- internal/simdgen/gen_simdTypes.go | 67 ++++- internal/simdgen/gen_simdrules.go | 134 +++++----- internal/simdgen/gen_simdssa.go | 190 +++++++------- internal/simdgen/gen_utility.go | 245 +++++++++++++----- internal/simdgen/godefs.go | 13 +- internal/simdgen/ops/AddSub/categories.yaml | 8 +- internal/simdgen/ops/Compares/categories.yaml | 28 +- internal/simdgen/ops/Mul/categories.yaml | 12 +- 12 files changed, 590 insertions(+), 347 deletions(-) diff --git a/internal/simdgen/categories.yaml b/internal/simdgen/categories.yaml index 9d1fd5d7..4a4affbc 100644 --- a/internal/simdgen/categories.yaml +++ b/internal/simdgen/categories.yaml @@ -30,19 +30,19 @@ - go: PairwiseAdd commutative: "false" extension: "AVX.*" - documentation: "Add pairs of elements in vector x and store them in higher half of the target; Add pairs of elements in vector y and store them in lower half of the target" + documentation: "// Add pairs of elements in vector x and store them in higher half of the target; Add pairs of elements in vector y and store them in lower half of the target" - go: PairwiseSub commutative: "false" extension: "AVX.*" - documentation: "Sub pairs of elements in vector x and store 
them in higher half of the target; Sub pairs of elements in vector y and store them in lower half of the target" + documentation: "// Sub pairs of elements in vector x and store them in higher half of the target; Sub pairs of elements in vector y and store them in lower half of the target" - go: SaturatedPairwiseAdd commutative: "false" extension: "AVX.*" - documentation: "Add pairs of elements in vector x and store them in higher half of the target; Add pairs of elements in vector y and store them in lower half of the target; With saturation" + documentation: "// Add pairs of elements in vector x and store them in higher half of the target; Add pairs of elements in vector y and store them in lower half of the target; With saturation" - go: SaturatedPairwiseSub commutative: "false" extension: "AVX.*" - documentation: "Sub pairs of elements in vector x and store them in higher half of the target; Sub pairs of elements in vector y and store them in lower half of the target; With saturation" + documentation: "// Sub pairs of elements in vector x and store them in higher half of the target; Sub pairs of elements in vector y and store them in lower half of the target; With saturation" - go: And commutative: "true" extension: "AVX.*" @@ -84,80 +84,80 @@ constImm: 0 commutative: "true" extension: "AVX.*" - documentation: "Predicate immediate is 0 if it has;" + documentation: "// Predicate immediate is 0 if it has;" - go: Less constImm: 1 commutative: "false" extension: "AVX.*" - documentation: "Predicate immediate is 1 if it has;" + documentation: "// Predicate immediate is 1 if it has;" - go: LessEqual constImm: 2 commutative: "false" extension: "AVX.*" - documentation: "Predicate immediate is 2 if it has;" + documentation: "// Predicate immediate is 2 if it has;" - go: IsNan # For float only. 
constImm: 3 commutative: "true" extension: "AVX.*" - documentation: "Predicate immediate is 3 if it has; Returns mask element True if either one of the input\\'s element is Nan; Please use this method as x\\.IsNan\\(x\\) to check x only;" + documentation: "// Predicate immediate is 3 if it has; Returns mask element True if either one of the input\\'s element is Nan; Please use this method as x\\.IsNan\\(x\\) to check x only;" - go: NotEqual constImm: 4 commutative: "true" extension: "AVX.*" - documentation: "Predicate immediate is 4 if it has;" + documentation: "// Predicate immediate is 4 if it has;" - go: GreaterEqual constImm: 5 commutative: "false" extension: "AVX.*" - documentation: "Predicate immediate is 5 if it has;" + documentation: "// Predicate immediate is 5 if it has;" - go: Greater constImm: 6 commutative: "false" extension: "AVX.*" - documentation: "Predicate immediate is 6 if it has;" + documentation: "// Predicate immediate is 6 if it has;" - go: MaskedEqual constImm: 0 masked: "true" commutative: "true" extension: "AVX.*" - documentation: "Predicate immediate is 0 if it has;" + documentation: "// Predicate immediate is 0 if it has;" - go: MaskedLess constImm: 1 masked: "true" commutative: "false" extension: "AVX.*" - documentation: "Predicate immediate is 1 if it has;" + documentation: "// Predicate immediate is 1 if it has;" - go: MaskedLessEqual constImm: 2 masked: "true" commutative: "false" extension: "AVX.*" - documentation: "Predicate immediate is 2 if it has;" + documentation: "// Predicate immediate is 2 if it has;" - go: MaskedIsNan # For float only. 
constImm: 3 masked: "true" commutative: "true" extension: "AVX.*" - documentation: "Predicate immediate is 3 if it has; Returns mask element True if either one of the input\\'s element is Nan; Please use this method as x\\.IsNan\\(x\\) to check x only;" + documentation: "// Predicate immediate is 3 if it has; Returns mask element True if either one of the input\\'s element is Nan; Please use this method as x\\.IsNan\\(x\\) to check x only;" - go: MaskedNotEqual constImm: 4 masked: "true" commutative: "true" extension: "AVX.*" - documentation: "Predicate immediate is 4 if it has;" + documentation: "// Predicate immediate is 4 if it has;" - go: MaskedGreaterEqual constImm: 5 masked: "true" commutative: "false" extension: "AVX.*" - documentation: "Predicate immediate is 5 if it has;" + documentation: "// Predicate immediate is 5 if it has;" - go: MaskedGreater constImm: 6 masked: "true" commutative: "false" extension: "AVX.*" - documentation: "Predicate immediate is 6 if it has;" + documentation: "// Predicate immediate is 6 if it has;" - go: Div commutative: "false" extension: "AVX.*" @@ -237,15 +237,15 @@ - go: MulEvenWiden commutative: "true" extension: "AVX.*" - documentation: "Multiplies the even index elements from the two sources of size X at index i, store the result of size 2X at index i/2" + documentation: "// Multiplies the even index elements from the two sources of size X at index i, store the result of size 2X at index i/2" - go: MulHigh commutative: "true" extension: "AVX.*" - documentation: "Multiplies the elements from the two sources of size X at index i, store the high X bits of the result of size 2X at index i" + documentation: "// Multiplies the elements from the two sources of size X at index i, store the high X bits of the result of size 2X at index i" - go: MulLow commutative: "true" extension: "AVX.*" - documentation: "Multiplies the elements from the two sources of size X at index i, store the low X bits of the result of size 2X at index i" + 
documentation: "// Multiplies the elements from the two sources of size X at index i, store the low X bits of the result of size 2X at index i" - go: MaskedMul masked: "true" commutative: "true" @@ -254,14 +254,14 @@ masked: "true" commutative: "true" extension: "AVX.*" - documentation: "Multiplies the even index elements from the two sources of size X at index i, store the result of size 2X at index i/2" + documentation: "// Multiplies the even index elements from the two sources of size X at index i, store the result of size 2X at index i/2" - go: MaskedMulHigh masked: "true" commutative: "true" extension: "AVX.*" - documentation: "Multiplies the elements from the two sources of size X at index i, store the high X bits of the result of size 2X at index i" + documentation: "// Multiplies the elements from the two sources of size X at index i, store the high X bits of the result of size 2X at index i" - go: MaskedMulLow masked: "true" commutative: "true" extension: "AVX.*" - documentation: "Multiplies the elements from the two sources of size X at index i, store the low X bits of the result of size 2X at index i" + documentation: "// Multiplies the elements from the two sources of size X at index i, store the low X bits of the result of size 2X at index i" diff --git a/internal/simdgen/gen_simdGenericOps.go b/internal/simdgen/gen_simdGenericOps.go index 2b0fa008..bdda8b80 100644 --- a/internal/simdgen/gen_simdGenericOps.go +++ b/internal/simdgen/gen_simdGenericOps.go @@ -14,8 +14,11 @@ package main func simdGenericOps() []opData { return []opData{ -{{- range . 
}} +{{- range .Ops }} {name: "{{.OpName}}", argLength: {{.OpInLen}}, commutative: {{.Comm}}}, +{{- end }} +{{- range .OpsImm }} + {name: "{{.OpName}}", argLength: {{.OpInLen}}, commutative: {{.Comm}}, aux: "Int8"}, {{- end }} } } @@ -35,17 +38,29 @@ func writeSIMDGenericOps(directory string, ops []Operation) error { OpInLen int Comm string } - opsData := make([]genericOpsData, 0) + type opData struct { + Ops []genericOpsData + OpsImm []genericOpsData + } + var opsData opData for _, op := range ops { - _, _, _, _, _, gOp, err := op.shape() + _, _, _, immType, _, _, gOp, err := op.shape() if err != nil { return err } genericNames := gOp.Go + *gOp.In[0].Go - opsData = append(opsData, genericOpsData{*gOp.In[0].Go + gOp.Go, genericNames, len(gOp.In), op.Commutative}) + gOpData := genericOpsData{*gOp.In[0].Go + gOp.Go, genericNames, len(gOp.In), op.Commutative} + if immType == VarImm || immType == ConstVarImm { + opsData.OpsImm = append(opsData.OpsImm, gOpData) + } else { + opsData.Ops = append(opsData.Ops, gOpData) + } } - sort.Slice(opsData, func(i, j int) bool { - return opsData[i].sortKey < opsData[j].sortKey + sort.Slice(opsData.Ops, func(i, j int) bool { + return opsData.Ops[i].sortKey < opsData.Ops[j].sortKey + }) + sort.Slice(opsData.OpsImm, func(i, j int) bool { + return opsData.OpsImm[i].sortKey < opsData.OpsImm[j].sortKey }) err = t.Execute(file, opsData) diff --git a/internal/simdgen/gen_simdIntrinsics.go b/internal/simdgen/gen_simdIntrinsics.go index 377026b9..5b6b74cf 100644 --- a/internal/simdgen/gen_simdIntrinsics.go +++ b/internal/simdgen/gen_simdIntrinsics.go @@ -30,6 +30,21 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . 
{{- range .OpsLen3}} addF(simdPackage, "{{(index .In 0).Go}}.{{.Go}}", opLen3(ssa.Op{{.Go}}{{(index .In 0).Go}}, {{.GoArch}}), sys.AMD64) {{- end}} +{{- range .OpsLen4}} + addF(simdPackage, "{{(index .In 0).Go}}.{{.Go}}", opLen4(ssa.Op{{.Go}}{{(index .In 0).Go}}, {{.GoArch}}), sys.AMD64) +{{- end}} +{{- range .OpsLen1Imm8}} + addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen1Imm8(ssa.Op{{.Go}}{{(index .In 1).Go}}, {{.GoArch}}, {{(index .In 0).ImmOffset}}), sys.AMD64) +{{- end}} +{{- range .OpsLen2Imm8}} + addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen2Imm8(ssa.Op{{.Go}}{{(index .In 1).Go}}, {{.GoArch}}, {{(index .In 0).ImmOffset}}), sys.AMD64) +{{- end}} +{{- range .OpsLen3Imm8}} + addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen3Imm8(ssa.Op{{.Go}}{{(index .In 1).Go}}, {{.GoArch}}, {{(index .In 0).ImmOffset}}), sys.AMD64) +{{- end}} +{{- range .OpsLen4Imm8}} + addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen4Imm8(ssa.Op{{.Go}}{{(index .In 1).Go}}, {{.GoArch}}, {{(index .In 0).ImmOffset}}), sys.AMD64) +{{- end}} {{- range .VectorConversions }} addF(simdPackage, "{{.Tsrc.Name}}.As{{.Tdst.Name}}", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64) @@ -67,6 +82,76 @@ func opLen3(op ssa.Op, t *types.Type) func(s *state, n *ir.CallExpr, args []*ssa } } +func opLen4(op ssa.Op, t *types.Type) func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { + return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { + return s.newValue4(op, t, args[0], args[1], args[2], args[3]) + } +} + +func plainPanicSimdImm(s *state) { + cmp := s.newValue0(ssa.OpConstBool, types.Types[types.TBOOL]) + cmp.AuxInt = 1 + // TODO: make this a standalone panic instead of reusing the overflow panic. + // Or maybe after we implement the switch table this will be obsolete anyway. 
+ s.check(cmp, ir.Syms.Panicoverflow) +} + +func opLen1Imm8(op ssa.Op, t *types.Type, offset int) func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { + return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { + if args[1].Op == ssa.OpConst8 { + return s.newValue1I(op, t, args[1].AuxInt< 0 { - vRegInS = fmt.Sprintf("fp%d", vRegInCnt) - } - if kMaskInCnt > 0 { - kMaskInS = fmt.Sprintf("m%d", kMaskInCnt) - } - if vRegOutCnt > 0 { - vRegOutS = fmt.Sprintf("fp%d", vRegOutCnt) - } - if kMaskOutCnt > 0 { - kMaskOutS = fmt.Sprintf("m%d", kMaskOutCnt) + regInfo, err := op.regShape() + if err != nil { + return err } - regInfo = fmt.Sprintf("%s%s%s%s", vRegInS, kMaskInS, vRegOutS, kMaskOutS) if _, ok := regInfoSet[regInfo]; !ok { return fmt.Errorf("unsupported register constraint, please update the template and AMD64Ops.go: %s", regInfo) } var outType string - if shapeOut == OneVregOut || gOp.Out[0].OverwriteClass != nil { + if shapeOut == OneVregOut || shapeOut == OneVregOutAtIn || gOp.Out[0].OverwriteClass != nil { // If class overwrite is happening, that's not really a mask but a vreg. 
outType = fmt.Sprintf("Vec%d", *gOp.Out[0].Bits) } else if shapeOut == OneKmaskOut { @@ -109,10 +82,14 @@ func writeSIMDMachineOps(directory string, ops []Operation) error { } else { return fmt.Errorf("simdgen does not recognize this output shape: %d", shapeOut) } - if shapeIn == OneConstImmIn || shapeIn == OneKmaskConstImmIn { - opsDataImm = append(opsDataImm, opData{*gOp.In[0].Go + gOp.Go, asm, gOp.Asm, len(gOp.In), regInfo, gOp.Commutative, outType}) + resultInArg0 := "false" + if shapeOut == OneVregOutAtIn { + resultInArg0 = "true" + } + if shapeIn == OneImmIn || shapeIn == OneKmaskImmIn { + opsDataImm = append(opsDataImm, opData{*gOp.In[0].Go + gOp.Go, asm, gOp.Asm, len(gOp.In), regInfo, gOp.Commutative, outType, resultInArg0}) } else { - opsData = append(opsData, opData{*gOp.In[0].Go + gOp.Go, asm, gOp.Asm, len(gOp.In), regInfo, gOp.Commutative, outType}) + opsData = append(opsData, opData{*gOp.In[0].Go + gOp.Go, asm, gOp.Asm, len(gOp.In), regInfo, gOp.Commutative, outType, resultInArg0}) } } sort.Slice(opsData, func(i, j int) bool { diff --git a/internal/simdgen/gen_simdTypes.go b/internal/simdgen/gen_simdTypes.go index 0405c584..50480b30 100644 --- a/internal/simdgen/gen_simdTypes.go +++ b/internal/simdgen/gen_simdTypes.go @@ -91,22 +91,68 @@ const simdStubsTmpl = `// Code generated by x/arch/internal/simdgen using 'go ru package simd {{- range .OpsLen1}} - -// Asm: {{.Asm}}, Arch: {{.Extension}}{{if .Documentation}}, Doc: {{.Documentation}}{{end}} +{{if .Documentation}} +{{.Documentation}} +//{{end}} +// Asm: {{.Asm}}, CPU Feature: {{.Extension}} func (x {{(index .In 0).Go}}) {{.Go}}() {{(index .Out 0).Go}} {{- end}} {{- range .OpsLen2}} - -// Asm: {{.Asm}}, Arch: {{.Extension}}{{if .Documentation}}, Doc: {{.Documentation}}{{end}} +{{if .Documentation}} +{{.Documentation}} +//{{end}} +// Asm: {{.Asm}}, CPU Feature: {{.Extension}} func (x {{(index .In 0).Go}}) {{.Go}}(y {{(index .In 1).Go}}) {{(index .Out 0).Go}} {{- end}} {{- range .OpsLen3}} - -// Asm: 
{{.Asm}}, Arch: {{.Extension}}{{if .Documentation}}, Doc: {{.Documentation}}{{end}} +{{if .Documentation}} +{{.Documentation}} +//{{end}} +// Asm: {{.Asm}}, CPU Feature: {{.Extension}} func (x {{(index .In 0).Go}}) {{.Go}}(y {{(index .In 1).Go}}, z {{(index .In 2).Go}}) {{(index .Out 0).Go}} +{{- end}} +{{- range .OpsLen4}} +{{if .Documentation}} +{{.Documentation}} +//{{end}} +// Asm: {{.Asm}}, CPU Feature: {{.Extension}} +func (x {{(index .In 0).Go}}) {{.Go}}(y {{(index .In 1).Go}}, z {{(index .In 2).Go}}, u {{(index .In 3).Go}}) {{(index .Out 0).Go}} + +{{- end}} +{{- range .OpsLen1Imm8}} +{{if .Documentation}} +{{.Documentation}} +//{{end}} +// Asm: {{.Asm}}, CPU Feature: {{.Extension}} +func (x {{(index .In 1).Go}}) {{.Go}}(imm8 uint8) {{(index .Out 0).Go}} + +{{- end}} +{{- range .OpsLen2Imm8}} +{{if .Documentation}} +{{.Documentation}} +//{{end}} +// Asm: {{.Asm}}, CPU Feature: {{.Extension}} +func (x {{(index .In 1).Go}}) {{.Go}}(imm uint8, y {{(index .In 2).Go}}) {{(index .Out 0).Go}} + +{{- end}} +{{- range .OpsLen3Imm8}} +{{if .Documentation}} +{{.Documentation}} +//{{end}} +// Asm: {{.Asm}}, CPU Feature: {{.Extension}} +func (x {{(index .In 1).Go}}) {{.Go}}(imm uint8, y {{(index .In 2).Go}}, z {{(index .In 3).Go}}) {{(index .Out 0).Go}} + +{{- end}} +{{- range .OpsLen3Imm8}} +{{if .Documentation}} +{{.Documentation}} +//{{end}} +// Asm: {{.Asm}}, CPU Feature: {{.Extension}} +func (x {{(index .In 1).Go}}) {{.Go}}(imm uint8, y {{(index .In 2).Go}}, z {{(index .In 3).Go}}, u {{(index .In 3).Go}}) {{(index .Out 0).Go}} + {{- end}} {{- range .VectorConversions }} @@ -243,7 +289,7 @@ func writeSIMDStubs(directory string, ops []Operation, typeMap simdTypeMap) erro return err } defer file.Close() - opsLen1, opsLen2, opsLen3, err := genericOpsByLen(ops) + opsLen1, opsLen2, opsLen3, opsLen4, opsLen1Imm8, opsLen2Imm8, opsLen3Imm8, opsLen4Imm8, err := opsByLen(ops) if err != nil { return err } @@ -252,11 +298,16 @@ func writeSIMDStubs(directory string, ops 
[]Operation, typeMap simdTypeMap) erro OpsLen1 []Operation OpsLen2 []Operation OpsLen3 []Operation + OpsLen4 []Operation + OpsLen1Imm8 []Operation + OpsLen2Imm8 []Operation + OpsLen3Imm8 []Operation + OpsLen4Imm8 []Operation VectorConversions []simdTypePair Masks []simdType } - err = t.Execute(file, templateData{opsLen1, opsLen2, opsLen3, vConvertFromTypeMap(typeMap), masksFromTypeMap(typeMap)}) + err = t.Execute(file, templateData{opsLen1, opsLen2, opsLen3, opsLen4, opsLen1Imm8, opsLen2Imm8, opsLen3Imm8, opsLen4Imm8, vConvertFromTypeMap(typeMap), masksFromTypeMap(typeMap)}) if err != nil { return fmt.Errorf("failed to execute template : %w", err) } diff --git a/internal/simdgen/gen_simdrules.go b/internal/simdgen/gen_simdrules.go index 2f2178d6..5f51c6f8 100644 --- a/internal/simdgen/gen_simdrules.go +++ b/internal/simdgen/gen_simdrules.go @@ -16,35 +16,26 @@ import ( var ( ruleTemplates = template.Must(template.New("simdRules").Parse(` -{{define "pureVregInVregOut"}}({{.GoOp}}{{.GoType}} {{.Args}}) => ({{.Asm}} {{.ReverseArgs}}) +{{define "pureVreg"}}({{.GoOp}}{{.GoType}} {{.Args}}) => ({{.Asm}} {{.ArgsOut}}) {{end}} -{{define "oneKmaskInVregOut"}}({{.GoOp}}{{.GoType}} {{.Args}} mask) => ({{.Asm}} {{.ReverseArgs}} (VPMOVVec{{.ElemBits}}x{{.Lanes}}ToM mask)) +{{define "maskIn"}}({{.GoOp}}{{.GoType}} {{.Args}} mask) => ({{.Asm}} {{.ArgsOut}} ({{.MaskInConvert}} mask)) {{end}} -{{define "oneConstImmInVregOut"}}({{.GoOp}}{{.GoType}} {{.Args}}) => ({{.Asm}} [{{.Const}}] {{.ReverseArgs}}) +{{define "maskOut"}}({{.GoOp}}{{.GoType}} {{.Args}}) => ({{.MaskOutConvert}} ({{.Asm}} {{.ArgsOut}})) {{end}} -{{define "oneKmaskConstImmInVregOut"}}({{.GoOp}}{{.GoType}} {{.Args}} mask) => ({{.Asm}} [{{.Const}}] {{.ReverseArgs}} (VPMOVVec{{.ElemBits}}x{{.Lanes}}ToM mask)) -{{end}} -{{define "pureVregInKmaskOut"}}({{.GoOp}}{{.GoType}} {{.Args}}) => (VPMOVMToVec{{.ElemBits}}x{{.Lanes}} ({{.Asm}} {{.ReverseArgs}})) -{{end}} -{{define "oneKmaskInKmaskOut"}}({{.GoOp}}{{.GoType}} 
{{.Args}} mask) => (VPMOVMToVec{{.ElemBits}}x{{.Lanes}} ({{.Asm}} {{.ReverseArgs}} (VPMOVVec{{.ElemBits}}x{{.Lanes}}ToM mask))) -{{end}} -{{define "oneConstImmInKmaskOut"}}({{.GoOp}}{{.GoType}} {{.Args}}) => (VPMOVMToVec{{.ElemBits}}x{{.Lanes}} ({{.Asm}} [{{.Const}}] {{.ReverseArgs}})) -{{end}} -{{define "oneKmaskConstImmInKmaskOut"}}({{.GoOp}}{{.GoType}} {{.Args}} mask) => (VPMOVMToVec{{.ElemBits}}x{{.Lanes}} ({{.Asm}} [{{.Const}}] {{.ReverseArgs}} (VPMOVVec{{.ElemBits}}x{{.Lanes}}ToM mask))) +{{define "maskInMaskOut"}}({{.GoOp}}{{.GoType}} {{.Args}} mask) => ({{.MaskOutConvert}} ({{.Asm}} {{.ArgsOut}} ({{.MaskInConvert}} mask))) {{end}} `)) ) type tplRuleData struct { - tplName string - GoOp string - GoType string - Args string - Asm string - ReverseArgs string - ElemBits int - Lanes int - Const string + tplName string + GoOp string + GoType string + Args string + Asm string + ArgsOut string + MaskInConvert string + MaskOutConvert string } func compareTplRuleData(x, y tplRuleData) int { @@ -55,7 +46,7 @@ func compareTplRuleData(x, y tplRuleData) int { if c := strings.Compare(x.GoType, y.GoType); c != 0 { return c } - if c := strings.Compare(x.Const, y.Const); c != 0 { + if c := strings.Compare(x.Args, y.Args); c != 0 { return c } return 0 @@ -77,13 +68,6 @@ func writeSIMDRules(directory string, ops []Operation) error { header := `// Code generated by x/arch/internal/simdgen using 'go run . -xedPath $XED_PATH -o godefs -goroot $GOROOT go.yaml types.yaml categories.yaml'; DO NOT EDIT. -// The AVX instruction encodings orders vector register from right to left, for example: -// VSUBPS X Y Z means Z=Y-X -// The rules here swapped the order of such X and Y because the ssa to prog lowering in simdssa.go assumes a -// left to right order. -// TODO: we should offload the logic to simdssa.go, instead of here. 
-// - ` if _, err := io.WriteString(file, header); err != nil { return fmt.Errorf("failed to write header to %s: %w", outPath, err) @@ -92,82 +76,84 @@ func writeSIMDRules(directory string, ops []Operation) error { var allData []tplRuleData for _, opr := range ops { - opInShape, opOutShape, maskType, _, o, gOp, err := opr.shape() + opInShape, opOutShape, maskType, immType, _, _, gOp, err := opr.shape() if err != nil { return err } vregInCnt := len(gOp.In) + asm := gOp.Asm if maskType == OneMask { - o.Asm += "Masked" + asm += "Masked" vregInCnt-- } - o.Asm = fmt.Sprintf("%s%d", o.Asm, *o.Out[0].Bits) + asm = fmt.Sprintf("%s%d", asm, *gOp.Out[0].Bits) data := tplRuleData{ - GoOp: o.Go, - Asm: o.Asm, + GoOp: gOp.Go, + Asm: asm, } if vregInCnt == 1 { data.Args = "x" - data.ReverseArgs = "x" + data.ArgsOut = data.Args } else if vregInCnt == 2 { data.Args = "x y" - data.ReverseArgs = "y x" + data.ArgsOut = data.Args + } else if vregInCnt == 3 { + data.Args = "x y z" + data.ArgsOut = data.Args } else { - return fmt.Errorf("simdgen does not support more than 2 vreg in inputs") + return fmt.Errorf("simdgen does not support more than 3 vreg in inputs") + } + if immType == ConstImm { + data.ArgsOut = fmt.Sprintf("[%s] %s", *opr.In[0].Const, data.ArgsOut) + } else if immType == VarImm { + data.Args = fmt.Sprintf("[a] %s", data.Args) + data.ArgsOut = fmt.Sprintf("[a] %s", data.ArgsOut) + } else if immType == ConstVarImm { + data.Args = fmt.Sprintf("[a] %s", data.Args) + data.ArgsOut = fmt.Sprintf("[a+%s] %s", *opr.In[0].Const, data.ArgsOut) } var tplName string // If class overwrite is happening, that's not really a mask but a vreg. 
- if opOutShape == OneVregOut || o.Out[0].OverwriteClass != nil { + if opOutShape == OneVregOut || opOutShape == OneVregOutAtIn || gOp.Out[0].OverwriteClass != nil { switch opInShape { + case OneImmIn: + tplName = "pureVreg" + data.GoType = *gOp.In[0].Go case PureVregIn: - tplName = "pureVregInVregOut" - data.GoType = *o.In[0].Go + tplName = "pureVreg" + data.GoType = *gOp.In[0].Go + data.Args = "..." + data.ArgsOut = "..." + case OneKmaskImmIn: + fallthrough case OneKmaskIn: - tplName = "oneKmaskInVregOut" - data.GoType = *o.In[0].Go - data.ElemBits = *o.In[0].ElemBits - data.Lanes = *o.In[0].Lanes - case OneConstImmIn: - tplName = "oneConstImmInVregOut" - data.GoType = *o.In[1].Go - data.Const = *o.In[0].Const - case OneKmaskConstImmIn: - tplName = "oneKmaskConstImmInVregOut" - data.GoType = *o.In[1].Go - data.Const = *o.In[0].Const - data.ElemBits = *o.In[1].ElemBits - data.Lanes = *o.In[1].Lanes + tplName = "maskIn" + data.GoType = *gOp.In[0].Go + rearIdx := len(gOp.In) - 1 + // Mask is at the end. 
+ data.MaskInConvert = fmt.Sprintf("VPMOVVec%dx%dToM", *gOp.In[rearIdx].ElemBits, *gOp.In[rearIdx].Lanes) case PureKmaskIn: return fmt.Errorf("simdgen does not support pure k mask instructions, they should be generated by compiler optimizations") } } else { // OneKmaskOut case + data.MaskOutConvert = fmt.Sprintf("VPMOVMToVec%dx%d", *gOp.Out[0].ElemBits, *gOp.In[0].Lanes) switch opInShape { + case OneImmIn: + fallthrough case PureVregIn: - tplName = "pureVregInKmaskOut" - data.GoType = *o.In[0].Go - data.ElemBits = *o.In[0].ElemBits - data.Lanes = *o.In[0].Lanes + tplName = "maskOut" + data.GoType = *gOp.In[0].Go + case OneKmaskImmIn: + fallthrough case OneKmaskIn: - tplName = "oneKmaskInKmaskOut" - data.GoType = *o.In[0].Go - data.ElemBits = *o.In[0].ElemBits - data.Lanes = *o.In[0].Lanes - case OneConstImmIn: - tplName = "oneConstImmInKmaskOut" - data.GoType = *o.In[1].Go - data.Const = *o.In[0].Const - data.ElemBits = *o.In[1].ElemBits - data.Lanes = *o.In[1].Lanes - case OneKmaskConstImmIn: - tplName = "oneKmaskConstImmInKmaskOut" - data.GoType = *o.In[1].Go - data.Const = *o.In[0].Const - data.ElemBits = *o.In[1].ElemBits - data.Lanes = *o.In[1].Lanes + tplName = "maskInMaskOut" + data.GoType = *gOp.In[0].Go + rearIdx := len(gOp.In) - 1 + data.MaskInConvert = fmt.Sprintf("VPMOVVec%dx%dToM", *gOp.In[rearIdx].ElemBits, *gOp.In[rearIdx].Lanes) case PureKmaskIn: return fmt.Errorf("simdgen does not support pure k mask instructions, they should be generated by compiler optimizations") } diff --git a/internal/simdgen/gen_simdssa.go b/internal/simdgen/gen_simdssa.go index 1f61d071..b7d94251 100644 --- a/internal/simdgen/gen_simdssa.go +++ b/internal/simdgen/gen_simdssa.go @@ -7,9 +7,12 @@ package main import ( "fmt" "strings" + "text/template" ) -const simdssaTmpl = `// Code generated by x/arch/internal/simdgen using 'go run . -xedPath $XED_PATH -o godefs -goroot $GOROOT go.yaml types.yaml categories.yaml'; DO NOT EDIT. 
+var ( + ssaTemplates = template.Must(template.New("simdSSA").Parse(` +{{define "header"}}// Code generated by x/arch/internal/simdgen using 'go run . -xedPath $XED_PATH -o godefs -goroot $GOROOT go.yaml types.yaml categories.yaml'; DO NOT EDIT. package amd64 @@ -21,93 +24,67 @@ import ( ) func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { - p := s.Prog(v.Op.Asm()) - // First arg - switch v.Op {{"{"}}{{if gt (len .Imms) 0}} - // Immediates - case {{.Imms}}: - imm := v.AuxInt - if imm < 0 || imm > 255 { - v.Fatalf("Invalid source selection immediate") - } - p.From.Offset = imm - p.From.Type = obj.TYPE_CONST -{{end}}{{if gt (len .Reg0) 0}} - // Registers - case {{.Reg0}}: - p.From.Type = obj.TYPE_REG - p.From.Reg = simdReg(v.Args[0]) + var p *obj.Prog + switch v.Op {{"{"}}{{end}} +{{define "case"}} + case {{.Cases}}: + p = {{.Helper}}(s, v) {{end}} +{{define "footer"}} default: - // At least one arg is required. + // Unknown reg shape return false } - - // Second arg - switch v.Op {{"{"}}{{if gt (len .Reg1) 0}} - // Registers - case {{.Reg1}}: - if p.From.Type == obj.TYPE_CONST { - p.AddRestSourceReg(simdReg(v.Args[0])) - } else { - p.AddRestSourceReg(simdReg(v.Args[1])) - }{{end}} - } - - // Third arg - switch v.Op {{"{"}}{{if gt (len .Reg2) 0}} - // Registers - case {{.Reg2}}: - if p.From.Type == obj.TYPE_CONST { - p.AddRestSourceReg(simdReg(v.Args[1])) - } else { - p.AddRestSourceReg(simdReg(v.Args[2])) - }{{end}} - } - - // Fourth arg - switch v.Op {{"{"}}{{if gt (len .Reg3) 0}} - case {{.Reg3}}: - if p.From.Type == obj.TYPE_CONST { - p.AddRestSourceReg(simdReg(v.Args[2])) - } else { - p.AddRestSourceReg(simdReg(v.Args[3])) - }{{end}} - } - - // Output - switch v.Op {{"{"}}{{if gt (len .All) 0}} - case {{.All}}: - p.To.Type = obj.TYPE_REG - p.To.Reg = simdReg(v) {{end}} - default: - // One result is required. - return false - } -{{if gt (len .ZeroingMask) 0}} +{{define "zeroing"}} // Masked operation are always compiled with zeroing. 
switch v.Op { - case {{.ZeroingMask}}: + case {{.}}: x86.ParseSuffix(p, "Z") } {{end}} +{{define "ending"}} return true } -` +{{end}}`)) +) + +type tplSSAData struct { + Cases string + Helper string +} // writeSIMDSSA generates the ssa to prog lowering codes and writes it to simdssa.go // within the specified directory. func writeSIMDSSA(directory string, ops []Operation) error { - var Imms []string - var All []string var ZeroingMask []string - Regs := map[int][]string{} + regInfoKeys := []string{ + "fp11", + "fp21", + "fp2m1", + "fp2m1fp1", + "fp2m1m1", + "fp1m1fp1", + "fp31", + "fp3m1fp1", + "fp11Imm8", + "fp1m1fp1Imm8", + "fp21Imm8", + "fp2m1Imm8", + "fp2m1m1Imm8", + "fp31ResultInArg0", + "fp3m1fp1ResultInArg0", + } + regInfoSet := map[string][]string{} + for _, key := range regInfoKeys { + regInfoSet[key] = []string{} + } seen := map[string]struct{}{} + allUnseen := map[string]struct{}{} for _, op := range ops { asm := op.Asm - shapeIn, _, maskType, _, _, gOp, err := op.shape() + shapeIn, shapeOut, maskType, _, _, _, gOp, err := op.shape() if err != nil { return err } @@ -120,52 +97,67 @@ func writeSIMDSSA(directory string, ops []Operation) error { } seen[asm] = struct{}{} caseStr := fmt.Sprintf("ssa.OpAMD64%s", asm) - if shapeIn == OneKmaskIn || shapeIn == OneKmaskConstImmIn { + if shapeIn == OneKmaskIn || shapeIn == OneKmaskImmIn { if gOp.Zeroing == nil { ZeroingMask = append(ZeroingMask, caseStr) } } - immCount := 0 - if shapeIn == OneConstImmIn || shapeIn == OneKmaskConstImmIn { - immCount++ - Imms = append(Imms, caseStr) + regShape, err := op.regShape() + if err != nil { + return err + } + if shapeOut == OneVregOutAtIn { + regShape += "ResultInArg0" } - for i := range len(gOp.In) { - if i > 2 { - return fmt.Errorf("simdgen does not recognize more than 3 registers: %s", gOp) - } - Regs[i+immCount] = append(Regs[i+immCount], caseStr) + if shapeIn == OneImmIn || shapeIn == OneKmaskImmIn { + regShape += "Imm8" + } + if _, ok := regInfoSet[regShape]; !ok { + 
allUnseen[regShape] = struct{}{} } - All = append(All, caseStr) + regInfoSet[regShape] = append(regInfoSet[regShape], caseStr) } - - data := struct { - Imms string - Reg0 string - Reg1 string - Reg2 string - Reg3 string - All string - ZeroingMask string - }{ - strings.Join(Imms, ",\n\t\t"), - strings.Join(Regs[0], ",\n\t\t"), - strings.Join(Regs[1], ",\n\t\t"), - strings.Join(Regs[2], ",\n\t\t"), - strings.Join(Regs[3], ",\n\t\t"), - strings.Join(All, ",\n\t\t"), - strings.Join(ZeroingMask, ",\n\t\t"), + if len(allUnseen) != 0 { + return fmt.Errorf("unsupported register constraint for prog, please update gen_simdssa.go and amd64/ssa.go: %+v", allUnseen) } - file, t, err := openFileAndPrepareTemplate(directory, "src/cmd/compile/internal/amd64/simdssa.go", simdssaTmpl) + file, _, err := openFileAndPrepareTemplate(directory, "src/cmd/compile/internal/amd64/simdssa.go", "") if err != nil { return err } defer file.Close() - err = t.Execute(file, data) - if err != nil { - return fmt.Errorf("failed to execute template: %w", err) + if err := ssaTemplates.ExecuteTemplate(file, "header", nil); err != nil { + return fmt.Errorf("failed to execute header template: %w", err) + } + + for _, regShape := range regInfoKeys { + // Stable traversal of regInfoSet + cases := regInfoSet[regShape] + if len(cases) == 0 { + continue + } + data := tplSSAData{ + Cases: strings.Join(cases, ",\n\t\t"), + Helper: "simdGen" + capitalizeFirst(regShape), + } + if err := ssaTemplates.ExecuteTemplate(file, "case", data); err != nil { + return fmt.Errorf("failed to execute case template for %s: %w", regShape, err) + } + } + + if err := ssaTemplates.ExecuteTemplate(file, "footer", nil); err != nil { + return fmt.Errorf("failed to execute footer template: %w", err) + } + + if len(ZeroingMask) != 0 { + if err := ssaTemplates.ExecuteTemplate(file, "zeroing", strings.Join(ZeroingMask, ",\n\t\t")); err != nil { + return fmt.Errorf("failed to execute footer template: %w", err) + } + } + + if err := 
ssaTemplates.ExecuteTemplate(file, "ending", nil); err != nil { + return fmt.Errorf("failed to execute footer template: %w", err) } return nil diff --git a/internal/simdgen/gen_utility.go b/internal/simdgen/gen_utility.go index 7fcade60..7f2af75c 100644 --- a/internal/simdgen/gen_utility.go +++ b/internal/simdgen/gen_utility.go @@ -40,8 +40,8 @@ const ( InvalidIn int = iota PureVregIn OneKmaskIn - OneConstImmIn - OneKmaskConstImmIn + OneImmIn + OneKmaskImmIn PureKmaskIn ) @@ -50,6 +50,7 @@ const ( NoOut OneVregOut OneKmaskOut + OneVregOutAtIn ) const ( @@ -60,33 +61,51 @@ const ( AllMasks ) +const ( + InvalidImm int = iota + NoImm + ConstImm + VarImm + ConstVarImm +) + // opShape returns the an int denoting the shape of the operation: // -// shapeIn: -// InvalidIn: unknown, with err set to the error message -// PureVregIn: pure vreg operation -// OneKmaskIn: operation with one k mask input (TODO: verify if it's always opmask predicate) -// OneConstImmIn: operation with one const imm input -// OneKmaskConstImmIn: operation with one k mask input and one const imm input -// PureKmaskIn: it's a K mask instruction (which can use K0) +// shapeIn: +// InvalidIn: unknown, with err set to the error message +// PureVregIn: pure vreg operation +// OneKmaskIn: operation with one k mask input (TODO: verify if it's always opmask predicate) +// OneImmIn: operation with one imm input +// OneKmaskImmIn: operation with one k mask input and one imm input +// PureKmaskIn: it's a K mask instruction (which can use K0) // -// shapeOut: -// InvalidOut: unknown, with err set to the error message -// NoOut: no outputs, this is invalid now. -// OneVregOut: one vreg output -// OneKmaskOut: one mask output +// shapeOut: +// InvalidOut: unknown, with err set to the error message +// NoOut: no outputs, this is invalid now. 
+// OneVregOut: one vreg output +// OneKmaskOut: one mask output +// OneVregOutAtIn: one vreg output, it's at the same time the first input // -// maskType: -// InvalidMask: unknown, with err set to the error message -// NoMask: no mask -// OneMask: with mask (K1 to K7) -// OneConstMask: with const mask K0 -// AllMasks: it's a K mask instruction +// maskType: +// InvalidMask: unknown, with err set to the error message +// NoMask: no mask +// OneMask: with mask (K1 to K7) +// OneConstMask: with const mask K0 +// AllMasks: it's a K mask instruction +// +// immType: +// InvalidImm: unrecognize immediate structure +// NoImm: no immediate +// ConstImm: const only immediate +// VarImm: pure imm argument provided by the users +// ConstVarImm: a combination of user arg and const // // opNoImm is op with its inputs excluding the const imm. // opNoConstMask is op with its inputs excluding the const mask. // opNoConstImmMask is op with its inputs excluding the const imm and mask. -func (op *Operation) shape() (shapeIn, shapeOut, maskType int, opNoConstImm Operation, opNoConstMask Operation, opNoConstImmMask Operation, err error) { +// +// This function does not modify op. 
+func (op *Operation) shape() (shapeIn, shapeOut, maskType, immTyppe int, opNoImm Operation, opNoConstMask Operation, opNoImmConstMask Operation, err error) { if len(op.Out) > 1 { err = fmt.Errorf("simdgen only supports 1 output: %s", op) return @@ -115,16 +134,16 @@ func (op *Operation) shape() (shapeIn, shapeOut, maskType int, opNoConstImm Oper hasVreg := false for i, in := range op.In { if in.AsmPos == outputReg { - err = fmt.Errorf("simdgen doesn't support output and input sharing the same position: %s", op) - return + if shapeOut != OneVregOutAtIn && in.AsmPos == 0 && in.Class == "vreg" { + shapeOut = OneVregOutAtIn + } else { + err = fmt.Errorf("simdgen only support output and input sharing the same position case of \"the first input is vreg and the only output\": %s", op) + return + } } if in.Class == "immediate" { // A manual check on XED data found that AMD64 SIMD instructions at most // have 1 immediates. So we don't need to check this here. - if in.Const == nil { - err = fmt.Errorf("simdgen doesn't support non-const immediates: %s", op) - return - } if *in.Bits != 8 { err = fmt.Errorf("simdgen only supports immediates of 8 bits: %s", op) return @@ -151,22 +170,36 @@ func (op *Operation) shape() (shapeIn, shapeOut, maskType int, opNoConstImm Oper hasVreg = true } } - opNoConstImm = *op + opNoImm = *op opNoConstMask = *op - opNoConstImmMask = *op + opNoImmConstMask = *op removeConstMask := func(o *Operation) { o.In = append(o.In[:iConstMask], o.In[iConstMask+1:]...) 
} if iConstMask != -1 { removeConstMask(&opNoConstMask) - removeConstMask(&opNoConstImmMask) + removeConstMask(&opNoImmConstMask) } - removeConstImm := func(o *Operation) { + removeImm := func(o *Operation) { o.In = o.In[1:] } if hasImm { - removeConstImm(&opNoConstImm) - removeConstImm(&opNoConstImmMask) + removeImm(&opNoImm) + removeImm(&opNoImmConstMask) + if op.In[0].Const != nil { + if op.In[0].ImmOffset != nil { + immTyppe = ConstVarImm + } else { + immTyppe = ConstImm + } + } else if op.In[0].ImmOffset != nil { + immTyppe = VarImm + } else { + err = fmt.Errorf("simdgen requires imm to have at least one of ImmOffset or Const set: %s", op) + return + } + } else { + immTyppe = NoImm } if maskCount == 0 { if iConstMask == -1 { @@ -205,10 +238,10 @@ func (op *Operation) shape() (shapeIn, shapeOut, maskType int, opNoConstImm Oper maskType = AllMasks } } else if hasImm && maskCount == 0 { - shapeIn = OneConstImmIn + shapeIn = OneImmIn } else { if maskCount == 1 { - shapeIn = OneKmaskConstImmIn + shapeIn = OneKmaskImmIn } else { checkPureMask() return @@ -217,6 +250,48 @@ func (op *Operation) shape() (shapeIn, shapeOut, maskType int, opNoConstImm Oper return } +// regShape returns a string representation of the register shape. +func (op *Operation) regShape() (string, error) { + _, _, _, _, _, _, gOp, _ := op.shape() + var regInfo string + var vRegInCnt, kMaskInCnt, vRegOutCnt, kMaskOutCnt int + for _, in := range gOp.In { + if in.Class == "vreg" { + vRegInCnt++ + } else if in.Class == "mask" { + kMaskInCnt++ + } + } + for _, out := range gOp.Out { + // If class overwrite is happening, that's not really a mask but a vreg. 
+ if out.Class == "vreg" || out.OverwriteClass != nil { + vRegOutCnt++ + } else if out.Class == "mask" { + kMaskOutCnt++ + } + } + var vRegInS, kMaskInS, vRegOutS, kMaskOutS string + if vRegInCnt > 0 { + vRegInS = fmt.Sprintf("fp%d", vRegInCnt) + } + if kMaskInCnt > 0 { + kMaskInS = fmt.Sprintf("m%d", kMaskInCnt) + } + if vRegOutCnt > 0 { + vRegOutS = fmt.Sprintf("fp%d", vRegOutCnt) + } + if kMaskOutCnt > 0 { + kMaskOutS = fmt.Sprintf("m%d", kMaskOutCnt) + } + if kMaskInCnt == 0 && kMaskOutCnt == 0 { + // For pure fp we can abbreviate it as fp%d%d. + regInfo = fmt.Sprintf("fp%d%d", vRegInCnt, vRegOutCnt) + } else { + regInfo = fmt.Sprintf("%s%s%s%s", vRegInS, kMaskInS, vRegOutS, kMaskOutS) + } + return regInfo, nil +} + // sortOperand sorts op.In by putting immediates first, then vreg, and mask the last. // TODO: verify that this is a safe assumption of the prog strcture. // from my observation looks like in asm, imms are always the first, masks are always the last, with @@ -224,31 +299,66 @@ func (op *Operation) shape() (shapeIn, shapeOut, maskType int, opNoConstImm Oper func (op *Operation) sortOperand() { priority := map[string]int{"immediate": 2, "vreg": 1, "mask": 0} sort.SliceStable(op.In, func(i, j int) bool { - return priority[op.In[i].Class]-priority[op.In[j].Class] > 0 + pi := priority[op.In[i].Class] + pj := priority[op.In[j].Class] + if pi != pj { + return pi > pj + } + return op.In[i].AsmPos < op.In[j].AsmPos }) } -// genericOpsByLen returns the lists of generic ops aggregated by input length. -func genericOpsByLen(ops []Operation) (opsLen1, opsLen2, opsLen3 []Operation, e error) { +// opsByLen returns the lists of ops stripping the const masks away, aggregated by input length. +// Ops with only const imms also has their immediates removed. 
+func opsByLen(ops []Operation) (opsLen1, opsLen2, opsLen3, opsLen4, opsLen1Imm8, opsLen2Imm8, opsLen3Imm8, opsLen4Imm8 []Operation, e error) { opsLen1 = make([]Operation, 0) opsLen2 = make([]Operation, 0) opsLen3 = make([]Operation, 0) + opsLen4 = make([]Operation, 0) + opsLen1Imm8 = make([]Operation, 0) + opsLen2Imm8 = make([]Operation, 0) + opsLen3Imm8 = make([]Operation, 0) + opsLen4Imm8 = make([]Operation, 0) for _, op := range ops { - _, shapeOut, _, _, _, gOp, err := op.shape() + _, shapeOut, _, immType, _, opNoConstMask, gOp, err := op.shape() if err != nil { e = err return } - // Put the go ssa type in Class field, simd intrinsics need it. - if shapeOut == OneVregOut || shapeOut == OneKmaskOut { + // Put the go ssa type in GoArch field, simd intrinsics need it. + if shapeOut == OneVregOut || shapeOut == OneKmaskOut || shapeOut == OneVregOutAtIn { + opNoConstMask.GoArch = fmt.Sprintf("types.TypeVec%d", *opNoConstMask.Out[0].Bits) gOp.GoArch = fmt.Sprintf("types.TypeVec%d", *gOp.Out[0].Bits) } - if len(gOp.In) == 1 { - opsLen1 = append(opsLen1, gOp) - } else if len(gOp.In) == 2 { - opsLen2 = append(opsLen2, gOp) - } else if len(gOp.In) == 3 { - opsLen3 = append(opsLen3, gOp) + if immType == VarImm || immType == ConstVarImm { + switch len(opNoConstMask.In) { + case 1: + e = fmt.Errorf("simdgen does not recognize this operation of only immediate input: %s", op) + return + case 2: + opsLen1Imm8 = append(opsLen1Imm8, opNoConstMask) + case 3: + opsLen2Imm8 = append(opsLen2Imm8, opNoConstMask) + case 4: + opsLen3Imm8 = append(opsLen3Imm8, opNoConstMask) + case 5: + opsLen4Imm8 = append(opsLen4Imm8, opNoConstMask) + default: + e = fmt.Errorf("simdgen does not recognize this operation of input length %d: %s", len(opNoConstMask.In), op) + } + } else { + switch len(gOp.In) { + case 1: + opsLen1 = append(opsLen1, gOp) + case 2: + opsLen2 = append(opsLen2, gOp) + case 3: + opsLen3 = append(opsLen3, gOp) + case 4: + opsLen4 = append(opsLen4, gOp) + default: + e = 
fmt.Errorf("simdgen does not recognize this operation of input length %d: %s", len(opNoConstMask.In), op) + } } } sortKey := func(op *Operation) string { @@ -262,6 +372,11 @@ func genericOpsByLen(ops []Operation) (opsLen1, opsLen2, opsLen3 []Operation, e sortBySortKey(opsLen1) sortBySortKey(opsLen2) sortBySortKey(opsLen3) + sortBySortKey(opsLen4) + sortBySortKey(opsLen1Imm8) + sortBySortKey(opsLen2Imm8) + sortBySortKey(opsLen3Imm8) + sortBySortKey(opsLen4Imm8) return } @@ -291,11 +406,11 @@ func splitMask(ops []Operation) ([]Operation, error) { if op.Masked == nil || *op.Masked != "true" { continue } - shapeIn, _, _, _, _, _, err := op.shape() + shapeIn, _, _, _, _, _, _, err := op.shape() if err != nil { return nil, err } - if shapeIn == OneKmaskIn || shapeIn == OneKmaskConstImmIn { + if shapeIn == OneKmaskIn || shapeIn == OneKmaskImmIn { op2 := op op2.In = slices.Clone(op.In) constMask := "K0" @@ -305,6 +420,9 @@ func splitMask(ops []Operation) ([]Operation, error) { return nil, fmt.Errorf("simdgen only recognizes masked operations with name starting with 'Masked': %s", op) } op2.Go = strings.ReplaceAll(op2.Go, "Masked", "") + if op2.Documentation != nil { + *op2.Documentation = strings.ReplaceAll(*op2.Documentation, "Masked", "") + } splited = append(splited, op2) } else { return nil, fmt.Errorf("simdgen only recognizes masked operations with exactly one mask input: %s", op) @@ -320,7 +438,7 @@ func splitMask(ops []Operation) ([]Operation, error) { func dedupGodef(ops []Operation) ([]Operation, error) { seen := map[string][]Operation{} for _, op := range ops { - _, _, _, _, _, gOp, err := op.shape() + _, _, _, _, _, _, gOp, err := op.shape() if err != nil { return nil, err } @@ -366,11 +484,11 @@ func copyConstImm(ops []Operation) error { if op.ConstImm == nil { continue } - shapeIn, _, _, _, _, _, err := op.shape() + _, _, _, immType, _, _, _, err := op.shape() if err != nil { return err } - if shapeIn == OneConstImmIn || shapeIn == OneKmaskConstImmIn { + if 
immType == ConstImm || immType == ConstVarImm { op.In[0].Const = op.ConstImm } // Otherwise, just not port it - e.g. {VPCMP[BWDQ] imm=0} and {VPCMPEQ[BWDQ]} are @@ -379,6 +497,16 @@ func copyConstImm(ops []Operation) error { return nil } +func capitalizeFirst(s string) string { + if s == "" { + return "" + } + // Convert the string to a slice of runes to handle multi-byte characters correctly. + r := []rune(s) + r[0] = unicode.ToUpper(r[0]) + return string(r) +} + // overwrite corrects some errors due to: // - The XED data is wrong // - Go's SIMD API requirement, for example AVX2 compares should also produce masks. @@ -386,15 +514,6 @@ func copyConstImm(ops []Operation) error { // These constraints are also explointed in [writeSIMDRules], [writeSIMDMachineOps] // and [writeSIMDSSA], please be careful when updating these constraints. func overwrite(ops []Operation) error { - capitalizeFirst := func(s string) string { - if s == "" { - return "" - } - // Convert the string to a slice of runes to handle multi-byte characters correctly. 
- r := []rune(s) - r[0] = unicode.ToUpper(r[0]) - return string(r) - } hasClassOverwrite := false overwrite := func(op []Operand, idx int) error { if op[idx].OverwriteClass != nil { @@ -421,6 +540,10 @@ func overwrite(ops []Operation) error { *op[idx].Go = strings.ReplaceAll(*op[idx].Go, capitalizeFirst(*op[idx].Base), capitalizeFirst(oBase)) *op[idx].Base = oBase } + if op[idx].OverwriteElementBits != nil { + *op[idx].ElemBits = *op[idx].OverwriteElementBits + *op[idx].Go = fmt.Sprintf("%s%dx%d", capitalizeFirst(*op[idx].Base), *op[idx].ElemBits, *op[idx].Bits / *op[idx].ElemBits) + } return nil } for i := range ops { diff --git a/internal/simdgen/godefs.go b/internal/simdgen/godefs.go index 9309e0ce..b6d872be 100644 --- a/internal/simdgen/godefs.go +++ b/internal/simdgen/godefs.go @@ -83,14 +83,23 @@ type Operand struct { ElemBits *int // Element bit width Bits *int // Total vector bit width - Const *string // Optional constant value - Lanes *int // Lanes should equal Bits/ElemBits + Const *string // Optional constant value for immediates. + // Optional immediate arg offsets. If this field is non-nil, + // This operand will be an immediate operand: + // The compiler will right-shift the user-passed value by ImmOffset and set it as the AuxInt + // field of the operation. + ImmOffset *string + Lanes *int // Lanes should equal Bits/ElemBits // If non-nil, it means the [Class] field is overwritten here, right now this is used to // overwrite the results of AVX2 compares to masks. OverwriteClass *string // If non-nil, it means the [Base] field is overwritten here. This field exist solely // because Intel's XED data is inconsistent. e.g. VANDNP[SD] marks its operand int. OverwriteBase *string + // If non-nil, it means the [ElementBits] field is overwritten. This field exist solely + // because Intel's XED data is inconsistent. e.g. AVX512 VPMADDUBSW marks its operand + // elemBits 16, which should be 8. 
+ OverwriteElementBits *int } func writeGoDefs(path string, cl unify.Closure) error { diff --git a/internal/simdgen/ops/AddSub/categories.yaml b/internal/simdgen/ops/AddSub/categories.yaml index 1d08a94b..592790ca 100644 --- a/internal/simdgen/ops/AddSub/categories.yaml +++ b/internal/simdgen/ops/AddSub/categories.yaml @@ -30,16 +30,16 @@ - go: PairwiseAdd commutative: "false" extension: "AVX.*" - documentation: "Add pairs of elements in vector x and store them in higher half of the target; Add pairs of elements in vector y and store them in lower half of the target" + documentation: "// Add pairs of elements in vector x and store them in higher half of the target; Add pairs of elements in vector y and store them in lower half of the target" - go: PairwiseSub commutative: "false" extension: "AVX.*" - documentation: "Sub pairs of elements in vector x and store them in higher half of the target; Sub pairs of elements in vector y and store them in lower half of the target" + documentation: "// Sub pairs of elements in vector x and store them in higher half of the target; Sub pairs of elements in vector y and store them in lower half of the target" - go: SaturatedPairwiseAdd commutative: "false" extension: "AVX.*" - documentation: "Add pairs of elements in vector x and store them in higher half of the target; Add pairs of elements in vector y and store them in lower half of the target; With saturation" + documentation: "// Add pairs of elements in vector x and store them in higher half of the target; Add pairs of elements in vector y and store them in lower half of the target; With saturation" - go: SaturatedPairwiseSub commutative: "false" extension: "AVX.*" - documentation: "Sub pairs of elements in vector x and store them in higher half of the target; Sub pairs of elements in vector y and store them in lower half of the target; With saturation" \ No newline at end of file + documentation: "// Sub pairs of elements in vector x and store them in higher half of the 
target; Sub pairs of elements in vector y and store them in lower half of the target; With saturation" \ No newline at end of file diff --git a/internal/simdgen/ops/Compares/categories.yaml b/internal/simdgen/ops/Compares/categories.yaml index 027c8e8d..06a1caa2 100644 --- a/internal/simdgen/ops/Compares/categories.yaml +++ b/internal/simdgen/ops/Compares/categories.yaml @@ -10,77 +10,77 @@ constImm: 0 commutative: "true" extension: "AVX.*" - documentation: "Predicate immediate is 0 if it has;" + documentation: "// Predicate immediate is 0 if it has;" - go: Less constImm: 1 commutative: "false" extension: "AVX.*" - documentation: "Predicate immediate is 1 if it has;" + documentation: "// Predicate immediate is 1 if it has;" - go: LessEqual constImm: 2 commutative: "false" extension: "AVX.*" - documentation: "Predicate immediate is 2 if it has;" + documentation: "// Predicate immediate is 2 if it has;" - go: IsNan # For float only. constImm: 3 commutative: "true" extension: "AVX.*" - documentation: "Predicate immediate is 3 if it has; Returns mask element True if either one of the input\\'s element is Nan; Please use this method as x\\.IsNan\\(x\\) to check x only;" + documentation: "// Predicate immediate is 3 if it has; Returns mask element True if either one of the input\\'s element is Nan; Please use this method as x\\.IsNan\\(x\\) to check x only;" - go: NotEqual constImm: 4 commutative: "true" extension: "AVX.*" - documentation: "Predicate immediate is 4 if it has;" + documentation: "// Predicate immediate is 4 if it has;" - go: GreaterEqual constImm: 5 commutative: "false" extension: "AVX.*" - documentation: "Predicate immediate is 5 if it has;" + documentation: "// Predicate immediate is 5 if it has;" - go: Greater constImm: 6 commutative: "false" extension: "AVX.*" - documentation: "Predicate immediate is 6 if it has;" + documentation: "// Predicate immediate is 6 if it has;" - go: MaskedEqual constImm: 0 masked: "true" commutative: "true" extension: 
"AVX.*" - documentation: "Predicate immediate is 0 if it has;" + documentation: "// Predicate immediate is 0 if it has;" - go: MaskedLess constImm: 1 masked: "true" commutative: "false" extension: "AVX.*" - documentation: "Predicate immediate is 1 if it has;" + documentation: "// Predicate immediate is 1 if it has;" - go: MaskedLessEqual constImm: 2 masked: "true" commutative: "false" extension: "AVX.*" - documentation: "Predicate immediate is 2 if it has;" + documentation: "// Predicate immediate is 2 if it has;" - go: MaskedIsNan # For float only. constImm: 3 masked: "true" commutative: "true" extension: "AVX.*" - documentation: "Predicate immediate is 3 if it has; Returns mask element True if either one of the input\\'s element is Nan; Please use this method as x\\.IsNan\\(x\\) to check x only;" + documentation: "// Predicate immediate is 3 if it has; Returns mask element True if either one of the input\\'s element is Nan; Please use this method as x\\.IsNan\\(x\\) to check x only;" - go: MaskedNotEqual constImm: 4 masked: "true" commutative: "true" extension: "AVX.*" - documentation: "Predicate immediate is 4 if it has;" + documentation: "// Predicate immediate is 4 if it has;" - go: MaskedGreaterEqual constImm: 5 masked: "true" commutative: "false" extension: "AVX.*" - documentation: "Predicate immediate is 5 if it has;" + documentation: "// Predicate immediate is 5 if it has;" - go: MaskedGreater constImm: 6 masked: "true" commutative: "false" extension: "AVX.*" - documentation: "Predicate immediate is 6 if it has;" \ No newline at end of file + documentation: "// Predicate immediate is 6 if it has;" \ No newline at end of file diff --git a/internal/simdgen/ops/Mul/categories.yaml b/internal/simdgen/ops/Mul/categories.yaml index 0ef6cf57..42275e24 100644 --- a/internal/simdgen/ops/Mul/categories.yaml +++ b/internal/simdgen/ops/Mul/categories.yaml @@ -5,15 +5,15 @@ - go: MulEvenWiden commutative: "true" extension: "AVX.*" - documentation: "Multiplies the even 
index elements from the two sources of size X at index i, store the result of size 2X at index i/2" + documentation: "// Multiplies the even index elements from the two sources of size X at index i, store the result of size 2X at index i/2" - go: MulHigh commutative: "true" extension: "AVX.*" - documentation: "Multiplies the elements from the two sources of size X at index i, store the high X bits of the result of size 2X at index i" + documentation: "// Multiplies the elements from the two sources of size X at index i, store the high X bits of the result of size 2X at index i" - go: MulLow commutative: "true" extension: "AVX.*" - documentation: "Multiplies the elements from the two sources of size X at index i, store the low X bits of the result of size 2X at index i" + documentation: "// Multiplies the elements from the two sources of size X at index i, store the low X bits of the result of size 2X at index i" - go: MaskedMul masked: "true" commutative: "true" @@ -22,14 +22,14 @@ masked: "true" commutative: "true" extension: "AVX.*" - documentation: "Multiplies the even index elements from the two sources of size X at index i, store the result of size 2X at index i/2" + documentation: "// Multiplies the even index elements from the two sources of size X at index i, store the result of size 2X at index i/2" - go: MaskedMulHigh masked: "true" commutative: "true" extension: "AVX.*" - documentation: "Multiplies the elements from the two sources of size X at index i, store the high X bits of the result of size 2X at index i" + documentation: "// Multiplies the elements from the two sources of size X at index i, store the high X bits of the result of size 2X at index i" - go: MaskedMulLow masked: "true" commutative: "true" extension: "AVX.*" - documentation: "Multiplies the elements from the two sources of size X at index i, store the low X bits of the result of size 2X at index i" \ No newline at end of file + documentation: "// Multiplies the elements from the two 
sources of size X at index i, store the low X bits of the result of size 2X at index i" \ No newline at end of file From 36000cf2bf76dea89e3d78bce3d5a6874622ce22 Mon Sep 17 00:00:00 2001 From: Junyang Shao Date: Fri, 13 Jun 2025 03:12:28 +0000 Subject: [PATCH 076/200] internal/simdgen: fix register and prog func names This CL generates CL 681215. Change-Id: I97032505ee3221340df146686dea87b7320edf45 Reviewed-on: https://go-review.googlesource.com/c/arch/+/681395 Auto-Submit: Junyang Shao Reviewed-by: Cherry Mui LUCI-TryBot-Result: Go LUCI Reviewed-by: David Chase Reviewed-by: Junyang Shao --- internal/simdgen/gen_simdMachineOps.go | 4 ++-- internal/simdgen/gen_simdssa.go | 20 ++++++++++---------- internal/simdgen/gen_utility.go | 4 ++-- 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/internal/simdgen/gen_simdMachineOps.go b/internal/simdgen/gen_simdMachineOps.go index 3cacc990..f09b5568 100644 --- a/internal/simdgen/gen_simdMachineOps.go +++ b/internal/simdgen/gen_simdMachineOps.go @@ -12,7 +12,7 @@ import ( const simdMachineOpsTmpl = `// Code generated by x/arch/internal/simdgen using 'go run . -xedPath $XED_PATH -o godefs -goroot $GOROOT go.yaml types.yaml categories.yaml'; DO NOT EDIT. 
package main -func simdAMD64Ops(fp11, fp21, fp2m1, fp1m1fp1, fp2m1fp1, fp2m1m1, fp31, fp3m1fp1 regInfo) []opData { +func simdAMD64Ops(fp11, fp21, fp2k1, fp1k1fp1, fp2k1fp1, fp2k1k1, fp31, fp3k1fp1 regInfo) []opData { return []opData{ {{- range .OpsData }} {name: "{{.OpName}}", argLength: {{.OpInLen}}, reg: {{.RegInfo}}, asm: "{{.Asm}}", commutative: {{.Comm}}, typ: "{{.Type}}", resultInArg0: {{.ResultInArg0}}}, @@ -47,7 +47,7 @@ func writeSIMDMachineOps(directory string, ops []Operation) error { OpsDataImm []opData } seen := map[string]struct{}{} - regInfoSet := map[string]bool{"fp11": true, "fp21": true, "fp2m1": true, "fp2m1fp1": true, "fp2m1m1": true, "fp1m1fp1": true, "fp31": true, "fp3m1fp1": true} + regInfoSet := map[string]bool{"fp11": true, "fp21": true, "fp2k1": true, "fp2k1fp1": true, "fp2k1k1": true, "fp1k1fp1": true, "fp31": true, "fp3k1fp1": true} opsData := make([]opData, 0) opsDataImm := make([]opData, 0) for _, op := range ops { diff --git a/internal/simdgen/gen_simdssa.go b/internal/simdgen/gen_simdssa.go index b7d94251..14f97e60 100644 --- a/internal/simdgen/gen_simdssa.go +++ b/internal/simdgen/gen_simdssa.go @@ -61,19 +61,19 @@ func writeSIMDSSA(directory string, ops []Operation) error { regInfoKeys := []string{ "fp11", "fp21", - "fp2m1", - "fp2m1fp1", - "fp2m1m1", - "fp1m1fp1", + "fp2k1", + "fp2k1fp1", + "fp2k1k1", + "fp1k1fp1", "fp31", - "fp3m1fp1", + "fp3k1fp1", "fp11Imm8", - "fp1m1fp1Imm8", + "fp1k1fp1Imm8", "fp21Imm8", - "fp2m1Imm8", - "fp2m1m1Imm8", + "fp2k1Imm8", + "fp2k1k1Imm8", "fp31ResultInArg0", - "fp3m1fp1ResultInArg0", + "fp3k1fp1ResultInArg0", } regInfoSet := map[string][]string{} for _, key := range regInfoKeys { @@ -139,7 +139,7 @@ func writeSIMDSSA(directory string, ops []Operation) error { } data := tplSSAData{ Cases: strings.Join(cases, ",\n\t\t"), - Helper: "simdGen" + capitalizeFirst(regShape), + Helper: "simd" + capitalizeFirst(regShape), } if err := ssaTemplates.ExecuteTemplate(file, "case", data); err != nil { return 
fmt.Errorf("failed to execute case template for %s: %w", regShape, err) diff --git a/internal/simdgen/gen_utility.go b/internal/simdgen/gen_utility.go index 7f2af75c..074be682 100644 --- a/internal/simdgen/gen_utility.go +++ b/internal/simdgen/gen_utility.go @@ -275,13 +275,13 @@ func (op *Operation) regShape() (string, error) { vRegInS = fmt.Sprintf("fp%d", vRegInCnt) } if kMaskInCnt > 0 { - kMaskInS = fmt.Sprintf("m%d", kMaskInCnt) + kMaskInS = fmt.Sprintf("k%d", kMaskInCnt) } if vRegOutCnt > 0 { vRegOutS = fmt.Sprintf("fp%d", vRegOutCnt) } if kMaskOutCnt > 0 { - kMaskOutS = fmt.Sprintf("m%d", kMaskOutCnt) + kMaskOutS = fmt.Sprintf("k%d", kMaskOutCnt) } if kMaskInCnt == 0 && kMaskOutCnt == 0 { // For pure fp we can abbreviate it as fp%d%d. From ea2d5edcba09cb9673cb2b86943400c4be784fec Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Fri, 13 Jun 2025 14:21:18 -0400 Subject: [PATCH 077/200] internal/simdgen: fix commutativity of AndNot ops Change-Id: Ie313f7aa8227eaff9d944da57a366841fa48c1d6 Reviewed-on: https://go-review.googlesource.com/c/arch/+/681477 Auto-Submit: Austin Clements LUCI-TryBot-Result: Go LUCI Reviewed-by: Junyang Shao --- internal/simdgen/categories.yaml | 4 ++-- internal/simdgen/ops/BitwiseLogic/categories.yaml | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/internal/simdgen/categories.yaml b/internal/simdgen/categories.yaml index 4a4affbc..3492ffbb 100644 --- a/internal/simdgen/categories.yaml +++ b/internal/simdgen/categories.yaml @@ -58,11 +58,11 @@ commutative: "true" extension: "AVX.*" - go: AndNot - commutative: "true" + commutative: "false" extension: "AVX.*" - go: MaskedAndNot masked: "true" - commutative: "true" + commutative: "false" extension: "AVX.*" - go: Xor commutative: "true" diff --git a/internal/simdgen/ops/BitwiseLogic/categories.yaml b/internal/simdgen/ops/BitwiseLogic/categories.yaml index bc4eda74..064f42b0 100644 --- a/internal/simdgen/ops/BitwiseLogic/categories.yaml +++ 
b/internal/simdgen/ops/BitwiseLogic/categories.yaml @@ -14,11 +14,11 @@ commutative: "true" extension: "AVX.*" - go: AndNot - commutative: "true" + commutative: "false" extension: "AVX.*" - go: MaskedAndNot masked: "true" - commutative: "true" + commutative: "false" extension: "AVX.*" - go: Xor commutative: "true" @@ -28,4 +28,4 @@ commutative: "true" extension: "AVX.*" # We also have PTEST and VPTERNLOG, those should be hidden from the users -# and only appear in rewrite rules. \ No newline at end of file +# and only appear in rewrite rules. From bdb36ed7fe3d64f233fea98b785d9fe0e87d7ec1 Mon Sep 17 00:00:00 2001 From: Junyang Shao Date: Mon, 2 Jun 2025 21:37:17 +0000 Subject: [PATCH 078/200] internal/simdgen: add round operations This CL also simplifies some simdgen logics. This CL generates CL 681295. Change-Id: Ibceb7d514353cf7b479913c905b21469c1b80df3 Reviewed-on: https://go-review.googlesource.com/c/arch/+/678195 LUCI-TryBot-Result: Go LUCI Reviewed-by: David Chase --- internal/simdgen/.gitignore | 2 + internal/simdgen/categories.yaml | 104 ++++++++++++++++++ internal/simdgen/go.yaml | 35 ++++++ .../simdgen/ops/FPonlyArith/categories.yaml | 104 ++++++++++++++++++ internal/simdgen/ops/FPonlyArith/go.yaml | 37 ++++++- 5 files changed, 281 insertions(+), 1 deletion(-) diff --git a/internal/simdgen/.gitignore b/internal/simdgen/.gitignore index 1cc9ae43..de579f6b 100644 --- a/internal/simdgen/.gitignore +++ b/internal/simdgen/.gitignore @@ -1 +1,3 @@ testdata/* +.gemini/* +.gemini* diff --git a/internal/simdgen/categories.yaml b/internal/simdgen/categories.yaml index 3492ffbb..7a55f58e 100644 --- a/internal/simdgen/categories.yaml +++ b/internal/simdgen/categories.yaml @@ -190,6 +190,110 @@ commutative: "false" masked: "true" extension: "AVX.*" + +- go: Round + commutative: "false" + extension: "AVX.*" + constImm: 0 +- go: MaskedRoundWithPrecision + commutative: "false" + extension: "AVX.*" + constImm: 0 + masked: "true" +- go: 
MaskedRoundSuppressExceptionWithPrecision + commutative: "false" + extension: "AVX.*" + constImm: 8 + masked: "true" +- go: MaskedDiffWithRoundWithPrecision + commutative: "false" + extension: "AVX.*" + constImm: 0 + masked: "true" +- go: MaskedDiffWithRoundSuppressExceptionWithPrecision + commutative: "false" + extension: "AVX.*" + constImm: 8 + masked: "true" + +- go: Floor + commutative: "false" + extension: "AVX.*" + constImm: 1 +- go: MaskedFloorWithPrecision + commutative: "false" + extension: "AVX.*" + constImm: 1 + masked: "true" +- go: MaskedFloorSuppressExceptionWithPrecision + commutative: "false" + extension: "AVX.*" + constImm: 9 + masked: "true" +- go: MaskedDiffWithFloorWithPrecision + commutative: "false" + extension: "AVX.*" + constImm: 1 + masked: "true" +- go: MaskedDiffWithFloorSuppressExceptionWithPrecision + commutative: "false" + extension: "AVX.*" + constImm: 9 + masked: "true" + +- go: Ceil + commutative: "false" + extension: "AVX.*" + constImm: 2 +- go: MaskedCeilWithPrecision + commutative: "false" + extension: "AVX.*" + constImm: 2 + masked: "true" +- go: MaskedCeilSuppressExceptionWithPrecision + commutative: "false" + extension: "AVX.*" + constImm: 10 + masked: "true" +- go: MaskedDiffWithCeilWithPrecision + commutative: "false" + extension: "AVX.*" + constImm: 2 + masked: "true" +- go: MaskedDiffWithCeilSuppressExceptionWithPrecision + commutative: "false" + extension: "AVX.*" + constImm: 10 + masked: "true" + +- go: Trunc + commutative: "false" + extension: "AVX.*" + constImm: 3 +- go: MaskedTruncWithPrecision + commutative: "false" + extension: "AVX.*" + constImm: 3 + masked: "true" +- go: MaskedTruncSuppressExceptionWithPrecision + commutative: "false" + extension: "AVX.*" + constImm: 11 + masked: "true" +- go: MaskedDiffWithTruncWithPrecision + commutative: "false" + extension: "AVX.*" + constImm: 3 + masked: "true" +- go: MaskedDiffWithTruncSuppressExceptionWithPrecision + commutative: "false" + extension: "AVX.*" + constImm: 11 
+ masked: "true" + +- go: AddSub + commutative: "false" + extension: "AVX.*" - go: Average commutative: "true" extension: "AVX.*" # VPAVGB/W are available across various AVX versions diff --git a/internal/simdgen/go.yaml b/internal/simdgen/go.yaml index 35f0bf75..997f3992 100644 --- a/internal/simdgen/go.yaml +++ b/internal/simdgen/go.yaml @@ -395,6 +395,41 @@ asm: "VSCALEFP[SD]" in: *1mask2fp out: *1fp + +- go: "Round|Ceil|Floor|Trunc" + asm: "VROUNDP[SD]" + in: + - *fp + - class: immediate + const: 0 # place holder + out: *1fp + +- go: "Masked(Round|Ceil|Floor|Trunc)(SuppressException)?WithPrecision" + asm: "VRNDSCALEP[SD]" + in: + - class: mask + - *fp + - class: immediate + const: 0 # place holder + immOffset: 4 # "M", round to numbers with M digits after dot(by means of binary number). + out: *1fp +- go: "MaskedDiffWith(Round|Ceil|Floor|Trunc)(SuppressException)?WithPrecision" + asm: "VREDUCEP[SD]" + in: + - class: mask + - *fp + - class: immediate + const: 0 # place holder + immOffset: 4 # "M", round to numbers with M digits after dot(by means of binary number). 
+ out: *1fp + +- go: "AddSub" + asm: "VADDSUBP[SD]" + in: + - *fp + - *fp + out: + - *fp # Average (unsigned byte, unsigned word) # Instructions: VPAVGB, VPAVGW - go: Average diff --git a/internal/simdgen/ops/FPonlyArith/categories.yaml b/internal/simdgen/ops/FPonlyArith/categories.yaml index 3c46f1f4..e486225e 100644 --- a/internal/simdgen/ops/FPonlyArith/categories.yaml +++ b/internal/simdgen/ops/FPonlyArith/categories.yaml @@ -31,3 +31,107 @@ commutative: "false" masked: "true" extension: "AVX.*" + +- go: Round + commutative: "false" + extension: "AVX.*" + constImm: 0 +- go: MaskedRoundWithPrecision + commutative: "false" + extension: "AVX.*" + constImm: 0 + masked: "true" +- go: MaskedRoundSuppressExceptionWithPrecision + commutative: "false" + extension: "AVX.*" + constImm: 8 + masked: "true" +- go: MaskedDiffWithRoundWithPrecision + commutative: "false" + extension: "AVX.*" + constImm: 0 + masked: "true" +- go: MaskedDiffWithRoundSuppressExceptionWithPrecision + commutative: "false" + extension: "AVX.*" + constImm: 8 + masked: "true" + +- go: Floor + commutative: "false" + extension: "AVX.*" + constImm: 1 +- go: MaskedFloorWithPrecision + commutative: "false" + extension: "AVX.*" + constImm: 1 + masked: "true" +- go: MaskedFloorSuppressExceptionWithPrecision + commutative: "false" + extension: "AVX.*" + constImm: 9 + masked: "true" +- go: MaskedDiffWithFloorWithPrecision + commutative: "false" + extension: "AVX.*" + constImm: 1 + masked: "true" +- go: MaskedDiffWithFloorSuppressExceptionWithPrecision + commutative: "false" + extension: "AVX.*" + constImm: 9 + masked: "true" + +- go: Ceil + commutative: "false" + extension: "AVX.*" + constImm: 2 +- go: MaskedCeilWithPrecision + commutative: "false" + extension: "AVX.*" + constImm: 2 + masked: "true" +- go: MaskedCeilSuppressExceptionWithPrecision + commutative: "false" + extension: "AVX.*" + constImm: 10 + masked: "true" +- go: MaskedDiffWithCeilWithPrecision + commutative: "false" + extension: "AVX.*" + 
constImm: 2 + masked: "true" +- go: MaskedDiffWithCeilSuppressExceptionWithPrecision + commutative: "false" + extension: "AVX.*" + constImm: 10 + masked: "true" + +- go: Trunc + commutative: "false" + extension: "AVX.*" + constImm: 3 +- go: MaskedTruncWithPrecision + commutative: "false" + extension: "AVX.*" + constImm: 3 + masked: "true" +- go: MaskedTruncSuppressExceptionWithPrecision + commutative: "false" + extension: "AVX.*" + constImm: 11 + masked: "true" +- go: MaskedDiffWithTruncWithPrecision + commutative: "false" + extension: "AVX.*" + constImm: 3 + masked: "true" +- go: MaskedDiffWithTruncSuppressExceptionWithPrecision + commutative: "false" + extension: "AVX.*" + constImm: 11 + masked: "true" + +- go: AddSub + commutative: "false" + extension: "AVX.*" \ No newline at end of file diff --git a/internal/simdgen/ops/FPonlyArith/go.yaml b/internal/simdgen/ops/FPonlyArith/go.yaml index bd774e1d..48e071ec 100644 --- a/internal/simdgen/ops/FPonlyArith/go.yaml +++ b/internal/simdgen/ops/FPonlyArith/go.yaml @@ -40,4 +40,39 @@ - go: MaskedMulByPowOf2 asm: "VSCALEFP[SD]" in: *1mask2fp - out: *1fp \ No newline at end of file + out: *1fp + +- go: "Round|Ceil|Floor|Trunc" + asm: "VROUNDP[SD]" + in: + - *fp + - class: immediate + const: 0 # place holder + out: *1fp + +- go: "Masked(Round|Ceil|Floor|Trunc)(SuppressException)?WithPrecision" + asm: "VRNDSCALEP[SD]" + in: + - class: mask + - *fp + - class: immediate + const: 0 # place holder + immOffset: 4 # "M", round to numbers with M digits after dot(by means of binary number). + out: *1fp +- go: "MaskedDiffWith(Round|Ceil|Floor|Trunc)(SuppressException)?WithPrecision" + asm: "VREDUCEP[SD]" + in: + - class: mask + - *fp + - class: immediate + const: 0 # place holder + immOffset: 4 # "M", round to numbers with M digits after dot(by means of binary number). 
+ out: *1fp + +- go: "AddSub" + asm: "VADDSUBP[SD]" + in: + - *fp + - *fp + out: + - *fp \ No newline at end of file From c2a8f7037160725497d502d69a9f1aa5fa83ac6d Mon Sep 17 00:00:00 2001 From: Junyang Shao Date: Tue, 3 Jun 2025 17:53:42 +0000 Subject: [PATCH 079/200] internal/simdgen: add dot products This CL also updates the sortOperand method; before dot products, all vreg inputs has the same type, so their order doesn't really matter; Now with dot products we have to make sure their order is correct after sort. This CL generates CL 681296. Change-Id: I20506eb889979ea5f390b36615f4cf934fc418c9 Reviewed-on: https://go-review.googlesource.com/c/arch/+/678515 LUCI-TryBot-Result: Go LUCI Reviewed-by: David Chase --- internal/simdgen/categories.yaml | 24 +++++++++++ internal/simdgen/go.yaml | 49 +++++++++++++++++++++ internal/simdgen/ops/MLOps/categories.yaml | 25 +++++++++++ internal/simdgen/ops/MLOps/go.yaml | 50 ++++++++++++++++++++++ 4 files changed, 148 insertions(+) create mode 100644 internal/simdgen/ops/MLOps/categories.yaml create mode 100644 internal/simdgen/ops/MLOps/go.yaml diff --git a/internal/simdgen/categories.yaml b/internal/simdgen/categories.yaml index 7a55f58e..00672a14 100644 --- a/internal/simdgen/categories.yaml +++ b/internal/simdgen/categories.yaml @@ -321,6 +321,30 @@ commutative: "false" masked: "true" extension: "AVX512.*" # VPOPCNT instructions are AVX512 (BITALG or VPOPCNTDQ) +- go: PairDotProd + commutative: "false" + extension: "AVX.*" + documentation: "// Multiply the elements and add the pairs together, yielding a vector of half as many elements with twice the input element size" +- go: MaskedPairDotProd + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: "// Multiply the elements and add the pairs together, yielding a vector of half as many elements with twice the input element size" +- go: SaturatedUnsignedSignedPairDotProd + commutative: "false" + extension: "AVX.*" + documentation: "// Multiply the 
elements and add the pairs together with saturation, yielding a vector of half as many elements with twice the input element size" +- go: MaskedSaturatedUnsignedSignedPairDotProd + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: "// Multiply the elements and add the pairs together with saturation, yielding a vector of half as many elements with twice the input element size" + +# QuadDotProd, i.e. VPDPBUSD(S) are operations with src/dst on the same register, we are not supporting this as of now. +- go: DotProdBroadcast + commutative: "true" + extension: "AVX.*" + documentation: "// Multiply all the elements and add them together; the result is a broadcast of the dot product" - go: Max commutative: "true" extension: "AVX.*" diff --git a/internal/simdgen/go.yaml b/internal/simdgen/go.yaml index 997f3992..765eea0b 100644 --- a/internal/simdgen/go.yaml +++ b/internal/simdgen/go.yaml @@ -490,6 +490,55 @@ go: $t out: - *any +- go: PairDotProd + asm: VPMADDWD + in: + - &int + go: $t + base: int + - *int + out: + - &int2 # The elemBits are different + go: $t2 + base: int +- go: MaskedPairDotProd + asm: VPMADDWD + in: + - class: mask + - *int + - *int + out: + - *int2 +- go: SaturatedUnsignedSignedPairDotProd + asm: VPMADDUBSW + in: + - &uint + go: $t + base: uint + - &int3 + go: $t3 + base: int + out: + - *int2 +- go: MaskedSaturatedUnsignedSignedPairDotProd + asm: VPMADDUBSW + in: + - class: mask + - *uint + - *int3 + out: + - *int2 +- go: DotProdBroadcast + asm: VDPPD + in: + - &float + go: $t + base: float + - *float + - class: immediate + const: 127 # make sure the control bits [4:5] are all 1 + out: + - *float - go: Max asm: "V?PMAXS[BWDQ]" in: &2int diff --git a/internal/simdgen/ops/MLOps/categories.yaml b/internal/simdgen/ops/MLOps/categories.yaml new file mode 100644 index 00000000..6ebb12a0 --- /dev/null +++ b/internal/simdgen/ops/MLOps/categories.yaml @@ -0,0 +1,25 @@ +!sum +- go: PairDotProd + commutative: "false" + extension: "AVX.*" 
+ documentation: "// Multiply the elements and add the pairs together, yielding a vector of half as many elements with twice the input element size" +- go: MaskedPairDotProd + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: "// Multiply the elements and add the pairs together, yielding a vector of half as many elements with twice the input element size" +- go: SaturatedUnsignedSignedPairDotProd + commutative: "false" + extension: "AVX.*" + documentation: "// Multiply the elements and add the pairs together with saturation, yielding a vector of half as many elements with twice the input element size" +- go: MaskedSaturatedUnsignedSignedPairDotProd + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: "// Multiply the elements and add the pairs together with saturation, yielding a vector of half as many elements with twice the input element size" + +# QuadDotProd, i.e. VPDPBUSD(S) are operations with src/dst on the same register, we are not supporting this as of now. 
+- go: DotProdBroadcast + commutative: "true" + extension: "AVX.*" + documentation: "// Multiply all the elements and add them together; the result is a broadcast of the dot product" diff --git a/internal/simdgen/ops/MLOps/go.yaml b/internal/simdgen/ops/MLOps/go.yaml new file mode 100644 index 00000000..9e06d3c9 --- /dev/null +++ b/internal/simdgen/ops/MLOps/go.yaml @@ -0,0 +1,50 @@ +!sum +- go: PairDotProd + asm: VPMADDWD + in: + - &int + go: $t + base: int + - *int + out: + - &int2 # The elemBits are different + go: $t2 + base: int +- go: MaskedPairDotProd + asm: VPMADDWD + in: + - class: mask + - *int + - *int + out: + - *int2 +- go: SaturatedUnsignedSignedPairDotProd + asm: VPMADDUBSW + in: + - &uint + go: $t + base: uint + - &int3 + go: $t3 + base: int + out: + - *int2 +- go: MaskedSaturatedUnsignedSignedPairDotProd + asm: VPMADDUBSW + in: + - class: mask + - *uint + - *int3 + out: + - *int2 +- go: DotProdBroadcast + asm: VDPPD + in: + - &float + go: $t + base: float + - *float + - class: immediate + const: 127 # make sure the control bits [4:5] are all 1 + out: + - *float \ No newline at end of file From 606013036de41e033aebb39e0507d3fe3631213d Mon Sep 17 00:00:00 2001 From: Junyang Shao Date: Sat, 7 Jun 2025 18:54:20 +0000 Subject: [PATCH 080/200] internal/simdgen: update documentations This CL is generated by Gemini This CL generates CL 681297. 
Change-Id: If3323c3a23b0d2197390d1a239bdcbedd60615d2 Reviewed-on: https://go-review.googlesource.com/c/arch/+/679955 Reviewed-by: David Chase Auto-Submit: Junyang Shao LUCI-TryBot-Result: Go LUCI --- internal/simdgen/categories.yaml | 270 ++++++++++++++++-- internal/simdgen/go.yaml | 1 + internal/simdgen/ops/AddSub/categories.yaml | 32 ++- .../simdgen/ops/BitwiseLogic/categories.yaml | 16 ++ internal/simdgen/ops/Compares/categories.yaml | 59 +++- .../simdgen/ops/FPonlyArith/categories.yaml | 91 +++++- .../simdgen/ops/IntOnlyArith/categories.yaml | 15 +- internal/simdgen/ops/MLOps/categories.yaml | 19 +- internal/simdgen/ops/MinMax/categories.yaml | 8 + internal/simdgen/ops/Mul/categories.yaml | 33 ++- internal/simdgen/ops/main.go | 4 + 11 files changed, 488 insertions(+), 60 deletions(-) diff --git a/internal/simdgen/categories.yaml b/internal/simdgen/categories.yaml index 00672a14..f4194101 100644 --- a/internal/simdgen/categories.yaml +++ b/internal/simdgen/categories.yaml @@ -1,76 +1,117 @@ !sum +# TODO: remove the "Const Immediate" from the documentation field, it's there only for debug purposes. - go: Add commutative: "true" extension: "AVX.*" + documentation: !string |- + // Add adds corresponding elements of two vectors. - go: SaturatedAdd commutative: "true" extension: "AVX.*" + documentation: !string |- + // SaturatedAdd adds corresponding elements of two vectors with saturation. - go: MaskedAdd masked: "true" commutative: "true" extension: "AVX.*" + documentation: !string |- + // MaskedAdd adds corresponding elements of two vectors. - go: MaskedSaturatedAdd masked: "true" commutative: "true" extension: "AVX.*" + documentation: !string |- + // MaskedSaturatedAdd adds corresponding elements of two vectors with saturation. - go: Sub commutative: "false" extension: "AVX.*" + documentation: !string |- + // Sub subtracts corresponding elements of two vectors. 
- go: SaturatedSub commutative: "false" extension: "AVX.*" + documentation: !string |- + // SaturatedSub subtracts corresponding elements of two vectors with saturation. - go: MaskedSub masked: "true" commutative: "false" extension: "AVX.*" + documentation: !string |- + // MaskedSub subtracts corresponding elements of two vectors. - go: MaskedSaturatedSub masked: "true" commutative: "false" extension: "AVX.*" + documentation: !string |- + // MaskedSaturatedSub subtracts corresponding elements of two vectors with saturation. - go: PairwiseAdd commutative: "false" extension: "AVX.*" - documentation: "// Add pairs of elements in vector x and store them in higher half of the target; Add pairs of elements in vector y and store them in lower half of the target" + documentation: !string |- + // PairwiseAdd horizontally adds adjacent pairs of elements. + // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...]. - go: PairwiseSub commutative: "false" extension: "AVX.*" - documentation: "// Sub pairs of elements in vector x and store them in higher half of the target; Sub pairs of elements in vector y and store them in lower half of the target" + documentation: !string |- + // PairwiseSub horizontally subtracts adjacent pairs of elements. + // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...]. - go: SaturatedPairwiseAdd commutative: "false" extension: "AVX.*" - documentation: "// Add pairs of elements in vector x and store them in higher half of the target; Add pairs of elements in vector y and store them in lower half of the target; With saturation" + documentation: !string |- + // SaturatedPairwiseAdd horizontally adds adjacent pairs of elements with saturation. + // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...]. 
- go: SaturatedPairwiseSub commutative: "false" extension: "AVX.*" - documentation: "// Sub pairs of elements in vector x and store them in higher half of the target; Sub pairs of elements in vector y and store them in lower half of the target; With saturation" + documentation: !string |- + // SaturatedPairwiseSub horizontally subtracts adjacent pairs of elements with saturation. + // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...]. - go: And commutative: "true" extension: "AVX.*" + documentation: !string |- + // And performs a bitwise AND operation between two vectors. - go: MaskedAnd masked: "true" commutative: "true" extension: "AVX.*" + documentation: !string |- + // MaskedAnd performs a masked bitwise AND operation between two vectors. - go: Or commutative: "true" extension: "AVX.*" + documentation: !string |- + // Or performs a bitwise OR operation between two vectors. - go: MaskedOr masked: "true" commutative: "true" extension: "AVX.*" + documentation: !string |- + // MaskedOr performs a masked bitwise OR operation between two vectors. - go: AndNot commutative: "false" extension: "AVX.*" + documentation: !string |- + // AndNot performs a bitwise AND NOT operation between two vectors. - go: MaskedAndNot masked: "true" commutative: "false" extension: "AVX.*" + documentation: !string |- + // MaskedAndNot performs a masked bitwise AND NOT operation between two vectors. - go: Xor commutative: "true" extension: "AVX.*" + documentation: !string |- + // Xor performs a bitwise XOR operation between two vectors. - go: MaskedXor masked: "true" commutative: "true" extension: "AVX.*" + documentation: !string |- + // MaskedXor performs a masked bitwise XOR operation between two vectors. # We also have PTEST and VPTERNLOG, those should be hidden from the users # and only appear in rewrite rules. 
# const imm predicate(holds for both float and int|uint): @@ -84,312 +125,483 @@ constImm: 0 commutative: "true" extension: "AVX.*" - documentation: "// Predicate immediate is 0 if it has;" + documentation: !string |- + // Equal compares for equality. + // Const Immediate = 0. - go: Less constImm: 1 commutative: "false" extension: "AVX.*" - documentation: "// Predicate immediate is 1 if it has;" + documentation: !string |- + // Less compares for less than. + // Const Immediate = 1. - go: LessEqual constImm: 2 commutative: "false" extension: "AVX.*" - documentation: "// Predicate immediate is 2 if it has;" + documentation: !string |- + // LessEqual compares for less than or equal. + // Const Immediate = 2. - go: IsNan # For float only. constImm: 3 commutative: "true" extension: "AVX.*" - documentation: "// Predicate immediate is 3 if it has; Returns mask element True if either one of the input\\'s element is Nan; Please use this method as x\\.IsNan\\(x\\) to check x only;" + documentation: !string |- + // IsNan checks if elements are NaN. Use as x.IsNan(x). + // Const Immediate = 3. - go: NotEqual constImm: 4 commutative: "true" extension: "AVX.*" - documentation: "// Predicate immediate is 4 if it has;" + documentation: !string |- + // NotEqual compares for inequality. + // Const Immediate = 4. - go: GreaterEqual constImm: 5 commutative: "false" extension: "AVX.*" - documentation: "// Predicate immediate is 5 if it has;" + documentation: !string |- + // GreaterEqual compares for greater than or equal. + // Const Immediate = 5. - go: Greater constImm: 6 commutative: "false" extension: "AVX.*" - documentation: "// Predicate immediate is 6 if it has;" + documentation: !string |- + // Greater compares for greater than. + // Const Immediate = 6. - go: MaskedEqual constImm: 0 masked: "true" commutative: "true" extension: "AVX.*" - documentation: "// Predicate immediate is 0 if it has;" + documentation: !string |- + // MaskedEqual compares for equality, masked. 
+ // Const Immediate = 0. + docUnmasked: !string |- + // Equal compares for equality. + // Const Immediate = 0. - go: MaskedLess constImm: 1 masked: "true" commutative: "false" extension: "AVX.*" - documentation: "// Predicate immediate is 1 if it has;" + documentation: !string |- + // MaskedLess compares for less than. + // Const Immediate = 1. - go: MaskedLessEqual constImm: 2 masked: "true" commutative: "false" extension: "AVX.*" - documentation: "// Predicate immediate is 2 if it has;" + documentation: !string |- + // MaskedLessEqual compares for less than or equal. + // Const Immediate = 2. - go: MaskedIsNan # For float only. constImm: 3 masked: "true" commutative: "true" extension: "AVX.*" - documentation: "// Predicate immediate is 3 if it has; Returns mask element True if either one of the input\\'s element is Nan; Please use this method as x\\.IsNan\\(x\\) to check x only;" + documentation: !string |- + // MaskedIsNan checks if elements are NaN. Use as x.IsNan(x). + // Const Immediate = 3. - go: MaskedNotEqual constImm: 4 masked: "true" commutative: "true" extension: "AVX.*" - documentation: "// Predicate immediate is 4 if it has;" + documentation: !string |- + // MaskedNotEqual compares for inequality. + // Const Immediate = 4. - go: MaskedGreaterEqual constImm: 5 masked: "true" commutative: "false" extension: "AVX.*" - documentation: "// Predicate immediate is 5 if it has;" + documentation: !string |- + // MaskedGreaterEqual compares for greater than or equal. + // Const Immediate = 5. - go: MaskedGreater constImm: 6 masked: "true" commutative: "false" extension: "AVX.*" - documentation: "// Predicate immediate is 6 if it has;" + documentation: !string |- + // MaskedGreater compares for greater than. + // Const Immediate = 6. - go: Div commutative: "false" extension: "AVX.*" + documentation: !string |- + // Div divides elements of two vectors. 
- go: MaskedDiv commutative: "false" masked: "true" extension: "AVX.*" + documentation: !string |- + // MaskedDiv divides elements of two vectors. - go: Sqrt commutative: "false" extension: "AVX.*" + documentation: !string |- + // Sqrt computes the square root of each element. - go: MaskedSqrt commutative: "false" masked: "true" extension: "AVX.*" + documentation: !string |- + // MaskedSqrt computes the square root of each element. - go: ApproximateReciprocal commutative: "false" extension: "AVX.*" + documentation: !string |- + // ApproximateReciprocal computes an approximate reciprocal of each element. - go: MaskedApproximateReciprocal commutative: "false" masked: "true" extension: "AVX.*" + documentation: !string |- + // MaskedApproximateReciprocal computes an approximate reciprocal of each element. - go: ApproximateReciprocalOfSqrt commutative: "false" extension: "AVX.*" + documentation: !string |- + // ApproximateReciprocalOfSqrt computes an approximate reciprocal of the square root of each element. - go: MaskedApproximateReciprocalOfSqrt commutative: "false" masked: "true" extension: "AVX.*" + documentation: !string |- + // MaskedApproximateReciprocalOfSqrt computes an approximate reciprocal of the square root of each element. - go: MaskedMulByPowOf2 # This operation is all after AVX512, the unmasked version will be generated. commutative: "false" masked: "true" extension: "AVX.*" + documentation: !string |- + // MaskedMulByPowOf2 multiplies elements by a power of 2. - go: Round commutative: "false" extension: "AVX.*" constImm: 0 + documentation: !string |- + // Round rounds elements to the nearest integer. + // Const Immediate = 0. - go: MaskedRoundWithPrecision commutative: "false" extension: "AVX.*" constImm: 0 masked: "true" + documentation: !string |- + // MaskedRoundWithPrecision rounds elements with specified precision. + // Const Immediate = 0. 
- go: MaskedRoundSuppressExceptionWithPrecision commutative: "false" extension: "AVX.*" constImm: 8 masked: "true" + documentation: !string |- + // MaskedRoundSuppressExceptionWithPrecision rounds elements with specified precision, suppressing exceptions. + // Const Immediate = 8. - go: MaskedDiffWithRoundWithPrecision commutative: "false" extension: "AVX.*" constImm: 0 masked: "true" + documentation: !string |- + // MaskedDiffWithRoundWithPrecision computes the difference after rounding with specified precision. + // Const Immediate = 0. - go: MaskedDiffWithRoundSuppressExceptionWithPrecision commutative: "false" extension: "AVX.*" constImm: 8 masked: "true" + documentation: !string |- + // MaskedDiffWithRoundSuppressExceptionWithPrecision computes the difference after rounding with specified precision, suppressing exceptions. + // Const Immediate = 8. - go: Floor commutative: "false" extension: "AVX.*" constImm: 1 + documentation: !string |- + // Floor rounds elements down to the nearest integer. + // Const Immediate = 1. - go: MaskedFloorWithPrecision commutative: "false" extension: "AVX.*" constImm: 1 masked: "true" + documentation: !string |- + // MaskedFloorWithPrecision rounds elements down with specified precision, masked. + // Const Immediate = 1. + docUnmasked: !string |- + // FloorWithPrecision rounds elements down with specified precision. + // Const Immediate = 1. - go: MaskedFloorSuppressExceptionWithPrecision commutative: "false" extension: "AVX.*" constImm: 9 masked: "true" + documentation: !string |- + // MaskedFloorSuppressExceptionWithPrecision rounds elements down with specified precision, suppressing exceptions, masked. + // Const Immediate = 9. + docUnmasked: !string |- + // FloorSuppressExceptionWithPrecision rounds elements down with specified precision, suppressing exceptions. + // Const Immediate = 9. 
- go: MaskedDiffWithFloorWithPrecision commutative: "false" extension: "AVX.*" constImm: 1 masked: "true" + documentation: !string |- + // MaskedDiffWithFloorWithPrecision computes the difference after flooring with specified precision. + // Const Immediate = 1. - go: MaskedDiffWithFloorSuppressExceptionWithPrecision commutative: "false" extension: "AVX.*" constImm: 9 masked: "true" + documentation: !string |- + // MaskedDiffWithFloorSuppressExceptionWithPrecision computes the difference after flooring with specified precision, suppressing exceptions. + // Const Immediate = 9. - go: Ceil commutative: "false" extension: "AVX.*" constImm: 2 + documentation: !string |- + // Ceil rounds elements up to the nearest integer. + // Const Immediate = 2. - go: MaskedCeilWithPrecision commutative: "false" extension: "AVX.*" constImm: 2 masked: "true" + documentation: !string |- + // MaskedCeilWithPrecision rounds elements up with specified precision, masked. + // Const Immediate = 2. + docUnmasked: !string |- + // CeilWithPrecision rounds elements up with specified precision. + // Const Immediate = 2. - go: MaskedCeilSuppressExceptionWithPrecision commutative: "false" extension: "AVX.*" constImm: 10 masked: "true" + documentation: !string |- + // MaskedCeilSuppressExceptionWithPrecision rounds elements up with specified precision, suppressing exceptions. + // Const Immediate = 10. - go: MaskedDiffWithCeilWithPrecision commutative: "false" extension: "AVX.*" constImm: 2 masked: "true" + documentation: !string |- + // MaskedDiffWithCeilWithPrecision computes the difference after ceiling with specified precision. + // Const Immediate = 2. - go: MaskedDiffWithCeilSuppressExceptionWithPrecision commutative: "false" extension: "AVX.*" constImm: 10 masked: "true" + documentation: !string |- + // MaskedDiffWithCeilSuppressExceptionWithPrecision computes the difference after ceiling with specified precision, suppressing exceptions. + // Const Immediate = 10. 
- go: Trunc commutative: "false" extension: "AVX.*" constImm: 3 + documentation: !string |- + // Trunc truncates elements towards zero. + // Const Immediate = 3. - go: MaskedTruncWithPrecision commutative: "false" extension: "AVX.*" constImm: 3 masked: "true" + documentation: !string |- + // MaskedTruncWithPrecision truncates elements with specified precision. + // Const Immediate = 3. - go: MaskedTruncSuppressExceptionWithPrecision commutative: "false" extension: "AVX.*" constImm: 11 masked: "true" + documentation: !string |- + // MaskedTruncSuppressExceptionWithPrecision truncates elements with specified precision, suppressing exceptions. + // Const Immediate = 11. - go: MaskedDiffWithTruncWithPrecision commutative: "false" extension: "AVX.*" constImm: 3 masked: "true" + documentation: !string |- + // MaskedDiffWithTruncWithPrecision computes the difference after truncating with specified precision. + // Const Immediate = 3. - go: MaskedDiffWithTruncSuppressExceptionWithPrecision commutative: "false" extension: "AVX.*" constImm: 11 masked: "true" + documentation: !string |- + // MaskedDiffWithTruncSuppressExceptionWithPrecision computes the difference after truncating with specified precision, suppressing exceptions. + // Const Immediate = 11. - go: AddSub commutative: "false" extension: "AVX.*" + documentation: !string |- + // AddSub subtracts even elements and adds odd elements of two vectors. - go: Average commutative: "true" extension: "AVX.*" # VPAVGB/W are available across various AVX versions + documentation: !string |- + // Average computes the rounded average of corresponding elements. - go: MaskedAverage commutative: "true" masked: "true" extension: "AVX512.*" # Masked operations are typically AVX512 + documentation: !string |- + // MaskedAverage computes the rounded average of corresponding elements. 
 - go: Absolute
   commutative: "false" # Unary operation, not commutative
   extension: "AVX.*" # VPABSB/W/D are AVX, VPABSQ is AVX512
+  documentation: !string |-
+    // Absolute computes the absolute value of each element.
 - go: MaskedAbsolute
   commutative: "false"
   masked: "true"
   extension: "AVX512.*"
+  documentation: !string |-
+    // MaskedAbsolute computes the absolute value of each element.
 - go: Sign # Applies sign of second operand to first: sign(val, sign_src)
   commutative: "false"
   extension: "AVX.*"
+  documentation: !string |-
+    // Sign returns the product of the first operand with -1, 0, or 1,
+    // whichever constant is nearest to the value of the second operand.
 # Sign does not have masked version
 - go: MaskedPopCount
   commutative: "false"
   masked: "true"
   extension: "AVX512.*" # VPOPCNT instructions are AVX512 (BITALG or VPOPCNTDQ)
+  documentation: !string |-
+    // MaskedPopCount counts the number of set bits in each element.
 - go: PairDotProd
   commutative: "false"
   extension: "AVX.*"
-  documentation: "// Multiply the elements and add the pairs together, yielding a vector of half as many elements with twice the input element size"
+  documentation: !string |-
+    // PairDotProd multiplies the elements and adds the pairs together,
+    // yielding a vector of half as many elements with twice the input element size.
 - go: MaskedPairDotProd
   masked: "true"
   commutative: "false"
   extension: "AVX.*"
-  documentation: "// Multiply the elements and add the pairs together, yielding a vector of half as many elements with twice the input element size"
+  documentation: !string |-
+    // MaskedPairDotProd multiplies the elements and adds the pairs together,
+    // yielding a vector of half as many elements with twice the input element size.
 - go: SaturatedUnsignedSignedPairDotProd
   commutative: "false"
   extension: "AVX.*"
-  documentation: "// Multiply the elements and add the pairs together with saturation, yielding a vector of half as many elements with twice the input element size"
+  documentation: !string |-
+    // SaturatedUnsignedSignedPairDotProd multiplies the elements and adds the pairs together with saturation,
+    // yielding a vector of half as many elements with twice the input element size.
 - go: MaskedSaturatedUnsignedSignedPairDotProd
   masked: "true"
   commutative: "false"
   extension: "AVX.*"
-  documentation: "// Multiply the elements and add the pairs together with saturation, yielding a vector of half as many elements with twice the input element size"
+  documentation: !string |-
+    // MaskedSaturatedUnsignedSignedPairDotProd multiplies the elements and adds the pairs together with saturation,
+    // yielding a vector of half as many elements with twice the input element size.
 
 # QuadDotProd, i.e. VPDPBUSD(S) are operations with src/dst on the same register, we are not supporting this as of now.
 - go: DotProdBroadcast
   commutative: "true"
   extension: "AVX.*"
-  documentation: "// Multiply all the elements and add them together; the result is a broadcast of the dot product"
+  documentation: !string |-
+    // DotProdBroadcast multiplies all elements and broadcasts the sum.
 - go: Max
   commutative: "true"
   extension: "AVX.*"
+  documentation: !string |-
+    // Max computes the maximum of corresponding elements.
 - go: MaskedMax
   commutative: "true"
   masked: "true"
   extension: "AVX.*"
+  documentation: !string |-
+    // MaskedMax computes the maximum of corresponding elements.
 - go: Min
   commutative: "true"
   extension: "AVX.*"
+  documentation: !string |-
+    // Min computes the minimum of corresponding elements.
 - go: MaskedMin
   commutative: "true"
   masked: "true"
   extension: "AVX.*"
+  documentation: !string |-
+    // MaskedMin computes the minimum of corresponding elements.
- go: Mul commutative: "true" extension: "AVX.*" + documentation: !string |- + // Mul multiplies corresponding elements of two vectors. - go: MulEvenWiden commutative: "true" extension: "AVX.*" - documentation: "// Multiplies the even index elements from the two sources of size X at index i, store the result of size 2X at index i/2" + documentation: !string |- + // MulEvenWiden multiplies even-indexed elements, widening the result. + // Result[i] = v1.Even[i] * v2.Even[i]. - go: MulHigh commutative: "true" extension: "AVX.*" - documentation: "// Multiplies the elements from the two sources of size X at index i, store the high X bits of the result of size 2X at index i" + documentation: !string |- + // MulHigh multiplies elements and stores the high part of the result. - go: MulLow commutative: "true" extension: "AVX.*" - documentation: "// Multiplies the elements from the two sources of size X at index i, store the low X bits of the result of size 2X at index i" + documentation: !string |- + // MulLow multiplies elements and stores the low part of the result. - go: MaskedMul masked: "true" commutative: "true" extension: "AVX.*" + documentation: !string |- + // MaskedMul multiplies corresponding elements of two vectors, masked. + docUnmasked: !string |- + // Mul multiplies corresponding elements of two vectors. - go: MaskedMulEvenWiden masked: "true" commutative: "true" extension: "AVX.*" - documentation: "// Multiplies the even index elements from the two sources of size X at index i, store the result of size 2X at index i/2" + documentation: !string |- + // MaskedMulEvenWiden multiplies even-indexed elements, widening the result, masked. + // Result[i] = v1.Even[i] * v2.Even[i]. + docUnmasked: !string |- + // MulEvenWiden multiplies even-indexed elements, widening the result. + // Result[i] = v1.Even[i] * v2.Even[i]. 
- go: MaskedMulHigh masked: "true" commutative: "true" extension: "AVX.*" - documentation: "// Multiplies the elements from the two sources of size X at index i, store the high X bits of the result of size 2X at index i" + documentation: !string |- + // MaskedMulHigh multiplies elements and stores the high part of the result, masked. + docUnmasked: !string |- + // MulHigh multiplies elements and stores the high part of the result. - go: MaskedMulLow masked: "true" commutative: "true" extension: "AVX.*" - documentation: "// Multiplies the elements from the two sources of size X at index i, store the low X bits of the result of size 2X at index i" + documentation: !string |- + // MaskedMulLow multiplies elements and stores the low part of the result, masked. + docUnmasked: !string |- + // MulLow multiplies elements and stores the low part of the result. diff --git a/internal/simdgen/go.yaml b/internal/simdgen/go.yaml index 765eea0b..de65a04e 100644 --- a/internal/simdgen/go.yaml +++ b/internal/simdgen/go.yaml @@ -1,4 +1,5 @@ !sum +# TODO: remove the "Const Immediate" from the documentation field, it's there only for debug purposes. # Add - go: Add asm: "VPADD[BWDQ]|VADDP[SD]" diff --git a/internal/simdgen/ops/AddSub/categories.yaml b/internal/simdgen/ops/AddSub/categories.yaml index 592790ca..e87ead1d 100644 --- a/internal/simdgen/ops/AddSub/categories.yaml +++ b/internal/simdgen/ops/AddSub/categories.yaml @@ -2,44 +2,68 @@ - go: Add commutative: "true" extension: "AVX.*" + documentation: !string |- + // Add adds corresponding elements of two vectors. - go: SaturatedAdd commutative: "true" extension: "AVX.*" + documentation: !string |- + // SaturatedAdd adds corresponding elements of two vectors with saturation. - go: MaskedAdd masked: "true" commutative: "true" extension: "AVX.*" + documentation: !string |- + // MaskedAdd adds corresponding elements of two vectors. 
- go: MaskedSaturatedAdd masked: "true" commutative: "true" extension: "AVX.*" + documentation: !string |- + // MaskedSaturatedAdd adds corresponding elements of two vectors with saturation. - go: Sub commutative: "false" extension: "AVX.*" + documentation: !string |- + // Sub subtracts corresponding elements of two vectors. - go: SaturatedSub commutative: "false" extension: "AVX.*" + documentation: !string |- + // SaturatedSub subtracts corresponding elements of two vectors with saturation. - go: MaskedSub masked: "true" commutative: "false" extension: "AVX.*" + documentation: !string |- + // MaskedSub subtracts corresponding elements of two vectors. - go: MaskedSaturatedSub masked: "true" commutative: "false" extension: "AVX.*" + documentation: !string |- + // MaskedSaturatedSub subtracts corresponding elements of two vectors with saturation. - go: PairwiseAdd commutative: "false" extension: "AVX.*" - documentation: "// Add pairs of elements in vector x and store them in higher half of the target; Add pairs of elements in vector y and store them in lower half of the target" + documentation: !string |- + // PairwiseAdd horizontally adds adjacent pairs of elements. + // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...]. - go: PairwiseSub commutative: "false" extension: "AVX.*" - documentation: "// Sub pairs of elements in vector x and store them in higher half of the target; Sub pairs of elements in vector y and store them in lower half of the target" + documentation: !string |- + // PairwiseSub horizontally subtracts adjacent pairs of elements. + // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...]. 
- go: SaturatedPairwiseAdd commutative: "false" extension: "AVX.*" - documentation: "// Add pairs of elements in vector x and store them in higher half of the target; Add pairs of elements in vector y and store them in lower half of the target; With saturation" + documentation: !string |- + // SaturatedPairwiseAdd horizontally adds adjacent pairs of elements with saturation. + // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...]. - go: SaturatedPairwiseSub commutative: "false" extension: "AVX.*" - documentation: "// Sub pairs of elements in vector x and store them in higher half of the target; Sub pairs of elements in vector y and store them in lower half of the target; With saturation" \ No newline at end of file + documentation: !string |- + // SaturatedPairwiseSub horizontally subtracts adjacent pairs of elements with saturation. + // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...]. diff --git a/internal/simdgen/ops/BitwiseLogic/categories.yaml b/internal/simdgen/ops/BitwiseLogic/categories.yaml index 064f42b0..4d948364 100644 --- a/internal/simdgen/ops/BitwiseLogic/categories.yaml +++ b/internal/simdgen/ops/BitwiseLogic/categories.yaml @@ -2,30 +2,46 @@ - go: And commutative: "true" extension: "AVX.*" + documentation: !string |- + // And performs a bitwise AND operation between two vectors. - go: MaskedAnd masked: "true" commutative: "true" extension: "AVX.*" + documentation: !string |- + // MaskedAnd performs a masked bitwise AND operation between two vectors. - go: Or commutative: "true" extension: "AVX.*" + documentation: !string |- + // Or performs a bitwise OR operation between two vectors. - go: MaskedOr masked: "true" commutative: "true" extension: "AVX.*" + documentation: !string |- + // MaskedOr performs a masked bitwise OR operation between two vectors. 
- go: AndNot commutative: "false" extension: "AVX.*" + documentation: !string |- + // AndNot performs a bitwise AND NOT operation between two vectors. - go: MaskedAndNot masked: "true" commutative: "false" extension: "AVX.*" + documentation: !string |- + // MaskedAndNot performs a masked bitwise AND NOT operation between two vectors. - go: Xor commutative: "true" extension: "AVX.*" + documentation: !string |- + // Xor performs a bitwise XOR operation between two vectors. - go: MaskedXor masked: "true" commutative: "true" extension: "AVX.*" + documentation: !string |- + // MaskedXor performs a masked bitwise XOR operation between two vectors. # We also have PTEST and VPTERNLOG, those should be hidden from the users # and only appear in rewrite rules. diff --git a/internal/simdgen/ops/Compares/categories.yaml b/internal/simdgen/ops/Compares/categories.yaml index 06a1caa2..bd4d8c76 100644 --- a/internal/simdgen/ops/Compares/categories.yaml +++ b/internal/simdgen/ops/Compares/categories.yaml @@ -10,77 +10,108 @@ constImm: 0 commutative: "true" extension: "AVX.*" - documentation: "// Predicate immediate is 0 if it has;" + documentation: !string |- + // Equal compares for equality. + // Const Immediate = 0. - go: Less constImm: 1 commutative: "false" extension: "AVX.*" - documentation: "// Predicate immediate is 1 if it has;" + documentation: !string |- + // Less compares for less than. + // Const Immediate = 1. - go: LessEqual constImm: 2 commutative: "false" extension: "AVX.*" - documentation: "// Predicate immediate is 2 if it has;" + documentation: !string |- + // LessEqual compares for less than or equal. + // Const Immediate = 2. - go: IsNan # For float only. constImm: 3 commutative: "true" extension: "AVX.*" - documentation: "// Predicate immediate is 3 if it has; Returns mask element True if either one of the input\\'s element is Nan; Please use this method as x\\.IsNan\\(x\\) to check x only;" + documentation: !string |- + // IsNan checks if elements are NaN. 
Use as x.IsNan(x). + // Const Immediate = 3. - go: NotEqual constImm: 4 commutative: "true" extension: "AVX.*" - documentation: "// Predicate immediate is 4 if it has;" + documentation: !string |- + // NotEqual compares for inequality. + // Const Immediate = 4. - go: GreaterEqual constImm: 5 commutative: "false" extension: "AVX.*" - documentation: "// Predicate immediate is 5 if it has;" + documentation: !string |- + // GreaterEqual compares for greater than or equal. + // Const Immediate = 5. - go: Greater constImm: 6 commutative: "false" extension: "AVX.*" - documentation: "// Predicate immediate is 6 if it has;" + documentation: !string |- + // Greater compares for greater than. + // Const Immediate = 6. - go: MaskedEqual constImm: 0 masked: "true" commutative: "true" extension: "AVX.*" - documentation: "// Predicate immediate is 0 if it has;" + documentation: !string |- + // MaskedEqual compares for equality, masked. + // Const Immediate = 0. + docUnmasked: !string |- + // Equal compares for equality. + // Const Immediate = 0. - go: MaskedLess constImm: 1 masked: "true" commutative: "false" extension: "AVX.*" - documentation: "// Predicate immediate is 1 if it has;" + documentation: !string |- + // MaskedLess compares for less than. + // Const Immediate = 1. - go: MaskedLessEqual constImm: 2 masked: "true" commutative: "false" extension: "AVX.*" - documentation: "// Predicate immediate is 2 if it has;" + documentation: !string |- + // MaskedLessEqual compares for less than or equal. + // Const Immediate = 2. - go: MaskedIsNan # For float only. constImm: 3 masked: "true" commutative: "true" extension: "AVX.*" - documentation: "// Predicate immediate is 3 if it has; Returns mask element True if either one of the input\\'s element is Nan; Please use this method as x\\.IsNan\\(x\\) to check x only;" + documentation: !string |- + // MaskedIsNan checks if elements are NaN. Use as x.IsNan(x). + // Const Immediate = 3. 
- go: MaskedNotEqual constImm: 4 masked: "true" commutative: "true" extension: "AVX.*" - documentation: "// Predicate immediate is 4 if it has;" + documentation: !string |- + // MaskedNotEqual compares for inequality. + // Const Immediate = 4. - go: MaskedGreaterEqual constImm: 5 masked: "true" commutative: "false" extension: "AVX.*" - documentation: "// Predicate immediate is 5 if it has;" + documentation: !string |- + // MaskedGreaterEqual compares for greater than or equal. + // Const Immediate = 5. - go: MaskedGreater constImm: 6 masked: "true" commutative: "false" extension: "AVX.*" - documentation: "// Predicate immediate is 6 if it has;" \ No newline at end of file + documentation: !string |- + // MaskedGreater compares for greater than. + // Const Immediate = 6. diff --git a/internal/simdgen/ops/FPonlyArith/categories.yaml b/internal/simdgen/ops/FPonlyArith/categories.yaml index e486225e..c00d43d6 100644 --- a/internal/simdgen/ops/FPonlyArith/categories.yaml +++ b/internal/simdgen/ops/FPonlyArith/categories.yaml @@ -2,136 +2,225 @@ - go: Div commutative: "false" extension: "AVX.*" + documentation: !string |- + // Div divides elements of two vectors. - go: MaskedDiv commutative: "false" masked: "true" extension: "AVX.*" + documentation: !string |- + // MaskedDiv divides elements of two vectors. - go: Sqrt commutative: "false" extension: "AVX.*" + documentation: !string |- + // Sqrt computes the square root of each element. - go: MaskedSqrt commutative: "false" masked: "true" extension: "AVX.*" + documentation: !string |- + // MaskedSqrt computes the square root of each element. - go: ApproximateReciprocal commutative: "false" extension: "AVX.*" + documentation: !string |- + // ApproximateReciprocal computes an approximate reciprocal of each element. - go: MaskedApproximateReciprocal commutative: "false" masked: "true" extension: "AVX.*" + documentation: !string |- + // MaskedApproximateReciprocal computes an approximate reciprocal of each element. 
- go: ApproximateReciprocalOfSqrt commutative: "false" extension: "AVX.*" + documentation: !string |- + // ApproximateReciprocalOfSqrt computes an approximate reciprocal of the square root of each element. - go: MaskedApproximateReciprocalOfSqrt commutative: "false" masked: "true" extension: "AVX.*" + documentation: !string |- + // MaskedApproximateReciprocalOfSqrt computes an approximate reciprocal of the square root of each element. - go: MaskedMulByPowOf2 # This operation is all after AVX512, the unmasked version will be generated. commutative: "false" masked: "true" extension: "AVX.*" + documentation: !string |- + // MaskedMulByPowOf2 multiplies elements by a power of 2. - go: Round commutative: "false" extension: "AVX.*" constImm: 0 + documentation: !string |- + // Round rounds elements to the nearest integer. + // Const Immediate = 0. - go: MaskedRoundWithPrecision commutative: "false" extension: "AVX.*" constImm: 0 masked: "true" + documentation: !string |- + // MaskedRoundWithPrecision rounds elements with specified precision. + // Const Immediate = 0. - go: MaskedRoundSuppressExceptionWithPrecision commutative: "false" extension: "AVX.*" constImm: 8 masked: "true" + documentation: !string |- + // MaskedRoundSuppressExceptionWithPrecision rounds elements with specified precision, suppressing exceptions. + // Const Immediate = 8. - go: MaskedDiffWithRoundWithPrecision commutative: "false" extension: "AVX.*" constImm: 0 masked: "true" + documentation: !string |- + // MaskedDiffWithRoundWithPrecision computes the difference after rounding with specified precision. + // Const Immediate = 0. - go: MaskedDiffWithRoundSuppressExceptionWithPrecision commutative: "false" extension: "AVX.*" constImm: 8 masked: "true" + documentation: !string |- + // MaskedDiffWithRoundSuppressExceptionWithPrecision computes the difference after rounding with specified precision, suppressing exceptions. + // Const Immediate = 8. 
- go: Floor commutative: "false" extension: "AVX.*" constImm: 1 + documentation: !string |- + // Floor rounds elements down to the nearest integer. + // Const Immediate = 1. - go: MaskedFloorWithPrecision commutative: "false" extension: "AVX.*" constImm: 1 masked: "true" + documentation: !string |- + // MaskedFloorWithPrecision rounds elements down with specified precision, masked. + // Const Immediate = 1. + docUnmasked: !string |- + // FloorWithPrecision rounds elements down with specified precision. + // Const Immediate = 1. - go: MaskedFloorSuppressExceptionWithPrecision commutative: "false" extension: "AVX.*" constImm: 9 masked: "true" + documentation: !string |- + // MaskedFloorSuppressExceptionWithPrecision rounds elements down with specified precision, suppressing exceptions, masked. + // Const Immediate = 9. + docUnmasked: !string |- + // FloorSuppressExceptionWithPrecision rounds elements down with specified precision, suppressing exceptions. + // Const Immediate = 9. - go: MaskedDiffWithFloorWithPrecision commutative: "false" extension: "AVX.*" constImm: 1 masked: "true" + documentation: !string |- + // MaskedDiffWithFloorWithPrecision computes the difference after flooring with specified precision. + // Const Immediate = 1. - go: MaskedDiffWithFloorSuppressExceptionWithPrecision commutative: "false" extension: "AVX.*" constImm: 9 masked: "true" + documentation: !string |- + // MaskedDiffWithFloorSuppressExceptionWithPrecision computes the difference after flooring with specified precision, suppressing exceptions. + // Const Immediate = 9. - go: Ceil commutative: "false" extension: "AVX.*" constImm: 2 + documentation: !string |- + // Ceil rounds elements up to the nearest integer. + // Const Immediate = 2. - go: MaskedCeilWithPrecision commutative: "false" extension: "AVX.*" constImm: 2 masked: "true" + documentation: !string |- + // MaskedCeilWithPrecision rounds elements up with specified precision, masked. + // Const Immediate = 2. 
+ docUnmasked: !string |- + // CeilWithPrecision rounds elements up with specified precision. + // Const Immediate = 2. - go: MaskedCeilSuppressExceptionWithPrecision commutative: "false" extension: "AVX.*" constImm: 10 masked: "true" + documentation: !string |- + // MaskedCeilSuppressExceptionWithPrecision rounds elements up with specified precision, suppressing exceptions. + // Const Immediate = 10. - go: MaskedDiffWithCeilWithPrecision commutative: "false" extension: "AVX.*" constImm: 2 masked: "true" + documentation: !string |- + // MaskedDiffWithCeilWithPrecision computes the difference after ceiling with specified precision. + // Const Immediate = 2. - go: MaskedDiffWithCeilSuppressExceptionWithPrecision commutative: "false" extension: "AVX.*" constImm: 10 masked: "true" + documentation: !string |- + // MaskedDiffWithCeilSuppressExceptionWithPrecision computes the difference after ceiling with specified precision, suppressing exceptions. + // Const Immediate = 10. - go: Trunc commutative: "false" extension: "AVX.*" constImm: 3 + documentation: !string |- + // Trunc truncates elements towards zero. + // Const Immediate = 3. - go: MaskedTruncWithPrecision commutative: "false" extension: "AVX.*" constImm: 3 masked: "true" + documentation: !string |- + // MaskedTruncWithPrecision truncates elements with specified precision. + // Const Immediate = 3. - go: MaskedTruncSuppressExceptionWithPrecision commutative: "false" extension: "AVX.*" constImm: 11 masked: "true" + documentation: !string |- + // MaskedTruncSuppressExceptionWithPrecision truncates elements with specified precision, suppressing exceptions. + // Const Immediate = 11. - go: MaskedDiffWithTruncWithPrecision commutative: "false" extension: "AVX.*" constImm: 3 masked: "true" + documentation: !string |- + // MaskedDiffWithTruncWithPrecision computes the difference after truncating with specified precision. + // Const Immediate = 3. 
- go: MaskedDiffWithTruncSuppressExceptionWithPrecision commutative: "false" extension: "AVX.*" constImm: 11 masked: "true" + documentation: !string |- + // MaskedDiffWithTruncSuppressExceptionWithPrecision computes the difference after truncating with specified precision, suppressing exceptions. + // Const Immediate = 11. - go: AddSub commutative: "false" - extension: "AVX.*" \ No newline at end of file + extension: "AVX.*" + documentation: !string |- + // AddSub subtracts even elements and adds odd elements of two vectors. \ No newline at end of file diff --git a/internal/simdgen/ops/IntOnlyArith/categories.yaml b/internal/simdgen/ops/IntOnlyArith/categories.yaml index c74b57c4..b6c83bf3 100644 --- a/internal/simdgen/ops/IntOnlyArith/categories.yaml +++ b/internal/simdgen/ops/IntOnlyArith/categories.yaml @@ -2,27 +2,40 @@ - go: Average commutative: "true" extension: "AVX.*" # VPAVGB/W are available across various AVX versions + documentation: !string |- + // Average computes the rounded average of corresponding elements. - go: MaskedAverage commutative: "true" masked: "true" extension: "AVX512.*" # Masked operations are typically AVX512 + documentation: !string |- + // MaskedAverage computes the rounded average of corresponding elements. - go: Absolute commutative: "false" # Unary operation, not commutative extension: "AVX.*" # VPABSB/W/D are AVX, VPABSQ is AVX512 + documentation: !string |- + // Absolute computes the absolute value of each element. - go: MaskedAbsolute commutative: "false" masked: "true" extension: "AVX512.*" + documentation: !string |- + // MaskedAbsolute computes the absolute value of each element. - go: Sign # Applies sign of second operand to first: sign(val, sign_src) commutative: "false" extension: "AVX.*" + documentation: !string |- + // Sign returns the product of the first operand with -1, 0, or 1, + // whichever constant is nearest to the value of the second operand. 
# Sign does not have masked version - go: MaskedPopCount commutative: "false" masked: "true" - extension: "AVX512.*" # VPOPCNT instructions are AVX512 (BITALG or VPOPCNTDQ) \ No newline at end of file + extension: "AVX512.*" # VPOPCNT instructions are AVX512 (BITALG or VPOPCNTDQ) + documentation: !string |- + // MaskedPopCount counts the number of set bits in each element. \ No newline at end of file diff --git a/internal/simdgen/ops/MLOps/categories.yaml b/internal/simdgen/ops/MLOps/categories.yaml index 6ebb12a0..18cfd967 100644 --- a/internal/simdgen/ops/MLOps/categories.yaml +++ b/internal/simdgen/ops/MLOps/categories.yaml @@ -2,24 +2,33 @@ - go: PairDotProd commutative: "false" extension: "AVX.*" - documentation: "// Multiply the elements and add the pairs together, yielding a vector of half as many elements with twice the input element size" + documentation: !string |- + // PairDotProd multiplies the elements and add the pairs together, + // yielding a vector of half as many elements with twice the input element size. - go: MaskedPairDotProd masked: "true" commutative: "false" extension: "AVX.*" - documentation: "// Multiply the elements and add the pairs together, yielding a vector of half as many elements with twice the input element size" + documentation: !string |- + // MaskedPairDotProd multiplies the elements and add the pairs together, + // yielding a vector of half as many elements with twice the input element size. - go: SaturatedUnsignedSignedPairDotProd commutative: "false" extension: "AVX.*" - documentation: "// Multiply the elements and add the pairs together with saturation, yielding a vector of half as many elements with twice the input element size" + documentation: !string |- + // SaturatedPairDotProd multiplies the elements and add the pairs together with saturation, + // yielding a vector of half as many elements with twice the input element size. 
- go: MaskedSaturatedUnsignedSignedPairDotProd masked: "true" commutative: "false" extension: "AVX.*" - documentation: "// Multiply the elements and add the pairs together with saturation, yielding a vector of half as many elements with twice the input element size" + documentation: !string |- + // MaskedSaturatedPairDotProd multiplies the elements and add the pairs together with saturation, + // yielding a vector of half as many elements with twice the input element size. # QuadDotProd, i.e. VPDPBUSD(S) are operations with src/dst on the same register, we are not supporting this as of now. - go: DotProdBroadcast commutative: "true" extension: "AVX.*" - documentation: "// Multiply all the elements and add them together; the result is a broadcast of the dot product" + documentation: !string |- + // DotProdBroadcast multiplies all elements and broadcasts the sum. diff --git a/internal/simdgen/ops/MinMax/categories.yaml b/internal/simdgen/ops/MinMax/categories.yaml index d5131958..c64eb24e 100644 --- a/internal/simdgen/ops/MinMax/categories.yaml +++ b/internal/simdgen/ops/MinMax/categories.yaml @@ -2,14 +2,22 @@ - go: Max commutative: "true" extension: "AVX.*" + documentation: !string |- + // Max computes the maximum of corresponding elements. - go: MaskedMax commutative: "true" masked: "true" extension: "AVX.*" + documentation: !string |- + // MaskedMax computes the maximum of corresponding elements. - go: Min commutative: "true" extension: "AVX.*" + documentation: !string |- + // Min computes the minimum of corresponding elements. - go: MaskedMin commutative: "true" masked: "true" extension: "AVX.*" + documentation: !string |- + // MaskedMin computes the minimum of corresponding elements. 
diff --git a/internal/simdgen/ops/Mul/categories.yaml b/internal/simdgen/ops/Mul/categories.yaml index 42275e24..def502f3 100644 --- a/internal/simdgen/ops/Mul/categories.yaml +++ b/internal/simdgen/ops/Mul/categories.yaml @@ -2,34 +2,55 @@ - go: Mul commutative: "true" extension: "AVX.*" + documentation: !string |- + // Mul multiplies corresponding elements of two vectors. - go: MulEvenWiden commutative: "true" extension: "AVX.*" - documentation: "// Multiplies the even index elements from the two sources of size X at index i, store the result of size 2X at index i/2" + documentation: !string |- + // MulEvenWiden multiplies even-indexed elements, widening the result. + // Result[i] = v1.Even[i] * v2.Even[i]. - go: MulHigh commutative: "true" extension: "AVX.*" - documentation: "// Multiplies the elements from the two sources of size X at index i, store the high X bits of the result of size 2X at index i" + documentation: !string |- + // MulHigh multiplies elements and stores the high part of the result. - go: MulLow commutative: "true" extension: "AVX.*" - documentation: "// Multiplies the elements from the two sources of size X at index i, store the low X bits of the result of size 2X at index i" + documentation: !string |- + // MulLow multiplies elements and stores the low part of the result. - go: MaskedMul masked: "true" commutative: "true" extension: "AVX.*" + documentation: !string |- + // MaskedMul multiplies corresponding elements of two vectors, masked. + docUnmasked: !string |- + // Mul multiplies corresponding elements of two vectors. - go: MaskedMulEvenWiden masked: "true" commutative: "true" extension: "AVX.*" - documentation: "// Multiplies the even index elements from the two sources of size X at index i, store the result of size 2X at index i/2" + documentation: !string |- + // MaskedMulEvenWiden multiplies even-indexed elements, widening the result, masked. + // Result[i] = v1.Even[i] * v2.Even[i]. 
+ docUnmasked: !string |- + // MulEvenWiden multiplies even-indexed elements, widening the result. + // Result[i] = v1.Even[i] * v2.Even[i]. - go: MaskedMulHigh masked: "true" commutative: "true" extension: "AVX.*" - documentation: "// Multiplies the elements from the two sources of size X at index i, store the high X bits of the result of size 2X at index i" + documentation: !string |- + // MaskedMulHigh multiplies elements and stores the high part of the result, masked. + docUnmasked: !string |- + // MulHigh multiplies elements and stores the high part of the result. - go: MaskedMulLow masked: "true" commutative: "true" extension: "AVX.*" - documentation: "// Multiplies the elements from the two sources of size X at index i, store the low X bits of the result of size 2X at index i" \ No newline at end of file + documentation: !string |- + // MaskedMulLow multiplies elements and stores the low part of the result, masked. + docUnmasked: !string |- + // MulLow multiplies elements and stores the low part of the result. diff --git a/internal/simdgen/ops/main.go b/internal/simdgen/ops/main.go index 7e462bf7..c71f3ff6 100644 --- a/internal/simdgen/ops/main.go +++ b/internal/simdgen/ops/main.go @@ -32,6 +32,10 @@ func mergeYamlFiles(targetFileName string) error { if err != nil { return fmt.Errorf("failed to write '!sum' to %s: %w", targetFileName, err) } + _, err = writer.WriteString("# TODO: remove the \"Const Immediate\" from the documentation field, it's there only for debug purposes.\n") + if err != nil { + return fmt.Errorf("failed to write '!sum' to %s: %w", targetFileName, err) + } entries, err := os.ReadDir(baseDir) if err != nil { From f26e3cc49a9405339efc8d819966243ef17bac89 Mon Sep 17 00:00:00 2001 From: Junyang Shao Date: Sun, 8 Jun 2025 02:45:08 +0000 Subject: [PATCH 081/200] internal/simdgen: more dot products This CL's yaml data is generated by Gemini and reviewed by me. This CL also updates simdgen to fit into some new op shapes. 
This CL generates CL 681298.

Change-Id: Iae240af704a79eeb1dc78f24e11c3894f76b6bb7
Reviewed-on: https://go-review.googlesource.com/c/arch/+/680215
LUCI-TryBot-Result: Go LUCI
Reviewed-by: David Chase
---
 internal/simdgen/categories.yaml           |  45 +++++++++
 internal/simdgen/go.yaml                   | 105 +++++++++++++++++++--
 internal/simdgen/ops/MLOps/categories.yaml |  45 +++++++++
 internal/simdgen/ops/MLOps/go.yaml         | 105 +++++++++++++++++++--
 4 files changed, 286 insertions(+), 14 deletions(-)

diff --git a/internal/simdgen/categories.yaml b/internal/simdgen/categories.yaml
index f4194101..dbf5b41d 100644
--- a/internal/simdgen/categories.yaml
+++ b/internal/simdgen/categories.yaml
@@ -528,6 +528,51 @@
   extension: "AVX.*"
   documentation: !string |-
     // DotProdBroadcast multiplies all elements and broadcasts the sum.
+    // Const Immediate = 127.
+- go: UnsignedSignedQuadDotProdAccumulate
+  commutative: "false"
+  extension: "AVX.*"
+  documentation: !string |-
+    // UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
+- go: MaskedUnsignedSignedQuadDotProdAccumulate
+  masked: "true"
+  commutative: "false"
+  extension: "AVX.*"
+  documentation: !string |-
+    // MaskedUnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
+- go: SaturatedUnsignedSignedQuadDotProdAccumulate
+  commutative: "false"
+  extension: "AVX.*"
+  documentation: !string |-
+    // SaturatedUnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
+- go: MaskedSaturatedUnsignedSignedQuadDotProdAccumulate
+  masked: "true"
+  commutative: "false"
+  extension: "AVX.*"
+  documentation: !string |-
+    // MaskedSaturatedUnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
+- go: PairDotProdAccumulate + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // PairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x. +- go: MaskedPairDotProdAccumulate + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // MaskedPairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x. +- go: SaturatedPairDotProdAccumulate + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // SaturatedPairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x. +- go: MaskedSaturatedPairDotProdAccumulate + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // MaskedSaturatedPairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x. - go: Max commutative: "true" extension: "AVX.*" diff --git a/internal/simdgen/go.yaml b/internal/simdgen/go.yaml index de65a04e..b5aca038 100644 --- a/internal/simdgen/go.yaml +++ b/internal/simdgen/go.yaml @@ -525,21 +525,112 @@ asm: VPMADDUBSW in: - class: mask - - *uint - - *int3 + - go: $t1 + base: uint + overwriteElemBits: 8 + - go: $t2 + base: int + overwriteElemBits: 8 out: - - *int2 + - *int3 - go: DotProdBroadcast asm: VDPPD in: - - &float + - &dpb_src go: $t base: float - - *float + elemBits: 64 + bits: $bits + - *dpb_src - class: immediate - const: 127 # make sure the control bits [4:5] are all 1 + const: 127 out: - - *float + - *dpb_src +- go: UnsignedSignedQuadDotProdAccumulate + asm: "VPDPBUSD" + in: + - &qdpa_acc + go: $t_acc + elemBits: 32 + - &qdpa_src1 + go: $t_src1 + base: uint + overwriteElemBits: 8 + - &qdpa_src2 + go: $t_src2 + base: int + overwriteElemBits: 8 + out: + - *qdpa_acc +- go: MaskedUnsignedSignedQuadDotProdAccumulate + asm: "VPDPBUSD" + in: + - *qdpa_acc + - class: mask + - *qdpa_src1 + - 
*qdpa_src2 + out: + - *qdpa_acc +- go: SaturatedUnsignedSignedQuadDotProdAccumulate + asm: "VPDPBUSDS" + in: + - *qdpa_acc + - *qdpa_src1 + - *qdpa_src2 + out: + - *qdpa_acc +- go: MaskedSaturatedUnsignedSignedQuadDotProdAccumulate + asm: "VPDPBUSDS" + in: + - *qdpa_acc + - class: mask + - *qdpa_src1 + - *qdpa_src2 + out: + - *qdpa_acc +- go: PairDotProdAccumulate + asm: "VPDPWSSD" + in: + - &pdpa_acc + go: $t_acc + base: int + elemBits: 32 + - &pdpa_src1 + go: $t_src1 + base: int + overwriteElemBits: 16 + - &pdpa_src2 + go: $t_src2 + base: int + overwriteElemBits: 16 + out: + - *pdpa_acc +- go: MaskedPairDotProdAccumulate + asm: "VPDPWSSD" + in: + - *pdpa_acc + - class: mask + - *pdpa_src1 + - *pdpa_src2 + out: + - *pdpa_acc +- go: SaturatedPairDotProdAccumulate + asm: "VPDPWSSDS" + in: + - *pdpa_acc + - *pdpa_src1 + - *pdpa_src2 + out: + - *pdpa_acc +- go: MaskedSaturatedPairDotProdAccumulate + asm: "VPDPWSSDS" + in: + - *pdpa_acc + - class: mask + - *pdpa_src1 + - *pdpa_src2 + out: + - *pdpa_acc - go: Max asm: "V?PMAXS[BWDQ]" in: &2int diff --git a/internal/simdgen/ops/MLOps/categories.yaml b/internal/simdgen/ops/MLOps/categories.yaml index 18cfd967..6375534c 100644 --- a/internal/simdgen/ops/MLOps/categories.yaml +++ b/internal/simdgen/ops/MLOps/categories.yaml @@ -32,3 +32,48 @@ extension: "AVX.*" documentation: !string |- // DotProdBroadcast multiplies all elements and broadcasts the sum. + // Const Immediate = 127. +- go: UnsignedSignedQuadDotProdAccumulate + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x. +- go: MaskedUnsignedSignedQuadDotProdAccumulate + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // MaskedUnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x. 
+- go: SaturatedUnsignedSignedQuadDotProdAccumulate
+  commutative: "false"
+  extension: "AVX.*"
+  documentation: !string |-
+    // SaturatedUnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
+- go: MaskedSaturatedUnsignedSignedQuadDotProdAccumulate
+  masked: "true"
+  commutative: "false"
+  extension: "AVX.*"
+  documentation: !string |-
+    // MaskedSaturatedUnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
+- go: PairDotProdAccumulate
+  commutative: "false"
+  extension: "AVX.*"
+  documentation: !string |-
+    // PairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x.
+- go: MaskedPairDotProdAccumulate
+  masked: "true"
+  commutative: "false"
+  extension: "AVX.*"
+  documentation: !string |-
+    // MaskedPairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x.
+- go: SaturatedPairDotProdAccumulate
+  commutative: "false"
+  extension: "AVX.*"
+  documentation: !string |-
+    // SaturatedPairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x.
+- go: MaskedSaturatedPairDotProdAccumulate
+  masked: "true"
+  commutative: "false"
+  extension: "AVX.*"
+  documentation: !string |-
+    // MaskedSaturatedPairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x.
diff --git a/internal/simdgen/ops/MLOps/go.yaml b/internal/simdgen/ops/MLOps/go.yaml index 9e06d3c9..be8a054c 100644 --- a/internal/simdgen/ops/MLOps/go.yaml +++ b/internal/simdgen/ops/MLOps/go.yaml @@ -33,18 +33,109 @@ asm: VPMADDUBSW in: - class: mask - - *uint - - *int3 + - go: $t1 + base: uint + overwriteElemBits: 8 + - go: $t2 + base: int + overwriteElemBits: 8 out: - - *int2 + - *int3 - go: DotProdBroadcast asm: VDPPD in: - - &float + - &dpb_src go: $t base: float - - *float + elemBits: 64 + bits: $bits + - *dpb_src - class: immediate - const: 127 # make sure the control bits [4:5] are all 1 + const: 127 + out: + - *dpb_src +- go: UnsignedSignedQuadDotProdAccumulate + asm: "VPDPBUSD" + in: + - &qdpa_acc + go: $t_acc + elemBits: 32 + - &qdpa_src1 + go: $t_src1 + base: uint + overwriteElemBits: 8 + - &qdpa_src2 + go: $t_src2 + base: int + overwriteElemBits: 8 + out: + - *qdpa_acc +- go: MaskedUnsignedSignedQuadDotProdAccumulate + asm: "VPDPBUSD" + in: + - *qdpa_acc + - class: mask + - *qdpa_src1 + - *qdpa_src2 + out: + - *qdpa_acc +- go: SaturatedUnsignedSignedQuadDotProdAccumulate + asm: "VPDPBUSDS" + in: + - *qdpa_acc + - *qdpa_src1 + - *qdpa_src2 + out: + - *qdpa_acc +- go: MaskedSaturatedUnsignedSignedQuadDotProdAccumulate + asm: "VPDPBUSDS" + in: + - *qdpa_acc + - class: mask + - *qdpa_src1 + - *qdpa_src2 + out: + - *qdpa_acc +- go: PairDotProdAccumulate + asm: "VPDPWSSD" + in: + - &pdpa_acc + go: $t_acc + base: int + elemBits: 32 + - &pdpa_src1 + go: $t_src1 + base: int + overwriteElemBits: 16 + - &pdpa_src2 + go: $t_src2 + base: int + overwriteElemBits: 16 + out: + - *pdpa_acc +- go: MaskedPairDotProdAccumulate + asm: "VPDPWSSD" + in: + - *pdpa_acc + - class: mask + - *pdpa_src1 + - *pdpa_src2 + out: + - *pdpa_acc +- go: SaturatedPairDotProdAccumulate + asm: "VPDPWSSDS" + in: + - *pdpa_acc + - *pdpa_src1 + - *pdpa_src2 + out: + - *pdpa_acc +- go: MaskedSaturatedPairDotProdAccumulate + asm: "VPDPWSSDS" + in: + - *pdpa_acc + - class: mask + - *pdpa_src1 + 
- *pdpa_src2 out: - - *float \ No newline at end of file + - *pdpa_acc From 030c5e4e253991e0c3b78dda6c9099ea7c3d9774 Mon Sep 17 00:00:00 2001 From: Junyang Shao Date: Tue, 10 Jun 2025 17:51:46 +0000 Subject: [PATCH 082/200] internal/simdgen: add fused mul add sub This CL is generated by Gemini and reviewed by myself. This CL generates CL 681299. Change-Id: I5bc57185ce104d0d80dfd82c8eff15312c397aeb Reviewed-on: https://go-review.googlesource.com/c/arch/+/680595 Reviewed-by: David Chase LUCI-TryBot-Result: Go LUCI --- internal/simdgen/categories.yaml | 108 ++++++++++++++ internal/simdgen/go.yaml | 164 +++++++++++++++++++++ internal/simdgen/ops/MLOps/categories.yaml | 108 ++++++++++++++ internal/simdgen/ops/MLOps/go.yaml | 164 +++++++++++++++++++++ 4 files changed, 544 insertions(+) diff --git a/internal/simdgen/categories.yaml b/internal/simdgen/categories.yaml index dbf5b41d..8b325fa4 100644 --- a/internal/simdgen/categories.yaml +++ b/internal/simdgen/categories.yaml @@ -573,6 +573,114 @@ extension: "AVX.*" documentation: !string |- // MaskedSaturatedPairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x. +- go: MaskedFusedMultiplyAdd132 + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // MaskedFusedMultiplyAdd132 performs `(v1 * v3) + v2`. +- go: MaskedFusedMultiplyAdd213 + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // MaskedFusedMultiplyAdd213 performs `(v2 * v1) + v3`. +- go: MaskedFusedMultiplyAdd231 + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // MaskedFusedMultiplyAdd231 performs `(v2 * v3) + v1`. +- go: MaskedFusedMultiplySub132 + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // MaskedFusedMultiplySub132 performs `(v1 * v3) - v2`. 
+- go: MaskedFusedMultiplySub213 + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // MaskedFusedMultiplySub213 performs `(v2 * v1) - v3`. +- go: MaskedFusedMultiplySub231 + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // MaskedFusedMultiplySub231 performs `(v2 * v3) - v1`. +- go: MaskedFusedNegativeMultiplyAdd132 + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // MaskedFusedNegativeMultiplyAdd132 performs `-(v1 * v3) + v2`. +- go: MaskedFusedNegativeMultiplyAdd213 + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // MaskedFusedNegativeMultiplyAdd213 performs `-(v2 * v1) + v3`. +- go: MaskedFusedNegativeMultiplyAdd231 + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // MaskedFusedNegativeMultiplyAdd231 performs `-(v2 * v3) + v1`. +- go: MaskedFusedNegativeMultiplySub132 + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // MaskedFusedNegativeMultiplySub132 performs `-(v1 * v3) - v2`. +- go: MaskedFusedNegativeMultiplySub213 + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // MaskedFusedNegativeMultiplySub213 performs `-(v2 * v1) - v3`. +- go: MaskedFusedNegativeMultiplySub231 + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // MaskedFusedNegativeMultiplySub231 performs `-(v2 * v3) - v1`. +- go: MaskedFusedMultiplyAddSub132 + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // MaskedFusedMultiplyAddSub132 performs `(v1 * v3) - v2` for odd-indexed elements, and `(v1 * v3) + v2` for even-indexed elements. 
+- go: MaskedFusedMultiplyAddSub213 + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // MaskedFusedMultiplyAddSub213 performs `(v2 * v1) - v3` for odd-indexed elements, and `(v2 * v1) + v3` for even-indexed elements. +- go: MaskedFusedMultiplyAddSub231 + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // MaskedFusedMultiplyAddSub231 performs `(v2 * v3) - v1` for odd-indexed elements, and `(v2 * v3) + v1` for even-indexed elements. +- go: MaskedFusedMultiplySubAdd132 + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // MaskedFusedMultiplySubAdd132 performs `(v1 * v3) + v2` for odd-indexed elements, and `(v1 * v3) - v2` for even-indexed elements. +- go: MaskedFusedMultiplySubAdd213 + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // MaskedFusedMultiplySubAdd213 performs `(v2 * v1) + v3` for odd-indexed elements, and `(v2 * v1) - v3` for even-indexed elements. +- go: MaskedFusedMultiplySubAdd231 + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // MaskedFusedMultiplySubAdd231 performs `(v2 * v3) + v1` for odd-indexed elements, and `(v2 * v3) - v1` for even-indexed elements. 
- go: Max commutative: "true" extension: "AVX.*" diff --git a/internal/simdgen/go.yaml b/internal/simdgen/go.yaml index b5aca038..6fb817be 100644 --- a/internal/simdgen/go.yaml +++ b/internal/simdgen/go.yaml @@ -631,6 +631,170 @@ - *pdpa_src2 out: - *pdpa_acc +- go: MaskedFusedMultiplyAdd132 + asm: "VFMADD132PS|VFMADD132PD" + in: + - &fma_op + go: $t + base: float + - class: mask + - *fma_op + - *fma_op + out: + - *fma_op +- go: MaskedFusedMultiplyAdd213 + asm: "VFMADD213PS|VFMADD213PD" + in: + - *fma_op + - class: mask + - *fma_op + - *fma_op + out: + - *fma_op +- go: MaskedFusedMultiplyAdd231 + asm: "VFMADD231PS|VFMADD231PD" + in: + - *fma_op + - class: mask + - *fma_op + - *fma_op + out: + - *fma_op +- go: MaskedFusedMultiplySub132 + asm: "VFMSUB132PS|VFMSUB132PD" + in: + - *fma_op + - class: mask + - *fma_op + - *fma_op + out: + - *fma_op +- go: MaskedFusedMultiplySub213 + asm: "VFMSUB213PS|VFMSUB213PD" + in: + - *fma_op + - class: mask + - *fma_op + - *fma_op + out: + - *fma_op +- go: MaskedFusedMultiplySub231 + asm: "VFMSUB231PS|VFMSUB231PD" + in: + - *fma_op + - class: mask + - *fma_op + - *fma_op + out: + - *fma_op +- go: MaskedFusedNegativeMultiplyAdd132 + asm: "VFNMADD132PS|VFNMADD132PD" + in: + - *fma_op + - class: mask + - *fma_op + - *fma_op + out: + - *fma_op +- go: MaskedFusedNegativeMultiplyAdd213 + asm: "VFNMADD213PS|VFNMADD213PD" + in: + - *fma_op + - class: mask + - *fma_op + - *fma_op + out: + - *fma_op +- go: MaskedFusedNegativeMultiplyAdd231 + asm: "VFNMADD231PS|VFNMADD231PD" + in: + - *fma_op + - class: mask + - *fma_op + - *fma_op + out: + - *fma_op +- go: MaskedFusedNegativeMultiplySub132 + asm: "VFNMSUB132PS|VFNMSUB132PD" + in: + - *fma_op + - class: mask + - *fma_op + - *fma_op + out: + - *fma_op +- go: MaskedFusedNegativeMultiplySub213 + asm: "VFNMSUB213PS|VFNMSUB213PD" + in: + - *fma_op + - class: mask + - *fma_op + - *fma_op + out: + - *fma_op +- go: MaskedFusedNegativeMultiplySub231 + asm: "VFNMSUB231PS|VFNMSUB231PD" + in: + - *fma_op 
+ - class: mask + - *fma_op + - *fma_op + out: + - *fma_op +- go: MaskedFusedMultiplyAddSub132 + asm: "VFMADDSUB132PS|VFMADDSUB132PD" + in: + - *fma_op + - class: mask + - *fma_op + - *fma_op + out: + - *fma_op +- go: MaskedFusedMultiplyAddSub213 + asm: "VFMADDSUB213PS|VFMADDSUB213PD" + in: + - *fma_op + - class: mask + - *fma_op + - *fma_op + out: + - *fma_op +- go: MaskedFusedMultiplyAddSub231 + asm: "VFMADDSUB231PS|VFMADDSUB231PD" + in: + - *fma_op + - class: mask + - *fma_op + - *fma_op + out: + - *fma_op +- go: MaskedFusedMultiplySubAdd132 + asm: "VFMSUBADD132PS|VFMSUBADD132PD" + in: + - *fma_op + - class: mask + - *fma_op + - *fma_op + out: + - *fma_op +- go: MaskedFusedMultiplySubAdd213 + asm: "VFMSUBADD213PS|VFMSUBADD213PD" + in: + - *fma_op + - class: mask + - *fma_op + - *fma_op + out: + - *fma_op +- go: MaskedFusedMultiplySubAdd231 + asm: "VFMSUBADD231PS|VFMSUBADD231PD" + in: + - *fma_op + - class: mask + - *fma_op + - *fma_op + out: + - *fma_op - go: Max asm: "V?PMAXS[BWDQ]" in: &2int diff --git a/internal/simdgen/ops/MLOps/categories.yaml b/internal/simdgen/ops/MLOps/categories.yaml index 6375534c..412af692 100644 --- a/internal/simdgen/ops/MLOps/categories.yaml +++ b/internal/simdgen/ops/MLOps/categories.yaml @@ -77,3 +77,111 @@ extension: "AVX.*" documentation: !string |- // MaskedSaturatedPairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x. +- go: MaskedFusedMultiplyAdd132 + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // MaskedFusedMultiplyAdd132 performs `(v1 * v3) + v2`. +- go: MaskedFusedMultiplyAdd213 + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // MaskedFusedMultiplyAdd213 performs `(v2 * v1) + v3`. +- go: MaskedFusedMultiplyAdd231 + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // MaskedFusedMultiplyAdd231 performs `(v2 * v3) + v1`. 
+- go: MaskedFusedMultiplySub132 + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // MaskedFusedMultiplySub132 performs `(v1 * v3) - v2`. +- go: MaskedFusedMultiplySub213 + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // MaskedFusedMultiplySub213 performs `(v2 * v1) - v3`. +- go: MaskedFusedMultiplySub231 + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // MaskedFusedMultiplySub231 performs `(v2 * v3) - v1`. +- go: MaskedFusedNegativeMultiplyAdd132 + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // MaskedFusedNegativeMultiplyAdd132 performs `-(v1 * v3) + v2`. +- go: MaskedFusedNegativeMultiplyAdd213 + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // MaskedFusedNegativeMultiplyAdd213 performs `-(v2 * v1) + v3`. +- go: MaskedFusedNegativeMultiplyAdd231 + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // MaskedFusedNegativeMultiplyAdd231 performs `-(v2 * v3) + v1`. +- go: MaskedFusedNegativeMultiplySub132 + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // MaskedFusedNegativeMultiplySub132 performs `-(v1 * v3) - v2`. +- go: MaskedFusedNegativeMultiplySub213 + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // MaskedFusedNegativeMultiplySub213 performs `-(v2 * v1) - v3`. +- go: MaskedFusedNegativeMultiplySub231 + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // MaskedFusedNegativeMultiplySub231 performs `-(v2 * v3) - v1`. +- go: MaskedFusedMultiplyAddSub132 + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // MaskedFusedMultiplyAddSub132 performs `(v1 * v3) - v2` for odd-indexed elements, and `(v1 * v3) + v2` for even-indexed elements. 
+- go: MaskedFusedMultiplyAddSub213 + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // MaskedFusedMultiplyAddSub213 performs `(v2 * v1) - v3` for odd-indexed elements, and `(v2 * v1) + v3` for even-indexed elements. +- go: MaskedFusedMultiplyAddSub231 + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // MaskedFusedMultiplyAddSub231 performs `(v2 * v3) - v1` for odd-indexed elements, and `(v2 * v3) + v1` for even-indexed elements. +- go: MaskedFusedMultiplySubAdd132 + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // MaskedFusedMultiplySubAdd132 performs `(v1 * v3) + v2` for odd-indexed elements, and `(v1 * v3) - v2` for even-indexed elements. +- go: MaskedFusedMultiplySubAdd213 + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // MaskedFusedMultiplySubAdd213 performs `(v2 * v1) + v3` for odd-indexed elements, and `(v2 * v1) - v3` for even-indexed elements. +- go: MaskedFusedMultiplySubAdd231 + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // MaskedFusedMultiplySubAdd231 performs `(v2 * v3) + v1` for odd-indexed elements, and `(v2 * v3) - v1` for even-indexed elements. 
diff --git a/internal/simdgen/ops/MLOps/go.yaml b/internal/simdgen/ops/MLOps/go.yaml index be8a054c..e0cefda2 100644 --- a/internal/simdgen/ops/MLOps/go.yaml +++ b/internal/simdgen/ops/MLOps/go.yaml @@ -139,3 +139,167 @@ - *pdpa_src2 out: - *pdpa_acc +- go: MaskedFusedMultiplyAdd132 + asm: "VFMADD132PS|VFMADD132PD" + in: + - &fma_op + go: $t + base: float + - class: mask + - *fma_op + - *fma_op + out: + - *fma_op +- go: MaskedFusedMultiplyAdd213 + asm: "VFMADD213PS|VFMADD213PD" + in: + - *fma_op + - class: mask + - *fma_op + - *fma_op + out: + - *fma_op +- go: MaskedFusedMultiplyAdd231 + asm: "VFMADD231PS|VFMADD231PD" + in: + - *fma_op + - class: mask + - *fma_op + - *fma_op + out: + - *fma_op +- go: MaskedFusedMultiplySub132 + asm: "VFMSUB132PS|VFMSUB132PD" + in: + - *fma_op + - class: mask + - *fma_op + - *fma_op + out: + - *fma_op +- go: MaskedFusedMultiplySub213 + asm: "VFMSUB213PS|VFMSUB213PD" + in: + - *fma_op + - class: mask + - *fma_op + - *fma_op + out: + - *fma_op +- go: MaskedFusedMultiplySub231 + asm: "VFMSUB231PS|VFMSUB231PD" + in: + - *fma_op + - class: mask + - *fma_op + - *fma_op + out: + - *fma_op +- go: MaskedFusedNegativeMultiplyAdd132 + asm: "VFNMADD132PS|VFNMADD132PD" + in: + - *fma_op + - class: mask + - *fma_op + - *fma_op + out: + - *fma_op +- go: MaskedFusedNegativeMultiplyAdd213 + asm: "VFNMADD213PS|VFNMADD213PD" + in: + - *fma_op + - class: mask + - *fma_op + - *fma_op + out: + - *fma_op +- go: MaskedFusedNegativeMultiplyAdd231 + asm: "VFNMADD231PS|VFNMADD231PD" + in: + - *fma_op + - class: mask + - *fma_op + - *fma_op + out: + - *fma_op +- go: MaskedFusedNegativeMultiplySub132 + asm: "VFNMSUB132PS|VFNMSUB132PD" + in: + - *fma_op + - class: mask + - *fma_op + - *fma_op + out: + - *fma_op +- go: MaskedFusedNegativeMultiplySub213 + asm: "VFNMSUB213PS|VFNMSUB213PD" + in: + - *fma_op + - class: mask + - *fma_op + - *fma_op + out: + - *fma_op +- go: MaskedFusedNegativeMultiplySub231 + asm: "VFNMSUB231PS|VFNMSUB231PD" + in: + - *fma_op + - 
class: mask + - *fma_op + - *fma_op + out: + - *fma_op +- go: MaskedFusedMultiplyAddSub132 + asm: "VFMADDSUB132PS|VFMADDSUB132PD" + in: + - *fma_op + - class: mask + - *fma_op + - *fma_op + out: + - *fma_op +- go: MaskedFusedMultiplyAddSub213 + asm: "VFMADDSUB213PS|VFMADDSUB213PD" + in: + - *fma_op + - class: mask + - *fma_op + - *fma_op + out: + - *fma_op +- go: MaskedFusedMultiplyAddSub231 + asm: "VFMADDSUB231PS|VFMADDSUB231PD" + in: + - *fma_op + - class: mask + - *fma_op + - *fma_op + out: + - *fma_op +- go: MaskedFusedMultiplySubAdd132 + asm: "VFMSUBADD132PS|VFMSUBADD132PD" + in: + - *fma_op + - class: mask + - *fma_op + - *fma_op + out: + - *fma_op +- go: MaskedFusedMultiplySubAdd213 + asm: "VFMSUBADD213PS|VFMSUBADD213PD" + in: + - *fma_op + - class: mask + - *fma_op + - *fma_op + out: + - *fma_op +- go: MaskedFusedMultiplySubAdd231 + asm: "VFMSUBADD231PS|VFMSUBADD231PD" + in: + - *fma_op + - class: mask + - *fma_op + - *fma_op + out: + - *fma_op \ No newline at end of file From 934a84cceceb03defe90c20c29568395e786d853 Mon Sep 17 00:00:00 2001 From: David Chase Date: Fri, 13 Jun 2025 16:15:40 -0400 Subject: [PATCH 083/200] arch/internal: move simd helpers into compiler, out of generated code PAIRED with CL 681500 from cmd/compile/internal/ssagen Change-Id: I42775ce43c3810fac83cb6c1674e1cae1b83d4db Reviewed-on: https://go-review.googlesource.com/c/arch/+/681615 LUCI-TryBot-Result: Go LUCI Reviewed-by: Junyang Shao Reviewed-by: Cherry Mui --- internal/simdgen/gen_simdIntrinsics.go | 101 ------------------------- 1 file changed, 101 deletions(-) diff --git a/internal/simdgen/gen_simdIntrinsics.go b/internal/simdgen/gen_simdIntrinsics.go index 5b6b74cf..02eb5a63 100644 --- a/internal/simdgen/gen_simdIntrinsics.go +++ b/internal/simdgen/gen_simdIntrinsics.go @@ -63,107 +63,6 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . 
addF(simdPackage, "{{.Name}}.Or", opLen2(ssa.OpOr{{.ReshapedVectorWithAndOr}}, types.TypeVec{{.Size}}), sys.AMD64) {{- end}} } - -func opLen1(op ssa.Op, t *types.Type) func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { - return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { - return s.newValue1(op, t, args[0]) - } -} - -func opLen2(op ssa.Op, t *types.Type) func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { - return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { - return s.newValue2(op, t, args[0], args[1]) - } -} - -func opLen3(op ssa.Op, t *types.Type) func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { - return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { - return s.newValue3(op, t, args[0], args[1], args[2]) - } -} - -func opLen4(op ssa.Op, t *types.Type) func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { - return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { - return s.newValue4(op, t, args[0], args[1], args[2], args[3]) - } -} - -func plainPanicSimdImm(s *state) { - cmp := s.newValue0(ssa.OpConstBool, types.Types[types.TBOOL]) - cmp.AuxInt = 1 - // TODO: make this a standalone panic instead of reusing the overflow panic. - // Or maybe after we implement the switch table this will be obsolete anyway. - s.check(cmp, ir.Syms.Panicoverflow) -} - -func opLen1Imm8(op ssa.Op, t *types.Type, offset int) func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { - return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { - if args[1].Op == ssa.OpConst8 { - return s.newValue1I(op, t, args[1].AuxInt< Date: Mon, 16 Jun 2025 17:47:09 +0000 Subject: [PATCH 084/200] internal/simdgen: fix float Sub instruction error FP sub is mapped to Add, this CL fixes that. 
Change-Id: I5645f9427cb89b989baa1b3d4f6e7503eb5f24dc Reviewed-on: https://go-review.googlesource.com/c/arch/+/681976 LUCI-TryBot-Result: Go LUCI Reviewed-by: David Chase --- internal/simdgen/go.yaml | 4 ++-- internal/simdgen/ops/AddSub/go.yaml | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/internal/simdgen/go.yaml b/internal/simdgen/go.yaml index 6fb817be..f7e771c6 100644 --- a/internal/simdgen/go.yaml +++ b/internal/simdgen/go.yaml @@ -55,14 +55,14 @@ # Sub - go: Sub - asm: "VPSUB[BWDQ]|VADDP[SD]" + asm: "VPSUB[BWDQ]|VSUBP[SD]" in: &2any - *any - *any out: &1any - *any - go: MaskedSub - asm: "VPSUB[BWDQ]|VADDP[SD]" + asm: "VPSUB[BWDQ]|VSUBP[SD]" in: - class: mask - *any diff --git a/internal/simdgen/ops/AddSub/go.yaml b/internal/simdgen/ops/AddSub/go.yaml index 75222a1b..c2df1e2c 100644 --- a/internal/simdgen/ops/AddSub/go.yaml +++ b/internal/simdgen/ops/AddSub/go.yaml @@ -54,14 +54,14 @@ # Sub - go: Sub - asm: "VPSUB[BWDQ]|VADDP[SD]" + asm: "VPSUB[BWDQ]|VSUBP[SD]" in: &2any - *any - *any out: &1any - *any - go: MaskedSub - asm: "VPSUB[BWDQ]|VADDP[SD]" + asm: "VPSUB[BWDQ]|VSUBP[SD]" in: - class: mask - *any From ebbfc1c588ead2032daf83f9d5fe6fce2b3c7ab4 Mon Sep 17 00:00:00 2001 From: David Chase Date: Fri, 13 Jun 2025 18:13:15 -0400 Subject: [PATCH 085/200] arch/internal/simdgen: remove control flow from template this is a prerequisite to sort in some other order Change-Id: Ie8683dfdf028195044b706388a018da54c265d16 Reviewed-on: https://go-review.googlesource.com/c/arch/+/681995 Reviewed-by: Junyang Shao LUCI-TryBot-Result: Go LUCI --- internal/simdgen/gen_simdTypes.go | 109 +++++++++++++++++------------- 1 file changed, 61 insertions(+), 48 deletions(-) diff --git a/internal/simdgen/gen_simdTypes.go b/internal/simdgen/gen_simdTypes.go index 50480b30..13920333 100644 --- a/internal/simdgen/gen_simdTypes.go +++ b/internal/simdgen/gen_simdTypes.go @@ -84,84 +84,73 @@ func (x {{.Name}}) Store(y *[{{.Lanes}}]{{.Base}}) {{end}} ` -const 
simdStubsTmpl = `// Code generated by x/arch/internal/simdgen using 'go run . -xedPath $XED_PATH -o godefs -goroot $GOROOT go.yaml types.yaml categories.yaml'; DO NOT EDIT. +const simdStubsTmpl = `{{define "fileHeader"}}// Code generated by x/arch/internal/simdgen using 'go run . -xedPath $XED_PATH -o godefs -goroot $GOROOT go.yaml types.yaml categories.yaml'; DO NOT EDIT. //go:build goexperiment.simd -package simd - -{{- range .OpsLen1}} +package simd{{end}} +{{define "opsLen1"}} {{if .Documentation}} {{.Documentation}} //{{end}} // Asm: {{.Asm}}, CPU Feature: {{.Extension}} -func (x {{(index .In 0).Go}}) {{.Go}}() {{(index .Out 0).Go}} +func (x {{(index .In 0).Go}}) {{.Go}}() {{(index .Out 0).Go}}{{end}} -{{- end}} -{{- range .OpsLen2}} +{{define "opsLen2"}} {{if .Documentation}} {{.Documentation}} //{{end}} // Asm: {{.Asm}}, CPU Feature: {{.Extension}} -func (x {{(index .In 0).Go}}) {{.Go}}(y {{(index .In 1).Go}}) {{(index .Out 0).Go}} +func (x {{(index .In 0).Go}}) {{.Go}}(y {{(index .In 1).Go}}) {{(index .Out 0).Go}}{{end}} -{{- end}} -{{- range .OpsLen3}} +{{define "opsLen3"}} {{if .Documentation}} {{.Documentation}} //{{end}} // Asm: {{.Asm}}, CPU Feature: {{.Extension}} -func (x {{(index .In 0).Go}}) {{.Go}}(y {{(index .In 1).Go}}, z {{(index .In 2).Go}}) {{(index .Out 0).Go}} +func (x {{(index .In 0).Go}}) {{.Go}}(y {{(index .In 1).Go}}, z {{(index .In 2).Go}}) {{(index .Out 0).Go}}{{end}} -{{- end}} -{{- range .OpsLen4}} +{{define "opsLen4"}} {{if .Documentation}} {{.Documentation}} //{{end}} // Asm: {{.Asm}}, CPU Feature: {{.Extension}} -func (x {{(index .In 0).Go}}) {{.Go}}(y {{(index .In 1).Go}}, z {{(index .In 2).Go}}, u {{(index .In 3).Go}}) {{(index .Out 0).Go}} +func (x {{(index .In 0).Go}}) {{.Go}}(y {{(index .In 1).Go}}, z {{(index .In 2).Go}}, u {{(index .In 3).Go}}) {{(index .Out 0).Go}}{{end}} -{{- end}} -{{- range .OpsLen1Imm8}} +{{define "opsLen1Imm8"}} {{if .Documentation}} {{.Documentation}} //{{end}} // Asm: {{.Asm}}, CPU Feature: 
{{.Extension}} -func (x {{(index .In 1).Go}}) {{.Go}}(imm8 uint8) {{(index .Out 0).Go}} +func (x {{(index .In 1).Go}}) {{.Go}}(imm8 uint8) {{(index .Out 0).Go}}{{end}} -{{- end}} -{{- range .OpsLen2Imm8}} +{{define "opsLen2Imm8"}} {{if .Documentation}} {{.Documentation}} //{{end}} // Asm: {{.Asm}}, CPU Feature: {{.Extension}} -func (x {{(index .In 1).Go}}) {{.Go}}(imm uint8, y {{(index .In 2).Go}}) {{(index .Out 0).Go}} +func (x {{(index .In 1).Go}}) {{.Go}}(imm uint8, y {{(index .In 2).Go}}) {{(index .Out 0).Go}}{{end}} -{{- end}} -{{- range .OpsLen3Imm8}} +{{define "opsLen3Imm8"}} {{if .Documentation}} {{.Documentation}} //{{end}} // Asm: {{.Asm}}, CPU Feature: {{.Extension}} -func (x {{(index .In 1).Go}}) {{.Go}}(imm uint8, y {{(index .In 2).Go}}, z {{(index .In 3).Go}}) {{(index .Out 0).Go}} +func (x {{(index .In 1).Go}}) {{.Go}}(imm uint8, y {{(index .In 2).Go}}, z {{(index .In 3).Go}}) {{(index .Out 0).Go}}{{end}} -{{- end}} -{{- range .OpsLen3Imm8}} +{{define "opsLen4Imm8"}} {{if .Documentation}} {{.Documentation}} //{{end}} // Asm: {{.Asm}}, CPU Feature: {{.Extension}} -func (x {{(index .In 1).Go}}) {{.Go}}(imm uint8, y {{(index .In 2).Go}}, z {{(index .In 3).Go}}, u {{(index .In 3).Go}}) {{(index .Out 0).Go}} - -{{- end}} -{{- range .VectorConversions }} +func (x {{(index .In 1).Go}}) {{.Go}}(imm uint8, y {{(index .In 2).Go}}, z {{(index .In 3).Go}}, u {{(index .In 4).Go}}) {{(index .Out 0).Go}}{{end}} +{{define "vectorConversion"}} // {{.Tdst.Name}} converts from {{.Tsrc.Name}} to {{.Tdst.Name}} func (from {{.Tsrc.Name}}) As{{.Tdst.Name}}() (to {{.Tdst.Name}}) +{{end}} -{{- end}} -{{- range .Masks }} - +{{define "mask"}} // converts from {{.Name}} to {{.VectorCounterpart}} func (from {{.Name}}) As{{.VectorCounterpart}}() (to {{.VectorCounterpart}}) @@ -171,8 +160,7 @@ func (from {{.VectorCounterpart}}) As{{.Name}}() (to {{.Name}}) func (x {{.Name}}) And(y {{.Name}}) {{.Name}} func (x {{.Name}}) Or(y {{.Name}}) {{.Name}} - -{{- end}} +{{end}} ` // 
parseSIMDTypes groups go simd types by their vector sizes, and @@ -289,27 +277,52 @@ func writeSIMDStubs(directory string, ops []Operation, typeMap simdTypeMap) erro return err } defer file.Close() + + if err := t.ExecuteTemplate(file, "fileHeader", nil); err != nil { + return fmt.Errorf("failed to execute fileHeader template: %w", err) + } + if _, err := file.WriteString("\n"); err != nil { + return err + } + opsLen1, opsLen2, opsLen3, opsLen4, opsLen1Imm8, opsLen2Imm8, opsLen3Imm8, opsLen4Imm8, err := opsByLen(ops) if err != nil { return err } - type templateData struct { - OpsLen1 []Operation - OpsLen2 []Operation - OpsLen3 []Operation - OpsLen4 []Operation - OpsLen1Imm8 []Operation - OpsLen2Imm8 []Operation - OpsLen3Imm8 []Operation - OpsLen4Imm8 []Operation - VectorConversions []simdTypePair - Masks []simdType + opLists := map[string][]Operation{ + "opsLen1": opsLen1, + "opsLen2": opsLen2, + "opsLen3": opsLen3, + "opsLen4": opsLen4, + "opsLen1Imm8": opsLen1Imm8, + "opsLen2Imm8": opsLen2Imm8, + "opsLen3Imm8": opsLen3Imm8, + "opsLen4Imm8": opsLen4Imm8, } - err = t.Execute(file, templateData{opsLen1, opsLen2, opsLen3, opsLen4, opsLen1Imm8, opsLen2Imm8, opsLen3Imm8, opsLen4Imm8, vConvertFromTypeMap(typeMap), masksFromTypeMap(typeMap)}) - if err != nil { - return fmt.Errorf("failed to execute template : %w", err) + templateNames := []string{"opsLen1", "opsLen2", "opsLen3", "opsLen4", "opsLen1Imm8", "opsLen2Imm8", "opsLen3Imm8", "opsLen4Imm8"} + + for _, name := range templateNames { + for _, op := range opLists[name] { + if err := t.ExecuteTemplate(file, name, op); err != nil { + return fmt.Errorf("failed to execute template %s for op %s: %w", name, op.Go, err) + } + } + } + + vectorConversions := vConvertFromTypeMap(typeMap) + for _, conv := range vectorConversions { + if err := t.ExecuteTemplate(file, "vectorConversion", conv); err != nil { + return fmt.Errorf("failed to execute vectorConversion template: %w", err) + } + } + + masks := masksFromTypeMap(typeMap) + 
for _, mask := range masks { + if err := t.ExecuteTemplate(file, "mask", mask); err != nil { + return fmt.Errorf("failed to execute mask template for mask %s: %w", mask.Name, err) + } } return nil From 7b7f349a95426492e8f0867d783ac515b7e5afd1 Mon Sep 17 00:00:00 2001 From: David Chase Date: Mon, 16 Jun 2025 14:05:30 -0400 Subject: [PATCH 086/200] internal/simdgen: generate stubs in alphabetical order Includes tweaking the Operator order so that all the operations come out ordered by method name element type element width element count and inserts break between all the grouped methods. E.g. ``` package simd // Absolute // Absolute computes the absolute value of each element. // // Asm: VPABSB, CPU Feature: AVX func (x Int8x16) Absolute() Int8x16 // Absolute computes the absolute value of each element. // // Asm: VPABSB, CPU Feature: AVX2 func (x Int8x32) Absolute() Int8x32 ... // Absolute computes the absolute value of each element. // // Asm: VPABSQ, CPU Feature: AVX512EVEX func (x Int64x8) Absolute() Int64x8 // Add // Add adds corresponding elements of two vectors. // // Asm: VADDPS, CPU Feature: AVX func (x Float32x4) Add(y Float32x4) Float32x4 ... 
``` Change-Id: I97d1c051d1cc9a1b610c907ce13c84907a48f7e2 Reviewed-on: https://go-review.googlesource.com/c/arch/+/681996 LUCI-TryBot-Result: Go LUCI Reviewed-by: Junyang Shao --- internal/simdgen/gen_simdTypes.go | 51 ++++++++++++------------------- internal/simdgen/gen_utility.go | 41 +++++++++++++++++++++++++ internal/simdgen/godefs.go | 35 ++++++++++++++++++--- 3 files changed, 90 insertions(+), 37 deletions(-) diff --git a/internal/simdgen/gen_simdTypes.go b/internal/simdgen/gen_simdTypes.go index 13920333..93bd49c3 100644 --- a/internal/simdgen/gen_simdTypes.go +++ b/internal/simdgen/gen_simdTypes.go @@ -89,56 +89,56 @@ const simdStubsTmpl = `{{define "fileHeader"}}// Code generated by x/arch/intern //go:build goexperiment.simd package simd{{end}} -{{define "opsLen1"}} +{{define "op1"}} {{if .Documentation}} {{.Documentation}} //{{end}} // Asm: {{.Asm}}, CPU Feature: {{.Extension}} func (x {{(index .In 0).Go}}) {{.Go}}() {{(index .Out 0).Go}}{{end}} -{{define "opsLen2"}} +{{define "op2"}} {{if .Documentation}} {{.Documentation}} //{{end}} // Asm: {{.Asm}}, CPU Feature: {{.Extension}} func (x {{(index .In 0).Go}}) {{.Go}}(y {{(index .In 1).Go}}) {{(index .Out 0).Go}}{{end}} -{{define "opsLen3"}} +{{define "op3"}} {{if .Documentation}} {{.Documentation}} //{{end}} // Asm: {{.Asm}}, CPU Feature: {{.Extension}} func (x {{(index .In 0).Go}}) {{.Go}}(y {{(index .In 1).Go}}, z {{(index .In 2).Go}}) {{(index .Out 0).Go}}{{end}} -{{define "opsLen4"}} +{{define "op4"}} {{if .Documentation}} {{.Documentation}} //{{end}} // Asm: {{.Asm}}, CPU Feature: {{.Extension}} func (x {{(index .In 0).Go}}) {{.Go}}(y {{(index .In 1).Go}}, z {{(index .In 2).Go}}, u {{(index .In 3).Go}}) {{(index .Out 0).Go}}{{end}} -{{define "opsLen1Imm8"}} +{{define "op1Imm8"}} {{if .Documentation}} {{.Documentation}} //{{end}} // Asm: {{.Asm}}, CPU Feature: {{.Extension}} func (x {{(index .In 1).Go}}) {{.Go}}(imm8 uint8) {{(index .Out 0).Go}}{{end}} -{{define "opsLen2Imm8"}} +{{define "op2Imm8"}} 
{{if .Documentation}} {{.Documentation}} //{{end}} // Asm: {{.Asm}}, CPU Feature: {{.Extension}} func (x {{(index .In 1).Go}}) {{.Go}}(imm uint8, y {{(index .In 2).Go}}) {{(index .Out 0).Go}}{{end}} -{{define "opsLen3Imm8"}} +{{define "op3Imm8"}} {{if .Documentation}} {{.Documentation}} //{{end}} // Asm: {{.Asm}}, CPU Feature: {{.Extension}} func (x {{(index .In 1).Go}}) {{.Go}}(imm uint8, y {{(index .In 2).Go}}, z {{(index .In 3).Go}}) {{(index .Out 0).Go}}{{end}} -{{define "opsLen4Imm8"}} +{{define "op4Imm8"}} {{if .Documentation}} {{.Documentation}} //{{end}} @@ -281,33 +281,20 @@ func writeSIMDStubs(directory string, ops []Operation, typeMap simdTypeMap) erro if err := t.ExecuteTemplate(file, "fileHeader", nil); err != nil { return fmt.Errorf("failed to execute fileHeader template: %w", err) } - if _, err := file.WriteString("\n"); err != nil { - return err - } - opsLen1, opsLen2, opsLen3, opsLen4, opsLen1Imm8, opsLen2Imm8, opsLen3Imm8, opsLen4Imm8, err := opsByLen(ops) - if err != nil { - return err - } + slices.SortFunc(ops, compareOperations) - opLists := map[string][]Operation{ - "opsLen1": opsLen1, - "opsLen2": opsLen2, - "opsLen3": opsLen3, - "opsLen4": opsLen4, - "opsLen1Imm8": opsLen1Imm8, - "opsLen2Imm8": opsLen2Imm8, - "opsLen3Imm8": opsLen3Imm8, - "opsLen4Imm8": opsLen4Imm8, - } - - templateNames := []string{"opsLen1", "opsLen2", "opsLen3", "opsLen4", "opsLen1Imm8", "opsLen2Imm8", "opsLen3Imm8", "opsLen4Imm8"} - - for _, name := range templateNames { - for _, op := range opLists[name] { - if err := t.ExecuteTemplate(file, name, op); err != nil { - return fmt.Errorf("failed to execute template %s for op %s: %w", name, op.Go, err) + for i, op := range ops { + if s, op, err := classifyOp(op); err == nil { + if i == 0 || op.Go != ops[i-1].Go { + fmt.Fprintf(file, "\n\n// %s", op.Go) + } + if err := t.ExecuteTemplate(file, s, op); err != nil { + return fmt.Errorf("failed to execute template %s for op %s: %w", s, op.Go, err) } + + } else { + return 
fmt.Errorf("failed to classify op %v: %w", op.Go, err) } } diff --git a/internal/simdgen/gen_utility.go b/internal/simdgen/gen_utility.go index 074be682..cc97c3b3 100644 --- a/internal/simdgen/gen_utility.go +++ b/internal/simdgen/gen_utility.go @@ -308,6 +308,47 @@ func (op *Operation) sortOperand() { }) } +func classifyOp(op Operation) (string, Operation, error) { + _, shapeOut, _, immType, _, opNoConstMask, gOp, err := op.shape() + if err != nil { + return "", op, err + } + // Put the go ssa type in GoArch field, simd intrinsics need it. + if shapeOut == OneVregOut || shapeOut == OneKmaskOut || shapeOut == OneVregOutAtIn { + opNoConstMask.GoArch = fmt.Sprintf("types.TypeVec%d", *opNoConstMask.Out[0].Bits) + gOp.GoArch = fmt.Sprintf("types.TypeVec%d", *gOp.Out[0].Bits) + } + if immType == VarImm || immType == ConstVarImm { + switch len(opNoConstMask.In) { + case 1: + return "", op, fmt.Errorf("simdgen does not recognize this operation of only immediate input: %s", op) + case 2: + return "op1Imm8", opNoConstMask, nil + case 3: + return "op2Imm8", opNoConstMask, nil + case 4: + return "op3Imm8", opNoConstMask, nil + case 5: + return "op4Imm8", opNoConstMask, nil + default: + return "", op, fmt.Errorf("simdgen does not recognize this operation of input length %d: %s", len(opNoConstMask.In), op) + } + } else { + switch len(gOp.In) { + case 1: + return "op1", gOp, nil + case 2: + return "op2", gOp, nil + case 3: + return "op3", gOp, nil + case 4: + return "op4", gOp, nil + default: + return "", op, fmt.Errorf("simdgen does not recognize this operation of input length %d: %s", len(opNoConstMask.In), op) + } + } +} + // opsByLen returns the lists of ops stripping the const masks away, aggregated by input length. // Ops with only const imms also has their immediates removed. 
func opsByLen(ops []Operation) (opsLen1, opsLen2, opsLen3, opsLen4, opsLen1Imm8, opsLen2Imm8, opsLen3Imm8, opsLen4Imm8 []Operation, e error) { diff --git a/internal/simdgen/godefs.go b/internal/simdgen/godefs.go index b6d872be..7701f2fb 100644 --- a/internal/simdgen/godefs.go +++ b/internal/simdgen/godefs.go @@ -52,10 +52,18 @@ func compareOperations(x, y Operation) int { if c := strings.Compare(x.GoArch, y.GoArch); c != 0 { return c } - if len(x.In) < len(y.In) { + xIn, yIn := x.In, y.In + + if len(xIn) > len(yIn) && xIn[len(xIn)-1].Class == "mask" { + xIn = xIn[:len(xIn)-1] + } else if len(xIn) < len(yIn) && yIn[len(yIn)-1].Class == "mask" { + yIn = yIn[:len(yIn)-1] + } + + if len(xIn) < len(yIn) { return -1 } - if len(x.In) > len(y.In) { + if len(xIn) > len(yIn) { return 1 } if len(x.Out) < len(y.Out) { @@ -64,15 +72,32 @@ func compareOperations(x, y Operation) int { if len(x.Out) > len(y.Out) { return 1 } - for i := range x.In { - ox, oy := &x.In[i], y.In[i] - if c := compareStringPointers(ox.Go, oy.Go); c != 0 { + for i := range xIn { + ox, oy := &xIn[i], &yIn[i] + if c := compareOperands(ox, oy); c != 0 { return c } } return 0 } +func compareOperands(x, y *Operand) int { + if c := strings.Compare(x.Class, y.Class); c != 0 { + return c + } + if x.Class == "immediate" { + return compareStringPointers(x.ImmOffset, y.ImmOffset) + } else { + if c := strings.Compare(*x.Base, *y.Base); c != 0 { + return c + } + if c := *x.ElemBits - *y.ElemBits; c != 0 { + return c + } + return *x.Bits - *y.Bits + } +} + type Operand struct { Class string // One of "mask", "immediate", "vreg" and "mem" From b5719843eceadfabbd4a61dbdfd5ebaf0cdad4d2 Mon Sep 17 00:00:00 2001 From: David Chase Date: Mon, 16 Jun 2025 14:15:01 -0400 Subject: [PATCH 087/200] internal/simdgen: add 'go test simd' to etetest.sh The end-to-end test should run this test. 
Change-Id: I1ea64ef808cb18529b68d126640d4f1583a1eb79 Reviewed-on: https://go-review.googlesource.com/c/arch/+/681997 LUCI-TryBot-Result: Go LUCI Reviewed-by: Junyang Shao --- internal/simdgen/etetest.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/internal/simdgen/etetest.sh b/internal/simdgen/etetest.sh index a88776bf..ea270429 100755 --- a/internal/simdgen/etetest.sh +++ b/internal/simdgen/etetest.sh @@ -19,6 +19,7 @@ go run . -xedPath xeddata -o godefs -goroot ./go-test go.yaml types.yaml categ (cd go-test/src ; GOEXPERIMENT=simd ./make.bash ) (cd go-test/bin; b=`pwd` ; cd ../src/simd/testdata; GOARCH=amd64 $b/go run .) (cd go-test/bin; b=`pwd` ; cd ../src ; +GOEXPERIMENT=simd GOARCH=amd64 $b/go test -v simd GOEXPERIMENT=simd $b/go test go/doc GOEXPERIMENT=simd $b/go test go/build GOEXPERIMENT=simd $b/go test cmd/api -v -check From 9edca9c6938c5693ab5769f37d5c6cd53e01408b Mon Sep 17 00:00:00 2001 From: Junyang Shao Date: Mon, 16 Jun 2025 19:40:02 +0000 Subject: [PATCH 088/200] internal/simdgen: make stubs file gofmt compliant This CL makes the generated code pass git gofmt check. This CL also make the top method Go name inside a /* */ comment, for easier tracking of generated stubs. 
Change-Id: I5eb6c98f9275b068205dcdece880cf585ef4ab3a Reviewed-on: https://go-review.googlesource.com/c/arch/+/682035 LUCI-TryBot-Result: Go LUCI Reviewed-by: David Chase --- internal/simdgen/gen_simdTypes.go | 54 ++++++++++++++++--------------- 1 file changed, 28 insertions(+), 26 deletions(-) diff --git a/internal/simdgen/gen_simdTypes.go b/internal/simdgen/gen_simdTypes.go index 93bd49c3..ee118453 100644 --- a/internal/simdgen/gen_simdTypes.go +++ b/internal/simdgen/gen_simdTypes.go @@ -88,62 +88,64 @@ const simdStubsTmpl = `{{define "fileHeader"}}// Code generated by x/arch/intern //go:build goexperiment.simd -package simd{{end}} +package simd +{{end}} + {{define "op1"}} -{{if .Documentation}} -{{.Documentation}} +{{if .Documentation}}{{.Documentation}} //{{end}} // Asm: {{.Asm}}, CPU Feature: {{.Extension}} -func (x {{(index .In 0).Go}}) {{.Go}}() {{(index .Out 0).Go}}{{end}} +func (x {{(index .In 0).Go}}) {{.Go}}() {{(index .Out 0).Go}} +{{end}} {{define "op2"}} -{{if .Documentation}} -{{.Documentation}} +{{if .Documentation}}{{.Documentation}} //{{end}} // Asm: {{.Asm}}, CPU Feature: {{.Extension}} -func (x {{(index .In 0).Go}}) {{.Go}}(y {{(index .In 1).Go}}) {{(index .Out 0).Go}}{{end}} +func (x {{(index .In 0).Go}}) {{.Go}}(y {{(index .In 1).Go}}) {{(index .Out 0).Go}} +{{end}} {{define "op3"}} -{{if .Documentation}} -{{.Documentation}} +{{if .Documentation}}{{.Documentation}} //{{end}} // Asm: {{.Asm}}, CPU Feature: {{.Extension}} -func (x {{(index .In 0).Go}}) {{.Go}}(y {{(index .In 1).Go}}, z {{(index .In 2).Go}}) {{(index .Out 0).Go}}{{end}} +func (x {{(index .In 0).Go}}) {{.Go}}(y {{(index .In 1).Go}}, z {{(index .In 2).Go}}) {{(index .Out 0).Go}} +{{end}} {{define "op4"}} -{{if .Documentation}} -{{.Documentation}} +{{if .Documentation}}{{.Documentation}} //{{end}} // Asm: {{.Asm}}, CPU Feature: {{.Extension}} -func (x {{(index .In 0).Go}}) {{.Go}}(y {{(index .In 1).Go}}, z {{(index .In 2).Go}}, u {{(index .In 3).Go}}) {{(index .Out 0).Go}}{{end}} 
+func (x {{(index .In 0).Go}}) {{.Go}}(y {{(index .In 1).Go}}, z {{(index .In 2).Go}}, u {{(index .In 3).Go}}) {{(index .Out 0).Go}} +{{end}} {{define "op1Imm8"}} -{{if .Documentation}} -{{.Documentation}} +{{if .Documentation}}{{.Documentation}} //{{end}} // Asm: {{.Asm}}, CPU Feature: {{.Extension}} -func (x {{(index .In 1).Go}}) {{.Go}}(imm8 uint8) {{(index .Out 0).Go}}{{end}} +func (x {{(index .In 1).Go}}) {{.Go}}(imm8 uint8) {{(index .Out 0).Go}} +{{end}} {{define "op2Imm8"}} -{{if .Documentation}} -{{.Documentation}} +{{if .Documentation}}{{.Documentation}} //{{end}} // Asm: {{.Asm}}, CPU Feature: {{.Extension}} -func (x {{(index .In 1).Go}}) {{.Go}}(imm uint8, y {{(index .In 2).Go}}) {{(index .Out 0).Go}}{{end}} +func (x {{(index .In 1).Go}}) {{.Go}}(imm uint8, y {{(index .In 2).Go}}) {{(index .Out 0).Go}} +{{end}} {{define "op3Imm8"}} -{{if .Documentation}} -{{.Documentation}} +{{if .Documentation}}{{.Documentation}} //{{end}} // Asm: {{.Asm}}, CPU Feature: {{.Extension}} -func (x {{(index .In 1).Go}}) {{.Go}}(imm uint8, y {{(index .In 2).Go}}, z {{(index .In 3).Go}}) {{(index .Out 0).Go}}{{end}} +func (x {{(index .In 1).Go}}) {{.Go}}(imm uint8, y {{(index .In 2).Go}}, z {{(index .In 3).Go}}) {{(index .Out 0).Go}} +{{end}} {{define "op4Imm8"}} -{{if .Documentation}} -{{.Documentation}} +{{if .Documentation}}{{.Documentation}} //{{end}} // Asm: {{.Asm}}, CPU Feature: {{.Extension}} -func (x {{(index .In 1).Go}}) {{.Go}}(imm uint8, y {{(index .In 2).Go}}, z {{(index .In 3).Go}}, u {{(index .In 4).Go}}) {{(index .Out 0).Go}}{{end}} +func (x {{(index .In 1).Go}}) {{.Go}}(imm uint8, y {{(index .In 2).Go}}, z {{(index .In 3).Go}}, u {{(index .In 4).Go}}) {{(index .Out 0).Go}} +{{end}} {{define "vectorConversion"}} // {{.Tdst.Name}} converts from {{.Tsrc.Name}} to {{.Tdst.Name}} @@ -287,7 +289,7 @@ func writeSIMDStubs(directory string, ops []Operation, typeMap simdTypeMap) erro for i, op := range ops { if s, op, err := classifyOp(op); err == nil { if i == 0 || 
op.Go != ops[i-1].Go { - fmt.Fprintf(file, "\n\n// %s", op.Go) + fmt.Fprintf(file, "\n/* %s */\n", op.Go) } if err := t.ExecuteTemplate(file, s, op); err != nil { return fmt.Errorf("failed to execute template %s for op %s: %w", s, op.Go, err) From 2739fbbb93c46ea92032ddc524c1f0e7a1a01a68 Mon Sep 17 00:00:00 2001 From: David Chase Date: Mon, 16 Jun 2025 15:17:13 -0400 Subject: [PATCH 089/200] internal/simdgen: generate intrinsics in alphabetical order To do this, rewrote template to remove iteration, changed call to opsByLen to instead use classifyOp, commented classifyOp, deleted dead code. The alphabetized output appears in CL 682295 Change-Id: I30ef0fe1c6f3b0cdc2003b7da5ee794986272205 Reviewed-on: https://go-review.googlesource.com/c/arch/+/682036 Reviewed-by: Cherry Mui LUCI-TryBot-Result: Go LUCI --- internal/simdgen/gen_simdIntrinsics.go | 128 ++++++++++++++----------- internal/simdgen/gen_utility.go | 75 +-------------- 2 files changed, 73 insertions(+), 130 deletions(-) diff --git a/internal/simdgen/gen_simdIntrinsics.go b/internal/simdgen/gen_simdIntrinsics.go index 02eb5a63..091320a5 100644 --- a/internal/simdgen/gen_simdIntrinsics.go +++ b/internal/simdgen/gen_simdIntrinsics.go @@ -6,9 +6,10 @@ package main import ( "fmt" + "slices" ) -const simdIntrinsicsTmpl = `// Code generated by x/arch/internal/simdgen using 'go run . -xedPath $XED_PATH -o godefs -goroot $GOROOT go.yaml types.yaml categories.yaml'; DO NOT EDIT. +const simdIntrinsicsTmpl = `{{define "header"}}// Code generated by x/arch/internal/simdgen using 'go run . -xedPath $XED_PATH -o godefs -goroot $GOROOT go.yaml types.yaml categories.yaml'; DO NOT EDIT. 
package ssagen import ( @@ -21,48 +22,40 @@ import ( const simdPackage = "` + simdPackage + `" func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies ...sys.ArchFamily)) { -{{- range .OpsLen1}} - addF(simdPackage, "{{(index .In 0).Go}}.{{.Go}}", opLen1(ssa.Op{{.Go}}{{(index .In 0).Go}}, {{.GoArch}}), sys.AMD64) -{{- end}} -{{- range .OpsLen2}} - addF(simdPackage, "{{(index .In 0).Go}}.{{.Go}}", opLen2(ssa.Op{{.Go}}{{(index .In 0).Go}}, {{.GoArch}}), sys.AMD64) -{{- end}} -{{- range .OpsLen3}} - addF(simdPackage, "{{(index .In 0).Go}}.{{.Go}}", opLen3(ssa.Op{{.Go}}{{(index .In 0).Go}}, {{.GoArch}}), sys.AMD64) -{{- end}} -{{- range .OpsLen4}} - addF(simdPackage, "{{(index .In 0).Go}}.{{.Go}}", opLen4(ssa.Op{{.Go}}{{(index .In 0).Go}}, {{.GoArch}}), sys.AMD64) -{{- end}} -{{- range .OpsLen1Imm8}} - addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen1Imm8(ssa.Op{{.Go}}{{(index .In 1).Go}}, {{.GoArch}}, {{(index .In 0).ImmOffset}}), sys.AMD64) -{{- end}} -{{- range .OpsLen2Imm8}} - addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen2Imm8(ssa.Op{{.Go}}{{(index .In 1).Go}}, {{.GoArch}}, {{(index .In 0).ImmOffset}}), sys.AMD64) -{{- end}} -{{- range .OpsLen3Imm8}} - addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen3Imm8(ssa.Op{{.Go}}{{(index .In 1).Go}}, {{.GoArch}}, {{(index .In 0).ImmOffset}}), sys.AMD64) -{{- end}} -{{- range .OpsLen4Imm8}} - addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen4Imm8(ssa.Op{{.Go}}{{(index .In 1).Go}}, {{.GoArch}}, {{(index .In 0).ImmOffset}}), sys.AMD64) -{{- end}} - -{{- range .VectorConversions }} - addF(simdPackage, "{{.Tsrc.Name}}.As{{.Tdst.Name}}", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64) -{{- end}} - -{{- range $size, $ts := .TypeMap }} -{{- range $t := $ts }} - addF(simdPackage, "Load{{$t.Name}}", simdLoad(), sys.AMD64) - addF(simdPackage, "{{$t.Name}}.Store", simdStore(), sys.AMD64) -{{- end}} -{{- end}} -{{- range .Masks }} - 
addF(simdPackage, "{{.Name}}.As{{.VectorCounterpart}}", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64) +{{end}} + +{{define "op1"}} addF(simdPackage, "{{(index .In 0).Go}}.{{.Go}}", opLen1(ssa.Op{{.Go}}{{(index .In 0).Go}}, {{.GoArch}}), sys.AMD64) +{{end}} +{{define "op2"}} addF(simdPackage, "{{(index .In 0).Go}}.{{.Go}}", opLen2(ssa.Op{{.Go}}{{(index .In 0).Go}}, {{.GoArch}}), sys.AMD64) +{{end}} +{{define "op3"}} addF(simdPackage, "{{(index .In 0).Go}}.{{.Go}}", opLen3(ssa.Op{{.Go}}{{(index .In 0).Go}}, {{.GoArch}}), sys.AMD64) +{{end}} +{{define "op4"}} addF(simdPackage, "{{(index .In 0).Go}}.{{.Go}}", opLen4(ssa.Op{{.Go}}{{(index .In 0).Go}}, {{.GoArch}}), sys.AMD64) +{{end}} +{{define "op1Imm8"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen1Imm8(ssa.Op{{.Go}}{{(index .In 1).Go}}, {{.GoArch}}, {{(index .In 0).ImmOffset}}), sys.AMD64) +{{end}} +{{define "op2Imm8"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen2Imm8(ssa.Op{{.Go}}{{(index .In 1).Go}}, {{.GoArch}}, {{(index .In 0).ImmOffset}}), sys.AMD64) +{{end}} +{{define "op3Imm8"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen3Imm8(ssa.Op{{.Go}}{{(index .In 1).Go}}, {{.GoArch}}, {{(index .In 0).ImmOffset}}), sys.AMD64) +{{end}} +{{define "op4Imm8"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen4Imm8(ssa.Op{{.Go}}{{(index .In 1).Go}}, {{.GoArch}}, {{(index .In 0).ImmOffset}}), sys.AMD64) +{{end}} + +{{define "vectorConversion"}} addF(simdPackage, "{{.Tsrc.Name}}.As{{.Tdst.Name}}", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64) +{{end}} + +{{define "typeMap"}} addF(simdPackage, "Load{{.Name}}", simdLoad(), sys.AMD64) + addF(simdPackage, "{{.Name}}.Store", simdStore(), sys.AMD64) +{{end}} + +{{define "mask"}} addF(simdPackage, "{{.Name}}.As{{.VectorCounterpart}}", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64) addF(simdPackage, 
"{{.VectorCounterpart}}.As{{.Name}}", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64) addF(simdPackage, "{{.Name}}.And", opLen2(ssa.OpAnd{{.ReshapedVectorWithAndOr}}, types.TypeVec{{.Size}}), sys.AMD64) addF(simdPackage, "{{.Name}}.Or", opLen2(ssa.OpOr{{.ReshapedVectorWithAndOr}}, types.TypeVec{{.Size}}), sys.AMD64) -{{- end}} -} +{{end}} + +{{define "footer"}}} +{{end}} ` // writeSIMDIntrinsics generates the intrinsic mappings and writes it to simdintrinsics.go @@ -73,27 +66,46 @@ func writeSIMDIntrinsics(directory string, ops []Operation, typeMap simdTypeMap) return err } defer file.Close() - opsLen1, opsLen2, opsLen3, opsLen4, opsLen1Imm8, opsLen2Imm8, opsLen3Imm8, opsLen4Imm8, err := opsByLen(ops) - if err != nil { - return err + + if err := t.ExecuteTemplate(file, "header", nil); err != nil { + return fmt.Errorf("failed to execute header template: %w", err) } - type templateData struct { - OpsLen1 []Operation - OpsLen2 []Operation - OpsLen3 []Operation - OpsLen4 []Operation - OpsLen1Imm8 []Operation - OpsLen2Imm8 []Operation - OpsLen3Imm8 []Operation - OpsLen4Imm8 []Operation - TypeMap simdTypeMap - VectorConversions []simdTypePair - Masks []simdType + slices.SortFunc(ops, compareOperations) + + for _, op := range ops { + if s, op, err := classifyOp(op); err == nil { + if err := t.ExecuteTemplate(file, s, op); err != nil { + return fmt.Errorf("failed to execute template %s for op %s: %w", s, op.Go, err) + } + + } else { + return fmt.Errorf("failed to classify op %v: %w", op.Go, err) + } } - err = t.Execute(file, templateData{opsLen1, opsLen2, opsLen3, opsLen4, opsLen1Imm8, opsLen2Imm8, opsLen3Imm8, opsLen4Imm8, typeMap, vConvertFromTypeMap(typeMap), masksFromTypeMap(typeMap)}) - if err != nil { - return fmt.Errorf("failed to execute template: %w", err) + + for _, conv := range vConvertFromTypeMap(typeMap) { + if err := t.ExecuteTemplate(file, "vectorConversion", conv); err != nil { + return fmt.Errorf("failed to 
execute vectorConversion template: %w", err) + } + } + + for _, ts := range typeMap { + for _, typ := range ts { + if err := t.ExecuteTemplate(file, "typeMap", typ); err != nil { + return fmt.Errorf("failed to execute typeMap template: %w", err) + } + } + } + + for _, mask := range masksFromTypeMap(typeMap) { + if err := t.ExecuteTemplate(file, "mask", mask); err != nil { + return fmt.Errorf("failed to execute mask template: %w", err) + } + } + + if err := t.ExecuteTemplate(file, "footer", nil); err != nil { + return fmt.Errorf("failed to execute footer template: %w", err) } return nil diff --git a/internal/simdgen/gen_utility.go b/internal/simdgen/gen_utility.go index cc97c3b3..6ec1b6f6 100644 --- a/internal/simdgen/gen_utility.go +++ b/internal/simdgen/gen_utility.go @@ -308,6 +308,9 @@ func (op *Operation) sortOperand() { }) } +// classifyOp returns a classification string, modified operation, and perhaps error based +// on the stub and intrinsic shape for the operation. +// The classification string is in the regular expression set "op[1234](Imm8)?" func classifyOp(op Operation) (string, Operation, error) { _, shapeOut, _, immType, _, opNoConstMask, gOp, err := op.shape() if err != nil { @@ -349,78 +352,6 @@ func classifyOp(op Operation) (string, Operation, error) { } } -// opsByLen returns the lists of ops stripping the const masks away, aggregated by input length. -// Ops with only const imms also has their immediates removed. 
-func opsByLen(ops []Operation) (opsLen1, opsLen2, opsLen3, opsLen4, opsLen1Imm8, opsLen2Imm8, opsLen3Imm8, opsLen4Imm8 []Operation, e error) { - opsLen1 = make([]Operation, 0) - opsLen2 = make([]Operation, 0) - opsLen3 = make([]Operation, 0) - opsLen4 = make([]Operation, 0) - opsLen1Imm8 = make([]Operation, 0) - opsLen2Imm8 = make([]Operation, 0) - opsLen3Imm8 = make([]Operation, 0) - opsLen4Imm8 = make([]Operation, 0) - for _, op := range ops { - _, shapeOut, _, immType, _, opNoConstMask, gOp, err := op.shape() - if err != nil { - e = err - return - } - // Put the go ssa type in GoArch field, simd intrinsics need it. - if shapeOut == OneVregOut || shapeOut == OneKmaskOut || shapeOut == OneVregOutAtIn { - opNoConstMask.GoArch = fmt.Sprintf("types.TypeVec%d", *opNoConstMask.Out[0].Bits) - gOp.GoArch = fmt.Sprintf("types.TypeVec%d", *gOp.Out[0].Bits) - } - if immType == VarImm || immType == ConstVarImm { - switch len(opNoConstMask.In) { - case 1: - e = fmt.Errorf("simdgen does not recognize this operation of only immediate input: %s", op) - return - case 2: - opsLen1Imm8 = append(opsLen1Imm8, opNoConstMask) - case 3: - opsLen2Imm8 = append(opsLen2Imm8, opNoConstMask) - case 4: - opsLen3Imm8 = append(opsLen3Imm8, opNoConstMask) - case 5: - opsLen4Imm8 = append(opsLen4Imm8, opNoConstMask) - default: - e = fmt.Errorf("simdgen does not recognize this operation of input length %d: %s", len(opNoConstMask.In), op) - } - } else { - switch len(gOp.In) { - case 1: - opsLen1 = append(opsLen1, gOp) - case 2: - opsLen2 = append(opsLen2, gOp) - case 3: - opsLen3 = append(opsLen3, gOp) - case 4: - opsLen4 = append(opsLen4, gOp) - default: - e = fmt.Errorf("simdgen does not recognize this operation of input length %d: %s", len(opNoConstMask.In), op) - } - } - } - sortKey := func(op *Operation) string { - return *op.In[0].Go + op.Go - } - sortBySortKey := func(ops []Operation) { - sort.Slice(ops, func(i, j int) bool { - return sortKey(&ops[i]) < sortKey(&ops[j]) - }) - } - 
sortBySortKey(opsLen1) - sortBySortKey(opsLen2) - sortBySortKey(opsLen3) - sortBySortKey(opsLen4) - sortBySortKey(opsLen1Imm8) - sortBySortKey(opsLen2Imm8) - sortBySortKey(opsLen3Imm8) - sortBySortKey(opsLen4Imm8) - return -} - // dedup is deduping operations in the full structure level. func dedup(ops []Operation) (deduped []Operation) { for _, op := range ops { From 2f50423530578e96e7e5a08fdb622539b8906dbd Mon Sep 17 00:00:00 2001 From: David Chase Date: Tue, 17 Jun 2025 11:40:52 -0400 Subject: [PATCH 090/200] internal/simdgen: remove map iteration; tweak type comparison This makes the type conversion part of the simd intrinsics have a repeatable order, and aligns the type comparison order with the one chosen for methods. Type order is element base type, element type width, vector width Output CL (dev.simd) is CL 682355 Change-Id: If483f86bec1c1e24689913d89b58acc07c18477f Reviewed-on: https://go-review.googlesource.com/c/arch/+/682316 Reviewed-by: Cherry Mui LUCI-TryBot-Result: Go LUCI --- internal/simdgen/gen_simdIntrinsics.go | 8 +++----- internal/simdgen/gen_simdTypes.go | 28 +++++++++++++++++++++++--- 2 files changed, 28 insertions(+), 8 deletions(-) diff --git a/internal/simdgen/gen_simdIntrinsics.go b/internal/simdgen/gen_simdIntrinsics.go index 091320a5..b34ffeb8 100644 --- a/internal/simdgen/gen_simdIntrinsics.go +++ b/internal/simdgen/gen_simdIntrinsics.go @@ -90,11 +90,9 @@ func writeSIMDIntrinsics(directory string, ops []Operation, typeMap simdTypeMap) } } - for _, ts := range typeMap { - for _, typ := range ts { - if err := t.ExecuteTemplate(file, "typeMap", typ); err != nil { - return fmt.Errorf("failed to execute typeMap template: %w", err) - } + for _, typ := range typesFromTypeMap(typeMap) { + if err := t.ExecuteTemplate(file, "typeMap", typ); err != nil { + return fmt.Errorf("failed to execute typeMap template: %w", err) } } diff --git a/internal/simdgen/gen_simdTypes.go b/internal/simdgen/gen_simdTypes.go index ee118453..3120eb85 100644 --- 
a/internal/simdgen/gen_simdTypes.go +++ b/internal/simdgen/gen_simdTypes.go @@ -23,11 +23,22 @@ type simdType struct { } func compareSimdTypes(x, y simdType) int { - c := strings.Compare(x.Name, y.Name) - if c != 0 { + // "mask" then "vreg" + if c := strings.Compare(x.Type, y.Type); c != 0 { + return c + } + // want "flo" < "int" < "uin" (and then 8 < 16 < 32 < 64), + // not "int16" < "int32" < "int64" < "int8") + // so limit comparison to first 3 bytes in string. + if c := strings.Compare(x.Base[:3], y.Base[:3]); c != 0 { return c } - return strings.Compare(x.Type, y.Type) + // base type size, 8 < 16 < 32 < 64 + if c := x.Size/x.Lanes - y.Size/y.Lanes; c != 0 { + return c + } + // vector size last + return x.Size - y.Size } type simdTypeMap map[int][]simdType @@ -238,6 +249,17 @@ func masksFromTypeMap(typeMap simdTypeMap) []simdType { return m } +func typesFromTypeMap(typeMap simdTypeMap) []simdType { + m := []simdType{} + for _, ts := range typeMap { + for _, tsrc := range ts { + m = append(m, tsrc) + } + } + slices.SortFunc(m, compareSimdTypes) + return m +} + // writeSIMDTypes generates the simd vector type and writes it to types_amd64.go // within the specified directory. func writeSIMDTypes(directory string, typeMap simdTypeMap) error { From 6a5ee49ac3818cb695f36315ce3be4d74f49b48e Mon Sep 17 00:00:00 2001 From: Junyang Shao Date: Mon, 16 Jun 2025 22:53:58 +0000 Subject: [PATCH 091/200] internal/simdgen: fix typo in PairDotProdAccumulate. 
Change-Id: Ieb593812e2c53c3c22e76cc972f81a9a199abc90 Reviewed-on: https://go-review.googlesource.com/c/arch/+/682135 Auto-Submit: Junyang Shao LUCI-TryBot-Result: Go LUCI Reviewed-by: David Chase --- internal/simdgen/gen_utility.go | 6 ++++++ internal/simdgen/go.yaml | 12 ++++++------ internal/simdgen/ops/MLOps/go.yaml | 12 ++++++------ 3 files changed, 18 insertions(+), 12 deletions(-) diff --git a/internal/simdgen/gen_utility.go b/internal/simdgen/gen_utility.go index 6ec1b6f6..848570ca 100644 --- a/internal/simdgen/gen_utility.go +++ b/internal/simdgen/gen_utility.go @@ -650,6 +650,12 @@ func (op Operand) String() string { sb.WriteString(" OverwriteBase: \n") } + if op.OverwriteElementBits != nil { + sb.WriteString(fmt.Sprintf(" OverwriteElementBits: %d\n", *op.OverwriteElementBits)) + } else { + sb.WriteString(" OverwriteElementBits: \n") + } + sb.WriteString(" }\n") return sb.String() } diff --git a/internal/simdgen/go.yaml b/internal/simdgen/go.yaml index f7e771c6..401a90fb 100644 --- a/internal/simdgen/go.yaml +++ b/internal/simdgen/go.yaml @@ -527,10 +527,10 @@ - class: mask - go: $t1 base: uint - overwriteElemBits: 8 + overwriteElementBits: 8 - go: $t2 base: int - overwriteElemBits: 8 + overwriteElementBits: 8 out: - *int3 - go: DotProdBroadcast @@ -555,11 +555,11 @@ - &qdpa_src1 go: $t_src1 base: uint - overwriteElemBits: 8 + overwriteElementBits: 8 - &qdpa_src2 go: $t_src2 base: int - overwriteElemBits: 8 + overwriteElementBits: 8 out: - *qdpa_acc - go: MaskedUnsignedSignedQuadDotProdAccumulate @@ -598,11 +598,11 @@ - &pdpa_src1 go: $t_src1 base: int - overwriteElemBits: 16 + overwriteElementBits: 16 - &pdpa_src2 go: $t_src2 base: int - overwriteElemBits: 16 + overwriteElementBits: 16 out: - *pdpa_acc - go: MaskedPairDotProdAccumulate diff --git a/internal/simdgen/ops/MLOps/go.yaml b/internal/simdgen/ops/MLOps/go.yaml index e0cefda2..b9add167 100644 --- a/internal/simdgen/ops/MLOps/go.yaml +++ b/internal/simdgen/ops/MLOps/go.yaml @@ -35,10 +35,10 @@ - 
class: mask - go: $t1 base: uint - overwriteElemBits: 8 + overwriteElementBits: 8 - go: $t2 base: int - overwriteElemBits: 8 + overwriteElementBits: 8 out: - *int3 - go: DotProdBroadcast @@ -63,11 +63,11 @@ - &qdpa_src1 go: $t_src1 base: uint - overwriteElemBits: 8 + overwriteElementBits: 8 - &qdpa_src2 go: $t_src2 base: int - overwriteElemBits: 8 + overwriteElementBits: 8 out: - *qdpa_acc - go: MaskedUnsignedSignedQuadDotProdAccumulate @@ -106,11 +106,11 @@ - &pdpa_src1 go: $t_src1 base: int - overwriteElemBits: 16 + overwriteElementBits: 16 - &pdpa_src2 go: $t_src2 base: int - overwriteElemBits: 16 + overwriteElementBits: 16 out: - *pdpa_acc - go: MaskedPairDotProdAccumulate From 2c11faf3e697e1f2f777e394813d1b4159c0a0d8 Mon Sep 17 00:00:00 2001 From: David Chase Date: Mon, 16 Jun 2025 16:03:03 -0400 Subject: [PATCH 092/200] internal/simdgen: separate template creation from file opening Change-Id: I158b1d6f3c6a010cbe2778447cf3d818c5014d51 Reviewed-on: https://go-review.googlesource.com/c/arch/+/682115 LUCI-TryBot-Result: Go LUCI Reviewed-by: Junyang Shao --- internal/simdgen/gen_simdGenericOps.go | 3 ++- internal/simdgen/gen_simdIntrinsics.go | 3 ++- internal/simdgen/gen_simdMachineOps.go | 3 ++- internal/simdgen/gen_simdTypes.go | 6 ++++-- internal/simdgen/gen_simdssa.go | 2 +- internal/simdgen/gen_utility.go | 21 ++++++++++++--------- internal/simdgen/godefs.go | 1 + 7 files changed, 24 insertions(+), 15 deletions(-) diff --git a/internal/simdgen/gen_simdGenericOps.go b/internal/simdgen/gen_simdGenericOps.go index bdda8b80..114888e7 100644 --- a/internal/simdgen/gen_simdGenericOps.go +++ b/internal/simdgen/gen_simdGenericOps.go @@ -27,7 +27,8 @@ func simdGenericOps() []opData { // writeSIMDGenericOps generates the generic ops and writes it to simdAMD64ops.go // within the specified directory. 
func writeSIMDGenericOps(directory string, ops []Operation) error { - file, t, err := openFileAndPrepareTemplate(directory, "src/cmd/compile/internal/ssa/_gen/simdgenericOps.go", simdGenericOpsTmpl) + t := templateOf(simdGenericOpsTmpl, "simdgenericOps") + file, err := createPath(directory, "src/cmd/compile/internal/ssa/_gen/simdgenericOps.go") if err != nil { return err } diff --git a/internal/simdgen/gen_simdIntrinsics.go b/internal/simdgen/gen_simdIntrinsics.go index b34ffeb8..92f5b6df 100644 --- a/internal/simdgen/gen_simdIntrinsics.go +++ b/internal/simdgen/gen_simdIntrinsics.go @@ -61,7 +61,8 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . // writeSIMDIntrinsics generates the intrinsic mappings and writes it to simdintrinsics.go // within the specified directory. func writeSIMDIntrinsics(directory string, ops []Operation, typeMap simdTypeMap) error { - file, t, err := openFileAndPrepareTemplate(directory, "src/cmd/compile/internal/ssagen/simdintrinsics.go", simdIntrinsicsTmpl) + t := templateOf(simdIntrinsicsTmpl, "simdintrinsics") + file, err := createPath(directory, "src/cmd/compile/internal/ssagen/simdintrinsics.go") if err != nil { return err } diff --git a/internal/simdgen/gen_simdMachineOps.go b/internal/simdgen/gen_simdMachineOps.go index f09b5568..c7c47d94 100644 --- a/internal/simdgen/gen_simdMachineOps.go +++ b/internal/simdgen/gen_simdMachineOps.go @@ -27,7 +27,8 @@ func simdAMD64Ops(fp11, fp21, fp2k1, fp1k1fp1, fp2k1fp1, fp2k1k1, fp31, fp3k1fp1 // writeSIMDMachineOps generates the machine ops and writes it to simdAMD64ops.go // within the specified directory. 
func writeSIMDMachineOps(directory string, ops []Operation) error { - file, t, err := openFileAndPrepareTemplate(directory, "src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go", simdMachineOpsTmpl) + t := templateOf(simdMachineOpsTmpl, "simdAMD64Ops") + file, err := createPath(directory, "src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go") if err != nil { return err } diff --git a/internal/simdgen/gen_simdTypes.go b/internal/simdgen/gen_simdTypes.go index 3120eb85..00de88d0 100644 --- a/internal/simdgen/gen_simdTypes.go +++ b/internal/simdgen/gen_simdTypes.go @@ -263,7 +263,8 @@ func typesFromTypeMap(typeMap simdTypeMap) []simdType { // writeSIMDTypes generates the simd vector type and writes it to types_amd64.go // within the specified directory. func writeSIMDTypes(directory string, typeMap simdTypeMap) error { - file, t, err := openFileAndPrepareTemplate(directory, "src/"+simdPackage+"/types_amd64.go", simdTypesTemplates) + t := templateOf(simdTypesTemplates, "types_amd64") + file, err := createPath(directory, "src/"+simdPackage+"/types_amd64.go") if err != nil { return err } @@ -296,7 +297,8 @@ func writeSIMDTypes(directory string, typeMap simdTypeMap) error { // writeSIMDStubs generates the simd vector intrinsic stubs and writes it to stubs_amd64.go // within the specified directory. 
func writeSIMDStubs(directory string, ops []Operation, typeMap simdTypeMap) error { - file, t, err := openFileAndPrepareTemplate(directory, "src/"+simdPackage+"/stubs_amd64.go", simdStubsTmpl) + t := templateOf(simdStubsTmpl, "simdStubs") + file, err := createPath(directory, "src/"+simdPackage+"/stubs_amd64.go") if err != nil { return err } diff --git a/internal/simdgen/gen_simdssa.go b/internal/simdgen/gen_simdssa.go index 14f97e60..53ded489 100644 --- a/internal/simdgen/gen_simdssa.go +++ b/internal/simdgen/gen_simdssa.go @@ -121,7 +121,7 @@ func writeSIMDSSA(directory string, ops []Operation) error { return fmt.Errorf("unsupported register constraint for prog, please update gen_simdssa.go and amd64/ssa.go: %+v", allUnseen) } - file, _, err := openFileAndPrepareTemplate(directory, "src/cmd/compile/internal/amd64/simdssa.go", "") + file, err := createPath(directory, "src/cmd/compile/internal/amd64/simdssa.go") if err != nil { return err } diff --git a/internal/simdgen/gen_utility.go b/internal/simdgen/gen_utility.go index 848570ca..daa0db75 100644 --- a/internal/simdgen/gen_utility.go +++ b/internal/simdgen/gen_utility.go @@ -17,23 +17,26 @@ import ( "unicode" ) -func openFileAndPrepareTemplate(goroot string, file string, temp string) (*os.File, *template.Template, error) { +func templateOf(temp, name string) *template.Template { + t, err := template.New(name).Parse(temp) + if err != nil { + panic(fmt.Errorf("failed to parse template %s: %w", name, err)) + } + return t +} + +func createPath(goroot string, file string) (*os.File, error) { fp := filepath.Join(goroot, file) dir := filepath.Dir(fp) err := os.MkdirAll(dir, 0755) if err != nil { - return nil, nil, fmt.Errorf("failed to create directory %s: %w", dir, err) + return nil, fmt.Errorf("failed to create directory %s: %w", dir, err) } f, err := os.Create(fp) if err != nil { - return nil, nil, fmt.Errorf("failed to create file %s: %w", fp, err) - } - t, err := template.New(fp).Parse(temp) - if err != nil { - 
f.Close() - return nil, nil, fmt.Errorf("failed to parse template: %w", err) + return nil, fmt.Errorf("failed to create file %s: %w", fp, err) } - return f, t, nil + return f, nil } const ( diff --git a/internal/simdgen/godefs.go b/internal/simdgen/godefs.go index 7701f2fb..009520df 100644 --- a/internal/simdgen/godefs.go +++ b/internal/simdgen/godefs.go @@ -174,6 +174,7 @@ func writeGoDefs(path string, cl unify.Closure) error { } log.Printf("dedup len: %d\n", len(deduped)) typeMap := parseSIMDTypes(deduped) + if err = writeSIMDTypes(path, typeMap); err != nil { return err } From 9884aef72aede3ef3690240d2427e94ac7dfad8c Mon Sep 17 00:00:00 2001 From: David Chase Date: Mon, 16 Jun 2025 18:07:19 -0400 Subject: [PATCH 093/200] internal/simdgen: use go/format on generated Go also refactor generators to return a *bytes.Buffer, and turn error returns into panics because it is easier to read. There are no changes to the output; the parent generates correctly formatted Go. Change-Id: I9ac3ed25c1e868f900fb6a6a9b80e8e33b5fe5cd Reviewed-on: https://go-review.googlesource.com/c/arch/+/682116 LUCI-TryBot-Result: Go LUCI Reviewed-by: Junyang Shao --- internal/simdgen/gen_simdGenericOps.go | 18 ++++----- internal/simdgen/gen_simdIntrinsics.go | 37 ++++++++--------- internal/simdgen/gen_simdMachineOps.go | 24 +++++------ internal/simdgen/gen_simdTypes.go | 56 +++++++++++--------------- internal/simdgen/gen_simdrules.go | 34 ++++++---------- internal/simdgen/gen_simdssa.go | 37 ++++++++--------- internal/simdgen/gen_utility.go | 20 +++++++++ internal/simdgen/godefs.go | 29 ++++--------- 8 files changed, 117 insertions(+), 138 deletions(-) diff --git a/internal/simdgen/gen_simdGenericOps.go b/internal/simdgen/gen_simdGenericOps.go index 114888e7..6f8b16b7 100644 --- a/internal/simdgen/gen_simdGenericOps.go +++ b/internal/simdgen/gen_simdGenericOps.go @@ -5,6 +5,7 @@ package main import ( + "bytes" "fmt" "sort" ) @@ -26,13 +27,10 @@ func simdGenericOps() []opData { // 
writeSIMDGenericOps generates the generic ops and writes it to simdAMD64ops.go // within the specified directory. -func writeSIMDGenericOps(directory string, ops []Operation) error { +func writeSIMDGenericOps(ops []Operation) *bytes.Buffer { t := templateOf(simdGenericOpsTmpl, "simdgenericOps") - file, err := createPath(directory, "src/cmd/compile/internal/ssa/_gen/simdgenericOps.go") - if err != nil { - return err - } - defer file.Close() + buffer := new(bytes.Buffer) + type genericOpsData struct { sortKey string OpName string @@ -47,7 +45,7 @@ func writeSIMDGenericOps(directory string, ops []Operation) error { for _, op := range ops { _, _, _, immType, _, _, gOp, err := op.shape() if err != nil { - return err + panic(err) } genericNames := gOp.Go + *gOp.In[0].Go gOpData := genericOpsData{*gOp.In[0].Go + gOp.Go, genericNames, len(gOp.In), op.Commutative} @@ -64,10 +62,10 @@ func writeSIMDGenericOps(directory string, ops []Operation) error { return opsData.OpsImm[i].sortKey < opsData.OpsImm[j].sortKey }) - err = t.Execute(file, opsData) + err := t.Execute(buffer, opsData) if err != nil { - return fmt.Errorf("failed to execute template: %w", err) + panic(fmt.Errorf("failed to execute template: %w", err)) } - return nil + return buffer } diff --git a/internal/simdgen/gen_simdIntrinsics.go b/internal/simdgen/gen_simdIntrinsics.go index 92f5b6df..3910e028 100644 --- a/internal/simdgen/gen_simdIntrinsics.go +++ b/internal/simdgen/gen_simdIntrinsics.go @@ -5,6 +5,7 @@ package main import ( + "bytes" "fmt" "slices" ) @@ -60,52 +61,48 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . // writeSIMDIntrinsics generates the intrinsic mappings and writes it to simdintrinsics.go // within the specified directory. 
-func writeSIMDIntrinsics(directory string, ops []Operation, typeMap simdTypeMap) error { +func writeSIMDIntrinsics(ops []Operation, typeMap simdTypeMap) *bytes.Buffer { t := templateOf(simdIntrinsicsTmpl, "simdintrinsics") - file, err := createPath(directory, "src/cmd/compile/internal/ssagen/simdintrinsics.go") - if err != nil { - return err - } - defer file.Close() + buffer := new(bytes.Buffer) - if err := t.ExecuteTemplate(file, "header", nil); err != nil { - return fmt.Errorf("failed to execute header template: %w", err) + if err := t.ExecuteTemplate(buffer, "header", nil); err != nil { + panic(fmt.Errorf("failed to execute header template: %w", err)) } slices.SortFunc(ops, compareOperations) for _, op := range ops { if s, op, err := classifyOp(op); err == nil { - if err := t.ExecuteTemplate(file, s, op); err != nil { - return fmt.Errorf("failed to execute template %s for op %s: %w", s, op.Go, err) + if err := t.ExecuteTemplate(buffer, s, op); err != nil { + panic(fmt.Errorf("failed to execute template %s for op %s: %w", s, op.Go, err)) } } else { - return fmt.Errorf("failed to classify op %v: %w", op.Go, err) + panic(fmt.Errorf("failed to classify op %v: %w", op.Go, err)) } } for _, conv := range vConvertFromTypeMap(typeMap) { - if err := t.ExecuteTemplate(file, "vectorConversion", conv); err != nil { - return fmt.Errorf("failed to execute vectorConversion template: %w", err) + if err := t.ExecuteTemplate(buffer, "vectorConversion", conv); err != nil { + panic(fmt.Errorf("failed to execute vectorConversion template: %w", err)) } } for _, typ := range typesFromTypeMap(typeMap) { - if err := t.ExecuteTemplate(file, "typeMap", typ); err != nil { - return fmt.Errorf("failed to execute typeMap template: %w", err) + if err := t.ExecuteTemplate(buffer, "typeMap", typ); err != nil { + panic(fmt.Errorf("failed to execute typeMap template: %w", err)) } } for _, mask := range masksFromTypeMap(typeMap) { - if err := t.ExecuteTemplate(file, "mask", mask); err != nil { - 
return fmt.Errorf("failed to execute mask template: %w", err) + if err := t.ExecuteTemplate(buffer, "mask", mask); err != nil { + panic(fmt.Errorf("failed to execute mask template: %w", err)) } } - if err := t.ExecuteTemplate(file, "footer", nil); err != nil { - return fmt.Errorf("failed to execute footer template: %w", err) + if err := t.ExecuteTemplate(buffer, "footer", nil); err != nil { + panic(fmt.Errorf("failed to execute footer template: %w", err)) } - return nil + return buffer } diff --git a/internal/simdgen/gen_simdMachineOps.go b/internal/simdgen/gen_simdMachineOps.go index c7c47d94..43ede6ec 100644 --- a/internal/simdgen/gen_simdMachineOps.go +++ b/internal/simdgen/gen_simdMachineOps.go @@ -5,6 +5,7 @@ package main import ( + "bytes" "fmt" "sort" ) @@ -26,13 +27,10 @@ func simdAMD64Ops(fp11, fp21, fp2k1, fp1k1fp1, fp2k1fp1, fp2k1k1, fp31, fp3k1fp1 // writeSIMDMachineOps generates the machine ops and writes it to simdAMD64ops.go // within the specified directory. -func writeSIMDMachineOps(directory string, ops []Operation) error { +func writeSIMDMachineOps(ops []Operation) *bytes.Buffer { t := templateOf(simdMachineOpsTmpl, "simdAMD64Ops") - file, err := createPath(directory, "src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go") - if err != nil { - return err - } - defer file.Close() + buffer := new(bytes.Buffer) + type opData struct { sortKey string OpName string @@ -54,7 +52,7 @@ func writeSIMDMachineOps(directory string, ops []Operation) error { for _, op := range ops { shapeIn, shapeOut, maskType, _, _, _, gOp, err := op.shape() if err != nil { - return err + panic(err) } asm := gOp.Asm if maskType == OneMask { @@ -69,10 +67,10 @@ func writeSIMDMachineOps(directory string, ops []Operation) error { seen[asm] = struct{}{} regInfo, err := op.regShape() if err != nil { - return err + panic(err) } if _, ok := regInfoSet[regInfo]; !ok { - return fmt.Errorf("unsupported register constraint, please update the template and AMD64Ops.go: %s", regInfo) + 
panic(fmt.Errorf("unsupported register constraint, please update the template and AMD64Ops.go: %s", regInfo)) } var outType string if shapeOut == OneVregOut || shapeOut == OneVregOutAtIn || gOp.Out[0].OverwriteClass != nil { @@ -81,7 +79,7 @@ func writeSIMDMachineOps(directory string, ops []Operation) error { } else if shapeOut == OneKmaskOut { outType = "Mask" } else { - return fmt.Errorf("simdgen does not recognize this output shape: %d", shapeOut) + panic(fmt.Errorf("simdgen does not recognize this output shape: %d", shapeOut)) } resultInArg0 := "false" if shapeOut == OneVregOutAtIn { @@ -99,10 +97,10 @@ func writeSIMDMachineOps(directory string, ops []Operation) error { sort.Slice(opsDataImm, func(i, j int) bool { return opsDataImm[i].sortKey < opsDataImm[j].sortKey }) - err = t.Execute(file, machineOpsData{opsData, opsDataImm}) + err := t.Execute(buffer, machineOpsData{opsData, opsDataImm}) if err != nil { - return fmt.Errorf("failed to execute template: %w", err) + panic(fmt.Errorf("failed to execute template: %w", err)) } - return nil + return buffer } diff --git a/internal/simdgen/gen_simdTypes.go b/internal/simdgen/gen_simdTypes.go index 00de88d0..4966dae7 100644 --- a/internal/simdgen/gen_simdTypes.go +++ b/internal/simdgen/gen_simdTypes.go @@ -5,6 +5,7 @@ package main import ( + "bytes" "fmt" "slices" "sort" @@ -260,18 +261,13 @@ func typesFromTypeMap(typeMap simdTypeMap) []simdType { return m } -// writeSIMDTypes generates the simd vector type and writes it to types_amd64.go -// within the specified directory. 
-func writeSIMDTypes(directory string, typeMap simdTypeMap) error { +// writeSIMDTypes generates the simd vector types into a bytes.Buffer +func writeSIMDTypes(typeMap simdTypeMap) *bytes.Buffer { t := templateOf(simdTypesTemplates, "types_amd64") - file, err := createPath(directory, "src/"+simdPackage+"/types_amd64.go") - if err != nil { - return err - } - defer file.Close() + buffer := new(bytes.Buffer) - if err := t.ExecuteTemplate(file, "fileHeader", nil); err != nil { - return fmt.Errorf("failed to execute fileHeader template: %w", err) + if err := t.ExecuteTemplate(buffer, "fileHeader", nil); err != nil { + panic(fmt.Errorf("failed to execute fileHeader template: %w", err)) } sizes := make([]int, 0, len(typeMap)) @@ -281,31 +277,27 @@ func writeSIMDTypes(directory string, typeMap simdTypeMap) error { sort.Ints(sizes) for _, size := range sizes { - if err := t.ExecuteTemplate(file, "sizeTmpl", size); err != nil { - return fmt.Errorf("failed to execute size template for size %d: %w", size, err) + if err := t.ExecuteTemplate(buffer, "sizeTmpl", size); err != nil { + panic(fmt.Errorf("failed to execute size template for size %d: %w", size, err)) } for _, typeDef := range typeMap[size] { - if err := t.ExecuteTemplate(file, "typeTmpl", typeDef); err != nil { - return fmt.Errorf("failed to execute type template for type %s: %w", typeDef.Name, err) + if err := t.ExecuteTemplate(buffer, "typeTmpl", typeDef); err != nil { + panic(fmt.Errorf("failed to execute type template for type %s: %w", typeDef.Name, err)) } } } - return nil + return buffer } // writeSIMDStubs generates the simd vector intrinsic stubs and writes it to stubs_amd64.go // within the specified directory. 
-func writeSIMDStubs(directory string, ops []Operation, typeMap simdTypeMap) error { +func writeSIMDStubs(ops []Operation, typeMap simdTypeMap) *bytes.Buffer { t := templateOf(simdStubsTmpl, "simdStubs") - file, err := createPath(directory, "src/"+simdPackage+"/stubs_amd64.go") - if err != nil { - return err - } - defer file.Close() + buffer := new(bytes.Buffer) - if err := t.ExecuteTemplate(file, "fileHeader", nil); err != nil { - return fmt.Errorf("failed to execute fileHeader template: %w", err) + if err := t.ExecuteTemplate(buffer, "fileHeader", nil); err != nil { + panic(fmt.Errorf("failed to execute fileHeader template: %w", err)) } slices.SortFunc(ops, compareOperations) @@ -313,30 +305,30 @@ func writeSIMDStubs(directory string, ops []Operation, typeMap simdTypeMap) erro for i, op := range ops { if s, op, err := classifyOp(op); err == nil { if i == 0 || op.Go != ops[i-1].Go { - fmt.Fprintf(file, "\n/* %s */\n", op.Go) + fmt.Fprintf(buffer, "\n/* %s */\n", op.Go) } - if err := t.ExecuteTemplate(file, s, op); err != nil { - return fmt.Errorf("failed to execute template %s for op %s: %w", s, op.Go, err) + if err := t.ExecuteTemplate(buffer, s, op); err != nil { + panic(fmt.Errorf("failed to execute template %s for op %s: %w", s, op.Go, err)) } } else { - return fmt.Errorf("failed to classify op %v: %w", op.Go, err) + panic(fmt.Errorf("failed to classify op %v: %w", op.Go, err)) } } vectorConversions := vConvertFromTypeMap(typeMap) for _, conv := range vectorConversions { - if err := t.ExecuteTemplate(file, "vectorConversion", conv); err != nil { - return fmt.Errorf("failed to execute vectorConversion template: %w", err) + if err := t.ExecuteTemplate(buffer, "vectorConversion", conv); err != nil { + panic(fmt.Errorf("failed to execute vectorConversion template: %w", err)) } } masks := masksFromTypeMap(typeMap) for _, mask := range masks { - if err := t.ExecuteTemplate(file, "mask", mask); err != nil { - return fmt.Errorf("failed to execute mask template for 
mask %s: %w", mask.Name, err) + if err := t.ExecuteTemplate(buffer, "mask", mask); err != nil { + panic(fmt.Errorf("failed to execute mask template for mask %s: %w", mask.Name, err)) } } - return nil + return buffer } diff --git a/internal/simdgen/gen_simdrules.go b/internal/simdgen/gen_simdrules.go index 5f51c6f8..651ae382 100644 --- a/internal/simdgen/gen_simdrules.go +++ b/internal/simdgen/gen_simdrules.go @@ -5,10 +5,9 @@ package main import ( + "bytes" "fmt" "io" - "os" - "path/filepath" "slices" "strings" "text/template" @@ -54,23 +53,14 @@ func compareTplRuleData(x, y tplRuleData) int { // writeSIMDRules generates the lowering and rewrite rules for ssa and writes it to simdAMD64.rules // within the specified directory. -func writeSIMDRules(directory string, ops []Operation) error { - - outPath := filepath.Join(directory, "src/cmd/compile/internal/ssa/_gen/simdAMD64.rules") - if err := os.MkdirAll(filepath.Dir(outPath), 0755); err != nil { - return fmt.Errorf("failed to create directory for %s: %w", outPath, err) - } - file, err := os.Create(outPath) - if err != nil { - return fmt.Errorf("failed to create %s: %w", outPath, err) - } - defer file.Close() +func writeSIMDRules(ops []Operation) *bytes.Buffer { + buffer := new(bytes.Buffer) header := `// Code generated by x/arch/internal/simdgen using 'go run . -xedPath $XED_PATH -o godefs -goroot $GOROOT go.yaml types.yaml categories.yaml'; DO NOT EDIT. 
` - if _, err := io.WriteString(file, header); err != nil { - return fmt.Errorf("failed to write header to %s: %w", outPath, err) + if _, err := io.WriteString(buffer, header); err != nil { + panic(fmt.Errorf("failed to write header: %w", err)) } var allData []tplRuleData @@ -78,7 +68,7 @@ func writeSIMDRules(directory string, ops []Operation) error { for _, opr := range ops { opInShape, opOutShape, maskType, immType, _, _, gOp, err := opr.shape() if err != nil { - return err + panic(err) } vregInCnt := len(gOp.In) asm := gOp.Asm @@ -103,7 +93,7 @@ func writeSIMDRules(directory string, ops []Operation) error { data.Args = "x y z" data.ArgsOut = data.Args } else { - return fmt.Errorf("simdgen does not support more than 3 vreg in inputs") + panic(fmt.Errorf("simdgen does not support more than 3 vreg in inputs")) } if immType == ConstImm { data.ArgsOut = fmt.Sprintf("[%s] %s", *opr.In[0].Const, data.ArgsOut) @@ -136,7 +126,7 @@ func writeSIMDRules(directory string, ops []Operation) error { // Mask is at the end. 
data.MaskInConvert = fmt.Sprintf("VPMOVVec%dx%dToM", *gOp.In[rearIdx].ElemBits, *gOp.In[rearIdx].Lanes) case PureKmaskIn: - return fmt.Errorf("simdgen does not support pure k mask instructions, they should be generated by compiler optimizations") + panic(fmt.Errorf("simdgen does not support pure k mask instructions, they should be generated by compiler optimizations")) } } else { // OneKmaskOut case @@ -155,7 +145,7 @@ func writeSIMDRules(directory string, ops []Operation) error { rearIdx := len(gOp.In) - 1 data.MaskInConvert = fmt.Sprintf("VPMOVVec%dx%dToM", *gOp.In[rearIdx].ElemBits, *gOp.In[rearIdx].Lanes) case PureKmaskIn: - return fmt.Errorf("simdgen does not support pure k mask instructions, they should be generated by compiler optimizations") + panic(fmt.Errorf("simdgen does not support pure k mask instructions, they should be generated by compiler optimizations")) } } @@ -166,10 +156,10 @@ func writeSIMDRules(directory string, ops []Operation) error { slices.SortFunc(allData, compareTplRuleData) for _, data := range allData { - if err := ruleTemplates.ExecuteTemplate(file, data.tplName, data); err != nil { - return fmt.Errorf("failed to execute template %s for %s: %w", data.tplName, data.GoOp+data.GoType, err) + if err := ruleTemplates.ExecuteTemplate(buffer, data.tplName, data); err != nil { + panic(fmt.Errorf("failed to execute template %s for %s: %w", data.tplName, data.GoOp+data.GoType, err)) } } - return nil + return buffer } diff --git a/internal/simdgen/gen_simdssa.go b/internal/simdgen/gen_simdssa.go index 53ded489..e606b69c 100644 --- a/internal/simdgen/gen_simdssa.go +++ b/internal/simdgen/gen_simdssa.go @@ -5,6 +5,7 @@ package main import ( + "bytes" "fmt" "strings" "text/template" @@ -56,7 +57,7 @@ type tplSSAData struct { // writeSIMDSSA generates the ssa to prog lowering codes and writes it to simdssa.go // within the specified directory. 
-func writeSIMDSSA(directory string, ops []Operation) error { +func writeSIMDSSA(ops []Operation) *bytes.Buffer { var ZeroingMask []string regInfoKeys := []string{ "fp11", @@ -86,7 +87,7 @@ func writeSIMDSSA(directory string, ops []Operation) error { asm := op.Asm shapeIn, shapeOut, maskType, _, _, _, gOp, err := op.shape() if err != nil { - return err + panic(err) } if maskType == 2 { asm += "Masked" @@ -104,7 +105,7 @@ func writeSIMDSSA(directory string, ops []Operation) error { } regShape, err := op.regShape() if err != nil { - return err + panic(err) } if shapeOut == OneVregOutAtIn { regShape += "ResultInArg0" @@ -118,17 +119,13 @@ func writeSIMDSSA(directory string, ops []Operation) error { regInfoSet[regShape] = append(regInfoSet[regShape], caseStr) } if len(allUnseen) != 0 { - return fmt.Errorf("unsupported register constraint for prog, please update gen_simdssa.go and amd64/ssa.go: %+v", allUnseen) + panic(fmt.Errorf("unsupported register constraint for prog, please update gen_simdssa.go and amd64/ssa.go: %+v", allUnseen)) } - file, err := createPath(directory, "src/cmd/compile/internal/amd64/simdssa.go") - if err != nil { - return err - } - defer file.Close() + buffer := new(bytes.Buffer) - if err := ssaTemplates.ExecuteTemplate(file, "header", nil); err != nil { - return fmt.Errorf("failed to execute header template: %w", err) + if err := ssaTemplates.ExecuteTemplate(buffer, "header", nil); err != nil { + panic(fmt.Errorf("failed to execute header template: %w", err)) } for _, regShape := range regInfoKeys { @@ -141,24 +138,24 @@ func writeSIMDSSA(directory string, ops []Operation) error { Cases: strings.Join(cases, ",\n\t\t"), Helper: "simd" + capitalizeFirst(regShape), } - if err := ssaTemplates.ExecuteTemplate(file, "case", data); err != nil { - return fmt.Errorf("failed to execute case template for %s: %w", regShape, err) + if err := ssaTemplates.ExecuteTemplate(buffer, "case", data); err != nil { + panic(fmt.Errorf("failed to execute case template 
for %s: %w", regShape, err)) } } - if err := ssaTemplates.ExecuteTemplate(file, "footer", nil); err != nil { - return fmt.Errorf("failed to execute footer template: %w", err) + if err := ssaTemplates.ExecuteTemplate(buffer, "footer", nil); err != nil { + panic(fmt.Errorf("failed to execute footer template: %w", err)) } if len(ZeroingMask) != 0 { - if err := ssaTemplates.ExecuteTemplate(file, "zeroing", strings.Join(ZeroingMask, ",\n\t\t")); err != nil { - return fmt.Errorf("failed to execute footer template: %w", err) + if err := ssaTemplates.ExecuteTemplate(buffer, "zeroing", strings.Join(ZeroingMask, ",\n\t\t")); err != nil { + panic(fmt.Errorf("failed to execute footer template: %w", err)) } } - if err := ssaTemplates.ExecuteTemplate(file, "ending", nil); err != nil { - return fmt.Errorf("failed to execute footer template: %w", err) + if err := ssaTemplates.ExecuteTemplate(buffer, "ending", nil); err != nil { + panic(fmt.Errorf("failed to execute footer template: %w", err)) } - return nil + return buffer } diff --git a/internal/simdgen/gen_utility.go b/internal/simdgen/gen_utility.go index daa0db75..6cdc54ea 100644 --- a/internal/simdgen/gen_utility.go +++ b/internal/simdgen/gen_utility.go @@ -5,7 +5,9 @@ package main import ( + "bytes" "fmt" + "go/format" "log" "os" "path/filepath" @@ -39,6 +41,24 @@ func createPath(goroot string, file string) (*os.File, error) { return f, nil } +func formatWriteAndClose(out *bytes.Buffer, goroot string, file string) { + b, err := format.Source(out.Bytes()) + if err != nil { + panic(err) + } else { + writeAndClose(b, goroot, file) + } +} + +func writeAndClose(b []byte, goroot string, file string) { + ofile, err := createPath(goroot, file) + if err != nil { + panic(err) + } + ofile.Write(b) + ofile.Close() +} + const ( InvalidIn int = iota PureVregIn diff --git a/internal/simdgen/godefs.go b/internal/simdgen/godefs.go index 009520df..64c2e6e9 100644 --- a/internal/simdgen/godefs.go +++ b/internal/simdgen/godefs.go @@ -175,26 
+175,13 @@ func writeGoDefs(path string, cl unify.Closure) error { log.Printf("dedup len: %d\n", len(deduped)) typeMap := parseSIMDTypes(deduped) - if err = writeSIMDTypes(path, typeMap); err != nil { - return err - } - if err = writeSIMDStubs(path, deduped, typeMap); err != nil { - return err - } - if err = writeSIMDIntrinsics(path, deduped, typeMap); err != nil { - return err - } - if err = writeSIMDGenericOps(path, deduped); err != nil { - return err - } - if err = writeSIMDMachineOps(path, deduped); err != nil { - return err - } - if err = writeSIMDRules(path, deduped); err != nil { - return err - } - if err = writeSIMDSSA(path, deduped); err != nil { - return err - } + formatWriteAndClose(writeSIMDTypes(typeMap), path, "src/"+simdPackage+"/types_amd64.go") + formatWriteAndClose(writeSIMDStubs(deduped, typeMap), path, "src/"+simdPackage+"/stubs_amd64.go") + formatWriteAndClose(writeSIMDIntrinsics(deduped, typeMap), path, "src/cmd/compile/internal/ssagen/simdintrinsics.go") + formatWriteAndClose(writeSIMDGenericOps(deduped), path, "src/cmd/compile/internal/ssa/_gen/simdgenericOps.go") + formatWriteAndClose(writeSIMDMachineOps(deduped), path, "src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go") + formatWriteAndClose(writeSIMDSSA(deduped), path, "src/cmd/compile/internal/amd64/simdssa.go") + writeAndClose(writeSIMDRules(deduped).Bytes(), path, "src/cmd/compile/internal/ssa/_gen/simdAMD64.rules") + return nil } From 3373f86e010c24aa2ee246b58bfae78cc4d2e0d9 Mon Sep 17 00:00:00 2001 From: Junyang Shao Date: Tue, 17 Jun 2025 19:39:58 +0000 Subject: [PATCH 094/200] internal/simdgen: remove redundant shapes of fused mul/add/sub. The users do not need to see every shape of these VFM* instructions at the API level. This CL keeps only one shape(213) of them; The rest will be generated by lowering rules as an optimization. This CL generates CL 682435. 
Change-Id: I59ea9c568b0c00c8af6757b9c74f779abf397e3c Reviewed-on: https://go-review.googlesource.com/c/arch/+/682436 Reviewed-by: David Chase LUCI-TryBot-Result: Go LUCI --- internal/simdgen/categories.yaml | 102 +------------- internal/simdgen/go.yaml | 147 +-------------------- internal/simdgen/ops/MLOps/categories.yaml | 102 +------------- internal/simdgen/ops/MLOps/go.yaml | 147 +-------------------- 4 files changed, 24 insertions(+), 474 deletions(-) diff --git a/internal/simdgen/categories.yaml b/internal/simdgen/categories.yaml index 8b325fa4..c33a62e6 100644 --- a/internal/simdgen/categories.yaml +++ b/internal/simdgen/categories.yaml @@ -573,114 +573,24 @@ extension: "AVX.*" documentation: !string |- // MaskedSaturatedPairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x. -- go: MaskedFusedMultiplyAdd132 +- go: MaskedFusedMultiplyAdd masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // MaskedFusedMultiplyAdd132 performs `(v1 * v3) + v2`. -- go: MaskedFusedMultiplyAdd213 + // MaskedFusedMultiplyAdd performs `(v1 * v2) + v3`. +- go: MaskedFusedMultiplyAddSub masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // MaskedFusedMultiplyAdd213 performs `(v2 * v1) + v3`. -- go: MaskedFusedMultiplyAdd231 + // MaskedFusedMultiplyAddSub performs `(v1 * v2) - v3` for odd-indexed elements, and `(v1 * v2) + v3` for even-indexed elements. +- go: MaskedFusedMultiplySubAdd masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // MaskedFusedMultiplyAdd231 performs `(v2 * v3) + v1`. -- go: MaskedFusedMultiplySub132 - masked: "true" - commutative: "false" - extension: "AVX.*" - documentation: !string |- - // MaskedFusedMultiplySub132 performs `(v1 * v3) - v2`. 
-- go: MaskedFusedMultiplySub213 - masked: "true" - commutative: "false" - extension: "AVX.*" - documentation: !string |- - // MaskedFusedMultiplySub213 performs `(v2 * v1) - v3`. -- go: MaskedFusedMultiplySub231 - masked: "true" - commutative: "false" - extension: "AVX.*" - documentation: !string |- - // MaskedFusedMultiplySub231 performs `(v2 * v3) - v1`. -- go: MaskedFusedNegativeMultiplyAdd132 - masked: "true" - commutative: "false" - extension: "AVX.*" - documentation: !string |- - // MaskedFusedNegativeMultiplyAdd132 performs `-(v1 * v3) + v2`. -- go: MaskedFusedNegativeMultiplyAdd213 - masked: "true" - commutative: "false" - extension: "AVX.*" - documentation: !string |- - // MaskedFusedNegativeMultiplyAdd213 performs `-(v2 * v1) + v3`. -- go: MaskedFusedNegativeMultiplyAdd231 - masked: "true" - commutative: "false" - extension: "AVX.*" - documentation: !string |- - // MaskedFusedNegativeMultiplyAdd231 performs `-(v2 * v3) + v1`. -- go: MaskedFusedNegativeMultiplySub132 - masked: "true" - commutative: "false" - extension: "AVX.*" - documentation: !string |- - // MaskedFusedNegativeMultiplySub132 performs `-(v1 * v3) - v2`. -- go: MaskedFusedNegativeMultiplySub213 - masked: "true" - commutative: "false" - extension: "AVX.*" - documentation: !string |- - // MaskedFusedNegativeMultiplySub213 performs `-(v2 * v1) - v3`. -- go: MaskedFusedNegativeMultiplySub231 - masked: "true" - commutative: "false" - extension: "AVX.*" - documentation: !string |- - // MaskedFusedNegativeMultiplySub231 performs `-(v2 * v3) - v1`. -- go: MaskedFusedMultiplyAddSub132 - masked: "true" - commutative: "false" - extension: "AVX.*" - documentation: !string |- - // MaskedFusedMultiplyAddSub132 performs `(v1 * v3) - v2` for odd-indexed elements, and `(v1 * v3) + v2` for even-indexed elements. 
-- go: MaskedFusedMultiplyAddSub213 - masked: "true" - commutative: "false" - extension: "AVX.*" - documentation: !string |- - // MaskedFusedMultiplyAddSub213 performs `(v2 * v1) - v3` for odd-indexed elements, and `(v2 * v1) + v3` for even-indexed elements. -- go: MaskedFusedMultiplyAddSub231 - masked: "true" - commutative: "false" - extension: "AVX.*" - documentation: !string |- - // MaskedFusedMultiplyAddSub231 performs `(v2 * v3) - v1` for odd-indexed elements, and `(v2 * v3) + v1` for even-indexed elements. -- go: MaskedFusedMultiplySubAdd132 - masked: "true" - commutative: "false" - extension: "AVX.*" - documentation: !string |- - // MaskedFusedMultiplySubAdd132 performs `(v1 * v3) + v2` for odd-indexed elements, and `(v1 * v3) - v2` for even-indexed elements. -- go: MaskedFusedMultiplySubAdd213 - masked: "true" - commutative: "false" - extension: "AVX.*" - documentation: !string |- - // MaskedFusedMultiplySubAdd213 performs `(v2 * v1) + v3` for odd-indexed elements, and `(v2 * v1) - v3` for even-indexed elements. -- go: MaskedFusedMultiplySubAdd231 - masked: "true" - commutative: "false" - extension: "AVX.*" - documentation: !string |- - // MaskedFusedMultiplySubAdd231 performs `(v2 * v3) + v1` for odd-indexed elements, and `(v2 * v3) - v1` for even-indexed elements. + // MaskedFusedMultiplySubAdd performs `(v1 * v2) + v3` for odd-indexed elements, and `(v1 * v2) - v3` for even-indexed elements. 
- go: Max commutative: "true" extension: "AVX.*" diff --git a/internal/simdgen/go.yaml b/internal/simdgen/go.yaml index 401a90fb..20bd9d57 100644 --- a/internal/simdgen/go.yaml +++ b/internal/simdgen/go.yaml @@ -631,126 +631,18 @@ - *pdpa_src2 out: - *pdpa_acc -- go: MaskedFusedMultiplyAdd132 - asm: "VFMADD132PS|VFMADD132PD" - in: - - &fma_op - go: $t - base: float - - class: mask - - *fma_op - - *fma_op - out: - - *fma_op -- go: MaskedFusedMultiplyAdd213 +- go: MaskedFusedMultiplyAdd asm: "VFMADD213PS|VFMADD213PD" in: - - *fma_op - - class: mask - - *fma_op - - *fma_op - out: - - *fma_op -- go: MaskedFusedMultiplyAdd231 - asm: "VFMADD231PS|VFMADD231PD" - in: - - *fma_op - - class: mask - - *fma_op - - *fma_op - out: - - *fma_op -- go: MaskedFusedMultiplySub132 - asm: "VFMSUB132PS|VFMSUB132PD" - in: - - *fma_op - - class: mask - - *fma_op - - *fma_op - out: - - *fma_op -- go: MaskedFusedMultiplySub213 - asm: "VFMSUB213PS|VFMSUB213PD" - in: - - *fma_op - - class: mask - - *fma_op - - *fma_op - out: - - *fma_op -- go: MaskedFusedMultiplySub231 - asm: "VFMSUB231PS|VFMSUB231PD" - in: - - *fma_op - - class: mask - - *fma_op - - *fma_op - out: - - *fma_op -- go: MaskedFusedNegativeMultiplyAdd132 - asm: "VFNMADD132PS|VFNMADD132PD" - in: - - *fma_op - - class: mask - - *fma_op - - *fma_op - out: - - *fma_op -- go: MaskedFusedNegativeMultiplyAdd213 - asm: "VFNMADD213PS|VFNMADD213PD" - in: - - *fma_op - - class: mask - - *fma_op - - *fma_op - out: - - *fma_op -- go: MaskedFusedNegativeMultiplyAdd231 - asm: "VFNMADD231PS|VFNMADD231PD" - in: - - *fma_op - - class: mask - - *fma_op - - *fma_op - out: - - *fma_op -- go: MaskedFusedNegativeMultiplySub132 - asm: "VFNMSUB132PS|VFNMSUB132PD" - in: - - *fma_op - - class: mask - - *fma_op - - *fma_op - out: - - *fma_op -- go: MaskedFusedNegativeMultiplySub213 - asm: "VFNMSUB213PS|VFNMSUB213PD" - in: - - *fma_op - - class: mask - - *fma_op - - *fma_op - out: - - *fma_op -- go: MaskedFusedNegativeMultiplySub231 - asm: 
"VFNMSUB231PS|VFNMSUB231PD" - in: - - *fma_op - - class: mask - - *fma_op - - *fma_op - out: - - *fma_op -- go: MaskedFusedMultiplyAddSub132 - asm: "VFMADDSUB132PS|VFMADDSUB132PD" - in: - - *fma_op + - &fma_op + go: $t + base: float - class: mask - *fma_op - *fma_op out: - *fma_op -- go: MaskedFusedMultiplyAddSub213 +- go: MaskedFusedMultiplyAddSub asm: "VFMADDSUB213PS|VFMADDSUB213PD" in: - *fma_op @@ -759,25 +651,7 @@ - *fma_op out: - *fma_op -- go: MaskedFusedMultiplyAddSub231 - asm: "VFMADDSUB231PS|VFMADDSUB231PD" - in: - - *fma_op - - class: mask - - *fma_op - - *fma_op - out: - - *fma_op -- go: MaskedFusedMultiplySubAdd132 - asm: "VFMSUBADD132PS|VFMSUBADD132PD" - in: - - *fma_op - - class: mask - - *fma_op - - *fma_op - out: - - *fma_op -- go: MaskedFusedMultiplySubAdd213 +- go: MaskedFusedMultiplySubAdd asm: "VFMSUBADD213PS|VFMSUBADD213PD" in: - *fma_op @@ -786,15 +660,6 @@ - *fma_op out: - *fma_op -- go: MaskedFusedMultiplySubAdd231 - asm: "VFMSUBADD231PS|VFMSUBADD231PD" - in: - - *fma_op - - class: mask - - *fma_op - - *fma_op - out: - - *fma_op - go: Max asm: "V?PMAXS[BWDQ]" in: &2int diff --git a/internal/simdgen/ops/MLOps/categories.yaml b/internal/simdgen/ops/MLOps/categories.yaml index 412af692..6923dd37 100644 --- a/internal/simdgen/ops/MLOps/categories.yaml +++ b/internal/simdgen/ops/MLOps/categories.yaml @@ -77,111 +77,21 @@ extension: "AVX.*" documentation: !string |- // MaskedSaturatedPairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x. -- go: MaskedFusedMultiplyAdd132 +- go: MaskedFusedMultiplyAdd masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // MaskedFusedMultiplyAdd132 performs `(v1 * v3) + v2`. -- go: MaskedFusedMultiplyAdd213 + // MaskedFusedMultiplyAdd performs `(v1 * v2) + v3`. +- go: MaskedFusedMultiplyAddSub masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // MaskedFusedMultiplyAdd213 performs `(v2 * v1) + v3`. 
-- go: MaskedFusedMultiplyAdd231 + // MaskedFusedMultiplyAddSub performs `(v1 * v2) - v3` for odd-indexed elements, and `(v1 * v2) + v3` for even-indexed elements. +- go: MaskedFusedMultiplySubAdd masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // MaskedFusedMultiplyAdd231 performs `(v2 * v3) + v1`. -- go: MaskedFusedMultiplySub132 - masked: "true" - commutative: "false" - extension: "AVX.*" - documentation: !string |- - // MaskedFusedMultiplySub132 performs `(v1 * v3) - v2`. -- go: MaskedFusedMultiplySub213 - masked: "true" - commutative: "false" - extension: "AVX.*" - documentation: !string |- - // MaskedFusedMultiplySub213 performs `(v2 * v1) - v3`. -- go: MaskedFusedMultiplySub231 - masked: "true" - commutative: "false" - extension: "AVX.*" - documentation: !string |- - // MaskedFusedMultiplySub231 performs `(v2 * v3) - v1`. -- go: MaskedFusedNegativeMultiplyAdd132 - masked: "true" - commutative: "false" - extension: "AVX.*" - documentation: !string |- - // MaskedFusedNegativeMultiplyAdd132 performs `-(v1 * v3) + v2`. -- go: MaskedFusedNegativeMultiplyAdd213 - masked: "true" - commutative: "false" - extension: "AVX.*" - documentation: !string |- - // MaskedFusedNegativeMultiplyAdd213 performs `-(v2 * v1) + v3`. -- go: MaskedFusedNegativeMultiplyAdd231 - masked: "true" - commutative: "false" - extension: "AVX.*" - documentation: !string |- - // MaskedFusedNegativeMultiplyAdd231 performs `-(v2 * v3) + v1`. -- go: MaskedFusedNegativeMultiplySub132 - masked: "true" - commutative: "false" - extension: "AVX.*" - documentation: !string |- - // MaskedFusedNegativeMultiplySub132 performs `-(v1 * v3) - v2`. -- go: MaskedFusedNegativeMultiplySub213 - masked: "true" - commutative: "false" - extension: "AVX.*" - documentation: !string |- - // MaskedFusedNegativeMultiplySub213 performs `-(v2 * v1) - v3`. 
-- go: MaskedFusedNegativeMultiplySub231 - masked: "true" - commutative: "false" - extension: "AVX.*" - documentation: !string |- - // MaskedFusedNegativeMultiplySub231 performs `-(v2 * v3) - v1`. -- go: MaskedFusedMultiplyAddSub132 - masked: "true" - commutative: "false" - extension: "AVX.*" - documentation: !string |- - // MaskedFusedMultiplyAddSub132 performs `(v1 * v3) - v2` for odd-indexed elements, and `(v1 * v3) + v2` for even-indexed elements. -- go: MaskedFusedMultiplyAddSub213 - masked: "true" - commutative: "false" - extension: "AVX.*" - documentation: !string |- - // MaskedFusedMultiplyAddSub213 performs `(v2 * v1) - v3` for odd-indexed elements, and `(v2 * v1) + v3` for even-indexed elements. -- go: MaskedFusedMultiplyAddSub231 - masked: "true" - commutative: "false" - extension: "AVX.*" - documentation: !string |- - // MaskedFusedMultiplyAddSub231 performs `(v2 * v3) - v1` for odd-indexed elements, and `(v2 * v3) + v1` for even-indexed elements. -- go: MaskedFusedMultiplySubAdd132 - masked: "true" - commutative: "false" - extension: "AVX.*" - documentation: !string |- - // MaskedFusedMultiplySubAdd132 performs `(v1 * v3) + v2` for odd-indexed elements, and `(v1 * v3) - v2` for even-indexed elements. -- go: MaskedFusedMultiplySubAdd213 - masked: "true" - commutative: "false" - extension: "AVX.*" - documentation: !string |- - // MaskedFusedMultiplySubAdd213 performs `(v2 * v1) + v3` for odd-indexed elements, and `(v2 * v1) - v3` for even-indexed elements. -- go: MaskedFusedMultiplySubAdd231 - masked: "true" - commutative: "false" - extension: "AVX.*" - documentation: !string |- - // MaskedFusedMultiplySubAdd231 performs `(v2 * v3) + v1` for odd-indexed elements, and `(v2 * v3) - v1` for even-indexed elements. + // MaskedFusedMultiplySubAdd performs `(v1 * v2) + v3` for odd-indexed elements, and `(v1 * v2) - v3` for even-indexed elements. 
diff --git a/internal/simdgen/ops/MLOps/go.yaml b/internal/simdgen/ops/MLOps/go.yaml index b9add167..da894ac7 100644 --- a/internal/simdgen/ops/MLOps/go.yaml +++ b/internal/simdgen/ops/MLOps/go.yaml @@ -139,126 +139,18 @@ - *pdpa_src2 out: - *pdpa_acc -- go: MaskedFusedMultiplyAdd132 - asm: "VFMADD132PS|VFMADD132PD" - in: - - &fma_op - go: $t - base: float - - class: mask - - *fma_op - - *fma_op - out: - - *fma_op -- go: MaskedFusedMultiplyAdd213 +- go: MaskedFusedMultiplyAdd asm: "VFMADD213PS|VFMADD213PD" in: - - *fma_op - - class: mask - - *fma_op - - *fma_op - out: - - *fma_op -- go: MaskedFusedMultiplyAdd231 - asm: "VFMADD231PS|VFMADD231PD" - in: - - *fma_op - - class: mask - - *fma_op - - *fma_op - out: - - *fma_op -- go: MaskedFusedMultiplySub132 - asm: "VFMSUB132PS|VFMSUB132PD" - in: - - *fma_op - - class: mask - - *fma_op - - *fma_op - out: - - *fma_op -- go: MaskedFusedMultiplySub213 - asm: "VFMSUB213PS|VFMSUB213PD" - in: - - *fma_op - - class: mask - - *fma_op - - *fma_op - out: - - *fma_op -- go: MaskedFusedMultiplySub231 - asm: "VFMSUB231PS|VFMSUB231PD" - in: - - *fma_op - - class: mask - - *fma_op - - *fma_op - out: - - *fma_op -- go: MaskedFusedNegativeMultiplyAdd132 - asm: "VFNMADD132PS|VFNMADD132PD" - in: - - *fma_op - - class: mask - - *fma_op - - *fma_op - out: - - *fma_op -- go: MaskedFusedNegativeMultiplyAdd213 - asm: "VFNMADD213PS|VFNMADD213PD" - in: - - *fma_op - - class: mask - - *fma_op - - *fma_op - out: - - *fma_op -- go: MaskedFusedNegativeMultiplyAdd231 - asm: "VFNMADD231PS|VFNMADD231PD" - in: - - *fma_op - - class: mask - - *fma_op - - *fma_op - out: - - *fma_op -- go: MaskedFusedNegativeMultiplySub132 - asm: "VFNMSUB132PS|VFNMSUB132PD" - in: - - *fma_op - - class: mask - - *fma_op - - *fma_op - out: - - *fma_op -- go: MaskedFusedNegativeMultiplySub213 - asm: "VFNMSUB213PS|VFNMSUB213PD" - in: - - *fma_op - - class: mask - - *fma_op - - *fma_op - out: - - *fma_op -- go: MaskedFusedNegativeMultiplySub231 - asm: "VFNMSUB231PS|VFNMSUB231PD" 
- in: - - *fma_op - - class: mask - - *fma_op - - *fma_op - out: - - *fma_op -- go: MaskedFusedMultiplyAddSub132 - asm: "VFMADDSUB132PS|VFMADDSUB132PD" - in: - - *fma_op + - &fma_op + go: $t + base: float - class: mask - *fma_op - *fma_op out: - *fma_op -- go: MaskedFusedMultiplyAddSub213 +- go: MaskedFusedMultiplyAddSub asm: "VFMADDSUB213PS|VFMADDSUB213PD" in: - *fma_op @@ -267,25 +159,7 @@ - *fma_op out: - *fma_op -- go: MaskedFusedMultiplyAddSub231 - asm: "VFMADDSUB231PS|VFMADDSUB231PD" - in: - - *fma_op - - class: mask - - *fma_op - - *fma_op - out: - - *fma_op -- go: MaskedFusedMultiplySubAdd132 - asm: "VFMSUBADD132PS|VFMSUBADD132PD" - in: - - *fma_op - - class: mask - - *fma_op - - *fma_op - out: - - *fma_op -- go: MaskedFusedMultiplySubAdd213 +- go: MaskedFusedMultiplySubAdd asm: "VFMSUBADD213PS|VFMSUBADD213PD" in: - *fma_op @@ -293,13 +167,4 @@ - *fma_op - *fma_op out: - - *fma_op -- go: MaskedFusedMultiplySubAdd231 - asm: "VFMSUBADD231PS|VFMSUBADD231PD" - in: - - *fma_op - - class: mask - - *fma_op - - *fma_op - out: - *fma_op \ No newline at end of file From 85157288609be6cc2263598ead327572426b0ee8 Mon Sep 17 00:00:00 2001 From: David Chase Date: Wed, 18 Jun 2025 14:07:58 -0400 Subject: [PATCH 095/200] internal/simdgen: added fp1gp1fp1 register mask This is for VPINSR[BWDQ] and is paired with dev.simd CL 682656 Change-Id: I66d71c37c04a27e3cf113a0c2ffa63c5713cacf1 Reviewed-on: https://go-review.googlesource.com/c/arch/+/682679 Reviewed-by: Cherry Mui LUCI-TryBot-Result: Go LUCI --- internal/simdgen/gen_simdMachineOps.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/internal/simdgen/gen_simdMachineOps.go b/internal/simdgen/gen_simdMachineOps.go index 43ede6ec..0d357305 100644 --- a/internal/simdgen/gen_simdMachineOps.go +++ b/internal/simdgen/gen_simdMachineOps.go @@ -13,7 +13,7 @@ import ( const simdMachineOpsTmpl = `// Code generated by x/arch/internal/simdgen using 'go run . 
-xedPath $XED_PATH -o godefs -goroot $GOROOT go.yaml types.yaml categories.yaml'; DO NOT EDIT. package main -func simdAMD64Ops(fp11, fp21, fp2k1, fp1k1fp1, fp2k1fp1, fp2k1k1, fp31, fp3k1fp1 regInfo) []opData { +func simdAMD64Ops(fp11, fp21, fp2k1, fp1k1fp1, fp2k1fp1, fp2k1k1, fp31, fp3k1fp1, fp1gp1fp1 regInfo) []opData { return []opData{ {{- range .OpsData }} {name: "{{.OpName}}", argLength: {{.OpInLen}}, reg: {{.RegInfo}}, asm: "{{.Asm}}", commutative: {{.Comm}}, typ: "{{.Type}}", resultInArg0: {{.ResultInArg0}}}, @@ -46,7 +46,7 @@ func writeSIMDMachineOps(ops []Operation) *bytes.Buffer { OpsDataImm []opData } seen := map[string]struct{}{} - regInfoSet := map[string]bool{"fp11": true, "fp21": true, "fp2k1": true, "fp2k1fp1": true, "fp2k1k1": true, "fp1k1fp1": true, "fp31": true, "fp3k1fp1": true} + regInfoSet := map[string]bool{"fp11": true, "fp21": true, "fp2k1": true, "fp2k1fp1": true, "fp2k1k1": true, "fp1k1fp1": true, "fp31": true, "fp3k1fp1": true, "fp1gp1fp1": true} opsData := make([]opData, 0) opsDataImm := make([]opData, 0) for _, op := range ops { From 9e765e371c565b49ab89e639ca5dee1f12441d4f Mon Sep 17 00:00:00 2001 From: David Chase Date: Wed, 18 Jun 2025 13:06:05 -0400 Subject: [PATCH 096/200] internal/simdgen: add Operation.ResultType This allows GoArch to retain its original use. Also includes minor template renaming. 
Change-Id: Idea71cb4b8c2e12356cff848b897f84549f536b0 Reviewed-on: https://go-review.googlesource.com/c/arch/+/682676 Reviewed-by: Cherry Mui TryBot-Bypass: David Chase --- internal/simdgen/gen_simdIntrinsics.go | 22 +++++++++++----------- internal/simdgen/gen_utility.go | 12 ++++++------ internal/simdgen/godefs.go | 22 ++++++++++++++++------ 3 files changed, 33 insertions(+), 23 deletions(-) diff --git a/internal/simdgen/gen_simdIntrinsics.go b/internal/simdgen/gen_simdIntrinsics.go index 3910e028..14a5d41a 100644 --- a/internal/simdgen/gen_simdIntrinsics.go +++ b/internal/simdgen/gen_simdIntrinsics.go @@ -25,27 +25,27 @@ const simdPackage = "` + simdPackage + `" func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies ...sys.ArchFamily)) { {{end}} -{{define "op1"}} addF(simdPackage, "{{(index .In 0).Go}}.{{.Go}}", opLen1(ssa.Op{{.Go}}{{(index .In 0).Go}}, {{.GoArch}}), sys.AMD64) +{{define "op1"}} addF(simdPackage, "{{(index .In 0).Go}}.{{.Go}}", opLen1(ssa.Op{{.Go}}{{(index .In 0).Go}}, {{.ResultType}}), sys.AMD64) {{end}} -{{define "op2"}} addF(simdPackage, "{{(index .In 0).Go}}.{{.Go}}", opLen2(ssa.Op{{.Go}}{{(index .In 0).Go}}, {{.GoArch}}), sys.AMD64) +{{define "op2"}} addF(simdPackage, "{{(index .In 0).Go}}.{{.Go}}", opLen2(ssa.Op{{.Go}}{{(index .In 0).Go}}, {{.ResultType}}), sys.AMD64) {{end}} -{{define "op3"}} addF(simdPackage, "{{(index .In 0).Go}}.{{.Go}}", opLen3(ssa.Op{{.Go}}{{(index .In 0).Go}}, {{.GoArch}}), sys.AMD64) +{{define "op3"}} addF(simdPackage, "{{(index .In 0).Go}}.{{.Go}}", opLen3(ssa.Op{{.Go}}{{(index .In 0).Go}}, {{.ResultType}}), sys.AMD64) {{end}} -{{define "op4"}} addF(simdPackage, "{{(index .In 0).Go}}.{{.Go}}", opLen4(ssa.Op{{.Go}}{{(index .In 0).Go}}, {{.GoArch}}), sys.AMD64) +{{define "op4"}} addF(simdPackage, "{{(index .In 0).Go}}.{{.Go}}", opLen4(ssa.Op{{.Go}}{{(index .In 0).Go}}, {{.ResultType}}), sys.AMD64) {{end}} -{{define "op1Imm8"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", 
opLen1Imm8(ssa.Op{{.Go}}{{(index .In 1).Go}}, {{.GoArch}}, {{(index .In 0).ImmOffset}}), sys.AMD64) +{{define "op1Imm8"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen1Imm8(ssa.Op{{.Go}}{{(index .In 1).Go}}, {{.ResultType}}, {{(index .In 0).ImmOffset}}), sys.AMD64) {{end}} -{{define "op2Imm8"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen2Imm8(ssa.Op{{.Go}}{{(index .In 1).Go}}, {{.GoArch}}, {{(index .In 0).ImmOffset}}), sys.AMD64) +{{define "op2Imm8"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen2Imm8(ssa.Op{{.Go}}{{(index .In 1).Go}}, {{.ResultType}}, {{(index .In 0).ImmOffset}}), sys.AMD64) {{end}} -{{define "op3Imm8"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen3Imm8(ssa.Op{{.Go}}{{(index .In 1).Go}}, {{.GoArch}}, {{(index .In 0).ImmOffset}}), sys.AMD64) +{{define "op3Imm8"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen3Imm8(ssa.Op{{.Go}}{{(index .In 1).Go}}, {{.ResultType}}, {{(index .In 0).ImmOffset}}), sys.AMD64) {{end}} -{{define "op4Imm8"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen4Imm8(ssa.Op{{.Go}}{{(index .In 1).Go}}, {{.GoArch}}, {{(index .In 0).ImmOffset}}), sys.AMD64) +{{define "op4Imm8"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen4Imm8(ssa.Op{{.Go}}{{(index .In 1).Go}}, {{.ResultType}}, {{(index .In 0).ImmOffset}}), sys.AMD64) {{end}} {{define "vectorConversion"}} addF(simdPackage, "{{.Tsrc.Name}}.As{{.Tdst.Name}}", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64) {{end}} -{{define "typeMap"}} addF(simdPackage, "Load{{.Name}}", simdLoad(), sys.AMD64) +{{define "loadStore"}} addF(simdPackage, "Load{{.Name}}", simdLoad(), sys.AMD64) addF(simdPackage, "{{.Name}}.Store", simdStore(), sys.AMD64) {{end}} @@ -89,8 +89,8 @@ func writeSIMDIntrinsics(ops []Operation, typeMap simdTypeMap) *bytes.Buffer { } for _, typ := range typesFromTypeMap(typeMap) { - if err := t.ExecuteTemplate(buffer, "typeMap", typ); err != nil { - 
panic(fmt.Errorf("failed to execute typeMap template: %w", err)) + if err := t.ExecuteTemplate(buffer, "loadStore", typ); err != nil { + panic(fmt.Errorf("failed to execute loadStore template: %w", err)) } } diff --git a/internal/simdgen/gen_utility.go b/internal/simdgen/gen_utility.go index 6cdc54ea..e3545fe0 100644 --- a/internal/simdgen/gen_utility.go +++ b/internal/simdgen/gen_utility.go @@ -331,19 +331,19 @@ func (op *Operation) sortOperand() { }) } +func (op Operation) ResultType() string { + return fmt.Sprintf("types.TypeVec%d", *op.Out[0].Bits) +} + // classifyOp returns a classification string, modified operation, and perhaps error based // on the stub and intrinsic shape for the operation. // The classification string is in the regular expression set "op[1234](Imm8)?" func classifyOp(op Operation) (string, Operation, error) { - _, shapeOut, _, immType, _, opNoConstMask, gOp, err := op.shape() + _, _, _, immType, _, opNoConstMask, gOp, err := op.shape() if err != nil { return "", op, err } - // Put the go ssa type in GoArch field, simd intrinsics need it. 
- if shapeOut == OneVregOut || shapeOut == OneKmaskOut || shapeOut == OneVregOutAtIn { - opNoConstMask.GoArch = fmt.Sprintf("types.TypeVec%d", *opNoConstMask.Out[0].Bits) - gOp.GoArch = fmt.Sprintf("types.TypeVec%d", *gOp.Out[0].Bits) - } + if immType == VarImm || immType == ConstVarImm { switch len(opNoConstMask.In) { case 1: diff --git a/internal/simdgen/godefs.go b/internal/simdgen/godefs.go index 64c2e6e9..1650fa2d 100644 --- a/internal/simdgen/godefs.go +++ b/internal/simdgen/godefs.go @@ -45,13 +45,23 @@ func compareStringPointers(x, y *string) int { return 1 } +func compareIntPointers(x, y *int) int { + if x != nil && y != nil { + return *x - *y + } + if x == nil && y == nil { + return 0 + } + if x == nil { + return -1 + } + return 1 +} + func compareOperations(x, y Operation) int { if c := strings.Compare(x.Go, y.Go); c != 0 { return c } - if c := strings.Compare(x.GoArch, y.GoArch); c != 0 { - return c - } xIn, yIn := x.In, y.In if len(xIn) > len(yIn) && xIn[len(xIn)-1].Class == "mask" { @@ -88,13 +98,13 @@ func compareOperands(x, y *Operand) int { if x.Class == "immediate" { return compareStringPointers(x.ImmOffset, y.ImmOffset) } else { - if c := strings.Compare(*x.Base, *y.Base); c != 0 { + if c := compareStringPointers(x.Base, y.Base); c != 0 { return c } - if c := *x.ElemBits - *y.ElemBits; c != 0 { + if c := compareIntPointers(x.ElemBits, y.ElemBits); c != 0 { return c } - return *x.Bits - *y.Bits + return compareIntPointers(x.Bits, y.Bits) } } From 5de79ea63f8bc82d2a64331fa2150671172883e2 Mon Sep 17 00:00:00 2001 From: David Chase Date: Wed, 18 Jun 2025 13:19:44 -0400 Subject: [PATCH 097/200] internal/simdgen: more verbose+informative error printing I found that I needed this while adding some new operations, and probably we will need this again in the future. 
Change-Id: I15bfe3a6117c7cb222df4c18258dded66f05e883 Reviewed-on: https://go-review.googlesource.com/c/arch/+/682677 TryBot-Bypass: David Chase Reviewed-by: Junyang Shao --- internal/simdgen/gen_utility.go | 12 ++++++++---- internal/simdgen/main.go | 3 ++- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/internal/simdgen/gen_utility.go b/internal/simdgen/gen_utility.go index e3545fe0..88e6c068 100644 --- a/internal/simdgen/gen_utility.go +++ b/internal/simdgen/gen_utility.go @@ -510,7 +510,7 @@ func capitalizeFirst(s string) string { // and [writeSIMDSSA], please be careful when updating these constraints. func overwrite(ops []Operation) error { hasClassOverwrite := false - overwrite := func(op []Operand, idx int) error { + overwrite := func(op []Operand, idx int, o Operation) error { if op[idx].OverwriteClass != nil { if op[idx].OverwriteBase == nil { return fmt.Errorf("simdgen: [OverwriteClass] must be set together with [OverwriteBase]: %s", op[idx]) @@ -536,15 +536,19 @@ func overwrite(ops []Operation) error { *op[idx].Base = oBase } if op[idx].OverwriteElementBits != nil { + if op[idx].ElemBits == nil { + panic(fmt.Errorf("ElemBits is nil at operand %d of %v", idx, o)) + } *op[idx].ElemBits = *op[idx].OverwriteElementBits *op[idx].Go = fmt.Sprintf("%s%dx%d", capitalizeFirst(*op[idx].Base), *op[idx].ElemBits, *op[idx].Bits / *op[idx].ElemBits) + } return nil } - for i := range ops { + for i, o := range ops { hasClassOverwrite = false for j := range ops[i].In { - if err := overwrite(ops[i].In, j); err != nil { + if err := overwrite(ops[i].In, j, o); err != nil { return err } if hasClassOverwrite { @@ -552,7 +556,7 @@ func overwrite(ops []Operation) error { } } for j := range ops[i].Out { - if err := overwrite(ops[i].Out, j); err != nil { + if err := overwrite(ops[i].Out, j, o); err != nil { return err } } diff --git a/internal/simdgen/main.go b/internal/simdgen/main.go index 14bf9b8f..f1c9dc8b 100644 --- a/internal/simdgen/main.go +++ 
b/internal/simdgen/main.go @@ -260,6 +260,7 @@ func validate(cl unify.Closure, required map[*unify.Value]struct{}) { // at least say "it doesn't unify with anything in x.yaml". That's a lot // of work, but if we have trouble debugging unification failure it may // be worth it. - fmt.Fprintf(os.Stderr, "%s: def required, but did not unify\n", def.PosString()) + fmt.Fprintf(os.Stderr, "%s: def required, but did not unify (%v)\n", + def.PosString(), def) } } From 09e5d8adfaa9ea1ec8deea023a394d10926eb9af Mon Sep 17 00:00:00 2001 From: David Chase Date: Wed, 18 Jun 2025 13:22:44 -0400 Subject: [PATCH 098/200] internal/simdgen: add some support for scalar args This is not complete; the SSA op change needs to be paired with a change to the compiler, so it will be small and separate. Change-Id: Iee0523152f0f9b158c6bfaa6403ee73c71a99665 Reviewed-on: https://go-review.googlesource.com/c/arch/+/682678 LUCI-TryBot-Result: Go LUCI Reviewed-by: Junyang Shao --- internal/simdgen/gen_simdTypes.go | 17 +++++-- internal/simdgen/gen_utility.go | 14 +++--- internal/simdgen/godefs.go | 2 +- internal/simdgen/types.yaml | 82 ++++++++++++++++++------------- 4 files changed, 69 insertions(+), 46 deletions(-) diff --git a/internal/simdgen/gen_simdTypes.go b/internal/simdgen/gen_simdTypes.go index 4966dae7..d5ba1267 100644 --- a/internal/simdgen/gen_simdTypes.go +++ b/internal/simdgen/gen_simdTypes.go @@ -192,7 +192,8 @@ func parseSIMDTypes(ops []Operation) simdTypeMap { return } seen[*arg.Go] = struct{}{} - lanes := *arg.Bits / *arg.ElemBits + + lanes := *arg.Lanes base := fmt.Sprintf("%s%d", *arg.Base, *arg.ElemBits) tagFieldNameS := fmt.Sprintf("%sx%d", base, lanes) tagFieldS := fmt.Sprintf("%s v%d", tagFieldNameS, *arg.Bits) @@ -227,7 +228,8 @@ func vConvertFromTypeMap(typeMap simdTypeMap) []simdTypePair { for _, ts := range typeMap { for i, tsrc := range ts { for j, tdst := range ts { - if i != j && tsrc.Type == tdst.Type && tsrc.Type == "vreg" { + if i != j && tsrc.Type == tdst.Type && 
tsrc.Type == "vreg" && + tsrc.Lanes > 1 && tdst.Lanes > 1 { v = append(v, simdTypePair{tsrc, tdst}) } } @@ -254,7 +256,9 @@ func typesFromTypeMap(typeMap simdTypeMap) []simdType { m := []simdType{} for _, ts := range typeMap { for _, tsrc := range ts { - m = append(m, tsrc) + if tsrc.Lanes > 1 { + m = append(m, tsrc) + } } } slices.SortFunc(m, compareSimdTypes) @@ -277,10 +281,17 @@ func writeSIMDTypes(typeMap simdTypeMap) *bytes.Buffer { sort.Ints(sizes) for _, size := range sizes { + if size <= 64 { + // these are scalar + continue + } if err := t.ExecuteTemplate(buffer, "sizeTmpl", size); err != nil { panic(fmt.Errorf("failed to execute size template for size %d: %w", size, err)) } for _, typeDef := range typeMap[size] { + if typeDef.Lanes == 1 { + continue + } if err := t.ExecuteTemplate(buffer, "typeTmpl", typeDef); err != nil { panic(fmt.Errorf("failed to execute type template for type %s: %w", typeDef.Name, err)) } diff --git a/internal/simdgen/gen_utility.go b/internal/simdgen/gen_utility.go index 88e6c068..1aab1d6e 100644 --- a/internal/simdgen/gen_utility.go +++ b/internal/simdgen/gen_utility.go @@ -128,7 +128,7 @@ const ( // opNoConstImmMask is op with its inputs excluding the const imm and mask. // // This function does not modify op. 
-func (op *Operation) shape() (shapeIn, shapeOut, maskType, immTyppe int, opNoImm Operation, opNoConstMask Operation, opNoImmConstMask Operation, err error) { +func (op *Operation) shape() (shapeIn, shapeOut, maskType, immType int, opNoImm Operation, opNoConstMask Operation, opNoImmConstMask Operation, err error) { if len(op.Out) > 1 { err = fmt.Errorf("simdgen only supports 1 output: %s", op) return @@ -211,18 +211,18 @@ func (op *Operation) shape() (shapeIn, shapeOut, maskType, immTyppe int, opNoImm removeImm(&opNoImmConstMask) if op.In[0].Const != nil { if op.In[0].ImmOffset != nil { - immTyppe = ConstVarImm + immType = ConstVarImm } else { - immTyppe = ConstImm + immType = ConstImm } } else if op.In[0].ImmOffset != nil { - immTyppe = VarImm + immType = VarImm } else { err = fmt.Errorf("simdgen requires imm to have at least one of ImmOffset or Const set: %s", op) return } } else { - immTyppe = NoImm + immType = NoImm } if maskCount == 0 { if iConstMask == -1 { @@ -317,8 +317,8 @@ func (op *Operation) regShape() (string, error) { // sortOperand sorts op.In by putting immediates first, then vreg, and mask the last. // TODO: verify that this is a safe assumption of the prog strcture. -// from my observation looks like in asm, imms are always the first, masks are always the last, with -// vreg in betwee... +// from my observation looks like in asm, imms are always the first, +// masks are always the last, with vreg in between. func (op *Operation) sortOperand() { priority := map[string]int{"immediate": 2, "vreg": 1, "mask": 0} sort.SliceStable(op.In, func(i, j int) bool { diff --git a/internal/simdgen/godefs.go b/internal/simdgen/godefs.go index 1650fa2d..a8dd9791 100644 --- a/internal/simdgen/godefs.go +++ b/internal/simdgen/godefs.go @@ -124,7 +124,7 @@ type Operand struct { // The compiler will right-shift the user-passed value by ImmOffset and set it as the AuxInt // field of the operation. 
ImmOffset *string - Lanes *int // Lanes should equal Bits/ElemBits + Lanes *int // *Lanes equals Bits/ElemBits except for scalars, when *Lanes == 1 // If non-nil, it means the [Class] field is overwritten here, right now this is used to // overwrite the results of AVX2 compares to masks. OverwriteClass *string diff --git a/internal/simdgen/types.yaml b/internal/simdgen/types.yaml index c8b3660e..ec087ffd 100644 --- a/internal/simdgen/types.yaml +++ b/internal/simdgen/types.yaml @@ -7,48 +7,60 @@ in: !repeat - !sum &types - - {class: vreg, go: Int8x16, base: "int", elemBits: 8, bits: 128, lanes: 16} - - {class: vreg, go: Uint8x16, base: "uint", elemBits: 8, bits: 128, lanes: 16} - - {class: vreg, go: Int16x8, base: "int", elemBits: 16, bits: 128, lanes: 8} - - {class: vreg, go: Uint16x8, base: "uint", elemBits: 16, bits: 128, lanes: 8} - - {class: vreg, go: Int32x4, base: "int", elemBits: 32, bits: 128, lanes: 4} - - {class: vreg, go: Uint32x4, base: "uint", elemBits: 32, bits: 128, lanes: 4} - - {class: vreg, go: Int64x2, base: "int", elemBits: 64, bits: 128, lanes: 2} - - {class: vreg, go: Uint64x2, base: "uint", elemBits: 64, bits: 128, lanes: 2} - - {class: vreg, go: Float32x4, base: "float", elemBits: 32, bits: 128, lanes: 4} - - {class: vreg, go: Float64x2, base: "float", elemBits: 64, bits: 128, lanes: 2} - - {class: vreg, go: Int8x32, base: "int", elemBits: 8, bits: 256, lanes: 32} - - {class: vreg, go: Uint8x32, base: "uint", elemBits: 8, bits: 256, lanes: 32} + - {class: vreg, go: Int8x16, base: "int", elemBits: 8, bits: 128, lanes: 16} + - {class: vreg, go: Uint8x16, base: "uint", elemBits: 8, bits: 128, lanes: 16} + - {class: vreg, go: Int16x8, base: "int", elemBits: 16, bits: 128, lanes: 8} + - {class: vreg, go: Uint16x8, base: "uint", elemBits: 16, bits: 128, lanes: 8} + - {class: vreg, go: Int32x4, base: "int", elemBits: 32, bits: 128, lanes: 4} + - {class: vreg, go: Uint32x4, base: "uint", elemBits: 32, bits: 128, lanes: 4} + - {class: vreg, go: 
Int64x2, base: "int", elemBits: 64, bits: 128, lanes: 2} + - {class: vreg, go: Uint64x2, base: "uint", elemBits: 64, bits: 128, lanes: 2} + - {class: vreg, go: Float32x4, base: "float", elemBits: 32, bits: 128, lanes: 4} + - {class: vreg, go: Float64x2, base: "float", elemBits: 64, bits: 128, lanes: 2} + - {class: vreg, go: Int8x32, base: "int", elemBits: 8, bits: 256, lanes: 32} + - {class: vreg, go: Uint8x32, base: "uint", elemBits: 8, bits: 256, lanes: 32} - {class: vreg, go: Int16x16, base: "int", elemBits: 16, bits: 256, lanes: 16} - {class: vreg, go: Uint16x16, base: "uint", elemBits: 16, bits: 256, lanes: 16} - - {class: vreg, go: Int32x8, base: "int", elemBits: 32, bits: 256, lanes: 8} - - {class: vreg, go: Uint32x8, base: "uint", elemBits: 32, bits: 256, lanes: 8} - - {class: vreg, go: Int64x4, base: "int", elemBits: 64, bits: 256, lanes: 4} - - {class: vreg, go: Uint64x4, base: "uint", elemBits: 64, bits: 256, lanes: 4} - - {class: vreg, go: Float32x8, base: "float", elemBits: 32, bits: 256, lanes: 8} - - {class: vreg, go: Float64x4, base: "float", elemBits: 64, bits: 256, lanes: 4} - - {class: vreg, go: Int8x64, base: "int", elemBits: 8, bits: 512, lanes: 64} - - {class: vreg, go: Uint8x64, base: "uint", elemBits: 8, bits: 512, lanes: 64} + - {class: vreg, go: Int32x8, base: "int", elemBits: 32, bits: 256, lanes: 8} + - {class: vreg, go: Uint32x8, base: "uint", elemBits: 32, bits: 256, lanes: 8} + - {class: vreg, go: Int64x4, base: "int", elemBits: 64, bits: 256, lanes: 4} + - {class: vreg, go: Uint64x4, base: "uint", elemBits: 64, bits: 256, lanes: 4} + - {class: vreg, go: Float32x8, base: "float", elemBits: 32, bits: 256, lanes: 8} + - {class: vreg, go: Float64x4, base: "float", elemBits: 64, bits: 256, lanes: 4} + - {class: vreg, go: Int8x64, base: "int", elemBits: 8, bits: 512, lanes: 64} + - {class: vreg, go: Uint8x64, base: "uint", elemBits: 8, bits: 512, lanes: 64} - {class: vreg, go: Int16x32, base: "int", elemBits: 16, bits: 512, lanes: 32} - 
{class: vreg, go: Uint16x32, base: "uint", elemBits: 16, bits: 512, lanes: 32} - {class: vreg, go: Int32x16, base: "int", elemBits: 32, bits: 512, lanes: 16} - {class: vreg, go: Uint32x16, base: "uint", elemBits: 32, bits: 512, lanes: 16} - - {class: vreg, go: Int64x8, base: "int", elemBits: 64, bits: 512, lanes: 8} - - {class: vreg, go: Uint64x8, base: "uint", elemBits: 64, bits: 512, lanes: 8} + - {class: vreg, go: Int64x8, base: "int", elemBits: 64, bits: 512, lanes: 8} + - {class: vreg, go: Uint64x8, base: "uint", elemBits: 64, bits: 512, lanes: 8} - {class: vreg, go: Float32x16, base: "float", elemBits: 32, bits: 512, lanes: 16} - - {class: vreg, go: Float64x8, base: "float", elemBits: 64, bits: 512, lanes: 8} - - {class: mask, go: Mask8x16, base: "int", elemBits: 8, bits: 128, lanes: 16} - - {class: mask, go: Mask16x8, base: "int", elemBits: 16, bits: 128, lanes: 8} - - {class: mask, go: Mask32x4, base: "int", elemBits: 32, bits: 128, lanes: 4} - - {class: mask, go: Mask64x2, base: "int", elemBits: 64, bits: 128, lanes: 2} - - {class: mask, go: Mask8x32, base: "int", elemBits: 8, bits: 256, lanes: 32} - - {class: mask, go: Mask16x16, base: "int", elemBits: 16, bits: 256, lanes: 16} - - {class: mask, go: Mask32x8, base: "int", elemBits: 32, bits: 256, lanes: 8} - - {class: mask, go: Mask64x4, base: "int", elemBits: 64, bits: 256, lanes: 4} - - {class: mask, go: Mask8x64, base: "int", elemBits: 8, bits: 512, lanes: 64} - - {class: mask, go: Mask16x32, base: "int", elemBits: 16, bits: 512, lanes: 32} - - {class: mask, go: Mask32x16, base: "int", elemBits: 32, bits: 512, lanes: 16} - - {class: mask, go: Mask64x8, base: "int", elemBits: 64, bits: 512, lanes: 8} + - {class: vreg, go: Float64x8, base: "float", elemBits: 64, bits: 512, lanes: 8} + - {class: mask, go: Mask8x16, base: "int", elemBits: 8, bits: 128, lanes: 16} + - {class: mask, go: Mask16x8, base: "int", elemBits: 16, bits: 128, lanes: 8} + - {class: mask, go: Mask32x4, base: "int", elemBits: 32, bits: 
128, lanes: 4} + - {class: mask, go: Mask64x2, base: "int", elemBits: 64, bits: 128, lanes: 2} + - {class: mask, go: Mask8x32, base: "int", elemBits: 8, bits: 256, lanes: 32} + - {class: mask, go: Mask16x16, base: "int", elemBits: 16, bits: 256, lanes: 16} + - {class: mask, go: Mask32x8, base: "int", elemBits: 32, bits: 256, lanes: 8} + - {class: mask, go: Mask64x4, base: "int", elemBits: 64, bits: 256, lanes: 4} + - {class: mask, go: Mask8x64, base: "int", elemBits: 8, bits: 512, lanes: 64} + - {class: mask, go: Mask16x32, base: "int", elemBits: 16, bits: 512, lanes: 32} + - {class: mask, go: Mask32x16, base: "int", elemBits: 32, bits: 512, lanes: 16} + - {class: mask, go: Mask64x8, base: "int", elemBits: 64, bits: 512, lanes: 8} + + - {class: vreg, go: float64, base: "float", elemBits: 64, bits: 64, lanes: 1} + - {class: vreg, go: float32, base: "float", elemBits: 32, bits: 32, lanes: 1} + - {class: vreg, go: int64, base: "int", elemBits: 64, bits: 64, lanes: 1} + - {class: vreg, go: int32, base: "int", elemBits: 32, bits: 32, lanes: 1} + - {class: vreg, go: int16, base: "int", elemBits: 16, bits: 32, lanes: 1} # bits: 32 is from XED for at least one instruction + - {class: vreg, go: int8, base: "int", elemBits: 8, bits: 32, lanes: 1} # bits: 32 is from XED for at least one instruction + - {class: vreg, go: uint64, base: "uint", elemBits: 64, bits: 64, lanes: 1} + - {class: vreg, go: uint32, base: "uint", elemBits: 32, bits: 32, lanes: 1} + - {class: vreg, go: uint16, base: "uint", elemBits: 16, bits: 32, lanes: 1} # bits: 32 is from XED for at least one instruction + - {class: vreg, go: uint8, base: "uint", elemBits: 8, bits: 32, lanes: 1} # bits: 32 is from XED for at least one instruction + - {class: immediate, go: Immediate} # TODO: we only support imms that are not used as value -- usually as instruction semantic predicate like VPCMP as of now. 
out: !repeat - *types From b9b711eaf5b36f070d135c447ad8d18a0612b3a3 Mon Sep 17 00:00:00 2001 From: David Chase Date: Fri, 20 Jun 2025 15:34:17 -0400 Subject: [PATCH 099/200] internal/simdgen: add VPINSR[BWDQ] includes adjustments to register mask and code generation helper generation. Paired with dev.simd CL 683035 Change-Id: Ibfd42bac14596601f81190535ecf6095dfb41123 Reviewed-on: https://go-review.googlesource.com/c/arch/+/683055 LUCI-TryBot-Result: Go LUCI Reviewed-by: Junyang Shao --- internal/simdgen/categories.yaml | 5 +++ internal/simdgen/gen_simdMachineOps.go | 2 +- internal/simdgen/gen_simdssa.go | 5 +-- internal/simdgen/gen_utility.go | 36 ++++++++++++++++------ internal/simdgen/go.yaml | 13 ++++++++ internal/simdgen/ops/Moves/categories.yaml | 6 ++++ internal/simdgen/ops/Moves/go.yaml | 14 +++++++++ 7 files changed, 68 insertions(+), 13 deletions(-) create mode 100644 internal/simdgen/ops/Moves/categories.yaml create mode 100644 internal/simdgen/ops/Moves/go.yaml diff --git a/internal/simdgen/categories.yaml b/internal/simdgen/categories.yaml index c33a62e6..d8081bc4 100644 --- a/internal/simdgen/categories.yaml +++ b/internal/simdgen/categories.yaml @@ -613,6 +613,11 @@ extension: "AVX.*" documentation: !string |- // MaskedMin computes the minimum of corresponding elements. +- go: SetElem + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // SetElem sets a single constant-indexed element's value. 
- go: Mul commutative: "true" extension: "AVX.*" diff --git a/internal/simdgen/gen_simdMachineOps.go b/internal/simdgen/gen_simdMachineOps.go index 0d357305..f251e2e6 100644 --- a/internal/simdgen/gen_simdMachineOps.go +++ b/internal/simdgen/gen_simdMachineOps.go @@ -70,7 +70,7 @@ func writeSIMDMachineOps(ops []Operation) *bytes.Buffer { panic(err) } if _, ok := regInfoSet[regInfo]; !ok { - panic(fmt.Errorf("unsupported register constraint, please update the template and AMD64Ops.go: %s", regInfo)) + panic(fmt.Errorf("unsupported register constraint, please update the template and AMD64Ops.go: %s. Op is %s", regInfo, op)) } var outType string if shapeOut == OneVregOut || shapeOut == OneVregOutAtIn || gOp.Out[0].OverwriteClass != nil { diff --git a/internal/simdgen/gen_simdssa.go b/internal/simdgen/gen_simdssa.go index e606b69c..a7305c9b 100644 --- a/internal/simdgen/gen_simdssa.go +++ b/internal/simdgen/gen_simdssa.go @@ -75,6 +75,7 @@ func writeSIMDSSA(ops []Operation) *bytes.Buffer { "fp2k1k1Imm8", "fp31ResultInArg0", "fp3k1fp1ResultInArg0", + "fp1gp1fp1Imm8", } regInfoSet := map[string][]string{} for _, key := range regInfoKeys { @@ -82,7 +83,7 @@ func writeSIMDSSA(ops []Operation) *bytes.Buffer { } seen := map[string]struct{}{} - allUnseen := map[string]struct{}{} + allUnseen := make(map[string][]Operation) for _, op := range ops { asm := op.Asm shapeIn, shapeOut, maskType, _, _, _, gOp, err := op.shape() @@ -114,7 +115,7 @@ func writeSIMDSSA(ops []Operation) *bytes.Buffer { regShape += "Imm8" } if _, ok := regInfoSet[regShape]; !ok { - allUnseen[regShape] = struct{}{} + allUnseen[regShape] = append(allUnseen[regShape], op) } regInfoSet[regShape] = append(regInfoSet[regShape], caseStr) } diff --git a/internal/simdgen/gen_utility.go b/internal/simdgen/gen_utility.go index 1aab1d6e..eb9e82b6 100644 --- a/internal/simdgen/gen_utility.go +++ b/internal/simdgen/gen_utility.go @@ -277,10 +277,14 @@ func (op *Operation) shape() (shapeIn, shapeOut, maskType, immType 
int, opNoImm func (op *Operation) regShape() (string, error) { _, _, _, _, _, _, gOp, _ := op.shape() var regInfo string - var vRegInCnt, kMaskInCnt, vRegOutCnt, kMaskOutCnt int + var vRegInCnt, gRegInCnt, kMaskInCnt, vRegOutCnt, gRegOutCnt, kMaskOutCnt int for _, in := range gOp.In { if in.Class == "vreg" { - vRegInCnt++ + if *in.Lanes == 1 { + gRegInCnt++ + } else { + vRegInCnt++ + } } else if in.Class == "mask" { kMaskInCnt++ } @@ -288,29 +292,41 @@ func (op *Operation) regShape() (string, error) { for _, out := range gOp.Out { // If class overwrite is happening, that's not really a mask but a vreg. if out.Class == "vreg" || out.OverwriteClass != nil { - vRegOutCnt++ + if out.Lanes != nil && *out.Lanes == 1 { + gRegOutCnt++ + } else { + vRegOutCnt++ + } } else if out.Class == "mask" { kMaskOutCnt++ } } - var vRegInS, kMaskInS, vRegOutS, kMaskOutS string + var inRegs, inMasks, outRegs, outMasks string if vRegInCnt > 0 { - vRegInS = fmt.Sprintf("fp%d", vRegInCnt) + inRegs = fmt.Sprintf("fp%d", vRegInCnt) + } + if gRegInCnt > 0 { + inRegs += fmt.Sprintf("gp%d", gRegInCnt) } if kMaskInCnt > 0 { - kMaskInS = fmt.Sprintf("k%d", kMaskInCnt) + inMasks = fmt.Sprintf("k%d", kMaskInCnt) } if vRegOutCnt > 0 { - vRegOutS = fmt.Sprintf("fp%d", vRegOutCnt) + outRegs = fmt.Sprintf("fp%d", vRegOutCnt) + } + if gRegOutCnt > 0 { + outRegs += fmt.Sprintf("gp%d", gRegOutCnt) } if kMaskOutCnt > 0 { - kMaskOutS = fmt.Sprintf("k%d", kMaskOutCnt) + outMasks = fmt.Sprintf("k%d", kMaskOutCnt) } - if kMaskInCnt == 0 && kMaskOutCnt == 0 { + if kMaskInCnt == 0 && kMaskOutCnt == 0 && gRegInCnt == 0 && gRegOutCnt == 0 { // For pure fp we can abbreviate it as fp%d%d. 
regInfo = fmt.Sprintf("fp%d%d", vRegInCnt, vRegOutCnt) + } else if kMaskInCnt == 0 && kMaskOutCnt == 0 { + regInfo = fmt.Sprintf("%s%s", inRegs, outRegs) } else { - regInfo = fmt.Sprintf("%s%s%s%s", vRegInS, kMaskInS, vRegOutS, kMaskOutS) + regInfo = fmt.Sprintf("%s%s%s%s", inRegs, inMasks, outRegs, outMasks) } return regInfo, nil } diff --git a/internal/simdgen/go.yaml b/internal/simdgen/go.yaml index 20bd9d57..e36fc350 100644 --- a/internal/simdgen/go.yaml +++ b/internal/simdgen/go.yaml @@ -734,6 +734,19 @@ asm: "V?MINP[SD]" in: *1mask2float out: *1float +- go: SetElem + asm: "VPINSR[BWDQ]" + in: + - &t + class: vreg + base: $b + - class: vreg + base: $b + lanes: 1 # Scalar, darn it! + - class: immediate + immOffset: 0 + out: + - *t # "Normal" multiplication is only available for floats. # This only covers the single and double precision. - go: Mul diff --git a/internal/simdgen/ops/Moves/categories.yaml b/internal/simdgen/ops/Moves/categories.yaml new file mode 100644 index 00000000..26a1aa7d --- /dev/null +++ b/internal/simdgen/ops/Moves/categories.yaml @@ -0,0 +1,6 @@ +!sum +- go: SetElem + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // SetElem sets a single constant-indexed element's value diff --git a/internal/simdgen/ops/Moves/go.yaml b/internal/simdgen/ops/Moves/go.yaml new file mode 100644 index 00000000..f015395e --- /dev/null +++ b/internal/simdgen/ops/Moves/go.yaml @@ -0,0 +1,14 @@ +!sum +- go: SetElem + asm: "VPINSR[BWDQ]" + in: + - &t + class: vreg + base: $b + - class: vreg + base: $b + lanes: 1 # Scalar, darn it! 
+ - class: immediate + immOffset: 0 + out: + - *t From 738b605eb97c90d324cb7b39f287714fa13ddf4b Mon Sep 17 00:00:00 2001 From: David Chase Date: Fri, 20 Jun 2025 17:11:30 -0400 Subject: [PATCH 100/200] internal/simdgen: make simd regmask naming more like existing conventions Paired with dev.simd CL 683115 Change-Id: Ic1e8332480dbd5d7858912c603d35ec17032239e Reviewed-on: https://go-review.googlesource.com/c/arch/+/682937 Reviewed-by: Junyang Shao LUCI-TryBot-Result: Go LUCI --- internal/simdgen/gen_simdMachineOps.go | 4 +-- internal/simdgen/gen_simdssa.go | 20 +++++++------- internal/simdgen/gen_utility.go | 36 ++++++++++++++------------ 3 files changed, 31 insertions(+), 29 deletions(-) diff --git a/internal/simdgen/gen_simdMachineOps.go b/internal/simdgen/gen_simdMachineOps.go index f251e2e6..a5ab8f27 100644 --- a/internal/simdgen/gen_simdMachineOps.go +++ b/internal/simdgen/gen_simdMachineOps.go @@ -13,7 +13,7 @@ import ( const simdMachineOpsTmpl = `// Code generated by x/arch/internal/simdgen using 'go run . -xedPath $XED_PATH -o godefs -goroot $GOROOT go.yaml types.yaml categories.yaml'; DO NOT EDIT. 
package main -func simdAMD64Ops(fp11, fp21, fp2k1, fp1k1fp1, fp2k1fp1, fp2k1k1, fp31, fp3k1fp1, fp1gp1fp1 regInfo) []opData { +func simdAMD64Ops(fp11, fp21, fp2k, fpkfp, fp2kfp, fp2kk, fp31, fp3kfp, fpgpfp regInfo) []opData { return []opData{ {{- range .OpsData }} {name: "{{.OpName}}", argLength: {{.OpInLen}}, reg: {{.RegInfo}}, asm: "{{.Asm}}", commutative: {{.Comm}}, typ: "{{.Type}}", resultInArg0: {{.ResultInArg0}}}, @@ -46,7 +46,7 @@ func writeSIMDMachineOps(ops []Operation) *bytes.Buffer { OpsDataImm []opData } seen := map[string]struct{}{} - regInfoSet := map[string]bool{"fp11": true, "fp21": true, "fp2k1": true, "fp2k1fp1": true, "fp2k1k1": true, "fp1k1fp1": true, "fp31": true, "fp3k1fp1": true, "fp1gp1fp1": true} + regInfoSet := map[string]bool{"fp11": true, "fp21": true, "fp2k": true, "fp2kfp": true, "fp2kk": true, "fpkfp": true, "fp31": true, "fp3kfp": true, "fpgpfp": true} opsData := make([]opData, 0) opsDataImm := make([]opData, 0) for _, op := range ops { diff --git a/internal/simdgen/gen_simdssa.go b/internal/simdgen/gen_simdssa.go index a7305c9b..dc121507 100644 --- a/internal/simdgen/gen_simdssa.go +++ b/internal/simdgen/gen_simdssa.go @@ -62,20 +62,20 @@ func writeSIMDSSA(ops []Operation) *bytes.Buffer { regInfoKeys := []string{ "fp11", "fp21", - "fp2k1", - "fp2k1fp1", - "fp2k1k1", - "fp1k1fp1", + "fp2k", + "fp2kfp", + "fp2kk", + "fpkfp", "fp31", - "fp3k1fp1", + "fp3kfp", "fp11Imm8", - "fp1k1fp1Imm8", + "fpkfpImm8", "fp21Imm8", - "fp2k1Imm8", - "fp2k1k1Imm8", + "fp2kImm8", + "fp2kkImm8", "fp31ResultInArg0", - "fp3k1fp1ResultInArg0", - "fp1gp1fp1Imm8", + "fp3kfpResultInArg0", + "fpgpfpImm8", } regInfoSet := map[string][]string{} for _, key := range regInfoKeys { diff --git a/internal/simdgen/gen_utility.go b/internal/simdgen/gen_utility.go index eb9e82b6..b1ff4347 100644 --- a/internal/simdgen/gen_utility.go +++ b/internal/simdgen/gen_utility.go @@ -302,24 +302,26 @@ func (op *Operation) regShape() (string, error) { } } var inRegs, inMasks, outRegs, 
outMasks string - if vRegInCnt > 0 { - inRegs = fmt.Sprintf("fp%d", vRegInCnt) - } - if gRegInCnt > 0 { - inRegs += fmt.Sprintf("gp%d", gRegInCnt) - } - if kMaskInCnt > 0 { - inMasks = fmt.Sprintf("k%d", kMaskInCnt) - } - if vRegOutCnt > 0 { - outRegs = fmt.Sprintf("fp%d", vRegOutCnt) - } - if gRegOutCnt > 0 { - outRegs += fmt.Sprintf("gp%d", gRegOutCnt) - } - if kMaskOutCnt > 0 { - outMasks = fmt.Sprintf("k%d", kMaskOutCnt) + + rmAbbrev := func(s string, i int) string { + if i == 0 { + return "" + } + if i == 1 { + return s + } + return fmt.Sprintf("%s%d", s, i) + } + + inRegs = rmAbbrev("fp", vRegInCnt) + inRegs += rmAbbrev("gp", gRegInCnt) + inMasks = rmAbbrev("k", kMaskInCnt) + + outRegs = rmAbbrev("fp", vRegOutCnt) + outRegs += rmAbbrev("gp", gRegOutCnt) + outMasks = rmAbbrev("k", kMaskOutCnt) + if kMaskInCnt == 0 && kMaskOutCnt == 0 && gRegInCnt == 0 && gRegOutCnt == 0 { // For pure fp we can abbreviate it as fp%d%d. regInfo = fmt.Sprintf("fp%d%d", vRegInCnt, vRegOutCnt) From 71e8be6bb281f5a0612bc03c64d1711b4f70deef Mon Sep 17 00:00:00 2001 From: David Chase Date: Mon, 23 Jun 2025 11:05:30 -0400 Subject: [PATCH 101/200] internal/simdgen: fix priority This reorders so that the order numbers ascending and the comparision is "<", which conforms to comparison function conventions and will allow attending another operand class (memory) without counting down into negative numbers. 
Change-Id: Ib8d229dd68c018c072f29ebd02424868004aa94b Reviewed-on: https://go-review.googlesource.com/c/arch/+/683335 Reviewed-by: Junyang Shao LUCI-TryBot-Result: Go LUCI --- internal/simdgen/gen_utility.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/internal/simdgen/gen_utility.go b/internal/simdgen/gen_utility.go index b1ff4347..24d42106 100644 --- a/internal/simdgen/gen_utility.go +++ b/internal/simdgen/gen_utility.go @@ -338,12 +338,12 @@ func (op *Operation) regShape() (string, error) { // from my observation looks like in asm, imms are always the first, // masks are always the last, with vreg in between. func (op *Operation) sortOperand() { - priority := map[string]int{"immediate": 2, "vreg": 1, "mask": 0} + priority := map[string]int{"immediate": 0, "vreg": 1, "mask": 2} sort.SliceStable(op.In, func(i, j int) bool { pi := priority[op.In[i].Class] pj := priority[op.In[j].Class] if pi != pj { - return pi > pj + return pi < pj } return op.In[i].AsmPos < op.In[j].AsmPos }) From 2389045c982e0529eff298f84c7de93d1c47350f Mon Sep 17 00:00:00 2001 From: David Chase Date: Mon, 23 Jun 2025 11:58:09 -0400 Subject: [PATCH 102/200] internal/arch: add separate "greg" operand class For operations with scalar operands Change-Id: I4849e6aec623787b07e5ebb26b053631c0ba5abe Reviewed-on: https://go-review.googlesource.com/c/arch/+/683375 Reviewed-by: Junyang Shao LUCI-TryBot-Result: Go LUCI --- internal/simdgen/gen_utility.go | 20 ++++------ internal/simdgen/go.yaml | 2 +- internal/simdgen/godefs.go | 2 +- internal/simdgen/ops/Moves/go.yaml | 2 +- internal/simdgen/types.yaml | 20 +++++----- internal/simdgen/xed.go | 64 ++++++++++++++++++++++++------ 6 files changed, 73 insertions(+), 37 deletions(-) diff --git a/internal/simdgen/gen_utility.go b/internal/simdgen/gen_utility.go index 24d42106..5b3d2052 100644 --- a/internal/simdgen/gen_utility.go +++ b/internal/simdgen/gen_utility.go @@ -280,11 +280,9 @@ func (op *Operation) regShape() (string, error) { 
var vRegInCnt, gRegInCnt, kMaskInCnt, vRegOutCnt, gRegOutCnt, kMaskOutCnt int for _, in := range gOp.In { if in.Class == "vreg" { - if *in.Lanes == 1 { - gRegInCnt++ - } else { - vRegInCnt++ - } + vRegInCnt++ + } else if in.Class == "greg" { + gRegInCnt++ } else if in.Class == "mask" { kMaskInCnt++ } @@ -292,11 +290,9 @@ func (op *Operation) regShape() (string, error) { for _, out := range gOp.Out { // If class overwrite is happening, that's not really a mask but a vreg. if out.Class == "vreg" || out.OverwriteClass != nil { - if out.Lanes != nil && *out.Lanes == 1 { - gRegOutCnt++ - } else { - vRegOutCnt++ - } + vRegOutCnt++ + } else if out.Class == "greg" { + gRegOutCnt++ } else if out.Class == "mask" { kMaskOutCnt++ } @@ -334,11 +330,11 @@ func (op *Operation) regShape() (string, error) { } // sortOperand sorts op.In by putting immediates first, then vreg, and mask the last. -// TODO: verify that this is a safe assumption of the prog strcture. +// TODO: verify that this is a safe assumption of the prog structure. // from my observation looks like in asm, imms are always the first, // masks are always the last, with vreg in between. func (op *Operation) sortOperand() { - priority := map[string]int{"immediate": 0, "vreg": 1, "mask": 2} + priority := map[string]int{"immediate": 0, "vreg": 1, "greg": 1, "mask": 2} sort.SliceStable(op.In, func(i, j int) bool { pi := priority[op.In[i].Class] pj := priority[op.In[j].Class] diff --git a/internal/simdgen/go.yaml b/internal/simdgen/go.yaml index e36fc350..572f02ed 100644 --- a/internal/simdgen/go.yaml +++ b/internal/simdgen/go.yaml @@ -740,7 +740,7 @@ - &t class: vreg base: $b - - class: vreg + - class: greg base: $b lanes: 1 # Scalar, darn it! 
- class: immediate diff --git a/internal/simdgen/godefs.go b/internal/simdgen/godefs.go index a8dd9791..d9d0c20f 100644 --- a/internal/simdgen/godefs.go +++ b/internal/simdgen/godefs.go @@ -109,7 +109,7 @@ func compareOperands(x, y *Operand) int { } type Operand struct { - Class string // One of "mask", "immediate", "vreg" and "mem" + Class string // One of "mask", "immediate", "vreg", "greg", and "mem" Go *string // Go type of this operand AsmPos int // Position of this operand in the assembly instruction diff --git a/internal/simdgen/ops/Moves/go.yaml b/internal/simdgen/ops/Moves/go.yaml index f015395e..cdcb0ee8 100644 --- a/internal/simdgen/ops/Moves/go.yaml +++ b/internal/simdgen/ops/Moves/go.yaml @@ -5,7 +5,7 @@ - &t class: vreg base: $b - - class: vreg + - class: greg base: $b lanes: 1 # Scalar, darn it! - class: immediate diff --git a/internal/simdgen/types.yaml b/internal/simdgen/types.yaml index ec087ffd..765ae2e0 100644 --- a/internal/simdgen/types.yaml +++ b/internal/simdgen/types.yaml @@ -50,16 +50,16 @@ in: !repeat - {class: mask, go: Mask32x16, base: "int", elemBits: 32, bits: 512, lanes: 16} - {class: mask, go: Mask64x8, base: "int", elemBits: 64, bits: 512, lanes: 8} - - {class: vreg, go: float64, base: "float", elemBits: 64, bits: 64, lanes: 1} - - {class: vreg, go: float32, base: "float", elemBits: 32, bits: 32, lanes: 1} - - {class: vreg, go: int64, base: "int", elemBits: 64, bits: 64, lanes: 1} - - {class: vreg, go: int32, base: "int", elemBits: 32, bits: 32, lanes: 1} - - {class: vreg, go: int16, base: "int", elemBits: 16, bits: 32, lanes: 1} # bits: 32 is from XED for at least one instruction - - {class: vreg, go: int8, base: "int", elemBits: 8, bits: 32, lanes: 1} # bits: 32 is from XED for at least one instruction - - {class: vreg, go: uint64, base: "uint", elemBits: 64, bits: 64, lanes: 1} - - {class: vreg, go: uint32, base: "uint", elemBits: 32, bits: 32, lanes: 1} - - {class: vreg, go: uint16, base: "uint", elemBits: 16, bits: 32, lanes: 
1} # bits: 32 is from XED for at least one instruction - - {class: vreg, go: uint8, base: "uint", elemBits: 8, bits: 32, lanes: 1} # bits: 32 is from XED for at least one instruction + - {class: greg, go: float64, base: "float", elemBits: 64, bits: 64, lanes: 1} + - {class: greg, go: float32, base: "float", elemBits: 32, bits: 32, lanes: 1} + - {class: greg, go: int64, base: "int", elemBits: 64, bits: 64, lanes: 1} + - {class: greg, go: int32, base: "int", elemBits: 32, bits: 32, lanes: 1} + - {class: greg, go: int16, base: "int", elemBits: 16, bits: 32, lanes: 1} # bits: 32 is from XED for at least one instruction + - {class: greg, go: int8, base: "int", elemBits: 8, bits: 32, lanes: 1} # bits: 32 is from XED for at least one instruction + - {class: greg, go: uint64, base: "uint", elemBits: 64, bits: 64, lanes: 1} + - {class: greg, go: uint32, base: "uint", elemBits: 32, bits: 32, lanes: 1} + - {class: greg, go: uint16, base: "uint", elemBits: 16, bits: 32, lanes: 1} # bits: 32 is from XED for at least one instruction + - {class: greg, go: uint8, base: "uint", elemBits: 8, bits: 32, lanes: 1} # bits: 32 is from XED for at least one instruction - {class: immediate, go: Immediate} # TODO: we only support imms that are not used as value -- usually as instruction semantic predicate like VPCMP as of now. out: !repeat diff --git a/internal/simdgen/xed.go b/internal/simdgen/xed.go index 44360435..e46e1be4 100644 --- a/internal/simdgen/xed.go +++ b/internal/simdgen/xed.go @@ -16,6 +16,12 @@ import ( "gopkg.in/yaml.v3" ) +const ( + NOT_REG_CLASS = 0 // not a register + VREG_CLASS = 1 // classify as a vector register; see + GREG_CLASS = 2 // classify as a general register +) + // TODO: Doc. Returns Values with Def domains. func loadXED(xedPath string) []*unify.Value { // TODO: Obviously a bunch more to do here. 
@@ -102,6 +108,12 @@ type operandVReg struct { // Vector register elemBaseType scalarBaseType } +type operandGReg struct { // Vector register + operandCommon + vecShape + elemBaseType scalarBaseType +} + // operandMask is a vector mask. // // Regardless of the actual mask representation, the [vecShape] of this operand @@ -155,6 +167,22 @@ func (o operandVReg) toValue() (fields []string, vals []*unify.Value) { return } +func (o operandGReg) toValue() (fields []string, vals []*unify.Value) { + baseDomain, err := unify.NewStringRegex(o.elemBaseType.regex()) + if err != nil { + panic("parsing baseRe: " + err.Error()) + } + fields, vals = []string{"class", "bits", "base"}, []*unify.Value{ + strVal("greg"), + strVal(o.bits), + unify.NewValue(baseDomain)} + if o.elemBits != o.bits { + fields, vals = append(fields, "elemBits"), append(vals, strVal(o.elemBits)) + } + // otherwise it means the vector could be any shape. + return +} + func (o operandMask) toValue() (fields []string, vals []*unify.Value) { return []string{"class", "elemBits", "bits"}, []*unify.Value{strVal("mask"), strVal(o.elemBits), strVal(o.bits)} } @@ -211,8 +239,8 @@ func decodeOperand(db *xeddata.Database, operand string) (operand, error) { operandCommon: common, }, nil } else { - regBits, ok := decodeReg(op) - if !ok { + class, regBits := decodeReg(op) + if class == NOT_REG_CLASS { return nil, fmt.Errorf("failed to decode register %q", operand) } baseType, elemBits, ok := decodeType(op) @@ -220,11 +248,20 @@ func decodeOperand(db *xeddata.Database, operand string) (operand, error) { return nil, fmt.Errorf("failed to decode register width %q", operand) } shape := vecShape{elemBits: elemBits, bits: regBits} - return operandVReg{ + if class == VREG_CLASS { + return operandVReg{ + operandCommon: common, + vecShape: shape, + elemBaseType: baseType, + }, nil + } + // general register + return operandGReg{ operandCommon: common, vecShape: shape, elemBaseType: baseType, }, nil + } } else if 
strings.HasPrefix(lhs, "IMM") { _, bits, ok := decodeType(op) @@ -395,7 +432,10 @@ func singular[T comparable](xs []T) (T, bool) { return xs[0], true } -func decodeReg(op *xeddata.Operand) (w int, ok bool) { +// decodeReg returns class (NOT_REG_CLASS, VREG_CLASS, GREG_CLASS), +// and width in bits. If the operand cannot be decided as a register, +// then the clas is NOT_REG_CLASS. +func decodeReg(op *xeddata.Operand) (class, width int) { // op.Width tells us the total width, e.g.,: // // dq => 128 bits (XMM) @@ -408,27 +448,27 @@ func decodeReg(op *xeddata.Operand) (w int, ok bool) { // Hence, we dig into the register sets themselves. if !strings.HasPrefix(op.NameLHS(), "REG") { - return 0, false + return NOT_REG_CLASS, 0 } // TODO: We shouldn't be relying on the macro naming conventions. We should // use all-dec-patterns.txt, but xeddata doesn't support that table right now. rhs := op.NameRHS() if !strings.HasSuffix(rhs, "()") { - return 0, false + return NOT_REG_CLASS, 0 } switch { case strings.HasPrefix(rhs, "XMM_"): - return 128, true + return VREG_CLASS, 128 case strings.HasPrefix(rhs, "YMM_"): - return 256, true + return VREG_CLASS, 256 case strings.HasPrefix(rhs, "ZMM_"): - return 512, true + return VREG_CLASS, 512 case strings.HasPrefix(rhs, "GPR64_"), strings.HasPrefix(rhs, "VGPR64_"): - return 64, true + return GREG_CLASS, 64 case strings.HasPrefix(rhs, "GPR32_"), strings.HasPrefix(rhs, "VGPR32_"): - return 32, true + return GREG_CLASS, 32 } - return 0, false + return NOT_REG_CLASS, 0 } var xtypeRe = regexp.MustCompile(`^([iuf])([0-9]+)$`) From 1a0b84a45921bab571f64a62d4083f44a1c7474f Mon Sep 17 00:00:00 2001 From: Junyang Shao Date: Fri, 20 Jun 2025 05:54:38 +0000 Subject: [PATCH 103/200] internal/simdgen: add test wrapper generation Thic CL generates CL 683015. 
Change-Id: Idc7e8656835942aaefdb670c0de98e07c2cde8e1 Reviewed-on: https://go-review.googlesource.com/c/arch/+/682995 Reviewed-by: David Chase LUCI-TryBot-Result: Go LUCI --- internal/simdgen/gen_simdTypes.go | 162 ++++++++++++++++++++++++++++++ internal/simdgen/godefs.go | 1 + 2 files changed, 163 insertions(+) diff --git a/internal/simdgen/gen_simdTypes.go b/internal/simdgen/gen_simdTypes.go index d5ba1267..f43cc268 100644 --- a/internal/simdgen/gen_simdTypes.go +++ b/internal/simdgen/gen_simdTypes.go @@ -177,6 +177,168 @@ func (x {{.Name}}) Or(y {{.Name}}) {{.Name}} {{end}} ` +const simdTestsWrapperTmpl = `{{define "fileHeader"}}// Code generated by x/arch/internal/simdgen using 'go run . -xedPath $XED_PATH -o godefs -goroot $GOROOT go.yaml types.yaml categories.yaml'; DO NOT EDIT. + +//go:build goexperiment.simd + +package simd_test + +import ( + "simd" + "testing" +) +{{end}} +{{define "op"}} +func test{{.OpShape}}(t *testing.T, {{.BaseArgDefList}}, want []{{.ResBaseType}}, which string) { + t.Helper() + var gotv simd.{{.ResVecType}} + got := make([]{{.ResBaseType}}, len(want)){{range $i, $a := .ArgVecTypes}} + vec{{$i}} := simd.Load{{$a}}Slice(v{{$i}}){{end}} + switch which { +{{range .Ops}}case "{{.}}": + gotv = vec0.{{.}}({{$.VecArgList}}){{$.OptionalMaskToInt}} +{{end}} + default: + t.Errorf("Unknown method: {{.Arg0VecType}}.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} +{{end}} +` + +// writeSIMDTestsWrapper generates the test wrappers and writes it to simd_amd64_testwrappers.go +// within the specified directory. 
+func writeSIMDTestsWrapper(ops []Operation) *bytes.Buffer { + t := templateOf(simdTestsWrapperTmpl, "simdTestWrappers") + buffer := new(bytes.Buffer) + + if err := t.ExecuteTemplate(buffer, "fileHeader", nil); err != nil { + panic(fmt.Errorf("failed to execute fileHeader template: %w", err)) + } + + // The comment shows an example of Uint8x64.Add + type opData struct { + OpShape string // "Uint8x64Uint8x64Uint8x64" + BaseArgDefList string // "v0 uint8[], v1 uint8[]" + VecArgList string // "vec1" + ResBaseType string // "uint8" + ResVecType string // "Uint8x64" + Arg0VecType string // "Uint8x64" + ArgVecTypes []string // ["Uint8x64", "Uint8x64"] + OptionalMaskToInt string // ".AsInt8x64()" or "" + Ops []string // ["Add", "Sub"] + } + + opsByShape := make(map[string]opData) + + for _, o := range ops { + _, _, _, immType, _, _, gOp, err := o.shape() + if err != nil { + panic(err) + } + if immType == VarImm || immType == ConstVarImm { + // Operations with variable immediates should be called directly + // instead of through wrappers. + continue + } + var shape string + var baseArgDefList []string + var vecArgList []string + var argVecTypes []string + var vec string + allSameVec := true + masked := strings.HasPrefix(gOp.Go, "Masked") + skippedMaskCnt := 0 + vecCnt := 0 + for i, in := range gOp.In { + baseArgDefList = append(baseArgDefList, fmt.Sprintf("v%d []%s%d", i, *in.Base, *in.ElemBits)) + if i != 0 { + maskConversion := "" + if in.Class == "mask" { + maskConversion = fmt.Sprintf(".As%s()", *in.Go) + } + vecArgList = append(vecArgList, fmt.Sprintf("vec%d%s", i, maskConversion)) + } + // gOp will only have either mask or vreg operand, so the following check + // is sufficient to detect whether it's a pure vreg or masked pure vreg operation + // with all the same vectors. 
+ if in.Class == "mask" { + if masked && skippedMaskCnt == 0 { + skippedMaskCnt++ + } else { + allSameVec = false + } + } else { + if len(vec) > 0 { + if vec != *in.Go { + allSameVec = false + } + } + vecCnt++ + vec = *in.Go + } + shape += *in.Go + argVecTypes = append(argVecTypes, strings.ReplaceAll(*in.Go, "Mask", "Int")) + } + if *gOp.Out[0].Go != vec { + allSameVec = false + } + shape += *gOp.Out[0].Go + if allSameVec { + numToName := map[int]string{1: "Unary", 2: "Binary", 3: "Ternary"} + if _, ok := numToName[vecCnt]; !ok { + panic(fmt.Errorf("unknown shape: %s", shape)) + } + shape = vec + numToName[vecCnt] + if masked { + shape = "Masked" + shape + } + } + optionalMaskToInt := "" + if gOp.Out[0].Class == "mask" { + optionalMaskToInt = fmt.Sprintf(".As%s()", strings.ReplaceAll(*gOp.Out[0].Go, "Mask", "Int")) + } + if _, ok := opsByShape[shape]; !ok { + opsByShape[shape] = opData{ + OpShape: shape, + BaseArgDefList: strings.Join(baseArgDefList, ", "), + VecArgList: strings.Join(vecArgList, ", "), + ResBaseType: fmt.Sprintf("%s%d", *gOp.Out[0].Base, *gOp.Out[0].ElemBits), + ResVecType: strings.ReplaceAll(*gOp.Out[0].Go, "Mask", "Int"), + Arg0VecType: *gOp.In[0].Go, + ArgVecTypes: argVecTypes, + OptionalMaskToInt: optionalMaskToInt, + } + } + data := opsByShape[shape] + data.Ops = append(data.Ops, gOp.Go) + opsByShape[shape] = data + } + + compareOpData := func(x, y opData) int { + return strings.Compare(x.OpShape, y.OpShape) + } + data := make([]opData, 0) + for _, d := range opsByShape { + slices.SortFunc(d.Ops, strings.Compare) + data = append(data, d) + } + slices.SortFunc(data, compareOpData) + + for _, d := range data { + if err := t.ExecuteTemplate(buffer, "op", d); err != nil { + panic(fmt.Errorf("failed to execute op template for op shape %s: %w", d.OpShape, err)) + } + } + + return buffer +} + // parseSIMDTypes groups go simd types by their vector sizes, and // returns a map whose key is the vector size, value is the simd type. 
func parseSIMDTypes(ops []Operation) simdTypeMap { diff --git a/internal/simdgen/godefs.go b/internal/simdgen/godefs.go index d9d0c20f..15cce7dd 100644 --- a/internal/simdgen/godefs.go +++ b/internal/simdgen/godefs.go @@ -187,6 +187,7 @@ func writeGoDefs(path string, cl unify.Closure) error { formatWriteAndClose(writeSIMDTypes(typeMap), path, "src/"+simdPackage+"/types_amd64.go") formatWriteAndClose(writeSIMDStubs(deduped, typeMap), path, "src/"+simdPackage+"/stubs_amd64.go") + formatWriteAndClose(writeSIMDTestsWrapper(deduped), path, "src/"+simdPackage+"/simd_wrapped_test.go") formatWriteAndClose(writeSIMDIntrinsics(deduped, typeMap), path, "src/cmd/compile/internal/ssagen/simdintrinsics.go") formatWriteAndClose(writeSIMDGenericOps(deduped), path, "src/cmd/compile/internal/ssa/_gen/simdgenericOps.go") formatWriteAndClose(writeSIMDMachineOps(deduped), path, "src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go") From 9468e52a75e4ade23b53e06a15aa72b8b97b52d5 Mon Sep 17 00:00:00 2001 From: Junyang Shao Date: Mon, 23 Jun 2025 19:17:39 +0000 Subject: [PATCH 104/200] internal/simdgen: change test wrapper names Makes ($vector [, $vector]*) => $mask operations to be under wrapper test$(vector)(Unary|Binary|Ternary)Compare. 
Change-Id: I2194053d54f38e7f55b2822ced7fc7702b34f54f Reviewed-on: https://go-review.googlesource.com/c/arch/+/683455 Auto-Submit: Junyang Shao Reviewed-by: David Chase LUCI-TryBot-Result: Go LUCI --- internal/simdgen/gen_simdTypes.go | 29 ++++++++++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-) diff --git a/internal/simdgen/gen_simdTypes.go b/internal/simdgen/gen_simdTypes.go index f43cc268..01c5b503 100644 --- a/internal/simdgen/gen_simdTypes.go +++ b/internal/simdgen/gen_simdTypes.go @@ -251,6 +251,7 @@ func writeSIMDTestsWrapper(ops []Operation) *bytes.Buffer { var vecArgList []string var argVecTypes []string var vec string + var vecOp Operand allSameVec := true masked := strings.HasPrefix(gOp.Go, "Masked") skippedMaskCnt := 0 @@ -281,12 +282,24 @@ func writeSIMDTestsWrapper(ops []Operation) *bytes.Buffer { } vecCnt++ vec = *in.Go + vecOp = in } shape += *in.Go argVecTypes = append(argVecTypes, strings.ReplaceAll(*in.Go, "Mask", "Int")) } - if *gOp.Out[0].Go != vec { - allSameVec = false + isCompare := false + isWiden := false + outOp := gOp.Out[0] + if *outOp.Go != vec { + if allSameVec && outOp.Class == "mask" && *outOp.Bits == *vecOp.Bits && *outOp.Lanes == *vecOp.Lanes { + isCompare = true + } + if allSameVec && outOp.Class == "vreg" && *outOp.Bits == *vecOp.Bits && *outOp.Base == *vecOp.Base && *outOp.Lanes == *vecOp.Lanes/2 { + isWiden = true + } + if !isCompare && !isWiden { + allSameVec = false + } } shape += *gOp.Out[0].Go if allSameVec { @@ -296,7 +309,17 @@ func writeSIMDTestsWrapper(ops []Operation) *bytes.Buffer { } shape = vec + numToName[vecCnt] if masked { - shape = "Masked" + shape + shape += "Masked" + } + if isCompare { + if vecCnt == 2 { + // Remove "Binary" + shape = strings.ReplaceAll(shape, "Binary", "") + } + shape += "Compare" + } + if isWiden { + shape += "Widen" } } optionalMaskToInt := "" From d0b1dcac0887bbb854c914107d92d9b37b46c94a Mon Sep 17 00:00:00 2001 From: David Chase Date: Mon, 23 Jun 2025 15:50:46 -0400 
Subject: [PATCH 105/200] internal/simdgen: corrected type size confusion mistakes were made through the magic of unification. This seems to set the scalar type sizes in a way that works better. Before, weirdly, 32-bit scalars were claimed to have only 8 bits of width. This seems to make that right. Change-Id: Ia89261e80e1529c757e6e26ca337523f76244a18 Reviewed-on: https://go-review.googlesource.com/c/arch/+/683495 Reviewed-by: Junyang Shao LUCI-TryBot-Result: Go LUCI --- internal/simdgen/gen_simdTypes.go | 2 +- internal/simdgen/ops/Moves/categories.yaml | 2 +- internal/simdgen/types.yaml | 20 ++++++++++---------- internal/simdgen/xed.go | 2 ++ 4 files changed, 14 insertions(+), 12 deletions(-) diff --git a/internal/simdgen/gen_simdTypes.go b/internal/simdgen/gen_simdTypes.go index 01c5b503..bf238f58 100644 --- a/internal/simdgen/gen_simdTypes.go +++ b/internal/simdgen/gen_simdTypes.go @@ -369,7 +369,7 @@ func parseSIMDTypes(ops []Operation) simdTypeMap { ret := map[int][]simdType{} seen := map[string]struct{}{} processArg := func(arg Operand) { - if arg.Class == "immediate" { + if arg.Class == "immediate" || arg.Class == "greg" { // Immediates are not encoded as vector types. return } diff --git a/internal/simdgen/ops/Moves/categories.yaml b/internal/simdgen/ops/Moves/categories.yaml index 26a1aa7d..9cf443a2 100644 --- a/internal/simdgen/ops/Moves/categories.yaml +++ b/internal/simdgen/ops/Moves/categories.yaml @@ -3,4 +3,4 @@ commutative: "false" extension: "AVX.*" documentation: !string |- - // SetElem sets a single constant-indexed element's value + // SetElem sets a single constant-indexed element's value. 
diff --git a/internal/simdgen/types.yaml b/internal/simdgen/types.yaml index 765ae2e0..5178a216 100644 --- a/internal/simdgen/types.yaml +++ b/internal/simdgen/types.yaml @@ -50,16 +50,16 @@ in: !repeat - {class: mask, go: Mask32x16, base: "int", elemBits: 32, bits: 512, lanes: 16} - {class: mask, go: Mask64x8, base: "int", elemBits: 64, bits: 512, lanes: 8} - - {class: greg, go: float64, base: "float", elemBits: 64, bits: 64, lanes: 1} - - {class: greg, go: float32, base: "float", elemBits: 32, bits: 32, lanes: 1} - - {class: greg, go: int64, base: "int", elemBits: 64, bits: 64, lanes: 1} - - {class: greg, go: int32, base: "int", elemBits: 32, bits: 32, lanes: 1} - - {class: greg, go: int16, base: "int", elemBits: 16, bits: 32, lanes: 1} # bits: 32 is from XED for at least one instruction - - {class: greg, go: int8, base: "int", elemBits: 8, bits: 32, lanes: 1} # bits: 32 is from XED for at least one instruction - - {class: greg, go: uint64, base: "uint", elemBits: 64, bits: 64, lanes: 1} - - {class: greg, go: uint32, base: "uint", elemBits: 32, bits: 32, lanes: 1} - - {class: greg, go: uint16, base: "uint", elemBits: 16, bits: 32, lanes: 1} # bits: 32 is from XED for at least one instruction - - {class: greg, go: uint8, base: "uint", elemBits: 8, bits: 32, lanes: 1} # bits: 32 is from XED for at least one instruction + - {class: greg, go: float64, base: "float", bits: 64, lanes: 1} + - {class: greg, go: float32, base: "float", bits: 32, lanes: 1} + - {class: greg, go: int64, base: "int", bits: 64, lanes: 1} + - {class: greg, go: int32, base: "int", bits: 32, lanes: 1} + - {class: greg, go: int16, base: "int", bits: 16, lanes: 1} + - {class: greg, go: int8, base: "int", bits: 8, lanes: 1} + - {class: greg, go: uint64, base: "uint", bits: 64, lanes: 1} + - {class: greg, go: uint32, base: "uint", bits: 32, lanes: 1} + - {class: greg, go: uint16, base: "uint", bits: 16, lanes: 1} + - {class: greg, go: uint8, base: "uint", bits: 8, lanes: 1} - {class: immediate, go: 
Immediate} # TODO: we only support imms that are not used as value -- usually as instruction semantic predicate like VPCMP as of now. out: !repeat diff --git a/internal/simdgen/xed.go b/internal/simdgen/xed.go index e46e1be4..1c26e1d1 100644 --- a/internal/simdgen/xed.go +++ b/internal/simdgen/xed.go @@ -256,6 +256,8 @@ func decodeOperand(db *xeddata.Database, operand string) (operand, error) { }, nil } // general register + m := min(shape.bits, shape.elemBits) + shape.bits, shape.elemBits = m, m return operandGReg{ operandCommon: common, vecShape: shape, From 38b7fb13370c8364f7d3350bff021e892c98c669 Mon Sep 17 00:00:00 2001 From: David Chase Date: Tue, 24 Jun 2025 12:31:30 -0400 Subject: [PATCH 106/200] internal/simdgen: convert return-error to panics. Panics are more helpful for debugging, and this is not end-user code. Change-Id: I965acf50f0e13b3e2b71ba509195df11e6b75e63 Reviewed-on: https://go-review.googlesource.com/c/arch/+/683855 LUCI-TryBot-Result: Go LUCI Reviewed-by: Junyang Shao --- internal/simdgen/gen_simdGenericOps.go | 5 +- internal/simdgen/gen_simdMachineOps.go | 8 +-- internal/simdgen/gen_simdTypes.go | 8 +-- internal/simdgen/gen_simdrules.go | 6 +- internal/simdgen/gen_simdssa.go | 6 +- internal/simdgen/gen_utility.go | 83 +++++++++++++------------- 6 files changed, 52 insertions(+), 64 deletions(-) diff --git a/internal/simdgen/gen_simdGenericOps.go b/internal/simdgen/gen_simdGenericOps.go index 6f8b16b7..f34cf9a1 100644 --- a/internal/simdgen/gen_simdGenericOps.go +++ b/internal/simdgen/gen_simdGenericOps.go @@ -43,10 +43,7 @@ func writeSIMDGenericOps(ops []Operation) *bytes.Buffer { } var opsData opData for _, op := range ops { - _, _, _, immType, _, _, gOp, err := op.shape() - if err != nil { - panic(err) - } + _, _, _, immType, _, _, gOp := op.shape() genericNames := gOp.Go + *gOp.In[0].Go gOpData := genericOpsData{*gOp.In[0].Go + gOp.Go, genericNames, len(gOp.In), op.Commutative} if immType == VarImm || immType == ConstVarImm { diff --git 
a/internal/simdgen/gen_simdMachineOps.go b/internal/simdgen/gen_simdMachineOps.go index a5ab8f27..f1de3104 100644 --- a/internal/simdgen/gen_simdMachineOps.go +++ b/internal/simdgen/gen_simdMachineOps.go @@ -50,15 +50,15 @@ func writeSIMDMachineOps(ops []Operation) *bytes.Buffer { opsData := make([]opData, 0) opsDataImm := make([]opData, 0) for _, op := range ops { - shapeIn, shapeOut, maskType, _, _, _, gOp, err := op.shape() - if err != nil { - panic(err) - } + shapeIn, shapeOut, maskType, _, _, _, gOp := op.shape() + asm := gOp.Asm if maskType == OneMask { asm += "Masked" } + asm = fmt.Sprintf("%s%d", asm, *gOp.Out[0].Bits) + // TODO: all our masked operations are now zeroing, we need to generate machine ops with merging masks, maybe copy // one here with a name suffix "Merging". The rewrite rules will need them. if _, ok := seen[asm]; ok { diff --git a/internal/simdgen/gen_simdTypes.go b/internal/simdgen/gen_simdTypes.go index bf238f58..b4ec0206 100644 --- a/internal/simdgen/gen_simdTypes.go +++ b/internal/simdgen/gen_simdTypes.go @@ -237,10 +237,8 @@ func writeSIMDTestsWrapper(ops []Operation) *bytes.Buffer { opsByShape := make(map[string]opData) for _, o := range ops { - _, _, _, immType, _, _, gOp, err := o.shape() - if err != nil { - panic(err) - } + _, _, _, immType, _, _, gOp := o.shape() + if immType == VarImm || immType == ConstVarImm { // Operations with variable immediates should be called directly // instead of through wrappers. 
@@ -504,7 +502,7 @@ func writeSIMDStubs(ops []Operation, typeMap simdTypeMap) *bytes.Buffer { fmt.Fprintf(buffer, "\n/* %s */\n", op.Go) } if err := t.ExecuteTemplate(buffer, s, op); err != nil { - panic(fmt.Errorf("failed to execute template %s for op %s: %w", s, op.Go, err)) + panic(fmt.Errorf("failed to execute template %s for op %v: %w", s, op, err)) } } else { diff --git a/internal/simdgen/gen_simdrules.go b/internal/simdgen/gen_simdrules.go index 651ae382..00ef8568 100644 --- a/internal/simdgen/gen_simdrules.go +++ b/internal/simdgen/gen_simdrules.go @@ -66,10 +66,8 @@ func writeSIMDRules(ops []Operation) *bytes.Buffer { var allData []tplRuleData for _, opr := range ops { - opInShape, opOutShape, maskType, immType, _, _, gOp, err := opr.shape() - if err != nil { - panic(err) - } + opInShape, opOutShape, maskType, immType, _, _, gOp := opr.shape() + vregInCnt := len(gOp.In) asm := gOp.Asm if maskType == OneMask { diff --git a/internal/simdgen/gen_simdssa.go b/internal/simdgen/gen_simdssa.go index dc121507..2993f27a 100644 --- a/internal/simdgen/gen_simdssa.go +++ b/internal/simdgen/gen_simdssa.go @@ -86,10 +86,8 @@ func writeSIMDSSA(ops []Operation) *bytes.Buffer { allUnseen := make(map[string][]Operation) for _, op := range ops { asm := op.Asm - shapeIn, shapeOut, maskType, _, _, _, gOp, err := op.shape() - if err != nil { - panic(err) - } + shapeIn, shapeOut, maskType, _, _, _, gOp := op.shape() + if maskType == 2 { asm += "Masked" } diff --git a/internal/simdgen/gen_utility.go b/internal/simdgen/gen_utility.go index 5b3d2052..af7b984f 100644 --- a/internal/simdgen/gen_utility.go +++ b/internal/simdgen/gen_utility.go @@ -128,10 +128,9 @@ const ( // opNoConstImmMask is op with its inputs excluding the const imm and mask. // // This function does not modify op. 
-func (op *Operation) shape() (shapeIn, shapeOut, maskType, immType int, opNoImm Operation, opNoConstMask Operation, opNoImmConstMask Operation, err error) { +func (op *Operation) shape() (shapeIn, shapeOut, maskType, immType int, opNoImm Operation, opNoConstMask Operation, opNoImmConstMask Operation) { if len(op.Out) > 1 { - err = fmt.Errorf("simdgen only supports 1 output: %s", op) - return + panic(fmt.Errorf("simdgen only supports 1 output: %s", op)) } var outputReg int if len(op.Out) == 1 { @@ -141,15 +140,13 @@ func (op *Operation) shape() (shapeIn, shapeOut, maskType, immType int, opNoImm } else if op.Out[0].Class == "mask" { shapeOut = OneKmaskOut } else { - err = fmt.Errorf("simdgen only supports output of class vreg or mask: %s", op) - return + panic(fmt.Errorf("simdgen only supports output of class vreg or mask: %s", op)) } } else { shapeOut = NoOut // TODO: are these only Load/Stores? // We manually supported two Load and Store, are those enough? - err = fmt.Errorf("simdgen only supports 1 output: %s", op) - return + panic(fmt.Errorf("simdgen only supports 1 output: %s", op)) } hasImm := false maskCount := 0 @@ -160,31 +157,28 @@ func (op *Operation) shape() (shapeIn, shapeOut, maskType, immType int, opNoImm if shapeOut != OneVregOutAtIn && in.AsmPos == 0 && in.Class == "vreg" { shapeOut = OneVregOutAtIn } else { - err = fmt.Errorf("simdgen only support output and input sharing the same position case of \"the first input is vreg and the only output\": %s", op) - return + panic(fmt.Errorf("simdgen only support output and input sharing the same position case of \"the first input is vreg and the only output\": %s", op)) } } if in.Class == "immediate" { // A manual check on XED data found that AMD64 SIMD instructions at most // have 1 immediates. So we don't need to check this here. 
if *in.Bits != 8 { - err = fmt.Errorf("simdgen only supports immediates of 8 bits: %s", op) - return + panic(fmt.Errorf("simdgen only supports immediates of 8 bits: %s", op)) } hasImm = true } else if in.Class == "mask" { if in.Const != nil { if *in.Const == "K0" { if iConstMask != -1 { - err = fmt.Errorf("simdgen only supports one const mask in inputs: %s", op) - return + panic(fmt.Errorf("simdgen only supports one const mask in inputs: %s", op)) } iConstMask = i // Const mask should be invisible in ssa and prog, so we don't treat it as a mask. // More specifically in prog, it's optional: when missing the assembler will default it to K0). // TODO: verify the above assumption is safe. } else { - err = fmt.Errorf("simdgen only supports const mask K0 in inputs: %s", op) + panic(fmt.Errorf("simdgen only supports const mask K0 in inputs: %s", op)) } } else { maskCount++ @@ -218,8 +212,7 @@ func (op *Operation) shape() (shapeIn, shapeOut, maskType, immType int, opNoImm } else if op.In[0].ImmOffset != nil { immType = VarImm } else { - err = fmt.Errorf("simdgen requires imm to have at least one of ImmOffset or Const set: %s", op) - return + panic(fmt.Errorf("simdgen requires imm to have at least one of ImmOffset or Const set: %s", op)) } } else { immType = NoImm @@ -235,16 +228,13 @@ func (op *Operation) shape() (shapeIn, shapeOut, maskType, immType int, opNoImm } checkPureMask := func() bool { if hasImm { - err = fmt.Errorf("simdgen does not support immediates in pure mask operations: %s", op) - return true + panic(fmt.Errorf("simdgen does not support immediates in pure mask operations: %s", op)) } if iConstMask != -1 { - err = fmt.Errorf("simdgen does not support const mask in pure mask operations: %s", op) - return true + panic(fmt.Errorf("simdgen does not support const mask in pure mask operations: %s", op)) } if hasVreg { - err = fmt.Errorf("simdgen does not support more than 1 masks in non-pure mask operations: %s", op) - return true + panic(fmt.Errorf("simdgen does 
not support more than 1 masks in non-pure mask operations: %s", op)) } return false } @@ -275,7 +265,7 @@ func (op *Operation) shape() (shapeIn, shapeOut, maskType, immType int, opNoImm // regShape returns a string representation of the register shape. func (op *Operation) regShape() (string, error) { - _, _, _, _, _, _, gOp, _ := op.shape() + _, _, _, _, _, _, gOp := op.shape() var regInfo string var vRegInCnt, gRegInCnt, kMaskInCnt, vRegOutCnt, gRegOutCnt, kMaskOutCnt int for _, in := range gOp.In { @@ -349,14 +339,27 @@ func (op Operation) ResultType() string { return fmt.Sprintf("types.TypeVec%d", *op.Out[0].Bits) } +// GoType returns the Go type returned by this operation (relative to the simd package), +// for example "int32" or "Int8x16". This is used in a template. +func (op Operation) GoType() string { + if op.Out[0].Class == "greg" { + if op.Go == "GetElem" { + at := 0 // proper value of at depends on whether immediate was stripped or not + if op.In[at].Class == "immediate" { + at++ + } + return fmt.Sprintf("%s%d", *op.Out[0].Base, *op.In[at].ElemBits) + } + panic(fmt.Errorf("Implement this case for %v", op)) + } + return *op.Out[0].Go +} + // classifyOp returns a classification string, modified operation, and perhaps error based // on the stub and intrinsic shape for the operation. // The classification string is in the regular expression set "op[1234](Imm8)?" 
func classifyOp(op Operation) (string, Operation, error) { - _, _, _, immType, _, opNoConstMask, gOp, err := op.shape() - if err != nil { - return "", op, err - } + _, _, _, immType, _, opNoConstMask, gOp := op.shape() if immType == VarImm || immType == ConstVarImm { switch len(opNoConstMask.In) { @@ -415,10 +418,8 @@ func splitMask(ops []Operation) ([]Operation, error) { if op.Masked == nil || *op.Masked != "true" { continue } - shapeIn, _, _, _, _, _, _, err := op.shape() - if err != nil { - return nil, err - } + shapeIn, _, _, _, _, _, _ := op.shape() + if shapeIn == OneKmaskIn || shapeIn == OneKmaskImmIn { op2 := op op2.In = slices.Clone(op.In) @@ -447,10 +448,8 @@ func splitMask(ops []Operation) ([]Operation, error) { func dedupGodef(ops []Operation) ([]Operation, error) { seen := map[string][]Operation{} for _, op := range ops { - _, _, _, _, _, _, gOp, err := op.shape() - if err != nil { - return nil, err - } + _, _, _, _, _, _, gOp := op.shape() + genericNames := gOp.Go + *gOp.In[0].Go seen[genericNames] = append(seen[genericNames], op) } @@ -493,10 +492,8 @@ func copyConstImm(ops []Operation) error { if op.ConstImm == nil { continue } - _, _, _, immType, _, _, _, err := op.shape() - if err != nil { - return err - } + _, _, _, immType, _, _, _ := op.shape() + if immType == ConstImm || immType == ConstVarImm { op.In[0].Const = op.ConstImm } @@ -527,18 +524,18 @@ func overwrite(ops []Operation) error { overwrite := func(op []Operand, idx int, o Operation) error { if op[idx].OverwriteClass != nil { if op[idx].OverwriteBase == nil { - return fmt.Errorf("simdgen: [OverwriteClass] must be set together with [OverwriteBase]: %s", op[idx]) + panic(fmt.Errorf("simdgen: [OverwriteClass] must be set together with [OverwriteBase]: %s", op[idx])) } oBase := *op[idx].OverwriteBase oClass := *op[idx].OverwriteClass if oClass != "mask" { - return fmt.Errorf("simdgen: [Class] overwrite only supports overwritting to mask: %s", op[idx]) + panic(fmt.Errorf("simdgen: [Class] 
overwrite only supports overwritting to mask: %s", op[idx])) } if oBase != "int" { - return fmt.Errorf("simdgen: [Class] overwrite must set [OverwriteBase] to int: %s", op[idx]) + panic(fmt.Errorf("simdgen: [Class] overwrite must set [OverwriteBase] to int: %s", op[idx])) } if op[idx].Class != "vreg" { - return fmt.Errorf("simdgen: [Class] overwrite must be overwriting [Class] from vreg: %s", op[idx]) + panic(fmt.Errorf("simdgen: [Class] overwrite must be overwriting [Class] from vreg: %s", op[idx])) } hasClassOverwrite = true *op[idx].Base = oBase From 9a776a7966febb277a303d90f42e8ef642c54207 Mon Sep 17 00:00:00 2001 From: David Chase Date: Tue, 24 Jun 2025 15:10:04 -0400 Subject: [PATCH 107/200] internal/simdgen: mute the not-normally-useful errors from XED All they do is blow context off the screen Change-Id: Id1b4d9cd487f568a161d9353b3c04d5e6630a115 Reviewed-on: https://go-review.googlesource.com/c/arch/+/683857 LUCI-TryBot-Result: Go LUCI Reviewed-by: Junyang Shao --- internal/simdgen/godefs.go | 21 ++++++++++++++++----- internal/simdgen/main.go | 10 ++++++++++ internal/simdgen/xed.go | 7 ++++++- 3 files changed, 32 insertions(+), 6 deletions(-) diff --git a/internal/simdgen/godefs.go b/internal/simdgen/godefs.go index 15cce7dd..8dc928f8 100644 --- a/internal/simdgen/godefs.go +++ b/internal/simdgen/godefs.go @@ -159,30 +159,41 @@ func writeGoDefs(path string, cl unify.Closure) error { // The parsed XED data might contain duplicates, like // 512 bits VPADDP. 
deduped := dedup(ops) - log.Printf("dedup len: %d\n", len(ops)) + + if *Verbose { + log.Printf("dedup len: %d\n", len(ops)) + } var err error if err = overwrite(deduped); err != nil { return err } - log.Printf("dedup len: %d\n", len(deduped)) + if *Verbose { + log.Printf("dedup len: %d\n", len(deduped)) + } if !*FlagNoSplitMask { if deduped, err = splitMask(deduped); err != nil { return err } } - log.Printf("dedup len: %d\n", len(deduped)) + if *Verbose { + log.Printf("dedup len: %d\n", len(deduped)) + } if !*FlagNoDedup { if deduped, err = dedupGodef(deduped); err != nil { return err } } - log.Printf("dedup len: %d\n", len(deduped)) + if *Verbose { + log.Printf("dedup len: %d\n", len(deduped)) + } if !*FlagNoConstImmPorting { if err = copyConstImm(deduped); err != nil { return err } } - log.Printf("dedup len: %d\n", len(deduped)) + if *Verbose { + log.Printf("dedup len: %d\n", len(deduped)) + } typeMap := parseSIMDTypes(deduped) formatWriteAndClose(writeSIMDTypes(typeMap), path, "src/"+simdPackage+"/types_amd64.go") diff --git a/internal/simdgen/main.go b/internal/simdgen/main.go index f1c9dc8b..db77d8c3 100644 --- a/internal/simdgen/main.go +++ b/internal/simdgen/main.go @@ -111,6 +111,8 @@ var ( FlagNoConstImmPorting = flag.Bool("noconstimmporting", false, "disable const immediate porting from op to imm operand") FlagArch = flag.String("arch", "amd64", "the target architecture") + Verbose = flag.Bool("v", false, "verbose") + flagDebugXED = flag.Bool("debug-xed", false, "show XED instructions") flagDebugUnify = flag.Bool("debug-unify", false, "print unification trace") flagDebugHTML = flag.String("debug-html", "", "write unification trace to `file.html`") @@ -201,6 +203,14 @@ func main() { } } + if !*Verbose { + if operandRemarks == 0 { + fmt.Printf("XED decoding generated no errors, which is unusual.\n") + } else { + fmt.Printf("XED decoding generated %d \"errors\" which is not cause for alarm, use -v for details.\n", operandRemarks) + } + } + // Validate 
results. // // Don't validate if this is a command-line query because that tends to diff --git a/internal/simdgen/xed.go b/internal/simdgen/xed.go index 1c26e1d1..387db08a 100644 --- a/internal/simdgen/xed.go +++ b/internal/simdgen/xed.go @@ -22,6 +22,8 @@ const ( GREG_CLASS = 2 // classify as a general register ) +var operandRemarks int + // TODO: Doc. Returns Values with Def domains. func loadXED(xedPath string) []*unify.Value { // TODO: Obviously a bunch more to do here. @@ -49,7 +51,10 @@ func loadXED(xedPath string) []*unify.Value { ins, outs, err := decodeOperands(db, strings.Fields(inst.Operands)) if err != nil { - log.Printf("%s: [%s] %s", inst.Pos, inst.Opcode(), err) + operandRemarks++ + if *Verbose { + log.Printf("%s: [%s] %s", inst.Pos, inst.Opcode(), err) + } return } // TODO: "feature" From 85ea620b1467dd69facea30c096c269be8aaf76a Mon Sep 17 00:00:00 2001 From: David Chase Date: Tue, 24 Jun 2025 14:51:19 -0400 Subject: [PATCH 108/200] internal/simdgen: cleanups, and prep for VPEXTR* Adding VPEXTR* requires a non-vector output, which required some changes. 
There's at least two more follow-on CLs, one that will pair with a dev.simd glue update, and then one to add VPEXTR* Change-Id: I06ed9eb8b74304e39e0dc1356d726bae35295c79 Reviewed-on: https://go-review.googlesource.com/c/arch/+/683856 Reviewed-by: Junyang Shao LUCI-TryBot-Result: Go LUCI --- internal/simdgen/gen_simdIntrinsics.go | 16 +++++----- internal/simdgen/gen_simdMachineOps.go | 6 ++-- internal/simdgen/gen_simdTypes.go | 16 +++++----- internal/simdgen/gen_simdrules.go | 5 ++- internal/simdgen/gen_simdssa.go | 3 +- internal/simdgen/gen_utility.go | 43 ++++++++++++++++++++------ internal/simdgen/godefs.go | 15 +++++++++ 7 files changed, 75 insertions(+), 29 deletions(-) diff --git a/internal/simdgen/gen_simdIntrinsics.go b/internal/simdgen/gen_simdIntrinsics.go index 14a5d41a..3fea1568 100644 --- a/internal/simdgen/gen_simdIntrinsics.go +++ b/internal/simdgen/gen_simdIntrinsics.go @@ -25,21 +25,21 @@ const simdPackage = "` + simdPackage + `" func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies ...sys.ArchFamily)) { {{end}} -{{define "op1"}} addF(simdPackage, "{{(index .In 0).Go}}.{{.Go}}", opLen1(ssa.Op{{.Go}}{{(index .In 0).Go}}, {{.ResultType}}), sys.AMD64) +{{define "op1"}} addF(simdPackage, "{{(index .In 0).Go}}.{{.Go}}", opLen1(ssa.Op{{.Go}}{{(index .In 0).Go}}, {{.SSAType}}), sys.AMD64) {{end}} -{{define "op2"}} addF(simdPackage, "{{(index .In 0).Go}}.{{.Go}}", opLen2(ssa.Op{{.Go}}{{(index .In 0).Go}}, {{.ResultType}}), sys.AMD64) +{{define "op2"}} addF(simdPackage, "{{(index .In 0).Go}}.{{.Go}}", opLen2(ssa.Op{{.Go}}{{(index .In 0).Go}}, {{.SSAType}}), sys.AMD64) {{end}} -{{define "op3"}} addF(simdPackage, "{{(index .In 0).Go}}.{{.Go}}", opLen3(ssa.Op{{.Go}}{{(index .In 0).Go}}, {{.ResultType}}), sys.AMD64) +{{define "op3"}} addF(simdPackage, "{{(index .In 0).Go}}.{{.Go}}", opLen3(ssa.Op{{.Go}}{{(index .In 0).Go}}, {{.SSAType}}), sys.AMD64) {{end}} -{{define "op4"}} addF(simdPackage, "{{(index .In 0).Go}}.{{.Go}}", 
opLen4(ssa.Op{{.Go}}{{(index .In 0).Go}}, {{.ResultType}}), sys.AMD64) +{{define "op4"}} addF(simdPackage, "{{(index .In 0).Go}}.{{.Go}}", opLen4(ssa.Op{{.Go}}{{(index .In 0).Go}}, {{.SSAType}}), sys.AMD64) {{end}} -{{define "op1Imm8"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen1Imm8(ssa.Op{{.Go}}{{(index .In 1).Go}}, {{.ResultType}}, {{(index .In 0).ImmOffset}}), sys.AMD64) +{{define "op1Imm8"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen1Imm8(ssa.Op{{.Go}}{{(index .In 1).Go}}, {{.SSAType}}, {{(index .In 0).ImmOffset}}), sys.AMD64) {{end}} -{{define "op2Imm8"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen2Imm8(ssa.Op{{.Go}}{{(index .In 1).Go}}, {{.ResultType}}, {{(index .In 0).ImmOffset}}), sys.AMD64) +{{define "op2Imm8"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen2Imm8(ssa.Op{{.Go}}{{(index .In 1).Go}}, {{.SSAType}}, {{(index .In 0).ImmOffset}}), sys.AMD64) {{end}} -{{define "op3Imm8"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen3Imm8(ssa.Op{{.Go}}{{(index .In 1).Go}}, {{.ResultType}}, {{(index .In 0).ImmOffset}}), sys.AMD64) +{{define "op3Imm8"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen3Imm8(ssa.Op{{.Go}}{{(index .In 1).Go}}, {{.SSAType}}, {{(index .In 0).ImmOffset}}), sys.AMD64) {{end}} -{{define "op4Imm8"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen4Imm8(ssa.Op{{.Go}}{{(index .In 1).Go}}, {{.ResultType}}, {{(index .In 0).ImmOffset}}), sys.AMD64) +{{define "op4Imm8"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen4Imm8(ssa.Op{{.Go}}{{(index .In 1).Go}}, {{.SSAType}}, {{(index .In 0).ImmOffset}}), sys.AMD64) {{end}} {{define "vectorConversion"}} addF(simdPackage, "{{.Tsrc.Name}}.As{{.Tdst.Name}}", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64) diff --git a/internal/simdgen/gen_simdMachineOps.go b/internal/simdgen/gen_simdMachineOps.go index f1de3104..4525ac85 100644 --- a/internal/simdgen/gen_simdMachineOps.go +++ 
b/internal/simdgen/gen_simdMachineOps.go @@ -46,7 +46,7 @@ func writeSIMDMachineOps(ops []Operation) *bytes.Buffer { OpsDataImm []opData } seen := map[string]struct{}{} - regInfoSet := map[string]bool{"fp11": true, "fp21": true, "fp2k": true, "fp2kfp": true, "fp2kk": true, "fpkfp": true, "fp31": true, "fp3kfp": true, "fpgpfp": true} + regInfoSet := map[string]bool{"fp11": true, "fp21": true, "fp2k": true, "fp2kfp": true, "fp2kk": true, "fpkfp": true, "fp31": true, "fp3kfp": true, "fpgpfp": true, "fpgp": true} opsData := make([]opData, 0) opsDataImm := make([]opData, 0) for _, op := range ops { @@ -57,7 +57,7 @@ func writeSIMDMachineOps(ops []Operation) *bytes.Buffer { asm += "Masked" } - asm = fmt.Sprintf("%s%d", asm, *gOp.Out[0].Bits) + asm = fmt.Sprintf("%s%d", asm, gOp.VectorWidth()) // TODO: all our masked operations are now zeroing, we need to generate machine ops with merging masks, maybe copy // one here with a name suffix "Merging". The rewrite rules will need them. @@ -76,6 +76,8 @@ func writeSIMDMachineOps(ops []Operation) *bytes.Buffer { if shapeOut == OneVregOut || shapeOut == OneVregOutAtIn || gOp.Out[0].OverwriteClass != nil { // If class overwrite is happening, that's not really a mask but a vreg. 
outType = fmt.Sprintf("Vec%d", *gOp.Out[0].Bits) + } else if shapeOut == OneGregOut { + outType = gOp.GoType() // this is a straight Go type, not a VecNNN type } else if shapeOut == OneKmaskOut { outType = "Mask" } else { diff --git a/internal/simdgen/gen_simdTypes.go b/internal/simdgen/gen_simdTypes.go index b4ec0206..c5e7d2fa 100644 --- a/internal/simdgen/gen_simdTypes.go +++ b/internal/simdgen/gen_simdTypes.go @@ -107,56 +107,56 @@ package simd {{if .Documentation}}{{.Documentation}} //{{end}} // Asm: {{.Asm}}, CPU Feature: {{.Extension}} -func (x {{(index .In 0).Go}}) {{.Go}}() {{(index .Out 0).Go}} +func (x {{(index .In 0).Go}}) {{.Go}}() {{.GoType}} {{end}} {{define "op2"}} {{if .Documentation}}{{.Documentation}} //{{end}} // Asm: {{.Asm}}, CPU Feature: {{.Extension}} -func (x {{(index .In 0).Go}}) {{.Go}}(y {{(index .In 1).Go}}) {{(index .Out 0).Go}} +func (x {{(index .In 0).Go}}) {{.Go}}(y {{(index .In 1).Go}}) {{.GoType}} {{end}} {{define "op3"}} {{if .Documentation}}{{.Documentation}} //{{end}} // Asm: {{.Asm}}, CPU Feature: {{.Extension}} -func (x {{(index .In 0).Go}}) {{.Go}}(y {{(index .In 1).Go}}, z {{(index .In 2).Go}}) {{(index .Out 0).Go}} +func (x {{(index .In 0).Go}}) {{.Go}}(y {{(index .In 1).Go}}, z {{(index .In 2).Go}}) {{.GoType}} {{end}} {{define "op4"}} {{if .Documentation}}{{.Documentation}} //{{end}} // Asm: {{.Asm}}, CPU Feature: {{.Extension}} -func (x {{(index .In 0).Go}}) {{.Go}}(y {{(index .In 1).Go}}, z {{(index .In 2).Go}}, u {{(index .In 3).Go}}) {{(index .Out 0).Go}} +func (x {{(index .In 0).Go}}) {{.Go}}(y {{(index .In 1).Go}}, z {{(index .In 2).Go}}, u {{(index .In 3).Go}}) {{.GoType}} {{end}} {{define "op1Imm8"}} {{if .Documentation}}{{.Documentation}} //{{end}} // Asm: {{.Asm}}, CPU Feature: {{.Extension}} -func (x {{(index .In 1).Go}}) {{.Go}}(imm8 uint8) {{(index .Out 0).Go}} +func (x {{(index .In 1).Go}}) {{.Go}}(imm8 uint8) {{.GoType}} {{end}} {{define "op2Imm8"}} {{if .Documentation}}{{.Documentation}} //{{end}} // Asm: 
{{.Asm}}, CPU Feature: {{.Extension}} -func (x {{(index .In 1).Go}}) {{.Go}}(imm uint8, y {{(index .In 2).Go}}) {{(index .Out 0).Go}} +func (x {{(index .In 1).Go}}) {{.Go}}(imm uint8, y {{(index .In 2).Go}}) {{.GoType}} {{end}} {{define "op3Imm8"}} {{if .Documentation}}{{.Documentation}} //{{end}} // Asm: {{.Asm}}, CPU Feature: {{.Extension}} -func (x {{(index .In 1).Go}}) {{.Go}}(imm uint8, y {{(index .In 2).Go}}, z {{(index .In 3).Go}}) {{(index .Out 0).Go}} +func (x {{(index .In 1).Go}}) {{.Go}}(imm uint8, y {{(index .In 2).Go}}, z {{(index .In 3).Go}}) {{.GoType}} {{end}} {{define "op4Imm8"}} {{if .Documentation}}{{.Documentation}} //{{end}} // Asm: {{.Asm}}, CPU Feature: {{.Extension}} -func (x {{(index .In 1).Go}}) {{.Go}}(imm uint8, y {{(index .In 2).Go}}, z {{(index .In 3).Go}}, u {{(index .In 4).Go}}) {{(index .Out 0).Go}} +func (x {{(index .In 1).Go}}) {{.Go}}(imm uint8, y {{(index .In 2).Go}}, z {{(index .In 3).Go}}, u {{(index .In 4).Go}}) {{.GoType}} {{end}} {{define "vectorConversion"}} diff --git a/internal/simdgen/gen_simdrules.go b/internal/simdgen/gen_simdrules.go index 00ef8568..ad260829 100644 --- a/internal/simdgen/gen_simdrules.go +++ b/internal/simdgen/gen_simdrules.go @@ -74,7 +74,7 @@ func writeSIMDRules(ops []Operation) *bytes.Buffer { asm += "Masked" vregInCnt-- } - asm = fmt.Sprintf("%s%d", asm, *gOp.Out[0].Bits) + asm = fmt.Sprintf("%s%d", asm, gOp.VectorWidth()) data := tplRuleData{ GoOp: gOp.Go, @@ -126,6 +126,9 @@ func writeSIMDRules(ops []Operation) *bytes.Buffer { case PureKmaskIn: panic(fmt.Errorf("simdgen does not support pure k mask instructions, they should be generated by compiler optimizations")) } + } else if opOutShape == OneGregOut { + tplName = "pureVreg" // TODO this will be wrong + data.GoType = *gOp.In[0].Go } else { // OneKmaskOut case data.MaskOutConvert = fmt.Sprintf("VPMOVMToVec%dx%d", *gOp.Out[0].ElemBits, *gOp.In[0].Lanes) diff --git a/internal/simdgen/gen_simdssa.go b/internal/simdgen/gen_simdssa.go index 
2993f27a..35a061bf 100644 --- a/internal/simdgen/gen_simdssa.go +++ b/internal/simdgen/gen_simdssa.go @@ -76,6 +76,7 @@ func writeSIMDSSA(ops []Operation) *bytes.Buffer { "fp31ResultInArg0", "fp3kfpResultInArg0", "fpgpfpImm8", + "fpgpImm8", } regInfoSet := map[string][]string{} for _, key := range regInfoKeys { @@ -91,7 +92,7 @@ func writeSIMDSSA(ops []Operation) *bytes.Buffer { if maskType == 2 { asm += "Masked" } - asm = fmt.Sprintf("%s%d", asm, *gOp.Out[0].Bits) + asm = fmt.Sprintf("%s%d", asm, gOp.VectorWidth()) if _, ok := seen[asm]; ok { continue } diff --git a/internal/simdgen/gen_utility.go b/internal/simdgen/gen_utility.go index af7b984f..83a3e982 100644 --- a/internal/simdgen/gen_utility.go +++ b/internal/simdgen/gen_utility.go @@ -72,6 +72,7 @@ const ( InvalidOut int = iota NoOut OneVregOut + OneGregOut OneKmaskOut OneVregOutAtIn ) @@ -137,6 +138,8 @@ func (op *Operation) shape() (shapeIn, shapeOut, maskType, immType int, opNoImm outputReg = op.Out[0].AsmPos if op.Out[0].Class == "vreg" { shapeOut = OneVregOut + } else if op.Out[0].Class == "greg" { + shapeOut = OneGregOut } else if op.Out[0].Class == "mask" { shapeOut = OneKmaskOut } else { @@ -335,7 +338,36 @@ func (op *Operation) sortOperand() { }) } -func (op Operation) ResultType() string { +// goNormalType returns the Go type name for the result of an Op that +// does not return a vector, i.e., that returns a result in a general +// register. Currently there's only one family of Ops in Go's simd library +// that does this (GetElem), and so this is specialized to work for that, +// but the problem (mismatch betwen hardware register width and Go type +// width) seems likely to recur if there are any other cases. +func (op Operation) goNormalType() string { + if op.Go == "GetElem" { + // GetElem returns an element of the vector into a general register + // but as far as the hardware is concerned, that result is either 32 + // or 64 bits wide, no matter what the vector element width is. 
+ // This is not "wrong" but it is not the right answer for Go source code. + // To get the Go type right, combine the base type ("int", "uint", "float"), + // with the input vector element width in bits (8,16,32,64). + + at := 0 // proper value of at depends on whether immediate was stripped or not + if op.In[at].Class == "immediate" { + at++ + } + return fmt.Sprintf("%s%d", *op.Out[0].Base, *op.In[at].ElemBits) + } + panic(fmt.Errorf("Implement goNormalType for %v", op)) +} + +// SSAType returns the string for the type reference in SSA generation, +// for example in the intrinsics generating template. +func (op Operation) SSAType() string { + if op.Out[0].Class == "greg" { + return fmt.Sprintf("types.Types[types.T%s]", strings.ToUpper(op.goNormalType())) + } return fmt.Sprintf("types.TypeVec%d", *op.Out[0].Bits) } @@ -343,14 +375,7 @@ func (op Operation) ResultType() string { // for example "int32" or "Int8x16". This is used in a template. func (op Operation) GoType() string { if op.Out[0].Class == "greg" { - if op.Go == "GetElem" { - at := 0 // proper value of at depends on whether immediate was stripped or not - if op.In[at].Class == "immediate" { - at++ - } - return fmt.Sprintf("%s%d", *op.Out[0].Base, *op.In[at].ElemBits) - } - panic(fmt.Errorf("Implement this case for %v", op)) + return op.goNormalType() } return *op.Out[0].Go } diff --git a/internal/simdgen/godefs.go b/internal/simdgen/godefs.go index 8dc928f8..6b30dee2 100644 --- a/internal/simdgen/godefs.go +++ b/internal/simdgen/godefs.go @@ -5,6 +5,7 @@ package main import ( + "fmt" "log" "slices" "strings" @@ -32,6 +33,20 @@ type Operation struct { Masked *string } +func (o *Operation) VectorWidth() int { + out := o.Out[0] + if out.Class == "vreg" { + return *out.Bits + } else if out.Class == "greg" || out.Class == "mask" { + for i := range o.In { + if o.In[i].Class == "vreg" { + return *o.In[i].Bits + } + } + } + panic(fmt.Errorf("Figure out what the vector width is for %v and implement it", *o)) +} + 
func compareStringPointers(x, y *string) int { if x != nil && y != nil { return strings.Compare(*x, *y) From 8c668448498d5d2a28619f706c0f29e3e4ecff54 Mon Sep 17 00:00:00 2001 From: David Chase Date: Tue, 24 Jun 2025 16:21:58 -0400 Subject: [PATCH 109/200] internal/simdgen: changes to generated code, for VPEXTR These changes generate dev.simd CL 683816 which should be submitted after this CL. Change-Id: I9e26bc8aec74199d8e5dea9aca2520d455818a46 Reviewed-on: https://go-review.googlesource.com/c/arch/+/683858 LUCI-TryBot-Result: Go LUCI Reviewed-by: Junyang Shao --- internal/simdgen/gen_simdMachineOps.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/simdgen/gen_simdMachineOps.go b/internal/simdgen/gen_simdMachineOps.go index 4525ac85..f879791d 100644 --- a/internal/simdgen/gen_simdMachineOps.go +++ b/internal/simdgen/gen_simdMachineOps.go @@ -13,7 +13,7 @@ import ( const simdMachineOpsTmpl = `// Code generated by x/arch/internal/simdgen using 'go run . -xedPath $XED_PATH -o godefs -goroot $GOROOT go.yaml types.yaml categories.yaml'; DO NOT EDIT. 
package main -func simdAMD64Ops(fp11, fp21, fp2k, fpkfp, fp2kfp, fp2kk, fp31, fp3kfp, fpgpfp regInfo) []opData { +func simdAMD64Ops(fp11, fp21, fp2k, fpkfp, fp2kfp, fp2kk, fp31, fp3kfp, fpgpfp, fpgp regInfo) []opData { return []opData{ {{- range .OpsData }} {name: "{{.OpName}}", argLength: {{.OpInLen}}, reg: {{.RegInfo}}, asm: "{{.Asm}}", commutative: {{.Comm}}, typ: "{{.Type}}", resultInArg0: {{.ResultInArg0}}}, From 7f641766c6c3fd78bfe5b218703d96afa7f32ebf Mon Sep 17 00:00:00 2001 From: Junyang Shao Date: Mon, 23 Jun 2025 19:33:46 +0000 Subject: [PATCH 110/200] internal/simdgen: add shift and rotate operations Change-Id: Id593b325b4585010488e1cadc91c7f14637bc4cd Reviewed-on: https://go-review.googlesource.com/c/arch/+/683475 Reviewed-by: David Chase LUCI-TryBot-Result: Go LUCI --- internal/simdgen/categories.yaml | 141 ++++++++++++ internal/simdgen/gen_simdTypes.go | 57 ++++- internal/simdgen/gen_simdssa.go | 16 ++ internal/simdgen/gen_utility.go | 78 +++++++ internal/simdgen/go.yaml | 208 +++++++++++++++++ internal/simdgen/godefs.go | 6 + .../simdgen/ops/ShiftRotate/categories.yaml | 142 ++++++++++++ internal/simdgen/ops/ShiftRotate/go.yaml | 209 ++++++++++++++++++ 8 files changed, 856 insertions(+), 1 deletion(-) create mode 100644 internal/simdgen/ops/ShiftRotate/categories.yaml create mode 100644 internal/simdgen/ops/ShiftRotate/go.yaml diff --git a/internal/simdgen/categories.yaml b/internal/simdgen/categories.yaml index d8081bc4..b349fc51 100644 --- a/internal/simdgen/categories.yaml +++ b/internal/simdgen/categories.yaml @@ -673,3 +673,144 @@ // MaskedMulLow multiplies elements and stores the low part of the result, masked. docUnmasked: !string |- // MulLow multiplies elements and stores the low part of the result. +- go: ShiftAllLeft + nameAndSizeCheck: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. 
+- go: MaskedShiftAllLeft + nameAndSizeCheck: "true" + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // MaskedShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. +- go: ShiftAllRight + nameAndSizeCheck: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. +- go: MaskedShiftAllRight + nameAndSizeCheck: "true" + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // MaskedShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. +- go: ShiftAllRightSignExtended + nameAndSizeCheck: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // ShiftAllRightSignExtended shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. +- go: MaskedShiftAllRightSignExtended + nameAndSizeCheck: "true" + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // MaskedShiftAllRightSignExtended shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. + +- go: ShiftLeft + nameAndSizeCheck: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. +- go: MaskedShiftLeft + nameAndSizeCheck: "true" + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // MaskedShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. 
+- go: ShiftRight + nameAndSizeCheck: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. +- go: MaskedShiftRight + nameAndSizeCheck: "true" + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // MaskedShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. +- go: ShiftRightSignExtended + nameAndSizeCheck: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. +- go: MaskedShiftRightSignExtended + nameAndSizeCheck: "true" + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // MaskedShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. + +- go: MaskedRotateAllLeft + nameAndSizeCheck: "true" + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // MaskedRotateAllLeft rotates each element to the left by the number of bits specified by the immediate. +- go: MaskedRotateLeft + nameAndSizeCheck: "true" + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // MaskedRotateLeft rotates each element in x to the left by the number of bits specified by y's corresponding elements. +- go: MaskedRotateAllRight + nameAndSizeCheck: "true" + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // MaskedRotateAllRight rotates each element to the right by the number of bits specified by the immediate. 
+- go: MaskedRotateRight + nameAndSizeCheck: "true" + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // MaskedRotateRight rotates each element in x to the right by the number of bits specified by y's corresponding elements. + +- go: MaskedShiftAllLeftAndFillUpperFrom + nameAndSizeCheck: "true" + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // MaskedShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the + // immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. +- go: MaskedShiftAllRightAndFillUpperFrom + nameAndSizeCheck: "true" + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // MaskedShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the + // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. +- go: MaskedShiftLeftAndFillUpperFrom + nameAndSizeCheck: "true" + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // MaskedShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the + // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. +- go: MaskedShiftRightAndFillUpperFrom + nameAndSizeCheck: "true" + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // MaskedShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the + // corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. 
diff --git a/internal/simdgen/gen_simdTypes.go b/internal/simdgen/gen_simdTypes.go index c5e7d2fa..864e0b7f 100644 --- a/internal/simdgen/gen_simdTypes.go +++ b/internal/simdgen/gen_simdTypes.go @@ -124,6 +124,20 @@ func (x {{(index .In 0).Go}}) {{.Go}}(y {{(index .In 1).Go}}) {{.GoType}} func (x {{(index .In 0).Go}}) {{.Go}}(y {{(index .In 1).Go}}, z {{(index .In 2).Go}}) {{.GoType}} {{end}} +{{define "op2VecAsScalar"}} +{{if .Documentation}}{{.Documentation}} +//{{end}} +// Asm: {{.Asm}}, CPU Feature: {{.Extension}} +func (x {{(index .In 0).Go}}) {{.Go}}(y uint{{(index .In 1).TreatLikeAScalarOfSize}}) {{(index .Out 0).Go}} +{{end}} + +{{define "op3VecAsScalar"}} +{{if .Documentation}}{{.Documentation}} +//{{end}} +// Asm: {{.Asm}}, CPU Feature: {{.Extension}} +func (x {{(index .In 0).Go}}) {{.Go}}(y uint{{(index .In 1).TreatLikeAScalarOfSize}}, z {{(index .In 2).Go}}) {{(index .Out 0).Go}} +{{end}} + {{define "op4"}} {{if .Documentation}}{{.Documentation}} //{{end}} @@ -209,6 +223,11 @@ func test{{.OpShape}}(t *testing.T, {{.BaseArgDefList}}, want []{{.ResBaseType}} } } {{end}} +{{define "untestedOpHeader"}} +/* The operations below cannot be tested via wrappers, please test them directly */ +{{end}} +{{define "untestedOp"}} +// {{.}}{{end}} ` // writeSIMDTestsWrapper generates the test wrappers and writes it to simd_amd64_testwrappers.go @@ -235,15 +254,24 @@ func writeSIMDTestsWrapper(ops []Operation) *bytes.Buffer { } opsByShape := make(map[string]opData) - + opsSkipped := map[string]struct{}{} for _, o := range ops { _, _, _, immType, _, _, gOp := o.shape() if immType == VarImm || immType == ConstVarImm { // Operations with variable immediates should be called directly // instead of through wrappers. + opsSkipped[o.Go] = struct{}{} continue } + if vasIdx, err := checkVecAsScalar(o); err != nil { + panic(err) + } else if vasIdx != -1 { + // TODO: these could be tested via wrappers, implement this. 
+ opsSkipped[o.Go] = struct{}{} + continue + } + var shape string var baseArgDefList []string var vecArgList []string @@ -357,6 +385,22 @@ func writeSIMDTestsWrapper(ops []Operation) *bytes.Buffer { } } + if len(opsSkipped) != 0 { + if err := t.ExecuteTemplate(buffer, "untestedOpHeader", nil); err != nil { + panic(fmt.Errorf("failed to execute untestedOpHeader")) + } + opsK := []string{} + for k := range opsSkipped { + opsK = append(opsK, k) + } + slices.SortFunc(opsK, strings.Compare) + for _, k := range opsK { + if err := t.ExecuteTemplate(buffer, "untestedOp", k); err != nil { + panic(fmt.Errorf("failed to execute untestedOp")) + } + } + } + return buffer } @@ -497,7 +541,18 @@ func writeSIMDStubs(ops []Operation, typeMap simdTypeMap) *bytes.Buffer { slices.SortFunc(ops, compareOperations) for i, op := range ops { + idxVecAsScalar, err := checkVecAsScalar(op) + if err != nil { + panic(err) + } if s, op, err := classifyOp(op); err == nil { + if idxVecAsScalar != -1 { + if s == "op2" || s == "op3" { + s += "VecAsScalar" + } else { + panic(fmt.Errorf("simdgen only supports op2 or op3 with TreatLikeAScalarOfSize")) + } + } if i == 0 || op.Go != ops[i-1].Go { fmt.Fprintf(buffer, "\n/* %s */\n", op.Go) } diff --git a/internal/simdgen/gen_simdssa.go b/internal/simdgen/gen_simdssa.go index 35a061bf..ffb172a6 100644 --- a/internal/simdgen/gen_simdssa.go +++ b/internal/simdgen/gen_simdssa.go @@ -75,8 +75,11 @@ func writeSIMDSSA(ops []Operation) *bytes.Buffer { "fp2kkImm8", "fp31ResultInArg0", "fp3kfpResultInArg0", + "fpXfp", + "fpXkfp", "fpgpfpImm8", "fpgpImm8", + "fp2kfpImm8", } regInfoSet := map[string][]string{} for _, key := range regInfoKeys { @@ -113,6 +116,19 @@ func writeSIMDSSA(ops []Operation) *bytes.Buffer { if shapeIn == OneImmIn || shapeIn == OneKmaskImmIn { regShape += "Imm8" } + idx, err := checkVecAsScalar(op) + if err != nil { + panic(err) + } + if idx != -1 { + if regShape == "fp21" { + regShape = "fpXfp" + } else if regShape == "fp2kfp" { + regShape = 
"fpXkfp" + } else { + panic(fmt.Errorf("simdgen does not recognize uses of treatLikeAScalarOfSize with op regShape %s in op: %s", regShape, op)) + } + } if _, ok := regInfoSet[regShape]; !ok { allUnseen[regShape] = append(allUnseen[regShape], op) } diff --git a/internal/simdgen/gen_utility.go b/internal/simdgen/gen_utility.go index 83a3e982..cba608e0 100644 --- a/internal/simdgen/gen_utility.go +++ b/internal/simdgen/gen_utility.go @@ -417,6 +417,33 @@ func classifyOp(op Operation) (string, Operation, error) { } } +func checkVecAsScalar(op Operation) (idx int, err error) { + idx = -1 + sSize := 0 + for i, o := range op.In { + if o.TreatLikeAScalarOfSize != nil { + if idx == -1 { + idx = i + sSize = *o.TreatLikeAScalarOfSize + } else { + err = fmt.Errorf("simdgen only supports one TreatLikeAScalarOfSize in the arg list: %s", op) + return + } + } + } + if idx >= 0 { + if idx != 1 { + err = fmt.Errorf("simdgen only supports TreatLikeAScalarOfSize at the 2nd arg of the arg list: %s", op) + return + } + if sSize != 8 && sSize != 16 && sSize != 32 && sSize != 64 { + err = fmt.Errorf("simdgen does not recognize this uint size: %d, %s", sSize, op) + return + } + } + return +} + // dedup is deduping operations in the full structure level. func dedup(ops []Operation) (deduped []Operation) { for _, op := range ops { @@ -607,6 +634,51 @@ func overwrite(ops []Operation) error { return nil } +// reportXEDInconsistency reports potential XED inconsistencies. +// We can add more fields to [Operation] to enable more checks and implement it here. +// Supported checks: +// [NameAndSizeCheck]: NAME[BWDQ] should set the elemBits accordingly. +// This check is useful to find inconsistencies, then we can add overwrite fields to +// those defs to correct them manually. 
+func reportXEDInconsistency(ops []Operation) error { + for _, o := range ops { + if o.NameAndSizeCheck != nil { + suffixSizeMap := map[byte]int{'B': 8, 'W': 16, 'D': 32, 'Q': 64} + checkOperand := func(opr Operand) error { + if opr.ElemBits == nil { + return fmt.Errorf("simdgen expects elemBits to be set when performing NameAndSizeCheck") + } + if v, ok := suffixSizeMap[o.Asm[len(o.Asm)-1]]; !ok { + return fmt.Errorf("simdgen expects asm to end with [BWDQ] when performing NameAndSizeCheck") + } else { + if v != *opr.ElemBits { + return fmt.Errorf("simdgen finds NameAndSizeCheck inconsistency in def: %s", o) + } + } + return nil + } + for _, in := range o.In { + if in.Class != "vreg" && in.Class != "mask" { + continue + } + if in.TreatLikeAScalarOfSize != nil { + // This is an irregular operand, don't check it. + continue + } + if err := checkOperand(in); err != nil { + return err + } + } + for _, out := range o.Out { + if err := checkOperand(out); err != nil { + return err + } + } + } + } + return nil +} + func (o Operation) String() string { var sb strings.Builder sb.WriteString("Operation {\n") @@ -719,6 +791,12 @@ func (op Operand) String() string { sb.WriteString(" OverwriteElementBits: \n") } + if op.TreatLikeAScalarOfSize != nil { + sb.WriteString(fmt.Sprintf(" TreatLikeAScalarOfSize: %d\n", *op.TreatLikeAScalarOfSize)) + } else { + sb.WriteString(" TreatLikeAScalarOfSize: \n") + } + sb.WriteString(" }\n") return sb.String() } diff --git a/internal/simdgen/go.yaml b/internal/simdgen/go.yaml index 572f02ed..52fef3b7 100644 --- a/internal/simdgen/go.yaml +++ b/internal/simdgen/go.yaml @@ -862,3 +862,211 @@ - *int out: - *int2 +# Integers +# ShiftAll* +- go: ShiftAllLeft + asm: "VPSLL[WDQ]" + in: + - &any + go: $t + - &vecAsScalar64 + treatLikeAScalarOfSize: 64 + go: Uint64x2 + out: + - *any +- go: MaskedShiftAllLeft + asm: "VPSLL[WDQ]" + in: + - class: mask + - *any + - *vecAsScalar64 + out: + - *any +- go: ShiftAllRight + asm: "VPSRL[WDQ]" + in: + - *any + - 
*vecAsScalar64 + out: + - *any +- go: MaskedShiftAllRight + asm: "VPSRL[WDQ]" + in: + - class: mask + - *any + - *vecAsScalar64 + out: + - *any +- go: ShiftAllRightSignExtended + asm: "VPSRA[WDQ]" + in: + - &int + go: $t + base: int + - *vecAsScalar64 + out: + - *int +- go: MaskedShiftAllRightSignExtended + asm: "VPSRA[WDQ]" + in: + - class: mask + - *int + - *vecAsScalar64 + out: + - *int + +# Shift* (variable) +- go: ShiftLeft + asm: "VPSLLV[WD]" + in: + - *any + - *any + out: + - *any +- go: MaskedShiftLeft + asm: "VPSLLV[WD]" + in: + - class: mask + - *any + - *any + out: + - *any +# XED data of VPSLLVQ marks the element bits 32 which is off to the actual semantic, we need to overwrite +# it to 64. +- go: ShiftLeft + asm: "VPSLLVQ" + in: + - &anyOverwriteElemBits + go: $t + overwriteElementBits: 64 + - *anyOverwriteElemBits + out: + - *anyOverwriteElemBits +- go: MaskedShiftLeft + asm: "VPSLLVQ" + in: + - class: mask + - *anyOverwriteElemBits + - *anyOverwriteElemBits + out: + - *anyOverwriteElemBits +- go: ShiftRight + asm: "VPSRLV[WD]" + in: + - *any + - *any + out: + - *any +- go: MaskedShiftRight + asm: "VPSRLV[WD]" + in: + - class: mask + - *any + - *any + out: + - *any +# XED data of VPSRLVQ needs the same overwrite as VPSLLVQ. 
+- go: ShiftRight + asm: "VPSRLVQ" + in: + - *anyOverwriteElemBits + - *anyOverwriteElemBits + out: + - *anyOverwriteElemBits +- go: MaskedShiftRight + asm: "VPSRLVQ" + in: + - class: mask + - *anyOverwriteElemBits + - *anyOverwriteElemBits + out: + - *anyOverwriteElemBits +- go: ShiftRightSignExtended + asm: "VPSRAV[WDQ]" + in: + - *any + - *any + out: + - *any +- go: MaskedShiftRightSignExtended + asm: "VPSRAV[WDQ]" + in: + - class: mask + - *any + - *any + out: + - *any + +# Rotate +- go: MaskedRotateAllLeft + asm: "VPROL[DQ]" + in: + - class: mask + - *any + - &pureImm + class: immediate + immOffset: 0 + out: + - *any +- go: MaskedRotateAllRight + asm: "VPROR[DQ]" + in: + - class: mask + - *any + - *pureImm + out: + - *any +- go: MaskedRotateLeft + asm: "VPROLV[DQ]" + in: + - class: mask + - *any + - *any + out: + - *any +- go: MaskedRotateRight + asm: "VPRORV[DQ]" + in: + - class: mask + - *any + - *any + out: + - *any + +# Bizzare shifts. +- go: MaskedShiftAllLeftAndFillUpperFrom + asm: "VPSHLD[WDQ]" + in: + - class: mask + - *any + - *any + - *pureImm + out: + - *any +- go: MaskedShiftAllRightAndFillUpperFrom + asm: "VPSHRD[WDQ]" + in: + - class: mask + - *any + - *any + - *pureImm + out: + - *any +- go: MaskedShiftLeftAndFillUpperFrom + asm: "VPSHLDV[WDQ]" + in: + - *any + - class: mask + - *any + - *any + out: + - *any +- go: MaskedShiftRightAndFillUpperFrom + asm: "VPSHRDV[WDQ]" + in: + - *any + - class: mask + - *any + - *any + out: + - *any diff --git a/internal/simdgen/godefs.go b/internal/simdgen/godefs.go index 6b30dee2..1dcd48ec 100644 --- a/internal/simdgen/godefs.go +++ b/internal/simdgen/godefs.go @@ -31,6 +31,8 @@ type Operation struct { // Masked indicates that this is a masked operation, this field has to be set for masked operations // otherwise simdgen won't recognize it in [splitMask]. Masked *string + // NameAndSizeCheck is used to check [BWDQ] maps to (8|16|32|64) elemBits. 
+ NameAndSizeCheck *string } func (o *Operation) VectorWidth() int { @@ -140,6 +142,10 @@ type Operand struct { // field of the operation. ImmOffset *string Lanes *int // *Lanes equals Bits/ElemBits except for scalars, when *Lanes == 1 + // TreatLikeAScalarOfSize means only the lower $TreatLikeAScalarOfSize bits of the vector + // is used, so at the API level we can make it just a scalar value of this size; Then we + // can overwrite it to a vector of the right size during intrinsics stage. + TreatLikeAScalarOfSize *int // If non-nil, it means the [Class] field is overwritten here, right now this is used to // overwrite the results of AVX2 compares to masks. OverwriteClass *string diff --git a/internal/simdgen/ops/ShiftRotate/categories.yaml b/internal/simdgen/ops/ShiftRotate/categories.yaml new file mode 100644 index 00000000..91a0e3d0 --- /dev/null +++ b/internal/simdgen/ops/ShiftRotate/categories.yaml @@ -0,0 +1,142 @@ +!sum +- go: ShiftAllLeft + nameAndSizeCheck: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. +- go: MaskedShiftAllLeft + nameAndSizeCheck: "true" + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // MaskedShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. +- go: ShiftAllRight + nameAndSizeCheck: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. +- go: MaskedShiftAllRight + nameAndSizeCheck: "true" + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // MaskedShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. 
+- go: ShiftAllRightSignExtended + nameAndSizeCheck: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // ShiftAllRightSignExtended shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. +- go: MaskedShiftAllRightSignExtended + nameAndSizeCheck: "true" + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // MaskedShiftAllRightSignExtended shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. + +- go: ShiftLeft + nameAndSizeCheck: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. +- go: MaskedShiftLeft + nameAndSizeCheck: "true" + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // MaskedShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. +- go: ShiftRight + nameAndSizeCheck: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. +- go: MaskedShiftRight + nameAndSizeCheck: "true" + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // MaskedShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. +- go: ShiftRightSignExtended + nameAndSizeCheck: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. 
Emptied upper bits are filled with the sign bit. +- go: MaskedShiftRightSignExtended + nameAndSizeCheck: "true" + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // MaskedShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. + +- go: MaskedRotateAllLeft + nameAndSizeCheck: "true" + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // MaskedRotateAllLeft rotates each element to the left by the number of bits specified by the immediate. +- go: MaskedRotateLeft + nameAndSizeCheck: "true" + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // MaskedRotateLeft rotates each element in x to the left by the number of bits specified by y's corresponding elements. +- go: MaskedRotateAllRight + nameAndSizeCheck: "true" + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // MaskedRotateAllRight rotates each element to the right by the number of bits specified by the immediate. +- go: MaskedRotateRight + nameAndSizeCheck: "true" + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // MaskedRotateRight rotates each element in x to the right by the number of bits specified by y's corresponding elements. + +- go: MaskedShiftAllLeftAndFillUpperFrom + nameAndSizeCheck: "true" + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // MaskedShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the + // immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. 
+- go: MaskedShiftAllRightAndFillUpperFrom + nameAndSizeCheck: "true" + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // MaskedShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the + // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. +- go: MaskedShiftLeftAndFillUpperFrom + nameAndSizeCheck: "true" + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // MaskedShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the + // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. +- go: MaskedShiftRightAndFillUpperFrom + nameAndSizeCheck: "true" + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // MaskedShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the + // corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. 
diff --git a/internal/simdgen/ops/ShiftRotate/go.yaml b/internal/simdgen/ops/ShiftRotate/go.yaml new file mode 100644 index 00000000..7205bab3 --- /dev/null +++ b/internal/simdgen/ops/ShiftRotate/go.yaml @@ -0,0 +1,209 @@ +!sum +# Integers +# ShiftAll* +- go: ShiftAllLeft + asm: "VPSLL[WDQ]" + in: + - &any + go: $t + - &vecAsScalar64 + treatLikeAScalarOfSize: 64 + go: Uint64x2 + out: + - *any +- go: MaskedShiftAllLeft + asm: "VPSLL[WDQ]" + in: + - class: mask + - *any + - *vecAsScalar64 + out: + - *any +- go: ShiftAllRight + asm: "VPSRL[WDQ]" + in: + - *any + - *vecAsScalar64 + out: + - *any +- go: MaskedShiftAllRight + asm: "VPSRL[WDQ]" + in: + - class: mask + - *any + - *vecAsScalar64 + out: + - *any +- go: ShiftAllRightSignExtended + asm: "VPSRA[WDQ]" + in: + - &int + go: $t + base: int + - *vecAsScalar64 + out: + - *int +- go: MaskedShiftAllRightSignExtended + asm: "VPSRA[WDQ]" + in: + - class: mask + - *int + - *vecAsScalar64 + out: + - *int + +# Shift* (variable) +- go: ShiftLeft + asm: "VPSLLV[WD]" + in: + - *any + - *any + out: + - *any +- go: MaskedShiftLeft + asm: "VPSLLV[WD]" + in: + - class: mask + - *any + - *any + out: + - *any +# XED data of VPSLLVQ marks the element bits 32 which is off to the actual semantic, we need to overwrite +# it to 64. +- go: ShiftLeft + asm: "VPSLLVQ" + in: + - &anyOverwriteElemBits + go: $t + overwriteElementBits: 64 + - *anyOverwriteElemBits + out: + - *anyOverwriteElemBits +- go: MaskedShiftLeft + asm: "VPSLLVQ" + in: + - class: mask + - *anyOverwriteElemBits + - *anyOverwriteElemBits + out: + - *anyOverwriteElemBits +- go: ShiftRight + asm: "VPSRLV[WD]" + in: + - *any + - *any + out: + - *any +- go: MaskedShiftRight + asm: "VPSRLV[WD]" + in: + - class: mask + - *any + - *any + out: + - *any +# XED data of VPSRLVQ needs the same overwrite as VPSLLVQ. 
+- go: ShiftRight + asm: "VPSRLVQ" + in: + - *anyOverwriteElemBits + - *anyOverwriteElemBits + out: + - *anyOverwriteElemBits +- go: MaskedShiftRight + asm: "VPSRLVQ" + in: + - class: mask + - *anyOverwriteElemBits + - *anyOverwriteElemBits + out: + - *anyOverwriteElemBits +- go: ShiftRightSignExtended + asm: "VPSRAV[WDQ]" + in: + - *any + - *any + out: + - *any +- go: MaskedShiftRightSignExtended + asm: "VPSRAV[WDQ]" + in: + - class: mask + - *any + - *any + out: + - *any + +# Rotate +- go: MaskedRotateAllLeft + asm: "VPROL[DQ]" + in: + - class: mask + - *any + - &pureImm + class: immediate + immOffset: 0 + out: + - *any +- go: MaskedRotateAllRight + asm: "VPROR[DQ]" + in: + - class: mask + - *any + - *pureImm + out: + - *any +- go: MaskedRotateLeft + asm: "VPROLV[DQ]" + in: + - class: mask + - *any + - *any + out: + - *any +- go: MaskedRotateRight + asm: "VPRORV[DQ]" + in: + - class: mask + - *any + - *any + out: + - *any + +# Bizzare shifts. +- go: MaskedShiftAllLeftAndFillUpperFrom + asm: "VPSHLD[WDQ]" + in: + - class: mask + - *any + - *any + - *pureImm + out: + - *any +- go: MaskedShiftAllRightAndFillUpperFrom + asm: "VPSHRD[WDQ]" + in: + - class: mask + - *any + - *any + - *pureImm + out: + - *any +- go: MaskedShiftLeftAndFillUpperFrom + asm: "VPSHLDV[WDQ]" + in: + - *any + - class: mask + - *any + - *any + out: + - *any +- go: MaskedShiftRightAndFillUpperFrom + asm: "VPSHRDV[WDQ]" + in: + - *any + - class: mask + - *any + - *any + out: + - *any \ No newline at end of file From 11b9f365aeeaca3d5de7d4c6b56eba2d2e35f36b Mon Sep 17 00:00:00 2001 From: Junyang Shao Date: Thu, 26 Jun 2025 04:07:24 +0000 Subject: [PATCH 111/200] internal/simdgen: add galois field instructions This CL generates CL 684175. 
Change-Id: I1b327fd1d3d3aa15cd23523371f186ceef37db76 Reviewed-on: https://go-review.googlesource.com/c/arch/+/684155 LUCI-TryBot-Result: Go LUCI Reviewed-by: David Chase --- internal/simdgen/categories.yaml | 26 ++++++++++++++++ internal/simdgen/go.yaml | 30 ++++++++++++++++++ .../simdgen/ops/GaloisField/categories.yaml | 27 ++++++++++++++++ internal/simdgen/ops/GaloisField/go.yaml | 31 +++++++++++++++++++ 4 files changed, 114 insertions(+) create mode 100644 internal/simdgen/ops/GaloisField/categories.yaml create mode 100644 internal/simdgen/ops/GaloisField/go.yaml diff --git a/internal/simdgen/categories.yaml b/internal/simdgen/categories.yaml index b349fc51..4b21d5a6 100644 --- a/internal/simdgen/categories.yaml +++ b/internal/simdgen/categories.yaml @@ -455,6 +455,32 @@ extension: "AVX.*" documentation: !string |- // AddSub subtracts even elements and adds odd elements of two vectors. +- go: MaskedGaloisFieldAffineTransform + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // GaloisFieldAffineTransform computes an affine transformation in GF(2^8): + // x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrixes; + // imm is an 8-bit vector. The affine transformation is y * x + imm, with each element of y + // corresponding to a group of 8 elements in x. +- go: MaskedGaloisFieldAffineTransformInversed + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // GaloisFieldAffineTransform computes an affine transformation in GF(2^8), + // with x inversed with reduction polynomial x^8 + x^4 + x^3 + x + 1: + // x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrixes; + // imm is an 8-bit vector. The affine transformation is y * x + imm, with each element of y + // corresponding to a group of 8 elements in x. 
+- go: MaskedGaloisFieldMul + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // GaloisFieldMul computes element-wise GF(2^8) multiplication with + // reduction polynomial x^8 + x^4 + x^3 + x + 1. - go: Average commutative: "true" extension: "AVX.*" # VPAVGB/W are available across various AVX versions diff --git a/internal/simdgen/go.yaml b/internal/simdgen/go.yaml index 52fef3b7..2d1038da 100644 --- a/internal/simdgen/go.yaml +++ b/internal/simdgen/go.yaml @@ -431,6 +431,36 @@ - *fp out: - *fp +- go: MaskedGaloisFieldAffineTransform + asm: VGF2P8AFFINEQB + in: &AffineArgs + - class: mask + - &uint8 + go: $t + base: uint + - &uint8x8 + go: $t2 + base: uint + - &pureImmVar + class: immediate + immOffset: 0 + out: + - *uint8 + +- go: MaskedGaloisFieldAffineTransformInversed + asm: VGF2P8AFFINEINVQB + in: *AffineArgs + out: + - *uint8 + +- go: MaskedGaloisFieldMul + asm: VGF2P8MULB + in: + - class: mask + - *uint8 + - *uint8 + out: + - *uint8 # Average (unsigned byte, unsigned word) # Instructions: VPAVGB, VPAVGW - go: Average diff --git a/internal/simdgen/ops/GaloisField/categories.yaml b/internal/simdgen/ops/GaloisField/categories.yaml new file mode 100644 index 00000000..915d3ec1 --- /dev/null +++ b/internal/simdgen/ops/GaloisField/categories.yaml @@ -0,0 +1,27 @@ +!sum +- go: MaskedGaloisFieldAffineTransform + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // GaloisFieldAffineTransform computes an affine transformation in GF(2^8): + // x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrixes; + // imm is an 8-bit vector. The affine transformation is y * x + imm, with each element of y + // corresponding to a group of 8 elements in x. 
+- go: MaskedGaloisFieldAffineTransformInversed + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // GaloisFieldAffineTransform computes an affine transformation in GF(2^8), + // with x inversed with reduction polynomial x^8 + x^4 + x^3 + x + 1: + // x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrixes; + // imm is an 8-bit vector. The affine transformation is y * x + imm, with each element of y + // corresponding to a group of 8 elements in x. +- go: MaskedGaloisFieldMul + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // GaloisFieldMul computes element-wise GF(2^8) multiplication with + // reduction polynomial x^8 + x^4 + x^3 + x + 1. \ No newline at end of file diff --git a/internal/simdgen/ops/GaloisField/go.yaml b/internal/simdgen/ops/GaloisField/go.yaml new file mode 100644 index 00000000..9008ab28 --- /dev/null +++ b/internal/simdgen/ops/GaloisField/go.yaml @@ -0,0 +1,31 @@ +!sum +- go: MaskedGaloisFieldAffineTransform + asm: VGF2P8AFFINEQB + in: &AffineArgs + - class: mask + - &uint8 + go: $t + base: uint + - &uint8x8 + go: $t2 + base: uint + - &pureImmVar + class: immediate + immOffset: 0 + out: + - *uint8 + +- go: MaskedGaloisFieldAffineTransformInversed + asm: VGF2P8AFFINEINVQB + in: *AffineArgs + out: + - *uint8 + +- go: MaskedGaloisFieldMul + asm: VGF2P8MULB + in: + - class: mask + - *uint8 + - *uint8 + out: + - *uint8 \ No newline at end of file From 0d73a5dacbc176103b28aeb254a961af60b206c5 Mon Sep 17 00:00:00 2001 From: David Chase Date: Tue, 24 Jun 2025 18:28:10 -0400 Subject: [PATCH 112/200] arch/internal: add VPEXTR* instructions This CL generates dev.simd CL 683797 and this CL should be submitted before that one. 
Change-Id: I3d2e292df2bed94aeb7c710a47c5e3c99c868b58 Reviewed-on: https://go-review.googlesource.com/c/arch/+/683836 Reviewed-by: Junyang Shao LUCI-TryBot-Result: Go LUCI --- internal/simdgen/categories.yaml | 6 ++++++ internal/simdgen/go.yaml | 12 ++++++++++++ internal/simdgen/ops/Moves/categories.yaml | 6 ++++++ internal/simdgen/ops/Moves/go.yaml | 12 ++++++++++++ 4 files changed, 36 insertions(+) diff --git a/internal/simdgen/categories.yaml b/internal/simdgen/categories.yaml index 4b21d5a6..bb4492ed 100644 --- a/internal/simdgen/categories.yaml +++ b/internal/simdgen/categories.yaml @@ -644,6 +644,12 @@ extension: "AVX.*" documentation: !string |- // SetElem sets a single constant-indexed element's value. +- go: GetElem + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // GetElem retrieves a single constant-indexed element's value. + - go: Mul commutative: "true" extension: "AVX.*" diff --git a/internal/simdgen/go.yaml b/internal/simdgen/go.yaml index 2d1038da..76321579 100644 --- a/internal/simdgen/go.yaml +++ b/internal/simdgen/go.yaml @@ -777,6 +777,18 @@ immOffset: 0 out: - *t +- go: GetElem + asm: "VPEXTR[BWDQ]" + in: + - class: vreg + base: $b + elemBits: $e + - class: immediate + immOffset: 0 + out: + - class: greg + base: $b + bits: $e # "Normal" multiplication is only available for floats. # This only covers the single and double precision. - go: Mul diff --git a/internal/simdgen/ops/Moves/categories.yaml b/internal/simdgen/ops/Moves/categories.yaml index 9cf443a2..d0d4a304 100644 --- a/internal/simdgen/ops/Moves/categories.yaml +++ b/internal/simdgen/ops/Moves/categories.yaml @@ -4,3 +4,9 @@ extension: "AVX.*" documentation: !string |- // SetElem sets a single constant-indexed element's value. +- go: GetElem + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // GetElem retrieves a single constant-indexed element's value. 
+ diff --git a/internal/simdgen/ops/Moves/go.yaml b/internal/simdgen/ops/Moves/go.yaml index cdcb0ee8..20d4a053 100644 --- a/internal/simdgen/ops/Moves/go.yaml +++ b/internal/simdgen/ops/Moves/go.yaml @@ -12,3 +12,15 @@ immOffset: 0 out: - *t +- go: GetElem + asm: "VPEXTR[BWDQ]" + in: + - class: vreg + base: $b + elemBits: $e + - class: immediate + immOffset: 0 + out: + - class: greg + base: $b + bits: $e From 9b12b481df57f2ceb86aae447134afa1cadc7e7a Mon Sep 17 00:00:00 2001 From: David Chase Date: Wed, 25 Jun 2025 15:35:23 -0400 Subject: [PATCH 113/200] internal/simdgen: modify sorting so it is prettier int8 < int16 < int32 etc Paired with dev.simd CL 684076 This CL should submit first. Change-Id: I1a6e80e06eef61f99556d0da13aa9e37dfd5285a Reviewed-on: https://go-review.googlesource.com/c/arch/+/684056 Reviewed-by: Junyang Shao TryBot-Bypass: David Chase Commit-Queue: David Chase Reviewed-by: Cherry Mui --- internal/simdgen/gen_simdTypes.go | 8 ++-- internal/simdgen/gen_simdrules.go | 7 ++-- internal/simdgen/godefs.go | 66 +++++++++++++++++++++++++++++-- internal/simdgen/sort_test.go | 37 +++++++++++++++++ 4 files changed, 107 insertions(+), 11 deletions(-) create mode 100644 internal/simdgen/sort_test.go diff --git a/internal/simdgen/gen_simdTypes.go b/internal/simdgen/gen_simdTypes.go index 864e0b7f..a87586aa 100644 --- a/internal/simdgen/gen_simdTypes.go +++ b/internal/simdgen/gen_simdTypes.go @@ -25,13 +25,13 @@ type simdType struct { func compareSimdTypes(x, y simdType) int { // "mask" then "vreg" - if c := strings.Compare(x.Type, y.Type); c != 0 { + if c := compareNatural(x.Type, y.Type); c != 0 { return c } // want "flo" < "int" < "uin" (and then 8 < 16 < 32 < 64), // not "int16" < "int32" < "int64" < "int8") // so limit comparison to first 3 bytes in string. 
- if c := strings.Compare(x.Base[:3], y.Base[:3]); c != 0 { + if c := compareNatural(x.Base[:3], y.Base[:3]); c != 0 { return c } // base type size, 8 < 16 < 32 < 64 @@ -370,11 +370,11 @@ func writeSIMDTestsWrapper(ops []Operation) *bytes.Buffer { } compareOpData := func(x, y opData) int { - return strings.Compare(x.OpShape, y.OpShape) + return compareNatural(x.OpShape, y.OpShape) } data := make([]opData, 0) for _, d := range opsByShape { - slices.SortFunc(d.Ops, strings.Compare) + slices.SortFunc(d.Ops, compareNatural) data = append(data, d) } slices.SortFunc(data, compareOpData) diff --git a/internal/simdgen/gen_simdrules.go b/internal/simdgen/gen_simdrules.go index ad260829..c3686a56 100644 --- a/internal/simdgen/gen_simdrules.go +++ b/internal/simdgen/gen_simdrules.go @@ -9,7 +9,6 @@ import ( "fmt" "io" "slices" - "strings" "text/template" ) @@ -39,13 +38,13 @@ type tplRuleData struct { func compareTplRuleData(x, y tplRuleData) int { // TODO should MaskedXYZ compare just after XYZ? - if c := strings.Compare(x.GoOp, y.GoOp); c != 0 { + if c := compareNatural(x.GoOp, y.GoOp); c != 0 { return c } - if c := strings.Compare(x.GoType, y.GoType); c != 0 { + if c := compareNatural(x.GoType, y.GoType); c != 0 { return c } - if c := strings.Compare(x.Args, y.Args); c != 0 { + if c := compareNatural(x.Args, y.Args); c != 0 { return c } return 0 diff --git a/internal/simdgen/godefs.go b/internal/simdgen/godefs.go index 1dcd48ec..36e2409b 100644 --- a/internal/simdgen/godefs.go +++ b/internal/simdgen/godefs.go @@ -8,6 +8,7 @@ import ( "fmt" "log" "slices" + "strconv" "strings" "golang.org/x/arch/internal/unify" @@ -51,7 +52,7 @@ func (o *Operation) VectorWidth() int { func compareStringPointers(x, y *string) int { if x != nil && y != nil { - return strings.Compare(*x, *y) + return compareNatural(*x, *y) } if x == nil && y == nil { return 0 @@ -76,7 +77,7 @@ func compareIntPointers(x, y *int) int { } func compareOperations(x, y Operation) int { - if c := 
strings.Compare(x.Go, y.Go); c != 0 { + if c := compareNatural(x.Go, y.Go); c != 0 { return c } xIn, yIn := x.In, y.In @@ -109,7 +110,7 @@ func compareOperations(x, y Operation) int { } func compareOperands(x, y *Operand) int { - if c := strings.Compare(x.Class, y.Class); c != 0 { + if c := compareNatural(x.Class, y.Class); c != 0 { return c } if x.Class == "immediate" { @@ -158,6 +159,65 @@ type Operand struct { OverwriteElementBits *int } +// isDigit returns true if the byte is an ASCII digit. +func isDigit(b byte) bool { + return b >= '0' && b <= '9' +} + +// compareNatural performs a "natural sort" comparison of two strings. +// It compares non-digit sections lexicographically and digit sections +// numerically. In the case of string-unequal "equal" strings like +// "a01b" and "a1b", strings.Compare breaks the tie. +// +// It returns: +// +// -1 if s1 < s2 +// 0 if s1 == s2 +// +1 if s1 > s2 +func compareNatural(s1, s2 string) int { + i, j := 0, 0 + len1, len2 := len(s1), len(s2) + + for i < len1 && j < len2 { + // Find a non-digit segment or a number segment in both strings. + if isDigit(s1[i]) && isDigit(s2[j]) { + // Number segment comparison. + numStart1 := i + for i < len1 && isDigit(s1[i]) { + i++ + } + num1, _ := strconv.Atoi(s1[numStart1:i]) + + numStart2 := j + for j < len2 && isDigit(s2[j]) { + j++ + } + num2, _ := strconv.Atoi(s2[numStart2:j]) + + if num1 < num2 { + return -1 + } + if num1 > num2 { + return 1 + } + // If numbers are equal, continue to the next segment. + } else { + // Non-digit comparison. + if s1[i] < s2[j] { + return -1 + } + if s1[i] > s2[j] { + return 1 + } + i++ + j++ + } + } + + // deal with a01b vs a1b; there needs to be an order. 
+ return strings.Compare(s1, s2) +} + func writeGoDefs(path string, cl unify.Closure) error { // TODO: Merge operations with the same signature but multiple // implementations (e.g., SSE vs AVX) diff --git a/internal/simdgen/sort_test.go b/internal/simdgen/sort_test.go new file mode 100644 index 00000000..43a9fd64 --- /dev/null +++ b/internal/simdgen/sort_test.go @@ -0,0 +1,37 @@ +package main + +import "testing" + +func TestSort(t *testing.T) { + testCases := []struct { + s1, s2 string + want int + }{ + {"a1", "a2", -1}, + {"a11a", "a11b", -1}, + {"a01a1", "a1a01", -1}, + {"a2", "a1", 1}, + {"a10", "a2", 1}, + {"a1", "a10", -1}, + {"z11", "z2", 1}, + {"z2", "z11", -1}, + {"abc", "abd", -1}, + {"123", "45", 1}, + {"file1", "file1", 0}, + {"file", "file1", -1}, + {"file1", "file", 1}, + {"a01", "a1", -1}, + {"a1a", "a1b", -1}, + } + + for _, tc := range testCases { + got := compareNatural(tc.s1, tc.s2) + result := "✅" + if got != tc.want { + result = "❌" + t.Errorf("%s CompareNatural(\"%s\", \"%s\") -> got %2d, want %2d\n", result, tc.s1, tc.s2, got, tc.want) + } else { + t.Logf("%s CompareNatural(\"%s\", \"%s\") -> got %2d, want %2d\n", result, tc.s1, tc.s2, got, tc.want) + } + } +} From 025062f86f69c0d424f82bdf8d1a04c78ae14a2b Mon Sep 17 00:00:00 2001 From: David Chase Date: Fri, 27 Jun 2025 13:47:37 -0400 Subject: [PATCH 114/200] internal/simdgen: possible way to configure parameter names includes some name+type shorthand to make templates a little less verbose. 
changes appear in dev.simd CL 684775 Change-Id: I83945f681729a6d97cc8acaccb7b2f35744811d9 Reviewed-on: https://go-review.googlesource.com/c/arch/+/684655 Reviewed-by: Cherry Mui LUCI-TryBot-Result: Go LUCI Reviewed-by: Junyang Shao --- internal/simdgen/gen_simdTypes.go | 16 ++-- internal/simdgen/gen_utility.go | 100 +++++++++++++++++++++++ internal/simdgen/go.yaml | 2 + internal/simdgen/godefs.go | 3 +- internal/simdgen/ops/GaloisField/go.yaml | 2 + 5 files changed, 114 insertions(+), 9 deletions(-) diff --git a/internal/simdgen/gen_simdTypes.go b/internal/simdgen/gen_simdTypes.go index a87586aa..45f41bbf 100644 --- a/internal/simdgen/gen_simdTypes.go +++ b/internal/simdgen/gen_simdTypes.go @@ -114,14 +114,14 @@ func (x {{(index .In 0).Go}}) {{.Go}}() {{.GoType}} {{if .Documentation}}{{.Documentation}} //{{end}} // Asm: {{.Asm}}, CPU Feature: {{.Extension}} -func (x {{(index .In 0).Go}}) {{.Go}}(y {{(index .In 1).Go}}) {{.GoType}} +func (x {{(index .In 0).Go}}) {{.Go}}({{.Op1NameAndType "y"}}) {{.GoType}} {{end}} {{define "op3"}} {{if .Documentation}}{{.Documentation}} //{{end}} // Asm: {{.Asm}}, CPU Feature: {{.Extension}} -func (x {{(index .In 0).Go}}) {{.Go}}(y {{(index .In 1).Go}}, z {{(index .In 2).Go}}) {{.GoType}} +func (x {{(index .In 0).Go}}) {{.Go}}({{.Op1NameAndType "y"}}, {{.Op2NameAndType "z"}}) {{.GoType}} {{end}} {{define "op2VecAsScalar"}} @@ -135,42 +135,42 @@ func (x {{(index .In 0).Go}}) {{.Go}}(y uint{{(index .In 1).TreatLikeAScalarOfSi {{if .Documentation}}{{.Documentation}} //{{end}} // Asm: {{.Asm}}, CPU Feature: {{.Extension}} -func (x {{(index .In 0).Go}}) {{.Go}}(y uint{{(index .In 1).TreatLikeAScalarOfSize}}, z {{(index .In 2).Go}}) {{(index .Out 0).Go}} +func (x {{(index .In 0).Go}}) {{.Go}}(y uint{{(index .In 1).TreatLikeAScalarOfSize}}, {{.Op2NameAndType "z"}}) {{(index .Out 0).Go}} {{end}} {{define "op4"}} {{if .Documentation}}{{.Documentation}} //{{end}} // Asm: {{.Asm}}, CPU Feature: {{.Extension}} -func (x {{(index .In 0).Go}}) 
{{.Go}}(y {{(index .In 1).Go}}, z {{(index .In 2).Go}}, u {{(index .In 3).Go}}) {{.GoType}} +func (x {{(index .In 0).Go}}) {{.Go}}({{.Op1NameAndType "y"}}, {{.Op2NameAndType "z"}}, {{.Op3NameAndType "u"}}) {{.GoType}} {{end}} {{define "op1Imm8"}} {{if .Documentation}}{{.Documentation}} //{{end}} // Asm: {{.Asm}}, CPU Feature: {{.Extension}} -func (x {{(index .In 1).Go}}) {{.Go}}(imm8 uint8) {{.GoType}} +func (x {{(index .In 1).Go}}) {{.Go}}({{.ImmName}} uint8) {{.GoType}} {{end}} {{define "op2Imm8"}} {{if .Documentation}}{{.Documentation}} //{{end}} // Asm: {{.Asm}}, CPU Feature: {{.Extension}} -func (x {{(index .In 1).Go}}) {{.Go}}(imm uint8, y {{(index .In 2).Go}}) {{.GoType}} +func (x {{(index .In 1).Go}}) {{.Go}}({{.ImmName}} uint8, {{.Op2NameAndType "y"}}) {{.GoType}} {{end}} {{define "op3Imm8"}} {{if .Documentation}}{{.Documentation}} //{{end}} // Asm: {{.Asm}}, CPU Feature: {{.Extension}} -func (x {{(index .In 1).Go}}) {{.Go}}(imm uint8, y {{(index .In 2).Go}}, z {{(index .In 3).Go}}) {{.GoType}} +func (x {{(index .In 1).Go}}) {{.Go}}({{.ImmName}} uint8, {{.Op2NameAndType "y"}}, {{.Op3NameAndType "z"}}) {{.GoType}} {{end}} {{define "op4Imm8"}} {{if .Documentation}}{{.Documentation}} //{{end}} // Asm: {{.Asm}}, CPU Feature: {{.Extension}} -func (x {{(index .In 1).Go}}) {{.Go}}(imm uint8, y {{(index .In 2).Go}}, z {{(index .In 3).Go}}, u {{(index .In 4).Go}}) {{.GoType}} +func (x {{(index .In 1).Go}}) {{.Go}}({{.ImmName}} uint8, {{.Op2NameAndType "y"}}, {{.Op3NameAndType "z"}}, {{.Op4NameAndType "u"}}) {{.GoType}} {{end}} {{define "vectorConversion"}} diff --git a/internal/simdgen/gen_utility.go b/internal/simdgen/gen_utility.go index cba608e0..5ea475f1 100644 --- a/internal/simdgen/gen_utility.go +++ b/internal/simdgen/gen_utility.go @@ -5,6 +5,7 @@ package main import ( + "bufio" "bytes" "fmt" "go/format" @@ -44,6 +45,9 @@ func createPath(goroot string, file string) (*os.File, error) { func formatWriteAndClose(out *bytes.Buffer, goroot string, file string) { 
b, err := format.Source(out.Bytes()) if err != nil { + fmt.Fprintf(os.Stderr, "%v\n", err) + fmt.Fprintf(os.Stderr, "%s\n", numberLines(out.Bytes())) + fmt.Fprintf(os.Stderr, "%v\n", err) panic(err) } else { writeAndClose(b, goroot, file) @@ -59,6 +63,18 @@ func writeAndClose(b []byte, goroot string, file string) { ofile.Close() } +// numberLines takes a slice of bytes, and returns a string where each line +// is numbered, starting from 1. +func numberLines(data []byte) string { + var buf bytes.Buffer + r := bytes.NewReader(data) + s := bufio.NewScanner(r) + for i := 1; s.Scan(); i++ { + fmt.Fprintf(&buf, "%d: %s\n", i, s.Text()) + } + return buf.String() +} + const ( InvalidIn int = iota PureVregIn @@ -380,6 +396,84 @@ func (op Operation) GoType() string { return *op.Out[0].Go } +// ImmName returns the name to use for an operation's immediate operand. +// This can be overriden in the yaml with "name" on an operand, +// otherwise, for now, it is "imm" but +// TODO come up with a better default immediate parameter name. +func (op Operation) ImmName() string { + return op.Op0Name("imm") +} + +func (o Operand) OpName(s string) string { + if n := o.Name; n != nil { + return *n + } + return s +} + +func (o Operand) OpNameAndType(s string) string { + return o.OpName(s) + " " + *o.Go +} + +// Op0Name returns the name to use for the 0 operand, +// if any is present, otherwise the parameter is used. +func (op Operation) Op0Name(s string) string { + return op.In[0].OpName(s) +} + +// Op1Name returns the name to use for the 1 operand, +// if any is present, otherwise the parameter is used. +func (op Operation) Op1Name(s string) string { + return op.In[1].OpName(s) +} + +// Op2Name returns the name to use for the 2 operand, +// if any is present, otherwise the parameter is used. +func (op Operation) Op2Name(s string) string { + return op.In[2].OpName(s) +} + +// Op3Name returns the name to use for the 3 operand, +// if any is present, otherwise the parameter is used. 
+func (op Operation) Op3Name(s string) string { + return op.In[3].OpName(s) +} + +// Op0NameAndType returns the name and type to use for +// the 0 operand, if a name is provided, otherwise +// the parameter value is used as the default. +func (op Operation) Op0NameAndType(s string) string { + return op.In[0].OpNameAndType(s) +} + +// Op1NameAndType returns the name and type to use for +// the 1 operand, if a name is provided, otherwise +// the parameter value is used as the default. +func (op Operation) Op1NameAndType(s string) string { + return op.In[1].OpNameAndType(s) +} + +// Op2NameAndType returns the name and type to use for +// the 2 operand, if a name is provided, otherwise +// the parameter value is used as the default. +func (op Operation) Op2NameAndType(s string) string { + return op.In[2].OpNameAndType(s) +} + +// Op3NameAndType returns the name and type to use for +// the 3 operand, if a name is provided, otherwise +// the parameter value is used as the default. +func (op Operation) Op3NameAndType(s string) string { + return op.In[3].OpNameAndType(s) +} + +// Op4NameAndType returns the name and type to use for +// the 4 operand, if a name is provided, otherwise +// the parameter value is used as the default. +func (op Operation) Op4NameAndType(s string) string { + return op.In[4].OpNameAndType(s) +} + // classifyOp returns a classification string, modified operation, and perhaps error based // on the stub and intrinsic shape for the operation. // The classification string is in the regular expression set "op[1234](Imm8)?" 
@@ -773,6 +867,12 @@ func (op Operand) String() string { sb.WriteString(" Lanes: \n") } + if op.Name != nil { + sb.WriteString(fmt.Sprintf(" Name: %s\n", *op.Name)) + } else { + sb.WriteString(" Name: \n") + } + if op.OverwriteClass != nil { sb.WriteString(fmt.Sprintf(" OverwriteClass: %s\n", *op.OverwriteClass)) } else { diff --git a/internal/simdgen/go.yaml b/internal/simdgen/go.yaml index 76321579..925cc842 100644 --- a/internal/simdgen/go.yaml +++ b/internal/simdgen/go.yaml @@ -435,6 +435,7 @@ asm: VGF2P8AFFINEQB in: &AffineArgs - class: mask + name: m - &uint8 go: $t base: uint @@ -444,6 +445,7 @@ - &pureImmVar class: immediate immOffset: 0 + name: b out: - *uint8 diff --git a/internal/simdgen/godefs.go b/internal/simdgen/godefs.go index 36e2409b..d76ee58b 100644 --- a/internal/simdgen/godefs.go +++ b/internal/simdgen/godefs.go @@ -142,7 +142,8 @@ type Operand struct { // The compiler will right-shift the user-passed value by ImmOffset and set it as the AuxInt // field of the operation. ImmOffset *string - Lanes *int // *Lanes equals Bits/ElemBits except for scalars, when *Lanes == 1 + Name *string // optional name in the Go intrinsic declaration + Lanes *int // *Lanes equals Bits/ElemBits except for scalars, when *Lanes == 1 // TreatLikeAScalarOfSize means only the lower $TreatLikeAScalarOfSize bits of the vector // is used, so at the API level we can make it just a scalar value of this size; Then we // can overwrite it to a vector of the right size during intrinsics stage. 
diff --git a/internal/simdgen/ops/GaloisField/go.yaml b/internal/simdgen/ops/GaloisField/go.yaml index 9008ab28..159bfb1f 100644 --- a/internal/simdgen/ops/GaloisField/go.yaml +++ b/internal/simdgen/ops/GaloisField/go.yaml @@ -3,6 +3,7 @@ asm: VGF2P8AFFINEQB in: &AffineArgs - class: mask + name: m - &uint8 go: $t base: uint @@ -12,6 +13,7 @@ - &pureImmVar class: immediate immOffset: 0 + name: b out: - *uint8 From 992047bbee528297556496f56fb3d34382e3e195 Mon Sep 17 00:00:00 2001 From: David Chase Date: Fri, 27 Jun 2025 15:33:48 -0400 Subject: [PATCH 115/200] internal/simdgen: possible way to configure parameter orders This allows specification of an ad hoc (just a name) parameter reordering that must be recognized by the Go declaration and intrinsic mapping templates as a suffix ("_2I" in this case) to an existing "shape", and that will probably also need to be paired with a new helper function in ssagen/intrinsics.go generated and glue code changes appear in dev.simd CL 684776 Change-Id: I95e107bc5c2684c5fa5b11c05169718d5d680148 Reviewed-on: https://go-review.googlesource.com/c/arch/+/684019 Reviewed-by: Cherry Mui Reviewed-by: Junyang Shao LUCI-TryBot-Result: Go LUCI --- internal/simdgen/categories.yaml | 4 +- internal/simdgen/gen_simdIntrinsics.go | 4 ++ internal/simdgen/gen_simdTypes.go | 16 +++++++ internal/simdgen/gen_utility.go | 42 ++++++++++--------- internal/simdgen/go.yaml | 2 + internal/simdgen/godefs.go | 5 ++- .../simdgen/ops/GaloisField/categories.yaml | 4 +- internal/simdgen/ops/GaloisField/go.yaml | 2 + 8 files changed, 54 insertions(+), 25 deletions(-) diff --git a/internal/simdgen/categories.yaml b/internal/simdgen/categories.yaml index bb4492ed..7d0c526d 100644 --- a/internal/simdgen/categories.yaml +++ b/internal/simdgen/categories.yaml @@ -462,7 +462,7 @@ documentation: !string |- // GaloisFieldAffineTransform computes an affine transformation in GF(2^8): // x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 
1-bit matrixes; - // imm is an 8-bit vector. The affine transformation is y * x + imm, with each element of y + // b is an 8-bit vector. The affine transformation is y * x + b, with each element of y // corresponding to a group of 8 elements in x. - go: MaskedGaloisFieldAffineTransformInversed masked: "true" @@ -472,7 +472,7 @@ // GaloisFieldAffineTransform computes an affine transformation in GF(2^8), // with x inversed with reduction polynomial x^8 + x^4 + x^3 + x + 1: // x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrixes; - // imm is an 8-bit vector. The affine transformation is y * x + imm, with each element of y + // b is an 8-bit vector. The affine transformation is y * x + b, with each element of y // corresponding to a group of 8 elements in x. - go: MaskedGaloisFieldMul masked: "true" diff --git a/internal/simdgen/gen_simdIntrinsics.go b/internal/simdgen/gen_simdIntrinsics.go index 3fea1568..3c40856b 100644 --- a/internal/simdgen/gen_simdIntrinsics.go +++ b/internal/simdgen/gen_simdIntrinsics.go @@ -37,8 +37,12 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . 
{{end}} {{define "op2Imm8"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen2Imm8(ssa.Op{{.Go}}{{(index .In 1).Go}}, {{.SSAType}}, {{(index .In 0).ImmOffset}}), sys.AMD64) {{end}} +{{define "op2Imm8_2I"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen2Imm8_2I(ssa.Op{{.Go}}{{(index .In 1).Go}}, {{.SSAType}}, {{(index .In 0).ImmOffset}}), sys.AMD64) +{{end}} {{define "op3Imm8"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen3Imm8(ssa.Op{{.Go}}{{(index .In 1).Go}}, {{.SSAType}}, {{(index .In 0).ImmOffset}}), sys.AMD64) {{end}} +{{define "op3Imm8_2I"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen3Imm8_2I(ssa.Op{{.Go}}{{(index .In 1).Go}}, {{.SSAType}}, {{(index .In 0).ImmOffset}}), sys.AMD64) +{{end}} {{define "op4Imm8"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen4Imm8(ssa.Op{{.Go}}{{(index .In 1).Go}}, {{.SSAType}}, {{(index .In 0).ImmOffset}}), sys.AMD64) {{end}} diff --git a/internal/simdgen/gen_simdTypes.go b/internal/simdgen/gen_simdTypes.go index 45f41bbf..9b57e472 100644 --- a/internal/simdgen/gen_simdTypes.go +++ b/internal/simdgen/gen_simdTypes.go @@ -159,6 +159,14 @@ func (x {{(index .In 1).Go}}) {{.Go}}({{.ImmName}} uint8) {{.GoType}} func (x {{(index .In 1).Go}}) {{.Go}}({{.ImmName}} uint8, {{.Op2NameAndType "y"}}) {{.GoType}} {{end}} +{{define "op2Imm8_2I"}} +{{if .Documentation}}{{.Documentation}} +//{{end}} +// Asm: {{.Asm}}, CPU Feature: {{.Extension}} +func (x {{(index .In 1).Go}}) {{.Go}}({{.Op2NameAndType "y"}}, {{.ImmName}} uint8) {{.GoType}} +{{end}} + + {{define "op3Imm8"}} {{if .Documentation}}{{.Documentation}} //{{end}} @@ -166,6 +174,14 @@ func (x {{(index .In 1).Go}}) {{.Go}}({{.ImmName}} uint8, {{.Op2NameAndType "y"} func (x {{(index .In 1).Go}}) {{.Go}}({{.ImmName}} uint8, {{.Op2NameAndType "y"}}, {{.Op3NameAndType "z"}}) {{.GoType}} {{end}} +{{define "op3Imm8_2I"}} +{{if .Documentation}}{{.Documentation}} +//{{end}} +// Asm: {{.Asm}}, CPU Feature: {{.Extension}} +func (x {{(index .In 
1).Go}}) {{.Go}}({{.Op2NameAndType "y"}}, {{.ImmName}} uint8, {{.Op3NameAndType "z"}}) {{.GoType}} +{{end}} + + {{define "op4Imm8"}} {{if .Documentation}}{{.Documentation}} //{{end}} diff --git a/internal/simdgen/gen_utility.go b/internal/simdgen/gen_utility.go index 5ea475f1..a01bc0c4 100644 --- a/internal/simdgen/gen_utility.go +++ b/internal/simdgen/gen_utility.go @@ -474,40 +474,44 @@ func (op Operation) Op4NameAndType(s string) string { return op.In[4].OpNameAndType(s) } +var immClasses []string = []string{"BAD0Imm", "BAD1Imm", "op1Imm8", "op2Imm8", "op3Imm8", "op4Imm8"} +var classes []string = []string{"BAD0", "op1", "op2", "op3", "op4"} + // classifyOp returns a classification string, modified operation, and perhaps error based // on the stub and intrinsic shape for the operation. -// The classification string is in the regular expression set "op[1234](Imm8)?" +// The classification string is in the regular expression set "op[1234](Imm8)?(_)?" +// where the "" suffix is optionally attached to the Operation in its input yaml. +// The classification string is used to select a template or a clause of a template +// for intrinsics declaration and the ssagen intrinisics glue code in the compiler. 
func classifyOp(op Operation) (string, Operation, error) { _, _, _, immType, _, opNoConstMask, gOp := op.shape() + var class string + if immType == VarImm || immType == ConstVarImm { - switch len(opNoConstMask.In) { + switch l := len(opNoConstMask.In); l { case 1: return "", op, fmt.Errorf("simdgen does not recognize this operation of only immediate input: %s", op) - case 2: - return "op1Imm8", opNoConstMask, nil - case 3: - return "op2Imm8", opNoConstMask, nil - case 4: - return "op3Imm8", opNoConstMask, nil - case 5: - return "op4Imm8", opNoConstMask, nil + case 2, 3, 4, 5: + class = immClasses[l] default: return "", op, fmt.Errorf("simdgen does not recognize this operation of input length %d: %s", len(opNoConstMask.In), op) } + if order := op.OperandOrder; order != nil { + class += "_" + *order + } + return class, opNoConstMask, nil } else { - switch len(gOp.In) { - case 1: - return "op1", gOp, nil - case 2: - return "op2", gOp, nil - case 3: - return "op3", gOp, nil - case 4: - return "op4", gOp, nil + switch l := len(gOp.In); l { + case 1, 2, 3, 4: + class = classes[l] default: return "", op, fmt.Errorf("simdgen does not recognize this operation of input length %d: %s", len(opNoConstMask.In), op) } + if order := op.OperandOrder; order != nil { + class += "_" + *order + } + return class, gOp, nil } } diff --git a/internal/simdgen/go.yaml b/internal/simdgen/go.yaml index 925cc842..5b3aa6a0 100644 --- a/internal/simdgen/go.yaml +++ b/internal/simdgen/go.yaml @@ -433,6 +433,7 @@ - *fp - go: MaskedGaloisFieldAffineTransform asm: VGF2P8AFFINEQB + operandOrder: 2I # 2nd operand, then immediate in: &AffineArgs - class: mask name: m @@ -451,6 +452,7 @@ - go: MaskedGaloisFieldAffineTransformInversed asm: VGF2P8AFFINEINVQB + operandOrder: 2I # 2nd operand, then immediate in: *AffineArgs out: - *uint8 diff --git a/internal/simdgen/godefs.go b/internal/simdgen/godefs.go index d76ee58b..4ac4a9a1 100644 --- a/internal/simdgen/godefs.go +++ b/internal/simdgen/godefs.go @@ 
-17,8 +17,9 @@ import ( type Operation struct { Go string // Go method name - GoArch string // GOARCH for this definition - Asm string // Assembly mnemonic + GoArch string // GOARCH for this definition + Asm string // Assembly mnemonic + OperandOrder *string // optional Operand order for better Go declarations In []Operand // Arguments Out []Operand // Results diff --git a/internal/simdgen/ops/GaloisField/categories.yaml b/internal/simdgen/ops/GaloisField/categories.yaml index 915d3ec1..2515893b 100644 --- a/internal/simdgen/ops/GaloisField/categories.yaml +++ b/internal/simdgen/ops/GaloisField/categories.yaml @@ -6,7 +6,7 @@ documentation: !string |- // GaloisFieldAffineTransform computes an affine transformation in GF(2^8): // x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrixes; - // imm is an 8-bit vector. The affine transformation is y * x + imm, with each element of y + // b is an 8-bit vector. The affine transformation is y * x + b, with each element of y // corresponding to a group of 8 elements in x. - go: MaskedGaloisFieldAffineTransformInversed masked: "true" @@ -16,7 +16,7 @@ // GaloisFieldAffineTransform computes an affine transformation in GF(2^8), // with x inversed with reduction polynomial x^8 + x^4 + x^3 + x + 1: // x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrixes; - // imm is an 8-bit vector. The affine transformation is y * x + imm, with each element of y + // b is an 8-bit vector. The affine transformation is y * x + b, with each element of y // corresponding to a group of 8 elements in x. 
- go: MaskedGaloisFieldMul masked: "true" diff --git a/internal/simdgen/ops/GaloisField/go.yaml b/internal/simdgen/ops/GaloisField/go.yaml index 159bfb1f..c4d02e17 100644 --- a/internal/simdgen/ops/GaloisField/go.yaml +++ b/internal/simdgen/ops/GaloisField/go.yaml @@ -1,6 +1,7 @@ !sum - go: MaskedGaloisFieldAffineTransform asm: VGF2P8AFFINEQB + operandOrder: 2I # 2nd operand, then immediate in: &AffineArgs - class: mask name: m @@ -19,6 +20,7 @@ - go: MaskedGaloisFieldAffineTransformInversed asm: VGF2P8AFFINEINVQB + operandOrder: 2I # 2nd operand, then immediate in: *AffineArgs out: - *uint8 From 203f3d62f49b8b27488de7216051da8a339a6cbf Mon Sep 17 00:00:00 2001 From: Junyang Shao Date: Mon, 30 Jun 2025 18:34:53 +0000 Subject: [PATCH 116/200] internal/simdgen: cleanup unnecessary APIs, documentations This CL cleans up: 1. Removes instructions that suppress exceptions, defaults to use MXCSR instead. 2. Remove "Const Immediate" from documentations. 3. Correct the documentation for masked operations. Change-Id: Ic53db59252093ec0132e99b6f73039bcaf20a614 Reviewed-on: https://go-review.googlesource.com/c/arch/+/685035 LUCI-TryBot-Result: Go LUCI Reviewed-by: David Chase --- internal/simdgen/categories.yaml | 121 +----------------- internal/simdgen/go.yaml | 1 - internal/simdgen/ops/Compares/categories.yaml | 17 --- .../simdgen/ops/FPonlyArith/categories.yaml | 85 ------------ .../simdgen/ops/GaloisField/categories.yaml | 6 +- internal/simdgen/ops/MLOps/categories.yaml | 3 +- internal/simdgen/ops/Mul/categories.yaml | 9 -- internal/simdgen/ops/main.go | 4 - 8 files changed, 8 insertions(+), 238 deletions(-) diff --git a/internal/simdgen/categories.yaml b/internal/simdgen/categories.yaml index 7d0c526d..fbfd6613 100644 --- a/internal/simdgen/categories.yaml +++ b/internal/simdgen/categories.yaml @@ -1,5 +1,4 @@ !sum -# TODO: remove the "Const Immediate" from the documentation field, it's there only for debug purposes. 
- go: Add commutative: "true" extension: "AVX.*" @@ -127,49 +126,42 @@ extension: "AVX.*" documentation: !string |- // Equal compares for equality. - // Const Immediate = 0. - go: Less constImm: 1 commutative: "false" extension: "AVX.*" documentation: !string |- // Less compares for less than. - // Const Immediate = 1. - go: LessEqual constImm: 2 commutative: "false" extension: "AVX.*" documentation: !string |- // LessEqual compares for less than or equal. - // Const Immediate = 2. - go: IsNan # For float only. constImm: 3 commutative: "true" extension: "AVX.*" documentation: !string |- // IsNan checks if elements are NaN. Use as x.IsNan(x). - // Const Immediate = 3. - go: NotEqual constImm: 4 commutative: "true" extension: "AVX.*" documentation: !string |- // NotEqual compares for inequality. - // Const Immediate = 4. - go: GreaterEqual constImm: 5 commutative: "false" extension: "AVX.*" documentation: !string |- // GreaterEqual compares for greater than or equal. - // Const Immediate = 5. - go: Greater constImm: 6 commutative: "false" extension: "AVX.*" documentation: !string |- // Greater compares for greater than. - // Const Immediate = 6. - go: MaskedEqual constImm: 0 @@ -178,10 +170,6 @@ extension: "AVX.*" documentation: !string |- // MaskedEqual compares for equality, masked. - // Const Immediate = 0. - docUnmasked: !string |- - // Equal compares for equality. - // Const Immediate = 0. - go: MaskedLess constImm: 1 masked: "true" @@ -189,7 +177,6 @@ extension: "AVX.*" documentation: !string |- // MaskedLess compares for less than. - // Const Immediate = 1. - go: MaskedLessEqual constImm: 2 masked: "true" @@ -197,7 +184,6 @@ extension: "AVX.*" documentation: !string |- // MaskedLessEqual compares for less than or equal. - // Const Immediate = 2. - go: MaskedIsNan # For float only. constImm: 3 masked: "true" @@ -205,7 +191,6 @@ extension: "AVX.*" documentation: !string |- // MaskedIsNan checks if elements are NaN. Use as x.IsNan(x). - // Const Immediate = 3. 
- go: MaskedNotEqual constImm: 4 masked: "true" @@ -213,7 +198,6 @@ extension: "AVX.*" documentation: !string |- // MaskedNotEqual compares for inequality. - // Const Immediate = 4. - go: MaskedGreaterEqual constImm: 5 masked: "true" @@ -221,7 +205,6 @@ extension: "AVX.*" documentation: !string |- // MaskedGreaterEqual compares for greater than or equal. - // Const Immediate = 5. - go: MaskedGreater constImm: 6 masked: "true" @@ -229,7 +212,6 @@ extension: "AVX.*" documentation: !string |- // MaskedGreater compares for greater than. - // Const Immediate = 6. - go: Div commutative: "false" extension: "AVX.*" @@ -287,7 +269,6 @@ constImm: 0 documentation: !string |- // Round rounds elements to the nearest integer. - // Const Immediate = 0. - go: MaskedRoundWithPrecision commutative: "false" extension: "AVX.*" @@ -295,15 +276,6 @@ masked: "true" documentation: !string |- // MaskedRoundWithPrecision rounds elements with specified precision. - // Const Immediate = 0. -- go: MaskedRoundSuppressExceptionWithPrecision - commutative: "false" - extension: "AVX.*" - constImm: 8 - masked: "true" - documentation: !string |- - // MaskedRoundSuppressExceptionWithPrecision rounds elements with specified precision, suppressing exceptions. - // Const Immediate = 8. - go: MaskedDiffWithRoundWithPrecision commutative: "false" extension: "AVX.*" @@ -311,15 +283,6 @@ masked: "true" documentation: !string |- // MaskedDiffWithRoundWithPrecision computes the difference after rounding with specified precision. - // Const Immediate = 0. -- go: MaskedDiffWithRoundSuppressExceptionWithPrecision - commutative: "false" - extension: "AVX.*" - constImm: 8 - masked: "true" - documentation: !string |- - // MaskedDiffWithRoundSuppressExceptionWithPrecision computes the difference after rounding with specified precision, suppressing exceptions. - // Const Immediate = 8. 
- go: Floor commutative: "false" @@ -327,7 +290,6 @@ constImm: 1 documentation: !string |- // Floor rounds elements down to the nearest integer. - // Const Immediate = 1. - go: MaskedFloorWithPrecision commutative: "false" extension: "AVX.*" @@ -335,21 +297,6 @@ masked: "true" documentation: !string |- // MaskedFloorWithPrecision rounds elements down with specified precision, masked. - // Const Immediate = 1. - docUnmasked: !string |- - // FloorWithPrecision rounds elements down with specified precision. - // Const Immediate = 1. -- go: MaskedFloorSuppressExceptionWithPrecision - commutative: "false" - extension: "AVX.*" - constImm: 9 - masked: "true" - documentation: !string |- - // MaskedFloorSuppressExceptionWithPrecision rounds elements down with specified precision, suppressing exceptions, masked. - // Const Immediate = 9. - docUnmasked: !string |- - // FloorSuppressExceptionWithPrecision rounds elements down with specified precision, suppressing exceptions. - // Const Immediate = 9. - go: MaskedDiffWithFloorWithPrecision commutative: "false" extension: "AVX.*" @@ -357,15 +304,6 @@ masked: "true" documentation: !string |- // MaskedDiffWithFloorWithPrecision computes the difference after flooring with specified precision. - // Const Immediate = 1. -- go: MaskedDiffWithFloorSuppressExceptionWithPrecision - commutative: "false" - extension: "AVX.*" - constImm: 9 - masked: "true" - documentation: !string |- - // MaskedDiffWithFloorSuppressExceptionWithPrecision computes the difference after flooring with specified precision, suppressing exceptions. - // Const Immediate = 9. - go: Ceil commutative: "false" @@ -373,7 +311,6 @@ constImm: 2 documentation: !string |- // Ceil rounds elements up to the nearest integer. - // Const Immediate = 2. - go: MaskedCeilWithPrecision commutative: "false" extension: "AVX.*" @@ -381,18 +318,6 @@ masked: "true" documentation: !string |- // MaskedCeilWithPrecision rounds elements up with specified precision, masked. 
- // Const Immediate = 2. - docUnmasked: !string |- - // CeilWithPrecision rounds elements up with specified precision. - // Const Immediate = 2. -- go: MaskedCeilSuppressExceptionWithPrecision - commutative: "false" - extension: "AVX.*" - constImm: 10 - masked: "true" - documentation: !string |- - // MaskedCeilSuppressExceptionWithPrecision rounds elements up with specified precision, suppressing exceptions. - // Const Immediate = 10. - go: MaskedDiffWithCeilWithPrecision commutative: "false" extension: "AVX.*" @@ -400,15 +325,6 @@ masked: "true" documentation: !string |- // MaskedDiffWithCeilWithPrecision computes the difference after ceiling with specified precision. - // Const Immediate = 2. -- go: MaskedDiffWithCeilSuppressExceptionWithPrecision - commutative: "false" - extension: "AVX.*" - constImm: 10 - masked: "true" - documentation: !string |- - // MaskedDiffWithCeilSuppressExceptionWithPrecision computes the difference after ceiling with specified precision, suppressing exceptions. - // Const Immediate = 10. - go: Trunc commutative: "false" @@ -416,7 +332,6 @@ constImm: 3 documentation: !string |- // Trunc truncates elements towards zero. - // Const Immediate = 3. - go: MaskedTruncWithPrecision commutative: "false" extension: "AVX.*" @@ -424,15 +339,6 @@ masked: "true" documentation: !string |- // MaskedTruncWithPrecision truncates elements with specified precision. - // Const Immediate = 3. -- go: MaskedTruncSuppressExceptionWithPrecision - commutative: "false" - extension: "AVX.*" - constImm: 11 - masked: "true" - documentation: !string |- - // MaskedTruncSuppressExceptionWithPrecision truncates elements with specified precision, suppressing exceptions. - // Const Immediate = 11. - go: MaskedDiffWithTruncWithPrecision commutative: "false" extension: "AVX.*" @@ -440,15 +346,6 @@ masked: "true" documentation: !string |- // MaskedDiffWithTruncWithPrecision computes the difference after truncating with specified precision. - // Const Immediate = 3. 
-- go: MaskedDiffWithTruncSuppressExceptionWithPrecision - commutative: "false" - extension: "AVX.*" - constImm: 11 - masked: "true" - documentation: !string |- - // MaskedDiffWithTruncSuppressExceptionWithPrecision computes the difference after truncating with specified precision, suppressing exceptions. - // Const Immediate = 11. - go: AddSub commutative: "false" @@ -460,7 +357,7 @@ commutative: "false" extension: "AVX.*" documentation: !string |- - // GaloisFieldAffineTransform computes an affine transformation in GF(2^8): + // MaskedGaloisFieldAffineTransform computes an affine transformation in GF(2^8): // x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrixes; // b is an 8-bit vector. The affine transformation is y * x + b, with each element of y // corresponding to a group of 8 elements in x. @@ -469,7 +366,7 @@ commutative: "false" extension: "AVX.*" documentation: !string |- - // GaloisFieldAffineTransform computes an affine transformation in GF(2^8), + // MaskedGaloisFieldAffineTransform computes an affine transformation in GF(2^8), // with x inversed with reduction polynomial x^8 + x^4 + x^3 + x + 1: // x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrixes; // b is an 8-bit vector. The affine transformation is y * x + b, with each element of y @@ -479,7 +376,7 @@ commutative: "false" extension: "AVX.*" documentation: !string |- - // GaloisFieldMul computes element-wise GF(2^8) multiplication with + // MaskedGaloisFieldMul computes element-wise GF(2^8) multiplication with // reduction polynomial x^8 + x^4 + x^3 + x + 1. - go: Average commutative: "true" @@ -554,7 +451,6 @@ extension: "AVX.*" documentation: !string |- // DotProdBroadcast multiplies all elements and broadcasts the sum. - // Const Immediate = 127. 
- go: UnsignedSignedQuadDotProdAccumulate commutative: "false" extension: "AVX.*" @@ -576,7 +472,7 @@ commutative: "false" extension: "AVX.*" documentation: !string |- - // SaturatedUnsignedSignedQuadDotProdAccumulate multiplies performs dot products on groups of 4 elements of y and z and accumulates the results to x. + // MaskedSaturatedUnsignedSignedQuadDotProdAccumulate multiplies performs dot products on groups of 4 elements of y and z and accumulates the results to x. - go: PairDotProdAccumulate commutative: "false" extension: "AVX.*" @@ -677,8 +573,6 @@ extension: "AVX.*" documentation: !string |- // MaskedMul multiplies corresponding elements of two vectors, masked. - docUnmasked: !string |- - // Mul multiplies corresponding elements of two vectors. - go: MaskedMulEvenWiden masked: "true" commutative: "true" @@ -686,25 +580,18 @@ documentation: !string |- // MaskedMulEvenWiden multiplies even-indexed elements, widening the result, masked. // Result[i] = v1.Even[i] * v2.Even[i]. - docUnmasked: !string |- - // MulEvenWiden multiplies even-indexed elements, widening the result. - // Result[i] = v1.Even[i] * v2.Even[i]. - go: MaskedMulHigh masked: "true" commutative: "true" extension: "AVX.*" documentation: !string |- // MaskedMulHigh multiplies elements and stores the high part of the result, masked. - docUnmasked: !string |- - // MulHigh multiplies elements and stores the high part of the result. - go: MaskedMulLow masked: "true" commutative: "true" extension: "AVX.*" documentation: !string |- // MaskedMulLow multiplies elements and stores the low part of the result, masked. - docUnmasked: !string |- - // MulLow multiplies elements and stores the low part of the result. 
- go: ShiftAllLeft nameAndSizeCheck: "true" commutative: "false" diff --git a/internal/simdgen/go.yaml b/internal/simdgen/go.yaml index 5b3aa6a0..f91bafac 100644 --- a/internal/simdgen/go.yaml +++ b/internal/simdgen/go.yaml @@ -1,5 +1,4 @@ !sum -# TODO: remove the "Const Immediate" from the documentation field, it's there only for debug purposes. # Add - go: Add asm: "VPADD[BWDQ]|VADDP[SD]" diff --git a/internal/simdgen/ops/Compares/categories.yaml b/internal/simdgen/ops/Compares/categories.yaml index bd4d8c76..08b153c7 100644 --- a/internal/simdgen/ops/Compares/categories.yaml +++ b/internal/simdgen/ops/Compares/categories.yaml @@ -12,49 +12,42 @@ extension: "AVX.*" documentation: !string |- // Equal compares for equality. - // Const Immediate = 0. - go: Less constImm: 1 commutative: "false" extension: "AVX.*" documentation: !string |- // Less compares for less than. - // Const Immediate = 1. - go: LessEqual constImm: 2 commutative: "false" extension: "AVX.*" documentation: !string |- // LessEqual compares for less than or equal. - // Const Immediate = 2. - go: IsNan # For float only. constImm: 3 commutative: "true" extension: "AVX.*" documentation: !string |- // IsNan checks if elements are NaN. Use as x.IsNan(x). - // Const Immediate = 3. - go: NotEqual constImm: 4 commutative: "true" extension: "AVX.*" documentation: !string |- // NotEqual compares for inequality. - // Const Immediate = 4. - go: GreaterEqual constImm: 5 commutative: "false" extension: "AVX.*" documentation: !string |- // GreaterEqual compares for greater than or equal. - // Const Immediate = 5. - go: Greater constImm: 6 commutative: "false" extension: "AVX.*" documentation: !string |- // Greater compares for greater than. - // Const Immediate = 6. - go: MaskedEqual constImm: 0 @@ -63,10 +56,6 @@ extension: "AVX.*" documentation: !string |- // MaskedEqual compares for equality, masked. - // Const Immediate = 0. - docUnmasked: !string |- - // Equal compares for equality. - // Const Immediate = 0. 
- go: MaskedLess constImm: 1 masked: "true" @@ -74,7 +63,6 @@ extension: "AVX.*" documentation: !string |- // MaskedLess compares for less than. - // Const Immediate = 1. - go: MaskedLessEqual constImm: 2 masked: "true" @@ -82,7 +70,6 @@ extension: "AVX.*" documentation: !string |- // MaskedLessEqual compares for less than or equal. - // Const Immediate = 2. - go: MaskedIsNan # For float only. constImm: 3 masked: "true" @@ -90,7 +77,6 @@ extension: "AVX.*" documentation: !string |- // MaskedIsNan checks if elements are NaN. Use as x.IsNan(x). - // Const Immediate = 3. - go: MaskedNotEqual constImm: 4 masked: "true" @@ -98,7 +84,6 @@ extension: "AVX.*" documentation: !string |- // MaskedNotEqual compares for inequality. - // Const Immediate = 4. - go: MaskedGreaterEqual constImm: 5 masked: "true" @@ -106,7 +91,6 @@ extension: "AVX.*" documentation: !string |- // MaskedGreaterEqual compares for greater than or equal. - // Const Immediate = 5. - go: MaskedGreater constImm: 6 masked: "true" @@ -114,4 +98,3 @@ extension: "AVX.*" documentation: !string |- // MaskedGreater compares for greater than. - // Const Immediate = 6. diff --git a/internal/simdgen/ops/FPonlyArith/categories.yaml b/internal/simdgen/ops/FPonlyArith/categories.yaml index c00d43d6..51dfd04d 100644 --- a/internal/simdgen/ops/FPonlyArith/categories.yaml +++ b/internal/simdgen/ops/FPonlyArith/categories.yaml @@ -56,7 +56,6 @@ constImm: 0 documentation: !string |- // Round rounds elements to the nearest integer. - // Const Immediate = 0. - go: MaskedRoundWithPrecision commutative: "false" extension: "AVX.*" @@ -64,15 +63,6 @@ masked: "true" documentation: !string |- // MaskedRoundWithPrecision rounds elements with specified precision. - // Const Immediate = 0. 
-- go: MaskedRoundSuppressExceptionWithPrecision - commutative: "false" - extension: "AVX.*" - constImm: 8 - masked: "true" - documentation: !string |- - // MaskedRoundSuppressExceptionWithPrecision rounds elements with specified precision, suppressing exceptions. - // Const Immediate = 8. - go: MaskedDiffWithRoundWithPrecision commutative: "false" extension: "AVX.*" @@ -80,15 +70,6 @@ masked: "true" documentation: !string |- // MaskedDiffWithRoundWithPrecision computes the difference after rounding with specified precision. - // Const Immediate = 0. -- go: MaskedDiffWithRoundSuppressExceptionWithPrecision - commutative: "false" - extension: "AVX.*" - constImm: 8 - masked: "true" - documentation: !string |- - // MaskedDiffWithRoundSuppressExceptionWithPrecision computes the difference after rounding with specified precision, suppressing exceptions. - // Const Immediate = 8. - go: Floor commutative: "false" @@ -96,7 +77,6 @@ constImm: 1 documentation: !string |- // Floor rounds elements down to the nearest integer. - // Const Immediate = 1. - go: MaskedFloorWithPrecision commutative: "false" extension: "AVX.*" @@ -104,21 +84,6 @@ masked: "true" documentation: !string |- // MaskedFloorWithPrecision rounds elements down with specified precision, masked. - // Const Immediate = 1. - docUnmasked: !string |- - // FloorWithPrecision rounds elements down with specified precision. - // Const Immediate = 1. -- go: MaskedFloorSuppressExceptionWithPrecision - commutative: "false" - extension: "AVX.*" - constImm: 9 - masked: "true" - documentation: !string |- - // MaskedFloorSuppressExceptionWithPrecision rounds elements down with specified precision, suppressing exceptions, masked. - // Const Immediate = 9. - docUnmasked: !string |- - // FloorSuppressExceptionWithPrecision rounds elements down with specified precision, suppressing exceptions. - // Const Immediate = 9. 
- go: MaskedDiffWithFloorWithPrecision commutative: "false" extension: "AVX.*" @@ -126,15 +91,6 @@ masked: "true" documentation: !string |- // MaskedDiffWithFloorWithPrecision computes the difference after flooring with specified precision. - // Const Immediate = 1. -- go: MaskedDiffWithFloorSuppressExceptionWithPrecision - commutative: "false" - extension: "AVX.*" - constImm: 9 - masked: "true" - documentation: !string |- - // MaskedDiffWithFloorSuppressExceptionWithPrecision computes the difference after flooring with specified precision, suppressing exceptions. - // Const Immediate = 9. - go: Ceil commutative: "false" @@ -142,7 +98,6 @@ constImm: 2 documentation: !string |- // Ceil rounds elements up to the nearest integer. - // Const Immediate = 2. - go: MaskedCeilWithPrecision commutative: "false" extension: "AVX.*" @@ -150,18 +105,6 @@ masked: "true" documentation: !string |- // MaskedCeilWithPrecision rounds elements up with specified precision, masked. - // Const Immediate = 2. - docUnmasked: !string |- - // CeilWithPrecision rounds elements up with specified precision. - // Const Immediate = 2. -- go: MaskedCeilSuppressExceptionWithPrecision - commutative: "false" - extension: "AVX.*" - constImm: 10 - masked: "true" - documentation: !string |- - // MaskedCeilSuppressExceptionWithPrecision rounds elements up with specified precision, suppressing exceptions. - // Const Immediate = 10. - go: MaskedDiffWithCeilWithPrecision commutative: "false" extension: "AVX.*" @@ -169,15 +112,6 @@ masked: "true" documentation: !string |- // MaskedDiffWithCeilWithPrecision computes the difference after ceiling with specified precision. - // Const Immediate = 2. -- go: MaskedDiffWithCeilSuppressExceptionWithPrecision - commutative: "false" - extension: "AVX.*" - constImm: 10 - masked: "true" - documentation: !string |- - // MaskedDiffWithCeilSuppressExceptionWithPrecision computes the difference after ceiling with specified precision, suppressing exceptions. 
- // Const Immediate = 10. - go: Trunc commutative: "false" @@ -185,7 +119,6 @@ constImm: 3 documentation: !string |- // Trunc truncates elements towards zero. - // Const Immediate = 3. - go: MaskedTruncWithPrecision commutative: "false" extension: "AVX.*" @@ -193,15 +126,6 @@ masked: "true" documentation: !string |- // MaskedTruncWithPrecision truncates elements with specified precision. - // Const Immediate = 3. -- go: MaskedTruncSuppressExceptionWithPrecision - commutative: "false" - extension: "AVX.*" - constImm: 11 - masked: "true" - documentation: !string |- - // MaskedTruncSuppressExceptionWithPrecision truncates elements with specified precision, suppressing exceptions. - // Const Immediate = 11. - go: MaskedDiffWithTruncWithPrecision commutative: "false" extension: "AVX.*" @@ -209,15 +133,6 @@ masked: "true" documentation: !string |- // MaskedDiffWithTruncWithPrecision computes the difference after truncating with specified precision. - // Const Immediate = 3. -- go: MaskedDiffWithTruncSuppressExceptionWithPrecision - commutative: "false" - extension: "AVX.*" - constImm: 11 - masked: "true" - documentation: !string |- - // MaskedDiffWithTruncSuppressExceptionWithPrecision computes the difference after truncating with specified precision, suppressing exceptions. - // Const Immediate = 11. - go: AddSub commutative: "false" diff --git a/internal/simdgen/ops/GaloisField/categories.yaml b/internal/simdgen/ops/GaloisField/categories.yaml index 2515893b..0b3978a4 100644 --- a/internal/simdgen/ops/GaloisField/categories.yaml +++ b/internal/simdgen/ops/GaloisField/categories.yaml @@ -4,7 +4,7 @@ commutative: "false" extension: "AVX.*" documentation: !string |- - // GaloisFieldAffineTransform computes an affine transformation in GF(2^8): + // MaskedGaloisFieldAffineTransform computes an affine transformation in GF(2^8): // x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrixes; // b is an 8-bit vector. 
The affine transformation is y * x + b, with each element of y // corresponding to a group of 8 elements in x. @@ -13,7 +13,7 @@ commutative: "false" extension: "AVX.*" documentation: !string |- - // GaloisFieldAffineTransform computes an affine transformation in GF(2^8), + // MaskedGaloisFieldAffineTransformInversed computes an affine transformation in GF(2^8), // with x inversed with reduction polynomial x^8 + x^4 + x^3 + x + 1: // x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrixes; // b is an 8-bit vector. The affine transformation is y * x + b, with each element of y @@ -23,5 +23,5 @@ commutative: "false" extension: "AVX.*" documentation: !string |- - // GaloisFieldMul computes element-wise GF(2^8) multiplication with + // MaskedGaloisFieldMul computes element-wise GF(2^8) multiplication with // reduction polynomial x^8 + x^4 + x^3 + x + 1. \ No newline at end of file diff --git a/internal/simdgen/ops/MLOps/categories.yaml b/internal/simdgen/ops/MLOps/categories.yaml index 6923dd37..54911b16 100644 --- a/internal/simdgen/ops/MLOps/categories.yaml +++ b/internal/simdgen/ops/MLOps/categories.yaml @@ -32,7 +32,6 @@ extension: "AVX.*" documentation: !string |- // DotProdBroadcast multiplies all elements and broadcasts the sum. - // Const Immediate = 127. - go: UnsignedSignedQuadDotProdAccumulate commutative: "false" extension: "AVX.*" @@ -54,7 +53,7 @@ commutative: "false" extension: "AVX.*" documentation: !string |- - // SaturatedUnsignedSignedQuadDotProdAccumulate multiplies performs dot products on groups of 4 elements of y and z and accumulates the results to x. + // MaskedSaturatedUnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x. 
- go: PairDotProdAccumulate commutative: "false" extension: "AVX.*" diff --git a/internal/simdgen/ops/Mul/categories.yaml b/internal/simdgen/ops/Mul/categories.yaml index def502f3..c0f87beb 100644 --- a/internal/simdgen/ops/Mul/categories.yaml +++ b/internal/simdgen/ops/Mul/categories.yaml @@ -26,8 +26,6 @@ extension: "AVX.*" documentation: !string |- // MaskedMul multiplies corresponding elements of two vectors, masked. - docUnmasked: !string |- - // Mul multiplies corresponding elements of two vectors. - go: MaskedMulEvenWiden masked: "true" commutative: "true" @@ -35,22 +33,15 @@ documentation: !string |- // MaskedMulEvenWiden multiplies even-indexed elements, widening the result, masked. // Result[i] = v1.Even[i] * v2.Even[i]. - docUnmasked: !string |- - // MulEvenWiden multiplies even-indexed elements, widening the result. - // Result[i] = v1.Even[i] * v2.Even[i]. - go: MaskedMulHigh masked: "true" commutative: "true" extension: "AVX.*" documentation: !string |- // MaskedMulHigh multiplies elements and stores the high part of the result, masked. - docUnmasked: !string |- - // MulHigh multiplies elements and stores the high part of the result. - go: MaskedMulLow masked: "true" commutative: "true" extension: "AVX.*" documentation: !string |- // MaskedMulLow multiplies elements and stores the low part of the result, masked. - docUnmasked: !string |- - // MulLow multiplies elements and stores the low part of the result. 
diff --git a/internal/simdgen/ops/main.go b/internal/simdgen/ops/main.go index c71f3ff6..7e462bf7 100644 --- a/internal/simdgen/ops/main.go +++ b/internal/simdgen/ops/main.go @@ -32,10 +32,6 @@ func mergeYamlFiles(targetFileName string) error { if err != nil { return fmt.Errorf("failed to write '!sum' to %s: %w", targetFileName, err) } - _, err = writer.WriteString("# TODO: remove the \"Const Immediate\" from the documentation field, it's there only for debug purposes.\n") - if err != nil { - return fmt.Errorf("failed to write '!sum' to %s: %w", targetFileName, err) - } entries, err := os.ReadDir(baseDir) if err != nil { From bce7f6b5c889f86561d0586349fef0bcb8d22a18 Mon Sep 17 00:00:00 2001 From: David Chase Date: Mon, 30 Jun 2025 14:51:58 -0400 Subject: [PATCH 117/200] arch/internal: cleanup {Operation,Operand}.String() replaced repeated open-coded if+formatting with a small number of local functions, consolidated all the nil values at the end, added some missing fields. Change-Id: I5f261ba34626f173789d4c5e138345c16e1b341c Reviewed-on: https://go-review.googlesource.com/c/arch/+/685116 Reviewed-by: Junyang Shao LUCI-TryBot-Result: Go LUCI --- internal/simdgen/gen_utility.go | 157 +++++++++++++------------------- 1 file changed, 65 insertions(+), 92 deletions(-) diff --git a/internal/simdgen/gen_utility.go b/internal/simdgen/gen_utility.go index a01bc0c4..10c5af7c 100644 --- a/internal/simdgen/gen_utility.go +++ b/internal/simdgen/gen_utility.go @@ -779,48 +779,47 @@ func reportXEDInconsistency(ops []Operation) error { func (o Operation) String() string { var sb strings.Builder + var nils string + + optStr := func(field string, ps *string) { + if ps != nil { + fmt.Fprintf(&sb, " %s: %s\n", field, *ps) + } else { + nils += " " + field + } + } + + // two spaces then field: value + str := func(field string, value string) { + fmt.Fprintf(&sb, " %s: %s\n", field, value) + } + sb.WriteString("Operation {\n") - sb.WriteString(fmt.Sprintf(" Go: %s\n", o.Go)) - 
sb.WriteString(fmt.Sprintf(" GoArch: %s\n", o.GoArch)) - sb.WriteString(fmt.Sprintf(" Asm: %s\n", o.Asm)) + str("Go", o.Go) + str("GoArch", o.GoArch) + str("Asm", o.Asm) + str("Commutative", o.Commutative) + str("Extension", o.Extension) + optStr("ConstImm", o.ConstImm) + optStr("Masked", o.Masked) + optStr("Zeroing", o.Zeroing) + optStr("OperandOrder", o.OperandOrder) sb.WriteString(" In: [\n") for _, op := range o.In { - sb.WriteString(fmt.Sprintf(" %s,\n", op.String())) + fmt.Fprintf(&sb, " %s,\n", op.String()) } sb.WriteString(" ]\n") sb.WriteString(" Out: [\n") for _, op := range o.Out { - sb.WriteString(fmt.Sprintf(" %s,\n", op.String())) + fmt.Fprintf(&sb, " %s,\n", op.String()) } sb.WriteString(" ]\n") - sb.WriteString(fmt.Sprintf(" Commutative: %s\n", o.Commutative)) - sb.WriteString(fmt.Sprintf(" Extension: %s\n", o.Extension)) - - if o.Zeroing != nil { - sb.WriteString(fmt.Sprintf(" Zeroing: %s\n", *o.Zeroing)) - } else { - sb.WriteString(" Zeroing: \n") - } - - if o.Documentation != nil { - sb.WriteString(fmt.Sprintf(" Documentation: %s\n", *o.Documentation)) - } else { - sb.WriteString(" Documentation: \n") - } - - if o.ConstImm != nil { - sb.WriteString(fmt.Sprintf(" ConstImm: %s\n", *o.ConstImm)) - } else { - sb.WriteString(" ConstImm: \n") - } - - if o.Masked != nil { - sb.WriteString(fmt.Sprintf(" Masked: %s\n", *o.Masked)) - } else { - sb.WriteString(" Masked: \n") + optStr("Documentation", o.Documentation) + if len(nils) != 0 { + sb.WriteString(" nils = " + nils[1:] + "\n") } sb.WriteString("}\n") @@ -830,75 +829,49 @@ func (o Operation) String() string { // String returns a string representation of the Operand. 
func (op Operand) String() string { var sb strings.Builder - sb.WriteString("Operand {\n") - sb.WriteString(fmt.Sprintf(" Class: %s\n", op.Class)) - - if op.Go != nil { - sb.WriteString(fmt.Sprintf(" Go: %s\n", *op.Go)) - } else { - sb.WriteString(" Go: \n") - } - - sb.WriteString(fmt.Sprintf(" AsmPos: %d\n", op.AsmPos)) - - if op.Base != nil { - sb.WriteString(fmt.Sprintf(" Base: %s\n", *op.Base)) - } else { - sb.WriteString(" Base: \n") - } - - if op.ElemBits != nil { - sb.WriteString(fmt.Sprintf(" ElemBits: %d\n", *op.ElemBits)) - } else { - sb.WriteString(" ElemBits: \n") - } - - if op.Bits != nil { - sb.WriteString(fmt.Sprintf(" Bits: %d\n", *op.Bits)) - } else { - sb.WriteString(" Bits: \n") - } - - if op.Const != nil { - sb.WriteString(fmt.Sprintf(" Const: %s\n", *op.Const)) - } else { - sb.WriteString(" Const: \n") - } - - if op.Lanes != nil { - sb.WriteString(fmt.Sprintf(" Lanes: %d\n", *op.Lanes)) - } else { - sb.WriteString(" Lanes: \n") - } + var nils string - if op.Name != nil { - sb.WriteString(fmt.Sprintf(" Name: %s\n", *op.Name)) - } else { - sb.WriteString(" Name: \n") + optStr := func(field string, ps *string) { + if ps != nil { + fmt.Fprintf(&sb, " %s: %s\n", field, *ps) + } else { + nils += " " + field + } } - if op.OverwriteClass != nil { - sb.WriteString(fmt.Sprintf(" OverwriteClass: %s\n", *op.OverwriteClass)) - } else { - sb.WriteString(" OverwriteClass: \n") + optNum := func(field string, pi *int) { + if pi != nil { + fmt.Fprintf(&sb, " %s: %d\n", field, *pi) + } else { + nils += " " + field + } } - if op.OverwriteBase != nil { - sb.WriteString(fmt.Sprintf(" OverwriteBase: %s\n", *op.OverwriteBase)) - } else { - sb.WriteString(" OverwriteBase: \n") + // four spaces then field: value + str := func(field string, value string) { + fmt.Fprintf(&sb, " %s: %s\n", field, value) } - - if op.OverwriteElementBits != nil { - sb.WriteString(fmt.Sprintf(" OverwriteElementBits: %d\n", *op.OverwriteElementBits)) - } else { - sb.WriteString(" 
OverwriteElementBits: \n") + num := func(field string, value int) { + fmt.Fprintf(&sb, " %s: %d\n", field, value) } - - if op.TreatLikeAScalarOfSize != nil { - sb.WriteString(fmt.Sprintf(" TreatLikeAScalarOfSize: %d\n", *op.TreatLikeAScalarOfSize)) - } else { - sb.WriteString(" TreatLikeAScalarOfSize: \n") + sb.WriteString("Operand {\n") + str("Class", op.Class) + optStr("Go", op.Go) + num("AsmPos", op.AsmPos) + optStr("Base", op.Base) + optNum("ElemBits", op.ElemBits) + optNum("Bits", op.Bits) + optStr("Const", op.Const) + optStr("ImmOffset", op.ImmOffset) + optNum("Lanes", op.Lanes) + optStr("Name", op.Name) + optNum("TreatLikeAScalarOfSize", op.TreatLikeAScalarOfSize) + optStr("OverwriteClass", op.OverwriteClass) + optStr("OverwriteBase", op.OverwriteBase) + optNum("OverwriteElementBits", op.OverwriteElementBits) + + if len(nils) != 0 { + sb.WriteString(" nils = " + nils[1:] + "\n") } sb.WriteString(" }\n") From e3aff9d88cb120ff00c221d85384ea5464912345 Mon Sep 17 00:00:00 2001 From: David Chase Date: Wed, 2 Jul 2025 15:07:19 -0400 Subject: [PATCH 118/200] internal/simdgen: make types order really invariant type declarations had an input-dependent ordering. Not as bad as map iteration, but spurious changes could still happen. Also tweaks the type ordering to make it slightly "better" (masks after vectors, not before). 
Generates dev.simd CL 685615 Change-Id: I77af136e8817415c1465707575a222bed9ce88be Reviewed-on: https://go-review.googlesource.com/c/arch/+/685595 Reviewed-by: Junyang Shao LUCI-TryBot-Result: Go LUCI --- internal/simdgen/gen_simdTypes.go | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/internal/simdgen/gen_simdTypes.go b/internal/simdgen/gen_simdTypes.go index 9b57e472..6aeb4ee8 100644 --- a/internal/simdgen/gen_simdTypes.go +++ b/internal/simdgen/gen_simdTypes.go @@ -24,8 +24,8 @@ type simdType struct { } func compareSimdTypes(x, y simdType) int { - // "mask" then "vreg" - if c := compareNatural(x.Type, y.Type); c != 0 { + // "vreg" then "mask" + if c := -compareNatural(x.Type, y.Type); c != 0 { return c } // want "flo" < "int" < "uin" (and then 8 < 16 < 32 < 64), @@ -518,7 +518,8 @@ func writeSIMDTypes(typeMap simdTypeMap) *bytes.Buffer { } sizes := make([]int, 0, len(typeMap)) - for size := range typeMap { + for size, types := range typeMap { + slices.SortFunc(types, compareSimdTypes) sizes = append(sizes, size) } sort.Ints(sizes) From df255ae7a1bae09254fc506851ea829f128208ec Mon Sep 17 00:00:00 2001 From: David Chase Date: Wed, 2 Jul 2025 14:54:25 -0400 Subject: [PATCH 119/200] internal/simdgen: handle K0 by deleting operand This removes some special cases from the code. 
Change-Id: I69a25446a17fd4e50f202aa323bed3a6f3bb90bc Reviewed-on: https://go-review.googlesource.com/c/arch/+/685596 LUCI-TryBot-Result: Go LUCI Reviewed-by: Junyang Shao --- internal/simdgen/gen_simdGenericOps.go | 2 +- internal/simdgen/gen_simdMachineOps.go | 2 +- internal/simdgen/gen_simdTypes.go | 2 +- internal/simdgen/gen_simdrules.go | 2 +- internal/simdgen/gen_simdssa.go | 2 +- internal/simdgen/gen_utility.go | 65 +++++++------------------- 6 files changed, 21 insertions(+), 54 deletions(-) diff --git a/internal/simdgen/gen_simdGenericOps.go b/internal/simdgen/gen_simdGenericOps.go index f34cf9a1..1be01810 100644 --- a/internal/simdgen/gen_simdGenericOps.go +++ b/internal/simdgen/gen_simdGenericOps.go @@ -43,7 +43,7 @@ func writeSIMDGenericOps(ops []Operation) *bytes.Buffer { } var opsData opData for _, op := range ops { - _, _, _, immType, _, _, gOp := op.shape() + _, _, _, immType, _, gOp := op.shape() genericNames := gOp.Go + *gOp.In[0].Go gOpData := genericOpsData{*gOp.In[0].Go + gOp.Go, genericNames, len(gOp.In), op.Commutative} if immType == VarImm || immType == ConstVarImm { diff --git a/internal/simdgen/gen_simdMachineOps.go b/internal/simdgen/gen_simdMachineOps.go index f879791d..7e4f1d1a 100644 --- a/internal/simdgen/gen_simdMachineOps.go +++ b/internal/simdgen/gen_simdMachineOps.go @@ -50,7 +50,7 @@ func writeSIMDMachineOps(ops []Operation) *bytes.Buffer { opsData := make([]opData, 0) opsDataImm := make([]opData, 0) for _, op := range ops { - shapeIn, shapeOut, maskType, _, _, _, gOp := op.shape() + shapeIn, shapeOut, maskType, _, _, gOp := op.shape() asm := gOp.Asm if maskType == OneMask { diff --git a/internal/simdgen/gen_simdTypes.go b/internal/simdgen/gen_simdTypes.go index 6aeb4ee8..16286736 100644 --- a/internal/simdgen/gen_simdTypes.go +++ b/internal/simdgen/gen_simdTypes.go @@ -272,7 +272,7 @@ func writeSIMDTestsWrapper(ops []Operation) *bytes.Buffer { opsByShape := make(map[string]opData) opsSkipped := map[string]struct{}{} for _, o := 
range ops { - _, _, _, immType, _, _, gOp := o.shape() + _, _, _, immType, _, gOp := o.shape() if immType == VarImm || immType == ConstVarImm { // Operations with variable immediates should be called directly diff --git a/internal/simdgen/gen_simdrules.go b/internal/simdgen/gen_simdrules.go index c3686a56..6f84b912 100644 --- a/internal/simdgen/gen_simdrules.go +++ b/internal/simdgen/gen_simdrules.go @@ -65,7 +65,7 @@ func writeSIMDRules(ops []Operation) *bytes.Buffer { var allData []tplRuleData for _, opr := range ops { - opInShape, opOutShape, maskType, immType, _, _, gOp := opr.shape() + opInShape, opOutShape, maskType, immType, _, gOp := opr.shape() vregInCnt := len(gOp.In) asm := gOp.Asm diff --git a/internal/simdgen/gen_simdssa.go b/internal/simdgen/gen_simdssa.go index ffb172a6..ee30c8eb 100644 --- a/internal/simdgen/gen_simdssa.go +++ b/internal/simdgen/gen_simdssa.go @@ -90,7 +90,7 @@ func writeSIMDSSA(ops []Operation) *bytes.Buffer { allUnseen := make(map[string][]Operation) for _, op := range ops { asm := op.Asm - shapeIn, shapeOut, maskType, _, _, _, gOp := op.shape() + shapeIn, shapeOut, maskType, _, _, gOp := op.shape() if maskType == 2 { asm += "Masked" diff --git a/internal/simdgen/gen_utility.go b/internal/simdgen/gen_utility.go index 10c5af7c..b9206cda 100644 --- a/internal/simdgen/gen_utility.go +++ b/internal/simdgen/gen_utility.go @@ -97,7 +97,6 @@ const ( InvalidMask int = iota NoMask OneMask - OneConstMask AllMasks ) @@ -130,7 +129,6 @@ const ( // InvalidMask: unknown, with err set to the error message // NoMask: no mask // OneMask: with mask (K1 to K7) -// OneConstMask: with const mask K0 // AllMasks: it's a K mask instruction // // immType: @@ -145,7 +143,7 @@ const ( // opNoConstImmMask is op with its inputs excluding the const imm and mask. // // This function does not modify op. 
-func (op *Operation) shape() (shapeIn, shapeOut, maskType, immType int, opNoImm Operation, opNoConstMask Operation, opNoImmConstMask Operation) { +func (op *Operation) shape() (shapeIn, shapeOut, maskType, immType int, opNoImm Operation, opNoImmConstMask Operation) { if len(op.Out) > 1 { panic(fmt.Errorf("simdgen only supports 1 output: %s", op)) } @@ -169,9 +167,8 @@ func (op *Operation) shape() (shapeIn, shapeOut, maskType, immType int, opNoImm } hasImm := false maskCount := 0 - iConstMask := -1 hasVreg := false - for i, in := range op.In { + for _, in := range op.In { if in.AsmPos == outputReg { if shapeOut != OneVregOutAtIn && in.AsmPos == 0 && in.Class == "vreg" { shapeOut = OneVregOutAtIn @@ -187,35 +184,14 @@ func (op *Operation) shape() (shapeIn, shapeOut, maskType, immType int, opNoImm } hasImm = true } else if in.Class == "mask" { - if in.Const != nil { - if *in.Const == "K0" { - if iConstMask != -1 { - panic(fmt.Errorf("simdgen only supports one const mask in inputs: %s", op)) - } - iConstMask = i - // Const mask should be invisible in ssa and prog, so we don't treat it as a mask. - // More specifically in prog, it's optional: when missing the assembler will default it to K0). - // TODO: verify the above assumption is safe. - } else { - panic(fmt.Errorf("simdgen only supports const mask K0 in inputs: %s", op)) - } - } else { - maskCount++ - } + maskCount++ } else { hasVreg = true } } opNoImm = *op - opNoConstMask = *op opNoImmConstMask = *op - removeConstMask := func(o *Operation) { - o.In = append(o.In[:iConstMask], o.In[iConstMask+1:]...) 
- } - if iConstMask != -1 { - removeConstMask(&opNoConstMask) - removeConstMask(&opNoImmConstMask) - } + removeImm := func(o *Operation) { o.In = o.In[1:] } @@ -237,11 +213,7 @@ func (op *Operation) shape() (shapeIn, shapeOut, maskType, immType int, opNoImm immType = NoImm } if maskCount == 0 { - if iConstMask == -1 { - maskType = NoMask - } else { - maskType = OneConstMask - } + maskType = NoMask } else { maskType = OneMask } @@ -249,9 +221,6 @@ func (op *Operation) shape() (shapeIn, shapeOut, maskType, immType int, opNoImm if hasImm { panic(fmt.Errorf("simdgen does not support immediates in pure mask operations: %s", op)) } - if iConstMask != -1 { - panic(fmt.Errorf("simdgen does not support const mask in pure mask operations: %s", op)) - } if hasVreg { panic(fmt.Errorf("simdgen does not support more than 1 masks in non-pure mask operations: %s", op)) } @@ -284,7 +253,7 @@ func (op *Operation) shape() (shapeIn, shapeOut, maskType, immType int, opNoImm // regShape returns a string representation of the register shape. func (op *Operation) regShape() (string, error) { - _, _, _, _, _, _, gOp := op.shape() + _, _, _, _, _, gOp := op.shape() var regInfo string var vRegInCnt, gRegInCnt, kMaskInCnt, vRegOutCnt, gRegOutCnt, kMaskOutCnt int for _, in := range gOp.In { @@ -484,29 +453,29 @@ var classes []string = []string{"BAD0", "op1", "op2", "op3", "op4"} // The classification string is used to select a template or a clause of a template // for intrinsics declaration and the ssagen intrinisics glue code in the compiler. 
func classifyOp(op Operation) (string, Operation, error) { - _, _, _, immType, _, opNoConstMask, gOp := op.shape() + _, _, _, immType, _, gOp := op.shape() var class string if immType == VarImm || immType == ConstVarImm { - switch l := len(opNoConstMask.In); l { + switch l := len(op.In); l { case 1: return "", op, fmt.Errorf("simdgen does not recognize this operation of only immediate input: %s", op) case 2, 3, 4, 5: class = immClasses[l] default: - return "", op, fmt.Errorf("simdgen does not recognize this operation of input length %d: %s", len(opNoConstMask.In), op) + return "", op, fmt.Errorf("simdgen does not recognize this operation of input length %d: %s", len(op.In), op) } if order := op.OperandOrder; order != nil { class += "_" + *order } - return class, opNoConstMask, nil + return class, op, nil } else { switch l := len(gOp.In); l { case 1, 2, 3, 4: class = classes[l] default: - return "", op, fmt.Errorf("simdgen does not recognize this operation of input length %d: %s", len(opNoConstMask.In), op) + return "", op, fmt.Errorf("simdgen does not recognize this operation of input length %d: %s", len(op.In), op) } if order := op.OperandOrder; order != nil { class += "_" + *order @@ -568,14 +537,12 @@ func splitMask(ops []Operation) ([]Operation, error) { if op.Masked == nil || *op.Masked != "true" { continue } - shapeIn, _, _, _, _, _, _ := op.shape() + shapeIn, _, _, _, _, _ := op.shape() if shapeIn == OneKmaskIn || shapeIn == OneKmaskImmIn { op2 := op - op2.In = slices.Clone(op.In) - constMask := "K0" - // The ops should be sorted when calling this function, the mask is in the end. 
- op2.In[len(op2.In)-1].Const = &constMask + // The ops should be sorted when calling this function, the mask is in the end, drop the mask + op2.In = slices.Clone(op.In)[:len(op.In)-1] if !strings.HasPrefix(op2.Go, "Masked") { return nil, fmt.Errorf("simdgen only recognizes masked operations with name starting with 'Masked': %s", op) } @@ -598,7 +565,7 @@ func splitMask(ops []Operation) ([]Operation, error) { func dedupGodef(ops []Operation) ([]Operation, error) { seen := map[string][]Operation{} for _, op := range ops { - _, _, _, _, _, _, gOp := op.shape() + _, _, _, _, _, gOp := op.shape() genericNames := gOp.Go + *gOp.In[0].Go seen[genericNames] = append(seen[genericNames], op) @@ -642,7 +609,7 @@ func copyConstImm(ops []Operation) error { if op.ConstImm == nil { continue } - _, _, _, immType, _, _, _ := op.shape() + _, _, _, immType, _, _ := op.shape() if immType == ConstImm || immType == ConstVarImm { op.In[0].Const = op.ConstImm From 19fdaf8b68a218640532eefdc430218ab2afb21b Mon Sep 17 00:00:00 2001 From: David Chase Date: Wed, 25 Jun 2025 13:24:30 -0400 Subject: [PATCH 120/200] internal/simdgen: add INSERT[IF]128 instructions Paired with dev.simd CL 684077 This CL should submit first. 
Change-Id: Ia9a0abce2d92b79db087256ca1bf17838e0b2dbb Reviewed-on: https://go-review.googlesource.com/c/arch/+/684055 LUCI-TryBot-Result: Go LUCI Reviewed-by: Junyang Shao --- internal/simdgen/categories.yaml | 6 +- internal/simdgen/gen_utility.go | 4 +- internal/simdgen/go.yaml | 106 +++++++++++++++++++++ internal/simdgen/godefs.go | 5 +- internal/simdgen/ops/Moves/categories.yaml | 6 +- internal/simdgen/ops/Moves/go.yaml | 106 +++++++++++++++++++++ internal/simdgen/types.yaml | 21 ++++ 7 files changed, 249 insertions(+), 5 deletions(-) diff --git a/internal/simdgen/categories.yaml b/internal/simdgen/categories.yaml index fbfd6613..6f3db7a0 100644 --- a/internal/simdgen/categories.yaml +++ b/internal/simdgen/categories.yaml @@ -545,7 +545,11 @@ extension: "AVX.*" documentation: !string |- // GetElem retrieves a single constant-indexed element's value. - +- go: Set128 + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // Set128 combines a 128-bit vector with a 256-bit vector, where the constant operand specifies whether the low (0) or high (1) half is receives the smaller vector. 
- go: Mul commutative: "true" extension: "AVX.*" diff --git a/internal/simdgen/gen_utility.go b/internal/simdgen/gen_utility.go index b9206cda..fa9920dd 100644 --- a/internal/simdgen/gen_utility.go +++ b/internal/simdgen/gen_utility.go @@ -668,8 +668,8 @@ func overwrite(ops []Operation) error { panic(fmt.Errorf("ElemBits is nil at operand %d of %v", idx, o)) } *op[idx].ElemBits = *op[idx].OverwriteElementBits - *op[idx].Go = fmt.Sprintf("%s%dx%d", capitalizeFirst(*op[idx].Base), *op[idx].ElemBits, *op[idx].Bits / *op[idx].ElemBits) - + *op[idx].Lanes = *op[idx].Bits / *op[idx].ElemBits + *op[idx].Go = fmt.Sprintf("%s%dx%d", capitalizeFirst(*op[idx].Base), *op[idx].ElemBits, *op[idx].Lanes) } return nil } diff --git a/internal/simdgen/go.yaml b/internal/simdgen/go.yaml index f91bafac..f8b27d85 100644 --- a/internal/simdgen/go.yaml +++ b/internal/simdgen/go.yaml @@ -792,6 +792,112 @@ - class: greg base: $b bits: $e + +- go: Set128 + asm: "VINSERTI128" + in: + - &i8x32 + class: vreg + base: $t + bits: 256 + OverwriteElementBits: 8 + - &i8x16 + class: vreg + base: $t + bits: 128 + OverwriteElementBits: 8 + - class: immediate + immOffset: 0 + out: + - *i8x32 + +- go: Set128 + asm: "VINSERTI128" + in: + - &i16x16 + class: vreg + base: $t + bits: 256 + OverwriteElementBits: 16 + - &i16x8 + class: vreg + base: $t + bits: 128 + OverwriteElementBits: 16 + - class: immediate + immOffset: 0 + out: + - *i16x16 + +- go: Set128 + asm: "VINSERTI128" + in: + - &i32x8 + class: vreg + base: $t + bits: 256 + OverwriteElementBits: 32 + - &i32x4 + class: vreg + base: $t + bits: 128 + OverwriteElementBits: 32 + - class: immediate + immOffset: 0 + out: + - *i32x8 + +- go: Set128 + asm: "VINSERTI128" + in: + - &i64x4 + class: vreg + base: $t + bits: 256 + OverwriteElementBits: 64 + - &i64x2 + class: vreg + base: $t + bits: 128 + OverwriteElementBits: 64 + - class: immediate + immOffset: 0 + out: + - *i64x4 + +- go: Set128 + asm: "VINSERTF128" + in: + - &f32x8 + class: vreg + base: $t + 
bits: 256 + OverwriteElementBits: 32 + - &f32x4 + class: vreg + base: $t + bits: 128 + OverwriteElementBits: 32 + - class: immediate + immOffset: 0 + out: + - *f32x8 + +- go: Set128 + asm: "VINSERTF128" + in: + - &f64x4 + class: vreg + base: $t + bits: 256 + - &f64x2 + class: vreg + base: $t + bits: 128 + - class: immediate + immOffset: 0 + out: + - *f64x4 # "Normal" multiplication is only available for floats. # This only covers the single and double precision. - go: Mul diff --git a/internal/simdgen/godefs.go b/internal/simdgen/godefs.go index 4ac4a9a1..e5ad5b82 100644 --- a/internal/simdgen/godefs.go +++ b/internal/simdgen/godefs.go @@ -123,7 +123,10 @@ func compareOperands(x, y *Operand) int { if c := compareIntPointers(x.ElemBits, y.ElemBits); c != 0 { return c } - return compareIntPointers(x.Bits, y.Bits) + if c := compareIntPointers(x.Bits, y.Bits); c != 0 { + return c + } + return 0 } } diff --git a/internal/simdgen/ops/Moves/categories.yaml b/internal/simdgen/ops/Moves/categories.yaml index d0d4a304..3ebb24f3 100644 --- a/internal/simdgen/ops/Moves/categories.yaml +++ b/internal/simdgen/ops/Moves/categories.yaml @@ -9,4 +9,8 @@ extension: "AVX.*" documentation: !string |- // GetElem retrieves a single constant-indexed element's value. - +- go: Set128 + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // Set128 combines a 128-bit vector with a 256-bit vector, where the constant operand specifies whether the low (0) or high (1) half is receives the smaller vector. 
diff --git a/internal/simdgen/ops/Moves/go.yaml b/internal/simdgen/ops/Moves/go.yaml index 20d4a053..89bd612c 100644 --- a/internal/simdgen/ops/Moves/go.yaml +++ b/internal/simdgen/ops/Moves/go.yaml @@ -24,3 +24,109 @@ - class: greg base: $b bits: $e + +- go: Set128 + asm: "VINSERTI128" + in: + - &i8x32 + class: vreg + base: $t + bits: 256 + OverwriteElementBits: 8 + - &i8x16 + class: vreg + base: $t + bits: 128 + OverwriteElementBits: 8 + - class: immediate + immOffset: 0 + out: + - *i8x32 + +- go: Set128 + asm: "VINSERTI128" + in: + - &i16x16 + class: vreg + base: $t + bits: 256 + OverwriteElementBits: 16 + - &i16x8 + class: vreg + base: $t + bits: 128 + OverwriteElementBits: 16 + - class: immediate + immOffset: 0 + out: + - *i16x16 + +- go: Set128 + asm: "VINSERTI128" + in: + - &i32x8 + class: vreg + base: $t + bits: 256 + OverwriteElementBits: 32 + - &i32x4 + class: vreg + base: $t + bits: 128 + OverwriteElementBits: 32 + - class: immediate + immOffset: 0 + out: + - *i32x8 + +- go: Set128 + asm: "VINSERTI128" + in: + - &i64x4 + class: vreg + base: $t + bits: 256 + OverwriteElementBits: 64 + - &i64x2 + class: vreg + base: $t + bits: 128 + OverwriteElementBits: 64 + - class: immediate + immOffset: 0 + out: + - *i64x4 + +- go: Set128 + asm: "VINSERTF128" + in: + - &f32x8 + class: vreg + base: $t + bits: 256 + OverwriteElementBits: 32 + - &f32x4 + class: vreg + base: $t + bits: 128 + OverwriteElementBits: 32 + - class: immediate + immOffset: 0 + out: + - *f32x8 + +- go: Set128 + asm: "VINSERTF128" + in: + - &f64x4 + class: vreg + base: $t + bits: 256 + - &f64x2 + class: vreg + base: $t + bits: 128 + - class: immediate + immOffset: 0 + out: + - *f64x4 diff --git a/internal/simdgen/types.yaml b/internal/simdgen/types.yaml index 5178a216..17f5be55 100644 --- a/internal/simdgen/types.yaml +++ b/internal/simdgen/types.yaml @@ -50,6 +50,7 @@ in: !repeat - {class: mask, go: Mask32x16, base: "int", elemBits: 32, bits: 512, lanes: 16} - {class: mask, go: Mask64x8, base: 
"int", elemBits: 64, bits: 512, lanes: 8} + - {class: greg, go: float64, base: "float", bits: 64, lanes: 1} - {class: greg, go: float32, base: "float", bits: 32, lanes: 1} - {class: greg, go: int64, base: "int", bits: 64, lanes: 1} @@ -61,6 +62,26 @@ in: !repeat - {class: greg, go: uint16, base: "uint", bits: 16, lanes: 1} - {class: greg, go: uint8, base: "uint", bits: 8, lanes: 1} +# Special shapes just to make INSERT[IF]128 work. +# The elemBits field of these shapes are wrong, it would be overwritten by overwriteElemBits. + - {class: vreg, go: Int8x16, base: "int", elemBits: 128, bits: 128, lanes: 16} + - {class: vreg, go: Uint8x16, base: "uint", elemBits: 128, bits: 128, lanes: 16} + - {class: vreg, go: Int16x8, base: "int", elemBits: 128, bits: 128, lanes: 8} + - {class: vreg, go: Uint16x8, base: "uint", elemBits: 128, bits: 128, lanes: 8} + - {class: vreg, go: Int32x4, base: "int", elemBits: 128, bits: 128, lanes: 4} + - {class: vreg, go: Uint32x4, base: "uint", elemBits: 128, bits: 128, lanes: 4} + - {class: vreg, go: Int64x2, base: "int", elemBits: 128, bits: 128, lanes: 2} + - {class: vreg, go: Uint64x2, base: "uint", elemBits: 128, bits: 128, lanes: 2} + + - {class: vreg, go: Int8x32, base: "int", elemBits: 128, bits: 256, lanes: 32} + - {class: vreg, go: Uint8x32, base: "uint", elemBits: 128, bits: 256, lanes: 32} + - {class: vreg, go: Int16x16, base: "int", elemBits: 128, bits: 256, lanes: 16} + - {class: vreg, go: Uint16x16, base: "uint", elemBits: 128, bits: 256, lanes: 16} + - {class: vreg, go: Int32x8, base: "int", elemBits: 128, bits: 256, lanes: 8} + - {class: vreg, go: Uint32x8, base: "uint", elemBits: 128, bits: 256, lanes: 8} + - {class: vreg, go: Int64x4, base: "int", elemBits: 128, bits: 256, lanes: 4} + - {class: vreg, go: Uint64x4, base: "uint", elemBits: 128, bits: 256, lanes: 4} + - {class: immediate, go: Immediate} # TODO: we only support imms that are not used as value -- usually as instruction semantic predicate like VPCMP as of now. 
out: !repeat - *types From 36da9b0a515c938c425536fb5317df5934b3336f Mon Sep 17 00:00:00 2001 From: David Chase Date: Wed, 25 Jun 2025 18:19:15 -0400 Subject: [PATCH 121/200] internal/simdgen: add EXTRACT[IF]128 instructions This generates dev.simd CL 684115 Change-Id: Ibb8e77e40c426b2cf3dd73c996e5118d5fd5afff Reviewed-on: https://go-review.googlesource.com/c/arch/+/684080 LUCI-TryBot-Result: Go LUCI Reviewed-by: Junyang Shao --- internal/simdgen/categories.yaml | 5 ++ internal/simdgen/go.yaml | 82 ++++++++++++++++++---- internal/simdgen/ops/Moves/categories.yaml | 5 ++ internal/simdgen/ops/Moves/go.yaml | 82 ++++++++++++++++++---- 4 files changed, 146 insertions(+), 28 deletions(-) diff --git a/internal/simdgen/categories.yaml b/internal/simdgen/categories.yaml index 6f3db7a0..d5cf67a5 100644 --- a/internal/simdgen/categories.yaml +++ b/internal/simdgen/categories.yaml @@ -550,6 +550,11 @@ extension: "AVX.*" documentation: !string |- // Set128 combines a 128-bit vector with a 256-bit vector, where the constant operand specifies whether the low (0) or high (1) half is receives the smaller vector. +- go: Get128 + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // Get128 retrieves the upper (1) or lower (0) half of a 256-bit vector, depending on the constant operand. - go: Mul commutative: "true" extension: "AVX.*" diff --git a/internal/simdgen/go.yaml b/internal/simdgen/go.yaml index f8b27d85..18c937ac 100644 --- a/internal/simdgen/go.yaml +++ b/internal/simdgen/go.yaml @@ -776,23 +776,25 @@ - class: greg base: $b lanes: 1 # Scalar, darn it! 
- - class: immediate + - &imm + class: immediate immOffset: 0 out: - *t + - go: GetElem asm: "VPEXTR[BWDQ]" in: - class: vreg base: $b elemBits: $e - - class: immediate - immOffset: 0 + - *imm out: - class: greg base: $b bits: $e + - go: Set128 asm: "VINSERTI128" in: @@ -806,11 +808,20 @@ base: $t bits: 128 OverwriteElementBits: 8 - - class: immediate + - &imm01 # This immediate should be only 0 or 1 + class: immediate immOffset: 0 out: - *i8x32 +- go: Get128 + asm: "VEXTRACTI128" + in: + - *i8x32 + - *imm01 + out: + - *i8x16 + - go: Set128 asm: "VINSERTI128" in: @@ -824,11 +835,18 @@ base: $t bits: 128 OverwriteElementBits: 16 - - class: immediate - immOffset: 0 + - *imm01 out: - *i16x16 +- go: Get128 + asm: "VEXTRACTI128" + in: + - *i16x16 + - *imm01 + out: + - *i16x8 + - go: Set128 asm: "VINSERTI128" in: @@ -842,11 +860,18 @@ base: $t bits: 128 OverwriteElementBits: 32 - - class: immediate - immOffset: 0 + - *imm01 out: - *i32x8 +- go: Get128 + asm: "VEXTRACTI128" + in: + - *i32x8 + - *imm01 + out: + - *i32x4 + - go: Set128 asm: "VINSERTI128" in: @@ -860,11 +885,18 @@ base: $t bits: 128 OverwriteElementBits: 64 - - class: immediate - immOffset: 0 + - *imm01 out: - *i64x4 +- go: Get128 + asm: "VEXTRACTI128" + in: + - *i64x4 + - *imm01 + out: + - *i64x2 + - go: Set128 asm: "VINSERTF128" in: @@ -878,11 +910,18 @@ base: $t bits: 128 OverwriteElementBits: 32 - - class: immediate - immOffset: 0 + - *imm01 out: - *f32x8 +- go: Get128 + asm: "VEXTRACTF128" + in: + - *f32x8 + - *imm01 + out: + - *f32x4 + - go: Set128 asm: "VINSERTF128" in: @@ -894,10 +933,25 @@ class: vreg base: $t bits: 128 - - class: immediate - immOffset: 0 + - *imm01 out: - *f64x4 + +- go: Get128 + asm: "VEXTRACTF128" + in: + - *f64x4 + - *imm01 + out: + - *f64x2 + + + + + + + + # "Normal" multiplication is only available for floats. # This only covers the single and double precision. 
- go: Mul diff --git a/internal/simdgen/ops/Moves/categories.yaml b/internal/simdgen/ops/Moves/categories.yaml index 3ebb24f3..db36efd4 100644 --- a/internal/simdgen/ops/Moves/categories.yaml +++ b/internal/simdgen/ops/Moves/categories.yaml @@ -14,3 +14,8 @@ extension: "AVX.*" documentation: !string |- // Set128 combines a 128-bit vector with a 256-bit vector, where the constant operand specifies whether the low (0) or high (1) half is receives the smaller vector. +- go: Get128 + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // Get128 retrieves the upper (1) or lower (0) half of a 256-bit vector, depending on the constant operand. diff --git a/internal/simdgen/ops/Moves/go.yaml b/internal/simdgen/ops/Moves/go.yaml index 89bd612c..e6cd40f6 100644 --- a/internal/simdgen/ops/Moves/go.yaml +++ b/internal/simdgen/ops/Moves/go.yaml @@ -8,23 +8,25 @@ - class: greg base: $b lanes: 1 # Scalar, darn it! - - class: immediate + - &imm + class: immediate immOffset: 0 out: - *t + - go: GetElem asm: "VPEXTR[BWDQ]" in: - class: vreg base: $b elemBits: $e - - class: immediate - immOffset: 0 + - *imm out: - class: greg base: $b bits: $e + - go: Set128 asm: "VINSERTI128" in: @@ -38,11 +40,20 @@ base: $t bits: 128 OverwriteElementBits: 8 - - class: immediate + - &imm01 # This immediate should be only 0 or 1 + class: immediate immOffset: 0 out: - *i8x32 +- go: Get128 + asm: "VEXTRACTI128" + in: + - *i8x32 + - *imm01 + out: + - *i8x16 + - go: Set128 asm: "VINSERTI128" in: @@ -56,11 +67,18 @@ base: $t bits: 128 OverwriteElementBits: 16 - - class: immediate - immOffset: 0 + - *imm01 out: - *i16x16 +- go: Get128 + asm: "VEXTRACTI128" + in: + - *i16x16 + - *imm01 + out: + - *i16x8 + - go: Set128 asm: "VINSERTI128" in: @@ -74,11 +92,18 @@ base: $t bits: 128 OverwriteElementBits: 32 - - class: immediate - immOffset: 0 + - *imm01 out: - *i32x8 +- go: Get128 + asm: "VEXTRACTI128" + in: + - *i32x8 + - *imm01 + out: + - *i32x4 + - go: Set128 asm: "VINSERTI128" in: @@ 
-92,11 +117,18 @@ base: $t bits: 128 OverwriteElementBits: 64 - - class: immediate - immOffset: 0 + - *imm01 out: - *i64x4 +- go: Get128 + asm: "VEXTRACTI128" + in: + - *i64x4 + - *imm01 + out: + - *i64x2 + - go: Set128 asm: "VINSERTF128" in: @@ -110,11 +142,18 @@ base: $t bits: 128 OverwriteElementBits: 32 - - class: immediate - immOffset: 0 + - *imm01 out: - *f32x8 +- go: Get128 + asm: "VEXTRACTF128" + in: + - *f32x8 + - *imm01 + out: + - *f32x4 + - go: Set128 asm: "VINSERTF128" in: @@ -126,7 +165,22 @@ class: vreg base: $t bits: 128 - - class: immediate - immOffset: 0 + - *imm01 out: - *f64x4 + +- go: Get128 + asm: "VEXTRACTF128" + in: + - *f64x4 + - *imm01 + out: + - *f64x2 + + + + + + + + From d0672853d4341b23d0beba9977f7aaa7c137281e Mon Sep 17 00:00:00 2001 From: Junyang Shao Date: Tue, 8 Jul 2025 02:42:34 +0000 Subject: [PATCH 122/200] internal/simdgen: make all compares between NaNs false This CL updates the immediate predicate of Equal, GreaterEqual and Greater. This CL generates CL 686235. Change-Id: Iffabd0704e9f8f5c8800d81a688367bda5642416 Reviewed-on: https://go-review.googlesource.com/c/arch/+/686215 Reviewed-by: David Chase LUCI-TryBot-Result: Go LUCI --- internal/simdgen/categories.yaml | 8 ++++---- internal/simdgen/ops/Compares/categories.yaml | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/internal/simdgen/categories.yaml b/internal/simdgen/categories.yaml index d5cf67a5..2384923b 100644 --- a/internal/simdgen/categories.yaml +++ b/internal/simdgen/categories.yaml @@ -151,13 +151,13 @@ documentation: !string |- // NotEqual compares for inequality. - go: GreaterEqual - constImm: 5 + constImm: 13 commutative: "false" extension: "AVX.*" documentation: !string |- // GreaterEqual compares for greater than or equal. - go: Greater - constImm: 6 + constImm: 14 commutative: "false" extension: "AVX.*" documentation: !string |- @@ -199,14 +199,14 @@ documentation: !string |- // MaskedNotEqual compares for inequality. 
- go: MaskedGreaterEqual - constImm: 5 + constImm: 13 masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- // MaskedGreaterEqual compares for greater than or equal. - go: MaskedGreater - constImm: 6 + constImm: 14 masked: "true" commutative: "false" extension: "AVX.*" diff --git a/internal/simdgen/ops/Compares/categories.yaml b/internal/simdgen/ops/Compares/categories.yaml index 08b153c7..3c607c76 100644 --- a/internal/simdgen/ops/Compares/categories.yaml +++ b/internal/simdgen/ops/Compares/categories.yaml @@ -37,13 +37,13 @@ documentation: !string |- // NotEqual compares for inequality. - go: GreaterEqual - constImm: 5 + constImm: 13 commutative: "false" extension: "AVX.*" documentation: !string |- // GreaterEqual compares for greater than or equal. - go: Greater - constImm: 6 + constImm: 14 commutative: "false" extension: "AVX.*" documentation: !string |- @@ -85,14 +85,14 @@ documentation: !string |- // MaskedNotEqual compares for inequality. - go: MaskedGreaterEqual - constImm: 5 + constImm: 13 masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- // MaskedGreaterEqual compares for greater than or equal. - go: MaskedGreater - constImm: 6 + constImm: 14 masked: "true" commutative: "false" extension: "AVX.*" From 8237fc3b7f473cdbc602e0a3cf86cf68cd5e246e Mon Sep 17 00:00:00 2001 From: Junyang Shao Date: Tue, 8 Jul 2025 17:27:23 +0000 Subject: [PATCH 123/200] internal/simdgen: remove FP bitwise logic operations. This CL generates CL 686496. 
Change-Id: I22ee7df6de59c11d00e041dfa56e2b1c442d82fa Reviewed-on: https://go-review.googlesource.com/c/arch/+/686555 LUCI-TryBot-Result: Go LUCI Reviewed-by: David Chase --- internal/simdgen/go.yaml | 70 ---------------------- internal/simdgen/ops/BitwiseLogic/go.yaml | 72 +---------------------- 2 files changed, 1 insertion(+), 141 deletions(-) diff --git a/internal/simdgen/go.yaml b/internal/simdgen/go.yaml index 18c937ac..c4e648cf 100644 --- a/internal/simdgen/go.yaml +++ b/internal/simdgen/go.yaml @@ -201,76 +201,6 @@ - *any out: - *any - -# FP operations. -# Set the [base] to be "int" to not include duplicates(excluding "uint"). -# [base] is not used when [overwriteBase] is present. -- go: And - asm: "VANDP[SD]" - in: - - &intToFloat - go: $t - base: int - overwriteBase: float - - *intToFloat - out: - - *intToFloat -- go: MaskedAnd - asm: "VANDP[SD]" - in: - - class: mask - - *intToFloat - - *intToFloat - out: - - *intToFloat - -- go: AndNot - asm: "VANDNP[SD]" - in: - - *intToFloat - - *intToFloat - out: - - *intToFloat -- go: MaskedAndNot - asm: "VANDNP[SD]" - in: - - class: mask - - *intToFloat - - *intToFloat - out: - - *intToFloat - -- go: Or - asm: "VORP[SD]" - in: - - *intToFloat - - *intToFloat - out: - - *intToFloat -- go: MaskedOr - asm: "VORP[SD]" - in: - - class: mask - - *intToFloat - - *intToFloat - out: - - *intToFloat - -- go: Xor - asm: "VXORP[SD]" - in: - - *intToFloat - - *intToFloat - out: - - *intToFloat -- go: MaskedXor - asm: "VXORP[SD]" - in: - - class: mask - - *intToFloat - - *intToFloat - out: - - *intToFloat # Ints - go: Equal asm: "V?PCMPEQ[BWDQ]" diff --git a/internal/simdgen/ops/BitwiseLogic/go.yaml b/internal/simdgen/ops/BitwiseLogic/go.yaml index 7e7adf7a..79d149ca 100644 --- a/internal/simdgen/ops/BitwiseLogic/go.yaml +++ b/internal/simdgen/ops/BitwiseLogic/go.yaml @@ -76,74 +76,4 @@ - *any - *any out: - - *any - -# FP operations. -# Set the [base] to be "int" to not include duplicates(excluding "uint"). 
-# [base] is not used when [overwriteBase] is present. -- go: And - asm: "VANDP[SD]" - in: - - &intToFloat - go: $t - base: int - overwriteBase: float - - *intToFloat - out: - - *intToFloat -- go: MaskedAnd - asm: "VANDP[SD]" - in: - - class: mask - - *intToFloat - - *intToFloat - out: - - *intToFloat - -- go: AndNot - asm: "VANDNP[SD]" - in: - - *intToFloat - - *intToFloat - out: - - *intToFloat -- go: MaskedAndNot - asm: "VANDNP[SD]" - in: - - class: mask - - *intToFloat - - *intToFloat - out: - - *intToFloat - -- go: Or - asm: "VORP[SD]" - in: - - *intToFloat - - *intToFloat - out: - - *intToFloat -- go: MaskedOr - asm: "VORP[SD]" - in: - - class: mask - - *intToFloat - - *intToFloat - out: - - *intToFloat - -- go: Xor - asm: "VXORP[SD]" - in: - - *intToFloat - - *intToFloat - out: - - *intToFloat -- go: MaskedXor - asm: "VXORP[SD]" - in: - - class: mask - - *intToFloat - - *intToFloat - out: - - *intToFloat \ No newline at end of file + - *any \ No newline at end of file From dea0129702c94fb714f87a0bfe1e11e4b5bca696 Mon Sep 17 00:00:00 2001 From: David Chase Date: Tue, 8 Jul 2025 12:32:59 -0400 Subject: [PATCH 124/200] internal/simdgen: doc/type cleanup around 'shape' and its return values. No changes to the generated code, this helps a bit with IDE tool tips. 
Change-Id: I6aa286a3cf5b8f562c9149c943524b666345b643 Reviewed-on: https://go-review.googlesource.com/c/arch/+/686377 Reviewed-by: Junyang Shao LUCI-TryBot-Result: Go LUCI --- internal/simdgen/gen_utility.go | 82 ++++++++++++--------------------- 1 file changed, 30 insertions(+), 52 deletions(-) diff --git a/internal/simdgen/gen_utility.go b/internal/simdgen/gen_utility.go index fa9920dd..371132ed 100644 --- a/internal/simdgen/gen_utility.go +++ b/internal/simdgen/gen_utility.go @@ -75,75 +75,53 @@ func numberLines(data []byte) string { return buf.String() } +type inShape uint8 +type outShape uint8 +type maskShape uint8 +type immShape uint8 + const ( - InvalidIn int = iota - PureVregIn - OneKmaskIn - OneImmIn - OneKmaskImmIn - PureKmaskIn + InvalidIn inShape = iota + PureVregIn // vector register input only + OneKmaskIn // vector and kmask input + OneImmIn // vector and immediate input + OneKmaskImmIn // vector, kmask, and immediate inputs + PureKmaskIn // only mask inputs. ) const ( - InvalidOut int = iota - NoOut - OneVregOut - OneGregOut - OneKmaskOut - OneVregOutAtIn + InvalidOut outShape = iota + NoOut // no output + OneVregOut // (one) vector register output + OneGregOut // (one) general register output + OneKmaskOut // mask output + OneVregOutAtIn // the first input is also the output ) const ( - InvalidMask int = iota - NoMask - OneMask - AllMasks + InvalidMask maskShape = iota + NoMask // no mask + OneMask // with mask (K1 to K7) + AllMasks // a K mask instruction (K0-K7) ) const ( - InvalidImm int = iota - NoImm - ConstImm - VarImm - ConstVarImm + InvalidImm immShape = iota + NoImm // no immediate + ConstImm // const only immediate + VarImm // pure imm argument provided by the users + ConstVarImm // a combination of user arg and const ) -// opShape returns the an int denoting the shape of the operation: -// -// shapeIn: -// InvalidIn: unknown, with err set to the error message -// PureVregIn: pure vreg operation -// OneKmaskIn: operation with one k mask 
input (TODO: verify if it's always opmask predicate) -// OneImmIn: operation with one imm input -// OneKmaskImmIn: operation with one k mask input and one imm input -// PureKmaskIn: it's a K mask instruction (which can use K0) -// -// shapeOut: -// InvalidOut: unknown, with err set to the error message -// NoOut: no outputs, this is invalid now. -// OneVregOut: one vreg output -// OneKmaskOut: one mask output -// OneVregOutAtIn: one vreg output, it's at the same time the first input -// -// maskType: -// InvalidMask: unknown, with err set to the error message -// NoMask: no mask -// OneMask: with mask (K1 to K7) -// AllMasks: it's a K mask instruction -// -// immType: -// InvalidImm: unrecognize immediate structure -// NoImm: no immediate -// ConstImm: const only immediate -// VarImm: pure imm argument provided by the users -// ConstVarImm: a combination of user arg and const +// opShape returns the several integers describing the shape of the operation, +// and modified versions of the op: // // opNoImm is op with its inputs excluding the const imm. -// opNoConstMask is op with its inputs excluding the const mask. // opNoConstImmMask is op with its inputs excluding the const imm and mask. // // This function does not modify op. -func (op *Operation) shape() (shapeIn, shapeOut, maskType, immType int, opNoImm Operation, opNoImmConstMask Operation) { +func (op *Operation) shape() (shapeIn inShape, shapeOut outShape, maskType maskShape, immType immShape, + opNoImm Operation, opNoImmConstMask Operation) { if len(op.Out) > 1 { panic(fmt.Errorf("simdgen only supports 1 output: %s", op)) } From f7578004bf0cfb0bc4e1f8dd80f425dcf09a63aa Mon Sep 17 00:00:00 2001 From: Junyang Shao Date: Tue, 8 Jul 2025 18:06:02 +0000 Subject: [PATCH 125/200] internal/simdgen: change op name Masked$OP to $(OP)Masked This CL generates CL 686516. 
Change-Id: Ifa5320c656806b0e4aea921b27c0eb54671c9f36 Reviewed-on: https://go-review.googlesource.com/c/arch/+/686575 Reviewed-by: David Chase LUCI-TryBot-Result: Go LUCI --- internal/simdgen/categories.yaml | 252 +++++++++--------- internal/simdgen/gen_simdTypes.go | 2 +- internal/simdgen/gen_utility.go | 9 +- internal/simdgen/go.yaml | 130 ++++----- internal/simdgen/godefs.go | 2 +- internal/simdgen/ops/AddSub/categories.yaml | 16 +- internal/simdgen/ops/AddSub/go.yaml | 12 +- .../simdgen/ops/BitwiseLogic/categories.yaml | 16 +- internal/simdgen/ops/BitwiseLogic/go.yaml | 8 +- internal/simdgen/ops/Compares/categories.yaml | 28 +- internal/simdgen/ops/Compares/go.yaml | 10 +- .../simdgen/ops/FPonlyArith/categories.yaml | 52 ++-- internal/simdgen/ops/FPonlyArith/go.yaml | 14 +- .../simdgen/ops/GaloisField/categories.yaml | 12 +- internal/simdgen/ops/GaloisField/go.yaml | 6 +- .../simdgen/ops/IntOnlyArith/categories.yaml | 12 +- internal/simdgen/ops/IntOnlyArith/go.yaml | 6 +- internal/simdgen/ops/MLOps/categories.yaml | 36 +-- internal/simdgen/ops/MLOps/go.yaml | 18 +- internal/simdgen/ops/MinMax/categories.yaml | 8 +- internal/simdgen/ops/MinMax/go.yaml | 12 +- internal/simdgen/ops/Mul/categories.yaml | 16 +- internal/simdgen/ops/Mul/go.yaml | 12 +- .../simdgen/ops/ShiftRotate/categories.yaml | 56 ++-- internal/simdgen/ops/ShiftRotate/go.yaml | 32 +-- 25 files changed, 389 insertions(+), 388 deletions(-) diff --git a/internal/simdgen/categories.yaml b/internal/simdgen/categories.yaml index 2384923b..bfb0ff80 100644 --- a/internal/simdgen/categories.yaml +++ b/internal/simdgen/categories.yaml @@ -9,18 +9,18 @@ extension: "AVX.*" documentation: !string |- // SaturatedAdd adds corresponding elements of two vectors with saturation. -- go: MaskedAdd +- go: AddMasked masked: "true" commutative: "true" extension: "AVX.*" documentation: !string |- - // MaskedAdd adds corresponding elements of two vectors. 
-- go: MaskedSaturatedAdd + // AddMasked adds corresponding elements of two vectors. +- go: SaturatedAddMasked masked: "true" commutative: "true" extension: "AVX.*" documentation: !string |- - // MaskedSaturatedAdd adds corresponding elements of two vectors with saturation. + // SaturatedAddMasked adds corresponding elements of two vectors with saturation. - go: Sub commutative: "false" extension: "AVX.*" @@ -31,18 +31,18 @@ extension: "AVX.*" documentation: !string |- // SaturatedSub subtracts corresponding elements of two vectors with saturation. -- go: MaskedSub +- go: SubMasked masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // MaskedSub subtracts corresponding elements of two vectors. -- go: MaskedSaturatedSub + // SubMasked subtracts corresponding elements of two vectors. +- go: SaturatedSubMasked masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // MaskedSaturatedSub subtracts corresponding elements of two vectors with saturation. + // SaturatedSubMasked subtracts corresponding elements of two vectors with saturation. - go: PairwiseAdd commutative: "false" extension: "AVX.*" @@ -72,45 +72,45 @@ extension: "AVX.*" documentation: !string |- // And performs a bitwise AND operation between two vectors. -- go: MaskedAnd +- go: AndMasked masked: "true" commutative: "true" extension: "AVX.*" documentation: !string |- - // MaskedAnd performs a masked bitwise AND operation between two vectors. + // AndMasked performs a masked bitwise AND operation between two vectors. - go: Or commutative: "true" extension: "AVX.*" documentation: !string |- // Or performs a bitwise OR operation between two vectors. -- go: MaskedOr +- go: OrMasked masked: "true" commutative: "true" extension: "AVX.*" documentation: !string |- - // MaskedOr performs a masked bitwise OR operation between two vectors. + // OrMasked performs a masked bitwise OR operation between two vectors. 
- go: AndNot commutative: "false" extension: "AVX.*" documentation: !string |- // AndNot performs a bitwise AND NOT operation between two vectors. -- go: MaskedAndNot +- go: AndNotMasked masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // MaskedAndNot performs a masked bitwise AND NOT operation between two vectors. + // AndNotMasked performs a masked bitwise AND NOT operation between two vectors. - go: Xor commutative: "true" extension: "AVX.*" documentation: !string |- // Xor performs a bitwise XOR operation between two vectors. -- go: MaskedXor +- go: XorMasked masked: "true" commutative: "true" extension: "AVX.*" documentation: !string |- - // MaskedXor performs a masked bitwise XOR operation between two vectors. + // XorMasked performs a masked bitwise XOR operation between two vectors. # We also have PTEST and VPTERNLOG, those should be hidden from the users # and only appear in rewrite rules. # const imm predicate(holds for both float and int|uint): @@ -163,105 +163,105 @@ documentation: !string |- // Greater compares for greater than. -- go: MaskedEqual +- go: EqualMasked constImm: 0 masked: "true" commutative: "true" extension: "AVX.*" documentation: !string |- - // MaskedEqual compares for equality, masked. -- go: MaskedLess + // EqualMasked compares for equality, masked. +- go: LessMasked constImm: 1 masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // MaskedLess compares for less than. -- go: MaskedLessEqual + // LessMasked compares for less than. +- go: LessEqualMasked constImm: 2 masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // MaskedLessEqual compares for less than or equal. -- go: MaskedIsNan # For float only. + // LessEqualMasked compares for less than or equal. +- go: IsNanMasked # For float only. constImm: 3 masked: "true" commutative: "true" extension: "AVX.*" documentation: !string |- - // MaskedIsNan checks if elements are NaN. 
Use as x.IsNan(x). -- go: MaskedNotEqual + // IsNanMasked checks if elements are NaN. Use as x.IsNan(x). +- go: NotEqualMasked constImm: 4 masked: "true" commutative: "true" extension: "AVX.*" documentation: !string |- - // MaskedNotEqual compares for inequality. -- go: MaskedGreaterEqual + // NotEqualMasked compares for inequality. +- go: GreaterEqualMasked constImm: 13 masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // MaskedGreaterEqual compares for greater than or equal. -- go: MaskedGreater + // GreaterEqualMasked compares for greater than or equal. +- go: GreaterMasked constImm: 14 masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // MaskedGreater compares for greater than. + // GreaterMasked compares for greater than. - go: Div commutative: "false" extension: "AVX.*" documentation: !string |- // Div divides elements of two vectors. -- go: MaskedDiv +- go: DivMasked commutative: "false" masked: "true" extension: "AVX.*" documentation: !string |- - // MaskedDiv divides elements of two vectors. + // DivMasked divides elements of two vectors. - go: Sqrt commutative: "false" extension: "AVX.*" documentation: !string |- // Sqrt computes the square root of each element. -- go: MaskedSqrt +- go: SqrtMasked commutative: "false" masked: "true" extension: "AVX.*" documentation: !string |- - // MaskedSqrt computes the square root of each element. + // SqrtMasked computes the square root of each element. - go: ApproximateReciprocal commutative: "false" extension: "AVX.*" documentation: !string |- // ApproximateReciprocal computes an approximate reciprocal of each element. -- go: MaskedApproximateReciprocal +- go: ApproximateReciprocalMasked commutative: "false" masked: "true" extension: "AVX.*" documentation: !string |- - // MaskedApproximateReciprocal computes an approximate reciprocal of each element. + // ApproximateReciprocalMasked computes an approximate reciprocal of each element. 
- go: ApproximateReciprocalOfSqrt commutative: "false" extension: "AVX.*" documentation: !string |- // ApproximateReciprocalOfSqrt computes an approximate reciprocal of the square root of each element. -- go: MaskedApproximateReciprocalOfSqrt +- go: ApproximateReciprocalOfSqrtMasked commutative: "false" masked: "true" extension: "AVX.*" documentation: !string |- - // MaskedApproximateReciprocalOfSqrt computes an approximate reciprocal of the square root of each element. -- go: MaskedMulByPowOf2 # This operation is all after AVX512, the unmasked version will be generated. + // ApproximateReciprocalOfSqrtMasked computes an approximate reciprocal of the square root of each element. +- go: MulByPowOf2Masked # This operation is all after AVX512, the unmasked version will be generated. commutative: "false" masked: "true" extension: "AVX.*" documentation: !string |- - // MaskedMulByPowOf2 multiplies elements by a power of 2. + // MulByPowOf2Masked multiplies elements by a power of 2. - go: Round commutative: "false" @@ -269,20 +269,20 @@ constImm: 0 documentation: !string |- // Round rounds elements to the nearest integer. -- go: MaskedRoundWithPrecision +- go: RoundWithPrecisionMasked commutative: "false" extension: "AVX.*" constImm: 0 masked: "true" documentation: !string |- - // MaskedRoundWithPrecision rounds elements with specified precision. -- go: MaskedDiffWithRoundWithPrecision + // RoundWithPrecisionMasked rounds elements with specified precision. +- go: DiffWithRoundWithPrecisionMasked commutative: "false" extension: "AVX.*" constImm: 0 masked: "true" documentation: !string |- - // MaskedDiffWithRoundWithPrecision computes the difference after rounding with specified precision. + // DiffWithRoundWithPrecisionMasked computes the difference after rounding with specified precision. - go: Floor commutative: "false" @@ -290,20 +290,20 @@ constImm: 1 documentation: !string |- // Floor rounds elements down to the nearest integer. 
-- go: MaskedFloorWithPrecision +- go: FloorWithPrecisionMasked commutative: "false" extension: "AVX.*" constImm: 1 masked: "true" documentation: !string |- - // MaskedFloorWithPrecision rounds elements down with specified precision, masked. -- go: MaskedDiffWithFloorWithPrecision + // FloorWithPrecisionMasked rounds elements down with specified precision, masked. +- go: DiffWithFloorWithPrecisionMasked commutative: "false" extension: "AVX.*" constImm: 1 masked: "true" documentation: !string |- - // MaskedDiffWithFloorWithPrecision computes the difference after flooring with specified precision. + // DiffWithFloorWithPrecisionMasked computes the difference after flooring with specified precision. - go: Ceil commutative: "false" @@ -311,20 +311,20 @@ constImm: 2 documentation: !string |- // Ceil rounds elements up to the nearest integer. -- go: MaskedCeilWithPrecision +- go: CeilWithPrecisionMasked commutative: "false" extension: "AVX.*" constImm: 2 masked: "true" documentation: !string |- - // MaskedCeilWithPrecision rounds elements up with specified precision, masked. -- go: MaskedDiffWithCeilWithPrecision + // CeilWithPrecisionMasked rounds elements up with specified precision, masked. +- go: DiffWithCeilWithPrecisionMasked commutative: "false" extension: "AVX.*" constImm: 2 masked: "true" documentation: !string |- - // MaskedDiffWithCeilWithPrecision computes the difference after ceiling with specified precision. + // DiffWithCeilWithPrecisionMasked computes the difference after ceiling with specified precision. - go: Trunc commutative: "false" @@ -332,63 +332,63 @@ constImm: 3 documentation: !string |- // Trunc truncates elements towards zero. -- go: MaskedTruncWithPrecision +- go: TruncWithPrecisionMasked commutative: "false" extension: "AVX.*" constImm: 3 masked: "true" documentation: !string |- - // MaskedTruncWithPrecision truncates elements with specified precision. 
-- go: MaskedDiffWithTruncWithPrecision + // TruncWithPrecisionMasked truncates elements with specified precision. +- go: DiffWithTruncWithPrecisionMasked commutative: "false" extension: "AVX.*" constImm: 3 masked: "true" documentation: !string |- - // MaskedDiffWithTruncWithPrecision computes the difference after truncating with specified precision. + // DiffWithTruncWithPrecisionMasked computes the difference after truncating with specified precision. - go: AddSub commutative: "false" extension: "AVX.*" documentation: !string |- // AddSub subtracts even elements and adds odd elements of two vectors. -- go: MaskedGaloisFieldAffineTransform +- go: GaloisFieldAffineTransformMasked masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // MaskedGaloisFieldAffineTransform computes an affine transformation in GF(2^8): + // GaloisFieldAffineTransformMasked computes an affine transformation in GF(2^8): // x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrixes; // b is an 8-bit vector. The affine transformation is y * x + b, with each element of y // corresponding to a group of 8 elements in x. -- go: MaskedGaloisFieldAffineTransformInversed +- go: GaloisFieldAffineTransformInversedMasked masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // MaskedGaloisFieldAffineTransform computes an affine transformation in GF(2^8), + // GaloisFieldAffineTransformMasked computes an affine transformation in GF(2^8), // with x inversed with reduction polynomial x^8 + x^4 + x^3 + x + 1: // x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrixes; // b is an 8-bit vector. The affine transformation is y * x + b, with each element of y // corresponding to a group of 8 elements in x. 
-- go: MaskedGaloisFieldMul +- go: GaloisFieldMulMasked masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // MaskedGaloisFieldMul computes element-wise GF(2^8) multiplication with + // GaloisFieldMulMasked computes element-wise GF(2^8) multiplication with // reduction polynomial x^8 + x^4 + x^3 + x + 1. - go: Average commutative: "true" extension: "AVX.*" # VPAVGB/W are available across various AVX versions documentation: !string |- // Average computes the rounded average of corresponding elements. -- go: MaskedAverage +- go: AverageMasked commutative: "true" masked: "true" extension: "AVX512.*" # Masked operations are typically AVX512 documentation: !string |- - // MaskedAverage computes the rounded average of corresponding elements. + // AverageMasked computes the rounded average of corresponding elements. - go: Absolute commutative: "false" @@ -396,12 +396,12 @@ extension: "AVX.*" # VPABSB/W/D are AVX, VPABSQ is AVX512 documentation: !string |- // Absolute computes the absolute value of each element. -- go: MaskedAbsolute +- go: AbsoluteMasked commutative: "false" masked: "true" extension: "AVX512.*" documentation: !string |- - // MaskedAbsolute computes the absolute value of each element. + // AbsoluteMasked computes the absolute value of each element. - go: Sign # Applies sign of second operand to first: sign(val, sign_src) @@ -412,24 +412,24 @@ // whichever constant is nearest to the value of the second operand. # Sign does not have masked version -- go: MaskedPopCount +- go: PopCountMasked commutative: "false" masked: "true" extension: "AVX512.*" # VPOPCNT instructions are AVX512 (BITALG or VPOPCNTDQ) documentation: !string |- - // MaskedPopCount counts the number of set bits in each element. + // PopCountMasked counts the number of set bits in each element. 
- go: PairDotProd commutative: "false" extension: "AVX.*" documentation: !string |- // PairDotProd multiplies the elements and add the pairs together, // yielding a vector of half as many elements with twice the input element size. -- go: MaskedPairDotProd +- go: PairDotProdMasked masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // MaskedPairDotProd multiplies the elements and add the pairs together, + // PairDotProdMasked multiplies the elements and add the pairs together, // yielding a vector of half as many elements with twice the input element size. - go: SaturatedUnsignedSignedPairDotProd commutative: "false" @@ -437,12 +437,12 @@ documentation: !string |- // SaturatedPairDotProd multiplies the elements and add the pairs together with saturation, // yielding a vector of half as many elements with twice the input element size. -- go: MaskedSaturatedUnsignedSignedPairDotProd +- go: SaturatedUnsignedSignedPairDotProdMasked masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // MaskedSaturatedPairDotProd multiplies the elements and add the pairs together with saturation, + // SaturatedPairDotProdMasked multiplies the elements and add the pairs together with saturation, // yielding a vector of half as many elements with twice the input element size. # QuadDotProd, i.e. VPDPBUSD(S) are operations with src/dst on the same register, we are not supporting this as of now. @@ -456,85 +456,85 @@ extension: "AVX.*" documentation: !string |- // UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x. -- go: MaskedUnsignedSignedQuadDotProdAccumulate +- go: UnsignedSignedQuadDotProdAccumulateMasked masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // MaskedUnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x. 
+ // UnsignedSignedQuadDotProdAccumulateMasked performs dot products on groups of 4 elements of y and z and accumulates the results to x. - go: SaturatedUnsignedSignedQuadDotProdAccumulate commutative: "false" extension: "AVX.*" documentation: !string |- // SaturatedUnsignedSignedQuadDotProdAccumulate multiplies performs dot products on groups of 4 elements of y and z and accumulates the results to x. -- go: MaskedSaturatedUnsignedSignedQuadDotProdAccumulate +- go: SaturatedUnsignedSignedQuadDotProdAccumulateMasked masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // MaskedSaturatedUnsignedSignedQuadDotProdAccumulate multiplies performs dot products on groups of 4 elements of y and z and accumulates the results to x. + // SaturatedUnsignedSignedQuadDotProdAccumulateMasked multiplies performs dot products on groups of 4 elements of y and z and accumulates the results to x. - go: PairDotProdAccumulate commutative: "false" extension: "AVX.*" documentation: !string |- // PairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x. -- go: MaskedPairDotProdAccumulate +- go: PairDotProdAccumulateMasked masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // MaskedPairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x. + // PairDotProdAccumulateMasked performs dot products on pairs of elements of y and z and accumulates the results to x. - go: SaturatedPairDotProdAccumulate commutative: "false" extension: "AVX.*" documentation: !string |- // SaturatedPairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x. 
-- go: MaskedSaturatedPairDotProdAccumulate +- go: SaturatedPairDotProdAccumulateMasked masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // MaskedSaturatedPairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x. -- go: MaskedFusedMultiplyAdd + // SaturatedPairDotProdAccumulateMasked performs dot products on pairs of elements of y and z and accumulates the results to x. +- go: FusedMultiplyAddMasked masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // MaskedFusedMultiplyAdd performs `(v1 * v2) + v3`. -- go: MaskedFusedMultiplyAddSub + // FusedMultiplyAddMasked performs `(v1 * v2) + v3`. +- go: FusedMultiplyAddSubMasked masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // MaskedFusedMultiplyAddSub performs `(v1 * v2) - v3` for odd-indexed elements, and `(v1 * v2) + v3` for even-indexed elements. -- go: MaskedFusedMultiplySubAdd + // FusedMultiplyAddSubMasked performs `(v1 * v2) - v3` for odd-indexed elements, and `(v1 * v2) + v3` for even-indexed elements. +- go: FusedMultiplySubAddMasked masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // MaskedFusedMultiplySubAdd performs `(v1 * v2) + v3` for odd-indexed elements, and `(v1 * v2) - v3` for even-indexed elements. + // FusedMultiplySubAddMasked performs `(v1 * v2) + v3` for odd-indexed elements, and `(v1 * v2) - v3` for even-indexed elements. - go: Max commutative: "true" extension: "AVX.*" documentation: !string |- // Max computes the maximum of corresponding elements. -- go: MaskedMax +- go: MaxMasked commutative: "true" masked: "true" extension: "AVX.*" documentation: !string |- - // MaskedMax computes the maximum of corresponding elements. + // MaxMasked computes the maximum of corresponding elements. - go: Min commutative: "true" extension: "AVX.*" documentation: !string |- // Min computes the minimum of corresponding elements. 
-- go: MaskedMin +- go: MinMasked commutative: "true" masked: "true" extension: "AVX.*" documentation: !string |- - // MaskedMin computes the minimum of corresponding elements. + // MinMasked computes the minimum of corresponding elements. - go: SetElem commutative: "false" extension: "AVX.*" @@ -576,70 +576,70 @@ extension: "AVX.*" documentation: !string |- // MulLow multiplies elements and stores the low part of the result. -- go: MaskedMul +- go: MulMasked masked: "true" commutative: "true" extension: "AVX.*" documentation: !string |- - // MaskedMul multiplies corresponding elements of two vectors, masked. -- go: MaskedMulEvenWiden + // MulMasked multiplies corresponding elements of two vectors, masked. +- go: MulEvenWidenMasked masked: "true" commutative: "true" extension: "AVX.*" documentation: !string |- - // MaskedMulEvenWiden multiplies even-indexed elements, widening the result, masked. + // MulEvenWidenMasked multiplies even-indexed elements, widening the result, masked. // Result[i] = v1.Even[i] * v2.Even[i]. -- go: MaskedMulHigh +- go: MulHighMasked masked: "true" commutative: "true" extension: "AVX.*" documentation: !string |- - // MaskedMulHigh multiplies elements and stores the high part of the result, masked. -- go: MaskedMulLow + // MulHighMasked multiplies elements and stores the high part of the result, masked. +- go: MulLowMasked masked: "true" commutative: "true" extension: "AVX.*" documentation: !string |- - // MaskedMulLow multiplies elements and stores the low part of the result, masked. + // MulLowMasked multiplies elements and stores the low part of the result, masked. - go: ShiftAllLeft nameAndSizeCheck: "true" commutative: "false" extension: "AVX.*" documentation: !string |- // ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. 
-- go: MaskedShiftAllLeft +- go: ShiftAllLeftMasked nameAndSizeCheck: "true" masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // MaskedShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. + // ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. - go: ShiftAllRight nameAndSizeCheck: "true" commutative: "false" extension: "AVX.*" documentation: !string |- // ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. -- go: MaskedShiftAllRight +- go: ShiftAllRightMasked nameAndSizeCheck: "true" masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // MaskedShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. + // ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. - go: ShiftAllRightSignExtended nameAndSizeCheck: "true" commutative: "false" extension: "AVX.*" documentation: !string |- // ShiftAllRightSignExtended shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. -- go: MaskedShiftAllRightSignExtended +- go: ShiftAllRightSignExtendedMasked nameAndSizeCheck: "true" masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // MaskedShiftAllRightSignExtended shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. + // ShiftAllRightSignExtendedMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. 
- go: ShiftLeft nameAndSizeCheck: "true" @@ -647,98 +647,98 @@ extension: "AVX.*" documentation: !string |- // ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. -- go: MaskedShiftLeft +- go: ShiftLeftMasked nameAndSizeCheck: "true" masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // MaskedShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. + // ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. - go: ShiftRight nameAndSizeCheck: "true" commutative: "false" extension: "AVX.*" documentation: !string |- // ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. -- go: MaskedShiftRight +- go: ShiftRightMasked nameAndSizeCheck: "true" masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // MaskedShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. + // ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. - go: ShiftRightSignExtended nameAndSizeCheck: "true" commutative: "false" extension: "AVX.*" documentation: !string |- // ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. 
-- go: MaskedShiftRightSignExtended +- go: ShiftRightSignExtendedMasked nameAndSizeCheck: "true" masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // MaskedShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. + // ShiftRightSignExtendedMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. -- go: MaskedRotateAllLeft +- go: RotateAllLeftMasked nameAndSizeCheck: "true" masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // MaskedRotateAllLeft rotates each element to the left by the number of bits specified by the immediate. -- go: MaskedRotateLeft + // RotateAllLeftMasked rotates each element to the left by the number of bits specified by the immediate. +- go: RotateLeftMasked nameAndSizeCheck: "true" masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // MaskedRotateLeft rotates each element in x to the left by the number of bits specified by y's corresponding elements. -- go: MaskedRotateAllRight + // RotateLeftMasked rotates each element in x to the left by the number of bits specified by y's corresponding elements. +- go: RotateAllRightMasked nameAndSizeCheck: "true" masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // MaskedRotateAllRight rotates each element to the right by the number of bits specified by the immediate. -- go: MaskedRotateRight + // RotateAllRightMasked rotates each element to the right by the number of bits specified by the immediate. +- go: RotateRightMasked nameAndSizeCheck: "true" masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // MaskedRotateRight rotates each element in x to the right by the number of bits specified by y's corresponding elements. 
+ // RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements. -- go: MaskedShiftAllLeftAndFillUpperFrom +- go: ShiftAllLeftAndFillUpperFromMasked nameAndSizeCheck: "true" masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // MaskedShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the + // ShiftAllLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. -- go: MaskedShiftAllRightAndFillUpperFrom +- go: ShiftAllRightAndFillUpperFromMasked nameAndSizeCheck: "true" masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // MaskedShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the + // ShiftAllRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. -- go: MaskedShiftLeftAndFillUpperFrom +- go: ShiftLeftAndFillUpperFromMasked nameAndSizeCheck: "true" masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // MaskedShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the + // ShiftLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. 
-- go: MaskedShiftRightAndFillUpperFrom +- go: ShiftRightAndFillUpperFromMasked nameAndSizeCheck: "true" masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // MaskedShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the + // ShiftRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. diff --git a/internal/simdgen/gen_simdTypes.go b/internal/simdgen/gen_simdTypes.go index 16286736..7dcbc145 100644 --- a/internal/simdgen/gen_simdTypes.go +++ b/internal/simdgen/gen_simdTypes.go @@ -295,7 +295,7 @@ func writeSIMDTestsWrapper(ops []Operation) *bytes.Buffer { var vec string var vecOp Operand allSameVec := true - masked := strings.HasPrefix(gOp.Go, "Masked") + masked := strings.HasSuffix(gOp.Go, "Masked") skippedMaskCnt := 0 vecCnt := 0 for i, in := range gOp.In { diff --git a/internal/simdgen/gen_utility.go b/internal/simdgen/gen_utility.go index 371132ed..42aab212 100644 --- a/internal/simdgen/gen_utility.go +++ b/internal/simdgen/gen_utility.go @@ -521,12 +521,13 @@ func splitMask(ops []Operation) ([]Operation, error) { op2 := op // The ops should be sorted when calling this function, the mask is in the end, drop the mask op2.In = slices.Clone(op.In)[:len(op.In)-1] - if !strings.HasPrefix(op2.Go, "Masked") { - return nil, fmt.Errorf("simdgen only recognizes masked operations with name starting with 'Masked': %s", op) + if !strings.HasSuffix(op2.Go, "Masked") { + return nil, fmt.Errorf("simdgen only recognizes masked operations with name ending with 'Masked': %s", op) } - op2.Go = strings.ReplaceAll(op2.Go, "Masked", "") + maskedOpName := op2.Go + op2.Go = strings.TrimSuffix(op2.Go, "Masked") if op2.Documentation != nil { - *op2.Documentation = strings.ReplaceAll(*op2.Documentation, "Masked", "") + 
*op2.Documentation = strings.ReplaceAll(*op2.Documentation, maskedOpName, op2.Go) } splited = append(splited, op2) } else { diff --git a/internal/simdgen/go.yaml b/internal/simdgen/go.yaml index c4e648cf..16dbf1e6 100644 --- a/internal/simdgen/go.yaml +++ b/internal/simdgen/go.yaml @@ -8,7 +8,7 @@ - *any out: - *any -- go: MaskedAdd +- go: AddMasked asm: "VPADD[BWDQ]|VADDP[SD]" in: - class: mask @@ -35,7 +35,7 @@ - *uint out: - *uint -- go: MaskedSaturatedAdd +- go: SaturatedAddMasked asm: "VPADDS[BWDQ]" in: - class: mask @@ -43,7 +43,7 @@ - *int out: - *int -- go: MaskedSaturatedAdd +- go: SaturatedAddMasked asm: "VPADDS[BWDQ]" in: - class: mask @@ -60,7 +60,7 @@ - *any out: &1any - *any -- go: MaskedSub +- go: SubMasked asm: "VPSUB[BWDQ]|VSUBP[SD]" in: - class: mask @@ -83,7 +83,7 @@ - *uint out: - *uint -- go: MaskedSaturatedSub +- go: SaturatedSubMasked asm: "VPSUBS[BWDQ]" in: - class: mask @@ -91,7 +91,7 @@ - *int out: - *int -- go: MaskedSaturatedSub +- go: SaturatedSubMasked asm: "VPSUBS[BWDQ]" in: - class: mask @@ -145,7 +145,7 @@ # Dword and Qword. # TODO: should we wildcard other smaller elemBits to VPANDQ or # VPANDD? Looks like elemBits doesn't really matter afterall in bitwise operations. 
-- go: MaskedAnd +- go: AndMasked asm: "VPAND[DQ]" in: - class: mask @@ -161,7 +161,7 @@ - *any out: - *any -- go: MaskedAndNot +- go: AndNotMasked asm: "VPANDN[DQ]" in: - class: mask @@ -177,7 +177,7 @@ - *any out: - *any -- go: MaskedOr +- go: OrMasked asm: "VPOR[DQ]" in: - class: mask @@ -193,7 +193,7 @@ - *any out: - *any -- go: MaskedXor +- go: XorMasked asm: "VPXOR[DQ]" in: - class: mask @@ -219,7 +219,7 @@ in: *int2 out: - *anyvregToMask -- go: MaskedEqual +- go: EqualMasked asm: "V?PCMPEQ[BWDQ]" in: &maskint2 - class: mask @@ -227,14 +227,14 @@ - *int out: - class: mask -- go: MaskedGreater +- go: GreaterMasked asm: "V?PCMPGT[BWDQ]" in: *maskint2 out: - class: mask # The const imm predicated compares after AVX512, please see categories.yaml # for const imm specification. -- go: Masked(Equal|Greater|Less|LessEqual|GreaterEqual|NotEqual) +- go: (Equal|Greater|Less|LessEqual|GreaterEqual|NotEqual)Masked asm: "VPCMP[BWDQ]" in: - class: mask @@ -246,7 +246,7 @@ const: 0 # Just a placeholder, will be overwritten by const imm porting. out: - class: mask -- go: Masked(Equal|Greater|Less|LessEqual|GreaterEqual|NotEqual) +- go: (Equal|Greater|Less|LessEqual|GreaterEqual|NotEqual)Masked asm: "VPCMPU[BWDQ]" in: - class: mask @@ -273,7 +273,7 @@ - go: $t # We still need the output to be the same shape as inputs. 
overwriteBase: int overwriteClass: mask -- go: Masked(Equal|Greater|Less|LessEqual|GreaterEqual|NotEqual|IsNan) +- go: (Equal|Greater|Less|LessEqual|GreaterEqual|NotEqual|IsNan)Masked asm: "VCMPP[SD]" in: - class: mask @@ -292,7 +292,7 @@ - *fp out: &1fp - *fp -- go: MaskedDiv +- go: DivMasked asm: "V?DIVP[SD]" in: &1mask2fp - class: mask @@ -303,13 +303,13 @@ asm: "V?SQRTP[SD]" in: *1fp out: *1fp -- go: MaskedSqrt +- go: SqrtMasked asm: "V?SQRTP[SD]" in: &1mask1fp - class: mask - *fp out: *1fp -- go: MaskedApproximateReciprocal +- go: ApproximateReciprocalMasked asm: "VRCP14P[SD]" in: *1mask1fp out: *1fp @@ -317,11 +317,11 @@ asm: "V?RSQRTPS" in: *1fp out: *1fp -- go: MaskedApproximateReciprocalOfSqrt +- go: ApproximateReciprocalOfSqrtMasked asm: "VRSQRT14P[SD]" in: *1mask1fp out: *1fp -- go: MaskedMulByPowOf2 +- go: MulByPowOf2Masked asm: "VSCALEFP[SD]" in: *1mask2fp out: *1fp @@ -334,7 +334,7 @@ const: 0 # place holder out: *1fp -- go: "Masked(Round|Ceil|Floor|Trunc)(SuppressException)?WithPrecision" +- go: "(Round|Ceil|Floor|Trunc)WithPrecisionMasked" asm: "VRNDSCALEP[SD]" in: - class: mask @@ -343,7 +343,7 @@ const: 0 # place holder immOffset: 4 # "M", round to numbers with M digits after dot(by means of binary number). 
out: *1fp -- go: "MaskedDiffWith(Round|Ceil|Floor|Trunc)(SuppressException)?WithPrecision" +- go: "DiffWith(Round|Ceil|Floor|Trunc)WithPrecisionMasked" asm: "VREDUCEP[SD]" in: - class: mask @@ -360,7 +360,7 @@ - *fp out: - *fp -- go: MaskedGaloisFieldAffineTransform +- go: GaloisFieldAffineTransformMasked asm: VGF2P8AFFINEQB operandOrder: 2I # 2nd operand, then immediate in: &AffineArgs @@ -379,14 +379,14 @@ out: - *uint8 -- go: MaskedGaloisFieldAffineTransformInversed +- go: GaloisFieldAffineTransformInversedMasked asm: VGF2P8AFFINEINVQB operandOrder: 2I # 2nd operand, then immediate in: *AffineArgs out: - *uint8 -- go: MaskedGaloisFieldMul +- go: GaloisFieldMulMasked asm: VGF2P8MULB in: - class: mask @@ -405,7 +405,7 @@ - *uint_t out: - *uint_t -- go: MaskedAverage +- go: AverageMasked asm: "VPAVG[BW]" in: - class: mask @@ -424,7 +424,7 @@ base: int out: - *int_t # Output is magnitude, fits in the same signed type -- go: MaskedAbsolute +- go: AbsoluteMasked asm: "VPABS[BWDQ]" in: - class: mask @@ -446,7 +446,7 @@ # Population Count (count set bits in each element) # Instructions: VPOPCNTB, VPOPCNTW (AVX512_BITALG) # VPOPCNTD, VPOPCNTQ (AVX512_VPOPCNTDQ) -- go: MaskedPopCount +- go: PopCountMasked asm: "VPOPCNT[BWDQ]" in: - class: mask @@ -465,7 +465,7 @@ - &int2 # The elemBits are different go: $t2 base: int -- go: MaskedPairDotProd +- go: PairDotProdMasked asm: VPMADDWD in: - class: mask @@ -484,7 +484,7 @@ base: int out: - *int2 -- go: MaskedSaturatedUnsignedSignedPairDotProd +- go: SaturatedUnsignedSignedPairDotProdMasked asm: VPMADDUBSW in: - class: mask @@ -525,7 +525,7 @@ overwriteElementBits: 8 out: - *qdpa_acc -- go: MaskedUnsignedSignedQuadDotProdAccumulate +- go: UnsignedSignedQuadDotProdAccumulateMasked asm: "VPDPBUSD" in: - *qdpa_acc @@ -542,7 +542,7 @@ - *qdpa_src2 out: - *qdpa_acc -- go: MaskedSaturatedUnsignedSignedQuadDotProdAccumulate +- go: SaturatedUnsignedSignedQuadDotProdAccumulateMasked asm: "VPDPBUSDS" in: - *qdpa_acc @@ -568,7 +568,7 @@ 
overwriteElementBits: 16 out: - *pdpa_acc -- go: MaskedPairDotProdAccumulate +- go: PairDotProdAccumulateMasked asm: "VPDPWSSD" in: - *pdpa_acc @@ -585,7 +585,7 @@ - *pdpa_src2 out: - *pdpa_acc -- go: MaskedSaturatedPairDotProdAccumulate +- go: SaturatedPairDotProdAccumulateMasked asm: "VPDPWSSDS" in: - *pdpa_acc @@ -594,7 +594,7 @@ - *pdpa_src2 out: - *pdpa_acc -- go: MaskedFusedMultiplyAdd +- go: FusedMultiplyAddMasked asm: "VFMADD213PS|VFMADD213PD" in: - &fma_op @@ -605,7 +605,7 @@ - *fma_op out: - *fma_op -- go: MaskedFusedMultiplyAddSub +- go: FusedMultiplyAddSubMasked asm: "VFMADDSUB213PS|VFMADDSUB213PD" in: - *fma_op @@ -614,7 +614,7 @@ - *fma_op out: - *fma_op -- go: MaskedFusedMultiplySubAdd +- go: FusedMultiplySubAddMasked asm: "VFMSUBADD213PS|VFMSUBADD213PD" in: - *fma_op @@ -641,14 +641,14 @@ - *uint out: &1uint - *uint -- go: MaskedMax +- go: MaxMasked asm: "V?PMAXS[BWDQ]" in: &1mask2int - class: mask - *int - *int out: *1int -- go: MaskedMax +- go: MaxMasked asm: "V?PMAXU[BWDQ]" in: &1mask2uint - class: mask @@ -664,11 +664,11 @@ asm: "V?PMINU[BWDQ]" in: *2uint out: *1uint -- go: MaskedMin +- go: MinMasked asm: "V?PMINS[BWDQ]" in: *1mask2int out: *1int -- go: MaskedMin +- go: MinMasked asm: "V?PMINU[BWDQ]" in: *1mask2uint out: *1uint @@ -682,7 +682,7 @@ - *float out: &1float - *float -- go: MaskedMax +- go: MaxMasked asm: "V?MAXP[SD]" in: &1mask2float - class: mask @@ -693,7 +693,7 @@ asm: "V?MINP[SD]" in: *2float out: *1float -- go: MaskedMin +- go: MinMasked asm: "V?MINP[SD]" in: *1mask2float out: *1float @@ -893,7 +893,7 @@ - *fp out: - *fp -- go: MaskedMul +- go: MulMasked asm: "VMULP[SD]" in: - class: mask @@ -928,7 +928,7 @@ - &uint2 go: $t2 base: uint -- go: MaskedMulEvenWiden +- go: MulEvenWidenMasked asm: "VPMULDQ" in: - class: mask @@ -936,7 +936,7 @@ - *int out: - *int2 -- go: MaskedMulEvenWiden +- go: MulEvenWidenMasked asm: "VPMULUDQ" in: - class: mask @@ -962,7 +962,7 @@ - *uint out: - *uint2 -- go: MaskedMulHigh +- go: MulHighMasked 
asm: "VPMULHW" in: - class: mask @@ -970,7 +970,7 @@ - *int out: - *int2 -- go: MaskedMulHigh +- go: MulHighMasked asm: "VPMULHUW" in: - class: mask @@ -989,7 +989,7 @@ - *int out: - *int2 -- go: MaskedMulLow +- go: MulLowMasked asm: "VPMULL[WDQ]" in: - class: mask @@ -1009,7 +1009,7 @@ go: Uint64x2 out: - *any -- go: MaskedShiftAllLeft +- go: ShiftAllLeftMasked asm: "VPSLL[WDQ]" in: - class: mask @@ -1024,7 +1024,7 @@ - *vecAsScalar64 out: - *any -- go: MaskedShiftAllRight +- go: ShiftAllRightMasked asm: "VPSRL[WDQ]" in: - class: mask @@ -1041,7 +1041,7 @@ - *vecAsScalar64 out: - *int -- go: MaskedShiftAllRightSignExtended +- go: ShiftAllRightSignExtendedMasked asm: "VPSRA[WDQ]" in: - class: mask @@ -1058,7 +1058,7 @@ - *any out: - *any -- go: MaskedShiftLeft +- go: ShiftLeftMasked asm: "VPSLLV[WD]" in: - class: mask @@ -1077,7 +1077,7 @@ - *anyOverwriteElemBits out: - *anyOverwriteElemBits -- go: MaskedShiftLeft +- go: ShiftLeftMasked asm: "VPSLLVQ" in: - class: mask @@ -1092,7 +1092,7 @@ - *any out: - *any -- go: MaskedShiftRight +- go: ShiftRightMasked asm: "VPSRLV[WD]" in: - class: mask @@ -1108,7 +1108,7 @@ - *anyOverwriteElemBits out: - *anyOverwriteElemBits -- go: MaskedShiftRight +- go: ShiftRightMasked asm: "VPSRLVQ" in: - class: mask @@ -1123,7 +1123,7 @@ - *any out: - *any -- go: MaskedShiftRightSignExtended +- go: ShiftRightSignExtendedMasked asm: "VPSRAV[WDQ]" in: - class: mask @@ -1133,7 +1133,7 @@ - *any # Rotate -- go: MaskedRotateAllLeft +- go: RotateAllLeftMasked asm: "VPROL[DQ]" in: - class: mask @@ -1143,7 +1143,7 @@ immOffset: 0 out: - *any -- go: MaskedRotateAllRight +- go: RotateAllRightMasked asm: "VPROR[DQ]" in: - class: mask @@ -1151,7 +1151,7 @@ - *pureImm out: - *any -- go: MaskedRotateLeft +- go: RotateLeftMasked asm: "VPROLV[DQ]" in: - class: mask @@ -1159,7 +1159,7 @@ - *any out: - *any -- go: MaskedRotateRight +- go: RotateRightMasked asm: "VPRORV[DQ]" in: - class: mask @@ -1169,7 +1169,7 @@ - *any # Bizzare shifts. 
-- go: MaskedShiftAllLeftAndFillUpperFrom +- go: ShiftAllLeftAndFillUpperFromMasked asm: "VPSHLD[WDQ]" in: - class: mask @@ -1178,7 +1178,7 @@ - *pureImm out: - *any -- go: MaskedShiftAllRightAndFillUpperFrom +- go: ShiftAllRightAndFillUpperFromMasked asm: "VPSHRD[WDQ]" in: - class: mask @@ -1187,7 +1187,7 @@ - *pureImm out: - *any -- go: MaskedShiftLeftAndFillUpperFrom +- go: ShiftLeftAndFillUpperFromMasked asm: "VPSHLDV[WDQ]" in: - *any @@ -1196,7 +1196,7 @@ - *any out: - *any -- go: MaskedShiftRightAndFillUpperFrom +- go: ShiftRightAndFillUpperFromMasked asm: "VPSHRDV[WDQ]" in: - *any diff --git a/internal/simdgen/godefs.go b/internal/simdgen/godefs.go index e5ad5b82..52cfd1e8 100644 --- a/internal/simdgen/godefs.go +++ b/internal/simdgen/godefs.go @@ -283,7 +283,7 @@ func writeGoDefs(path string, cl unify.Closure) error { typeMap := parseSIMDTypes(deduped) formatWriteAndClose(writeSIMDTypes(typeMap), path, "src/"+simdPackage+"/types_amd64.go") - formatWriteAndClose(writeSIMDStubs(deduped, typeMap), path, "src/"+simdPackage+"/stubs_amd64.go") + formatWriteAndClose(writeSIMDStubs(deduped, typeMap), path, "src/"+simdPackage+"/ops_amd64.go") formatWriteAndClose(writeSIMDTestsWrapper(deduped), path, "src/"+simdPackage+"/simd_wrapped_test.go") formatWriteAndClose(writeSIMDIntrinsics(deduped, typeMap), path, "src/cmd/compile/internal/ssagen/simdintrinsics.go") formatWriteAndClose(writeSIMDGenericOps(deduped), path, "src/cmd/compile/internal/ssa/_gen/simdgenericOps.go") diff --git a/internal/simdgen/ops/AddSub/categories.yaml b/internal/simdgen/ops/AddSub/categories.yaml index e87ead1d..95775bb8 100644 --- a/internal/simdgen/ops/AddSub/categories.yaml +++ b/internal/simdgen/ops/AddSub/categories.yaml @@ -9,18 +9,18 @@ extension: "AVX.*" documentation: !string |- // SaturatedAdd adds corresponding elements of two vectors with saturation. 
-- go: MaskedAdd +- go: AddMasked masked: "true" commutative: "true" extension: "AVX.*" documentation: !string |- - // MaskedAdd adds corresponding elements of two vectors. -- go: MaskedSaturatedAdd + // AddMasked adds corresponding elements of two vectors. +- go: SaturatedAddMasked masked: "true" commutative: "true" extension: "AVX.*" documentation: !string |- - // MaskedSaturatedAdd adds corresponding elements of two vectors with saturation. + // SaturatedAddMasked adds corresponding elements of two vectors with saturation. - go: Sub commutative: "false" extension: "AVX.*" @@ -31,18 +31,18 @@ extension: "AVX.*" documentation: !string |- // SaturatedSub subtracts corresponding elements of two vectors with saturation. -- go: MaskedSub +- go: SubMasked masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // MaskedSub subtracts corresponding elements of two vectors. -- go: MaskedSaturatedSub + // SubMasked subtracts corresponding elements of two vectors. +- go: SaturatedSubMasked masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // MaskedSaturatedSub subtracts corresponding elements of two vectors with saturation. + // SaturatedSubMasked subtracts corresponding elements of two vectors with saturation. 
- go: PairwiseAdd commutative: "false" extension: "AVX.*" diff --git a/internal/simdgen/ops/AddSub/go.yaml b/internal/simdgen/ops/AddSub/go.yaml index c2df1e2c..793bc489 100644 --- a/internal/simdgen/ops/AddSub/go.yaml +++ b/internal/simdgen/ops/AddSub/go.yaml @@ -8,7 +8,7 @@ - *any out: - *any -- go: MaskedAdd +- go: AddMasked asm: "VPADD[BWDQ]|VADDP[SD]" in: - class: mask @@ -35,7 +35,7 @@ - *uint out: - *uint -- go: MaskedSaturatedAdd +- go: SaturatedAddMasked asm: "VPADDS[BWDQ]" in: - class: mask @@ -43,7 +43,7 @@ - *int out: - *int -- go: MaskedSaturatedAdd +- go: SaturatedAddMasked asm: "VPADDS[BWDQ]" in: - class: mask @@ -60,7 +60,7 @@ - *any out: &1any - *any -- go: MaskedSub +- go: SubMasked asm: "VPSUB[BWDQ]|VSUBP[SD]" in: - class: mask @@ -83,7 +83,7 @@ - *uint out: - *uint -- go: MaskedSaturatedSub +- go: SaturatedSubMasked asm: "VPSUBS[BWDQ]" in: - class: mask @@ -91,7 +91,7 @@ - *int out: - *int -- go: MaskedSaturatedSub +- go: SaturatedSubMasked asm: "VPSUBS[BWDQ]" in: - class: mask diff --git a/internal/simdgen/ops/BitwiseLogic/categories.yaml b/internal/simdgen/ops/BitwiseLogic/categories.yaml index 4d948364..1ef1d360 100644 --- a/internal/simdgen/ops/BitwiseLogic/categories.yaml +++ b/internal/simdgen/ops/BitwiseLogic/categories.yaml @@ -4,44 +4,44 @@ extension: "AVX.*" documentation: !string |- // And performs a bitwise AND operation between two vectors. -- go: MaskedAnd +- go: AndMasked masked: "true" commutative: "true" extension: "AVX.*" documentation: !string |- - // MaskedAnd performs a masked bitwise AND operation between two vectors. + // AndMasked performs a masked bitwise AND operation between two vectors. - go: Or commutative: "true" extension: "AVX.*" documentation: !string |- // Or performs a bitwise OR operation between two vectors. -- go: MaskedOr +- go: OrMasked masked: "true" commutative: "true" extension: "AVX.*" documentation: !string |- - // MaskedOr performs a masked bitwise OR operation between two vectors. 
+ // OrMasked performs a masked bitwise OR operation between two vectors. - go: AndNot commutative: "false" extension: "AVX.*" documentation: !string |- // AndNot performs a bitwise AND NOT operation between two vectors. -- go: MaskedAndNot +- go: AndNotMasked masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // MaskedAndNot performs a masked bitwise AND NOT operation between two vectors. + // AndNotMasked performs a masked bitwise AND NOT operation between two vectors. - go: Xor commutative: "true" extension: "AVX.*" documentation: !string |- // Xor performs a bitwise XOR operation between two vectors. -- go: MaskedXor +- go: XorMasked masked: "true" commutative: "true" extension: "AVX.*" documentation: !string |- - // MaskedXor performs a masked bitwise XOR operation between two vectors. + // XorMasked performs a masked bitwise XOR operation between two vectors. # We also have PTEST and VPTERNLOG, those should be hidden from the users # and only appear in rewrite rules. diff --git a/internal/simdgen/ops/BitwiseLogic/go.yaml b/internal/simdgen/ops/BitwiseLogic/go.yaml index 79d149ca..49e2dbc9 100644 --- a/internal/simdgen/ops/BitwiseLogic/go.yaml +++ b/internal/simdgen/ops/BitwiseLogic/go.yaml @@ -21,7 +21,7 @@ # Dword and Qword. # TODO: should we wildcard other smaller elemBits to VPANDQ or # VPANDD? Looks like elemBits doesn't really matter afterall in bitwise operations. 
-- go: MaskedAnd +- go: AndMasked asm: "VPAND[DQ]" in: - class: mask @@ -37,7 +37,7 @@ - *any out: - *any -- go: MaskedAndNot +- go: AndNotMasked asm: "VPANDN[DQ]" in: - class: mask @@ -53,7 +53,7 @@ - *any out: - *any -- go: MaskedOr +- go: OrMasked asm: "VPOR[DQ]" in: - class: mask @@ -69,7 +69,7 @@ - *any out: - *any -- go: MaskedXor +- go: XorMasked asm: "VPXOR[DQ]" in: - class: mask diff --git a/internal/simdgen/ops/Compares/categories.yaml b/internal/simdgen/ops/Compares/categories.yaml index 3c607c76..3b021e4c 100644 --- a/internal/simdgen/ops/Compares/categories.yaml +++ b/internal/simdgen/ops/Compares/categories.yaml @@ -49,52 +49,52 @@ documentation: !string |- // Greater compares for greater than. -- go: MaskedEqual +- go: EqualMasked constImm: 0 masked: "true" commutative: "true" extension: "AVX.*" documentation: !string |- - // MaskedEqual compares for equality, masked. -- go: MaskedLess + // EqualMasked compares for equality, masked. +- go: LessMasked constImm: 1 masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // MaskedLess compares for less than. -- go: MaskedLessEqual + // LessMasked compares for less than. +- go: LessEqualMasked constImm: 2 masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // MaskedLessEqual compares for less than or equal. -- go: MaskedIsNan # For float only. + // LessEqualMasked compares for less than or equal. +- go: IsNanMasked # For float only. constImm: 3 masked: "true" commutative: "true" extension: "AVX.*" documentation: !string |- - // MaskedIsNan checks if elements are NaN. Use as x.IsNan(x). -- go: MaskedNotEqual + // IsNanMasked checks if elements are NaN. Use as x.IsNan(x). +- go: NotEqualMasked constImm: 4 masked: "true" commutative: "true" extension: "AVX.*" documentation: !string |- - // MaskedNotEqual compares for inequality. -- go: MaskedGreaterEqual + // NotEqualMasked compares for inequality. 
+- go: GreaterEqualMasked constImm: 13 masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // MaskedGreaterEqual compares for greater than or equal. -- go: MaskedGreater + // GreaterEqualMasked compares for greater than or equal. +- go: GreaterMasked constImm: 14 masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // MaskedGreater compares for greater than. + // GreaterMasked compares for greater than. diff --git a/internal/simdgen/ops/Compares/go.yaml b/internal/simdgen/ops/Compares/go.yaml index 2fc1f225..8e46cdbd 100644 --- a/internal/simdgen/ops/Compares/go.yaml +++ b/internal/simdgen/ops/Compares/go.yaml @@ -17,7 +17,7 @@ in: *int2 out: - *anyvregToMask -- go: MaskedEqual +- go: EqualMasked asm: "V?PCMPEQ[BWDQ]" in: &maskint2 - class: mask @@ -25,14 +25,14 @@ - *int out: - class: mask -- go: MaskedGreater +- go: GreaterMasked asm: "V?PCMPGT[BWDQ]" in: *maskint2 out: - class: mask # The const imm predicated compares after AVX512, please see categories.yaml # for const imm specification. -- go: Masked(Equal|Greater|Less|LessEqual|GreaterEqual|NotEqual) +- go: (Equal|Greater|Less|LessEqual|GreaterEqual|NotEqual)Masked asm: "VPCMP[BWDQ]" in: - class: mask @@ -44,7 +44,7 @@ const: 0 # Just a placeholder, will be overwritten by const imm porting. out: - class: mask -- go: Masked(Equal|Greater|Less|LessEqual|GreaterEqual|NotEqual) +- go: (Equal|Greater|Less|LessEqual|GreaterEqual|NotEqual)Masked asm: "VPCMPU[BWDQ]" in: - class: mask @@ -71,7 +71,7 @@ - go: $t # We still need the output to be the same shape as inputs. 
overwriteBase: int overwriteClass: mask -- go: Masked(Equal|Greater|Less|LessEqual|GreaterEqual|NotEqual|IsNan) +- go: (Equal|Greater|Less|LessEqual|GreaterEqual|NotEqual|IsNan)Masked asm: "VCMPP[SD]" in: - class: mask diff --git a/internal/simdgen/ops/FPonlyArith/categories.yaml b/internal/simdgen/ops/FPonlyArith/categories.yaml index 51dfd04d..356b06d3 100644 --- a/internal/simdgen/ops/FPonlyArith/categories.yaml +++ b/internal/simdgen/ops/FPonlyArith/categories.yaml @@ -4,51 +4,51 @@ extension: "AVX.*" documentation: !string |- // Div divides elements of two vectors. -- go: MaskedDiv +- go: DivMasked commutative: "false" masked: "true" extension: "AVX.*" documentation: !string |- - // MaskedDiv divides elements of two vectors. + // DivMasked divides elements of two vectors. - go: Sqrt commutative: "false" extension: "AVX.*" documentation: !string |- // Sqrt computes the square root of each element. -- go: MaskedSqrt +- go: SqrtMasked commutative: "false" masked: "true" extension: "AVX.*" documentation: !string |- - // MaskedSqrt computes the square root of each element. + // SqrtMasked computes the square root of each element. - go: ApproximateReciprocal commutative: "false" extension: "AVX.*" documentation: !string |- // ApproximateReciprocal computes an approximate reciprocal of each element. -- go: MaskedApproximateReciprocal +- go: ApproximateReciprocalMasked commutative: "false" masked: "true" extension: "AVX.*" documentation: !string |- - // MaskedApproximateReciprocal computes an approximate reciprocal of each element. + // ApproximateReciprocalMasked computes an approximate reciprocal of each element. - go: ApproximateReciprocalOfSqrt commutative: "false" extension: "AVX.*" documentation: !string |- // ApproximateReciprocalOfSqrt computes an approximate reciprocal of the square root of each element. 
-- go: MaskedApproximateReciprocalOfSqrt +- go: ApproximateReciprocalOfSqrtMasked commutative: "false" masked: "true" extension: "AVX.*" documentation: !string |- - // MaskedApproximateReciprocalOfSqrt computes an approximate reciprocal of the square root of each element. -- go: MaskedMulByPowOf2 # This operation is all after AVX512, the unmasked version will be generated. + // ApproximateReciprocalOfSqrtMasked computes an approximate reciprocal of the square root of each element. +- go: MulByPowOf2Masked # This operation is all after AVX512, the unmasked version will be generated. commutative: "false" masked: "true" extension: "AVX.*" documentation: !string |- - // MaskedMulByPowOf2 multiplies elements by a power of 2. + // MulByPowOf2Masked multiplies elements by a power of 2. - go: Round commutative: "false" @@ -56,20 +56,20 @@ constImm: 0 documentation: !string |- // Round rounds elements to the nearest integer. -- go: MaskedRoundWithPrecision +- go: RoundWithPrecisionMasked commutative: "false" extension: "AVX.*" constImm: 0 masked: "true" documentation: !string |- - // MaskedRoundWithPrecision rounds elements with specified precision. -- go: MaskedDiffWithRoundWithPrecision + // RoundWithPrecisionMasked rounds elements with specified precision. +- go: DiffWithRoundWithPrecisionMasked commutative: "false" extension: "AVX.*" constImm: 0 masked: "true" documentation: !string |- - // MaskedDiffWithRoundWithPrecision computes the difference after rounding with specified precision. + // DiffWithRoundWithPrecisionMasked computes the difference after rounding with specified precision. - go: Floor commutative: "false" @@ -77,20 +77,20 @@ constImm: 1 documentation: !string |- // Floor rounds elements down to the nearest integer. -- go: MaskedFloorWithPrecision +- go: FloorWithPrecisionMasked commutative: "false" extension: "AVX.*" constImm: 1 masked: "true" documentation: !string |- - // MaskedFloorWithPrecision rounds elements down with specified precision, masked. 
-- go: MaskedDiffWithFloorWithPrecision + // FloorWithPrecisionMasked rounds elements down with specified precision, masked. +- go: DiffWithFloorWithPrecisionMasked commutative: "false" extension: "AVX.*" constImm: 1 masked: "true" documentation: !string |- - // MaskedDiffWithFloorWithPrecision computes the difference after flooring with specified precision. + // DiffWithFloorWithPrecisionMasked computes the difference after flooring with specified precision. - go: Ceil commutative: "false" @@ -98,20 +98,20 @@ constImm: 2 documentation: !string |- // Ceil rounds elements up to the nearest integer. -- go: MaskedCeilWithPrecision +- go: CeilWithPrecisionMasked commutative: "false" extension: "AVX.*" constImm: 2 masked: "true" documentation: !string |- - // MaskedCeilWithPrecision rounds elements up with specified precision, masked. -- go: MaskedDiffWithCeilWithPrecision + // CeilWithPrecisionMasked rounds elements up with specified precision, masked. +- go: DiffWithCeilWithPrecisionMasked commutative: "false" extension: "AVX.*" constImm: 2 masked: "true" documentation: !string |- - // MaskedDiffWithCeilWithPrecision computes the difference after ceiling with specified precision. + // DiffWithCeilWithPrecisionMasked computes the difference after ceiling with specified precision. - go: Trunc commutative: "false" @@ -119,20 +119,20 @@ constImm: 3 documentation: !string |- // Trunc truncates elements towards zero. -- go: MaskedTruncWithPrecision +- go: TruncWithPrecisionMasked commutative: "false" extension: "AVX.*" constImm: 3 masked: "true" documentation: !string |- - // MaskedTruncWithPrecision truncates elements with specified precision. -- go: MaskedDiffWithTruncWithPrecision + // TruncWithPrecisionMasked truncates elements with specified precision. 
+- go: DiffWithTruncWithPrecisionMasked commutative: "false" extension: "AVX.*" constImm: 3 masked: "true" documentation: !string |- - // MaskedDiffWithTruncWithPrecision computes the difference after truncating with specified precision. + // DiffWithTruncWithPrecisionMasked computes the difference after truncating with specified precision. - go: AddSub commutative: "false" diff --git a/internal/simdgen/ops/FPonlyArith/go.yaml b/internal/simdgen/ops/FPonlyArith/go.yaml index 48e071ec..29a7f43b 100644 --- a/internal/simdgen/ops/FPonlyArith/go.yaml +++ b/internal/simdgen/ops/FPonlyArith/go.yaml @@ -8,7 +8,7 @@ - *fp out: &1fp - *fp -- go: MaskedDiv +- go: DivMasked asm: "V?DIVP[SD]" in: &1mask2fp - class: mask @@ -19,13 +19,13 @@ asm: "V?SQRTP[SD]" in: *1fp out: *1fp -- go: MaskedSqrt +- go: SqrtMasked asm: "V?SQRTP[SD]" in: &1mask1fp - class: mask - *fp out: *1fp -- go: MaskedApproximateReciprocal +- go: ApproximateReciprocalMasked asm: "VRCP14P[SD]" in: *1mask1fp out: *1fp @@ -33,11 +33,11 @@ asm: "V?RSQRTPS" in: *1fp out: *1fp -- go: MaskedApproximateReciprocalOfSqrt +- go: ApproximateReciprocalOfSqrtMasked asm: "VRSQRT14P[SD]" in: *1mask1fp out: *1fp -- go: MaskedMulByPowOf2 +- go: MulByPowOf2Masked asm: "VSCALEFP[SD]" in: *1mask2fp out: *1fp @@ -50,7 +50,7 @@ const: 0 # place holder out: *1fp -- go: "Masked(Round|Ceil|Floor|Trunc)(SuppressException)?WithPrecision" +- go: "(Round|Ceil|Floor|Trunc)WithPrecisionMasked" asm: "VRNDSCALEP[SD]" in: - class: mask @@ -59,7 +59,7 @@ const: 0 # place holder immOffset: 4 # "M", round to numbers with M digits after dot(by means of binary number). 
out: *1fp -- go: "MaskedDiffWith(Round|Ceil|Floor|Trunc)(SuppressException)?WithPrecision" +- go: "DiffWith(Round|Ceil|Floor|Trunc)WithPrecisionMasked" asm: "VREDUCEP[SD]" in: - class: mask diff --git a/internal/simdgen/ops/GaloisField/categories.yaml b/internal/simdgen/ops/GaloisField/categories.yaml index 0b3978a4..84b64cc1 100644 --- a/internal/simdgen/ops/GaloisField/categories.yaml +++ b/internal/simdgen/ops/GaloisField/categories.yaml @@ -1,27 +1,27 @@ !sum -- go: MaskedGaloisFieldAffineTransform +- go: GaloisFieldAffineTransformMasked masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // MaskedGaloisFieldAffineTransform computes an affine transformation in GF(2^8): + // GaloisFieldAffineTransformMasked computes an affine transformation in GF(2^8): // x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrixes; // b is an 8-bit vector. The affine transformation is y * x + b, with each element of y // corresponding to a group of 8 elements in x. -- go: MaskedGaloisFieldAffineTransformInversed +- go: GaloisFieldAffineTransformInversedMasked masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // MaskedGaloisFieldAffineTransform computes an affine transformation in GF(2^8), + // GaloisFieldAffineTransformMasked computes an affine transformation in GF(2^8), // with x inversed with reduction polynomial x^8 + x^4 + x^3 + x + 1: // x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrixes; // b is an 8-bit vector. The affine transformation is y * x + b, with each element of y // corresponding to a group of 8 elements in x. 
-- go: MaskedGaloisFieldMul +- go: GaloisFieldMulMasked masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // MaskedGaloisFieldMul computes element-wise GF(2^8) multiplication with + // GaloisFieldMulMasked computes element-wise GF(2^8) multiplication with // reduction polynomial x^8 + x^4 + x^3 + x + 1. \ No newline at end of file diff --git a/internal/simdgen/ops/GaloisField/go.yaml b/internal/simdgen/ops/GaloisField/go.yaml index c4d02e17..84dc1619 100644 --- a/internal/simdgen/ops/GaloisField/go.yaml +++ b/internal/simdgen/ops/GaloisField/go.yaml @@ -1,5 +1,5 @@ !sum -- go: MaskedGaloisFieldAffineTransform +- go: GaloisFieldAffineTransformMasked asm: VGF2P8AFFINEQB operandOrder: 2I # 2nd operand, then immediate in: &AffineArgs @@ -18,14 +18,14 @@ out: - *uint8 -- go: MaskedGaloisFieldAffineTransformInversed +- go: GaloisFieldAffineTransformInversedMasked asm: VGF2P8AFFINEINVQB operandOrder: 2I # 2nd operand, then immediate in: *AffineArgs out: - *uint8 -- go: MaskedGaloisFieldMul +- go: GaloisFieldMulMasked asm: VGF2P8MULB in: - class: mask diff --git a/internal/simdgen/ops/IntOnlyArith/categories.yaml b/internal/simdgen/ops/IntOnlyArith/categories.yaml index b6c83bf3..96015d28 100644 --- a/internal/simdgen/ops/IntOnlyArith/categories.yaml +++ b/internal/simdgen/ops/IntOnlyArith/categories.yaml @@ -4,12 +4,12 @@ extension: "AVX.*" # VPAVGB/W are available across various AVX versions documentation: !string |- // Average computes the rounded average of corresponding elements. -- go: MaskedAverage +- go: AverageMasked commutative: "true" masked: "true" extension: "AVX512.*" # Masked operations are typically AVX512 documentation: !string |- - // MaskedAverage computes the rounded average of corresponding elements. + // AverageMasked computes the rounded average of corresponding elements. 
- go: Absolute commutative: "false" @@ -17,12 +17,12 @@ extension: "AVX.*" # VPABSB/W/D are AVX, VPABSQ is AVX512 documentation: !string |- // Absolute computes the absolute value of each element. -- go: MaskedAbsolute +- go: AbsoluteMasked commutative: "false" masked: "true" extension: "AVX512.*" documentation: !string |- - // MaskedAbsolute computes the absolute value of each element. + // AbsoluteMasked computes the absolute value of each element. - go: Sign # Applies sign of second operand to first: sign(val, sign_src) @@ -33,9 +33,9 @@ // whichever constant is nearest to the value of the second operand. # Sign does not have masked version -- go: MaskedPopCount +- go: PopCountMasked commutative: "false" masked: "true" extension: "AVX512.*" # VPOPCNT instructions are AVX512 (BITALG or VPOPCNTDQ) documentation: !string |- - // MaskedPopCount counts the number of set bits in each element. \ No newline at end of file + // PopCountMasked counts the number of set bits in each element. \ No newline at end of file diff --git a/internal/simdgen/ops/IntOnlyArith/go.yaml b/internal/simdgen/ops/IntOnlyArith/go.yaml index e8aca3c6..3ccce6f0 100644 --- a/internal/simdgen/ops/IntOnlyArith/go.yaml +++ b/internal/simdgen/ops/IntOnlyArith/go.yaml @@ -10,7 +10,7 @@ - *uint_t out: - *uint_t -- go: MaskedAverage +- go: AverageMasked asm: "VPAVG[BW]" in: - class: mask @@ -29,7 +29,7 @@ base: int out: - *int_t # Output is magnitude, fits in the same signed type -- go: MaskedAbsolute +- go: AbsoluteMasked asm: "VPABS[BWDQ]" in: - class: mask @@ -51,7 +51,7 @@ # Population Count (count set bits in each element) # Instructions: VPOPCNTB, VPOPCNTW (AVX512_BITALG) # VPOPCNTD, VPOPCNTQ (AVX512_VPOPCNTDQ) -- go: MaskedPopCount +- go: PopCountMasked asm: "VPOPCNT[BWDQ]" in: - class: mask diff --git a/internal/simdgen/ops/MLOps/categories.yaml b/internal/simdgen/ops/MLOps/categories.yaml index 54911b16..343b8f54 100644 --- a/internal/simdgen/ops/MLOps/categories.yaml +++ 
b/internal/simdgen/ops/MLOps/categories.yaml @@ -5,12 +5,12 @@ documentation: !string |- // PairDotProd multiplies the elements and add the pairs together, // yielding a vector of half as many elements with twice the input element size. -- go: MaskedPairDotProd +- go: PairDotProdMasked masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // MaskedPairDotProd multiplies the elements and add the pairs together, + // PairDotProdMasked multiplies the elements and add the pairs together, // yielding a vector of half as many elements with twice the input element size. - go: SaturatedUnsignedSignedPairDotProd commutative: "false" @@ -18,12 +18,12 @@ documentation: !string |- // SaturatedPairDotProd multiplies the elements and add the pairs together with saturation, // yielding a vector of half as many elements with twice the input element size. -- go: MaskedSaturatedUnsignedSignedPairDotProd +- go: SaturatedUnsignedSignedPairDotProdMasked masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // MaskedSaturatedPairDotProd multiplies the elements and add the pairs together with saturation, + // SaturatedPairDotProdMasked multiplies the elements and add the pairs together with saturation, // yielding a vector of half as many elements with twice the input element size. # QuadDotProd, i.e. VPDPBUSD(S) are operations with src/dst on the same register, we are not supporting this as of now. @@ -37,60 +37,60 @@ extension: "AVX.*" documentation: !string |- // UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x. -- go: MaskedUnsignedSignedQuadDotProdAccumulate +- go: UnsignedSignedQuadDotProdAccumulateMasked masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // MaskedUnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x. 
+ // UnsignedSignedQuadDotProdAccumulateMasked performs dot products on groups of 4 elements of y and z and accumulates the results to x. - go: SaturatedUnsignedSignedQuadDotProdAccumulate commutative: "false" extension: "AVX.*" documentation: !string |- // SaturatedUnsignedSignedQuadDotProdAccumulate multiplies performs dot products on groups of 4 elements of y and z and accumulates the results to x. -- go: MaskedSaturatedUnsignedSignedQuadDotProdAccumulate +- go: SaturatedUnsignedSignedQuadDotProdAccumulateMasked masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // MaskedSaturatedUnsignedSignedQuadDotProdAccumulate multiplies performs dot products on groups of 4 elements of y and z and accumulates the results to x. + // SaturatedUnsignedSignedQuadDotProdAccumulateMasked multiplies performs dot products on groups of 4 elements of y and z and accumulates the results to x. - go: PairDotProdAccumulate commutative: "false" extension: "AVX.*" documentation: !string |- // PairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x. -- go: MaskedPairDotProdAccumulate +- go: PairDotProdAccumulateMasked masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // MaskedPairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x. + // PairDotProdAccumulateMasked performs dot products on pairs of elements of y and z and accumulates the results to x. - go: SaturatedPairDotProdAccumulate commutative: "false" extension: "AVX.*" documentation: !string |- // SaturatedPairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x. 
-- go: MaskedSaturatedPairDotProdAccumulate +- go: SaturatedPairDotProdAccumulateMasked masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // MaskedSaturatedPairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x. -- go: MaskedFusedMultiplyAdd + // SaturatedPairDotProdAccumulateMasked performs dot products on pairs of elements of y and z and accumulates the results to x. +- go: FusedMultiplyAddMasked masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // MaskedFusedMultiplyAdd performs `(v1 * v2) + v3`. -- go: MaskedFusedMultiplyAddSub + // FusedMultiplyAddMasked performs `(v1 * v2) + v3`. +- go: FusedMultiplyAddSubMasked masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // MaskedFusedMultiplyAddSub performs `(v1 * v2) - v3` for odd-indexed elements, and `(v1 * v2) + v3` for even-indexed elements. -- go: MaskedFusedMultiplySubAdd + // FusedMultiplyAddSubMasked performs `(v1 * v2) - v3` for odd-indexed elements, and `(v1 * v2) + v3` for even-indexed elements. +- go: FusedMultiplySubAddMasked masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // MaskedFusedMultiplySubAdd performs `(v1 * v2) + v3` for odd-indexed elements, and `(v1 * v2) - v3` for even-indexed elements. + // FusedMultiplySubAddMasked performs `(v1 * v2) + v3` for odd-indexed elements, and `(v1 * v2) - v3` for even-indexed elements. 
diff --git a/internal/simdgen/ops/MLOps/go.yaml b/internal/simdgen/ops/MLOps/go.yaml index da894ac7..fb6b4fd1 100644 --- a/internal/simdgen/ops/MLOps/go.yaml +++ b/internal/simdgen/ops/MLOps/go.yaml @@ -10,7 +10,7 @@ - &int2 # The elemBits are different go: $t2 base: int -- go: MaskedPairDotProd +- go: PairDotProdMasked asm: VPMADDWD in: - class: mask @@ -29,7 +29,7 @@ base: int out: - *int2 -- go: MaskedSaturatedUnsignedSignedPairDotProd +- go: SaturatedUnsignedSignedPairDotProdMasked asm: VPMADDUBSW in: - class: mask @@ -70,7 +70,7 @@ overwriteElementBits: 8 out: - *qdpa_acc -- go: MaskedUnsignedSignedQuadDotProdAccumulate +- go: UnsignedSignedQuadDotProdAccumulateMasked asm: "VPDPBUSD" in: - *qdpa_acc @@ -87,7 +87,7 @@ - *qdpa_src2 out: - *qdpa_acc -- go: MaskedSaturatedUnsignedSignedQuadDotProdAccumulate +- go: SaturatedUnsignedSignedQuadDotProdAccumulateMasked asm: "VPDPBUSDS" in: - *qdpa_acc @@ -113,7 +113,7 @@ overwriteElementBits: 16 out: - *pdpa_acc -- go: MaskedPairDotProdAccumulate +- go: PairDotProdAccumulateMasked asm: "VPDPWSSD" in: - *pdpa_acc @@ -130,7 +130,7 @@ - *pdpa_src2 out: - *pdpa_acc -- go: MaskedSaturatedPairDotProdAccumulate +- go: SaturatedPairDotProdAccumulateMasked asm: "VPDPWSSDS" in: - *pdpa_acc @@ -139,7 +139,7 @@ - *pdpa_src2 out: - *pdpa_acc -- go: MaskedFusedMultiplyAdd +- go: FusedMultiplyAddMasked asm: "VFMADD213PS|VFMADD213PD" in: - &fma_op @@ -150,7 +150,7 @@ - *fma_op out: - *fma_op -- go: MaskedFusedMultiplyAddSub +- go: FusedMultiplyAddSubMasked asm: "VFMADDSUB213PS|VFMADDSUB213PD" in: - *fma_op @@ -159,7 +159,7 @@ - *fma_op out: - *fma_op -- go: MaskedFusedMultiplySubAdd +- go: FusedMultiplySubAddMasked asm: "VFMSUBADD213PS|VFMSUBADD213PD" in: - *fma_op diff --git a/internal/simdgen/ops/MinMax/categories.yaml b/internal/simdgen/ops/MinMax/categories.yaml index c64eb24e..33578ee4 100644 --- a/internal/simdgen/ops/MinMax/categories.yaml +++ b/internal/simdgen/ops/MinMax/categories.yaml @@ -4,20 +4,20 @@ extension: "AVX.*" 
documentation: !string |- // Max computes the maximum of corresponding elements. -- go: MaskedMax +- go: MaxMasked commutative: "true" masked: "true" extension: "AVX.*" documentation: !string |- - // MaskedMax computes the maximum of corresponding elements. + // MaxMasked computes the maximum of corresponding elements. - go: Min commutative: "true" extension: "AVX.*" documentation: !string |- // Min computes the minimum of corresponding elements. -- go: MaskedMin +- go: MinMasked commutative: "true" masked: "true" extension: "AVX.*" documentation: !string |- - // MaskedMin computes the minimum of corresponding elements. + // MinMasked computes the minimum of corresponding elements. diff --git a/internal/simdgen/ops/MinMax/go.yaml b/internal/simdgen/ops/MinMax/go.yaml index f307e6b6..db4286f3 100644 --- a/internal/simdgen/ops/MinMax/go.yaml +++ b/internal/simdgen/ops/MinMax/go.yaml @@ -17,14 +17,14 @@ - *uint out: &1uint - *uint -- go: MaskedMax +- go: MaxMasked asm: "V?PMAXS[BWDQ]" in: &1mask2int - class: mask - *int - *int out: *1int -- go: MaskedMax +- go: MaxMasked asm: "V?PMAXU[BWDQ]" in: &1mask2uint - class: mask @@ -40,11 +40,11 @@ asm: "V?PMINU[BWDQ]" in: *2uint out: *1uint -- go: MaskedMin +- go: MinMasked asm: "V?PMINS[BWDQ]" in: *1mask2int out: *1int -- go: MaskedMin +- go: MinMasked asm: "V?PMINU[BWDQ]" in: *1mask2uint out: *1uint @@ -58,7 +58,7 @@ - *float out: &1float - *float -- go: MaskedMax +- go: MaxMasked asm: "V?MAXP[SD]" in: &1mask2float - class: mask @@ -69,7 +69,7 @@ asm: "V?MINP[SD]" in: *2float out: *1float -- go: MaskedMin +- go: MinMasked asm: "V?MINP[SD]" in: *1mask2float out: *1float \ No newline at end of file diff --git a/internal/simdgen/ops/Mul/categories.yaml b/internal/simdgen/ops/Mul/categories.yaml index c0f87beb..34b3ab56 100644 --- a/internal/simdgen/ops/Mul/categories.yaml +++ b/internal/simdgen/ops/Mul/categories.yaml @@ -20,28 +20,28 @@ extension: "AVX.*" documentation: !string |- // MulLow multiplies elements and stores the 
low part of the result. -- go: MaskedMul +- go: MulMasked masked: "true" commutative: "true" extension: "AVX.*" documentation: !string |- - // MaskedMul multiplies corresponding elements of two vectors, masked. -- go: MaskedMulEvenWiden + // MulMasked multiplies corresponding elements of two vectors, masked. +- go: MulEvenWidenMasked masked: "true" commutative: "true" extension: "AVX.*" documentation: !string |- - // MaskedMulEvenWiden multiplies even-indexed elements, widening the result, masked. + // MulEvenWidenMasked multiplies even-indexed elements, widening the result, masked. // Result[i] = v1.Even[i] * v2.Even[i]. -- go: MaskedMulHigh +- go: MulHighMasked masked: "true" commutative: "true" extension: "AVX.*" documentation: !string |- - // MaskedMulHigh multiplies elements and stores the high part of the result, masked. -- go: MaskedMulLow + // MulHighMasked multiplies elements and stores the high part of the result, masked. +- go: MulLowMasked masked: "true" commutative: "true" extension: "AVX.*" documentation: !string |- - // MaskedMulLow multiplies elements and stores the low part of the result, masked. + // MulLowMasked multiplies elements and stores the low part of the result, masked. 
diff --git a/internal/simdgen/ops/Mul/go.yaml b/internal/simdgen/ops/Mul/go.yaml index a75f4188..9ae3a850 100644 --- a/internal/simdgen/ops/Mul/go.yaml +++ b/internal/simdgen/ops/Mul/go.yaml @@ -10,7 +10,7 @@ - *fp out: - *fp -- go: MaskedMul +- go: MulMasked asm: "VMULP[SD]" in: - class: mask @@ -45,7 +45,7 @@ - &uint2 go: $t2 base: uint -- go: MaskedMulEvenWiden +- go: MulEvenWidenMasked asm: "VPMULDQ" in: - class: mask @@ -53,7 +53,7 @@ - *int out: - *int2 -- go: MaskedMulEvenWiden +- go: MulEvenWidenMasked asm: "VPMULUDQ" in: - class: mask @@ -79,7 +79,7 @@ - *uint out: - *uint2 -- go: MaskedMulHigh +- go: MulHighMasked asm: "VPMULHW" in: - class: mask @@ -87,7 +87,7 @@ - *int out: - *int2 -- go: MaskedMulHigh +- go: MulHighMasked asm: "VPMULHUW" in: - class: mask @@ -106,7 +106,7 @@ - *int out: - *int2 -- go: MaskedMulLow +- go: MulLowMasked asm: "VPMULL[WDQ]" in: - class: mask diff --git a/internal/simdgen/ops/ShiftRotate/categories.yaml b/internal/simdgen/ops/ShiftRotate/categories.yaml index 91a0e3d0..b8bcb28e 100644 --- a/internal/simdgen/ops/ShiftRotate/categories.yaml +++ b/internal/simdgen/ops/ShiftRotate/categories.yaml @@ -5,39 +5,39 @@ extension: "AVX.*" documentation: !string |- // ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. -- go: MaskedShiftAllLeft +- go: ShiftAllLeftMasked nameAndSizeCheck: "true" masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // MaskedShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. + // ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. - go: ShiftAllRight nameAndSizeCheck: "true" commutative: "false" extension: "AVX.*" documentation: !string |- // ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. 
-- go: MaskedShiftAllRight +- go: ShiftAllRightMasked nameAndSizeCheck: "true" masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // MaskedShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. + // ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. - go: ShiftAllRightSignExtended nameAndSizeCheck: "true" commutative: "false" extension: "AVX.*" documentation: !string |- // ShiftAllRightSignExtended shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. -- go: MaskedShiftAllRightSignExtended +- go: ShiftAllRightSignExtendedMasked nameAndSizeCheck: "true" masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // MaskedShiftAllRightSignExtended shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. + // ShiftAllRightSignExtendedMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. - go: ShiftLeft nameAndSizeCheck: "true" @@ -45,98 +45,98 @@ extension: "AVX.*" documentation: !string |- // ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. -- go: MaskedShiftLeft +- go: ShiftLeftMasked nameAndSizeCheck: "true" masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // MaskedShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. + // ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. 
- go: ShiftRight nameAndSizeCheck: "true" commutative: "false" extension: "AVX.*" documentation: !string |- // ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. -- go: MaskedShiftRight +- go: ShiftRightMasked nameAndSizeCheck: "true" masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // MaskedShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. + // ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. - go: ShiftRightSignExtended nameAndSizeCheck: "true" commutative: "false" extension: "AVX.*" documentation: !string |- // ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. -- go: MaskedShiftRightSignExtended +- go: ShiftRightSignExtendedMasked nameAndSizeCheck: "true" masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // MaskedShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. + // ShiftRightSignExtendedMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. -- go: MaskedRotateAllLeft +- go: RotateAllLeftMasked nameAndSizeCheck: "true" masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // MaskedRotateAllLeft rotates each element to the left by the number of bits specified by the immediate. -- go: MaskedRotateLeft + // RotateAllLeftMasked rotates each element to the left by the number of bits specified by the immediate. 
+- go: RotateLeftMasked nameAndSizeCheck: "true" masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // MaskedRotateLeft rotates each element in x to the left by the number of bits specified by y's corresponding elements. -- go: MaskedRotateAllRight + // RotateLeftMasked rotates each element in x to the left by the number of bits specified by y's corresponding elements. +- go: RotateAllRightMasked nameAndSizeCheck: "true" masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // MaskedRotateAllRight rotates each element to the right by the number of bits specified by the immediate. -- go: MaskedRotateRight + // RotateAllRightMasked rotates each element to the right by the number of bits specified by the immediate. +- go: RotateRightMasked nameAndSizeCheck: "true" masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // MaskedRotateRight rotates each element in x to the right by the number of bits specified by y's corresponding elements. + // RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements. -- go: MaskedShiftAllLeftAndFillUpperFrom +- go: ShiftAllLeftAndFillUpperFromMasked nameAndSizeCheck: "true" masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // MaskedShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the + // ShiftAllLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. 
-- go: MaskedShiftAllRightAndFillUpperFrom +- go: ShiftAllRightAndFillUpperFromMasked nameAndSizeCheck: "true" masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // MaskedShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the + // ShiftAllRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. -- go: MaskedShiftLeftAndFillUpperFrom +- go: ShiftLeftAndFillUpperFromMasked nameAndSizeCheck: "true" masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // MaskedShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the + // ShiftLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. -- go: MaskedShiftRightAndFillUpperFrom +- go: ShiftRightAndFillUpperFromMasked nameAndSizeCheck: "true" masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // MaskedShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the + // ShiftRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. 
diff --git a/internal/simdgen/ops/ShiftRotate/go.yaml b/internal/simdgen/ops/ShiftRotate/go.yaml index 7205bab3..a42241db 100644 --- a/internal/simdgen/ops/ShiftRotate/go.yaml +++ b/internal/simdgen/ops/ShiftRotate/go.yaml @@ -11,7 +11,7 @@ go: Uint64x2 out: - *any -- go: MaskedShiftAllLeft +- go: ShiftAllLeftMasked asm: "VPSLL[WDQ]" in: - class: mask @@ -26,7 +26,7 @@ - *vecAsScalar64 out: - *any -- go: MaskedShiftAllRight +- go: ShiftAllRightMasked asm: "VPSRL[WDQ]" in: - class: mask @@ -43,7 +43,7 @@ - *vecAsScalar64 out: - *int -- go: MaskedShiftAllRightSignExtended +- go: ShiftAllRightSignExtendedMasked asm: "VPSRA[WDQ]" in: - class: mask @@ -60,7 +60,7 @@ - *any out: - *any -- go: MaskedShiftLeft +- go: ShiftLeftMasked asm: "VPSLLV[WD]" in: - class: mask @@ -79,7 +79,7 @@ - *anyOverwriteElemBits out: - *anyOverwriteElemBits -- go: MaskedShiftLeft +- go: ShiftLeftMasked asm: "VPSLLVQ" in: - class: mask @@ -94,7 +94,7 @@ - *any out: - *any -- go: MaskedShiftRight +- go: ShiftRightMasked asm: "VPSRLV[WD]" in: - class: mask @@ -110,7 +110,7 @@ - *anyOverwriteElemBits out: - *anyOverwriteElemBits -- go: MaskedShiftRight +- go: ShiftRightMasked asm: "VPSRLVQ" in: - class: mask @@ -125,7 +125,7 @@ - *any out: - *any -- go: MaskedShiftRightSignExtended +- go: ShiftRightSignExtendedMasked asm: "VPSRAV[WDQ]" in: - class: mask @@ -135,7 +135,7 @@ - *any # Rotate -- go: MaskedRotateAllLeft +- go: RotateAllLeftMasked asm: "VPROL[DQ]" in: - class: mask @@ -145,7 +145,7 @@ immOffset: 0 out: - *any -- go: MaskedRotateAllRight +- go: RotateAllRightMasked asm: "VPROR[DQ]" in: - class: mask @@ -153,7 +153,7 @@ - *pureImm out: - *any -- go: MaskedRotateLeft +- go: RotateLeftMasked asm: "VPROLV[DQ]" in: - class: mask @@ -161,7 +161,7 @@ - *any out: - *any -- go: MaskedRotateRight +- go: RotateRightMasked asm: "VPRORV[DQ]" in: - class: mask @@ -171,7 +171,7 @@ - *any # Bizzare shifts. 
-- go: MaskedShiftAllLeftAndFillUpperFrom +- go: ShiftAllLeftAndFillUpperFromMasked asm: "VPSHLD[WDQ]" in: - class: mask @@ -180,7 +180,7 @@ - *pureImm out: - *any -- go: MaskedShiftAllRightAndFillUpperFrom +- go: ShiftAllRightAndFillUpperFromMasked asm: "VPSHRD[WDQ]" in: - class: mask @@ -189,7 +189,7 @@ - *pureImm out: - *any -- go: MaskedShiftLeftAndFillUpperFrom +- go: ShiftLeftAndFillUpperFromMasked asm: "VPSHLDV[WDQ]" in: - *any @@ -198,7 +198,7 @@ - *any out: - *any -- go: MaskedShiftRightAndFillUpperFrom +- go: ShiftRightAndFillUpperFromMasked asm: "VPSHRDV[WDQ]" in: - *any From a3ce8a70821471585a478cd4d46099505dedc7f3 Mon Sep 17 00:00:00 2001 From: Junyang Shao Date: Tue, 8 Jul 2025 20:28:25 +0000 Subject: [PATCH 126/200] internal/simdgen: rename register mask fp to v This CL also makes a special case for VPSLL This CL generates CL 686476. Change-Id: I13b13901e44c123a02ad869ce0b84d052047b485 Reviewed-on: https://go-review.googlesource.com/c/arch/+/686556 LUCI-TryBot-Result: Go LUCI Reviewed-by: David Chase --- internal/simdgen/gen_simdMachineOps.go | 17 +++++++-- internal/simdgen/gen_simdssa.go | 48 +++++++++++++------------- internal/simdgen/gen_utility.go | 8 ++--- 3 files changed, 43 insertions(+), 30 deletions(-) diff --git a/internal/simdgen/gen_simdMachineOps.go b/internal/simdgen/gen_simdMachineOps.go index 7e4f1d1a..509cafcf 100644 --- a/internal/simdgen/gen_simdMachineOps.go +++ b/internal/simdgen/gen_simdMachineOps.go @@ -13,7 +13,7 @@ import ( const simdMachineOpsTmpl = `// Code generated by x/arch/internal/simdgen using 'go run . -xedPath $XED_PATH -o godefs -goroot $GOROOT go.yaml types.yaml categories.yaml'; DO NOT EDIT. 
package main -func simdAMD64Ops(fp11, fp21, fp2k, fpkfp, fp2kfp, fp2kk, fp31, fp3kfp, fpgpfp, fpgp regInfo) []opData { +func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vfpkv regInfo) []opData { return []opData{ {{- range .OpsData }} {name: "{{.OpName}}", argLength: {{.OpInLen}}, reg: {{.RegInfo}}, asm: "{{.Asm}}", commutative: {{.Comm}}, typ: "{{.Type}}", resultInArg0: {{.ResultInArg0}}}, @@ -46,7 +46,7 @@ func writeSIMDMachineOps(ops []Operation) *bytes.Buffer { OpsDataImm []opData } seen := map[string]struct{}{} - regInfoSet := map[string]bool{"fp11": true, "fp21": true, "fp2k": true, "fp2kfp": true, "fp2kk": true, "fpkfp": true, "fp31": true, "fp3kfp": true, "fpgpfp": true, "fpgp": true} + regInfoSet := map[string]bool{"v11": true, "v21": true, "v2k": true, "v2kv": true, "v2kk": true, "vkv": true, "v31": true, "v3kv": true, "vgpv": true, "vgp": true, "vfpv": true, "vfpkv": true} opsData := make([]opData, 0) opsDataImm := make([]opData, 0) for _, op := range ops { @@ -69,6 +69,19 @@ func writeSIMDMachineOps(ops []Operation) *bytes.Buffer { if err != nil { panic(err) } + idx, err := checkVecAsScalar(op) + if err != nil { + panic(err) + } + if idx != -1 { + if regInfo == "v21" { + regInfo = "vfpv" + } else if regInfo == "v2kv" { + regInfo = "vfpkv" + } else { + panic(fmt.Errorf("simdgen does not recognize uses of treatLikeAScalarOfSize with op regShape %s in op: %s", regInfo, op)) + } + } if _, ok := regInfoSet[regInfo]; !ok { panic(fmt.Errorf("unsupported register constraint, please update the template and AMD64Ops.go: %s. 
Op is %s", regInfo, op)) } diff --git a/internal/simdgen/gen_simdssa.go b/internal/simdgen/gen_simdssa.go index ee30c8eb..d42b264b 100644 --- a/internal/simdgen/gen_simdssa.go +++ b/internal/simdgen/gen_simdssa.go @@ -60,26 +60,26 @@ type tplSSAData struct { func writeSIMDSSA(ops []Operation) *bytes.Buffer { var ZeroingMask []string regInfoKeys := []string{ - "fp11", - "fp21", - "fp2k", - "fp2kfp", - "fp2kk", - "fpkfp", - "fp31", - "fp3kfp", - "fp11Imm8", - "fpkfpImm8", - "fp21Imm8", - "fp2kImm8", - "fp2kkImm8", - "fp31ResultInArg0", - "fp3kfpResultInArg0", - "fpXfp", - "fpXkfp", - "fpgpfpImm8", - "fpgpImm8", - "fp2kfpImm8", + "v11", + "v21", + "v2k", + "v2kv", + "v2kk", + "vkv", + "v31", + "v3kv", + "v11Imm8", + "vkvImm8", + "v21Imm8", + "v2kImm8", + "v2kkImm8", + "v31ResultInArg0", + "v3kvResultInArg0", + "vfpv", + "vfpkv", + "vgpvImm8", + "vgpImm8", + "v2kvImm8", } regInfoSet := map[string][]string{} for _, key := range regInfoKeys { @@ -121,10 +121,10 @@ func writeSIMDSSA(ops []Operation) *bytes.Buffer { panic(err) } if idx != -1 { - if regShape == "fp21" { - regShape = "fpXfp" - } else if regShape == "fp2kfp" { - regShape = "fpXkfp" + if regShape == "v21" { + regShape = "vfpv" + } else if regShape == "v2kv" { + regShape = "vfpkv" } else { panic(fmt.Errorf("simdgen does not recognize uses of treatLikeAScalarOfSize with op regShape %s in op: %s", regShape, op)) } diff --git a/internal/simdgen/gen_utility.go b/internal/simdgen/gen_utility.go index 42aab212..2f25d420 100644 --- a/internal/simdgen/gen_utility.go +++ b/internal/simdgen/gen_utility.go @@ -266,17 +266,17 @@ func (op *Operation) regShape() (string, error) { } - inRegs = rmAbbrev("fp", vRegInCnt) + inRegs = rmAbbrev("v", vRegInCnt) inRegs += rmAbbrev("gp", gRegInCnt) inMasks = rmAbbrev("k", kMaskInCnt) - outRegs = rmAbbrev("fp", vRegOutCnt) + outRegs = rmAbbrev("v", vRegOutCnt) outRegs += rmAbbrev("gp", gRegOutCnt) outMasks = rmAbbrev("k", kMaskOutCnt) if kMaskInCnt == 0 && kMaskOutCnt == 0 && gRegInCnt 
== 0 && gRegOutCnt == 0 { - // For pure fp we can abbreviate it as fp%d%d. - regInfo = fmt.Sprintf("fp%d%d", vRegInCnt, vRegOutCnt) + // For pure v we can abbreviate it as v%d%d. + regInfo = fmt.Sprintf("v%d%d", vRegInCnt, vRegOutCnt) } else if kMaskInCnt == 0 && kMaskOutCnt == 0 { regInfo = fmt.Sprintf("%s%s", inRegs, outRegs) } else { From 098cba1c797e6f9d663cccb864393a05bb0fa87a Mon Sep 17 00:00:00 2001 From: Junyang Shao Date: Wed, 9 Jul 2025 04:09:07 +0000 Subject: [PATCH 127/200] internal/simdgen: make AVX512 op use upper registers This CL generates CL 686695. Change-Id: I3397d2b63f80fb797778fbcb84f22a6c9e09a5a2 Reviewed-on: https://go-review.googlesource.com/c/arch/+/686775 LUCI-TryBot-Result: Go LUCI Reviewed-by: David Chase --- internal/simdgen/gen_simdMachineOps.go | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/internal/simdgen/gen_simdMachineOps.go b/internal/simdgen/gen_simdMachineOps.go index 509cafcf..05a191c5 100644 --- a/internal/simdgen/gen_simdMachineOps.go +++ b/internal/simdgen/gen_simdMachineOps.go @@ -8,12 +8,13 @@ import ( "bytes" "fmt" "sort" + "strings" ) const simdMachineOpsTmpl = `// Code generated by x/arch/internal/simdgen using 'go run . -xedPath $XED_PATH -o godefs -goroot $GOROOT go.yaml types.yaml categories.yaml'; DO NOT EDIT. 
package main -func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vfpkv regInfo) []opData { +func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vfpkv, w11, w21, w2k, wkw, w2kw, w2kk, w31, w3kw, wgpw, wgp, wfpw, wfpkw regInfo) []opData { return []opData{ {{- range .OpsData }} {name: "{{.OpName}}", argLength: {{.OpInLen}}, reg: {{.RegInfo}}, asm: "{{.Asm}}", commutative: {{.Comm}}, typ: "{{.Type}}", resultInArg0: {{.ResultInArg0}}}, @@ -46,7 +47,9 @@ func writeSIMDMachineOps(ops []Operation) *bytes.Buffer { OpsDataImm []opData } seen := map[string]struct{}{} - regInfoSet := map[string]bool{"v11": true, "v21": true, "v2k": true, "v2kv": true, "v2kk": true, "vkv": true, "v31": true, "v3kv": true, "vgpv": true, "vgp": true, "vfpv": true, "vfpkv": true} + regInfoSet := map[string]bool{ + "v11": true, "v21": true, "v2k": true, "v2kv": true, "v2kk": true, "vkv": true, "v31": true, "v3kv": true, "vgpv": true, "vgp": true, "vfpv": true, "vfpkv": true, + "w11": true, "w21": true, "w2k": true, "w2kw": true, "w2kk": true, "wkw": true, "w31": true, "w3kw": true, "wgpw": true, "wgp": true, "wfpw": true, "wfpkw": true} opsData := make([]opData, 0) opsDataImm := make([]opData, 0) for _, op := range ops { @@ -82,6 +85,10 @@ func writeSIMDMachineOps(ops []Operation) *bytes.Buffer { panic(fmt.Errorf("simdgen does not recognize uses of treatLikeAScalarOfSize with op regShape %s in op: %s", regInfo, op)) } } + // Makes AVX512 operations use upper registers + if strings.Contains(op.Extension, "AVX512") { + regInfo = strings.ReplaceAll(regInfo, "v", "w") + } if _, ok := regInfoSet[regInfo]; !ok { panic(fmt.Errorf("unsupported register constraint, please update the template and AMD64Ops.go: %s. Op is %s", regInfo, op)) } From c69f75918652a2d4ce0b652c65e7dfa948055631 Mon Sep 17 00:00:00 2001 From: Junyang Shao Date: Wed, 9 Jul 2025 16:23:00 +0000 Subject: [PATCH 128/200] internal/simdgen: Int64x2 Greater and Uint* Equals 1. 
Fix XED data error for Int64x2 Greater compare, add overwrite defs. 2. Uint* equals could just use Int* equals, relaxed the defs. This CL generates CL 686876. Change-Id: Ib110e2547246a4e197348912c77793ab2bfc9466 Reviewed-on: https://go-review.googlesource.com/c/arch/+/686817 LUCI-TryBot-Result: Go LUCI Reviewed-by: David Chase --- internal/simdgen/go.yaml | 46 +++++++++++++++++++-------- internal/simdgen/ops/Compares/go.yaml | 46 +++++++++++++++++++-------- 2 files changed, 64 insertions(+), 28 deletions(-) diff --git a/internal/simdgen/go.yaml b/internal/simdgen/go.yaml index 16dbf1e6..dd61308f 100644 --- a/internal/simdgen/go.yaml +++ b/internal/simdgen/go.yaml @@ -204,32 +204,52 @@ # Ints - go: Equal asm: "V?PCMPEQ[BWDQ]" - in: &int2 - - &int + in: + - &any go: $t - base: int # Looks like PCMP is on signed integers - but for equals does it really matters? - - *int + - *any out: - &anyvregToMask - go: $t # We still need the output to be the same shape as inputs. + go: $t overwriteBase: int overwriteClass: mask - go: Greater asm: "V?PCMPGT[BWDQ]" - in: *int2 + in: + - &int + go: $t + base: int + - *int out: - *anyvregToMask +# 256-bit VCMPGTQ's output elemBits is marked 32-bit in the XED data, we +# believe this is an error, so add this definition to overwrite. +- go: Greater + asm: "VPCMPGTQ" + in: + - &int64 + go: $t + base: int + elemBits: 64 + - *int64 + out: + - base: int + elemBits: 32 + overwriteElementBits: 64 - go: EqualMasked asm: "V?PCMPEQ[BWDQ]" - in: &maskint2 + in: - class: mask - - *int - - *int + - *any + - *any out: - class: mask - go: GreaterMasked asm: "V?PCMPGT[BWDQ]" - in: *maskint2 + in: + - class: mask + - *int + - *int out: - class: mask # The const imm predicated compares after AVX512, please see categories.yaml @@ -238,9 +258,7 @@ asm: "VPCMP[BWDQ]" in: - class: mask - - &int - go: $t - base: int + - *int - *int - class: immediate const: 0 # Just a placeholder, will be overwritten by const imm porting. 
@@ -270,7 +288,7 @@ - class: immediate const: 0 out: - - go: $t # We still need the output to be the same shape as inputs. + - go: $t overwriteBase: int overwriteClass: mask - go: (Equal|Greater|Less|LessEqual|GreaterEqual|NotEqual|IsNan)Masked diff --git a/internal/simdgen/ops/Compares/go.yaml b/internal/simdgen/ops/Compares/go.yaml index 8e46cdbd..d8bef2d9 100644 --- a/internal/simdgen/ops/Compares/go.yaml +++ b/internal/simdgen/ops/Compares/go.yaml @@ -2,32 +2,52 @@ # Ints - go: Equal asm: "V?PCMPEQ[BWDQ]" - in: &int2 - - &int + in: + - &any go: $t - base: int # Looks like PCMP is on signed integers - but for equals does it really matters? - - *int + - *any out: - &anyvregToMask - go: $t # We still need the output to be the same shape as inputs. + go: $t overwriteBase: int overwriteClass: mask - go: Greater asm: "V?PCMPGT[BWDQ]" - in: *int2 + in: + - &int + go: $t + base: int + - *int out: - *anyvregToMask +# 256-bit VCMPGTQ's output elemBits is marked 32-bit in the XED data, we +# believe this is an error, so add this definition to overwrite. +- go: Greater + asm: "VPCMPGTQ" + in: + - &int64 + go: $t + base: int + elemBits: 64 + - *int64 + out: + - base: int + elemBits: 32 + overwriteElementBits: 64 - go: EqualMasked asm: "V?PCMPEQ[BWDQ]" - in: &maskint2 + in: - class: mask - - *int - - *int + - *any + - *any out: - class: mask - go: GreaterMasked asm: "V?PCMPGT[BWDQ]" - in: *maskint2 + in: + - class: mask + - *int + - *int out: - class: mask # The const imm predicated compares after AVX512, please see categories.yaml @@ -36,9 +56,7 @@ asm: "VPCMP[BWDQ]" in: - class: mask - - &int - go: $t - base: int + - *int - *int - class: immediate const: 0 # Just a placeholder, will be overwritten by const imm porting. @@ -68,7 +86,7 @@ - class: immediate const: 0 out: - - go: $t # We still need the output to be the same shape as inputs. 
+ - go: $t overwriteBase: int overwriteClass: mask - go: (Equal|Greater|Less|LessEqual|GreaterEqual|NotEqual|IsNan)Masked From f999474202702cf6673d03edb26d8ceb28d9eb16 Mon Sep 17 00:00:00 2001 From: David Chase Date: Tue, 8 Jul 2025 12:54:02 -0400 Subject: [PATCH 129/200] internal/simdgen: emit more "..." rules for SSA rewriter This removes a bnunch of ssa rewrite generator warnings and also generates better (shorter and more efficient) rewrite rules. Paired with go.simd CL 686495 Change-Id: I14643670c4fba7ac26f309cdbc45ac8c59d58a08 Reviewed-on: https://go-review.googlesource.com/c/arch/+/686378 LUCI-TryBot-Result: Go LUCI Reviewed-by: Junyang Shao --- internal/simdgen/gen_simdrules.go | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/internal/simdgen/gen_simdrules.go b/internal/simdgen/gen_simdrules.go index 6f84b912..c52ff50b 100644 --- a/internal/simdgen/gen_simdrules.go +++ b/internal/simdgen/gen_simdrules.go @@ -112,8 +112,6 @@ func writeSIMDRules(ops []Operation) *bytes.Buffer { case PureVregIn: tplName = "pureVreg" data.GoType = *gOp.In[0].Go - data.Args = "..." - data.ArgsOut = "..." case OneKmaskImmIn: fallthrough case OneKmaskIn: @@ -149,6 +147,11 @@ func writeSIMDRules(ops []Operation) *bytes.Buffer { } } + if tplName == "pureVreg" && data.Args == data.ArgsOut { + data.Args = "..." + data.ArgsOut = "..." 
+ } + data.tplName = tplName allData = append(allData, data) } From 24c76b94abdf3b44badf7a0d05dd38132af44a82 Mon Sep 17 00:00:00 2001 From: Junyang Shao Date: Wed, 9 Jul 2025 18:56:19 +0000 Subject: [PATCH 130/200] internal/simdgen: cleanup unneeded return value from shape There is no difference between opNoImm and opNoImmConstMask Change-Id: Ic4be860cf65d0b2f78ea39b7bcb3608267b42feb Reviewed-on: https://go-review.googlesource.com/c/arch/+/686956 Reviewed-by: David Chase LUCI-TryBot-Result: Go LUCI --- internal/simdgen/gen_simdGenericOps.go | 2 +- internal/simdgen/gen_simdMachineOps.go | 2 +- internal/simdgen/gen_simdTypes.go | 2 +- internal/simdgen/gen_simdrules.go | 2 +- internal/simdgen/gen_simdssa.go | 2 +- internal/simdgen/gen_utility.go | 15 ++++++--------- 6 files changed, 11 insertions(+), 14 deletions(-) diff --git a/internal/simdgen/gen_simdGenericOps.go b/internal/simdgen/gen_simdGenericOps.go index 1be01810..7fd04b7c 100644 --- a/internal/simdgen/gen_simdGenericOps.go +++ b/internal/simdgen/gen_simdGenericOps.go @@ -43,7 +43,7 @@ func writeSIMDGenericOps(ops []Operation) *bytes.Buffer { } var opsData opData for _, op := range ops { - _, _, _, immType, _, gOp := op.shape() + _, _, _, immType, gOp := op.shape() genericNames := gOp.Go + *gOp.In[0].Go gOpData := genericOpsData{*gOp.In[0].Go + gOp.Go, genericNames, len(gOp.In), op.Commutative} if immType == VarImm || immType == ConstVarImm { diff --git a/internal/simdgen/gen_simdMachineOps.go b/internal/simdgen/gen_simdMachineOps.go index 05a191c5..cca7d945 100644 --- a/internal/simdgen/gen_simdMachineOps.go +++ b/internal/simdgen/gen_simdMachineOps.go @@ -53,7 +53,7 @@ func writeSIMDMachineOps(ops []Operation) *bytes.Buffer { opsData := make([]opData, 0) opsDataImm := make([]opData, 0) for _, op := range ops { - shapeIn, shapeOut, maskType, _, _, gOp := op.shape() + shapeIn, shapeOut, maskType, _, gOp := op.shape() asm := gOp.Asm if maskType == OneMask { diff --git a/internal/simdgen/gen_simdTypes.go 
b/internal/simdgen/gen_simdTypes.go index 7dcbc145..552bf51d 100644 --- a/internal/simdgen/gen_simdTypes.go +++ b/internal/simdgen/gen_simdTypes.go @@ -272,7 +272,7 @@ func writeSIMDTestsWrapper(ops []Operation) *bytes.Buffer { opsByShape := make(map[string]opData) opsSkipped := map[string]struct{}{} for _, o := range ops { - _, _, _, immType, _, gOp := o.shape() + _, _, _, immType, gOp := o.shape() if immType == VarImm || immType == ConstVarImm { // Operations with variable immediates should be called directly diff --git a/internal/simdgen/gen_simdrules.go b/internal/simdgen/gen_simdrules.go index c52ff50b..bddcab43 100644 --- a/internal/simdgen/gen_simdrules.go +++ b/internal/simdgen/gen_simdrules.go @@ -65,7 +65,7 @@ func writeSIMDRules(ops []Operation) *bytes.Buffer { var allData []tplRuleData for _, opr := range ops { - opInShape, opOutShape, maskType, immType, _, gOp := opr.shape() + opInShape, opOutShape, maskType, immType, gOp := opr.shape() vregInCnt := len(gOp.In) asm := gOp.Asm diff --git a/internal/simdgen/gen_simdssa.go b/internal/simdgen/gen_simdssa.go index d42b264b..b664b0f4 100644 --- a/internal/simdgen/gen_simdssa.go +++ b/internal/simdgen/gen_simdssa.go @@ -90,7 +90,7 @@ func writeSIMDSSA(ops []Operation) *bytes.Buffer { allUnseen := make(map[string][]Operation) for _, op := range ops { asm := op.Asm - shapeIn, shapeOut, maskType, _, _, gOp := op.shape() + shapeIn, shapeOut, maskType, _, gOp := op.shape() if maskType == 2 { asm += "Masked" diff --git a/internal/simdgen/gen_utility.go b/internal/simdgen/gen_utility.go index 2f25d420..d5c2492e 100644 --- a/internal/simdgen/gen_utility.go +++ b/internal/simdgen/gen_utility.go @@ -117,11 +117,10 @@ const ( // and modified versions of the op: // // opNoImm is op with its inputs excluding the const imm. -// opNoConstImmMask is op with its inputs excluding the const imm and mask. // // This function does not modify op. 
func (op *Operation) shape() (shapeIn inShape, shapeOut outShape, maskType maskShape, immType immShape, - opNoImm Operation, opNoImmConstMask Operation) { + opNoImm Operation) { if len(op.Out) > 1 { panic(fmt.Errorf("simdgen only supports 1 output: %s", op)) } @@ -168,14 +167,12 @@ func (op *Operation) shape() (shapeIn inShape, shapeOut outShape, maskType maskS } } opNoImm = *op - opNoImmConstMask = *op removeImm := func(o *Operation) { o.In = o.In[1:] } if hasImm { removeImm(&opNoImm) - removeImm(&opNoImmConstMask) if op.In[0].Const != nil { if op.In[0].ImmOffset != nil { immType = ConstVarImm @@ -231,7 +228,7 @@ func (op *Operation) shape() (shapeIn inShape, shapeOut outShape, maskType maskS // regShape returns a string representation of the register shape. func (op *Operation) regShape() (string, error) { - _, _, _, _, _, gOp := op.shape() + _, _, _, _, gOp := op.shape() var regInfo string var vRegInCnt, gRegInCnt, kMaskInCnt, vRegOutCnt, gRegOutCnt, kMaskOutCnt int for _, in := range gOp.In { @@ -431,7 +428,7 @@ var classes []string = []string{"BAD0", "op1", "op2", "op3", "op4"} // The classification string is used to select a template or a clause of a template // for intrinsics declaration and the ssagen intrinisics glue code in the compiler. 
func classifyOp(op Operation) (string, Operation, error) { - _, _, _, immType, _, gOp := op.shape() + _, _, _, immType, gOp := op.shape() var class string @@ -515,7 +512,7 @@ func splitMask(ops []Operation) ([]Operation, error) { if op.Masked == nil || *op.Masked != "true" { continue } - shapeIn, _, _, _, _, _ := op.shape() + shapeIn, _, _, _, _ := op.shape() if shapeIn == OneKmaskIn || shapeIn == OneKmaskImmIn { op2 := op @@ -544,7 +541,7 @@ func splitMask(ops []Operation) ([]Operation, error) { func dedupGodef(ops []Operation) ([]Operation, error) { seen := map[string][]Operation{} for _, op := range ops { - _, _, _, _, _, gOp := op.shape() + _, _, _, _, gOp := op.shape() genericNames := gOp.Go + *gOp.In[0].Go seen[genericNames] = append(seen[genericNames], op) @@ -588,7 +585,7 @@ func copyConstImm(ops []Operation) error { if op.ConstImm == nil { continue } - _, _, _, immType, _, _ := op.shape() + _, _, _, immType, _ := op.shape() if immType == ConstImm || immType == ConstVarImm { op.In[0].Const = op.ConstImm From 2de10e9ab058ee449ec2d60ee71e7f3910526c1e Mon Sep 17 00:00:00 2001 From: Junyang Shao Date: Wed, 9 Jul 2025 21:15:38 +0000 Subject: [PATCH 131/200] internal/simdgen: fix Int64x2 Greater output type to mask This CL generates CL 686998. 
Change-Id: I050a79b01a089102ff2e8b1d1f7340e3b8c83b1a Reviewed-on: https://go-review.googlesource.com/c/arch/+/686821 LUCI-TryBot-Result: Go LUCI Reviewed-by: David Chase --- internal/simdgen/gen_utility.go | 16 ++++++++-------- internal/simdgen/go.yaml | 2 ++ internal/simdgen/ops/Compares/go.yaml | 2 ++ 3 files changed, 12 insertions(+), 8 deletions(-) diff --git a/internal/simdgen/gen_utility.go b/internal/simdgen/gen_utility.go index d5c2492e..75a8713f 100644 --- a/internal/simdgen/gen_utility.go +++ b/internal/simdgen/gen_utility.go @@ -615,6 +615,14 @@ func capitalizeFirst(s string) string { func overwrite(ops []Operation) error { hasClassOverwrite := false overwrite := func(op []Operand, idx int, o Operation) error { + if op[idx].OverwriteElementBits != nil { + if op[idx].ElemBits == nil { + panic(fmt.Errorf("ElemBits is nil at operand %d of %v", idx, o)) + } + *op[idx].ElemBits = *op[idx].OverwriteElementBits + *op[idx].Lanes = *op[idx].Bits / *op[idx].ElemBits + *op[idx].Go = fmt.Sprintf("%s%dx%d", capitalizeFirst(*op[idx].Base), *op[idx].ElemBits, *op[idx].Lanes) + } if op[idx].OverwriteClass != nil { if op[idx].OverwriteBase == nil { panic(fmt.Errorf("simdgen: [OverwriteClass] must be set together with [OverwriteBase]: %s", op[idx])) @@ -639,14 +647,6 @@ func overwrite(ops []Operation) error { *op[idx].Go = strings.ReplaceAll(*op[idx].Go, capitalizeFirst(*op[idx].Base), capitalizeFirst(oBase)) *op[idx].Base = oBase } - if op[idx].OverwriteElementBits != nil { - if op[idx].ElemBits == nil { - panic(fmt.Errorf("ElemBits is nil at operand %d of %v", idx, o)) - } - *op[idx].ElemBits = *op[idx].OverwriteElementBits - *op[idx].Lanes = *op[idx].Bits / *op[idx].ElemBits - *op[idx].Go = fmt.Sprintf("%s%dx%d", capitalizeFirst(*op[idx].Base), *op[idx].ElemBits, *op[idx].Lanes) - } return nil } for i, o := range ops { diff --git a/internal/simdgen/go.yaml b/internal/simdgen/go.yaml index dd61308f..cc097f8f 100644 --- a/internal/simdgen/go.yaml +++ 
b/internal/simdgen/go.yaml @@ -236,6 +236,8 @@ - base: int elemBits: 32 overwriteElementBits: 64 + overwriteClass: mask + overwriteBase: int - go: EqualMasked asm: "V?PCMPEQ[BWDQ]" in: diff --git a/internal/simdgen/ops/Compares/go.yaml b/internal/simdgen/ops/Compares/go.yaml index d8bef2d9..c1ea2061 100644 --- a/internal/simdgen/ops/Compares/go.yaml +++ b/internal/simdgen/ops/Compares/go.yaml @@ -34,6 +34,8 @@ - base: int elemBits: 32 overwriteElementBits: 64 + overwriteClass: mask + overwriteBase: int - go: EqualMasked asm: "V?PCMPEQ[BWDQ]" in: From 8033e000cd3118dfd40e031dc3603b80f9ea1bae Mon Sep 17 00:00:00 2001 From: Junyang Shao Date: Fri, 11 Jul 2025 02:10:18 +0000 Subject: [PATCH 132/200] internal/simdgen: fix documentations This CL fixes some errors of op name in the documentation, make sure they are consistent; This CL also fix the documentation for masked operations. This CL generates CL 687376. Change-Id: I272de4ae9043345f33c4417c92cc542abfbdc127 Reviewed-on: https://go-review.googlesource.com/c/arch/+/687415 Reviewed-by: David Chase LUCI-TryBot-Result: Go LUCI --- internal/simdgen/categories.yaml | 11 ++++++----- internal/simdgen/gen_utility.go | 5 ++--- internal/simdgen/go.yaml | 2 +- internal/simdgen/ops/GaloisField/categories.yaml | 6 +++--- internal/simdgen/ops/GaloisField/go.yaml | 2 +- internal/simdgen/ops/MLOps/categories.yaml | 5 +++-- 6 files changed, 16 insertions(+), 15 deletions(-) diff --git a/internal/simdgen/categories.yaml b/internal/simdgen/categories.yaml index bfb0ff80..942d4d41 100644 --- a/internal/simdgen/categories.yaml +++ b/internal/simdgen/categories.yaml @@ -361,13 +361,13 @@ // x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrixes; // b is an 8-bit vector. The affine transformation is y * x + b, with each element of y // corresponding to a group of 8 elements in x. 
-- go: GaloisFieldAffineTransformInversedMasked +- go: GaloisFieldAffineTransformInverseMasked masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // GaloisFieldAffineTransformMasked computes an affine transformation in GF(2^8), - // with x inversed with reduction polynomial x^8 + x^4 + x^3 + x + 1: + // GaloisFieldAffineTransformInverseMasked computes an affine transformation in GF(2^8), + // with x inverted with respect to reduction polynomial x^8 + x^4 + x^3 + x + 1: // x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrixes; // b is an 8-bit vector. The affine transformation is y * x + b, with each element of y // corresponding to a group of 8 elements in x. @@ -431,18 +431,19 @@ documentation: !string |- // PairDotProdMasked multiplies the elements and add the pairs together, // yielding a vector of half as many elements with twice the input element size. +# TODO: maybe simplify this name within the receiver-type + method-naming scheme we use. - go: SaturatedUnsignedSignedPairDotProd commutative: "false" extension: "AVX.*" documentation: !string |- - // SaturatedPairDotProd multiplies the elements and add the pairs together with saturation, + // SaturatedUnsignedSignedPairDotProd multiplies the elements and add the pairs together with saturation, // yielding a vector of half as many elements with twice the input element size. - go: SaturatedUnsignedSignedPairDotProdMasked masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // SaturatedPairDotProdMasked multiplies the elements and add the pairs together with saturation, + // SaturatedUnsignedSignedPairDotProdMasked multiplies the elements and add the pairs together with saturation, // yielding a vector of half as many elements with twice the input element size. # QuadDotProd, i.e. VPDPBUSD(S) are operations with src/dst on the same register, we are not supporting this as of now. 
diff --git a/internal/simdgen/gen_utility.go b/internal/simdgen/gen_utility.go index 75a8713f..dbd7d6aa 100644 --- a/internal/simdgen/gen_utility.go +++ b/internal/simdgen/gen_utility.go @@ -523,9 +523,8 @@ func splitMask(ops []Operation) ([]Operation, error) { } maskedOpName := op2.Go op2.Go = strings.TrimSuffix(op2.Go, "Masked") - if op2.Documentation != nil { - *op2.Documentation = strings.ReplaceAll(*op2.Documentation, maskedOpName, op2.Go) - } + op2Doc := strings.ReplaceAll(*op2.Documentation, maskedOpName, op2.Go) + op2.Documentation = &op2Doc splited = append(splited, op2) } else { return nil, fmt.Errorf("simdgen only recognizes masked operations with exactly one mask input: %s", op) diff --git a/internal/simdgen/go.yaml b/internal/simdgen/go.yaml index cc097f8f..4828bf01 100644 --- a/internal/simdgen/go.yaml +++ b/internal/simdgen/go.yaml @@ -399,7 +399,7 @@ out: - *uint8 -- go: GaloisFieldAffineTransformInversedMasked +- go: GaloisFieldAffineTransformInverseMasked asm: VGF2P8AFFINEINVQB operandOrder: 2I # 2nd operand, then immediate in: *AffineArgs diff --git a/internal/simdgen/ops/GaloisField/categories.yaml b/internal/simdgen/ops/GaloisField/categories.yaml index 84b64cc1..3caa13cf 100644 --- a/internal/simdgen/ops/GaloisField/categories.yaml +++ b/internal/simdgen/ops/GaloisField/categories.yaml @@ -8,13 +8,13 @@ // x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrixes; // b is an 8-bit vector. The affine transformation is y * x + b, with each element of y // corresponding to a group of 8 elements in x. 
-- go: GaloisFieldAffineTransformInversedMasked +- go: GaloisFieldAffineTransformInverseMasked masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // GaloisFieldAffineTransformMasked computes an affine transformation in GF(2^8), - // with x inversed with reduction polynomial x^8 + x^4 + x^3 + x + 1: + // GaloisFieldAffineTransformInverseMasked computes an affine transformation in GF(2^8), + // with x inverted with respect to reduction polynomial x^8 + x^4 + x^3 + x + 1: // x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrixes; // b is an 8-bit vector. The affine transformation is y * x + b, with each element of y // corresponding to a group of 8 elements in x. diff --git a/internal/simdgen/ops/GaloisField/go.yaml b/internal/simdgen/ops/GaloisField/go.yaml index 84dc1619..68875d17 100644 --- a/internal/simdgen/ops/GaloisField/go.yaml +++ b/internal/simdgen/ops/GaloisField/go.yaml @@ -18,7 +18,7 @@ out: - *uint8 -- go: GaloisFieldAffineTransformInversedMasked +- go: GaloisFieldAffineTransformInverseMasked asm: VGF2P8AFFINEINVQB operandOrder: 2I # 2nd operand, then immediate in: *AffineArgs diff --git a/internal/simdgen/ops/MLOps/categories.yaml b/internal/simdgen/ops/MLOps/categories.yaml index 343b8f54..17c318a9 100644 --- a/internal/simdgen/ops/MLOps/categories.yaml +++ b/internal/simdgen/ops/MLOps/categories.yaml @@ -12,18 +12,19 @@ documentation: !string |- // PairDotProdMasked multiplies the elements and add the pairs together, // yielding a vector of half as many elements with twice the input element size. +# TODO: maybe simplify this name within the receiver-type + method-naming scheme we use. 
- go: SaturatedUnsignedSignedPairDotProd commutative: "false" extension: "AVX.*" documentation: !string |- - // SaturatedPairDotProd multiplies the elements and add the pairs together with saturation, + // SaturatedUnsignedSignedPairDotProd multiplies the elements and add the pairs together with saturation, // yielding a vector of half as many elements with twice the input element size. - go: SaturatedUnsignedSignedPairDotProdMasked masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // SaturatedPairDotProdMasked multiplies the elements and add the pairs together with saturation, + // SaturatedUnsignedSignedPairDotProdMasked multiplies the elements and add the pairs together with saturation, // yielding a vector of half as many elements with twice the input element size. # QuadDotProd, i.e. VPDPBUSD(S) are operations with src/dst on the same register, we are not supporting this as of now. From 505fade1723f01045821dc090b32006ec40f7078 Mon Sep 17 00:00:00 2001 From: Junyang Shao Date: Fri, 11 Jul 2025 02:14:46 +0000 Subject: [PATCH 133/200] internal/simdgen: change imm param name to constant This CL generates CL 687377. 
Change-Id: I7d0c742aad91f2e6219ac90137a6e7adc6cd48bc Reviewed-on: https://go-review.googlesource.com/c/arch/+/687416 Reviewed-by: David Chase LUCI-TryBot-Result: Go LUCI --- internal/simdgen/gen_simdTypes.go | 12 ++++++++++++ internal/simdgen/gen_utility.go | 5 ++--- internal/simdgen/go.yaml | 5 +++++ internal/simdgen/ops/FPonlyArith/go.yaml | 2 ++ internal/simdgen/ops/Moves/go.yaml | 2 ++ internal/simdgen/ops/ShiftRotate/go.yaml | 1 + 6 files changed, 24 insertions(+), 3 deletions(-) diff --git a/internal/simdgen/gen_simdTypes.go b/internal/simdgen/gen_simdTypes.go index 552bf51d..d8a4de63 100644 --- a/internal/simdgen/gen_simdTypes.go +++ b/internal/simdgen/gen_simdTypes.go @@ -148,6 +148,8 @@ func (x {{(index .In 0).Go}}) {{.Go}}({{.Op1NameAndType "y"}}, {{.Op2NameAndType {{define "op1Imm8"}} {{if .Documentation}}{{.Documentation}} //{{end}} +// {{.ImmName}} is expected to be a constant, non-constant value will trigger a runtime panic. +// // Asm: {{.Asm}}, CPU Feature: {{.Extension}} func (x {{(index .In 1).Go}}) {{.Go}}({{.ImmName}} uint8) {{.GoType}} {{end}} @@ -155,6 +157,8 @@ func (x {{(index .In 1).Go}}) {{.Go}}({{.ImmName}} uint8) {{.GoType}} {{define "op2Imm8"}} {{if .Documentation}}{{.Documentation}} //{{end}} +// {{.ImmName}} is expected to be a constant, non-constant value will trigger a runtime panic. +// // Asm: {{.Asm}}, CPU Feature: {{.Extension}} func (x {{(index .In 1).Go}}) {{.Go}}({{.ImmName}} uint8, {{.Op2NameAndType "y"}}) {{.GoType}} {{end}} @@ -162,6 +166,8 @@ func (x {{(index .In 1).Go}}) {{.Go}}({{.ImmName}} uint8, {{.Op2NameAndType "y"} {{define "op2Imm8_2I"}} {{if .Documentation}}{{.Documentation}} //{{end}} +// {{.ImmName}} is expected to be a constant, non-constant value will trigger a runtime panic. 
+// // Asm: {{.Asm}}, CPU Feature: {{.Extension}} func (x {{(index .In 1).Go}}) {{.Go}}({{.Op2NameAndType "y"}}, {{.ImmName}} uint8) {{.GoType}} {{end}} @@ -170,6 +176,8 @@ func (x {{(index .In 1).Go}}) {{.Go}}({{.Op2NameAndType "y"}}, {{.ImmName}} uint {{define "op3Imm8"}} {{if .Documentation}}{{.Documentation}} //{{end}} +// {{.ImmName}} is expected to be a constant, non-constant value will trigger a runtime panic. +// // Asm: {{.Asm}}, CPU Feature: {{.Extension}} func (x {{(index .In 1).Go}}) {{.Go}}({{.ImmName}} uint8, {{.Op2NameAndType "y"}}, {{.Op3NameAndType "z"}}) {{.GoType}} {{end}} @@ -177,6 +185,8 @@ func (x {{(index .In 1).Go}}) {{.Go}}({{.ImmName}} uint8, {{.Op2NameAndType "y"} {{define "op3Imm8_2I"}} {{if .Documentation}}{{.Documentation}} //{{end}} +// {{.ImmName}} is expected to be a constant, non-constant value will trigger a runtime panic. +// // Asm: {{.Asm}}, CPU Feature: {{.Extension}} func (x {{(index .In 1).Go}}) {{.Go}}({{.Op2NameAndType "y"}}, {{.ImmName}} uint8, {{.Op3NameAndType "z"}}) {{.GoType}} {{end}} @@ -185,6 +195,8 @@ func (x {{(index .In 1).Go}}) {{.Go}}({{.Op2NameAndType "y"}}, {{.ImmName}} uint {{define "op4Imm8"}} {{if .Documentation}}{{.Documentation}} //{{end}} +// {{.ImmName}} is expected to be a constant, non-constant value will trigger a runtime panic. +// // Asm: {{.Asm}}, CPU Feature: {{.Extension}} func (x {{(index .In 1).Go}}) {{.Go}}({{.ImmName}} uint8, {{.Op2NameAndType "y"}}, {{.Op3NameAndType "z"}}, {{.Op4NameAndType "u"}}) {{.GoType}} {{end}} diff --git a/internal/simdgen/gen_utility.go b/internal/simdgen/gen_utility.go index dbd7d6aa..fc5f296b 100644 --- a/internal/simdgen/gen_utility.go +++ b/internal/simdgen/gen_utility.go @@ -342,10 +342,9 @@ func (op Operation) GoType() string { // ImmName returns the name to use for an operation's immediate operand. 
// This can be overriden in the yaml with "name" on an operand, -// otherwise, for now, it is "imm" but -// TODO come up with a better default immediate parameter name. +// otherwise, for now, "constant" func (op Operation) ImmName() string { - return op.Op0Name("imm") + return op.Op0Name("constant") } func (o Operand) OpName(s string) string { diff --git a/internal/simdgen/go.yaml b/internal/simdgen/go.yaml index 4828bf01..b1401bde 100644 --- a/internal/simdgen/go.yaml +++ b/internal/simdgen/go.yaml @@ -362,6 +362,7 @@ - class: immediate const: 0 # place holder immOffset: 4 # "M", round to numbers with M digits after dot(by means of binary number). + name: prec out: *1fp - go: "DiffWith(Round|Ceil|Floor|Trunc)WithPrecisionMasked" asm: "VREDUCEP[SD]" @@ -371,6 +372,7 @@ - class: immediate const: 0 # place holder immOffset: 4 # "M", round to numbers with M digits after dot(by means of binary number). + name: prec out: *1fp - go: "AddSub" @@ -729,6 +731,7 @@ - &imm class: immediate immOffset: 0 + name: index out: - *t @@ -761,6 +764,7 @@ - &imm01 # This immediate should be only 0 or 1 class: immediate immOffset: 0 + name: index out: - *i8x32 @@ -1161,6 +1165,7 @@ - &pureImm class: immediate immOffset: 0 + name: shift out: - *any - go: RotateAllRightMasked diff --git a/internal/simdgen/ops/FPonlyArith/go.yaml b/internal/simdgen/ops/FPonlyArith/go.yaml index 29a7f43b..d35610df 100644 --- a/internal/simdgen/ops/FPonlyArith/go.yaml +++ b/internal/simdgen/ops/FPonlyArith/go.yaml @@ -58,6 +58,7 @@ - class: immediate const: 0 # place holder immOffset: 4 # "M", round to numbers with M digits after dot(by means of binary number). + name: prec out: *1fp - go: "DiffWith(Round|Ceil|Floor|Trunc)WithPrecisionMasked" asm: "VREDUCEP[SD]" @@ -67,6 +68,7 @@ - class: immediate const: 0 # place holder immOffset: 4 # "M", round to numbers with M digits after dot(by means of binary number). 
+ name: prec out: *1fp - go: "AddSub" diff --git a/internal/simdgen/ops/Moves/go.yaml b/internal/simdgen/ops/Moves/go.yaml index e6cd40f6..dd9ae79d 100644 --- a/internal/simdgen/ops/Moves/go.yaml +++ b/internal/simdgen/ops/Moves/go.yaml @@ -11,6 +11,7 @@ - &imm class: immediate immOffset: 0 + name: index out: - *t @@ -43,6 +44,7 @@ - &imm01 # This immediate should be only 0 or 1 class: immediate immOffset: 0 + name: index out: - *i8x32 diff --git a/internal/simdgen/ops/ShiftRotate/go.yaml b/internal/simdgen/ops/ShiftRotate/go.yaml index a42241db..398047f2 100644 --- a/internal/simdgen/ops/ShiftRotate/go.yaml +++ b/internal/simdgen/ops/ShiftRotate/go.yaml @@ -143,6 +143,7 @@ - &pureImm class: immediate immOffset: 0 + name: shift out: - *any - go: RotateAllRightMasked From d3b287a03afd8f9f0d27bb4b7a20ca296468741a Mon Sep 17 00:00:00 2001 From: Junyang Shao Date: Fri, 11 Jul 2025 17:57:56 +0000 Subject: [PATCH 134/200] internal/simdgen: adjust Shift.* operations This CL does: 1. Removes ShiftRightSignExtended, default signed vectors to shift arithmetic, and unsigned to shift logical. 2. Add the missing Shifts which were left out by YAML error in the generator. This CL generates CL 687596. Change-Id: I42e21d12cb64e325fe15f44d732353fd6b3b0bf5 Reviewed-on: https://go-review.googlesource.com/c/arch/+/687595 LUCI-TryBot-Result: Go LUCI Reviewed-by: David Chase --- internal/simdgen/categories.yaml | 24 ++++--- internal/simdgen/go.yaml | 69 +++++++++++-------- .../simdgen/ops/ShiftRotate/categories.yaml | 24 ++++--- internal/simdgen/ops/ShiftRotate/go.yaml | 69 +++++++++++-------- 4 files changed, 116 insertions(+), 70 deletions(-) diff --git a/internal/simdgen/categories.yaml b/internal/simdgen/categories.yaml index 942d4d41..802dc9eb 100644 --- a/internal/simdgen/categories.yaml +++ b/internal/simdgen/categories.yaml @@ -616,31 +616,35 @@ documentation: !string |- // ShiftAllLeftMasked shifts each element to the left by the specified number of bits. 
Emptied lower bits are zeroed. - go: ShiftAllRight + signed: false nameAndSizeCheck: "true" commutative: "false" extension: "AVX.*" documentation: !string |- // ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. - go: ShiftAllRightMasked + signed: false nameAndSizeCheck: "true" masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- // ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. -- go: ShiftAllRightSignExtended +- go: ShiftAllRight + signed: true nameAndSizeCheck: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // ShiftAllRightSignExtended shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. -- go: ShiftAllRightSignExtendedMasked + // ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. +- go: ShiftAllRightMasked + signed: true nameAndSizeCheck: "true" masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // ShiftAllRightSignExtendedMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. + // ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. - go: ShiftLeft nameAndSizeCheck: "true" @@ -656,31 +660,35 @@ documentation: !string |- // ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. - go: ShiftRight + signed: false nameAndSizeCheck: "true" commutative: "false" extension: "AVX.*" documentation: !string |- // ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. 
- go: ShiftRightMasked + signed: false nameAndSizeCheck: "true" masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- // ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. -- go: ShiftRightSignExtended +- go: ShiftRight + signed: true nameAndSizeCheck: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. -- go: ShiftRightSignExtendedMasked + // ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. +- go: ShiftRightMasked + signed: true nameAndSizeCheck: "true" masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // ShiftRightSignExtendedMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. + // ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. 
- go: RotateAllLeftMasked nameAndSizeCheck: "true" diff --git a/internal/simdgen/go.yaml b/internal/simdgen/go.yaml index b1401bde..27c45900 100644 --- a/internal/simdgen/go.yaml +++ b/internal/simdgen/go.yaml @@ -1029,8 +1029,8 @@ - &any go: $t - &vecAsScalar64 + go: "Uint.*" treatLikeAScalarOfSize: 64 - go: Uint64x2 out: - *any - go: ShiftAllLeftMasked @@ -1042,21 +1042,26 @@ out: - *any - go: ShiftAllRight + signed: false asm: "VPSRL[WDQ]" in: - - *any + - &uint + go: $t + base: uint - *vecAsScalar64 out: - - *any + - *uint - go: ShiftAllRightMasked + signed: false asm: "VPSRL[WDQ]" in: - class: mask - - *any + - *uint - *vecAsScalar64 out: - - *any -- go: ShiftAllRightSignExtended + - *uint +- go: ShiftAllRight + signed: true asm: "VPSRA[WDQ]" in: - &int @@ -1065,7 +1070,8 @@ - *vecAsScalar64 out: - *int -- go: ShiftAllRightSignExtendedMasked +- go: ShiftAllRightMasked + signed: true asm: "VPSRA[WDQ]" in: - class: mask @@ -1110,51 +1116,60 @@ out: - *anyOverwriteElemBits - go: ShiftRight + signed: false asm: "VPSRLV[WD]" in: - - *any - - *any + - *uint + - *uint out: - - *any + - *uint - go: ShiftRightMasked + signed: false asm: "VPSRLV[WD]" in: - class: mask - - *any - - *any + - *uint + - *uint out: - - *any + - *uint # XED data of VPSRLVQ needs the same overwrite as VPSLLVQ. 
- go: ShiftRight + signed: false asm: "VPSRLVQ" in: - - *anyOverwriteElemBits - - *anyOverwriteElemBits + - &uintOverwriteElemBits + go: $t + base: uint + overwriteElementBits: 64 + - *uintOverwriteElemBits out: - - *anyOverwriteElemBits + - *uintOverwriteElemBits - go: ShiftRightMasked + signed: false asm: "VPSRLVQ" in: - class: mask - - *anyOverwriteElemBits - - *anyOverwriteElemBits + - *uintOverwriteElemBits + - *uintOverwriteElemBits out: - - *anyOverwriteElemBits -- go: ShiftRightSignExtended + - *uintOverwriteElemBits +- go: ShiftRight + signed: true asm: "VPSRAV[WDQ]" in: - - *any - - *any + - *int + - *int out: - - *any -- go: ShiftRightSignExtendedMasked + - *int +- go: ShiftRightMasked + signed: true asm: "VPSRAV[WDQ]" in: - class: mask - - *any - - *any + - *int + - *int out: - - *any + - *int # Rotate - go: RotateAllLeftMasked diff --git a/internal/simdgen/ops/ShiftRotate/categories.yaml b/internal/simdgen/ops/ShiftRotate/categories.yaml index b8bcb28e..09c04dfa 100644 --- a/internal/simdgen/ops/ShiftRotate/categories.yaml +++ b/internal/simdgen/ops/ShiftRotate/categories.yaml @@ -13,31 +13,35 @@ documentation: !string |- // ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. - go: ShiftAllRight + signed: false nameAndSizeCheck: "true" commutative: "false" extension: "AVX.*" documentation: !string |- // ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. - go: ShiftAllRightMasked + signed: false nameAndSizeCheck: "true" masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- // ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. 
-- go: ShiftAllRightSignExtended +- go: ShiftAllRight + signed: true nameAndSizeCheck: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // ShiftAllRightSignExtended shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. -- go: ShiftAllRightSignExtendedMasked + // ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. +- go: ShiftAllRightMasked + signed: true nameAndSizeCheck: "true" masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // ShiftAllRightSignExtendedMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. + // ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. - go: ShiftLeft nameAndSizeCheck: "true" @@ -53,31 +57,35 @@ documentation: !string |- // ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. - go: ShiftRight + signed: false nameAndSizeCheck: "true" commutative: "false" extension: "AVX.*" documentation: !string |- // ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. - go: ShiftRightMasked + signed: false nameAndSizeCheck: "true" masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- // ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. 
-- go: ShiftRightSignExtended +- go: ShiftRight + signed: true nameAndSizeCheck: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. -- go: ShiftRightSignExtendedMasked + // ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. +- go: ShiftRightMasked + signed: true nameAndSizeCheck: "true" masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // ShiftRightSignExtendedMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. + // ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. 
- go: RotateAllLeftMasked nameAndSizeCheck: "true" diff --git a/internal/simdgen/ops/ShiftRotate/go.yaml b/internal/simdgen/ops/ShiftRotate/go.yaml index 398047f2..637de935 100644 --- a/internal/simdgen/ops/ShiftRotate/go.yaml +++ b/internal/simdgen/ops/ShiftRotate/go.yaml @@ -7,8 +7,8 @@ - &any go: $t - &vecAsScalar64 + go: "Uint.*" treatLikeAScalarOfSize: 64 - go: Uint64x2 out: - *any - go: ShiftAllLeftMasked @@ -20,21 +20,26 @@ out: - *any - go: ShiftAllRight + signed: false asm: "VPSRL[WDQ]" in: - - *any + - &uint + go: $t + base: uint - *vecAsScalar64 out: - - *any + - *uint - go: ShiftAllRightMasked + signed: false asm: "VPSRL[WDQ]" in: - class: mask - - *any + - *uint - *vecAsScalar64 out: - - *any -- go: ShiftAllRightSignExtended + - *uint +- go: ShiftAllRight + signed: true asm: "VPSRA[WDQ]" in: - &int @@ -43,7 +48,8 @@ - *vecAsScalar64 out: - *int -- go: ShiftAllRightSignExtendedMasked +- go: ShiftAllRightMasked + signed: true asm: "VPSRA[WDQ]" in: - class: mask @@ -88,51 +94,60 @@ out: - *anyOverwriteElemBits - go: ShiftRight + signed: false asm: "VPSRLV[WD]" in: - - *any - - *any + - *uint + - *uint out: - - *any + - *uint - go: ShiftRightMasked + signed: false asm: "VPSRLV[WD]" in: - class: mask - - *any - - *any + - *uint + - *uint out: - - *any + - *uint # XED data of VPSRLVQ needs the same overwrite as VPSLLVQ. 
- go: ShiftRight + signed: false asm: "VPSRLVQ" in: - - *anyOverwriteElemBits - - *anyOverwriteElemBits + - &uintOverwriteElemBits + go: $t + base: uint + overwriteElementBits: 64 + - *uintOverwriteElemBits out: - - *anyOverwriteElemBits + - *uintOverwriteElemBits - go: ShiftRightMasked + signed: false asm: "VPSRLVQ" in: - class: mask - - *anyOverwriteElemBits - - *anyOverwriteElemBits + - *uintOverwriteElemBits + - *uintOverwriteElemBits out: - - *anyOverwriteElemBits -- go: ShiftRightSignExtended + - *uintOverwriteElemBits +- go: ShiftRight + signed: true asm: "VPSRAV[WDQ]" in: - - *any - - *any + - *int + - *int out: - - *any -- go: ShiftRightSignExtendedMasked + - *int +- go: ShiftRightMasked + signed: true asm: "VPSRAV[WDQ]" in: - class: mask - - *any - - *any + - *int + - *int out: - - *any + - *int # Rotate - go: RotateAllLeftMasked From 17f47198bffe0d3559e613f7e5b39ecd1c45d4b3 Mon Sep 17 00:00:00 2001 From: Junyang Shao Date: Fri, 11 Jul 2025 20:03:26 +0000 Subject: [PATCH 135/200] internal/simdgen: updates CPU Feature in doc This CL picks ISA set whenever available for CPU Feature, otherwise picks Extension. This CL generates CL 687675. 
Change-Id: I6eebd730c65dad8e3557b5bbd2fbb2de01bd18f7 Reviewed-on: https://go-review.googlesource.com/c/arch/+/687655 LUCI-TryBot-Result: Go LUCI Reviewed-by: David Chase --- internal/simdgen/gen_simdTypes.go | 24 +++++++------- internal/simdgen/gen_utility.go | 52 ++++++++++++++++++++++++++++--- internal/simdgen/godefs.go | 8 +++++ internal/simdgen/xed.go | 3 +- 4 files changed, 69 insertions(+), 18 deletions(-) diff --git a/internal/simdgen/gen_simdTypes.go b/internal/simdgen/gen_simdTypes.go index d8a4de63..412c7126 100644 --- a/internal/simdgen/gen_simdTypes.go +++ b/internal/simdgen/gen_simdTypes.go @@ -106,42 +106,42 @@ package simd {{define "op1"}} {{if .Documentation}}{{.Documentation}} //{{end}} -// Asm: {{.Asm}}, CPU Feature: {{.Extension}} +// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}} func (x {{(index .In 0).Go}}) {{.Go}}() {{.GoType}} {{end}} {{define "op2"}} {{if .Documentation}}{{.Documentation}} //{{end}} -// Asm: {{.Asm}}, CPU Feature: {{.Extension}} +// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}} func (x {{(index .In 0).Go}}) {{.Go}}({{.Op1NameAndType "y"}}) {{.GoType}} {{end}} {{define "op3"}} {{if .Documentation}}{{.Documentation}} //{{end}} -// Asm: {{.Asm}}, CPU Feature: {{.Extension}} +// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}} func (x {{(index .In 0).Go}}) {{.Go}}({{.Op1NameAndType "y"}}, {{.Op2NameAndType "z"}}) {{.GoType}} {{end}} {{define "op2VecAsScalar"}} {{if .Documentation}}{{.Documentation}} //{{end}} -// Asm: {{.Asm}}, CPU Feature: {{.Extension}} +// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}} func (x {{(index .In 0).Go}}) {{.Go}}(y uint{{(index .In 1).TreatLikeAScalarOfSize}}) {{(index .Out 0).Go}} {{end}} {{define "op3VecAsScalar"}} {{if .Documentation}}{{.Documentation}} //{{end}} -// Asm: {{.Asm}}, CPU Feature: {{.Extension}} +// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}} func (x {{(index .In 0).Go}}) {{.Go}}(y uint{{(index .In 1).TreatLikeAScalarOfSize}}, {{.Op2NameAndType "z"}}) {{(index .Out 0).Go}} {{end}} {{define 
"op4"}} {{if .Documentation}}{{.Documentation}} //{{end}} -// Asm: {{.Asm}}, CPU Feature: {{.Extension}} +// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}} func (x {{(index .In 0).Go}}) {{.Go}}({{.Op1NameAndType "y"}}, {{.Op2NameAndType "z"}}, {{.Op3NameAndType "u"}}) {{.GoType}} {{end}} @@ -150,7 +150,7 @@ func (x {{(index .In 0).Go}}) {{.Go}}({{.Op1NameAndType "y"}}, {{.Op2NameAndType //{{end}} // {{.ImmName}} is expected to be a constant, non-constant value will trigger a runtime panic. // -// Asm: {{.Asm}}, CPU Feature: {{.Extension}} +// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}} func (x {{(index .In 1).Go}}) {{.Go}}({{.ImmName}} uint8) {{.GoType}} {{end}} @@ -159,7 +159,7 @@ func (x {{(index .In 1).Go}}) {{.Go}}({{.ImmName}} uint8) {{.GoType}} //{{end}} // {{.ImmName}} is expected to be a constant, non-constant value will trigger a runtime panic. // -// Asm: {{.Asm}}, CPU Feature: {{.Extension}} +// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}} func (x {{(index .In 1).Go}}) {{.Go}}({{.ImmName}} uint8, {{.Op2NameAndType "y"}}) {{.GoType}} {{end}} @@ -168,7 +168,7 @@ func (x {{(index .In 1).Go}}) {{.Go}}({{.ImmName}} uint8, {{.Op2NameAndType "y"} //{{end}} // {{.ImmName}} is expected to be a constant, non-constant value will trigger a runtime panic. // -// Asm: {{.Asm}}, CPU Feature: {{.Extension}} +// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}} func (x {{(index .In 1).Go}}) {{.Go}}({{.Op2NameAndType "y"}}, {{.ImmName}} uint8) {{.GoType}} {{end}} @@ -178,7 +178,7 @@ func (x {{(index .In 1).Go}}) {{.Go}}({{.Op2NameAndType "y"}}, {{.ImmName}} uint //{{end}} // {{.ImmName}} is expected to be a constant, non-constant value will trigger a runtime panic. 
// -// Asm: {{.Asm}}, CPU Feature: {{.Extension}} +// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}} func (x {{(index .In 1).Go}}) {{.Go}}({{.ImmName}} uint8, {{.Op2NameAndType "y"}}, {{.Op3NameAndType "z"}}) {{.GoType}} {{end}} @@ -187,7 +187,7 @@ func (x {{(index .In 1).Go}}) {{.Go}}({{.ImmName}} uint8, {{.Op2NameAndType "y"} //{{end}} // {{.ImmName}} is expected to be a constant, non-constant value will trigger a runtime panic. // -// Asm: {{.Asm}}, CPU Feature: {{.Extension}} +// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}} func (x {{(index .In 1).Go}}) {{.Go}}({{.Op2NameAndType "y"}}, {{.ImmName}} uint8, {{.Op3NameAndType "z"}}) {{.GoType}} {{end}} @@ -197,7 +197,7 @@ func (x {{(index .In 1).Go}}) {{.Go}}({{.Op2NameAndType "y"}}, {{.ImmName}} uint //{{end}} // {{.ImmName}} is expected to be a constant, non-constant value will trigger a runtime panic. // -// Asm: {{.Asm}}, CPU Feature: {{.Extension}} +// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}} func (x {{(index .In 1).Go}}) {{.Go}}({{.ImmName}} uint8, {{.Op2NameAndType "y"}}, {{.Op3NameAndType "z"}}, {{.Op4NameAndType "u"}}) {{.GoType}} {{end}} diff --git a/internal/simdgen/gen_utility.go b/internal/simdgen/gen_utility.go index fc5f296b..25503510 100644 --- a/internal/simdgen/gen_utility.go +++ b/internal/simdgen/gen_utility.go @@ -502,6 +502,44 @@ func dedup(ops []Operation) (deduped []Operation) { return } +func fillCPUFeature(ops []Operation) (filled []Operation, excluded []Operation) { + for _, op := range ops { + if op.ISASet == "" { + newS := op.Extension + op.CPUFeature = &newS + } else { + newS := strings.TrimSuffix(strings.TrimSuffix(strings.TrimSuffix(op.ISASet, "_128"), "_256"), "_512") + newS = strings.TrimSuffix(strings.TrimSuffix(strings.TrimSuffix(newS, "_128N"), "_256N"), "_512N") + op.CPUFeature = &newS + } + if *op.CPUFeature == "AVX" || *op.CPUFeature == "AVX2" || strings.HasPrefix(*op.CPUFeature, "AVX512") || + strings.HasPrefix(*op.CPUFeature, "AVX_") || strings.HasPrefix(*op.CPUFeature, 
"AVX2_") { + // This excludes instructions from CPU Features like AVX10.1, which usually are rebrandings of AVX512. + filled = append(filled, op) + if strings.Contains(*op.CPUFeature, "_") { + *op.CPUFeature = strings.ReplaceAll(*op.CPUFeature, "_", "") + } + } else { + excluded = append(excluded, op) + } + } + // Sanity check, make sure we are not excluding the only definition of an operation + filledSeen := map[string]struct{}{} + excludedSeen := map[string]Operation{} + for _, op := range filled { + filledSeen[op.Go+*op.In[0].Go] = struct{}{} + } + for _, op := range excluded { + excludedSeen[op.Go+*op.In[0].Go] = op + } + for k, op := range excludedSeen { + if _, ok := filledSeen[k]; !ok { + panic(fmt.Sprintf("simdgen is excluding the only def of op: %s", op)) + } + } + return +} + // splitMask splits operations with a single mask vreg input to be masked and unmasked(const: K0). // It also remove the "Masked" keyword from the name. func splitMask(ops []Operation) ([]Operation, error) { @@ -561,13 +599,15 @@ func dedupGodef(ops []Operation) ([]Operation, error) { deduped := []Operation{} for _, dup := range seen { if len(dup) > 1 { - sort.Slice(dup, func(i, j int) bool { + slices.SortFunc(dup, func(i, j Operation) int { // Put non-AVX512 candidates at the beginning - if !isAVX512(dup[i]) && isAVX512(dup[j]) { - return true + if !isAVX512(i) && isAVX512(j) { + return -1 + } + if isAVX512(i) && !isAVX512(j) { + return 1 } - // TODO: make the sorting logic finer-grained. 
- return false + return strings.Compare(*i.CPUFeature, *j.CPUFeature) }) } deduped = append(deduped, dup[0]) @@ -741,6 +781,8 @@ func (o Operation) String() string { str("Asm", o.Asm) str("Commutative", o.Commutative) str("Extension", o.Extension) + str("ISASet", o.ISASet) + optStr("CPUFeature", o.CPUFeature) optStr("ConstImm", o.ConstImm) optStr("Masked", o.Masked) optStr("Zeroing", o.Zeroing) diff --git a/internal/simdgen/godefs.go b/internal/simdgen/godefs.go index 52cfd1e8..803e5306 100644 --- a/internal/simdgen/godefs.go +++ b/internal/simdgen/godefs.go @@ -25,6 +25,8 @@ type Operation struct { Out []Operand // Results Commutative string // Commutativity Extension string // Extension + ISASet string // ISASet + CPUFeature *string // If ISASet is empty, then Extension, otherwise ISASet Zeroing *string // Zeroing is a flag for asm prefix "Z", if non-nil it will always be "false" Documentation *string // Documentation will be appended to the stubs comments. // ConstMask is a hack to reduce the size of defs the user writes for const-immediate @@ -245,6 +247,11 @@ func writeGoDefs(path string, cl unify.Closure) error { // The parsed XED data might contain duplicates, like // 512 bits VPADDP. 
deduped := dedup(ops) + var excluded []Operation + deduped, excluded = fillCPUFeature(deduped) + if *Verbose { + log.Printf("excluded len: %d\n", len(excluded)) + } if *Verbose { log.Printf("dedup len: %d\n", len(ops)) @@ -280,6 +287,7 @@ func writeGoDefs(path string, cl unify.Closure) error { if *Verbose { log.Printf("dedup len: %d\n", len(deduped)) } + reportXEDInconsistency(deduped) typeMap := parseSIMDTypes(deduped) formatWriteAndClose(writeSIMDTypes(typeMap), path, "src/"+simdPackage+"/types_amd64.go") diff --git a/internal/simdgen/xed.go b/internal/simdgen/xed.go index 387db08a..5f348cbc 100644 --- a/internal/simdgen/xed.go +++ b/internal/simdgen/xed.go @@ -58,13 +58,14 @@ func loadXED(xedPath string) []*unify.Value { return } // TODO: "feature" - fields := []string{"goarch", "asm", "in", "out", "extension"} + fields := []string{"goarch", "asm", "in", "out", "extension", "isaset"} values := []*unify.Value{ unify.NewValue(unify.NewStringExact("amd64")), unify.NewValue(unify.NewStringExact(inst.Opcode())), unify.NewValue(ins), unify.NewValue(outs), unify.NewValue(unify.NewStringExact(inst.Extension)), + unify.NewValue(unify.NewStringExact(inst.ISASet)), } if strings.Contains(inst.Pattern, "ZEROING=0") { fields = append(fields, "zeroing") From 6a376630333dcf66417cd7c87ab5a0fca5e9bfbe Mon Sep 17 00:00:00 2001 From: Junyang Shao Date: Mon, 14 Jul 2025 17:23:25 +0000 Subject: [PATCH 136/200] internal/simdgen: add VDPPS This is a missing instruction that was left out. This CL generates CL 687916. 
Change-Id: I0361e7a1ecda67792b315887c996d72af404cd85 Reviewed-on: https://go-review.googlesource.com/c/arch/+/687915 Reviewed-by: David Chase LUCI-TryBot-Result: Go LUCI --- internal/simdgen/go.yaml | 5 +---- internal/simdgen/ops/MLOps/go.yaml | 5 +---- 2 files changed, 2 insertions(+), 8 deletions(-) diff --git a/internal/simdgen/go.yaml b/internal/simdgen/go.yaml index 27c45900..8fadf2a9 100644 --- a/internal/simdgen/go.yaml +++ b/internal/simdgen/go.yaml @@ -519,13 +519,10 @@ out: - *int3 - go: DotProdBroadcast - asm: VDPPD + asm: VDPP[SD] in: - &dpb_src go: $t - base: float - elemBits: 64 - bits: $bits - *dpb_src - class: immediate const: 127 diff --git a/internal/simdgen/ops/MLOps/go.yaml b/internal/simdgen/ops/MLOps/go.yaml index fb6b4fd1..278daa87 100644 --- a/internal/simdgen/ops/MLOps/go.yaml +++ b/internal/simdgen/ops/MLOps/go.yaml @@ -42,13 +42,10 @@ out: - *int3 - go: DotProdBroadcast - asm: VDPPD + asm: VDPP[SD] in: - &dpb_src go: $t - base: float - elemBits: 64 - bits: $bits - *dpb_src - class: immediate const: 127 From cd05644f3eb9d5e2a7fe014a0cdb95561554ed0d Mon Sep 17 00:00:00 2001 From: Junyang Shao Date: Mon, 14 Jul 2025 19:09:54 +0000 Subject: [PATCH 137/200] internal/simdgen: add variable Permute This CL only adds the "PermuteVar"(namings borrowed from C#) variant of permutes. The immediate variant of permute will be in another CL. This CL generates CL 687939. 
Change-Id: I3072ede18d623f23a007f66114ee0e429dd2aa0d Reviewed-on: https://go-review.googlesource.com/c/arch/+/687919 Reviewed-by: David Chase LUCI-TryBot-Result: Go LUCI --- internal/simdgen/categories.yaml | 28 ++++++++++ internal/simdgen/gen_simdGenericOps.go | 3 +- internal/simdgen/gen_simdIntrinsics.go | 8 +++ internal/simdgen/gen_simdTypes.go | 65 ++++++++++++++++++---- internal/simdgen/gen_simdrules.go | 24 ++++++-- internal/simdgen/gen_utility.go | 17 +++++- internal/simdgen/go.yaml | 39 +++++++++++-- internal/simdgen/ops/Moves/categories.yaml | 28 ++++++++++ internal/simdgen/ops/Moves/go.yaml | 41 ++++++++++++-- 9 files changed, 220 insertions(+), 33 deletions(-) diff --git a/internal/simdgen/categories.yaml b/internal/simdgen/categories.yaml index 802dc9eb..a6dfaf19 100644 --- a/internal/simdgen/categories.yaml +++ b/internal/simdgen/categories.yaml @@ -556,6 +556,34 @@ extension: "AVX.*" documentation: !string |- // Get128 retrieves the upper (1) or lower (0) half of a 256-bit vector, depending on the constant operand. + + +- go: Permute + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // Permute performs a full permutation of vector x using indices: + // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} + // Only the needed bits to represent x's index are used in indices' elements. + +- go: PermuteMasked + commutative: "false" + masked: "true" + extension: "AVX.*" + documentation: !string |- + // PermuteMasked performs a full permutation of vector y using indices: + // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} + // Only the needed bits to represent x's index are used in indices' elements. 
+ +- go: Permute2Masked # Permute2Masked is only available on or after AVX512 + commutative: "false" + masked: "true" + extension: "AVX.*" + documentation: !string |- + // Permute2Masked performs a full permutation of vector x, y using indices: + // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} + // where xy is x appending y. + // Only the needed bits to represent xy's index are used in indices' elements. - go: Mul commutative: "true" extension: "AVX.*" diff --git a/internal/simdgen/gen_simdGenericOps.go b/internal/simdgen/gen_simdGenericOps.go index 7fd04b7c..f6c7a4a6 100644 --- a/internal/simdgen/gen_simdGenericOps.go +++ b/internal/simdgen/gen_simdGenericOps.go @@ -44,8 +44,7 @@ func writeSIMDGenericOps(ops []Operation) *bytes.Buffer { var opsData opData for _, op := range ops { _, _, _, immType, gOp := op.shape() - genericNames := gOp.Go + *gOp.In[0].Go - gOpData := genericOpsData{*gOp.In[0].Go + gOp.Go, genericNames, len(gOp.In), op.Commutative} + gOpData := genericOpsData{*gOp.In[0].Go + gOp.Go, genericName(gOp), len(gOp.In), op.Commutative} if immType == VarImm || immType == ConstVarImm { opsData.OpsImm = append(opsData.OpsImm, gOpData) } else { diff --git a/internal/simdgen/gen_simdIntrinsics.go b/internal/simdgen/gen_simdIntrinsics.go index 3c40856b..244f2360 100644 --- a/internal/simdgen/gen_simdIntrinsics.go +++ b/internal/simdgen/gen_simdIntrinsics.go @@ -29,10 +29,18 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . 
{{end}} {{define "op2"}} addF(simdPackage, "{{(index .In 0).Go}}.{{.Go}}", opLen2(ssa.Op{{.Go}}{{(index .In 0).Go}}, {{.SSAType}}), sys.AMD64) {{end}} +{{define "op2_21Uint"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen2_21(ssa.Op{{.Go}}{{(index .In 1).Go}}, {{.SSAType}}), sys.AMD64) +{{end}} {{define "op3"}} addF(simdPackage, "{{(index .In 0).Go}}.{{.Go}}", opLen3(ssa.Op{{.Go}}{{(index .In 0).Go}}, {{.SSAType}}), sys.AMD64) {{end}} +{{define "op3_21Uint"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen3_21(ssa.Op{{.Go}}{{(index .In 1).Go}}, {{.SSAType}}), sys.AMD64) +{{end}} +{{define "op3_231Uint"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen3_231(ssa.Op{{.Go}}{{(index .In 1).Go}}, {{.SSAType}}), sys.AMD64) +{{end}} {{define "op4"}} addF(simdPackage, "{{(index .In 0).Go}}.{{.Go}}", opLen4(ssa.Op{{.Go}}{{(index .In 0).Go}}, {{.SSAType}}), sys.AMD64) {{end}} +{{define "op4_231Uint"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen4_231(ssa.Op{{.Go}}{{(index .In 1).Go}}, {{.SSAType}}), sys.AMD64) +{{end}} {{define "op1Imm8"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen1Imm8(ssa.Op{{.Go}}{{(index .In 1).Go}}, {{.SSAType}}, {{(index .In 0).ImmOffset}}), sys.AMD64) {{end}} {{define "op2Imm8"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen2Imm8(ssa.Op{{.Go}}{{(index .In 1).Go}}, {{.SSAType}}, {{(index .In 0).ImmOffset}}), sys.AMD64) diff --git a/internal/simdgen/gen_simdTypes.go b/internal/simdgen/gen_simdTypes.go index 412c7126..76de4f02 100644 --- a/internal/simdgen/gen_simdTypes.go +++ b/internal/simdgen/gen_simdTypes.go @@ -107,42 +107,70 @@ package simd {{if .Documentation}}{{.Documentation}} //{{end}} // Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}} -func (x {{(index .In 0).Go}}) {{.Go}}() {{.GoType}} +func ({{.Op0NameAndType "x"}}) {{.Go}}() {{.GoType}} {{end}} {{define "op2"}} {{if .Documentation}}{{.Documentation}} //{{end}} // Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}} -func (x {{(index .In 0).Go}}) 
{{.Go}}({{.Op1NameAndType "y"}}) {{.GoType}} +func ({{.Op0NameAndType "x"}}) {{.Go}}({{.Op1NameAndType "y"}}) {{.GoType}} +{{end}} + +{{define "op2_21Uint"}} +{{if .Documentation}}{{.Documentation}} +//{{end}} +// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}} +func ({{.Op1NameAndType "x"}}) {{.Go}}({{.Op0NameAndType "y"}}) {{.GoType}} {{end}} {{define "op3"}} {{if .Documentation}}{{.Documentation}} //{{end}} // Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}} -func (x {{(index .In 0).Go}}) {{.Go}}({{.Op1NameAndType "y"}}, {{.Op2NameAndType "z"}}) {{.GoType}} +func ({{.Op0NameAndType "x"}}) {{.Go}}({{.Op1NameAndType "y"}}, {{.Op2NameAndType "z"}}) {{.GoType}} +{{end}} + +{{define "op3_21Uint"}} +{{if .Documentation}}{{.Documentation}} +//{{end}} +// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}} +func ({{.Op1NameAndType "x"}}) {{.Go}}({{.Op0NameAndType "y"}}, {{.Op2NameAndType "z"}}) {{.GoType}} +{{end}} + +{{define "op3_231Uint"}} +{{if .Documentation}}{{.Documentation}} +//{{end}} +// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}} +func ({{.Op1NameAndType "x"}}) {{.Go}}({{.Op2NameAndType "y"}}, {{.Op0NameAndType "z"}}) {{.GoType}} {{end}} {{define "op2VecAsScalar"}} {{if .Documentation}}{{.Documentation}} //{{end}} // Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}} -func (x {{(index .In 0).Go}}) {{.Go}}(y uint{{(index .In 1).TreatLikeAScalarOfSize}}) {{(index .Out 0).Go}} +func ({{.Op0NameAndType "x"}}) {{.Go}}(y uint{{(index .In 1).TreatLikeAScalarOfSize}}) {{(index .Out 0).Go}} {{end}} {{define "op3VecAsScalar"}} {{if .Documentation}}{{.Documentation}} //{{end}} // Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}} -func (x {{(index .In 0).Go}}) {{.Go}}(y uint{{(index .In 1).TreatLikeAScalarOfSize}}, {{.Op2NameAndType "z"}}) {{(index .Out 0).Go}} +func ({{.Op0NameAndType "x"}}) {{.Go}}(y uint{{(index .In 1).TreatLikeAScalarOfSize}}, {{.Op2NameAndType "z"}}) {{(index .Out 0).Go}} {{end}} {{define "op4"}} {{if .Documentation}}{{.Documentation}} //{{end}} // Asm: {{.Asm}}, CPU Feature: 
{{.CPUFeature}} -func (x {{(index .In 0).Go}}) {{.Go}}({{.Op1NameAndType "y"}}, {{.Op2NameAndType "z"}}, {{.Op3NameAndType "u"}}) {{.GoType}} +func ({{.Op0NameAndType "x"}}) {{.Go}}({{.Op1NameAndType "y"}}, {{.Op2NameAndType "z"}}, {{.Op3NameAndType "u"}}) {{.GoType}} +{{end}} + +{{define "op4_231Uint"}} +{{if .Documentation}}{{.Documentation}} +//{{end}} +// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}} +func ({{.Op1NameAndType "x"}}) {{.Go}}({{.Op2NameAndType "y"}}, {{.Op0NameAndType "z"}}, {{.Op3NameAndType "u"}}) {{.GoType}} {{end}} {{define "op1Imm8"}} @@ -151,7 +179,7 @@ func (x {{(index .In 0).Go}}) {{.Go}}({{.Op1NameAndType "y"}}, {{.Op2NameAndType // {{.ImmName}} is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}} -func (x {{(index .In 1).Go}}) {{.Go}}({{.ImmName}} uint8) {{.GoType}} +func ({{.Op1NameAndType "x"}}) {{.Go}}({{.ImmName}} uint8) {{.GoType}} {{end}} {{define "op2Imm8"}} @@ -160,7 +188,7 @@ func (x {{(index .In 1).Go}}) {{.Go}}({{.ImmName}} uint8) {{.GoType}} // {{.ImmName}} is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}} -func (x {{(index .In 1).Go}}) {{.Go}}({{.ImmName}} uint8, {{.Op2NameAndType "y"}}) {{.GoType}} +func ({{.Op1NameAndType "x"}}) {{.Go}}({{.ImmName}} uint8, {{.Op2NameAndType "y"}}) {{.GoType}} {{end}} {{define "op2Imm8_2I"}} @@ -169,7 +197,7 @@ func (x {{(index .In 1).Go}}) {{.Go}}({{.ImmName}} uint8, {{.Op2NameAndType "y"} // {{.ImmName}} is expected to be a constant, non-constant value will trigger a runtime panic. 
// // Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}} -func (x {{(index .In 1).Go}}) {{.Go}}({{.Op2NameAndType "y"}}, {{.ImmName}} uint8) {{.GoType}} +func ({{.Op1NameAndType "x"}}) {{.Go}}({{.Op2NameAndType "y"}}, {{.ImmName}} uint8) {{.GoType}} {{end}} @@ -179,7 +207,7 @@ func (x {{(index .In 1).Go}}) {{.Go}}({{.Op2NameAndType "y"}}, {{.ImmName}} uint // {{.ImmName}} is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}} -func (x {{(index .In 1).Go}}) {{.Go}}({{.ImmName}} uint8, {{.Op2NameAndType "y"}}, {{.Op3NameAndType "z"}}) {{.GoType}} +func ({{.Op1NameAndType "x"}}) {{.Go}}({{.ImmName}} uint8, {{.Op2NameAndType "y"}}, {{.Op3NameAndType "z"}}) {{.GoType}} {{end}} {{define "op3Imm8_2I"}} @@ -188,7 +216,7 @@ func (x {{(index .In 1).Go}}) {{.Go}}({{.ImmName}} uint8, {{.Op2NameAndType "y"} // {{.ImmName}} is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}} -func (x {{(index .In 1).Go}}) {{.Go}}({{.Op2NameAndType "y"}}, {{.ImmName}} uint8, {{.Op3NameAndType "z"}}) {{.GoType}} +func ({{.Op1NameAndType "x"}}) {{.Go}}({{.Op2NameAndType "y"}}, {{.ImmName}} uint8, {{.Op3NameAndType "z"}}) {{.GoType}} {{end}} @@ -198,7 +226,7 @@ func (x {{(index .In 1).Go}}) {{.Go}}({{.Op2NameAndType "y"}}, {{.ImmName}} uint // {{.ImmName}} is expected to be a constant, non-constant value will trigger a runtime panic. 
// // Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}} -func (x {{(index .In 1).Go}}) {{.Go}}({{.ImmName}} uint8, {{.Op2NameAndType "y"}}, {{.Op3NameAndType "z"}}, {{.Op4NameAndType "u"}}) {{.GoType}} +func ({{.Op1NameAndType "x"}}) {{.Go}}({{.ImmName}} uint8, {{.Op2NameAndType "y"}}, {{.Op3NameAndType "z"}}, {{.Op4NameAndType "u"}}) {{.GoType}} {{end}} {{define "vectorConversion"}} @@ -283,6 +311,7 @@ func writeSIMDTestsWrapper(ops []Operation) *bytes.Buffer { opsByShape := make(map[string]opData) opsSkipped := map[string]struct{}{} +outerLoop: for _, o := range ops { _, _, _, immType, gOp := o.shape() @@ -299,6 +328,18 @@ func writeSIMDTestsWrapper(ops []Operation) *bytes.Buffer { opsSkipped[o.Go] = struct{}{} continue } + if o.OperandOrder != nil { + // We need to check if the customize order change the function signature. + // It is only safe to proceed generating the test wrappers if the function + // signature stays the same. + // Filtering out unqualified cases as a hack now, this test wrapper + // infrastrcuture should be changing soon so it should be fine. + switch *o.OperandOrder { + default: + opsSkipped[o.Go] = struct{}{} + continue outerLoop + } + } var shape string var baseArgDefList []string diff --git a/internal/simdgen/gen_simdrules.go b/internal/simdgen/gen_simdrules.go index bddcab43..e684058d 100644 --- a/internal/simdgen/gen_simdrules.go +++ b/internal/simdgen/gen_simdrules.go @@ -102,21 +102,33 @@ func writeSIMDRules(ops []Operation) *bytes.Buffer { data.ArgsOut = fmt.Sprintf("[a+%s] %s", *opr.In[0].Const, data.ArgsOut) } + goType := func(op Operation) string { + if op.OperandOrder != nil { + switch *op.OperandOrder { + case "21Uint": + fallthrough + case "231Uint": + // Permute + return *op.In[1].Go + } + } + return *op.In[0].Go + } var tplName string // If class overwrite is happening, that's not really a mask but a vreg. 
if opOutShape == OneVregOut || opOutShape == OneVregOutAtIn || gOp.Out[0].OverwriteClass != nil { switch opInShape { case OneImmIn: tplName = "pureVreg" - data.GoType = *gOp.In[0].Go + data.GoType = goType(gOp) case PureVregIn: tplName = "pureVreg" - data.GoType = *gOp.In[0].Go + data.GoType = goType(gOp) case OneKmaskImmIn: fallthrough case OneKmaskIn: tplName = "maskIn" - data.GoType = *gOp.In[0].Go + data.GoType = goType(gOp) rearIdx := len(gOp.In) - 1 // Mask is at the end. data.MaskInConvert = fmt.Sprintf("VPMOVVec%dx%dToM", *gOp.In[rearIdx].ElemBits, *gOp.In[rearIdx].Lanes) @@ -125,7 +137,7 @@ func writeSIMDRules(ops []Operation) *bytes.Buffer { } } else if opOutShape == OneGregOut { tplName = "pureVreg" // TODO this will be wrong - data.GoType = *gOp.In[0].Go + data.GoType = goType(gOp) } else { // OneKmaskOut case data.MaskOutConvert = fmt.Sprintf("VPMOVMToVec%dx%d", *gOp.Out[0].ElemBits, *gOp.In[0].Lanes) @@ -134,12 +146,12 @@ func writeSIMDRules(ops []Operation) *bytes.Buffer { fallthrough case PureVregIn: tplName = "maskOut" - data.GoType = *gOp.In[0].Go + data.GoType = goType(gOp) case OneKmaskImmIn: fallthrough case OneKmaskIn: tplName = "maskInMaskOut" - data.GoType = *gOp.In[0].Go + data.GoType = goType(gOp) rearIdx := len(gOp.In) - 1 data.MaskInConvert = fmt.Sprintf("VPMOVVec%dx%dToM", *gOp.In[rearIdx].ElemBits, *gOp.In[rearIdx].Lanes) case PureKmaskIn: diff --git a/internal/simdgen/gen_utility.go b/internal/simdgen/gen_utility.go index 25503510..11c5e75a 100644 --- a/internal/simdgen/gen_utility.go +++ b/internal/simdgen/gen_utility.go @@ -570,6 +570,19 @@ func splitMask(ops []Operation) ([]Operation, error) { return splited, nil } +func genericName(op Operation) string { + if op.OperandOrder != nil { + switch *op.OperandOrder { + case "21Uint": + fallthrough + case "231Uint": + // Permute + return op.Go + *op.In[1].Go + } + } + return op.Go + *op.In[0].Go +} + // dedupGodef is deduping operations in [Op.Go]+[*Op.In[0].Go] level. 
// By deduping, it means picking the least advanced architecture that satisfy the requirement: // AVX512 will be least preferred. @@ -579,8 +592,8 @@ func dedupGodef(ops []Operation) ([]Operation, error) { for _, op := range ops { _, _, _, _, gOp := op.shape() - genericNames := gOp.Go + *gOp.In[0].Go - seen[genericNames] = append(seen[genericNames], op) + gN := genericName(gOp) + seen[gN] = append(seen[gN], op) } if *FlagReportDup { for gName, dup := range seen { diff --git a/internal/simdgen/go.yaml b/internal/simdgen/go.yaml index 8fadf2a9..0b894ab2 100644 --- a/internal/simdgen/go.yaml +++ b/internal/simdgen/go.yaml @@ -896,13 +896,42 @@ out: - *f64x2 +- go: Permute + asm: "VPERM[BWDQ]|VPERMP[SD]" + operandOrder: "21Uint" + in: + - &anyindices + go: $t + name: indices + overwriteBase: uint + - &any + go: $t + out: + - *any +- go: PermuteMasked + asm: "VPERM[BWDQ]|VPERMP[SD]" + operandOrder: "21Uint" + in: + - class: mask + - *anyindices + - *any + out: + - *any - - - - - +- go: Permute2Masked + asm: "VPERMI2[BWDQ]|VPERMI2P[SD]" + # Because we are overwriting the receiver's type, we + # have to move the receiver to be a parameter so that + # we can have no duplication. + operandOrder: "231Uint" + in: + - *anyindices # result in arg 0 + - class: mask + - *any + - *any + out: + - *any # "Normal" multiplication is only available for floats. # This only covers the single and double precision. - go: Mul diff --git a/internal/simdgen/ops/Moves/categories.yaml b/internal/simdgen/ops/Moves/categories.yaml index db36efd4..bb9fae8d 100644 --- a/internal/simdgen/ops/Moves/categories.yaml +++ b/internal/simdgen/ops/Moves/categories.yaml @@ -19,3 +19,31 @@ extension: "AVX.*" documentation: !string |- // Get128 retrieves the upper (1) or lower (0) half of a 256-bit vector, depending on the constant operand. 
+ + +- go: Permute + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // Permute performs a full permutation of vector x using indices: + // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} + // Only the needed bits to represent x's index are used in indices' elements. + +- go: PermuteMasked + commutative: "false" + masked: "true" + extension: "AVX.*" + documentation: !string |- + // PermuteMasked performs a full permutation of vector y using indices: + // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} + // Only the needed bits to represent x's index are used in indices' elements. + +- go: Permute2Masked # Permute2Masked is only available on or after AVX512 + commutative: "false" + masked: "true" + extension: "AVX.*" + documentation: !string |- + // Permute2Masked performs a full permutation of vector x, y using indices: + // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} + // where xy is x appending y. + // Only the needed bits to represent xy's index are used in indices' elements. \ No newline at end of file diff --git a/internal/simdgen/ops/Moves/go.yaml b/internal/simdgen/ops/Moves/go.yaml index dd9ae79d..49b67a28 100644 --- a/internal/simdgen/ops/Moves/go.yaml +++ b/internal/simdgen/ops/Moves/go.yaml @@ -179,10 +179,39 @@ out: - *f64x2 +- go: Permute + asm: "VPERM[BWDQ]|VPERMP[SD]" + operandOrder: "21Uint" + in: + - &anyindices + go: $t + name: indices + overwriteBase: uint + - &any + go: $t + out: + - *any - - - - - - +- go: PermuteMasked + asm: "VPERM[BWDQ]|VPERMP[SD]" + operandOrder: "21Uint" + in: + - class: mask + - *anyindices + - *any + out: + - *any + +- go: Permute2Masked + asm: "VPERMI2[BWDQ]|VPERMI2P[SD]" + # Because we are overwriting the receiver's type, we + # have to move the receiver to be a parameter so that + # we can have no duplication. 
+ operandOrder: "231Uint" + in: + - *anyindices # result in arg 0 + - class: mask + - *any + - *any + out: + - *any \ No newline at end of file From d0fd62e6cd0f5368bfd48c862170e8143c19da42 Mon Sep 17 00:00:00 2001 From: Junyang Shao Date: Mon, 14 Jul 2025 19:45:20 +0000 Subject: [PATCH 138/200] internal/simdgen: default mask param's name to mask This CL generates CL 687955. Change-Id: I6606d6857c9fc9d9ed0f1025fdb1c3c45238aa04 Reviewed-on: https://go-review.googlesource.com/c/arch/+/687920 Reviewed-by: David Chase LUCI-TryBot-Result: Go LUCI --- internal/simdgen/gen_utility.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/internal/simdgen/gen_utility.go b/internal/simdgen/gen_utility.go index 11c5e75a..136223a1 100644 --- a/internal/simdgen/gen_utility.go +++ b/internal/simdgen/gen_utility.go @@ -351,6 +351,9 @@ func (o Operand) OpName(s string) string { if n := o.Name; n != nil { return *n } + if o.Class == "mask" { + return "mask" + } return s } From ca6dc8eff52c568a7bfbd4f18734f048eb8604a3 Mon Sep 17 00:00:00 2001 From: Junyang Shao Date: Mon, 14 Jul 2025 20:30:36 +0000 Subject: [PATCH 139/200] internal/simdgen: add Compress This CL generates CL 687995. Change-Id: I889a065743936a592037032c67b6df161bcb3cde Reviewed-on: https://go-review.googlesource.com/c/arch/+/687975 Reviewed-by: David Chase LUCI-TryBot-Result: Go LUCI --- internal/simdgen/categories.yaml | 8 ++++++++ internal/simdgen/go.yaml | 8 ++++++++ internal/simdgen/ops/Moves/categories.yaml | 10 +++++++++- internal/simdgen/ops/Moves/go.yaml | 8 ++++++++ 4 files changed, 33 insertions(+), 1 deletion(-) diff --git a/internal/simdgen/categories.yaml b/internal/simdgen/categories.yaml index a6dfaf19..947d8456 100644 --- a/internal/simdgen/categories.yaml +++ b/internal/simdgen/categories.yaml @@ -584,6 +584,14 @@ // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} // where xy is x appending y. // Only the needed bits to represent xy's index are used in indices' elements. 
+ +- go: Compress + commutative: "false" + # The mask in Compress is a control mask rather than a write mask, so we don't mark it "masked" + extension: "AVX.*" + documentation: !string |- + // Compress performs a compression on vector x using mask by + // selecting elements as indicated by mask, and pack them to lower indexed elements. - go: Mul commutative: "true" extension: "AVX.*" diff --git a/internal/simdgen/go.yaml b/internal/simdgen/go.yaml index 0b894ab2..c158204d 100644 --- a/internal/simdgen/go.yaml +++ b/internal/simdgen/go.yaml @@ -932,6 +932,14 @@ - *any out: - *any + +- go: Compress + asm: "VPCOMPRESS[BWDQ]|VCOMPRESSP[SD]" + in: + - class: mask + - *any + out: + - *any # "Normal" multiplication is only available for floats. # This only covers the single and double precision. - go: Mul diff --git a/internal/simdgen/ops/Moves/categories.yaml b/internal/simdgen/ops/Moves/categories.yaml index bb9fae8d..8dfe372a 100644 --- a/internal/simdgen/ops/Moves/categories.yaml +++ b/internal/simdgen/ops/Moves/categories.yaml @@ -46,4 +46,12 @@ // Permute2Masked performs a full permutation of vector x, y using indices: // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} // where xy is x appending y. - // Only the needed bits to represent xy's index are used in indices' elements. \ No newline at end of file + // Only the needed bits to represent xy's index are used in indices' elements. + +- go: Compress + commutative: "false" + # The mask in Compress is a control mask rather than a write mask, so we don't mark it "masked" + extension: "AVX.*" + documentation: !string |- + // Compress performs a compression on vector x using mask by + // selecting elements as indicated by mask, and pack them to lower indexed elements. 
diff --git a/internal/simdgen/ops/Moves/go.yaml b/internal/simdgen/ops/Moves/go.yaml index 49b67a28..cf5608f2 100644 --- a/internal/simdgen/ops/Moves/go.yaml +++ b/internal/simdgen/ops/Moves/go.yaml @@ -214,4 +214,12 @@ - *any - *any out: + - *any + +- go: Compress + asm: "VPCOMPRESS[BWDQ]|VCOMPRESSP[SD]" + in: + - class: mask + - *any + out: - *any \ No newline at end of file From 7928003cb8e91526da3f5b6aa96fb6b8afbd001c Mon Sep 17 00:00:00 2001 From: Junyang Shao Date: Mon, 14 Jul 2025 22:01:22 +0000 Subject: [PATCH 140/200] internal/simdgen: adjust param order for AndNot This CL generates CL 687996 Change-Id: I21fd71c40177b06660a075e4cb157a3f3b92ae74 Reviewed-on: https://go-review.googlesource.com/c/arch/+/687977 LUCI-TryBot-Result: Go LUCI Reviewed-by: David Chase --- internal/simdgen/categories.yaml | 4 ++-- internal/simdgen/gen_simdIntrinsics.go | 4 ++++ internal/simdgen/gen_simdTypes.go | 16 ++++++++++++++++ internal/simdgen/go.yaml | 2 ++ .../simdgen/ops/BitwiseLogic/categories.yaml | 4 ++-- internal/simdgen/ops/BitwiseLogic/go.yaml | 2 ++ 6 files changed, 28 insertions(+), 4 deletions(-) diff --git a/internal/simdgen/categories.yaml b/internal/simdgen/categories.yaml index 947d8456..8f4ffbe1 100644 --- a/internal/simdgen/categories.yaml +++ b/internal/simdgen/categories.yaml @@ -93,13 +93,13 @@ commutative: "false" extension: "AVX.*" documentation: !string |- - // AndNot performs a bitwise AND NOT operation between two vectors. + // AndNot performs a bitwise x &^ y. - go: AndNotMasked masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // AndNotMasked performs a masked bitwise AND NOT operation between two vectors. + // AndNotMasked performs a bitwise x &^ y. 
- go: Xor commutative: "true" extension: "AVX.*" diff --git a/internal/simdgen/gen_simdIntrinsics.go b/internal/simdgen/gen_simdIntrinsics.go index 244f2360..0dce757f 100644 --- a/internal/simdgen/gen_simdIntrinsics.go +++ b/internal/simdgen/gen_simdIntrinsics.go @@ -29,10 +29,14 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . {{end}} {{define "op2"}} addF(simdPackage, "{{(index .In 0).Go}}.{{.Go}}", opLen2(ssa.Op{{.Go}}{{(index .In 0).Go}}, {{.SSAType}}), sys.AMD64) {{end}} +{{define "op2_21"}} addF(simdPackage, "{{(index .In 0).Go}}.{{.Go}}", opLen2_21(ssa.Op{{.Go}}{{(index .In 0).Go}}, {{.SSAType}}), sys.AMD64) +{{end}} {{define "op2_21Uint"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen2_21(ssa.Op{{.Go}}{{(index .In 1).Go}}, {{.SSAType}}), sys.AMD64) {{end}} {{define "op3"}} addF(simdPackage, "{{(index .In 0).Go}}.{{.Go}}", opLen3(ssa.Op{{.Go}}{{(index .In 0).Go}}, {{.SSAType}}), sys.AMD64) {{end}} +{{define "op3_21"}} addF(simdPackage, "{{(index .In 0).Go}}.{{.Go}}", opLen3_21(ssa.Op{{.Go}}{{(index .In 0).Go}}, {{.SSAType}}), sys.AMD64) +{{end}} {{define "op3_21Uint"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen3_21(ssa.Op{{.Go}}{{(index .In 1).Go}}, {{.SSAType}}), sys.AMD64) {{end}} {{define "op3_231Uint"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen3_231(ssa.Op{{.Go}}{{(index .In 1).Go}}, {{.SSAType}}), sys.AMD64) diff --git a/internal/simdgen/gen_simdTypes.go b/internal/simdgen/gen_simdTypes.go index 76de4f02..a5aaf1b3 100644 --- a/internal/simdgen/gen_simdTypes.go +++ b/internal/simdgen/gen_simdTypes.go @@ -117,6 +117,13 @@ func ({{.Op0NameAndType "x"}}) {{.Go}}() {{.GoType}} func ({{.Op0NameAndType "x"}}) {{.Go}}({{.Op1NameAndType "y"}}) {{.GoType}} {{end}} +{{define "op2_21"}} +{{if .Documentation}}{{.Documentation}} +//{{end}} +// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}} +func ({{.Op1NameAndType "x"}}) {{.Go}}({{.Op0NameAndType "y"}}) {{.GoType}} +{{end}} + {{define "op2_21Uint"}} 
{{if .Documentation}}{{.Documentation}} //{{end}} @@ -131,6 +138,13 @@ func ({{.Op1NameAndType "x"}}) {{.Go}}({{.Op0NameAndType "y"}}) {{.GoType}} func ({{.Op0NameAndType "x"}}) {{.Go}}({{.Op1NameAndType "y"}}, {{.Op2NameAndType "z"}}) {{.GoType}} {{end}} +{{define "op3_21"}} +{{if .Documentation}}{{.Documentation}} +//{{end}} +// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}} +func ({{.Op1NameAndType "x"}}) {{.Go}}({{.Op0NameAndType "y"}}, {{.Op2NameAndType "z"}}) {{.GoType}} +{{end}} + {{define "op3_21Uint"}} {{if .Documentation}}{{.Documentation}} //{{end}} @@ -335,6 +349,8 @@ outerLoop: // Filtering out unqualified cases as a hack now, this test wrapper // infrastrcuture should be changing soon so it should be fine. switch *o.OperandOrder { + case "21": + // No op because it's only set in AndNot, and opr[2] and opr[1] has the same shape default: opsSkipped[o.Go] = struct{}{} continue outerLoop diff --git a/internal/simdgen/go.yaml b/internal/simdgen/go.yaml index c158204d..bcfb97c1 100644 --- a/internal/simdgen/go.yaml +++ b/internal/simdgen/go.yaml @@ -156,6 +156,7 @@ - go: AndNot asm: "VPANDN" + operandOrder: "21" # switch the arg order in: - *any - *any @@ -163,6 +164,7 @@ - *any - go: AndNotMasked asm: "VPANDN[DQ]" + operandOrder: "21" in: - class: mask - *any diff --git a/internal/simdgen/ops/BitwiseLogic/categories.yaml b/internal/simdgen/ops/BitwiseLogic/categories.yaml index 1ef1d360..afda77b8 100644 --- a/internal/simdgen/ops/BitwiseLogic/categories.yaml +++ b/internal/simdgen/ops/BitwiseLogic/categories.yaml @@ -25,13 +25,13 @@ commutative: "false" extension: "AVX.*" documentation: !string |- - // AndNot performs a bitwise AND NOT operation between two vectors. + // AndNot performs a bitwise x &^ y. - go: AndNotMasked masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // AndNotMasked performs a masked bitwise AND NOT operation between two vectors. + // AndNotMasked performs a bitwise x &^ y. 
- go: Xor commutative: "true" extension: "AVX.*" diff --git a/internal/simdgen/ops/BitwiseLogic/go.yaml b/internal/simdgen/ops/BitwiseLogic/go.yaml index 49e2dbc9..acc7a51e 100644 --- a/internal/simdgen/ops/BitwiseLogic/go.yaml +++ b/internal/simdgen/ops/BitwiseLogic/go.yaml @@ -32,6 +32,7 @@ - go: AndNot asm: "VPANDN" + operandOrder: "21" # switch the arg order in: - *any - *any @@ -39,6 +40,7 @@ - *any - go: AndNotMasked asm: "VPANDN[DQ]" + operandOrder: "21" in: - class: mask - *any From ad6e2ac689d7249505ad024dd62a9ad3b69499fc Mon Sep 17 00:00:00 2001 From: Junyang Shao Date: Tue, 15 Jul 2025 21:51:00 +0000 Subject: [PATCH 141/200] internal/simdgen: cleans up the shape of Permute This CL addressed comments in CL 687919. Change-Id: I77d488f6128658c6f0ad27f2fa9565335f6829a9 Reviewed-on: https://go-review.googlesource.com/c/arch/+/688295 LUCI-TryBot-Result: Go LUCI Reviewed-by: David Chase --- internal/simdgen/gen_simdIntrinsics.go | 8 ++++---- internal/simdgen/gen_simdTypes.go | 8 ++++---- internal/simdgen/gen_simdrules.go | 6 ++---- internal/simdgen/gen_utility.go | 6 ++---- internal/simdgen/go.yaml | 6 +++--- internal/simdgen/ops/Moves/go.yaml | 6 +++--- 6 files changed, 18 insertions(+), 22 deletions(-) diff --git a/internal/simdgen/gen_simdIntrinsics.go b/internal/simdgen/gen_simdIntrinsics.go index 0dce757f..bf2a180f 100644 --- a/internal/simdgen/gen_simdIntrinsics.go +++ b/internal/simdgen/gen_simdIntrinsics.go @@ -31,19 +31,19 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . 
{{end}} {{define "op2_21"}} addF(simdPackage, "{{(index .In 0).Go}}.{{.Go}}", opLen2_21(ssa.Op{{.Go}}{{(index .In 0).Go}}, {{.SSAType}}), sys.AMD64) {{end}} -{{define "op2_21Uint"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen2_21(ssa.Op{{.Go}}{{(index .In 1).Go}}, {{.SSAType}}), sys.AMD64) +{{define "op2_21Type1"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen2_21(ssa.Op{{.Go}}{{(index .In 1).Go}}, {{.SSAType}}), sys.AMD64) {{end}} {{define "op3"}} addF(simdPackage, "{{(index .In 0).Go}}.{{.Go}}", opLen3(ssa.Op{{.Go}}{{(index .In 0).Go}}, {{.SSAType}}), sys.AMD64) {{end}} {{define "op3_21"}} addF(simdPackage, "{{(index .In 0).Go}}.{{.Go}}", opLen3_21(ssa.Op{{.Go}}{{(index .In 0).Go}}, {{.SSAType}}), sys.AMD64) {{end}} -{{define "op3_21Uint"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen3_21(ssa.Op{{.Go}}{{(index .In 1).Go}}, {{.SSAType}}), sys.AMD64) +{{define "op3_21Type1"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen3_21(ssa.Op{{.Go}}{{(index .In 1).Go}}, {{.SSAType}}), sys.AMD64) {{end}} -{{define "op3_231Uint"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen3_231(ssa.Op{{.Go}}{{(index .In 1).Go}}, {{.SSAType}}), sys.AMD64) +{{define "op3_231Type1"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen3_231(ssa.Op{{.Go}}{{(index .In 1).Go}}, {{.SSAType}}), sys.AMD64) {{end}} {{define "op4"}} addF(simdPackage, "{{(index .In 0).Go}}.{{.Go}}", opLen4(ssa.Op{{.Go}}{{(index .In 0).Go}}, {{.SSAType}}), sys.AMD64) {{end}} -{{define "op4_231Uint"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen4_231(ssa.Op{{.Go}}{{(index .In 1).Go}}, {{.SSAType}}), sys.AMD64) +{{define "op4_231Type1"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen4_231(ssa.Op{{.Go}}{{(index .In 1).Go}}, {{.SSAType}}), sys.AMD64) {{end}} {{define "op1Imm8"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen1Imm8(ssa.Op{{.Go}}{{(index .In 1).Go}}, {{.SSAType}}, {{(index .In 0).ImmOffset}}), sys.AMD64) {{end}} diff --git 
a/internal/simdgen/gen_simdTypes.go b/internal/simdgen/gen_simdTypes.go index a5aaf1b3..6832f751 100644 --- a/internal/simdgen/gen_simdTypes.go +++ b/internal/simdgen/gen_simdTypes.go @@ -124,7 +124,7 @@ func ({{.Op0NameAndType "x"}}) {{.Go}}({{.Op1NameAndType "y"}}) {{.GoType}} func ({{.Op1NameAndType "x"}}) {{.Go}}({{.Op0NameAndType "y"}}) {{.GoType}} {{end}} -{{define "op2_21Uint"}} +{{define "op2_21Type1"}} {{if .Documentation}}{{.Documentation}} //{{end}} // Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}} @@ -145,14 +145,14 @@ func ({{.Op0NameAndType "x"}}) {{.Go}}({{.Op1NameAndType "y"}}, {{.Op2NameAndTyp func ({{.Op1NameAndType "x"}}) {{.Go}}({{.Op0NameAndType "y"}}, {{.Op2NameAndType "z"}}) {{.GoType}} {{end}} -{{define "op3_21Uint"}} +{{define "op3_21Type1"}} {{if .Documentation}}{{.Documentation}} //{{end}} // Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}} func ({{.Op1NameAndType "x"}}) {{.Go}}({{.Op0NameAndType "y"}}, {{.Op2NameAndType "z"}}) {{.GoType}} {{end}} -{{define "op3_231Uint"}} +{{define "op3_231Type1"}} {{if .Documentation}}{{.Documentation}} //{{end}} // Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}} @@ -180,7 +180,7 @@ func ({{.Op0NameAndType "x"}}) {{.Go}}(y uint{{(index .In 1).TreatLikeAScalarOfS func ({{.Op0NameAndType "x"}}) {{.Go}}({{.Op1NameAndType "y"}}, {{.Op2NameAndType "z"}}, {{.Op3NameAndType "u"}}) {{.GoType}} {{end}} -{{define "op4_231Uint"}} +{{define "op4_231Type1"}} {{if .Documentation}}{{.Documentation}} //{{end}} // Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}} diff --git a/internal/simdgen/gen_simdrules.go b/internal/simdgen/gen_simdrules.go index e684058d..81aba7a0 100644 --- a/internal/simdgen/gen_simdrules.go +++ b/internal/simdgen/gen_simdrules.go @@ -105,10 +105,8 @@ func writeSIMDRules(ops []Operation) *bytes.Buffer { goType := func(op Operation) string { if op.OperandOrder != nil { switch *op.OperandOrder { - case "21Uint": - fallthrough - case "231Uint": - // Permute + case "21Type1", "231Type1": + // Permute uses operand[1] 
for method receiver. return *op.In[1].Go } } diff --git a/internal/simdgen/gen_utility.go b/internal/simdgen/gen_utility.go index 136223a1..698caef3 100644 --- a/internal/simdgen/gen_utility.go +++ b/internal/simdgen/gen_utility.go @@ -576,10 +576,8 @@ func splitMask(ops []Operation) ([]Operation, error) { func genericName(op Operation) string { if op.OperandOrder != nil { switch *op.OperandOrder { - case "21Uint": - fallthrough - case "231Uint": - // Permute + case "21Type1", "231Type1": + // Permute uses operand[1] for method receiver. return op.Go + *op.In[1].Go } } diff --git a/internal/simdgen/go.yaml b/internal/simdgen/go.yaml index bcfb97c1..3369e669 100644 --- a/internal/simdgen/go.yaml +++ b/internal/simdgen/go.yaml @@ -900,7 +900,7 @@ - go: Permute asm: "VPERM[BWDQ]|VPERMP[SD]" - operandOrder: "21Uint" + operandOrder: "21Type1" in: - &anyindices go: $t @@ -913,7 +913,7 @@ - go: PermuteMasked asm: "VPERM[BWDQ]|VPERMP[SD]" - operandOrder: "21Uint" + operandOrder: "21Type1" in: - class: mask - *anyindices @@ -926,7 +926,7 @@ # Because we are overwriting the receiver's type, we # have to move the receiver to be a parameter so that # we can have no duplication. - operandOrder: "231Uint" + operandOrder: "231Type1" in: - *anyindices # result in arg 0 - class: mask diff --git a/internal/simdgen/ops/Moves/go.yaml b/internal/simdgen/ops/Moves/go.yaml index cf5608f2..44a1c3c3 100644 --- a/internal/simdgen/ops/Moves/go.yaml +++ b/internal/simdgen/ops/Moves/go.yaml @@ -181,7 +181,7 @@ - go: Permute asm: "VPERM[BWDQ]|VPERMP[SD]" - operandOrder: "21Uint" + operandOrder: "21Type1" in: - &anyindices go: $t @@ -194,7 +194,7 @@ - go: PermuteMasked asm: "VPERM[BWDQ]|VPERMP[SD]" - operandOrder: "21Uint" + operandOrder: "21Type1" in: - class: mask - *anyindices @@ -207,7 +207,7 @@ # Because we are overwriting the receiver's type, we # have to move the receiver to be a parameter so that # we can have no duplication. 
- operandOrder: "231Uint" + operandOrder: "231Type1" in: - *anyindices # result in arg 0 - class: mask From 4344dd88a4a859fc9b3c16a814cb8f18ea06adf7 Mon Sep 17 00:00:00 2001 From: Junyang Shao Date: Tue, 15 Jul 2025 05:15:30 +0000 Subject: [PATCH 142/200] internal/simdgen: reorder PairDotProdAccumulate This CL reorders the param order of PairDotProdAccumulate family. This CL also updates some other ML Ops documentation. This CL generates CL 688095. Change-Id: I1b493852209c5370083ebf7ac75ad2b6d8b501b3 Reviewed-on: https://go-review.googlesource.com/c/arch/+/688115 LUCI-TryBot-Result: Go LUCI Reviewed-by: David Chase --- internal/simdgen/categories.yaml | 22 +++++++++++----------- internal/simdgen/gen_simdIntrinsics.go | 4 ++++ internal/simdgen/gen_simdTypes.go | 14 ++++++++++++++ internal/simdgen/go.yaml | 9 +++++++++ internal/simdgen/ops/MLOps/categories.yaml | 22 +++++++++++----------- internal/simdgen/ops/MLOps/go.yaml | 9 +++++++++ 6 files changed, 58 insertions(+), 22 deletions(-) diff --git a/internal/simdgen/categories.yaml b/internal/simdgen/categories.yaml index 8f4ffbe1..01881ab5 100644 --- a/internal/simdgen/categories.yaml +++ b/internal/simdgen/categories.yaml @@ -456,64 +456,64 @@ commutative: "false" extension: "AVX.*" documentation: !string |- - // UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x. + // UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of x and y and then adds z. - go: UnsignedSignedQuadDotProdAccumulateMasked masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // UnsignedSignedQuadDotProdAccumulateMasked performs dot products on groups of 4 elements of y and z and accumulates the results to x. + // UnsignedSignedQuadDotProdAccumulateMasked performs dot products on groups of 4 elements of x and y and then adds z. 
- go: SaturatedUnsignedSignedQuadDotProdAccumulate commutative: "false" extension: "AVX.*" documentation: !string |- - // SaturatedUnsignedSignedQuadDotProdAccumulate multiplies performs dot products on groups of 4 elements of y and z and accumulates the results to x. + // SaturatedUnsignedSignedQuadDotProdAccumulate multiplies performs dot products on groups of 4 elements of x and y and then adds z. - go: SaturatedUnsignedSignedQuadDotProdAccumulateMasked masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // SaturatedUnsignedSignedQuadDotProdAccumulateMasked multiplies performs dot products on groups of 4 elements of y and z and accumulates the results to x. + // SaturatedUnsignedSignedQuadDotProdAccumulateMasked multiplies performs dot products on groups of 4 elements of x and y and then adds z. - go: PairDotProdAccumulate commutative: "false" extension: "AVX.*" documentation: !string |- - // PairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x. + // PairDotProdAccumulate performs dot products on pairs of elements of x and y and then adds z. - go: PairDotProdAccumulateMasked masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // PairDotProdAccumulateMasked performs dot products on pairs of elements of y and z and accumulates the results to x. + // PairDotProdAccumulateMasked performs dot products on pairs of elements of x and y and then adds z. - go: SaturatedPairDotProdAccumulate commutative: "false" extension: "AVX.*" documentation: !string |- - // SaturatedPairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x. + // SaturatedPairDotProdAccumulate performs dot products on pairs of elements of x and y and then adds z. 
- go: SaturatedPairDotProdAccumulateMasked masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // SaturatedPairDotProdAccumulateMasked performs dot products on pairs of elements of y and z and accumulates the results to x. + // SaturatedPairDotProdAccumulateMasked performs dot products on pairs of elements of x and y and then adds z. - go: FusedMultiplyAddMasked masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // FusedMultiplyAddMasked performs `(v1 * v2) + v3`. + // FusedMultiplyAddMasked performs (x * y) + z. - go: FusedMultiplyAddSubMasked masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // FusedMultiplyAddSubMasked performs `(v1 * v2) - v3` for odd-indexed elements, and `(v1 * v2) + v3` for even-indexed elements. + // FusedMultiplyAddSubMasked performs (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements. - go: FusedMultiplySubAddMasked masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // FusedMultiplySubAddMasked performs `(v1 * v2) + v3` for odd-indexed elements, and `(v1 * v2) - v3` for even-indexed elements. + // FusedMultiplySubAddMasked performs (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements. - go: Max commutative: "true" extension: "AVX.*" diff --git a/internal/simdgen/gen_simdIntrinsics.go b/internal/simdgen/gen_simdIntrinsics.go index bf2a180f..6d7a75d1 100644 --- a/internal/simdgen/gen_simdIntrinsics.go +++ b/internal/simdgen/gen_simdIntrinsics.go @@ -41,10 +41,14 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . 
{{end}} {{define "op3_231Type1"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen3_231(ssa.Op{{.Go}}{{(index .In 1).Go}}, {{.SSAType}}), sys.AMD64) {{end}} +{{define "op3_31"}} addF(simdPackage, "{{(index .In 2).Go}}.{{.Go}}", opLen3_31(ssa.Op{{.Go}}{{(index .In 0).Go}}, {{.SSAType}}), sys.AMD64) +{{end}} {{define "op4"}} addF(simdPackage, "{{(index .In 0).Go}}.{{.Go}}", opLen4(ssa.Op{{.Go}}{{(index .In 0).Go}}, {{.SSAType}}), sys.AMD64) {{end}} {{define "op4_231Type1"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen4_231(ssa.Op{{.Go}}{{(index .In 1).Go}}, {{.SSAType}}), sys.AMD64) {{end}} +{{define "op4_31"}} addF(simdPackage, "{{(index .In 2).Go}}.{{.Go}}", opLen4_31(ssa.Op{{.Go}}{{(index .In 0).Go}}, {{.SSAType}}), sys.AMD64) +{{end}} {{define "op1Imm8"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen1Imm8(ssa.Op{{.Go}}{{(index .In 1).Go}}, {{.SSAType}}, {{(index .In 0).ImmOffset}}), sys.AMD64) {{end}} {{define "op2Imm8"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen2Imm8(ssa.Op{{.Go}}{{(index .In 1).Go}}, {{.SSAType}}, {{(index .In 0).ImmOffset}}), sys.AMD64) diff --git a/internal/simdgen/gen_simdTypes.go b/internal/simdgen/gen_simdTypes.go index 6832f751..f43e1eb7 100644 --- a/internal/simdgen/gen_simdTypes.go +++ b/internal/simdgen/gen_simdTypes.go @@ -138,6 +138,13 @@ func ({{.Op1NameAndType "x"}}) {{.Go}}({{.Op0NameAndType "y"}}) {{.GoType}} func ({{.Op0NameAndType "x"}}) {{.Go}}({{.Op1NameAndType "y"}}, {{.Op2NameAndType "z"}}) {{.GoType}} {{end}} +{{define "op3_31"}} +{{if .Documentation}}{{.Documentation}} +//{{end}} +// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}} +func ({{.Op2NameAndType "x"}}) {{.Go}}({{.Op1NameAndType "y"}}, {{.Op0NameAndType "z"}}) {{.GoType}} +{{end}} + {{define "op3_21"}} {{if .Documentation}}{{.Documentation}} //{{end}} @@ -187,6 +194,13 @@ func ({{.Op0NameAndType "x"}}) {{.Go}}({{.Op1NameAndType "y"}}, {{.Op2NameAndTyp func ({{.Op1NameAndType "x"}}) {{.Go}}({{.Op2NameAndType "y"}}, 
{{.Op0NameAndType "z"}}, {{.Op3NameAndType "u"}}) {{.GoType}} {{end}} +{{define "op4_31"}} +{{if .Documentation}}{{.Documentation}} +//{{end}} +// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}} +func ({{.Op2NameAndType "x"}}) {{.Go}}({{.Op1NameAndType "y"}}, {{.Op0NameAndType "z"}}, {{.Op3NameAndType "u"}}) {{.GoType}} +{{end}} + {{define "op1Imm8"}} {{if .Documentation}}{{.Documentation}} //{{end}} diff --git a/internal/simdgen/go.yaml b/internal/simdgen/go.yaml index 3369e669..8ef04b8e 100644 --- a/internal/simdgen/go.yaml +++ b/internal/simdgen/go.yaml @@ -532,9 +532,11 @@ - *dpb_src - go: UnsignedSignedQuadDotProdAccumulate asm: "VPDPBUSD" + operandOrder: "31" # switch operand 3 and 1 in: - &qdpa_acc go: $t_acc + base: int elemBits: 32 - &qdpa_src1 go: $t_src1 @@ -548,6 +550,7 @@ - *qdpa_acc - go: UnsignedSignedQuadDotProdAccumulateMasked asm: "VPDPBUSD" + operandOrder: "31" # switch operand 3 and 1 in: - *qdpa_acc - class: mask @@ -557,6 +560,7 @@ - *qdpa_acc - go: SaturatedUnsignedSignedQuadDotProdAccumulate asm: "VPDPBUSDS" + operandOrder: "31" # switch operand 3 and 1 in: - *qdpa_acc - *qdpa_src1 @@ -565,6 +569,7 @@ - *qdpa_acc - go: SaturatedUnsignedSignedQuadDotProdAccumulateMasked asm: "VPDPBUSDS" + operandOrder: "31" # switch operand 3 and 1 in: - *qdpa_acc - class: mask @@ -574,6 +579,7 @@ - *qdpa_acc - go: PairDotProdAccumulate asm: "VPDPWSSD" + operandOrder: "31" # switch operand 3 and 1 in: - &pdpa_acc go: $t_acc @@ -591,6 +597,7 @@ - *pdpa_acc - go: PairDotProdAccumulateMasked asm: "VPDPWSSD" + operandOrder: "31" # switch operand 3 and 1 in: - *pdpa_acc - class: mask @@ -600,6 +607,7 @@ - *pdpa_acc - go: SaturatedPairDotProdAccumulate asm: "VPDPWSSDS" + operandOrder: "31" # switch operand 3 and 1 in: - *pdpa_acc - *pdpa_src1 @@ -608,6 +616,7 @@ - *pdpa_acc - go: SaturatedPairDotProdAccumulateMasked asm: "VPDPWSSDS" + operandOrder: "31" # switch operand 3 and 1 in: - *pdpa_acc - class: mask diff --git a/internal/simdgen/ops/MLOps/categories.yaml 
b/internal/simdgen/ops/MLOps/categories.yaml index 17c318a9..962ae50d 100644 --- a/internal/simdgen/ops/MLOps/categories.yaml +++ b/internal/simdgen/ops/MLOps/categories.yaml @@ -37,61 +37,61 @@ commutative: "false" extension: "AVX.*" documentation: !string |- - // UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x. + // UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of x and y and then adds z. - go: UnsignedSignedQuadDotProdAccumulateMasked masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // UnsignedSignedQuadDotProdAccumulateMasked performs dot products on groups of 4 elements of y and z and accumulates the results to x. + // UnsignedSignedQuadDotProdAccumulateMasked performs dot products on groups of 4 elements of x and y and then adds z. - go: SaturatedUnsignedSignedQuadDotProdAccumulate commutative: "false" extension: "AVX.*" documentation: !string |- - // SaturatedUnsignedSignedQuadDotProdAccumulate multiplies performs dot products on groups of 4 elements of y and z and accumulates the results to x. + // SaturatedUnsignedSignedQuadDotProdAccumulate multiplies performs dot products on groups of 4 elements of x and y and then adds z. - go: SaturatedUnsignedSignedQuadDotProdAccumulateMasked masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // SaturatedUnsignedSignedQuadDotProdAccumulateMasked multiplies performs dot products on groups of 4 elements of y and z and accumulates the results to x. + // SaturatedUnsignedSignedQuadDotProdAccumulateMasked multiplies performs dot products on groups of 4 elements of x and y and then adds z. - go: PairDotProdAccumulate commutative: "false" extension: "AVX.*" documentation: !string |- - // PairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x. 
+ // PairDotProdAccumulate performs dot products on pairs of elements of x and y and then adds z. - go: PairDotProdAccumulateMasked masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // PairDotProdAccumulateMasked performs dot products on pairs of elements of y and z and accumulates the results to x. + // PairDotProdAccumulateMasked performs dot products on pairs of elements of x and y and then adds z. - go: SaturatedPairDotProdAccumulate commutative: "false" extension: "AVX.*" documentation: !string |- - // SaturatedPairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x. + // SaturatedPairDotProdAccumulate performs dot products on pairs of elements of x and y and then adds z. - go: SaturatedPairDotProdAccumulateMasked masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // SaturatedPairDotProdAccumulateMasked performs dot products on pairs of elements of y and z and accumulates the results to x. + // SaturatedPairDotProdAccumulateMasked performs dot products on pairs of elements of x and y and then adds z. - go: FusedMultiplyAddMasked masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // FusedMultiplyAddMasked performs `(v1 * v2) + v3`. + // FusedMultiplyAddMasked performs (x * y) + z. - go: FusedMultiplyAddSubMasked masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // FusedMultiplyAddSubMasked performs `(v1 * v2) - v3` for odd-indexed elements, and `(v1 * v2) + v3` for even-indexed elements. + // FusedMultiplyAddSubMasked performs (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements. - go: FusedMultiplySubAddMasked masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // FusedMultiplySubAddMasked performs `(v1 * v2) + v3` for odd-indexed elements, and `(v1 * v2) - v3` for even-indexed elements. 
+ // FusedMultiplySubAddMasked performs (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements. diff --git a/internal/simdgen/ops/MLOps/go.yaml b/internal/simdgen/ops/MLOps/go.yaml index 278daa87..76512b1e 100644 --- a/internal/simdgen/ops/MLOps/go.yaml +++ b/internal/simdgen/ops/MLOps/go.yaml @@ -53,9 +53,11 @@ - *dpb_src - go: UnsignedSignedQuadDotProdAccumulate asm: "VPDPBUSD" + operandOrder: "31" # switch operand 3 and 1 in: - &qdpa_acc go: $t_acc + base: int elemBits: 32 - &qdpa_src1 go: $t_src1 @@ -69,6 +71,7 @@ - *qdpa_acc - go: UnsignedSignedQuadDotProdAccumulateMasked asm: "VPDPBUSD" + operandOrder: "31" # switch operand 3 and 1 in: - *qdpa_acc - class: mask @@ -78,6 +81,7 @@ - *qdpa_acc - go: SaturatedUnsignedSignedQuadDotProdAccumulate asm: "VPDPBUSDS" + operandOrder: "31" # switch operand 3 and 1 in: - *qdpa_acc - *qdpa_src1 @@ -86,6 +90,7 @@ - *qdpa_acc - go: SaturatedUnsignedSignedQuadDotProdAccumulateMasked asm: "VPDPBUSDS" + operandOrder: "31" # switch operand 3 and 1 in: - *qdpa_acc - class: mask @@ -95,6 +100,7 @@ - *qdpa_acc - go: PairDotProdAccumulate asm: "VPDPWSSD" + operandOrder: "31" # switch operand 3 and 1 in: - &pdpa_acc go: $t_acc @@ -112,6 +118,7 @@ - *pdpa_acc - go: PairDotProdAccumulateMasked asm: "VPDPWSSD" + operandOrder: "31" # switch operand 3 and 1 in: - *pdpa_acc - class: mask @@ -121,6 +128,7 @@ - *pdpa_acc - go: SaturatedPairDotProdAccumulate asm: "VPDPWSSDS" + operandOrder: "31" # switch operand 3 and 1 in: - *pdpa_acc - *pdpa_src1 @@ -129,6 +137,7 @@ - *pdpa_acc - go: SaturatedPairDotProdAccumulateMasked asm: "VPDPWSSDS" + operandOrder: "31" # switch operand 3 and 1 in: - *pdpa_acc - class: mask From c81c31d188992d3d6293b31ccd9fc8c97b2837f2 Mon Sep 17 00:00:00 2001 From: Junyang Shao Date: Tue, 15 Jul 2025 19:52:36 +0000 Subject: [PATCH 143/200] internal/simdgen: add logging of all CPU Features This CL makes -v print all CPU Features that will appear in the API. 
This is useful for us to add CPU Feature check. Change-Id: I19cc964b531abcb3a4730b45206ffd9752e49653 Reviewed-on: https://go-review.googlesource.com/c/arch/+/688216 LUCI-TryBot-Result: Go LUCI Reviewed-by: David Chase --- internal/simdgen/gen_utility.go | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/internal/simdgen/gen_utility.go b/internal/simdgen/gen_utility.go index 698caef3..439a5e14 100644 --- a/internal/simdgen/gen_utility.go +++ b/internal/simdgen/gen_utility.go @@ -506,6 +506,7 @@ func dedup(ops []Operation) (deduped []Operation) { } func fillCPUFeature(ops []Operation) (filled []Operation, excluded []Operation) { + allCPUFeatures := map[string]struct{}{} for _, op := range ops { if op.ISASet == "" { newS := op.Extension @@ -522,6 +523,7 @@ func fillCPUFeature(ops []Operation) (filled []Operation, excluded []Operation) if strings.Contains(*op.CPUFeature, "_") { *op.CPUFeature = strings.ReplaceAll(*op.CPUFeature, "_", "") } + allCPUFeatures[*op.CPUFeature] = struct{}{} } else { excluded = append(excluded, op) } @@ -540,6 +542,10 @@ func fillCPUFeature(ops []Operation) (filled []Operation, excluded []Operation) panic(fmt.Sprintf("simdgen is excluding the only def of op: %s", op)) } } + if *Verbose { + // It might contain + log.Printf("All CPU Features: %v\n", allCPUFeatures) + } return } From d50ebdc696d024df4e9e0fa2d670442cdeba2d86 Mon Sep 17 00:00:00 2001 From: Junyang Shao Date: Wed, 16 Jul 2025 17:01:22 +0000 Subject: [PATCH 144/200] internal/simdgen: clean up masked op doc This CL makes masked and un-masked op doc identical, and then apply a mechanism to append a write-mask description to the doc uniformly. The previous version contains some inconsistency, "masked" might appear in unmasked op's documentation. This CL generates CL 688396. 
Change-Id: Ia52ed6606a129f2bda15315b3a87d5d1dcef24e9 Reviewed-on: https://go-review.googlesource.com/c/arch/+/688395 Reviewed-by: David Chase LUCI-TryBot-Result: Go LUCI --- internal/simdgen/categories.yaml | 20 +++++++++---------- internal/simdgen/gen_utility.go | 11 ++++++++++ internal/simdgen/godefs.go | 1 + .../simdgen/ops/BitwiseLogic/categories.yaml | 6 +++--- internal/simdgen/ops/Compares/categories.yaml | 2 +- .../simdgen/ops/FPonlyArith/categories.yaml | 4 ++-- internal/simdgen/ops/Mul/categories.yaml | 8 ++++---- 7 files changed, 32 insertions(+), 20 deletions(-) diff --git a/internal/simdgen/categories.yaml b/internal/simdgen/categories.yaml index 01881ab5..5b0e5597 100644 --- a/internal/simdgen/categories.yaml +++ b/internal/simdgen/categories.yaml @@ -77,7 +77,7 @@ commutative: "true" extension: "AVX.*" documentation: !string |- - // AndMasked performs a masked bitwise AND operation between two vectors. + // AndMasked performs a bitwise AND operation between two vectors. - go: Or commutative: "true" extension: "AVX.*" @@ -88,7 +88,7 @@ commutative: "true" extension: "AVX.*" documentation: !string |- - // OrMasked performs a masked bitwise OR operation between two vectors. + // OrMasked performs a bitwise OR operation between two vectors. - go: AndNot commutative: "false" extension: "AVX.*" @@ -110,7 +110,7 @@ commutative: "true" extension: "AVX.*" documentation: !string |- - // XorMasked performs a masked bitwise XOR operation between two vectors. + // XorMasked performs a bitwise XOR operation between two vectors. # We also have PTEST and VPTERNLOG, those should be hidden from the users # and only appear in rewrite rules. # const imm predicate(holds for both float and int|uint): @@ -169,7 +169,7 @@ commutative: "true" extension: "AVX.*" documentation: !string |- - // EqualMasked compares for equality, masked. + // EqualMasked compares for equality. 
- go: LessMasked constImm: 1 masked: "true" @@ -296,7 +296,7 @@ constImm: 1 masked: "true" documentation: !string |- - // FloorWithPrecisionMasked rounds elements down with specified precision, masked. + // FloorWithPrecisionMasked rounds elements down with specified precision. - go: DiffWithFloorWithPrecisionMasked commutative: "false" extension: "AVX.*" @@ -317,7 +317,7 @@ constImm: 2 masked: "true" documentation: !string |- - // CeilWithPrecisionMasked rounds elements up with specified precision, masked. + // CeilWithPrecisionMasked rounds elements up with specified precision. - go: DiffWithCeilWithPrecisionMasked commutative: "false" extension: "AVX.*" @@ -618,26 +618,26 @@ commutative: "true" extension: "AVX.*" documentation: !string |- - // MulMasked multiplies corresponding elements of two vectors, masked. + // MulMasked multiplies corresponding elements of two vectors. - go: MulEvenWidenMasked masked: "true" commutative: "true" extension: "AVX.*" documentation: !string |- - // MulEvenWidenMasked multiplies even-indexed elements, widening the result, masked. + // MulEvenWidenMasked multiplies even-indexed elements, widening the result. // Result[i] = v1.Even[i] * v2.Even[i]. - go: MulHighMasked masked: "true" commutative: "true" extension: "AVX.*" documentation: !string |- - // MulHighMasked multiplies elements and stores the high part of the result, masked. + // MulHighMasked multiplies elements and stores the high part of the result. - go: MulLowMasked masked: "true" commutative: "true" extension: "AVX.*" documentation: !string |- - // MulLowMasked multiplies elements and stores the low part of the result, masked. + // MulLowMasked multiplies elements and stores the low part of the result. 
- go: ShiftAllLeft nameAndSizeCheck: "true" commutative: "false" diff --git a/internal/simdgen/gen_utility.go b/internal/simdgen/gen_utility.go index 439a5e14..0be1df38 100644 --- a/internal/simdgen/gen_utility.go +++ b/internal/simdgen/gen_utility.go @@ -571,6 +571,7 @@ func splitMask(ops []Operation) ([]Operation, error) { op2.Go = strings.TrimSuffix(op2.Go, "Masked") op2Doc := strings.ReplaceAll(*op2.Documentation, maskedOpName, op2.Go) op2.Documentation = &op2Doc + op2.Masked = nil // It's no longer masked. splited = append(splited, op2) } else { return nil, fmt.Errorf("simdgen only recognizes masked operations with exactly one mask input: %s", op) @@ -579,6 +580,16 @@ func splitMask(ops []Operation) ([]Operation, error) { return splited, nil } +func insertMaskDescToDoc(ops []Operation) { + for i, _ := range ops { + if ops[i].Masked != nil && *ops[i].Masked == "true" { + if ops[i].Documentation != nil { + *ops[i].Documentation += "\n//\n// This operation is applied selectively under a write mask." + } + } + } +} + func genericName(op Operation) string { if op.OperandOrder != nil { switch *op.OperandOrder { diff --git a/internal/simdgen/godefs.go b/internal/simdgen/godefs.go index 803e5306..9a40f83b 100644 --- a/internal/simdgen/godefs.go +++ b/internal/simdgen/godefs.go @@ -268,6 +268,7 @@ func writeGoDefs(path string, cl unify.Closure) error { return err } } + insertMaskDescToDoc(deduped) if *Verbose { log.Printf("dedup len: %d\n", len(deduped)) } diff --git a/internal/simdgen/ops/BitwiseLogic/categories.yaml b/internal/simdgen/ops/BitwiseLogic/categories.yaml index afda77b8..d6ea3ed1 100644 --- a/internal/simdgen/ops/BitwiseLogic/categories.yaml +++ b/internal/simdgen/ops/BitwiseLogic/categories.yaml @@ -9,7 +9,7 @@ commutative: "true" extension: "AVX.*" documentation: !string |- - // AndMasked performs a masked bitwise AND operation between two vectors. + // AndMasked performs a bitwise AND operation between two vectors. 
- go: Or commutative: "true" extension: "AVX.*" @@ -20,7 +20,7 @@ commutative: "true" extension: "AVX.*" documentation: !string |- - // OrMasked performs a masked bitwise OR operation between two vectors. + // OrMasked performs a bitwise OR operation between two vectors. - go: AndNot commutative: "false" extension: "AVX.*" @@ -42,6 +42,6 @@ commutative: "true" extension: "AVX.*" documentation: !string |- - // XorMasked performs a masked bitwise XOR operation between two vectors. + // XorMasked performs a bitwise XOR operation between two vectors. # We also have PTEST and VPTERNLOG, those should be hidden from the users # and only appear in rewrite rules. diff --git a/internal/simdgen/ops/Compares/categories.yaml b/internal/simdgen/ops/Compares/categories.yaml index 3b021e4c..f7383555 100644 --- a/internal/simdgen/ops/Compares/categories.yaml +++ b/internal/simdgen/ops/Compares/categories.yaml @@ -55,7 +55,7 @@ commutative: "true" extension: "AVX.*" documentation: !string |- - // EqualMasked compares for equality, masked. + // EqualMasked compares for equality. - go: LessMasked constImm: 1 masked: "true" diff --git a/internal/simdgen/ops/FPonlyArith/categories.yaml b/internal/simdgen/ops/FPonlyArith/categories.yaml index 356b06d3..e0d5836d 100644 --- a/internal/simdgen/ops/FPonlyArith/categories.yaml +++ b/internal/simdgen/ops/FPonlyArith/categories.yaml @@ -83,7 +83,7 @@ constImm: 1 masked: "true" documentation: !string |- - // FloorWithPrecisionMasked rounds elements down with specified precision, masked. + // FloorWithPrecisionMasked rounds elements down with specified precision. - go: DiffWithFloorWithPrecisionMasked commutative: "false" extension: "AVX.*" @@ -104,7 +104,7 @@ constImm: 2 masked: "true" documentation: !string |- - // CeilWithPrecisionMasked rounds elements up with specified precision, masked. + // CeilWithPrecisionMasked rounds elements up with specified precision. 
- go: DiffWithCeilWithPrecisionMasked commutative: "false" extension: "AVX.*" diff --git a/internal/simdgen/ops/Mul/categories.yaml b/internal/simdgen/ops/Mul/categories.yaml index 34b3ab56..b466eb01 100644 --- a/internal/simdgen/ops/Mul/categories.yaml +++ b/internal/simdgen/ops/Mul/categories.yaml @@ -25,23 +25,23 @@ commutative: "true" extension: "AVX.*" documentation: !string |- - // MulMasked multiplies corresponding elements of two vectors, masked. + // MulMasked multiplies corresponding elements of two vectors. - go: MulEvenWidenMasked masked: "true" commutative: "true" extension: "AVX.*" documentation: !string |- - // MulEvenWidenMasked multiplies even-indexed elements, widening the result, masked. + // MulEvenWidenMasked multiplies even-indexed elements, widening the result. // Result[i] = v1.Even[i] * v2.Even[i]. - go: MulHighMasked masked: "true" commutative: "true" extension: "AVX.*" documentation: !string |- - // MulHighMasked multiplies elements and stores the high part of the result, masked. + // MulHighMasked multiplies elements and stores the high part of the result. - go: MulLowMasked masked: "true" commutative: "true" extension: "AVX.*" documentation: !string |- - // MulLowMasked multiplies elements and stores the low part of the result, masked. + // MulLowMasked multiplies elements and stores the low part of the result. From d0d5d5ba49324d2e8e5df13e68c4939a3b337d23 Mon Sep 17 00:00:00 2001 From: Junyang Shao Date: Fri, 18 Jul 2025 04:24:37 +0000 Subject: [PATCH 145/200] internal/simdgen: support load from bits for mask This CL adds the code generation to construct K masks from bits. This will enable more flexible and performant SIMD programming. This CL generates CL 688875. 
Change-Id: Idb576a5e2343b1dd0762ea3e52cf8f06a4e3af13 Reviewed-on: https://go-review.googlesource.com/c/arch/+/688855 Reviewed-by: David Chase LUCI-TryBot-Result: Go LUCI --- internal/simdgen/gen_simdIntrinsics.go | 7 +++++-- internal/simdgen/gen_simdTypes.go | 17 +++++++++++++---- 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/internal/simdgen/gen_simdIntrinsics.go b/internal/simdgen/gen_simdIntrinsics.go index 6d7a75d1..355c8d14 100644 --- a/internal/simdgen/gen_simdIntrinsics.go +++ b/internal/simdgen/gen_simdIntrinsics.go @@ -73,6 +73,7 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . addF(simdPackage, "{{.VectorCounterpart}}.As{{.Name}}", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64) addF(simdPackage, "{{.Name}}.And", opLen2(ssa.OpAnd{{.ReshapedVectorWithAndOr}}, types.TypeVec{{.Size}}), sys.AMD64) addF(simdPackage, "{{.Name}}.Or", opLen2(ssa.OpOr{{.ReshapedVectorWithAndOr}}, types.TypeVec{{.Size}}), sys.AMD64) + addF(simdPackage, "Load{{.Name}}FromBits", simdLoadMask({{.ElemBits}}, {{.Lanes}}), sys.AMD64) {{end}} {{define "footer"}}} @@ -109,8 +110,10 @@ func writeSIMDIntrinsics(ops []Operation, typeMap simdTypeMap) *bytes.Buffer { } for _, typ := range typesFromTypeMap(typeMap) { - if err := t.ExecuteTemplate(buffer, "loadStore", typ); err != nil { - panic(fmt.Errorf("failed to execute loadStore template: %w", err)) + if typ.Type != "mask" { + if err := t.ExecuteTemplate(buffer, "loadStore", typ); err != nil { + panic(fmt.Errorf("failed to execute loadStore template: %w", err)) + } } } diff --git a/internal/simdgen/gen_simdTypes.go b/internal/simdgen/gen_simdTypes.go index f43e1eb7..50553b4e 100644 --- a/internal/simdgen/gen_simdTypes.go +++ b/internal/simdgen/gen_simdTypes.go @@ -21,6 +21,7 @@ type simdType struct { VectorCounterpart string // For mask use only: just replacing the "Mask" in [simdType.Name] with "Int" ReshapedVectorWithAndOr string // For mask use 
only: vector AND and OR are only available in some shape with element width 32. Size int // The size of the type + ElemBits int // Size / Lanes } func compareSimdTypes(x, y simdType) int { @@ -92,7 +93,15 @@ func Load{{.Name}}(y *[{{.Lanes}}]{{.Base}}) {{.Name}} //go:noescape func (x {{.Name}}) Store(y *[{{.Lanes}}]{{.Base}}) -{{- end}} +{{- else}} + +// {{.Name}}FromBits constructs a {{.Name}} from an a bitmap, where 1 means set for the indexed element, 0 means unset. +// Only the lower {{.Lanes}} bits of y are used. +// +//go:noescape +func Load{{.Name}}FromBits(y *uint64) {{.Name}} + +{{end}} {{end}} ` @@ -528,14 +537,14 @@ func parseSIMDTypes(ops []Operation) simdTypeMap { if arg.Class == "mask" { vectorCounterpart := strings.ReplaceAll(*arg.Go, "Mask", "Int") reshapedVectorWithAndOr := fmt.Sprintf("Int32x%d", *arg.Bits/32) - ret[*arg.Bits] = append(ret[*arg.Bits], simdType{*arg.Go, lanes, base, fields, arg.Class, vectorCounterpart, reshapedVectorWithAndOr, *arg.Bits}) + ret[*arg.Bits] = append(ret[*arg.Bits], simdType{*arg.Go, lanes, base, fields, arg.Class, vectorCounterpart, reshapedVectorWithAndOr, *arg.Bits, *arg.Bits / lanes}) // In case the vector counterpart of a mask is not present, put its vector counterpart typedef into the map as well. 
if _, ok := seen[vectorCounterpart]; !ok { seen[vectorCounterpart] = struct{}{} - ret[*arg.Bits] = append(ret[*arg.Bits], simdType{vectorCounterpart, lanes, base, fields, "vreg", "", "", *arg.Bits}) + ret[*arg.Bits] = append(ret[*arg.Bits], simdType{vectorCounterpart, lanes, base, fields, "vreg", "", "", *arg.Bits, *arg.Bits / lanes}) } } else { - ret[*arg.Bits] = append(ret[*arg.Bits], simdType{*arg.Go, lanes, base, fields, arg.Class, "", "", *arg.Bits}) + ret[*arg.Bits] = append(ret[*arg.Bits], simdType{*arg.Go, lanes, base, fields, arg.Class, "", "", *arg.Bits, *arg.Bits / lanes}) } } for _, op := range ops { From d63b4ec3794a220ec4e2d0f90e458c5d2c48c004 Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Tue, 22 Jul 2025 21:48:20 -0400 Subject: [PATCH 146/200] internal/unify: fix round-tripping strings with regexp metacharacters Change-Id: I92956b13c7532b9a96386947ee19aa61142337c8 Reviewed-on: https://go-review.googlesource.com/c/arch/+/689478 Auto-Submit: Austin Clements LUCI-TryBot-Result: Go LUCI Reviewed-by: David Chase --- internal/unify/domain.go | 4 ++-- internal/unify/yaml.go | 9 +++++++- internal/unify/yaml_test.go | 44 +++++++++++++++++++++++++++++++++++++ 3 files changed, 54 insertions(+), 3 deletions(-) diff --git a/internal/unify/domain.go b/internal/unify/domain.go index c59bd621..00c80902 100644 --- a/internal/unify/domain.go +++ b/internal/unify/domain.go @@ -270,8 +270,8 @@ func NewStringRegex(exprs ...string) (String, error) { continue } - if _, complete := re.LiteralPrefix(); complete { - v = String{kind: stringExact, exact: expr} + if exact, complete := re.LiteralPrefix(); complete { + v = String{kind: stringExact, exact: exact} } else { v.kind = stringRegex v.re = append(v.re, re) diff --git a/internal/unify/yaml.go b/internal/unify/yaml.go index 4731140b..6782b313 100644 --- a/internal/unify/yaml.go +++ b/internal/unify/yaml.go @@ -430,7 +430,14 @@ func (enc *yamlEncoder) value(v *Value) *yaml.Node { n.Tag = "tag:yaml.org,2002:int" return 
&n } - n.SetString(regexp.QuoteMeta(d.exact)) + // If this doesn't require escaping, leave it as a str node to avoid + // the annoying YAML tags. Otherwise, mark it as an exact string. + // Alternatively, we could always emit a str node with regexp + // quoting. + n.SetString(d.exact) + if d.exact != regexp.QuoteMeta(d.exact) { + n.Tag = "!string" + } return &n case stringRegex: o := make([]string, 0, 1) diff --git a/internal/unify/yaml_test.go b/internal/unify/yaml_test.go index af73001d..05a26beb 100644 --- a/internal/unify/yaml_test.go +++ b/internal/unify/yaml_test.go @@ -8,6 +8,9 @@ import ( "bytes" "fmt" "iter" + "log" + "strings" + "testing" "gopkg.in/yaml.v3" ) @@ -20,6 +23,19 @@ func mustParse(expr string) Closure { return c } +func oneValue(t *testing.T, c Closure) *Value { + t.Helper() + var v *Value + var i int + for v = range c.All() { + i++ + } + if i != 1 { + t.Fatalf("expected 1 value, got %d", i) + } + return v +} + func printYaml(val any) { b, err := yaml.Marshal(val) if err != nil { @@ -89,3 +105,31 @@ func allYamlNodes(n *yaml.Node) iter.Seq[*yaml.Node] { } } } + +func TestRoundTripString(t *testing.T) { + // Check that we can round-trip a string with regexp meta-characters in it. 
+ const y = `!string test*` + t.Logf("input:\n%s", y) + + v1 := oneValue(t, mustParse(y)) + var buf1 strings.Builder + enc := yaml.NewEncoder(&buf1) + if err := enc.Encode(v1); err != nil { + log.Fatal(err) + } + enc.Close() + t.Logf("after parse 1:\n%s", buf1.String()) + + v2 := oneValue(t, mustParse(buf1.String())) + var buf2 strings.Builder + enc = yaml.NewEncoder(&buf2) + if err := enc.Encode(v2); err != nil { + log.Fatal(err) + } + enc.Close() + t.Logf("after parse 2:\n%s", buf2.String()) + + if buf1.String() != buf2.String() { + t.Fatal("parse 1 and parse 2 differ") + } +} From e4b518043dabc2fde86eadba7095fd1c1af7a538 Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Tue, 22 Jul 2025 22:11:41 -0400 Subject: [PATCH 147/200] internal/unify: fix parsing of empty string nodes Currently, if a node is just "", we incorrectly parse that as a regexp instead of an exact string. This happens because we build it into the regexp "\A(?:)\z" and then test if it's "exact" by asking if it has a "literal prefix". But for the empty string and exactly the empty string, it's literal but has no prefix, so this check fails and we treat this as a regexp instead of an exact string. Fix this by special-casing "". I believe this bug is harmless for unification, since either way it describes the same set of strings, but it affects any "exactness" test and affects YAML round-tripping. 
Change-Id: I3223ef9a27c3cb6bfd2f5a0be9a0b7b71059840f Reviewed-on: https://go-review.googlesource.com/c/arch/+/689479 Reviewed-by: David Chase LUCI-TryBot-Result: Go LUCI Auto-Submit: Austin Clements --- internal/unify/domain.go | 8 ++++++++ internal/unify/yaml_test.go | 12 ++++++++++++ 2 files changed, 20 insertions(+) diff --git a/internal/unify/domain.go b/internal/unify/domain.go index 00c80902..7f573826 100644 --- a/internal/unify/domain.go +++ b/internal/unify/domain.go @@ -259,6 +259,14 @@ func NewStringRegex(exprs ...string) (String, error) { } v := String{kind: -1} for _, expr := range exprs { + if expr == "" { + // Skip constructing the regexp. It won't have a "literal prefix" + // and so we wind up thinking this is a regexp instead of an exact + // (empty) string. + v = String{kind: stringExact, exact: ""} + continue + } + re, err := regexp.Compile(`\A(?:` + expr + `)\z`) if err != nil { return String{}, fmt.Errorf("parsing value: %s", err) diff --git a/internal/unify/yaml_test.go b/internal/unify/yaml_test.go index 05a26beb..e04d05d4 100644 --- a/internal/unify/yaml_test.go +++ b/internal/unify/yaml_test.go @@ -133,3 +133,15 @@ func TestRoundTripString(t *testing.T) { t.Fatal("parse 1 and parse 2 differ") } } + +func TestEmptyString(t *testing.T) { + // Regression test. Make sure an empty string is parsed as an exact string, + // not a regexp. + const y = `""` + t.Logf("input:\n%s", y) + + v1 := oneValue(t, mustParse(y)) + if !v1.Exact() { + t.Fatal("expected exact string") + } +} From 414be249652c489008b5d41abc8003127c8652d9 Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Tue, 22 Jul 2025 20:07:05 -0400 Subject: [PATCH 148/200] internal/simdgen: fix YAML round-tripping There were some errors in the YAML format output that prevented it from being read back in by the unifier. Fix these. This lets you, for example, capture the full XED unification with: go run . 
-xedPath $XED go.yaml types.yaml categories.yaml > /tmp/unified.yaml Then work directly with this pre-unified result: go run . -o godefs /tmp/unified.yaml The results are identical to running godefs on the original inputs. Change-Id: I9a8130aaa494819b4ce7e0802d93efec38431451 Reviewed-on: https://go-review.googlesource.com/c/arch/+/689480 LUCI-TryBot-Result: Go LUCI Auto-Submit: Austin Clements Reviewed-by: David Chase --- internal/simdgen/main.go | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/internal/simdgen/main.go b/internal/simdgen/main.go index db77d8c3..b48c600e 100644 --- a/internal/simdgen/main.go +++ b/internal/simdgen/main.go @@ -186,6 +186,7 @@ func main() { switch *flagO { case "yaml": // Produce a result that looks like encoding a slice, but stream it. + fmt.Println("!sum") var val1 [1]*unify.Value for val := range unified.All() { val1[0] = val @@ -203,11 +204,11 @@ func main() { } } - if !*Verbose { + if !*Verbose && *xedPath != "" { if operandRemarks == 0 { - fmt.Printf("XED decoding generated no errors, which is unusual.\n") + fmt.Fprintf(os.Stderr, "XED decoding generated no errors, which is unusual.\n") } else { - fmt.Printf("XED decoding generated %d \"errors\" which is not cause for alarm, use -v for details.\n", operandRemarks) + fmt.Fprintf(os.Stderr, "XED decoding generated %d \"errors\" which is not cause for alarm, use -v for details.\n", operandRemarks) } } From 0e925773f47b6a9cff83429f9ffcd5a05b90e78a Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Wed, 23 Jul 2025 15:58:34 -0400 Subject: [PATCH 149/200] internal/simdgen: fix or clarify some ISA misconceptions This doesn't affect the generated output (in some cases we were doing the right thing for the wrong reasons). 
Change-Id: If2c3fc760eb3363487bc3be858229dfa6988d31b Reviewed-on: https://go-review.googlesource.com/c/arch/+/690015 Auto-Submit: Austin Clements LUCI-TryBot-Result: Go LUCI Reviewed-by: David Chase --- internal/simdgen/godefs.go | 2 +- internal/simdgen/main.go | 2 +- internal/simdgen/xed.go | 27 +++++++++++++++++++++++---- 3 files changed, 25 insertions(+), 6 deletions(-) diff --git a/internal/simdgen/godefs.go b/internal/simdgen/godefs.go index 9a40f83b..6179d98d 100644 --- a/internal/simdgen/godefs.go +++ b/internal/simdgen/godefs.go @@ -27,7 +27,7 @@ type Operation struct { Extension string // Extension ISASet string // ISASet CPUFeature *string // If ISASet is empty, then Extension, otherwise ISASet - Zeroing *string // Zeroing is a flag for asm prefix "Z", if non-nil it will always be "false" + Zeroing *string // nil => use asm suffix ".Z"; "false" => do not use asm suffix ".Z" Documentation *string // Documentation will be appended to the stubs comments. // ConstMask is a hack to reduce the size of defs the user writes for const-immediate // If present, it will be copied to [In[0].Const]. diff --git a/internal/simdgen/main.go b/internal/simdgen/main.go index b48c600e..2b0e65f7 100644 --- a/internal/simdgen/main.go +++ b/internal/simdgen/main.go @@ -40,7 +40,7 @@ // simdgen can also generate Go definitions of SIMD mappings: // To generate go files to the go root, run: // -// go run . -xedPath $XEDPATH -godefroot $/PATH/TO/go go.yaml categories.yaml types.yaml +// go run . -xedPath $XEDPATH -o godefs -goroot $PATH/TO/go go.yaml categories.yaml types.yaml // // types.yaml is already written, it specifies the shapes of vectors. 
// categories.yaml and go.yaml contains definitions that unifies with types.yaml and XED diff --git a/internal/simdgen/xed.go b/internal/simdgen/xed.go index 5f348cbc..4a1b93b2 100644 --- a/internal/simdgen/xed.go +++ b/internal/simdgen/xed.go @@ -68,6 +68,21 @@ func loadXED(xedPath string) []*unify.Value { unify.NewValue(unify.NewStringExact(inst.ISASet)), } if strings.Contains(inst.Pattern, "ZEROING=0") { + // This is an EVEX instruction, but the ".Z" (zero-merging) + // instruction flag is NOT valid. EVEX.z must be zero. + // + // This can mean a few things: + // + // - The output of an instruction is a mask, so merging modes don't + // make any sense. E.g., VCMPPS. + // + // - There are no masks involved anywhere. (Maybe MASK=0 is also set + // in this case?) E.g., VINSERTPS. + // + // - The operation inherently performs merging. E.g., VCOMPRESSPS + // with a mem operand. + // + // There may be other reasons. fields = append(fields, "zeroing") values = append(values, unify.NewValue(unify.NewStringExact("false"))) } @@ -220,14 +235,18 @@ func decodeOperand(db *xeddata.Database, operand string) (operand, error) { fmt.Printf(" %+v\n", op) } + if strings.HasPrefix(op.Name, "EMX_BROADCAST") { + // This refers to a set of macros defined in all-state.txt that set a + // BCAST operand to various fixed values. But the BCAST operand is + // itself suppressed and "internal", so I think we can just ignore this + // operand. + return nil, nil + } + // TODO: See xed_decoded_inst_operand_action. This might need to be more // complicated. action, ok := actionEncoding[op.Action] if !ok { - if strings.HasPrefix(op.Name, "EMX_BROADCAST") { - // BROADCAST looks like to contain an obsolete operand. 
- return nil, nil - } return nil, fmt.Errorf("unknown action %q", op.Action) } common := operandCommon{action: action} From 60f586f2dc983eab70ee8ce24c0fd82e2e40bcde Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Wed, 23 Jul 2025 16:52:29 -0400 Subject: [PATCH 150/200] internal/simdgen: replace hand-written formatters with reflect This is actually less code, and will transparently keep up with any changes to these types. Change-Id: I3a869898be51f06a4649b4d5bc3f877381afbcd2 Reviewed-on: https://go-review.googlesource.com/c/arch/+/690016 Auto-Submit: Austin Clements Reviewed-by: David Chase LUCI-TryBot-Result: Go LUCI --- internal/simdgen/gen_utility.go | 99 +-------------------------------- internal/simdgen/pprint.go | 73 ++++++++++++++++++++++++ 2 files changed, 75 insertions(+), 97 deletions(-) create mode 100644 internal/simdgen/pprint.go diff --git a/internal/simdgen/gen_utility.go b/internal/simdgen/gen_utility.go index 0be1df38..e3bf45a9 100644 --- a/internal/simdgen/gen_utility.go +++ b/internal/simdgen/gen_utility.go @@ -790,104 +790,9 @@ func reportXEDInconsistency(ops []Operation) error { } func (o Operation) String() string { - var sb strings.Builder - var nils string - - optStr := func(field string, ps *string) { - if ps != nil { - fmt.Fprintf(&sb, " %s: %s\n", field, *ps) - } else { - nils += " " + field - } - } - - // two spaces then field: value - str := func(field string, value string) { - fmt.Fprintf(&sb, " %s: %s\n", field, value) - } - - sb.WriteString("Operation {\n") - str("Go", o.Go) - str("GoArch", o.GoArch) - str("Asm", o.Asm) - str("Commutative", o.Commutative) - str("Extension", o.Extension) - str("ISASet", o.ISASet) - optStr("CPUFeature", o.CPUFeature) - optStr("ConstImm", o.ConstImm) - optStr("Masked", o.Masked) - optStr("Zeroing", o.Zeroing) - optStr("OperandOrder", o.OperandOrder) - - sb.WriteString(" In: [\n") - for _, op := range o.In { - fmt.Fprintf(&sb, " %s,\n", op.String()) - } - sb.WriteString(" ]\n") - - sb.WriteString(" 
Out: [\n") - for _, op := range o.Out { - fmt.Fprintf(&sb, " %s,\n", op.String()) - } - sb.WriteString(" ]\n") - - optStr("Documentation", o.Documentation) - if len(nils) != 0 { - sb.WriteString(" nils = " + nils[1:] + "\n") - } - - sb.WriteString("}\n") - return sb.String() + return pprints(o) } -// String returns a string representation of the Operand. func (op Operand) String() string { - var sb strings.Builder - var nils string - - optStr := func(field string, ps *string) { - if ps != nil { - fmt.Fprintf(&sb, " %s: %s\n", field, *ps) - } else { - nils += " " + field - } - } - - optNum := func(field string, pi *int) { - if pi != nil { - fmt.Fprintf(&sb, " %s: %d\n", field, *pi) - } else { - nils += " " + field - } - } - - // four spaces then field: value - str := func(field string, value string) { - fmt.Fprintf(&sb, " %s: %s\n", field, value) - } - num := func(field string, value int) { - fmt.Fprintf(&sb, " %s: %d\n", field, value) - } - sb.WriteString("Operand {\n") - str("Class", op.Class) - optStr("Go", op.Go) - num("AsmPos", op.AsmPos) - optStr("Base", op.Base) - optNum("ElemBits", op.ElemBits) - optNum("Bits", op.Bits) - optStr("Const", op.Const) - optStr("ImmOffset", op.ImmOffset) - optNum("Lanes", op.Lanes) - optStr("Name", op.Name) - optNum("TreatLikeAScalarOfSize", op.TreatLikeAScalarOfSize) - optStr("OverwriteClass", op.OverwriteClass) - optStr("OverwriteBase", op.OverwriteBase) - optNum("OverwriteElementBits", op.OverwriteElementBits) - - if len(nils) != 0 { - sb.WriteString(" nils = " + nils[1:] + "\n") - } - - sb.WriteString(" }\n") - return sb.String() + return pprints(op) } diff --git a/internal/simdgen/pprint.go b/internal/simdgen/pprint.go new file mode 100644 index 00000000..054b5176 --- /dev/null +++ b/internal/simdgen/pprint.go @@ -0,0 +1,73 @@ +// Copyright 2025 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package main + +import ( + "fmt" + "reflect" + "strconv" +) + +func pprints(v any) string { + var pp pprinter + pp.val(reflect.ValueOf(v), 0) + return string(pp.buf) +} + +type pprinter struct { + buf []byte +} + +func (p *pprinter) indent(by int) { + for range by { + p.buf = append(p.buf, '\t') + } +} + +func (p *pprinter) val(v reflect.Value, indent int) { + switch v.Kind() { + default: + p.buf = fmt.Appendf(p.buf, "unsupported kind %v", v.Kind()) + + case reflect.Bool: + p.buf = strconv.AppendBool(p.buf, v.Bool()) + + case reflect.Int, reflect.Int16, reflect.Int32, reflect.Int64: + p.buf = strconv.AppendInt(p.buf, v.Int(), 10) + + case reflect.String: + p.buf = strconv.AppendQuote(p.buf, v.String()) + + case reflect.Pointer: + if v.IsNil() { + p.buf = append(p.buf, "nil"...) + } else { + p.buf = append(p.buf, "&"...) + p.val(v.Elem(), indent) + } + + case reflect.Slice, reflect.Array: + p.buf = append(p.buf, "[\n"...) + for i := range v.Len() { + p.indent(indent + 1) + p.val(v.Index(i), indent+1) + p.buf = append(p.buf, ",\n"...) + } + p.indent(indent) + p.buf = append(p.buf, ']') + + case reflect.Struct: + vt := v.Type() + p.buf = append(append(p.buf, vt.String()...), "{\n"...) + for f := range v.NumField() { + p.indent(indent + 1) + p.buf = append(append(p.buf, vt.Field(f).Name...), ": "...) + p.val(v.Field(f), indent+1) + p.buf = append(p.buf, ",\n"...) + } + p.indent(indent) + p.buf = append(p.buf, '}') + } +} From a2126014d85ecda5c3a0f2ae64c0495ecf06156b Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Wed, 23 Jul 2025 17:00:21 -0400 Subject: [PATCH 151/200] internal/simdgen: use bools for bools There are several bool flags that are strings right now. Make them bools. 
The changes to internal/simdgen/ops were automated with: sed -i "" -e 's/"true"/true/g;s/"false"/false/g' $(find -name '*.yaml' ops) go generate Change-Id: I6b4c4baf98736c4c05705953543263628a2fd829 Reviewed-on: https://go-review.googlesource.com/c/arch/+/690017 LUCI-TryBot-Result: Go LUCI Reviewed-by: David Chase Auto-Submit: Austin Clements --- internal/simdgen/categories.yaml | 412 +++++++++--------- internal/simdgen/gen_simdGenericOps.go | 2 +- internal/simdgen/gen_simdMachineOps.go | 8 +- internal/simdgen/gen_utility.go | 4 +- internal/simdgen/godefs.go | 8 +- internal/simdgen/ops/AddSub/categories.yaml | 32 +- .../simdgen/ops/BitwiseLogic/categories.yaml | 24 +- internal/simdgen/ops/Compares/categories.yaml | 42 +- .../simdgen/ops/FPonlyArith/categories.yaml | 70 +-- .../simdgen/ops/GaloisField/categories.yaml | 12 +- .../simdgen/ops/IntOnlyArith/categories.yaml | 18 +- internal/simdgen/ops/MLOps/categories.yaml | 50 +-- internal/simdgen/ops/MinMax/categories.yaml | 12 +- internal/simdgen/ops/Moves/categories.yaml | 20 +- internal/simdgen/ops/Mul/categories.yaml | 24 +- .../simdgen/ops/ShiftRotate/categories.yaml | 108 ++--- internal/unify/domain.go | 11 +- internal/unify/yaml.go | 15 +- 18 files changed, 442 insertions(+), 430 deletions(-) diff --git a/internal/simdgen/categories.yaml b/internal/simdgen/categories.yaml index 5b0e5597..f839f69b 100644 --- a/internal/simdgen/categories.yaml +++ b/internal/simdgen/categories.yaml @@ -1,113 +1,113 @@ !sum - go: Add - commutative: "true" + commutative: true extension: "AVX.*" documentation: !string |- // Add adds corresponding elements of two vectors. - go: SaturatedAdd - commutative: "true" + commutative: true extension: "AVX.*" documentation: !string |- // SaturatedAdd adds corresponding elements of two vectors with saturation. 
- go: AddMasked - masked: "true" - commutative: "true" + masked: true + commutative: true extension: "AVX.*" documentation: !string |- // AddMasked adds corresponding elements of two vectors. - go: SaturatedAddMasked - masked: "true" - commutative: "true" + masked: true + commutative: true extension: "AVX.*" documentation: !string |- // SaturatedAddMasked adds corresponding elements of two vectors with saturation. - go: Sub - commutative: "false" + commutative: false extension: "AVX.*" documentation: !string |- // Sub subtracts corresponding elements of two vectors. - go: SaturatedSub - commutative: "false" + commutative: false extension: "AVX.*" documentation: !string |- // SaturatedSub subtracts corresponding elements of two vectors with saturation. - go: SubMasked - masked: "true" - commutative: "false" + masked: true + commutative: false extension: "AVX.*" documentation: !string |- // SubMasked subtracts corresponding elements of two vectors. - go: SaturatedSubMasked - masked: "true" - commutative: "false" + masked: true + commutative: false extension: "AVX.*" documentation: !string |- // SaturatedSubMasked subtracts corresponding elements of two vectors with saturation. - go: PairwiseAdd - commutative: "false" + commutative: false extension: "AVX.*" documentation: !string |- // PairwiseAdd horizontally adds adjacent pairs of elements. // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...]. - go: PairwiseSub - commutative: "false" + commutative: false extension: "AVX.*" documentation: !string |- // PairwiseSub horizontally subtracts adjacent pairs of elements. // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...]. - go: SaturatedPairwiseAdd - commutative: "false" + commutative: false extension: "AVX.*" documentation: !string |- // SaturatedPairwiseAdd horizontally adds adjacent pairs of elements with saturation. 
// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...]. - go: SaturatedPairwiseSub - commutative: "false" + commutative: false extension: "AVX.*" documentation: !string |- // SaturatedPairwiseSub horizontally subtracts adjacent pairs of elements with saturation. // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...]. - go: And - commutative: "true" + commutative: true extension: "AVX.*" documentation: !string |- // And performs a bitwise AND operation between two vectors. - go: AndMasked - masked: "true" - commutative: "true" + masked: true + commutative: true extension: "AVX.*" documentation: !string |- // AndMasked performs a bitwise AND operation between two vectors. - go: Or - commutative: "true" + commutative: true extension: "AVX.*" documentation: !string |- // Or performs a bitwise OR operation between two vectors. - go: OrMasked - masked: "true" - commutative: "true" + masked: true + commutative: true extension: "AVX.*" documentation: !string |- // OrMasked performs a bitwise OR operation between two vectors. - go: AndNot - commutative: "false" + commutative: false extension: "AVX.*" documentation: !string |- // AndNot performs a bitwise x &^ y. - go: AndNotMasked - masked: "true" - commutative: "false" + masked: true + commutative: false extension: "AVX.*" documentation: !string |- // AndNotMasked performs a bitwise x &^ y. - go: Xor - commutative: "true" + commutative: true extension: "AVX.*" documentation: !string |- // Xor performs a bitwise XOR operation between two vectors. - go: XorMasked - masked: "true" - commutative: "true" + masked: true + commutative: true extension: "AVX.*" documentation: !string |- // XorMasked performs a bitwise XOR operation between two vectors. 
@@ -122,239 +122,239 @@ # 6: Greater - go: Equal constImm: 0 - commutative: "true" + commutative: true extension: "AVX.*" documentation: !string |- // Equal compares for equality. - go: Less constImm: 1 - commutative: "false" + commutative: false extension: "AVX.*" documentation: !string |- // Less compares for less than. - go: LessEqual constImm: 2 - commutative: "false" + commutative: false extension: "AVX.*" documentation: !string |- // LessEqual compares for less than or equal. - go: IsNan # For float only. constImm: 3 - commutative: "true" + commutative: true extension: "AVX.*" documentation: !string |- // IsNan checks if elements are NaN. Use as x.IsNan(x). - go: NotEqual constImm: 4 - commutative: "true" + commutative: true extension: "AVX.*" documentation: !string |- // NotEqual compares for inequality. - go: GreaterEqual constImm: 13 - commutative: "false" + commutative: false extension: "AVX.*" documentation: !string |- // GreaterEqual compares for greater than or equal. - go: Greater constImm: 14 - commutative: "false" + commutative: false extension: "AVX.*" documentation: !string |- // Greater compares for greater than. - go: EqualMasked constImm: 0 - masked: "true" - commutative: "true" + masked: true + commutative: true extension: "AVX.*" documentation: !string |- // EqualMasked compares for equality. - go: LessMasked constImm: 1 - masked: "true" - commutative: "false" + masked: true + commutative: false extension: "AVX.*" documentation: !string |- // LessMasked compares for less than. - go: LessEqualMasked constImm: 2 - masked: "true" - commutative: "false" + masked: true + commutative: false extension: "AVX.*" documentation: !string |- // LessEqualMasked compares for less than or equal. - go: IsNanMasked # For float only. constImm: 3 - masked: "true" - commutative: "true" + masked: true + commutative: true extension: "AVX.*" documentation: !string |- // IsNanMasked checks if elements are NaN. Use as x.IsNan(x). 
- go: NotEqualMasked constImm: 4 - masked: "true" - commutative: "true" + masked: true + commutative: true extension: "AVX.*" documentation: !string |- // NotEqualMasked compares for inequality. - go: GreaterEqualMasked constImm: 13 - masked: "true" - commutative: "false" + masked: true + commutative: false extension: "AVX.*" documentation: !string |- // GreaterEqualMasked compares for greater than or equal. - go: GreaterMasked constImm: 14 - masked: "true" - commutative: "false" + masked: true + commutative: false extension: "AVX.*" documentation: !string |- // GreaterMasked compares for greater than. - go: Div - commutative: "false" + commutative: false extension: "AVX.*" documentation: !string |- // Div divides elements of two vectors. - go: DivMasked - commutative: "false" - masked: "true" + commutative: false + masked: true extension: "AVX.*" documentation: !string |- // DivMasked divides elements of two vectors. - go: Sqrt - commutative: "false" + commutative: false extension: "AVX.*" documentation: !string |- // Sqrt computes the square root of each element. - go: SqrtMasked - commutative: "false" - masked: "true" + commutative: false + masked: true extension: "AVX.*" documentation: !string |- // SqrtMasked computes the square root of each element. - go: ApproximateReciprocal - commutative: "false" + commutative: false extension: "AVX.*" documentation: !string |- // ApproximateReciprocal computes an approximate reciprocal of each element. - go: ApproximateReciprocalMasked - commutative: "false" - masked: "true" + commutative: false + masked: true extension: "AVX.*" documentation: !string |- // ApproximateReciprocalMasked computes an approximate reciprocal of each element. - go: ApproximateReciprocalOfSqrt - commutative: "false" + commutative: false extension: "AVX.*" documentation: !string |- // ApproximateReciprocalOfSqrt computes an approximate reciprocal of the square root of each element. 
- go: ApproximateReciprocalOfSqrtMasked - commutative: "false" - masked: "true" + commutative: false + masked: true extension: "AVX.*" documentation: !string |- // ApproximateReciprocalOfSqrtMasked computes an approximate reciprocal of the square root of each element. - go: MulByPowOf2Masked # This operation is all after AVX512, the unmasked version will be generated. - commutative: "false" - masked: "true" + commutative: false + masked: true extension: "AVX.*" documentation: !string |- // MulByPowOf2Masked multiplies elements by a power of 2. - go: Round - commutative: "false" + commutative: false extension: "AVX.*" constImm: 0 documentation: !string |- // Round rounds elements to the nearest integer. - go: RoundWithPrecisionMasked - commutative: "false" + commutative: false extension: "AVX.*" constImm: 0 - masked: "true" + masked: true documentation: !string |- // RoundWithPrecisionMasked rounds elements with specified precision. - go: DiffWithRoundWithPrecisionMasked - commutative: "false" + commutative: false extension: "AVX.*" constImm: 0 - masked: "true" + masked: true documentation: !string |- // DiffWithRoundWithPrecisionMasked computes the difference after rounding with specified precision. - go: Floor - commutative: "false" + commutative: false extension: "AVX.*" constImm: 1 documentation: !string |- // Floor rounds elements down to the nearest integer. - go: FloorWithPrecisionMasked - commutative: "false" + commutative: false extension: "AVX.*" constImm: 1 - masked: "true" + masked: true documentation: !string |- // FloorWithPrecisionMasked rounds elements down with specified precision. - go: DiffWithFloorWithPrecisionMasked - commutative: "false" + commutative: false extension: "AVX.*" constImm: 1 - masked: "true" + masked: true documentation: !string |- // DiffWithFloorWithPrecisionMasked computes the difference after flooring with specified precision. 
- go: Ceil - commutative: "false" + commutative: false extension: "AVX.*" constImm: 2 documentation: !string |- // Ceil rounds elements up to the nearest integer. - go: CeilWithPrecisionMasked - commutative: "false" + commutative: false extension: "AVX.*" constImm: 2 - masked: "true" + masked: true documentation: !string |- // CeilWithPrecisionMasked rounds elements up with specified precision. - go: DiffWithCeilWithPrecisionMasked - commutative: "false" + commutative: false extension: "AVX.*" constImm: 2 - masked: "true" + masked: true documentation: !string |- // DiffWithCeilWithPrecisionMasked computes the difference after ceiling with specified precision. - go: Trunc - commutative: "false" + commutative: false extension: "AVX.*" constImm: 3 documentation: !string |- // Trunc truncates elements towards zero. - go: TruncWithPrecisionMasked - commutative: "false" + commutative: false extension: "AVX.*" constImm: 3 - masked: "true" + masked: true documentation: !string |- // TruncWithPrecisionMasked truncates elements with specified precision. - go: DiffWithTruncWithPrecisionMasked - commutative: "false" + commutative: false extension: "AVX.*" constImm: 3 - masked: "true" + masked: true documentation: !string |- // DiffWithTruncWithPrecisionMasked computes the difference after truncating with specified precision. - go: AddSub - commutative: "false" + commutative: false extension: "AVX.*" documentation: !string |- // AddSub subtracts even elements and adds odd elements of two vectors. - go: GaloisFieldAffineTransformMasked - masked: "true" - commutative: "false" + masked: true + commutative: false extension: "AVX.*" documentation: !string |- // GaloisFieldAffineTransformMasked computes an affine transformation in GF(2^8): @@ -362,8 +362,8 @@ // b is an 8-bit vector. The affine transformation is y * x + b, with each element of y // corresponding to a group of 8 elements in x. 
- go: GaloisFieldAffineTransformInverseMasked - masked: "true" - commutative: "false" + masked: true + commutative: false extension: "AVX.*" documentation: !string |- // GaloisFieldAffineTransformInverseMasked computes an affine transformation in GF(2^8), @@ -372,40 +372,40 @@ // b is an 8-bit vector. The affine transformation is y * x + b, with each element of y // corresponding to a group of 8 elements in x. - go: GaloisFieldMulMasked - masked: "true" - commutative: "false" + masked: true + commutative: false extension: "AVX.*" documentation: !string |- // GaloisFieldMulMasked computes element-wise GF(2^8) multiplication with // reduction polynomial x^8 + x^4 + x^3 + x + 1. - go: Average - commutative: "true" + commutative: true extension: "AVX.*" # VPAVGB/W are available across various AVX versions documentation: !string |- // Average computes the rounded average of corresponding elements. - go: AverageMasked - commutative: "true" - masked: "true" + commutative: true + masked: true extension: "AVX512.*" # Masked operations are typically AVX512 documentation: !string |- // AverageMasked computes the rounded average of corresponding elements. - go: Absolute - commutative: "false" + commutative: false # Unary operation, not commutative extension: "AVX.*" # VPABSB/W/D are AVX, VPABSQ is AVX512 documentation: !string |- // Absolute computes the absolute value of each element. - go: AbsoluteMasked - commutative: "false" - masked: "true" + commutative: false + masked: true extension: "AVX512.*" documentation: !string |- // AbsoluteMasked computes the absolute value of each element. 
- go: Sign # Applies sign of second operand to first: sign(val, sign_src) - commutative: "false" + commutative: false extension: "AVX.*" documentation: !string |- // Sign returns the product of the first operand with -1, 0, or 1, @@ -413,34 +413,34 @@ # Sign does not have masked version - go: PopCountMasked - commutative: "false" - masked: "true" + commutative: false + masked: true extension: "AVX512.*" # VPOPCNT instructions are AVX512 (BITALG or VPOPCNTDQ) documentation: !string |- // PopCountMasked counts the number of set bits in each element. - go: PairDotProd - commutative: "false" + commutative: false extension: "AVX.*" documentation: !string |- // PairDotProd multiplies the elements and add the pairs together, // yielding a vector of half as many elements with twice the input element size. - go: PairDotProdMasked - masked: "true" - commutative: "false" + masked: true + commutative: false extension: "AVX.*" documentation: !string |- // PairDotProdMasked multiplies the elements and add the pairs together, // yielding a vector of half as many elements with twice the input element size. # TODO: maybe simplify this name within the receiver-type + method-naming scheme we use. - go: SaturatedUnsignedSignedPairDotProd - commutative: "false" + commutative: false extension: "AVX.*" documentation: !string |- // SaturatedUnsignedSignedPairDotProd multiplies the elements and add the pairs together with saturation, // yielding a vector of half as many elements with twice the input element size. - go: SaturatedUnsignedSignedPairDotProdMasked - masked: "true" - commutative: "false" + masked: true + commutative: false extension: "AVX.*" documentation: !string |- // SaturatedUnsignedSignedPairDotProdMasked multiplies the elements and add the pairs together with saturation, @@ -448,118 +448,118 @@ # QuadDotProd, i.e. VPDPBUSD(S) are operations with src/dst on the same register, we are not supporting this as of now. 
- go: DotProdBroadcast - commutative: "true" + commutative: true extension: "AVX.*" documentation: !string |- // DotProdBroadcast multiplies all elements and broadcasts the sum. - go: UnsignedSignedQuadDotProdAccumulate - commutative: "false" + commutative: false extension: "AVX.*" documentation: !string |- // UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of x and y and then adds z. - go: UnsignedSignedQuadDotProdAccumulateMasked - masked: "true" - commutative: "false" + masked: true + commutative: false extension: "AVX.*" documentation: !string |- // UnsignedSignedQuadDotProdAccumulateMasked performs dot products on groups of 4 elements of x and y and then adds z. - go: SaturatedUnsignedSignedQuadDotProdAccumulate - commutative: "false" + commutative: false extension: "AVX.*" documentation: !string |- // SaturatedUnsignedSignedQuadDotProdAccumulate multiplies performs dot products on groups of 4 elements of x and y and then adds z. - go: SaturatedUnsignedSignedQuadDotProdAccumulateMasked - masked: "true" - commutative: "false" + masked: true + commutative: false extension: "AVX.*" documentation: !string |- // SaturatedUnsignedSignedQuadDotProdAccumulateMasked multiplies performs dot products on groups of 4 elements of x and y and then adds z. - go: PairDotProdAccumulate - commutative: "false" + commutative: false extension: "AVX.*" documentation: !string |- // PairDotProdAccumulate performs dot products on pairs of elements of x and y and then adds z. - go: PairDotProdAccumulateMasked - masked: "true" - commutative: "false" + masked: true + commutative: false extension: "AVX.*" documentation: !string |- // PairDotProdAccumulateMasked performs dot products on pairs of elements of x and y and then adds z. 
- go: SaturatedPairDotProdAccumulate - commutative: "false" + commutative: false extension: "AVX.*" documentation: !string |- // SaturatedPairDotProdAccumulate performs dot products on pairs of elements of x and y and then adds z. - go: SaturatedPairDotProdAccumulateMasked - masked: "true" - commutative: "false" + masked: true + commutative: false extension: "AVX.*" documentation: !string |- // SaturatedPairDotProdAccumulateMasked performs dot products on pairs of elements of x and y and then adds z. - go: FusedMultiplyAddMasked - masked: "true" - commutative: "false" + masked: true + commutative: false extension: "AVX.*" documentation: !string |- // FusedMultiplyAddMasked performs (x * y) + z. - go: FusedMultiplyAddSubMasked - masked: "true" - commutative: "false" + masked: true + commutative: false extension: "AVX.*" documentation: !string |- // FusedMultiplyAddSubMasked performs (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements. - go: FusedMultiplySubAddMasked - masked: "true" - commutative: "false" + masked: true + commutative: false extension: "AVX.*" documentation: !string |- // FusedMultiplySubAddMasked performs (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements. - go: Max - commutative: "true" + commutative: true extension: "AVX.*" documentation: !string |- // Max computes the maximum of corresponding elements. - go: MaxMasked - commutative: "true" - masked: "true" + commutative: true + masked: true extension: "AVX.*" documentation: !string |- // MaxMasked computes the maximum of corresponding elements. - go: Min - commutative: "true" + commutative: true extension: "AVX.*" documentation: !string |- // Min computes the minimum of corresponding elements. - go: MinMasked - commutative: "true" - masked: "true" + commutative: true + masked: true extension: "AVX.*" documentation: !string |- // MinMasked computes the minimum of corresponding elements. 
- go: SetElem - commutative: "false" + commutative: false extension: "AVX.*" documentation: !string |- // SetElem sets a single constant-indexed element's value. - go: GetElem - commutative: "false" + commutative: false extension: "AVX.*" documentation: !string |- // GetElem retrieves a single constant-indexed element's value. - go: Set128 - commutative: "false" + commutative: false extension: "AVX.*" documentation: !string |- // Set128 combines a 128-bit vector with a 256-bit vector, where the constant operand specifies whether the low (0) or high (1) half is receives the smaller vector. - go: Get128 - commutative: "false" + commutative: false extension: "AVX.*" documentation: !string |- // Get128 retrieves the upper (1) or lower (0) half of a 256-bit vector, depending on the constant operand. - go: Permute - commutative: "false" + commutative: false extension: "AVX.*" documentation: !string |- // Permute performs a full permutation of vector x using indices: @@ -567,8 +567,8 @@ // Only the needed bits to represent x's index are used in indices' elements. - go: PermuteMasked - commutative: "false" - masked: "true" + commutative: false + masked: true extension: "AVX.*" documentation: !string |- // PermuteMasked performs a full permutation of vector y using indices: @@ -576,8 +576,8 @@ // Only the needed bits to represent x's index are used in indices' elements. - go: Permute2Masked # Permute2Masked is only available on or after AVX512 - commutative: "false" - masked: "true" + commutative: false + masked: true extension: "AVX.*" documentation: !string |- // Permute2Masked performs a full permutation of vector x, y using indices: @@ -586,203 +586,203 @@ // Only the needed bits to represent xy's index are used in indices' elements. 
- go: Compress - commutative: "false" + commutative: false # The mask in Compress is a control mask rather than a write mask, so we don't mark it "masked" extension: "AVX.*" documentation: !string |- // Compress performs a compression on vector x using mask by // selecting elements as indicated by mask, and pack them to lower indexed elements. - go: Mul - commutative: "true" + commutative: true extension: "AVX.*" documentation: !string |- // Mul multiplies corresponding elements of two vectors. - go: MulEvenWiden - commutative: "true" + commutative: true extension: "AVX.*" documentation: !string |- // MulEvenWiden multiplies even-indexed elements, widening the result. // Result[i] = v1.Even[i] * v2.Even[i]. - go: MulHigh - commutative: "true" + commutative: true extension: "AVX.*" documentation: !string |- // MulHigh multiplies elements and stores the high part of the result. - go: MulLow - commutative: "true" + commutative: true extension: "AVX.*" documentation: !string |- // MulLow multiplies elements and stores the low part of the result. - go: MulMasked - masked: "true" - commutative: "true" + masked: true + commutative: true extension: "AVX.*" documentation: !string |- // MulMasked multiplies corresponding elements of two vectors. - go: MulEvenWidenMasked - masked: "true" - commutative: "true" + masked: true + commutative: true extension: "AVX.*" documentation: !string |- // MulEvenWidenMasked multiplies even-indexed elements, widening the result. // Result[i] = v1.Even[i] * v2.Even[i]. - go: MulHighMasked - masked: "true" - commutative: "true" + masked: true + commutative: true extension: "AVX.*" documentation: !string |- // MulHighMasked multiplies elements and stores the high part of the result. - go: MulLowMasked - masked: "true" - commutative: "true" + masked: true + commutative: true extension: "AVX.*" documentation: !string |- // MulLowMasked multiplies elements and stores the low part of the result. 
- go: ShiftAllLeft - nameAndSizeCheck: "true" - commutative: "false" + nameAndSizeCheck: true + commutative: false extension: "AVX.*" documentation: !string |- // ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. - go: ShiftAllLeftMasked - nameAndSizeCheck: "true" - masked: "true" - commutative: "false" + nameAndSizeCheck: true + masked: true + commutative: false extension: "AVX.*" documentation: !string |- // ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. - go: ShiftAllRight signed: false - nameAndSizeCheck: "true" - commutative: "false" + nameAndSizeCheck: true + commutative: false extension: "AVX.*" documentation: !string |- // ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. - go: ShiftAllRightMasked signed: false - nameAndSizeCheck: "true" - masked: "true" - commutative: "false" + nameAndSizeCheck: true + masked: true + commutative: false extension: "AVX.*" documentation: !string |- // ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. - go: ShiftAllRight signed: true - nameAndSizeCheck: "true" - commutative: "false" + nameAndSizeCheck: true + commutative: false extension: "AVX.*" documentation: !string |- // ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. - go: ShiftAllRightMasked signed: true - nameAndSizeCheck: "true" - masked: "true" - commutative: "false" + nameAndSizeCheck: true + masked: true + commutative: false extension: "AVX.*" documentation: !string |- // ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. 
- go: ShiftLeft - nameAndSizeCheck: "true" - commutative: "false" + nameAndSizeCheck: true + commutative: false extension: "AVX.*" documentation: !string |- // ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. - go: ShiftLeftMasked - nameAndSizeCheck: "true" - masked: "true" - commutative: "false" + nameAndSizeCheck: true + masked: true + commutative: false extension: "AVX.*" documentation: !string |- // ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. - go: ShiftRight signed: false - nameAndSizeCheck: "true" - commutative: "false" + nameAndSizeCheck: true + commutative: false extension: "AVX.*" documentation: !string |- // ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. - go: ShiftRightMasked signed: false - nameAndSizeCheck: "true" - masked: "true" - commutative: "false" + nameAndSizeCheck: true + masked: true + commutative: false extension: "AVX.*" documentation: !string |- // ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. - go: ShiftRight signed: true - nameAndSizeCheck: "true" - commutative: "false" + nameAndSizeCheck: true + commutative: false extension: "AVX.*" documentation: !string |- // ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. - go: ShiftRightMasked signed: true - nameAndSizeCheck: "true" - masked: "true" - commutative: "false" + nameAndSizeCheck: true + masked: true + commutative: false extension: "AVX.*" documentation: !string |- // ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. 
Emptied upper bits are filled with the sign bit. - go: RotateAllLeftMasked - nameAndSizeCheck: "true" - masked: "true" - commutative: "false" + nameAndSizeCheck: true + masked: true + commutative: false extension: "AVX.*" documentation: !string |- // RotateAllLeftMasked rotates each element to the left by the number of bits specified by the immediate. - go: RotateLeftMasked - nameAndSizeCheck: "true" - masked: "true" - commutative: "false" + nameAndSizeCheck: true + masked: true + commutative: false extension: "AVX.*" documentation: !string |- // RotateLeftMasked rotates each element in x to the left by the number of bits specified by y's corresponding elements. - go: RotateAllRightMasked - nameAndSizeCheck: "true" - masked: "true" - commutative: "false" + nameAndSizeCheck: true + masked: true + commutative: false extension: "AVX.*" documentation: !string |- // RotateAllRightMasked rotates each element to the right by the number of bits specified by the immediate. - go: RotateRightMasked - nameAndSizeCheck: "true" - masked: "true" - commutative: "false" + nameAndSizeCheck: true + masked: true + commutative: false extension: "AVX.*" documentation: !string |- // RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements. - go: ShiftAllLeftAndFillUpperFromMasked - nameAndSizeCheck: "true" - masked: "true" - commutative: "false" + nameAndSizeCheck: true + masked: true + commutative: false extension: "AVX.*" documentation: !string |- // ShiftAllLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. 
- go: ShiftAllRightAndFillUpperFromMasked - nameAndSizeCheck: "true" - masked: "true" - commutative: "false" + nameAndSizeCheck: true + masked: true + commutative: false extension: "AVX.*" documentation: !string |- // ShiftAllRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. - go: ShiftLeftAndFillUpperFromMasked - nameAndSizeCheck: "true" - masked: "true" - commutative: "false" + nameAndSizeCheck: true + masked: true + commutative: false extension: "AVX.*" documentation: !string |- // ShiftLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. - go: ShiftRightAndFillUpperFromMasked - nameAndSizeCheck: "true" - masked: "true" - commutative: "false" + nameAndSizeCheck: true + masked: true + commutative: false extension: "AVX.*" documentation: !string |- // ShiftRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the diff --git a/internal/simdgen/gen_simdGenericOps.go b/internal/simdgen/gen_simdGenericOps.go index f6c7a4a6..4eb47b44 100644 --- a/internal/simdgen/gen_simdGenericOps.go +++ b/internal/simdgen/gen_simdGenericOps.go @@ -35,7 +35,7 @@ func writeSIMDGenericOps(ops []Operation) *bytes.Buffer { sortKey string OpName string OpInLen int - Comm string + Comm bool } type opData struct { Ops []genericOpsData diff --git a/internal/simdgen/gen_simdMachineOps.go b/internal/simdgen/gen_simdMachineOps.go index cca7d945..39bf2ec1 100644 --- a/internal/simdgen/gen_simdMachineOps.go +++ b/internal/simdgen/gen_simdMachineOps.go @@ -38,9 +38,9 @@ func writeSIMDMachineOps(ops []Operation) *bytes.Buffer { Asm string OpInLen int RegInfo string - Comm string + Comm 
bool Type string - ResultInArg0 string + ResultInArg0 bool } type machineOpsData struct { OpsData []opData @@ -103,9 +103,9 @@ func writeSIMDMachineOps(ops []Operation) *bytes.Buffer { } else { panic(fmt.Errorf("simdgen does not recognize this output shape: %d", shapeOut)) } - resultInArg0 := "false" + resultInArg0 := false if shapeOut == OneVregOutAtIn { - resultInArg0 = "true" + resultInArg0 = true } if shapeIn == OneImmIn || shapeIn == OneKmaskImmIn { opsDataImm = append(opsDataImm, opData{*gOp.In[0].Go + gOp.Go, asm, gOp.Asm, len(gOp.In), regInfo, gOp.Commutative, outType, resultInArg0}) diff --git a/internal/simdgen/gen_utility.go b/internal/simdgen/gen_utility.go index e3bf45a9..ea4d56ac 100644 --- a/internal/simdgen/gen_utility.go +++ b/internal/simdgen/gen_utility.go @@ -555,7 +555,7 @@ func splitMask(ops []Operation) ([]Operation, error) { splited := []Operation{} for _, op := range ops { splited = append(splited, op) - if op.Masked == nil || *op.Masked != "true" { + if op.Masked == nil || !*op.Masked { continue } shapeIn, _, _, _, _ := op.shape() @@ -582,7 +582,7 @@ func splitMask(ops []Operation) ([]Operation, error) { func insertMaskDescToDoc(ops []Operation) { for i, _ := range ops { - if ops[i].Masked != nil && *ops[i].Masked == "true" { + if ops[i].Masked != nil && *ops[i].Masked { if ops[i].Documentation != nil { *ops[i].Documentation += "\n//\n// This operation is applied selectively under a write mask." 
} diff --git a/internal/simdgen/godefs.go b/internal/simdgen/godefs.go index 6179d98d..32dc29cf 100644 --- a/internal/simdgen/godefs.go +++ b/internal/simdgen/godefs.go @@ -23,20 +23,20 @@ type Operation struct { In []Operand // Arguments Out []Operand // Results - Commutative string // Commutativity + Commutative bool // Commutativity Extension string // Extension ISASet string // ISASet CPUFeature *string // If ISASet is empty, then Extension, otherwise ISASet - Zeroing *string // nil => use asm suffix ".Z"; "false" => do not use asm suffix ".Z" + Zeroing *bool // nil => use asm suffix ".Z"; false => do not use asm suffix ".Z" Documentation *string // Documentation will be appended to the stubs comments. // ConstMask is a hack to reduce the size of defs the user writes for const-immediate // If present, it will be copied to [In[0].Const]. ConstImm *string // Masked indicates that this is a masked operation, this field has to be set for masked operations // otherwise simdgen won't recognize it in [splitMask]. - Masked *string + Masked *bool // NameAndSizeCheck is used to check [BWDQ] maps to (8|16|32|64) elemBits. - NameAndSizeCheck *string + NameAndSizeCheck *bool } func (o *Operation) VectorWidth() int { diff --git a/internal/simdgen/ops/AddSub/categories.yaml b/internal/simdgen/ops/AddSub/categories.yaml index 95775bb8..9bae42e9 100644 --- a/internal/simdgen/ops/AddSub/categories.yaml +++ b/internal/simdgen/ops/AddSub/categories.yaml @@ -1,68 +1,68 @@ !sum - go: Add - commutative: "true" + commutative: true extension: "AVX.*" documentation: !string |- // Add adds corresponding elements of two vectors. - go: SaturatedAdd - commutative: "true" + commutative: true extension: "AVX.*" documentation: !string |- // SaturatedAdd adds corresponding elements of two vectors with saturation. 
- go: AddMasked - masked: "true" - commutative: "true" + masked: true + commutative: true extension: "AVX.*" documentation: !string |- // AddMasked adds corresponding elements of two vectors. - go: SaturatedAddMasked - masked: "true" - commutative: "true" + masked: true + commutative: true extension: "AVX.*" documentation: !string |- // SaturatedAddMasked adds corresponding elements of two vectors with saturation. - go: Sub - commutative: "false" + commutative: false extension: "AVX.*" documentation: !string |- // Sub subtracts corresponding elements of two vectors. - go: SaturatedSub - commutative: "false" + commutative: false extension: "AVX.*" documentation: !string |- // SaturatedSub subtracts corresponding elements of two vectors with saturation. - go: SubMasked - masked: "true" - commutative: "false" + masked: true + commutative: false extension: "AVX.*" documentation: !string |- // SubMasked subtracts corresponding elements of two vectors. - go: SaturatedSubMasked - masked: "true" - commutative: "false" + masked: true + commutative: false extension: "AVX.*" documentation: !string |- // SaturatedSubMasked subtracts corresponding elements of two vectors with saturation. - go: PairwiseAdd - commutative: "false" + commutative: false extension: "AVX.*" documentation: !string |- // PairwiseAdd horizontally adds adjacent pairs of elements. // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...]. - go: PairwiseSub - commutative: "false" + commutative: false extension: "AVX.*" documentation: !string |- // PairwiseSub horizontally subtracts adjacent pairs of elements. // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...]. - go: SaturatedPairwiseAdd - commutative: "false" + commutative: false extension: "AVX.*" documentation: !string |- // SaturatedPairwiseAdd horizontally adds adjacent pairs of elements with saturation. 
// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...]. - go: SaturatedPairwiseSub - commutative: "false" + commutative: false extension: "AVX.*" documentation: !string |- // SaturatedPairwiseSub horizontally subtracts adjacent pairs of elements with saturation. diff --git a/internal/simdgen/ops/BitwiseLogic/categories.yaml b/internal/simdgen/ops/BitwiseLogic/categories.yaml index d6ea3ed1..c6a00cc2 100644 --- a/internal/simdgen/ops/BitwiseLogic/categories.yaml +++ b/internal/simdgen/ops/BitwiseLogic/categories.yaml @@ -1,45 +1,45 @@ !sum - go: And - commutative: "true" + commutative: true extension: "AVX.*" documentation: !string |- // And performs a bitwise AND operation between two vectors. - go: AndMasked - masked: "true" - commutative: "true" + masked: true + commutative: true extension: "AVX.*" documentation: !string |- // AndMasked performs a bitwise AND operation between two vectors. - go: Or - commutative: "true" + commutative: true extension: "AVX.*" documentation: !string |- // Or performs a bitwise OR operation between two vectors. - go: OrMasked - masked: "true" - commutative: "true" + masked: true + commutative: true extension: "AVX.*" documentation: !string |- // OrMasked performs a bitwise OR operation between two vectors. - go: AndNot - commutative: "false" + commutative: false extension: "AVX.*" documentation: !string |- // AndNot performs a bitwise x &^ y. - go: AndNotMasked - masked: "true" - commutative: "false" + masked: true + commutative: false extension: "AVX.*" documentation: !string |- // AndNotMasked performs a bitwise x &^ y. - go: Xor - commutative: "true" + commutative: true extension: "AVX.*" documentation: !string |- // Xor performs a bitwise XOR operation between two vectors. 
- go: XorMasked - masked: "true" - commutative: "true" + masked: true + commutative: true extension: "AVX.*" documentation: !string |- // XorMasked performs a bitwise XOR operation between two vectors. diff --git a/internal/simdgen/ops/Compares/categories.yaml b/internal/simdgen/ops/Compares/categories.yaml index f7383555..d1080513 100644 --- a/internal/simdgen/ops/Compares/categories.yaml +++ b/internal/simdgen/ops/Compares/categories.yaml @@ -8,93 +8,93 @@ # 6: Greater - go: Equal constImm: 0 - commutative: "true" + commutative: true extension: "AVX.*" documentation: !string |- // Equal compares for equality. - go: Less constImm: 1 - commutative: "false" + commutative: false extension: "AVX.*" documentation: !string |- // Less compares for less than. - go: LessEqual constImm: 2 - commutative: "false" + commutative: false extension: "AVX.*" documentation: !string |- // LessEqual compares for less than or equal. - go: IsNan # For float only. constImm: 3 - commutative: "true" + commutative: true extension: "AVX.*" documentation: !string |- // IsNan checks if elements are NaN. Use as x.IsNan(x). - go: NotEqual constImm: 4 - commutative: "true" + commutative: true extension: "AVX.*" documentation: !string |- // NotEqual compares for inequality. - go: GreaterEqual constImm: 13 - commutative: "false" + commutative: false extension: "AVX.*" documentation: !string |- // GreaterEqual compares for greater than or equal. - go: Greater constImm: 14 - commutative: "false" + commutative: false extension: "AVX.*" documentation: !string |- // Greater compares for greater than. - go: EqualMasked constImm: 0 - masked: "true" - commutative: "true" + masked: true + commutative: true extension: "AVX.*" documentation: !string |- // EqualMasked compares for equality. - go: LessMasked constImm: 1 - masked: "true" - commutative: "false" + masked: true + commutative: false extension: "AVX.*" documentation: !string |- // LessMasked compares for less than. 
- go: LessEqualMasked constImm: 2 - masked: "true" - commutative: "false" + masked: true + commutative: false extension: "AVX.*" documentation: !string |- // LessEqualMasked compares for less than or equal. - go: IsNanMasked # For float only. constImm: 3 - masked: "true" - commutative: "true" + masked: true + commutative: true extension: "AVX.*" documentation: !string |- // IsNanMasked checks if elements are NaN. Use as x.IsNan(x). - go: NotEqualMasked constImm: 4 - masked: "true" - commutative: "true" + masked: true + commutative: true extension: "AVX.*" documentation: !string |- // NotEqualMasked compares for inequality. - go: GreaterEqualMasked constImm: 13 - masked: "true" - commutative: "false" + masked: true + commutative: false extension: "AVX.*" documentation: !string |- // GreaterEqualMasked compares for greater than or equal. - go: GreaterMasked constImm: 14 - masked: "true" - commutative: "false" + masked: true + commutative: false extension: "AVX.*" documentation: !string |- // GreaterMasked compares for greater than. diff --git a/internal/simdgen/ops/FPonlyArith/categories.yaml b/internal/simdgen/ops/FPonlyArith/categories.yaml index e0d5836d..1347b533 100644 --- a/internal/simdgen/ops/FPonlyArith/categories.yaml +++ b/internal/simdgen/ops/FPonlyArith/categories.yaml @@ -1,141 +1,141 @@ !sum - go: Div - commutative: "false" + commutative: false extension: "AVX.*" documentation: !string |- // Div divides elements of two vectors. - go: DivMasked - commutative: "false" - masked: "true" + commutative: false + masked: true extension: "AVX.*" documentation: !string |- // DivMasked divides elements of two vectors. - go: Sqrt - commutative: "false" + commutative: false extension: "AVX.*" documentation: !string |- // Sqrt computes the square root of each element. - go: SqrtMasked - commutative: "false" - masked: "true" + commutative: false + masked: true extension: "AVX.*" documentation: !string |- // SqrtMasked computes the square root of each element. 
- go: ApproximateReciprocal - commutative: "false" + commutative: false extension: "AVX.*" documentation: !string |- // ApproximateReciprocal computes an approximate reciprocal of each element. - go: ApproximateReciprocalMasked - commutative: "false" - masked: "true" + commutative: false + masked: true extension: "AVX.*" documentation: !string |- // ApproximateReciprocalMasked computes an approximate reciprocal of each element. - go: ApproximateReciprocalOfSqrt - commutative: "false" + commutative: false extension: "AVX.*" documentation: !string |- // ApproximateReciprocalOfSqrt computes an approximate reciprocal of the square root of each element. - go: ApproximateReciprocalOfSqrtMasked - commutative: "false" - masked: "true" + commutative: false + masked: true extension: "AVX.*" documentation: !string |- // ApproximateReciprocalOfSqrtMasked computes an approximate reciprocal of the square root of each element. - go: MulByPowOf2Masked # This operation is all after AVX512, the unmasked version will be generated. - commutative: "false" - masked: "true" + commutative: false + masked: true extension: "AVX.*" documentation: !string |- // MulByPowOf2Masked multiplies elements by a power of 2. - go: Round - commutative: "false" + commutative: false extension: "AVX.*" constImm: 0 documentation: !string |- // Round rounds elements to the nearest integer. - go: RoundWithPrecisionMasked - commutative: "false" + commutative: false extension: "AVX.*" constImm: 0 - masked: "true" + masked: true documentation: !string |- // RoundWithPrecisionMasked rounds elements with specified precision. - go: DiffWithRoundWithPrecisionMasked - commutative: "false" + commutative: false extension: "AVX.*" constImm: 0 - masked: "true" + masked: true documentation: !string |- // DiffWithRoundWithPrecisionMasked computes the difference after rounding with specified precision. 
- go: Floor - commutative: "false" + commutative: false extension: "AVX.*" constImm: 1 documentation: !string |- // Floor rounds elements down to the nearest integer. - go: FloorWithPrecisionMasked - commutative: "false" + commutative: false extension: "AVX.*" constImm: 1 - masked: "true" + masked: true documentation: !string |- // FloorWithPrecisionMasked rounds elements down with specified precision. - go: DiffWithFloorWithPrecisionMasked - commutative: "false" + commutative: false extension: "AVX.*" constImm: 1 - masked: "true" + masked: true documentation: !string |- // DiffWithFloorWithPrecisionMasked computes the difference after flooring with specified precision. - go: Ceil - commutative: "false" + commutative: false extension: "AVX.*" constImm: 2 documentation: !string |- // Ceil rounds elements up to the nearest integer. - go: CeilWithPrecisionMasked - commutative: "false" + commutative: false extension: "AVX.*" constImm: 2 - masked: "true" + masked: true documentation: !string |- // CeilWithPrecisionMasked rounds elements up with specified precision. - go: DiffWithCeilWithPrecisionMasked - commutative: "false" + commutative: false extension: "AVX.*" constImm: 2 - masked: "true" + masked: true documentation: !string |- // DiffWithCeilWithPrecisionMasked computes the difference after ceiling with specified precision. - go: Trunc - commutative: "false" + commutative: false extension: "AVX.*" constImm: 3 documentation: !string |- // Trunc truncates elements towards zero. - go: TruncWithPrecisionMasked - commutative: "false" + commutative: false extension: "AVX.*" constImm: 3 - masked: "true" + masked: true documentation: !string |- // TruncWithPrecisionMasked truncates elements with specified precision. 
- go: DiffWithTruncWithPrecisionMasked - commutative: "false" + commutative: false extension: "AVX.*" constImm: 3 - masked: "true" + masked: true documentation: !string |- // DiffWithTruncWithPrecisionMasked computes the difference after truncating with specified precision. - go: AddSub - commutative: "false" + commutative: false extension: "AVX.*" documentation: !string |- // AddSub subtracts even elements and adds odd elements of two vectors. \ No newline at end of file diff --git a/internal/simdgen/ops/GaloisField/categories.yaml b/internal/simdgen/ops/GaloisField/categories.yaml index 3caa13cf..4184c5e4 100644 --- a/internal/simdgen/ops/GaloisField/categories.yaml +++ b/internal/simdgen/ops/GaloisField/categories.yaml @@ -1,7 +1,7 @@ !sum - go: GaloisFieldAffineTransformMasked - masked: "true" - commutative: "false" + masked: true + commutative: false extension: "AVX.*" documentation: !string |- // GaloisFieldAffineTransformMasked computes an affine transformation in GF(2^8): @@ -9,8 +9,8 @@ // b is an 8-bit vector. The affine transformation is y * x + b, with each element of y // corresponding to a group of 8 elements in x. - go: GaloisFieldAffineTransformInverseMasked - masked: "true" - commutative: "false" + masked: true + commutative: false extension: "AVX.*" documentation: !string |- // GaloisFieldAffineTransformInverseMasked computes an affine transformation in GF(2^8), @@ -19,8 +19,8 @@ // b is an 8-bit vector. The affine transformation is y * x + b, with each element of y // corresponding to a group of 8 elements in x. 
- go: GaloisFieldMulMasked - masked: "true" - commutative: "false" + masked: true + commutative: false extension: "AVX.*" documentation: !string |- // GaloisFieldMulMasked computes element-wise GF(2^8) multiplication with diff --git a/internal/simdgen/ops/IntOnlyArith/categories.yaml b/internal/simdgen/ops/IntOnlyArith/categories.yaml index 96015d28..fc277f81 100644 --- a/internal/simdgen/ops/IntOnlyArith/categories.yaml +++ b/internal/simdgen/ops/IntOnlyArith/categories.yaml @@ -1,32 +1,32 @@ !sum - go: Average - commutative: "true" + commutative: true extension: "AVX.*" # VPAVGB/W are available across various AVX versions documentation: !string |- // Average computes the rounded average of corresponding elements. - go: AverageMasked - commutative: "true" - masked: "true" + commutative: true + masked: true extension: "AVX512.*" # Masked operations are typically AVX512 documentation: !string |- // AverageMasked computes the rounded average of corresponding elements. - go: Absolute - commutative: "false" + commutative: false # Unary operation, not commutative extension: "AVX.*" # VPABSB/W/D are AVX, VPABSQ is AVX512 documentation: !string |- // Absolute computes the absolute value of each element. - go: AbsoluteMasked - commutative: "false" - masked: "true" + commutative: false + masked: true extension: "AVX512.*" documentation: !string |- // AbsoluteMasked computes the absolute value of each element. - go: Sign # Applies sign of second operand to first: sign(val, sign_src) - commutative: "false" + commutative: false extension: "AVX.*" documentation: !string |- // Sign returns the product of the first operand with -1, 0, or 1, @@ -34,8 +34,8 @@ # Sign does not have masked version - go: PopCountMasked - commutative: "false" - masked: "true" + commutative: false + masked: true extension: "AVX512.*" # VPOPCNT instructions are AVX512 (BITALG or VPOPCNTDQ) documentation: !string |- // PopCountMasked counts the number of set bits in each element. 
\ No newline at end of file diff --git a/internal/simdgen/ops/MLOps/categories.yaml b/internal/simdgen/ops/MLOps/categories.yaml index 962ae50d..d26b846d 100644 --- a/internal/simdgen/ops/MLOps/categories.yaml +++ b/internal/simdgen/ops/MLOps/categories.yaml @@ -1,27 +1,27 @@ !sum - go: PairDotProd - commutative: "false" + commutative: false extension: "AVX.*" documentation: !string |- // PairDotProd multiplies the elements and add the pairs together, // yielding a vector of half as many elements with twice the input element size. - go: PairDotProdMasked - masked: "true" - commutative: "false" + masked: true + commutative: false extension: "AVX.*" documentation: !string |- // PairDotProdMasked multiplies the elements and add the pairs together, // yielding a vector of half as many elements with twice the input element size. # TODO: maybe simplify this name within the receiver-type + method-naming scheme we use. - go: SaturatedUnsignedSignedPairDotProd - commutative: "false" + commutative: false extension: "AVX.*" documentation: !string |- // SaturatedUnsignedSignedPairDotProd multiplies the elements and add the pairs together with saturation, // yielding a vector of half as many elements with twice the input element size. - go: SaturatedUnsignedSignedPairDotProdMasked - masked: "true" - commutative: "false" + masked: true + commutative: false extension: "AVX.*" documentation: !string |- // SaturatedUnsignedSignedPairDotProdMasked multiplies the elements and add the pairs together with saturation, @@ -29,69 +29,69 @@ # QuadDotProd, i.e. VPDPBUSD(S) are operations with src/dst on the same register, we are not supporting this as of now. - go: DotProdBroadcast - commutative: "true" + commutative: true extension: "AVX.*" documentation: !string |- // DotProdBroadcast multiplies all elements and broadcasts the sum. 
- go: UnsignedSignedQuadDotProdAccumulate - commutative: "false" + commutative: false extension: "AVX.*" documentation: !string |- // UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of x and y and then adds z. - go: UnsignedSignedQuadDotProdAccumulateMasked - masked: "true" - commutative: "false" + masked: true + commutative: false extension: "AVX.*" documentation: !string |- // UnsignedSignedQuadDotProdAccumulateMasked performs dot products on groups of 4 elements of x and y and then adds z. - go: SaturatedUnsignedSignedQuadDotProdAccumulate - commutative: "false" + commutative: false extension: "AVX.*" documentation: !string |- // SaturatedUnsignedSignedQuadDotProdAccumulate multiplies performs dot products on groups of 4 elements of x and y and then adds z. - go: SaturatedUnsignedSignedQuadDotProdAccumulateMasked - masked: "true" - commutative: "false" + masked: true + commutative: false extension: "AVX.*" documentation: !string |- // SaturatedUnsignedSignedQuadDotProdAccumulateMasked multiplies performs dot products on groups of 4 elements of x and y and then adds z. - go: PairDotProdAccumulate - commutative: "false" + commutative: false extension: "AVX.*" documentation: !string |- // PairDotProdAccumulate performs dot products on pairs of elements of x and y and then adds z. - go: PairDotProdAccumulateMasked - masked: "true" - commutative: "false" + masked: true + commutative: false extension: "AVX.*" documentation: !string |- // PairDotProdAccumulateMasked performs dot products on pairs of elements of x and y and then adds z. - go: SaturatedPairDotProdAccumulate - commutative: "false" + commutative: false extension: "AVX.*" documentation: !string |- // SaturatedPairDotProdAccumulate performs dot products on pairs of elements of x and y and then adds z. 
- go: SaturatedPairDotProdAccumulateMasked - masked: "true" - commutative: "false" + masked: true + commutative: false extension: "AVX.*" documentation: !string |- // SaturatedPairDotProdAccumulateMasked performs dot products on pairs of elements of x and y and then adds z. - go: FusedMultiplyAddMasked - masked: "true" - commutative: "false" + masked: true + commutative: false extension: "AVX.*" documentation: !string |- // FusedMultiplyAddMasked performs (x * y) + z. - go: FusedMultiplyAddSubMasked - masked: "true" - commutative: "false" + masked: true + commutative: false extension: "AVX.*" documentation: !string |- // FusedMultiplyAddSubMasked performs (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements. - go: FusedMultiplySubAddMasked - masked: "true" - commutative: "false" + masked: true + commutative: false extension: "AVX.*" documentation: !string |- // FusedMultiplySubAddMasked performs (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements. diff --git a/internal/simdgen/ops/MinMax/categories.yaml b/internal/simdgen/ops/MinMax/categories.yaml index 33578ee4..929bfadd 100644 --- a/internal/simdgen/ops/MinMax/categories.yaml +++ b/internal/simdgen/ops/MinMax/categories.yaml @@ -1,23 +1,23 @@ !sum - go: Max - commutative: "true" + commutative: true extension: "AVX.*" documentation: !string |- // Max computes the maximum of corresponding elements. - go: MaxMasked - commutative: "true" - masked: "true" + commutative: true + masked: true extension: "AVX.*" documentation: !string |- // MaxMasked computes the maximum of corresponding elements. - go: Min - commutative: "true" + commutative: true extension: "AVX.*" documentation: !string |- // Min computes the minimum of corresponding elements. - go: MinMasked - commutative: "true" - masked: "true" + commutative: true + masked: true extension: "AVX.*" documentation: !string |- // MinMasked computes the minimum of corresponding elements. 
diff --git a/internal/simdgen/ops/Moves/categories.yaml b/internal/simdgen/ops/Moves/categories.yaml index 8dfe372a..d6c4d5da 100644 --- a/internal/simdgen/ops/Moves/categories.yaml +++ b/internal/simdgen/ops/Moves/categories.yaml @@ -1,28 +1,28 @@ !sum - go: SetElem - commutative: "false" + commutative: false extension: "AVX.*" documentation: !string |- // SetElem sets a single constant-indexed element's value. - go: GetElem - commutative: "false" + commutative: false extension: "AVX.*" documentation: !string |- // GetElem retrieves a single constant-indexed element's value. - go: Set128 - commutative: "false" + commutative: false extension: "AVX.*" documentation: !string |- // Set128 combines a 128-bit vector with a 256-bit vector, where the constant operand specifies whether the low (0) or high (1) half is receives the smaller vector. - go: Get128 - commutative: "false" + commutative: false extension: "AVX.*" documentation: !string |- // Get128 retrieves the upper (1) or lower (0) half of a 256-bit vector, depending on the constant operand. - go: Permute - commutative: "false" + commutative: false extension: "AVX.*" documentation: !string |- // Permute performs a full permutation of vector x using indices: @@ -30,8 +30,8 @@ // Only the needed bits to represent x's index are used in indices' elements. - go: PermuteMasked - commutative: "false" - masked: "true" + commutative: false + masked: true extension: "AVX.*" documentation: !string |- // PermuteMasked performs a full permutation of vector y using indices: @@ -39,8 +39,8 @@ // Only the needed bits to represent x's index are used in indices' elements. 
- go: Permute2Masked # Permute2Masked is only available on or after AVX512 - commutative: "false" - masked: "true" + commutative: false + masked: true extension: "AVX.*" documentation: !string |- // Permute2Masked performs a full permutation of vector x, y using indices: @@ -49,7 +49,7 @@ // Only the needed bits to represent xy's index are used in indices' elements. - go: Compress - commutative: "false" + commutative: false # The mask in Compress is a control mask rather than a write mask, so we don't mark it "masked" extension: "AVX.*" documentation: !string |- diff --git a/internal/simdgen/ops/Mul/categories.yaml b/internal/simdgen/ops/Mul/categories.yaml index b466eb01..1884d660 100644 --- a/internal/simdgen/ops/Mul/categories.yaml +++ b/internal/simdgen/ops/Mul/categories.yaml @@ -1,47 +1,47 @@ !sum - go: Mul - commutative: "true" + commutative: true extension: "AVX.*" documentation: !string |- // Mul multiplies corresponding elements of two vectors. - go: MulEvenWiden - commutative: "true" + commutative: true extension: "AVX.*" documentation: !string |- // MulEvenWiden multiplies even-indexed elements, widening the result. // Result[i] = v1.Even[i] * v2.Even[i]. - go: MulHigh - commutative: "true" + commutative: true extension: "AVX.*" documentation: !string |- // MulHigh multiplies elements and stores the high part of the result. - go: MulLow - commutative: "true" + commutative: true extension: "AVX.*" documentation: !string |- // MulLow multiplies elements and stores the low part of the result. - go: MulMasked - masked: "true" - commutative: "true" + masked: true + commutative: true extension: "AVX.*" documentation: !string |- // MulMasked multiplies corresponding elements of two vectors. - go: MulEvenWidenMasked - masked: "true" - commutative: "true" + masked: true + commutative: true extension: "AVX.*" documentation: !string |- // MulEvenWidenMasked multiplies even-indexed elements, widening the result. // Result[i] = v1.Even[i] * v2.Even[i]. 
- go: MulHighMasked - masked: "true" - commutative: "true" + masked: true + commutative: true extension: "AVX.*" documentation: !string |- // MulHighMasked multiplies elements and stores the high part of the result. - go: MulLowMasked - masked: "true" - commutative: "true" + masked: true + commutative: true extension: "AVX.*" documentation: !string |- // MulLowMasked multiplies elements and stores the low part of the result. diff --git a/internal/simdgen/ops/ShiftRotate/categories.yaml b/internal/simdgen/ops/ShiftRotate/categories.yaml index 09c04dfa..b9e2a634 100644 --- a/internal/simdgen/ops/ShiftRotate/categories.yaml +++ b/internal/simdgen/ops/ShiftRotate/categories.yaml @@ -1,149 +1,149 @@ !sum - go: ShiftAllLeft - nameAndSizeCheck: "true" - commutative: "false" + nameAndSizeCheck: true + commutative: false extension: "AVX.*" documentation: !string |- // ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. - go: ShiftAllLeftMasked - nameAndSizeCheck: "true" - masked: "true" - commutative: "false" + nameAndSizeCheck: true + masked: true + commutative: false extension: "AVX.*" documentation: !string |- // ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. - go: ShiftAllRight signed: false - nameAndSizeCheck: "true" - commutative: "false" + nameAndSizeCheck: true + commutative: false extension: "AVX.*" documentation: !string |- // ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. - go: ShiftAllRightMasked signed: false - nameAndSizeCheck: "true" - masked: "true" - commutative: "false" + nameAndSizeCheck: true + masked: true + commutative: false extension: "AVX.*" documentation: !string |- // ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. 
- go: ShiftAllRight signed: true - nameAndSizeCheck: "true" - commutative: "false" + nameAndSizeCheck: true + commutative: false extension: "AVX.*" documentation: !string |- // ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. - go: ShiftAllRightMasked signed: true - nameAndSizeCheck: "true" - masked: "true" - commutative: "false" + nameAndSizeCheck: true + masked: true + commutative: false extension: "AVX.*" documentation: !string |- // ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. - go: ShiftLeft - nameAndSizeCheck: "true" - commutative: "false" + nameAndSizeCheck: true + commutative: false extension: "AVX.*" documentation: !string |- // ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. - go: ShiftLeftMasked - nameAndSizeCheck: "true" - masked: "true" - commutative: "false" + nameAndSizeCheck: true + masked: true + commutative: false extension: "AVX.*" documentation: !string |- // ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. - go: ShiftRight signed: false - nameAndSizeCheck: "true" - commutative: "false" + nameAndSizeCheck: true + commutative: false extension: "AVX.*" documentation: !string |- // ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. - go: ShiftRightMasked signed: false - nameAndSizeCheck: "true" - masked: "true" - commutative: "false" + nameAndSizeCheck: true + masked: true + commutative: false extension: "AVX.*" documentation: !string |- // ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. 
- go: ShiftRight signed: true - nameAndSizeCheck: "true" - commutative: "false" + nameAndSizeCheck: true + commutative: false extension: "AVX.*" documentation: !string |- // ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. - go: ShiftRightMasked signed: true - nameAndSizeCheck: "true" - masked: "true" - commutative: "false" + nameAndSizeCheck: true + masked: true + commutative: false extension: "AVX.*" documentation: !string |- // ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. - go: RotateAllLeftMasked - nameAndSizeCheck: "true" - masked: "true" - commutative: "false" + nameAndSizeCheck: true + masked: true + commutative: false extension: "AVX.*" documentation: !string |- // RotateAllLeftMasked rotates each element to the left by the number of bits specified by the immediate. - go: RotateLeftMasked - nameAndSizeCheck: "true" - masked: "true" - commutative: "false" + nameAndSizeCheck: true + masked: true + commutative: false extension: "AVX.*" documentation: !string |- // RotateLeftMasked rotates each element in x to the left by the number of bits specified by y's corresponding elements. - go: RotateAllRightMasked - nameAndSizeCheck: "true" - masked: "true" - commutative: "false" + nameAndSizeCheck: true + masked: true + commutative: false extension: "AVX.*" documentation: !string |- // RotateAllRightMasked rotates each element to the right by the number of bits specified by the immediate. - go: RotateRightMasked - nameAndSizeCheck: "true" - masked: "true" - commutative: "false" + nameAndSizeCheck: true + masked: true + commutative: false extension: "AVX.*" documentation: !string |- // RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements. 
- go: ShiftAllLeftAndFillUpperFromMasked - nameAndSizeCheck: "true" - masked: "true" - commutative: "false" + nameAndSizeCheck: true + masked: true + commutative: false extension: "AVX.*" documentation: !string |- // ShiftAllLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. - go: ShiftAllRightAndFillUpperFromMasked - nameAndSizeCheck: "true" - masked: "true" - commutative: "false" + nameAndSizeCheck: true + masked: true + commutative: false extension: "AVX.*" documentation: !string |- // ShiftAllRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. - go: ShiftLeftAndFillUpperFromMasked - nameAndSizeCheck: "true" - masked: "true" - commutative: "false" + nameAndSizeCheck: true + masked: true + commutative: false extension: "AVX.*" documentation: !string |- // ShiftLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. 
- go: ShiftRightAndFillUpperFromMasked - nameAndSizeCheck: "true" - masked: "true" - commutative: "false" + nameAndSizeCheck: true + masked: true + commutative: false extension: "AVX.*" documentation: !string |- // ShiftRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the diff --git a/internal/unify/domain.go b/internal/unify/domain.go index 7f573826..7386ea2c 100644 --- a/internal/unify/domain.go +++ b/internal/unify/domain.go @@ -310,10 +310,19 @@ func (d String) decode(rv reflect.Value) error { if err == nil { i, err := strconv.Atoi(d.exact) if err != nil { - return fmt.Errorf("cannot decode String into %s: %s", rv.Type(), err) + return fmt.Errorf("cannot decode string into %s: %s", rv.Type(), err) } rv2.SetInt(int64(i)) return nil } + rv2, err = preDecode(rv, reflect.Bool, "Bool") + if err == nil { + b, err := strconv.ParseBool(d.exact) + if err != nil { + return fmt.Errorf("cannot decode string into %s: %s", rv.Type(), err) + } + rv2.SetBool(b) + return nil + } return err } diff --git a/internal/unify/yaml.go b/internal/unify/yaml.go index 6782b313..08b060d1 100644 --- a/internal/unify/yaml.go +++ b/internal/unify/yaml.go @@ -424,18 +424,21 @@ func (enc *yamlEncoder) value(v *Value) *yaml.Node { case String: switch d.kind { case stringExact: + n.SetString(d.exact) + switch { // Make this into a "nice" !!int node if I can. - if yamlIntRe.MatchString(d.exact) { - n.SetString(d.exact) + case yamlIntRe.MatchString(d.exact): n.Tag = "tag:yaml.org,2002:int" - return &n - } + + // Or a "nice" !!bool node. + case d.exact == "false" || d.exact == "true": + n.Tag = "tag:yaml.org,2002:bool" + // If this doesn't require escaping, leave it as a str node to avoid // the annoying YAML tags. Otherwise, mark it as an exact string. // Alternatively, we could always emit a str node with regexp // quoting. 
- n.SetString(d.exact) - if d.exact != regexp.QuoteMeta(d.exact) { + case d.exact != regexp.QuoteMeta(d.exact): n.Tag = "!string" } return &n From f9e09651d0a171d6f5e87cae6ba63c50c82c6f2a Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Wed, 23 Jul 2025 17:43:19 -0400 Subject: [PATCH 152/200] internal/simdgen: refactor decodeOperands and loadXED These functions were getting too big and doing too many things at once. Split them up and slightly rejigger the relationship between them. decodeOperands now stops at parsing the XED to []operand, and all translation to unifier values is done in the new instToUVal functions and its helpers. No effect on generated godefs. Change-Id: Idf97ebd521b8963d5703caf695fcfffc422633cd Reviewed-on: https://go-review.googlesource.com/c/arch/+/691336 LUCI-TryBot-Result: Go LUCI Auto-Submit: Austin Clements Reviewed-by: Junyang Shao --- internal/simdgen/xed.go | 107 +++++++++++++++++++++++----------------- 1 file changed, 61 insertions(+), 46 deletions(-) diff --git a/internal/simdgen/xed.go b/internal/simdgen/xed.go index 4a1b93b2..26d0adb2 100644 --- a/internal/simdgen/xed.go +++ b/internal/simdgen/xed.go @@ -49,7 +49,7 @@ func loadXED(xedPath string) []*unify.Value { fmt.Printf("%s:\n%+v\n", inst.Pos, inst) } - ins, outs, err := decodeOperands(db, strings.Fields(inst.Operands)) + ops, err := decodeOperands(db, strings.Fields(inst.Operands)) if err != nil { operandRemarks++ if *Verbose { @@ -57,39 +57,12 @@ func loadXED(xedPath string) []*unify.Value { } return } - // TODO: "feature" - fields := []string{"goarch", "asm", "in", "out", "extension", "isaset"} - values := []*unify.Value{ - unify.NewValue(unify.NewStringExact("amd64")), - unify.NewValue(unify.NewStringExact(inst.Opcode())), - unify.NewValue(ins), - unify.NewValue(outs), - unify.NewValue(unify.NewStringExact(inst.Extension)), - unify.NewValue(unify.NewStringExact(inst.ISASet)), - } - if strings.Contains(inst.Pattern, "ZEROING=0") { - // This is an EVEX instruction, but the 
".Z" (zero-merging) - // instruction flag is NOT valid. EVEX.z must be zero. - // - // This can mean a few things: - // - // - The output of an instruction is a mask, so merging modes don't - // make any sense. E.g., VCMPPS. - // - // - There are no masks involved anywhere. (Maybe MASK=0 is also set - // in this case?) E.g., VINSERTPS. - // - // - The operation inherently performs merging. E.g., VCOMPRESSPS - // with a mem operand. - // - // There may be other reasons. - fields = append(fields, "zeroing") - values = append(values, unify.NewValue(unify.NewStringExact("false"))) - } - pos := unify.Pos{Path: inst.Pos.Path, Line: inst.Pos.Line} - defs = append(defs, unify.NewValuePos(unify.NewDef(fields, values), pos)) + + uval := instToUVal(inst, ops) + defs = append(defs, uval) + if *flagDebugXED { - y, _ := yaml.Marshal(defs[len(defs)-1]) + y, _ := yaml.Marshal(uval) fmt.Printf("==>\n%s\n", y) } }) @@ -305,17 +278,12 @@ func decodeOperand(db *xeddata.Database, operand string) (operand, error) { return nil, fmt.Errorf("unknown operand LHS %q in %q", lhs, operand) } -func decodeOperands(db *xeddata.Database, operands []string) (ins, outs unify.Tuple, err error) { - fail := func(err error) (unify.Tuple, unify.Tuple, error) { - return unify.Tuple{}, unify.Tuple{}, err - } - - // Decode all of the operands. - var ops []operand +func decodeOperands(db *xeddata.Database, operands []string) (ops []operand, err error) { + // Decode the XED operand descriptions. for _, o := range operands { op, err := decodeOperand(db, o) if err != nil { - return unify.Tuple{}, unify.Tuple{}, err + return nil, err } if op != nil { ops = append(ops, op) @@ -324,7 +292,14 @@ func decodeOperands(db *xeddata.Database, operands []string) (ins, outs unify.Tu // XED doesn't encode the size of mask operands. If there are mask operands, // try to infer their sizes from other operands. 
- // + if err := inferMaskSizes(ops); err != nil { + return nil, fmt.Errorf("%w in operands %+v", err, operands) + } + + return ops, nil +} + +func inferMaskSizes(ops []operand) error { // This is a heuristic and it falls apart in some cases: // // - Mask operations like KAND[BWDQ] have *nothing* in the XED to indicate @@ -394,7 +369,7 @@ func decodeOperands(db *xeddata.Database, operands []string) (ins, outs unify.Tu } return nil } - return fmt.Errorf("cannot infer mask size: no register operands: %+v", operands) + return fmt.Errorf("cannot infer mask size: no register operands") } shape, ok := singular(sizes) if !ok { @@ -414,12 +389,15 @@ func decodeOperands(db *xeddata.Database, operands []string) (ins, outs unify.Tu return nil } if err := inferMask(true, false); err != nil { - return fail(err) + return err } if err := inferMask(false, true); err != nil { - return fail(err) + return err } + return nil +} +func operandsToUVals(ops []operand) (in, out unify.Tuple) { var inVals, outVals []*unify.Value for asmPos, op := range ops { fields, values := op.toValue() @@ -444,7 +422,44 @@ func decodeOperands(db *xeddata.Database, operands []string) (ins, outs unify.Tu } } - return unify.NewTuple(inVals...), unify.NewTuple(outVals...), nil + return unify.NewTuple(inVals...), unify.NewTuple(outVals...) +} + +func instToUVal(inst *xeddata.Inst, ops []operand) *unify.Value { + // Map operands to unifier values. + ins, outs := operandsToUVals(ops) + + // TODO: "feature" + fields := []string{"goarch", "asm", "in", "out", "extension", "isaset"} + values := []*unify.Value{ + unify.NewValue(unify.NewStringExact("amd64")), + unify.NewValue(unify.NewStringExact(inst.Opcode())), + unify.NewValue(ins), + unify.NewValue(outs), + unify.NewValue(unify.NewStringExact(inst.Extension)), + unify.NewValue(unify.NewStringExact(inst.ISASet)), + } + if strings.Contains(inst.Pattern, "ZEROING=0") { + // This is an EVEX instruction, but the ".Z" (zero-merging) + // instruction flag is NOT valid. 
EVEX.z must be zero. + // + // This can mean a few things: + // + // - The output of an instruction is a mask, so merging modes don't + // make any sense. E.g., VCMPPS. + // + // - There are no masks involved anywhere. (Maybe MASK=0 is also set + // in this case?) E.g., VINSERTPS. + // + // - The operation inherently performs merging. E.g., VCOMPRESSPS + // with a mem operand. + // + // There may be other reasons. + fields = append(fields, "zeroing") + values = append(values, unify.NewValue(unify.NewStringExact("false"))) + } + pos := unify.Pos{Path: inst.Pos.Path, Line: inst.Pos.Line} + return unify.NewValuePos(unify.NewDef(fields, values), pos) } func singular[T comparable](xs []T) (T, bool) { From ed8e6937ca944f2bcf5dd134a6d1bb30c423e3f2 Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Thu, 24 Jul 2025 14:20:09 -0400 Subject: [PATCH 153/200] internal/{unify,simdgen}: replace NewDef with DefBuilder NewDef requires two separate slices of field names and field values, which is really awkward to use, both because you have to pair up the i'th positions in two slices when reading code, and because it makes it really awkward to conditionally add fields. Fix this by replacing NewDef with a DefBuilder type that lets you add field/value pairs one by one to build a Def. No effect on generated godefs. Change-Id: I75dfb6ac798585e717965ab9d5d0f1bc6a157aef Reviewed-on: https://go-review.googlesource.com/c/arch/+/691337 Auto-Submit: Austin Clements LUCI-TryBot-Result: Go LUCI Reviewed-by: Junyang Shao --- internal/simdgen/xed.go | 94 ++++++++++++++++++---------------------- internal/unify/domain.go | 30 +++++++------ internal/unify/yaml.go | 8 ++-- 3 files changed, 63 insertions(+), 69 deletions(-) diff --git a/internal/simdgen/xed.go b/internal/simdgen/xed.go index 26d0adb2..dddf5395 100644 --- a/internal/simdgen/xed.go +++ b/internal/simdgen/xed.go @@ -119,7 +119,8 @@ type operandMask struct { operandCommon vecShape // Bits in the mask is w/bits. 
- allMasks bool + + allMasks bool // If set, size cannot be inferred because all operands are masks. } type operandImm struct { @@ -129,7 +130,7 @@ type operandImm struct { type operand interface { common() operandCommon - toValue() (fields []string, vals []*unify.Value) + addToDef(b *unify.DefBuilder) } func strVal(s any) *unify.Value { @@ -140,53 +141,52 @@ func (o operandCommon) common() operandCommon { return o } -func (o operandMem) toValue() (fields []string, vals []*unify.Value) { +func (o operandMem) addToDef(b *unify.DefBuilder) { // TODO: w, base - return []string{"class"}, []*unify.Value{strVal("memory")} + b.Add("class", strVal("memory")) } -func (o operandVReg) toValue() (fields []string, vals []*unify.Value) { +func (o operandVReg) addToDef(b *unify.DefBuilder) { baseDomain, err := unify.NewStringRegex(o.elemBaseType.regex()) if err != nil { panic("parsing baseRe: " + err.Error()) } - fields, vals = []string{"class", "bits", "base"}, []*unify.Value{ - strVal("vreg"), - strVal(o.bits), - unify.NewValue(baseDomain)} + b.Add("class", strVal("vreg")) + b.Add("bits", strVal(o.bits)) + b.Add("base", unify.NewValue(baseDomain)) + // If elemBits == bits, then the vector can be ANY shape. This happens with, + // for example, logical ops. if o.elemBits != o.bits { - fields, vals = append(fields, "elemBits"), append(vals, strVal(o.elemBits)) + b.Add("elemBits", strVal(o.elemBits)) } - // otherwise it means the vector could be any shape. 
- return } -func (o operandGReg) toValue() (fields []string, vals []*unify.Value) { +func (o operandGReg) addToDef(b *unify.DefBuilder) { baseDomain, err := unify.NewStringRegex(o.elemBaseType.regex()) if err != nil { panic("parsing baseRe: " + err.Error()) } - fields, vals = []string{"class", "bits", "base"}, []*unify.Value{ - strVal("greg"), - strVal(o.bits), - unify.NewValue(baseDomain)} + b.Add("class", strVal("greg")) + b.Add("bits", strVal(o.bits)) + b.Add("base", unify.NewValue(baseDomain)) if o.elemBits != o.bits { - fields, vals = append(fields, "elemBits"), append(vals, strVal(o.elemBits)) + b.Add("elemBits", strVal(o.elemBits)) } - // otherwise it means the vector could be any shape. - return -} - -func (o operandMask) toValue() (fields []string, vals []*unify.Value) { - return []string{"class", "elemBits", "bits"}, []*unify.Value{strVal("mask"), strVal(o.elemBits), strVal(o.bits)} } -func (o operandMask) zeroMaskValue() (fields []string, vals []*unify.Value) { - return []string{"class"}, []*unify.Value{strVal("mask")} +func (o operandMask) addToDef(b *unify.DefBuilder) { + b.Add("class", strVal("mask")) + if o.allMasks { + // If all operands are masks, omit sizes and let unification determine mask sizes. 
+ return + } + b.Add("elemBits", strVal(o.elemBits)) + b.Add("bits", strVal(o.bits)) } -func (o operandImm) toValue() (fields []string, vals []*unify.Value) { - return []string{"class", "bits"}, []*unify.Value{strVal("immediate"), strVal(o.bits)} +func (o operandImm) addToDef(b *unify.DefBuilder) { + b.Add("class", strVal("immediate")) + b.Add("bits", strVal(o.bits)) } var actionEncoding = map[string]operandAction{ @@ -400,24 +400,18 @@ func inferMaskSizes(ops []operand) error { func operandsToUVals(ops []operand) (in, out unify.Tuple) { var inVals, outVals []*unify.Value for asmPos, op := range ops { - fields, values := op.toValue() - if opm, ok := op.(operandMask); ok { - if opm.allMasks { - // If all operands are masks, leave the mask inferrence to the users. - fields, values = opm.zeroMaskValue() - } - } + var db unify.DefBuilder + op.addToDef(&db) - fields = append(fields, "asmPos") - values = append(values, unify.NewValue(unify.NewStringExact(fmt.Sprint(asmPos)))) + db.Add("asmPos", unify.NewValue(unify.NewStringExact(fmt.Sprint(asmPos)))) action := op.common().action if action.r { - inVal := unify.NewValue(unify.NewDef(fields, values)) + inVal := unify.NewValue(db.Build()) inVals = append(inVals, inVal) } if action.w { - outVal := unify.NewValue(unify.NewDef(fields, values)) + outVal := unify.NewValue(db.Build()) outVals = append(outVals, outVal) } } @@ -430,15 +424,14 @@ func instToUVal(inst *xeddata.Inst, ops []operand) *unify.Value { ins, outs := operandsToUVals(ops) // TODO: "feature" - fields := []string{"goarch", "asm", "in", "out", "extension", "isaset"} - values := []*unify.Value{ - unify.NewValue(unify.NewStringExact("amd64")), - unify.NewValue(unify.NewStringExact(inst.Opcode())), - unify.NewValue(ins), - unify.NewValue(outs), - unify.NewValue(unify.NewStringExact(inst.Extension)), - unify.NewValue(unify.NewStringExact(inst.ISASet)), - } + var db unify.DefBuilder + db.Add("goarch", unify.NewValue(unify.NewStringExact("amd64"))) + db.Add("asm", 
unify.NewValue(unify.NewStringExact(inst.Opcode()))) + db.Add("in", unify.NewValue(ins)) + db.Add("out", unify.NewValue(outs)) + db.Add("extension", unify.NewValue(unify.NewStringExact(inst.Extension))) + db.Add("isaset", unify.NewValue(unify.NewStringExact(inst.ISASet))) + if strings.Contains(inst.Pattern, "ZEROING=0") { // This is an EVEX instruction, but the ".Z" (zero-merging) // instruction flag is NOT valid. EVEX.z must be zero. @@ -455,11 +448,10 @@ func instToUVal(inst *xeddata.Inst, ops []operand) *unify.Value { // with a mem operand. // // There may be other reasons. - fields = append(fields, "zeroing") - values = append(values, unify.NewValue(unify.NewStringExact("false"))) + db.Add("zeroing", unify.NewValue(unify.NewStringExact("false"))) } pos := unify.Pos{Path: inst.Pos.Path, Line: inst.Pos.Line} - return unify.NewValuePos(unify.NewDef(fields, values), pos) + return unify.NewValuePos(db.Build(), pos) } func singular[T comparable](xs []T) (T, bool) { diff --git a/internal/unify/domain.go b/internal/unify/domain.go index 7386ea2c..1cd5af14 100644 --- a/internal/unify/domain.go +++ b/internal/unify/domain.go @@ -94,21 +94,25 @@ type Def struct { fields map[string]*Value } -// NewDef creates a new [Def]. -// -// The fields and values slices must have the same length. -func NewDef(fields []string, values []*Value) Def { - if len(fields) != len(values) { - panic("fields and values must have the same length") +// A DefBuilder builds a [Def] one field at a time. The zero value is an empty +// [Def]. 
+type DefBuilder struct { + fields map[string]*Value +} + +func (b *DefBuilder) Add(name string, v *Value) { + if b.fields == nil { + b.fields = make(map[string]*Value) } - m := make(map[string]*Value, len(fields)) - for i := range fields { - if _, ok := m[fields[i]]; ok { - panic(fmt.Sprintf("duplicate field %q", fields[i])) - } - m[fields[i]] = values[i] + if _, ok := b.fields[name]; ok { + panic(fmt.Sprintf("duplicate field %q", name)) } - return Def{m} + b.fields[name] = v +} + +// Build constructs a [Def] from the fields added to this builder. +func (b *DefBuilder) Build() Def { + return Def{maps.Clone(b.fields)} } // Exact returns true if all field Values are exact. diff --git a/internal/unify/yaml.go b/internal/unify/yaml.go index 08b060d1..1b1c8139 100644 --- a/internal/unify/yaml.go +++ b/internal/unify/yaml.go @@ -206,8 +206,7 @@ func (dec *yamlDecoder) value(node *yaml.Node) (vOut *Value, errOut error) { return mk2(NewStringRegex(vals...)) case is(yaml.MappingNode, "tag:yaml.org,2002:map"): - var fields []string - var vals []*Value + var db DefBuilder for i := 0; i < len(node.Content); i += 2 { key := node.Content[i] if key.Kind != yaml.ScalarNode { @@ -217,10 +216,9 @@ func (dec *yamlDecoder) value(node *yaml.Node) (vOut *Value, errOut error) { if err != nil { return nil, err } - fields = append(fields, key.Value) - vals = append(vals, val) + db.Add(key.Value, val) } - return mk(NewDef(fields, vals)) + return mk(db.Build()) case is(yaml.SequenceNode, "tag:yaml.org,2002:seq"): elts := node.Content From 213dc9a3a311abffdac206e7fde39fed46dc97ea Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Fri, 25 Jul 2025 17:01:19 -0400 Subject: [PATCH 154/200] internal/unify: support custom type decoding This adds a Decoder interface that types can implement to support custom decoding from a unify.Value. No effect on generated godefs. 
Change-Id: I7f22d7194670b2c25fba414a165c176931c935cc Reviewed-on: https://go-review.googlesource.com/c/arch/+/691338 Auto-Submit: Austin Clements LUCI-TryBot-Result: Go LUCI Reviewed-by: Junyang Shao --- internal/unify/domain.go | 44 ++++++++++++++++-------------------- internal/unify/value.go | 49 ++++++++++++++++++++++++++++++++-------- 2 files changed, 59 insertions(+), 34 deletions(-) diff --git a/internal/unify/domain.go b/internal/unify/domain.go index 1cd5af14..36239054 100644 --- a/internal/unify/domain.go +++ b/internal/unify/domain.go @@ -126,10 +126,10 @@ func (d Def) Exact() bool { } func (d Def) decode(rv reflect.Value) error { - rv, err := preDecode(rv, reflect.Struct, "Def") - if err != nil { - return err + if rv.Kind() != reflect.Struct { + return fmt.Errorf("cannot decode Def into %s", rv.Type()) } + var lowered map[string]string // Lower case -> canonical for d.fields. rt := rv.Type() for fi := range rv.NumField() { @@ -161,7 +161,7 @@ func (d Def) decode(rv reflect.Value) error { } } } - if err := v.Domain.decode(rv.Field(fi)); err != nil { + if err := decodeReflect(v, rv.Field(fi)); err != nil { return newDecodeError(fType.Name, err) } } @@ -224,9 +224,8 @@ func (d Tuple) decode(rv reflect.Value) error { return &inexactError{"repeated tuple", rv.Type().String()} } // TODO: We could also do arrays. 
- rv, err := preDecode(rv, reflect.Slice, "Tuple") - if err != nil { - return err + if rv.Kind() != reflect.Slice { + return fmt.Errorf("cannot decode Tuple into %s", rv.Type()) } if rv.IsNil() || rv.Cap() < len(d.vs) { rv.Set(reflect.MakeSlice(rv.Type(), len(d.vs), len(d.vs))) @@ -234,7 +233,7 @@ func (d Tuple) decode(rv reflect.Value) error { rv.SetLen(len(d.vs)) } for i, v := range d.vs { - if err := v.Domain.decode(rv.Index(i)); err != nil { + if err := decodeReflect(v, rv.Index(i)); err != nil { return newDecodeError(fmt.Sprintf("%d", i), err) } } @@ -305,28 +304,23 @@ func (d String) decode(rv reflect.Value) error { if d.kind != stringExact { return &inexactError{"regex", rv.Type().String()} } - rv2, err := preDecode(rv, reflect.String, "String") - if err == nil { - rv2.SetString(d.exact) - return nil - } - rv2, err = preDecode(rv, reflect.Int, "String") - if err == nil { + switch rv.Kind() { + default: + return fmt.Errorf("cannot decode String into %s", rv.Type()) + case reflect.String: + rv.SetString(d.exact) + case reflect.Int: i, err := strconv.Atoi(d.exact) if err != nil { - return fmt.Errorf("cannot decode string into %s: %s", rv.Type(), err) + return fmt.Errorf("cannot decode String into %s: %s", rv.Type(), err) } - rv2.SetInt(int64(i)) - return nil - } - rv2, err = preDecode(rv, reflect.Bool, "Bool") - if err == nil { + rv.SetInt(int64(i)) + case reflect.Bool: b, err := strconv.ParseBool(d.exact) if err != nil { - return fmt.Errorf("cannot decode string into %s: %s", rv.Type(), err) + return fmt.Errorf("cannot decode String into %s: %s", rv.Type(), err) } - rv2.SetBool(b) - return nil + rv.SetBool(b) } - return err + return nil } diff --git a/internal/unify/value.go b/internal/unify/value.go index 6bf121af..87387bbf 100644 --- a/internal/unify/value.go +++ b/internal/unify/value.go @@ -83,27 +83,58 @@ func (v *Value) Exact() bool { // are decoded into strings or ints. Any field can itself be a pointer to one of // these types. 
Top can be decoded into a pointer-typed field and will set the // field to nil. Anything else will allocate a value if necessary. +// +// Any type may implement [Decoder], in which case its DecodeUnified method will +// be called instead of using the default decoding scheme. func (v *Value) Decode(into any) error { rv := reflect.ValueOf(into) if rv.Kind() != reflect.Pointer { return fmt.Errorf("cannot decode into non-pointer %T", into) } - return v.Domain.decode(rv) + return decodeReflect(v, rv.Elem()) } -func preDecode(rv reflect.Value, kind reflect.Kind, name string) (reflect.Value, error) { - if rv.Kind() == kind { - return rv, nil - } - if rv.Kind() == reflect.Pointer && rv.Type().Elem().Kind() == kind { +func decodeReflect(v *Value, rv reflect.Value) error { + var ptr reflect.Value + if rv.Kind() == reflect.Pointer { if rv.IsNil() { - rv.Set(reflect.New(rv.Type().Elem())) + // Transparently allocate through pointers, *except* for Top, which + // wants to set the pointer to nil. + // + // TODO: Drop this condition if I switch to an explicit Optional[T] + // or move the Top logic into Def. + if _, ok := v.Domain.(Top); !ok { + // Allocate the value to fill in, but don't actually store it in + // the pointer until we successfully decode. + ptr = rv + rv = reflect.New(rv.Type().Elem()).Elem() + } + } else { + rv = rv.Elem() } - return rv.Elem(), nil } - return reflect.Value{}, fmt.Errorf("cannot decode %s into %s", name, rv.Type()) + + var err error + if reflect.PointerTo(rv.Type()).Implements(decoderType) { + // Use the custom decoder. + err = rv.Addr().Interface().(Decoder).DecodeUnified(v) + } else { + err = v.Domain.decode(rv) + } + if err == nil && ptr.IsValid() { + ptr.Set(rv.Addr()) + } + return err +} + +// Decoder can be implemented by types as a custom implementation of [Decode] +// for that type. 
+type Decoder interface { + DecodeUnified(v *Value) error } +var decoderType = reflect.TypeOf((*Decoder)(nil)).Elem() + // Provenance iterates over all of the source Values that have contributed to // this Value. func (v *Value) Provenance() iter.Seq[*Value] { From 63d3a519a5d6f506b1593ca7a2aa980362a0bc44 Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Fri, 25 Jul 2025 17:13:29 -0400 Subject: [PATCH 155/200] internal/simdgen: split Operation into rawOperation This prepares us to parse the raw unification results into rawOperation and then override what makes sense with "more parsed" forms in Operation, while simultaneously keeping everything working. No effect on generated godefs. Change-Id: Ic0dd6643488b3dbb3125fb17b31725576da84a2c Reviewed-on: https://go-review.googlesource.com/c/arch/+/691339 LUCI-TryBot-Result: Go LUCI Auto-Submit: Austin Clements Reviewed-by: Junyang Shao --- internal/simdgen/godefs.go | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/internal/simdgen/godefs.go b/internal/simdgen/godefs.go index 32dc29cf..32467de7 100644 --- a/internal/simdgen/godefs.go +++ b/internal/simdgen/godefs.go @@ -15,6 +15,12 @@ import ( ) type Operation struct { + rawOperation +} + +// rawOperation is the unifier representation of an [Operation]. It is +// translated into a more parsed form after unifier decoding. 
+type rawOperation struct { Go string // Go method name GoArch string // GOARCH for this definition @@ -39,6 +45,13 @@ type Operation struct { NameAndSizeCheck *bool } +func (o *Operation) DecodeUnified(v *unify.Value) error { + if err := v.Decode(&o.rawOperation); err != nil { + return err + } + return nil +} + func (o *Operation) VectorWidth() int { out := o.Out[0] if out.Class == "vreg" { From ce2e40c7d7dfb66cde2d1e77ce2ed27b1714d1b1 Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Fri, 25 Jul 2025 17:31:06 -0400 Subject: [PATCH 156/200] internal/simdgen: use "NAME" for method names in doc strings This will let us merge operations that differ only in their method names. In particular, this will give us a robust way to insert computed method names. The YAML changes were done using the following Gemini CLI prompt: In all of the files named "categories.yaml", for each operation in the YAML list, find the Go method name from the "go" field, and replace anywhere that operation name appears as a word in the "documentation" field with the literal string "NAME". Please do this using Go with the yaml.v3 library. The yaml.v3 library is already imported in this module. No effect on generated godefs. 
Change-Id: Ifdac95c5d62475937fc33a8013d0b0c5c5dca312 Reviewed-on: https://go-review.googlesource.com/c/arch/+/691340 LUCI-TryBot-Result: Go LUCI Auto-Submit: Austin Clements Reviewed-by: Junyang Shao --- internal/simdgen/categories.yaml | 261 ++++++++---------- internal/simdgen/gen_utility.go | 8 +- internal/simdgen/godefs.go | 20 ++ internal/simdgen/ops/AddSub/categories.yaml | 24 +- .../simdgen/ops/BitwiseLogic/categories.yaml | 17 +- internal/simdgen/ops/Compares/categories.yaml | 29 +- .../simdgen/ops/FPonlyArith/categories.yaml | 49 ++-- .../simdgen/ops/GaloisField/categories.yaml | 8 +- .../simdgen/ops/IntOnlyArith/categories.yaml | 15 +- internal/simdgen/ops/MLOps/categories.yaml | 33 ++- internal/simdgen/ops/MinMax/categories.yaml | 8 +- internal/simdgen/ops/Moves/categories.yaml | 21 +- internal/simdgen/ops/Mul/categories.yaml | 16 +- .../simdgen/ops/ShiftRotate/categories.yaml | 43 ++- 14 files changed, 268 insertions(+), 284 deletions(-) diff --git a/internal/simdgen/categories.yaml b/internal/simdgen/categories.yaml index f839f69b..5a7e711d 100644 --- a/internal/simdgen/categories.yaml +++ b/internal/simdgen/categories.yaml @@ -3,114 +3,115 @@ commutative: true extension: "AVX.*" documentation: !string |- - // Add adds corresponding elements of two vectors. + // NAME adds corresponding elements of two vectors. - go: SaturatedAdd commutative: true extension: "AVX.*" documentation: !string |- - // SaturatedAdd adds corresponding elements of two vectors with saturation. + // NAME adds corresponding elements of two vectors with saturation. - go: AddMasked masked: true commutative: true extension: "AVX.*" documentation: !string |- - // AddMasked adds corresponding elements of two vectors. + // NAME adds corresponding elements of two vectors. - go: SaturatedAddMasked masked: true commutative: true extension: "AVX.*" documentation: !string |- - // SaturatedAddMasked adds corresponding elements of two vectors with saturation. 
+ // NAME adds corresponding elements of two vectors with saturation. - go: Sub commutative: false extension: "AVX.*" documentation: !string |- - // Sub subtracts corresponding elements of two vectors. + // NAME subtracts corresponding elements of two vectors. - go: SaturatedSub commutative: false extension: "AVX.*" documentation: !string |- - // SaturatedSub subtracts corresponding elements of two vectors with saturation. + // NAME subtracts corresponding elements of two vectors with saturation. - go: SubMasked masked: true commutative: false extension: "AVX.*" documentation: !string |- - // SubMasked subtracts corresponding elements of two vectors. + // NAME subtracts corresponding elements of two vectors. - go: SaturatedSubMasked masked: true commutative: false extension: "AVX.*" documentation: !string |- - // SaturatedSubMasked subtracts corresponding elements of two vectors with saturation. + // NAME subtracts corresponding elements of two vectors with saturation. - go: PairwiseAdd commutative: false extension: "AVX.*" documentation: !string |- - // PairwiseAdd horizontally adds adjacent pairs of elements. + // NAME horizontally adds adjacent pairs of elements. // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...]. - go: PairwiseSub commutative: false extension: "AVX.*" documentation: !string |- - // PairwiseSub horizontally subtracts adjacent pairs of elements. + // NAME horizontally subtracts adjacent pairs of elements. // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...]. - go: SaturatedPairwiseAdd commutative: false extension: "AVX.*" documentation: !string |- - // SaturatedPairwiseAdd horizontally adds adjacent pairs of elements with saturation. + // NAME horizontally adds adjacent pairs of elements with saturation. // For x = [x0, x1, x2, x3, ...] 
and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...]. - go: SaturatedPairwiseSub commutative: false extension: "AVX.*" documentation: !string |- - // SaturatedPairwiseSub horizontally subtracts adjacent pairs of elements with saturation. + // NAME horizontally subtracts adjacent pairs of elements with saturation. // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...]. - go: And commutative: true extension: "AVX.*" documentation: !string |- - // And performs a bitwise AND operation between two vectors. + // NAME performs a bitwise AND operation between two vectors. - go: AndMasked masked: true commutative: true extension: "AVX.*" documentation: !string |- - // AndMasked performs a bitwise AND operation between two vectors. + // NAME performs a bitwise AND operation between two vectors. - go: Or commutative: true extension: "AVX.*" documentation: !string |- - // Or performs a bitwise OR operation between two vectors. + // NAME performs a bitwise OR operation between two vectors. - go: OrMasked masked: true commutative: true extension: "AVX.*" documentation: !string |- - // OrMasked performs a bitwise OR operation between two vectors. + // NAME performs a bitwise OR operation between two vectors. - go: AndNot commutative: false extension: "AVX.*" documentation: !string |- - // AndNot performs a bitwise x &^ y. + // NAME performs a bitwise x &^ y. - go: AndNotMasked masked: true commutative: false extension: "AVX.*" documentation: !string |- - // AndNotMasked performs a bitwise x &^ y. + // NAME performs a bitwise x &^ y. - go: Xor commutative: true extension: "AVX.*" documentation: !string |- - // Xor performs a bitwise XOR operation between two vectors. + // NAME performs a bitwise XOR operation between two vectors. - go: XorMasked masked: true commutative: true extension: "AVX.*" documentation: !string |- - // XorMasked performs a bitwise XOR operation between two vectors. 
+ // NAME performs a bitwise XOR operation between two vectors. + # We also have PTEST and VPTERNLOG, those should be hidden from the users # and only appear in rewrite rules. # const imm predicate(holds for both float and int|uint): @@ -125,239 +126,233 @@ commutative: true extension: "AVX.*" documentation: !string |- - // Equal compares for equality. + // NAME compares for equality. - go: Less constImm: 1 commutative: false extension: "AVX.*" documentation: !string |- - // Less compares for less than. + // NAME compares for less than. - go: LessEqual constImm: 2 commutative: false extension: "AVX.*" documentation: !string |- - // LessEqual compares for less than or equal. + // NAME compares for less than or equal. - go: IsNan # For float only. constImm: 3 commutative: true extension: "AVX.*" documentation: !string |- - // IsNan checks if elements are NaN. Use as x.IsNan(x). + // NAME checks if elements are NaN. Use as x.IsNan(x). - go: NotEqual constImm: 4 commutative: true extension: "AVX.*" documentation: !string |- - // NotEqual compares for inequality. + // NAME compares for inequality. - go: GreaterEqual constImm: 13 commutative: false extension: "AVX.*" documentation: !string |- - // GreaterEqual compares for greater than or equal. + // NAME compares for greater than or equal. - go: Greater constImm: 14 commutative: false extension: "AVX.*" documentation: !string |- - // Greater compares for greater than. - + // NAME compares for greater than. - go: EqualMasked constImm: 0 masked: true commutative: true extension: "AVX.*" documentation: !string |- - // EqualMasked compares for equality. + // NAME compares for equality. - go: LessMasked constImm: 1 masked: true commutative: false extension: "AVX.*" documentation: !string |- - // LessMasked compares for less than. + // NAME compares for less than. - go: LessEqualMasked constImm: 2 masked: true commutative: false extension: "AVX.*" documentation: !string |- - // LessEqualMasked compares for less than or equal. 
+ // NAME compares for less than or equal. - go: IsNanMasked # For float only. constImm: 3 masked: true commutative: true extension: "AVX.*" documentation: !string |- - // IsNanMasked checks if elements are NaN. Use as x.IsNan(x). + // NAME checks if elements are NaN. Use as x.IsNan(x). - go: NotEqualMasked constImm: 4 masked: true commutative: true extension: "AVX.*" documentation: !string |- - // NotEqualMasked compares for inequality. + // NAME compares for inequality. - go: GreaterEqualMasked constImm: 13 masked: true commutative: false extension: "AVX.*" documentation: !string |- - // GreaterEqualMasked compares for greater than or equal. + // NAME compares for greater than or equal. - go: GreaterMasked constImm: 14 masked: true commutative: false extension: "AVX.*" documentation: !string |- - // GreaterMasked compares for greater than. + // NAME compares for greater than. - go: Div commutative: false extension: "AVX.*" documentation: !string |- - // Div divides elements of two vectors. + // NAME divides elements of two vectors. - go: DivMasked commutative: false masked: true extension: "AVX.*" documentation: !string |- - // DivMasked divides elements of two vectors. + // NAME divides elements of two vectors. - go: Sqrt commutative: false extension: "AVX.*" documentation: !string |- - // Sqrt computes the square root of each element. + // NAME computes the square root of each element. - go: SqrtMasked commutative: false masked: true extension: "AVX.*" documentation: !string |- - // SqrtMasked computes the square root of each element. + // NAME computes the square root of each element. - go: ApproximateReciprocal commutative: false extension: "AVX.*" documentation: !string |- - // ApproximateReciprocal computes an approximate reciprocal of each element. + // NAME computes an approximate reciprocal of each element. 
- go: ApproximateReciprocalMasked commutative: false masked: true extension: "AVX.*" documentation: !string |- - // ApproximateReciprocalMasked computes an approximate reciprocal of each element. + // NAME computes an approximate reciprocal of each element. - go: ApproximateReciprocalOfSqrt commutative: false extension: "AVX.*" documentation: !string |- - // ApproximateReciprocalOfSqrt computes an approximate reciprocal of the square root of each element. + // NAME computes an approximate reciprocal of the square root of each element. - go: ApproximateReciprocalOfSqrtMasked commutative: false masked: true extension: "AVX.*" documentation: !string |- - // ApproximateReciprocalOfSqrtMasked computes an approximate reciprocal of the square root of each element. + // NAME computes an approximate reciprocal of the square root of each element. - go: MulByPowOf2Masked # This operation is all after AVX512, the unmasked version will be generated. commutative: false masked: true extension: "AVX.*" documentation: !string |- - // MulByPowOf2Masked multiplies elements by a power of 2. - + // NAME multiplies elements by a power of 2. - go: Round commutative: false extension: "AVX.*" constImm: 0 documentation: !string |- - // Round rounds elements to the nearest integer. + // NAME rounds elements to the nearest integer. - go: RoundWithPrecisionMasked commutative: false extension: "AVX.*" constImm: 0 masked: true documentation: !string |- - // RoundWithPrecisionMasked rounds elements with specified precision. + // NAME rounds elements with specified precision. - go: DiffWithRoundWithPrecisionMasked commutative: false extension: "AVX.*" constImm: 0 masked: true documentation: !string |- - // DiffWithRoundWithPrecisionMasked computes the difference after rounding with specified precision. - + // NAME computes the difference after rounding with specified precision. 
- go: Floor commutative: false extension: "AVX.*" constImm: 1 documentation: !string |- - // Floor rounds elements down to the nearest integer. + // NAME rounds elements down to the nearest integer. - go: FloorWithPrecisionMasked commutative: false extension: "AVX.*" constImm: 1 masked: true documentation: !string |- - // FloorWithPrecisionMasked rounds elements down with specified precision. + // NAME rounds elements down with specified precision. - go: DiffWithFloorWithPrecisionMasked commutative: false extension: "AVX.*" constImm: 1 masked: true documentation: !string |- - // DiffWithFloorWithPrecisionMasked computes the difference after flooring with specified precision. - + // NAME computes the difference after flooring with specified precision. - go: Ceil commutative: false extension: "AVX.*" constImm: 2 documentation: !string |- - // Ceil rounds elements up to the nearest integer. + // NAME rounds elements up to the nearest integer. - go: CeilWithPrecisionMasked commutative: false extension: "AVX.*" constImm: 2 masked: true documentation: !string |- - // CeilWithPrecisionMasked rounds elements up with specified precision. + // NAME rounds elements up with specified precision. - go: DiffWithCeilWithPrecisionMasked commutative: false extension: "AVX.*" constImm: 2 masked: true documentation: !string |- - // DiffWithCeilWithPrecisionMasked computes the difference after ceiling with specified precision. - + // NAME computes the difference after ceiling with specified precision. - go: Trunc commutative: false extension: "AVX.*" constImm: 3 documentation: !string |- - // Trunc truncates elements towards zero. + // NAME truncates elements towards zero. - go: TruncWithPrecisionMasked commutative: false extension: "AVX.*" constImm: 3 masked: true documentation: !string |- - // TruncWithPrecisionMasked truncates elements with specified precision. + // NAME truncates elements with specified precision. 
- go: DiffWithTruncWithPrecisionMasked commutative: false extension: "AVX.*" constImm: 3 masked: true documentation: !string |- - // DiffWithTruncWithPrecisionMasked computes the difference after truncating with specified precision. - + // NAME computes the difference after truncating with specified precision. - go: AddSub commutative: false extension: "AVX.*" documentation: !string |- - // AddSub subtracts even elements and adds odd elements of two vectors. + // NAME subtracts even elements and adds odd elements of two vectors. - go: GaloisFieldAffineTransformMasked masked: true commutative: false extension: "AVX.*" documentation: !string |- - // GaloisFieldAffineTransformMasked computes an affine transformation in GF(2^8): + // NAME computes an affine transformation in GF(2^8): // x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrixes; // b is an 8-bit vector. The affine transformation is y * x + b, with each element of y // corresponding to a group of 8 elements in x. @@ -366,7 +361,7 @@ commutative: false extension: "AVX.*" documentation: !string |- - // GaloisFieldAffineTransformInverseMasked computes an affine transformation in GF(2^8), + // NAME computes an affine transformation in GF(2^8), // with x inverted with respect to reduction polynomial x^8 + x^4 + x^3 + x + 1: // x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrixes; // b is an 8-bit vector. The affine transformation is y * x + b, with each element of y @@ -376,288 +371,279 @@ commutative: false extension: "AVX.*" documentation: !string |- - // GaloisFieldMulMasked computes element-wise GF(2^8) multiplication with + // NAME computes element-wise GF(2^8) multiplication with // reduction polynomial x^8 + x^4 + x^3 + x + 1. 
- go: Average commutative: true extension: "AVX.*" # VPAVGB/W are available across various AVX versions documentation: !string |- - // Average computes the rounded average of corresponding elements. + // NAME computes the rounded average of corresponding elements. - go: AverageMasked commutative: true masked: true extension: "AVX512.*" # Masked operations are typically AVX512 documentation: !string |- - // AverageMasked computes the rounded average of corresponding elements. - + // NAME computes the rounded average of corresponding elements. - go: Absolute commutative: false # Unary operation, not commutative extension: "AVX.*" # VPABSB/W/D are AVX, VPABSQ is AVX512 documentation: !string |- - // Absolute computes the absolute value of each element. + // NAME computes the absolute value of each element. - go: AbsoluteMasked commutative: false masked: true extension: "AVX512.*" documentation: !string |- - // AbsoluteMasked computes the absolute value of each element. - + // NAME computes the absolute value of each element. - go: Sign # Applies sign of second operand to first: sign(val, sign_src) commutative: false extension: "AVX.*" documentation: !string |- - // Sign returns the product of the first operand with -1, 0, or 1, + // NAME returns the product of the first operand with -1, 0, or 1, // whichever constant is nearest to the value of the second operand. # Sign does not have masked version - - go: PopCountMasked commutative: false masked: true extension: "AVX512.*" # VPOPCNT instructions are AVX512 (BITALG or VPOPCNTDQ) documentation: !string |- - // PopCountMasked counts the number of set bits in each element. + // NAME counts the number of set bits in each element. - go: PairDotProd commutative: false extension: "AVX.*" documentation: !string |- - // PairDotProd multiplies the elements and add the pairs together, + // NAME multiplies the elements and add the pairs together, // yielding a vector of half as many elements with twice the input element size. 
- go: PairDotProdMasked masked: true commutative: false extension: "AVX.*" documentation: !string |- - // PairDotProdMasked multiplies the elements and add the pairs together, + // NAME multiplies the elements and add the pairs together, // yielding a vector of half as many elements with twice the input element size. # TODO: maybe simplify this name within the receiver-type + method-naming scheme we use. - go: SaturatedUnsignedSignedPairDotProd commutative: false extension: "AVX.*" documentation: !string |- - // SaturatedUnsignedSignedPairDotProd multiplies the elements and add the pairs together with saturation, + // NAME multiplies the elements and add the pairs together with saturation, // yielding a vector of half as many elements with twice the input element size. - go: SaturatedUnsignedSignedPairDotProdMasked masked: true commutative: false extension: "AVX.*" documentation: !string |- - // SaturatedUnsignedSignedPairDotProdMasked multiplies the elements and add the pairs together with saturation, + // NAME multiplies the elements and add the pairs together with saturation, // yielding a vector of half as many elements with twice the input element size. - # QuadDotProd, i.e. VPDPBUSD(S) are operations with src/dst on the same register, we are not supporting this as of now. - go: DotProdBroadcast commutative: true extension: "AVX.*" documentation: !string |- - // DotProdBroadcast multiplies all elements and broadcasts the sum. + // NAME multiplies all elements and broadcasts the sum. - go: UnsignedSignedQuadDotProdAccumulate commutative: false extension: "AVX.*" documentation: !string |- - // UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of x and y and then adds z. + // NAME performs dot products on groups of 4 elements of x and y and then adds z. 
- go: UnsignedSignedQuadDotProdAccumulateMasked masked: true commutative: false extension: "AVX.*" documentation: !string |- - // UnsignedSignedQuadDotProdAccumulateMasked performs dot products on groups of 4 elements of x and y and then adds z. + // NAME performs dot products on groups of 4 elements of x and y and then adds z. - go: SaturatedUnsignedSignedQuadDotProdAccumulate commutative: false extension: "AVX.*" documentation: !string |- - // SaturatedUnsignedSignedQuadDotProdAccumulate multiplies performs dot products on groups of 4 elements of x and y and then adds z. + // NAME multiplies performs dot products on groups of 4 elements of x and y and then adds z. - go: SaturatedUnsignedSignedQuadDotProdAccumulateMasked masked: true commutative: false extension: "AVX.*" documentation: !string |- - // SaturatedUnsignedSignedQuadDotProdAccumulateMasked multiplies performs dot products on groups of 4 elements of x and y and then adds z. + // NAME multiplies performs dot products on groups of 4 elements of x and y and then adds z. - go: PairDotProdAccumulate commutative: false extension: "AVX.*" documentation: !string |- - // PairDotProdAccumulate performs dot products on pairs of elements of x and y and then adds z. + // NAME performs dot products on pairs of elements of x and y and then adds z. - go: PairDotProdAccumulateMasked masked: true commutative: false extension: "AVX.*" documentation: !string |- - // PairDotProdAccumulateMasked performs dot products on pairs of elements of x and y and then adds z. + // NAME performs dot products on pairs of elements of x and y and then adds z. - go: SaturatedPairDotProdAccumulate commutative: false extension: "AVX.*" documentation: !string |- - // SaturatedPairDotProdAccumulate performs dot products on pairs of elements of x and y and then adds z. + // NAME performs dot products on pairs of elements of x and y and then adds z. 
- go: SaturatedPairDotProdAccumulateMasked masked: true commutative: false extension: "AVX.*" documentation: !string |- - // SaturatedPairDotProdAccumulateMasked performs dot products on pairs of elements of x and y and then adds z. + // NAME performs dot products on pairs of elements of x and y and then adds z. - go: FusedMultiplyAddMasked masked: true commutative: false extension: "AVX.*" documentation: !string |- - // FusedMultiplyAddMasked performs (x * y) + z. + // NAME performs (x * y) + z. - go: FusedMultiplyAddSubMasked masked: true commutative: false extension: "AVX.*" documentation: !string |- - // FusedMultiplyAddSubMasked performs (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements. + // NAME performs (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements. - go: FusedMultiplySubAddMasked masked: true commutative: false extension: "AVX.*" documentation: !string |- - // FusedMultiplySubAddMasked performs (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements. + // NAME performs (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements. - go: Max commutative: true extension: "AVX.*" documentation: !string |- - // Max computes the maximum of corresponding elements. + // NAME computes the maximum of corresponding elements. - go: MaxMasked commutative: true masked: true extension: "AVX.*" documentation: !string |- - // MaxMasked computes the maximum of corresponding elements. + // NAME computes the maximum of corresponding elements. - go: Min commutative: true extension: "AVX.*" documentation: !string |- - // Min computes the minimum of corresponding elements. + // NAME computes the minimum of corresponding elements. - go: MinMasked commutative: true masked: true extension: "AVX.*" documentation: !string |- - // MinMasked computes the minimum of corresponding elements. + // NAME computes the minimum of corresponding elements. 
- go: SetElem commutative: false extension: "AVX.*" documentation: !string |- - // SetElem sets a single constant-indexed element's value. + // NAME sets a single constant-indexed element's value. - go: GetElem commutative: false extension: "AVX.*" documentation: !string |- - // GetElem retrieves a single constant-indexed element's value. + // NAME retrieves a single constant-indexed element's value. - go: Set128 commutative: false extension: "AVX.*" documentation: !string |- - // Set128 combines a 128-bit vector with a 256-bit vector, where the constant operand specifies whether the low (0) or high (1) half is receives the smaller vector. + // NAME combines a 128-bit vector with a 256-bit vector, where the constant operand specifies whether the low (0) or high (1) half is receives the smaller vector. - go: Get128 commutative: false extension: "AVX.*" documentation: !string |- - // Get128 retrieves the upper (1) or lower (0) half of a 256-bit vector, depending on the constant operand. - - + // NAME retrieves the upper (1) or lower (0) half of a 256-bit vector, depending on the constant operand. - go: Permute commutative: false extension: "AVX.*" documentation: !string |- - // Permute performs a full permutation of vector x using indices: + // NAME performs a full permutation of vector x using indices: // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} // Only the needed bits to represent x's index are used in indices' elements. - - go: PermuteMasked commutative: false masked: true extension: "AVX.*" documentation: !string |- - // PermuteMasked performs a full permutation of vector y using indices: + // NAME performs a full permutation of vector y using indices: // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} // Only the needed bits to represent x's index are used in indices' elements. 
- - go: Permute2Masked # Permute2Masked is only available on or after AVX512 commutative: false masked: true extension: "AVX.*" documentation: !string |- - // Permute2Masked performs a full permutation of vector x, y using indices: + // NAME performs a full permutation of vector x, y using indices: // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} // where xy is x appending y. // Only the needed bits to represent xy's index are used in indices' elements. - - go: Compress commutative: false # The mask in Compress is a control mask rather than a write mask, so we don't mark it "masked" extension: "AVX.*" documentation: !string |- - // Compress performs a compression on vector x using mask by + // NAME performs a compression on vector x using mask by // selecting elements as indicated by mask, and pack them to lower indexed elements. - go: Mul commutative: true extension: "AVX.*" documentation: !string |- - // Mul multiplies corresponding elements of two vectors. + // NAME multiplies corresponding elements of two vectors. - go: MulEvenWiden commutative: true extension: "AVX.*" documentation: !string |- - // MulEvenWiden multiplies even-indexed elements, widening the result. + // NAME multiplies even-indexed elements, widening the result. // Result[i] = v1.Even[i] * v2.Even[i]. - go: MulHigh commutative: true extension: "AVX.*" documentation: !string |- - // MulHigh multiplies elements and stores the high part of the result. + // NAME multiplies elements and stores the high part of the result. - go: MulLow commutative: true extension: "AVX.*" documentation: !string |- - // MulLow multiplies elements and stores the low part of the result. + // NAME multiplies elements and stores the low part of the result. - go: MulMasked masked: true commutative: true extension: "AVX.*" documentation: !string |- - // MulMasked multiplies corresponding elements of two vectors. + // NAME multiplies corresponding elements of two vectors. 
- go: MulEvenWidenMasked masked: true commutative: true extension: "AVX.*" documentation: !string |- - // MulEvenWidenMasked multiplies even-indexed elements, widening the result. + // NAME multiplies even-indexed elements, widening the result. // Result[i] = v1.Even[i] * v2.Even[i]. - go: MulHighMasked masked: true commutative: true extension: "AVX.*" documentation: !string |- - // MulHighMasked multiplies elements and stores the high part of the result. + // NAME multiplies elements and stores the high part of the result. - go: MulLowMasked masked: true commutative: true extension: "AVX.*" documentation: !string |- - // MulLowMasked multiplies elements and stores the low part of the result. + // NAME multiplies elements and stores the low part of the result. - go: ShiftAllLeft nameAndSizeCheck: true commutative: false extension: "AVX.*" documentation: !string |- - // ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. + // NAME shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. - go: ShiftAllLeftMasked nameAndSizeCheck: true masked: true commutative: false extension: "AVX.*" documentation: !string |- - // ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. + // NAME shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. - go: ShiftAllRight signed: false nameAndSizeCheck: true commutative: false extension: "AVX.*" documentation: !string |- - // ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. + // NAME shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. 
- go: ShiftAllRightMasked signed: false nameAndSizeCheck: true @@ -665,14 +651,14 @@ commutative: false extension: "AVX.*" documentation: !string |- - // ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. + // NAME shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. - go: ShiftAllRight signed: true nameAndSizeCheck: true commutative: false extension: "AVX.*" documentation: !string |- - // ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. + // NAME shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. - go: ShiftAllRightMasked signed: true nameAndSizeCheck: true @@ -680,28 +666,27 @@ commutative: false extension: "AVX.*" documentation: !string |- - // ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. - + // NAME shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. - go: ShiftLeft nameAndSizeCheck: true commutative: false extension: "AVX.*" documentation: !string |- - // ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. + // NAME shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. - go: ShiftLeftMasked nameAndSizeCheck: true masked: true commutative: false extension: "AVX.*" documentation: !string |- - // ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. + // NAME shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. 
- go: ShiftRight signed: false nameAndSizeCheck: true commutative: false extension: "AVX.*" documentation: !string |- - // ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. + // NAME shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. - go: ShiftRightMasked signed: false nameAndSizeCheck: true @@ -709,14 +694,14 @@ commutative: false extension: "AVX.*" documentation: !string |- - // ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. + // NAME shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. - go: ShiftRight signed: true nameAndSizeCheck: true commutative: false extension: "AVX.*" documentation: !string |- - // ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. + // NAME shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. - go: ShiftRightMasked signed: true nameAndSizeCheck: true @@ -724,44 +709,42 @@ commutative: false extension: "AVX.*" documentation: !string |- - // ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. - + // NAME shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. 
- go: RotateAllLeftMasked nameAndSizeCheck: true masked: true commutative: false extension: "AVX.*" documentation: !string |- - // RotateAllLeftMasked rotates each element to the left by the number of bits specified by the immediate. + // NAME rotates each element to the left by the number of bits specified by the immediate. - go: RotateLeftMasked nameAndSizeCheck: true masked: true commutative: false extension: "AVX.*" documentation: !string |- - // RotateLeftMasked rotates each element in x to the left by the number of bits specified by y's corresponding elements. + // NAME rotates each element in x to the left by the number of bits specified by y's corresponding elements. - go: RotateAllRightMasked nameAndSizeCheck: true masked: true commutative: false extension: "AVX.*" documentation: !string |- - // RotateAllRightMasked rotates each element to the right by the number of bits specified by the immediate. + // NAME rotates each element to the right by the number of bits specified by the immediate. - go: RotateRightMasked nameAndSizeCheck: true masked: true commutative: false extension: "AVX.*" documentation: !string |- - // RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements. - + // NAME rotates each element in x to the right by the number of bits specified by y's corresponding elements. - go: ShiftAllLeftAndFillUpperFromMasked nameAndSizeCheck: true masked: true commutative: false extension: "AVX.*" documentation: !string |- - // ShiftAllLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the + // NAME shifts each element of x to the left by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. 
- go: ShiftAllRightAndFillUpperFromMasked nameAndSizeCheck: true @@ -769,7 +752,7 @@ commutative: false extension: "AVX.*" documentation: !string |- - // ShiftAllRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the + // NAME shifts each element of x to the right by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. - go: ShiftLeftAndFillUpperFromMasked nameAndSizeCheck: true @@ -777,7 +760,7 @@ commutative: false extension: "AVX.*" documentation: !string |- - // ShiftLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the + // NAME shifts each element of x to the left by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. - go: ShiftRightAndFillUpperFromMasked nameAndSizeCheck: true @@ -785,5 +768,5 @@ commutative: false extension: "AVX.*" documentation: !string |- - // ShiftRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the + // NAME shifts each element of x to the right by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. 
diff --git a/internal/simdgen/gen_utility.go b/internal/simdgen/gen_utility.go index ea4d56ac..be0a945d 100644 --- a/internal/simdgen/gen_utility.go +++ b/internal/simdgen/gen_utility.go @@ -569,8 +569,8 @@ func splitMask(ops []Operation) ([]Operation, error) { } maskedOpName := op2.Go op2.Go = strings.TrimSuffix(op2.Go, "Masked") - op2Doc := strings.ReplaceAll(*op2.Documentation, maskedOpName, op2.Go) - op2.Documentation = &op2Doc + op2Doc := strings.ReplaceAll(op2.Documentation, maskedOpName, op2.Go) + op2.Documentation = op2Doc op2.Masked = nil // It's no longer masked. splited = append(splited, op2) } else { @@ -583,9 +583,7 @@ func splitMask(ops []Operation) ([]Operation, error) { func insertMaskDescToDoc(ops []Operation) { for i, _ := range ops { if ops[i].Masked != nil && *ops[i].Masked { - if ops[i].Documentation != nil { - *ops[i].Documentation += "\n//\n// This operation is applied selectively under a write mask." - } + ops[i].Documentation += "\n//\n// This operation is applied selectively under a write mask." } } } diff --git a/internal/simdgen/godefs.go b/internal/simdgen/godefs.go index 32467de7..b45c249f 100644 --- a/internal/simdgen/godefs.go +++ b/internal/simdgen/godefs.go @@ -7,6 +7,7 @@ package main import ( "fmt" "log" + "regexp" "slices" "strconv" "strings" @@ -16,6 +17,16 @@ import ( type Operation struct { rawOperation + + // Documentation is the doc string for this API. + // + // It is computed from the raw documentation: + // + // - "NAME" is replaced by the Go method name. + // + // - For masked operation, the method name is updated and a sentence about + // masking is added. + Documentation string } // rawOperation is the unifier representation of an [Operation]. It is @@ -49,6 +60,15 @@ func (o *Operation) DecodeUnified(v *unify.Value) error { if err := v.Decode(&o.rawOperation); err != nil { return err } + + // Compute doc string. 
+ if o.rawOperation.Documentation != nil { + o.Documentation = *o.rawOperation.Documentation + } else { + o.Documentation = "// UNDOCUMENTED" + } + o.Documentation = regexp.MustCompile(`\bNAME\b`).ReplaceAllString(o.Documentation, o.Go) + return nil } diff --git a/internal/simdgen/ops/AddSub/categories.yaml b/internal/simdgen/ops/AddSub/categories.yaml index 9bae42e9..667508b5 100644 --- a/internal/simdgen/ops/AddSub/categories.yaml +++ b/internal/simdgen/ops/AddSub/categories.yaml @@ -3,67 +3,67 @@ commutative: true extension: "AVX.*" documentation: !string |- - // Add adds corresponding elements of two vectors. + // NAME adds corresponding elements of two vectors. - go: SaturatedAdd commutative: true extension: "AVX.*" documentation: !string |- - // SaturatedAdd adds corresponding elements of two vectors with saturation. + // NAME adds corresponding elements of two vectors with saturation. - go: AddMasked masked: true commutative: true extension: "AVX.*" documentation: !string |- - // AddMasked adds corresponding elements of two vectors. + // NAME adds corresponding elements of two vectors. - go: SaturatedAddMasked masked: true commutative: true extension: "AVX.*" documentation: !string |- - // SaturatedAddMasked adds corresponding elements of two vectors with saturation. + // NAME adds corresponding elements of two vectors with saturation. - go: Sub commutative: false extension: "AVX.*" documentation: !string |- - // Sub subtracts corresponding elements of two vectors. + // NAME subtracts corresponding elements of two vectors. - go: SaturatedSub commutative: false extension: "AVX.*" documentation: !string |- - // SaturatedSub subtracts corresponding elements of two vectors with saturation. + // NAME subtracts corresponding elements of two vectors with saturation. - go: SubMasked masked: true commutative: false extension: "AVX.*" documentation: !string |- - // SubMasked subtracts corresponding elements of two vectors. 
+ // NAME subtracts corresponding elements of two vectors. - go: SaturatedSubMasked masked: true commutative: false extension: "AVX.*" documentation: !string |- - // SaturatedSubMasked subtracts corresponding elements of two vectors with saturation. + // NAME subtracts corresponding elements of two vectors with saturation. - go: PairwiseAdd commutative: false extension: "AVX.*" documentation: !string |- - // PairwiseAdd horizontally adds adjacent pairs of elements. + // NAME horizontally adds adjacent pairs of elements. // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...]. - go: PairwiseSub commutative: false extension: "AVX.*" documentation: !string |- - // PairwiseSub horizontally subtracts adjacent pairs of elements. + // NAME horizontally subtracts adjacent pairs of elements. // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...]. - go: SaturatedPairwiseAdd commutative: false extension: "AVX.*" documentation: !string |- - // SaturatedPairwiseAdd horizontally adds adjacent pairs of elements with saturation. + // NAME horizontally adds adjacent pairs of elements with saturation. // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...]. - go: SaturatedPairwiseSub commutative: false extension: "AVX.*" documentation: !string |- - // SaturatedPairwiseSub horizontally subtracts adjacent pairs of elements with saturation. + // NAME horizontally subtracts adjacent pairs of elements with saturation. // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...]. 
diff --git a/internal/simdgen/ops/BitwiseLogic/categories.yaml b/internal/simdgen/ops/BitwiseLogic/categories.yaml index c6a00cc2..3d2eda7c 100644 --- a/internal/simdgen/ops/BitwiseLogic/categories.yaml +++ b/internal/simdgen/ops/BitwiseLogic/categories.yaml @@ -3,45 +3,46 @@ commutative: true extension: "AVX.*" documentation: !string |- - // And performs a bitwise AND operation between two vectors. + // NAME performs a bitwise AND operation between two vectors. - go: AndMasked masked: true commutative: true extension: "AVX.*" documentation: !string |- - // AndMasked performs a bitwise AND operation between two vectors. + // NAME performs a bitwise AND operation between two vectors. - go: Or commutative: true extension: "AVX.*" documentation: !string |- - // Or performs a bitwise OR operation between two vectors. + // NAME performs a bitwise OR operation between two vectors. - go: OrMasked masked: true commutative: true extension: "AVX.*" documentation: !string |- - // OrMasked performs a bitwise OR operation between two vectors. + // NAME performs a bitwise OR operation between two vectors. - go: AndNot commutative: false extension: "AVX.*" documentation: !string |- - // AndNot performs a bitwise x &^ y. + // NAME performs a bitwise x &^ y. - go: AndNotMasked masked: true commutative: false extension: "AVX.*" documentation: !string |- - // AndNotMasked performs a bitwise x &^ y. + // NAME performs a bitwise x &^ y. - go: Xor commutative: true extension: "AVX.*" documentation: !string |- - // Xor performs a bitwise XOR operation between two vectors. + // NAME performs a bitwise XOR operation between two vectors. - go: XorMasked masked: true commutative: true extension: "AVX.*" documentation: !string |- - // XorMasked performs a bitwise XOR operation between two vectors. + // NAME performs a bitwise XOR operation between two vectors. + # We also have PTEST and VPTERNLOG, those should be hidden from the users # and only appear in rewrite rules. 
diff --git a/internal/simdgen/ops/Compares/categories.yaml b/internal/simdgen/ops/Compares/categories.yaml index d1080513..e17e45db 100644 --- a/internal/simdgen/ops/Compares/categories.yaml +++ b/internal/simdgen/ops/Compares/categories.yaml @@ -11,90 +11,89 @@ commutative: true extension: "AVX.*" documentation: !string |- - // Equal compares for equality. + // NAME compares for equality. - go: Less constImm: 1 commutative: false extension: "AVX.*" documentation: !string |- - // Less compares for less than. + // NAME compares for less than. - go: LessEqual constImm: 2 commutative: false extension: "AVX.*" documentation: !string |- - // LessEqual compares for less than or equal. + // NAME compares for less than or equal. - go: IsNan # For float only. constImm: 3 commutative: true extension: "AVX.*" documentation: !string |- - // IsNan checks if elements are NaN. Use as x.IsNan(x). + // NAME checks if elements are NaN. Use as x.IsNan(x). - go: NotEqual constImm: 4 commutative: true extension: "AVX.*" documentation: !string |- - // NotEqual compares for inequality. + // NAME compares for inequality. - go: GreaterEqual constImm: 13 commutative: false extension: "AVX.*" documentation: !string |- - // GreaterEqual compares for greater than or equal. + // NAME compares for greater than or equal. - go: Greater constImm: 14 commutative: false extension: "AVX.*" documentation: !string |- - // Greater compares for greater than. - + // NAME compares for greater than. - go: EqualMasked constImm: 0 masked: true commutative: true extension: "AVX.*" documentation: !string |- - // EqualMasked compares for equality. + // NAME compares for equality. - go: LessMasked constImm: 1 masked: true commutative: false extension: "AVX.*" documentation: !string |- - // LessMasked compares for less than. + // NAME compares for less than. 
- go: LessEqualMasked constImm: 2 masked: true commutative: false extension: "AVX.*" documentation: !string |- - // LessEqualMasked compares for less than or equal. + // NAME compares for less than or equal. - go: IsNanMasked # For float only. constImm: 3 masked: true commutative: true extension: "AVX.*" documentation: !string |- - // IsNanMasked checks if elements are NaN. Use as x.IsNan(x). + // NAME checks if elements are NaN. Use as x.IsNan(x). - go: NotEqualMasked constImm: 4 masked: true commutative: true extension: "AVX.*" documentation: !string |- - // NotEqualMasked compares for inequality. + // NAME compares for inequality. - go: GreaterEqualMasked constImm: 13 masked: true commutative: false extension: "AVX.*" documentation: !string |- - // GreaterEqualMasked compares for greater than or equal. + // NAME compares for greater than or equal. - go: GreaterMasked constImm: 14 masked: true commutative: false extension: "AVX.*" documentation: !string |- - // GreaterMasked compares for greater than. + // NAME compares for greater than. diff --git a/internal/simdgen/ops/FPonlyArith/categories.yaml b/internal/simdgen/ops/FPonlyArith/categories.yaml index 1347b533..53292048 100644 --- a/internal/simdgen/ops/FPonlyArith/categories.yaml +++ b/internal/simdgen/ops/FPonlyArith/categories.yaml @@ -3,139 +3,134 @@ commutative: false extension: "AVX.*" documentation: !string |- - // Div divides elements of two vectors. + // NAME divides elements of two vectors. - go: DivMasked commutative: false masked: true extension: "AVX.*" documentation: !string |- - // DivMasked divides elements of two vectors. + // NAME divides elements of two vectors. - go: Sqrt commutative: false extension: "AVX.*" documentation: !string |- - // Sqrt computes the square root of each element. + // NAME computes the square root of each element. - go: SqrtMasked commutative: false masked: true extension: "AVX.*" documentation: !string |- - // SqrtMasked computes the square root of each element. 
+ // NAME computes the square root of each element. - go: ApproximateReciprocal commutative: false extension: "AVX.*" documentation: !string |- - // ApproximateReciprocal computes an approximate reciprocal of each element. + // NAME computes an approximate reciprocal of each element. - go: ApproximateReciprocalMasked commutative: false masked: true extension: "AVX.*" documentation: !string |- - // ApproximateReciprocalMasked computes an approximate reciprocal of each element. + // NAME computes an approximate reciprocal of each element. - go: ApproximateReciprocalOfSqrt commutative: false extension: "AVX.*" documentation: !string |- - // ApproximateReciprocalOfSqrt computes an approximate reciprocal of the square root of each element. + // NAME computes an approximate reciprocal of the square root of each element. - go: ApproximateReciprocalOfSqrtMasked commutative: false masked: true extension: "AVX.*" documentation: !string |- - // ApproximateReciprocalOfSqrtMasked computes an approximate reciprocal of the square root of each element. + // NAME computes an approximate reciprocal of the square root of each element. - go: MulByPowOf2Masked # This operation is all after AVX512, the unmasked version will be generated. commutative: false masked: true extension: "AVX.*" documentation: !string |- - // MulByPowOf2Masked multiplies elements by a power of 2. - + // NAME multiplies elements by a power of 2. - go: Round commutative: false extension: "AVX.*" constImm: 0 documentation: !string |- - // Round rounds elements to the nearest integer. + // NAME rounds elements to the nearest integer. - go: RoundWithPrecisionMasked commutative: false extension: "AVX.*" constImm: 0 masked: true documentation: !string |- - // RoundWithPrecisionMasked rounds elements with specified precision. + // NAME rounds elements with specified precision. 
- go: DiffWithRoundWithPrecisionMasked commutative: false extension: "AVX.*" constImm: 0 masked: true documentation: !string |- - // DiffWithRoundWithPrecisionMasked computes the difference after rounding with specified precision. - + // NAME computes the difference after rounding with specified precision. - go: Floor commutative: false extension: "AVX.*" constImm: 1 documentation: !string |- - // Floor rounds elements down to the nearest integer. + // NAME rounds elements down to the nearest integer. - go: FloorWithPrecisionMasked commutative: false extension: "AVX.*" constImm: 1 masked: true documentation: !string |- - // FloorWithPrecisionMasked rounds elements down with specified precision. + // NAME rounds elements down with specified precision. - go: DiffWithFloorWithPrecisionMasked commutative: false extension: "AVX.*" constImm: 1 masked: true documentation: !string |- - // DiffWithFloorWithPrecisionMasked computes the difference after flooring with specified precision. - + // NAME computes the difference after flooring with specified precision. - go: Ceil commutative: false extension: "AVX.*" constImm: 2 documentation: !string |- - // Ceil rounds elements up to the nearest integer. + // NAME rounds elements up to the nearest integer. - go: CeilWithPrecisionMasked commutative: false extension: "AVX.*" constImm: 2 masked: true documentation: !string |- - // CeilWithPrecisionMasked rounds elements up with specified precision. + // NAME rounds elements up with specified precision. - go: DiffWithCeilWithPrecisionMasked commutative: false extension: "AVX.*" constImm: 2 masked: true documentation: !string |- - // DiffWithCeilWithPrecisionMasked computes the difference after ceiling with specified precision. - + // NAME computes the difference after ceiling with specified precision. - go: Trunc commutative: false extension: "AVX.*" constImm: 3 documentation: !string |- - // Trunc truncates elements towards zero. + // NAME truncates elements towards zero. 
- go: TruncWithPrecisionMasked commutative: false extension: "AVX.*" constImm: 3 masked: true documentation: !string |- - // TruncWithPrecisionMasked truncates elements with specified precision. + // NAME truncates elements with specified precision. - go: DiffWithTruncWithPrecisionMasked commutative: false extension: "AVX.*" constImm: 3 masked: true documentation: !string |- - // DiffWithTruncWithPrecisionMasked computes the difference after truncating with specified precision. - + // NAME computes the difference after truncating with specified precision. - go: AddSub commutative: false extension: "AVX.*" documentation: !string |- - // AddSub subtracts even elements and adds odd elements of two vectors. \ No newline at end of file + // NAME subtracts even elements and adds odd elements of two vectors. diff --git a/internal/simdgen/ops/GaloisField/categories.yaml b/internal/simdgen/ops/GaloisField/categories.yaml index 4184c5e4..62d8709e 100644 --- a/internal/simdgen/ops/GaloisField/categories.yaml +++ b/internal/simdgen/ops/GaloisField/categories.yaml @@ -4,7 +4,7 @@ commutative: false extension: "AVX.*" documentation: !string |- - // GaloisFieldAffineTransformMasked computes an affine transformation in GF(2^8): + // NAME computes an affine transformation in GF(2^8): // x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrixes; // b is an 8-bit vector. The affine transformation is y * x + b, with each element of y // corresponding to a group of 8 elements in x. @@ -13,7 +13,7 @@ commutative: false extension: "AVX.*" documentation: !string |- - // GaloisFieldAffineTransformInverseMasked computes an affine transformation in GF(2^8), + // NAME computes an affine transformation in GF(2^8), // with x inverted with respect to reduction polynomial x^8 + x^4 + x^3 + x + 1: // x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrixes; // b is an 8-bit vector. 
The affine transformation is y * x + b, with each element of y @@ -23,5 +23,5 @@ commutative: false extension: "AVX.*" documentation: !string |- - // GaloisFieldMulMasked computes element-wise GF(2^8) multiplication with - // reduction polynomial x^8 + x^4 + x^3 + x + 1. \ No newline at end of file + // NAME computes element-wise GF(2^8) multiplication with + // reduction polynomial x^8 + x^4 + x^3 + x + 1. diff --git a/internal/simdgen/ops/IntOnlyArith/categories.yaml b/internal/simdgen/ops/IntOnlyArith/categories.yaml index fc277f81..76ab14ba 100644 --- a/internal/simdgen/ops/IntOnlyArith/categories.yaml +++ b/internal/simdgen/ops/IntOnlyArith/categories.yaml @@ -3,39 +3,36 @@ commutative: true extension: "AVX.*" # VPAVGB/W are available across various AVX versions documentation: !string |- - // Average computes the rounded average of corresponding elements. + // NAME computes the rounded average of corresponding elements. - go: AverageMasked commutative: true masked: true extension: "AVX512.*" # Masked operations are typically AVX512 documentation: !string |- - // AverageMasked computes the rounded average of corresponding elements. - + // NAME computes the rounded average of corresponding elements. - go: Absolute commutative: false # Unary operation, not commutative extension: "AVX.*" # VPABSB/W/D are AVX, VPABSQ is AVX512 documentation: !string |- - // Absolute computes the absolute value of each element. + // NAME computes the absolute value of each element. - go: AbsoluteMasked commutative: false masked: true extension: "AVX512.*" documentation: !string |- - // AbsoluteMasked computes the absolute value of each element. - + // NAME computes the absolute value of each element. 
- go: Sign # Applies sign of second operand to first: sign(val, sign_src) commutative: false extension: "AVX.*" documentation: !string |- - // Sign returns the product of the first operand with -1, 0, or 1, + // NAME returns the product of the first operand with -1, 0, or 1, // whichever constant is nearest to the value of the second operand. # Sign does not have masked version - - go: PopCountMasked commutative: false masked: true extension: "AVX512.*" # VPOPCNT instructions are AVX512 (BITALG or VPOPCNTDQ) documentation: !string |- - // PopCountMasked counts the number of set bits in each element. \ No newline at end of file + // NAME counts the number of set bits in each element. diff --git a/internal/simdgen/ops/MLOps/categories.yaml b/internal/simdgen/ops/MLOps/categories.yaml index d26b846d..65f7462e 100644 --- a/internal/simdgen/ops/MLOps/categories.yaml +++ b/internal/simdgen/ops/MLOps/categories.yaml @@ -3,95 +3,94 @@ commutative: false extension: "AVX.*" documentation: !string |- - // PairDotProd multiplies the elements and add the pairs together, + // NAME multiplies the elements and add the pairs together, // yielding a vector of half as many elements with twice the input element size. - go: PairDotProdMasked masked: true commutative: false extension: "AVX.*" documentation: !string |- - // PairDotProdMasked multiplies the elements and add the pairs together, + // NAME multiplies the elements and add the pairs together, // yielding a vector of half as many elements with twice the input element size. # TODO: maybe simplify this name within the receiver-type + method-naming scheme we use. - go: SaturatedUnsignedSignedPairDotProd commutative: false extension: "AVX.*" documentation: !string |- - // SaturatedUnsignedSignedPairDotProd multiplies the elements and add the pairs together with saturation, + // NAME multiplies the elements and add the pairs together with saturation, // yielding a vector of half as many elements with twice the input element size. 
- go: SaturatedUnsignedSignedPairDotProdMasked masked: true commutative: false extension: "AVX.*" documentation: !string |- - // SaturatedUnsignedSignedPairDotProdMasked multiplies the elements and add the pairs together with saturation, + // NAME multiplies the elements and add the pairs together with saturation, // yielding a vector of half as many elements with twice the input element size. - # QuadDotProd, i.e. VPDPBUSD(S) are operations with src/dst on the same register, we are not supporting this as of now. - go: DotProdBroadcast commutative: true extension: "AVX.*" documentation: !string |- - // DotProdBroadcast multiplies all elements and broadcasts the sum. + // NAME multiplies all elements and broadcasts the sum. - go: UnsignedSignedQuadDotProdAccumulate commutative: false extension: "AVX.*" documentation: !string |- - // UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of x and y and then adds z. + // NAME performs dot products on groups of 4 elements of x and y and then adds z. - go: UnsignedSignedQuadDotProdAccumulateMasked masked: true commutative: false extension: "AVX.*" documentation: !string |- - // UnsignedSignedQuadDotProdAccumulateMasked performs dot products on groups of 4 elements of x and y and then adds z. + // NAME performs dot products on groups of 4 elements of x and y and then adds z. - go: SaturatedUnsignedSignedQuadDotProdAccumulate commutative: false extension: "AVX.*" documentation: !string |- - // SaturatedUnsignedSignedQuadDotProdAccumulate multiplies performs dot products on groups of 4 elements of x and y and then adds z. + // NAME multiplies performs dot products on groups of 4 elements of x and y and then adds z. - go: SaturatedUnsignedSignedQuadDotProdAccumulateMasked masked: true commutative: false extension: "AVX.*" documentation: !string |- - // SaturatedUnsignedSignedQuadDotProdAccumulateMasked multiplies performs dot products on groups of 4 elements of x and y and then adds z. 
+ // NAME multiplies performs dot products on groups of 4 elements of x and y and then adds z. - go: PairDotProdAccumulate commutative: false extension: "AVX.*" documentation: !string |- - // PairDotProdAccumulate performs dot products on pairs of elements of x and y and then adds z. + // NAME performs dot products on pairs of elements of x and y and then adds z. - go: PairDotProdAccumulateMasked masked: true commutative: false extension: "AVX.*" documentation: !string |- - // PairDotProdAccumulateMasked performs dot products on pairs of elements of x and y and then adds z. + // NAME performs dot products on pairs of elements of x and y and then adds z. - go: SaturatedPairDotProdAccumulate commutative: false extension: "AVX.*" documentation: !string |- - // SaturatedPairDotProdAccumulate performs dot products on pairs of elements of x and y and then adds z. + // NAME performs dot products on pairs of elements of x and y and then adds z. - go: SaturatedPairDotProdAccumulateMasked masked: true commutative: false extension: "AVX.*" documentation: !string |- - // SaturatedPairDotProdAccumulateMasked performs dot products on pairs of elements of x and y and then adds z. + // NAME performs dot products on pairs of elements of x and y and then adds z. - go: FusedMultiplyAddMasked masked: true commutative: false extension: "AVX.*" documentation: !string |- - // FusedMultiplyAddMasked performs (x * y) + z. + // NAME performs (x * y) + z. - go: FusedMultiplyAddSubMasked masked: true commutative: false extension: "AVX.*" documentation: !string |- - // FusedMultiplyAddSubMasked performs (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements. + // NAME performs (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements. 
- go: FusedMultiplySubAddMasked masked: true commutative: false extension: "AVX.*" documentation: !string |- - // FusedMultiplySubAddMasked performs (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements. + // NAME performs (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements. diff --git a/internal/simdgen/ops/MinMax/categories.yaml b/internal/simdgen/ops/MinMax/categories.yaml index 929bfadd..ce87994f 100644 --- a/internal/simdgen/ops/MinMax/categories.yaml +++ b/internal/simdgen/ops/MinMax/categories.yaml @@ -3,21 +3,21 @@ commutative: true extension: "AVX.*" documentation: !string |- - // Max computes the maximum of corresponding elements. + // NAME computes the maximum of corresponding elements. - go: MaxMasked commutative: true masked: true extension: "AVX.*" documentation: !string |- - // MaxMasked computes the maximum of corresponding elements. + // NAME computes the maximum of corresponding elements. - go: Min commutative: true extension: "AVX.*" documentation: !string |- - // Min computes the minimum of corresponding elements. + // NAME computes the minimum of corresponding elements. - go: MinMasked commutative: true masked: true extension: "AVX.*" documentation: !string |- - // MinMasked computes the minimum of corresponding elements. + // NAME computes the minimum of corresponding elements. diff --git a/internal/simdgen/ops/Moves/categories.yaml b/internal/simdgen/ops/Moves/categories.yaml index d6c4d5da..dd30ca8a 100644 --- a/internal/simdgen/ops/Moves/categories.yaml +++ b/internal/simdgen/ops/Moves/categories.yaml @@ -3,55 +3,50 @@ commutative: false extension: "AVX.*" documentation: !string |- - // SetElem sets a single constant-indexed element's value. + // NAME sets a single constant-indexed element's value. - go: GetElem commutative: false extension: "AVX.*" documentation: !string |- - // GetElem retrieves a single constant-indexed element's value. 
+ // NAME retrieves a single constant-indexed element's value. - go: Set128 commutative: false extension: "AVX.*" documentation: !string |- - // Set128 combines a 128-bit vector with a 256-bit vector, where the constant operand specifies whether the low (0) or high (1) half is receives the smaller vector. + // NAME combines a 128-bit vector with a 256-bit vector, where the constant operand specifies whether the low (0) or high (1) half is receives the smaller vector. - go: Get128 commutative: false extension: "AVX.*" documentation: !string |- - // Get128 retrieves the upper (1) or lower (0) half of a 256-bit vector, depending on the constant operand. - - + // NAME retrieves the upper (1) or lower (0) half of a 256-bit vector, depending on the constant operand. - go: Permute commutative: false extension: "AVX.*" documentation: !string |- - // Permute performs a full permutation of vector x using indices: + // NAME performs a full permutation of vector x using indices: // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} // Only the needed bits to represent x's index are used in indices' elements. - - go: PermuteMasked commutative: false masked: true extension: "AVX.*" documentation: !string |- - // PermuteMasked performs a full permutation of vector y using indices: + // NAME performs a full permutation of vector y using indices: // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} // Only the needed bits to represent x's index are used in indices' elements. - - go: Permute2Masked # Permute2Masked is only available on or after AVX512 commutative: false masked: true extension: "AVX.*" documentation: !string |- - // Permute2Masked performs a full permutation of vector x, y using indices: + // NAME performs a full permutation of vector x, y using indices: // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} // where xy is x appending y. // Only the needed bits to represent xy's index are used in indices' elements. 
- - go: Compress commutative: false # The mask in Compress is a control mask rather than a write mask, so we don't mark it "masked" extension: "AVX.*" documentation: !string |- - // Compress performs a compression on vector x using mask by + // NAME performs a compression on vector x using mask by // selecting elements as indicated by mask, and pack them to lower indexed elements. diff --git a/internal/simdgen/ops/Mul/categories.yaml b/internal/simdgen/ops/Mul/categories.yaml index 1884d660..8dc51f45 100644 --- a/internal/simdgen/ops/Mul/categories.yaml +++ b/internal/simdgen/ops/Mul/categories.yaml @@ -3,45 +3,45 @@ commutative: true extension: "AVX.*" documentation: !string |- - // Mul multiplies corresponding elements of two vectors. + // NAME multiplies corresponding elements of two vectors. - go: MulEvenWiden commutative: true extension: "AVX.*" documentation: !string |- - // MulEvenWiden multiplies even-indexed elements, widening the result. + // NAME multiplies even-indexed elements, widening the result. // Result[i] = v1.Even[i] * v2.Even[i]. - go: MulHigh commutative: true extension: "AVX.*" documentation: !string |- - // MulHigh multiplies elements and stores the high part of the result. + // NAME multiplies elements and stores the high part of the result. - go: MulLow commutative: true extension: "AVX.*" documentation: !string |- - // MulLow multiplies elements and stores the low part of the result. + // NAME multiplies elements and stores the low part of the result. - go: MulMasked masked: true commutative: true extension: "AVX.*" documentation: !string |- - // MulMasked multiplies corresponding elements of two vectors. + // NAME multiplies corresponding elements of two vectors. - go: MulEvenWidenMasked masked: true commutative: true extension: "AVX.*" documentation: !string |- - // MulEvenWidenMasked multiplies even-indexed elements, widening the result. + // NAME multiplies even-indexed elements, widening the result. 
// Result[i] = v1.Even[i] * v2.Even[i]. - go: MulHighMasked masked: true commutative: true extension: "AVX.*" documentation: !string |- - // MulHighMasked multiplies elements and stores the high part of the result. + // NAME multiplies elements and stores the high part of the result. - go: MulLowMasked masked: true commutative: true extension: "AVX.*" documentation: !string |- - // MulLowMasked multiplies elements and stores the low part of the result. + // NAME multiplies elements and stores the low part of the result. diff --git a/internal/simdgen/ops/ShiftRotate/categories.yaml b/internal/simdgen/ops/ShiftRotate/categories.yaml index b9e2a634..71e78251 100644 --- a/internal/simdgen/ops/ShiftRotate/categories.yaml +++ b/internal/simdgen/ops/ShiftRotate/categories.yaml @@ -4,21 +4,21 @@ commutative: false extension: "AVX.*" documentation: !string |- - // ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. + // NAME shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. - go: ShiftAllLeftMasked nameAndSizeCheck: true masked: true commutative: false extension: "AVX.*" documentation: !string |- - // ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. + // NAME shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. - go: ShiftAllRight signed: false nameAndSizeCheck: true commutative: false extension: "AVX.*" documentation: !string |- - // ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. + // NAME shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. 
- go: ShiftAllRightMasked signed: false nameAndSizeCheck: true @@ -26,14 +26,14 @@ commutative: false extension: "AVX.*" documentation: !string |- - // ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. + // NAME shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. - go: ShiftAllRight signed: true nameAndSizeCheck: true commutative: false extension: "AVX.*" documentation: !string |- - // ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. + // NAME shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. - go: ShiftAllRightMasked signed: true nameAndSizeCheck: true @@ -41,28 +41,27 @@ commutative: false extension: "AVX.*" documentation: !string |- - // ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. - + // NAME shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. - go: ShiftLeft nameAndSizeCheck: true commutative: false extension: "AVX.*" documentation: !string |- - // ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. + // NAME shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. - go: ShiftLeftMasked nameAndSizeCheck: true masked: true commutative: false extension: "AVX.*" documentation: !string |- - // ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. + // NAME shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. 
- go: ShiftRight signed: false nameAndSizeCheck: true commutative: false extension: "AVX.*" documentation: !string |- - // ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. + // NAME shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. - go: ShiftRightMasked signed: false nameAndSizeCheck: true @@ -70,14 +69,14 @@ commutative: false extension: "AVX.*" documentation: !string |- - // ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. + // NAME shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. - go: ShiftRight signed: true nameAndSizeCheck: true commutative: false extension: "AVX.*" documentation: !string |- - // ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. + // NAME shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. - go: ShiftRightMasked signed: true nameAndSizeCheck: true @@ -85,44 +84,42 @@ commutative: false extension: "AVX.*" documentation: !string |- - // ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. - + // NAME shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. 
- go: RotateAllLeftMasked nameAndSizeCheck: true masked: true commutative: false extension: "AVX.*" documentation: !string |- - // RotateAllLeftMasked rotates each element to the left by the number of bits specified by the immediate. + // NAME rotates each element to the left by the number of bits specified by the immediate. - go: RotateLeftMasked nameAndSizeCheck: true masked: true commutative: false extension: "AVX.*" documentation: !string |- - // RotateLeftMasked rotates each element in x to the left by the number of bits specified by y's corresponding elements. + // NAME rotates each element in x to the left by the number of bits specified by y's corresponding elements. - go: RotateAllRightMasked nameAndSizeCheck: true masked: true commutative: false extension: "AVX.*" documentation: !string |- - // RotateAllRightMasked rotates each element to the right by the number of bits specified by the immediate. + // NAME rotates each element to the right by the number of bits specified by the immediate. - go: RotateRightMasked nameAndSizeCheck: true masked: true commutative: false extension: "AVX.*" documentation: !string |- - // RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements. - + // NAME rotates each element in x to the right by the number of bits specified by y's corresponding elements. - go: ShiftAllLeftAndFillUpperFromMasked nameAndSizeCheck: true masked: true commutative: false extension: "AVX.*" documentation: !string |- - // ShiftAllLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the + // NAME shifts each element of x to the left by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. 
- go: ShiftAllRightAndFillUpperFromMasked nameAndSizeCheck: true @@ -130,7 +127,7 @@ commutative: false extension: "AVX.*" documentation: !string |- - // ShiftAllRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the + // NAME shifts each element of x to the right by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. - go: ShiftLeftAndFillUpperFromMasked nameAndSizeCheck: true @@ -138,7 +135,7 @@ commutative: false extension: "AVX.*" documentation: !string |- - // ShiftLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the + // NAME shifts each element of x to the left by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. - go: ShiftRightAndFillUpperFromMasked nameAndSizeCheck: true @@ -146,5 +143,5 @@ commutative: false extension: "AVX.*" documentation: !string |- - // ShiftRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the + // NAME shifts each element of x to the right by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. From 6ef798663e3f9ab0392f5e7e1575085409694a2d Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Tue, 29 Jul 2025 10:54:54 -0400 Subject: [PATCH 157/200] internal/simdgen: introduce instruction variants and use for masked ops In AVX-512, nearly all operations take a mask, but it's optional. Currently, the XED loader produces only the masked form of these instructions. Since this mask is one of the input operands, the Go definitions YAML needs a separate entry to match the masked form. 
To generate both the masked and unmasked forms, we unify only the masked form and then when generating the Go API we recognize this and duplicate the operation in unmasked form. Unfortunately, since pre-AVX-512 operations never have this mask input, we wind up duplicating many unifier definitions to match both the pre-AVX-512 and AVX-512 forms, even though the unmasked operation produced during API generation looks like a pre-AVX-512 definition. To fix all this, we flip things around. Instead of generating the masked and unmasked variants at API generation, we generate them much earlier, during XED loading. The XED data already contains a clear marker for which masks are optional (it's wrong in a few cases, which we work around). For instructions with an optional mask, the XED loader now generates both the masked and unmasked forms. Then, to make both easy to match, we put the mask operand into a new top-level tuple called "inVariant". This way, a single unifier def can match the pre-AVX-512 instruction, and the masked and unmasked AVX-512 instructions. When we load the results of unification for generating the API, we do some light canonicalization of the operation. We append any inVariant operands to the input operands list; and if there's a mask in inVariant, we append "Masked" to the Go method name. With all of this done, we can delete all of the "*Masked" forms of operations from the YAML. In a few cases, we have to merge some information from the masked form into the unmasked form. For operations that were introduced in AVX-512, we currently *only* have the Masked form; so for these we keep the definition but strip out the masking. This ultimately has very little effect on the generated API. In a few cases it fills in holes that let us pick a better instruction. It fixes some doc strings that got duplicated incorrectly between masked and unmasked forms. 
This change makes it much easier to write other tools besides the Go API generator because it moves nearly all masking logic out of writeGoDefs. It also eliminates some fragile Operation duplication logic from writeGoDefs. I plan to move a few more things out of the Go API generator, but this is definitely the big one. Change-Id: I17ee70cff15a80e8025eec96a7286266233546d9 Reviewed-on: https://go-review.googlesource.com/c/arch/+/691341 Auto-Submit: Austin Clements Reviewed-by: David Chase Reviewed-by: Junyang Shao LUCI-TryBot-Result: Go LUCI --- internal/simdgen/categories.yaml | 340 +------------ internal/simdgen/gen_utility.go | 39 -- internal/simdgen/go.yaml | 465 +++--------------- internal/simdgen/godefs.go | 48 +- internal/simdgen/main.go | 1 - internal/simdgen/ops/AddSub/categories.yaml | 24 - internal/simdgen/ops/AddSub/go.yaml | 48 -- .../simdgen/ops/BitwiseLogic/categories.yaml | 24 - internal/simdgen/ops/BitwiseLogic/go.yaml | 77 ++- internal/simdgen/ops/Compares/categories.yaml | 49 -- internal/simdgen/ops/Compares/go.yaml | 16 +- .../simdgen/ops/FPonlyArith/categories.yaml | 51 +- internal/simdgen/ops/FPonlyArith/go.yaml | 38 +- .../simdgen/ops/GaloisField/categories.yaml | 9 +- internal/simdgen/ops/GaloisField/go.yaml | 11 +- .../simdgen/ops/IntOnlyArith/categories.yaml | 17 +- internal/simdgen/ops/IntOnlyArith/go.yaml | 20 +- internal/simdgen/ops/MLOps/categories.yaml | 47 +- internal/simdgen/ops/MLOps/go.yaml | 71 +-- internal/simdgen/ops/MinMax/categories.yaml | 12 - internal/simdgen/ops/MinMax/go.yaml | 33 -- internal/simdgen/ops/Moves/categories.yaml | 12 +- internal/simdgen/ops/Moves/go.yaml | 16 +- internal/simdgen/ops/Mul/categories.yaml | 25 - internal/simdgen/ops/Mul/go.yaml | 50 -- .../simdgen/ops/ShiftRotate/categories.yaml | 70 +-- internal/simdgen/ops/ShiftRotate/go.yaml | 95 +--- internal/simdgen/types.yaml | 3 + internal/simdgen/xed.go | 117 ++++- 29 files changed, 329 insertions(+), 1499 deletions(-) diff --git 
a/internal/simdgen/categories.yaml b/internal/simdgen/categories.yaml index 5a7e711d..c13fd431 100644 --- a/internal/simdgen/categories.yaml +++ b/internal/simdgen/categories.yaml @@ -9,18 +9,6 @@ extension: "AVX.*" documentation: !string |- // NAME adds corresponding elements of two vectors with saturation. -- go: AddMasked - masked: true - commutative: true - extension: "AVX.*" - documentation: !string |- - // NAME adds corresponding elements of two vectors. -- go: SaturatedAddMasked - masked: true - commutative: true - extension: "AVX.*" - documentation: !string |- - // NAME adds corresponding elements of two vectors with saturation. - go: Sub commutative: false extension: "AVX.*" @@ -31,18 +19,6 @@ extension: "AVX.*" documentation: !string |- // NAME subtracts corresponding elements of two vectors with saturation. -- go: SubMasked - masked: true - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME subtracts corresponding elements of two vectors. -- go: SaturatedSubMasked - masked: true - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME subtracts corresponding elements of two vectors with saturation. - go: PairwiseAdd commutative: false extension: "AVX.*" @@ -72,45 +48,21 @@ extension: "AVX.*" documentation: !string |- // NAME performs a bitwise AND operation between two vectors. -- go: AndMasked - masked: true - commutative: true - extension: "AVX.*" - documentation: !string |- - // NAME performs a bitwise AND operation between two vectors. - go: Or commutative: true extension: "AVX.*" documentation: !string |- // NAME performs a bitwise OR operation between two vectors. -- go: OrMasked - masked: true - commutative: true - extension: "AVX.*" - documentation: !string |- - // NAME performs a bitwise OR operation between two vectors. - go: AndNot commutative: false extension: "AVX.*" documentation: !string |- // NAME performs a bitwise x &^ y. 
-- go: AndNotMasked - masked: true - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME performs a bitwise x &^ y. - go: Xor commutative: true extension: "AVX.*" documentation: !string |- // NAME performs a bitwise XOR operation between two vectors. -- go: XorMasked - masked: true - commutative: true - extension: "AVX.*" - documentation: !string |- - // NAME performs a bitwise XOR operation between two vectors. # We also have PTEST and VPTERNLOG, those should be hidden from the users # and only appear in rewrite rules. @@ -163,102 +115,28 @@ extension: "AVX.*" documentation: !string |- // NAME compares for greater than. -- go: EqualMasked - constImm: 0 - masked: true - commutative: true - extension: "AVX.*" - documentation: !string |- - // NAME compares for equality. -- go: LessMasked - constImm: 1 - masked: true - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME compares for less than. -- go: LessEqualMasked - constImm: 2 - masked: true - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME compares for less than or equal. -- go: IsNanMasked # For float only. - constImm: 3 - masked: true - commutative: true - extension: "AVX.*" - documentation: !string |- - // NAME checks if elements are NaN. Use as x.IsNan(x). -- go: NotEqualMasked - constImm: 4 - masked: true - commutative: true - extension: "AVX.*" - documentation: !string |- - // NAME compares for inequality. -- go: GreaterEqualMasked - constImm: 13 - masked: true - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME compares for greater than or equal. -- go: GreaterMasked - constImm: 14 - masked: true - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME compares for greater than. - go: Div commutative: false extension: "AVX.*" documentation: !string |- // NAME divides elements of two vectors. 
-- go: DivMasked - commutative: false - masked: true - extension: "AVX.*" - documentation: !string |- - // NAME divides elements of two vectors. - go: Sqrt commutative: false extension: "AVX.*" documentation: !string |- // NAME computes the square root of each element. -- go: SqrtMasked - commutative: false - masked: true - extension: "AVX.*" - documentation: !string |- - // NAME computes the square root of each element. - go: ApproximateReciprocal commutative: false extension: "AVX.*" documentation: !string |- // NAME computes an approximate reciprocal of each element. -- go: ApproximateReciprocalMasked - commutative: false - masked: true - extension: "AVX.*" - documentation: !string |- - // NAME computes an approximate reciprocal of each element. - go: ApproximateReciprocalOfSqrt commutative: false extension: "AVX.*" documentation: !string |- // NAME computes an approximate reciprocal of the square root of each element. -- go: ApproximateReciprocalOfSqrtMasked - commutative: false - masked: true - extension: "AVX.*" - documentation: !string |- - // NAME computes an approximate reciprocal of the square root of each element. -- go: MulByPowOf2Masked # This operation is all after AVX512, the unmasked version will be generated. +- go: MulByPowOf2 commutative: false - masked: true extension: "AVX.*" documentation: !string |- // NAME multiplies elements by a power of 2. @@ -268,18 +146,16 @@ constImm: 0 documentation: !string |- // NAME rounds elements to the nearest integer. -- go: RoundWithPrecisionMasked +- go: RoundWithPrecision commutative: false extension: "AVX.*" constImm: 0 - masked: true documentation: !string |- // NAME rounds elements with specified precision. -- go: DiffWithRoundWithPrecisionMasked +- go: DiffWithRoundWithPrecision commutative: false extension: "AVX.*" constImm: 0 - masked: true documentation: !string |- // NAME computes the difference after rounding with specified precision. 
- go: Floor @@ -288,18 +164,16 @@ constImm: 1 documentation: !string |- // NAME rounds elements down to the nearest integer. -- go: FloorWithPrecisionMasked +- go: FloorWithPrecision commutative: false extension: "AVX.*" constImm: 1 - masked: true documentation: !string |- // NAME rounds elements down with specified precision. -- go: DiffWithFloorWithPrecisionMasked +- go: DiffWithFloorWithPrecision commutative: false extension: "AVX.*" constImm: 1 - masked: true documentation: !string |- // NAME computes the difference after flooring with specified precision. - go: Ceil @@ -308,18 +182,16 @@ constImm: 2 documentation: !string |- // NAME rounds elements up to the nearest integer. -- go: CeilWithPrecisionMasked +- go: CeilWithPrecision commutative: false extension: "AVX.*" constImm: 2 - masked: true documentation: !string |- // NAME rounds elements up with specified precision. -- go: DiffWithCeilWithPrecisionMasked +- go: DiffWithCeilWithPrecision commutative: false extension: "AVX.*" constImm: 2 - masked: true documentation: !string |- // NAME computes the difference after ceiling with specified precision. - go: Trunc @@ -328,18 +200,16 @@ constImm: 3 documentation: !string |- // NAME truncates elements towards zero. -- go: TruncWithPrecisionMasked +- go: TruncWithPrecision commutative: false extension: "AVX.*" constImm: 3 - masked: true documentation: !string |- // NAME truncates elements with specified precision. -- go: DiffWithTruncWithPrecisionMasked +- go: DiffWithTruncWithPrecision commutative: false extension: "AVX.*" constImm: 3 - masked: true documentation: !string |- // NAME computes the difference after truncating with specified precision. - go: AddSub @@ -347,8 +217,7 @@ extension: "AVX.*" documentation: !string |- // NAME subtracts even elements and adds odd elements of two vectors. 
-- go: GaloisFieldAffineTransformMasked - masked: true +- go: GaloisFieldAffineTransform commutative: false extension: "AVX.*" documentation: !string |- @@ -356,8 +225,7 @@ // x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrixes; // b is an 8-bit vector. The affine transformation is y * x + b, with each element of y // corresponding to a group of 8 elements in x. -- go: GaloisFieldAffineTransformInverseMasked - masked: true +- go: GaloisFieldAffineTransformInverse commutative: false extension: "AVX.*" documentation: !string |- @@ -366,8 +234,7 @@ // x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrixes; // b is an 8-bit vector. The affine transformation is y * x + b, with each element of y // corresponding to a group of 8 elements in x. -- go: GaloisFieldMulMasked - masked: true +- go: GaloisFieldMul commutative: false extension: "AVX.*" documentation: !string |- @@ -378,24 +245,12 @@ extension: "AVX.*" # VPAVGB/W are available across various AVX versions documentation: !string |- // NAME computes the rounded average of corresponding elements. -- go: AverageMasked - commutative: true - masked: true - extension: "AVX512.*" # Masked operations are typically AVX512 - documentation: !string |- - // NAME computes the rounded average of corresponding elements. - go: Absolute commutative: false # Unary operation, not commutative extension: "AVX.*" # VPABSB/W/D are AVX, VPABSQ is AVX512 documentation: !string |- // NAME computes the absolute value of each element. -- go: AbsoluteMasked - commutative: false - masked: true - extension: "AVX512.*" - documentation: !string |- - // NAME computes the absolute value of each element. - go: Sign # Applies sign of second operand to first: sign(val, sign_src) commutative: false @@ -404,10 +259,9 @@ // NAME returns the product of the first operand with -1, 0, or 1, // whichever constant is nearest to the value of the second operand. 
# Sign does not have masked version -- go: PopCountMasked +- go: PopCount commutative: false - masked: true - extension: "AVX512.*" # VPOPCNT instructions are AVX512 (BITALG or VPOPCNTDQ) + extension: "AVX512.*" documentation: !string |- // NAME counts the number of set bits in each element. - go: PairDotProd @@ -416,13 +270,6 @@ documentation: !string |- // NAME multiplies the elements and add the pairs together, // yielding a vector of half as many elements with twice the input element size. -- go: PairDotProdMasked - masked: true - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME multiplies the elements and add the pairs together, - // yielding a vector of half as many elements with twice the input element size. # TODO: maybe simplify this name within the receiver-type + method-naming scheme we use. - go: SaturatedUnsignedSignedPairDotProd commutative: false @@ -430,13 +277,6 @@ documentation: !string |- // NAME multiplies the elements and add the pairs together with saturation, // yielding a vector of half as many elements with twice the input element size. -- go: SaturatedUnsignedSignedPairDotProdMasked - masked: true - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME multiplies the elements and add the pairs together with saturation, - // yielding a vector of half as many elements with twice the input element size. # QuadDotProd, i.e. VPDPBUSD(S) are operations with src/dst on the same register, we are not supporting this as of now. - go: DotProdBroadcast commutative: true @@ -448,59 +288,32 @@ extension: "AVX.*" documentation: !string |- // NAME performs dot products on groups of 4 elements of x and y and then adds z. -- go: UnsignedSignedQuadDotProdAccumulateMasked - masked: true - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME performs dot products on groups of 4 elements of x and y and then adds z. 
- go: SaturatedUnsignedSignedQuadDotProdAccumulate commutative: false extension: "AVX.*" documentation: !string |- // NAME multiplies performs dot products on groups of 4 elements of x and y and then adds z. -- go: SaturatedUnsignedSignedQuadDotProdAccumulateMasked - masked: true - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME multiplies performs dot products on groups of 4 elements of x and y and then adds z. - go: PairDotProdAccumulate commutative: false extension: "AVX.*" documentation: !string |- // NAME performs dot products on pairs of elements of x and y and then adds z. -- go: PairDotProdAccumulateMasked - masked: true - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME performs dot products on pairs of elements of x and y and then adds z. - go: SaturatedPairDotProdAccumulate commutative: false extension: "AVX.*" documentation: !string |- // NAME performs dot products on pairs of elements of x and y and then adds z. -- go: SaturatedPairDotProdAccumulateMasked - masked: true - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME performs dot products on pairs of elements of x and y and then adds z. -- go: FusedMultiplyAddMasked - masked: true +- go: FusedMultiplyAdd commutative: false extension: "AVX.*" documentation: !string |- // NAME performs (x * y) + z. -- go: FusedMultiplyAddSubMasked - masked: true +- go: FusedMultiplyAddSub commutative: false extension: "AVX.*" documentation: !string |- // NAME performs (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements. -- go: FusedMultiplySubAddMasked - masked: true +- go: FusedMultiplySubAdd commutative: false extension: "AVX.*" documentation: !string |- @@ -510,23 +323,11 @@ extension: "AVX.*" documentation: !string |- // NAME computes the maximum of corresponding elements. 
-- go: MaxMasked - commutative: true - masked: true - extension: "AVX.*" - documentation: !string |- - // NAME computes the maximum of corresponding elements. - go: Min commutative: true extension: "AVX.*" documentation: !string |- // NAME computes the minimum of corresponding elements. -- go: MinMasked - commutative: true - masked: true - extension: "AVX.*" - documentation: !string |- - // NAME computes the minimum of corresponding elements. - go: SetElem commutative: false extension: "AVX.*" @@ -554,17 +355,8 @@ // NAME performs a full permutation of vector x using indices: // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} // Only the needed bits to represent x's index are used in indices' elements. -- go: PermuteMasked +- go: Permute2 # Permute2 is only available on or after AVX512 commutative: false - masked: true - extension: "AVX.*" - documentation: !string |- - // NAME performs a full permutation of vector y using indices: - // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} - // Only the needed bits to represent x's index are used in indices' elements. -- go: Permute2Masked # Permute2Masked is only available on or after AVX512 - commutative: false - masked: true extension: "AVX.*" documentation: !string |- // NAME performs a full permutation of vector x, y using indices: @@ -573,7 +365,6 @@ // Only the needed bits to represent xy's index are used in indices' elements. - go: Compress commutative: false - # The mask in Compress is a control mask rather than a write mask, so we don't mark it "masked" extension: "AVX.*" documentation: !string |- // NAME performs a compression on vector x using mask by @@ -599,44 +390,12 @@ extension: "AVX.*" documentation: !string |- // NAME multiplies elements and stores the low part of the result. -- go: MulMasked - masked: true - commutative: true - extension: "AVX.*" - documentation: !string |- - // NAME multiplies corresponding elements of two vectors. 
-- go: MulEvenWidenMasked - masked: true - commutative: true - extension: "AVX.*" - documentation: !string |- - // NAME multiplies even-indexed elements, widening the result. - // Result[i] = v1.Even[i] * v2.Even[i]. -- go: MulHighMasked - masked: true - commutative: true - extension: "AVX.*" - documentation: !string |- - // NAME multiplies elements and stores the high part of the result. -- go: MulLowMasked - masked: true - commutative: true - extension: "AVX.*" - documentation: !string |- - // NAME multiplies elements and stores the low part of the result. - go: ShiftAllLeft nameAndSizeCheck: true commutative: false extension: "AVX.*" documentation: !string |- // NAME shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. -- go: ShiftAllLeftMasked - nameAndSizeCheck: true - masked: true - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. - go: ShiftAllRight signed: false nameAndSizeCheck: true @@ -644,14 +403,6 @@ extension: "AVX.*" documentation: !string |- // NAME shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. -- go: ShiftAllRightMasked - signed: false - nameAndSizeCheck: true - masked: true - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. - go: ShiftAllRight signed: true nameAndSizeCheck: true @@ -659,27 +410,12 @@ extension: "AVX.*" documentation: !string |- // NAME shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. -- go: ShiftAllRightMasked - signed: true - nameAndSizeCheck: true - masked: true - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME shifts each element to the right by the specified number of bits. 
Emptied upper bits are filled with the sign bit. - go: ShiftLeft nameAndSizeCheck: true commutative: false extension: "AVX.*" documentation: !string |- // NAME shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. -- go: ShiftLeftMasked - nameAndSizeCheck: true - masked: true - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. - go: ShiftRight signed: false nameAndSizeCheck: true @@ -687,14 +423,6 @@ extension: "AVX.*" documentation: !string |- // NAME shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. -- go: ShiftRightMasked - signed: false - nameAndSizeCheck: true - masked: true - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. - go: ShiftRight signed: true nameAndSizeCheck: true @@ -702,69 +430,53 @@ extension: "AVX.*" documentation: !string |- // NAME shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. -- go: ShiftRightMasked - signed: true - nameAndSizeCheck: true - masked: true - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. -- go: RotateAllLeftMasked +- go: RotateAllLeft nameAndSizeCheck: true - masked: true commutative: false extension: "AVX.*" documentation: !string |- // NAME rotates each element to the left by the number of bits specified by the immediate. 
-- go: RotateLeftMasked +- go: RotateLeft nameAndSizeCheck: true - masked: true commutative: false extension: "AVX.*" documentation: !string |- // NAME rotates each element in x to the left by the number of bits specified by y's corresponding elements. -- go: RotateAllRightMasked +- go: RotateAllRight nameAndSizeCheck: true - masked: true commutative: false extension: "AVX.*" documentation: !string |- // NAME rotates each element to the right by the number of bits specified by the immediate. -- go: RotateRightMasked +- go: RotateRight nameAndSizeCheck: true - masked: true commutative: false extension: "AVX.*" documentation: !string |- // NAME rotates each element in x to the right by the number of bits specified by y's corresponding elements. -- go: ShiftAllLeftAndFillUpperFromMasked +- go: ShiftAllLeftAndFillUpperFrom nameAndSizeCheck: true - masked: true commutative: false extension: "AVX.*" documentation: !string |- // NAME shifts each element of x to the left by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. -- go: ShiftAllRightAndFillUpperFromMasked +- go: ShiftAllRightAndFillUpperFrom nameAndSizeCheck: true - masked: true commutative: false extension: "AVX.*" documentation: !string |- // NAME shifts each element of x to the right by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. -- go: ShiftLeftAndFillUpperFromMasked +- go: ShiftLeftAndFillUpperFrom nameAndSizeCheck: true - masked: true commutative: false extension: "AVX.*" documentation: !string |- // NAME shifts each element of x to the left by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. 
-- go: ShiftRightAndFillUpperFromMasked +- go: ShiftRightAndFillUpperFrom nameAndSizeCheck: true - masked: true commutative: false extension: "AVX.*" documentation: !string |- diff --git a/internal/simdgen/gen_utility.go b/internal/simdgen/gen_utility.go index be0a945d..f1cfcfe9 100644 --- a/internal/simdgen/gen_utility.go +++ b/internal/simdgen/gen_utility.go @@ -549,45 +549,6 @@ func fillCPUFeature(ops []Operation) (filled []Operation, excluded []Operation) return } -// splitMask splits operations with a single mask vreg input to be masked and unmasked(const: K0). -// It also remove the "Masked" keyword from the name. -func splitMask(ops []Operation) ([]Operation, error) { - splited := []Operation{} - for _, op := range ops { - splited = append(splited, op) - if op.Masked == nil || !*op.Masked { - continue - } - shapeIn, _, _, _, _ := op.shape() - - if shapeIn == OneKmaskIn || shapeIn == OneKmaskImmIn { - op2 := op - // The ops should be sorted when calling this function, the mask is in the end, drop the mask - op2.In = slices.Clone(op.In)[:len(op.In)-1] - if !strings.HasSuffix(op2.Go, "Masked") { - return nil, fmt.Errorf("simdgen only recognizes masked operations with name ending with 'Masked': %s", op) - } - maskedOpName := op2.Go - op2.Go = strings.TrimSuffix(op2.Go, "Masked") - op2Doc := strings.ReplaceAll(op2.Documentation, maskedOpName, op2.Go) - op2.Documentation = op2Doc - op2.Masked = nil // It's no longer masked. - splited = append(splited, op2) - } else { - return nil, fmt.Errorf("simdgen only recognizes masked operations with exactly one mask input: %s", op) - } - } - return splited, nil -} - -func insertMaskDescToDoc(ops []Operation) { - for i, _ := range ops { - if ops[i].Masked != nil && *ops[i].Masked { - ops[i].Documentation += "\n//\n// This operation is applied selectively under a write mask." 
- } - } -} - func genericName(op Operation) string { if op.OperandOrder != nil { switch *op.OperandOrder { diff --git a/internal/simdgen/go.yaml b/internal/simdgen/go.yaml index 8ef04b8e..c58d692e 100644 --- a/internal/simdgen/go.yaml +++ b/internal/simdgen/go.yaml @@ -8,14 +8,6 @@ - *any out: - *any -- go: AddMasked - asm: "VPADD[BWDQ]|VADDP[SD]" - in: - - class: mask - - *any - - *any - out: - - *any # Saturated Add - go: SaturatedAdd asm: "VPADDS[BWDQ]" @@ -35,22 +27,6 @@ - *uint out: - *uint -- go: SaturatedAddMasked - asm: "VPADDS[BWDQ]" - in: - - class: mask - - *int - - *int - out: - - *int -- go: SaturatedAddMasked - asm: "VPADDS[BWDQ]" - in: - - class: mask - - *uint - - *uint - out: - - *uint # Sub - go: Sub @@ -60,14 +36,6 @@ - *any out: &1any - *any -- go: SubMasked - asm: "VPSUB[BWDQ]|VSUBP[SD]" - in: - - class: mask - - *any - - *any - out: - - *any # Saturated Sub - go: SaturatedSub asm: "VPSUBS[BWDQ]" @@ -83,22 +51,6 @@ - *uint out: - *uint -- go: SaturatedSubMasked - asm: "VPSUBS[BWDQ]" - in: - - class: mask - - *int - - *int - out: - - *int -- go: SaturatedSubMasked - asm: "VPSUBS[BWDQ]" - in: - - class: mask - - *uint - - *uint - out: - - *uint - go: PairwiseAdd asm: "VPHADD[DW]" in: *2any @@ -128,77 +80,64 @@ # decided that they want FP bit-wise logic operations, but this irregularity # has to be dealed with in separate rules with some overwrites. -# Int/Uint operations. -# Non-masked for 128/256-bit vectors +# For many bit-wise operations, we have the following non-orthogonal +# choices: +# +# - Non-masked AVX operations have no element width (because it +# doesn't matter), but only cover 128 and 256 bit vectors. +# +# - Masked AVX-512 operations have an element width (because it needs +# to know how to interpret the mask), and cover 128, 256, and 512 bit +# vectors. These only cover 32- and 64-bit element widths. 
+# +# - Non-masked AVX-512 operations still have an element width (because +# they're just the masked operations with an implicit K0 mask) but it +# doesn't matter! This is the only option for non-masked 512 bit +# operations, and we can pick any of the element widths. +# +# We unify with ALL of these operations and the compiler generator +# picks when there are multiple options. + +# TODO: We don't currently generate unmasked bit-wise operations on 512 bit +# vectors of 8- or 16-bit elements. AVX-512 only has *masked* bit-wise +# operations for 32- and 64-bit elements; while the element width doesn't matter +# for unmasked operations, right now we don't realize that we can just use the +# 32- or 64-bit version for the unmasked form. Maybe in the XED decoder we +# should recognize bit-wise operations when generating unmasked versions and +# omit the element width. + # For binary operations, we constrain their two inputs and one output to the -# same Go type using a variable. This will map to instructions before AVX512. +# same Go type using a variable. + - go: And - asm: "VPAND" + asm: "VPAND[DQ]?" in: - &any go: $t - *any out: - *any -# Masked -# Looks like VPAND$xi works only for 2 shapes for integer: -# Dword and Qword. -# TODO: should we wildcard other smaller elemBits to VPANDQ or -# VPANDD? Looks like elemBits doesn't really matter afterall in bitwise operations. -- go: AndMasked - asm: "VPAND[DQ]" - in: - - class: mask - - *any - - *any - out: - - *any - go: AndNot - asm: "VPANDN" + asm: "VPANDN[DQ]?" operandOrder: "21" # switch the arg order in: - *any - *any out: - *any -- go: AndNotMasked - asm: "VPANDN[DQ]" - operandOrder: "21" - in: - - class: mask - - *any - - *any - out: - - *any - go: Or - asm: "VPOR" + asm: "VPOR[DQ]?" 
in: - *any - *any out: - *any -- go: OrMasked - asm: "VPOR[DQ]" - in: - - class: mask - - *any - - *any - out: - - *any - go: Xor - asm: "VPXOR" - in: - - *any - - *any - out: - - *any -- go: XorMasked - asm: "VPXOR[DQ]" + asm: "VPXOR[DQ]?" in: - - class: mask - *any - *any out: @@ -240,38 +179,35 @@ overwriteElementBits: 64 overwriteClass: mask overwriteBase: int -- go: EqualMasked +# AVX-512 compares produce masks. +- go: Equal asm: "V?PCMPEQ[BWDQ]" in: - - class: mask - *any - *any out: - class: mask -- go: GreaterMasked +- go: Greater asm: "V?PCMPGT[BWDQ]" in: - - class: mask - *int - *int out: - class: mask # The const imm predicated compares after AVX512, please see categories.yaml # for const imm specification. -- go: (Equal|Greater|Less|LessEqual|GreaterEqual|NotEqual)Masked +- go: (Equal|Greater|Less|LessEqual|GreaterEqual|NotEqual) asm: "VPCMP[BWDQ]" in: - - class: mask - *int - *int - class: immediate const: 0 # Just a placeholder, will be overwritten by const imm porting. out: - class: mask -- go: (Equal|Greater|Less|LessEqual|GreaterEqual|NotEqual)Masked +- go: (Equal|Greater|Less|LessEqual|GreaterEqual|NotEqual) asm: "VPCMPU[BWDQ]" in: - - class: mask - &uint go: $t base: uint @@ -295,10 +231,9 @@ - go: $t overwriteBase: int overwriteClass: mask -- go: (Equal|Greater|Less|LessEqual|GreaterEqual|NotEqual|IsNan)Masked +- go: (Equal|Greater|Less|LessEqual|GreaterEqual|NotEqual|IsNan) asm: "VCMPP[SD]" in: - - class: mask - *float - *float - class: immediate @@ -314,38 +249,22 @@ - *fp out: &1fp - *fp -- go: DivMasked - asm: "V?DIVP[SD]" - in: &1mask2fp - - class: mask - - *fp - - *fp - out: *1fp - go: Sqrt asm: "V?SQRTP[SD]" in: *1fp out: *1fp -- go: SqrtMasked - asm: "V?SQRTP[SD]" - in: &1mask1fp - - class: mask - - *fp - out: *1fp -- go: ApproximateReciprocalMasked - asm: "VRCP14P[SD]" - in: *1mask1fp +# TODO: Provide separate methods for 12-bit precision and 14-bit precision? 
+- go: ApproximateReciprocal + asm: "VRCP(14)?P[SD]" + in: *1fp out: *1fp - go: ApproximateReciprocalOfSqrt - asm: "V?RSQRTPS" + asm: "V?RSQRT(14)?P[SD]" in: *1fp out: *1fp -- go: ApproximateReciprocalOfSqrtMasked - asm: "VRSQRT14P[SD]" - in: *1mask1fp - out: *1fp -- go: MulByPowOf2Masked +- go: MulByPowOf2 asm: "VSCALEFP[SD]" - in: *1mask2fp + in: *2fp out: *1fp - go: "Round|Ceil|Floor|Trunc" @@ -356,20 +275,18 @@ const: 0 # place holder out: *1fp -- go: "(Round|Ceil|Floor|Trunc)WithPrecisionMasked" +- go: "(Round|Ceil|Floor|Trunc)WithPrecision" asm: "VRNDSCALEP[SD]" in: - - class: mask - *fp - class: immediate const: 0 # place holder immOffset: 4 # "M", round to numbers with M digits after dot(by means of binary number). name: prec out: *1fp -- go: "DiffWith(Round|Ceil|Floor|Trunc)WithPrecisionMasked" +- go: "DiffWith(Round|Ceil|Floor|Trunc)WithPrecision" asm: "VREDUCEP[SD]" in: - - class: mask - *fp - class: immediate const: 0 # place holder @@ -384,12 +301,10 @@ - *fp out: - *fp -- go: GaloisFieldAffineTransformMasked +- go: GaloisFieldAffineTransform asm: VGF2P8AFFINEQB operandOrder: 2I # 2nd operand, then immediate in: &AffineArgs - - class: mask - name: m - &uint8 go: $t base: uint @@ -403,17 +318,16 @@ out: - *uint8 -- go: GaloisFieldAffineTransformInverseMasked +- go: GaloisFieldAffineTransformInverse asm: VGF2P8AFFINEINVQB operandOrder: 2I # 2nd operand, then immediate in: *AffineArgs out: - *uint8 -- go: GaloisFieldMulMasked +- go: GaloisFieldMul asm: VGF2P8MULB in: - - class: mask - *uint8 - *uint8 out: @@ -429,14 +343,6 @@ - *uint_t out: - *uint_t -- go: AverageMasked - asm: "VPAVG[BW]" - in: - - class: mask - - *uint_t - - *uint_t - out: - - *uint_t # Absolute Value (signed byte, word, dword, qword) # Instructions: VPABSB, VPABSW, VPABSD, VPABSQ @@ -448,13 +354,6 @@ base: int out: - *int_t # Output is magnitude, fits in the same signed type -- go: AbsoluteMasked - asm: "VPABS[BWDQ]" - in: - - class: mask - - *int_t - out: - - *int_t # Sign Operation 
(signed byte, word, dword) # Applies sign of second operand to the first. @@ -470,10 +369,9 @@ # Population Count (count set bits in each element) # Instructions: VPOPCNTB, VPOPCNTW (AVX512_BITALG) # VPOPCNTD, VPOPCNTQ (AVX512_VPOPCNTDQ) -- go: PopCountMasked +- go: PopCount asm: "VPOPCNT[BWDQ]" in: - - class: mask - &any go: $t out: @@ -489,37 +387,19 @@ - &int2 # The elemBits are different go: $t2 base: int -- go: PairDotProdMasked - asm: VPMADDWD - in: - - class: mask - - *int - - *int - out: - - *int2 - go: SaturatedUnsignedSignedPairDotProd asm: VPMADDUBSW in: - &uint go: $t base: uint + overwriteElementBits: 8 - &int3 go: $t3 base: int - out: - - *int2 -- go: SaturatedUnsignedSignedPairDotProdMasked - asm: VPMADDUBSW - in: - - class: mask - - go: $t1 - base: uint - overwriteElementBits: 8 - - go: $t2 - base: int overwriteElementBits: 8 out: - - *int3 + - *int2 - go: DotProdBroadcast asm: VDPP[SD] in: @@ -548,16 +428,6 @@ overwriteElementBits: 8 out: - *qdpa_acc -- go: UnsignedSignedQuadDotProdAccumulateMasked - asm: "VPDPBUSD" - operandOrder: "31" # switch operand 3 and 1 - in: - - *qdpa_acc - - class: mask - - *qdpa_src1 - - *qdpa_src2 - out: - - *qdpa_acc - go: SaturatedUnsignedSignedQuadDotProdAccumulate asm: "VPDPBUSDS" operandOrder: "31" # switch operand 3 and 1 @@ -567,16 +437,6 @@ - *qdpa_src2 out: - *qdpa_acc -- go: SaturatedUnsignedSignedQuadDotProdAccumulateMasked - asm: "VPDPBUSDS" - operandOrder: "31" # switch operand 3 and 1 - in: - - *qdpa_acc - - class: mask - - *qdpa_src1 - - *qdpa_src2 - out: - - *qdpa_acc - go: PairDotProdAccumulate asm: "VPDPWSSD" operandOrder: "31" # switch operand 3 and 1 @@ -595,16 +455,6 @@ overwriteElementBits: 16 out: - *pdpa_acc -- go: PairDotProdAccumulateMasked - asm: "VPDPWSSD" - operandOrder: "31" # switch operand 3 and 1 - in: - - *pdpa_acc - - class: mask - - *pdpa_src1 - - *pdpa_src2 - out: - - *pdpa_acc - go: SaturatedPairDotProdAccumulate asm: "VPDPWSSDS" operandOrder: "31" # switch operand 3 and 1 @@ 
-614,41 +464,28 @@ - *pdpa_src2 out: - *pdpa_acc -- go: SaturatedPairDotProdAccumulateMasked - asm: "VPDPWSSDS" - operandOrder: "31" # switch operand 3 and 1 - in: - - *pdpa_acc - - class: mask - - *pdpa_src1 - - *pdpa_src2 - out: - - *pdpa_acc -- go: FusedMultiplyAddMasked +- go: FusedMultiplyAdd asm: "VFMADD213PS|VFMADD213PD" in: - &fma_op go: $t base: float - - class: mask - *fma_op - *fma_op out: - *fma_op -- go: FusedMultiplyAddSubMasked +- go: FusedMultiplyAddSub asm: "VFMADDSUB213PS|VFMADDSUB213PD" in: - *fma_op - - class: mask - *fma_op - *fma_op out: - *fma_op -- go: FusedMultiplySubAddMasked +- go: FusedMultiplySubAdd asm: "VFMSUBADD213PS|VFMSUBADD213PD" in: - *fma_op - - class: mask - *fma_op - *fma_op out: @@ -671,20 +508,6 @@ - *uint out: &1uint - *uint -- go: MaxMasked - asm: "V?PMAXS[BWDQ]" - in: &1mask2int - - class: mask - - *int - - *int - out: *1int -- go: MaxMasked - asm: "V?PMAXU[BWDQ]" - in: &1mask2uint - - class: mask - - *uint - - *uint - out: *1uint - go: Min asm: "V?PMINS[BWDQ]" @@ -694,14 +517,6 @@ asm: "V?PMINU[BWDQ]" in: *2uint out: *1uint -- go: MinMasked - asm: "V?PMINS[BWDQ]" - in: *1mask2int - out: *1int -- go: MinMasked - asm: "V?PMINU[BWDQ]" - in: *1mask2uint - out: *1uint - go: Max asm: "V?MAXP[SD]" @@ -712,21 +527,10 @@ - *float out: &1float - *float -- go: MaxMasked - asm: "V?MAXP[SD]" - in: &1mask2float - - class: mask - - *float - - *float - out: *1float - go: Min asm: "V?MINP[SD]" in: *2float out: *1float -- go: MinMasked - asm: "V?MINP[SD]" - in: *1mask2float - out: *1float - go: SetElem asm: "VPINSR[BWDQ]" in: @@ -920,17 +724,7 @@ out: - *any -- go: PermuteMasked - asm: "VPERM[BWDQ]|VPERMP[SD]" - operandOrder: "21Type1" - in: - - class: mask - - *anyindices - - *any - out: - - *any - -- go: Permute2Masked +- go: Permute2 asm: "VPERMI2[BWDQ]|VPERMI2P[SD]" # Because we are overwriting the receiver's type, we # have to move the receiver to be a parameter so that @@ -938,7 +732,6 @@ operandOrder: "231Type1" in: - *anyindices # 
result in arg 0 - - class: mask - *any - *any out: @@ -947,6 +740,7 @@ - go: Compress asm: "VPCOMPRESS[BWDQ]|VCOMPRESSP[SD]" in: + # The mask in Compress is a control mask rather than a write mask, so it's not optional. - class: mask - *any out: @@ -962,14 +756,6 @@ - *fp out: - *fp -- go: MulMasked - asm: "VMULP[SD]" - in: - - class: mask - - *fp - - *fp - out: - - *fp # Integer multiplications. @@ -997,26 +783,9 @@ - &uint2 go: $t2 base: uint -- go: MulEvenWidenMasked - asm: "VPMULDQ" - in: - - class: mask - - *int - - *int - out: - - *int2 -- go: MulEvenWidenMasked - asm: "VPMULUDQ" - in: - - class: mask - - *uint - - *uint - out: - - *uint2 # MulHigh # Word only. -# Non-masked - go: MulHigh asm: "VPMULHW" in: @@ -1031,26 +800,9 @@ - *uint out: - *uint2 -- go: MulHighMasked - asm: "VPMULHW" - in: - - class: mask - - *int - - *int - out: - - *int2 -- go: MulHighMasked - asm: "VPMULHUW" - in: - - class: mask - - *uint - - *uint - out: - - *uint2 # MulLow # Signed int only. -# Non-masked - go: MulLow asm: "VPMULL[WDQ]" in: @@ -1058,14 +810,6 @@ - *int out: - *int2 -- go: MulLowMasked - asm: "VPMULL[WDQ]" - in: - - class: mask - - *int - - *int - out: - - *int2 # Integers # ShiftAll* - go: ShiftAllLeft @@ -1078,14 +822,6 @@ treatLikeAScalarOfSize: 64 out: - *any -- go: ShiftAllLeftMasked - asm: "VPSLL[WDQ]" - in: - - class: mask - - *any - - *vecAsScalar64 - out: - - *any - go: ShiftAllRight signed: false asm: "VPSRL[WDQ]" @@ -1096,15 +832,6 @@ - *vecAsScalar64 out: - *uint -- go: ShiftAllRightMasked - signed: false - asm: "VPSRL[WDQ]" - in: - - class: mask - - *uint - - *vecAsScalar64 - out: - - *uint - go: ShiftAllRight signed: true asm: "VPSRA[WDQ]" @@ -1115,15 +842,6 @@ - *vecAsScalar64 out: - *int -- go: ShiftAllRightMasked - signed: true - asm: "VPSRA[WDQ]" - in: - - class: mask - - *int - - *vecAsScalar64 - out: - - *int # Shift* (variable) - go: ShiftLeft @@ -1133,14 +851,6 @@ - *any out: - *any -- go: ShiftLeftMasked - asm: "VPSLLV[WD]" - in: - - class: 
mask - - *any - - *any - out: - - *any # XED data of VPSLLVQ marks the element bits 32 which is off to the actual semantic, we need to overwrite # it to 64. - go: ShiftLeft @@ -1152,14 +862,6 @@ - *anyOverwriteElemBits out: - *anyOverwriteElemBits -- go: ShiftLeftMasked - asm: "VPSLLVQ" - in: - - class: mask - - *anyOverwriteElemBits - - *anyOverwriteElemBits - out: - - *anyOverwriteElemBits - go: ShiftRight signed: false asm: "VPSRLV[WD]" @@ -1168,15 +870,6 @@ - *uint out: - *uint -- go: ShiftRightMasked - signed: false - asm: "VPSRLV[WD]" - in: - - class: mask - - *uint - - *uint - out: - - *uint # XED data of VPSRLVQ needs the same overwrite as VPSLLVQ. - go: ShiftRight signed: false @@ -1189,15 +882,6 @@ - *uintOverwriteElemBits out: - *uintOverwriteElemBits -- go: ShiftRightMasked - signed: false - asm: "VPSRLVQ" - in: - - class: mask - - *uintOverwriteElemBits - - *uintOverwriteElemBits - out: - - *uintOverwriteElemBits - go: ShiftRight signed: true asm: "VPSRAV[WDQ]" @@ -1206,21 +890,11 @@ - *int out: - *int -- go: ShiftRightMasked - signed: true - asm: "VPSRAV[WDQ]" - in: - - class: mask - - *int - - *int - out: - - *int # Rotate -- go: RotateAllLeftMasked +- go: RotateAllLeft asm: "VPROL[DQ]" in: - - class: mask - *any - &pureImm class: immediate @@ -1228,64 +902,57 @@ name: shift out: - *any -- go: RotateAllRightMasked +- go: RotateAllRight asm: "VPROR[DQ]" in: - - class: mask - *any - *pureImm out: - *any -- go: RotateLeftMasked +- go: RotateLeft asm: "VPROLV[DQ]" in: - - class: mask - *any - *any out: - *any -- go: RotateRightMasked +- go: RotateRight asm: "VPRORV[DQ]" in: - - class: mask - *any - *any out: - *any # Bizzare shifts. 
-- go: ShiftAllLeftAndFillUpperFromMasked +- go: ShiftAllLeftAndFillUpperFrom asm: "VPSHLD[WDQ]" in: - - class: mask - *any - *any - *pureImm out: - *any -- go: ShiftAllRightAndFillUpperFromMasked +- go: ShiftAllRightAndFillUpperFrom asm: "VPSHRD[WDQ]" in: - - class: mask - *any - *any - *pureImm out: - *any -- go: ShiftLeftAndFillUpperFromMasked +- go: ShiftLeftAndFillUpperFrom asm: "VPSHLDV[WDQ]" in: - *any - - class: mask - *any - *any out: - *any -- go: ShiftRightAndFillUpperFromMasked +- go: ShiftRightAndFillUpperFrom asm: "VPSHRDV[WDQ]" in: - *any - - class: mask - *any - *any out: diff --git a/internal/simdgen/godefs.go b/internal/simdgen/godefs.go index b45c249f..741214bb 100644 --- a/internal/simdgen/godefs.go +++ b/internal/simdgen/godefs.go @@ -18,27 +18,38 @@ import ( type Operation struct { rawOperation + // Go is the Go method name of this operation. + // + // It is derived from the raw Go method name by adding optional suffixes. + // Currently, "Masked" is the only suffix. + Go string + // Documentation is the doc string for this API. // // It is computed from the raw documentation: // // - "NAME" is replaced by the Go method name. // - // - For masked operation, the method name is updated and a sentence about - // masking is added. + // - For masked operation, a sentence about masking is added. Documentation string + + // In is the sequence of parameters to the Go method. + // + // For masked operations, this will have the mask operand appended. + In []Operand } // rawOperation is the unifier representation of an [Operation]. It is // translated into a more parsed form after unifier decoding. 
type rawOperation struct { - Go string // Go method name + Go string // Base Go method name GoArch string // GOARCH for this definition Asm string // Assembly mnemonic OperandOrder *string // optional Operand order for better Go declarations - In []Operand // Arguments + In []Operand // Parameters + InVariant []Operand // Optional parameters Out []Operand // Results Commutative bool // Commutativity Extension string // Extension @@ -49,9 +60,6 @@ type rawOperation struct { // ConstMask is a hack to reduce the size of defs the user writes for const-immediate // If present, it will be copied to [In[0].Const]. ConstImm *string - // Masked indicates that this is a masked operation, this field has to be set for masked operations - // otherwise simdgen won't recognize it in [splitMask]. - Masked *bool // NameAndSizeCheck is used to check [BWDQ] maps to (8|16|32|64) elemBits. NameAndSizeCheck *bool } @@ -61,6 +69,21 @@ func (o *Operation) DecodeUnified(v *unify.Value) error { return err } + isMasked := false + if len(o.InVariant) == 0 { + // No variant + } else if len(o.InVariant) == 1 && o.InVariant[0].Class == "mask" { + isMasked = true + } else { + return fmt.Errorf("unknown inVariant") + } + + // Compute full Go method name. + o.Go = o.rawOperation.Go + if isMasked { + o.Go += "Masked" + } + // Compute doc string. if o.rawOperation.Documentation != nil { o.Documentation = *o.rawOperation.Documentation @@ -68,6 +91,11 @@ func (o *Operation) DecodeUnified(v *unify.Value) error { o.Documentation = "// UNDOCUMENTED" } o.Documentation = regexp.MustCompile(`\bNAME\b`).ReplaceAllString(o.Documentation, o.Go) + if isMasked { + o.Documentation += "\n//\n// This operation is applied selectively under a write mask." + } + + o.In = append(o.rawOperation.In, o.rawOperation.InVariant...) 
return nil } @@ -296,12 +324,6 @@ func writeGoDefs(path string, cl unify.Closure) error { if *Verbose { log.Printf("dedup len: %d\n", len(deduped)) } - if !*FlagNoSplitMask { - if deduped, err = splitMask(deduped); err != nil { - return err - } - } - insertMaskDescToDoc(deduped) if *Verbose { log.Printf("dedup len: %d\n", len(deduped)) } diff --git a/internal/simdgen/main.go b/internal/simdgen/main.go index 2b0e65f7..6ac22a68 100644 --- a/internal/simdgen/main.go +++ b/internal/simdgen/main.go @@ -107,7 +107,6 @@ var ( flagO = flag.String("o", "yaml", "output type: yaml, godefs (generate definitions into a Go source tree") flagGoDefRoot = flag.String("goroot", ".", "the path to the Go dev directory that will receive the generated files") FlagNoDedup = flag.Bool("nodedup", false, "disable deduplicating godefs of 2 qualifying operations from different extensions") - FlagNoSplitMask = flag.Bool("nosplitmask", false, "disable splitting the masks to const and non const") FlagNoConstImmPorting = flag.Bool("noconstimmporting", false, "disable const immediate porting from op to imm operand") FlagArch = flag.String("arch", "amd64", "the target architecture") diff --git a/internal/simdgen/ops/AddSub/categories.yaml b/internal/simdgen/ops/AddSub/categories.yaml index 667508b5..2ffd1e23 100644 --- a/internal/simdgen/ops/AddSub/categories.yaml +++ b/internal/simdgen/ops/AddSub/categories.yaml @@ -9,18 +9,6 @@ extension: "AVX.*" documentation: !string |- // NAME adds corresponding elements of two vectors with saturation. -- go: AddMasked - masked: true - commutative: true - extension: "AVX.*" - documentation: !string |- - // NAME adds corresponding elements of two vectors. -- go: SaturatedAddMasked - masked: true - commutative: true - extension: "AVX.*" - documentation: !string |- - // NAME adds corresponding elements of two vectors with saturation. 
- go: Sub commutative: false extension: "AVX.*" @@ -31,18 +19,6 @@ extension: "AVX.*" documentation: !string |- // NAME subtracts corresponding elements of two vectors with saturation. -- go: SubMasked - masked: true - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME subtracts corresponding elements of two vectors. -- go: SaturatedSubMasked - masked: true - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME subtracts corresponding elements of two vectors with saturation. - go: PairwiseAdd commutative: false extension: "AVX.*" diff --git a/internal/simdgen/ops/AddSub/go.yaml b/internal/simdgen/ops/AddSub/go.yaml index 793bc489..c952c150 100644 --- a/internal/simdgen/ops/AddSub/go.yaml +++ b/internal/simdgen/ops/AddSub/go.yaml @@ -8,14 +8,6 @@ - *any out: - *any -- go: AddMasked - asm: "VPADD[BWDQ]|VADDP[SD]" - in: - - class: mask - - *any - - *any - out: - - *any # Saturated Add - go: SaturatedAdd asm: "VPADDS[BWDQ]" @@ -35,22 +27,6 @@ - *uint out: - *uint -- go: SaturatedAddMasked - asm: "VPADDS[BWDQ]" - in: - - class: mask - - *int - - *int - out: - - *int -- go: SaturatedAddMasked - asm: "VPADDS[BWDQ]" - in: - - class: mask - - *uint - - *uint - out: - - *uint # Sub - go: Sub @@ -60,14 +36,6 @@ - *any out: &1any - *any -- go: SubMasked - asm: "VPSUB[BWDQ]|VSUBP[SD]" - in: - - class: mask - - *any - - *any - out: - - *any # Saturated Sub - go: SaturatedSub asm: "VPSUBS[BWDQ]" @@ -83,22 +51,6 @@ - *uint out: - *uint -- go: SaturatedSubMasked - asm: "VPSUBS[BWDQ]" - in: - - class: mask - - *int - - *int - out: - - *int -- go: SaturatedSubMasked - asm: "VPSUBS[BWDQ]" - in: - - class: mask - - *uint - - *uint - out: - - *uint - go: PairwiseAdd asm: "VPHADD[DW]" in: *2any diff --git a/internal/simdgen/ops/BitwiseLogic/categories.yaml b/internal/simdgen/ops/BitwiseLogic/categories.yaml index 3d2eda7c..320cfd18 100644 --- a/internal/simdgen/ops/BitwiseLogic/categories.yaml +++ 
b/internal/simdgen/ops/BitwiseLogic/categories.yaml @@ -4,45 +4,21 @@ extension: "AVX.*" documentation: !string |- // NAME performs a bitwise AND operation between two vectors. -- go: AndMasked - masked: true - commutative: true - extension: "AVX.*" - documentation: !string |- - // NAME performs a bitwise AND operation between two vectors. - go: Or commutative: true extension: "AVX.*" documentation: !string |- // NAME performs a bitwise OR operation between two vectors. -- go: OrMasked - masked: true - commutative: true - extension: "AVX.*" - documentation: !string |- - // NAME performs a bitwise OR operation between two vectors. - go: AndNot commutative: false extension: "AVX.*" documentation: !string |- // NAME performs a bitwise x &^ y. -- go: AndNotMasked - masked: true - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME performs a bitwise x &^ y. - go: Xor commutative: true extension: "AVX.*" documentation: !string |- // NAME performs a bitwise XOR operation between two vectors. -- go: XorMasked - masked: true - commutative: true - extension: "AVX.*" - documentation: !string |- - // NAME performs a bitwise XOR operation between two vectors. # We also have PTEST and VPTERNLOG, those should be hidden from the users # and only appear in rewrite rules. diff --git a/internal/simdgen/ops/BitwiseLogic/go.yaml b/internal/simdgen/ops/BitwiseLogic/go.yaml index acc7a51e..0d0f1c8c 100644 --- a/internal/simdgen/ops/BitwiseLogic/go.yaml +++ b/internal/simdgen/ops/BitwiseLogic/go.yaml @@ -4,78 +4,65 @@ # decided that they want FP bit-wise logic operations, but this irregularity # has to be dealed with in separate rules with some overwrites. -# Int/Uint operations. -# Non-masked for 128/256-bit vectors +# For many bit-wise operations, we have the following non-orthogonal +# choices: +# +# - Non-masked AVX operations have no element width (because it +# doesn't matter), but only cover 128 and 256 bit vectors. 
+# +# - Masked AVX-512 operations have an element width (because it needs +# to know how to interpret the mask), and cover 128, 256, and 512 bit +# vectors. These only cover 32- and 64-bit element widths. +# +# - Non-masked AVX-512 operations still have an element width (because +# they're just the masked operations with an implicit K0 mask) but it +# doesn't matter! This is the only option for non-masked 512 bit +# operations, and we can pick any of the element widths. +# +# We unify with ALL of these operations and the compiler generator +# picks when there are multiple options. + +# TODO: We don't currently generate unmasked bit-wise operations on 512 bit +# vectors of 8- or 16-bit elements. AVX-512 only has *masked* bit-wise +# operations for 32- and 64-bit elements; while the element width doesn't matter +# for unmasked operations, right now we don't realize that we can just use the +# 32- or 64-bit version for the unmasked form. Maybe in the XED decoder we +# should recognize bit-wise operations when generating unmasked versions and +# omit the element width. + # For binary operations, we constrain their two inputs and one output to the -# same Go type using a variable. This will map to instructions before AVX512. +# same Go type using a variable. + - go: And - asm: "VPAND" + asm: "VPAND[DQ]?" in: - &any go: $t - *any out: - *any -# Masked -# Looks like VPAND$xi works only for 2 shapes for integer: -# Dword and Qword. -# TODO: should we wildcard other smaller elemBits to VPANDQ or -# VPANDD? Looks like elemBits doesn't really matter afterall in bitwise operations. -- go: AndMasked - asm: "VPAND[DQ]" - in: - - class: mask - - *any - - *any - out: - - *any - go: AndNot - asm: "VPANDN" + asm: "VPANDN[DQ]?" operandOrder: "21" # switch the arg order in: - *any - *any out: - *any -- go: AndNotMasked - asm: "VPANDN[DQ]" - operandOrder: "21" - in: - - class: mask - - *any - - *any - out: - - *any - go: Or - asm: "VPOR" + asm: "VPOR[DQ]?" 
in: - *any - *any out: - *any -- go: OrMasked - asm: "VPOR[DQ]" - in: - - class: mask - - *any - - *any - out: - - *any - go: Xor - asm: "VPXOR" + asm: "VPXOR[DQ]?" in: - *any - *any out: - *any -- go: XorMasked - asm: "VPXOR[DQ]" - in: - - class: mask - - *any - - *any - out: - - *any \ No newline at end of file diff --git a/internal/simdgen/ops/Compares/categories.yaml b/internal/simdgen/ops/Compares/categories.yaml index e17e45db..e3d990ed 100644 --- a/internal/simdgen/ops/Compares/categories.yaml +++ b/internal/simdgen/ops/Compares/categories.yaml @@ -48,52 +48,3 @@ extension: "AVX.*" documentation: !string |- // NAME compares for greater than. -- go: EqualMasked - constImm: 0 - masked: true - commutative: true - extension: "AVX.*" - documentation: !string |- - // NAME compares for equality. -- go: LessMasked - constImm: 1 - masked: true - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME compares for less than. -- go: LessEqualMasked - constImm: 2 - masked: true - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME compares for less than or equal. -- go: IsNanMasked # For float only. - constImm: 3 - masked: true - commutative: true - extension: "AVX.*" - documentation: !string |- - // NAME checks if elements are NaN. Use as x.IsNan(x). -- go: NotEqualMasked - constImm: 4 - masked: true - commutative: true - extension: "AVX.*" - documentation: !string |- - // NAME compares for inequality. -- go: GreaterEqualMasked - constImm: 13 - masked: true - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME compares for greater than or equal. -- go: GreaterMasked - constImm: 14 - masked: true - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME compares for greater than. 
diff --git a/internal/simdgen/ops/Compares/go.yaml b/internal/simdgen/ops/Compares/go.yaml index c1ea2061..a8e2368f 100644 --- a/internal/simdgen/ops/Compares/go.yaml +++ b/internal/simdgen/ops/Compares/go.yaml @@ -36,38 +36,35 @@ overwriteElementBits: 64 overwriteClass: mask overwriteBase: int -- go: EqualMasked +# AVX-512 compares produce masks. +- go: Equal asm: "V?PCMPEQ[BWDQ]" in: - - class: mask - *any - *any out: - class: mask -- go: GreaterMasked +- go: Greater asm: "V?PCMPGT[BWDQ]" in: - - class: mask - *int - *int out: - class: mask # The const imm predicated compares after AVX512, please see categories.yaml # for const imm specification. -- go: (Equal|Greater|Less|LessEqual|GreaterEqual|NotEqual)Masked +- go: (Equal|Greater|Less|LessEqual|GreaterEqual|NotEqual) asm: "VPCMP[BWDQ]" in: - - class: mask - *int - *int - class: immediate const: 0 # Just a placeholder, will be overwritten by const imm porting. out: - class: mask -- go: (Equal|Greater|Less|LessEqual|GreaterEqual|NotEqual)Masked +- go: (Equal|Greater|Less|LessEqual|GreaterEqual|NotEqual) asm: "VPCMPU[BWDQ]" in: - - class: mask - &uint go: $t base: uint @@ -91,10 +88,9 @@ - go: $t overwriteBase: int overwriteClass: mask -- go: (Equal|Greater|Less|LessEqual|GreaterEqual|NotEqual|IsNan)Masked +- go: (Equal|Greater|Less|LessEqual|GreaterEqual|NotEqual|IsNan) asm: "VCMPP[SD]" in: - - class: mask - *float - *float - class: immediate diff --git a/internal/simdgen/ops/FPonlyArith/categories.yaml b/internal/simdgen/ops/FPonlyArith/categories.yaml index 53292048..0fb727d5 100644 --- a/internal/simdgen/ops/FPonlyArith/categories.yaml +++ b/internal/simdgen/ops/FPonlyArith/categories.yaml @@ -4,48 +4,23 @@ extension: "AVX.*" documentation: !string |- // NAME divides elements of two vectors. -- go: DivMasked - commutative: false - masked: true - extension: "AVX.*" - documentation: !string |- - // NAME divides elements of two vectors. 
- go: Sqrt commutative: false extension: "AVX.*" documentation: !string |- // NAME computes the square root of each element. -- go: SqrtMasked - commutative: false - masked: true - extension: "AVX.*" - documentation: !string |- - // NAME computes the square root of each element. - go: ApproximateReciprocal commutative: false extension: "AVX.*" documentation: !string |- // NAME computes an approximate reciprocal of each element. -- go: ApproximateReciprocalMasked - commutative: false - masked: true - extension: "AVX.*" - documentation: !string |- - // NAME computes an approximate reciprocal of each element. - go: ApproximateReciprocalOfSqrt commutative: false extension: "AVX.*" documentation: !string |- // NAME computes an approximate reciprocal of the square root of each element. -- go: ApproximateReciprocalOfSqrtMasked - commutative: false - masked: true - extension: "AVX.*" - documentation: !string |- - // NAME computes an approximate reciprocal of the square root of each element. -- go: MulByPowOf2Masked # This operation is all after AVX512, the unmasked version will be generated. +- go: MulByPowOf2 commutative: false - masked: true extension: "AVX.*" documentation: !string |- // NAME multiplies elements by a power of 2. @@ -55,18 +30,16 @@ constImm: 0 documentation: !string |- // NAME rounds elements to the nearest integer. -- go: RoundWithPrecisionMasked +- go: RoundWithPrecision commutative: false extension: "AVX.*" constImm: 0 - masked: true documentation: !string |- // NAME rounds elements with specified precision. -- go: DiffWithRoundWithPrecisionMasked +- go: DiffWithRoundWithPrecision commutative: false extension: "AVX.*" constImm: 0 - masked: true documentation: !string |- // NAME computes the difference after rounding with specified precision. - go: Floor @@ -75,18 +48,16 @@ constImm: 1 documentation: !string |- // NAME rounds elements down to the nearest integer. 
-- go: FloorWithPrecisionMasked +- go: FloorWithPrecision commutative: false extension: "AVX.*" constImm: 1 - masked: true documentation: !string |- // NAME rounds elements down with specified precision. -- go: DiffWithFloorWithPrecisionMasked +- go: DiffWithFloorWithPrecision commutative: false extension: "AVX.*" constImm: 1 - masked: true documentation: !string |- // NAME computes the difference after flooring with specified precision. - go: Ceil @@ -95,18 +66,16 @@ constImm: 2 documentation: !string |- // NAME rounds elements up to the nearest integer. -- go: CeilWithPrecisionMasked +- go: CeilWithPrecision commutative: false extension: "AVX.*" constImm: 2 - masked: true documentation: !string |- // NAME rounds elements up with specified precision. -- go: DiffWithCeilWithPrecisionMasked +- go: DiffWithCeilWithPrecision commutative: false extension: "AVX.*" constImm: 2 - masked: true documentation: !string |- // NAME computes the difference after ceiling with specified precision. - go: Trunc @@ -115,18 +84,16 @@ constImm: 3 documentation: !string |- // NAME truncates elements towards zero. -- go: TruncWithPrecisionMasked +- go: TruncWithPrecision commutative: false extension: "AVX.*" constImm: 3 - masked: true documentation: !string |- // NAME truncates elements with specified precision. -- go: DiffWithTruncWithPrecisionMasked +- go: DiffWithTruncWithPrecision commutative: false extension: "AVX.*" constImm: 3 - masked: true documentation: !string |- // NAME computes the difference after truncating with specified precision. 
- go: AddSub diff --git a/internal/simdgen/ops/FPonlyArith/go.yaml b/internal/simdgen/ops/FPonlyArith/go.yaml index d35610df..71d1cb5f 100644 --- a/internal/simdgen/ops/FPonlyArith/go.yaml +++ b/internal/simdgen/ops/FPonlyArith/go.yaml @@ -8,38 +8,22 @@ - *fp out: &1fp - *fp -- go: DivMasked - asm: "V?DIVP[SD]" - in: &1mask2fp - - class: mask - - *fp - - *fp - out: *1fp - go: Sqrt asm: "V?SQRTP[SD]" in: *1fp out: *1fp -- go: SqrtMasked - asm: "V?SQRTP[SD]" - in: &1mask1fp - - class: mask - - *fp - out: *1fp -- go: ApproximateReciprocalMasked - asm: "VRCP14P[SD]" - in: *1mask1fp +# TODO: Provide separate methods for 12-bit precision and 14-bit precision? +- go: ApproximateReciprocal + asm: "VRCP(14)?P[SD]" + in: *1fp out: *1fp - go: ApproximateReciprocalOfSqrt - asm: "V?RSQRTPS" + asm: "V?RSQRT(14)?P[SD]" in: *1fp out: *1fp -- go: ApproximateReciprocalOfSqrtMasked - asm: "VRSQRT14P[SD]" - in: *1mask1fp - out: *1fp -- go: MulByPowOf2Masked +- go: MulByPowOf2 asm: "VSCALEFP[SD]" - in: *1mask2fp + in: *2fp out: *1fp - go: "Round|Ceil|Floor|Trunc" @@ -50,20 +34,18 @@ const: 0 # place holder out: *1fp -- go: "(Round|Ceil|Floor|Trunc)WithPrecisionMasked" +- go: "(Round|Ceil|Floor|Trunc)WithPrecision" asm: "VRNDSCALEP[SD]" in: - - class: mask - *fp - class: immediate const: 0 # place holder immOffset: 4 # "M", round to numbers with M digits after dot(by means of binary number). 
name: prec out: *1fp -- go: "DiffWith(Round|Ceil|Floor|Trunc)WithPrecisionMasked" +- go: "DiffWith(Round|Ceil|Floor|Trunc)WithPrecision" asm: "VREDUCEP[SD]" in: - - class: mask - *fp - class: immediate const: 0 # place holder @@ -77,4 +59,4 @@ - *fp - *fp out: - - *fp \ No newline at end of file + - *fp diff --git a/internal/simdgen/ops/GaloisField/categories.yaml b/internal/simdgen/ops/GaloisField/categories.yaml index 62d8709e..d57b5265 100644 --- a/internal/simdgen/ops/GaloisField/categories.yaml +++ b/internal/simdgen/ops/GaloisField/categories.yaml @@ -1,6 +1,5 @@ !sum -- go: GaloisFieldAffineTransformMasked - masked: true +- go: GaloisFieldAffineTransform commutative: false extension: "AVX.*" documentation: !string |- @@ -8,8 +7,7 @@ // x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrixes; // b is an 8-bit vector. The affine transformation is y * x + b, with each element of y // corresponding to a group of 8 elements in x. -- go: GaloisFieldAffineTransformInverseMasked - masked: true +- go: GaloisFieldAffineTransformInverse commutative: false extension: "AVX.*" documentation: !string |- @@ -18,8 +16,7 @@ // x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrixes; // b is an 8-bit vector. The affine transformation is y * x + b, with each element of y // corresponding to a group of 8 elements in x. 
-- go: GaloisFieldMulMasked - masked: true +- go: GaloisFieldMul commutative: false extension: "AVX.*" documentation: !string |- diff --git a/internal/simdgen/ops/GaloisField/go.yaml b/internal/simdgen/ops/GaloisField/go.yaml index 68875d17..e86211cb 100644 --- a/internal/simdgen/ops/GaloisField/go.yaml +++ b/internal/simdgen/ops/GaloisField/go.yaml @@ -1,10 +1,8 @@ !sum -- go: GaloisFieldAffineTransformMasked +- go: GaloisFieldAffineTransform asm: VGF2P8AFFINEQB operandOrder: 2I # 2nd operand, then immediate in: &AffineArgs - - class: mask - name: m - &uint8 go: $t base: uint @@ -18,18 +16,17 @@ out: - *uint8 -- go: GaloisFieldAffineTransformInverseMasked +- go: GaloisFieldAffineTransformInverse asm: VGF2P8AFFINEINVQB operandOrder: 2I # 2nd operand, then immediate in: *AffineArgs out: - *uint8 -- go: GaloisFieldMulMasked +- go: GaloisFieldMul asm: VGF2P8MULB in: - - class: mask - *uint8 - *uint8 out: - - *uint8 \ No newline at end of file + - *uint8 diff --git a/internal/simdgen/ops/IntOnlyArith/categories.yaml b/internal/simdgen/ops/IntOnlyArith/categories.yaml index 76ab14ba..477b1896 100644 --- a/internal/simdgen/ops/IntOnlyArith/categories.yaml +++ b/internal/simdgen/ops/IntOnlyArith/categories.yaml @@ -4,24 +4,12 @@ extension: "AVX.*" # VPAVGB/W are available across various AVX versions documentation: !string |- // NAME computes the rounded average of corresponding elements. -- go: AverageMasked - commutative: true - masked: true - extension: "AVX512.*" # Masked operations are typically AVX512 - documentation: !string |- - // NAME computes the rounded average of corresponding elements. - go: Absolute commutative: false # Unary operation, not commutative extension: "AVX.*" # VPABSB/W/D are AVX, VPABSQ is AVX512 documentation: !string |- // NAME computes the absolute value of each element. -- go: AbsoluteMasked - commutative: false - masked: true - extension: "AVX512.*" - documentation: !string |- - // NAME computes the absolute value of each element. 
- go: Sign # Applies sign of second operand to first: sign(val, sign_src) commutative: false @@ -30,9 +18,8 @@ // NAME returns the product of the first operand with -1, 0, or 1, // whichever constant is nearest to the value of the second operand. # Sign does not have masked version -- go: PopCountMasked +- go: PopCount commutative: false - masked: true - extension: "AVX512.*" # VPOPCNT instructions are AVX512 (BITALG or VPOPCNTDQ) + extension: "AVX512.*" documentation: !string |- // NAME counts the number of set bits in each element. diff --git a/internal/simdgen/ops/IntOnlyArith/go.yaml b/internal/simdgen/ops/IntOnlyArith/go.yaml index 3ccce6f0..4c73be26 100644 --- a/internal/simdgen/ops/IntOnlyArith/go.yaml +++ b/internal/simdgen/ops/IntOnlyArith/go.yaml @@ -10,14 +10,6 @@ - *uint_t out: - *uint_t -- go: AverageMasked - asm: "VPAVG[BW]" - in: - - class: mask - - *uint_t - - *uint_t - out: - - *uint_t # Absolute Value (signed byte, word, dword, qword) # Instructions: VPABSB, VPABSW, VPABSD, VPABSQ @@ -29,13 +21,6 @@ base: int out: - *int_t # Output is magnitude, fits in the same signed type -- go: AbsoluteMasked - asm: "VPABS[BWDQ]" - in: - - class: mask - - *int_t - out: - - *int_t # Sign Operation (signed byte, word, dword) # Applies sign of second operand to the first. 
@@ -51,11 +36,10 @@ # Population Count (count set bits in each element) # Instructions: VPOPCNTB, VPOPCNTW (AVX512_BITALG) # VPOPCNTD, VPOPCNTQ (AVX512_VPOPCNTDQ) -- go: PopCountMasked +- go: PopCount asm: "VPOPCNT[BWDQ]" in: - - class: mask - &any go: $t out: - - *any \ No newline at end of file + - *any diff --git a/internal/simdgen/ops/MLOps/categories.yaml b/internal/simdgen/ops/MLOps/categories.yaml index 65f7462e..c90942de 100644 --- a/internal/simdgen/ops/MLOps/categories.yaml +++ b/internal/simdgen/ops/MLOps/categories.yaml @@ -5,13 +5,6 @@ documentation: !string |- // NAME multiplies the elements and add the pairs together, // yielding a vector of half as many elements with twice the input element size. -- go: PairDotProdMasked - masked: true - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME multiplies the elements and add the pairs together, - // yielding a vector of half as many elements with twice the input element size. # TODO: maybe simplify this name within the receiver-type + method-naming scheme we use. - go: SaturatedUnsignedSignedPairDotProd commutative: false @@ -19,13 +12,6 @@ documentation: !string |- // NAME multiplies the elements and add the pairs together with saturation, // yielding a vector of half as many elements with twice the input element size. -- go: SaturatedUnsignedSignedPairDotProdMasked - masked: true - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME multiplies the elements and add the pairs together with saturation, - // yielding a vector of half as many elements with twice the input element size. # QuadDotProd, i.e. VPDPBUSD(S) are operations with src/dst on the same register, we are not supporting this as of now. - go: DotProdBroadcast commutative: true @@ -37,59 +23,32 @@ extension: "AVX.*" documentation: !string |- // NAME performs dot products on groups of 4 elements of x and y and then adds z. 
-- go: UnsignedSignedQuadDotProdAccumulateMasked - masked: true - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME performs dot products on groups of 4 elements of x and y and then adds z. - go: SaturatedUnsignedSignedQuadDotProdAccumulate commutative: false extension: "AVX.*" documentation: !string |- // NAME multiplies performs dot products on groups of 4 elements of x and y and then adds z. -- go: SaturatedUnsignedSignedQuadDotProdAccumulateMasked - masked: true - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME multiplies performs dot products on groups of 4 elements of x and y and then adds z. - go: PairDotProdAccumulate commutative: false extension: "AVX.*" documentation: !string |- // NAME performs dot products on pairs of elements of x and y and then adds z. -- go: PairDotProdAccumulateMasked - masked: true - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME performs dot products on pairs of elements of x and y and then adds z. - go: SaturatedPairDotProdAccumulate commutative: false extension: "AVX.*" documentation: !string |- // NAME performs dot products on pairs of elements of x and y and then adds z. -- go: SaturatedPairDotProdAccumulateMasked - masked: true - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME performs dot products on pairs of elements of x and y and then adds z. -- go: FusedMultiplyAddMasked - masked: true +- go: FusedMultiplyAdd commutative: false extension: "AVX.*" documentation: !string |- // NAME performs (x * y) + z. -- go: FusedMultiplyAddSubMasked - masked: true +- go: FusedMultiplyAddSub commutative: false extension: "AVX.*" documentation: !string |- // NAME performs (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements. 
-- go: FusedMultiplySubAddMasked - masked: true +- go: FusedMultiplySubAdd commutative: false extension: "AVX.*" documentation: !string |- diff --git a/internal/simdgen/ops/MLOps/go.yaml b/internal/simdgen/ops/MLOps/go.yaml index 76512b1e..2fb3e52f 100644 --- a/internal/simdgen/ops/MLOps/go.yaml +++ b/internal/simdgen/ops/MLOps/go.yaml @@ -10,37 +10,19 @@ - &int2 # The elemBits are different go: $t2 base: int -- go: PairDotProdMasked - asm: VPMADDWD - in: - - class: mask - - *int - - *int - out: - - *int2 - go: SaturatedUnsignedSignedPairDotProd asm: VPMADDUBSW in: - &uint go: $t base: uint + overwriteElementBits: 8 - &int3 go: $t3 base: int - out: - - *int2 -- go: SaturatedUnsignedSignedPairDotProdMasked - asm: VPMADDUBSW - in: - - class: mask - - go: $t1 - base: uint - overwriteElementBits: 8 - - go: $t2 - base: int overwriteElementBits: 8 out: - - *int3 + - *int2 - go: DotProdBroadcast asm: VDPP[SD] in: @@ -69,16 +51,6 @@ overwriteElementBits: 8 out: - *qdpa_acc -- go: UnsignedSignedQuadDotProdAccumulateMasked - asm: "VPDPBUSD" - operandOrder: "31" # switch operand 3 and 1 - in: - - *qdpa_acc - - class: mask - - *qdpa_src1 - - *qdpa_src2 - out: - - *qdpa_acc - go: SaturatedUnsignedSignedQuadDotProdAccumulate asm: "VPDPBUSDS" operandOrder: "31" # switch operand 3 and 1 @@ -88,16 +60,6 @@ - *qdpa_src2 out: - *qdpa_acc -- go: SaturatedUnsignedSignedQuadDotProdAccumulateMasked - asm: "VPDPBUSDS" - operandOrder: "31" # switch operand 3 and 1 - in: - - *qdpa_acc - - class: mask - - *qdpa_src1 - - *qdpa_src2 - out: - - *qdpa_acc - go: PairDotProdAccumulate asm: "VPDPWSSD" operandOrder: "31" # switch operand 3 and 1 @@ -116,16 +78,6 @@ overwriteElementBits: 16 out: - *pdpa_acc -- go: PairDotProdAccumulateMasked - asm: "VPDPWSSD" - operandOrder: "31" # switch operand 3 and 1 - in: - - *pdpa_acc - - class: mask - - *pdpa_src1 - - *pdpa_src2 - out: - - *pdpa_acc - go: SaturatedPairDotProdAccumulate asm: "VPDPWSSDS" operandOrder: "31" # switch operand 3 and 1 @@ -135,41 
+87,28 @@ - *pdpa_src2 out: - *pdpa_acc -- go: SaturatedPairDotProdAccumulateMasked - asm: "VPDPWSSDS" - operandOrder: "31" # switch operand 3 and 1 - in: - - *pdpa_acc - - class: mask - - *pdpa_src1 - - *pdpa_src2 - out: - - *pdpa_acc -- go: FusedMultiplyAddMasked +- go: FusedMultiplyAdd asm: "VFMADD213PS|VFMADD213PD" in: - &fma_op go: $t base: float - - class: mask - *fma_op - *fma_op out: - *fma_op -- go: FusedMultiplyAddSubMasked +- go: FusedMultiplyAddSub asm: "VFMADDSUB213PS|VFMADDSUB213PD" in: - *fma_op - - class: mask - *fma_op - *fma_op out: - *fma_op -- go: FusedMultiplySubAddMasked +- go: FusedMultiplySubAdd asm: "VFMSUBADD213PS|VFMSUBADD213PD" in: - *fma_op - - class: mask - *fma_op - *fma_op out: diff --git a/internal/simdgen/ops/MinMax/categories.yaml b/internal/simdgen/ops/MinMax/categories.yaml index ce87994f..9ac0d3d4 100644 --- a/internal/simdgen/ops/MinMax/categories.yaml +++ b/internal/simdgen/ops/MinMax/categories.yaml @@ -4,20 +4,8 @@ extension: "AVX.*" documentation: !string |- // NAME computes the maximum of corresponding elements. -- go: MaxMasked - commutative: true - masked: true - extension: "AVX.*" - documentation: !string |- - // NAME computes the maximum of corresponding elements. - go: Min commutative: true extension: "AVX.*" documentation: !string |- // NAME computes the minimum of corresponding elements. -- go: MinMasked - commutative: true - masked: true - extension: "AVX.*" - documentation: !string |- - // NAME computes the minimum of corresponding elements. 
diff --git a/internal/simdgen/ops/MinMax/go.yaml b/internal/simdgen/ops/MinMax/go.yaml index db4286f3..55f1e18b 100644 --- a/internal/simdgen/ops/MinMax/go.yaml +++ b/internal/simdgen/ops/MinMax/go.yaml @@ -17,20 +17,6 @@ - *uint out: &1uint - *uint -- go: MaxMasked - asm: "V?PMAXS[BWDQ]" - in: &1mask2int - - class: mask - - *int - - *int - out: *1int -- go: MaxMasked - asm: "V?PMAXU[BWDQ]" - in: &1mask2uint - - class: mask - - *uint - - *uint - out: *1uint - go: Min asm: "V?PMINS[BWDQ]" @@ -40,14 +26,6 @@ asm: "V?PMINU[BWDQ]" in: *2uint out: *1uint -- go: MinMasked - asm: "V?PMINS[BWDQ]" - in: *1mask2int - out: *1int -- go: MinMasked - asm: "V?PMINU[BWDQ]" - in: *1mask2uint - out: *1uint - go: Max asm: "V?MAXP[SD]" @@ -58,18 +36,7 @@ - *float out: &1float - *float -- go: MaxMasked - asm: "V?MAXP[SD]" - in: &1mask2float - - class: mask - - *float - - *float - out: *1float - go: Min asm: "V?MINP[SD]" in: *2float out: *1float -- go: MinMasked - asm: "V?MINP[SD]" - in: *1mask2float - out: *1float \ No newline at end of file diff --git a/internal/simdgen/ops/Moves/categories.yaml b/internal/simdgen/ops/Moves/categories.yaml index dd30ca8a..a6dd7bab 100644 --- a/internal/simdgen/ops/Moves/categories.yaml +++ b/internal/simdgen/ops/Moves/categories.yaml @@ -26,17 +26,8 @@ // NAME performs a full permutation of vector x using indices: // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} // Only the needed bits to represent x's index are used in indices' elements. -- go: PermuteMasked +- go: Permute2 # Permute2 is only available on or after AVX512 commutative: false - masked: true - extension: "AVX.*" - documentation: !string |- - // NAME performs a full permutation of vector y using indices: - // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} - // Only the needed bits to represent x's index are used in indices' elements. 
-- go: Permute2Masked # Permute2Masked is only available on or after AVX512 - commutative: false - masked: true extension: "AVX.*" documentation: !string |- // NAME performs a full permutation of vector x, y using indices: @@ -45,7 +36,6 @@ // Only the needed bits to represent xy's index are used in indices' elements. - go: Compress commutative: false - # The mask in Compress is a control mask rather than a write mask, so we don't mark it "masked" extension: "AVX.*" documentation: !string |- // NAME performs a compression on vector x using mask by diff --git a/internal/simdgen/ops/Moves/go.yaml b/internal/simdgen/ops/Moves/go.yaml index 44a1c3c3..c1dd6e4d 100644 --- a/internal/simdgen/ops/Moves/go.yaml +++ b/internal/simdgen/ops/Moves/go.yaml @@ -192,17 +192,7 @@ out: - *any -- go: PermuteMasked - asm: "VPERM[BWDQ]|VPERMP[SD]" - operandOrder: "21Type1" - in: - - class: mask - - *anyindices - - *any - out: - - *any - -- go: Permute2Masked +- go: Permute2 asm: "VPERMI2[BWDQ]|VPERMI2P[SD]" # Because we are overwriting the receiver's type, we # have to move the receiver to be a parameter so that @@ -210,7 +200,6 @@ operandOrder: "231Type1" in: - *anyindices # result in arg 0 - - class: mask - *any - *any out: @@ -219,7 +208,8 @@ - go: Compress asm: "VPCOMPRESS[BWDQ]|VCOMPRESSP[SD]" in: + # The mask in Compress is a control mask rather than a write mask, so it's not optional. - class: mask - *any out: - - *any \ No newline at end of file + - *any diff --git a/internal/simdgen/ops/Mul/categories.yaml b/internal/simdgen/ops/Mul/categories.yaml index 8dc51f45..9a9b8328 100644 --- a/internal/simdgen/ops/Mul/categories.yaml +++ b/internal/simdgen/ops/Mul/categories.yaml @@ -20,28 +20,3 @@ extension: "AVX.*" documentation: !string |- // NAME multiplies elements and stores the low part of the result. -- go: MulMasked - masked: true - commutative: true - extension: "AVX.*" - documentation: !string |- - // NAME multiplies corresponding elements of two vectors. 
-- go: MulEvenWidenMasked - masked: true - commutative: true - extension: "AVX.*" - documentation: !string |- - // NAME multiplies even-indexed elements, widening the result. - // Result[i] = v1.Even[i] * v2.Even[i]. -- go: MulHighMasked - masked: true - commutative: true - extension: "AVX.*" - documentation: !string |- - // NAME multiplies elements and stores the high part of the result. -- go: MulLowMasked - masked: true - commutative: true - extension: "AVX.*" - documentation: !string |- - // NAME multiplies elements and stores the low part of the result. diff --git a/internal/simdgen/ops/Mul/go.yaml b/internal/simdgen/ops/Mul/go.yaml index 9ae3a850..c9ddeb4a 100644 --- a/internal/simdgen/ops/Mul/go.yaml +++ b/internal/simdgen/ops/Mul/go.yaml @@ -10,14 +10,6 @@ - *fp out: - *fp -- go: MulMasked - asm: "VMULP[SD]" - in: - - class: mask - - *fp - - *fp - out: - - *fp # Integer multiplications. @@ -45,26 +37,9 @@ - &uint2 go: $t2 base: uint -- go: MulEvenWidenMasked - asm: "VPMULDQ" - in: - - class: mask - - *int - - *int - out: - - *int2 -- go: MulEvenWidenMasked - asm: "VPMULUDQ" - in: - - class: mask - - *uint - - *uint - out: - - *uint2 # MulHigh # Word only. -# Non-masked - go: MulHigh asm: "VPMULHW" in: @@ -79,26 +54,9 @@ - *uint out: - *uint2 -- go: MulHighMasked - asm: "VPMULHW" - in: - - class: mask - - *int - - *int - out: - - *int2 -- go: MulHighMasked - asm: "VPMULHUW" - in: - - class: mask - - *uint - - *uint - out: - - *uint2 # MulLow # Signed int only. 
-# Non-masked - go: MulLow asm: "VPMULL[WDQ]" in: @@ -106,11 +64,3 @@ - *int out: - *int2 -- go: MulLowMasked - asm: "VPMULL[WDQ]" - in: - - class: mask - - *int - - *int - out: - - *int2 \ No newline at end of file diff --git a/internal/simdgen/ops/ShiftRotate/categories.yaml b/internal/simdgen/ops/ShiftRotate/categories.yaml index 71e78251..f9a92652 100644 --- a/internal/simdgen/ops/ShiftRotate/categories.yaml +++ b/internal/simdgen/ops/ShiftRotate/categories.yaml @@ -5,13 +5,6 @@ extension: "AVX.*" documentation: !string |- // NAME shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. -- go: ShiftAllLeftMasked - nameAndSizeCheck: true - masked: true - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. - go: ShiftAllRight signed: false nameAndSizeCheck: true @@ -19,14 +12,6 @@ extension: "AVX.*" documentation: !string |- // NAME shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. -- go: ShiftAllRightMasked - signed: false - nameAndSizeCheck: true - masked: true - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. - go: ShiftAllRight signed: true nameAndSizeCheck: true @@ -34,27 +19,12 @@ extension: "AVX.*" documentation: !string |- // NAME shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. -- go: ShiftAllRightMasked - signed: true - nameAndSizeCheck: true - masked: true - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. 
- go: ShiftLeft nameAndSizeCheck: true commutative: false extension: "AVX.*" documentation: !string |- // NAME shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. -- go: ShiftLeftMasked - nameAndSizeCheck: true - masked: true - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. - go: ShiftRight signed: false nameAndSizeCheck: true @@ -62,14 +32,6 @@ extension: "AVX.*" documentation: !string |- // NAME shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. -- go: ShiftRightMasked - signed: false - nameAndSizeCheck: true - masked: true - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. - go: ShiftRight signed: true nameAndSizeCheck: true @@ -77,69 +39,53 @@ extension: "AVX.*" documentation: !string |- // NAME shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. -- go: ShiftRightMasked - signed: true - nameAndSizeCheck: true - masked: true - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. -- go: RotateAllLeftMasked +- go: RotateAllLeft nameAndSizeCheck: true - masked: true commutative: false extension: "AVX.*" documentation: !string |- // NAME rotates each element to the left by the number of bits specified by the immediate. 
-- go: RotateLeftMasked +- go: RotateLeft nameAndSizeCheck: true - masked: true commutative: false extension: "AVX.*" documentation: !string |- // NAME rotates each element in x to the left by the number of bits specified by y's corresponding elements. -- go: RotateAllRightMasked +- go: RotateAllRight nameAndSizeCheck: true - masked: true commutative: false extension: "AVX.*" documentation: !string |- // NAME rotates each element to the right by the number of bits specified by the immediate. -- go: RotateRightMasked +- go: RotateRight nameAndSizeCheck: true - masked: true commutative: false extension: "AVX.*" documentation: !string |- // NAME rotates each element in x to the right by the number of bits specified by y's corresponding elements. -- go: ShiftAllLeftAndFillUpperFromMasked +- go: ShiftAllLeftAndFillUpperFrom nameAndSizeCheck: true - masked: true commutative: false extension: "AVX.*" documentation: !string |- // NAME shifts each element of x to the left by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. -- go: ShiftAllRightAndFillUpperFromMasked +- go: ShiftAllRightAndFillUpperFrom nameAndSizeCheck: true - masked: true commutative: false extension: "AVX.*" documentation: !string |- // NAME shifts each element of x to the right by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. -- go: ShiftLeftAndFillUpperFromMasked +- go: ShiftLeftAndFillUpperFrom nameAndSizeCheck: true - masked: true commutative: false extension: "AVX.*" documentation: !string |- // NAME shifts each element of x to the left by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. 
-- go: ShiftRightAndFillUpperFromMasked +- go: ShiftRightAndFillUpperFrom nameAndSizeCheck: true - masked: true commutative: false extension: "AVX.*" documentation: !string |- diff --git a/internal/simdgen/ops/ShiftRotate/go.yaml b/internal/simdgen/ops/ShiftRotate/go.yaml index 637de935..ff4c3156 100644 --- a/internal/simdgen/ops/ShiftRotate/go.yaml +++ b/internal/simdgen/ops/ShiftRotate/go.yaml @@ -11,14 +11,6 @@ treatLikeAScalarOfSize: 64 out: - *any -- go: ShiftAllLeftMasked - asm: "VPSLL[WDQ]" - in: - - class: mask - - *any - - *vecAsScalar64 - out: - - *any - go: ShiftAllRight signed: false asm: "VPSRL[WDQ]" @@ -29,15 +21,6 @@ - *vecAsScalar64 out: - *uint -- go: ShiftAllRightMasked - signed: false - asm: "VPSRL[WDQ]" - in: - - class: mask - - *uint - - *vecAsScalar64 - out: - - *uint - go: ShiftAllRight signed: true asm: "VPSRA[WDQ]" @@ -48,15 +31,6 @@ - *vecAsScalar64 out: - *int -- go: ShiftAllRightMasked - signed: true - asm: "VPSRA[WDQ]" - in: - - class: mask - - *int - - *vecAsScalar64 - out: - - *int # Shift* (variable) - go: ShiftLeft @@ -66,14 +40,6 @@ - *any out: - *any -- go: ShiftLeftMasked - asm: "VPSLLV[WD]" - in: - - class: mask - - *any - - *any - out: - - *any # XED data of VPSLLVQ marks the element bits 32 which is off to the actual semantic, we need to overwrite # it to 64. - go: ShiftLeft @@ -85,14 +51,6 @@ - *anyOverwriteElemBits out: - *anyOverwriteElemBits -- go: ShiftLeftMasked - asm: "VPSLLVQ" - in: - - class: mask - - *anyOverwriteElemBits - - *anyOverwriteElemBits - out: - - *anyOverwriteElemBits - go: ShiftRight signed: false asm: "VPSRLV[WD]" @@ -101,15 +59,6 @@ - *uint out: - *uint -- go: ShiftRightMasked - signed: false - asm: "VPSRLV[WD]" - in: - - class: mask - - *uint - - *uint - out: - - *uint # XED data of VPSRLVQ needs the same overwrite as VPSLLVQ. 
- go: ShiftRight signed: false @@ -122,15 +71,6 @@ - *uintOverwriteElemBits out: - *uintOverwriteElemBits -- go: ShiftRightMasked - signed: false - asm: "VPSRLVQ" - in: - - class: mask - - *uintOverwriteElemBits - - *uintOverwriteElemBits - out: - - *uintOverwriteElemBits - go: ShiftRight signed: true asm: "VPSRAV[WDQ]" @@ -139,21 +79,11 @@ - *int out: - *int -- go: ShiftRightMasked - signed: true - asm: "VPSRAV[WDQ]" - in: - - class: mask - - *int - - *int - out: - - *int # Rotate -- go: RotateAllLeftMasked +- go: RotateAllLeft asm: "VPROL[DQ]" in: - - class: mask - *any - &pureImm class: immediate @@ -161,65 +91,58 @@ name: shift out: - *any -- go: RotateAllRightMasked +- go: RotateAllRight asm: "VPROR[DQ]" in: - - class: mask - *any - *pureImm out: - *any -- go: RotateLeftMasked +- go: RotateLeft asm: "VPROLV[DQ]" in: - - class: mask - *any - *any out: - *any -- go: RotateRightMasked +- go: RotateRight asm: "VPRORV[DQ]" in: - - class: mask - *any - *any out: - *any # Bizzare shifts. -- go: ShiftAllLeftAndFillUpperFromMasked +- go: ShiftAllLeftAndFillUpperFrom asm: "VPSHLD[WDQ]" in: - - class: mask - *any - *any - *pureImm out: - *any -- go: ShiftAllRightAndFillUpperFromMasked +- go: ShiftAllRightAndFillUpperFrom asm: "VPSHRD[WDQ]" in: - - class: mask - *any - *any - *pureImm out: - *any -- go: ShiftLeftAndFillUpperFromMasked +- go: ShiftLeftAndFillUpperFrom asm: "VPSHLDV[WDQ]" in: - *any - - class: mask - *any - *any out: - *any -- go: ShiftRightAndFillUpperFromMasked +- go: ShiftRightAndFillUpperFrom asm: "VPSHRDV[WDQ]" in: - *any - - class: mask - *any - *any out: - - *any \ No newline at end of file + - *any diff --git a/internal/simdgen/types.yaml b/internal/simdgen/types.yaml index 17f5be55..f7a01cb3 100644 --- a/internal/simdgen/types.yaml +++ b/internal/simdgen/types.yaml @@ -37,6 +37,7 @@ in: !repeat - {class: vreg, go: Uint64x8, base: "uint", elemBits: 64, bits: 512, lanes: 8} - {class: vreg, go: Float32x16, base: "float", elemBits: 32, bits: 512, 
lanes: 16} - {class: vreg, go: Float64x8, base: "float", elemBits: 64, bits: 512, lanes: 8} + - {class: mask, go: Mask8x16, base: "int", elemBits: 8, bits: 128, lanes: 16} - {class: mask, go: Mask16x8, base: "int", elemBits: 16, bits: 128, lanes: 8} - {class: mask, go: Mask32x4, base: "int", elemBits: 32, bits: 128, lanes: 4} @@ -83,5 +84,7 @@ in: !repeat - {class: vreg, go: Uint64x4, base: "uint", elemBits: 128, bits: 256, lanes: 4} - {class: immediate, go: Immediate} # TODO: we only support imms that are not used as value -- usually as instruction semantic predicate like VPCMP as of now. +inVariant: !repeat +- *types out: !repeat - *types diff --git a/internal/simdgen/xed.go b/internal/simdgen/xed.go index dddf5395..6a3feb36 100644 --- a/internal/simdgen/xed.go +++ b/internal/simdgen/xed.go @@ -22,6 +22,18 @@ const ( GREG_CLASS = 2 // classify as a general register ) +// instVariant is a bitmap indicating a variant of an instruction that has +// optional parameters. +type instVariant uint8 + +const ( + instVariantNone instVariant = 0 + + // instVariantMasked indicates that this is the masked variant of an + // optionally-masked instruction. + instVariantMasked instVariant = 1 << iota +) + var operandRemarks int // TODO: Doc. Returns Values with Def domains. @@ -58,12 +70,16 @@ func loadXED(xedPath string) []*unify.Value { return } - uval := instToUVal(inst, ops) - defs = append(defs, uval) + applyQuirks(inst, ops) + + defsPos := len(defs) + defs = append(defs, instToUVal(inst, ops)...) 
if *flagDebugXED { - y, _ := yaml.Marshal(uval) - fmt.Printf("==>\n%s\n", y) + for i := defsPos; i < len(defs); i++ { + y, _ := yaml.Marshal(defs[i]) + fmt.Printf("==>\n%s\n", y) + } } }) if err != nil { @@ -72,6 +88,35 @@ func loadXED(xedPath string) []*unify.Value { return defs } +var ( + maskRequiredRe = regexp.MustCompile(`VPCOMPRESS[BWDQ]|VCOMPRESSP[SD]`) + maskOptionalRe = regexp.MustCompile(`VPCMP(EQ|GT|U)?[BWDQ]|VCMPP[SD]`) +) + +func applyQuirks(inst *xeddata.Inst, ops []operand) { + opc := inst.Opcode() + switch { + case maskRequiredRe.MatchString(opc): + // The mask on these instructions is marked optional, but the + // instruction is pointless without the mask. + for i, op := range ops { + if op, ok := op.(operandMask); ok { + op.optional = false + ops[i] = op + } + } + + case maskOptionalRe.MatchString(opc): + // Conversely, these masks should be marked optional and aren't. + for i, op := range ops { + if op, ok := op.(operandMask); ok && op.action.r { + op.optional = true + ops[i] = op + } + } + } +} + type operandCommon struct { action operandAction } @@ -121,6 +166,9 @@ type operandMask struct { // Bits in the mask is w/bits. allMasks bool // If set, size cannot be inferred because all operands are masks. + + // Mask can be omitted, in which case it defaults to K0/"no mask" + optional bool } type operandImm struct { @@ -233,8 +281,12 @@ func decodeOperand(db *xeddata.Database, operand string) (operand, error) { } else if strings.HasPrefix(lhs, "REG") { if op.Width == "mskw" { // The mask operand doesn't specify a width. We have to infer it. + // + // XED uses the marker ZEROSTR to indicate that a mask operand is + // optional and, if omitted, implies K0, aka "no mask". 
return operandMask{ operandCommon: common, + optional: op.Attributes["TXT=ZEROSTR"], }, nil } else { class, regBits := decodeReg(op) @@ -397,38 +449,63 @@ func inferMaskSizes(ops []operand) error { return nil } -func operandsToUVals(ops []operand) (in, out unify.Tuple) { - var inVals, outVals []*unify.Value - for asmPos, op := range ops { +// addOperandstoDef adds "in", "inVariant", and "out" to an instruction Def. +// +// Optional mask input operands are added to the inVariant field if +// variant&instVariantMasked, and omitted otherwise. +func addOperandsToDef(ops []operand, instDB *unify.DefBuilder, variant instVariant) { + var inVals, inVar, outVals []*unify.Value + asmPos := 0 + for _, op := range ops { var db unify.DefBuilder op.addToDef(&db) - db.Add("asmPos", unify.NewValue(unify.NewStringExact(fmt.Sprint(asmPos)))) action := op.common().action + asmCount := 1 // # of assembly operands; 0 or 1 if action.r { inVal := unify.NewValue(db.Build()) - inVals = append(inVals, inVal) + // If this is an optional mask, put it in the input variant tuple. + if mask, ok := op.(operandMask); ok && mask.optional { + if variant&instVariantMasked != 0 { + inVar = append(inVar, inVal) + } else { + // This operand doesn't appear in the assembly at all. + asmCount = 0 + } + } else { + // Just a regular input operand. + inVals = append(inVals, inVal) + } } if action.w { outVal := unify.NewValue(db.Build()) outVals = append(outVals, outVal) } + + asmPos += asmCount } - return unify.NewTuple(inVals...), unify.NewTuple(outVals...) + instDB.Add("in", unify.NewValue(unify.NewTuple(inVals...))) + instDB.Add("inVariant", unify.NewValue(unify.NewTuple(inVar...))) + instDB.Add("out", unify.NewValue(unify.NewTuple(outVals...))) } -func instToUVal(inst *xeddata.Inst, ops []operand) *unify.Value { - // Map operands to unifier values. 
- ins, outs := operandsToUVals(ops) +func instToUVal(inst *xeddata.Inst, ops []operand) []*unify.Value { + var vals []*unify.Value + vals = append(vals, instToUVal1(inst, ops, instVariantNone)) + if hasOptionalMask(ops) { + vals = append(vals, instToUVal1(inst, ops, instVariantMasked)) + } + return vals +} +func instToUVal1(inst *xeddata.Inst, ops []operand, variant instVariant) *unify.Value { // TODO: "feature" var db unify.DefBuilder db.Add("goarch", unify.NewValue(unify.NewStringExact("amd64"))) db.Add("asm", unify.NewValue(unify.NewStringExact(inst.Opcode()))) - db.Add("in", unify.NewValue(ins)) - db.Add("out", unify.NewValue(outs)) + addOperandsToDef(ops, &db, variant) db.Add("extension", unify.NewValue(unify.NewStringExact(inst.Extension))) db.Add("isaset", unify.NewValue(unify.NewStringExact(inst.ISASet))) @@ -454,6 +531,16 @@ func instToUVal(inst *xeddata.Inst, ops []operand) *unify.Value { return unify.NewValuePos(db.Build(), pos) } +// hasOptionalMask returns whether there is an optional mask operand in ops. +func hasOptionalMask(ops []operand) bool { + for _, op := range ops { + if op, ok := op.(operandMask); ok && op.optional { + return true + } + } + return false +} + func singular[T comparable](xs []T) (T, bool) { if len(xs) == 0 { return *new(T), false From f4fa54fbebd1d3dec45e7c293b1c63c751fec9f5 Mon Sep 17 00:00:00 2001 From: Junyang Shao Date: Wed, 23 Jul 2025 08:40:44 +0000 Subject: [PATCH 158/200] internal/simdgen: support load from bits for mask This CL adds the code generation to store K masks to bits. This will enable more flexible and performant SIMD programming. This CL generates CL 689795. 
Change-Id: I3fe99fb3dc5073f267c9a3831fde04bb14834d90 Reviewed-on: https://go-review.googlesource.com/c/arch/+/689775 LUCI-TryBot-Result: Go LUCI Reviewed-by: Junyang Shao Reviewed-by: David Chase --- internal/simdgen/gen_simdIntrinsics.go | 1 + internal/simdgen/gen_simdTypes.go | 10 +++++++++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/internal/simdgen/gen_simdIntrinsics.go b/internal/simdgen/gen_simdIntrinsics.go index 355c8d14..7140eda2 100644 --- a/internal/simdgen/gen_simdIntrinsics.go +++ b/internal/simdgen/gen_simdIntrinsics.go @@ -74,6 +74,7 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . addF(simdPackage, "{{.Name}}.And", opLen2(ssa.OpAnd{{.ReshapedVectorWithAndOr}}, types.TypeVec{{.Size}}), sys.AMD64) addF(simdPackage, "{{.Name}}.Or", opLen2(ssa.OpOr{{.ReshapedVectorWithAndOr}}, types.TypeVec{{.Size}}), sys.AMD64) addF(simdPackage, "Load{{.Name}}FromBits", simdLoadMask({{.ElemBits}}, {{.Lanes}}), sys.AMD64) + addF(simdPackage, "{{.Name}}.StoreToBits", simdStoreMask({{.ElemBits}}, {{.Lanes}}), sys.AMD64) {{end}} {{define "footer"}}} diff --git a/internal/simdgen/gen_simdTypes.go b/internal/simdgen/gen_simdTypes.go index 50553b4e..35b4a720 100644 --- a/internal/simdgen/gen_simdTypes.go +++ b/internal/simdgen/gen_simdTypes.go @@ -95,12 +95,20 @@ func (x {{.Name}}) Store(y *[{{.Lanes}}]{{.Base}}) {{- else}} -// {{.Name}}FromBits constructs a {{.Name}} from an a bitmap, where 1 means set for the indexed element, 0 means unset. +// Load{{.Name}}FromBits constructs a {{.Name}} from a bitmap, where 1 means set for the indexed element, 0 means unset. // Only the lower {{.Lanes}} bits of y are used. // +// CPU Features: AVX512 //go:noescape func Load{{.Name}}FromBits(y *uint64) {{.Name}} +// StoreToBits stores a {{.Name}} as a bitmap, where 1 means set for the indexed element, 0 means unset. +// Only the lower {{.Lanes}} bits of y are used. 
+// +// CPU Features: AVX512 +//go:noescape +func (x {{.Name}}) StoreToBits(y *uint64) + {{end}} {{end}} ` From 2b75d2ffd5e9104cbaab1dc2f5092d710406e626 Mon Sep 17 00:00:00 2001 From: David Chase Date: Wed, 16 Jul 2025 14:19:04 -0400 Subject: [PATCH 159/200] internal/simdgen: remove automatically generated tests This pairs with Go dev.simd CL 686057 that adds test infrastucture in that repo. Change-Id: I1968933e0ce0a32598c303b310e0efe1e49e12ee Reviewed-on: https://go-review.googlesource.com/c/arch/+/689275 LUCI-TryBot-Result: Go LUCI Reviewed-by: Junyang Shao --- internal/simdgen/gen_simdTypes.go | 228 ------------------------------ internal/simdgen/godefs.go | 1 - 2 files changed, 229 deletions(-) diff --git a/internal/simdgen/gen_simdTypes.go b/internal/simdgen/gen_simdTypes.go index 35b4a720..98d2b5a3 100644 --- a/internal/simdgen/gen_simdTypes.go +++ b/internal/simdgen/gen_simdTypes.go @@ -292,234 +292,6 @@ func (x {{.Name}}) Or(y {{.Name}}) {{.Name}} {{end}} ` -const simdTestsWrapperTmpl = `{{define "fileHeader"}}// Code generated by x/arch/internal/simdgen using 'go run . -xedPath $XED_PATH -o godefs -goroot $GOROOT go.yaml types.yaml categories.yaml'; DO NOT EDIT. 
- -//go:build goexperiment.simd - -package simd_test - -import ( - "simd" - "testing" -) -{{end}} -{{define "op"}} -func test{{.OpShape}}(t *testing.T, {{.BaseArgDefList}}, want []{{.ResBaseType}}, which string) { - t.Helper() - var gotv simd.{{.ResVecType}} - got := make([]{{.ResBaseType}}, len(want)){{range $i, $a := .ArgVecTypes}} - vec{{$i}} := simd.Load{{$a}}Slice(v{{$i}}){{end}} - switch which { -{{range .Ops}}case "{{.}}": - gotv = vec0.{{.}}({{$.VecArgList}}){{$.OptionalMaskToInt}} -{{end}} - default: - t.Errorf("Unknown method: {{.Arg0VecType}}.%s", which) - } - gotv.StoreSlice(got) - for i := range len(want) { - if got[i] != want[i] { - t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) - } - } -} -{{end}} -{{define "untestedOpHeader"}} -/* The operations below cannot be tested via wrappers, please test them directly */ -{{end}} -{{define "untestedOp"}} -// {{.}}{{end}} -` - -// writeSIMDTestsWrapper generates the test wrappers and writes it to simd_amd64_testwrappers.go -// within the specified directory. 
-func writeSIMDTestsWrapper(ops []Operation) *bytes.Buffer { - t := templateOf(simdTestsWrapperTmpl, "simdTestWrappers") - buffer := new(bytes.Buffer) - - if err := t.ExecuteTemplate(buffer, "fileHeader", nil); err != nil { - panic(fmt.Errorf("failed to execute fileHeader template: %w", err)) - } - - // The comment shows an example of Uint8x64.Add - type opData struct { - OpShape string // "Uint8x64Uint8x64Uint8x64" - BaseArgDefList string // "v0 uint8[], v1 uint8[]" - VecArgList string // "vec1" - ResBaseType string // "uint8" - ResVecType string // "Uint8x64" - Arg0VecType string // "Uint8x64" - ArgVecTypes []string // ["Uint8x64", "Uint8x64"] - OptionalMaskToInt string // ".AsInt8x64()" or "" - Ops []string // ["Add", "Sub"] - } - - opsByShape := make(map[string]opData) - opsSkipped := map[string]struct{}{} -outerLoop: - for _, o := range ops { - _, _, _, immType, gOp := o.shape() - - if immType == VarImm || immType == ConstVarImm { - // Operations with variable immediates should be called directly - // instead of through wrappers. - opsSkipped[o.Go] = struct{}{} - continue - } - if vasIdx, err := checkVecAsScalar(o); err != nil { - panic(err) - } else if vasIdx != -1 { - // TODO: these could be tested via wrappers, implement this. - opsSkipped[o.Go] = struct{}{} - continue - } - if o.OperandOrder != nil { - // We need to check if the customize order change the function signature. - // It is only safe to proceed generating the test wrappers if the function - // signature stays the same. - // Filtering out unqualified cases as a hack now, this test wrapper - // infrastrcuture should be changing soon so it should be fine. 
- switch *o.OperandOrder { - case "21": - // No op because it's only set in AndNot, and opr[2] and opr[1] has the same shape - default: - opsSkipped[o.Go] = struct{}{} - continue outerLoop - } - } - - var shape string - var baseArgDefList []string - var vecArgList []string - var argVecTypes []string - var vec string - var vecOp Operand - allSameVec := true - masked := strings.HasSuffix(gOp.Go, "Masked") - skippedMaskCnt := 0 - vecCnt := 0 - for i, in := range gOp.In { - baseArgDefList = append(baseArgDefList, fmt.Sprintf("v%d []%s%d", i, *in.Base, *in.ElemBits)) - if i != 0 { - maskConversion := "" - if in.Class == "mask" { - maskConversion = fmt.Sprintf(".As%s()", *in.Go) - } - vecArgList = append(vecArgList, fmt.Sprintf("vec%d%s", i, maskConversion)) - } - // gOp will only have either mask or vreg operand, so the following check - // is sufficient to detect whether it's a pure vreg or masked pure vreg operation - // with all the same vectors. - if in.Class == "mask" { - if masked && skippedMaskCnt == 0 { - skippedMaskCnt++ - } else { - allSameVec = false - } - } else { - if len(vec) > 0 { - if vec != *in.Go { - allSameVec = false - } - } - vecCnt++ - vec = *in.Go - vecOp = in - } - shape += *in.Go - argVecTypes = append(argVecTypes, strings.ReplaceAll(*in.Go, "Mask", "Int")) - } - isCompare := false - isWiden := false - outOp := gOp.Out[0] - if *outOp.Go != vec { - if allSameVec && outOp.Class == "mask" && *outOp.Bits == *vecOp.Bits && *outOp.Lanes == *vecOp.Lanes { - isCompare = true - } - if allSameVec && outOp.Class == "vreg" && *outOp.Bits == *vecOp.Bits && *outOp.Base == *vecOp.Base && *outOp.Lanes == *vecOp.Lanes/2 { - isWiden = true - } - if !isCompare && !isWiden { - allSameVec = false - } - } - shape += *gOp.Out[0].Go - if allSameVec { - numToName := map[int]string{1: "Unary", 2: "Binary", 3: "Ternary"} - if _, ok := numToName[vecCnt]; !ok { - panic(fmt.Errorf("unknown shape: %s", shape)) - } - shape = vec + numToName[vecCnt] - if masked { - shape += 
"Masked" - } - if isCompare { - if vecCnt == 2 { - // Remove "Binary" - shape = strings.ReplaceAll(shape, "Binary", "") - } - shape += "Compare" - } - if isWiden { - shape += "Widen" - } - } - optionalMaskToInt := "" - if gOp.Out[0].Class == "mask" { - optionalMaskToInt = fmt.Sprintf(".As%s()", strings.ReplaceAll(*gOp.Out[0].Go, "Mask", "Int")) - } - if _, ok := opsByShape[shape]; !ok { - opsByShape[shape] = opData{ - OpShape: shape, - BaseArgDefList: strings.Join(baseArgDefList, ", "), - VecArgList: strings.Join(vecArgList, ", "), - ResBaseType: fmt.Sprintf("%s%d", *gOp.Out[0].Base, *gOp.Out[0].ElemBits), - ResVecType: strings.ReplaceAll(*gOp.Out[0].Go, "Mask", "Int"), - Arg0VecType: *gOp.In[0].Go, - ArgVecTypes: argVecTypes, - OptionalMaskToInt: optionalMaskToInt, - } - } - data := opsByShape[shape] - data.Ops = append(data.Ops, gOp.Go) - opsByShape[shape] = data - } - - compareOpData := func(x, y opData) int { - return compareNatural(x.OpShape, y.OpShape) - } - data := make([]opData, 0) - for _, d := range opsByShape { - slices.SortFunc(d.Ops, compareNatural) - data = append(data, d) - } - slices.SortFunc(data, compareOpData) - - for _, d := range data { - if err := t.ExecuteTemplate(buffer, "op", d); err != nil { - panic(fmt.Errorf("failed to execute op template for op shape %s: %w", d.OpShape, err)) - } - } - - if len(opsSkipped) != 0 { - if err := t.ExecuteTemplate(buffer, "untestedOpHeader", nil); err != nil { - panic(fmt.Errorf("failed to execute untestedOpHeader")) - } - opsK := []string{} - for k := range opsSkipped { - opsK = append(opsK, k) - } - slices.SortFunc(opsK, strings.Compare) - for _, k := range opsK { - if err := t.ExecuteTemplate(buffer, "untestedOp", k); err != nil { - panic(fmt.Errorf("failed to execute untestedOp")) - } - } - } - - return buffer -} - // parseSIMDTypes groups go simd types by their vector sizes, and // returns a map whose key is the vector size, value is the simd type. 
func parseSIMDTypes(ops []Operation) simdTypeMap { diff --git a/internal/simdgen/godefs.go b/internal/simdgen/godefs.go index 741214bb..1bdfec1b 100644 --- a/internal/simdgen/godefs.go +++ b/internal/simdgen/godefs.go @@ -348,7 +348,6 @@ func writeGoDefs(path string, cl unify.Closure) error { formatWriteAndClose(writeSIMDTypes(typeMap), path, "src/"+simdPackage+"/types_amd64.go") formatWriteAndClose(writeSIMDStubs(deduped, typeMap), path, "src/"+simdPackage+"/ops_amd64.go") - formatWriteAndClose(writeSIMDTestsWrapper(deduped), path, "src/"+simdPackage+"/simd_wrapped_test.go") formatWriteAndClose(writeSIMDIntrinsics(deduped, typeMap), path, "src/cmd/compile/internal/ssagen/simdintrinsics.go") formatWriteAndClose(writeSIMDGenericOps(deduped), path, "src/cmd/compile/internal/ssa/_gen/simdgenericOps.go") formatWriteAndClose(writeSIMDMachineOps(deduped), path, "src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go") From 1167cd0b22b37def53455af89123822867ea2bd0 Mon Sep 17 00:00:00 2001 From: David Chase Date: Mon, 21 Jul 2025 13:32:05 -0400 Subject: [PATCH 160/200] internal/simdgen: add generated declarations for AVX2 masked load/store generates Go dev.simd CL 689335 (which also includes one basic test) Change-Id: Icd948396a3ca265b307747437efbc0e6f4548c76 Reviewed-on: https://go-review.googlesource.com/c/arch/+/689276 Reviewed-by: Junyang Shao LUCI-TryBot-Result: Go LUCI --- internal/simdgen/gen_simdIntrinsics.go | 12 +++++ internal/simdgen/gen_simdTypes.go | 64 +++++++++++++++++++++----- 2 files changed, 65 insertions(+), 11 deletions(-) diff --git a/internal/simdgen/gen_simdIntrinsics.go b/internal/simdgen/gen_simdIntrinsics.go index 7140eda2..d114b4cd 100644 --- a/internal/simdgen/gen_simdIntrinsics.go +++ b/internal/simdgen/gen_simdIntrinsics.go @@ -69,6 +69,10 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . 
addF(simdPackage, "{{.Name}}.Store", simdStore(), sys.AMD64) {{end}} +{{define "maskedLoadStore"}} addF(simdPackage, "LoadMasked{{.Name}}", simdMaskedLoad(ssa.OpLoadMasked{{.ElemBits}}), sys.AMD64) + addF(simdPackage, "{{.Name}}.StoreMasked", simdMaskedStore(ssa.OpStoreMasked{{.ElemBits}}), sys.AMD64) +{{end}} + {{define "mask"}} addF(simdPackage, "{{.Name}}.As{{.VectorCounterpart}}", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64) addF(simdPackage, "{{.VectorCounterpart}}.As{{.Name}}", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64) addF(simdPackage, "{{.Name}}.And", opLen2(ssa.OpAnd{{.ReshapedVectorWithAndOr}}, types.TypeVec{{.Size}}), sys.AMD64) @@ -118,6 +122,14 @@ func writeSIMDIntrinsics(ops []Operation, typeMap simdTypeMap) *bytes.Buffer { } } + for _, typ := range typesFromTypeMap(typeMap) { + if typ.MaskedLoadStoreFilter() { + if err := t.ExecuteTemplate(buffer, "maskedLoadStore", typ); err != nil { + panic(fmt.Errorf("failed to execute maskedLoadStore template: %w", err)) + } + } + } + for _, mask := range masksFromTypeMap(typeMap) { if err := t.ExecuteTemplate(buffer, "mask", mask); err != nil { panic(fmt.Errorf("failed to execute mask template: %w", err)) diff --git a/internal/simdgen/gen_simdTypes.go b/internal/simdgen/gen_simdTypes.go index 98d2b5a3..6739b9fc 100644 --- a/internal/simdgen/gen_simdTypes.go +++ b/internal/simdgen/gen_simdTypes.go @@ -20,8 +20,18 @@ type simdType struct { Type string // Either "mask" or "vreg" VectorCounterpart string // For mask use only: just replacing the "Mask" in [simdType.Name] with "Int" ReshapedVectorWithAndOr string // For mask use only: vector AND and OR are only available in some shape with element width 32. 
- Size int // The size of the type - ElemBits int // Size / Lanes + Size int // The size of the vector type +} + +func (x simdType) ElemBits() int { + return x.Size / x.Lanes +} + +// MaskedLoadStoreFilter encodes which simd type type currently +// get masked loads/stores generated, it is used in two places, +// this forces coordination. +func (x simdType) MaskedLoadStoreFilter() bool { + return x.Size < 512 && x.ElemBits() >= 32 && x.Type != "mask" } func compareSimdTypes(x, y simdType) int { @@ -36,7 +46,7 @@ func compareSimdTypes(x, y simdType) int { return c } // base type size, 8 < 16 < 32 < 64 - if c := x.Size/x.Lanes - y.Size/y.Lanes; c != 0 { + if c := x.ElemBits() - y.ElemBits(); c != 0 { return c } // vector size last @@ -78,8 +88,10 @@ type {{.Name}} struct { {{.Fields}} } -{{- if ne .Type "mask"}} +{{end}} +` +const simdLoadStoreTemplate = ` // Len returns the number of elements in a {{.Name}} func (x {{.Name}}) Len() int { return {{.Lanes}} } @@ -92,9 +104,9 @@ func Load{{.Name}}(y *[{{.Lanes}}]{{.Base}}) {{.Name}} // //go:noescape func (x {{.Name}}) Store(y *[{{.Lanes}}]{{.Base}}) +` -{{- else}} - +const simdMaskFromBitsTemplate = ` // Load{{.Name}}FromBits constructs a {{.Name}} from a bitmap, where 1 means set for the indexed element, 0 means unset. // Only the lower {{.Lanes}} bits of y are used. 
// @@ -108,9 +120,20 @@ func Load{{.Name}}FromBits(y *uint64) {{.Name}} // CPU Features: AVX512 //go:noescape func (x {{.Name}}) StoreToBits(y *uint64) +` -{{end}} -{{end}} +const simdMaskedLoadStoreTemplate = ` +// LoadMasked{{.Name}} loads a {{.Name}} from an array, +// at those elements enabled by mask +// +//go:noescape +func LoadMasked{{.Name}}(y *[{{.Lanes}}]{{.Base}}, mask Mask{{.ElemBits}}x{{.Lanes}}) {{.Name}} + +// StoreMasked stores a {{.Name}} to an array, +// at those elements enabled by mask +// +//go:noescape +func (x {{.Name}}) StoreMasked(y *[{{.Lanes}}]{{.Base}}, mask Mask{{.ElemBits}}x{{.Lanes}}) ` const simdStubsTmpl = `{{define "fileHeader"}}// Code generated by x/arch/internal/simdgen using 'go run . -xedPath $XED_PATH -o godefs -goroot $GOROOT go.yaml types.yaml categories.yaml'; DO NOT EDIT. @@ -317,14 +340,14 @@ func parseSIMDTypes(ops []Operation) simdTypeMap { if arg.Class == "mask" { vectorCounterpart := strings.ReplaceAll(*arg.Go, "Mask", "Int") reshapedVectorWithAndOr := fmt.Sprintf("Int32x%d", *arg.Bits/32) - ret[*arg.Bits] = append(ret[*arg.Bits], simdType{*arg.Go, lanes, base, fields, arg.Class, vectorCounterpart, reshapedVectorWithAndOr, *arg.Bits, *arg.Bits / lanes}) + ret[*arg.Bits] = append(ret[*arg.Bits], simdType{*arg.Go, lanes, base, fields, arg.Class, vectorCounterpart, reshapedVectorWithAndOr, *arg.Bits}) // In case the vector counterpart of a mask is not present, put its vector counterpart typedef into the map as well. 
if _, ok := seen[vectorCounterpart]; !ok { seen[vectorCounterpart] = struct{}{} - ret[*arg.Bits] = append(ret[*arg.Bits], simdType{vectorCounterpart, lanes, base, fields, "vreg", "", "", *arg.Bits, *arg.Bits / lanes}) + ret[*arg.Bits] = append(ret[*arg.Bits], simdType{vectorCounterpart, lanes, base, fields, "vreg", "", "", *arg.Bits}) } } else { - ret[*arg.Bits] = append(ret[*arg.Bits], simdType{*arg.Go, lanes, base, fields, arg.Class, "", "", *arg.Bits, *arg.Bits / lanes}) + ret[*arg.Bits] = append(ret[*arg.Bits], simdType{*arg.Go, lanes, base, fields, arg.Class, "", "", *arg.Bits}) } } for _, op := range ops { @@ -383,6 +406,10 @@ func typesFromTypeMap(typeMap simdTypeMap) []simdType { // writeSIMDTypes generates the simd vector types into a bytes.Buffer func writeSIMDTypes(typeMap simdTypeMap) *bytes.Buffer { t := templateOf(simdTypesTemplates, "types_amd64") + loadStore := templateOf(simdLoadStoreTemplate, "loadstore_amd64") + maskedLoadStore := templateOf(simdMaskedLoadStoreTemplate, "maskedloadstore_amd64") + maskFromBits := templateOf(simdMaskFromBitsTemplate, "maskFromBits_amd64") + buffer := new(bytes.Buffer) if err := t.ExecuteTemplate(buffer, "fileHeader", nil); err != nil { @@ -411,6 +438,21 @@ func writeSIMDTypes(typeMap simdTypeMap) *bytes.Buffer { if err := t.ExecuteTemplate(buffer, "typeTmpl", typeDef); err != nil { panic(fmt.Errorf("failed to execute type template for type %s: %w", typeDef.Name, err)) } + if typeDef.Type != "mask" { + if err := loadStore.ExecuteTemplate(buffer, "loadstore_amd64", typeDef); err != nil { + panic(fmt.Errorf("failed to execute loadstore template for type %s: %w", typeDef.Name, err)) + } + // restrict to AVX2 masked loads/stores first. 
+ if typeDef.MaskedLoadStoreFilter() { + if err := maskedLoadStore.ExecuteTemplate(buffer, "maskedloadstore_amd64", typeDef); err != nil { + panic(fmt.Errorf("failed to execute maskedloadstore template for type %s: %w", typeDef.Name, err)) + } + } + } else { + if err := maskFromBits.ExecuteTemplate(buffer, "maskFromBits_amd64", typeDef); err != nil { + panic(fmt.Errorf("failed to execute maskFromBits template for type %s: %w", typeDef.Name, err)) + } + } } } From 357d0b5ab3e25a75a99794118ec4084ba90a6485 Mon Sep 17 00:00:00 2001 From: David Chase Date: Tue, 22 Jul 2025 14:48:33 -0400 Subject: [PATCH 161/200] internal/simdgen: modify sorting for generic/ssa ops, rules there was some incomplete ordering that sometimes caused gratuitous changes. Change-Id: I919136c0ab954a3c3151e2745b7626ba83352c52 Reviewed-on: https://go-review.googlesource.com/c/arch/+/689655 Reviewed-by: Junyang Shao LUCI-TryBot-Result: Go LUCI --- internal/simdgen/gen_simdGenericOps.go | 7 +++---- internal/simdgen/gen_simdMachineOps.go | 9 ++++----- 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/internal/simdgen/gen_simdGenericOps.go b/internal/simdgen/gen_simdGenericOps.go index 4eb47b44..c345793a 100644 --- a/internal/simdgen/gen_simdGenericOps.go +++ b/internal/simdgen/gen_simdGenericOps.go @@ -32,7 +32,6 @@ func writeSIMDGenericOps(ops []Operation) *bytes.Buffer { buffer := new(bytes.Buffer) type genericOpsData struct { - sortKey string OpName string OpInLen int Comm bool @@ -44,7 +43,7 @@ func writeSIMDGenericOps(ops []Operation) *bytes.Buffer { var opsData opData for _, op := range ops { _, _, _, immType, gOp := op.shape() - gOpData := genericOpsData{*gOp.In[0].Go + gOp.Go, genericName(gOp), len(gOp.In), op.Commutative} + gOpData := genericOpsData{genericName(gOp), len(gOp.In), op.Commutative} if immType == VarImm || immType == ConstVarImm { opsData.OpsImm = append(opsData.OpsImm, gOpData) } else { @@ -52,10 +51,10 @@ func writeSIMDGenericOps(ops []Operation) *bytes.Buffer { } } 
sort.Slice(opsData.Ops, func(i, j int) bool { - return opsData.Ops[i].sortKey < opsData.Ops[j].sortKey + return compareNatural(opsData.Ops[i].OpName, opsData.Ops[j].OpName) < 0 }) sort.Slice(opsData.OpsImm, func(i, j int) bool { - return opsData.OpsImm[i].sortKey < opsData.OpsImm[j].sortKey + return compareNatural(opsData.OpsImm[i].OpName, opsData.OpsImm[j].OpName) < 0 }) err := t.Execute(buffer, opsData) diff --git a/internal/simdgen/gen_simdMachineOps.go b/internal/simdgen/gen_simdMachineOps.go index 39bf2ec1..22893a22 100644 --- a/internal/simdgen/gen_simdMachineOps.go +++ b/internal/simdgen/gen_simdMachineOps.go @@ -33,7 +33,6 @@ func writeSIMDMachineOps(ops []Operation) *bytes.Buffer { buffer := new(bytes.Buffer) type opData struct { - sortKey string OpName string Asm string OpInLen int @@ -108,16 +107,16 @@ func writeSIMDMachineOps(ops []Operation) *bytes.Buffer { resultInArg0 = true } if shapeIn == OneImmIn || shapeIn == OneKmaskImmIn { - opsDataImm = append(opsDataImm, opData{*gOp.In[0].Go + gOp.Go, asm, gOp.Asm, len(gOp.In), regInfo, gOp.Commutative, outType, resultInArg0}) + opsDataImm = append(opsDataImm, opData{asm, gOp.Asm, len(gOp.In), regInfo, gOp.Commutative, outType, resultInArg0}) } else { - opsData = append(opsData, opData{*gOp.In[0].Go + gOp.Go, asm, gOp.Asm, len(gOp.In), regInfo, gOp.Commutative, outType, resultInArg0}) + opsData = append(opsData, opData{asm, gOp.Asm, len(gOp.In), regInfo, gOp.Commutative, outType, resultInArg0}) } } sort.Slice(opsData, func(i, j int) bool { - return opsData[i].sortKey < opsData[j].sortKey + return compareNatural(opsData[i].OpName, opsData[j].OpName) < 0 }) sort.Slice(opsDataImm, func(i, j int) bool { - return opsDataImm[i].sortKey < opsDataImm[j].sortKey + return compareNatural(opsData[i].OpName, opsData[j].OpName) < 0 }) err := t.Execute(buffer, machineOpsData{opsData, opsDataImm}) if err != nil { From d3ce7fc27510fe87f7631081224000bbd26f342b Mon Sep 17 00:00:00 2001 From: David Chase Date: Tue, 22 Jul 2025 
14:47:55 -0400 Subject: [PATCH 162/200] internal/simdgen: add some conversion ops Generates dev.simd CL 689716 Change-Id: I6444cdaf94a560d50828fc6291e790f651f42f8e Reviewed-on: https://go-review.googlesource.com/c/arch/+/689735 Reviewed-by: Junyang Shao LUCI-TryBot-Result: Go LUCI --- internal/simdgen/categories.yaml | 11 ++++++++++ internal/simdgen/go.yaml | 20 ++++++++++++++++++ internal/simdgen/ops/Converts/categories.yaml | 12 +++++++++++ internal/simdgen/ops/Converts/go.yaml | 21 +++++++++++++++++++ 4 files changed, 64 insertions(+) create mode 100644 internal/simdgen/ops/Converts/categories.yaml create mode 100644 internal/simdgen/ops/Converts/go.yaml diff --git a/internal/simdgen/categories.yaml b/internal/simdgen/categories.yaml index c13fd431..0f883bfa 100644 --- a/internal/simdgen/categories.yaml +++ b/internal/simdgen/categories.yaml @@ -115,6 +115,17 @@ extension: "AVX.*" documentation: !string |- // NAME compares for greater than. +- go: ConvertToInt32 + commutative: false + extension: "AVX.*" + documentation: !string |- + // ConvertToInt32 converts element values to int32. + +- go: ConvertToUint32 + commutative: false + extension: "AVX.*" + documentation: !string |- + // ConvertToUint32Masked converts element values to uint32. 
- go: Div commutative: false extension: "AVX.*" diff --git a/internal/simdgen/go.yaml b/internal/simdgen/go.yaml index c58d692e..df8f341c 100644 --- a/internal/simdgen/go.yaml +++ b/internal/simdgen/go.yaml @@ -240,6 +240,26 @@ const: 0 out: - class: mask +- go: ConvertToInt32 + asm: "VCVTTPS2DQ" + in: + - &fp + go: $t + base: float + out: + - &i32 + go: $u + base: int + elemBits: 32 +- go: ConvertToUint32 + asm: "VCVTPS2UDQ" + in: + - *fp + out: + - &u32 + go: $u + base: uint + elemBits: 32 - go: Div asm: "V?DIVP[SD]" in: &2fp diff --git a/internal/simdgen/ops/Converts/categories.yaml b/internal/simdgen/ops/Converts/categories.yaml new file mode 100644 index 00000000..16316ed3 --- /dev/null +++ b/internal/simdgen/ops/Converts/categories.yaml @@ -0,0 +1,12 @@ +!sum +- go: ConvertToInt32 + commutative: false + extension: "AVX.*" + documentation: !string |- + // ConvertToInt32 converts element values to int32. + +- go: ConvertToUint32 + commutative: false + extension: "AVX.*" + documentation: !string |- + // ConvertToUint32Masked converts element values to uint32. diff --git a/internal/simdgen/ops/Converts/go.yaml b/internal/simdgen/ops/Converts/go.yaml new file mode 100644 index 00000000..4e251728 --- /dev/null +++ b/internal/simdgen/ops/Converts/go.yaml @@ -0,0 +1,21 @@ +!sum +- go: ConvertToInt32 + asm: "VCVTTPS2DQ" + in: + - &fp + go: $t + base: float + out: + - &i32 + go: $u + base: int + elemBits: 32 +- go: ConvertToUint32 + asm: "VCVTPS2UDQ" + in: + - *fp + out: + - &u32 + go: $u + base: uint + elemBits: 32 From 0f343f3f4c0a732316241d19d6335927e0b34b19 Mon Sep 17 00:00:00 2001 From: David Chase Date: Wed, 23 Jul 2025 14:31:49 -0400 Subject: [PATCH 163/200] internal/simdgen: add declarations+intrinsics for mask-from-value This generates Go dev.simd CL 689936. 
Change-Id: Ib63abe15f3c6c4ca01583f4cc72636ceb67eb528 Reviewed-on: https://go-review.googlesource.com/c/arch/+/689955 LUCI-TryBot-Result: Go LUCI Reviewed-by: Junyang Shao --- internal/simdgen/gen_simdIntrinsics.go | 1 + internal/simdgen/gen_simdTypes.go | 30 ++++++++++++++++++++++++++ 2 files changed, 31 insertions(+) diff --git a/internal/simdgen/gen_simdIntrinsics.go b/internal/simdgen/gen_simdIntrinsics.go index d114b4cd..d3b35218 100644 --- a/internal/simdgen/gen_simdIntrinsics.go +++ b/internal/simdgen/gen_simdIntrinsics.go @@ -79,6 +79,7 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . addF(simdPackage, "{{.Name}}.Or", opLen2(ssa.OpOr{{.ReshapedVectorWithAndOr}}, types.TypeVec{{.Size}}), sys.AMD64) addF(simdPackage, "Load{{.Name}}FromBits", simdLoadMask({{.ElemBits}}, {{.Lanes}}), sys.AMD64) addF(simdPackage, "{{.Name}}.StoreToBits", simdStoreMask({{.ElemBits}}, {{.Lanes}}), sys.AMD64) + addF(simdPackage, "{{.Name}}FromBits", simdCvtMask({{.ElemBits}}, {{.Lanes}}), sys.AMD64) {{end}} {{define "footer"}}} diff --git a/internal/simdgen/gen_simdTypes.go b/internal/simdgen/gen_simdTypes.go index 6739b9fc..d1e4d495 100644 --- a/internal/simdgen/gen_simdTypes.go +++ b/internal/simdgen/gen_simdTypes.go @@ -27,6 +27,25 @@ func (x simdType) ElemBits() int { return x.Size / x.Lanes } +// LanesContainer returns the smallest int/uint bit size that is +// large enough to hold one bit for each lane. E.g., Mask32x4 +// is 4 lanes, and a uint8 is the smallest uint that has 4 bits. +func (x simdType) LanesContainer() int { + if x.Lanes > 64 { + panic("too many lanes") + } + if x.Lanes > 32 { + return 64 + } + if x.Lanes > 16 { + return 32 + } + if x.Lanes > 8 { + return 16 + } + return 8 +} + // MaskedLoadStoreFilter encodes which simd type type currently // get masked loads/stores generated, it is used in two places, // this forces coordination. 
@@ -122,6 +141,13 @@ func Load{{.Name}}FromBits(y *uint64) {{.Name}} func (x {{.Name}}) StoreToBits(y *uint64) ` +const simdMaskFromValTemplate = ` +// {{.Name}}FromBits constructs a {{.Name}} from a bitmap value, where 1 means set for the indexed element, 0 means unset. +// Only the lower {{.Lanes}} bits of y are used. +// +func {{.Name}}FromBits(y uint{{.LanesContainer}}) {{.Name}} +` + const simdMaskedLoadStoreTemplate = ` // LoadMasked{{.Name}} loads a {{.Name}} from an array, // at those elements enabled by mask @@ -409,6 +435,7 @@ func writeSIMDTypes(typeMap simdTypeMap) *bytes.Buffer { loadStore := templateOf(simdLoadStoreTemplate, "loadstore_amd64") maskedLoadStore := templateOf(simdMaskedLoadStoreTemplate, "maskedloadstore_amd64") maskFromBits := templateOf(simdMaskFromBitsTemplate, "maskFromBits_amd64") + maskFromVal := templateOf(simdMaskFromValTemplate, "maskFromVal_amd64") buffer := new(bytes.Buffer) @@ -452,6 +479,9 @@ func writeSIMDTypes(typeMap simdTypeMap) *bytes.Buffer { if err := maskFromBits.ExecuteTemplate(buffer, "maskFromBits_amd64", typeDef); err != nil { panic(fmt.Errorf("failed to execute maskFromBits template for type %s: %w", typeDef.Name, err)) } + if err := maskFromVal.ExecuteTemplate(buffer, "maskFromVal_amd64", typeDef); err != nil { + panic(fmt.Errorf("failed to execute maskFromVal template for type %s: %w", typeDef.Name, err)) + } } } } From 968e15c5e5f7bc777d67019c792a8a25ef55eab7 Mon Sep 17 00:00:00 2001 From: David Chase Date: Thu, 24 Jul 2025 14:55:59 -0400 Subject: [PATCH 164/200] internal/simdgen: enable k-masked load/store on AVX512 includes a fix to the comments for the move-value-to-mask functions Generates Go dev.simd CL 690336 Change-Id: I2c98f0525a0e95d4eaa2ee221774a48607ac083a Reviewed-on: https://go-review.googlesource.com/c/arch/+/690315 LUCI-TryBot-Result: Go LUCI Reviewed-by: Junyang Shao --- internal/simdgen/gen_simdTypes.go | 37 ++++++++++++++++++++++++++++++- 1 file changed, 36 insertions(+), 1 deletion(-) diff 
--git a/internal/simdgen/gen_simdTypes.go b/internal/simdgen/gen_simdTypes.go index d1e4d495..d3791d78 100644 --- a/internal/simdgen/gen_simdTypes.go +++ b/internal/simdgen/gen_simdTypes.go @@ -50,7 +50,37 @@ func (x simdType) LanesContainer() int { // get masked loads/stores generated, it is used in two places, // this forces coordination. func (x simdType) MaskedLoadStoreFilter() bool { - return x.Size < 512 && x.ElemBits() >= 32 && x.Type != "mask" + return x.Size == 512 || x.ElemBits() >= 32 && x.Type != "mask" +} + +func (x simdType) IntelSizeSuffix() string { + switch x.ElemBits() { + case 8: + return "B" + case 16: + return "W" + case 32: + return "D" + case 64: + return "Q" + } + panic("oops") +} + +func (x simdType) MaskedLoadDoc() string { + if x.Size == 512 || x.ElemBits() < 32 { + return fmt.Sprintf("// Asm: VMOVDQU%d.Z, CPU Feature: AVX512", x.ElemBits()) + } else { + return fmt.Sprintf("// Asm: VMASKMOV%s, CPU Feature: AVX2", x.IntelSizeSuffix()) + } +} + +func (x simdType) MaskedStoreDoc() string { + if x.Size == 512 || x.ElemBits() < 32 { + return fmt.Sprintf("// Asm: VMOVDQU%d, CPU Feature: AVX512", x.ElemBits()) + } else { + return fmt.Sprintf("// Asm: VMASKMOV%s, CPU Feature: AVX2", x.IntelSizeSuffix()) + } } func compareSimdTypes(x, y simdType) int { @@ -145,6 +175,7 @@ const simdMaskFromValTemplate = ` // {{.Name}}FromBits constructs a {{.Name}} from a bitmap value, where 1 means set for the indexed element, 0 means unset. // Only the lower {{.Lanes}} bits of y are used. 
// +// Asm: KMOV{{.IntelSizeSuffix}}, CPU Feature: AVX512 func {{.Name}}FromBits(y uint{{.LanesContainer}}) {{.Name}} ` @@ -152,12 +183,16 @@ const simdMaskedLoadStoreTemplate = ` // LoadMasked{{.Name}} loads a {{.Name}} from an array, // at those elements enabled by mask // +{{.MaskedLoadDoc}} +// //go:noescape func LoadMasked{{.Name}}(y *[{{.Lanes}}]{{.Base}}, mask Mask{{.ElemBits}}x{{.Lanes}}) {{.Name}} // StoreMasked stores a {{.Name}} to an array, // at those elements enabled by mask // +{{.MaskedStoreDoc}} +// //go:noescape func (x {{.Name}}) StoreMasked(y *[{{.Lanes}}]{{.Base}}, mask Mask{{.ElemBits}}x{{.Lanes}}) ` From 0354b497e1da15e8daee321f92b0186061d34c90 Mon Sep 17 00:00:00 2001 From: Junyang Shao Date: Thu, 31 Jul 2025 23:44:40 +0000 Subject: [PATCH 165/200] internal/simdgen: change Shift*AndFillUpperFrom to Shift*Concat This CL generates CL 692215. Change-Id: Idccaeeef2f0d3ca6e8113df5c95d72f9e11830b6 Reviewed-on: https://go-review.googlesource.com/c/arch/+/692216 LUCI-TryBot-Result: Go LUCI Reviewed-by: David Chase --- internal/simdgen/categories.yaml | 8 ++++---- internal/simdgen/go.yaml | 8 ++++---- internal/simdgen/ops/ShiftRotate/categories.yaml | 8 ++++---- internal/simdgen/ops/ShiftRotate/go.yaml | 8 ++++---- 4 files changed, 16 insertions(+), 16 deletions(-) diff --git a/internal/simdgen/categories.yaml b/internal/simdgen/categories.yaml index 0f883bfa..996955bf 100644 --- a/internal/simdgen/categories.yaml +++ b/internal/simdgen/categories.yaml @@ -465,28 +465,28 @@ extension: "AVX.*" documentation: !string |- // NAME rotates each element in x to the right by the number of bits specified by y's corresponding elements. 
-- go: ShiftAllLeftAndFillUpperFrom +- go: ShiftAllLeftConcat nameAndSizeCheck: true commutative: false extension: "AVX.*" documentation: !string |- // NAME shifts each element of x to the left by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. -- go: ShiftAllRightAndFillUpperFrom +- go: ShiftAllRightConcat nameAndSizeCheck: true commutative: false extension: "AVX.*" documentation: !string |- // NAME shifts each element of x to the right by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. -- go: ShiftLeftAndFillUpperFrom +- go: ShiftLeftConcat nameAndSizeCheck: true commutative: false extension: "AVX.*" documentation: !string |- // NAME shifts each element of x to the left by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. -- go: ShiftRightAndFillUpperFrom +- go: ShiftRightConcat nameAndSizeCheck: true commutative: false extension: "AVX.*" diff --git a/internal/simdgen/go.yaml b/internal/simdgen/go.yaml index df8f341c..1e836912 100644 --- a/internal/simdgen/go.yaml +++ b/internal/simdgen/go.yaml @@ -945,7 +945,7 @@ - *any # Bizzare shifts. 
-- go: ShiftAllLeftAndFillUpperFrom +- go: ShiftAllLeftConcat asm: "VPSHLD[WDQ]" in: - *any @@ -953,7 +953,7 @@ - *pureImm out: - *any -- go: ShiftAllRightAndFillUpperFrom +- go: ShiftAllRightConcat asm: "VPSHRD[WDQ]" in: - *any @@ -961,7 +961,7 @@ - *pureImm out: - *any -- go: ShiftLeftAndFillUpperFrom +- go: ShiftLeftConcat asm: "VPSHLDV[WDQ]" in: - *any @@ -969,7 +969,7 @@ - *any out: - *any -- go: ShiftRightAndFillUpperFrom +- go: ShiftRightConcat asm: "VPSHRDV[WDQ]" in: - *any diff --git a/internal/simdgen/ops/ShiftRotate/categories.yaml b/internal/simdgen/ops/ShiftRotate/categories.yaml index f9a92652..5528b4d8 100644 --- a/internal/simdgen/ops/ShiftRotate/categories.yaml +++ b/internal/simdgen/ops/ShiftRotate/categories.yaml @@ -63,28 +63,28 @@ extension: "AVX.*" documentation: !string |- // NAME rotates each element in x to the right by the number of bits specified by y's corresponding elements. -- go: ShiftAllLeftAndFillUpperFrom +- go: ShiftAllLeftConcat nameAndSizeCheck: true commutative: false extension: "AVX.*" documentation: !string |- // NAME shifts each element of x to the left by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. -- go: ShiftAllRightAndFillUpperFrom +- go: ShiftAllRightConcat nameAndSizeCheck: true commutative: false extension: "AVX.*" documentation: !string |- // NAME shifts each element of x to the right by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. 
-- go: ShiftLeftAndFillUpperFrom +- go: ShiftLeftConcat nameAndSizeCheck: true commutative: false extension: "AVX.*" documentation: !string |- // NAME shifts each element of x to the left by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. -- go: ShiftRightAndFillUpperFrom +- go: ShiftRightConcat nameAndSizeCheck: true commutative: false extension: "AVX.*" diff --git a/internal/simdgen/ops/ShiftRotate/go.yaml b/internal/simdgen/ops/ShiftRotate/go.yaml index ff4c3156..4ade55d7 100644 --- a/internal/simdgen/ops/ShiftRotate/go.yaml +++ b/internal/simdgen/ops/ShiftRotate/go.yaml @@ -114,7 +114,7 @@ - *any # Bizzare shifts. -- go: ShiftAllLeftAndFillUpperFrom +- go: ShiftAllLeftConcat asm: "VPSHLD[WDQ]" in: - *any @@ -122,7 +122,7 @@ - *pureImm out: - *any -- go: ShiftAllRightAndFillUpperFrom +- go: ShiftAllRightConcat asm: "VPSHRD[WDQ]" in: - *any @@ -130,7 +130,7 @@ - *pureImm out: - *any -- go: ShiftLeftAndFillUpperFrom +- go: ShiftLeftConcat asm: "VPSHLDV[WDQ]" in: - *any @@ -138,7 +138,7 @@ - *any out: - *any -- go: ShiftRightAndFillUpperFrom +- go: ShiftRightConcat asm: "VPSHRDV[WDQ]" in: - *any From 4967ce7c35d0e540671a81128edf4ae59370026b Mon Sep 17 00:00:00 2001 From: Junyang Shao Date: Thu, 31 Jul 2025 23:53:12 +0000 Subject: [PATCH 166/200] internal/simdgen: change PairDotProdAccumulate to AddDotProd This CL generates CL 692156. 
Change-Id: Ic38a9e0e3febb63465afca065e1b9fb98c0e81ca Reviewed-on: https://go-review.googlesource.com/c/arch/+/692219 LUCI-TryBot-Result: Go LUCI Reviewed-by: David Chase --- internal/simdgen/categories.yaml | 8 ++++---- internal/simdgen/go.yaml | 6 ++---- internal/simdgen/ops/MLOps/categories.yaml | 8 ++++---- internal/simdgen/ops/MLOps/go.yaml | 6 ++---- 4 files changed, 12 insertions(+), 16 deletions(-) diff --git a/internal/simdgen/categories.yaml b/internal/simdgen/categories.yaml index 996955bf..24fa6165 100644 --- a/internal/simdgen/categories.yaml +++ b/internal/simdgen/categories.yaml @@ -304,16 +304,16 @@ extension: "AVX.*" documentation: !string |- // NAME multiplies performs dot products on groups of 4 elements of x and y and then adds z. -- go: PairDotProdAccumulate +- go: AddDotProd commutative: false extension: "AVX.*" documentation: !string |- - // NAME performs dot products on pairs of elements of x and y and then adds z. -- go: SaturatedPairDotProdAccumulate + // NAME performs dot products on pairs of elements of y and z and then adds x. +- go: SaturatedAddDotProd commutative: false extension: "AVX.*" documentation: !string |- - // NAME performs dot products on pairs of elements of x and y and then adds z. + // NAME performs dot products on pairs of elements of y and z and then adds x. 
- go: FusedMultiplyAdd commutative: false extension: "AVX.*" diff --git a/internal/simdgen/go.yaml b/internal/simdgen/go.yaml index 1e836912..eb435407 100644 --- a/internal/simdgen/go.yaml +++ b/internal/simdgen/go.yaml @@ -457,9 +457,8 @@ - *qdpa_src2 out: - *qdpa_acc -- go: PairDotProdAccumulate +- go: AddDotProd asm: "VPDPWSSD" - operandOrder: "31" # switch operand 3 and 1 in: - &pdpa_acc go: $t_acc @@ -475,9 +474,8 @@ overwriteElementBits: 16 out: - *pdpa_acc -- go: SaturatedPairDotProdAccumulate +- go: SaturatedAddDotProd asm: "VPDPWSSDS" - operandOrder: "31" # switch operand 3 and 1 in: - *pdpa_acc - *pdpa_src1 diff --git a/internal/simdgen/ops/MLOps/categories.yaml b/internal/simdgen/ops/MLOps/categories.yaml index c90942de..b3508d25 100644 --- a/internal/simdgen/ops/MLOps/categories.yaml +++ b/internal/simdgen/ops/MLOps/categories.yaml @@ -28,16 +28,16 @@ extension: "AVX.*" documentation: !string |- // NAME multiplies performs dot products on groups of 4 elements of x and y and then adds z. -- go: PairDotProdAccumulate +- go: AddDotProd commutative: false extension: "AVX.*" documentation: !string |- - // NAME performs dot products on pairs of elements of x and y and then adds z. -- go: SaturatedPairDotProdAccumulate + // NAME performs dot products on pairs of elements of y and z and then adds x. +- go: SaturatedAddDotProd commutative: false extension: "AVX.*" documentation: !string |- - // NAME performs dot products on pairs of elements of x and y and then adds z. + // NAME performs dot products on pairs of elements of y and z and then adds x. 
- go: FusedMultiplyAdd commutative: false extension: "AVX.*" diff --git a/internal/simdgen/ops/MLOps/go.yaml b/internal/simdgen/ops/MLOps/go.yaml index 2fb3e52f..8da2071d 100644 --- a/internal/simdgen/ops/MLOps/go.yaml +++ b/internal/simdgen/ops/MLOps/go.yaml @@ -60,9 +60,8 @@ - *qdpa_src2 out: - *qdpa_acc -- go: PairDotProdAccumulate +- go: AddDotProd asm: "VPDPWSSD" - operandOrder: "31" # switch operand 3 and 1 in: - &pdpa_acc go: $t_acc @@ -78,9 +77,8 @@ overwriteElementBits: 16 out: - *pdpa_acc -- go: SaturatedPairDotProdAccumulate +- go: SaturatedAddDotProd asm: "VPDPWSSDS" - operandOrder: "31" # switch operand 3 and 1 in: - *pdpa_acc - *pdpa_src1 From 2f2bc4cacd1ea78a54aefe396e099548d3ff7f5f Mon Sep 17 00:00:00 2001 From: Junyang Shao Date: Fri, 1 Aug 2025 19:12:32 +0000 Subject: [PATCH 167/200] internal/simdgen: make bitwise logic available to all u?int vectors This CL generates CL 692356. Change-Id: I4d5da85d4ff7f83df52f4e2e1e082e8ccd6a5883 Reviewed-on: https://go-review.googlesource.com/c/arch/+/692555 LUCI-TryBot-Result: Go LUCI Reviewed-by: David Chase --- internal/simdgen/go.yaml | 60 +++++++++++++++++++++++ internal/simdgen/ops/BitwiseLogic/go.yaml | 60 +++++++++++++++++++++++ 2 files changed, 120 insertions(+) diff --git a/internal/simdgen/go.yaml b/internal/simdgen/go.yaml index eb435407..ddab9c38 100644 --- a/internal/simdgen/go.yaml +++ b/internal/simdgen/go.yaml @@ -118,6 +118,28 @@ out: - *any +- go: And + asm: "VPANDD" # Fill in the gap, And is missing for Uint8x64 and Int8x64 + inVariant: [] + in: &twoI8x64 + - &i8x64 + go: $t + overwriteElementBits: 8 + - *i8x64 + out: &oneI8x64 + - *i8x64 + +- go: And + asm: "VPANDD" # Fill in the gap, And is missing for Uint16x32 and Int16x32 + inVariant: [] + in: &twoI16x32 + - &i16x32 + go: $t + overwriteElementBits: 16 + - *i16x32 + out: &oneI16x32 + - *i16x32 + - go: AndNot asm: "VPANDN[DQ]?" 
operandOrder: "21" # switch the arg order @@ -127,6 +149,20 @@ out: - *any +- go: AndNot + asm: "VPANDND" # Fill in the gap, AndNot is missing for Uint8x64 and Int8x64 + operandOrder: "21" # switch the arg order + inVariant: [] + in: *twoI8x64 + out: *oneI8x64 + +- go: AndNot + asm: "VPANDND" # Fill in the gap, AndNot is missing for Uint16x32 and Int16x32 + operandOrder: "21" # switch the arg order + inVariant: [] + in: *twoI16x32 + out: *oneI16x32 + - go: Or asm: "VPOR[DQ]?" in: @@ -135,6 +171,18 @@ out: - *any +- go: Or + asm: "VPORD" # Fill in the gap, Or is missing for Uint8x64 and Int8x64 + inVariant: [] + in: *twoI8x64 + out: *oneI8x64 + +- go: Or + asm: "VPORD" # Fill in the gap, Or is missing for Uint16x32 and Int16x32 + inVariant: [] + in: *twoI16x32 + out: *oneI16x32 + - go: Xor asm: "VPXOR[DQ]?" in: @@ -142,6 +190,18 @@ - *any out: - *any + +- go: Xor + asm: "VPXORD" # Fill in the gap, Or is missing for Uint8x64 and Int8x64 + inVariant: [] + in: *twoI8x64 + out: *oneI8x64 + +- go: Xor + asm: "VPXORD" # Fill in the gap, Or is missing for Uint16x32 and Int16x32 + inVariant: [] + in: *twoI16x32 + out: *oneI16x32 # Ints - go: Equal asm: "V?PCMPEQ[BWDQ]" diff --git a/internal/simdgen/ops/BitwiseLogic/go.yaml b/internal/simdgen/ops/BitwiseLogic/go.yaml index 0d0f1c8c..ab344438 100644 --- a/internal/simdgen/ops/BitwiseLogic/go.yaml +++ b/internal/simdgen/ops/BitwiseLogic/go.yaml @@ -42,6 +42,28 @@ out: - *any +- go: And + asm: "VPANDD" # Fill in the gap, And is missing for Uint8x64 and Int8x64 + inVariant: [] + in: &twoI8x64 + - &i8x64 + go: $t + overwriteElementBits: 8 + - *i8x64 + out: &oneI8x64 + - *i8x64 + +- go: And + asm: "VPANDD" # Fill in the gap, And is missing for Uint16x32 and Int16x32 + inVariant: [] + in: &twoI16x32 + - &i16x32 + go: $t + overwriteElementBits: 16 + - *i16x32 + out: &oneI16x32 + - *i16x32 + - go: AndNot asm: "VPANDN[DQ]?" 
operandOrder: "21" # switch the arg order @@ -51,6 +73,20 @@ out: - *any +- go: AndNot + asm: "VPANDND" # Fill in the gap, AndNot is missing for Uint8x64 and Int8x64 + operandOrder: "21" # switch the arg order + inVariant: [] + in: *twoI8x64 + out: *oneI8x64 + +- go: AndNot + asm: "VPANDND" # Fill in the gap, AndNot is missing for Uint16x32 and Int16x32 + operandOrder: "21" # switch the arg order + inVariant: [] + in: *twoI16x32 + out: *oneI16x32 + - go: Or asm: "VPOR[DQ]?" in: @@ -59,6 +95,18 @@ out: - *any +- go: Or + asm: "VPORD" # Fill in the gap, Or is missing for Uint8x64 and Int8x64 + inVariant: [] + in: *twoI8x64 + out: *oneI8x64 + +- go: Or + asm: "VPORD" # Fill in the gap, Or is missing for Uint16x32 and Int16x32 + inVariant: [] + in: *twoI16x32 + out: *oneI16x32 + - go: Xor asm: "VPXOR[DQ]?" in: @@ -66,3 +114,15 @@ - *any out: - *any + +- go: Xor + asm: "VPXORD" # Fill in the gap, Or is missing for Uint8x64 and Int8x64 + inVariant: [] + in: *twoI8x64 + out: *oneI8x64 + +- go: Xor + asm: "VPXORD" # Fill in the gap, Or is missing for Uint16x32 and Int16x32 + inVariant: [] + in: *twoI16x32 + out: *oneI16x32 \ No newline at end of file From 17d837876ca6305800d018a702fadb1a56620870 Mon Sep 17 00:00:00 2001 From: David Chase Date: Fri, 1 Aug 2025 16:05:20 -0400 Subject: [PATCH 168/200] internal/simdgen: rename some methods Generates dev.simd CL 692357. 
these are the "easy" renamings: SaturatedOp -> OpSaturated PairwiseOp -> OpPairs OpWithPrecision -> OpScaled DiffWithOpWithPrecision -> OpScaledResidue Change-Id: I494efdc5b09d39dc1628fc667a71574fc5725515 Reviewed-on: https://go-review.googlesource.com/c/arch/+/692556 Reviewed-by: Junyang Shao LUCI-TryBot-Result: Go LUCI --- internal/simdgen/categories.yaml | 35 ++++++++----------- internal/simdgen/go.yaml | 32 ++++++++--------- internal/simdgen/ops/AddSub/categories.yaml | 12 +++---- internal/simdgen/ops/AddSub/go.yaml | 24 ++++++------- .../simdgen/ops/FPonlyArith/categories.yaml | 18 +++++----- internal/simdgen/ops/FPonlyArith/go.yaml | 6 ++-- internal/simdgen/ops/Mul/categories.yaml | 5 --- internal/simdgen/ops/Mul/go.yaml | 2 +- 8 files changed, 62 insertions(+), 72 deletions(-) diff --git a/internal/simdgen/categories.yaml b/internal/simdgen/categories.yaml index 24fa6165..dcba4b73 100644 --- a/internal/simdgen/categories.yaml +++ b/internal/simdgen/categories.yaml @@ -4,7 +4,7 @@ extension: "AVX.*" documentation: !string |- // NAME adds corresponding elements of two vectors. -- go: SaturatedAdd +- go: AddSaturated commutative: true extension: "AVX.*" documentation: !string |- @@ -14,30 +14,30 @@ extension: "AVX.*" documentation: !string |- // NAME subtracts corresponding elements of two vectors. -- go: SaturatedSub +- go: SubSaturated commutative: false extension: "AVX.*" documentation: !string |- // NAME subtracts corresponding elements of two vectors with saturation. -- go: PairwiseAdd +- go: AddPairs commutative: false extension: "AVX.*" documentation: !string |- // NAME horizontally adds adjacent pairs of elements. // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...]. -- go: PairwiseSub +- go: SubPairs commutative: false extension: "AVX.*" documentation: !string |- // NAME horizontally subtracts adjacent pairs of elements. // For x = [x0, x1, x2, x3, ...] 
and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...]. -- go: SaturatedPairwiseAdd +- go: AddPairsSaturated commutative: false extension: "AVX.*" documentation: !string |- // NAME horizontally adds adjacent pairs of elements with saturation. // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...]. -- go: SaturatedPairwiseSub +- go: SubPairsSaturated commutative: false extension: "AVX.*" documentation: !string |- @@ -146,7 +146,7 @@ extension: "AVX.*" documentation: !string |- // NAME computes an approximate reciprocal of the square root of each element. -- go: MulByPowOf2 +- go: Scale commutative: false extension: "AVX.*" documentation: !string |- @@ -157,13 +157,13 @@ constImm: 0 documentation: !string |- // NAME rounds elements to the nearest integer. -- go: RoundWithPrecision +- go: RoundScaled commutative: false extension: "AVX.*" constImm: 0 documentation: !string |- // NAME rounds elements with specified precision. -- go: DiffWithRoundWithPrecision +- go: RoundScaledResidue commutative: false extension: "AVX.*" constImm: 0 @@ -175,13 +175,13 @@ constImm: 1 documentation: !string |- // NAME rounds elements down to the nearest integer. -- go: FloorWithPrecision +- go: FloorScaled commutative: false extension: "AVX.*" constImm: 1 documentation: !string |- // NAME rounds elements down with specified precision. -- go: DiffWithFloorWithPrecision +- go: FloorScaledResidue commutative: false extension: "AVX.*" constImm: 1 @@ -193,13 +193,13 @@ constImm: 2 documentation: !string |- // NAME rounds elements up to the nearest integer. -- go: CeilWithPrecision +- go: CeilScaled commutative: false extension: "AVX.*" constImm: 2 documentation: !string |- // NAME rounds elements up with specified precision. 
-- go: DiffWithCeilWithPrecision +- go: CeilScaledResidue commutative: false extension: "AVX.*" constImm: 2 @@ -211,13 +211,13 @@ constImm: 3 documentation: !string |- // NAME truncates elements towards zero. -- go: TruncWithPrecision +- go: TruncScaled commutative: false extension: "AVX.*" constImm: 3 documentation: !string |- // NAME truncates elements with specified precision. -- go: DiffWithTruncWithPrecision +- go: TruncScaledResidue commutative: false extension: "AVX.*" constImm: 3 @@ -396,11 +396,6 @@ extension: "AVX.*" documentation: !string |- // NAME multiplies elements and stores the high part of the result. -- go: MulLow - commutative: true - extension: "AVX.*" - documentation: !string |- - // NAME multiplies elements and stores the low part of the result. - go: ShiftAllLeft nameAndSizeCheck: true commutative: false diff --git a/internal/simdgen/go.yaml b/internal/simdgen/go.yaml index ddab9c38..8c893ba6 100644 --- a/internal/simdgen/go.yaml +++ b/internal/simdgen/go.yaml @@ -8,8 +8,8 @@ - *any out: - *any -# Saturated Add -- go: SaturatedAdd +# Add Saturated +- go: AddSaturated asm: "VPADDS[BWDQ]" in: - &int @@ -18,7 +18,7 @@ - *int out: - *int -- go: SaturatedAdd +- go: AddSaturated asm: "VPADDS[BWDQ]" in: - &uint @@ -36,42 +36,42 @@ - *any out: &1any - *any -# Saturated Sub -- go: SaturatedSub +# Sub Saturated +- go: SubSaturated asm: "VPSUBS[BWDQ]" in: &2int - *int - *int out: &1int - *int -- go: SaturatedSub +- go: SubSaturated asm: "VPSUBS[BWDQ]" in: - *uint - *uint out: - *uint -- go: PairwiseAdd +- go: AddPairs asm: "VPHADD[DW]" in: *2any out: *1any -- go: PairwiseSub +- go: SubPairs asm: "VPHSUB[DW]" in: *2any out: *1any -- go: PairwiseAdd +- go: AddPairs asm: "VHADDP[SD]" # floats in: *2any out: *1any -- go: PairwiseSub +- go: SubPairs asm: "VHSUBP[SD]" # floats in: *2any out: *1any -- go: SaturatedPairwiseAdd +- go: AddPairsSaturated asm: "VPHADDS[DW]" in: *2int out: *1int -- go: SaturatedPairwiseSub +- go: SubPairsSaturated asm: 
"VPHSUBS[DW]" in: *2int out: *1int @@ -342,7 +342,7 @@ asm: "V?RSQRT(14)?P[SD]" in: *1fp out: *1fp -- go: MulByPowOf2 +- go: Scale asm: "VSCALEFP[SD]" in: *2fp out: *1fp @@ -355,7 +355,7 @@ const: 0 # place holder out: *1fp -- go: "(Round|Ceil|Floor|Trunc)WithPrecision" +- go: "(Round|Ceil|Floor|Trunc)Scaled" asm: "VRNDSCALEP[SD]" in: - *fp @@ -364,7 +364,7 @@ immOffset: 4 # "M", round to numbers with M digits after dot(by means of binary number). name: prec out: *1fp -- go: "DiffWith(Round|Ceil|Floor|Trunc)WithPrecision" +- go: "(Round|Ceil|Floor|Trunc)ScaledResidue" asm: "VREDUCEP[SD]" in: - *fp @@ -881,7 +881,7 @@ # MulLow # Signed int only. -- go: MulLow +- go: Mul asm: "VPMULL[WDQ]" in: - *int diff --git a/internal/simdgen/ops/AddSub/categories.yaml b/internal/simdgen/ops/AddSub/categories.yaml index 2ffd1e23..4e492516 100644 --- a/internal/simdgen/ops/AddSub/categories.yaml +++ b/internal/simdgen/ops/AddSub/categories.yaml @@ -4,7 +4,7 @@ extension: "AVX.*" documentation: !string |- // NAME adds corresponding elements of two vectors. -- go: SaturatedAdd +- go: AddSaturated commutative: true extension: "AVX.*" documentation: !string |- @@ -14,30 +14,30 @@ extension: "AVX.*" documentation: !string |- // NAME subtracts corresponding elements of two vectors. -- go: SaturatedSub +- go: SubSaturated commutative: false extension: "AVX.*" documentation: !string |- // NAME subtracts corresponding elements of two vectors with saturation. -- go: PairwiseAdd +- go: AddPairs commutative: false extension: "AVX.*" documentation: !string |- // NAME horizontally adds adjacent pairs of elements. // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...]. -- go: PairwiseSub +- go: SubPairs commutative: false extension: "AVX.*" documentation: !string |- // NAME horizontally subtracts adjacent pairs of elements. // For x = [x0, x1, x2, x3, ...] 
and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...]. -- go: SaturatedPairwiseAdd +- go: AddPairsSaturated commutative: false extension: "AVX.*" documentation: !string |- // NAME horizontally adds adjacent pairs of elements with saturation. // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...]. -- go: SaturatedPairwiseSub +- go: SubPairsSaturated commutative: false extension: "AVX.*" documentation: !string |- diff --git a/internal/simdgen/ops/AddSub/go.yaml b/internal/simdgen/ops/AddSub/go.yaml index c952c150..45726cd6 100644 --- a/internal/simdgen/ops/AddSub/go.yaml +++ b/internal/simdgen/ops/AddSub/go.yaml @@ -8,8 +8,8 @@ - *any out: - *any -# Saturated Add -- go: SaturatedAdd +# Add Saturated +- go: AddSaturated asm: "VPADDS[BWDQ]" in: - &int @@ -18,7 +18,7 @@ - *int out: - *int -- go: SaturatedAdd +- go: AddSaturated asm: "VPADDS[BWDQ]" in: - &uint @@ -36,42 +36,42 @@ - *any out: &1any - *any -# Saturated Sub -- go: SaturatedSub +# Sub Saturated +- go: SubSaturated asm: "VPSUBS[BWDQ]" in: &2int - *int - *int out: &1int - *int -- go: SaturatedSub +- go: SubSaturated asm: "VPSUBS[BWDQ]" in: - *uint - *uint out: - *uint -- go: PairwiseAdd +- go: AddPairs asm: "VPHADD[DW]" in: *2any out: *1any -- go: PairwiseSub +- go: SubPairs asm: "VPHSUB[DW]" in: *2any out: *1any -- go: PairwiseAdd +- go: AddPairs asm: "VHADDP[SD]" # floats in: *2any out: *1any -- go: PairwiseSub +- go: SubPairs asm: "VHSUBP[SD]" # floats in: *2any out: *1any -- go: SaturatedPairwiseAdd +- go: AddPairsSaturated asm: "VPHADDS[DW]" in: *2int out: *1int -- go: SaturatedPairwiseSub +- go: SubPairsSaturated asm: "VPHSUBS[DW]" in: *2int out: *1int diff --git a/internal/simdgen/ops/FPonlyArith/categories.yaml b/internal/simdgen/ops/FPonlyArith/categories.yaml index 0fb727d5..63ddbb34 100644 --- a/internal/simdgen/ops/FPonlyArith/categories.yaml +++ b/internal/simdgen/ops/FPonlyArith/categories.yaml @@ -19,7 
+19,7 @@ extension: "AVX.*" documentation: !string |- // NAME computes an approximate reciprocal of the square root of each element. -- go: MulByPowOf2 +- go: Scale commutative: false extension: "AVX.*" documentation: !string |- @@ -30,13 +30,13 @@ constImm: 0 documentation: !string |- // NAME rounds elements to the nearest integer. -- go: RoundWithPrecision +- go: RoundScaled commutative: false extension: "AVX.*" constImm: 0 documentation: !string |- // NAME rounds elements with specified precision. -- go: DiffWithRoundWithPrecision +- go: RoundScaledResidue commutative: false extension: "AVX.*" constImm: 0 @@ -48,13 +48,13 @@ constImm: 1 documentation: !string |- // NAME rounds elements down to the nearest integer. -- go: FloorWithPrecision +- go: FloorScaled commutative: false extension: "AVX.*" constImm: 1 documentation: !string |- // NAME rounds elements down with specified precision. -- go: DiffWithFloorWithPrecision +- go: FloorScaledResidue commutative: false extension: "AVX.*" constImm: 1 @@ -66,13 +66,13 @@ constImm: 2 documentation: !string |- // NAME rounds elements up to the nearest integer. -- go: CeilWithPrecision +- go: CeilScaled commutative: false extension: "AVX.*" constImm: 2 documentation: !string |- // NAME rounds elements up with specified precision. -- go: DiffWithCeilWithPrecision +- go: CeilScaledResidue commutative: false extension: "AVX.*" constImm: 2 @@ -84,13 +84,13 @@ constImm: 3 documentation: !string |- // NAME truncates elements towards zero. -- go: TruncWithPrecision +- go: TruncScaled commutative: false extension: "AVX.*" constImm: 3 documentation: !string |- // NAME truncates elements with specified precision. 
-- go: DiffWithTruncWithPrecision +- go: TruncScaledResidue commutative: false extension: "AVX.*" constImm: 3 diff --git a/internal/simdgen/ops/FPonlyArith/go.yaml b/internal/simdgen/ops/FPonlyArith/go.yaml index 71d1cb5f..dfb0454e 100644 --- a/internal/simdgen/ops/FPonlyArith/go.yaml +++ b/internal/simdgen/ops/FPonlyArith/go.yaml @@ -21,7 +21,7 @@ asm: "V?RSQRT(14)?P[SD]" in: *1fp out: *1fp -- go: MulByPowOf2 +- go: Scale asm: "VSCALEFP[SD]" in: *2fp out: *1fp @@ -34,7 +34,7 @@ const: 0 # place holder out: *1fp -- go: "(Round|Ceil|Floor|Trunc)WithPrecision" +- go: "(Round|Ceil|Floor|Trunc)Scaled" asm: "VRNDSCALEP[SD]" in: - *fp @@ -43,7 +43,7 @@ immOffset: 4 # "M", round to numbers with M digits after dot(by means of binary number). name: prec out: *1fp -- go: "DiffWith(Round|Ceil|Floor|Trunc)WithPrecision" +- go: "(Round|Ceil|Floor|Trunc)ScaledResidue" asm: "VREDUCEP[SD]" in: - *fp diff --git a/internal/simdgen/ops/Mul/categories.yaml b/internal/simdgen/ops/Mul/categories.yaml index 9a9b8328..f4e2aed2 100644 --- a/internal/simdgen/ops/Mul/categories.yaml +++ b/internal/simdgen/ops/Mul/categories.yaml @@ -15,8 +15,3 @@ extension: "AVX.*" documentation: !string |- // NAME multiplies elements and stores the high part of the result. -- go: MulLow - commutative: true - extension: "AVX.*" - documentation: !string |- - // NAME multiplies elements and stores the low part of the result. diff --git a/internal/simdgen/ops/Mul/go.yaml b/internal/simdgen/ops/Mul/go.yaml index c9ddeb4a..dd9f55fb 100644 --- a/internal/simdgen/ops/Mul/go.yaml +++ b/internal/simdgen/ops/Mul/go.yaml @@ -57,7 +57,7 @@ # MulLow # Signed int only. -- go: MulLow +- go: Mul asm: "VPMULL[WDQ]" in: - *int From a373a4b004606c4eca9d42eea9f824a03c388341 Mon Sep 17 00:00:00 2001 From: Junyang Shao Date: Fri, 1 Aug 2025 22:20:43 +0000 Subject: [PATCH 169/200] internal/simdgen: add ShiftAll immediate variant. 
Right now ShiftAll with immediate might make the compiler generate erroneous instruction like MOV $3, X1. This CL adds the immediate variant of ShiftAll and adds rewrite rules to lower (VPSLL (Vec Const)) => (VPSLLImm [Const] (Vec)). To facilitate this, this CL adds a mechanism to do partial code generation: the immediate variant of ShiftAll only appears in machine ops. This CL also did some cleanups. This CL generates CL 693157. Change-Id: Ife898877e952f2e8d4ee1cb1efbfcf0c07e87189 Reviewed-on: https://go-review.googlesource.com/c/arch/+/693136 Reviewed-by: David Chase LUCI-TryBot-Result: Go LUCI --- internal/simdgen/categories.yaml | 27 +++++++++++++ internal/simdgen/gen_simdGenericOps.go | 5 ++- internal/simdgen/gen_simdIntrinsics.go | 39 ++++++++++--------- internal/simdgen/gen_simdMachineOps.go | 8 +--- internal/simdgen/gen_simdTypes.go | 6 ++- internal/simdgen/gen_simdrules.go | 34 ++++++++++++---- internal/simdgen/gen_simdssa.go | 6 +-- internal/simdgen/gen_utility.go | 17 +++++++- internal/simdgen/go.yaml | 24 ++++++++++++ internal/simdgen/godefs.go | 21 ++++++++++ .../simdgen/ops/ShiftRotate/categories.yaml | 27 +++++++++++++ internal/simdgen/ops/ShiftRotate/go.yaml | 24 ++++++++++++ 12 files changed, 195 insertions(+), 43 deletions(-) diff --git a/internal/simdgen/categories.yaml b/internal/simdgen/categories.yaml index dcba4b73..0afa0b14 100644 --- a/internal/simdgen/categories.yaml +++ b/internal/simdgen/categories.yaml @@ -398,6 +398,7 @@ // NAME multiplies elements and stores the high part of the result. - go: ShiftAllLeft nameAndSizeCheck: true + specialLower: sftimm commutative: false extension: "AVX.*" documentation: !string |- @@ -405,17 +406,43 @@ - go: ShiftAllRight signed: false nameAndSizeCheck: true + specialLower: sftimm commutative: false extension: "AVX.*" documentation: !string |- // NAME shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. 
- go: ShiftAllRight signed: true + specialLower: sftimm nameAndSizeCheck: true commutative: false extension: "AVX.*" documentation: !string |- // NAME shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. +- go: shiftAllLeftConst # no APIs, only ssa ops. + noTypes: "true" + noGenericOps: "true" + SSAVariant: "const" # to avoid its name colliding with reg version of this instruction, amend this to its ssa op name. + nameAndSizeCheck: true + commutative: false + extension: "AVX.*" +- go: shiftAllRightConst # no APIs, only ssa ops. + noTypes: "true" + noGenericOps: "true" + SSAVariant: "const" + signed: false + nameAndSizeCheck: true + commutative: false + extension: "AVX.*" +- go: shiftAllRightConst # no APIs, only ssa ops. + noTypes: "true" + noGenericOps: "true" + SSAVariant: "const" + signed: true + nameAndSizeCheck: true + commutative: false + extension: "AVX.*" + - go: ShiftLeft nameAndSizeCheck: true commutative: false diff --git a/internal/simdgen/gen_simdGenericOps.go b/internal/simdgen/gen_simdGenericOps.go index c345793a..daf941d7 100644 --- a/internal/simdgen/gen_simdGenericOps.go +++ b/internal/simdgen/gen_simdGenericOps.go @@ -42,8 +42,11 @@ func writeSIMDGenericOps(ops []Operation) *bytes.Buffer { } var opsData opData for _, op := range ops { + if op.NoGenericOps != nil && *op.NoGenericOps == "true" { + continue + } _, _, _, immType, gOp := op.shape() - gOpData := genericOpsData{genericName(gOp), len(gOp.In), op.Commutative} + gOpData := genericOpsData{gOp.GenericName(), len(gOp.In), op.Commutative} if immType == VarImm || immType == ConstVarImm { opsData.OpsImm = append(opsData.OpsImm, gOpData) } else { diff --git a/internal/simdgen/gen_simdIntrinsics.go b/internal/simdgen/gen_simdIntrinsics.go index d3b35218..ca339ac2 100644 --- a/internal/simdgen/gen_simdIntrinsics.go +++ b/internal/simdgen/gen_simdIntrinsics.go @@ -25,41 +25,41 @@ const simdPackage = "` + simdPackage + `" func 
simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies ...sys.ArchFamily)) { {{end}} -{{define "op1"}} addF(simdPackage, "{{(index .In 0).Go}}.{{.Go}}", opLen1(ssa.Op{{.Go}}{{(index .In 0).Go}}, {{.SSAType}}), sys.AMD64) +{{define "op1"}} addF(simdPackage, "{{(index .In 0).Go}}.{{.Go}}", opLen1(ssa.Op{{.GenericName}}, {{.SSAType}}), sys.AMD64) {{end}} -{{define "op2"}} addF(simdPackage, "{{(index .In 0).Go}}.{{.Go}}", opLen2(ssa.Op{{.Go}}{{(index .In 0).Go}}, {{.SSAType}}), sys.AMD64) +{{define "op2"}} addF(simdPackage, "{{(index .In 0).Go}}.{{.Go}}", opLen2(ssa.Op{{.GenericName}}, {{.SSAType}}), sys.AMD64) {{end}} -{{define "op2_21"}} addF(simdPackage, "{{(index .In 0).Go}}.{{.Go}}", opLen2_21(ssa.Op{{.Go}}{{(index .In 0).Go}}, {{.SSAType}}), sys.AMD64) +{{define "op2_21"}} addF(simdPackage, "{{(index .In 0).Go}}.{{.Go}}", opLen2_21(ssa.Op{{.GenericName}}, {{.SSAType}}), sys.AMD64) {{end}} -{{define "op2_21Type1"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen2_21(ssa.Op{{.Go}}{{(index .In 1).Go}}, {{.SSAType}}), sys.AMD64) +{{define "op2_21Type1"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen2_21(ssa.Op{{.GenericName}}, {{.SSAType}}), sys.AMD64) {{end}} -{{define "op3"}} addF(simdPackage, "{{(index .In 0).Go}}.{{.Go}}", opLen3(ssa.Op{{.Go}}{{(index .In 0).Go}}, {{.SSAType}}), sys.AMD64) +{{define "op3"}} addF(simdPackage, "{{(index .In 0).Go}}.{{.Go}}", opLen3(ssa.Op{{.GenericName}}, {{.SSAType}}), sys.AMD64) {{end}} -{{define "op3_21"}} addF(simdPackage, "{{(index .In 0).Go}}.{{.Go}}", opLen3_21(ssa.Op{{.Go}}{{(index .In 0).Go}}, {{.SSAType}}), sys.AMD64) +{{define "op3_21"}} addF(simdPackage, "{{(index .In 0).Go}}.{{.Go}}", opLen3_21(ssa.Op{{.GenericName}}, {{.SSAType}}), sys.AMD64) {{end}} -{{define "op3_21Type1"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen3_21(ssa.Op{{.Go}}{{(index .In 1).Go}}, {{.SSAType}}), sys.AMD64) +{{define "op3_21Type1"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", 
opLen3_21(ssa.Op{{.GenericName}}, {{.SSAType}}), sys.AMD64) {{end}} -{{define "op3_231Type1"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen3_231(ssa.Op{{.Go}}{{(index .In 1).Go}}, {{.SSAType}}), sys.AMD64) +{{define "op3_231Type1"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen3_231(ssa.Op{{.GenericName}}, {{.SSAType}}), sys.AMD64) {{end}} -{{define "op3_31"}} addF(simdPackage, "{{(index .In 2).Go}}.{{.Go}}", opLen3_31(ssa.Op{{.Go}}{{(index .In 0).Go}}, {{.SSAType}}), sys.AMD64) +{{define "op3_31"}} addF(simdPackage, "{{(index .In 2).Go}}.{{.Go}}", opLen3_31(ssa.Op{{.GenericName}}, {{.SSAType}}), sys.AMD64) {{end}} -{{define "op4"}} addF(simdPackage, "{{(index .In 0).Go}}.{{.Go}}", opLen4(ssa.Op{{.Go}}{{(index .In 0).Go}}, {{.SSAType}}), sys.AMD64) +{{define "op4"}} addF(simdPackage, "{{(index .In 0).Go}}.{{.Go}}", opLen4(ssa.Op{{.GenericName}}, {{.SSAType}}), sys.AMD64) {{end}} -{{define "op4_231Type1"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen4_231(ssa.Op{{.Go}}{{(index .In 1).Go}}, {{.SSAType}}), sys.AMD64) +{{define "op4_231Type1"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen4_231(ssa.Op{{.GenericName}}, {{.SSAType}}), sys.AMD64) {{end}} -{{define "op4_31"}} addF(simdPackage, "{{(index .In 2).Go}}.{{.Go}}", opLen4_31(ssa.Op{{.Go}}{{(index .In 0).Go}}, {{.SSAType}}), sys.AMD64) +{{define "op4_31"}} addF(simdPackage, "{{(index .In 2).Go}}.{{.Go}}", opLen4_31(ssa.Op{{.GenericName}}, {{.SSAType}}), sys.AMD64) {{end}} -{{define "op1Imm8"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen1Imm8(ssa.Op{{.Go}}{{(index .In 1).Go}}, {{.SSAType}}, {{(index .In 0).ImmOffset}}), sys.AMD64) +{{define "op1Imm8"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen1Imm8(ssa.Op{{.GenericName}}, {{.SSAType}}, {{(index .In 0).ImmOffset}}), sys.AMD64) {{end}} -{{define "op2Imm8"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen2Imm8(ssa.Op{{.Go}}{{(index .In 1).Go}}, {{.SSAType}}, {{(index .In 0).ImmOffset}}), 
sys.AMD64) +{{define "op2Imm8"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen2Imm8(ssa.Op{{.GenericName}}, {{.SSAType}}, {{(index .In 0).ImmOffset}}), sys.AMD64) {{end}} -{{define "op2Imm8_2I"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen2Imm8_2I(ssa.Op{{.Go}}{{(index .In 1).Go}}, {{.SSAType}}, {{(index .In 0).ImmOffset}}), sys.AMD64) +{{define "op2Imm8_2I"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen2Imm8_2I(ssa.Op{{.GenericName}}, {{.SSAType}}, {{(index .In 0).ImmOffset}}), sys.AMD64) {{end}} -{{define "op3Imm8"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen3Imm8(ssa.Op{{.Go}}{{(index .In 1).Go}}, {{.SSAType}}, {{(index .In 0).ImmOffset}}), sys.AMD64) +{{define "op3Imm8"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen3Imm8(ssa.Op{{.GenericName}}, {{.SSAType}}, {{(index .In 0).ImmOffset}}), sys.AMD64) {{end}} -{{define "op3Imm8_2I"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen3Imm8_2I(ssa.Op{{.Go}}{{(index .In 1).Go}}, {{.SSAType}}, {{(index .In 0).ImmOffset}}), sys.AMD64) +{{define "op3Imm8_2I"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen3Imm8_2I(ssa.Op{{.GenericName}}, {{.SSAType}}, {{(index .In 0).ImmOffset}}), sys.AMD64) {{end}} -{{define "op4Imm8"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen4Imm8(ssa.Op{{.Go}}{{(index .In 1).Go}}, {{.SSAType}}, {{(index .In 0).ImmOffset}}), sys.AMD64) +{{define "op4Imm8"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen4Imm8(ssa.Op{{.GenericName}}, {{.SSAType}}, {{(index .In 0).ImmOffset}}), sys.AMD64) {{end}} {{define "vectorConversion"}} addF(simdPackage, "{{.Tsrc.Name}}.As{{.Tdst.Name}}", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64) @@ -99,6 +99,9 @@ func writeSIMDIntrinsics(ops []Operation, typeMap simdTypeMap) *bytes.Buffer { slices.SortFunc(ops, compareOperations) for _, op := range ops { + if op.NoTypes != nil && *op.NoTypes == "true" { + continue + } if s, op, err := 
classifyOp(op); err == nil { if err := t.ExecuteTemplate(buffer, s, op); err != nil { panic(fmt.Errorf("failed to execute template %s for op %s: %w", s, op.Go, err)) diff --git a/internal/simdgen/gen_simdMachineOps.go b/internal/simdgen/gen_simdMachineOps.go index 22893a22..7c538a00 100644 --- a/internal/simdgen/gen_simdMachineOps.go +++ b/internal/simdgen/gen_simdMachineOps.go @@ -53,13 +53,7 @@ func writeSIMDMachineOps(ops []Operation) *bytes.Buffer { opsDataImm := make([]opData, 0) for _, op := range ops { shapeIn, shapeOut, maskType, _, gOp := op.shape() - - asm := gOp.Asm - if maskType == OneMask { - asm += "Masked" - } - - asm = fmt.Sprintf("%s%d", asm, gOp.VectorWidth()) + asm := machineOpName(maskType, gOp) // TODO: all our masked operations are now zeroing, we need to generate machine ops with merging masks, maybe copy // one here with a name suffix "Merging". The rewrite rules will need them. diff --git a/internal/simdgen/gen_simdTypes.go b/internal/simdgen/gen_simdTypes.go index d3791d78..b9427c4a 100644 --- a/internal/simdgen/gen_simdTypes.go +++ b/internal/simdgen/gen_simdTypes.go @@ -524,7 +524,7 @@ func writeSIMDTypes(typeMap simdTypeMap) *bytes.Buffer { return buffer } -// writeSIMDStubs generates the simd vector intrinsic stubs and writes it to stubs_amd64.go +// writeSIMDStubs generates the simd vector intrinsic stubs and writes it to ops_amd64.go and ops_internal_amd64.go // within the specified directory. 
func writeSIMDStubs(ops []Operation, typeMap simdTypeMap) *bytes.Buffer { t := templateOf(simdStubsTmpl, "simdStubs") @@ -537,6 +537,9 @@ func writeSIMDStubs(ops []Operation, typeMap simdTypeMap) *bytes.Buffer { slices.SortFunc(ops, compareOperations) for i, op := range ops { + if op.NoTypes != nil && *op.NoTypes == "true" { + continue + } idxVecAsScalar, err := checkVecAsScalar(op) if err != nil { panic(err) @@ -555,7 +558,6 @@ func writeSIMDStubs(ops []Operation, typeMap simdTypeMap) *bytes.Buffer { if err := t.ExecuteTemplate(buffer, s, op); err != nil { panic(fmt.Errorf("failed to execute template %s for op %v: %w", s, op, err)) } - } else { panic(fmt.Errorf("failed to classify op %v: %w", op.Go, err)) } diff --git a/internal/simdgen/gen_simdrules.go b/internal/simdgen/gen_simdrules.go index 81aba7a0..c910f64a 100644 --- a/internal/simdgen/gen_simdrules.go +++ b/internal/simdgen/gen_simdrules.go @@ -22,6 +22,10 @@ var ( {{end}} {{define "maskInMaskOut"}}({{.GoOp}}{{.GoType}} {{.Args}} mask) => ({{.MaskOutConvert}} ({{.Asm}} {{.ArgsOut}} ({{.MaskInConvert}} mask))) {{end}} +{{define "sftimm"}}({{.GoOp}}{{.GoType}} x (MOVQconst [c])) => ({{.Asm}}const [int8(c)] x) +{{end}} +{{define "masksftimm"}}({{.GoOp}}{{.GoType}} x (MOVQconst [c]) mask) => ({{.Asm}}const [int8(c)] x ({{.MaskInConvert}} mask)) +{{end}} `)) ) @@ -65,15 +69,15 @@ func writeSIMDRules(ops []Operation) *bytes.Buffer { var allData []tplRuleData for _, opr := range ops { + if opr.NoGenericOps != nil && *opr.NoGenericOps == "true" { + continue + } opInShape, opOutShape, maskType, immType, gOp := opr.shape() - + asm := machineOpName(maskType, gOp) vregInCnt := len(gOp.In) - asm := gOp.Asm if maskType == OneMask { - asm += "Masked" vregInCnt-- } - asm = fmt.Sprintf("%s%d", asm, gOp.VectorWidth()) data := tplRuleData{ GoOp: gOp.Go, @@ -157,11 +161,25 @@ func writeSIMDRules(ops []Operation) *bytes.Buffer { } } - if tplName == "pureVreg" && data.Args == data.ArgsOut { - data.Args = "..." 
-			data.ArgsOut = "..."
+		if gOp.SpecialLower != nil {
+			if *gOp.SpecialLower == "sftimm" {
+				sftImmData := data
+				if tplName == "maskIn" {
+					sftImmData.tplName = "masksftimm"
+				} else {
+					sftImmData.tplName = "sftimm"
+				}
+				allData = append(allData, sftImmData)
+			} else {
+				panic("simdgen sees unknown special lower " + *gOp.SpecialLower + ", maybe implement it?")
+			}
+		} else {
+			// SpecialLower rules cannot use "...".
+			if tplName == "pureVreg" && data.Args == data.ArgsOut {
+				data.Args = "..."
+				data.ArgsOut = "..."
+			}
 		}
-		data.tplName = tplName
 		allData = append(allData, data)
 	}
diff --git a/internal/simdgen/gen_simdssa.go b/internal/simdgen/gen_simdssa.go
index b664b0f4..5a5421a8 100644
--- a/internal/simdgen/gen_simdssa.go
+++ b/internal/simdgen/gen_simdssa.go
@@ -89,13 +89,9 @@ func writeSIMDSSA(ops []Operation) *bytes.Buffer {
 	seen := map[string]struct{}{}
 	allUnseen := make(map[string][]Operation)
 	for _, op := range ops {
-		asm := op.Asm
 		shapeIn, shapeOut, maskType, _, gOp := op.shape()
+		asm := machineOpName(maskType, gOp)
 
-		if maskType == 2 {
-			asm += "Masked"
-		}
-		asm = fmt.Sprintf("%s%d", asm, gOp.VectorWidth())
 		if _, ok := seen[asm]; ok {
 			continue
 		}
diff --git a/internal/simdgen/gen_utility.go b/internal/simdgen/gen_utility.go
index f1cfcfe9..59832e0e 100644
--- a/internal/simdgen/gen_utility.go
+++ b/internal/simdgen/gen_utility.go
@@ -361,6 +361,16 @@ func (o Operand) OpNameAndType(s string) string {
 	return o.OpName(s) + " " + *o.Go
 }
 
+// GoExported returns [Go] with first character capitalized.
+func (op Operation) GoExported() string {
+	return capitalizeFirst(op.Go)
+}
+
+// DocumentationExported returns [Documentation] with method name capitalized.
+func (op Operation) DocumentationExported() string {
+	return strings.ReplaceAll(op.Documentation, op.Go, op.GoExported())
+}
+
 // Op0Name returns the name to use for the 0 operand,
 // if any is present, otherwise the parameter is used. 
func (op Operation) Op0Name(s string) string { @@ -549,7 +559,7 @@ func fillCPUFeature(ops []Operation) (filled []Operation, excluded []Operation) return } -func genericName(op Operation) string { +func (op Operation) GenericName() string { if op.OperandOrder != nil { switch *op.OperandOrder { case "21Type1", "231Type1": @@ -557,6 +567,9 @@ func genericName(op Operation) string { return op.Go + *op.In[1].Go } } + if op.In[0].Class == "immediate" { + return op.Go + *op.In[1].Go + } return op.Go + *op.In[0].Go } @@ -569,7 +582,7 @@ func dedupGodef(ops []Operation) ([]Operation, error) { for _, op := range ops { _, _, _, _, gOp := op.shape() - gN := genericName(gOp) + gN := gOp.GenericName() seen[gN] = append(seen[gN], op) } if *FlagReportDup { diff --git a/internal/simdgen/go.yaml b/internal/simdgen/go.yaml index 8c893ba6..ba1e96be 100644 --- a/internal/simdgen/go.yaml +++ b/internal/simdgen/go.yaml @@ -921,6 +921,30 @@ out: - *int +- go: shiftAllLeftConst + asm: "VPSLL[WDQ]" + in: + - *any + - &imm + class: immediate + immOffset: 0 + out: + - *any +- go: shiftAllRightConst + asm: "VPSRL[WDQ]" + in: + - *int + - *imm + out: + - *int +- go: shiftAllRightConst + asm: "VPSRA[WDQ]" + in: + - *uint + - *imm + out: + - *uint + # Shift* (variable) - go: ShiftLeft asm: "VPSLLV[WD]" diff --git a/internal/simdgen/godefs.go b/internal/simdgen/godefs.go index 1bdfec1b..166a5933 100644 --- a/internal/simdgen/godefs.go +++ b/internal/simdgen/godefs.go @@ -47,6 +47,9 @@ type rawOperation struct { GoArch string // GOARCH for this definition Asm string // Assembly mnemonic OperandOrder *string // optional Operand order for better Go declarations + // Optional tag to indicate this operation is paired with special generic->machine ssa lowering rules. 
+ // Should be paired with special templates in gen_simdrules.go + SpecialLower *string In []Operand // Parameters InVariant []Operand // Optional parameters @@ -62,6 +65,12 @@ type rawOperation struct { ConstImm *string // NameAndSizeCheck is used to check [BWDQ] maps to (8|16|32|64) elemBits. NameAndSizeCheck *bool + // If non-nil, all generation in gen_simdTypes.go and gen_intrinsics will be skipped. + NoTypes *string + // If non-nil, all generation in gen_simdGenericOps and gen_simdrules will be skipped. + NoGenericOps *string + // If non-nil, this string will be attached to the machine ssa op name. + SSAVariant *string } func (o *Operation) DecodeUnified(v *unify.Value) error { @@ -114,6 +123,18 @@ func (o *Operation) VectorWidth() int { panic(fmt.Errorf("Figure out what the vector width is for %v and implement it", *o)) } +func machineOpName(maskType maskShape, gOp Operation) string { + asm := gOp.Asm + if maskType == 2 { + asm += "Masked" + } + asm = fmt.Sprintf("%s%d", asm, gOp.VectorWidth()) + if gOp.SSAVariant != nil { + asm += *gOp.SSAVariant + } + return asm +} + func compareStringPointers(x, y *string) int { if x != nil && y != nil { return compareNatural(*x, *y) diff --git a/internal/simdgen/ops/ShiftRotate/categories.yaml b/internal/simdgen/ops/ShiftRotate/categories.yaml index 5528b4d8..e51d289b 100644 --- a/internal/simdgen/ops/ShiftRotate/categories.yaml +++ b/internal/simdgen/ops/ShiftRotate/categories.yaml @@ -1,6 +1,7 @@ !sum - go: ShiftAllLeft nameAndSizeCheck: true + specialLower: sftimm commutative: false extension: "AVX.*" documentation: !string |- @@ -8,17 +9,43 @@ - go: ShiftAllRight signed: false nameAndSizeCheck: true + specialLower: sftimm commutative: false extension: "AVX.*" documentation: !string |- // NAME shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. 
- go: ShiftAllRight signed: true + specialLower: sftimm nameAndSizeCheck: true commutative: false extension: "AVX.*" documentation: !string |- // NAME shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. +- go: shiftAllLeftConst # no APIs, only ssa ops. + noTypes: "true" + noGenericOps: "true" + SSAVariant: "const" # to avoid its name colliding with reg version of this instruction, amend this to its ssa op name. + nameAndSizeCheck: true + commutative: false + extension: "AVX.*" +- go: shiftAllRightConst # no APIs, only ssa ops. + noTypes: "true" + noGenericOps: "true" + SSAVariant: "const" + signed: false + nameAndSizeCheck: true + commutative: false + extension: "AVX.*" +- go: shiftAllRightConst # no APIs, only ssa ops. + noTypes: "true" + noGenericOps: "true" + SSAVariant: "const" + signed: true + nameAndSizeCheck: true + commutative: false + extension: "AVX.*" + - go: ShiftLeft nameAndSizeCheck: true commutative: false diff --git a/internal/simdgen/ops/ShiftRotate/go.yaml b/internal/simdgen/ops/ShiftRotate/go.yaml index 4ade55d7..e7ccdeb0 100644 --- a/internal/simdgen/ops/ShiftRotate/go.yaml +++ b/internal/simdgen/ops/ShiftRotate/go.yaml @@ -32,6 +32,30 @@ out: - *int +- go: shiftAllLeftConst + asm: "VPSLL[WDQ]" + in: + - *any + - &imm + class: immediate + immOffset: 0 + out: + - *any +- go: shiftAllRightConst + asm: "VPSRL[WDQ]" + in: + - *int + - *imm + out: + - *int +- go: shiftAllRightConst + asm: "VPSRA[WDQ]" + in: + - *uint + - *imm + out: + - *uint + # Shift* (variable) - go: ShiftLeft asm: "VPSLLV[WD]" From fd301eea3aa0a7038f6e164fe44adcd2fa64c157 Mon Sep 17 00:00:00 2001 From: David Chase Date: Mon, 4 Aug 2025 13:55:36 -0400 Subject: [PATCH 170/200] internal/simdgen: add VPBLENDVB and VPBLENDMB These are not exported -- for use in emulation functions. 
Generates dev.simd CL 693155 Change-Id: I9f89465a3f98dcd0cb0f60f7c184bd30e25004da Reviewed-on: https://go-review.googlesource.com/c/arch/+/693175 LUCI-TryBot-Result: Go LUCI Reviewed-by: Junyang Shao --- internal/simdgen/categories.yaml | 6 ++++ internal/simdgen/go.yaml | 37 +++++++++++++++++++++- internal/simdgen/ops/Moves/categories.yaml | 6 ++++ internal/simdgen/ops/Moves/go.yaml | 37 +++++++++++++++++++++- 4 files changed, 84 insertions(+), 2 deletions(-) diff --git a/internal/simdgen/categories.yaml b/internal/simdgen/categories.yaml index 0afa0b14..26e80c5e 100644 --- a/internal/simdgen/categories.yaml +++ b/internal/simdgen/categories.yaml @@ -380,6 +380,12 @@ documentation: !string |- // NAME performs a compression on vector x using mask by // selecting elements as indicated by mask, and pack them to lower indexed elements. +- go: blend + commutative: false + extension: "AVX.*" + documentation: !string |- + // NAME blends two vectors based on mask values, choosing either + // the first or the second based on whether the third is false or true - go: Mul commutative: true extension: "AVX.*" diff --git a/internal/simdgen/go.yaml b/internal/simdgen/go.yaml index ba1e96be..cdee0870 100644 --- a/internal/simdgen/go.yaml +++ b/internal/simdgen/go.yaml @@ -637,7 +637,6 @@ base: $b bits: $e - - go: Set128 asm: "VINSERTI128" in: @@ -823,6 +822,42 @@ - *any out: - *any + +# For now a non-public method because +# (1) [OverwriteClass] must be set together with [OverwriteBase] +# (2) "simdgen does not support [OverwriteClass] in inputs". +# That means the signature is wrong. +- go: blend + asm: VPBLENDVB + in: + - &v + go: $t + class: vreg + base: int + - *v + - + class: vreg + base: int + name: mask + out: + - *v + +# For AVX512 +- go: blend + asm: VPBLENDM[BWDQ] + in: + - &v + go: $t + bits: 512 + class: vreg + base: int + - *v + inVariant: + - + class: mask + out: + - *v + # "Normal" multiplication is only available for floats. 
# This only covers the single and double precision. - go: Mul diff --git a/internal/simdgen/ops/Moves/categories.yaml b/internal/simdgen/ops/Moves/categories.yaml index a6dd7bab..5e51becb 100644 --- a/internal/simdgen/ops/Moves/categories.yaml +++ b/internal/simdgen/ops/Moves/categories.yaml @@ -40,3 +40,9 @@ documentation: !string |- // NAME performs a compression on vector x using mask by // selecting elements as indicated by mask, and pack them to lower indexed elements. +- go: blend + commutative: false + extension: "AVX.*" + documentation: !string |- + // NAME blends two vectors based on mask values, choosing either + // the first or the second based on whether the third is false or true diff --git a/internal/simdgen/ops/Moves/go.yaml b/internal/simdgen/ops/Moves/go.yaml index c1dd6e4d..52e6228d 100644 --- a/internal/simdgen/ops/Moves/go.yaml +++ b/internal/simdgen/ops/Moves/go.yaml @@ -27,7 +27,6 @@ base: $b bits: $e - - go: Set128 asm: "VINSERTI128" in: @@ -213,3 +212,39 @@ - *any out: - *any + +# For now a non-public method because +# (1) [OverwriteClass] must be set together with [OverwriteBase] +# (2) "simdgen does not support [OverwriteClass] in inputs". +# That means the signature is wrong. 
+- go: blend + asm: VPBLENDVB + in: + - &v + go: $t + class: vreg + base: int + - *v + - + class: vreg + base: int + name: mask + out: + - *v + +# For AVX512 +- go: blend + asm: VPBLENDM[BWDQ] + in: + - &v + go: $t + bits: 512 + class: vreg + base: int + - *v + inVariant: + - + class: mask + out: + - *v + From dca4598c94df40d5c693e058c663e24aa1a92610 Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Fri, 1 Aug 2025 17:12:31 -0400 Subject: [PATCH 171/200] internal/simdgen: add profiling flags Change-Id: I9f893e4a1420c135b1affaf355a84b0498e6c981 Reviewed-on: https://go-review.googlesource.com/c/arch/+/693337 Auto-Submit: Austin Clements Reviewed-by: Junyang Shao LUCI-TryBot-Result: Go LUCI --- internal/simdgen/main.go | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/internal/simdgen/main.go b/internal/simdgen/main.go index 6ac22a68..92b1fa98 100644 --- a/internal/simdgen/main.go +++ b/internal/simdgen/main.go @@ -94,6 +94,7 @@ import ( "maps" "os" "path/filepath" + "runtime/pprof" "slices" "strings" @@ -116,6 +117,9 @@ var ( flagDebugUnify = flag.Bool("debug-unify", false, "print unification trace") flagDebugHTML = flag.String("debug-html", "", "write unification trace to `file.html`") FlagReportDup = flag.Bool("reportdup", false, "report the duplicate godefs") + + flagCPUProfile = flag.String("cpuprofile", "", "write CPU profile to `file`") + flagMemProfile = flag.String("memprofile", "", "write memory profile to `file`") ) const simdPackage = "simd" @@ -123,6 +127,26 @@ const simdPackage = "simd" func main() { flag.Parse() + if *flagCPUProfile != "" { + f, err := os.Create(*flagCPUProfile) + if err != nil { + log.Fatalf("-cpuprofile: %s", err) + } + defer f.Close() + pprof.StartCPUProfile(f) + defer pprof.StopCPUProfile() + } + if *flagMemProfile != "" { + f, err := os.Create(*flagMemProfile) + if err != nil { + log.Fatalf("-memprofile: %s", err) + } + defer func() { + pprof.WriteHeapProfile(f) + f.Close() + }() + } + var inputs 
[]unify.Closure
 	if *FlagArch != "amd64" {

From c9a2ab369950b498dfd7d4e53fd38f7df616803a Mon Sep 17 00:00:00 2001
From: Austin Clements
Date: Tue, 5 Aug 2025 16:22:15 -0400
Subject: [PATCH 172/200] internal/unify: use arbitrary expressions for
 environment sets
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Currently, nonDetEnv, which represents a set of environments, uses a
restricted algebraic form consisting of a cross-product of sets of
environments. Unfortunately, this restriction means that if we want to
union two environment sets, we may need to multiply factors out in order
to normalize the result into this restricted representation.

In some cases, this can result in exponential blowup. For example, if
there are nested sums, then the environment will contain bindings of
variables that don't matter for whole branches of the value expression,
but that still participate when constructing the union of environment
sets. These dead variables wind up expanding the environment
representation exponentially, even though they have no effect.

To fix this, we lift this restriction. Now, a nonDetEnv is an arbitrary
algebraic expression of unions and cross-products. This is actually much
simpler, implementation-wise, and addresses this exponential blowup
problem.

We add a stress test demonstrating nested sums that prior to this change
required 12 GB of RAM and took 20 seconds to unify. With this change, it
takes 90 MB of RAM and a fraction of a second.

We're about to add "import" support to YAML, which will tend to create
these nested sums. Thus we have to fix this first.

This has no effect on the output of simdgen. 
Curiously, it also has no effect on the time of simdgen, but it does reduce its memory by almost 10x: │ /tmp/before.bench │ /tmp/after.bench │ │ sec/op │ sec/op vs base │ Simdgen 26.40 ± 3% 26.49 ± 26% ~ (p=1.000 n=10) │ /tmp/before.bench │ /tmp/after.bench │ │ peak-RSS-bytes │ peak-RSS-bytes vs base │ Simdgen 1443.4Mi ± 1% 178.4Mi ± 1% -87.64% (p=0.000 n=10) Change-Id: Idaecb8693065c61d5d63afbc1014d3300886def8 Reviewed-on: https://go-review.googlesource.com/c/arch/+/693338 LUCI-TryBot-Result: Go LUCI Reviewed-by: Junyang Shao Auto-Submit: Austin Clements --- internal/unify/dot.go | 46 ++- internal/unify/env.go | 535 +++++++++++++--------------- internal/unify/html.go | 87 +---- internal/unify/testdata/stress.yaml | 33 ++ internal/unify/yaml.go | 44 ++- 5 files changed, 367 insertions(+), 378 deletions(-) create mode 100644 internal/unify/testdata/stress.yaml diff --git a/internal/unify/dot.go b/internal/unify/dot.go index 143fa615..a26b9dad 100644 --- a/internal/unify/dot.go +++ b/internal/unify/dot.go @@ -96,14 +96,15 @@ func (enc *dotEncoder) edge(from, to string, label string, args ...any) { fmt.Fprintf(enc.w, "%s -> %s [label=%q];\n", from, to, l) } -func (enc *dotEncoder) subgraph(v *Value) (vID, cID string) { +func (enc *dotEncoder) valueSubgraph(v *Value) { enc.valLimit = maxNodes - cID = enc.newID("cluster_%d") + cID := enc.newID("cluster_%d") fmt.Fprintf(enc.w, "subgraph %s {\n", cID) fmt.Fprintf(enc.w, "style=invis;") - vID = enc.value(v) + vID := enc.value(v) fmt.Fprintf(enc.w, "}\n") - return + // We don't need the IDs right now. 
+ _, _ = cID, vID } func (enc *dotEncoder) value(v *Value) string { @@ -181,3 +182,40 @@ func (enc *dotEncoder) value(v *Value) string { return enc.node(fmt.Sprintf("Var %s", enc.idp.unique(vd.id)), "") } } + +func (enc *dotEncoder) envSubgraph(e nonDetEnv) { + enc.valLimit = maxNodes + cID := enc.newID("cluster_%d") + fmt.Fprintf(enc.w, "subgraph %s {\n", cID) + fmt.Fprintf(enc.w, "style=invis;") + vID := enc.env(e.root) + fmt.Fprintf(enc.w, "}\n") + _, _ = cID, vID +} + +func (enc *dotEncoder) env(e *envExpr) string { + switch e.kind { + default: + panic("bad kind") + case envZero: + return enc.node("0", "") + case envUnit: + return enc.node("1", "") + case envBinding: + node := enc.node(fmt.Sprintf("%q :", enc.idp.unique(e.id)), "") + enc.edge(node, enc.value(e.val), "") + return node + case envProduct: + node := enc.node("⨯", "") + for _, op := range e.operands { + enc.edge(node, enc.env(op), "") + } + return node + case envSum: + node := enc.node("+", "") + for _, op := range e.operands { + enc.edge(node, enc.env(op), "") + } + return node + } +} diff --git a/internal/unify/env.go b/internal/unify/env.go index 618887cd..0f45af39 100644 --- a/internal/unify/env.go +++ b/internal/unify/env.go @@ -8,29 +8,36 @@ import ( "fmt" "iter" "reflect" - "slices" "strings" ) -// A nonDetEnv is a non-deterministic mapping from [ident]s to [Value]s. +// A nonDetEnv is an immutable set of environments, where each environment is a +// mapping from [ident]s to [Value]s. // -// Logically, this is just a set of deterministic environments, where each -// deterministic environment is a complete mapping from each [ident]s to exactly -// one [Value]. In particular, [ident]s are NOT necessarily independent of each -// other. For example, an environment may have both {x: 1, y: 1} and {x: 2, y: -// 2}, but not {x: 1, y: 2}. +// To keep this compact, we use an algebraic representation similar to +// relational algebra. 
The atoms are zero, unit, or a singular binding: // -// A nonDetEnv is immutable. +// - A singular binding is an environment set consisting of a single environment +// that binds a single ident to a single value. // -// Often [ident]s are independent of each other, so the representation optimizes -// for this by using a cross-product of environment factors, where each factor -// is a sum of deterministic environments. These operations obey the usual -// distributional laws, so we can always canonicalize into this form. (It MAY be -// worthwhile to allow more general expressions of sums and products.) +// - Zero is the empty set. // -// For example, to represent {{x: 1, y: 1}, {x: 2, y: 2}}, in which the -// variables x and y are dependent, we need a single factor that covers x and y -// and consists of two terms: {x: 1, y: 1} + {x: 2, y: 2}. +// - Unit is an environment set consisting of a single, empty environment (no +// bindings). +// +// From these, we build up more complex sets of environments using sums and +// cross products: +// +// - A sum is simply the union of the two environment sets. +// +// - A cross product is the Cartesian product of the two environment sets, +// followed by combining each pair of environments. Combining simply merges the +// two mappings, but fails if the mappings overlap. +// +// For example, to represent {{x: 1, y: 1}, {x: 2, y: 2}}, we build the two +// environments and sum them: +// +// ({x: 1} ⨯ {y: 1}) + ({x: 2} ⨯ {y: 2}) // // If we add a third variable z that can be 1 or 2, independent of x and y, we // get four logical environments: @@ -40,43 +47,59 @@ import ( // {x: 1, y: 1, z: 2} // {x: 2, y: 2, z: 2} // -// This could be represented as a single factor that is the sum of these four -// detEnvs, but because z is independent, it can be a separate factor. 
Hence,
-// the most compact representation of this environment is:
+// This could be represented as a sum of all four environments, but because z is
+// independent, we can use a more compact representation:
+//
+//	(({x: 1} ⨯ {y: 1}) + ({x: 2} ⨯ {y: 2})) ⨯ ({z: 1} + {z: 2})
 //
-// That is, two factors, where each is the sum of two terms.
+// Environment sets obey commutative algebra rules:
 //
-//	({x: 1, y: 1} + {x: 2, y: 2}) ⨯ ({z: 1} + {z: 2})
+//	e + 0 = e
+//	e ⨯ 0 = 0
+//	e ⨯ 1 = e
+//	e + f = f + e
+//	e ⨯ f = f ⨯ e
 type nonDetEnv struct {
-	// factors is a list of the multiplicative factors in this environment. The
-	// set of deterministic environments is the cross-product of these factors.
-	// All factors must have disjoint variables.
-	factors []*envSum
+	root *envExpr
 }
 
-// envSum is a sum of deterministic environments, all with the same set of
-// variables.
-type envSum struct {
-	ids   []*ident // TODO: Do we ever use this as a slice? Should it be a map?
-	terms []detEnv
-}
+type envExpr struct {
+	// TODO: A tree-based data structure for this may not be ideal, since it
+	// involves a lot of walking to find things and we often have to do deep
+	// rewrites anyway for partitioning. Would some flattened array-style
+	// representation be better, possibly combined with an index of ident uses?
+	// We could even combine that with an immutable array abstraction (ala
+	// Clojure) that could enable more efficient construction operations.
+
+	kind envExprKind
+
+	// For envBinding
+	id  *ident
+	val *Value
 
-type detEnv struct {
-	vals []*Value // Indexes correspond to envSum.ids
+	// For sum or product. Len must be >= 2 and none of the elements can have
+	// the same kind as this node.
+	operands []*envExpr
 }
 
+type envExprKind byte
+
+const (
+	envZero envExprKind = iota
+	envUnit
+	envProduct
+	envSum
+	envBinding
+)
+
 var (
-	// zeroEnvFactor is the "0" value of an [envSum]. It's a a factor with no
-	// sum terms. 
This is easiest to think of as: an empty sum must be the - // additive identity, 0. - zeroEnvFactor = &envSum{} - - // topEnv is the algebraic one value of a [nonDetEnv]. It has no factors - // because the product of no factors is the multiplicative identity. - topEnv = nonDetEnv{} - // bottomEnv is the algebraic zero value of a [nonDetEnv]. The product of - // bottomEnv with x is bottomEnv, and the sum of bottomEnv with y is y. - bottomEnv = nonDetEnv{factors: []*envSum{zeroEnvFactor}} + // topEnv is the unit value (multiplicative identity) of a [nonDetEnv]. + topEnv = nonDetEnv{envExprUnit} + // bottomEnv is the zero value (additive identity) of a [nonDetEnv]. + bottomEnv = nonDetEnv{envExprZero} + + envExprZero = &envExpr{kind: envZero} + envExprUnit = &envExpr{kind: envUnit} ) // bind binds id to each of vals in e. @@ -90,232 +113,132 @@ func (e nonDetEnv) bind(id *ident, vals ...*Value) nonDetEnv { return bottomEnv } - // TODO: If any of vals are _, should we just not do anything? We're kind of + // TODO: If any of vals are _, should we just drop that val? We're kind of // inconsistent about whether an id missing from e means id is invalid or // means id is _. // Check that id isn't present in e. - for _, f := range e.factors { - if slices.Contains(f.ids, id) { - panic("id " + id.name + " already present in environment") - } + for range e.root.bindings(id) { + panic("id " + id.name + " already present in environment") } - // Create the new sum term. - sum := &envSum{ids: []*ident{id}} + // Create a sum of all the values. + bindings := make([]*envExpr, 0, 1) for _, val := range vals { - sum.terms = append(sum.terms, detEnv{vals: []*Value{val}}) + bindings = append(bindings, &envExpr{kind: envBinding, id: id, val: val}) } + // Multiply it in. 
- factors := append(e.factors[:len(e.factors):len(e.factors)], sum) - return nonDetEnv{factors} + return nonDetEnv{newEnvExprProduct(e.root, newEnvExprSum(bindings...))} } func (e nonDetEnv) isBottom() bool { - if len(e.factors) == 0 { - // This is top. - return false - } - return len(e.factors[0].terms) == 0 + return e.root.kind == envZero } -func (e nonDetEnv) vars() iter.Seq[*ident] { - return func(yield func(*ident) bool) { - for _, t := range e.factors { - for _, id := range t.ids { - if !yield(id) { - return +// bindings yields all [envBinding] nodes in e with the given id. If id is nil, +// it yields all binding nodes. +func (e *envExpr) bindings(id *ident) iter.Seq[*envExpr] { + // This is just a pre-order walk and it happens this is the only thing we + // need a pre-order walk for. + return func(yield func(*envExpr) bool) { + var rec func(e *envExpr) bool + rec = func(e *envExpr) bool { + if e.kind == envBinding && (id == nil || e.id == id) { + if !yield(e) { + return false } } - } - } -} - -// all enumerates all deterministic environments in e. -// -// The result slice is in the same order as the slice returned by -// [nonDetEnv2.vars]. The slice is reused between iterations. -func (e nonDetEnv) all() iter.Seq[[]*Value] { - return func(yield func([]*Value) bool) { - var vals []*Value - var walk func(int) bool - walk = func(i int) bool { - if i == len(e.factors) { - return yield(vals) - } - start := len(vals) - for _, term := range e.factors[i].terms { - vals = append(vals[:start], term.vals...) - if !walk(i + 1) { + for _, o := range e.operands { + if !rec(o) { return false } } return true } - walk(0) + rec(e) } } -// allOrdered is like all, but idOrder controls the order of the values in the -// resulting slice. Any [ident]s in idOrder that are missing from e are set to -// topValue. The values of idOrder must be a bijection with [0, n). 
-func (e nonDetEnv) allOrdered(idOrder map[*ident]int) iter.Seq[[]*Value] { - valsLen := 0 - for _, idx := range idOrder { - valsLen = max(valsLen, idx+1) - } - - return func(yield func([]*Value) bool) { - vals := make([]*Value, valsLen) - // e may not have all of the IDs in idOrder. Make sure any missing - // values are top. - for i := range vals { - vals[i] = topValue - } - var walk func(int) bool - walk = func(i int) bool { - if i == len(e.factors) { - return yield(vals) - } - for _, term := range e.factors[i].terms { - for j, id := range e.factors[i].ids { - vals[idOrder[id]] = term.vals[j] - } - if !walk(i + 1) { - return false - } - } - return true +// newEnvExprProduct constructs a product node from exprs, performing +// simplifications. It does NOT check that bindings are disjoint. +func newEnvExprProduct(exprs ...*envExpr) *envExpr { + factors := make([]*envExpr, 0, 2) + for _, expr := range exprs { + switch expr.kind { + case envZero: + return envExprZero + case envUnit: + // No effect on product + case envProduct: + factors = append(factors, expr.operands...) + default: + factors = append(factors, expr) } - walk(0) } -} -func crossEnvs(envs ...nonDetEnv) nonDetEnv { - // Combine the factors of envs - var factors []*envSum - haveIDs := map[*ident]struct{}{} - for _, e := range envs { - if e.isBottom() { - // The environment is bottom, so the whole product goes to - // bottom. - return bottomEnv - } - // Check that all ids are disjoint. - for _, f := range e.factors { - for _, id := range f.ids { - if _, ok := haveIDs[id]; ok { - panic("conflict on " + id.name) - } - haveIDs[id] = struct{}{} - } - } - // Everything checks out. Multiply the factors. - factors = append(factors, e.factors...) 
+ if len(factors) == 0 { + return envExprUnit + } else if len(factors) == 1 { + return factors[0] } - return nonDetEnv{factors: factors} + return &envExpr{kind: envProduct, operands: factors} } -func sumEnvs(envs ...nonDetEnv) nonDetEnv { - // nonDetEnv is a product at the top level, so we implement summation using - // the distributive law. We also use associativity to keep as many top-level - // factors as we can, since those are what keep the environment compact. - // - // a * b * c + a * d (where a, b, c, and d are factors) - // (combine common factors) - // = a * (b * c + d) - // (expand factors into their sum terms) - // = a * ((b_1 + b_2 + ...) * (c_1 + c_2 + ...) + d) - // (where b_i and c_i are deterministic environments) - // (FOIL) - // = a * (b_1 * c_1 + b_1 * c_2 + b_2 * c_1 + b_2 * c2 + d) - // (all factors are now in canonical form) - // = a * e - // - // The product of two deterministic environments is a deterministic - // environment, and the sum of deterministic environments is a factor, so - // this process results in the canonical product-of-sums form. - // - // TODO: This is a bit of a one-way process. We could try to factor the - // environment to reduce the number of sums. I'm not sure how to do this - // efficiently. It might be possible to guide it by gathering the - // distributions of each ID's bindings. E.g., if there are 12 deterministic - // environments in a sum and $x is bound to 4 different values, each 3 - // times, then it *might* be possible to factor out $x into a 4-way sum of - // its own. - - factors, toSum := commonFactors(envs) - - if len(toSum) > 0 { - // Collect all IDs into a single order. - var ids []*ident - idOrder := make(map[*ident]int) - for _, e := range toSum { - for v := range e.vars() { - if _, ok := idOrder[v]; !ok { - idOrder[v] = len(ids) - ids = append(ids, v) +// newEnvExprSum constructs a sum node from exprs, performing simplifications. 
+func newEnvExprSum(exprs ...*envExpr) *envExpr { + // TODO: If all of envs are products (or bindings), factor any common terms. + // E.g., x * y + x * z ==> x * (y + z). This is easy to do for binding + // terms, but harder to do for more general terms. + + var have smallSet[*envExpr] + terms := make([]*envExpr, 0, 2) + for _, expr := range exprs { + switch expr.kind { + case envZero: + // No effect on sum + case envSum: + for _, expr1 := range expr.operands { + if have.Add(expr1) { + terms = append(terms, expr1) } } - } - - // Flatten out each term in the sum. - var summands []detEnv - for _, env := range toSum { - for vals := range env.allOrdered(idOrder) { - summands = append(summands, detEnv{vals: slices.Clone(vals)}) + default: + if have.Add(expr) { + terms = append(terms, expr) } } - factors = append(factors, &envSum{ids: ids, terms: summands}) } - return nonDetEnv{factors: factors} -} - -// commonFactors finds common factors that can be factored out of a summation of -// [nonDetEnv]s. -func commonFactors(envs []nonDetEnv) (common []*envSum, toSum []nonDetEnv) { - // Drop any bottom environments. They don't contribute to the sum and they - // would complicate some logic below. - envs = slices.DeleteFunc(envs, func(e nonDetEnv) bool { - return e.isBottom() - }) - if len(envs) == 0 { - return bottomEnv.factors, nil + if len(terms) == 0 { + return envExprZero + } else if len(terms) == 1 { + return terms[0] } + return &envExpr{kind: envSum, operands: terms} +} - // It's very common that the exact same factor will appear across all envs. - // Keep those factored out. - // - // TODO: Is it also common to have vars that are bound to the same value - // across all envs? If so, we could also factor those into common terms. - counts := map[*envSum]int{} - for _, e := range envs { - for _, f := range e.factors { - counts[f]++ - } +func crossEnvs(env1, env2 nonDetEnv) nonDetEnv { + // Confirm that envs have disjoint idents. 
+ var ids1 smallSet[*ident] + for e := range env1.root.bindings(nil) { + ids1.Add(e.id) } - for _, f := range envs[0].factors { - if counts[f] == len(envs) { - // Common factor - common = append(common, f) + for e := range env2.root.bindings(nil) { + if ids1.Has(e.id) { + panic(fmt.Sprintf("%s bound on both sides of cross-product", e.id.name)) } } - // Any other factors need to be multiplied out. - for _, env := range envs { - var newFactors []*envSum - for _, f := range env.factors { - if counts[f] != len(envs) { - newFactors = append(newFactors, f) - } - } - if len(newFactors) > 0 { - toSum = append(toSum, nonDetEnv{factors: newFactors}) - } - } + return nonDetEnv{newEnvExprProduct(env1.root, env2.root)} +} - return common, toSum +func sumEnvs(envs ...nonDetEnv) nonDetEnv { + exprs := make([]*envExpr, len(envs)) + for i := range envs { + exprs[i] = envs[i].root + } + return nonDetEnv{newEnvExprSum(exprs...)} } // envPartition is a subset of an env where id is bound to value in all @@ -326,69 +249,125 @@ type envPartition struct { env nonDetEnv } +// partitionBy splits e by distinct bindings of id and removes id from each +// partition. +// +// If there are environments in e where id is not bound, they will not be +// reflected in any partition. +// +// It panics if e is bottom, since attempting to partition an empty environment +// set almost certainly indicates a bug. func (e nonDetEnv) partitionBy(id *ident) []envPartition { if e.isBottom() { - // Bottom contains all variables - return []envPartition{{id: id, value: bottomValue, env: e}} + // We could return zero partitions, but getting here at all almost + // certainly indicates a bug. + panic("cannot partition empty environment set") } - // Find the factor containing id and id's index in that factor. - idFactor, idIndex := -1, -1 - var newIDs []*ident - for factI, fact := range e.factors { - idI := slices.Index(fact.ids, id) - if idI < 0 { + // Emit a partition for each value of id. 
+ var seen smallSet[*Value] + var parts []envPartition + for n := range e.root.bindings(id) { + if !seen.Add(n.val) { + // Already emitted a partition for this value. continue - } else if idFactor != -1 { - panic("multiple factors containing id " + id.name) - } else { - idFactor, idIndex = factI, idI - // Drop id from this factor's IDs - newIDs = without(fact.ids, idI) } - } - if idFactor == -1 { - panic("id " + id.name + " not found in environment") - } - // If id is the only term in its factor, then dropping it is equivalent to - // making the factor be the unit value, so we can just drop the factor. (And - // if this is the only factor, we'll arrive at [topEnv], which is exactly - // what we want!). In this case we can use the same nonDetEnv in all of the - // partitions. - isUnit := len(newIDs) == 0 - var unitFactors []*envSum - if isUnit { - unitFactors = without(e.factors, idFactor) + parts = append(parts, envPartition{ + id: id, + value: n.val, + env: nonDetEnv{e.root.substitute(id, n.val)}, + }) } - // Create a partition for each distinct value of id. - var parts []envPartition - partIndex := map[*Value]int{} - for _, det := range e.factors[idFactor].terms { - val := det.vals[idIndex] - i, ok := partIndex[val] - if !ok { - i = len(parts) - var factors []*envSum - if isUnit { - factors = unitFactors - } else { - // Copy all other factor - factors = slices.Clone(e.factors) - factors[idFactor] = &envSum{ids: newIDs} + return parts +} + +// substitute replaces bindings of id to val with 1 and bindings of id to any +// other value with 0 and simplifies the result. +func (e *envExpr) substitute(id *ident, val *Value) *envExpr { + switch e.kind { + default: + panic("bad kind") + + case envZero, envUnit: + return e + + case envBinding: + if e.id != id { + return e + } else if e.val != val { + return envExprZero + } else { + return envExprUnit + } + + case envProduct, envSum: + // Substitute each operand. 
Sometimes, this won't change anything, so we + // build the new operands list lazily. + var nOperands []*envExpr + for i, op := range e.operands { + nOp := op.substitute(id, val) + if nOperands == nil && op != nOp { + // Operand diverged; initialize nOperands. + nOperands = make([]*envExpr, 0, len(e.operands)) + nOperands = append(nOperands, e.operands[:i]...) + } + if nOperands != nil { + nOperands = append(nOperands, nOp) } - parts = append(parts, envPartition{id: id, value: val, env: nonDetEnv{factors: factors}}) - partIndex[val] = i } + if nOperands == nil { + // Nothing changed. + return e + } + if e.kind == envProduct { + return newEnvExprProduct(nOperands...) + } else { + return newEnvExprSum(nOperands...) + } + } +} + +// A smallSet is a set optimized for stack allocation when small. +type smallSet[T comparable] struct { + array [32]T + n int - if !isUnit { - factor := parts[i].env.factors[idFactor] - newVals := without(det.vals, idIndex) - factor.terms = append(factor.terms, detEnv{vals: newVals}) + m map[T]struct{} +} + +// Has returns whether val is in set. +func (s *smallSet[T]) Has(val T) bool { + arr := s.array[:s.n] + for i := range arr { + if arr[i] == val { + return true } } - return parts + _, ok := s.m[val] + return ok +} + +// Add adds val to the set and returns true if it was added (not already +// present). +func (s *smallSet[T]) Add(val T) bool { + // Test for presence. + if s.Has(val) { + return false + } + + // Add it + if s.n < len(s.array) { + s.array[s.n] = val + s.n++ + } else { + if s.m == nil { + s.m = make(map[T]struct{}) + } + s.m[val] = struct{}{} + } + return true } type ident struct { @@ -494,7 +473,3 @@ func (p *identPrinter) slice(ids []*ident) string { } return fmt.Sprintf("[%s]", strings.Join(strs, ", ")) } - -func without[Elt any](s []Elt, i int) []Elt { - return append(s[:i:i], s[i+1:]...) 
-} diff --git a/internal/unify/html.go b/internal/unify/html.go index d2434fe4..d59bd8fc 100644 --- a/internal/unify/html.go +++ b/internal/unify/html.go @@ -52,7 +52,7 @@ const htmlCSS = ` type htmlTracer struct { w io.Writer dot *dotEncoder - svgs map[*Value]string + svgs map[any]string } func (t *htmlTracer) writeTree(node *traceTree) { @@ -91,19 +91,19 @@ func (t *htmlTracer) writeTree(node *traceTree) { } } -func (t *htmlTracer) svg(v *Value) string { - if s, ok := t.svgs[v]; ok { +func htmlSVG[Key comparable](t *htmlTracer, f func(Key), arg Key) string { + if s, ok := t.svgs[arg]; ok { return s } var buf strings.Builder - t.dot.subgraph(v) + f(arg) t.dot.writeSvg(&buf) t.dot.clear() svg := buf.String() if t.svgs == nil { - t.svgs = make(map[*Value]string) + t.svgs = make(map[any]string) } - t.svgs[v] = svg + t.svgs[arg] = svg buf.Reset() return svg } @@ -112,79 +112,12 @@ func (t *htmlTracer) emit(vs []*Value, labels []string, env nonDetEnv) { fmt.Fprintf(t.w, `
`) for i, v := range vs { fmt.Fprintf(t.w, `
%s
`, i+1, html.EscapeString(labels[i])) - fmt.Fprintf(t.w, `
%s
`, i+1, t.svg(v)) + fmt.Fprintf(t.w, `
%s
`, i+1, htmlSVG(t, t.dot.valueSubgraph, v)) } + col := len(vs) - t.emitEnv(env, len(vs)) + fmt.Fprintf(t.w, `
in
`, col+1) + fmt.Fprintf(t.w, `
%s
`, col+1, htmlSVG(t, t.dot.envSubgraph, env)) fmt.Fprintf(t.w, `
`) } - -func (t *htmlTracer) emitEnv(env nonDetEnv, colStart int) { - if env.isBottom() { - fmt.Fprintf(t.w, `
_|_
`, colStart+1) - return - } - - colLimit := 10 - col := colStart - for i, f := range env.factors { - if i > 0 { - // Print * between each factor. - fmt.Fprintf(t.w, `
×
`, col+1) - col++ - } - - var idCols []int - for i, id := range f.ids { - var str string - if i == 0 && len(f.ids) > 1 { - str = "(" - } - if colLimit <= 0 { - str += "..." - } else { - str += html.EscapeString(t.dot.idp.unique(id)) - } - if (i == len(f.ids)-1 || colLimit <= 0) && len(f.ids) > 1 { - str += ")" - } - - fmt.Fprintf(t.w, `
%s
`, col+1, str) - idCols = append(idCols, col) - - col++ - if colLimit <= 0 { - break - } - colLimit-- - } - - fmt.Fprintf(t.w, `
`, idCols[0]+1, col+1) - rowLimit := 10 - row := 0 - for _, term := range f.terms { - // TODO: Print + between rows? With some horizontal something to - // make it clear what it applies across? - - for i, val := range term.vals { - fmt.Fprintf(t.w, `
`, row+1, idCols[i]-idCols[0]+1) - if i < len(term.vals)-1 && i == len(idCols)-1 { - fmt.Fprintf(t.w, `...
`) - break - } else if rowLimit <= 0 { - fmt.Fprintf(t.w, `...
`) - } else { - fmt.Fprintf(t.w, `%s`, t.svg(val)) - } - } - - row++ - if rowLimit <= 0 { - break - } - rowLimit-- - } - fmt.Fprintf(t.w, ``) - } -} diff --git a/internal/unify/testdata/stress.yaml b/internal/unify/testdata/stress.yaml new file mode 100644 index 00000000..e4478536 --- /dev/null +++ b/internal/unify/testdata/stress.yaml @@ -0,0 +1,33 @@ +# In the original representation of environments, this caused an exponential +# blowup in time and allocation. With that representation, this took about 20 +# seconds on my laptop and had a max RSS of ~12 GB. Big enough to be really +# noticeable, but not so big it's likely to crash a developer machine. With the +# better environment representation, it runs almost instantly and has an RSS of +# ~90 MB. +unify: +- !sum + - !sum [1, 2] + - !sum [3, 4] + - !sum [5, 6] + - !sum [7, 8] + - !sum [9, 10] + - !sum [11, 12] + - !sum [13, 14] + - !sum [15, 16] + - !sum [17, 18] + - !sum [19, 20] + - !sum [21, 22] +- !sum + - !sum [1, 2] + - !sum [3, 4] + - !sum [5, 6] + - !sum [7, 8] + - !sum [9, 10] + - !sum [11, 12] + - !sum [13, 14] + - !sum [15, 16] + - !sum [17, 18] + - !sum [19, 20] + - !sum [21, 22] +all: + [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22] diff --git a/internal/unify/yaml.go b/internal/unify/yaml.go index 1b1c8139..ff5115f7 100644 --- a/internal/unify/yaml.go +++ b/internal/unify/yaml.go @@ -93,7 +93,7 @@ func (c *Closure) Unmarshal(r io.Reader, opts UnmarshalOpts) error { } func (c *Closure) unmarshal(node *yaml.Node, opts UnmarshalOpts) error { - dec := &yamlDecoder{opts: opts, vars: make(map[string]*ident)} + dec := &yamlDecoder{opts: opts, vars: make(map[string]*ident), env: topEnv} val, err := dec.value(node) if err != nil { return err @@ -349,25 +349,35 @@ func (enc *yamlEncoder) closure(c Closure) *yaml.Node { } func (enc *yamlEncoder) env(e nonDetEnv) *yaml.Node { - var n yaml.Node - n.Kind = yaml.SequenceNode - n.Tag = "!env" - for _, term := range e.factors { - 
var nTerm yaml.Node - n.Content = append(n.Content, &nTerm) - nTerm.Kind = yaml.SequenceNode - for _, det := range term.terms { - var nDet yaml.Node - nTerm.Content = append(nTerm.Content, &nDet) - nDet.Kind = yaml.MappingNode - for i, val := range det.vals { - var nLabel yaml.Node - nLabel.SetString(enc.idp.unique(term.ids[i])) - nDet.Content = append(nDet.Content, &nLabel, enc.value(val)) + var encode func(e *envExpr) *yaml.Node + encode = func(e *envExpr) *yaml.Node { + var n yaml.Node + switch e.kind { + default: + panic("bad kind") + case envZero: + n.SetString("0") + case envUnit: + n.SetString("1") + case envBinding: + var id yaml.Node + id.SetString(enc.idp.unique(e.id)) + n.Kind = yaml.MappingNode + n.Content = []*yaml.Node{&id, enc.value(e.val)} + case envProduct, envSum: + n.Kind = yaml.SequenceNode + if e.kind == envProduct { + n.Tag = "!product" + } else { + n.Tag = "!sum" + } + for _, e2 := range e.operands { + n.Content = append(n.Content, encode(e2)) } } + return &n } - return &n + return encode(e.root) } var yamlIntRe = regexp.MustCompile(`^-?[0-9]+$`) From 0a9327541e7556eb6439fd93c5d825d1f9205723 Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Tue, 5 Aug 2025 14:17:06 -0400 Subject: [PATCH 173/200] internal/unify: rename nonDetEnv -> envSet Leaning into the "this is just a set" view, we also rename isBottom to isEmpty and sumEnvs to unionEnvs. 
Change-Id: Ib69c99995b7fd0944eab88721eefdb28e3edecee Reviewed-on: https://go-review.googlesource.com/c/arch/+/693339 Reviewed-by: Junyang Shao Auto-Submit: Austin Clements LUCI-TryBot-Result: Go LUCI --- internal/unify/closure.go | 18 ++++++++-------- internal/unify/domain.go | 4 ++-- internal/unify/dot.go | 2 +- internal/unify/env.go | 44 +++++++++++++++++++-------------------- internal/unify/html.go | 2 +- internal/unify/trace.go | 8 +++---- internal/unify/unify.go | 22 ++++++++++---------- internal/unify/yaml.go | 12 +++++------ 8 files changed, 56 insertions(+), 56 deletions(-) diff --git a/internal/unify/closure.go b/internal/unify/closure.go index 8a1636de..5b654879 100644 --- a/internal/unify/closure.go +++ b/internal/unify/closure.go @@ -13,7 +13,7 @@ import ( type Closure struct { val *Value - env nonDetEnv + env envSet } func NewSum(vs ...*Value) Closure { @@ -67,13 +67,13 @@ func (c Closure) All() iter.Seq[*Value] { // continuation for each choice. Similar to a yield function, the // continuation can return false to stop the non-deterministic walk. return func(yield func(*Value) bool) { - c.val.all1(c.env, func(v *Value, e nonDetEnv) bool { + c.val.all1(c.env, func(v *Value, e envSet) bool { return yield(v) }) } } -func (v *Value) all1(e nonDetEnv, cont func(*Value, nonDetEnv) bool) bool { +func (v *Value) all1(e envSet, cont func(*Value, envSet) bool) bool { switch d := v.Domain.(type) { default: panic(fmt.Sprintf("unknown domain type %T", d)) @@ -93,8 +93,8 @@ func (v *Value) all1(e nonDetEnv, cont func(*Value, nonDetEnv) bool) bool { // TODO: If there are no Vars or Sums under this Def, then nothing can // change the Value or env, so we could just cont(v, e). - var allElt func(elt int, e nonDetEnv) bool - allElt = func(elt int, e nonDetEnv) bool { + var allElt func(elt int, e envSet) bool + allElt = func(elt int, e envSet) bool { if elt == len(fields) { // Build a new Def from the concrete parts. 
Clone parts because // we may reuse it on other non-deterministic branches. @@ -102,7 +102,7 @@ func (v *Value) all1(e nonDetEnv, cont func(*Value, nonDetEnv) bool) bool { return cont(nVal, e) } - return d.fields[fields[elt]].all1(e, func(v *Value, e nonDetEnv) bool { + return d.fields[fields[elt]].all1(e, func(v *Value, e envSet) bool { parts[fields[elt]] = v return allElt(elt+1, e) }) @@ -116,8 +116,8 @@ func (v *Value) all1(e nonDetEnv, cont func(*Value, nonDetEnv) bool) bool { return cont(v, e) } parts := make([]*Value, len(d.vs)) - var allElt func(elt int, e nonDetEnv) bool - allElt = func(elt int, e nonDetEnv) bool { + var allElt func(elt int, e envSet) bool + allElt = func(elt int, e envSet) bool { if elt == len(d.vs) { // Build a new tuple from the concrete parts. Clone parts because // we may reuse it on other non-deterministic branches. @@ -125,7 +125,7 @@ func (v *Value) all1(e nonDetEnv, cont func(*Value, nonDetEnv) bool) bool { return cont(nVal, e) } - return d.vs[elt].all1(e, func(v *Value, e nonDetEnv) bool { + return d.vs[elt].all1(e, func(v *Value, e envSet) bool { parts[elt] = v return allElt(elt+1, e) }) diff --git a/internal/unify/domain.go b/internal/unify/domain.go index 36239054..5c4d349f 100644 --- a/internal/unify/domain.go +++ b/internal/unify/domain.go @@ -196,14 +196,14 @@ type Tuple struct { // function because we don't necessarily want *exactly* the same Value // repeated. For example, in YAML encoding, a !sum in a repeated tuple needs // a fresh variable in each instance. 
- repeat []func(nonDetEnv) (*Value, nonDetEnv) + repeat []func(envSet) (*Value, envSet) } func NewTuple(vs ...*Value) Tuple { return Tuple{vs: vs} } -func NewRepeat(gens ...func(nonDetEnv) (*Value, nonDetEnv)) Tuple { +func NewRepeat(gens ...func(envSet) (*Value, envSet)) Tuple { return Tuple{repeat: gens} } diff --git a/internal/unify/dot.go b/internal/unify/dot.go index a26b9dad..6fafa252 100644 --- a/internal/unify/dot.go +++ b/internal/unify/dot.go @@ -183,7 +183,7 @@ func (enc *dotEncoder) value(v *Value) string { } } -func (enc *dotEncoder) envSubgraph(e nonDetEnv) { +func (enc *dotEncoder) envSubgraph(e envSet) { enc.valLimit = maxNodes cID := enc.newID("cluster_%d") fmt.Fprintf(enc.w, "subgraph %s {\n", cID) diff --git a/internal/unify/env.go b/internal/unify/env.go index 0f45af39..b9989dd2 100644 --- a/internal/unify/env.go +++ b/internal/unify/env.go @@ -11,7 +11,7 @@ import ( "strings" ) -// A nonDetEnv is an immutable set of environments, where each environment is a +// An envSet is an immutable set of environments, where each environment is a // mapping from [ident]s to [Value]s. // // To keep this compact, we use an algebraic representation similar to @@ -59,7 +59,7 @@ import ( // e ⨯ 1 = // e + f = f + e // e ⨯ f = f ⨯ e -type nonDetEnv struct { +type envSet struct { root *envExpr } @@ -93,10 +93,10 @@ const ( ) var ( - // topEnv is the unit value (multiplicative identity) of a [nonDetEnv]. - topEnv = nonDetEnv{envExprUnit} - // bottomEnv is the zero value (additive identity) of a [nonDetEnv]. - bottomEnv = nonDetEnv{envExprZero} + // topEnv is the unit value (multiplicative identity) of a [envSet]. + topEnv = envSet{envExprUnit} + // bottomEnv is the zero value (additive identity) of a [envSet]. + bottomEnv = envSet{envExprZero} envExprZero = &envExpr{kind: envZero} envExprUnit = &envExpr{kind: envUnit} @@ -108,8 +108,8 @@ var ( // // Environments are typically initially constructed by starting with [topEnv] // and calling bind one or more times. 
-func (e nonDetEnv) bind(id *ident, vals ...*Value) nonDetEnv { - if e.isBottom() { +func (e envSet) bind(id *ident, vals ...*Value) envSet { + if e.isEmpty() { return bottomEnv } @@ -129,10 +129,10 @@ func (e nonDetEnv) bind(id *ident, vals ...*Value) nonDetEnv { } // Multiply it in. - return nonDetEnv{newEnvExprProduct(e.root, newEnvExprSum(bindings...))} + return envSet{newEnvExprProduct(e.root, newEnvExprSum(bindings...))} } -func (e nonDetEnv) isBottom() bool { +func (e envSet) isEmpty() bool { return e.root.kind == envZero } @@ -218,7 +218,7 @@ func newEnvExprSum(exprs ...*envExpr) *envExpr { return &envExpr{kind: envSum, operands: terms} } -func crossEnvs(env1, env2 nonDetEnv) nonDetEnv { +func crossEnvs(env1, env2 envSet) envSet { // Confirm that envs have disjoint idents. var ids1 smallSet[*ident] for e := range env1.root.bindings(nil) { @@ -230,15 +230,15 @@ func crossEnvs(env1, env2 nonDetEnv) nonDetEnv { } } - return nonDetEnv{newEnvExprProduct(env1.root, env2.root)} + return envSet{newEnvExprProduct(env1.root, env2.root)} } -func sumEnvs(envs ...nonDetEnv) nonDetEnv { +func unionEnvs(envs ...envSet) envSet { exprs := make([]*envExpr, len(envs)) for i := range envs { exprs[i] = envs[i].root } - return nonDetEnv{newEnvExprSum(exprs...)} + return envSet{newEnvExprSum(exprs...)} } // envPartition is a subset of an env where id is bound to value in all @@ -246,7 +246,7 @@ func sumEnvs(envs ...nonDetEnv) nonDetEnv { type envPartition struct { id *ident value *Value - env nonDetEnv + env envSet } // partitionBy splits e by distinct bindings of id and removes id from each @@ -257,8 +257,8 @@ type envPartition struct { // // It panics if e is bottom, since attempting to partition an empty environment // set almost certainly indicates a bug. 
-func (e nonDetEnv) partitionBy(id *ident) []envPartition { - if e.isBottom() { +func (e envSet) partitionBy(id *ident) []envPartition { + if e.isEmpty() { // We could return zero partitions, but getting here at all almost // certainly indicates a bug. panic("cannot partition empty environment set") @@ -276,7 +276,7 @@ func (e nonDetEnv) partitionBy(id *ident) []envPartition { parts = append(parts, envPartition{ id: id, value: n.val, - env: nonDetEnv{e.root.substitute(id, n.val)}, + env: envSet{e.root.substitute(id, n.val)}, }) } @@ -388,7 +388,7 @@ func (d Var) decode(rv reflect.Value) error { return &inexactError{"var", rv.Type().String()} } -func (d Var) unify(w *Value, e nonDetEnv, swap bool, uf *unifier) (Domain, nonDetEnv, error) { +func (d Var) unify(w *Value, e envSet, swap bool, uf *unifier) (Domain, envSet, error) { // TODO: Vars from !sums in the input can have a huge number of values. // Unifying these could be way more efficient with some indexes over any // exact values we can pull out, like Def fields that are exact Strings. @@ -409,7 +409,7 @@ func (d Var) unify(w *Value, e nonDetEnv, swap bool, uf *unifier) (Domain, nonDe // We need to unify w with the value of d in each possible environment. We // can save some work by grouping environments by the value of d, since // there will be a lot of redundancy here. - var nEnvs []nonDetEnv + var nEnvs []envSet envParts := e.partitionBy(d.id) for i, envPart := range envParts { exit := uf.enterVar(d.id, i) @@ -419,7 +419,7 @@ func (d Var) unify(w *Value, e nonDetEnv, swap bool, uf *unifier) (Domain, nonDe res, e2, err := w.unify(envPart.value, envPart.env, swap, uf) exit.exit() if err != nil { - return nil, nonDetEnv{}, err + return nil, envSet{}, err } if res.Domain == nil { // This branch entirely failed to unify, so it's gone. @@ -436,7 +436,7 @@ func (d Var) unify(w *Value, e nonDetEnv, swap bool, uf *unifier) (Domain, nonDe // The effect of this is entirely captured in the environment. 
We can return // back the same Bind node. - return d, sumEnvs(nEnvs...), nil + return d, unionEnvs(nEnvs...), nil } // An identPrinter maps [ident]s to unique string names. diff --git a/internal/unify/html.go b/internal/unify/html.go index d59bd8fc..036b80e2 100644 --- a/internal/unify/html.go +++ b/internal/unify/html.go @@ -108,7 +108,7 @@ func htmlSVG[Key comparable](t *htmlTracer, f func(Key), arg Key) string { return svg } -func (t *htmlTracer) emit(vs []*Value, labels []string, env nonDetEnv) { +func (t *htmlTracer) emit(vs []*Value, labels []string, env envSet) { fmt.Fprintf(t.w, `
`) for i, v := range vs { fmt.Fprintf(t.w, `
%s
`, i+1, html.EscapeString(labels[i])) diff --git a/internal/unify/trace.go b/internal/unify/trace.go index f1a7ea2c..b0aa3525 100644 --- a/internal/unify/trace.go +++ b/internal/unify/trace.go @@ -40,9 +40,9 @@ type tracer struct { type traceTree struct { label string // Identifies this node as a child of parent v, w *Value // Unification inputs - envIn nonDetEnv + envIn envSet res *Value // Unification result - env nonDetEnv + env envSet err error // or error parent *traceTree @@ -127,7 +127,7 @@ func (t *tracer) logf(pat string, vals ...any) { fmt.Fprintf(t.logw, "%s\n", s) } -func (t *tracer) traceUnify(v, w *Value, e nonDetEnv) { +func (t *tracer) traceUnify(v, w *Value, e envSet) { if t == nil { return } @@ -146,7 +146,7 @@ func (t *tracer) traceUnify(v, w *Value, e nonDetEnv) { } } -func (t *tracer) traceDone(res *Value, e nonDetEnv, err error) { +func (t *tracer) traceDone(res *Value, e envSet, err error) { if t == nil { return } diff --git a/internal/unify/unify.go b/internal/unify/unify.go index 6ebed7bd..9d22bf19 100644 --- a/internal/unify/unify.go +++ b/internal/unify/unify.go @@ -103,7 +103,7 @@ func newUnifier() *unifier { // unify1 could not unify the domains of the two values. var errDomains = errors.New("cannot unify domains") -func (v *Value) unify(w *Value, e nonDetEnv, swap bool, uf *unifier) (*Value, nonDetEnv, error) { +func (v *Value) unify(w *Value, e envSet, swap bool, uf *unifier) (*Value, envSet, error) { if swap { // Put the values in order. This just happens to be a handy choke-point // to do this at. @@ -122,14 +122,14 @@ func (v *Value) unify(w *Value, e nonDetEnv, swap bool, uf *unifier) (*Value, no } } if err != nil { - uf.traceDone(nil, nonDetEnv{}, err) - return nil, nonDetEnv{}, err + uf.traceDone(nil, envSet{}, err) + return nil, envSet{}, err } res := unified(d, v, w) uf.traceDone(res, e2, nil) if d == nil { // Double check that a bottom Value also has a bottom env. 
- if !e2.isBottom() { + if !e2.isEmpty() { panic("bottom Value has non-bottom environment") } } @@ -137,7 +137,7 @@ func (v *Value) unify(w *Value, e nonDetEnv, swap bool, uf *unifier) (*Value, no return res, e2, nil } -func (v *Value) unify1(w *Value, e nonDetEnv, swap bool, uf *unifier) (Domain, nonDetEnv, error) { +func (v *Value) unify1(w *Value, e envSet, swap bool, uf *unifier) (Domain, envSet, error) { // TODO: If there's an error, attach position information to it. vd, wd := v.Domain, w.Domain @@ -180,10 +180,10 @@ func (v *Value) unify1(w *Value, e nonDetEnv, swap bool, uf *unifier) (Domain, n } } - return nil, nonDetEnv{}, errDomains + return nil, envSet{}, errDomains } -func (d Def) unify(o Def, e nonDetEnv, swap bool, uf *unifier) (Domain, nonDetEnv, error) { +func (d Def) unify(o Def, e envSet, swap bool, uf *unifier) (Domain, envSet, error) { out := Def{fields: make(map[string]*Value)} // Check keys of d against o. @@ -198,7 +198,7 @@ func (d Def) unify(o Def, e nonDetEnv, swap bool, uf *unifier) (Domain, nonDetEn res, e2, err := dv.unify(ov, e, swap, uf) exit.exit() if err != nil { - return nil, nonDetEnv{}, err + return nil, envSet{}, err } else if res.Domain == nil { // No match. return nil, bottomEnv, nil @@ -216,7 +216,7 @@ func (d Def) unify(o Def, e nonDetEnv, swap bool, uf *unifier) (Domain, nonDetEn return out, e, nil } -func (v Tuple) unify(w Tuple, e nonDetEnv, swap bool, uf *unifier) (Domain, nonDetEnv, error) { +func (v Tuple) unify(w Tuple, e envSet, swap bool, uf *unifier) (Domain, envSet, error) { if v.repeat != nil && w.repeat != nil { // Since we generate the content of these lazily, there's not much we // can do but just stick them on a list to unify later. 
@@ -253,7 +253,7 @@ func (v Tuple) unify(w Tuple, e nonDetEnv, swap bool, uf *unifier) (Domain, nonD z, e2, err := v1.unify(t.vs[i], e, swap, uf) exit.exit() if err != nil { - return nil, nonDetEnv{}, err + return nil, envSet{}, err } else if z.Domain == nil { return nil, bottomEnv, nil } @@ -268,7 +268,7 @@ func (v Tuple) unify(w Tuple, e nonDetEnv, swap bool, uf *unifier) (Domain, nonD // doRepeat creates a fixed-length tuple from a repeated tuple. The caller is // expected to unify the returned tuples. -func (v Tuple) doRepeat(e nonDetEnv, n int) ([]Tuple, nonDetEnv) { +func (v Tuple) doRepeat(e envSet, n int) ([]Tuple, envSet) { res := make([]Tuple, len(v.repeat)) for i, gen := range v.repeat { res[i].vs = make([]*Value, n) diff --git a/internal/unify/yaml.go b/internal/unify/yaml.go index ff5115f7..7edc3d9d 100644 --- a/internal/unify/yaml.go +++ b/internal/unify/yaml.go @@ -112,7 +112,7 @@ type yamlDecoder struct { vars map[string]*ident nSums int - env nonDetEnv + env envSet } func (dec *yamlDecoder) value(node *yaml.Node) (vOut *Value, errOut error) { @@ -243,7 +243,7 @@ func (dec *yamlDecoder) value(node *yaml.Node) (vOut *Value, errOut error) { // Decode the children to make sure they're well-formed, but otherwise // discard that decoding and do it again every time we need a new // element. - var gen []func(e nonDetEnv) (*Value, nonDetEnv) + var gen []func(e envSet) (*Value, envSet) origEnv := dec.env elts := node.Content for i, elt := range elts { @@ -256,7 +256,7 @@ func (dec *yamlDecoder) value(node *yaml.Node) (vOut *Value, errOut error) { // introduced within the element. 
dec.env = origEnv // Add a generator function - gen = append(gen, func(e nonDetEnv) (*Value, nonDetEnv) { + gen = append(gen, func(e envSet) (*Value, envSet) { dec.env = e // TODO: If this is in a sum, this tends to generate a ton of // fresh variables that are different on each branch of the @@ -298,7 +298,7 @@ func (dec *yamlDecoder) value(node *yaml.Node) (vOut *Value, errOut error) { type yamlEncoder struct { idp identPrinter - e nonDetEnv // We track the environment for !repeat nodes. + e envSet // We track the environment for !repeat nodes. } // TODO: Switch some Value marshaling to Closure? @@ -344,11 +344,11 @@ func (enc *yamlEncoder) closure(c Closure) *yaml.Node { // Fill in the env after we've written the value in case value encoding // affects the env. n.Content[1] = enc.env(enc.e) - enc.e = nonDetEnv{} // Allow GC'ing the env + enc.e = envSet{} // Allow GC'ing the env return &n } -func (enc *yamlEncoder) env(e nonDetEnv) *yaml.Node { +func (enc *yamlEncoder) env(e envSet) *yaml.Node { var encode func(e *envExpr) *yaml.Node encode = func(e *envExpr) *yaml.Node { var n yaml.Node From 603ac80d1808fe60694cb2da7d6de67f3dbe7d02 Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Tue, 29 Jul 2025 14:16:22 -0400 Subject: [PATCH 174/200] internal/unify: drop StringReplacer We're not using it and it was probably a bad idea anyway. Change-Id: I6c5ff0b5796a1adffe4889309dc08dd352d0fe56 Reviewed-on: https://go-review.googlesource.com/c/arch/+/693340 Auto-Submit: Austin Clements LUCI-TryBot-Result: Go LUCI Reviewed-by: Junyang Shao --- internal/unify/yaml.go | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/internal/unify/yaml.go b/internal/unify/yaml.go index 7edc3d9d..281519de 100644 --- a/internal/unify/yaml.go +++ b/internal/unify/yaml.go @@ -19,10 +19,6 @@ import ( type UnmarshalOpts struct { // Path is the file path to store in the [Pos] of all [Value]s. 
Path string - - // StringReplacer, if non-nil, is called for each string value to perform - // any application-specific string interpolation. - StringReplacer func(string) string } // UnmarshalYAML unmarshals a YAML node into a Closure. @@ -184,18 +180,12 @@ func (dec *yamlDecoder) value(node *yaml.Node) (vOut *Value, errOut error) { case isExact(): val := node.Value - if dec.opts.StringReplacer != nil { - val = dec.opts.StringReplacer(val) - } return mk(NewStringExact(val)) case isStr || is(yaml.ScalarNode, "!regex"): // Any other string we treat as a regex. This will produce an exact // string anyway if the regex is literal. val := node.Value - if dec.opts.StringReplacer != nil { - val = dec.opts.StringReplacer(val) - } return mk2(NewStringRegex(val)) case is(yaml.SequenceNode, "!regex"): From 57b3564f8b95515bb4c02f4a07a41d16fbffbcb7 Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Thu, 31 Jul 2025 17:40:09 -0400 Subject: [PATCH 175/200] internal/unify: refactor in preparation for !import This rearranges entry to the YAML parser in preparation for referencing other files from the YAML. Currently the options struct is empty, but we keep it around because we'll use it in the next CL. Change-Id: Ib41dd274cd50c30bb22cdec7785721e9c0997939 Reviewed-on: https://go-review.googlesource.com/c/arch/+/693341 Reviewed-by: Junyang Shao Auto-Submit: Austin Clements LUCI-TryBot-Result: Go LUCI --- internal/simdgen/main.go | 20 +----- internal/unify/yaml.go | 132 ++++++++++++++++++++++++++++----------- 2 files changed, 98 insertions(+), 54 deletions(-) diff --git a/internal/simdgen/main.go b/internal/simdgen/main.go index 92b1fa98..7bf43618 100644 --- a/internal/simdgen/main.go +++ b/internal/simdgen/main.go @@ -162,8 +162,8 @@ func main() { // Load query. 
if *flagQ != "" { r := strings.NewReader(*flagQ) - var def unify.Closure - if err := def.Unmarshal(r, unify.UnmarshalOpts{Path: ""}); err != nil { + def, err := unify.Read(r, "", unify.ReadOpts{}) + if err != nil { log.Fatalf("parsing -q: %s", err) } inputs = append(inputs, def) @@ -172,7 +172,7 @@ func main() { // Load defs files. must := make(map[*unify.Value]struct{}) for _, path := range flag.Args() { - defs, err := loadValue(path) + defs, err := unify.ReadFile(path, unify.ReadOpts{}) if err != nil { log.Fatal(err) } @@ -245,20 +245,6 @@ func main() { } } -func loadValue(path string) (unify.Closure, error) { - f, err := os.Open(path) - if err != nil { - return unify.Closure{}, err - } - defer f.Close() - - var c unify.Closure - if err := c.Unmarshal(f, unify.UnmarshalOpts{}); err != nil { - return unify.Closure{}, fmt.Errorf("%s: %v", path, err) - } - return c, nil -} - func validate(cl unify.Closure, required map[*unify.Value]struct{}) { // Validate that: // 1. All final defs are exact diff --git a/internal/unify/yaml.go b/internal/unify/yaml.go index 281519de..afe9c7b5 100644 --- a/internal/unify/yaml.go +++ b/internal/unify/yaml.go @@ -8,22 +8,21 @@ import ( "errors" "fmt" "io" + "os" "regexp" "strings" "gopkg.in/yaml.v3" ) -// UnmarshalOpts provides options to unmarshaling. The zero value is the default -// options. -type UnmarshalOpts struct { - // Path is the file path to store in the [Pos] of all [Value]s. - Path string +// ReadOpts provides options to [Read] and related functions. The zero value is +// the default options. +type ReadOpts struct { } -// UnmarshalYAML unmarshals a YAML node into a Closure. +// Read reads a [Closure] in YAML format from r, using path for error messages. // -// This is how UnmarshalYAML maps YAML nodes into terminal Values: +// It maps YAML nodes into terminal Values as follows: // // - "_" or !top _ is the top value ([Top]). // @@ -46,7 +45,7 @@ type UnmarshalOpts struct { // // - !regex [x, y, ...] 
is an intersection of regular expressions ([String]). // -// This is how UnmarshalYAML maps YAML nodes into non-terminal Values: +// It maps YAML nodes into non-terminal Values as follows: // // - Sequence nodes like [x, y, z] are tuples ([Tuple]). // @@ -62,48 +61,53 @@ type UnmarshalOpts struct { // non-deterministic choice view really works. The unifier does not directly // implement sums; instead, this is decoded as a fresh variable that's // simultaneously bound to x, y, and z. -func (c *Closure) UnmarshalYAML(node *yaml.Node) error { - return c.unmarshal(node, UnmarshalOpts{}) +func Read(r io.Reader, path string, opts ReadOpts) (Closure, error) { + dec := yamlDecoder{opts: opts, path: path, env: topEnv} + v, err := dec.read(r) + if err != nil { + return Closure{}, err + } + return dec.close(v), nil } -// Unmarshal is like [UnmarshalYAML], but accepts options and reads from r. If -// opts.Path is "" and r has a Name() string method, the result of r.Name() is -// used as the path for all [Value]s read from r. -func (c *Closure) Unmarshal(r io.Reader, opts UnmarshalOpts) error { - if opts.Path == "" { - type named interface{ Name() string } - if n, ok := r.(named); ok { - opts.Path = n.Name() - } +// ReadFile reads a [Closure] in YAML format from a file. +// +// The file must consist of a single YAML document. +// +// See [Read] for details. 
+func ReadFile(path string, opts ReadOpts) (Closure, error) { + f, err := os.Open(path) + if err != nil { + return Closure{}, err } + defer f.Close() - var node yaml.Node - if err := yaml.NewDecoder(r).Decode(&node); err != nil { - return err - } - np := &node - if np.Kind == yaml.DocumentNode { - np = node.Content[0] + dec := yamlDecoder{opts: opts, path: path, env: topEnv} + v, err := dec.read(f) + if err != nil { + return Closure{}, err } - return c.unmarshal(np, opts) + + return dec.close(v), nil } -func (c *Closure) unmarshal(node *yaml.Node, opts UnmarshalOpts) error { - dec := &yamlDecoder{opts: opts, vars: make(map[string]*ident), env: topEnv} - val, err := dec.value(node) +// UnmarshalYAML implements [yaml.Unmarshaler]. +// +// Since there is no way to pass [ReadOpts] to this function, it assumes default +// options. +func (c *Closure) UnmarshalYAML(node *yaml.Node) error { + dec := yamlDecoder{path: "", env: topEnv} + v, err := dec.root(node) if err != nil { return err } - vars := make(map[*ident]*Value) - for _, id := range dec.vars { - vars[id] = topValue - } - *c = Closure{val, dec.env} + *c = dec.close(v) return nil } type yamlDecoder struct { - opts UnmarshalOpts + opts ReadOpts + path string vars map[string]*ident nSums int @@ -111,8 +115,62 @@ type yamlDecoder struct { env envSet } +func (dec *yamlDecoder) read(r io.Reader) (*Value, error) { + n, err := readOneNode(r) + if err != nil { + return nil, fmt.Errorf("%s: %w", dec.path, err) + } + + // Decode YAML node to a Value + v, err := dec.root(n) + if err != nil { + return nil, fmt.Errorf("%s: %w", dec.path, err) + } + + return v, nil +} + +// readOneNode reads a single YAML document from r and returns an error if there +// are more documents in r. 
+func readOneNode(r io.Reader) (*yaml.Node, error) { + yd := yaml.NewDecoder(r) + + // Decode as a YAML node + var node yaml.Node + if err := yd.Decode(&node); err != nil { + return nil, err + } + np := &node + if np.Kind == yaml.DocumentNode { + np = node.Content[0] + } + + // Ensure there are no more YAML docs in this file + if err := yd.Decode(nil); err == nil { + return nil, fmt.Errorf("must not contain multiple documents") + } else if err != io.EOF { + return nil, err + } + + return np, nil +} + +// root parses the root of a file. +func (dec *yamlDecoder) root(node *yaml.Node) (*Value, error) { + // Prepare for variable name resolution in this file. + dec.vars = make(map[string]*ident, 0) + dec.nSums = 0 + + return dec.value(node) +} + +// close wraps a decoded [Value] into a [Closure]. +func (dec *yamlDecoder) close(v *Value) Closure { + return Closure{v, dec.env} +} + func (dec *yamlDecoder) value(node *yaml.Node) (vOut *Value, errOut error) { - pos := &Pos{Path: dec.opts.Path, Line: node.Line} + pos := &Pos{Path: dec.path, Line: node.Line} // Resolve alias nodes. if node.Kind == yaml.AliasNode { From dfa62d10c0340b1dadadd0350e61a7a46d821612 Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Fri, 1 Aug 2025 13:10:05 -0400 Subject: [PATCH 176/200] internal/unify: add !import nodes The new "!import pattern" node can be used to import other YAML files using a glob pattern. Each file gets its own variable scope ($x in one file is not the same as $x in another file), and we protect against paths that reference files above the current file. 
Change-Id: Ib479aa16f0979c3b9060d63320aa3505f72b1ff1 Reviewed-on: https://go-review.googlesource.com/c/arch/+/693342 Auto-Submit: Austin Clements LUCI-TryBot-Result: Go LUCI Reviewed-by: Junyang Shao --- internal/unify/value_test.go | 16 ++++++- internal/unify/yaml.go | 90 +++++++++++++++++++++++++++++++++--- internal/unify/yaml_test.go | 57 ++++++++++++++++++++++- 3 files changed, 155 insertions(+), 8 deletions(-) diff --git a/internal/unify/value_test.go b/internal/unify/value_test.go index 28d22b25..54937c68 100644 --- a/internal/unify/value_test.go +++ b/internal/unify/value_test.go @@ -4,7 +4,11 @@ package unify -import "slices" +import ( + "reflect" + "slices" + "testing" +) func ExampleClosure_All_tuple() { v := mustParse(` @@ -34,3 +38,13 @@ c: 5 // - {a: 2, b: 3, c: 5} // - {a: 2, b: 4, c: 5} } + +func checkDecode[T any](t *testing.T, got *Value, want T) { + var gotT T + if err := got.Decode(&gotT); err != nil { + t.Fatalf("Decode failed: %v", err) + } + if !reflect.DeepEqual(&gotT, &want) { + t.Fatalf("got:\n%s\nwant:\n%s", prettyYaml(gotT), prettyYaml(want)) + } +} diff --git a/internal/unify/yaml.go b/internal/unify/yaml.go index afe9c7b5..dadcd71d 100644 --- a/internal/unify/yaml.go +++ b/internal/unify/yaml.go @@ -8,7 +8,9 @@ import ( "errors" "fmt" "io" + "io/fs" "os" + "path/filepath" "regexp" "strings" @@ -18,6 +20,9 @@ import ( // ReadOpts provides options to [Read] and related functions. The zero value is // the default options. type ReadOpts struct { + // FS, if non-nil, is the file system from which to resolve !import file + // names. + FS fs.FS } // Read reads a [Closure] in YAML format from r, using path for error messages. @@ -61,6 +66,10 @@ type ReadOpts struct { // non-deterministic choice view really works. The unifier does not directly // implement sums; instead, this is decoded as a fresh variable that's // simultaneously bound to x, y, and z. 
+// +// - !import glob is like a !sum, but its children are read from all files +// matching the given glob pattern, which is interpreted relative to the current +// file path. Each file gets its own variable scope. func Read(r io.Reader, path string, opts ReadOpts) (Closure, error) { dec := yamlDecoder{opts: opts, path: path, env: topEnv} v, err := dec.read(r) @@ -74,6 +83,8 @@ func Read(r io.Reader, path string, opts ReadOpts) (Closure, error) { // // The file must consist of a single YAML document. // +// If opts.FS is not set, this sets it to a FS rooted at path's directory. +// // See [Read] for details. func ReadFile(path string, opts ReadOpts) (Closure, error) { f, err := os.Open(path) @@ -82,13 +93,11 @@ func ReadFile(path string, opts ReadOpts) (Closure, error) { } defer f.Close() - dec := yamlDecoder{opts: opts, path: path, env: topEnv} - v, err := dec.read(f) - if err != nil { - return Closure{}, err + if opts.FS == nil { + opts.FS = os.DirFS(filepath.Dir(path)) } - return dec.close(v), nil + return Read(f, path, opts) } // UnmarshalYAML implements [yaml.Unmarshaler]. @@ -157,7 +166,12 @@ func readOneNode(r io.Reader) (*yaml.Node, error) { // root parses the root of a file. func (dec *yamlDecoder) root(node *yaml.Node) (*Value, error) { - // Prepare for variable name resolution in this file. + // Prepare for variable name resolution in this file. This may be a nested + // root, so restore the current values when we're done. + oldVars, oldNSums := dec.vars, dec.nSums + defer func() { + dec.vars, dec.nSums = oldVars, oldNSums + }() dec.vars = make(map[string]*ident, 0) dec.nSums = 0 @@ -339,11 +353,75 @@ func (dec *yamlDecoder) value(node *yaml.Node) (vOut *Value, errOut error) { dec.nSums++ dec.env = dec.env.bind(id, vs...) 
return mk(Var{id: id}) + + case is(yaml.ScalarNode, "!import"): + if dec.opts.FS == nil { + return nil, fmt.Errorf("!import not allowed (ReadOpts.FS not set)") + } + pat := node.Value + + if !fs.ValidPath(pat) { + // This will result in Glob returning no results. Give a more useful + // error message for this case. + return nil, fmt.Errorf("!import path must not contain '.' or '..'") + } + + ms, err := fs.Glob(dec.opts.FS, pat) + if err != nil { + return nil, fmt.Errorf("resolving !import: %w", err) + } + if len(ms) == 0 { + return nil, fmt.Errorf("!import did not match any files") + } + + // Parse each file + vs := make([]*Value, 0, len(ms)) + for _, m := range ms { + v, err := dec.import1(m) + if err != nil { + return nil, err + } + vs = append(vs, v) + } + + // Create a sum. + if len(vs) == 1 { + return vs[0], nil + } + id := &ident{name: "import"} + dec.env = dec.env.bind(id, vs...) + return mk(Var{id: id}) } return nil, fmt.Errorf("unknown node kind %d %v", node.Kind, node.Tag) } +func (dec *yamlDecoder) import1(path string) (*Value, error) { + // Make sure we can open the path first. + f, err := dec.opts.FS.Open(path) + if err != nil { + return nil, fmt.Errorf("!import failed: %w", err) + } + defer f.Close() + + // Prepare the enter path. + oldFS, oldPath := dec.opts.FS, dec.path + defer func() { + dec.opts.FS, dec.path = oldFS, oldPath + }() + + // Enter path, which is relative to the current path's directory. + newPath := filepath.Join(filepath.Dir(dec.path), path) + subFS, err := fs.Sub(dec.opts.FS, filepath.Dir(path)) + if err != nil { + return nil, err + } + dec.opts.FS, dec.path = subFS, newPath + + // Parse the file. + return dec.read(f) +} + type yamlEncoder struct { idp identPrinter e envSet // We track the environment for !repeat nodes. 
diff --git a/internal/unify/yaml_test.go b/internal/unify/yaml_test.go index e04d05d4..4f0aef43 100644 --- a/internal/unify/yaml_test.go +++ b/internal/unify/yaml_test.go @@ -11,6 +11,7 @@ import ( "log" "strings" "testing" + "testing/fstest" "gopkg.in/yaml.v3" ) @@ -37,6 +38,10 @@ func oneValue(t *testing.T, c Closure) *Value { } func printYaml(val any) { + fmt.Println(prettyYaml(val)) +} + +func prettyYaml(val any) string { b, err := yaml.Marshal(val) if err != nil { panic(err) @@ -66,7 +71,7 @@ func printYaml(val any) { if err != nil { panic(err) } - fmt.Println(string(b)) + return string(b) } func cleanYaml(node *yaml.Node, lines []int, endPos int) { @@ -145,3 +150,53 @@ func TestEmptyString(t *testing.T) { t.Fatal("expected exact string") } } + +func TestImport(t *testing.T) { + // Test a basic import + main := strings.NewReader("!import x/y.yaml") + fs := fstest.MapFS{ + // Test a glob import with a relative path + "x/y.yaml": {Data: []byte("!import y/*.yaml")}, + "x/y/z.yaml": {Data: []byte("42")}, + } + cl, err := Read(main, "x.yaml", ReadOpts{FS: fs}) + if err != nil { + t.Fatal(err) + } + x := 42 + checkDecode(t, oneValue(t, cl), &x) +} + +func TestImportEscape(t *testing.T) { + // Make sure an import can't escape its subdirectory. + main := strings.NewReader("!import x/y.yaml") + fs := fstest.MapFS{ + "x/y.yaml": {Data: []byte("!import ../y/*.yaml")}, + "y/z.yaml": {Data: []byte("42")}, + } + _, err := Read(main, "x.yaml", ReadOpts{FS: fs}) + if err == nil { + t.Fatal("relative !import should have failed") + } + if !strings.Contains(err.Error(), "must not contain") { + t.Fatalf("unexpected error %v", err) + } +} + +func TestImportScope(t *testing.T) { + // Test that imports have different variable scopes. 
+ main := strings.NewReader("[!import y.yaml, !import y.yaml]") + fs := fstest.MapFS{ + "y.yaml": {Data: []byte("$v")}, + } + cl1, err := Read(main, "x.yaml", ReadOpts{FS: fs}) + if err != nil { + t.Fatal(err) + } + cl2 := mustParse("[1, 2]") + res, err := Unify(cl1, cl2) + if err != nil { + t.Fatal(err) + } + checkDecode(t, oneValue(t, res), []int{1, 2}) +} From ca8f6cb8d5997937a1ce630f5ead4104cbff1bda Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Tue, 5 Aug 2025 17:05:27 -0400 Subject: [PATCH 177/200] internal/unify: fix Summands for sum of sums Right now, Closure.Summands only iterates over a top-level sum. Fix it so it can handle sums of sums. This is going to start coming up since !imports tend to lead to sums of sums. Change-Id: Ie67ced083d6d5b814e3ad77b089c4adb2591c568 Reviewed-on: https://go-review.googlesource.com/c/arch/+/693343 Auto-Submit: Austin Clements LUCI-TryBot-Result: Go LUCI Reviewed-by: Junyang Shao --- internal/unify/closure.go | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/internal/unify/closure.go b/internal/unify/closure.go index 5b654879..e8e76e21 100644 --- a/internal/unify/closure.go +++ b/internal/unify/closure.go @@ -29,18 +29,24 @@ func (c Closure) IsBottom() bool { // Summands returns the top-level Values of c. This assumes the top-level of c // was constructed as a sum, and is mostly useful for debugging. func (c Closure) Summands() iter.Seq[*Value] { - if v, ok := c.val.Domain.(Var); ok { - parts := c.env.partitionBy(v.id) - return func(yield func(*Value) bool) { - for _, part := range parts { - if !yield(part.value) { - return + return func(yield func(*Value) bool) { + var rec func(v *Value, env envSet) bool + rec = func(v *Value, env envSet) bool { + switch d := v.Domain.(type) { + case Var: + parts := env.partitionBy(d.id) + for _, part := range parts { + // It may be a sum of sums. Walk into this value. 
+ if !rec(part.value, part.env) { + return false + } } + return true + default: + return yield(v) } } - } - return func(yield func(*Value) bool) { - yield(c.val) + rec(c.val, c.env) } } From f958ca743b4b6fb2e95d944adcd52a16643dc393 Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Fri, 1 Aug 2025 15:48:19 -0400 Subject: [PATCH 178/200] internal/simdgen: replace go generate with !import Currently, after editing the source YAML files, you have to run go generate to produce that concatenated inputs. This is easy to forget to do, and it's easy to accidentally edit the concatenated input instead of the source YAML. It also means any line numbers that appear in error messages or debug output refer to the generated YAML instead of the source YAML. Fix this by using the new !import node to perform this file loading in the unifier itself from the original source files. Change-Id: I735f96ac9b12ccd1057629758a73b1bda30544cc Reviewed-on: https://go-review.googlesource.com/c/arch/+/693344 LUCI-TryBot-Result: Go LUCI Auto-Submit: Austin Clements Reviewed-by: Junyang Shao --- internal/simdgen/categories.yaml | 524 +------------- internal/simdgen/etetest.sh | 1 - internal/simdgen/go.yaml | 1097 +----------------------------- internal/simdgen/main.go | 6 - internal/simdgen/ops/main.go | 75 -- 5 files changed, 2 insertions(+), 1701 deletions(-) delete mode 100644 internal/simdgen/ops/main.go diff --git a/internal/simdgen/categories.yaml b/internal/simdgen/categories.yaml index 26e80c5e..ed4c9645 100644 --- a/internal/simdgen/categories.yaml +++ b/internal/simdgen/categories.yaml @@ -1,523 +1 @@ -!sum -- go: Add - commutative: true - extension: "AVX.*" - documentation: !string |- - // NAME adds corresponding elements of two vectors. -- go: AddSaturated - commutative: true - extension: "AVX.*" - documentation: !string |- - // NAME adds corresponding elements of two vectors with saturation. 
-- go: Sub - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME subtracts corresponding elements of two vectors. -- go: SubSaturated - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME subtracts corresponding elements of two vectors with saturation. -- go: AddPairs - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME horizontally adds adjacent pairs of elements. - // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...]. -- go: SubPairs - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME horizontally subtracts adjacent pairs of elements. - // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...]. -- go: AddPairsSaturated - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME horizontally adds adjacent pairs of elements with saturation. - // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...]. -- go: SubPairsSaturated - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME horizontally subtracts adjacent pairs of elements with saturation. - // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...]. -- go: And - commutative: true - extension: "AVX.*" - documentation: !string |- - // NAME performs a bitwise AND operation between two vectors. -- go: Or - commutative: true - extension: "AVX.*" - documentation: !string |- - // NAME performs a bitwise OR operation between two vectors. -- go: AndNot - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME performs a bitwise x &^ y. -- go: Xor - commutative: true - extension: "AVX.*" - documentation: !string |- - // NAME performs a bitwise XOR operation between two vectors. 
- -# We also have PTEST and VPTERNLOG, those should be hidden from the users -# and only appear in rewrite rules. -# const imm predicate(holds for both float and int|uint): -# 0: Equal -# 1: Less -# 2: LessEqual -# 4: NotEqual -# 5: GreaterEqual -# 6: Greater -- go: Equal - constImm: 0 - commutative: true - extension: "AVX.*" - documentation: !string |- - // NAME compares for equality. -- go: Less - constImm: 1 - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME compares for less than. -- go: LessEqual - constImm: 2 - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME compares for less than or equal. -- go: IsNan # For float only. - constImm: 3 - commutative: true - extension: "AVX.*" - documentation: !string |- - // NAME checks if elements are NaN. Use as x.IsNan(x). -- go: NotEqual - constImm: 4 - commutative: true - extension: "AVX.*" - documentation: !string |- - // NAME compares for inequality. -- go: GreaterEqual - constImm: 13 - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME compares for greater than or equal. -- go: Greater - constImm: 14 - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME compares for greater than. -- go: ConvertToInt32 - commutative: false - extension: "AVX.*" - documentation: !string |- - // ConvertToInt32 converts element values to int32. - -- go: ConvertToUint32 - commutative: false - extension: "AVX.*" - documentation: !string |- - // ConvertToUint32Masked converts element values to uint32. -- go: Div - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME divides elements of two vectors. -- go: Sqrt - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME computes the square root of each element. -- go: ApproximateReciprocal - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME computes an approximate reciprocal of each element. 
-- go: ApproximateReciprocalOfSqrt - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME computes an approximate reciprocal of the square root of each element. -- go: Scale - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME multiplies elements by a power of 2. -- go: Round - commutative: false - extension: "AVX.*" - constImm: 0 - documentation: !string |- - // NAME rounds elements to the nearest integer. -- go: RoundScaled - commutative: false - extension: "AVX.*" - constImm: 0 - documentation: !string |- - // NAME rounds elements with specified precision. -- go: RoundScaledResidue - commutative: false - extension: "AVX.*" - constImm: 0 - documentation: !string |- - // NAME computes the difference after rounding with specified precision. -- go: Floor - commutative: false - extension: "AVX.*" - constImm: 1 - documentation: !string |- - // NAME rounds elements down to the nearest integer. -- go: FloorScaled - commutative: false - extension: "AVX.*" - constImm: 1 - documentation: !string |- - // NAME rounds elements down with specified precision. -- go: FloorScaledResidue - commutative: false - extension: "AVX.*" - constImm: 1 - documentation: !string |- - // NAME computes the difference after flooring with specified precision. -- go: Ceil - commutative: false - extension: "AVX.*" - constImm: 2 - documentation: !string |- - // NAME rounds elements up to the nearest integer. -- go: CeilScaled - commutative: false - extension: "AVX.*" - constImm: 2 - documentation: !string |- - // NAME rounds elements up with specified precision. -- go: CeilScaledResidue - commutative: false - extension: "AVX.*" - constImm: 2 - documentation: !string |- - // NAME computes the difference after ceiling with specified precision. -- go: Trunc - commutative: false - extension: "AVX.*" - constImm: 3 - documentation: !string |- - // NAME truncates elements towards zero. 
-- go: TruncScaled - commutative: false - extension: "AVX.*" - constImm: 3 - documentation: !string |- - // NAME truncates elements with specified precision. -- go: TruncScaledResidue - commutative: false - extension: "AVX.*" - constImm: 3 - documentation: !string |- - // NAME computes the difference after truncating with specified precision. -- go: AddSub - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME subtracts even elements and adds odd elements of two vectors. -- go: GaloisFieldAffineTransform - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME computes an affine transformation in GF(2^8): - // x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrixes; - // b is an 8-bit vector. The affine transformation is y * x + b, with each element of y - // corresponding to a group of 8 elements in x. -- go: GaloisFieldAffineTransformInverse - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME computes an affine transformation in GF(2^8), - // with x inverted with respect to reduction polynomial x^8 + x^4 + x^3 + x + 1: - // x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrixes; - // b is an 8-bit vector. The affine transformation is y * x + b, with each element of y - // corresponding to a group of 8 elements in x. -- go: GaloisFieldMul - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME computes element-wise GF(2^8) multiplication with - // reduction polynomial x^8 + x^4 + x^3 + x + 1. -- go: Average - commutative: true - extension: "AVX.*" # VPAVGB/W are available across various AVX versions - documentation: !string |- - // NAME computes the rounded average of corresponding elements. 
-- go: Absolute - commutative: false - # Unary operation, not commutative - extension: "AVX.*" # VPABSB/W/D are AVX, VPABSQ is AVX512 - documentation: !string |- - // NAME computes the absolute value of each element. -- go: Sign - # Applies sign of second operand to first: sign(val, sign_src) - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME returns the product of the first operand with -1, 0, or 1, - // whichever constant is nearest to the value of the second operand. - # Sign does not have masked version -- go: PopCount - commutative: false - extension: "AVX512.*" - documentation: !string |- - // NAME counts the number of set bits in each element. -- go: PairDotProd - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME multiplies the elements and add the pairs together, - // yielding a vector of half as many elements with twice the input element size. -# TODO: maybe simplify this name within the receiver-type + method-naming scheme we use. -- go: SaturatedUnsignedSignedPairDotProd - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME multiplies the elements and add the pairs together with saturation, - // yielding a vector of half as many elements with twice the input element size. -# QuadDotProd, i.e. VPDPBUSD(S) are operations with src/dst on the same register, we are not supporting this as of now. -- go: DotProdBroadcast - commutative: true - extension: "AVX.*" - documentation: !string |- - // NAME multiplies all elements and broadcasts the sum. -- go: UnsignedSignedQuadDotProdAccumulate - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME performs dot products on groups of 4 elements of x and y and then adds z. -- go: SaturatedUnsignedSignedQuadDotProdAccumulate - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME multiplies performs dot products on groups of 4 elements of x and y and then adds z. 
-- go: AddDotProd - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME performs dot products on pairs of elements of y and z and then adds x. -- go: SaturatedAddDotProd - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME performs dot products on pairs of elements of y and z and then adds x. -- go: FusedMultiplyAdd - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME performs (x * y) + z. -- go: FusedMultiplyAddSub - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME performs (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements. -- go: FusedMultiplySubAdd - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME performs (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements. -- go: Max - commutative: true - extension: "AVX.*" - documentation: !string |- - // NAME computes the maximum of corresponding elements. -- go: Min - commutative: true - extension: "AVX.*" - documentation: !string |- - // NAME computes the minimum of corresponding elements. -- go: SetElem - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME sets a single constant-indexed element's value. -- go: GetElem - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME retrieves a single constant-indexed element's value. -- go: Set128 - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME combines a 128-bit vector with a 256-bit vector, where the constant operand specifies whether the low (0) or high (1) half is receives the smaller vector. -- go: Get128 - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME retrieves the upper (1) or lower (0) half of a 256-bit vector, depending on the constant operand. 
-- go: Permute - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME performs a full permutation of vector x using indices: - // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} - // Only the needed bits to represent x's index are used in indices' elements. -- go: Permute2 # Permute2 is only available on or after AVX512 - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME performs a full permutation of vector x, y using indices: - // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} - // where xy is x appending y. - // Only the needed bits to represent xy's index are used in indices' elements. -- go: Compress - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME performs a compression on vector x using mask by - // selecting elements as indicated by mask, and pack them to lower indexed elements. -- go: blend - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME blends two vectors based on mask values, choosing either - // the first or the second based on whether the third is false or true -- go: Mul - commutative: true - extension: "AVX.*" - documentation: !string |- - // NAME multiplies corresponding elements of two vectors. -- go: MulEvenWiden - commutative: true - extension: "AVX.*" - documentation: !string |- - // NAME multiplies even-indexed elements, widening the result. - // Result[i] = v1.Even[i] * v2.Even[i]. -- go: MulHigh - commutative: true - extension: "AVX.*" - documentation: !string |- - // NAME multiplies elements and stores the high part of the result. -- go: ShiftAllLeft - nameAndSizeCheck: true - specialLower: sftimm - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. 
-- go: ShiftAllRight - signed: false - nameAndSizeCheck: true - specialLower: sftimm - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. -- go: ShiftAllRight - signed: true - specialLower: sftimm - nameAndSizeCheck: true - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. -- go: shiftAllLeftConst # no APIs, only ssa ops. - noTypes: "true" - noGenericOps: "true" - SSAVariant: "const" # to avoid its name colliding with reg version of this instruction, amend this to its ssa op name. - nameAndSizeCheck: true - commutative: false - extension: "AVX.*" -- go: shiftAllRightConst # no APIs, only ssa ops. - noTypes: "true" - noGenericOps: "true" - SSAVariant: "const" - signed: false - nameAndSizeCheck: true - commutative: false - extension: "AVX.*" -- go: shiftAllRightConst # no APIs, only ssa ops. - noTypes: "true" - noGenericOps: "true" - SSAVariant: "const" - signed: true - nameAndSizeCheck: true - commutative: false - extension: "AVX.*" - -- go: ShiftLeft - nameAndSizeCheck: true - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. -- go: ShiftRight - signed: false - nameAndSizeCheck: true - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. -- go: ShiftRight - signed: true - nameAndSizeCheck: true - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME shifts each element in x to the right by the number of bits specified in y's corresponding elements. 
Emptied upper bits are filled with the sign bit. -- go: RotateAllLeft - nameAndSizeCheck: true - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME rotates each element to the left by the number of bits specified by the immediate. -- go: RotateLeft - nameAndSizeCheck: true - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME rotates each element in x to the left by the number of bits specified by y's corresponding elements. -- go: RotateAllRight - nameAndSizeCheck: true - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME rotates each element to the right by the number of bits specified by the immediate. -- go: RotateRight - nameAndSizeCheck: true - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME rotates each element in x to the right by the number of bits specified by y's corresponding elements. -- go: ShiftAllLeftConcat - nameAndSizeCheck: true - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME shifts each element of x to the left by the number of bits specified by the - // immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. -- go: ShiftAllRightConcat - nameAndSizeCheck: true - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME shifts each element of x to the right by the number of bits specified by the - // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. -- go: ShiftLeftConcat - nameAndSizeCheck: true - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME shifts each element of x to the left by the number of bits specified by the - // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. 
-- go: ShiftRightConcat - nameAndSizeCheck: true - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME shifts each element of x to the right by the number of bits specified by the - // corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. +!import ops/*/categories.yaml diff --git a/internal/simdgen/etetest.sh b/internal/simdgen/etetest.sh index ea270429..7b5001ec 100755 --- a/internal/simdgen/etetest.sh +++ b/internal/simdgen/etetest.sh @@ -13,7 +13,6 @@ builds the compiler. rm -rf go-test git clone https://go.googlesource.com/go -b dev.simd go-test -go generate go run . -xedPath xeddata -o godefs -goroot ./go-test go.yaml types.yaml categories.yaml (cd go-test/src/cmd/compile/internal/ssa/_gen ; go run *.go ) (cd go-test/src ; GOEXPERIMENT=simd ./make.bash ) diff --git a/internal/simdgen/go.yaml b/internal/simdgen/go.yaml index cdee0870..4f077c81 100644 --- a/internal/simdgen/go.yaml +++ b/internal/simdgen/go.yaml @@ -1,1096 +1 @@ -!sum -# Add -- go: Add - asm: "VPADD[BWDQ]|VADDP[SD]" - in: - - &any - go: $t - - *any - out: - - *any -# Add Saturated -- go: AddSaturated - asm: "VPADDS[BWDQ]" - in: - - &int - go: $t - base: int - - *int - out: - - *int -- go: AddSaturated - asm: "VPADDS[BWDQ]" - in: - - &uint - go: $t - base: uint - - *uint - out: - - *uint - -# Sub -- go: Sub - asm: "VPSUB[BWDQ]|VSUBP[SD]" - in: &2any - - *any - - *any - out: &1any - - *any -# Sub Saturated -- go: SubSaturated - asm: "VPSUBS[BWDQ]" - in: &2int - - *int - - *int - out: &1int - - *int -- go: SubSaturated - asm: "VPSUBS[BWDQ]" - in: - - *uint - - *uint - out: - - *uint -- go: AddPairs - asm: "VPHADD[DW]" - in: *2any - out: *1any -- go: SubPairs - asm: "VPHSUB[DW]" - in: *2any - out: *1any -- go: AddPairs - asm: "VHADDP[SD]" # floats - in: *2any - out: *1any -- go: SubPairs - asm: "VHSUBP[SD]" # floats - in: *2any - out: *1any -- go: AddPairsSaturated - asm: "VPHADDS[DW]" - 
in: *2int - out: *1int -- go: SubPairsSaturated - asm: "VPHSUBS[DW]" - in: *2int - out: *1int -# In the XED data, *all* floating point bitwise logic operation has their -# operand type marked as uint. We are not trying to understand why Intel -# decided that they want FP bit-wise logic operations, but this irregularity -# has to be dealed with in separate rules with some overwrites. - -# For many bit-wise operations, we have the following non-orthogonal -# choices: -# -# - Non-masked AVX operations have no element width (because it -# doesn't matter), but only cover 128 and 256 bit vectors. -# -# - Masked AVX-512 operations have an element width (because it needs -# to know how to interpret the mask), and cover 128, 256, and 512 bit -# vectors. These only cover 32- and 64-bit element widths. -# -# - Non-masked AVX-512 operations still have an element width (because -# they're just the masked operations with an implicit K0 mask) but it -# doesn't matter! This is the only option for non-masked 512 bit -# operations, and we can pick any of the element widths. -# -# We unify with ALL of these operations and the compiler generator -# picks when there are multiple options. - -# TODO: We don't currently generate unmasked bit-wise operations on 512 bit -# vectors of 8- or 16-bit elements. AVX-512 only has *masked* bit-wise -# operations for 32- and 64-bit elements; while the element width doesn't matter -# for unmasked operations, right now we don't realize that we can just use the -# 32- or 64-bit version for the unmasked form. Maybe in the XED decoder we -# should recognize bit-wise operations when generating unmasked versions and -# omit the element width. - -# For binary operations, we constrain their two inputs and one output to the -# same Go type using a variable. - -- go: And - asm: "VPAND[DQ]?" 
- in: - - &any - go: $t - - *any - out: - - *any - -- go: And - asm: "VPANDD" # Fill in the gap, And is missing for Uint8x64 and Int8x64 - inVariant: [] - in: &twoI8x64 - - &i8x64 - go: $t - overwriteElementBits: 8 - - *i8x64 - out: &oneI8x64 - - *i8x64 - -- go: And - asm: "VPANDD" # Fill in the gap, And is missing for Uint16x32 and Int16x32 - inVariant: [] - in: &twoI16x32 - - &i16x32 - go: $t - overwriteElementBits: 16 - - *i16x32 - out: &oneI16x32 - - *i16x32 - -- go: AndNot - asm: "VPANDN[DQ]?" - operandOrder: "21" # switch the arg order - in: - - *any - - *any - out: - - *any - -- go: AndNot - asm: "VPANDND" # Fill in the gap, AndNot is missing for Uint8x64 and Int8x64 - operandOrder: "21" # switch the arg order - inVariant: [] - in: *twoI8x64 - out: *oneI8x64 - -- go: AndNot - asm: "VPANDND" # Fill in the gap, AndNot is missing for Uint16x32 and Int16x32 - operandOrder: "21" # switch the arg order - inVariant: [] - in: *twoI16x32 - out: *oneI16x32 - -- go: Or - asm: "VPOR[DQ]?" - in: - - *any - - *any - out: - - *any - -- go: Or - asm: "VPORD" # Fill in the gap, Or is missing for Uint8x64 and Int8x64 - inVariant: [] - in: *twoI8x64 - out: *oneI8x64 - -- go: Or - asm: "VPORD" # Fill in the gap, Or is missing for Uint16x32 and Int16x32 - inVariant: [] - in: *twoI16x32 - out: *oneI16x32 - -- go: Xor - asm: "VPXOR[DQ]?" 
- in: - - *any - - *any - out: - - *any - -- go: Xor - asm: "VPXORD" # Fill in the gap, Or is missing for Uint8x64 and Int8x64 - inVariant: [] - in: *twoI8x64 - out: *oneI8x64 - -- go: Xor - asm: "VPXORD" # Fill in the gap, Or is missing for Uint16x32 and Int16x32 - inVariant: [] - in: *twoI16x32 - out: *oneI16x32 -# Ints -- go: Equal - asm: "V?PCMPEQ[BWDQ]" - in: - - &any - go: $t - - *any - out: - - &anyvregToMask - go: $t - overwriteBase: int - overwriteClass: mask -- go: Greater - asm: "V?PCMPGT[BWDQ]" - in: - - &int - go: $t - base: int - - *int - out: - - *anyvregToMask -# 256-bit VCMPGTQ's output elemBits is marked 32-bit in the XED data, we -# believe this is an error, so add this definition to overwrite. -- go: Greater - asm: "VPCMPGTQ" - in: - - &int64 - go: $t - base: int - elemBits: 64 - - *int64 - out: - - base: int - elemBits: 32 - overwriteElementBits: 64 - overwriteClass: mask - overwriteBase: int -# AVX-512 compares produce masks. -- go: Equal - asm: "V?PCMPEQ[BWDQ]" - in: - - *any - - *any - out: - - class: mask -- go: Greater - asm: "V?PCMPGT[BWDQ]" - in: - - *int - - *int - out: - - class: mask -# The const imm predicated compares after AVX512, please see categories.yaml -# for const imm specification. -- go: (Equal|Greater|Less|LessEqual|GreaterEqual|NotEqual) - asm: "VPCMP[BWDQ]" - in: - - *int - - *int - - class: immediate - const: 0 # Just a placeholder, will be overwritten by const imm porting. 
- out: - - class: mask -- go: (Equal|Greater|Less|LessEqual|GreaterEqual|NotEqual) - asm: "VPCMPU[BWDQ]" - in: - - &uint - go: $t - base: uint - - *uint - - class: immediate - const: 0 - out: - - class: mask - -# Floats -- go: Equal|Greater|Less|LessEqual|GreaterEqual|NotEqual|IsNan - asm: "VCMPP[SD]" - in: - - &float - go: $t - base: float - - *float - - class: immediate - const: 0 - out: - - go: $t - overwriteBase: int - overwriteClass: mask -- go: (Equal|Greater|Less|LessEqual|GreaterEqual|NotEqual|IsNan) - asm: "VCMPP[SD]" - in: - - *float - - *float - - class: immediate - const: 0 - out: - - class: mask -- go: ConvertToInt32 - asm: "VCVTTPS2DQ" - in: - - &fp - go: $t - base: float - out: - - &i32 - go: $u - base: int - elemBits: 32 -- go: ConvertToUint32 - asm: "VCVTPS2UDQ" - in: - - *fp - out: - - &u32 - go: $u - base: uint - elemBits: 32 -- go: Div - asm: "V?DIVP[SD]" - in: &2fp - - &fp - go: $t - base: float - - *fp - out: &1fp - - *fp -- go: Sqrt - asm: "V?SQRTP[SD]" - in: *1fp - out: *1fp -# TODO: Provide separate methods for 12-bit precision and 14-bit precision? -- go: ApproximateReciprocal - asm: "VRCP(14)?P[SD]" - in: *1fp - out: *1fp -- go: ApproximateReciprocalOfSqrt - asm: "V?RSQRT(14)?P[SD]" - in: *1fp - out: *1fp -- go: Scale - asm: "VSCALEFP[SD]" - in: *2fp - out: *1fp - -- go: "Round|Ceil|Floor|Trunc" - asm: "VROUNDP[SD]" - in: - - *fp - - class: immediate - const: 0 # place holder - out: *1fp - -- go: "(Round|Ceil|Floor|Trunc)Scaled" - asm: "VRNDSCALEP[SD]" - in: - - *fp - - class: immediate - const: 0 # place holder - immOffset: 4 # "M", round to numbers with M digits after dot(by means of binary number). - name: prec - out: *1fp -- go: "(Round|Ceil|Floor|Trunc)ScaledResidue" - asm: "VREDUCEP[SD]" - in: - - *fp - - class: immediate - const: 0 # place holder - immOffset: 4 # "M", round to numbers with M digits after dot(by means of binary number). 
- name: prec - out: *1fp - -- go: "AddSub" - asm: "VADDSUBP[SD]" - in: - - *fp - - *fp - out: - - *fp -- go: GaloisFieldAffineTransform - asm: VGF2P8AFFINEQB - operandOrder: 2I # 2nd operand, then immediate - in: &AffineArgs - - &uint8 - go: $t - base: uint - - &uint8x8 - go: $t2 - base: uint - - &pureImmVar - class: immediate - immOffset: 0 - name: b - out: - - *uint8 - -- go: GaloisFieldAffineTransformInverse - asm: VGF2P8AFFINEINVQB - operandOrder: 2I # 2nd operand, then immediate - in: *AffineArgs - out: - - *uint8 - -- go: GaloisFieldMul - asm: VGF2P8MULB - in: - - *uint8 - - *uint8 - out: - - *uint8 -# Average (unsigned byte, unsigned word) -# Instructions: VPAVGB, VPAVGW -- go: Average - asm: "VPAVG[BW]" # Matches VPAVGB (byte) and VPAVGW (word) - in: - - &uint_t # $t will be Uint8xN for VPAVGB, Uint16xN for VPAVGW - go: $t - base: uint - - *uint_t - out: - - *uint_t - -# Absolute Value (signed byte, word, dword, qword) -# Instructions: VPABSB, VPABSW, VPABSD, VPABSQ -- go: Absolute - asm: "VPABS[BWDQ]" # Matches VPABSB, VPABSW, VPABSD, VPABSQ - in: - - &int_t # $t will be Int8xN, Int16xN, Int32xN, Int64xN - go: $t - base: int - out: - - *int_t # Output is magnitude, fits in the same signed type - -# Sign Operation (signed byte, word, dword) -# Applies sign of second operand to the first. 
-# Instructions: VPSIGNB, VPSIGNW, VPSIGND -- go: Sign - asm: "VPSIGN[BWD]" # Matches VPSIGNB, VPSIGNW, VPSIGND - in: - - *int_t # value to apply sign to - - *int_t # value from which to take the sign - out: - - *int_t - -# Population Count (count set bits in each element) -# Instructions: VPOPCNTB, VPOPCNTW (AVX512_BITALG) -# VPOPCNTD, VPOPCNTQ (AVX512_VPOPCNTDQ) -- go: PopCount - asm: "VPOPCNT[BWDQ]" - in: - - &any - go: $t - out: - - *any -- go: PairDotProd - asm: VPMADDWD - in: - - &int - go: $t - base: int - - *int - out: - - &int2 # The elemBits are different - go: $t2 - base: int -- go: SaturatedUnsignedSignedPairDotProd - asm: VPMADDUBSW - in: - - &uint - go: $t - base: uint - overwriteElementBits: 8 - - &int3 - go: $t3 - base: int - overwriteElementBits: 8 - out: - - *int2 -- go: DotProdBroadcast - asm: VDPP[SD] - in: - - &dpb_src - go: $t - - *dpb_src - - class: immediate - const: 127 - out: - - *dpb_src -- go: UnsignedSignedQuadDotProdAccumulate - asm: "VPDPBUSD" - operandOrder: "31" # switch operand 3 and 1 - in: - - &qdpa_acc - go: $t_acc - base: int - elemBits: 32 - - &qdpa_src1 - go: $t_src1 - base: uint - overwriteElementBits: 8 - - &qdpa_src2 - go: $t_src2 - base: int - overwriteElementBits: 8 - out: - - *qdpa_acc -- go: SaturatedUnsignedSignedQuadDotProdAccumulate - asm: "VPDPBUSDS" - operandOrder: "31" # switch operand 3 and 1 - in: - - *qdpa_acc - - *qdpa_src1 - - *qdpa_src2 - out: - - *qdpa_acc -- go: AddDotProd - asm: "VPDPWSSD" - in: - - &pdpa_acc - go: $t_acc - base: int - elemBits: 32 - - &pdpa_src1 - go: $t_src1 - base: int - overwriteElementBits: 16 - - &pdpa_src2 - go: $t_src2 - base: int - overwriteElementBits: 16 - out: - - *pdpa_acc -- go: SaturatedAddDotProd - asm: "VPDPWSSDS" - in: - - *pdpa_acc - - *pdpa_src1 - - *pdpa_src2 - out: - - *pdpa_acc -- go: FusedMultiplyAdd - asm: "VFMADD213PS|VFMADD213PD" - in: - - &fma_op - go: $t - base: float - - *fma_op - - *fma_op - out: - - *fma_op -- go: FusedMultiplyAddSub - asm: 
"VFMADDSUB213PS|VFMADDSUB213PD" - in: - - *fma_op - - *fma_op - - *fma_op - out: - - *fma_op -- go: FusedMultiplySubAdd - asm: "VFMSUBADD213PS|VFMSUBADD213PD" - in: - - *fma_op - - *fma_op - - *fma_op - out: - - *fma_op -- go: Max - asm: "V?PMAXS[BWDQ]" - in: &2int - - &int - go: $t - base: int - - *int - out: &1int - - *int -- go: Max - asm: "V?PMAXU[BWDQ]" - in: &2uint - - &uint - go: $t - base: uint - - *uint - out: &1uint - - *uint - -- go: Min - asm: "V?PMINS[BWDQ]" - in: *2int - out: *1int -- go: Min - asm: "V?PMINU[BWDQ]" - in: *2uint - out: *1uint - -- go: Max - asm: "V?MAXP[SD]" - in: &2float - - &float - go: $t - base: float - - *float - out: &1float - - *float -- go: Min - asm: "V?MINP[SD]" - in: *2float - out: *1float -- go: SetElem - asm: "VPINSR[BWDQ]" - in: - - &t - class: vreg - base: $b - - class: greg - base: $b - lanes: 1 # Scalar, darn it! - - &imm - class: immediate - immOffset: 0 - name: index - out: - - *t - -- go: GetElem - asm: "VPEXTR[BWDQ]" - in: - - class: vreg - base: $b - elemBits: $e - - *imm - out: - - class: greg - base: $b - bits: $e - -- go: Set128 - asm: "VINSERTI128" - in: - - &i8x32 - class: vreg - base: $t - bits: 256 - OverwriteElementBits: 8 - - &i8x16 - class: vreg - base: $t - bits: 128 - OverwriteElementBits: 8 - - &imm01 # This immediate should be only 0 or 1 - class: immediate - immOffset: 0 - name: index - out: - - *i8x32 - -- go: Get128 - asm: "VEXTRACTI128" - in: - - *i8x32 - - *imm01 - out: - - *i8x16 - -- go: Set128 - asm: "VINSERTI128" - in: - - &i16x16 - class: vreg - base: $t - bits: 256 - OverwriteElementBits: 16 - - &i16x8 - class: vreg - base: $t - bits: 128 - OverwriteElementBits: 16 - - *imm01 - out: - - *i16x16 - -- go: Get128 - asm: "VEXTRACTI128" - in: - - *i16x16 - - *imm01 - out: - - *i16x8 - -- go: Set128 - asm: "VINSERTI128" - in: - - &i32x8 - class: vreg - base: $t - bits: 256 - OverwriteElementBits: 32 - - &i32x4 - class: vreg - base: $t - bits: 128 - OverwriteElementBits: 32 - - *imm01 - out: - - 
*i32x8 - -- go: Get128 - asm: "VEXTRACTI128" - in: - - *i32x8 - - *imm01 - out: - - *i32x4 - -- go: Set128 - asm: "VINSERTI128" - in: - - &i64x4 - class: vreg - base: $t - bits: 256 - OverwriteElementBits: 64 - - &i64x2 - class: vreg - base: $t - bits: 128 - OverwriteElementBits: 64 - - *imm01 - out: - - *i64x4 - -- go: Get128 - asm: "VEXTRACTI128" - in: - - *i64x4 - - *imm01 - out: - - *i64x2 - -- go: Set128 - asm: "VINSERTF128" - in: - - &f32x8 - class: vreg - base: $t - bits: 256 - OverwriteElementBits: 32 - - &f32x4 - class: vreg - base: $t - bits: 128 - OverwriteElementBits: 32 - - *imm01 - out: - - *f32x8 - -- go: Get128 - asm: "VEXTRACTF128" - in: - - *f32x8 - - *imm01 - out: - - *f32x4 - -- go: Set128 - asm: "VINSERTF128" - in: - - &f64x4 - class: vreg - base: $t - bits: 256 - - &f64x2 - class: vreg - base: $t - bits: 128 - - *imm01 - out: - - *f64x4 - -- go: Get128 - asm: "VEXTRACTF128" - in: - - *f64x4 - - *imm01 - out: - - *f64x2 - -- go: Permute - asm: "VPERM[BWDQ]|VPERMP[SD]" - operandOrder: "21Type1" - in: - - &anyindices - go: $t - name: indices - overwriteBase: uint - - &any - go: $t - out: - - *any - -- go: Permute2 - asm: "VPERMI2[BWDQ]|VPERMI2P[SD]" - # Because we are overwriting the receiver's type, we - # have to move the receiver to be a parameter so that - # we can have no duplication. - operandOrder: "231Type1" - in: - - *anyindices # result in arg 0 - - *any - - *any - out: - - *any - -- go: Compress - asm: "VPCOMPRESS[BWDQ]|VCOMPRESSP[SD]" - in: - # The mask in Compress is a control mask rather than a write mask, so it's not optional. - - class: mask - - *any - out: - - *any - -# For now a non-public method because -# (1) [OverwriteClass] must be set together with [OverwriteBase] -# (2) "simdgen does not support [OverwriteClass] in inputs". -# That means the signature is wrong. 
-- go: blend - asm: VPBLENDVB - in: - - &v - go: $t - class: vreg - base: int - - *v - - - class: vreg - base: int - name: mask - out: - - *v - -# For AVX512 -- go: blend - asm: VPBLENDM[BWDQ] - in: - - &v - go: $t - bits: 512 - class: vreg - base: int - - *v - inVariant: - - - class: mask - out: - - *v - -# "Normal" multiplication is only available for floats. -# This only covers the single and double precision. -- go: Mul - asm: "VMULP[SD]" - in: - - &fp - go: $t - base: float - - *fp - out: - - *fp - -# Integer multiplications. - -# MulEvenWiden -# Dword only. -- go: MulEvenWiden - asm: "VPMULDQ" - in: - - &int - go: $t - base: int - - *int - out: - - &int2 - go: $t2 - base: int -- go: MulEvenWiden - asm: "VPMULUDQ" - in: - - &uint - go: $t - base: uint - - *uint - out: - - &uint2 - go: $t2 - base: uint - -# MulHigh -# Word only. -- go: MulHigh - asm: "VPMULHW" - in: - - *int - - *int - out: - - *int2 -- go: MulHigh - asm: "VPMULHUW" - in: - - *uint - - *uint - out: - - *uint2 - -# MulLow -# Signed int only. 
-- go: Mul - asm: "VPMULL[WDQ]" - in: - - *int - - *int - out: - - *int2 -# Integers -# ShiftAll* -- go: ShiftAllLeft - asm: "VPSLL[WDQ]" - in: - - &any - go: $t - - &vecAsScalar64 - go: "Uint.*" - treatLikeAScalarOfSize: 64 - out: - - *any -- go: ShiftAllRight - signed: false - asm: "VPSRL[WDQ]" - in: - - &uint - go: $t - base: uint - - *vecAsScalar64 - out: - - *uint -- go: ShiftAllRight - signed: true - asm: "VPSRA[WDQ]" - in: - - &int - go: $t - base: int - - *vecAsScalar64 - out: - - *int - -- go: shiftAllLeftConst - asm: "VPSLL[WDQ]" - in: - - *any - - &imm - class: immediate - immOffset: 0 - out: - - *any -- go: shiftAllRightConst - asm: "VPSRL[WDQ]" - in: - - *int - - *imm - out: - - *int -- go: shiftAllRightConst - asm: "VPSRA[WDQ]" - in: - - *uint - - *imm - out: - - *uint - -# Shift* (variable) -- go: ShiftLeft - asm: "VPSLLV[WD]" - in: - - *any - - *any - out: - - *any -# XED data of VPSLLVQ marks the element bits 32 which is off to the actual semantic, we need to overwrite -# it to 64. -- go: ShiftLeft - asm: "VPSLLVQ" - in: - - &anyOverwriteElemBits - go: $t - overwriteElementBits: 64 - - *anyOverwriteElemBits - out: - - *anyOverwriteElemBits -- go: ShiftRight - signed: false - asm: "VPSRLV[WD]" - in: - - *uint - - *uint - out: - - *uint -# XED data of VPSRLVQ needs the same overwrite as VPSLLVQ. 
-- go: ShiftRight - signed: false - asm: "VPSRLVQ" - in: - - &uintOverwriteElemBits - go: $t - base: uint - overwriteElementBits: 64 - - *uintOverwriteElemBits - out: - - *uintOverwriteElemBits -- go: ShiftRight - signed: true - asm: "VPSRAV[WDQ]" - in: - - *int - - *int - out: - - *int - -# Rotate -- go: RotateAllLeft - asm: "VPROL[DQ]" - in: - - *any - - &pureImm - class: immediate - immOffset: 0 - name: shift - out: - - *any -- go: RotateAllRight - asm: "VPROR[DQ]" - in: - - *any - - *pureImm - out: - - *any -- go: RotateLeft - asm: "VPROLV[DQ]" - in: - - *any - - *any - out: - - *any -- go: RotateRight - asm: "VPRORV[DQ]" - in: - - *any - - *any - out: - - *any - -# Bizzare shifts. -- go: ShiftAllLeftConcat - asm: "VPSHLD[WDQ]" - in: - - *any - - *any - - *pureImm - out: - - *any -- go: ShiftAllRightConcat - asm: "VPSHRD[WDQ]" - in: - - *any - - *any - - *pureImm - out: - - *any -- go: ShiftLeftConcat - asm: "VPSHLDV[WDQ]" - in: - - *any - - *any - - *any - out: - - *any -- go: ShiftRightConcat - asm: "VPSHRDV[WDQ]" - in: - - *any - - *any - - *any - out: - - *any +!import ops/*/go.yaml diff --git a/internal/simdgen/main.go b/internal/simdgen/main.go index 7bf43618..69eb85f9 100644 --- a/internal/simdgen/main.go +++ b/internal/simdgen/main.go @@ -46,10 +46,6 @@ // categories.yaml and go.yaml contains definitions that unifies with types.yaml and XED // data, you can find an example in ops/AddSub/. // -// To produce an aggregation of go.yaml and categoris.yaml from ./ops/ to ./, run: -// -// go generate -// // When generating Go definitions, simdgen do 3 "magic"s: // - It splits masked operations(with op's [Masked] field set) to const and non const: // - One is a normal masked operation, the original @@ -84,8 +80,6 @@ package main // - Do I need Closure, Value, and Domain? It feels like I should only need two // types. -//go:generate go run ./ops/. 
- import ( "cmp" "flag" diff --git a/internal/simdgen/ops/main.go b/internal/simdgen/ops/main.go deleted file mode 100644 index 7e462bf7..00000000 --- a/internal/simdgen/ops/main.go +++ /dev/null @@ -1,75 +0,0 @@ -package main - -import ( - "bufio" - "fmt" - "os" - "path/filepath" -) - -const baseDir = "ops" // The main directory containing A, B, C, etc. - -func main() { - if err := mergeYamlFiles("categories.yaml"); err != nil { - fmt.Printf("Error processing categories.yaml: %v\n", err) - os.Exit(1) - } - if err := mergeYamlFiles("go.yaml"); err != nil { - fmt.Printf("Error processing go.yaml: %v\n", err) - os.Exit(1) - } -} - -func mergeYamlFiles(targetFileName string) error { - outputFile, err := os.Create(targetFileName) - if err != nil { - return fmt.Errorf("failed to create output file %s: %w", targetFileName, err) - } - defer outputFile.Close() - - writer := bufio.NewWriter(outputFile) - _, err = writer.WriteString("!sum\n") - if err != nil { - return fmt.Errorf("failed to write '!sum' to %s: %w", targetFileName, err) - } - - entries, err := os.ReadDir(baseDir) - if err != nil { - return fmt.Errorf("failed to read base directory %s: %w", baseDir, err) - } - for _, entry := range entries { - if !entry.IsDir() { - continue - } - - subdirPath := filepath.Join(baseDir, entry.Name()) - sourceFilePath := filepath.Join(subdirPath, targetFileName) - - sourceFile, err := os.Open(sourceFilePath) - if err != nil { - if os.IsNotExist(err) { - fmt.Printf("Skipping: %s not found in %s\n", targetFileName, subdirPath) - continue - } - return fmt.Errorf("failed to open source file %s: %w", sourceFilePath, err) - } - defer sourceFile.Close() - - scanner := bufio.NewScanner(sourceFile) - // Skip first line - scanner.Scan() - // Append the rest of the lines to the output file - for scanner.Scan() { - line := scanner.Text() - _, err = writer.WriteString(line + "\n") - if err != nil { - return fmt.Errorf("failed to write line from %s to %s: %w", sourceFilePath, targetFileName, 
err) - } - } - - if err := scanner.Err(); err != nil { - return fmt.Errorf("error reading lines from %s: %w", sourceFilePath, err) - } - } - return writer.Flush() -} From 5f469bfecac542f07368a0e23c0c83951decb709 Mon Sep 17 00:00:00 2001 From: Junyang Shao Date: Tue, 5 Aug 2025 19:01:59 +0000 Subject: [PATCH 179/200] internal/simdgen: (Set|Get)(Lo|Hi) This CL adds the missing pieces of set/get elements for larger vectors. It also changes the Set and Get API to be better. This CL generates CL 693355. Change-Id: If545221e87776de7946205b41f9a7648a8148b2d Reviewed-on: https://go-review.googlesource.com/c/arch/+/693335 LUCI-TryBot-Result: Go LUCI Reviewed-by: David Chase --- internal/simdgen/ops/Moves/categories.yaml | 22 +++- internal/simdgen/ops/Moves/go.yaml | 136 +++++++++++---------- 2 files changed, 87 insertions(+), 71 deletions(-) diff --git a/internal/simdgen/ops/Moves/categories.yaml b/internal/simdgen/ops/Moves/categories.yaml index 5e51becb..d56e4c93 100644 --- a/internal/simdgen/ops/Moves/categories.yaml +++ b/internal/simdgen/ops/Moves/categories.yaml @@ -9,16 +9,30 @@ extension: "AVX.*" documentation: !string |- // NAME retrieves a single constant-indexed element's value. -- go: Set128 +- go: SetLo commutative: false + constImm: 0 extension: "AVX.*" documentation: !string |- - // NAME combines a 128-bit vector with a 256-bit vector, where the constant operand specifies whether the low (0) or high (1) half is receives the smaller vector. -- go: Get128 + // NAME returns x with its lower half set to y. +- go: GetLo commutative: false + constImm: 0 extension: "AVX.*" documentation: !string |- - // NAME retrieves the upper (1) or lower (0) half of a 256-bit vector, depending on the constant operand. + // NAME returns the lower half of x. +- go: SetHi + commutative: false + constImm: 1 + extension: "AVX.*" + documentation: !string |- + // NAME returns x with its upper half set to y. 
+- go: GetHi + commutative: false + constImm: 1 + extension: "AVX.*" + documentation: !string |- + // NAME returns the upper half of x. - go: Permute commutative: false extension: "AVX.*" diff --git a/internal/simdgen/ops/Moves/go.yaml b/internal/simdgen/ops/Moves/go.yaml index 52e6228d..b014a7a6 100644 --- a/internal/simdgen/ops/Moves/go.yaml +++ b/internal/simdgen/ops/Moves/go.yaml @@ -27,156 +27,158 @@ base: $b bits: $e -- go: Set128 - asm: "VINSERTI128" +- go: "SetHi|SetLo" + asm: "VINSERTI128|VINSERTI64X4" + inVariant: [] in: - - &i8x32 + - &i8x2N class: vreg base: $t - bits: 256 OverwriteElementBits: 8 - - &i8x16 + - &i8xN class: vreg base: $t - bits: 128 OverwriteElementBits: 8 - &imm01 # This immediate should be only 0 or 1 class: immediate - immOffset: 0 + const: 0 # place holder name: index out: - - *i8x32 + - *i8x2N -- go: Get128 - asm: "VEXTRACTI128" +- go: "GetHi|GetLo" + asm: "VEXTRACTI128|VEXTRACTI64X4" + inVariant: [] in: - - *i8x32 + - *i8x2N - *imm01 out: - - *i8x16 + - *i8xN -- go: Set128 - asm: "VINSERTI128" +- go: "SetHi|SetLo" + asm: "VINSERTI128|VINSERTI64X4" + inVariant: [] in: - - &i16x16 + - &i16x2N class: vreg base: $t - bits: 256 OverwriteElementBits: 16 - - &i16x8 + - &i16xN class: vreg base: $t - bits: 128 OverwriteElementBits: 16 - *imm01 out: - - *i16x16 + - *i16x2N -- go: Get128 - asm: "VEXTRACTI128" +- go: "GetHi|GetLo" + asm: "VEXTRACTI128|VEXTRACTI64X4" + inVariant: [] in: - - *i16x16 + - *i16x2N - *imm01 out: - - *i16x8 + - *i16xN -- go: Set128 - asm: "VINSERTI128" +- go: "SetHi|SetLo" + asm: "VINSERTI128|VINSERTI64X4" + inVariant: [] in: - - &i32x8 + - &i32x2N class: vreg base: $t - bits: 256 OverwriteElementBits: 32 - - &i32x4 + - &i32xN class: vreg base: $t - bits: 128 OverwriteElementBits: 32 - *imm01 out: - - *i32x8 + - *i32x2N -- go: Get128 - asm: "VEXTRACTI128" +- go: "GetHi|GetLo" + asm: "VEXTRACTI128|VEXTRACTI64X4" + inVariant: [] in: - - *i32x8 + - *i32x2N - *imm01 out: - - *i32x4 + - *i32xN -- go: Set128 - asm: 
"VINSERTI128" +- go: "SetHi|SetLo" + asm: "VINSERTI128|VINSERTI64X4" + inVariant: [] in: - - &i64x4 + - &i64x2N class: vreg base: $t - bits: 256 OverwriteElementBits: 64 - - &i64x2 + - &i64xN class: vreg base: $t - bits: 128 OverwriteElementBits: 64 - *imm01 out: - - *i64x4 + - *i64x2N -- go: Get128 - asm: "VEXTRACTI128" +- go: "GetHi|GetLo" + asm: "VEXTRACTI128|VEXTRACTI64X4" + inVariant: [] in: - - *i64x4 + - *i64x2N - *imm01 out: - - *i64x2 + - *i64xN -- go: Set128 - asm: "VINSERTF128" +- go: "SetHi|SetLo" + asm: "VINSERTF128|VINSERTF64X4" + inVariant: [] in: - - &f32x8 + - &f32x2N class: vreg base: $t - bits: 256 OverwriteElementBits: 32 - - &f32x4 + - &f32xN class: vreg base: $t - bits: 128 OverwriteElementBits: 32 - *imm01 out: - - *f32x8 + - *f32x2N -- go: Get128 - asm: "VEXTRACTF128" +- go: "GetHi|GetLo" + asm: "VEXTRACTF128|VEXTRACTF64X4" + inVariant: [] in: - - *f32x8 + - *f32x2N - *imm01 out: - - *f32x4 + - *f32xN -- go: Set128 - asm: "VINSERTF128" +- go: "SetHi|SetLo" + asm: "VINSERTF128|VINSERTF64X4" + inVariant: [] in: - - &f64x4 + - &f64x2N class: vreg base: $t - bits: 256 - - &f64x2 + OverwriteElementBits: 64 + - &f64xN class: vreg base: $t - bits: 128 + OverwriteElementBits: 64 - *imm01 out: - - *f64x4 + - *f64x2N -- go: Get128 - asm: "VEXTRACTF128" +- go: "GetHi|GetLo" + asm: "VEXTRACTF128|VEXTRACTF64X4" + inVariant: [] in: - - *f64x4 + - *f64x2N - *imm01 out: - - *f64x2 + - *f64xN - go: Permute asm: "VPERM[BWDQ]|VPERMP[SD]" From 515a7504e9e134accd2aa63ff300676afb0ff61a Mon Sep 17 00:00:00 2001 From: Junyang Shao Date: Tue, 5 Aug 2025 19:42:29 +0000 Subject: [PATCH 180/200] internal/simdgen: add Expand This CL generates CL 693375. 
Change-Id: Id7e71a68e9997cbec767cd3addbf152710f4c1f8 Reviewed-on: https://go-review.googlesource.com/c/arch/+/693336 LUCI-TryBot-Result: Go LUCI Reviewed-by: David Chase --- internal/simdgen/ops/Moves/categories.yaml | 6 ++++++ internal/simdgen/ops/Moves/go.yaml | 8 ++++++++ internal/simdgen/xed.go | 2 +- 3 files changed, 15 insertions(+), 1 deletion(-) diff --git a/internal/simdgen/ops/Moves/categories.yaml b/internal/simdgen/ops/Moves/categories.yaml index d56e4c93..6f30ccbc 100644 --- a/internal/simdgen/ops/Moves/categories.yaml +++ b/internal/simdgen/ops/Moves/categories.yaml @@ -60,3 +60,9 @@ documentation: !string |- // NAME blends two vectors based on mask values, choosing either // the first or the second based on whether the third is false or true +- go: Expand + commutative: false + extension: "AVX.*" + documentation: !string |- + // NAME performs an expansion on a vector x whose elements are packed to lower parts. + // The expansion is to distribute elements as indexed by mask, from lower mask elements to upper in order. diff --git a/internal/simdgen/ops/Moves/go.yaml b/internal/simdgen/ops/Moves/go.yaml index b014a7a6..50e2869e 100644 --- a/internal/simdgen/ops/Moves/go.yaml +++ b/internal/simdgen/ops/Moves/go.yaml @@ -250,3 +250,11 @@ out: - *v +- go: Expand + asm: "VPEXPAND[BWDQ]|VEXPANDP[SD]" + in: + # The mask in Expand is a control mask rather than a write mask, so it's not optional. 
+ - class: mask + - *any + out: + - *any diff --git a/internal/simdgen/xed.go b/internal/simdgen/xed.go index 6a3feb36..f773fcda 100644 --- a/internal/simdgen/xed.go +++ b/internal/simdgen/xed.go @@ -89,7 +89,7 @@ func loadXED(xedPath string) []*unify.Value { } var ( - maskRequiredRe = regexp.MustCompile(`VPCOMPRESS[BWDQ]|VCOMPRESSP[SD]`) + maskRequiredRe = regexp.MustCompile(`VPCOMPRESS[BWDQ]|VCOMPRESSP[SD]|VPEXPAND[BWDQ]|VEXPANDP[SD]`) maskOptionalRe = regexp.MustCompile(`VPCMP(EQ|GT|U)?[BWDQ]|VCMPP[SD]`) ) From b15c9c00eaa3881472a1bd88e46cfb6d9863dab7 Mon Sep 17 00:00:00 2001 From: Junyang Shao Date: Wed, 6 Aug 2025 18:18:26 +0000 Subject: [PATCH 181/200] internal/simdgen: add value conversion ToBits for mask This CL generates CL 693755. Change-Id: If29791f9810cacebb99e27516d677fe9200badb2 Reviewed-on: https://go-review.googlesource.com/c/arch/+/693598 LUCI-TryBot-Result: Go LUCI Reviewed-by: David Chase --- internal/simdgen/gen_simdIntrinsics.go | 3 ++- internal/simdgen/gen_simdTypes.go | 8 +++++++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/internal/simdgen/gen_simdIntrinsics.go b/internal/simdgen/gen_simdIntrinsics.go index ca339ac2..5050834b 100644 --- a/internal/simdgen/gen_simdIntrinsics.go +++ b/internal/simdgen/gen_simdIntrinsics.go @@ -79,7 +79,8 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . 
addF(simdPackage, "{{.Name}}.Or", opLen2(ssa.OpOr{{.ReshapedVectorWithAndOr}}, types.TypeVec{{.Size}}), sys.AMD64) addF(simdPackage, "Load{{.Name}}FromBits", simdLoadMask({{.ElemBits}}, {{.Lanes}}), sys.AMD64) addF(simdPackage, "{{.Name}}.StoreToBits", simdStoreMask({{.ElemBits}}, {{.Lanes}}), sys.AMD64) - addF(simdPackage, "{{.Name}}FromBits", simdCvtMask({{.ElemBits}}, {{.Lanes}}), sys.AMD64) + addF(simdPackage, "{{.Name}}FromBits", simdCvtVToMask({{.ElemBits}}, {{.Lanes}}), sys.AMD64) + addF(simdPackage, "{{.Name}}.ToBits", simdCvtMaskToV({{.ElemBits}}, {{.Lanes}}), sys.AMD64) {{end}} {{define "footer"}}} diff --git a/internal/simdgen/gen_simdTypes.go b/internal/simdgen/gen_simdTypes.go index b9427c4a..f3c68796 100644 --- a/internal/simdgen/gen_simdTypes.go +++ b/internal/simdgen/gen_simdTypes.go @@ -175,8 +175,14 @@ const simdMaskFromValTemplate = ` // {{.Name}}FromBits constructs a {{.Name}} from a bitmap value, where 1 means set for the indexed element, 0 means unset. // Only the lower {{.Lanes}} bits of y are used. // -// Asm: KMOV{{.IntelSizeSuffix}}, CPU Feature: AVX512" +// Asm: KMOV{{.IntelSizeSuffix}}, CPU Feature: AVX512 func {{.Name}}FromBits(y uint{{.LanesContainer}}) {{.Name}} + +// ToBits constructs a bitmap from a {{.Name}}, where 1 means set for the indexed element, 0 means unset. +// Only the lower {{.Lanes}} bits of y are used. +// +// Asm: KMOV{{.IntelSizeSuffix}}, CPU Features: AVX512 +func (x {{.Name}}) ToBits() uint{{.LanesContainer}} ` const simdMaskedLoadStoreTemplate = ` From 238887481806c4d02444e1ba8f568919520ce1f5 Mon Sep 17 00:00:00 2001 From: Junyang Shao Date: Thu, 7 Aug 2025 16:49:18 +0000 Subject: [PATCH 182/200] internal/simdgen: API interface fixes - Absolute -> Abs - ApproximateReciprocal -> Reciprocal - Other derived apis also changed. - Round -> RoundToEven - Other derived apis also changed. - Drop DotProdBroadcast - Fused(Mul|Add)(Mul|Add)? 
-> remove the "Fused" - MulEvenWiden -> remove 64bit - MulLow -> Mul, add unit - PairDotProd -> DotProdPairs - make AddDotProdPairs machine ops only - peepholes will be in another CL at dev.simd. - PopCount -> OnesCount - Saturated* -> *Saturated - Fix (Add|Sub)Saturated uint mappings. - UnsignedSignedQuadDotProdAccumulate -> AddDotProdQuadruple - The "DotProdQuadruple" instruction does not exist, so no peepholes for this. This CL generated CL 694115. Change-Id: I02a22b14110154a4c9d06bde30d0ba8306e6e9be Reviewed-on: https://go-review.googlesource.com/c/arch/+/694095 LUCI-TryBot-Result: Go LUCI Reviewed-by: David Chase --- internal/simdgen/ops/AddSub/go.yaml | 4 +- .../simdgen/ops/FPonlyArith/categories.yaml | 10 ++--- internal/simdgen/ops/FPonlyArith/go.yaml | 10 ++--- .../simdgen/ops/IntOnlyArith/categories.yaml | 6 +-- internal/simdgen/ops/IntOnlyArith/go.yaml | 6 +-- internal/simdgen/ops/MLOps/categories.yaml | 36 +++++++++--------- internal/simdgen/ops/MLOps/go.yaml | 38 +++++++++---------- internal/simdgen/ops/Mul/go.yaml | 31 +++++++++------ 8 files changed, 75 insertions(+), 66 deletions(-) diff --git a/internal/simdgen/ops/AddSub/go.yaml b/internal/simdgen/ops/AddSub/go.yaml index 45726cd6..4423d8c7 100644 --- a/internal/simdgen/ops/AddSub/go.yaml +++ b/internal/simdgen/ops/AddSub/go.yaml @@ -19,7 +19,7 @@ out: - *int - go: AddSaturated - asm: "VPADDS[BWDQ]" + asm: "VPADDUS[BWDQ]" in: - &uint go: $t @@ -45,7 +45,7 @@ out: &1int - *int - go: SubSaturated - asm: "VPSUBS[BWDQ]" + asm: "VPSUBUS[BWDQ]" in: - *uint - *uint diff --git a/internal/simdgen/ops/FPonlyArith/categories.yaml b/internal/simdgen/ops/FPonlyArith/categories.yaml index 63ddbb34..512cfc50 100644 --- a/internal/simdgen/ops/FPonlyArith/categories.yaml +++ b/internal/simdgen/ops/FPonlyArith/categories.yaml @@ -9,12 +9,12 @@ extension: "AVX.*" documentation: !string |- // NAME computes the square root of each element. 
-- go: ApproximateReciprocal +- go: Reciprocal commutative: false extension: "AVX.*" documentation: !string |- // NAME computes an approximate reciprocal of each element. -- go: ApproximateReciprocalOfSqrt +- go: ReciprocalSqrt commutative: false extension: "AVX.*" documentation: !string |- @@ -24,19 +24,19 @@ extension: "AVX.*" documentation: !string |- // NAME multiplies elements by a power of 2. -- go: Round +- go: RoundToEven commutative: false extension: "AVX.*" constImm: 0 documentation: !string |- // NAME rounds elements to the nearest integer. -- go: RoundScaled +- go: RoundToEvenScaled commutative: false extension: "AVX.*" constImm: 0 documentation: !string |- // NAME rounds elements with specified precision. -- go: RoundScaledResidue +- go: RoundToEvenScaledResidue commutative: false extension: "AVX.*" constImm: 0 diff --git a/internal/simdgen/ops/FPonlyArith/go.yaml b/internal/simdgen/ops/FPonlyArith/go.yaml index dfb0454e..e164f7b7 100644 --- a/internal/simdgen/ops/FPonlyArith/go.yaml +++ b/internal/simdgen/ops/FPonlyArith/go.yaml @@ -13,11 +13,11 @@ in: *1fp out: *1fp # TODO: Provide separate methods for 12-bit precision and 14-bit precision? -- go: ApproximateReciprocal +- go: Reciprocal asm: "VRCP(14)?P[SD]" in: *1fp out: *1fp -- go: ApproximateReciprocalOfSqrt +- go: ReciprocalSqrt asm: "V?RSQRT(14)?P[SD]" in: *1fp out: *1fp @@ -26,7 +26,7 @@ in: *2fp out: *1fp -- go: "Round|Ceil|Floor|Trunc" +- go: "RoundToEven|Ceil|Floor|Trunc" asm: "VROUNDP[SD]" in: - *fp @@ -34,7 +34,7 @@ const: 0 # place holder out: *1fp -- go: "(Round|Ceil|Floor|Trunc)Scaled" +- go: "(RoundToEven|Ceil|Floor|Trunc)Scaled" asm: "VRNDSCALEP[SD]" in: - *fp @@ -43,7 +43,7 @@ immOffset: 4 # "M", round to numbers with M digits after dot(by means of binary number). 
name: prec out: *1fp -- go: "(Round|Ceil|Floor|Trunc)ScaledResidue" +- go: "(RoundToEven|Ceil|Floor|Trunc)ScaledResidue" asm: "VREDUCEP[SD]" in: - *fp diff --git a/internal/simdgen/ops/IntOnlyArith/categories.yaml b/internal/simdgen/ops/IntOnlyArith/categories.yaml index 477b1896..2c7a9998 100644 --- a/internal/simdgen/ops/IntOnlyArith/categories.yaml +++ b/internal/simdgen/ops/IntOnlyArith/categories.yaml @@ -4,13 +4,13 @@ extension: "AVX.*" # VPAVGB/W are available across various AVX versions documentation: !string |- // NAME computes the rounded average of corresponding elements. -- go: Absolute +- go: Abs commutative: false # Unary operation, not commutative extension: "AVX.*" # VPABSB/W/D are AVX, VPABSQ is AVX512 documentation: !string |- // NAME computes the absolute value of each element. -- go: Sign +- go: CopySign # Applies sign of second operand to first: sign(val, sign_src) commutative: false extension: "AVX.*" @@ -18,7 +18,7 @@ // NAME returns the product of the first operand with -1, 0, or 1, // whichever constant is nearest to the value of the second operand. # Sign does not have masked version -- go: PopCount +- go: OnesCount commutative: false extension: "AVX512.*" documentation: !string |- diff --git a/internal/simdgen/ops/IntOnlyArith/go.yaml b/internal/simdgen/ops/IntOnlyArith/go.yaml index 4c73be26..54938b4f 100644 --- a/internal/simdgen/ops/IntOnlyArith/go.yaml +++ b/internal/simdgen/ops/IntOnlyArith/go.yaml @@ -13,7 +13,7 @@ # Absolute Value (signed byte, word, dword, qword) # Instructions: VPABSB, VPABSW, VPABSD, VPABSQ -- go: Absolute +- go: Abs asm: "VPABS[BWDQ]" # Matches VPABSB, VPABSW, VPABSD, VPABSQ in: - &int_t # $t will be Int8xN, Int16xN, Int32xN, Int64xN @@ -25,7 +25,7 @@ # Sign Operation (signed byte, word, dword) # Applies sign of second operand to the first. 
# Instructions: VPSIGNB, VPSIGNW, VPSIGND -- go: Sign +- go: CopySign asm: "VPSIGN[BWD]" # Matches VPSIGNB, VPSIGNW, VPSIGND in: - *int_t # value to apply sign to @@ -36,7 +36,7 @@ # Population Count (count set bits in each element) # Instructions: VPOPCNTB, VPOPCNTW (AVX512_BITALG) # VPOPCNTD, VPOPCNTQ (AVX512_VPOPCNTDQ) -- go: PopCount +- go: OnesCount asm: "VPOPCNT[BWDQ]" in: - &any diff --git a/internal/simdgen/ops/MLOps/categories.yaml b/internal/simdgen/ops/MLOps/categories.yaml index b3508d25..6c5d3c67 100644 --- a/internal/simdgen/ops/MLOps/categories.yaml +++ b/internal/simdgen/ops/MLOps/categories.yaml @@ -1,55 +1,57 @@ !sum -- go: PairDotProd +- go: DotProdPairs commutative: false extension: "AVX.*" documentation: !string |- // NAME multiplies the elements and add the pairs together, // yielding a vector of half as many elements with twice the input element size. # TODO: maybe simplify this name within the receiver-type + method-naming scheme we use. -- go: SaturatedUnsignedSignedPairDotProd +- go: DotProdPairsSaturated commutative: false extension: "AVX.*" documentation: !string |- // NAME multiplies the elements and add the pairs together with saturation, // yielding a vector of half as many elements with twice the input element size. # QuadDotProd, i.e. VPDPBUSD(S) are operations with src/dst on the same register, we are not supporting this as of now. -- go: DotProdBroadcast - commutative: true - extension: "AVX.*" - documentation: !string |- - // NAME multiplies all elements and broadcasts the sum. -- go: UnsignedSignedQuadDotProdAccumulate +# - go: DotProdBroadcast +# commutative: true +# extension: "AVX.*" +# documentation: !string |- +# // NAME multiplies all elements and broadcasts the sum. +- go: AddDotProdQuadruple commutative: false extension: "AVX.*" documentation: !string |- // NAME performs dot products on groups of 4 elements of x and y and then adds z. 
-- go: SaturatedUnsignedSignedQuadDotProdAccumulate +- go: AddDotProdQuadrupleSaturated commutative: false extension: "AVX.*" documentation: !string |- // NAME multiplies performs dot products on groups of 4 elements of x and y and then adds z. -- go: AddDotProd +- go: AddDotProdPairs commutative: false + noTypes: "true" + noGenericOps: "true" extension: "AVX.*" documentation: !string |- // NAME performs dot products on pairs of elements of y and z and then adds x. -- go: SaturatedAddDotProd +- go: AddDotProdPairsSaturated commutative: false extension: "AVX.*" documentation: !string |- // NAME performs dot products on pairs of elements of y and z and then adds x. -- go: FusedMultiplyAdd +- go: MulAdd commutative: false extension: "AVX.*" documentation: !string |- - // NAME performs (x * y) + z. -- go: FusedMultiplyAddSub + // NAME performs a fused (x * y) + z. +- go: MulAddSub commutative: false extension: "AVX.*" documentation: !string |- - // NAME performs (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements. -- go: FusedMultiplySubAdd + // NAME performs a fused (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements. +- go: MulSubAdd commutative: false extension: "AVX.*" documentation: !string |- - // NAME performs (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements. + // NAME performs a fused (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements. 
diff --git a/internal/simdgen/ops/MLOps/go.yaml b/internal/simdgen/ops/MLOps/go.yaml index 8da2071d..f6b6f135 100644 --- a/internal/simdgen/ops/MLOps/go.yaml +++ b/internal/simdgen/ops/MLOps/go.yaml @@ -1,5 +1,5 @@ !sum -- go: PairDotProd +- go: DotProdPairs asm: VPMADDWD in: - &int @@ -10,7 +10,7 @@ - &int2 # The elemBits are different go: $t2 base: int -- go: SaturatedUnsignedSignedPairDotProd +- go: DotProdPairsSaturated asm: VPMADDUBSW in: - &uint @@ -23,17 +23,17 @@ overwriteElementBits: 8 out: - *int2 -- go: DotProdBroadcast - asm: VDPP[SD] - in: - - &dpb_src - go: $t - - *dpb_src - - class: immediate - const: 127 - out: - - *dpb_src -- go: UnsignedSignedQuadDotProdAccumulate +# - go: DotProdBroadcast +# asm: VDPP[SD] +# in: +# - &dpb_src +# go: $t +# - *dpb_src +# - class: immediate +# const: 127 +# out: +# - *dpb_src +- go: AddDotProdQuadruple asm: "VPDPBUSD" operandOrder: "31" # switch operand 3 and 1 in: @@ -51,7 +51,7 @@ overwriteElementBits: 8 out: - *qdpa_acc -- go: SaturatedUnsignedSignedQuadDotProdAccumulate +- go: AddDotProdQuadrupleSaturated asm: "VPDPBUSDS" operandOrder: "31" # switch operand 3 and 1 in: @@ -60,7 +60,7 @@ - *qdpa_src2 out: - *qdpa_acc -- go: AddDotProd +- go: AddDotProdPairs asm: "VPDPWSSD" in: - &pdpa_acc @@ -77,7 +77,7 @@ overwriteElementBits: 16 out: - *pdpa_acc -- go: SaturatedAddDotProd +- go: AddDotProdPairsSaturated asm: "VPDPWSSDS" in: - *pdpa_acc @@ -85,7 +85,7 @@ - *pdpa_src2 out: - *pdpa_acc -- go: FusedMultiplyAdd +- go: MulAdd asm: "VFMADD213PS|VFMADD213PD" in: - &fma_op @@ -95,7 +95,7 @@ - *fma_op out: - *fma_op -- go: FusedMultiplyAddSub +- go: MulAddSub asm: "VFMADDSUB213PS|VFMADDSUB213PD" in: - *fma_op @@ -103,7 +103,7 @@ - *fma_op out: - *fma_op -- go: FusedMultiplySubAdd +- go: MulSubAdd asm: "VFMSUBADD213PS|VFMSUBADD213PD" in: - *fma_op diff --git a/internal/simdgen/ops/Mul/go.yaml b/internal/simdgen/ops/Mul/go.yaml index dd9f55fb..3d868839 100644 --- a/internal/simdgen/ops/Mul/go.yaml +++ 
b/internal/simdgen/ops/Mul/go.yaml @@ -18,10 +18,11 @@ - go: MulEvenWiden asm: "VPMULDQ" in: - - &int + - &intNot64 go: $t + elemBits: 8|16|32 base: int - - *int + - *intNot64 out: - &int2 go: $t2 @@ -29,10 +30,11 @@ - go: MulEvenWiden asm: "VPMULUDQ" in: - - &uint + - &uintNot64 go: $t + elemBits: 8|16|32 base: uint - - *uint + - *uintNot64 out: - &uint2 go: $t2 @@ -43,24 +45,29 @@ - go: MulHigh asm: "VPMULHW" in: - - *int + - &int + go: $t + base: int - *int out: - - *int2 + - *int - go: MulHigh asm: "VPMULHUW" in: - - *uint + - &uint + go: $t + base: int - *uint out: - - *uint2 + - *uint # MulLow -# Signed int only. +# signed and unsigned are the same for lower bits. - go: Mul asm: "VPMULL[WDQ]" in: - - *int - - *int + - &any + go: $t + - *any out: - - *int2 + - *any From d3d6994999bd0d662e06f310b1ec5f5ae95e72f0 Mon Sep 17 00:00:00 2001 From: Mark D Ryan Date: Tue, 29 Apr 2025 08:48:41 +0000 Subject: [PATCH 183/200] riscv64: fix the path to the RISC-V extensions in spec.go The riscv-opcodes repository has been restructured. The files needed by spec.go are now to be found in the extensions directory. 
Change-Id: I163c08aed5d99088f5094c0365a9918977e39b5a Reviewed-on: https://go-review.googlesource.com/c/arch/+/670875 LUCI-TryBot-Result: Go LUCI Reviewed-by: Mark Freeman Reviewed-by: Joel Sing Reviewed-by: Meng Zhuo Reviewed-by: Dmitri Shuralyov --- riscv64/riscv64spec/spec.go | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/riscv64/riscv64spec/spec.go b/riscv64/riscv64spec/spec.go index 55c498a0..b65ea697 100644 --- a/riscv64/riscv64spec/spec.go +++ b/riscv64/riscv64spec/spec.go @@ -68,11 +68,10 @@ func main() { log.SetFlags(0) log.SetPrefix("riscv64spec: ") - var repoPath string if len(os.Args) < 1 { log.Fatal("usage: go run spec.go ") } - repoPath = os.Args[1] + extensionsPath := filepath.Join(os.Args[1], "extensions") fileTables, err := os.Create("tables.go") if err != nil { @@ -86,7 +85,7 @@ func main() { } for _, ext := range extensions { - f, err := os.Open(filepath.Join(repoPath, ext)) + f, err := os.Open(filepath.Join(extensionsPath, ext)) if err != nil { log.Fatal(err) } From 46ba08e3ae58883936f0eefa4871530b0fa6156f Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Thu, 7 Aug 2025 15:31:06 -0400 Subject: [PATCH 184/200] internal/unify: fix minor comment typo Change-Id: Ib5a1580d3561f86e7583460a03c6da708388a100 Reviewed-on: https://go-review.googlesource.com/c/arch/+/694116 Auto-Submit: Austin Clements LUCI-TryBot-Result: Go LUCI Reviewed-by: David Chase --- internal/unify/env.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/unify/env.go b/internal/unify/env.go index b9989dd2..1200eb36 100644 --- a/internal/unify/env.go +++ b/internal/unify/env.go @@ -56,7 +56,7 @@ import ( // // e + 0 = e // e ⨯ 0 = 0 -// e ⨯ 1 = +// e ⨯ 1 = e // e + f = f + e // e ⨯ f = f ⨯ e type envSet struct { From 861b9976b78b3cdf81fc3cb14aaac37314c226f4 Mon Sep 17 00:00:00 2001 From: Junyang Shao Date: Fri, 8 Aug 2025 17:32:14 +0000 Subject: [PATCH 185/200] internal/simdgen: fix imm aux types and change documentation The correct 
Aux type for immediates of SIMD instruction is uint8(signed value will be rejected by the assembler). This CL fixes it. Since we generate a jump table for non-const immediates now, this CL also updates the documentation. This CL partially generates CL 694395. Change-Id: Iaf1b0044242ad679cb326fbc6fdb07158b8266c1 Reviewed-on: https://go-review.googlesource.com/c/arch/+/694375 Reviewed-by: David Chase LUCI-TryBot-Result: Go LUCI --- internal/simdgen/gen_simdGenericOps.go | 2 +- internal/simdgen/gen_simdMachineOps.go | 2 +- internal/simdgen/gen_simdTypes.go | 12 ++++++------ internal/simdgen/gen_simdrules.go | 4 ++-- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/internal/simdgen/gen_simdGenericOps.go b/internal/simdgen/gen_simdGenericOps.go index daf941d7..72cc8fab 100644 --- a/internal/simdgen/gen_simdGenericOps.go +++ b/internal/simdgen/gen_simdGenericOps.go @@ -19,7 +19,7 @@ func simdGenericOps() []opData { {name: "{{.OpName}}", argLength: {{.OpInLen}}, commutative: {{.Comm}}}, {{- end }} {{- range .OpsImm }} - {name: "{{.OpName}}", argLength: {{.OpInLen}}, commutative: {{.Comm}}, aux: "Int8"}, + {name: "{{.OpName}}", argLength: {{.OpInLen}}, commutative: {{.Comm}}, aux: "UInt8"}, {{- end }} } } diff --git a/internal/simdgen/gen_simdMachineOps.go b/internal/simdgen/gen_simdMachineOps.go index 7c538a00..fbd7ccf5 100644 --- a/internal/simdgen/gen_simdMachineOps.go +++ b/internal/simdgen/gen_simdMachineOps.go @@ -20,7 +20,7 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf {name: "{{.OpName}}", argLength: {{.OpInLen}}, reg: {{.RegInfo}}, asm: "{{.Asm}}", commutative: {{.Comm}}, typ: "{{.Type}}", resultInArg0: {{.ResultInArg0}}}, {{- end }} {{- range .OpsDataImm }} - {name: "{{.OpName}}", argLength: {{.OpInLen}}, reg: {{.RegInfo}}, asm: "{{.Asm}}", aux: "Int8", commutative: {{.Comm}}, typ: "{{.Type}}", resultInArg0: {{.ResultInArg0}}}, + {name: "{{.OpName}}", argLength: {{.OpInLen}}, reg: {{.RegInfo}}, asm: "{{.Asm}}", 
aux: "UInt8", commutative: {{.Comm}}, typ: "{{.Type}}", resultInArg0: {{.ResultInArg0}}}, {{- end }} } } diff --git a/internal/simdgen/gen_simdTypes.go b/internal/simdgen/gen_simdTypes.go index f3c68796..c7053f24 100644 --- a/internal/simdgen/gen_simdTypes.go +++ b/internal/simdgen/gen_simdTypes.go @@ -311,7 +311,7 @@ func ({{.Op2NameAndType "x"}}) {{.Go}}({{.Op1NameAndType "y"}}, {{.Op0NameAndTyp {{define "op1Imm8"}} {{if .Documentation}}{{.Documentation}} //{{end}} -// {{.ImmName}} is expected to be a constant, non-constant value will trigger a runtime panic. +// {{.ImmName}} results in better performance when it's a constant, non-constant value will trigger a jump table to be generated. // // Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}} func ({{.Op1NameAndType "x"}}) {{.Go}}({{.ImmName}} uint8) {{.GoType}} @@ -320,7 +320,7 @@ func ({{.Op1NameAndType "x"}}) {{.Go}}({{.ImmName}} uint8) {{.GoType}} {{define "op2Imm8"}} {{if .Documentation}}{{.Documentation}} //{{end}} -// {{.ImmName}} is expected to be a constant, non-constant value will trigger a runtime panic. +// {{.ImmName}} results in better performance when it's a constant, non-constant value will trigger a jump table to be generated. // // Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}} func ({{.Op1NameAndType "x"}}) {{.Go}}({{.ImmName}} uint8, {{.Op2NameAndType "y"}}) {{.GoType}} @@ -329,7 +329,7 @@ func ({{.Op1NameAndType "x"}}) {{.Go}}({{.ImmName}} uint8, {{.Op2NameAndType "y" {{define "op2Imm8_2I"}} {{if .Documentation}}{{.Documentation}} //{{end}} -// {{.ImmName}} is expected to be a constant, non-constant value will trigger a runtime panic. +// {{.ImmName}} results in better performance when it's a constant, non-constant value will trigger a jump table to be generated. 
// // Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}} func ({{.Op1NameAndType "x"}}) {{.Go}}({{.Op2NameAndType "y"}}, {{.ImmName}} uint8) {{.GoType}} @@ -339,7 +339,7 @@ func ({{.Op1NameAndType "x"}}) {{.Go}}({{.Op2NameAndType "y"}}, {{.ImmName}} uin {{define "op3Imm8"}} {{if .Documentation}}{{.Documentation}} //{{end}} -// {{.ImmName}} is expected to be a constant, non-constant value will trigger a runtime panic. +// {{.ImmName}} results in better performance when it's a constant, non-constant value will trigger a jump table to be generated. // // Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}} func ({{.Op1NameAndType "x"}}) {{.Go}}({{.ImmName}} uint8, {{.Op2NameAndType "y"}}, {{.Op3NameAndType "z"}}) {{.GoType}} @@ -348,7 +348,7 @@ func ({{.Op1NameAndType "x"}}) {{.Go}}({{.ImmName}} uint8, {{.Op2NameAndType "y" {{define "op3Imm8_2I"}} {{if .Documentation}}{{.Documentation}} //{{end}} -// {{.ImmName}} is expected to be a constant, non-constant value will trigger a runtime panic. +// {{.ImmName}} results in better performance when it's a constant, non-constant value will trigger a jump table to be generated. // // Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}} func ({{.Op1NameAndType "x"}}) {{.Go}}({{.Op2NameAndType "y"}}, {{.ImmName}} uint8, {{.Op3NameAndType "z"}}) {{.GoType}} @@ -358,7 +358,7 @@ func ({{.Op1NameAndType "x"}}) {{.Go}}({{.Op2NameAndType "y"}}, {{.ImmName}} uin {{define "op4Imm8"}} {{if .Documentation}}{{.Documentation}} //{{end}} -// {{.ImmName}} is expected to be a constant, non-constant value will trigger a runtime panic. +// {{.ImmName}} results in better performance when it's a constant, non-constant value will trigger a jump table to be generated. 
// // Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}} func ({{.Op1NameAndType "x"}}) {{.Go}}({{.ImmName}} uint8, {{.Op2NameAndType "y"}}, {{.Op3NameAndType "z"}}, {{.Op4NameAndType "u"}}) {{.GoType}} diff --git a/internal/simdgen/gen_simdrules.go b/internal/simdgen/gen_simdrules.go index c910f64a..bac4e942 100644 --- a/internal/simdgen/gen_simdrules.go +++ b/internal/simdgen/gen_simdrules.go @@ -22,9 +22,9 @@ var ( {{end}} {{define "maskInMaskOut"}}({{.GoOp}}{{.GoType}} {{.Args}} mask) => ({{.MaskOutConvert}} ({{.Asm}} {{.ArgsOut}} ({{.MaskInConvert}} mask))) {{end}} -{{define "sftimm"}}({{.GoOp}}{{.GoType}} x (MOVQconst [c])) => ({{.Asm}}const [int8(c)] x) +{{define "sftimm"}}({{.GoOp}}{{.GoType}} x (MOVQconst [c])) => ({{.Asm}}const [uint8(c)] x) {{end}} -{{define "masksftimm"}}({{.GoOp}}{{.GoType}} x (MOVQconst [c]) mask) => ({{.Asm}}const [int8(c)] x ({{.MaskInConvert}} mask)) +{{define "masksftimm"}}({{.GoOp}}{{.GoType}} x (MOVQconst [c]) mask) => ({{.Asm}}const [uint8(c)] x ({{.MaskInConvert}} mask)) {{end}} `)) ) From 134aefd5422e22b9fd27337491b58870499055b2 Mon Sep 17 00:00:00 2001 From: Junyang Shao Date: Mon, 11 Aug 2025 17:19:06 +0000 Subject: [PATCH 186/200] internal/simdgen: imm document improve This CL generates CL 694795. 
Change-Id: I36165d0f3cd038f2fa04b8612446b87ac1bce89c Reviewed-on: https://go-review.googlesource.com/c/arch/+/694775 LUCI-TryBot-Result: Go LUCI Reviewed-by: David Chase --- internal/simdgen/gen_simdTypes.go | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/internal/simdgen/gen_simdTypes.go b/internal/simdgen/gen_simdTypes.go index c7053f24..820c27fa 100644 --- a/internal/simdgen/gen_simdTypes.go +++ b/internal/simdgen/gen_simdTypes.go @@ -311,7 +311,7 @@ func ({{.Op2NameAndType "x"}}) {{.Go}}({{.Op1NameAndType "y"}}, {{.Op0NameAndTyp {{define "op1Imm8"}} {{if .Documentation}}{{.Documentation}} //{{end}} -// {{.ImmName}} results in better performance when it's a constant, non-constant value will trigger a jump table to be generated. +// {{.ImmName}} results in better performance when it's a constant, a non-constant value will be translated into a jump table. // // Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}} func ({{.Op1NameAndType "x"}}) {{.Go}}({{.ImmName}} uint8) {{.GoType}} @@ -320,7 +320,7 @@ func ({{.Op1NameAndType "x"}}) {{.Go}}({{.ImmName}} uint8) {{.GoType}} {{define "op2Imm8"}} {{if .Documentation}}{{.Documentation}} //{{end}} -// {{.ImmName}} results in better performance when it's a constant, non-constant value will trigger a jump table to be generated. +// {{.ImmName}} results in better performance when it's a constant, a non-constant value will be translated into a jump table. // // Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}} func ({{.Op1NameAndType "x"}}) {{.Go}}({{.ImmName}} uint8, {{.Op2NameAndType "y"}}) {{.GoType}} @@ -329,7 +329,7 @@ func ({{.Op1NameAndType "x"}}) {{.Go}}({{.ImmName}} uint8, {{.Op2NameAndType "y" {{define "op2Imm8_2I"}} {{if .Documentation}}{{.Documentation}} //{{end}} -// {{.ImmName}} results in better performance when it's a constant, non-constant value will trigger a jump table to be generated. 
+// {{.ImmName}} results in better performance when it's a constant, a non-constant value will be translated into a jump table. // // Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}} func ({{.Op1NameAndType "x"}}) {{.Go}}({{.Op2NameAndType "y"}}, {{.ImmName}} uint8) {{.GoType}} @@ -339,7 +339,7 @@ func ({{.Op1NameAndType "x"}}) {{.Go}}({{.Op2NameAndType "y"}}, {{.ImmName}} uin {{define "op3Imm8"}} {{if .Documentation}}{{.Documentation}} //{{end}} -// {{.ImmName}} results in better performance when it's a constant, non-constant value will trigger a jump table to be generated. +// {{.ImmName}} results in better performance when it's a constant, a non-constant value will be translated into a jump table. // // Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}} func ({{.Op1NameAndType "x"}}) {{.Go}}({{.ImmName}} uint8, {{.Op2NameAndType "y"}}, {{.Op3NameAndType "z"}}) {{.GoType}} @@ -348,7 +348,7 @@ func ({{.Op1NameAndType "x"}}) {{.Go}}({{.ImmName}} uint8, {{.Op2NameAndType "y" {{define "op3Imm8_2I"}} {{if .Documentation}}{{.Documentation}} //{{end}} -// {{.ImmName}} results in better performance when it's a constant, non-constant value will trigger a jump table to be generated. +// {{.ImmName}} results in better performance when it's a constant, a non-constant value will be translated into a jump table. // // Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}} func ({{.Op1NameAndType "x"}}) {{.Go}}({{.Op2NameAndType "y"}}, {{.ImmName}} uint8, {{.Op3NameAndType "z"}}) {{.GoType}} @@ -358,7 +358,7 @@ func ({{.Op1NameAndType "x"}}) {{.Go}}({{.Op2NameAndType "y"}}, {{.ImmName}} uin {{define "op4Imm8"}} {{if .Documentation}}{{.Documentation}} //{{end}} -// {{.ImmName}} results in better performance when it's a constant, non-constant value will trigger a jump table to be generated. +// {{.ImmName}} results in better performance when it's a constant, a non-constant value will be translated into a jump table. 
// // Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}} func ({{.Op1NameAndType "x"}}) {{.Go}}({{.ImmName}} uint8, {{.Op2NameAndType "y"}}, {{.Op3NameAndType "z"}}, {{.Op4NameAndType "u"}}) {{.GoType}} From fbc9dad06686f9627e9ff873bbe622fc27730def Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Fri, 8 Aug 2025 16:09:15 -0400 Subject: [PATCH 187/200] internal/simdgen/ops: use correct op for unsigned MulHigh We were matching both signed and unsigned definitions to the signed instruction. This caused dedupGodef to pick essentially arbitrarily between them, which hid the problem. Change-Id: I51cc697ebf5ee4b9ac00307d6db472ef21279904 Reviewed-on: https://go-review.googlesource.com/c/arch/+/694857 LUCI-TryBot-Result: Go LUCI Auto-Submit: Austin Clements Reviewed-by: Junyang Shao --- internal/simdgen/godefs.go | 2 ++ internal/simdgen/ops/Mul/go.yaml | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/internal/simdgen/godefs.go b/internal/simdgen/godefs.go index 166a5933..522ae69a 100644 --- a/internal/simdgen/godefs.go +++ b/internal/simdgen/godefs.go @@ -349,6 +349,8 @@ func writeGoDefs(path string, cl unify.Closure) error { log.Printf("dedup len: %d\n", len(deduped)) } if !*FlagNoDedup { + // TODO: This can hide mistakes in the API definitions, especially when + // multiple patterns result in the same API unintentionally. Make it stricter. 
if deduped, err = dedupGodef(deduped); err != nil { return err } diff --git a/internal/simdgen/ops/Mul/go.yaml b/internal/simdgen/ops/Mul/go.yaml index 3d868839..c0205a68 100644 --- a/internal/simdgen/ops/Mul/go.yaml +++ b/internal/simdgen/ops/Mul/go.yaml @@ -56,7 +56,7 @@ in: - &uint go: $t - base: int + base: uint - *uint out: - *uint From 3d4fe2e6b6f9416be630cf183c843f1a746bb8f4 Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Fri, 8 Aug 2025 16:17:21 -0400 Subject: [PATCH 188/200] internal/simdgen: compute CPU feature in XED decoder Currently, the XED decoder emits the raw "EXTENSION" and "ISA_SET" fields directly from the XED, and these are translated into a CPU feature by godefs using a bunch of fairly ad hoc string manipulations. Replace this with computing the CPU feature directly in the XED decoder. The extension and isa_set are strictly XED concepts, while "CPU features" are generic concepts. Thus, this should be the role of the XED decoder. We also use an explicit mapping table rather than string manipulations. These CPU feature names appear in the API, and thus it's important that we pay attention to their names. No effect on generated code. 
Change-Id: I1c7c79c461d57b2cd78cfa81f376683ae33c69b1 Reviewed-on: https://go-review.googlesource.com/c/arch/+/694858 LUCI-TryBot-Result: Go LUCI Reviewed-by: David Chase Reviewed-by: Junyang Shao Auto-Submit: Austin Clements --- internal/simdgen/gen_simdMachineOps.go | 2 +- internal/simdgen/gen_utility.go | 48 +------- internal/simdgen/godefs.go | 9 +- internal/simdgen/ops/AddSub/categories.yaml | 8 -- .../simdgen/ops/BitwiseLogic/categories.yaml | 4 - internal/simdgen/ops/Compares/categories.yaml | 7 -- internal/simdgen/ops/Converts/categories.yaml | 2 - .../simdgen/ops/FPonlyArith/categories.yaml | 18 --- .../simdgen/ops/GaloisField/categories.yaml | 3 - .../simdgen/ops/IntOnlyArith/categories.yaml | 4 - internal/simdgen/ops/MLOps/categories.yaml | 12 +- internal/simdgen/ops/MinMax/categories.yaml | 2 - internal/simdgen/ops/Moves/categories.yaml | 11 -- internal/simdgen/ops/Mul/categories.yaml | 3 - .../simdgen/ops/ShiftRotate/categories.yaml | 17 --- internal/simdgen/xed.go | 108 ++++++++++++++++-- 16 files changed, 104 insertions(+), 154 deletions(-) diff --git a/internal/simdgen/gen_simdMachineOps.go b/internal/simdgen/gen_simdMachineOps.go index fbd7ccf5..f110ae61 100644 --- a/internal/simdgen/gen_simdMachineOps.go +++ b/internal/simdgen/gen_simdMachineOps.go @@ -79,7 +79,7 @@ func writeSIMDMachineOps(ops []Operation) *bytes.Buffer { } } // Makes AVX512 operations use upper registers - if strings.Contains(op.Extension, "AVX512") { + if strings.Contains(op.CPUFeature, "AVX512") { regInfo = strings.ReplaceAll(regInfo, "v", "w") } if _, ok := regInfoSet[regInfo]; !ok { diff --git a/internal/simdgen/gen_utility.go b/internal/simdgen/gen_utility.go index 59832e0e..8a3e1735 100644 --- a/internal/simdgen/gen_utility.go +++ b/internal/simdgen/gen_utility.go @@ -515,50 +515,6 @@ func dedup(ops []Operation) (deduped []Operation) { return } -func fillCPUFeature(ops []Operation) (filled []Operation, excluded []Operation) { - allCPUFeatures := map[string]struct{}{} - for 
_, op := range ops { - if op.ISASet == "" { - newS := op.Extension - op.CPUFeature = &newS - } else { - newS := strings.TrimSuffix(strings.TrimSuffix(strings.TrimSuffix(op.ISASet, "_128"), "_256"), "_512") - newS = strings.TrimSuffix(strings.TrimSuffix(strings.TrimSuffix(newS, "_128N"), "_256N"), "_512N") - op.CPUFeature = &newS - } - if *op.CPUFeature == "AVX" || *op.CPUFeature == "AVX2" || strings.HasPrefix(*op.CPUFeature, "AVX512") || - strings.HasPrefix(*op.CPUFeature, "AVX_") || strings.HasPrefix(*op.CPUFeature, "AVX2_") { - // This excludes instructions from CPU Features like AVX10.1, which usually are rebrandings of AVX512. - filled = append(filled, op) - if strings.Contains(*op.CPUFeature, "_") { - *op.CPUFeature = strings.ReplaceAll(*op.CPUFeature, "_", "") - } - allCPUFeatures[*op.CPUFeature] = struct{}{} - } else { - excluded = append(excluded, op) - } - } - // Sanity check, make sure we are not excluding the only definition of an operation - filledSeen := map[string]struct{}{} - excludedSeen := map[string]Operation{} - for _, op := range filled { - filledSeen[op.Go+*op.In[0].Go] = struct{}{} - } - for _, op := range excluded { - excludedSeen[op.Go+*op.In[0].Go] = op - } - for k, op := range excludedSeen { - if _, ok := filledSeen[k]; !ok { - panic(fmt.Sprintf("simdgen is excluding the only def of op: %s", op)) - } - } - if *Verbose { - // It might contain - log.Printf("All CPU Features: %v\n", allCPUFeatures) - } - return -} - func (op Operation) GenericName() string { if op.OperandOrder != nil { switch *op.OperandOrder { @@ -597,7 +553,7 @@ func dedupGodef(ops []Operation) ([]Operation, error) { return ops, nil } isAVX512 := func(op Operation) bool { - return strings.Contains(op.Extension, "AVX512") + return strings.Contains(op.CPUFeature, "AVX512") } deduped := []Operation{} for _, dup := range seen { @@ -610,7 +566,7 @@ func dedupGodef(ops []Operation) ([]Operation, error) { if isAVX512(i) && !isAVX512(j) { return 1 } - return 
strings.Compare(*i.CPUFeature, *j.CPUFeature) + return strings.Compare(i.CPUFeature, j.CPUFeature) }) } deduped = append(deduped, dup[0]) diff --git a/internal/simdgen/godefs.go b/internal/simdgen/godefs.go index 522ae69a..c37b4d38 100644 --- a/internal/simdgen/godefs.go +++ b/internal/simdgen/godefs.go @@ -55,9 +55,7 @@ type rawOperation struct { InVariant []Operand // Optional parameters Out []Operand // Results Commutative bool // Commutativity - Extension string // Extension - ISASet string // ISASet - CPUFeature *string // If ISASet is empty, then Extension, otherwise ISASet + CPUFeature string // CPUID/Has* feature name Zeroing *bool // nil => use asm suffix ".Z"; false => do not use asm suffix ".Z" Documentation *string // Documentation will be appended to the stubs comments. // ConstMask is a hack to reduce the size of defs the user writes for const-immediate @@ -329,11 +327,6 @@ func writeGoDefs(path string, cl unify.Closure) error { // The parsed XED data might contain duplicates, like // 512 bits VPADDP. deduped := dedup(ops) - var excluded []Operation - deduped, excluded = fillCPUFeature(deduped) - if *Verbose { - log.Printf("excluded len: %d\n", len(excluded)) - } if *Verbose { log.Printf("dedup len: %d\n", len(ops)) diff --git a/internal/simdgen/ops/AddSub/categories.yaml b/internal/simdgen/ops/AddSub/categories.yaml index 4e492516..35e81042 100644 --- a/internal/simdgen/ops/AddSub/categories.yaml +++ b/internal/simdgen/ops/AddSub/categories.yaml @@ -1,45 +1,37 @@ !sum - go: Add commutative: true - extension: "AVX.*" documentation: !string |- // NAME adds corresponding elements of two vectors. - go: AddSaturated commutative: true - extension: "AVX.*" documentation: !string |- // NAME adds corresponding elements of two vectors with saturation. - go: Sub commutative: false - extension: "AVX.*" documentation: !string |- // NAME subtracts corresponding elements of two vectors. 
- go: SubSaturated commutative: false - extension: "AVX.*" documentation: !string |- // NAME subtracts corresponding elements of two vectors with saturation. - go: AddPairs commutative: false - extension: "AVX.*" documentation: !string |- // NAME horizontally adds adjacent pairs of elements. // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...]. - go: SubPairs commutative: false - extension: "AVX.*" documentation: !string |- // NAME horizontally subtracts adjacent pairs of elements. // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...]. - go: AddPairsSaturated commutative: false - extension: "AVX.*" documentation: !string |- // NAME horizontally adds adjacent pairs of elements with saturation. // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...]. - go: SubPairsSaturated commutative: false - extension: "AVX.*" documentation: !string |- // NAME horizontally subtracts adjacent pairs of elements with saturation. // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...]. diff --git a/internal/simdgen/ops/BitwiseLogic/categories.yaml b/internal/simdgen/ops/BitwiseLogic/categories.yaml index 320cfd18..3142d191 100644 --- a/internal/simdgen/ops/BitwiseLogic/categories.yaml +++ b/internal/simdgen/ops/BitwiseLogic/categories.yaml @@ -1,22 +1,18 @@ !sum - go: And commutative: true - extension: "AVX.*" documentation: !string |- // NAME performs a bitwise AND operation between two vectors. - go: Or commutative: true - extension: "AVX.*" documentation: !string |- // NAME performs a bitwise OR operation between two vectors. - go: AndNot commutative: false - extension: "AVX.*" documentation: !string |- // NAME performs a bitwise x &^ y. 
- go: Xor commutative: true - extension: "AVX.*" documentation: !string |- // NAME performs a bitwise XOR operation between two vectors. diff --git a/internal/simdgen/ops/Compares/categories.yaml b/internal/simdgen/ops/Compares/categories.yaml index e3d990ed..aa07ade2 100644 --- a/internal/simdgen/ops/Compares/categories.yaml +++ b/internal/simdgen/ops/Compares/categories.yaml @@ -9,42 +9,35 @@ - go: Equal constImm: 0 commutative: true - extension: "AVX.*" documentation: !string |- // NAME compares for equality. - go: Less constImm: 1 commutative: false - extension: "AVX.*" documentation: !string |- // NAME compares for less than. - go: LessEqual constImm: 2 commutative: false - extension: "AVX.*" documentation: !string |- // NAME compares for less than or equal. - go: IsNan # For float only. constImm: 3 commutative: true - extension: "AVX.*" documentation: !string |- // NAME checks if elements are NaN. Use as x.IsNan(x). - go: NotEqual constImm: 4 commutative: true - extension: "AVX.*" documentation: !string |- // NAME compares for inequality. - go: GreaterEqual constImm: 13 commutative: false - extension: "AVX.*" documentation: !string |- // NAME compares for greater than or equal. - go: Greater constImm: 14 commutative: false - extension: "AVX.*" documentation: !string |- // NAME compares for greater than. diff --git a/internal/simdgen/ops/Converts/categories.yaml b/internal/simdgen/ops/Converts/categories.yaml index 16316ed3..cc6c419d 100644 --- a/internal/simdgen/ops/Converts/categories.yaml +++ b/internal/simdgen/ops/Converts/categories.yaml @@ -1,12 +1,10 @@ !sum - go: ConvertToInt32 commutative: false - extension: "AVX.*" documentation: !string |- // ConvertToInt32 converts element values to int32. - go: ConvertToUint32 commutative: false - extension: "AVX.*" documentation: !string |- // ConvertToUint32Masked converts element values to uint32. 
diff --git a/internal/simdgen/ops/FPonlyArith/categories.yaml b/internal/simdgen/ops/FPonlyArith/categories.yaml index 512cfc50..f2d8af68 100644 --- a/internal/simdgen/ops/FPonlyArith/categories.yaml +++ b/internal/simdgen/ops/FPonlyArith/categories.yaml @@ -1,103 +1,85 @@ !sum - go: Div commutative: false - extension: "AVX.*" documentation: !string |- // NAME divides elements of two vectors. - go: Sqrt commutative: false - extension: "AVX.*" documentation: !string |- // NAME computes the square root of each element. - go: Reciprocal commutative: false - extension: "AVX.*" documentation: !string |- // NAME computes an approximate reciprocal of each element. - go: ReciprocalSqrt commutative: false - extension: "AVX.*" documentation: !string |- // NAME computes an approximate reciprocal of the square root of each element. - go: Scale commutative: false - extension: "AVX.*" documentation: !string |- // NAME multiplies elements by a power of 2. - go: RoundToEven commutative: false - extension: "AVX.*" constImm: 0 documentation: !string |- // NAME rounds elements to the nearest integer. - go: RoundToEvenScaled commutative: false - extension: "AVX.*" constImm: 0 documentation: !string |- // NAME rounds elements with specified precision. - go: RoundToEvenScaledResidue commutative: false - extension: "AVX.*" constImm: 0 documentation: !string |- // NAME computes the difference after rounding with specified precision. - go: Floor commutative: false - extension: "AVX.*" constImm: 1 documentation: !string |- // NAME rounds elements down to the nearest integer. - go: FloorScaled commutative: false - extension: "AVX.*" constImm: 1 documentation: !string |- // NAME rounds elements down with specified precision. - go: FloorScaledResidue commutative: false - extension: "AVX.*" constImm: 1 documentation: !string |- // NAME computes the difference after flooring with specified precision. 
- go: Ceil commutative: false - extension: "AVX.*" constImm: 2 documentation: !string |- // NAME rounds elements up to the nearest integer. - go: CeilScaled commutative: false - extension: "AVX.*" constImm: 2 documentation: !string |- // NAME rounds elements up with specified precision. - go: CeilScaledResidue commutative: false - extension: "AVX.*" constImm: 2 documentation: !string |- // NAME computes the difference after ceiling with specified precision. - go: Trunc commutative: false - extension: "AVX.*" constImm: 3 documentation: !string |- // NAME truncates elements towards zero. - go: TruncScaled commutative: false - extension: "AVX.*" constImm: 3 documentation: !string |- // NAME truncates elements with specified precision. - go: TruncScaledResidue commutative: false - extension: "AVX.*" constImm: 3 documentation: !string |- // NAME computes the difference after truncating with specified precision. - go: AddSub commutative: false - extension: "AVX.*" documentation: !string |- // NAME subtracts even elements and adds odd elements of two vectors. diff --git a/internal/simdgen/ops/GaloisField/categories.yaml b/internal/simdgen/ops/GaloisField/categories.yaml index d57b5265..25824625 100644 --- a/internal/simdgen/ops/GaloisField/categories.yaml +++ b/internal/simdgen/ops/GaloisField/categories.yaml @@ -1,7 +1,6 @@ !sum - go: GaloisFieldAffineTransform commutative: false - extension: "AVX.*" documentation: !string |- // NAME computes an affine transformation in GF(2^8): // x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrixes; @@ -9,7 +8,6 @@ // corresponding to a group of 8 elements in x. - go: GaloisFieldAffineTransformInverse commutative: false - extension: "AVX.*" documentation: !string |- // NAME computes an affine transformation in GF(2^8), // with x inverted with respect to reduction polynomial x^8 + x^4 + x^3 + x + 1: @@ -18,7 +16,6 @@ // corresponding to a group of 8 elements in x. 
- go: GaloisFieldMul commutative: false - extension: "AVX.*" documentation: !string |- // NAME computes element-wise GF(2^8) multiplication with // reduction polynomial x^8 + x^4 + x^3 + x + 1. diff --git a/internal/simdgen/ops/IntOnlyArith/categories.yaml b/internal/simdgen/ops/IntOnlyArith/categories.yaml index 2c7a9998..bf33642a 100644 --- a/internal/simdgen/ops/IntOnlyArith/categories.yaml +++ b/internal/simdgen/ops/IntOnlyArith/categories.yaml @@ -1,25 +1,21 @@ !sum - go: Average commutative: true - extension: "AVX.*" # VPAVGB/W are available across various AVX versions documentation: !string |- // NAME computes the rounded average of corresponding elements. - go: Abs commutative: false # Unary operation, not commutative - extension: "AVX.*" # VPABSB/W/D are AVX, VPABSQ is AVX512 documentation: !string |- // NAME computes the absolute value of each element. - go: CopySign # Applies sign of second operand to first: sign(val, sign_src) commutative: false - extension: "AVX.*" documentation: !string |- // NAME returns the product of the first operand with -1, 0, or 1, // whichever constant is nearest to the value of the second operand. # Sign does not have masked version - go: OnesCount commutative: false - extension: "AVX512.*" documentation: !string |- // NAME counts the number of set bits in each element. diff --git a/internal/simdgen/ops/MLOps/categories.yaml b/internal/simdgen/ops/MLOps/categories.yaml index 6c5d3c67..97381e1e 100644 --- a/internal/simdgen/ops/MLOps/categories.yaml +++ b/internal/simdgen/ops/MLOps/categories.yaml @@ -1,57 +1,47 @@ !sum - go: DotProdPairs commutative: false - extension: "AVX.*" documentation: !string |- // NAME multiplies the elements and add the pairs together, // yielding a vector of half as many elements with twice the input element size. # TODO: maybe simplify this name within the receiver-type + method-naming scheme we use. 
- go: DotProdPairsSaturated commutative: false - extension: "AVX.*" documentation: !string |- // NAME multiplies the elements and add the pairs together with saturation, // yielding a vector of half as many elements with twice the input element size. # QuadDotProd, i.e. VPDPBUSD(S) are operations with src/dst on the same register, we are not supporting this as of now. # - go: DotProdBroadcast # commutative: true -# extension: "AVX.*" -# documentation: !string |- +# # documentation: !string |- # // NAME multiplies all elements and broadcasts the sum. - go: AddDotProdQuadruple commutative: false - extension: "AVX.*" documentation: !string |- // NAME performs dot products on groups of 4 elements of x and y and then adds z. - go: AddDotProdQuadrupleSaturated commutative: false - extension: "AVX.*" documentation: !string |- // NAME multiplies performs dot products on groups of 4 elements of x and y and then adds z. - go: AddDotProdPairs commutative: false noTypes: "true" noGenericOps: "true" - extension: "AVX.*" documentation: !string |- // NAME performs dot products on pairs of elements of y and z and then adds x. - go: AddDotProdPairsSaturated commutative: false - extension: "AVX.*" documentation: !string |- // NAME performs dot products on pairs of elements of y and z and then adds x. - go: MulAdd commutative: false - extension: "AVX.*" documentation: !string |- // NAME performs a fused (x * y) + z. - go: MulAddSub commutative: false - extension: "AVX.*" documentation: !string |- // NAME performs a fused (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements. - go: MulSubAdd commutative: false - extension: "AVX.*" documentation: !string |- // NAME performs a fused (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements. 
diff --git a/internal/simdgen/ops/MinMax/categories.yaml b/internal/simdgen/ops/MinMax/categories.yaml index 9ac0d3d4..a7e30f46 100644 --- a/internal/simdgen/ops/MinMax/categories.yaml +++ b/internal/simdgen/ops/MinMax/categories.yaml @@ -1,11 +1,9 @@ !sum - go: Max commutative: true - extension: "AVX.*" documentation: !string |- // NAME computes the maximum of corresponding elements. - go: Min commutative: true - extension: "AVX.*" documentation: !string |- // NAME computes the minimum of corresponding elements. diff --git a/internal/simdgen/ops/Moves/categories.yaml b/internal/simdgen/ops/Moves/categories.yaml index 6f30ccbc..cd9260ab 100644 --- a/internal/simdgen/ops/Moves/categories.yaml +++ b/internal/simdgen/ops/Moves/categories.yaml @@ -1,48 +1,40 @@ !sum - go: SetElem commutative: false - extension: "AVX.*" documentation: !string |- // NAME sets a single constant-indexed element's value. - go: GetElem commutative: false - extension: "AVX.*" documentation: !string |- // NAME retrieves a single constant-indexed element's value. - go: SetLo commutative: false constImm: 0 - extension: "AVX.*" documentation: !string |- // NAME returns x with its lower half set to y. - go: GetLo commutative: false constImm: 0 - extension: "AVX.*" documentation: !string |- // NAME returns the lower half of x. - go: SetHi commutative: false constImm: 1 - extension: "AVX.*" documentation: !string |- // NAME returns x with its upper half set to y. - go: GetHi commutative: false constImm: 1 - extension: "AVX.*" documentation: !string |- // NAME returns the upper half of x. - go: Permute commutative: false - extension: "AVX.*" documentation: !string |- // NAME performs a full permutation of vector x using indices: // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} // Only the needed bits to represent x's index are used in indices' elements. 
- go: Permute2 # Permute2 is only available on or after AVX512 commutative: false - extension: "AVX.*" documentation: !string |- // NAME performs a full permutation of vector x, y using indices: // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} @@ -50,19 +42,16 @@ // Only the needed bits to represent xy's index are used in indices' elements. - go: Compress commutative: false - extension: "AVX.*" documentation: !string |- // NAME performs a compression on vector x using mask by // selecting elements as indicated by mask, and pack them to lower indexed elements. - go: blend commutative: false - extension: "AVX.*" documentation: !string |- // NAME blends two vectors based on mask values, choosing either // the first or the second based on whether the third is false or true - go: Expand commutative: false - extension: "AVX.*" documentation: !string |- // NAME performs an expansion on a vector x whose elements are packed to lower parts. // The expansion is to distribute elements as indexed by mask, from lower mask elements to upper in order. diff --git a/internal/simdgen/ops/Mul/categories.yaml b/internal/simdgen/ops/Mul/categories.yaml index f4e2aed2..92491b51 100644 --- a/internal/simdgen/ops/Mul/categories.yaml +++ b/internal/simdgen/ops/Mul/categories.yaml @@ -1,17 +1,14 @@ !sum - go: Mul commutative: true - extension: "AVX.*" documentation: !string |- // NAME multiplies corresponding elements of two vectors. - go: MulEvenWiden commutative: true - extension: "AVX.*" documentation: !string |- // NAME multiplies even-indexed elements, widening the result. // Result[i] = v1.Even[i] * v2.Even[i]. - go: MulHigh commutative: true - extension: "AVX.*" documentation: !string |- // NAME multiplies elements and stores the high part of the result. 
diff --git a/internal/simdgen/ops/ShiftRotate/categories.yaml b/internal/simdgen/ops/ShiftRotate/categories.yaml index e51d289b..0d0b006c 100644 --- a/internal/simdgen/ops/ShiftRotate/categories.yaml +++ b/internal/simdgen/ops/ShiftRotate/categories.yaml @@ -3,7 +3,6 @@ nameAndSizeCheck: true specialLower: sftimm commutative: false - extension: "AVX.*" documentation: !string |- // NAME shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. - go: ShiftAllRight @@ -11,7 +10,6 @@ nameAndSizeCheck: true specialLower: sftimm commutative: false - extension: "AVX.*" documentation: !string |- // NAME shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. - go: ShiftAllRight @@ -19,7 +17,6 @@ specialLower: sftimm nameAndSizeCheck: true commutative: false - extension: "AVX.*" documentation: !string |- // NAME shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. - go: shiftAllLeftConst # no APIs, only ssa ops. @@ -28,7 +25,6 @@ SSAVariant: "const" # to avoid its name colliding with reg version of this instruction, amend this to its ssa op name. nameAndSizeCheck: true commutative: false - extension: "AVX.*" - go: shiftAllRightConst # no APIs, only ssa ops. noTypes: "true" noGenericOps: "true" @@ -36,7 +32,6 @@ signed: false nameAndSizeCheck: true commutative: false - extension: "AVX.*" - go: shiftAllRightConst # no APIs, only ssa ops. noTypes: "true" noGenericOps: "true" @@ -44,77 +39,65 @@ signed: true nameAndSizeCheck: true commutative: false - extension: "AVX.*" - go: ShiftLeft nameAndSizeCheck: true commutative: false - extension: "AVX.*" documentation: !string |- // NAME shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. 
- go: ShiftRight signed: false nameAndSizeCheck: true commutative: false - extension: "AVX.*" documentation: !string |- // NAME shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. - go: ShiftRight signed: true nameAndSizeCheck: true commutative: false - extension: "AVX.*" documentation: !string |- // NAME shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. - go: RotateAllLeft nameAndSizeCheck: true commutative: false - extension: "AVX.*" documentation: !string |- // NAME rotates each element to the left by the number of bits specified by the immediate. - go: RotateLeft nameAndSizeCheck: true commutative: false - extension: "AVX.*" documentation: !string |- // NAME rotates each element in x to the left by the number of bits specified by y's corresponding elements. - go: RotateAllRight nameAndSizeCheck: true commutative: false - extension: "AVX.*" documentation: !string |- // NAME rotates each element to the right by the number of bits specified by the immediate. - go: RotateRight nameAndSizeCheck: true commutative: false - extension: "AVX.*" documentation: !string |- // NAME rotates each element in x to the right by the number of bits specified by y's corresponding elements. - go: ShiftAllLeftConcat nameAndSizeCheck: true commutative: false - extension: "AVX.*" documentation: !string |- // NAME shifts each element of x to the left by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. 
- go: ShiftAllRightConcat nameAndSizeCheck: true commutative: false - extension: "AVX.*" documentation: !string |- // NAME shifts each element of x to the right by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. - go: ShiftLeftConcat nameAndSizeCheck: true commutative: false - extension: "AVX.*" documentation: !string |- // NAME shifts each element of x to the left by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. - go: ShiftRightConcat nameAndSizeCheck: true commutative: false - extension: "AVX.*" documentation: !string |- // NAME shifts each element of x to the right by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. diff --git a/internal/simdgen/xed.go b/internal/simdgen/xed.go index f773fcda..b0b4ab5a 100644 --- a/internal/simdgen/xed.go +++ b/internal/simdgen/xed.go @@ -5,9 +5,12 @@ package main import ( + "cmp" "fmt" "log" + "maps" "regexp" + "slices" "strconv" "strings" @@ -52,9 +55,9 @@ func loadXED(xedPath string) []*unify.Value { switch { case inst.RealOpcode == "N": return // Skip unstable instructions - case !(strings.HasPrefix(inst.Extension, "SSE") || strings.HasPrefix(inst.Extension, "AVX")): - // We're only intested in SSE and AVX instuctions. - return // Skip non-AVX or SSE instructions + case !strings.HasPrefix(inst.Extension, "AVX"): + // We're only interested in AVX instructions. 
+ return } if *flagDebugXED { @@ -85,6 +88,30 @@ func loadXED(xedPath string) []*unify.Value { if err != nil { log.Fatalf("walk insts: %v", err) } + + if len(unknownFeatures) > 0 { + if !*Verbose { + nInst := 0 + for _, insts := range unknownFeatures { + nInst += len(insts) + } + log.Printf("%d unhandled CPU features for %d instructions (use -v for details)", len(unknownFeatures), nInst) + } else { + keys := slices.SortedFunc(maps.Keys(unknownFeatures), func(a, b cpuFeatureKey) int { + return cmp.Or(cmp.Compare(a.Extension, b.Extension), + cmp.Compare(a.ISASet, b.ISASet)) + }) + for _, key := range keys { + if key.ISASet == "" || key.ISASet == key.Extension { + log.Printf("unhandled Extension %s", key.Extension) + } else { + log.Printf("unhandled Extension %s and ISASet %s", key.Extension, key.ISASet) + } + log.Printf(" opcodes: %s", slices.Sorted(maps.Keys(unknownFeatures[key]))) + } + } + } + return defs } @@ -492,22 +519,25 @@ func addOperandsToDef(ops []operand, instDB *unify.DefBuilder, variant instVaria } func instToUVal(inst *xeddata.Inst, ops []operand) []*unify.Value { + feature, ok := decodeCPUFeature(inst) + if !ok { + return nil + } + var vals []*unify.Value - vals = append(vals, instToUVal1(inst, ops, instVariantNone)) + vals = append(vals, instToUVal1(inst, ops, feature, instVariantNone)) if hasOptionalMask(ops) { - vals = append(vals, instToUVal1(inst, ops, instVariantMasked)) + vals = append(vals, instToUVal1(inst, ops, feature, instVariantMasked)) } return vals } -func instToUVal1(inst *xeddata.Inst, ops []operand, variant instVariant) *unify.Value { - // TODO: "feature" +func instToUVal1(inst *xeddata.Inst, ops []operand, feature string, variant instVariant) *unify.Value { var db unify.DefBuilder db.Add("goarch", unify.NewValue(unify.NewStringExact("amd64"))) db.Add("asm", unify.NewValue(unify.NewStringExact(inst.Opcode()))) addOperandsToDef(ops, &db, variant) - db.Add("extension", unify.NewValue(unify.NewStringExact(inst.Extension))) - 
db.Add("isaset", unify.NewValue(unify.NewStringExact(inst.ISASet))) + db.Add("cpuFeature", unify.NewValue(unify.NewStringExact(feature))) if strings.Contains(inst.Pattern, "ZEROING=0") { // This is an EVEX instruction, but the ".Z" (zero-merging) @@ -531,6 +561,66 @@ func instToUVal1(inst *xeddata.Inst, ops []operand, variant instVariant) *unify. return unify.NewValuePos(db.Build(), pos) } +// decodeCPUFeature returns the CPU feature name required by inst. These match +// the names of the "Has*" feature checks in the simd package. +func decodeCPUFeature(inst *xeddata.Inst) (string, bool) { + key := cpuFeatureKey{ + Extension: inst.Extension, + ISASet: isaSetStrip.ReplaceAllLiteralString(inst.ISASet, ""), + } + feat, ok := cpuFeatureMap[key] + if !ok { + imap := unknownFeatures[key] + if imap == nil { + imap = make(map[string]struct{}) + unknownFeatures[key] = imap + } + imap[inst.Opcode()] = struct{}{} + return "", false + } + if feat == "ignore" { + return "", false + } + return feat, true +} + +var isaSetStrip = regexp.MustCompile("_(128N?|256N?|512)$") + +type cpuFeatureKey struct { + Extension, ISASet string +} + +// cpuFeatureMap maps from XED's "EXTENSION" and "ISA_SET" to a CPU feature name +// that can be used in the SIMD API. +var cpuFeatureMap = map[cpuFeatureKey]string{ + {"AVX", ""}: "AVX", + {"AVX_VNNI", "AVX_VNNI"}: "AVXVNNI", + {"AVX2", ""}: "AVX2", + + // AVX-512 foundational features + // + // TODO: These should all map to "AVX512". + {"AVX512EVEX", "AVX512F"}: "AVX512F", + {"AVX512EVEX", "AVX512CD"}: "AVX512CD", + {"AVX512EVEX", "AVX512BW"}: "AVX512BW", + {"AVX512EVEX", "AVX512DQ"}: "AVX512DQ", + // AVX512VL doesn't appear explicitly in the ISASet. I guess it's implied by + // the vector length suffix. 
+ + // AVX-512 extension features + {"AVX512EVEX", "AVX512_BITALG"}: "AVX512BITALG", + {"AVX512EVEX", "AVX512_GFNI"}: "AVX512GFNI", + {"AVX512EVEX", "AVX512_VBMI2"}: "AVX512VBMI2", + {"AVX512EVEX", "AVX512_VBMI"}: "AVX512VBMI", + {"AVX512EVEX", "AVX512_VNNI"}: "AVX512VNNI", + {"AVX512EVEX", "AVX512_VPOPCNTDQ"}: "AVX512VPOPCNTDQ", + + // AVX 10.2 (not yet supported) + {"AVX512EVEX", "AVX10_2_RC"}: "ignore", +} + +var unknownFeatures = map[cpuFeatureKey]map[string]struct{}{} + // hasOptionalMask returns whether there is an optional mask operand in ops. func hasOptionalMask(ops []operand) bool { for _, op := range ops { From c1242d79c210cb1fc8df78e9a9221324d456fc12 Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Fri, 8 Aug 2025 16:23:30 -0400 Subject: [PATCH 189/200] internal/simdgen: combine AVX512F+CD+BW+DQ+VL into "AVX512" feature This affects only comments in the generated code. Change-Id: Ieb475ffaf9ae90e5f5b78c72b556e92e6e65b0c6 Reviewed-on: https://go-review.googlesource.com/c/arch/+/694859 Reviewed-by: David Chase Reviewed-by: Junyang Shao Auto-Submit: Austin Clements LUCI-TryBot-Result: Go LUCI --- internal/simdgen/xed.go | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/internal/simdgen/xed.go b/internal/simdgen/xed.go index b0b4ab5a..3bbf2cbc 100644 --- a/internal/simdgen/xed.go +++ b/internal/simdgen/xed.go @@ -597,13 +597,11 @@ var cpuFeatureMap = map[cpuFeatureKey]string{ {"AVX_VNNI", "AVX_VNNI"}: "AVXVNNI", {"AVX2", ""}: "AVX2", - // AVX-512 foundational features - // - // TODO: These should all map to "AVX512". - {"AVX512EVEX", "AVX512F"}: "AVX512F", - {"AVX512EVEX", "AVX512CD"}: "AVX512CD", - {"AVX512EVEX", "AVX512BW"}: "AVX512BW", - {"AVX512EVEX", "AVX512DQ"}: "AVX512DQ", + // AVX-512 foundational features. We combine all of these into one "AVX512" feature. 
+ {"AVX512EVEX", "AVX512F"}: "AVX512", + {"AVX512EVEX", "AVX512CD"}: "AVX512", + {"AVX512EVEX", "AVX512BW"}: "AVX512", + {"AVX512EVEX", "AVX512DQ"}: "AVX512", // AVX512VL doesn't appear explicitly in the ISASet. I guess it's implied by // the vector length suffix. From 88601128e7881337b45f4ea9ef1036dadd8e3f6e Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Fri, 8 Aug 2025 22:24:20 -0400 Subject: [PATCH 190/200] internal/simdgen: single copy of the generated header string Change-Id: I1c9b2d09961513e1b2a1e2087204afc3f8383459 Reviewed-on: https://go-review.googlesource.com/c/arch/+/694860 Auto-Submit: Austin Clements LUCI-TryBot-Result: Go LUCI Reviewed-by: David Chase --- internal/simdgen/gen_simdGenericOps.go | 3 ++- internal/simdgen/gen_simdIntrinsics.go | 4 +++- internal/simdgen/gen_simdMachineOps.go | 3 ++- internal/simdgen/gen_simdTypes.go | 24 ++++++------------------ internal/simdgen/gen_simdrules.go | 9 +-------- internal/simdgen/godefs.go | 3 +++ 6 files changed, 17 insertions(+), 29 deletions(-) diff --git a/internal/simdgen/gen_simdGenericOps.go b/internal/simdgen/gen_simdGenericOps.go index 72cc8fab..3dbbeb09 100644 --- a/internal/simdgen/gen_simdGenericOps.go +++ b/internal/simdgen/gen_simdGenericOps.go @@ -10,7 +10,7 @@ import ( "sort" ) -const simdGenericOpsTmpl = `// Code generated by x/arch/internal/simdgen using 'go run . -xedPath $XED_PATH -o godefs -goroot $GOROOT go.yaml types.yaml categories.yaml'; DO NOT EDIT. 
+const simdGenericOpsTmpl = ` package main func simdGenericOps() []opData { @@ -30,6 +30,7 @@ func simdGenericOps() []opData { func writeSIMDGenericOps(ops []Operation) *bytes.Buffer { t := templateOf(simdGenericOpsTmpl, "simdgenericOps") buffer := new(bytes.Buffer) + buffer.WriteString(generatedHeader) type genericOpsData struct { OpName string diff --git a/internal/simdgen/gen_simdIntrinsics.go b/internal/simdgen/gen_simdIntrinsics.go index 5050834b..6a1501e1 100644 --- a/internal/simdgen/gen_simdIntrinsics.go +++ b/internal/simdgen/gen_simdIntrinsics.go @@ -10,7 +10,8 @@ import ( "slices" ) -const simdIntrinsicsTmpl = `{{define "header"}}// Code generated by x/arch/internal/simdgen using 'go run . -xedPath $XED_PATH -o godefs -goroot $GOROOT go.yaml types.yaml categories.yaml'; DO NOT EDIT. +const simdIntrinsicsTmpl = ` +{{define "header"}} package ssagen import ( @@ -92,6 +93,7 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . func writeSIMDIntrinsics(ops []Operation, typeMap simdTypeMap) *bytes.Buffer { t := templateOf(simdIntrinsicsTmpl, "simdintrinsics") buffer := new(bytes.Buffer) + buffer.WriteString(generatedHeader) if err := t.ExecuteTemplate(buffer, "header", nil); err != nil { panic(fmt.Errorf("failed to execute header template: %w", err)) diff --git a/internal/simdgen/gen_simdMachineOps.go b/internal/simdgen/gen_simdMachineOps.go index f110ae61..64918e55 100644 --- a/internal/simdgen/gen_simdMachineOps.go +++ b/internal/simdgen/gen_simdMachineOps.go @@ -11,7 +11,7 @@ import ( "strings" ) -const simdMachineOpsTmpl = `// Code generated by x/arch/internal/simdgen using 'go run . -xedPath $XED_PATH -o godefs -goroot $GOROOT go.yaml types.yaml categories.yaml'; DO NOT EDIT. 
+const simdMachineOpsTmpl = ` package main func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vfpkv, w11, w21, w2k, wkw, w2kw, w2kk, w31, w3kw, wgpw, wgp, wfpw, wfpkw regInfo) []opData { @@ -31,6 +31,7 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf func writeSIMDMachineOps(ops []Operation) *bytes.Buffer { t := templateOf(simdMachineOpsTmpl, "simdAMD64Ops") buffer := new(bytes.Buffer) + buffer.WriteString(generatedHeader) type opData struct { OpName string diff --git a/internal/simdgen/gen_simdTypes.go b/internal/simdgen/gen_simdTypes.go index 820c27fa..57d48317 100644 --- a/internal/simdgen/gen_simdTypes.go +++ b/internal/simdgen/gen_simdTypes.go @@ -117,13 +117,13 @@ func compareSimdTypePairs(x, y simdTypePair) int { return compareSimdTypes(x.Tdst, y.Tdst) } -const simdTypesTemplates = `{{define "fileHeader"}}// Code generated by x/arch/internal/simdgen using 'go run . -xedPath $XED_PATH -o godefs -goroot $GOROOT go.yaml types.yaml categories.yaml'; DO NOT EDIT. - +const simdPackageHeader = generatedHeader + ` //go:build goexperiment.simd package simd -{{end}} +` +const simdTypesTemplates = ` {{define "sizeTmpl"}} // v{{.}} is a tag type that tells the compiler that this is really {{.}}-bit SIMD type v{{.}} struct { @@ -203,13 +203,7 @@ func LoadMasked{{.Name}}(y *[{{.Lanes}}]{{.Base}}, mask Mask{{.ElemBits}}x{{.Lan func (x {{.Name}}) StoreMasked(y *[{{.Lanes}}]{{.Base}}, mask Mask{{.ElemBits}}x{{.Lanes}}) ` -const simdStubsTmpl = `{{define "fileHeader"}}// Code generated by x/arch/internal/simdgen using 'go run . -xedPath $XED_PATH -o godefs -goroot $GOROOT go.yaml types.yaml categories.yaml'; DO NOT EDIT. 
- -//go:build goexperiment.simd - -package simd -{{end}} - +const simdStubsTmpl = ` {{define "op1"}} {{if .Documentation}}{{.Documentation}} //{{end}} @@ -479,10 +473,7 @@ func writeSIMDTypes(typeMap simdTypeMap) *bytes.Buffer { maskFromVal := templateOf(simdMaskFromValTemplate, "maskFromVal_amd64") buffer := new(bytes.Buffer) - - if err := t.ExecuteTemplate(buffer, "fileHeader", nil); err != nil { - panic(fmt.Errorf("failed to execute fileHeader template: %w", err)) - } + buffer.WriteString(simdPackageHeader) sizes := make([]int, 0, len(typeMap)) for size, types := range typeMap { @@ -535,10 +526,7 @@ func writeSIMDTypes(typeMap simdTypeMap) *bytes.Buffer { func writeSIMDStubs(ops []Operation, typeMap simdTypeMap) *bytes.Buffer { t := templateOf(simdStubsTmpl, "simdStubs") buffer := new(bytes.Buffer) - - if err := t.ExecuteTemplate(buffer, "fileHeader", nil); err != nil { - panic(fmt.Errorf("failed to execute fileHeader template: %w", err)) - } + buffer.WriteString(simdPackageHeader) slices.SortFunc(ops, compareOperations) diff --git a/internal/simdgen/gen_simdrules.go b/internal/simdgen/gen_simdrules.go index bac4e942..d1db2545 100644 --- a/internal/simdgen/gen_simdrules.go +++ b/internal/simdgen/gen_simdrules.go @@ -7,7 +7,6 @@ package main import ( "bytes" "fmt" - "io" "slices" "text/template" ) @@ -58,13 +57,7 @@ func compareTplRuleData(x, y tplRuleData) int { // within the specified directory. func writeSIMDRules(ops []Operation) *bytes.Buffer { buffer := new(bytes.Buffer) - - header := `// Code generated by x/arch/internal/simdgen using 'go run . -xedPath $XED_PATH -o godefs -goroot $GOROOT go.yaml types.yaml categories.yaml'; DO NOT EDIT. 
- -` - if _, err := io.WriteString(buffer, header); err != nil { - panic(fmt.Errorf("failed to write header: %w", err)) - } + buffer.WriteString(generatedHeader + "\n") var allData []tplRuleData diff --git a/internal/simdgen/godefs.go b/internal/simdgen/godefs.go index c37b4d38..203b227e 100644 --- a/internal/simdgen/godefs.go +++ b/internal/simdgen/godefs.go @@ -305,6 +305,9 @@ func compareNatural(s1, s2 string) int { return strings.Compare(s1, s2) } +const generatedHeader = `// Code generated by x/arch/internal/simdgen using 'go run . -xedPath $XED_PATH -o godefs -goroot $GOROOT go.yaml types.yaml categories.yaml'; DO NOT EDIT. +` + func writeGoDefs(path string, cl unify.Closure) error { // TODO: Merge operations with the same signature but multiple // implementations (e.g., SSE vs AVX) From 1e80165d14f3d8caf67c9e0cb801fa252ea63b98 Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Fri, 8 Aug 2025 22:42:08 -0400 Subject: [PATCH 191/200] internal/simdgen: generate cpu.go feature checks API Change-Id: I205a88c9d643f4f76b5dade5e674ce0f413e6570 Reviewed-on: https://go-review.googlesource.com/c/arch/+/694861 Auto-Submit: Austin Clements LUCI-TryBot-Result: Go LUCI Reviewed-by: Junyang Shao Reviewed-by: David Chase --- internal/simdgen/gen_simdTypes.go | 56 +++++++++++++++++++++++++++++++ internal/simdgen/godefs.go | 1 + 2 files changed, 57 insertions(+) diff --git a/internal/simdgen/gen_simdTypes.go b/internal/simdgen/gen_simdTypes.go index 57d48317..a367cce0 100644 --- a/internal/simdgen/gen_simdTypes.go +++ b/internal/simdgen/gen_simdTypes.go @@ -6,7 +6,9 @@ package main import ( "bytes" + "cmp" "fmt" + "maps" "slices" "sort" "strings" @@ -140,6 +142,29 @@ type {{.Name}} struct { {{end}} ` +const simdFeaturesTemplate = ` +import "internal/cpu" + +{{range .}} +{{- if eq .Feature "AVX512"}} +// Has{{.Feature}} returns whether the CPU supports the AVX512F+CD+BW+DQ+VL features. 
+// +// These five CPU features are bundled together, and no use of AVX-512 +// is allowed unless all of these features are supported together. +// Nearly every CPU that has shipped with any support for AVX-512 has +// supported all five of these features. +{{- else -}} +// Has{{.Feature}} returns whether the CPU supports the {{.Feature}} feature. +{{- end}} +// +// Has{{.Feature}} is defined on all GOARCHes, but will only return true on +// GOARCH {{.GoArch}}. +func Has{{.Feature}}() bool { + return cpu.X86.Has{{.Feature}} +} +{{end}} +` + const simdLoadStoreTemplate = ` // Len returns the number of elements in a {{.Name}} func (x {{.Name}}) Len() int { return {{.Lanes}} } @@ -521,6 +546,37 @@ func writeSIMDTypes(typeMap simdTypeMap) *bytes.Buffer { return buffer } +func writeSIMDFeatures(ops []Operation) *bytes.Buffer { + // Gather all features + type featureKey struct { + GoArch string + Feature string + } + featureSet := make(map[featureKey]struct{}) + for _, op := range ops { + featureSet[featureKey{op.GoArch, op.CPUFeature}] = struct{}{} + } + features := slices.SortedFunc(maps.Keys(featureSet), func(a, b featureKey) int { + if c := cmp.Compare(a.GoArch, b.GoArch); c != 0 { + return c + } + return compareNatural(a.Feature, b.Feature) + }) + + // If we ever have the same feature name on more than one GOARCH, we'll have + // to be more careful about this. + t := templateOf(simdFeaturesTemplate, "features") + + buffer := new(bytes.Buffer) + buffer.WriteString(simdPackageHeader) + + if err := t.Execute(buffer, features); err != nil { + panic(fmt.Errorf("failed to execute features template: %w", err)) + } + + return buffer +} + // writeSIMDStubs generates the simd vector intrinsic stubs and writes it to ops_amd64.go and ops_internal_amd64.go // within the specified directory. 
func writeSIMDStubs(ops []Operation, typeMap simdTypeMap) *bytes.Buffer { diff --git a/internal/simdgen/godefs.go b/internal/simdgen/godefs.go index 203b227e..7c65d0ad 100644 --- a/internal/simdgen/godefs.go +++ b/internal/simdgen/godefs.go @@ -366,6 +366,7 @@ func writeGoDefs(path string, cl unify.Closure) error { typeMap := parseSIMDTypes(deduped) formatWriteAndClose(writeSIMDTypes(typeMap), path, "src/"+simdPackage+"/types_amd64.go") + formatWriteAndClose(writeSIMDFeatures(deduped), path, "src/"+simdPackage+"/cpu.go") formatWriteAndClose(writeSIMDStubs(deduped, typeMap), path, "src/"+simdPackage+"/ops_amd64.go") formatWriteAndClose(writeSIMDIntrinsics(deduped, typeMap), path, "src/cmd/compile/internal/ssagen/simdintrinsics.go") formatWriteAndClose(writeSIMDGenericOps(deduped), path, "src/cmd/compile/internal/ssa/_gen/simdgenericOps.go") From 0bf34ca4f31739c2faf8b6c4a75d783f5f7cfa55 Mon Sep 17 00:00:00 2001 From: David Chase Date: Mon, 11 Aug 2025 17:04:16 -0400 Subject: [PATCH 192/200] internal/simdgen: make sure that output is based on sorted data there was still some variation, this may not be "the best" order in all cases, but it is definitely better than no order, and we can tweak individual files as we decide it is suitable. this does not change the current generated files, but that turns out to be just luck. Change-Id: I38c6ac72f69b9d29c71de3250985cff8b7fcd677 Reviewed-on: https://go-review.googlesource.com/c/arch/+/695335 LUCI-TryBot-Result: Go LUCI Reviewed-by: Junyang Shao --- internal/simdgen/gen_simdrules.go | 42 +++++++++++++++++++++++-------- internal/simdgen/godefs.go | 1 + 2 files changed, 32 insertions(+), 11 deletions(-) diff --git a/internal/simdgen/gen_simdrules.go b/internal/simdgen/gen_simdrules.go index d1db2545..9a0bfd51 100644 --- a/internal/simdgen/gen_simdrules.go +++ b/internal/simdgen/gen_simdrules.go @@ -11,6 +11,17 @@ import ( "text/template" ) +type tplRuleData struct { + tplName string // e.g. "sftimm" + GoOp string // e.g. 
"ShiftAllLeft" + GoType string // e.g. "Uint32x8" + Args string // e.g. "x y" + Asm string // e.g. "VPSLLD256" + ArgsOut string // e.g. "x y" + MaskInConvert string // e.g. "VPMOVVec32x8ToM" + MaskOutConvert string // e.g. "VPMOVMToVec32x8" +} + var ( ruleTemplates = template.Must(template.New("simdRules").Parse(` {{define "pureVreg"}}({{.GoOp}}{{.GoType}} {{.Args}}) => ({{.Asm}} {{.ArgsOut}}) @@ -28,19 +39,17 @@ var ( `)) ) -type tplRuleData struct { - tplName string - GoOp string - GoType string - Args string - Asm string - ArgsOut string - MaskInConvert string - MaskOutConvert string +// SSA rewrite rules need to appear in a most-to-least-specific order. This works for that. +var tmplOrder = map[string]int{ + "masksftimm": 0, + "sftimm": 1, + "maskInMaskOut": 2, + "maskOut": 3, + "maskIn": 4, + "pureVreg": 5, } func compareTplRuleData(x, y tplRuleData) int { - // TODO should MaskedXYZ compare just after XYZ? if c := compareNatural(x.GoOp, y.GoOp); c != 0 { return c } @@ -50,7 +59,18 @@ func compareTplRuleData(x, y tplRuleData) int { if c := compareNatural(x.Args, y.Args); c != 0 { return c } - return 0 + if x.tplName == y.tplName { + return 0 + } + xo, xok := tmplOrder[x.tplName] + yo, yok := tmplOrder[y.tplName] + if !xok { + panic(fmt.Errorf("Unexpected template name %s, please add to tmplOrder", x.tplName)) + } + if !yok { + panic(fmt.Errorf("Unexpected template name %s, please add to tmplOrder", y.tplName)) + } + return xo - yo } // writeSIMDRules generates the lowering and rewrite rules for ssa and writes it to simdAMD64.rules diff --git a/internal/simdgen/godefs.go b/internal/simdgen/godefs.go index 7c65d0ad..3a830ead 100644 --- a/internal/simdgen/godefs.go +++ b/internal/simdgen/godefs.go @@ -330,6 +330,7 @@ func writeGoDefs(path string, cl unify.Closure) error { // The parsed XED data might contain duplicates, like // 512 bits VPADDP. 
deduped := dedup(ops) + slices.SortFunc(deduped, compareOperations) if *Verbose { log.Printf("dedup len: %d\n", len(ops)) From 0177facd94fd367ffbefb7fe104f951543883b4b Mon Sep 17 00:00:00 2001 From: David Chase Date: Tue, 12 Aug 2025 16:59:31 -0400 Subject: [PATCH 193/200] internal/simdgen: fix generated rules for shifts the rewrite rules don't always apply in the friendliest order, be sure that they are defined so they work for all orders. this generates dev.simd CL 695475 Change-Id: I80784b1df90108fa97ea6156cdc9259fd2696868 Reviewed-on: https://go-review.googlesource.com/c/arch/+/695455 Reviewed-by: Junyang Shao LUCI-TryBot-Result: Go LUCI --- internal/simdgen/gen_simdrules.go | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/internal/simdgen/gen_simdrules.go b/internal/simdgen/gen_simdrules.go index 9a0bfd51..b0fc7e62 100644 --- a/internal/simdgen/gen_simdrules.go +++ b/internal/simdgen/gen_simdrules.go @@ -32,9 +32,9 @@ var ( {{end}} {{define "maskInMaskOut"}}({{.GoOp}}{{.GoType}} {{.Args}} mask) => ({{.MaskOutConvert}} ({{.Asm}} {{.ArgsOut}} ({{.MaskInConvert}} mask))) {{end}} -{{define "sftimm"}}({{.GoOp}}{{.GoType}} x (MOVQconst [c])) => ({{.Asm}}const [uint8(c)] x) +{{define "sftimm"}}({{.Asm}} x (MOVQconst [c])) => ({{.Asm}}const [uint8(c)] x) {{end}} -{{define "masksftimm"}}({{.GoOp}}{{.GoType}} x (MOVQconst [c]) mask) => ({{.Asm}}const [uint8(c)] x ({{.MaskInConvert}} mask)) +{{define "masksftimm"}}({{.Asm}} x (MOVQconst [c]) mask) => ({{.Asm}}const [uint8(c)] x mask) {{end}} `)) ) @@ -176,22 +176,24 @@ func writeSIMDRules(ops []Operation) *bytes.Buffer { if gOp.SpecialLower != nil { if *gOp.SpecialLower == "sftimm" { - sftImmData := data - if tplName == "maskIn" { - sftImmData.tplName = "masksftimm" - } else { - sftImmData.tplName = "sftimm" + if data.GoType[0] == 'I' { + // only do these for signed types, it is a duplicate rewrite for unsigned + sftImmData := data + if tplName == "maskIn" { + 
sftImmData.tplName = "masksftimm" + } else { + sftImmData.tplName = "sftimm" + } + allData = append(allData, sftImmData) } - allData = append(allData, sftImmData) } else { panic("simdgen sees unknwon special lower " + *gOp.SpecialLower + ", maybe implement it?") } - } else { - // SpecialLower rules cannot use "...". - if tplName == "pureVreg" && data.Args == data.ArgsOut { - data.Args = "..." - data.ArgsOut = "..." - } + } + + if tplName == "pureVreg" && data.Args == data.ArgsOut { + data.Args = "..." + data.ArgsOut = "..." } data.tplName = tplName allData = append(allData, data) From faba133cd546b7e2eb39b29f0e38f4d65b873d13 Mon Sep 17 00:00:00 2001 From: David Chase Date: Tue, 5 Aug 2025 16:30:53 -0400 Subject: [PATCH 194/200] internal/simdgen: add broadcast helper methods and SetElem for floats Generates dev.simd CL 693758 Change-Id: I97b34d453b09054dd1eef4b3f192c2946ff4875f Reviewed-on: https://go-review.googlesource.com/c/arch/+/693599 LUCI-TryBot-Result: Go LUCI Reviewed-by: Junyang Shao --- internal/simdgen/gen_utility.go | 3 + internal/simdgen/ops/Moves/categories.yaml | 15 +++ internal/simdgen/ops/Moves/go.yaml | 128 +++++++++++++++++++-- 3 files changed, 138 insertions(+), 8 deletions(-) diff --git a/internal/simdgen/gen_utility.go b/internal/simdgen/gen_utility.go index 8a3e1735..20ce3c13 100644 --- a/internal/simdgen/gen_utility.go +++ b/internal/simdgen/gen_utility.go @@ -642,6 +642,9 @@ func overwrite(ops []Operation) error { } else if op[idx].OverwriteBase != nil { oBase := *op[idx].OverwriteBase *op[idx].Go = strings.ReplaceAll(*op[idx].Go, capitalizeFirst(*op[idx].Base), capitalizeFirst(oBase)) + if op[idx].Class == "greg" { + *op[idx].Go = strings.ReplaceAll(*op[idx].Go, *op[idx].Base, oBase) + } *op[idx].Base = oBase } return nil diff --git a/internal/simdgen/ops/Moves/categories.yaml b/internal/simdgen/ops/Moves/categories.yaml index cd9260ab..ef8e0360 100644 --- a/internal/simdgen/ops/Moves/categories.yaml +++ 
b/internal/simdgen/ops/Moves/categories.yaml @@ -55,3 +55,18 @@ documentation: !string |- // NAME performs an expansion on a vector x whose elements are packed to lower parts. // The expansion is to distribute elements as indexed by mask, from lower mask elements to upper in order. +- go: Broadcast128 + commutative: false + documentation: !string |- + // NAME copies element zero of its (128-bit) input to all elements of + // the 128-bit output vector. +- go: Broadcast256 + commutative: false + documentation: !string |- + // NAME copies element zero of its (128-bit) input to all elements of + // the 256-bit output vector. +- go: Broadcast512 + commutative: false + documentation: !string |- + // NAME copies element zero of its (128-bit) input to all elements of + // the 512-bit output vector. diff --git a/internal/simdgen/ops/Moves/go.yaml b/internal/simdgen/ops/Moves/go.yaml index 50e2869e..71981c12 100644 --- a/internal/simdgen/ops/Moves/go.yaml +++ b/internal/simdgen/ops/Moves/go.yaml @@ -15,6 +15,24 @@ out: - *t +- go: SetElem + asm: "VPINSR[DQ]" + in: + - &t + class: vreg + base: int + OverwriteBase: float + - class: greg + base: int + OverwriteBase: float + lanes: 1 # Scalar, darn it! + - &imm + class: immediate + immOffset: 0 + name: index + out: + - *t + - go: GetElem asm: "VPEXTR[BWDQ]" in: @@ -195,10 +213,10 @@ - go: Permute2 asm: "VPERMI2[BWDQ]|VPERMI2P[SD]" - # Because we are overwriting the receiver's type, we + # Because we are overwriting the receiver's type, we # have to move the receiver to be a parameter so that # we can have no duplication. - operandOrder: "231Type1" + operandOrder: "231Type1" in: - *anyindices # result in arg 0 - *any @@ -218,16 +236,16 @@ # For now a non-public method because # (1) [OverwriteClass] must be set together with [OverwriteBase] # (2) "simdgen does not support [OverwriteClass] in inputs". -# That means the signature is wrong. +# That means the signature is wrong. 
- go: blend asm: VPBLENDVB - in: + in: - &v go: $t class: vreg base: int - *v - - + - class: vreg base: int name: mask @@ -237,15 +255,15 @@ # For AVX512 - go: blend asm: VPBLENDM[BWDQ] - in: + in: - &v go: $t bits: 512 class: vreg base: int - *v - inVariant: - - + inVariant: + - class: mask out: - *v @@ -258,3 +276,97 @@ - *any out: - *any + +- go: Broadcast128 + asm: VPBROADCAST[BWDQ] + in: + - class: vreg + bits: 128 + elemBits: $e + base: $b + out: + - class: vreg + bits: 128 + elemBits: $e + base: $b + +# weirdly, this one case on AVX2 is memory-operand-only +- go: Broadcast128 + asm: VPBROADCASTQ + in: + - class: vreg + bits: 128 + elemBits: 64 + base: int + OverwriteBase: float + out: + - class: vreg + bits: 128 + elemBits: 64 + base: int + OverwriteBase: float + +- go: Broadcast256 + asm: VPBROADCAST[BWDQ] + in: + - class: vreg + bits: 128 + elemBits: $e + base: $b + out: + - class: vreg + bits: 256 + elemBits: $e + base: $b + +- go: Broadcast512 + asm: VPBROADCAST[BWDQ] + in: + - class: vreg + bits: 128 + elemBits: $e + base: $b + out: + - class: vreg + bits: 512 + elemBits: $e + base: $b + +- go: Broadcast128 + asm: VBROADCASTS[SD] + in: + - class: vreg + bits: 128 + elemBits: $e + base: $b + out: + - class: vreg + bits: 128 + elemBits: $e + base: $b + +- go: Broadcast256 + asm: VBROADCASTS[SD] + in: + - class: vreg + bits: 128 + elemBits: $e + base: $b + out: + - class: vreg + bits: 256 + elemBits: $e + base: $b + +- go: Broadcast512 + asm: VBROADCASTS[SD] + in: + - class: vreg + bits: 128 + elemBits: $e + base: $b + out: + - class: vreg + bits: 512 + elemBits: $e + base: $b From d90dca2d02c32d001dc2abba41e7ce3fb86c7992 Mon Sep 17 00:00:00 2001 From: David Chase Date: Fri, 1 Aug 2025 09:26:12 -0400 Subject: [PATCH 195/200] internal/simdgen: remove non-masked 128/256-bit AVX512 comparisons If we intend to emulate these on AVX2, and also give them the "good names", then we can't use those same names for AVX512 versions of the comparisons. 
Generates dev.simd CL 692335 Change-Id: I4e814b4de42cea38fb6e81c293a21cc56f45c13e Reviewed-on: https://go-review.googlesource.com/c/arch/+/692355 LUCI-TryBot-Result: Go LUCI Reviewed-by: Junyang Shao --- internal/simdgen/main.go | 2 +- internal/simdgen/ops/Compares/go.yaml | 46 +++++++++++++++++++++++++-- internal/unify/domain.go | 35 +++++++++++++++++++- internal/unify/env.go | 5 +++ internal/unify/value.go | 7 ++++ 5 files changed, 91 insertions(+), 4 deletions(-) diff --git a/internal/simdgen/main.go b/internal/simdgen/main.go index 69eb85f9..a7f0b0de 100644 --- a/internal/simdgen/main.go +++ b/internal/simdgen/main.go @@ -250,7 +250,7 @@ func validate(cl unify.Closure, required map[*unify.Value]struct{}) { } if !def.Exact() { - fmt.Fprintf(os.Stderr, "%s: def not reduced to an exact value:\n", def.PosString()) + fmt.Fprintf(os.Stderr, "%s: def not reduced to an exact value, why is %s:\n", def.PosString(), def.WhyNotExact()) fmt.Fprintf(os.Stderr, "\t%s\n", strings.ReplaceAll(def.String(), "\n", "\n\t")) } diff --git a/internal/simdgen/ops/Compares/go.yaml b/internal/simdgen/ops/Compares/go.yaml index a8e2368f..0f916283 100644 --- a/internal/simdgen/ops/Compares/go.yaml +++ b/internal/simdgen/ops/Compares/go.yaml @@ -36,6 +36,8 @@ overwriteElementBits: 64 overwriteClass: mask overwriteBase: int + +# TODO these are redundant with VPCMP operations. # AVX-512 compares produce masks. - go: Equal asm: "V?PCMPEQ[BWDQ]" @@ -51,21 +53,61 @@ - *int out: - class: mask -# The const imm predicated compares after AVX512, please see categories.yaml -# for const imm specification. + +# MASKED signed comparisons for X/Y registers +# unmasked would clash with emulations on AVX2 - go: (Equal|Greater|Less|LessEqual|GreaterEqual|NotEqual) asm: "VPCMP[BWDQ]" in: + - &int + bits: (128|256) + go: $t + base: int - *int + - class: immediate + const: 0 # Just a placeholder, will be overwritten by const imm porting. 
+ inVariant: + - class: mask + out: + - class: mask + +# MASKED unsigned comparisons for X/Y registers +# unmasked would clash with emulations on AVX2 +- go: (Equal|Greater|Less|LessEqual|GreaterEqual|NotEqual) + asm: "VPCMPU[BWDQ]" + in: + - &uint + bits: (128|256) + go: $t + base: uint + - *uint + - class: immediate + const: 0 + inVariant: + - class: mask + out: + - class: mask + +# masked/unmasked signed comparisons for Z registers +- go: (Equal|Greater|Less|LessEqual|GreaterEqual|NotEqual) + asm: "VPCMP[BWDQ]" + in: + - &int + bits: 512 + go: $t + base: int - *int - class: immediate const: 0 # Just a placeholder, will be overwritten by const imm porting. out: - class: mask + +# masked/unmasked unsigned comparisons for Z registers - go: (Equal|Greater|Less|LessEqual|GreaterEqual|NotEqual) asm: "VPCMPU[BWDQ]" in: - &uint + bits: 512 go: $t base: uint - *uint diff --git a/internal/unify/domain.go b/internal/unify/domain.go index 5c4d349f..1e0f2be6 100644 --- a/internal/unify/domain.go +++ b/internal/unify/domain.go @@ -39,6 +39,7 @@ import ( // - [Var] - A value captured in the environment. type Domain interface { Exact() bool + WhyNotExact() string // decode stores this value in a Go value. If this value is not exact, this // returns a potentially wrapped *inexactError. @@ -77,7 +78,8 @@ func (e *decodeError) Error() string { // Top represents all possible values of all possible types. type Top struct{} -func (t Top) Exact() bool { return false } +func (t Top) Exact() bool { return false } +func (t Top) WhyNotExact() string { return "is top" } func (t Top) decode(rv reflect.Value) error { // We can decode Top into a pointer-typed value as nil. 
@@ -125,6 +127,17 @@ func (d Def) Exact() bool { return true } +// WhyNotExact returns why the value is not exact +func (d Def) WhyNotExact() string { + for s, v := range d.fields { + if !v.Exact() { + w := v.WhyNotExact() + return "field " + s + ": " + w + } + } + return "" +} + func (d Def) decode(rv reflect.Value) error { if rv.Kind() != reflect.Struct { return fmt.Errorf("cannot decode Def into %s", rv.Type()) @@ -219,6 +232,19 @@ func (d Tuple) Exact() bool { return true } +func (d Tuple) WhyNotExact() string { + if d.repeat != nil { + return "d.repeat is not nil" + } + for i, v := range d.vs { + if !v.Exact() { + w := v.WhyNotExact() + return "index " + strconv.FormatInt(int64(i), 10) + ": " + w + } + } + return "" +} + func (d Tuple) decode(rv reflect.Value) error { if d.repeat != nil { return &inexactError{"repeated tuple", rv.Type().String()} @@ -300,6 +326,13 @@ func (d String) Exact() bool { return d.kind == stringExact } +func (d String) WhyNotExact() string { + if d.kind == stringExact { + return "" + } + return "string is not exact" +} + func (d String) decode(rv reflect.Value) error { if d.kind != stringExact { return &inexactError{"regex", rv.Type().String()} diff --git a/internal/unify/env.go b/internal/unify/env.go index 1200eb36..3331ff79 100644 --- a/internal/unify/env.go +++ b/internal/unify/env.go @@ -384,6 +384,11 @@ func (d Var) Exact() bool { panic("Exact called on non-concrete Value") } +func (d Var) WhyNotExact() string { + // These can't appear in concrete Values. 
+ return "WhyNotExact called on non-concrete Value" +} + func (d Var) decode(rv reflect.Value) error { return &inexactError{"var", rv.Type().String()} } diff --git a/internal/unify/value.go b/internal/unify/value.go index 87387bbf..ffc25b87 100644 --- a/internal/unify/value.go +++ b/internal/unify/value.go @@ -69,6 +69,13 @@ func (v *Value) PosString() string { return string(b) } +func (v *Value) WhyNotExact() string { + if v.Domain == nil { + return "v.Domain is nil" + } + return v.Domain.WhyNotExact() +} + func (v *Value) Exact() bool { if v.Domain == nil { return false From 6ad8cbc456cf7deb7d97d2ec7d914a1a7fe19225 Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Wed, 13 Aug 2025 15:21:58 -0400 Subject: [PATCH 196/200] internal/simdgen: add missing copyright header Change-Id: I69912c7c9be7ccf5b22c01db0c3bec46fa478127 Reviewed-on: https://go-review.googlesource.com/c/arch/+/695619 LUCI-TryBot-Result: Go LUCI Auto-Submit: Austin Clements Reviewed-by: Junyang Shao --- internal/simdgen/sort_test.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/internal/simdgen/sort_test.go b/internal/simdgen/sort_test.go index 43a9fd64..399acf03 100644 --- a/internal/simdgen/sort_test.go +++ b/internal/simdgen/sort_test.go @@ -1,3 +1,7 @@ +// Copyright 2025 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + package main import "testing" From fb55ef737e0789cb87b0a66b25916fd051da93db Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Wed, 13 Aug 2025 15:13:38 -0400 Subject: [PATCH 197/200] internal/{simdgen,unify}: delete, move to main repo This deletes internal/simdgen and its supporting unify package. They have been moved to the dev.simd branch of the main Go repo as of CL 695975. 
Change-Id: I6247c7f97dd869b5f6934d1bc72f5b20f5f1705e Reviewed-on: https://go-review.googlesource.com/c/arch/+/695796 LUCI-TryBot-Result: Go LUCI Reviewed-by: David Chase --- go.mod | 5 +- go.sum | 4 - internal/simdgen/.gitignore | 3 - internal/simdgen/asm.yaml.toy | 107 --- internal/simdgen/categories.yaml | 1 - internal/simdgen/etetest.sh | 33 - internal/simdgen/gen_simdGenericOps.go | 70 -- internal/simdgen/gen_simdIntrinsics.go | 151 ---- internal/simdgen/gen_simdMachineOps.go | 122 --- internal/simdgen/gen_simdTypes.go | 631 -------------- internal/simdgen/gen_simdrules.go | 211 ----- internal/simdgen/gen_simdssa.go | 173 ---- internal/simdgen/gen_utility.go | 729 ---------------- internal/simdgen/go.yaml | 1 - internal/simdgen/godefs.go | 379 --------- internal/simdgen/main.go | 280 ------- internal/simdgen/ops/AddSub/categories.yaml | 37 - internal/simdgen/ops/AddSub/go.yaml | 77 -- .../simdgen/ops/BitwiseLogic/categories.yaml | 20 - internal/simdgen/ops/BitwiseLogic/go.yaml | 128 --- internal/simdgen/ops/Compares/categories.yaml | 43 - internal/simdgen/ops/Compares/go.yaml | 141 ---- internal/simdgen/ops/Converts/categories.yaml | 10 - internal/simdgen/ops/Converts/go.yaml | 21 - .../simdgen/ops/FPonlyArith/categories.yaml | 85 -- internal/simdgen/ops/FPonlyArith/go.yaml | 62 -- .../simdgen/ops/GaloisField/categories.yaml | 21 - internal/simdgen/ops/GaloisField/go.yaml | 32 - .../simdgen/ops/IntOnlyArith/categories.yaml | 21 - internal/simdgen/ops/IntOnlyArith/go.yaml | 45 - internal/simdgen/ops/MLOps/categories.yaml | 47 -- internal/simdgen/ops/MLOps/go.yaml | 113 --- internal/simdgen/ops/MinMax/categories.yaml | 9 - internal/simdgen/ops/MinMax/go.yaml | 42 - internal/simdgen/ops/Moves/categories.yaml | 72 -- internal/simdgen/ops/Moves/go.yaml | 372 --------- internal/simdgen/ops/Mul/categories.yaml | 14 - internal/simdgen/ops/Mul/go.yaml | 73 -- .../simdgen/ops/ShiftRotate/categories.yaml | 103 --- internal/simdgen/ops/ShiftRotate/go.yaml | 172 ---- 
internal/simdgen/pprint.go | 73 -- internal/simdgen/sort_test.go | 41 - internal/simdgen/types.yaml | 90 -- internal/simdgen/xed.go | 780 ------------------ internal/unify/closure.go | 154 ---- internal/unify/domain.go | 359 -------- internal/unify/dot.go | 221 ----- internal/unify/env.go | 480 ----------- internal/unify/html.go | 123 --- internal/unify/pos.go | 33 - internal/unify/testdata/stress.yaml | 33 - internal/unify/testdata/unify.yaml | 174 ---- internal/unify/testdata/vars.yaml | 175 ---- internal/unify/trace.go | 168 ---- internal/unify/unify.go | 322 -------- internal/unify/unify_test.go | 154 ---- internal/unify/value.go | 167 ---- internal/unify/value_test.go | 50 -- internal/unify/yaml.go | 619 -------------- internal/unify/yaml_test.go | 202 ----- 60 files changed, 1 insertion(+), 9077 deletions(-) delete mode 100644 internal/simdgen/.gitignore delete mode 100644 internal/simdgen/asm.yaml.toy delete mode 100644 internal/simdgen/categories.yaml delete mode 100755 internal/simdgen/etetest.sh delete mode 100644 internal/simdgen/gen_simdGenericOps.go delete mode 100644 internal/simdgen/gen_simdIntrinsics.go delete mode 100644 internal/simdgen/gen_simdMachineOps.go delete mode 100644 internal/simdgen/gen_simdTypes.go delete mode 100644 internal/simdgen/gen_simdrules.go delete mode 100644 internal/simdgen/gen_simdssa.go delete mode 100644 internal/simdgen/gen_utility.go delete mode 100644 internal/simdgen/go.yaml delete mode 100644 internal/simdgen/godefs.go delete mode 100644 internal/simdgen/main.go delete mode 100644 internal/simdgen/ops/AddSub/categories.yaml delete mode 100644 internal/simdgen/ops/AddSub/go.yaml delete mode 100644 internal/simdgen/ops/BitwiseLogic/categories.yaml delete mode 100644 internal/simdgen/ops/BitwiseLogic/go.yaml delete mode 100644 internal/simdgen/ops/Compares/categories.yaml delete mode 100644 internal/simdgen/ops/Compares/go.yaml delete mode 100644 internal/simdgen/ops/Converts/categories.yaml delete mode 100644 
internal/simdgen/ops/Converts/go.yaml delete mode 100644 internal/simdgen/ops/FPonlyArith/categories.yaml delete mode 100644 internal/simdgen/ops/FPonlyArith/go.yaml delete mode 100644 internal/simdgen/ops/GaloisField/categories.yaml delete mode 100644 internal/simdgen/ops/GaloisField/go.yaml delete mode 100644 internal/simdgen/ops/IntOnlyArith/categories.yaml delete mode 100644 internal/simdgen/ops/IntOnlyArith/go.yaml delete mode 100644 internal/simdgen/ops/MLOps/categories.yaml delete mode 100644 internal/simdgen/ops/MLOps/go.yaml delete mode 100644 internal/simdgen/ops/MinMax/categories.yaml delete mode 100644 internal/simdgen/ops/MinMax/go.yaml delete mode 100644 internal/simdgen/ops/Moves/categories.yaml delete mode 100644 internal/simdgen/ops/Moves/go.yaml delete mode 100644 internal/simdgen/ops/Mul/categories.yaml delete mode 100644 internal/simdgen/ops/Mul/go.yaml delete mode 100644 internal/simdgen/ops/ShiftRotate/categories.yaml delete mode 100644 internal/simdgen/ops/ShiftRotate/go.yaml delete mode 100644 internal/simdgen/pprint.go delete mode 100644 internal/simdgen/sort_test.go delete mode 100644 internal/simdgen/types.yaml delete mode 100644 internal/simdgen/xed.go delete mode 100644 internal/unify/closure.go delete mode 100644 internal/unify/domain.go delete mode 100644 internal/unify/dot.go delete mode 100644 internal/unify/env.go delete mode 100644 internal/unify/html.go delete mode 100644 internal/unify/pos.go delete mode 100644 internal/unify/testdata/stress.yaml delete mode 100644 internal/unify/testdata/unify.yaml delete mode 100644 internal/unify/testdata/vars.yaml delete mode 100644 internal/unify/trace.go delete mode 100644 internal/unify/unify.go delete mode 100644 internal/unify/unify_test.go delete mode 100644 internal/unify/value.go delete mode 100644 internal/unify/value_test.go delete mode 100644 internal/unify/yaml.go delete mode 100644 internal/unify/yaml_test.go diff --git a/go.mod b/go.mod index bcca36b6..b72ba1a5 100644 --- 
a/go.mod +++ b/go.mod @@ -2,7 +2,4 @@ module golang.org/x/arch go 1.23.0 -require ( - gopkg.in/yaml.v3 v3.0.1 - rsc.io/pdf v0.1.1 -) +require rsc.io/pdf v0.1.1 diff --git a/go.sum b/go.sum index cf7dae80..e854d25c 100644 --- a/go.sum +++ b/go.sum @@ -1,6 +1,2 @@ -gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= -gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= -gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= -gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= rsc.io/pdf v0.1.1 h1:k1MczvYDUvJBe93bYd7wrZLLUEcLZAuF824/I4e5Xr4= rsc.io/pdf v0.1.1/go.mod h1:n8OzWcQ6Sp37PL01nO98y4iUCRdTGarVfzxY20ICaU4= diff --git a/internal/simdgen/.gitignore b/internal/simdgen/.gitignore deleted file mode 100644 index de579f6b..00000000 --- a/internal/simdgen/.gitignore +++ /dev/null @@ -1,3 +0,0 @@ -testdata/* -.gemini/* -.gemini* diff --git a/internal/simdgen/asm.yaml.toy b/internal/simdgen/asm.yaml.toy deleted file mode 100644 index 7885c776..00000000 --- a/internal/simdgen/asm.yaml.toy +++ /dev/null @@ -1,107 +0,0 @@ -# Hand-written toy input like -xedPath would generate. -# This input can be substituted for -xedPath. 
-!sum -- asm: ADDPS - goarch: amd64 - feature: "SSE2" - in: - - asmPos: 0 - class: vreg - base: float - elemBits: 32 - bits: 128 - - asmPos: 1 - class: vreg - base: float - elemBits: 32 - bits: 128 - out: - - asmPos: 0 - class: vreg - base: float - elemBits: 32 - bits: 128 - -- asm: ADDPD - goarch: amd64 - feature: "SSE2" - in: - - asmPos: 0 - class: vreg - base: float - elemBits: 64 - bits: 128 - - asmPos: 1 - class: vreg - base: float - elemBits: 64 - bits: 128 - out: - - asmPos: 0 - class: vreg - base: float - elemBits: 64 - bits: 128 - -- asm: PADDB - goarch: amd64 - feature: "SSE2" - in: - - asmPos: 0 - class: vreg - base: int|uint - elemBits: 32 - bits: 128 - - asmPos: 1 - class: vreg - base: int|uint - elemBits: 32 - bits: 128 - out: - - asmPos: 0 - class: vreg - base: int|uint - elemBits: 32 - bits: 128 - -- asm: VPADDB - goarch: amd64 - feature: "AVX" - in: - - asmPos: 1 - class: vreg - base: int|uint - elemBits: 8 - bits: 128 - - asmPos: 2 - class: vreg - base: int|uint - elemBits: 8 - bits: 128 - out: - - asmPos: 0 - class: vreg - base: int|uint - elemBits: 8 - bits: 128 - -- asm: VPADDB - goarch: amd64 - feature: "AVX2" - in: - - asmPos: 1 - class: vreg - base: int|uint - elemBits: 8 - bits: 256 - - asmPos: 2 - class: vreg - base: int|uint - elemBits: 8 - bits: 256 - out: - - asmPos: 0 - class: vreg - base: int|uint - elemBits: 8 - bits: 256 diff --git a/internal/simdgen/categories.yaml b/internal/simdgen/categories.yaml deleted file mode 100644 index ed4c9645..00000000 --- a/internal/simdgen/categories.yaml +++ /dev/null @@ -1 +0,0 @@ -!import ops/*/categories.yaml diff --git a/internal/simdgen/etetest.sh b/internal/simdgen/etetest.sh deleted file mode 100755 index 7b5001ec..00000000 --- a/internal/simdgen/etetest.sh +++ /dev/null @@ -1,33 +0,0 @@ -#!/bin/bash -x - -cat <<\\EOF - -This is an end-to-end test of Go SIMD. 
It checks out a fresh Go -repository from the go.simd branch, then generates the SIMD input -files and runs simdgen writing into the fresh repository. - -After that it generates the modified ssa pattern matching files, then -builds the compiler. - -\EOF - -rm -rf go-test -git clone https://go.googlesource.com/go -b dev.simd go-test -go run . -xedPath xeddata -o godefs -goroot ./go-test go.yaml types.yaml categories.yaml -(cd go-test/src/cmd/compile/internal/ssa/_gen ; go run *.go ) -(cd go-test/src ; GOEXPERIMENT=simd ./make.bash ) -(cd go-test/bin; b=`pwd` ; cd ../src/simd/testdata; GOARCH=amd64 $b/go run .) -(cd go-test/bin; b=`pwd` ; cd ../src ; -GOEXPERIMENT=simd GOARCH=amd64 $b/go test -v simd -GOEXPERIMENT=simd $b/go test go/doc -GOEXPERIMENT=simd $b/go test go/build -GOEXPERIMENT=simd $b/go test cmd/api -v -check -$b/go test go/doc -$b/go test go/build -$b/go test cmd/api -v -check - -$b/go test cmd/compile/internal/ssagen -simd=0 -GOEXPERIMENT=simd $b/go test cmd/compile/internal/ssagen -simd=0 -) - -# next, add some tests of SIMD itself diff --git a/internal/simdgen/gen_simdGenericOps.go b/internal/simdgen/gen_simdGenericOps.go deleted file mode 100644 index 3dbbeb09..00000000 --- a/internal/simdgen/gen_simdGenericOps.go +++ /dev/null @@ -1,70 +0,0 @@ -// Copyright 2025 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package main - -import ( - "bytes" - "fmt" - "sort" -) - -const simdGenericOpsTmpl = ` -package main - -func simdGenericOps() []opData { - return []opData{ -{{- range .Ops }} - {name: "{{.OpName}}", argLength: {{.OpInLen}}, commutative: {{.Comm}}}, -{{- end }} -{{- range .OpsImm }} - {name: "{{.OpName}}", argLength: {{.OpInLen}}, commutative: {{.Comm}}, aux: "UInt8"}, -{{- end }} - } -} -` - -// writeSIMDGenericOps generates the generic ops and writes it to simdAMD64ops.go -// within the specified directory. 
-func writeSIMDGenericOps(ops []Operation) *bytes.Buffer { - t := templateOf(simdGenericOpsTmpl, "simdgenericOps") - buffer := new(bytes.Buffer) - buffer.WriteString(generatedHeader) - - type genericOpsData struct { - OpName string - OpInLen int - Comm bool - } - type opData struct { - Ops []genericOpsData - OpsImm []genericOpsData - } - var opsData opData - for _, op := range ops { - if op.NoGenericOps != nil && *op.NoGenericOps == "true" { - continue - } - _, _, _, immType, gOp := op.shape() - gOpData := genericOpsData{gOp.GenericName(), len(gOp.In), op.Commutative} - if immType == VarImm || immType == ConstVarImm { - opsData.OpsImm = append(opsData.OpsImm, gOpData) - } else { - opsData.Ops = append(opsData.Ops, gOpData) - } - } - sort.Slice(opsData.Ops, func(i, j int) bool { - return compareNatural(opsData.Ops[i].OpName, opsData.Ops[j].OpName) < 0 - }) - sort.Slice(opsData.OpsImm, func(i, j int) bool { - return compareNatural(opsData.OpsImm[i].OpName, opsData.OpsImm[j].OpName) < 0 - }) - - err := t.Execute(buffer, opsData) - if err != nil { - panic(fmt.Errorf("failed to execute template: %w", err)) - } - - return buffer -} diff --git a/internal/simdgen/gen_simdIntrinsics.go b/internal/simdgen/gen_simdIntrinsics.go deleted file mode 100644 index 6a1501e1..00000000 --- a/internal/simdgen/gen_simdIntrinsics.go +++ /dev/null @@ -1,151 +0,0 @@ -// Copyright 2025 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. 
- -package main - -import ( - "bytes" - "fmt" - "slices" -) - -const simdIntrinsicsTmpl = ` -{{define "header"}} -package ssagen - -import ( - "cmd/compile/internal/ir" - "cmd/compile/internal/ssa" - "cmd/compile/internal/types" - "cmd/internal/sys" -) - -const simdPackage = "` + simdPackage + `" - -func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies ...sys.ArchFamily)) { -{{end}} - -{{define "op1"}} addF(simdPackage, "{{(index .In 0).Go}}.{{.Go}}", opLen1(ssa.Op{{.GenericName}}, {{.SSAType}}), sys.AMD64) -{{end}} -{{define "op2"}} addF(simdPackage, "{{(index .In 0).Go}}.{{.Go}}", opLen2(ssa.Op{{.GenericName}}, {{.SSAType}}), sys.AMD64) -{{end}} -{{define "op2_21"}} addF(simdPackage, "{{(index .In 0).Go}}.{{.Go}}", opLen2_21(ssa.Op{{.GenericName}}, {{.SSAType}}), sys.AMD64) -{{end}} -{{define "op2_21Type1"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen2_21(ssa.Op{{.GenericName}}, {{.SSAType}}), sys.AMD64) -{{end}} -{{define "op3"}} addF(simdPackage, "{{(index .In 0).Go}}.{{.Go}}", opLen3(ssa.Op{{.GenericName}}, {{.SSAType}}), sys.AMD64) -{{end}} -{{define "op3_21"}} addF(simdPackage, "{{(index .In 0).Go}}.{{.Go}}", opLen3_21(ssa.Op{{.GenericName}}, {{.SSAType}}), sys.AMD64) -{{end}} -{{define "op3_21Type1"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen3_21(ssa.Op{{.GenericName}}, {{.SSAType}}), sys.AMD64) -{{end}} -{{define "op3_231Type1"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen3_231(ssa.Op{{.GenericName}}, {{.SSAType}}), sys.AMD64) -{{end}} -{{define "op3_31"}} addF(simdPackage, "{{(index .In 2).Go}}.{{.Go}}", opLen3_31(ssa.Op{{.GenericName}}, {{.SSAType}}), sys.AMD64) -{{end}} -{{define "op4"}} addF(simdPackage, "{{(index .In 0).Go}}.{{.Go}}", opLen4(ssa.Op{{.GenericName}}, {{.SSAType}}), sys.AMD64) -{{end}} -{{define "op4_231Type1"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen4_231(ssa.Op{{.GenericName}}, {{.SSAType}}), sys.AMD64) -{{end}} -{{define "op4_31"}} addF(simdPackage, 
"{{(index .In 2).Go}}.{{.Go}}", opLen4_31(ssa.Op{{.GenericName}}, {{.SSAType}}), sys.AMD64) -{{end}} -{{define "op1Imm8"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen1Imm8(ssa.Op{{.GenericName}}, {{.SSAType}}, {{(index .In 0).ImmOffset}}), sys.AMD64) -{{end}} -{{define "op2Imm8"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen2Imm8(ssa.Op{{.GenericName}}, {{.SSAType}}, {{(index .In 0).ImmOffset}}), sys.AMD64) -{{end}} -{{define "op2Imm8_2I"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen2Imm8_2I(ssa.Op{{.GenericName}}, {{.SSAType}}, {{(index .In 0).ImmOffset}}), sys.AMD64) -{{end}} -{{define "op3Imm8"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen3Imm8(ssa.Op{{.GenericName}}, {{.SSAType}}, {{(index .In 0).ImmOffset}}), sys.AMD64) -{{end}} -{{define "op3Imm8_2I"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen3Imm8_2I(ssa.Op{{.GenericName}}, {{.SSAType}}, {{(index .In 0).ImmOffset}}), sys.AMD64) -{{end}} -{{define "op4Imm8"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen4Imm8(ssa.Op{{.GenericName}}, {{.SSAType}}, {{(index .In 0).ImmOffset}}), sys.AMD64) -{{end}} - -{{define "vectorConversion"}} addF(simdPackage, "{{.Tsrc.Name}}.As{{.Tdst.Name}}", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64) -{{end}} - -{{define "loadStore"}} addF(simdPackage, "Load{{.Name}}", simdLoad(), sys.AMD64) - addF(simdPackage, "{{.Name}}.Store", simdStore(), sys.AMD64) -{{end}} - -{{define "maskedLoadStore"}} addF(simdPackage, "LoadMasked{{.Name}}", simdMaskedLoad(ssa.OpLoadMasked{{.ElemBits}}), sys.AMD64) - addF(simdPackage, "{{.Name}}.StoreMasked", simdMaskedStore(ssa.OpStoreMasked{{.ElemBits}}), sys.AMD64) -{{end}} - -{{define "mask"}} addF(simdPackage, "{{.Name}}.As{{.VectorCounterpart}}", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64) - addF(simdPackage, "{{.VectorCounterpart}}.As{{.Name}}", func(s *state, n *ir.CallExpr, args 
[]*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64) - addF(simdPackage, "{{.Name}}.And", opLen2(ssa.OpAnd{{.ReshapedVectorWithAndOr}}, types.TypeVec{{.Size}}), sys.AMD64) - addF(simdPackage, "{{.Name}}.Or", opLen2(ssa.OpOr{{.ReshapedVectorWithAndOr}}, types.TypeVec{{.Size}}), sys.AMD64) - addF(simdPackage, "Load{{.Name}}FromBits", simdLoadMask({{.ElemBits}}, {{.Lanes}}), sys.AMD64) - addF(simdPackage, "{{.Name}}.StoreToBits", simdStoreMask({{.ElemBits}}, {{.Lanes}}), sys.AMD64) - addF(simdPackage, "{{.Name}}FromBits", simdCvtVToMask({{.ElemBits}}, {{.Lanes}}), sys.AMD64) - addF(simdPackage, "{{.Name}}.ToBits", simdCvtMaskToV({{.ElemBits}}, {{.Lanes}}), sys.AMD64) -{{end}} - -{{define "footer"}}} -{{end}} -` - -// writeSIMDIntrinsics generates the intrinsic mappings and writes it to simdintrinsics.go -// within the specified directory. -func writeSIMDIntrinsics(ops []Operation, typeMap simdTypeMap) *bytes.Buffer { - t := templateOf(simdIntrinsicsTmpl, "simdintrinsics") - buffer := new(bytes.Buffer) - buffer.WriteString(generatedHeader) - - if err := t.ExecuteTemplate(buffer, "header", nil); err != nil { - panic(fmt.Errorf("failed to execute header template: %w", err)) - } - - slices.SortFunc(ops, compareOperations) - - for _, op := range ops { - if op.NoTypes != nil && *op.NoTypes == "true" { - continue - } - if s, op, err := classifyOp(op); err == nil { - if err := t.ExecuteTemplate(buffer, s, op); err != nil { - panic(fmt.Errorf("failed to execute template %s for op %s: %w", s, op.Go, err)) - } - - } else { - panic(fmt.Errorf("failed to classify op %v: %w", op.Go, err)) - } - } - - for _, conv := range vConvertFromTypeMap(typeMap) { - if err := t.ExecuteTemplate(buffer, "vectorConversion", conv); err != nil { - panic(fmt.Errorf("failed to execute vectorConversion template: %w", err)) - } - } - - for _, typ := range typesFromTypeMap(typeMap) { - if typ.Type != "mask" { - if err := t.ExecuteTemplate(buffer, "loadStore", typ); err != nil { - 
panic(fmt.Errorf("failed to execute loadStore template: %w", err)) - } - } - } - - for _, typ := range typesFromTypeMap(typeMap) { - if typ.MaskedLoadStoreFilter() { - if err := t.ExecuteTemplate(buffer, "maskedLoadStore", typ); err != nil { - panic(fmt.Errorf("failed to execute maskedLoadStore template: %w", err)) - } - } - } - - for _, mask := range masksFromTypeMap(typeMap) { - if err := t.ExecuteTemplate(buffer, "mask", mask); err != nil { - panic(fmt.Errorf("failed to execute mask template: %w", err)) - } - } - - if err := t.ExecuteTemplate(buffer, "footer", nil); err != nil { - panic(fmt.Errorf("failed to execute footer template: %w", err)) - } - - return buffer -} diff --git a/internal/simdgen/gen_simdMachineOps.go b/internal/simdgen/gen_simdMachineOps.go deleted file mode 100644 index 64918e55..00000000 --- a/internal/simdgen/gen_simdMachineOps.go +++ /dev/null @@ -1,122 +0,0 @@ -// Copyright 2025 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package main - -import ( - "bytes" - "fmt" - "sort" - "strings" -) - -const simdMachineOpsTmpl = ` -package main - -func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vfpkv, w11, w21, w2k, wkw, w2kw, w2kk, w31, w3kw, wgpw, wgp, wfpw, wfpkw regInfo) []opData { - return []opData{ -{{- range .OpsData }} - {name: "{{.OpName}}", argLength: {{.OpInLen}}, reg: {{.RegInfo}}, asm: "{{.Asm}}", commutative: {{.Comm}}, typ: "{{.Type}}", resultInArg0: {{.ResultInArg0}}}, -{{- end }} -{{- range .OpsDataImm }} - {name: "{{.OpName}}", argLength: {{.OpInLen}}, reg: {{.RegInfo}}, asm: "{{.Asm}}", aux: "UInt8", commutative: {{.Comm}}, typ: "{{.Type}}", resultInArg0: {{.ResultInArg0}}}, -{{- end }} - } -} -` - -// writeSIMDMachineOps generates the machine ops and writes it to simdAMD64ops.go -// within the specified directory. 
-func writeSIMDMachineOps(ops []Operation) *bytes.Buffer { - t := templateOf(simdMachineOpsTmpl, "simdAMD64Ops") - buffer := new(bytes.Buffer) - buffer.WriteString(generatedHeader) - - type opData struct { - OpName string - Asm string - OpInLen int - RegInfo string - Comm bool - Type string - ResultInArg0 bool - } - type machineOpsData struct { - OpsData []opData - OpsDataImm []opData - } - seen := map[string]struct{}{} - regInfoSet := map[string]bool{ - "v11": true, "v21": true, "v2k": true, "v2kv": true, "v2kk": true, "vkv": true, "v31": true, "v3kv": true, "vgpv": true, "vgp": true, "vfpv": true, "vfpkv": true, - "w11": true, "w21": true, "w2k": true, "w2kw": true, "w2kk": true, "wkw": true, "w31": true, "w3kw": true, "wgpw": true, "wgp": true, "wfpw": true, "wfpkw": true} - opsData := make([]opData, 0) - opsDataImm := make([]opData, 0) - for _, op := range ops { - shapeIn, shapeOut, maskType, _, gOp := op.shape() - asm := machineOpName(maskType, gOp) - - // TODO: all our masked operations are now zeroing, we need to generate machine ops with merging masks, maybe copy - // one here with a name suffix "Merging". The rewrite rules will need them. - if _, ok := seen[asm]; ok { - continue - } - seen[asm] = struct{}{} - regInfo, err := op.regShape() - if err != nil { - panic(err) - } - idx, err := checkVecAsScalar(op) - if err != nil { - panic(err) - } - if idx != -1 { - if regInfo == "v21" { - regInfo = "vfpv" - } else if regInfo == "v2kv" { - regInfo = "vfpkv" - } else { - panic(fmt.Errorf("simdgen does not recognize uses of treatLikeAScalarOfSize with op regShape %s in op: %s", regInfo, op)) - } - } - // Makes AVX512 operations use upper registers - if strings.Contains(op.CPUFeature, "AVX512") { - regInfo = strings.ReplaceAll(regInfo, "v", "w") - } - if _, ok := regInfoSet[regInfo]; !ok { - panic(fmt.Errorf("unsupported register constraint, please update the template and AMD64Ops.go: %s. 
Op is %s", regInfo, op)) - } - var outType string - if shapeOut == OneVregOut || shapeOut == OneVregOutAtIn || gOp.Out[0].OverwriteClass != nil { - // If class overwrite is happening, that's not really a mask but a vreg. - outType = fmt.Sprintf("Vec%d", *gOp.Out[0].Bits) - } else if shapeOut == OneGregOut { - outType = gOp.GoType() // this is a straight Go type, not a VecNNN type - } else if shapeOut == OneKmaskOut { - outType = "Mask" - } else { - panic(fmt.Errorf("simdgen does not recognize this output shape: %d", shapeOut)) - } - resultInArg0 := false - if shapeOut == OneVregOutAtIn { - resultInArg0 = true - } - if shapeIn == OneImmIn || shapeIn == OneKmaskImmIn { - opsDataImm = append(opsDataImm, opData{asm, gOp.Asm, len(gOp.In), regInfo, gOp.Commutative, outType, resultInArg0}) - } else { - opsData = append(opsData, opData{asm, gOp.Asm, len(gOp.In), regInfo, gOp.Commutative, outType, resultInArg0}) - } - } - sort.Slice(opsData, func(i, j int) bool { - return compareNatural(opsData[i].OpName, opsData[j].OpName) < 0 - }) - sort.Slice(opsDataImm, func(i, j int) bool { - return compareNatural(opsData[i].OpName, opsData[j].OpName) < 0 - }) - err := t.Execute(buffer, machineOpsData{opsData, opsDataImm}) - if err != nil { - panic(fmt.Errorf("failed to execute template: %w", err)) - } - - return buffer -} diff --git a/internal/simdgen/gen_simdTypes.go b/internal/simdgen/gen_simdTypes.go deleted file mode 100644 index a367cce0..00000000 --- a/internal/simdgen/gen_simdTypes.go +++ /dev/null @@ -1,631 +0,0 @@ -// Copyright 2025 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package main - -import ( - "bytes" - "cmp" - "fmt" - "maps" - "slices" - "sort" - "strings" -) - -type simdType struct { - Name string // The go type name of this simd type, for example Int32x4. - Lanes int // The number of elements in this vector/mask. 
- Base string // The element's type, like for Int32x4 it will be int32. - Fields string // The struct fields, it should be right formatted. - Type string // Either "mask" or "vreg" - VectorCounterpart string // For mask use only: just replacing the "Mask" in [simdType.Name] with "Int" - ReshapedVectorWithAndOr string // For mask use only: vector AND and OR are only available in some shape with element width 32. - Size int // The size of the vector type -} - -func (x simdType) ElemBits() int { - return x.Size / x.Lanes -} - -// LanesContainer returns the smallest int/uint bit size that is -// large enough to hold one bit for each lane. E.g., Mask32x4 -// is 4 lanes, and a uint8 is the smallest uint that has 4 bits. -func (x simdType) LanesContainer() int { - if x.Lanes > 64 { - panic("too many lanes") - } - if x.Lanes > 32 { - return 64 - } - if x.Lanes > 16 { - return 32 - } - if x.Lanes > 8 { - return 16 - } - return 8 -} - -// MaskedLoadStoreFilter encodes which simd type type currently -// get masked loads/stores generated, it is used in two places, -// this forces coordination. 
-func (x simdType) MaskedLoadStoreFilter() bool { - return x.Size == 512 || x.ElemBits() >= 32 && x.Type != "mask" -} - -func (x simdType) IntelSizeSuffix() string { - switch x.ElemBits() { - case 8: - return "B" - case 16: - return "W" - case 32: - return "D" - case 64: - return "Q" - } - panic("oops") -} - -func (x simdType) MaskedLoadDoc() string { - if x.Size == 512 || x.ElemBits() < 32 { - return fmt.Sprintf("// Asm: VMOVDQU%d.Z, CPU Feature: AVX512", x.ElemBits()) - } else { - return fmt.Sprintf("// Asm: VMASKMOV%s, CPU Feature: AVX2", x.IntelSizeSuffix()) - } -} - -func (x simdType) MaskedStoreDoc() string { - if x.Size == 512 || x.ElemBits() < 32 { - return fmt.Sprintf("// Asm: VMOVDQU%d, CPU Feature: AVX512", x.ElemBits()) - } else { - return fmt.Sprintf("// Asm: VMASKMOV%s, CPU Feature: AVX2", x.IntelSizeSuffix()) - } -} - -func compareSimdTypes(x, y simdType) int { - // "vreg" then "mask" - if c := -compareNatural(x.Type, y.Type); c != 0 { - return c - } - // want "flo" < "int" < "uin" (and then 8 < 16 < 32 < 64), - // not "int16" < "int32" < "int64" < "int8") - // so limit comparison to first 3 bytes in string. 
- if c := compareNatural(x.Base[:3], y.Base[:3]); c != 0 { - return c - } - // base type size, 8 < 16 < 32 < 64 - if c := x.ElemBits() - y.ElemBits(); c != 0 { - return c - } - // vector size last - return x.Size - y.Size -} - -type simdTypeMap map[int][]simdType - -type simdTypePair struct { - Tsrc simdType - Tdst simdType -} - -func compareSimdTypePairs(x, y simdTypePair) int { - c := compareSimdTypes(x.Tsrc, y.Tsrc) - if c != 0 { - return c - } - return compareSimdTypes(x.Tdst, y.Tdst) -} - -const simdPackageHeader = generatedHeader + ` -//go:build goexperiment.simd - -package simd -` - -const simdTypesTemplates = ` -{{define "sizeTmpl"}} -// v{{.}} is a tag type that tells the compiler that this is really {{.}}-bit SIMD -type v{{.}} struct { - _{{.}} struct{} -} -{{end}} - -{{define "typeTmpl"}} -// {{.Name}} is a {{.Size}}-bit SIMD vector of {{.Lanes}} {{.Base}} -type {{.Name}} struct { -{{.Fields}} -} - -{{end}} -` - -const simdFeaturesTemplate = ` -import "internal/cpu" - -{{range .}} -{{- if eq .Feature "AVX512"}} -// Has{{.Feature}} returns whether the CPU supports the AVX512F+CD+BW+DQ+VL features. -// -// These five CPU features are bundled together, and no use of AVX-512 -// is allowed unless all of these features are supported together. -// Nearly every CPU that has shipped with any support for AVX-512 has -// supported all five of these features. -{{- else -}} -// Has{{.Feature}} returns whether the CPU supports the {{.Feature}} feature. -{{- end}} -// -// Has{{.Feature}} is defined on all GOARCHes, but will only return true on -// GOARCH {{.GoArch}}. 
-func Has{{.Feature}}() bool { - return cpu.X86.Has{{.Feature}} -} -{{end}} -` - -const simdLoadStoreTemplate = ` -// Len returns the number of elements in a {{.Name}} -func (x {{.Name}}) Len() int { return {{.Lanes}} } - -// Load{{.Name}} loads a {{.Name}} from an array -// -//go:noescape -func Load{{.Name}}(y *[{{.Lanes}}]{{.Base}}) {{.Name}} - -// Store stores a {{.Name}} to an array -// -//go:noescape -func (x {{.Name}}) Store(y *[{{.Lanes}}]{{.Base}}) -` - -const simdMaskFromBitsTemplate = ` -// Load{{.Name}}FromBits constructs a {{.Name}} from a bitmap, where 1 means set for the indexed element, 0 means unset. -// Only the lower {{.Lanes}} bits of y are used. -// -// CPU Features: AVX512 -//go:noescape -func Load{{.Name}}FromBits(y *uint64) {{.Name}} - -// StoreToBits stores a {{.Name}} as a bitmap, where 1 means set for the indexed element, 0 means unset. -// Only the lower {{.Lanes}} bits of y are used. -// -// CPU Features: AVX512 -//go:noescape -func (x {{.Name}}) StoreToBits(y *uint64) -` - -const simdMaskFromValTemplate = ` -// {{.Name}}FromBits constructs a {{.Name}} from a bitmap value, where 1 means set for the indexed element, 0 means unset. -// Only the lower {{.Lanes}} bits of y are used. -// -// Asm: KMOV{{.IntelSizeSuffix}}, CPU Feature: AVX512 -func {{.Name}}FromBits(y uint{{.LanesContainer}}) {{.Name}} - -// ToBits constructs a bitmap from a {{.Name}}, where 1 means set for the indexed element, 0 means unset. -// Only the lower {{.Lanes}} bits of y are used. 
-// -// Asm: KMOV{{.IntelSizeSuffix}}, CPU Features: AVX512 -func (x {{.Name}}) ToBits() uint{{.LanesContainer}} -` - -const simdMaskedLoadStoreTemplate = ` -// LoadMasked{{.Name}} loads a {{.Name}} from an array, -// at those elements enabled by mask -// -{{.MaskedLoadDoc}} -// -//go:noescape -func LoadMasked{{.Name}}(y *[{{.Lanes}}]{{.Base}}, mask Mask{{.ElemBits}}x{{.Lanes}}) {{.Name}} - -// StoreMasked stores a {{.Name}} to an array, -// at those elements enabled by mask -// -{{.MaskedStoreDoc}} -// -//go:noescape -func (x {{.Name}}) StoreMasked(y *[{{.Lanes}}]{{.Base}}, mask Mask{{.ElemBits}}x{{.Lanes}}) -` - -const simdStubsTmpl = ` -{{define "op1"}} -{{if .Documentation}}{{.Documentation}} -//{{end}} -// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}} -func ({{.Op0NameAndType "x"}}) {{.Go}}() {{.GoType}} -{{end}} - -{{define "op2"}} -{{if .Documentation}}{{.Documentation}} -//{{end}} -// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}} -func ({{.Op0NameAndType "x"}}) {{.Go}}({{.Op1NameAndType "y"}}) {{.GoType}} -{{end}} - -{{define "op2_21"}} -{{if .Documentation}}{{.Documentation}} -//{{end}} -// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}} -func ({{.Op1NameAndType "x"}}) {{.Go}}({{.Op0NameAndType "y"}}) {{.GoType}} -{{end}} - -{{define "op2_21Type1"}} -{{if .Documentation}}{{.Documentation}} -//{{end}} -// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}} -func ({{.Op1NameAndType "x"}}) {{.Go}}({{.Op0NameAndType "y"}}) {{.GoType}} -{{end}} - -{{define "op3"}} -{{if .Documentation}}{{.Documentation}} -//{{end}} -// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}} -func ({{.Op0NameAndType "x"}}) {{.Go}}({{.Op1NameAndType "y"}}, {{.Op2NameAndType "z"}}) {{.GoType}} -{{end}} - -{{define "op3_31"}} -{{if .Documentation}}{{.Documentation}} -//{{end}} -// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}} -func ({{.Op2NameAndType "x"}}) {{.Go}}({{.Op1NameAndType "y"}}, {{.Op0NameAndType "z"}}) {{.GoType}} -{{end}} - -{{define "op3_21"}} -{{if .Documentation}}{{.Documentation}} -//{{end}} 
-// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}} -func ({{.Op1NameAndType "x"}}) {{.Go}}({{.Op0NameAndType "y"}}, {{.Op2NameAndType "z"}}) {{.GoType}} -{{end}} - -{{define "op3_21Type1"}} -{{if .Documentation}}{{.Documentation}} -//{{end}} -// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}} -func ({{.Op1NameAndType "x"}}) {{.Go}}({{.Op0NameAndType "y"}}, {{.Op2NameAndType "z"}}) {{.GoType}} -{{end}} - -{{define "op3_231Type1"}} -{{if .Documentation}}{{.Documentation}} -//{{end}} -// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}} -func ({{.Op1NameAndType "x"}}) {{.Go}}({{.Op2NameAndType "y"}}, {{.Op0NameAndType "z"}}) {{.GoType}} -{{end}} - -{{define "op2VecAsScalar"}} -{{if .Documentation}}{{.Documentation}} -//{{end}} -// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}} -func ({{.Op0NameAndType "x"}}) {{.Go}}(y uint{{(index .In 1).TreatLikeAScalarOfSize}}) {{(index .Out 0).Go}} -{{end}} - -{{define "op3VecAsScalar"}} -{{if .Documentation}}{{.Documentation}} -//{{end}} -// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}} -func ({{.Op0NameAndType "x"}}) {{.Go}}(y uint{{(index .In 1).TreatLikeAScalarOfSize}}, {{.Op2NameAndType "z"}}) {{(index .Out 0).Go}} -{{end}} - -{{define "op4"}} -{{if .Documentation}}{{.Documentation}} -//{{end}} -// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}} -func ({{.Op0NameAndType "x"}}) {{.Go}}({{.Op1NameAndType "y"}}, {{.Op2NameAndType "z"}}, {{.Op3NameAndType "u"}}) {{.GoType}} -{{end}} - -{{define "op4_231Type1"}} -{{if .Documentation}}{{.Documentation}} -//{{end}} -// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}} -func ({{.Op1NameAndType "x"}}) {{.Go}}({{.Op2NameAndType "y"}}, {{.Op0NameAndType "z"}}, {{.Op3NameAndType "u"}}) {{.GoType}} -{{end}} - -{{define "op4_31"}} -{{if .Documentation}}{{.Documentation}} -//{{end}} -// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}} -func ({{.Op2NameAndType "x"}}) {{.Go}}({{.Op1NameAndType "y"}}, {{.Op0NameAndType "z"}}, {{.Op3NameAndType "u"}}) {{.GoType}} -{{end}} - -{{define "op1Imm8"}} -{{if 
.Documentation}}{{.Documentation}} -//{{end}} -// {{.ImmName}} results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}} -func ({{.Op1NameAndType "x"}}) {{.Go}}({{.ImmName}} uint8) {{.GoType}} -{{end}} - -{{define "op2Imm8"}} -{{if .Documentation}}{{.Documentation}} -//{{end}} -// {{.ImmName}} results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}} -func ({{.Op1NameAndType "x"}}) {{.Go}}({{.ImmName}} uint8, {{.Op2NameAndType "y"}}) {{.GoType}} -{{end}} - -{{define "op2Imm8_2I"}} -{{if .Documentation}}{{.Documentation}} -//{{end}} -// {{.ImmName}} results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}} -func ({{.Op1NameAndType "x"}}) {{.Go}}({{.Op2NameAndType "y"}}, {{.ImmName}} uint8) {{.GoType}} -{{end}} - - -{{define "op3Imm8"}} -{{if .Documentation}}{{.Documentation}} -//{{end}} -// {{.ImmName}} results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}} -func ({{.Op1NameAndType "x"}}) {{.Go}}({{.ImmName}} uint8, {{.Op2NameAndType "y"}}, {{.Op3NameAndType "z"}}) {{.GoType}} -{{end}} - -{{define "op3Imm8_2I"}} -{{if .Documentation}}{{.Documentation}} -//{{end}} -// {{.ImmName}} results in better performance when it's a constant, a non-constant value will be translated into a jump table. 
-// -// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}} -func ({{.Op1NameAndType "x"}}) {{.Go}}({{.Op2NameAndType "y"}}, {{.ImmName}} uint8, {{.Op3NameAndType "z"}}) {{.GoType}} -{{end}} - - -{{define "op4Imm8"}} -{{if .Documentation}}{{.Documentation}} -//{{end}} -// {{.ImmName}} results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}} -func ({{.Op1NameAndType "x"}}) {{.Go}}({{.ImmName}} uint8, {{.Op2NameAndType "y"}}, {{.Op3NameAndType "z"}}, {{.Op4NameAndType "u"}}) {{.GoType}} -{{end}} - -{{define "vectorConversion"}} -// {{.Tdst.Name}} converts from {{.Tsrc.Name}} to {{.Tdst.Name}} -func (from {{.Tsrc.Name}}) As{{.Tdst.Name}}() (to {{.Tdst.Name}}) -{{end}} - -{{define "mask"}} -// converts from {{.Name}} to {{.VectorCounterpart}} -func (from {{.Name}}) As{{.VectorCounterpart}}() (to {{.VectorCounterpart}}) - -// converts from {{.VectorCounterpart}} to {{.Name}} -func (from {{.VectorCounterpart}}) As{{.Name}}() (to {{.Name}}) - -func (x {{.Name}}) And(y {{.Name}}) {{.Name}} - -func (x {{.Name}}) Or(y {{.Name}}) {{.Name}} -{{end}} -` - -// parseSIMDTypes groups go simd types by their vector sizes, and -// returns a map whose key is the vector size, value is the simd type. -func parseSIMDTypes(ops []Operation) simdTypeMap { - // TODO: maybe instead of going over ops, let's try go over types.yaml. - ret := map[int][]simdType{} - seen := map[string]struct{}{} - processArg := func(arg Operand) { - if arg.Class == "immediate" || arg.Class == "greg" { - // Immediates are not encoded as vector types. 
- return - } - if _, ok := seen[*arg.Go]; ok { - return - } - seen[*arg.Go] = struct{}{} - - lanes := *arg.Lanes - base := fmt.Sprintf("%s%d", *arg.Base, *arg.ElemBits) - tagFieldNameS := fmt.Sprintf("%sx%d", base, lanes) - tagFieldS := fmt.Sprintf("%s v%d", tagFieldNameS, *arg.Bits) - valFieldS := fmt.Sprintf("vals%s[%d]%s", strings.Repeat(" ", len(tagFieldNameS)-3), lanes, base) - fields := fmt.Sprintf("\t%s\n\t%s", tagFieldS, valFieldS) - if arg.Class == "mask" { - vectorCounterpart := strings.ReplaceAll(*arg.Go, "Mask", "Int") - reshapedVectorWithAndOr := fmt.Sprintf("Int32x%d", *arg.Bits/32) - ret[*arg.Bits] = append(ret[*arg.Bits], simdType{*arg.Go, lanes, base, fields, arg.Class, vectorCounterpart, reshapedVectorWithAndOr, *arg.Bits}) - // In case the vector counterpart of a mask is not present, put its vector counterpart typedef into the map as well. - if _, ok := seen[vectorCounterpart]; !ok { - seen[vectorCounterpart] = struct{}{} - ret[*arg.Bits] = append(ret[*arg.Bits], simdType{vectorCounterpart, lanes, base, fields, "vreg", "", "", *arg.Bits}) - } - } else { - ret[*arg.Bits] = append(ret[*arg.Bits], simdType{*arg.Go, lanes, base, fields, arg.Class, "", "", *arg.Bits}) - } - } - for _, op := range ops { - for _, arg := range op.In { - processArg(arg) - } - for _, arg := range op.Out { - processArg(arg) - } - } - return ret -} - -func vConvertFromTypeMap(typeMap simdTypeMap) []simdTypePair { - v := []simdTypePair{} - for _, ts := range typeMap { - for i, tsrc := range ts { - for j, tdst := range ts { - if i != j && tsrc.Type == tdst.Type && tsrc.Type == "vreg" && - tsrc.Lanes > 1 && tdst.Lanes > 1 { - v = append(v, simdTypePair{tsrc, tdst}) - } - } - } - } - slices.SortFunc(v, compareSimdTypePairs) - return v -} - -func masksFromTypeMap(typeMap simdTypeMap) []simdType { - m := []simdType{} - for _, ts := range typeMap { - for _, tsrc := range ts { - if tsrc.Type == "mask" { - m = append(m, tsrc) - } - } - } - slices.SortFunc(m, compareSimdTypes) - 
return m -} - -func typesFromTypeMap(typeMap simdTypeMap) []simdType { - m := []simdType{} - for _, ts := range typeMap { - for _, tsrc := range ts { - if tsrc.Lanes > 1 { - m = append(m, tsrc) - } - } - } - slices.SortFunc(m, compareSimdTypes) - return m -} - -// writeSIMDTypes generates the simd vector types into a bytes.Buffer -func writeSIMDTypes(typeMap simdTypeMap) *bytes.Buffer { - t := templateOf(simdTypesTemplates, "types_amd64") - loadStore := templateOf(simdLoadStoreTemplate, "loadstore_amd64") - maskedLoadStore := templateOf(simdMaskedLoadStoreTemplate, "maskedloadstore_amd64") - maskFromBits := templateOf(simdMaskFromBitsTemplate, "maskFromBits_amd64") - maskFromVal := templateOf(simdMaskFromValTemplate, "maskFromVal_amd64") - - buffer := new(bytes.Buffer) - buffer.WriteString(simdPackageHeader) - - sizes := make([]int, 0, len(typeMap)) - for size, types := range typeMap { - slices.SortFunc(types, compareSimdTypes) - sizes = append(sizes, size) - } - sort.Ints(sizes) - - for _, size := range sizes { - if size <= 64 { - // these are scalar - continue - } - if err := t.ExecuteTemplate(buffer, "sizeTmpl", size); err != nil { - panic(fmt.Errorf("failed to execute size template for size %d: %w", size, err)) - } - for _, typeDef := range typeMap[size] { - if typeDef.Lanes == 1 { - continue - } - if err := t.ExecuteTemplate(buffer, "typeTmpl", typeDef); err != nil { - panic(fmt.Errorf("failed to execute type template for type %s: %w", typeDef.Name, err)) - } - if typeDef.Type != "mask" { - if err := loadStore.ExecuteTemplate(buffer, "loadstore_amd64", typeDef); err != nil { - panic(fmt.Errorf("failed to execute loadstore template for type %s: %w", typeDef.Name, err)) - } - // restrict to AVX2 masked loads/stores first. 
- if typeDef.MaskedLoadStoreFilter() { - if err := maskedLoadStore.ExecuteTemplate(buffer, "maskedloadstore_amd64", typeDef); err != nil { - panic(fmt.Errorf("failed to execute maskedloadstore template for type %s: %w", typeDef.Name, err)) - } - } - } else { - if err := maskFromBits.ExecuteTemplate(buffer, "maskFromBits_amd64", typeDef); err != nil { - panic(fmt.Errorf("failed to execute maskFromBits template for type %s: %w", typeDef.Name, err)) - } - if err := maskFromVal.ExecuteTemplate(buffer, "maskFromVal_amd64", typeDef); err != nil { - panic(fmt.Errorf("failed to execute maskFromVal template for type %s: %w", typeDef.Name, err)) - } - } - } - } - - return buffer -} - -func writeSIMDFeatures(ops []Operation) *bytes.Buffer { - // Gather all features - type featureKey struct { - GoArch string - Feature string - } - featureSet := make(map[featureKey]struct{}) - for _, op := range ops { - featureSet[featureKey{op.GoArch, op.CPUFeature}] = struct{}{} - } - features := slices.SortedFunc(maps.Keys(featureSet), func(a, b featureKey) int { - if c := cmp.Compare(a.GoArch, b.GoArch); c != 0 { - return c - } - return compareNatural(a.Feature, b.Feature) - }) - - // If we ever have the same feature name on more than one GOARCH, we'll have - // to be more careful about this. - t := templateOf(simdFeaturesTemplate, "features") - - buffer := new(bytes.Buffer) - buffer.WriteString(simdPackageHeader) - - if err := t.Execute(buffer, features); err != nil { - panic(fmt.Errorf("failed to execute features template: %w", err)) - } - - return buffer -} - -// writeSIMDStubs generates the simd vector intrinsic stubs and writes it to ops_amd64.go and ops_internal_amd64.go -// within the specified directory. 
-func writeSIMDStubs(ops []Operation, typeMap simdTypeMap) *bytes.Buffer { - t := templateOf(simdStubsTmpl, "simdStubs") - buffer := new(bytes.Buffer) - buffer.WriteString(simdPackageHeader) - - slices.SortFunc(ops, compareOperations) - - for i, op := range ops { - if op.NoTypes != nil && *op.NoTypes == "true" { - continue - } - idxVecAsScalar, err := checkVecAsScalar(op) - if err != nil { - panic(err) - } - if s, op, err := classifyOp(op); err == nil { - if idxVecAsScalar != -1 { - if s == "op2" || s == "op3" { - s += "VecAsScalar" - } else { - panic(fmt.Errorf("simdgen only supports op2 or op3 with TreatLikeAScalarOfSize")) - } - } - if i == 0 || op.Go != ops[i-1].Go { - fmt.Fprintf(buffer, "\n/* %s */\n", op.Go) - } - if err := t.ExecuteTemplate(buffer, s, op); err != nil { - panic(fmt.Errorf("failed to execute template %s for op %v: %w", s, op, err)) - } - } else { - panic(fmt.Errorf("failed to classify op %v: %w", op.Go, err)) - } - } - - vectorConversions := vConvertFromTypeMap(typeMap) - for _, conv := range vectorConversions { - if err := t.ExecuteTemplate(buffer, "vectorConversion", conv); err != nil { - panic(fmt.Errorf("failed to execute vectorConversion template: %w", err)) - } - } - - masks := masksFromTypeMap(typeMap) - for _, mask := range masks { - if err := t.ExecuteTemplate(buffer, "mask", mask); err != nil { - panic(fmt.Errorf("failed to execute mask template for mask %s: %w", mask.Name, err)) - } - } - - return buffer -} diff --git a/internal/simdgen/gen_simdrules.go b/internal/simdgen/gen_simdrules.go deleted file mode 100644 index b0fc7e62..00000000 --- a/internal/simdgen/gen_simdrules.go +++ /dev/null @@ -1,211 +0,0 @@ -// Copyright 2025 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package main - -import ( - "bytes" - "fmt" - "slices" - "text/template" -) - -type tplRuleData struct { - tplName string // e.g. "sftimm" - GoOp string // e.g. 
"ShiftAllLeft" - GoType string // e.g. "Uint32x8" - Args string // e.g. "x y" - Asm string // e.g. "VPSLLD256" - ArgsOut string // e.g. "x y" - MaskInConvert string // e.g. "VPMOVVec32x8ToM" - MaskOutConvert string // e.g. "VPMOVMToVec32x8" -} - -var ( - ruleTemplates = template.Must(template.New("simdRules").Parse(` -{{define "pureVreg"}}({{.GoOp}}{{.GoType}} {{.Args}}) => ({{.Asm}} {{.ArgsOut}}) -{{end}} -{{define "maskIn"}}({{.GoOp}}{{.GoType}} {{.Args}} mask) => ({{.Asm}} {{.ArgsOut}} ({{.MaskInConvert}} mask)) -{{end}} -{{define "maskOut"}}({{.GoOp}}{{.GoType}} {{.Args}}) => ({{.MaskOutConvert}} ({{.Asm}} {{.ArgsOut}})) -{{end}} -{{define "maskInMaskOut"}}({{.GoOp}}{{.GoType}} {{.Args}} mask) => ({{.MaskOutConvert}} ({{.Asm}} {{.ArgsOut}} ({{.MaskInConvert}} mask))) -{{end}} -{{define "sftimm"}}({{.Asm}} x (MOVQconst [c])) => ({{.Asm}}const [uint8(c)] x) -{{end}} -{{define "masksftimm"}}({{.Asm}} x (MOVQconst [c]) mask) => ({{.Asm}}const [uint8(c)] x mask) -{{end}} -`)) -) - -// SSA rewrite rules need to appear in a most-to-least-specific order. This works for that. -var tmplOrder = map[string]int{ - "masksftimm": 0, - "sftimm": 1, - "maskInMaskOut": 2, - "maskOut": 3, - "maskIn": 4, - "pureVreg": 5, -} - -func compareTplRuleData(x, y tplRuleData) int { - if c := compareNatural(x.GoOp, y.GoOp); c != 0 { - return c - } - if c := compareNatural(x.GoType, y.GoType); c != 0 { - return c - } - if c := compareNatural(x.Args, y.Args); c != 0 { - return c - } - if x.tplName == y.tplName { - return 0 - } - xo, xok := tmplOrder[x.tplName] - yo, yok := tmplOrder[y.tplName] - if !xok { - panic(fmt.Errorf("Unexpected template name %s, please add to tmplOrder", x.tplName)) - } - if !yok { - panic(fmt.Errorf("Unexpected template name %s, please add to tmplOrder", y.tplName)) - } - return xo - yo -} - -// writeSIMDRules generates the lowering and rewrite rules for ssa and writes it to simdAMD64.rules -// within the specified directory. 
-func writeSIMDRules(ops []Operation) *bytes.Buffer { - buffer := new(bytes.Buffer) - buffer.WriteString(generatedHeader + "\n") - - var allData []tplRuleData - - for _, opr := range ops { - if opr.NoGenericOps != nil && *opr.NoGenericOps == "true" { - continue - } - opInShape, opOutShape, maskType, immType, gOp := opr.shape() - asm := machineOpName(maskType, gOp) - vregInCnt := len(gOp.In) - if maskType == OneMask { - vregInCnt-- - } - - data := tplRuleData{ - GoOp: gOp.Go, - Asm: asm, - } - - if vregInCnt == 1 { - data.Args = "x" - data.ArgsOut = data.Args - } else if vregInCnt == 2 { - data.Args = "x y" - data.ArgsOut = data.Args - } else if vregInCnt == 3 { - data.Args = "x y z" - data.ArgsOut = data.Args - } else { - panic(fmt.Errorf("simdgen does not support more than 3 vreg in inputs")) - } - if immType == ConstImm { - data.ArgsOut = fmt.Sprintf("[%s] %s", *opr.In[0].Const, data.ArgsOut) - } else if immType == VarImm { - data.Args = fmt.Sprintf("[a] %s", data.Args) - data.ArgsOut = fmt.Sprintf("[a] %s", data.ArgsOut) - } else if immType == ConstVarImm { - data.Args = fmt.Sprintf("[a] %s", data.Args) - data.ArgsOut = fmt.Sprintf("[a+%s] %s", *opr.In[0].Const, data.ArgsOut) - } - - goType := func(op Operation) string { - if op.OperandOrder != nil { - switch *op.OperandOrder { - case "21Type1", "231Type1": - // Permute uses operand[1] for method receiver. - return *op.In[1].Go - } - } - return *op.In[0].Go - } - var tplName string - // If class overwrite is happening, that's not really a mask but a vreg. - if opOutShape == OneVregOut || opOutShape == OneVregOutAtIn || gOp.Out[0].OverwriteClass != nil { - switch opInShape { - case OneImmIn: - tplName = "pureVreg" - data.GoType = goType(gOp) - case PureVregIn: - tplName = "pureVreg" - data.GoType = goType(gOp) - case OneKmaskImmIn: - fallthrough - case OneKmaskIn: - tplName = "maskIn" - data.GoType = goType(gOp) - rearIdx := len(gOp.In) - 1 - // Mask is at the end. 
- data.MaskInConvert = fmt.Sprintf("VPMOVVec%dx%dToM", *gOp.In[rearIdx].ElemBits, *gOp.In[rearIdx].Lanes) - case PureKmaskIn: - panic(fmt.Errorf("simdgen does not support pure k mask instructions, they should be generated by compiler optimizations")) - } - } else if opOutShape == OneGregOut { - tplName = "pureVreg" // TODO this will be wrong - data.GoType = goType(gOp) - } else { - // OneKmaskOut case - data.MaskOutConvert = fmt.Sprintf("VPMOVMToVec%dx%d", *gOp.Out[0].ElemBits, *gOp.In[0].Lanes) - switch opInShape { - case OneImmIn: - fallthrough - case PureVregIn: - tplName = "maskOut" - data.GoType = goType(gOp) - case OneKmaskImmIn: - fallthrough - case OneKmaskIn: - tplName = "maskInMaskOut" - data.GoType = goType(gOp) - rearIdx := len(gOp.In) - 1 - data.MaskInConvert = fmt.Sprintf("VPMOVVec%dx%dToM", *gOp.In[rearIdx].ElemBits, *gOp.In[rearIdx].Lanes) - case PureKmaskIn: - panic(fmt.Errorf("simdgen does not support pure k mask instructions, they should be generated by compiler optimizations")) - } - } - - if gOp.SpecialLower != nil { - if *gOp.SpecialLower == "sftimm" { - if data.GoType[0] == 'I' { - // only do these for signed types, it is a duplicate rewrite for unsigned - sftImmData := data - if tplName == "maskIn" { - sftImmData.tplName = "masksftimm" - } else { - sftImmData.tplName = "sftimm" - } - allData = append(allData, sftImmData) - } - } else { - panic("simdgen sees unknwon special lower " + *gOp.SpecialLower + ", maybe implement it?") - } - } - - if tplName == "pureVreg" && data.Args == data.ArgsOut { - data.Args = "..." - data.ArgsOut = "..." 
- } - data.tplName = tplName - allData = append(allData, data) - } - - slices.SortFunc(allData, compareTplRuleData) - - for _, data := range allData { - if err := ruleTemplates.ExecuteTemplate(buffer, data.tplName, data); err != nil { - panic(fmt.Errorf("failed to execute template %s for %s: %w", data.tplName, data.GoOp+data.GoType, err)) - } - } - - return buffer -} diff --git a/internal/simdgen/gen_simdssa.go b/internal/simdgen/gen_simdssa.go deleted file mode 100644 index 5a5421a8..00000000 --- a/internal/simdgen/gen_simdssa.go +++ /dev/null @@ -1,173 +0,0 @@ -// Copyright 2025 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package main - -import ( - "bytes" - "fmt" - "strings" - "text/template" -) - -var ( - ssaTemplates = template.Must(template.New("simdSSA").Parse(` -{{define "header"}}// Code generated by x/arch/internal/simdgen using 'go run . -xedPath $XED_PATH -o godefs -goroot $GOROOT go.yaml types.yaml categories.yaml'; DO NOT EDIT. - -package amd64 - -import ( - "cmd/compile/internal/ssa" - "cmd/compile/internal/ssagen" - "cmd/internal/obj" - "cmd/internal/obj/x86" -) - -func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { - var p *obj.Prog - switch v.Op {{"{"}}{{end}} -{{define "case"}} - case {{.Cases}}: - p = {{.Helper}}(s, v) -{{end}} -{{define "footer"}} - default: - // Unknown reg shape - return false - } -{{end}} -{{define "zeroing"}} - // Masked operation are always compiled with zeroing. - switch v.Op { - case {{.}}: - x86.ParseSuffix(p, "Z") - } -{{end}} -{{define "ending"}} - return true -} -{{end}}`)) -) - -type tplSSAData struct { - Cases string - Helper string -} - -// writeSIMDSSA generates the ssa to prog lowering codes and writes it to simdssa.go -// within the specified directory. 
-func writeSIMDSSA(ops []Operation) *bytes.Buffer { - var ZeroingMask []string - regInfoKeys := []string{ - "v11", - "v21", - "v2k", - "v2kv", - "v2kk", - "vkv", - "v31", - "v3kv", - "v11Imm8", - "vkvImm8", - "v21Imm8", - "v2kImm8", - "v2kkImm8", - "v31ResultInArg0", - "v3kvResultInArg0", - "vfpv", - "vfpkv", - "vgpvImm8", - "vgpImm8", - "v2kvImm8", - } - regInfoSet := map[string][]string{} - for _, key := range regInfoKeys { - regInfoSet[key] = []string{} - } - - seen := map[string]struct{}{} - allUnseen := make(map[string][]Operation) - for _, op := range ops { - shapeIn, shapeOut, maskType, _, gOp := op.shape() - asm := machineOpName(maskType, gOp) - - if _, ok := seen[asm]; ok { - continue - } - seen[asm] = struct{}{} - caseStr := fmt.Sprintf("ssa.OpAMD64%s", asm) - if shapeIn == OneKmaskIn || shapeIn == OneKmaskImmIn { - if gOp.Zeroing == nil { - ZeroingMask = append(ZeroingMask, caseStr) - } - } - regShape, err := op.regShape() - if err != nil { - panic(err) - } - if shapeOut == OneVregOutAtIn { - regShape += "ResultInArg0" - } - if shapeIn == OneImmIn || shapeIn == OneKmaskImmIn { - regShape += "Imm8" - } - idx, err := checkVecAsScalar(op) - if err != nil { - panic(err) - } - if idx != -1 { - if regShape == "v21" { - regShape = "vfpv" - } else if regShape == "v2kv" { - regShape = "vfpkv" - } else { - panic(fmt.Errorf("simdgen does not recognize uses of treatLikeAScalarOfSize with op regShape %s in op: %s", regShape, op)) - } - } - if _, ok := regInfoSet[regShape]; !ok { - allUnseen[regShape] = append(allUnseen[regShape], op) - } - regInfoSet[regShape] = append(regInfoSet[regShape], caseStr) - } - if len(allUnseen) != 0 { - panic(fmt.Errorf("unsupported register constraint for prog, please update gen_simdssa.go and amd64/ssa.go: %+v", allUnseen)) - } - - buffer := new(bytes.Buffer) - - if err := ssaTemplates.ExecuteTemplate(buffer, "header", nil); err != nil { - panic(fmt.Errorf("failed to execute header template: %w", err)) - } - - for _, regShape := range 
regInfoKeys { - // Stable traversal of regInfoSet - cases := regInfoSet[regShape] - if len(cases) == 0 { - continue - } - data := tplSSAData{ - Cases: strings.Join(cases, ",\n\t\t"), - Helper: "simd" + capitalizeFirst(regShape), - } - if err := ssaTemplates.ExecuteTemplate(buffer, "case", data); err != nil { - panic(fmt.Errorf("failed to execute case template for %s: %w", regShape, err)) - } - } - - if err := ssaTemplates.ExecuteTemplate(buffer, "footer", nil); err != nil { - panic(fmt.Errorf("failed to execute footer template: %w", err)) - } - - if len(ZeroingMask) != 0 { - if err := ssaTemplates.ExecuteTemplate(buffer, "zeroing", strings.Join(ZeroingMask, ",\n\t\t")); err != nil { - panic(fmt.Errorf("failed to execute footer template: %w", err)) - } - } - - if err := ssaTemplates.ExecuteTemplate(buffer, "ending", nil); err != nil { - panic(fmt.Errorf("failed to execute footer template: %w", err)) - } - - return buffer -} diff --git a/internal/simdgen/gen_utility.go b/internal/simdgen/gen_utility.go deleted file mode 100644 index 20ce3c13..00000000 --- a/internal/simdgen/gen_utility.go +++ /dev/null @@ -1,729 +0,0 @@ -// Copyright 2025 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. 
- -package main - -import ( - "bufio" - "bytes" - "fmt" - "go/format" - "log" - "os" - "path/filepath" - "reflect" - "slices" - "sort" - "strings" - "text/template" - "unicode" -) - -func templateOf(temp, name string) *template.Template { - t, err := template.New(name).Parse(temp) - if err != nil { - panic(fmt.Errorf("failed to parse template %s: %w", name, err)) - } - return t -} - -func createPath(goroot string, file string) (*os.File, error) { - fp := filepath.Join(goroot, file) - dir := filepath.Dir(fp) - err := os.MkdirAll(dir, 0755) - if err != nil { - return nil, fmt.Errorf("failed to create directory %s: %w", dir, err) - } - f, err := os.Create(fp) - if err != nil { - return nil, fmt.Errorf("failed to create file %s: %w", fp, err) - } - return f, nil -} - -func formatWriteAndClose(out *bytes.Buffer, goroot string, file string) { - b, err := format.Source(out.Bytes()) - if err != nil { - fmt.Fprintf(os.Stderr, "%v\n", err) - fmt.Fprintf(os.Stderr, "%s\n", numberLines(out.Bytes())) - fmt.Fprintf(os.Stderr, "%v\n", err) - panic(err) - } else { - writeAndClose(b, goroot, file) - } -} - -func writeAndClose(b []byte, goroot string, file string) { - ofile, err := createPath(goroot, file) - if err != nil { - panic(err) - } - ofile.Write(b) - ofile.Close() -} - -// numberLines takes a slice of bytes, and returns a string where each line -// is numbered, starting from 1. -func numberLines(data []byte) string { - var buf bytes.Buffer - r := bytes.NewReader(data) - s := bufio.NewScanner(r) - for i := 1; s.Scan(); i++ { - fmt.Fprintf(&buf, "%d: %s\n", i, s.Text()) - } - return buf.String() -} - -type inShape uint8 -type outShape uint8 -type maskShape uint8 -type immShape uint8 - -const ( - InvalidIn inShape = iota - PureVregIn // vector register input only - OneKmaskIn // vector and kmask input - OneImmIn // vector and immediate input - OneKmaskImmIn // vector, kmask, and immediate inputs - PureKmaskIn // only mask inputs. 
-) - -const ( - InvalidOut outShape = iota - NoOut // no output - OneVregOut // (one) vector register output - OneGregOut // (one) general register output - OneKmaskOut // mask output - OneVregOutAtIn // the first input is also the output -) - -const ( - InvalidMask maskShape = iota - NoMask // no mask - OneMask // with mask (K1 to K7) - AllMasks // a K mask instruction (K0-K7) -) - -const ( - InvalidImm immShape = iota - NoImm // no immediate - ConstImm // const only immediate - VarImm // pure imm argument provided by the users - ConstVarImm // a combination of user arg and const -) - -// opShape returns the several integers describing the shape of the operation, -// and modified versions of the op: -// -// opNoImm is op with its inputs excluding the const imm. -// -// This function does not modify op. -func (op *Operation) shape() (shapeIn inShape, shapeOut outShape, maskType maskShape, immType immShape, - opNoImm Operation) { - if len(op.Out) > 1 { - panic(fmt.Errorf("simdgen only supports 1 output: %s", op)) - } - var outputReg int - if len(op.Out) == 1 { - outputReg = op.Out[0].AsmPos - if op.Out[0].Class == "vreg" { - shapeOut = OneVregOut - } else if op.Out[0].Class == "greg" { - shapeOut = OneGregOut - } else if op.Out[0].Class == "mask" { - shapeOut = OneKmaskOut - } else { - panic(fmt.Errorf("simdgen only supports output of class vreg or mask: %s", op)) - } - } else { - shapeOut = NoOut - // TODO: are these only Load/Stores? - // We manually supported two Load and Store, are those enough? 
- panic(fmt.Errorf("simdgen only supports 1 output: %s", op)) - } - hasImm := false - maskCount := 0 - hasVreg := false - for _, in := range op.In { - if in.AsmPos == outputReg { - if shapeOut != OneVregOutAtIn && in.AsmPos == 0 && in.Class == "vreg" { - shapeOut = OneVregOutAtIn - } else { - panic(fmt.Errorf("simdgen only support output and input sharing the same position case of \"the first input is vreg and the only output\": %s", op)) - } - } - if in.Class == "immediate" { - // A manual check on XED data found that AMD64 SIMD instructions at most - // have 1 immediates. So we don't need to check this here. - if *in.Bits != 8 { - panic(fmt.Errorf("simdgen only supports immediates of 8 bits: %s", op)) - } - hasImm = true - } else if in.Class == "mask" { - maskCount++ - } else { - hasVreg = true - } - } - opNoImm = *op - - removeImm := func(o *Operation) { - o.In = o.In[1:] - } - if hasImm { - removeImm(&opNoImm) - if op.In[0].Const != nil { - if op.In[0].ImmOffset != nil { - immType = ConstVarImm - } else { - immType = ConstImm - } - } else if op.In[0].ImmOffset != nil { - immType = VarImm - } else { - panic(fmt.Errorf("simdgen requires imm to have at least one of ImmOffset or Const set: %s", op)) - } - } else { - immType = NoImm - } - if maskCount == 0 { - maskType = NoMask - } else { - maskType = OneMask - } - checkPureMask := func() bool { - if hasImm { - panic(fmt.Errorf("simdgen does not support immediates in pure mask operations: %s", op)) - } - if hasVreg { - panic(fmt.Errorf("simdgen does not support more than 1 masks in non-pure mask operations: %s", op)) - } - return false - } - if !hasImm && maskCount == 0 { - shapeIn = PureVregIn - } else if !hasImm && maskCount > 0 { - if maskCount == 1 { - shapeIn = OneKmaskIn - } else { - if checkPureMask() { - return - } - shapeIn = PureKmaskIn - maskType = AllMasks - } - } else if hasImm && maskCount == 0 { - shapeIn = OneImmIn - } else { - if maskCount == 1 { - shapeIn = OneKmaskImmIn - } else { - 
checkPureMask() - return - } - } - return -} - -// regShape returns a string representation of the register shape. -func (op *Operation) regShape() (string, error) { - _, _, _, _, gOp := op.shape() - var regInfo string - var vRegInCnt, gRegInCnt, kMaskInCnt, vRegOutCnt, gRegOutCnt, kMaskOutCnt int - for _, in := range gOp.In { - if in.Class == "vreg" { - vRegInCnt++ - } else if in.Class == "greg" { - gRegInCnt++ - } else if in.Class == "mask" { - kMaskInCnt++ - } - } - for _, out := range gOp.Out { - // If class overwrite is happening, that's not really a mask but a vreg. - if out.Class == "vreg" || out.OverwriteClass != nil { - vRegOutCnt++ - } else if out.Class == "greg" { - gRegOutCnt++ - } else if out.Class == "mask" { - kMaskOutCnt++ - } - } - var inRegs, inMasks, outRegs, outMasks string - - rmAbbrev := func(s string, i int) string { - if i == 0 { - return "" - } - if i == 1 { - return s - } - return fmt.Sprintf("%s%d", s, i) - - } - - inRegs = rmAbbrev("v", vRegInCnt) - inRegs += rmAbbrev("gp", gRegInCnt) - inMasks = rmAbbrev("k", kMaskInCnt) - - outRegs = rmAbbrev("v", vRegOutCnt) - outRegs += rmAbbrev("gp", gRegOutCnt) - outMasks = rmAbbrev("k", kMaskOutCnt) - - if kMaskInCnt == 0 && kMaskOutCnt == 0 && gRegInCnt == 0 && gRegOutCnt == 0 { - // For pure v we can abbreviate it as v%d%d. - regInfo = fmt.Sprintf("v%d%d", vRegInCnt, vRegOutCnt) - } else if kMaskInCnt == 0 && kMaskOutCnt == 0 { - regInfo = fmt.Sprintf("%s%s", inRegs, outRegs) - } else { - regInfo = fmt.Sprintf("%s%s%s%s", inRegs, inMasks, outRegs, outMasks) - } - return regInfo, nil -} - -// sortOperand sorts op.In by putting immediates first, then vreg, and mask the last. -// TODO: verify that this is a safe assumption of the prog structure. -// from my observation looks like in asm, imms are always the first, -// masks are always the last, with vreg in between. 
-func (op *Operation) sortOperand() { - priority := map[string]int{"immediate": 0, "vreg": 1, "greg": 1, "mask": 2} - sort.SliceStable(op.In, func(i, j int) bool { - pi := priority[op.In[i].Class] - pj := priority[op.In[j].Class] - if pi != pj { - return pi < pj - } - return op.In[i].AsmPos < op.In[j].AsmPos - }) -} - -// goNormalType returns the Go type name for the result of an Op that -// does not return a vector, i.e., that returns a result in a general -// register. Currently there's only one family of Ops in Go's simd library -// that does this (GetElem), and so this is specialized to work for that, -// but the problem (mismatch betwen hardware register width and Go type -// width) seems likely to recur if there are any other cases. -func (op Operation) goNormalType() string { - if op.Go == "GetElem" { - // GetElem returns an element of the vector into a general register - // but as far as the hardware is concerned, that result is either 32 - // or 64 bits wide, no matter what the vector element width is. - // This is not "wrong" but it is not the right answer for Go source code. - // To get the Go type right, combine the base type ("int", "uint", "float"), - // with the input vector element width in bits (8,16,32,64). - - at := 0 // proper value of at depends on whether immediate was stripped or not - if op.In[at].Class == "immediate" { - at++ - } - return fmt.Sprintf("%s%d", *op.Out[0].Base, *op.In[at].ElemBits) - } - panic(fmt.Errorf("Implement goNormalType for %v", op)) -} - -// SSAType returns the string for the type reference in SSA generation, -// for example in the intrinsics generating template. -func (op Operation) SSAType() string { - if op.Out[0].Class == "greg" { - return fmt.Sprintf("types.Types[types.T%s]", strings.ToUpper(op.goNormalType())) - } - return fmt.Sprintf("types.TypeVec%d", *op.Out[0].Bits) -} - -// GoType returns the Go type returned by this operation (relative to the simd package), -// for example "int32" or "Int8x16". 
This is used in a template. -func (op Operation) GoType() string { - if op.Out[0].Class == "greg" { - return op.goNormalType() - } - return *op.Out[0].Go -} - -// ImmName returns the name to use for an operation's immediate operand. -// This can be overriden in the yaml with "name" on an operand, -// otherwise, for now, "constant" -func (op Operation) ImmName() string { - return op.Op0Name("constant") -} - -func (o Operand) OpName(s string) string { - if n := o.Name; n != nil { - return *n - } - if o.Class == "mask" { - return "mask" - } - return s -} - -func (o Operand) OpNameAndType(s string) string { - return o.OpName(s) + " " + *o.Go -} - -// GoExported returns [Go] with first character capitalized. -func (op Operation) GoExported() string { - return capitalizeFirst(op.Go) -} - -// DocumentationExported returns [Documentation] with method name capitalized. -func (op Operation) DocumentationExported() string { - return strings.ReplaceAll(op.Documentation, op.Go, op.GoExported()) -} - -// Op0Name returns the name to use for the 0 operand, -// if any is present, otherwise the parameter is used. -func (op Operation) Op0Name(s string) string { - return op.In[0].OpName(s) -} - -// Op1Name returns the name to use for the 1 operand, -// if any is present, otherwise the parameter is used. -func (op Operation) Op1Name(s string) string { - return op.In[1].OpName(s) -} - -// Op2Name returns the name to use for the 2 operand, -// if any is present, otherwise the parameter is used. -func (op Operation) Op2Name(s string) string { - return op.In[2].OpName(s) -} - -// Op3Name returns the name to use for the 3 operand, -// if any is present, otherwise the parameter is used. -func (op Operation) Op3Name(s string) string { - return op.In[3].OpName(s) -} - -// Op0NameAndType returns the name and type to use for -// the 0 operand, if a name is provided, otherwise -// the parameter value is used as the default. 
-func (op Operation) Op0NameAndType(s string) string { - return op.In[0].OpNameAndType(s) -} - -// Op1NameAndType returns the name and type to use for -// the 1 operand, if a name is provided, otherwise -// the parameter value is used as the default. -func (op Operation) Op1NameAndType(s string) string { - return op.In[1].OpNameAndType(s) -} - -// Op2NameAndType returns the name and type to use for -// the 2 operand, if a name is provided, otherwise -// the parameter value is used as the default. -func (op Operation) Op2NameAndType(s string) string { - return op.In[2].OpNameAndType(s) -} - -// Op3NameAndType returns the name and type to use for -// the 3 operand, if a name is provided, otherwise -// the parameter value is used as the default. -func (op Operation) Op3NameAndType(s string) string { - return op.In[3].OpNameAndType(s) -} - -// Op4NameAndType returns the name and type to use for -// the 4 operand, if a name is provided, otherwise -// the parameter value is used as the default. -func (op Operation) Op4NameAndType(s string) string { - return op.In[4].OpNameAndType(s) -} - -var immClasses []string = []string{"BAD0Imm", "BAD1Imm", "op1Imm8", "op2Imm8", "op3Imm8", "op4Imm8"} -var classes []string = []string{"BAD0", "op1", "op2", "op3", "op4"} - -// classifyOp returns a classification string, modified operation, and perhaps error based -// on the stub and intrinsic shape for the operation. -// The classification string is in the regular expression set "op[1234](Imm8)?(_)?" -// where the "" suffix is optionally attached to the Operation in its input yaml. -// The classification string is used to select a template or a clause of a template -// for intrinsics declaration and the ssagen intrinisics glue code in the compiler. 
-func classifyOp(op Operation) (string, Operation, error) { - _, _, _, immType, gOp := op.shape() - - var class string - - if immType == VarImm || immType == ConstVarImm { - switch l := len(op.In); l { - case 1: - return "", op, fmt.Errorf("simdgen does not recognize this operation of only immediate input: %s", op) - case 2, 3, 4, 5: - class = immClasses[l] - default: - return "", op, fmt.Errorf("simdgen does not recognize this operation of input length %d: %s", len(op.In), op) - } - if order := op.OperandOrder; order != nil { - class += "_" + *order - } - return class, op, nil - } else { - switch l := len(gOp.In); l { - case 1, 2, 3, 4: - class = classes[l] - default: - return "", op, fmt.Errorf("simdgen does not recognize this operation of input length %d: %s", len(op.In), op) - } - if order := op.OperandOrder; order != nil { - class += "_" + *order - } - return class, gOp, nil - } -} - -func checkVecAsScalar(op Operation) (idx int, err error) { - idx = -1 - sSize := 0 - for i, o := range op.In { - if o.TreatLikeAScalarOfSize != nil { - if idx == -1 { - idx = i - sSize = *o.TreatLikeAScalarOfSize - } else { - err = fmt.Errorf("simdgen only supports one TreatLikeAScalarOfSize in the arg list: %s", op) - return - } - } - } - if idx >= 0 { - if idx != 1 { - err = fmt.Errorf("simdgen only supports TreatLikeAScalarOfSize at the 2nd arg of the arg list: %s", op) - return - } - if sSize != 8 && sSize != 16 && sSize != 32 && sSize != 64 { - err = fmt.Errorf("simdgen does not recognize this uint size: %d, %s", sSize, op) - return - } - } - return -} - -// dedup is deduping operations in the full structure level. 
-func dedup(ops []Operation) (deduped []Operation) { - for _, op := range ops { - seen := false - for _, dop := range deduped { - if reflect.DeepEqual(op, dop) { - seen = true - break - } - } - if !seen { - deduped = append(deduped, op) - } - } - return -} - -func (op Operation) GenericName() string { - if op.OperandOrder != nil { - switch *op.OperandOrder { - case "21Type1", "231Type1": - // Permute uses operand[1] for method receiver. - return op.Go + *op.In[1].Go - } - } - if op.In[0].Class == "immediate" { - return op.Go + *op.In[1].Go - } - return op.Go + *op.In[0].Go -} - -// dedupGodef is deduping operations in [Op.Go]+[*Op.In[0].Go] level. -// By deduping, it means picking the least advanced architecture that satisfy the requirement: -// AVX512 will be least preferred. -// If FlagNoDedup is set, it will report the duplicates to the console. -func dedupGodef(ops []Operation) ([]Operation, error) { - seen := map[string][]Operation{} - for _, op := range ops { - _, _, _, _, gOp := op.shape() - - gN := gOp.GenericName() - seen[gN] = append(seen[gN], op) - } - if *FlagReportDup { - for gName, dup := range seen { - if len(dup) > 1 { - log.Printf("Duplicate for %s:\n", gName) - for _, op := range dup { - log.Printf("%s\n", op) - } - } - } - return ops, nil - } - isAVX512 := func(op Operation) bool { - return strings.Contains(op.CPUFeature, "AVX512") - } - deduped := []Operation{} - for _, dup := range seen { - if len(dup) > 1 { - slices.SortFunc(dup, func(i, j Operation) int { - // Put non-AVX512 candidates at the beginning - if !isAVX512(i) && isAVX512(j) { - return -1 - } - if isAVX512(i) && !isAVX512(j) { - return 1 - } - return strings.Compare(i.CPUFeature, j.CPUFeature) - }) - } - deduped = append(deduped, dup[0]) - } - slices.SortFunc(deduped, compareOperations) - return deduped, nil -} - -// Copy op.ConstImm to op.In[0].Const -// This is a hack to reduce the size of defs we need for const imm operations. 
-func copyConstImm(ops []Operation) error { - for _, op := range ops { - if op.ConstImm == nil { - continue - } - _, _, _, immType, _ := op.shape() - - if immType == ConstImm || immType == ConstVarImm { - op.In[0].Const = op.ConstImm - } - // Otherwise, just not port it - e.g. {VPCMP[BWDQ] imm=0} and {VPCMPEQ[BWDQ]} are - // the same operations "Equal", [dedupgodef] should be able to distinguish them. - } - return nil -} - -func capitalizeFirst(s string) string { - if s == "" { - return "" - } - // Convert the string to a slice of runes to handle multi-byte characters correctly. - r := []rune(s) - r[0] = unicode.ToUpper(r[0]) - return string(r) -} - -// overwrite corrects some errors due to: -// - The XED data is wrong -// - Go's SIMD API requirement, for example AVX2 compares should also produce masks. -// This rewrite has strict constraints, please see the error message. -// These constraints are also explointed in [writeSIMDRules], [writeSIMDMachineOps] -// and [writeSIMDSSA], please be careful when updating these constraints. 
-func overwrite(ops []Operation) error { - hasClassOverwrite := false - overwrite := func(op []Operand, idx int, o Operation) error { - if op[idx].OverwriteElementBits != nil { - if op[idx].ElemBits == nil { - panic(fmt.Errorf("ElemBits is nil at operand %d of %v", idx, o)) - } - *op[idx].ElemBits = *op[idx].OverwriteElementBits - *op[idx].Lanes = *op[idx].Bits / *op[idx].ElemBits - *op[idx].Go = fmt.Sprintf("%s%dx%d", capitalizeFirst(*op[idx].Base), *op[idx].ElemBits, *op[idx].Lanes) - } - if op[idx].OverwriteClass != nil { - if op[idx].OverwriteBase == nil { - panic(fmt.Errorf("simdgen: [OverwriteClass] must be set together with [OverwriteBase]: %s", op[idx])) - } - oBase := *op[idx].OverwriteBase - oClass := *op[idx].OverwriteClass - if oClass != "mask" { - panic(fmt.Errorf("simdgen: [Class] overwrite only supports overwritting to mask: %s", op[idx])) - } - if oBase != "int" { - panic(fmt.Errorf("simdgen: [Class] overwrite must set [OverwriteBase] to int: %s", op[idx])) - } - if op[idx].Class != "vreg" { - panic(fmt.Errorf("simdgen: [Class] overwrite must be overwriting [Class] from vreg: %s", op[idx])) - } - hasClassOverwrite = true - *op[idx].Base = oBase - op[idx].Class = oClass - *op[idx].Go = fmt.Sprintf("Mask%dx%d", *op[idx].ElemBits, *op[idx].Lanes) - } else if op[idx].OverwriteBase != nil { - oBase := *op[idx].OverwriteBase - *op[idx].Go = strings.ReplaceAll(*op[idx].Go, capitalizeFirst(*op[idx].Base), capitalizeFirst(oBase)) - if op[idx].Class == "greg" { - *op[idx].Go = strings.ReplaceAll(*op[idx].Go, *op[idx].Base, oBase) - } - *op[idx].Base = oBase - } - return nil - } - for i, o := range ops { - hasClassOverwrite = false - for j := range ops[i].In { - if err := overwrite(ops[i].In, j, o); err != nil { - return err - } - if hasClassOverwrite { - return fmt.Errorf("simdgen does not support [OverwriteClass] in inputs: %s", ops[i]) - } - } - for j := range ops[i].Out { - if err := overwrite(ops[i].Out, j, o); err != nil { - return err - } - } - if 
hasClassOverwrite { - for _, in := range ops[i].In { - if in.Class == "mask" { - return fmt.Errorf("simdgen only supports [OverwriteClass] for operations without mask inputs") - } - } - } - } - return nil -} - -// reportXEDInconsistency reports potential XED inconsistencies. -// We can add more fields to [Operation] to enable more checks and implement it here. -// Supported checks: -// [NameAndSizeCheck]: NAME[BWDQ] should set the elemBits accordingly. -// This check is useful to find inconsistencies, then we can add overwrite fields to -// those defs to correct them manually. -func reportXEDInconsistency(ops []Operation) error { - for _, o := range ops { - if o.NameAndSizeCheck != nil { - suffixSizeMap := map[byte]int{'B': 8, 'W': 16, 'D': 32, 'Q': 64} - checkOperand := func(opr Operand) error { - if opr.ElemBits == nil { - return fmt.Errorf("simdgen expects elemBits to be set when performing NameAndSizeCheck") - } - if v, ok := suffixSizeMap[o.Asm[len(o.Asm)-1]]; !ok { - return fmt.Errorf("simdgen expects asm to end with [BWDQ] when performing NameAndSizeCheck") - } else { - if v != *opr.ElemBits { - return fmt.Errorf("simdgen finds NameAndSizeCheck inconsistency in def: %s", o) - } - } - return nil - } - for _, in := range o.In { - if in.Class != "vreg" && in.Class != "mask" { - continue - } - if in.TreatLikeAScalarOfSize != nil { - // This is an irregular operand, don't check it. 
- continue - } - if err := checkOperand(in); err != nil { - return err - } - } - for _, out := range o.Out { - if err := checkOperand(out); err != nil { - return err - } - } - } - } - return nil -} - -func (o Operation) String() string { - return pprints(o) -} - -func (op Operand) String() string { - return pprints(op) -} diff --git a/internal/simdgen/go.yaml b/internal/simdgen/go.yaml deleted file mode 100644 index 4f077c81..00000000 --- a/internal/simdgen/go.yaml +++ /dev/null @@ -1 +0,0 @@ -!import ops/*/go.yaml diff --git a/internal/simdgen/godefs.go b/internal/simdgen/godefs.go deleted file mode 100644 index 3a830ead..00000000 --- a/internal/simdgen/godefs.go +++ /dev/null @@ -1,379 +0,0 @@ -// Copyright 2025 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package main - -import ( - "fmt" - "log" - "regexp" - "slices" - "strconv" - "strings" - - "golang.org/x/arch/internal/unify" -) - -type Operation struct { - rawOperation - - // Go is the Go method name of this operation. - // - // It is derived from the raw Go method name by adding optional suffixes. - // Currently, "Masked" is the only suffix. - Go string - - // Documentation is the doc string for this API. - // - // It is computed from the raw documentation: - // - // - "NAME" is replaced by the Go method name. - // - // - For masked operation, a sentence about masking is added. - Documentation string - - // In is the sequence of parameters to the Go method. - // - // For masked operations, this will have the mask operand appended. - In []Operand -} - -// rawOperation is the unifier representation of an [Operation]. It is -// translated into a more parsed form after unifier decoding. 
-type rawOperation struct { - Go string // Base Go method name - - GoArch string // GOARCH for this definition - Asm string // Assembly mnemonic - OperandOrder *string // optional Operand order for better Go declarations - // Optional tag to indicate this operation is paired with special generic->machine ssa lowering rules. - // Should be paired with special templates in gen_simdrules.go - SpecialLower *string - - In []Operand // Parameters - InVariant []Operand // Optional parameters - Out []Operand // Results - Commutative bool // Commutativity - CPUFeature string // CPUID/Has* feature name - Zeroing *bool // nil => use asm suffix ".Z"; false => do not use asm suffix ".Z" - Documentation *string // Documentation will be appended to the stubs comments. - // ConstMask is a hack to reduce the size of defs the user writes for const-immediate - // If present, it will be copied to [In[0].Const]. - ConstImm *string - // NameAndSizeCheck is used to check [BWDQ] maps to (8|16|32|64) elemBits. - NameAndSizeCheck *bool - // If non-nil, all generation in gen_simdTypes.go and gen_intrinsics will be skipped. - NoTypes *string - // If non-nil, all generation in gen_simdGenericOps and gen_simdrules will be skipped. - NoGenericOps *string - // If non-nil, this string will be attached to the machine ssa op name. - SSAVariant *string -} - -func (o *Operation) DecodeUnified(v *unify.Value) error { - if err := v.Decode(&o.rawOperation); err != nil { - return err - } - - isMasked := false - if len(o.InVariant) == 0 { - // No variant - } else if len(o.InVariant) == 1 && o.InVariant[0].Class == "mask" { - isMasked = true - } else { - return fmt.Errorf("unknown inVariant") - } - - // Compute full Go method name. - o.Go = o.rawOperation.Go - if isMasked { - o.Go += "Masked" - } - - // Compute doc string. 
- if o.rawOperation.Documentation != nil { - o.Documentation = *o.rawOperation.Documentation - } else { - o.Documentation = "// UNDOCUMENTED" - } - o.Documentation = regexp.MustCompile(`\bNAME\b`).ReplaceAllString(o.Documentation, o.Go) - if isMasked { - o.Documentation += "\n//\n// This operation is applied selectively under a write mask." - } - - o.In = append(o.rawOperation.In, o.rawOperation.InVariant...) - - return nil -} - -func (o *Operation) VectorWidth() int { - out := o.Out[0] - if out.Class == "vreg" { - return *out.Bits - } else if out.Class == "greg" || out.Class == "mask" { - for i := range o.In { - if o.In[i].Class == "vreg" { - return *o.In[i].Bits - } - } - } - panic(fmt.Errorf("Figure out what the vector width is for %v and implement it", *o)) -} - -func machineOpName(maskType maskShape, gOp Operation) string { - asm := gOp.Asm - if maskType == 2 { - asm += "Masked" - } - asm = fmt.Sprintf("%s%d", asm, gOp.VectorWidth()) - if gOp.SSAVariant != nil { - asm += *gOp.SSAVariant - } - return asm -} - -func compareStringPointers(x, y *string) int { - if x != nil && y != nil { - return compareNatural(*x, *y) - } - if x == nil && y == nil { - return 0 - } - if x == nil { - return -1 - } - return 1 -} - -func compareIntPointers(x, y *int) int { - if x != nil && y != nil { - return *x - *y - } - if x == nil && y == nil { - return 0 - } - if x == nil { - return -1 - } - return 1 -} - -func compareOperations(x, y Operation) int { - if c := compareNatural(x.Go, y.Go); c != 0 { - return c - } - xIn, yIn := x.In, y.In - - if len(xIn) > len(yIn) && xIn[len(xIn)-1].Class == "mask" { - xIn = xIn[:len(xIn)-1] - } else if len(xIn) < len(yIn) && yIn[len(yIn)-1].Class == "mask" { - yIn = yIn[:len(yIn)-1] - } - - if len(xIn) < len(yIn) { - return -1 - } - if len(xIn) > len(yIn) { - return 1 - } - if len(x.Out) < len(y.Out) { - return -1 - } - if len(x.Out) > len(y.Out) { - return 1 - } - for i := range xIn { - ox, oy := &xIn[i], &yIn[i] - if c := compareOperands(ox, 
oy); c != 0 { - return c - } - } - return 0 -} - -func compareOperands(x, y *Operand) int { - if c := compareNatural(x.Class, y.Class); c != 0 { - return c - } - if x.Class == "immediate" { - return compareStringPointers(x.ImmOffset, y.ImmOffset) - } else { - if c := compareStringPointers(x.Base, y.Base); c != 0 { - return c - } - if c := compareIntPointers(x.ElemBits, y.ElemBits); c != 0 { - return c - } - if c := compareIntPointers(x.Bits, y.Bits); c != 0 { - return c - } - return 0 - } -} - -type Operand struct { - Class string // One of "mask", "immediate", "vreg", "greg", and "mem" - - Go *string // Go type of this operand - AsmPos int // Position of this operand in the assembly instruction - - Base *string // Base Go type ("int", "uint", "float") - ElemBits *int // Element bit width - Bits *int // Total vector bit width - - Const *string // Optional constant value for immediates. - // Optional immediate arg offsets. If this field is non-nil, - // This operand will be an immediate operand: - // The compiler will right-shift the user-passed value by ImmOffset and set it as the AuxInt - // field of the operation. - ImmOffset *string - Name *string // optional name in the Go intrinsic declaration - Lanes *int // *Lanes equals Bits/ElemBits except for scalars, when *Lanes == 1 - // TreatLikeAScalarOfSize means only the lower $TreatLikeAScalarOfSize bits of the vector - // is used, so at the API level we can make it just a scalar value of this size; Then we - // can overwrite it to a vector of the right size during intrinsics stage. - TreatLikeAScalarOfSize *int - // If non-nil, it means the [Class] field is overwritten here, right now this is used to - // overwrite the results of AVX2 compares to masks. - OverwriteClass *string - // If non-nil, it means the [Base] field is overwritten here. This field exist solely - // because Intel's XED data is inconsistent. e.g. VANDNP[SD] marks its operand int. 
- OverwriteBase *string - // If non-nil, it means the [ElementBits] field is overwritten. This field exist solely - // because Intel's XED data is inconsistent. e.g. AVX512 VPMADDUBSW marks its operand - // elemBits 16, which should be 8. - OverwriteElementBits *int -} - -// isDigit returns true if the byte is an ASCII digit. -func isDigit(b byte) bool { - return b >= '0' && b <= '9' -} - -// compareNatural performs a "natural sort" comparison of two strings. -// It compares non-digit sections lexicographically and digit sections -// numerically. In the case of string-unequal "equal" strings like -// "a01b" and "a1b", strings.Compare breaks the tie. -// -// It returns: -// -// -1 if s1 < s2 -// 0 if s1 == s2 -// +1 if s1 > s2 -func compareNatural(s1, s2 string) int { - i, j := 0, 0 - len1, len2 := len(s1), len(s2) - - for i < len1 && j < len2 { - // Find a non-digit segment or a number segment in both strings. - if isDigit(s1[i]) && isDigit(s2[j]) { - // Number segment comparison. - numStart1 := i - for i < len1 && isDigit(s1[i]) { - i++ - } - num1, _ := strconv.Atoi(s1[numStart1:i]) - - numStart2 := j - for j < len2 && isDigit(s2[j]) { - j++ - } - num2, _ := strconv.Atoi(s2[numStart2:j]) - - if num1 < num2 { - return -1 - } - if num1 > num2 { - return 1 - } - // If numbers are equal, continue to the next segment. - } else { - // Non-digit comparison. - if s1[i] < s2[j] { - return -1 - } - if s1[i] > s2[j] { - return 1 - } - i++ - j++ - } - } - - // deal with a01b vs a1b; there needs to be an order. - return strings.Compare(s1, s2) -} - -const generatedHeader = `// Code generated by x/arch/internal/simdgen using 'go run . -xedPath $XED_PATH -o godefs -goroot $GOROOT go.yaml types.yaml categories.yaml'; DO NOT EDIT. 
-` - -func writeGoDefs(path string, cl unify.Closure) error { - // TODO: Merge operations with the same signature but multiple - // implementations (e.g., SSE vs AVX) - var ops []Operation - for def := range cl.All() { - var op Operation - if !def.Exact() { - continue - } - if err := def.Decode(&op); err != nil { - log.Println(err.Error()) - log.Println(def) - continue - } - // TODO: verify that this is safe. - op.sortOperand() - ops = append(ops, op) - } - slices.SortFunc(ops, compareOperations) - // The parsed XED data might contain duplicates, like - // 512 bits VPADDP. - deduped := dedup(ops) - slices.SortFunc(deduped, compareOperations) - - if *Verbose { - log.Printf("dedup len: %d\n", len(ops)) - } - var err error - if err = overwrite(deduped); err != nil { - return err - } - if *Verbose { - log.Printf("dedup len: %d\n", len(deduped)) - } - if *Verbose { - log.Printf("dedup len: %d\n", len(deduped)) - } - if !*FlagNoDedup { - // TODO: This can hide mistakes in the API definitions, especially when - // multiple patterns result in the same API unintentionally. Make it stricter. 
- if deduped, err = dedupGodef(deduped); err != nil { - return err - } - } - if *Verbose { - log.Printf("dedup len: %d\n", len(deduped)) - } - if !*FlagNoConstImmPorting { - if err = copyConstImm(deduped); err != nil { - return err - } - } - if *Verbose { - log.Printf("dedup len: %d\n", len(deduped)) - } - reportXEDInconsistency(deduped) - typeMap := parseSIMDTypes(deduped) - - formatWriteAndClose(writeSIMDTypes(typeMap), path, "src/"+simdPackage+"/types_amd64.go") - formatWriteAndClose(writeSIMDFeatures(deduped), path, "src/"+simdPackage+"/cpu.go") - formatWriteAndClose(writeSIMDStubs(deduped, typeMap), path, "src/"+simdPackage+"/ops_amd64.go") - formatWriteAndClose(writeSIMDIntrinsics(deduped, typeMap), path, "src/cmd/compile/internal/ssagen/simdintrinsics.go") - formatWriteAndClose(writeSIMDGenericOps(deduped), path, "src/cmd/compile/internal/ssa/_gen/simdgenericOps.go") - formatWriteAndClose(writeSIMDMachineOps(deduped), path, "src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go") - formatWriteAndClose(writeSIMDSSA(deduped), path, "src/cmd/compile/internal/amd64/simdssa.go") - writeAndClose(writeSIMDRules(deduped).Bytes(), path, "src/cmd/compile/internal/ssa/_gen/simdAMD64.rules") - - return nil -} diff --git a/internal/simdgen/main.go b/internal/simdgen/main.go deleted file mode 100644 index a7f0b0de..00000000 --- a/internal/simdgen/main.go +++ /dev/null @@ -1,280 +0,0 @@ -// Copyright 2025 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// simdgen is an experiment in generating Go <-> asm SIMD mappings. -// -// Usage: simdgen [-xedPath=path] [-q=query] input.yaml... -// -// If -xedPath is provided, one of the inputs is a sum of op-code definitions -// generated from the Intel XED data at path. -// -// If input YAML files are provided, each file is read as an input value. 
See -// [unify.Closure.UnmarshalYAML] or "go doc unify.Closure.UnmarshalYAML" for the -// format of these files. -// -// TODO: Example definitions and values. -// -// The command unifies across all of the inputs and prints all possible results -// of this unification. -// -// If the -q flag is provided, its string value is parsed as a value and treated -// as another input to unification. This is intended as a way to "query" the -// result, typically by narrowing it down to a small subset of results. -// -// Typical usage: -// -// go run . -xedPath $XEDPATH *.yaml -// -// To see just the definitions generated from XED, run: -// -// go run . -xedPath $XEDPATH -// -// (This works because if there's only one input, there's nothing to unify it -// with, so the result is simply itself.) -// -// To see just the definitions for VPADDQ: -// -// go run . -xedPath $XEDPATH -q '{asm: VPADDQ}' -// -// simdgen can also generate Go definitions of SIMD mappings: -// To generate go files to the go root, run: -// -// go run . -xedPath $XEDPATH -o godefs -goroot $PATH/TO/go go.yaml categories.yaml types.yaml -// -// types.yaml is already written, it specifies the shapes of vectors. -// categories.yaml and go.yaml contains definitions that unifies with types.yaml and XED -// data, you can find an example in ops/AddSub/. -// -// When generating Go definitions, simdgen do 3 "magic"s: -// - It splits masked operations(with op's [Masked] field set) to const and non const: -// - One is a normal masked operation, the original -// - The other has its mask operand's [Const] fields set to "K0". -// - This way the user does not need to provide a separate "K0"-masked operation def. -// -// - It deduplicates intrinsic names that have duplicates: -// - If there are two operations that shares the same signature, one is AVX512 the other -// is before AVX512, the other will be selected. -// - This happens often when some operations are defined both before AVX512 and after. 
-// This way the user does not need to provide a separate "K0" operation for the -// AVX512 counterpart. -// -// - It copies the op's [ConstImm] field to its immediate operand's [Const] field. -// - This way the user does not need to provide verbose op definition while only -// the const immediate field is different. This is useful to reduce verbosity of -// compares with imm control predicates. -// -// These 3 magics could be disabled by enabling -nosplitmask, -nodedup or -// -noconstimmporting flags. -// -// simdgen right now only supports amd64, -arch=$OTHERARCH will trigger a fatal error. -package main - -// Big TODOs: -// -// - This can produce duplicates, which can also lead to less efficient -// environment merging. Add hashing and use it for deduplication. Be careful -// about how this shows up in debug traces, since it could make things -// confusing if we don't show it happening. -// -// - Do I need Closure, Value, and Domain? It feels like I should only need two -// types. - -import ( - "cmp" - "flag" - "fmt" - "log" - "maps" - "os" - "path/filepath" - "runtime/pprof" - "slices" - "strings" - - "golang.org/x/arch/internal/unify" - "gopkg.in/yaml.v3" -) - -var ( - xedPath = flag.String("xedPath", "", "load XED datafiles from `path`") - flagQ = flag.String("q", "", "query: read `def` as another input (skips final validation)") - flagO = flag.String("o", "yaml", "output type: yaml, godefs (generate definitions into a Go source tree") - flagGoDefRoot = flag.String("goroot", ".", "the path to the Go dev directory that will receive the generated files") - FlagNoDedup = flag.Bool("nodedup", false, "disable deduplicating godefs of 2 qualifying operations from different extensions") - FlagNoConstImmPorting = flag.Bool("noconstimmporting", false, "disable const immediate porting from op to imm operand") - FlagArch = flag.String("arch", "amd64", "the target architecture") - - Verbose = flag.Bool("v", false, "verbose") - - flagDebugXED = flag.Bool("debug-xed", 
false, "show XED instructions") - flagDebugUnify = flag.Bool("debug-unify", false, "print unification trace") - flagDebugHTML = flag.String("debug-html", "", "write unification trace to `file.html`") - FlagReportDup = flag.Bool("reportdup", false, "report the duplicate godefs") - - flagCPUProfile = flag.String("cpuprofile", "", "write CPU profile to `file`") - flagMemProfile = flag.String("memprofile", "", "write memory profile to `file`") -) - -const simdPackage = "simd" - -func main() { - flag.Parse() - - if *flagCPUProfile != "" { - f, err := os.Create(*flagCPUProfile) - if err != nil { - log.Fatalf("-cpuprofile: %s", err) - } - defer f.Close() - pprof.StartCPUProfile(f) - defer pprof.StopCPUProfile() - } - if *flagMemProfile != "" { - f, err := os.Create(*flagMemProfile) - if err != nil { - log.Fatalf("-memprofile: %s", err) - } - defer func() { - pprof.WriteHeapProfile(f) - f.Close() - }() - } - - var inputs []unify.Closure - - if *FlagArch != "amd64" { - log.Fatalf("simdgen only supports amd64") - } - - // Load XED into a defs set. - if *xedPath != "" { - xedDefs := loadXED(*xedPath) - inputs = append(inputs, unify.NewSum(xedDefs...)) - } - - // Load query. - if *flagQ != "" { - r := strings.NewReader(*flagQ) - def, err := unify.Read(r, "", unify.ReadOpts{}) - if err != nil { - log.Fatalf("parsing -q: %s", err) - } - inputs = append(inputs, def) - } - - // Load defs files. 
- must := make(map[*unify.Value]struct{}) - for _, path := range flag.Args() { - defs, err := unify.ReadFile(path, unify.ReadOpts{}) - if err != nil { - log.Fatal(err) - } - inputs = append(inputs, defs) - - if filepath.Base(path) == "go.yaml" { - // These must all be used in the final result - for def := range defs.Summands() { - must[def] = struct{}{} - } - } - } - - // Prepare for unification - if *flagDebugUnify { - unify.Debug.UnifyLog = os.Stderr - } - if *flagDebugHTML != "" { - f, err := os.Create(*flagDebugHTML) - if err != nil { - log.Fatal(err) - } - unify.Debug.HTML = f - defer f.Close() - } - - // Unify! - unified, err := unify.Unify(inputs...) - if err != nil { - log.Fatal(err) - } - - // Print results. - switch *flagO { - case "yaml": - // Produce a result that looks like encoding a slice, but stream it. - fmt.Println("!sum") - var val1 [1]*unify.Value - for val := range unified.All() { - val1[0] = val - // We have to make a new encoder each time or it'll print a document - // separator between each object. - enc := yaml.NewEncoder(os.Stdout) - if err := enc.Encode(val1); err != nil { - log.Fatal(err) - } - enc.Close() - } - case "godefs": - if err := writeGoDefs(*flagGoDefRoot, unified); err != nil { - log.Fatalf("Failed writing godefs: %+v", err) - } - } - - if !*Verbose && *xedPath != "" { - if operandRemarks == 0 { - fmt.Fprintf(os.Stderr, "XED decoding generated no errors, which is unusual.\n") - } else { - fmt.Fprintf(os.Stderr, "XED decoding generated %d \"errors\" which is not cause for alarm, use -v for details.\n", operandRemarks) - } - } - - // Validate results. - // - // Don't validate if this is a command-line query because that tends to - // eliminate lots of required defs and is used in cases where maybe defs - // aren't enumerable anyway. - if *flagQ == "" && len(must) > 0 { - validate(unified, must) - } -} - -func validate(cl unify.Closure, required map[*unify.Value]struct{}) { - // Validate that: - // 1. 
All final defs are exact - // 2. All required defs are used - for def := range cl.All() { - if _, ok := def.Domain.(unify.Def); !ok { - fmt.Fprintf(os.Stderr, "%s: expected Def, got %T\n", def.PosString(), def.Domain) - continue - } - - if !def.Exact() { - fmt.Fprintf(os.Stderr, "%s: def not reduced to an exact value, why is %s:\n", def.PosString(), def.WhyNotExact()) - fmt.Fprintf(os.Stderr, "\t%s\n", strings.ReplaceAll(def.String(), "\n", "\n\t")) - } - - for root := range def.Provenance() { - delete(required, root) - } - } - // Report unused defs - unused := slices.SortedFunc(maps.Keys(required), - func(a, b *unify.Value) int { - return cmp.Or( - cmp.Compare(a.Pos().Path, b.Pos().Path), - cmp.Compare(a.Pos().Line, b.Pos().Line), - ) - }) - for _, def := range unused { - // TODO: Can we say anything more actionable? This is always a problem - // with unification: if it fails, it's very hard to point a finger at - // any particular reason. We could go back and try unifying this again - // with each subset of the inputs (starting with individual inputs) to - // at least say "it doesn't unify with anything in x.yaml". That's a lot - // of work, but if we have trouble debugging unification failure it may - // be worth it. - fmt.Fprintf(os.Stderr, "%s: def required, but did not unify (%v)\n", - def.PosString(), def) - } -} diff --git a/internal/simdgen/ops/AddSub/categories.yaml b/internal/simdgen/ops/AddSub/categories.yaml deleted file mode 100644 index 35e81042..00000000 --- a/internal/simdgen/ops/AddSub/categories.yaml +++ /dev/null @@ -1,37 +0,0 @@ -!sum -- go: Add - commutative: true - documentation: !string |- - // NAME adds corresponding elements of two vectors. -- go: AddSaturated - commutative: true - documentation: !string |- - // NAME adds corresponding elements of two vectors with saturation. -- go: Sub - commutative: false - documentation: !string |- - // NAME subtracts corresponding elements of two vectors. 
-- go: SubSaturated - commutative: false - documentation: !string |- - // NAME subtracts corresponding elements of two vectors with saturation. -- go: AddPairs - commutative: false - documentation: !string |- - // NAME horizontally adds adjacent pairs of elements. - // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...]. -- go: SubPairs - commutative: false - documentation: !string |- - // NAME horizontally subtracts adjacent pairs of elements. - // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...]. -- go: AddPairsSaturated - commutative: false - documentation: !string |- - // NAME horizontally adds adjacent pairs of elements with saturation. - // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...]. -- go: SubPairsSaturated - commutative: false - documentation: !string |- - // NAME horizontally subtracts adjacent pairs of elements with saturation. - // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...]. 
diff --git a/internal/simdgen/ops/AddSub/go.yaml b/internal/simdgen/ops/AddSub/go.yaml deleted file mode 100644 index 4423d8c7..00000000 --- a/internal/simdgen/ops/AddSub/go.yaml +++ /dev/null @@ -1,77 +0,0 @@ -!sum -# Add -- go: Add - asm: "VPADD[BWDQ]|VADDP[SD]" - in: - - &any - go: $t - - *any - out: - - *any -# Add Saturated -- go: AddSaturated - asm: "VPADDS[BWDQ]" - in: - - &int - go: $t - base: int - - *int - out: - - *int -- go: AddSaturated - asm: "VPADDUS[BWDQ]" - in: - - &uint - go: $t - base: uint - - *uint - out: - - *uint - -# Sub -- go: Sub - asm: "VPSUB[BWDQ]|VSUBP[SD]" - in: &2any - - *any - - *any - out: &1any - - *any -# Sub Saturated -- go: SubSaturated - asm: "VPSUBS[BWDQ]" - in: &2int - - *int - - *int - out: &1int - - *int -- go: SubSaturated - asm: "VPSUBUS[BWDQ]" - in: - - *uint - - *uint - out: - - *uint -- go: AddPairs - asm: "VPHADD[DW]" - in: *2any - out: *1any -- go: SubPairs - asm: "VPHSUB[DW]" - in: *2any - out: *1any -- go: AddPairs - asm: "VHADDP[SD]" # floats - in: *2any - out: *1any -- go: SubPairs - asm: "VHSUBP[SD]" # floats - in: *2any - out: *1any -- go: AddPairsSaturated - asm: "VPHADDS[DW]" - in: *2int - out: *1int -- go: SubPairsSaturated - asm: "VPHSUBS[DW]" - in: *2int - out: *1int diff --git a/internal/simdgen/ops/BitwiseLogic/categories.yaml b/internal/simdgen/ops/BitwiseLogic/categories.yaml deleted file mode 100644 index 3142d191..00000000 --- a/internal/simdgen/ops/BitwiseLogic/categories.yaml +++ /dev/null @@ -1,20 +0,0 @@ -!sum -- go: And - commutative: true - documentation: !string |- - // NAME performs a bitwise AND operation between two vectors. -- go: Or - commutative: true - documentation: !string |- - // NAME performs a bitwise OR operation between two vectors. -- go: AndNot - commutative: false - documentation: !string |- - // NAME performs a bitwise x &^ y. -- go: Xor - commutative: true - documentation: !string |- - // NAME performs a bitwise XOR operation between two vectors. 
- -# We also have PTEST and VPTERNLOG, those should be hidden from the users -# and only appear in rewrite rules. diff --git a/internal/simdgen/ops/BitwiseLogic/go.yaml b/internal/simdgen/ops/BitwiseLogic/go.yaml deleted file mode 100644 index ab344438..00000000 --- a/internal/simdgen/ops/BitwiseLogic/go.yaml +++ /dev/null @@ -1,128 +0,0 @@ -!sum -# In the XED data, *all* floating point bitwise logic operation has their -# operand type marked as uint. We are not trying to understand why Intel -# decided that they want FP bit-wise logic operations, but this irregularity -# has to be dealed with in separate rules with some overwrites. - -# For many bit-wise operations, we have the following non-orthogonal -# choices: -# -# - Non-masked AVX operations have no element width (because it -# doesn't matter), but only cover 128 and 256 bit vectors. -# -# - Masked AVX-512 operations have an element width (because it needs -# to know how to interpret the mask), and cover 128, 256, and 512 bit -# vectors. These only cover 32- and 64-bit element widths. -# -# - Non-masked AVX-512 operations still have an element width (because -# they're just the masked operations with an implicit K0 mask) but it -# doesn't matter! This is the only option for non-masked 512 bit -# operations, and we can pick any of the element widths. -# -# We unify with ALL of these operations and the compiler generator -# picks when there are multiple options. - -# TODO: We don't currently generate unmasked bit-wise operations on 512 bit -# vectors of 8- or 16-bit elements. AVX-512 only has *masked* bit-wise -# operations for 32- and 64-bit elements; while the element width doesn't matter -# for unmasked operations, right now we don't realize that we can just use the -# 32- or 64-bit version for the unmasked form. Maybe in the XED decoder we -# should recognize bit-wise operations when generating unmasked versions and -# omit the element width. 
- -# For binary operations, we constrain their two inputs and one output to the -# same Go type using a variable. - -- go: And - asm: "VPAND[DQ]?" - in: - - &any - go: $t - - *any - out: - - *any - -- go: And - asm: "VPANDD" # Fill in the gap, And is missing for Uint8x64 and Int8x64 - inVariant: [] - in: &twoI8x64 - - &i8x64 - go: $t - overwriteElementBits: 8 - - *i8x64 - out: &oneI8x64 - - *i8x64 - -- go: And - asm: "VPANDD" # Fill in the gap, And is missing for Uint16x32 and Int16x32 - inVariant: [] - in: &twoI16x32 - - &i16x32 - go: $t - overwriteElementBits: 16 - - *i16x32 - out: &oneI16x32 - - *i16x32 - -- go: AndNot - asm: "VPANDN[DQ]?" - operandOrder: "21" # switch the arg order - in: - - *any - - *any - out: - - *any - -- go: AndNot - asm: "VPANDND" # Fill in the gap, AndNot is missing for Uint8x64 and Int8x64 - operandOrder: "21" # switch the arg order - inVariant: [] - in: *twoI8x64 - out: *oneI8x64 - -- go: AndNot - asm: "VPANDND" # Fill in the gap, AndNot is missing for Uint16x32 and Int16x32 - operandOrder: "21" # switch the arg order - inVariant: [] - in: *twoI16x32 - out: *oneI16x32 - -- go: Or - asm: "VPOR[DQ]?" - in: - - *any - - *any - out: - - *any - -- go: Or - asm: "VPORD" # Fill in the gap, Or is missing for Uint8x64 and Int8x64 - inVariant: [] - in: *twoI8x64 - out: *oneI8x64 - -- go: Or - asm: "VPORD" # Fill in the gap, Or is missing for Uint16x32 and Int16x32 - inVariant: [] - in: *twoI16x32 - out: *oneI16x32 - -- go: Xor - asm: "VPXOR[DQ]?" 
- in: - - *any - - *any - out: - - *any - -- go: Xor - asm: "VPXORD" # Fill in the gap, Or is missing for Uint8x64 and Int8x64 - inVariant: [] - in: *twoI8x64 - out: *oneI8x64 - -- go: Xor - asm: "VPXORD" # Fill in the gap, Or is missing for Uint16x32 and Int16x32 - inVariant: [] - in: *twoI16x32 - out: *oneI16x32 \ No newline at end of file diff --git a/internal/simdgen/ops/Compares/categories.yaml b/internal/simdgen/ops/Compares/categories.yaml deleted file mode 100644 index aa07ade2..00000000 --- a/internal/simdgen/ops/Compares/categories.yaml +++ /dev/null @@ -1,43 +0,0 @@ -!sum -# const imm predicate(holds for both float and int|uint): -# 0: Equal -# 1: Less -# 2: LessEqual -# 4: NotEqual -# 5: GreaterEqual -# 6: Greater -- go: Equal - constImm: 0 - commutative: true - documentation: !string |- - // NAME compares for equality. -- go: Less - constImm: 1 - commutative: false - documentation: !string |- - // NAME compares for less than. -- go: LessEqual - constImm: 2 - commutative: false - documentation: !string |- - // NAME compares for less than or equal. -- go: IsNan # For float only. - constImm: 3 - commutative: true - documentation: !string |- - // NAME checks if elements are NaN. Use as x.IsNan(x). -- go: NotEqual - constImm: 4 - commutative: true - documentation: !string |- - // NAME compares for inequality. -- go: GreaterEqual - constImm: 13 - commutative: false - documentation: !string |- - // NAME compares for greater than or equal. -- go: Greater - constImm: 14 - commutative: false - documentation: !string |- - // NAME compares for greater than. 
diff --git a/internal/simdgen/ops/Compares/go.yaml b/internal/simdgen/ops/Compares/go.yaml deleted file mode 100644 index 0f916283..00000000 --- a/internal/simdgen/ops/Compares/go.yaml +++ /dev/null @@ -1,141 +0,0 @@ -!sum -# Ints -- go: Equal - asm: "V?PCMPEQ[BWDQ]" - in: - - &any - go: $t - - *any - out: - - &anyvregToMask - go: $t - overwriteBase: int - overwriteClass: mask -- go: Greater - asm: "V?PCMPGT[BWDQ]" - in: - - &int - go: $t - base: int - - *int - out: - - *anyvregToMask -# 256-bit VCMPGTQ's output elemBits is marked 32-bit in the XED data, we -# believe this is an error, so add this definition to overwrite. -- go: Greater - asm: "VPCMPGTQ" - in: - - &int64 - go: $t - base: int - elemBits: 64 - - *int64 - out: - - base: int - elemBits: 32 - overwriteElementBits: 64 - overwriteClass: mask - overwriteBase: int - -# TODO these are redundant with VPCMP operations. -# AVX-512 compares produce masks. -- go: Equal - asm: "V?PCMPEQ[BWDQ]" - in: - - *any - - *any - out: - - class: mask -- go: Greater - asm: "V?PCMPGT[BWDQ]" - in: - - *int - - *int - out: - - class: mask - -# MASKED signed comparisons for X/Y registers -# unmasked would clash with emulations on AVX2 -- go: (Equal|Greater|Less|LessEqual|GreaterEqual|NotEqual) - asm: "VPCMP[BWDQ]" - in: - - &int - bits: (128|256) - go: $t - base: int - - *int - - class: immediate - const: 0 # Just a placeholder, will be overwritten by const imm porting. 
- inVariant: - - class: mask - out: - - class: mask - -# MASKED unsigned comparisons for X/Y registers -# unmasked would clash with emulations on AVX2 -- go: (Equal|Greater|Less|LessEqual|GreaterEqual|NotEqual) - asm: "VPCMPU[BWDQ]" - in: - - &uint - bits: (128|256) - go: $t - base: uint - - *uint - - class: immediate - const: 0 - inVariant: - - class: mask - out: - - class: mask - -# masked/unmasked signed comparisons for Z registers -- go: (Equal|Greater|Less|LessEqual|GreaterEqual|NotEqual) - asm: "VPCMP[BWDQ]" - in: - - &int - bits: 512 - go: $t - base: int - - *int - - class: immediate - const: 0 # Just a placeholder, will be overwritten by const imm porting. - out: - - class: mask - -# masked/unmasked unsigned comparisons for Z registers -- go: (Equal|Greater|Less|LessEqual|GreaterEqual|NotEqual) - asm: "VPCMPU[BWDQ]" - in: - - &uint - bits: 512 - go: $t - base: uint - - *uint - - class: immediate - const: 0 - out: - - class: mask - -# Floats -- go: Equal|Greater|Less|LessEqual|GreaterEqual|NotEqual|IsNan - asm: "VCMPP[SD]" - in: - - &float - go: $t - base: float - - *float - - class: immediate - const: 0 - out: - - go: $t - overwriteBase: int - overwriteClass: mask -- go: (Equal|Greater|Less|LessEqual|GreaterEqual|NotEqual|IsNan) - asm: "VCMPP[SD]" - in: - - *float - - *float - - class: immediate - const: 0 - out: - - class: mask \ No newline at end of file diff --git a/internal/simdgen/ops/Converts/categories.yaml b/internal/simdgen/ops/Converts/categories.yaml deleted file mode 100644 index cc6c419d..00000000 --- a/internal/simdgen/ops/Converts/categories.yaml +++ /dev/null @@ -1,10 +0,0 @@ -!sum -- go: ConvertToInt32 - commutative: false - documentation: !string |- - // ConvertToInt32 converts element values to int32. - -- go: ConvertToUint32 - commutative: false - documentation: !string |- - // ConvertToUint32Masked converts element values to uint32. 
diff --git a/internal/simdgen/ops/Converts/go.yaml b/internal/simdgen/ops/Converts/go.yaml deleted file mode 100644 index 4e251728..00000000 --- a/internal/simdgen/ops/Converts/go.yaml +++ /dev/null @@ -1,21 +0,0 @@ -!sum -- go: ConvertToInt32 - asm: "VCVTTPS2DQ" - in: - - &fp - go: $t - base: float - out: - - &i32 - go: $u - base: int - elemBits: 32 -- go: ConvertToUint32 - asm: "VCVTPS2UDQ" - in: - - *fp - out: - - &u32 - go: $u - base: uint - elemBits: 32 diff --git a/internal/simdgen/ops/FPonlyArith/categories.yaml b/internal/simdgen/ops/FPonlyArith/categories.yaml deleted file mode 100644 index f2d8af68..00000000 --- a/internal/simdgen/ops/FPonlyArith/categories.yaml +++ /dev/null @@ -1,85 +0,0 @@ -!sum -- go: Div - commutative: false - documentation: !string |- - // NAME divides elements of two vectors. -- go: Sqrt - commutative: false - documentation: !string |- - // NAME computes the square root of each element. -- go: Reciprocal - commutative: false - documentation: !string |- - // NAME computes an approximate reciprocal of each element. -- go: ReciprocalSqrt - commutative: false - documentation: !string |- - // NAME computes an approximate reciprocal of the square root of each element. -- go: Scale - commutative: false - documentation: !string |- - // NAME multiplies elements by a power of 2. -- go: RoundToEven - commutative: false - constImm: 0 - documentation: !string |- - // NAME rounds elements to the nearest integer. -- go: RoundToEvenScaled - commutative: false - constImm: 0 - documentation: !string |- - // NAME rounds elements with specified precision. -- go: RoundToEvenScaledResidue - commutative: false - constImm: 0 - documentation: !string |- - // NAME computes the difference after rounding with specified precision. -- go: Floor - commutative: false - constImm: 1 - documentation: !string |- - // NAME rounds elements down to the nearest integer. 
-- go: FloorScaled - commutative: false - constImm: 1 - documentation: !string |- - // NAME rounds elements down with specified precision. -- go: FloorScaledResidue - commutative: false - constImm: 1 - documentation: !string |- - // NAME computes the difference after flooring with specified precision. -- go: Ceil - commutative: false - constImm: 2 - documentation: !string |- - // NAME rounds elements up to the nearest integer. -- go: CeilScaled - commutative: false - constImm: 2 - documentation: !string |- - // NAME rounds elements up with specified precision. -- go: CeilScaledResidue - commutative: false - constImm: 2 - documentation: !string |- - // NAME computes the difference after ceiling with specified precision. -- go: Trunc - commutative: false - constImm: 3 - documentation: !string |- - // NAME truncates elements towards zero. -- go: TruncScaled - commutative: false - constImm: 3 - documentation: !string |- - // NAME truncates elements with specified precision. -- go: TruncScaledResidue - commutative: false - constImm: 3 - documentation: !string |- - // NAME computes the difference after truncating with specified precision. -- go: AddSub - commutative: false - documentation: !string |- - // NAME subtracts even elements and adds odd elements of two vectors. diff --git a/internal/simdgen/ops/FPonlyArith/go.yaml b/internal/simdgen/ops/FPonlyArith/go.yaml deleted file mode 100644 index e164f7b7..00000000 --- a/internal/simdgen/ops/FPonlyArith/go.yaml +++ /dev/null @@ -1,62 +0,0 @@ -!sum -- go: Div - asm: "V?DIVP[SD]" - in: &2fp - - &fp - go: $t - base: float - - *fp - out: &1fp - - *fp -- go: Sqrt - asm: "V?SQRTP[SD]" - in: *1fp - out: *1fp -# TODO: Provide separate methods for 12-bit precision and 14-bit precision? 
-- go: Reciprocal - asm: "VRCP(14)?P[SD]" - in: *1fp - out: *1fp -- go: ReciprocalSqrt - asm: "V?RSQRT(14)?P[SD]" - in: *1fp - out: *1fp -- go: Scale - asm: "VSCALEFP[SD]" - in: *2fp - out: *1fp - -- go: "RoundToEven|Ceil|Floor|Trunc" - asm: "VROUNDP[SD]" - in: - - *fp - - class: immediate - const: 0 # place holder - out: *1fp - -- go: "(RoundToEven|Ceil|Floor|Trunc)Scaled" - asm: "VRNDSCALEP[SD]" - in: - - *fp - - class: immediate - const: 0 # place holder - immOffset: 4 # "M", round to numbers with M digits after dot(by means of binary number). - name: prec - out: *1fp -- go: "(RoundToEven|Ceil|Floor|Trunc)ScaledResidue" - asm: "VREDUCEP[SD]" - in: - - *fp - - class: immediate - const: 0 # place holder - immOffset: 4 # "M", round to numbers with M digits after dot(by means of binary number). - name: prec - out: *1fp - -- go: "AddSub" - asm: "VADDSUBP[SD]" - in: - - *fp - - *fp - out: - - *fp diff --git a/internal/simdgen/ops/GaloisField/categories.yaml b/internal/simdgen/ops/GaloisField/categories.yaml deleted file mode 100644 index 25824625..00000000 --- a/internal/simdgen/ops/GaloisField/categories.yaml +++ /dev/null @@ -1,21 +0,0 @@ -!sum -- go: GaloisFieldAffineTransform - commutative: false - documentation: !string |- - // NAME computes an affine transformation in GF(2^8): - // x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrixes; - // b is an 8-bit vector. The affine transformation is y * x + b, with each element of y - // corresponding to a group of 8 elements in x. -- go: GaloisFieldAffineTransformInverse - commutative: false - documentation: !string |- - // NAME computes an affine transformation in GF(2^8), - // with x inverted with respect to reduction polynomial x^8 + x^4 + x^3 + x + 1: - // x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrixes; - // b is an 8-bit vector. 
The affine transformation is y * x + b, with each element of y - // corresponding to a group of 8 elements in x. -- go: GaloisFieldMul - commutative: false - documentation: !string |- - // NAME computes element-wise GF(2^8) multiplication with - // reduction polynomial x^8 + x^4 + x^3 + x + 1. diff --git a/internal/simdgen/ops/GaloisField/go.yaml b/internal/simdgen/ops/GaloisField/go.yaml deleted file mode 100644 index e86211cb..00000000 --- a/internal/simdgen/ops/GaloisField/go.yaml +++ /dev/null @@ -1,32 +0,0 @@ -!sum -- go: GaloisFieldAffineTransform - asm: VGF2P8AFFINEQB - operandOrder: 2I # 2nd operand, then immediate - in: &AffineArgs - - &uint8 - go: $t - base: uint - - &uint8x8 - go: $t2 - base: uint - - &pureImmVar - class: immediate - immOffset: 0 - name: b - out: - - *uint8 - -- go: GaloisFieldAffineTransformInverse - asm: VGF2P8AFFINEINVQB - operandOrder: 2I # 2nd operand, then immediate - in: *AffineArgs - out: - - *uint8 - -- go: GaloisFieldMul - asm: VGF2P8MULB - in: - - *uint8 - - *uint8 - out: - - *uint8 diff --git a/internal/simdgen/ops/IntOnlyArith/categories.yaml b/internal/simdgen/ops/IntOnlyArith/categories.yaml deleted file mode 100644 index bf33642a..00000000 --- a/internal/simdgen/ops/IntOnlyArith/categories.yaml +++ /dev/null @@ -1,21 +0,0 @@ -!sum -- go: Average - commutative: true - documentation: !string |- - // NAME computes the rounded average of corresponding elements. -- go: Abs - commutative: false - # Unary operation, not commutative - documentation: !string |- - // NAME computes the absolute value of each element. -- go: CopySign - # Applies sign of second operand to first: sign(val, sign_src) - commutative: false - documentation: !string |- - // NAME returns the product of the first operand with -1, 0, or 1, - // whichever constant is nearest to the value of the second operand. 
- # Sign does not have masked version -- go: OnesCount - commutative: false - documentation: !string |- - // NAME counts the number of set bits in each element. diff --git a/internal/simdgen/ops/IntOnlyArith/go.yaml b/internal/simdgen/ops/IntOnlyArith/go.yaml deleted file mode 100644 index 54938b4f..00000000 --- a/internal/simdgen/ops/IntOnlyArith/go.yaml +++ /dev/null @@ -1,45 +0,0 @@ -!sum -# Average (unsigned byte, unsigned word) -# Instructions: VPAVGB, VPAVGW -- go: Average - asm: "VPAVG[BW]" # Matches VPAVGB (byte) and VPAVGW (word) - in: - - &uint_t # $t will be Uint8xN for VPAVGB, Uint16xN for VPAVGW - go: $t - base: uint - - *uint_t - out: - - *uint_t - -# Absolute Value (signed byte, word, dword, qword) -# Instructions: VPABSB, VPABSW, VPABSD, VPABSQ -- go: Abs - asm: "VPABS[BWDQ]" # Matches VPABSB, VPABSW, VPABSD, VPABSQ - in: - - &int_t # $t will be Int8xN, Int16xN, Int32xN, Int64xN - go: $t - base: int - out: - - *int_t # Output is magnitude, fits in the same signed type - -# Sign Operation (signed byte, word, dword) -# Applies sign of second operand to the first. -# Instructions: VPSIGNB, VPSIGNW, VPSIGND -- go: CopySign - asm: "VPSIGN[BWD]" # Matches VPSIGNB, VPSIGNW, VPSIGND - in: - - *int_t # value to apply sign to - - *int_t # value from which to take the sign - out: - - *int_t - -# Population Count (count set bits in each element) -# Instructions: VPOPCNTB, VPOPCNTW (AVX512_BITALG) -# VPOPCNTD, VPOPCNTQ (AVX512_VPOPCNTDQ) -- go: OnesCount - asm: "VPOPCNT[BWDQ]" - in: - - &any - go: $t - out: - - *any diff --git a/internal/simdgen/ops/MLOps/categories.yaml b/internal/simdgen/ops/MLOps/categories.yaml deleted file mode 100644 index 97381e1e..00000000 --- a/internal/simdgen/ops/MLOps/categories.yaml +++ /dev/null @@ -1,47 +0,0 @@ -!sum -- go: DotProdPairs - commutative: false - documentation: !string |- - // NAME multiplies the elements and add the pairs together, - // yielding a vector of half as many elements with twice the input element size. 
-# TODO: maybe simplify this name within the receiver-type + method-naming scheme we use. -- go: DotProdPairsSaturated - commutative: false - documentation: !string |- - // NAME multiplies the elements and add the pairs together with saturation, - // yielding a vector of half as many elements with twice the input element size. -# QuadDotProd, i.e. VPDPBUSD(S) are operations with src/dst on the same register, we are not supporting this as of now. -# - go: DotProdBroadcast -# commutative: true -# # documentation: !string |- -# // NAME multiplies all elements and broadcasts the sum. -- go: AddDotProdQuadruple - commutative: false - documentation: !string |- - // NAME performs dot products on groups of 4 elements of x and y and then adds z. -- go: AddDotProdQuadrupleSaturated - commutative: false - documentation: !string |- - // NAME multiplies performs dot products on groups of 4 elements of x and y and then adds z. -- go: AddDotProdPairs - commutative: false - noTypes: "true" - noGenericOps: "true" - documentation: !string |- - // NAME performs dot products on pairs of elements of y and z and then adds x. -- go: AddDotProdPairsSaturated - commutative: false - documentation: !string |- - // NAME performs dot products on pairs of elements of y and z and then adds x. -- go: MulAdd - commutative: false - documentation: !string |- - // NAME performs a fused (x * y) + z. -- go: MulAddSub - commutative: false - documentation: !string |- - // NAME performs a fused (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements. -- go: MulSubAdd - commutative: false - documentation: !string |- - // NAME performs a fused (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements. 
diff --git a/internal/simdgen/ops/MLOps/go.yaml b/internal/simdgen/ops/MLOps/go.yaml deleted file mode 100644 index f6b6f135..00000000 --- a/internal/simdgen/ops/MLOps/go.yaml +++ /dev/null @@ -1,113 +0,0 @@ -!sum -- go: DotProdPairs - asm: VPMADDWD - in: - - &int - go: $t - base: int - - *int - out: - - &int2 # The elemBits are different - go: $t2 - base: int -- go: DotProdPairsSaturated - asm: VPMADDUBSW - in: - - &uint - go: $t - base: uint - overwriteElementBits: 8 - - &int3 - go: $t3 - base: int - overwriteElementBits: 8 - out: - - *int2 -# - go: DotProdBroadcast -# asm: VDPP[SD] -# in: -# - &dpb_src -# go: $t -# - *dpb_src -# - class: immediate -# const: 127 -# out: -# - *dpb_src -- go: AddDotProdQuadruple - asm: "VPDPBUSD" - operandOrder: "31" # switch operand 3 and 1 - in: - - &qdpa_acc - go: $t_acc - base: int - elemBits: 32 - - &qdpa_src1 - go: $t_src1 - base: uint - overwriteElementBits: 8 - - &qdpa_src2 - go: $t_src2 - base: int - overwriteElementBits: 8 - out: - - *qdpa_acc -- go: AddDotProdQuadrupleSaturated - asm: "VPDPBUSDS" - operandOrder: "31" # switch operand 3 and 1 - in: - - *qdpa_acc - - *qdpa_src1 - - *qdpa_src2 - out: - - *qdpa_acc -- go: AddDotProdPairs - asm: "VPDPWSSD" - in: - - &pdpa_acc - go: $t_acc - base: int - elemBits: 32 - - &pdpa_src1 - go: $t_src1 - base: int - overwriteElementBits: 16 - - &pdpa_src2 - go: $t_src2 - base: int - overwriteElementBits: 16 - out: - - *pdpa_acc -- go: AddDotProdPairsSaturated - asm: "VPDPWSSDS" - in: - - *pdpa_acc - - *pdpa_src1 - - *pdpa_src2 - out: - - *pdpa_acc -- go: MulAdd - asm: "VFMADD213PS|VFMADD213PD" - in: - - &fma_op - go: $t - base: float - - *fma_op - - *fma_op - out: - - *fma_op -- go: MulAddSub - asm: "VFMADDSUB213PS|VFMADDSUB213PD" - in: - - *fma_op - - *fma_op - - *fma_op - out: - - *fma_op -- go: MulSubAdd - asm: "VFMSUBADD213PS|VFMSUBADD213PD" - in: - - *fma_op - - *fma_op - - *fma_op - out: - - *fma_op \ No newline at end of file diff --git 
a/internal/simdgen/ops/MinMax/categories.yaml b/internal/simdgen/ops/MinMax/categories.yaml deleted file mode 100644 index a7e30f46..00000000 --- a/internal/simdgen/ops/MinMax/categories.yaml +++ /dev/null @@ -1,9 +0,0 @@ -!sum -- go: Max - commutative: true - documentation: !string |- - // NAME computes the maximum of corresponding elements. -- go: Min - commutative: true - documentation: !string |- - // NAME computes the minimum of corresponding elements. diff --git a/internal/simdgen/ops/MinMax/go.yaml b/internal/simdgen/ops/MinMax/go.yaml deleted file mode 100644 index 55f1e18b..00000000 --- a/internal/simdgen/ops/MinMax/go.yaml +++ /dev/null @@ -1,42 +0,0 @@ -!sum -- go: Max - asm: "V?PMAXS[BWDQ]" - in: &2int - - &int - go: $t - base: int - - *int - out: &1int - - *int -- go: Max - asm: "V?PMAXU[BWDQ]" - in: &2uint - - &uint - go: $t - base: uint - - *uint - out: &1uint - - *uint - -- go: Min - asm: "V?PMINS[BWDQ]" - in: *2int - out: *1int -- go: Min - asm: "V?PMINU[BWDQ]" - in: *2uint - out: *1uint - -- go: Max - asm: "V?MAXP[SD]" - in: &2float - - &float - go: $t - base: float - - *float - out: &1float - - *float -- go: Min - asm: "V?MINP[SD]" - in: *2float - out: *1float diff --git a/internal/simdgen/ops/Moves/categories.yaml b/internal/simdgen/ops/Moves/categories.yaml deleted file mode 100644 index ef8e0360..00000000 --- a/internal/simdgen/ops/Moves/categories.yaml +++ /dev/null @@ -1,72 +0,0 @@ -!sum -- go: SetElem - commutative: false - documentation: !string |- - // NAME sets a single constant-indexed element's value. -- go: GetElem - commutative: false - documentation: !string |- - // NAME retrieves a single constant-indexed element's value. -- go: SetLo - commutative: false - constImm: 0 - documentation: !string |- - // NAME returns x with its lower half set to y. -- go: GetLo - commutative: false - constImm: 0 - documentation: !string |- - // NAME returns the lower half of x. 
-- go: SetHi - commutative: false - constImm: 1 - documentation: !string |- - // NAME returns x with its upper half set to y. -- go: GetHi - commutative: false - constImm: 1 - documentation: !string |- - // NAME returns the upper half of x. -- go: Permute - commutative: false - documentation: !string |- - // NAME performs a full permutation of vector x using indices: - // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} - // Only the needed bits to represent x's index are used in indices' elements. -- go: Permute2 # Permute2 is only available on or after AVX512 - commutative: false - documentation: !string |- - // NAME performs a full permutation of vector x, y using indices: - // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} - // where xy is x appending y. - // Only the needed bits to represent xy's index are used in indices' elements. -- go: Compress - commutative: false - documentation: !string |- - // NAME performs a compression on vector x using mask by - // selecting elements as indicated by mask, and pack them to lower indexed elements. -- go: blend - commutative: false - documentation: !string |- - // NAME blends two vectors based on mask values, choosing either - // the first or the second based on whether the third is false or true -- go: Expand - commutative: false - documentation: !string |- - // NAME performs an expansion on a vector x whose elements are packed to lower parts. - // The expansion is to distribute elements as indexed by mask, from lower mask elements to upper in order. -- go: Broadcast128 - commutative: false - documentation: !string |- - // NAME copies element zero of its (128-bit) input to all elements of - // the 128-bit output vector. -- go: Broadcast256 - commutative: false - documentation: !string |- - // NAME copies element zero of its (128-bit) input to all elements of - // the 256-bit output vector. 
-- go: Broadcast512 - commutative: false - documentation: !string |- - // NAME copies element zero of its (128-bit) input to all elements of - // the 512-bit output vector. diff --git a/internal/simdgen/ops/Moves/go.yaml b/internal/simdgen/ops/Moves/go.yaml deleted file mode 100644 index 71981c12..00000000 --- a/internal/simdgen/ops/Moves/go.yaml +++ /dev/null @@ -1,372 +0,0 @@ -!sum -- go: SetElem - asm: "VPINSR[BWDQ]" - in: - - &t - class: vreg - base: $b - - class: greg - base: $b - lanes: 1 # Scalar, darn it! - - &imm - class: immediate - immOffset: 0 - name: index - out: - - *t - -- go: SetElem - asm: "VPINSR[DQ]" - in: - - &t - class: vreg - base: int - OverwriteBase: float - - class: greg - base: int - OverwriteBase: float - lanes: 1 # Scalar, darn it! - - &imm - class: immediate - immOffset: 0 - name: index - out: - - *t - -- go: GetElem - asm: "VPEXTR[BWDQ]" - in: - - class: vreg - base: $b - elemBits: $e - - *imm - out: - - class: greg - base: $b - bits: $e - -- go: "SetHi|SetLo" - asm: "VINSERTI128|VINSERTI64X4" - inVariant: [] - in: - - &i8x2N - class: vreg - base: $t - OverwriteElementBits: 8 - - &i8xN - class: vreg - base: $t - OverwriteElementBits: 8 - - &imm01 # This immediate should be only 0 or 1 - class: immediate - const: 0 # place holder - name: index - out: - - *i8x2N - -- go: "GetHi|GetLo" - asm: "VEXTRACTI128|VEXTRACTI64X4" - inVariant: [] - in: - - *i8x2N - - *imm01 - out: - - *i8xN - -- go: "SetHi|SetLo" - asm: "VINSERTI128|VINSERTI64X4" - inVariant: [] - in: - - &i16x2N - class: vreg - base: $t - OverwriteElementBits: 16 - - &i16xN - class: vreg - base: $t - OverwriteElementBits: 16 - - *imm01 - out: - - *i16x2N - -- go: "GetHi|GetLo" - asm: "VEXTRACTI128|VEXTRACTI64X4" - inVariant: [] - in: - - *i16x2N - - *imm01 - out: - - *i16xN - -- go: "SetHi|SetLo" - asm: "VINSERTI128|VINSERTI64X4" - inVariant: [] - in: - - &i32x2N - class: vreg - base: $t - OverwriteElementBits: 32 - - &i32xN - class: vreg - base: $t - OverwriteElementBits: 32 - - 
*imm01 - out: - - *i32x2N - -- go: "GetHi|GetLo" - asm: "VEXTRACTI128|VEXTRACTI64X4" - inVariant: [] - in: - - *i32x2N - - *imm01 - out: - - *i32xN - -- go: "SetHi|SetLo" - asm: "VINSERTI128|VINSERTI64X4" - inVariant: [] - in: - - &i64x2N - class: vreg - base: $t - OverwriteElementBits: 64 - - &i64xN - class: vreg - base: $t - OverwriteElementBits: 64 - - *imm01 - out: - - *i64x2N - -- go: "GetHi|GetLo" - asm: "VEXTRACTI128|VEXTRACTI64X4" - inVariant: [] - in: - - *i64x2N - - *imm01 - out: - - *i64xN - -- go: "SetHi|SetLo" - asm: "VINSERTF128|VINSERTF64X4" - inVariant: [] - in: - - &f32x2N - class: vreg - base: $t - OverwriteElementBits: 32 - - &f32xN - class: vreg - base: $t - OverwriteElementBits: 32 - - *imm01 - out: - - *f32x2N - -- go: "GetHi|GetLo" - asm: "VEXTRACTF128|VEXTRACTF64X4" - inVariant: [] - in: - - *f32x2N - - *imm01 - out: - - *f32xN - -- go: "SetHi|SetLo" - asm: "VINSERTF128|VINSERTF64X4" - inVariant: [] - in: - - &f64x2N - class: vreg - base: $t - OverwriteElementBits: 64 - - &f64xN - class: vreg - base: $t - OverwriteElementBits: 64 - - *imm01 - out: - - *f64x2N - -- go: "GetHi|GetLo" - asm: "VEXTRACTF128|VEXTRACTF64X4" - inVariant: [] - in: - - *f64x2N - - *imm01 - out: - - *f64xN - -- go: Permute - asm: "VPERM[BWDQ]|VPERMP[SD]" - operandOrder: "21Type1" - in: - - &anyindices - go: $t - name: indices - overwriteBase: uint - - &any - go: $t - out: - - *any - -- go: Permute2 - asm: "VPERMI2[BWDQ]|VPERMI2P[SD]" - # Because we are overwriting the receiver's type, we - # have to move the receiver to be a parameter so that - # we can have no duplication. - operandOrder: "231Type1" - in: - - *anyindices # result in arg 0 - - *any - - *any - out: - - *any - -- go: Compress - asm: "VPCOMPRESS[BWDQ]|VCOMPRESSP[SD]" - in: - # The mask in Compress is a control mask rather than a write mask, so it's not optional. 
- - class: mask - - *any - out: - - *any - -# For now a non-public method because -# (1) [OverwriteClass] must be set together with [OverwriteBase] -# (2) "simdgen does not support [OverwriteClass] in inputs". -# That means the signature is wrong. -- go: blend - asm: VPBLENDVB - in: - - &v - go: $t - class: vreg - base: int - - *v - - - class: vreg - base: int - name: mask - out: - - *v - -# For AVX512 -- go: blend - asm: VPBLENDM[BWDQ] - in: - - &v - go: $t - bits: 512 - class: vreg - base: int - - *v - inVariant: - - - class: mask - out: - - *v - -- go: Expand - asm: "VPEXPAND[BWDQ]|VEXPANDP[SD]" - in: - # The mask in Expand is a control mask rather than a write mask, so it's not optional. - - class: mask - - *any - out: - - *any - -- go: Broadcast128 - asm: VPBROADCAST[BWDQ] - in: - - class: vreg - bits: 128 - elemBits: $e - base: $b - out: - - class: vreg - bits: 128 - elemBits: $e - base: $b - -# weirdly, this one case on AVX2 is memory-operand-only -- go: Broadcast128 - asm: VPBROADCASTQ - in: - - class: vreg - bits: 128 - elemBits: 64 - base: int - OverwriteBase: float - out: - - class: vreg - bits: 128 - elemBits: 64 - base: int - OverwriteBase: float - -- go: Broadcast256 - asm: VPBROADCAST[BWDQ] - in: - - class: vreg - bits: 128 - elemBits: $e - base: $b - out: - - class: vreg - bits: 256 - elemBits: $e - base: $b - -- go: Broadcast512 - asm: VPBROADCAST[BWDQ] - in: - - class: vreg - bits: 128 - elemBits: $e - base: $b - out: - - class: vreg - bits: 512 - elemBits: $e - base: $b - -- go: Broadcast128 - asm: VBROADCASTS[SD] - in: - - class: vreg - bits: 128 - elemBits: $e - base: $b - out: - - class: vreg - bits: 128 - elemBits: $e - base: $b - -- go: Broadcast256 - asm: VBROADCASTS[SD] - in: - - class: vreg - bits: 128 - elemBits: $e - base: $b - out: - - class: vreg - bits: 256 - elemBits: $e - base: $b - -- go: Broadcast512 - asm: VBROADCASTS[SD] - in: - - class: vreg - bits: 128 - elemBits: $e - base: $b - out: - - class: vreg - bits: 512 - elemBits: 
$e - base: $b diff --git a/internal/simdgen/ops/Mul/categories.yaml b/internal/simdgen/ops/Mul/categories.yaml deleted file mode 100644 index 92491b51..00000000 --- a/internal/simdgen/ops/Mul/categories.yaml +++ /dev/null @@ -1,14 +0,0 @@ -!sum -- go: Mul - commutative: true - documentation: !string |- - // NAME multiplies corresponding elements of two vectors. -- go: MulEvenWiden - commutative: true - documentation: !string |- - // NAME multiplies even-indexed elements, widening the result. - // Result[i] = v1.Even[i] * v2.Even[i]. -- go: MulHigh - commutative: true - documentation: !string |- - // NAME multiplies elements and stores the high part of the result. diff --git a/internal/simdgen/ops/Mul/go.yaml b/internal/simdgen/ops/Mul/go.yaml deleted file mode 100644 index c0205a68..00000000 --- a/internal/simdgen/ops/Mul/go.yaml +++ /dev/null @@ -1,73 +0,0 @@ -!sum -# "Normal" multiplication is only available for floats. -# This only covers the single and double precision. -- go: Mul - asm: "VMULP[SD]" - in: - - &fp - go: $t - base: float - - *fp - out: - - *fp - -# Integer multiplications. - -# MulEvenWiden -# Dword only. -- go: MulEvenWiden - asm: "VPMULDQ" - in: - - &intNot64 - go: $t - elemBits: 8|16|32 - base: int - - *intNot64 - out: - - &int2 - go: $t2 - base: int -- go: MulEvenWiden - asm: "VPMULUDQ" - in: - - &uintNot64 - go: $t - elemBits: 8|16|32 - base: uint - - *uintNot64 - out: - - &uint2 - go: $t2 - base: uint - -# MulHigh -# Word only. -- go: MulHigh - asm: "VPMULHW" - in: - - &int - go: $t - base: int - - *int - out: - - *int -- go: MulHigh - asm: "VPMULHUW" - in: - - &uint - go: $t - base: uint - - *uint - out: - - *uint - -# MulLow -# signed and unsigned are the same for lower bits. 
-- go: Mul - asm: "VPMULL[WDQ]" - in: - - &any - go: $t - - *any - out: - - *any diff --git a/internal/simdgen/ops/ShiftRotate/categories.yaml b/internal/simdgen/ops/ShiftRotate/categories.yaml deleted file mode 100644 index 0d0b006c..00000000 --- a/internal/simdgen/ops/ShiftRotate/categories.yaml +++ /dev/null @@ -1,103 +0,0 @@ -!sum -- go: ShiftAllLeft - nameAndSizeCheck: true - specialLower: sftimm - commutative: false - documentation: !string |- - // NAME shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. -- go: ShiftAllRight - signed: false - nameAndSizeCheck: true - specialLower: sftimm - commutative: false - documentation: !string |- - // NAME shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. -- go: ShiftAllRight - signed: true - specialLower: sftimm - nameAndSizeCheck: true - commutative: false - documentation: !string |- - // NAME shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. -- go: shiftAllLeftConst # no APIs, only ssa ops. - noTypes: "true" - noGenericOps: "true" - SSAVariant: "const" # to avoid its name colliding with reg version of this instruction, amend this to its ssa op name. - nameAndSizeCheck: true - commutative: false -- go: shiftAllRightConst # no APIs, only ssa ops. - noTypes: "true" - noGenericOps: "true" - SSAVariant: "const" - signed: false - nameAndSizeCheck: true - commutative: false -- go: shiftAllRightConst # no APIs, only ssa ops. - noTypes: "true" - noGenericOps: "true" - SSAVariant: "const" - signed: true - nameAndSizeCheck: true - commutative: false - -- go: ShiftLeft - nameAndSizeCheck: true - commutative: false - documentation: !string |- - // NAME shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. 
-- go: ShiftRight - signed: false - nameAndSizeCheck: true - commutative: false - documentation: !string |- - // NAME shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. -- go: ShiftRight - signed: true - nameAndSizeCheck: true - commutative: false - documentation: !string |- - // NAME shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. -- go: RotateAllLeft - nameAndSizeCheck: true - commutative: false - documentation: !string |- - // NAME rotates each element to the left by the number of bits specified by the immediate. -- go: RotateLeft - nameAndSizeCheck: true - commutative: false - documentation: !string |- - // NAME rotates each element in x to the left by the number of bits specified by y's corresponding elements. -- go: RotateAllRight - nameAndSizeCheck: true - commutative: false - documentation: !string |- - // NAME rotates each element to the right by the number of bits specified by the immediate. -- go: RotateRight - nameAndSizeCheck: true - commutative: false - documentation: !string |- - // NAME rotates each element in x to the right by the number of bits specified by y's corresponding elements. -- go: ShiftAllLeftConcat - nameAndSizeCheck: true - commutative: false - documentation: !string |- - // NAME shifts each element of x to the left by the number of bits specified by the - // immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. -- go: ShiftAllRightConcat - nameAndSizeCheck: true - commutative: false - documentation: !string |- - // NAME shifts each element of x to the right by the number of bits specified by the - // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. 
-- go: ShiftLeftConcat - nameAndSizeCheck: true - commutative: false - documentation: !string |- - // NAME shifts each element of x to the left by the number of bits specified by the - // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. -- go: ShiftRightConcat - nameAndSizeCheck: true - commutative: false - documentation: !string |- - // NAME shifts each element of x to the right by the number of bits specified by the - // corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. diff --git a/internal/simdgen/ops/ShiftRotate/go.yaml b/internal/simdgen/ops/ShiftRotate/go.yaml deleted file mode 100644 index e7ccdeb0..00000000 --- a/internal/simdgen/ops/ShiftRotate/go.yaml +++ /dev/null @@ -1,172 +0,0 @@ -!sum -# Integers -# ShiftAll* -- go: ShiftAllLeft - asm: "VPSLL[WDQ]" - in: - - &any - go: $t - - &vecAsScalar64 - go: "Uint.*" - treatLikeAScalarOfSize: 64 - out: - - *any -- go: ShiftAllRight - signed: false - asm: "VPSRL[WDQ]" - in: - - &uint - go: $t - base: uint - - *vecAsScalar64 - out: - - *uint -- go: ShiftAllRight - signed: true - asm: "VPSRA[WDQ]" - in: - - &int - go: $t - base: int - - *vecAsScalar64 - out: - - *int - -- go: shiftAllLeftConst - asm: "VPSLL[WDQ]" - in: - - *any - - &imm - class: immediate - immOffset: 0 - out: - - *any -- go: shiftAllRightConst - asm: "VPSRL[WDQ]" - in: - - *int - - *imm - out: - - *int -- go: shiftAllRightConst - asm: "VPSRA[WDQ]" - in: - - *uint - - *imm - out: - - *uint - -# Shift* (variable) -- go: ShiftLeft - asm: "VPSLLV[WD]" - in: - - *any - - *any - out: - - *any -# XED data of VPSLLVQ marks the element bits 32 which is off to the actual semantic, we need to overwrite -# it to 64. 
-- go: ShiftLeft - asm: "VPSLLVQ" - in: - - &anyOverwriteElemBits - go: $t - overwriteElementBits: 64 - - *anyOverwriteElemBits - out: - - *anyOverwriteElemBits -- go: ShiftRight - signed: false - asm: "VPSRLV[WD]" - in: - - *uint - - *uint - out: - - *uint -# XED data of VPSRLVQ needs the same overwrite as VPSLLVQ. -- go: ShiftRight - signed: false - asm: "VPSRLVQ" - in: - - &uintOverwriteElemBits - go: $t - base: uint - overwriteElementBits: 64 - - *uintOverwriteElemBits - out: - - *uintOverwriteElemBits -- go: ShiftRight - signed: true - asm: "VPSRAV[WDQ]" - in: - - *int - - *int - out: - - *int - -# Rotate -- go: RotateAllLeft - asm: "VPROL[DQ]" - in: - - *any - - &pureImm - class: immediate - immOffset: 0 - name: shift - out: - - *any -- go: RotateAllRight - asm: "VPROR[DQ]" - in: - - *any - - *pureImm - out: - - *any -- go: RotateLeft - asm: "VPROLV[DQ]" - in: - - *any - - *any - out: - - *any -- go: RotateRight - asm: "VPRORV[DQ]" - in: - - *any - - *any - out: - - *any - -# Bizzare shifts. -- go: ShiftAllLeftConcat - asm: "VPSHLD[WDQ]" - in: - - *any - - *any - - *pureImm - out: - - *any -- go: ShiftAllRightConcat - asm: "VPSHRD[WDQ]" - in: - - *any - - *any - - *pureImm - out: - - *any -- go: ShiftLeftConcat - asm: "VPSHLDV[WDQ]" - in: - - *any - - *any - - *any - out: - - *any -- go: ShiftRightConcat - asm: "VPSHRDV[WDQ]" - in: - - *any - - *any - - *any - out: - - *any diff --git a/internal/simdgen/pprint.go b/internal/simdgen/pprint.go deleted file mode 100644 index 054b5176..00000000 --- a/internal/simdgen/pprint.go +++ /dev/null @@ -1,73 +0,0 @@ -// Copyright 2025 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. 
- -package main - -import ( - "fmt" - "reflect" - "strconv" -) - -func pprints(v any) string { - var pp pprinter - pp.val(reflect.ValueOf(v), 0) - return string(pp.buf) -} - -type pprinter struct { - buf []byte -} - -func (p *pprinter) indent(by int) { - for range by { - p.buf = append(p.buf, '\t') - } -} - -func (p *pprinter) val(v reflect.Value, indent int) { - switch v.Kind() { - default: - p.buf = fmt.Appendf(p.buf, "unsupported kind %v", v.Kind()) - - case reflect.Bool: - p.buf = strconv.AppendBool(p.buf, v.Bool()) - - case reflect.Int, reflect.Int16, reflect.Int32, reflect.Int64: - p.buf = strconv.AppendInt(p.buf, v.Int(), 10) - - case reflect.String: - p.buf = strconv.AppendQuote(p.buf, v.String()) - - case reflect.Pointer: - if v.IsNil() { - p.buf = append(p.buf, "nil"...) - } else { - p.buf = append(p.buf, "&"...) - p.val(v.Elem(), indent) - } - - case reflect.Slice, reflect.Array: - p.buf = append(p.buf, "[\n"...) - for i := range v.Len() { - p.indent(indent + 1) - p.val(v.Index(i), indent+1) - p.buf = append(p.buf, ",\n"...) - } - p.indent(indent) - p.buf = append(p.buf, ']') - - case reflect.Struct: - vt := v.Type() - p.buf = append(append(p.buf, vt.String()...), "{\n"...) - for f := range v.NumField() { - p.indent(indent + 1) - p.buf = append(append(p.buf, vt.Field(f).Name...), ": "...) - p.val(v.Field(f), indent+1) - p.buf = append(p.buf, ",\n"...) - } - p.indent(indent) - p.buf = append(p.buf, '}') - } -} diff --git a/internal/simdgen/sort_test.go b/internal/simdgen/sort_test.go deleted file mode 100644 index 399acf03..00000000 --- a/internal/simdgen/sort_test.go +++ /dev/null @@ -1,41 +0,0 @@ -// Copyright 2025 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. 
- -package main - -import "testing" - -func TestSort(t *testing.T) { - testCases := []struct { - s1, s2 string - want int - }{ - {"a1", "a2", -1}, - {"a11a", "a11b", -1}, - {"a01a1", "a1a01", -1}, - {"a2", "a1", 1}, - {"a10", "a2", 1}, - {"a1", "a10", -1}, - {"z11", "z2", 1}, - {"z2", "z11", -1}, - {"abc", "abd", -1}, - {"123", "45", 1}, - {"file1", "file1", 0}, - {"file", "file1", -1}, - {"file1", "file", 1}, - {"a01", "a1", -1}, - {"a1a", "a1b", -1}, - } - - for _, tc := range testCases { - got := compareNatural(tc.s1, tc.s2) - result := "✅" - if got != tc.want { - result = "❌" - t.Errorf("%s CompareNatural(\"%s\", \"%s\") -> got %2d, want %2d\n", result, tc.s1, tc.s2, got, tc.want) - } else { - t.Logf("%s CompareNatural(\"%s\", \"%s\") -> got %2d, want %2d\n", result, tc.s1, tc.s2, got, tc.want) - } - } -} diff --git a/internal/simdgen/types.yaml b/internal/simdgen/types.yaml deleted file mode 100644 index f7a01cb3..00000000 --- a/internal/simdgen/types.yaml +++ /dev/null @@ -1,90 +0,0 @@ -# This file defines the possible types of each operand and result. -# -# In general, we're able to narrow this down on some attributes directly from -# the machine instruction descriptions, but the Go mappings need to further -# constrain them and how they relate. For example, on x86 we can't distinguish -# int and uint, though we can distinguish these from float. 
- -in: !repeat -- !sum &types - - {class: vreg, go: Int8x16, base: "int", elemBits: 8, bits: 128, lanes: 16} - - {class: vreg, go: Uint8x16, base: "uint", elemBits: 8, bits: 128, lanes: 16} - - {class: vreg, go: Int16x8, base: "int", elemBits: 16, bits: 128, lanes: 8} - - {class: vreg, go: Uint16x8, base: "uint", elemBits: 16, bits: 128, lanes: 8} - - {class: vreg, go: Int32x4, base: "int", elemBits: 32, bits: 128, lanes: 4} - - {class: vreg, go: Uint32x4, base: "uint", elemBits: 32, bits: 128, lanes: 4} - - {class: vreg, go: Int64x2, base: "int", elemBits: 64, bits: 128, lanes: 2} - - {class: vreg, go: Uint64x2, base: "uint", elemBits: 64, bits: 128, lanes: 2} - - {class: vreg, go: Float32x4, base: "float", elemBits: 32, bits: 128, lanes: 4} - - {class: vreg, go: Float64x2, base: "float", elemBits: 64, bits: 128, lanes: 2} - - {class: vreg, go: Int8x32, base: "int", elemBits: 8, bits: 256, lanes: 32} - - {class: vreg, go: Uint8x32, base: "uint", elemBits: 8, bits: 256, lanes: 32} - - {class: vreg, go: Int16x16, base: "int", elemBits: 16, bits: 256, lanes: 16} - - {class: vreg, go: Uint16x16, base: "uint", elemBits: 16, bits: 256, lanes: 16} - - {class: vreg, go: Int32x8, base: "int", elemBits: 32, bits: 256, lanes: 8} - - {class: vreg, go: Uint32x8, base: "uint", elemBits: 32, bits: 256, lanes: 8} - - {class: vreg, go: Int64x4, base: "int", elemBits: 64, bits: 256, lanes: 4} - - {class: vreg, go: Uint64x4, base: "uint", elemBits: 64, bits: 256, lanes: 4} - - {class: vreg, go: Float32x8, base: "float", elemBits: 32, bits: 256, lanes: 8} - - {class: vreg, go: Float64x4, base: "float", elemBits: 64, bits: 256, lanes: 4} - - {class: vreg, go: Int8x64, base: "int", elemBits: 8, bits: 512, lanes: 64} - - {class: vreg, go: Uint8x64, base: "uint", elemBits: 8, bits: 512, lanes: 64} - - {class: vreg, go: Int16x32, base: "int", elemBits: 16, bits: 512, lanes: 32} - - {class: vreg, go: Uint16x32, base: "uint", elemBits: 16, bits: 512, lanes: 32} - - {class: vreg, go: 
Int32x16, base: "int", elemBits: 32, bits: 512, lanes: 16} - - {class: vreg, go: Uint32x16, base: "uint", elemBits: 32, bits: 512, lanes: 16} - - {class: vreg, go: Int64x8, base: "int", elemBits: 64, bits: 512, lanes: 8} - - {class: vreg, go: Uint64x8, base: "uint", elemBits: 64, bits: 512, lanes: 8} - - {class: vreg, go: Float32x16, base: "float", elemBits: 32, bits: 512, lanes: 16} - - {class: vreg, go: Float64x8, base: "float", elemBits: 64, bits: 512, lanes: 8} - - - {class: mask, go: Mask8x16, base: "int", elemBits: 8, bits: 128, lanes: 16} - - {class: mask, go: Mask16x8, base: "int", elemBits: 16, bits: 128, lanes: 8} - - {class: mask, go: Mask32x4, base: "int", elemBits: 32, bits: 128, lanes: 4} - - {class: mask, go: Mask64x2, base: "int", elemBits: 64, bits: 128, lanes: 2} - - {class: mask, go: Mask8x32, base: "int", elemBits: 8, bits: 256, lanes: 32} - - {class: mask, go: Mask16x16, base: "int", elemBits: 16, bits: 256, lanes: 16} - - {class: mask, go: Mask32x8, base: "int", elemBits: 32, bits: 256, lanes: 8} - - {class: mask, go: Mask64x4, base: "int", elemBits: 64, bits: 256, lanes: 4} - - {class: mask, go: Mask8x64, base: "int", elemBits: 8, bits: 512, lanes: 64} - - {class: mask, go: Mask16x32, base: "int", elemBits: 16, bits: 512, lanes: 32} - - {class: mask, go: Mask32x16, base: "int", elemBits: 32, bits: 512, lanes: 16} - - {class: mask, go: Mask64x8, base: "int", elemBits: 64, bits: 512, lanes: 8} - - - - {class: greg, go: float64, base: "float", bits: 64, lanes: 1} - - {class: greg, go: float32, base: "float", bits: 32, lanes: 1} - - {class: greg, go: int64, base: "int", bits: 64, lanes: 1} - - {class: greg, go: int32, base: "int", bits: 32, lanes: 1} - - {class: greg, go: int16, base: "int", bits: 16, lanes: 1} - - {class: greg, go: int8, base: "int", bits: 8, lanes: 1} - - {class: greg, go: uint64, base: "uint", bits: 64, lanes: 1} - - {class: greg, go: uint32, base: "uint", bits: 32, lanes: 1} - - {class: greg, go: uint16, base: "uint", bits: 
16, lanes: 1} - - {class: greg, go: uint8, base: "uint", bits: 8, lanes: 1} - -# Special shapes just to make INSERT[IF]128 work. -# The elemBits field of these shapes are wrong, it would be overwritten by overwriteElemBits. - - {class: vreg, go: Int8x16, base: "int", elemBits: 128, bits: 128, lanes: 16} - - {class: vreg, go: Uint8x16, base: "uint", elemBits: 128, bits: 128, lanes: 16} - - {class: vreg, go: Int16x8, base: "int", elemBits: 128, bits: 128, lanes: 8} - - {class: vreg, go: Uint16x8, base: "uint", elemBits: 128, bits: 128, lanes: 8} - - {class: vreg, go: Int32x4, base: "int", elemBits: 128, bits: 128, lanes: 4} - - {class: vreg, go: Uint32x4, base: "uint", elemBits: 128, bits: 128, lanes: 4} - - {class: vreg, go: Int64x2, base: "int", elemBits: 128, bits: 128, lanes: 2} - - {class: vreg, go: Uint64x2, base: "uint", elemBits: 128, bits: 128, lanes: 2} - - - {class: vreg, go: Int8x32, base: "int", elemBits: 128, bits: 256, lanes: 32} - - {class: vreg, go: Uint8x32, base: "uint", elemBits: 128, bits: 256, lanes: 32} - - {class: vreg, go: Int16x16, base: "int", elemBits: 128, bits: 256, lanes: 16} - - {class: vreg, go: Uint16x16, base: "uint", elemBits: 128, bits: 256, lanes: 16} - - {class: vreg, go: Int32x8, base: "int", elemBits: 128, bits: 256, lanes: 8} - - {class: vreg, go: Uint32x8, base: "uint", elemBits: 128, bits: 256, lanes: 8} - - {class: vreg, go: Int64x4, base: "int", elemBits: 128, bits: 256, lanes: 4} - - {class: vreg, go: Uint64x4, base: "uint", elemBits: 128, bits: 256, lanes: 4} - - - {class: immediate, go: Immediate} # TODO: we only support imms that are not used as value -- usually as instruction semantic predicate like VPCMP as of now. -inVariant: !repeat -- *types -out: !repeat -- *types diff --git a/internal/simdgen/xed.go b/internal/simdgen/xed.go deleted file mode 100644 index 3bbf2cbc..00000000 --- a/internal/simdgen/xed.go +++ /dev/null @@ -1,780 +0,0 @@ -// Copyright 2025 The Go Authors. All rights reserved. 
-// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package main - -import ( - "cmp" - "fmt" - "log" - "maps" - "regexp" - "slices" - "strconv" - "strings" - - "golang.org/x/arch/internal/unify" - "golang.org/x/arch/x86/xeddata" - "gopkg.in/yaml.v3" -) - -const ( - NOT_REG_CLASS = 0 // not a register - VREG_CLASS = 1 // classify as a vector register; see - GREG_CLASS = 2 // classify as a general register -) - -// instVariant is a bitmap indicating a variant of an instruction that has -// optional parameters. -type instVariant uint8 - -const ( - instVariantNone instVariant = 0 - - // instVariantMasked indicates that this is the masked variant of an - // optionally-masked instruction. - instVariantMasked instVariant = 1 << iota -) - -var operandRemarks int - -// TODO: Doc. Returns Values with Def domains. -func loadXED(xedPath string) []*unify.Value { - // TODO: Obviously a bunch more to do here. - - db, err := xeddata.NewDatabase(xedPath) - if err != nil { - log.Fatalf("open database: %v", err) - } - - var defs []*unify.Value - err = xeddata.WalkInsts(xedPath, func(inst *xeddata.Inst) { - inst.Pattern = xeddata.ExpandStates(db, inst.Pattern) - - switch { - case inst.RealOpcode == "N": - return // Skip unstable instructions - case !strings.HasPrefix(inst.Extension, "AVX"): - // We're only interested in AVX instructions. - return - } - - if *flagDebugXED { - fmt.Printf("%s:\n%+v\n", inst.Pos, inst) - } - - ops, err := decodeOperands(db, strings.Fields(inst.Operands)) - if err != nil { - operandRemarks++ - if *Verbose { - log.Printf("%s: [%s] %s", inst.Pos, inst.Opcode(), err) - } - return - } - - applyQuirks(inst, ops) - - defsPos := len(defs) - defs = append(defs, instToUVal(inst, ops)...) 
- - if *flagDebugXED { - for i := defsPos; i < len(defs); i++ { - y, _ := yaml.Marshal(defs[i]) - fmt.Printf("==>\n%s\n", y) - } - } - }) - if err != nil { - log.Fatalf("walk insts: %v", err) - } - - if len(unknownFeatures) > 0 { - if !*Verbose { - nInst := 0 - for _, insts := range unknownFeatures { - nInst += len(insts) - } - log.Printf("%d unhandled CPU features for %d instructions (use -v for details)", len(unknownFeatures), nInst) - } else { - keys := slices.SortedFunc(maps.Keys(unknownFeatures), func(a, b cpuFeatureKey) int { - return cmp.Or(cmp.Compare(a.Extension, b.Extension), - cmp.Compare(a.ISASet, b.ISASet)) - }) - for _, key := range keys { - if key.ISASet == "" || key.ISASet == key.Extension { - log.Printf("unhandled Extension %s", key.Extension) - } else { - log.Printf("unhandled Extension %s and ISASet %s", key.Extension, key.ISASet) - } - log.Printf(" opcodes: %s", slices.Sorted(maps.Keys(unknownFeatures[key]))) - } - } - } - - return defs -} - -var ( - maskRequiredRe = regexp.MustCompile(`VPCOMPRESS[BWDQ]|VCOMPRESSP[SD]|VPEXPAND[BWDQ]|VEXPANDP[SD]`) - maskOptionalRe = regexp.MustCompile(`VPCMP(EQ|GT|U)?[BWDQ]|VCMPP[SD]`) -) - -func applyQuirks(inst *xeddata.Inst, ops []operand) { - opc := inst.Opcode() - switch { - case maskRequiredRe.MatchString(opc): - // The mask on these instructions is marked optional, but the - // instruction is pointless without the mask. - for i, op := range ops { - if op, ok := op.(operandMask); ok { - op.optional = false - ops[i] = op - } - } - - case maskOptionalRe.MatchString(opc): - // Conversely, these masks should be marked optional and aren't. - for i, op := range ops { - if op, ok := op.(operandMask); ok && op.action.r { - op.optional = true - ops[i] = op - } - } - } -} - -type operandCommon struct { - action operandAction -} - -// operandAction defines whether this operand is read and/or written. -// -// TODO: Should this live in [xeddata.Operand]? 
-type operandAction struct { - r bool // Read - w bool // Written - cr bool // Read is conditional (implies r==true) - cw bool // Write is conditional (implies w==true) -} - -type operandMem struct { - operandCommon - // TODO -} - -type vecShape struct { - elemBits int // Element size in bits - bits int // Register width in bits (total vector bits) -} - -type operandVReg struct { // Vector register - operandCommon - vecShape - elemBaseType scalarBaseType -} - -type operandGReg struct { // Vector register - operandCommon - vecShape - elemBaseType scalarBaseType -} - -// operandMask is a vector mask. -// -// Regardless of the actual mask representation, the [vecShape] of this operand -// corresponds to the "bit for bit" type of mask. That is, elemBits gives the -// element width covered by each mask element, and bits/elemBits gives the total -// number of mask elements. (bits gives the total number of bits as if this were -// a bit-for-bit mask, which may be meaningless on its own.) -type operandMask struct { - operandCommon - vecShape - // Bits in the mask is w/bits. - - allMasks bool // If set, size cannot be inferred because all operands are masks. 
- - // Mask can be omitted, in which case it defaults to K0/"no mask" - optional bool -} - -type operandImm struct { - operandCommon - bits int // Immediate size in bits -} - -type operand interface { - common() operandCommon - addToDef(b *unify.DefBuilder) -} - -func strVal(s any) *unify.Value { - return unify.NewValue(unify.NewStringExact(fmt.Sprint(s))) -} - -func (o operandCommon) common() operandCommon { - return o -} - -func (o operandMem) addToDef(b *unify.DefBuilder) { - // TODO: w, base - b.Add("class", strVal("memory")) -} - -func (o operandVReg) addToDef(b *unify.DefBuilder) { - baseDomain, err := unify.NewStringRegex(o.elemBaseType.regex()) - if err != nil { - panic("parsing baseRe: " + err.Error()) - } - b.Add("class", strVal("vreg")) - b.Add("bits", strVal(o.bits)) - b.Add("base", unify.NewValue(baseDomain)) - // If elemBits == bits, then the vector can be ANY shape. This happens with, - // for example, logical ops. - if o.elemBits != o.bits { - b.Add("elemBits", strVal(o.elemBits)) - } -} - -func (o operandGReg) addToDef(b *unify.DefBuilder) { - baseDomain, err := unify.NewStringRegex(o.elemBaseType.regex()) - if err != nil { - panic("parsing baseRe: " + err.Error()) - } - b.Add("class", strVal("greg")) - b.Add("bits", strVal(o.bits)) - b.Add("base", unify.NewValue(baseDomain)) - if o.elemBits != o.bits { - b.Add("elemBits", strVal(o.elemBits)) - } -} - -func (o operandMask) addToDef(b *unify.DefBuilder) { - b.Add("class", strVal("mask")) - if o.allMasks { - // If all operands are masks, omit sizes and let unification determine mask sizes. 
- return - } - b.Add("elemBits", strVal(o.elemBits)) - b.Add("bits", strVal(o.bits)) -} - -func (o operandImm) addToDef(b *unify.DefBuilder) { - b.Add("class", strVal("immediate")) - b.Add("bits", strVal(o.bits)) -} - -var actionEncoding = map[string]operandAction{ - "r": {r: true}, - "cr": {r: true, cr: true}, - "w": {w: true}, - "cw": {w: true, cw: true}, - "rw": {r: true, w: true}, - "crw": {r: true, w: true, cr: true}, - "rcw": {r: true, w: true, cw: true}, -} - -func decodeOperand(db *xeddata.Database, operand string) (operand, error) { - op, err := xeddata.NewOperand(db, operand) - if err != nil { - log.Fatalf("parsing operand %q: %v", operand, err) - } - if *flagDebugXED { - fmt.Printf(" %+v\n", op) - } - - if strings.HasPrefix(op.Name, "EMX_BROADCAST") { - // This refers to a set of macros defined in all-state.txt that set a - // BCAST operand to various fixed values. But the BCAST operand is - // itself suppressed and "internal", so I think we can just ignore this - // operand. - return nil, nil - } - - // TODO: See xed_decoded_inst_operand_action. This might need to be more - // complicated. - action, ok := actionEncoding[op.Action] - if !ok { - return nil, fmt.Errorf("unknown action %q", op.Action) - } - common := operandCommon{action: action} - - lhs := op.NameLHS() - if strings.HasPrefix(lhs, "MEM") { - // TODO: Width, base type - return operandMem{ - operandCommon: common, - }, nil - } else if strings.HasPrefix(lhs, "REG") { - if op.Width == "mskw" { - // The mask operand doesn't specify a width. We have to infer it. - // - // XED uses the marker ZEROSTR to indicate that a mask operand is - // optional and, if omitted, implies K0, aka "no mask". 
- return operandMask{ - operandCommon: common, - optional: op.Attributes["TXT=ZEROSTR"], - }, nil - } else { - class, regBits := decodeReg(op) - if class == NOT_REG_CLASS { - return nil, fmt.Errorf("failed to decode register %q", operand) - } - baseType, elemBits, ok := decodeType(op) - if !ok { - return nil, fmt.Errorf("failed to decode register width %q", operand) - } - shape := vecShape{elemBits: elemBits, bits: regBits} - if class == VREG_CLASS { - return operandVReg{ - operandCommon: common, - vecShape: shape, - elemBaseType: baseType, - }, nil - } - // general register - m := min(shape.bits, shape.elemBits) - shape.bits, shape.elemBits = m, m - return operandGReg{ - operandCommon: common, - vecShape: shape, - elemBaseType: baseType, - }, nil - - } - } else if strings.HasPrefix(lhs, "IMM") { - _, bits, ok := decodeType(op) - if !ok { - return nil, fmt.Errorf("failed to decode register width %q", operand) - } - return operandImm{ - operandCommon: common, - bits: bits, - }, nil - } - - // TODO: BASE and SEG - return nil, fmt.Errorf("unknown operand LHS %q in %q", lhs, operand) -} - -func decodeOperands(db *xeddata.Database, operands []string) (ops []operand, err error) { - // Decode the XED operand descriptions. - for _, o := range operands { - op, err := decodeOperand(db, o) - if err != nil { - return nil, err - } - if op != nil { - ops = append(ops, op) - } - } - - // XED doesn't encode the size of mask operands. If there are mask operands, - // try to infer their sizes from other operands. - if err := inferMaskSizes(ops); err != nil { - return nil, fmt.Errorf("%w in operands %+v", err, operands) - } - - return ops, nil -} - -func inferMaskSizes(ops []operand) error { - // This is a heuristic and it falls apart in some cases: - // - // - Mask operations like KAND[BWDQ] have *nothing* in the XED to indicate - // mask size. 
- // - // - VINSERT*, VPSLL*, VPSRA*, and VPSRL* and some others naturally have - // mixed input sizes and the XED doesn't indicate which operands the mask - // applies to. - // - // - VPDP* and VP4DP* have really complex mixed operand patterns. - // - // I think for these we may just have to hand-write a table of which - // operands each mask applies to. - inferMask := func(r, w bool) error { - var masks []int - var rSizes, wSizes, sizes []vecShape - allMasks := true - hasWMask := false - for i, op := range ops { - action := op.common().action - if _, ok := op.(operandMask); ok { - if action.r && action.w { - return fmt.Errorf("unexpected rw mask") - } - if action.r == r || action.w == w { - masks = append(masks, i) - } - if action.w { - hasWMask = true - } - } else { - allMasks = false - if reg, ok := op.(operandVReg); ok { - if action.r { - rSizes = append(rSizes, reg.vecShape) - } - if action.w { - wSizes = append(wSizes, reg.vecShape) - } - } - } - } - if len(masks) == 0 { - return nil - } - - if r { - sizes = rSizes - if len(sizes) == 0 { - sizes = wSizes - } - } - if w { - sizes = wSizes - if len(sizes) == 0 { - sizes = rSizes - } - } - - if len(sizes) == 0 { - // If all operands are masks, leave the mask inferrence to the users. - if allMasks { - for _, i := range masks { - m := ops[i].(operandMask) - m.allMasks = true - ops[i] = m - } - return nil - } - return fmt.Errorf("cannot infer mask size: no register operands") - } - shape, ok := singular(sizes) - if !ok { - if !hasWMask && len(wSizes) == 1 && len(masks) == 1 { - // This pattern looks like predicate mask, so its shape should align with the - // output. TODO: verify this is a safe assumption. 
- shape = wSizes[0] - } else { - return fmt.Errorf("cannot infer mask size: multiple register sizes %v", sizes) - } - } - for _, i := range masks { - m := ops[i].(operandMask) - m.vecShape = shape - ops[i] = m - } - return nil - } - if err := inferMask(true, false); err != nil { - return err - } - if err := inferMask(false, true); err != nil { - return err - } - return nil -} - -// addOperandstoDef adds "in", "inVariant", and "out" to an instruction Def. -// -// Optional mask input operands are added to the inVariant field if -// variant&instVariantMasked, and omitted otherwise. -func addOperandsToDef(ops []operand, instDB *unify.DefBuilder, variant instVariant) { - var inVals, inVar, outVals []*unify.Value - asmPos := 0 - for _, op := range ops { - var db unify.DefBuilder - op.addToDef(&db) - db.Add("asmPos", unify.NewValue(unify.NewStringExact(fmt.Sprint(asmPos)))) - - action := op.common().action - asmCount := 1 // # of assembly operands; 0 or 1 - if action.r { - inVal := unify.NewValue(db.Build()) - // If this is an optional mask, put it in the input variant tuple. - if mask, ok := op.(operandMask); ok && mask.optional { - if variant&instVariantMasked != 0 { - inVar = append(inVar, inVal) - } else { - // This operand doesn't appear in the assembly at all. - asmCount = 0 - } - } else { - // Just a regular input operand. 
- inVals = append(inVals, inVal) - } - } - if action.w { - outVal := unify.NewValue(db.Build()) - outVals = append(outVals, outVal) - } - - asmPos += asmCount - } - - instDB.Add("in", unify.NewValue(unify.NewTuple(inVals...))) - instDB.Add("inVariant", unify.NewValue(unify.NewTuple(inVar...))) - instDB.Add("out", unify.NewValue(unify.NewTuple(outVals...))) -} - -func instToUVal(inst *xeddata.Inst, ops []operand) []*unify.Value { - feature, ok := decodeCPUFeature(inst) - if !ok { - return nil - } - - var vals []*unify.Value - vals = append(vals, instToUVal1(inst, ops, feature, instVariantNone)) - if hasOptionalMask(ops) { - vals = append(vals, instToUVal1(inst, ops, feature, instVariantMasked)) - } - return vals -} - -func instToUVal1(inst *xeddata.Inst, ops []operand, feature string, variant instVariant) *unify.Value { - var db unify.DefBuilder - db.Add("goarch", unify.NewValue(unify.NewStringExact("amd64"))) - db.Add("asm", unify.NewValue(unify.NewStringExact(inst.Opcode()))) - addOperandsToDef(ops, &db, variant) - db.Add("cpuFeature", unify.NewValue(unify.NewStringExact(feature))) - - if strings.Contains(inst.Pattern, "ZEROING=0") { - // This is an EVEX instruction, but the ".Z" (zero-merging) - // instruction flag is NOT valid. EVEX.z must be zero. - // - // This can mean a few things: - // - // - The output of an instruction is a mask, so merging modes don't - // make any sense. E.g., VCMPPS. - // - // - There are no masks involved anywhere. (Maybe MASK=0 is also set - // in this case?) E.g., VINSERTPS. - // - // - The operation inherently performs merging. E.g., VCOMPRESSPS - // with a mem operand. - // - // There may be other reasons. - db.Add("zeroing", unify.NewValue(unify.NewStringExact("false"))) - } - pos := unify.Pos{Path: inst.Pos.Path, Line: inst.Pos.Line} - return unify.NewValuePos(db.Build(), pos) -} - -// decodeCPUFeature returns the CPU feature name required by inst. These match -// the names of the "Has*" feature checks in the simd package. 
-func decodeCPUFeature(inst *xeddata.Inst) (string, bool) { - key := cpuFeatureKey{ - Extension: inst.Extension, - ISASet: isaSetStrip.ReplaceAllLiteralString(inst.ISASet, ""), - } - feat, ok := cpuFeatureMap[key] - if !ok { - imap := unknownFeatures[key] - if imap == nil { - imap = make(map[string]struct{}) - unknownFeatures[key] = imap - } - imap[inst.Opcode()] = struct{}{} - return "", false - } - if feat == "ignore" { - return "", false - } - return feat, true -} - -var isaSetStrip = regexp.MustCompile("_(128N?|256N?|512)$") - -type cpuFeatureKey struct { - Extension, ISASet string -} - -// cpuFeatureMap maps from XED's "EXTENSION" and "ISA_SET" to a CPU feature name -// that can be used in the SIMD API. -var cpuFeatureMap = map[cpuFeatureKey]string{ - {"AVX", ""}: "AVX", - {"AVX_VNNI", "AVX_VNNI"}: "AVXVNNI", - {"AVX2", ""}: "AVX2", - - // AVX-512 foundational features. We combine all of these into one "AVX512" feature. - {"AVX512EVEX", "AVX512F"}: "AVX512", - {"AVX512EVEX", "AVX512CD"}: "AVX512", - {"AVX512EVEX", "AVX512BW"}: "AVX512", - {"AVX512EVEX", "AVX512DQ"}: "AVX512", - // AVX512VL doesn't appear explicitly in the ISASet. I guess it's implied by - // the vector length suffix. - - // AVX-512 extension features - {"AVX512EVEX", "AVX512_BITALG"}: "AVX512BITALG", - {"AVX512EVEX", "AVX512_GFNI"}: "AVX512GFNI", - {"AVX512EVEX", "AVX512_VBMI2"}: "AVX512VBMI2", - {"AVX512EVEX", "AVX512_VBMI"}: "AVX512VBMI", - {"AVX512EVEX", "AVX512_VNNI"}: "AVX512VNNI", - {"AVX512EVEX", "AVX512_VPOPCNTDQ"}: "AVX512VPOPCNTDQ", - - // AVX 10.2 (not yet supported) - {"AVX512EVEX", "AVX10_2_RC"}: "ignore", -} - -var unknownFeatures = map[cpuFeatureKey]map[string]struct{}{} - -// hasOptionalMask returns whether there is an optional mask operand in ops. 
-func hasOptionalMask(ops []operand) bool { - for _, op := range ops { - if op, ok := op.(operandMask); ok && op.optional { - return true - } - } - return false -} - -func singular[T comparable](xs []T) (T, bool) { - if len(xs) == 0 { - return *new(T), false - } - for _, x := range xs[1:] { - if x != xs[0] { - return *new(T), false - } - } - return xs[0], true -} - -// decodeReg returns class (NOT_REG_CLASS, VREG_CLASS, GREG_CLASS), -// and width in bits. If the operand cannot be decided as a register, -// then the clas is NOT_REG_CLASS. -func decodeReg(op *xeddata.Operand) (class, width int) { - // op.Width tells us the total width, e.g.,: - // - // dq => 128 bits (XMM) - // qq => 256 bits (YMM) - // mskw => K - // z[iuf?](8|16|32|...) => 512 bits (ZMM) - // - // But the encoding is really weird and it's not clear if these *always* - // mean XMM/YMM/ZMM or if other irregular things can use these large widths. - // Hence, we dig into the register sets themselves. - - if !strings.HasPrefix(op.NameLHS(), "REG") { - return NOT_REG_CLASS, 0 - } - // TODO: We shouldn't be relying on the macro naming conventions. We should - // use all-dec-patterns.txt, but xeddata doesn't support that table right now. - rhs := op.NameRHS() - if !strings.HasSuffix(rhs, "()") { - return NOT_REG_CLASS, 0 - } - switch { - case strings.HasPrefix(rhs, "XMM_"): - return VREG_CLASS, 128 - case strings.HasPrefix(rhs, "YMM_"): - return VREG_CLASS, 256 - case strings.HasPrefix(rhs, "ZMM_"): - return VREG_CLASS, 512 - case strings.HasPrefix(rhs, "GPR64_"), strings.HasPrefix(rhs, "VGPR64_"): - return GREG_CLASS, 64 - case strings.HasPrefix(rhs, "GPR32_"), strings.HasPrefix(rhs, "VGPR32_"): - return GREG_CLASS, 32 - } - return NOT_REG_CLASS, 0 -} - -var xtypeRe = regexp.MustCompile(`^([iuf])([0-9]+)$`) - -// scalarBaseType describes the base type of a scalar element. This is a Go -// type, but without the bit width suffix (with the exception of -// scalarBaseIntOrUint). 
-type scalarBaseType int - -const ( - scalarBaseInt scalarBaseType = iota - scalarBaseUint - scalarBaseIntOrUint // Signed or unsigned is unspecified - scalarBaseFloat - scalarBaseComplex - scalarBaseBFloat - scalarBaseHFloat -) - -func (s scalarBaseType) regex() string { - switch s { - case scalarBaseInt: - return "int" - case scalarBaseUint: - return "uint" - case scalarBaseIntOrUint: - return "int|uint" - case scalarBaseFloat: - return "float" - case scalarBaseComplex: - return "complex" - case scalarBaseBFloat: - return "BFloat" - case scalarBaseHFloat: - return "HFloat" - } - panic(fmt.Sprintf("unknown scalar base type %d", s)) -} - -func decodeType(op *xeddata.Operand) (base scalarBaseType, bits int, ok bool) { - // The xtype tells you the element type. i8, i16, i32, i64, f32, etc. - // - // TODO: Things like AVX2 VPAND have an xtype of u256 because they're - // element-width agnostic. Do I map that to all widths, or just omit the - // element width and let unification flesh it out? There's no u512 - // (presumably those are all masked, so elem width matters). These are all - // Category: LOGICAL, so maybe we could use that info? - - // Handle some weird ones. - switch op.Xtype { - // 8-bit float formats as defined by Open Compute Project "OCP 8-bit - // Floating Point Specification (OFP8)". - case "bf8": // E5M2 float - return scalarBaseBFloat, 8, true - case "hf8": // E4M3 float - return scalarBaseHFloat, 8, true - case "bf16": // bfloat16 float - return scalarBaseBFloat, 16, true - case "2f16": - // Complex consisting of 2 float16s. Doesn't exist in Go, but we can say - // what it would be. - return scalarBaseComplex, 32, true - case "2i8", "2I8": - // These just use the lower INT8 in each 16 bit field. - // As far as I can tell, "2I8" is a typo. - return scalarBaseInt, 8, true - case "2u16", "2U16": - // some VPDP* has it - // TODO: does "z" means it has zeroing? 
- return scalarBaseUint, 16, true - case "2i16", "2I16": - // some VPDP* has it - return scalarBaseInt, 16, true - case "4u8", "4U8": - // some VPDP* has it - return scalarBaseUint, 8, true - case "4i8", "4I8": - // some VPDP* has it - return scalarBaseInt, 8, true - } - - // The rest follow a simple pattern. - m := xtypeRe.FindStringSubmatch(op.Xtype) - if m == nil { - // TODO: Report unrecognized xtype - return 0, 0, false - } - bits, _ = strconv.Atoi(m[2]) - switch m[1] { - case "i", "u": - // XED is rather inconsistent about what's signed, unsigned, or doesn't - // matter, so merge them together and let the Go definitions narrow as - // appropriate. Maybe there's a better way to do this. - return scalarBaseIntOrUint, bits, true - case "f": - return scalarBaseFloat, bits, true - default: - panic("unreachable") - } -} diff --git a/internal/unify/closure.go b/internal/unify/closure.go deleted file mode 100644 index e8e76e21..00000000 --- a/internal/unify/closure.go +++ /dev/null @@ -1,154 +0,0 @@ -// Copyright 2025 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package unify - -import ( - "fmt" - "iter" - "maps" - "slices" -) - -type Closure struct { - val *Value - env envSet -} - -func NewSum(vs ...*Value) Closure { - id := &ident{name: "sum"} - return Closure{NewValue(Var{id}), topEnv.bind(id, vs...)} -} - -// IsBottom returns whether c consists of no values. -func (c Closure) IsBottom() bool { - return c.val.Domain == nil -} - -// Summands returns the top-level Values of c. This assumes the top-level of c -// was constructed as a sum, and is mostly useful for debugging. 
-func (c Closure) Summands() iter.Seq[*Value] { - return func(yield func(*Value) bool) { - var rec func(v *Value, env envSet) bool - rec = func(v *Value, env envSet) bool { - switch d := v.Domain.(type) { - case Var: - parts := env.partitionBy(d.id) - for _, part := range parts { - // It may be a sum of sums. Walk into this value. - if !rec(part.value, part.env) { - return false - } - } - return true - default: - return yield(v) - } - } - rec(c.val, c.env) - } -} - -// All enumerates all possible concrete values of c by substituting variables -// from the environment. -// -// E.g., enumerating this Value -// -// a: !sum [1, 2] -// b: !sum [3, 4] -// -// results in -// -// - {a: 1, b: 3} -// - {a: 1, b: 4} -// - {a: 2, b: 3} -// - {a: 2, b: 4} -func (c Closure) All() iter.Seq[*Value] { - // In order to enumerate all concrete values under all possible variable - // bindings, we use a "non-deterministic continuation passing style" to - // implement this. We use CPS to traverse the Value tree, threading the - // (possibly narrowing) environment through that CPS following an Euler - // tour. Where the environment permits multiple choices, we invoke the same - // continuation for each choice. Similar to a yield function, the - // continuation can return false to stop the non-deterministic walk. - return func(yield func(*Value) bool) { - c.val.all1(c.env, func(v *Value, e envSet) bool { - return yield(v) - }) - } -} - -func (v *Value) all1(e envSet, cont func(*Value, envSet) bool) bool { - switch d := v.Domain.(type) { - default: - panic(fmt.Sprintf("unknown domain type %T", d)) - - case nil: - return true - - case Top, String: - return cont(v, e) - - case Def: - fields := d.keys() - // We can reuse this parts slice because we're doing a DFS through the - // state space. (Otherwise, we'd have to do some messy threading of an - // immutable slice-like value through allElt.) 
- parts := make(map[string]*Value, len(fields)) - - // TODO: If there are no Vars or Sums under this Def, then nothing can - // change the Value or env, so we could just cont(v, e). - var allElt func(elt int, e envSet) bool - allElt = func(elt int, e envSet) bool { - if elt == len(fields) { - // Build a new Def from the concrete parts. Clone parts because - // we may reuse it on other non-deterministic branches. - nVal := newValueFrom(Def{maps.Clone(parts)}, v) - return cont(nVal, e) - } - - return d.fields[fields[elt]].all1(e, func(v *Value, e envSet) bool { - parts[fields[elt]] = v - return allElt(elt+1, e) - }) - } - return allElt(0, e) - - case Tuple: - // Essentially the same as Def. - if d.repeat != nil { - // There's nothing we can do with this. - return cont(v, e) - } - parts := make([]*Value, len(d.vs)) - var allElt func(elt int, e envSet) bool - allElt = func(elt int, e envSet) bool { - if elt == len(d.vs) { - // Build a new tuple from the concrete parts. Clone parts because - // we may reuse it on other non-deterministic branches. - nVal := newValueFrom(Tuple{vs: slices.Clone(parts)}, v) - return cont(nVal, e) - } - - return d.vs[elt].all1(e, func(v *Value, e envSet) bool { - parts[elt] = v - return allElt(elt+1, e) - }) - } - return allElt(0, e) - - case Var: - // Go each way this variable can be bound. - for _, ePart := range e.partitionBy(d.id) { - // d.id is no longer bound in this environment partition. We'll may - // need it later in the Euler tour, so bind it back to this single - // value. - env := ePart.env.bind(d.id, ePart.value) - if !ePart.value.all1(env, cont) { - return false - } - } - return true - } -} diff --git a/internal/unify/domain.go b/internal/unify/domain.go deleted file mode 100644 index 1e0f2be6..00000000 --- a/internal/unify/domain.go +++ /dev/null @@ -1,359 +0,0 @@ -// Copyright 2025 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. 
- -package unify - -import ( - "fmt" - "iter" - "maps" - "reflect" - "regexp" - "slices" - "strconv" - "strings" -) - -// A Domain is a non-empty set of values, all of the same kind. -// -// Domain may be a scalar: -// -// - [String] - Represents string-typed values. -// -// Or a composite: -// -// - [Def] - A mapping from fixed keys to [Domain]s. -// -// - [Tuple] - A fixed-length sequence of [Domain]s or -// all possible lengths repeating a [Domain]. -// -// Or top or bottom: -// -// - [Top] - Represents all possible values of all kinds. -// -// - nil - Represents no values. -// -// Or a variable: -// -// - [Var] - A value captured in the environment. -type Domain interface { - Exact() bool - WhyNotExact() string - - // decode stores this value in a Go value. If this value is not exact, this - // returns a potentially wrapped *inexactError. - decode(reflect.Value) error -} - -type inexactError struct { - valueType string - goType string -} - -func (e *inexactError) Error() string { - return fmt.Sprintf("cannot store inexact %s value in %s", e.valueType, e.goType) -} - -type decodeError struct { - path string - err error -} - -func newDecodeError(path string, err error) *decodeError { - if err, ok := err.(*decodeError); ok { - return &decodeError{path: path + "." + err.path, err: err.err} - } - return &decodeError{path: path, err: err} -} - -func (e *decodeError) Unwrap() error { - return e.err -} - -func (e *decodeError) Error() string { - return fmt.Sprintf("%s: %s", e.path, e.err) -} - -// Top represents all possible values of all possible types. -type Top struct{} - -func (t Top) Exact() bool { return false } -func (t Top) WhyNotExact() string { return "is top" } - -func (t Top) decode(rv reflect.Value) error { - // We can decode Top into a pointer-typed value as nil. - if rv.Kind() != reflect.Pointer { - return &inexactError{"top", rv.Type().String()} - } - rv.SetZero() - return nil -} - -// A Def is a mapping from field names to [Value]s. 
Any fields not explicitly -// listed have [Value] [Top]. -type Def struct { - fields map[string]*Value -} - -// A DefBuilder builds a [Def] one field at a time. The zero value is an empty -// [Def]. -type DefBuilder struct { - fields map[string]*Value -} - -func (b *DefBuilder) Add(name string, v *Value) { - if b.fields == nil { - b.fields = make(map[string]*Value) - } - if _, ok := b.fields[name]; ok { - panic(fmt.Sprintf("duplicate field %q", name)) - } - b.fields[name] = v -} - -// Build constructs a [Def] from the fields added to this builder. -func (b *DefBuilder) Build() Def { - return Def{maps.Clone(b.fields)} -} - -// Exact returns true if all field Values are exact. -func (d Def) Exact() bool { - for _, v := range d.fields { - if !v.Exact() { - return false - } - } - return true -} - -// WhyNotExact returns why the value is not exact -func (d Def) WhyNotExact() string { - for s, v := range d.fields { - if !v.Exact() { - w := v.WhyNotExact() - return "field " + s + ": " + w - } - } - return "" -} - -func (d Def) decode(rv reflect.Value) error { - if rv.Kind() != reflect.Struct { - return fmt.Errorf("cannot decode Def into %s", rv.Type()) - } - - var lowered map[string]string // Lower case -> canonical for d.fields. 
- rt := rv.Type() - for fi := range rv.NumField() { - fType := rt.Field(fi) - if fType.PkgPath != "" { - continue - } - v := d.fields[fType.Name] - if v == nil { - v = topValue - - // Try a case-insensitive match - canon, ok := d.fields[strings.ToLower(fType.Name)] - if ok { - v = canon - } else { - if lowered == nil { - lowered = make(map[string]string, len(d.fields)) - for k := range d.fields { - l := strings.ToLower(k) - if k != l { - lowered[l] = k - } - } - } - canon, ok := lowered[strings.ToLower(fType.Name)] - if ok { - v = d.fields[canon] - } - } - } - if err := decodeReflect(v, rv.Field(fi)); err != nil { - return newDecodeError(fType.Name, err) - } - } - return nil -} - -func (d Def) keys() []string { - return slices.Sorted(maps.Keys(d.fields)) -} - -func (d Def) All() iter.Seq2[string, *Value] { - // TODO: We call All fairly often. It's probably bad to sort this every - // time. - keys := slices.Sorted(maps.Keys(d.fields)) - return func(yield func(string, *Value) bool) { - for _, k := range keys { - if !yield(k, d.fields[k]) { - return - } - } - } -} - -// A Tuple is a sequence of Values in one of two forms: 1. a fixed-length tuple, -// where each Value can be different or 2. a "repeated tuple", which is a Value -// repeated 0 or more times. -type Tuple struct { - vs []*Value - - // repeat, if non-nil, means this Tuple consists of an element repeated 0 or - // more times. If repeat is non-nil, vs must be nil. This is a generator - // function because we don't necessarily want *exactly* the same Value - // repeated. For example, in YAML encoding, a !sum in a repeated tuple needs - // a fresh variable in each instance. 
- repeat []func(envSet) (*Value, envSet) -} - -func NewTuple(vs ...*Value) Tuple { - return Tuple{vs: vs} -} - -func NewRepeat(gens ...func(envSet) (*Value, envSet)) Tuple { - return Tuple{repeat: gens} -} - -func (d Tuple) Exact() bool { - if d.repeat != nil { - return false - } - for _, v := range d.vs { - if !v.Exact() { - return false - } - } - return true -} - -func (d Tuple) WhyNotExact() string { - if d.repeat != nil { - return "d.repeat is not nil" - } - for i, v := range d.vs { - if !v.Exact() { - w := v.WhyNotExact() - return "index " + strconv.FormatInt(int64(i), 10) + ": " + w - } - } - return "" -} - -func (d Tuple) decode(rv reflect.Value) error { - if d.repeat != nil { - return &inexactError{"repeated tuple", rv.Type().String()} - } - // TODO: We could also do arrays. - if rv.Kind() != reflect.Slice { - return fmt.Errorf("cannot decode Tuple into %s", rv.Type()) - } - if rv.IsNil() || rv.Cap() < len(d.vs) { - rv.Set(reflect.MakeSlice(rv.Type(), len(d.vs), len(d.vs))) - } else { - rv.SetLen(len(d.vs)) - } - for i, v := range d.vs { - if err := decodeReflect(v, rv.Index(i)); err != nil { - return newDecodeError(fmt.Sprintf("%d", i), err) - } - } - return nil -} - -// A String represents a set of strings. It can represent the intersection of a -// set of regexps, or a single exact string. In general, the domain of a String -// is non-empty, but we do not attempt to prove emptiness of a regexp value. -type String struct { - kind stringKind - re []*regexp.Regexp // Intersection of regexps - exact string -} - -type stringKind int - -const ( - stringRegex stringKind = iota - stringExact -) - -func NewStringRegex(exprs ...string) (String, error) { - if len(exprs) == 0 { - exprs = []string{""} - } - v := String{kind: -1} - for _, expr := range exprs { - if expr == "" { - // Skip constructing the regexp. It won't have a "literal prefix" - // and so we wind up thinking this is a regexp instead of an exact - // (empty) string. 
- v = String{kind: stringExact, exact: ""} - continue - } - - re, err := regexp.Compile(`\A(?:` + expr + `)\z`) - if err != nil { - return String{}, fmt.Errorf("parsing value: %s", err) - } - - // An exact value narrows the whole domain to exact, so we're done, but - // should keep parsing. - if v.kind == stringExact { - continue - } - - if exact, complete := re.LiteralPrefix(); complete { - v = String{kind: stringExact, exact: exact} - } else { - v.kind = stringRegex - v.re = append(v.re, re) - } - } - return v, nil -} - -func NewStringExact(s string) String { - return String{kind: stringExact, exact: s} -} - -// Exact returns whether this Value is known to consist of a single string. -func (d String) Exact() bool { - return d.kind == stringExact -} - -func (d String) WhyNotExact() string { - if d.kind == stringExact { - return "" - } - return "string is not exact" -} - -func (d String) decode(rv reflect.Value) error { - if d.kind != stringExact { - return &inexactError{"regex", rv.Type().String()} - } - switch rv.Kind() { - default: - return fmt.Errorf("cannot decode String into %s", rv.Type()) - case reflect.String: - rv.SetString(d.exact) - case reflect.Int: - i, err := strconv.Atoi(d.exact) - if err != nil { - return fmt.Errorf("cannot decode String into %s: %s", rv.Type(), err) - } - rv.SetInt(int64(i)) - case reflect.Bool: - b, err := strconv.ParseBool(d.exact) - if err != nil { - return fmt.Errorf("cannot decode String into %s: %s", rv.Type(), err) - } - rv.SetBool(b) - } - return nil -} diff --git a/internal/unify/dot.go b/internal/unify/dot.go deleted file mode 100644 index 6fafa252..00000000 --- a/internal/unify/dot.go +++ /dev/null @@ -1,221 +0,0 @@ -// Copyright 2025 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. 
- -package unify - -import ( - "bytes" - "fmt" - "html" - "io" - "os" - "os/exec" - "strings" -) - -const maxNodes = 30 - -type dotEncoder struct { - w *bytes.Buffer - - idGen int // Node name generation - valLimit int // Limit the number of Values in a subgraph - - idp identPrinter -} - -func newDotEncoder() *dotEncoder { - return &dotEncoder{ - w: new(bytes.Buffer), - } -} - -func (enc *dotEncoder) clear() { - enc.w.Reset() - enc.idGen = 0 -} - -func (enc *dotEncoder) writeTo(w io.Writer) { - fmt.Fprintln(w, "digraph {") - // Use the "new" ranking algorithm, which lets us put nodes from different - // clusters in the same rank. - fmt.Fprintln(w, "newrank=true;") - fmt.Fprintln(w, "node [shape=box, ordering=out];") - - w.Write(enc.w.Bytes()) - fmt.Fprintln(w, "}") -} - -func (enc *dotEncoder) writeSvg(w io.Writer) error { - cmd := exec.Command("dot", "-Tsvg") - in, err := cmd.StdinPipe() - if err != nil { - return err - } - var out bytes.Buffer - cmd.Stdout = &out - cmd.Stderr = os.Stderr - if err := cmd.Start(); err != nil { - return err - } - enc.writeTo(in) - in.Close() - if err := cmd.Wait(); err != nil { - return err - } - // Trim SVG header so the result can be embedded - // - // TODO: In Graphviz 10.0.1, we could use -Tsvg_inline. - svg := out.Bytes() - if i := bytes.Index(svg, []byte("= 0 { - svg = svg[i:] - } - _, err = w.Write(svg) - return err -} - -func (enc *dotEncoder) newID(f string) string { - id := fmt.Sprintf(f, enc.idGen) - enc.idGen++ - return id -} - -func (enc *dotEncoder) node(label, sublabel string) string { - id := enc.newID("n%d") - l := html.EscapeString(label) - if sublabel != "" { - l += fmt.Sprintf("
%s", html.EscapeString(sublabel)) - } - fmt.Fprintf(enc.w, "%s [label=<%s>];\n", id, l) - return id -} - -func (enc *dotEncoder) edge(from, to string, label string, args ...any) { - l := fmt.Sprintf(label, args...) - fmt.Fprintf(enc.w, "%s -> %s [label=%q];\n", from, to, l) -} - -func (enc *dotEncoder) valueSubgraph(v *Value) { - enc.valLimit = maxNodes - cID := enc.newID("cluster_%d") - fmt.Fprintf(enc.w, "subgraph %s {\n", cID) - fmt.Fprintf(enc.w, "style=invis;") - vID := enc.value(v) - fmt.Fprintf(enc.w, "}\n") - // We don't need the IDs right now. - _, _ = cID, vID -} - -func (enc *dotEncoder) value(v *Value) string { - if enc.valLimit <= 0 { - id := enc.newID("n%d") - fmt.Fprintf(enc.w, "%s [label=\"...\", shape=triangle];\n", id) - return id - } - enc.valLimit-- - - switch vd := v.Domain.(type) { - default: - panic(fmt.Sprintf("unknown domain type %T", vd)) - - case nil: - return enc.node("_|_", "") - - case Top: - return enc.node("_", "") - - // TODO: Like in YAML, figure out if this is just a sum. In dot, we - // could say any unentangled variable is a sum, and if it has more than - // one reference just share the node. 
- - // case Sum: - // node := enc.node("Sum", "") - // for i, elt := range vd.vs { - // enc.edge(node, enc.value(elt), "%d", i) - // if enc.valLimit <= 0 { - // break - // } - // } - // return node - - case Def: - node := enc.node("Def", "") - for k, v := range vd.All() { - enc.edge(node, enc.value(v), "%s", k) - if enc.valLimit <= 0 { - break - } - } - return node - - case Tuple: - if vd.repeat == nil { - label := "Tuple" - node := enc.node(label, "") - for i, elt := range vd.vs { - enc.edge(node, enc.value(elt), "%d", i) - if enc.valLimit <= 0 { - break - } - } - return node - } else { - // TODO - return enc.node("TODO: Repeat", "") - } - - case String: - switch vd.kind { - case stringExact: - return enc.node(fmt.Sprintf("%q", vd.exact), "") - case stringRegex: - var parts []string - for _, re := range vd.re { - parts = append(parts, fmt.Sprintf("%q", re)) - } - return enc.node(strings.Join(parts, "&"), "") - } - panic("bad String kind") - - case Var: - return enc.node(fmt.Sprintf("Var %s", enc.idp.unique(vd.id)), "") - } -} - -func (enc *dotEncoder) envSubgraph(e envSet) { - enc.valLimit = maxNodes - cID := enc.newID("cluster_%d") - fmt.Fprintf(enc.w, "subgraph %s {\n", cID) - fmt.Fprintf(enc.w, "style=invis;") - vID := enc.env(e.root) - fmt.Fprintf(enc.w, "}\n") - _, _ = cID, vID -} - -func (enc *dotEncoder) env(e *envExpr) string { - switch e.kind { - default: - panic("bad kind") - case envZero: - return enc.node("0", "") - case envUnit: - return enc.node("1", "") - case envBinding: - node := enc.node(fmt.Sprintf("%q :", enc.idp.unique(e.id)), "") - enc.edge(node, enc.value(e.val), "") - return node - case envProduct: - node := enc.node("⨯", "") - for _, op := range e.operands { - enc.edge(node, enc.env(op), "") - } - return node - case envSum: - node := enc.node("+", "") - for _, op := range e.operands { - enc.edge(node, enc.env(op), "") - } - return node - } -} diff --git a/internal/unify/env.go b/internal/unify/env.go deleted file mode 100644 index 
3331ff79..00000000 --- a/internal/unify/env.go +++ /dev/null @@ -1,480 +0,0 @@ -// Copyright 2025 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package unify - -import ( - "fmt" - "iter" - "reflect" - "strings" -) - -// An envSet is an immutable set of environments, where each environment is a -// mapping from [ident]s to [Value]s. -// -// To keep this compact, we use an algebraic representation similar to -// relational algebra. The atoms are zero, unit, or a singular binding: -// -// - A singular binding is an environment set consisting of a single environment -// that binds a single ident to a single value. -// -// - Zero is the empty set. -// -// - Unit is an environment set consisting of a single, empty environment (no -// bindings). -// -// From these, we build up more complex sets of environments using sums and -// cross products: -// -// - A sum is simply the union of the two environment sets. -// -// - A cross product is the Cartesian product of the two environment sets, -// followed by combining each pair of environments. Combining simply merges the -// two mappings, but fails if the mappings overlap. 
-// -// For example, to represent {{x: 1, y: 1}, {x: 2, y: 2}}, we build the two -// environments and sum them: -// -// ({x: 1} ⨯ {y: 1}) + ({x: 2} ⨯ {y: 2}) -// -// If we add a third variable z that can be 1 or 2, independent of x and y, we -// get four logical environments: -// -// {x: 1, y: 1, z: 1} -// {x: 2, y: 2, z: 1} -// {x: 1, y: 1, z: 2} -// {x: 2, y: 2, z: 2} -// -// This could be represented as a sum of all four environments, but because z is -// independent, we can use a more compact representation: -// -// (({x: 1} ⨯ {y: 1}) + ({x: 2} ⨯ {y: 2})) ⨯ ({z: 1} + {z: 2}) -// -// Environment sets obey commutative algebra rules: -// -// e + 0 = e -// e ⨯ 0 = 0 -// e ⨯ 1 = e -// e + f = f + e -// e ⨯ f = f ⨯ e -type envSet struct { - root *envExpr -} - -type envExpr struct { - // TODO: A tree-based data structure for this may not be ideal, since it - // involves a lot of walking to find things and we often have to do deep - // rewrites anyway for partitioning. Would some flattened array-style - // representation be better, possibly combined with an index of ident uses? - // We could even combine that with an immutable array abstraction (ala - // Clojure) that could enable more efficient construction operations. - - kind envExprKind - - // For envBinding - id *ident - val *Value - - // For sum or product. Len must be >= 2 and none of the elements can have - // the same kind as this node. - operands []*envExpr -} - -type envExprKind byte - -const ( - envZero envExprKind = iota - envUnit - envProduct - envSum - envBinding -) - -var ( - // topEnv is the unit value (multiplicative identity) of a [envSet]. - topEnv = envSet{envExprUnit} - // bottomEnv is the zero value (additive identity) of a [envSet]. - bottomEnv = envSet{envExprZero} - - envExprZero = &envExpr{kind: envZero} - envExprUnit = &envExpr{kind: envUnit} -) - -// bind binds id to each of vals in e. -// -// Its panics if id is already bound in e. 
-// -// Environments are typically initially constructed by starting with [topEnv] -// and calling bind one or more times. -func (e envSet) bind(id *ident, vals ...*Value) envSet { - if e.isEmpty() { - return bottomEnv - } - - // TODO: If any of vals are _, should we just drop that val? We're kind of - // inconsistent about whether an id missing from e means id is invalid or - // means id is _. - - // Check that id isn't present in e. - for range e.root.bindings(id) { - panic("id " + id.name + " already present in environment") - } - - // Create a sum of all the values. - bindings := make([]*envExpr, 0, 1) - for _, val := range vals { - bindings = append(bindings, &envExpr{kind: envBinding, id: id, val: val}) - } - - // Multiply it in. - return envSet{newEnvExprProduct(e.root, newEnvExprSum(bindings...))} -} - -func (e envSet) isEmpty() bool { - return e.root.kind == envZero -} - -// bindings yields all [envBinding] nodes in e with the given id. If id is nil, -// it yields all binding nodes. -func (e *envExpr) bindings(id *ident) iter.Seq[*envExpr] { - // This is just a pre-order walk and it happens this is the only thing we - // need a pre-order walk for. - return func(yield func(*envExpr) bool) { - var rec func(e *envExpr) bool - rec = func(e *envExpr) bool { - if e.kind == envBinding && (id == nil || e.id == id) { - if !yield(e) { - return false - } - } - for _, o := range e.operands { - if !rec(o) { - return false - } - } - return true - } - rec(e) - } -} - -// newEnvExprProduct constructs a product node from exprs, performing -// simplifications. It does NOT check that bindings are disjoint. -func newEnvExprProduct(exprs ...*envExpr) *envExpr { - factors := make([]*envExpr, 0, 2) - for _, expr := range exprs { - switch expr.kind { - case envZero: - return envExprZero - case envUnit: - // No effect on product - case envProduct: - factors = append(factors, expr.operands...) 
- default: - factors = append(factors, expr) - } - } - - if len(factors) == 0 { - return envExprUnit - } else if len(factors) == 1 { - return factors[0] - } - return &envExpr{kind: envProduct, operands: factors} -} - -// newEnvExprSum constructs a sum node from exprs, performing simplifications. -func newEnvExprSum(exprs ...*envExpr) *envExpr { - // TODO: If all of envs are products (or bindings), factor any common terms. - // E.g., x * y + x * z ==> x * (y + z). This is easy to do for binding - // terms, but harder to do for more general terms. - - var have smallSet[*envExpr] - terms := make([]*envExpr, 0, 2) - for _, expr := range exprs { - switch expr.kind { - case envZero: - // No effect on sum - case envSum: - for _, expr1 := range expr.operands { - if have.Add(expr1) { - terms = append(terms, expr1) - } - } - default: - if have.Add(expr) { - terms = append(terms, expr) - } - } - } - - if len(terms) == 0 { - return envExprZero - } else if len(terms) == 1 { - return terms[0] - } - return &envExpr{kind: envSum, operands: terms} -} - -func crossEnvs(env1, env2 envSet) envSet { - // Confirm that envs have disjoint idents. - var ids1 smallSet[*ident] - for e := range env1.root.bindings(nil) { - ids1.Add(e.id) - } - for e := range env2.root.bindings(nil) { - if ids1.Has(e.id) { - panic(fmt.Sprintf("%s bound on both sides of cross-product", e.id.name)) - } - } - - return envSet{newEnvExprProduct(env1.root, env2.root)} -} - -func unionEnvs(envs ...envSet) envSet { - exprs := make([]*envExpr, len(envs)) - for i := range envs { - exprs[i] = envs[i].root - } - return envSet{newEnvExprSum(exprs...)} -} - -// envPartition is a subset of an env where id is bound to value in all -// deterministic environments. -type envPartition struct { - id *ident - value *Value - env envSet -} - -// partitionBy splits e by distinct bindings of id and removes id from each -// partition. 
-// -// If there are environments in e where id is not bound, they will not be -// reflected in any partition. -// -// It panics if e is bottom, since attempting to partition an empty environment -// set almost certainly indicates a bug. -func (e envSet) partitionBy(id *ident) []envPartition { - if e.isEmpty() { - // We could return zero partitions, but getting here at all almost - // certainly indicates a bug. - panic("cannot partition empty environment set") - } - - // Emit a partition for each value of id. - var seen smallSet[*Value] - var parts []envPartition - for n := range e.root.bindings(id) { - if !seen.Add(n.val) { - // Already emitted a partition for this value. - continue - } - - parts = append(parts, envPartition{ - id: id, - value: n.val, - env: envSet{e.root.substitute(id, n.val)}, - }) - } - - return parts -} - -// substitute replaces bindings of id to val with 1 and bindings of id to any -// other value with 0 and simplifies the result. -func (e *envExpr) substitute(id *ident, val *Value) *envExpr { - switch e.kind { - default: - panic("bad kind") - - case envZero, envUnit: - return e - - case envBinding: - if e.id != id { - return e - } else if e.val != val { - return envExprZero - } else { - return envExprUnit - } - - case envProduct, envSum: - // Substitute each operand. Sometimes, this won't change anything, so we - // build the new operands list lazily. - var nOperands []*envExpr - for i, op := range e.operands { - nOp := op.substitute(id, val) - if nOperands == nil && op != nOp { - // Operand diverged; initialize nOperands. - nOperands = make([]*envExpr, 0, len(e.operands)) - nOperands = append(nOperands, e.operands[:i]...) - } - if nOperands != nil { - nOperands = append(nOperands, nOp) - } - } - if nOperands == nil { - // Nothing changed. - return e - } - if e.kind == envProduct { - return newEnvExprProduct(nOperands...) - } else { - return newEnvExprSum(nOperands...) 
- } - } -} - -// A smallSet is a set optimized for stack allocation when small. -type smallSet[T comparable] struct { - array [32]T - n int - - m map[T]struct{} -} - -// Has returns whether val is in set. -func (s *smallSet[T]) Has(val T) bool { - arr := s.array[:s.n] - for i := range arr { - if arr[i] == val { - return true - } - } - _, ok := s.m[val] - return ok -} - -// Add adds val to the set and returns true if it was added (not already -// present). -func (s *smallSet[T]) Add(val T) bool { - // Test for presence. - if s.Has(val) { - return false - } - - // Add it - if s.n < len(s.array) { - s.array[s.n] = val - s.n++ - } else { - if s.m == nil { - s.m = make(map[T]struct{}) - } - s.m[val] = struct{}{} - } - return true -} - -type ident struct { - _ [0]func() // Not comparable (only compare *ident) - name string -} - -type Var struct { - id *ident -} - -func (d Var) Exact() bool { - // These can't appear in concrete Values. - panic("Exact called on non-concrete Value") -} - -func (d Var) WhyNotExact() string { - // These can't appear in concrete Values. - return "WhyNotExact called on non-concrete Value" -} - -func (d Var) decode(rv reflect.Value) error { - return &inexactError{"var", rv.Type().String()} -} - -func (d Var) unify(w *Value, e envSet, swap bool, uf *unifier) (Domain, envSet, error) { - // TODO: Vars from !sums in the input can have a huge number of values. - // Unifying these could be way more efficient with some indexes over any - // exact values we can pull out, like Def fields that are exact Strings. - // Maybe we try to produce an array of yes/no/maybe matches and then we only - // have to do deeper evaluation of the maybes. We could probably cache this - // on an envTerm. It may also help to special-case Var/Var unification to - // pick which one to index versus enumerate. - - if vd, ok := w.Domain.(Var); ok && d.id == vd.id { - // Unifying $x with $x results in $x. 
If we descend into this we'll have - // problems because we strip $x out of the environment to keep ourselves - // honest and then can't find it on the other side. - // - // TODO: I'm not positive this is the right fix. - return vd, e, nil - } - - // We need to unify w with the value of d in each possible environment. We - // can save some work by grouping environments by the value of d, since - // there will be a lot of redundancy here. - var nEnvs []envSet - envParts := e.partitionBy(d.id) - for i, envPart := range envParts { - exit := uf.enterVar(d.id, i) - // Each branch logically gets its own copy of the initial environment - // (narrowed down to just this binding of the variable), and each branch - // may result in different changes to that starting environment. - res, e2, err := w.unify(envPart.value, envPart.env, swap, uf) - exit.exit() - if err != nil { - return nil, envSet{}, err - } - if res.Domain == nil { - // This branch entirely failed to unify, so it's gone. - continue - } - nEnv := e2.bind(d.id, res) - nEnvs = append(nEnvs, nEnv) - } - - if len(nEnvs) == 0 { - // All branches failed - return nil, bottomEnv, nil - } - - // The effect of this is entirely captured in the environment. We can return - // back the same Bind node. - return d, unionEnvs(nEnvs...), nil -} - -// An identPrinter maps [ident]s to unique string names. 
-type identPrinter struct { - ids map[*ident]string - idGen map[string]int -} - -func (p *identPrinter) unique(id *ident) string { - if p.ids == nil { - p.ids = make(map[*ident]string) - p.idGen = make(map[string]int) - } - - name, ok := p.ids[id] - if !ok { - gen := p.idGen[id.name] - p.idGen[id.name]++ - if gen == 0 { - name = id.name - } else { - name = fmt.Sprintf("%s#%d", id.name, gen) - } - p.ids[id] = name - } - - return name -} - -func (p *identPrinter) slice(ids []*ident) string { - var strs []string - for _, id := range ids { - strs = append(strs, p.unique(id)) - } - return fmt.Sprintf("[%s]", strings.Join(strs, ", ")) -} diff --git a/internal/unify/html.go b/internal/unify/html.go deleted file mode 100644 index 036b80e2..00000000 --- a/internal/unify/html.go +++ /dev/null @@ -1,123 +0,0 @@ -// Copyright 2025 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package unify - -import ( - "fmt" - "html" - "io" - "strings" -) - -func (t *tracer) writeHTML(w io.Writer) { - if !t.saveTree { - panic("writeHTML called without tracer.saveTree") - } - - fmt.Fprintf(w, "", htmlCSS) - for _, root := range t.trees { - dot := newDotEncoder() - html := htmlTracer{w: w, dot: dot} - html.writeTree(root) - } - fmt.Fprintf(w, "\n") -} - -const htmlCSS = ` -.unify { - display: grid; - grid-auto-columns: min-content; - text-align: center; -} - -.header { - grid-row: 1; - font-weight: bold; - padding: 0.25em; - position: sticky; - top: 0; - background: white; -} - -.envFactor { - display: grid; - grid-auto-rows: min-content; - grid-template-columns: subgrid; - text-align: center; -} -` - -type htmlTracer struct { - w io.Writer - dot *dotEncoder - svgs map[any]string -} - -func (t *htmlTracer) writeTree(node *traceTree) { - // TODO: This could be really nice. 
- // - // - Put nodes that were unified on the same rank with {rank=same; a; b} - // - // - On hover, highlight nodes that node was unified with and the result. If - // it's a variable, highlight it in the environment, too. - // - // - On click, show the details of unifying that node. - // - // This could be the only way to navigate, without necessarily needing the - // whole nest of nodes. - - // TODO: It might be possible to write this out on the fly. - - t.emit([]*Value{node.v, node.w}, []string{"v", "w"}, node.envIn) - - // Render children. - for i, child := range node.children { - if i >= 10 { - fmt.Fprintf(t.w, `
...
`) - break - } - fmt.Fprintf(t.w, `
%s`, html.EscapeString(child.label)) - t.writeTree(child) - fmt.Fprintf(t.w, "
\n") - } - - // Render result. - if node.err != nil { - fmt.Fprintf(t.w, "Error: %s\n", html.EscapeString(node.err.Error())) - } else { - t.emit([]*Value{node.res}, []string{"res"}, node.env) - } -} - -func htmlSVG[Key comparable](t *htmlTracer, f func(Key), arg Key) string { - if s, ok := t.svgs[arg]; ok { - return s - } - var buf strings.Builder - f(arg) - t.dot.writeSvg(&buf) - t.dot.clear() - svg := buf.String() - if t.svgs == nil { - t.svgs = make(map[any]string) - } - t.svgs[arg] = svg - buf.Reset() - return svg -} - -func (t *htmlTracer) emit(vs []*Value, labels []string, env envSet) { - fmt.Fprintf(t.w, `
`) - for i, v := range vs { - fmt.Fprintf(t.w, `
%s
`, i+1, html.EscapeString(labels[i])) - fmt.Fprintf(t.w, `
%s
`, i+1, htmlSVG(t, t.dot.valueSubgraph, v)) - } - col := len(vs) - - fmt.Fprintf(t.w, `
in
`, col+1) - fmt.Fprintf(t.w, `
%s
`, col+1, htmlSVG(t, t.dot.envSubgraph, env)) - - fmt.Fprintf(t.w, `
`) -} diff --git a/internal/unify/pos.go b/internal/unify/pos.go deleted file mode 100644 index 4f7046a4..00000000 --- a/internal/unify/pos.go +++ /dev/null @@ -1,33 +0,0 @@ -// Copyright 2025 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package unify - -import ( - "fmt" -) - -type Pos struct { - Path string - Line int -} - -func (p Pos) String() string { - var b []byte - b, _ = p.AppendText(b) - return string(b) -} - -func (p Pos) AppendText(b []byte) ([]byte, error) { - if p.Line == 0 { - if p.Path == "" { - return append(b, "?:?"...), nil - } else { - return append(b, p.Path...), nil - } - } else if p.Path == "" { - return fmt.Appendf(b, "?:%d", p.Line), nil - } - return fmt.Appendf(b, "%s:%d", p.Path, p.Line), nil -} diff --git a/internal/unify/testdata/stress.yaml b/internal/unify/testdata/stress.yaml deleted file mode 100644 index e4478536..00000000 --- a/internal/unify/testdata/stress.yaml +++ /dev/null @@ -1,33 +0,0 @@ -# In the original representation of environments, this caused an exponential -# blowup in time and allocation. With that representation, this took about 20 -# seconds on my laptop and had a max RSS of ~12 GB. Big enough to be really -# noticeable, but not so big it's likely to crash a developer machine. With the -# better environment representation, it runs almost instantly and has an RSS of -# ~90 MB. 
-unify: -- !sum - - !sum [1, 2] - - !sum [3, 4] - - !sum [5, 6] - - !sum [7, 8] - - !sum [9, 10] - - !sum [11, 12] - - !sum [13, 14] - - !sum [15, 16] - - !sum [17, 18] - - !sum [19, 20] - - !sum [21, 22] -- !sum - - !sum [1, 2] - - !sum [3, 4] - - !sum [5, 6] - - !sum [7, 8] - - !sum [9, 10] - - !sum [11, 12] - - !sum [13, 14] - - !sum [15, 16] - - !sum [17, 18] - - !sum [19, 20] - - !sum [21, 22] -all: - [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22] diff --git a/internal/unify/testdata/unify.yaml b/internal/unify/testdata/unify.yaml deleted file mode 100644 index 131e527c..00000000 --- a/internal/unify/testdata/unify.yaml +++ /dev/null @@ -1,174 +0,0 @@ -# Basic tests of unification - -# -# Terminals -# - -unify: -- _ -- _ -want: - _ ---- -unify: -- _ -- test -want: - test ---- -unify: -- test -- t?est -want: - test ---- -unify: -- 1 -- 1 -want: - 1 ---- -unify: -- test -- foo -want: - _|_ - -# -# Tuple -# - ---- -unify: -- [a, b] -- [a, b] -want: - [a, b] ---- -unify: -- [a, _] -- [_, b] -want: - [a, b] ---- -unify: -- ["ab?c", "de?f"] -- [ac, def] -want: - [ac, def] - -# -# Repeats -# - ---- -unify: -- !repeat [a] -- [_] -want: - [a] ---- -unify: -- !repeat [a] -- [_, _] -want: - [a, a] ---- -unify: -- !repeat [a] -- [b] -want: - _|_ ---- -unify: -- !repeat [xy*] -- [x, xy, xyy] -want: - [x, xy, xyy] ---- -unify: -- !repeat [xy*] -- !repeat ["xz?y*"] -- [x, xy, xyy] -want: - [x, xy, xyy] ---- -unify: -- !repeat [!sum [a, b]] -- [a, b, a] -all: -- [a, b, a] ---- -unify: -- !repeat [!sum [a, b]] -- !repeat [!sum [b, c]] -- [b, b, b] -all: -- [b, b, b] ---- -unify: -- !repeat [!sum [a, b]] -- !repeat [!sum [b, c]] -- [a] -all: [] - -# -# Def -# - ---- -unify: -- {a: a, b: b} -- {a: a, b: b} -want: - {a: a, b: b} ---- -unify: -- {a: a} -- {b: b} -want: - {a: a, b: b} - -# -# Sum -# - ---- -unify: -- !sum [1, 2] -- !sum [2, 3] -all: -- 2 ---- -unify: -- !sum [{label: a, value: abc}, {label: b, value: def}] -- !sum [{value: "ab?c", 
extra: d}, {value: "def?", extra: g}] -all: -- {extra: d, label: a, value: abc} -- {extra: g, label: b, value: def} ---- -# A sum of repeats must deal with different dynamically-created variables in -# each branch. -unify: -- !sum [!repeat [a], !repeat [b]] -- [a, a, a] -all: -- [a, a, a] ---- -unify: -- !sum [!repeat [a], !repeat [b]] -- [a, a, b] -all: [] ---- -# Exercise sumEnvs with more than one result -unify: -- !sum - - [a|b, c|d] - - [e, g] -- [!sum [a, b, e, f], !sum [c, d, g, h]] -all: -- [a, c] -- [a, d] -- [b, c] -- [b, d] -- [e, g] diff --git a/internal/unify/testdata/vars.yaml b/internal/unify/testdata/vars.yaml deleted file mode 100644 index fe8a57e4..00000000 --- a/internal/unify/testdata/vars.yaml +++ /dev/null @@ -1,175 +0,0 @@ -# -# Basic tests -# - -name: "basic string" -unify: -- $x -- test -all: -- test ---- -name: "basic tuple" -unify: -- [$x, $x] -- [test, test] -all: -- [test, test] ---- -name: "three tuples" -unify: -- [$x, $x] -- [test, _] -- [_, test] -all: -- [test, test] ---- -name: "basic def" -unify: -- {a: $x, b: $x} -- {a: test, b: test} -all: -- {a: test, b: test} ---- -name: "three defs" -unify: -- {a: $x, b: $x} -- {a: test} -- {b: test} -all: -- {a: test, b: test} - -# -# Bottom tests -# - ---- -name: "basic bottom" -unify: -- [$x, $x] -- [test, foo] -all: [] ---- -name: "three-way bottom" -unify: -- [$x, $x] -- [test, _] -- [_, foo] -all: [] - -# -# Basic sum tests -# - ---- -name: "basic sum" -unify: -- $x -- !sum [a, b] -all: -- a -- b ---- -name: "sum of tuples" -unify: -- [$x] -- !sum [[a], [b]] -all: -- [a] -- [b] ---- -name: "acausal sum" -unify: -- [_, !sum [a, b]] -- [$x, $x] -all: -- [a, a] -- [b, b] - -# -# Transitivity tests -# - ---- -name: "transitivity" -unify: -- [_, _, _, test] -- [$x, $x, _, _] -- [ _, $x, $x, _] -- [ _, _, $x, $x] -all: -- [test, test, test, test] - -# -# Multiple vars -# - ---- -name: "basic uncorrelated vars" -unify: -- - !sum [1, 2] - - !sum [3, 4] -- - $a - - $b -all: -- [1, 3] -- [1, 4] 
-- [2, 3] -- [2, 4] ---- -name: "uncorrelated vars" -unify: -- - !sum [1, 2] - - !sum [3, 4] - - !sum [1, 2] -- - $a - - $b - - $a -all: -- [1, 3, 1] -- [1, 4, 1] -- [2, 3, 2] -- [2, 4, 2] ---- -name: "entangled vars" -unify: -- - !sum [[1,2],[3,4]] - - !sum [[2,1],[3,4],[4,3]] -- - [$a, $b] - - [$b, $a] -all: -- - [1, 2] - - [2, 1] -- - [3, 4] - - [4, 3] - -# -# End-to-end examples -# - ---- -name: "end-to-end" -unify: -- go: Add - in: - - go: $t - - go: $t -- in: !repeat - - !sum - - go: Int32x4 - base: int - - go: Uint32x4 - base: uint -all: -- go: Add - in: - - base: int - go: Int32x4 - - base: int - go: Int32x4 -- go: Add - in: - - base: uint - go: Uint32x4 - - base: uint - go: Uint32x4 diff --git a/internal/unify/trace.go b/internal/unify/trace.go deleted file mode 100644 index b0aa3525..00000000 --- a/internal/unify/trace.go +++ /dev/null @@ -1,168 +0,0 @@ -// Copyright 2025 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package unify - -import ( - "fmt" - "io" - "strings" - - "gopkg.in/yaml.v3" -) - -// debugDotInHTML, if true, includes dot code for all graphs in the HTML. Useful -// for debugging the dot output itself. -const debugDotInHTML = false - -var Debug struct { - // UnifyLog, if non-nil, receives a streaming text trace of unification. - UnifyLog io.Writer - - // HTML, if non-nil, writes an HTML trace of unification to HTML. 
- HTML io.Writer -} - -type tracer struct { - logw io.Writer - - enc yamlEncoder // Print consistent idents throughout - - saveTree bool // if set, record tree; required for HTML output - - path []string - - node *traceTree - trees []*traceTree -} - -type traceTree struct { - label string // Identifies this node as a child of parent - v, w *Value // Unification inputs - envIn envSet - res *Value // Unification result - env envSet - err error // or error - - parent *traceTree - children []*traceTree -} - -type tracerExit struct { - t *tracer - len int - node *traceTree -} - -func (t *tracer) enter(pat string, vals ...any) tracerExit { - if t == nil { - return tracerExit{} - } - - label := fmt.Sprintf(pat, vals...) - - var p *traceTree - if t.saveTree { - p = t.node - if p != nil { - t.node = &traceTree{label: label, parent: p} - p.children = append(p.children, t.node) - } - } - - t.path = append(t.path, label) - return tracerExit{t, len(t.path) - 1, p} -} - -func (t *tracer) enterVar(id *ident, branch int) tracerExit { - if t == nil { - return tracerExit{} - } - - // Use the tracer's ident printer - return t.enter("Var %s br %d", t.enc.idp.unique(id), branch) -} - -func (te tracerExit) exit() { - if te.t == nil { - return - } - te.t.path = te.t.path[:te.len] - te.t.node = te.node -} - -func indentf(prefix string, pat string, vals ...any) string { - s := fmt.Sprintf(pat, vals...) - if len(prefix) == 0 { - return s - } - if !strings.Contains(s, "\n") { - return prefix + s - } - - indent := prefix - if strings.TrimLeft(prefix, " ") != "" { - // Prefix has non-space characters in it. Construct an all space-indent. 
- indent = strings.Repeat(" ", len(prefix)) - } - return prefix + strings.ReplaceAll(s, "\n", "\n"+indent) -} - -func yamlf(prefix string, node *yaml.Node) string { - b, err := yaml.Marshal(node) - if err != nil { - return fmt.Sprintf("", err) - } - return strings.TrimRight(indentf(prefix, "%s", b), " \n") -} - -func (t *tracer) logf(pat string, vals ...any) { - if t == nil || t.logw == nil { - return - } - prefix := fmt.Sprintf("[%s] ", strings.Join(t.path, "/")) - s := indentf(prefix, pat, vals...) - s = strings.TrimRight(s, " \n") - fmt.Fprintf(t.logw, "%s\n", s) -} - -func (t *tracer) traceUnify(v, w *Value, e envSet) { - if t == nil { - return - } - - t.logf("Unify\n%s\nwith\n%s\nin\n%s", - yamlf(" ", t.enc.value(v)), - yamlf(" ", t.enc.value(w)), - yamlf(" ", t.enc.env(e))) - - if t.saveTree { - if t.node == nil { - t.node = &traceTree{} - t.trees = append(t.trees, t.node) - } - t.node.v, t.node.w, t.node.envIn = v, w, e - } -} - -func (t *tracer) traceDone(res *Value, e envSet, err error) { - if t == nil { - return - } - - if err != nil { - t.logf("==> %s", err) - } else { - t.logf("==>\n%s", yamlf(" ", t.enc.closure(Closure{res, e}))) - } - - if t.saveTree { - node := t.node - if node == nil { - panic("popped top of trace stack") - } - node.res, node.err = res, err - node.env = e - } -} diff --git a/internal/unify/unify.go b/internal/unify/unify.go deleted file mode 100644 index 9d22bf19..00000000 --- a/internal/unify/unify.go +++ /dev/null @@ -1,322 +0,0 @@ -// Copyright 2025 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// Package unify implements unification of structured values. -// -// A [Value] represents a possibly infinite set of concrete values, where a -// value is either a string ([String]), a tuple of values ([Tuple]), or a -// string-keyed map of values called a "def" ([Def]). These sets can be further -// constrained by variables ([Var]). 
A [Value] combined with bindings of -// variables is a [Closure]. -// -// [Unify] finds a [Closure] that satisfies two or more other [Closure]s. This -// can be thought of as intersecting the sets represented by these Closures' -// values, or as the greatest lower bound/infimum of these Closures. If no such -// Closure exists, the result of unification is "bottom", or the empty set. -// -// # Examples -// -// The regular expression "a*" is the infinite set of strings of zero or more -// "a"s. "a*" can be unified with "a" or "aa" or "aaa", and the result is just -// "a", "aa", or "aaa", respectively. However, unifying "a*" with "b" fails -// because there are no values that satisfy both. -// -// Sums express sets directly. For example, !sum [a, b] is the set consisting of -// "a" and "b". Unifying this with !sum [b, c] results in just "b". This also -// makes it easy to demonstrate that unification isn't necessarily a single -// concrete value. For example, unifying !sum [a, b, c] with !sum [b, c, d] -// results in two concrete values: "b" and "c". -// -// The special value _ or "top" represents all possible values. Unifying _ with -// any value x results in x. -// -// Unifying composite values—tuples and defs—unifies their elements. -// -// The value [a*, aa] is an infinite set of tuples. If we unify that with the -// value [aaa, a*], the only possible value that satisfies both is [aaa, aa]. -// Likewise, this is the intersection of the sets described by these two values. -// -// Defs are similar to tuples, but they are indexed by strings and don't have a -// fixed length. For example, {x: a, y: b} is a def with two fields. Any field -// not mentioned in a def is implicitly top. Thus, unifying this with {y: b, z: -// c} results in {x: a, y: b, z: c}. -// -// Variables constrain values. For example, the value [$x, $x] represents all -// tuples whose first and second values are the same, but doesn't otherwise -// constrain that value. 
Thus, this set includes [a, a] as well as [[b, c, d], -// [b, c, d]], but it doesn't include [a, b]. -// -// Sums are internally implemented as fresh variables that are simultaneously -// bound to all values of the sum. That is !sum [a, b] is actually $var (where -// var is some fresh name), closed under the environment $var=a | $var=b. -package unify - -import ( - "errors" - "fmt" - "slices" -) - -// Unify computes a Closure that satisfies each input Closure. If no such -// Closure exists, it returns bottom. -func Unify(closures ...Closure) (Closure, error) { - if len(closures) == 0 { - return Closure{topValue, topEnv}, nil - } - - var trace *tracer - if Debug.UnifyLog != nil || Debug.HTML != nil { - trace = &tracer{ - logw: Debug.UnifyLog, - saveTree: Debug.HTML != nil, - } - } - - unified := closures[0] - for _, c := range closures[1:] { - var err error - uf := newUnifier() - uf.tracer = trace - e := crossEnvs(unified.env, c.env) - unified.val, unified.env, err = unified.val.unify(c.val, e, false, uf) - if Debug.HTML != nil { - uf.writeHTML(Debug.HTML) - } - if err != nil { - return Closure{}, err - } - } - - return unified, nil -} - -type unifier struct { - *tracer -} - -func newUnifier() *unifier { - return &unifier{} -} - -// errDomains is a sentinel error used between unify and unify1 to indicate that -// unify1 could not unify the domains of the two values. -var errDomains = errors.New("cannot unify domains") - -func (v *Value) unify(w *Value, e envSet, swap bool, uf *unifier) (*Value, envSet, error) { - if swap { - // Put the values in order. This just happens to be a handy choke-point - // to do this at. - v, w = w, v - } - - uf.traceUnify(v, w, e) - - d, e2, err := v.unify1(w, e, false, uf) - if err == errDomains { - // Try the other order. - d, e2, err = w.unify1(v, e, true, uf) - if err == errDomains { - // Okay, we really can't unify these. 
- err = fmt.Errorf("cannot unify %T (%s) and %T (%s): kind mismatch", v.Domain, v.PosString(), w.Domain, w.PosString()) - } - } - if err != nil { - uf.traceDone(nil, envSet{}, err) - return nil, envSet{}, err - } - res := unified(d, v, w) - uf.traceDone(res, e2, nil) - if d == nil { - // Double check that a bottom Value also has a bottom env. - if !e2.isEmpty() { - panic("bottom Value has non-bottom environment") - } - } - - return res, e2, nil -} - -func (v *Value) unify1(w *Value, e envSet, swap bool, uf *unifier) (Domain, envSet, error) { - // TODO: If there's an error, attach position information to it. - - vd, wd := v.Domain, w.Domain - - // Bottom returns bottom, and eliminates all possible environments. - if vd == nil || wd == nil { - return nil, bottomEnv, nil - } - - // Top always returns the other. - if _, ok := vd.(Top); ok { - return wd, e, nil - } - - // Variables - if vd, ok := vd.(Var); ok { - return vd.unify(w, e, swap, uf) - } - - // Composite values - if vd, ok := vd.(Def); ok { - if wd, ok := wd.(Def); ok { - return vd.unify(wd, e, swap, uf) - } - } - if vd, ok := vd.(Tuple); ok { - if wd, ok := wd.(Tuple); ok { - return vd.unify(wd, e, swap, uf) - } - } - - // Scalar values - if vd, ok := vd.(String); ok { - if wd, ok := wd.(String); ok { - res := vd.unify(wd) - if res == nil { - e = bottomEnv - } - return res, e, nil - } - } - - return nil, envSet{}, errDomains -} - -func (d Def) unify(o Def, e envSet, swap bool, uf *unifier) (Domain, envSet, error) { - out := Def{fields: make(map[string]*Value)} - - // Check keys of d against o. - for key, dv := range d.All() { - ov, ok := o.fields[key] - if !ok { - // ov is implicitly Top. Bypass unification. - out.fields[key] = dv - continue - } - exit := uf.enter("%s", key) - res, e2, err := dv.unify(ov, e, swap, uf) - exit.exit() - if err != nil { - return nil, envSet{}, err - } else if res.Domain == nil { - // No match. 
- return nil, bottomEnv, nil - } - out.fields[key] = res - e = e2 - } - // Check keys of o that we didn't already check. These all implicitly match - // because we know the corresponding fields in d are all Top. - for key, dv := range o.All() { - if _, ok := d.fields[key]; !ok { - out.fields[key] = dv - } - } - return out, e, nil -} - -func (v Tuple) unify(w Tuple, e envSet, swap bool, uf *unifier) (Domain, envSet, error) { - if v.repeat != nil && w.repeat != nil { - // Since we generate the content of these lazily, there's not much we - // can do but just stick them on a list to unify later. - return Tuple{repeat: concat(v.repeat, w.repeat)}, e, nil - } - - // Expand any repeated tuples. - tuples := make([]Tuple, 0, 2) - if v.repeat == nil { - tuples = append(tuples, v) - } else { - v2, e2 := v.doRepeat(e, len(w.vs)) - tuples = append(tuples, v2...) - e = e2 - } - if w.repeat == nil { - tuples = append(tuples, w) - } else { - w2, e2 := w.doRepeat(e, len(v.vs)) - tuples = append(tuples, w2...) - e = e2 - } - - // Now unify all of the tuples (usually this will be just 2 tuples) - out := tuples[0] - for _, t := range tuples[1:] { - if len(out.vs) != len(t.vs) { - uf.logf("tuple length mismatch") - return nil, bottomEnv, nil - } - zs := make([]*Value, len(out.vs)) - for i, v1 := range out.vs { - exit := uf.enter("%d", i) - z, e2, err := v1.unify(t.vs[i], e, swap, uf) - exit.exit() - if err != nil { - return nil, envSet{}, err - } else if z.Domain == nil { - return nil, bottomEnv, nil - } - zs[i] = z - e = e2 - } - out = Tuple{vs: zs} - } - - return out, e, nil -} - -// doRepeat creates a fixed-length tuple from a repeated tuple. The caller is -// expected to unify the returned tuples. 
-func (v Tuple) doRepeat(e envSet, n int) ([]Tuple, envSet) { - res := make([]Tuple, len(v.repeat)) - for i, gen := range v.repeat { - res[i].vs = make([]*Value, n) - for j := range n { - res[i].vs[j], e = gen(e) - } - } - return res, e -} - -// unify intersects the domains of two [String]s. If it can prove that this -// domain is empty, it returns nil (bottom). -// -// TODO: Consider splitting literals and regexps into two domains. -func (v String) unify(w String) Domain { - // Unification is symmetric, so put them in order of string kind so we only - // have to deal with half the cases. - if v.kind > w.kind { - v, w = w, v - } - - switch v.kind { - case stringRegex: - switch w.kind { - case stringRegex: - // Construct a match against all of the regexps - return String{kind: stringRegex, re: slices.Concat(v.re, w.re)} - case stringExact: - for _, re := range v.re { - if !re.MatchString(w.exact) { - return nil - } - } - return w - } - case stringExact: - if v.exact != w.exact { - return nil - } - return v - } - panic("bad string kind") -} - -func concat[T any](s1, s2 []T) []T { - // Reuse s1 or s2 if possible. - if len(s1) == 0 { - return s2 - } - return append(s1[:len(s1):len(s1)], s2...) -} diff --git a/internal/unify/unify_test.go b/internal/unify/unify_test.go deleted file mode 100644 index 8071e0c9..00000000 --- a/internal/unify/unify_test.go +++ /dev/null @@ -1,154 +0,0 @@ -// Copyright 2025 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package unify - -import ( - "bytes" - "fmt" - "io" - "os" - "path/filepath" - "slices" - "strings" - "testing" - - "gopkg.in/yaml.v3" -) - -func TestUnify(t *testing.T) { - paths, err := filepath.Glob("testdata/*") - if err != nil { - t.Fatal(err) - } - if len(paths) == 0 { - t.Fatal("no testdata found") - } - for _, path := range paths { - // Skip paths starting with _ so experimental files can be added. 
- base := filepath.Base(path) - if base[0] == '_' { - continue - } - if !strings.HasSuffix(base, ".yaml") { - t.Errorf("non-.yaml file in testdata: %s", base) - continue - } - base = strings.TrimSuffix(base, ".yaml") - - t.Run(base, func(t *testing.T) { - testUnify(t, path) - }) - } -} - -func testUnify(t *testing.T, path string) { - f, err := os.Open(path) - if err != nil { - t.Fatal(err) - } - defer f.Close() - - type testCase struct { - Skip bool - Name string - Unify []Closure - Want yaml.Node - All yaml.Node - } - dec := yaml.NewDecoder(f) - - for i := 0; ; i++ { - var tc testCase - err := dec.Decode(&tc) - if err == io.EOF { - break - } - if err != nil { - t.Fatal(err) - } - - name := tc.Name - if name == "" { - name = fmt.Sprint(i) - } - - t.Run(name, func(t *testing.T) { - if tc.Skip { - t.Skip("skip: true set in test case") - } - - defer func() { - p := recover() - if p != nil || t.Failed() { - // Redo with a trace - // - // TODO: Use t.Output() in Go 1.25. - var buf bytes.Buffer - Debug.UnifyLog = &buf - func() { - defer func() { - // If the original unify panicked, the second one - // probably will, too. Ignore it and let the first panic - // bubble. - recover() - }() - Unify(tc.Unify...) - }() - Debug.UnifyLog = nil - t.Logf("Trace:\n%s", buf.String()) - } - if p != nil { - panic(p) - } - }() - - // Unify the test cases - // - // TODO: Try reordering the inputs also - c, err := Unify(tc.Unify...) - if err != nil { - // TODO: Tests of errors - t.Fatal(err) - } - - // Encode the result back to YAML so we can check if it's structurally - // equal. - clean := func(val any) *yaml.Node { - var node yaml.Node - node.Encode(val) - for n := range allYamlNodes(&node) { - // Canonicalize the style. There may be other style flags we need to - // muck with. 
- n.Style &^= yaml.FlowStyle - n.HeadComment = "" - n.LineComment = "" - n.FootComment = "" - } - return &node - } - check := func(gotVal any, wantNode *yaml.Node) { - got, err := yaml.Marshal(clean(gotVal)) - if err != nil { - t.Fatalf("Encoding Value back to yaml failed: %s", err) - } - want, err := yaml.Marshal(clean(wantNode)) - if err != nil { - t.Fatalf("Encoding Want back to yaml failed: %s", err) - } - - if !bytes.Equal(got, want) { - t.Errorf("%s:%d:\nwant:\n%sgot\n%s", f.Name(), wantNode.Line, want, got) - } - } - if tc.Want.Kind != 0 { - check(c.val, &tc.Want) - } - if tc.All.Kind != 0 { - fVal := slices.Collect(c.All()) - check(fVal, &tc.All) - } - }) - } -} diff --git a/internal/unify/value.go b/internal/unify/value.go deleted file mode 100644 index ffc25b87..00000000 --- a/internal/unify/value.go +++ /dev/null @@ -1,167 +0,0 @@ -// Copyright 2025 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package unify - -import ( - "fmt" - "iter" - "reflect" -) - -// A Value represents a structured, non-deterministic value consisting of -// strings, tuples of Values, and string-keyed maps of Values. A -// non-deterministic Value will also contain variables, which are resolved via -// an environment as part of a [Closure]. -// -// For debugging, a Value can also track the source position it was read from in -// an input file, and its provenance from other Values. -type Value struct { - Domain Domain - - // A Value has either a pos or parents (or neither). - pos *Pos - parents *[2]*Value -} - -var ( - topValue = &Value{Domain: Top{}} - bottomValue = &Value{Domain: nil} -) - -// NewValue returns a new [Value] with the given domain and no position -// information. -func NewValue(d Domain) *Value { - return &Value{Domain: d} -} - -// NewValuePos returns a new [Value] with the given domain at position p. 
-func NewValuePos(d Domain, p Pos) *Value { - return &Value{Domain: d, pos: &p} -} - -// newValueFrom returns a new [Value] with the given domain that copies the -// position information of p. -func newValueFrom(d Domain, p *Value) *Value { - return &Value{Domain: d, pos: p.pos, parents: p.parents} -} - -func unified(d Domain, p1, p2 *Value) *Value { - return &Value{Domain: d, parents: &[2]*Value{p1, p2}} -} - -func (v *Value) Pos() Pos { - if v.pos == nil { - return Pos{} - } - return *v.pos -} - -func (v *Value) PosString() string { - var b []byte - for root := range v.Provenance() { - if len(b) > 0 { - b = append(b, ' ') - } - b, _ = root.pos.AppendText(b) - } - return string(b) -} - -func (v *Value) WhyNotExact() string { - if v.Domain == nil { - return "v.Domain is nil" - } - return v.Domain.WhyNotExact() -} - -func (v *Value) Exact() bool { - if v.Domain == nil { - return false - } - return v.Domain.Exact() -} - -// Decode decodes v into a Go value. -// -// v must be exact, except that it can include Top. into must be a pointer. -// [Def]s are decoded into structs. [Tuple]s are decoded into slices. [String]s -// are decoded into strings or ints. Any field can itself be a pointer to one of -// these types. Top can be decoded into a pointer-typed field and will set the -// field to nil. Anything else will allocate a value if necessary. -// -// Any type may implement [Decoder], in which case its DecodeUnified method will -// be called instead of using the default decoding scheme. -func (v *Value) Decode(into any) error { - rv := reflect.ValueOf(into) - if rv.Kind() != reflect.Pointer { - return fmt.Errorf("cannot decode into non-pointer %T", into) - } - return decodeReflect(v, rv.Elem()) -} - -func decodeReflect(v *Value, rv reflect.Value) error { - var ptr reflect.Value - if rv.Kind() == reflect.Pointer { - if rv.IsNil() { - // Transparently allocate through pointers, *except* for Top, which - // wants to set the pointer to nil. 
- // - // TODO: Drop this condition if I switch to an explicit Optional[T] - // or move the Top logic into Def. - if _, ok := v.Domain.(Top); !ok { - // Allocate the value to fill in, but don't actually store it in - // the pointer until we successfully decode. - ptr = rv - rv = reflect.New(rv.Type().Elem()).Elem() - } - } else { - rv = rv.Elem() - } - } - - var err error - if reflect.PointerTo(rv.Type()).Implements(decoderType) { - // Use the custom decoder. - err = rv.Addr().Interface().(Decoder).DecodeUnified(v) - } else { - err = v.Domain.decode(rv) - } - if err == nil && ptr.IsValid() { - ptr.Set(rv.Addr()) - } - return err -} - -// Decoder can be implemented by types as a custom implementation of [Decode] -// for that type. -type Decoder interface { - DecodeUnified(v *Value) error -} - -var decoderType = reflect.TypeOf((*Decoder)(nil)).Elem() - -// Provenance iterates over all of the source Values that have contributed to -// this Value. -func (v *Value) Provenance() iter.Seq[*Value] { - return func(yield func(*Value) bool) { - var rec func(d *Value) bool - rec = func(d *Value) bool { - if d.pos != nil { - if !yield(d) { - return false - } - } - if d.parents != nil { - for _, p := range d.parents { - if !rec(p) { - return false - } - } - } - return true - } - rec(v) - } -} diff --git a/internal/unify/value_test.go b/internal/unify/value_test.go deleted file mode 100644 index 54937c68..00000000 --- a/internal/unify/value_test.go +++ /dev/null @@ -1,50 +0,0 @@ -// Copyright 2025 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. 
- -package unify - -import ( - "reflect" - "slices" - "testing" -) - -func ExampleClosure_All_tuple() { - v := mustParse(` -- !sum [1, 2] -- !sum [3, 4] -`) - printYaml(slices.Collect(v.All())) - - // Output: - // - [1, 3] - // - [1, 4] - // - [2, 3] - // - [2, 4] -} - -func ExampleClosure_All_def() { - v := mustParse(` -a: !sum [1, 2] -b: !sum [3, 4] -c: 5 -`) - printYaml(slices.Collect(v.All())) - - // Output: - // - {a: 1, b: 3, c: 5} - // - {a: 1, b: 4, c: 5} - // - {a: 2, b: 3, c: 5} - // - {a: 2, b: 4, c: 5} -} - -func checkDecode[T any](t *testing.T, got *Value, want T) { - var gotT T - if err := got.Decode(&gotT); err != nil { - t.Fatalf("Decode failed: %v", err) - } - if !reflect.DeepEqual(&gotT, &want) { - t.Fatalf("got:\n%s\nwant:\n%s", prettyYaml(gotT), prettyYaml(want)) - } -} diff --git a/internal/unify/yaml.go b/internal/unify/yaml.go deleted file mode 100644 index dadcd71d..00000000 --- a/internal/unify/yaml.go +++ /dev/null @@ -1,619 +0,0 @@ -// Copyright 2025 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package unify - -import ( - "errors" - "fmt" - "io" - "io/fs" - "os" - "path/filepath" - "regexp" - "strings" - - "gopkg.in/yaml.v3" -) - -// ReadOpts provides options to [Read] and related functions. The zero value is -// the default options. -type ReadOpts struct { - // FS, if non-nil, is the file system from which to resolve !import file - // names. - FS fs.FS -} - -// Read reads a [Closure] in YAML format from r, using path for error messages. -// -// It maps YAML nodes into terminal Values as follows: -// -// - "_" or !top _ is the top value ([Top]). -// -// - "_|_" or !bottom _ is the bottom value. This is an error during -// unmarshaling, but can appear in marshaled values. -// -// - "$" or !var is a variable ([Var]). Everywhere the same name -// appears within a single unmarshal operation, it is mapped to the same -// variable. 
Different unmarshal operations get different variables, even if -// they have the same string name. -// -// - !regex "x" is a regular expression ([String]), as is any string that -// doesn't match "_", "_|_", or "$...". Regular expressions are implicitly -// anchored at the beginning and end. If the string doesn't contain any -// meta-characters (that is, it's a "literal" regular expression), then it's -// treated as an exact string. -// -// - !string "x", or any int, float, bool, or binary value is an exact string -// ([String]). -// -// - !regex [x, y, ...] is an intersection of regular expressions ([String]). -// -// It maps YAML nodes into non-terminal Values as follows: -// -// - Sequence nodes like [x, y, z] are tuples ([Tuple]). -// -// - !repeat [x] is a repeated tuple ([Tuple]), which is 0 or more instances of -// x. There must be exactly one element in the list. -// -// - Mapping nodes like {a: x, b: y} are defs ([Def]). Any fields not listed are -// implicitly top. -// -// - !sum [x, y, z] is a sum of its children. This can be thought of as a union -// of the values x, y, and z, or as a non-deterministic choice between x, y, and -// z. If a variable appears both inside the sum and outside of it, only the -// non-deterministic choice view really works. The unifier does not directly -// implement sums; instead, this is decoded as a fresh variable that's -// simultaneously bound to x, y, and z. -// -// - !import glob is like a !sum, but its children are read from all files -// matching the given glob pattern, which is interpreted relative to the current -// file path. Each file gets its own variable scope. -func Read(r io.Reader, path string, opts ReadOpts) (Closure, error) { - dec := yamlDecoder{opts: opts, path: path, env: topEnv} - v, err := dec.read(r) - if err != nil { - return Closure{}, err - } - return dec.close(v), nil -} - -// ReadFile reads a [Closure] in YAML format from a file. -// -// The file must consist of a single YAML document. 
-// -// If opts.FS is not set, this sets it to a FS rooted at path's directory. -// -// See [Read] for details. -func ReadFile(path string, opts ReadOpts) (Closure, error) { - f, err := os.Open(path) - if err != nil { - return Closure{}, err - } - defer f.Close() - - if opts.FS == nil { - opts.FS = os.DirFS(filepath.Dir(path)) - } - - return Read(f, path, opts) -} - -// UnmarshalYAML implements [yaml.Unmarshaler]. -// -// Since there is no way to pass [ReadOpts] to this function, it assumes default -// options. -func (c *Closure) UnmarshalYAML(node *yaml.Node) error { - dec := yamlDecoder{path: "", env: topEnv} - v, err := dec.root(node) - if err != nil { - return err - } - *c = dec.close(v) - return nil -} - -type yamlDecoder struct { - opts ReadOpts - path string - - vars map[string]*ident - nSums int - - env envSet -} - -func (dec *yamlDecoder) read(r io.Reader) (*Value, error) { - n, err := readOneNode(r) - if err != nil { - return nil, fmt.Errorf("%s: %w", dec.path, err) - } - - // Decode YAML node to a Value - v, err := dec.root(n) - if err != nil { - return nil, fmt.Errorf("%s: %w", dec.path, err) - } - - return v, nil -} - -// readOneNode reads a single YAML document from r and returns an error if there -// are more documents in r. -func readOneNode(r io.Reader) (*yaml.Node, error) { - yd := yaml.NewDecoder(r) - - // Decode as a YAML node - var node yaml.Node - if err := yd.Decode(&node); err != nil { - return nil, err - } - np := &node - if np.Kind == yaml.DocumentNode { - np = node.Content[0] - } - - // Ensure there are no more YAML docs in this file - if err := yd.Decode(nil); err == nil { - return nil, fmt.Errorf("must not contain multiple documents") - } else if err != io.EOF { - return nil, err - } - - return np, nil -} - -// root parses the root of a file. -func (dec *yamlDecoder) root(node *yaml.Node) (*Value, error) { - // Prepare for variable name resolution in this file. 
This may be a nested - // root, so restore the current values when we're done. - oldVars, oldNSums := dec.vars, dec.nSums - defer func() { - dec.vars, dec.nSums = oldVars, oldNSums - }() - dec.vars = make(map[string]*ident, 0) - dec.nSums = 0 - - return dec.value(node) -} - -// close wraps a decoded [Value] into a [Closure]. -func (dec *yamlDecoder) close(v *Value) Closure { - return Closure{v, dec.env} -} - -func (dec *yamlDecoder) value(node *yaml.Node) (vOut *Value, errOut error) { - pos := &Pos{Path: dec.path, Line: node.Line} - - // Resolve alias nodes. - if node.Kind == yaml.AliasNode { - node = node.Alias - } - - mk := func(d Domain) (*Value, error) { - v := &Value{Domain: d, pos: pos} - return v, nil - } - mk2 := func(d Domain, err error) (*Value, error) { - if err != nil { - return nil, err - } - return mk(d) - } - - // is tests the kind and long tag of node. - is := func(kind yaml.Kind, tag string) bool { - return node.Kind == kind && node.LongTag() == tag - } - isExact := func() bool { - if node.Kind != yaml.ScalarNode { - return false - } - // We treat any string-ish YAML node as a string. - switch node.LongTag() { - case "!string", "tag:yaml.org,2002:int", "tag:yaml.org,2002:float", "tag:yaml.org,2002:bool", "tag:yaml.org,2002:binary": - return true - } - return false - } - - // !!str nodes provide a short-hand syntax for several leaf domains that are - // also available under explicit tags. To simplify checking below, we set - // strVal to non-"" only for !!str nodes. - strVal := "" - isStr := is(yaml.ScalarNode, "tag:yaml.org,2002:str") - if isStr { - strVal = node.Value - } - - switch { - case is(yaml.ScalarNode, "!var"): - strVal = "$" + node.Value - fallthrough - case strings.HasPrefix(strVal, "$"): - id, ok := dec.vars[strVal] - if !ok { - // We encode different idents with the same string name by adding a - // #N suffix. Strip that off so it doesn't accumulate. This isn't - // meant to be used in user-written input, though nothing stops that. 
- name, _, _ := strings.Cut(strVal, "#") - id = &ident{name: name} - dec.vars[strVal] = id - dec.env = dec.env.bind(id, topValue) - } - return mk(Var{id: id}) - - case strVal == "_" || is(yaml.ScalarNode, "!top"): - return mk(Top{}) - - case strVal == "_|_" || is(yaml.ScalarNode, "!bottom"): - return nil, errors.New("found bottom") - - case isExact(): - val := node.Value - return mk(NewStringExact(val)) - - case isStr || is(yaml.ScalarNode, "!regex"): - // Any other string we treat as a regex. This will produce an exact - // string anyway if the regex is literal. - val := node.Value - return mk2(NewStringRegex(val)) - - case is(yaml.SequenceNode, "!regex"): - var vals []string - if err := node.Decode(&vals); err != nil { - return nil, err - } - return mk2(NewStringRegex(vals...)) - - case is(yaml.MappingNode, "tag:yaml.org,2002:map"): - var db DefBuilder - for i := 0; i < len(node.Content); i += 2 { - key := node.Content[i] - if key.Kind != yaml.ScalarNode { - return nil, fmt.Errorf("non-scalar key %q", key.Value) - } - val, err := dec.value(node.Content[i+1]) - if err != nil { - return nil, err - } - db.Add(key.Value, val) - } - return mk(db.Build()) - - case is(yaml.SequenceNode, "tag:yaml.org,2002:seq"): - elts := node.Content - vs := make([]*Value, 0, len(elts)) - for _, elt := range elts { - v, err := dec.value(elt) - if err != nil { - return nil, err - } - vs = append(vs, v) - } - return mk(NewTuple(vs...)) - - case is(yaml.SequenceNode, "!repeat") || is(yaml.SequenceNode, "!repeat-unify"): - // !repeat must have one child. !repeat-unify is used internally for - // delayed unification, and is the same, it's just allowed to have more - // than one child. - if node.LongTag() == "!repeat" && len(node.Content) != 1 { - return nil, fmt.Errorf("!repeat must have exactly one child") - } - - // Decode the children to make sure they're well-formed, but otherwise - // discard that decoding and do it again every time we need a new - // element. 
- var gen []func(e envSet) (*Value, envSet) - origEnv := dec.env - elts := node.Content - for i, elt := range elts { - _, err := dec.value(elt) - if err != nil { - return nil, err - } - // Undo any effects on the environment. We *do* keep any named - // variables that were added to the vars map in case they were - // introduced within the element. - dec.env = origEnv - // Add a generator function - gen = append(gen, func(e envSet) (*Value, envSet) { - dec.env = e - // TODO: If this is in a sum, this tends to generate a ton of - // fresh variables that are different on each branch of the - // parent sum. Does it make sense to hold on to the i'th value - // of the tuple after we've generated it? - v, err := dec.value(elts[i]) - if err != nil { - // It worked the first time, so this really shouldn't hapen. - panic("decoding repeat element failed") - } - return v, dec.env - }) - } - return mk(NewRepeat(gen...)) - - case is(yaml.SequenceNode, "!sum"): - vs := make([]*Value, 0, len(node.Content)) - for _, elt := range node.Content { - v, err := dec.value(elt) - if err != nil { - return nil, err - } - vs = append(vs, v) - } - if len(vs) == 1 { - return vs[0], nil - } - - // A sum is implemented as a fresh variable that's simultaneously bound - // to each of the descendants. - id := &ident{name: fmt.Sprintf("sum%d", dec.nSums)} - dec.nSums++ - dec.env = dec.env.bind(id, vs...) - return mk(Var{id: id}) - - case is(yaml.ScalarNode, "!import"): - if dec.opts.FS == nil { - return nil, fmt.Errorf("!import not allowed (ReadOpts.FS not set)") - } - pat := node.Value - - if !fs.ValidPath(pat) { - // This will result in Glob returning no results. Give a more useful - // error message for this case. - return nil, fmt.Errorf("!import path must not contain '.' 
or '..'") - } - - ms, err := fs.Glob(dec.opts.FS, pat) - if err != nil { - return nil, fmt.Errorf("resolving !import: %w", err) - } - if len(ms) == 0 { - return nil, fmt.Errorf("!import did not match any files") - } - - // Parse each file - vs := make([]*Value, 0, len(ms)) - for _, m := range ms { - v, err := dec.import1(m) - if err != nil { - return nil, err - } - vs = append(vs, v) - } - - // Create a sum. - if len(vs) == 1 { - return vs[0], nil - } - id := &ident{name: "import"} - dec.env = dec.env.bind(id, vs...) - return mk(Var{id: id}) - } - - return nil, fmt.Errorf("unknown node kind %d %v", node.Kind, node.Tag) -} - -func (dec *yamlDecoder) import1(path string) (*Value, error) { - // Make sure we can open the path first. - f, err := dec.opts.FS.Open(path) - if err != nil { - return nil, fmt.Errorf("!import failed: %w", err) - } - defer f.Close() - - // Prepare the enter path. - oldFS, oldPath := dec.opts.FS, dec.path - defer func() { - dec.opts.FS, dec.path = oldFS, oldPath - }() - - // Enter path, which is relative to the current path's directory. - newPath := filepath.Join(filepath.Dir(dec.path), path) - subFS, err := fs.Sub(dec.opts.FS, filepath.Dir(path)) - if err != nil { - return nil, err - } - dec.opts.FS, dec.path = subFS, newPath - - // Parse the file. - return dec.read(f) -} - -type yamlEncoder struct { - idp identPrinter - e envSet // We track the environment for !repeat nodes. -} - -// TODO: Switch some Value marshaling to Closure? - -func (c Closure) MarshalYAML() (any, error) { - // TODO: If the environment is trivial, just marshal the value. 
- enc := &yamlEncoder{} - return enc.closure(c), nil -} - -func (c Closure) String() string { - b, err := yaml.Marshal(c) - if err != nil { - return fmt.Sprintf("marshal failed: %s", err) - } - return string(b) -} - -func (v *Value) MarshalYAML() (any, error) { - enc := &yamlEncoder{} - return enc.value(v), nil -} - -func (v *Value) String() string { - b, err := yaml.Marshal(v) - if err != nil { - return fmt.Sprintf("marshal failed: %s", err) - } - return string(b) -} - -func (enc *yamlEncoder) closure(c Closure) *yaml.Node { - enc.e = c.env - var n yaml.Node - n.Kind = yaml.MappingNode - n.Tag = "!closure" - n.Content = make([]*yaml.Node, 4) - n.Content[0] = new(yaml.Node) - n.Content[0].SetString("env") - n.Content[2] = new(yaml.Node) - n.Content[2].SetString("in") - n.Content[3] = enc.value(c.val) - // Fill in the env after we've written the value in case value encoding - // affects the env. - n.Content[1] = enc.env(enc.e) - enc.e = envSet{} // Allow GC'ing the env - return &n -} - -func (enc *yamlEncoder) env(e envSet) *yaml.Node { - var encode func(e *envExpr) *yaml.Node - encode = func(e *envExpr) *yaml.Node { - var n yaml.Node - switch e.kind { - default: - panic("bad kind") - case envZero: - n.SetString("0") - case envUnit: - n.SetString("1") - case envBinding: - var id yaml.Node - id.SetString(enc.idp.unique(e.id)) - n.Kind = yaml.MappingNode - n.Content = []*yaml.Node{&id, enc.value(e.val)} - case envProduct, envSum: - n.Kind = yaml.SequenceNode - if e.kind == envProduct { - n.Tag = "!product" - } else { - n.Tag = "!sum" - } - for _, e2 := range e.operands { - n.Content = append(n.Content, encode(e2)) - } - } - return &n - } - return encode(e.root) -} - -var yamlIntRe = regexp.MustCompile(`^-?[0-9]+$`) - -func (enc *yamlEncoder) value(v *Value) *yaml.Node { - var n yaml.Node - switch d := v.Domain.(type) { - case nil: - // Not allowed by unmarshaler, but useful for understanding when - // something goes horribly wrong. 
- // - // TODO: We might be able to track useful provenance for this, which - // would really help with debugging unexpected bottoms. - n.SetString("_|_") - return &n - - case Top: - n.SetString("_") - return &n - - case Def: - n.Kind = yaml.MappingNode - for k, elt := range d.All() { - var kn yaml.Node - kn.SetString(k) - n.Content = append(n.Content, &kn, enc.value(elt)) - } - n.HeadComment = v.PosString() - return &n - - case Tuple: - n.Kind = yaml.SequenceNode - if d.repeat == nil { - for _, elt := range d.vs { - n.Content = append(n.Content, enc.value(elt)) - } - } else { - if len(d.repeat) == 1 { - n.Tag = "!repeat" - } else { - n.Tag = "!repeat-unify" - } - // TODO: I'm not positive this will round-trip everything correctly. - for _, gen := range d.repeat { - v, e := gen(enc.e) - enc.e = e - n.Content = append(n.Content, enc.value(v)) - } - } - return &n - - case String: - switch d.kind { - case stringExact: - n.SetString(d.exact) - switch { - // Make this into a "nice" !!int node if I can. - case yamlIntRe.MatchString(d.exact): - n.Tag = "tag:yaml.org,2002:int" - - // Or a "nice" !!bool node. - case d.exact == "false" || d.exact == "true": - n.Tag = "tag:yaml.org,2002:bool" - - // If this doesn't require escaping, leave it as a str node to avoid - // the annoying YAML tags. Otherwise, mark it as an exact string. - // Alternatively, we could always emit a str node with regexp - // quoting. 
- case d.exact != regexp.QuoteMeta(d.exact): - n.Tag = "!string" - } - return &n - case stringRegex: - o := make([]string, 0, 1) - for _, re := range d.re { - s := re.String() - s = strings.TrimSuffix(strings.TrimPrefix(s, `\A(?:`), `)\z`) - o = append(o, s) - } - if len(o) == 1 { - n.SetString(o[0]) - return &n - } - n.Encode(o) - n.Tag = "!regex" - return &n - } - panic("bad String kind") - - case Var: - // TODO: If Var only appears once in the whole Value and is independent - // in the environment (part of a term that is only over Var), then emit - // this as a !sum instead. - if false { - var vs []*Value // TODO: Get values of this var. - if len(vs) == 1 { - return enc.value(vs[0]) - } - n.Kind = yaml.SequenceNode - n.Tag = "!sum" - for _, elt := range vs { - n.Content = append(n.Content, enc.value(elt)) - } - return &n - } - n.SetString(enc.idp.unique(d.id)) - if !strings.HasPrefix(d.id.name, "$") { - n.Tag = "!var" - } - return &n - } - panic(fmt.Sprintf("unknown domain type %T", v.Domain)) -} diff --git a/internal/unify/yaml_test.go b/internal/unify/yaml_test.go deleted file mode 100644 index 4f0aef43..00000000 --- a/internal/unify/yaml_test.go +++ /dev/null @@ -1,202 +0,0 @@ -// Copyright 2025 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. 
- -package unify - -import ( - "bytes" - "fmt" - "iter" - "log" - "strings" - "testing" - "testing/fstest" - - "gopkg.in/yaml.v3" -) - -func mustParse(expr string) Closure { - var c Closure - if err := yaml.Unmarshal([]byte(expr), &c); err != nil { - panic(err) - } - return c -} - -func oneValue(t *testing.T, c Closure) *Value { - t.Helper() - var v *Value - var i int - for v = range c.All() { - i++ - } - if i != 1 { - t.Fatalf("expected 1 value, got %d", i) - } - return v -} - -func printYaml(val any) { - fmt.Println(prettyYaml(val)) -} - -func prettyYaml(val any) string { - b, err := yaml.Marshal(val) - if err != nil { - panic(err) - } - var node yaml.Node - if err := yaml.Unmarshal(b, &node); err != nil { - panic(err) - } - - // Map lines to start offsets. We'll use this to figure out when nodes are - // "small" and should use inline style. - lines := []int{-1, 0} - for pos := 0; pos < len(b); { - next := bytes.IndexByte(b[pos:], '\n') - if next == -1 { - break - } - pos += next + 1 - lines = append(lines, pos) - } - lines = append(lines, len(b)) - - // Strip comments and switch small nodes to inline style - cleanYaml(&node, lines, len(b)) - - b, err = yaml.Marshal(&node) - if err != nil { - panic(err) - } - return string(b) -} - -func cleanYaml(node *yaml.Node, lines []int, endPos int) { - node.HeadComment = "" - node.FootComment = "" - node.LineComment = "" - - for i, n2 := range node.Content { - end2 := endPos - if i < len(node.Content)-1 { - end2 = lines[node.Content[i+1].Line] - } - cleanYaml(n2, lines, end2) - } - - // Use inline style? 
- switch node.Kind { - case yaml.MappingNode, yaml.SequenceNode: - if endPos-lines[node.Line] < 40 { - node.Style = yaml.FlowStyle - } - } -} - -func allYamlNodes(n *yaml.Node) iter.Seq[*yaml.Node] { - return func(yield func(*yaml.Node) bool) { - if !yield(n) { - return - } - for _, n2 := range n.Content { - for n3 := range allYamlNodes(n2) { - if !yield(n3) { - return - } - } - } - } -} - -func TestRoundTripString(t *testing.T) { - // Check that we can round-trip a string with regexp meta-characters in it. - const y = `!string test*` - t.Logf("input:\n%s", y) - - v1 := oneValue(t, mustParse(y)) - var buf1 strings.Builder - enc := yaml.NewEncoder(&buf1) - if err := enc.Encode(v1); err != nil { - log.Fatal(err) - } - enc.Close() - t.Logf("after parse 1:\n%s", buf1.String()) - - v2 := oneValue(t, mustParse(buf1.String())) - var buf2 strings.Builder - enc = yaml.NewEncoder(&buf2) - if err := enc.Encode(v2); err != nil { - log.Fatal(err) - } - enc.Close() - t.Logf("after parse 2:\n%s", buf2.String()) - - if buf1.String() != buf2.String() { - t.Fatal("parse 1 and parse 2 differ") - } -} - -func TestEmptyString(t *testing.T) { - // Regression test. Make sure an empty string is parsed as an exact string, - // not a regexp. - const y = `""` - t.Logf("input:\n%s", y) - - v1 := oneValue(t, mustParse(y)) - if !v1.Exact() { - t.Fatal("expected exact string") - } -} - -func TestImport(t *testing.T) { - // Test a basic import - main := strings.NewReader("!import x/y.yaml") - fs := fstest.MapFS{ - // Test a glob import with a relative path - "x/y.yaml": {Data: []byte("!import y/*.yaml")}, - "x/y/z.yaml": {Data: []byte("42")}, - } - cl, err := Read(main, "x.yaml", ReadOpts{FS: fs}) - if err != nil { - t.Fatal(err) - } - x := 42 - checkDecode(t, oneValue(t, cl), &x) -} - -func TestImportEscape(t *testing.T) { - // Make sure an import can't escape its subdirectory. 
- main := strings.NewReader("!import x/y.yaml") - fs := fstest.MapFS{ - "x/y.yaml": {Data: []byte("!import ../y/*.yaml")}, - "y/z.yaml": {Data: []byte("42")}, - } - _, err := Read(main, "x.yaml", ReadOpts{FS: fs}) - if err == nil { - t.Fatal("relative !import should have failed") - } - if !strings.Contains(err.Error(), "must not contain") { - t.Fatalf("unexpected error %v", err) - } -} - -func TestImportScope(t *testing.T) { - // Test that imports have different variable scopes. - main := strings.NewReader("[!import y.yaml, !import y.yaml]") - fs := fstest.MapFS{ - "y.yaml": {Data: []byte("$v")}, - } - cl1, err := Read(main, "x.yaml", ReadOpts{FS: fs}) - if err != nil { - t.Fatal(err) - } - cl2 := mustParse("[1, 2]") - res, err := Unify(cl1, cl2) - if err != nil { - t.Fatal(err) - } - checkDecode(t, oneValue(t, res), []int{1, 2}) -} From 3476d8e3db440a7f3418917a5139e56eee685ca6 Mon Sep 17 00:00:00 2001 From: Cherry Mui Date: Thu, 14 Aug 2025 12:04:00 -0400 Subject: [PATCH 198/200] arm64/arm64asm: stop relying on global rand.Seed The global rand.Seed becomes no-op as of Go 1.24. Use a local random source with the seed instead. Updates golang/go#67273. For golang/go#69095. 
Change-Id: Ie50f197ba3dc115d4b514a9ba2baa72563bebbd6 Reviewed-on: https://go-review.googlesource.com/c/arch/+/696135 Reviewed-by: Dmitri Shuralyov LUCI-TryBot-Result: Go LUCI Reviewed-by: Dmitri Shuralyov --- arm64/arm64asm/ext_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arm64/arm64asm/ext_test.go b/arm64/arm64asm/ext_test.go index f0d18e93..839eb3fd 100644 --- a/arm64/arm64asm/ext_test.go +++ b/arm64/arm64asm/ext_test.go @@ -422,7 +422,7 @@ var condmark bool = false func doFuzzy(inst *InstJson, Ninst int) { var testdata uint32 var NonDigRE = regexp.MustCompile(`[\D]`) - rand.Seed(int64(Round + Ninst)) + rand := rand.New(rand.NewSource(int64(Round + Ninst))) off := 0 DigBit := "" if condmark == true && !strings.Contains(inst.Bits, "cond") { From 981dfb93ab29835405565cbd6975de348c266385 Mon Sep 17 00:00:00 2001 From: Gopher Robot Date: Wed, 13 Aug 2025 14:21:36 +0000 Subject: [PATCH 199/200] all: upgrade go directive to at least 1.24.0 [generated] By now Go 1.25.0 has been released, and Go 1.23 is no longer supported per the Go Release Policy (see https://go.dev/doc/devel/release#policy). For golang/go#69095. [git-generate] (cd . && go get go@1.24.0 && go mod tidy && go fix ./... 
&& go mod edit -toolchain=none) Change-Id: I27cc60c60dde64df29829e4f8577b4ae9cba33a3 Reviewed-on: https://go-review.googlesource.com/c/arch/+/695695 LUCI-TryBot-Result: Go LUCI Auto-Submit: Dmitri Shuralyov Reviewed-by: Cherry Mui Reviewed-by: David Chase Reviewed-by: Dmitri Shuralyov --- go.mod | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/go.mod b/go.mod index b72ba1a5..0db7aa41 100644 --- a/go.mod +++ b/go.mod @@ -1,5 +1,5 @@ module golang.org/x/arch -go 1.23.0 +go 1.24.0 require rsc.io/pdf v0.1.1 From 090af6d6344176653a725a32332977a644bca8f9 Mon Sep 17 00:00:00 2001 From: Mark Ryan Date: Mon, 25 Aug 2025 14:16:31 +0200 Subject: [PATCH 200/200] riscv64: fix argument count check in spec.go The code was panicking instead of reporting an error when an incorrect number of arguments were passed. Change-Id: I1ed8c94cedc8501160dbc65cdfc28badf67bf4f0 Reviewed-on: https://go-review.googlesource.com/c/arch/+/698895 Reviewed-by: Cherry Mui LUCI-TryBot-Result: Go LUCI Auto-Submit: Joel Sing Reviewed-by: Joel Sing Reviewed-by: Carlos Amedee --- riscv64/riscv64spec/spec.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/riscv64/riscv64spec/spec.go b/riscv64/riscv64spec/spec.go index b65ea697..5b80c868 100644 --- a/riscv64/riscv64spec/spec.go +++ b/riscv64/riscv64spec/spec.go @@ -68,7 +68,7 @@ func main() { log.SetFlags(0) log.SetPrefix("riscv64spec: ") - if len(os.Args) < 1 { + if len(os.Args) < 2 { log.Fatal("usage: go run spec.go ") } extensionsPath := filepath.Join(os.Args[1], "extensions")