all: REVERSE MERGE dev.simd (c456ab7) into master

This commit is a REVERSE MERGE.
It merges dev.simd back into its parent branch, master.
This marks the end of development on dev.simd.

Merge List:

+ 2025-12-08 c456ab7a30 [dev.simd] all: merge master (a33bbf1) into dev.simd
+ 2025-12-08 1d8711e126 [dev.simd] internal/buildcfg: don't enable SIMD experiment by default
+ 2025-12-08 f38e968aba [dev.simd] cmd/compile: zero only low 128-bit of X15
+ 2025-12-08 144cf17d2c [dev.simd] simd, cmd/compile: move "simd" to "simd/archsimd"
+ 2025-12-08 3417b48b17 [dev.simd] simd: add carryless multiply
+ 2025-12-05 f51ee08905 [dev.simd] simd: replace checking loops with call to slice-checker
+ 2025-12-03 2b91d96941 [dev.simd] internal/buildcfg: turn GOEXPERIMENT=simd back on

Change-Id: Ife3f2ca4f6d8ce131335c0f868358db6a6a1a534
Cherry Mui 2025-12-08 17:53:31 -05:00
commit 9e09812308
133 changed files with 2435 additions and 2268 deletions
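
As a reading aid for the "simd" -> "simd/archsimd" rename (144cf17d2c above), here is a hedged before/after sketch of caller code. It is not taken from this merge: the Load*Slice and StoreSlice helpers appear in the test diffs below, while the Add method and the need to build with GOEXPERIMENT=simd (now off by default, per the buildcfg change above) are assumptions.

// Hypothetical caller; not part of this merge.
package main

import "simd/archsimd" // previously: import "simd"

func addInt32x4(x, y []int32) []int32 {
	a := archsimd.LoadInt32x4Slice(x) // Load*Slice as shown in the test helpers below
	b := archsimd.LoadInt32x4Slice(y)
	g := make([]int32, 4)
	a.Add(b).StoreSlice(g) // Add is assumed; StoreSlice appears in the diffs below
	return g
}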

View File

@ -1232,6 +1232,9 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
ssa.OpAMD64VPSHRDQ128,
ssa.OpAMD64VPSHRDQ256,
ssa.OpAMD64VPSHRDQ512,
ssa.OpAMD64VPCLMULQDQ128,
ssa.OpAMD64VPCLMULQDQ256,
ssa.OpAMD64VPCLMULQDQ512,
ssa.OpAMD64VSHUFPS128,
ssa.OpAMD64VSHUFPD128,
ssa.OpAMD64VSHUFPS256,

View File

@ -18,7 +18,6 @@ import (
"cmd/internal/obj"
"cmd/internal/obj/x86"
"internal/abi"
"internal/buildcfg"
)
// ssaMarkMoves marks any MOVXconst ops that need to avoid clobbering flags.
@ -1718,7 +1717,15 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
case ssa.OpAMD64VZEROUPPER, ssa.OpAMD64VZEROALL:
s.Prog(v.Op.Asm())
case ssa.OpAMD64Zero128, ssa.OpAMD64Zero256, ssa.OpAMD64Zero512: // no code emitted
case ssa.OpAMD64Zero128: // no code emitted
case ssa.OpAMD64Zero256, ssa.OpAMD64Zero512:
p := s.Prog(v.Op.Asm())
p.From.Type = obj.TYPE_REG
p.From.Reg = simdReg(v)
p.AddRestSourceReg(simdReg(v))
p.To.Type = obj.TYPE_REG
p.To.Reg = simdReg(v)
case ssa.OpAMD64VMOVSSf2v, ssa.OpAMD64VMOVSDf2v:
// These are for initializing the least 32/64 bits of a SIMD register from a "float".
@ -1871,34 +1878,7 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
// zeroX15 zeroes the X15 register.
func zeroX15(s *ssagen.State) {
if !buildcfg.Experiment.SIMD {
opregreg(s, x86.AXORPS, x86.REG_X15, x86.REG_X15)
return
}
vxorps := func(s *ssagen.State) {
p := s.Prog(x86.AVXORPS)
p.From.Type = obj.TYPE_REG
p.From.Reg = x86.REG_X15
p.AddRestSourceReg(x86.REG_X15)
p.To.Type = obj.TYPE_REG
p.To.Reg = x86.REG_X15
}
if buildcfg.GOAMD64 >= 3 {
vxorps(s)
return
}
// AVX may not be available, check before zeroing the high bits.
p := s.Prog(x86.ACMPB)
p.From.Type = obj.TYPE_MEM
p.From.Name = obj.NAME_EXTERN
p.From.Sym = ir.Syms.X86HasAVX
p.To.Type = obj.TYPE_CONST
p.To.Offset = 1
jmp := s.Prog(x86.AJNE)
jmp.To.Type = obj.TYPE_BRANCH
vxorps(s)
sse := opregreg(s, x86.AXORPS, x86.REG_X15, x86.REG_X15)
jmp.To.SetTarget(sse)
opregreg(s, x86.AXORPS, x86.REG_X15, x86.REG_X15)
}
// Example instruction: VRSQRTPS X1, X1

View File

@ -445,7 +445,7 @@ type hairyVisitor struct {
func isDebugFn(fn *ir.Func) bool {
// if n := fn.Nname; n != nil {
// if n.Sym().Name == "Int32x8.Transpose8" && n.Sym().Pkg.Path == "simd" {
// if n.Sym().Name == "Int32x8.Transpose8" && n.Sym().Pkg.Path == "simd/archsimd" {
// fmt.Printf("isDebugFn '%s' DOT '%s'\n", n.Sym().Pkg.Path, n.Sym().Name)
// return true
// }

View File

@ -214,6 +214,7 @@ func init() {
vloadk = regInfo{inputs: []regMask{gpspsb, mask, 0}, outputs: vonly}
vstorek = regInfo{inputs: []regMask{gpspsb, mask, v, 0}}
v01 = regInfo{inputs: nil, outputs: vonly}
v11 = regInfo{inputs: vonly, outputs: vonly} // used in resultInArg0 ops, arg0 must not be x15
v21 = regInfo{inputs: []regMask{v, vz}, outputs: vonly} // used in resultInArg0 ops, arg0 must not be x15
vk = regInfo{inputs: vzonly, outputs: maskonly}
@ -232,6 +233,7 @@ func init() {
gpv = regInfo{inputs: []regMask{gp}, outputs: vonly}
v2flags = regInfo{inputs: []regMask{vz, vz}}
w01 = regInfo{inputs: nil, outputs: wonly}
w11 = regInfo{inputs: wonly, outputs: wonly} // used in resultInArg0 ops, arg0 must not be x15
w21 = regInfo{inputs: []regMask{wz, wz}, outputs: wonly}
wk = regInfo{inputs: wzonly, outputs: maskonly}
@ -1398,12 +1400,15 @@ func init() {
{name: "VPMOVVec64x4ToM", argLength: 1, reg: vk, asm: "VPMOVQ2M"},
{name: "VPMOVVec64x8ToM", argLength: 1, reg: wk, asm: "VPMOVQ2M"},
// X15 is the zero register up to 128-bit. For larger values, we zero it on the fly.
{name: "Zero128", argLength: 0, reg: x15only, zeroWidth: true, fixedReg: true},
{name: "Zero256", argLength: 0, reg: x15only, zeroWidth: true, fixedReg: true},
{name: "Zero512", argLength: 0, reg: x15only, zeroWidth: true, fixedReg: true},
{name: "Zero256", argLength: 0, reg: v01, asm: "VPXOR"},
{name: "Zero512", argLength: 0, reg: w01, asm: "VPXORQ"},
// Move a 32/64 bit float to a 128-bit SIMD register.
{name: "VMOVSDf2v", argLength: 1, reg: fpv, asm: "VMOVSD"},
{name: "VMOVSSf2v", argLength: 1, reg: fpv, asm: "VMOVSS"},
{name: "VMOVQ", argLength: 1, reg: gpv, asm: "VMOVQ"},
{name: "VMOVD", argLength: 1, reg: gpv, asm: "VMOVD"},

View File

@ -1333,6 +1333,9 @@
(blendMaskedInt16x32 x y mask) => (VPBLENDMWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
(blendMaskedInt32x16 x y mask) => (VPBLENDMDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
(blendMaskedInt64x8 x y mask) => (VPBLENDMQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
(carrylessMultiplyUint64x2 ...) => (VPCLMULQDQ128 ...)
(carrylessMultiplyUint64x4 ...) => (VPCLMULQDQ256 ...)
(carrylessMultiplyUint64x8 ...) => (VPCLMULQDQ512 ...)
(concatSelectedConstantFloat32x4 ...) => (VSHUFPS128 ...)
(concatSelectedConstantFloat64x2 ...) => (VSHUFPD128 ...)
(concatSelectedConstantInt32x4 ...) => (VSHUFPS128 ...)

View File

@ -1269,6 +1269,9 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
{name: "VPALIGNRMasked128", argLength: 3, reg: w2kw, asm: "VPALIGNR", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPALIGNRMasked256", argLength: 3, reg: w2kw, asm: "VPALIGNR", aux: "UInt8", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VPALIGNRMasked512", argLength: 3, reg: w2kw, asm: "VPALIGNR", aux: "UInt8", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VPCLMULQDQ128", argLength: 2, reg: v21, asm: "VPCLMULQDQ", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPCLMULQDQ256", argLength: 2, reg: w21, asm: "VPCLMULQDQ", aux: "UInt8", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VPCLMULQDQ512", argLength: 2, reg: w21, asm: "VPCLMULQDQ", aux: "UInt8", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VPCMPB512", argLength: 2, reg: w2k, asm: "VPCMPB", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false},
{name: "VPCMPBMasked128", argLength: 3, reg: w2kk, asm: "VPCMPB", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false},
{name: "VPCMPBMasked256", argLength: 3, reg: w2kk, asm: "VPCMPB", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false},

View File

@ -1301,6 +1301,9 @@ func simdGenericOps() []opData {
{name: "TruncScaledResidueFloat64x2", argLength: 1, commutative: false, aux: "UInt8"},
{name: "TruncScaledResidueFloat64x4", argLength: 1, commutative: false, aux: "UInt8"},
{name: "TruncScaledResidueFloat64x8", argLength: 1, commutative: false, aux: "UInt8"},
{name: "carrylessMultiplyUint64x2", argLength: 2, commutative: false, aux: "UInt8"},
{name: "carrylessMultiplyUint64x4", argLength: 2, commutative: false, aux: "UInt8"},
{name: "carrylessMultiplyUint64x8", argLength: 2, commutative: false, aux: "UInt8"},
{name: "concatSelectedConstantFloat32x4", argLength: 2, commutative: false, aux: "UInt8"},
{name: "concatSelectedConstantFloat64x2", argLength: 2, commutative: false, aux: "UInt8"},
{name: "concatSelectedConstantGroupedFloat32x8", argLength: 2, commutative: false, aux: "UInt8"},

View File

@ -2510,6 +2510,9 @@ const (
OpAMD64VPALIGNRMasked128
OpAMD64VPALIGNRMasked256
OpAMD64VPALIGNRMasked512
OpAMD64VPCLMULQDQ128
OpAMD64VPCLMULQDQ256
OpAMD64VPCLMULQDQ512
OpAMD64VPCMPB512
OpAMD64VPCMPBMasked128
OpAMD64VPCMPBMasked256
@ -7448,6 +7451,9 @@ const (
OpTruncScaledResidueFloat64x2
OpTruncScaledResidueFloat64x4
OpTruncScaledResidueFloat64x8
OpcarrylessMultiplyUint64x2
OpcarrylessMultiplyUint64x4
OpcarrylessMultiplyUint64x8
OpconcatSelectedConstantFloat32x4
OpconcatSelectedConstantFloat64x2
OpconcatSelectedConstantGroupedFloat32x8
@ -20359,24 +20365,22 @@ var opcodeTable = [...]opInfo{
},
},
{
name: "Zero256",
argLen: 0,
zeroWidth: true,
fixedReg: true,
name: "Zero256",
argLen: 0,
asm: x86.AVPXOR,
reg: regInfo{
outputs: []outputInfo{
{0, 2147483648}, // X15
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
},
},
{
name: "Zero512",
argLen: 0,
zeroWidth: true,
fixedReg: true,
name: "Zero512",
argLen: 0,
asm: x86.AVPXORQ,
reg: regInfo{
outputs: []outputInfo{
{0, 2147483648}, // X15
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
},
},
},
@ -39211,6 +39215,51 @@ var opcodeTable = [...]opInfo{
},
},
},
{
name: "VPCLMULQDQ128",
auxType: auxUInt8,
argLen: 2,
asm: x86.AVPCLMULQDQ,
reg: regInfo{
inputs: []inputInfo{
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
{1, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
},
outputs: []outputInfo{
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
},
},
{
name: "VPCLMULQDQ256",
auxType: auxUInt8,
argLen: 2,
asm: x86.AVPCLMULQDQ,
reg: regInfo{
inputs: []inputInfo{
{0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
{1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
},
outputs: []outputInfo{
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
},
},
},
{
name: "VPCLMULQDQ512",
auxType: auxUInt8,
argLen: 2,
asm: x86.AVPCLMULQDQ,
reg: regInfo{
inputs: []inputInfo{
{0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
{1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
},
outputs: []outputInfo{
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
},
},
},
{
name: "VPCMPB512",
auxType: auxUInt8,
@ -95848,6 +95897,24 @@ var opcodeTable = [...]opInfo{
argLen: 1,
generic: true,
},
{
name: "carrylessMultiplyUint64x2",
auxType: auxUInt8,
argLen: 2,
generic: true,
},
{
name: "carrylessMultiplyUint64x4",
auxType: auxUInt8,
argLen: 2,
generic: true,
},
{
name: "carrylessMultiplyUint64x8",
auxType: auxUInt8,
argLen: 2,
generic: true,
},
{
name: "concatSelectedConstantFloat32x4",
auxType: auxUInt8,

View File

@ -6307,6 +6307,15 @@ func rewriteValueAMD64(v *Value) bool {
return rewriteValueAMD64_OpblendMaskedInt64x8(v)
case OpblendMaskedInt8x64:
return rewriteValueAMD64_OpblendMaskedInt8x64(v)
case OpcarrylessMultiplyUint64x2:
v.Op = OpAMD64VPCLMULQDQ128
return true
case OpcarrylessMultiplyUint64x4:
v.Op = OpAMD64VPCLMULQDQ256
return true
case OpcarrylessMultiplyUint64x8:
v.Op = OpAMD64VPCLMULQDQ512
return true
case OpconcatSelectedConstantFloat32x4:
v.Op = OpAMD64VSHUFPS128
return true

View File

@ -1644,7 +1644,7 @@ func initIntrinsics(cfg *intrinsicBuildConfig) {
// Only enable intrinsics, if SIMD experiment.
simdIntrinsics(addF)
addF("simd", "ClearAVXUpperBits",
addF(simdPackage, "ClearAVXUpperBits",
func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
s.vars[memVar] = s.newValue1(ssa.OpAMD64VZEROUPPER, types.TypeMem, s.mem())
return nil
@ -1668,15 +1668,18 @@ func initIntrinsics(cfg *intrinsicBuildConfig) {
addF(simdPackage, "Uint32x8.IsZero", opLen1(ssa.OpIsZeroVec, types.Types[types.TBOOL]), sys.AMD64)
addF(simdPackage, "Uint64x4.IsZero", opLen1(ssa.OpIsZeroVec, types.Types[types.TBOOL]), sys.AMD64)
// sfp4 is intrinsic-if-constant, but otherwise it's complicated enough to just implement in Go.
sfp4 := func(method string, hwop ssa.Op, vectype *types.Type) {
addF("simd", method,
addF(simdPackage, method,
func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
x, a, b, c, d, y := args[0], args[1], args[2], args[3], args[4], args[5]
if a.Op == ssa.OpConst8 && b.Op == ssa.OpConst8 && c.Op == ssa.OpConst8 && d.Op == ssa.OpConst8 {
return select4FromPair(x, a, b, c, d, y, s, hwop, vectype)
} else {
return s.callResult(n, callNormal)
z := select4FromPair(x, a, b, c, d, y, s, hwop, vectype)
if z != nil {
return z
}
}
return s.callResult(n, callNormal)
},
sys.AMD64)
}
@ -1693,15 +1696,18 @@ func initIntrinsics(cfg *intrinsicBuildConfig) {
sfp4("Uint32x16.SelectFromPairGrouped", ssa.OpconcatSelectedConstantGroupedUint32x16, types.TypeVec512)
sfp4("Float32x16.SelectFromPairGrouped", ssa.OpconcatSelectedConstantGroupedFloat32x16, types.TypeVec512)
// sfp2 is intrinsic-if-constant, but otherwise it's complicated enough to just implement in Go.
sfp2 := func(method string, hwop ssa.Op, vectype *types.Type, cscimm func(i, j uint8) int64) {
addF("simd", method,
addF(simdPackage, method,
func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
x, a, b, y := args[0], args[1], args[2], args[3]
if a.Op == ssa.OpConst8 && b.Op == ssa.OpConst8 {
return select2FromPair(x, a, b, y, s, hwop, vectype, cscimm)
} else {
return s.callResult(n, callNormal)
z := select2FromPair(x, a, b, y, s, hwop, vectype, cscimm)
if z != nil {
return z
}
}
return s.callResult(n, callNormal)
},
sys.AMD64)
}
@ -1767,6 +1773,9 @@ const (
func select2FromPair(x, _a, _b, y *ssa.Value, s *state, op ssa.Op, t *types.Type, csc func(a, b uint8) int64) *ssa.Value {
a, b := uint8(_a.AuxInt8()), uint8(_b.AuxInt8())
if a > 3 || b > 3 {
return nil
}
pattern := (a&2)>>1 + (b & 2)
a, b = a&1, b&1
@ -1785,6 +1794,9 @@ func select2FromPair(x, _a, _b, y *ssa.Value, s *state, op ssa.Op, t *types.Type
func select4FromPair(x, _a, _b, _c, _d, y *ssa.Value, s *state, op ssa.Op, t *types.Type) *ssa.Value {
a, b, c, d := uint8(_a.AuxInt8()), uint8(_b.AuxInt8()), uint8(_c.AuxInt8()), uint8(_d.AuxInt8())
if a > 7 || b > 7 || c > 7 || d > 7 {
return nil
}
pattern := a>>2 + (b&4)>>1 + (c & 4) + (d&4)<<1
a, b, c, d = a&3, b&3, c&3, d&3
@ -2154,7 +2166,7 @@ func findIntrinsic(sym *types.Sym) intrinsicBuilder {
fn := sym.Name
if ssa.IntrinsicsDisable {
if pkg == "internal/runtime/sys" && (fn == "GetCallerPC" || fn == "GetCallerSP" || fn == "GetClosurePtr") ||
pkg == "internal/simd" || pkg == "simd" { // TODO after simd has been moved to package simd, remove internal/simd
pkg == simdPackage {
// These runtime functions don't have definitions, must be intrinsics.
} else {
return nil

View File

@ -1407,13 +1407,13 @@ func TestIntrinsics(t *testing.T) {
gotIntrinsics[testIntrinsicKey{ik.arch.Name, ik.pkg, ik.fn}] = struct{}{}
}
for ik, _ := range gotIntrinsics {
if _, found := wantIntrinsics[ik]; !found && (ik.pkg != "simd" || *simd) {
if _, found := wantIntrinsics[ik]; !found && (ik.pkg != "simd/archsimd" || *simd) {
t.Errorf("Got unwanted intrinsic %v %v.%v", ik.archName, ik.pkg, ik.fn)
}
}
for ik, _ := range wantIntrinsics {
if _, found := gotIntrinsics[ik]; !found && (ik.pkg != "simd" || *simd) {
if _, found := gotIntrinsics[ik]; !found && (ik.pkg != "simd/archsimd" || *simd) {
t.Errorf("Want missing intrinsic %v %v.%v", ik.archName, ik.pkg, ik.fn)
}
}

View File

@ -9,7 +9,7 @@ import (
"cmd/internal/sys"
)
const simdPackage = "simd"
const simdPackage = "simd/archsimd"
func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies ...sys.ArchFamily)) {
addF(simdPackage, "Uint8x16.AESDecryptLastRound", opLen2(ssa.OpAESDecryptLastRoundUint8x16, types.TypeVec128), sys.AMD64)
@ -1309,6 +1309,9 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
addF(simdPackage, "Int16x32.blendMasked", opLen3(ssa.OpblendMaskedInt16x32, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int32x16.blendMasked", opLen3(ssa.OpblendMaskedInt32x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int64x8.blendMasked", opLen3(ssa.OpblendMaskedInt64x8, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint64x2.carrylessMultiply", opLen2Imm8(ssa.OpcarrylessMultiplyUint64x2, types.TypeVec128, 0), sys.AMD64)
addF(simdPackage, "Uint64x4.carrylessMultiply", opLen2Imm8(ssa.OpcarrylessMultiplyUint64x4, types.TypeVec256, 0), sys.AMD64)
addF(simdPackage, "Uint64x8.carrylessMultiply", opLen2Imm8(ssa.OpcarrylessMultiplyUint64x8, types.TypeVec512, 0), sys.AMD64)
addF(simdPackage, "Float32x4.concatSelectedConstant", opLen2Imm8(ssa.OpconcatSelectedConstantFloat32x4, types.TypeVec128, 0), sys.AMD64)
addF(simdPackage, "Float64x2.concatSelectedConstant", opLen2Imm8(ssa.OpconcatSelectedConstantFloat64x2, types.TypeVec128, 0), sys.AMD64)
addF(simdPackage, "Int32x4.concatSelectedConstant", opLen2Imm8(ssa.OpconcatSelectedConstantInt32x4, types.TypeVec128, 0), sys.AMD64)

View File

@ -471,11 +471,6 @@ func simdify(st *Type, isTag bool) {
} else {
st.floatRegs = 1
}
// if st.Sym() != nil {
// base.Warn("Simdify %s, %v, %d", st.Sym().Name, isTag, st.width)
// } else {
// base.Warn("Simdify %v, %v, %d", st, isTag, st.width)
// }
}
// CalcStructSize calculates the size of t,
@ -491,10 +486,9 @@ func CalcStructSize(t *Type) {
case sym.Name == "align64" && isAtomicStdPkg(sym.Pkg):
maxAlign = 8
case buildcfg.Experiment.SIMD && (sym.Pkg.Path == "internal/simd" || sym.Pkg.Path == "simd") && len(t.Fields()) >= 1:
case buildcfg.Experiment.SIMD && (sym.Pkg.Path == "simd/archsimd") && len(t.Fields()) >= 1:
// This gates the experiment -- without it, no user-visible types can be "simd".
// The SSA-visible SIMD types remain.
// TODO after simd has been moved to package simd, remove internal/simd.
switch sym.Name {
case "v128":
simdify(t, true)

View File

@ -361,8 +361,8 @@ var excluded = map[string]bool{
"builtin": true,
"cmd/compile/internal/ssa/_gen": true,
"runtime/_mkmalloc": true,
"simd/_gen/simdgen": true,
"simd/_gen/unify": true,
"simd/archsimd/_gen/simdgen": true,
"simd/archsimd/_gen/unify": true,
}
// printPackageMu synchronizes the printing of type-checked package files in

View File

@ -73,7 +73,7 @@ var depsRules = `
internal/byteorder, internal/cpu, internal/goarch < internal/chacha8rand;
internal/goarch, math/bits < internal/strconv;
internal/cpu, internal/strconv < simd;
internal/cpu, internal/strconv < simd/archsimd;
# RUNTIME is the core runtime group of packages, all of them very light-weight.
internal/abi,
@ -709,7 +709,7 @@ var depsRules = `
< testing;
testing, math
< simd/internal/test_helpers;
< simd/archsimd/internal/test_helpers;
log/slog, testing
< testing/slogtest;

View File

@ -361,8 +361,8 @@ var excluded = map[string]bool{
"builtin": true,
"cmd/compile/internal/ssa/_gen": true,
"runtime/_mkmalloc": true,
"simd/_gen/simdgen": true,
"simd/_gen/unify": true,
"simd/archsimd/_gen/simdgen": true,
"simd/archsimd/_gen/unify": true,
}
// printPackageMu synchronizes the printing of type-checked package files in

View File

@ -1093,11 +1093,6 @@ needm:
// there's no need to handle that. Clear R14 so that there's
// a bad value in there, in case needm tries to use it.
XORPS X15, X15
#ifdef GOEXPERIMENT_simd
CMPB internalcpu·X86+const_offsetX86HasAVX(SB), $1
JNE 2(PC)
VXORPS X15, X15, X15
#endif
XORQ R14, R14
MOVQ $runtime·needAndBindM<ABIInternal>(SB), AX
CALL AX
@ -1795,11 +1790,6 @@ TEXT ·sigpanic0(SB),NOSPLIT,$0-0
get_tls(R14)
MOVQ g(R14), R14
XORPS X15, X15
#ifdef GOEXPERIMENT_simd
CMPB internalcpu·X86+const_offsetX86HasAVX(SB), $1
JNE 2(PC)
VXORPS X15, X15, X15
#endif
JMP ·sigpanic<ABIInternal>(SB)
// gcWriteBarrier informs the GC about heap pointer writes.

View File

@ -456,11 +456,6 @@ call:
// Back to Go world, set special registers.
// The g register (R14) is preserved in C.
XORPS X15, X15
#ifdef GOEXPERIMENT_simd
CMPB internalcpu·X86+const_offsetX86HasAVX(SB), $1
JNE 2(PC)
VXORPS X15, X15, X15
#endif
RET
// C->Go callback thunk that allows to call runtime·racesymbolize from C code.

View File

@ -177,11 +177,6 @@ TEXT runtime·sigtramp(SB),NOSPLIT|TOPFRAME|NOFRAME,$0
get_tls(R12)
MOVQ g(R12), R14
PXOR X15, X15
#ifdef GOEXPERIMENT_simd
CMPB internalcpu·X86+const_offsetX86HasAVX(SB), $1
JNE 2(PC)
VXORPS X15, X15, X15
#endif
// Reserve space for spill slots.
NOP SP // disable vet stack checking

View File

@ -228,11 +228,6 @@ TEXT runtime·sigtramp(SB),NOSPLIT|TOPFRAME|NOFRAME,$0
get_tls(R12)
MOVQ g(R12), R14
PXOR X15, X15
#ifdef GOEXPERIMENT_simd
CMPB internalcpu·X86+const_offsetX86HasAVX(SB), $1
JNE 2(PC)
VXORPS X15, X15, X15
#endif
// Reserve space for spill slots.
NOP SP // disable vet stack checking

View File

@ -265,11 +265,6 @@ TEXT runtime·sigtramp(SB),NOSPLIT|TOPFRAME|NOFRAME,$0
get_tls(R12)
MOVQ g(R12), R14
PXOR X15, X15
#ifdef GOEXPERIMENT_simd
CMPB internalcpu·X86+const_offsetX86HasAVX(SB), $1
JNE 2(PC)
VXORPS X15, X15, X15
#endif
// Reserve space for spill slots.
NOP SP // disable vet stack checking
@ -295,11 +290,6 @@ TEXT runtime·sigprofNonGoWrapper<>(SB),NOSPLIT|NOFRAME,$0
get_tls(R12)
MOVQ g(R12), R14
PXOR X15, X15
#ifdef GOEXPERIMENT_simd
CMPB internalcpu·X86+const_offsetX86HasAVX(SB), $1
JNE 2(PC)
VXORPS X15, X15, X15
#endif
// Reserve space for spill slots.
NOP SP // disable vet stack checking

View File

@ -352,11 +352,6 @@ TEXT runtime·sigtramp(SB),NOSPLIT|TOPFRAME|NOFRAME,$0
get_tls(R12)
MOVQ g(R12), R14
PXOR X15, X15
#ifdef GOEXPERIMENT_simd
CMPB internalcpu·X86+const_offsetX86HasAVX(SB), $1
JNE 2(PC)
VXORPS X15, X15, X15
#endif
// Reserve space for spill slots.
NOP SP // disable vet stack checking
@ -382,11 +377,6 @@ TEXT runtime·sigprofNonGoWrapper<>(SB),NOSPLIT|NOFRAME,$0
get_tls(R12)
MOVQ g(R12), R14
PXOR X15, X15
#ifdef GOEXPERIMENT_simd
CMPB internalcpu·X86+const_offsetX86HasAVX(SB), $1
JNE 2(PC)
VXORPS X15, X15, X15
#endif
// Reserve space for spill slots.
NOP SP // disable vet stack checking

View File

@ -310,11 +310,6 @@ TEXT runtime·sigtramp(SB),NOSPLIT|TOPFRAME|NOFRAME,$0
get_tls(R12)
MOVQ g(R12), R14
PXOR X15, X15
#ifdef GOEXPERIMENT_simd
CMPB internalcpu·X86+const_offsetX86HasAVX(SB), $1
JNE 2(PC)
VXORPS X15, X15, X15
#endif
// Reserve space for spill slots.
NOP SP // disable vet stack checking

View File

@ -64,11 +64,6 @@ TEXT runtime·sigtramp(SB),NOSPLIT|TOPFRAME|NOFRAME,$0
get_tls(R12)
MOVQ g(R12), R14
PXOR X15, X15
#ifdef GOEXPERIMENT_simd
CMPB internalcpu·X86+const_offsetX86HasAVX(SB), $1
JNE 2(PC)
VXORPS X15, X15, X15
#endif
// Reserve space for spill slots.
NOP SP // disable vet stack checking

View File

@ -32,11 +32,6 @@ TEXT sigtramp<>(SB),NOSPLIT,$0-0
// R14 is cleared in case there's a non-zero value in there
// if called from a non-go thread.
XORPS X15, X15
#ifdef GOEXPERIMENT_simd
CMPB internalcpu·X86+const_offsetX86HasAVX(SB), $1
JNE 2(PC)
VXORPS X15, X15, X15
#endif
XORQ R14, R14
get_tls(AX)

View File

@ -1,3 +0,0 @@
testdata/*
.gemini/*
.gemini*

View File

@ -1,32 +0,0 @@
!sum
- go: GaloisFieldAffineTransform
asm: VGF2P8AFFINEQB
operandOrder: 2I # 2nd operand, then immediate
in: &AffineArgs
- &uint8
go: $t
base: uint
- &uint8x8
go: $t2
base: uint
- &pureImmVar
class: immediate
immOffset: 0
name: b
out:
- *uint8
- go: GaloisFieldAffineTransformInverse
asm: VGF2P8AFFINEINVQB
operandOrder: 2I # 2nd operand, then immediate
in: *AffineArgs
out:
- *uint8
- go: GaloisFieldMul
asm: VGF2P8MULB
in:
- *uint8
- *uint8
out:
- *uint8

View File

@ -1,4 +1,4 @@
module simd/_gen
module simd/archsimd/_gen
go 1.24

View File

@ -9,16 +9,13 @@ if [[ ! -d "$XEDDATA" ]]; then
exit 1
fi
# Ensure that goroot is the appropriate ancestor of this directory
which go >/dev/null || exit 1
goroot="$(go env GOROOT)"
if [[ ! ../../../.. -ef "$goroot" ]]; then
ancestor="../../../../.."
if [[ ! $ancestor -ef "$goroot" ]]; then
# We might be able to make this work but it's SO CONFUSING.
echo >&2 "go command in path has GOROOT $goroot"
exit 1
fi
if [[ $(go env GOEXPERIMENT) != simd ]]; then
echo >&2 "GOEXPERIMENT=$(go env GOEXPERIMENT), expected simd"
echo >&2 "go command in path has GOROOT $goroot instead of" `(cd $ancestor; pwd)`
exit 1
fi
@ -34,11 +31,12 @@ cd "$goroot"/src
go install cmd/compile
# Tests
GOARCH=amd64 go run -C simd/testdata .
GOARCH=amd64 go test -v simd
go test go/doc go/build
go test cmd/api -v -check -run ^TestCheck$
go test cmd/compile/internal/ssagen -simd=0
# Set the GOEXPERIMENT explicitly.
GOEXPERIMENT=simd GOARCH=amd64 go run -C simd/archsimd/testdata .
GOEXPERIMENT=simd GOARCH=amd64 go test -v simd/archsimd
GOEXPERIMENT=simd GOARCH=amd64 go test go/doc go/build
GOEXPERIMENT=simd GOARCH=amd64 go test cmd/api -v -check -run ^TestCheck$
GOEXPERIMENT=simd GOARCH=amd64 go test cmd/compile/internal/ssagen -simd=0
# Check tests without the GOEXPERIMENT
GOEXPERIMENT= go test go/doc go/build

View File

@ -123,7 +123,7 @@ func compareSimdTypePairs(x, y simdTypePair) int {
const simdPackageHeader = generatedHeader + `
//go:build goexperiment.simd
package simd
package archsimd
`
const simdTypesTemplates = `

View File

@ -13,7 +13,7 @@ import (
"strings"
"unicode"
"simd/_gen/unify"
"simd/archsimd/_gen/unify"
)
type Operation struct {

View File

@ -92,7 +92,7 @@ import (
"slices"
"strings"
"simd/_gen/unify"
"simd/archsimd/_gen/unify"
"gopkg.in/yaml.v3"
)
@ -117,7 +117,7 @@ var (
flagMemProfile = flag.String("memprofile", "", "write memory profile to `file`")
)
const simdPackage = "simd"
const simdPackage = "simd/archsimd"
func main() {
flag.Parse()

View File

@ -19,3 +19,5 @@
documentation: !string |-
// NAME computes element-wise GF(2^8) multiplication with
// reduction polynomial x^8 + x^4 + x^3 + x + 1.
- go: carrylessMultiply
commutative: false

View File

@ -0,0 +1,92 @@
!sum
- go: GaloisFieldAffineTransform
asm: VGF2P8AFFINEQB
operandOrder: 2I # 2nd operand, then immediate
in: &AffineArgs
- &uint8
go: $t
base: uint
- &uint8x8
go: $t2
base: uint
- &pureImmVar
class: immediate
immOffset: 0
name: b
out:
- *uint8
- go: GaloisFieldAffineTransformInverse
asm: VGF2P8AFFINEINVQB
operandOrder: 2I # 2nd operand, then immediate
in: *AffineArgs
out:
- *uint8
- go: GaloisFieldMul
asm: VGF2P8MULB
in:
- *uint8
- *uint8
out:
- *uint8
- go: carrylessMultiply
documentation: !string |-
// NAME computes one of four possible Galois polynomial
// products of selected high and low halves of x and y,
// depending on the value of xyHiLo, returning the 128-bit
// product in the concatenated two elements of the result.
// Bit 0 selects the low (0) or high (1) element of x and
// bit 4 selects the low (0x00) or high (0x10) element of y.
asm: V?PCLMULQDQ
in:
- go: Uint64x2
- go: Uint64x2
- class: immediate
immOffset: 0
name: xyHiLo
out:
- go: Uint64x2
overwriteElementBits: 64
hideMaskMethods: true
- go: carrylessMultiply
documentation: !string |-
// NAME computes one of four possible Galois polynomial
// products of selected high and low halves of each of the two
// 128-bit lanes of x and y, depending on the value of xyHiLo,
// and returns the two 128-bit products in the result's lanes.
// Bit 0 selects the low (0) or high (1) elements of x's lanes and
// bit 4 selects the low (0x00) or high (0x10) elements of y's lanes.
asm: V?PCLMULQDQ
in:
- go: Uint64x4
- go: Uint64x4
- class: immediate
immOffset: 0
name: xyHiLo
out:
- go: Uint64x4
overwriteElementBits: 64
hideMaskMethods: true
- go: carrylessMultiply
documentation: !string |-
// NAME computes one of four possible Galois polynomial
// products of selected high and low halves of each of the four
// 128-bit lanes of x and y, depending on the value of xyHiLo,
// and returns the four 128-bit products in the result's lanes.
// Bit 0 selects the low (0) or high (1) elements of x's lanes and
// bit 4 selects the low (0x00) or high (0x10) elements of y's lanes.
asm: V?PCLMULQDQ
in:
- go: Uint64x8
- go: Uint64x8
- class: immediate
immOffset: 0
name: xyHiLo
out:
- go: Uint64x8
overwriteElementBits: 64
hideMaskMethods: true
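
As a reading aid for the xyHiLo documentation above, a hedged software model (not part of this CL) of the single 64x64 -> 128-bit carryless product computed for each selected pair of halves:

// clmul64 is an illustrative reference model of one carryless (GF(2)
// polynomial) multiply: for every set bit i of b, XOR a<<i into a 128-bit
// accumulator; there are no carries between bit positions.
func clmul64(a, b uint64) (hi, lo uint64) {
	for i := uint(0); i < 64; i++ {
		if b&(1<<i) != 0 {
			lo ^= a << i
			if i > 0 {
				hi ^= a >> (64 - i)
			}
		}
	}
	return
}

Per the documentation above, xyHiLo = 0x00 multiplies the two low elements, 0x01 uses the high element of x, 0x10 the high element of y, and 0x11 both high elements; the 256- and 512-bit shapes apply the same selection independently within each 128-bit lane.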

View File

@ -83,6 +83,9 @@ in: !repeat
- {class: vreg, go: Int64x4, base: "int", elemBits: 128, bits: 256, lanes: 4}
- {class: vreg, go: Uint64x4, base: "uint", elemBits: 128, bits: 256, lanes: 4}
# Special for carryless multiply
- {class: vreg, go: Uint64x8, base: "uint", elemBits: 128, bits: 512, lanes: 8}
# Special shapes just to make VAES(ENC|DEC)(LAST)?512 work.
# The elemBits field of these shapes are wrong, it would be overwritten by overwriteElemBits.
- {class: vreg, go: Int8x32, base: "int", elemBits: 128, bits: 512, lanes: 32}

View File

@ -15,7 +15,7 @@ import (
"strconv"
"strings"
"simd/_gen/unify"
"simd/archsimd/_gen/unify"
"golang.org/x/arch/x86/xeddata"
"gopkg.in/yaml.v3"
@ -808,13 +808,14 @@ var cpuFeatureMap = map[cpuFeatureKey]string{
// the vector length suffix.
// AVX-512 extension features
{"AVX512EVEX", "AVX512_BITALG"}: "AVX512BITALG",
{"AVX512EVEX", "AVX512_GFNI"}: "AVX512GFNI",
{"AVX512EVEX", "AVX512_VBMI2"}: "AVX512VBMI2",
{"AVX512EVEX", "AVX512_VBMI"}: "AVX512VBMI",
{"AVX512EVEX", "AVX512_VNNI"}: "AVX512VNNI",
{"AVX512EVEX", "AVX512_VPOPCNTDQ"}: "AVX512VPOPCNTDQ",
{"AVX512EVEX", "AVX512_VAES"}: "AVX512VAES",
{"AVX512EVEX", "AVX512_BITALG"}: "AVX512BITALG",
{"AVX512EVEX", "AVX512_GFNI"}: "AVX512GFNI",
{"AVX512EVEX", "AVX512_VBMI2"}: "AVX512VBMI2",
{"AVX512EVEX", "AVX512_VBMI"}: "AVX512VBMI",
{"AVX512EVEX", "AVX512_VNNI"}: "AVX512VNNI",
{"AVX512EVEX", "AVX512_VPOPCNTDQ"}: "AVX512VPOPCNTDQ",
{"AVX512EVEX", "AVX512_VAES"}: "AVX512VAES",
{"AVX512EVEX", "AVX512_VPCLMULQDQ"}: "AVX512VPCLMULQDQ",
// AVX 10.2 (not yet supported)
{"AVX512EVEX", "AVX10_2_RC"}: "ignore",

View File

@ -247,7 +247,7 @@ func prologue(s string, out io.Writer) {
//go:build goexperiment.simd
package simd
package archsimd
`, s)
}
@ -267,7 +267,7 @@ func unsafePrologue(s string, out io.Writer) {
//go:build goexperiment.simd
package simd
package archsimd
import "unsafe"
@ -287,7 +287,7 @@ func testPrologue(t, s string, out io.Writer) {
package simd_test
import (
"simd"
"simd/archsimd"
"testing"
)
@ -324,12 +324,12 @@ func (x {{.VType}}) StoreSlice(s []{{.Etype}}) {
var unaryTemplate = templateOf("unary_helpers", `
// test{{.VType}}Unary tests the simd unary method f against the expected behavior generated by want
func test{{.VType}}Unary(t *testing.T, f func(_ simd.{{.VType}}) simd.{{.VType}}, want func(_ []{{.Etype}}) []{{.Etype}}) {
func test{{.VType}}Unary(t *testing.T, f func(_ archsimd.{{.VType}}) archsimd.{{.VType}}, want func(_ []{{.Etype}}) []{{.Etype}}) {
n := {{.Count}}
t.Helper()
forSlice(t, {{.Etype}}s, n, func(x []{{.Etype}}) bool {
t.Helper()
a := simd.Load{{.VType}}Slice(x)
a := archsimd.Load{{.VType}}Slice(x)
g := make([]{{.Etype}}, n)
f(a).StoreSlice(g)
w := want(x)
@ -341,12 +341,12 @@ func test{{.VType}}Unary(t *testing.T, f func(_ simd.{{.VType}}) simd.{{.VType}}
var unaryFlakyTemplate = shapedTemplateOf(unaryFlaky, "unary_flaky_helpers", `
// test{{.VType}}UnaryFlaky tests the simd unary method f against the expected behavior generated by want,
// but using a flakiness parameter because we haven't exactly figured out how simd floating point works
func test{{.VType}}UnaryFlaky(t *testing.T, f func(x simd.{{.VType}}) simd.{{.VType}}, want func(x []{{.Etype}}) []{{.Etype}}, flakiness float64) {
func test{{.VType}}UnaryFlaky(t *testing.T, f func(x archsimd.{{.VType}}) archsimd.{{.VType}}, want func(x []{{.Etype}}) []{{.Etype}}, flakiness float64) {
n := {{.Count}}
t.Helper()
forSlice(t, {{.Etype}}s, n, func(x []{{.Etype}}) bool {
t.Helper()
a := simd.Load{{.VType}}Slice(x)
a := archsimd.Load{{.VType}}Slice(x)
g := make([]{{.Etype}}, n)
f(a).StoreSlice(g)
w := want(x)
@ -358,12 +358,12 @@ func test{{.VType}}UnaryFlaky(t *testing.T, f func(x simd.{{.VType}}) simd.{{.VT
var convertTemplate = templateOf("convert_helpers", `
// test{{.VType}}ConvertTo{{.OEType}} tests the simd conversion method f against the expected behavior generated by want
// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width.
func test{{.VType}}ConvertTo{{.OEType}}(t *testing.T, f func(x simd.{{.VType}}) simd.{{.OVType}}, want func(x []{{.Etype}}) []{{.OEtype}}) {
func test{{.VType}}ConvertTo{{.OEType}}(t *testing.T, f func(x archsimd.{{.VType}}) archsimd.{{.OVType}}, want func(x []{{.Etype}}) []{{.OEtype}}) {
n := {{.Count}}
t.Helper()
forSlice(t, {{.Etype}}s, n, func(x []{{.Etype}}) bool {
t.Helper()
a := simd.Load{{.VType}}Slice(x)
a := archsimd.Load{{.VType}}Slice(x)
g := make([]{{.OEtype}}, n)
f(a).StoreSlice(g)
w := want(x)
@ -378,13 +378,13 @@ var unaryToUint16 = convertTemplate.target("uint", 16)
var binaryTemplate = templateOf("binary_helpers", `
// test{{.VType}}Binary tests the simd binary method f against the expected behavior generated by want
func test{{.VType}}Binary(t *testing.T, f func(_, _ simd.{{.VType}}) simd.{{.VType}}, want func(_, _ []{{.Etype}}) []{{.Etype}}) {
func test{{.VType}}Binary(t *testing.T, f func(_, _ archsimd.{{.VType}}) archsimd.{{.VType}}, want func(_, _ []{{.Etype}}) []{{.Etype}}) {
n := {{.Count}}
t.Helper()
forSlicePair(t, {{.Etype}}s, n, func(x, y []{{.Etype}}) bool {
t.Helper()
a := simd.Load{{.VType}}Slice(x)
b := simd.Load{{.VType}}Slice(y)
a := archsimd.Load{{.VType}}Slice(x)
b := archsimd.Load{{.VType}}Slice(y)
g := make([]{{.Etype}}, n)
f(a, b).StoreSlice(g)
w := want(x, y)
@ -395,14 +395,14 @@ func test{{.VType}}Binary(t *testing.T, f func(_, _ simd.{{.VType}}) simd.{{.VTy
var ternaryTemplate = templateOf("ternary_helpers", `
// test{{.VType}}Ternary tests the simd ternary method f against the expected behavior generated by want
func test{{.VType}}Ternary(t *testing.T, f func(_, _, _ simd.{{.VType}}) simd.{{.VType}}, want func(_, _, _ []{{.Etype}}) []{{.Etype}}) {
func test{{.VType}}Ternary(t *testing.T, f func(_, _, _ archsimd.{{.VType}}) archsimd.{{.VType}}, want func(_, _, _ []{{.Etype}}) []{{.Etype}}) {
n := {{.Count}}
t.Helper()
forSliceTriple(t, {{.Etype}}s, n, func(x, y, z []{{.Etype}}) bool {
t.Helper()
a := simd.Load{{.VType}}Slice(x)
b := simd.Load{{.VType}}Slice(y)
c := simd.Load{{.VType}}Slice(z)
a := archsimd.Load{{.VType}}Slice(x)
b := archsimd.Load{{.VType}}Slice(y)
c := archsimd.Load{{.VType}}Slice(z)
g := make([]{{.Etype}}, n)
f(a, b, c).StoreSlice(g)
w := want(x, y, z)
@ -414,14 +414,14 @@ func test{{.VType}}Ternary(t *testing.T, f func(_, _, _ simd.{{.VType}}) simd.{{
var ternaryFlakyTemplate = shapedTemplateOf(ternaryFlaky, "ternary_helpers", `
// test{{.VType}}TernaryFlaky tests the simd ternary method f against the expected behavior generated by want,
// but using a flakiness parameter because we haven't exactly figured out how simd floating point works
func test{{.VType}}TernaryFlaky(t *testing.T, f func(x, y, z simd.{{.VType}}) simd.{{.VType}}, want func(x, y, z []{{.Etype}}) []{{.Etype}}, flakiness float64) {
func test{{.VType}}TernaryFlaky(t *testing.T, f func(x, y, z archsimd.{{.VType}}) archsimd.{{.VType}}, want func(x, y, z []{{.Etype}}) []{{.Etype}}, flakiness float64) {
n := {{.Count}}
t.Helper()
forSliceTriple(t, {{.Etype}}s, n, func(x, y, z []{{.Etype}}) bool {
t.Helper()
a := simd.Load{{.VType}}Slice(x)
b := simd.Load{{.VType}}Slice(y)
c := simd.Load{{.VType}}Slice(z)
a := archsimd.Load{{.VType}}Slice(x)
b := archsimd.Load{{.VType}}Slice(y)
c := archsimd.Load{{.VType}}Slice(z)
g := make([]{{.Etype}}, n)
f(a, b, c).StoreSlice(g)
w := want(x, y, z)
@ -432,13 +432,13 @@ func test{{.VType}}TernaryFlaky(t *testing.T, f func(x, y, z simd.{{.VType}}) si
var compareTemplate = templateOf("compare_helpers", `
// test{{.VType}}Compare tests the simd comparison method f against the expected behavior generated by want
func test{{.VType}}Compare(t *testing.T, f func(_, _ simd.{{.VType}}) simd.Mask{{.WxC}}, want func(_, _ []{{.Etype}}) []int64) {
func test{{.VType}}Compare(t *testing.T, f func(_, _ archsimd.{{.VType}}) archsimd.Mask{{.WxC}}, want func(_, _ []{{.Etype}}) []int64) {
n := {{.Count}}
t.Helper()
forSlicePair(t, {{.Etype}}s, n, func(x, y []{{.Etype}}) bool {
t.Helper()
a := simd.Load{{.VType}}Slice(x)
b := simd.Load{{.VType}}Slice(y)
a := archsimd.Load{{.VType}}Slice(x)
b := archsimd.Load{{.VType}}Slice(y)
g := make([]int{{.EWidth}}, n)
f(a, b).AsInt{{.WxC}}().StoreSlice(g)
w := want(x, y)
@ -452,15 +452,15 @@ var compareMaskedTemplate = templateOf("comparemasked_helpers", `
// test{{.VType}}CompareMasked tests the simd masked comparison method f against the expected behavior generated by want
// The mask is applied to the output of want; anything not in the mask, is zeroed.
func test{{.VType}}CompareMasked(t *testing.T,
f func(_, _ simd.{{.VType}}, m simd.Mask{{.WxC}}) simd.Mask{{.WxC}},
f func(_, _ archsimd.{{.VType}}, m archsimd.Mask{{.WxC}}) archsimd.Mask{{.WxC}},
want func(_, _ []{{.Etype}}) []int64) {
n := {{.Count}}
t.Helper()
forSlicePairMasked(t, {{.Etype}}s, n, func(x, y []{{.Etype}}, m []bool) bool {
t.Helper()
a := simd.Load{{.VType}}Slice(x)
b := simd.Load{{.VType}}Slice(y)
k := simd.LoadInt{{.WxC}}Slice(toVect[int{{.EWidth}}](m)).ToMask()
a := archsimd.Load{{.VType}}Slice(x)
b := archsimd.Load{{.VType}}Slice(y)
k := archsimd.LoadInt{{.WxC}}Slice(toVect[int{{.EWidth}}](m)).ToMask()
g := make([]int{{.EWidth}}, n)
f(a, b, k).AsInt{{.WxC}}().StoreSlice(g)
w := want(x, y)
@ -814,7 +814,7 @@ func (x {{.VType}}) String() string {
const SIMD = "../../"
const TD = "../../internal/simd_test/"
const SSA = "../../../cmd/compile/internal/ssa/"
const SSA = "../../../../cmd/compile/internal/ssa/"
func main() {
sl := flag.String("sl", SIMD+"slice_gen_amd64.go", "file name for slice operations")

View File

@ -2,7 +2,7 @@
//go:build goexperiment.simd
package simd
package archsimd
// Less returns a mask whose elements indicate whether x < y
//

View File

@ -2,7 +2,7 @@
//go:build goexperiment.simd
package simd
package archsimd
import "internal/cpu"
@ -95,6 +95,14 @@ func (X86Features) AVX512VNNI() bool {
return cpu.X86.HasAVX512VNNI
}
// AVX512VPCLMULQDQ returns whether the CPU supports the AVX512VPCLMULQDQ feature.
//
// AVX512VPCLMULQDQ is defined on all GOARCHes, but will only return true on
// GOARCH amd64.
func (X86Features) AVX512VPCLMULQDQ() bool {
return cpu.X86.HasAVX512VPCLMULQDQ
}
// AVX512VPOPCNTDQ returns whether the CPU supports the AVX512VPOPCNTDQ feature.
//
// AVX512VPOPCNTDQ is defined on all GOARCHes, but will only return true on

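A hedged usage sketch for the feature bit added above; it is not part of this diff. X86Features and the AVX512VPCLMULQDQ accessor come from the change itself, but how callers obtain an X86Features value is left as an assumption here.

// canUseCLMUL512 reports whether the 512-bit carryless-multiply path may be
// taken, by querying the feature accessor added in this CL.
func canUseCLMUL512(feat archsimd.X86Features) bool {
	return feat.AVX512VPCLMULQDQ()
}
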
View File

@ -6,7 +6,7 @@
// This exposes some internal interfaces to simd_test.
package simd
package archsimd
func (x Int64x2) ExportTestConcatSelectedConstant(indices uint8, y Int64x2) Int64x2 {
return x.concatSelectedConstant(indices, y)

View File

@ -4,7 +4,7 @@
//go:build goexperiment.simd && amd64
package simd
package archsimd
// ClearAVXUpperBits clears the high bits of Y0-Y15 and Z0-Z15 registers.
// It is intended for transitioning from AVX to SSE, eliminating the

View File

@ -4,7 +4,7 @@
//go:build goexperiment.simd
package simd
package archsimd
// Invoke code generators.

View File

@ -9,18 +9,18 @@
package simd_test
import (
"simd"
"simd/archsimd"
"testing"
)
// testInt8x16Binary tests the simd binary method f against the expected behavior generated by want
func testInt8x16Binary(t *testing.T, f func(_, _ simd.Int8x16) simd.Int8x16, want func(_, _ []int8) []int8) {
func testInt8x16Binary(t *testing.T, f func(_, _ archsimd.Int8x16) archsimd.Int8x16, want func(_, _ []int8) []int8) {
n := 16
t.Helper()
forSlicePair(t, int8s, n, func(x, y []int8) bool {
t.Helper()
a := simd.LoadInt8x16Slice(x)
b := simd.LoadInt8x16Slice(y)
a := archsimd.LoadInt8x16Slice(x)
b := archsimd.LoadInt8x16Slice(y)
g := make([]int8, n)
f(a, b).StoreSlice(g)
w := want(x, y)
@ -29,13 +29,13 @@ func testInt8x16Binary(t *testing.T, f func(_, _ simd.Int8x16) simd.Int8x16, wan
}
// testInt16x8Binary tests the simd binary method f against the expected behavior generated by want
func testInt16x8Binary(t *testing.T, f func(_, _ simd.Int16x8) simd.Int16x8, want func(_, _ []int16) []int16) {
func testInt16x8Binary(t *testing.T, f func(_, _ archsimd.Int16x8) archsimd.Int16x8, want func(_, _ []int16) []int16) {
n := 8
t.Helper()
forSlicePair(t, int16s, n, func(x, y []int16) bool {
t.Helper()
a := simd.LoadInt16x8Slice(x)
b := simd.LoadInt16x8Slice(y)
a := archsimd.LoadInt16x8Slice(x)
b := archsimd.LoadInt16x8Slice(y)
g := make([]int16, n)
f(a, b).StoreSlice(g)
w := want(x, y)
@ -44,13 +44,13 @@ func testInt16x8Binary(t *testing.T, f func(_, _ simd.Int16x8) simd.Int16x8, wan
}
// testInt32x4Binary tests the simd binary method f against the expected behavior generated by want
func testInt32x4Binary(t *testing.T, f func(_, _ simd.Int32x4) simd.Int32x4, want func(_, _ []int32) []int32) {
func testInt32x4Binary(t *testing.T, f func(_, _ archsimd.Int32x4) archsimd.Int32x4, want func(_, _ []int32) []int32) {
n := 4
t.Helper()
forSlicePair(t, int32s, n, func(x, y []int32) bool {
t.Helper()
a := simd.LoadInt32x4Slice(x)
b := simd.LoadInt32x4Slice(y)
a := archsimd.LoadInt32x4Slice(x)
b := archsimd.LoadInt32x4Slice(y)
g := make([]int32, n)
f(a, b).StoreSlice(g)
w := want(x, y)
@ -59,13 +59,13 @@ func testInt32x4Binary(t *testing.T, f func(_, _ simd.Int32x4) simd.Int32x4, wan
}
// testInt64x2Binary tests the simd binary method f against the expected behavior generated by want
func testInt64x2Binary(t *testing.T, f func(_, _ simd.Int64x2) simd.Int64x2, want func(_, _ []int64) []int64) {
func testInt64x2Binary(t *testing.T, f func(_, _ archsimd.Int64x2) archsimd.Int64x2, want func(_, _ []int64) []int64) {
n := 2
t.Helper()
forSlicePair(t, int64s, n, func(x, y []int64) bool {
t.Helper()
a := simd.LoadInt64x2Slice(x)
b := simd.LoadInt64x2Slice(y)
a := archsimd.LoadInt64x2Slice(x)
b := archsimd.LoadInt64x2Slice(y)
g := make([]int64, n)
f(a, b).StoreSlice(g)
w := want(x, y)
@ -74,13 +74,13 @@ func testInt64x2Binary(t *testing.T, f func(_, _ simd.Int64x2) simd.Int64x2, wan
}
// testUint8x16Binary tests the simd binary method f against the expected behavior generated by want
func testUint8x16Binary(t *testing.T, f func(_, _ simd.Uint8x16) simd.Uint8x16, want func(_, _ []uint8) []uint8) {
func testUint8x16Binary(t *testing.T, f func(_, _ archsimd.Uint8x16) archsimd.Uint8x16, want func(_, _ []uint8) []uint8) {
n := 16
t.Helper()
forSlicePair(t, uint8s, n, func(x, y []uint8) bool {
t.Helper()
a := simd.LoadUint8x16Slice(x)
b := simd.LoadUint8x16Slice(y)
a := archsimd.LoadUint8x16Slice(x)
b := archsimd.LoadUint8x16Slice(y)
g := make([]uint8, n)
f(a, b).StoreSlice(g)
w := want(x, y)
@ -89,13 +89,13 @@ func testUint8x16Binary(t *testing.T, f func(_, _ simd.Uint8x16) simd.Uint8x16,
}
// testUint16x8Binary tests the simd binary method f against the expected behavior generated by want
func testUint16x8Binary(t *testing.T, f func(_, _ simd.Uint16x8) simd.Uint16x8, want func(_, _ []uint16) []uint16) {
func testUint16x8Binary(t *testing.T, f func(_, _ archsimd.Uint16x8) archsimd.Uint16x8, want func(_, _ []uint16) []uint16) {
n := 8
t.Helper()
forSlicePair(t, uint16s, n, func(x, y []uint16) bool {
t.Helper()
a := simd.LoadUint16x8Slice(x)
b := simd.LoadUint16x8Slice(y)
a := archsimd.LoadUint16x8Slice(x)
b := archsimd.LoadUint16x8Slice(y)
g := make([]uint16, n)
f(a, b).StoreSlice(g)
w := want(x, y)
@ -104,13 +104,13 @@ func testUint16x8Binary(t *testing.T, f func(_, _ simd.Uint16x8) simd.Uint16x8,
}
// testUint32x4Binary tests the simd binary method f against the expected behavior generated by want
func testUint32x4Binary(t *testing.T, f func(_, _ simd.Uint32x4) simd.Uint32x4, want func(_, _ []uint32) []uint32) {
func testUint32x4Binary(t *testing.T, f func(_, _ archsimd.Uint32x4) archsimd.Uint32x4, want func(_, _ []uint32) []uint32) {
n := 4
t.Helper()
forSlicePair(t, uint32s, n, func(x, y []uint32) bool {
t.Helper()
a := simd.LoadUint32x4Slice(x)
b := simd.LoadUint32x4Slice(y)
a := archsimd.LoadUint32x4Slice(x)
b := archsimd.LoadUint32x4Slice(y)
g := make([]uint32, n)
f(a, b).StoreSlice(g)
w := want(x, y)
@ -119,13 +119,13 @@ func testUint32x4Binary(t *testing.T, f func(_, _ simd.Uint32x4) simd.Uint32x4,
}
// testUint64x2Binary tests the simd binary method f against the expected behavior generated by want
func testUint64x2Binary(t *testing.T, f func(_, _ simd.Uint64x2) simd.Uint64x2, want func(_, _ []uint64) []uint64) {
func testUint64x2Binary(t *testing.T, f func(_, _ archsimd.Uint64x2) archsimd.Uint64x2, want func(_, _ []uint64) []uint64) {
n := 2
t.Helper()
forSlicePair(t, uint64s, n, func(x, y []uint64) bool {
t.Helper()
a := simd.LoadUint64x2Slice(x)
b := simd.LoadUint64x2Slice(y)
a := archsimd.LoadUint64x2Slice(x)
b := archsimd.LoadUint64x2Slice(y)
g := make([]uint64, n)
f(a, b).StoreSlice(g)
w := want(x, y)
@ -134,13 +134,13 @@ func testUint64x2Binary(t *testing.T, f func(_, _ simd.Uint64x2) simd.Uint64x2,
}
// testFloat32x4Binary tests the simd binary method f against the expected behavior generated by want
func testFloat32x4Binary(t *testing.T, f func(_, _ simd.Float32x4) simd.Float32x4, want func(_, _ []float32) []float32) {
func testFloat32x4Binary(t *testing.T, f func(_, _ archsimd.Float32x4) archsimd.Float32x4, want func(_, _ []float32) []float32) {
n := 4
t.Helper()
forSlicePair(t, float32s, n, func(x, y []float32) bool {
t.Helper()
a := simd.LoadFloat32x4Slice(x)
b := simd.LoadFloat32x4Slice(y)
a := archsimd.LoadFloat32x4Slice(x)
b := archsimd.LoadFloat32x4Slice(y)
g := make([]float32, n)
f(a, b).StoreSlice(g)
w := want(x, y)
@ -149,13 +149,13 @@ func testFloat32x4Binary(t *testing.T, f func(_, _ simd.Float32x4) simd.Float32x
}
// testFloat64x2Binary tests the simd binary method f against the expected behavior generated by want
func testFloat64x2Binary(t *testing.T, f func(_, _ simd.Float64x2) simd.Float64x2, want func(_, _ []float64) []float64) {
func testFloat64x2Binary(t *testing.T, f func(_, _ archsimd.Float64x2) archsimd.Float64x2, want func(_, _ []float64) []float64) {
n := 2
t.Helper()
forSlicePair(t, float64s, n, func(x, y []float64) bool {
t.Helper()
a := simd.LoadFloat64x2Slice(x)
b := simd.LoadFloat64x2Slice(y)
a := archsimd.LoadFloat64x2Slice(x)
b := archsimd.LoadFloat64x2Slice(y)
g := make([]float64, n)
f(a, b).StoreSlice(g)
w := want(x, y)
@ -164,13 +164,13 @@ func testFloat64x2Binary(t *testing.T, f func(_, _ simd.Float64x2) simd.Float64x
}
// testInt8x32Binary tests the simd binary method f against the expected behavior generated by want
func testInt8x32Binary(t *testing.T, f func(_, _ simd.Int8x32) simd.Int8x32, want func(_, _ []int8) []int8) {
func testInt8x32Binary(t *testing.T, f func(_, _ archsimd.Int8x32) archsimd.Int8x32, want func(_, _ []int8) []int8) {
n := 32
t.Helper()
forSlicePair(t, int8s, n, func(x, y []int8) bool {
t.Helper()
a := simd.LoadInt8x32Slice(x)
b := simd.LoadInt8x32Slice(y)
a := archsimd.LoadInt8x32Slice(x)
b := archsimd.LoadInt8x32Slice(y)
g := make([]int8, n)
f(a, b).StoreSlice(g)
w := want(x, y)
@ -179,13 +179,13 @@ func testInt8x32Binary(t *testing.T, f func(_, _ simd.Int8x32) simd.Int8x32, wan
}
// testInt16x16Binary tests the simd binary method f against the expected behavior generated by want
func testInt16x16Binary(t *testing.T, f func(_, _ simd.Int16x16) simd.Int16x16, want func(_, _ []int16) []int16) {
func testInt16x16Binary(t *testing.T, f func(_, _ archsimd.Int16x16) archsimd.Int16x16, want func(_, _ []int16) []int16) {
n := 16
t.Helper()
forSlicePair(t, int16s, n, func(x, y []int16) bool {
t.Helper()
a := simd.LoadInt16x16Slice(x)
b := simd.LoadInt16x16Slice(y)
a := archsimd.LoadInt16x16Slice(x)
b := archsimd.LoadInt16x16Slice(y)
g := make([]int16, n)
f(a, b).StoreSlice(g)
w := want(x, y)
@ -194,13 +194,13 @@ func testInt16x16Binary(t *testing.T, f func(_, _ simd.Int16x16) simd.Int16x16,
}
// testInt32x8Binary tests the simd binary method f against the expected behavior generated by want
func testInt32x8Binary(t *testing.T, f func(_, _ simd.Int32x8) simd.Int32x8, want func(_, _ []int32) []int32) {
func testInt32x8Binary(t *testing.T, f func(_, _ archsimd.Int32x8) archsimd.Int32x8, want func(_, _ []int32) []int32) {
n := 8
t.Helper()
forSlicePair(t, int32s, n, func(x, y []int32) bool {
t.Helper()
a := simd.LoadInt32x8Slice(x)
b := simd.LoadInt32x8Slice(y)
a := archsimd.LoadInt32x8Slice(x)
b := archsimd.LoadInt32x8Slice(y)
g := make([]int32, n)
f(a, b).StoreSlice(g)
w := want(x, y)
@ -209,13 +209,13 @@ func testInt32x8Binary(t *testing.T, f func(_, _ simd.Int32x8) simd.Int32x8, wan
}
// testInt64x4Binary tests the simd binary method f against the expected behavior generated by want
func testInt64x4Binary(t *testing.T, f func(_, _ simd.Int64x4) simd.Int64x4, want func(_, _ []int64) []int64) {
func testInt64x4Binary(t *testing.T, f func(_, _ archsimd.Int64x4) archsimd.Int64x4, want func(_, _ []int64) []int64) {
n := 4
t.Helper()
forSlicePair(t, int64s, n, func(x, y []int64) bool {
t.Helper()
a := simd.LoadInt64x4Slice(x)
b := simd.LoadInt64x4Slice(y)
a := archsimd.LoadInt64x4Slice(x)
b := archsimd.LoadInt64x4Slice(y)
g := make([]int64, n)
f(a, b).StoreSlice(g)
w := want(x, y)
@ -224,13 +224,13 @@ func testInt64x4Binary(t *testing.T, f func(_, _ simd.Int64x4) simd.Int64x4, wan
}
// testUint8x32Binary tests the simd binary method f against the expected behavior generated by want
func testUint8x32Binary(t *testing.T, f func(_, _ simd.Uint8x32) simd.Uint8x32, want func(_, _ []uint8) []uint8) {
func testUint8x32Binary(t *testing.T, f func(_, _ archsimd.Uint8x32) archsimd.Uint8x32, want func(_, _ []uint8) []uint8) {
n := 32
t.Helper()
forSlicePair(t, uint8s, n, func(x, y []uint8) bool {
t.Helper()
a := simd.LoadUint8x32Slice(x)
b := simd.LoadUint8x32Slice(y)
a := archsimd.LoadUint8x32Slice(x)
b := archsimd.LoadUint8x32Slice(y)
g := make([]uint8, n)
f(a, b).StoreSlice(g)
w := want(x, y)
@ -239,13 +239,13 @@ func testUint8x32Binary(t *testing.T, f func(_, _ simd.Uint8x32) simd.Uint8x32,
}
// testUint16x16Binary tests the simd binary method f against the expected behavior generated by want
func testUint16x16Binary(t *testing.T, f func(_, _ simd.Uint16x16) simd.Uint16x16, want func(_, _ []uint16) []uint16) {
func testUint16x16Binary(t *testing.T, f func(_, _ archsimd.Uint16x16) archsimd.Uint16x16, want func(_, _ []uint16) []uint16) {
n := 16
t.Helper()
forSlicePair(t, uint16s, n, func(x, y []uint16) bool {
t.Helper()
a := simd.LoadUint16x16Slice(x)
b := simd.LoadUint16x16Slice(y)
a := archsimd.LoadUint16x16Slice(x)
b := archsimd.LoadUint16x16Slice(y)
g := make([]uint16, n)
f(a, b).StoreSlice(g)
w := want(x, y)
@ -254,13 +254,13 @@ func testUint16x16Binary(t *testing.T, f func(_, _ simd.Uint16x16) simd.Uint16x1
}
// testUint32x8Binary tests the simd binary method f against the expected behavior generated by want
func testUint32x8Binary(t *testing.T, f func(_, _ simd.Uint32x8) simd.Uint32x8, want func(_, _ []uint32) []uint32) {
func testUint32x8Binary(t *testing.T, f func(_, _ archsimd.Uint32x8) archsimd.Uint32x8, want func(_, _ []uint32) []uint32) {
n := 8
t.Helper()
forSlicePair(t, uint32s, n, func(x, y []uint32) bool {
t.Helper()
a := simd.LoadUint32x8Slice(x)
b := simd.LoadUint32x8Slice(y)
a := archsimd.LoadUint32x8Slice(x)
b := archsimd.LoadUint32x8Slice(y)
g := make([]uint32, n)
f(a, b).StoreSlice(g)
w := want(x, y)
@ -269,13 +269,13 @@ func testUint32x8Binary(t *testing.T, f func(_, _ simd.Uint32x8) simd.Uint32x8,
}
// testUint64x4Binary tests the simd binary method f against the expected behavior generated by want
func testUint64x4Binary(t *testing.T, f func(_, _ simd.Uint64x4) simd.Uint64x4, want func(_, _ []uint64) []uint64) {
func testUint64x4Binary(t *testing.T, f func(_, _ archsimd.Uint64x4) archsimd.Uint64x4, want func(_, _ []uint64) []uint64) {
n := 4
t.Helper()
forSlicePair(t, uint64s, n, func(x, y []uint64) bool {
t.Helper()
a := simd.LoadUint64x4Slice(x)
b := simd.LoadUint64x4Slice(y)
a := archsimd.LoadUint64x4Slice(x)
b := archsimd.LoadUint64x4Slice(y)
g := make([]uint64, n)
f(a, b).StoreSlice(g)
w := want(x, y)
@ -284,13 +284,13 @@ func testUint64x4Binary(t *testing.T, f func(_, _ simd.Uint64x4) simd.Uint64x4,
}
// testFloat32x8Binary tests the simd binary method f against the expected behavior generated by want
func testFloat32x8Binary(t *testing.T, f func(_, _ simd.Float32x8) simd.Float32x8, want func(_, _ []float32) []float32) {
func testFloat32x8Binary(t *testing.T, f func(_, _ archsimd.Float32x8) archsimd.Float32x8, want func(_, _ []float32) []float32) {
n := 8
t.Helper()
forSlicePair(t, float32s, n, func(x, y []float32) bool {
t.Helper()
a := simd.LoadFloat32x8Slice(x)
b := simd.LoadFloat32x8Slice(y)
a := archsimd.LoadFloat32x8Slice(x)
b := archsimd.LoadFloat32x8Slice(y)
g := make([]float32, n)
f(a, b).StoreSlice(g)
w := want(x, y)
@ -299,13 +299,13 @@ func testFloat32x8Binary(t *testing.T, f func(_, _ simd.Float32x8) simd.Float32x
}
// testFloat64x4Binary tests the simd binary method f against the expected behavior generated by want
func testFloat64x4Binary(t *testing.T, f func(_, _ simd.Float64x4) simd.Float64x4, want func(_, _ []float64) []float64) {
func testFloat64x4Binary(t *testing.T, f func(_, _ archsimd.Float64x4) archsimd.Float64x4, want func(_, _ []float64) []float64) {
n := 4
t.Helper()
forSlicePair(t, float64s, n, func(x, y []float64) bool {
t.Helper()
a := simd.LoadFloat64x4Slice(x)
b := simd.LoadFloat64x4Slice(y)
a := archsimd.LoadFloat64x4Slice(x)
b := archsimd.LoadFloat64x4Slice(y)
g := make([]float64, n)
f(a, b).StoreSlice(g)
w := want(x, y)
@ -314,13 +314,13 @@ func testFloat64x4Binary(t *testing.T, f func(_, _ simd.Float64x4) simd.Float64x
}
// testInt8x64Binary tests the simd binary method f against the expected behavior generated by want
func testInt8x64Binary(t *testing.T, f func(_, _ simd.Int8x64) simd.Int8x64, want func(_, _ []int8) []int8) {
func testInt8x64Binary(t *testing.T, f func(_, _ archsimd.Int8x64) archsimd.Int8x64, want func(_, _ []int8) []int8) {
n := 64
t.Helper()
forSlicePair(t, int8s, n, func(x, y []int8) bool {
t.Helper()
a := simd.LoadInt8x64Slice(x)
b := simd.LoadInt8x64Slice(y)
a := archsimd.LoadInt8x64Slice(x)
b := archsimd.LoadInt8x64Slice(y)
g := make([]int8, n)
f(a, b).StoreSlice(g)
w := want(x, y)
@ -329,13 +329,13 @@ func testInt8x64Binary(t *testing.T, f func(_, _ simd.Int8x64) simd.Int8x64, wan
}
// testInt16x32Binary tests the simd binary method f against the expected behavior generated by want
func testInt16x32Binary(t *testing.T, f func(_, _ simd.Int16x32) simd.Int16x32, want func(_, _ []int16) []int16) {
func testInt16x32Binary(t *testing.T, f func(_, _ archsimd.Int16x32) archsimd.Int16x32, want func(_, _ []int16) []int16) {
n := 32
t.Helper()
forSlicePair(t, int16s, n, func(x, y []int16) bool {
t.Helper()
a := simd.LoadInt16x32Slice(x)
b := simd.LoadInt16x32Slice(y)
a := archsimd.LoadInt16x32Slice(x)
b := archsimd.LoadInt16x32Slice(y)
g := make([]int16, n)
f(a, b).StoreSlice(g)
w := want(x, y)
@ -344,13 +344,13 @@ func testInt16x32Binary(t *testing.T, f func(_, _ simd.Int16x32) simd.Int16x32,
}
// testInt32x16Binary tests the simd binary method f against the expected behavior generated by want
func testInt32x16Binary(t *testing.T, f func(_, _ simd.Int32x16) simd.Int32x16, want func(_, _ []int32) []int32) {
func testInt32x16Binary(t *testing.T, f func(_, _ archsimd.Int32x16) archsimd.Int32x16, want func(_, _ []int32) []int32) {
n := 16
t.Helper()
forSlicePair(t, int32s, n, func(x, y []int32) bool {
t.Helper()
a := simd.LoadInt32x16Slice(x)
b := simd.LoadInt32x16Slice(y)
a := archsimd.LoadInt32x16Slice(x)
b := archsimd.LoadInt32x16Slice(y)
g := make([]int32, n)
f(a, b).StoreSlice(g)
w := want(x, y)
@ -359,13 +359,13 @@ func testInt32x16Binary(t *testing.T, f func(_, _ simd.Int32x16) simd.Int32x16,
}
// testInt64x8Binary tests the simd binary method f against the expected behavior generated by want
func testInt64x8Binary(t *testing.T, f func(_, _ simd.Int64x8) simd.Int64x8, want func(_, _ []int64) []int64) {
func testInt64x8Binary(t *testing.T, f func(_, _ archsimd.Int64x8) archsimd.Int64x8, want func(_, _ []int64) []int64) {
n := 8
t.Helper()
forSlicePair(t, int64s, n, func(x, y []int64) bool {
t.Helper()
a := simd.LoadInt64x8Slice(x)
b := simd.LoadInt64x8Slice(y)
a := archsimd.LoadInt64x8Slice(x)
b := archsimd.LoadInt64x8Slice(y)
g := make([]int64, n)
f(a, b).StoreSlice(g)
w := want(x, y)
@ -374,13 +374,13 @@ func testInt64x8Binary(t *testing.T, f func(_, _ simd.Int64x8) simd.Int64x8, wan
}
// testUint8x64Binary tests the simd binary method f against the expected behavior generated by want
func testUint8x64Binary(t *testing.T, f func(_, _ simd.Uint8x64) simd.Uint8x64, want func(_, _ []uint8) []uint8) {
func testUint8x64Binary(t *testing.T, f func(_, _ archsimd.Uint8x64) archsimd.Uint8x64, want func(_, _ []uint8) []uint8) {
n := 64
t.Helper()
forSlicePair(t, uint8s, n, func(x, y []uint8) bool {
t.Helper()
a := simd.LoadUint8x64Slice(x)
b := simd.LoadUint8x64Slice(y)
a := archsimd.LoadUint8x64Slice(x)
b := archsimd.LoadUint8x64Slice(y)
g := make([]uint8, n)
f(a, b).StoreSlice(g)
w := want(x, y)
@ -389,13 +389,13 @@ func testUint8x64Binary(t *testing.T, f func(_, _ simd.Uint8x64) simd.Uint8x64,
}
// testUint16x32Binary tests the simd binary method f against the expected behavior generated by want
func testUint16x32Binary(t *testing.T, f func(_, _ simd.Uint16x32) simd.Uint16x32, want func(_, _ []uint16) []uint16) {
func testUint16x32Binary(t *testing.T, f func(_, _ archsimd.Uint16x32) archsimd.Uint16x32, want func(_, _ []uint16) []uint16) {
n := 32
t.Helper()
forSlicePair(t, uint16s, n, func(x, y []uint16) bool {
t.Helper()
a := simd.LoadUint16x32Slice(x)
b := simd.LoadUint16x32Slice(y)
a := archsimd.LoadUint16x32Slice(x)
b := archsimd.LoadUint16x32Slice(y)
g := make([]uint16, n)
f(a, b).StoreSlice(g)
w := want(x, y)
@ -404,13 +404,13 @@ func testUint16x32Binary(t *testing.T, f func(_, _ simd.Uint16x32) simd.Uint16x3
}
// testUint32x16Binary tests the simd binary method f against the expected behavior generated by want
func testUint32x16Binary(t *testing.T, f func(_, _ simd.Uint32x16) simd.Uint32x16, want func(_, _ []uint32) []uint32) {
func testUint32x16Binary(t *testing.T, f func(_, _ archsimd.Uint32x16) archsimd.Uint32x16, want func(_, _ []uint32) []uint32) {
n := 16
t.Helper()
forSlicePair(t, uint32s, n, func(x, y []uint32) bool {
t.Helper()
a := simd.LoadUint32x16Slice(x)
b := simd.LoadUint32x16Slice(y)
a := archsimd.LoadUint32x16Slice(x)
b := archsimd.LoadUint32x16Slice(y)
g := make([]uint32, n)
f(a, b).StoreSlice(g)
w := want(x, y)
@ -419,13 +419,13 @@ func testUint32x16Binary(t *testing.T, f func(_, _ simd.Uint32x16) simd.Uint32x1
}
// testUint64x8Binary tests the simd binary method f against the expected behavior generated by want
func testUint64x8Binary(t *testing.T, f func(_, _ simd.Uint64x8) simd.Uint64x8, want func(_, _ []uint64) []uint64) {
func testUint64x8Binary(t *testing.T, f func(_, _ archsimd.Uint64x8) archsimd.Uint64x8, want func(_, _ []uint64) []uint64) {
n := 8
t.Helper()
forSlicePair(t, uint64s, n, func(x, y []uint64) bool {
t.Helper()
a := simd.LoadUint64x8Slice(x)
b := simd.LoadUint64x8Slice(y)
a := archsimd.LoadUint64x8Slice(x)
b := archsimd.LoadUint64x8Slice(y)
g := make([]uint64, n)
f(a, b).StoreSlice(g)
w := want(x, y)
@ -434,13 +434,13 @@ func testUint64x8Binary(t *testing.T, f func(_, _ simd.Uint64x8) simd.Uint64x8,
}
// testFloat32x16Binary tests the simd binary method f against the expected behavior generated by want
func testFloat32x16Binary(t *testing.T, f func(_, _ simd.Float32x16) simd.Float32x16, want func(_, _ []float32) []float32) {
func testFloat32x16Binary(t *testing.T, f func(_, _ archsimd.Float32x16) archsimd.Float32x16, want func(_, _ []float32) []float32) {
n := 16
t.Helper()
forSlicePair(t, float32s, n, func(x, y []float32) bool {
t.Helper()
a := simd.LoadFloat32x16Slice(x)
b := simd.LoadFloat32x16Slice(y)
a := archsimd.LoadFloat32x16Slice(x)
b := archsimd.LoadFloat32x16Slice(y)
g := make([]float32, n)
f(a, b).StoreSlice(g)
w := want(x, y)
@ -449,13 +449,13 @@ func testFloat32x16Binary(t *testing.T, f func(_, _ simd.Float32x16) simd.Float3
}
// testFloat64x8Binary tests the simd binary method f against the expected behavior generated by want
func testFloat64x8Binary(t *testing.T, f func(_, _ simd.Float64x8) simd.Float64x8, want func(_, _ []float64) []float64) {
func testFloat64x8Binary(t *testing.T, f func(_, _ archsimd.Float64x8) archsimd.Float64x8, want func(_, _ []float64) []float64) {
n := 8
t.Helper()
forSlicePair(t, float64s, n, func(x, y []float64) bool {
t.Helper()
a := simd.LoadFloat64x8Slice(x)
b := simd.LoadFloat64x8Slice(y)
a := archsimd.LoadFloat64x8Slice(x)
b := archsimd.LoadFloat64x8Slice(y)
g := make([]float64, n)
f(a, b).StoreSlice(g)
w := want(x, y)


@ -0,0 +1,361 @@
// Copyright 2025 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build goexperiment.simd && amd64
package simd_test
import (
"simd/archsimd"
"testing"
)
func TestAdd(t *testing.T) {
testFloat32x4Binary(t, archsimd.Float32x4.Add, addSlice[float32])
testFloat32x8Binary(t, archsimd.Float32x8.Add, addSlice[float32])
testFloat64x2Binary(t, archsimd.Float64x2.Add, addSlice[float64])
testFloat64x4Binary(t, archsimd.Float64x4.Add, addSlice[float64])
testInt16x16Binary(t, archsimd.Int16x16.Add, addSlice[int16])
testInt16x8Binary(t, archsimd.Int16x8.Add, addSlice[int16])
testInt32x4Binary(t, archsimd.Int32x4.Add, addSlice[int32])
testInt32x8Binary(t, archsimd.Int32x8.Add, addSlice[int32])
testInt64x2Binary(t, archsimd.Int64x2.Add, addSlice[int64])
testInt64x4Binary(t, archsimd.Int64x4.Add, addSlice[int64])
testInt8x16Binary(t, archsimd.Int8x16.Add, addSlice[int8])
testInt8x32Binary(t, archsimd.Int8x32.Add, addSlice[int8])
testUint16x16Binary(t, archsimd.Uint16x16.Add, addSlice[uint16])
testUint16x8Binary(t, archsimd.Uint16x8.Add, addSlice[uint16])
testUint32x4Binary(t, archsimd.Uint32x4.Add, addSlice[uint32])
testUint32x8Binary(t, archsimd.Uint32x8.Add, addSlice[uint32])
testUint64x2Binary(t, archsimd.Uint64x2.Add, addSlice[uint64])
testUint64x4Binary(t, archsimd.Uint64x4.Add, addSlice[uint64])
testUint8x16Binary(t, archsimd.Uint8x16.Add, addSlice[uint8])
testUint8x32Binary(t, archsimd.Uint8x32.Add, addSlice[uint8])
if archsimd.X86.AVX512() {
testFloat32x16Binary(t, archsimd.Float32x16.Add, addSlice[float32])
testFloat64x8Binary(t, archsimd.Float64x8.Add, addSlice[float64])
testInt8x64Binary(t, archsimd.Int8x64.Add, addSlice[int8])
testInt16x32Binary(t, archsimd.Int16x32.Add, addSlice[int16])
testInt32x16Binary(t, archsimd.Int32x16.Add, addSlice[int32])
testInt64x8Binary(t, archsimd.Int64x8.Add, addSlice[int64])
testUint8x64Binary(t, archsimd.Uint8x64.Add, addSlice[uint8])
testUint16x32Binary(t, archsimd.Uint16x32.Add, addSlice[uint16])
testUint32x16Binary(t, archsimd.Uint32x16.Add, addSlice[uint32])
testUint64x8Binary(t, archsimd.Uint64x8.Add, addSlice[uint64])
}
}
func TestSub(t *testing.T) {
testFloat32x4Binary(t, archsimd.Float32x4.Sub, subSlice[float32])
testFloat32x8Binary(t, archsimd.Float32x8.Sub, subSlice[float32])
testFloat64x2Binary(t, archsimd.Float64x2.Sub, subSlice[float64])
testFloat64x4Binary(t, archsimd.Float64x4.Sub, subSlice[float64])
testInt16x16Binary(t, archsimd.Int16x16.Sub, subSlice[int16])
testInt16x8Binary(t, archsimd.Int16x8.Sub, subSlice[int16])
testInt32x4Binary(t, archsimd.Int32x4.Sub, subSlice[int32])
testInt32x8Binary(t, archsimd.Int32x8.Sub, subSlice[int32])
testInt64x2Binary(t, archsimd.Int64x2.Sub, subSlice[int64])
testInt64x4Binary(t, archsimd.Int64x4.Sub, subSlice[int64])
testInt8x16Binary(t, archsimd.Int8x16.Sub, subSlice[int8])
testInt8x32Binary(t, archsimd.Int8x32.Sub, subSlice[int8])
testUint16x16Binary(t, archsimd.Uint16x16.Sub, subSlice[uint16])
testUint16x8Binary(t, archsimd.Uint16x8.Sub, subSlice[uint16])
testUint32x4Binary(t, archsimd.Uint32x4.Sub, subSlice[uint32])
testUint32x8Binary(t, archsimd.Uint32x8.Sub, subSlice[uint32])
testUint64x2Binary(t, archsimd.Uint64x2.Sub, subSlice[uint64])
testUint64x4Binary(t, archsimd.Uint64x4.Sub, subSlice[uint64])
testUint8x16Binary(t, archsimd.Uint8x16.Sub, subSlice[uint8])
testUint8x32Binary(t, archsimd.Uint8x32.Sub, subSlice[uint8])
if archsimd.X86.AVX512() {
testFloat32x16Binary(t, archsimd.Float32x16.Sub, subSlice[float32])
testFloat64x8Binary(t, archsimd.Float64x8.Sub, subSlice[float64])
testInt8x64Binary(t, archsimd.Int8x64.Sub, subSlice[int8])
testInt16x32Binary(t, archsimd.Int16x32.Sub, subSlice[int16])
testInt32x16Binary(t, archsimd.Int32x16.Sub, subSlice[int32])
testInt64x8Binary(t, archsimd.Int64x8.Sub, subSlice[int64])
testUint8x64Binary(t, archsimd.Uint8x64.Sub, subSlice[uint8])
testUint16x32Binary(t, archsimd.Uint16x32.Sub, subSlice[uint16])
testUint32x16Binary(t, archsimd.Uint32x16.Sub, subSlice[uint32])
testUint64x8Binary(t, archsimd.Uint64x8.Sub, subSlice[uint64])
}
}
func TestMax(t *testing.T) {
// testFloat32x4Binary(t, archsimd.Float32x4.Max, maxSlice[float32]) // nan is wrong
// testFloat32x8Binary(t, archsimd.Float32x8.Max, maxSlice[float32]) // nan is wrong
// testFloat64x2Binary(t, archsimd.Float64x2.Max, maxSlice[float64]) // nan is wrong
// testFloat64x4Binary(t, archsimd.Float64x4.Max, maxSlice[float64]) // nan is wrong
testInt16x16Binary(t, archsimd.Int16x16.Max, maxSlice[int16])
testInt16x8Binary(t, archsimd.Int16x8.Max, maxSlice[int16])
testInt32x4Binary(t, archsimd.Int32x4.Max, maxSlice[int32])
testInt32x8Binary(t, archsimd.Int32x8.Max, maxSlice[int32])
if archsimd.X86.AVX512() {
testInt64x2Binary(t, archsimd.Int64x2.Max, maxSlice[int64])
testInt64x4Binary(t, archsimd.Int64x4.Max, maxSlice[int64])
}
testInt8x16Binary(t, archsimd.Int8x16.Max, maxSlice[int8])
testInt8x32Binary(t, archsimd.Int8x32.Max, maxSlice[int8])
testUint16x16Binary(t, archsimd.Uint16x16.Max, maxSlice[uint16])
testUint16x8Binary(t, archsimd.Uint16x8.Max, maxSlice[uint16])
testUint32x4Binary(t, archsimd.Uint32x4.Max, maxSlice[uint32])
testUint32x8Binary(t, archsimd.Uint32x8.Max, maxSlice[uint32])
if archsimd.X86.AVX512() {
testUint64x2Binary(t, archsimd.Uint64x2.Max, maxSlice[uint64])
testUint64x4Binary(t, archsimd.Uint64x4.Max, maxSlice[uint64])
}
testUint8x16Binary(t, archsimd.Uint8x16.Max, maxSlice[uint8])
testUint8x32Binary(t, archsimd.Uint8x32.Max, maxSlice[uint8])
if archsimd.X86.AVX512() {
// testFloat32x16Binary(t, archsimd.Float32x16.Max, maxSlice[float32]) // nan is wrong
// testFloat64x8Binary(t, archsimd.Float64x8.Max, maxSlice[float64]) // nan is wrong
testInt8x64Binary(t, archsimd.Int8x64.Max, maxSlice[int8])
testInt16x32Binary(t, archsimd.Int16x32.Max, maxSlice[int16])
testInt32x16Binary(t, archsimd.Int32x16.Max, maxSlice[int32])
testInt64x8Binary(t, archsimd.Int64x8.Max, maxSlice[int64])
testUint8x64Binary(t, archsimd.Uint8x64.Max, maxSlice[uint8])
testUint16x32Binary(t, archsimd.Uint16x32.Max, maxSlice[uint16])
testUint32x16Binary(t, archsimd.Uint32x16.Max, maxSlice[uint32])
testUint64x8Binary(t, archsimd.Uint64x8.Max, maxSlice[uint64])
}
}
func TestMin(t *testing.T) {
// testFloat32x4Binary(t, archsimd.Float32x4.Min, minSlice[float32]) // nan is wrong
// testFloat32x8Binary(t, archsimd.Float32x8.Min, minSlice[float32]) // nan is wrong
// testFloat64x2Binary(t, archsimd.Float64x2.Min, minSlice[float64]) // nan is wrong
// testFloat64x4Binary(t, archsimd.Float64x4.Min, minSlice[float64]) // nan is wrong
testInt16x16Binary(t, archsimd.Int16x16.Min, minSlice[int16])
testInt16x8Binary(t, archsimd.Int16x8.Min, minSlice[int16])
testInt32x4Binary(t, archsimd.Int32x4.Min, minSlice[int32])
testInt32x8Binary(t, archsimd.Int32x8.Min, minSlice[int32])
if archsimd.X86.AVX512() {
testInt64x2Binary(t, archsimd.Int64x2.Min, minSlice[int64])
testInt64x4Binary(t, archsimd.Int64x4.Min, minSlice[int64])
}
testInt8x16Binary(t, archsimd.Int8x16.Min, minSlice[int8])
testInt8x32Binary(t, archsimd.Int8x32.Min, minSlice[int8])
testUint16x16Binary(t, archsimd.Uint16x16.Min, minSlice[uint16])
testUint16x8Binary(t, archsimd.Uint16x8.Min, minSlice[uint16])
testUint32x4Binary(t, archsimd.Uint32x4.Min, minSlice[uint32])
testUint32x8Binary(t, archsimd.Uint32x8.Min, minSlice[uint32])
if archsimd.X86.AVX512() {
testUint64x2Binary(t, archsimd.Uint64x2.Min, minSlice[uint64])
testUint64x4Binary(t, archsimd.Uint64x4.Min, minSlice[uint64])
}
testUint8x16Binary(t, archsimd.Uint8x16.Min, minSlice[uint8])
testUint8x32Binary(t, archsimd.Uint8x32.Min, minSlice[uint8])
if archsimd.X86.AVX512() {
// testFloat32x16Binary(t, archsimd.Float32x16.Min, minSlice[float32]) // nan is wrong
// testFloat64x8Binary(t, archsimd.Float64x8.Min, minSlice[float64]) // nan is wrong
testInt8x64Binary(t, archsimd.Int8x64.Min, minSlice[int8])
testInt16x32Binary(t, archsimd.Int16x32.Min, minSlice[int16])
testInt32x16Binary(t, archsimd.Int32x16.Min, minSlice[int32])
testInt64x8Binary(t, archsimd.Int64x8.Min, minSlice[int64])
testUint8x64Binary(t, archsimd.Uint8x64.Min, minSlice[uint8])
testUint16x32Binary(t, archsimd.Uint16x32.Min, minSlice[uint16])
testUint32x16Binary(t, archsimd.Uint32x16.Min, minSlice[uint32])
testUint64x8Binary(t, archsimd.Uint64x8.Min, minSlice[uint64])
}
}
func TestAnd(t *testing.T) {
testInt16x16Binary(t, archsimd.Int16x16.And, andSlice[int16])
testInt16x8Binary(t, archsimd.Int16x8.And, andSlice[int16])
testInt32x4Binary(t, archsimd.Int32x4.And, andSlice[int32])
testInt32x8Binary(t, archsimd.Int32x8.And, andSlice[int32])
testInt64x2Binary(t, archsimd.Int64x2.And, andSlice[int64])
testInt64x4Binary(t, archsimd.Int64x4.And, andSlice[int64])
testInt8x16Binary(t, archsimd.Int8x16.And, andSlice[int8])
testInt8x32Binary(t, archsimd.Int8x32.And, andSlice[int8])
testUint16x16Binary(t, archsimd.Uint16x16.And, andSlice[uint16])
testUint16x8Binary(t, archsimd.Uint16x8.And, andSlice[uint16])
testUint32x4Binary(t, archsimd.Uint32x4.And, andSlice[uint32])
testUint32x8Binary(t, archsimd.Uint32x8.And, andSlice[uint32])
testUint64x2Binary(t, archsimd.Uint64x2.And, andSlice[uint64])
testUint64x4Binary(t, archsimd.Uint64x4.And, andSlice[uint64])
testUint8x16Binary(t, archsimd.Uint8x16.And, andSlice[uint8])
testUint8x32Binary(t, archsimd.Uint8x32.And, andSlice[uint8])
if archsimd.X86.AVX512() {
// testInt8x64Binary(t, archsimd.Int8x64.And, andISlice[int8]) // missing
// testInt16x32Binary(t, archsimd.Int16x32.And, andISlice[int16]) // missing
testInt32x16Binary(t, archsimd.Int32x16.And, andSlice[int32])
testInt64x8Binary(t, archsimd.Int64x8.And, andSlice[int64])
// testUint8x64Binary(t, archsimd.Uint8x64.And, andISlice[uint8]) // missing
// testUint16x32Binary(t, archsimd.Uint16x32.And, andISlice[uint16]) // missing
testUint32x16Binary(t, archsimd.Uint32x16.And, andSlice[uint32])
testUint64x8Binary(t, archsimd.Uint64x8.And, andSlice[uint64])
}
}
func TestAndNot(t *testing.T) {
testInt16x16Binary(t, archsimd.Int16x16.AndNot, andNotSlice[int16])
testInt16x8Binary(t, archsimd.Int16x8.AndNot, andNotSlice[int16])
testInt32x4Binary(t, archsimd.Int32x4.AndNot, andNotSlice[int32])
testInt32x8Binary(t, archsimd.Int32x8.AndNot, andNotSlice[int32])
testInt64x2Binary(t, archsimd.Int64x2.AndNot, andNotSlice[int64])
testInt64x4Binary(t, archsimd.Int64x4.AndNot, andNotSlice[int64])
testInt8x16Binary(t, archsimd.Int8x16.AndNot, andNotSlice[int8])
testInt8x32Binary(t, archsimd.Int8x32.AndNot, andNotSlice[int8])
testUint16x16Binary(t, archsimd.Uint16x16.AndNot, andNotSlice[uint16])
testUint16x8Binary(t, archsimd.Uint16x8.AndNot, andNotSlice[uint16])
testUint32x4Binary(t, archsimd.Uint32x4.AndNot, andNotSlice[uint32])
testUint32x8Binary(t, archsimd.Uint32x8.AndNot, andNotSlice[uint32])
testUint64x2Binary(t, archsimd.Uint64x2.AndNot, andNotSlice[uint64])
testUint64x4Binary(t, archsimd.Uint64x4.AndNot, andNotSlice[uint64])
testUint8x16Binary(t, archsimd.Uint8x16.AndNot, andNotSlice[uint8])
testUint8x32Binary(t, archsimd.Uint8x32.AndNot, andNotSlice[uint8])
if archsimd.X86.AVX512() {
testInt8x64Binary(t, archsimd.Int8x64.AndNot, andNotSlice[int8])
testInt16x32Binary(t, archsimd.Int16x32.AndNot, andNotSlice[int16])
testInt32x16Binary(t, archsimd.Int32x16.AndNot, andNotSlice[int32])
testInt64x8Binary(t, archsimd.Int64x8.AndNot, andNotSlice[int64])
testUint8x64Binary(t, archsimd.Uint8x64.AndNot, andNotSlice[uint8])
testUint16x32Binary(t, archsimd.Uint16x32.AndNot, andNotSlice[uint16])
testUint32x16Binary(t, archsimd.Uint32x16.AndNot, andNotSlice[uint32])
testUint64x8Binary(t, archsimd.Uint64x8.AndNot, andNotSlice[uint64])
}
}
func TestXor(t *testing.T) {
testInt16x16Binary(t, archsimd.Int16x16.Xor, xorSlice[int16])
testInt16x8Binary(t, archsimd.Int16x8.Xor, xorSlice[int16])
testInt32x4Binary(t, archsimd.Int32x4.Xor, xorSlice[int32])
testInt32x8Binary(t, archsimd.Int32x8.Xor, xorSlice[int32])
testInt64x2Binary(t, archsimd.Int64x2.Xor, xorSlice[int64])
testInt64x4Binary(t, archsimd.Int64x4.Xor, xorSlice[int64])
testInt8x16Binary(t, archsimd.Int8x16.Xor, xorSlice[int8])
testInt8x32Binary(t, archsimd.Int8x32.Xor, xorSlice[int8])
testUint16x16Binary(t, archsimd.Uint16x16.Xor, xorSlice[uint16])
testUint16x8Binary(t, archsimd.Uint16x8.Xor, xorSlice[uint16])
testUint32x4Binary(t, archsimd.Uint32x4.Xor, xorSlice[uint32])
testUint32x8Binary(t, archsimd.Uint32x8.Xor, xorSlice[uint32])
testUint64x2Binary(t, archsimd.Uint64x2.Xor, xorSlice[uint64])
testUint64x4Binary(t, archsimd.Uint64x4.Xor, xorSlice[uint64])
testUint8x16Binary(t, archsimd.Uint8x16.Xor, xorSlice[uint8])
testUint8x32Binary(t, archsimd.Uint8x32.Xor, xorSlice[uint8])
if archsimd.X86.AVX512() {
// testInt8x64Binary(t, archsimd.Int8x64.Xor, andISlice[int8]) // missing
// testInt16x32Binary(t, archsimd.Int16x32.Xor, andISlice[int16]) // missing
testInt32x16Binary(t, archsimd.Int32x16.Xor, xorSlice[int32])
testInt64x8Binary(t, archsimd.Int64x8.Xor, xorSlice[int64])
// testUint8x64Binary(t, archsimd.Uint8x64.Xor, andISlice[uint8]) // missing
// testUint16x32Binary(t, archsimd.Uint16x32.Xor, andISlice[uint16]) // missing
testUint32x16Binary(t, archsimd.Uint32x16.Xor, xorSlice[uint32])
testUint64x8Binary(t, archsimd.Uint64x8.Xor, xorSlice[uint64])
}
}
func TestOr(t *testing.T) {
testInt16x16Binary(t, archsimd.Int16x16.Or, orSlice[int16])
testInt16x8Binary(t, archsimd.Int16x8.Or, orSlice[int16])
testInt32x4Binary(t, archsimd.Int32x4.Or, orSlice[int32])
testInt32x8Binary(t, archsimd.Int32x8.Or, orSlice[int32])
testInt64x2Binary(t, archsimd.Int64x2.Or, orSlice[int64])
testInt64x4Binary(t, archsimd.Int64x4.Or, orSlice[int64])
testInt8x16Binary(t, archsimd.Int8x16.Or, orSlice[int8])
testInt8x32Binary(t, archsimd.Int8x32.Or, orSlice[int8])
testUint16x16Binary(t, archsimd.Uint16x16.Or, orSlice[uint16])
testUint16x8Binary(t, archsimd.Uint16x8.Or, orSlice[uint16])
testUint32x4Binary(t, archsimd.Uint32x4.Or, orSlice[uint32])
testUint32x8Binary(t, archsimd.Uint32x8.Or, orSlice[uint32])
testUint64x2Binary(t, archsimd.Uint64x2.Or, orSlice[uint64])
testUint64x4Binary(t, archsimd.Uint64x4.Or, orSlice[uint64])
testUint8x16Binary(t, archsimd.Uint8x16.Or, orSlice[uint8])
testUint8x32Binary(t, archsimd.Uint8x32.Or, orSlice[uint8])
if archsimd.X86.AVX512() {
// testInt8x64Binary(t, archsimd.Int8x64.Or, andISlice[int8]) // missing
// testInt16x32Binary(t, archsimd.Int16x32.Or, andISlice[int16]) // missing
testInt32x16Binary(t, archsimd.Int32x16.Or, orSlice[int32])
testInt64x8Binary(t, archsimd.Int64x8.Or, orSlice[int64])
// testUint8x64Binary(t, archsimd.Uint8x64.Or, andISlice[uint8]) // missing
// testUint16x32Binary(t, archsimd.Uint16x32.Or, andISlice[uint16]) // missing
testUint32x16Binary(t, archsimd.Uint32x16.Or, orSlice[uint32])
testUint64x8Binary(t, archsimd.Uint64x8.Or, orSlice[uint64])
}
}
func TestMul(t *testing.T) {
testFloat32x4Binary(t, archsimd.Float32x4.Mul, mulSlice[float32])
testFloat32x8Binary(t, archsimd.Float32x8.Mul, mulSlice[float32])
testFloat64x2Binary(t, archsimd.Float64x2.Mul, mulSlice[float64])
testFloat64x4Binary(t, archsimd.Float64x4.Mul, mulSlice[float64])
testInt16x16Binary(t, archsimd.Int16x16.Mul, mulSlice[int16])
testInt16x8Binary(t, archsimd.Int16x8.Mul, mulSlice[int16])
testInt32x4Binary(t, archsimd.Int32x4.Mul, mulSlice[int32])
testInt32x8Binary(t, archsimd.Int32x8.Mul, mulSlice[int32])
// testInt8x16Binary(t, archsimd.Int8x16.Mul, mulSlice[int8]) // nope
// testInt8x32Binary(t, archsimd.Int8x32.Mul, mulSlice[int8])
// TODO: we should be able to do these; element-wise Mul produces the same low bits for signed and unsigned
// testUint16x16Binary(t, archsimd.Uint16x16.Mul, mulSlice[uint16])
// testUint16x8Binary(t, archsimd.Uint16x8.Mul, mulSlice[uint16])
// testUint32x4Binary(t, archsimd.Uint32x4.Mul, mulSlice[uint32])
// testUint32x8Binary(t, archsimd.Uint32x8.Mul, mulSlice[uint32])
// testUint64x2Binary(t, archsimd.Uint64x2.Mul, mulSlice[uint64])
// testUint64x4Binary(t, archsimd.Uint64x4.Mul, mulSlice[uint64])
// testUint8x16Binary(t, archsimd.Uint8x16.Mul, mulSlice[uint8]) // nope
// testUint8x32Binary(t, archsimd.Uint8x32.Mul, mulSlice[uint8])
if archsimd.X86.AVX512() {
testInt64x2Binary(t, archsimd.Int64x2.Mul, mulSlice[int64]) // avx512 only
testInt64x4Binary(t, archsimd.Int64x4.Mul, mulSlice[int64])
testFloat32x16Binary(t, archsimd.Float32x16.Mul, mulSlice[float32])
testFloat64x8Binary(t, archsimd.Float64x8.Mul, mulSlice[float64])
// testInt8x64Binary(t, archsimd.Int8x64.Mul, mulSlice[int8]) // nope
testInt16x32Binary(t, archsimd.Int16x32.Mul, mulSlice[int16])
testInt32x16Binary(t, archsimd.Int32x16.Mul, mulSlice[int32])
testInt64x8Binary(t, archsimd.Int64x8.Mul, mulSlice[int64])
// testUint8x64Binary(t, archsimd.Uint8x64.Mul, mulSlice[uint8]) // nope
// TODO: the signed Mul helpers should do the job here too
// testUint16x32Binary(t, archsimd.Uint16x32.Mul, mulSlice[uint16])
// testUint32x16Binary(t, archsimd.Uint32x16.Mul, mulSlice[uint32])
// testUint64x8Binary(t, archsimd.Uint64x8.Mul, mulSlice[uint64])
}
}
func TestDiv(t *testing.T) {
testFloat32x4Binary(t, archsimd.Float32x4.Div, divSlice[float32])
testFloat32x8Binary(t, archsimd.Float32x8.Div, divSlice[float32])
testFloat64x2Binary(t, archsimd.Float64x2.Div, divSlice[float64])
testFloat64x4Binary(t, archsimd.Float64x4.Div, divSlice[float64])
if archsimd.X86.AVX512() {
testFloat32x16Binary(t, archsimd.Float32x16.Div, divSlice[float32])
testFloat64x8Binary(t, archsimd.Float64x8.Div, divSlice[float64])
}
}
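// For orientation only, not part of this change: the addSlice, subSlice, ...
// reference functions passed to the helpers above are defined elsewhere in
// package simd_test. A minimal sketch of their assumed element-wise shape
// (the name and constraint here are illustrative, not the package's real code):
func addSliceSketch[T int8 | int16 | int32 | int64 |
	uint8 | uint16 | uint32 | uint64 | float32 | float64](x, y []T) []T {
	s := make([]T, len(x))
	for i := range x {
		s[i] = x[i] + y[i] // the SIMD result is checked against this, lane by lane
	}
	return s
}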


@ -9,18 +9,18 @@
package simd_test
import (
"simd"
"simd/archsimd"
"testing"
)
// testInt8x16Compare tests the simd comparison method f against the expected behavior generated by want
func testInt8x16Compare(t *testing.T, f func(_, _ simd.Int8x16) simd.Mask8x16, want func(_, _ []int8) []int64) {
func testInt8x16Compare(t *testing.T, f func(_, _ archsimd.Int8x16) archsimd.Mask8x16, want func(_, _ []int8) []int64) {
n := 16
t.Helper()
forSlicePair(t, int8s, n, func(x, y []int8) bool {
t.Helper()
a := simd.LoadInt8x16Slice(x)
b := simd.LoadInt8x16Slice(y)
a := archsimd.LoadInt8x16Slice(x)
b := archsimd.LoadInt8x16Slice(y)
g := make([]int8, n)
f(a, b).AsInt8x16().StoreSlice(g)
w := want(x, y)
@ -29,13 +29,13 @@ func testInt8x16Compare(t *testing.T, f func(_, _ simd.Int8x16) simd.Mask8x16, w
}
// testInt16x8Compare tests the simd comparison method f against the expected behavior generated by want
func testInt16x8Compare(t *testing.T, f func(_, _ simd.Int16x8) simd.Mask16x8, want func(_, _ []int16) []int64) {
func testInt16x8Compare(t *testing.T, f func(_, _ archsimd.Int16x8) archsimd.Mask16x8, want func(_, _ []int16) []int64) {
n := 8
t.Helper()
forSlicePair(t, int16s, n, func(x, y []int16) bool {
t.Helper()
a := simd.LoadInt16x8Slice(x)
b := simd.LoadInt16x8Slice(y)
a := archsimd.LoadInt16x8Slice(x)
b := archsimd.LoadInt16x8Slice(y)
g := make([]int16, n)
f(a, b).AsInt16x8().StoreSlice(g)
w := want(x, y)
@ -44,13 +44,13 @@ func testInt16x8Compare(t *testing.T, f func(_, _ simd.Int16x8) simd.Mask16x8, w
}
// testInt32x4Compare tests the simd comparison method f against the expected behavior generated by want
func testInt32x4Compare(t *testing.T, f func(_, _ simd.Int32x4) simd.Mask32x4, want func(_, _ []int32) []int64) {
func testInt32x4Compare(t *testing.T, f func(_, _ archsimd.Int32x4) archsimd.Mask32x4, want func(_, _ []int32) []int64) {
n := 4
t.Helper()
forSlicePair(t, int32s, n, func(x, y []int32) bool {
t.Helper()
a := simd.LoadInt32x4Slice(x)
b := simd.LoadInt32x4Slice(y)
a := archsimd.LoadInt32x4Slice(x)
b := archsimd.LoadInt32x4Slice(y)
g := make([]int32, n)
f(a, b).AsInt32x4().StoreSlice(g)
w := want(x, y)
@ -59,13 +59,13 @@ func testInt32x4Compare(t *testing.T, f func(_, _ simd.Int32x4) simd.Mask32x4, w
}
// testInt64x2Compare tests the simd comparison method f against the expected behavior generated by want
func testInt64x2Compare(t *testing.T, f func(_, _ simd.Int64x2) simd.Mask64x2, want func(_, _ []int64) []int64) {
func testInt64x2Compare(t *testing.T, f func(_, _ archsimd.Int64x2) archsimd.Mask64x2, want func(_, _ []int64) []int64) {
n := 2
t.Helper()
forSlicePair(t, int64s, n, func(x, y []int64) bool {
t.Helper()
a := simd.LoadInt64x2Slice(x)
b := simd.LoadInt64x2Slice(y)
a := archsimd.LoadInt64x2Slice(x)
b := archsimd.LoadInt64x2Slice(y)
g := make([]int64, n)
f(a, b).AsInt64x2().StoreSlice(g)
w := want(x, y)
@ -74,13 +74,13 @@ func testInt64x2Compare(t *testing.T, f func(_, _ simd.Int64x2) simd.Mask64x2, w
}
// testUint8x16Compare tests the simd comparison method f against the expected behavior generated by want
func testUint8x16Compare(t *testing.T, f func(_, _ simd.Uint8x16) simd.Mask8x16, want func(_, _ []uint8) []int64) {
func testUint8x16Compare(t *testing.T, f func(_, _ archsimd.Uint8x16) archsimd.Mask8x16, want func(_, _ []uint8) []int64) {
n := 16
t.Helper()
forSlicePair(t, uint8s, n, func(x, y []uint8) bool {
t.Helper()
a := simd.LoadUint8x16Slice(x)
b := simd.LoadUint8x16Slice(y)
a := archsimd.LoadUint8x16Slice(x)
b := archsimd.LoadUint8x16Slice(y)
g := make([]int8, n)
f(a, b).AsInt8x16().StoreSlice(g)
w := want(x, y)
@ -89,13 +89,13 @@ func testUint8x16Compare(t *testing.T, f func(_, _ simd.Uint8x16) simd.Mask8x16,
}
// testUint16x8Compare tests the simd comparison method f against the expected behavior generated by want
func testUint16x8Compare(t *testing.T, f func(_, _ simd.Uint16x8) simd.Mask16x8, want func(_, _ []uint16) []int64) {
func testUint16x8Compare(t *testing.T, f func(_, _ archsimd.Uint16x8) archsimd.Mask16x8, want func(_, _ []uint16) []int64) {
n := 8
t.Helper()
forSlicePair(t, uint16s, n, func(x, y []uint16) bool {
t.Helper()
a := simd.LoadUint16x8Slice(x)
b := simd.LoadUint16x8Slice(y)
a := archsimd.LoadUint16x8Slice(x)
b := archsimd.LoadUint16x8Slice(y)
g := make([]int16, n)
f(a, b).AsInt16x8().StoreSlice(g)
w := want(x, y)
@ -104,13 +104,13 @@ func testUint16x8Compare(t *testing.T, f func(_, _ simd.Uint16x8) simd.Mask16x8,
}
// testUint32x4Compare tests the simd comparison method f against the expected behavior generated by want
func testUint32x4Compare(t *testing.T, f func(_, _ simd.Uint32x4) simd.Mask32x4, want func(_, _ []uint32) []int64) {
func testUint32x4Compare(t *testing.T, f func(_, _ archsimd.Uint32x4) archsimd.Mask32x4, want func(_, _ []uint32) []int64) {
n := 4
t.Helper()
forSlicePair(t, uint32s, n, func(x, y []uint32) bool {
t.Helper()
a := simd.LoadUint32x4Slice(x)
b := simd.LoadUint32x4Slice(y)
a := archsimd.LoadUint32x4Slice(x)
b := archsimd.LoadUint32x4Slice(y)
g := make([]int32, n)
f(a, b).AsInt32x4().StoreSlice(g)
w := want(x, y)
@ -119,13 +119,13 @@ func testUint32x4Compare(t *testing.T, f func(_, _ simd.Uint32x4) simd.Mask32x4,
}
// testUint64x2Compare tests the simd comparison method f against the expected behavior generated by want
func testUint64x2Compare(t *testing.T, f func(_, _ simd.Uint64x2) simd.Mask64x2, want func(_, _ []uint64) []int64) {
func testUint64x2Compare(t *testing.T, f func(_, _ archsimd.Uint64x2) archsimd.Mask64x2, want func(_, _ []uint64) []int64) {
n := 2
t.Helper()
forSlicePair(t, uint64s, n, func(x, y []uint64) bool {
t.Helper()
a := simd.LoadUint64x2Slice(x)
b := simd.LoadUint64x2Slice(y)
a := archsimd.LoadUint64x2Slice(x)
b := archsimd.LoadUint64x2Slice(y)
g := make([]int64, n)
f(a, b).AsInt64x2().StoreSlice(g)
w := want(x, y)
@ -134,13 +134,13 @@ func testUint64x2Compare(t *testing.T, f func(_, _ simd.Uint64x2) simd.Mask64x2,
}
// testFloat32x4Compare tests the simd comparison method f against the expected behavior generated by want
func testFloat32x4Compare(t *testing.T, f func(_, _ simd.Float32x4) simd.Mask32x4, want func(_, _ []float32) []int64) {
func testFloat32x4Compare(t *testing.T, f func(_, _ archsimd.Float32x4) archsimd.Mask32x4, want func(_, _ []float32) []int64) {
n := 4
t.Helper()
forSlicePair(t, float32s, n, func(x, y []float32) bool {
t.Helper()
a := simd.LoadFloat32x4Slice(x)
b := simd.LoadFloat32x4Slice(y)
a := archsimd.LoadFloat32x4Slice(x)
b := archsimd.LoadFloat32x4Slice(y)
g := make([]int32, n)
f(a, b).AsInt32x4().StoreSlice(g)
w := want(x, y)
@ -149,13 +149,13 @@ func testFloat32x4Compare(t *testing.T, f func(_, _ simd.Float32x4) simd.Mask32x
}
// testFloat64x2Compare tests the simd comparison method f against the expected behavior generated by want
func testFloat64x2Compare(t *testing.T, f func(_, _ simd.Float64x2) simd.Mask64x2, want func(_, _ []float64) []int64) {
func testFloat64x2Compare(t *testing.T, f func(_, _ archsimd.Float64x2) archsimd.Mask64x2, want func(_, _ []float64) []int64) {
n := 2
t.Helper()
forSlicePair(t, float64s, n, func(x, y []float64) bool {
t.Helper()
a := simd.LoadFloat64x2Slice(x)
b := simd.LoadFloat64x2Slice(y)
a := archsimd.LoadFloat64x2Slice(x)
b := archsimd.LoadFloat64x2Slice(y)
g := make([]int64, n)
f(a, b).AsInt64x2().StoreSlice(g)
w := want(x, y)
@ -164,13 +164,13 @@ func testFloat64x2Compare(t *testing.T, f func(_, _ simd.Float64x2) simd.Mask64x
}
// testInt8x32Compare tests the simd comparison method f against the expected behavior generated by want
func testInt8x32Compare(t *testing.T, f func(_, _ simd.Int8x32) simd.Mask8x32, want func(_, _ []int8) []int64) {
func testInt8x32Compare(t *testing.T, f func(_, _ archsimd.Int8x32) archsimd.Mask8x32, want func(_, _ []int8) []int64) {
n := 32
t.Helper()
forSlicePair(t, int8s, n, func(x, y []int8) bool {
t.Helper()
a := simd.LoadInt8x32Slice(x)
b := simd.LoadInt8x32Slice(y)
a := archsimd.LoadInt8x32Slice(x)
b := archsimd.LoadInt8x32Slice(y)
g := make([]int8, n)
f(a, b).AsInt8x32().StoreSlice(g)
w := want(x, y)
@ -179,13 +179,13 @@ func testInt8x32Compare(t *testing.T, f func(_, _ simd.Int8x32) simd.Mask8x32, w
}
// testInt16x16Compare tests the simd comparison method f against the expected behavior generated by want
func testInt16x16Compare(t *testing.T, f func(_, _ simd.Int16x16) simd.Mask16x16, want func(_, _ []int16) []int64) {
func testInt16x16Compare(t *testing.T, f func(_, _ archsimd.Int16x16) archsimd.Mask16x16, want func(_, _ []int16) []int64) {
n := 16
t.Helper()
forSlicePair(t, int16s, n, func(x, y []int16) bool {
t.Helper()
a := simd.LoadInt16x16Slice(x)
b := simd.LoadInt16x16Slice(y)
a := archsimd.LoadInt16x16Slice(x)
b := archsimd.LoadInt16x16Slice(y)
g := make([]int16, n)
f(a, b).AsInt16x16().StoreSlice(g)
w := want(x, y)
@ -194,13 +194,13 @@ func testInt16x16Compare(t *testing.T, f func(_, _ simd.Int16x16) simd.Mask16x16
}
// testInt32x8Compare tests the simd comparison method f against the expected behavior generated by want
func testInt32x8Compare(t *testing.T, f func(_, _ simd.Int32x8) simd.Mask32x8, want func(_, _ []int32) []int64) {
func testInt32x8Compare(t *testing.T, f func(_, _ archsimd.Int32x8) archsimd.Mask32x8, want func(_, _ []int32) []int64) {
n := 8
t.Helper()
forSlicePair(t, int32s, n, func(x, y []int32) bool {
t.Helper()
a := simd.LoadInt32x8Slice(x)
b := simd.LoadInt32x8Slice(y)
a := archsimd.LoadInt32x8Slice(x)
b := archsimd.LoadInt32x8Slice(y)
g := make([]int32, n)
f(a, b).AsInt32x8().StoreSlice(g)
w := want(x, y)
@ -209,13 +209,13 @@ func testInt32x8Compare(t *testing.T, f func(_, _ simd.Int32x8) simd.Mask32x8, w
}
// testInt64x4Compare tests the simd comparison method f against the expected behavior generated by want
func testInt64x4Compare(t *testing.T, f func(_, _ simd.Int64x4) simd.Mask64x4, want func(_, _ []int64) []int64) {
func testInt64x4Compare(t *testing.T, f func(_, _ archsimd.Int64x4) archsimd.Mask64x4, want func(_, _ []int64) []int64) {
n := 4
t.Helper()
forSlicePair(t, int64s, n, func(x, y []int64) bool {
t.Helper()
a := simd.LoadInt64x4Slice(x)
b := simd.LoadInt64x4Slice(y)
a := archsimd.LoadInt64x4Slice(x)
b := archsimd.LoadInt64x4Slice(y)
g := make([]int64, n)
f(a, b).AsInt64x4().StoreSlice(g)
w := want(x, y)
@ -224,13 +224,13 @@ func testInt64x4Compare(t *testing.T, f func(_, _ simd.Int64x4) simd.Mask64x4, w
}
// testUint8x32Compare tests the simd comparison method f against the expected behavior generated by want
func testUint8x32Compare(t *testing.T, f func(_, _ simd.Uint8x32) simd.Mask8x32, want func(_, _ []uint8) []int64) {
func testUint8x32Compare(t *testing.T, f func(_, _ archsimd.Uint8x32) archsimd.Mask8x32, want func(_, _ []uint8) []int64) {
n := 32
t.Helper()
forSlicePair(t, uint8s, n, func(x, y []uint8) bool {
t.Helper()
a := simd.LoadUint8x32Slice(x)
b := simd.LoadUint8x32Slice(y)
a := archsimd.LoadUint8x32Slice(x)
b := archsimd.LoadUint8x32Slice(y)
g := make([]int8, n)
f(a, b).AsInt8x32().StoreSlice(g)
w := want(x, y)
@ -239,13 +239,13 @@ func testUint8x32Compare(t *testing.T, f func(_, _ simd.Uint8x32) simd.Mask8x32,
}
// testUint16x16Compare tests the simd comparison method f against the expected behavior generated by want
func testUint16x16Compare(t *testing.T, f func(_, _ simd.Uint16x16) simd.Mask16x16, want func(_, _ []uint16) []int64) {
func testUint16x16Compare(t *testing.T, f func(_, _ archsimd.Uint16x16) archsimd.Mask16x16, want func(_, _ []uint16) []int64) {
n := 16
t.Helper()
forSlicePair(t, uint16s, n, func(x, y []uint16) bool {
t.Helper()
a := simd.LoadUint16x16Slice(x)
b := simd.LoadUint16x16Slice(y)
a := archsimd.LoadUint16x16Slice(x)
b := archsimd.LoadUint16x16Slice(y)
g := make([]int16, n)
f(a, b).AsInt16x16().StoreSlice(g)
w := want(x, y)
@ -254,13 +254,13 @@ func testUint16x16Compare(t *testing.T, f func(_, _ simd.Uint16x16) simd.Mask16x
}
// testUint32x8Compare tests the simd comparison method f against the expected behavior generated by want
func testUint32x8Compare(t *testing.T, f func(_, _ simd.Uint32x8) simd.Mask32x8, want func(_, _ []uint32) []int64) {
func testUint32x8Compare(t *testing.T, f func(_, _ archsimd.Uint32x8) archsimd.Mask32x8, want func(_, _ []uint32) []int64) {
n := 8
t.Helper()
forSlicePair(t, uint32s, n, func(x, y []uint32) bool {
t.Helper()
a := simd.LoadUint32x8Slice(x)
b := simd.LoadUint32x8Slice(y)
a := archsimd.LoadUint32x8Slice(x)
b := archsimd.LoadUint32x8Slice(y)
g := make([]int32, n)
f(a, b).AsInt32x8().StoreSlice(g)
w := want(x, y)
@ -269,13 +269,13 @@ func testUint32x8Compare(t *testing.T, f func(_, _ simd.Uint32x8) simd.Mask32x8,
}
// testUint64x4Compare tests the simd comparison method f against the expected behavior generated by want
func testUint64x4Compare(t *testing.T, f func(_, _ simd.Uint64x4) simd.Mask64x4, want func(_, _ []uint64) []int64) {
func testUint64x4Compare(t *testing.T, f func(_, _ archsimd.Uint64x4) archsimd.Mask64x4, want func(_, _ []uint64) []int64) {
n := 4
t.Helper()
forSlicePair(t, uint64s, n, func(x, y []uint64) bool {
t.Helper()
a := simd.LoadUint64x4Slice(x)
b := simd.LoadUint64x4Slice(y)
a := archsimd.LoadUint64x4Slice(x)
b := archsimd.LoadUint64x4Slice(y)
g := make([]int64, n)
f(a, b).AsInt64x4().StoreSlice(g)
w := want(x, y)
@ -284,13 +284,13 @@ func testUint64x4Compare(t *testing.T, f func(_, _ simd.Uint64x4) simd.Mask64x4,
}
// testFloat32x8Compare tests the simd comparison method f against the expected behavior generated by want
func testFloat32x8Compare(t *testing.T, f func(_, _ simd.Float32x8) simd.Mask32x8, want func(_, _ []float32) []int64) {
func testFloat32x8Compare(t *testing.T, f func(_, _ archsimd.Float32x8) archsimd.Mask32x8, want func(_, _ []float32) []int64) {
n := 8
t.Helper()
forSlicePair(t, float32s, n, func(x, y []float32) bool {
t.Helper()
a := simd.LoadFloat32x8Slice(x)
b := simd.LoadFloat32x8Slice(y)
a := archsimd.LoadFloat32x8Slice(x)
b := archsimd.LoadFloat32x8Slice(y)
g := make([]int32, n)
f(a, b).AsInt32x8().StoreSlice(g)
w := want(x, y)
@ -299,13 +299,13 @@ func testFloat32x8Compare(t *testing.T, f func(_, _ simd.Float32x8) simd.Mask32x
}
// testFloat64x4Compare tests the simd comparison method f against the expected behavior generated by want
func testFloat64x4Compare(t *testing.T, f func(_, _ simd.Float64x4) simd.Mask64x4, want func(_, _ []float64) []int64) {
func testFloat64x4Compare(t *testing.T, f func(_, _ archsimd.Float64x4) archsimd.Mask64x4, want func(_, _ []float64) []int64) {
n := 4
t.Helper()
forSlicePair(t, float64s, n, func(x, y []float64) bool {
t.Helper()
a := simd.LoadFloat64x4Slice(x)
b := simd.LoadFloat64x4Slice(y)
a := archsimd.LoadFloat64x4Slice(x)
b := archsimd.LoadFloat64x4Slice(y)
g := make([]int64, n)
f(a, b).AsInt64x4().StoreSlice(g)
w := want(x, y)
@ -314,13 +314,13 @@ func testFloat64x4Compare(t *testing.T, f func(_, _ simd.Float64x4) simd.Mask64x
}
// testInt8x64Compare tests the simd comparison method f against the expected behavior generated by want
func testInt8x64Compare(t *testing.T, f func(_, _ simd.Int8x64) simd.Mask8x64, want func(_, _ []int8) []int64) {
func testInt8x64Compare(t *testing.T, f func(_, _ archsimd.Int8x64) archsimd.Mask8x64, want func(_, _ []int8) []int64) {
n := 64
t.Helper()
forSlicePair(t, int8s, n, func(x, y []int8) bool {
t.Helper()
a := simd.LoadInt8x64Slice(x)
b := simd.LoadInt8x64Slice(y)
a := archsimd.LoadInt8x64Slice(x)
b := archsimd.LoadInt8x64Slice(y)
g := make([]int8, n)
f(a, b).AsInt8x64().StoreSlice(g)
w := want(x, y)
@ -329,13 +329,13 @@ func testInt8x64Compare(t *testing.T, f func(_, _ simd.Int8x64) simd.Mask8x64, w
}
// testInt16x32Compare tests the simd comparison method f against the expected behavior generated by want
func testInt16x32Compare(t *testing.T, f func(_, _ simd.Int16x32) simd.Mask16x32, want func(_, _ []int16) []int64) {
func testInt16x32Compare(t *testing.T, f func(_, _ archsimd.Int16x32) archsimd.Mask16x32, want func(_, _ []int16) []int64) {
n := 32
t.Helper()
forSlicePair(t, int16s, n, func(x, y []int16) bool {
t.Helper()
a := simd.LoadInt16x32Slice(x)
b := simd.LoadInt16x32Slice(y)
a := archsimd.LoadInt16x32Slice(x)
b := archsimd.LoadInt16x32Slice(y)
g := make([]int16, n)
f(a, b).AsInt16x32().StoreSlice(g)
w := want(x, y)
@ -344,13 +344,13 @@ func testInt16x32Compare(t *testing.T, f func(_, _ simd.Int16x32) simd.Mask16x32
}
// testInt32x16Compare tests the simd comparison method f against the expected behavior generated by want
func testInt32x16Compare(t *testing.T, f func(_, _ simd.Int32x16) simd.Mask32x16, want func(_, _ []int32) []int64) {
func testInt32x16Compare(t *testing.T, f func(_, _ archsimd.Int32x16) archsimd.Mask32x16, want func(_, _ []int32) []int64) {
n := 16
t.Helper()
forSlicePair(t, int32s, n, func(x, y []int32) bool {
t.Helper()
a := simd.LoadInt32x16Slice(x)
b := simd.LoadInt32x16Slice(y)
a := archsimd.LoadInt32x16Slice(x)
b := archsimd.LoadInt32x16Slice(y)
g := make([]int32, n)
f(a, b).AsInt32x16().StoreSlice(g)
w := want(x, y)
@ -359,13 +359,13 @@ func testInt32x16Compare(t *testing.T, f func(_, _ simd.Int32x16) simd.Mask32x16
}
// testInt64x8Compare tests the simd comparison method f against the expected behavior generated by want
func testInt64x8Compare(t *testing.T, f func(_, _ simd.Int64x8) simd.Mask64x8, want func(_, _ []int64) []int64) {
func testInt64x8Compare(t *testing.T, f func(_, _ archsimd.Int64x8) archsimd.Mask64x8, want func(_, _ []int64) []int64) {
n := 8
t.Helper()
forSlicePair(t, int64s, n, func(x, y []int64) bool {
t.Helper()
a := simd.LoadInt64x8Slice(x)
b := simd.LoadInt64x8Slice(y)
a := archsimd.LoadInt64x8Slice(x)
b := archsimd.LoadInt64x8Slice(y)
g := make([]int64, n)
f(a, b).AsInt64x8().StoreSlice(g)
w := want(x, y)
@ -374,13 +374,13 @@ func testInt64x8Compare(t *testing.T, f func(_, _ simd.Int64x8) simd.Mask64x8, w
}
// testUint8x64Compare tests the simd comparison method f against the expected behavior generated by want
func testUint8x64Compare(t *testing.T, f func(_, _ simd.Uint8x64) simd.Mask8x64, want func(_, _ []uint8) []int64) {
func testUint8x64Compare(t *testing.T, f func(_, _ archsimd.Uint8x64) archsimd.Mask8x64, want func(_, _ []uint8) []int64) {
n := 64
t.Helper()
forSlicePair(t, uint8s, n, func(x, y []uint8) bool {
t.Helper()
a := simd.LoadUint8x64Slice(x)
b := simd.LoadUint8x64Slice(y)
a := archsimd.LoadUint8x64Slice(x)
b := archsimd.LoadUint8x64Slice(y)
g := make([]int8, n)
f(a, b).AsInt8x64().StoreSlice(g)
w := want(x, y)
@ -389,13 +389,13 @@ func testUint8x64Compare(t *testing.T, f func(_, _ simd.Uint8x64) simd.Mask8x64,
}
// testUint16x32Compare tests the simd comparison method f against the expected behavior generated by want
func testUint16x32Compare(t *testing.T, f func(_, _ simd.Uint16x32) simd.Mask16x32, want func(_, _ []uint16) []int64) {
func testUint16x32Compare(t *testing.T, f func(_, _ archsimd.Uint16x32) archsimd.Mask16x32, want func(_, _ []uint16) []int64) {
n := 32
t.Helper()
forSlicePair(t, uint16s, n, func(x, y []uint16) bool {
t.Helper()
a := simd.LoadUint16x32Slice(x)
b := simd.LoadUint16x32Slice(y)
a := archsimd.LoadUint16x32Slice(x)
b := archsimd.LoadUint16x32Slice(y)
g := make([]int16, n)
f(a, b).AsInt16x32().StoreSlice(g)
w := want(x, y)
@ -404,13 +404,13 @@ func testUint16x32Compare(t *testing.T, f func(_, _ simd.Uint16x32) simd.Mask16x
}
// testUint32x16Compare tests the simd comparison method f against the expected behavior generated by want
func testUint32x16Compare(t *testing.T, f func(_, _ simd.Uint32x16) simd.Mask32x16, want func(_, _ []uint32) []int64) {
func testUint32x16Compare(t *testing.T, f func(_, _ archsimd.Uint32x16) archsimd.Mask32x16, want func(_, _ []uint32) []int64) {
n := 16
t.Helper()
forSlicePair(t, uint32s, n, func(x, y []uint32) bool {
t.Helper()
a := simd.LoadUint32x16Slice(x)
b := simd.LoadUint32x16Slice(y)
a := archsimd.LoadUint32x16Slice(x)
b := archsimd.LoadUint32x16Slice(y)
g := make([]int32, n)
f(a, b).AsInt32x16().StoreSlice(g)
w := want(x, y)
@ -419,13 +419,13 @@ func testUint32x16Compare(t *testing.T, f func(_, _ simd.Uint32x16) simd.Mask32x
}
// testUint64x8Compare tests the simd comparison method f against the expected behavior generated by want
func testUint64x8Compare(t *testing.T, f func(_, _ simd.Uint64x8) simd.Mask64x8, want func(_, _ []uint64) []int64) {
func testUint64x8Compare(t *testing.T, f func(_, _ archsimd.Uint64x8) archsimd.Mask64x8, want func(_, _ []uint64) []int64) {
n := 8
t.Helper()
forSlicePair(t, uint64s, n, func(x, y []uint64) bool {
t.Helper()
a := simd.LoadUint64x8Slice(x)
b := simd.LoadUint64x8Slice(y)
a := archsimd.LoadUint64x8Slice(x)
b := archsimd.LoadUint64x8Slice(y)
g := make([]int64, n)
f(a, b).AsInt64x8().StoreSlice(g)
w := want(x, y)
@ -434,13 +434,13 @@ func testUint64x8Compare(t *testing.T, f func(_, _ simd.Uint64x8) simd.Mask64x8,
}
// testFloat32x16Compare tests the simd comparison method f against the expected behavior generated by want
func testFloat32x16Compare(t *testing.T, f func(_, _ simd.Float32x16) simd.Mask32x16, want func(_, _ []float32) []int64) {
func testFloat32x16Compare(t *testing.T, f func(_, _ archsimd.Float32x16) archsimd.Mask32x16, want func(_, _ []float32) []int64) {
n := 16
t.Helper()
forSlicePair(t, float32s, n, func(x, y []float32) bool {
t.Helper()
a := simd.LoadFloat32x16Slice(x)
b := simd.LoadFloat32x16Slice(y)
a := archsimd.LoadFloat32x16Slice(x)
b := archsimd.LoadFloat32x16Slice(y)
g := make([]int32, n)
f(a, b).AsInt32x16().StoreSlice(g)
w := want(x, y)
@ -449,13 +449,13 @@ func testFloat32x16Compare(t *testing.T, f func(_, _ simd.Float32x16) simd.Mask3
}
// testFloat64x8Compare tests the simd comparison method f against the expected behavior generated by want
func testFloat64x8Compare(t *testing.T, f func(_, _ simd.Float64x8) simd.Mask64x8, want func(_, _ []float64) []int64) {
func testFloat64x8Compare(t *testing.T, f func(_, _ archsimd.Float64x8) archsimd.Mask64x8, want func(_, _ []float64) []int64) {
n := 8
t.Helper()
forSlicePair(t, float64s, n, func(x, y []float64) bool {
t.Helper()
a := simd.LoadFloat64x8Slice(x)
b := simd.LoadFloat64x8Slice(y)
a := archsimd.LoadFloat64x8Slice(x)
b := archsimd.LoadFloat64x8Slice(y)
g := make([]int64, n)
f(a, b).AsInt64x8().StoreSlice(g)
w := want(x, y)


@ -0,0 +1,265 @@
// Copyright 2025 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build goexperiment.simd && amd64
package simd_test
import (
"simd/archsimd"
"testing"
)
// AVX2 lacks most of these comparisons, but they can be synthesized
// from > and ==
var comparisonFixed bool = archsimd.X86.AVX512()
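// For illustration only, not part of this change: one way the synthesis
// mentioned above can be expressed, using only methods that already appear in
// these tests (Greater, Equal, Or, AsInt32x8). ToMask on Int32x8 is assumed by
// analogy with the Int8x16/Int16x8 uses later in this diff; this is a sketch,
// not the package's actual AVX2 lowering.
func lessViaGreaterSketch(a, b archsimd.Int32x8) archsimd.Mask32x8 {
	return b.Greater(a) // a < b is the same mask as b > a
}
func lessEqualViaGreaterSketch(a, b archsimd.Int32x8) archsimd.Mask32x8 {
	lt := b.Greater(a).AsInt32x8()
	eq := a.Equal(b).AsInt32x8()
	return lt.Or(eq).ToMask() // a <= b is (b > a) || (a == b)
}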
func TestLess(t *testing.T) {
testFloat32x4Compare(t, archsimd.Float32x4.Less, lessSlice[float32])
testFloat32x8Compare(t, archsimd.Float32x8.Less, lessSlice[float32])
testFloat64x2Compare(t, archsimd.Float64x2.Less, lessSlice[float64])
testFloat64x4Compare(t, archsimd.Float64x4.Less, lessSlice[float64])
testInt16x16Compare(t, archsimd.Int16x16.Less, lessSlice[int16])
testInt16x8Compare(t, archsimd.Int16x8.Less, lessSlice[int16])
testInt32x4Compare(t, archsimd.Int32x4.Less, lessSlice[int32])
testInt32x8Compare(t, archsimd.Int32x8.Less, lessSlice[int32])
testInt64x2Compare(t, archsimd.Int64x2.Less, lessSlice[int64])
testInt64x4Compare(t, archsimd.Int64x4.Less, lessSlice[int64])
testInt8x16Compare(t, archsimd.Int8x16.Less, lessSlice[int8])
testInt8x32Compare(t, archsimd.Int8x32.Less, lessSlice[int8])
testInt16x16Compare(t, archsimd.Int16x16.Less, lessSlice[int16])
testInt16x8Compare(t, archsimd.Int16x8.Less, lessSlice[int16])
testInt32x4Compare(t, archsimd.Int32x4.Less, lessSlice[int32])
testInt32x8Compare(t, archsimd.Int32x8.Less, lessSlice[int32])
testInt64x2Compare(t, archsimd.Int64x2.Less, lessSlice[int64])
testInt64x4Compare(t, archsimd.Int64x4.Less, lessSlice[int64])
testInt8x16Compare(t, archsimd.Int8x16.Less, lessSlice[int8])
testInt8x32Compare(t, archsimd.Int8x32.Less, lessSlice[int8])
testUint16x16Compare(t, archsimd.Uint16x16.Less, lessSlice[uint16])
testUint16x8Compare(t, archsimd.Uint16x8.Less, lessSlice[uint16])
testUint32x4Compare(t, archsimd.Uint32x4.Less, lessSlice[uint32])
testUint32x8Compare(t, archsimd.Uint32x8.Less, lessSlice[uint32])
testUint64x2Compare(t, archsimd.Uint64x2.Less, lessSlice[uint64])
testUint64x4Compare(t, archsimd.Uint64x4.Less, lessSlice[uint64])
testUint8x16Compare(t, archsimd.Uint8x16.Less, lessSlice[uint8])
testUint8x32Compare(t, archsimd.Uint8x32.Less, lessSlice[uint8])
if archsimd.X86.AVX512() {
testUint16x16Compare(t, archsimd.Uint16x16.Less, lessSlice[uint16])
testUint16x8Compare(t, archsimd.Uint16x8.Less, lessSlice[uint16])
testUint32x4Compare(t, archsimd.Uint32x4.Less, lessSlice[uint32])
testUint32x8Compare(t, archsimd.Uint32x8.Less, lessSlice[uint32])
testUint64x2Compare(t, archsimd.Uint64x2.Less, lessSlice[uint64])
testUint64x4Compare(t, archsimd.Uint64x4.Less, lessSlice[uint64])
testUint8x16Compare(t, archsimd.Uint8x16.Less, lessSlice[uint8])
testUint8x32Compare(t, archsimd.Uint8x32.Less, lessSlice[uint8])
testFloat32x16Compare(t, archsimd.Float32x16.Less, lessSlice[float32])
testFloat64x8Compare(t, archsimd.Float64x8.Less, lessSlice[float64])
testInt8x64Compare(t, archsimd.Int8x64.Less, lessSlice[int8])
testInt16x32Compare(t, archsimd.Int16x32.Less, lessSlice[int16])
testInt32x16Compare(t, archsimd.Int32x16.Less, lessSlice[int32])
testInt64x8Compare(t, archsimd.Int64x8.Less, lessSlice[int64])
testUint8x64Compare(t, archsimd.Uint8x64.Less, lessSlice[uint8])
testUint16x32Compare(t, archsimd.Uint16x32.Less, lessSlice[uint16])
testUint32x16Compare(t, archsimd.Uint32x16.Less, lessSlice[uint32])
testUint64x8Compare(t, archsimd.Uint64x8.Less, lessSlice[uint64])
}
}
func TestLessEqual(t *testing.T) {
testFloat32x4Compare(t, archsimd.Float32x4.LessEqual, lessEqualSlice[float32])
testFloat32x8Compare(t, archsimd.Float32x8.LessEqual, lessEqualSlice[float32])
testFloat64x2Compare(t, archsimd.Float64x2.LessEqual, lessEqualSlice[float64])
testFloat64x4Compare(t, archsimd.Float64x4.LessEqual, lessEqualSlice[float64])
testInt16x16Compare(t, archsimd.Int16x16.LessEqual, lessEqualSlice[int16])
testInt16x8Compare(t, archsimd.Int16x8.LessEqual, lessEqualSlice[int16])
testInt32x4Compare(t, archsimd.Int32x4.LessEqual, lessEqualSlice[int32])
testInt32x8Compare(t, archsimd.Int32x8.LessEqual, lessEqualSlice[int32])
testInt64x2Compare(t, archsimd.Int64x2.LessEqual, lessEqualSlice[int64])
testInt64x4Compare(t, archsimd.Int64x4.LessEqual, lessEqualSlice[int64])
testInt8x16Compare(t, archsimd.Int8x16.LessEqual, lessEqualSlice[int8])
testInt8x32Compare(t, archsimd.Int8x32.LessEqual, lessEqualSlice[int8])
testUint16x16Compare(t, archsimd.Uint16x16.LessEqual, lessEqualSlice[uint16])
testUint16x8Compare(t, archsimd.Uint16x8.LessEqual, lessEqualSlice[uint16])
testUint32x4Compare(t, archsimd.Uint32x4.LessEqual, lessEqualSlice[uint32])
testUint32x8Compare(t, archsimd.Uint32x8.LessEqual, lessEqualSlice[uint32])
testUint64x2Compare(t, archsimd.Uint64x2.LessEqual, lessEqualSlice[uint64])
testUint64x4Compare(t, archsimd.Uint64x4.LessEqual, lessEqualSlice[uint64])
testUint8x16Compare(t, archsimd.Uint8x16.LessEqual, lessEqualSlice[uint8])
testUint8x32Compare(t, archsimd.Uint8x32.LessEqual, lessEqualSlice[uint8])
if archsimd.X86.AVX512() {
testFloat32x16Compare(t, archsimd.Float32x16.LessEqual, lessEqualSlice[float32])
testFloat64x8Compare(t, archsimd.Float64x8.LessEqual, lessEqualSlice[float64])
testInt8x64Compare(t, archsimd.Int8x64.LessEqual, lessEqualSlice[int8])
testInt16x32Compare(t, archsimd.Int16x32.LessEqual, lessEqualSlice[int16])
testInt32x16Compare(t, archsimd.Int32x16.LessEqual, lessEqualSlice[int32])
testInt64x8Compare(t, archsimd.Int64x8.LessEqual, lessEqualSlice[int64])
testUint8x64Compare(t, archsimd.Uint8x64.LessEqual, lessEqualSlice[uint8])
testUint16x32Compare(t, archsimd.Uint16x32.LessEqual, lessEqualSlice[uint16])
testUint32x16Compare(t, archsimd.Uint32x16.LessEqual, lessEqualSlice[uint32])
testUint64x8Compare(t, archsimd.Uint64x8.LessEqual, lessEqualSlice[uint64])
}
}
func TestGreater(t *testing.T) {
testFloat32x4Compare(t, archsimd.Float32x4.Greater, greaterSlice[float32])
testFloat32x8Compare(t, archsimd.Float32x8.Greater, greaterSlice[float32])
testFloat64x2Compare(t, archsimd.Float64x2.Greater, greaterSlice[float64])
testFloat64x4Compare(t, archsimd.Float64x4.Greater, greaterSlice[float64])
testInt16x16Compare(t, archsimd.Int16x16.Greater, greaterSlice[int16])
testInt16x8Compare(t, archsimd.Int16x8.Greater, greaterSlice[int16])
testInt32x4Compare(t, archsimd.Int32x4.Greater, greaterSlice[int32])
testInt32x8Compare(t, archsimd.Int32x8.Greater, greaterSlice[int32])
testInt64x2Compare(t, archsimd.Int64x2.Greater, greaterSlice[int64])
testInt64x4Compare(t, archsimd.Int64x4.Greater, greaterSlice[int64])
testInt8x16Compare(t, archsimd.Int8x16.Greater, greaterSlice[int8])
testInt8x32Compare(t, archsimd.Int8x32.Greater, greaterSlice[int8])
testUint16x16Compare(t, archsimd.Uint16x16.Greater, greaterSlice[uint16])
testUint16x8Compare(t, archsimd.Uint16x8.Greater, greaterSlice[uint16])
testUint32x4Compare(t, archsimd.Uint32x4.Greater, greaterSlice[uint32])
testUint32x8Compare(t, archsimd.Uint32x8.Greater, greaterSlice[uint32])
testUint64x2Compare(t, archsimd.Uint64x2.Greater, greaterSlice[uint64])
testUint64x4Compare(t, archsimd.Uint64x4.Greater, greaterSlice[uint64])
testUint8x16Compare(t, archsimd.Uint8x16.Greater, greaterSlice[uint8])
testUint8x32Compare(t, archsimd.Uint8x32.Greater, greaterSlice[uint8])
if archsimd.X86.AVX512() {
testFloat32x16Compare(t, archsimd.Float32x16.Greater, greaterSlice[float32])
testFloat64x8Compare(t, archsimd.Float64x8.Greater, greaterSlice[float64])
testInt8x64Compare(t, archsimd.Int8x64.Greater, greaterSlice[int8])
testInt16x32Compare(t, archsimd.Int16x32.Greater, greaterSlice[int16])
testInt32x16Compare(t, archsimd.Int32x16.Greater, greaterSlice[int32])
testInt64x8Compare(t, archsimd.Int64x8.Greater, greaterSlice[int64])
testUint8x64Compare(t, archsimd.Uint8x64.Greater, greaterSlice[uint8])
testUint16x32Compare(t, archsimd.Uint16x32.Greater, greaterSlice[uint16])
testUint32x16Compare(t, archsimd.Uint32x16.Greater, greaterSlice[uint32])
testUint64x8Compare(t, archsimd.Uint64x8.Greater, greaterSlice[uint64])
}
}
func TestGreaterEqual(t *testing.T) {
testFloat32x4Compare(t, archsimd.Float32x4.GreaterEqual, greaterEqualSlice[float32])
testFloat32x8Compare(t, archsimd.Float32x8.GreaterEqual, greaterEqualSlice[float32])
testFloat64x2Compare(t, archsimd.Float64x2.GreaterEqual, greaterEqualSlice[float64])
testFloat64x4Compare(t, archsimd.Float64x4.GreaterEqual, greaterEqualSlice[float64])
testInt16x16Compare(t, archsimd.Int16x16.GreaterEqual, greaterEqualSlice[int16])
testInt16x8Compare(t, archsimd.Int16x8.GreaterEqual, greaterEqualSlice[int16])
testInt32x4Compare(t, archsimd.Int32x4.GreaterEqual, greaterEqualSlice[int32])
testInt32x8Compare(t, archsimd.Int32x8.GreaterEqual, greaterEqualSlice[int32])
testInt64x2Compare(t, archsimd.Int64x2.GreaterEqual, greaterEqualSlice[int64])
testInt64x4Compare(t, archsimd.Int64x4.GreaterEqual, greaterEqualSlice[int64])
testInt8x16Compare(t, archsimd.Int8x16.GreaterEqual, greaterEqualSlice[int8])
testInt8x32Compare(t, archsimd.Int8x32.GreaterEqual, greaterEqualSlice[int8])
testUint16x16Compare(t, archsimd.Uint16x16.GreaterEqual, greaterEqualSlice[uint16])
testUint16x8Compare(t, archsimd.Uint16x8.GreaterEqual, greaterEqualSlice[uint16])
testUint32x4Compare(t, archsimd.Uint32x4.GreaterEqual, greaterEqualSlice[uint32])
testUint32x8Compare(t, archsimd.Uint32x8.GreaterEqual, greaterEqualSlice[uint32])
testUint64x2Compare(t, archsimd.Uint64x2.GreaterEqual, greaterEqualSlice[uint64])
testUint64x4Compare(t, archsimd.Uint64x4.GreaterEqual, greaterEqualSlice[uint64])
testUint8x16Compare(t, archsimd.Uint8x16.GreaterEqual, greaterEqualSlice[uint8])
testUint8x32Compare(t, archsimd.Uint8x32.GreaterEqual, greaterEqualSlice[uint8])
if archsimd.X86.AVX512() {
testFloat32x16Compare(t, archsimd.Float32x16.GreaterEqual, greaterEqualSlice[float32])
testFloat64x8Compare(t, archsimd.Float64x8.GreaterEqual, greaterEqualSlice[float64])
testInt8x64Compare(t, archsimd.Int8x64.GreaterEqual, greaterEqualSlice[int8])
testInt16x32Compare(t, archsimd.Int16x32.GreaterEqual, greaterEqualSlice[int16])
testInt32x16Compare(t, archsimd.Int32x16.GreaterEqual, greaterEqualSlice[int32])
testInt64x8Compare(t, archsimd.Int64x8.GreaterEqual, greaterEqualSlice[int64])
testUint8x64Compare(t, archsimd.Uint8x64.GreaterEqual, greaterEqualSlice[uint8])
testUint16x32Compare(t, archsimd.Uint16x32.GreaterEqual, greaterEqualSlice[uint16])
testUint32x16Compare(t, archsimd.Uint32x16.GreaterEqual, greaterEqualSlice[uint32])
testUint64x8Compare(t, archsimd.Uint64x8.GreaterEqual, greaterEqualSlice[uint64])
}
}
func TestEqual(t *testing.T) {
testFloat32x4Compare(t, archsimd.Float32x4.Equal, equalSlice[float32])
testFloat32x8Compare(t, archsimd.Float32x8.Equal, equalSlice[float32])
testFloat64x2Compare(t, archsimd.Float64x2.Equal, equalSlice[float64])
testFloat64x4Compare(t, archsimd.Float64x4.Equal, equalSlice[float64])
testInt16x16Compare(t, archsimd.Int16x16.Equal, equalSlice[int16])
testInt16x8Compare(t, archsimd.Int16x8.Equal, equalSlice[int16])
testInt32x4Compare(t, archsimd.Int32x4.Equal, equalSlice[int32])
testInt32x8Compare(t, archsimd.Int32x8.Equal, equalSlice[int32])
testInt64x2Compare(t, archsimd.Int64x2.Equal, equalSlice[int64])
testInt64x4Compare(t, archsimd.Int64x4.Equal, equalSlice[int64])
testInt8x16Compare(t, archsimd.Int8x16.Equal, equalSlice[int8])
testInt8x32Compare(t, archsimd.Int8x32.Equal, equalSlice[int8])
testUint16x16Compare(t, archsimd.Uint16x16.Equal, equalSlice[uint16])
testUint16x8Compare(t, archsimd.Uint16x8.Equal, equalSlice[uint16])
testUint32x4Compare(t, archsimd.Uint32x4.Equal, equalSlice[uint32])
testUint32x8Compare(t, archsimd.Uint32x8.Equal, equalSlice[uint32])
testUint64x2Compare(t, archsimd.Uint64x2.Equal, equalSlice[uint64])
testUint64x4Compare(t, archsimd.Uint64x4.Equal, equalSlice[uint64])
testUint8x16Compare(t, archsimd.Uint8x16.Equal, equalSlice[uint8])
testUint8x32Compare(t, archsimd.Uint8x32.Equal, equalSlice[uint8])
if archsimd.X86.AVX512() {
testFloat32x16Compare(t, archsimd.Float32x16.Equal, equalSlice[float32])
testFloat64x8Compare(t, archsimd.Float64x8.Equal, equalSlice[float64])
testInt8x64Compare(t, archsimd.Int8x64.Equal, equalSlice[int8])
testInt16x32Compare(t, archsimd.Int16x32.Equal, equalSlice[int16])
testInt32x16Compare(t, archsimd.Int32x16.Equal, equalSlice[int32])
testInt64x8Compare(t, archsimd.Int64x8.Equal, equalSlice[int64])
testUint8x64Compare(t, archsimd.Uint8x64.Equal, equalSlice[uint8])
testUint16x32Compare(t, archsimd.Uint16x32.Equal, equalSlice[uint16])
testUint32x16Compare(t, archsimd.Uint32x16.Equal, equalSlice[uint32])
testUint64x8Compare(t, archsimd.Uint64x8.Equal, equalSlice[uint64])
}
}
func TestNotEqual(t *testing.T) {
testFloat32x4Compare(t, archsimd.Float32x4.NotEqual, notEqualSlice[float32])
testFloat32x8Compare(t, archsimd.Float32x8.NotEqual, notEqualSlice[float32])
testFloat64x2Compare(t, archsimd.Float64x2.NotEqual, notEqualSlice[float64])
testFloat64x4Compare(t, archsimd.Float64x4.NotEqual, notEqualSlice[float64])
testInt16x16Compare(t, archsimd.Int16x16.NotEqual, notEqualSlice[int16])
testInt16x8Compare(t, archsimd.Int16x8.NotEqual, notEqualSlice[int16])
testInt32x4Compare(t, archsimd.Int32x4.NotEqual, notEqualSlice[int32])
testInt32x8Compare(t, archsimd.Int32x8.NotEqual, notEqualSlice[int32])
testInt64x2Compare(t, archsimd.Int64x2.NotEqual, notEqualSlice[int64])
testInt64x4Compare(t, archsimd.Int64x4.NotEqual, notEqualSlice[int64])
testInt8x16Compare(t, archsimd.Int8x16.NotEqual, notEqualSlice[int8])
testInt8x32Compare(t, archsimd.Int8x32.NotEqual, notEqualSlice[int8])
testUint16x16Compare(t, archsimd.Uint16x16.NotEqual, notEqualSlice[uint16])
testUint16x8Compare(t, archsimd.Uint16x8.NotEqual, notEqualSlice[uint16])
testUint32x4Compare(t, archsimd.Uint32x4.NotEqual, notEqualSlice[uint32])
testUint32x8Compare(t, archsimd.Uint32x8.NotEqual, notEqualSlice[uint32])
testUint64x2Compare(t, archsimd.Uint64x2.NotEqual, notEqualSlice[uint64])
testUint64x4Compare(t, archsimd.Uint64x4.NotEqual, notEqualSlice[uint64])
testUint8x16Compare(t, archsimd.Uint8x16.NotEqual, notEqualSlice[uint8])
testUint8x32Compare(t, archsimd.Uint8x32.NotEqual, notEqualSlice[uint8])
if archsimd.X86.AVX512() {
testFloat32x16Compare(t, archsimd.Float32x16.NotEqual, notEqualSlice[float32])
testFloat64x8Compare(t, archsimd.Float64x8.NotEqual, notEqualSlice[float64])
testInt8x64Compare(t, archsimd.Int8x64.NotEqual, notEqualSlice[int8])
testInt16x32Compare(t, archsimd.Int16x32.NotEqual, notEqualSlice[int16])
testInt32x16Compare(t, archsimd.Int32x16.NotEqual, notEqualSlice[int32])
testInt64x8Compare(t, archsimd.Int64x8.NotEqual, notEqualSlice[int64])
testUint8x64Compare(t, archsimd.Uint8x64.NotEqual, notEqualSlice[uint8])
testUint16x32Compare(t, archsimd.Uint16x32.NotEqual, notEqualSlice[uint16])
testUint32x16Compare(t, archsimd.Uint32x16.NotEqual, notEqualSlice[uint32])
testUint64x8Compare(t, archsimd.Uint64x8.NotEqual, notEqualSlice[uint64])
}
}
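
A minimal usage sketch of the API these tests exercise, now imported as "simd/archsimd": it uses only calls that appear above (LoadInt32x4Slice, Equal, AsInt32x4, StoreSlice). The all-ones/zero lane values in the comments are the usual SIMD mask convention and an assumption here, not something this diff asserts; running it needs an amd64 toolchain with the SIMD experiment enabled, and the 512-bit types are additionally gated behind archsimd.X86.AVX512(), as the tests above do.

package main

import (
	"fmt"

	"simd/archsimd"
)

func main() {
	a := archsimd.LoadInt32x4Slice([]int32{1, 2, 3, 4})
	b := archsimd.LoadInt32x4Slice([]int32{1, 0, 3, 0})
	// Lane-wise compare; the result is a Mask32x4.
	m := a.Equal(b)
	// A mask can be viewed as an integer vector, as the helpers below do:
	// a set lane typically reads back as all ones (-1), a clear lane as 0.
	got := make([]int32, 4)
	m.AsInt32x4().StoreSlice(got)
	fmt.Println(got) // e.g. [-1 0 -1 0] under the all-ones convention
}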

View File

@ -9,22 +9,22 @@
package simd_test
import (
"simd"
"simd/archsimd"
"testing"
)
// testInt8x16CompareMasked tests the simd masked comparison method f against the expected behavior generated by want
// The mask is applied to the output of want; anything not in the mask, is zeroed.
func testInt8x16CompareMasked(t *testing.T,
f func(_, _ simd.Int8x16, m simd.Mask8x16) simd.Mask8x16,
f func(_, _ archsimd.Int8x16, m archsimd.Mask8x16) archsimd.Mask8x16,
want func(_, _ []int8) []int64) {
n := 16
t.Helper()
forSlicePairMasked(t, int8s, n, func(x, y []int8, m []bool) bool {
t.Helper()
a := simd.LoadInt8x16Slice(x)
b := simd.LoadInt8x16Slice(y)
k := simd.LoadInt8x16Slice(toVect[int8](m)).ToMask()
a := archsimd.LoadInt8x16Slice(x)
b := archsimd.LoadInt8x16Slice(y)
k := archsimd.LoadInt8x16Slice(toVect[int8](m)).ToMask()
g := make([]int8, n)
f(a, b, k).AsInt8x16().StoreSlice(g)
w := want(x, y)
@ -40,15 +40,15 @@ func testInt8x16CompareMasked(t *testing.T,
// testInt16x8CompareMasked tests the simd masked comparison method f against the expected behavior generated by want
// The mask is applied to the output of want; anything not in the mask, is zeroed.
func testInt16x8CompareMasked(t *testing.T,
f func(_, _ simd.Int16x8, m simd.Mask16x8) simd.Mask16x8,
f func(_, _ archsimd.Int16x8, m archsimd.Mask16x8) archsimd.Mask16x8,
want func(_, _ []int16) []int64) {
n := 8
t.Helper()
forSlicePairMasked(t, int16s, n, func(x, y []int16, m []bool) bool {
t.Helper()
a := simd.LoadInt16x8Slice(x)
b := simd.LoadInt16x8Slice(y)
k := simd.LoadInt16x8Slice(toVect[int16](m)).ToMask()
a := archsimd.LoadInt16x8Slice(x)
b := archsimd.LoadInt16x8Slice(y)
k := archsimd.LoadInt16x8Slice(toVect[int16](m)).ToMask()
g := make([]int16, n)
f(a, b, k).AsInt16x8().StoreSlice(g)
w := want(x, y)
@ -64,15 +64,15 @@ func testInt16x8CompareMasked(t *testing.T,
// testInt32x4CompareMasked tests the simd masked comparison method f against the expected behavior generated by want
// The mask is applied to the output of want; anything not in the mask, is zeroed.
func testInt32x4CompareMasked(t *testing.T,
f func(_, _ simd.Int32x4, m simd.Mask32x4) simd.Mask32x4,
f func(_, _ archsimd.Int32x4, m archsimd.Mask32x4) archsimd.Mask32x4,
want func(_, _ []int32) []int64) {
n := 4
t.Helper()
forSlicePairMasked(t, int32s, n, func(x, y []int32, m []bool) bool {
t.Helper()
a := simd.LoadInt32x4Slice(x)
b := simd.LoadInt32x4Slice(y)
k := simd.LoadInt32x4Slice(toVect[int32](m)).ToMask()
a := archsimd.LoadInt32x4Slice(x)
b := archsimd.LoadInt32x4Slice(y)
k := archsimd.LoadInt32x4Slice(toVect[int32](m)).ToMask()
g := make([]int32, n)
f(a, b, k).AsInt32x4().StoreSlice(g)
w := want(x, y)
@ -88,15 +88,15 @@ func testInt32x4CompareMasked(t *testing.T,
// testInt64x2CompareMasked tests the simd masked comparison method f against the expected behavior generated by want
// The mask is applied to the output of want; anything not in the mask, is zeroed.
func testInt64x2CompareMasked(t *testing.T,
f func(_, _ simd.Int64x2, m simd.Mask64x2) simd.Mask64x2,
f func(_, _ archsimd.Int64x2, m archsimd.Mask64x2) archsimd.Mask64x2,
want func(_, _ []int64) []int64) {
n := 2
t.Helper()
forSlicePairMasked(t, int64s, n, func(x, y []int64, m []bool) bool {
t.Helper()
a := simd.LoadInt64x2Slice(x)
b := simd.LoadInt64x2Slice(y)
k := simd.LoadInt64x2Slice(toVect[int64](m)).ToMask()
a := archsimd.LoadInt64x2Slice(x)
b := archsimd.LoadInt64x2Slice(y)
k := archsimd.LoadInt64x2Slice(toVect[int64](m)).ToMask()
g := make([]int64, n)
f(a, b, k).AsInt64x2().StoreSlice(g)
w := want(x, y)
@ -112,15 +112,15 @@ func testInt64x2CompareMasked(t *testing.T,
// testUint8x16CompareMasked tests the simd masked comparison method f against the expected behavior generated by want
// The mask is applied to the output of want; anything not in the mask, is zeroed.
func testUint8x16CompareMasked(t *testing.T,
f func(_, _ simd.Uint8x16, m simd.Mask8x16) simd.Mask8x16,
f func(_, _ archsimd.Uint8x16, m archsimd.Mask8x16) archsimd.Mask8x16,
want func(_, _ []uint8) []int64) {
n := 16
t.Helper()
forSlicePairMasked(t, uint8s, n, func(x, y []uint8, m []bool) bool {
t.Helper()
a := simd.LoadUint8x16Slice(x)
b := simd.LoadUint8x16Slice(y)
k := simd.LoadInt8x16Slice(toVect[int8](m)).ToMask()
a := archsimd.LoadUint8x16Slice(x)
b := archsimd.LoadUint8x16Slice(y)
k := archsimd.LoadInt8x16Slice(toVect[int8](m)).ToMask()
g := make([]int8, n)
f(a, b, k).AsInt8x16().StoreSlice(g)
w := want(x, y)
@ -136,15 +136,15 @@ func testUint8x16CompareMasked(t *testing.T,
// testUint16x8CompareMasked tests the simd masked comparison method f against the expected behavior generated by want
// The mask is applied to the output of want; anything not in the mask, is zeroed.
func testUint16x8CompareMasked(t *testing.T,
f func(_, _ simd.Uint16x8, m simd.Mask16x8) simd.Mask16x8,
f func(_, _ archsimd.Uint16x8, m archsimd.Mask16x8) archsimd.Mask16x8,
want func(_, _ []uint16) []int64) {
n := 8
t.Helper()
forSlicePairMasked(t, uint16s, n, func(x, y []uint16, m []bool) bool {
t.Helper()
a := simd.LoadUint16x8Slice(x)
b := simd.LoadUint16x8Slice(y)
k := simd.LoadInt16x8Slice(toVect[int16](m)).ToMask()
a := archsimd.LoadUint16x8Slice(x)
b := archsimd.LoadUint16x8Slice(y)
k := archsimd.LoadInt16x8Slice(toVect[int16](m)).ToMask()
g := make([]int16, n)
f(a, b, k).AsInt16x8().StoreSlice(g)
w := want(x, y)
@ -160,15 +160,15 @@ func testUint16x8CompareMasked(t *testing.T,
// testUint32x4CompareMasked tests the simd masked comparison method f against the expected behavior generated by want
// The mask is applied to the output of want; anything not in the mask, is zeroed.
func testUint32x4CompareMasked(t *testing.T,
f func(_, _ simd.Uint32x4, m simd.Mask32x4) simd.Mask32x4,
f func(_, _ archsimd.Uint32x4, m archsimd.Mask32x4) archsimd.Mask32x4,
want func(_, _ []uint32) []int64) {
n := 4
t.Helper()
forSlicePairMasked(t, uint32s, n, func(x, y []uint32, m []bool) bool {
t.Helper()
a := simd.LoadUint32x4Slice(x)
b := simd.LoadUint32x4Slice(y)
k := simd.LoadInt32x4Slice(toVect[int32](m)).ToMask()
a := archsimd.LoadUint32x4Slice(x)
b := archsimd.LoadUint32x4Slice(y)
k := archsimd.LoadInt32x4Slice(toVect[int32](m)).ToMask()
g := make([]int32, n)
f(a, b, k).AsInt32x4().StoreSlice(g)
w := want(x, y)
@ -184,15 +184,15 @@ func testUint32x4CompareMasked(t *testing.T,
// testUint64x2CompareMasked tests the simd masked comparison method f against the expected behavior generated by want
// The mask is applied to the output of want; anything not in the mask, is zeroed.
func testUint64x2CompareMasked(t *testing.T,
f func(_, _ simd.Uint64x2, m simd.Mask64x2) simd.Mask64x2,
f func(_, _ archsimd.Uint64x2, m archsimd.Mask64x2) archsimd.Mask64x2,
want func(_, _ []uint64) []int64) {
n := 2
t.Helper()
forSlicePairMasked(t, uint64s, n, func(x, y []uint64, m []bool) bool {
t.Helper()
a := simd.LoadUint64x2Slice(x)
b := simd.LoadUint64x2Slice(y)
k := simd.LoadInt64x2Slice(toVect[int64](m)).ToMask()
a := archsimd.LoadUint64x2Slice(x)
b := archsimd.LoadUint64x2Slice(y)
k := archsimd.LoadInt64x2Slice(toVect[int64](m)).ToMask()
g := make([]int64, n)
f(a, b, k).AsInt64x2().StoreSlice(g)
w := want(x, y)
@ -208,15 +208,15 @@ func testUint64x2CompareMasked(t *testing.T,
// testFloat32x4CompareMasked tests the simd masked comparison method f against the expected behavior generated by want
// The mask is applied to the output of want; anything not in the mask, is zeroed.
func testFloat32x4CompareMasked(t *testing.T,
f func(_, _ simd.Float32x4, m simd.Mask32x4) simd.Mask32x4,
f func(_, _ archsimd.Float32x4, m archsimd.Mask32x4) archsimd.Mask32x4,
want func(_, _ []float32) []int64) {
n := 4
t.Helper()
forSlicePairMasked(t, float32s, n, func(x, y []float32, m []bool) bool {
t.Helper()
a := simd.LoadFloat32x4Slice(x)
b := simd.LoadFloat32x4Slice(y)
k := simd.LoadInt32x4Slice(toVect[int32](m)).ToMask()
a := archsimd.LoadFloat32x4Slice(x)
b := archsimd.LoadFloat32x4Slice(y)
k := archsimd.LoadInt32x4Slice(toVect[int32](m)).ToMask()
g := make([]int32, n)
f(a, b, k).AsInt32x4().StoreSlice(g)
w := want(x, y)
@ -232,15 +232,15 @@ func testFloat32x4CompareMasked(t *testing.T,
// testFloat64x2CompareMasked tests the simd masked comparison method f against the expected behavior generated by want
// The mask is applied to the output of want; anything not in the mask, is zeroed.
func testFloat64x2CompareMasked(t *testing.T,
f func(_, _ simd.Float64x2, m simd.Mask64x2) simd.Mask64x2,
f func(_, _ archsimd.Float64x2, m archsimd.Mask64x2) archsimd.Mask64x2,
want func(_, _ []float64) []int64) {
n := 2
t.Helper()
forSlicePairMasked(t, float64s, n, func(x, y []float64, m []bool) bool {
t.Helper()
a := simd.LoadFloat64x2Slice(x)
b := simd.LoadFloat64x2Slice(y)
k := simd.LoadInt64x2Slice(toVect[int64](m)).ToMask()
a := archsimd.LoadFloat64x2Slice(x)
b := archsimd.LoadFloat64x2Slice(y)
k := archsimd.LoadInt64x2Slice(toVect[int64](m)).ToMask()
g := make([]int64, n)
f(a, b, k).AsInt64x2().StoreSlice(g)
w := want(x, y)
@ -256,15 +256,15 @@ func testFloat64x2CompareMasked(t *testing.T,
// testInt8x32CompareMasked tests the simd masked comparison method f against the expected behavior generated by want
// The mask is applied to the output of want; anything not in the mask, is zeroed.
func testInt8x32CompareMasked(t *testing.T,
f func(_, _ simd.Int8x32, m simd.Mask8x32) simd.Mask8x32,
f func(_, _ archsimd.Int8x32, m archsimd.Mask8x32) archsimd.Mask8x32,
want func(_, _ []int8) []int64) {
n := 32
t.Helper()
forSlicePairMasked(t, int8s, n, func(x, y []int8, m []bool) bool {
t.Helper()
a := simd.LoadInt8x32Slice(x)
b := simd.LoadInt8x32Slice(y)
k := simd.LoadInt8x32Slice(toVect[int8](m)).ToMask()
a := archsimd.LoadInt8x32Slice(x)
b := archsimd.LoadInt8x32Slice(y)
k := archsimd.LoadInt8x32Slice(toVect[int8](m)).ToMask()
g := make([]int8, n)
f(a, b, k).AsInt8x32().StoreSlice(g)
w := want(x, y)
@ -280,15 +280,15 @@ func testInt8x32CompareMasked(t *testing.T,
// testInt16x16CompareMasked tests the simd masked comparison method f against the expected behavior generated by want
// The mask is applied to the output of want; anything not in the mask, is zeroed.
func testInt16x16CompareMasked(t *testing.T,
f func(_, _ simd.Int16x16, m simd.Mask16x16) simd.Mask16x16,
f func(_, _ archsimd.Int16x16, m archsimd.Mask16x16) archsimd.Mask16x16,
want func(_, _ []int16) []int64) {
n := 16
t.Helper()
forSlicePairMasked(t, int16s, n, func(x, y []int16, m []bool) bool {
t.Helper()
a := simd.LoadInt16x16Slice(x)
b := simd.LoadInt16x16Slice(y)
k := simd.LoadInt16x16Slice(toVect[int16](m)).ToMask()
a := archsimd.LoadInt16x16Slice(x)
b := archsimd.LoadInt16x16Slice(y)
k := archsimd.LoadInt16x16Slice(toVect[int16](m)).ToMask()
g := make([]int16, n)
f(a, b, k).AsInt16x16().StoreSlice(g)
w := want(x, y)
@ -304,15 +304,15 @@ func testInt16x16CompareMasked(t *testing.T,
// testInt32x8CompareMasked tests the simd masked comparison method f against the expected behavior generated by want
// The mask is applied to the output of want; anything not in the mask, is zeroed.
func testInt32x8CompareMasked(t *testing.T,
f func(_, _ simd.Int32x8, m simd.Mask32x8) simd.Mask32x8,
f func(_, _ archsimd.Int32x8, m archsimd.Mask32x8) archsimd.Mask32x8,
want func(_, _ []int32) []int64) {
n := 8
t.Helper()
forSlicePairMasked(t, int32s, n, func(x, y []int32, m []bool) bool {
t.Helper()
a := simd.LoadInt32x8Slice(x)
b := simd.LoadInt32x8Slice(y)
k := simd.LoadInt32x8Slice(toVect[int32](m)).ToMask()
a := archsimd.LoadInt32x8Slice(x)
b := archsimd.LoadInt32x8Slice(y)
k := archsimd.LoadInt32x8Slice(toVect[int32](m)).ToMask()
g := make([]int32, n)
f(a, b, k).AsInt32x8().StoreSlice(g)
w := want(x, y)
@ -328,15 +328,15 @@ func testInt32x8CompareMasked(t *testing.T,
// testInt64x4CompareMasked tests the simd masked comparison method f against the expected behavior generated by want
// The mask is applied to the output of want; anything not in the mask, is zeroed.
func testInt64x4CompareMasked(t *testing.T,
f func(_, _ simd.Int64x4, m simd.Mask64x4) simd.Mask64x4,
f func(_, _ archsimd.Int64x4, m archsimd.Mask64x4) archsimd.Mask64x4,
want func(_, _ []int64) []int64) {
n := 4
t.Helper()
forSlicePairMasked(t, int64s, n, func(x, y []int64, m []bool) bool {
t.Helper()
a := simd.LoadInt64x4Slice(x)
b := simd.LoadInt64x4Slice(y)
k := simd.LoadInt64x4Slice(toVect[int64](m)).ToMask()
a := archsimd.LoadInt64x4Slice(x)
b := archsimd.LoadInt64x4Slice(y)
k := archsimd.LoadInt64x4Slice(toVect[int64](m)).ToMask()
g := make([]int64, n)
f(a, b, k).AsInt64x4().StoreSlice(g)
w := want(x, y)
@ -352,15 +352,15 @@ func testInt64x4CompareMasked(t *testing.T,
// testUint8x32CompareMasked tests the simd masked comparison method f against the expected behavior generated by want
// The mask is applied to the output of want; anything not in the mask, is zeroed.
func testUint8x32CompareMasked(t *testing.T,
f func(_, _ simd.Uint8x32, m simd.Mask8x32) simd.Mask8x32,
f func(_, _ archsimd.Uint8x32, m archsimd.Mask8x32) archsimd.Mask8x32,
want func(_, _ []uint8) []int64) {
n := 32
t.Helper()
forSlicePairMasked(t, uint8s, n, func(x, y []uint8, m []bool) bool {
t.Helper()
a := simd.LoadUint8x32Slice(x)
b := simd.LoadUint8x32Slice(y)
k := simd.LoadInt8x32Slice(toVect[int8](m)).ToMask()
a := archsimd.LoadUint8x32Slice(x)
b := archsimd.LoadUint8x32Slice(y)
k := archsimd.LoadInt8x32Slice(toVect[int8](m)).ToMask()
g := make([]int8, n)
f(a, b, k).AsInt8x32().StoreSlice(g)
w := want(x, y)
@ -376,15 +376,15 @@ func testUint8x32CompareMasked(t *testing.T,
// testUint16x16CompareMasked tests the simd masked comparison method f against the expected behavior generated by want
// The mask is applied to the output of want; anything not in the mask, is zeroed.
func testUint16x16CompareMasked(t *testing.T,
f func(_, _ simd.Uint16x16, m simd.Mask16x16) simd.Mask16x16,
f func(_, _ archsimd.Uint16x16, m archsimd.Mask16x16) archsimd.Mask16x16,
want func(_, _ []uint16) []int64) {
n := 16
t.Helper()
forSlicePairMasked(t, uint16s, n, func(x, y []uint16, m []bool) bool {
t.Helper()
a := simd.LoadUint16x16Slice(x)
b := simd.LoadUint16x16Slice(y)
k := simd.LoadInt16x16Slice(toVect[int16](m)).ToMask()
a := archsimd.LoadUint16x16Slice(x)
b := archsimd.LoadUint16x16Slice(y)
k := archsimd.LoadInt16x16Slice(toVect[int16](m)).ToMask()
g := make([]int16, n)
f(a, b, k).AsInt16x16().StoreSlice(g)
w := want(x, y)
@ -400,15 +400,15 @@ func testUint16x16CompareMasked(t *testing.T,
// testUint32x8CompareMasked tests the simd masked comparison method f against the expected behavior generated by want
// The mask is applied to the output of want; anything not in the mask, is zeroed.
func testUint32x8CompareMasked(t *testing.T,
f func(_, _ simd.Uint32x8, m simd.Mask32x8) simd.Mask32x8,
f func(_, _ archsimd.Uint32x8, m archsimd.Mask32x8) archsimd.Mask32x8,
want func(_, _ []uint32) []int64) {
n := 8
t.Helper()
forSlicePairMasked(t, uint32s, n, func(x, y []uint32, m []bool) bool {
t.Helper()
a := simd.LoadUint32x8Slice(x)
b := simd.LoadUint32x8Slice(y)
k := simd.LoadInt32x8Slice(toVect[int32](m)).ToMask()
a := archsimd.LoadUint32x8Slice(x)
b := archsimd.LoadUint32x8Slice(y)
k := archsimd.LoadInt32x8Slice(toVect[int32](m)).ToMask()
g := make([]int32, n)
f(a, b, k).AsInt32x8().StoreSlice(g)
w := want(x, y)
@ -424,15 +424,15 @@ func testUint32x8CompareMasked(t *testing.T,
// testUint64x4CompareMasked tests the simd masked comparison method f against the expected behavior generated by want
// The mask is applied to the output of want; anything not in the mask, is zeroed.
func testUint64x4CompareMasked(t *testing.T,
f func(_, _ simd.Uint64x4, m simd.Mask64x4) simd.Mask64x4,
f func(_, _ archsimd.Uint64x4, m archsimd.Mask64x4) archsimd.Mask64x4,
want func(_, _ []uint64) []int64) {
n := 4
t.Helper()
forSlicePairMasked(t, uint64s, n, func(x, y []uint64, m []bool) bool {
t.Helper()
a := simd.LoadUint64x4Slice(x)
b := simd.LoadUint64x4Slice(y)
k := simd.LoadInt64x4Slice(toVect[int64](m)).ToMask()
a := archsimd.LoadUint64x4Slice(x)
b := archsimd.LoadUint64x4Slice(y)
k := archsimd.LoadInt64x4Slice(toVect[int64](m)).ToMask()
g := make([]int64, n)
f(a, b, k).AsInt64x4().StoreSlice(g)
w := want(x, y)
@ -448,15 +448,15 @@ func testUint64x4CompareMasked(t *testing.T,
// testFloat32x8CompareMasked tests the simd masked comparison method f against the expected behavior generated by want
// The mask is applied to the output of want; anything not in the mask, is zeroed.
func testFloat32x8CompareMasked(t *testing.T,
f func(_, _ simd.Float32x8, m simd.Mask32x8) simd.Mask32x8,
f func(_, _ archsimd.Float32x8, m archsimd.Mask32x8) archsimd.Mask32x8,
want func(_, _ []float32) []int64) {
n := 8
t.Helper()
forSlicePairMasked(t, float32s, n, func(x, y []float32, m []bool) bool {
t.Helper()
a := simd.LoadFloat32x8Slice(x)
b := simd.LoadFloat32x8Slice(y)
k := simd.LoadInt32x8Slice(toVect[int32](m)).ToMask()
a := archsimd.LoadFloat32x8Slice(x)
b := archsimd.LoadFloat32x8Slice(y)
k := archsimd.LoadInt32x8Slice(toVect[int32](m)).ToMask()
g := make([]int32, n)
f(a, b, k).AsInt32x8().StoreSlice(g)
w := want(x, y)
@ -472,15 +472,15 @@ func testFloat32x8CompareMasked(t *testing.T,
// testFloat64x4CompareMasked tests the simd masked comparison method f against the expected behavior generated by want
// The mask is applied to the output of want; anything not in the mask, is zeroed.
func testFloat64x4CompareMasked(t *testing.T,
f func(_, _ simd.Float64x4, m simd.Mask64x4) simd.Mask64x4,
f func(_, _ archsimd.Float64x4, m archsimd.Mask64x4) archsimd.Mask64x4,
want func(_, _ []float64) []int64) {
n := 4
t.Helper()
forSlicePairMasked(t, float64s, n, func(x, y []float64, m []bool) bool {
t.Helper()
a := simd.LoadFloat64x4Slice(x)
b := simd.LoadFloat64x4Slice(y)
k := simd.LoadInt64x4Slice(toVect[int64](m)).ToMask()
a := archsimd.LoadFloat64x4Slice(x)
b := archsimd.LoadFloat64x4Slice(y)
k := archsimd.LoadInt64x4Slice(toVect[int64](m)).ToMask()
g := make([]int64, n)
f(a, b, k).AsInt64x4().StoreSlice(g)
w := want(x, y)
@ -496,15 +496,15 @@ func testFloat64x4CompareMasked(t *testing.T,
// testInt8x64CompareMasked tests the simd masked comparison method f against the expected behavior generated by want
// The mask is applied to the output of want; anything not in the mask, is zeroed.
func testInt8x64CompareMasked(t *testing.T,
f func(_, _ simd.Int8x64, m simd.Mask8x64) simd.Mask8x64,
f func(_, _ archsimd.Int8x64, m archsimd.Mask8x64) archsimd.Mask8x64,
want func(_, _ []int8) []int64) {
n := 64
t.Helper()
forSlicePairMasked(t, int8s, n, func(x, y []int8, m []bool) bool {
t.Helper()
a := simd.LoadInt8x64Slice(x)
b := simd.LoadInt8x64Slice(y)
k := simd.LoadInt8x64Slice(toVect[int8](m)).ToMask()
a := archsimd.LoadInt8x64Slice(x)
b := archsimd.LoadInt8x64Slice(y)
k := archsimd.LoadInt8x64Slice(toVect[int8](m)).ToMask()
g := make([]int8, n)
f(a, b, k).AsInt8x64().StoreSlice(g)
w := want(x, y)
@ -520,15 +520,15 @@ func testInt8x64CompareMasked(t *testing.T,
// testInt16x32CompareMasked tests the simd masked comparison method f against the expected behavior generated by want
// The mask is applied to the output of want; anything not in the mask, is zeroed.
func testInt16x32CompareMasked(t *testing.T,
f func(_, _ simd.Int16x32, m simd.Mask16x32) simd.Mask16x32,
f func(_, _ archsimd.Int16x32, m archsimd.Mask16x32) archsimd.Mask16x32,
want func(_, _ []int16) []int64) {
n := 32
t.Helper()
forSlicePairMasked(t, int16s, n, func(x, y []int16, m []bool) bool {
t.Helper()
a := simd.LoadInt16x32Slice(x)
b := simd.LoadInt16x32Slice(y)
k := simd.LoadInt16x32Slice(toVect[int16](m)).ToMask()
a := archsimd.LoadInt16x32Slice(x)
b := archsimd.LoadInt16x32Slice(y)
k := archsimd.LoadInt16x32Slice(toVect[int16](m)).ToMask()
g := make([]int16, n)
f(a, b, k).AsInt16x32().StoreSlice(g)
w := want(x, y)
@ -544,15 +544,15 @@ func testInt16x32CompareMasked(t *testing.T,
// testInt32x16CompareMasked tests the simd masked comparison method f against the expected behavior generated by want
// The mask is applied to the output of want; anything not in the mask, is zeroed.
func testInt32x16CompareMasked(t *testing.T,
f func(_, _ simd.Int32x16, m simd.Mask32x16) simd.Mask32x16,
f func(_, _ archsimd.Int32x16, m archsimd.Mask32x16) archsimd.Mask32x16,
want func(_, _ []int32) []int64) {
n := 16
t.Helper()
forSlicePairMasked(t, int32s, n, func(x, y []int32, m []bool) bool {
t.Helper()
a := simd.LoadInt32x16Slice(x)
b := simd.LoadInt32x16Slice(y)
k := simd.LoadInt32x16Slice(toVect[int32](m)).ToMask()
a := archsimd.LoadInt32x16Slice(x)
b := archsimd.LoadInt32x16Slice(y)
k := archsimd.LoadInt32x16Slice(toVect[int32](m)).ToMask()
g := make([]int32, n)
f(a, b, k).AsInt32x16().StoreSlice(g)
w := want(x, y)
@ -568,15 +568,15 @@ func testInt32x16CompareMasked(t *testing.T,
// testInt64x8CompareMasked tests the simd masked comparison method f against the expected behavior generated by want
// The mask is applied to the output of want; anything not in the mask, is zeroed.
func testInt64x8CompareMasked(t *testing.T,
f func(_, _ simd.Int64x8, m simd.Mask64x8) simd.Mask64x8,
f func(_, _ archsimd.Int64x8, m archsimd.Mask64x8) archsimd.Mask64x8,
want func(_, _ []int64) []int64) {
n := 8
t.Helper()
forSlicePairMasked(t, int64s, n, func(x, y []int64, m []bool) bool {
t.Helper()
a := simd.LoadInt64x8Slice(x)
b := simd.LoadInt64x8Slice(y)
k := simd.LoadInt64x8Slice(toVect[int64](m)).ToMask()
a := archsimd.LoadInt64x8Slice(x)
b := archsimd.LoadInt64x8Slice(y)
k := archsimd.LoadInt64x8Slice(toVect[int64](m)).ToMask()
g := make([]int64, n)
f(a, b, k).AsInt64x8().StoreSlice(g)
w := want(x, y)
@ -592,15 +592,15 @@ func testInt64x8CompareMasked(t *testing.T,
// testUint8x64CompareMasked tests the simd masked comparison method f against the expected behavior generated by want
// The mask is applied to the output of want; anything not in the mask, is zeroed.
func testUint8x64CompareMasked(t *testing.T,
f func(_, _ simd.Uint8x64, m simd.Mask8x64) simd.Mask8x64,
f func(_, _ archsimd.Uint8x64, m archsimd.Mask8x64) archsimd.Mask8x64,
want func(_, _ []uint8) []int64) {
n := 64
t.Helper()
forSlicePairMasked(t, uint8s, n, func(x, y []uint8, m []bool) bool {
t.Helper()
a := simd.LoadUint8x64Slice(x)
b := simd.LoadUint8x64Slice(y)
k := simd.LoadInt8x64Slice(toVect[int8](m)).ToMask()
a := archsimd.LoadUint8x64Slice(x)
b := archsimd.LoadUint8x64Slice(y)
k := archsimd.LoadInt8x64Slice(toVect[int8](m)).ToMask()
g := make([]int8, n)
f(a, b, k).AsInt8x64().StoreSlice(g)
w := want(x, y)
@ -616,15 +616,15 @@ func testUint8x64CompareMasked(t *testing.T,
// testUint16x32CompareMasked tests the simd masked comparison method f against the expected behavior generated by want
// The mask is applied to the output of want; anything not in the mask, is zeroed.
func testUint16x32CompareMasked(t *testing.T,
f func(_, _ simd.Uint16x32, m simd.Mask16x32) simd.Mask16x32,
f func(_, _ archsimd.Uint16x32, m archsimd.Mask16x32) archsimd.Mask16x32,
want func(_, _ []uint16) []int64) {
n := 32
t.Helper()
forSlicePairMasked(t, uint16s, n, func(x, y []uint16, m []bool) bool {
t.Helper()
a := simd.LoadUint16x32Slice(x)
b := simd.LoadUint16x32Slice(y)
k := simd.LoadInt16x32Slice(toVect[int16](m)).ToMask()
a := archsimd.LoadUint16x32Slice(x)
b := archsimd.LoadUint16x32Slice(y)
k := archsimd.LoadInt16x32Slice(toVect[int16](m)).ToMask()
g := make([]int16, n)
f(a, b, k).AsInt16x32().StoreSlice(g)
w := want(x, y)
@ -640,15 +640,15 @@ func testUint16x32CompareMasked(t *testing.T,
// testUint32x16CompareMasked tests the simd masked comparison method f against the expected behavior generated by want
// The mask is applied to the output of want; anything not in the mask, is zeroed.
func testUint32x16CompareMasked(t *testing.T,
f func(_, _ simd.Uint32x16, m simd.Mask32x16) simd.Mask32x16,
f func(_, _ archsimd.Uint32x16, m archsimd.Mask32x16) archsimd.Mask32x16,
want func(_, _ []uint32) []int64) {
n := 16
t.Helper()
forSlicePairMasked(t, uint32s, n, func(x, y []uint32, m []bool) bool {
t.Helper()
a := simd.LoadUint32x16Slice(x)
b := simd.LoadUint32x16Slice(y)
k := simd.LoadInt32x16Slice(toVect[int32](m)).ToMask()
a := archsimd.LoadUint32x16Slice(x)
b := archsimd.LoadUint32x16Slice(y)
k := archsimd.LoadInt32x16Slice(toVect[int32](m)).ToMask()
g := make([]int32, n)
f(a, b, k).AsInt32x16().StoreSlice(g)
w := want(x, y)
@ -664,15 +664,15 @@ func testUint32x16CompareMasked(t *testing.T,
// testUint64x8CompareMasked tests the simd masked comparison method f against the expected behavior generated by want
// The mask is applied to the output of want; anything not in the mask, is zeroed.
func testUint64x8CompareMasked(t *testing.T,
f func(_, _ simd.Uint64x8, m simd.Mask64x8) simd.Mask64x8,
f func(_, _ archsimd.Uint64x8, m archsimd.Mask64x8) archsimd.Mask64x8,
want func(_, _ []uint64) []int64) {
n := 8
t.Helper()
forSlicePairMasked(t, uint64s, n, func(x, y []uint64, m []bool) bool {
t.Helper()
a := simd.LoadUint64x8Slice(x)
b := simd.LoadUint64x8Slice(y)
k := simd.LoadInt64x8Slice(toVect[int64](m)).ToMask()
a := archsimd.LoadUint64x8Slice(x)
b := archsimd.LoadUint64x8Slice(y)
k := archsimd.LoadInt64x8Slice(toVect[int64](m)).ToMask()
g := make([]int64, n)
f(a, b, k).AsInt64x8().StoreSlice(g)
w := want(x, y)
@ -688,15 +688,15 @@ func testUint64x8CompareMasked(t *testing.T,
// testFloat32x16CompareMasked tests the simd masked comparison method f against the expected behavior generated by want
// The mask is applied to the output of want; anything not in the mask, is zeroed.
func testFloat32x16CompareMasked(t *testing.T,
f func(_, _ simd.Float32x16, m simd.Mask32x16) simd.Mask32x16,
f func(_, _ archsimd.Float32x16, m archsimd.Mask32x16) archsimd.Mask32x16,
want func(_, _ []float32) []int64) {
n := 16
t.Helper()
forSlicePairMasked(t, float32s, n, func(x, y []float32, m []bool) bool {
t.Helper()
a := simd.LoadFloat32x16Slice(x)
b := simd.LoadFloat32x16Slice(y)
k := simd.LoadInt32x16Slice(toVect[int32](m)).ToMask()
a := archsimd.LoadFloat32x16Slice(x)
b := archsimd.LoadFloat32x16Slice(y)
k := archsimd.LoadInt32x16Slice(toVect[int32](m)).ToMask()
g := make([]int32, n)
f(a, b, k).AsInt32x16().StoreSlice(g)
w := want(x, y)
@ -712,15 +712,15 @@ func testFloat32x16CompareMasked(t *testing.T,
// testFloat64x8CompareMasked tests the simd masked comparison method f against the expected behavior generated by want
// The mask is applied to the output of want; anything not in the mask, is zeroed.
func testFloat64x8CompareMasked(t *testing.T,
f func(_, _ simd.Float64x8, m simd.Mask64x8) simd.Mask64x8,
f func(_, _ archsimd.Float64x8, m archsimd.Mask64x8) archsimd.Mask64x8,
want func(_, _ []float64) []int64) {
n := 8
t.Helper()
forSlicePairMasked(t, float64s, n, func(x, y []float64, m []bool) bool {
t.Helper()
a := simd.LoadFloat64x8Slice(x)
b := simd.LoadFloat64x8Slice(y)
k := simd.LoadInt64x8Slice(toVect[int64](m)).ToMask()
a := archsimd.LoadFloat64x8Slice(x)
b := archsimd.LoadFloat64x8Slice(y)
k := archsimd.LoadInt64x8Slice(toVect[int64](m)).ToMask()
g := make([]int64, n)
f(a, b, k).AsInt64x8().StoreSlice(g)
w := want(x, y)
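
The masked helpers above all construct their masks the same way: a []bool is widened to a 0/-1 lane vector (via the test-local toVect helper, whose definition is not part of this diff), loaded as an integer vector, and reinterpreted with ToMask; the doc comments then require that lanes outside the mask come back zeroed. Below is a hedged sketch of that round trip, with boolsToInt8 standing in for toVect (both that name and the true -> -1 mapping are assumptions, not part of this change).

package example

import "simd/archsimd"

// boolsToInt8 plays the role of the tests' toVect helper: true becomes -1
// (all bits set), false becomes 0. The real helper is defined outside this
// diff, so this mapping is an assumption.
func boolsToInt8(m []bool) []int8 {
	v := make([]int8, len(m))
	for i, b := range m {
		if b {
			v[i] = -1
		}
	}
	return v
}

// maskFromBools mirrors the helpers' LoadInt8x16Slice(toVect[int8](m)).ToMask()
// pattern to build a Mask8x16 from 16 bools.
func maskFromBools(m []bool) archsimd.Mask8x16 {
	return archsimd.LoadInt8x16Slice(boolsToInt8(m)).ToMask()
}

Whatever masked comparison a caller then invokes is expected, per the comments above, to produce zero in every lane the mask clears, which is exactly what the helpers verify by diffing the stored result against want's masked output.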

Some files were not shown because too many files have changed in this diff.