wazevo(arm64): fixes lowerSelectVec (#1837)

Signed-off-by: Takeshi Yoneda <t.y.mathetake@gmail.com>
This commit is contained in:
Takeshi Yoneda
2023-11-15 09:08:25 +09:00
committed by GitHub
parent a1b48079b9
commit 33d815fe40
2 changed files with 12 additions and 8 deletions

View File

@@ -1906,18 +1906,22 @@ func (m *machine) lowerSelect(c, x, y, result ssa.Value) {
} }
func (m *machine) lowerSelectVec(rc, rn, rm, rd operand) { func (m *machine) lowerSelectVec(rc, rn, rm, rd operand) {
// First we clear the unnecessary bits of rc by ANDing it with 1. // First, we copy the condition to a temporary register in case rc is used somewhere else.
one := m.compiler.AllocateVReg(ssa.TypeI32) tmp := m.compiler.AllocateVReg(ssa.TypeI32)
m.lowerConstantI32(one, 1) mov := m.allocateInstr()
mov.asFpuMov128(tmp, rc.nr())
m.insert(mov)
// Next is to clear the unnecessary bits of rc by ANDing it with 1, and store it to a temporary register.
oneOrZero := m.compiler.AllocateVReg(ssa.TypeI32)
and := m.allocateInstr() and := m.allocateInstr()
oneOrZero := operandNR(m.compiler.AllocateVReg(ssa.TypeI32)) and.asALUBitmaskImm(aluOpAnd, oneOrZero, tmp, 1, false)
and.asALU(aluOpAnd, oneOrZero, rc, operandNR(one), false)
m.insert(and) m.insert(and)
// Sets all bits to 1 if rc is not zero. // Sets all bits to 1 if rc is not zero.
allOneOrZero := operandNR(m.compiler.AllocateVReg(ssa.TypeI64)) allOneOrZero := operandNR(m.compiler.AllocateVReg(ssa.TypeI64))
alu := m.allocateInstr() alu := m.allocateInstr()
alu.asALU(aluOpSub, allOneOrZero, operandNR(xzrVReg), oneOrZero, true) alu.asALU(aluOpSub, allOneOrZero, operandNR(xzrVReg), operandNR(oneOrZero), true)
m.insert(alu) m.insert(alu)
// Then move the bits to the result vector register. // Then move the bits to the result vector register.

View File

@@ -853,8 +853,8 @@ func TestMachine_lowerSelectVec(t *testing.T) {
m.lowerSelectVec(c, rn, rm, rd) m.lowerSelectVec(c, rn, rm, rd)
require.Equal(t, ` require.Equal(t, `
orr w5?, wzr, #0x1 mov v5?.16b, v1?.16b
and w6?, w1?, w5? and w6?, w5?, #0x1
sub x7?, xzr, x6? sub x7?, xzr, x6?
dup v4?.2d, x7? dup v4?.2d, x7?
bsl v4?.16b, v2?.16b, v3?.16b bsl v4?.16b, v2?.16b, v3?.16b