-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[AMDGPU][GFX12] VOP encoding and codegen - add support for v_cvt fp8/… #78414
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 2 commits
ef5080c
a91e42a
01d8b5e
8e058e0
6149014
4996c77
a36de86
afe32a9
685d767
7de5e75
9ad8493
9a7989f
3211bc3
55d4d1b
55d611a
585e918
a3610b5
1db3bf4
5270f1e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||
---|---|---|---|---|
|
@@ -3500,6 +3500,9 @@ bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) { | |||
return !isInlineConstant(Inst, OpIdx); | ||||
} else if (MO.isReg()) { | ||||
auto Reg = MO.getReg(); | ||||
if (!Reg) { | ||||
return false; | ||||
} | ||||
const MCRegisterInfo *TRI = getContext().getRegisterInfo(); | ||||
auto PReg = mc2PseudoReg(Reg); | ||||
return isSGPR(PReg, TRI) && PReg != SGPR_NULL; | ||||
|
@@ -8273,6 +8276,16 @@ void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands, | |||
((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); | ||||
} | ||||
|
||||
if (isVOP1Cvt_F32_Fp8_Bf8_e64(Opc) && | ||||
Opc != AMDGPU::V_CVT_PK_F32_BF8_e64_gfx12 && | ||||
Opc != AMDGPU::V_CVT_PK_F32_FP8_e64_gfx12) { | ||||
AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I++]); | ||||
Op.addRegOrImmWithFPInputModsOperands(Inst, 1); // src0 | ||||
// Add dummy src1 | ||||
Inst.addOperand(MCOperand::createImm(0)); | ||||
Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(0, getSTI()))); | ||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Reviewer comment: Probably no need to call [inline code lost in extraction; presumably `AMDGPU::getMCReg`, since the register passed is 0]. Are these dummy operands really necessary? By having them, we just seem to give ourselves more work handling them with custom code.
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Author reply: Yes, I think we can remove [inline code lost in extraction; presumably the same `AMDGPU::getMCReg` call]. We will need these dummy operands when doing [remainder of the comment lost in extraction].
|
||||
} | ||||
|
||||
for (unsigned E = Operands.size(); I != E; ++I) { | ||||
AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); | ||||
if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { | ||||
|
@@ -8321,12 +8334,20 @@ void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands, | |||
const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0; | ||||
|
||||
if (Opc == AMDGPU::V_CVT_SR_BF8_F32_vi || | ||||
Opc == AMDGPU::V_CVT_SR_FP8_F32_vi) { | ||||
Opc == AMDGPU::V_CVT_SR_FP8_F32_vi || | ||||
Opc == AMDGPU::V_CVT_SR_BF8_F32_e64_gfx12 || | ||||
Opc == AMDGPU::V_CVT_SR_FP8_F32_e64_gfx12) { | ||||
Inst.addOperand(MCOperand::createImm(0)); // Placeholder for src2_mods | ||||
Inst.addOperand(Inst.getOperand(0)); | ||||
} | ||||
|
||||
if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in)) { | ||||
// Adding vdst_in operand is already covered for these DPP instructions in | ||||
// cvtVOP3DPP. | ||||
if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in) && | ||||
!(Opc == AMDGPU::V_CVT_PK_BF8_F32_e64_dpp_gfx12 || | ||||
Opc == AMDGPU::V_CVT_PK_FP8_F32_e64_dpp_gfx12 || | ||||
Opc == AMDGPU::V_CVT_PK_BF8_F32_e64_dpp8_gfx12 || | ||||
Opc == AMDGPU::V_CVT_PK_FP8_F32_e64_dpp8_gfx12)) { | ||||
assert(!IsPacked); | ||||
Inst.addOperand(Inst.getOperand(0)); | ||||
} | ||||
|
@@ -8765,6 +8786,11 @@ void AMDGPUAsmParser::cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands, | |||
int OldIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::old); | ||||
int Src2ModIdx = | ||||
AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers); | ||||
int VdstInIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in); | ||||
bool IsVOP3CvtSrDpp = Opc == AMDGPU::V_CVT_SR_BF8_F32_e64_dpp8_gfx12 || | ||||
Opc == AMDGPU::V_CVT_SR_FP8_F32_e64_dpp8_gfx12 || | ||||
Opc == AMDGPU::V_CVT_SR_BF8_F32_e64_dpp_gfx12 || | ||||
Opc == AMDGPU::V_CVT_SR_FP8_F32_e64_dpp_gfx12; | ||||
mariusz-sikora-at-amd marked this conversation as resolved.
Show resolved
Hide resolved
|
||||
bool IsMAC = OldIdx != -1 && Src2ModIdx != -1 && | ||||
Desc.getOperandConstraint(OldIdx, MCOI::TIED_TO) == -1; | ||||
|
||||
|
@@ -8788,6 +8814,20 @@ void AMDGPUAsmParser::cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands, | |||
} | ||||
} | ||||
|
||||
if (VdstInIdx != -1) { | ||||
int NumOperands = Inst.getNumOperands(); | ||||
if (VdstInIdx == NumOperands) | ||||
mariusz-sikora-at-amd marked this conversation as resolved.
Show resolved
Hide resolved
|
||||
Inst.addOperand(Inst.getOperand(0)); | ||||
} | ||||
|
||||
if (IsVOP3CvtSrDpp) { | ||||
int NumOperands = Inst.getNumOperands(); | ||||
if (Src2ModIdx == NumOperands) { | ||||
Inst.addOperand(MCOperand::createImm(0)); | ||||
Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(0, getSTI()))); | ||||
} | ||||
} | ||||
|
||||
auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(), | ||||
MCOI::TIED_TO); | ||||
if (TiedTo != -1) { | ||||
|
@@ -8801,6 +8841,13 @@ void AMDGPUAsmParser::cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands, | |||
Fi = Op.getImm(); | ||||
} else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { | ||||
Op.addRegOrImmWithFPInputModsOperands(Inst, 2); | ||||
if (isVOP1Cvt_F32_Fp8_Bf8_e64(Inst.getOpcode()) && | ||||
Inst.getOpcode() != AMDGPU::V_CVT_PK_F32_BF8_e64_gfx12 && | ||||
Inst.getOpcode() != AMDGPU::V_CVT_PK_F32_FP8_e64_gfx12) { | ||||
// Add dummy src1 | ||||
Inst.addOperand(MCOperand::createImm(0)); | ||||
Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(0, getSTI()))); | ||||
} | ||||
} else if (Op.isReg()) { | ||||
Op.addRegOperands(Inst, 1); | ||||
} else if (Op.isImm() && | ||||
|
@@ -8847,6 +8894,7 @@ void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool I | |||
OptionalImmIndexMap OptionalIdx; | ||||
|
||||
unsigned I = 1; | ||||
const unsigned Opc = Inst.getOpcode(); | ||||
const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); | ||||
for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { | ||||
((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); | ||||
|
@@ -8874,6 +8922,14 @@ void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool I | |||
Op.addImmOperands(Inst, 1); | ||||
} else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { | ||||
Op.addRegWithFPInputModsOperands(Inst, 2); | ||||
if (Opc == AMDGPU::V_CVT_F32_BF8_dpp_gfx12 || | ||||
Opc == AMDGPU::V_CVT_F32_FP8_dpp_gfx12 || | ||||
Opc == AMDGPU::V_CVT_F32_BF8_dpp8_gfx12 || | ||||
Opc == AMDGPU::V_CVT_F32_FP8_dpp8_gfx12) { | ||||
// Add dummy src1 | ||||
Inst.addOperand(MCOperand::createImm(0)); | ||||
Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(0, getSTI()))); | ||||
} | ||||
} else if (Op.isDppFI()) { | ||||
Fi = Op.getImm(); | ||||
} else if (Op.isReg()) { | ||||
|
@@ -8884,6 +8940,14 @@ void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool I | |||
} else { | ||||
if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { | ||||
Op.addRegWithFPInputModsOperands(Inst, 2); | ||||
if (Opc == AMDGPU::V_CVT_F32_BF8_dpp_gfx12 || | ||||
Opc == AMDGPU::V_CVT_F32_FP8_dpp_gfx12 || | ||||
Opc == AMDGPU::V_CVT_F32_BF8_dpp8_gfx12 || | ||||
Opc == AMDGPU::V_CVT_F32_FP8_dpp8_gfx12) { | ||||
// Add dummy src1 | ||||
Inst.addOperand(MCOperand::createImm(0)); | ||||
Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(0, getSTI()))); | ||||
} | ||||
} else if (Op.isReg()) { | ||||
Op.addRegOperands(Inst, 1); | ||||
} else if (Op.isDPPCtrl()) { | ||||
|
Uh oh!
There was an error while loading. Please reload this page.