diff --git a/llvm/lib/CodeGen/RegAllocFast.cpp b/llvm/lib/CodeGen/RegAllocFast.cpp index fb960711d4ae0..857cf85a8acbc 100644 --- a/llvm/lib/CodeGen/RegAllocFast.cpp +++ b/llvm/lib/CodeGen/RegAllocFast.cpp @@ -391,6 +391,8 @@ class RegAllocFastImpl { bool mayLiveOut(Register VirtReg); bool mayLiveIn(Register VirtReg); + bool mayBeSpillFromInlineAsmBr(const MachineInstr &MI) const; + void dumpState() const; }; @@ -491,6 +493,21 @@ static bool dominates(InstrPosIndexes &PosIndexes, const MachineInstr &A, return IndexA < IndexB; } +/// Returns true if \p MI is a spill of a live-in physical register in a block +/// targeted by an INLINEASM_BR. Such spills must precede reloads of live-in +/// virtual registers, so that we do not reload from an uninitialized stack +/// slot. +bool RegAllocFastImpl::mayBeSpillFromInlineAsmBr(const MachineInstr &MI) const { + int FI; + auto *MBB = MI.getParent(); + if (MBB->isInlineAsmBrIndirectTarget() && TII->isStoreToStackSlot(MI, FI) && + MFI->isSpillSlotObjectIndex(FI)) + for (const auto &Op : MI.operands()) + if (Op.isReg() && MBB->isLiveIn(Op.getReg())) + return true; + return false; +} + /// Returns false if \p VirtReg is known to not live out of the current block. bool RegAllocFastImpl::mayLiveOut(Register VirtReg) { if (MayLiveAcrossBlocks.test(VirtReg.virtRegIndex())) { @@ -648,8 +665,8 @@ MachineBasicBlock::iterator RegAllocFastImpl::getMBBBeginInsertionPoint( continue; } - // Most reloads should be inserted after prolog instructions. - if (!TII->isBasicBlockPrologue(*I)) + // Skip prologues and inlineasm_br spills to place reloads afterwards. + if (!TII->isBasicBlockPrologue(*I) && !mayBeSpillFromInlineAsmBr(*I)) break; // However if a prolog instruction reads a register that needs to be @@ -736,6 +753,8 @@ bool RegAllocFastImpl::displacePhysReg(MachineInstr &MI, MCRegister PhysReg) { assert(LRI != LiveVirtRegs.end() && "datastructures in sync"); MachineBasicBlock::iterator ReloadBefore = std::next((MachineBasicBlock::iterator)MI.getIterator()); + while (mayBeSpillFromInlineAsmBr(*ReloadBefore)) + ++ReloadBefore; reload(ReloadBefore, VirtReg, LRI->PhysReg); setPhysRegState(LRI->PhysReg, regFree); diff --git a/llvm/test/CodeGen/X86/regallocfast-callbr-asm-spills-after-reload.mir b/llvm/test/CodeGen/X86/regallocfast-callbr-asm-spills-after-reload.mir new file mode 100644 index 0000000000000..4341250e0e66a --- /dev/null +++ b/llvm/test/CodeGen/X86/regallocfast-callbr-asm-spills-after-reload.mir @@ -0,0 +1,66 @@ +# NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +# RUN: llc -mtriple=x86_64-- -run-pass=regallocfast -o - %s | FileCheck %s +# RUN: llc -mtriple=x86_64-- -passes=regallocfast -o - %s | FileCheck %s + +... +--- +name: callbr-asm-spills-after-reload +alignment: 16 +tracksRegLiveness: true +registers: + - { id: 0, class: gr64, preferred-register: '', flags: [ ] } + - { id: 1, class: gr32, preferred-register: '', flags: [ ] } + - { id: 2, class: gr64, preferred-register: '', flags: [ ] } +liveins: + - { reg: '$rdi', virtual-reg: '%2' } +frameInfo: + isFrameAddressTaken: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 8 + adjustsStack: false + hasCalls: false +fixedStack: [] +stack: + - { id: 0, type: default, offset: 0, size: 8, alignment: 8, + stack-id: default, callee-saved-register: '', callee-saved-restored: true } +body: | + bb.0.entry: + successors: %bb.1(0x40000000), %bb.3(0x40000000) + liveins: $rdi + + %2:gr64 = COPY $rdi + %3:gr64 = COPY killed %2 + MOV64mr %stack.0, 1, $noreg, 0, $noreg, %3 :: (store (s64)) + %0:gr64 = MOV64rm %stack.0, 1, $noreg, 0, $noreg :: (dereferenceable load (s64)) + %6:gr32 = MOV32rm %0, 1, $noreg, 0, $noreg :: (load (s32)) + %5:gr32_norex2 = COPY %6 + %4:gr32_norex2 = COPY %5 + INLINEASM_BR &" subl $$11, $0; cmpl $$11, $0; je ${2:l};", 0 /* attdialect */, 2686986 /* regdef:GR32_NOREX2 */, def %4, 2147483657 /* reguse tiedto:$0 */, %4(tied-def 3), 13 /* imm */, %bb.3, 12 /* clobber */, implicit-def early-clobber $df, 12 /* clobber */, implicit-def early-clobber $fpsw, 12 /* clobber */, implicit-def early-clobber $eflags + %1:gr32 = COPY %4 + JMP_1 %bb.1 + + bb.1: + successors: %bb.2(0x80000000) + + ; CHECK: $rax = MOV64rm %stack.3, 1, $noreg, 0, $noreg :: (load (s64) from %stack.3) + ; CHECK-NEXT: $ecx = MOV32rm %stack.1, 1, $noreg, 0, $noreg :: (load (s32) from %stack.1) + ; CHECK-NEXT: MOV32mr renamable $rax, 1, $noreg, 0, $noreg, renamable $ecx :: (store (s32)) + MOV32mr %0, 1, $noreg, 0, $noreg, %1 :: (store (s32)) + + bb.2: + RET64 + + bb.3 (machine-block-address-taken, inlineasm-br-indirect-target): + successors: %bb.2(0x80000000) + + ; CHECK: MOV32mr %stack.2, 1, $noreg, 0, $noreg, $eax :: (store (s32) into %stack.2) + ; CHECK-NEXT: $ecx = MOV32rm %stack.2, 1, $noreg, 0, $noreg :: (load (s32) from %stack.2) + ; CHECK-NEXT: $rax = MOV64rm %stack.3, 1, $noreg, 0, $noreg :: (load (s64) from %stack.3) + ; CHECK-NEXT: MOV32mr renamable $rax, 1, $noreg, 0, $noreg, killed renamable $ecx :: (store (s32)) + ; CHECK-NEXT: JMP_1 %bb.2 + %7:gr32 = COPY %4 + MOV32mr %0, 1, $noreg, 0, $noreg, %7 :: (store (s32)) + JMP_1 %bb.2 + +...