Skip to content

Commit dcba994

Browse files
committed
[X86] combineX86ShuffleChain - ensure we only peek through bitcasts to vectors (PR51858)
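When searching for hidden identity shuffles (added at rG41146bfe82aecc79961c3de898cda02998172e4b), only peek through a bitcast to its source operand if that source operand is itself a vector type. The code that follows queries VT1.getVectorNumElements(), which is only meaningful for vector types, whereas the source of the bitcast may be a scalar (e.g. the i128 -> <16 x i8> bitcast in the PR51858 regression test).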
1 parent 533471f commit dcba994

File tree

2 files changed

+59
-1
lines changed

2 files changed

+59
-1
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -36487,7 +36487,7 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
3648736487

3648836488
// See if the shuffle is a hidden identity shuffle - repeated args in HOPs
3648936489
// etc. can be simplified.
36490-
if (VT1 == VT2 && VT1.getSizeInBits() == RootSizeInBits) {
36490+
if (VT1 == VT2 && VT1.getSizeInBits() == RootSizeInBits && VT1.isVector()) {
3649136491
SmallVector<int> ScaledMask, IdentityMask;
3649236492
unsigned NumElts = VT1.getVectorNumElements();
3649336493
if (Mask.size() <= NumElts &&

llvm/test/CodeGen/X86/vector-reduce-mul.ll

Lines changed: 58 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2344,6 +2344,64 @@ define i8 @illegal_v8i8(i8 %a0, <8 x i8>* %a1) {
23442344
ret i8 %mul
23452345
}
23462346

2347+
define i8 @PR51858(i128 %arg) {
2348+
; SSE2-LABEL: PR51858:
2349+
; SSE2: # %bb.0:
2350+
; SSE2-NEXT: movq %rdi, %xmm0
2351+
; SSE2-NEXT: movq %rsi, %xmm1
2352+
; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
2353+
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
2354+
; SSE2-NEXT: pmullw %xmm1, %xmm0
2355+
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
2356+
; SSE2-NEXT: pmullw %xmm0, %xmm1
2357+
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
2358+
; SSE2-NEXT: pmullw %xmm1, %xmm0
2359+
; SSE2-NEXT: movdqa %xmm0, %xmm1
2360+
; SSE2-NEXT: psrld $16, %xmm1
2361+
; SSE2-NEXT: pmullw %xmm0, %xmm1
2362+
; SSE2-NEXT: movd %xmm1, %eax
2363+
; SSE2-NEXT: # kill: def $al killed $al killed $eax
2364+
; SSE2-NEXT: retq
2365+
;
2366+
; SSE41-LABEL: PR51858:
2367+
; SSE41: # %bb.0:
2368+
; SSE41-NEXT: movq %rdi, %xmm0
2369+
; SSE41-NEXT: movq %rsi, %xmm1
2370+
; SSE41-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
2371+
; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
2372+
; SSE41-NEXT: pmullw %xmm1, %xmm0
2373+
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
2374+
; SSE41-NEXT: pmullw %xmm0, %xmm1
2375+
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
2376+
; SSE41-NEXT: pmullw %xmm1, %xmm0
2377+
; SSE41-NEXT: movdqa %xmm0, %xmm1
2378+
; SSE41-NEXT: psrld $16, %xmm1
2379+
; SSE41-NEXT: pmullw %xmm0, %xmm1
2380+
; SSE41-NEXT: movd %xmm1, %eax
2381+
; SSE41-NEXT: # kill: def $al killed $al killed $eax
2382+
; SSE41-NEXT: retq
2383+
;
2384+
; AVX-LABEL: PR51858:
2385+
; AVX: # %bb.0:
2386+
; AVX-NEXT: vmovq %rdi, %xmm0
2387+
; AVX-NEXT: vmovq %rsi, %xmm1
2388+
; AVX-NEXT: vpmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
2389+
; AVX-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
2390+
; AVX-NEXT: vpmullw %xmm1, %xmm0, %xmm0
2391+
; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
2392+
; AVX-NEXT: vpmullw %xmm1, %xmm0, %xmm0
2393+
; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
2394+
; AVX-NEXT: vpmullw %xmm1, %xmm0, %xmm0
2395+
; AVX-NEXT: vpsrld $16, %xmm0, %xmm1
2396+
; AVX-NEXT: vpmullw %xmm1, %xmm0, %xmm0
2397+
; AVX-NEXT: vmovd %xmm0, %eax
2398+
; AVX-NEXT: # kill: def $al killed $al killed $eax
2399+
; AVX-NEXT: retq
2400+
%vec = bitcast i128 %arg to <16 x i8>
2401+
%red = tail call i8 @llvm.vector.reduce.mul.v16i8(<16 x i8> %vec)
2402+
ret i8 %red
2403+
}
2404+
23472405
declare i64 @llvm.vector.reduce.mul.v2i64(<2 x i64>)
23482406
declare i64 @llvm.vector.reduce.mul.v4i64(<4 x i64>)
23492407
declare i64 @llvm.vector.reduce.mul.v8i64(<8 x i64>)

0 commit comments

Comments
 (0)