We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 7f282a3 commit a1a1cafCopy full SHA for a1a1caf
ggml-cuda/mmvq.cu
@@ -75,7 +75,7 @@ static __global__ void mul_mat_vec_q(
75
tmp[j][i] = warp_reduce_sum(tmp[j][i]);
76
}
77
78
- if (threadIdx.x < rows_per_cuda_block) {
+ if (threadIdx.x < rows_per_cuda_block && (rows_per_cuda_block == 1 || row0 + threadIdx.x < nrows_dst)) {
79
dst[j*nrows_dst + row0 + threadIdx.x] = tmp[j][threadIdx.x];
80
81
0 commit comments