cuda : fix bounds check for src0 rows in MMVQ kernel (ggml-org#2231)

ggerganov · JohannesGaessler · iThalay · commit a1a1caf0e211 · 2024-09-23T08:59:27.000+07:00
* cuda : fix bounds check for src0 rows in MMVQ kernel

* Update ggml-cuda/mmvq.cu

Co-authored-by: Johannes Gäßler <johannesg@5d6.de>

---------

Co-authored-by: Johannes Gäßler <johannesg@5d6.de>
diff --git a/ggml-cuda/mmvq.cu b/ggml-cuda/mmvq.cu
@@ -75,7 +75,7 @@ static __global__ void mul_mat_vec_q(
             tmp[j][i] = warp_reduce_sum(tmp[j][i]);
         }
 
-        if (threadIdx.x < rows_per_cuda_block) {
+        if (threadIdx.x < rows_per_cuda_block && (rows_per_cuda_block == 1 || row0 + threadIdx.x < nrows_dst)) {
             dst[j*nrows_dst + row0 + threadIdx.x] = tmp[j][threadIdx.x];
         }
     }

Original file line number	Diff line number	Diff line change
`@@ -75,7 +75,7 @@ static __global__ void mul_mat_vec_q(`
`75`	`75`	`tmp[j][i] = warp_reduce_sum(tmp[j][i]);`
`76`	`76`	`}`
`77`	`77`
`78`		`- if (threadIdx.x < rows_per_cuda_block) {`
	`78`	`+ if (threadIdx.x < rows_per_cuda_block && (rows_per_cuda_block == 1 || row0 + threadIdx.x < nrows_dst)) {`
`79`	`79`	`dst[j*nrows_dst + row0 + threadIdx.x] = tmp[j][threadIdx.x];`
`80`	`80`	`}`
`81`	`81`	`}`