File tree Expand file tree Collapse file tree 1 file changed +9
-12
lines changed
python/sglang/srt/layers/quantization Expand file tree Collapse file tree 1 file changed +9
-12
lines changed Original file line number Diff line number Diff line change @@ -240,22 +240,19 @@ def block_quant_dequant(
240
240
assert n_tiles == x_s .shape [0 ]
241
241
assert k_tiles == x_s .shape [1 ]
242
242
243
- x_dq_block = x_q_block . to ( dtype )
243
+ x_dq_block = torch . empty_like ( x_q_block , dtype = dtype )
244
244
245
- x_dq_block_tiles = [
246
- [
247
- x_dq_block [
245
+ for j in range ( n_tiles ):
246
+ for i in range ( k_tiles ):
247
+ x_q_block_tile = x_q_block [
248
248
j * block_n : min ((j + 1 ) * block_n , n ),
249
249
i * block_k : min ((i + 1 ) * block_k , k ),
250
250
]
251
- for i in range (k_tiles )
252
- ]
253
- for j in range (n_tiles )
254
- ]
255
-
256
- for i in range (k_tiles ):
257
- for j in range (n_tiles ):
258
- x_dq_block_tiles [j ][i ][:, :] = x_dq_block_tiles [j ][i ] * x_s [j ][i ]
251
+ x_dq_block_tile = x_dq_block [
252
+ j * block_n : min ((j + 1 ) * block_n , n ),
253
+ i * block_k : min ((i + 1 ) * block_k , k ),
254
+ ]
255
+ x_dq_block_tile [:, :] = x_q_block_tile .to (torch .float32 ) * x_s [j ][i ]
259
256
260
257
return x_dq_block
261
258
You can’t perform that action at this time.
0 commit comments