Commit 8717c5e

fix codestyle
1 parent 8d67d0f commit 8717c5e

10 files changed, 36 additions and 13 deletions

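All ten diffs below are mechanical style fixes: long lines wrapped, operator spacing and quote style normalized, imports reordered, and blank lines added after deferred imports. As a rough illustration of how such a reformat is produced (assuming a black-style formatter, which changes of this shape typically come from; black's default 88-column limit is used here, not necessarily the project's configured width), the long list comprehension from llm/predictor.py below can be re-wrapped programmatically:

import black

# The statement is taken from the diff below; the line-length setting is an
# assumption for illustration only.
src = (
    "benchmark_texts = [predictor.tokenizer.pad_token * predictor_args.src_length "
    "for _ in range(predictor_args.batch_size)]\n"
)
print(black.format_str(src, mode=black.Mode()))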

llm/predictor.py

Lines changed: 10 additions & 7 deletions
@@ -652,7 +652,7 @@ def _create_predictor(self, predictor_args: PredictorArgument):
             config.enable_custom_device(predictor_args.device, device_id)
         elif predictor_args.device == "xpu":
             raise ValueError(
-                    "you should export xpu static model with --block_attn flag and use predictor with --block_attn too"
+                "you should export xpu static model with --block_attn flag and use predictor with --block_attn too"
                 "https://github.com/PaddlePaddle/PaddleNLP/blob/develop/llm/docs/inference.md"
             )
         else:

@@ -925,7 +925,9 @@ def _preprocess(self, source):
             source = [self.tokenizer.apply_chat_template(sentence, tokenize=False) for sentence in source]

         for i, text in enumerate(source):
-            add_special_tokens = self.tokenizer.chat_template is None or isinstance(self.tokenizer, (ChatGLMv2Tokenizer, ChatGLMTokenizer))
+            add_special_tokens = self.tokenizer.chat_template is None or isinstance(
+                self.tokenizer, (ChatGLMv2Tokenizer, ChatGLMTokenizer)
+            )
             add_special_tokens = add_special_tokens if not self.benchmark else False
             tokens = self.tokenizer(
                 text,

@@ -1087,10 +1089,9 @@ def _create_predictor(self, predictor_args: PredictorArgument):
             config.set_xpu_device_id(device_id)
             xpu_config = paddle.inference.XpuConfig()
             xpu_config.device_id = device_id
-            xpu_config.l3_size = 63*1024*1024
-            xpu_config.l3_autotune_size = 63*1024*1024
+            xpu_config.l3_size = 63 * 1024 * 1024
+            xpu_config.l3_autotune_size = 63 * 1024 * 1024
             config.set_xpu_config(xpu_config)
-            config.enable_new_executor()
         else:
             device_id = int(os.environ.get("FLAGS_selected_gpus", 0))
             config.enable_use_gpu(100, device_id)

@@ -1348,7 +1349,7 @@ def create_predictor(
     else:
         if predictor_args.device == "xpu":
             raise ValueError(
-                    "you should run xpu dynamic model with --block_attn flag"
+                "you should run xpu dynamic model with --block_attn flag"
                 "https://github.com/PaddlePaddle/PaddleNLP/blob/develop/llm/docs/inference.md"
             )
     from paddlenlp.experimental.transformers import (

@@ -1608,7 +1609,9 @@ def predict():


 def benchmark(predictor, predictor_args, model_args):
     # Just construct a simple benchmark input. We pad input to the src_length.
-    benchmark_texts = [predictor.tokenizer.pad_token * predictor_args.src_length for _ in range(predictor_args.batch_size)]
+    benchmark_texts = [
+        predictor.tokenizer.pad_token * predictor_args.src_length for _ in range(predictor_args.batch_size)
+    ]

     batch_benchmark_texts = batchfy_text(benchmark_texts, predictor_args.batch_size)
     print("***********Start Benchmark**********")

paddlenlp/experimental/transformers/bloom/modeling.py

Lines changed: 2 additions & 0 deletions
@@ -219,6 +219,7 @@ def remove_padding(self, input_ids, seq_lens_this_time):
         cum_offsets_now = paddle.cumsum(paddle.max(seq_lens_this_time) - seq_lens_this_time)
         token_num = paddle.sum(seq_lens_this_time)
         from paddlenlp_ops import get_padding_offset
+
         ids_remove_padding, cum_offsets, padding_offset = get_padding_offset(
             input_ids, cum_offsets_now, token_num, seq_lens_this_time
         )

@@ -593,6 +594,7 @@ def remove_padding(self, input_ids, seq_lens_this_time):
         cum_offsets_now = paddle.cumsum(self.max_seq_len - seq_lens_this_time)
         token_num = paddle.sum(seq_lens_this_time)
         from paddlenlp_ops import get_padding_offset_v2
+
         ids_remove_padding, cum_offsets, padding_offset, cu_seqlens_q, cu_seqlens_k = get_padding_offset_v2(
             input_ids, cum_offsets_now, token_num, seq_lens_this_time
         )
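The remove_padding hunks here (and the matching ones in the chatglm, chatglm_v2, gpt, llama, opt, and qwen files below) only add a blank line after the deferred paddlenlp_ops import. For orientation, a rough eager-mode sketch of what get_padding_offset computes, assuming its usual flatten-the-right-padded-batch semantics; the real op is a fused custom kernel and its exact output layout may differ:

import paddle

def remove_padding_reference(input_ids, seq_lens_this_time):
    # input_ids: [batch, max_len], right-padded; seq_lens_this_time: [batch]
    cum_offsets_now = paddle.cumsum(paddle.max(seq_lens_this_time) - seq_lens_this_time)
    ids, padding_offset = [], []
    for i, n in enumerate(seq_lens_this_time.tolist()):
        ids.append(input_ids[i, :n])
        # number of pad slots skipped before sequence i in the dense layout
        pads_before = 0 if i == 0 else int(cum_offsets_now[i - 1])
        padding_offset.extend([pads_before] * n)
    ids_remove_padding = paddle.concat(ids)  # shape: [token_num]
    cum_offsets = paddle.concat([paddle.zeros([1], dtype=cum_offsets_now.dtype), cum_offsets_now])
    return ids_remove_padding, cum_offsets, paddle.to_tensor(padding_offset)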

paddlenlp/experimental/transformers/chatglm/modeling.py

Lines changed: 1 addition & 0 deletions
@@ -273,6 +273,7 @@ def remove_padding(self, input_ids, seq_lens_this_time):
         cum_offsets_now = paddle.cumsum(paddle.max(seq_lens_this_time) - seq_lens_this_time)
         token_num = paddle.sum(seq_lens_this_time)
         from paddlenlp_ops import get_padding_offset
+
         ids_remove_padding, cum_offsets, padding_offset = get_padding_offset(
             input_ids, cum_offsets_now, token_num, seq_lens_this_time
         )

paddlenlp/experimental/transformers/chatglm_v2/modeling.py

Lines changed: 1 addition & 0 deletions
@@ -202,6 +202,7 @@ def remove_padding(self, input_ids, seq_lens_this_time):
         cum_offsets_now = paddle.cumsum(paddle.max(seq_lens_this_time) - seq_lens_this_time)
         token_num = paddle.sum(seq_lens_this_time)
         from paddlenlp_ops import get_padding_offset
+
         ids_remove_padding, cum_offsets, padding_offset = get_padding_offset(
             input_ids, cum_offsets_now, token_num, seq_lens_this_time
         )

paddlenlp/experimental/transformers/fused_transformer_layers.py

Lines changed: 5 additions & 5 deletions
@@ -15,7 +15,7 @@

 import paddle
 import paddle.distributed as dist
-from paddle.framework import LayerHelper, in_dynamic_mode, core
+from paddle.framework import LayerHelper, core, in_dynamic_mode
 from paddle.incubate.nn.functional import (
     fused_layer_norm,
     fused_rms_norm,

@@ -28,15 +28,15 @@

 from paddlenlp.utils.import_utils import is_paddlenlp_ops_available
 from paddlenlp.utils.log import logger
-from paddlenlp_ops import rebuild_padding_v2
-

 if not is_paddlenlp_ops_available():
     logger.warning(
         "The paddlenlp_ops package is not installed. you can read the docs and install it by hand, "
         "you can refer to: https://github.com/PaddlePaddle/PaddleNLP/blob/develop/csrc/README.md"
     )

+from paddlenlp_ops import rebuild_padding_v2
+
 if core.is_compiled_with_cuda():
     from paddlenlp_ops import (
         dequant_int8,

@@ -1350,8 +1350,8 @@ class FusedBlockMultiTransformer(FusedMultiTransformerBase):
     def __init__(self, config: FusedMultiTransformerConfig):
         super().__init__(config)
         if not core.is_compiled_with_cuda():
-            self.cache_k_per_batch_maxs = paddle.full(shape=[10, 6], fill_value=0, dtype='float32')
-            self.cache_v_per_batch_maxs = paddle.full(shape=[10, 6], fill_value=0, dtype='float32')
+            self.cache_k_per_batch_maxs = paddle.full(shape=[10, 6], fill_value=0, dtype="float32")
+            self.cache_v_per_batch_maxs = paddle.full(shape=[10, 6], fill_value=0, dtype="float32")

     def compute_attn(
         self,
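The import move in the second hunk is a small ordering fix with a real effect: rebuild_padding_v2 previously sat above the availability check, so a missing paddlenlp_ops package raised ImportError before the warning could run. Condensed from the diff, the resulting pattern is:

from paddlenlp.utils.import_utils import is_paddlenlp_ops_available
from paddlenlp.utils.log import logger

if not is_paddlenlp_ops_available():
    # The install hint is logged first ...
    logger.warning("The paddlenlp_ops package is not installed. See csrc/README.md.")

# ... so if this import still fails, the user has already seen the pointer.
from paddlenlp_ops import rebuild_padding_v2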

paddlenlp/experimental/transformers/generation_utils.py

Lines changed: 10 additions & 1 deletion
@@ -198,6 +198,7 @@ def update_model_kwargs_for_generation(self, cache, just_decoder, next_tokens, e
         if cache is None:
             next_tokens = paddle.where(just_decoder, paddle.full_like(next_tokens, -1), next_tokens)
             from paddlenlp_ops import set_stop_value_multi_ends
+
             next_tokens, model_kwargs["stop_flags"] = set_stop_value_multi_ends(
                 next_tokens, model_kwargs["stop_flags"], eos_token_id, 2
             ) # multi ends

@@ -296,6 +297,7 @@ def _post_process_(outputs, top_p, temperature, step_idx_ori, model_kwargs):
         else:
             step_idx = model_kwargs["step_idx"]
         from paddlenlp_ops import set_value_by_flags_and_idx
+
         model_kwargs["stop_flags"] = set_value_by_flags_and_idx(
             model_kwargs["pre_ids"],
             model_kwargs["tgt_ids"],

@@ -308,6 +310,7 @@ def _post_process_(outputs, top_p, temperature, step_idx_ori, model_kwargs):
         logits = logits_processors(model_kwargs["all_input_ids"], logits, decoding_step=step_idx_ori)

         from paddlenlp_ops import get_token_penalty_multi_scores
+
         logits = get_token_penalty_multi_scores(
             model_kwargs["pre_ids"],
             logits,

@@ -319,7 +322,7 @@ def _post_process_(outputs, top_p, temperature, step_idx_ori, model_kwargs):
             eos_token_id,
         )
         logits = logits / temperature
-
+
         # sample
         probs = F.softmax(logits)

@@ -340,6 +343,7 @@ def _post_process_(outputs, top_p, temperature, step_idx_ori, model_kwargs):
             model_kwargs["all_input_ids"] = paddle.concat([model_kwargs["all_input_ids"], next_tokens], axis=1)

         from paddlenlp_ops import save_with_output
+
         save_with_output(
             next_tokens,
             batch_idx,

@@ -629,6 +633,7 @@ def _post_process_(
         ):
             step_idx = model_kwargs["step_idx"]
             from paddlenlp_ops import set_value_by_flags_and_idx_v2
+
             set_value_by_flags_and_idx_v2(
                 model_kwargs["pre_ids"],
                 model_kwargs["input_ids"],

@@ -643,6 +648,7 @@ def _post_process_(

             # pre-process distribution
             from paddlenlp_ops import get_token_penalty_multi_scores_v2
+
             logits = get_token_penalty_multi_scores_v2(
                 model_kwargs["pre_ids"],
                 logits,

@@ -669,12 +675,14 @@ def _post_process_(
             length_cond = paddle.greater_equal(step_idx, model_kwargs["max_dec_len"])
             stop_flags = paddle.logical_or(model_kwargs["stop_flags"], length_cond)
             from paddlenlp_ops import set_stop_value_multi_ends_v2
+
             set_stop_value_multi_ends_v2(
                 next_tokens, stop_flags, model_kwargs["seq_lens_this_time"], eos_token_id, model_kwargs["next_tokens"]
             ) # multi ends
             paddle.assign(stop_flags, model_kwargs["stop_flags"])
             # update inputs
             from paddlenlp_ops import update_inputs
+
             update_inputs(
                 stop_flags,
                 model_kwargs["not_need_stop"],

@@ -687,6 +695,7 @@ def _post_process_(
                 model_kwargs["is_block_step"],
             )
             from paddlenlp_ops import save_output
+
             save_output(next_tokens, model_kwargs["not_need_stop"], self.config.tensor_parallel_rank)
             return next_tokens
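These hunks sit in the decode-step post-processing: penalty ops, temperature scaling, softmax, sampling, then stop-flag bookkeeping. Purely for orientation, a plain-Paddle sketch of a temperature + top-p sampling step; the real path uses the fused paddlenlp_ops kernels shown above, and the default values here are illustrative assumptions:

import paddle
import paddle.nn.functional as F

def sample_top_p(logits, top_p=0.7, temperature=0.95):
    probs = F.softmax(logits / temperature, axis=-1)  # temperature scaling, as in the hunk above
    sorted_probs = paddle.sort(probs, descending=True, axis=-1)
    sorted_idx = paddle.argsort(probs, descending=True, axis=-1)
    cum_probs = paddle.cumsum(sorted_probs, axis=-1)
    # Zero out tokens beyond the top-p nucleus; the top-1 token is always kept.
    keep = (cum_probs - sorted_probs) <= top_p
    sorted_probs = paddle.where(keep, sorted_probs, paddle.zeros_like(sorted_probs))
    sorted_probs = sorted_probs / sorted_probs.sum(axis=-1, keepdim=True)
    choice = paddle.multinomial(sorted_probs, num_samples=1)  # sample inside the nucleus
    return paddle.take_along_axis(sorted_idx, choice, axis=-1)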

paddlenlp/experimental/transformers/gpt/modeling.py

Lines changed: 1 addition & 0 deletions
@@ -203,6 +203,7 @@ def remove_padding(self, input_ids, seq_lens_this_time):
         cum_offsets_now = paddle.cumsum(paddle.max(seq_lens_this_time) - seq_lens_this_time)
         token_num = paddle.sum(seq_lens_this_time)
         from paddlenlp_ops import get_padding_offset
+
         ids_remove_padding, cum_offsets, padding_offset = get_padding_offset(
             input_ids, cum_offsets_now, token_num, seq_lens_this_time
         )

paddlenlp/experimental/transformers/llama/modeling.py

Lines changed: 3 additions & 0 deletions
@@ -346,6 +346,7 @@ def remove_padding(self, input_ids, seq_lens_this_time):
         cum_offsets_now = paddle.cumsum(paddle.max(seq_lens_this_time) - seq_lens_this_time)
         token_num = paddle.sum(seq_lens_this_time)
         from paddlenlp_ops import get_padding_offset
+
         ids_remove_padding, cum_offsets, padding_offset = get_padding_offset(
             input_ids, cum_offsets_now, token_num, seq_lens_this_time
         )

@@ -433,6 +434,7 @@ def forward(
         if not is_decoder and pre_caches is not None:
             position_offset = 128
         from paddlenlp_ops import fused_get_rotary_embedding
+
         new_rope = fused_get_rotary_embedding(
             input_ids, position_ids, self.head_dim_shape_tensor, position_offset, theta, True
         )

@@ -825,6 +827,7 @@ def remove_padding(self, input_ids, seq_lens_this_time):
         cum_offsets_now = paddle.cumsum(self.max_seq_len - seq_lens_this_time)
         token_num = paddle.sum(seq_lens_this_time)
         from paddlenlp_ops import get_padding_offset_v2
+
         ids_remove_padding, cum_offsets, padding_offset, cu_seqlens_q, cu_seqlens_k = get_padding_offset_v2(
             input_ids, cum_offsets_now, token_num, seq_lens_this_time
         )
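The middle hunk touches the call into fused_get_rotary_embedding. As a reference for what a rotary-embedding table contains, a plain-Paddle sketch of the standard RoPE cos/sin construction; the fused kernel's exact layout and its handling of position_offset and theta may differ:

import paddle

def rope_cos_sin(position_ids, head_dim, theta=10000.0):
    # Classic RoPE frequencies: theta ** (-2i / head_dim) for each pair of dims.
    inv_freq = 1.0 / (theta ** (paddle.arange(0, head_dim, 2, dtype="float32") / head_dim))
    # position_ids: [batch, seq_len] -> angles: [batch, seq_len, head_dim // 2]
    angles = position_ids.astype("float32").unsqueeze(-1) * inv_freq
    return paddle.cos(angles), paddle.sin(angles)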

paddlenlp/experimental/transformers/opt/modeling.py

Lines changed: 1 addition & 0 deletions
@@ -147,6 +147,7 @@ def remove_padding(self, input_ids, seq_lens_this_time):
         cum_offsets_now = paddle.cumsum(paddle.max(seq_lens_this_time) - seq_lens_this_time)
         token_num = paddle.sum(seq_lens_this_time)
         from paddlenlp_ops import get_padding_offset
+
         ids_remove_padding, cum_offsets, padding_offset = get_padding_offset(
             input_ids, cum_offsets_now, token_num, seq_lens_this_time
         )

paddlenlp/experimental/transformers/qwen/modeling.py

Lines changed: 2 additions & 0 deletions
@@ -239,6 +239,7 @@ def remove_padding(self, input_ids, seq_lens_this_time):
         cum_offsets_now = paddle.cumsum(paddle.max(seq_lens_this_time) - seq_lens_this_time)
         token_num = paddle.sum(seq_lens_this_time)
         from paddlenlp_ops import get_padding_offset
+
         ids_remove_padding, cum_offsets, padding_offset = get_padding_offset(
             input_ids, cum_offsets_now, token_num, seq_lens_this_time
         )

@@ -325,6 +326,7 @@ def forward(
             position_offset = 128

         from paddlenlp_ops import fused_get_rotary_embedding
+
         new_rope = fused_get_rotary_embedding(
             input_ids, position_ids, self.head_dim_shape_tensor, position_offset, theta, True
         )
