|
438 | 438 | "source": [
|
439 | 439 | "terminate_process(expert_record_server_process)"
|
440 | 440 | ]
|
441 |
| - }, |
442 |
| - { |
443 |
| - "cell_type": "markdown", |
444 |
| - "metadata": {}, |
445 |
| - "source": [ |
446 |
| - "## Skip Tokenizer and Detokenizer\n", |
447 |
| - "\n", |
448 |
| - "SGLang Runtime also supports skipping the tokenizer and detokenizer. This is useful in cases such as integrating with an RLHF workflow." |
449 |
| - ] |
450 |
| - }, |
451 |
| - { |
452 |
| - "cell_type": "code", |
453 |
| - "execution_count": null, |
454 |
| - "metadata": {}, |
455 |
| - "outputs": [], |
456 |
| - "source": [ |
457 |
| - "tokenizer_free_server_process, port = launch_server_cmd(\n", |
458 |
| - " \"\"\"\n", |
459 |
| - "python3 -m sglang.launch_server --model-path qwen/qwen2.5-0.5b-instruct --skip-tokenizer-init\n", |
460 |
| - "\"\"\"\n", |
461 |
| - ")\n", |
462 |
| - "\n", |
463 |
| - "wait_for_server(f\"http://localhost:{port}\")" |
464 |
| - ] |
465 |
| - }, |
466 |
| - { |
467 |
| - "cell_type": "code", |
468 |
| - "execution_count": null, |
469 |
| - "metadata": {}, |
470 |
| - "outputs": [], |
471 |
| - "source": [ |
472 |
| - "from transformers import AutoTokenizer\n", |
473 |
| - "\n", |
474 |
| - "tokenizer = AutoTokenizer.from_pretrained(\"qwen/qwen2.5-0.5b-instruct\")\n", |
475 |
| - "\n", |
476 |
| - "input_text = \"What is the capital of France?\"\n", |
477 |
| - "\n", |
478 |
| - "input_tokens = tokenizer.encode(input_text)\n", |
479 |
| - "print_highlight(f\"Input Text: {input_text}\")\n", |
480 |
| - "print_highlight(f\"Tokenized Input: {input_tokens}\")\n", |
481 |
| - "\n", |
482 |
| - "response = requests.post(\n", |
483 |
| - " f\"http://localhost:{port}/generate\",\n", |
484 |
| - " json={\n", |
485 |
| - " \"input_ids\": input_tokens,\n", |
486 |
| - " \"sampling_params\": {\n", |
487 |
| - " \"temperature\": 0,\n", |
488 |
| - " \"max_new_tokens\": 256,\n", |
489 |
| - " \"stop_token_ids\": [tokenizer.eos_token_id],\n", |
490 |
| - " },\n", |
491 |
| - " \"stream\": False,\n", |
492 |
| - " },\n", |
493 |
| - ")\n", |
494 |
| - "output = response.json()\n", |
495 |
| - "output_tokens = output[\"output_ids\"]\n", |
496 |
| - "\n", |
497 |
| - "output_text = tokenizer.decode(output_tokens, skip_special_tokens=False)\n", |
498 |
| - "print_highlight(f\"Tokenized Output: {output_tokens}\")\n", |
499 |
| - "print_highlight(f\"Decoded Output: {output_text}\")\n", |
500 |
| - "print_highlight(f\"Output Text: {output['meta_info']['finish_reason']}\")" |
501 |
| - ] |
502 |
| - }, |
503 |
| - { |
504 |
| - "cell_type": "code", |
505 |
| - "execution_count": null, |
506 |
| - "metadata": {}, |
507 |
| - "outputs": [], |
508 |
| - "source": [ |
509 |
| - "terminate_process(tokenizer_free_server_process)" |
510 |
| - ] |
511 | 441 | }
|
512 | 442 | ],
|
513 | 443 | "metadata": {
|
|
0 commit comments