|
385 | 385 | "import json\n",
|
386 | 386 | "from pydantic import BaseModel, Field\n",
|
387 | 387 | "\n",
|
| 388 | + "from transformers import AutoTokenizer\n", |
| 389 | + "tokenizer = AutoTokenizer.from_pretrained(\"meta-llama/Meta-Llama-3.1-8B-Instruct\")\n", |
| 390 | + "\n", |
| 391 | + "def prompt(x):\n", |
| 392 | + "    # apply_chat_template expects a list of role/content messages: wrap a bare string as one user turn, pass lists through.\n", |
| 393 | + "    return tokenizer.apply_chat_template([{\"role\": \"user\", \"content\": x}] if isinstance(x, str) else x, tokenize=False, add_generation_prompt=True)\n", |
388 | 394 | "\n",
|
389 | 395 | "# Define the schema using Pydantic\n",
|
390 | 396 | "class CapitalInfo(BaseModel):\n",
|
|
396 | 402 | "response = requests.post(\n",
|
397 | 403 | " f\"http://localhost:{port}/generate\",\n",
|
398 | 404 | " json={\n",
|
399 |
| - " \"text\": \"Here is the information of the capital of France in the JSON format.\\n\",\n", |
| 405 | + " \"text\": prompt(\"Here is the information of the capital of France in the JSON format.\\n\"),\n", |
400 | 406 | " \"sampling_params\": {\n",
|
401 | 407 | " \"temperature\": 0,\n",
|
402 | 408 | " \"max_new_tokens\": 64,\n",
|
|
441 | 447 | "response = requests.post(\n",
|
442 | 448 | " f\"http://localhost:{port}/generate\",\n",
|
443 | 449 | " json={\n",
|
444 |
| - " \"text\": \"Here is the information of the capital of France in the JSON format.\\n\",\n", |
| 450 | + " \"text\": prompt(\"Here is the information of the capital of France in the JSON format.\\n\"),\n", |
445 | 451 | " \"sampling_params\": {\n",
|
446 | 452 | " \"temperature\": 0,\n",
|
447 | 453 | " \"max_new_tokens\": 64,\n",
|
|
469 | 475 | "response = requests.post(\n",
|
470 | 476 | " f\"http://localhost:{port}/generate\",\n",
|
471 | 477 | " json={\n",
|
472 |
| - " \"text\": \"Give me the information of the capital of France.\",\n", |
| 478 | + " \"text\": prompt(\"Give me the information of the capital of France.\"),\n", |
473 | 479 | " \"sampling_params\": {\n",
|
474 | 480 | " \"max_new_tokens\": 128,\n",
|
475 | 481 | " \"temperature\": 0,\n",
|
|
506 | 512 | "response = requests.post(\n",
|
507 | 513 | " f\"http://localhost:{port}/generate\",\n",
|
508 | 514 | " json={\n",
|
509 |
| - " \"text\": \"Paris is the capital of\",\n", |
| 515 | + " \"text\": prompt(\"Paris is the capital of\"),\n", |
510 | 516 | " \"sampling_params\": {\n",
|
511 | 517 | " \"temperature\": 0,\n",
|
512 | 518 | " \"max_new_tokens\": 64,\n",
|
|
530 | 536 | "metadata": {},
|
531 | 537 | "outputs": [],
|
532 | 538 | "source": [
|
533 |
| - "from transformers import AutoTokenizer\n", |
534 |
| - "\n", |
535 |
| - "# generate an answer\n", |
536 |
| - "tokenizer = AutoTokenizer.from_pretrained(\"meta-llama/Meta-Llama-3.1-8B-Instruct\")\n", |
537 |
| - "\n", |
538 |
| - "text = tokenizer.apply_chat_template(\n", |
539 |
| - " messages, tokenize=False, add_generation_prompt=True\n", |
540 |
| - ")\n", |
541 | 539 | "payload = {\n",
|
542 |
| - " \"text\": text,\n", |
| 540 | + " \"text\": prompt(messages),\n", |
543 | 541 | " \"sampling_params\": {\n",
|
544 | 542 | " \"structural_tag\": json.dumps(\n",
|
545 | 543 | " {\n",
|
|
0 commit comments