Remove token-in/token-out section from Native API docs (sgl-project#5967)

zhaochenyang20 · tarinkk · commit 67672c1121e9 · 2025-05-09T12:14:21.000Z
diff --git a/docs/backend/native_api.ipynb b/docs/backend/native_api.ipynb
@@ -438,76 +438,6 @@
    "source": [
     "terminate_process(expert_record_server_process)"
    ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Skip Tokenizer and Detokenizer\n",
-    "\n",
-    "SGLang Runtime also supports skip tokenizer and detokenizer. This is useful in cases like integrating with RLHF workflow."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "tokenizer_free_server_process, port = launch_server_cmd(\n",
-    "    \"\"\"\n",
-    "python3 -m sglang.launch_server --model-path qwen/qwen2.5-0.5b-instruct --skip-tokenizer-init\n",
-    "\"\"\"\n",
-    ")\n",
-    "\n",
-    "wait_for_server(f\"http://localhost:{port}\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from transformers import AutoTokenizer\n",
-    "\n",
-    "tokenizer = AutoTokenizer.from_pretrained(\"qwen/qwen2.5-0.5b-instruct\")\n",
-    "\n",
-    "input_text = \"What is the capital of France?\"\n",
-    "\n",
-    "input_tokens = tokenizer.encode(input_text)\n",
-    "print_highlight(f\"Input Text: {input_text}\")\n",
-    "print_highlight(f\"Tokenized Input: {input_tokens}\")\n",
-    "\n",
-    "response = requests.post(\n",
-    "    f\"http://localhost:{port}/generate\",\n",
-    "    json={\n",
-    "        \"input_ids\": input_tokens,\n",
-    "        \"sampling_params\": {\n",
-    "            \"temperature\": 0,\n",
-    "            \"max_new_tokens\": 256,\n",
-    "            \"stop_token_ids\": [tokenizer.eos_token_id],\n",
-    "        },\n",
-    "        \"stream\": False,\n",
-    "    },\n",
-    ")\n",
-    "output = response.json()\n",
-    "output_tokens = output[\"output_ids\"]\n",
-    "\n",
-    "output_text = tokenizer.decode(output_tokens, skip_special_tokens=False)\n",
-    "print_highlight(f\"Tokenized Output: {output_tokens}\")\n",
-    "print_highlight(f\"Decoded Output: {output_text}\")\n",
-    "print_highlight(f\"Output Text: {output['meta_info']['finish_reason']}\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "terminate_process(tokenizer_free_server_process)"
-   ]
   }
  ],
  "metadata": {