|
1 | 1 | import sys
|
2 | 2 | import os
|
3 | 3 | sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
4 |
| -from model import ExLlama, ExLlamaConfig |
5 | 4 | from flask import Flask, render_template, request, jsonify
|
6 | 5 | from flask import Response, stream_with_context
|
7 | 6 | from threading import Timer, Lock
|
8 | 7 | import webbrowser
|
9 | 8 | import json
|
10 | 9 | import model_init
|
11 |
| -from session import prepare_sessions, get_initial_session, Session, load_session, new_session, _sessions_dir |
| 10 | +from session import prepare_sessions, get_generator, get_initial_session, Session, load_session, new_session, _sessions_dir |
12 | 11 | import argparse
|
13 | 12 | from tokenizer import ExLlamaTokenizer
|
| 13 | +from model import ExLlama, ExLlamaConfig |
14 | 14 | from waitress import serve
|
15 | 15 |
|
16 | 16 | app = Flask(__name__)
|
@@ -139,12 +139,21 @@ def api_append_block():
|
139 | 139 | data = request.get_json()
|
140 | 140 | session.api_append_block(data)
|
141 | 141 | return json.dumps({"result": "ok"}) + "\n"
|
| 142 | + |
@app.route('/inference', methods=['POST'])
def inferContextP():
    """Generate a completion for a form-encoded prompt.

    Reads two fields from the POSTed form data:

    * ``prompt`` -- the text handed to the generator (required).
    * ``max_tokens`` -- forwarded as ``max_new_tokens``; defaults to 200.

    Returns:
        Whatever ``generate_simple`` produces, used directly as the response
        body. If ``prompt`` is absent or ``max_tokens`` is not an integer,
        a JSON error object is returned with HTTP status 400 instead.
    """
    prompt = request.form.get('prompt')
    if prompt is None:
        # A missing field would otherwise reach the generator as None.
        return json.dumps({"error": "missing required form field 'prompt'"}) + "\n", 400

    try:
        tokens = int(request.form.get('max_tokens', 200))
    except ValueError:
        # Form values arrive as strings; reject anything int() cannot parse.
        return json.dumps({"error": "'max_tokens' must be an integer"}) + "\n", 400

    outputs = get_generator().generate_simple(prompt, max_new_tokens = tokens)
    print(outputs)
    return outputs
142 | 151 |
|
# Load the model

# Command-line options for the web UI itself; model_init.add_args() appends
# its own options to the same parser before parsing.
parser = argparse.ArgumentParser(description="Simple web-based chatbot for ExLlama")
parser.add_argument("-host", "--host", type = str, help = "IP:PORT eg, 0.0.0.0:7862", default = "localhost:5000")
# "--sessions-dir" comes first so argparse derives dest "sessions_dir" from it;
# the former "--sessions_dir" spelling stays accepted so existing command
# lines keep working.
parser.add_argument("-sd", "--sessions-dir", "--sessions_dir", type = str, help = "Location for storing user sessions, default: ~/exllama_sessions/", default = "~/exllama_sessions/")

model_init.add_args(parser)
args = parser.parse_args()
|
|
0 commit comments