
Commit fd98c27

bump deps
1 parent 0d2dca7 commit fd98c27

4 files changed: 45 additions & 51 deletions

Cargo.toml

Lines changed: 1 addition & 1 deletion
@@ -16,7 +16,7 @@ log = "0.4.26"
 once_cell = "1.20.3"
 serde = { version = "1.0.218", features = ["derive"] }
 serde_json = "1.0.140"
-tokio = "1.43.0"
+tokio = "1.44.0"
 tokenizers = { version = "0.21.0", features = ["hf-hub", "http"]}
 regex = "1.11.1"
 rand = "0.9.0"

src/common.rs

Lines changed: 13 additions & 27 deletions
@@ -34,50 +34,36 @@ fn init_string() -> Vec<&'static str> {
             .iter()
             .map(|token| {
                 let token = tokenizer.decode(&[*token], true).unwrap();
-                let token = serde_json::to_string(&token).unwrap().trim_matches('"').to_string();
+                let token = serde_json::to_string(&token)
+                    .unwrap()
+                    .trim_matches('"')
+                    .to_string();
                 let token: &'static str = Box::leak(token.into_boxed_str());
                 token
             })
             .collect()
     } else {
         // fall back to a simple whitespace tokenizer
         log::error!("Failed to load the tokenizer, falling back to a simple whitespace tokenizer");
-        contents.split_whitespace().map(|s|{
-            let s: &'static str = Box::leak(s.to_string().into_boxed_str());
-            s}
-        ).collect()
+        contents
+            .split_whitespace()
+            .map(|s| {
+                let s: &'static str = Box::leak(s.to_string().into_boxed_str());
+                s
+            })
+            .collect()
     }
 }

 #[cfg(test)]
 mod tests {
     use super::*;
     #[test]
-    fn test_init_string() {
-        // to be implemented
-        let tokenizer = tokenizers::Tokenizer::from_pretrained(
-            "NousResearch/DeepHermes-3-Llama-3-8B-Preview",
-            None,
-        )
-        .expect("Should have been able to load the tokenizer");
-        let contents = "This is a test";
-        let tokens = tokenizer
-            .encode(contents, false)
-            .unwrap()
-            .get_ids()
-            .to_vec();
-        let clean_tokens: Vec<String> = tokens
-            .iter()
-            .map(|token| tokenizer.decode(&[*token], true).unwrap())
-            .collect();
-        println!("{:?}", clean_tokens);
-    }
-    #[test]
-    fn test_init_string_template () {
+    fn test_init_string_template() {
         // sanity check that the strings are the same
         let baseline = raw_string();
         // because the tokens are escaped
-        let baseline = serde_json::to_string(&baseline)
+        let baseline = serde_json::to_string(&baseline)
            .unwrap()
            .trim_matches('"')
            .to_string();
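
A note on the reformatted chain in init_string(): it decodes a token id, JSON-escapes the text so embedded newlines and quotes survive as \n and \", trims the surrounding quotes that serde_json::to_string adds, and leaks the String to obtain the &'static str the token list needs. Below is a minimal standalone sketch of that escape-and-leak step, assuming the same serde_json dependency; the helper name escape_and_leak and the sample token text are invented for illustration.

// Hypothetical sketch of the escape-and-leak step from init_string().
fn escape_and_leak(token: String) -> &'static str {
    // JSON-encode the decoded token so newlines/quotes become \n and \" ...
    let escaped = serde_json::to_string(&token)
        .unwrap()
        // ... then drop the surrounding quotes added by to_string.
        .trim_matches('"')
        .to_string();
    // Leak the String so the token can live in a global &'static str list.
    Box::leak(escaped.into_boxed_str())
}

fn main() {
    // Made-up token text containing a newline.
    let leaked = escape_and_leak("line one\nline two".to_string());
    assert_eq!(leaked, r"line one\nline two");
    println!("{leaked}");
}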

src/routes.rs

Lines changed: 9 additions & 9 deletions
@@ -91,7 +91,8 @@ impl Default for Choice {
 }

 static TEMPLATE: Lazy<String> = Lazy::new(init_template);
-static RE: Lazy<regex::Regex> = Lazy::new(|| regex::Regex::new(r"\[INPUT\]|\[MAX_TOKENS\]").unwrap());
+static RE: Lazy<regex::Regex> =
+    Lazy::new(|| regex::Regex::new(r"\[INPUT\]|\[MAX_TOKENS\]").unwrap());

 fn init_template() -> String {
     let max_tokens = i32::MAX;
@@ -103,13 +104,12 @@ fn init_template() -> String {

 fn substitute_template(input: &str, max_tokens: usize, template: Option<&String>) -> String {
     let template = template.unwrap_or(&TEMPLATE);
-    RE.replace_all(template, |caps: &regex::Captures| {
-        match &caps[0] {
-            "[INPUT]" => input.to_string(),
-            "[MAX_TOKENS]" => max_tokens.to_string(),
-            _ => unreachable!(),
-        }
-    }).to_string()
+    RE.replace_all(template, |caps: &regex::Captures| match &caps[0] {
+        "[INPUT]" => input.to_string(),
+        "[MAX_TOKENS]" => max_tokens.to_string(),
+        _ => unreachable!(),
+    })
+    .to_string()
 }
 // Use the same endpoint to allow the streaming
 pub async fn common_completions(
@@ -140,7 +140,7 @@ async fn completions(_req: HttpRequest, payload: Request) -> Result<HttpResponse
         } else {
             &TOKENIZED_OUTPUT[..max_tokens].concat()
         };
-        substitute_template(&return_string, max_tokens, None)
+        substitute_template(return_string, max_tokens, None)
     } else {
         // Use the full output when max_tokens is not specified
         let return_string = &MAX_OUTPUT;
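
The rewritten closure in substitute_template is the usual Regex::replace_all pattern where a match on &caps[0] picks the replacement per placeholder. Here is a self-contained sketch of the same idea, assuming the regex and once_cell crates from Cargo.toml; the template text and values below are invented for illustration, not the repo's actual template.

use once_cell::sync::Lazy;

// Same placeholder alternation as in the diff.
static RE: Lazy<regex::Regex> =
    Lazy::new(|| regex::Regex::new(r"\[INPUT\]|\[MAX_TOKENS\]").unwrap());

fn substitute(template: &str, input: &str, max_tokens: usize) -> String {
    RE.replace_all(template, |caps: &regex::Captures| match &caps[0] {
        "[INPUT]" => input.to_string(),
        "[MAX_TOKENS]" => max_tokens.to_string(),
        // The alternation can only match the two literals above.
        _ => unreachable!(),
    })
    .to_string()
}

fn main() {
    // Made-up template containing both placeholders.
    let template = r#"{"prompt":"[INPUT]","max_tokens":[MAX_TOKENS]}"#;
    let out = substitute(template, "hello", 16);
    assert_eq!(out, r#"{"prompt":"hello","max_tokens":16}"#);
    println!("{out}");
}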

src/stream.rs

Lines changed: 22 additions & 14 deletions
@@ -1,13 +1,13 @@
 use actix_web_lab::sse;
 use futures_util::stream::Stream;
 use once_cell::sync::Lazy;
+use rand::rngs::ThreadRng;
 use rand::Rng;
 use serde::{Deserialize, Serialize};
-use tokio::time::Sleep;
 use std::future::Future;
 use std::pin::Pin;
 use std::task::{Context, Poll};
-use rand::rngs::ThreadRng;
+use tokio::time::Sleep;

 use crate::common::MAX_TOKENS;
 use crate::routes::Usage;
@@ -58,6 +58,8 @@ impl StreamingChunkResponse {
         }
     }
 }
+// TODO
+// this can be combined with the one in routes.rs
 #[derive(Deserialize, Serialize, Debug, Default)]
 struct Choice {
     index: i32,
@@ -114,18 +116,20 @@ fn init_template() -> String {
 impl Stream for StringsStream<'_> {
     type Item = Result<sse::Event, std::convert::Infallible>;

+    // high level
+    // Starts with state::Input
+    // switch to state::Start after a random delay
+    // Once it reaches the end of the strings, it will switch to state::Usage if log usage is enabled
+    // After state::Usage, it will switch to state::Done
+    // If log usage is not enabled, it will switch to state::Done
+    // Once it reaches state::Done, it will switch to state::Completed
+
+    // init a string for faster access
+    // let response = StreamingChunkResponse::from_string("[INPUT]".to_string());
+    // let output = serde_json::to_string(&response).unwrap();
     fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
         let this = &mut *self;
-        // high level
-        // Starts with state::Start
-        // Once it reaches the end of the strings, it will switch to state::Usage if log usage is enabled
-        // After state::Usage, it will switch to state::Done
-        // If log usage is not enabled, it will switch to state::Done
-        // Once it reaches state::Done, it will switch to state::Completed

-        // init a string for faster access
-        // let response = StreamingChunkResponse::from_string("[INPUT]".to_string());
-        // let output = serde_json::to_string(&response).unwrap();
         if let Some(sleep) = &mut this.sleep {
             if Pin::new(sleep).poll(cx).is_pending() {
                 return Poll::Pending;
@@ -134,19 +138,23 @@ impl Stream for StringsStream<'_> {
         }

         match this.state {
-            State:: Input => {
+            State::Input => {
                 // Input gives a fake TTFT
                 // that is your initial delay from the LLM processing the tokens
                 // this can typically be long
                 let rand = this.rng.random_range(500..1000);
-                this.sleep = Some(Box::pin(tokio::time::sleep(tokio::time::Duration::from_millis(rand))));
+                this.sleep = Some(Box::pin(tokio::time::sleep(
+                    tokio::time::Duration::from_millis(rand),
+                )));
                 this.state = State::Start;
                 Poll::Pending
             }
             State::Start => {
                 if this.index < this.max_tokens {
                     let rand = this.rng.random_range(50..100);
-                    this.sleep = Some(Box::pin(tokio::time::sleep(tokio::time::Duration::from_millis(rand))));
+                    this.sleep = Some(Box::pin(tokio::time::sleep(
+                        tokio::time::Duration::from_millis(rand),
+                    )));
                     let string_item = &this.strings[this.index];
                     this.index += 1;
                     // let chunk = StreamingChunkResponse::from_string(string_item);
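
The two reformatted this.sleep assignments follow one pattern: pick a random delay (a long one for the fake time-to-first-token in State::Input, a short one per token in State::Start) and store a boxed tokio Sleep that poll_next re-polls until it fires. The following is a rough standalone sketch of the same delay logic written with plain async/await instead of the hand-rolled Stream; the delay ranges are copied from the diff, everything else is invented, and it assumes tokio's rt, macros, and time features are enabled.

use rand::Rng;
use tokio::time::{sleep, Duration};

// Emit each token after a randomized delay: one long "TTFT" pause up front,
// then a short pause before every token, mirroring the ranges in stream.rs.
async fn fake_stream(tokens: &[&str]) {
    let mut rng = rand::rng();

    // Fake time-to-first-token: 500-1000 ms, as in State::Input.
    let ttft = rng.random_range(500..1000);
    sleep(Duration::from_millis(ttft)).await;

    for token in tokens {
        // Per-token delay: 50-100 ms, as in State::Start.
        let delay = rng.random_range(50..100);
        sleep(Duration::from_millis(delay)).await;
        println!("chunk: {token}");
    }
}

#[tokio::main]
async fn main() {
    fake_stream(&["Hello", ",", " world", "!"]).await;
}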
