@@ -25,6 +25,12 @@ public data class ChatCompletionRequest(
25
25
*/
26
26
@SerialName(" messages" ) public val messages : List <ChatMessage >,
27
27
28
+ /* *
29
+ * Constrains effort on reasoning for reasoning models. Currently supported values are low, medium, and high.
30
+ * Reducing reasoning effort can result in faster responses and fewer tokens used on reasoning in a response.
31
+ */
32
+ @SerialName(" reasoning_effort" ) public val reasoningEffort : Effort ? = null ,
33
+
28
34
/* *
29
35
* What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random,
30
36
* while lower values like 0.2 will make it more focused and deterministic.
@@ -52,12 +58,24 @@ public data class ChatCompletionRequest(
52
58
*/
53
59
@SerialName(" stop" ) public val stop : List <String >? = null ,
54
60
61
+ /* *
62
+ * Whether to store the output of this chat completion request for use in our model distillation or evals products
63
+ */
64
+ @SerialName(" store" ) public val store : Boolean? = null ,
65
+
55
66
/* *
56
67
* The maximum number of tokens allowed for the generated answer. By default, the number of tokens the model can
57
68
* return will be (4096 - prompt tokens).
58
69
*/
70
+ @Deprecated(message = " Deprecated in favor of `max_completion_tokens`" )
59
71
@SerialName(" max_tokens" ) public val maxTokens : Int? = null ,
60
72
73
+ /* *
74
+ * An upper bound for the number of tokens that can be generated for a completion,
75
+ * including visible output tokens and reasoning tokens.
76
+ */
77
+ @SerialName(" max_completion_tokens" ) public val maxCompletionTokens : Int? = null ,
78
+
61
79
/* *
62
80
* Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far,
63
81
* increasing the model's likelihood to talk about new topics.
@@ -191,6 +209,12 @@ public class ChatCompletionRequestBuilder {
191
209
*/
192
210
public var messages: List <ChatMessage >? = null
193
211
212
/**
 * Constrains effort on reasoning for reasoning models. Currently supported values are low, medium, and high.
 * Reducing reasoning effort can result in faster responses and fewer tokens used on reasoning in a response.
 *
 * Defaults to `null`. The value set here is forwarded unchanged to the request by [build].
 */
// Must be `var`: as a `val` the builder could never assign it and `build()` would always pass `null`.
public var reasoningEffort: Effort? = null
217
+
194
218
/* *
195
219
* What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random,
196
220
* while lower values like 0.2 will make it more focused and deterministic.
@@ -218,12 +242,24 @@ public class ChatCompletionRequestBuilder {
218
242
*/
219
243
public var stop: List <String >? = null
220
244
245
/**
 * Whether to store the output of this chat completion request for use in our model distillation or evals products.
 *
 * Defaults to `null`. The value set here is forwarded unchanged to the request by [build].
 */
// Must be `var`: as a `val` the builder could never assign it and `build()` would always pass `null`.
public var store: Boolean? = null
249
+
221
250
/**
 * The maximum number of tokens allowed for the generated answer. By default, the number of tokens the model can
 * return will be (4096 - prompt tokens).
 *
 * Deprecated: set [maxCompletionTokens] instead. The value is still forwarded to the request by [build],
 * so existing callers keep working.
 */
@Deprecated(
    message = "Deprecated in favor of `max_completion_tokens`",
    replaceWith = ReplaceWith("maxCompletionTokens"),
)
public var maxTokens: Int? = null
226
256
257
/**
 * An upper bound for the number of tokens that can be generated for a completion,
 * including visible output tokens and reasoning tokens.
 *
 * Defaults to `null`. The value set here is forwarded unchanged to the request by [build].
 */
// Must be `var`: as a `val` the builder could never assign it and `build()` would always pass `null`.
public var maxCompletionTokens: Int? = null
262
+
227
263
/* *
228
264
* Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far,
229
265
* increasing the model's likelihood to talk about new topics.
@@ -354,11 +390,14 @@ public class ChatCompletionRequestBuilder {
354
390
public fun build (): ChatCompletionRequest = ChatCompletionRequest (
355
391
model = requireNotNull(model) { " model is required" },
356
392
messages = requireNotNull(messages) { " messages is required" },
393
+ reasoningEffort = reasoningEffort,
357
394
temperature = temperature,
358
395
topP = topP,
359
396
n = n,
360
397
stop = stop,
398
+ store = store,
361
399
maxTokens = maxTokens,
400
+ maxCompletionTokens = maxCompletionTokens,
362
401
presencePenalty = presencePenalty,
363
402
frequencyPenalty = frequencyPenalty,
364
403
logitBias = logitBias,
0 commit comments