@@ -1577,14 +1577,6 @@ class TranscriptionRequest(OpenAIBaseModel):
     """
 
     ## TODO (varun) : Support if set to 0, certain thresholds are met !!
-    temperature: float = Field(default=0.0)
-    """The sampling temperature, between 0 and 1.
-
-    Higher values like 0.8 will make the output more random, while lower values
-    like 0.2 will make it more focused / deterministic. If set to 0, the model
-    will use [log probability](https://en.wikipedia.org/wiki/Log_probability)
-    to automatically increase the temperature until certain thresholds are hit.
-    """
 
     timestamp_granularities: list[Literal["word", "segment"]] = Field(
         alias="timestamp_granularities[]", default=[])
@@ -1596,6 +1588,7 @@ class TranscriptionRequest(OpenAIBaseModel):
     timestamps incurs additional latency.
     """
 
+    # doc: begin-transcription-extra-params
     stream: Optional[bool] = False
     """Custom field not present in the original OpenAI definition. When set,
     it will enable output to be streamed in a similar fashion as the Chat
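The `# doc: begin-transcription-extra-params` / `# doc: end-...` comments introduced here and in the next hunk look like snippet markers, presumably consumed by vLLM's documentation build to pull the region between a begin/end pair into the rendered API reference; that would explain why the relocated fields are grouped under named blocks rather than left in place.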
@@ -1604,10 +1597,51 @@ class TranscriptionRequest(OpenAIBaseModel):
     # Flattened stream option to simplify form data.
     stream_include_usage: Optional[bool] = False
     stream_continuous_usage_stats: Optional[bool] = False
+    # doc: end-transcription-extra-params
+
+    # doc: begin-transcription-sampling-params
+    temperature: float = Field(default=0.0)
+    """The sampling temperature, between 0 and 1.
+
+    Higher values like 0.8 will make the output more random, while lower values
+    like 0.2 will make it more focused / deterministic. If set to 0, the model
+    will use [log probability](https://en.wikipedia.org/wiki/Log_probability)
+    to automatically increase the temperature until certain thresholds are hit.
+    """
+
+    top_p: Optional[float] = None
+    """Enables nucleus (top-p) sampling, where tokens are selected from the
+    smallest possible set whose cumulative probability exceeds `p`.
+    """
+
+    top_k: Optional[int] = None
+    """Limits sampling to the `k` most probable tokens at each step."""
+
+    min_p: Optional[float] = None
+    """Filters out tokens with a probability lower than `min_p`, ensuring a
+    minimum likelihood threshold during sampling.
+    """
+
+    seed: Optional[int] = Field(None, ge=_LONG_INFO.min, le=_LONG_INFO.max)
+    """The seed to use for sampling."""
+
+    frequency_penalty: Optional[float] = 0.0
+    """The frequency penalty to use for sampling."""
+
+    repetition_penalty: Optional[float] = None
+    """The repetition penalty to use for sampling."""
+
+    presence_penalty: Optional[float] = 0.0
+    """The presence penalty to use for sampling."""
+    # doc: end-transcription-sampling-params
 
     # Default sampling parameters for transcription requests.
     _DEFAULT_SAMPLING_PARAMS: dict = {
-        "temperature": 0,
+        "repetition_penalty": 1.0,
+        "temperature": 1.0,
+        "top_p": 1.0,
+        "top_k": -1,
+        "min_p": 0.0,
     }
 
     def to_sampling_params(
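With these fields in place, a transcription request can carry the same sampling knobs as chat completions. Below is a minimal usage sketch, not part of this PR: it assumes a vLLM server on `http://localhost:8000/v1` serving a Whisper-style model (model name and audio path are placeholders) and relies on the official `openai` Python client, whose `extra_body` argument folds extra keys into the request body.

# Usage sketch (assumptions: server URL, model name, and audio path are
# illustrative, not taken from this PR).
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")

with open("sample.wav", "rb") as audio:
    result = client.audio.transcriptions.create(
        model="openai/whisper-large-v3",
        file=audio,
        response_format="json",
        temperature=0.2,  # standard OpenAI field
        extra_body={      # fields added by this PR
            "top_p": 0.95,
            "top_k": 40,
            "seed": 42,
            "repetition_penalty": 1.1,
        },
    )

print(result.text)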
@@ -1619,13 +1653,35 @@ def to_sampling_params(
 
         if default_sampling_params is None:
             default_sampling_params = {}
+
         # Default parameters
         if (temperature := self.temperature) is None:
             temperature = default_sampling_params.get(
                 "temperature", self._DEFAULT_SAMPLING_PARAMS["temperature"])
+        if (top_p := self.top_p) is None:
+            top_p = default_sampling_params.get(
+                "top_p", self._DEFAULT_SAMPLING_PARAMS["top_p"])
+        if (top_k := self.top_k) is None:
+            top_k = default_sampling_params.get(
+                "top_k", self._DEFAULT_SAMPLING_PARAMS["top_k"])
+        if (min_p := self.min_p) is None:
+            min_p = default_sampling_params.get(
+                "min_p", self._DEFAULT_SAMPLING_PARAMS["min_p"])
+
+        if (repetition_penalty := self.repetition_penalty) is None:
+            repetition_penalty = default_sampling_params.get(
+                "repetition_penalty",
+                self._DEFAULT_SAMPLING_PARAMS["repetition_penalty"])
 
         return SamplingParams.from_optional(temperature=temperature,
                                             max_tokens=max_tokens,
+                                            seed=self.seed,
+                                            top_p=top_p,
+                                            top_k=top_k,
+                                            min_p=min_p,
+                                            frequency_penalty=self.frequency_penalty,
+                                            repetition_penalty=repetition_penalty,
+                                            presence_penalty=self.presence_penalty,
                                             output_kind=RequestOutputKind.DELTA
                                             if self.stream \
                                             else RequestOutputKind.FINAL_ONLY)
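For each knob, `to_sampling_params` resolves values with a three-level precedence: the value sent in the request, then the server-supplied `default_sampling_params` dict, then the hard-coded `_DEFAULT_SAMPLING_PARAMS` table. (One quirk worth noting: `temperature` is declared as a plain `float` defaulting to 0.0 rather than `Optional[float]`, so its `is None` branch looks unreachable through normal validation, unlike the newly added optional fields.) A self-contained sketch of that precedence follows; `resolve` is a hypothetical helper, not code from this PR.

# Standalone illustration of the fallback chain in to_sampling_params;
# `resolve` is a hypothetical helper, not part of the PR.
_HARD_DEFAULTS = {
    "repetition_penalty": 1.0,
    "temperature": 1.0,
    "top_p": 1.0,
    "top_k": -1,
    "min_p": 0.0,
}


def resolve(name: str, request_value, server_defaults: dict):
    # 1) an explicit request value wins;
    # 2) otherwise fall back to the server-level default, if configured;
    # 3) otherwise use the hard-coded transcription default.
    if request_value is not None:
        return request_value
    return server_defaults.get(name, _HARD_DEFAULTS[name])


assert resolve("top_k", None, {}) == -1             # hard default
assert resolve("top_k", None, {"top_k": 50}) == 50  # server default
assert resolve("top_k", 20, {"top_k": 50}) == 20    # request wins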