| package dto |
|
|
| import ( |
| "encoding/json" |
| "strings" |
|
|
| "github.com/QuantumNous/new-api/types" |
|
|
| "github.com/gin-gonic/gin" |
| ) |
|
|
// AudioRequest is the JSON body of an audio request. Model, Input and Voice
// are always serialized; the remaining fields are dropped from the encoded
// JSON when they are empty.
//
// NOTE(review): the field set looks like an OpenAI-style text-to-speech
// request — confirm against the upstream API before relying on that.
type AudioRequest struct {
	// Model is the name of the model the request is addressed to.
	Model string `json:"model"`
	// Input is the text payload of the request.
	Input string `json:"input"`
	// Voice names the voice to use.
	Voice string `json:"voice"`
	// Instructions is optional free-form guidance; omitted when empty.
	Instructions string `json:"instructions,omitempty"`
	// ResponseFormat is the optional output format; omitted when empty.
	ResponseFormat string `json:"response_format,omitempty"`
	// Speed is a pointer so that "not set" (nil, omitted from the JSON) stays
	// distinguishable from an explicit value, including an explicit 0.
	Speed *float64 `json:"speed,omitempty"`
	// StreamFormat is the optional streaming format; omitted when empty.
	StreamFormat string `json:"stream_format,omitempty"`
	// Metadata is kept as raw, undecoded JSON; omitted when empty.
	Metadata json.RawMessage `json:"metadata,omitempty"`
}
|
|
| func (r *AudioRequest) GetTokenCountMeta() *types.TokenCountMeta { |
| meta := &types.TokenCountMeta{ |
| CombineText: r.Input, |
| TokenType: types.TokenTypeTextNumber, |
| } |
| if strings.Contains(r.Model, "gpt") { |
| meta.TokenType = types.TokenTypeTokenizer |
| } |
| return meta |
| } |
|
|
| func (r *AudioRequest) IsStream(c *gin.Context) bool { |
| return r.StreamFormat == "sse" |
| } |
|
|
| func (r *AudioRequest) SetModelName(modelName string) { |
| if modelName != "" { |
| r.Model = modelName |
| } |
| } |
|
|
// AudioResponse is a JSON response that carries a single text value under the
// "text" key. The key is always emitted, even when Text is empty.
type AudioResponse struct {
	// Text is the textual content of the response.
	Text string `json:"text"`
}
|
|
| type WhisperVerboseJSONResponse struct { |
| Task string `json:"task,omitempty"` |
| Language string `json:"language,omitempty"` |
| Duration float64 `json:"duration,omitempty"` |
| Text string `json:"text,omitempty"` |
| Segments []Segment `json:"segments,omitempty"` |
| } |
|
|
// Segment is one element of WhisperVerboseJSONResponse.Segments. None of its
// fields use omitempty, so all of them are always present in the encoded
// JSON, including zero values (a nil Tokens slice encodes as null).
type Segment struct {
	// Id identifies the segment.
	Id int `json:"id"`
	// Seek is the seek value reported for the segment.
	Seek int `json:"seek"`
	// Start and End bound the segment (presumably offsets in seconds —
	// TODO confirm).
	Start float64 `json:"start"`
	End   float64 `json:"end"`
	// Text is the text of the segment.
	Text string `json:"text"`
	// Tokens lists the token ids of the segment.
	Tokens []int `json:"tokens"`
	// Temperature, AvgLogprob, CompressionRatio and NoSpeechProb are
	// per-segment numeric statistics carried through as-is.
	Temperature      float64 `json:"temperature"`
	AvgLogprob       float64 `json:"avg_logprob"`
	CompressionRatio float64 `json:"compression_ratio"`
	NoSpeechProb     float64 `json:"no_speech_prob"`
}
|
|