493 lines
19 KiB
Go
493 lines
19 KiB
Go
package openai
|
|
|
|
import (
|
|
"context"
|
|
"encoding/json"
|
|
"errors"
|
|
"net/http"
|
|
|
|
"github.com/sashabaranov/go-openai/jsonschema"
|
|
)
|
|
|
|
// Chat message roles defined by the OpenAI API.
// Presumably these are the values expected in ChatCompletionMessage.Role.
const (
	ChatMessageRoleSystem    = "system"
	ChatMessageRoleUser      = "user"
	ChatMessageRoleAssistant = "assistant"
	ChatMessageRoleFunction  = "function"
	ChatMessageRoleTool      = "tool"
	ChatMessageRoleDeveloper = "developer"
)
|
|
|
|
// chatCompletionsSuffix is the URL path suffix of the chat completions endpoint.
const chatCompletionsSuffix = "/chat/completions"
|
|
|
|
// Sentinel errors for the chat completion API; compare with errors.Is.
var (
	// ErrChatCompletionInvalidModel is returned by CreateChatCompletion when
	// the requested model is not supported by the chat completions endpoint.
	ErrChatCompletionInvalidModel = errors.New("this model is not supported with this method, please use CreateCompletion client method instead") //nolint:lll

	// ErrChatCompletionStreamNotSupported is returned by CreateChatCompletion
	// when the request has Stream set.
	ErrChatCompletionStreamNotSupported = errors.New("streaming is not supported with this method, please use CreateChatCompletionStream") //nolint:lll

	// ErrContentFieldsMisused is returned when a ChatCompletionMessage that
	// sets both Content and MultiContent is marshaled.
	ErrContentFieldsMisused = errors.New("can't use both Content and MultiContent properties simultaneously")
)
|
|
|
|
// Hate is the "hate" entry of ContentFilterResults.
// Severity is left out of the JSON encoding when it is empty.
type Hate struct {
	Filtered bool   `json:"filtered"`
	Severity string `json:"severity,omitempty"`
}
|
|
// SelfHarm is the "self_harm" entry of ContentFilterResults.
// Severity is left out of the JSON encoding when it is empty.
type SelfHarm struct {
	Filtered bool   `json:"filtered"`
	Severity string `json:"severity,omitempty"`
}
|
|
// Sexual is the "sexual" entry of ContentFilterResults.
// Severity is left out of the JSON encoding when it is empty.
type Sexual struct {
	Filtered bool   `json:"filtered"`
	Severity string `json:"severity,omitempty"`
}
|
|
// Violence is the "violence" entry of ContentFilterResults.
// Severity is left out of the JSON encoding when it is empty.
type Violence struct {
	Filtered bool   `json:"filtered"`
	Severity string `json:"severity,omitempty"`
}
|
|
|
|
// JailBreak is the "jailbreak" entry of ContentFilterResults.
// Both flags are always present in the JSON encoding.
type JailBreak struct {
	Filtered bool `json:"filtered"`
	Detected bool `json:"detected"`
}
|
|
|
|
// Profanity is the "profanity" entry of ContentFilterResults.
// Both flags are always present in the JSON encoding.
type Profanity struct {
	Filtered bool `json:"filtered"`
	Detected bool `json:"detected"`
}
|
|
|
|
type ContentFilterResults struct {
|
|
Hate Hate `json:"hate,omitempty"`
|
|
SelfHarm SelfHarm `json:"self_harm,omitempty"`
|
|
Sexual Sexual `json:"sexual,omitempty"`
|
|
Violence Violence `json:"violence,omitempty"`
|
|
JailBreak JailBreak `json:"jailbreak,omitempty"`
|
|
Profanity Profanity `json:"profanity,omitempty"`
|
|
}
|
|
|
|
type PromptAnnotation struct {
|
|
PromptIndex int `json:"prompt_index,omitempty"`
|
|
ContentFilterResults ContentFilterResults `json:"content_filter_results,omitempty"`
|
|
}
|
|
|
|
// ImageURLDetail is the value of the "detail" field of ChatMessageImageURL.
type ImageURLDetail string

// Known ImageURLDetail values.
const (
	ImageURLDetailHigh ImageURLDetail = "high"
	ImageURLDetailLow  ImageURLDetail = "low"
	ImageURLDetailAuto ImageURLDetail = "auto"
)
|
|
|
|
type ChatMessageImageURL struct {
|
|
URL string `json:"url,omitempty"`
|
|
Detail ImageURLDetail `json:"detail,omitempty"`
|
|
}
|
|
|
|
// ChatMessagePartType is the value of the "type" field of a ChatMessagePart.
type ChatMessagePartType string

// Known ChatMessagePartType values.
const (
	ChatMessagePartTypeText     ChatMessagePartType = "text"
	ChatMessagePartTypeImageURL ChatMessagePartType = "image_url"
)
|
|
|
|
type ChatMessagePart struct {
|
|
Type ChatMessagePartType `json:"type,omitempty"`
|
|
Text string `json:"text,omitempty"`
|
|
ImageURL *ChatMessageImageURL `json:"image_url,omitempty"`
|
|
}
|
|
|
|
type ChatCompletionMessage struct {
|
|
Role string `json:"role"`
|
|
Content string `json:"content,omitempty"`
|
|
Refusal string `json:"refusal,omitempty"`
|
|
MultiContent []ChatMessagePart
|
|
|
|
// This property isn't in the official documentation, but it's in
|
|
// the documentation for the official library for python:
|
|
// - https://github.com/openai/openai-python/blob/main/chatml.md
|
|
// - https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb
|
|
Name string `json:"name,omitempty"`
|
|
|
|
// This property is used for the "reasoning" feature supported by deepseek-reasoner
|
|
// which is not in the official documentation.
|
|
// the doc from deepseek:
|
|
// - https://api-docs.deepseek.com/api/create-chat-completion#responses
|
|
ReasoningContent string `json:"reasoning_content,omitempty"`
|
|
|
|
FunctionCall *FunctionCall `json:"function_call,omitempty"`
|
|
|
|
// For Role=assistant prompts this may be set to the tool calls generated by the model, such as function calls.
|
|
ToolCalls []ToolCall `json:"tool_calls,omitempty"`
|
|
|
|
// For Role=tool prompts this should be set to the ID given in the assistant's prior request to call a tool.
|
|
ToolCallID string `json:"tool_call_id,omitempty"`
|
|
}
|
|
|
|
func (m ChatCompletionMessage) MarshalJSON() ([]byte, error) {
|
|
if m.Content != "" && m.MultiContent != nil {
|
|
return nil, ErrContentFieldsMisused
|
|
}
|
|
if len(m.MultiContent) > 0 {
|
|
msg := struct {
|
|
Role string `json:"role"`
|
|
Content string `json:"-"`
|
|
Refusal string `json:"refusal,omitempty"`
|
|
MultiContent []ChatMessagePart `json:"content,omitempty"`
|
|
Name string `json:"name,omitempty"`
|
|
ReasoningContent string `json:"reasoning_content,omitempty"`
|
|
FunctionCall *FunctionCall `json:"function_call,omitempty"`
|
|
ToolCalls []ToolCall `json:"tool_calls,omitempty"`
|
|
ToolCallID string `json:"tool_call_id,omitempty"`
|
|
}(m)
|
|
return json.Marshal(msg)
|
|
}
|
|
|
|
msg := struct {
|
|
Role string `json:"role"`
|
|
Content string `json:"content,omitempty"`
|
|
Refusal string `json:"refusal,omitempty"`
|
|
MultiContent []ChatMessagePart `json:"-"`
|
|
Name string `json:"name,omitempty"`
|
|
ReasoningContent string `json:"reasoning_content,omitempty"`
|
|
FunctionCall *FunctionCall `json:"function_call,omitempty"`
|
|
ToolCalls []ToolCall `json:"tool_calls,omitempty"`
|
|
ToolCallID string `json:"tool_call_id,omitempty"`
|
|
}(m)
|
|
return json.Marshal(msg)
|
|
}
|
|
|
|
func (m *ChatCompletionMessage) UnmarshalJSON(bs []byte) error {
|
|
msg := struct {
|
|
Role string `json:"role"`
|
|
Content string `json:"content"`
|
|
Refusal string `json:"refusal,omitempty"`
|
|
MultiContent []ChatMessagePart
|
|
Name string `json:"name,omitempty"`
|
|
ReasoningContent string `json:"reasoning_content,omitempty"`
|
|
FunctionCall *FunctionCall `json:"function_call,omitempty"`
|
|
ToolCalls []ToolCall `json:"tool_calls,omitempty"`
|
|
ToolCallID string `json:"tool_call_id,omitempty"`
|
|
}{}
|
|
|
|
if err := json.Unmarshal(bs, &msg); err == nil {
|
|
*m = ChatCompletionMessage(msg)
|
|
return nil
|
|
}
|
|
multiMsg := struct {
|
|
Role string `json:"role"`
|
|
Content string
|
|
Refusal string `json:"refusal,omitempty"`
|
|
MultiContent []ChatMessagePart `json:"content"`
|
|
Name string `json:"name,omitempty"`
|
|
ReasoningContent string `json:"reasoning_content,omitempty"`
|
|
FunctionCall *FunctionCall `json:"function_call,omitempty"`
|
|
ToolCalls []ToolCall `json:"tool_calls,omitempty"`
|
|
ToolCallID string `json:"tool_call_id,omitempty"`
|
|
}{}
|
|
if err := json.Unmarshal(bs, &multiMsg); err != nil {
|
|
return err
|
|
}
|
|
*m = ChatCompletionMessage(multiMsg)
|
|
return nil
|
|
}
|
|
|
|
type ToolCall struct {
|
|
// Index is not nil only in chat completion chunk object
|
|
Index *int `json:"index,omitempty"`
|
|
ID string `json:"id,omitempty"`
|
|
Type ToolType `json:"type"`
|
|
Function FunctionCall `json:"function"`
|
|
}
|
|
|
|
// FunctionCall names a function and carries the arguments to call it with.
// Both fields are omitted from the JSON encoding when empty.
type FunctionCall struct {
	Name string `json:"name,omitempty"`
	// Arguments holds the call arguments, encoded as a JSON document inside
	// a string.
	Arguments string `json:"arguments,omitempty"`
}
|
|
|
|
// ChatCompletionResponseFormatType is the value of the "type" field of
// ChatCompletionResponseFormat.
type ChatCompletionResponseFormatType string

// Known ChatCompletionResponseFormatType values.
const (
	ChatCompletionResponseFormatTypeJSONObject ChatCompletionResponseFormatType = "json_object"
	ChatCompletionResponseFormatTypeJSONSchema ChatCompletionResponseFormatType = "json_schema"
	ChatCompletionResponseFormatTypeText       ChatCompletionResponseFormatType = "text"
)
|
|
|
|
type ChatCompletionResponseFormat struct {
|
|
Type ChatCompletionResponseFormatType `json:"type,omitempty"`
|
|
JSONSchema *ChatCompletionResponseFormatJSONSchema `json:"json_schema,omitempty"`
|
|
}
|
|
|
|
// ChatCompletionResponseFormatJSONSchema is the "json_schema" payload of
// ChatCompletionResponseFormat.
//
// Schema accepts any json.Marshaler, so callers may supply a schema value of
// their own type or a json.RawMessage. A nil Schema is encoded as null.
type ChatCompletionResponseFormatJSONSchema struct {
	Name        string         `json:"name"`
	Description string         `json:"description,omitempty"`
	Schema      json.Marshaler `json:"schema"`
	Strict      bool           `json:"strict"`
}
|
|
|
|
func (r *ChatCompletionResponseFormatJSONSchema) UnmarshalJSON(data []byte) error {
|
|
type rawJSONSchema struct {
|
|
Name string `json:"name"`
|
|
Description string `json:"description,omitempty"`
|
|
Schema json.RawMessage `json:"schema"`
|
|
Strict bool `json:"strict"`
|
|
}
|
|
var raw rawJSONSchema
|
|
if err := json.Unmarshal(data, &raw); err != nil {
|
|
return err
|
|
}
|
|
r.Name = raw.Name
|
|
r.Description = raw.Description
|
|
r.Strict = raw.Strict
|
|
if len(raw.Schema) > 0 && string(raw.Schema) != "null" {
|
|
var d jsonschema.Definition
|
|
err := json.Unmarshal(raw.Schema, &d)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
r.Schema = &d
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// ChatCompletionRequestExtensions holds third-party extensions to the OpenAI
// API (for example, vendor-specific implementations such as vLLM).
type ChatCompletionRequestExtensions struct {
	// GuidedChoice is a vLLM-specific extension that restricts the model's
	// output to one of the predefined string choices listed here. It is
	// meant for scenarios that need predictable output drawn from a
	// controlled set of options. Omitted from the request when empty.
	GuidedChoice []string `json:"guided_choice,omitempty"`
}
|
|
|
|
// ChatCompletionRequest represents a request structure for chat completion API.
|
|
type ChatCompletionRequest struct {
|
|
Model string `json:"model"`
|
|
Messages []ChatCompletionMessage `json:"messages"`
|
|
// MaxTokens The maximum number of tokens that can be generated in the chat completion.
|
|
// This value can be used to control costs for text generated via API.
|
|
// Deprecated: use MaxCompletionTokens. Not compatible with o1-series models.
|
|
// refs: https://platform.openai.com/docs/api-reference/chat/create#chat-create-max_tokens
|
|
MaxTokens int `json:"max_tokens,omitempty"`
|
|
// MaxCompletionTokens An upper bound for the number of tokens that can be generated for a completion,
|
|
// including visible output tokens and reasoning tokens https://platform.openai.com/docs/guides/reasoning
|
|
MaxCompletionTokens int `json:"max_completion_tokens,omitempty"`
|
|
Temperature float32 `json:"temperature,omitempty"`
|
|
TopP float32 `json:"top_p,omitempty"`
|
|
N int `json:"n,omitempty"`
|
|
Stream bool `json:"stream,omitempty"`
|
|
Stop []string `json:"stop,omitempty"`
|
|
PresencePenalty float32 `json:"presence_penalty,omitempty"`
|
|
ResponseFormat *ChatCompletionResponseFormat `json:"response_format,omitempty"`
|
|
Seed *int `json:"seed,omitempty"`
|
|
FrequencyPenalty float32 `json:"frequency_penalty,omitempty"`
|
|
// LogitBias is must be a token id string (specified by their token ID in the tokenizer), not a word string.
|
|
// incorrect: `"logit_bias":{"You": 6}`, correct: `"logit_bias":{"1639": 6}`
|
|
// refs: https://platform.openai.com/docs/api-reference/chat/create#chat/create-logit_bias
|
|
LogitBias map[string]int `json:"logit_bias,omitempty"`
|
|
// LogProbs indicates whether to return log probabilities of the output tokens or not.
|
|
// If true, returns the log probabilities of each output token returned in the content of message.
|
|
// This option is currently not available on the gpt-4-vision-preview model.
|
|
LogProbs bool `json:"logprobs,omitempty"`
|
|
// TopLogProbs is an integer between 0 and 5 specifying the number of most likely tokens to return at each
|
|
// token position, each with an associated log probability.
|
|
// logprobs must be set to true if this parameter is used.
|
|
TopLogProbs int `json:"top_logprobs,omitempty"`
|
|
User string `json:"user,omitempty"`
|
|
// Deprecated: use Tools instead.
|
|
Functions []FunctionDefinition `json:"functions,omitempty"`
|
|
// Deprecated: use ToolChoice instead.
|
|
FunctionCall any `json:"function_call,omitempty"`
|
|
Tools []Tool `json:"tools,omitempty"`
|
|
// This can be either a string or an ToolChoice object.
|
|
ToolChoice any `json:"tool_choice,omitempty"`
|
|
// Options for streaming response. Only set this when you set stream: true.
|
|
StreamOptions *StreamOptions `json:"stream_options,omitempty"`
|
|
// Disable the default behavior of parallel tool calls by setting it: false.
|
|
ParallelToolCalls any `json:"parallel_tool_calls,omitempty"`
|
|
// Store can be set to true to store the output of this completion request for use in distillations and evals.
|
|
// https://platform.openai.com/docs/api-reference/chat/create#chat-create-store
|
|
Store bool `json:"store,omitempty"`
|
|
// Controls effort on reasoning for reasoning models. It can be set to "low", "medium", or "high".
|
|
ReasoningEffort string `json:"reasoning_effort,omitempty"`
|
|
// Metadata to store with the completion.
|
|
Metadata map[string]string `json:"metadata,omitempty"`
|
|
// Configuration for a predicted output.
|
|
Prediction *Prediction `json:"prediction,omitempty"`
|
|
// ChatTemplateKwargs provides a way to add non-standard parameters to the request body.
|
|
// Additional kwargs to pass to the template renderer. Will be accessible by the chat template.
|
|
// Such as think mode for qwen3. "chat_template_kwargs": {"enable_thinking": false}
|
|
// https://qwen.readthedocs.io/en/latest/deployment/vllm.html#thinking-non-thinking-modes
|
|
ChatTemplateKwargs map[string]any `json:"chat_template_kwargs,omitempty"`
|
|
// Specifies the latency tier to use for processing the request.
|
|
ServiceTier ServiceTier `json:"service_tier,omitempty"`
|
|
// A stable identifier used to help detect users of your application that may be violating OpenAI's usage policies.
|
|
// The IDs should be a string that uniquely identifies each user.
|
|
// We recommend hashing their username or email address, in order to avoid sending us any identifying information.
|
|
// https://platform.openai.com/docs/api-reference/chat/create#chat_create-safety_identifier
|
|
SafetyIdentifier string `json:"safety_identifier,omitempty"`
|
|
// Embedded struct for non-OpenAI extensions
|
|
ChatCompletionRequestExtensions
|
|
}
|
|
|
|
// StreamOptions is the value of ChatCompletionRequest.StreamOptions.
type StreamOptions struct {
	// IncludeUsage, if set, causes an additional chunk to be streamed before
	// the data: [DONE] message. The usage field on this chunk shows the token
	// usage statistics for the entire request, and its choices field is
	// always an empty array. All other chunks also include a usage field,
	// but with a null value.
	IncludeUsage bool `json:"include_usage,omitempty"`
}
|
|
|
|
// ToolType is the value of the "type" field of Tool, ToolChoice and ToolCall.
type ToolType string

// Known ToolType values.
const (
	ToolTypeFunction ToolType = "function"
)
|
|
|
|
type Tool struct {
|
|
Type ToolType `json:"type"`
|
|
Function *FunctionDefinition `json:"function,omitempty"`
|
|
}
|
|
|
|
type ToolChoice struct {
|
|
Type ToolType `json:"type"`
|
|
Function ToolFunction `json:"function,omitempty"`
|
|
}
|
|
|
|
// ToolFunction identifies, by name, the function referenced by a ToolChoice.
type ToolFunction struct {
	Name string `json:"name"`
}
|
|
|
|
// FunctionDefinition describes a function offered to the model through
// Tool.Function or the deprecated ChatCompletionRequest.Functions.
type FunctionDefinition struct {
	Name        string `json:"name"`
	Description string `json:"description,omitempty"`
	Strict      bool   `json:"strict,omitempty"`
	// Parameters is an object describing the function.
	// You can pass json.RawMessage to describe the schema, or a struct that
	// serializes to the proper JSON schema.
	// The jsonschema package is provided for convenience, but consider
	// another specialized library if you require more complex schemas.
	// The key is always emitted; a nil value is encoded as null.
	Parameters any `json:"parameters"`
}

// FunctionDefine is the former name of FunctionDefinition.
//
// Deprecated: use FunctionDefinition instead.
type FunctionDefine = FunctionDefinition
|
|
|
|
// TopLogProbs is one entry of LogProb.TopLogProbs: a token, its log
// probability and, optionally, the token's bytes.
type TopLogProbs struct {
	Token   string  `json:"token"`
	LogProb float64 `json:"logprob"`
	Bytes   []byte  `json:"bytes,omitempty"`
}

// LogProb represents the probability information for a token.
type LogProb struct {
	Token   string  `json:"token"`
	LogProb float64 `json:"logprob"`
	Bytes   []byte  `json:"bytes,omitempty"` // omitted when empty or null
	// TopLogProbs is a list of the most likely tokens and their log
	// probability, at this token position. In rare cases, fewer entries
	// than the requested top_logprobs may be returned.
	TopLogProbs []TopLogProbs `json:"top_logprobs"`
}

// LogProbs is the top-level structure containing the log probability
// information.
type LogProbs struct {
	// Content is a list of message content tokens with log probability
	// information.
	Content []LogProb `json:"content"`
}
|
|
|
|
// Prediction is the value of ChatCompletionRequest.Prediction, the
// configuration for a predicted output. Both fields are always emitted.
type Prediction struct {
	Content string `json:"content"`
	Type    string `json:"type"`
}
|
|
|
|
// FinishReason is the value of the "finish_reason" field of a choice.
type FinishReason string

// Known FinishReason values. FinishReasonNull is the in-memory stand-in for
// a JSON null; MarshalJSON encodes it (and the empty string) as null.
const (
	FinishReasonStop          FinishReason = "stop"
	FinishReasonLength        FinishReason = "length"
	FinishReasonFunctionCall  FinishReason = "function_call"
	FinishReasonToolCalls     FinishReason = "tool_calls"
	FinishReasonContentFilter FinishReason = "content_filter"
	FinishReasonNull          FinishReason = "null"
)

// ServiceTier is the value of the "service_tier" field of requests and
// responses.
type ServiceTier string

// Known ServiceTier values.
const (
	ServiceTierAuto     ServiceTier = "auto"
	ServiceTierDefault  ServiceTier = "default"
	ServiceTierFlex     ServiceTier = "flex"
	ServiceTierPriority ServiceTier = "priority"
)

// MarshalJSON encodes FinishReasonNull and the empty string as JSON null.
// Any other value, including ones this package does not know about, is
// encoded as a JSON string (best effort to not break on future API changes).
//
// The string is encoded with json.Marshal rather than wrapped in quotes by
// hand, so values containing quotes, backslashes or control characters still
// produce valid JSON. Known values encode exactly as before. Note that
// json.Marshal also escapes '<', '>' and '&' as \u003c, \u003e and \u0026.
func (r FinishReason) MarshalJSON() ([]byte, error) {
	if r == FinishReasonNull || r == "" {
		return []byte("null"), nil
	}
	return json.Marshal(string(r))
}
|
|
|
|
type ChatCompletionChoice struct {
|
|
Index int `json:"index"`
|
|
Message ChatCompletionMessage `json:"message"`
|
|
// FinishReason
|
|
// stop: API returned complete message,
|
|
// or a message terminated by one of the stop sequences provided via the stop parameter
|
|
// length: Incomplete model output due to max_tokens parameter or token limit
|
|
// function_call: The model decided to call a function
|
|
// content_filter: Omitted content due to a flag from our content filters
|
|
// null: API response still in progress or incomplete
|
|
FinishReason FinishReason `json:"finish_reason"`
|
|
LogProbs *LogProbs `json:"logprobs,omitempty"`
|
|
ContentFilterResults ContentFilterResults `json:"content_filter_results,omitempty"`
|
|
}
|
|
|
|
// ChatCompletionResponse represents a response structure for chat completion API.
|
|
type ChatCompletionResponse struct {
|
|
ID string `json:"id"`
|
|
Object string `json:"object"`
|
|
Created int64 `json:"created"`
|
|
Model string `json:"model"`
|
|
Choices []ChatCompletionChoice `json:"choices"`
|
|
Usage Usage `json:"usage"`
|
|
SystemFingerprint string `json:"system_fingerprint"`
|
|
PromptFilterResults []PromptFilterResult `json:"prompt_filter_results,omitempty"`
|
|
ServiceTier ServiceTier `json:"service_tier,omitempty"`
|
|
|
|
httpHeader
|
|
}
|
|
|
|
// CreateChatCompletion — API call to Create a completion for the chat message.
|
|
func (c *Client) CreateChatCompletion(
|
|
ctx context.Context,
|
|
request ChatCompletionRequest,
|
|
) (response ChatCompletionResponse, err error) {
|
|
if request.Stream {
|
|
err = ErrChatCompletionStreamNotSupported
|
|
return
|
|
}
|
|
|
|
urlSuffix := chatCompletionsSuffix
|
|
if !checkEndpointSupportsModel(urlSuffix, request.Model) {
|
|
err = ErrChatCompletionInvalidModel
|
|
return
|
|
}
|
|
|
|
reasoningValidator := NewReasoningValidator()
|
|
if err = reasoningValidator.Validate(request); err != nil {
|
|
return
|
|
}
|
|
|
|
req, err := c.newRequest(
|
|
ctx,
|
|
http.MethodPost,
|
|
c.fullURL(urlSuffix, withModel(request.Model)),
|
|
withBody(request),
|
|
)
|
|
if err != nil {
|
|
return
|
|
}
|
|
|
|
err = c.sendRequest(req, &response)
|
|
return
|
|
}
|