| package functions |
|
|
| import ( |
| "encoding/json" |
| "errors" |
| "fmt" |
| "math/rand" |
| "regexp" |
| "strings" |
| "unicode" |
| "unicode/utf8" |
|
|
| "github.com/mudler/xlog" |
| ) |
|
|
| |
// ChatMsgPartialException is the error value the parser returns when the
// input stops (or diverges) before an expected construct is complete, e.g. a
// literal that is only partially present or an unfinished tool call.
type ChatMsgPartialException struct {
	// Message is the human-readable description returned by Error.
	Message string
}

// Error implements the error interface and returns Message unchanged.
func (e *ChatMsgPartialException) Error() string {
	msg := e.Message
	return msg
}
|
|
| |
// StringRange is a pair of byte offsets into the parser input. The parser
// slices the input as input[Begin:End], so End is exclusive.
type StringRange struct {
	Begin, End int
}
|
|
| |
| type FindLiteralResult struct { |
| Prelude string |
| Groups []StringRange |
| } |
|
|
| |
| |
| type ChatMsgParser struct { |
| input string |
| isPartial bool |
| pos int |
| healingMarker string |
| content strings.Builder |
| reasoning strings.Builder |
| toolCalls []FuncCallResults |
| } |
|
|
| |
| func NewChatMsgParser(input string, isPartial bool) *ChatMsgParser { |
| |
| healingMarker := generateHealingMarker(input) |
|
|
| return &ChatMsgParser{ |
| input: input, |
| isPartial: isPartial, |
| pos: 0, |
| healingMarker: healingMarker, |
| toolCalls: make([]FuncCallResults, 0), |
| } |
| } |
|
|
| |
// generateHealingMarker returns a random decimal string that is guaranteed not
// to be a substring of input. Candidates are drawn until one is unused.
func generateHealingMarker(input string) string {
	candidate := fmt.Sprintf("%d", rand.Int63())
	for strings.Contains(input, candidate) {
		candidate = fmt.Sprintf("%d", rand.Int63())
	}
	return candidate
}
|
|
| |
| func (p *ChatMsgParser) SetHealingMarker(marker string) { |
| p.healingMarker = marker |
| } |
|
|
| |
| func (p *ChatMsgParser) Input() string { |
| return p.input |
| } |
|
|
| |
| func (p *ChatMsgParser) Pos() int { |
| return p.pos |
| } |
|
|
| |
| func (p *ChatMsgParser) IsPartial() bool { |
| return p.isPartial |
| } |
|
|
| |
| func (p *ChatMsgParser) HealingMarker() string { |
| return p.healingMarker |
| } |
|
|
| |
| func (p *ChatMsgParser) MoveTo(pos int) error { |
| if pos < 0 || pos > len(p.input) { |
| return fmt.Errorf("invalid position: %d (input length: %d)", pos, len(p.input)) |
| } |
| p.pos = pos |
| return nil |
| } |
|
|
| |
| func (p *ChatMsgParser) MoveBack(n int) error { |
| if p.pos < n { |
| return fmt.Errorf("can't move back %d characters from position %d", n, p.pos) |
| } |
| p.pos -= n |
| return nil |
| } |
|
|
| |
| func (p *ChatMsgParser) Str(rng StringRange) string { |
| if rng.Begin < 0 || rng.End > len(p.input) || rng.Begin > rng.End { |
| return "" |
| } |
| return p.input[rng.Begin:rng.End] |
| } |
|
|
| |
| func (p *ChatMsgParser) ConsumeRest() string { |
| if p.pos >= len(p.input) { |
| return "" |
| } |
| result := p.input[p.pos:] |
| p.pos = len(p.input) |
| return result |
| } |
|
|
| |
| func (p *ChatMsgParser) AddContent(content string) { |
| p.content.WriteString(content) |
| } |
|
|
| |
| func (p *ChatMsgParser) AddReasoningContent(reasoning string) { |
| p.reasoning.WriteString(reasoning) |
| } |
|
|
| |
| func (p *ChatMsgParser) AddToolCall(name, id, arguments string) bool { |
| if name == "" { |
| return false |
| } |
| p.toolCalls = append(p.toolCalls, FuncCallResults{ |
| Name: name, |
| Arguments: arguments, |
| }) |
| return true |
| } |
|
|
| |
| func (p *ChatMsgParser) ToolCalls() []FuncCallResults { |
| return p.toolCalls |
| } |
|
|
| |
| func (p *ChatMsgParser) Content() string { |
| return p.content.String() |
| } |
|
|
| |
| func (p *ChatMsgParser) Reasoning() string { |
| return p.reasoning.String() |
| } |
|
|
| |
// rstrip returns s without trailing Unicode whitespace.
func rstrip(s string) string {
	isTrailingSpace := func(r rune) bool { return unicode.IsSpace(r) }
	trimmed := strings.TrimRightFunc(s, isTrailingSpace)
	return trimmed
}
|
|
| |
| |
// eraseSpaces removes the byte range [l, r] (r is INCLUSIVE, matching how the
// callers in this file compute it as "index of the last byte of the tag")
// together with any ASCII whitespace directly before l and directly after r,
// and puts at most two newlines in its place.
//
// It returns the rewritten string and an index: the (left-extended) l when
// that is 0, otherwise l+2, i.e. the position just past the inserted newlines.
// When the arguments do not describe a valid range inside str, str and l are
// returned unchanged.
//
// Fixes relative to the previous implementation:
//   - the byte at index r itself is now erased; previously the scan started
//     at r, so the final byte of the removed tag (e.g. '>') was left behind;
//   - whitespace is detected with an ASCII-only byte test, because converting
//     a single byte to a rune for unicode.IsSpace misclassifies the UTF-8
//     continuation bytes 0x85 and 0xA0 and could cut a multi-byte character;
//   - everything after the erased range is always kept, instead of sometimes
//     being sliced from an unrelated offset.
func eraseSpaces(str string, l, r int) (string, int) {
	if l < 0 || r < 0 || l > len(str) || r > len(str) || l > r {
		return str, l
	}

	// Byte-level whitespace test (same set as C's isspace in the "C" locale).
	isSpace := func(b byte) bool {
		switch b {
		case ' ', '\t', '\n', '\v', '\f', '\r':
			return true
		}
		return false
	}

	// Extend the range to the left over preceding whitespace.
	for l > 0 && l < len(str) && isSpace(str[l-1]) {
		l--
	}

	// r is inclusive: step past it, then swallow following whitespace.
	if r < len(str) {
		r++
	}
	for r < len(str) && isSpace(str[r]) {
		r++
	}

	// From here on r is exclusive: str[l:r] is what gets replaced.
	result := str[:l]
	if l < r {
		result += "\n"
		if l+1 < r {
			result += "\n"
		}
	}
	result += str[r:]

	newL := l
	if newL != 0 {
		newL += 2
	}
	return result, newL
}
|
|
| |
| func (p *ChatMsgParser) ClearTools() { |
| p.toolCalls = p.toolCalls[:0] |
| } |
|
|
| |
| |
| func (p *ChatMsgParser) TryConsumeLiteral(literal string) bool { |
| if len(literal) == 0 { |
| return true |
| } |
| if p.pos+len(literal) > len(p.input) { |
| return false |
| } |
| if p.input[p.pos:p.pos+len(literal)] == literal { |
| p.pos += len(literal) |
| return true |
| } |
| return false |
| } |
|
|
| |
| func (p *ChatMsgParser) ConsumeLiteral(literal string) error { |
| if !p.TryConsumeLiteral(literal) { |
| return &ChatMsgPartialException{Message: fmt.Sprintf("Expected literal: %s", literal)} |
| } |
| return nil |
| } |
|
|
| |
| |
| |
| func (p *ChatMsgParser) TryFindLiteral(literal string) *FindLiteralResult { |
| if len(literal) == 0 { |
| return nil |
| } |
|
|
| |
| idx := strings.Index(p.input[p.pos:], literal) |
| if idx == -1 { |
| |
| if p.isPartial { |
| partialIdx := stringFindPartialStop(p.input[p.pos:], literal) |
| if partialIdx != -1 && partialIdx >= 0 { |
| result := &FindLiteralResult{ |
| Prelude: p.input[p.pos : p.pos+partialIdx], |
| Groups: []StringRange{ |
| {Begin: p.pos + partialIdx, End: len(p.input)}, |
| }, |
| } |
| p.pos = len(p.input) |
| return result |
| } |
| } |
| return nil |
| } |
|
|
| idx += p.pos |
| result := &FindLiteralResult{ |
| Prelude: p.input[p.pos:idx], |
| Groups: []StringRange{ |
| {Begin: idx, End: idx + len(literal)}, |
| }, |
| } |
| p.pos = idx + len(literal) |
| return result |
| } |
|
|
| |
| |
// stringFindPartialStop looks for the longest prefix of needle that s ends
// with and returns the index in s where that prefix starts. It returns -1 when
// either string is empty or s does not end with any prefix of needle.
func stringFindPartialStop(s, needle string) int {
	if s == "" || needle == "" {
		return -1
	}
	// A prefix longer than s can never be a suffix of s.
	longest := len(needle)
	if len(s) < longest {
		longest = len(s)
	}
	for k := longest; k >= 1; k-- {
		tailStart := len(s) - k
		if s[tailStart:] == needle[:k] {
			return tailStart
		}
	}
	return -1
}
|
|
| |
| func (p *ChatMsgParser) ConsumeSpaces() bool { |
| consumed := false |
| for p.pos < len(p.input) && unicode.IsSpace(rune(p.input[p.pos])) { |
| p.pos++ |
| consumed = true |
| } |
| return consumed |
| } |
|
|
| |
// AllSpace reports whether s is empty or consists only of whitespace as
// defined by strings.TrimSpace.
func AllSpace(s string) bool {
	remainder := strings.TrimSpace(s)
	return len(remainder) == 0
}
|
|
| |
| |
| |
| |
| func (p *ChatMsgParser) TryConsumeJSON() (any, bool, string, error) { |
| |
| p.ConsumeSpaces() |
|
|
| if p.pos >= len(p.input) { |
| return nil, false, "", errors.New("end of input") |
| } |
|
|
| |
| jsonStart := p.pos |
| if p.input[p.pos] != '{' && p.input[p.pos] != '[' { |
| return nil, false, "", errors.New("not a JSON object or array") |
| } |
|
|
| |
| |
| decoder := json.NewDecoder(strings.NewReader(p.input[jsonStart:])) |
| var jsonValue any |
| if err := decoder.Decode(&jsonValue); err == nil { |
| |
| |
| p.pos = jsonStart + int(decoder.InputOffset()) |
| return jsonValue, false, "", nil |
| } |
|
|
| |
| |
| depth := 0 |
| inString := false |
| escape := false |
| jsonEnd := -1 |
|
|
| for i := p.pos; i < len(p.input); i++ { |
| ch := p.input[i] |
|
|
| if escape { |
| escape = false |
| continue |
| } |
|
|
| if ch == '\\' { |
| escape = true |
| continue |
| } |
|
|
| if ch == '"' { |
| inString = !inString |
| continue |
| } |
|
|
| if inString { |
| continue |
| } |
|
|
| if ch == '{' || ch == '[' { |
| depth++ |
| } else if ch == '}' || ch == ']' { |
| depth-- |
| if depth == 0 { |
| jsonEnd = i + 1 |
| break |
| } |
| } |
| } |
|
|
| if jsonEnd == -1 { |
| |
| if p.isPartial { |
| |
| partialInput := p.input[jsonStart:] |
| healedValue, wasHealed, jsonDumpMarker, err := parseJSONWithStack(partialInput, p.healingMarker) |
| if err == nil && wasHealed { |
| |
| cleaned := removeHealingMarkerFromJSONAny(healedValue, p.healingMarker) |
| p.pos = len(p.input) |
| return cleaned, true, jsonDumpMarker, nil |
| } |
| } |
| return nil, true, "", errors.New("incomplete JSON") |
| } |
|
|
| |
| jsonStr := p.input[jsonStart:jsonEnd] |
| if err := json.Unmarshal([]byte(jsonStr), &jsonValue); err != nil { |
| return nil, false, "", err |
| } |
|
|
| p.pos = jsonEnd |
| return jsonValue, false, "", nil |
| } |
|
|
| |
| |
| |
| func (p *ChatMsgParser) tryConsumeJSONPrimitive() (any, bool) { |
| |
| p.ConsumeSpaces() |
| if p.pos >= len(p.input) { |
| return nil, false |
| } |
|
|
| |
| remaining := p.input[p.pos:] |
| safeView := utf8TruncateSafeView(remaining) |
|
|
| |
| if len(safeView) >= 4 { |
| prefix := safeView |
| if len(prefix) > 6 { |
| prefix = prefix[:6] |
| } |
| if strings.HasPrefix(prefix, "null") { |
| |
| if len(safeView) >= 4 { |
| if len(safeView) == 4 || isJSONTerminator(safeView[4]) { |
| p.pos += 4 |
| return nil, false |
| } |
| } |
| } else if strings.HasPrefix(prefix, "true") { |
| if len(safeView) >= 4 { |
| if len(safeView) == 4 || isJSONTerminator(safeView[4]) { |
| p.pos += 4 |
| return true, false |
| } |
| } |
| } else if strings.HasPrefix(prefix, "false") { |
| if len(safeView) >= 5 { |
| if len(safeView) == 5 || isJSONTerminator(safeView[5]) { |
| p.pos += 5 |
| return false, false |
| } |
| } |
| } |
| } |
|
|
| |
| |
| numberRegex := regexp.MustCompile(`^[0-9-][0-9]*(\.\d*)?([eE][+-]?\d*)?`) |
| if match := numberRegex.FindString(safeView); match != "" { |
| |
| var numValue float64 |
| if _, err := fmt.Sscanf(match, "%f", &numValue); err == nil { |
| |
| if len(safeView) == len(match) || isJSONTerminator(safeView[len(match)]) { |
| p.pos += len(match) |
| return numValue, false |
| } |
| } |
| } |
|
|
| return nil, false |
| } |
|
|
| |
// isJSONTerminator reports whether ch may directly follow a JSON primitive:
// ASCII blank/line-break characters, the JSON punctuation ',', '}', ']', ':'
// or '<'.
func isJSONTerminator(ch byte) bool {
	switch ch {
	case ' ', '\t', '\n', '\r':
		return true
	case ',', '}', ']', ':', '<':
		return true
	default:
		return false
	}
}
|
|
| |
| |
// utf8TruncateSafeView returns s without a trailing multi-byte UTF-8 sequence
// that has been cut off, so that a character still "in flight" in a streamed
// message is not emitted half-way.
//
// Only the last four bytes are inspected:
//   - if the last rune-start byte found there begins a complete sequence (or an
//     invalid one, which can never become valid), s is returned unchanged;
//   - if it begins a sequence that needs more bytes than are available, s is
//     cut right before it;
//   - if the tail consists of continuation bytes only, up to three bytes are
//     dropped.
//
// The previous implementation validated the whole string instead, so a single
// invalid byte anywhere in s made it drop three unrelated trailing bytes (or
// everything, for short strings) and re-scan s up to four times.
func utf8TruncateSafeView(s string) string {
	n := len(s)
	for i := n - 1; i >= 0 && i >= n-4; i-- {
		if !utf8.RuneStart(s[i]) {
			continue // continuation byte, keep looking for its lead byte
		}
		if utf8.FullRuneInString(s[i:]) {
			return s
		}
		return s[:i]
	}

	// No rune start in the inspected tail: drop up to three bytes.
	drop := 3
	if n < drop {
		drop = n
	}
	return s[:n-drop]
}
|
|
| |
// isJSONObjectOrArray reports whether v has the dynamic type that
// encoding/json produces for an object (map[string]any) or an array ([]any).
func isJSONObjectOrArray(v any) bool {
	if _, isObject := v.(map[string]any); isObject {
		return true
	}
	_, isArray := v.([]any)
	return isArray
}
|
|
| |
// isJSONString reports whether v holds a Go string.
func isJSONString(v any) bool {
	switch v.(type) {
	case string:
		return true
	}
	return false
}
|
|
| |
| |
| |
| func trimPotentialPartialWord(content string, format *XMLToolCallFormat, startThink, endThink string) string { |
| patterns := []string{ |
| startThink, |
| endThink, |
| format.ScopeStart, |
| format.ToolStart, |
| format.ToolSep, |
| format.KeyStart, |
| format.KeyValSep, |
| } |
| if format.KeyValSep2 != nil { |
| patterns = append(patterns, *format.KeyValSep2) |
| } |
| patterns = append(patterns, format.ValEnd) |
| if format.LastValEnd != nil { |
| patterns = append(patterns, *format.LastValEnd) |
| } |
| patterns = append(patterns, format.ToolEnd) |
| if format.LastToolEnd != nil { |
| patterns = append(patterns, *format.LastToolEnd) |
| } |
| patterns = append(patterns, format.ScopeEnd) |
|
|
| bestMatch := len(content) |
| for _, pattern := range patterns { |
| if len(pattern) == 0 { |
| continue |
| } |
| |
| maxStart := len(content) - len(pattern) |
| if maxStart < 0 { |
| maxStart = 0 |
| } |
| for matchIdx := len(content); matchIdx > maxStart; matchIdx-- { |
| matchLen := len(content) - matchIdx |
| if matchLen > 0 && matchIdx < len(content) { |
| |
| if matchIdx+matchLen <= len(content) { |
| substr := content[matchIdx : matchIdx+matchLen] |
| if len(substr) <= len(pattern) && strings.HasPrefix(pattern, substr) { |
| if matchIdx < bestMatch { |
| bestMatch = matchIdx |
| } |
| } |
| } |
| } |
| } |
| } |
|
|
| if len(content) > bestMatch { |
| return content[:bestMatch] |
| } |
| return content |
| } |
|
|
| |
// removeHealingMarkerFromJSON returns a copy of value in which every string
// value is cut off at the first occurrence of marker. Nested objects are
// processed recursively; all other values, including arrays, are copied as
// they are. Keys are never modified and value itself is not changed.
func removeHealingMarkerFromJSON(value map[string]any, marker string) map[string]any {
	cleaned := make(map[string]any)
	for key, raw := range value {
		switch typed := raw.(type) {
		case string:
			// Cut yields the whole string when marker is absent.
			before, _, _ := strings.Cut(typed, marker)
			cleaned[key] = before
		case map[string]any:
			cleaned[key] = removeHealingMarkerFromJSON(typed, marker)
		default:
			cleaned[key] = raw
		}
	}
	return cleaned
}
|
|
| |
| func removeHealingMarkerFromJSONAny(value any, marker string) any { |
| switch v := value.(type) { |
| case map[string]any: |
| return removeHealingMarkerFromJSON(v, marker) |
| case []any: |
| result := make([]any, len(v)) |
| for i, item := range v { |
| result[i] = removeHealingMarkerFromJSONAny(item, marker) |
| } |
| return result |
| case string: |
| if idx := strings.Index(v, marker); idx != -1 { |
| return v[:idx] |
| } |
| return v |
| default: |
| return v |
| } |
| } |
|
|
| |
| |
| |
// TryConsumeXMLToolCalls parses XML-style tool calls starting at the cursor,
// using the delimiters in format, and records each one with AddToolCall.
//
// Overall shape of what is parsed (all delimiters come from format):
//
//	ScopeStart? ( ToolStart name (ToolSep|KeyStart) ( KeyStart key KeyValSep KeyValSep2? value ValEnd )* ToolEnd )* ScopeEnd?
//
// Returns:
//   - (true, nil) when the parser holds at least one tool call on exit (the
//     check is len(p.toolCalls) > 0, so calls recorded earlier count as well);
//   - (false, nil) when nothing was parsed, or when a recoverable error
//     occurred before any argument/tool call was committed — the cursor is
//     then rewound to where the call started;
//   - (false, *ChatMsgPartialException) when the input ends in the middle of a
//     tool call; a best-effort partial tool call is usually recorded first;
//   - (false, other error) when format is nil, required delimiters are
//     missing, or parsing failed after recovery was no longer possible.
//
// Two format shapes are delegated to dedicated helpers instead (see below).
func (p *ChatMsgParser) TryConsumeXMLToolCalls(format *XMLToolCallFormat) (bool, error) {
	if format == nil {
		return false, errors.New("format is required")
	}

	// Format with "<function=" as tool start and no key delimiter: handled by
	// parseFunctionaryFormat on the remaining input. The cursor is not moved.
	if format.KeyStart == "" && format.ToolStart == "<function=" {
		results, err := parseFunctionaryFormat(p.input[p.pos:], format)
		if err != nil || len(results) == 0 {
			return false, nil
		}
		for _, result := range results {
			p.AddToolCall(result.Name, "", result.Arguments)
		}
		return true, nil
	}

	// Tool start containing `{"name"`: handled by parseJSONLikeXMLFormat on
	// the remaining input. The cursor is not moved here either.
	if format.ToolStart != "" && strings.Contains(format.ToolStart, "{\"name\"") {
		results, err := parseJSONLikeXMLFormat(p.input[p.pos:], format)
		if err != nil || len(results) == 0 {
			return false, nil
		}
		for _, result := range results {
			p.AddToolCall(result.Name, "", result.Arguments)
		}
		return true, nil
	}

	// The generic parser below needs all of these delimiters.
	if format.ToolStart == "" || format.KeyStart == "" || format.KeyValSep == "" ||
		format.ValEnd == "" || format.ToolEnd == "" {
		return false, errors.New("required format fields missing")
	}

	startPos := p.pos
	// recovery stays true until something has been committed (an argument key
	// was accepted or a tool call was recorded); after that, errors are fatal.
	recovery := true

	// returnError logs err and either rewinds to startPos and reports "no tool
	// call" (when both the call site and the current state allow recovery), or
	// wraps err as an unrecoverable failure.
	returnError := func(err error, canRecover bool) (bool, error) {
		xlog.Debug("Failed to parse XML tool call", "error", err, "position", p.pos)
		if canRecover && recovery {
			p.MoveTo(startPos)
			return false, nil
		}
		return false, fmt.Errorf("tool call parsing failed with unrecoverable errors: %w", err)
	}

	// tryFindValEnd looks for the end of an argument value. Besides ValEnd it
	// also tries "LastValEnd followed by ToolEnd" (and "LastValEnd followed by
	// LastToolEnd") when those are configured, and keeps whichever candidate
	// has the shortest prelude, i.e. occurs first. It returns the length of
	// the delimiter that was chosen together with the match; the cursor is
	// left after the chosen value-end delimiter.
	tryFindValEnd := func() (int, *FindLiteralResult) {
		savedPos := p.pos
		tc := p.TryFindLiteral(format.ValEnd)
		valEndSize := len(format.ValEnd)

		if format.LastValEnd != nil {
			p.MoveTo(savedPos)
			tc2 := p.tryFind2LiteralSplitBySpaces(*format.LastValEnd, format.ToolEnd)
			if format.LastToolEnd != nil {
				p.MoveTo(savedPos)
				tc3 := p.tryFind2LiteralSplitBySpaces(*format.LastValEnd, *format.LastToolEnd)
				if tc3 != nil && (tc2 == nil || len(tc2.Prelude) > len(tc3.Prelude)) {
					tc2 = tc3
				}
			}
			if tc2 != nil && (tc == nil || len(tc.Prelude) > len(tc2.Prelude)) {
				tc = tc2
				// Shrink the combined match to just the LastValEnd part so the
				// tool-end delimiter is still available to the caller.
				if tc.Groups[0].End > len(p.input) {
					tc.Groups[0].End = len(p.input)
				}
				if tc.Groups[0].Begin+len(*format.LastValEnd) < len(p.input) {
					tc.Groups[0].End = tc.Groups[0].Begin + len(*format.LastValEnd)
				}
				p.MoveTo(tc.Groups[0].End)
				valEndSize = len(*format.LastValEnd)
			} else {
				p.MoveTo(savedPos)
			}
		}
		return valEndSize, tc
	}

	// tryFindToolEnd is the tool-level counterpart of tryFindValEnd: it looks
	// for ToolEnd and, when LastToolEnd is configured, for "LastToolEnd
	// followed by ScopeEnd", preferring the candidate with the shorter prelude.
	tryFindToolEnd := func() (int, *FindLiteralResult) {
		savedPos := p.pos
		tc := p.TryFindLiteral(format.ToolEnd)
		toolEndSize := len(format.ToolEnd)

		if format.LastToolEnd != nil {
			p.MoveTo(savedPos)
			tc2 := p.tryFind2LiteralSplitBySpaces(*format.LastToolEnd, format.ScopeEnd)
			if tc2 != nil && (tc == nil || len(tc.Prelude) > len(tc2.Prelude)) {
				tc = tc2
				// Keep only the LastToolEnd part of the combined match.
				if tc.Groups[0].End > len(p.input) {
					tc.Groups[0].End = len(p.input)
				}
				if tc.Groups[0].Begin+len(*format.LastToolEnd) < len(p.input) {
					tc.Groups[0].End = tc.Groups[0].Begin + len(*format.LastToolEnd)
				}
				p.MoveTo(tc.Groups[0].End)
				toolEndSize = len(*format.LastToolEnd)
			} else {
				p.MoveTo(savedPos)
			}
		}
		return toolEndSize, tc
	}

	// Outer loop: one iteration per tool-call scope.
	for {
		// A non-blank ScopeStart must be the next non-whitespace thing.
		if format.ScopeStart != "" && !AllSpace(format.ScopeStart) {
			tc := p.TryFindLiteral(format.ScopeStart)
			if tc == nil {
				// No (further) scope in the input.
				break
			}
			if !AllSpace(tc.Prelude) {
				// Other text comes first: rewind to the start of that text.
				p.MoveTo(tc.Groups[0].Begin - len(tc.Prelude))
				break
			}
			// A match shorter than the literal is a partial match at the end
			// of the input (see TryFindLiteral).
			if len(tc.Groups) > 0 {
				matchedSize := tc.Groups[0].End - tc.Groups[0].Begin
				if matchedSize != len(format.ScopeStart) {
					return false, &ChatMsgPartialException{Message: fmt.Sprintf("Partial literal: %s", format.ScopeStart)}
				}
			}
		}

		// NOTE(review): this flag is declared inside the outer loop and is only
		// set to true at the very end of an iteration, after the single place
		// that reads it. That read therefore always sees false. Confirm whether
		// it was meant to be set when a tool call is recorded.
		scopeToolCallsFound := false
		// Inner loop: one iteration per tool call inside the scope.
		for {
			tc := p.TryFindLiteral(format.ToolStart)
			if tc == nil {
				break
			}

			if !AllSpace(tc.Prelude) {
				// Non-whitespace before the tool start: rewind and stop.
				p.MoveTo(tc.Groups[0].Begin - len(tc.Prelude))
				break
			}

			// The function name runs up to ToolSep, or up to the first
			// KeyStart when ToolSep is blank.
			var funcName *FindLiteralResult
			if AllSpace(format.ToolSep) {
				// Blank separator: the name ends where the first key starts.
				funcName = p.TryFindLiteral(format.KeyStart)
			} else {
				// Explicit separator after the name.
				funcName = p.TryFindLiteral(format.ToolSep)
			}

			if funcName == nil {
				// No separator/key found: this may be a call without
				// arguments, i.e. ToolStart name ToolEnd.
				_, toolEnd := tryFindToolEnd()
				if toolEnd != nil {
					// Everything between tool start and tool end is the name.
					nameStart := tc.Groups[0].End
					nameEnd := toolEnd.Groups[0].Begin
					functionName := ""
					if nameEnd > nameStart {
						functionName = strings.TrimSpace(p.input[nameStart:nameEnd])
					}
					argsJSON, _ := json.Marshal(map[string]any{})
					p.AddToolCall(functionName, "", string(argsJSON))
					recovery = false
					continue
				}
				// Neither separator nor tool end: the call is still incomplete.
				return false, &ChatMsgPartialException{Message: "incomplete tool_call"}
			}

			// If a tool end appears before the separator/key that was found,
			// that separator belongs to a later tool call and this call has no
			// arguments.
			functionNamePrelude := funcName.Prelude
			if strings.Contains(functionNamePrelude, format.ToolEnd) ||
				(format.LastToolEnd != nil && strings.Contains(functionNamePrelude, *format.LastToolEnd)) {
				// Restart the search from the tool start and take the text up
				// to the tool end as the function name.
				p.MoveTo(tc.Groups[0].Begin)
				_, toolEnd := tryFindToolEnd()
				if toolEnd != nil {
					nameStart := tc.Groups[0].End
					nameEnd := toolEnd.Groups[0].Begin
					functionName := ""
					if nameEnd > nameStart {
						functionName = strings.TrimSpace(p.input[nameStart:nameEnd])
						// Drop a trailing explicit separator from the name.
						if !AllSpace(format.ToolSep) && strings.HasSuffix(functionName, format.ToolSep) {
							functionName = strings.TrimSpace(functionName[:len(functionName)-len(format.ToolSep)])
						}
					}
					argsJSON, _ := json.Marshal(map[string]any{})
					p.AddToolCall(functionName, "", string(argsJSON))
					recovery = false
					continue
				}
			}

			// Position the cursor for argument parsing: before the KeyStart
			// that ended the name (blank separator), or after the separator.
			if AllSpace(format.ToolSep) {
				// The KeyStart that was found must be re-read by the argument
				// loop below.
				p.MoveTo(funcName.Groups[0].Begin)
			} else {
				// Continue right after the explicit separator.
				p.MoveTo(funcName.Groups[0].End)
			}
			functionName := strings.TrimSpace(funcName.Prelude)

			// Names of the form "functions.<name>:<digits>" are reduced to
			// "<name>" (the numeric suffix is optional).
			if strings.HasPrefix(functionName, "functions.") {
				functionName = functionName[10:]
				if idx := strings.LastIndex(functionName, ":"); idx != -1 {
					suffix := functionName[idx+1:]
					allDigits := true
					for _, r := range suffix {
						if r < '0' || r > '9' {
							allDigits = false
							break
						}
					}
					if allDigits {
						functionName = functionName[:idx]
					}
				}
			}

			// Collected key/value arguments of the current tool call.
			arguments := make(map[string]any)

			// Argument loop: one iteration per key/value pair.
			for {
				keyStart := p.TryFindLiteral(format.KeyStart)
				if keyStart == nil {
					break
				}

				if !AllSpace(keyStart.Prelude) {
					// Something other than a key comes next: rewind to it.
					p.MoveTo(keyStart.Groups[0].Begin - len(keyStart.Prelude))
					break
				}

				// Partial KeyStart at the end of the input: emit what we have
				// as an unterminated JSON object (closing '}' removed).
				if len(keyStart.Groups) > 0 {
					matchedSize := keyStart.Groups[0].End - keyStart.Groups[0].Begin
					if matchedSize != len(format.KeyStart) {
						// Drop the closing brace to mark the object as open.
						argsJSON, _ := json.Marshal(arguments)
						if len(argsJSON) > 0 && argsJSON[len(argsJSON)-1] == '}' {
							argsJSON = argsJSON[:len(argsJSON)-1]
						}
						p.AddToolCall(functionName, "", string(argsJSON))
						return false, &ChatMsgPartialException{Message: fmt.Sprintf("Partial literal: %s", format.KeyStart)}
					}
				}

				// The key runs up to KeyValSep.
				keyValSep := p.TryFindLiteral(format.KeyValSep)
				if keyValSep == nil {
					// The key itself is still being streamed. The rest of the
					// input is used as a key carrying a sentinel suffix, and
					// partialJSON is given the marshalled arguments to produce
					// the partial argument string.
					rest := p.ConsumeRest()
					arguments[rest+"XML_TOOL_CALL_PARTIAL_FLAG"] = ""
					argsJSON, _ := json.Marshal(arguments)
					toolStr := string(argsJSON)
					if cleaned, isPartial := partialJSON(toolStr); isPartial {
						p.AddToolCall(functionName, "", cleaned)
					} else {
						p.AddToolCall(functionName, "", toolStr)
					}
					return false, &ChatMsgPartialException{
						Message: fmt.Sprintf("Expected %s after %s", format.KeyValSep, format.KeyStart),
					}
				}

				// Partial KeyValSep at the end of the input: same treatment,
				// using the text before the partial separator as the key.
				if len(keyValSep.Groups) > 0 {
					matchedSize := keyValSep.Groups[0].End - keyValSep.Groups[0].Begin
					if matchedSize != len(format.KeyValSep) {
						// Same sentinel technique as above.
						rest := keyValSep.Prelude
						arguments[rest+"XML_TOOL_CALL_PARTIAL_FLAG"] = ""
						argsJSON, _ := json.Marshal(arguments)
						toolStr := string(argsJSON)
						if cleaned, isPartial := partialJSON(toolStr); isPartial {
							p.AddToolCall(functionName, "", cleaned)
						} else {
							p.AddToolCall(functionName, "", toolStr)
						}
						return false, &ChatMsgPartialException{Message: fmt.Sprintf("Partial literal: %s", format.KeyValSep)}
					}
				}

				key := strings.TrimSpace(keyValSep.Prelude)
				// A complete key has been read: from here on errors are fatal.
				recovery = false

				// Optional second separator between key and value; it is
				// skipped when present and ignored when absent.
				if format.KeyValSep2 != nil {
					p.TryConsumeLiteral(*format.KeyValSep2)
				}

				// Start of the value; used to rewind when JSON parsing is
				// abandoned in favour of raw-text parsing.
				valStart := p.pos

				// First attempt: interpret the value as JSON (unless the
				// format explicitly asks for raw values).
				var jsonValue any
				var jsonHealingMarker string
				jsonParsed := false

				if format.RawArgVal == nil || !*format.RawArgVal {
					// Objects and arrays first.
					jsonVal, _, jsonDumpMarker, err := p.TryConsumeJSON()
					if err == nil {
						jsonValue = jsonVal
						jsonHealingMarker = jsonDumpMarker
						jsonParsed = true
					} else {
						// Then null/true/false/number.
						primitiveVal, found := p.tryConsumeJSONPrimitive()
						if found {
							jsonValue = primitiveVal
							jsonParsed = true
						} else {
							// Not JSON: rewind and parse as raw text below.
							p.MoveTo(valStart)
						}
					}
				}

				// A JSON value is only accepted if the value end delimiter
				// follows it directly (whitespace aside).
				if jsonParsed {
					jsonEnd := p.pos
					p.ConsumeSpaces()

					// Input ended right after the JSON value: we cannot yet
					// know whether more value text follows.
					if p.pos >= len(p.input) {
						// Only when RawArgVal is explicitly set to false.
						if format.RawArgVal != nil && !*format.RawArgVal {
							// Objects, arrays and strings can be emitted as a
							// partial argument right away.
							if isJSONObjectOrArray(jsonValue) || isJSONString(jsonValue) {
								arguments[key] = jsonValue
								argsJSON, _ := json.Marshal(arguments)
								toolStr := string(argsJSON)

								// Cut the serialized arguments at the healing
								// marker when the value had to be healed.
								if jsonHealingMarker != "" {
									// Use the last occurrence of the marker in
									// the serialized form.
									idx := strings.LastIndex(toolStr, jsonHealingMarker)
									if idx != -1 {
										toolStr = toolStr[:idx]
									} else {
										// Marker not present in the serialized
										// form: log (truncated to 100 bytes)
										// and fall back to removing the
										// closing brace.
										jsonPreview := toolStr
										if len(jsonPreview) > 100 {
											jsonPreview = jsonPreview[:100]
										}
										xlog.Debug("jsonDumpMarker not found in JSON string, using fallback", "marker", jsonHealingMarker, "json", jsonPreview)
										// Leave the object open.
										if len(toolStr) > 0 && toolStr[len(toolStr)-1] == '}' {
											toolStr = toolStr[:len(toolStr)-1]
										}
									}
								} else {
									// Complete value: just leave the object open.
									if len(toolStr) > 0 && toolStr[len(toolStr)-1] == '}' {
										toolStr = toolStr[:len(toolStr)-1]
									}
								}
								p.AddToolCall(functionName, "", toolStr)
								return false, &ChatMsgPartialException{
									Message: "JSON arg_value detected. Waiting for more tokens for validations.",
								}
							}
						}
						// Otherwise record the key with a sentinel value and
						// let partialJSON produce the partial argument string.
						genPartialArgs := func(needle string) {
							arguments[key] = needle
							argsJSON, _ := json.Marshal(arguments)
							toolStr := string(argsJSON)
							if cleaned, isPartial := partialJSON(toolStr); isPartial {
								p.AddToolCall(functionName, "", cleaned)
							} else {
								p.AddToolCall(functionName, "", toolStr)
							}
						}
						genPartialArgs("XML_TOOL_CALL_PARTIAL_FLAG")
						return false, &ChatMsgPartialException{
							Message: "JSON arg_value detected. Waiting for more tokens for validations.",
						}
					}

					// More input follows: the value end must come next.
					p.MoveTo(jsonEnd)
					valEndSize, valEnd := tryFindValEnd()
					if valEnd != nil && AllSpace(valEnd.Prelude) && jsonHealingMarker == "" {
						// Distinguish a full value-end match from a partial one.
						if len(valEnd.Groups) > 0 {
							matchedSize := valEnd.Groups[0].End - valEnd.Groups[0].Begin
							if matchedSize == valEndSize {
								// Complete delimiter: accept the JSON value.
								arguments[key] = jsonValue
							} else {
								// Partial delimiter at the end of the input.
								genPartialArgs := func(needle string) {
									arguments[key] = needle
									argsJSON, _ := json.Marshal(arguments)
									toolStr := string(argsJSON)
									if cleaned, isPartial := partialJSON(toolStr); isPartial {
										p.AddToolCall(functionName, "", cleaned)
									} else {
										p.AddToolCall(functionName, "", toolStr)
									}
								}
								genPartialArgs("XML_TOOL_CALL_PARTIAL_FLAG")
								return false, &ChatMsgPartialException{
									Message: fmt.Sprintf("Partial literal: %s", format.ValEnd),
								}
							}
						}
					} else {
						// Extra text after the JSON value (or a healed value):
						// treat the whole value as raw text instead.
						p.MoveTo(valStart)
						jsonParsed = false
					}
				}

				// Second attempt: raw text up to the value end delimiter.
				if !jsonParsed {
					valEndSize, valEnd := tryFindValEnd()
					if valEnd == nil {
						// Value still streaming: emit the text so far with the
						// sentinel appended.
						rest := p.ConsumeRest()
						if format.TrimRawArgVal {
							rest = strings.TrimSpace(rest)
						}
						arguments[key] = rest + "XML_TOOL_CALL_PARTIAL_FLAG"
						argsJSON, _ := json.Marshal(arguments)
						toolStr := string(argsJSON)
						if cleaned, isPartial := partialJSON(toolStr); isPartial {
							p.AddToolCall(functionName, "", cleaned)
						} else {
							p.AddToolCall(functionName, "", toolStr)
						}
						return false, &ChatMsgPartialException{
							Message: fmt.Sprintf("Expected %s after %s", format.ValEnd, format.KeyValSep),
						}
					}

					// Partial value-end delimiter at the end of the input.
					if len(valEnd.Groups) > 0 {
						matchedSize := valEnd.Groups[0].End - valEnd.Groups[0].Begin
						if matchedSize != valEndSize {
							// Emit the text before the partial delimiter.
							rest := valEnd.Prelude
							if format.TrimRawArgVal {
								rest = strings.TrimSpace(rest)
							}
							arguments[key] = rest + "XML_TOOL_CALL_PARTIAL_FLAG"
							argsJSON, _ := json.Marshal(arguments)
							toolStr := string(argsJSON)
							if cleaned, isPartial := partialJSON(toolStr); isPartial {
								p.AddToolCall(functionName, "", cleaned)
							} else {
								p.AddToolCall(functionName, "", toolStr)
							}
							return false, &ChatMsgPartialException{Message: fmt.Sprintf("Partial literal: %s", format.ValEnd)}
						}
					}

					// Complete raw value: trim it and let parseParameterValue
					// decide on its final representation.
					valueStr := strings.TrimSpace(valEnd.Prelude)
					value := parseParameterValue(valueStr, format)
					arguments[key] = value
				}
			}

			// All arguments read: the tool end delimiter must follow.
			toolEndSize, toolEnd := tryFindToolEnd()
			if toolEnd == nil {
				// Still streaming: emit the arguments as an open JSON object.
				argsJSON, _ := json.Marshal(arguments)
				toolStr := string(argsJSON)
				if len(toolStr) > 0 && toolStr[len(toolStr)-1] == '}' {
					toolStr = toolStr[:len(toolStr)-1]
				}
				p.AddToolCall(functionName, "", toolStr)
				return false, &ChatMsgPartialException{Message: "incomplete tool_call"}
			}

			if !AllSpace(toolEnd.Prelude) {
				return returnError(errors.New("non-whitespace before tool_end"), recovery)
			}

			// Full tool-end match: the tool call is complete.
			if len(toolEnd.Groups) > 0 {
				matchedSize := toolEnd.Groups[0].End - toolEnd.Groups[0].Begin
				if matchedSize == toolEndSize {
					// AddToolCall only fails for an empty function name.
					argsJSON, _ := json.Marshal(arguments)
					if !p.AddToolCall(functionName, "", string(argsJSON)) {
						return false, &ChatMsgPartialException{Message: "Failed to add XML tool call"}
					}
					recovery = false
					continue
				}
			}

			// Partial tool-end match: emit the arguments as an open object.
			argsJSON, _ := json.Marshal(arguments)
			toolStr := string(argsJSON)
			if len(toolStr) > 0 && toolStr[len(toolStr)-1] == '}' {
				toolStr = toolStr[:len(toolStr)-1]
			}
			p.AddToolCall(functionName, "", toolStr)
			return false, &ChatMsgPartialException{Message: "incomplete tool_call"}
		}

		// End of the scope.
		if format.ScopeEnd != "" {
			tc := p.TryFindLiteral(format.ScopeEnd)
			if tc == nil {
				// No scope end in the input.
				if !p.isPartial {
					// Complete input without scope end. See the NOTE(review)
					// at the declaration of scopeToolCallsFound: as written,
					// the flag is always false here.
					if !scopeToolCallsFound {
						return returnError(errors.New("expected scope_end"), recovery)
					}
					break
				}
				break
			} else if !AllSpace(tc.Prelude) {
				// Text before the scope end. If that text begins with another
				// scope start, rewind to it and stop; otherwise it is an
				// error.
				if format.ScopeStart != "" {
					// Position where the text before the scope end begins.
					testPos := tc.Groups[0].Begin - len(tc.Prelude)
					if testPos >= 0 && testPos < len(p.input) {
						testInput := p.input[testPos:]
						if strings.HasPrefix(testInput, format.ScopeStart) {
							// Leave the next scope for a later call.
							p.MoveTo(testPos)
							break
						}
					}
				}
				return returnError(errors.New("non-whitespace before scope_end"), recovery)
			}
			// Scope closed properly; look for another scope.
			scopeToolCallsFound = true
		} else {
			// No scope end configured: a single pass is all there is.
			break
		}
	}

	return len(p.toolCalls) > 0, nil
}
|
|
| |
| |
| |
| func (p *ChatMsgParser) ParseMsgWithXMLToolCalls(format *XMLToolCallFormat, startThink, endThink string) error { |
| if format == nil { |
| return errors.New("format is required") |
| } |
|
|
| |
| if startThink == "" { |
| startThink = "<think>" |
| } |
| if endThink == "" { |
| endThink = "</think>" |
| } |
|
|
| |
| p.ConsumeSpaces() |
|
|
| |
| reasoningUnclosed := false |
| unclosedReasoningContent := "" |
|
|
| for { |
| |
| tc := p.tryFind2LiteralSplitBySpaces(format.ScopeStart, format.ToolStart) |
| var content string |
| var toolCallStart string |
|
|
| if tc != nil { |
| content = tc.Prelude |
| toolCallStart = p.Str(tc.Groups[0]) |
| } else { |
| content = p.ConsumeRest() |
| content = utf8TruncateSafeView(content) |
| } |
|
|
| |
| if reasoningUnclosed { |
| pos := strings.Index(content, endThink) |
| if pos == -1 && p.pos != len(p.input) { |
| unclosedReasoningContent += content |
| if !(format.AllowToolcallInThink && tc != nil) { |
| unclosedReasoningContent += toolCallStart |
| continue |
| } |
| } else { |
| reasoningUnclosed = false |
| var reasoningContent string |
| if pos == -1 { |
| reasoningContent = content |
| content = "" |
| } else { |
| reasoningContent = content[:pos] |
| content = content[pos+len(endThink):] |
| } |
| if p.pos == len(p.input) && AllSpace(content) { |
| reasoningContent = rstrip(reasoningContent) |
| reasoningContent = trimPotentialPartialWord(reasoningContent, format, startThink, endThink) |
| reasoningContent = rstrip(reasoningContent) |
| if reasoningContent == "" { |
| unclosedReasoningContent = rstrip(unclosedReasoningContent) |
| unclosedReasoningContent = trimPotentialPartialWord(unclosedReasoningContent, format, startThink, endThink) |
| unclosedReasoningContent = rstrip(unclosedReasoningContent) |
| if unclosedReasoningContent == "" { |
| continue |
| } |
| } |
| } |
| |
| |
| p.AddReasoningContent(unclosedReasoningContent) |
| p.AddReasoningContent(reasoningContent) |
| unclosedReasoningContent = "" |
| } |
| } |
|
|
| |
| toolcallInThink := false |
| thinkStart := strings.Index(content, startThink) |
| for thinkStart != -1 { |
| thinkEnd := strings.Index(content[thinkStart+len(startThink):], endThink) |
| if thinkEnd != -1 { |
| thinkEnd += thinkStart + len(startThink) |
| |
| reasoningContent := content[thinkStart+len(startThink) : thinkEnd] |
| p.AddReasoningContent(reasoningContent) |
| |
| content, _ = eraseSpaces(content, thinkStart, thinkEnd+len(endThink)-1) |
| thinkStart = strings.Index(content, startThink) |
| } else { |
| |
| if format.AllowToolcallInThink { |
| unclosedReasoningContent = content[thinkStart+len(startThink):] |
| } else { |
| unclosedReasoningContent = content[thinkStart+len(startThink):] + toolCallStart |
| } |
| reasoningUnclosed = true |
| content = content[:thinkStart] |
| toolcallInThink = true |
| break |
| } |
| } |
|
|
| |
| |
| content = rstrip(content) |
| pos := strings.LastIndex(content, endThink) |
| for pos != -1 { |
| content, pos = eraseSpaces(content, pos, pos+len(endThink)-1) |
| pos = strings.LastIndex(content, endThink) |
| } |
| |
| content = strings.TrimLeftFunc(content, unicode.IsSpace) |
|
|
| |
| if p.pos == len(p.input) { |
| if unclosedReasoningContent == "" { |
| content = rstrip(content) |
| content = trimPotentialPartialWord(content, format, startThink, endThink) |
| content = rstrip(content) |
| } else { |
| unclosedReasoningContent = rstrip(unclosedReasoningContent) |
| unclosedReasoningContent = trimPotentialPartialWord(unclosedReasoningContent, format, startThink, endThink) |
| unclosedReasoningContent = rstrip(unclosedReasoningContent) |
| } |
| } |
|
|
| |
| if format.AllowToolcallInThink && unclosedReasoningContent != "" { |
| |
| p.AddReasoningContent(unclosedReasoningContent) |
| unclosedReasoningContent = "" |
| } |
|
|
| |
| if content != "" { |
| |
| if p.content.Len() > 0 { |
| p.AddContent("\n\n") |
| } |
| p.AddContent(content) |
| } |
|
|
| |
| if toolcallInThink && !format.AllowToolcallInThink { |
| continue |
| } |
|
|
| |
| if tc == nil { |
| break |
| } |
|
|
| |
| p.MoveTo(tc.Groups[0].Begin) |
| success, err := p.TryConsumeXMLToolCalls(format) |
| if err != nil { |
| |
| if _, ok := err.(*ChatMsgPartialException); ok { |
| |
| continue |
| } |
| return err |
| } |
| if success { |
| endOfTool := p.pos |
| p.ConsumeSpaces() |
| if p.pos != len(p.input) { |
| p.MoveTo(endOfTool) |
| if p.content.Len() > 0 { |
| p.AddContent("\n\n") |
| } |
| } |
| } else { |
| |
| if p.pos < len(p.input) { |
| nextChar := string(p.input[p.pos]) |
| nextChar = rstrip(nextChar) |
| p.AddContent(nextChar) |
| p.pos++ |
| } |
| } |
| } |
|
|
| return nil |
| } |
|
|
| |
| func (p *ChatMsgParser) tryFind2LiteralSplitBySpaces(literal1, literal2 string) *FindLiteralResult { |
| savedPos := p.pos |
|
|
| |
| tc1 := p.TryFindLiteral(literal1) |
| if tc1 == nil { |
| p.MoveTo(savedPos) |
| return nil |
| } |
|
|
| |
| p.ConsumeSpaces() |
|
|
| |
| tc2 := p.TryFindLiteral(literal2) |
| if tc2 == nil { |
| p.MoveTo(savedPos) |
| return nil |
| } |
|
|
| |
| betweenText := p.input[tc1.Groups[0].End:tc2.Groups[0].Begin] |
| return &FindLiteralResult{ |
| Prelude: tc1.Prelude + strings.TrimSpace(betweenText) + tc2.Prelude, |
| Groups: []StringRange{ |
| {Begin: tc1.Groups[0].Begin, End: tc2.Groups[0].End}, |
| }, |
| } |
| } |
|
|