| package service |
|
|
| import ( |
| "math" |
| "strings" |
| "sync" |
| "unicode" |
| ) |
|
|
| |
// Provider identifies a model vendor family. It selects which set of
// per-category weights EstimateToken applies to a text.
type Provider string

const (
	// OpenAI selects the OpenAI weights. getMultipliers also uses these
	// weights for every provider it does not recognise.
	OpenAI Provider = "openai"

	// Gemini selects the Gemini weights.
	Gemini Provider = "gemini"

	// Claude selects the Claude weights.
	Claude Provider = "claude"

	// Unknown marks a provider that could not be identified.
	// getMultipliers falls back to the OpenAI weights for it.
	Unknown Provider = "unknown"
)
|
|
| |
// multipliers holds the per-category weights that EstimateToken sums while
// scanning a text. One value of this type exists per Provider.
type multipliers struct {
	// Word is added once for each run of consecutive letter runes.
	Word float64

	// Number is added once for each run of consecutive numeric runes.
	Number float64

	// CJK is added for every rune that isCJK accepts.
	CJK float64

	// Symbol is added for every rune that matches no other category.
	Symbol float64

	// MathSymbol is added for every rune that isMathSymbol accepts.
	MathSymbol float64

	// URLDelim is added for every rune that isURLDelim accepts.
	URLDelim float64

	// AtSign is added for every '@'.
	AtSign float64

	// Emoji is added for every rune that isEmoji accepts.
	Emoji float64

	// Newline is added for every '\n' and every '\t'.
	Newline float64

	// Space is added for every whitespace rune other than '\n' and '\t'.
	Space float64

	// BasePad is a constant added to the rounded-up total.
	BasePad int
}
|
|
| var ( |
| multipliersMap = map[Provider]multipliers{ |
| Gemini: { |
| Word: 1.15, Number: 2.8, CJK: 0.68, Symbol: 0.38, MathSymbol: 1.05, URLDelim: 1.2, AtSign: 2.5, Emoji: 1.08, Newline: 1.15, Space: 0.2, BasePad: 0, |
| }, |
| Claude: { |
| Word: 1.13, Number: 1.63, CJK: 1.21, Symbol: 0.4, MathSymbol: 4.52, URLDelim: 1.26, AtSign: 2.82, Emoji: 2.6, Newline: 0.89, Space: 0.39, BasePad: 0, |
| }, |
| OpenAI: { |
| Word: 1.02, Number: 1.55, CJK: 0.85, Symbol: 0.4, MathSymbol: 2.68, URLDelim: 1.0, AtSign: 2.0, Emoji: 2.12, Newline: 0.5, Space: 0.42, BasePad: 0, |
| }, |
| } |
| multipliersLock sync.RWMutex |
| ) |
|
|
| |
| func getMultipliers(p Provider) multipliers { |
| multipliersLock.RLock() |
| defer multipliersLock.RUnlock() |
|
|
| switch p { |
| case Gemini: |
| return multipliersMap[Gemini] |
| case Claude: |
| return multipliersMap[Claude] |
| case OpenAI: |
| return multipliersMap[OpenAI] |
| default: |
| |
| return multipliersMap[OpenAI] |
| } |
| } |
|
|
| |
| func EstimateToken(provider Provider, text string) int { |
| m := getMultipliers(provider) |
| var count float64 |
|
|
| |
| type WordType int |
| const ( |
| None WordType = iota |
| Latin |
| Number |
| ) |
| currentWordType := None |
|
|
| for _, r := range text { |
| |
| if unicode.IsSpace(r) { |
| currentWordType = None |
| |
| if r == '\n' || r == '\t' { |
| count += m.Newline |
| } else { |
| |
| count += m.Space |
| } |
| continue |
| } |
|
|
| |
| if isCJK(r) { |
| currentWordType = None |
| count += m.CJK |
| continue |
| } |
|
|
| |
| if isEmoji(r) { |
| currentWordType = None |
| count += m.Emoji |
| continue |
| } |
|
|
| |
| if isLatinOrNumber(r) { |
| isNum := unicode.IsNumber(r) |
| newType := Latin |
| if isNum { |
| newType = Number |
| } |
|
|
| |
| |
| |
| if currentWordType == None || currentWordType != newType { |
| if newType == Number { |
| count += m.Number |
| } else { |
| count += m.Word |
| } |
| currentWordType = newType |
| } |
| |
| continue |
| } |
|
|
| |
| currentWordType = None |
| if isMathSymbol(r) { |
| count += m.MathSymbol |
| } else if r == '@' { |
| count += m.AtSign |
| } else if isURLDelim(r) { |
| count += m.URLDelim |
| } else { |
| count += m.Symbol |
| } |
| } |
|
|
| |
| return int(math.Ceil(count)) + m.BasePad |
| } |
|
|
| |
// isCJK reports whether r is a Han character, lies in the Hiragana or
// Katakana blocks (U+3040–U+30FF), or is a precomposed Hangul syllable
// (U+AC00–U+D7A3).
func isCJK(r rune) bool {
	switch {
	case r >= 0x3040 && r <= 0x30FF: // Hiragana and Katakana blocks
		return true
	case r >= 0xAC00 && r <= 0xD7A3: // Hangul syllables
		return true
	default:
		return unicode.Is(unicode.Han, r)
	}
}
|
|
| |
// isLatinOrNumber reports whether r is a Unicode letter or a Unicode number.
//
// Despite the name, the check is not limited to the Latin script: any rune
// that unicode.IsLetter or unicode.IsNumber accepts qualifies.
func isLatinOrNumber(r rune) bool {
	if unicode.IsLetter(r) {
		return true
	}
	return unicode.IsNumber(r)
}
|
|
| |
// isEmoji reports whether r falls in one of the code point ranges this
// package treats as emoji:
//
//   - U+2600–U+27BF: Miscellaneous Symbols and Dingbats;
//   - U+1F300–U+1FAFF: Miscellaneous Symbols and Pictographs through
//     Symbols and Pictographs Extended-A, which includes Emoticons and
//     Supplemental Symbols and Pictographs.
//
// The test is purely range-based, so unassigned or non-emoji code points
// inside these ranges are accepted too.
func isEmoji(r rune) bool {
	inSymbolsOrDingbats := r >= 0x2600 && r <= 0x27BF
	inPictographs := r >= 0x1F300 && r <= 0x1FAFF
	return inSymbolsOrDingbats || inPictographs
}
|
|
| |
// isMathSymbol reports whether r is treated as a mathematical symbol.
//
// A rune qualifies when it lies in one of three ranges — Mathematical
// Operators (U+2200–U+22FF), Supplemental Mathematical Operators
// (U+2A00–U+2AFF), Mathematical Alphanumeric Symbols (U+1D400–U+1D7FF) —
// or appears in the explicit list below, which adds signs, primes and
// super/subscript characters that live outside those ranges.
func isMathSymbol(r rune) bool {
	// Several entries also fall inside U+2200–U+22FF; the list is kept
	// verbatim so the accepted set does not change.
	const listed = "∑∫∂√∞≤≥≠≈±×÷∈∉∋∌⊂⊃⊆⊇∪∩∧∨¬∀∃∄∅∆∇∝∟∠∡∢°′″‴⁺⁻⁼⁽⁾ⁿ₀₁₂₃₄₅₆₇₈₉₊₋₌₍₎²³¹⁴⁵⁶⁷⁸⁹⁰"

	switch {
	case r >= 0x2200 && r <= 0x22FF:
		return true
	case r >= 0x2A00 && r <= 0x2AFF:
		return true
	case r >= 0x1D400 && r <= 0x1D7FF:
		return true
	}
	return strings.ContainsRune(listed, r)
}
|
|
| |
// isURLDelim reports whether r is one of the punctuation characters this
// package counts as URL delimiters: / : ? & = ; # %
func isURLDelim(r rune) bool {
	switch r {
	case '/', ':', '?', '&', '=', ';', '#', '%':
		return true
	default:
		return false
	}
}
|
|
| func EstimateTokenByModel(model, text string) int { |
| |
| if text == "" { |
| return 0 |
| } |
|
|
| model = strings.ToLower(model) |
| if strings.Contains(model, "gemini") { |
| return EstimateToken(Gemini, text) |
| } else if strings.Contains(model, "claude") { |
| return EstimateToken(Claude, text) |
| } else { |
| return EstimateToken(OpenAI, text) |
| } |
| } |
|
|