From 0bc66e5a566a734d90dbd85a24e081b98a075704 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E4=B8=96=E7=95=8C?= Date: Tue, 24 Feb 2026 19:16:40 +0800 Subject: [PATCH] service/ccm,ocm: Fixes and improvements --- go.mod | 4 +- go.sum | 12 +- service/ccm/service.go | 45 +- service/ccm/service_usage.go | 559 +++++++++++++++++-------- service/ocm/service.go | 91 +++- service/ocm/service_usage.go | 777 ++++++++++++++++++++++++++++++----- 6 files changed, 1201 insertions(+), 287 deletions(-) diff --git a/go.mod b/go.mod index db97d327..0523b73f 100644 --- a/go.mod +++ b/go.mod @@ -3,7 +3,7 @@ module github.com/sagernet/sing-box go 1.24.7 require ( - github.com/anthropics/anthropic-sdk-go v1.19.0 + github.com/anthropics/anthropic-sdk-go v1.26.0 github.com/anytls/sing-anytls v0.0.11 github.com/caddyserver/certmagic v0.25.0 github.com/coder/websocket v1.8.14 @@ -22,7 +22,7 @@ require ( github.com/metacubex/utls v1.8.4 github.com/mholt/acmez/v3 v3.1.4 github.com/miekg/dns v1.1.69 - github.com/openai/openai-go/v3 v3.15.0 + github.com/openai/openai-go/v3 v3.23.0 github.com/oschwald/maxminddb-golang v1.13.1 github.com/sagernet/asc-go v0.0.0-20241217030726-d563060fe4e1 github.com/sagernet/bbolt v0.0.0-20231014093535-ea5cb2fe9f0a diff --git a/go.sum b/go.sum index bb12cda4..738a415b 100644 --- a/go.sum +++ b/go.sum @@ -8,8 +8,8 @@ github.com/alexbrainman/sspi v0.0.0-20231016080023-1a75b4708caa h1:LHTHcTQiSGT7V github.com/alexbrainman/sspi v0.0.0-20231016080023-1a75b4708caa/go.mod h1:cEWa1LVoE5KvSD9ONXsZrj0z6KqySlCCNKHlLzbqAt4= github.com/andybalholm/brotli v1.1.0 h1:eLKJA0d02Lf0mVpIDgYnqXcUn0GqVmEFny3VuID1U3M= github.com/andybalholm/brotli v1.1.0/go.mod h1:sms7XGricyQI9K10gOSf56VKKWS4oLer58Q+mhRPtnY= -github.com/anthropics/anthropic-sdk-go v1.19.0 h1:mO6E+ffSzLRvR/YUH9KJC0uGw0uV8GjISIuzem//3KE= -github.com/anthropics/anthropic-sdk-go v1.19.0/go.mod h1:WTz31rIUHUHqai2UslPpw5CwXrQP3geYBioRV4WOLvE= +github.com/anthropics/anthropic-sdk-go v1.26.0 h1:oUTzFaUpAevfuELAP1sjL6CQJ9HHAfT7CoSYSac11PY= +github.com/anthropics/anthropic-sdk-go v1.26.0/go.mod h1:qUKmaW+uuPB64iy1l+4kOSvaLqPXnHTTBKH6RVZ7q5Q= github.com/anytls/sing-anytls v0.0.11 h1:w8e9Uj1oP3m4zxkyZDewPk0EcQbvVxb7Nn+rapEx4fc= github.com/anytls/sing-anytls v0.0.11/go.mod h1:7rjN6IukwysmdusYsrV51Fgu1uW6vsrdd6ctjnEAln8= github.com/caddyserver/certmagic v0.25.0 h1:VMleO/XA48gEWes5l+Fh6tRWo9bHkhwAEhx63i+F5ic= @@ -38,6 +38,8 @@ github.com/dblohm7/wingoes v0.0.0-20240119213807-a09d6be7affa h1:h8TfIT1xc8FWbww github.com/dblohm7/wingoes v0.0.0-20240119213807-a09d6be7affa/go.mod h1:Nx87SkVqTKd8UtT+xu7sM/l+LgXs6c0aHrlKusR+2EQ= github.com/dgrijalva/jwt-go/v4 v4.0.0-preview1 h1:CaO/zOnF8VvUfEbhRatPcwKVWamvbYd8tQGRWacE9kU= github.com/dgrijalva/jwt-go/v4 v4.0.0-preview1/go.mod h1:+hnT3ywWDTAFrW5aE+u2Sa/wT555ZqwoCS+pk3p6ry4= +github.com/dnaeon/go-vcr v1.2.0 h1:zHCHvJYTMh1N7xnV7zf1m1GPBF9Ad0Jk/whtQ1663qI= +github.com/dnaeon/go-vcr v1.2.0/go.mod h1:R4UdLID7HZT3taECzJs4YgbbH6PIGXB6W/sc5OLb6RQ= github.com/ebitengine/purego v0.9.1 h1:a/k2f2HQU3Pi399RPW1MOaZyhKJL9w/xFpKAg4q1s0A= github.com/ebitengine/purego v0.9.1/go.mod h1:iIjxzd6CiRiOG0UyXP+V1+jWqUXVjPKLAI0mRfJZTmQ= github.com/florianl/go-nfqueue/v2 v2.0.2 h1:FL5lQTeetgpCvac1TRwSfgaXUn0YSO7WzGvWNIp3JPE= @@ -126,8 +128,8 @@ github.com/mitchellh/go-ps v1.0.0 h1:i6ampVEEF4wQFF+bkYfwYgY+F/uYJDktmvLPf7qIgjc github.com/mitchellh/go-ps v1.0.0/go.mod h1:J4lOc8z8yJs6vUwklHw2XEIiT4z4C40KtWVN3nvg8Pg= github.com/nfnt/resize v0.0.0-20180221191011-83c6a9932646 h1:zYyBkD/k9seD2A7fsi6Oo2LfFZAehjjQMERAvZLEDnQ= github.com/nfnt/resize v0.0.0-20180221191011-83c6a9932646/go.mod h1:jpp1/29i3P1S/RLdc7JQKbRpFeM1dOBd8T9ki5s+AY8= -github.com/openai/openai-go/v3 v3.15.0 h1:hk99rM7YPz+M99/5B/zOQcVwFRLLMdprVGx1vaZ8XMo= -github.com/openai/openai-go/v3 v3.15.0/go.mod h1:cdufnVK14cWcT9qA1rRtrXx4FTRsgbDPW7Ia7SS5cZo= +github.com/openai/openai-go/v3 v3.23.0 h1:FRFwTcB4FoWFtIunTY/8fgHvzSHgqbfWjiCwOMVrsvw= +github.com/openai/openai-go/v3 v3.23.0/go.mod h1:cdufnVK14cWcT9qA1rRtrXx4FTRsgbDPW7Ia7SS5cZo= github.com/oschwald/maxminddb-golang v1.13.1 h1:G3wwjdN9JmIK2o/ermkHM+98oX5fS+k5MbwsmL4MRQE= github.com/oschwald/maxminddb-golang v1.13.1/go.mod h1:K4pgV9N/GcK694KSTmVSDTODk4IsCNThNdTmnaBZ/F8= github.com/pierrec/lz4/v4 v4.1.21 h1:yOVMLb6qSIDP67pl/5F7RepeKYu/VmTyEXvuMI5d9mQ= @@ -378,6 +380,8 @@ google.golang.org/protobuf v1.36.11/go.mod h1:HTf+CrKn2C3g5S8VImy6tdcUvCska2kB7j gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/yaml.v1 v1.0.0-20140924161607-9f9df34309c0/go.mod h1:WDnlLJ4WF5VGsH/HVa3CI79GS0ol3YnhVnKP89i0kNg= +gopkg.in/yaml.v2 v2.2.8 h1:obN1ZagJSUGI0Ek/LBmuj4SNLPfIny3KsKFopxRdj10= +gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/service/ccm/service.go b/service/ccm/service.go index 94e47734..ba428060 100644 --- a/service/ccm/service.go +++ b/service/ccm/service.go @@ -10,6 +10,7 @@ import ( "mime" "net" "net/http" + "strconv" "strings" "sync" "time" @@ -79,6 +80,35 @@ func isHopByHopHeader(header string) bool { } } +const ( + weeklyWindowSeconds = 604800 + weeklyWindowMinutes = weeklyWindowSeconds / 60 +) + +func parseInt64Header(headers http.Header, headerName string) (int64, bool) { + headerValue := strings.TrimSpace(headers.Get(headerName)) + if headerValue == "" { + return 0, false + } + parsedValue, parseError := strconv.ParseInt(headerValue, 10, 64) + if parseError != nil { + return 0, false + } + return parsedValue, true +} + +func extractWeeklyCycleHint(headers http.Header) *WeeklyCycleHint { + resetAtUnix, hasResetAt := parseInt64Header(headers, "anthropic-ratelimit-unified-7d-reset") + if !hasResetAt || resetAtUnix <= 0 { + return nil + } + + return &WeeklyCycleHint{ + WindowMinutes: weeklyWindowMinutes, + ResetAt: time.Unix(resetAtUnix, 0).UTC(), + } +} + type Service struct { boxService.Adapter ctx context.Context @@ -392,6 +422,7 @@ func (s *Service) ServeHTTP(w http.ResponseWriter, r *http.Request) { } func (s *Service) handleResponseWithTracking(writer http.ResponseWriter, response *http.Response, requestModel string, anthropicBetaHeader string, messagesCount int, username string) { + weeklyCycleHint := extractWeeklyCycleHint(response.Header) mediaType, _, err := mime.ParseMediaType(response.Header.Get("Content-Type")) isStreaming := err == nil && mediaType == "text/event-stream" @@ -417,7 +448,7 @@ func (s *Service) handleResponseWithTracking(writer http.ResponseWriter, respons if usage.InputTokens > 0 || usage.OutputTokens > 0 { if responseModel != "" { contextWindow := detectContextWindow(anthropicBetaHeader, usage.InputTokens) - s.usageTracker.AddUsage( + s.usageTracker.AddUsageWithCycleHint( responseModel, contextWindow, messagesCount, @@ -425,7 +456,11 @@ func (s *Service) handleResponseWithTracking(writer http.ResponseWriter, respons usage.OutputTokens, usage.CacheReadInputTokens, usage.CacheCreationInputTokens, + usage.CacheCreation.Ephemeral5mInputTokens, + usage.CacheCreation.Ephemeral1hInputTokens, username, + time.Now(), + weeklyCycleHint, ) } } @@ -485,6 +520,8 @@ func (s *Service) handleResponseWithTracking(writer http.ResponseWriter, respons accumulatedUsage.InputTokens = messageStart.Message.Usage.InputTokens accumulatedUsage.CacheReadInputTokens = messageStart.Message.Usage.CacheReadInputTokens accumulatedUsage.CacheCreationInputTokens = messageStart.Message.Usage.CacheCreationInputTokens + accumulatedUsage.CacheCreation.Ephemeral5mInputTokens = messageStart.Message.Usage.CacheCreation.Ephemeral5mInputTokens + accumulatedUsage.CacheCreation.Ephemeral1hInputTokens = messageStart.Message.Usage.CacheCreation.Ephemeral1hInputTokens } case "message_delta": messageDelta := event.AsMessageDelta() @@ -511,7 +548,7 @@ func (s *Service) handleResponseWithTracking(writer http.ResponseWriter, respons if accumulatedUsage.InputTokens > 0 || accumulatedUsage.OutputTokens > 0 { if responseModel != "" { contextWindow := detectContextWindow(anthropicBetaHeader, accumulatedUsage.InputTokens) - s.usageTracker.AddUsage( + s.usageTracker.AddUsageWithCycleHint( responseModel, contextWindow, messagesCount, @@ -519,7 +556,11 @@ func (s *Service) handleResponseWithTracking(writer http.ResponseWriter, respons accumulatedUsage.OutputTokens, accumulatedUsage.CacheReadInputTokens, accumulatedUsage.CacheCreationInputTokens, + accumulatedUsage.CacheCreation.Ephemeral5mInputTokens, + accumulatedUsage.CacheCreation.Ephemeral1hInputTokens, username, + time.Now(), + weeklyCycleHint, ) } } diff --git a/service/ccm/service_usage.go b/service/ccm/service_usage.go index 7d39e3ce..7d776774 100644 --- a/service/ccm/service_usage.go +++ b/service/ccm/service_usage.go @@ -2,6 +2,7 @@ package ccm import ( "encoding/json" + "fmt" "math" "os" "regexp" @@ -13,17 +14,20 @@ import ( ) type UsageStats struct { - RequestCount int `json:"request_count"` - MessagesCount int `json:"messages_count"` - InputTokens int64 `json:"input_tokens"` - OutputTokens int64 `json:"output_tokens"` - CacheReadInputTokens int64 `json:"cache_read_input_tokens"` - CacheCreationInputTokens int64 `json:"cache_creation_input_tokens"` + RequestCount int `json:"request_count"` + MessagesCount int `json:"messages_count"` + InputTokens int64 `json:"input_tokens"` + OutputTokens int64 `json:"output_tokens"` + CacheReadInputTokens int64 `json:"cache_read_input_tokens"` + CacheCreationInputTokens int64 `json:"cache_creation_input_tokens"` + CacheCreation5MinuteInputTokens int64 `json:"cache_creation_5m_input_tokens,omitempty"` + CacheCreation1HourInputTokens int64 `json:"cache_creation_1h_input_tokens,omitempty"` } type CostCombination struct { Model string `json:"model"` ContextWindow int `json:"context_window"` + WeekStartUnix int64 `json:"week_start_unix,omitempty"` Total UsageStats `json:"total"` ByUser map[string]UsageStats `json:"by_user"` } @@ -41,18 +45,21 @@ type AggregatedUsage struct { } type UsageStatsJSON struct { - RequestCount int `json:"request_count"` - MessagesCount int `json:"messages_count"` - InputTokens int64 `json:"input_tokens"` - OutputTokens int64 `json:"output_tokens"` - CacheReadInputTokens int64 `json:"cache_read_input_tokens"` - CacheCreationInputTokens int64 `json:"cache_creation_input_tokens"` - CostUSD float64 `json:"cost_usd"` + RequestCount int `json:"request_count"` + MessagesCount int `json:"messages_count"` + InputTokens int64 `json:"input_tokens"` + OutputTokens int64 `json:"output_tokens"` + CacheReadInputTokens int64 `json:"cache_read_input_tokens"` + CacheCreationInputTokens int64 `json:"cache_creation_input_tokens"` + CacheCreation5MinuteInputTokens int64 `json:"cache_creation_5m_input_tokens,omitempty"` + CacheCreation1HourInputTokens int64 `json:"cache_creation_1h_input_tokens,omitempty"` + CostUSD float64 `json:"cost_usd"` } type CostCombinationJSON struct { Model string `json:"model"` ContextWindow int `json:"context_window"` + WeekStartUnix int64 `json:"week_start_unix,omitempty"` Total UsageStatsJSON `json:"total"` ByUser map[string]UsageStatsJSON `json:"by_user"` } @@ -60,6 +67,7 @@ type CostCombinationJSON struct { type CostsSummaryJSON struct { TotalUSD float64 `json:"total_usd"` ByUser map[string]float64 `json:"by_user"` + ByWeek map[string]float64 `json:"by_week,omitempty"` } type AggregatedUsageJSON struct { @@ -68,11 +76,17 @@ type AggregatedUsageJSON struct { Combinations []CostCombinationJSON `json:"combinations"` } +type WeeklyCycleHint struct { + WindowMinutes int64 + ResetAt time.Time +} + type ModelPricing struct { - InputPrice float64 - OutputPrice float64 - CacheReadPrice float64 - CacheWritePrice float64 + InputPrice float64 + OutputPrice float64 + CacheReadPrice float64 + CacheWritePrice5Minute float64 + CacheWritePrice1Hour float64 } type modelFamily struct { @@ -82,143 +96,205 @@ type modelFamily struct { } var ( - opus4Pricing = ModelPricing{ - InputPrice: 15.0, - OutputPrice: 75.0, - CacheReadPrice: 1.5, - CacheWritePrice: 18.75, + opus46StandardPricing = ModelPricing{ + InputPrice: 5.0, + OutputPrice: 25.0, + CacheReadPrice: 0.5, + CacheWritePrice5Minute: 6.25, + CacheWritePrice1Hour: 10.0, } - sonnet4StandardPricing = ModelPricing{ - InputPrice: 3.0, - OutputPrice: 15.0, - CacheReadPrice: 0.3, - CacheWritePrice: 3.75, - } - - sonnet4PremiumPricing = ModelPricing{ - InputPrice: 6.0, - OutputPrice: 22.5, - CacheReadPrice: 0.6, - CacheWritePrice: 7.5, - } - - haiku4Pricing = ModelPricing{ - InputPrice: 1.0, - OutputPrice: 5.0, - CacheReadPrice: 0.1, - CacheWritePrice: 1.25, - } - - haiku35Pricing = ModelPricing{ - InputPrice: 0.8, - OutputPrice: 4.0, - CacheReadPrice: 0.08, - CacheWritePrice: 1.0, - } - - sonnet35Pricing = ModelPricing{ - InputPrice: 3.0, - OutputPrice: 15.0, - CacheReadPrice: 0.3, - CacheWritePrice: 3.75, + opus46PremiumPricing = ModelPricing{ + InputPrice: 10.0, + OutputPrice: 37.5, + CacheReadPrice: 1.0, + CacheWritePrice5Minute: 12.5, + CacheWritePrice1Hour: 20.0, } opus45Pricing = ModelPricing{ - InputPrice: 5.0, - OutputPrice: 25.0, - CacheReadPrice: 0.5, - CacheWritePrice: 6.25, + InputPrice: 5.0, + OutputPrice: 25.0, + CacheReadPrice: 0.5, + CacheWritePrice5Minute: 6.25, + CacheWritePrice1Hour: 10.0, + } + + opus4Pricing = ModelPricing{ + InputPrice: 15.0, + OutputPrice: 75.0, + CacheReadPrice: 1.5, + CacheWritePrice5Minute: 18.75, + CacheWritePrice1Hour: 30.0, + } + + sonnet46StandardPricing = ModelPricing{ + InputPrice: 3.0, + OutputPrice: 15.0, + CacheReadPrice: 0.3, + CacheWritePrice5Minute: 3.75, + CacheWritePrice1Hour: 6.0, + } + + sonnet46PremiumPricing = ModelPricing{ + InputPrice: 6.0, + OutputPrice: 22.5, + CacheReadPrice: 0.6, + CacheWritePrice5Minute: 7.5, + CacheWritePrice1Hour: 12.0, } sonnet45StandardPricing = ModelPricing{ - InputPrice: 3.0, - OutputPrice: 15.0, - CacheReadPrice: 0.3, - CacheWritePrice: 3.75, + InputPrice: 3.0, + OutputPrice: 15.0, + CacheReadPrice: 0.3, + CacheWritePrice5Minute: 3.75, + CacheWritePrice1Hour: 6.0, } sonnet45PremiumPricing = ModelPricing{ - InputPrice: 6.0, - OutputPrice: 22.5, - CacheReadPrice: 0.6, - CacheWritePrice: 7.5, + InputPrice: 6.0, + OutputPrice: 22.5, + CacheReadPrice: 0.6, + CacheWritePrice5Minute: 7.5, + CacheWritePrice1Hour: 12.0, + } + + sonnet4StandardPricing = ModelPricing{ + InputPrice: 3.0, + OutputPrice: 15.0, + CacheReadPrice: 0.3, + CacheWritePrice5Minute: 3.75, + CacheWritePrice1Hour: 6.0, + } + + sonnet4PremiumPricing = ModelPricing{ + InputPrice: 6.0, + OutputPrice: 22.5, + CacheReadPrice: 0.6, + CacheWritePrice5Minute: 7.5, + CacheWritePrice1Hour: 12.0, + } + + sonnet37Pricing = ModelPricing{ + InputPrice: 3.0, + OutputPrice: 15.0, + CacheReadPrice: 0.3, + CacheWritePrice5Minute: 3.75, + CacheWritePrice1Hour: 6.0, + } + + sonnet35Pricing = ModelPricing{ + InputPrice: 3.0, + OutputPrice: 15.0, + CacheReadPrice: 0.3, + CacheWritePrice5Minute: 3.75, + CacheWritePrice1Hour: 6.0, } haiku45Pricing = ModelPricing{ - InputPrice: 1.0, - OutputPrice: 5.0, - CacheReadPrice: 0.1, - CacheWritePrice: 1.25, + InputPrice: 1.0, + OutputPrice: 5.0, + CacheReadPrice: 0.1, + CacheWritePrice5Minute: 1.25, + CacheWritePrice1Hour: 2.0, + } + + haiku4Pricing = ModelPricing{ + InputPrice: 1.0, + OutputPrice: 5.0, + CacheReadPrice: 0.1, + CacheWritePrice5Minute: 1.25, + CacheWritePrice1Hour: 2.0, + } + + haiku35Pricing = ModelPricing{ + InputPrice: 0.8, + OutputPrice: 4.0, + CacheReadPrice: 0.08, + CacheWritePrice5Minute: 1.0, + CacheWritePrice1Hour: 1.6, } haiku3Pricing = ModelPricing{ - InputPrice: 0.25, - OutputPrice: 1.25, - CacheReadPrice: 0.03, - CacheWritePrice: 0.3, + InputPrice: 0.25, + OutputPrice: 1.25, + CacheReadPrice: 0.03, + CacheWritePrice5Minute: 0.3, + CacheWritePrice1Hour: 0.5, } opus3Pricing = ModelPricing{ - InputPrice: 15.0, - OutputPrice: 75.0, - CacheReadPrice: 1.5, - CacheWritePrice: 18.75, + InputPrice: 15.0, + OutputPrice: 75.0, + CacheReadPrice: 1.5, + CacheWritePrice5Minute: 18.75, + CacheWritePrice1Hour: 30.0, } modelFamilies = []modelFamily{ { - pattern: regexp.MustCompile(`^claude-opus-4-5-`), + pattern: regexp.MustCompile(`^claude-opus-4-6(?:-|$)`), + standardPricing: opus46StandardPricing, + premiumPricing: &opus46PremiumPricing, + }, + { + pattern: regexp.MustCompile(`^claude-opus-4-5(?:-|$)`), standardPricing: opus45Pricing, premiumPricing: nil, }, { - pattern: regexp.MustCompile(`^claude-(?:opus-4-|4-opus-|opus-4-1-)`), + pattern: regexp.MustCompile(`^claude-(?:opus-4(?:-|$)|4-opus-)`), standardPricing: opus4Pricing, premiumPricing: nil, }, { - pattern: regexp.MustCompile(`^claude-(?:opus-3-|3-opus-)`), + pattern: regexp.MustCompile(`^claude-(?:opus-3(?:-|$)|3-opus-)`), standardPricing: opus3Pricing, premiumPricing: nil, }, { - pattern: regexp.MustCompile(`^claude-(?:sonnet-4-5-|4-5-sonnet-)`), + pattern: regexp.MustCompile(`^claude-(?:sonnet-4-6(?:-|$)|4-6-sonnet-)`), + standardPricing: sonnet46StandardPricing, + premiumPricing: &sonnet46PremiumPricing, + }, + { + pattern: regexp.MustCompile(`^claude-(?:sonnet-4-5(?:-|$)|4-5-sonnet-)`), standardPricing: sonnet45StandardPricing, premiumPricing: &sonnet45PremiumPricing, }, { - pattern: regexp.MustCompile(`^claude-3-7-sonnet-`), + pattern: regexp.MustCompile(`^claude-(?:sonnet-4(?:-|$)|4-sonnet-)`), standardPricing: sonnet4StandardPricing, premiumPricing: &sonnet4PremiumPricing, }, { - pattern: regexp.MustCompile(`^claude-(?:sonnet-4-|4-sonnet-)`), - standardPricing: sonnet4StandardPricing, - premiumPricing: &sonnet4PremiumPricing, + pattern: regexp.MustCompile(`^claude-3-7-sonnet(?:-|$)`), + standardPricing: sonnet37Pricing, + premiumPricing: nil, }, { - pattern: regexp.MustCompile(`^claude-3-5-sonnet-`), + pattern: regexp.MustCompile(`^claude-3-5-sonnet(?:-|$)`), standardPricing: sonnet35Pricing, premiumPricing: nil, }, { - pattern: regexp.MustCompile(`^claude-(?:haiku-4-5-|4-5-haiku-)`), + pattern: regexp.MustCompile(`^claude-(?:haiku-4-5(?:-|$)|4-5-haiku-)`), standardPricing: haiku45Pricing, premiumPricing: nil, }, { - pattern: regexp.MustCompile(`^claude-haiku-4-`), + pattern: regexp.MustCompile(`^claude-haiku-4(?:-|$)`), standardPricing: haiku4Pricing, premiumPricing: nil, }, { - pattern: regexp.MustCompile(`^claude-3-5-haiku-`), + pattern: regexp.MustCompile(`^claude-3-5-haiku(?:-|$)`), standardPricing: haiku35Pricing, premiumPricing: nil, }, { - pattern: regexp.MustCompile(`^claude-3-haiku-`), + pattern: regexp.MustCompile(`^claude-3-haiku(?:-|$)`), standardPricing: haiku3Pricing, premiumPricing: nil, }, @@ -243,68 +319,211 @@ func getPricing(model string, contextWindow int) ModelPricing { func calculateCost(stats UsageStats, model string, contextWindow int) float64 { pricing := getPricing(model, contextWindow) + cacheCreationCost := 0.0 + if stats.CacheCreation5MinuteInputTokens > 0 || stats.CacheCreation1HourInputTokens > 0 { + cacheCreationCost = float64(stats.CacheCreation5MinuteInputTokens)*pricing.CacheWritePrice5Minute + + float64(stats.CacheCreation1HourInputTokens)*pricing.CacheWritePrice1Hour + } else { + // Backward compatibility for usage files generated before TTL split tracking. + cacheCreationCost = float64(stats.CacheCreationInputTokens) * pricing.CacheWritePrice5Minute + } + cost := (float64(stats.InputTokens)*pricing.InputPrice + float64(stats.OutputTokens)*pricing.OutputPrice + float64(stats.CacheReadInputTokens)*pricing.CacheReadPrice + - float64(stats.CacheCreationInputTokens)*pricing.CacheWritePrice) / 1_000_000 + cacheCreationCost) / 1_000_000 return math.Round(cost*100) / 100 } +func roundCost(cost float64) float64 { + return math.Round(cost*100) / 100 +} + +func normalizeCombinations(combinations []CostCombination) { + for index := range combinations { + if combinations[index].ByUser == nil { + combinations[index].ByUser = make(map[string]UsageStats) + } + } +} + +func addUsageToCombinations( + combinations *[]CostCombination, + model string, + contextWindow int, + weekStartUnix int64, + messagesCount int, + inputTokens, outputTokens, cacheReadTokens, cacheCreationTokens, cacheCreation5MinuteTokens, cacheCreation1HourTokens int64, + user string, +) { + var matchedCombination *CostCombination + for index := range *combinations { + combination := &(*combinations)[index] + if combination.Model == model && combination.ContextWindow == contextWindow && combination.WeekStartUnix == weekStartUnix { + matchedCombination = combination + break + } + } + + if matchedCombination == nil { + newCombination := CostCombination{ + Model: model, + ContextWindow: contextWindow, + WeekStartUnix: weekStartUnix, + Total: UsageStats{}, + ByUser: make(map[string]UsageStats), + } + *combinations = append(*combinations, newCombination) + matchedCombination = &(*combinations)[len(*combinations)-1] + } + + if cacheCreationTokens == 0 { + cacheCreationTokens = cacheCreation5MinuteTokens + cacheCreation1HourTokens + } + + matchedCombination.Total.RequestCount++ + matchedCombination.Total.MessagesCount += messagesCount + matchedCombination.Total.InputTokens += inputTokens + matchedCombination.Total.OutputTokens += outputTokens + matchedCombination.Total.CacheReadInputTokens += cacheReadTokens + matchedCombination.Total.CacheCreationInputTokens += cacheCreationTokens + matchedCombination.Total.CacheCreation5MinuteInputTokens += cacheCreation5MinuteTokens + matchedCombination.Total.CacheCreation1HourInputTokens += cacheCreation1HourTokens + + if user != "" { + userStats := matchedCombination.ByUser[user] + userStats.RequestCount++ + userStats.MessagesCount += messagesCount + userStats.InputTokens += inputTokens + userStats.OutputTokens += outputTokens + userStats.CacheReadInputTokens += cacheReadTokens + userStats.CacheCreationInputTokens += cacheCreationTokens + userStats.CacheCreation5MinuteInputTokens += cacheCreation5MinuteTokens + userStats.CacheCreation1HourInputTokens += cacheCreation1HourTokens + matchedCombination.ByUser[user] = userStats + } +} + +func buildCombinationJSON(combinations []CostCombination, aggregateUserCosts map[string]float64) ([]CostCombinationJSON, float64) { + result := make([]CostCombinationJSON, len(combinations)) + var totalCost float64 + + for index, combination := range combinations { + combinationTotalCost := calculateCost(combination.Total, combination.Model, combination.ContextWindow) + totalCost += combinationTotalCost + + combinationJSON := CostCombinationJSON{ + Model: combination.Model, + ContextWindow: combination.ContextWindow, + WeekStartUnix: combination.WeekStartUnix, + Total: UsageStatsJSON{ + RequestCount: combination.Total.RequestCount, + MessagesCount: combination.Total.MessagesCount, + InputTokens: combination.Total.InputTokens, + OutputTokens: combination.Total.OutputTokens, + CacheReadInputTokens: combination.Total.CacheReadInputTokens, + CacheCreationInputTokens: combination.Total.CacheCreationInputTokens, + CacheCreation5MinuteInputTokens: combination.Total.CacheCreation5MinuteInputTokens, + CacheCreation1HourInputTokens: combination.Total.CacheCreation1HourInputTokens, + CostUSD: combinationTotalCost, + }, + ByUser: make(map[string]UsageStatsJSON), + } + + for user, userStats := range combination.ByUser { + userCost := calculateCost(userStats, combination.Model, combination.ContextWindow) + if aggregateUserCosts != nil { + aggregateUserCosts[user] += userCost + } + + combinationJSON.ByUser[user] = UsageStatsJSON{ + RequestCount: userStats.RequestCount, + MessagesCount: userStats.MessagesCount, + InputTokens: userStats.InputTokens, + OutputTokens: userStats.OutputTokens, + CacheReadInputTokens: userStats.CacheReadInputTokens, + CacheCreationInputTokens: userStats.CacheCreationInputTokens, + CacheCreation5MinuteInputTokens: userStats.CacheCreation5MinuteInputTokens, + CacheCreation1HourInputTokens: userStats.CacheCreation1HourInputTokens, + CostUSD: userCost, + } + } + + result[index] = combinationJSON + } + + return result, roundCost(totalCost) +} + +func formatUTCOffsetLabel(timestamp time.Time) string { + _, offsetSeconds := timestamp.Zone() + sign := "+" + if offsetSeconds < 0 { + sign = "-" + offsetSeconds = -offsetSeconds + } + offsetHours := offsetSeconds / 3600 + offsetMinutes := (offsetSeconds % 3600) / 60 + if offsetMinutes == 0 { + return fmt.Sprintf("UTC%s%d", sign, offsetHours) + } + return fmt.Sprintf("UTC%s%d:%02d", sign, offsetHours, offsetMinutes) +} + +func formatWeekStartKey(cycleStartAt time.Time) string { + localCycleStart := cycleStartAt.In(time.Local) + return fmt.Sprintf("%s %s", localCycleStart.Format("2006-01-02 15:04:05"), formatUTCOffsetLabel(localCycleStart)) +} + +func buildByWeekCost(combinations []CostCombination) map[string]float64 { + byWeek := make(map[string]float64) + for _, combination := range combinations { + if combination.WeekStartUnix <= 0 { + continue + } + weekStartAt := time.Unix(combination.WeekStartUnix, 0).UTC() + weekKey := formatWeekStartKey(weekStartAt) + byWeek[weekKey] += calculateCost(combination.Total, combination.Model, combination.ContextWindow) + } + for weekKey, weekCost := range byWeek { + byWeek[weekKey] = roundCost(weekCost) + } + return byWeek +} + +func deriveWeekStartUnix(cycleHint *WeeklyCycleHint) int64 { + if cycleHint == nil || cycleHint.WindowMinutes <= 0 || cycleHint.ResetAt.IsZero() { + return 0 + } + windowDuration := time.Duration(cycleHint.WindowMinutes) * time.Minute + return cycleHint.ResetAt.UTC().Add(-windowDuration).Unix() +} + func (u *AggregatedUsage) ToJSON() *AggregatedUsageJSON { u.mutex.Lock() defer u.mutex.Unlock() result := &AggregatedUsageJSON{ - LastUpdated: u.LastUpdated, - Combinations: make([]CostCombinationJSON, len(u.Combinations)), + LastUpdated: u.LastUpdated, Costs: CostsSummaryJSON{ TotalUSD: 0, ByUser: make(map[string]float64), + ByWeek: make(map[string]float64), }, } - for i, combo := range u.Combinations { - totalCost := calculateCost(combo.Total, combo.Model, combo.ContextWindow) + globalCombinationsJSON, totalCost := buildCombinationJSON(u.Combinations, result.Costs.ByUser) + result.Combinations = globalCombinationsJSON + result.Costs.TotalUSD = totalCost + result.Costs.ByWeek = buildByWeekCost(u.Combinations) - result.Costs.TotalUSD += totalCost - - comboJSON := CostCombinationJSON{ - Model: combo.Model, - ContextWindow: combo.ContextWindow, - Total: UsageStatsJSON{ - RequestCount: combo.Total.RequestCount, - MessagesCount: combo.Total.MessagesCount, - InputTokens: combo.Total.InputTokens, - OutputTokens: combo.Total.OutputTokens, - CacheReadInputTokens: combo.Total.CacheReadInputTokens, - CacheCreationInputTokens: combo.Total.CacheCreationInputTokens, - CostUSD: totalCost, - }, - ByUser: make(map[string]UsageStatsJSON), - } - - for user, userStats := range combo.ByUser { - userCost := calculateCost(userStats, combo.Model, combo.ContextWindow) - result.Costs.ByUser[user] += userCost - - comboJSON.ByUser[user] = UsageStatsJSON{ - RequestCount: userStats.RequestCount, - MessagesCount: userStats.MessagesCount, - InputTokens: userStats.InputTokens, - OutputTokens: userStats.OutputTokens, - CacheReadInputTokens: userStats.CacheReadInputTokens, - CacheCreationInputTokens: userStats.CacheCreationInputTokens, - CostUSD: userCost, - } - } - - result.Combinations[i] = comboJSON + if len(result.Costs.ByWeek) == 0 { + result.Costs.ByWeek = nil } - result.Costs.TotalUSD = math.Round(result.Costs.TotalUSD*100) / 100 for user, cost := range result.Costs.ByUser { - result.Costs.ByUser[user] = math.Round(cost*100) / 100 + result.Costs.ByUser[user] = roundCost(cost) } return result @@ -314,6 +533,9 @@ func (u *AggregatedUsage) Load() error { u.mutex.Lock() defer u.mutex.Unlock() + u.LastUpdated = time.Time{} + u.Combinations = nil + data, err := os.ReadFile(u.filePath) if err != nil { if os.IsNotExist(err) { @@ -334,12 +556,7 @@ func (u *AggregatedUsage) Load() error { u.LastUpdated = temp.LastUpdated u.Combinations = temp.Combinations - - for i := range u.Combinations { - if u.Combinations[i].ByUser == nil { - u.Combinations[i].ByUser = make(map[string]UsageStats) - } - } + normalizeCombinations(u.Combinations) return nil } @@ -367,58 +584,42 @@ func (u *AggregatedUsage) Save() error { return err } -func (u *AggregatedUsage) AddUsage(model string, contextWindow int, messagesCount int, inputTokens, outputTokens, cacheReadTokens, cacheCreationTokens int64, user string) error { +func (u *AggregatedUsage) AddUsage( + model string, + contextWindow int, + messagesCount int, + inputTokens, outputTokens, cacheReadTokens, cacheCreationTokens, cacheCreation5MinuteTokens, cacheCreation1HourTokens int64, + user string, +) error { + return u.AddUsageWithCycleHint(model, contextWindow, messagesCount, inputTokens, outputTokens, cacheReadTokens, cacheCreationTokens, cacheCreation5MinuteTokens, cacheCreation1HourTokens, user, time.Now(), nil) +} + +func (u *AggregatedUsage) AddUsageWithCycleHint( + model string, + contextWindow int, + messagesCount int, + inputTokens, outputTokens, cacheReadTokens, cacheCreationTokens, cacheCreation5MinuteTokens, cacheCreation1HourTokens int64, + user string, + observedAt time.Time, + cycleHint *WeeklyCycleHint, +) error { if model == "" { return E.New("model cannot be empty") } if contextWindow <= 0 { return E.New("contextWindow must be positive") } + if observedAt.IsZero() { + observedAt = time.Now() + } u.mutex.Lock() defer u.mutex.Unlock() - u.LastUpdated = time.Now() + u.LastUpdated = observedAt + weekStartUnix := deriveWeekStartUnix(cycleHint) - // Find or create combination - var combo *CostCombination - for i := range u.Combinations { - if u.Combinations[i].Model == model && u.Combinations[i].ContextWindow == contextWindow { - combo = &u.Combinations[i] - break - } - } - - if combo == nil { - newCombo := CostCombination{ - Model: model, - ContextWindow: contextWindow, - Total: UsageStats{}, - ByUser: make(map[string]UsageStats), - } - u.Combinations = append(u.Combinations, newCombo) - combo = &u.Combinations[len(u.Combinations)-1] - } - - // Update total stats - combo.Total.RequestCount++ - combo.Total.MessagesCount += messagesCount - combo.Total.InputTokens += inputTokens - combo.Total.OutputTokens += outputTokens - combo.Total.CacheReadInputTokens += cacheReadTokens - combo.Total.CacheCreationInputTokens += cacheCreationTokens - - // Update per-user stats if user is specified - if user != "" { - userStats := combo.ByUser[user] - userStats.RequestCount++ - userStats.MessagesCount += messagesCount - userStats.InputTokens += inputTokens - userStats.OutputTokens += outputTokens - userStats.CacheReadInputTokens += cacheReadTokens - userStats.CacheCreationInputTokens += cacheCreationTokens - combo.ByUser[user] = userStats - } + addUsageToCombinations(&u.Combinations, model, contextWindow, weekStartUnix, messagesCount, inputTokens, outputTokens, cacheReadTokens, cacheCreationTokens, cacheCreation5MinuteTokens, cacheCreation1HourTokens, user) go u.scheduleSave() diff --git a/service/ocm/service.go b/service/ocm/service.go index fc655f67..2354d159 100644 --- a/service/ocm/service.go +++ b/service/ocm/service.go @@ -10,6 +10,7 @@ import ( "mime" "net" "net/http" + "strconv" "strings" "sync" "time" @@ -71,6 +72,57 @@ func isHopByHopHeader(header string) bool { } } +func normalizeRateLimitIdentifier(limitIdentifier string) string { + trimmedIdentifier := strings.TrimSpace(strings.ToLower(limitIdentifier)) + if trimmedIdentifier == "" { + return "" + } + return strings.ReplaceAll(trimmedIdentifier, "_", "-") +} + +func parseInt64Header(headers http.Header, headerName string) (int64, bool) { + headerValue := strings.TrimSpace(headers.Get(headerName)) + if headerValue == "" { + return 0, false + } + parsedValue, parseError := strconv.ParseInt(headerValue, 10, 64) + if parseError != nil { + return 0, false + } + return parsedValue, true +} + +func weeklyCycleHintForLimit(headers http.Header, limitIdentifier string) *WeeklyCycleHint { + normalizedLimitIdentifier := normalizeRateLimitIdentifier(limitIdentifier) + if normalizedLimitIdentifier == "" { + return nil + } + + windowHeader := "x-" + normalizedLimitIdentifier + "-secondary-window-minutes" + resetHeader := "x-" + normalizedLimitIdentifier + "-secondary-reset-at" + + windowMinutes, hasWindowMinutes := parseInt64Header(headers, windowHeader) + resetAtUnix, hasResetAt := parseInt64Header(headers, resetHeader) + if !hasWindowMinutes || !hasResetAt || windowMinutes <= 0 || resetAtUnix <= 0 { + return nil + } + + return &WeeklyCycleHint{ + WindowMinutes: windowMinutes, + ResetAt: time.Unix(resetAtUnix, 0).UTC(), + } +} + +func extractWeeklyCycleHint(headers http.Header) *WeeklyCycleHint { + activeLimitIdentifier := normalizeRateLimitIdentifier(headers.Get("x-codex-active-limit")) + if activeLimitIdentifier != "" { + if activeHint := weeklyCycleHintForLimit(headers, activeLimitIdentifier); activeHint != nil { + return activeHint + } + } + return weeklyCycleHintForLimit(headers, "codex") +} + type Service struct { boxService.Adapter ctx context.Context @@ -404,9 +456,12 @@ func (s *Service) ServeHTTP(w http.ResponseWriter, r *http.Request) { func (s *Service) handleResponseWithTracking(writer http.ResponseWriter, response *http.Response, path string, requestModel string, username string) { isChatCompletions := path == "/v1/chat/completions" + weeklyCycleHint := extractWeeklyCycleHint(response.Header) mediaType, _, err := mime.ParseMediaType(response.Header.Get("Content-Type")) isStreaming := err == nil && mediaType == "text/event-stream" - + if !isStreaming && !isChatCompletions && response.Header.Get("Content-Type") == "" { + isStreaming = true + } if !isStreaming { bodyBytes, err := io.ReadAll(response.Body) if err != nil { @@ -414,13 +469,14 @@ func (s *Service) handleResponseWithTracking(writer http.ResponseWriter, respons return } - var responseModel string + var responseModel, serviceTier string var inputTokens, outputTokens, cachedTokens int64 if isChatCompletions { var chatCompletion openai.ChatCompletion if json.Unmarshal(bodyBytes, &chatCompletion) == nil { responseModel = chatCompletion.Model + serviceTier = string(chatCompletion.ServiceTier) inputTokens = chatCompletion.Usage.PromptTokens outputTokens = chatCompletion.Usage.CompletionTokens cachedTokens = chatCompletion.Usage.PromptTokensDetails.CachedTokens @@ -429,6 +485,7 @@ func (s *Service) handleResponseWithTracking(writer http.ResponseWriter, respons var responsesResponse responses.Response if json.Unmarshal(bodyBytes, &responsesResponse) == nil { responseModel = string(responsesResponse.Model) + serviceTier = string(responsesResponse.ServiceTier) inputTokens = responsesResponse.Usage.InputTokens outputTokens = responsesResponse.Usage.OutputTokens cachedTokens = responsesResponse.Usage.InputTokensDetails.CachedTokens @@ -440,7 +497,16 @@ func (s *Service) handleResponseWithTracking(writer http.ResponseWriter, respons responseModel = requestModel } if responseModel != "" { - s.usageTracker.AddUsage(responseModel, inputTokens, outputTokens, cachedTokens, username) + s.usageTracker.AddUsageWithCycleHint( + responseModel, + inputTokens, + outputTokens, + cachedTokens, + serviceTier, + username, + time.Now(), + weeklyCycleHint, + ) } } @@ -455,7 +521,7 @@ func (s *Service) handleResponseWithTracking(writer http.ResponseWriter, respons } var inputTokens, outputTokens, cachedTokens int64 - var responseModel string + var responseModel, serviceTier string buffer := make([]byte, buf.BufferSize) var leftover []byte @@ -490,6 +556,9 @@ func (s *Service) handleResponseWithTracking(writer http.ResponseWriter, respons if chatChunk.Model != "" { responseModel = chatChunk.Model } + if chatChunk.ServiceTier != "" { + serviceTier = string(chatChunk.ServiceTier) + } if chatChunk.Usage.PromptTokens > 0 { inputTokens = chatChunk.Usage.PromptTokens cachedTokens = chatChunk.Usage.PromptTokensDetails.CachedTokens @@ -506,6 +575,9 @@ func (s *Service) handleResponseWithTracking(writer http.ResponseWriter, respons if string(completedEvent.Response.Model) != "" { responseModel = string(completedEvent.Response.Model) } + if completedEvent.Response.ServiceTier != "" { + serviceTier = string(completedEvent.Response.ServiceTier) + } if completedEvent.Response.Usage.InputTokens > 0 { inputTokens = completedEvent.Response.Usage.InputTokens cachedTokens = completedEvent.Response.Usage.InputTokensDetails.CachedTokens @@ -534,7 +606,16 @@ func (s *Service) handleResponseWithTracking(writer http.ResponseWriter, respons if inputTokens > 0 || outputTokens > 0 { if responseModel != "" { - s.usageTracker.AddUsage(responseModel, inputTokens, outputTokens, cachedTokens, username) + s.usageTracker.AddUsageWithCycleHint( + responseModel, + inputTokens, + outputTokens, + cachedTokens, + serviceTier, + username, + time.Now(), + weeklyCycleHint, + ) } } return diff --git a/service/ocm/service_usage.go b/service/ocm/service_usage.go index 7089f4d3..a4c1d1c8 100644 --- a/service/ocm/service_usage.go +++ b/service/ocm/service_usage.go @@ -2,9 +2,11 @@ package ocm import ( "encoding/json" + "fmt" "math" "os" "regexp" + "strings" "sync" "time" @@ -42,9 +44,11 @@ func (u *UsageStats) UnmarshalJSON(data []byte) error { } type CostCombination struct { - Model string `json:"model"` - Total UsageStats `json:"total"` - ByUser map[string]UsageStats `json:"by_user"` + Model string `json:"model"` + ServiceTier string `json:"service_tier,omitempty"` + WeekStartUnix int64 `json:"week_start_unix,omitempty"` + Total UsageStats `json:"total"` + ByUser map[string]UsageStats `json:"by_user"` } type AggregatedUsage struct { @@ -68,14 +72,17 @@ type UsageStatsJSON struct { } type CostCombinationJSON struct { - Model string `json:"model"` - Total UsageStatsJSON `json:"total"` - ByUser map[string]UsageStatsJSON `json:"by_user"` + Model string `json:"model"` + ServiceTier string `json:"service_tier,omitempty"` + WeekStartUnix int64 `json:"week_start_unix,omitempty"` + Total UsageStatsJSON `json:"total"` + ByUser map[string]UsageStatsJSON `json:"by_user"` } type CostsSummaryJSON struct { TotalUSD float64 `json:"total_usd"` ByUser map[string]float64 `json:"by_user"` + ByWeek map[string]float64 `json:"by_week,omitempty"` } type AggregatedUsageJSON struct { @@ -84,6 +91,11 @@ type AggregatedUsageJSON struct { Combinations []CostCombinationJSON `json:"combinations"` } +type WeeklyCycleHint struct { + WindowMinutes int64 + ResetAt time.Time +} + type ModelPricing struct { InputPrice float64 OutputPrice float64 @@ -95,7 +107,123 @@ type modelFamily struct { pricing ModelPricing } +const ( + serviceTierAuto = "auto" + serviceTierDefault = "default" + serviceTierFlex = "flex" + serviceTierPriority = "priority" + serviceTierScale = "scale" +) + var ( + gpt52Pricing = ModelPricing{ + InputPrice: 1.75, + OutputPrice: 14.0, + CachedInputPrice: 0.175, + } + + gpt5Pricing = ModelPricing{ + InputPrice: 1.25, + OutputPrice: 10.0, + CachedInputPrice: 0.125, + } + + gpt5MiniPricing = ModelPricing{ + InputPrice: 0.25, + OutputPrice: 2.0, + CachedInputPrice: 0.025, + } + + gpt5NanoPricing = ModelPricing{ + InputPrice: 0.05, + OutputPrice: 0.4, + CachedInputPrice: 0.005, + } + + gpt52CodexPricing = ModelPricing{ + InputPrice: 1.75, + OutputPrice: 14.0, + CachedInputPrice: 0.175, + } + + gpt51CodexPricing = ModelPricing{ + InputPrice: 1.25, + OutputPrice: 10.0, + CachedInputPrice: 0.125, + } + + gpt51CodexMiniPricing = ModelPricing{ + InputPrice: 0.25, + OutputPrice: 2.0, + CachedInputPrice: 0.025, + } + + gpt52ProPricing = ModelPricing{ + InputPrice: 21.0, + OutputPrice: 168.0, + CachedInputPrice: 21.0, + } + + gpt5ProPricing = ModelPricing{ + InputPrice: 15.0, + OutputPrice: 120.0, + CachedInputPrice: 15.0, + } + + gpt52FlexPricing = ModelPricing{ + InputPrice: 0.875, + OutputPrice: 7.0, + CachedInputPrice: 0.0875, + } + + gpt5FlexPricing = ModelPricing{ + InputPrice: 0.625, + OutputPrice: 5.0, + CachedInputPrice: 0.0625, + } + + gpt5MiniFlexPricing = ModelPricing{ + InputPrice: 0.125, + OutputPrice: 1.0, + CachedInputPrice: 0.0125, + } + + gpt5NanoFlexPricing = ModelPricing{ + InputPrice: 0.025, + OutputPrice: 0.2, + CachedInputPrice: 0.0025, + } + + gpt52PriorityPricing = ModelPricing{ + InputPrice: 3.5, + OutputPrice: 28.0, + CachedInputPrice: 0.35, + } + + gpt5PriorityPricing = ModelPricing{ + InputPrice: 2.5, + OutputPrice: 20.0, + CachedInputPrice: 0.25, + } + + gpt5MiniPriorityPricing = ModelPricing{ + InputPrice: 0.45, + OutputPrice: 3.6, + CachedInputPrice: 0.045, + } + + gpt52CodexPriorityPricing = ModelPricing{ + InputPrice: 3.5, + OutputPrice: 28.0, + CachedInputPrice: 0.35, + } + + gpt51CodexPriorityPricing = ModelPricing{ + InputPrice: 2.5, + OutputPrice: 20.0, + CachedInputPrice: 0.25, + } + gpt4oPricing = ModelPricing{ InputPrice: 2.5, OutputPrice: 10.0, @@ -111,7 +239,19 @@ var ( gpt4oAudioPricing = ModelPricing{ InputPrice: 2.5, OutputPrice: 10.0, - CachedInputPrice: 1.25, + CachedInputPrice: 2.5, + } + + gpt4oMiniAudioPricing = ModelPricing{ + InputPrice: 0.15, + OutputPrice: 0.6, + CachedInputPrice: 0.15, + } + + gptAudioMiniPricing = ModelPricing{ + InputPrice: 0.6, + OutputPrice: 2.4, + CachedInputPrice: 0.6, } o1Pricing = ModelPricing{ @@ -120,6 +260,12 @@ var ( CachedInputPrice: 7.5, } + o1ProPricing = ModelPricing{ + InputPrice: 150.0, + OutputPrice: 600.0, + CachedInputPrice: 150.0, + } + o1MiniPricing = ModelPricing{ InputPrice: 1.1, OutputPrice: 4.4, @@ -135,13 +281,55 @@ var ( o3Pricing = ModelPricing{ InputPrice: 2.0, OutputPrice: 8.0, - CachedInputPrice: 1.0, + CachedInputPrice: 0.5, + } + + o3ProPricing = ModelPricing{ + InputPrice: 20.0, + OutputPrice: 80.0, + CachedInputPrice: 20.0, + } + + o3DeepResearchPricing = ModelPricing{ + InputPrice: 10.0, + OutputPrice: 40.0, + CachedInputPrice: 2.5, } o4MiniPricing = ModelPricing{ InputPrice: 1.1, OutputPrice: 4.4, - CachedInputPrice: 0.55, + CachedInputPrice: 0.275, + } + + o4MiniDeepResearchPricing = ModelPricing{ + InputPrice: 2.0, + OutputPrice: 8.0, + CachedInputPrice: 0.5, + } + + o3FlexPricing = ModelPricing{ + InputPrice: 1.0, + OutputPrice: 4.0, + CachedInputPrice: 0.25, + } + + o4MiniFlexPricing = ModelPricing{ + InputPrice: 0.55, + OutputPrice: 2.2, + CachedInputPrice: 0.138, + } + + o3PriorityPricing = ModelPricing{ + InputPrice: 3.5, + OutputPrice: 14.0, + CachedInputPrice: 0.875, + } + + o4MiniPriorityPricing = ModelPricing{ + InputPrice: 2.0, + OutputPrice: 8.0, + CachedInputPrice: 0.5, } gpt41Pricing = ModelPricing{ @@ -162,69 +350,374 @@ var ( CachedInputPrice: 0.025, } - modelFamilies = []modelFamily{ + gpt41PriorityPricing = ModelPricing{ + InputPrice: 3.5, + OutputPrice: 14.0, + CachedInputPrice: 0.875, + } + + gpt41MiniPriorityPricing = ModelPricing{ + InputPrice: 0.7, + OutputPrice: 2.8, + CachedInputPrice: 0.175, + } + + gpt41NanoPriorityPricing = ModelPricing{ + InputPrice: 0.2, + OutputPrice: 0.8, + CachedInputPrice: 0.05, + } + + gpt4oPriorityPricing = ModelPricing{ + InputPrice: 4.25, + OutputPrice: 17.0, + CachedInputPrice: 2.125, + } + + gpt4oMiniPriorityPricing = ModelPricing{ + InputPrice: 0.25, + OutputPrice: 1.0, + CachedInputPrice: 0.125, + } + + standardModelFamilies = []modelFamily{ { - pattern: regexp.MustCompile(`^gpt-4\.1-nano`), - pricing: gpt41NanoPricing, + pattern: regexp.MustCompile(`^gpt-5\.3-codex(?:$|-)`), + pricing: gpt52CodexPricing, }, { - pattern: regexp.MustCompile(`^gpt-4\.1-mini`), - pricing: gpt41MiniPricing, + pattern: regexp.MustCompile(`^gpt-5\.2-codex(?:$|-)`), + pricing: gpt52CodexPricing, }, { - pattern: regexp.MustCompile(`^gpt-4\.1`), - pricing: gpt41Pricing, + pattern: regexp.MustCompile(`^gpt-5\.1-codex-max(?:$|-)`), + pricing: gpt51CodexPricing, }, { - pattern: regexp.MustCompile(`^o4-mini`), + pattern: regexp.MustCompile(`^gpt-5\.1-codex-mini(?:$|-)`), + pricing: gpt51CodexMiniPricing, + }, + { + pattern: regexp.MustCompile(`^gpt-5\.1-codex(?:$|-)`), + pricing: gpt51CodexPricing, + }, + { + pattern: regexp.MustCompile(`^gpt-5-codex-mini(?:$|-)`), + pricing: gpt51CodexMiniPricing, + }, + { + pattern: regexp.MustCompile(`^gpt-5-codex(?:$|-)`), + pricing: gpt51CodexPricing, + }, + { + pattern: regexp.MustCompile(`^gpt-5\.2-chat-latest$`), + pricing: gpt52Pricing, + }, + { + pattern: regexp.MustCompile(`^gpt-5\.1-chat-latest$`), + pricing: gpt5Pricing, + }, + { + pattern: regexp.MustCompile(`^gpt-5-chat-latest$`), + pricing: gpt5Pricing, + }, + { + pattern: regexp.MustCompile(`^gpt-5\.2-pro(?:$|-)`), + pricing: gpt52ProPricing, + }, + { + pattern: regexp.MustCompile(`^gpt-5-pro(?:$|-)`), + pricing: gpt5ProPricing, + }, + { + pattern: regexp.MustCompile(`^gpt-5-mini(?:$|-)`), + pricing: gpt5MiniPricing, + }, + { + pattern: regexp.MustCompile(`^gpt-5-nano(?:$|-)`), + pricing: gpt5NanoPricing, + }, + { + pattern: regexp.MustCompile(`^gpt-5\.2(?:$|-)`), + pricing: gpt52Pricing, + }, + { + pattern: regexp.MustCompile(`^gpt-5\.1(?:$|-)`), + pricing: gpt5Pricing, + }, + { + pattern: regexp.MustCompile(`^gpt-5(?:$|-)`), + pricing: gpt5Pricing, + }, + { + pattern: regexp.MustCompile(`^o4-mini-deep-research(?:$|-)`), + pricing: o4MiniDeepResearchPricing, + }, + { + pattern: regexp.MustCompile(`^o4-mini(?:$|-)`), pricing: o4MiniPricing, }, { - pattern: regexp.MustCompile(`^o3-mini`), + pattern: regexp.MustCompile(`^o3-pro(?:$|-)`), + pricing: o3ProPricing, + }, + { + pattern: regexp.MustCompile(`^o3-deep-research(?:$|-)`), + pricing: o3DeepResearchPricing, + }, + { + pattern: regexp.MustCompile(`^o3-mini(?:$|-)`), pricing: o3MiniPricing, }, { - pattern: regexp.MustCompile(`^o3`), + pattern: regexp.MustCompile(`^o3(?:$|-)`), pricing: o3Pricing, }, { - pattern: regexp.MustCompile(`^o1-mini`), + pattern: regexp.MustCompile(`^o1-pro(?:$|-)`), + pricing: o1ProPricing, + }, + { + pattern: regexp.MustCompile(`^o1-mini(?:$|-)`), pricing: o1MiniPricing, }, { - pattern: regexp.MustCompile(`^o1`), + pattern: regexp.MustCompile(`^o1(?:$|-)`), pricing: o1Pricing, }, { - pattern: regexp.MustCompile(`^gpt-4o-audio`), + pattern: regexp.MustCompile(`^gpt-4o-mini-audio(?:$|-)`), + pricing: gpt4oMiniAudioPricing, + }, + { + pattern: regexp.MustCompile(`^gpt-audio-mini(?:$|-)`), + pricing: gptAudioMiniPricing, + }, + { + pattern: regexp.MustCompile(`^(?:gpt-4o-audio|gpt-audio)(?:$|-)`), pricing: gpt4oAudioPricing, }, { - pattern: regexp.MustCompile(`^gpt-4o-mini`), + pattern: regexp.MustCompile(`^gpt-4\.1-nano(?:$|-)`), + pricing: gpt41NanoPricing, + }, + { + pattern: regexp.MustCompile(`^gpt-4\.1-mini(?:$|-)`), + pricing: gpt41MiniPricing, + }, + { + pattern: regexp.MustCompile(`^gpt-4\.1(?:$|-)`), + pricing: gpt41Pricing, + }, + { + pattern: regexp.MustCompile(`^gpt-4o-mini(?:$|-)`), pricing: gpt4oMiniPricing, }, { - pattern: regexp.MustCompile(`^gpt-4o`), + pattern: regexp.MustCompile(`^gpt-4o(?:$|-)`), pricing: gpt4oPricing, }, { - pattern: regexp.MustCompile(`^chatgpt-4o`), + pattern: regexp.MustCompile(`^chatgpt-4o(?:$|-)`), pricing: gpt4oPricing, }, } + + flexModelFamilies = []modelFamily{ + { + pattern: regexp.MustCompile(`^gpt-5-mini(?:$|-)`), + pricing: gpt5MiniFlexPricing, + }, + { + pattern: regexp.MustCompile(`^gpt-5-nano(?:$|-)`), + pricing: gpt5NanoFlexPricing, + }, + { + pattern: regexp.MustCompile(`^gpt-5\.2(?:$|-)`), + pricing: gpt52FlexPricing, + }, + { + pattern: regexp.MustCompile(`^gpt-5\.1(?:$|-)`), + pricing: gpt5FlexPricing, + }, + { + pattern: regexp.MustCompile(`^gpt-5(?:$|-)`), + pricing: gpt5FlexPricing, + }, + { + pattern: regexp.MustCompile(`^o4-mini(?:$|-)`), + pricing: o4MiniFlexPricing, + }, + { + pattern: regexp.MustCompile(`^o3(?:$|-)`), + pricing: o3FlexPricing, + }, + } + + priorityModelFamilies = []modelFamily{ + { + pattern: regexp.MustCompile(`^gpt-5\.3-codex(?:$|-)`), + pricing: gpt52CodexPriorityPricing, + }, + { + pattern: regexp.MustCompile(`^gpt-5\.2-codex(?:$|-)`), + pricing: gpt52CodexPriorityPricing, + }, + { + pattern: regexp.MustCompile(`^gpt-5\.1-codex-max(?:$|-)`), + pricing: gpt51CodexPriorityPricing, + }, + { + pattern: regexp.MustCompile(`^gpt-5\.1-codex(?:$|-)`), + pricing: gpt51CodexPriorityPricing, + }, + { + pattern: regexp.MustCompile(`^gpt-5-codex-mini(?:$|-)`), + pricing: gpt5MiniPriorityPricing, + }, + { + pattern: regexp.MustCompile(`^gpt-5-codex(?:$|-)`), + pricing: gpt51CodexPriorityPricing, + }, + { + pattern: regexp.MustCompile(`^gpt-5-mini(?:$|-)`), + pricing: gpt5MiniPriorityPricing, + }, + { + pattern: regexp.MustCompile(`^gpt-5\.2(?:$|-)`), + pricing: gpt52PriorityPricing, + }, + { + pattern: regexp.MustCompile(`^gpt-5\.1(?:$|-)`), + pricing: gpt5PriorityPricing, + }, + { + pattern: regexp.MustCompile(`^gpt-5(?:$|-)`), + pricing: gpt5PriorityPricing, + }, + { + pattern: regexp.MustCompile(`^o4-mini(?:$|-)`), + pricing: o4MiniPriorityPricing, + }, + { + pattern: regexp.MustCompile(`^o3(?:$|-)`), + pricing: o3PriorityPricing, + }, + { + pattern: regexp.MustCompile(`^gpt-4\.1-nano(?:$|-)`), + pricing: gpt41NanoPriorityPricing, + }, + { + pattern: regexp.MustCompile(`^gpt-4\.1-mini(?:$|-)`), + pricing: gpt41MiniPriorityPricing, + }, + { + pattern: regexp.MustCompile(`^gpt-4\.1(?:$|-)`), + pricing: gpt41PriorityPricing, + }, + { + pattern: regexp.MustCompile(`^gpt-4o-mini(?:$|-)`), + pricing: gpt4oMiniPriorityPricing, + }, + { + pattern: regexp.MustCompile(`^gpt-4o(?:$|-)`), + pricing: gpt4oPriorityPricing, + }, + } ) -func getPricing(model string) ModelPricing { +func modelFamiliesForTier(serviceTier string) []modelFamily { + switch serviceTier { + case serviceTierFlex: + return flexModelFamilies + case serviceTierPriority: + return priorityModelFamilies + default: + return standardModelFamilies + } +} + +func findPricingInFamilies(model string, modelFamilies []modelFamily) (ModelPricing, bool) { for _, family := range modelFamilies { if family.pattern.MatchString(model) { - return family.pricing + return family.pricing, true } } + return ModelPricing{}, false +} + +func normalizeServiceTier(serviceTier string) string { + switch strings.ToLower(strings.TrimSpace(serviceTier)) { + case "", serviceTierAuto, serviceTierDefault: + return serviceTierDefault + case serviceTierFlex: + return serviceTierFlex + case serviceTierPriority: + return serviceTierPriority + case serviceTierScale: + // Scale-tier requests are prepaid differently and not listed in this usage file. + return serviceTierDefault + default: + return serviceTierDefault + } +} + +func getPricing(model string, serviceTier string) ModelPricing { + normalizedServiceTier := normalizeServiceTier(serviceTier) + modelFamilies := modelFamiliesForTier(normalizedServiceTier) + + if pricing, found := findPricingInFamilies(model, modelFamilies); found { + return pricing + } + + normalizedModel := normalizeGPT5Model(model) + if normalizedModel != model { + if pricing, found := findPricingInFamilies(normalizedModel, modelFamilies); found { + return pricing + } + } + + if normalizedServiceTier != serviceTierDefault { + if pricing, found := findPricingInFamilies(model, standardModelFamilies); found { + return pricing + } + if normalizedModel != model { + if pricing, found := findPricingInFamilies(normalizedModel, standardModelFamilies); found { + return pricing + } + } + } + return gpt4oPricing } -func calculateCost(stats UsageStats, model string) float64 { - pricing := getPricing(model) +func normalizeGPT5Model(model string) string { + if !strings.HasPrefix(model, "gpt-5.") { + return model + } + + switch { + case strings.Contains(model, "-codex-mini"): + return "gpt-5.1-codex-mini" + case strings.Contains(model, "-codex-max"): + return "gpt-5.1-codex-max" + case strings.Contains(model, "-codex"): + return "gpt-5.3-codex" + case strings.Contains(model, "-chat-latest"): + return "gpt-5.2-chat-latest" + case strings.Contains(model, "-pro"): + return "gpt-5.2-pro" + case strings.Contains(model, "-mini"): + return "gpt-5-mini" + case strings.Contains(model, "-nano"): + return "gpt-5-nano" + default: + return "gpt-5.2" + } +} + +func calculateCost(stats UsageStats, model string, serviceTier string) float64 { + pricing := getPricing(model, serviceTier) regularInputTokens := stats.InputTokens - stats.CachedTokens if regularInputTokens < 0 { @@ -238,41 +731,89 @@ func calculateCost(stats UsageStats, model string) float64 { return math.Round(cost*100) / 100 } -func (u *AggregatedUsage) ToJSON() *AggregatedUsageJSON { - u.mutex.Lock() - defer u.mutex.Unlock() +func roundCost(cost float64) float64 { + return math.Round(cost*100) / 100 +} - result := &AggregatedUsageJSON{ - LastUpdated: u.LastUpdated, - Combinations: make([]CostCombinationJSON, len(u.Combinations)), - Costs: CostsSummaryJSON{ - TotalUSD: 0, - ByUser: make(map[string]float64), - }, +func normalizeCombinations(combinations []CostCombination) { + for index := range combinations { + combinations[index].ServiceTier = normalizeServiceTier(combinations[index].ServiceTier) + if combinations[index].ByUser == nil { + combinations[index].ByUser = make(map[string]UsageStats) + } + } +} + +func addUsageToCombinations(combinations *[]CostCombination, model string, serviceTier string, weekStartUnix int64, user string, inputTokens, outputTokens, cachedTokens int64) { + var matchedCombination *CostCombination + for index := range *combinations { + combination := &(*combinations)[index] + combinationServiceTier := normalizeServiceTier(combination.ServiceTier) + if combination.ServiceTier != combinationServiceTier { + combination.ServiceTier = combinationServiceTier + } + if combination.Model == model && combinationServiceTier == serviceTier && combination.WeekStartUnix == weekStartUnix { + matchedCombination = combination + break + } } - for i, combo := range u.Combinations { - totalCost := calculateCost(combo.Total, combo.Model) + if matchedCombination == nil { + newCombination := CostCombination{ + Model: model, + ServiceTier: serviceTier, + WeekStartUnix: weekStartUnix, + Total: UsageStats{}, + ByUser: make(map[string]UsageStats), + } + *combinations = append(*combinations, newCombination) + matchedCombination = &(*combinations)[len(*combinations)-1] + } - result.Costs.TotalUSD += totalCost + matchedCombination.Total.RequestCount++ + matchedCombination.Total.InputTokens += inputTokens + matchedCombination.Total.OutputTokens += outputTokens + matchedCombination.Total.CachedTokens += cachedTokens - comboJSON := CostCombinationJSON{ - Model: combo.Model, + if user != "" { + userStats := matchedCombination.ByUser[user] + userStats.RequestCount++ + userStats.InputTokens += inputTokens + userStats.OutputTokens += outputTokens + userStats.CachedTokens += cachedTokens + matchedCombination.ByUser[user] = userStats + } +} + +func buildCombinationJSON(combinations []CostCombination, aggregateUserCosts map[string]float64) ([]CostCombinationJSON, float64) { + result := make([]CostCombinationJSON, len(combinations)) + var totalCost float64 + + for index, combination := range combinations { + combinationTotalCost := calculateCost(combination.Total, combination.Model, combination.ServiceTier) + totalCost += combinationTotalCost + + combinationJSON := CostCombinationJSON{ + Model: combination.Model, + ServiceTier: combination.ServiceTier, + WeekStartUnix: combination.WeekStartUnix, Total: UsageStatsJSON{ - RequestCount: combo.Total.RequestCount, - InputTokens: combo.Total.InputTokens, - OutputTokens: combo.Total.OutputTokens, - CachedTokens: combo.Total.CachedTokens, - CostUSD: totalCost, + RequestCount: combination.Total.RequestCount, + InputTokens: combination.Total.InputTokens, + OutputTokens: combination.Total.OutputTokens, + CachedTokens: combination.Total.CachedTokens, + CostUSD: combinationTotalCost, }, ByUser: make(map[string]UsageStatsJSON), } - for user, userStats := range combo.ByUser { - userCost := calculateCost(userStats, combo.Model) - result.Costs.ByUser[user] += userCost + for user, userStats := range combination.ByUser { + userCost := calculateCost(userStats, combination.Model, combination.ServiceTier) + if aggregateUserCosts != nil { + aggregateUserCosts[user] += userCost + } - comboJSON.ByUser[user] = UsageStatsJSON{ + combinationJSON.ByUser[user] = UsageStatsJSON{ RequestCount: userStats.RequestCount, InputTokens: userStats.InputTokens, OutputTokens: userStats.OutputTokens, @@ -281,12 +822,80 @@ func (u *AggregatedUsage) ToJSON() *AggregatedUsageJSON { } } - result.Combinations[i] = comboJSON + result[index] = combinationJSON + } + + return result, roundCost(totalCost) +} + +func formatUTCOffsetLabel(timestamp time.Time) string { + _, offsetSeconds := timestamp.Zone() + sign := "+" + if offsetSeconds < 0 { + sign = "-" + offsetSeconds = -offsetSeconds + } + offsetHours := offsetSeconds / 3600 + offsetMinutes := (offsetSeconds % 3600) / 60 + if offsetMinutes == 0 { + return fmt.Sprintf("UTC%s%d", sign, offsetHours) + } + return fmt.Sprintf("UTC%s%d:%02d", sign, offsetHours, offsetMinutes) +} + +func formatWeekStartKey(cycleStartAt time.Time) string { + localCycleStart := cycleStartAt.In(time.Local) + return fmt.Sprintf("%s %s", localCycleStart.Format("2006-01-02 15:04:05"), formatUTCOffsetLabel(localCycleStart)) +} + +func buildByWeekCost(combinations []CostCombination) map[string]float64 { + byWeek := make(map[string]float64) + for _, combination := range combinations { + if combination.WeekStartUnix <= 0 { + continue + } + weekStartAt := time.Unix(combination.WeekStartUnix, 0).UTC() + weekKey := formatWeekStartKey(weekStartAt) + byWeek[weekKey] += calculateCost(combination.Total, combination.Model, combination.ServiceTier) + } + for weekKey, weekCost := range byWeek { + byWeek[weekKey] = roundCost(weekCost) + } + return byWeek +} + +func deriveWeekStartUnix(cycleHint *WeeklyCycleHint) int64 { + if cycleHint == nil || cycleHint.WindowMinutes <= 0 || cycleHint.ResetAt.IsZero() { + return 0 + } + windowDuration := time.Duration(cycleHint.WindowMinutes) * time.Minute + return cycleHint.ResetAt.UTC().Add(-windowDuration).Unix() +} + +func (u *AggregatedUsage) ToJSON() *AggregatedUsageJSON { + u.mutex.Lock() + defer u.mutex.Unlock() + + result := &AggregatedUsageJSON{ + LastUpdated: u.LastUpdated, + Costs: CostsSummaryJSON{ + TotalUSD: 0, + ByUser: make(map[string]float64), + ByWeek: make(map[string]float64), + }, + } + + globalCombinationsJSON, totalCost := buildCombinationJSON(u.Combinations, result.Costs.ByUser) + result.Combinations = globalCombinationsJSON + result.Costs.TotalUSD = totalCost + result.Costs.ByWeek = buildByWeekCost(u.Combinations) + + if len(result.Costs.ByWeek) == 0 { + result.Costs.ByWeek = nil } - result.Costs.TotalUSD = math.Round(result.Costs.TotalUSD*100) / 100 for user, cost := range result.Costs.ByUser { - result.Costs.ByUser[user] = math.Round(cost*100) / 100 + result.Costs.ByUser[user] = roundCost(cost) } return result @@ -296,6 +905,9 @@ func (u *AggregatedUsage) Load() error { u.mutex.Lock() defer u.mutex.Unlock() + u.LastUpdated = time.Time{} + u.Combinations = nil + data, err := os.ReadFile(u.filePath) if err != nil { if os.IsNotExist(err) { @@ -316,12 +928,7 @@ func (u *AggregatedUsage) Load() error { u.LastUpdated = temp.LastUpdated u.Combinations = temp.Combinations - - for i := range u.Combinations { - if u.Combinations[i].ByUser == nil { - u.Combinations[i].ByUser = make(map[string]UsageStats) - } - } + normalizeCombinations(u.Combinations) return nil } @@ -349,47 +956,27 @@ func (u *AggregatedUsage) Save() error { return err } -func (u *AggregatedUsage) AddUsage(model string, inputTokens, outputTokens, cachedTokens int64, user string) error { +func (u *AggregatedUsage) AddUsage(model string, inputTokens, outputTokens, cachedTokens int64, serviceTier string, user string) error { + return u.AddUsageWithCycleHint(model, inputTokens, outputTokens, cachedTokens, serviceTier, user, time.Now(), nil) +} + +func (u *AggregatedUsage) AddUsageWithCycleHint(model string, inputTokens, outputTokens, cachedTokens int64, serviceTier string, user string, observedAt time.Time, cycleHint *WeeklyCycleHint) error { if model == "" { return E.New("model cannot be empty") } + normalizedServiceTier := normalizeServiceTier(serviceTier) + if observedAt.IsZero() { + observedAt = time.Now() + } + u.mutex.Lock() defer u.mutex.Unlock() - u.LastUpdated = time.Now() + u.LastUpdated = observedAt + weekStartUnix := deriveWeekStartUnix(cycleHint) - var combo *CostCombination - for i := range u.Combinations { - if u.Combinations[i].Model == model { - combo = &u.Combinations[i] - break - } - } - - if combo == nil { - newCombo := CostCombination{ - Model: model, - Total: UsageStats{}, - ByUser: make(map[string]UsageStats), - } - u.Combinations = append(u.Combinations, newCombo) - combo = &u.Combinations[len(u.Combinations)-1] - } - - combo.Total.RequestCount++ - combo.Total.InputTokens += inputTokens - combo.Total.OutputTokens += outputTokens - combo.Total.CachedTokens += cachedTokens - - if user != "" { - userStats := combo.ByUser[user] - userStats.RequestCount++ - userStats.InputTokens += inputTokens - userStats.OutputTokens += outputTokens - userStats.CachedTokens += cachedTokens - combo.ByUser[user] = userStats - } + addUsageToCombinations(&u.Combinations, model, normalizedServiceTier, weekStartUnix, user, inputTokens, outputTokens, cachedTokens) go u.scheduleSave()