ccm/ocm: Fix missing metering for 1M context and /fast mode

CCM: Fix 1M context detection - use prefix match for versioned
beta strings (e.g. "context-1m-2025-08-07") and include cache
tokens in the 200K threshold check per Anthropic billing docs.

OCM: Add GPT-5.4 family pricing (standard/priority/flex) with
extended context (>272K) premium pricing support. Add context
window tracking to usage combinations, mirroring CCM's pattern.
Update normalizeGPT5Model defaults to latest known models.
This commit is contained in:
世界
2026-03-11 17:19:52 +08:00
parent a7ee943216
commit 49c450d942
4 changed files with 175 additions and 28 deletions

View File

@@ -281,11 +281,11 @@ func (s *Service) getAccessToken() (string, error) {
return newCredentials.AccessToken, nil return newCredentials.AccessToken, nil
} }
func detectContextWindow(betaHeader string, inputTokens int64) int { func detectContextWindow(betaHeader string, totalInputTokens int64) int {
if inputTokens > premiumContextThreshold { if totalInputTokens > premiumContextThreshold {
features := strings.Split(betaHeader, ",") features := strings.Split(betaHeader, ",")
for _, feature := range features { for _, feature := range features {
if strings.TrimSpace(feature) == "context-1m" { if strings.HasPrefix(strings.TrimSpace(feature), "context-1m") {
return contextWindowPremium return contextWindowPremium
} }
} }
@@ -454,7 +454,8 @@ func (s *Service) handleResponseWithTracking(writer http.ResponseWriter, respons
if usage.InputTokens > 0 || usage.OutputTokens > 0 { if usage.InputTokens > 0 || usage.OutputTokens > 0 {
if responseModel != "" { if responseModel != "" {
contextWindow := detectContextWindow(anthropicBetaHeader, usage.InputTokens) totalInputTokens := usage.InputTokens + usage.CacheCreationInputTokens + usage.CacheReadInputTokens
contextWindow := detectContextWindow(anthropicBetaHeader, totalInputTokens)
s.usageTracker.AddUsageWithCycleHint( s.usageTracker.AddUsageWithCycleHint(
responseModel, responseModel,
contextWindow, contextWindow,
@@ -554,7 +555,8 @@ func (s *Service) handleResponseWithTracking(writer http.ResponseWriter, respons
if accumulatedUsage.InputTokens > 0 || accumulatedUsage.OutputTokens > 0 { if accumulatedUsage.InputTokens > 0 || accumulatedUsage.OutputTokens > 0 {
if responseModel != "" { if responseModel != "" {
contextWindow := detectContextWindow(anthropicBetaHeader, accumulatedUsage.InputTokens) totalInputTokens := accumulatedUsage.InputTokens + accumulatedUsage.CacheCreationInputTokens + accumulatedUsage.CacheReadInputTokens
contextWindow := detectContextWindow(anthropicBetaHeader, totalInputTokens)
s.usageTracker.AddUsageWithCycleHint( s.usageTracker.AddUsageWithCycleHint(
responseModel, responseModel,
contextWindow, contextWindow,

View File

@@ -507,8 +507,10 @@ func (s *Service) handleResponseWithTracking(writer http.ResponseWriter, respons
responseModel = requestModel responseModel = requestModel
} }
if responseModel != "" { if responseModel != "" {
contextWindow := detectContextWindow(responseModel, serviceTier, inputTokens)
s.usageTracker.AddUsageWithCycleHint( s.usageTracker.AddUsageWithCycleHint(
responseModel, responseModel,
contextWindow,
inputTokens, inputTokens,
outputTokens, outputTokens,
cachedTokens, cachedTokens,
@@ -616,8 +618,10 @@ func (s *Service) handleResponseWithTracking(writer http.ResponseWriter, respons
if inputTokens > 0 || outputTokens > 0 { if inputTokens > 0 || outputTokens > 0 {
if responseModel != "" { if responseModel != "" {
contextWindow := detectContextWindow(responseModel, serviceTier, inputTokens)
s.usageTracker.AddUsageWithCycleHint( s.usageTracker.AddUsageWithCycleHint(
responseModel, responseModel,
contextWindow,
inputTokens, inputTokens,
outputTokens, outputTokens,
cachedTokens, cachedTokens,

View File

@@ -46,6 +46,7 @@ func (u *UsageStats) UnmarshalJSON(data []byte) error {
type CostCombination struct { type CostCombination struct {
Model string `json:"model"` Model string `json:"model"`
ServiceTier string `json:"service_tier,omitempty"` ServiceTier string `json:"service_tier,omitempty"`
ContextWindow int `json:"context_window"`
WeekStartUnix int64 `json:"week_start_unix,omitempty"` WeekStartUnix int64 `json:"week_start_unix,omitempty"`
Total UsageStats `json:"total"` Total UsageStats `json:"total"`
ByUser map[string]UsageStats `json:"by_user"` ByUser map[string]UsageStats `json:"by_user"`
@@ -74,6 +75,7 @@ type UsageStatsJSON struct {
type CostCombinationJSON struct { type CostCombinationJSON struct {
Model string `json:"model"` Model string `json:"model"`
ServiceTier string `json:"service_tier,omitempty"` ServiceTier string `json:"service_tier,omitempty"`
ContextWindow int `json:"context_window"`
WeekStartUnix int64 `json:"week_start_unix,omitempty"` WeekStartUnix int64 `json:"week_start_unix,omitempty"`
Total UsageStatsJSON `json:"total"` Total UsageStatsJSON `json:"total"`
ByUser map[string]UsageStatsJSON `json:"by_user"` ByUser map[string]UsageStatsJSON `json:"by_user"`
@@ -104,8 +106,9 @@ type ModelPricing struct {
} }
type modelFamily struct { type modelFamily struct {
pattern *regexp.Regexp pattern *regexp.Regexp
pricing ModelPricing pricing ModelPricing
premiumPricing *ModelPricing
} }
const ( const (
@@ -116,6 +119,12 @@ const (
serviceTierScale = "scale" serviceTierScale = "scale"
) )
// Context-window buckets used to key usage combinations and to pick pricing.
const (
// contextWindowStandard is the bucket returned by detectContextWindow when a
// request does not qualify for premium pricing. normalizeCombinations also
// assigns it to stored combinations whose ContextWindow is not positive.
contextWindowStandard = 272000
// contextWindowPremium is the bucket returned by detectContextWindow when the
// input exceeds premiumContextThreshold and a matching model family defines
// premium pricing. findPricingInFamilies treats any contextWindow at or above
// this value as premium.
contextWindowPremium = 1050000
// premiumContextThreshold is the input token count above which
// detectContextWindow checks for premium pricing; inputs at or below it are
// always classified as contextWindowStandard.
premiumContextThreshold = 272000
)
var ( var (
gpt52Pricing = ModelPricing{ gpt52Pricing = ModelPricing{
InputPrice: 1.75, InputPrice: 1.75,
@@ -159,6 +168,30 @@ var (
CachedInputPrice: 0.025, CachedInputPrice: 0.025,
} }
gpt54StandardPricing = ModelPricing{
InputPrice: 2.5,
OutputPrice: 15.0,
CachedInputPrice: 0.25,
}
gpt54PremiumPricing = ModelPricing{
InputPrice: 5.0,
OutputPrice: 22.5,
CachedInputPrice: 0.5,
}
gpt54ProPricing = ModelPricing{
InputPrice: 30.0,
OutputPrice: 180.0,
CachedInputPrice: 30.0,
}
gpt54ProPremiumPricing = ModelPricing{
InputPrice: 60.0,
OutputPrice: 270.0,
CachedInputPrice: 60.0,
}
gpt52ProPricing = ModelPricing{ gpt52ProPricing = ModelPricing{
InputPrice: 21.0, InputPrice: 21.0,
OutputPrice: 168.0, OutputPrice: 168.0,
@@ -171,6 +204,30 @@ var (
CachedInputPrice: 15.0, CachedInputPrice: 15.0,
} }
gpt54FlexPricing = ModelPricing{
InputPrice: 1.25,
OutputPrice: 7.5,
CachedInputPrice: 0.125,
}
gpt54PremiumFlexPricing = ModelPricing{
InputPrice: 2.5,
OutputPrice: 11.25,
CachedInputPrice: 0.25,
}
gpt54ProFlexPricing = ModelPricing{
InputPrice: 15.0,
OutputPrice: 90.0,
CachedInputPrice: 15.0,
}
gpt54ProPremiumFlexPricing = ModelPricing{
InputPrice: 30.0,
OutputPrice: 135.0,
CachedInputPrice: 30.0,
}
gpt52FlexPricing = ModelPricing{ gpt52FlexPricing = ModelPricing{
InputPrice: 0.875, InputPrice: 0.875,
OutputPrice: 7.0, OutputPrice: 7.0,
@@ -195,6 +252,18 @@ var (
CachedInputPrice: 0.0025, CachedInputPrice: 0.0025,
} }
gpt54PriorityPricing = ModelPricing{
InputPrice: 5.0,
OutputPrice: 30.0,
CachedInputPrice: 0.5,
}
gpt54PremiumPriorityPricing = ModelPricing{
InputPrice: 10.0,
OutputPrice: 45.0,
CachedInputPrice: 1.0,
}
gpt52PriorityPricing = ModelPricing{ gpt52PriorityPricing = ModelPricing{
InputPrice: 3.5, InputPrice: 3.5,
OutputPrice: 28.0, OutputPrice: 28.0,
@@ -382,6 +451,16 @@ var (
} }
standardModelFamilies = []modelFamily{ standardModelFamilies = []modelFamily{
{
pattern: regexp.MustCompile(`^gpt-5\.4-pro(?:$|-)`),
pricing: gpt54ProPricing,
premiumPricing: &gpt54ProPremiumPricing,
},
{
pattern: regexp.MustCompile(`^gpt-5\.4(?:$|-)`),
pricing: gpt54StandardPricing,
premiumPricing: &gpt54PremiumPricing,
},
{ {
pattern: regexp.MustCompile(`^gpt-5\.3-codex(?:$|-)`), pattern: regexp.MustCompile(`^gpt-5\.3-codex(?:$|-)`),
pricing: gpt52CodexPricing, pricing: gpt52CodexPricing,
@@ -525,6 +604,16 @@ var (
} }
flexModelFamilies = []modelFamily{ flexModelFamilies = []modelFamily{
{
pattern: regexp.MustCompile(`^gpt-5\.4-pro(?:$|-)`),
pricing: gpt54ProFlexPricing,
premiumPricing: &gpt54ProPremiumFlexPricing,
},
{
pattern: regexp.MustCompile(`^gpt-5\.4(?:$|-)`),
pricing: gpt54FlexPricing,
premiumPricing: &gpt54PremiumFlexPricing,
},
{ {
pattern: regexp.MustCompile(`^gpt-5-mini(?:$|-)`), pattern: regexp.MustCompile(`^gpt-5-mini(?:$|-)`),
pricing: gpt5MiniFlexPricing, pricing: gpt5MiniFlexPricing,
@@ -556,6 +645,11 @@ var (
} }
priorityModelFamilies = []modelFamily{ priorityModelFamilies = []modelFamily{
{
pattern: regexp.MustCompile(`^gpt-5\.4(?:$|-)`),
pricing: gpt54PriorityPricing,
premiumPricing: &gpt54PremiumPriorityPricing,
},
{ {
pattern: regexp.MustCompile(`^gpt-5\.3-codex(?:$|-)`), pattern: regexp.MustCompile(`^gpt-5\.3-codex(?:$|-)`),
pricing: gpt52CodexPriorityPricing, pricing: gpt52CodexPriorityPricing,
@@ -638,15 +732,28 @@ func modelFamiliesForTier(serviceTier string) []modelFamily {
} }
} }
func findPricingInFamilies(model string, modelFamilies []modelFamily) (ModelPricing, bool) { func findPricingInFamilies(model string, contextWindow int, modelFamilies []modelFamily) (ModelPricing, bool) {
isPremium := contextWindow >= contextWindowPremium
for _, family := range modelFamilies { for _, family := range modelFamilies {
if family.pattern.MatchString(model) { if family.pattern.MatchString(model) {
if isPremium && family.premiumPricing != nil {
return *family.premiumPricing, true
}
return family.pricing, true return family.pricing, true
} }
} }
return ModelPricing{}, false return ModelPricing{}, false
} }
// hasPremiumPricingInFamilies reports whether the first family in
// modelFamilies whose pattern matches model defines premium pricing.
// Only the first matching family is consulted; when no family matches,
// the result is false.
func hasPremiumPricingInFamilies(model string, modelFamilies []modelFamily) bool {
	for i := range modelFamilies {
		candidate := &modelFamilies[i]
		if !candidate.pattern.MatchString(model) {
			continue
		}
		// The first match decides the answer, mirroring how pricing lookup
		// stops at the first matching family.
		return candidate.premiumPricing != nil
	}
	return false
}
func normalizeServiceTier(serviceTier string) string { func normalizeServiceTier(serviceTier string) string {
switch strings.ToLower(strings.TrimSpace(serviceTier)) { switch strings.ToLower(strings.TrimSpace(serviceTier)) {
case "", serviceTierAuto, serviceTierDefault: case "", serviceTierAuto, serviceTierDefault:
@@ -663,27 +770,27 @@ func normalizeServiceTier(serviceTier string) string {
} }
} }
func getPricing(model string, serviceTier string) ModelPricing { func getPricing(model string, serviceTier string, contextWindow int) ModelPricing {
normalizedServiceTier := normalizeServiceTier(serviceTier) normalizedServiceTier := normalizeServiceTier(serviceTier)
modelFamilies := modelFamiliesForTier(normalizedServiceTier) families := modelFamiliesForTier(normalizedServiceTier)
if pricing, found := findPricingInFamilies(model, modelFamilies); found { if pricing, found := findPricingInFamilies(model, contextWindow, families); found {
return pricing return pricing
} }
normalizedModel := normalizeGPT5Model(model) normalizedModel := normalizeGPT5Model(model)
if normalizedModel != model { if normalizedModel != model {
if pricing, found := findPricingInFamilies(normalizedModel, modelFamilies); found { if pricing, found := findPricingInFamilies(normalizedModel, contextWindow, families); found {
return pricing return pricing
} }
} }
if normalizedServiceTier != serviceTierDefault { if normalizedServiceTier != serviceTierDefault {
if pricing, found := findPricingInFamilies(model, standardModelFamilies); found { if pricing, found := findPricingInFamilies(model, contextWindow, standardModelFamilies); found {
return pricing return pricing
} }
if normalizedModel != model { if normalizedModel != model {
if pricing, found := findPricingInFamilies(normalizedModel, standardModelFamilies); found { if pricing, found := findPricingInFamilies(normalizedModel, contextWindow, standardModelFamilies); found {
return pricing return pricing
} }
} }
@@ -692,6 +799,30 @@ func getPricing(model string, serviceTier string) ModelPricing {
return gpt4oPricing return gpt4oPricing
} }
// detectContextWindow classifies a request into a context-window bucket.
//
// Requests with inputTokens at or below premiumContextThreshold are always
// contextWindowStandard. Larger requests are contextWindowPremium only when a
// matching model family defines premium pricing. Families are consulted in
// this order: the families for the normalized service tier using model, then
// the same families using the normalizeGPT5Model form of model (if it
// differs), and finally, for non-default tiers only, standardModelFamilies
// using model and then the normalized model. If none of these report premium
// pricing, the result is contextWindowStandard.
func detectContextWindow(model string, serviceTier string, inputTokens int64) int {
	if inputTokens <= premiumContextThreshold {
		return contextWindowStandard
	}

	tier := normalizeServiceTier(serviceTier)
	tierFamilies := modelFamiliesForTier(tier)
	if hasPremiumPricingInFamilies(model, tierFamilies) {
		return contextWindowPremium
	}

	alias := normalizeGPT5Model(model)
	aliasDiffers := alias != model

	switch {
	case aliasDiffers && hasPremiumPricingInFamilies(alias, tierFamilies):
		return contextWindowPremium
	case tier == serviceTierDefault:
		// The default tier has no further fallback table to consult.
		return contextWindowStandard
	case hasPremiumPricingInFamilies(model, standardModelFamilies):
		return contextWindowPremium
	case aliasDiffers && hasPremiumPricingInFamilies(alias, standardModelFamilies):
		return contextWindowPremium
	}
	return contextWindowStandard
}
func normalizeGPT5Model(model string) string { func normalizeGPT5Model(model string) string {
if !strings.HasPrefix(model, "gpt-5.") { if !strings.HasPrefix(model, "gpt-5.") {
return model return model
@@ -707,18 +838,18 @@ func normalizeGPT5Model(model string) string {
case strings.Contains(model, "-chat-latest"): case strings.Contains(model, "-chat-latest"):
return "gpt-5.2-chat-latest" return "gpt-5.2-chat-latest"
case strings.Contains(model, "-pro"): case strings.Contains(model, "-pro"):
return "gpt-5.2-pro" return "gpt-5.4-pro"
case strings.Contains(model, "-mini"): case strings.Contains(model, "-mini"):
return "gpt-5-mini" return "gpt-5-mini"
case strings.Contains(model, "-nano"): case strings.Contains(model, "-nano"):
return "gpt-5-nano" return "gpt-5-nano"
default: default:
return "gpt-5.2" return "gpt-5.4"
} }
} }
func calculateCost(stats UsageStats, model string, serviceTier string) float64 { func calculateCost(stats UsageStats, model string, serviceTier string, contextWindow int) float64 {
pricing := getPricing(model, serviceTier) pricing := getPricing(model, serviceTier, contextWindow)
regularInputTokens := stats.InputTokens - stats.CachedTokens regularInputTokens := stats.InputTokens - stats.CachedTokens
if regularInputTokens < 0 { if regularInputTokens < 0 {
@@ -739,13 +870,16 @@ func roundCost(cost float64) float64 {
func normalizeCombinations(combinations []CostCombination) { func normalizeCombinations(combinations []CostCombination) {
for index := range combinations { for index := range combinations {
combinations[index].ServiceTier = normalizeServiceTier(combinations[index].ServiceTier) combinations[index].ServiceTier = normalizeServiceTier(combinations[index].ServiceTier)
if combinations[index].ContextWindow <= 0 {
combinations[index].ContextWindow = contextWindowStandard
}
if combinations[index].ByUser == nil { if combinations[index].ByUser == nil {
combinations[index].ByUser = make(map[string]UsageStats) combinations[index].ByUser = make(map[string]UsageStats)
} }
} }
} }
func addUsageToCombinations(combinations *[]CostCombination, model string, serviceTier string, weekStartUnix int64, user string, inputTokens, outputTokens, cachedTokens int64) { func addUsageToCombinations(combinations *[]CostCombination, model string, serviceTier string, contextWindow int, weekStartUnix int64, user string, inputTokens, outputTokens, cachedTokens int64) {
var matchedCombination *CostCombination var matchedCombination *CostCombination
for index := range *combinations { for index := range *combinations {
combination := &(*combinations)[index] combination := &(*combinations)[index]
@@ -753,7 +887,7 @@ func addUsageToCombinations(combinations *[]CostCombination, model string, servi
if combination.ServiceTier != combinationServiceTier { if combination.ServiceTier != combinationServiceTier {
combination.ServiceTier = combinationServiceTier combination.ServiceTier = combinationServiceTier
} }
if combination.Model == model && combinationServiceTier == serviceTier && combination.WeekStartUnix == weekStartUnix { if combination.Model == model && combinationServiceTier == serviceTier && combination.ContextWindow == contextWindow && combination.WeekStartUnix == weekStartUnix {
matchedCombination = combination matchedCombination = combination
break break
} }
@@ -763,6 +897,7 @@ func addUsageToCombinations(combinations *[]CostCombination, model string, servi
newCombination := CostCombination{ newCombination := CostCombination{
Model: model, Model: model,
ServiceTier: serviceTier, ServiceTier: serviceTier,
ContextWindow: contextWindow,
WeekStartUnix: weekStartUnix, WeekStartUnix: weekStartUnix,
Total: UsageStats{}, Total: UsageStats{},
ByUser: make(map[string]UsageStats), ByUser: make(map[string]UsageStats),
@@ -791,12 +926,13 @@ func buildCombinationJSON(combinations []CostCombination, aggregateUserCosts map
var totalCost float64 var totalCost float64
for index, combination := range combinations { for index, combination := range combinations {
combinationTotalCost := calculateCost(combination.Total, combination.Model, combination.ServiceTier) combinationTotalCost := calculateCost(combination.Total, combination.Model, combination.ServiceTier, combination.ContextWindow)
totalCost += combinationTotalCost totalCost += combinationTotalCost
combinationJSON := CostCombinationJSON{ combinationJSON := CostCombinationJSON{
Model: combination.Model, Model: combination.Model,
ServiceTier: combination.ServiceTier, ServiceTier: combination.ServiceTier,
ContextWindow: combination.ContextWindow,
WeekStartUnix: combination.WeekStartUnix, WeekStartUnix: combination.WeekStartUnix,
Total: UsageStatsJSON{ Total: UsageStatsJSON{
RequestCount: combination.Total.RequestCount, RequestCount: combination.Total.RequestCount,
@@ -809,7 +945,7 @@ func buildCombinationJSON(combinations []CostCombination, aggregateUserCosts map
} }
for user, userStats := range combination.ByUser { for user, userStats := range combination.ByUser {
userCost := calculateCost(userStats, combination.Model, combination.ServiceTier) userCost := calculateCost(userStats, combination.Model, combination.ServiceTier, combination.ContextWindow)
if aggregateUserCosts != nil { if aggregateUserCosts != nil {
aggregateUserCosts[user] += userCost aggregateUserCosts[user] += userCost
} }
@@ -857,7 +993,7 @@ func buildByWeekCost(combinations []CostCombination) map[string]float64 {
} }
weekStartAt := time.Unix(combination.WeekStartUnix, 0).UTC() weekStartAt := time.Unix(combination.WeekStartUnix, 0).UTC()
weekKey := formatWeekStartKey(weekStartAt) weekKey := formatWeekStartKey(weekStartAt)
byWeek[weekKey] += calculateCost(combination.Total, combination.Model, combination.ServiceTier) byWeek[weekKey] += calculateCost(combination.Total, combination.Model, combination.ServiceTier, combination.ContextWindow)
} }
for weekKey, weekCost := range byWeek { for weekKey, weekCost := range byWeek {
byWeek[weekKey] = roundCost(weekCost) byWeek[weekKey] = roundCost(weekCost)
@@ -879,7 +1015,7 @@ func buildByUserAndWeekCost(combinations []CostCombination) map[string]map[strin
userWeeks = make(map[string]float64) userWeeks = make(map[string]float64)
byUserAndWeek[user] = userWeeks byUserAndWeek[user] = userWeeks
} }
userWeeks[weekKey] += calculateCost(userStats, combination.Model, combination.ServiceTier) userWeeks[weekKey] += calculateCost(userStats, combination.Model, combination.ServiceTier, combination.ContextWindow)
} }
} }
for _, weekCosts := range byUserAndWeek { for _, weekCosts := range byUserAndWeek {
@@ -987,14 +1123,17 @@ func (u *AggregatedUsage) Save() error {
return err return err
} }
func (u *AggregatedUsage) AddUsage(model string, inputTokens, outputTokens, cachedTokens int64, serviceTier string, user string) error { func (u *AggregatedUsage) AddUsage(model string, contextWindow int, inputTokens, outputTokens, cachedTokens int64, serviceTier string, user string) error {
return u.AddUsageWithCycleHint(model, inputTokens, outputTokens, cachedTokens, serviceTier, user, time.Now(), nil) return u.AddUsageWithCycleHint(model, contextWindow, inputTokens, outputTokens, cachedTokens, serviceTier, user, time.Now(), nil)
} }
func (u *AggregatedUsage) AddUsageWithCycleHint(model string, inputTokens, outputTokens, cachedTokens int64, serviceTier string, user string, observedAt time.Time, cycleHint *WeeklyCycleHint) error { func (u *AggregatedUsage) AddUsageWithCycleHint(model string, contextWindow int, inputTokens, outputTokens, cachedTokens int64, serviceTier string, user string, observedAt time.Time, cycleHint *WeeklyCycleHint) error {
if model == "" { if model == "" {
return E.New("model cannot be empty") return E.New("model cannot be empty")
} }
if contextWindow <= 0 {
return E.New("contextWindow must be positive")
}
normalizedServiceTier := normalizeServiceTier(serviceTier) normalizedServiceTier := normalizeServiceTier(serviceTier)
if observedAt.IsZero() { if observedAt.IsZero() {
@@ -1007,7 +1146,7 @@ func (u *AggregatedUsage) AddUsageWithCycleHint(model string, inputTokens, outpu
u.LastUpdated = observedAt u.LastUpdated = observedAt
weekStartUnix := deriveWeekStartUnix(cycleHint) weekStartUnix := deriveWeekStartUnix(cycleHint)
addUsageToCombinations(&u.Combinations, model, normalizedServiceTier, weekStartUnix, user, inputTokens, outputTokens, cachedTokens) addUsageToCombinations(&u.Combinations, model, normalizedServiceTier, contextWindow, weekStartUnix, user, inputTokens, outputTokens, cachedTokens)
go u.scheduleSave() go u.scheduleSave()

View File

@@ -256,8 +256,10 @@ func (s *Service) proxyWebSocketUpstreamToClient(upstreamReadWriter io.ReadWrite
responseModel = requestModel responseModel = requestModel
} }
if responseModel != "" { if responseModel != "" {
contextWindow := detectContextWindow(responseModel, serviceTier, inputTokens)
s.usageTracker.AddUsageWithCycleHint( s.usageTracker.AddUsageWithCycleHint(
responseModel, responseModel,
contextWindow,
inputTokens, inputTokens,
outputTokens, outputTokens,
cachedTokens, cachedTokens,