From 832b2f0a26a74bff95dbe207e92870b18641c0d8 Mon Sep 17 00:00:00 2001 From: maaydin Date: Thu, 20 Nov 2025 03:56:20 +0000 Subject: [PATCH] Add rate limit headers, token usage tracking, and improve error handling --- README.md | 27 +++++++++++++++++++++------ client/client.go | 32 ++++++++++++++++++++++++++++++++ client/http.go | 24 ++++++++++++++++-------- endpoints/catalog.go | 2 +- endpoints/inference.go | 22 ++++++++++++++++++++-- models/inference.go | 23 ++++++++++++++++++++--- 6 files changed, 110 insertions(+), 20 deletions(-) diff --git a/README.md b/README.md index 966f2d6..cc210a2 100644 --- a/README.md +++ b/README.md @@ -11,6 +11,8 @@ It allows you to list models, perform chat/inference completions, and supports s - List available models in the GitHub Models catalog - Create chat completions (like OpenAI’s `ChatCompletion`) +- Rate limit tracking (headers parsed automatically) +- Token usage tracking (prompt, completion, total) - Optional streaming support for real-time responses - Supports organization-scoped endpoints - Easy-to-use Go client interface @@ -25,15 +27,17 @@ go get github.com/tigillo/githubmodels-go ## Usage ### Initialize Client -``` +```go package main import ( "context" "fmt" "os" + "time" githubmodels "github.com/tigillo/githubmodels-go/client" + "github.com/tigillo/githubmodels-go/models" ) func main() { @@ -43,30 +47,41 @@ func main() { ctx := context.Background() // Example: list models - models, err := client.ListModels(ctx) + modelsList, err := client.ListModels(ctx) if err != nil { panic(err) } - for _, m := range models { + for _, m := range modelsList { fmt.Println(m.ID, "-", m.Description) } } ``` ### Create Chat Completion -``` -resp, err := client.ChatCompletion(ctx, githubmodels.ChatRequest{ +```go +resp, err := client.ChatCompletion(ctx, models.ChatRequest{ Model: "github/code-chat", - Messages: []githubmodels.Message{ + Messages: []models.Message{ {Role: "user", Content: "Write a Go function to reverse a string"}, }, }) 
+ +// Check for rate limit info even on error +if resp != nil && resp.RateLimit.Limit > 0 { + fmt.Printf("Rate Limit: %d/%d remaining\n", resp.RateLimit.Remaining, resp.RateLimit.Limit) + fmt.Printf("Resets at: %s\n", time.Unix(resp.RateLimit.Reset, 0)) +} + if err != nil { panic(err) } fmt.Println(resp.Choices[0].Message.Content) + +// Check token usage +fmt.Printf("Token Usage: %d prompt + %d completion = %d total\n", + resp.Usage.PromptTokens, resp.Usage.CompletionTokens, resp.Usage.TotalTokens) ``` ## Environment Variables diff --git a/client/client.go b/client/client.go index 0af2ed2..a56c009 100644 --- a/client/client.go +++ b/client/client.go @@ -6,6 +6,7 @@ import ( "encoding/json" "fmt" "net/http" + "strconv" "github.com/tigillo/githubmodels-go/models" ) @@ -95,3 +96,34 @@ func (c *Client) ChatCompletion(ctx context.Context, reqData models.ChatRequest) return &chatResp, nil } + +// ParseRateLimitHeaders extracts rate limit information from HTTP headers +func ParseRateLimitHeaders(headers http.Header) models.RateLimitInfo { + info := models.RateLimitInfo{} + + if limit := headers.Get("X-RateLimit-Limit"); limit != "" { + if val, err := strconv.Atoi(limit); err == nil { + info.Limit = val + } + } + + if remaining := headers.Get("X-RateLimit-Remaining"); remaining != "" { + if val, err := strconv.Atoi(remaining); err == nil { + info.Remaining = val + } + } + + if reset := headers.Get("X-RateLimit-Reset"); reset != "" { + if val, err := strconv.ParseInt(reset, 10, 64); err == nil { + info.Reset = val + } + } + + if retryAfter := headers.Get("Retry-After"); retryAfter != "" { + if val, err := strconv.Atoi(retryAfter); err == nil { + info.RetryAfter = val + } + } + + return info +} diff --git a/client/http.go b/client/http.go index e0ba4de..5aea27b 100644 --- a/client/http.go +++ b/client/http.go @@ -9,22 +9,22 @@ import ( "net/http" ) -// doRequest is a helper to make HTTP requests to GitHub Models API -func (c *Client) DoRequest(ctx context.Context, method, 
path string, body interface{}, result interface{}) error { +// DoRequest is a helper to make HTTP requests to GitHub Models API +func (c *Client) DoRequest(ctx context.Context, method, path string, body interface{}, result interface{}) (http.Header, error) { url := fmt.Sprintf("%s%s", c.BaseURL, path) var bodyReader io.Reader if body != nil { b, err := json.Marshal(body) if err != nil { - return err + return nil, err } bodyReader = bytes.NewReader(b) } req, err := http.NewRequestWithContext(ctx, method, url, bodyReader) if err != nil { - return err + return nil, err } req.Header.Set("Authorization", "Bearer "+c.token) @@ -35,21 +35,29 @@ func (c *Client) DoRequest(ctx context.Context, method, path string, body interf resp, err := c.Client.Do(req) if err != nil { - return err + return nil, err } defer resp.Body.Close() + // Extract only relevant headers (resp.Header keys are in Go's canonical MIME form) + headers := make(http.Header) + for k, v := range resp.Header { + if k == "X-Ratelimit-Limit" || k == "X-Ratelimit-Remaining" || k == "X-Ratelimit-Reset" || k == "Retry-After" { + headers[k] = v + } + } + if resp.StatusCode < 200 || resp.StatusCode >= 300 { // Read response body for error message respBody, _ := io.ReadAll(resp.Body) - return fmt.Errorf("HTTP %d: %s", resp.StatusCode, string(respBody)) + return headers, fmt.Errorf("HTTP %d: %s", resp.StatusCode, string(respBody)) } if result != nil { if err := json.NewDecoder(resp.Body).Decode(result); err != nil { - return err + return headers, err } } - return nil + return headers, nil } diff --git a/endpoints/catalog.go b/endpoints/catalog.go index 646f4cd..20efa90 100644 --- a/endpoints/catalog.go +++ b/endpoints/catalog.go @@ -9,7 +9,7 @@ import ( // ListModels fetches all available models from GitHub Models catalog func ListModels(ctx context.Context, c *client.Client) ([]client.Model, error) { var models []client.Model - err := c.DoRequest(ctx, "GET", "/catalog/models", nil, &models) + _, err := c.DoRequest(ctx, "GET", "/catalog/models", nil, &models) if err
!= nil { return nil, err } diff --git a/endpoints/inference.go b/endpoints/inference.go index 835ecd2..5458717 100644 --- a/endpoints/inference.go +++ b/endpoints/inference.go @@ -10,8 +10,18 @@ import ( // ChatCompletion sends a chat request to the GitHub Models API func ChatCompletion(ctx context.Context, c *client.Client, req models.ChatRequest) (*models.ChatResponse, error) { var resp models.ChatResponse - err := c.DoRequest(ctx, "POST", "/inference/chat/completions", req, &resp) + headers, err := c.DoRequest(ctx, "POST", "/inference/chat/completions", req, &resp) + + // Always attach headers if available, even on error + if headers != nil { + resp.RateLimit = client.ParseRateLimitHeaders(headers) + } + if err != nil { + // If we have headers (rate limits), return the partial response with the error + if headers != nil { + return &resp, err + } return nil, err } return &resp, nil @@ -21,8 +31,16 @@ func ChatCompletion(ctx context.Context, c *client.Client, req models.ChatReques func OrgChatCompletion(ctx context.Context, c *client.Client, org string, req models.ChatRequest) (*models.ChatResponse, error) { path := "/orgs/" + org + "/inference/chat/completions" var resp models.ChatResponse - err := c.DoRequest(ctx, "POST", path, req, &resp) + headers, err := c.DoRequest(ctx, "POST", path, req, &resp) + + if headers != nil { + resp.RateLimit = client.ParseRateLimitHeaders(headers) + } + if err != nil { + if headers != nil { + return &resp, err + } return nil, err } return &resp, nil diff --git a/models/inference.go b/models/inference.go index 4a66045..6837e9b 100644 --- a/models/inference.go +++ b/models/inference.go @@ -17,9 +17,26 @@ type Choice struct { Message Message `json:"message"` // The generated message from the model } +// RateLimitInfo contains rate limit information from GitHub API response headers +type RateLimitInfo struct { + Limit int // X-RateLimit-Limit: Maximum requests per hour + Remaining int // X-RateLimit-Remaining: Requests remaining in 
current window + Reset int64 // X-RateLimit-Reset: Unix timestamp when the limit resets + RetryAfter int // Retry-After: Seconds to wait before retrying (only on 429) +} + +// Usage contains token usage information from the API response +type Usage struct { + PromptTokens int `json:"prompt_tokens"` + CompletionTokens int `json:"completion_tokens"` + TotalTokens int `json:"total_tokens"` +} + // ChatResponse represents the response from the chat completion endpoint type ChatResponse struct { - ID string `json:"id"` // Response ID - Object string `json:"object"` // Type of object, e.g., "chat.completion" - Choices []Choice `json:"choices"` // List of choices + ID string `json:"id"` // Response ID + Object string `json:"object"` // Type of object, e.g., "chat.completion" + Choices []Choice `json:"choices"` // List of choices + Usage Usage `json:"usage"` // Token usage information + RateLimit RateLimitInfo // Rate limit information from response headers }