Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 21 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ It allows you to list models, perform chat/inference completions, and supports s

- List available models in the GitHub Models catalog
- Create chat completions (like OpenAI’s `ChatCompletion`)
- Rate limit tracking (headers parsed automatically)
- Token usage tracking (prompt, completion, total)
- Optional streaming support for real-time responses
- Supports organization-scoped endpoints
- Easy-to-use Go client interface
Expand All @@ -25,15 +27,17 @@ go get github.com/tigillo/githubmodels-go

## Usage
### Initialize Client
```
```go
package main

import (
"context"
"fmt"
"os"
"time"

githubmodels "github.com/tigillo/githubmodels-go/client"
"github.com/tigillo/githubmodels-go/models"
)

func main() {
Expand All @@ -43,30 +47,41 @@ func main() {
ctx := context.Background()

// Example: list models
models, err := client.ListModels(ctx)
modelsList, err := client.ListModels(ctx)
if err != nil {
panic(err)
}

for _, m := range models {
for _, m := range modelsList {
fmt.Println(m.ID, "-", m.Description)
}
}
```

### Create Chat Completion
```
resp, err := client.ChatCompletion(ctx, githubmodels.ChatRequest{
```go
resp, err := client.ChatCompletion(ctx, models.ChatRequest{
Model: "github/code-chat",
Messages: []githubmodels.Message{
Messages: []models.Message{
{Role: "user", Content: "Write a Go function to reverse a string"},
},
})

// Check for rate limit info even on error
if resp != nil && resp.RateLimit.Limit > 0 {
fmt.Printf("Rate Limit: %d/%d remaining\n", resp.RateLimit.Remaining, resp.RateLimit.Limit)
fmt.Printf("Resets at: %s\n", time.Unix(resp.RateLimit.Reset, 0))
}

if err != nil {
panic(err)
}

fmt.Println(resp.Choices[0].Message.Content)

// Check token usage
fmt.Printf("Token Usage: %d prompt + %d completion = %d total\n",
resp.Usage.PromptTokens, resp.Usage.CompletionTokens, resp.Usage.TotalTokens)
```

## Environment Variables
Expand Down
32 changes: 32 additions & 0 deletions client/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"encoding/json"
"fmt"
"net/http"
"strconv"

"github.com/tigillo/githubmodels-go/models"
)
Expand Down Expand Up @@ -95,3 +96,34 @@ func (c *Client) ChatCompletion(ctx context.Context, reqData models.ChatRequest)

return &chatResp, nil
}

// ParseRateLimitHeaders extracts rate limit information from HTTP headers.
// Missing or malformed header values are simply left at their zero value.
//
// NOTE(review): http.Header.Get canonicalizes its argument (MIME header
// form), so these lookups work regardless of the wire casing; exact-case
// key comparisons elsewhere would not — verify any manual header filtering.
func ParseRateLimitHeaders(headers http.Header) models.RateLimitInfo {
	// atoiOrZero parses s as a base-10 int, returning 0 for an empty or
	// invalid value (same best-effort behavior for every counter header).
	atoiOrZero := func(s string) int {
		v, err := strconv.Atoi(s)
		if err != nil {
			return 0
		}
		return v
	}

	var info models.RateLimitInfo
	info.Limit = atoiOrZero(headers.Get("X-RateLimit-Limit"))
	info.Remaining = atoiOrZero(headers.Get("X-RateLimit-Remaining"))
	info.RetryAfter = atoiOrZero(headers.Get("Retry-After"))

	// Reset is a Unix timestamp and needs the full int64 range.
	if v, err := strconv.ParseInt(headers.Get("X-RateLimit-Reset"), 10, 64); err == nil {
		info.Reset = v
	}

	return info
}
24 changes: 16 additions & 8 deletions client/http.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,22 +9,22 @@ import (
"net/http"
)

// doRequest is a helper to make HTTP requests to GitHub Models API
func (c *Client) DoRequest(ctx context.Context, method, path string, body interface{}, result interface{}) error {
// DoRequest is a helper to make HTTP requests to GitHub Models API
func (c *Client) DoRequest(ctx context.Context, method, path string, body interface{}, result interface{}) (http.Header, error) {
url := fmt.Sprintf("%s%s", c.BaseURL, path)

var bodyReader io.Reader
if body != nil {
b, err := json.Marshal(body)
if err != nil {
return err
return nil, err
}
bodyReader = bytes.NewReader(b)
}

req, err := http.NewRequestWithContext(ctx, method, url, bodyReader)
if err != nil {
return err
return nil, err
}

req.Header.Set("Authorization", "Bearer "+c.token)
Expand All @@ -35,21 +35,29 @@ func (c *Client) DoRequest(ctx context.Context, method, path string, body interf

resp, err := c.Client.Do(req)
if err != nil {
return err
return nil, err
}
defer resp.Body.Close()

// Extract only relevant headers
headers := make(http.Header)
for k, v := range resp.Header {
if k == "X-RateLimit-Limit" || k == "X-RateLimit-Remaining" || k == "X-RateLimit-Reset" || k == "Retry-After" {
headers[k] = v
}
}

if resp.StatusCode < 200 || resp.StatusCode >= 300 {
// Read response body for error message
respBody, _ := io.ReadAll(resp.Body)
return fmt.Errorf("HTTP %d: %s", resp.StatusCode, string(respBody))
return headers, fmt.Errorf("HTTP %d: %s", resp.StatusCode, string(respBody))
}

if result != nil {
if err := json.NewDecoder(resp.Body).Decode(result); err != nil {
return err
return headers, err
}
}

return nil
return headers, nil
}
2 changes: 1 addition & 1 deletion endpoints/catalog.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ import (
// ListModels fetches all available models from GitHub Models catalog
func ListModels(ctx context.Context, c *client.Client) ([]client.Model, error) {
var models []client.Model
err := c.DoRequest(ctx, "GET", "/catalog/models", nil, &models)
_, err := c.DoRequest(ctx, "GET", "/catalog/models", nil, &models)
if err != nil {
return nil, err
}
Expand Down
22 changes: 20 additions & 2 deletions endpoints/inference.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,18 @@ import (
// ChatCompletion sends a chat request to the GitHub Models API
func ChatCompletion(ctx context.Context, c *client.Client, req models.ChatRequest) (*models.ChatResponse, error) {
var resp models.ChatResponse
err := c.DoRequest(ctx, "POST", "/inference/chat/completions", req, &resp)
headers, err := c.DoRequest(ctx, "POST", "/inference/chat/completions", req, &resp)

// Always attach headers if available, even on error
if headers != nil {
resp.RateLimit = client.ParseRateLimitHeaders(headers)
}

if err != nil {
// If we have headers (rate limits), return the partial response with the error
if headers != nil {
return &resp, err
}
return nil, err
}
return &resp, nil
Expand All @@ -21,8 +31,16 @@ func ChatCompletion(ctx context.Context, c *client.Client, req models.ChatReques
func OrgChatCompletion(ctx context.Context, c *client.Client, org string, req models.ChatRequest) (*models.ChatResponse, error) {
path := "/orgs/" + org + "/inference/chat/completions"
var resp models.ChatResponse
err := c.DoRequest(ctx, "POST", path, req, &resp)
headers, err := c.DoRequest(ctx, "POST", path, req, &resp)

if headers != nil {
resp.RateLimit = client.ParseRateLimitHeaders(headers)
}

if err != nil {
if headers != nil {
return &resp, err
}
return nil, err
}
return &resp, nil
Expand Down
23 changes: 20 additions & 3 deletions models/inference.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,26 @@ type Choice struct {
Message Message `json:"message"` // The generated message from the model
}

// RateLimitInfo contains rate limit information parsed from GitHub API
// response headers. Fields are left at their zero value when the
// corresponding header is absent from the response.
type RateLimitInfo struct {
	Limit      int   // X-RateLimit-Limit: maximum requests allowed in the current window
	Remaining  int   // X-RateLimit-Remaining: requests remaining in the current window
	Reset      int64 // X-RateLimit-Reset: Unix timestamp (seconds) when the limit resets
	RetryAfter int   // Retry-After: seconds to wait before retrying (typically only sent on HTTP 429)
}

// Usage contains token usage information from the API response
// (the "usage" object of a chat completion body).
type Usage struct {
	PromptTokens     int `json:"prompt_tokens"`     // tokens consumed by the input messages
	CompletionTokens int `json:"completion_tokens"` // tokens generated in the model's reply
	TotalTokens      int `json:"total_tokens"`      // total reported by the API (prompt + completion)
}

// ChatResponse represents the response from the chat completion endpoint
type ChatResponse struct {
ID string `json:"id"` // Response ID
Object string `json:"object"` // Type of object, e.g., "chat.completion"
Choices []Choice `json:"choices"` // List of choices
ID string `json:"id"` // Response ID
Object string `json:"object"` // Type of object, e.g., "chat.completion"
Choices []Choice `json:"choices"` // List of choices
Usage Usage `json:"usage"` // Token usage information
RateLimit RateLimitInfo // Rate limit information from response headers
}
Loading