Files
skybridge/internal/metrics/metrics.go
2025-08-22 14:40:59 -04:00

416 lines
11 KiB
Go

package metrics
import (
	"context"
	"net/http"
	"sort"
	"strconv"
	"sync"
	"time"

	"github.com/gin-gonic/gin"
	"go.uber.org/zap"
)
// Metrics holds all application metrics.
//
// The fields are grouped by concern (HTTP, business, system, errors).
// NewMetrics allocates every metric field; mu is left at its zero value.
type Metrics struct {
// HTTP metrics
// RequestsTotal is incremented by Middleware once per handled request.
RequestsTotal *Counter
// RequestDuration receives the handling time, in seconds, from Middleware.
RequestDuration *Histogram
// RequestsInFlight is incremented by Middleware before the handler chain
// runs and decremented when it returns.
RequestsInFlight *Gauge
// ResponseSize receives the response size reported by gin's writer.
ResponseSize *Histogram
// Business metrics
// TokensCreated is incremented by RecordTokenCreation.
TokensCreated *Counter
// TokensVerified is incremented by RecordTokenVerification.
TokensVerified *Counter
// TokensRevoked is incremented by RecordTokenRevocation.
TokensRevoked *Counter
// ApplicationsTotal is set by UpdateApplicationCount.
ApplicationsTotal *Gauge
// PermissionsTotal is set by UpdatePermissionCount.
PermissionsTotal *Gauge
// System metrics
// DatabaseConnections is set by UpdateDatabaseConnections.
DatabaseConnections *Gauge
// DatabaseQueries is incremented by RecordDatabaseQuery on every call.
DatabaseQueries *Counter
// DatabaseErrors is incremented by RecordDatabaseQuery when success is false.
DatabaseErrors *Counter
// CacheHits is incremented by RecordCacheHit.
CacheHits *Counter
// CacheMisses is incremented by RecordCacheMiss.
CacheMisses *Counter
// Error metrics
// ErrorsTotal is incremented by Middleware for responses with status >= 400.
ErrorsTotal *Counter
// NOTE(review): nothing in this part of the file locks mu; each metric
// carries its own mutex. Confirm whether mu is used elsewhere.
mu sync.RWMutex
}
// Counter represents a monotonically increasing counter.
//
// Inc, Add and Value all take mu before touching value, so a *Counter can be
// shared between goroutines. Because it embeds a mutex, use it through a
// pointer and do not copy it.
type Counter struct {
// value is the running total; accessed only while holding mu.
value float64
// labels is the map handed to NewCounter, stored as-is (not copied).
// NOTE(review): nothing in this part of the file reads labels back.
labels map[string]string
// mu guards value.
mu sync.RWMutex
}
// Gauge represents a value that can go up and down.
//
// Set, Inc, Dec, Add and Value all take mu before touching value, so a *Gauge
// can be shared between goroutines. Because it embeds a mutex, use it through
// a pointer and do not copy it.
type Gauge struct {
// value is the current reading; accessed only while holding mu.
value float64
// labels is the map handed to NewGauge, stored as-is (not copied).
// NOTE(review): nothing in this part of the file reads labels back.
labels map[string]string
// mu guards value.
mu sync.RWMutex
}
// Histogram represents a distribution of values.
//
// Observe, Sum, Count and Buckets all take mu before touching the fields
// below, so a *Histogram can be shared between goroutines. Because it embeds
// a mutex, use it through a pointer and do not copy it.
type Histogram struct {
// buckets maps a bucket upper bound to the number of observations that
// were less than or equal to that bound (counts are cumulative).
buckets map[float64]float64
// sum is the total of all observed values.
sum float64
// count is the number of observations recorded.
count float64
// labels is the map handed to NewHistogram, stored as-is (not copied).
// NOTE(review): nothing in this part of the file reads labels back.
labels map[string]string
// mu guards buckets, sum and count.
mu sync.RWMutex
}
// NewMetrics builds a Metrics value with every metric field allocated and
// starting at zero. Each metric is given its own, initially empty, label map.
func NewMetrics() *Metrics {
	// noLabels hands out a fresh empty map on every call so that no two
	// metrics share the same label map.
	noLabels := func() map[string]string { return map[string]string{} }

	m := new(Metrics)

	// HTTP metrics
	m.RequestsTotal = NewCounter("http_requests_total", noLabels())
	m.RequestDuration = NewHistogram("http_request_duration_seconds", noLabels())
	m.RequestsInFlight = NewGauge("http_requests_in_flight", noLabels())
	m.ResponseSize = NewHistogram("http_response_size_bytes", noLabels())

	// Business metrics
	m.TokensCreated = NewCounter("tokens_created_total", noLabels())
	m.TokensVerified = NewCounter("tokens_verified_total", noLabels())
	m.TokensRevoked = NewCounter("tokens_revoked_total", noLabels())
	m.ApplicationsTotal = NewGauge("applications_total", noLabels())
	m.PermissionsTotal = NewGauge("permissions_total", noLabels())

	// System metrics
	m.DatabaseConnections = NewGauge("database_connections", noLabels())
	m.DatabaseQueries = NewCounter("database_queries_total", noLabels())
	m.DatabaseErrors = NewCounter("database_errors_total", noLabels())
	m.CacheHits = NewCounter("cache_hits_total", noLabels())
	m.CacheMisses = NewCounter("cache_misses_total", noLabels())

	// Error metrics
	m.ErrorsTotal = NewCounter("errors_total", noLabels())

	return m
}
// NewCounter returns a Counter that starts at zero and keeps the given labels
// map (the map itself is stored, not a copy). The name argument is accepted
// but is not recorded on the Counter.
func NewCounter(name string, labels map[string]string) *Counter {
	c := new(Counter) // value starts at its zero value, 0
	c.labels = labels
	return c
}
// NewGauge returns a Gauge that starts at zero and keeps the given labels map
// (the map itself is stored, not a copy). The name argument is accepted but
// is not recorded on the Gauge.
func NewGauge(name string, labels map[string]string) *Gauge {
	g := new(Gauge) // value starts at its zero value, 0
	g.labels = labels
	return g
}
// NewHistogram returns an empty Histogram: no buckets recorded yet, sum and
// count at zero. The labels map is stored as-is (not copied). The name
// argument is accepted but is not recorded on the Histogram.
func NewHistogram(name string, labels map[string]string) *Histogram {
	h := new(Histogram) // sum and count start at their zero value, 0
	h.buckets = map[float64]float64{}
	h.labels = labels
	return h
}
// Counter methods

// Inc adds one to the counter while holding the write lock.
func (c *Counter) Inc() {
	c.mu.Lock()
	c.value += 1
	c.mu.Unlock()
}
// Add increases the counter by value.
//
// A Counter is monotonically increasing, so a delta that would break that
// guarantee is ignored instead of applied: negative values would make the
// counter go backwards, and NaN would poison the total permanently. Zero is
// accepted and leaves the counter unchanged.
func (c *Counter) Add(value float64) {
	// !(value >= 0) is true for negative numbers and for NaN.
	if !(value >= 0) {
		return
	}

	c.mu.Lock()
	defer c.mu.Unlock()
	c.value += value
}
// Value returns the counter's current total, read under the read lock.
func (c *Counter) Value() float64 {
	c.mu.RLock()
	current := c.value
	c.mu.RUnlock()
	return current
}
// Gauge methods

// Set overwrites the gauge's reading with value.
func (g *Gauge) Set(value float64) {
	g.mu.Lock()
	g.value = value
	g.mu.Unlock()
}
// Inc raises the gauge by one.
func (g *Gauge) Inc() {
	g.mu.Lock()
	g.value += 1
	g.mu.Unlock()
}
// Dec lowers the gauge by one.
func (g *Gauge) Dec() {
	g.mu.Lock()
	g.value -= 1
	g.mu.Unlock()
}
// Add shifts the gauge by value; a negative value lowers it.
func (g *Gauge) Add(value float64) {
	g.mu.Lock()
	g.value = g.value + value
	g.mu.Unlock()
}
// Value returns the gauge's current reading, read under the read lock.
func (g *Gauge) Value() float64 {
	g.mu.RLock()
	current := g.value
	g.mu.RUnlock()
	return current
}
// Histogram methods

// Observe records a single observation.
//
// value is added to the running sum, the observation count goes up by one,
// and every bucket whose upper bound is >= value is incremented (bucket
// counts are cumulative). Bounds the value does not fall under are still
// registered with their current count, so after the first observation
// Buckets reports the complete, stable set of bounds rather than only the
// ones that happen to have been hit.
//
// Observe also works on a zero-value Histogram: the bucket map is created on
// first use instead of panicking on a nil-map write.
func (h *Histogram) Observe(value float64) {
	// Define standard buckets
	bounds := []float64{0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10}

	h.mu.Lock()
	defer h.mu.Unlock()

	if h.buckets == nil {
		h.buckets = make(map[float64]float64, len(bounds))
	}

	h.sum += value
	h.count++

	for _, bound := range bounds {
		var hit float64
		if value <= bound {
			hit = 1
		}
		// Adding zero still creates the key, which keeps un-hit bounds
		// visible with a count of 0.
		h.buckets[bound] += hit
	}
}
// Sum returns the total of all observed values, read under the read lock.
func (h *Histogram) Sum() float64 {
	h.mu.RLock()
	total := h.sum
	h.mu.RUnlock()
	return total
}
// Count returns how many observations have been recorded, read under the
// read lock.
func (h *Histogram) Count() float64 {
	h.mu.RLock()
	observations := h.count
	h.mu.RUnlock()
	return observations
}
// Buckets returns a copy of the bucket table (upper bound -> cumulative
// count). The copy is taken under the read lock, so the caller may read or
// modify the returned map without affecting the histogram.
func (h *Histogram) Buckets() map[float64]float64 {
	h.mu.RLock()
	defer h.mu.RUnlock()

	snapshot := make(map[float64]float64, len(h.buckets))
	for bound, hits := range h.buckets {
		snapshot[bound] = hits
	}
	return snapshot
}
// Global metrics instance
var (
	// globalMetrics is the shared instance handed out by GetMetrics.
	globalMetrics *Metrics
	// once makes sure globalMetrics is built a single time.
	once sync.Once
)

// GetMetrics returns the shared Metrics instance, building it with NewMetrics
// on the first call. Later calls return the same pointer.
func GetMetrics() *Metrics {
	initialize := func() { globalMetrics = NewMetrics() }
	once.Do(initialize)
	return globalMetrics
}
// Middleware creates a Gin middleware for collecting HTTP metrics.
//
// For every request it tracks the in-flight gauge around the handler chain
// and, once the chain has returned, records the request count, the handling
// time in seconds, the response size in bytes and, for status codes >= 400,
// the error count. The same values are written to logger at debug level.
//
// logger must be non-nil.
func Middleware(logger *zap.Logger) gin.HandlerFunc {
	m := GetMetrics()
	return func(c *gin.Context) {
		start := time.Now()

		// Increment in-flight requests; the deferred Dec also runs when a
		// downstream handler panics, so the gauge cannot drift upwards.
		m.RequestsInFlight.Inc()
		defer m.RequestsInFlight.Dec()

		// Process request
		c.Next()

		// Record metrics
		duration := time.Since(start).Seconds()
		statusCode := c.Writer.Status()

		// gin's ResponseWriter reports a size of -1 while no body bytes have
		// been written (e.g. a bare status response). Feeding -1 into the
		// histogram would lower its sum and count the request in every
		// bucket, so an unwritten body is recorded as 0 bytes.
		size := c.Writer.Size()
		if size < 0 {
			size = 0
		}

		// Increment total requests
		m.RequestsTotal.Add(1)
		// Record request duration
		m.RequestDuration.Observe(duration)
		// Record response size
		m.ResponseSize.Observe(float64(size))
		// Record errors
		if statusCode >= 400 {
			m.ErrorsTotal.Add(1)
		}

		// Log metrics
		logger.Debug("HTTP request metrics",
			zap.String("method", c.Request.Method),
			zap.String("path", c.FullPath()),
			zap.String("status", strconv.Itoa(statusCode)),
			zap.Float64("duration", duration),
			zap.Int("size", size),
		)
	}
}
// RecordTokenCreation counts one token creation on the shared metrics.
// tokenType is accepted but not used to label or split the counter.
func RecordTokenCreation(tokenType string) {
	GetMetrics().TokensCreated.Inc()
}
// RecordTokenVerification counts one token verification on the shared
// metrics. Neither tokenType nor success is consulted: successful and failed
// verifications are counted alike.
func RecordTokenVerification(tokenType string, success bool) {
	GetMetrics().TokensVerified.Inc()
}
// RecordTokenRevocation counts one token revocation on the shared metrics.
// tokenType is accepted but not used to label or split the counter.
func RecordTokenRevocation(tokenType string) {
	GetMetrics().TokensRevoked.Inc()
}
// RecordDatabaseQuery counts one database query on the shared metrics and,
// when success is false, one database error as well. operation is accepted
// but not used to label or split the counters.
func RecordDatabaseQuery(operation string, success bool) {
	m := GetMetrics()
	m.DatabaseQueries.Inc()
	if success {
		return
	}
	m.DatabaseErrors.Inc()
}
// RecordCacheHit counts one cache hit on the shared metrics.
func RecordCacheHit() {
	GetMetrics().CacheHits.Inc()
}
// RecordCacheMiss counts one cache miss on the shared metrics.
func RecordCacheMiss() {
	GetMetrics().CacheMisses.Inc()
}
// UpdateApplicationCount sets the applications gauge on the shared metrics
// to count.
func UpdateApplicationCount(count int) {
	total := float64(count)
	GetMetrics().ApplicationsTotal.Set(total)
}
// UpdatePermissionCount sets the permissions gauge on the shared metrics to
// count.
func UpdatePermissionCount(count int) {
	total := float64(count)
	GetMetrics().PermissionsTotal.Set(total)
}
// UpdateDatabaseConnections sets the database-connections gauge on the shared
// metrics to count.
func UpdateDatabaseConnections(count int) {
	open := float64(count)
	GetMetrics().DatabaseConnections.Set(open)
}
// PrometheusHandler returns an HTTP handler that writes every metric of the
// shared Metrics instance in the Prometheus text format. The shared instance
// is looked up on each request, and the metrics are written in a fixed order.
func PrometheusHandler() http.HandlerFunc {
	serve := func(w http.ResponseWriter, r *http.Request) {
		m := GetMetrics()
		w.Header().Set("Content-Type", "text/plain; version=0.0.4; charset=utf-8")

		// HTTP metrics
		exportCounter(w, "http_requests_total", m.RequestsTotal)
		exportGauge(w, "http_requests_in_flight", m.RequestsInFlight)
		exportHistogram(w, "http_request_duration_seconds", m.RequestDuration)
		exportHistogram(w, "http_response_size_bytes", m.ResponseSize)

		// Business metrics
		exportCounter(w, "tokens_created_total", m.TokensCreated)
		exportCounter(w, "tokens_verified_total", m.TokensVerified)
		exportCounter(w, "tokens_revoked_total", m.TokensRevoked)
		exportGauge(w, "applications_total", m.ApplicationsTotal)
		exportGauge(w, "permissions_total", m.PermissionsTotal)

		// System metrics
		exportGauge(w, "database_connections", m.DatabaseConnections)
		exportCounter(w, "database_queries_total", m.DatabaseQueries)
		exportCounter(w, "database_errors_total", m.DatabaseErrors)
		exportCounter(w, "cache_hits_total", m.CacheHits)
		exportCounter(w, "cache_misses_total", m.CacheMisses)

		// Error metrics
		exportCounter(w, "errors_total", m.ErrorsTotal)
	}
	return serve
}
// exportCounter writes counter to w in the Prometheus text format: a HELP
// line, a TYPE line and one sample line carrying the current value. Write
// errors are not checked.
func exportCounter(w http.ResponseWriter, name string, counter *Counter) {
	emit := func(line string) { w.Write([]byte(line)) }

	emit("# HELP " + name + " Total number of " + name + "\n")
	emit("# TYPE " + name + " counter\n")

	sample := strconv.FormatFloat(counter.Value(), 'f', -1, 64)
	emit(name + " " + sample + "\n")
}
// exportGauge writes gauge to w in the Prometheus text format: a HELP line,
// a TYPE line and one sample line carrying the current value. Write errors
// are not checked.
func exportGauge(w http.ResponseWriter, name string, gauge *Gauge) {
	emit := func(line string) { w.Write([]byte(line)) }

	emit("# HELP " + name + " Current value of " + name + "\n")
	emit("# TYPE " + name + " gauge\n")

	sample := strconv.FormatFloat(gauge.Value(), 'f', -1, 64)
	emit(name + " " + sample + "\n")
}
// exportHistogram writes histogram to w in the Prometheus text format: a HELP
// line, a TYPE line, one `_bucket` sample per recorded upper bound, the
// closing `le="+Inf"` bucket, and the `_sum` and `_count` samples.
//
// Buckets are emitted in ascending order of their bound, so the output does
// not depend on Go's randomized map iteration. The `+Inf` bucket is always
// present and carries the same value as `_count`.
//
// Buckets, Count and Sum are read back-to-back but under separate lock
// acquisitions, so an Observe that lands in between can make them differ by
// that one observation. Write errors are not checked.
func exportHistogram(w http.ResponseWriter, name string, histogram *Histogram) {
	emit := func(line string) { w.Write([]byte(line)) }
	format := func(v float64) string { return strconv.FormatFloat(v, 'f', -1, 64) }

	emit("# HELP " + name + " Histogram of " + name + "\n")
	emit("# TYPE " + name + " histogram\n")

	buckets := histogram.Buckets()
	// Read the count once so the +Inf bucket and _count always agree.
	total := format(histogram.Count())
	sum := format(histogram.Sum())

	bounds := make([]float64, 0, len(buckets))
	for bound := range buckets {
		bounds = append(bounds, bound)
	}
	sort.Float64s(bounds)

	for _, bound := range bounds {
		emit(name + "_bucket{le=\"" + format(bound) + "\"} " + format(buckets[bound]) + "\n")
	}
	emit(name + "_bucket{le=\"+Inf\"} " + total + "\n")

	emit(name + "_sum " + sum + "\n")
	emit(name + "_count " + total + "\n")
}
// HealthMetrics represents health check metrics.
//
// The two time.Duration fields have no custom JSON encoding, so encoding/json
// writes them as integer nanoseconds.
type HealthMetrics struct {
	// DatabaseConnected reports whether the database is reachable.
	DatabaseConnected bool `json:"database_connected"`
	// ResponseTime is how long GetHealthMetrics took to build this value.
	ResponseTime time.Duration `json:"response_time"`
	// Uptime is the time elapsed since the startTime given to GetHealthMetrics.
	Uptime time.Duration `json:"uptime"`
	// Version is the version string given to GetHealthMetrics.
	Version string `json:"version"`
	// Environment is the environment name given to GetHealthMetrics.
	Environment string `json:"environment"`
}

// GetHealthMetrics returns current health metrics.
//
// version and environment are copied into the result, and Uptime is the time
// elapsed since startTime. ResponseTime covers the work done inside this
// function, measured from entry; today that is only building the result, so
// the value is tiny, but any real check added below is included automatically.
//
// ctx is accepted but not consulted yet, and DatabaseConnected is always
// reported as true because no database check is performed here.
func GetHealthMetrics(ctx context.Context, version, environment string, startTime time.Time) *HealthMetrics {
	// Capture the start first: time.Since(time.Now()) would measure nothing.
	began := time.Now()

	health := &HealthMetrics{
		DatabaseConnected: true, // This should be checked against actual DB
		Uptime:            time.Since(startTime),
		Version:           version,
		Environment:       environment,
	}

	// Set last so that everything above is included in the measurement.
	health.ResponseTime = time.Since(began)
	return health
}
// BusinessMetrics represents business-specific metrics.
//
// GetBusinessMetrics fills it from the shared Metrics instance, converting
// the float64 metric values to int.
type BusinessMetrics struct {
// TotalApplications is taken from the applications gauge.
TotalApplications int `json:"total_applications"`
// TotalTokens is taken from the tokens-created counter.
TotalTokens int `json:"total_tokens"`
// TotalPermissions is taken from the permissions gauge.
TotalPermissions int `json:"total_permissions"`
// ActiveTokens is derived from the tokens-created and tokens-revoked counters.
ActiveTokens int `json:"active_tokens"`
}
// GetBusinessMetrics returns current business metrics, read from the shared
// Metrics instance.
//
// The tokens-created counter is read once and used for both TotalTokens and
// ActiveTokens, so the two fields always describe the same moment.
// ActiveTokens is created minus revoked, floored at zero: nothing ties a
// RecordTokenRevocation call to an earlier RecordTokenCreation call, so the
// revoked counter can exceed the created counter, and a negative number of
// active tokens would be meaningless.
func GetBusinessMetrics() *BusinessMetrics {
	m := GetMetrics()

	created := m.TokensCreated.Value()
	revoked := m.TokensRevoked.Value()

	active := created - revoked
	if active < 0 {
		active = 0
	}

	return &BusinessMetrics{
		TotalApplications: int(m.ApplicationsTotal.Value()),
		TotalTokens:       int(created),
		TotalPermissions:  int(m.PermissionsTotal.Value()),
		ActiveTokens:      int(active),
	}
}