org
This commit is contained in:
415
kms/internal/metrics/metrics.go
Normal file
415
kms/internal/metrics/metrics.go
Normal file
@ -0,0 +1,415 @@
|
||||
package metrics
|
||||
|
||||
import (
	"context"
	"net/http"
	"sort"
	"strconv"
	"sync"
	"time"

	"github.com/gin-gonic/gin"
	"go.uber.org/zap"
)
|
||||
|
||||
// Metrics holds all application metrics
|
||||
type Metrics struct {
|
||||
// HTTP metrics
|
||||
RequestsTotal *Counter
|
||||
RequestDuration *Histogram
|
||||
RequestsInFlight *Gauge
|
||||
ResponseSize *Histogram
|
||||
|
||||
// Business metrics
|
||||
TokensCreated *Counter
|
||||
TokensVerified *Counter
|
||||
TokensRevoked *Counter
|
||||
ApplicationsTotal *Gauge
|
||||
PermissionsTotal *Gauge
|
||||
|
||||
// System metrics
|
||||
DatabaseConnections *Gauge
|
||||
DatabaseQueries *Counter
|
||||
DatabaseErrors *Counter
|
||||
CacheHits *Counter
|
||||
CacheMisses *Counter
|
||||
|
||||
// Error metrics
|
||||
ErrorsTotal *Counter
|
||||
|
||||
mu sync.RWMutex
|
||||
}
|
||||
|
||||
// Counter is a float64 total that is meant to only ever grow.
type Counter struct {
	// value is the running total; it is read and written under mu.
	value float64
	// labels is the label set handed to NewCounter; nothing in this file reads it.
	labels map[string]string
	// mu guards value.
	mu sync.RWMutex
}
|
||||
|
||||
// Gauge is a float64 reading that may be set, raised, or lowered freely.
type Gauge struct {
	// value is the current reading; it is read and written under mu.
	value float64
	// labels is the label set handed to NewGauge; nothing in this file reads it.
	labels map[string]string
	// mu guards value.
	mu sync.RWMutex
}
|
||||
|
||||
// Histogram accumulates observations into cumulative buckets and also tracks
// their running sum and count.
type Histogram struct {
	// buckets maps an upper bound to the number of observations that were less
	// than or equal to it.
	buckets map[float64]float64
	// sum is the total of all observed values.
	sum float64
	// count is the number of observations.
	count float64
	// labels is the label set handed to NewHistogram; nothing in this file reads it.
	labels map[string]string
	// mu guards buckets, sum, and count.
	mu sync.RWMutex
}
|
||||
|
||||
// NewMetrics creates a new metrics instance
|
||||
func NewMetrics() *Metrics {
|
||||
return &Metrics{
|
||||
// HTTP metrics
|
||||
RequestsTotal: NewCounter("http_requests_total", map[string]string{}),
|
||||
RequestDuration: NewHistogram("http_request_duration_seconds", map[string]string{}),
|
||||
RequestsInFlight: NewGauge("http_requests_in_flight", map[string]string{}),
|
||||
ResponseSize: NewHistogram("http_response_size_bytes", map[string]string{}),
|
||||
|
||||
// Business metrics
|
||||
TokensCreated: NewCounter("tokens_created_total", map[string]string{}),
|
||||
TokensVerified: NewCounter("tokens_verified_total", map[string]string{}),
|
||||
TokensRevoked: NewCounter("tokens_revoked_total", map[string]string{}),
|
||||
ApplicationsTotal: NewGauge("applications_total", map[string]string{}),
|
||||
PermissionsTotal: NewGauge("permissions_total", map[string]string{}),
|
||||
|
||||
// System metrics
|
||||
DatabaseConnections: NewGauge("database_connections", map[string]string{}),
|
||||
DatabaseQueries: NewCounter("database_queries_total", map[string]string{}),
|
||||
DatabaseErrors: NewCounter("database_errors_total", map[string]string{}),
|
||||
CacheHits: NewCounter("cache_hits_total", map[string]string{}),
|
||||
CacheMisses: NewCounter("cache_misses_total", map[string]string{}),
|
||||
|
||||
// Error metrics
|
||||
ErrorsTotal: NewCounter("errors_total", map[string]string{}),
|
||||
}
|
||||
}
|
||||
|
||||
// NewCounter creates a new counter
|
||||
func NewCounter(name string, labels map[string]string) *Counter {
|
||||
return &Counter{
|
||||
value: 0,
|
||||
labels: labels,
|
||||
}
|
||||
}
|
||||
|
||||
// NewGauge creates a new gauge
|
||||
func NewGauge(name string, labels map[string]string) *Gauge {
|
||||
return &Gauge{
|
||||
value: 0,
|
||||
labels: labels,
|
||||
}
|
||||
}
|
||||
|
||||
// NewHistogram creates a new histogram
|
||||
func NewHistogram(name string, labels map[string]string) *Histogram {
|
||||
return &Histogram{
|
||||
buckets: make(map[float64]float64),
|
||||
sum: 0,
|
||||
count: 0,
|
||||
labels: labels,
|
||||
}
|
||||
}
|
||||
|
||||
// Counter methods
|
||||
func (c *Counter) Inc() {
|
||||
c.mu.Lock()
|
||||
defer c.mu.Unlock()
|
||||
c.value++
|
||||
}
|
||||
|
||||
func (c *Counter) Add(value float64) {
|
||||
c.mu.Lock()
|
||||
defer c.mu.Unlock()
|
||||
c.value += value
|
||||
}
|
||||
|
||||
func (c *Counter) Value() float64 {
|
||||
c.mu.RLock()
|
||||
defer c.mu.RUnlock()
|
||||
return c.value
|
||||
}
|
||||
|
||||
// Gauge methods
|
||||
func (g *Gauge) Set(value float64) {
|
||||
g.mu.Lock()
|
||||
defer g.mu.Unlock()
|
||||
g.value = value
|
||||
}
|
||||
|
||||
func (g *Gauge) Inc() {
|
||||
g.mu.Lock()
|
||||
defer g.mu.Unlock()
|
||||
g.value++
|
||||
}
|
||||
|
||||
func (g *Gauge) Dec() {
|
||||
g.mu.Lock()
|
||||
defer g.mu.Unlock()
|
||||
g.value--
|
||||
}
|
||||
|
||||
func (g *Gauge) Add(value float64) {
|
||||
g.mu.Lock()
|
||||
defer g.mu.Unlock()
|
||||
g.value += value
|
||||
}
|
||||
|
||||
func (g *Gauge) Value() float64 {
|
||||
g.mu.RLock()
|
||||
defer g.mu.RUnlock()
|
||||
return g.value
|
||||
}
|
||||
|
||||
// Histogram methods
|
||||
func (h *Histogram) Observe(value float64) {
|
||||
h.mu.Lock()
|
||||
defer h.mu.Unlock()
|
||||
|
||||
h.sum += value
|
||||
h.count++
|
||||
|
||||
// Define standard buckets
|
||||
buckets := []float64{0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10}
|
||||
for _, bucket := range buckets {
|
||||
if value <= bucket {
|
||||
h.buckets[bucket]++
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (h *Histogram) Sum() float64 {
|
||||
h.mu.RLock()
|
||||
defer h.mu.RUnlock()
|
||||
return h.sum
|
||||
}
|
||||
|
||||
func (h *Histogram) Count() float64 {
|
||||
h.mu.RLock()
|
||||
defer h.mu.RUnlock()
|
||||
return h.count
|
||||
}
|
||||
|
||||
func (h *Histogram) Buckets() map[float64]float64 {
|
||||
h.mu.RLock()
|
||||
defer h.mu.RUnlock()
|
||||
result := make(map[float64]float64)
|
||||
for k, v := range h.buckets {
|
||||
result[k] = v
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
// Global metrics instance
|
||||
var globalMetrics *Metrics
|
||||
var once sync.Once
|
||||
|
||||
// GetMetrics returns the global metrics instance
|
||||
func GetMetrics() *Metrics {
|
||||
once.Do(func() {
|
||||
globalMetrics = NewMetrics()
|
||||
})
|
||||
return globalMetrics
|
||||
}
|
||||
|
||||
// Middleware creates a Gin middleware for collecting HTTP metrics
|
||||
func Middleware(logger *zap.Logger) gin.HandlerFunc {
|
||||
metrics := GetMetrics()
|
||||
|
||||
return func(c *gin.Context) {
|
||||
start := time.Now()
|
||||
|
||||
// Increment in-flight requests
|
||||
metrics.RequestsInFlight.Inc()
|
||||
defer metrics.RequestsInFlight.Dec()
|
||||
|
||||
// Process request
|
||||
c.Next()
|
||||
|
||||
// Record metrics
|
||||
duration := time.Since(start).Seconds()
|
||||
status := strconv.Itoa(c.Writer.Status())
|
||||
method := c.Request.Method
|
||||
path := c.FullPath()
|
||||
|
||||
// Increment total requests
|
||||
metrics.RequestsTotal.Add(1)
|
||||
|
||||
// Record request duration
|
||||
metrics.RequestDuration.Observe(duration)
|
||||
|
||||
// Record response size
|
||||
metrics.ResponseSize.Observe(float64(c.Writer.Size()))
|
||||
|
||||
// Record errors
|
||||
if c.Writer.Status() >= 400 {
|
||||
metrics.ErrorsTotal.Add(1)
|
||||
}
|
||||
|
||||
// Log metrics
|
||||
logger.Debug("HTTP request metrics",
|
||||
zap.String("method", method),
|
||||
zap.String("path", path),
|
||||
zap.String("status", status),
|
||||
zap.Float64("duration", duration),
|
||||
zap.Int("size", c.Writer.Size()),
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
// RecordTokenCreation records a token creation event
|
||||
func RecordTokenCreation(tokenType string) {
|
||||
metrics := GetMetrics()
|
||||
metrics.TokensCreated.Inc()
|
||||
}
|
||||
|
||||
// RecordTokenVerification records a token verification event
|
||||
func RecordTokenVerification(tokenType string, success bool) {
|
||||
metrics := GetMetrics()
|
||||
metrics.TokensVerified.Inc()
|
||||
}
|
||||
|
||||
// RecordTokenRevocation records a token revocation event
|
||||
func RecordTokenRevocation(tokenType string) {
|
||||
metrics := GetMetrics()
|
||||
metrics.TokensRevoked.Inc()
|
||||
}
|
||||
|
||||
// RecordDatabaseQuery records a database query
|
||||
func RecordDatabaseQuery(operation string, success bool) {
|
||||
metrics := GetMetrics()
|
||||
metrics.DatabaseQueries.Inc()
|
||||
|
||||
if !success {
|
||||
metrics.DatabaseErrors.Inc()
|
||||
}
|
||||
}
|
||||
|
||||
// RecordCacheHit records a cache hit
|
||||
func RecordCacheHit() {
|
||||
metrics := GetMetrics()
|
||||
metrics.CacheHits.Inc()
|
||||
}
|
||||
|
||||
// RecordCacheMiss records a cache miss
|
||||
func RecordCacheMiss() {
|
||||
metrics := GetMetrics()
|
||||
metrics.CacheMisses.Inc()
|
||||
}
|
||||
|
||||
// UpdateApplicationCount updates the total number of applications
|
||||
func UpdateApplicationCount(count int) {
|
||||
metrics := GetMetrics()
|
||||
metrics.ApplicationsTotal.Set(float64(count))
|
||||
}
|
||||
|
||||
// UpdatePermissionCount updates the total number of permissions
|
||||
func UpdatePermissionCount(count int) {
|
||||
metrics := GetMetrics()
|
||||
metrics.PermissionsTotal.Set(float64(count))
|
||||
}
|
||||
|
||||
// UpdateDatabaseConnections updates the number of database connections
|
||||
func UpdateDatabaseConnections(count int) {
|
||||
metrics := GetMetrics()
|
||||
metrics.DatabaseConnections.Set(float64(count))
|
||||
}
|
||||
|
||||
// PrometheusHandler returns an HTTP handler that exports metrics in Prometheus format
|
||||
func PrometheusHandler() http.HandlerFunc {
|
||||
return func(w http.ResponseWriter, r *http.Request) {
|
||||
metrics := GetMetrics()
|
||||
|
||||
w.Header().Set("Content-Type", "text/plain; version=0.0.4; charset=utf-8")
|
||||
|
||||
// Export all metrics in Prometheus format
|
||||
exportCounter(w, "http_requests_total", metrics.RequestsTotal)
|
||||
exportGauge(w, "http_requests_in_flight", metrics.RequestsInFlight)
|
||||
exportHistogram(w, "http_request_duration_seconds", metrics.RequestDuration)
|
||||
exportHistogram(w, "http_response_size_bytes", metrics.ResponseSize)
|
||||
|
||||
exportCounter(w, "tokens_created_total", metrics.TokensCreated)
|
||||
exportCounter(w, "tokens_verified_total", metrics.TokensVerified)
|
||||
exportCounter(w, "tokens_revoked_total", metrics.TokensRevoked)
|
||||
exportGauge(w, "applications_total", metrics.ApplicationsTotal)
|
||||
exportGauge(w, "permissions_total", metrics.PermissionsTotal)
|
||||
|
||||
exportGauge(w, "database_connections", metrics.DatabaseConnections)
|
||||
exportCounter(w, "database_queries_total", metrics.DatabaseQueries)
|
||||
exportCounter(w, "database_errors_total", metrics.DatabaseErrors)
|
||||
exportCounter(w, "cache_hits_total", metrics.CacheHits)
|
||||
exportCounter(w, "cache_misses_total", metrics.CacheMisses)
|
||||
|
||||
exportCounter(w, "errors_total", metrics.ErrorsTotal)
|
||||
}
|
||||
}
|
||||
|
||||
func exportCounter(w http.ResponseWriter, name string, counter *Counter) {
|
||||
w.Write([]byte("# HELP " + name + " Total number of " + name + "\n"))
|
||||
w.Write([]byte("# TYPE " + name + " counter\n"))
|
||||
w.Write([]byte(name + " " + strconv.FormatFloat(counter.Value(), 'f', -1, 64) + "\n"))
|
||||
}
|
||||
|
||||
func exportGauge(w http.ResponseWriter, name string, gauge *Gauge) {
|
||||
w.Write([]byte("# HELP " + name + " Current value of " + name + "\n"))
|
||||
w.Write([]byte("# TYPE " + name + " gauge\n"))
|
||||
w.Write([]byte(name + " " + strconv.FormatFloat(gauge.Value(), 'f', -1, 64) + "\n"))
|
||||
}
|
||||
|
||||
func exportHistogram(w http.ResponseWriter, name string, histogram *Histogram) {
|
||||
w.Write([]byte("# HELP " + name + " Histogram of " + name + "\n"))
|
||||
w.Write([]byte("# TYPE " + name + " histogram\n"))
|
||||
|
||||
buckets := histogram.Buckets()
|
||||
for bucket, count := range buckets {
|
||||
w.Write([]byte(name + "_bucket{le=\"" + strconv.FormatFloat(bucket, 'f', -1, 64) + "\"} " + strconv.FormatFloat(count, 'f', -1, 64) + "\n"))
|
||||
}
|
||||
|
||||
w.Write([]byte(name + "_sum " + strconv.FormatFloat(histogram.Sum(), 'f', -1, 64) + "\n"))
|
||||
w.Write([]byte(name + "_count " + strconv.FormatFloat(histogram.Count(), 'f', -1, 64) + "\n"))
|
||||
}
|
||||
|
||||
// HealthMetrics is the JSON-serialisable health snapshot built by
// GetHealthMetrics.
type HealthMetrics struct {
	// DatabaseConnected is serialised as "database_connected".
	DatabaseConnected bool `json:"database_connected"`
	// ResponseTime is serialised as "response_time".
	ResponseTime time.Duration `json:"response_time"`
	// Uptime is serialised as "uptime".
	Uptime time.Duration `json:"uptime"`
	// Version is serialised as "version".
	Version string `json:"version"`
	// Environment is serialised as "environment".
	Environment string `json:"environment"`
}
|
||||
|
||||
// GetHealthMetrics returns current health metrics
|
||||
func GetHealthMetrics(ctx context.Context, version, environment string, startTime time.Time) *HealthMetrics {
|
||||
return &HealthMetrics{
|
||||
DatabaseConnected: true, // This should be checked against actual DB
|
||||
ResponseTime: time.Since(time.Now()),
|
||||
Uptime: time.Since(startTime),
|
||||
Version: version,
|
||||
Environment: environment,
|
||||
}
|
||||
}
|
||||
|
||||
// BusinessMetrics is the JSON-serialisable summary built by
// GetBusinessMetrics.
type BusinessMetrics struct {
	// TotalApplications is serialised as "total_applications".
	TotalApplications int `json:"total_applications"`
	// TotalTokens is serialised as "total_tokens".
	TotalTokens int `json:"total_tokens"`
	// TotalPermissions is serialised as "total_permissions".
	TotalPermissions int `json:"total_permissions"`
	// ActiveTokens is serialised as "active_tokens".
	ActiveTokens int `json:"active_tokens"`
}
|
||||
|
||||
// GetBusinessMetrics returns current business metrics
|
||||
func GetBusinessMetrics() *BusinessMetrics {
|
||||
metrics := GetMetrics()
|
||||
|
||||
return &BusinessMetrics{
|
||||
TotalApplications: int(metrics.ApplicationsTotal.Value()),
|
||||
TotalTokens: int(metrics.TokensCreated.Value()),
|
||||
TotalPermissions: int(metrics.PermissionsTotal.Value()),
|
||||
ActiveTokens: int(metrics.TokensCreated.Value() - metrics.TokensRevoked.Value()),
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user