openzeppelin_relayer/api/routes/docs/
health_docs.rs

1//! # Health Documentation
2//!
3//! This module contains the OpenAPI documentation for the health check API endpoints.
4//!
5//! ## Endpoints
6//!
7//! - `GET /api/v1/health`: Basic health check endpoint (liveness probe)
8//! - `GET /api/v1/ready`: Readiness check endpoint with comprehensive health status
9
10use crate::models::health::ReadinessResponse;
11
12/// Liveness check endpoint.
13///
14/// Served at `GET /api/v1/health` and grouped under the `Health` tag.
15///
16/// Responds with `200 OK` and a body of `"OK"` when the service is alive.
17///
18/// A `500` response with a string body is also documented for internal server errors.
19/// This endpoint is used for liveness probes in container orchestration platforms.
20#[utoipa::path(
21    get,
22    path = "/api/v1/health",
23    tag = "Health",
24    operation_id = "health",
25    responses(
26        (status = 200, description = "Service is alive", body = String, example = json!("OK")),
27        (status = 500, description = "Internal server error", body = String),
28    )
29)]
30#[allow(dead_code)] // empty stub is not expected to be called; it only anchors the `utoipa::path` attribute above
31fn doc_health() {}
32
33/// Readiness endpoint that checks system resources, Redis, Queue, and plugins.
34///
35/// Returns 200 OK if the service is ready to accept traffic, or 503 Service Unavailable if not.
36/// This endpoint is used for readiness probes in container orchestration platforms like
37/// AWS ECS or Kubernetes.
38///
39/// ## Health Check Components
40///
41/// - **System**: File descriptor usage, CLOSE_WAIT socket count
42/// - **Redis**: Primary and reader pool connectivity
43/// - **Queue**: Queue's Redis connections (separate from app's Redis)
44/// - **Plugins**: Plugin pool health, circuit breaker state, and connection metrics (if enabled)
45///
46/// ## Status Levels
47///
48/// - `healthy`: All components operational; documented as a `200` response
49/// - `degraded`: Some components degraded but service can function (e.g., reader pool down); still documented as a `200` response
50/// - `unhealthy`: Critical components failed, service unavailable; documented as a `503` response
51///
52/// ## Plugin Connection Metrics
53///
54/// When plugins are enabled, the following connection metrics are exposed:
55///
56/// - `shared_socket_available_slots`: Number of additional concurrent plugin executions that can start
57/// - `shared_socket_active_connections`: Current number of active plugin execution connections
58/// - `shared_socket_registered_executions`: Number of plugin executions currently registered (awaiting response)
59/// - `connection_pool_available_slots`: Available connections to the pool server
60/// - `connection_pool_active_connections`: Active connections to the pool server
61///
62/// These metrics help diagnose connection pool exhaustion and plugin capacity issues.
63///
64/// ## Caching
65///
66/// Health check results are cached for 10 seconds to prevent excessive load from frequent
67/// health checks. Multiple requests within the TTL return the same cached response.
68#[utoipa::path(
69    get,
70    path = "/api/v1/ready",
71    tag = "Health",
72    operation_id = "readiness",
73    responses(
74        (
75            status = 200,
76            description = "Service is ready (healthy or degraded)",
77            body = ReadinessResponse,
78            example = json!({
79                "ready": true,
80                "status": "healthy",
81                "components": {
82                    "system": {
83                        "status": "healthy",
84                        "fd_count": 42,
85                        "fd_limit": 1024,
86                        "fd_usage_percent": 4,
87                        "close_wait_count": 0
88                    },
89                    "redis": {
90                        "status": "healthy",
91                        "primary_pool": {
92                            "connected": true,
93                            "available": 8,
94                            "max_size": 16
95                        },
96                        "reader_pool": {
97                            "connected": true,
98                            "available": 8,
99                            "max_size": 16
100                        }
101                    },
102                    "queue": {
103                        "status": "healthy"
104                    },
105                    "plugins": {
106                        "status": "healthy",
107                        "enabled": true,
108                        "circuit_state": "closed",
109                        "uptime_ms": 3600000,
110                        "memory": 52428800,
111                        "pool_completed": 1000,
112                        "pool_queued": 2,
113                        "success_rate": 99.5,
114                        "avg_response_time_ms": 120,
115                        "recovering": false,
116                        "shared_socket_available_slots": 48,
117                        "shared_socket_active_connections": 2,
118                        "shared_socket_registered_executions": 2,
119                        "connection_pool_available_slots": 8,
120                        "connection_pool_active_connections": 2
121                    }
122                },
123                "timestamp": "2026-01-30T12:00:00Z"
124            })
125        ),
126        (
127            status = 503,
128            description = "Service is not ready (unhealthy)",
129            body = ReadinessResponse,
130            example = json!({
131                "ready": false,
132                "status": "unhealthy",
133                "reason": "Redis primary pool: PING timed out",
134                "components": {
135                    "system": {
136                        "status": "healthy",
137                        "fd_count": 42,
138                        "fd_limit": 1024,
139                        "fd_usage_percent": 4,
140                        "close_wait_count": 0
141                    },
142                    "redis": {
143                        "status": "unhealthy",
144                        "primary_pool": {
145                            "connected": false,
146                            "available": 0,
147                            "max_size": 16,
148                            "error": "PING timed out"
149                        },
150                        "reader_pool": {
151                            "connected": false,
152                            "available": 0,
153                            "max_size": 16,
154                            "error": "PING timed out"
155                        },
156                        "error": "Redis primary pool: PING timed out"
157                    },
158                    "queue": {
159                        "status": "unhealthy",
160                        "error": "Queue connection: Stats check timed out"
161                    },
162                    "plugins": {
163                        "status": "degraded",
164                        "enabled": true,
165                        "circuit_state": "open",
166                        "error": "Plugin pool health check failed",
167                        "uptime_ms": 3600000,
168                        "memory": 52428800,
169                        "pool_completed": 1000,
170                        "pool_queued": 5,
171                        "success_rate": 95.5,
172                        "avg_response_time_ms": 150,
173                        "recovering": true,
174                        "recovery_percent": 10,
175                        "shared_socket_available_slots": 45,
176                        "shared_socket_active_connections": 5,
177                        "shared_socket_registered_executions": 5,
178                        "connection_pool_available_slots": 6,
179                        "connection_pool_active_connections": 4
180                    }
181                },
182                "timestamp": "2026-01-30T12:00:00Z"
183            })
184        )
185    )
186)]
187#[allow(dead_code)] // empty stub is not expected to be called; it only anchors the `utoipa::path` attribute above
188fn doc_readiness() {}