1pub mod middleware;
7use lazy_static::lazy_static;
8use prometheus::{
9 CounterVec, Encoder, Gauge, GaugeVec, HistogramOpts, HistogramVec, Opts, Registry, TextEncoder,
10};
11use sysinfo::{Disks, System};
12
13lazy_static! {
14 pub static ref REGISTRY: Registry = Registry::new();
16
17 pub static ref REQUEST_COUNTER: CounterVec = {
19 let opts = Opts::new("requests_total", "Total number of HTTP requests");
20 let counter_vec = CounterVec::new(opts, &["endpoint", "method", "status"]).unwrap();
21 REGISTRY.register(Box::new(counter_vec.clone())).unwrap();
22 counter_vec
23 };
24
25 pub static ref RAW_REQUEST_COUNTER: CounterVec = {
27 let opts = Opts::new("raw_requests_total", "Total number of HTTP requests by raw URI");
28 let counter_vec = CounterVec::new(opts, &["raw_uri", "method", "status"]).unwrap();
29 REGISTRY.register(Box::new(counter_vec.clone())).unwrap();
30 counter_vec
31 };
32
33 pub static ref REQUEST_LATENCY: HistogramVec = {
35 let histogram_opts = HistogramOpts::new("request_latency_seconds", "Request latency in seconds")
36 .buckets(vec![0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0, 10.0, 25.0, 50.0, 100.0]);
37 let histogram_vec = HistogramVec::new(histogram_opts, &["endpoint", "method", "status"]).unwrap();
38 REGISTRY.register(Box::new(histogram_vec.clone())).unwrap();
39 histogram_vec
40 };
41
42 pub static ref ERROR_COUNTER: CounterVec = {
44 let opts = Opts::new("error_requests_total", "Total number of error responses");
45 let counter_vec = CounterVec::new(opts, &["endpoint", "method", "status"]).unwrap();
47 REGISTRY.register(Box::new(counter_vec.clone())).unwrap();
48 counter_vec
49 };
50
51 pub static ref CPU_USAGE: Gauge = {
53 let gauge = Gauge::new("cpu_usage_percentage", "Current CPU usage percentage").unwrap();
54 REGISTRY.register(Box::new(gauge.clone())).unwrap();
55 gauge
56 };
57
58 pub static ref MEMORY_USAGE_PERCENT: Gauge = {
60 let gauge = Gauge::new("memory_usage_percentage", "Memory usage percentage").unwrap();
61 REGISTRY.register(Box::new(gauge.clone())).unwrap();
62 gauge
63 };
64
65 pub static ref MEMORY_USAGE: Gauge = {
67 let gauge = Gauge::new("memory_usage_bytes", "Memory usage in bytes").unwrap();
68 REGISTRY.register(Box::new(gauge.clone())).unwrap();
69 gauge
70 };
71
72 pub static ref TOTAL_MEMORY: Gauge = {
74 let gauge = Gauge::new("total_memory_bytes", "Total memory in bytes").unwrap();
75 REGISTRY.register(Box::new(gauge.clone())).unwrap();
76 gauge
77 };
78
79 pub static ref AVAILABLE_MEMORY: Gauge = {
81 let gauge = Gauge::new("available_memory_bytes", "Available memory in bytes").unwrap();
82 REGISTRY.register(Box::new(gauge.clone())).unwrap();
83 gauge
84 };
85
86 pub static ref DISK_USAGE: Gauge = {
88 let gauge = Gauge::new("disk_usage_bytes", "Used disk space in bytes").unwrap();
89 REGISTRY.register(Box::new(gauge.clone())).unwrap();
90 gauge
91 };
92
93 pub static ref DISK_USAGE_PERCENT: Gauge = {
95 let gauge = Gauge::new("disk_usage_percentage", "Disk usage percentage").unwrap();
96 REGISTRY.register(Box::new(gauge.clone())).unwrap();
97 gauge
98 };
99
100 pub static ref IN_FLIGHT_REQUESTS: GaugeVec = {
102 let gauge_vec = GaugeVec::new(
103 Opts::new("in_flight_requests", "Number of in-flight requests"),
104 &["endpoint"]
105 ).unwrap();
106 REGISTRY.register(Box::new(gauge_vec.clone())).unwrap();
107 gauge_vec
108 };
109
110 pub static ref TIMEOUT_COUNTER: CounterVec = {
112 let opts = Opts::new("request_timeouts_total", "Total number of request timeouts");
113 let counter_vec = CounterVec::new(opts, &["endpoint", "method", "timeout_type"]).unwrap();
114 REGISTRY.register(Box::new(counter_vec.clone())).unwrap();
115 counter_vec
116 };
117
118 pub static ref FILE_DESCRIPTORS: Gauge = {
120 let gauge = Gauge::new("file_descriptors_count", "Current file descriptor count").unwrap();
121 REGISTRY.register(Box::new(gauge.clone())).unwrap();
122 gauge
123 };
124
125 pub static ref CLOSE_WAIT_SOCKETS: Gauge = {
127 let gauge = Gauge::new("close_wait_sockets_count", "Number of CLOSE_WAIT sockets").unwrap();
128 REGISTRY.register(Box::new(gauge.clone())).unwrap();
129 gauge
130 };
131
132 pub static ref TRANSACTIONS_SUCCESS: CounterVec = {
134 let opts = Opts::new("transactions_success_total", "Total number of successful transactions");
135 let counter_vec = CounterVec::new(opts, &["relayer_id", "network_type"]).unwrap();
136 REGISTRY.register(Box::new(counter_vec.clone())).unwrap();
137 counter_vec
138 };
139
140 pub static ref TRANSACTIONS_FAILED: CounterVec = {
142 let opts = Opts::new("transactions_failed_total", "Total number of failed transactions");
143 let counter_vec = CounterVec::new(opts, &["relayer_id", "network_type", "failure_reason"]).unwrap();
144 REGISTRY.register(Box::new(counter_vec.clone())).unwrap();
145 counter_vec
146 };
147
148 pub static ref API_RPC_FAILURES: CounterVec = {
152 let opts = Opts::new("api_rpc_failures_total", "Total number of RPC failures during API requests (before transaction creation)");
153 let counter_vec = CounterVec::new(opts, &["relayer_id", "network_type", "operation_name", "error_type"]).unwrap();
154 REGISTRY.register(Box::new(counter_vec.clone())).unwrap();
155 counter_vec
156 };
157
158 pub static ref TRANSACTIONS_CREATED: CounterVec = {
160 let opts = Opts::new("transactions_created_total", "Total number of transactions created");
161 let counter_vec = CounterVec::new(opts, &["relayer_id", "network_type"]).unwrap();
162 REGISTRY.register(Box::new(counter_vec.clone())).unwrap();
163 counter_vec
164 };
165
166 pub static ref TRANSACTIONS_SUBMITTED: CounterVec = {
168 let opts = Opts::new("transactions_submitted_total", "Total number of transactions submitted to the network");
169 let counter_vec = CounterVec::new(opts, &["relayer_id", "network_type"]).unwrap();
170 REGISTRY.register(Box::new(counter_vec.clone())).unwrap();
171 counter_vec
172 };
173
174 pub static ref TRANSACTIONS_BY_STATUS: GaugeVec = {
176 let gauge_vec = GaugeVec::new(
177 Opts::new("transactions_by_status", "Current number of transactions by status"),
178 &["relayer_id", "network_type", "status"]
179 ).unwrap();
180 REGISTRY.register(Box::new(gauge_vec.clone())).unwrap();
181 gauge_vec
182 };
183
184 pub static ref TRANSACTION_PROCESSING_TIME: HistogramVec = {
186 let histogram_opts = HistogramOpts::new("transaction_processing_seconds", "Transaction processing time in seconds")
187 .buckets(vec![0.1, 0.5, 1.0, 2.0, 5.0, 10.0, 30.0, 60.0, 120.0, 300.0]);
188 let histogram_vec = HistogramVec::new(histogram_opts, &["relayer_id", "network_type", "stage"]).unwrap();
189 REGISTRY.register(Box::new(histogram_vec.clone())).unwrap();
190 histogram_vec
191 };
192
193 pub static ref RPC_CALL_LATENCY: HistogramVec = {
195 let histogram_opts = HistogramOpts::new("rpc_call_latency_seconds", "RPC call latency in seconds")
196 .buckets(vec![0.01, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0, 10.0, 30.0]);
197 let histogram_vec = HistogramVec::new(histogram_opts, &["relayer_id", "network_type", "operation_name"]).unwrap();
198 REGISTRY.register(Box::new(histogram_vec.clone())).unwrap();
199 histogram_vec
200 };
201
202 pub static ref PLUGIN_CALLS: CounterVec = {
204 let opts = Opts::new("plugin_calls_total", "Total number of plugin calls");
205 let counter_vec = CounterVec::new(opts, &["plugin_id", "method", "status"]).unwrap();
206 REGISTRY.register(Box::new(counter_vec.clone())).unwrap();
207 counter_vec
208 };
209}
210
211pub fn gather_metrics() -> Result<Vec<u8>, Box<dyn std::error::Error>> {
213 let encoder = TextEncoder::new();
214 let metric_families = REGISTRY.gather();
215 let mut buffer = Vec::new();
216 encoder.encode(&metric_families, &mut buffer)?;
217 Ok(buffer)
218}
219
/// Counts the file descriptors currently open in this process.
///
/// * Linux: the number of entries under `/proc/<pid>/fd`.
/// * macOS: the number of rows printed by `lsof -p <pid>`, minus the header row.
/// * Any other target: always `Ok(0)`.
///
/// # Errors
///
/// Returns the underlying I/O error when the `/proc` directory cannot be read
/// or `lsof` cannot be spawned. On macOS it also returns an error when `lsof`
/// exits unsuccessfully without printing anything, so callers can skip the
/// update instead of publishing a misleading zero.
fn get_fd_count() -> Result<usize, std::io::Error> {
    // The pid is looked up inside each platform branch so that targets which
    // do not need it carry no unused binding.
    #[cfg(target_os = "linux")]
    {
        let fd_dir = format!("/proc/{}/fd", std::process::id());
        // NOTE(review): the handle `read_dir` opens is presumably listed as
        // well, so the count may include one transient descriptor — confirm
        // before relying on exact values.
        std::fs::read_dir(fd_dir).map(|entries| entries.count())
    }

    #[cfg(target_os = "macos")]
    {
        let pid = std::process::id().to_string();
        let output = std::process::Command::new("lsof")
            .args(["-p", pid.as_str()])
            .output()?;
        if !output.status.success() && output.stdout.is_empty() {
            return Err(std::io::Error::other(format!(
                "lsof -p {pid} failed with {}",
                output.status
            )));
        }
        let rows = String::from_utf8_lossy(&output.stdout).lines().count();
        // The first row is the column header, not a descriptor.
        Ok(rows.saturating_sub(1))
    }

    #[cfg(not(any(target_os = "linux", target_os = "macos")))]
    {
        Ok(0)
    }
}
248
/// Counts sockets reported in the `CLOSE_WAIT` state.
///
/// On Linux and macOS this runs `netstat -an` and counts the output lines that
/// contain `CLOSE_WAIT`. On any other target it always returns `Ok(0)`.
///
/// # Errors
///
/// Returns the spawn error when `netstat` cannot be executed, and an error
/// when it exits unsuccessfully without printing anything. Callers can then
/// leave the previous gauge value untouched instead of publishing a zero that
/// only means "measurement unavailable".
fn get_close_wait_count() -> Result<usize, std::io::Error> {
    #[cfg(any(target_os = "linux", target_os = "macos"))]
    {
        // Invoke netstat directly: no shell, no grep/wc helpers, and a missing
        // binary surfaces as an error rather than as a silent "0".
        let output = std::process::Command::new("netstat").arg("-an").output()?;
        if !output.status.success() && output.stdout.is_empty() {
            return Err(std::io::Error::other(format!(
                "netstat -an failed with {}",
                output.status
            )));
        }
        let count = String::from_utf8_lossy(&output.stdout)
            .lines()
            .filter(|line| line.contains("CLOSE_WAIT"))
            .count();
        Ok(count)
    }

    #[cfg(not(any(target_os = "linux", target_os = "macos")))]
    {
        Ok(0)
    }
}
269
270pub fn update_system_metrics() {
272 let mut sys = System::new_all();
273 sys.refresh_all();
274
275 let cpu_usage = sys.global_cpu_usage();
277 CPU_USAGE.set(cpu_usage as f64);
278
279 let total_memory = sys.total_memory();
281 TOTAL_MEMORY.set(total_memory as f64);
282
283 let available_memory = sys.available_memory();
285 AVAILABLE_MEMORY.set(available_memory as f64);
286
287 let memory_usage = sys.used_memory();
289 MEMORY_USAGE.set(memory_usage as f64);
290
291 let memory_percentage = if total_memory > 0 {
293 (memory_usage as f64 / total_memory as f64) * 100.0
294 } else {
295 0.0
296 };
297 MEMORY_USAGE_PERCENT.set(memory_percentage);
298
299 let disks = Disks::new_with_refreshed_list();
302 let mut total_disk_space: u64 = 0;
303 let mut total_disk_available: u64 = 0;
304 for disk in disks.list() {
305 total_disk_space += disk.total_space();
306 total_disk_available += disk.available_space();
307 }
308 let used_disk_space = total_disk_space.saturating_sub(total_disk_available);
310 DISK_USAGE.set(used_disk_space as f64);
311
312 let disk_percentage = if total_disk_space > 0 {
314 (used_disk_space as f64 / total_disk_space as f64) * 100.0
315 } else {
316 0.0
317 };
318 DISK_USAGE_PERCENT.set(disk_percentage);
319
320 if let Ok(fd_count) = get_fd_count() {
322 FILE_DESCRIPTORS.set(fd_count as f64);
323 }
324
325 if let Ok(close_wait) = get_close_wait_count() {
327 CLOSE_WAIT_SOCKETS.set(close_wait as f64);
328 }
329}
330
#[cfg(test)]
mod actix_tests {
    use super::*;
    use actix_web::{
        dev::{Service, ServiceRequest, ServiceResponse, Transform},
        http, test, Error, HttpResponse,
    };
    use futures::future::{self};
    use middleware::MetricsMiddleware;
    use prometheus::proto::MetricFamily;
    use std::{
        pin::Pin,
        task::{Context, Poll},
    };

    /// Boxed future type returned by both dummy services below.
    type DummyFuture = Pin<Box<dyn future::Future<Output = Result<ServiceResponse, Error>>>>;

    /// Inner service that answers every request with `200 OK`.
    struct DummySuccessService;

    impl Service<ServiceRequest> for DummySuccessService {
        type Response = ServiceResponse;
        type Error = Error;
        type Future = DummyFuture;

        fn poll_ready(&self, _cx: &mut Context<'_>) -> Poll<Result<(), Self::Error>> {
            Poll::Ready(Ok(()))
        }

        fn call(&self, req: ServiceRequest) -> Self::Future {
            Box::pin(async move { Ok(req.into_response(HttpResponse::Ok().finish())) })
        }
    }

    /// Inner service that fails every request with an internal server error.
    struct DummyErrorService;

    impl Service<ServiceRequest> for DummyErrorService {
        type Response = ServiceResponse;
        type Error = Error;
        type Future = DummyFuture;

        fn poll_ready(&self, _cx: &mut Context<'_>) -> Poll<Result<(), Self::Error>> {
            Poll::Ready(Ok(()))
        }

        fn call(&self, _req: ServiceRequest) -> Self::Future {
            let failure = actix_web::error::ErrorInternalServerError("dummy error");
            Box::pin(async move { Err(failure) })
        }
    }

    /// Returns the first family in `families` whose name equals `name`, if any.
    fn find_metric_family<'a>(
        name: &str,
        families: &'a [MetricFamily],
    ) -> Option<&'a MetricFamily> {
        for family in families {
            if family.name() == name {
                return Some(family);
            }
        }
        None
    }

    /// After touching the request metrics and refreshing the system gauges,
    /// the encoded output must mention every expected metric name.
    #[actix_rt::test]
    async fn test_gather_metrics_contains_expected_names() {
        update_system_metrics();

        let ok_labels = ["/test", "GET", "200"];
        REQUEST_COUNTER.with_label_values(&ok_labels).inc();
        RAW_REQUEST_COUNTER
            .with_label_values(&["/test?param=value", "GET", "200"])
            .inc();
        REQUEST_LATENCY.with_label_values(&ok_labels).observe(0.1);
        ERROR_COUNTER
            .with_label_values(&["/test", "GET", "500"])
            .inc();

        let encoded = gather_metrics().expect("failed to gather metrics");
        let output = String::from_utf8(encoded).expect("metrics output is not valid UTF-8");

        let expected_names = [
            // System gauges.
            "cpu_usage_percentage",
            "memory_usage_percentage",
            "memory_usage_bytes",
            "total_memory_bytes",
            "available_memory_bytes",
            "disk_usage_bytes",
            "disk_usage_percentage",
            // Request metrics.
            "requests_total",
            "raw_requests_total",
            "request_latency_seconds",
            "error_requests_total",
        ];
        for name in expected_names {
            assert!(output.contains(name), "metric `{name}` missing from output");
        }
    }

    /// Resets the system gauges, refreshes them, and sanity-checks the values.
    #[actix_rt::test]
    async fn test_update_system_metrics() {
        for gauge in [
            &*CPU_USAGE,
            &*TOTAL_MEMORY,
            &*AVAILABLE_MEMORY,
            &*MEMORY_USAGE,
            &*MEMORY_USAGE_PERCENT,
            &*DISK_USAGE,
            &*DISK_USAGE_PERCENT,
        ] {
            gauge.set(0.0);
        }

        update_system_metrics();

        let percent_range = 0.0..=100.0;

        let cpu_usage = CPU_USAGE.get();
        let memory_usage = MEMORY_USAGE.get();
        let memory_percent = MEMORY_USAGE_PERCENT.get();
        let total_memory = TOTAL_MEMORY.get();
        let available_memory = AVAILABLE_MEMORY.get();
        let disk_usage = DISK_USAGE.get();
        let disk_percent = DISK_USAGE_PERCENT.get();

        assert!(
            percent_range.contains(&cpu_usage),
            "CPU usage should be between 0-100%, got {cpu_usage}"
        );
        assert!(
            memory_usage >= 0.0,
            "Memory usage should be >= 0, got {memory_usage}"
        );
        assert!(
            percent_range.contains(&memory_percent),
            "Memory usage percentage should be between 0-100%, got {memory_percent}"
        );
        assert!(
            total_memory > 0.0,
            "Total memory should be > 0, got {total_memory}"
        );
        assert!(
            available_memory >= 0.0,
            "Available memory should be >= 0, got {available_memory}"
        );
        assert!(
            disk_usage >= 0.0,
            "Disk usage should be >= 0, got {disk_usage}"
        );
        assert!(
            percent_range.contains(&disk_percent),
            "Disk usage percentage should be between 0-100%, got {disk_percent}"
        );

        // Cross-checks between the memory gauges.
        assert!(
            memory_usage <= total_memory,
            "Memory usage should be <= total memory, got {memory_usage}"
        );
        let accounted_memory = available_memory + memory_usage;
        assert!(
            accounted_memory <= total_memory,
            "Available memory plus used memory should be <= total memory {}, got {}",
            total_memory,
            accounted_memory
        );
    }

    /// A successful request through the middleware must show up in
    /// `requests_total` under its endpoint label.
    #[actix_rt::test]
    async fn test_middleware_success() {
        let service = MetricsMiddleware
            .new_transform(DummySuccessService)
            .await
            .unwrap();
        let req = test::TestRequest::with_uri("/test_success").to_srv_request();

        let resp = service.call(req).await.unwrap();
        assert_eq!(resp.response().status(), http::StatusCode::OK);

        let families = REGISTRY.gather();
        let counter_fam = find_metric_family("requests_total", &families)
            .expect("requests_total metric family not found");

        let mut matches = 0usize;
        let tagged = counter_fam.get_metric().iter().filter(|metric| {
            metric
                .get_label()
                .iter()
                .any(|l| l.name() == "endpoint" && l.value() == "/test_success")
        });
        for metric in tagged {
            matches += 1;
            assert!(metric.get_counter().value() >= 1.0);
        }
        assert!(
            matches > 0,
            "Expected metric with endpoint '/test_success' not found"
        );
    }

    /// A failing request through the middleware must show up in
    /// `error_requests_total` under its endpoint label.
    #[actix_rt::test]
    async fn test_middleware_error() {
        let service = MetricsMiddleware
            .new_transform(DummyErrorService)
            .await
            .unwrap();
        let req = test::TestRequest::with_uri("/test_error").to_srv_request();

        let result = service.call(req).await;
        assert!(result.is_err());

        let families = REGISTRY.gather();
        let error_counter_fam = find_metric_family("error_requests_total", &families)
            .expect("error_requests_total metric family not found");

        let mut matches = 0usize;
        let tagged = error_counter_fam.get_metric().iter().filter(|metric| {
            metric
                .get_label()
                .iter()
                .any(|l| l.name() == "endpoint" && l.value() == "/test_error")
        });
        for metric in tagged {
            matches += 1;
            assert!(metric.get_counter().value() >= 1.0);
        }
        assert!(
            matches > 0,
            "Expected error metric with endpoint '/test_error' not found"
        );
    }
}
561
#[cfg(test)]
mod property_tests {
    use proptest::{prelude::*, test_runner::Config};

    /// `used` as a percentage of `total`; a zero `total` yields `0.0`.
    fn compute_percentage(used: u64, total: u64) -> f64 {
        match total {
            0 => 0.0,
            _ => (used as f64 / total as f64) * 100.0,
        }
    }

    proptest! {
        #![proptest_config(Config {
            cases: 1000,
            ..Config::default()
        })]

        /// For any `used <= total` with a non-zero `total`, the percentage
        /// stays within 0..=100.
        #[test]
        fn prop_compute_percentage(
            (total, used) in (1u64..1_000_000u64)
                .prop_flat_map(|total| (Just(total), 0u64..=total))
        ) {
            let percentage = compute_percentage(used, total);
            prop_assert!((0.0..=100.0).contains(&percentage));
        }

        /// Label values built from arbitrary endpoints and common HTTP methods
        /// are non-empty and stay under the size limits checked here.
        #[test]
        fn prop_labels_are_reasonable(
            endpoint in ".*",
            method in prop::sample::select(vec![
                "GET".to_string(),
                "POST".to_string(),
                "PUT".to_string(),
                "DELETE".to_string()
            ])
        ) {
            // An empty endpoint is replaced by the root path.
            let endpoint_label = match endpoint.as_str() {
                "" => "/".to_string(),
                other => other.to_string(),
            };

            prop_assert!(endpoint_label.chars().count() <= 1024, "Endpoint label too long");
            prop_assert!(method.chars().count() <= 16, "Method label too long");

            for label in [endpoint_label, method, "200".to_string()] {
                prop_assert!(!label.is_empty());
                prop_assert!(label.len() < 1024);
            }
        }
    }
}