From 820bc9919459ce88e857cf251d5c2571b0408199 Mon Sep 17 00:00:00 2001 From: Mauro Ezequiel Moltrasio Date: Wed, 8 Oct 2025 11:26:47 +0200 Subject: [PATCH] ROX-30302: embed pprof and expose it via an endpoint This will allow us to get CPU profiles from applications running in the wild without the need to ship dedicated images with embedded additional tooling. TODO: Write documentation on how to use the profiler. --- Cargo.lock | 390 ++++++++++++++++++++++++++++++++++++--- Cargo.toml | 1 + fact/Cargo.toml | 1 + fact/src/config/mod.rs | 26 +++ fact/src/config/tests.rs | 54 ++++++ fact/src/endpoints.rs | 140 +++++++++++--- fact/src/lib.rs | 2 + fact/src/profiler.rs | 63 +++++++ 8 files changed, 630 insertions(+), 47 deletions(-) create mode 100644 fact/src/profiler.rs diff --git a/Cargo.lock b/Cargo.lock index 9479c572..f7e85890 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -26,6 +26,15 @@ dependencies = [ "memchr", ] +[[package]] +name = "aligned-vec" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc890384c8602f339876ded803c97ad529f3842aba97f6392b3dba0dd171769b" +dependencies = [ + "equator", +] + [[package]] name = "allocator-api2" version = "0.2.21" @@ -179,13 +188,13 @@ checksum = "d18bc4e506fbb85ab7392ed993a7db4d1a452c71b75a246af4a80ab8c9d2dd50" dependencies = [ "assert_matches", "aya-obj", - "bitflags", + "bitflags 2.9.1", "bytes", "libc", "log", "object", "once_cell", - "thiserror", + "thiserror 1.0.69", ] [[package]] @@ -199,7 +208,7 @@ dependencies = [ "hashbrown", "log", "object", - "thiserror", + "thiserror 1.0.69", ] [[package]] @@ -229,10 +238,10 @@ version = "0.72.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4f72209734318d0b619a5e0f5129918b848c416e122a3c4ce054e03cb87b726f" dependencies = [ - "bitflags", + "bitflags 2.9.1", "cexpr", "clang-sys", - "itertools", + "itertools 0.13.0", "log", "prettyplease", "proc-macro2", @@ -243,12 +252,27 @@ dependencies = [ "syn", ] +[[package]] +name = "bitflags" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" + [[package]] name = "bitflags" version = "2.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1b8e56985ec62d17e9c1001dc89c88ecd7dc08e47eba5ec7c29c7b5eeecde967" +[[package]] +name = "block-buffer" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" +dependencies = [ + "generic-array", +] + [[package]] name = "bumpalo" version = "3.19.0" @@ -351,6 +375,24 @@ dependencies = [ "version_check", ] +[[package]] +name = "cpp_demangle" +version = "0.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2bb79cb74d735044c972aae58ed0aaa9a837e85b01106a54c39e42e97f62253" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "cpufeatures" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59ed5838eebb26a2bb2e58f6d5b5316989ae9d08bab10e0e6d103e656d1b0280" +dependencies = [ + "libc", +] + [[package]] name = "crc32fast" version = "1.4.2" @@ -360,6 +402,35 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "crypto-common" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3" +dependencies = [ + "generic-array", + "typenum", +] + +[[package]] +name = "debugid" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef552e6f588e446098f6ba40d89ac146c8c7b64aade83c051ee00bb5d2bc18d" +dependencies = [ + "uuid", +] + +[[package]] +name = "digest" +version = "0.10.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" +dependencies = [ + "block-buffer", + "crypto-common", +] + [[package]] name = "dtoa" version = "1.0.10" @@ -401,6 +472,26 @@ dependencies = [ "log", ] +[[package]] +name = "equator" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4711b213838dfee0117e3be6ac926007d7f433d7bbe33595975d4190cb07e6fc" +dependencies = [ + "equator-macro", +] + +[[package]] +name = "equator-macro" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "44f23cf4b44bfce11a86ace86f8a73ffdec849c9fd00a386a53d278bd9e81fb3" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "equivalent" version = "1.0.2" @@ -432,9 +523,10 @@ dependencies = [ "hyper-util", "libc", "log", + "pprof", "prometheus-client", - "prost", - "prost-types", + "prost 0.13.5", + "prost-types 0.13.5", "serde", "serde_json", "tempfile", @@ -450,8 +542,8 @@ name = "fact-api" version = "0.1.0" dependencies = [ "anyhow", - "prost", - "prost-types", + "prost 0.13.5", + "prost-types 0.13.5", "tokio", "tonic", "tonic-build", @@ -473,6 +565,24 @@ version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" +[[package]] +name = "findshlibs" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "40b9e59cd0f7e0806cca4be089683ecb6434e602038df21fe6bf6711b2f07f64" +dependencies = [ + "cc", + "lazy_static", + "libc", + "winapi", +] + +[[package]] +name = "fixedbitset" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80" + [[package]] name = "fixedbitset" version = "0.5.7" @@ -530,6 +640,16 @@ dependencies = [ "pin-utils", ] +[[package]] +name = "generic-array" +version = "0.14.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" +dependencies = [ + "typenum", + "version_check", +] + [[package]] name = "getrandom" version = "0.2.16" @@ -727,7 +847,7 @@ version = "0.7.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b86e202f00093dcba4275d4636b93ef9dd75d025ae560d2521b45ea28ab49013" dependencies = [ - "bitflags", + "bitflags 2.9.1", "cfg-if", "libc", ] @@ -738,6 +858,15 @@ version = "1.70.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" +[[package]] +name = "itertools" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba291022dbbd398a455acf126c1e341954079855bc60dfdda641363bd6922569" +dependencies = [ + "either", +] + [[package]] name = "itertools" version = "0.13.0" @@ -787,6 +916,12 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "lazy_static" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" + [[package]] name = "libc" version = "0.2.174" @@ -837,6 +972,15 @@ version = "2.7.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "32a282da65faaf38286cf3be983213fcf1d2e2a58700e808f83f4ea9a4804bc0" +[[package]] +name = "memmap2" +version = "0.9.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "843a98750cd611cc2965a8213b53b43e715f13c37a9e096c6408e69990961db7" +dependencies = [ + "libc", +] + [[package]] name = "mime" version = "0.3.17" @@ -875,6 +1019,17 @@ version = "0.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1d87ecb2933e8aeadb3e3a02b828fed80a7528047e68b4f424523a0981a3a084" +[[package]] +name = "nix" +version = "0.26.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "598beaf3cc6fdd9a5dfb1630c2800c7acd31df7aaf0f565796fba2b53ca1af1b" +dependencies = [ + "bitflags 1.3.2", + "cfg-if", + "libc", +] + [[package]] name = "nom" version = "7.1.3" @@ -938,13 +1093,23 @@ version = "2.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" +[[package]] +name = "petgraph" +version = "0.6.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4c5cc86750666a3ed20bdaf5ca2a0344f9c67674cae0515bec2da16fbaa47db" +dependencies = [ + "fixedbitset 0.4.2", + "indexmap", +] + [[package]] name = "petgraph" version = "0.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3672b37090dbd86368a4145bc067582552b29c27377cad4e0a306c97f9bd7772" dependencies = [ - "fixedbitset", + "fixedbitset 0.5.7", "indexmap", ] @@ -995,6 +1160,31 @@ dependencies = [ "portable-atomic", ] +[[package]] +name = "pprof" +version = "0.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38a01da47675efa7673b032bf8efd8214f1917d89685e07e395ab125ea42b187" +dependencies = [ + "aligned-vec", + "backtrace", + "cfg-if", + "findshlibs", + "libc", + "log", + "nix", + "once_cell", + "prost 0.12.6", + "prost-build 0.12.6", + "prost-derive 0.12.6", + "sha2", + "smallvec", + "spin", + "symbolic-demangle", + "tempfile", + "thiserror 2.0.17", +] + [[package]] name = "prettyplease" version = "0.2.35" @@ -1037,6 +1227,16 @@ dependencies = [ "syn", ] +[[package]] +name = "prost" +version = "0.12.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "deb1435c188b76130da55f17a466d252ff7b1418b2ad3e037d127b94e3411f29" +dependencies = [ + "bytes", + "prost-derive 0.12.6", +] + [[package]] name = "prost" version = "0.13.5" @@ -1044,7 +1244,28 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2796faa41db3ec313a31f7624d9286acf277b52de526150b7e69f3debf891ee5" dependencies = [ "bytes", - "prost-derive", + "prost-derive 0.13.5", +] + +[[package]] +name = "prost-build" +version = "0.12.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22505a5c94da8e3b7c2996394d1c933236c4d743e81a410bcca4e6989fc066a4" +dependencies = [ + "bytes", + "heck", + "itertools 0.12.1", + "log", + "multimap", + "once_cell", + "petgraph 0.6.5", + "prettyplease", + "prost 0.12.6", + "prost-types 0.12.6", + "regex", + "syn", + "tempfile", ] [[package]] @@ -1054,19 +1275,32 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "be769465445e8c1474e9c5dac2018218498557af32d9ed057325ec9a41ae81bf" dependencies = [ "heck", - "itertools", + "itertools 0.13.0", "log", "multimap", "once_cell", - "petgraph", + "petgraph 0.7.1", "prettyplease", - "prost", - "prost-types", + "prost 0.13.5", + "prost-types 0.13.5", "regex", "syn", "tempfile", ] +[[package]] +name = "prost-derive" +version = "0.12.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "81bddcdb20abf9501610992b6759a4c888aef7d1a7247ef75e2404275ac24af1" +dependencies = [ + "anyhow", + "itertools 0.12.1", + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "prost-derive" version = "0.13.5" @@ -1074,19 +1308,28 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8a56d757972c98b346a9b766e3f02746cde6dd1cd1d1d563472929fdd74bec4d" dependencies = [ "anyhow", - "itertools", + "itertools 0.13.0", "proc-macro2", "quote", "syn", ] +[[package]] +name = "prost-types" +version = "0.12.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9091c90b0a32608e984ff2fa4091273cbdd755d54935c51d520887f4a1dbd5b0" +dependencies = [ + "prost 0.12.6", +] + [[package]] name = "prost-types" version = "0.13.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "52c2c1bf36ddb1a1c396b3601a3cec27c2462e45f07c386894ec3ccf5332bd16" dependencies = [ - "prost", + "prost 0.13.5", ] [[package]] @@ -1110,7 +1353,7 @@ version = "0.5.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5407465600fb0548f1442edf71dd20683c6ed326200ace4b1ef0763521bb3b77" dependencies = [ - "bitflags", + "bitflags 2.9.1", ] [[package]] @@ -1174,7 +1417,7 @@ version = "1.0.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "11181fbabf243db407ef8df94a6ce0b2f9a733bd8be4ad02b4eda9602296cac8" dependencies = [ - "bitflags", + "bitflags 2.9.1", "errno", "libc", "linux-raw-sys", @@ -1266,6 +1509,17 @@ dependencies = [ "serde", ] +[[package]] +name = "sha2" +version = "0.10.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7507d819769d01a365ab707794a4084392c824f54a7a6a7862f8c3d0892b283" +dependencies = [ + "cfg-if", + "cpufeatures", + "digest", +] + [[package]] name = "shlex" version = "1.3.0" @@ -1303,6 +1557,21 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "spin" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d5fe4ccb98d9c292d56fec89a5e07da7fc4cf0dc11e156b41793132775d3e591" +dependencies = [ + "lock_api", +] + +[[package]] +name = "stable_deref_trait" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" + [[package]] name = "strsim" version = "0.11.1" @@ -1315,6 +1584,29 @@ version = "2.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" +[[package]] +name = "symbolic-common" +version = "12.16.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d03f433c9befeea460a01d750e698aa86caf86dcfbd77d552885cd6c89d52f50" +dependencies = [ + "debugid", + "memmap2", + "stable_deref_trait", + "uuid", +] + +[[package]] +name = "symbolic-demangle" +version = "12.16.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13d359ef6192db1760a34321ec4f089245ede4342c27e59be99642f12a859de8" +dependencies = [ + "cpp_demangle", + "rustc-demangle", + "symbolic-common", +] + [[package]] name = "syn" version = "2.0.104" @@ -1351,7 +1643,16 @@ version = "1.0.69" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52" dependencies = [ - "thiserror-impl", + "thiserror-impl 1.0.69", +] + +[[package]] +name = "thiserror" +version = "2.0.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f63587ca0f12b72a0600bcba1d40081f830876000bb46dd2337a3051618f4fc8" +dependencies = [ + "thiserror-impl 2.0.17", ] [[package]] @@ -1365,6 +1666,17 @@ dependencies = [ "syn", ] +[[package]] +name = "thiserror-impl" +version = "2.0.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3ff15c8ecd7de3849db632e14d18d2571fa09dfc5ed93479bc4485c7a517c913" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "tokio" version = "1.46.0" @@ -1448,7 +1760,7 @@ dependencies = [ "hyper-util", "percent-encoding", "pin-project", - "prost", + "prost 0.13.5", "socket2", "tokio", "tokio-rustls", @@ -1467,8 +1779,8 @@ checksum = "eac6f67be712d12f0b41328db3137e0d0757645d8904b4cb7d51cd9c2279e847" dependencies = [ "prettyplease", "proc-macro2", - "prost-build", - "prost-types", + "prost-build 0.13.5", + "prost-types 0.13.5", "quote", "syn", ] @@ -1541,6 +1853,12 @@ version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" +[[package]] +name = "typenum" +version = "1.18.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1dccffe3ce07af9386bfd29e80c0ab1a8205a2fc34e4bcd40364df902cfa8f3f" + [[package]] name = "unicode-ident" version = "1.0.18" @@ -1658,6 +1976,28 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + [[package]] name = "windows-sys" version = "0.52.0" @@ -1746,7 +2086,7 @@ version = "0.39.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6f42320e61fe2cfd34354ecb597f86f413484a798ba44a8ca1165c58d42da6c1" dependencies = [ - "bitflags", + "bitflags 2.9.1", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 7888fc11..72567501 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -21,6 +21,7 @@ hyper = { version = "1.6.0", default-features = false } hyper-util = { version = "0.1.16", default-features = false } libc = { version = "0.2.159", default-features = false } log = { version = "0.4.22", default-features = false } +pprof = { version = "0.15.0", features = ["prost-codec"] } prometheus-client = { version = "0.24.0", default-features = false } prost = "0.13.5" prost-types = "0.13.5" diff --git a/fact/Cargo.toml b/fact/Cargo.toml index 808635a1..6552eb9a 100644 --- a/fact/Cargo.toml +++ b/fact/Cargo.toml @@ -18,6 +18,7 @@ log = { workspace = true } tonic = { workspace = true } tokio = { workspace = true } tokio-stream = { workspace = true } +pprof = { workspace = true } prometheus-client = { workspace = true } prost = { workspace = true } prost-types = { workspace = true } diff --git a/fact/src/config/mod.rs b/fact/src/config/mod.rs index 24fd392b..a6123fb2 100644 --- a/fact/src/config/mod.rs +++ b/fact/src/config/mod.rs @@ -13,6 +13,7 @@ pub struct FactConfig { paths: Option>, url: Option, certs: Option, + expose_profiler: Option, expose_metrics: Option, health_check: Option, skip_pre_flight: Option, @@ -70,6 +71,10 @@ impl FactConfig { self.certs = Some(certs.to_owned()); } + if let Some(expose_profiler) = from.expose_profiler { + self.expose_profiler = Some(expose_profiler); + } + if let Some(expose_metrics) = from.expose_metrics { self.expose_metrics = Some(expose_metrics); } @@ -103,6 +108,10 @@ impl FactConfig { self.certs.as_deref() } + pub fn expose_profiler(&self) -> bool { + self.expose_profiler.unwrap_or(false) + } + pub fn expose_metrics(&self) -> bool { self.expose_metrics.unwrap_or(false) } @@ -195,6 +204,12 @@ impl TryFrom> for FactConfig { }; config.certs = Some(PathBuf::from(certs)); } + "expose_profiler" => { + let Some(em) = v.as_bool() else { + bail!("expose_profiler field has incorrect type: {v:?}"); + }; + config.expose_profiler = Some(em); + } "expose_metrics" => { let Some(em) = v.as_bool() else { bail!("expose_metrics field has incorrect type: {v:?}"); @@ -255,6 +270,16 @@ pub struct FactCli { #[arg(short, long, env = "FACT_CERTS")] certs: Option, + /// Whether pprof profiler should be exposed + #[arg( + long, + overrides_with("no_expose_profiler"), + env = "FACT_EXPOSE_PROFILER" + )] + expose_profiler: bool, + #[arg(long, overrides_with = "expose_profiler", hide(true))] + no_expose_profiler: bool, + /// Whether prometheus metrics should be collected and exposed #[arg(long, overrides_with("no_expose_metrics"), env = "FACT_EXPOSE_METRICS")] expose_metrics: bool, @@ -301,6 +326,7 @@ impl FactCli { paths: self.paths.clone(), url: self.url.clone(), certs: self.certs.clone(), + expose_profiler: resolve_bool_arg(self.expose_profiler, self.no_expose_profiler), expose_metrics: resolve_bool_arg(self.expose_metrics, self.no_expose_metrics), health_check: resolve_bool_arg(self.health_check, self.no_health_check), skip_pre_flight: resolve_bool_arg(self.skip_pre_flight, self.no_skip_pre_flight), diff --git a/fact/src/config/tests.rs b/fact/src/config/tests.rs index d27f8fb9..137b9163 100644 --- a/fact/src/config/tests.rs +++ b/fact/src/config/tests.rs @@ -32,6 +32,20 @@ fn parsing() { ..Default::default() }, ), + ( + "expose_profiler: true", + FactConfig { + expose_profiler: Some(true), + ..Default::default() + }, + ), + ( + "expose_profiler: false", + FactConfig { + expose_profiler: Some(false), + ..Default::default() + }, + ), ( "expose_metrics: true", FactConfig { @@ -101,6 +115,7 @@ fn parsing() { - /etc url: https://svc.sensor.stackrox:9090 certs: /etc/stackrox/certs + expose_profiler: true expose_metrics: true health_check: true skip_pre_flight: false @@ -111,6 +126,7 @@ fn parsing() { paths: Some(vec![PathBuf::from("/etc")]), url: Some(String::from("https://svc.sensor.stackrox:9090")), certs: Some(PathBuf::from("/etc/stackrox/certs")), + expose_profiler: Some(true), expose_metrics: Some(true), health_check: Some(true), skip_pre_flight: Some(false), @@ -151,6 +167,10 @@ paths: "certs: true", "certs field has incorrect type: Boolean(true)", ), + ( + "expose_profiler: 4", + "expose_profiler field has incorrect type: Integer(4)", + ), ( "expose_metrics: 4", "expose_metrics field has incorrect type: Integer(4)", @@ -298,6 +318,36 @@ fn update() { ..Default::default() }, ), + ( + "expose_profiler: true", + FactConfig::default(), + FactConfig { + expose_profiler: Some(true), + ..Default::default() + }, + ), + ( + "expose_profiler: true", + FactConfig { + expose_profiler: Some(false), + ..Default::default() + }, + FactConfig { + expose_profiler: Some(true), + ..Default::default() + }, + ), + ( + "expose_profiler: true", + FactConfig { + expose_profiler: Some(true), + ..Default::default() + }, + FactConfig { + expose_profiler: Some(true), + ..Default::default() + }, + ), ( "expose_metrics: true", FactConfig::default(), @@ -424,6 +474,7 @@ fn update() { - /etc url: https://svc.sensor.stackrox:9090 certs: /etc/stackrox/certs + expose_profiler: true expose_metrics: true health_check: true skip_pre_flight: false @@ -434,6 +485,7 @@ fn update() { paths: Some(vec![PathBuf::from("/etc"), PathBuf::from("/bin")]), url: Some(String::from("http://localhost")), certs: Some(PathBuf::from("/etc/certs")), + expose_profiler: Some(false), expose_metrics: Some(false), health_check: Some(false), skip_pre_flight: Some(true), @@ -444,6 +496,7 @@ fn update() { paths: Some(vec![PathBuf::from("/etc")]), url: Some(String::from("https://svc.sensor.stackrox:9090")), certs: Some(PathBuf::from("/etc/stackrox/certs")), + expose_profiler: Some(true), expose_metrics: Some(true), health_check: Some(true), skip_pre_flight: Some(false), @@ -469,6 +522,7 @@ fn defaults() { assert_eq!(config.paths(), default_paths); assert_eq!(config.url(), None); assert_eq!(config.certs(), None); + assert!(!config.expose_profiler()); assert!(!config.expose_metrics()); assert!(!config.health_check()); assert!(!config.skip_pre_flight()); diff --git a/fact/src/endpoints.rs b/fact/src/endpoints.rs index 9d7b039a..5a1a557f 100644 --- a/fact/src/endpoints.rs +++ b/fact/src/endpoints.rs @@ -1,6 +1,6 @@ use std::{future::Future, net::SocketAddr, pin::Pin}; -use http_body_util::Full; +use http_body_util::{BodyExt, Full}; use hyper::{ body::{Bytes, Incoming}, server::conn::http1, @@ -12,25 +12,41 @@ use log::{info, warn}; use tokio::{net::TcpListener, sync::watch, task::JoinHandle}; use crate::metrics::exporter::Exporter; +use crate::profiler::Profiler; + +type ServerResponse = anyhow::Result>>; #[derive(Clone)] pub struct Server { metrics: Option, health_check: bool, + profiler: Option, } impl Server { - pub fn new(metrics: Exporter, expose_metrics: bool, health_check: bool) -> Self { + pub fn new( + metrics: Exporter, + expose_profiler: bool, + expose_metrics: bool, + health_check: bool, + ) -> Self { let metrics = if expose_metrics { Some(metrics) } else { None }; + let profiler = if expose_profiler { + Some(Profiler::new()) + } else { + None + }; + Server { metrics, health_check, + profiler, } } pub fn start(self, mut running: watch::Receiver) -> Option> { // If there is nothing to expose, we don't run the hyper server - if self.metrics.is_none() && !self.health_check { + if self.metrics.is_none() && self.profiler.is_none() && !self.health_check { return None; } @@ -63,39 +79,116 @@ impl Server { Some(handle) } - fn make_response( + fn response(res: StatusCode, body: impl Into) -> ServerResponse { + Response::builder() + .status(res) + .body(Full::new(body.into())) + .map_err(anyhow::Error::new) + } + + fn response_with_content_type( res: StatusCode, - body: String, - ) -> Result>, anyhow::Error> { - Ok(Response::builder() + content_type: &str, + body: impl Into, + ) -> ServerResponse { + Response::builder() .status(res) - .body(Full::new(Bytes::from(body))) - .unwrap()) + .header(hyper::header::CONTENT_TYPE, content_type) + .body(Full::new(body.into())) + .map_err(anyhow::Error::new) } - fn handle_metrics(&self) -> Result>, anyhow::Error> { + fn handle_metrics(&self) -> ServerResponse { match &self.metrics { Some(metrics) => metrics.encode().map(|buf| { - let body = Full::new(Bytes::from(buf)); - Response::builder() - .header( - hyper::header::CONTENT_TYPE, - "application/openmetrics-text; version=1.0.0; charset=utf-8", - ) - .body(body) - .map_err(anyhow::Error::new) + Server::response_with_content_type( + StatusCode::OK, + "application/openmetrics-text; version=1.0.0; charset=utf-8", + buf, + ) })?, - None => Server::make_response(StatusCode::SERVICE_UNAVAILABLE, String::new()), + None => Server::response(StatusCode::SERVICE_UNAVAILABLE, ""), } } - fn handle_health_check(&self) -> Result>, anyhow::Error> { + fn handle_health_check(&self) -> ServerResponse { let res = if self.health_check { StatusCode::OK } else { StatusCode::SERVICE_UNAVAILABLE }; - Server::make_response(res, String::new()) + Server::response(res, "") + } + + async fn handle_profiler_status(&self) -> ServerResponse { + let Some(profiler) = &self.profiler else { + return Server::response(StatusCode::INTERNAL_SERVER_ERROR, "Profiler is not enabled"); + }; + let body = profiler.get_status().await; + Server::response_with_content_type(StatusCode::OK, "application/json", body) + } + + async fn handle_cpu_profiler(&self, body: Incoming) -> ServerResponse { + let Some(profiler) = &self.profiler else { + return Server::response(StatusCode::INTERNAL_SERVER_ERROR, "Profiler is not enabled"); + }; + + let body = match body.collect().await { + Ok(b) => b.to_bytes(), + Err(e) => { + return Server::response( + StatusCode::BAD_REQUEST, + format!("Failed to read request body: {e}"), + ) + } + }; + + if body == "on" { + match profiler.start().await { + Ok(_) => Server::response_with_content_type( + StatusCode::OK, + "text/plain", + "CPU profiler starter", + ), + Err(e) => Server::response( + StatusCode::INTERNAL_SERVER_ERROR, + format!("Failed to start CPU profiler: {e}"), + ), + } + } else if body == "off" { + match profiler.stop().await { + Ok(_) => Server::response_with_content_type( + StatusCode::OK, + "text/plain", + "CPU profiler stopped", + ), + Err(e) => Server::response( + StatusCode::INTERNAL_SERVER_ERROR, + format!("Failed to stop CPU profiler: {e}"), + ), + } + } else { + Server::response( + StatusCode::BAD_REQUEST, + format!("Invalid request body: {body:?}"), + ) + } + } + + async fn handle_cpu_report(&self) -> ServerResponse { + let Some(profiler) = &self.profiler else { + return Server::response(StatusCode::INTERNAL_SERVER_ERROR, "Profiler is not enabled"); + }; + + match profiler.get().await { + Ok(profile) => { + Server::response_with_content_type(StatusCode::OK, "text/plain", profile) + } + Err(e) => Server::response( + StatusCode::INTERNAL_SERVER_ERROR, + format!("Failed to get CPU profile: {e}"), + ), + } } } @@ -110,7 +203,10 @@ impl Service> for Server { match (req.method(), req.uri().path()) { (&Method::GET, "/metrics") => s.handle_metrics(), (&Method::GET, "/health_check") => s.handle_health_check(), - _ => Server::make_response(StatusCode::NOT_FOUND, String::new()), + (&Method::POST, "/profile/cpu") => s.handle_cpu_profiler(req.into_body()).await, + (&Method::GET, "/profile/cpu") => s.handle_cpu_report().await, + (&Method::GET, "/profile") => s.handle_profiler_status().await, + _ => Server::response(StatusCode::NOT_FOUND, ""), } }) } diff --git a/fact/src/lib.rs b/fact/src/lib.rs index 3dae57f7..b567fc21 100644 --- a/fact/src/lib.rs +++ b/fact/src/lib.rs @@ -20,6 +20,7 @@ mod host_info; mod metrics; mod output; mod pre_flight; +mod profiler; use config::FactConfig; use pre_flight::pre_flight; @@ -79,6 +80,7 @@ pub async fn run(config: FactConfig) -> anyhow::Result<()> { endpoints::Server::new( exporter.clone(), + config.expose_profiler(), config.expose_metrics(), config.health_check(), ) diff --git a/fact/src/profiler.rs b/fact/src/profiler.rs new file mode 100644 index 00000000..35827a7d --- /dev/null +++ b/fact/src/profiler.rs @@ -0,0 +1,63 @@ +use std::sync::LazyLock; + +use anyhow::bail; +use pprof::{protos::Message, ProfilerGuard}; +use tokio::sync::Mutex; + +static PROFILER_GUARD: LazyLock>>> = + LazyLock::new(|| Mutex::new(None)); + +#[derive(Clone)] +pub struct Profiler {} + +impl Profiler { + pub fn new() -> Self { + Profiler {} + } + + pub async fn get_status(&self) -> &'static str { + if PROFILER_GUARD.lock().await.is_some() { + r#"{"cpu":"on"}"# + } else { + r#"{"cpu":"off"}"# + } + } + + pub async fn start(&self) -> anyhow::Result<()> { + let mut guard = PROFILER_GUARD.lock().await; + if guard.is_some() { + bail!("CPU profiler already started"); + } + + // The blocklist is required because libunwind is not signal + // safe. See the backtrace section in the following link: + // https://docs.rs/crate/pprof + *guard = Some( + pprof::ProfilerGuardBuilder::default() + .frequency(1000) + .blocklist(&["libc", "libgcc", "pthread", "vdso"]) + .build()?, + ); + + Ok(()) + } + + pub async fn stop(&self) -> anyhow::Result<()> { + let mut guard = PROFILER_GUARD.lock().await; + if guard.is_none() { + bail!("CPU profiler already stopped"); + } + + guard.take(); + Ok(()) + } + + pub async fn get(&self) -> anyhow::Result> { + let guard = PROFILER_GUARD.lock().await; + let Some(ref profiler) = *guard else { + bail!("CPU profiler is not running"); + }; + let profile = profiler.report().build()?.pprof()?.encode_to_vec(); + Ok(profile) + } +}