From 71576feddc4c9a8f20925eac47bbe984c503614e Mon Sep 17 00:00:00 2001 From: Mauro Ezequiel Moltrasio Date: Tue, 11 Nov 2025 11:42:22 +0100 Subject: [PATCH 1/3] ROX-30437: refine host path algorithm Retrieve device id for the dentry being accessed on kernel side, then use the mountinfo from /proc on userspace to adjust the path the user would see on the host node. This new logic requires us to keep track of mountinfo in userspace, so a new EventParser type is added to do this in a cached manner and generate events with it. In case a device id is received that is not found in the mountinfo cache, the cache will be rebuilt. If the device id is still not found, an empty entry will be added for that id and we will assume we cannot get the required information to correct the host path gathered from kernelspace. These changes also require some adjustments to work on k8s, so the manifest is updated accordingly. --- fact-ebpf/src/bpf/events.h | 1 + fact-ebpf/src/bpf/types.h | 1 + fact/src/bpf/mod.rs | 12 ++++- fact/src/event/mod.rs | 37 ++++++------- fact/src/event/parser.rs | 42 +++++++++++++++ fact/src/lib.rs | 1 + fact/src/mount_info.rs | 103 +++++++++++++++++++++++++++++++++++++ k8s/manifest.yml | 10 ++-- 8 files changed, 179 insertions(+), 28 deletions(-) create mode 100644 fact/src/event/parser.rs create mode 100644 fact/src/mount_info.rs diff --git a/fact-ebpf/src/bpf/events.h b/fact-ebpf/src/bpf/events.h index adadbd46..1e0fc6ee 100644 --- a/fact-ebpf/src/bpf/events.h +++ b/fact-ebpf/src/bpf/events.h @@ -16,6 +16,7 @@ __always_inline static void submit_event(struct metrics_by_hook_t* m, file_activ event->type = event_type; event->timestamp = bpf_ktime_get_boot_ns(); + event->dev = BPF_CORE_READ(dentry, d_sb, s_dev); bpf_probe_read_str(event->filename, PATH_MAX, filename); struct helper_t* helper = get_helper(); diff --git a/fact-ebpf/src/bpf/types.h b/fact-ebpf/src/bpf/types.h index f32ade10..62f83d90 100644 --- a/fact-ebpf/src/bpf/types.h +++ b/fact-ebpf/src/bpf/types.h @@ -45,6 +45,7 @@ struct event_t { char filename[PATH_MAX]; char host_file[PATH_MAX]; file_activity_type_t type; + unsigned int dev; }; /** diff --git a/fact/src/bpf/mod.rs b/fact/src/bpf/mod.rs index 3b6e27ec..707c6622 100644 --- a/fact/src/bpf/mod.rs +++ b/fact/src/bpf/mod.rs @@ -15,7 +15,11 @@ use tokio::{ task::JoinHandle, }; -use crate::{event::Event, host_info, metrics::EventCounter}; +use crate::{ + event::{parser::EventParser, Event}, + host_info, + metrics::EventCounter, +}; use fact_ebpf::{event_t, metrics_t, path_prefix_t, LPM_SIZE_MAX}; @@ -30,6 +34,8 @@ pub struct Bpf { paths: Vec, paths_config: watch::Receiver>, + + parser: EventParser, } impl Bpf { @@ -56,11 +62,13 @@ impl Bpf { let paths = Vec::new(); let (tx, _) = broadcast::channel(100); + let parser = EventParser::new()?; let mut bpf = Bpf { obj, tx, paths, paths_config, + parser, }; bpf.load_paths()?; @@ -183,7 +191,7 @@ impl Bpf { let ringbuf = guard.get_inner_mut(); while let Some(event) = ringbuf.next() { let event: &event_t = unsafe { &*(event.as_ptr() as *const _) }; - let event = match Event::try_from(event) { + let event = match self.parser.parse(event) { Ok(event) => Arc::new(event), Err(e) => { error!("Failed to parse event: '{e}'"); diff --git a/fact/src/event/mod.rs b/fact/src/event/mod.rs index 18b6ea34..91b89260 100644 --- a/fact/src/event/mod.rs +++ b/fact/src/event/mod.rs @@ -4,11 +4,12 @@ use std::{ffi::CStr, os::raw::c_char, path::PathBuf}; use serde::Serialize; -use fact_ebpf::{event_t, file_activity_type_t, PATH_MAX}; +use fact_ebpf::{file_activity_type_t, PATH_MAX}; -use crate::host_info; +use crate::mount_info::MountEntry; use process::Process; +pub(crate) mod parser; pub(crate) mod process; fn slice_to_string(s: &[c_char]) -> anyhow::Result { @@ -62,23 +63,6 @@ impl Event { } } -impl TryFrom<&event_t> for Event { - type Error = anyhow::Error; - - fn try_from(value: &event_t) -> Result { - let process = Process::try_from(value.process)?; - let timestamp = host_info::get_boot_time() + value.timestamp; - let file = FileData::new(value.type_, value.filename, value.host_file)?; - - Ok(Event { - timestamp, - hostname: host_info::get_hostname(), - process, - file, - }) - } -} - impl From for fact_api::FileActivity { fn from(value: Event) -> Self { let file = fact_api::file_activity::File::from(value.file); @@ -113,8 +97,9 @@ impl FileData { event_type: file_activity_type_t, filename: [c_char; PATH_MAX as usize], host_file: [c_char; PATH_MAX as usize], + mounts: &Vec, ) -> anyhow::Result { - let inner = BaseFileData::new(filename, host_file)?; + let inner = BaseFileData::new(filename, host_file, mounts)?; let file = match event_type { file_activity_type_t::FILE_ACTIVITY_OPEN => FileData::Open(inner), file_activity_type_t::FILE_ACTIVITY_CREATION => FileData::Creation(inner), @@ -170,13 +155,21 @@ impl BaseFileData { pub fn new( filename: [c_char; PATH_MAX as usize], host_file: [c_char; PATH_MAX as usize], + mounts: &Vec, ) -> anyhow::Result { let filename = slice_to_string(&filename)?.into(); - let host_file = slice_to_string(&host_file)?.into(); + let mut host_file: PathBuf = slice_to_string(&host_file)?.into(); + + for mount in mounts { + if let Ok(hf) = host_file.strip_prefix(&mount.root) { + host_file = mount.mount_point.join(hf); + break; + } + } Ok(BaseFileData { filename, - host_file, + host_file: host_file.to_path_buf(), }) } } diff --git a/fact/src/event/parser.rs b/fact/src/event/parser.rs new file mode 100644 index 00000000..61789219 --- /dev/null +++ b/fact/src/event/parser.rs @@ -0,0 +1,42 @@ +use fact_ebpf::event_t; + +use crate::{host_info, mount_info::MountInfo}; + +use super::{process::Process, Event, FileData}; + +pub(crate) struct EventParser { + mountinfo: MountInfo, +} + +impl EventParser { + pub(crate) fn new() -> anyhow::Result { + let mountinfo = MountInfo::new()?; + + Ok(EventParser { mountinfo }) + } + + pub(crate) fn parse(&mut self, event: &event_t) -> anyhow::Result { + let process = Process::try_from(event.process)?; + let timestamp = host_info::get_boot_time() + event.timestamp; + + let mounts = match self.mountinfo.get(&event.dev) { + Some(mounts) => mounts, + None => { + self.mountinfo.refresh()?; + match self.mountinfo.get(&event.dev) { + Some(mounts) => mounts, + None => self.mountinfo.insert_empty(event.dev), + } + } + }; + + let file = FileData::new(event.type_, event.filename, event.host_file, mounts)?; + + Ok(Event { + timestamp, + hostname: host_info::get_hostname(), + process, + file, + }) + } +} diff --git a/fact/src/lib.rs b/fact/src/lib.rs index d198f67a..b308521b 100644 --- a/fact/src/lib.rs +++ b/fact/src/lib.rs @@ -16,6 +16,7 @@ mod endpoints; mod event; mod host_info; mod metrics; +mod mount_info; mod output; mod pre_flight; diff --git a/fact/src/mount_info.rs b/fact/src/mount_info.rs new file mode 100644 index 00000000..df5816d5 --- /dev/null +++ b/fact/src/mount_info.rs @@ -0,0 +1,103 @@ +use std::{ + collections::HashMap, + fs::read_to_string, + path::{Path, PathBuf}, +}; + +use anyhow::bail; + +use crate::host_info; + +#[derive(Debug)] +pub struct MountEntry { + pub root: PathBuf, + pub mount_point: PathBuf, +} + +#[derive(Debug)] +pub struct MountInfo(HashMap>); + +impl MountInfo { + pub fn new() -> anyhow::Result { + let cache = MountInfo::build_cache()?; + Ok(MountInfo(cache)) + } + + pub fn refresh(&mut self) -> anyhow::Result<()> { + let cache = MountInfo::build_cache()?; + self.0 = cache; + Ok(()) + } + + fn parse_dev(dev: &str) -> anyhow::Result { + let mut dev_split = dev.split(':'); + let Some(major) = dev_split.next() else { + bail!("Failed to read device major part"); + }; + let Some(minor) = dev_split.next() else { + bail!("Failed to read device minor part"); + }; + if dev_split.next().is_some() { + bail!("Invalid device"); + } + + let major = major.parse::()?; + let minor = minor.parse::()?; + Ok((major << 20) + (minor & 0xFFFFF)) + } + + pub fn get(&self, k: &u32) -> Option<&Vec> { + self.0.get(k) + } + + pub fn insert_empty(&mut self, k: u32) -> &Vec { + self.0.entry(k).or_default() + } + + fn build_cache() -> anyhow::Result>> { + let host_mount = host_info::get_host_mount(); + let path = PathBuf::from("/proc/self/mountinfo"); + if !path.exists() { + bail!("/proc/self/mountinfo does not exist"); + } + let mounts = read_to_string(path)?; + let mountinfo_it = mounts.lines().map(|line| { + let mut parts = line.split(' '); + let Some(dev) = parts.nth(2) else { + bail!("Failed to retrieve device number"); + }; + let dev = MountInfo::parse_dev(dev)?; + + let Some(root) = parts.next() else { + bail!("Failed to retrieve root"); + }; + + let Some(mount_point) = parts.next() else { + bail!("Failed to retrieve mount point"); + }; + let mut mount_point = Path::new(mount_point); + if host_mount != Path::new("/") { + if let Ok(mp) = mount_point.strip_prefix(host_mount) { + mount_point = mp; + } + } + + let entry = MountEntry { + root: root.into(), + mount_point: Path::new("/").join(mount_point), + }; + Ok((dev, entry)) + }); + + let mut cache = HashMap::new(); + for i in mountinfo_it { + let (dev, mountinfo) = i?; + let entry: &mut Vec = cache.entry(dev).or_default(); + if mountinfo.root != Path::new("/") && mountinfo.root != mountinfo.mount_point { + entry.push(mountinfo); + } + } + + Ok(cache) + } +} diff --git a/k8s/manifest.yml b/k8s/manifest.yml index f470c708..dc8e1e94 100644 --- a/k8s/manifest.yml +++ b/k8s/manifest.yml @@ -26,6 +26,8 @@ spec: env: - name: FACT_LOGLEVEL value: 'debug' + - name: FACT_HOST_MOUNT + value: '/host' securityContext: capabilities: drop: @@ -33,11 +35,11 @@ spec: privileged: true readOnlyRootFilesystem: true volumeMounts: - - mountPath: /sys - name: sys-ro + - mountPath: /host + name: root-ro readOnly: true mountPropagation: HostToContainer volumes: - hostPath: - path: /sys/ - name: sys-ro + path: / + name: root-ro From 3824dc9dc56846d3e55a32258a84a2bc6385a386 Mon Sep 17 00:00:00 2001 From: Mauro Ezequiel Moltrasio Date: Wed, 12 Nov 2025 10:18:52 +0100 Subject: [PATCH 2/3] ROX-30437: add integration tests Added tests will validate events generated on an overlayfs file properly shows the event on the upper layer and the access to the underlying FS. They also validate a mounted path on a container resolves to the correct host path. While developing these tests, it became painfully obvious getting the information of the process running inside the container is not straightforward. Because containers tend to be fairly static, we should be able to manually create the information statically in the test and still have everything work correctly. In order to minimize the amount of changes on existing tests, the default Process constructor now takes fields directly and there is a from_proc class method that builds a new Process object from /proc. Additionally, getting the pid of a process in a container is virtually impossible, so we make the pid check optional. --- tests/conftest.py | 43 ++++++++----- tests/event.py | 84 ++++++++++++++++++------- tests/test_config_hotreload.py | 38 ++++++----- tests/test_file_open.py | 81 ++++++++++++++++++++---- tests/test_path_unlink.py | 112 +++++++++++++++++++++++++++++---- 5 files changed, 283 insertions(+), 75 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index b87f2914..88ee7b01 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -84,7 +84,7 @@ def dump_logs(container, file): def fact_config(request, monitored_dir, logs_dir): cwd = os.getcwd() config = { - 'paths': [monitored_dir], + 'paths': [monitored_dir, '/mounted', '/container-dir'], 'grpc': { 'url': 'http://127.0.0.1:9999', }, @@ -106,6 +106,31 @@ def fact_config(request, monitored_dir, logs_dir): config_file.close() +@pytest.fixture +def test_container(request, docker_client, ignored_dir): + """ + Run a container for triggering events in. + """ + container = docker_client.containers.run( + 'quay.io/fedora/fedora:43', + detach=True, + tty=True, + volumes={ + ignored_dir: { + 'bind': '/mounted', + 'mode': 'z', + }, + }, + name='fedora', + ) + container.exec_run('mkdir /mounted /container-dir') + + yield container + + container.stop(timeout=1) + container.remove() + + @pytest.fixture def fact(request, docker_client, fact_config, server, logs_dir): """ @@ -124,20 +149,8 @@ def fact(request, docker_client, fact_config, server, logs_dir): network_mode='host', privileged=True, volumes={ - '/sys/kernel/security': { - 'bind': '/host/sys/kernel/security', - 'mode': 'ro', - }, - '/etc': { - 'bind': '/host/etc', - 'mode': 'ro', - }, - '/proc/sys/kernel': { - 'bind': '/host/proc/sys/kernel', - 'mode': 'ro', - }, - '/usr/lib/os-release': { - 'bind': '/host/usr/lib/os-release', + '/': { + 'bind': '/host', 'mode': 'ro', }, config_file: { diff --git a/tests/event.py b/tests/event.py index 1b30cf7d..bdd3a6ed 100644 --- a/tests/event.py +++ b/tests/event.py @@ -37,10 +37,31 @@ class Process: Represents a process with its attributes. """ - def __init__(self, pid: int | None = None): - self._pid: int = pid if pid is not None else os.getpid() - proc_dir = os.path.join('/proc', str(self._pid)) - + def __init__(self, + pid: int | None, + uid: int, + gid: int, + exe_path: str, + args: str, + name: str, + container_id: str, + loginuid: int): + self._pid: int | None = pid + self._uid: int = uid + self._gid: int = gid + self._exe_path: str = exe_path + self._args: str = args + self._name: str = name + self._container_id: str = container_id + self._loginuid: int = loginuid + + @classmethod + def from_proc(cls, pid: int | None = None): + pid: int = pid if pid is not None else os.getpid() + proc_dir = os.path.join('/proc', str(pid)) + + uid = 0 + gid = 0 with open(os.path.join(proc_dir, 'status'), 'r') as f: def get_id(line: str, wanted_id: str) -> int | None: if line.startswith(f'{wanted_id}:'): @@ -50,27 +71,36 @@ def get_id(line: str, wanted_id: str) -> int | None: return None for line in f.readlines(): - if (uid := get_id(line, 'Uid')) is not None: - self._uid: int = uid - elif (gid := get_id(line, 'Gid')) is not None: - self._gid: int = gid + if (id := get_id(line, 'Uid')) is not None: + uid = id + elif (id := get_id(line, 'Gid')) is not None: + gid = id - self._exe_path: str = os.path.realpath(os.path.join(proc_dir, 'exe')) + exe_path = os.path.realpath(os.path.join(proc_dir, 'exe')) with open(os.path.join(proc_dir, 'cmdline'), 'rb') as f: content = f.read(4096) args = [arg.decode('utf-8') for arg in content.split(b'\x00') if arg] - self._args: str = ' '.join(args) + args = ' '.join(args) with open(os.path.join(proc_dir, 'comm'), 'r') as f: - self._name: str = f.read().strip() + name = f.read().strip() with open(os.path.join(proc_dir, 'cgroup'), 'r') as f: - self._container_id: str = extract_container_id(f.read()) + container_id = extract_container_id(f.read()) with open(os.path.join(proc_dir, 'loginuid'), 'r') as f: - self._loginuid: int = int(f.read()) + loginuid = int(f.read()) + + return Process(pid=pid, + uid=uid, + gid=gid, + exe_path=exe_path, + args=args, + name=name, + container_id=container_id, + loginuid=loginuid) @property def uid(self) -> int: @@ -81,7 +111,7 @@ def gid(self) -> int: return self._gid @property - def pid(self) -> int: + def pid(self) -> int | None: return self._pid @property @@ -107,10 +137,12 @@ def loginuid(self) -> int: @override def __eq__(self, other: Any) -> bool: if isinstance(other, ProcessSignal): + if self.pid is not None and self.pid != other.pid: + return False + return ( self.uid == other.uid and self.gid == other.gid and - self.pid == other.pid and self.exe_path == other.exec_file_path and self.args == other.args and self.name == other.name and @@ -124,7 +156,7 @@ def __str__(self) -> str: return (f'Process(uid={self.uid}, gid={self.gid}, pid={self.pid}, ' f'exe_path={self.exe_path}, args={self.args}, ' f'name={self.name}, container_id={self.container_id}, ' - f'loginuid={self.loginuid}') + f'loginuid={self.loginuid})') class Event: @@ -136,10 +168,12 @@ class Event: def __init__(self, process: Process, event_type: EventType, - file: str): + file: str, + host_path: str = ''): self._type: EventType = event_type self._process: Process = process self._file: str = file + self._host_path: str = host_path @property def event_type(self) -> EventType: @@ -153,6 +187,10 @@ def process(self) -> Process: def file(self) -> str: return self._file + @property + def host_path(self) -> str: + return self._host_path + @override def __eq__(self, other: Any) -> bool: if isinstance(other, FileActivity): @@ -160,15 +198,19 @@ def __eq__(self, other: Any) -> bool: return False if self.event_type == EventType.CREATION: - return self.file == other.creation.activity.path + return self.file == other.creation.activity.path and \ + self.host_path == other.creation.activity.host_path elif self.event_type == EventType.OPEN: - return self.file == other.open.activity.path + return self.file == other.open.activity.path and \ + self.host_path == other.open.activity.host_path elif self.event_type == EventType.UNLINK: - return self.file == other.unlink.activity.path + return self.file == other.unlink.activity.path and \ + self.host_path == other.unlink.activity.host_path return False raise NotImplementedError @override def __str__(self) -> str: return (f'Event(event_type={self.event_type.name}, ' - f'process={self.process}, file="{self.file}")') + f'process={self.process}, file="{self.file}", ' + f'host_path="{self.host_path}")') diff --git a/tests/test_config_hotreload.py b/tests/test_config_hotreload.py index d5f5b0bb..10ea7fd9 100644 --- a/tests/test_config_hotreload.py +++ b/tests/test_config_hotreload.py @@ -97,8 +97,9 @@ def test_output_grpc_address_change(fact, fact_config, monitored_dir, server, al with open(fut, 'w') as f: f.write('This is a test') - process = Process() - e = Event(process=process, event_type=EventType.CREATION, file=fut) + process = Process.from_proc() + e = Event(process=process, event_type=EventType.CREATION, + file=fut, host_path=fut) print(f'Waiting for event: {e}') server.wait_events([e]) @@ -111,22 +112,23 @@ def test_output_grpc_address_change(fact, fact_config, monitored_dir, server, al with open(fut, 'w') as f: f.write('This is another test') - e = Event(process=process, event_type=EventType.OPEN, file=fut) + e = Event(process=process, event_type=EventType.OPEN, + file=fut, host_path=fut) print(f'Waiting for event on alternate server: {e}') alternate_server.wait_events([e]) def test_paths(fact, fact_config, monitored_dir, ignored_dir, server): - p = Process() + p = Process.from_proc() # Ignored file, must not show up in the server ignored_file = os.path.join(ignored_dir, 'test.txt') with open(ignored_file, 'w') as f: f.write('This is to be ignored') - ignored_event = Event( - process=p, event_type=EventType.CREATION, file=ignored_file) + ignored_event = Event(process=p, event_type=EventType.CREATION, + file=ignored_file, host_path=ignored_file) print(f'Ignoring: {ignored_event}') # File Under Test @@ -134,7 +136,8 @@ def test_paths(fact, fact_config, monitored_dir, ignored_dir, server): with open(fut, 'w') as f: f.write('This is a test') - e = Event(process=p, event_type=EventType.CREATION, file=fut) + e = Event(process=p, event_type=EventType.CREATION, + file=fut, host_path=fut) print(f'Waiting for event: {e}') server.wait_events([e], ignored=[ignored_event]) @@ -148,30 +151,31 @@ def test_paths(fact, fact_config, monitored_dir, ignored_dir, server): with open(ignored_file, 'w') as f: f.write('This is another test') - e = Event( - process=p, event_type=EventType.OPEN, file=ignored_file) + e = Event(process=p, event_type=EventType.OPEN, + file=ignored_file, host_path=ignored_file) print(f'Waiting for event: {e}') # File Under Test with open(fut, 'w') as f: f.write('This is another ignored event') - ignored_event = Event(process=p, event_type=EventType.OPEN, file=fut) + ignored_event = Event( + process=p, event_type=EventType.OPEN, file=fut, host_path=fut) print(f'Ignoring: {ignored_event}') server.wait_events([e], ignored=[ignored_event]) def test_paths_addition(fact, fact_config, monitored_dir, ignored_dir, server): - p = Process() + p = Process.from_proc() # Ignored file, must not show up in the server ignored_file = os.path.join(ignored_dir, 'test.txt') with open(ignored_file, 'w') as f: f.write('This is to be ignored') - ignored_event = Event( - process=p, event_type=EventType.CREATION, file=ignored_file) + ignored_event = Event(process=p, event_type=EventType.CREATION, + file=ignored_file, host_path=ignored_file) print(f'Ignoring: {ignored_event}') # File Under Test @@ -179,7 +183,8 @@ def test_paths_addition(fact, fact_config, monitored_dir, ignored_dir, server): with open(fut, 'w') as f: f.write('This is a test') - e = Event(process=p, event_type=EventType.CREATION, file=fut) + e = Event(process=p, event_type=EventType.CREATION, + file=fut, host_path=fut) print(f'Waiting for event: {e}') server.wait_events([e], ignored=[ignored_event]) @@ -196,8 +201,9 @@ def test_paths_addition(fact, fact_config, monitored_dir, ignored_dir, server): f.write('This is one final event') events = [ - Event(process=p, event_type=EventType.OPEN, file=ignored_file), - Event(process=p, event_type=EventType.OPEN, file=fut) + Event(process=p, event_type=EventType.OPEN, + file=ignored_file, host_path=ignored_file), + Event(process=p, event_type=EventType.OPEN, file=fut, host_path=fut) ] print(f'Waiting for events: {events}') diff --git a/tests/test_file_open.py b/tests/test_file_open.py index b499cdce..ca421b52 100644 --- a/tests/test_file_open.py +++ b/tests/test_file_open.py @@ -1,6 +1,8 @@ import multiprocessing as mp import os +import docker + from event import Event, EventType, Process @@ -19,7 +21,8 @@ def test_open(fact, monitored_dir, server): with open(fut, 'w') as f: f.write('This is a test') - e = Event(process=Process(), event_type=EventType.CREATION, file=fut) + e = Event(process=Process.from_proc(), event_type=EventType.CREATION, + file=fut, host_path=fut) print(f'Waiting for event: {e}') server.wait_events([e]) @@ -36,14 +39,15 @@ def test_multiple(fact, monitored_dir, server): server: The server instance to communicate with. """ events = [] - process = Process() + process = Process.from_proc() # File Under Test for i in range(3): fut = os.path.join(monitored_dir, f'{i}.txt') with open(fut, 'w') as f: f.write('This is a test') - e = Event(process=process, event_type=EventType.CREATION, file=fut) + e = Event(process=process, event_type=EventType.CREATION, + file=fut, host_path=fut) print(f'Waiting for event: {e}') events.append(e) @@ -68,7 +72,7 @@ def test_multiple_access(fact, monitored_dir, server): with open(fut, 'a+') as f: f.write('This is a test') - e = Event(process=Process(), file=fut, + e = Event(process=Process.from_proc(), file=fut, host_path=fut, event_type=EventType.CREATION if i == 0 else EventType.OPEN) print(f'Waiting for event: {e}') events.append(e) @@ -87,15 +91,15 @@ def test_ignored(fact, monitored_dir, ignored_dir, server): ignored_dir: Temporary directory path that is not monitored by fact. server: The server instance to communicate with. """ - p = Process() + p = Process.from_proc() # Ignored file, must not show up in the server ignored_file = os.path.join(ignored_dir, 'test.txt') with open(ignored_file, 'w') as f: f.write('This is to be ignored') - ignored_event = Event( - process=p, event_type=EventType.CREATION, file=ignored_file) + ignored_event = Event(process=p, event_type=EventType.CREATION, + file=ignored_file, host_path=ignored_file) print(f'Ignoring: {ignored_event}') # File Under Test @@ -103,7 +107,8 @@ def test_ignored(fact, monitored_dir, ignored_dir, server): with open(fut, 'w') as f: f.write('This is a test') - e = Event(process=p, event_type=EventType.CREATION, file=fut) + e = Event(process=p, event_type=EventType.CREATION, + file=fut, host_path=fut) print(f'Waiting for event: {e}') server.wait_events([e], ignored=[ignored_event]) @@ -135,11 +140,13 @@ def test_external_process(fact, monitored_dir, server): stop_event = mp.Event() proc = mp.Process(target=do_test, args=(fut, stop_event)) proc.start() - p = Process(proc.pid) + p = Process.from_proc(proc.pid) - creation = Event(process=p, event_type=EventType.CREATION, file=fut) + creation = Event(process=p, event_type=EventType.CREATION, + file=fut, host_path=fut) print(f'Waiting for event: {creation}') - write_access = Event(process=p, event_type=EventType.OPEN, file=fut) + write_access = Event( + process=p, event_type=EventType.OPEN, file=fut, host_path=fut) print(f'Waiting for event: {write_access}') try: @@ -147,3 +154,55 @@ def test_external_process(fact, monitored_dir, server): finally: stop_event.set() proc.join(1) + + +def test_overlay(fact, test_container, server): + # File Under Test + fut = '/container-dir/test.txt' + + # Create the exec and an equivalent event that it will trigger + test_container.exec_run(f'touch {fut}') + inspect = docker.APIClient().inspect_container(test_container.id) + upper_dir = inspect['GraphDriver']['Data']['UpperDir'] + + process = Process(pid=None, + uid=0, + gid=0, + exe_path='/usr/bin/touch', + args=f'touch {fut}', + name='touch', + container_id=test_container.id[:12], + loginuid=pow(2, 32)-1) + events = [ + Event(process=process, event_type=EventType.CREATION, + file=fut, host_path=fut), + Event(process=process, event_type=EventType.OPEN, + file=fut, host_path=os.path.join(upper_dir, fut[1:])) + ] + + for e in events: + print(f'Waiting for event: {e}') + + server.wait_events(events) + + +def test_mounted_dir(fact, test_container, ignored_dir, server): + # File Under Test + fut = '/mounted/test.txt' + + # Create the exec and an equivalent event that it will trigger + test_container.exec_run(f'touch {fut}') + + process = Process(pid=None, + uid=0, + gid=0, + exe_path='/usr/bin/touch', + args=f'touch {fut}', + name='touch', + container_id=test_container.id[:12], + loginuid=pow(2, 32)-1) + event = Event(process=process, event_type=EventType.CREATION, + file=fut, host_path=os.path.join(ignored_dir, 'test.txt')) + print(f'Waiting for event: {event}') + + server.wait_events([event]) diff --git a/tests/test_path_unlink.py b/tests/test_path_unlink.py index 9486b9bd..55f455fa 100644 --- a/tests/test_path_unlink.py +++ b/tests/test_path_unlink.py @@ -1,6 +1,8 @@ import multiprocessing as mp import os +import docker + from event import Event, EventType, Process @@ -19,10 +21,12 @@ def test_remove(fact, monitored_dir, server): f.write('This is a test') os.remove(fut) - process = Process() + process = Process.from_proc() events = [ - Event(process=process, event_type=EventType.CREATION, file=fut), - Event(process=process, event_type=EventType.UNLINK, file=fut), + Event(process=process, event_type=EventType.CREATION, + file=fut, host_path=fut), + Event(process=process, event_type=EventType.UNLINK, + file=fut, host_path=fut), ] server.wait_events(events) @@ -39,7 +43,7 @@ def test_multiple(fact, monitored_dir, server): server: The server instance to communicate with. """ events = [] - process = Process() + process = Process.from_proc() # File Under Test for i in range(3): @@ -49,8 +53,10 @@ def test_multiple(fact, monitored_dir, server): os.remove(fut) events.extend([ - Event(process=process, event_type=EventType.CREATION, file=fut), - Event(process=process, event_type=EventType.UNLINK, file=fut), + Event(process=process, event_type=EventType.CREATION, + file=fut, host_path=fut), + Event(process=process, event_type=EventType.UNLINK, + file=fut, host_path=fut), ]) server.wait_events(events) @@ -67,7 +73,7 @@ def test_ignored(fact, monitored_dir, ignored_dir, server): ignored_dir: Temporary directory path that is not monitored by fact. server: The server instance to communicate with. """ - process = Process() + process = Process.from_proc() # Ignored file, must not show up in the server ignored_file = os.path.join(ignored_dir, 'test.txt') @@ -75,8 +81,8 @@ def test_ignored(fact, monitored_dir, ignored_dir, server): f.write('This is to be ignored') os.remove(ignored_file) - ignored_event = Event( - process=process, event_type=EventType.UNLINK, file=ignored_file) + ignored_event = Event(process=process, event_type=EventType.UNLINK, + file=ignored_file, host_path=ignored_file) print(f'Ignoring: {ignored_event}') # File Under Test @@ -85,7 +91,8 @@ def test_ignored(fact, monitored_dir, ignored_dir, server): f.write('This is a test') os.remove(fut) - e = Event(process=process, event_type=EventType.UNLINK, file=fut) + e = Event(process=process, event_type=EventType.UNLINK, + file=fut, host_path=fut) print(f'Waiting for event: {e}') server.wait_events([e], ignored=[ignored_event]) @@ -115,9 +122,10 @@ def test_external_process(fact, monitored_dir, server): stop_event = mp.Event() proc = mp.Process(target=do_test, args=(fut, stop_event)) proc.start() - process = Process(proc.pid) + process = Process.from_proc(proc.pid) - removal = Event(process=process, event_type=EventType.UNLINK, file=fut) + removal = Event(process=process, event_type=EventType.UNLINK, + file=fut, host_path=fut) print(f'Waiting for event: {removal}') try: @@ -125,3 +133,83 @@ def test_external_process(fact, monitored_dir, server): finally: stop_event.set() proc.join(1) + + +def test_overlay(fact, test_container, server): + # File Under Test + fut = '/container-dir/test.txt' + + # Create the exec and an equivalent event that it will trigger + test_container.exec_run(f'touch {fut}') + test_container.exec_run(f'rm {fut}') + inspect = docker.APIClient().inspect_container(test_container.id) + upper_dir = inspect['GraphDriver']['Data']['UpperDir'] + + loginuid = pow(2, 32)-1 + touch = Process(pid=None, + uid=0, + gid=0, + exe_path='/usr/bin/touch', + args=f'touch {fut}', + name='touch', + container_id=test_container.id[:12], + loginuid=loginuid) + rm = Process(pid=None, + uid=0, + gid=0, + exe_path='/usr/bin/rm', + args=f'rm {fut}', + name='rm', + container_id=test_container.id[:12], + loginuid=loginuid) + events = [ + Event(process=touch, event_type=EventType.CREATION, + file=fut, host_path=fut), + Event(process=touch, event_type=EventType.OPEN, + file=fut, host_path=os.path.join(upper_dir, fut[1:])), + Event(process=rm, event_type=EventType.UNLINK, + file=fut, host_path=fut), + ] + + for e in events: + print(f'Waiting for event: {e}') + + server.wait_events(events) + + +def test_mounted_dir(fact, test_container, ignored_dir, server): + # File Under Test + fut = '/mounted/test.txt' + + # Create the exec and an equivalent event that it will trigger + test_container.exec_run(f'touch {fut}') + test_container.exec_run(f'rm {fut}') + + loginuid = pow(2, 32)-1 + touch = Process(pid=None, + uid=0, + gid=0, + exe_path='/usr/bin/touch', + args=f'touch {fut}', + name='touch', + container_id=test_container.id[:12], + loginuid=loginuid) + rm = Process(pid=None, + uid=0, + gid=0, + exe_path='/usr/bin/rm', + args=f'rm {fut}', + name='rm', + container_id=test_container.id[:12], + loginuid=loginuid) + events = [ + Event(process=touch, event_type=EventType.CREATION, file=fut, + host_path=os.path.join(ignored_dir, 'test.txt')), + Event(process=rm, event_type=EventType.UNLINK, file=fut, + host_path=os.path.join(ignored_dir, 'test.txt')), + ] + + for e in events: + print(f'Waiting for event: {e}') + + server.wait_events(events) From e2489e9a4a9a6c93c5dd2faeab726b0e121c0287 Mon Sep 17 00:00:00 2001 From: Mauro Ezequiel Moltrasio Date: Mon, 17 Nov 2025 11:55:23 +0100 Subject: [PATCH 3/3] Filter mountpoint stored based on configuration --- fact-ebpf/src/bpf/main.c | 4 ++++ fact/src/bpf/mod.rs | 31 ++++++++++++++++++++++--- fact/src/event/mod.rs | 12 ++++++++++ fact/src/event/parser.rs | 50 +++++++++++++++++++++++++++++----------- fact/src/metrics/mod.rs | 13 +++++++++++ fact/src/mount_info.rs | 19 ++++++++++----- 6 files changed, 107 insertions(+), 22 deletions(-) diff --git a/fact-ebpf/src/bpf/main.c b/fact-ebpf/src/bpf/main.c index 659814bf..6495d115 100644 --- a/fact-ebpf/src/bpf/main.c +++ b/fact-ebpf/src/bpf/main.c @@ -44,9 +44,11 @@ int BPF_PROG(trace_file_open, struct file* file) { return 0; } + /* if (!is_monitored(path)) { goto ignored; } + */ struct dentry* d = BPF_CORE_READ(file, f_path.dentry); submit_event(&m->file_open, event_type, path->path, d, true); @@ -91,10 +93,12 @@ int BPF_PROG(trace_path_unlink, struct path* dir, struct dentry* dentry) { goto error; } + /* if (!is_monitored(path)) { m->path_unlink.ignored++; return 0; } + */ submit_event(&m->path_unlink, FILE_ACTIVITY_UNLINK, path->path, dentry, path_unlink_supports_bpf_d_path); return 0; diff --git a/fact/src/bpf/mod.rs b/fact/src/bpf/mod.rs index 707c6622..a7b5bc5c 100644 --- a/fact/src/bpf/mod.rs +++ b/fact/src/bpf/mod.rs @@ -16,7 +16,10 @@ use tokio::{ }; use crate::{ - event::{parser::EventParser, Event}, + event::{ + parser::{EventParser, EventParserError}, + Event, + }, host_info, metrics::EventCounter, }; @@ -62,7 +65,7 @@ impl Bpf { let paths = Vec::new(); let (tx, _) = broadcast::channel(100); - let parser = EventParser::new()?; + let parser = EventParser::new(paths_config.borrow().as_slice())?; let mut bpf = Bpf { obj, tx, @@ -192,7 +195,23 @@ impl Bpf { while let Some(event) = ringbuf.next() { let event: &event_t = unsafe { &*(event.as_ptr() as *const _) }; let event = match self.parser.parse(event) { - Ok(event) => Arc::new(event), + Ok(event) => event, + Err(EventParserError::NotFound) => { + let paths_config = self.paths_config.borrow(); + self.parser.refresh(paths_config.as_slice())?; + if self.parser.mountinfo.get(&event.dev).is_none() { + self.parser.mountinfo.insert_empty(event.dev); + } + match self.parser.parse(event) { + Ok(event) => event, + Err(e) => { + error!("Failed to parse event: '{e}'"); + debug!("Event: {event:?}"); + event_counter.dropped(); + continue; + } + } + } Err(e) => { error!("Failed to parse event: '{e}'"); debug!("Event: {event:?}"); @@ -201,6 +220,12 @@ impl Bpf { } }; + if !event.is_monitored(self.paths_config.borrow().as_slice()) { + event_counter.ignored(); + continue; + } + let event = Arc::new(event); + event_counter.added(); if self.tx.send(event).is_err() { info!("No BPF consumers left, stopping..."); diff --git a/fact/src/event/mod.rs b/fact/src/event/mod.rs index 91b89260..4eb85717 100644 --- a/fact/src/event/mod.rs +++ b/fact/src/event/mod.rs @@ -61,6 +61,18 @@ impl Event { file, }) } + + pub fn is_monitored(&self, paths: &[PathBuf]) -> bool { + let file = match &self.file { + FileData::Open(base_file_data) => base_file_data, + FileData::Creation(base_file_data) => base_file_data, + FileData::Unlink(base_file_data) => base_file_data, + }; + + paths + .iter() + .any(|prefix| file.filename.starts_with(prefix) || file.host_file.starts_with(prefix)) + } } impl From for fact_api::FileActivity { diff --git a/fact/src/event/parser.rs b/fact/src/event/parser.rs index 61789219..2706e8f2 100644 --- a/fact/src/event/parser.rs +++ b/fact/src/event/parser.rs @@ -1,36 +1,60 @@ +use std::{error::Error, fmt::Display, path::PathBuf}; + use fact_ebpf::event_t; use crate::{host_info, mount_info::MountInfo}; use super::{process::Process, Event, FileData}; +#[derive(Debug)] +pub(crate) enum EventParserError { + NotFound, + ProcessParse(String), + FileParse(String), +} + +impl Error for EventParserError {} +impl Display for EventParserError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + EventParserError::NotFound => write!(f, "mountpoint not found"), + EventParserError::ProcessParse(e) => write!(f, "Failed to parse process: {e}"), + EventParserError::FileParse(e) => write!(f, "Failed to parse file: {e}"), + } + } +} + pub(crate) struct EventParser { - mountinfo: MountInfo, + pub mountinfo: MountInfo, } impl EventParser { - pub(crate) fn new() -> anyhow::Result { - let mountinfo = MountInfo::new()?; + pub(crate) fn new(paths: &[PathBuf]) -> anyhow::Result { + let mountinfo = MountInfo::new(paths)?; Ok(EventParser { mountinfo }) } - pub(crate) fn parse(&mut self, event: &event_t) -> anyhow::Result { - let process = Process::try_from(event.process)?; + pub(crate) fn refresh(&mut self, paths: &[PathBuf]) -> anyhow::Result<()> { + self.mountinfo.refresh(paths) + } + + pub(crate) fn parse(&mut self, event: &event_t) -> Result { + let process = match Process::try_from(event.process) { + Ok(p) => p, + Err(e) => return Err(EventParserError::ProcessParse(e.to_string())), + }; let timestamp = host_info::get_boot_time() + event.timestamp; let mounts = match self.mountinfo.get(&event.dev) { Some(mounts) => mounts, - None => { - self.mountinfo.refresh()?; - match self.mountinfo.get(&event.dev) { - Some(mounts) => mounts, - None => self.mountinfo.insert_empty(event.dev), - } - } + None => return Err(EventParserError::NotFound), }; - let file = FileData::new(event.type_, event.filename, event.host_file, mounts)?; + let file = match FileData::new(event.type_, event.filename, event.host_file, mounts) { + Ok(f) => f, + Err(e) => return Err(EventParserError::FileParse(e.to_string())), + }; Ok(Event { timestamp, diff --git a/fact/src/metrics/mod.rs b/fact/src/metrics/mod.rs index 38e579b8..e2b18f1a 100644 --- a/fact/src/metrics/mod.rs +++ b/fact/src/metrics/mod.rs @@ -97,6 +97,19 @@ impl EventCounter { .unwrap() .inc_by(n); } + + /// Increment the counter for the Ignored label. + /// + /// Panics if the counter did not add the Ignored label as part of + /// its creation step. + pub fn ignored(&self) { + self.counter + .get(&MetricEvents { + label: LabelValues::Ignored, + }) + .unwrap() + .inc(); + } } #[derive(Debug, Clone)] diff --git a/fact/src/mount_info.rs b/fact/src/mount_info.rs index df5816d5..791a8477 100644 --- a/fact/src/mount_info.rs +++ b/fact/src/mount_info.rs @@ -18,13 +18,13 @@ pub struct MountEntry { pub struct MountInfo(HashMap>); impl MountInfo { - pub fn new() -> anyhow::Result { - let cache = MountInfo::build_cache()?; + pub fn new(paths: &[PathBuf]) -> anyhow::Result { + let cache = MountInfo::build_cache(paths)?; Ok(MountInfo(cache)) } - pub fn refresh(&mut self) -> anyhow::Result<()> { - let cache = MountInfo::build_cache()?; + pub fn refresh(&mut self, paths: &[PathBuf]) -> anyhow::Result<()> { + let cache = MountInfo::build_cache(paths)?; self.0 = cache; Ok(()) } @@ -54,7 +54,7 @@ impl MountInfo { self.0.entry(k).or_default() } - fn build_cache() -> anyhow::Result>> { + fn build_cache(paths: &[PathBuf]) -> anyhow::Result>> { let host_mount = host_info::get_host_mount(); let path = PathBuf::from("/proc/self/mountinfo"); if !path.exists() { @@ -93,7 +93,14 @@ impl MountInfo { for i in mountinfo_it { let (dev, mountinfo) = i?; let entry: &mut Vec = cache.entry(dev).or_default(); - if mountinfo.root != Path::new("/") && mountinfo.root != mountinfo.mount_point { + + if mountinfo.root != Path::new("/") + && mountinfo.root != mountinfo.mount_point + && paths.iter().any(|monitored_path| { + mountinfo.mount_point.starts_with(monitored_path) + || monitored_path.starts_with(&mountinfo.mount_point) + }) + { entry.push(mountinfo); } }