diff --git a/fact-ebpf/src/bpf/events.h b/fact-ebpf/src/bpf/events.h index f4e6ff4b..7dc41268 100644 --- a/fact-ebpf/src/bpf/events.h +++ b/fact-ebpf/src/bpf/events.h @@ -3,47 +3,83 @@ // clang-format off #include "vmlinux.h" -#include "inode.h" #include "maps.h" #include "process.h" #include "types.h" +#include "raw_event.h" #include // clang-format on +/** + * Format and submit an event to the ringbuffer. + * + * This method takes the values provided by the different BPF + * programs, serializes them alongside the current process + * information in a binary format and submits the result as an event + * to the ringbuffer. + * + * The high level format for an event can be described as follows: + * |--|--------|---------------------------|---------------------------| + * | | | | ^ event end + * | | | ^ begin file data + * | | ^ begin process data + * | ^ timestamp + * ^ event type + * + * Event type: a 16 bit integer specifying the type of event this is. + * Timestamp: the number of nanoseconds since boot time. + * Process data: all the information collected from the current process. + * For more information on this field see the documentation for + * `process_fill`. + * File data: information collected about the file being acted upon. + * + * The file data field can be expanded as follows: + * |----|--------------|---| + * | | ^ Event specific data + * | ^ file path + * ^ inode information + * + * Inode information: Encoded as the inode and device numbers. Used for + * host path tracking. + * File path: The path to the file being acted upon, retrieved from + * d_path. 
+ */ __always_inline static void submit_event(struct metrics_by_hook_t* m, file_activity_type_t event_type, struct bound_path_t* path, inode_key_t* inode, bool use_bpf_d_path) { - struct event_t* event = bpf_ringbuf_reserve(&rb, sizeof(struct event_t), 0); - if (event == NULL) { - m->ringbuffer_full++; + unsigned int zero = 0; + struct raw_event_t raw_event = { + .buf = bpf_map_lookup_elem(&heap_map, &zero), + .len = 0, + }; + if (raw_event.buf == NULL) { + m->error++; return; } - event->type = event_type; - event->timestamp = bpf_ktime_get_boot_ns(); - inode_copy_or_reset(&event->inode, inode); - bpf_probe_read(event->filename, path->len & (PATH_MAX - 1), path->path); - event->filename_len = path->len; - - struct helper_t* helper = get_helper(); - if (helper == NULL) { - goto error; - } + raw_event_copy_u16(&raw_event, event_type); + raw_event_copy_uint(&raw_event, bpf_ktime_get_boot_ns()); - int64_t err = process_fill(&event->process, use_bpf_d_path); + int64_t err = process_fill(&raw_event, use_bpf_d_path); if (err) { bpf_printk("Failed to fill process information: %d", err); goto error; } + // File data + raw_event_copy_inode(&raw_event, inode); + raw_event_copy_bound_path(&raw_event, path); + + if (bpf_ringbuf_output(&rb, raw_event.buf, raw_event.len, 0) != 0) { + m->ringbuffer_full++; + return; + } m->added++; - bpf_ringbuf_submit(event, 0); return; error: m->error++; - bpf_ringbuf_discard(event, 0); } diff --git a/fact-ebpf/src/bpf/maps.h b/fact-ebpf/src/bpf/maps.h index 1b0402e2..52bed165 100644 --- a/fact-ebpf/src/bpf/maps.h +++ b/fact-ebpf/src/bpf/maps.h @@ -7,6 +7,18 @@ #include +#define MAX_EVENT_LEN ((1<< 15) - 1) +/** + * Raw buffer to encode events into prior to submitting to the + * ringbuffer + */ +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __type(key, __u32); + __type(value, char[MAX_EVENT_LEN]); + __uint(max_entries, 1); +} heap_map SEC(".maps"); + /** * Helper struct with buffers for various operations */ diff --git 
a/fact-ebpf/src/bpf/process.h b/fact-ebpf/src/bpf/process.h index 340c4717..fbc7b87b 100644 --- a/fact-ebpf/src/bpf/process.h +++ b/fact-ebpf/src/bpf/process.h @@ -6,6 +6,7 @@ #include "d_path.h" #include "maps.h" #include "types.h" +#include "raw_event.h" #include #include @@ -77,23 +78,36 @@ __always_inline static const char* get_memory_cgroup(struct helper_t* helper) { return helper->buf; } -__always_inline static void process_fill_lineage(process_t* p, struct helper_t* helper, bool use_bpf_d_path) { +__always_inline static long process_fill_lineage(struct raw_event_t* event, struct helper_t* helper, bool use_bpf_d_path) { struct task_struct* task = (struct task_struct*)bpf_get_current_task_btf(); - p->lineage_len = 0; + uint16_t lineage_len_pos = event->len; + event->len += 2; - for (int i = 0; i < LINEAGE_MAX; i++) { + uint16_t i = 0; + for (; i < LINEAGE_MAX; i++) { struct task_struct* parent = task->real_parent; if (task == parent || parent->pid == 0) { - return; + break; } task = parent; - p->lineage[i].uid = task->cred->uid.val; - - d_path(&task->mm->exe_file->f_path, p->lineage[i].exe_path, PATH_MAX, use_bpf_d_path); - p->lineage_len++; + raw_event_copy_uint(event, task->cred->uid.val); + long err = raw_event_d_path(event, &task->mm->exe_file->f_path, use_bpf_d_path); + if (err != 0) { + bpf_printk("Failed to read lineage exe_path"); + return err; + } } + + // go back and set the amount of lineage processes in the buffer + uint16_t back = event->len; + event->len = lineage_len_pos; + + raw_event_copy_uint(event, i); + + event->len = back; + return 0; } __always_inline static unsigned long get_mount_ns() { @@ -101,15 +115,35 @@ __always_inline static unsigned long get_mount_ns() { return task->nsproxy->mnt_ns->ns.inum; } -__always_inline static int64_t process_fill(process_t* p, bool use_bpf_d_path) { +/** + * Fill in the information about the current process to the event + * buffer. 
+ * + * This method serializes all required process information for the event + * as a binary blob into the provided event buffer. The serialized data + * will look something like this: + * |--|--|--|--|-------|--------------|-------------|------------|-|----|---| + * | | | | | | | | | | ^ grandparent lineage + * | | | | | | | | | ^ parent lineage + * | | | | | | | | ^ in_root_mount_ns + * | | | | | | | ^ cgroup + * | | | | | | ^ executable path + * | | | | | ^ arguments + * | | | | ^ comm + * | | | ^ pid + * | | ^ loginuid + * | ^ gid + * ^ uid + */ +__always_inline static int64_t process_fill(struct raw_event_t* event, bool use_bpf_d_path) { struct task_struct* task = (struct task_struct*)bpf_get_current_task_btf(); uint32_t key = 0; uint64_t uid_gid = bpf_get_current_uid_gid(); - p->uid = uid_gid & 0xFFFFFFFF; - p->gid = (uid_gid >> 32) & 0xFFFFFFFF; - p->login_uid = task->loginuid.val; - p->pid = (bpf_get_current_pid_tgid() >> 32) & 0xFFFFFFFF; - u_int64_t err = bpf_get_current_comm(p->comm, TASK_COMM_LEN); + raw_event_copy_u32(event, uid_gid & 0xFFFFFFFF); + raw_event_copy_u32(event, ((uid_gid >> 32) & 0xFFFFFFFF)); + raw_event_copy_uint(event, task->loginuid.val); + raw_event_copy_u32(event, (bpf_get_current_pid_tgid() >> 32) & 0xFFFFFFFF); + uint64_t err = raw_event_copy_comm(event); if (err != 0) { bpf_printk("Failed to fill task comm"); return err; @@ -117,12 +151,17 @@ __always_inline static int64_t process_fill(process_t* p, bool use_bpf_d_path) { unsigned long arg_start = task->mm->arg_start; unsigned long arg_end = task->mm->arg_end; - p->args_len = (arg_end - arg_start) & 0xFFF; - p->args[4095] = '\0'; // Ensure string termination at end of buffer - err = bpf_probe_read_user(p->args, p->args_len, (const char*)arg_start); - if (err != 0) { - bpf_printk("Failed to fill task args"); - return err; + uint16_t args_len = (arg_end - arg_start) & 0xFFF; + err = raw_event_copy_buffer(event, (const void*)arg_start, args_len); + if (err < 0) { + 
bpf_printk("Failed to read process args"); + return -1; + } + + err = raw_event_d_path(event, &task->mm->exe_file->f_path, use_bpf_d_path); + if (err < 0) { + bpf_printk("Failed to read exe_path"); + return -1; } struct helper_t* helper = bpf_map_lookup_elem(&helper_map, &key); @@ -131,16 +170,26 @@ __always_inline static int64_t process_fill(process_t* p, bool use_bpf_d_path) { return -1; } - p->exe_path_len = d_path(&task->mm->exe_file->f_path, p->exe_path, PATH_MAX, use_bpf_d_path); - const char* cg = get_memory_cgroup(helper); if (cg != NULL) { - bpf_probe_read_str(p->memory_cgroup, PATH_MAX, cg); + // Reserve space for the cgroup length + event->len += 2; + uint16_t cg_len = (uint16_t)bpf_probe_read_str(&event->buf[event->len], PATH_MAX, cg); + + // Move back and fix the length + event->len -= 2; + raw_event_copy_u16(event, cg_len - 1); + + // Forward past the cgroup + event->len += ((cg_len - 1) & (PATH_MAX - 1)); } - p->in_root_mount_ns = get_mount_ns() == host_mount_ns; + raw_event_copy_u8(event, get_mount_ns() == host_mount_ns); - process_fill_lineage(p, helper, use_bpf_d_path); + err = process_fill_lineage(event, helper, use_bpf_d_path); + if (err < 0) { + return -1; + } return 0; } diff --git a/fact-ebpf/src/bpf/raw_event.h b/fact-ebpf/src/bpf/raw_event.h new file mode 100644 index 00000000..7b76641a --- /dev/null +++ b/fact-ebpf/src/bpf/raw_event.h @@ -0,0 +1,170 @@ +#pragma once + +// clang-format off +#include "vmlinux.h" + +#include "d_path.h" +#include "bound_path.h" +#include "types.h" + +#include +// clang-format on + +struct raw_event_t { + char* buf; + unsigned short len; +}; + +/** + * Copy a single byte to the event buffer and increment its size. + */ +__always_inline static void raw_event_copy_u8(struct raw_event_t* event, uint8_t val) { + event->buf[event->len++] = val; +} + +/** + * Copy an unsigned integer in big endian format to the event buffer + * and increase its size accordingly. 
+ * + * Big endian is used in order to make parsing easier in user space by + * simply rotating a target integer and adding bytes to the end. + */ +__always_inline static void _raw_event_copy_uint(struct raw_event_t* event, uint64_t val, uint8_t size) { + for (int8_t i = size - 1; i >= 0; i--) { + uint64_t mask = 0xFFULL << (i * 8); + uint8_t v = (val & mask) >> (i * 8); + raw_event_copy_u8(event, v); + } +} + +/** + * Type safe integer copying. + */ +#define raw_event_copy_uint(event, val) _Generic(val, \ + uint8_t: raw_event_copy_u8(event, val), \ + uint16_t: _raw_event_copy_uint(event, val, sizeof(uint16_t)), \ + uint32_t: _raw_event_copy_uint(event, val, sizeof(uint32_t)), \ + uint64_t: _raw_event_copy_uint(event, val, sizeof(uint64_t)), \ + unsigned long: _raw_event_copy_uint(event, val, sizeof(unsigned long))) + +/** + * Copy a 16 bit integer to the event buffer and increase its size. + */ +__always_inline static void raw_event_copy_u16(struct raw_event_t* event, uint16_t val) { + raw_event_copy_uint(event, val); +} + +/** + * Copy a 32 bit integer to the event buffer and increase its size. + */ +__always_inline static void raw_event_copy_u32(struct raw_event_t* event, uint32_t val) { + raw_event_copy_uint(event, val); +} + +/** + * Copy a 64 bit integer to the event buffer and increase its size. + */ +__always_inline static void raw_event_copy_u64(struct raw_event_t* event, uint64_t val) { + raw_event_copy_uint(event, val); +} + +/** + * Copy the provided inode information to the event buffer. + * + * The serialized blob will be of 2 big endian 32 bits integers, with + * the inode number first and the device number second. + * + * If no inode information is provided, the same space is filled with + * zeroes for ease of parsing. 
+ */ +__always_inline static void raw_event_copy_inode(struct raw_event_t* event, inode_key_t* val) { + if (val != NULL) { + raw_event_copy_uint(event, val->inode); + raw_event_copy_uint(event, val->dev); + } else { + raw_event_copy_u32(event, 0); + raw_event_copy_u32(event, 0); + } +} + +/** + * Copy a buffer to the event. + * + * The format used for the serialized buffer is as follows: + * |--|------------| + * | ^ begin data + * ^ data length + * + * Data length: 16 bit, big endian integer, number of data bytes held. + * Data: a blob of bytes with the required data. + */ +__always_inline static long raw_event_copy_buffer(struct raw_event_t* event, const void* buf, uint16_t len) { + raw_event_copy_uint(event, len); + long res = bpf_probe_read(&event->buf[event->len], len, buf); + if (res < 0) { + return res; + } + event->len += len; + return 0; +} + +/** + * Helper function for encoding a bound_path_t as a buffer in the event. + * + * The resulting buffer that is serialized will not be null terminated. + */ +__always_inline static long raw_event_copy_bound_path(struct raw_event_t* event, struct bound_path_t* path) { + // The & (PATH_MAX - 1) is there to convince the verifier we are at + // most copying 4KB, otherwise it will assume we can add UINT16_MAX + // bytes and immediately fail, as the event buffer is smaller than + // that. + return raw_event_copy_buffer(event, path->path, (path->len - 1) & (PATH_MAX - 1)); +} + +/** + * Serialize the comm value for the current task in the event buffer. + * + * For simplicity, the comm value is directly copied into the buffer by + * using the bpf_get_current_comm helper with a fixed length of 16. + * + * bpf_get_current_comm ensures the copied data is null terminated and + * padded with zeroes if the comm is smaller than 16 bytes. 
+ */ +__always_inline static long raw_event_copy_comm(struct raw_event_t* event) { + long res = bpf_get_current_comm((char*)&event->buf[event->len], TASK_COMM_LEN); + if (res != 0) { + return res; + } + event->len += TASK_COMM_LEN; + return 0; +} + +/** + * Serialize the result of calling d_path onto the event buffer. + * + * The resulting path is encoded as described in raw_event_copy_buffer + * and is not null terminated. + */ +__always_inline static long raw_event_d_path(struct raw_event_t* event, struct path* path, bool use_bpf_d_path) { + // Reserve room for the path length + event->len += 2; + long res = d_path(path, &event->buf[event->len], PATH_MAX, use_bpf_d_path); + if (res < 0) { + return res; + } + + // Go back and add the length of the path + uint16_t len = (uint16_t)res; + event->len -= 2; + raw_event_copy_u16(event, len - 1); + + // Move the buffer past the path + // + // The & (PATH_MAX - 1) is there to convince the verifier we are at + // most copying 4KB, otherwise it will assume we can add UINT16_MAX + // bytes and immediately fail, as the event buffer is smaller than + // that. 
+ event->len += ((len - 1) & (PATH_MAX - 1)); + + return 0; +} diff --git a/fact-ebpf/src/bpf/types.h b/fact-ebpf/src/bpf/types.h index 735aeab2..67d4f2aa 100644 --- a/fact-ebpf/src/bpf/types.h +++ b/fact-ebpf/src/bpf/types.h @@ -12,30 +12,9 @@ #define LPM_SIZE_MAX 256 -typedef struct lineage_t { - unsigned int uid; - char exe_path[PATH_MAX]; -} lineage_t; - -typedef struct process_t { - char comm[TASK_COMM_LEN]; - char args[4096]; - unsigned int args_len; - char exe_path[PATH_MAX]; - short unsigned int exe_path_len; - char memory_cgroup[PATH_MAX]; - unsigned int uid; - unsigned int gid; - unsigned int login_uid; - unsigned int pid; - lineage_t lineage[LINEAGE_MAX]; - unsigned int lineage_len; - char in_root_mount_ns; -} process_t; - typedef struct inode_key_t { - unsigned long inode; - unsigned long dev; + unsigned int inode; + unsigned int dev; } inode_key_t; // We can't use bool here because it is not a standard C type, we would @@ -50,15 +29,6 @@ typedef enum file_activity_type_t { FILE_ACTIVITY_UNLINK, } file_activity_type_t; -struct event_t { - unsigned long timestamp; - process_t process; - char filename[PATH_MAX]; - short unsigned int filename_len; - inode_key_t inode; - file_activity_type_t type; -}; - /** * Used as the key for the path_prefix map. 
* diff --git a/fact-ebpf/src/lib.rs b/fact-ebpf/src/lib.rs index 2251993d..d7ce79a6 100644 --- a/fact-ebpf/src/lib.rs +++ b/fact-ebpf/src/lib.rs @@ -92,6 +92,17 @@ impl Serialize for inode_key_t { unsafe impl Pod for inode_key_t {} +impl From for file_activity_type_t { + fn from(value: u16) -> Self { + match value { + 0 => file_activity_type_t::FILE_ACTIVITY_OPEN, + 1 => file_activity_type_t::FILE_ACTIVITY_CREATION, + 2 => file_activity_type_t::FILE_ACTIVITY_UNLINK, + invalid => unreachable!("Invalid file activity type: {invalid}"), + } + } +} + impl metrics_by_hook_t { fn accumulate(&self, other: &metrics_by_hook_t) -> metrics_by_hook_t { let mut m = metrics_by_hook_t { ..*self }; diff --git a/fact/src/bpf/mod.rs b/fact/src/bpf/mod.rs index 2a6b85e2..2a02f345 100644 --- a/fact/src/bpf/mod.rs +++ b/fact/src/bpf/mod.rs @@ -8,7 +8,7 @@ use aya::{ }; use checks::Checks; use libc::c_char; -use log::{debug, error, info}; +use log::{error, info}; use tokio::{ io::unix::AsyncFd, sync::{mpsc, watch}, @@ -17,7 +17,7 @@ use tokio::{ use crate::{event::Event, host_info, metrics::EventCounter}; -use fact_ebpf::{event_t, inode_key_t, inode_value_t, metrics_t, path_prefix_t, LPM_SIZE_MAX}; +use fact_ebpf::{inode_key_t, inode_value_t, metrics_t, path_prefix_t, LPM_SIZE_MAX}; mod checks; @@ -187,12 +187,10 @@ impl Bpf { .context("ringbuffer guard held while runtime is stopping")?; let ringbuf = guard.get_inner_mut(); while let Some(event) = ringbuf.next() { - let event: &event_t = unsafe { &*(event.as_ptr() as *const _) }; let event = match Event::try_from(event) { Ok(event) => event, Err(e) => { - error!("Failed to parse event: '{e}'"); - debug!("Event: {event:?}"); + error!("Failed to parse event: '{e:?}'"); event_counter.dropped(); continue; } diff --git a/fact/src/event/mod.rs b/fact/src/event/mod.rs index f2b419ab..d5477299 100644 --- a/fact/src/event/mod.rs +++ b/fact/src/event/mod.rs @@ -1,32 +1,33 @@ +//! Event handling module +//! +//! 
This module provides Rust types that make it easier and safer to +//! interact with the information received from the BPF programs. +//! +//! The main interest is the `Event` type, which can be parsed from an +//! element received from the ringbuffer and passed around to other +//! components. + #[cfg(test)] use std::time::{SystemTime, UNIX_EPOCH}; use std::{ - ffi::{CStr, OsString}, - os::{raw::c_char, unix::ffi::OsStringExt}, + ffi::OsStr, + ops::{BitOrAssign, ShlAssign}, + os::unix::ffi::OsStrExt, path::PathBuf, }; +use anyhow::bail; +use aya::maps::ring_buf::RingBufItem; +use log::warn; use serde::Serialize; -use fact_ebpf::{event_t, file_activity_type_t, inode_key_t}; +use fact_ebpf::{file_activity_type_t, inode_key_t}; use crate::host_info; use process::Process; pub(crate) mod process; -fn slice_to_pathbuf(s: &[c_char]) -> PathBuf { - #[cfg(target_arch = "x86_64")] - let v = s.iter().map(|c| *c as u8).collect::>(); - #[cfg(not(target_arch = "x86_64"))] - let v = s.to_vec(); - OsString::from_vec(v).into() -} - -fn slice_to_string(s: &[c_char]) -> anyhow::Result { - Ok(unsafe { CStr::from_ptr(s.as_ptr()) }.to_str()?.to_owned()) -} - fn timestamp_to_proto(ts: u64) -> prost_types::Timestamp { let seconds = (ts / 1_000_000_000) as i64; let nanos = (ts % 1_000_000_000) as i32; @@ -89,17 +90,99 @@ impl Event { FileData::Unlink(data) => data.host_file = host_path, } } + + /// Parse an integer value from the supplied slice. + /// + /// This method parses integers as they are added to the ringbuffer + /// by the kernel side BPF programs. For simplicity, integers are + /// always loaded in Big Endian format regardless of the + /// architecture the program runs on. 
+ fn parse_int(s: &[u8]) -> Option<(T, &[u8])> + where + T: From + BitOrAssign + ShlAssign, + { + let len = size_of::(); + let (val, s) = s.split_at_checked(len)?; + let mut res = T::from(0); + for byte in val { + // Types with size of 1 byte cannot be shifted since they + // would overflow, so we only shift bigger types. + if len > 1 { + res <<= 8; + } + res |= (*byte).into(); + } + Some((res, s)) + } + + /// Parse a buffer from the supplied slice. + /// + /// This method parses buffers as they are added to the ringbuffer + /// by the kernel side BPF programs. The format these programs use + /// is relatively straightforward: they use 2 bytes in Big Endian + /// format to encode the length of the buffer in bytes, then put + /// the buffer right after, looking something like this: + /// + /// |--|--------------|------- + /// | | | ^ rest of the event + /// | | ^ buffer end + /// | ^ buffer start + /// ^ length of the buffer + /// + /// This makes parsing fairly easy in userspace: we can simply + /// parse a u16 for the size of the buffer, then take as many bytes + /// as that value indicates. + /// + /// This representation also works for both strings and binary + /// blobs, so it allows for quite good flexibility, leaving the + /// specialization of the type to the caller. 
+ fn parse_buffer(s: &[u8]) -> Option<(&[u8], &[u8])> { + let (len, s) = Event::parse_int::(s)?; + s.split_at_checked(len as usize) + } } -impl TryFrom<&event_t> for Event { +impl TryFrom> for Event { type Error = anyhow::Error; - fn try_from(value: &event_t) -> Result { - let process = Process::try_from(value.process)?; - let timestamp = host_info::get_boot_time() + value.timestamp; - let filename_len = value.filename_len as usize; - let (filename, _) = value.filename.as_slice().split_at(filename_len - 1); - let file = FileData::new(value.type_, filename, value.inode)?; + fn try_from(value: RingBufItem) -> Result { + let Some((event_type, value)) = Event::parse_int::(&value) else { + bail!("Failed to read event type"); + }; + let event_type = event_type.into(); + + let Some((timestamp, value)) = Event::parse_int::(value) else { + bail!("Failed to parse timestamp"); + }; + let timestamp = timestamp + host_info::get_boot_time(); + + let (process, value) = Process::parse(value)?; + + let Some((inode, value)) = Event::parse_int::(value) else { + bail!("Failed to parse inode"); + }; + let Some((dev, value)) = Event::parse_int::(value) else { + bail!("Failed to parse device number"); + }; + let inode = inode_key_t { inode, dev }; + let Some((filename, value)) = Event::parse_buffer(value) else { + bail!("Failed to parse filename"); + }; + let filename = OsStr::from_bytes(filename).into(); + let file = FileData::new(event_type, filename, inode)?; + + // Handling of special fields. + // TODO: Currently implemented events have no special fields. 
+ match event_type { + file_activity_type_t::FILE_ACTIVITY_CREATION + | file_activity_type_t::FILE_ACTIVITY_OPEN + | file_activity_type_t::FILE_ACTIVITY_UNLINK => {} + invalid => unreachable!("missing special field treatment for event type {invalid:?}"), + } + + if !value.is_empty() { + warn!("Event has remaining data"); + } Ok(Event { timestamp, @@ -142,7 +225,7 @@ pub enum FileData { impl FileData { pub fn new( event_type: file_activity_type_t, - filename: &[c_char], + filename: PathBuf, inode: inode_key_t, ) -> anyhow::Result { let inner = BaseFileData::new(filename, inode)?; @@ -199,9 +282,7 @@ pub struct BaseFileData { } impl BaseFileData { - pub fn new(filename: &[c_char], inode: inode_key_t) -> anyhow::Result { - let filename = slice_to_pathbuf(filename); - + pub fn new(filename: PathBuf, inode: inode_key_t) -> anyhow::Result { Ok(BaseFileData { filename, host_file: PathBuf::new(), // this field is set by HostScanner @@ -225,3 +306,146 @@ impl From for fact_api::FileActivityBase { } } } + +#[cfg(test)] +mod tests { + use std::fmt::Debug; + + use super::*; + + struct ParseIntTestCase<'a, T> { + input: &'a [u8], + expected: Option<(T, &'a [u8])>, + } + + impl<'a, T> ParseIntTestCase<'a, T> { + fn new(input: &'a [u8], expected: Option<(T, &'a [u8])>) -> Self { + ParseIntTestCase { input, expected } + } + } + + fn test_parse_int(ParseIntTestCase { input, expected }: &ParseIntTestCase) + where + T: From + BitOrAssign + ShlAssign + Debug + PartialEq, + { + let res = Event::parse_int::(input); + assert_eq!( + res, *expected, + "\ninput: {input:#x?}\nexpected: {expected:#x?}\nres: {res:#x?}" + ) + } + + #[test] + fn test_parse_u8() { + let tests = &[ + ParseIntTestCase::new(&[0xef], Some((0xef, &[]))), + ParseIntTestCase::new(&[0xef, 0x00], Some((0xef, &[0x00]))), + ParseIntTestCase::new(&[0xbe, 0xef, 0x00], Some((0xbe, &[0xef, 0x00]))), + ParseIntTestCase::new(&[], None), + ]; + + for test in tests { + test_parse_int::(test); + } + } + + #[test] + fn 
test_parse_u16() { + let tests = &[ + ParseIntTestCase::new(&[0xbe, 0xef], Some((0xbeef, &[]))), + ParseIntTestCase::new(&[0xbe, 0xef, 0x00], Some((0xbeef, &[0x00]))), + ParseIntTestCase::new( + &[0xbe, 0xef, 0xbe, 0xef, 0x00], + Some((0xbeef, &[0xbe, 0xef, 0x00])), + ), + ParseIntTestCase::new(&[0xef], None), + ParseIntTestCase::new(&[], None), + ]; + + for test in tests { + test_parse_int::(test); + } + } + + #[test] + fn test_parse_u32() { + let tests = &[ + ParseIntTestCase::new(&[0xde, 0xad, 0xbe, 0xef], Some((0xdeadbeef, &[]))), + ParseIntTestCase::new(&[0xde, 0xad, 0xbe, 0xef, 0x00], Some((0xdeadbeef, &[0x00]))), + ParseIntTestCase::new( + &[0xde, 0xad, 0xbe, 0xef, 0xde, 0xad, 0xbe, 0xef, 0x00], + Some((0xdeadbeef, &[0xde, 0xad, 0xbe, 0xef, 0x00])), + ), + ParseIntTestCase::new(&[0xad, 0xbe, 0xef], None), + ParseIntTestCase::new(&[], None), + ]; + + for test in tests { + test_parse_int::(test); + } + } + + #[test] + fn test_parse_u64() { + let tests = &[ + ParseIntTestCase::new( + &[0xde, 0xad, 0xbe, 0xef, 0xde, 0xad, 0xbe, 0xef], + Some((0xdeadbeefdeadbeef, &[])), + ), + ParseIntTestCase::new( + &[0xde, 0xad, 0xbe, 0xef, 0xde, 0xad, 0xbe, 0xef, 0x00], + Some((0xdeadbeefdeadbeef, &[0x00])), + ), + ParseIntTestCase::new( + &[ + 0xde, 0xad, 0xbe, 0xef, 0xde, 0xad, 0xbe, 0xef, 0xde, 0xad, 0xbe, 0xef, 0xde, + 0xad, 0xbe, 0xef, 0x00, + ], + Some(( + 0xdeadbeefdeadbeef, + &[0xde, 0xad, 0xbe, 0xef, 0xde, 0xad, 0xbe, 0xef, 0x00], + )), + ), + ParseIntTestCase::new(&[0xad, 0xbe, 0xef, 0xde, 0xad, 0xbe, 0xef], None), + ParseIntTestCase::new(&[], None), + ]; + + for test in tests { + test_parse_int::(test); + } + } + + #[test] + fn test_parse_buffer() { + struct TestCase<'a> { + input: &'a [u8], + expected: Option<(&'a [u8], &'a [u8])>, + } + let tests = &[ + TestCase { + input: b"\x00\x0B/usr/bin/rm", + expected: Some((b"/usr/bin/rm", &[])), + }, + TestCase { + input: b"\x00\x0B/usr/bin/", + expected: None, + }, + TestCase { + input: b"\x00\x0E/usr/bin/touch ignored", 
+ expected: Some((b"/usr/bin/touch", b" ignored")), + }, + TestCase { + input: b"\x00\x00\x00\x0E/usr/bin/touch ignored", + expected: Some((b"", b"\x00\x0E/usr/bin/touch ignored")), + }, + TestCase { + input: b"", + expected: None, + }, + ]; + for TestCase { input, expected } in tests { + let res = Event::parse_buffer(input); + assert_eq!(res, *expected, "input: {}", String::from_utf8_lossy(input)); + } + } +} diff --git a/fact/src/event/process.rs b/fact/src/event/process.rs index d8dc7487..67bccb30 100644 --- a/fact/src/event/process.rs +++ b/fact/src/event/process.rs @@ -1,36 +1,43 @@ -use std::{ffi::CStr, path::PathBuf}; - -use fact_ebpf::{lineage_t, process_t}; -use serde::Serialize; +use core::str; +use std::{ + ffi::{CStr, CString, OsStr}, + os::unix::ffi::OsStrExt, + path::PathBuf, +}; + +use anyhow::bail; +use serde::{ser::SerializeSeq, Serialize, Serializer}; use uuid::Uuid; -use crate::host_info; +use crate::host_info::get_username; -use super::{slice_to_pathbuf, slice_to_string}; +use super::Event; #[derive(Debug, Clone, Default, Serialize)] pub struct Lineage { uid: u32, - exe_path: String, + exe_path: PathBuf, } impl Lineage { - fn new(uid: u32, exe_path: &str) -> Self { - Lineage { - uid, - exe_path: exe_path.to_owned(), - } - } -} - -impl TryFrom<&lineage_t> for Lineage { - type Error = anyhow::Error; + /// Parse a `Lineage` object from a ringbuffer event. + /// + /// # Safety + /// + /// * The order of fields parsed must match the order used by the + /// BPF programs. 
+ fn parse(value: &[u8]) -> anyhow::Result<(Self, &[u8])> { + let Some((uid, value)) = Event::parse_int::(value) else { + bail!("Failed to parse lineage uid"); + }; + let Some((exe_path, value)) = Event::parse_buffer(value) else { + bail!("Failed to parse lineage exe_path"); + }; + let exe_path = OsStr::from_bytes(exe_path).into(); - fn try_from(value: &lineage_t) -> Result { - let lineage_t { uid, exe_path } = value; - let exe_path = unsafe { CStr::from_ptr(exe_path.as_ptr()) }.to_str()?; + let lineage = Lineage { uid, exe_path }; - Ok(Lineage::new(*uid, exe_path)) + Ok((lineage, value)) } } @@ -39,15 +46,42 @@ impl From for fact_api::process_signal::LineageInfo { let Lineage { uid, exe_path } = value; Self { parent_uid: uid, - parent_exec_file_path: exe_path, + parent_exec_file_path: exe_path.to_string_lossy().to_string(), } } } +#[cfg(test)] +impl PartialEq for Lineage { + fn eq(&self, other: &Self) -> bool { + self.uid == other.uid && self.exe_path == other.exe_path + } +} + +fn serialize_lossy_string(value: &CString, serializer: S) -> Result +where + S: Serializer, +{ + value.to_string_lossy().serialize(serializer) +} + +fn serialize_vector_lossy_string(value: &Vec, serializer: S) -> Result +where + S: Serializer, +{ + let mut seq = serializer.serialize_seq(Some(value.len()))?; + for i in value { + seq.serialize_element(&i.to_string_lossy().to_string())?; + } + seq.end() +} + #[derive(Debug, Clone, Default, Serialize)] pub struct Process { - comm: String, - args: Vec, + #[serde(serialize_with = "serialize_lossy_string")] + comm: CString, + #[serde(serialize_with = "serialize_vector_lossy_string")] + args: Vec, exe_path: PathBuf, container_id: Option, uid: u32, @@ -67,7 +101,9 @@ impl Process { use crate::host_info::{get_host_mount_ns, get_mount_ns}; let exe_path = std::env::current_exe().expect("Failed to get current exe"); - let args = std::env::args().collect::>(); + let args = std::env::args() + .map(|a| CString::new(a.into_bytes()).unwrap()) + 
.collect::>(); let cgroup = std::fs::read_to_string("/proc/self/cgroup").expect("Failed to read cgroup"); let container_id = Process::extract_container_id(&cgroup); let uid = unsafe { libc::getuid() }; @@ -81,7 +117,7 @@ impl Process { let in_root_mount_ns = get_host_mount_ns() == get_mount_ns(&pid.to_string(), false); Self { - comm: "".to_string(), + comm: c"".into(), args, exe_path, container_id, @@ -118,67 +154,125 @@ impl Process { None } } -} -#[cfg(test)] -impl PartialEq for Process { - fn eq(&self, other: &Self) -> bool { - self.uid == other.uid - && self.login_uid == other.login_uid - && self.gid == other.gid - && self.exe_path == other.exe_path - && self.args == other.args - && self.container_id == other.container_id - && self.in_root_mount_ns == other.in_root_mount_ns + /// Parse the process comm value. + /// + /// For simplicity, the kernel side BPF program loads the result of + /// calling the bpf_get_current_comm helper directly onto the event. + /// The resulting value loaded in is 16 bytes with a guaranteed + /// null terminator and null padding if needed. + /// + /// We could save a few bytes if we were to retrieve the string + /// length in kernel side and load a generic buffer onto the event + /// like `Event::parse_buffer` expects, but we would need to do a + /// bit more work kernel side that is not worth it. 
+ fn parse_comm(s: &[u8]) -> Option<(CString, &[u8])> { + let (val, s) = s.split_at_checked(16)?; + let res = CStr::from_bytes_until_nul(val).ok()?; + Some((res.to_owned(), s)) } -} -impl TryFrom for Process { - type Error = anyhow::Error; - - fn try_from(value: process_t) -> Result { - let comm = slice_to_string(value.comm.as_slice())?; - let exe_path_len = value.exe_path_len as usize; - let (exe_path, _) = value.exe_path.as_slice().split_at(exe_path_len - 1); - let exe_path = slice_to_pathbuf(exe_path); - let memory_cgroup = unsafe { CStr::from_ptr(value.memory_cgroup.as_ptr()) }.to_str()?; - let container_id = Process::extract_container_id(memory_cgroup); - let in_root_mount_ns = value.in_root_mount_ns != 0; - - let lineage = value.lineage[..value.lineage_len as usize] - .iter() - .map(Lineage::try_from) + /// Parse the arguments of a process. + /// + /// The kernel stores arguments as a sequence of null terminated + /// strings in a single buffer, we copy that blob directly onto the + /// ringbuffer and prepend the actual length we copied in the same + /// way `Event::parse_buffer` expects. This way we can read the + /// buffer and then iterate over the null strings, mapping them to + /// `CString`s in a vector. + /// + /// # Safety + /// + /// * The BPF program loading the arguments must ensure the last + /// portion ends with a null terminator, even if we truncate it + /// for performance reasons. + fn parse_args(s: &[u8]) -> anyhow::Result<(Vec, &[u8])> { + let Some((buf, s)) = Event::parse_buffer(s) else { + bail!("Failed to get arguments length"); + }; + + let args = buf + .split_inclusive(|a| *a == 0) + .map(|arg| CString::from_vec_with_nul(arg.to_vec())) .collect::, _>>()?; + Ok((args, s)) + } - let mut converted_args = Vec::new(); - let args_len = value.args_len as usize; - let mut offset = 0; - while offset < args_len { - let arg = unsafe { CStr::from_ptr(value.args.as_ptr().add(offset)) } - .to_str()? 
- .to_owned(); - if arg.is_empty() { - break; - } - offset += arg.len() + 1; - converted_args.push(arg); + /// Parse a `Process` from a ringbuffer event. + /// + /// # Safety + /// + /// * The order of fields must match the order used by the BPF + /// programs. + pub(super) fn parse(value: &[u8]) -> anyhow::Result<(Self, &[u8])> { + let Some((uid, value)) = Event::parse_int::(value) else { + bail!("Failed to parse uid"); + }; + let username = get_username(uid); + let Some((gid, value)) = Event::parse_int::(value) else { + bail!("Failed to parse gid"); + }; + let Some((login_uid, value)) = Event::parse_int::(value) else { + bail!("Failed to parse login_uid"); + }; + let Some((pid, value)) = Event::parse_int::(value) else { + bail!("Failed to parse pid"); + }; + let Some((comm, value)) = Process::parse_comm(value) else { + bail!("Failed to parse comm"); + }; + let (args, value) = Process::parse_args(value)?; + let Some((exe_path, value)) = Event::parse_buffer(value) else { + bail!("Failed to parse exe_path"); + }; + let exe_path = OsStr::from_bytes(exe_path).into(); + let Some((cgroup, value)) = Event::parse_buffer(value) else { + bail!("Failed to parse cgroup"); + }; + let cgroup = str::from_utf8(cgroup)?; + let container_id = Process::extract_container_id(cgroup); + let Some((in_root_mount_ns, value)) = Event::parse_int::(value) else { + bail!("Failed to parse in_root_mount_ns"); + }; + let in_root_mount_ns = in_root_mount_ns != 0; + let Some((lineage_len, mut value)) = Event::parse_int::(value) else { + bail!("Failed to parse lineage length"); + }; + let mut lineage = Vec::with_capacity(lineage_len as usize); + for _ in 0..lineage_len { + let (l, v) = Lineage::parse(value)?; + value = v; + lineage.push(l); } - let username = host_info::get_username(value.uid); - - Ok(Process { + let process = Process { comm, - args: converted_args, + uid, + username, + gid, + login_uid, + pid, + args, exe_path, container_id, - uid: value.uid, - username, - gid: value.gid, - 
login_uid: value.login_uid, - pid: value.pid, in_root_mount_ns, lineage, - }) + }; + + Ok((process, value)) + } +} + +#[cfg(test)] +impl PartialEq for Process { + fn eq(&self, other: &Self) -> bool { + self.uid == other.uid + && self.login_uid == other.login_uid + && self.gid == other.gid + && self.exe_path == other.exe_path + && self.args == other.args + && self.container_id == other.container_id + && self.in_root_mount_ns == other.in_root_mount_ns } } @@ -202,6 +296,7 @@ impl From for fact_api::ProcessSignal { let args = args .into_iter() + .map(|a| a.to_string_lossy().to_string()) .reduce(|acc, i| acc + " " + &i) .unwrap_or("".to_owned()); @@ -209,7 +304,7 @@ impl From for fact_api::ProcessSignal { id: Uuid::new_v4().to_string(), container_id, creation_time: None, - name: comm, + name: comm.to_string_lossy().to_string(), args, exec_file_path: exe_path.to_string_lossy().into_owned(), pid, @@ -267,4 +362,176 @@ mod tests { assert_eq!(id, expected); } } + + #[test] + fn test_parse_comm() { + struct TestCase<'a> { + input: &'a [u8], + expected: Option<(CString, &'a [u8])>, + } + let tests = [ + TestCase { + input: b"touch\0\0\0\0\0\0\0\0\0\0\0", + expected: Some((CString::from(c"touch"), b"")), + }, + TestCase { + input: b"touch\0\0\0\0\0\0\0\0\0\0\0ignored", + expected: Some((CString::from(c"touch"), b"ignored")), + }, + TestCase { + input: b"", + expected: None, + }, + ]; + + for TestCase { input, expected } in tests { + let res = Process::parse_comm(input); + assert_eq!(res, expected, "input: {}", String::from_utf8_lossy(input)); + } + } + + #[test] + fn test_parse_args() { + struct TestCase<'a> { + input: &'a [u8], + expected: anyhow::Result<(Vec, &'a [u8])>, + } + let tests = [ + TestCase { + input: b"\x00\x03id\0", + expected: Ok((vec![CString::from(c"id")], b"")), + }, + TestCase { + input: b"\x00\x12rm\0-rf\0/some/path\0", + expected: Ok(( + vec![ + CString::from(c"rm"), + CString::from(c"-rf"), + CString::from(c"/some/path"), + ], + b"", + )), + }, + 
TestCase { + input: b"\x00\x12rm\0-rf\0/some/path\0ignored", + expected: Ok(( + vec![ + CString::from(c"rm"), + CString::from(c"-rf"), + CString::from(c"/some/path"), + ], + b"ignored", + )), + }, + TestCase { + input: b"\x00\x13rm\0-rf\0/some/path\0\0ignored", + expected: Ok(( + vec![ + CString::from(c"rm"), + CString::from(c"-rf"), + CString::from(c"/some/path"), + CString::from(c""), + ], + b"ignored", + )), + }, + TestCase { + input: b"", + expected: Err(anyhow::anyhow!("Failed to get arguments length")), + }, + TestCase { + input: b"\x00\x11rm\0-rf\0/some/path", + expected: Err(anyhow::anyhow!("data provided is not nul terminated")), + }, + ]; + for TestCase { input, expected } in tests { + let res = Process::parse_args(input); + match (res, expected) { + (Ok(res), Ok(expected)) => { + assert_eq!(res, expected, "input: '{}'", String::from_utf8_lossy(input)) + } + (Err(res), Err(expected)) => { + let res = format!("{res:?}"); + let expected = format!("{expected:?}"); + assert_eq!(res, expected, "input: '{}'", String::from_utf8_lossy(input)); + } + (left, right) => { + panic!( + "Result mismatch\nleft: {left:#?}\nright: {right:#?}\ninput: '{}'", + String::from_utf8_lossy(input) + ) + } + } + } + } + + #[test] + fn test_parse_lineage() { + struct TestCase<'a> { + input: &'a [u8], + expected: anyhow::Result<(Lineage, &'a [u8])>, + } + let tests = [ + TestCase { + input: b"\x00\x00\x03\xE8\x00\x0D/usr/bin/bash", + expected: Ok(( + Lineage { + exe_path: PathBuf::from("/usr/bin/bash"), + uid: 1000, + }, + b"", + )), + }, + TestCase { + input: b"\x00\x00\x03\xE8\x00\x0D/usr/bin/bashignored", + expected: Ok(( + Lineage { + exe_path: PathBuf::from("/usr/bin/bash"), + uid: 1000, + }, + b"ignored", + )), + }, + TestCase { + input: b"", + expected: Err(anyhow::anyhow!("Failed to parse lineage uid")), + }, + TestCase { + input: b"\x00\x00\x03", + expected: Err(anyhow::anyhow!("Failed to parse lineage uid")), + }, + TestCase { + input: 
b"\x00\x00\x03\xE8\x00\x0D/usr/bin/bas", + expected: Err(anyhow::anyhow!("Failed to parse lineage exe_path")), + }, + ]; + + for TestCase { input, expected } in tests { + let lineage = Lineage::parse(input); + match (lineage, expected) { + (Ok(lineage), Ok(expected)) => assert_eq!( + lineage, + expected, + "input: {}", + String::from_utf8_lossy(input) + ), + (Err(lineage), Err(expected)) => { + let lineage = format!("{lineage:?}"); + let expected = format!("{expected:?}"); + assert_eq!( + lineage, + expected, + "input: {}", + String::from_utf8_lossy(input) + ); + } + (left, right) => { + panic!( + "Result mismatch\nleft: {left:#?}\nright: {right:#?}\ninput: '{}'", + String::from_utf8_lossy(input) + ) + } + } + } + } } diff --git a/fact/src/host_scanner.rs b/fact/src/host_scanner.rs index ac2b5bae..3fb590d3 100644 --- a/fact/src/host_scanner.rs +++ b/fact/src/host_scanner.rs @@ -106,8 +106,8 @@ impl HostScanner { let metadata = path.metadata()?; let inode = inode_key_t { - inode: metadata.st_ino(), - dev: metadata.st_dev(), + inode: metadata.st_ino() as u32, + dev: metadata.st_dev() as u32, }; self.kernel_inode_map