Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions fact-ebpf/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ license.workspace = true
[dependencies]
aya = { workspace = true }
libc = { workspace = true }
serde = { workspace = true }

[build-dependencies]
anyhow = { workspace = true }
Expand Down
7 changes: 0 additions & 7 deletions fact-ebpf/src/bpf/builtins.h

This file was deleted.

20 changes: 12 additions & 8 deletions fact-ebpf/src/bpf/events.h
Original file line number Diff line number Diff line change
@@ -1,13 +1,21 @@
#pragma once

#include <bpf/bpf_helpers.h>
// clang-format off
#include "vmlinux.h"

#include "inode.h"
#include "maps.h"
#include "process.h"
#include "types.h"
#include "vmlinux.h"

__always_inline static void submit_event(struct metrics_by_hook_t* m, file_activity_type_t event_type, const char filename[PATH_MAX], struct dentry* dentry, bool use_bpf_d_path) {
#include <bpf/bpf_helpers.h>
// clang-format on

__always_inline static void submit_event(struct metrics_by_hook_t* m,
file_activity_type_t event_type,
const char filename[PATH_MAX],
inode_key_t* inode,
bool use_bpf_d_path) {
struct event_t* event = bpf_ringbuf_reserve(&rb, sizeof(struct event_t), 0);
if (event == NULL) {
m->ringbuffer_full++;
Expand All @@ -16,18 +24,14 @@ __always_inline static void submit_event(struct metrics_by_hook_t* m, file_activ

event->type = event_type;
event->timestamp = bpf_ktime_get_boot_ns();
inode_copy_or_reset(&event->inode, inode);
bpf_probe_read_str(event->filename, PATH_MAX, filename);

struct helper_t* helper = get_helper();
if (helper == NULL) {
goto error;
}

const char* p = get_host_path(helper->buf, dentry);
if (p != NULL) {
bpf_probe_read_str(event->host_file, PATH_MAX, p);
}

int64_t err = process_fill(&event->process, use_bpf_d_path);
if (err) {
bpf_printk("Failed to fill process information: %d", err);
Expand Down
47 changes: 0 additions & 47 deletions fact-ebpf/src/bpf/file.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,61 +3,14 @@
// clang-format off
#include "vmlinux.h"

#include "bound_path.h"
#include "builtins.h"
#include "d_path.h"
#include "types.h"
#include "maps.h"

#include <bpf/bpf_helpers.h>
#include <bpf/bpf_core_read.h>
// clang-format on

__always_inline static char* get_host_path(char buf[PATH_MAX * 2], struct dentry* d) {
int offset = PATH_MAX - 1;
buf[PATH_MAX - 1] = '\0';

for (int i = 0; i < 16 && offset > 0; i++) {
struct qstr d_name;
BPF_CORE_READ_INTO(&d_name, d, d_name);
if (d_name.name == NULL) {
break;
}

int len = d_name.len;
if (len <= 0 || len >= PATH_MAX) {
return NULL;
}

offset -= len;
if (offset <= 0) {
return NULL;
}

if (bpf_probe_read_kernel(&buf[offset], len, d_name.name) != 0) {
return NULL;
}

if (len == 1 && buf[offset] == '/') {
// Reached the root
offset++;
break;
}

offset--;
buf[offset] = '/';

struct dentry* parent = BPF_CORE_READ(d, d_parent);
// if we reached the root
if (parent == NULL || d == parent) {
break;
}
d = parent;
}

return &buf[offset];
}

__always_inline static bool is_monitored(struct bound_path_t* path) {
if (!filter_by_prefix()) {
// no path configured, allow all
Expand Down
108 changes: 108 additions & 0 deletions fact-ebpf/src/bpf/inode.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
#pragma once

// clang-format off
#include "vmlinux.h"

#include "kdev.h"
#include "types.h"
#include "maps.h"

#include <bpf/bpf_core_read.h>
#include <bpf/bpf_helpers.h>
// clang-format on

#define BTRFS_SUPER_MAGIC 0x9123683E

/**
* Retrieve the inode and device numbers and return them as a new key.
*
* Different filesystems may `stat` files in different ways, if support
* for a new filesystem is needed, add it here.
*
* Most Linux filesystems use the following generic function to fill
* these fields when running `stat`:
* https://github.com/torvalds/linux/blob/7d0a66e4bb9081d75c82ec4957c50034cb0ea449/fs/stat.c#L82
*
* The method used to retrieve the device is different in btrfs and can
* be found here:
* https://github.com/torvalds/linux/blob/7d0a66e4bb9081d75c82ec4957c50034cb0ea449/fs/btrfs/inode.c#L8038
*/
__always_inline static inode_key_t inode_to_key(struct inode* inode) {
inode_key_t key = {0};
if (inode == NULL) {
return key;
}

unsigned long magic = inode->i_sb->s_magic;
switch (magic) {
case BTRFS_SUPER_MAGIC:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

From what I understand, btrfs support is mostly useful for development on Fedora, but OCP doesn't use it. In that case what do you think about turning it into a set of if/else conditions, wrapped into likely/unlikely macro as needed, to make sure that we account for the most anticipated result?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

With the check right below this line:

if (bpf_core_type_exists(struct btrfs_inode)) {

My understanding is that the verifier will remove the block entirely when btrfs is not used in the system because it is dead code. In this case, we should never go in this branch of the switch-case, since no event will have the btrfs magic number, but there will also not be any additional code because it was removed by the verifier. Do we need to complicate it further than this?

If this was a kernel module, I would 100% agree with you, but I believe letting the verifier remove the code is the way to go here.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The verifier will drop the btrfs part, but the switch condition with it's overhead will stay.

But in this particular case it probably doesn't matter indeed. The likely/unlikely is mostly a signal to the compiler to adapt the code layout for the locality -- and since the current layout is rather simple, looks like clang doesn't change anything that much:

// original
; switch (magic) {
 18c:	cmpq	%rsi, %rdi
 18f:	jne	0x1e2
 191:	movl	$1, %edi
; if (bpf_core_type_exists(struct btrfs_inode)) {
 196:	testl	%edi, %edi
 198:	je	0x1e2

// with the macros
; if (unlikely(magic == BTRFS_SUPER_MAGIC)) {
 18c:	cmpq	%rsi, %rdi
 18f:	jne	0x1e8
 191:	xorl	%edi, %edi
 193:	movl	$1, %esi
; if (bpf_core_type_exists(struct btrfs_inode)) {
 198:	testl	%esi, %esi
 19a:	je	0x21e

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We can twist the condition a bit more to look something like this:

  if (bpf_core_type_exists(struct btrfs_inode) && unlikely(magic == BTRFS_SUPER_MAGIC)) {
    struct btrfs_inode* btrfs_inode = container_of(inode, struct btrfs_inode, vfs_inode);
    key.inode = inode->i_ino;
    key.dev = BPF_CORE_READ(btrfs_inode, root, anon_dev);
  } else {
    key.inode = inode->i_ino;
    key.dev = inode->i_sb->s_dev;
  }

I'm not 100% sure how to get the same output you got, but this might make the compiler output something a bit better while still letting the verifier remove the first check when btrfs is configured or the entire block when it is not?

if (bpf_core_type_exists(struct btrfs_inode)) {
struct btrfs_inode* btrfs_inode = container_of(inode, struct btrfs_inode, vfs_inode);
key.inode = inode->i_ino;
key.dev = BPF_CORE_READ(btrfs_inode, root, anon_dev);
break;
}
// If the btrfs_inode does not exist, most likely it is not
// supported on the system. Fallback to the generic implementation
// just in case.
default:
key.inode = inode->i_ino;
key.dev = inode->i_sb->s_dev;
break;
}

// Encode the device so it matches with the result of `stat` in
// userspace
key.dev = new_encode_dev(key.dev);

return key;
}

__always_inline static inode_value_t* inode_get(struct inode_key_t* inode) {
if (inode == NULL) {
return NULL;
}
return bpf_map_lookup_elem(&inode_map, inode);
}

__always_inline static long inode_remove(struct inode_key_t* inode) {
if (inode == NULL) {
return 0;
}
return bpf_map_delete_elem(&inode_map, inode);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No need for null check?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This method is currently only used like this:

      inode_remove(&inode_key);

Which means the key will never be null, but you are right, I will add the null check.

}

typedef enum inode_monitored_t {
NOT_MONITORED = 0,
MONITORED,
} inode_monitored_t;

/**
* Check if the provided inode is being monitored.
*
* The current implementation is very basic and might seem like
* overkill, but in the near future this function will be extended to
* check if the parent of the provided inode is monitored and provide
* different results for handling more complicated scenarios.
*/
__always_inline static inode_monitored_t inode_is_monitored(const inode_value_t* inode) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That looks overly verbose for what seems to be a null check under the hood. Any reason why is this needed?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The current state is very minimal, but the next step to properly do inode tracking will require us to keep track of directories, so when a file is accessed, we will need to know if the parent directory is monitored in case the file is being created and it should be tracked. This function is in preparation to extend the inode_monitored_t enum to have a PARENT_MONITORED value that will be returned when the current inode is not monitored but the parent directory is (the function also needs to take in the inode of the parent to check this condition).

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If you feel this is too much for the current implementation, I'm happy to roll back to a simple null check and add everything back in the next stage of the implementation.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, it's fine by me, just have some short doc string to this function explaining this pls.

if (inode != NULL) {
return MONITORED;
}

return NOT_MONITORED;
}

__always_inline static void inode_copy_or_reset(inode_key_t* dst, const inode_key_t* src) {
if (dst == NULL) {
return;
}

if (src != NULL) {
dst->inode = src->inode;
dst->dev = src->dev;
} else {
dst->inode = 0;
dst->dev = 0;
}
}
22 changes: 22 additions & 0 deletions fact-ebpf/src/bpf/kdev.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
#pragma once

// clang-format off
#include "vmlinux.h"

#include <bpf/bpf_helpers.h>
// clang-format on

// Most of the code in this file is taken from:
// https://github.com/torvalds/linux/blob/559e608c46553c107dbba19dae0854af7b219400/include/linux/kdev_t.h

#define MINORBITS 20
#define MINORMASK ((1U << MINORBITS) - 1)

#define MAJOR(dev) ((unsigned int)((dev) >> MINORBITS))
#define MINOR(dev) ((unsigned int)((dev) & MINORMASK))

__always_inline static u32 new_encode_dev(dev_t dev) {
unsigned major = MAJOR(dev);
unsigned minor = MINOR(dev);
return (minor & 0xff) | (major << 8) | ((minor & ~0xff) << 12);
}
40 changes: 31 additions & 9 deletions fact-ebpf/src/bpf/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

#include "file.h"
#include "types.h"
#include "process.h"
#include "inode.h"
#include "maps.h"
#include "events.h"
#include "bound_path.h"
Expand Down Expand Up @@ -44,12 +44,19 @@ int BPF_PROG(trace_file_open, struct file* file) {
return 0;
}

if (!is_monitored(path)) {
goto ignored;
inode_key_t inode_key = inode_to_key(file->f_inode);
const inode_value_t* inode = inode_get(&inode_key);
switch (inode_is_monitored(inode)) {
case NOT_MONITORED:
if (!is_monitored(path)) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are there currently any scenarios where an inode is monitored, but the path is not?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It could happen if the file is renamed or a hard link is used. Renames we don't support yet so I don't worry much about this. Accessing a file via a hardlink should work with the current implementation but I haven't had time to test it yet.

I believe bpf_d_path resolves to the real path when using symlinks, so they don't apply for this situation, but I haven't had time to test this either.

goto ignored;
}
break;
case MONITORED:
break;
}

struct dentry* d = BPF_CORE_READ(file, f_path.dentry);
submit_event(&m->file_open, event_type, path->path, d, true);
submit_event(&m->file_open, event_type, path->path, &inode_key, true);

return 0;

Expand Down Expand Up @@ -91,12 +98,27 @@ int BPF_PROG(trace_path_unlink, struct path* dir, struct dentry* dentry) {
goto error;
}

if (!is_monitored(path)) {
m->path_unlink.ignored++;
return 0;
inode_key_t inode_key = inode_to_key(dentry->d_inode);
const inode_value_t* inode = inode_get(&inode_key);

switch (inode_is_monitored(inode)) {
case NOT_MONITORED:
if (!is_monitored(path)) {
m->path_unlink.ignored++;
return 0;
}
break;

case MONITORED:
inode_remove(&inode_key);
break;
}

submit_event(&m->path_unlink, FILE_ACTIVITY_UNLINK, path->path, dentry, path_unlink_supports_bpf_d_path);
submit_event(&m->path_unlink,
FILE_ACTIVITY_UNLINK,
path->path,
&inode_key,
path_unlink_supports_bpf_d_path);
return 0;

error:
Expand Down
7 changes: 7 additions & 0 deletions fact-ebpf/src/bpf/maps.h
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,13 @@ struct {
__uint(max_entries, 8 * 1024 * 1024);
} rb SEC(".maps");

struct {
__uint(type, BPF_MAP_TYPE_HASH);
__type(key, inode_key_t);
__type(value, inode_value_t);
__uint(max_entries, 1024);
} inode_map SEC(".maps");

struct {
__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
__type(key, __u32);
Expand Down
8 changes: 4 additions & 4 deletions fact-ebpf/src/bpf/process.h
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
#pragma once

#include "file.h"
#include "maps.h"
#include "types.h"

// clang-format off
#include "vmlinux.h"

#include "d_path.h"
#include "maps.h"
#include "types.h"

#include <bpf/bpf_helpers.h>
#include <bpf/bpf_core_read.h>
// clang-format on
Expand Down
12 changes: 11 additions & 1 deletion fact-ebpf/src/bpf/types.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,16 @@ typedef struct process_t {
char in_root_mount_ns;
} process_t;

typedef struct inode_key_t {
unsigned long inode;
unsigned long dev;
} inode_key_t;

// We can't use bool here because it is not a standard C type, we would
// need to include vmlinux.h but that would explode our Rust bindings.
// For the time being we just keep a char.
typedef char inode_value_t;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Any particular reason for char instead of bool?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I believe the only reason I chose this is because the conversion on the rust side was easier this way, I'll double check and come back to this.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I remember now, using bool requires us to include vmlinux.h. If we include vmlinux.h in this file, then the generated bindings created by our build script gets massive, since everything in vmlinux.h gets a binding.

There's probably a better way to go about this, for now I've gone with just using char.


typedef enum file_activity_type_t {
FILE_ACTIVITY_INIT = -1,
FILE_ACTIVITY_OPEN = 0,
Expand All @@ -43,7 +53,7 @@ struct event_t {
unsigned long timestamp;
process_t process;
char filename[PATH_MAX];
char host_file[PATH_MAX];
inode_key_t inode;
file_activity_type_t type;
};

Expand Down
Loading
Loading