-
Notifications
You must be signed in to change notification settings - Fork 1
ROX-30437: basic inode tracking for host path resolution #166
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
2dba48e
d0d21b9
ce0b5fc
23c63ba
e01148b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
This file was deleted.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,108 @@ | ||
| #pragma once | ||
|
|
||
| // clang-format off | ||
| #include "vmlinux.h" | ||
|
|
||
| #include "kdev.h" | ||
| #include "types.h" | ||
| #include "maps.h" | ||
|
|
||
| #include <bpf/bpf_core_read.h> | ||
| #include <bpf/bpf_helpers.h> | ||
| // clang-format on | ||
|
|
||
| #define BTRFS_SUPER_MAGIC 0x9123683E | ||
|
|
||
| /** | ||
| * Retrieve the inode and device numbers and return them as a new key. | ||
| * | ||
| * Different filesystems may `stat` files in different ways, if support | ||
| * for a new filesystem is needed, add it here. | ||
| * | ||
| * Most Linux filesystems use the following generic function to fill | ||
| * these fields when running `stat`: | ||
| * https://github.com/torvalds/linux/blob/7d0a66e4bb9081d75c82ec4957c50034cb0ea449/fs/stat.c#L82 | ||
| * | ||
| * The method used to retrieve the device is different in btrfs and can | ||
| * be found here: | ||
| * https://github.com/torvalds/linux/blob/7d0a66e4bb9081d75c82ec4957c50034cb0ea449/fs/btrfs/inode.c#L8038 | ||
| */ | ||
| __always_inline static inode_key_t inode_to_key(struct inode* inode) { | ||
| inode_key_t key = {0}; | ||
| if (inode == NULL) { | ||
| return key; | ||
| } | ||
|
|
||
| unsigned long magic = inode->i_sb->s_magic; | ||
| switch (magic) { | ||
| case BTRFS_SUPER_MAGIC: | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. From what I understand, btrfs support is mostly useful for development on Fedora, but OCP doesn't use it. In that case what do you think about turning it into a set of if/else conditions, wrapped into likely/unlikely macro as needed, to make sure that we account for the most anticipated result?
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. With the check right below this line: if (bpf_core_type_exists(struct btrfs_inode)) {My understanding is that the verifier will remove the block entirely when btrfs is not used in the system because it is dead code. In this case, we should never go in this branch of the switch-case, since no event will have the btrfs magic number, but there will also not be any additional code because it was removed by the verifier. Do we need to complicate it further than this? If this was a kernel module, I would 100% agree with you, but I believe letting the verifier remove the code is the way to go here.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The verifier will drop the btrfs part, but the switch condition with it's overhead will stay. But in this particular case it probably doesn't matter indeed. The likely/unlikely is mostly a signal to the compiler to adapt the code layout for the locality -- and since the current layout is rather simple, looks like clang doesn't change anything that much:
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We can twist the condition a bit more to look something like this: if (bpf_core_type_exists(struct btrfs_inode) && unlikely(magic == BTRFS_SUPER_MAGIC)) {
struct btrfs_inode* btrfs_inode = container_of(inode, struct btrfs_inode, vfs_inode);
key.inode = inode->i_ino;
key.dev = BPF_CORE_READ(btrfs_inode, root, anon_dev);
} else {
key.inode = inode->i_ino;
key.dev = inode->i_sb->s_dev;
}I'm not 100% sure how to get the same output you got, but this might make the compiler output something a bit better while still letting the verifier remove the first check when btrfs is configured or the entire block when it is not? |
||
| if (bpf_core_type_exists(struct btrfs_inode)) { | ||
| struct btrfs_inode* btrfs_inode = container_of(inode, struct btrfs_inode, vfs_inode); | ||
| key.inode = inode->i_ino; | ||
| key.dev = BPF_CORE_READ(btrfs_inode, root, anon_dev); | ||
| break; | ||
| } | ||
| // If the btrfs_inode does not exist, most likely it is not | ||
| // supported on the system. Fallback to the generic implementation | ||
| // just in case. | ||
| default: | ||
| key.inode = inode->i_ino; | ||
| key.dev = inode->i_sb->s_dev; | ||
| break; | ||
| } | ||
|
|
||
| // Encode the device so it matches with the result of `stat` in | ||
| // userspace | ||
| key.dev = new_encode_dev(key.dev); | ||
|
|
||
| return key; | ||
| } | ||
|
|
||
| __always_inline static inode_value_t* inode_get(struct inode_key_t* inode) { | ||
| if (inode == NULL) { | ||
| return NULL; | ||
| } | ||
| return bpf_map_lookup_elem(&inode_map, inode); | ||
| } | ||
|
|
||
| __always_inline static long inode_remove(struct inode_key_t* inode) { | ||
| if (inode == NULL) { | ||
| return 0; | ||
| } | ||
| return bpf_map_delete_elem(&inode_map, inode); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. No need for null check?
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This method is currently only used like this: inode_remove(&inode_key);Which means the key will never be null, but you are right, I will add the null check. |
||
| } | ||
|
|
||
| typedef enum inode_monitored_t { | ||
| NOT_MONITORED = 0, | ||
| MONITORED, | ||
| } inode_monitored_t; | ||
|
|
||
| /** | ||
| * Check if the provided inode is being monitored. | ||
| * | ||
| * The current implementation is very basic and might seem like | ||
| * overkill, but in the near future this function will be extended to | ||
| * check if the parent of the provided inode is monitored and provide | ||
| * different results for handling more complicated scenarios. | ||
| */ | ||
| __always_inline static inode_monitored_t inode_is_monitored(const inode_value_t* inode) { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. That looks overly verbose for what seems to be a null check under the hood. Any reason why is this needed?
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The current state is very minimal, but the next step to properly do inode tracking will require us to keep track of directories, so when a file is accessed, we will need to know if the parent directory is monitored in case the file is being created and it should be tracked. This function is in preparation to extend the
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If you feel this is too much for the current implementation, I'm happy to roll back to a simple null check and add everything back in the next stage of the implementation.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. No, it's fine by me, just have some short doc string to this function explaining this pls. |
||
| if (inode != NULL) { | ||
| return MONITORED; | ||
| } | ||
|
|
||
| return NOT_MONITORED; | ||
| } | ||
|
|
||
| __always_inline static void inode_copy_or_reset(inode_key_t* dst, const inode_key_t* src) { | ||
| if (dst == NULL) { | ||
| return; | ||
| } | ||
|
|
||
| if (src != NULL) { | ||
| dst->inode = src->inode; | ||
| dst->dev = src->dev; | ||
| } else { | ||
| dst->inode = 0; | ||
| dst->dev = 0; | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,22 @@ | ||
| #pragma once | ||
|
|
||
| // clang-format off | ||
| #include "vmlinux.h" | ||
|
|
||
| #include <bpf/bpf_helpers.h> | ||
| // clang-format on | ||
|
|
||
| // Most of the code in this file is taken from: | ||
| // https://github.com/torvalds/linux/blob/559e608c46553c107dbba19dae0854af7b219400/include/linux/kdev_t.h | ||
|
|
||
| #define MINORBITS 20 | ||
| #define MINORMASK ((1U << MINORBITS) - 1) | ||
|
|
||
| #define MAJOR(dev) ((unsigned int)((dev) >> MINORBITS)) | ||
| #define MINOR(dev) ((unsigned int)((dev) & MINORMASK)) | ||
|
|
||
| __always_inline static u32 new_encode_dev(dev_t dev) { | ||
| unsigned major = MAJOR(dev); | ||
| unsigned minor = MINOR(dev); | ||
| return (minor & 0xff) | (major << 8) | ((minor & ~0xff) << 12); | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -3,7 +3,7 @@ | |
|
|
||
| #include "file.h" | ||
| #include "types.h" | ||
| #include "process.h" | ||
| #include "inode.h" | ||
| #include "maps.h" | ||
| #include "events.h" | ||
| #include "bound_path.h" | ||
|
|
@@ -44,12 +44,19 @@ int BPF_PROG(trace_file_open, struct file* file) { | |
| return 0; | ||
| } | ||
|
|
||
| if (!is_monitored(path)) { | ||
| goto ignored; | ||
| inode_key_t inode_key = inode_to_key(file->f_inode); | ||
| const inode_value_t* inode = inode_get(&inode_key); | ||
| switch (inode_is_monitored(inode)) { | ||
| case NOT_MONITORED: | ||
| if (!is_monitored(path)) { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Are there currently any scenarios where an inode is monitored, but the path is not?
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It could happen if the file is renamed or a hard link is used. Renames we don't support yet so I don't worry much about this. Accessing a file via a hardlink should work with the current implementation but I haven't had time to test it yet. I believe |
||
| goto ignored; | ||
| } | ||
| break; | ||
| case MONITORED: | ||
| break; | ||
| } | ||
|
|
||
| struct dentry* d = BPF_CORE_READ(file, f_path.dentry); | ||
| submit_event(&m->file_open, event_type, path->path, d, true); | ||
| submit_event(&m->file_open, event_type, path->path, &inode_key, true); | ||
|
|
||
| return 0; | ||
|
|
||
|
|
@@ -91,12 +98,27 @@ int BPF_PROG(trace_path_unlink, struct path* dir, struct dentry* dentry) { | |
| goto error; | ||
| } | ||
|
|
||
| if (!is_monitored(path)) { | ||
| m->path_unlink.ignored++; | ||
| return 0; | ||
| inode_key_t inode_key = inode_to_key(dentry->d_inode); | ||
| const inode_value_t* inode = inode_get(&inode_key); | ||
|
|
||
| switch (inode_is_monitored(inode)) { | ||
| case NOT_MONITORED: | ||
| if (!is_monitored(path)) { | ||
| m->path_unlink.ignored++; | ||
| return 0; | ||
| } | ||
| break; | ||
|
|
||
| case MONITORED: | ||
| inode_remove(&inode_key); | ||
| break; | ||
| } | ||
|
|
||
| submit_event(&m->path_unlink, FILE_ACTIVITY_UNLINK, path->path, dentry, path_unlink_supports_bpf_d_path); | ||
| submit_event(&m->path_unlink, | ||
| FILE_ACTIVITY_UNLINK, | ||
| path->path, | ||
| &inode_key, | ||
| path_unlink_supports_bpf_d_path); | ||
| return 0; | ||
|
|
||
| error: | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -32,6 +32,16 @@ typedef struct process_t { | |
| char in_root_mount_ns; | ||
| } process_t; | ||
|
|
||
| typedef struct inode_key_t { | ||
| unsigned long inode; | ||
| unsigned long dev; | ||
| } inode_key_t; | ||
|
|
||
| // We can't use bool here because it is not a standard C type, we would | ||
| // need to include vmlinux.h but that would explode our Rust bindings. | ||
| // For the time being we just keep a char. | ||
| typedef char inode_value_t; | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Any particular reason for char instead of bool?
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I believe the only reason I chose this is because the conversion on the rust side was easier this way, I'll double check and come back to this.
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I remember now, using There's probably a better way to go about this, for now I've gone with just using |
||
|
|
||
| typedef enum file_activity_type_t { | ||
| FILE_ACTIVITY_INIT = -1, | ||
| FILE_ACTIVITY_OPEN = 0, | ||
|
|
@@ -43,7 +53,7 @@ struct event_t { | |
| unsigned long timestamp; | ||
| process_t process; | ||
| char filename[PATH_MAX]; | ||
| char host_file[PATH_MAX]; | ||
| inode_key_t inode; | ||
| file_activity_type_t type; | ||
| }; | ||
|
|
||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.