Kprobes events

写一个包含 Kprobes 注册/注销的模块稍微有点复杂, 使用 Kprobes events 就相对简单一些. 它们类似基于 tracepoint 的 events, 但是使用 Kprobes 实现, 可以动态地添加和删除.

概念

  1. 要使用此功能, 编译 Kernel 的时候, 必须设置 CONFIG_KPROBE_EVENTS=y;
  2. 类似其它 event tracer, 不需要通过 current_tracer 激活;
  3. 通过 /sys/kernel/tracing/kprobe_events 或 /sys/kernel/tracing/dynamic_events 添加 Kprobes events;
  4. 通过 /sys/kernel/tracing/events/kprobes/<EVENT>/enable 开启 events;
  5. /sys/kernel/tracing/kprobe_profile 统计 hit 多少, miss 多少.

一个简单的例子

下面的例子探测 uptime_proc_show 函数, 建立一个名为 trace_sample 的 Kprobes event.

可以看到动态添加这个 Kprobes events 之后, 对应的文件夹被建立了.

并且不需要设置 current_tracer.

# 保证环境正确
$ echo 0 > /sys/kernel/tracing/tracing_on
$ echo "" > /sys/kernel/tracing/trace

# 设置 Kprobes events
$ echo "p:trace_sample uptime_proc_show" > /sys/kernel/tracing/kprobe_events

# 查看新生成的 Kprobes 文件夹
$ ls /sys/kernel/tracing/events/kprobes/trace_sample/
enable  filter  format  hist  id  inject  trigger

$ echo 1 > /sys/kernel/tracing/events/kprobes/trace_sample/enable

# 开启 probe 
$ echo 1 > /sys/kernel/tracing/tracing_on

# 执行包含该函数的命令
$ uptime

# 查看结果
$ cat /sys/kernel/tracing/trace
# tracer: nop
#
# entries-in-buffer/entries-written: 1/1   #P:8
#
#                                _-----=> irqs-off
#                               / _----=> need-resched
#                              | / _---=> hardirq/softirq
#                              || / _--=> preempt-depth
#                              ||| / _-=> migrate-disable
#                              |||| /     delay
#           TASK-PID     CPU#  |||||  TIMESTAMP  FUNCTION
#              | |         |   |||||     |         |
          uptime-40063   [007] ..... 67233.823241: trace_sample: (uptime_proc_show+0x0/0x1d0)

# 清理
$ echo 0 > /sys/kernel/tracing/tracing_on
$ echo 0 > /sys/kernel/tracing/events/kprobes/trace_sample/enable
$ echo -:trace_sample >> /sys/kernel/tracing/kprobe_events

查看 ksys_read 系统调用返回值的例子

下面的内容都放到 test.sh 文件里, 然后执行 sh test.sh

# Print the PID of this shell; the event PID filter below is bound to it.
echo $$

# Prepare: stop tracing and clear the trace buffer
echo 0 > /sys/kernel/tracing/tracing_on
echo "" > /sys/kernel/tracing/trace

# Register the return probe.
# Single quotes are required: inside double quotes the shell expands $retval
# (an unset shell variable) to nothing, so the kernel never sees the fetch arg.
# ">>" appends; opening kprobe_events with ">" truncates it, which first
# removes every kprobe event that already exists.
echo 'r:myreturnprobe ksys_read $retval' >> /sys/kernel/tracing/kprobe_events
# Only record events raised by this shell process
echo $$ > /sys/kernel/tracing/set_event_pid

# Enable the event, then turn tracing on
echo 1 > /sys/kernel/tracing/events/kprobes/myreturnprobe/enable
echo 1 > /sys/kernel/tracing/tracing_on

# Wait a moment, then trigger a read. With command substitution this shell
# reads the child's output through a pipe, so the read comes from the
# filtered PID. "$0" is this script itself, so no fixed path is assumed.
sleep 5
output=$(cat "$0" > /dev/null)

# Stop tracing
echo 0 > /sys/kernel/tracing/tracing_on

# Show the result
cat /sys/kernel/tracing/trace

# Clean up: disable the event, remove the probe, drop the PID filter.
# Errors are no longer silenced so that a failed removal is visible.
echo 0 > /sys/kernel/tracing/events/kprobes/myreturnprobe/enable
echo '-:myreturnprobe' >> /sys/kernel/tracing/kprobe_events
echo > /sys/kernel/tracing/set_event_pid

echo "done"

Kprobes events 格式

更多说明在这里: https://docs.kernel.org/trace/kprobetrace.html

 p[:[GRP/][EVENT]] [MOD:]SYM[+offs]|MEMADDR [FETCHARGS]        : Set a probe
 r[MAXACTIVE][:[GRP/][EVENT]] [MOD:]SYM[+0] [FETCHARGS]        : Set a return probe
 p[:[GRP/][EVENT]] [MOD:]SYM[+0]%return [FETCHARGS]            : Set a return probe
 -:[GRP/][EVENT]                                               : Clear a probe

参考:
https://docs.kernel.org/trace/kprobetrace.html

几个Linux 内核模块的例子

本文写几个Linux 内核模块的例子.

系列:

  1. 写一个 Linux 内核 hello world 模块
  2. 写一个有参数的 Linux 内核模块
  3. 写一个有依赖的Linux 内核模块

打印系统进程的 kernel 模块

文件名: printthread.c

#include <linux/module.h>
#include <linux/init.h>
#include <linux/sched.h>
#include <linux/oom.h>

/*
 * Module entry point: log one line for every thread of every process
 * (tgid, thread pid, parent pid, command name and raw scheduler state).
 *
 * Returns 0 so the module stays loaded after the dump.
 */
static int __init tprocs_init(void)
{
    struct task_struct *p, *t;

    pr_info("print_threads: tid, pid, command, state\n\n");

    /*
     * The task list and ->real_parent are RCU-protected. Without the
     * read-side critical section a task could exit and be freed while we
     * are still looking at it.
     */
    rcu_read_lock();
    for_each_process_thread(p, t) {
        pr_info("tgid=%d, thread_pid=%d, parent_pid=%d, comm=%s, state=%d\n",
                t->tgid, t->pid, rcu_dereference(t->real_parent)->pid,
                t->comm, READ_ONCE(t->__state));
    }
    rcu_read_unlock();

    return 0;
}

/* Module exit point: nothing to release, just log that the module is gone. */
static void __exit tprocs_exit(void)
{
    pr_info("print_threads has left the building...\n");
}

/* Register the load/unload handlers and declare the module license. */
module_init(tprocs_init);
module_exit(tprocs_exit);
MODULE_LICENSE("GPL v2");

打印当前进程及它的子进程

文件名: list_children.c

#include <linux/module.h>
#include <linux/init.h>
#include <linux/sched.h>
#include <linux/oom.h>

void list_children(struct task_struct *parent) {
    struct task_struct *child;
    struct list_head *list;

    // Iterate through the list of children
    list_for_each(list, &parent->children) {
        child = list_entry(list, struct task_struct, sibling);

        // Print information about the child process
        printk(KERN_INFO "Child process: %s [%d]\n", child->comm, child->pid);
    }
}

/*
 * Module entry point: log every process (pid, command name, raw scheduler
 * state) followed by its direct children.
 *
 * Returns 0 so the module stays loaded after the dump.
 */
static int __init pprocs_init(void)
{
    struct task_struct *p;

    pr_info("print_procs: pid, command, state\n\n");

    /*
     * for_each_process() walks an RCU-protected list. Hold the RCU read
     * lock so tasks cannot be freed while we print them.
     */
    rcu_read_lock();
    for_each_process(p) {
        pr_info("pid=%d, comm=%s, state=%d\n",
                p->pid, p->comm, READ_ONCE(p->__state));
        list_children(p);
    }
    rcu_read_unlock();

    return 0;
}

/* Module exit point: nothing to release, just log that the module is gone. */
static void __exit pprocs_exit(void)
{
    pr_info("print_procs has left the building...\n");
}

/* Register the load/unload handlers and declare the module license. */
module_init(pprocs_init);
module_exit(pprocs_exit);
MODULE_LICENSE("GPL v2");

dump 某个进程的内存信息

文件名 dumpprocmm.c

#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/proc_fs.h>
#include <linux/types.h>
#include <linux/kstrtox.h>
#include <linux/minmax.h>
#include <linux/string.h>
#include <linux/uaccess.h>
#include <linux/rcupdate.h>
#include <linux/sched.h>
#include <linux/sched/mm.h>
#include <linux/sched/task.h>
#include <linux/mm_types.h>
#include <linux/mm_types_task.h>
#include <linux/mm.h>
#include <linux/pid.h>
#include <linux/slab.h>

/* /proc/dumpprocmm directory entry; created in hello_init(), removed in hello_exit() */
static struct proc_dir_entry *parent;
/* Toggles for read_proc1()/read_proc2(): 1 = next read returns data, 0 = next read returns 0 */
static int flag1 = 1;
static int flag2 = 1;
/* Last PID string written to /proc/dumpprocmm/pid */
char buff_array[32] = "123";
/* Text returned by reads of /proc/dumpprocmm/dump_mm; filled by get_memory_information() */
char mm_array[500] = "initial value\n";

/* open() handler shared by both proc files: only logs the call and succeeds. */
static int open_proc(struct inode *inode, struct file *file)
{
    pr_alert("Open dump_something.....\n");

    return 0;
}

/* release() handler shared by both proc files: only logs the call and succeeds. */
static int release_proc(struct inode *inode, struct file *file)
{
    pr_alert("Close dump_something.....\n");

    return 0;
}

/*
 * read() handler for /proc/dumpprocmm/pid: returns the PID string stored in
 * buff_array.
 *
 * flag1 alternates on every call: the first read returns the data, the next
 * one returns 0 (EOF) so that tools such as cat terminate.
 *
 * Returns the number of bytes copied, 0 for EOF, or -EFAULT if the user
 * buffer is not writable.
 */
static ssize_t read_proc1(struct file *filp, char __user *buffer, size_t length, loff_t *offset)
{
    size_t count;

    printk(KERN_ALERT "Read dump_something.....\n");
    if (flag1)
    {
        flag1 = 0;
        printk(KERN_ALERT "flag1 is 0 \n");
    }
    else
    {
        flag1 = 1;
        printk(KERN_ALERT "flag1 is 1, now to 0, return 0 \n");
        return 0;
    }

    /* Never copy more than the caller asked for: the user buffer is only
     * guaranteed to be @length bytes long. */
    count = min_t(size_t, strlen(buff_array), length);

    if (copy_to_user(buffer, buff_array, count))
    {
        printk(KERN_ERR "Data Send: Err!\n");
        return -EFAULT;
    }
    printk(KERN_ALERT "return strlen is %zu \n", count);
    return count;
}

/*
 * read() handler for /proc/dumpprocmm/dump_mm: returns the memory report
 * stored in mm_array.
 *
 * flag2 alternates on every call: the first read returns the data, the next
 * one returns 0 (EOF) so that tools such as cat terminate.
 *
 * Returns the number of bytes copied, 0 for EOF, or -EFAULT if the user
 * buffer is not writable.
 */
static ssize_t read_proc2(struct file *filp, char __user *buffer, size_t length, loff_t *offset)
{
    size_t count;

    printk(KERN_ALERT "Read dump_something.....\n");
    if (flag2)
    {
        flag2 = 0;
        printk(KERN_ALERT "2 flag2 is 0 \n");
    }
    else
    {
        flag2 = 1;
        printk(KERN_ALERT "2 flag2 is 1, return 0 \n");
        return 0;
    }

    /* Never copy more than the caller asked for: the user buffer is only
     * guaranteed to be @length bytes long. */
    count = min_t(size_t, strlen(mm_array), length);

    if (copy_to_user(buffer, mm_array, count))
    {
        printk(KERN_ERR "Data Send: Err!\n");
        return -EFAULT;
    }
    /* Log the length of what was actually returned (mm_array, not buff_array) */
    printk(KERN_ALERT "2 return strlen is %zu \n", count);
    return count;
}

static struct task_struct *get_task_struct_from_pid(const char *pid_buffer)
{
    pid_t pid;
    struct task_struct *task = NULL;

    // Convert the PID string to an integer
    if (kstrtoint(pid_buffer, 10, &pid) != 0)
    {
        printk(KERN_ERR "Invalid PID: %s\n", pid_buffer);
        return NULL;
    }

    // Get the task_struct pointer from the PID
    task = pid_task(find_vpid(pid), PIDTYPE_PID);
    if (task == NULL)
    {
        printk(KERN_ERR "Process with PID %d not found\n", pid);
        return NULL;
    }

    return task;
}

static void get_memory_information(struct mm_struct *mm, char *info_buffer)
{
    unsigned long total_vm = mm->total_vm;
    unsigned long anon = get_mm_counter(mm, MM_ANONPAGES);
    unsigned long file = get_mm_counter(mm, MM_FILEPAGES);
    unsigned long swap = get_mm_counter(mm, MM_SWAPENTS);

    snprintf(info_buffer, 400, "Total virtual memory: %lu kB\nRssAnon: %lu kB\nRssFile: %lu kB\nVmSwap: %lu kB\n",
             total_vm << (PAGE_SHIFT - 10), anon << (PAGE_SHIFT - 10), file << (PAGE_SHIFT - 10), swap << (PAGE_SHIFT - 10));
}

static ssize_t write_proc1(struct file *filp, const char *buffer, size_t len, loff_t *off)
{
    printk(KERN_ALERT "try to write to pid file.....\n");

    if (len >= sizeof(buff_array))
    {
        printk(KERN_ERR "Invalid PID: Length exceeds buffer size\n");
        return -EINVAL;
    }

    if (copy_from_user(buff_array, buffer, len))
    {
        printk(KERN_ERR "Data Write: Err!\n");
        return -EFAULT;
    }
    buff_array[len] = '\0';

    struct task_struct *task = get_task_struct_from_pid(buff_array);
    if (!task)
        return -EINVAL;

    struct mm_struct *mm = task->mm;

    // Check if mm_struct is present
    if (mm != NULL)
    {
        // Access memory information
        unsigned long rss = get_mm_rss(mm);

        // Print memory information
        printk(KERN_INFO "Process memory information:\n");
        printk(KERN_INFO "Resident set size (RSS): %lu\n", rss);

        get_memory_information(mm, mm_array);
    }

    return len;
}

static ssize_t write_proc2(struct file *filp, const char *buffer, size_t len, loff_t *off)
{
    printk(KERN_ALERT "try to write to dump_mm file.....\n");
    return 0;
}

/* File operations for /proc/dumpprocmm/pid (registered in hello_init()) */
static const struct proc_ops proc_fops1 = {
    .proc_open = open_proc,
    .proc_read = read_proc1,
    .proc_write = write_proc1,
    .proc_release = release_proc,
};

/* File operations for /proc/dumpprocmm/dump_mm (registered in hello_init()) */
static const struct proc_ops proc_fops2 = {
    .proc_open = open_proc,
    .proc_read = read_proc2,
    .proc_write = write_proc2,
    .proc_release = release_proc,
};

/*
 * Module entry point: create /proc/dumpprocmm with its two files, "pid" and
 * "dump_mm".
 *
 * Returns 0 on success or -ENOMEM if any proc entry cannot be created; in
 * that case nothing is left behind under /proc.
 */
static int __init hello_init(void)
{
    printk(KERN_ALERT "Hello !\n");

    /* Create proc directory under /proc */
    parent = proc_mkdir("dumpprocmm", NULL);

    if (NULL == parent)
    {
        printk(KERN_ERR "Failed creating proc entry dumpprocmm\n");
        return -ENOMEM;
    }

    /* Create proc files under /proc/dumpprocmm; undo the directory on failure */
    if (!proc_create("pid", 0666, parent, &proc_fops1) ||
        !proc_create("dump_mm", 0666, parent, &proc_fops2))
    {
        printk(KERN_ERR "Failed creating proc files under dumpprocmm\n");
        proc_remove(parent);
        return -ENOMEM;
    }

    return 0;
}

/* Module exit point: remove the proc entry stored in `parent` and log a goodbye. */
static void __exit hello_exit(void)
{
    proc_remove(parent);

    pr_alert("Goodbye !\n");
}

/* Register the load/unload handlers */
module_init(hello_init);
module_exit(hello_exit);

/* Module metadata (shown by modinfo) */
MODULE_DESCRIPTION("dump process memory");
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Eric Tian");

执行

$ make all
$ sudo insmod dumpprocmm.ko
$ echo "755" > /proc/dumpprocmm/pid
$ cat /proc/dumpprocmm/pid
$ cat /proc/dumpprocmm/dump_mm

写一个有参数的 Linux 内核模块

本文讲写一个简单的hello world 内核模块, 但是可以设置参数.

本系列:

  1. 写一个 Linux 内核 hello world 模块
  2. 写一个有依赖的Linux 内核模块

源代码

#include <linux/init.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/moduleparam.h>

MODULE_LICENSE("GPL");

// Module parameter variables; these initial values are the defaults
static char* name = "John";
static int age = 30;

// Register the module parameters; S_IRUGO makes them read-only for everyone
module_param(name, charp, S_IRUGO);
MODULE_PARM_DESC(name, "Name parameter");
module_param(age, int, S_IRUGO);
MODULE_PARM_DESC(age, "Age parameter");

// Module init function: greet using the current parameter values
static int __init hello_init(void)
{
    pr_info("Hello, %s! Your age is %d.\n", name, age);

    return 0;
}

// Module exit function: say goodbye using the name parameter
static void __exit hello_exit(void)
{
    pr_info("Goodbye, %s!\n", name);
}

// Register the module init and exit functions
module_init(hello_init);
module_exit(hello_exit);

Makefile

obj-m += hello.o

# Kernel release to build against; override with `make tag=<release>`.
# make does not expand the backticks itself: they are passed through to the
# shell that runs the recipe, which substitutes the running kernel release.
tag ?= `uname -r`
KDIR := /lib/modules/${tag}/build/
# Do not rely on PWD being exported by the calling environment
PWD := $(shell pwd)

# all/clean are commands, not files
.PHONY: all clean

# $(MAKE) instead of a bare "make" so that flags such as -j or -n propagate
all:
	$(MAKE) -C $(KDIR) M=$(PWD) modules

clean:
	$(MAKE) -C $(KDIR) M=$(PWD) clean

编译并执行

$ make all 

$ sudo insmod hello.ko name=Eric age=35

$ tail -n 1 /var/log/syslog
Jul 11 01:27:56 supra kernel: [ 8683.334440] Hello, Eric! Your age is 35.

查看内核模块参数

$ cat /sys/module/hello/parameters/age
35
$ cat /sys/module/hello/parameters/name
Eric

改变内核模块参数

$ sudo echo 28 > /sys/module/hello/parameters/age
bash: /sys/module/hello/parameters/age: Permission denied

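上面的权限问题有两个原因: 一是参数权限被设置为只读的 S_IRUGO, 需要改为可写的权限, 例如 0644 或 0660; 二是 sudo 只作用于 echo, 重定向仍由当前的非 root shell 执行, 应改用 echo 28 | sudo tee /sys/module/hello/parameters/age.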

写一个 Linux 内核 hello world 模块

本文介绍如何一步步写一个 Linux 内核 hello world 模块.

内核模块分类

分为如下2类:

  1. Builtin modules 内置模块, 包含在 Linux image 里面
  2. External modules 外部模块, 可以随时加载/卸载的内核模块.
    本文例子中的模块属于 外部模块.

Kernel Build System

关于 Linux 内核 Kbuild 系统的更多信息, 参看官方文档.

hello world 内核模块

创建一个 hello.c 文件. 源代码如下:

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/init.h>

/* Module entry point: log a greeting at alert level and report success. */
static int __init hello_init(void)
{
    pr_alert("hello world\n");

    return 0;
}

/* Module exit point: log a farewell at alert level. */
static void __exit hello_exit(void)
{
    pr_alert("goodbye\n");
}

/* Register the load/unload handlers */
module_init(hello_init);
module_exit(hello_exit);

/*
 * Module metadata (shown by modinfo).
 * NOTE: "free" is not a license string the kernel accepts as GPL-compatible,
 * so loading this module taints the kernel (see the dmesg output below).
 */
MODULE_DESCRIPTION("my first module");
MODULE_LICENSE("free");
MODULE_AUTHOR("Eric Tian");

设置内核 build 环境

安装 Kernel header: 找到对应的包, 然后安装, 然后查看安装的包.

$ apt search linux-headers-$(uname -r)
$ sudo apt install linux-headers-$(uname -r)

$ file /lib/modules/$(uname -r)/build
/lib/modules/5.15.92/build: symbolic link to /home/supra/work/jammy/jammy-Ubuntu-5.15.0-70.77-test

Makefile

在 hello.c 相同的目录, 创建一个新文件 Makefile. 内容如下, 缩进符号使用 tab.

obj-m += hello.o

# Kernel release to build against; override with `make tag=<release>`.
# make does not expand the backticks itself: they are passed through to the
# shell that runs the recipe, which substitutes the running kernel release.
tag ?= `uname -r`
KDIR := /lib/modules/${tag}/build/
# Do not rely on PWD being exported by the calling environment
PWD := $(shell pwd)

# all/clean are commands, not files
.PHONY: all clean

# $(MAKE) instead of a bare "make" so that flags such as -j or -n propagate
all:
	$(MAKE) -C $(KDIR) M=$(PWD) modules

clean:
	$(MAKE) -C $(KDIR) M=$(PWD) clean

编译运行

$ make clean
$ make all
make -C /lib/modules/`uname -r`/build/ M=/home/supra/work/c/kernel modules
make[1]: Entering directory '/usr/src/linux-headers-5.15.0-76-generic'
  CC [M]  /home/supra/work/c/kernel/hello.o
  MODPOST /home/supra/work/c/kernel/Module.symvers
  CC [M]  /home/supra/work/c/kernel/hello.mod.o
  LD [M]  /home/supra/work/c/kernel/hello.ko
  BTF [M] /home/supra/work/c/kernel/hello.ko
Skipping BTF generation for /home/supra/work/c/kernel/hello.ko due to unavailability of vmlinux
make[1]: Leaving directory '/usr/src/linux-headers-5.15.0-76-generic'

查看模块信息

$ modinfo hello.ko
modinfo hello.ko
filename:       /home/supra/work/c/kernel/hello.ko
author:         Eric Tian
license:        free
description:    my first module
srcversion:     01F8F75DC7D8708707AA062
depends:
retpoline:      Y
name:           hello
vermagic:       5.15.0-76-generic SMP mod_unload modversions

运行模块

$ sudo insmod hello.ko
insmod: ERROR: could not insert module hello.ko: Operation not permitted

虽然你用了 sudo, 仍然得到上面的错误, 原因很可能是系统开启了 Secure Boot, 它会阻止加载未签名的内核模块. 可以在 BIOS/UEFI 设置中关闭 Secure Boot, 然后重试.

解决上述问题后, 执行 insmod 然后通过 dmesg 查看日志信息.

$ sudo insmod hello.ko
$ sudo dmesg 

[  176.149789] hello: loading out-of-tree module taints kernel.
[  176.149794] hello: module license 'free' taints kernel.
[  176.149794] Disabling lock debugging due to kernel taint
[  176.149815] hello: module verification failed: signature and/or required key missing - tainting kernel
[  176.150529] hello world

查看加载的模块信息

$ lsmod | grep hello
Module                    Size   Used by
hello                  16384  0

$ sudo cat /proc/modules | grep hello
hello 16384 0 - Live 0xffffffffc0797000 (POE)

卸载模块 并查看 exit 日志

$ sudo rmmod hello
$ tail -n 10 /var/log/syslog
Jul 10 12:16:14 supra kernel: [  596.219986] goodbye

写一个有依赖的Linux 内核模块

接上一篇 写一个 Linux内核 hello world 模块, 这次我们写2个内核模块 hello & world, 并且 world 模块依赖于 hello 模块.

hello 模块

源代码: hello.c

#include <linux/module.h>
#include <linux/init.h>

/* Module entry point: log that the hello module was loaded. */
static int __init hello_init(void)
{
	pr_info("hello module is loaded\n");

	return 0;
}

/* Module exit point: log that the hello module was unloaded. */
static void __exit hello_exit(void)
{
	pr_info("hello module is unloaded\n");
}

void say_hello(void)
{
        pr_info("hello ");
}

EXPORT_SYMBOL(say_hello);

/* Register the load/unload handlers and declare the module license. */
/* NOTE(review): "free" is not a GPL-compatible license string; presumably this taints the kernel on load - confirm. */
module_init(hello_init);
module_exit(hello_exit);
MODULE_LICENSE("free");

world 模块

源代码: world.c

#include <linux/module.h>
#include <linux/init.h>

/* Defined and exported by the hello module (EXPORT_SYMBOL(say_hello) in hello.c) */
void say_hello(void);

/*
 * Module entry point: log the load message, then emit "hello " through the
 * hello module's say_hello() followed by "world".
 */
static int __init world_init(void)
{
	pr_info("world module is loaded\n");

	say_hello();
	pr_info("world\n");

	return 0;
}

/* Module exit point: log that the world module was unloaded. */
static void __exit world_exit(void)
{
	pr_info("world module is unloaded\n");
}

/* Register the load/unload handlers and declare the module license. */
/* NOTE(review): "free" is not a GPL-compatible license string; presumably this taints the kernel on load - confirm. */
module_init(world_init);
module_exit(world_exit);
MODULE_LICENSE("free");

Makefile

源代码: Makefile. 缩进使用 tab.

# Build both modules; world.ko uses a symbol exported by hello.ko
obj-m := world.o hello.o

# make does not expand the backticks itself: they are passed through to the
# shell that runs the recipe, which substitutes the running kernel release.
KDIR := /lib/modules/`uname -r`/build/
PWD := $(shell pwd)

# default/clean are commands, not files
.PHONY: default clean

default:
	$(MAKE) -C $(KDIR) M=$(PWD) modules

clean:
	$(MAKE) -C $(KDIR) M=$(PWD) clean

编译模块

$ make
make -C /lib/modules/`uname -r`/build/ M=/home/supra/work/modules/deps modules
make[1]: Entering directory '/home/supra/work/jammy/jammy-Ubuntu-5.15.0-70.77-test'
  CC [M]  /home/supra/work/modules/deps/world.o
  CC [M]  /home/supra/work/modules/deps/hello.o
  MODPOST /home/supra/work/modules/deps/Module.symvers
  CC [M]  /home/supra/work/modules/deps/hello.mod.o
  LD [M]  /home/supra/work/modules/deps/hello.ko
  BTF [M] /home/supra/work/modules/deps/hello.ko
  CC [M]  /home/supra/work/modules/deps/world.mod.o
  LD [M]  /home/supra/work/modules/deps/world.ko
  BTF [M] /home/supra/work/modules/deps/world.ko
make[1]: Leaving directory '/home/supra/work/jammy/jammy-Ubuntu-5.15.0-70.77-test'

加载模块

$ sudo insmod hello.ko

$ tail -n 1  /var/log/syslog
Jul 10 14:15:22 supra kernel: [ 3313.748762] hello module is loaded

$ sudo cat /proc/modules | grep hello
hello 16384 0 - Live 0xffffffffc0797000 (POE)

$ sudo insmod world.ko

$ tail -n 3 /var/log/syslog
Jul 10 14:19:22 supra kernel: [ 3553.077383] world module is loaded
Jul 10 14:19:22 supra kernel: [ 3553.077385] hello
Jul 10 14:19:22 supra kernel: [ 3553.077386] world

$ sudo cat /proc/modules | grep hello
hello 16384 1 world, Live 0xffffffffc0797000 (POE)

查看模块依赖

$ lsmod | grep hello
hello                  16384  1 world

$ lsmod | grep world
world                  16384  0
hello                  16384  1 world

相反顺序卸载模块

$ sudo rmmod world.ko
$ sudo rmmod hello.ko

$ tail -n 2 /var/log/syslog
Jul 10 14:26:00 supra kernel: [ 3949.339943] world module is unloaded
Jul 10 14:26:05 supra kernel: [ 3954.185122] hello module is unloaded