如何在Linux中测试/验证vmalloc保护页



我正在学习Linux中的堆栈保护。我发现Linux内核的VMAP_STACK配置选项利用保护页机制和vmalloc()来提供堆栈保护。
我正试图找到一种方法来检查这个保护页在Linux内核中是如何工作的。我在谷歌上搜索并查看了内核代码,但没有找到相关代码。

另一个问题是如何验证受保护的堆栈
我有一个内核模块来欠载/溢出进程的内核堆栈,比如这个

/*
 * Deliberately write outside the current task's kernel stack.
 *
 * Starting from the address returned by task_stack_page(current), the
 * target is placed THREAD_SIZE + PAGE_SIZE bytes further up and 0x10 bytes
 * of 0xB4 are stored there.
 *
 * NOTE(review): if the stack is followed by exactly one guard page, this
 * offset is the first byte *after* that page; the commented-out adjustment
 * below would move the target back inside it -- confirm the intended offset.
 */
static void shoot_kernel_stack(void)
{
	unsigned char *stack_base = task_stack_page(current);
	unsigned char *target = stack_base + THREAD_SIZE + PAGE_SIZE + 0;

	//  target -= 0x100;
	memset(target, 0xB4, 0x10); // Underrun
}

我真的得到了如下的内核恐慌,

[ 8006.358354] BUG: stack guard page was hit at 00000000e8dc2d98 (stack is 00000000cff0f921..00000000653b24a9)
[ 8006.361276] kernel stack overflow (page fault): 0000 [#1] SMP PTI

这是验证保护页的正确方法吗?

VMAP_STACK是Linux的一项功能,用于将线程的内核堆栈映射到VMA中。通过虚拟映射堆栈,底层物理页面不需要是连续的。可以通过添加保护页来检测跨页溢出。由于VMA后面跟着一个保护页(除非在分配时传递了VM_NO_GUARD标志),因此在这些区域中分配的堆栈可以从中受益,用于检测堆栈溢出。

分配

线程堆栈在线程创建时使用kernel/fork.c中的alloc_thread_stack_node()进行分配。当VMAP_STACK被启用时,堆栈会被缓存,因为根据源代码中的注释:

vmalloc()有点慢,而且调用vfree()足够多次会强制进行TLB刷新。通过缓存堆栈来尽量减少调用次数。

内核堆栈大小为THREAD_SIZE(在x86_64平台上等于4页)。在线程创建时调用的分配源代码是:

static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node)
{
#ifdef CONFIG_VMAP_STACK
void *stack;
int i;
[...] // <----- Part which gets a previously cached stack. If no stack in cache
// the following is run to allocate a brand new stack:
/*
* Allocated stacks are cached and later reused by new threads,
* so memcg accounting is performed manually on assigning/releasing
* stacks to tasks. Drop __GFP_ACCOUNT.
*/
stack = __vmalloc_node_range(THREAD_SIZE, THREAD_ALIGN,
VMALLOC_START, VMALLOC_END,
THREADINFO_GFP & ~__GFP_ACCOUNT,
PAGE_KERNEL,
0, node, __builtin_return_address(0));
[...]

__vmalloc_node_range()在mm/vmalloc.c中定义。它会调用__get_vm_area_node()。由于调用后者时未传递VM_NO_GUARD标志,因此会在分配区域的末尾添加一个附加页面。这就是VMA的保护页:

/*
 * Reserve a virtual area of at least @size bytes between @start and @end
 * and return its descriptor, or NULL on failure.
 *
 * The requested size is rounded up with PAGE_ALIGN(); a zero result is
 * rejected. For VM_IOREMAP requests the alignment is recomputed from the
 * size, clamped between PAGE_SHIFT and IOREMAP_MAX_ORDER. Unless
 * VM_NO_GUARD is set in @flags, one extra PAGE_SIZE is added to the
 * reservation.
 *
 * Must not be called from interrupt context (enforced by BUG_ON).
 */
static struct vm_struct *__get_vm_area_node(unsigned long size,
		unsigned long align, unsigned long flags, unsigned long start,
		unsigned long end, int node, gfp_t gfp_mask, const void *caller)
{
	struct vm_struct *vm;
	struct vmap_area *vmap;
	unsigned long total;

	BUG_ON(in_interrupt());

	total = PAGE_ALIGN(size);
	if (unlikely(total == 0))
		return NULL;

	if (flags & VM_IOREMAP) {
		int order = clamp_t(int, get_count_order_long(total),
				    PAGE_SHIFT, IOREMAP_MAX_ORDER);

		align = 1ul << order;
	}

	vm = kzalloc_node(sizeof(*vm), gfp_mask & GFP_RECLAIM_MASK, node);
	if (unlikely(vm == NULL))
		return NULL;

	if ((flags & VM_NO_GUARD) == 0) // <----- A GUARD PAGE IS ADDED
		total += PAGE_SIZE;

	vmap = alloc_vmap_area(total, align, start, end, node, gfp_mask);
	if (IS_ERR(vmap)) {
		/* The descriptor is of no use without an address range. */
		kfree(vm);
		return NULL;
	}

	setup_vmalloc_vm(vm, vmap, flags, caller);
	return vm;
}

溢出管理

堆栈溢出管理依赖于体系结构(即位于arch/…中的源代码)。下面引用的链接提供了一些依赖于体系结构的实现的指引。

对于x86_64平台,溢出检查是在页面故障中断时完成的,该故障中断触发以下函数调用链:在arch/x86/mm/fault.c中定义的do_page_fault()->__do_page_fault()->do_kern_addr_fault()->bad_area_nosemaphore()->no_context()函数。在no_context()中,有一部分代码专门用于VMAP_STACK管理,用于检测堆栈下溢/溢出:

/*
 * Excerpt of the kernel-mode fault handler (parts elided with [...]).
 *
 * With CONFIG_VMAP_STACK, a faulting @address that lies in vmalloc space
 * and falls within one page below tsk->stack or within one page above
 * tsk->stack + THREAD_SIZE is treated as a kernel stack overflow: the stack
 * pointer is switched to the value computed from __this_cpu_ist_top_va(DF)
 * and handle_stack_overflow() is called with the message, @regs and
 * @address. That path does not return.
 */
static noinline void
no_context(struct pt_regs *regs, unsigned long error_code,
	   unsigned long address, int signal, int si_code)
{
	struct task_struct *tsk = current;
	unsigned long flags;
	int sig;
[...]
#ifdef CONFIG_VMAP_STACK
	/*
	 * Stack overflow?  During boot, we can fault near the initial
	 * stack in the direct map, but that's not an overflow -- check
	 * that we're in vmalloc space to avoid this.
	 */
	if (is_vmalloc_addr((void *)address) &&
	    (((unsigned long)tsk->stack - 1 - address < PAGE_SIZE) ||
	     address - ((unsigned long)tsk->stack + THREAD_SIZE) < PAGE_SIZE)) {
		unsigned long stack = __this_cpu_ist_top_va(DF) - sizeof(void *);
		/*
		 * We're likely to be running with very little stack space
		 * left.  It's plausible that we'd hit this condition but
		 * double-fault even before we get this far, in which case
		 * we're fine: the double-fault handler will deal with it.
		 *
		 * We don't want to make it all the way into the oops code
		 * and then double-fault, though, because we're likely to
		 * break the console driver and lose most of the stack dump.
		 */
		/*
		 * Each instruction in the template must end in "\n\t" so the
		 * assembler sees three separate statements. The trailing
		 * "1: jmp 1b" spins forever should the call ever return.
		 */
		asm volatile ("movq %[stack], %%rsp\n\t"
			      "call handle_stack_overflow\n\t"
			      "1: jmp 1b"
			      : ASM_CALL_CONSTRAINT
			      : "D" ("kernel stack overflow (page fault)"),
				"S" (regs), "d" (address),
				[stack] "rm" (stack));
		unreachable();
	}
#endif
[...]
}

在上面的代码中,当检测到堆栈下溢/溢出时,会调用arch/x86/kernel/traps.c中定义的handle_stack_overflow()函数:

#ifdef CONFIG_VMAP_STACK
/*
 * Report that the kernel stack guard page was hit, then die.
 *
 * @message:       text passed on to die() and, as a last resort, panic()
 * @regs:          register state passed on to die()
 * @fault_address: faulting address, printed in the report
 *
 * Logs the faulting address together with the bounds of the current task's
 * stack (current->stack .. current->stack + THREAD_SIZE - 1) at KERN_EMERG
 * level. Never returns.
 */
__visible void __noreturn handle_stack_overflow(const char *message,
						struct pt_regs *regs,
						unsigned long fault_address)
{
	/* The format must end in "\n" so the record is terminated properly. */
	printk(KERN_EMERG "BUG: stack guard page was hit at %p (stack is %p..%p)\n",
	       (void *)fault_address, current->stack,
	       (char *)current->stack + THREAD_SIZE - 1);
	die(message, regs, 0);

	/* Be absolutely certain we don't return. */
	panic("%s", message);
}
#endif

问题中给出的示例错误消息"BUG: stack guard page was hit at ..."来自上面的handle_stack_overflow()函数。

来自示例模块

定义了VMAP_STACK时,任务描述符中会出现stack_vm_area字段,并被设置为与堆栈关联的VMA的地址。从那里,可以获取一些有趣的信息:

/*
 * Print information about the current task's kernel stack: its base
 * address, the word stored at that base, the fields of the associated
 * vm_struct (only when CONFIG_VMAP_STACK is set) and the address of a
 * local variable for comparison.
 */
struct task_struct *task;
#ifdef CONFIG_VMAP_STACK
struct vm_struct *vm;
#endif // CONFIG_VMAP_STACK

task = current;
/* Each line is tab-indented and newline-terminated ("\t...\n"). */
printk("\tKernel stack: 0x%lx\n", (unsigned long)(task->stack));
printk("\tStack end magic: 0x%lx\n", *(unsigned long *)(task->stack));
#ifdef CONFIG_VMAP_STACK
vm = task->stack_vm_area;
printk("\tstack_vm_area->addr = 0x%lx\n", (unsigned long)(vm->addr));
printk("\tstack_vm_area->nr_pages = %u\n", vm->nr_pages);
printk("\tstack_vm_area->size = %lu\n", vm->size);
#endif // CONFIG_VMAP_STACK
printk("\tLocal var in stack: 0x%lx\n", (unsigned long)(&task));

nr_pages字段是不含附加保护页的页数。堆栈顶部的最后一个unsigned long被设置为include/uapi/linux/magic.h中定义的STACK_END_MAGIC:

#define STACK_END_MAGIC     0x57AC6E9D

参考

  • 防止堆栈保护页面跳转
  • arm64:VMAP_STACK支持
  • CONFIG_VMAP_STACK:使用虚拟映射堆栈
  • x86_64上的Linux 4.9支持Vmapped堆栈
  • Linux内核漏洞十年

最新更新