请帮我解决这个问题。我使用1毫秒的高分辨率计时器,并将其安装为"insmod"的单独模块。这每1毫秒触发一次,我必须用这个计时器中断做一些任务。还有其他进程进行图像传输,我看到以太网驱动程序中断出现发送图像。这个enet中断具有一些高优先级,看起来它正在延迟上面的1毫秒定时器中断,但我不确定。
运行测试3到3小时后,我看到下面的Oops。如何找出根本原因?请帮助。本系统为ARM omap,运行Linux 2.6.33交叉编译。
[root@user:/]#
Unable to handle kernel paging request at virtual address 7eb52754
pgd = 80004000
[7eb52754] *pgd=00000000
Internal error: Oops: 80000005 [#1] PREEMPT
last sysfs file: /sys/devices/virtual/spi/spi/dev
Modules linked in: mod timermod mod2(P) mod3(P) mod4
CPU: 0 Tainted: P (2.6.33_appl #1)
PC is at 0x7eb52754
LR is at walk_stackframe+0x24/0x40
pc : [<7eb52754>] lr : [<8002d4dc>] psr: a0000013
sp : 80395f10 ip : 80395f30 fp : 80395f2c
r10: 0000001f r9 : 00000000 r8 : 87a25200
r7 : 878b0380 r6 : 80395f40 r5 : 80028374 r4 : 80395f30
r3 : 80000100 r2 : 80395f40 r1 : 80395f40 r0 : 80395f30
Flags: NzCv IRQs on FIQs on Mode SVC_32 ISA ARM Segment kernel
Control: 10c5387d Table: 86fb0019 DAC: 00000017
Process swapper (pid: 0, stack limit = 0x803942e8)
Stack: (0x80395f10 to 0x80396000)
5f00: 8002bfa4 00000001 802c678c 87a25380
5f20: 80395f54 80395f30 8002bfe0 8002d4c4 80395f54 80395f30 8004998c 8002bfa4
5f40: 00000002 00000002 80395f6c 80395f58 8004998c 8002bfb0 80396ea8 80394000
5f60: 80395fa4 80395f70 802c678c 800498d0 8002b320 80023218 80398408 80021e10
5f80: 80394000 8002321c 80023218 80398408 80021e10 413fc082 80395fbc 80395fa8
5fa0: 8002b324 802c62fc 803f4cc8 803f5190 80395fcc 80395fc0 802c3ee4 8002b28c
5fc0: 80395ff4 80395fd0 8000897c 802c3e6c 800084fc 00000000 00000000 8002321c
5fe0: 10c53c7d 803c7630 00000000 80395ff8 80008034 80008754 00000000 00000000
Backtrace:
[<8002d4b8>] (walk_stackframe+0x0/0x40) from [<8002bfe0>] (return_address+0x3c/0x5c)
r6:87a25380 r5:802c678c r4:00000001 r3:8002bfa4
[<8002bfa4>] (return_address+0x0/0x5c) from [<8004998c>] (sub_preempt_count+0xc8/0xfc)
[<800498c4>] (sub_preempt_count+0x0/0xfc) from [<802c678c>] (schedule+0x49c/0x4d8)
r5:80394000 r4:80396ea8
[<802c62f0>] (schedule+0x0/0x4d8) from [<8002b324>] (cpu_idle+0xa4/0xbc)
r9:413fc082 r8:80021e10 r7:80398408 r6:80023218 r5:8002321c
r4:80394000
[<8002b280>] (cpu_idle+0x0/0xbc) from [<802c3ee4>] (rest_init+0x84/0xa0)
r4:803f5190 r3:803f4cc8
[<802c3e60>] (rest_init+0x0/0xa0) from [<8000897c>] (start_kernel+0x234/0x284)
[<80008748>] (start_kernel+0x0/0x284) from [<80008034>] (__enable_mmu+0x0/0x2c)
Code: bad PC value
---[ end trace 7e26218fd59f68a5 ]---
Kernel panic - not syncing: Attempted to kill the idle task!
Backtrace:
[<8002db2c>] (dump_backtrace+0x0/0x114) from [<802c610c>] (dump_stack+0x20/0x24)
r6:fffffffc r5:0000000b r4:803c8518 r3:00000002
[<802c60ec>] (dump_stack+0x0/0x24) from [<802c6168>] (panic+0x58/0x130)
[<802c6110>] (panic+0x0/0x130) from [<80057330>] (do_exit+0x7c/0x6e0)
r3:80394000 r2:00000000 r1:80395d28 r0:80348e90
[<800572b4>] (do_exit+0x0/0x6e0) from [<8002dfc0>] (die+0x290/0x2c4)
r7:7eb52744
[<8002dd30>] (die+0x0/0x2c4) from [<8002f4d4>] (__do_kernel_fault+0x74/0x84)
r7:80395ec8
[<8002f460>] (__do_kernel_fault+0x0/0x84) from [<8002f6bc>] (do_page_fault+0x1d8/0x1f0)
r7:00000000 r6:80395ec8 r5:7eb52754 r4:80396ea8
[<8002f4e4>] (do_page_fault+0x0/0x1f0) from [<8002f794>] (do_translation_fault+0x20/0x80)
[<8002f774>] (do_translation_fault+0x0/0x80) from [<80029250>] (do_PrefetchAbort+0x44/0xa8)
r6:7eb52754 r5:80398820 r4:00000005 r3:8002f774
[<8002920c>] (do_PrefetchAbort+0x0/0xa8) from [<80029d1c>] (__pabt_svc+0x5c/0xa0)
Exception stack(0x80395ec8 to 0x80395f10)
5ec0: 80395f30 80395f40 80395f40 80000100 80395f30 80028374
5ee0: 80395f40 878b0380 87a25200 00000000 0000001f 80395f2c 80395f30 80395f10
5f00: 8002d4dc 7eb52754 a0000013 ffffffff
r7:878b0380 r6:80395f40 r5:80395efc r4:ffffffff
[<8002d4b8>] (walk_stackframe+0x0/0x40) from [<8002bfe0>] (return_address+0x3c/0x5c)
r6:87a25380 r5:802c678c r4:00000001 r3:8002bfa4
[<8002bfa4>] (return_address+0x0/0x5c) from [<8004998c>] (sub_preempt_count+0xc8/0xfc)
[<800498c4>] (sub_preempt_count+0x0/0xfc) from [<802c678c>] (schedule+0x49c/0x4d8)
r5:80394000 r4:80396ea8
[<802c62f0>] (schedule+0x0/0x4d8) from [<8002b324>] (cpu_idle+0xa4/0xbc)
r9:413fc082 r8:80021e10 r7:80398408 r6:80023218 r5:8002321c
r4:80394000
[<8002b280>] (cpu_idle+0x0/0xbc) from [<802c3ee4>] (rest_init+0x84/0xa0)
r4:803f5190 r3:803f4cc8
[<802c3e60>] (rest_init+0x0/0xa0) from [<8000897c>] (start_kernel+0x234/0x284)
[<80008748>] (start_kernel+0x0/0x284) from [<80008034>] (__enable_mmu+0x0/0x2c)
=========================================
#include <linux/hrtimer.h>
#include <linux/module.h>
#include <linux/ktime.h>
#include <linux/kdev_t.h>
#include <linux/cdev.h>
#include <linux/device.h>
#include <linux/fs.h>
#include <linux/wait.h>
#include <linux/sched.h>
#define FIRST_MINOR 0
#define MINOR_CNT 1
static struct class *cl;
static struct cdev cdev;
static dev_t dev;
static u8 timer_expired = 0;
static wait_queue_head_t wq_head;
static struct hrtimer timer;
static ssize_t hr_read(struct file *f, char * __user buff, size_t cnt, loff_t *off)
{
wait_event_interruptible(wq_head, timer_expired);
timer_expired = 0;
return 0;
}
static int hr_open(struct inode *i, struct file *f)
{
ktime_t ktime;
ktime.tv64 = 1E6L;
hrtimer_start(&timer, ktime, HRTIMER_MODE_REL);
return 0;
}
static int hr_close(struct inode *i, struct file *f)
{
if (hrtimer_cancel(&timer))
printk(KERN_INFO "timercancelledn");
return 0;
}
static struct file_operations hr_fops = {
.read = hr_read,
.open = hr_open,
.release = hr_close
};
static enum hrtimer_restart timer_callback(struct hrtimer *timer)
{
ktime_t ktime;
u64 overrun;
ktime.tv64 = 1E6L;
//printk("KERN_INFO""Timer Expired");
overrun = hrtimer_forward_now(timer, ktime);
timer_expired = 1;
wake_up_interruptible(&wq_head);
return HRTIMER_RESTART;
}
#if 1
static int init_hrtimer(void)
{
ktime_t ktime;
unsigned long delay_in_ms = 500L;
printk(KERN_ERR "Timer being set upn");
ktime = ktime_set(0,delay_in_ms*1E6L);
hrtimer_init(&timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
timer.function = &timer_callback;
printk(KERN_ERR "Timer starting to firen");
printk(KERN_ERR "in %ldms %ldn", delay_in_ms, jiffies);
if (alloc_chrdev_region(&dev, FIRST_MINOR, MINOR_CNT, "Hr Timer") < 0)
{
return -1;
}
printk("Major Nr: %dn", MAJOR(dev));
cdev_init(&cdev, &hr_fops);
if (cdev_add(&cdev, dev, MINOR_CNT) == -1)
{
unregister_chrdev_region(dev, MINOR_CNT);
return -1;
}
if ((cl = class_create(THIS_MODULE, "hrtimer")) == NULL)
{
cdev_del(&cdev);
unregister_chrdev_region(dev, MINOR_CNT);
return -1;
}
if (IS_ERR(device_create(cl, NULL, dev, NULL, "hrt%d", 0)))
{
class_destroy(cl);
cdev_del(&cdev);
unregister_chrdev_region(dev, MINOR_CNT);
return -1;
}
init_waitqueue_head(&wq_head);
return 0;
}
#endif
static void clean_hrtimer(void)
{
int cancelled = hrtimer_cancel(&timer);
if (cancelled)
printk(KERN_ERR "Timer still runningn");
else
printk(KERN_ERR "Timer cancelledn");
device_destroy(cl, dev);
class_destroy(cl);
cdev_del(&cdev);
unregister_chrdev_region(dev, MINOR_CNT);
}
module_init(init_hrtimer);
module_exit(clean_hrtimer);
MODULE_LICENSE("GPL");
========================
我使用上面的代码作为驱动模块,并使用insmod插入它。我希望它每1毫秒触发一次,它工作得很好,但是当以太网流量太高时,它会像解释的那样给出一个内核Oops。请检查代码是否有问题?
我检查了lsmod,我看到所有5个内核模块(我自己的)都加载在:0x7f000000到0x7f02xxxx之间
mod at 0x7f020xxxx,
timermod at 0x7f01xxx,
mod2 at 0x7f01xxxx,
mod3 at 0x7f00xxxx,
mod4 at 0x7f000000.
在oops地址0x7eb52754没有加载模块。我从/proc/kallsyms文件检查到验证这一点。如何检查0x7eb5xxxx in到源文件的映射?
根据错误消息,导致内核恐慌的代码位于虚拟地址0x7eb52754。从地址(略低于0x8000000)判断,我猜这是内核模块的代码段——可能是您自己的内核模块之一。
要进行根本原因分析,请按照故障发生时加载的顺序加载您的(和所有其他)内核模块,并观察lsmod打印的加载地址(或cat/proc/modules,两者几乎相同)。
使用它们的代码大小和加载地址,计算哪个模块文本段驻留在虚拟地址0x7eb52754。从模块加载地址中减去0x7eb52754。
您将得到的是引起panic的指令在模块二进制中的偏移量。
现在在内核模块二进制文件上使用objdump并查找该偏移量,并检查它属于哪个函数(如果您也有add2line,也可以使用add2line)。这将为您指出导致此panic的指令的函数甚至行号(如果您有调试信息)。
好运。