C - malloc 内部发生段错误

这是发生段错误的代码段（perror 并没有被调用）：

/* Allocate storage for one task_t. If malloc fails, perror() reports it,
 * but nothing in this snippet stops execution afterwards. */
job = malloc(sizeof(task_t));
if(job == NULL)
perror("malloc");

更准确地说，gdb说segfault发生在__int_malloc调用内部，这是malloc进行的子程序调用。

由于malloc函数是与其他线程并行调用的，最初我认为这可能是个问题。我使用的是glibc的2.19版本。

数据结构：

/* Forward declaration: thread_wrapper_t names struct rv_thread before the
 * struct itself is defined, so the task callback signature can refer to it. */
typedef struct rv_thread thread_wrapper_t;
/* Completion handle for one task: a flag, the mutex used around it, and a
 * condition variable for threads blocked in wait_future(). */
typedef struct future
{
pthread_cond_t wait;   /* waited on by wait_future(), broadcast by complete() */
pthread_mutex_t mutex; /* held while complete() sets and wait_future() tests `completed` */
long completed;        /* 0 after create_future(); set to 1 by complete() */
} future_t;
/* One unit of work: a callback, its argument, and the future that is
 * completed once the callback has returned. */
typedef struct task
{
future_t * f;  /* completed by whichever thread ran the task */
void * data;   /* opaque argument handed to fun as its second parameter */
/* Callback. Receives the wrapper of the executing thread and `data`;
 * work() and mywait() discard its return value. */
void *
(*fun)(thread_wrapper_t *, void *);
} task_t;
/* A pool: just the task queue that its worker threads poll. */
typedef struct
{
queue_t * queue; /* filled by submit_job(), drained by try_get_new_task() */
} pool_worker_t;
/* Handle returned by async(); mywait() reaches the future through t->f. */
typedef struct
{
task_t * t; /* the task that was submitted for this handle */
} sfuture_t;
/* Per-worker context handed to work() as its thread argument. */
struct rv_thread
{
pool_worker_t * pool; /* pool whose queue this worker takes tasks from */
};

接下来是 future 的实现：

/*
 * Allocate a future and initialise it in the "not completed" state.
 *
 * Returns the new future, or NULL when the allocation or the initialisation
 * of the mutex / condition variable fails (a diagnostic is printed to
 * stderr). Nothing in this file releases a future; the caller owns it.
 */
future_t *
create_future()
{
    future_t *new_f = malloc(sizeof *new_f);
    if (new_f == NULL) {
        perror("malloc");
        return NULL; /* previously fell through and dereferenced NULL */
    }

    new_f->completed = 0;

    /* pthread_*_init return an error number instead of setting errno. */
    int rc = pthread_mutex_init(&new_f->mutex, NULL);
    if (rc != 0) {
        fprintf(stderr, "pthread_mutex_init: error %d\n", rc);
        free(new_f);
        return NULL;
    }

    rc = pthread_cond_init(&new_f->wait, NULL);
    if (rc != 0) {
        fprintf(stderr, "pthread_cond_init: error %d\n", rc);
        pthread_mutex_destroy(&new_f->mutex);
        free(new_f);
        return NULL;
    }

    return new_f;
}
/*
 * Block the calling thread until f->completed becomes non-zero.
 *
 * The flag is tested with f->mutex held and the thread sleeps on f->wait
 * in between, re-testing after every wake-up. Always returns 0.
 */
int
wait_future(future_t * f)
{
    pthread_mutex_t *guard = &f->mutex;
    pthread_cond_t *signal = &f->wait;

    pthread_mutex_lock(guard);
    for (;;) {
        if (f->completed)
            break;
        pthread_cond_wait(signal, guard);
    }
    pthread_mutex_unlock(guard);

    return 0;
}
/*
 * Mark f as completed and wake every thread blocked in wait_future(f).
 *
 * The flag is written with f->mutex held; the broadcast is issued after
 * the mutex has been released.
 */
void
complete(future_t * f)
{
    pthread_mutex_t *guard = &f->mutex;

    pthread_mutex_lock(guard);
    f->completed = 1;
    pthread_mutex_unlock(guard);

    pthread_cond_broadcast(&f->wait);
}

线程池本身：

/*
 * Create a pool with its task queue and start the worker thread(s), each
 * running work() with its own thread_wrapper_t.
 *
 * threads: requested number of workers. NOTE(review): the value is
 *          overwritten with 1 below, so exactly one worker is attempted no
 *          matter what the caller passes. This looks like a debugging
 *          leftover; it is kept to preserve behaviour, confirm before
 *          removing.
 *
 * Returns the pool, or NULL if the pool itself cannot be allocated. If a
 * worker cannot be set up, a diagnostic is printed, no further workers are
 * started, and the pool is still returned (possibly with no worker at all).
 * Thread handles are not kept, so the workers cannot be joined later.
 */
pool_worker_t *
create_work_pool(int threads)
{
    pool_worker_t *new_p = malloc(sizeof *new_p);
    if (new_p == NULL) {
        perror("malloc");
        return NULL; /* previously fell through and dereferenced NULL */
    }

    threads = 1;
    /* NOTE(review): create_queue()'s failure behaviour is not visible here,
     * so its result is stored unchecked, as before. */
    new_p->queue = create_queue();

    int i;
    for (i = 0; i < threads; i++) {
        thread_wrapper_t *w = malloc(sizeof *w);
        if (w == NULL) {
            perror("malloc");
            break;
        }
        w->pool = new_p;

        pthread_t n;
        /* pthread_create reports failure via its return value, not errno. */
        int rc = pthread_create(&n, NULL, work, w);
        if (rc != 0) {
            fprintf(stderr, "pthread_create: error %d\n", rc);
            free(w); /* no thread took ownership of the wrapper */
            break;
        }
    }

    return new_p;
}
/*
 * Make one non-blocking attempt to take a task from the queue of thr's pool.
 *
 * Returns whatever try_dequeue stored in the local pointer, which starts
 * out as NULL. A NULL result therefore means the dequeue delivered nothing
 * (presumably an empty queue; confirm against the lfds API).
 */
task_t *
try_get_new_task(thread_wrapper_t * thr)
{
    task_t *next = NULL;

    try_dequeue(thr->pool->queue, next);

    return next;
}
/*
 * Hand task t to pool p by appending it to the pool's queue.
 * The task pointer itself is enqueued; no copy is made here.
 */
void
submit_job(pool_worker_t * p, task_t * t)
{
    queue_t *target = p->queue;

    enqueue(target, t);
}
/*
 * Thread entry point of a pool worker.
 *
 * data: the worker's thread_wrapper_t, passed through pthread_create().
 *
 * Loops forever: spins on try_get_new_task() until a task shows up, runs
 * its callback with (wrapper, task data), then completes the task's
 * future. The callback's return value is discarded. The loop has no exit,
 * so the trailing pthread_exit() is never reached.
 */
void *
work(void * data)
{
    thread_wrapper_t *self = (thread_wrapper_t *) data;

    for (;;) {
        task_t *job = NULL;

        /* Busy-wait: poll the queue until a task is obtained. */
        do {
            job = (task_t *) try_get_new_task(self);
        } while (job == NULL);

        /* Fetch the future before running the callback, as the original does. */
        future_t *done = job->f;
        job->fun(self, job->data);
        complete(done);
    }
    pthread_exit(NULL);
}

最后是任务相关的代码：

/*
 * Convenience constructor: asks create_work_pool() for 8 worker threads
 * and returns whatever it returns.
 */
pool_worker_t *
create_tpool()
{
    pool_worker_t *pool = create_work_pool(8);

    return pool;
}
/*
 * Package fun(data) as a task, submit it to pool p, and return a handle
 * that can be passed to mywait().
 *
 * p:    pool that receives the task.
 * thr:  not used by this function.
 * fun:  callback to run; it is called with the executing thread's wrapper
 *       and `data`.
 * data: opaque argument forwarded to fun.
 *
 * Returns the handle, or NULL if any allocation fails. In that case
 * nothing has been submitted and everything allocated here is freed.
 * mywait() returns immediately when given a NULL handle. Neither the task
 * nor the handle is freed anywhere in this file; the caller owns the handle.
 */
sfuture_t *
async(pool_worker_t * p, thread_wrapper_t * thr, void *
(*fun)(thread_wrapper_t *, void *), void * data)
{
    (void) thr;

    task_t *job = malloc(sizeof *job);
    if (job == NULL) {
        perror("malloc");
        return NULL; /* previously fell through and dereferenced NULL */
    }

    /* Allocate the handle before publishing the job, so a failure here
     * cannot leave a queued task without a handle. */
    sfuture_t *new_t = malloc(sizeof *new_t);
    if (new_t == NULL) {
        perror("malloc");
        free(job);
        return NULL;
    }

    job->data = data;
    job->fun = fun;
    job->f = create_future();
    if (job->f == NULL) {
        free(new_t);
        free(job);
        return NULL;
    }
    new_t->t = job;

    /* Publish last: once queued, another thread may run the job at any time. */
    submit_job(p, job);

    return new_t;
}
void
mywait(thread_wrapper_t * thr, sfuture_t * sf)
{
if (sf == NULL)
return;
if (thr != NULL)
{
while (!sf->t->f->completed)
{
task_t * t_n = try_get_new_task(thr);
if (t_n != NULL)
{
future_t * f = t_n->f;
(*(t_n->fun))(thr,t_n->data);
complete(f);
}
}
return;
}
wait_future(sf->t->f);
return ;
}

该队列是lfds无锁队列。

/*
 * enqueue(q, t): append t to the lfds611 queue held in q->lq.
 *
 * Tries lfds611_queue_enqueue() first and, if that reports failure, falls
 * back to lfds611_queue_guaranteed_enqueue(). The result of the fallback
 * is not checked. Both arguments may be evaluated twice, so do not pass
 * expressions with side effects.
 *
 * The body is wrapped in do { } while (0) so that `enqueue(q, t);` is a
 * single statement, and the line continuations required for a multi-line
 * #define are present.
 */
#define enqueue(q,t) do {                                       \
        if (!lfds611_queue_enqueue((q)->lq, (t)))               \
        {                                                       \
            lfds611_queue_guaranteed_enqueue((q)->lq, (t));     \
        }                                                       \
    } while (0)
/*
 * try_dequeue(q, t): make one dequeue attempt on the lfds611 queue held in
 * q->lq, storing the result through &t.
 *
 * t must be an lvalue. The return value of lfds611_queue_dequeue() is
 * discarded, so callers initialise t beforehand and inspect it afterwards.
 *
 * The body is wrapped in do { } while (0) so that `try_dequeue(q, t);` is
 * a single statement, and the line continuations required for a
 * multi-line #define are present.
 */
#define try_dequeue(q,t) do {                                   \
        lfds611_queue_dequeue((q)->lq, &(t));                   \
    } while (0)

只要对async的调用次数非常多，就会出现问题。

Valgrind输出：

Process terminating with default action of signal 11 (SIGSEGV)
==12022==  Bad permissions for mapped region at address 0x5AF9FF8
==12022==    at 0x4C28737: malloc (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so)

我已经发现了问题所在：堆栈溢出。

首先，让我解释一下为什么堆栈溢出会发生在 malloc 内部（这可能也是您阅读本文的原因）。由于我的编程方式，程序运行时每当它开始（递归地）执行另一个任务，堆栈就会继续增长。而每次这样做时，我都必须用 malloc 分配一个新任务。malloc 自身还会调用其他子程序，这使得堆栈在 malloc 调用期间的增长，比单纯进入下一个任务调用时还要多。所以，即使没有 malloc，我最终也会遇到堆栈溢出；只不过因为有 malloc，堆栈会先在 malloc 内部溢出，而不是等到下一次递归调用时才溢出。下图显示了发生的事情：

初始堆栈状态：

-------------------------
| recursive call n - 3  |
-------------------------
| recursive call n - 2  |
-------------------------
| recursive call n - 1  |
-------------------------
|        garbage        |
-------------------------
|        garbage        | <- If the stack passes this point, the stack overflows.
-------------------------

malloc调用期间的堆栈：

-------------------------
| recursive call n - 3  |
-------------------------
| recursive call n - 2  |
-------------------------
| recursive call n - 1  |
-------------------------
|        malloc         |
-------------------------
|     __int_malloc      | <- If the stack passes this point, the stack overflows.
-------------------------

然后堆栈再次缩小，我的代码进入了一个新的递归调用：

-------------------------
| recursive call n - 3  |
-------------------------
| recursive call n - 2  |
-------------------------
| recursive call n - 1  |
-------------------------
| recursive call n      |
-------------------------
|        garbage        | <- If the stack passes this point, the stack overflows.
-------------------------

然后，它在这个新的递归调用中再次调用malloc。然而，这次它溢出了：

-------------------------
| recursive call n - 3  |
-------------------------
| recursive call n - 2  |
-------------------------
| recursive call n - 1  |
-------------------------
| recursive call n      |
-------------------------
|        malloc         | <- If the stack passes this point, the stack overflows.
-------------------------
|     __int_malloc      | <- This is when the stack overflow occurs.
-------------------------

[答案的其余部分更集中在我的代码中出现这个问题的原因上。]

通常，递归计算某个数 n 的斐波那契数时，堆栈大小随 n 线性增长。但在本例中，我是创建任务、用队列存储它们，并让（fib）任务排队等待执行。如果你在纸上画出这个过程，就会看到任务的数量随 n 呈指数增长，而不是线性增长（还要注意，如果我在创建任务时用堆栈来存储任务，那么已分配的任务数量以及堆栈大小都只会随 n 线性增长）。因此，实际发生的情况是堆栈随 n 呈指数级增长，最终导致堆栈溢出。至于溢出为什么恰好发生在 malloc 调用内部：正如我上面解释的，那里是堆栈最深的地方。堆栈本来就已接近耗尽，而 malloc 还要调用它内部的函数，使堆栈的增长超过了仅仅调用 mywait 和 fib 时的增长。

谢谢大家！如果不是你的帮助，我想不通！

在 malloc 内部触发 SIGSEGV（段错误）通常是由堆损坏引起的。堆损坏本身不会立即引发段错误，只有当 malloc 之后访问到被破坏的数据时问题才会暴露。麻烦在于，造成堆损坏的代码可能位于任何位置，甚至离调用 malloc 的地方很远。常见的情形是 malloc 内部的“下一块”指针因堆损坏被改成了无效地址，于是调用 malloc 时会解引用这个无效指针，从而出现段错误。

我认为您可以尝试将部分代码与程序的其他部分隔离开来，以缩小 bug 的排查范围。

此外，我看到您从未释放这里分配的内存，可能存在内存泄漏。

为了检查内存泄漏，您可以运行 top 命令（top -b -n 1）并查看以下各项：

RPRVT - resident private address space size
RSHRD - resident shared address space size
RSIZE - resident memory size
VPRVT - private address space size
VSIZE - total memory size

相关内容

最新更新

热门标签：