我正在尝试实现表示文件夹树的链表数据结构。
以下结构:
/* Error status carried inside every SRC node. */
typedef struct SRC_ERROR SRC_ERROR;
struct SRC_ERROR {
int error_code; /* status code; scan_source() stores OK or ERROR here */
char *error; /* message text; scan_source() only ever assigns string literals to it */
};
/* Record for one regular file: its name plus a buffer for an MD5 digest. */
typedef struct SRC_FILE SRC_FILE;
struct SRC_FILE {
char *entry; /* file name; scan_source() points it at readdir()'s d_name without copying */
char md5[MD5_DIGEST_LENGTH]; /* digest buffer; never written by the code shown here */
};
/* One directory node; nodes are chained through next_dir into a singly linked list. */
typedef struct SRC SRC; //Source file tree with md5 entry char for source verification.
struct SRC {
SRC_ERROR error; /* status of the scan that produced this node */
char *name; /* directory name */
char *full_path; /* path of the directory */
SRC_FILE **entries; /* array of POINTERS to file records; each pointed-to record needs its own allocation */
SRC *next_dir; /* next node in the list */
};
我的想法是:每个目录存储在一个SRC
节点中,而SRC_FILE
将用作数组来存储该目录下每个文件的文件名和 MD5 哈希。
下面的scan_source()
填充结构。
/*
 * Scan the directory source_path and build a list of SRC nodes.
 *
 * Returns the current head node. On opendir() failure the node is returned
 * with error.error_code set to ERROR and every other member uninitialized.
 *
 * NOTE(review): this is the version that crashes. The comments below mark
 * each place where the memory handling is wrong; the code is unchanged.
 */
SRC *scan_source(char *source_path) {
/* NOTE(review): malloc() result is not checked, and name, full_path,
 * entries and next_dir are left uninitialized. */
SRC *source = malloc(sizeof(SRC));
source->error.error_code = OK;
int count = 0;
DIR *dir;
struct dirent *entry;
if (!(dir = opendir(source_path))) {
source->error.error_code = ERROR;
source->error.error = "Unable to open source directory.n";
return source;
}
/* NOTE(review): count is 0 here, so this requests a zero-byte block:
 * there is no room for even one element. */
source->entries = (SRC_FILE **)malloc(sizeof(SRC_FILE *) * count);
if (source->entries == NULL) {
source->error.error_code = ERROR;
source->error.error = "Unable to allocate memory to file entry treen";
/* NOTE(review): no return here, so the loop below still runs with entries == NULL. */
}
while ((entry = readdir(dir)) != NULL) {
if (entry->d_type == DT_DIR) {
char path[PATH_MAX];
/* skip the self and parent links */
if (strcmp(entry->d_name, ".") == 0 || strcmp(entry->d_name, "..") == 0)
continue;
snprintf(path, sizeof(path), "%s/%s", source_path, entry->d_name);
printf("[%s] - %sn", entry->d_name, path);
// add new node
/* NOTE(review): insert_dir_node() stores both pointers as-is. d_name
 * belongs to readdir() and path is a local of this iteration, so both
 * dangle afterwards. source now refers to the NEW node, whose entries
 * member was never initialized. */
source = add_dir(source, insert_dir_node(entry->d_name, path));
/* NOTE(review): the list returned by the recursive call is discarded
 * (never linked in, never freed). */
scan_source(path);
} else
if (entry->d_type == DT_REG) {
printf("[FILE] - %sn", entry->d_name);
/* NOTE(review): entries has room for `count` pointers, so index `count`
 * is one past the end; and even an in-range slot would hold a pointer to
 * no SRC_FILE, because none is ever allocated. */
source->entries[count]->entry = entry->d_name; //SEGFAULT HERE
count++;
/* NOTE(review): grows the array only AFTER the write above; assigning
 * realloc()'s result directly also loses the old block on failure. */
source->entries = realloc(source->entries, sizeof(SRC_FILE *) * (count));
}
}
closedir(dir);
return source;
}
我在内存管理方面遇到了问题:当目录具有某些特定结构时,程序会间歇性地出现段错误(segmentation fault)。
我已经标记了调试器标记的行
source->entries[count]->entry = entry->d_name; //SEGFAULT HERE
我以为我为每个结构分配了内存,但也许我没有正确执行此操作,或者数据结构完全存在潜在问题?
例如:
test> tree
.
└── Text
0 directories, 1 file
上面这种目录结构会导致段错误,而下面这种则不会:
/test> tree
.
├── another sample
│ └── Text
└── sample folder
2 directories, 1 file
使用的附加功能:
/*
 * Push new_dir onto the front of the list headed by file_tree.
 * Returns the new head of the list, which is new_dir itself.
 */
SRC *add_dir(SRC *file_tree, SRC *new_dir) {
    SRC *head = new_dir;

    head->next_dir = file_tree;
    return head;
}
SRC *insert_dir_node(char *name, char *full_path) {
SRC *next_dir;
next_dir = (SRC *)emalloc(sizeof(SRC));
next_dir->name = name;
next_dir->full_path = full_path;
next_dir->next_dir = NULL;
return next_dir;
}
我开始查看代码,我看到的第一个问题是您正在存储readdir()
调用返回的指针 - 您应该复制其中包含的数据。
改变
source = add_dir(source, insert_dir_node(entry->d_name, path));
为
source = add_dir(source, insert_dir_node(strdup(entry->d_name), path));
您看到分段错误的原因是您总是在source->entries
数组结束后写入。
您最初创建一个 0 大小的数组:
int count = 0;
/* ... */
source->entries = (SRC_FILE **) malloc(sizeof(SRC_FILE*) * count);
然后设置其第一个(索引为 0)元素:
source->entries[count]->entry = entry->d_name; //SEGFAULT HERE
count++;
source->entries = realloc(source->entries, sizeof(SRC_FILE*)*(count));
然后将数组扩展到 1 个元素,然后写入第二个索引,依此类推。
您可以修复逻辑(始终为count+1
元素分配空间,因为您不仅希望为现有元素留出空间,还希望为下一个元素留出空间),或者,在这种情况下可能更有效,也可以在此处切换到链表结构。
下一个问题是你只分配指向SRC_FILE的指针,而不是SRC_FILE结构 - 你应该将定义更改为:
/* Revised node: entries becomes a flat array of SRC_FILE records instead of an array of pointers. */
struct SRC {
SRC_ERROR error; /* status of the scan that produced this node */
char *name; /* directory name */
char *full_path; /* path of the directory */
SRC_FILE *entries; /* array of file records, grown with realloc() as files are found */
SRC *next_dir; /* next node in the list */
};
并初始化为
source->entries = (SRC_FILE *) malloc(sizeof(SRC_FILE) * (count + 1));
然后关键部分
source->entries[count].entry = strdup(entry->d_name);
count++;
source->entries = realloc(source->entries, sizeof(SRC_FILE) * (count + 1));
还有一件事需要注意:insert_dir_node
创建一个新的 SRC 结构,该结构需要具有新初始化的条目成员:
next_dir->count = 0;
next_dir->entries = (SRC_FILE *)malloc(sizeof(SRC_FILE) * (1));
- 而且,由于现在每个 SRC 节点都有自己的
entries
数组,每个节点也需要各自的count
,因此把这个变量也移到结构体中。
修复所有这些为我提供了一个无错误的程序。
这个问题的核心是链表中的内存管理。事实上,这是C程序中的一个主要问题,因为C没有自动内存管理。您必须从内存管理的角度决定并明确结构中每个指针所指向的对象该如何处理:该指针是否拥有对象(负责其生存期),还是对象的生存期由其他地方管理,而该指针只是一个访问入口?
让我们分析一下您的对象定义:
/* Error description: a status code plus message text. */
typedef struct SRC_ERROR SRC_ERROR;
struct SRC_ERROR {
int error_code; /* status code */
char *error; /* message text; not const-qualified although it may point at a string literal */
};
SRC_ERROR
只是打包错误描述的一种方式。如果error
成员始终存储指向字符串文本的指针,则应将其定义为const char *
。相反,如果在某些情况下会动态分配字符串(其中包含与实际错误相关的信息,例如"error allocating 1023 objects\n"),那么您要么需要一个标志,用来指明error
指向的是使用后应当释放的已分配内存,要么应始终为错误消息分配内存,并在丢弃SRC_ERROR
对象时始终释放此内存。
/* Per-file record: name plus MD5 digest buffer. */
typedef struct SRC_FILE SRC_FILE;
struct SRC_FILE {
char *entry; /* file name */
char md5[MD5_DIGEST_LENGTH]; /* digest buffer of MD5_DIGEST_LENGTH bytes */
};
entry
应指向分配的内存,并且在丢弃SRC_FILE
对象时应释放此内存。
/* Directory node of the singly linked list. */
typedef struct SRC SRC; //Source file tree with md5 entry char for source verification.
struct SRC {
SRC_ERROR error; /* status attached to this node */
char *name; /* directory name */
char *full_path; /* path of the directory */
SRC_FILE **entries; /* array of pointers to file records; the struct holds no element count */
SRC *next_dir; /* next node in the list */
};
name
和full_path
应指向分配的内存,并在丢弃SRC
对象时释放。next_dir
指向另一个SRC
对象,应一致地分配和释放该对象。entries
指向已分配的数组,该数组的每个元素都指向一个已分配的对象。您需要一种方法来得知此数组中的元素个数。您可以在数组末尾维护一个NULL
指针,但在SRC
中添加一个count
成员来保存此信息更简单。把entries
改为指向已分配的SRC_FILE
对象数组的指针也会简单得多。
该函数并没有构造一棵树,而是尝试构造一个目录列表。每当要递归进入某个子目录时,都应将scan_source
返回的SRC
对象中的新列表追加到调用方所分配的SRC
对象中已构造的列表之后,并释放递归调用返回的对象。
下面是测试程序中的修改版本:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <dirent.h>
/* Fallback used only when no included header has defined PATH_MAX. */
#ifndef PATH_MAX
#define PATH_MAX 1024
#endif
/* Size in bytes of the md5 buffer in SRC_FILE. */
#define MD5_DIGEST_LENGTH 16
/* Debug tracing hook. The trailing "//x" is an ordinary comment, so TRACE(x)
 * expands to nothing and every trace statement is compiled out. */
#define TRACE(x) //x
/* Status codes stored in ERROR_STATE.code; OK is 0, so any non-zero code means failure. */
enum { OK = 0, ERROR, OUT_OF_MEMORY };
/* Outcome of a scan, reported by scan_source() through its `error` argument. */
typedef struct ERROR_STATE {
    int code;             /* OK (0) on success, a non-zero status code otherwise */
    const char *message;  /* description of the failure; always a string literal, never freed */
} ERROR_STATE;
/* One regular file recorded for a directory. */
typedef struct SRC_FILE {
    char *name;                   /* file name; points to allocated memory */
    char md5[MD5_DIGEST_LENGTH];  /* digest bytes */
} SRC_FILE;
/*
 * Source file tree with md5 entry char for source verification.
 * Each SRC describes one directory; directories are chained via next_dir.
 */
typedef struct SRC {
    char *name;            /* directory name; points to allocated memory */
    char *full_path;       /* path of the directory; points to allocated memory */
    size_t count;          /* number of elements in entries */
    SRC_FILE *entries;     /* allocated array of count elements */
    struct SRC *next_dir;  /* the next SRC in the list */
} SRC;
/*
 * Return a newly allocated copy of the last component of full_path.
 *
 * Trailing slashes are ignored, so "a/b/" yields "b" rather than an empty
 * string. A path made only of slashes yields "/", and "" yields "".
 *
 * Returns NULL if memory cannot be allocated. The caller owns the result
 * and must free() it.
 */
static char *basename_dup(const char *full_path) {
    size_t end = strlen(full_path);

    /* Drop trailing slashes, but keep a lone "/". */
    while (end > 1 && full_path[end - 1] == '/') {
        end--;
    }

    /* Walk back to the character that follows the previous slash. */
    size_t start = end;
    while (start > 0 && full_path[start - 1] != '/') {
        start--;
    }
    if (start == end && end > 0) {
        /* Only slashes were given: the component is the root itself. */
        start = end - 1;
    }

    size_t len = end - start;
    char *copy = malloc(len + 1);
    if (copy != NULL) {
        memcpy(copy, full_path + start, len);
        copy[len] = '\0';
    }
    return copy;
}
/* construct a SRC describing the directory contents.
* if there is an error, either return a partially constructed SRC or return NULL
*/
SRC *scan_source(const char *source_path, ERROR_STATE *error) {
char *full_path = strdup(source_path);
char *name = basename_dup(source_path);
SRC *source = calloc(1, sizeof(SRC)); // all members initialized to 0
if (source == NULL) {
error->code = ERROR;
error->message = "Unable to allocate memory.n";
free(full_path);
free(name);
free(source);
return NULL;
}
error->code = OK;
source->full_path = full_path;
source->name = name;
DIR *dir;
struct dirent *entry;
if (!(dir = opendir(source_path))) {
error->code = ERROR;
error->message = "Unable to open source directory.n";
return source;
}
while ((entry = readdir(dir)) != NULL) {
char path[PATH_MAX];
int len;
if (!strcmp(entry->d_name, ".") || !strcmp(entry->d_name, ".."))
continue;
len = snprintf(path, sizeof(path), "%s/%s", source_path, entry->d_name);
if (len >= (int)sizeof(path)) {
// the path was truncated.
// you can report this or ignore it...
TRACE(printf("[%s] - %s - path too long, ignoredn", entry->d_name, path));
continue;
}
if (entry->d_type == DT_DIR) {
TRACE(printf("[%s] - %sn", entry->d_name, path));
SRC *source1 = scan_source(path, error);
if (error->code != OK) {
// either ignore the error or abort?
}
if (source1) {
// append the new directory (and its list of sub-directories)
SRC **tailp = &source->next_dir;
while (*tailp) tailp = &(*tailp)->next_dir;
*tailp = source1;
}
} else
if (entry->d_type == DT_REG) {
TRACE(printf("[FILE] - %sn", entry->d_name));
// add the file to the entries list
SRC_FILE *entries = realloc(source->entries, sizeof(source->entries[0]) * (source->count + 1));
if (entries == NULL) {
// you should return to the caller with a proper error code
error->code = OUT_OF_MEMORY;
error->message = "cannot reallocate entries array";
break;
}
source->entries = entries;
// source->entries[count] must point to an allocated object
name = strdup(entry->d_name);
if (name == NULL) {
error->code = OUT_OF_MEMORY;
error->message = "cannot allocate entry name";
break;
}
source->entries[source->count].name = name;
memset(source->entries[source->count].md5, 0, sizeof(source->entries[source->count].md5));
source->count++;
//if (md5_sum(full_path, source->entries[source->count].md5)) {
// // error computing the MD5 sum...
//}
}
}
closedir(dir);
return source;
}
/*
 * Release one SRC node: its name, its path, every file name in entries and
 * the entries array itself (the original leaked the array).
 *
 * next_dir is NOT followed; to free a whole list, save next_dir and call
 * this on each node in turn. Passing NULL is a no-op.
 */
void free_source(SRC *source) {
    if (source == NULL) {
        return;
    }
    for (size_t i = 0; i < source->count; i++) {
        free(source->entries[i].name);
    }
    free(source->entries);
    free(source->full_path);
    free(source->name);
    free(source);
}
int main(int argc, char *argv[1]) {
ERROR_STATE error = { 0, NULL };
if (argc < 2) {
printf("usage: scansource directory [...]n");
return 1;
}
for (int i = 1; i < argc; i++) {
SRC *source = scan_source(argv[i], &error);
if (error.code) {
printf("Error %d: %sn", error.code, error.message);
}
while (source) {
SRC *cur = source;
source = source->next_dir;
printf("{n"
" name: '%s',n"
" full_path: '%s',n"
" count: %zu,n"
" entries: [n",
cur->name, cur->full_path, cur->count);
for (size_t j = 0; j < cur->count; j++) {
printf(" { md5: '");
for (size_t k = 0; k < MD5_DIGEST_LENGTH; k++)
printf("%02x", cur->entries[j].md5[k]);
printf("', name: '%s' },n", cur->entries[j].name);
}
printf(" ]n},n");
free_source(cur);
}
}
return 0;
}