将节点添加到链表的函数不起作用 | c.



我有以下程序,它从文本文件中读取单词并创建一个链表,每个节点包含:word,count,next。

当链表中已存在单词时,计数将更新,否则,将在链表的末尾创建一个节点。

我所有的函数都有效,除了我在链表末尾添加一个单词的函数。节点链接可能存在错误?

使用我的以下文本文件:第 1 行"我的名字是娜塔莉",第 2 行"我的狗是尼科" 我应该得到以下输出:my(2),name(1),is(2),natalie(1),dog(1),niko(1) 但我得到:我的(2),狗(2),s(1),iko(1),is(1),niko(1)

我的错误在哪里?

//function to add word to linked list
struct WordNode *addWord(char* word, struct WordNode *wordListHead){
//create new node
struct WordNode *current = malloc(sizeof(struct WordNode));
current->word = word;
current->count = 1;
current->next = NULL;
//
while(1){
if((wordListHead != NULL)&&(wordListHead->next ==NULL)){
//connect 
wordListHead->next=current;
break;
}
wordListHead = wordListHead->next;
}
}

在主线中调用:

char *filename = argv[1];
FILE *fp = fopen(filename, "r");
printf("%sn", filename);
if (fp == NULL){
printf("Error: unable to open the file ");
printf("%s", filename);
return 1;
}
else {
char *delim = " tn,:;'".?!#$-><(){}[]|\/*&^%@!~+=_"; // These are our word delimiters.
char line[1000];
char * token;
int count = 0;//count used so that first word of the doc can be created as head node
//create head pointer
struct WordNode *wordListHead = NULL;
//create current pointer 
struct WordNode *current = NULL;
//iterate through each line in file
while(fgets(line, 1000, fp)){
//seperate each word
//first word of line
token = strtok(line, delim);
printf("%sn", token);
if(count == 0){
//first word of document, create first wordnode (head node)
wordListHead = malloc(sizeof(struct WordNode));
wordListHead->word = token;
wordListHead->count = 1;
wordListHead->next = NULL;
}
else{
//check if first word of line exists in linked list
if((doesWordExist(token, wordListHead)) == 0){
//update count
updateCount(token, wordListHead);
}
else{
//create node
addWord(token, wordListHead);
}
}

//iterate through all the other words of line
while ((token=strtok(NULL, delim)) != NULL){
printf("%sn", token);
//check if name is in linked list
if((doesWordExist(token, wordListHead)) == 0){
//update count
updateCount(token, wordListHead);
}
else{
//create node
addWord(token, wordListHead);
}
}
count++;
}
printWordList(wordListHead);
}
}

此处定义的结构:

//structure definition of linked list node
#ifndef WORDLISTH
#define WORDLISTH
struct WordNode{
char *word;
unsigned long count;
struct WordNode *next;
};
void printWordList( struct WordNode *wordListHead);
struct WordNode *addWord(char* word , struct WordNode *wordListead);
#endif

其他供参考的功能:

//function to check if word is in linked list
bool doesWordExist(char* myword, struct WordNode *wordListHead){
while (wordListHead != NULL){
if(strcmp(wordListHead->word, myword) == 0){
return 0;
}
wordListHead= wordListHead-> next;
}
return 1;
}
//function to update the count of word 
void updateCount(char* myword, struct WordNode *wordListHead){
while (wordListHead != NULL){
if(strcmp(wordListHead->word, myword) == 0){
//update count value
//capture current count and add 1
int curcount = wordListHead->count;
int newcount = curcount + 1;
wordListHead->count = newcount;
//printf("%dn", newcount);
}
wordListHead= wordListHead-> next;
}
}
//function to print word list
//takes head node as argument
void printWordList( struct WordNode *wordListHead){
//WordNode *toyptr = wordListHead;
while (wordListHead != NULL){
printf("%sn", wordListHead->word);
printf("%ldn", wordListHead->count);
wordListHead = wordListHead -> next;
}
}

token存储到结构中时,使用的是作为输入缓冲区一部分的指针。

在新的输入行上,从几行收集的令牌将被损坏/丢弃。

您需要分配空间以将令牌存储在堆上的结构中。为此使用strdup

因此,在addWord中,您需要:

current->word = strdup(word);

main中,您需要:

wordListHead->word = strdup(token);
>更新:

这是主要问题。但是,您的代码会执行大量不必要的复制。

addWord不会处理空列表。但是,如果是这样,main就不需要为行上的第一个单词和后续单词使用单独的[复制]代码。

strcmp可以合并到addWord中,并且可以"做所有事情"。(即列表的次扫描)

对于doesWordExist,它返回一个匹配项的bool。如果它返回指向匹配元素的指针,则updateCount只需增加计数 [而不是重新扫描列表]。我已经相应地更新了这些功能,但由于对addWord的更改,不再需要它们

以下是我简化和重构代码的方法:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
typedef int bool;
#ifdef DEBUG
#define dbgprt(_fmt...) 
printf(_fmt)
#else
#define dbgprt(_fmt...)     do { } while (0)
#endif
//structure definition of linked list node
#ifndef WORDLISTH
#define WORDLISTH
struct WordNode {
char *word;
unsigned long count;
struct WordNode *next;
};
void printWordList(struct WordNode *wordListHead);
#endif
//function to add word to linked list
struct WordNode *
addWord(char *word, struct WordNode **list)
{
struct WordNode *curr;
struct WordNode *prev = NULL;
struct WordNode *newnode = NULL;
for (curr = *list;  curr != NULL;  curr = curr->next) {
if (strcmp(curr->word,word) == 0) {
newnode = curr;
break;
}
prev = curr;
}
// create new node
do {
// word already exists
if (newnode != NULL)
break;
newnode = malloc(sizeof(struct WordNode));
newnode->word = strdup(word);
newnode->count = 0;
newnode->next = NULL;
// attach to tail of list
if (prev != NULL) {
prev->next = newnode;
break;
}
// first node -- set list pointer
*list = newnode;
} while (0);
// increment the count
newnode->count += 1;
return newnode;
}
//function to check if word is in linked list
struct WordNode *
findWord(char *myword, struct WordNode *head)
{
struct WordNode *curr;
for (curr = head;  curr != NULL;  curr = curr->next) {
if (strcmp(curr->word,myword) == 0)
break;
}
return curr;
}
//function to update the count of word
void
updateCount(char *myword, struct WordNode *head)
{
struct WordNode *match;
match = findWord(myword,head);
if (match != NULL)
match->count += 1;
}
//function to print word list
//takes head node as argument
void
printWordList(struct WordNode *head)
{
struct WordNode *curr;
for (curr = head;  curr != NULL;  curr = curr->next) {
printf("%s", curr->word);
printf(" %ldn", curr->count);
}
}
int
main(int argc, char **argv)
{
char *filename = argv[1];
FILE *fp = fopen(filename, "r");
printf("FILE: %sn", filename);
if (fp == NULL) {
printf("Error: unable to open the file ");
printf("%s", filename);
return 1;
}
// These are our word delimiters.
char *delim = " tn,:;'".?!#$-><(){}[]|\/*&^%@!~+=_";
char line[1000];
char *token;
// create head pointer
struct WordNode *wordListHead = NULL;
// iterate through each line in file
while (fgets(line, sizeof(line), fp)) {
// seperate each word
// first word of line
char *bp = line;
while (1) {
token = strtok(bp, delim);
bp = NULL;
if (token == NULL)
break;
dbgprt("TOKEN1: %sn", token);
addWord(token,&wordListHead);
}
}
printWordList(wordListHead);
return 0;
}
<小时 />

更新#2:

请注意,addWordfindWord复制代码。addWord的第一部分[基本上]是复制findWord所做的事情。

但是,addWord不能只使用findWord[这是可取的],因为findWord,如果它找不到匹配返回NULL。在这种情况下,它[没有办法]传回addWord需要附加到的最后一个元素(即列表的"尾巴")。

虽然我们可以添加一个额外的参数来findWord传播这个值,但更好的解决方案是创建一个定义"列表"的不同结构。

在现有代码中,我们使用指向头字节点的"双星"指针作为"列表"。使用单独的结构更干净,并且具有一些额外的优点。

我们可以传递一个简单的指向列表的指针。我们不再需要担心我们是否应该传递双星指针。

虽然我们只使用链表,但如果我们希望将列表转换为双向链表,单独的列表结构会有所帮助。

我们只需传递一个指向列表的指针,列表就可以跟踪列表的头部、列表的尾部以及列表中元素数量的计数。

链表非常适合与mergesort一起排序。列表计数有助于提高效率,因为更容易找到列表的"中点"[合并排序需要知道]。

为了显示双向链表的开头,我在单词 struct 中添加了一个prev元素。目前未使用,但它暗示了双重链接版本。

我重新设计了所有函数,以获取指向列表的指针,而不是指向头节点的指针。

由于列表结构具有tail指针,因此addWord现在可以调用findWord。如果findWord找不到匹配项,addWord只需使用列表中的head/tail指针即可找到正确的插入点。

为了简化一点,我更改了单词节点 [和列表结构] 以使用一些typedef语句。

此外,将主导结构指针作为第一个参数更常见/惯用,因此我颠倒了某些函数的参数顺序。

无论如何,这是[进一步]重构的代码:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
typedef int bool;
#ifdef DEBUG
#define dbgprt(_fmt...) 
printf(_fmt)
#else
#define dbgprt(_fmt...)     do { } while (0)
#endif
//structure definition of linked list node
#ifndef WORDLISTH
#define WORDLISTH
// word frequency control
typedef struct WordNode Word_t;
struct WordNode {
const char *word;
unsigned long count;
Word_t *next;
Word_t *prev;
};
// word list control
typedef struct {
Word_t *head;
Word_t *tail;
unsigned long count;
} List_t;
void printWordList(List_t *list);
#endif
// create a list
List_t *
newList(void)
{
List_t *list;
list = calloc(1,sizeof(*list));
return list;
}
// function to check if word is in linked list
Word_t *
findWord(List_t *list,const char *myword)
{
Word_t *curr;
for (curr = list->head;  curr != NULL;  curr = curr->next) {
if (strcmp(curr->word,myword) == 0)
break;
}
return curr;
}
//function to add word to linked list
Word_t *
addWord(List_t *list,const char *word)
{
Word_t *match;
do {
// try to find existing word
match = findWord(list,word);
// word already exists
if (match != NULL)
break;
// create new node
match = malloc(sizeof(*match));
match->word = strdup(word);
match->count = 0;
match->next = NULL;
// attach to head of list
if (list->head == NULL)
list->head = match;
// append to tail of list
else
list->tail->next = match;
// set new tail of list
list->tail = match;
// advance list count
list->count += 1;
} while (0);
// increment the word frequency count
match->count += 1;
return match;
}
//function to update the count of word
void
updateCount(List_t *list,const char *myword)
{
Word_t *match;
match = findWord(list,myword);
if (match != NULL)
match->count += 1;
}
//function to print word list
//takes head node as argument
void
printWordList(List_t *list)
{
Word_t *curr;
for (curr = list->head;  curr != NULL;  curr = curr->next) {
printf("%s", curr->word);
printf(" %ldn", curr->count);
}
}
int
main(int argc, char **argv)
{
char *filename = argv[1];
FILE *fp = fopen(filename, "r");
printf("FILE: %sn", filename);
if (fp == NULL) {
printf("Error: unable to open the file ");
printf("%s", filename);
return 1;
}
// These are our word delimiters.
char *delim = " tn,:;'".?!#$-><(){}[]|\/*&^%@!~+=_";
char line[1000];
char *token;
// create list/head pointer
List_t *list = newList();
// iterate through each line in file
while (fgets(line, sizeof(line), fp) != NULL) {
// seperate each word
// first word of line
char *bp = line;
while (1) {
token = strtok(bp, delim);
bp = NULL;
if (token == NULL)
break;
dbgprt("TOKEN1: %sn", token);
addWord(list,token);
}
}
printWordList(list);
return 0;
}

@Craig Estey 为您提供了一个很好的答案,所以不要更改您的答案选择,但不要只是在评论中留下链接,有几种查看列表操作的重要方法可能会有所帮助,您必须使用内存/错误检查程序来验证您对分配内存的使用, 尤其是在处理列表操作时。

假设一个节点持有一个字符串,其引用计数为字符串出现的额外次数,就像你的情况一样,例如

typedef struct node_t {     /* list node */
char *s;
size_t refcnt;
struct node_t *next;
} node_t;

使用节点地址和指向节点的指针进行迭代消除了特殊情况

使用节点的地址和指向节点的指针在Linus的了解指针中进行了讨论。

例如,如果需要检查if (list->head == NULL)以将第一个节点添加到列表中,如果同时使用节点的地址和指向节点的指针进行迭代,则只需将分配给新节点的指针分配给当前节点的地址即可。无论它是第一个、中间还是最后一个节点,这都有效。它还消除了从列表中删除节点时不必担心前一个节点是什么。在要删除的节点上,您只需将下一个节点的内容分配给当前地址并释放最初存在的节点。这会将您的添加节点函数减少到类似于以下内容的内容:

/** add node in sort order to list.
*  returns pointer to new node on success, NULL otherwise.
*/
node_t *add_node (node_t **head, const char *s)
{
node_t **ppn = head,                            /* address of current node */
*pn = *head;                            /* pointer to current node */
while (pn) {                                    /* iterate to last node */
if (strcmp (pn->s, s) == 0) {               /* compare node string with string */
pn->refcnt += 1;                        /* increment ref count */
return *ppn;
}
ppn = &pn->next;                            /* ppn to address of next */
pn = pn->next;                              /* advance pointer to next */
}

return *ppn = create_node (s);                  /* allocate and return node */
}

(注意:通过延迟新节点和字符串的分配(create_node (s)上面),您可以避免分配,直到您知道需要添加字符串 - 简化内存处理)

如上面的注释中所述,这会将列表的doesWordExist()遍历和addWord()遍历结合起来,以找到结束的遍历到单个遍历中。如果列表中有数十万个节点,则不希望多次遍历列表。

使用strdup()很好,但要知道它是 POSIX 而不是标准 C

strdup()对于复制字符串并将结果分配给新指针很方便,但strdup()由 POSIX 提供,因此并非所有实现都会提供它。此外,strdup()分配内存,因此与分配内存的任何函数一样,在使用返回的指针之前,必须检查结果是否未NULL。您可以通过编写简短的等效项来避免潜在的可移植性问题。例如,在上面显示的create_node()中,它确实:

/** helper function to allocate node, and storage for string.
*  copies string to node and initializes node->next pointer NULL
*  and node->refcnt zero. returns pointer to allocated node on success,
*  NULL otherwise.
*/
node_t *create_node (const char *s)
{
size_t len = strlen (s);                        /* get string length */
node_t *node = malloc (sizeof *node);           /* allocate node */
if (!node) {    /* validate EVERY allocation */
perror ("create_node: malloc node");
return NULL;
}
if (!(node->s = malloc (len + 1))) {            /* allocate for string */
perror ("create_node: malloc node->s");
free (node);        /* on failure, free node before returning NULL */
return NULL;
}
memcpy (node->s, s, len+1);                     /* copy string to node */
node->refcnt = 0;                               /* initialize ref count */
node->next = NULL;                              /* initialze next NULL */
return node;    /* return allocated & initialized node */
}

释放分配的内存

每当您编写创建数据结构的代码时,它们都需要能够在您要删除单个节点时自行清理,或者使用该列表完成。当您创建仅在以下函数中声明和使用的列表等时,这变得势在必行main()因为内存不会在函数返回时释放。使用main()时,程序退出并释放所有分配的内存。但是,如果列表仅在main()下面创建和使用,则每次调用该函数时,如果在返回之前未释放列表内存,则会导致内存泄漏。释放所有内存的功能简短且易于编写,例如

/** delete all nodes in list */
void del_list (node_t *head)
{
node_t *pn = head;                              /* pointer to iterate */
while (pn) {                                    /* iterate over each node */
node_t *victim = pn;                        /* set victim to current */
pn = pn->next;                              /* advance pointer to next */
free (victim->s);                           /* free current string */
free (victim);                              /* free current node */
}
}

(**无需担心refcnt,因为您要删除列表)

一个包含所有这些点的简短示例,以及一个del_node()从列表中删除单个节点(或者如果refcnt不为零,则减少refcnt而不删除节点)可以是:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#define MAXC 1024   /* if you need a constant, #define one (or more) */
typedef struct node_t {     /* list node */
char *s;
size_t refcnt;
struct node_t *next;
} node_t;
/** helper function to allocate node, and storage for string.
*  copies string to node and initializes node->next pointer NULL
*  and node->refcnt zero. returns pointer to allocated node on success,
*  NULL otherwise.
*/
node_t *create_node (const char *s)
{
size_t len = strlen (s);                        /* get string length */
node_t *node = malloc (sizeof *node);           /* allocate node */
if (!node) {    /* validate EVERY allocation */
perror ("create_node: malloc node");
return NULL;
}
if (!(node->s = malloc (len + 1))) {            /* allocate for string */
perror ("create_node: malloc node->s");
free (node);        /* on failure, free node before returning NULL */
return NULL;
}
memcpy (node->s, s, len+1);                     /* copy string to node */
node->refcnt = 0;                               /* initialize ref count */
node->next = NULL;                              /* initialze next NULL */
return node;    /* return allocated & initialized node */
}
/** add node in sort order to list.
*  returns pointer to new node on success, NULL otherwise.
*/
node_t *add_node (node_t **head, const char *s)
{
node_t **ppn = head,                            /* address of current node */
*pn = *head;                            /* pointer to current node */
while (pn) {                                    /* iterate to last node */
if (strcmp (pn->s, s) == 0) {               /* compare node string with string */
pn->refcnt += 1;                        /* increment ref count */
return *ppn;
}
ppn = &pn->next;                            /* ppn to address of next */
pn = pn->next;                              /* advance pointer to next */
}

return *ppn = create_node (s);                  /* allocate and return node */
}
/** print all nodes in list */
void prn_list (node_t *head)
{
if (!head) {                                    /* check if list is empty */
puts ("list-empty");
return;
}
for (node_t *pn = head; pn; pn = pn->next)      /* iterate over each node */
printf ("%-24s %4zun", pn->s, pn->refcnt); /* print string an refcount */
}
/** delete node with string s from list (for loop) */
void del_node (node_t **head, const char *s)
{
node_t **ppn = head;                            /* address of node */
node_t *pn = *head;                             /* pointer to node */

for (; pn; ppn = &pn->next, pn = pn->next) {
if (strcmp (pn->s, s) == 0) {               /* does string match */
if (pn->refcnt) {                       /* ref count > 0 */
pn->refcnt -= 1;                    /* decrement ref count */
return;                             /* done */
}
*ppn = pn->next;                        /* set content at address to next */
free (pn);                              /* free original pointer */
break;
}
}
}
/** delete all nodes in list */
void del_list (node_t *head)
{
node_t *pn = head;                              /* pointer to iterate */
while (pn) {                                    /* iterate over each node */
node_t *victim = pn;                        /* set victim to current */
pn = pn->next;                              /* advance pointer to next */
free (victim->s);                           /* free current string */
free (victim);                              /* free current node */
}
}
int main (int argc, char **argv) {
char buf[MAXC];                                 /* read buffer */
const char *delim = " tn.,;?!()";             /* strtok delimiters */
node_t *list = NULL;    /* pointer to list (must initialize NULL */
/* use filename provided as 1st argument (stdin by default) */
FILE *fp = argc > 1 ? fopen (argv[1], "r") : stdin;
if (!fp) {  /* validate file open for reading */
perror ("file open failed");
return 1;
}
while (fgets (buf, MAXC, fp))                   /* read all lines in file */
/* tokenize line based on delim */
for (char *p = strtok (buf, delim); p; p = strtok (NULL, delim)) {
if (ispunct(*p))                        /* if word is punctionation, skip */
continue;
add_node (&list, p);                    /* add node or increment refcnt */
}
if (fp != stdin)                                /* close file if not stdin */
fclose (fp);
puts ("string                   refcntn"       /* heading */
"-------------------------------");
prn_list (list);                                /* print contents of list */
del_list (list);                                /* free all list memory */
return 0;
}

看看delim中包含的字符以用于strtok(),以及使用ispunct()跳过以标点符号开头的标记(允许使用连字符的单词,但跳过单独用作句子延续的连字符等......

示例输入文件

$ cat dat/tr_dec_3_1907.txt
No man is above the law and no man is below it;
nor do we ask any man's permission when we require him to obey it.
Obedience to the law is demanded as a right; not asked as a favor.
(Theodore Roosevelt - December 3, 1907)

示例使用/输出

$ ./bin/lls_string_nosort_refcnt dat/tr_dec_3_1907.txt
string                   refcnt
-------------------------------
No                          0
man                         1
is                          2
above                       0
the                         1
law                         1
and                         0
no                          0
below                       0
it                          1
nor                         0
do                          0
we                          1
ask                         0
any                         0
man's                       0
permission                  0
when                        0
require                     0
him                         0
to                          1
obey                        0
Obedience                   0
demanded                    0
as                          1
a                           1
right                       0
not                         0
asked                       0
favor                       0
Theodore                    0
Roosevelt                   0
December                    0
3                           0
1907                        0

内存使用/错误检查

在您编写的任何动态分配内存的代码中,对于分配的任何内存块,您有 2个责任:(1)始终保留指向内存块起始地址的指针,以便 (2) 当不再需要内存块时可以释放它。

必须使用内存错误检查程序来确保不会尝试访问内存或超出/超出分配块边界的写入,不会尝试读取或基于未初始化值的条件跳转,最后确认释放了已分配的所有内存。

对于Linux来说,valgrind是正常的选择。每个平台都有类似的内存检查器。它们都易于使用,只需通过它运行您的程序即可。

$ valgrind ./bin/lls_string_nosort_refcnt dat/tr_dec_3_1907.txt
==8528== Memcheck, a memory error detector
==8528== Copyright (C) 2002-2017, and GNU GPL'd, by Julian Seward et al.
==8528== Using Valgrind-3.13.0 and LibVEX; rerun with -h for copyright info
==8528== Command: ./bin/lls_string_nosort_refcnt dat/tr_dec_3_1907.txt
==8528==
string                   refcnt
-------------------------------
No                          0
man                         1
is                          2
above                       0
the                         1
law                         1
and                         0
no                          0
below                       0
it                          1
nor                         0
do                          0
we                          1
ask                         0
any                         0
man's                       0
permission                  0
when                        0
require                     0
him                         0
to                          1
obey                        0
Obedience                   0
demanded                    0
as                          1
a                           1
right                       0
not                         0
asked                       0
favor                       0
Theodore                    0
Roosevelt                   0
December                    0
3                           0
1907                        0
==8528==
==8528== HEAP SUMMARY:
==8528==     in use at exit: 0 bytes in 0 blocks
==8528==   total heap usage: 73 allocs, 73 frees, 6,693 bytes allocated
==8528==
==8528== All heap blocks were freed -- no leaks are possible
==8528==
==8528== For counts of detected and suppressed errors, rerun with: -v

始终确认已释放已分配的所有内存,并且没有内存错误。

您已经有了解决当前问题的方法,但是在项目中继续考虑上面的一些提示,以消除列表的多次遍历,以及围绕strdup()的可移植性(和验证)问题。祝你的编码好运。

最新更新