c-删除结构节点导致另一个问题



我正试图把两个文件(以后可能更多)中的单词添加到一个结构中,这部分工作正常。不过,我还提供了一个选项,用来从结构中删除某些单词(这些单词列在stop.txt中)。当该选项启用并且同时读入两个文件时,输出就不正确了。

例如,在test.txt中,我有一些随机字符串:

kiio
luio
kiio
ohaio
lol

test1.txt中:

vola
kiio
kiio
haio
lol

stop.txt

luio
kiio

remove_word激活时的输出:

lol     test.txt        [1] {5}                                                                                                
lol     %~      [1] {5}                                                                                                        
lol     %~      [1] {5}                                                                                                        
luio    test.txt        [1] {2}                                                                                                
ohaio   test.txt        [1] {4}                                                                                                
vola    test1.txt       [1] {1}  

如果不是:

kiio    test.txt        [2] {1,3}     I need to have two nodes with the same word but different `fileno`                                                                                         
kiio    test1.txt       [2] {2,3}                                                                                              
lol     test.txt        [1] {5}                                                                                                
lol     test1.txt       [1] {5}                                                                                                
luio    test.txt        [1] {2}                                                                                                
ohaio   test.txt        [1] {4}                                                                                                
vola    test1.txt       [1] {1}                                                                                                
haio    test1.txt       [1] {4}    

我认为问题出在函数remove_word中,但我不确定,因为只处理一个文件时,它(删除单词)是正常工作的。

以下是结构定义:

/* One index entry: a word together with the file it was read from.
 * The same word coming from two different files is meant to occupy two
 * separate entries that differ only in `fileno`. The three pointer members
 * are released with free() when an entry is removed. */
typedef struct _word {
char *s;                /* the word */
int count;              /* number of times word occurs */
int *line_numbers;      // Array of line numbers
int num_line_numbers;   // Size of the array of line numbers
char *fileno;           // name of the source file (a string such as "test.txt", not a descriptor)
} word;
// Creating a struct to hold the data. I find it's easier
// The list is a dynamically sized array of `word` entries plus its length;
// the array is resized with realloc() as entries are removed.
typedef struct {
word *words;      // The array of word structs
int num_words;    // The size of the array
} word_list;

remove_word功能:

/*
 * Remove entries whose word equals word_to_delete from the list, freeing
 * the strings and the line-number array owned by each removed entry, and
 * shrink the array by one element per removal.
 *
 * NOTE(review): this is the version as posted in the question; it has
 * several problems:
 *  - after a removal the following entries move down by one slot, but the
 *    loop still advances i, so the entry that moved into slot i is never
 *    compared; two adjacent entries with the same word are not both removed.
 *  - the memcpy source and destination overlap whenever more than one entry
 *    follows the removed one; memcpy requires non-overlapping regions, so
 *    memmove is the appropriate call.
 *  - removing the only remaining entry asks realloc for 0 bytes; if that
 *    returns NULL the program exits (and does so with status 0).
 */
void remove_word(word_list *words, const char *word_to_delete) {
for (int i = 0; i < words->num_words; i++) {
if (0 == strcmp(words->words[i].s, word_to_delete)) {
// TODO: handle special case where there is only 1 word in list
// Calc number of words after found word
int number_of_words_to_right = words->num_words - i - 1;
// Free mem
free(words->words[i].s);
free(words->words[i].line_numbers);
free(words->words[i].fileno);
// Copy remaining words
memcpy(&words->words[i], &words->words[i + 1], sizeof(word) * number_of_words_to_right);
// Resize the array (technically not required)
// num_words is decremented here, as part of computing the new size
word *tmp = realloc(words->words, sizeof(word) * --words->num_words);
if (NULL == tmp) exit(0);
words->words = tmp;
}
}
return;
}

Main:

/*
 * Build the word index from test.txt and test1.txt, optionally remove the
 * words listed in stop.txt, print the index and release all memory.
 *
 * NOTE(review): this is the program as posted in the question. Only the
 * newline character literals ('\n', which had lost their backslash) were
 * restored and the unused locals dropped. The defects that the answer
 * below diagnoses are intentionally left in place and flagged inline.
 */
int main() {
    int option = 0;     /* 0: remove the stop words before printing */
    FILE *file = fopen("test.txt", "r");    /* NOTE(review): result is not checked */
    word_list *words = malloc(sizeof(word_list));
    if (NULL == words)
        exit(0);
    memset(words, 0, sizeof(word_list));
    char s[1000];
    int line_number = 1;
    while (fgets(s, sizeof(s), file)) {
        char *word = strtok(s, " ");
        while (word != NULL) {
            /* strip the newline that fgets leaves on the last word of a line */
            size_t len = strlen(word);
            if (len > 0 && word[len - 1] == '\n')
                word[--len] = 0;
            insert_word(words, word, line_number, "test.txt");
            word = strtok(NULL, " ");
        }
        line_number += 1;
    }
    fclose(file);
    FILE *file1 = fopen("test1.txt", "r");
    line_number = 1;
    /* NOTE(review): this loop reads from `file`, which was closed above,
     * instead of `file1`; using a closed stream is undefined behavior. */
    while (fgets(s, sizeof(s), file)) {
        char *word = strtok(s, " ");
        while (word != NULL) {
            size_t len = strlen(word);
            if (len > 0 && word[len - 1] == '\n')
                word[--len] = 0;
            insert_word(words, word, line_number, "test1.txt");
            word = strtok(NULL, " ");
        }
        line_number += 1;
    }
    fclose(file1);
    if (option == 0) {
        FILE *stopfile = fopen("stop.txt", "r"); /* should check the result */
        char fline[256];
        while (fgets(fline, sizeof(fline), stopfile)) {
            /* NOTE(review): fline still ends with the newline read by fgets,
             * so it cannot compare equal to a stored word. */
            remove_word(words, fline);
        }
        fclose(stopfile);
    }
    printlist(words);
    /* release every entry, then the array and the list itself */
    for (int i = 0; i < words->num_words; i++) {
        free(words->words[i].s);
        free(words->words[i].line_numbers);
        free(words->words[i].fileno);
    }
    free(words->words);
    free(words);
}

我们无法测试您的程序,因为您没有提供insert_word和printlist的源代码。

然而,张贴的代码中存在多个问题:

  • remove_word函数中,当找到并删除单词时,应递减i,以便循环在下一次迭代中测试相同的索引,以防两个文件中都存在相同的单词。

  • 在第二个读取循环中,您从file读取,但此FILE*已关闭,并且您使用不同的FILE指针file1打开第二个文件test1.txt。这具有未定义的行为。你可能很幸运,file1可能会碰巧指向与file相同的内存位置。只需对所有文件使用相同的变量file,或者更好:使用单独的函数从作为参数的文件中读取单词。

  • 您不会从传递给remove_word的单词中去掉尾随换行符,因此不会从字典中删除任何内容。

这是您程序的修改版本:

#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* One dictionary entry: a word as seen in one particular file.
 * insert_word creates a separate entry for the same word in a different
 * file. `s`, `line_numbers` and `fileno` are heap allocations owned by the
 * entry and freed when it is removed. */
typedef struct _word {
char *s;                /* the word */
int count;              /* number of times word occurs */
int *line_numbers;      // Array of line numbers (each line recorded once)
int num_line_numbers;   // Size of the array of line numbers
char *fileno;           // copy of the name of the file the word was read from
} word;
// Creating a struct to hold the data. I find it's easier
// insert_word keeps the array sorted by word (strcmp order); words is NULL
// when the list is empty.
typedef struct {
word *words;      // The array of word structs
int num_words;    // The size of the array
} word_list;
/* Convert the string s to lower case in place and return s. */
char *strlower(char *s) {
    char *p = s;
    while (*p != '\0') {
        /* go through unsigned char: tolower is undefined for negative values */
        unsigned char c = (unsigned char)*p;
        *p = (char)tolower(c);
        p++;
    }
    return s;
}
/*
 * Print the dictionary to stdout.
 *
 * Consecutive entries holding the same word (one per file) are printed as a
 * group: the first line starts with the word and the number of entries in
 * the group, and every line shows the file name, the occurrence count and
 * the list of line numbers, e.g.
 *
 *     lol<TAB>[2]<TAB>test.txt<TAB>[1]<TAB>{5}
 *     <TAB><TAB>test1.txt<TAB>[1]<TAB>{5}
 *
 * Grouping relies on identical words being adjacent in the array, which is
 * how insert_word stores them.
 */
void printlist(const word_list *words) {
    for (int i = 0, j; i < words->num_words;) {
        const word *wp = &words->words[i];
        /* find the end of the run of entries with the same word */
        for (j = i + 1; j < words->num_words; j++) {
            if (strcmp(wp->s, words->words[j].s) != 0)
                break;
        }
        printf("%s\t[%d]", wp->s, j - i);
        /* continuation lines get an extra tab in place of the word column */
        const char *prefix = "";
        for (; i < j; i++, wp++) {
            printf("%s\t%s\t[%d]\t{%d", prefix, wp->fileno, wp->count, wp->line_numbers[0]);
            prefix = "\t";
            for (int k = 1; k < wp->num_line_numbers; k++) {
                printf(",%d", wp->line_numbers[k]);
            }
            printf("}\n");
        }
    }
}
/*
 * Record one occurrence of the word s, seen on line line_number of filename.
 *
 * The array is kept in strcmp order. Entries for the same word from
 * different files sit next to each other, in the order they were first
 * inserted. If an entry for (s, filename) already exists its count is
 * incremented and line_number is appended unless already recorded;
 * otherwise a new entry is created after the existing entries for s.
 *
 * Returns 0 on success, 1 if a memory allocation fails. When growing the
 * line-number array of an existing entry fails, its count has already been
 * incremented.
 */
int insert_word(word_list *words, const char *s, int line_number, const char *filename) {
    /* binary search for the first entry whose word is not less than s */
    int lo = 0;
    int hi = words->num_words;
    while (lo < hi) {
        int mid = lo + (hi - lo) / 2;
        if (strcmp(words->words[mid].s, s) < 0) {
            lo = mid + 1;
        } else {
            hi = mid;
        }
    }

    /* scan the run of entries equal to s for one that belongs to filename */
    int pos = lo;
    while (pos < words->num_words && strcmp(words->words[pos].s, s) == 0) {
        word *entry = &words->words[pos];
        if (strcmp(entry->fileno, filename) == 0) {
            entry->count++;

            /* each line number is stored only once per entry */
            int known = 0;
            for (int k = 0; k < entry->num_line_numbers; k++) {
                if (entry->line_numbers[k] == line_number) {
                    known = 1;
                    break;
                }
            }
            if (!known) {
                int *grown = realloc(entry->line_numbers,
                                     (entry->num_line_numbers + 1) * sizeof(*entry->line_numbers));
                if (grown == NULL) {
                    return 1;
                }
                grown[entry->num_line_numbers] = line_number;
                entry->line_numbers = grown;
                entry->num_line_numbers++;
            }
            return 0;
        }
        pos++;
    }

    /* no entry yet: build every piece first so a failure leaves the list untouched */
    char *word_copy = strdup(s);
    char *file_copy = strdup(filename);
    int *lines = malloc(sizeof(*lines));
    word *grown_list = NULL;
    if (word_copy != NULL && file_copy != NULL && lines != NULL) {
        grown_list = realloc(words->words, (words->num_words + 1) * sizeof(*words->words));
    }
    if (grown_list == NULL) {
        free(word_copy);
        free(file_copy);
        free(lines);
        return 1;
    }
    words->words = grown_list;
    lines[0] = line_number;

    /* open a gap at pos by shifting the tail one slot to the right */
    word *slot = &grown_list[pos];
    memmove(slot + 1, slot, (words->num_words - pos) * sizeof(*slot));
    slot->s = word_copy;
    slot->fileno = file_copy;
    slot->line_numbers = lines;
    slot->num_line_numbers = 1;
    slot->count = 1;
    words->num_words++;
    return 0;
}
/*
 * Remove every entry whose word equals word_to_delete, whatever file it
 * came from, freeing the memory owned by each removed entry.
 *
 * The surviving entries are compacted in place in a single pass, keeping
 * their relative order. This avoids copying between overlapping regions
 * with memcpy (undefined behavior) and re-scanning after each removal.
 *
 * Returns the number of entries removed (0 if the word was not present).
 * When the list becomes empty the array is freed and words->words is NULL.
 */
int remove_word(word_list *words, const char *word_to_delete) {
    int kept = 0;   /* number of surviving entries, also the next write slot */

    for (int i = 0; i < words->num_words; i++) {
        word *wp = &words->words[i];
        if (strcmp(wp->s, word_to_delete) == 0) {
            free(wp->s);
            free(wp->line_numbers);
            free(wp->fileno);
        } else {
            if (kept != i)
                words->words[kept] = *wp;
            kept++;
        }
    }

    int found = words->num_words - kept;
    if (found == 0)
        return 0;

    words->num_words = kept;
    if (kept == 0) {
        free(words->words);
        words->words = NULL;
    } else {
        /* shrinking is optional: on failure the larger array is still valid */
        word *tmp = realloc(words->words, sizeof(word) * (size_t)kept);
        if (tmp != NULL)
            words->words = tmp;
    }
    return found;
}
/*
 * Read all words from filename into words.
 *
 * Words are separated by spaces and newlines, converted to lower case and
 * recorded with the number of the line they appear on (starting at 1).
 * Lines are read through a 1000-byte buffer, so a longer line is processed
 * in several pieces, each counted as a line of its own.
 *
 * Returns 0 if no error, 1 if the file cannot be opened or a word cannot be
 * inserted; a message is written to stderr in both cases.
 */
int read_file(word_list *words, const char *filename) {
    char s[1000];
    int line_number = 1;
    int status = 0;
    FILE *file = fopen(filename, "r");
    if (file == NULL) {
        fprintf(stderr, "cannot open %s\n", filename);
        return 1;
    }
    while (status == 0 && fgets(s, sizeof(s), file)) {
        /* "\n" is a delimiter so the newline kept by fgets never ends up in a word */
        for (char *token = strtok(s, " \n"); token != NULL; token = strtok(NULL, " \n")) {
            if (insert_word(words, strlower(token), line_number, filename)) {
                fprintf(stderr, "error inserting from %s at line %d\n", filename, line_number);
                status = 1;
                break;
            }
        }
        line_number += 1;
    }
    fclose(file);
    return status;
}
/*
 * Index the words of test.txt and test1.txt, remove the words listed in
 * stop.txt when `option` is non-zero, print the resulting dictionary and
 * release all memory.
 *
 * Returns 1 only if the list itself cannot be allocated; files that cannot
 * be read are reported on stderr and skipped.
 */
int main(void) {
    int option = 1;     /* non-zero: apply the stop-word list */
    word_list *words = calloc(1, sizeof(*words));
    if (words == NULL) {
        fprintf(stderr, "cannot allocate memory\n");
        return 1;
    }
    /* read_file reports its own errors; carry on with whatever was loaded */
    read_file(words, "test.txt");
    read_file(words, "test1.txt");
    if (option != 0) {
        char s[1000];
        FILE *file = fopen("stop.txt", "r");
        if (file == NULL) {
            fprintf(stderr, "cannot open %s\n", "stop.txt");
        } else {
            while (fgets(s, sizeof(s), file)) {
                /* tokenize and lower-case exactly as read_file does, so the
                 * stop words compare equal to the stored words */
                for (char *token = strtok(s, " \n"); token != NULL; token = strtok(NULL, " \n")) {
                    remove_word(words, strlower(token));
                }
            }
            fclose(file);
        }
    }
    printlist(words);
    for (int i = 0; i < words->num_words; i++) {
        free(words->words[i].s);
        free(words->words[i].line_numbers);
        free(words->words[i].fileno);
    }
    free(words->words);
    free(words);
    return 0;
}

输出:

haio	[1]	test1.txt	[1]	{4}
lol	[2]	test.txt	[1]	{5}
		test1.txt	[1]	{5}
ohaio	[1]	test.txt	[1]	{4}
vola	[1]	test1.txt	[1]	{1}

我发现了一个问题,但不确定这是否足以修复代码:当你从数组中删除一个单词时,必须同时减少num_words。尝试添加:

words->num_words--;

把它放在函数remove_word的if块末尾,即words->words = tmp;这一行之后。

相关内容

最新更新