c-用链表填充哈希表



我正在为单词词典创建一个具有链表冲突控制的哈希表(数组(。单词词典首先被加载到名为"的链表中;列表";使用下面的数据结构。然而,如果数组中每个哈希代码有多个条目,我在创建冲突控制时会遇到问题:链表是为多个条目无限创建的。。

这就是我用于条目的结构。

typedef struct node
{
char word[LENGTH + 1];
int hash;
struct node *next;
}
node;

下面是填充哈希表的部分:

while (list->next != NULL)
{
if (table[list->hash].hash != list->hash) // No entry in the array yet
{
node *tmp = list->next;
table[list->hash] = *list;
table[list->hash].next = NULL;
list = tmp;
}
else                                     // If array is already filled
{
node *tmp = list->next;
list->next = &table[list->hash];
table[list->hash] = *list;
list = tmp;
}
}

希望你能帮忙,谢谢!

请参阅此处的完整代码:

#include <ctype.h>
#include <stdbool.h>
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#include <stdbool.h>
#define LENGTH 45
const unsigned int N = 4000;
typedef struct node
{
char word[LENGTH + 1];
int hash;
struct node *next;
}
node;
bool load(const char *dictionary);
unsigned int hash(const char *word);
node table[N];
int main(void)
{
char *dict = "dictionaries/medium";
bool loaded = load(dict);
if (!loaded)
{
printf("could not load dictionaryn");
return 1;
}
printf("workedn");
return 0;
}
bool load(const char *dictionary)
{
FILE *input = fopen(dictionary, "r");
if (input == NULL)
{
return false;
}
node *list = NULL;
char word[LENGTH + 1];
int wordcount = 0;
while(fscanf(input, "%s", word) != EOF)
{
if (list == NULL)
{
node *n = malloc(sizeof(node));
if (n == NULL)
{
return false;
}
strcpy(n->word, word);
n->hash = hash(word);
n->next = NULL;
list = n;
wordcount++;
}
else
{
node *n = malloc(sizeof(node));
if (n == NULL)
{
return false;
}
strcpy(n->word, word);
n->hash = hash(word);
n->next = list;
list = n;
wordcount++;
}
}
while (list->next != NULL)
{
if (table[list->hash].hash != list->hash)
{
node *tmp = list->next;
table[list->hash] = *list;
table[list->hash].next = NULL;
list = tmp;
}
else
{
node *tmp = list->next;
list->next = &table[list->hash];
table[list->hash] = *list;
list = tmp;
}
}
// test function to print
printf("%sn%sn%sn%sn", table[196].word, table[196].next->word, table[196].next->next->word, table[196].next->next->next->word);
return true;
fclose(input);
}
unsigned int hash(const char *word)
{
int hash = 0;
for (int i = 0; i < strlen(word); i++)
{
if (word[i] != ''')
{
hash += ((toupper(word[i]) - 64));
}
if (i % 2 == 0)
{
hash = (hash * 3);
}
}
hash = hash % N;
return hash;
}

字典/介质文件当前包含以下项目:

maxim
maxim
Nina
maxim
Lukas
Nina

这也是为什么我使用数组元素196来打印出"Nina"的哈希代码。

编辑:感谢到目前为止的回复,我已经删除了冗余,并在创建列表时尝试填充数组。然而,我仍然有一个问题,我认为这是由于阵列点已经填充时的自引用。真的不知道该怎么解决。新代码在这里:

#include <ctype.h>
#include <stdbool.h>
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#include <stdbool.h>
#define LENGTH 45
const unsigned int N = 4000;
typedef struct node
{
char word[LENGTH + 1];
int hash;
struct node *next;
}
node;
bool load(const char *dictionary);
unsigned int hash(const char *word);
node table[N];
int main(void)
{
char *dict = "dictionaries/medium";
bool loaded = load(dict);
if (!loaded)
{
printf("could not load dictionaryn");
return 1;
}
printf("workedn");
return 0;
}
bool load(const char *dictionary)
{
FILE *input = fopen(dictionary, "r");
if (input == NULL)
{
return false;
}
node *list = NULL;
char word[LENGTH + 1];
int wordcount = 0;
while(fscanf(input, "%s", word) != EOF)
{
node *n = malloc(sizeof(node));
if (n == NULL)
{
return false;
}
strcpy(n->word, word);
n->hash = hash(word);
if (table[n->hash].hash != n->hash)
{
n->next = list;
table[n->hash] = *n;
}
else
{
n->next = &table[n->hash];
table[n->hash] = *n;
}
free(n);
wordcount++;
}
// test function to print
printf("%sn%sn%sn", table[196].word, table[196].next->word, table[196].next->next->word);
return true;
fclose(input);
}
unsigned int hash(const char *word)
{
int hash = 0;
for (int i = 0; i < strlen(word); i++)
{
if (word[i] != ''')
{
hash += ((toupper(word[i]) - 64));
}
if (i % 2 == 0)
{
hash = (hash * 3);
}
}
hash = hash % N;
return hash;
}

这部分代码是错误的:

while (list->next != NULL)
{
if (table[list->hash].hash != list->hash)
{
node *tmp = list->next;
table[list->hash] = *list;
table[list->hash].next = NULL;
list = tmp;
}
else
{
node *tmp = list->next;
list->next = &table[list->hash];
table[list->hash] = *list;
list = tmp;
}
}

特别地,else块是错误的,并且循环忽略列表上的最后一个元素。循环应该是这样的:

while (list != NULL)
{
node *next = list->next;
node copy = table[list->hash];
list->next = copy.next;
table[list->hash] = *list;
*list = copy;
list = next;
}

原来的";"空";表中的节点元素最终将被复制到哈希表中每个列表的最后一个元素,这可能不是很有用。(本答案后面提出的重新设计将哈希表变成指针数组,并且不存储任何"空"节点内容。(

另一个问题是,输入文件从未关闭,因为这些行的顺序错误:

return true;
fclose(input);

fclose呼叫永远无法接通。

指定数组table的长度的方式不是由C:定义的

const unsigned int N = 4000;
/*...*/
node table[N];

长度必须是一个整数常量表达式。这可以通过用宏定义#define N 4000替换const unsigned int N = 4000;来解决。


通过将node table[N];更改为node *table[N];,即将其转换为指针数组,可以简化代码设计。此外,不需要将散列存储在节点中。散列只是散列表的一个索引。哈希表中未使用的条目将是由空指针表示的空列表。(node *table[N]将被隐式初始化为包含空指针。(

#include <ctype.h>
#include <stdbool.h>
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#include <stdbool.h>
#define LENGTH 45
#define N 4000
typedef struct node
{
char word[LENGTH + 1];
struct node *next;
}
node;
bool load(const char *dictionary);
unsigned int hash(const char *word);
node *table[N];
int main(void)
{
char *dict = "dictionaries/medium";
bool loaded = load(dict);
if (!loaded)
{
printf("could not load dictionaryn");
return 1;
}
printf("workedn");
return 0;
}
bool load(const char *dictionary)
{
FILE *input = fopen(dictionary, "r");
if (input == NULL)
{
return false;
}
char word[LENGTH + 1];
int wordcount = 0;
while(fscanf(input, "%s", word) != EOF)
{
int wordhash = hash(word);
node *n = malloc(sizeof(node));
if (n == NULL)
{
return false;
}
strcpy(n->word, word);
n->next = table[wordhash];
table[wordhash] = n;
wordcount++;
}
fclose(input);
return true;
}
unsigned int hash(const char *word)
{
int hash = 0;
for (int i = 0; i < strlen(word); i++)
{
if (word[i] != ''')
{
hash += ((toupper(word[i]) - 64));
}
if (i % 2 == 0)
{
hash = (hash * 3);
}
}
hash = hash % N;
return hash;
}

相关内容

  • 没有找到相关文章

最新更新