在c中计算水光过滤器的汉明距离?



我需要比较两个大小相等的布隆过滤器BF1BF2的相似性,使用汉明距离将两组之间的距离表示为 Bloom 距离

B(BF1,BF2(=one(BF1 & BF2(/SIZEOF(BF1(其中one((
函数计算 ANDDed 布隆过滤器中的设置位数。

我采用了使用布隆过滤器进行路径相似性评估第 3 部分(第 4 页。相似性指标(。

我已经实现了以下 c 代码来做到这一点,但它肯定不起作用。

#include <stdlib.h>
#include <stdio.h>
const int BF_LEN= 1024;
char *BF1;//=malloc(BF_LEN*sizeof(char));
char *BF2;//=malloc(BF_LEN*sizeof(char));
char *buf;//=malloc(BF_LEN*sizeof(char));
char *buf_ptr=NULL;
int set_bits_count=0;
float similarity=0.0;
u_int32_t NumberOfSetBits(u_int32_t i)
{
return (((((i - ((i >> 1) & 0x55555555)) & 0x33333333) + (((i - ((i >> 1) & 0x55555555)) >> 2) & 0x33333333) + (((i - ((i >> 1) & 0x55555555)) & 0x33333333) + (((i - ((i >> 1) & 0x55555555)) >> 2) & 0x33333333) >> 4)) & 0x0F0F0F0F) * 0x01010101) >> 24;
}
void main()
{
BF1=malloc(BF_LEN*sizeof(char));
BF2=malloc(BF_LEN*sizeof(char));
buf=malloc(BF_LEN*sizeof(char));
//Edit2:initialize them to 0
for(int j=0;j<BF_LEN;j++)    
{
BF1[j]='';
BF2[j]='';
buf[j]='';
}
BF1="BF1 is filled with some characters";
BF2="BF2 is filled with some characters and more";
for(int j=0; j<BF_LEN; j++)
{
buf[j]=BF1[j]&BF2[j];
}
buf_ptr=buf;
for(int m=0; m<BF_LEN; m++) //This is for the **one()** function
set_bits_count+=NumberOfSetBits(*buf_ptr++);
similarity=1-set_bits_count/(float)BF_LEN;
printf("%.2f",similarity);
//Edit1: Following Comments
free(BF1);
free(BF2);
free(buf);
}

NumberOfSetBits((采用自 Set 位计数器

请尝试使用此代码。汉明距离是具有异或的种群计数:

void hamming()
{
static const unsigned int BF_LEN = 1024;
char BF1[BF_LEN];
char BF2[BF_LEN];
unsigned int set_bits_count = 0;
float similarity = 0.0f;
memset(BF1,'',BF_LEN);
memset(BF2,'',BF_LEN);
strcpy(BF1, "BF1 is filled with some characters");
strcpy(BF2, "BF2 is filled with some characters and more");
for(unsigned int j = 0; j < BF_LEN; j += sizeof(T_Uint32))
{
set_bits_count += hammingDistance(*(T_Uint32*)&BF1[j], *(T_Uint32*)&BF2[j]);
}

similarity = 1.0f - set_bits_count/(float)BF_LEN;
printf("%.2f", similarity);
}
//Reference: P. Wegner, Comm. ACM, Vol. 3, Issue 5, 1960. 
T_Uint getNrOfBitsSet(T_Uint32 bits)
{     
T_Uint bitsSet = 0;
while (bits)
{
bits &= bits - 1; // clears the least significant bit set
++bitsSet;
}
return bitsSet;
}
T_Uint hammingDistance(T_Uint32 a, T_Uint32 b)
{
return getNrOfBitsSet(a^b);
}

最新更新