Postgres 全文搜索:如何实现类似 Oracle 上下文索引的 NEAR(邻近)操作符



在 Oracle 中,我可以对 CLOB 字段执行类似 "NEAR((a,b,c), 5)" 的搜索查询。以下内容摘自 Oracle 文档:

使用 NEAR 操作符可以根据两个或多个查询词之间的接近程度返回分数。在同一文档中,Oracle Text 对距离较近的词返回较高的分数,对距离较远的词返回较低的分数。

如何在 Postgres 中做到这一点?我只需要一个索引,能够搜索某个单词附近的另一个单词。

这是汉明距离函数http://en.wikipedia.org/wiki/Hamming_distance

头文件

/**

    @file HammingDistance.h A C header file to compute the Hamming Distance between two strings
            as PostgreSQl C User Defined Function
*/
#ifndef HAMMINGDISTANCE_H_INCLUDED
#define HAMMINGDISTANCE_H_INCLUDED
DLLEXPORT Datum DistanceCstring(PG_FUNCTION_ARGS);
DLLEXPORT Datum Distance(PG_FUNCTION_ARGS);
#endif // HAMMINGDISTANCE_H_INCLUDED
源文件

/**

    @file HammingDistance.c A C source file to compute the Hamming Distance between two strings
            as PostgreSQL C User Defined Function
*/
#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include "postgres.h"
#include "utils/geo_decls.h"
#include "utils/builtins.h"
#include "catalog/pg_type.h"
#include "funcapi.h"
#define VC_EXTRALEAN
/* Warning number 4996 is specific to MSVC; other compilers do not know this pragma. */
#ifdef _MSC_VER
#pragma warning (disable : 4996)
#endif
#ifdef PG_MODULE_MAGIC
PG_MODULE_MAGIC;
#endif
/* DLLEXPORT marks the SQL-callable entry points for export from a Windows DLL;
   it expands to nothing on other platforms. */
#ifdef _WIN32
#define DLLEXPORT __declspec(dllexport)
#else
#define DLLEXPORT
#endif // _WIN32
/* Forward declarations of the file-local helpers defined further down. */
static  int32_t DistanceString( unsigned char * a,  int32_t la , unsigned char * b, int32_t lb);
static  int32_t DistanceUChar( unsigned char  a,  unsigned char b);

PG_FUNCTION_INFO_V1(Distance);
/**
    Distance    An SQL function to compute the Hamming Distance between two text types
    @param[in]   A a text type
    @param[in]   B a text type
    @return     The Hamming Distance (number of differing bits) between the two text
                values; bytes of the longer value that have no counterpart in the
                shorter one are compared against 0
**/
DLLEXPORT Datum Distance(PG_FUNCTION_ARGS)
{
    text *      a   = PG_GETARG_TEXT_PP(0);
    text *      b   = PG_GETARG_TEXT_PP(1);
    int32_t     distance = 0;

    /* Read the varlena payload in place: this avoids the palloc'd copies made by
       text_to_cstring() (which were never freed) and the char/unsigned char
       pointer mismatch that came with passing those copies to strlen(). */
    distance = DistanceString( (unsigned char *) VARDATA_ANY(a), (int32_t) VARSIZE_ANY_EXHDR(a),
                               (unsigned char *) VARDATA_ANY(b), (int32_t) VARSIZE_ANY_EXHDR(b) );
    PG_RETURN_INT32(distance);
}
PG_FUNCTION_INFO_V1(DistanceCstring);
/**
    DistanceCstring An SQL function to compute the Hamming Distance between two strings
    @param[in]   A a Cstring type
    @param[in]   B a Cstring type
    @return     The Hamming Distance (number of differing bits) between the two
                Cstring values
**/
DLLEXPORT Datum DistanceCstring(PG_FUNCTION_ARGS)
{
    /* Keep the arguments as plain char pointers so strlen() receives the type it
       expects; the conversion to unsigned bytes happens only at the helper call. */
    char *      ac = PG_GETARG_CSTRING(0);
    char *      bc = PG_GETARG_CSTRING(1);
    int32_t     distance = 0;

    distance = DistanceString( (unsigned char *) ac, (int32_t) strlen(ac),
                               (unsigned char *) bc, (int32_t) strlen(bc) );
    PG_RETURN_INT32(distance);
}
/**
    DistanceString  Hamming Distance between two byte strings of possibly different length
    @param[in]      a   first unsigned char array
    @param[in]      la  number of chars in a
    @param[in]      b   second unsigned char array
    @param[in]      lb  number of chars in b
    @return         Sum of the per-byte distances over the common prefix, plus the
                    distance of every remaining byte of the longer string against 0
**/
static  int32_t DistanceString( unsigned char * a,  int32_t la , unsigned char * b, int32_t lb)
{
    const int b_is_longer = ( lb > la );
    unsigned char * short_str = b_is_longer ? a : b;
    unsigned char * long_str  = b_is_longer ? b : a;
    const int common = b_is_longer ? la : lb;
    int tail = b_is_longer ? ( lb - la ) : ( la - lb );
    int32_t bits = 0;
    int pos = 0;

    /* Positions present in both strings. */
    while ( pos < common )
    {
        bits += DistanceUChar( short_str[pos], long_str[pos] );
        pos++;
    }
    /* Leftover bytes of the longer string are measured against a zero byte. */
    while ( tail > 0 )
    {
        bits += DistanceUChar( 0, long_str[pos] );
        pos++;
        tail--;
    }
    return bits;
}
/**
    DistanceUChar  Compute Hamming Distance between two unsigned chars
    @param[in]      a unsigned char
    @param[in]      b unsigned char
    @return         Number of bit positions (0..8) in which a and b differ
**/
static  int32_t DistanceUChar( unsigned char  a,  unsigned char b)
{
    /* Bits set in the XOR are exactly the positions where a and b differ. */
    unsigned char diff = a ^ b;
    int32_t     distance = 0;

    /* Clear the lowest set bit on each pass, so every differing bit is counted
       exactly once. The previous per-bit test for 0x10 lacked parentheses and
       was parsed as x & (0x10 == 0x10), i.e. it tested bit 0 instead of bit 4. */
    while ( diff != 0 )
    {
        diff = (unsigned char) ( diff & ( diff - 1 ) );
        distance++;
    }
    return distance;
}

Makefile

# Compile flags: debug info, position-independent code, compile only, and the
# PostgreSQL server include directory.
OPTS        := -g -fpic -c -I /opt/PostgreSQL/9.1/include/postgresql/server
# Directory the shared library is copied into by the install target.
INSTALLDIR  := /opt/PostgreSQL/9.1/lib/postgresql

# These targets do not produce files of the same name; declaring them phony keeps
# them working even if a file called "test", "install", ... exists.
.PHONY: all clean register install test

all: HammingDistance.so

# Recipe lines must start with a TAB character.
HammingDistance.so: HammingDistance.c HammingDistance.h
	gcc HammingDistance.c $(OPTS) -o HammingDistance.o
	gcc -shared -o HammingDistance.so HammingDistance.o

clean:
	rm -rf *.o *.so

# Runs install.sql against the postgres database on port 5433.
register:
	psql -f install.sql -p 5433  -U postgres -d postgres ;

install:
	sudo cp  HammingDistance.so $(INSTALLDIR);

test:
	psql -f Test.sql -p 5433  -U postgres -d postgres ;
安装SQL

-- define the schema
set search_path to public;
-- Remove existing functions (both overloads)
drop function if exists Distance( text, text ) cascade;
drop function if exists Distance( cstring , cstring ) cascade;
-- Create the text overload, bound to the C symbol 'Distance'
create or replace function Distance( text, text )  returns integer
 as '$libdir/HammingDistance',  'Distance'
 language c strict;
-- Create the cstring overload, bound to the C symbol 'DistanceCstring'
create or replace function Distance( cstring, cstring )  returns integer
 as '$libdir/HammingDistance',  'DistanceCstring'
 language c strict;

最新更新