I needed a small, easy-to-use Hamming distance function in PostgreSQL that works on byte arrays. PostgreSQL has a Levenshtein distance function, but that’s not quite what I wanted, though if I expanded my byte arrays into strings of bits, the two would be equivalent. The full file is available on bitbucket [link] and is reproduced below.


/* hamming.c
 * A distance function for PostgreSQL
 * Written by Joseph Catrambone
 * License: 
 * Permissive Coffeeware License - A Beerware Derivative.  
 * I say this works, but I'm not responsible if things don't go as planned.
 * If you make anything with this, it would be nice to get credit, but it's not required.
 * If we meet in a coffee shop, it would be nice if you bought me a coffee or a muffin, but that's not required.
 * You are free to modify this as you see fit.  Derivative works do not need to be coffeeware.
 
 * To build:
 * gcc -I`pg_config --includedir-server` -fpic -c hamming.c
 * gcc -shared -o hamming.so hamming.o
 * sudo cp hamming.so /usr/lib/postgresql/9.1/lib/
 * cd /usr/lib/postgresql/9.1/lib/
 * sudo chmod +r hamming.so

 * To add to Postgres:
 * CREATE FUNCTION HAMMING_DISTANCE(bytea, bytea) RETURNS integer
 * 	AS 'hamming.so', 'HAMMING_DISTANCE'
 * 	LANGUAGE C STRICT;
 */

#include <postgres.h>
#include <fmgr.h>
// On Ubuntu 12.04, I have problems with finding fmgr.  You can include this instead of the line above.
//#include "/usr/include/postgresql/9.1/server/fmgr.h"
#if defined(PG_VERSION_NUM) && PG_VERSION_NUM >= 160000
// PostgreSQL 16 moved the varlena accessor macros (VARSIZE_ANY_EXHDR, VARDATA_ANY) out of postgres.h.
#include <varatt.h>
#endif

// Expand the module magic block only when the included PostgreSQL headers define PG_MODULE_MAGIC;
// the guard lets this file still compile against headers that do not provide the macro.
#ifdef PG_MODULE_MAGIC
PG_MODULE_MAGIC;
#endif

PG_FUNCTION_INFO_V1(HAMMING_DISTANCE);

Datum HAMMING_DISTANCE(PG_FUNCTION_ARGS)
{
	bytea* data1 = PG_GETARG_BYTEA_P(0);
	bytea* data2 = PG_GETARG_BYTEA_P(1);
	int32 dist = 0;
	int32 index = 0;
	char xor;
	char* st1 = (char*)data1;
	char* st2 = (char*)data2;

	// TODO: Maybe iterate to num_bytes, with int32 num_bytes = VARSIZE(data1) - VARHDRSZ;?
	for(index=0; index < 512; ++index) { 
		// Unroll loop internally
		xor = st1[index] ^ st2[index];
		dist += (xor&0x1); xor = xor >> 1;
		dist += (xor&0x1); xor = xor >> 1;
		dist += (xor&0x1); xor = xor >> 1;
		dist += (xor&0x1); xor = xor >> 1;
		dist += (xor&0x1); xor = xor >> 1;
		dist += (xor&0x1); xor = xor >> 1;
		dist += (xor&0x1); xor = xor >> 1;
		dist += (xor&0x1); xor = xor >> 1;
	}

    PG_RETURN_INT32(dist);
}