aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorHans-Nikolai Viessmann2019-01-29 21:05:47 +0000
committerHans-Nikolai Viessmann2019-01-29 21:05:47 +0000
commite517f35de51f94011f4029842832d3f4d2c1d053 (patch)
tree38912d5571b9f7c4a3dc7c7b42ab0624c71caa28
downloadpractice-e517f35de51f94011f4029842832d3f4d2c1d053.tar.gz
practice-e517f35de51f94011f4029842832d3f4d2c1d053.zip
inital commit (bloom filter)
-rw-r--r--README7
-rw-r--r--bloom_filter/.gitignore2
-rw-r--r--bloom_filter/Makefile1
-rw-r--r--bloom_filter/README13
-rw-r--r--bloom_filter/bloom.c94
-rw-r--r--bloom_filter/hash.c116
-rw-r--r--bloom_filter/hash.h15
-rw-r--r--bloom_filter/words.dat500
8 files changed, 748 insertions, 0 deletions
diff --git a/README b/README
new file mode 100644
index 0000000..fff7db3
--- /dev/null
+++ b/README
@@ -0,0 +1,7 @@
+Practice Things Much Good, Oooh
+-------------------------------
+
+This repo contains all sort of random stuff, with the intent of learning/understanding
+different programming related paradigms/patterns/styles/etc.
+
+See the sub-dirs for further information...
diff --git a/bloom_filter/.gitignore b/bloom_filter/.gitignore
new file mode 100644
index 0000000..d7cc7da
--- /dev/null
+++ b/bloom_filter/.gitignore
@@ -0,0 +1,2 @@
+*.o
+bloom
diff --git a/bloom_filter/Makefile b/bloom_filter/Makefile
new file mode 100644
index 0000000..c511738
--- /dev/null
+++ b/bloom_filter/Makefile
@@ -0,0 +1 @@
+bloom: hash.o
diff --git a/bloom_filter/README b/bloom_filter/README
new file mode 100644
index 0000000..a789a7a
--- /dev/null
+++ b/bloom_filter/README
@@ -0,0 +1,13 @@
+Bloom filter
+------------
+
+This here is a very simple (sic naive) implementation of a bloom filter.
+
+Most of the work is in the hash function, which has been shamelessly _borrowed_
+from Bob Jenkins (see http://burtleburtle.net/bob/hash/evahash.html).
+
+In its current form, it is suboptimal and suffers from false-positive rot,
+because:
+ * the test data is too uniform
+ * the choice for bit-array size/number of hash functions/etc. was chosen
+ _in tumultum_ with little thought
diff --git a/bloom_filter/bloom.c b/bloom_filter/bloom.c
new file mode 100644
index 0000000..ef5c4af
--- /dev/null
+++ b/bloom_filter/bloom.c
@@ -0,0 +1,94 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdbool.h>
+
+#include "hash.h"
+
+#define B 14 /* power of */
+#define M 16384 /* number of bits */
+#define K 8 /* number of hash varients */
+
+static void
+print_barray (ub1 * barray)
+{
+#define COLUMNS 128
+ int i,j;
+ for (i = 0; i < M/COLUMNS; i++)
+ {
+ for (j = 0; j < COLUMNS; j++)
+ {
+ printf ("%s", barray[i*j] ? "\u2593" : "\u2591");
+ }
+ printf ("\n");
+ }
+}
+
+static bool
+bloom_add_check (ub1 * barray, ub1 * key, ub4 len, bool check)
+{
+ unsigned i;
+ ub4 h;
+
+ for (i = 0, h = 0; i < K; i++)
+ {
+ h = (hash (key, len, h) & hashmask(10));
+ // printf ("Hashing `%s`, return is: %lu\n", key, h);
+ if (check)
+ {
+ if (barray[h] == 1)
+ return true;
+ }
+ else
+ barray[h] = 1;
+ }
+ return false;
+}
+
+bool
+bloom_check (ub1 * barray, ub1 * key, ub4 len)
+{
+ return bloom_add_check (barray, key, len, true);
+}
+
+void
+bloom_add (ub1 * barray, ub1 * key, ub4 len)
+{
+ bloom_add_check (barray, key, len, false);
+}
+
+int
+main (int argc, char * argv[])
+{
+ /* data input */
+ FILE *data;
+ char * text; size_t size = 5;
+ /* bloom filter bit-array */
+ ub1 barray[M] = {0};
+
+ if (argc < 2)
+ return EXIT_FAILURE;
+
+ /* read in data */
+ data = fopen (argv[1], "r");
+ if (!data)
+ {
+ perror ("Unable to open file");
+ return EXIT_FAILURE;
+ }
+
+ text = malloc (sizeof(char) * size);
+ for (int i = 0; i < 100; i++)
+ {
+ /* assumes that all lines are 5*char (including '\n') */
+ fgets (text, size, data);
+ fseek (data, 1, SEEK_CUR);
+ bloom_add (barray, (ub1 *)text, 4);
+ }
+
+ print_barray (barray);
+
+ free (text);
+ fclose (data);
+
+ return EXIT_SUCCESS;
+}
diff --git a/bloom_filter/hash.c b/bloom_filter/hash.c
new file mode 100644
index 0000000..70054d2
--- /dev/null
+++ b/bloom_filter/hash.c
@@ -0,0 +1,116 @@
+/*
+ * 'My Hash' by Bob Jenkin
+ */
+#include "hash.h"
+
+/*
+--------------------------------------------------------------------
+mix -- mix 3 32-bit values reversibly.
+For every delta with one or two bits set, and the deltas of all three
+ high bits or all three low bits, whether the original value of a,b,c
+ is almost all zero or is uniformly distributed,
+* If mix() is run forward or backward, at least 32 bits in a,b,c
+ have at least 1/4 probability of changing.
+* If mix() is run forward, every bit of c will change between 1/3 and
+ 2/3 of the time. (Well, 22/100 and 78/100 for some 2-bit deltas.)
+mix() was built out of 36 single-cycle latency instructions in a
+ structure that could supported 2x parallelism, like so:
+ a -= b;
+ a -= c; x = (c>>13);
+ b -= c; a ^= x;
+ b -= a; x = (a<<8);
+ c -= a; b ^= x;
+ c -= b; x = (b>>13);
+ ...
+ Unfortunately, superscalar Pentiums and Sparcs can't take advantage
+ of that parallelism. They've also turned some of those single-cycle
+ latency instructions into multi-cycle latency instructions. Still,
+ this is the fastest good hash I could find. There were about 2^^68
+ to choose from. I only looked at a billion or so.
+--------------------------------------------------------------------
+*/
+#define mix(a,b,c) \
+{ \
+ a -= b; a -= c; a ^= (c>>13); \
+ b -= c; b -= a; b ^= (a<<8); \
+ c -= a; c -= b; c ^= (b>>13); \
+ a -= b; a -= c; a ^= (c>>12); \
+ b -= c; b -= a; b ^= (a<<16); \
+ c -= a; c -= b; c ^= (b>>5); \
+ a -= b; a -= c; a ^= (c>>3); \
+ b -= c; b -= a; b ^= (a<<10); \
+ c -= a; c -= b; c ^= (b>>15); \
+}
+
+/*
+--------------------------------------------------------------------
+hash() -- hash a variable-length key into a 32-bit value
+ k : the key (the unaligned variable-length array of bytes)
+ len : the length of the key, counting by bytes
+ initval : can be any 4-byte value
+Returns a 32-bit value. Every bit of the key affects every bit of
+the return value. Every 1-bit and 2-bit delta achieves avalanche.
+About 6*len+35 instructions.
+
+The best hash table sizes are powers of 2. There is no need to do
+mod a prime (mod is sooo slow!). If you need less than 32 bits,
+use a bitmask. For example, if you need only 10 bits, do
+ h = (h & hashmask(10));
+In which case, the hash table should have hashsize(10) elements.
+
+If you are hashing n strings (ub1 **)k, do it like this:
+ for (i=0, h=0; i<n; ++i) h = hash( k[i], len[i], h);
+
+By Bob Jenkins, 1996. bob_jenkins@burtleburtle.net. You may use this
+code any way you wish, private, educational, or commercial. It's free.
+
+See http://burtleburtle.net/bob/hash/evahash.html
+Use for hash table lookup, or anything where one collision in 2^^32 is
+acceptable. Do NOT use for cryptographic purposes.
+--------------------------------------------------------------------
+*/
+
+ub4 hash( k, length, initval)
+register ub1 *k; /* the key */
+register ub4 length; /* the length of the key */
+register ub4 initval; /* the previous hash, or an arbitrary value */
+{
+ register ub4 a,b,c,len;
+
+ /* Set up the internal state */
+ len = length;
+ a = b = 0x9e3779b9; /* the golden ratio; an arbitrary value */
+ c = initval; /* the previous hash value */
+
+ /*---------------------------------------- handle most of the key */
+ while (len >= 12)
+ {
+ a += (k[0] +((ub4)k[1]<<8) +((ub4)k[2]<<16) +((ub4)k[3]<<24));
+ b += (k[4] +((ub4)k[5]<<8) +((ub4)k[6]<<16) +((ub4)k[7]<<24));
+ c += (k[8] +((ub4)k[9]<<8) +((ub4)k[10]<<16)+((ub4)k[11]<<24));
+ mix(a,b,c);
+ k += 12; len -= 12;
+ }
+
+ /*------------------------------------- handle the last 11 bytes */
+ c += length;
+ switch(len) /* all the case statements fall through */
+ {
+ case 11: c+=((ub4)k[10]<<24);
+ case 10: c+=((ub4)k[9]<<16);
+ case 9 : c+=((ub4)k[8]<<8);
+ /* the first byte of c is reserved for the length */
+ case 8 : b+=((ub4)k[7]<<24);
+ case 7 : b+=((ub4)k[6]<<16);
+ case 6 : b+=((ub4)k[5]<<8);
+ case 5 : b+=k[4];
+ case 4 : a+=((ub4)k[3]<<24);
+ case 3 : a+=((ub4)k[2]<<16);
+ case 2 : a+=((ub4)k[1]<<8);
+ case 1 : a+=k[0];
+ /* case 0: nothing left to add */
+ }
+ mix(a,b,c);
+ /*-------------------------------------------- report the result */
+ return c;
+}
diff --git a/bloom_filter/hash.h b/bloom_filter/hash.h
new file mode 100644
index 0000000..50a9e96
--- /dev/null
+++ b/bloom_filter/hash.h
@@ -0,0 +1,15 @@
+/*
+ * 'My Hash' by Bob Jenkin
+ */
+#ifndef _MY_HASH_BOB_JENKIN_H_
+#define _MY_HASH_BOB_JENKIN_H_
+
+typedef unsigned long int ub4; /* unsigned 4-byte quantities */
+typedef unsigned char ub1; /* unsigned 1-byte quantities */
+
+#define hashsize(n) ((ub4)1<<(n))
+#define hashmask(n) (hashsize(n)-1)
+
+extern ub4 hash(ub1 * k, ub4 length, ub4 initval);
+
+#endif /* _MY_HASH_BOB_JENKIN_H_ */
diff --git a/bloom_filter/words.dat b/bloom_filter/words.dat
new file mode 100644
index 0000000..b94c531
--- /dev/null
+++ b/bloom_filter/words.dat
@@ -0,0 +1,500 @@
+able
+acid
+aged
+also
+area
+army
+away
+baby
+back
+ball
+band
+bank
+base
+bath
+bear
+beat
+been
+beer
+bell
+belt
+best
+bill
+bird
+blow
+blue
+boat
+body
+bomb
+bond
+bone
+book
+boom
+born
+boss
+both
+bowl
+bulk
+burn
+bush
+busy
+call
+calm
+came
+camp
+card
+care
+case
+cash
+cast
+cell
+chat
+chip
+city
+club
+coal
+coat
+code
+cold
+come
+cook
+cool
+cope
+copy
+CORE
+cost
+crew
+crop
+dark
+data
+date
+dawn
+days
+dead
+deal
+dean
+dear
+debt
+deep
+deny
+desk
+dial
+dick
+diet
+disc
+disk
+does
+done
+door
+dose
+down
+draw
+drew
+drop
+drug
+dual
+duke
+dust
+duty
+each
+earn
+ease
+east
+easy
+edge
+else
+even
+ever
+evil
+exit
+face
+fact
+fail
+fair
+fall
+farm
+fast
+fate
+fear
+feed
+feel
+feet
+fell
+felt
+file
+fill
+film
+find
+fine
+fire
+firm
+fish
+five
+flat
+flow
+food
+foot
+ford
+form
+fort
+four
+free
+from
+fuel
+full
+fund
+gain
+game
+gate
+gave
+gear
+gene
+gift
+girl
+give
+glad
+goal
+goes
+gold
+Golf
+gone
+good
+gray
+grew
+grey
+grow
+gulf
+hair
+half
+hall
+hand
+hang
+hard
+harm
+hate
+have
+head
+hear
+heat
+held
+hell
+help
+here
+hero
+high
+hill
+hire
+hold
+hole
+holy
+home
+hope
+host
+hour
+huge
+hung
+hunt
+hurt
+idea
+inch
+into
+iron
+item
+jack
+jane
+jean
+john
+join
+jump
+jury
+just
+keen
+keep
+kent
+kept
+kick
+kill
+kind
+king
+knee
+knew
+know
+lack
+lady
+laid
+lake
+land
+lane
+last
+late
+lead
+left
+less
+life
+lift
+like
+line
+link
+list
+live
+load
+loan
+lock
+logo
+long
+look
+lord
+lose
+loss
+lost
+love
+luck
+made
+mail
+main
+make
+male
+many
+Mark
+mass
+matt
+meal
+mean
+meat
+meet
+menu
+mere
+mike
+mile
+milk
+mill
+mind
+mine
+miss
+mode
+mood
+moon
+more
+most
+move
+much
+must
+name
+navy
+near
+neck
+need
+news
+next
+nice
+nick
+nine
+none
+nose
+note
+okay
+once
+only
+onto
+open
+oral
+over
+pace
+pack
+page
+paid
+pain
+pair
+palm
+park
+part
+pass
+past
+path
+peak
+pick
+pink
+pipe
+plan
+play
+plot
+plug
+plus
+poll
+pool
+poor
+port
+post
+pull
+pure
+push
+race
+rail
+rain
+rank
+rare
+rate
+read
+real
+rear
+rely
+rent
+rest
+rice
+rich
+ride
+ring
+rise
+risk
+road
+rock
+role
+roll
+roof
+room
+root
+rose
+rule
+rush
+ruth
+safe
+said
+sake
+sale
+salt
+same
+sand
+save
+seat
+seed
+seek
+seem
+seen
+self
+sell
+send
+sent
+sept
+ship
+shop
+shot
+show
+shut
+sick
+side
+sign
+site
+size
+skin
+slip
+slow
+snow
+soft
+soil
+sold
+sole
+some
+song
+soon
+sort
+soul
+spot
+star
+stay
+step
+stop
+such
+suit
+sure
+take
+tale
+talk
+tall
+tank
+tape
+task
+team
+tech
+tell
+tend
+term
+test
+text
+than
+that
+them
+then
+they
+thin
+this
+thus
+till
+time
+tiny
+told
+toll
+tone
+tony
+took
+tool
+tour
+town
+tree
+trip
+true
+tune
+turn
+twin
+type
+unit
+upon
+used
+user
+vary
+vast
+very
+vice
+view
+vote
+wage
+wait
+wake
+walk
+wall
+want
+ward
+warm
+wash
+wave
+ways
+weak
+wear
+week
+well
+went
+were
+west
+what
+when
+whom
+wide
+wife
+wild
+will
+wind
+wine
+wing
+wire
+wise
+wish
+with
+wood
+word
+wore
+work
+yard
+yeah
+year
+your
+zero
+zone