TIL's
- It is possible to use assert, e.g. for checking pre-conditions
- vector-of performs better, but it is not usable with transient data structures
- destructuring is a really convenient way to access values from a map; e.g. (defn bloom-add [{:keys [hash-functions bits] :as bloom-filter} value]
- reduced can be used to jump out of a `reduce` call: simply return `(reduced )` if we need to jump out
- ifn? can be used to check if a variable represents a function
; core.clj
(ns bloom-filter.core)
(defn bloom-create [numbits hash-functions]
(if (or (nil? numbits) (nil? hash-functions)) nil
{:bits (vec (repeat numbits 0)) :hash-functions hash-functions}))
(defn bloom-add [bloom-filter value]
(when-not (nil? bloom-filter)
(let [hash-functions (:hash-functions bloom-filter)
bits (:bits bloom-filter)
new-bits (reduce (fn [actual-bits hash-function] (assoc actual-bits (hash-function value) 1)) bits hash-functions)]
(assoc-in bloom-filter [:bits] new-bits))))
(defn bloom-contains [bloom-filter value]
(let [hash-functions (:hash-functions bloom-filter)
bits (:bits bloom-filter)]
(reduce (fn [actual-value hash-function] (and actual-value (= 1 (bits (hash-function value))))) true hash-functions)))
; core-test.clj
(ns bloom-filter.core-test
(:require [clojure.test :refer :all]
[bloom-filter.core :refer :all]))
(defn mod7-fun [num] (mod num 7))
(defn always-zero-fun [dontcare] 0)
(deftest create-test
(testing "create bloom filter"
(is (= nil (bloom-create nil nil)))
(is (= nil (bloom-create 0 nil)))
(is (= nil (bloom-create nil [])))
(is (= {:bits [] :hash-functions []} (bloom-create 0 [])))
(is (= {:bits [0] :hash-functions []} (bloom-create 1 [])))
(is (= {:bits [] :hash-functions [always-zero-fun]} (bloom-create 0 [always-zero-fun])))
))
(deftest add-test
(let [empty-filter (bloom-create 7 [])
single-fun-filter (bloom-create 7 [mod7-fun])
two-fun-filter (bloom-create 7 [mod7-fun always-zero-fun])]
(testing "add to bloom filter"
(is (= nil (bloom-add nil 3)))
(is (= empty-filter (bloom-add empty-filter nil)))
(is (= empty-filter (bloom-add empty-filter 10)))
(is (= {:bits [0 0 0 1 0 0 0] :hash-functions [mod7-fun]}
(bloom-add single-fun-filter 3)))
(is (= {:bits [1 0 0 1 0 0 0] :hash-functions [mod7-fun always-zero-fun]}
(bloom-add two-fun-filter 3)))
)))
(deftest contains-test
(let [empty-filter (bloom-create 7 [])
simple-filter (bloom-create 7 [mod7-fun])
filter-with-element (bloom-add simple-filter 3)]
(testing "bloom filter contains"
(is (true? (bloom-contains empty-filter 0)))
(is (false? (bloom-contains simple-filter 0)))
(is (true? (bloom-contains filter-with-element 3)))
(is (true? (bloom-contains filter-with-element 10)))
)))
; core.clj
(ns bloom-filter.core)
(defn bloom-create [numbits hash-functions]
(assert (number? numbits) "numbits must be numeric")
(assert (not (nil? hash-functions)) "hash-functions must not be nil")
(assert (every? ifn? hash-functions) "hash-functions must be a collection of functions")
{:bits (vec (repeat numbits false))
:hash-functions hash-functions})
(def ^:private safe-hash-functions (memoize
(fn [numbits hash-functions]
(map #(fn [x] (mod (% x) numbits)) hash-functions))))
(defn bloom-add [{:keys [hash-functions bits] :as bloom-filter} value]
(when bloom-filter
(let [hash-functions (safe-hash-functions (count bits) hash-functions)]
(->> hash-functions
(reduce (fn [actual-bits hash-function] (assoc! actual-bits (hash-function value) true)) (transient bits))
(persistent!)
(assoc bloom-filter :bits)))))
(defn bloom-contains? [{:keys [hash-functions bits] :as bloom-filter} value]
(when bloom-filter
(let [hash-functions (safe-hash-functions (count bits) hash-functions)]
(every? #(bits (% value)) hash-functions))))
; core-test.clj
(ns bloom-filter.core-test
(:require [clojure.test :refer :all]
[bloom-filter.core :refer :all]))
(def ^:const F false)
(def ^:const T true)
(defn mod7-fun [num] (mod num 7))
(defn always-zero-fun [dontcare] 0)
(defn always-ten-fun [dontcare] 10)
(def empty-filter (bloom-create 7 []))
(def single-fun-filter (bloom-create 7 [mod7-fun]))
(def too-high-range-fun-filter (bloom-create 7 [always-ten-fun]))
(deftest create-test
(testing "create bloom filter"
(is (thrown-with-msg? AssertionError #"numbits must be numeric" (bloom-create nil nil)))
(is (thrown-with-msg? AssertionError #"numbits must be numeric" (bloom-create "a" nil)))
(is (thrown-with-msg? AssertionError #"hash-functions must not be nil" (bloom-create 0 nil)))
(is (thrown-with-msg? AssertionError #"hash-functions must be a collection of functions" (bloom-create 0 "a")))
(is (thrown-with-msg? AssertionError #"hash-functions must be a collection of functions" (bloom-create 0 ["a"])))
(is (thrown-with-msg? AssertionError #"numbits must be numeric" (bloom-create nil [])))
(is (= {:bits [] :hash-functions []} (bloom-create 0 [])))
(is (= {:bits [F] :hash-functions []} (bloom-create 1 [])))
(is (= {:bits [] :hash-functions [always-zero-fun]} (bloom-create 0 [always-zero-fun])))
))
(deftest add-test
(let [two-fun-filter (bloom-create 7 [mod7-fun always-zero-fun])]
(testing "add to bloom filter"
(is (= nil (bloom-add nil 3)))
(is (= empty-filter (bloom-add empty-filter nil)))
(is (= empty-filter (bloom-add empty-filter 10)))
(is (= [F F F T F F F] (:bits (bloom-add single-fun-filter 3))))
(is (= [T F F T F F F] (:bits (bloom-add two-fun-filter 3))))
(is (= [F F F T F F F] (:bits (bloom-add too-high-range-fun-filter 3))))
)))
(deftest contains-test
(let [filter-with-element (bloom-add single-fun-filter 3)
filter-too-high-range-with-element (bloom-add too-high-range-fun-filter 3)]
(testing "bloom filter contains"
(is (= nil (bloom-contains? nil 0)))
(is (true? (bloom-contains? empty-filter 0)))
(is (false? (bloom-contains? single-fun-filter 0)))
(is (true? (bloom-contains? filter-with-element 3)))
(is (true? (bloom-contains? filter-with-element 10)))
(is (true? (bloom-contains? filter-too-high-range-with-element 3)))
)))
No comments:
Post a Comment