libbf  0.1
 All Classes Functions Typedefs Friends Pages
counting.cc
1 #include <bf/bloom_filter/counting.h>
2 
3 #include <algorithm>
4 #include <cassert>
5 
6 namespace bf {
7 
9  size_t width)
10  : hasher_(std::move(h)),
11  cells_(cells, width)
12 {
13 }
14 
15 void counting_bloom_filter::add(object const& o)
16 {
18 }
19 
20 size_t counting_bloom_filter::lookup(object const& o) const
21 {
22  auto min = cells_.max();
23  for (auto i : find_indices(o))
24  {
25  auto cnt = cells_.count(i);
26  if (cnt < min)
27  return min = cnt;
28  }
29  return min;
30 }
31 
33 {
34  cells_.clear();
35 }
36 
37 void counting_bloom_filter::remove(object const& o)
38 {
40 }
41 
42 std::vector<size_t>
43 counting_bloom_filter::find_indices(object const& o, bool part) const
44 {
45  auto digests = hasher_(o);
46  std::vector<size_t> indices(digests.size());
47  if (part)
48  {
49  assert(cells_.size() % digests.size() == 0);
50  size_t const parts = cells_.size() / digests.size();
51  for (size_t i = 0; i < indices.size(); ++i)
52  indices[i] = digests[i] % parts + (i * parts);
53  }
54  else
55  {
56  for (size_t i = 0; i < indices.size(); ++i)
57  indices[i] = digests[i] % cells_.size();
58  }
59  std::sort(indices.begin(), indices.end());
60  indices.erase(std::unique(indices.begin(), indices.end()), indices.end());
61  return indices;
62 };
63 
64 size_t
65 counting_bloom_filter::find_minimum(std::vector<size_t> const& indices) const
66 {
67  auto min = cells_.max();
68  for (auto i : indices)
69  {
70  auto cnt = cells_.count(i);
71  if (cnt < min)
72  min = cnt;
73  }
74  return min;
75 }
76 
77 std::vector<size_t>
78 counting_bloom_filter::find_minima(std::vector<size_t> const& indices) const
79 {
80  auto min = cells_.max();
81  std::vector<size_t> positions;
82  for (auto i : indices)
83  {
84  auto cnt = cells_.count(i);
85  if (cnt == min)
86  {
87  positions.push_back(i);
88  }
89  else if (cnt < min)
90  {
91  min = cnt;
92  positions.clear();
93  positions.push_back(i);
94  }
95  }
96  return positions;
97 }
98 
99 bool counting_bloom_filter::increment(std::vector<size_t> const& indices,
100  size_t value)
101 {
102  auto status = true;
103  for (auto i : indices)
104  if (! cells_.increment(i, value))
105  status = false;
106  return status;
107 }
108 
109 bool counting_bloom_filter::decrement(std::vector<size_t> const& indices,
110  size_t value)
111 {
112  auto status = true;
113  for (auto i : indices)
114  if (! cells_.decrement(i, value))
115  status = false;
116  return status;
117 }
118 
119 size_t counting_bloom_filter::count(size_t index) const
120 {
121  return cells_.count(index);
122 }
123 
125  size_t width)
126  : counting_bloom_filter(std::move(h), cells, width)
127 {
128 }
129 
130 void spectral_mi_bloom_filter::add(object const& o)
131 {
133 }
134 
135 
137  hasher h1, size_t cells1, size_t width1,
138  hasher h2, size_t cells2, size_t width2)
139  : first_(std::move(h1), cells1, width1),
140  second_(std::move(h2), cells2, width2)
141 {
142 }
143 
144 // "When adding an item x, increase the counters of x in the primary SBF. Then
145 // check if x has a recurring minimum. If so, continue normally. Otherwise (if
146 // x has a single minimum), look for x in the secondary SBF. If found, increase
147 // its counters, otherwise add x to the secondary SBF, with an initial value
148 // that equals its minimal value from the primary SBF."
149 void spectral_rm_bloom_filter::add(object const& o)
150 {
151  auto indices1 = first_.find_indices(o);
152  first_.increment(indices1);
153  auto mins1 = first_.find_minima(indices1);
154  if (mins1.size() > 1)
155  return;
156 
157  auto indices2 = second_.find_indices(o);
158  auto min1 = first_.count(mins1[0]);
159  auto min2 = second_.find_minimum(indices2);
160 
161  // Note: it's unclear to me whether "increase its counters" means increase
162  // only the minima or all indices. I opted for the latter (same during
163  // deletion).
164  second_.increment(indices2, min2 > 0 ? 1 : min1);
165 }
166 
167 // "When performing lookup for x, check if x has a recurring minimum in the
168 // primary SBF. If so return the minimum. Otherwise, perform lookup for x in
169 // secondary SBF. If [the] returned value is greater than 0, return it.
170 // Otherwise, return minimum from primary SBF."
171 size_t spectral_rm_bloom_filter::lookup(object const& o) const
172 {
173  auto mins1 = first_.find_minima(first_.find_indices(o));
174  auto min1 = first_.count(mins1[0]);
175  if (mins1.size() > 1)
176  return min1;
177  auto min2 = second_.find_minimum(second_.find_indices(o));
178  return min2 > 0 ? min2 : min1;
179 }
180 
182 {
183  first_.clear();
184  second_.clear();
185 }
186 
187 // "First decrease its counters in the primary SBF, then if it has a single
188 // minimum (or if it exists in Bf) decrease its counters in the secondary SBF,
189 // unless at least one of them is 0."
190 void spectral_rm_bloom_filter::remove(object const& o)
191 {
192  auto indices1 = first_.find_indices(o);
193  first_.decrement(indices1);
194  auto mins1 = first_.find_minima(indices1);
195  if (mins1.size() > 1)
196  return;
197 
198  auto indices2 = second_.find_indices(o);
199  if (second_.find_minimum(indices2) > 0)
200  second_.decrement(indices2);
201 }
202 
203 } // namespace bf