From ef899b7c09aa266c79bf784111ad3b1155b4d029 Mon Sep 17 00:00:00 2001
From: Vitalie Ciubotaru
Date: Sat, 13 Jul 2019 03:46:37 +0900
Subject: [PATCH] bf: add cmp(), join(), slice(), popcount() and hamming()

---
 vlib/bf/bf.v      | 174 +++++++++++++++++++++++++++++++++++++++++++++++++++++-
 vlib/bf/bf_test.v | 116 ++++++++++++++++++++++++++++++++++---
 2 files changed, 280 insertions(+), 10 deletions(-)

diff --git a/vlib/bf/bf.v b/vlib/bf/bf.v
index e5fed64c08..0f0d2c3462 100644
--- a/vlib/bf/bf.v
+++ b/vlib/bf/bf.v
@@ -13,7 +13,7 @@ const (
 )
 
 fn bitmask(bitnr int) u32 {
-	return u32(1 << (bitnr % SLOT_SIZE))
+	return u32(u32(1) << u32(bitnr % SLOT_SIZE))
 }
 
 fn bitslot(size int) int {
@@ -150,6 +150,56 @@ pub fn bfxor(input1 BitField, input2 BitField) BitField {
 	return output
 }
 
+// join concatenates two bitfields: the result holds the bits of input1
+// followed by the bits of input2, and its size is the sum of both sizes.
+pub fn join(input1 BitField, input2 BitField) BitField {
+	output_size := input1.size + input2.size
+	mut output := new(output_size)
+	// copy the first input to output as is
+	for i := 0; i < bitnslots(input1.size); i++ {
+		output.field[i] = input1.field[i]
+	}
+
+	// find offset bit and offset slot
+	offset_bit := input1.size % SLOT_SIZE
+	offset_slot := input1.size / SLOT_SIZE
+
+	for i := 0; i < bitnslots(input2.size); i++ {
+		output.field[i + offset_slot] =
+		    output.field[i + offset_slot] |
+		    u32(input2.field[i] << u32(offset_bit))
+	}
+
+	/*
+	 * If offset_bit is not zero, additional operations are needed.
+	 * Number of iterations depends on the nr of slots in output. Two
+	 * options:
+	 * (a) nr of slots in output is the sum of inputs' slots. In this
+	 * case, the nr of bits in the last slot of output is less than the
+	 * nr of bits in the last slot of second input (i.e. the tail of the
+	 * second input spills over into one more slot), OR
+	 * (b) nr of slots of output is the sum of inputs' slots less one
+	 * (i.e. less iterations needed). In this case, the nr of bits in
+	 * the last slot of output is greater than the nr of bits in the last
+	 * slot of second input.
+	 * If offset_bit is zero, no additional copies needed.
+	 */
+	if (output_size - 1) % SLOT_SIZE < (input2.size - 1) % SLOT_SIZE {
+		for i := 0; i < bitnslots(input2.size); i++ {
+			output.field[i + offset_slot + 1] =
+			    output.field[i + offset_slot + 1] |
+			    u32(input2.field[i] >> u32(SLOT_SIZE - offset_bit))
+		}
+	} else if (output_size - 1) % SLOT_SIZE > (input2.size - 1) % SLOT_SIZE {
+		for i := 0; i < bitnslots(input2.size) - 1; i++ {
+			output.field[i + offset_slot + 1] =
+			    output.field[i + offset_slot + 1] |
+			    u32(input2.field[i] >> u32(SLOT_SIZE - offset_bit))
+		}
+	}
+	return output
+}
+
 pub fn print(instance BitField) {
 	mut i := 0
 	for i < instance.size {
@@ -178,3 +228,125 @@ pub fn clone(input BitField) BitField {
 	return output
 }
 
+// cmp returns true when both bitfields have the same size and identical
+// slot contents, and false otherwise.
+pub fn cmp(input1 BitField, input2 BitField) bool {
+	if input1.size != input2.size {return false}
+	for i := 0; i < bitnslots(input1.size); i++ {
+		if input1.field[i] != input2.field[i] {return false}
+	}
+	return true
+}
+
+// popcount returns the number of bits set to one in the bitfield.
+pub fn (instance BitField) popcount() int {
+	size := instance.size
+	bitnslots := bitnslots(size)
+	// bits held by the last slot: everything the full slots before it do
+	// not cover (a plain size % SLOT_SIZE would yield zero, and so skip
+	// the whole last slot, when size is a multiple of SLOT_SIZE)
+	tail := size - (bitnslots - 1) * SLOT_SIZE
+	mut count := 0
+	for i := 0; i < bitnslots - 1; i++ {
+		for j := 0; j < SLOT_SIZE; j++ {
+			if u32(instance.field[i] >> u32(j)) & u32(1) == u32(1) {
+				count++
+			}
+		}
+	}
+	for j := 0; j < tail; j++ {
+		if u32(instance.field[bitnslots - 1] >> u32(j)) & u32(1) == u32(1) {
+			count++
+		}
+	}
+	return count
+}
+
+// hamming returns the popcount of bfxor(input1, input2); assuming bfxor is
+// a bitwise xor, that is the number of positions in which the inputs differ.
+pub fn hamming (input1 BitField, input2 BitField) int {
+	input_xored := bfxor(input1, input2)
+	return input_xored.popcount()
+}
+
+// slice returns a new bitfield holding the bits from position _start
+// (inclusive) to position _end (exclusive). Out-of-range boundaries are
+// clamped, so the result may be shorter than requested, or empty.
+pub fn (input BitField) slice(_start int, _end int) BitField {
+	// boundary checks
+	mut start := _start
+	mut end := _end
+	if end > input.size {
+		end = input.size // or panic?
+	}
+	if start < 0 {
+		start = 0 // or panic?
+	}
+	if start > end {
+		start = end // or panic?
+	}
+	// an empty range has no source slot to copy from; return early so the
+	// tail handling below never reads input.field[end_slot - 1] with
+	// end_slot equal to zero
+	if start == end {
+		return new(0)
+	}
+
+	mut output := new(end - start)
+	start_offset := start % SLOT_SIZE
+	end_offset := (end - 1) % SLOT_SIZE
+	start_slot := start / SLOT_SIZE
+	end_slot := (end - 1) / SLOT_SIZE
+	output_slots := bitnslots(end - start)
+
+	if output_slots > 1 {
+		if start_offset != 0 {
+			for i := 0; i < output_slots - 1; i++ {
+				output.field[i] =
+				    u32(input.field[start_slot + i] >> u32(start_offset))
+				output.field[i] = output.field[i] |
+				    u32(input.field[start_slot + i + 1] <<
+				    u32(SLOT_SIZE - start_offset))
+			}
+		}
+		else {
+			for i := 0; i < output_slots - 1; i++ {
+				output.field[i] =
+				    u32(input.field[start_slot + i])
+			}
+		}
+	}
+
+	if start_offset > end_offset {
+		output.field[(end - start - 1) / SLOT_SIZE] =
+		    u32(input.field[end_slot - 1] >> u32(start_offset))
+		mut mask := u32(u32(1) << u32(end_offset + 1))
+		mask = mask - u32(1)
+		mask = input.field[end_slot] & mask
+		mask = u32(mask << u32(SLOT_SIZE - start_offset))
+		output.field[(end - start - 1) / SLOT_SIZE] =
+		    output.field[(end - start - 1) / SLOT_SIZE] | mask
+	}
+	else if start_offset == 0 {
+		mut mask := u32(0)
+		if end_offset == SLOT_SIZE - 1 {
+			mask = u32(-1)
+		}
+		else {
+			mask = u32(u32(1) << u32(end_offset + 1))
+			mask = mask - u32(1)
+		}
+		output.field[(end - start - 1) / SLOT_SIZE] =
+		    (input.field[end_slot] & mask)
+	}
+	else {
+		mut mask := u32(u32(1) << u32(end_offset - start_offset + 1))
+		mask = mask - u32(1)
+		mask = u32(mask << u32(start_offset))
+		mask = input.field[end_slot] & mask
+		mask = u32(mask >> u32(start_offset))
+		output.field[(end - start - 1) / SLOT_SIZE] =
+		    output.field[(end - start - 1) / SLOT_SIZE] | mask
+	}
+	return output
+}
diff --git a/vlib/bf/bf_test.v b/vlib/bf/bf_test.v
index 832849db63..ef8ef5b87f 100644
--- a/vlib/bf/bf_test.v
+++ b/vlib/bf/bf_test.v
@@ -3,18 +3,18 @@ import bf
 import rand
 
 fn test_bf_new_size() {
-	instance := bf.new(5)
-	assert instance.getsize() == 5
+	instance := bf.new(75)
+	assert instance.getsize() == 75
 }
 
 fn test_bf_set_clear_toggle_get() {
-	mut instance := bf.new(5)
-	instance.setbit(4)
-	assert instance.getbit(4) == 1
-	instance.clearbit(4)
-	assert instance.getbit(4) == 0
-	instance.togglebit(4)
-	assert instance.getbit(4) == 1
+	mut instance := bf.new(75)
+	instance.setbit(47)
+	assert instance.getbit(47) == 1
+	instance.clearbit(47)
+	assert instance.getbit(47) == 0
+	instance.togglebit(47)
+	assert instance.getbit(47) == 1
 }
 
 fn test_bf_and_not_or_xor() {
@@ -43,3 +43,101 @@ fn test_bf_and_not_or_xor() {
 	}
 	assert result == 1
 }
+
+// clone() must yield a bitfield of equal size that compares equal
+fn test_clone_cmp() {
+	rand.seed()
+	len := 80
+	mut input := bf.new(len)
+	for i := 0; i < len; i++ {
+		if rand.next(2) == 1 {
+			input.setbit(i)
+		}
+	}
+	output := bf.clone(input)
+	assert output.getsize() == len
+	assert bf.cmp(input, output) == true
+}
+
+// splitting a bitfield at any point and joining the parts must restore it
+fn test_slice_join() {
+	rand.seed()
+	len := 80
+	mut input := bf.new(len)
+	for i := 0; i < len; i++ {
+		if rand.next(2) == 1 {
+			input.setbit(i)
+		}
+	}
+	mut result := 1
+	for point := 1; point < (len - 1); point++ {
+		// divide a bitfield into two subfields
+		chunk1 := input.slice(0, point)
+		chunk2 := input.slice(point, input.getsize())
+		// concatenate them back into one and compare to the original
+		output := bf.join(chunk1, chunk2)
+		if !bf.cmp(input, output) {
+			result = 0
+		}
+	}
+	assert result == 1
+}
+
+// slicing an empty range must give an empty bitfield
+fn test_slice_empty() {
+	input := bf.new(80)
+	// start == end, with start away from a slot boundary
+	output := input.slice(5, 5)
+	assert output.getsize() == 0
+}
+
+// popcount() must match the number of bits that were set
+fn test_popcount() {
+	rand.seed()
+	len := 80
+	mut count0 := 0
+	mut input := bf.new(len)
+	for i := 0; i < len; i++ {
+		if rand.next(2) == 1 {
+			input.setbit(i)
+			count0++
+		}
+	}
+	count1 := input.popcount()
+	assert count0 == count1
+}
+
+// popcount() must also count a completely filled last slot
+fn test_popcount_full_slots() {
+	// 64 is a whole number of slots if slots are 32 bits wide (u32 slots)
+	len := 64
+	mut input := bf.new(len)
+	for i := 0; i < len; i++ {
+		input.setbit(i)
+	}
+	assert input.popcount() == len
+}
+
+// hamming() must count the positions where exactly one input has the bit set
+fn test_hamming() {
+	rand.seed()
+	len := 80
+	mut count := 0
+	mut input1 := bf.new(len)
+	mut input2 := bf.new(len)
+	for i := 0; i < len; i++ {
+		switch rand.next(4) {
+		case 0:
+		case 1:
+			input1.setbit(i)
+			count++
+		case 2:
+			input2.setbit(i)
+			count++
+		case 3:
+			input1.setbit(i)
+			input2.setbit(i)
+		}
+	}
+	assert count == bf.hamming(input1, input2)
+}