bf: add cmp(), join(), slice(), popcount() and hamming()

2019-07-13 03:46:37 +09:00 · 2019-07-13 03:46:37 +09:00 · ef899b7c09
parent 2538a7e752
commit ef899b7c09
2 changed files with 231 additions and 10 deletions
--- a/vlib/bf/bf.v
+++ b/vlib/bf/bf.v
@ -13,7 +13,7 @@ const (
 )

 fn bitmask(bitnr int) u32 {
-	return u32(1 << (bitnr % SLOT_SIZE))
+	return u32(u32(1) << u32(bitnr % SLOT_SIZE))
 }

 fn bitslot(size int) int {
@ -150,6 +150,53 @@ pub fn bfxor(input1 BitField, input2 BitField) BitField {
 	return output
 }

+// join concatenates two bitfields. It returns a new bitfield of size
+// input1.size + input2.size: the slots of input1 are copied unchanged and
+// the bits of input2 are OR-ed in starting at bit position input1.size.
+// NOTE(review): because input2 is OR-ed onto the last copied slot of input1,
+// this presumably relies on the unused high bits of that slot being zero --
+// confirm that all BitField operations keep them cleared.
+pub fn join(input1 BitField, input2 BitField) BitField {
+	output_size := input1.size + input2.size
+	mut output := new(output_size)
+	// copy the first input to output as is
+	for i := 0; i < bitnslots(input1.size); i++ {
+		output.field[i] = input1.field[i]
+	}
+
+	// find offset bit and offset slot
+	// (position inside the slot, and slot index, of the first bit of input2)
+	offset_bit := input1.size % SLOT_SIZE
+	offset_slot := input1.size / SLOT_SIZE
+
+	// first pass: the low part of every input2 slot, shifted up by offset_bit,
+	// goes into the output slot with the same relative index
+	for i := 0; i < bitnslots(input2.size); i++ {
+		output.field[i + offset_slot] =
+		    output.field[i + offset_slot] |
+		    u32(input2.field[i] << u32(offset_bit))
+	}
+
+	/*
+	 * If offset_bit is not zero, additional operations are needed: the
+	 * high bits of each input2 slot that were shifted out above have to
+	 * be carried into the next output slot.
+	 * Number of iterations depends on the nr of slots in output. Two
+	 * options:
+	 * (a) nr of slots in output is the sum of inputs' slots. In this
+	 * case, the position of the last bit within the last slot of output
+	 * is lower than the position of the last bit within the last slot of
+	 * second input (i.e. the last slot of second input also spills over
+	 * into one more output slot, so every slot of it is processed), OR
+	 * (b) nr of slots of output is the sum of inputs' slots less one
+	 * (i.e. less iterations needed). In this case, the position of the
+	 * last bit within the last slot of output is higher than the position
+	 * of the last bit within the last slot of second input.
+	 * If offset_bit is zero, both positions are equal and no additional
+	 * copies are needed.
+	 */
+	if (output_size - 1) % SLOT_SIZE < (input2.size - 1) % SLOT_SIZE {
+		for i := 0; i < bitnslots(input2.size); i++ {
+			output.field[i + offset_slot + 1] =
+			    output.field[i + offset_slot + 1] |
+			    u32(input2.field[i] >> u32(SLOT_SIZE - offset_bit))
+		}
+	} else if (output_size - 1) % SLOT_SIZE > (input2.size - 1) % SLOT_SIZE {
+		for i := 0; i < bitnslots(input2.size) - 1; i++ {
+			output.field[i + offset_slot + 1] =
+			    output.field[i + offset_slot + 1] |
+			    u32(input2.field[i] >> u32(SLOT_SIZE - offset_bit))
+		}
+	}
+	return output
+}
+
 pub fn print(instance BitField) {
 	mut i := 0
 	for i < instance.size {
@ -178,3 +225,102 @@ pub fn clone(input BitField) BitField {
 	return output
 }

+// cmp returns true when both bitfields have the same size and every slot
+// of input1 equals the corresponding slot of input2; otherwise false.
+pub fn cmp(input1 BitField, input2 BitField) bool {
+	if input1.size != input2.size {
+		return false
+	}
+	for slot := 0; slot < bitnslots(input1.size); slot++ {
+		if input1.field[slot] != input2.field[slot] {
+			return false
+		}
+	}
+	return true
+}
+
+// popcount returns the number of set bits among the first instance.size
+// bits of the bitfield.
+pub fn (instance BitField) popcount() int {
+	size := instance.size
+	mut count := 0
+	// Visit every valid bit position in one loop. Handling the last slot
+	// through a separate "tail" of size % SLOT_SIZE bits would count nothing
+	// from that slot whenever size is an exact multiple of SLOT_SIZE.
+	for i := 0; i < size; i++ {
+		// bit i lives in slot i / SLOT_SIZE at position i % SLOT_SIZE
+		if u32(instance.field[i / SLOT_SIZE] >> u32(i % SLOT_SIZE)) & u32(1) == u32(1) {
+			count++
+		}
+	}
+	return count
+}
+
+// hamming returns the number of set bits in bfxor(input1, input2), i.e.
+// the count of positions at which the two bitfields differ.
+pub fn hamming(input1 BitField, input2 BitField) int {
+	difference := bfxor(input1, input2)
+	return difference.popcount()
+}
+
+// slice returns a new bitfield holding bits [_start, _end) of input.
+// Out-of-range arguments are clamped instead of rejected: _end is limited
+// to [0, input.size] and _start to [0, _end]. For an empty range the result
+// of new(0) is returned without copying anything.
+pub fn (input BitField) slice(_start int, _end int) BitField {
+	// boundary checks
+	mut start := _start
+	mut end := _end
+	if end > input.size {
+		end = input.size // or panic?
+	}
+	if end < 0 {
+		end = 0 // or panic?
+	}
+	if start > end {
+		start = end // or panic?
+	}
+	if start < 0 {
+		start = 0 // or panic?
+	}
+
+	mut output := new(end - start)
+	// Nothing to copy for an empty range. Without this guard (end - 1) lies
+	// before start, and the tail handling below would read an input slot
+	// outside the requested range (possibly index -1) and store bits into
+	// a zero-size result.
+	if start == end {
+		return output
+	}
+
+	// bit position inside its slot / slot index of the first and last bit
+	start_offset := start % SLOT_SIZE
+	end_offset := (end - 1) % SLOT_SIZE
+	start_slot := start / SLOT_SIZE
+	end_slot := (end - 1) / SLOT_SIZE
+	output_slots := bitnslots(end - start)
+
+	// fill every output slot except the last one
+	if output_slots > 1 {
+		if start_offset != 0 {
+			// unaligned start: each output slot is stitched together from
+			// the high part of one input slot and the low part of the next
+			for i := 0; i < output_slots - 1; i++ {
+				output.field[i] =
+				    u32(input.field[start_slot + i] >> u32(start_offset))
+				output.field[i] = output.field[i] |
+				    u32(input.field[start_slot + i + 1] <<
+				    u32(SLOT_SIZE - start_offset))
+			}
+		}
+		else {
+			// slot-aligned start: plain slot copy
+			for i := 0; i < output_slots - 1; i++ {
+				output.field[i] =
+				    u32(input.field[start_slot + i])
+			}
+		}
+	}
+
+	// Fill the last output slot. All masks are built with u32 arithmetic
+	// (as in bitmask()) so that a shift by 31 never touches the sign bit
+	// of a signed int.
+	if start_offset > end_offset {
+		// the last output slot straddles two input slots: the high bits of
+		// slot end_slot - 1 followed by the low end_offset + 1 bits of
+		// slot end_slot
+		output.field[(end - start - 1) / SLOT_SIZE] =
+		    u32(input.field[end_slot - 1] >> u32(start_offset))
+		mut mask := u32(u32(1) << u32(end_offset + 1))
+		mask = mask - u32(1)
+		mask = input.field[end_slot] & mask
+		mask = u32(mask << u32(SLOT_SIZE - start_offset))
+		output.field[(end - start - 1) / SLOT_SIZE] =
+		    output.field[(end - start - 1) / SLOT_SIZE] | mask
+	}
+	else if start_offset == 0 {
+		// slot-aligned start: keep the low end_offset + 1 bits of end_slot
+		mut mask := u32(0)
+		if end_offset == SLOT_SIZE - 1 {
+			mask = u32(-1)
+		}
+		else {
+			mask = u32(u32(1) << u32(end_offset + 1))
+			mask = mask - u32(1)
+		}
+		output.field[(end - start - 1) / SLOT_SIZE] =
+		    (input.field[end_slot] & mask)
+	}
+	else {
+		// the remaining bits all sit inside slot end_slot: select bits
+		// start_offset..end_offset and move them down to position 0
+		mut mask := u32(u32(1) << u32(end_offset - start_offset + 1))
+		mask = mask - u32(1)
+		mask = u32(mask << u32(start_offset))
+		mask = input.field[end_slot] & mask
+		mask = u32(mask >> u32(start_offset))
+		output.field[(end - start - 1) / SLOT_SIZE] =
+		    output.field[(end - start - 1) / SLOT_SIZE] | mask
+	}
+	return output
+}
--- a/vlib/bf/bf_test.v
+++ b/vlib/bf/bf_test.v
@ -3,18 +3,18 @@ import bf
 import rand

 fn test_bf_new_size() {
-	instance := bf.new(5)
-	assert instance.getsize() == 5
+	instance := bf.new(75)
+	assert instance.getsize() == 75
 }

 fn test_bf_set_clear_toggle_get() {
-	mut instance := bf.new(5)
-	instance.setbit(4)
-	assert instance.getbit(4) == 1
-	instance.clearbit(4)
-	assert instance.getbit(4) == 0
-	instance.togglebit(4)
-	assert instance.getbit(4) == 1
+	mut instance := bf.new(75)
+	instance.setbit(47)
+	assert instance.getbit(47) == 1
+	instance.clearbit(47)
+	assert instance.getbit(47) == 0
+	instance.togglebit(47)
+	assert instance.getbit(47) == 1
 }

 fn test_bf_and_not_or_xor() {
@ -43,3 +43,78 @@ fn test_bf_and_not_or_xor() {
 	}
 	assert result == 1
 }
+
+// Fill a bitfield with random bits, clone it, and check that the clone has
+// the same size and compares equal to the original.
+fn test_clone_cmp() {
+	rand.seed()
+	nbits := 80
+	mut original := bf.new(nbits)
+	for bit := 0; bit < nbits; bit++ {
+		if rand.next(2) == 1 {
+			original.setbit(bit)
+		}
+	}
+	duplicate := bf.clone(original)
+	assert duplicate.getsize() == nbits
+	assert bf.cmp(original, duplicate) == true
+}
+
+// Split a random bitfield at each point from 1 to len - 2, join the two
+// parts again and require every rejoined bitfield to equal the original.
+fn test_slice_join() {
+	rand.seed()
+	nbits := 80
+	mut source := bf.new(nbits)
+	for bit := 0; bit < nbits; bit++ {
+		if rand.next(2) == 1 {
+			source.setbit(bit)
+		}
+	}
+	// stays 1 as long as every split/join round trip reproduces the source
+	mut all_match := 1
+	for split := 1; split < nbits - 1; split++ {
+		// cut into [0, split) and [split, size) ...
+		head := source.slice(0, split)
+		tail := source.slice(split, source.getsize())
+		// ... then glue the parts back together and compare
+		rejoined := bf.join(head, tail)
+		if !bf.cmp(source, rejoined) {
+			all_match = 0
+		}
+	}
+	assert all_match == 1
+}
+
+// Set random bits while counting them by hand, then check that popcount()
+// reports the same number.
+fn test_popcount() {
+	rand.seed()
+	nbits := 80
+	mut expected := 0
+	mut field := bf.new(nbits)
+	for bit := 0; bit < nbits; bit++ {
+		if rand.next(2) == 1 {
+			field.setbit(bit)
+			expected++
+		}
+	}
+	actual := field.popcount()
+	assert expected == actual
+}
+
+// Build two random bitfields bit by bit while counting the positions where
+// exactly one of them gets set; hamming() must report that same count.
+fn test_hamming() {
+	rand.seed()
+	nbits := 80
+	mut expected := 0
+	mut first := bf.new(nbits)
+	mut second := bf.new(nbits)
+	for bit := 0; bit < nbits; bit++ {
+		// 0: neither set, 1: only first, 2: only second, 3: both
+		choice := rand.next(4)
+		if choice == 1 {
+			first.setbit(bit)
+			expected++
+		} else if choice == 2 {
+			second.setbit(bit)
+			expected++
+		} else if choice == 3 {
+			first.setbit(bit)
+			second.setbit(bit)
+		}
+	}
+	assert expected == bf.hamming(first, second)
+}