Files
nilsimsa/evaluate.go
mleku 7480e9232c Refactor Evaluate and add XOR helper function.
Refactored the Evaluate function to utilize a new XOR helper for improved readability and modularity. The XOR function handles the bitwise operation, streamlining the bit difference calculation and supporting potential reuse in related computations.
2025-06-16 14:56:21 +01:00

55 lines
1.5 KiB
Go

package nilsimsa
import (
"fmt"
)
func Evaluate(a, b []byte) (bitDiffCount int, err error) {
// Check if the lengths of the strings are the same
if len(a) != len(b) {
return 0, fmt.Errorf("byte strings are of different lengths %d and %d", len(a), len(b))
}
if len(a) != 32 || len(b) != 32 {
return 0, fmt.Errorf("input nilsimsa hashes must be 32 bytes each, got %d and %d",
len(a), len(b))
}
// Count the differing bits
var c []byte
c, err = XOR(a, b)
for i := 0; i < len(a); i++ {
bitDiffCount += countBits(c[i])
}
return bitDiffCount, nil
}
// countBits returns how many bits of x are set to 1.
func countBits(x byte) int {
	n := 0
	// x & (x-1) clears the lowest set bit, so the loop body runs exactly once
	// per 1 bit in x.
	for ; x != 0; x &= x - 1 {
		n++
	}
	return n
}
// XOR returns the byte-wise exclusive-or of two nilsimsa hashes as a newly
// allocated 32-byte slice; neither input is modified.
//
// Evaluate uses it to obtain the bits in which two hashes differ. It could
// also be folded over several hashes taken from texts by the same author to
// build a measure of how much their writing varies.
//
// An error (and a nil slice) is returned when a and b have different lengths,
// or when they are not 32 bytes long.
func XOR(a, b []byte) (c []byte, err error) {
	switch {
	case len(a) != len(b):
		return nil, fmt.Errorf("byte strings are of different lengths %d and %d", len(a), len(b))
	case len(a) != 32:
		// The lengths are equal at this point, so testing a alone covers both.
		return nil, fmt.Errorf("input nilsimsa hashes must be 32 bytes each, got %d and %d",
			len(a), len(b))
	}
	c = make([]byte, len(a))
	for i, v := range a {
		c[i] = v ^ b[i]
	}
	return c, nil
}