3 Commits

Author SHA1 Message Date
6bd773cdf0 Refactor Evaluate to simplify error checks and bit counting
Removed redundant length checks and streamlined error handling for XOR operations. Simplified the loop to count differing bits by iterating directly over the XOR result. This improves code readability and reduces duplication.
2025-06-16 15:01:08 +01:00
7480e9232c Refactor Evaluate and add XOR helper function.
Refactored the Evaluate function to utilize a new XOR helper for improved readability and modularity. The XOR function handles the bitwise operation, streamlining the bit difference calculation and supporting potential reuse in related computations.
2025-06-16 14:56:21 +01:00
8aec19e945 Fix test function calls that now also return an error value 2025-06-16 14:23:29 +01:00
2 changed files with 44 additions and 33 deletions

View File

@@ -4,22 +4,13 @@ import (
"fmt"
)
func Evaluate(a, b []byte) (int, error) {
// Check if the lengths of the strings are the same
if len(a) != len(b) {
return 0, fmt.Errorf("byte strings are of different lengths %d and %d", len(a), len(b))
func Evaluate(a, b []byte) (bitDiffCount int, err error) {
var c []byte
if c, err = XOR(a, b); err != nil {
return
}
if len(a) != 32 || len(b) != 32 {
return 0, fmt.Errorf("input nilsimsa hashes must be 32 bytes each, got %d and %d",
len(a), len(b))
}
// Count the differing bits
bitDiffCount := 0
for i := 0; i < len(a); i++ {
// XOR the bytes and count the number of `1` bits
xor := a[i] ^ b[i]
bitDiffCount += countBits(xor)
for _, v := range c {
bitDiffCount += countBits(v)
}
return bitDiffCount, nil
}
@@ -33,3 +24,23 @@ func countBits(x byte) int {
}
return count
}
// XOR computes the byte-wise exclusive OR of two nilsimsa hashes and returns
// the result as a freshly allocated 32-byte slice; neither input is modified.
//
// Evaluate uses it to obtain the bits that differ between two hashes. It could
// also be applied across several text samples from the same author, XORing
// them all together to build a metric of how varied their text is.
//
// An error (with a nil slice) is returned when the inputs differ in length,
// or when they are the same length but not exactly 32 bytes long.
func XOR(a, b []byte) (c []byte, err error) {
	const hashLen = 32

	lenA, lenB := len(a), len(b)
	switch {
	case lenA != lenB:
		return nil, fmt.Errorf("byte strings are of different lengths %d and %d", lenA, lenB)
	case lenA != hashLen:
		// Lengths are equal at this point, so checking one side covers both.
		return nil, fmt.Errorf("input nilsimsa hashes must be 32 bytes each, got %d and %d",
			lenA, lenB)
	}

	out := make([]byte, hashLen)
	for i := range out {
		out[i] = a[i] ^ b[i]
	}
	return out, nil
}

View File

@@ -10,12 +10,12 @@ import (
// computes the nilsimsa Digest and compares to the true
// value stored in the pickled sid_to_nil dictionary
func TestNilsimsa(t *testing.T) {
x := HexSum([]byte{})
x, _ := HexSum([]byte{})
if x != "0000000000000000000000000000000000000000000000000000000000000000" {
t.Fatal(x)
}
x = HexSum([]byte("abcdefgh"))
x, _ = HexSum([]byte("abcdefgh"))
if x != "14c8118000000000030800000004042004189020001308014088003280000078" {
t.Fatal(x)
}
@@ -35,8 +35,8 @@ func TestNilsimsa(t *testing.T) {
t.Fatal(x)
}
digest1 := Sum([]byte("abcdefghijk"))
digest2 := Sum([]byte("abcdefgh"))
digest1, _ := Sum([]byte("abcdefghijk"))
digest2, _ := Sum([]byte("abcdefgh"))
bitsDiff := BitsDiff(&digest1, &digest2)
if bitsDiff != 109 {
t.Fatalf("bitsDiff(%d)", bitsDiff)
@@ -53,16 +53,16 @@ func TestNilsimsa(t *testing.T) {
t.Fatalf("bitsDiff(%d)", bitsDiff)
}
x1 := HexSum([]byte("abcdefghijk"))
x2 := HexSum([]byte("abcdefgh"))
x1, _ := HexSum([]byte("abcdefghijk"))
x2, _ := HexSum([]byte("abcdefgh"))
bitsDiff = BitsDiffHex(x1, x2)
if bitsDiff != 109 {
t.Fatalf("bitsDiff(%d)", bitsDiff)
}
x1 = HexSum([]byte("return diff.NewSequenceMatcherFromFiles" +
x1, _ = HexSum([]byte("return diff.NewSequenceMatcherFromFiles" +
"(srcPath, dstPath)"))
x2 = HexSum([]byte("return diff.NewSequenceMatcherFromFiles" +
x2, _ = HexSum([]byte("return diff.NewSequenceMatcherFromFiles" +
"(dstPath, srcPath)"))
if x1 != "8beb55d08d78fed441ede9301390b49b716a11af3962db70b24540338cb70035" {
t.Fatal(x1)
@@ -75,8 +75,8 @@ func TestNilsimsa(t *testing.T) {
t.Fatalf("bitsDiff(%d)", bitsDiff)
}
x1 = HexSum([]byte("return diff.XYZ"))
x2 = HexSum([]byte("return diff.NewSequenceMatcherFromFiles" +
x1, _ = HexSum([]byte("return diff.XYZ"))
x2, _ = HexSum([]byte("return diff.NewSequenceMatcherFromFiles" +
"(dstPath, srcPath)"))
if x1 != "84125570884ae840f042ea400400009a721891002011a071225247f7a5241018" {
t.Fatal(x1)
@@ -89,8 +89,8 @@ func TestNilsimsa(t *testing.T) {
t.Fatalf("bitsDiff(%d)", bitsDiff)
}
digest1 = Sum([]byte("C.setTabChangeCallbackWrapper(h.ih())"))
digest2 = Sum([]byte("C.setTabChangeCallbackWrapper(ih)"))
digest1, _ = Sum([]byte("C.setTabChangeCallbackWrapper(h.ih())"))
digest2, _ = Sum([]byte("C.setTabChangeCallbackWrapper(ih)"))
bitsDiff = BitsDiff(&digest1, &digest2)
if bitsDiff != 40 {
t.Fatalf("bitsDiff(%d)", bitsDiff)
@@ -320,7 +320,7 @@ public class Nilsimsa {
}
}
`
x := HexSum([]byte(nilsimsaJavaimplementation))
x, _ := HexSum([]byte(nilsimsaJavaimplementation))
if x != "4c900d44043f014c40f40040d8201000f246227123b28864013040008240204a" {
t.Fatal(x)
}
@@ -428,7 +428,7 @@ func TestNilsimsa3(t *testing.T) {
panic("len(list) != len(results)")
}
for i, x := range list {
hex := HexSum([]byte(x))
hex, _ := HexSum([]byte(x))
if hex != results[i] {
t.Fatal(hex)
}
@@ -437,9 +437,9 @@ func TestNilsimsa3(t *testing.T) {
if len(list) != len(compareResults)+1 {
panic("len(list) != len(compareResults) + 1")
}
last := Sum([]byte(list[0]))
last, _ := Sum([]byte(list[0]))
for i, x := range list[1:] {
sum := Sum([]byte(x))
sum, _ := Sum([]byte(x))
bits := BitsDiff(&sum, &last)
if bits != compareResults[i] {
t.Fatalf("%x", bits)
@@ -448,9 +448,9 @@ func TestNilsimsa3(t *testing.T) {
}
j := 0
last = Sum([]byte(list[0]))
last, _ = Sum([]byte(list[0]))
for i := 4; i < len(list); i += 3 {
sum := Sum([]byte(list[i]))
sum, _ := Sum([]byte(list[i]))
bits := BitsDiff(&sum, &last)
if bits != step3CompareResults[j] {
t.Fatalf("%x", bits)