4 Commits

Author SHA1 Message Date
6bd773cdf0 Refactor Evaluate to simplify error checks and bit counting
Removed redundant length checks and streamlined error handling for XOR operations. Simplified the loop to count differing bits by iterating directly over the XOR result. This improves code readability and reduces duplication.
2025-06-16 15:01:08 +01:00
7480e9232c Refactor Evaluate and add XOR helper function.
Refactored the Evaluate function to utilize a new XOR helper for improved readability and modularity. The XOR function handles the bitwise operation, streamlining the bit difference calculation and supporting potential reuse in related computations.
2025-06-16 14:56:21 +01:00
8aec19e945 Fix test function calls that now return an error value 2025-06-16 14:23:29 +01:00
4f16b0b2fd Fix typo in comment and add Evaluate function for hash comparison
Corrected a typo in a comment within `nilsimsa.go`. Introduced a new `Evaluate` function in `evaluate.go` to compute the bitwise difference between two Nilsimsa hash values, ensuring input validation and proper bit counting logic.
2025-06-16 14:13:17 +01:00
3 changed files with 65 additions and 19 deletions

46
evaluate.go Normal file
View File

@@ -0,0 +1,46 @@
package nilsimsa
import (
	"fmt"
	"math/bits"
)
// Evaluate reports how many bits differ between the two hashes a and b.
//
// The inputs are combined with XOR, so every bit set in the result marks a
// position where a and b disagree; the set bits of each resulting byte are
// then summed with countBits. If XOR rejects the inputs, its error is returned
// unchanged together with a count of zero.
func Evaluate(a, b []byte) (bitDiffCount int, err error) {
	diff, xorErr := XOR(a, b)
	if xorErr != nil {
		return 0, xorErr
	}

	total := 0
	for i := 0; i < len(diff); i++ {
		total += countBits(diff[i])
	}
	return total, nil
}
// countBits returns the number of bits set to 1 in x (its population count).
// It relies on the standard library's bits.OnesCount8 rather than a
// hand-rolled shift-and-mask loop; the result is always in the range 0..8.
func countBits(x byte) int {
	return bits.OnesCount8(x)
}
// XOR returns the byte-wise exclusive OR of two nilsimsa hashes in a newly
// allocated slice; neither input is modified. Evaluate uses it for its simple
// distance count, and it could also be folded over several hashes of text from
// the same author to build a metric of how varied their text is.
//
// Both inputs must be exactly 32 bytes long. A length mismatch between a and b
// is reported first; equal lengths other than 32 are reported separately.
func XOR(a, b []byte) (c []byte, err error) {
	const hashLen = 32

	switch {
	case len(a) != len(b):
		return nil, fmt.Errorf("byte strings are of different lengths %d and %d", len(a), len(b))
	case len(a) != hashLen:
		// Lengths are equal here, so checking a alone covers b as well.
		return nil, fmt.Errorf("input nilsimsa hashes must be 32 bytes each, got %d and %d",
			len(a), len(b))
	}

	c = make([]byte, hashLen)
	for i := range c {
		c[i] = a[i] ^ b[i]
	}
	return c, nil
}

View File

@@ -127,7 +127,7 @@ type Digest struct {
// New creates a new Nilsimsa hash digest
func New() hash.Hash {
d := new(Digest)
// Note that no memory is allocate other than the struct itself. It is better to embedd
// Note that no memory is allocated other than the struct itself. It is better to embed
// last4Array into the struct itself since its maximum size is known already
// d.last4 = d.last4Array[:0] //creating the slice by re-slicing last4Array
return d

View File

@@ -10,12 +10,12 @@ import (
// computes the nilsimsa Digest and compares to the true
// value stored in the pickled sid_to_nil dictionary
func TestNilsimsa(t *testing.T) {
x := HexSum([]byte{})
x, _ := HexSum([]byte{})
if x != "0000000000000000000000000000000000000000000000000000000000000000" {
t.Fatal(x)
}
x = HexSum([]byte("abcdefgh"))
x, _ = HexSum([]byte("abcdefgh"))
if x != "14c8118000000000030800000004042004189020001308014088003280000078" {
t.Fatal(x)
}
@@ -35,8 +35,8 @@ func TestNilsimsa(t *testing.T) {
t.Fatal(x)
}
digest1 := Sum([]byte("abcdefghijk"))
digest2 := Sum([]byte("abcdefgh"))
digest1, _ := Sum([]byte("abcdefghijk"))
digest2, _ := Sum([]byte("abcdefgh"))
bitsDiff := BitsDiff(&digest1, &digest2)
if bitsDiff != 109 {
t.Fatalf("bitsDiff(%d)", bitsDiff)
@@ -53,16 +53,16 @@ func TestNilsimsa(t *testing.T) {
t.Fatalf("bitsDiff(%d)", bitsDiff)
}
x1 := HexSum([]byte("abcdefghijk"))
x2 := HexSum([]byte("abcdefgh"))
x1, _ := HexSum([]byte("abcdefghijk"))
x2, _ := HexSum([]byte("abcdefgh"))
bitsDiff = BitsDiffHex(x1, x2)
if bitsDiff != 109 {
t.Fatalf("bitsDiff(%d)", bitsDiff)
}
x1 = HexSum([]byte("return diff.NewSequenceMatcherFromFiles" +
x1, _ = HexSum([]byte("return diff.NewSequenceMatcherFromFiles" +
"(srcPath, dstPath)"))
x2 = HexSum([]byte("return diff.NewSequenceMatcherFromFiles" +
x2, _ = HexSum([]byte("return diff.NewSequenceMatcherFromFiles" +
"(dstPath, srcPath)"))
if x1 != "8beb55d08d78fed441ede9301390b49b716a11af3962db70b24540338cb70035" {
t.Fatal(x1)
@@ -75,8 +75,8 @@ func TestNilsimsa(t *testing.T) {
t.Fatalf("bitsDiff(%d)", bitsDiff)
}
x1 = HexSum([]byte("return diff.XYZ"))
x2 = HexSum([]byte("return diff.NewSequenceMatcherFromFiles" +
x1, _ = HexSum([]byte("return diff.XYZ"))
x2, _ = HexSum([]byte("return diff.NewSequenceMatcherFromFiles" +
"(dstPath, srcPath)"))
if x1 != "84125570884ae840f042ea400400009a721891002011a071225247f7a5241018" {
t.Fatal(x1)
@@ -89,8 +89,8 @@ func TestNilsimsa(t *testing.T) {
t.Fatalf("bitsDiff(%d)", bitsDiff)
}
digest1 = Sum([]byte("C.setTabChangeCallbackWrapper(h.ih())"))
digest2 = Sum([]byte("C.setTabChangeCallbackWrapper(ih)"))
digest1, _ = Sum([]byte("C.setTabChangeCallbackWrapper(h.ih())"))
digest2, _ = Sum([]byte("C.setTabChangeCallbackWrapper(ih)"))
bitsDiff = BitsDiff(&digest1, &digest2)
if bitsDiff != 40 {
t.Fatalf("bitsDiff(%d)", bitsDiff)
@@ -320,7 +320,7 @@ public class Nilsimsa {
}
}
`
x := HexSum([]byte(nilsimsaJavaimplementation))
x, _ := HexSum([]byte(nilsimsaJavaimplementation))
if x != "4c900d44043f014c40f40040d8201000f246227123b28864013040008240204a" {
t.Fatal(x)
}
@@ -428,7 +428,7 @@ func TestNilsimsa3(t *testing.T) {
panic("len(list) != len(results)")
}
for i, x := range list {
hex := HexSum([]byte(x))
hex, _ := HexSum([]byte(x))
if hex != results[i] {
t.Fatal(hex)
}
@@ -437,9 +437,9 @@ func TestNilsimsa3(t *testing.T) {
if len(list) != len(compareResults)+1 {
panic("len(list) != len(compareResults) + 1")
}
last := Sum([]byte(list[0]))
last, _ := Sum([]byte(list[0]))
for i, x := range list[1:] {
sum := Sum([]byte(x))
sum, _ := Sum([]byte(x))
bits := BitsDiff(&sum, &last)
if bits != compareResults[i] {
t.Fatalf("%x", bits)
@@ -448,9 +448,9 @@ func TestNilsimsa3(t *testing.T) {
}
j := 0
last = Sum([]byte(list[0]))
last, _ = Sum([]byte(list[0]))
for i := 4; i < len(list); i += 3 {
sum := Sum([]byte(list[i]))
sum, _ := Sum([]byte(list[i]))
bits := BitsDiff(&sum, &last)
if bits != step3CompareResults[j] {
t.Fatalf("%x", bits)