Compare commits
3 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
6bd773cdf0
|
|||
|
7480e9232c
|
|||
|
8aec19e945
|
41
evaluate.go
41
evaluate.go
@@ -4,22 +4,13 @@ import (
|
|||||||
"fmt"
|
"fmt"
|
||||||
)
|
)
|
||||||
|
|
||||||
func Evaluate(a, b []byte) (int, error) {
|
func Evaluate(a, b []byte) (bitDiffCount int, err error) {
|
||||||
// Check if the lengths of the strings are the same
|
var c []byte
|
||||||
if len(a) != len(b) {
|
if c, err = XOR(a, b); err != nil {
|
||||||
return 0, fmt.Errorf("byte strings are of different lengths %d and %d", len(a), len(b))
|
return
|
||||||
}
|
}
|
||||||
if len(a) != 32 || len(b) != 32 {
|
for _, v := range c {
|
||||||
return 0, fmt.Errorf("input nilsimsa hashes must be 32 bytes each, got %d and %d",
|
bitDiffCount += countBits(v)
|
||||||
len(a), len(b))
|
|
||||||
}
|
|
||||||
|
|
||||||
// Count the differing bits
|
|
||||||
bitDiffCount := 0
|
|
||||||
for i := 0; i < len(a); i++ {
|
|
||||||
// XOR the bytes and count the number of `1` bits
|
|
||||||
xor := a[i] ^ b[i]
|
|
||||||
bitDiffCount += countBits(xor)
|
|
||||||
}
|
}
|
||||||
return bitDiffCount, nil
|
return bitDiffCount, nil
|
||||||
}
|
}
|
||||||
@@ -33,3 +24,23 @@ func countBits(x byte) int {
|
|||||||
}
|
}
|
||||||
return count
|
return count
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// XOR is a simple bitwise XOR on two nilsimsa hashes. This is used in the simple distance count
|
||||||
|
// in Evaluate but could also be used against multiple samples of text from the same author to
|
||||||
|
// create a metric of the variety of their text by XORing all of them together.
|
||||||
|
func XOR(a, b []byte) (c []byte, err error) {
|
||||||
|
// Check that both byte slices have the same length
|
||||||
|
if len(a) != len(b) {
|
||||||
|
return nil, fmt.Errorf("byte strings are of different lengths %d and %d", len(a), len(b))
|
||||||
|
}
|
||||||
|
if len(a) != 32 || len(b) != 32 {
|
||||||
|
return nil, fmt.Errorf("input nilsimsa hashes must be 32 bytes each, got %d and %d",
|
||||||
|
len(a), len(b))
|
||||||
|
}
|
||||||
|
c = make([]byte, 32)
|
||||||
|
for i := 0; i < len(a); i++ {
|
||||||
|
// XOR the corresponding bytes of the two hashes; bit counting is left to the caller
|
||||||
|
c[i] = a[i] ^ b[i]
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|||||||
@@ -10,12 +10,12 @@ import (
|
|||||||
// computes the nilsimsa Digest and compares to the true
|
// computes the nilsimsa Digest and compares to the true
|
||||||
// value stored in the pickled sid_to_nil dictionary
|
// value stored in the pickled sid_to_nil dictionary
|
||||||
func TestNilsimsa(t *testing.T) {
|
func TestNilsimsa(t *testing.T) {
|
||||||
x := HexSum([]byte{})
|
x, _ := HexSum([]byte{})
|
||||||
if x != "0000000000000000000000000000000000000000000000000000000000000000" {
|
if x != "0000000000000000000000000000000000000000000000000000000000000000" {
|
||||||
t.Fatal(x)
|
t.Fatal(x)
|
||||||
}
|
}
|
||||||
|
|
||||||
x = HexSum([]byte("abcdefgh"))
|
x, _ = HexSum([]byte("abcdefgh"))
|
||||||
if x != "14c8118000000000030800000004042004189020001308014088003280000078" {
|
if x != "14c8118000000000030800000004042004189020001308014088003280000078" {
|
||||||
t.Fatal(x)
|
t.Fatal(x)
|
||||||
}
|
}
|
||||||
@@ -35,8 +35,8 @@ func TestNilsimsa(t *testing.T) {
|
|||||||
t.Fatal(x)
|
t.Fatal(x)
|
||||||
}
|
}
|
||||||
|
|
||||||
digest1 := Sum([]byte("abcdefghijk"))
|
digest1, _ := Sum([]byte("abcdefghijk"))
|
||||||
digest2 := Sum([]byte("abcdefgh"))
|
digest2, _ := Sum([]byte("abcdefgh"))
|
||||||
bitsDiff := BitsDiff(&digest1, &digest2)
|
bitsDiff := BitsDiff(&digest1, &digest2)
|
||||||
if bitsDiff != 109 {
|
if bitsDiff != 109 {
|
||||||
t.Fatalf("bitsDiff(%d)", bitsDiff)
|
t.Fatalf("bitsDiff(%d)", bitsDiff)
|
||||||
@@ -53,16 +53,16 @@ func TestNilsimsa(t *testing.T) {
|
|||||||
t.Fatalf("bitsDiff(%d)", bitsDiff)
|
t.Fatalf("bitsDiff(%d)", bitsDiff)
|
||||||
}
|
}
|
||||||
|
|
||||||
x1 := HexSum([]byte("abcdefghijk"))
|
x1, _ := HexSum([]byte("abcdefghijk"))
|
||||||
x2 := HexSum([]byte("abcdefgh"))
|
x2, _ := HexSum([]byte("abcdefgh"))
|
||||||
bitsDiff = BitsDiffHex(x1, x2)
|
bitsDiff = BitsDiffHex(x1, x2)
|
||||||
if bitsDiff != 109 {
|
if bitsDiff != 109 {
|
||||||
t.Fatalf("bitsDiff(%d)", bitsDiff)
|
t.Fatalf("bitsDiff(%d)", bitsDiff)
|
||||||
}
|
}
|
||||||
|
|
||||||
x1 = HexSum([]byte("return diff.NewSequenceMatcherFromFiles" +
|
x1, _ = HexSum([]byte("return diff.NewSequenceMatcherFromFiles" +
|
||||||
"(srcPath, dstPath)"))
|
"(srcPath, dstPath)"))
|
||||||
x2 = HexSum([]byte("return diff.NewSequenceMatcherFromFiles" +
|
x2, _ = HexSum([]byte("return diff.NewSequenceMatcherFromFiles" +
|
||||||
"(dstPath, srcPath)"))
|
"(dstPath, srcPath)"))
|
||||||
if x1 != "8beb55d08d78fed441ede9301390b49b716a11af3962db70b24540338cb70035" {
|
if x1 != "8beb55d08d78fed441ede9301390b49b716a11af3962db70b24540338cb70035" {
|
||||||
t.Fatal(x1)
|
t.Fatal(x1)
|
||||||
@@ -75,8 +75,8 @@ func TestNilsimsa(t *testing.T) {
|
|||||||
t.Fatalf("bitsDiff(%d)", bitsDiff)
|
t.Fatalf("bitsDiff(%d)", bitsDiff)
|
||||||
}
|
}
|
||||||
|
|
||||||
x1 = HexSum([]byte("return diff.XYZ"))
|
x1, _ = HexSum([]byte("return diff.XYZ"))
|
||||||
x2 = HexSum([]byte("return diff.NewSequenceMatcherFromFiles" +
|
x2, _ = HexSum([]byte("return diff.NewSequenceMatcherFromFiles" +
|
||||||
"(dstPath, srcPath)"))
|
"(dstPath, srcPath)"))
|
||||||
if x1 != "84125570884ae840f042ea400400009a721891002011a071225247f7a5241018" {
|
if x1 != "84125570884ae840f042ea400400009a721891002011a071225247f7a5241018" {
|
||||||
t.Fatal(x1)
|
t.Fatal(x1)
|
||||||
@@ -89,8 +89,8 @@ func TestNilsimsa(t *testing.T) {
|
|||||||
t.Fatalf("bitsDiff(%d)", bitsDiff)
|
t.Fatalf("bitsDiff(%d)", bitsDiff)
|
||||||
}
|
}
|
||||||
|
|
||||||
digest1 = Sum([]byte("C.setTabChangeCallbackWrapper(h.ih())"))
|
digest1, _ = Sum([]byte("C.setTabChangeCallbackWrapper(h.ih())"))
|
||||||
digest2 = Sum([]byte("C.setTabChangeCallbackWrapper(ih)"))
|
digest2, _ = Sum([]byte("C.setTabChangeCallbackWrapper(ih)"))
|
||||||
bitsDiff = BitsDiff(&digest1, &digest2)
|
bitsDiff = BitsDiff(&digest1, &digest2)
|
||||||
if bitsDiff != 40 {
|
if bitsDiff != 40 {
|
||||||
t.Fatalf("bitsDiff(%d)", bitsDiff)
|
t.Fatalf("bitsDiff(%d)", bitsDiff)
|
||||||
@@ -320,7 +320,7 @@ public class Nilsimsa {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
`
|
`
|
||||||
x := HexSum([]byte(nilsimsaJavaimplementation))
|
x, _ := HexSum([]byte(nilsimsaJavaimplementation))
|
||||||
if x != "4c900d44043f014c40f40040d8201000f246227123b28864013040008240204a" {
|
if x != "4c900d44043f014c40f40040d8201000f246227123b28864013040008240204a" {
|
||||||
t.Fatal(x)
|
t.Fatal(x)
|
||||||
}
|
}
|
||||||
@@ -428,7 +428,7 @@ func TestNilsimsa3(t *testing.T) {
|
|||||||
panic("len(list) != len(results)")
|
panic("len(list) != len(results)")
|
||||||
}
|
}
|
||||||
for i, x := range list {
|
for i, x := range list {
|
||||||
hex := HexSum([]byte(x))
|
hex, _ := HexSum([]byte(x))
|
||||||
if hex != results[i] {
|
if hex != results[i] {
|
||||||
t.Fatal(hex)
|
t.Fatal(hex)
|
||||||
}
|
}
|
||||||
@@ -437,9 +437,9 @@ func TestNilsimsa3(t *testing.T) {
|
|||||||
if len(list) != len(compareResults)+1 {
|
if len(list) != len(compareResults)+1 {
|
||||||
panic("len(list) != len(compareResults) + 1")
|
panic("len(list) != len(compareResults) + 1")
|
||||||
}
|
}
|
||||||
last := Sum([]byte(list[0]))
|
last, _ := Sum([]byte(list[0]))
|
||||||
for i, x := range list[1:] {
|
for i, x := range list[1:] {
|
||||||
sum := Sum([]byte(x))
|
sum, _ := Sum([]byte(x))
|
||||||
bits := BitsDiff(&sum, &last)
|
bits := BitsDiff(&sum, &last)
|
||||||
if bits != compareResults[i] {
|
if bits != compareResults[i] {
|
||||||
t.Fatalf("%x", bits)
|
t.Fatalf("%x", bits)
|
||||||
@@ -448,9 +448,9 @@ func TestNilsimsa3(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
j := 0
|
j := 0
|
||||||
last = Sum([]byte(list[0]))
|
last, _ = Sum([]byte(list[0]))
|
||||||
for i := 4; i < len(list); i += 3 {
|
for i := 4; i < len(list); i += 3 {
|
||||||
sum := Sum([]byte(list[i]))
|
sum, _ := Sum([]byte(list[i]))
|
||||||
bits := BitsDiff(&sum, &last)
|
bits := BitsDiff(&sum, &last)
|
||||||
if bits != step3CompareResults[j] {
|
if bits != step3CompareResults[j] {
|
||||||
t.Fatalf("%x", bits)
|
t.Fatalf("%x", bits)
|
||||||
|
|||||||
Reference in New Issue
Block a user