Files
nilsimsa/nilsimsa_test.go

462 lines
13 KiB
Go

package nilsimsa
import (
"fmt"
"io"
"testing"
)
// tests the nilsimsa hash by choosing a random test file
// computes the nilsimsa Digest and compares to the true
// value stored in the pickled sid_to_nil dictionary
func TestNilsimsa(t *testing.T) {
x, _ := HexSum([]byte{})
if x != "0000000000000000000000000000000000000000000000000000000000000000" {
t.Fatal(x)
}
x, _ = HexSum([]byte("abcdefgh"))
if x != "14c8118000000000030800000004042004189020001308014088003280000078" {
t.Fatal(x)
}
// Doing it the long way using incremental update
d := New()
io.WriteString(d, "abcd")
io.WriteString(d, "efgh")
x = fmt.Sprintf("%x", d.Sum(nil))
if x != "14c8118000000000030800000004042004189020001308014088003280000078" {
t.Fatal(x)
}
io.WriteString(d, "ijk")
x = fmt.Sprintf("%x", d.Sum(nil))
if x != "14c811840010000c0328200108040630041890200217582d4098103280000078" {
t.Fatal(x)
}
digest1, _ := Sum([]byte("abcdefghijk"))
digest2, _ := Sum([]byte("abcdefgh"))
bitsDiff := BitsDiff(&digest1, &digest2)
if bitsDiff != 109 {
t.Fatalf("bitsDiff(%d)", bitsDiff)
}
d.Reset()
io.WriteString(d, "abcdefghijk")
s1 := d.Sum(nil)
d.Reset()
io.WriteString(d, "abcdefgh")
s2 := d.Sum(nil)
bitsDiff = BitsDiffSlice(s1, s2)
if bitsDiff != 109 {
t.Fatalf("bitsDiff(%d)", bitsDiff)
}
x1, _ := HexSum([]byte("abcdefghijk"))
x2, _ := HexSum([]byte("abcdefgh"))
bitsDiff = BitsDiffHex(x1, x2)
if bitsDiff != 109 {
t.Fatalf("bitsDiff(%d)", bitsDiff)
}
x1, _ = HexSum([]byte("return diff.NewSequenceMatcherFromFiles" +
"(srcPath, dstPath)"))
x2, _ = HexSum([]byte("return diff.NewSequenceMatcherFromFiles" +
"(dstPath, srcPath)"))
if x1 != "8beb55d08d78fed441ede9301390b49b716a11af3962db70b24540338cb70035" {
t.Fatal(x1)
}
if x2 != "8a5355d09968f8d451efeb309919949b73e211af7952c970f245403b8cb7a035" {
t.Fatal(x2)
}
bitsDiff = BitsDiffHex(x1, x2)
if bitsDiff != 96 {
t.Fatalf("bitsDiff(%d)", bitsDiff)
}
x1, _ = HexSum([]byte("return diff.XYZ"))
x2, _ = HexSum([]byte("return diff.NewSequenceMatcherFromFiles" +
"(dstPath, srcPath)"))
if x1 != "84125570884ae840f042ea400400009a721891002011a071225247f7a5241018" {
t.Fatal(x1)
}
if x2 != "8a5355d09968f8d451efeb309919949b73e211af7952c970f245403b8cb7a035" {
t.Fatal(x2)
}
bitsDiff = BitsDiffHex(x1, x2)
if bitsDiff != 35 {
t.Fatalf("bitsDiff(%d)", bitsDiff)
}
digest1, _ = Sum([]byte("C.setTabChangeCallbackWrapper(h.ih())"))
digest2, _ = Sum([]byte("C.setTabChangeCallbackWrapper(ih)"))
bitsDiff = BitsDiff(&digest1, &digest2)
if bitsDiff != 40 {
t.Fatalf("bitsDiff(%d)", bitsDiff)
}
}
func TestNilsimsa2(t *testing.T) {
nilsimsaJavaimplementation := `
package com.weblyzard.lib.string.nilsimsa;
import java.util.*;
import org.apache.commons.codec.DecoderException;
import org.apache.commons.codec.binary.Hex;
import org.apache.commons.lang3.ArrayUtils;
/**
* Computes the Nilsimsa hash for the given string.
* @author Albert Weichselbraun <albert.weichselbraun@htwchur.ch>
* <weichselbraun@weblyzard.com>
*
* This class is a translation of the Python implementation by Michael Itz
* to the Java language <http://code.google.com/p/py-nilsimsa>.
*
* Original C nilsimsa-0.2.4 implementation by cmeclax:
* <http://ixazon.dynip.com/~cmeclax/nilsimsa.html>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; version 3 dated June, 2007.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
* MA 02110-1301, USA.
*
*/
public class Nilsimsa {
private int count = 0; // num characters seen
private int[] acc = new int[256]; // accumulators for the Digest
private int[] lastch = new int[4]; // the last four seen characters
// pre-defined transformation arrays
private static final byte[] TRAN = Nilsimsa._getByteArray(
"02D69E6FF91D04ABD022161FD873A1AC" +
"3B7062961E6E8F399D05144AA6BEAE0E" +
"CFB99C9AC76813E12DA4EB518D646B50" +
"23800341ECBB71CC7A867F98F2365EEE" +
"8ECE4FB832B65F59DC1B314C7BF06301" +
"6CBA07E81277493CDA46FE2F791C9B30" +
"E300067E2E0F383321ADA554CAA729FC" +
"5A47697DC595B5F40B90A3816D255535" +
"F575740A26BF195C1AC6FF995D84AA66" +
"3EAF78B32043C1ED24EAE63F18F3A042" +
"57085360C3C0834082D709BD442A67A8" +
"93E0C2569FD9DD8515B48A27289276DE" +
"EFF8B2B7C93D45944B110D65D5348B91" +
"0CFA87E97C5BB14DE5D4CB10A21789BC" +
"DBB0E2978852F748D3612C3A2BD18CFB" +
"F1CDE46AE7A9FDC437C8D2F6DF58724E");
// pre-defined array for the computation of the bitwise difference
// between two nilsimsa strings.
private static final byte[] POPC = Nilsimsa._getByteArray(
"00010102010202030102020302030304" +
"01020203020303040203030403040405" +
"01020203020303040203030403040405" +
"02030304030404050304040504050506" +
"01020203020303040203030403040405" +
"02030304030404050304040504050506" +
"02030304030404050304040504050506" +
"03040405040505060405050605060607" +
"01020203020303040203030403040405" +
"02030304030404050304040504050506" +
"02030304030404050304040504050506" +
"03040405040505060405050605060607" +
"02030304030404050304040504050506" +
"03040405040505060405050605060607" +
"03040405040505060405050605060607" +
"04050506050606070506060706070708");
public Nilsimsa() {
reset();
}
/**
* Updates the Nilsimsa Digest using the given String
* @param s: the String data to consider in the update
*/
public void update(String s) {
for (int ch: s.toCharArray()) {
count ++;
// incr accumulators for triplets
if (lastch[1] > -1) {
acc[ _tran3(ch, lastch[0], lastch[1], 0) ] ++;
}
if (lastch[2] > -1) {
acc[ _tran3(ch, lastch[0], lastch[2], 1)] ++;
acc[ _tran3(ch, lastch[1], lastch[2], 2)] ++;
}
if (lastch[3] > -1) {
acc[ _tran3(ch, lastch[0], lastch[3], 3)] ++;
acc[ _tran3(ch, lastch[1], lastch[3], 4)] ++;
acc[ _tran3(ch, lastch[2], lastch[3], 5)] ++;
acc[ _tran3(lastch[3], lastch[0], ch, 6)] ++;
acc[ _tran3(lastch[3], lastch[2], ch, 7)] ++;
}
// adjust lastch
for(int i=3; i>0; i-- ) {
lastch[i]=lastch[i-1];
}
lastch[0] = ch;
}
}
/**
* resets the Hash computation
*/
private void reset() {
count = 0;
Arrays.fill(acc, (byte) 0);
Arrays.fill(lastch, -1);
}
/*
* Converts the given hexString to a byte array.
* @param hexString: the hexString to convert
* @return the corresponding byte array
*/
private static byte[] _getByteArray( String hexString ) {
try {
return Hex.decodeHex( hexString.toCharArray());
} catch (DecoderException e) {
e.printStackTrace();
return null;
}
}
/**
* Accumulator for a transition n between the chars a, b, c
*/
private int _tran3(int a, int b, int c, int n) {
int i = (c)^TRAN[n];
return (((TRAN[(a+n)&255]^TRAN[b&0xff]*(n+n+1))+TRAN[i&0xff])&255);
}
/**
* @return the Digest for the current Nilsimsa object.
*/
public byte[] Digest() {
int total = 0;
int threshold;
byte[] Digest = new byte[32];
Arrays.fill(Digest, (byte)0);
if (count == 3) {
total = 1;
} else if (count == 4) {
total = 4;
} else if (count > 4) {
total = 8 * count - 28;
}
threshold = total / 256;
for (int i=0; i<256; i++) {
if (acc[i] > threshold) {
Digest[ i>>3 ] += 1 << (i&7);
}
}
ArrayUtils.reverse( Digest );
return Digest;
}
/**
* @return a String representation of the current state of
* the Nilsimsa object.
*/
public String hexdigest() {
return Hex.encodeHexString( Digest() );
}
/**
* Compute the Nilsimsa Digest for the given String.
* @param s: the String to hash
* @return the Nilsimsa Digest.
*/
public byte[] Digest(String s) {
reset();
update(s);
return Digest();
}
/**
* Compute the Nilsimsa hexDigest for the given String.
* @param s: the String to hash
* @return the Nilsimsa hexdigest.
*/
public String hexdigest(String s) {
return Hex.encodeHexString( Digest(s) );
}
/**
* Compares a Nilsimsa object to the current one and
* return the number of bits that differ.
* @param cmp: the comparison object
* @return the number of bits the strings differ.
*/
public int compare(Nilsimsa cmp) {
byte bits = 0;
int j;
byte[] n1 = Digest();
byte[] n2 = cmp.Digest();
for (int i=0; i<32; i++) {
j = 255 & n1[i] ^ n2[i];
bits += POPC[ j ];
}
return 128 - bits;
}
}
`
x, _ := HexSum([]byte(nilsimsaJavaimplementation))
if x != "4c900d44043f014c40f40040d8201000f246227123b28864013040008240204a" {
t.Fatal(x)
}
}
func TestNilsimsa3(t *testing.T) {
list := [...]string{
"a",
"ab",
"abc",
"abcd",
"abcde",
"abcdef",
"abcdefg",
"abcdefgh",
"abcdefghi",
"abcdefghij",
"abcdefghijk",
"abcdefghijkl",
"abcdefghijklm",
"abcdefghijklmn",
"abcdefghijklmno",
"abcdefghijklmnop",
"abcdefghijklmnopq",
"abcdefghijklmnopqr",
"abcdefghijklmnopqrs",
"abcdefghijklmnopqrst",
"abcdefghijklmnopqrstu",
"abcdefghijklmnopqrstuv",
"abcdefghijklmnopqrstuvw",
"abcdefghijklmnopqrstuvwx",
"abcdefghijklmnopqrstuvwxy",
"abcdefghijklmnopqrstuvwxyz",
}
results := [...]string{
"0000000000000000000000000000000000000000000000000000000000000000",
"0000000000000000000000000000000000000000000000000000000000000000",
"0040000000000000000000000000000000000000000000000000000000000000",
"0440000000000000000000000000000000100000000000000008000000000000",
"0440008000000000000000000000000000100020001200000008001200000050",
"04c0018000000000000000000000000004188020001200000088001280000058",
"04c8118000000000030000000000002004188020001208004088001280000078",
"14c8118000000000030800000004042004189020001308014088003280000078",
"14c8118400000000030800010804043004189020021318094098003280000078",
"14c81184000000000308200108040430041890200217580d4098103280000078",
"14c811840010000c0328200108040630041890200217582d4098103280000078",
"14c811840010000ca328200108044630041890200a17586d4298103280000078",
"14ca11850010000ca328200188044630041898200a17586dc2d8103284000078",
"14ca11850030004ca3a8200188044630041898200a17586dc2d8107284000078",
"14ca11850032004ca3a8284188044730041898200a17586dc2d8107384000078",
"94ca11850432005ca3a828418804473004199c200a17586dc2d8107384004178",
"94ca11850433005ca3a82841880447341419be200a17586dc2d8107384004178",
"94ca11850433005ca3a82841a88457341419be201a17586dc6d8107384084178",
"94ca11850533005ca3b82841a88657361419be201a17586dc6d8107384084178",
"94ca11850533005ca3b82841aa8657371419be201a17587dc6d81077840c4178",
"94ca15850533005ca3b92841aa8657371419be201a17587dd6d81077844cc178",
"94ca15850533005ca3b92849aa8657371419be201a17587fd6d81077844cc978",
"94ca15850533045cabb92869aa8657371419bea01a17587fd6f81077c44cc978",
"94ca95850533045cabb93869aa8657371499beb01a17587fd6f8107fc44cc978",
"94ca95850733045cabb93869aa8657373499beb01a17587fd6f9107fc54cc978",
"94ca95850773045cabb93869ba8657373499beb81a17587fd6f9107fc54cc978",
}
compareResults := [...]byte{
128,
127,
125,
120,
120,
120,
120,
120,
123,
122,
122,
121,
124,
123,
121,
123,
122,
124,
123,
123,
125,
122,
123,
124,
125,
}
step3CompareResults := [...]byte{
116,
104,
109,
111,
111,
113,
114,
116,
}
if len(list) != len(results) {
panic("len(list) != len(results)")
}
for i, x := range list {
hex, _ := HexSum([]byte(x))
if hex != results[i] {
t.Fatal(hex)
}
}
if len(list) != len(compareResults)+1 {
panic("len(list) != len(compareResults) + 1")
}
last, _ := Sum([]byte(list[0]))
for i, x := range list[1:] {
sum, _ := Sum([]byte(x))
bits := BitsDiff(&sum, &last)
if bits != compareResults[i] {
t.Fatalf("%x", bits)
}
last = sum
}
j := 0
last, _ = Sum([]byte(list[0]))
for i := 4; i < len(list); i += 3 {
sum, _ := Sum([]byte(list[i]))
bits := BitsDiff(&sum, &last)
if bits != step3CompareResults[j] {
t.Fatalf("%x", bits)
}
last = sum
j += 1
}
}