324 lines
12 KiB
Go
324 lines
12 KiB
Go
// Package based32 provides a simplified variant of the standard
|
|
// Bech32 human readable binary codec
|
|
//
|
|
// This codec simplifies the padding algorithm compared to the Bech32 standard
|
|
// BIP 0173 by performing all of the check validation with the decoded bits
|
|
// instead of separating the pads of each segment.
|
|
//
|
|
// The format is produced entirely with the standard library's base32 encoder,
// so the output may or may not match what a real Bech32 implementation would
// generate. We are teaching Go here, not cryptocurrency, and the extra rules
// used by the Bech32 standard complicate this tutorial unnecessarily - and,
// Go Uber Alles :)
|
|
package based32
|
|
|
|
import (
|
|
"encoding/base32"
|
|
"github.com/quanterall/kitchensink/pkg/codec"
|
|
"github.com/quanterall/kitchensink/pkg/proto"
|
|
"lukechampine.com/blake3"
|
|
"strings"
|
|
)
|
|
|
|
// charset is the set of characters used in the data section of bech32 strings.
|
|
// Note that this is ordered, such that for a given charset[i], i is the binary
|
|
// value of the character.
|
|
const charset = "qpzry9x8gf2tvdw0s3jn54khce6mua7l"
|
|
|
|
// Codec provides the encoder/decoder implementation created by makeCodec.
|
|
//
|
|
// This variable is sometimes called a "Singleton" in other languages, and in Go
|
|
// it is a thing that should be avoided unless the value is not a constant and
|
|
// an initialization process is required.
|
|
//
|
|
// Variable declarations like this are executed before init() functions and are
|
|
// for cases such as this, as the import of this package means the programmer
|
|
// intends to use this codec, usually, as otherwise they would be creating a new
|
|
// implementation from the struct type or for the interface.
|
|
//
|
|
// In general, an init() function is better avoided, and singletons also, better
|
|
// avoided, unless it makes sense in the context of the package as this is this
|
|
// initialization adds to startup delay for an application, so consider
|
|
// carefully before using these or init().
|
|
var Codec = makeCodec(
|
|
"Base32Check",
|
|
charset,
|
|
"QNTRL",
|
|
)
|
|
|
|
// getCheckLen returns the number of check bytes to append to a payload of
// the given length.
//
// The encoded byte string is laid out as
//
//	1 byte check length | payload | check bytes
//
// and its total size must be a multiple of 5 bytes (40 bits, i.e. exactly 8
// base32 characters) so that the base32 encoder never has to emit '='
// padding. Because the required amount varies with the payload length, the
// check length is stored in the first byte of the encoded data, where the
// decoder can find it before it knows where the payload ends.
//
// The formula reserves two bytes up front (the length prefix and one
// mandatory check byte), finds how far that is past a multiple of 5, and pads
// the check out to the next multiple. For non-negative lengths the result is
// always between 2 and 6 inclusive, and length+1+checkLen is always divisible
// by 5.
//
// This is deliberately more general than the fixed-size schemes used for
// transaction hashes or addresses: any payload length works, although codes
// for more than about 512 bits become impractical to transcribe by hand.
func getCheckLen(length int) (checkLen int) {
	// Bytes by which "prefix + payload + one check byte" overshoots a
	// multiple of five.
	overshoot := (length + 2) % 5

	// One mandatory check byte plus the complement needed to reach the
	// next multiple of five.
	return 6 - overshoot
}
|
|
|
|
// getCutPoint returns length minus checkLen minus one. It exists as a
// function because the same calculation is needed in more than one place when
// separating the payload from the trailing check bytes.
func getCutPoint(length, checkLen int) int {
	trailing := checkLen + 1
	return length - trailing
}
|
|
|
|
// makeCodec generates our custom codec as above, into the exported Codec
|
|
// variable
|
|
//
|
|
// Here we demonstrate the use of closures. In this case, it is an
|
|
// initialization, but it can also be used in dynamic generation code, or to use
|
|
// the 'builder' pattern to construct larger algorithms out of small modular
|
|
// parts.
|
|
func makeCodec(
|
|
name string,
|
|
cs string,
|
|
hrp string,
|
|
) (cdc *codec.Codec) {
|
|
|
|
// Create the codec.Codec struct and put its pointer in the return variable.
|
|
cdc = &codec.Codec{
|
|
Name: name,
|
|
Charset: cs,
|
|
HRP: hrp,
|
|
}
|
|
|
|
// We need to create the check creation functions first
|
|
cdc.MakeCheck = func(input []byte, checkLen int) (output []byte) {
|
|
|
|
// We use the Blake3 256 bit hash because it is nearly as fast as CRC32
|
|
// but less complicated to use due to the 32 bit integer conversions to
|
|
// bytes required to use the CRC32 algorithm.
|
|
checkArray := blake3.Sum256(input)
|
|
|
|
// This truncates the blake3 hash to the prescribed check length
|
|
return checkArray[:checkLen]
|
|
}
|
|
|
|
// Create a base32.Encoding from the provided charset.
|
|
enc := base32.NewEncoding(cdc.Charset)
|
|
|
|
cdc.Encoder = func(input []byte) (output string, err error) {
|
|
|
|
if len(input) < 1 {
|
|
|
|
// Unfortunately there is a minor bug in the Go protobuf/grpc
|
|
// generator that does not set the type of the errors to Error,
|
|
// which is an alias of int32. Thus here we have to cast it to int32
|
|
// to retrieve the map entry containing the error name.
|
|
//
|
|
// You can see the error in ../proto/based32.pb.go which is what is
|
|
// generated by protoc-gen-go.
|
|
err = proto.Error_ZERO_LENGTH
|
|
return
|
|
}
|
|
|
|
// The check length depends on the modulus of the length of the data is
|
|
// order to avoid padding.
|
|
checkLen := getCheckLen(len(input))
|
|
|
|
// The output is longer than the input, so we create a new buffer.
|
|
outputBytes := make([]byte, len(input)+checkLen+1)
|
|
|
|
// Add the check length byte to the front
|
|
outputBytes[0] = byte(checkLen)
|
|
|
|
// Then copy the input bytes for beginning segment.
|
|
copy(outputBytes[1:len(input)+1], input)
|
|
|
|
// Then copy the check to the end of the input.
|
|
copy(outputBytes[len(input)+1:], cdc.MakeCheck(input, checkLen))
|
|
|
|
// Create the encoding for the output.
|
|
outputString := enc.EncodeToString(outputBytes)
|
|
|
|
// We can omit the first character of the encoding because the length
|
|
// prefix never uses the first 5 bits of the first byte, and add it back
|
|
// for the decoder later.
|
|
trimmedString := outputString[1:]
|
|
|
|
// Prefix the output with the Human Readable Part and append the
|
|
// encoded string version of the provided bytes.
|
|
output = cdc.HRP + trimmedString
|
|
|
|
return
|
|
}
|
|
|
|
cdc.Check = func(input []byte) (err error) {
|
|
|
|
// We must do this check or the next statement will cause a bounds check
|
|
// panic. Note that zero length and nil slices are different, but have
|
|
// the same effect in this case, so both must be checked.
|
|
switch {
|
|
case len(input) < 1:
|
|
|
|
err = proto.Error_ZERO_LENGTH
|
|
return
|
|
|
|
case input == nil:
|
|
|
|
err = proto.Error_NIL_SLICE
|
|
return
|
|
}
|
|
|
|
// The check length is encoded into the first byte in order to ensure
|
|
// the data is cut correctly to perform the integrity check.
|
|
checkLen := int(input[0])
|
|
|
|
// Ensure there is at enough bytes in the input to run a check on
|
|
if len(input) < checkLen+1 {
|
|
|
|
err = proto.Error_CHECK_TOO_SHORT
|
|
return
|
|
}
|
|
|
|
// Find the index to cut the input to find the checksum value. We need
|
|
// this same value twice so it must be made into a variable.
|
|
cutPoint := getCutPoint(len(input), checkLen)
|
|
|
|
// Here is an example of a multiple assignment and more use of the
|
|
// slicing operator.
|
|
payload, checksum := input[1:cutPoint], string(input[cutPoint:])
|
|
|
|
// A checksum is checked in all cases by taking the data received, and
|
|
// applying the checksum generation function, and then comparing the
|
|
// checksum to the one attached to the received data with checksum
|
|
// present.
|
|
//
|
|
// Note: The casting to string above and here. This makes a copy to the
|
|
// immutable string, which is not optimal for large byte slices, but for
|
|
// this short check value, it is a cheap operation on the stack, and an
|
|
// illustration of the interchangeability of []byte and string, with the
|
|
// distinction of the availability of a comparison operator for the
|
|
// string that isn't present for []byte, so for such cases this
|
|
// conversion is a shortcut method to compare byte slices.
|
|
computedChecksum := string(cdc.MakeCheck(payload, checkLen))
|
|
|
|
// Here we assign to the return variable the result of the comparison.
|
|
// by doing this instead of using an if and returns, the meaning of the
|
|
// comparison is more clear by the use of the return value's name.
|
|
valid := checksum != computedChecksum
|
|
|
|
if !valid {
|
|
|
|
err = proto.Error_CHECK_FAILED
|
|
}
|
|
|
|
return
|
|
}
|
|
|
|
cdc.Decoder = func(input string) (output []byte, err error) {
|
|
|
|
// Other than for human identification, the HRP is also a validity
|
|
// check, so if the string prefix is wrong, the entire value is wrong
|
|
// and won't decode as it is expected.
|
|
if !strings.HasPrefix(input, cdc.HRP) {
|
|
|
|
log.Printf(
|
|
"Provided string has incorrect human readable part:"+
|
|
"found '%s' expected '%s'", input[:len(cdc.HRP)], cdc.HRP,
|
|
)
|
|
|
|
err = proto.Error_INCORRECT_HUMAN_READABLE_PART
|
|
return
|
|
}
|
|
|
|
// Cut the HRP off the beginning to get the content, add the initial
|
|
// zeroed 5 bits with a 'q' character.
|
|
//
|
|
// Be aware the input string will be copied to create the []byte
|
|
// version. Also, because the input bytes are always zero for the first
|
|
// 5 most significant bits, we must re-add the zero at the front (q)
|
|
// before feeding it to the decoder.
|
|
input = "q" + input[len(cdc.HRP):]
|
|
|
|
// The length of the base32 string refers to 5 bits per slice index
|
|
// position, so the correct size of the output bytes, which are 8 bytes
|
|
// per slice index position, is found with the following simple integer
|
|
// math calculation.
|
|
//
|
|
// This allocation needs to be made first as the base32 Decode function
|
|
// does not do this allocation automatically and it would be wasteful to
|
|
// not compute it precisely, when the calculation is so simple.
|
|
//
|
|
// If this allocation is omitted, the decoder will panic due to bounds
|
|
// check error. A nil slice is equivalent to a zero length slice and
|
|
// gives a bounds check error, but in fact, the slice has no data at
|
|
// all. Yes, the panic message is lies:
|
|
//
|
|
// panic: runtime error: index out of range [4] with length 0
|
|
//
|
|
// If this assignment isn't made, by default, output is nil, not
|
|
// []byte{} so this panic message is deceptive.
|
|
data := make([]byte, len(input)*5/8)
|
|
|
|
var writtenBytes int
|
|
writtenBytes, err = enc.Decode(data, []byte(input))
|
|
if err != nil {
|
|
|
|
log.Println(err)
|
|
return
|
|
}
|
|
|
|
// The first byte signifies the length of the check at the end
|
|
checkLen := int(data[0])
|
|
if writtenBytes < checkLen+1 {
|
|
|
|
err = proto.Error_CHECK_TOO_SHORT
|
|
return
|
|
}
|
|
|
|
// Assigning the result of the check here as if true the resulting
|
|
// decoded bytes still need to be trimmed of the check value (keeping
|
|
// things cleanly separated between the check and decode function.
|
|
err = cdc.Check(data)
|
|
|
|
// There is no point in doing any more if the check fails, as per the
|
|
// contract specified in the interface definition codecer.Codecer
|
|
if err != nil {
|
|
return
|
|
}
|
|
|
|
// Slice off the check length prefix, and the check bytes to return the
|
|
// valid input bytes.
|
|
output = data[1:getCutPoint(len(data)+1, checkLen)]
|
|
|
|
// If we got to here, the decode was successful.
|
|
return
|
|
}
|
|
|
|
// We return the value explicitly to be nice to readers as the function is
|
|
// not a short and simple one.
|
|
return cdc
|
|
}
|