Files
wazero/internal/bitpack/offset_array.go
2023-05-15 18:41:52 -07:00

225 lines
5.4 KiB
Go

package bitpack
import (
"math"
)
// OffsetArray is an interface representing read-only views of arrays of 64 bits
// offsets.
type OffsetArray interface {
// Returns the value at index i.
//
// The method complexity may be anywhere between O(1) and O(N).
Index(i int) uint64
// Returns the number of offsets in the array.
//
// The method complexity must be O(1).
Len() int
}
// OffsetArrayLen is a helper function to access the length of an offset array.
// It is similar to calling Len on the array but handles the special case where
// the array is nil, in which case it returns zero.
func OffsetArrayLen(array OffsetArray) int {
if array != nil {
return array.Len()
}
return 0
}
// NewOffsetArray constructs a new array of offsets from the slice of values
// passed as argument. The slice is not retained, the returned array always
// holds a copy of the values.
//
// The underlying implementation of the offset array applies a compression
// mechanism derived from Frame-of-Reference and Delta Encoding to minimize
// the memory footprint of the array. This compression model works best when
// the input is made of ordered values, otherwise the deltas between values
// are likely to be too large to benefit from delta encoding.
//
// See https://lemire.me/blog/2012/02/08/effective-compression-using-frame-of-reference-and-delta-coding/
func NewOffsetArray(values []uint64) OffsetArray {
if len(values) == 0 {
return emptyOffsetArray{}
}
if len(values) <= smallOffsetArrayCapacity {
return newSmallOffsetArray(values)
}
maxDelta := uint64(0)
lastValue := values[0]
// TODO: the pre-processing we perform here can be optimized using SIMD
// instructions.
for _, value := range values[1:] {
if delta := value - lastValue; delta > maxDelta {
maxDelta = delta
}
lastValue = value
}
switch {
case maxDelta > math.MaxUint32:
return newOffsetArray(values)
case maxDelta > math.MaxUint16:
return newDeltaArray[uint32](values)
case maxDelta > math.MaxUint8:
return newDeltaArray[uint16](values)
case maxDelta > 15:
return newDeltaArray[uint8](values)
default:
return newDeltaArrayUint4(values)
}
}
type offsetArray struct {
values []uint64
}
func newOffsetArray(values []uint64) *offsetArray {
a := &offsetArray{
values: make([]uint64, len(values)),
}
copy(a.values, values)
return a
}
func (a *offsetArray) Index(i int) uint64 {
return a.values[i]
}
func (a *offsetArray) Len() int {
return len(a.values)
}
type emptyOffsetArray struct{}
func (emptyOffsetArray) Index(int) uint64 {
panic("index out of bounds")
}
func (emptyOffsetArray) Len() int {
return 0
}
const smallOffsetArrayCapacity = 7
type smallOffsetArray struct {
length int
values [smallOffsetArrayCapacity]uint64
}
func newSmallOffsetArray(values []uint64) *smallOffsetArray {
a := &smallOffsetArray{length: len(values)}
copy(a.values[:], values)
return a
}
func (a *smallOffsetArray) Index(i int) uint64 {
if i < 0 || i >= a.length {
panic("index out of bounds")
}
return a.values[i]
}
func (a *smallOffsetArray) Len() int {
return a.length
}
type uintType interface {
uint8 | uint16 | uint32 | uint64
}
type deltaArray[T uintType] struct {
deltas []T
firstValue uint64
}
func newDeltaArray[T uintType](values []uint64) *deltaArray[T] {
a := &deltaArray[T]{
deltas: make([]T, len(values)-1),
firstValue: values[0],
}
lastValue := values[0]
for i, value := range values[1:] {
a.deltas[i] = T(value - lastValue)
lastValue = value
}
return a
}
func (a *deltaArray[T]) Index(i int) uint64 {
if i < 0 || i >= a.Len() {
panic("index out of bounds")
}
value := a.firstValue
// TODO: computing the prefix sum can be vectorized;
// see https://en.algorithmica.org/hpc/algorithms/prefix/
for _, delta := range a.deltas[:i] {
value += uint64(delta)
}
return value
}
func (a *deltaArray[T]) Len() int {
return len(a.deltas) + 1
}
// deltaArrayUint4 is a specialization of deltaArray which packs 4 bits integers
// to hold deltas between 0 and 15; based on the analysis of compiling Python,
// it appeared that most source offset deltas were under 16, so using this
// data structure cuts by 50% the memory needed compared to deltaArray[uint8].
//
// Here is the distribution of source offset deltas for Python 3.13:
//
// - <=15 : 10240
// - <=255 : 9565
// - <=65535 : 1163
//
// Memory profiles showed that using deltaArrayUint4 (compared to deltaArray[T])
// dropped the memory footprint of source mappings for Python from 6MB to 4.5MB.
type deltaArrayUint4 struct {
deltas []byte
numValues int
firstValue uint64
}
func newDeltaArrayUint4(values []uint64) *deltaArrayUint4 {
a := &deltaArrayUint4{
deltas: make([]byte, len(values)/2+1),
numValues: len(values),
firstValue: values[0],
}
lastValue := values[0]
for i, value := range values[1:] {
a.assign(i, value-lastValue)
lastValue = value
}
return a
}
func (a *deltaArrayUint4) assign(i int, v uint64) {
index, shift := uint(i)>>1, 4*(uint(i)&1)
a.deltas[index] &= ^(0xF << shift)
a.deltas[index] |= byte(v) << shift
}
func (a *deltaArrayUint4) index(i int) uint64 {
index, shift := uint(i)>>1, 4*(uint(i)&1)
return uint64((a.deltas[index] >> shift) & 0xF)
}
func (a *deltaArrayUint4) Index(i int) uint64 {
if i < 0 || i >= a.Len() {
panic("index out of bounds")
}
value := a.firstValue
for j := 0; j < i; j++ {
value += a.index(j)
}
return value
}
func (a *deltaArrayUint4) Len() int {
return a.numValues
}