Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
122 changes: 122 additions & 0 deletions codec/dagcbor/marshal.go
Original file line number Diff line number Diff line change
Expand Up @@ -245,3 +245,125 @@ func marshalMap(n datamodel.Node, tk *tok.Token, sink shared.TokenSink, options
_, err := sink.Step(tk)
return err
}

// EncodedLength calculates the length in bytes that the encoded form of the
// provided Node will occupy, without producing the encoded bytes.
//
// Note that this function requires a full walk of the Node's graph, which may
// not necessarily be a trivial cost and will incur some allocations. Using this
// method to calculate buffers to pre-allocate may not result in performance
// gains, but rather incur an overall cost. Use with care.
//
// An error is returned when the Node is absent, when any accessor or iterator
// on the Node (or one of its children) fails, when a link is not a CID link,
// or when the Node reports a kind that is not handled here.
func EncodedLength(n datamodel.Node) (int64, error) {
	switch kind := n.Kind(); kind {
	case datamodel.Kind_Invalid:
		return 0, fmt.Errorf("cannot traverse a node that is absent")
	case datamodel.Kind_Null:
		return 1, nil // 0xf6
	case datamodel.Kind_Map:
		length := uintLength(uint64(n.Length())) // length prefixed major 5
		for itr := n.MapIterator(); !itr.Done(); {
			k, v, err := itr.Next()
			if err != nil {
				return 0, err
			}
			keyLength, err := EncodedLength(k)
			if err != nil {
				return 0, err
			}
			length += keyLength
			valueLength, err := EncodedLength(v)
			if err != nil {
				return 0, err
			}
			length += valueLength
		}
		return length, nil
	case datamodel.Kind_List:
		nl := n.Length()
		length := uintLength(uint64(nl)) // length prefixed major 4
		for i := int64(0); i < nl; i++ {
			v, err := n.LookupByIndex(i)
			if err != nil {
				return 0, err
			}
			innerLength, err := EncodedLength(v)
			if err != nil {
				return 0, err
			}
			length += innerLength
		}
		return length, nil
	case datamodel.Kind_Bool:
		return 1, nil // 0xf4 or 0xf5
	case datamodel.Kind_Int:
		v, err := n.AsInt()
		if err != nil {
			return 0, err
		}
		if v < 0 {
			// negint is stored as one less than its magnitude; ^v == -v - 1
			// and, unlike the negation, never overflows for the minimum int64.
			v = ^v
		}
		return uintLength(uint64(v)), nil // major 0 or 1, as small as possible
	case datamodel.Kind_Float:
		return 9, nil // always major 7 and 64-bit float
	case datamodel.Kind_String:
		v, err := n.AsString()
		if err != nil {
			return 0, err
		}
		return uintLength(uint64(len(v))) + int64(len(v)), nil // length prefixed major 3
	case datamodel.Kind_Bytes:
		v, err := n.AsBytes()
		if err != nil {
			return 0, err
		}
		return uintLength(uint64(len(v))) + int64(len(v)), nil // length prefixed major 2
	case datamodel.Kind_Link:
		v, err := n.AsLink()
		if err != nil {
			return 0, err
		}
		switch lnk := v.(type) {
		case cidlink.Link:
			length := int64(2)                    // tag,42: 0xd82a
			bl := int64(len(lnk.Bytes())) + 1     // additional 0x00 in front of the CID bytes
			length += uintLength(uint64(bl)) + bl // length prefixed major 2
			return length, nil
		default:
			return 0, fmt.Errorf("schemafree link emission only supported by this codec for CID type links")
		}
	default:
		// Node is an interface that anyone may implement, so report an
		// unexpected kind to the caller rather than crashing the process.
		return 0, fmt.Errorf("cannot calculate encoded length for node of unknown kind %v", kind)
	}
}

// uintLength calculates how many bytes an integer will occupy when encoded,
// and therefore also the size of the leading bytes of a length-prefixed token.
// CBOR will pack it up into the smallest possible uint representation, even
// merging it with the major if it's <=23.

// boundaryLength pairs an exclusive upper bound on an unsigned integer with
// the number of bytes used to encode any value below that bound.
type boundaryLength struct {
	// upperBound is compared with a strict less-than by uintLength.
	upperBound uint64
	// length is the encoded size in bytes, including the leading major byte.
	length int64
}

// lengthBoundaries lists the encoded sizes in ascending order of upperBound.
// The final entry has an upperBound of 0, which no uint64 is below; its length
// is what uintLength returns when none of the earlier entries match.
var lengthBoundaries = []boundaryLength{
	{upperBound: 24, length: 1},      // packed major|minor
	{upperBound: 1 << 8, length: 2},  // major, 8-bit length
	{upperBound: 1 << 16, length: 3}, // major, 16-bit length
	{upperBound: 1 << 32, length: 5}, // major, 32-bit length
	{upperBound: 0, length: 9},       // major, 64-bit length
}

// uintLength returns the number of bytes needed to encode ii, which is also
// the size of the header of a length-prefixed token whose length is ii. The
// sizes come from lengthBoundaries: the first entry whose upperBound is
// strictly greater than ii supplies the answer.
func uintLength(ii uint64) int64 {
	last := len(lengthBoundaries) - 1
	for idx := 0; idx < last; idx++ {
		if bound := lengthBoundaries[idx]; ii < bound.upperBound {
			return bound.length
		}
	}
	// Nothing smaller fits, so this is the maximum number of bytes used to
	// pack an int. If ii is a length prefix for a map, list, string or bytes
	// then we likely have a very bad Node that shouldn't be encoded, but the
	// encoder may raise problems with that if the memory allocator doesn't
	// first.
	return lengthBoundaries[last].length
}
70 changes: 70 additions & 0 deletions codec/dagcbor/marshal_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
package dagcbor

import (
"bytes"
"math/rand"
"testing"
"time"

qt "github.com/frankban/quicktest"
"github.com/ipld/go-ipld-prime/datamodel"
basicnode "github.com/ipld/go-ipld-prime/node/basic"
"github.com/ipld/go-ipld-prime/testutil/garbage"
)

// calculateActualLength encodes n with Encode into an in-memory buffer and
// returns the number of bytes written. The test is aborted if Encode returns
// an error.
func calculateActualLength(t *testing.T, n datamodel.Node) int64 {
	t.Helper() // attribute failures to the calling test line, not this helper
	var buf bytes.Buffer
	err := Encode(n, &buf)
	qt.Assert(t, err, qt.IsNil)
	return int64(buf.Len())
}

// verifyEstimatedSize asserts that EncodedLength(n) succeeds and reports
// exactly the number of bytes that encoding n produces.
func verifyEstimatedSize(t *testing.T, n datamodel.Node) {
	// This helper is invoked from many call sites and loops; mark it so that
	// failures are reported against the caller's line.
	t.Helper()
	estimatedLength, err := EncodedLength(n)
	qt.Assert(t, err, qt.IsNil)
	actualLength := calculateActualLength(t, n)
	qt.Assert(t, estimatedLength, qt.Equals, actualLength)
}

func TestEncodedLength(t *testing.T) {
t.Run("int boundaries", func(t *testing.T) {
for ii := 0; ii < 4; ii++ {
verifyEstimatedSize(t, basicnode.NewInt(int64(lengthBoundaries[ii].upperBound)))
verifyEstimatedSize(t, basicnode.NewInt(int64(lengthBoundaries[ii].upperBound)-1))
verifyEstimatedSize(t, basicnode.NewInt(int64(lengthBoundaries[ii].upperBound)+1))
verifyEstimatedSize(t, basicnode.NewInt(-1*int64(lengthBoundaries[ii].upperBound)))
verifyEstimatedSize(t, basicnode.NewInt(-1*int64(lengthBoundaries[ii].upperBound)-1))
verifyEstimatedSize(t, basicnode.NewInt(-1*int64(lengthBoundaries[ii].upperBound)+1))
}
})

t.Run("small garbage", func(t *testing.T) {
seed := time.Now().Unix()
t.Logf("randomness seed: %v\n", seed)
rnd := rand.New(rand.NewSource(seed))
for i := 0; i < 1000; i++ {
gbg := garbage.Generate(rnd, garbage.TargetBlockSize(1<<6))
verifyEstimatedSize(t, gbg)
}
})

t.Run("medium garbage", func(t *testing.T) {
seed := time.Now().Unix()
t.Logf("randomness seed: %v\n", seed)
rnd := rand.New(rand.NewSource(seed))
for i := 0; i < 100; i++ {
gbg := garbage.Generate(rnd, garbage.TargetBlockSize(1<<16))
verifyEstimatedSize(t, gbg)
}
})

t.Run("large garbage", func(t *testing.T) {
seed := time.Now().Unix()
t.Logf("randomness seed: %v\n", seed)
rnd := rand.New(rand.NewSource(seed))
for i := 0; i < 10; i++ {
gbg := garbage.Generate(rnd, garbage.TargetBlockSize(1<<20))
verifyEstimatedSize(t, gbg)
}
})
}
5 changes: 5 additions & 0 deletions codec/dagcbor/roundtrip_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,11 @@ func TestRoundtrip(t *testing.T) {
qt.Assert(t, err, qt.IsNil)
qt.Check(t, buf.String(), qt.Equals, serial)
})
t.Run("length", func(t *testing.T) {
length, err := EncodedLength(n)
qt.Assert(t, err, qt.IsNil)
qt.Check(t, length, qt.Equals, int64(len(serial)))
})
t.Run("decoding", func(t *testing.T) {
buf := strings.NewReader(serial)
nb := basicnode.Prototype.Map.NewBuilder()
Expand Down