implement Seek

This commit is contained in:
lukechampine 2020-01-10 12:43:02 -05:00
parent 3d75f823bd
commit fd30fee094
2 changed files with 113 additions and 40 deletions

View File

@ -6,7 +6,9 @@ package blake3
import ( import (
"encoding/binary" "encoding/binary"
"errors"
"hash" "hash"
"io"
) )
const ( const (
@ -124,33 +126,66 @@ func wordsToBytes(words []uint32, bytes []byte) {
} }
} }
// An OutputReader produces an unbounded stream of output from its initial // An OutputReader produces a seekable stream of output. Up to 2^64 - 1 bytes
// state. // can be safely read from the stream.
type OutputReader struct { type OutputReader struct {
n node n node
block [blockLen]byte block [blockLen]byte
blockUsed int unread int
} }
// Read implements io.Reader. Read always returns len(p), nil. // Read implements io.Reader. It always returns len(p), nil.
func (or *OutputReader) Read(p []byte) (int, error) { func (or *OutputReader) Read(p []byte) (int, error) {
lenp := len(p) lenp := len(p)
for len(p) > 0 { for len(p) > 0 {
if or.blockUsed == 0 { if or.unread == 0 {
words := or.n.compress() words := or.n.compress()
wordsToBytes(words[:], or.block[:]) wordsToBytes(words[:], or.block[:])
or.blockUsed = blockLen or.unread = blockLen
or.n.counter++ or.n.counter++
} }
// copy from output buffer // copy from output buffer
n := copy(p, or.block[blockLen-or.blockUsed:]) n := copy(p, or.block[blockLen-or.unread:])
or.blockUsed -= n or.unread -= n
p = p[n:] p = p[n:]
} }
return lenp, nil return lenp, nil
} }
// Seek implements io.Seeker. SeekEnd is defined as 2^64 - 1 bytes, the maximum
// safe output of a BLAKE3 stream.
func (or *OutputReader) Seek(offset int64, whence int) (int64, error) {
	// Current absolute offset. Read's invariant is that or.n.counter blocks
	// have been generated so far, of which or.unread bytes remain unconsumed
	// in or.block; the position is therefore counter*blockLen - unread.
	// (Adding blockLen-unread here would over-report by one block.)
	off := int64(or.n.counter)*blockLen - int64(or.unread)
	switch whence {
	case io.SeekStart:
		off = offset
	case io.SeekCurrent:
		off += offset
	case io.SeekEnd:
		// BLAKE3 can safely output up to 2^64 - 1 bytes. Seeking to the "end"
		// of this stream is kind of strange, but perhaps could be useful for
		// testing overflow scenarios.
		off = int64(^uint64(0) - uint64(offset))
	default:
		panic("invalid whence")
	}
	if off < 0 {
		return 0, errors.New("seek position cannot be negative")
	}
	// Reposition within the block containing off. Reduce modulo blockLen in
	// 64 bits before converting to int, so large offsets are not truncated on
	// 32-bit platforms.
	or.n.counter = uint64(off) / blockLen
	or.unread = 0
	if rem := int(uint64(off) % blockLen); rem != 0 {
		// We landed mid-block: generate that block now, then advance the
		// counter so the state matches Read's invariant (counter always
		// names the *next* block to generate). Without the increment, Read
		// would re-generate and repeat this same block after draining it.
		words := or.n.compress()
		wordsToBytes(words[:], or.block[:])
		or.n.counter++
		or.unread = blockLen - rem
	}
	return off, nil
}
type chunkState struct { type chunkState struct {
n node n node
block [blockLen]byte block [blockLen]byte

View File

@ -13,43 +13,51 @@ import (
func toHex(data []byte) string { return hex.EncodeToString(data) } func toHex(data []byte) string { return hex.EncodeToString(data) }
func TestVectors(t *testing.T) { var testVectors = func() (vecs struct {
Key string
Cases []struct {
InputLen int `json:"input_len"`
Hash string `json:"hash"`
KeyedHash string `json:"keyed_hash"`
DeriveKey string `json:"derive_key"`
}
}) {
data, err := ioutil.ReadFile("testdata/vectors.json") data, err := ioutil.ReadFile("testdata/vectors.json")
if err != nil { if err != nil {
t.Fatal(err) panic(err)
} }
var vectors struct { if err := json.Unmarshal(data, &vecs); err != nil {
Key string panic(err)
Cases []struct {
InputLen int `json:"input_len"`
Hash string `json:"hash"`
KeyedHash string `json:"keyed_hash"`
DeriveKey string `json:"derive_key"`
}
}
if err := json.Unmarshal(data, &vectors); err != nil {
t.Fatal(err)
} }
return
}()
var testInput = func() []byte {
input := make([]byte, 1<<15) input := make([]byte, 1<<15)
for i := range input { for i := range input {
input[i] = byte(i % 251) input[i] = byte(i % 251)
} }
return input
}()
func TestVectors(t *testing.T) {
for _, vec := range testVectors.Cases {
in := testInput[:vec.InputLen]
for _, vec := range vectors.Cases {
in := input[:vec.InputLen]
// regular // regular
h := blake3.New(len(vec.Hash)/2, nil) h := blake3.New(len(vec.Hash)/2, nil)
h.Write(in) h.Write(in)
if out := toHex(h.Sum(nil)); out != vec.Hash { if out := toHex(h.Sum(nil)); out != vec.Hash {
t.Errorf("output did not match test vector:\n\texpected: %v...\n\t got: %v...", vec.Hash[:10], out[:10]) t.Errorf("output did not match test vector:\n\texpected: %v...\n\t got: %v...", vec.Hash[:10], out[:10])
} }
// keyed // keyed
h = blake3.New(len(vec.KeyedHash)/2, []byte(vectors.Key)) h = blake3.New(len(vec.KeyedHash)/2, []byte(testVectors.Key))
h.Write(in) h.Write(in)
if out := toHex(h.Sum(nil)); out != vec.KeyedHash { if out := toHex(h.Sum(nil)); out != vec.KeyedHash {
t.Errorf("output did not match test vector:\n\texpected: %v...\n\t got: %v...", vec.KeyedHash[:10], out[:10]) t.Errorf("output did not match test vector:\n\texpected: %v...\n\t got: %v...", vec.KeyedHash[:10], out[:10])
} }
// derive key // derive key
const ctx = "BLAKE3 2019-12-27 16:29:52 test vectors context" const ctx = "BLAKE3 2019-12-27 16:29:52 test vectors context"
subKey := make([]byte, len(vec.DeriveKey)/2) subKey := make([]byte, len(vec.DeriveKey)/2)
@ -57,37 +65,67 @@ func TestVectors(t *testing.T) {
if out := toHex(subKey); out != vec.DeriveKey { if out := toHex(subKey); out != vec.DeriveKey {
t.Errorf("output did not match test vector:\n\texpected: %v...\n\t got: %v...", vec.DeriveKey[:10], subKey[:10]) t.Errorf("output did not match test vector:\n\texpected: %v...\n\t got: %v...", vec.DeriveKey[:10], subKey[:10])
} }
// XOF should produce identical results, even when outputting 7 bytes at a time }
h = blake3.New(len(vec.Hash)/2, nil) }
func TestXOF(t *testing.T) {
for _, vec := range testVectors.Cases {
in := testInput[:vec.InputLen]
// XOF should produce same output as Sum, even when outputting 7 bytes at a time
h := blake3.New(len(vec.Hash)/2, nil)
h.Write(in) h.Write(in)
var xofBuf bytes.Buffer var xofBuf bytes.Buffer
io.CopyBuffer(&xofBuf, io.LimitReader(h.XOF(), int64(len(vec.Hash)/2)), make([]byte, 7)) io.CopyBuffer(&xofBuf, io.LimitReader(h.XOF(), int64(len(vec.Hash)/2)), make([]byte, 7))
if out := toHex(xofBuf.Bytes()); out != vec.Hash { if out := toHex(xofBuf.Bytes()); out != vec.Hash {
t.Errorf("XOF output did not match test vector:\n\texpected: %v...\n\t got: %v...", vec.Hash[:10], out[:10]) t.Errorf("XOF output did not match test vector:\n\texpected: %v...\n\t got: %v...", vec.Hash[:10], out[:10])
} }
// Should be able to Seek around in the output stream without affecting correctness
seeks := []struct {
offset int64
whence int
}{
{0, io.SeekStart},
{17, io.SeekCurrent},
{-5, io.SeekCurrent},
{int64(h.Size()), io.SeekStart},
{int64(h.Size()), io.SeekCurrent},
}
xof := h.XOF()
outR := bytes.NewReader(xofBuf.Bytes())
for _, s := range seeks {
outRead := make([]byte, 10)
xofRead := make([]byte, 10)
offset, _ := outR.Seek(s.offset, s.whence)
n, _ := outR.Read(outRead)
xof.Seek(s.offset, s.whence)
xof.Read(xofRead[:n])
if !bytes.Equal(outRead[:n], xofRead[:n]) {
t.Errorf("XOF output did not match test vector at offset %v:\n\texpected: %x...\n\t got: %x...", offset, outRead[:10], xofRead[:10])
}
}
} }
} }
type nopReader struct{}
func (nopReader) Read(p []byte) (int, error) { return len(p), nil }
func BenchmarkWrite(b *testing.B) { func BenchmarkWrite(b *testing.B) {
h := blake3.New(32, nil) b.SetBytes(1)
buf := make([]byte, 1<<15) io.CopyN(blake3.New(0, nil), nopReader{}, int64(b.N))
b.SetBytes(int64(len(buf)))
for i := 0; i < b.N; i++ {
h.Write(buf)
}
} }
func BenchmarkChunk(b *testing.B) { func BenchmarkSum256(b *testing.B) {
h := blake3.New(32, nil)
buf := make([]byte, 1024) buf := make([]byte, 1024)
out := make([]byte, 0, 32)
for i := 0; i < b.N; i++ { for i := 0; i < b.N; i++ {
h.Write(buf) blake3.Sum256(buf)
h.Sum(out)
} }
} }
func BenchmarkXOF(b *testing.B) { func BenchmarkXOF(b *testing.B) {
b.SetBytes(1) b.SetBytes(1)
io.CopyN(ioutil.Discard, blake3.New(32, nil).XOF(), int64(b.N)) io.CopyN(ioutil.Discard, blake3.New(0, nil).XOF(), int64(b.N))
} }