From fd30fee094b6aa3b09f7d4c1286ff5c841caff2d Mon Sep 17 00:00:00 2001 From: lukechampine Date: Fri, 10 Jan 2020 12:43:02 -0500 Subject: [PATCH] implement Seek --- blake3.go | 55 ++++++++++++++++++++++------ blake3_test.go | 98 ++++++++++++++++++++++++++++++++++---------------- 2 files changed, 113 insertions(+), 40 deletions(-) diff --git a/blake3.go b/blake3.go index d25d9d6..3a5e34d 100644 --- a/blake3.go +++ b/blake3.go @@ -6,7 +6,9 @@ package blake3 import ( "encoding/binary" + "errors" "hash" + "io" ) const ( @@ -124,33 +126,66 @@ func wordsToBytes(words []uint32, bytes []byte) { } } -// An OutputReader produces an unbounded stream of output from its initial -// state. +// An OutputReader produces a seekable stream of output. Up to 2^64 - 1 bytes +// can be safely read from the stream. type OutputReader struct { - n node - block [blockLen]byte - blockUsed int + n node + block [blockLen]byte + unread int } -// Read implements io.Reader. Read always return len(p), nil. +// Read implements io.Reader. It always returns len(p), nil. func (or *OutputReader) Read(p []byte) (int, error) { lenp := len(p) for len(p) > 0 { - if or.blockUsed == 0 { + if or.unread == 0 { words := or.n.compress() wordsToBytes(words[:], or.block[:]) - or.blockUsed = blockLen + or.unread = blockLen or.n.counter++ } // copy from output buffer - n := copy(p, or.block[blockLen-or.blockUsed:]) - or.blockUsed -= n + n := copy(p, or.block[blockLen-or.unread:]) + or.unread -= n p = p[n:] } return lenp, nil } +// Seek implements io.Seeker. SeekEnd is defined as 2^64 - 1 bytes, the maximum +// safe output of a BLAKE3 stream. +func (or *OutputReader) Seek(offset int64, whence int) (int64, error) { + off := int64(or.n.counter*blockLen) + int64(blockLen-or.unread) + switch whence { + case io.SeekStart: + off = offset + case io.SeekCurrent: + off += offset + case io.SeekEnd: + // BLAKE3 can safely output up to 2^64 - 1 bytes. 
Seeking to the "end" + // of this stream is kind of strange, but perhaps could be useful for + // testing overflow scenarios. + off = int64(^uint64(0) - uint64(offset)) + default: + panic("invalid whence") + } + if off < 0 { + return 0, errors.New("seek position cannot be negative") + } + or.n.counter = uint64(off) / blockLen + or.unread = blockLen - (int(off) % blockLen) + + // If the new offset is not a block boundary, generate the block we are + // "inside." + if or.unread != 0 { + words := or.n.compress() + wordsToBytes(words[:], or.block[:]) + } + + return off, nil +} + type chunkState struct { n node block [blockLen]byte diff --git a/blake3_test.go b/blake3_test.go index c727502..269f2e0 100644 --- a/blake3_test.go +++ b/blake3_test.go @@ -13,43 +13,51 @@ import ( func toHex(data []byte) string { return hex.EncodeToString(data) } -func TestVectors(t *testing.T) { +var testVectors = func() (vecs struct { + Key string + Cases []struct { + InputLen int `json:"input_len"` + Hash string `json:"hash"` + KeyedHash string `json:"keyed_hash"` + DeriveKey string `json:"derive_key"` + } +}) { data, err := ioutil.ReadFile("testdata/vectors.json") if err != nil { - t.Fatal(err) + panic(err) } - var vectors struct { - Key string - Cases []struct { - InputLen int `json:"input_len"` - Hash string `json:"hash"` - KeyedHash string `json:"keyed_hash"` - DeriveKey string `json:"derive_key"` - } - } - if err := json.Unmarshal(data, &vectors); err != nil { - t.Fatal(err) + if err := json.Unmarshal(data, &vecs); err != nil { + panic(err) } + return +}() +var testInput = func() []byte { input := make([]byte, 1<<15) for i := range input { input[i] = byte(i % 251) } + return input +}() + +func TestVectors(t *testing.T) { + for _, vec := range testVectors.Cases { + in := testInput[:vec.InputLen] - for _, vec := range vectors.Cases { - in := input[:vec.InputLen] // regular h := blake3.New(len(vec.Hash)/2, nil) h.Write(in) if out := toHex(h.Sum(nil)); out != vec.Hash { t.Errorf("output 
did not match test vector:\n\texpected: %v...\n\t got: %v...", vec.Hash[:10], out[:10]) } + // keyed - h = blake3.New(len(vec.KeyedHash)/2, []byte(vectors.Key)) + h = blake3.New(len(vec.KeyedHash)/2, []byte(testVectors.Key)) h.Write(in) if out := toHex(h.Sum(nil)); out != vec.KeyedHash { t.Errorf("output did not match test vector:\n\texpected: %v...\n\t got: %v...", vec.KeyedHash[:10], out[:10]) } + // derive key const ctx = "BLAKE3 2019-12-27 16:29:52 test vectors context" subKey := make([]byte, len(vec.DeriveKey)/2) @@ -57,37 +65,67 @@ func TestVectors(t *testing.T) { if out := toHex(subKey); out != vec.DeriveKey { t.Errorf("output did not match test vector:\n\texpected: %v...\n\t got: %v...", vec.DeriveKey[:10], subKey[:10]) } - // XOF should produce identical results, even when outputting 7 bytes at a time - h = blake3.New(len(vec.Hash)/2, nil) + } +} + +func TestXOF(t *testing.T) { + for _, vec := range testVectors.Cases { + in := testInput[:vec.InputLen] + + // XOF should produce same output as Sum, even when outputting 7 bytes at a time + h := blake3.New(len(vec.Hash)/2, nil) h.Write(in) var xofBuf bytes.Buffer io.CopyBuffer(&xofBuf, io.LimitReader(h.XOF(), int64(len(vec.Hash)/2)), make([]byte, 7)) if out := toHex(xofBuf.Bytes()); out != vec.Hash { t.Errorf("XOF output did not match test vector:\n\texpected: %v...\n\t got: %v...", vec.Hash[:10], out[:10]) } + + // Should be able to Seek around in the output stream without affecting correctness + seeks := []struct { + offset int64 + whence int + }{ + {0, io.SeekStart}, + {17, io.SeekCurrent}, + {-5, io.SeekCurrent}, + {int64(h.Size()), io.SeekStart}, + {int64(h.Size()), io.SeekCurrent}, + } + xof := h.XOF() + outR := bytes.NewReader(xofBuf.Bytes()) + for _, s := range seeks { + outRead := make([]byte, 10) + xofRead := make([]byte, 10) + offset, _ := outR.Seek(s.offset, s.whence) + n, _ := outR.Read(outRead) + xof.Seek(s.offset, s.whence) + xof.Read(xofRead[:n]) + if !bytes.Equal(outRead[:n], xofRead[:n]) { + 
t.Errorf("XOF output did not match test vector at offset %v:\n\texpected: %x...\n\t got: %x...", offset, outRead[:10], xofRead[:10]) + } + + } } } +type nopReader struct{} + +func (nopReader) Read(p []byte) (int, error) { return len(p), nil } + func BenchmarkWrite(b *testing.B) { - h := blake3.New(32, nil) - buf := make([]byte, 1<<15) - b.SetBytes(int64(len(buf))) - for i := 0; i < b.N; i++ { - h.Write(buf) - } + b.SetBytes(1) + io.CopyN(blake3.New(0, nil), nopReader{}, int64(b.N)) } -func BenchmarkChunk(b *testing.B) { - h := blake3.New(32, nil) +func BenchmarkSum256(b *testing.B) { buf := make([]byte, 1024) - out := make([]byte, 0, 32) for i := 0; i < b.N; i++ { - h.Write(buf) - h.Sum(out) + blake3.Sum256(buf) } } func BenchmarkXOF(b *testing.B) { b.SetBytes(1) - io.CopyN(ioutil.Discard, blake3.New(32, nil).XOF(), int64(b.N)) + io.CopyN(ioutil.Discard, blake3.New(0, nil).XOF(), int64(b.N)) }