From c649c5ac94df3129a0652d183283d85d3f1134ae Mon Sep 17 00:00:00 2001
From: renthraysk
Date: Sun, 12 Jan 2020 14:47:47 +0000
Subject: [PATCH] Eliminate the need for permute() by generating code for each round

---
 blake3.go  | 197 +++++++++++++++++++++++++++++++++++++++++------------
 gen/gen.go |  49 +++++++++++++
 2 files changed, 202 insertions(+), 44 deletions(-)
 create mode 100644 gen/gen.go

diff --git a/blake3.go b/blake3.go
index f79c10c..90f6969 100644
--- a/blake3.go
+++ b/blake3.go
@@ -60,37 +60,6 @@ func gy(state *[16]uint32, a, b, c, d int, my uint32) {
 	state[b] = bits.RotateLeft32(state[b]^state[c], -7)
 }
 
-func round(state *[16]uint32, m *[16]uint32) {
-	// Mix the columns.
-	gx(state, 0, 4, 8, 12, m[0])
-	gy(state, 0, 4, 8, 12, m[1])
-	gx(state, 1, 5, 9, 13, m[2])
-	gy(state, 1, 5, 9, 13, m[3])
-	gx(state, 2, 6, 10, 14, m[4])
-	gy(state, 2, 6, 10, 14, m[5])
-	gx(state, 3, 7, 11, 15, m[6])
-	gy(state, 3, 7, 11, 15, m[7])
-
-	// Mix the diagonals.
-	gx(state, 0, 5, 10, 15, m[8])
-	gy(state, 0, 5, 10, 15, m[9])
-	gx(state, 1, 6, 11, 12, m[10])
-	gy(state, 1, 6, 11, 12, m[11])
-	gx(state, 2, 7, 8, 13, m[12])
-	gy(state, 2, 7, 8, 13, m[13])
-	gx(state, 3, 4, 9, 14, m[14])
-	gy(state, 3, 4, 9, 14, m[15])
-}
-
-func permute(m *[16]uint32) {
-	*m = [16]uint32{
-		m[2], m[6], m[3], m[10],
-		m[7], m[0], m[4], m[13],
-		m[1], m[11], m[12], m[5],
-		m[9], m[14], m[15], m[8],
-	}
-}
-
 // A node represents a chunk or parent in the BLAKE3 Merkle tree. In BLAKE3
 // terminology, the elements of the bottom layer (aka "leaves") of the tree are
 // called chunk nodes, and the elements of upper layers (aka "interior nodes")
@@ -122,19 +91,159 @@ func (n node) compress() [16]uint32 {
 		uint32(n.counter), uint32(n.counter >> 32), n.blockLen, n.flags,
 	}
 
-	round(&state, &n.block) // round 1
-	permute(&n.block)
-	round(&state, &n.block) // round 2
-	permute(&n.block)
-	round(&state, &n.block) // round 3
-	permute(&n.block)
-	round(&state, &n.block) // round 4
-	permute(&n.block)
-	round(&state, &n.block) // round 5
-	permute(&n.block)
-	round(&state, &n.block) // round 6
-	permute(&n.block)
-	round(&state, &n.block) // round 7
+	// round1
+
+	// Mix the columns.
+	gx(&state, 0, 4, 8, 12, n.block[0])
+	gy(&state, 0, 4, 8, 12, n.block[1])
+	gx(&state, 1, 5, 9, 13, n.block[2])
+	gy(&state, 1, 5, 9, 13, n.block[3])
+	gx(&state, 2, 6, 10, 14, n.block[4])
+	gy(&state, 2, 6, 10, 14, n.block[5])
+	gx(&state, 3, 7, 11, 15, n.block[6])
+	gy(&state, 3, 7, 11, 15, n.block[7])
+
+	// Mix the diagonals.
+	gx(&state, 0, 5, 10, 15, n.block[8])
+	gy(&state, 0, 5, 10, 15, n.block[9])
+	gx(&state, 1, 6, 11, 12, n.block[10])
+	gy(&state, 1, 6, 11, 12, n.block[11])
+	gx(&state, 2, 7, 8, 13, n.block[12])
+	gy(&state, 2, 7, 8, 13, n.block[13])
+	gx(&state, 3, 4, 9, 14, n.block[14])
+	gy(&state, 3, 4, 9, 14, n.block[15])
+
+	// round2
+
+	// Mix the columns.
+	gx(&state, 0, 4, 8, 12, n.block[2])
+	gy(&state, 0, 4, 8, 12, n.block[6])
+	gx(&state, 1, 5, 9, 13, n.block[3])
+	gy(&state, 1, 5, 9, 13, n.block[10])
+	gx(&state, 2, 6, 10, 14, n.block[7])
+	gy(&state, 2, 6, 10, 14, n.block[0])
+	gx(&state, 3, 7, 11, 15, n.block[4])
+	gy(&state, 3, 7, 11, 15, n.block[13])
+
+	// Mix the diagonals.
+	gx(&state, 0, 5, 10, 15, n.block[1])
+	gy(&state, 0, 5, 10, 15, n.block[11])
+	gx(&state, 1, 6, 11, 12, n.block[12])
+	gy(&state, 1, 6, 11, 12, n.block[5])
+	gx(&state, 2, 7, 8, 13, n.block[9])
+	gy(&state, 2, 7, 8, 13, n.block[14])
+	gx(&state, 3, 4, 9, 14, n.block[15])
+	gy(&state, 3, 4, 9, 14, n.block[8])
+
+	// round3
+
+	// Mix the columns.
+	gx(&state, 0, 4, 8, 12, n.block[3])
+	gy(&state, 0, 4, 8, 12, n.block[4])
+	gx(&state, 1, 5, 9, 13, n.block[10])
+	gy(&state, 1, 5, 9, 13, n.block[12])
+	gx(&state, 2, 6, 10, 14, n.block[13])
+	gy(&state, 2, 6, 10, 14, n.block[2])
+	gx(&state, 3, 7, 11, 15, n.block[7])
+	gy(&state, 3, 7, 11, 15, n.block[14])
+
+	// Mix the diagonals.
+	gx(&state, 0, 5, 10, 15, n.block[6])
+	gy(&state, 0, 5, 10, 15, n.block[5])
+	gx(&state, 1, 6, 11, 12, n.block[9])
+	gy(&state, 1, 6, 11, 12, n.block[0])
+	gx(&state, 2, 7, 8, 13, n.block[11])
+	gy(&state, 2, 7, 8, 13, n.block[15])
+	gx(&state, 3, 4, 9, 14, n.block[8])
+	gy(&state, 3, 4, 9, 14, n.block[1])
+
+	// round4
+
+	// Mix the columns.
+	gx(&state, 0, 4, 8, 12, n.block[10])
+	gy(&state, 0, 4, 8, 12, n.block[7])
+	gx(&state, 1, 5, 9, 13, n.block[12])
+	gy(&state, 1, 5, 9, 13, n.block[9])
+	gx(&state, 2, 6, 10, 14, n.block[14])
+	gy(&state, 2, 6, 10, 14, n.block[3])
+	gx(&state, 3, 7, 11, 15, n.block[13])
+	gy(&state, 3, 7, 11, 15, n.block[15])
+
+	// Mix the diagonals.
+	gx(&state, 0, 5, 10, 15, n.block[4])
+	gy(&state, 0, 5, 10, 15, n.block[0])
+	gx(&state, 1, 6, 11, 12, n.block[11])
+	gy(&state, 1, 6, 11, 12, n.block[2])
+	gx(&state, 2, 7, 8, 13, n.block[5])
+	gy(&state, 2, 7, 8, 13, n.block[8])
+	gx(&state, 3, 4, 9, 14, n.block[1])
+	gy(&state, 3, 4, 9, 14, n.block[6])
+
+	// round5
+
+	// Mix the columns.
+	gx(&state, 0, 4, 8, 12, n.block[12])
+	gy(&state, 0, 4, 8, 12, n.block[13])
+	gx(&state, 1, 5, 9, 13, n.block[9])
+	gy(&state, 1, 5, 9, 13, n.block[11])
+	gx(&state, 2, 6, 10, 14, n.block[15])
+	gy(&state, 2, 6, 10, 14, n.block[10])
+	gx(&state, 3, 7, 11, 15, n.block[14])
+	gy(&state, 3, 7, 11, 15, n.block[8])
+
+	// Mix the diagonals.
+	gx(&state, 0, 5, 10, 15, n.block[7])
+	gy(&state, 0, 5, 10, 15, n.block[2])
+	gx(&state, 1, 6, 11, 12, n.block[5])
+	gy(&state, 1, 6, 11, 12, n.block[3])
+	gx(&state, 2, 7, 8, 13, n.block[0])
+	gy(&state, 2, 7, 8, 13, n.block[1])
+	gx(&state, 3, 4, 9, 14, n.block[6])
+	gy(&state, 3, 4, 9, 14, n.block[4])
+
+	// round6
+
+	// Mix the columns.
+	gx(&state, 0, 4, 8, 12, n.block[9])
+	gy(&state, 0, 4, 8, 12, n.block[14])
+	gx(&state, 1, 5, 9, 13, n.block[11])
+	gy(&state, 1, 5, 9, 13, n.block[5])
+	gx(&state, 2, 6, 10, 14, n.block[8])
+	gy(&state, 2, 6, 10, 14, n.block[12])
+	gx(&state, 3, 7, 11, 15, n.block[15])
+	gy(&state, 3, 7, 11, 15, n.block[1])
+
+	// Mix the diagonals.
+	gx(&state, 0, 5, 10, 15, n.block[13])
+	gy(&state, 0, 5, 10, 15, n.block[3])
+	gx(&state, 1, 6, 11, 12, n.block[0])
+	gy(&state, 1, 6, 11, 12, n.block[10])
+	gx(&state, 2, 7, 8, 13, n.block[2])
+	gy(&state, 2, 7, 8, 13, n.block[6])
+	gx(&state, 3, 4, 9, 14, n.block[4])
+	gy(&state, 3, 4, 9, 14, n.block[7])
+
+	// round7
+
+	// Mix the columns.
+	gx(&state, 0, 4, 8, 12, n.block[11])
+	gy(&state, 0, 4, 8, 12, n.block[15])
+	gx(&state, 1, 5, 9, 13, n.block[5])
+	gy(&state, 1, 5, 9, 13, n.block[0])
+	gx(&state, 2, 6, 10, 14, n.block[1])
+	gy(&state, 2, 6, 10, 14, n.block[9])
+	gx(&state, 3, 7, 11, 15, n.block[8])
+	gy(&state, 3, 7, 11, 15, n.block[6])
+
+	// Mix the diagonals.
+	gx(&state, 0, 5, 10, 15, n.block[14])
+	gy(&state, 0, 5, 10, 15, n.block[10])
+	gx(&state, 1, 6, 11, 12, n.block[2])
+	gy(&state, 1, 6, 11, 12, n.block[12])
+	gx(&state, 2, 7, 8, 13, n.block[3])
+	gy(&state, 2, 7, 8, 13, n.block[4])
+	gx(&state, 3, 4, 9, 14, n.block[7])
+	gy(&state, 3, 4, 9, 14, n.block[13])
 
 	for i := range n.cv {
 		state[i] ^= state[i+8]
diff --git a/gen/gen.go b/gen/gen.go
new file mode 100644
index 0000000..5c3179d
--- /dev/null
+++ b/gen/gen.go
@@ -0,0 +1,49 @@
+package main
+
+import (
+	"fmt"
+)
+
+func permute(m *[16]uint32) {
+	*m = [16]uint32{
+		m[2], m[6], m[3], m[10],
+		m[7], m[0], m[4], m[13],
+		m[1], m[11], m[12], m[5],
+		m[9], m[14], m[15], m[8],
+	}
+}
+
+func main() {
+
+	var m [16]uint32
+
+	for i := range m {
+		m[i] = uint32(i)
+	}
+	for x := 1; x < 8; x++ {
+		fmt.Printf(`// round%d
+
+	// Mix the columns.
+	gx(&state, 0, 4, 8, 12, n.block[%d])
+	gy(&state, 0, 4, 8, 12, n.block[%d])
+	gx(&state, 1, 5, 9, 13, n.block[%d])
+	gy(&state, 1, 5, 9, 13, n.block[%d])
+	gx(&state, 2, 6, 10, 14, n.block[%d])
+	gy(&state, 2, 6, 10, 14, n.block[%d])
+	gx(&state, 3, 7, 11, 15, n.block[%d])
+	gy(&state, 3, 7, 11, 15, n.block[%d])
+
+	// Mix the diagonals.
+	gx(&state, 0, 5, 10, 15, n.block[%d])
+	gy(&state, 0, 5, 10, 15, n.block[%d])
+	gx(&state, 1, 6, 11, 12, n.block[%d])
+	gy(&state, 1, 6, 11, 12, n.block[%d])
+	gx(&state, 2, 7, 8, 13, n.block[%d])
+	gy(&state, 2, 7, 8, 13, n.block[%d])
+	gx(&state, 3, 4, 9, 14, n.block[%d])
+	gy(&state, 3, 4, 9, 14, n.block[%d])
+
+`, x, m[0], m[1], m[2], m[3], m[4], m[5], m[6], m[7], m[8], m[9], m[10], m[11], m[12], m[13], m[14], m[15])
+		permute(&m)
+	}
+}