Improve register usage in compress(): replace the [16]uint32 state array with individual variables (s0..s15)

This commit is contained in:
renthraysk 2020-01-12 18:59:44 +00:00
parent 855147f461
commit 7786c919ad
2 changed files with 77 additions and 88 deletions

149
blake3.go
View File

@ -80,115 +80,104 @@ type node struct {
// node. When nodes are being merged into parents, only the first 8 words are // node. When nodes are being merged into parents, only the first 8 words are
// used. When the root node is being used to generate output, the full 16 words // used. When the root node is being used to generate output, the full 16 words
// are used. // are used.
func (n node) compress() (s [16]uint32) { func (n node) compress() [16]uint32 {
// round1 rather than init s and mix, do both. // round1 rather than init s and mix, do both.
// mix the columns. // mix the columns.
s[0], s[4], s[8], s[12] = g(n.cv[0], n.cv[4], iv[0], uint32(n.counter), n.block[0], n.block[1]) s0, s4, s8, s12 := g(n.cv[0], n.cv[4], iv[0], uint32(n.counter), n.block[0], n.block[1])
s[1], s[5], s[9], s[13] = g(n.cv[1], n.cv[5], iv[1], uint32(n.counter>>32), n.block[2], n.block[3]) s1, s5, s9, s13 := g(n.cv[1], n.cv[5], iv[1], uint32(n.counter>>32), n.block[2], n.block[3])
s[2], s[6], s[10], s[14] = g(n.cv[2], n.cv[6], iv[2], n.blockLen, n.block[4], n.block[5]) s2, s6, s10, s14 := g(n.cv[2], n.cv[6], iv[2], n.blockLen, n.block[4], n.block[5])
s[3], s[7], s[11], s[15] = g(n.cv[3], n.cv[7], iv[3], n.flags, n.block[6], n.block[7]) s3, s7, s11, s15 := g(n.cv[3], n.cv[7], iv[3], n.flags, n.block[6], n.block[7])
// Mix the diagonals. // Mix the diagonals.
s[0], s[5], s[10], s[15] = g(s[0], s[5], s[10], s[15], n.block[8], n.block[9]) s0, s5, s10, s15 = g(s0, s5, s10, s15, n.block[8], n.block[9])
s[1], s[6], s[11], s[12] = g(s[1], s[6], s[11], s[12], n.block[10], n.block[11]) s1, s6, s11, s12 = g(s1, s6, s11, s12, n.block[10], n.block[11])
s[2], s[7], s[8], s[13] = g(s[2], s[7], s[8], s[13], n.block[12], n.block[13]) s2, s7, s8, s13 = g(s2, s7, s8, s13, n.block[12], n.block[13])
s[3], s[4], s[9], s[14] = g(s[3], s[4], s[9], s[14], n.block[14], n.block[15]) s3, s4, s9, s14 = g(s3, s4, s9, s14, n.block[14], n.block[15])
// round2
// round 2
// Mix the columns. // Mix the columns.
s[0], s[4], s[8], s[12] = g(s[0], s[4], s[8], s[12], n.block[2], n.block[6]) s0, s4, s8, s12 = g(s0, s4, s8, s12, n.block[2], n.block[6])
s[1], s[5], s[9], s[13] = g(s[1], s[5], s[9], s[13], n.block[3], n.block[10]) s1, s5, s9, s13 = g(s1, s5, s9, s13, n.block[3], n.block[10])
s[2], s[6], s[10], s[14] = g(s[2], s[6], s[10], s[14], n.block[7], n.block[0]) s2, s6, s10, s14 = g(s2, s6, s10, s14, n.block[7], n.block[0])
s[3], s[7], s[11], s[15] = g(s[3], s[7], s[11], s[15], n.block[4], n.block[13]) s3, s7, s11, s15 = g(s3, s7, s11, s15, n.block[4], n.block[13])
// Mix the diagonals. // Mix the diagonals.
s[0], s[5], s[10], s[15] = g(s[0], s[5], s[10], s[15], n.block[1], n.block[11]) s0, s5, s10, s15 = g(s0, s5, s10, s15, n.block[1], n.block[11])
s[1], s[6], s[11], s[12] = g(s[1], s[6], s[11], s[12], n.block[12], n.block[5]) s1, s6, s11, s12 = g(s1, s6, s11, s12, n.block[12], n.block[5])
s[2], s[7], s[8], s[13] = g(s[2], s[7], s[8], s[13], n.block[9], n.block[14]) s2, s7, s8, s13 = g(s2, s7, s8, s13, n.block[9], n.block[14])
s[3], s[4], s[9], s[14] = g(s[3], s[4], s[9], s[14], n.block[15], n.block[8]) s3, s4, s9, s14 = g(s3, s4, s9, s14, n.block[15], n.block[8])
// round3
// round 3
// Mix the columns. // Mix the columns.
s[0], s[4], s[8], s[12] = g(s[0], s[4], s[8], s[12], n.block[3], n.block[4]) s0, s4, s8, s12 = g(s0, s4, s8, s12, n.block[3], n.block[4])
s[1], s[5], s[9], s[13] = g(s[1], s[5], s[9], s[13], n.block[10], n.block[12]) s1, s5, s9, s13 = g(s1, s5, s9, s13, n.block[10], n.block[12])
s[2], s[6], s[10], s[14] = g(s[2], s[6], s[10], s[14], n.block[13], n.block[2]) s2, s6, s10, s14 = g(s2, s6, s10, s14, n.block[13], n.block[2])
s[3], s[7], s[11], s[15] = g(s[3], s[7], s[11], s[15], n.block[7], n.block[14]) s3, s7, s11, s15 = g(s3, s7, s11, s15, n.block[7], n.block[14])
// Mix the diagonals. // Mix the diagonals.
s[0], s[5], s[10], s[15] = g(s[0], s[5], s[10], s[15], n.block[6], n.block[5]) s0, s5, s10, s15 = g(s0, s5, s10, s15, n.block[6], n.block[5])
s[1], s[6], s[11], s[12] = g(s[1], s[6], s[11], s[12], n.block[9], n.block[0]) s1, s6, s11, s12 = g(s1, s6, s11, s12, n.block[9], n.block[0])
s[2], s[7], s[8], s[13] = g(s[2], s[7], s[8], s[13], n.block[11], n.block[15]) s2, s7, s8, s13 = g(s2, s7, s8, s13, n.block[11], n.block[15])
s[3], s[4], s[9], s[14] = g(s[3], s[4], s[9], s[14], n.block[8], n.block[1]) s3, s4, s9, s14 = g(s3, s4, s9, s14, n.block[8], n.block[1])
// round4
// round 4
// Mix the columns. // Mix the columns.
s[0], s[4], s[8], s[12] = g(s[0], s[4], s[8], s[12], n.block[10], n.block[7]) s0, s4, s8, s12 = g(s0, s4, s8, s12, n.block[10], n.block[7])
s[1], s[5], s[9], s[13] = g(s[1], s[5], s[9], s[13], n.block[12], n.block[9]) s1, s5, s9, s13 = g(s1, s5, s9, s13, n.block[12], n.block[9])
s[2], s[6], s[10], s[14] = g(s[2], s[6], s[10], s[14], n.block[14], n.block[3]) s2, s6, s10, s14 = g(s2, s6, s10, s14, n.block[14], n.block[3])
s[3], s[7], s[11], s[15] = g(s[3], s[7], s[11], s[15], n.block[13], n.block[15]) s3, s7, s11, s15 = g(s3, s7, s11, s15, n.block[13], n.block[15])
// Mix the diagonals. // Mix the diagonals.
s[0], s[5], s[10], s[15] = g(s[0], s[5], s[10], s[15], n.block[4], n.block[0]) s0, s5, s10, s15 = g(s0, s5, s10, s15, n.block[4], n.block[0])
s[1], s[6], s[11], s[12] = g(s[1], s[6], s[11], s[12], n.block[11], n.block[2]) s1, s6, s11, s12 = g(s1, s6, s11, s12, n.block[11], n.block[2])
s[2], s[7], s[8], s[13] = g(s[2], s[7], s[8], s[13], n.block[5], n.block[8]) s2, s7, s8, s13 = g(s2, s7, s8, s13, n.block[5], n.block[8])
s[3], s[4], s[9], s[14] = g(s[3], s[4], s[9], s[14], n.block[1], n.block[6]) s3, s4, s9, s14 = g(s3, s4, s9, s14, n.block[1], n.block[6])
// round5
// round 5
// Mix the columns. // Mix the columns.
s[0], s[4], s[8], s[12] = g(s[0], s[4], s[8], s[12], n.block[12], n.block[13]) s0, s4, s8, s12 = g(s0, s4, s8, s12, n.block[12], n.block[13])
s[1], s[5], s[9], s[13] = g(s[1], s[5], s[9], s[13], n.block[9], n.block[11]) s1, s5, s9, s13 = g(s1, s5, s9, s13, n.block[9], n.block[11])
s[2], s[6], s[10], s[14] = g(s[2], s[6], s[10], s[14], n.block[15], n.block[10]) s2, s6, s10, s14 = g(s2, s6, s10, s14, n.block[15], n.block[10])
s[3], s[7], s[11], s[15] = g(s[3], s[7], s[11], s[15], n.block[14], n.block[8]) s3, s7, s11, s15 = g(s3, s7, s11, s15, n.block[14], n.block[8])
// Mix the diagonals. // Mix the diagonals.
s[0], s[5], s[10], s[15] = g(s[0], s[5], s[10], s[15], n.block[7], n.block[2]) s0, s5, s10, s15 = g(s0, s5, s10, s15, n.block[7], n.block[2])
s[1], s[6], s[11], s[12] = g(s[1], s[6], s[11], s[12], n.block[5], n.block[3]) s1, s6, s11, s12 = g(s1, s6, s11, s12, n.block[5], n.block[3])
s[2], s[7], s[8], s[13] = g(s[2], s[7], s[8], s[13], n.block[0], n.block[1]) s2, s7, s8, s13 = g(s2, s7, s8, s13, n.block[0], n.block[1])
s[3], s[4], s[9], s[14] = g(s[3], s[4], s[9], s[14], n.block[6], n.block[4]) s3, s4, s9, s14 = g(s3, s4, s9, s14, n.block[6], n.block[4])
// round6
// round 6
// Mix the columns. // Mix the columns.
s[0], s[4], s[8], s[12] = g(s[0], s[4], s[8], s[12], n.block[9], n.block[14]) s0, s4, s8, s12 = g(s0, s4, s8, s12, n.block[9], n.block[14])
s[1], s[5], s[9], s[13] = g(s[1], s[5], s[9], s[13], n.block[11], n.block[5]) s1, s5, s9, s13 = g(s1, s5, s9, s13, n.block[11], n.block[5])
s[2], s[6], s[10], s[14] = g(s[2], s[6], s[10], s[14], n.block[8], n.block[12]) s2, s6, s10, s14 = g(s2, s6, s10, s14, n.block[8], n.block[12])
s[3], s[7], s[11], s[15] = g(s[3], s[7], s[11], s[15], n.block[15], n.block[1]) s3, s7, s11, s15 = g(s3, s7, s11, s15, n.block[15], n.block[1])
// Mix the diagonals. // Mix the diagonals.
s[0], s[5], s[10], s[15] = g(s[0], s[5], s[10], s[15], n.block[13], n.block[3]) s0, s5, s10, s15 = g(s0, s5, s10, s15, n.block[13], n.block[3])
s[1], s[6], s[11], s[12] = g(s[1], s[6], s[11], s[12], n.block[0], n.block[10]) s1, s6, s11, s12 = g(s1, s6, s11, s12, n.block[0], n.block[10])
s[2], s[7], s[8], s[13] = g(s[2], s[7], s[8], s[13], n.block[2], n.block[6]) s2, s7, s8, s13 = g(s2, s7, s8, s13, n.block[2], n.block[6])
s[3], s[4], s[9], s[14] = g(s[3], s[4], s[9], s[14], n.block[4], n.block[7]) s3, s4, s9, s14 = g(s3, s4, s9, s14, n.block[4], n.block[7])
// round7
// round 7
// Mix the columns. // Mix the columns.
s[0], s[4], s[8], s[12] = g(s[0], s[4], s[8], s[12], n.block[11], n.block[15]) s0, s4, s8, s12 = g(s0, s4, s8, s12, n.block[11], n.block[15])
s[1], s[5], s[9], s[13] = g(s[1], s[5], s[9], s[13], n.block[5], n.block[0]) s1, s5, s9, s13 = g(s1, s5, s9, s13, n.block[5], n.block[0])
s[2], s[6], s[10], s[14] = g(s[2], s[6], s[10], s[14], n.block[1], n.block[9]) s2, s6, s10, s14 = g(s2, s6, s10, s14, n.block[1], n.block[9])
s[3], s[7], s[11], s[15] = g(s[3], s[7], s[11], s[15], n.block[8], n.block[6]) s3, s7, s11, s15 = g(s3, s7, s11, s15, n.block[8], n.block[6])
// Mix the diagonals. // Mix the diagonals.
s[0], s[5], s[10], s[15] = g(s[0], s[5], s[10], s[15], n.block[14], n.block[10]) s0, s5, s10, s15 = g(s0, s5, s10, s15, n.block[14], n.block[10])
s[1], s[6], s[11], s[12] = g(s[1], s[6], s[11], s[12], n.block[2], n.block[12]) s1, s6, s11, s12 = g(s1, s6, s11, s12, n.block[2], n.block[12])
s[2], s[7], s[8], s[13] = g(s[2], s[7], s[8], s[13], n.block[3], n.block[4]) s2, s7, s8, s13 = g(s2, s7, s8, s13, n.block[3], n.block[4])
s[3], s[4], s[9], s[14] = g(s[3], s[4], s[9], s[14], n.block[7], n.block[13]) s3, s4, s9, s14 = g(s3, s4, s9, s14, n.block[7], n.block[13])
s[0] ^= s[0+8] return [16]uint32{
s[1] ^= s[1+8] s0 ^ s8, s1 ^ s9, s2 ^ s10, s3 ^ s11,
s[2] ^= s[2+8] s4 ^ s12, s5 ^ s13, s6 ^ s14, s7 ^ s15,
s[3] ^= s[3+8] s8 ^ n.cv[0], s9 ^ n.cv[1], s10 ^ n.cv[2], s11 ^ n.cv[3],
s[4] ^= s[4+8] s12 ^ n.cv[4], s13 ^ n.cv[5], s14 ^ n.cv[6], s15 ^ n.cv[7],
s[5] ^= s[5+8] }
s[6] ^= s[6+8]
s[7] ^= s[7+8]
s[0+8] ^= n.cv[0]
s[1+8] ^= n.cv[1]
s[2+8] ^= n.cv[2]
s[3+8] ^= n.cv[3]
s[4+8] ^= n.cv[4]
s[5+8] ^= n.cv[5]
s[6+8] ^= n.cv[6]
s[7+8] ^= n.cv[7]
return
} }
// chainingValue returns the first 8 words of the compressed node. This is used // chainingValue returns the first 8 words of the compressed node. This is used

View File

@ -24,16 +24,16 @@ func main() {
fmt.Printf(`// round%d fmt.Printf(`// round%d
// Mix the columns. // Mix the columns.
s[0], s[4], s[8], s[12] = g(s[0], s[4], s[8], s[12], n.block[%d], n.block[%d]) s0, s4, s8, s12 = g(s0, s4, s8, s12, n.block[%d], n.block[%d])
s[1], s[5], s[9], s[13] = g(s[1], s[5], s[9], s[13], n.block[%d], n.block[%d]) s1, s5, s9, s13 = g(s1, s5, s9, s13, n.block[%d], n.block[%d])
s[2], s[6], s[10], s[14] = g(s[2], s[6], s[10], s[14], n.block[%d], n.block[%d]) s2, s6, s10, s14 = g(s2, s6, s10, s14, n.block[%d], n.block[%d])
s[3], s[7], s[11], s[15] = g(s[3], s[7], s[11], s[15], n.block[%d], n.block[%d]) s3, s7, s11, s15 = g(s3, s7, s11, s15, n.block[%d], n.block[%d])
// Mix the diagonals. // Mix the diagonals.
s[0], s[5], s[10], s[15] = g(s[0], s[5], s[10], s[15], n.block[%d], n.block[%d]) s0, s5, s10, s15 = g(s0, s5, s10, s15, n.block[%d], n.block[%d])
s[1], s[6], s[11], s[12] = g(s[1], s[6], s[11], s[12], n.block[%d], n.block[%d]) s1, s6, s11, s12 = g(s1, s6, s11, s12, n.block[%d], n.block[%d])
s[2], s[7], s[8], s[13] = g(s[2], s[7], s[8], s[13], n.block[%d], n.block[%d]) s2, s7, s8, s13 = g(s2, s7, s8, s13, n.block[%d], n.block[%d])
s[3], s[4], s[9], s[14] = g(s[3], s[4], s[9], s[14], n.block[%d], n.block[%d]) s3, s4, s9, s14 = g(s3, s4, s9, s14, n.block[%d], n.block[%d])
`, x, `, x,
m[0], m[1], m[2], m[3], m[4], m[5], m[6], m[7], m[8], m[9], m[10], m[11], m[12], m[13], m[14], m[15]) m[0], m[1], m[2], m[3], m[4], m[5], m[6], m[7], m[8], m[9], m[10], m[11], m[12], m[13], m[14], m[15])
permute(&m) permute(&m)