//+build !noasm,!appengine

/*
 * Minio Cloud Storage, (C) 2017 Minio, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package sha256

import (
	"encoding/binary"
	"errors"
	"hash"
	"sort"
	"sync/atomic"
	"time"
)

// sha256X16Avx512 is declared without a Go body; its implementation is
// provided outside this file (the blockAvx512 wrapper refers to it as
// assembly code). It is called with the lane digests, a scratch buffer, the
// round-constant table, the per-round lane masks and the 16 lane inputs.
// NOTE(review): the exact memory layout each argument must have is defined by
// that external implementation - confirm there before changing callers.
//
//go:noescape
func sha256X16Avx512(digests *[512]byte, scratch *[512]byte, table *[512]uint64, mask []uint64, inputs [16][]byte)

// Avx512ServerUID - Do not start at 0 but next multiple of 16 so as to be able to
// differentiate with default initialization value of 0
const Avx512ServerUID = 16

// uidCounter is the package-wide counter from which NewAvx512 draws a uid for
// every digest it creates; it is only modified through sync/atomic.
var uidCounter uint64

// NewAvx512 - initialize sha256 Avx512 implementation.
40func NewAvx512(a512srv *Avx512Server) hash.Hash { 41 uid := atomic.AddUint64(&uidCounter, 1) 42 return &Avx512Digest{uid: uid, a512srv: a512srv} 43} 44 45// Avx512Digest - Type for computing SHA256 using Avx512 46type Avx512Digest struct { 47 uid uint64 48 a512srv *Avx512Server 49 x [chunk]byte 50 nx int 51 len uint64 52 final bool 53 result [Size]byte 54} 55 56// Size - Return size of checksum 57func (d *Avx512Digest) Size() int { return Size } 58 59// BlockSize - Return blocksize of checksum 60func (d Avx512Digest) BlockSize() int { return BlockSize } 61 62// Reset - reset sha digest to its initial values 63func (d *Avx512Digest) Reset() { 64 d.a512srv.blocksCh <- blockInput{uid: d.uid, reset: true} 65 d.nx = 0 66 d.len = 0 67 d.final = false 68} 69 70// Write to digest 71func (d *Avx512Digest) Write(p []byte) (nn int, err error) { 72 73 if d.final { 74 return 0, errors.New("Avx512Digest already finalized. Reset first before writing again") 75 } 76 77 nn = len(p) 78 d.len += uint64(nn) 79 if d.nx > 0 { 80 n := copy(d.x[d.nx:], p) 81 d.nx += n 82 if d.nx == chunk { 83 d.a512srv.blocksCh <- blockInput{uid: d.uid, msg: d.x[:]} 84 d.nx = 0 85 } 86 p = p[n:] 87 } 88 if len(p) >= chunk { 89 n := len(p) &^ (chunk - 1) 90 d.a512srv.blocksCh <- blockInput{uid: d.uid, msg: p[:n]} 91 p = p[n:] 92 } 93 if len(p) > 0 { 94 d.nx = copy(d.x[:], p) 95 } 96 return 97} 98 99// Sum - Return sha256 sum in bytes 100func (d *Avx512Digest) Sum(in []byte) (result []byte) { 101 102 if d.final { 103 return append(in, d.result[:]...) 104 } 105 106 trail := make([]byte, 0, 128) 107 trail = append(trail, d.x[:d.nx]...) 108 109 len := d.len 110 // Padding. Add a 1 bit and 0 bits until 56 bytes mod 64. 111 var tmp [64]byte 112 tmp[0] = 0x80 113 if len%64 < 56 { 114 trail = append(trail, tmp[0:56-len%64]...) 115 } else { 116 trail = append(trail, tmp[0:64+56-len%64]...) 117 } 118 d.nx = 0 119 120 // Length in bits. 
121 len <<= 3 122 for i := uint(0); i < 8; i++ { 123 tmp[i] = byte(len >> (56 - 8*i)) 124 } 125 trail = append(trail, tmp[0:8]...) 126 127 sumCh := make(chan [Size]byte) 128 d.a512srv.blocksCh <- blockInput{uid: d.uid, msg: trail, final: true, sumCh: sumCh} 129 d.result = <-sumCh 130 d.final = true 131 return append(in, d.result[:]...) 132} 133 134var table = [512]uint64{ 135 0x428a2f98428a2f98, 0x428a2f98428a2f98, 0x428a2f98428a2f98, 0x428a2f98428a2f98, 136 0x428a2f98428a2f98, 0x428a2f98428a2f98, 0x428a2f98428a2f98, 0x428a2f98428a2f98, 137 0x7137449171374491, 0x7137449171374491, 0x7137449171374491, 0x7137449171374491, 138 0x7137449171374491, 0x7137449171374491, 0x7137449171374491, 0x7137449171374491, 139 0xb5c0fbcfb5c0fbcf, 0xb5c0fbcfb5c0fbcf, 0xb5c0fbcfb5c0fbcf, 0xb5c0fbcfb5c0fbcf, 140 0xb5c0fbcfb5c0fbcf, 0xb5c0fbcfb5c0fbcf, 0xb5c0fbcfb5c0fbcf, 0xb5c0fbcfb5c0fbcf, 141 0xe9b5dba5e9b5dba5, 0xe9b5dba5e9b5dba5, 0xe9b5dba5e9b5dba5, 0xe9b5dba5e9b5dba5, 142 0xe9b5dba5e9b5dba5, 0xe9b5dba5e9b5dba5, 0xe9b5dba5e9b5dba5, 0xe9b5dba5e9b5dba5, 143 0x3956c25b3956c25b, 0x3956c25b3956c25b, 0x3956c25b3956c25b, 0x3956c25b3956c25b, 144 0x3956c25b3956c25b, 0x3956c25b3956c25b, 0x3956c25b3956c25b, 0x3956c25b3956c25b, 145 0x59f111f159f111f1, 0x59f111f159f111f1, 0x59f111f159f111f1, 0x59f111f159f111f1, 146 0x59f111f159f111f1, 0x59f111f159f111f1, 0x59f111f159f111f1, 0x59f111f159f111f1, 147 0x923f82a4923f82a4, 0x923f82a4923f82a4, 0x923f82a4923f82a4, 0x923f82a4923f82a4, 148 0x923f82a4923f82a4, 0x923f82a4923f82a4, 0x923f82a4923f82a4, 0x923f82a4923f82a4, 149 0xab1c5ed5ab1c5ed5, 0xab1c5ed5ab1c5ed5, 0xab1c5ed5ab1c5ed5, 0xab1c5ed5ab1c5ed5, 150 0xab1c5ed5ab1c5ed5, 0xab1c5ed5ab1c5ed5, 0xab1c5ed5ab1c5ed5, 0xab1c5ed5ab1c5ed5, 151 0xd807aa98d807aa98, 0xd807aa98d807aa98, 0xd807aa98d807aa98, 0xd807aa98d807aa98, 152 0xd807aa98d807aa98, 0xd807aa98d807aa98, 0xd807aa98d807aa98, 0xd807aa98d807aa98, 153 0x12835b0112835b01, 0x12835b0112835b01, 0x12835b0112835b01, 0x12835b0112835b01, 154 0x12835b0112835b01, 
0x12835b0112835b01, 0x12835b0112835b01, 0x12835b0112835b01, 155 0x243185be243185be, 0x243185be243185be, 0x243185be243185be, 0x243185be243185be, 156 0x243185be243185be, 0x243185be243185be, 0x243185be243185be, 0x243185be243185be, 157 0x550c7dc3550c7dc3, 0x550c7dc3550c7dc3, 0x550c7dc3550c7dc3, 0x550c7dc3550c7dc3, 158 0x550c7dc3550c7dc3, 0x550c7dc3550c7dc3, 0x550c7dc3550c7dc3, 0x550c7dc3550c7dc3, 159 0x72be5d7472be5d74, 0x72be5d7472be5d74, 0x72be5d7472be5d74, 0x72be5d7472be5d74, 160 0x72be5d7472be5d74, 0x72be5d7472be5d74, 0x72be5d7472be5d74, 0x72be5d7472be5d74, 161 0x80deb1fe80deb1fe, 0x80deb1fe80deb1fe, 0x80deb1fe80deb1fe, 0x80deb1fe80deb1fe, 162 0x80deb1fe80deb1fe, 0x80deb1fe80deb1fe, 0x80deb1fe80deb1fe, 0x80deb1fe80deb1fe, 163 0x9bdc06a79bdc06a7, 0x9bdc06a79bdc06a7, 0x9bdc06a79bdc06a7, 0x9bdc06a79bdc06a7, 164 0x9bdc06a79bdc06a7, 0x9bdc06a79bdc06a7, 0x9bdc06a79bdc06a7, 0x9bdc06a79bdc06a7, 165 0xc19bf174c19bf174, 0xc19bf174c19bf174, 0xc19bf174c19bf174, 0xc19bf174c19bf174, 166 0xc19bf174c19bf174, 0xc19bf174c19bf174, 0xc19bf174c19bf174, 0xc19bf174c19bf174, 167 0xe49b69c1e49b69c1, 0xe49b69c1e49b69c1, 0xe49b69c1e49b69c1, 0xe49b69c1e49b69c1, 168 0xe49b69c1e49b69c1, 0xe49b69c1e49b69c1, 0xe49b69c1e49b69c1, 0xe49b69c1e49b69c1, 169 0xefbe4786efbe4786, 0xefbe4786efbe4786, 0xefbe4786efbe4786, 0xefbe4786efbe4786, 170 0xefbe4786efbe4786, 0xefbe4786efbe4786, 0xefbe4786efbe4786, 0xefbe4786efbe4786, 171 0x0fc19dc60fc19dc6, 0x0fc19dc60fc19dc6, 0x0fc19dc60fc19dc6, 0x0fc19dc60fc19dc6, 172 0x0fc19dc60fc19dc6, 0x0fc19dc60fc19dc6, 0x0fc19dc60fc19dc6, 0x0fc19dc60fc19dc6, 173 0x240ca1cc240ca1cc, 0x240ca1cc240ca1cc, 0x240ca1cc240ca1cc, 0x240ca1cc240ca1cc, 174 0x240ca1cc240ca1cc, 0x240ca1cc240ca1cc, 0x240ca1cc240ca1cc, 0x240ca1cc240ca1cc, 175 0x2de92c6f2de92c6f, 0x2de92c6f2de92c6f, 0x2de92c6f2de92c6f, 0x2de92c6f2de92c6f, 176 0x2de92c6f2de92c6f, 0x2de92c6f2de92c6f, 0x2de92c6f2de92c6f, 0x2de92c6f2de92c6f, 177 0x4a7484aa4a7484aa, 0x4a7484aa4a7484aa, 0x4a7484aa4a7484aa, 0x4a7484aa4a7484aa, 178 
0x4a7484aa4a7484aa, 0x4a7484aa4a7484aa, 0x4a7484aa4a7484aa, 0x4a7484aa4a7484aa, 179 0x5cb0a9dc5cb0a9dc, 0x5cb0a9dc5cb0a9dc, 0x5cb0a9dc5cb0a9dc, 0x5cb0a9dc5cb0a9dc, 180 0x5cb0a9dc5cb0a9dc, 0x5cb0a9dc5cb0a9dc, 0x5cb0a9dc5cb0a9dc, 0x5cb0a9dc5cb0a9dc, 181 0x76f988da76f988da, 0x76f988da76f988da, 0x76f988da76f988da, 0x76f988da76f988da, 182 0x76f988da76f988da, 0x76f988da76f988da, 0x76f988da76f988da, 0x76f988da76f988da, 183 0x983e5152983e5152, 0x983e5152983e5152, 0x983e5152983e5152, 0x983e5152983e5152, 184 0x983e5152983e5152, 0x983e5152983e5152, 0x983e5152983e5152, 0x983e5152983e5152, 185 0xa831c66da831c66d, 0xa831c66da831c66d, 0xa831c66da831c66d, 0xa831c66da831c66d, 186 0xa831c66da831c66d, 0xa831c66da831c66d, 0xa831c66da831c66d, 0xa831c66da831c66d, 187 0xb00327c8b00327c8, 0xb00327c8b00327c8, 0xb00327c8b00327c8, 0xb00327c8b00327c8, 188 0xb00327c8b00327c8, 0xb00327c8b00327c8, 0xb00327c8b00327c8, 0xb00327c8b00327c8, 189 0xbf597fc7bf597fc7, 0xbf597fc7bf597fc7, 0xbf597fc7bf597fc7, 0xbf597fc7bf597fc7, 190 0xbf597fc7bf597fc7, 0xbf597fc7bf597fc7, 0xbf597fc7bf597fc7, 0xbf597fc7bf597fc7, 191 0xc6e00bf3c6e00bf3, 0xc6e00bf3c6e00bf3, 0xc6e00bf3c6e00bf3, 0xc6e00bf3c6e00bf3, 192 0xc6e00bf3c6e00bf3, 0xc6e00bf3c6e00bf3, 0xc6e00bf3c6e00bf3, 0xc6e00bf3c6e00bf3, 193 0xd5a79147d5a79147, 0xd5a79147d5a79147, 0xd5a79147d5a79147, 0xd5a79147d5a79147, 194 0xd5a79147d5a79147, 0xd5a79147d5a79147, 0xd5a79147d5a79147, 0xd5a79147d5a79147, 195 0x06ca635106ca6351, 0x06ca635106ca6351, 0x06ca635106ca6351, 0x06ca635106ca6351, 196 0x06ca635106ca6351, 0x06ca635106ca6351, 0x06ca635106ca6351, 0x06ca635106ca6351, 197 0x1429296714292967, 0x1429296714292967, 0x1429296714292967, 0x1429296714292967, 198 0x1429296714292967, 0x1429296714292967, 0x1429296714292967, 0x1429296714292967, 199 0x27b70a8527b70a85, 0x27b70a8527b70a85, 0x27b70a8527b70a85, 0x27b70a8527b70a85, 200 0x27b70a8527b70a85, 0x27b70a8527b70a85, 0x27b70a8527b70a85, 0x27b70a8527b70a85, 201 0x2e1b21382e1b2138, 0x2e1b21382e1b2138, 0x2e1b21382e1b2138, 
0x2e1b21382e1b2138, 202 0x2e1b21382e1b2138, 0x2e1b21382e1b2138, 0x2e1b21382e1b2138, 0x2e1b21382e1b2138, 203 0x4d2c6dfc4d2c6dfc, 0x4d2c6dfc4d2c6dfc, 0x4d2c6dfc4d2c6dfc, 0x4d2c6dfc4d2c6dfc, 204 0x4d2c6dfc4d2c6dfc, 0x4d2c6dfc4d2c6dfc, 0x4d2c6dfc4d2c6dfc, 0x4d2c6dfc4d2c6dfc, 205 0x53380d1353380d13, 0x53380d1353380d13, 0x53380d1353380d13, 0x53380d1353380d13, 206 0x53380d1353380d13, 0x53380d1353380d13, 0x53380d1353380d13, 0x53380d1353380d13, 207 0x650a7354650a7354, 0x650a7354650a7354, 0x650a7354650a7354, 0x650a7354650a7354, 208 0x650a7354650a7354, 0x650a7354650a7354, 0x650a7354650a7354, 0x650a7354650a7354, 209 0x766a0abb766a0abb, 0x766a0abb766a0abb, 0x766a0abb766a0abb, 0x766a0abb766a0abb, 210 0x766a0abb766a0abb, 0x766a0abb766a0abb, 0x766a0abb766a0abb, 0x766a0abb766a0abb, 211 0x81c2c92e81c2c92e, 0x81c2c92e81c2c92e, 0x81c2c92e81c2c92e, 0x81c2c92e81c2c92e, 212 0x81c2c92e81c2c92e, 0x81c2c92e81c2c92e, 0x81c2c92e81c2c92e, 0x81c2c92e81c2c92e, 213 0x92722c8592722c85, 0x92722c8592722c85, 0x92722c8592722c85, 0x92722c8592722c85, 214 0x92722c8592722c85, 0x92722c8592722c85, 0x92722c8592722c85, 0x92722c8592722c85, 215 0xa2bfe8a1a2bfe8a1, 0xa2bfe8a1a2bfe8a1, 0xa2bfe8a1a2bfe8a1, 0xa2bfe8a1a2bfe8a1, 216 0xa2bfe8a1a2bfe8a1, 0xa2bfe8a1a2bfe8a1, 0xa2bfe8a1a2bfe8a1, 0xa2bfe8a1a2bfe8a1, 217 0xa81a664ba81a664b, 0xa81a664ba81a664b, 0xa81a664ba81a664b, 0xa81a664ba81a664b, 218 0xa81a664ba81a664b, 0xa81a664ba81a664b, 0xa81a664ba81a664b, 0xa81a664ba81a664b, 219 0xc24b8b70c24b8b70, 0xc24b8b70c24b8b70, 0xc24b8b70c24b8b70, 0xc24b8b70c24b8b70, 220 0xc24b8b70c24b8b70, 0xc24b8b70c24b8b70, 0xc24b8b70c24b8b70, 0xc24b8b70c24b8b70, 221 0xc76c51a3c76c51a3, 0xc76c51a3c76c51a3, 0xc76c51a3c76c51a3, 0xc76c51a3c76c51a3, 222 0xc76c51a3c76c51a3, 0xc76c51a3c76c51a3, 0xc76c51a3c76c51a3, 0xc76c51a3c76c51a3, 223 0xd192e819d192e819, 0xd192e819d192e819, 0xd192e819d192e819, 0xd192e819d192e819, 224 0xd192e819d192e819, 0xd192e819d192e819, 0xd192e819d192e819, 0xd192e819d192e819, 225 0xd6990624d6990624, 0xd6990624d6990624, 
0xd6990624d6990624, 0xd6990624d6990624, 226 0xd6990624d6990624, 0xd6990624d6990624, 0xd6990624d6990624, 0xd6990624d6990624, 227 0xf40e3585f40e3585, 0xf40e3585f40e3585, 0xf40e3585f40e3585, 0xf40e3585f40e3585, 228 0xf40e3585f40e3585, 0xf40e3585f40e3585, 0xf40e3585f40e3585, 0xf40e3585f40e3585, 229 0x106aa070106aa070, 0x106aa070106aa070, 0x106aa070106aa070, 0x106aa070106aa070, 230 0x106aa070106aa070, 0x106aa070106aa070, 0x106aa070106aa070, 0x106aa070106aa070, 231 0x19a4c11619a4c116, 0x19a4c11619a4c116, 0x19a4c11619a4c116, 0x19a4c11619a4c116, 232 0x19a4c11619a4c116, 0x19a4c11619a4c116, 0x19a4c11619a4c116, 0x19a4c11619a4c116, 233 0x1e376c081e376c08, 0x1e376c081e376c08, 0x1e376c081e376c08, 0x1e376c081e376c08, 234 0x1e376c081e376c08, 0x1e376c081e376c08, 0x1e376c081e376c08, 0x1e376c081e376c08, 235 0x2748774c2748774c, 0x2748774c2748774c, 0x2748774c2748774c, 0x2748774c2748774c, 236 0x2748774c2748774c, 0x2748774c2748774c, 0x2748774c2748774c, 0x2748774c2748774c, 237 0x34b0bcb534b0bcb5, 0x34b0bcb534b0bcb5, 0x34b0bcb534b0bcb5, 0x34b0bcb534b0bcb5, 238 0x34b0bcb534b0bcb5, 0x34b0bcb534b0bcb5, 0x34b0bcb534b0bcb5, 0x34b0bcb534b0bcb5, 239 0x391c0cb3391c0cb3, 0x391c0cb3391c0cb3, 0x391c0cb3391c0cb3, 0x391c0cb3391c0cb3, 240 0x391c0cb3391c0cb3, 0x391c0cb3391c0cb3, 0x391c0cb3391c0cb3, 0x391c0cb3391c0cb3, 241 0x4ed8aa4a4ed8aa4a, 0x4ed8aa4a4ed8aa4a, 0x4ed8aa4a4ed8aa4a, 0x4ed8aa4a4ed8aa4a, 242 0x4ed8aa4a4ed8aa4a, 0x4ed8aa4a4ed8aa4a, 0x4ed8aa4a4ed8aa4a, 0x4ed8aa4a4ed8aa4a, 243 0x5b9cca4f5b9cca4f, 0x5b9cca4f5b9cca4f, 0x5b9cca4f5b9cca4f, 0x5b9cca4f5b9cca4f, 244 0x5b9cca4f5b9cca4f, 0x5b9cca4f5b9cca4f, 0x5b9cca4f5b9cca4f, 0x5b9cca4f5b9cca4f, 245 0x682e6ff3682e6ff3, 0x682e6ff3682e6ff3, 0x682e6ff3682e6ff3, 0x682e6ff3682e6ff3, 246 0x682e6ff3682e6ff3, 0x682e6ff3682e6ff3, 0x682e6ff3682e6ff3, 0x682e6ff3682e6ff3, 247 0x748f82ee748f82ee, 0x748f82ee748f82ee, 0x748f82ee748f82ee, 0x748f82ee748f82ee, 248 0x748f82ee748f82ee, 0x748f82ee748f82ee, 0x748f82ee748f82ee, 0x748f82ee748f82ee, 249 0x78a5636f78a5636f, 
0x78a5636f78a5636f, 0x78a5636f78a5636f, 0x78a5636f78a5636f, 250 0x78a5636f78a5636f, 0x78a5636f78a5636f, 0x78a5636f78a5636f, 0x78a5636f78a5636f, 251 0x84c8781484c87814, 0x84c8781484c87814, 0x84c8781484c87814, 0x84c8781484c87814, 252 0x84c8781484c87814, 0x84c8781484c87814, 0x84c8781484c87814, 0x84c8781484c87814, 253 0x8cc702088cc70208, 0x8cc702088cc70208, 0x8cc702088cc70208, 0x8cc702088cc70208, 254 0x8cc702088cc70208, 0x8cc702088cc70208, 0x8cc702088cc70208, 0x8cc702088cc70208, 255 0x90befffa90befffa, 0x90befffa90befffa, 0x90befffa90befffa, 0x90befffa90befffa, 256 0x90befffa90befffa, 0x90befffa90befffa, 0x90befffa90befffa, 0x90befffa90befffa, 257 0xa4506ceba4506ceb, 0xa4506ceba4506ceb, 0xa4506ceba4506ceb, 0xa4506ceba4506ceb, 258 0xa4506ceba4506ceb, 0xa4506ceba4506ceb, 0xa4506ceba4506ceb, 0xa4506ceba4506ceb, 259 0xbef9a3f7bef9a3f7, 0xbef9a3f7bef9a3f7, 0xbef9a3f7bef9a3f7, 0xbef9a3f7bef9a3f7, 260 0xbef9a3f7bef9a3f7, 0xbef9a3f7bef9a3f7, 0xbef9a3f7bef9a3f7, 0xbef9a3f7bef9a3f7, 261 0xc67178f2c67178f2, 0xc67178f2c67178f2, 0xc67178f2c67178f2, 0xc67178f2c67178f2, 262 0xc67178f2c67178f2, 0xc67178f2c67178f2, 0xc67178f2c67178f2, 0xc67178f2c67178f2} 263 264// Interface function to assembly ode 265func blockAvx512(digests *[512]byte, input [16][]byte, mask []uint64) [16][Size]byte { 266 267 scratch := [512]byte{} 268 sha256X16Avx512(digests, &scratch, &table, mask, input) 269 270 output := [16][Size]byte{} 271 for i := 0; i < 16; i++ { 272 output[i] = getDigest(i, digests[:]) 273 } 274 275 return output 276} 277 278func getDigest(index int, state []byte) (sum [Size]byte) { 279 for j := 0; j < 16; j += 2 { 280 for i := index*4 + j*Size; i < index*4+(j+1)*Size; i += Size { 281 binary.BigEndian.PutUint32(sum[j*2:], binary.LittleEndian.Uint32(state[i:i+4])) 282 } 283 } 284 return 285} 286 287// Message to send across input channel 288type blockInput struct { 289 uid uint64 290 msg []byte 291 reset bool 292 final bool 293 sumCh chan [Size]byte 294} 295 296// Avx512Server - Type to 
implement 16x parallel handling of SHA256 invocations 297type Avx512Server struct { 298 blocksCh chan blockInput // Input channel 299 totalIn int // Total number of inputs waiting to be processed 300 lanes [16]Avx512LaneInfo // Array with info per lane (out of 16) 301 digests map[uint64][Size]byte // Map of uids to (interim) digest results 302} 303 304// Avx512LaneInfo - Info for each lane 305type Avx512LaneInfo struct { 306 uid uint64 // unique identification for this SHA processing 307 block []byte // input block to be processed 308 outputCh chan [Size]byte // channel for output result 309} 310 311// NewAvx512Server - Create new object for parallel processing handling 312func NewAvx512Server() *Avx512Server { 313 a512srv := &Avx512Server{} 314 a512srv.digests = make(map[uint64][Size]byte) 315 a512srv.blocksCh = make(chan blockInput) 316 317 // Start a single thread for reading from the input channel 318 go a512srv.Process() 319 return a512srv 320} 321 322// Process - Sole handler for reading from the input channel 323func (a512srv *Avx512Server) Process() { 324 for { 325 select { 326 case block := <-a512srv.blocksCh: 327 if block.reset { 328 a512srv.reset(block.uid) 329 continue 330 } 331 index := block.uid & 0xf 332 // fmt.Println("Adding message:", block.uid, index) 333 334 if a512srv.lanes[index].block != nil { // If slot is already filled, process all inputs 335 //fmt.Println("Invoking Blocks()") 336 a512srv.blocks() 337 } 338 a512srv.totalIn++ 339 a512srv.lanes[index] = Avx512LaneInfo{uid: block.uid, block: block.msg} 340 if block.final { 341 a512srv.lanes[index].outputCh = block.sumCh 342 } 343 if a512srv.totalIn == len(a512srv.lanes) { 344 // fmt.Println("Invoking Blocks() while FULL: ") 345 a512srv.blocks() 346 } 347 348 // TODO: test with larger timeout 349 case <-time.After(1 * time.Microsecond): 350 for _, lane := range a512srv.lanes { 351 if lane.block != nil { // check if there is any input to process 352 // fmt.Println("Invoking Blocks() on TIMEOUT: 
") 353 a512srv.blocks() 354 break // we are done 355 } 356 } 357 } 358 } 359} 360 361// Do a reset for this calculation 362func (a512srv *Avx512Server) reset(uid uint64) { 363 364 // Check if there is a message still waiting to be processed (and remove if so) 365 for i, lane := range a512srv.lanes { 366 if lane.uid == uid { 367 if lane.block != nil { 368 a512srv.lanes[i] = Avx512LaneInfo{} // clear message 369 a512srv.totalIn-- 370 } 371 } 372 } 373 374 // Delete entry from hash map 375 delete(a512srv.digests, uid) 376} 377 378// Invoke assembly and send results back 379func (a512srv *Avx512Server) blocks() { 380 381 inputs := [16][]byte{} 382 for i := range inputs { 383 inputs[i] = a512srv.lanes[i].block 384 } 385 386 mask := expandMask(genMask(inputs)) 387 outputs := blockAvx512(a512srv.getDigests(), inputs, mask) 388 389 a512srv.totalIn = 0 390 for i := 0; i < len(outputs); i++ { 391 uid, outputCh := a512srv.lanes[i].uid, a512srv.lanes[i].outputCh 392 a512srv.digests[uid] = outputs[i] 393 a512srv.lanes[i] = Avx512LaneInfo{} 394 395 if outputCh != nil { 396 // Send back result 397 outputCh <- outputs[i] 398 delete(a512srv.digests, uid) // Delete entry from hashmap 399 } 400 } 401} 402 403func (a512srv *Avx512Server) Write(uid uint64, p []byte) (nn int, err error) { 404 a512srv.blocksCh <- blockInput{uid: uid, msg: p} 405 return len(p), nil 406} 407 408// Sum - return sha256 sum in bytes for a given sum id. 
409func (a512srv *Avx512Server) Sum(uid uint64, p []byte) [32]byte { 410 sumCh := make(chan [32]byte) 411 a512srv.blocksCh <- blockInput{uid: uid, msg: p, final: true, sumCh: sumCh} 412 return <-sumCh 413} 414 415func (a512srv *Avx512Server) getDigests() *[512]byte { 416 digests := [512]byte{} 417 for i, lane := range a512srv.lanes { 418 a, ok := a512srv.digests[lane.uid] 419 if ok { 420 binary.BigEndian.PutUint32(digests[(i+0*16)*4:], binary.LittleEndian.Uint32(a[0:4])) 421 binary.BigEndian.PutUint32(digests[(i+1*16)*4:], binary.LittleEndian.Uint32(a[4:8])) 422 binary.BigEndian.PutUint32(digests[(i+2*16)*4:], binary.LittleEndian.Uint32(a[8:12])) 423 binary.BigEndian.PutUint32(digests[(i+3*16)*4:], binary.LittleEndian.Uint32(a[12:16])) 424 binary.BigEndian.PutUint32(digests[(i+4*16)*4:], binary.LittleEndian.Uint32(a[16:20])) 425 binary.BigEndian.PutUint32(digests[(i+5*16)*4:], binary.LittleEndian.Uint32(a[20:24])) 426 binary.BigEndian.PutUint32(digests[(i+6*16)*4:], binary.LittleEndian.Uint32(a[24:28])) 427 binary.BigEndian.PutUint32(digests[(i+7*16)*4:], binary.LittleEndian.Uint32(a[28:32])) 428 } else { 429 binary.LittleEndian.PutUint32(digests[(i+0*16)*4:], init0) 430 binary.LittleEndian.PutUint32(digests[(i+1*16)*4:], init1) 431 binary.LittleEndian.PutUint32(digests[(i+2*16)*4:], init2) 432 binary.LittleEndian.PutUint32(digests[(i+3*16)*4:], init3) 433 binary.LittleEndian.PutUint32(digests[(i+4*16)*4:], init4) 434 binary.LittleEndian.PutUint32(digests[(i+5*16)*4:], init5) 435 binary.LittleEndian.PutUint32(digests[(i+6*16)*4:], init6) 436 binary.LittleEndian.PutUint32(digests[(i+7*16)*4:], init7) 437 } 438 } 439 return &digests 440} 441 442// Helper struct for sorting blocks based on length 443type lane struct { 444 len uint 445 pos uint 446} 447 448type lanes []lane 449 450func (lns lanes) Len() int { return len(lns) } 451func (lns lanes) Swap(i, j int) { lns[i], lns[j] = lns[j], lns[i] } 452func (lns lanes) Less(i, j int) bool { return lns[i].len < lns[j].len 
} 453 454// Helper struct for 455type maskRounds struct { 456 mask uint64 457 rounds uint64 458} 459 460func genMask(input [16][]byte) [16]maskRounds { 461 462 // Sort on blocks length small to large 463 var sorted [16]lane 464 for c, inpt := range input { 465 sorted[c] = lane{uint(len(inpt)), uint(c)} 466 } 467 sort.Sort(lanes(sorted[:])) 468 469 // Create mask array including 'rounds' between masks 470 m, round, index := uint64(0xffff), uint64(0), 0 471 var mr [16]maskRounds 472 for _, s := range sorted { 473 if s.len > 0 { 474 if uint64(s.len)>>6 > round { 475 mr[index] = maskRounds{m, (uint64(s.len) >> 6) - round} 476 index++ 477 } 478 round = uint64(s.len) >> 6 479 } 480 m = m & ^(1 << uint(s.pos)) 481 } 482 483 return mr 484} 485 486// TODO: remove function 487func expandMask(mr [16]maskRounds) []uint64 { 488 size := uint64(0) 489 for _, r := range mr { 490 size += r.rounds 491 } 492 result, index := make([]uint64, size), 0 493 for _, r := range mr { 494 for j := uint64(0); j < r.rounds; j++ { 495 result[index] = r.mask 496 index++ 497 } 498 } 499 return result 500} 501