// Copyright 2015, Joe Tsai. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE.md file.

package xflate

import (
	"encoding/binary"
	"hash/crc32"
	"io"

	"github.com/dsnet/compress/internal/errors"
	"github.com/dsnet/compress/xflate/internal/meta"
)

// A Writer is an io.Writer that can write the XFLATE format.
// The XFLATE stream outputted by this Writer can be read by both Reader and
// flate.Reader.
type Writer struct {
	// These statistics fields are automatically updated by Writer.
	// It is safe to set these values to any arbitrary value.
	InputOffset  int64 // Total number of bytes issued to Write
	OutputOffset int64 // Total number of bytes written to underlying io.Writer

	wr io.Writer
	mw meta.Writer  // Meta encoder used to write the index and footer
	zw *flateWriter // DEFLATE compressor

	idx  index // Index table of seekable offsets
	nidx int64 // Number of records per index
	nchk int64 // Raw size of each independent chunk
	err  error // Persistent error

	// The following fields are embedded here to reduce memory allocations.
	scratch [64]byte
}

// WriterConfig configures the Writer.
// The zero value for any field uses the default value for that field type.
type WriterConfig struct {
	// Underlying DEFLATE compression level.
	//
	// This compression level will be passed directly to the underlying DEFLATE
	// compressor. Higher values provide better compression ratio at the expense
	// of CPU time.
	Level int

	// Uncompressed size of each independent chunk.
	//
	// Each chunk will be compressed independently. This has the advantage that
	// the chunk can be decompressed without knowledge about the preceding
	// chunks, but has the disadvantage that it reduces the compression ratio.
	// Smaller ChunkSizes provide better random access properties, while larger
	// sizes provide better compression ratio.
	ChunkSize int64

	// The number of records in each index.
	//
	// When this number is reached, the index is automatically flushed. This is
	// done to ensure that there is some limit on the amount of memory needed to
	// represent the index. A negative value indicates that the Writer will
	// not automatically flush the index.
	//
	// The multiplication of the IndexSize and the size of each record (24 B)
	// gives an approximation for how much memory the index will occupy.
	// The multiplication of the IndexSize and the ChunkSize gives an
	// approximation for how much uncompressed data each index represents.
	IndexSize int64

	_ struct{} // Blank field to prevent unkeyed struct literals
}

// NewWriter creates a new Writer writing to the given writer.
// It is the caller's responsibility to call Close to complete the stream.
//
// If conf is nil, then default configuration values are used. Writer copies
// all configuration values as necessary and does not store conf.
func NewWriter(wr io.Writer, conf *WriterConfig) (*Writer, error) {
	var lvl int
	var nchk, nidx int64
	if conf != nil {
		lvl = conf.Level
		switch {
		case conf.ChunkSize < 0:
			return nil, errorf(errors.Invalid, "invalid chunk size: %d", conf.ChunkSize)
		case conf.ChunkSize > 0:
			nchk = conf.ChunkSize
		}
		switch {
		case conf.IndexSize < 0:
			// Negative IndexSize disables automatic index flushing.
			nidx = -1
		case conf.IndexSize > 0:
			nidx = conf.IndexSize
		}
	}

	zw, err := newFlateWriter(wr, lvl)
	if err != nil {
		return nil, err
	}
	// Zero nchk/nidx values are replaced with the defaults by Reset.
	xw := &Writer{wr: wr, zw: zw, nchk: nchk, nidx: nidx}
	xw.Reset(wr)
	return xw, nil
}

// Reset discards the Writer's state and makes it equivalent to the result
// of a call to NewWriter, but writes to wr instead. Any configurations from
// a prior call to NewWriter will be preserved.
109// 110// This is used to reduce memory allocations. 111func (xw *Writer) Reset(wr io.Writer) error { 112 *xw = Writer{ 113 wr: wr, 114 mw: xw.mw, 115 zw: xw.zw, 116 nchk: xw.nchk, 117 nidx: xw.nidx, 118 idx: xw.idx, 119 } 120 if xw.zw == nil { 121 xw.zw, _ = newFlateWriter(wr, DefaultCompression) 122 } else { 123 xw.zw.Reset(wr) 124 } 125 if xw.nchk == 0 { 126 xw.nchk = DefaultChunkSize 127 } 128 if xw.nidx == 0 { 129 xw.nidx = DefaultIndexSize 130 } 131 xw.idx.Reset() 132 return nil 133} 134 135// Write writes the compressed form of buf to the underlying io.Writer. 136// This automatically breaks the input into multiple chunks, writes them out, 137// and records the sizes of each chunk in the index table. 138func (xw *Writer) Write(buf []byte) (int, error) { 139 if xw.err != nil { 140 return 0, xw.err 141 } 142 143 var n, cnt int 144 for len(buf) > 0 && xw.err == nil { 145 // Flush chunk if necessary. 146 remain := xw.nchk - xw.zw.InputOffset 147 if remain <= 0 { 148 xw.err = xw.Flush(FlushFull) 149 continue 150 } 151 if remain > int64(len(buf)) { 152 remain = int64(len(buf)) 153 } 154 155 // Compress data for current chunk. 156 offset := xw.zw.OutputOffset 157 n, xw.err = xw.zw.Write(buf[:remain]) 158 xw.OutputOffset += xw.zw.OutputOffset - offset 159 buf = buf[n:] 160 cnt += n 161 } 162 163 xw.InputOffset += int64(cnt) 164 return cnt, xw.err 165} 166 167// Flush flushes the current write buffer to the underlying writer. 168// Flushing is entirely optional and should be used sparingly. 
169func (xw *Writer) Flush(mode FlushMode) error { 170 if xw.err != nil { 171 return xw.err 172 } 173 174 switch mode { 175 case FlushSync: 176 offset := xw.zw.OutputOffset 177 xw.err = xw.zw.Flush() 178 xw.OutputOffset += xw.zw.OutputOffset - offset 179 return xw.err 180 case FlushFull: 181 if xw.err = xw.Flush(FlushSync); xw.err != nil { 182 return xw.err 183 } 184 xw.idx.AppendRecord(xw.zw.OutputOffset, xw.zw.InputOffset, deflateType) 185 xw.zw.Reset(xw.wr) 186 if int64(len(xw.idx.Records)) == xw.nidx { 187 xw.err = xw.Flush(FlushIndex) 188 } 189 return xw.err 190 case FlushIndex: 191 if xw.zw.InputOffset+xw.zw.OutputOffset > 0 { 192 if err := xw.Flush(FlushFull); err != nil { 193 return err 194 } 195 } 196 xw.err = xw.encodeIndex(&xw.idx) 197 backSize := xw.idx.IndexSize 198 xw.idx.Reset() 199 xw.idx.BackSize = backSize 200 return xw.err 201 default: 202 return errorf(errors.Invalid, "invalid flush mode: %d", mode) 203 } 204} 205 206// Close ends the XFLATE stream and flushes all buffered data. 207// This method automatically writes an index if any chunks have been written 208// since the last FlushIndex. 209func (xw *Writer) Close() error { 210 if xw.err == errClosed { 211 return nil 212 } 213 if xw.err != nil { 214 return xw.err 215 } 216 217 // Flush final index. 218 if xw.zw.OutputOffset+xw.zw.InputOffset > 0 || len(xw.idx.Records) > 0 { 219 xw.err = xw.Flush(FlushIndex) 220 if xw.err != nil { 221 return xw.err 222 } 223 } 224 225 // Encode the footer. 226 err := xw.encodeFooter(xw.idx.BackSize) 227 if err != nil { 228 xw.err = err 229 } else { 230 xw.err = errClosed 231 } 232 return err 233} 234 235// encodeIndex encodes the index into a meta encoded stream. 236// The index.Records and index.BackSize fields must be populated. 237// The index.IndexSize field will be populated upon successful write. 238func (xw *Writer) encodeIndex(index *index) error { 239 // Helper function to write VLIs. 
240 var crc uint32 241 var errVLI error 242 writeVLI := func(x int64) { 243 b := xw.scratch[:binary.PutUvarint(xw.scratch[:], uint64(x))] 244 crc = crc32.Update(crc, crc32.MakeTable(crc32.IEEE), b) 245 if _, err := xw.mw.Write(b); err != nil { 246 errVLI = errWrap(err) 247 } 248 } 249 250 // Write the index. 251 xw.mw.Reset(xw.wr) 252 defer func() { xw.OutputOffset += xw.mw.OutputOffset }() 253 xw.mw.FinalMode = meta.FinalMeta 254 writeVLI(index.BackSize) 255 writeVLI(int64(len(index.Records))) 256 writeVLI(index.LastRecord().CompOffset) 257 writeVLI(index.LastRecord().RawOffset) 258 var preRec record 259 for _, rec := range index.Records { 260 writeVLI(rec.CompOffset - preRec.CompOffset) 261 writeVLI(rec.RawOffset - preRec.RawOffset) 262 preRec = rec 263 } 264 if errVLI != nil { 265 return errWrap(errVLI) 266 } 267 268 binary.LittleEndian.PutUint32(xw.scratch[:], crc) 269 if _, err := xw.mw.Write(xw.scratch[:4]); err != nil { 270 return errWrap(err) 271 } 272 if err := xw.mw.Close(); err != nil { 273 return errWrap(err) 274 } 275 index.IndexSize = xw.mw.OutputOffset // Record the encoded size 276 return nil 277} 278 279// encodeFooter writes the final footer, encoding the provided backSize into it. 280func (xw *Writer) encodeFooter(backSize int64) error { 281 var n int 282 n += copy(xw.scratch[n:], magic[:]) 283 n += binary.PutUvarint(xw.scratch[n:], uint64(backSize)) 284 285 xw.mw.Reset(xw.wr) 286 defer func() { xw.OutputOffset += xw.mw.OutputOffset }() 287 xw.mw.FinalMode = meta.FinalStream 288 if _, err := xw.mw.Write(xw.scratch[:n]); err != nil { 289 return errWrap(err) 290 } 291 if err := xw.mw.Close(); err != nil { 292 return errWrap(err) 293 } 294 if xw.mw.NumBlocks != 1 { 295 return errorf(errors.Internal, "footer was not a single block") 296 } 297 return nil 298} 299