1// +build windows
2
3package backuptar
4
5import (
6	"archive/tar"
7	"encoding/base64"
8	"fmt"
9	"io"
10	"io/ioutil"
11	"path/filepath"
12	"strconv"
13	"strings"
14	"syscall"
15	"time"
16
17	"github.com/Microsoft/go-winio"
18	"golang.org/x/sys/windows"
19)
20
// Mode bits that can be OR'ed into tar.Header.Mode. All values are octal
// literals; the trailing comment on each line names the bit. Of these, the
// code visible in this file only ever sets c_ISDIR, c_ISREG and c_ISLNK.
const (
	c_ISUID  = 04000   // Set uid
	c_ISGID  = 02000   // Set gid
	c_ISVTX  = 01000   // Save text (sticky bit)
	c_ISDIR  = 040000  // Directory
	c_ISFIFO = 010000  // FIFO
	c_ISREG  = 0100000 // Regular file
	c_ISLNK  = 0120000 // Symbolic link
	c_ISBLK  = 060000  // Block special file
	c_ISCHR  = 020000  // Character special file
	c_ISSOCK = 0140000 // Socket
)
33
// Keys of the PAX records used to carry Win32 metadata in a tar header.
//
//   - hdrFileAttributes: the Win32 file attributes, as a decimal string.
//   - hdrSecurityDescriptor: the security descriptor in SDDL form. It is only
//     read (never written) here, for compatibility with older archives.
//   - hdrRawSecurityDescriptor: the binary security descriptor, base64 encoded.
//   - hdrMountPoint: present (value "1") when a symlink entry is really a
//     mount point.
//   - hdrEaPrefix: prefix of one record per extended attribute; the EA name
//     follows the prefix and the record value is the base64-encoded EA value.
//   - hdrCreationTime: the file creation time, in the format produced by
//     formatPAXTime and read back by parsePAXTime.
const (
	hdrFileAttributes        = "MSWINDOWS.fileattr"
	hdrSecurityDescriptor    = "MSWINDOWS.sd"
	hdrRawSecurityDescriptor = "MSWINDOWS.rawsd"
	hdrMountPoint            = "MSWINDOWS.mountpoint"
	hdrEaPrefix              = "MSWINDOWS.xattr."

	hdrCreationTime = "LIBARCHIVE.creationtime"
)
43
// zeroReader is an io.Reader that yields an endless run of zero bytes. It is
// used to pad the unallocated gaps of sparse files when writing them to a tar
// stream.
type zeroReader struct{}

// Read overwrites every byte of b with 0 and reports the whole buffer as read.
// It never returns an error and never signals io.EOF, so callers must bound
// the read themselves (for example with io.CopyN).
func (zeroReader) Read(b []byte) (int, error) {
	filled := len(b)
	for i := range b {
		b[i] = 0
	}
	return filled, nil
}
53
54func copySparse(t *tar.Writer, br *winio.BackupStreamReader) error {
55	curOffset := int64(0)
56	for {
57		bhdr, err := br.Next()
58		if err == io.EOF {
59			err = io.ErrUnexpectedEOF
60		}
61		if err != nil {
62			return err
63		}
64		if bhdr.Id != winio.BackupSparseBlock {
65			return fmt.Errorf("unexpected stream %d", bhdr.Id)
66		}
67
68		// We can't seek backwards, since we have already written that data to the tar.Writer.
69		if bhdr.Offset < curOffset {
70			return fmt.Errorf("cannot seek back from %d to %d", curOffset, bhdr.Offset)
71		}
72		// archive/tar does not support writing sparse files
73		// so just write zeroes to catch up to the current offset.
74		if _, err := io.CopyN(t, zeroReader{}, bhdr.Offset-curOffset); err != nil {
75			return fmt.Errorf("seek to offset %d: %s", bhdr.Offset, err)
76		}
77		if bhdr.Size == 0 {
78			// A sparse block with size = 0 is used to mark the end of the sparse blocks.
79			break
80		}
81		n, err := io.Copy(t, br)
82		if err != nil {
83			return err
84		}
85		if n != bhdr.Size {
86			return fmt.Errorf("copied %d bytes instead of %d at offset %d", n, bhdr.Size, bhdr.Offset)
87		}
88		curOffset = bhdr.Offset + n
89	}
90	return nil
91}
92
93// BasicInfoHeader creates a tar header from basic file information.
94func BasicInfoHeader(name string, size int64, fileInfo *winio.FileBasicInfo) *tar.Header {
95	hdr := &tar.Header{
96		Format:     tar.FormatPAX,
97		Name:       filepath.ToSlash(name),
98		Size:       size,
99		Typeflag:   tar.TypeReg,
100		ModTime:    time.Unix(0, fileInfo.LastWriteTime.Nanoseconds()),
101		ChangeTime: time.Unix(0, fileInfo.ChangeTime.Nanoseconds()),
102		AccessTime: time.Unix(0, fileInfo.LastAccessTime.Nanoseconds()),
103		PAXRecords: make(map[string]string),
104	}
105	hdr.PAXRecords[hdrFileAttributes] = fmt.Sprintf("%d", fileInfo.FileAttributes)
106	hdr.PAXRecords[hdrCreationTime] = formatPAXTime(time.Unix(0, fileInfo.CreationTime.Nanoseconds()))
107
108	if (fileInfo.FileAttributes & syscall.FILE_ATTRIBUTE_DIRECTORY) != 0 {
109		hdr.Mode |= c_ISDIR
110		hdr.Size = 0
111		hdr.Typeflag = tar.TypeDir
112	}
113	return hdr
114}
115
116// WriteTarFileFromBackupStream writes a file to a tar writer using data from a Win32 backup stream.
117//
118// This encodes Win32 metadata as tar pax vendor extensions starting with MSWINDOWS.
119//
120// The additional Win32 metadata is:
121//
122// MSWINDOWS.fileattr: The Win32 file attributes, as a decimal value
123//
124// MSWINDOWS.rawsd: The Win32 security descriptor, in raw binary format
125//
126// MSWINDOWS.mountpoint: If present, this is a mount point and not a symlink, even though the type is '2' (symlink)
127func WriteTarFileFromBackupStream(t *tar.Writer, r io.Reader, name string, size int64, fileInfo *winio.FileBasicInfo) error {
128	name = filepath.ToSlash(name)
129	hdr := BasicInfoHeader(name, size, fileInfo)
130
131	// If r can be seeked, then this function is two-pass: pass 1 collects the
132	// tar header data, and pass 2 copies the data stream. If r cannot be
133	// seeked, then some header data (in particular EAs) will be silently lost.
134	var (
135		restartPos int64
136		err        error
137	)
138	sr, readTwice := r.(io.Seeker)
139	if readTwice {
140		if restartPos, err = sr.Seek(0, io.SeekCurrent); err != nil {
141			readTwice = false
142		}
143	}
144
145	br := winio.NewBackupStreamReader(r)
146	var dataHdr *winio.BackupHeader
147	for dataHdr == nil {
148		bhdr, err := br.Next()
149		if err == io.EOF {
150			break
151		}
152		if err != nil {
153			return err
154		}
155		switch bhdr.Id {
156		case winio.BackupData:
157			hdr.Mode |= c_ISREG
158			if !readTwice {
159				dataHdr = bhdr
160			}
161		case winio.BackupSecurity:
162			sd, err := ioutil.ReadAll(br)
163			if err != nil {
164				return err
165			}
166			hdr.PAXRecords[hdrRawSecurityDescriptor] = base64.StdEncoding.EncodeToString(sd)
167
168		case winio.BackupReparseData:
169			hdr.Mode |= c_ISLNK
170			hdr.Typeflag = tar.TypeSymlink
171			reparseBuffer, err := ioutil.ReadAll(br)
172			rp, err := winio.DecodeReparsePoint(reparseBuffer)
173			if err != nil {
174				return err
175			}
176			if rp.IsMountPoint {
177				hdr.PAXRecords[hdrMountPoint] = "1"
178			}
179			hdr.Linkname = rp.Target
180
181		case winio.BackupEaData:
182			eab, err := ioutil.ReadAll(br)
183			if err != nil {
184				return err
185			}
186			eas, err := winio.DecodeExtendedAttributes(eab)
187			if err != nil {
188				return err
189			}
190			for _, ea := range eas {
191				// Use base64 encoding for the binary value. Note that there
192				// is no way to encode the EA's flags, since their use doesn't
193				// make any sense for persisted EAs.
194				hdr.PAXRecords[hdrEaPrefix+ea.Name] = base64.StdEncoding.EncodeToString(ea.Value)
195			}
196
197		case winio.BackupAlternateData, winio.BackupLink, winio.BackupPropertyData, winio.BackupObjectId, winio.BackupTxfsData:
198			// ignore these streams
199		default:
200			return fmt.Errorf("%s: unknown stream ID %d", name, bhdr.Id)
201		}
202	}
203
204	err = t.WriteHeader(hdr)
205	if err != nil {
206		return err
207	}
208
209	if readTwice {
210		// Get back to the data stream.
211		if _, err = sr.Seek(restartPos, io.SeekStart); err != nil {
212			return err
213		}
214		for dataHdr == nil {
215			bhdr, err := br.Next()
216			if err == io.EOF {
217				break
218			}
219			if err != nil {
220				return err
221			}
222			if bhdr.Id == winio.BackupData {
223				dataHdr = bhdr
224			}
225		}
226	}
227
228	// The logic for copying file contents is fairly complicated due to the need for handling sparse files,
229	// and the weird ways they are represented by BackupRead. A normal file will always either have a data stream
230	// with size and content, or no data stream at all (if empty). However, for a sparse file, the content can also
231	// be represented using a series of sparse block streams following the data stream. Additionally, the way sparse
232	// files are handled by BackupRead has changed in the OS recently. The specifics of the representation are described
233	// in the list at the bottom of this block comment.
234	//
235	// Sparse files can be represented in four different ways, based on the specifics of the file.
236	// - Size = 0:
237	//     Previously: BackupRead yields no data stream and no sparse block streams.
238	//     Recently: BackupRead yields a data stream with size = 0. There are no following sparse block streams.
239	// - Size > 0, no allocated ranges:
240	//     BackupRead yields a data stream with size = 0. Following is a single sparse block stream with
241	//     size = 0 and offset = <file size>.
242	// - Size > 0, one allocated range:
243	//     BackupRead yields a data stream with size = <file size> containing the file contents. There are no
244	//     sparse block streams. This is the case if you take a normal file with contents and simply set the
245	//     sparse flag on it.
246	// - Size > 0, multiple allocated ranges:
247	//     BackupRead yields a data stream with size = 0. Following are sparse block streams for each allocated
248	//     range of the file containing the range contents. Finally there is a sparse block stream with
249	//     size = 0 and offset = <file size>.
250
251	if dataHdr != nil {
252		// A data stream was found. Copy the data.
253		// We assume that we will either have a data stream size > 0 XOR have sparse block streams.
254		if dataHdr.Size > 0 || (dataHdr.Attributes&winio.StreamSparseAttributes) == 0 {
255			if size != dataHdr.Size {
256				return fmt.Errorf("%s: mismatch between file size %d and header size %d", name, size, dataHdr.Size)
257			}
258			if _, err = io.Copy(t, br); err != nil {
259				return fmt.Errorf("%s: copying contents from data stream: %s", name, err)
260			}
261		} else if size > 0 {
262			// As of a recent OS change, BackupRead now returns a data stream for empty sparse files.
263			// These files have no sparse block streams, so skip the copySparse call if file size = 0.
264			if err = copySparse(t, br); err != nil {
265				return fmt.Errorf("%s: copying contents from sparse block stream: %s", name, err)
266			}
267		}
268	}
269
270	// Look for streams after the data stream. The only ones we handle are alternate data streams.
271	// Other streams may have metadata that could be serialized, but the tar header has already
272	// been written. In practice, this means that we don't get EA or TXF metadata.
273	for {
274		bhdr, err := br.Next()
275		if err == io.EOF {
276			break
277		}
278		if err != nil {
279			return err
280		}
281		switch bhdr.Id {
282		case winio.BackupAlternateData:
283			altName := bhdr.Name
284			if strings.HasSuffix(altName, ":$DATA") {
285				altName = altName[:len(altName)-len(":$DATA")]
286			}
287			if (bhdr.Attributes & winio.StreamSparseAttributes) == 0 {
288				hdr = &tar.Header{
289					Format:     hdr.Format,
290					Name:       name + altName,
291					Mode:       hdr.Mode,
292					Typeflag:   tar.TypeReg,
293					Size:       bhdr.Size,
294					ModTime:    hdr.ModTime,
295					AccessTime: hdr.AccessTime,
296					ChangeTime: hdr.ChangeTime,
297				}
298				err = t.WriteHeader(hdr)
299				if err != nil {
300					return err
301				}
302				_, err = io.Copy(t, br)
303				if err != nil {
304					return err
305				}
306
307			} else {
308				// Unsupported for now, since the size of the alternate stream is not present
309				// in the backup stream until after the data has been read.
310				return fmt.Errorf("%s: tar of sparse alternate data streams is unsupported", name)
311			}
312		case winio.BackupEaData, winio.BackupLink, winio.BackupPropertyData, winio.BackupObjectId, winio.BackupTxfsData:
313			// ignore these streams
314		default:
315			return fmt.Errorf("%s: unknown stream ID %d after data", name, bhdr.Id)
316		}
317	}
318	return nil
319}
320
321// FileInfoFromHeader retrieves basic Win32 file information from a tar header, using the additional metadata written by
322// WriteTarFileFromBackupStream.
323func FileInfoFromHeader(hdr *tar.Header) (name string, size int64, fileInfo *winio.FileBasicInfo, err error) {
324	name = hdr.Name
325	if hdr.Typeflag == tar.TypeReg || hdr.Typeflag == tar.TypeRegA {
326		size = hdr.Size
327	}
328	fileInfo = &winio.FileBasicInfo{
329		LastAccessTime: windows.NsecToFiletime(hdr.AccessTime.UnixNano()),
330		LastWriteTime:  windows.NsecToFiletime(hdr.ModTime.UnixNano()),
331		ChangeTime:     windows.NsecToFiletime(hdr.ChangeTime.UnixNano()),
332		// Default to ModTime, we'll pull hdrCreationTime below if present
333		CreationTime: windows.NsecToFiletime(hdr.ModTime.UnixNano()),
334	}
335	if attrStr, ok := hdr.PAXRecords[hdrFileAttributes]; ok {
336		attr, err := strconv.ParseUint(attrStr, 10, 32)
337		if err != nil {
338			return "", 0, nil, err
339		}
340		fileInfo.FileAttributes = uint32(attr)
341	} else {
342		if hdr.Typeflag == tar.TypeDir {
343			fileInfo.FileAttributes |= syscall.FILE_ATTRIBUTE_DIRECTORY
344		}
345	}
346	if creationTimeStr, ok := hdr.PAXRecords[hdrCreationTime]; ok {
347		creationTime, err := parsePAXTime(creationTimeStr)
348		if err != nil {
349			return "", 0, nil, err
350		}
351		fileInfo.CreationTime = windows.NsecToFiletime(creationTime.UnixNano())
352	}
353	return
354}
355
356// WriteBackupStreamFromTarFile writes a Win32 backup stream from the current tar file. Since this function may process multiple
357// tar file entries in order to collect all the alternate data streams for the file, it returns the next
358// tar file that was not processed, or io.EOF is there are no more.
359func WriteBackupStreamFromTarFile(w io.Writer, t *tar.Reader, hdr *tar.Header) (*tar.Header, error) {
360	bw := winio.NewBackupStreamWriter(w)
361	var sd []byte
362	var err error
363	// Maintaining old SDDL-based behavior for backward compatibility.  All new tar headers written
364	// by this library will have raw binary for the security descriptor.
365	if sddl, ok := hdr.PAXRecords[hdrSecurityDescriptor]; ok {
366		sd, err = winio.SddlToSecurityDescriptor(sddl)
367		if err != nil {
368			return nil, err
369		}
370	}
371	if sdraw, ok := hdr.PAXRecords[hdrRawSecurityDescriptor]; ok {
372		sd, err = base64.StdEncoding.DecodeString(sdraw)
373		if err != nil {
374			return nil, err
375		}
376	}
377	if len(sd) != 0 {
378		bhdr := winio.BackupHeader{
379			Id:   winio.BackupSecurity,
380			Size: int64(len(sd)),
381		}
382		err := bw.WriteHeader(&bhdr)
383		if err != nil {
384			return nil, err
385		}
386		_, err = bw.Write(sd)
387		if err != nil {
388			return nil, err
389		}
390	}
391	var eas []winio.ExtendedAttribute
392	for k, v := range hdr.PAXRecords {
393		if !strings.HasPrefix(k, hdrEaPrefix) {
394			continue
395		}
396		data, err := base64.StdEncoding.DecodeString(v)
397		if err != nil {
398			return nil, err
399		}
400		eas = append(eas, winio.ExtendedAttribute{
401			Name:  k[len(hdrEaPrefix):],
402			Value: data,
403		})
404	}
405	if len(eas) != 0 {
406		eadata, err := winio.EncodeExtendedAttributes(eas)
407		if err != nil {
408			return nil, err
409		}
410		bhdr := winio.BackupHeader{
411			Id:   winio.BackupEaData,
412			Size: int64(len(eadata)),
413		}
414		err = bw.WriteHeader(&bhdr)
415		if err != nil {
416			return nil, err
417		}
418		_, err = bw.Write(eadata)
419		if err != nil {
420			return nil, err
421		}
422	}
423	if hdr.Typeflag == tar.TypeSymlink {
424		_, isMountPoint := hdr.PAXRecords[hdrMountPoint]
425		rp := winio.ReparsePoint{
426			Target:       filepath.FromSlash(hdr.Linkname),
427			IsMountPoint: isMountPoint,
428		}
429		reparse := winio.EncodeReparsePoint(&rp)
430		bhdr := winio.BackupHeader{
431			Id:   winio.BackupReparseData,
432			Size: int64(len(reparse)),
433		}
434		err := bw.WriteHeader(&bhdr)
435		if err != nil {
436			return nil, err
437		}
438		_, err = bw.Write(reparse)
439		if err != nil {
440			return nil, err
441		}
442	}
443	if hdr.Typeflag == tar.TypeReg || hdr.Typeflag == tar.TypeRegA {
444		bhdr := winio.BackupHeader{
445			Id:   winio.BackupData,
446			Size: hdr.Size,
447		}
448		err := bw.WriteHeader(&bhdr)
449		if err != nil {
450			return nil, err
451		}
452		_, err = io.Copy(bw, t)
453		if err != nil {
454			return nil, err
455		}
456	}
457	// Copy all the alternate data streams and return the next non-ADS header.
458	for {
459		ahdr, err := t.Next()
460		if err != nil {
461			return nil, err
462		}
463		if ahdr.Typeflag != tar.TypeReg || !strings.HasPrefix(ahdr.Name, hdr.Name+":") {
464			return ahdr, nil
465		}
466		bhdr := winio.BackupHeader{
467			Id:   winio.BackupAlternateData,
468			Size: ahdr.Size,
469			Name: ahdr.Name[len(hdr.Name):] + ":$DATA",
470		}
471		err = bw.WriteHeader(&bhdr)
472		if err != nil {
473			return nil, err
474		}
475		_, err = io.Copy(bw, t)
476		if err != nil {
477			return nil, err
478		}
479	}
480}
481