1// Package xopen makes it easy to get buffered readers and writers.
2// Ropen opens a (possibly gzipped) file/process/http site for buffered reading.
3// Wopen opens a (possibly gzipped) file for buffered writing.
// Both will use gzip when appropriate and will use buffered IO.
5package xopen
6
7import (
8	"bufio"
9	"errors"
10	"fmt"
11	"io"
12	"net/http"
13	"os"
14	"os/exec"
15	"os/user"
16	"path/filepath"
17	"strings"
18
19	gzip "github.com/klauspost/pgzip"
20	//"github.com/klauspost/compress/gzip"
21	// "compress/gzip"
22)
23
// Sentinel errors returned by this package.
var (
	// ErrNoContent is returned when a stream or file has nothing to read.
	ErrNoContent = errors.New("xopen: no content")

	// ErrDirNotSupported is returned when the given path is a directory.
	ErrDirNotSupported = errors.New("xopen: input is a directory")
)
29
30// IsGzip returns true buffered Reader has the gzip magic.
31func IsGzip(b *bufio.Reader) (bool, error) {
32	return CheckBytes(b, []byte{0x1f, 0x8b})
33}
34
// IsStdin reports whether os.Stdin is something other than a character
// device. It returns false when stdin cannot be stat'ed.
func IsStdin() bool {
	// http://stackoverflow.com/a/26567513
	info, err := os.Stdin.Stat()
	return err == nil && info.Mode()&os.ModeCharDevice == 0
}
44
// ExpandUser expands a leading "~" or "~otheruser" in path to the matching
// home directory.
//
// Paths that do not start with '~' (including the empty string) are returned
// unchanged. For "~" and "~/rest" the current user's home is used; for
// "~name" and "~name/rest" the home of user name is looked up. The result is
// built with filepath.Join, so it is lexically cleaned. An error is returned
// when the user cannot be determined.
func ExpandUser(path string) (string, error) {
	// HasPrefix is safe on the empty string, unlike indexing path[0].
	if !strings.HasPrefix(path, "~") {
		return path, nil
	}

	// Split "~name/rest" into the user name and the remainder so that the
	// name is not left inside the expanded path.
	name, rest := path[1:], ""
	if i := strings.IndexByte(name, '/'); i >= 0 {
		name, rest = name[:i], name[i+1:]
	}

	var (
		u   *user.User
		err error
	)
	if name == "" {
		u, err = user.Current()
	} else {
		u, err = user.Lookup(name)
	}
	if err != nil {
		return "", err
	}
	return filepath.Join(u.HomeDir, rest), nil
}
65
66// Exists checks if a local file exits
67func Exists(path string) bool {
68	path, perr := ExpandUser(path)
69	if perr != nil {
70		return false
71	}
72	_, err := os.Stat(path)
73	return err == nil
74}
75
76// CheckBytes peeks at a buffered stream and checks if the first read bytes match.
77func CheckBytes(b *bufio.Reader, buf []byte) (bool, error) {
78
79	m, err := b.Peek(len(buf))
80	if err != nil {
81		return false, ErrNoContent
82	}
83	for i := range buf {
84		if m[i] != buf[i] {
85			return false, nil
86		}
87	}
88	return true, nil
89}
90
// Reader is returned by Ropen and Buf. It embeds a *bufio.Reader for reading
// and remembers the underlying resources so that Close can release them.
type Reader struct {
	*bufio.Reader
	rdr io.Reader     // source the buffered reader was built on
	gz  io.ReadCloser // gzip decompressor; nil when the input is not gzipped
}

// Close closes the gzip decompressor (if any) and then the underlying
// source if it implements io.Closer. Both are always attempted; the first
// error encountered is returned.
func (r *Reader) Close() error {
	var firstErr error
	if r.gz != nil {
		firstErr = r.gz.Close()
	}
	if c, ok := r.rdr.(io.Closer); ok {
		if err := c.Close(); err != nil && firstErr == nil {
			firstErr = err
		}
	}
	return firstErr
}
108
// Writer is returned by Wopen. It embeds a *bufio.Writer for writing and
// keeps the destination file and optional gzip compressor so that Close and
// Flush can drive the whole chain.
type Writer struct {
	*bufio.Writer
	wtr *os.File     // destination file
	gz  *gzip.Writer // gzip compressor; nil when output is not compressed
}

// Close flushes buffered data, finishes the gzip stream (if any) and closes
// the destination file. Every step is always attempted, in that order; the
// first error encountered is returned so that failed or truncated output is
// not reported as success.
func (w *Writer) Close() error {
	firstErr := w.Writer.Flush()
	keep := func(err error) {
		if firstErr == nil {
			firstErr = err
		}
	}
	if w.gz != nil {
		keep(w.gz.Flush())
		keep(w.gz.Close())
	}
	keep(w.wtr.Close())
	return firstErr
}

// Flush pushes buffered data through the bufio layer and, when compressing,
// through the gzip layer. It shadows the embedded bufio.Writer.Flush and has
// no return value, so errors from either layer are discarded here; Close
// reports them.
func (w *Writer) Flush() {
	w.Writer.Flush()
	if w.gz != nil {
		w.gz.Flush()
	}
}
133
// pageSize is the buffer size used for the buffered readers and writers in
// this package: twice the value reported by os.Getpagesize.
var pageSize = 2 * os.Getpagesize()
135
136// Buf returns a buffered reader from an io.Reader
137// If f == "-", then it will attempt to read from os.Stdin.
138// If the file is gzipped, it will be read as such.
139func Buf(r io.Reader) (*Reader, error) {
140	b := bufio.NewReaderSize(r, pageSize)
141	var rdr io.ReadCloser
142	if is, err := IsGzip(b); err != nil && err != io.EOF {
143		return nil, err
144	} else if is {
145		// rdr, err = newFastGzReader(b)
146		rdr, err = gzip.NewReader(b)
147		if err != nil {
148			return nil, err
149		}
150		b = bufio.NewReaderSize(rdr, pageSize)
151	}
152
153	// check BOM
154	t, _, err := b.ReadRune()
155	if err != nil {
156		return nil, ErrNoContent
157	}
158	if t != '\uFEFF' {
159		b.UnreadRune()
160	}
161	return &Reader{b, r, rdr}, nil
162}
163
164// XReader returns a reader from a url string or a file.
165func XReader(f string) (io.Reader, error) {
166	if strings.HasPrefix(f, "http://") || strings.HasPrefix(f, "https://") {
167		var rsp *http.Response
168		rsp, err := http.Get(f)
169		if err != nil {
170			return nil, err
171		}
172		if rsp.StatusCode != 200 {
173			return nil, fmt.Errorf("http error downloading %s. status: %s", f, rsp.Status)
174		}
175		rdr := rsp.Body
176		return rdr, nil
177	}
178	f, err := ExpandUser(f)
179	if err != nil {
180		return nil, err
181	}
182
183	fi, err := os.Stat(f)
184	if err != nil {
185		return nil, err
186	}
187	if fi.IsDir() {
188		return nil, ErrDirNotSupported
189	}
190
191	return os.Open(f)
192}
193
194// Ropen opens a buffered reader.
195func Ropen(f string) (*Reader, error) {
196	var err error
197	var rdr io.Reader
198	if f == "-" {
199		if !IsStdin() {
200			return nil, errors.New("stdin not detected")
201		}
202		b, err := Buf(os.Stdin)
203		return b, err
204	} else if f[0] == '|' {
205		// TODO: use csv to handle quoted file names.
206		cmdStrs := strings.Split(f[1:], " ")
207		var cmd *exec.Cmd
208		if len(cmdStrs) == 2 {
209			cmd = exec.Command(cmdStrs[0], cmdStrs[1:]...)
210		} else {
211			cmd = exec.Command(cmdStrs[0])
212		}
213		rdr, err = cmd.StdoutPipe()
214		if err != nil {
215			return nil, err
216		}
217		err = cmd.Start()
218		if err != nil {
219			return nil, err
220		}
221	} else {
222		rdr, err = XReader(f)
223	}
224	if err != nil {
225		return nil, err
226	}
227	b, err := Buf(rdr)
228	return b, err
229}
230
231// Wopen opens a buffered reader.
232// If f == "-", then stdout will be used.
233// If f endswith ".gz", then the output will be gzipped.
234func Wopen(f string) (*Writer, error) {
235	var wtr *os.File
236	if f == "-" {
237		wtr = os.Stdout
238	} else {
239		dir := filepath.Dir(f)
240		fi, err := os.Stat(dir)
241		if err == nil && !fi.IsDir() {
242			return nil, fmt.Errorf("can not write file into a non-directory path: %s", dir)
243		}
244		if os.IsNotExist(err) {
245			os.MkdirAll(dir, 0755)
246		}
247
248		wtr, err = os.Create(f)
249		if err != nil {
250			return nil, err
251		}
252	}
253	if !strings.HasSuffix(f, ".gz") {
254		return &Writer{bufio.NewWriterSize(wtr, pageSize), wtr, nil}, nil
255	}
256	gz := gzip.NewWriter(wtr)
257	return &Writer{bufio.NewWriterSize(gz, pageSize), wtr, gz}, nil
258}
259
260// WopenGzip opens a buffered gzipped reader.
261// If f == "-", then stdout will be used.
262func WopenGzip(f string) (*Writer, error) {
263	var wtr *os.File
264	if f == "-" {
265		wtr = os.Stdout
266	} else {
267		dir := filepath.Dir(f)
268		fi, err := os.Stat(dir)
269		if err == nil && !fi.IsDir() {
270			return nil, fmt.Errorf("can not write file into a non-directory path: %s", dir)
271		}
272		if os.IsNotExist(err) {
273			os.MkdirAll(dir, 0755)
274		}
275		wtr, err = os.Create(f)
276		if err != nil {
277			return nil, err
278		}
279	}
280	gz := gzip.NewWriter(wtr)
281	return &Writer{bufio.NewWriterSize(gz, pageSize), wtr, gz}, nil
282}
283
284// WopenFile opens a buffered reader.
285// If f == "-", then stdout will be used.
286// If f endswith ".gz", then the output will be gzipped.
287func WopenFile(f string, flag int, perm os.FileMode) (*Writer, error) {
288	var wtr *os.File
289	if f == "-" {
290		wtr = os.Stdout
291	} else {
292		dir := filepath.Dir(f)
293		fi, err := os.Stat(dir)
294		if err == nil && !fi.IsDir() {
295			return nil, fmt.Errorf("can not write file into a non-directory path: %s", dir)
296		}
297		if os.IsNotExist(err) {
298			os.MkdirAll(dir, 0755)
299		}
300		wtr, err = os.OpenFile(f, flag, perm)
301		if err != nil {
302			return nil, err
303		}
304	}
305	if !strings.HasSuffix(f, ".gz") {
306		return &Writer{bufio.NewWriterSize(wtr, pageSize), wtr, nil}, nil
307	}
308	gz := gzip.NewWriter(wtr)
309	return &Writer{bufio.NewWriterSize(gz, pageSize), wtr, gz}, nil
310}
311