1// Package xopen makes it easy to get buffered readers and writers. 2// Ropen opens a (possibly gzipped) file/process/http site for buffered reading. 3// Wopen opens a (possibly gzipped) file for buffered writing. 4// Both will use gzip when appropriate and will user buffered IO. 5package xopen 6 7import ( 8 "bufio" 9 "errors" 10 "fmt" 11 "io" 12 "net/http" 13 "os" 14 "os/exec" 15 "os/user" 16 "path/filepath" 17 "strings" 18 19 gzip "github.com/klauspost/pgzip" 20 //"github.com/klauspost/compress/gzip" 21 // "compress/gzip" 22) 23 24// ErrNoContent means nothing in the stream/file. 25var ErrNoContent = errors.New("xopen: no content") 26 27// ErrDirNotSupported means the path is a directory. 28var ErrDirNotSupported = errors.New("xopen: input is a directory") 29 30// IsGzip returns true buffered Reader has the gzip magic. 31func IsGzip(b *bufio.Reader) (bool, error) { 32 return CheckBytes(b, []byte{0x1f, 0x8b}) 33} 34 35// IsStdin checks if we are getting data from stdin. 36func IsStdin() bool { 37 // http://stackoverflow.com/a/26567513 38 stat, err := os.Stdin.Stat() 39 if err != nil { 40 return false 41 } 42 return (stat.Mode() & os.ModeCharDevice) == 0 43} 44 45// ExpandUser expands ~/path and ~otheruser/path appropriately 46func ExpandUser(path string) (string, error) { 47 if path[0] != '~' { 48 return path, nil 49 } 50 var u *user.User 51 var err error 52 if len(path) == 1 || path[1] == '/' { 53 u, err = user.Current() 54 } else { 55 name := strings.Split(path[1:], "/")[0] 56 u, err = user.Lookup(name) 57 } 58 if err != nil { 59 return "", err 60 } 61 home := u.HomeDir 62 path = home + "/" + path[1:] 63 return path, nil 64} 65 66// Exists checks if a local file exits 67func Exists(path string) bool { 68 path, perr := ExpandUser(path) 69 if perr != nil { 70 return false 71 } 72 _, err := os.Stat(path) 73 return err == nil 74} 75 76// CheckBytes peeks at a buffered stream and checks if the first read bytes match. 77func CheckBytes(b *bufio.Reader, buf []byte) (bool, error) { 78 79 m, err := b.Peek(len(buf)) 80 if err != nil { 81 return false, ErrNoContent 82 } 83 for i := range buf { 84 if m[i] != buf[i] { 85 return false, nil 86 } 87 } 88 return true, nil 89} 90 91// Reader is returned by Ropen 92type Reader struct { 93 *bufio.Reader 94 rdr io.Reader 95 gz io.ReadCloser 96} 97 98// Close the associated files. 99func (r *Reader) Close() error { 100 if r.gz != nil { 101 r.gz.Close() 102 } 103 if c, ok := r.rdr.(io.ReadCloser); ok { 104 c.Close() 105 } 106 return nil 107} 108 109// Writer is returned by Wopen 110type Writer struct { 111 *bufio.Writer 112 wtr *os.File 113 gz *gzip.Writer 114} 115 116// Close the associated files. 117func (w *Writer) Close() error { 118 w.Flush() 119 if w.gz != nil { 120 w.gz.Close() 121 } 122 w.wtr.Close() 123 return nil 124} 125 126// Flush the writer. 127func (w *Writer) Flush() { 128 w.Writer.Flush() 129 if w.gz != nil { 130 w.gz.Flush() 131 } 132} 133 134var pageSize = os.Getpagesize() * 2 135 136// Buf returns a buffered reader from an io.Reader 137// If f == "-", then it will attempt to read from os.Stdin. 138// If the file is gzipped, it will be read as such. 139func Buf(r io.Reader) (*Reader, error) { 140 b := bufio.NewReaderSize(r, pageSize) 141 var rdr io.ReadCloser 142 if is, err := IsGzip(b); err != nil && err != io.EOF { 143 return nil, err 144 } else if is { 145 // rdr, err = newFastGzReader(b) 146 rdr, err = gzip.NewReader(b) 147 if err != nil { 148 return nil, err 149 } 150 b = bufio.NewReaderSize(rdr, pageSize) 151 } 152 153 // check BOM 154 t, _, err := b.ReadRune() 155 if err != nil { 156 return nil, ErrNoContent 157 } 158 if t != '\uFEFF' { 159 b.UnreadRune() 160 } 161 return &Reader{b, r, rdr}, nil 162} 163 164// XReader returns a reader from a url string or a file. 165func XReader(f string) (io.Reader, error) { 166 if strings.HasPrefix(f, "http://") || strings.HasPrefix(f, "https://") { 167 var rsp *http.Response 168 rsp, err := http.Get(f) 169 if err != nil { 170 return nil, err 171 } 172 if rsp.StatusCode != 200 { 173 return nil, fmt.Errorf("http error downloading %s. status: %s", f, rsp.Status) 174 } 175 rdr := rsp.Body 176 return rdr, nil 177 } 178 f, err := ExpandUser(f) 179 if err != nil { 180 return nil, err 181 } 182 183 fi, err := os.Stat(f) 184 if err != nil { 185 return nil, err 186 } 187 if fi.IsDir() { 188 return nil, ErrDirNotSupported 189 } 190 191 return os.Open(f) 192} 193 194// Ropen opens a buffered reader. 195func Ropen(f string) (*Reader, error) { 196 var err error 197 var rdr io.Reader 198 if f == "-" { 199 if !IsStdin() { 200 return nil, errors.New("stdin not detected") 201 } 202 b, err := Buf(os.Stdin) 203 return b, err 204 } else if f[0] == '|' { 205 // TODO: use csv to handle quoted file names. 206 cmdStrs := strings.Split(f[1:], " ") 207 var cmd *exec.Cmd 208 if len(cmdStrs) == 2 { 209 cmd = exec.Command(cmdStrs[0], cmdStrs[1:]...) 210 } else { 211 cmd = exec.Command(cmdStrs[0]) 212 } 213 rdr, err = cmd.StdoutPipe() 214 if err != nil { 215 return nil, err 216 } 217 err = cmd.Start() 218 if err != nil { 219 return nil, err 220 } 221 } else { 222 rdr, err = XReader(f) 223 } 224 if err != nil { 225 return nil, err 226 } 227 b, err := Buf(rdr) 228 return b, err 229} 230 231// Wopen opens a buffered reader. 232// If f == "-", then stdout will be used. 233// If f endswith ".gz", then the output will be gzipped. 234func Wopen(f string) (*Writer, error) { 235 var wtr *os.File 236 if f == "-" { 237 wtr = os.Stdout 238 } else { 239 dir := filepath.Dir(f) 240 fi, err := os.Stat(dir) 241 if err == nil && !fi.IsDir() { 242 return nil, fmt.Errorf("can not write file into a non-directory path: %s", dir) 243 } 244 if os.IsNotExist(err) { 245 os.MkdirAll(dir, 0755) 246 } 247 248 wtr, err = os.Create(f) 249 if err != nil { 250 return nil, err 251 } 252 } 253 if !strings.HasSuffix(f, ".gz") { 254 return &Writer{bufio.NewWriterSize(wtr, pageSize), wtr, nil}, nil 255 } 256 gz := gzip.NewWriter(wtr) 257 return &Writer{bufio.NewWriterSize(gz, pageSize), wtr, gz}, nil 258} 259 260// WopenGzip opens a buffered gzipped reader. 261// If f == "-", then stdout will be used. 262func WopenGzip(f string) (*Writer, error) { 263 var wtr *os.File 264 if f == "-" { 265 wtr = os.Stdout 266 } else { 267 dir := filepath.Dir(f) 268 fi, err := os.Stat(dir) 269 if err == nil && !fi.IsDir() { 270 return nil, fmt.Errorf("can not write file into a non-directory path: %s", dir) 271 } 272 if os.IsNotExist(err) { 273 os.MkdirAll(dir, 0755) 274 } 275 wtr, err = os.Create(f) 276 if err != nil { 277 return nil, err 278 } 279 } 280 gz := gzip.NewWriter(wtr) 281 return &Writer{bufio.NewWriterSize(gz, pageSize), wtr, gz}, nil 282} 283 284// WopenFile opens a buffered reader. 285// If f == "-", then stdout will be used. 286// If f endswith ".gz", then the output will be gzipped. 287func WopenFile(f string, flag int, perm os.FileMode) (*Writer, error) { 288 var wtr *os.File 289 if f == "-" { 290 wtr = os.Stdout 291 } else { 292 dir := filepath.Dir(f) 293 fi, err := os.Stat(dir) 294 if err == nil && !fi.IsDir() { 295 return nil, fmt.Errorf("can not write file into a non-directory path: %s", dir) 296 } 297 if os.IsNotExist(err) { 298 os.MkdirAll(dir, 0755) 299 } 300 wtr, err = os.OpenFile(f, flag, perm) 301 if err != nil { 302 return nil, err 303 } 304 } 305 if !strings.HasSuffix(f, ".gz") { 306 return &Writer{bufio.NewWriterSize(wtr, pageSize), wtr, nil}, nil 307 } 308 gz := gzip.NewWriter(wtr) 309 return &Writer{bufio.NewWriterSize(gz, pageSize), wtr, gz}, nil 310} 311