1// Copyright 2015 Richard Lehane. All rights reserved. 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15package main 16 17import ( 18 "encoding/base64" 19 "fmt" 20 "io" 21 "net/http" 22 "os" 23 "sync" 24 "time" 25 26 "github.com/richardlehane/siegfried" 27 "github.com/richardlehane/siegfried/internal/checksum" 28 "github.com/richardlehane/siegfried/pkg/config" 29 "github.com/richardlehane/siegfried/pkg/writer" 30) 31 32func handleErr(w http.ResponseWriter, status int, e error) { 33 w.WriteHeader(status) 34 w.Header().Set("Content-Type", "text/plain; charset=utf-8") 35 io.WriteString(w, fmt.Sprintf("SF server error; got %v\n", e)) 36} 37 38func decodePath(s, b64 string) (string, error) { 39 if len(s) < 11 { 40 return "", fmt.Errorf("path too short, expecting at least 11 characters got %d", len(s)) 41 } 42 if b64 == "true" { 43 data, err := base64.URLEncoding.DecodeString(s[10:]) 44 if err != nil { 45 return "", fmt.Errorf("Error base64 decoding file path, error message %v", err) 46 } 47 return string(data), nil 48 } 49 return s[10:], nil 50} 51 52func parseRequest(w http.ResponseWriter, r *http.Request, s *siegfried.Siegfried, wg *sync.WaitGroup) (error, string, writer.Writer, bool, bool, bool, checksum.HashTyp, *siegfried.Siegfried, getFn) { 53 // json, csv, droid or yaml 54 paramsErr := func(field, expect string) (error, string, writer.Writer, bool, bool, bool, checksum.HashTyp, *siegfried.Siegfried, getFn) { 55 return fmt.Errorf("bad request; in param %s got %s; valid values %s", field, r.FormValue(field), expect), "", nil, false, false, false, -1, nil, nil 56 } 57 var ( 58 mime string 59 wr writer.Writer 60 d bool 61 frmt int 62 ) 63 switch { 64 case *jsono: 65 frmt = 1 66 case *csvo: 67 frmt = 2 68 case *droido: 69 frmt = 3 70 } 71 if v := r.FormValue("format"); v != "" { 72 switch v { 73 case "yaml": 74 frmt = 0 75 case "json": 76 frmt = 1 77 case "csv": 78 frmt = 2 79 case "droid": 80 frmt = 3 81 default: 82 return paramsErr("format", "yaml, json, csv or droid") 83 } 84 } 85 if accept := r.Header.Get("Accept"); accept != "" { 86 switch accept { 87 case "application/x-yaml": 88 frmt = 0 89 case "application/json": 90 frmt = 1 91 case "text/csv", "application/csv": 92 frmt = 2 93 case "application/x-droid": 94 frmt = 3 95 } 96 } 97 switch frmt { 98 case 0: 99 wr = writer.YAML(w) 100 mime = "application/x-yaml" 101 case 1: 102 wr = writer.JSON(w) 103 mime = "application/json" 104 case 2: 105 wr = writer.CSV(w) 106 mime = "text/csv" 107 case 3: 108 wr = writer.Droid(w) 109 d = true 110 mime = "application/x-droid" 111 } 112 // no recurse 113 norec := *nr 114 if v := r.FormValue("nr"); v != "" { 115 switch v { 116 case "true": 117 norec = true 118 case "false": 119 norec = false 120 default: 121 paramsErr("nr", "true or false") 122 } 123 } 124 // continue on error 125 coerr := *coe 126 if v := r.FormValue("coe"); v != "" { 127 switch v { 128 case "true": 129 coerr = true 130 case "false": 131 coerr = false 132 default: 133 paramsErr("coe", "true or false") 134 } 135 } 136 // archive 137 z := *archive 138 if v := r.FormValue("z"); v != "" { 139 switch v { 140 case "true": 141 z = true 142 case "false": 143 z = false 144 default: 145 paramsErr("z", "true or false") 146 } 147 } 148 // checksum 149 h := *hashf 150 if v := r.FormValue("hash"); v != "" { 151 h = v 152 } 153 ht := checksum.GetHash(h) 154 // sig 155 sf := s 156 if v := r.FormValue("sig"); v != "" { 157 if _, err := os.Stat(config.Local(v)); err != nil { 158 return fmt.Errorf("bad request; sig param should be path to a signature file (absolute or relative to home); got %v", err), "", nil, false, false, false, -1, nil, nil 159 } 160 nsf, err := siegfried.Load(config.Local(v)) 161 if err == nil { 162 sf = nsf 163 } 164 } 165 gf := func(path, mime string, mod time.Time, sz int64) *context { 166 c := ctxPool.Get().(*context) 167 c.path, c.mime, c.mod, c.sz = path, mime, mod, sz 168 c.s, c.wg, c.w, c.d, c.z, c.h = sf, wg, wr, d, z, checksum.MakeHash(ht) 169 return c 170 } 171 return nil, mime, wr, coerr, norec, d, ht, sf, gf 172} 173 174func handleIdentify(w http.ResponseWriter, r *http.Request, s *siegfried.Siegfried, ctxts chan *context) { 175 wg := &sync.WaitGroup{} 176 err, mime, wr, coerr, nrec, d, ht, sf, gf := parseRequest(w, r, s, wg) 177 if err != nil { 178 handleErr(w, http.StatusNotFound, err) 179 return 180 } 181 if r.Method == "POST" { 182 f, h, err := r.FormFile("file") 183 if err != nil { 184 handleErr(w, http.StatusNotFound, err) 185 return 186 } 187 defer f.Close() 188 var sz int64 189 var mod time.Time 190 osf, ok := f.(*os.File) 191 if ok { 192 info, err := osf.Stat() 193 if err != nil { 194 handleErr(w, http.StatusInternalServerError, err) 195 } 196 sz = info.Size() 197 mod = info.ModTime() 198 } else { 199 sz = r.ContentLength 200 } 201 w.Header().Set("Content-Type", mime) 202 wr.Head(config.SignatureBase(), time.Now(), sf.C, config.Version(), sf.Identifiers(), sf.Fields(), ht.String()) 203 wg.Add(1) 204 ctx := gf(h.Filename, "", mod, sz) 205 ctxts <- ctx 206 identifyRdr(f, ctx, ctxts, gf) 207 wg.Wait() 208 wr.Tail() 209 return 210 } else { 211 path, err := decodePath(r.URL.Path, r.FormValue("base64")) 212 if err == nil { 213 _, err = os.Stat(path) 214 } 215 if err != nil { 216 handleErr(w, http.StatusNotFound, err) 217 return 218 } 219 w.Header().Set("Content-Type", mime) 220 wr.Head(config.SignatureBase(), time.Now(), sf.C, config.Version(), sf.Identifiers(), sf.Fields(), ht.String()) 221 err = identify(ctxts, path, "", coerr, nrec, d, gf) 222 wg.Wait() 223 wr.Tail() 224 if err != nil { 225 if _, ok := err.(WalkError); ok { // only dump out walk errors, other errors reported in result 226 io.WriteString(w, err.Error()) 227 } 228 } 229 return 230 } 231} 232 233const usage = ` 234 <html> 235 <head> 236 <title>Siegfried server</title> 237 </head> 238 <body> 239 <h1><a name="top">Siegfried server usage</a></h1> 240 <p>The siegfried server has two modes of identification: 241 <ul><li><a href="#get_request">GET request</a>, where a file or directory path is given in the URL and the server retrieves the file(s);</li> 242 <li><a href="#post_request">POST request</a>, where the file is sent over the network as form-data.</li></ul></p> 243 <h2>Default settings</h2> 244 <p>When starting the server, you can use regular sf flags to set defaults for the <i>nr</i>, <i>format</i>, <i>hash</i>, <i>z</i>, and <i>sig</i> parameters that will apply to all requests unless overridden. Logging options can also be set.<p> 245 <p>E.g. sf -nr -z -hash md5 -sig pronom-tika.sig -log p,w,e -serve localhost:5138</p> 246 <hr> 247 <h2><a name="get_request">GET request</a></h2> 248 <p><strong>GET</strong> <i>/identify/[file or folder name (percent encoded)](?base64=false&nr=true&format=yaml&hash=md5&z=true&sig=locfdd.sig)</i></p> 249 <p>E.g. http://localhost:5138/identify/c%3A%2FUsers%2Frichardl%2FMy%20Documents%2Fhello%20world.docx?format=json</p> 250 <h3>Parameters</h3> 251 <p><i>base64</i> (optional) - use <a href="https://tools.ietf.org/html/rfc4648#section-5">URL-safe base64 encoding</a> for the file or folder name with base64=true.</p> 252 <p><i>coe</i> (optional) - continue directory scans even when fatal file access errors are encountered with coe=true.</p> 253 <p><i>nr</i> (optional) - stop sub-directory recursion when a directory path is given with nr=true.</p> 254 <p><i>format</i> (optional) - select the output format (csv, yaml, json, droid). Default is yaml. Alternatively, HTTP content negotiation can be used.</p> 255 <p><i>hash</i> (optional) - calculate file checksum (md5, sha1, sha256, sha512, crc)</p> 256 <p><i>z</i> (optional) - scan archive formats (zip, tar, gzip, warc, arc) with z=true. Default is false.</p> 257 <p><i>sig</i> (optional) - load a specific signature file. Default is default.sig.</p> 258 <h3>Example</h2> 259 <!-- set the get target for the example form using js function at bottom page--> 260 <h4>File/ directory:</h4> 261 <p><input type="text" id="filename"> (provide the path to a file or directory e.g. c:\My Documents\file.doc. It will be percent encoded by this form.)</p> 262 <h4>Parameters:</h4> 263 <form method="get" id="get_example"> 264 <p>Use base64 encoding (base64): <input type="radio" name="base64" value="true"> true <input type="radio" name="base64" value="false" checked> false</p> 265 <p>Continue on error (coe): <input type="radio" name="coe" value="true"> true <input type="radio" name="nr" value="false" checked> false</p> 266 <p>No directory recursion (nr): <input type="radio" name="nr" value="true"> true <input type="radio" name="nr" value="false" checked> false</p> 267 <p>Format (format): <select name="format"> 268 <option value="json">json</option> 269 <option value="yaml">yaml</option> 270 <option value="csv">csv</option> 271 <option value="droid">droid</option> 272 </select></p> 273 <p>Hash (hash): <select name="hash"> 274 <option value="none">none</option> 275 <option value="md5">md5</option> 276 <option value="sha1">sha1</option> 277 <option value="sha256">sha256</option> 278 <option value="sha512">sha512</option> 279 <option value="crc">crc</option> 280 </select></p> 281 <p>Scan archive (z): <input type="radio" name="z" value="true"> true <input type="radio" name="z" value="false" checked> false</p> 282 <p>Signature file (sig): <input type="text" name="sig"></p> 283 <p><input type="submit" value="Submit"></p> 284 </form> 285 <p><a href="#top">Back to top</p> 286 <hr> 287 <h2><a name="post_request">POST request</a></h2> 288 <p><strong>POST</strong> <i>/identify(?format=yaml&hash=md5&z=true&sig=locfdd.sig)</i> Attach a file as form-data with the key "file".</p> 289 <p>E.g. curl "http://localhost:5138/identify?format=json&hash=crc" -F file=@myfile.doc</p> 290 <h3>Parameters</h3> 291 <p><i>format</i> (optional) - select the output format (csv, yaml, json, droid). Default is yaml. Alternatively, HTTP content negotiation can be used.</p> 292 <p><i>hash</i> (optional) - calculate file checksum (md5, sha1, sha256, sha512, crc)</p> 293 <p><i>z</i> (optional) - scan archive formats (zip, tar, gzip, warc, arc) with z=true. Default is false.</p> 294 <p><i>sig</i> (optional) - load a specific signature file. Default is default.sig.</p> 295 <h3>Example</h2> 296 <form action="/identify" enctype="multipart/form-data" method="post"> 297 <h4>File:</h4> 298 <p><input type="file" name="file"></p> 299 <h4>Parameters:</h4> 300 <p>Format (format): <select name="format"> 301 <option value="json">json</option> 302 <option value="yaml">yaml</option> 303 <option value="csv">csv</option> 304 <option value="droid">droid</option> 305 </select></p> 306 <p>Hash (hash): <select name="hash"> 307 <option value="none">none</option> 308 <option value="md5">md5</option> 309 <option value="sha1">sha1</option> 310 <option value="sha256">sha256</option> 311 <option value="sha512">sha512</option> 312 <option value="crc">crc</option> 313 </select></p> 314 <p>Scan archive (z): <input type="radio" name="z" value="true"> true <input type="radio" name="z" value="false" checked> false</p> 315 <p>Signature file (sig): <input type="text" name="sig"></p> 316 <p><input type="submit" value="Submit"></p> 317 </form> 318 <p><a href="#top">Back to top</p> 319 <script> 320 var input = document.getElementById('filename'); 321 input.addEventListener('input', function() 322 { 323 var frm = document.getElementById('get_example'); 324 frm.action = "/identify/" + encodeURIComponent(input.value); 325 }); 326 </script> 327 </body> 328 </html> 329` 330 331func handleMain(w http.ResponseWriter, r *http.Request) { 332 w.Header().Set("Content-Type", "text/html") 333 io.WriteString(w, usage) 334} 335 336type muxer struct { 337 s *siegfried.Siegfried 338 ctxts chan *context 339} 340 341func (m *muxer) ServeHTTP(w http.ResponseWriter, r *http.Request) { 342 if (len(r.URL.Path) == 0 || r.URL.Path == "/") && r.Method == "GET" { 343 handleMain(w, r) 344 return 345 } 346 if len(r.URL.Path) >= 9 && r.URL.Path[:9] == "/identify" { 347 handleIdentify(w, r, m.s, m.ctxts) 348 return 349 } 350 handleErr(w, http.StatusNotFound, fmt.Errorf("valid paths are /, /identify and /identify/*")) 351 return 352} 353 354func listen(port string, s *siegfried.Siegfried, ctxts chan *context) { 355 mux := &muxer{s, ctxts} 356 http.ListenAndServe(port, mux) 357} 358