1 /*
2  * Copyright (C) 1996-2021 The Squid Software Foundation and contributors
3  *
4  * Squid software is distributed under GPLv2+ license and includes
5  * contributions from numerous individuals and organizations.
6  * Please see the COPYING and CONTRIBUTORS files for details.
7  */
8 
9 /* DEBUG: section 06    Disk I/O Routines */
10 
11 #include "squid.h"
12 #include "comm/Loops.h"
13 #include "fd.h"
14 #include "fde.h"
15 #include "fs_io.h"
16 #include "globals.h"
17 #include "MemBuf.h"
18 #include "profiler/Profiler.h"
19 #include "StatCounters.h"
20 
21 #include <cerrno>
22 
23 static PF diskHandleRead;
24 static PF diskHandleWrite;
25 
26 #if _SQUID_WINDOWS_ || _SQUID_OS2_
27 static int
diskWriteIsComplete(int fd)28 diskWriteIsComplete(int fd)
29 {
30     return fd_table[fd].disk.write_q ? 0 : 1;
31 }
32 
33 #endif
34 
35 /* hack needed on SunStudio to avoid linkage convention mismatch */
cxx_xfree(void * ptr)36 static void cxx_xfree(void *ptr)
37 {
38     xfree(ptr);
39 }
40 
41 /*
42  * opens a disk file specified by 'path'.  This function always
43  * blocks!  There is no callback.
44  */
45 int
file_open(const char * path,int mode)46 file_open(const char *path, int mode)
47 {
48     int fd;
49     PROF_start(file_open);
50 
51     if (FILE_MODE(mode) == O_WRONLY)
52         mode |= O_APPEND;
53 
54     errno = 0;
55 
56     fd = open(path, mode, 0644);
57 
58     ++ statCounter.syscalls.disk.opens;
59 
60     if (fd < 0) {
61         int xerrno = errno;
62         debugs(50, 3, "error opening file " << path << ": " << xstrerr(xerrno));
63         fd = DISK_ERROR;
64     } else {
65         debugs(6, 5, "FD " << fd);
66         commSetCloseOnExec(fd);
67         fd_open(fd, FD_FILE, path);
68     }
69 
70     PROF_stop(file_open);
71     return fd;
72 }
73 
74 /* close a disk file. */
75 void
file_close(int fd)76 file_close(int fd)
77 {
78     fde *F = &fd_table[fd];
79     PF *read_callback;
80     PROF_start(file_close);
81     assert(fd >= 0);
82     assert(F->flags.open);
83 
84     if ((read_callback = F->read_handler)) {
85         F->read_handler = NULL;
86         read_callback(-1, F->read_data);
87     }
88 
89     if (F->flags.write_daemon) {
90 #if _SQUID_WINDOWS_ || _SQUID_OS2_
91         /*
92          * on some operating systems, you can not delete or rename
93          * open files, so we won't allow delayed close.
94          */
95         while (!diskWriteIsComplete(fd))
96             diskHandleWrite(fd, NULL);
97 #else
98         F->flags.close_request = true;
99         debugs(6, 2, "file_close: FD " << fd << ", delaying close");
100         PROF_stop(file_close);
101         return;
102 #endif
103 
104     }
105 
106     /*
107      * Assert there is no write callback.  Otherwise we might be
108      * leaking write state data by closing the descriptor
109      */
110     assert(F->write_handler == NULL);
111 
112 #if CALL_FSYNC_BEFORE_CLOSE
113 
114     fsync(fd);
115 
116 #endif
117 
118     close(fd);
119 
120     debugs(6, F->flags.close_request ? 2 : 5, "file_close: FD " << fd << " really closing");
121 
122     fd_close(fd);
123 
124     ++ statCounter.syscalls.disk.closes;
125 
126     PROF_stop(file_close);
127 }
128 
129 /*
130  * This function has the purpose of combining multiple writes.  This is
131  * to facilitate the ASYNC_IO option since it can only guarantee 1
132  * write to a file per trip around the comm.c select() loop. That's bad
133  * because more than 1 write can be made to the access.log file per
134  * trip, and so this code is purely designed to help batch multiple
135  * sequential writes to the access.log file.  Squid will never issue
136  * multiple writes for any other file type during 1 trip around the
137  * select() loop.       --SLF
138  */
139 static void
diskCombineWrites(_fde_disk * fdd)140 diskCombineWrites(_fde_disk *fdd)
141 {
142     /*
143      * We need to combine multiple write requests on an FD's write
144      * queue But only if we don't need to seek() in between them, ugh!
145      * XXX This currently ignores any seeks (file_offset)
146      */
147 
148     if (fdd->write_q != NULL && fdd->write_q->next != NULL) {
149         int len = 0;
150 
151         for (dwrite_q *q = fdd->write_q; q != NULL; q = q->next)
152             len += q->len - q->buf_offset;
153 
154         dwrite_q *wq = (dwrite_q *)memAllocate(MEM_DWRITE_Q);
155 
156         wq->buf = (char *)xmalloc(len);
157 
158         wq->len = 0;
159 
160         wq->buf_offset = 0;
161 
162         wq->next = NULL;
163 
164         wq->free_func = cxx_xfree;
165 
166         while (fdd->write_q != NULL) {
167             dwrite_q *q = fdd->write_q;
168 
169             len = q->len - q->buf_offset;
170             memcpy(wq->buf + wq->len, q->buf + q->buf_offset, len);
171             wq->len += len;
172             fdd->write_q = q->next;
173 
174             if (q->free_func)
175                 q->free_func(q->buf);
176 
177             memFree(q, MEM_DWRITE_Q);
178         };
179 
180         fdd->write_q_tail = wq;
181 
182         fdd->write_q = wq;
183     }
184 }
185 
186 /* write handler */
187 static void
diskHandleWrite(int fd,void *)188 diskHandleWrite(int fd, void *)
189 {
190     int len = 0;
191     fde *F = &fd_table[fd];
192 
193     _fde_disk *fdd = &F->disk;
194     dwrite_q *q = fdd->write_q;
195     int status = DISK_OK;
196     bool do_close;
197 
198     if (NULL == q)
199         return;
200 
201     PROF_start(diskHandleWrite);
202 
203     debugs(6, 3, "diskHandleWrite: FD " << fd);
204 
205     F->flags.write_daemon = false;
206 
207     assert(fdd->write_q != NULL);
208 
209     assert(fdd->write_q->len > fdd->write_q->buf_offset);
210 
211     debugs(6, 3, "diskHandleWrite: FD " << fd << " writing " <<
212            (fdd->write_q->len - fdd->write_q->buf_offset) << " bytes at " <<
213            fdd->write_q->file_offset);
214 
215     errno = 0;
216 
217     if (fdd->write_q->file_offset != -1) {
218         errno = 0;
219         if (lseek(fd, fdd->write_q->file_offset, SEEK_SET) == -1) {
220             int xerrno = errno;
221             debugs(50, DBG_IMPORTANT, "error in seek for FD " << fd << ": " << xstrerr(xerrno));
222             // XXX: handle error?
223         }
224     }
225 
226     len = FD_WRITE_METHOD(fd,
227                           fdd->write_q->buf + fdd->write_q->buf_offset,
228                           fdd->write_q->len - fdd->write_q->buf_offset);
229 
230     debugs(6, 3, "diskHandleWrite: FD " << fd << " len = " << len);
231 
232     ++ statCounter.syscalls.disk.writes;
233 
234     fd_bytes(fd, len, FD_WRITE);
235 
236     if (len < 0) {
237         if (!ignoreErrno(errno)) {
238             status = errno == ENOSPC ? DISK_NO_SPACE_LEFT : DISK_ERROR;
239             int xerrno = errno;
240             debugs(50, DBG_IMPORTANT, "diskHandleWrite: FD " << fd << ": disk write error: " << xstrerr(xerrno));
241 
242             /*
243              * If there is no write callback, then this file is
244              * most likely something important like a log file, or
245              * an interprocess pipe.  Its not a swapfile.  We feel
246              * that a write failure on a log file is rather important,
247              * and Squid doesn't otherwise deal with this condition.
248              * So to get the administrators attention, we exit with
249              * a fatal message.
250              */
251 
252             if (fdd->wrt_handle == NULL)
253                 fatal("Write failure -- check your disk space and cache.log");
254 
255             /*
256              * If there is a write failure, then we notify the
257              * upper layer via the callback, at the end of this
258              * function.  Meanwhile, flush all pending buffers
259              * here.  Let the upper layer decide how to handle the
260              * failure.  This will prevent experiencing multiple,
261              * repeated write failures for the same FD because of
262              * the queued data.
263              */
264             do {
265                 fdd->write_q = q->next;
266 
267                 if (q->free_func)
268                     q->free_func(q->buf);
269 
270                 if (q) {
271                     memFree(q, MEM_DWRITE_Q);
272                     q = NULL;
273                 }
274             } while ((q = fdd->write_q));
275         }
276 
277         len = 0;
278     }
279 
280     if (q != NULL) {
281         /* q might become NULL from write failure above */
282         q->buf_offset += len;
283 
284         if (q->buf_offset > q->len)
285             debugs(50, DBG_IMPORTANT, "diskHandleWriteComplete: q->buf_offset > q->len (" <<
286                    q << "," << (int) q->buf_offset << ", " << q->len << ", " <<
287                    len << " FD " << fd << ")");
288 
289         assert(q->buf_offset <= q->len);
290 
291         if (q->buf_offset == q->len) {
292             /* complete write */
293             fdd->write_q = q->next;
294 
295             if (q->free_func)
296                 q->free_func(q->buf);
297 
298             if (q) {
299                 memFree(q, MEM_DWRITE_Q);
300                 q = NULL;
301             }
302         }
303     }
304 
305     if (fdd->write_q == NULL) {
306         /* no more data */
307         fdd->write_q_tail = NULL;
308     } else {
309         /* another block is queued */
310         diskCombineWrites(fdd);
311         Comm::SetSelect(fd, COMM_SELECT_WRITE, diskHandleWrite, NULL, 0);
312         F->flags.write_daemon = true;
313     }
314 
315     do_close = F->flags.close_request;
316 
317     if (fdd->wrt_handle) {
318         DWCB *callback = fdd->wrt_handle;
319         void *cbdata;
320         fdd->wrt_handle = NULL;
321 
322         if (cbdataReferenceValidDone(fdd->wrt_handle_data, &cbdata)) {
323             callback(fd, status, len, cbdata);
324             /*
325              * NOTE, this callback can close the FD, so we must
326              * not touch 'F', 'fdd', etc. after this.
327              */
328             PROF_stop(diskHandleWrite);
329             return;
330             /* XXX But what about close_request??? */
331         }
332     }
333 
334     if (do_close)
335         file_close(fd);
336 
337     PROF_stop(diskHandleWrite);
338 }
339 
340 /* write block to a file */
341 /* write back queue. Only one writer at a time. */
342 /* call a handle when writing is complete. */
343 void
file_write(int fd,off_t file_offset,void const * ptr_to_buf,int len,DWCB * handle,void * handle_data,FREE * free_func)344 file_write(int fd,
345            off_t file_offset,
346            void const *ptr_to_buf,
347            int len,
348            DWCB * handle,
349            void *handle_data,
350            FREE * free_func)
351 {
352     dwrite_q *wq = NULL;
353     fde *F = &fd_table[fd];
354     PROF_start(file_write);
355     assert(fd >= 0);
356     assert(F->flags.open);
357     /* if we got here. Caller is eligible to write. */
358     wq = (dwrite_q *)memAllocate(MEM_DWRITE_Q);
359     wq->file_offset = file_offset;
360     wq->buf = (char *)ptr_to_buf;
361     wq->len = len;
362     wq->buf_offset = 0;
363     wq->next = NULL;
364     wq->free_func = free_func;
365 
366     if (!F->disk.wrt_handle_data) {
367         F->disk.wrt_handle = handle;
368         F->disk.wrt_handle_data = cbdataReference(handle_data);
369     } else {
370         /* Detect if there is multiple concurrent users of this fd.. we only support one callback */
371         assert(F->disk.wrt_handle_data == handle_data && F->disk.wrt_handle == handle);
372     }
373 
374     /* add to queue */
375     if (F->disk.write_q == NULL) {
376         /* empty queue */
377         F->disk.write_q = F->disk.write_q_tail = wq;
378     } else {
379         F->disk.write_q_tail->next = wq;
380         F->disk.write_q_tail = wq;
381     }
382 
383     if (!F->flags.write_daemon) {
384         diskHandleWrite(fd, NULL);
385     }
386 
387     PROF_stop(file_write);
388 }
389 
390 /*
391  * a wrapper around file_write to allow for MemBuf to be file_written
392  * in a snap
393  */
394 void
file_write_mbuf(int fd,off_t off,MemBuf mb,DWCB * handler,void * handler_data)395 file_write_mbuf(int fd, off_t off, MemBuf mb, DWCB * handler, void *handler_data)
396 {
397     file_write(fd, off, mb.buf, mb.size, handler, handler_data, mb.freeFunc());
398 }
399 
400 /* Read from FD */
401 static void
diskHandleRead(int fd,void * data)402 diskHandleRead(int fd, void *data)
403 {
404     dread_ctrl *ctrl_dat = (dread_ctrl *)data;
405     fde *F = &fd_table[fd];
406     int len;
407     int rc = DISK_OK;
408     int xerrno;
409 
410     /*
411      * FD < 0 indicates premature close; we just have to free
412      * the state data.
413      */
414 
415     if (fd < 0) {
416         memFree(ctrl_dat, MEM_DREAD_CTRL);
417         return;
418     }
419 
420     PROF_start(diskHandleRead);
421 
422 #if WRITES_MAINTAIN_DISK_OFFSET
423     if (F->disk.offset != ctrl_dat->offset) {
424 #else
425     {
426 #endif
427         debugs(6, 3, "diskHandleRead: FD " << fd << " seeking to offset " << ctrl_dat->offset);
428         errno = 0;
429         if (lseek(fd, ctrl_dat->offset, SEEK_SET) == -1) {
430             xerrno = errno;
431             // shouldn't happen, let's detect that
432             debugs(50, DBG_IMPORTANT, "error in seek for FD " << fd << ": " << xstrerr(xerrno));
433             // XXX handle failures?
434         }
435         ++ statCounter.syscalls.disk.seeks;
436         F->disk.offset = ctrl_dat->offset;
437     }
438 
439     errno = 0;
440     len = FD_READ_METHOD(fd, ctrl_dat->buf, ctrl_dat->req_len);
441     xerrno = errno;
442 
443     if (len > 0)
444         F->disk.offset += len;
445 
446     ++ statCounter.syscalls.disk.reads;
447 
448     fd_bytes(fd, len, FD_READ);
449 
450     if (len < 0) {
451         if (ignoreErrno(xerrno)) {
452             Comm::SetSelect(fd, COMM_SELECT_READ, diskHandleRead, ctrl_dat, 0);
453             PROF_stop(diskHandleRead);
454             return;
455         }
456 
457         debugs(50, DBG_IMPORTANT, "diskHandleRead: FD " << fd << ": " << xstrerr(xerrno));
458         len = 0;
459         rc = DISK_ERROR;
460     } else if (len == 0) {
461         rc = DISK_EOF;
462     }
463 
464     if (cbdataReferenceValid(ctrl_dat->client_data))
465         ctrl_dat->handler(fd, ctrl_dat->buf, len, rc, ctrl_dat->client_data);
466 
467     cbdataReferenceDone(ctrl_dat->client_data);
468 
469     memFree(ctrl_dat, MEM_DREAD_CTRL);
470 
471     PROF_stop(diskHandleRead);
472 }
473 
474 /* start read operation */
475 /* buffer must be allocated from the caller.
476  * It must have at least req_len space in there.
477  * call handler when a reading is complete. */
478 void
479 file_read(int fd, char *buf, int req_len, off_t offset, DRCB * handler, void *client_data)
480 {
481     dread_ctrl *ctrl_dat;
482     PROF_start(file_read);
483     assert(fd >= 0);
484     ctrl_dat = (dread_ctrl *)memAllocate(MEM_DREAD_CTRL);
485     ctrl_dat->fd = fd;
486     ctrl_dat->offset = offset;
487     ctrl_dat->req_len = req_len;
488     ctrl_dat->buf = buf;
489     ctrl_dat->end_of_file = 0;
490     ctrl_dat->handler = handler;
491     ctrl_dat->client_data = cbdataReference(client_data);
492     diskHandleRead(fd, ctrl_dat);
493     PROF_stop(file_read);
494 }
495 
496 void
497 safeunlink(const char *s, int quiet)
498 {
499     ++ statCounter.syscalls.disk.unlinks;
500 
501     if (unlink(s) < 0 && !quiet) {
502         int xerrno = errno;
503         debugs(50, DBG_IMPORTANT, "safeunlink: Couldn't delete " << s << ": " << xstrerr(xerrno));
504     }
505 }
506 
507 bool
508 FileRename(const SBuf &from, const SBuf &to)
509 {
510     debugs(21, 2, "renaming " << from << " to " << to);
511 
512     // non-const copy for c_str()
513     SBuf from2(from);
514     // ensure c_str() lifetimes even if `to` and `from` share memory
515     SBuf to2(to.rawContent(), to.length());
516 
517 #if _SQUID_OS2_ || _SQUID_WINDOWS_
518     remove(to2.c_str());
519 #endif
520 
521     if (rename(from2.c_str(), to2.c_str()) == 0)
522         return true;
523 
524     int xerrno = errno;
525     debugs(21, (errno == ENOENT ? 2 : DBG_IMPORTANT), "Cannot rename " << from << " to " << to << ": " << xstrerr(xerrno));
526 
527     return false;
528 }
529 
530 int
531 fsBlockSize(const char *path, int *blksize)
532 {
533     struct statvfs sfs;
534 
535     if (xstatvfs(path, &sfs)) {
536         int xerrno = errno;
537         debugs(50, DBG_IMPORTANT, "" << path << ": " << xstrerr(xerrno));
538         *blksize = 2048;
539         return 1;
540     }
541 
542     *blksize = (int) sfs.f_frsize;
543 
544     // Sanity check; make sure we have a meaningful value.
545     if (*blksize < 512)
546         *blksize = 2048;
547 
548     return 0;
549 }
550 
/*
 * Converts 'num' filesystem blocks of 'fsbs' bytes each into blocks of
 * 'bs' bytes.  When fsbs is smaller than bs (and not zero), num is
 * divided by bs/fsbs; otherwise it is multiplied by fsbs/bs (which
 * yields 0 for fsbs == 0).
 */
#define fsbtoblk(num, fsbs, bs) \
    (((fsbs) == 0 || (fsbs) >= (bs)) ? \
            (num) * ((fsbs) / (bs)) : (num) / ((bs) / (fsbs)))
554 int
555 fsStats(const char *path, int *totl_kb, int *free_kb, int *totl_in, int *free_in)
556 {
557     struct statvfs sfs;
558 
559     if (xstatvfs(path, &sfs)) {
560         int xerrno = errno;
561         debugs(50, DBG_IMPORTANT, "" << path << ": " << xstrerr(xerrno));
562         return 1;
563     }
564 
565     *totl_kb = (int) fsbtoblk(sfs.f_blocks, sfs.f_frsize, 1024);
566     *free_kb = (int) fsbtoblk(sfs.f_bfree, sfs.f_frsize, 1024);
567     *totl_in = (int) sfs.f_files;
568     *free_in = (int) sfs.f_ffree;
569     return 0;
570 }
571 
572