1 /*-------------------------------------------------------------------------
2  *
3  * be-fsstubs.c
4  *	  Builtin functions for open/close/read/write operations on large objects
5  *
6  * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  *	  src/backend/libpq/be-fsstubs.c
12  *
13  * NOTES
14  *	  This should be moved to a more appropriate place.  It is here
15  *	  for lack of a better place.
16  *
17  *	  These functions store LargeObjectDesc structs in a private MemoryContext,
18  *	  which means that large object descriptors hang around until we destroy
19  *	  the context at transaction end.  It'd be possible to prolong the lifetime
20  *	  of the context so that LO FDs are good across transactions (for example,
21  *	  we could release the context only if we see that no FDs remain open).
22  *	  But we'd need additional state in order to do the right thing at the
23  *	  end of an aborted transaction.  FDs opened during an aborted xact would
24  *	  still need to be closed, since they might not be pointing at valid
25  *	  relations at all.  Locking semantics are also an interesting problem
26  *	  if LOs stay open across transactions.  For now, we'll stick with the
27  *	  existing documented semantics of LO FDs: they're only good within a
28  *	  transaction.
29  *
30  *	  As of PostgreSQL 8.0, much of the angst expressed above is no longer
31  *	  relevant, and in fact it'd be pretty easy to allow LO FDs to stay
32  *	  open across transactions.  (Snapshot relevancy would still be an issue.)
33  *	  However backwards compatibility suggests that we should stick to the
34  *	  status quo.
35  *
36  *-------------------------------------------------------------------------
37  */
38 
39 #include "postgres.h"
40 
41 #include <fcntl.h>
42 #include <sys/stat.h>
43 #include <unistd.h>
44 
45 #include "access/xact.h"
46 #include "libpq/be-fsstubs.h"
47 #include "libpq/libpq-fs.h"
48 #include "miscadmin.h"
49 #include "storage/fd.h"
50 #include "storage/large_object.h"
51 #include "utils/acl.h"
52 #include "utils/builtins.h"
53 #include "utils/memutils.h"
54 #include "utils/snapmgr.h"
55 
/*
 * Define FSDB to a nonzero value to enable the DEBUG4 tracing that is
 * guarded by "#if FSDB" in this file.
 */
/* #define FSDB 1 */
/* size in bytes of the stack buffers used for lo_import/lo_export transfers */
#define BUFSIZE			8192
60 
61 /*
62  * LO "FD"s are indexes into the cookies array.
63  *
64  * A non-null entry is a pointer to a LargeObjectDesc allocated in the
65  * LO private memory context "fscxt".  The cookies array itself is also
 * dynamically allocated in that context.  Its current allocated size is
 * cookies_size entries, of which any unused entries will be NULL.
68  */
/* table of open LO descriptors, indexed by LO "FD"; NULL entries are free */
static LargeObjectDesc **cookies = NULL;
/* number of entries currently allocated in the cookies array */
static int	cookies_size = 0;

/* AtEOXact_LargeObject() does nothing unless this has been set */
static bool lo_cleanup_needed = false;
/* private memory context for LO state; created on demand by newLOfd() */
static MemoryContext fscxt = NULL;

/* internal helpers, defined later in this file */
static int	newLOfd(void);
static void closeLOfd(int fd);
static Oid	lo_import_internal(text *filename, Oid lobjOid);
78 
79 
80 /*****************************************************************************
81  *	File Interfaces for Large Objects
82  *****************************************************************************/
83 
84 Datum
be_lo_open(PG_FUNCTION_ARGS)85 be_lo_open(PG_FUNCTION_ARGS)
86 {
87 	Oid			lobjId = PG_GETARG_OID(0);
88 	int32		mode = PG_GETARG_INT32(1);
89 	LargeObjectDesc *lobjDesc;
90 	int			fd;
91 
92 #if FSDB
93 	elog(DEBUG4, "lo_open(%u,%d)", lobjId, mode);
94 #endif
95 
96 	/*
97 	 * Allocate a large object descriptor first.  This will also create
98 	 * 'fscxt' if this is the first LO opened in this transaction.
99 	 */
100 	fd = newLOfd();
101 
102 	lobjDesc = inv_open(lobjId, mode, fscxt);
103 	lobjDesc->subid = GetCurrentSubTransactionId();
104 
105 	/*
106 	 * We must register the snapshot in TopTransaction's resowner so that it
107 	 * stays alive until the LO is closed rather than until the current portal
108 	 * shuts down.
109 	 */
110 	if (lobjDesc->snapshot)
111 		lobjDesc->snapshot = RegisterSnapshotOnOwner(lobjDesc->snapshot,
112 													 TopTransactionResourceOwner);
113 
114 	Assert(cookies[fd] == NULL);
115 	cookies[fd] = lobjDesc;
116 
117 	PG_RETURN_INT32(fd);
118 }
119 
120 Datum
be_lo_close(PG_FUNCTION_ARGS)121 be_lo_close(PG_FUNCTION_ARGS)
122 {
123 	int32		fd = PG_GETARG_INT32(0);
124 
125 	if (fd < 0 || fd >= cookies_size || cookies[fd] == NULL)
126 		ereport(ERROR,
127 				(errcode(ERRCODE_UNDEFINED_OBJECT),
128 				 errmsg("invalid large-object descriptor: %d", fd)));
129 
130 #if FSDB
131 	elog(DEBUG4, "lo_close(%d)", fd);
132 #endif
133 
134 	closeLOfd(fd);
135 
136 	PG_RETURN_INT32(0);
137 }
138 
139 
140 /*****************************************************************************
141  *	Bare Read/Write operations --- these are not fmgr-callable!
142  *
143  *	We assume the large object supports byte oriented reads and seeks so
144  *	that our work is easier.
145  *
146  *****************************************************************************/
147 
148 int
lo_read(int fd,char * buf,int len)149 lo_read(int fd, char *buf, int len)
150 {
151 	int			status;
152 	LargeObjectDesc *lobj;
153 
154 	if (fd < 0 || fd >= cookies_size || cookies[fd] == NULL)
155 		ereport(ERROR,
156 				(errcode(ERRCODE_UNDEFINED_OBJECT),
157 				 errmsg("invalid large-object descriptor: %d", fd)));
158 	lobj = cookies[fd];
159 
160 	/*
161 	 * Check state.  inv_read() would throw an error anyway, but we want the
162 	 * error to be about the FD's state not the underlying privilege; it might
163 	 * be that the privilege exists but user forgot to ask for read mode.
164 	 */
165 	if ((lobj->flags & IFS_RDLOCK) == 0)
166 		ereport(ERROR,
167 				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
168 				 errmsg("large object descriptor %d was not opened for reading",
169 						fd)));
170 
171 	status = inv_read(lobj, buf, len);
172 
173 	return status;
174 }
175 
176 int
lo_write(int fd,const char * buf,int len)177 lo_write(int fd, const char *buf, int len)
178 {
179 	int			status;
180 	LargeObjectDesc *lobj;
181 
182 	if (fd < 0 || fd >= cookies_size || cookies[fd] == NULL)
183 		ereport(ERROR,
184 				(errcode(ERRCODE_UNDEFINED_OBJECT),
185 				 errmsg("invalid large-object descriptor: %d", fd)));
186 	lobj = cookies[fd];
187 
188 	/* see comment in lo_read() */
189 	if ((lobj->flags & IFS_WRLOCK) == 0)
190 		ereport(ERROR,
191 				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
192 				 errmsg("large object descriptor %d was not opened for writing",
193 						fd)));
194 
195 	status = inv_write(lobj, buf, len);
196 
197 	return status;
198 }
199 
200 Datum
be_lo_lseek(PG_FUNCTION_ARGS)201 be_lo_lseek(PG_FUNCTION_ARGS)
202 {
203 	int32		fd = PG_GETARG_INT32(0);
204 	int32		offset = PG_GETARG_INT32(1);
205 	int32		whence = PG_GETARG_INT32(2);
206 	int64		status;
207 
208 	if (fd < 0 || fd >= cookies_size || cookies[fd] == NULL)
209 		ereport(ERROR,
210 				(errcode(ERRCODE_UNDEFINED_OBJECT),
211 				 errmsg("invalid large-object descriptor: %d", fd)));
212 
213 	status = inv_seek(cookies[fd], offset, whence);
214 
215 	/* guard against result overflow */
216 	if (status != (int32) status)
217 		ereport(ERROR,
218 				(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
219 				 errmsg("lo_lseek result out of range for large-object descriptor %d",
220 						fd)));
221 
222 	PG_RETURN_INT32((int32) status);
223 }
224 
225 Datum
be_lo_lseek64(PG_FUNCTION_ARGS)226 be_lo_lseek64(PG_FUNCTION_ARGS)
227 {
228 	int32		fd = PG_GETARG_INT32(0);
229 	int64		offset = PG_GETARG_INT64(1);
230 	int32		whence = PG_GETARG_INT32(2);
231 	int64		status;
232 
233 	if (fd < 0 || fd >= cookies_size || cookies[fd] == NULL)
234 		ereport(ERROR,
235 				(errcode(ERRCODE_UNDEFINED_OBJECT),
236 				 errmsg("invalid large-object descriptor: %d", fd)));
237 
238 	status = inv_seek(cookies[fd], offset, whence);
239 
240 	PG_RETURN_INT64(status);
241 }
242 
243 Datum
be_lo_creat(PG_FUNCTION_ARGS)244 be_lo_creat(PG_FUNCTION_ARGS)
245 {
246 	Oid			lobjId;
247 
248 	lo_cleanup_needed = true;
249 	lobjId = inv_create(InvalidOid);
250 
251 	PG_RETURN_OID(lobjId);
252 }
253 
254 Datum
be_lo_create(PG_FUNCTION_ARGS)255 be_lo_create(PG_FUNCTION_ARGS)
256 {
257 	Oid			lobjId = PG_GETARG_OID(0);
258 
259 	lo_cleanup_needed = true;
260 	lobjId = inv_create(lobjId);
261 
262 	PG_RETURN_OID(lobjId);
263 }
264 
265 Datum
be_lo_tell(PG_FUNCTION_ARGS)266 be_lo_tell(PG_FUNCTION_ARGS)
267 {
268 	int32		fd = PG_GETARG_INT32(0);
269 	int64		offset;
270 
271 	if (fd < 0 || fd >= cookies_size || cookies[fd] == NULL)
272 		ereport(ERROR,
273 				(errcode(ERRCODE_UNDEFINED_OBJECT),
274 				 errmsg("invalid large-object descriptor: %d", fd)));
275 
276 	offset = inv_tell(cookies[fd]);
277 
278 	/* guard against result overflow */
279 	if (offset != (int32) offset)
280 		ereport(ERROR,
281 				(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
282 				 errmsg("lo_tell result out of range for large-object descriptor %d",
283 						fd)));
284 
285 	PG_RETURN_INT32((int32) offset);
286 }
287 
288 Datum
be_lo_tell64(PG_FUNCTION_ARGS)289 be_lo_tell64(PG_FUNCTION_ARGS)
290 {
291 	int32		fd = PG_GETARG_INT32(0);
292 	int64		offset;
293 
294 	if (fd < 0 || fd >= cookies_size || cookies[fd] == NULL)
295 		ereport(ERROR,
296 				(errcode(ERRCODE_UNDEFINED_OBJECT),
297 				 errmsg("invalid large-object descriptor: %d", fd)));
298 
299 	offset = inv_tell(cookies[fd]);
300 
301 	PG_RETURN_INT64(offset);
302 }
303 
304 Datum
be_lo_unlink(PG_FUNCTION_ARGS)305 be_lo_unlink(PG_FUNCTION_ARGS)
306 {
307 	Oid			lobjId = PG_GETARG_OID(0);
308 
309 	/*
310 	 * Must be owner of the large object.  It would be cleaner to check this
311 	 * in inv_drop(), but we want to throw the error before not after closing
312 	 * relevant FDs.
313 	 */
314 	if (!lo_compat_privileges &&
315 		!pg_largeobject_ownercheck(lobjId, GetUserId()))
316 		ereport(ERROR,
317 				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
318 				 errmsg("must be owner of large object %u", lobjId)));
319 
320 	/*
321 	 * If there are any open LO FDs referencing that ID, close 'em.
322 	 */
323 	if (fscxt != NULL)
324 	{
325 		int			i;
326 
327 		for (i = 0; i < cookies_size; i++)
328 		{
329 			if (cookies[i] != NULL && cookies[i]->id == lobjId)
330 				closeLOfd(i);
331 		}
332 	}
333 
334 	/*
335 	 * inv_drop does not create a need for end-of-transaction cleanup and
336 	 * hence we don't need to set lo_cleanup_needed.
337 	 */
338 	PG_RETURN_INT32(inv_drop(lobjId));
339 }
340 
341 /*****************************************************************************
342  *	Read/Write using bytea
343  *****************************************************************************/
344 
345 Datum
be_loread(PG_FUNCTION_ARGS)346 be_loread(PG_FUNCTION_ARGS)
347 {
348 	int32		fd = PG_GETARG_INT32(0);
349 	int32		len = PG_GETARG_INT32(1);
350 	bytea	   *retval;
351 	int			totalread;
352 
353 	if (len < 0)
354 		len = 0;
355 
356 	retval = (bytea *) palloc(VARHDRSZ + len);
357 	totalread = lo_read(fd, VARDATA(retval), len);
358 	SET_VARSIZE(retval, totalread + VARHDRSZ);
359 
360 	PG_RETURN_BYTEA_P(retval);
361 }
362 
363 Datum
be_lowrite(PG_FUNCTION_ARGS)364 be_lowrite(PG_FUNCTION_ARGS)
365 {
366 	int32		fd = PG_GETARG_INT32(0);
367 	bytea	   *wbuf = PG_GETARG_BYTEA_PP(1);
368 	int			bytestowrite;
369 	int			totalwritten;
370 
371 	bytestowrite = VARSIZE_ANY_EXHDR(wbuf);
372 	totalwritten = lo_write(fd, VARDATA_ANY(wbuf), bytestowrite);
373 	PG_RETURN_INT32(totalwritten);
374 }
375 
376 /*****************************************************************************
377  *	 Import/Export of Large Object
378  *****************************************************************************/
379 
380 /*
381  * lo_import -
382  *	  imports a file as an (inversion) large object.
383  */
384 Datum
be_lo_import(PG_FUNCTION_ARGS)385 be_lo_import(PG_FUNCTION_ARGS)
386 {
387 	text	   *filename = PG_GETARG_TEXT_PP(0);
388 
389 	PG_RETURN_OID(lo_import_internal(filename, InvalidOid));
390 }
391 
392 /*
393  * lo_import_with_oid -
394  *	  imports a file as an (inversion) large object specifying oid.
395  */
396 Datum
be_lo_import_with_oid(PG_FUNCTION_ARGS)397 be_lo_import_with_oid(PG_FUNCTION_ARGS)
398 {
399 	text	   *filename = PG_GETARG_TEXT_PP(0);
400 	Oid			oid = PG_GETARG_OID(1);
401 
402 	PG_RETURN_OID(lo_import_internal(filename, oid));
403 }
404 
405 static Oid
lo_import_internal(text * filename,Oid lobjOid)406 lo_import_internal(text *filename, Oid lobjOid)
407 {
408 	int			fd;
409 	int			nbytes,
410 				tmp PG_USED_FOR_ASSERTS_ONLY;
411 	char		buf[BUFSIZE];
412 	char		fnamebuf[MAXPGPATH];
413 	LargeObjectDesc *lobj;
414 	Oid			oid;
415 
416 	/*
417 	 * open the file to be read in
418 	 */
419 	text_to_cstring_buffer(filename, fnamebuf, sizeof(fnamebuf));
420 	fd = OpenTransientFile(fnamebuf, O_RDONLY | PG_BINARY);
421 	if (fd < 0)
422 		ereport(ERROR,
423 				(errcode_for_file_access(),
424 				 errmsg("could not open server file \"%s\": %m",
425 						fnamebuf)));
426 
427 	/*
428 	 * create an inversion object
429 	 */
430 	lo_cleanup_needed = true;
431 	oid = inv_create(lobjOid);
432 
433 	/*
434 	 * read in from the filesystem and write to the inversion object
435 	 */
436 	lobj = inv_open(oid, INV_WRITE, CurrentMemoryContext);
437 
438 	while ((nbytes = read(fd, buf, BUFSIZE)) > 0)
439 	{
440 		tmp = inv_write(lobj, buf, nbytes);
441 		Assert(tmp == nbytes);
442 	}
443 
444 	if (nbytes < 0)
445 		ereport(ERROR,
446 				(errcode_for_file_access(),
447 				 errmsg("could not read server file \"%s\": %m",
448 						fnamebuf)));
449 
450 	inv_close(lobj);
451 	CloseTransientFile(fd);
452 
453 	return oid;
454 }
455 
456 /*
457  * lo_export -
458  *	  exports an (inversion) large object.
459  */
460 Datum
be_lo_export(PG_FUNCTION_ARGS)461 be_lo_export(PG_FUNCTION_ARGS)
462 {
463 	Oid			lobjId = PG_GETARG_OID(0);
464 	text	   *filename = PG_GETARG_TEXT_PP(1);
465 	int			fd;
466 	int			nbytes,
467 				tmp;
468 	char		buf[BUFSIZE];
469 	char		fnamebuf[MAXPGPATH];
470 	LargeObjectDesc *lobj;
471 	mode_t		oumask;
472 
473 	/*
474 	 * open the inversion object (no need to test for failure)
475 	 */
476 	lo_cleanup_needed = true;
477 	lobj = inv_open(lobjId, INV_READ, CurrentMemoryContext);
478 
479 	/*
480 	 * open the file to be written to
481 	 *
482 	 * Note: we reduce backend's normal 077 umask to the slightly friendlier
483 	 * 022. This code used to drop it all the way to 0, but creating
484 	 * world-writable export files doesn't seem wise.
485 	 */
486 	text_to_cstring_buffer(filename, fnamebuf, sizeof(fnamebuf));
487 	oumask = umask(S_IWGRP | S_IWOTH);
488 	PG_TRY();
489 	{
490 		fd = OpenTransientFilePerm(fnamebuf, O_CREAT | O_WRONLY | O_TRUNC | PG_BINARY,
491 								   S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
492 	}
493 	PG_CATCH();
494 	{
495 		umask(oumask);
496 		PG_RE_THROW();
497 	}
498 	PG_END_TRY();
499 	umask(oumask);
500 	if (fd < 0)
501 		ereport(ERROR,
502 				(errcode_for_file_access(),
503 				 errmsg("could not create server file \"%s\": %m",
504 						fnamebuf)));
505 
506 	/*
507 	 * read in from the inversion file and write to the filesystem
508 	 */
509 	while ((nbytes = inv_read(lobj, buf, BUFSIZE)) > 0)
510 	{
511 		tmp = write(fd, buf, nbytes);
512 		if (tmp != nbytes)
513 			ereport(ERROR,
514 					(errcode_for_file_access(),
515 					 errmsg("could not write server file \"%s\": %m",
516 							fnamebuf)));
517 	}
518 
519 	CloseTransientFile(fd);
520 	inv_close(lobj);
521 
522 	PG_RETURN_INT32(1);
523 }
524 
525 /*
526  * lo_truncate -
527  *	  truncate a large object to a specified length
528  */
529 static void
lo_truncate_internal(int32 fd,int64 len)530 lo_truncate_internal(int32 fd, int64 len)
531 {
532 	LargeObjectDesc *lobj;
533 
534 	if (fd < 0 || fd >= cookies_size || cookies[fd] == NULL)
535 		ereport(ERROR,
536 				(errcode(ERRCODE_UNDEFINED_OBJECT),
537 				 errmsg("invalid large-object descriptor: %d", fd)));
538 	lobj = cookies[fd];
539 
540 	/* see comment in lo_read() */
541 	if ((lobj->flags & IFS_WRLOCK) == 0)
542 		ereport(ERROR,
543 				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
544 				 errmsg("large object descriptor %d was not opened for writing",
545 						fd)));
546 
547 	inv_truncate(lobj, len);
548 }
549 
550 Datum
be_lo_truncate(PG_FUNCTION_ARGS)551 be_lo_truncate(PG_FUNCTION_ARGS)
552 {
553 	int32		fd = PG_GETARG_INT32(0);
554 	int32		len = PG_GETARG_INT32(1);
555 
556 	lo_truncate_internal(fd, len);
557 	PG_RETURN_INT32(0);
558 }
559 
560 Datum
be_lo_truncate64(PG_FUNCTION_ARGS)561 be_lo_truncate64(PG_FUNCTION_ARGS)
562 {
563 	int32		fd = PG_GETARG_INT32(0);
564 	int64		len = PG_GETARG_INT64(1);
565 
566 	lo_truncate_internal(fd, len);
567 	PG_RETURN_INT32(0);
568 }
569 
570 /*
571  * AtEOXact_LargeObject -
572  *		 prepares large objects for transaction commit
573  */
574 void
AtEOXact_LargeObject(bool isCommit)575 AtEOXact_LargeObject(bool isCommit)
576 {
577 	int			i;
578 
579 	if (!lo_cleanup_needed)
580 		return;					/* no LO operations in this xact */
581 
582 	/*
583 	 * Close LO fds and clear cookies array so that LO fds are no longer good.
584 	 * The memory context and resource owner holding them are going away at
585 	 * the end-of-transaction anyway, but on commit, we need to close them to
586 	 * avoid warnings about leaked resources at commit.  On abort we can skip
587 	 * this step.
588 	 */
589 	if (isCommit)
590 	{
591 		for (i = 0; i < cookies_size; i++)
592 		{
593 			if (cookies[i] != NULL)
594 				closeLOfd(i);
595 		}
596 	}
597 
598 	/* Needn't actually pfree since we're about to zap context */
599 	cookies = NULL;
600 	cookies_size = 0;
601 
602 	/* Release the LO memory context to prevent permanent memory leaks. */
603 	if (fscxt)
604 		MemoryContextDelete(fscxt);
605 	fscxt = NULL;
606 
607 	/* Give inv_api.c a chance to clean up, too */
608 	close_lo_relation(isCommit);
609 
610 	lo_cleanup_needed = false;
611 }
612 
613 /*
614  * AtEOSubXact_LargeObject
615  *		Take care of large objects at subtransaction commit/abort
616  *
617  * Reassign LOs created/opened during a committing subtransaction
618  * to the parent subtransaction.  On abort, just close them.
619  */
620 void
AtEOSubXact_LargeObject(bool isCommit,SubTransactionId mySubid,SubTransactionId parentSubid)621 AtEOSubXact_LargeObject(bool isCommit, SubTransactionId mySubid,
622 						SubTransactionId parentSubid)
623 {
624 	int			i;
625 
626 	if (fscxt == NULL)			/* no LO operations in this xact */
627 		return;
628 
629 	for (i = 0; i < cookies_size; i++)
630 	{
631 		LargeObjectDesc *lo = cookies[i];
632 
633 		if (lo != NULL && lo->subid == mySubid)
634 		{
635 			if (isCommit)
636 				lo->subid = parentSubid;
637 			else
638 				closeLOfd(i);
639 		}
640 	}
641 }
642 
643 /*****************************************************************************
644  *	Support routines for this file
645  *****************************************************************************/
646 
647 static int
newLOfd(void)648 newLOfd(void)
649 {
650 	int			i,
651 				newsize;
652 
653 	lo_cleanup_needed = true;
654 	if (fscxt == NULL)
655 		fscxt = AllocSetContextCreate(TopMemoryContext,
656 									  "Filesystem",
657 									  ALLOCSET_DEFAULT_SIZES);
658 
659 	/* Try to find a free slot */
660 	for (i = 0; i < cookies_size; i++)
661 	{
662 		if (cookies[i] == NULL)
663 			return i;
664 	}
665 
666 	/* No free slot, so make the array bigger */
667 	if (cookies_size <= 0)
668 	{
669 		/* First time through, arbitrarily make 64-element array */
670 		i = 0;
671 		newsize = 64;
672 		cookies = (LargeObjectDesc **)
673 			MemoryContextAllocZero(fscxt, newsize * sizeof(LargeObjectDesc *));
674 		cookies_size = newsize;
675 	}
676 	else
677 	{
678 		/* Double size of array */
679 		i = cookies_size;
680 		newsize = cookies_size * 2;
681 		cookies = (LargeObjectDesc **)
682 			repalloc(cookies, newsize * sizeof(LargeObjectDesc *));
683 		MemSet(cookies + cookies_size, 0,
684 			   (newsize - cookies_size) * sizeof(LargeObjectDesc *));
685 		cookies_size = newsize;
686 	}
687 
688 	return i;
689 }
690 
691 static void
closeLOfd(int fd)692 closeLOfd(int fd)
693 {
694 	LargeObjectDesc *lobj;
695 
696 	/*
697 	 * Make sure we do not try to free twice if this errors out for some
698 	 * reason.  Better a leak than a crash.
699 	 */
700 	lobj = cookies[fd];
701 	cookies[fd] = NULL;
702 
703 	if (lobj->snapshot)
704 		UnregisterSnapshotFromOwner(lobj->snapshot,
705 									TopTransactionResourceOwner);
706 	inv_close(lobj);
707 }
708 
709 /*****************************************************************************
710  *	Wrappers oriented toward SQL callers
711  *****************************************************************************/
712 
713 /*
714  * Read [offset, offset+nbytes) within LO; when nbytes is -1, read to end.
715  */
716 static bytea *
lo_get_fragment_internal(Oid loOid,int64 offset,int32 nbytes)717 lo_get_fragment_internal(Oid loOid, int64 offset, int32 nbytes)
718 {
719 	LargeObjectDesc *loDesc;
720 	int64		loSize;
721 	int64		result_length;
722 	int			total_read PG_USED_FOR_ASSERTS_ONLY;
723 	bytea	   *result = NULL;
724 
725 	lo_cleanup_needed = true;
726 	loDesc = inv_open(loOid, INV_READ, CurrentMemoryContext);
727 
728 	/*
729 	 * Compute number of bytes we'll actually read, accommodating nbytes == -1
730 	 * and reads beyond the end of the LO.
731 	 */
732 	loSize = inv_seek(loDesc, 0, SEEK_END);
733 	if (loSize > offset)
734 	{
735 		if (nbytes >= 0 && nbytes <= loSize - offset)
736 			result_length = nbytes; /* request is wholly inside LO */
737 		else
738 			result_length = loSize - offset;	/* adjust to end of LO */
739 	}
740 	else
741 		result_length = 0;		/* request is wholly outside LO */
742 
743 	/*
744 	 * A result_length calculated from loSize may not fit in a size_t.  Check
745 	 * that the size will satisfy this and subsequently-enforced size limits.
746 	 */
747 	if (result_length > MaxAllocSize - VARHDRSZ)
748 		ereport(ERROR,
749 				(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
750 				 errmsg("large object read request is too large")));
751 
752 	result = (bytea *) palloc(VARHDRSZ + result_length);
753 
754 	inv_seek(loDesc, offset, SEEK_SET);
755 	total_read = inv_read(loDesc, VARDATA(result), result_length);
756 	Assert(total_read == result_length);
757 	SET_VARSIZE(result, result_length + VARHDRSZ);
758 
759 	inv_close(loDesc);
760 
761 	return result;
762 }
763 
764 /*
765  * Read entire LO
766  */
767 Datum
be_lo_get(PG_FUNCTION_ARGS)768 be_lo_get(PG_FUNCTION_ARGS)
769 {
770 	Oid			loOid = PG_GETARG_OID(0);
771 	bytea	   *result;
772 
773 	result = lo_get_fragment_internal(loOid, 0, -1);
774 
775 	PG_RETURN_BYTEA_P(result);
776 }
777 
778 /*
779  * Read range within LO
780  */
781 Datum
be_lo_get_fragment(PG_FUNCTION_ARGS)782 be_lo_get_fragment(PG_FUNCTION_ARGS)
783 {
784 	Oid			loOid = PG_GETARG_OID(0);
785 	int64		offset = PG_GETARG_INT64(1);
786 	int32		nbytes = PG_GETARG_INT32(2);
787 	bytea	   *result;
788 
789 	if (nbytes < 0)
790 		ereport(ERROR,
791 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
792 				 errmsg("requested length cannot be negative")));
793 
794 	result = lo_get_fragment_internal(loOid, offset, nbytes);
795 
796 	PG_RETURN_BYTEA_P(result);
797 }
798 
799 /*
800  * Create LO with initial contents given by a bytea argument
801  */
802 Datum
be_lo_from_bytea(PG_FUNCTION_ARGS)803 be_lo_from_bytea(PG_FUNCTION_ARGS)
804 {
805 	Oid			loOid = PG_GETARG_OID(0);
806 	bytea	   *str = PG_GETARG_BYTEA_PP(1);
807 	LargeObjectDesc *loDesc;
808 	int			written PG_USED_FOR_ASSERTS_ONLY;
809 
810 	lo_cleanup_needed = true;
811 	loOid = inv_create(loOid);
812 	loDesc = inv_open(loOid, INV_WRITE, CurrentMemoryContext);
813 	written = inv_write(loDesc, VARDATA_ANY(str), VARSIZE_ANY_EXHDR(str));
814 	Assert(written == VARSIZE_ANY_EXHDR(str));
815 	inv_close(loDesc);
816 
817 	PG_RETURN_OID(loOid);
818 }
819 
820 /*
821  * Update range within LO
822  */
823 Datum
be_lo_put(PG_FUNCTION_ARGS)824 be_lo_put(PG_FUNCTION_ARGS)
825 {
826 	Oid			loOid = PG_GETARG_OID(0);
827 	int64		offset = PG_GETARG_INT64(1);
828 	bytea	   *str = PG_GETARG_BYTEA_PP(2);
829 	LargeObjectDesc *loDesc;
830 	int			written PG_USED_FOR_ASSERTS_ONLY;
831 
832 	lo_cleanup_needed = true;
833 	loDesc = inv_open(loOid, INV_WRITE, CurrentMemoryContext);
834 
835 	/* Permission check */
836 	if (!lo_compat_privileges &&
837 		pg_largeobject_aclcheck_snapshot(loDesc->id,
838 										 GetUserId(),
839 										 ACL_UPDATE,
840 										 loDesc->snapshot) != ACLCHECK_OK)
841 		ereport(ERROR,
842 				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
843 				 errmsg("permission denied for large object %u",
844 						loDesc->id)));
845 
846 	inv_seek(loDesc, offset, SEEK_SET);
847 	written = inv_write(loDesc, VARDATA_ANY(str), VARSIZE_ANY_EXHDR(str));
848 	Assert(written == VARSIZE_ANY_EXHDR(str));
849 	inv_close(loDesc);
850 
851 	PG_RETURN_VOID();
852 }
853