1 /*-------------------------------------------------------------------------
2  *
3  * be-fsstubs.c
4  *	  Builtin functions for open/close/read/write operations on large objects
5  *
6  * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  *	  src/backend/libpq/be-fsstubs.c
12  *
13  * NOTES
14  *	  This should be moved to a more appropriate place.  It is here
15  *	  for lack of a better place.
16  *
17  *	  These functions store LargeObjectDesc structs in a private MemoryContext,
18  *	  which means that large object descriptors hang around until we destroy
19  *	  the context at transaction end.  It'd be possible to prolong the lifetime
20  *	  of the context so that LO FDs are good across transactions (for example,
21  *	  we could release the context only if we see that no FDs remain open).
22  *	  But we'd need additional state in order to do the right thing at the
23  *	  end of an aborted transaction.  FDs opened during an aborted xact would
24  *	  still need to be closed, since they might not be pointing at valid
25  *	  relations at all.  Locking semantics are also an interesting problem
26  *	  if LOs stay open across transactions.  For now, we'll stick with the
27  *	  existing documented semantics of LO FDs: they're only good within a
28  *	  transaction.
29  *
30  *	  As of PostgreSQL 8.0, much of the angst expressed above is no longer
31  *	  relevant, and in fact it'd be pretty easy to allow LO FDs to stay
32  *	  open across transactions.  (Snapshot relevancy would still be an issue.)
33  *	  However backwards compatibility suggests that we should stick to the
34  *	  status quo.
35  *
36  *-------------------------------------------------------------------------
37  */
38 
39 #include "postgres.h"
40 
41 #include <fcntl.h>
42 #include <sys/stat.h>
43 #include <unistd.h>
44 
45 #include "access/xact.h"
46 #include "libpq/be-fsstubs.h"
47 #include "libpq/libpq-fs.h"
48 #include "miscadmin.h"
49 #include "storage/fd.h"
50 #include "storage/large_object.h"
51 #include "utils/acl.h"
52 #include "utils/builtins.h"
53 #include "utils/memutils.h"
54 #include "utils/snapmgr.h"
55 
56 /*
57  * compatibility flag for permission checks
58  */
59 bool		lo_compat_privileges;
60 
61 /* define this to enable debug logging */
62 /* #define FSDB 1 */
63 /* chunk size for lo_import/lo_export transfers */
64 #define BUFSIZE			8192
65 
66 /*
67  * LO "FD"s are indexes into the cookies array.
68  *
69  * A non-null entry is a pointer to a LargeObjectDesc allocated in the
70  * LO private memory context "fscxt".  The cookies array itself is also
71  * dynamically allocated in that context.  Its current allocated size is
72  * cookies_len entries, of which any unused entries will be NULL.
73  */
74 static LargeObjectDesc **cookies = NULL;
75 static int	cookies_size = 0;
76 
77 static bool lo_cleanup_needed = false;
78 static MemoryContext fscxt = NULL;
79 
80 static int	newLOfd(void);
81 static void closeLOfd(int fd);
82 static Oid	lo_import_internal(text *filename, Oid lobjOid);
83 
84 
85 /*****************************************************************************
86  *	File Interfaces for Large Objects
87  *****************************************************************************/
88 
89 Datum
lo_open(PG_FUNCTION_ARGS)90 lo_open(PG_FUNCTION_ARGS)
91 {
92 	Oid			lobjId = PG_GETARG_OID(0);
93 	int32		mode = PG_GETARG_INT32(1);
94 	LargeObjectDesc *lobjDesc;
95 	int			fd;
96 
97 #if FSDB
98 	elog(DEBUG4, "lo_open(%u,%d)", lobjId, mode);
99 #endif
100 
101 	/*
102 	 * Allocate a large object descriptor first.  This will also create
103 	 * 'fscxt' if this is the first LO opened in this transaction.
104 	 */
105 	fd = newLOfd();
106 
107 	lobjDesc = inv_open(lobjId, mode, fscxt);
108 	if (lobjDesc == NULL)
109 	{							/* lookup failed */
110 #if FSDB
111 		elog(DEBUG4, "could not open large object %u", lobjId);
112 #endif
113 		PG_RETURN_INT32(-1);
114 	}
115 	lobjDesc->subid = GetCurrentSubTransactionId();
116 
117 	/*
118 	 * We must register the snapshot in TopTransaction's resowner so that it
119 	 * stays alive until the LO is closed rather than until the current portal
120 	 * shuts down.
121 	 */
122 	if (lobjDesc->snapshot)
123 		lobjDesc->snapshot = RegisterSnapshotOnOwner(lobjDesc->snapshot,
124 													 TopTransactionResourceOwner);
125 
126 	Assert(cookies[fd] == NULL);
127 	cookies[fd] = lobjDesc;
128 
129 	PG_RETURN_INT32(fd);
130 }
131 
132 Datum
lo_close(PG_FUNCTION_ARGS)133 lo_close(PG_FUNCTION_ARGS)
134 {
135 	int32		fd = PG_GETARG_INT32(0);
136 
137 	if (fd < 0 || fd >= cookies_size || cookies[fd] == NULL)
138 		ereport(ERROR,
139 				(errcode(ERRCODE_UNDEFINED_OBJECT),
140 				 errmsg("invalid large-object descriptor: %d", fd)));
141 
142 #if FSDB
143 	elog(DEBUG4, "lo_close(%d)", fd);
144 #endif
145 
146 	closeLOfd(fd);
147 
148 	PG_RETURN_INT32(0);
149 }
150 
151 
152 /*****************************************************************************
153  *	Bare Read/Write operations --- these are not fmgr-callable!
154  *
155  *	We assume the large object supports byte oriented reads and seeks so
156  *	that our work is easier.
157  *
158  *****************************************************************************/
159 
160 int
lo_read(int fd,char * buf,int len)161 lo_read(int fd, char *buf, int len)
162 {
163 	int			status;
164 	LargeObjectDesc *lobj;
165 
166 	if (fd < 0 || fd >= cookies_size || cookies[fd] == NULL)
167 		ereport(ERROR,
168 				(errcode(ERRCODE_UNDEFINED_OBJECT),
169 				 errmsg("invalid large-object descriptor: %d", fd)));
170 	lobj = cookies[fd];
171 
172 	/* We don't bother to check IFS_RDLOCK, since it's always set */
173 
174 	/* Permission checks --- first time through only */
175 	if ((lobj->flags & IFS_RD_PERM_OK) == 0)
176 	{
177 		if (!lo_compat_privileges &&
178 			pg_largeobject_aclcheck_snapshot(lobj->id,
179 											 GetUserId(),
180 											 ACL_SELECT,
181 											 lobj->snapshot) != ACLCHECK_OK)
182 			ereport(ERROR,
183 					(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
184 					 errmsg("permission denied for large object %u",
185 							lobj->id)));
186 		lobj->flags |= IFS_RD_PERM_OK;
187 	}
188 
189 	status = inv_read(lobj, buf, len);
190 
191 	return status;
192 }
193 
194 int
lo_write(int fd,const char * buf,int len)195 lo_write(int fd, const char *buf, int len)
196 {
197 	int			status;
198 	LargeObjectDesc *lobj;
199 
200 	if (fd < 0 || fd >= cookies_size || cookies[fd] == NULL)
201 		ereport(ERROR,
202 				(errcode(ERRCODE_UNDEFINED_OBJECT),
203 				 errmsg("invalid large-object descriptor: %d", fd)));
204 	lobj = cookies[fd];
205 
206 	if ((lobj->flags & IFS_WRLOCK) == 0)
207 		ereport(ERROR,
208 				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
209 			  errmsg("large object descriptor %d was not opened for writing",
210 					 fd)));
211 
212 	/* Permission checks --- first time through only */
213 	if ((lobj->flags & IFS_WR_PERM_OK) == 0)
214 	{
215 		if (!lo_compat_privileges &&
216 			pg_largeobject_aclcheck_snapshot(lobj->id,
217 											 GetUserId(),
218 											 ACL_UPDATE,
219 											 lobj->snapshot) != ACLCHECK_OK)
220 			ereport(ERROR,
221 					(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
222 					 errmsg("permission denied for large object %u",
223 							lobj->id)));
224 		lobj->flags |= IFS_WR_PERM_OK;
225 	}
226 
227 	status = inv_write(lobj, buf, len);
228 
229 	return status;
230 }
231 
232 Datum
lo_lseek(PG_FUNCTION_ARGS)233 lo_lseek(PG_FUNCTION_ARGS)
234 {
235 	int32		fd = PG_GETARG_INT32(0);
236 	int32		offset = PG_GETARG_INT32(1);
237 	int32		whence = PG_GETARG_INT32(2);
238 	int64		status;
239 
240 	if (fd < 0 || fd >= cookies_size || cookies[fd] == NULL)
241 		ereport(ERROR,
242 				(errcode(ERRCODE_UNDEFINED_OBJECT),
243 				 errmsg("invalid large-object descriptor: %d", fd)));
244 
245 	status = inv_seek(cookies[fd], offset, whence);
246 
247 	/* guard against result overflow */
248 	if (status != (int32) status)
249 		ereport(ERROR,
250 				(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
251 		errmsg("lo_lseek result out of range for large-object descriptor %d",
252 			   fd)));
253 
254 	PG_RETURN_INT32((int32) status);
255 }
256 
257 Datum
lo_lseek64(PG_FUNCTION_ARGS)258 lo_lseek64(PG_FUNCTION_ARGS)
259 {
260 	int32		fd = PG_GETARG_INT32(0);
261 	int64		offset = PG_GETARG_INT64(1);
262 	int32		whence = PG_GETARG_INT32(2);
263 	int64		status;
264 
265 	if (fd < 0 || fd >= cookies_size || cookies[fd] == NULL)
266 		ereport(ERROR,
267 				(errcode(ERRCODE_UNDEFINED_OBJECT),
268 				 errmsg("invalid large-object descriptor: %d", fd)));
269 
270 	status = inv_seek(cookies[fd], offset, whence);
271 
272 	PG_RETURN_INT64(status);
273 }
274 
275 Datum
lo_creat(PG_FUNCTION_ARGS)276 lo_creat(PG_FUNCTION_ARGS)
277 {
278 	Oid			lobjId;
279 
280 	lo_cleanup_needed = true;
281 	lobjId = inv_create(InvalidOid);
282 
283 	PG_RETURN_OID(lobjId);
284 }
285 
286 Datum
lo_create(PG_FUNCTION_ARGS)287 lo_create(PG_FUNCTION_ARGS)
288 {
289 	Oid			lobjId = PG_GETARG_OID(0);
290 
291 	lo_cleanup_needed = true;
292 	lobjId = inv_create(lobjId);
293 
294 	PG_RETURN_OID(lobjId);
295 }
296 
297 Datum
lo_tell(PG_FUNCTION_ARGS)298 lo_tell(PG_FUNCTION_ARGS)
299 {
300 	int32		fd = PG_GETARG_INT32(0);
301 	int64		offset;
302 
303 	if (fd < 0 || fd >= cookies_size || cookies[fd] == NULL)
304 		ereport(ERROR,
305 				(errcode(ERRCODE_UNDEFINED_OBJECT),
306 				 errmsg("invalid large-object descriptor: %d", fd)));
307 
308 	offset = inv_tell(cookies[fd]);
309 
310 	/* guard against result overflow */
311 	if (offset != (int32) offset)
312 		ereport(ERROR,
313 				(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
314 		 errmsg("lo_tell result out of range for large-object descriptor %d",
315 				fd)));
316 
317 	PG_RETURN_INT32((int32) offset);
318 }
319 
320 Datum
lo_tell64(PG_FUNCTION_ARGS)321 lo_tell64(PG_FUNCTION_ARGS)
322 {
323 	int32		fd = PG_GETARG_INT32(0);
324 	int64		offset;
325 
326 	if (fd < 0 || fd >= cookies_size || cookies[fd] == NULL)
327 		ereport(ERROR,
328 				(errcode(ERRCODE_UNDEFINED_OBJECT),
329 				 errmsg("invalid large-object descriptor: %d", fd)));
330 
331 	offset = inv_tell(cookies[fd]);
332 
333 	PG_RETURN_INT64(offset);
334 }
335 
336 Datum
lo_unlink(PG_FUNCTION_ARGS)337 lo_unlink(PG_FUNCTION_ARGS)
338 {
339 	Oid			lobjId = PG_GETARG_OID(0);
340 
341 	/* Must be owner of the largeobject */
342 	if (!lo_compat_privileges &&
343 		!pg_largeobject_ownercheck(lobjId, GetUserId()))
344 		ereport(ERROR,
345 				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
346 				 errmsg("must be owner of large object %u", lobjId)));
347 
348 	/*
349 	 * If there are any open LO FDs referencing that ID, close 'em.
350 	 */
351 	if (fscxt != NULL)
352 	{
353 		int			i;
354 
355 		for (i = 0; i < cookies_size; i++)
356 		{
357 			if (cookies[i] != NULL && cookies[i]->id == lobjId)
358 				closeLOfd(i);
359 		}
360 	}
361 
362 	/*
363 	 * inv_drop does not create a need for end-of-transaction cleanup and
364 	 * hence we don't need to set lo_cleanup_needed.
365 	 */
366 	PG_RETURN_INT32(inv_drop(lobjId));
367 }
368 
369 /*****************************************************************************
370  *	Read/Write using bytea
371  *****************************************************************************/
372 
373 Datum
loread(PG_FUNCTION_ARGS)374 loread(PG_FUNCTION_ARGS)
375 {
376 	int32		fd = PG_GETARG_INT32(0);
377 	int32		len = PG_GETARG_INT32(1);
378 	bytea	   *retval;
379 	int			totalread;
380 
381 	if (len < 0)
382 		len = 0;
383 
384 	retval = (bytea *) palloc(VARHDRSZ + len);
385 	totalread = lo_read(fd, VARDATA(retval), len);
386 	SET_VARSIZE(retval, totalread + VARHDRSZ);
387 
388 	PG_RETURN_BYTEA_P(retval);
389 }
390 
391 Datum
lowrite(PG_FUNCTION_ARGS)392 lowrite(PG_FUNCTION_ARGS)
393 {
394 	int32		fd = PG_GETARG_INT32(0);
395 	bytea	   *wbuf = PG_GETARG_BYTEA_P(1);
396 	int			bytestowrite;
397 	int			totalwritten;
398 
399 	bytestowrite = VARSIZE(wbuf) - VARHDRSZ;
400 	totalwritten = lo_write(fd, VARDATA(wbuf), bytestowrite);
401 	PG_RETURN_INT32(totalwritten);
402 }
403 
404 /*****************************************************************************
405  *	 Import/Export of Large Object
406  *****************************************************************************/
407 
408 /*
409  * lo_import -
410  *	  imports a file as an (inversion) large object.
411  */
412 Datum
lo_import(PG_FUNCTION_ARGS)413 lo_import(PG_FUNCTION_ARGS)
414 {
415 	text	   *filename = PG_GETARG_TEXT_PP(0);
416 
417 	PG_RETURN_OID(lo_import_internal(filename, InvalidOid));
418 }
419 
420 /*
421  * lo_import_with_oid -
422  *	  imports a file as an (inversion) large object specifying oid.
423  */
424 Datum
lo_import_with_oid(PG_FUNCTION_ARGS)425 lo_import_with_oid(PG_FUNCTION_ARGS)
426 {
427 	text	   *filename = PG_GETARG_TEXT_PP(0);
428 	Oid			oid = PG_GETARG_OID(1);
429 
430 	PG_RETURN_OID(lo_import_internal(filename, oid));
431 }
432 
433 static Oid
lo_import_internal(text * filename,Oid lobjOid)434 lo_import_internal(text *filename, Oid lobjOid)
435 {
436 	int			fd;
437 	int			nbytes,
438 				tmp PG_USED_FOR_ASSERTS_ONLY;
439 	char		buf[BUFSIZE];
440 	char		fnamebuf[MAXPGPATH];
441 	LargeObjectDesc *lobj;
442 	Oid			oid;
443 
444 #ifndef ALLOW_DANGEROUS_LO_FUNCTIONS
445 	if (!superuser())
446 		ereport(ERROR,
447 				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
448 				 errmsg("must be superuser to use server-side lo_import()"),
449 				 errhint("Anyone can use the client-side lo_import() provided by libpq.")));
450 #endif
451 
452 	/*
453 	 * open the file to be read in
454 	 */
455 	text_to_cstring_buffer(filename, fnamebuf, sizeof(fnamebuf));
456 	fd = OpenTransientFile(fnamebuf, O_RDONLY | PG_BINARY, S_IRWXU);
457 	if (fd < 0)
458 		ereport(ERROR,
459 				(errcode_for_file_access(),
460 				 errmsg("could not open server file \"%s\": %m",
461 						fnamebuf)));
462 
463 	/*
464 	 * create an inversion object
465 	 */
466 	lo_cleanup_needed = true;
467 	oid = inv_create(lobjOid);
468 
469 	/*
470 	 * read in from the filesystem and write to the inversion object
471 	 */
472 	lobj = inv_open(oid, INV_WRITE, CurrentMemoryContext);
473 
474 	while ((nbytes = read(fd, buf, BUFSIZE)) > 0)
475 	{
476 		tmp = inv_write(lobj, buf, nbytes);
477 		Assert(tmp == nbytes);
478 	}
479 
480 	if (nbytes < 0)
481 		ereport(ERROR,
482 				(errcode_for_file_access(),
483 				 errmsg("could not read server file \"%s\": %m",
484 						fnamebuf)));
485 
486 	inv_close(lobj);
487 	CloseTransientFile(fd);
488 
489 	return oid;
490 }
491 
492 /*
493  * lo_export -
494  *	  exports an (inversion) large object.
495  */
496 Datum
lo_export(PG_FUNCTION_ARGS)497 lo_export(PG_FUNCTION_ARGS)
498 {
499 	Oid			lobjId = PG_GETARG_OID(0);
500 	text	   *filename = PG_GETARG_TEXT_PP(1);
501 	int			fd;
502 	int			nbytes,
503 				tmp;
504 	char		buf[BUFSIZE];
505 	char		fnamebuf[MAXPGPATH];
506 	LargeObjectDesc *lobj;
507 	mode_t		oumask;
508 
509 #ifndef ALLOW_DANGEROUS_LO_FUNCTIONS
510 	if (!superuser())
511 		ereport(ERROR,
512 				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
513 				 errmsg("must be superuser to use server-side lo_export()"),
514 				 errhint("Anyone can use the client-side lo_export() provided by libpq.")));
515 #endif
516 
517 	/*
518 	 * open the inversion object (no need to test for failure)
519 	 */
520 	lo_cleanup_needed = true;
521 	lobj = inv_open(lobjId, INV_READ, CurrentMemoryContext);
522 
523 	/*
524 	 * open the file to be written to
525 	 *
526 	 * Note: we reduce backend's normal 077 umask to the slightly friendlier
527 	 * 022. This code used to drop it all the way to 0, but creating
528 	 * world-writable export files doesn't seem wise.
529 	 */
530 	text_to_cstring_buffer(filename, fnamebuf, sizeof(fnamebuf));
531 	oumask = umask(S_IWGRP | S_IWOTH);
532 	PG_TRY();
533 	{
534 		fd = OpenTransientFile(fnamebuf, O_CREAT | O_WRONLY | O_TRUNC | PG_BINARY,
535 							   S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
536 	}
537 	PG_CATCH();
538 	{
539 		umask(oumask);
540 		PG_RE_THROW();
541 	}
542 	PG_END_TRY();
543 	umask(oumask);
544 	if (fd < 0)
545 		ereport(ERROR,
546 				(errcode_for_file_access(),
547 				 errmsg("could not create server file \"%s\": %m",
548 						fnamebuf)));
549 
550 	/*
551 	 * read in from the inversion file and write to the filesystem
552 	 */
553 	while ((nbytes = inv_read(lobj, buf, BUFSIZE)) > 0)
554 	{
555 		tmp = write(fd, buf, nbytes);
556 		if (tmp != nbytes)
557 			ereport(ERROR,
558 					(errcode_for_file_access(),
559 					 errmsg("could not write server file \"%s\": %m",
560 							fnamebuf)));
561 	}
562 
563 	CloseTransientFile(fd);
564 	inv_close(lobj);
565 
566 	PG_RETURN_INT32(1);
567 }
568 
569 /*
570  * lo_truncate -
571  *	  truncate a large object to a specified length
572  */
573 static void
lo_truncate_internal(int32 fd,int64 len)574 lo_truncate_internal(int32 fd, int64 len)
575 {
576 	LargeObjectDesc *lobj;
577 
578 	if (fd < 0 || fd >= cookies_size || cookies[fd] == NULL)
579 		ereport(ERROR,
580 				(errcode(ERRCODE_UNDEFINED_OBJECT),
581 				 errmsg("invalid large-object descriptor: %d", fd)));
582 	lobj = cookies[fd];
583 
584 	if ((lobj->flags & IFS_WRLOCK) == 0)
585 		ereport(ERROR,
586 				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
587 			  errmsg("large object descriptor %d was not opened for writing",
588 					 fd)));
589 
590 	/* Permission checks --- first time through only */
591 	if ((lobj->flags & IFS_WR_PERM_OK) == 0)
592 	{
593 		if (!lo_compat_privileges &&
594 			pg_largeobject_aclcheck_snapshot(lobj->id,
595 											 GetUserId(),
596 											 ACL_UPDATE,
597 											 lobj->snapshot) != ACLCHECK_OK)
598 			ereport(ERROR,
599 					(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
600 					 errmsg("permission denied for large object %u",
601 							lobj->id)));
602 		lobj->flags |= IFS_WR_PERM_OK;
603 	}
604 
605 	inv_truncate(lobj, len);
606 }
607 
608 Datum
lo_truncate(PG_FUNCTION_ARGS)609 lo_truncate(PG_FUNCTION_ARGS)
610 {
611 	int32		fd = PG_GETARG_INT32(0);
612 	int32		len = PG_GETARG_INT32(1);
613 
614 	lo_truncate_internal(fd, len);
615 	PG_RETURN_INT32(0);
616 }
617 
618 Datum
lo_truncate64(PG_FUNCTION_ARGS)619 lo_truncate64(PG_FUNCTION_ARGS)
620 {
621 	int32		fd = PG_GETARG_INT32(0);
622 	int64		len = PG_GETARG_INT64(1);
623 
624 	lo_truncate_internal(fd, len);
625 	PG_RETURN_INT32(0);
626 }
627 
628 /*
629  * AtEOXact_LargeObject -
630  *		 prepares large objects for transaction commit
631  */
632 void
AtEOXact_LargeObject(bool isCommit)633 AtEOXact_LargeObject(bool isCommit)
634 {
635 	int			i;
636 
637 	if (!lo_cleanup_needed)
638 		return;					/* no LO operations in this xact */
639 
640 	/*
641 	 * Close LO fds and clear cookies array so that LO fds are no longer good.
642 	 * The memory context and resource owner holding them are going away at
643 	 * the end-of-transaction anyway, but on commit, we need to close them to
644 	 * avoid warnings about leaked resources at commit.  On abort we can skip
645 	 * this step.
646 	 */
647 	if (isCommit)
648 	{
649 		for (i = 0; i < cookies_size; i++)
650 		{
651 			if (cookies[i] != NULL)
652 				closeLOfd(i);
653 		}
654 	}
655 
656 	/* Needn't actually pfree since we're about to zap context */
657 	cookies = NULL;
658 	cookies_size = 0;
659 
660 	/* Release the LO memory context to prevent permanent memory leaks. */
661 	if (fscxt)
662 		MemoryContextDelete(fscxt);
663 	fscxt = NULL;
664 
665 	/* Give inv_api.c a chance to clean up, too */
666 	close_lo_relation(isCommit);
667 
668 	lo_cleanup_needed = false;
669 }
670 
671 /*
672  * AtEOSubXact_LargeObject
673  *		Take care of large objects at subtransaction commit/abort
674  *
675  * Reassign LOs created/opened during a committing subtransaction
676  * to the parent subtransaction.  On abort, just close them.
677  */
678 void
AtEOSubXact_LargeObject(bool isCommit,SubTransactionId mySubid,SubTransactionId parentSubid)679 AtEOSubXact_LargeObject(bool isCommit, SubTransactionId mySubid,
680 						SubTransactionId parentSubid)
681 {
682 	int			i;
683 
684 	if (fscxt == NULL)			/* no LO operations in this xact */
685 		return;
686 
687 	for (i = 0; i < cookies_size; i++)
688 	{
689 		LargeObjectDesc *lo = cookies[i];
690 
691 		if (lo != NULL && lo->subid == mySubid)
692 		{
693 			if (isCommit)
694 				lo->subid = parentSubid;
695 			else
696 				closeLOfd(i);
697 		}
698 	}
699 }
700 
701 /*****************************************************************************
702  *	Support routines for this file
703  *****************************************************************************/
704 
705 static int
newLOfd(void)706 newLOfd(void)
707 {
708 	int			i,
709 				newsize;
710 
711 	lo_cleanup_needed = true;
712 	if (fscxt == NULL)
713 		fscxt = AllocSetContextCreate(TopMemoryContext,
714 									  "Filesystem",
715 									  ALLOCSET_DEFAULT_SIZES);
716 
717 	/* Try to find a free slot */
718 	for (i = 0; i < cookies_size; i++)
719 	{
720 		if (cookies[i] == NULL)
721 			return i;
722 	}
723 
724 	/* No free slot, so make the array bigger */
725 	if (cookies_size <= 0)
726 	{
727 		/* First time through, arbitrarily make 64-element array */
728 		i = 0;
729 		newsize = 64;
730 		cookies = (LargeObjectDesc **)
731 			MemoryContextAllocZero(fscxt, newsize * sizeof(LargeObjectDesc *));
732 		cookies_size = newsize;
733 	}
734 	else
735 	{
736 		/* Double size of array */
737 		i = cookies_size;
738 		newsize = cookies_size * 2;
739 		cookies = (LargeObjectDesc **)
740 			repalloc(cookies, newsize * sizeof(LargeObjectDesc *));
741 		MemSet(cookies + cookies_size, 0,
742 			   (newsize - cookies_size) * sizeof(LargeObjectDesc *));
743 		cookies_size = newsize;
744 	}
745 
746 	return i;
747 }
748 
749 static void
closeLOfd(int fd)750 closeLOfd(int fd)
751 {
752 	LargeObjectDesc *lobj;
753 
754 	/*
755 	 * Make sure we do not try to free twice if this errors out for some
756 	 * reason.  Better a leak than a crash.
757 	 */
758 	lobj = cookies[fd];
759 	cookies[fd] = NULL;
760 
761 	if (lobj->snapshot)
762 		UnregisterSnapshotFromOwner(lobj->snapshot,
763 									TopTransactionResourceOwner);
764 	inv_close(lobj);
765 }
766 
767 /*****************************************************************************
768  *	Wrappers oriented toward SQL callers
769  *****************************************************************************/
770 
771 /*
772  * Read [offset, offset+nbytes) within LO; when nbytes is -1, read to end.
773  */
774 static bytea *
lo_get_fragment_internal(Oid loOid,int64 offset,int32 nbytes)775 lo_get_fragment_internal(Oid loOid, int64 offset, int32 nbytes)
776 {
777 	LargeObjectDesc *loDesc;
778 	int64		loSize;
779 	int64		result_length;
780 	int total_read PG_USED_FOR_ASSERTS_ONLY;
781 	bytea	   *result = NULL;
782 
783 	lo_cleanup_needed = true;
784 	loDesc = inv_open(loOid, INV_READ, CurrentMemoryContext);
785 
786 	/* Permission check */
787 	if (!lo_compat_privileges &&
788 		pg_largeobject_aclcheck_snapshot(loDesc->id,
789 										 GetUserId(),
790 										 ACL_SELECT,
791 										 loDesc->snapshot) != ACLCHECK_OK)
792 		ereport(ERROR,
793 				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
794 				 errmsg("permission denied for large object %u",
795 						loDesc->id)));
796 
797 	/*
798 	 * Compute number of bytes we'll actually read, accommodating nbytes == -1
799 	 * and reads beyond the end of the LO.
800 	 */
801 	loSize = inv_seek(loDesc, 0, SEEK_END);
802 	if (loSize > offset)
803 	{
804 		if (nbytes >= 0 && nbytes <= loSize - offset)
805 			result_length = nbytes;		/* request is wholly inside LO */
806 		else
807 			result_length = loSize - offset;	/* adjust to end of LO */
808 	}
809 	else
810 		result_length = 0;		/* request is wholly outside LO */
811 
812 	/*
813 	 * A result_length calculated from loSize may not fit in a size_t.  Check
814 	 * that the size will satisfy this and subsequently-enforced size limits.
815 	 */
816 	if (result_length > MaxAllocSize - VARHDRSZ)
817 		ereport(ERROR,
818 				(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
819 				 errmsg("large object read request is too large")));
820 
821 	result = (bytea *) palloc(VARHDRSZ + result_length);
822 
823 	inv_seek(loDesc, offset, SEEK_SET);
824 	total_read = inv_read(loDesc, VARDATA(result), result_length);
825 	Assert(total_read == result_length);
826 	SET_VARSIZE(result, result_length + VARHDRSZ);
827 
828 	inv_close(loDesc);
829 
830 	return result;
831 }
832 
833 /*
834  * Read entire LO
835  */
836 Datum
lo_get(PG_FUNCTION_ARGS)837 lo_get(PG_FUNCTION_ARGS)
838 {
839 	Oid			loOid = PG_GETARG_OID(0);
840 	bytea	   *result;
841 
842 	result = lo_get_fragment_internal(loOid, 0, -1);
843 
844 	PG_RETURN_BYTEA_P(result);
845 }
846 
847 /*
848  * Read range within LO
849  */
850 Datum
lo_get_fragment(PG_FUNCTION_ARGS)851 lo_get_fragment(PG_FUNCTION_ARGS)
852 {
853 	Oid			loOid = PG_GETARG_OID(0);
854 	int64		offset = PG_GETARG_INT64(1);
855 	int32		nbytes = PG_GETARG_INT32(2);
856 	bytea	   *result;
857 
858 	if (nbytes < 0)
859 		ereport(ERROR,
860 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
861 				 errmsg("requested length cannot be negative")));
862 
863 	result = lo_get_fragment_internal(loOid, offset, nbytes);
864 
865 	PG_RETURN_BYTEA_P(result);
866 }
867 
868 /*
869  * Create LO with initial contents given by a bytea argument
870  */
871 Datum
lo_from_bytea(PG_FUNCTION_ARGS)872 lo_from_bytea(PG_FUNCTION_ARGS)
873 {
874 	Oid			loOid = PG_GETARG_OID(0);
875 	bytea	   *str = PG_GETARG_BYTEA_PP(1);
876 	LargeObjectDesc *loDesc;
877 	int written PG_USED_FOR_ASSERTS_ONLY;
878 
879 	lo_cleanup_needed = true;
880 	loOid = inv_create(loOid);
881 	loDesc = inv_open(loOid, INV_WRITE, CurrentMemoryContext);
882 	written = inv_write(loDesc, VARDATA_ANY(str), VARSIZE_ANY_EXHDR(str));
883 	Assert(written == VARSIZE_ANY_EXHDR(str));
884 	inv_close(loDesc);
885 
886 	PG_RETURN_OID(loOid);
887 }
888 
889 /*
890  * Update range within LO
891  */
892 Datum
lo_put(PG_FUNCTION_ARGS)893 lo_put(PG_FUNCTION_ARGS)
894 {
895 	Oid			loOid = PG_GETARG_OID(0);
896 	int64		offset = PG_GETARG_INT64(1);
897 	bytea	   *str = PG_GETARG_BYTEA_PP(2);
898 	LargeObjectDesc *loDesc;
899 	int written PG_USED_FOR_ASSERTS_ONLY;
900 
901 	lo_cleanup_needed = true;
902 	loDesc = inv_open(loOid, INV_WRITE, CurrentMemoryContext);
903 
904 	/* Permission check */
905 	if (!lo_compat_privileges &&
906 		pg_largeobject_aclcheck_snapshot(loDesc->id,
907 										 GetUserId(),
908 										 ACL_UPDATE,
909 										 loDesc->snapshot) != ACLCHECK_OK)
910 		ereport(ERROR,
911 				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
912 				 errmsg("permission denied for large object %u",
913 						loDesc->id)));
914 
915 	inv_seek(loDesc, offset, SEEK_SET);
916 	written = inv_write(loDesc, VARDATA_ANY(str), VARSIZE_ANY_EXHDR(str));
917 	Assert(written == VARSIZE_ANY_EXHDR(str));
918 	inv_close(loDesc);
919 
920 	PG_RETURN_VOID();
921 }
922