From e53867c375cf5aef4f21a13140bef3c10d245314 Mon Sep 17 00:00:00 2001 From: Alexander Korotkov Date: Sun, 12 Dec 2021 05:22:25 +0300 Subject: [PATCH 001/102] Custom user shared invalidation message --- src/backend/utils/cache/inval.c | 49 +++++++++++++++++++++++++++++++++ src/include/storage/sinval.h | 11 ++++++++ src/include/utils/inval.h | 4 +++ 3 files changed, 64 insertions(+) diff --git a/src/backend/utils/cache/inval.c b/src/backend/utils/cache/inval.c index a140a99f0a7..057f4168a13 100644 --- a/src/backend/utils/cache/inval.c +++ b/src/backend/utils/cache/inval.c @@ -252,6 +252,7 @@ int debug_discard_caches = 0; #define MAX_SYSCACHE_CALLBACKS 64 #define MAX_RELCACHE_CALLBACKS 10 +#define MAX_USERCACHE_CALLBACKS 10 static struct SYSCACHECALLBACK { @@ -273,6 +274,14 @@ static struct RELCACHECALLBACK static int relcache_callback_count = 0; +static struct USERCACHECALLBACK +{ + UsercacheCallbackFunction function; + Datum arg; +} usercache_callback_list[MAX_RELCACHE_CALLBACKS]; + +static int usercache_callback_count = 0; + /* ---------------------------------------------------------------- * Invalidation subgroup support functions * ---------------------------------------------------------------- @@ -683,6 +692,19 @@ LocalExecuteInvalidationMessage(SharedInvalidationMessage *msg) else if (msg->sn.dbId == MyDatabaseId) InvalidateCatalogSnapshot(); } + else if (msg->id == SHAREDINVALUSERCACHE_ID) + { + int i; + for (i = 0; i < usercache_callback_count; i++) + { + struct USERCACHECALLBACK *ccitem = usercache_callback_list + i; + + ccitem->function(ccitem->arg, + msg->usr.arg1, + msg->usr.arg2, + msg->usr.arg3); + } + } else elog(FATAL, "unrecognized SI message ID: %d", msg->id); } @@ -726,6 +748,17 @@ InvalidateSystemCachesExtended(bool debug_discard) ccitem->function(ccitem->arg, InvalidOid); } + + for (i = 0; i < usercache_callback_count; i++) + { + struct USERCACHECALLBACK *ccitem = usercache_callback_list + i; + + ccitem->function(ccitem->arg, + InvalidOid, + InvalidOid, + InvalidOid); + } + } @@ -1570,6 +1603,22 @@ CacheRegisterRelcacheCallback(RelcacheCallbackFunction func, ++relcache_callback_count; } +/* + * CacheRegisterUsercacheCallback + */ +void +CacheRegisterUsercacheCallback(UsercacheCallbackFunction func, + Datum arg) +{ + if (usercache_callback_count >= MAX_USERCACHE_CALLBACKS) + elog(FATAL, "out of usercache_callback_list slots"); + + usercache_callback_list[usercache_callback_count].function = func; + usercache_callback_list[usercache_callback_count].arg = arg; + + ++usercache_callback_count; +} + /* * CallSyscacheCallbacks * diff --git a/src/include/storage/sinval.h b/src/include/storage/sinval.h index 0721e4d2058..ef748bfe1e1 100644 --- a/src/include/storage/sinval.h +++ b/src/include/storage/sinval.h @@ -110,6 +110,16 @@ typedef struct Oid relId; /* relation ID */ } SharedInvalSnapshotMsg; +#define SHAREDINVALUSERCACHE_ID (-6) + +typedef struct +{ + int8 id; /* type field --- must be first */ + Oid arg1; /* user-specific values */ + Oid arg2; + Oid arg3; +} SharedInvalUserMsg; + typedef union { int8 id; /* type field --- must be first */ @@ -119,6 +129,7 @@ typedef union SharedInvalSmgrMsg sm; SharedInvalRelmapMsg rm; SharedInvalSnapshotMsg sn; + SharedInvalUserMsg usr; } SharedInvalidationMessage; diff --git a/src/include/utils/inval.h b/src/include/utils/inval.h index 14b4eac0630..a57f834d01d 100644 --- a/src/include/utils/inval.h +++ b/src/include/utils/inval.h @@ -22,6 +22,7 @@ extern PGDLLIMPORT int debug_discard_caches; typedef void (*SyscacheCallbackFunction) (Datum arg, int cacheid, uint32 hashvalue); typedef void (*RelcacheCallbackFunction) (Datum arg, Oid relid); +typedef void (*UsercacheCallbackFunction) (Datum arg, Oid arg1, Oid arg2, Oid arg3); extern void AcceptInvalidationMessages(void); @@ -59,6 +60,9 @@ extern void CacheRegisterSyscacheCallback(int cacheid, extern void CacheRegisterRelcacheCallback(RelcacheCallbackFunction func, Datum arg); +extern void CacheRegisterUsercacheCallback(UsercacheCallbackFunction func, + Datum arg); + extern void CallSyscacheCallbacks(int cacheid, uint32 hashvalue); extern void InvalidateSystemCaches(void); From 7abf8dd1ef32d742ecfa6ebdf09b4c51c4ef6be0 Mon Sep 17 00:00:00 2001 From: Alexander Korotkov Date: Sun, 12 Dec 2021 05:24:57 +0300 Subject: [PATCH 002/102] CacheInvalidateRelcacheByDbidRelid() --- src/backend/utils/cache/inval.c | 19 +++++++++++++++++++ src/include/utils/inval.h | 2 ++ 2 files changed, 21 insertions(+) diff --git a/src/backend/utils/cache/inval.c b/src/backend/utils/cache/inval.c index 057f4168a13..b09c2f7cfd4 100644 --- a/src/backend/utils/cache/inval.c +++ b/src/backend/utils/cache/inval.c @@ -1465,6 +1465,25 @@ CacheInvalidateRelcacheByRelid(Oid relid) ReleaseSysCache(tup); } +/* + * CacheInvalidateRelcacheByDbidRelid + */ +void +CacheInvalidateRelcacheByDbidRelid(Oid dbid, Oid relid) +{ + SharedInvalidationMessage msg; + + PrepareInvalidationState(); + + msg.rc.id = SHAREDINVALRELCACHE_ID; + msg.rc.dbId = dbid; + msg.rc.relId = relid; + /* check AddCatcacheInvalidationMessage() for an explanation */ + VALGRIND_MAKE_MEM_DEFINED(&msg, sizeof(msg)); + + SendSharedInvalidMessages(&msg, 1); +} + /* * CacheInvalidateSmgr diff --git a/src/include/utils/inval.h b/src/include/utils/inval.h index a57f834d01d..1461271bbe6 100644 --- a/src/include/utils/inval.h +++ b/src/include/utils/inval.h @@ -49,6 +49,8 @@ extern void CacheInvalidateRelcacheByTuple(HeapTuple classTuple); extern void CacheInvalidateRelcacheByRelid(Oid relid); +extern void CacheInvalidateRelcacheByDbidRelid(Oid dbid, Oid relid); + extern void CacheInvalidateSmgr(RelFileLocatorBackend rlocator); extern void CacheInvalidateRelmap(Oid databaseId); From a16eaa6bdf026cecd00c9b83d1a9cdf72ec1a71c Mon Sep 17 00:00:00 2001 From: Alexander Korotkov Date: Sun, 12 Dec 2021 20:26:34 +0300 Subject: [PATCH 003/102] CommitSeqNo data type --- src/include/access/transam.h | 16 ++++++++++++++++ src/include/c.h | 4 +++- 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/src/include/access/transam.h b/src/include/access/transam.h index f5af6d30556..f7bcb4a8822 100644 --- a/src/include/access/transam.h +++ b/src/include/access/transam.h @@ -196,6 +196,22 @@ FullTransactionIdAdvance(FullTransactionId *dest) #define FirstUnpinnedObjectId 12000 #define FirstNormalObjectId 16384 +#define COMMITSEQNO_INPROGRESS UINT64CONST(0x0) +#define COMMITSEQNO_NON_DELETED UINT64CONST(0x1) +#define COMMITSEQNO_ABORTED UINT64CONST(0x2) +#define COMMITSEQNO_FROZEN UINT64CONST(0x3) +#define COMMITSEQNO_COMMITTING UINT64CONST(0x4) +#define COMMITSEQNO_FIRST_NORMAL UINT64CONST(0x5) +#define COMMITSEQNO_MAX_NORMAL UINT64CONST(0x7FFFFFFFFFFFFFFF) + +#define COMMITSEQNO_IS_INPROGRESS(csn) ((csn) == COMMITSEQNO_INPROGRESS || (csn) == COMMITSEQNO_NON_DELETED) +#define COMMITSEQNO_IS_NON_DELETED(csn) ((csn) == COMMITSEQNO_NON_DELETED) +#define COMMITSEQNO_IS_ABORTED(csn) ((csn) == COMMITSEQNO_ABORTED) +#define COMMITSEQNO_IS_FROZEN(csn) ((csn) == COMMITSEQNO_FROZEN) +#define COMMITSEQNO_IS_NORMAL(csn) ((csn) >= COMMITSEQNO_FIRST_NORMAL) +#define COMMITSEQNO_IS_COMMITTING(csn) ((csn) == COMMITSEQNO_COMMITTING) +#define COMMITSEQNO_IS_COMMITTED(csn) ((csn) >= COMMITSEQNO_FROZEN) + /* * VariableCache is a data structure in shared memory that is used to track * OID and XID assignment state. For largely historical reasons, there is diff --git a/src/include/c.h b/src/include/c.h index a7258cce1fc..e578ff5384a 100644 --- a/src/include/c.h +++ b/src/include/c.h @@ -621,7 +621,7 @@ typedef double float8; /* * Oid, RegProcedure, TransactionId, SubTransactionId, MultiXactId, - * CommandId + * CommandId, CommitSeqNo */ /* typedef Oid is in postgres_ext.h */ @@ -652,6 +652,8 @@ typedef uint32 CommandId; #define FirstCommandId ((CommandId) 0) #define InvalidCommandId (~(CommandId)0) +typedef uint64 CommitSeqNo; + /* ---------------- * Variable-length datatypes all share the 'struct varlena' header. From 8433a45301148e13b98fabc5fb74fe4cfe4ba786 Mon Sep 17 00:00:00 2001 From: Alexander Korotkov Date: Sun, 12 Dec 2021 20:36:18 +0300 Subject: [PATCH 004/102] Custom TOAST --- contrib/pageinspect/heapfuncs.c | 1 + contrib/test_decoding/test_decoding.c | 2 +- src/backend/access/common/detoast.c | 42 ++++++++++++++++--- src/backend/access/common/toast_compression.c | 7 +++- src/backend/access/common/toast_internals.c | 4 +- src/backend/access/table/toast_helper.c | 6 +-- src/backend/replication/logical/proto.c | 2 +- src/backend/replication/pgoutput/pgoutput.c | 4 +- src/include/access/detoast.h | 14 +++++++ src/include/varatt.h | 33 ++++++++++++++- src/test/regress/regress.c | 2 +- 11 files changed, 98 insertions(+), 19 deletions(-) diff --git a/contrib/pageinspect/heapfuncs.c b/contrib/pageinspect/heapfuncs.c index e8f43b8bdbf..6ad6c49dedb 100644 --- a/contrib/pageinspect/heapfuncs.c +++ b/contrib/pageinspect/heapfuncs.c @@ -377,6 +377,7 @@ tuple_data_split_internal(Oid relid, char *tupdata, */ if (VARATT_IS_EXTERNAL(tupdata + off) && !VARATT_IS_EXTERNAL_ONDISK(tupdata + off) && + !VARATT_IS_EXTERNAL_ORIOLEDB(tupdata + off) && !VARATT_IS_EXTERNAL_INDIRECT(tupdata + off)) ereport(ERROR, (errcode(ERRCODE_DATA_CORRUPTED), diff --git a/contrib/test_decoding/test_decoding.c b/contrib/test_decoding/test_decoding.c index 12d1d0505d7..dedc4be074f 100644 --- a/contrib/test_decoding/test_decoding.c +++ b/contrib/test_decoding/test_decoding.c @@ -578,7 +578,7 @@ tuple_to_stringinfo(StringInfo s, TupleDesc tupdesc, HeapTuple tuple, bool skip_ /* print data */ if (isnull) appendStringInfoString(s, "null"); - else if (typisvarlena && VARATT_IS_EXTERNAL_ONDISK(origval)) + else if (typisvarlena && (VARATT_IS_EXTERNAL_ONDISK(origval) || VARATT_IS_EXTERNAL_ORIOLEDB(origval))) appendStringInfoString(s, "unchanged-toast-datum"); else if (!typisvarlena) print_literal(s, typid, diff --git a/src/backend/access/common/detoast.c b/src/backend/access/common/detoast.c index 108e0126a14..8af80c80865 100644 --- a/src/backend/access/common/detoast.c +++ b/src/backend/access/common/detoast.c @@ -26,7 +26,6 @@ static struct varlena *toast_fetch_datum(struct varlena *attr); static struct varlena *toast_fetch_datum_slice(struct varlena *attr, int32 sliceoffset, int32 slicelength); -static struct varlena *toast_decompress_datum(struct varlena *attr); static struct varlena *toast_decompress_datum_slice(struct varlena *attr, int32 slicelength); /* ---------- @@ -46,7 +45,7 @@ detoast_external_attr(struct varlena *attr) { struct varlena *result; - if (VARATT_IS_EXTERNAL_ONDISK(attr)) + if (VARATT_IS_EXTERNAL_ONDISK(attr) || VARATT_IS_EXTERNAL_ORIOLEDB(attr)) { /* * This is an external stored plain value @@ -115,7 +114,7 @@ detoast_external_attr(struct varlena *attr) struct varlena * detoast_attr(struct varlena *attr) { - if (VARATT_IS_EXTERNAL_ONDISK(attr)) + if (VARATT_IS_EXTERNAL_ONDISK(attr) || VARATT_IS_EXTERNAL_ORIOLEDB(attr)) { /* * This is an externally stored datum --- fetch it back from there @@ -332,6 +331,20 @@ detoast_attr_slice(struct varlena *attr, return result; } +static ToastFunc o_detoast_func = NULL; + +void +register_o_detoast_func(ToastFunc func) +{ + o_detoast_func = func; +} + +void +deregister_o_detoast_func() +{ + o_detoast_func = NULL; +} + /* ---------- * toast_fetch_datum - * @@ -347,6 +360,17 @@ toast_fetch_datum(struct varlena *attr) struct varatt_external toast_pointer; int32 attrsize; + if (VARATT_IS_EXTERNAL_ORIOLEDB(attr)) + { + if (o_detoast_func != NULL) + { + result = o_detoast_func(attr); + if (result == NULL) + elog(ERROR, "unexpected NULL detoast result"); + return result; + } + } + if (!VARATT_IS_EXTERNAL_ONDISK(attr)) elog(ERROR, "toast_fetch_datum shouldn't be called for non-ondisk datums"); @@ -467,7 +491,7 @@ toast_fetch_datum_slice(struct varlena *attr, int32 sliceoffset, * * Decompress a compressed version of a varlena datum */ -static struct varlena * +struct varlena * toast_decompress_datum(struct varlena *attr) { ToastCompressionId cmid; @@ -547,11 +571,17 @@ toast_raw_datum_size(Datum value) struct varlena *attr = (struct varlena *) DatumGetPointer(value); Size result; - if (VARATT_IS_EXTERNAL_ONDISK(attr)) + if (VARATT_IS_EXTERNAL_ORIOLEDB(attr)) + { + OToastExternal *toasted = (OToastExternal*) VARDATA_EXTERNAL(attr); + result = toasted->raw_size + VARHDRSZ; + } + else if (VARATT_IS_EXTERNAL_ONDISK(attr)) { - /* va_rawsize is the size of the original datum -- including header */ struct varatt_external toast_pointer; + /* va_rawsize is the size of the original datum -- including header */ + VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr); result = toast_pointer.va_rawsize; } diff --git a/src/backend/access/common/toast_compression.c b/src/backend/access/common/toast_compression.c index 4cf956a759c..4b281ed438d 100644 --- a/src/backend/access/common/toast_compression.c +++ b/src/backend/access/common/toast_compression.c @@ -262,7 +262,12 @@ toast_get_compression_id(struct varlena *attr) * the external toast pointer. If compressed inline, fetch it from the * toast compression header. */ - if (VARATT_IS_EXTERNAL_ONDISK(attr)) + if (VARATT_IS_EXTERNAL_ORIOLEDB(attr)) + { + OToastExternal *toasted = (OToastExternal*) VARDATA_EXTERNAL(attr); + cmid = toasted->formatFlags >> ORIOLEDB_EXT_FORMAT_FLAGS_BITS; + } + else if (VARATT_IS_EXTERNAL_ONDISK(attr)) { struct varatt_external toast_pointer; diff --git a/src/backend/access/common/toast_internals.c b/src/backend/access/common/toast_internals.c index c82a914329e..f1e63599bf3 100644 --- a/src/backend/access/common/toast_internals.c +++ b/src/backend/access/common/toast_internals.c @@ -240,7 +240,7 @@ toast_save_datum(Relation rel, Datum value, { struct varatt_external old_toast_pointer; - Assert(VARATT_IS_EXTERNAL_ONDISK(oldexternal)); + Assert(VARATT_IS_EXTERNAL_ONDISK(oldexternal) || VARATT_IS_EXTERNAL_ORIOLEDB(oldexternal)); /* Must copy to access aligned fields */ VARATT_EXTERNAL_GET_POINTER(old_toast_pointer, oldexternal); if (old_toast_pointer.va_toastrelid == rel->rd_toastoid) @@ -396,7 +396,7 @@ toast_delete_datum(Relation rel, Datum value, bool is_speculative) int validIndex; SnapshotData SnapshotToast; - if (!VARATT_IS_EXTERNAL_ONDISK(attr)) + if (!VARATT_IS_EXTERNAL_ONDISK(attr) && !VARATT_IS_EXTERNAL_ORIOLEDB(attr)) return; /* Must copy to access aligned fields */ diff --git a/src/backend/access/table/toast_helper.c b/src/backend/access/table/toast_helper.c index b5cfeb21aab..2afcd4830d5 100644 --- a/src/backend/access/table/toast_helper.c +++ b/src/backend/access/table/toast_helper.c @@ -72,10 +72,10 @@ toast_tuple_init(ToastTupleContext *ttc) * we have to delete it later. */ if (att->attlen == -1 && !ttc->ttc_oldisnull[i] && - VARATT_IS_EXTERNAL_ONDISK(old_value)) + (VARATT_IS_EXTERNAL_ONDISK(old_value) || VARATT_IS_EXTERNAL_ORIOLEDB(old_value))) { if (ttc->ttc_isnull[i] || - !VARATT_IS_EXTERNAL_ONDISK(new_value) || + !(VARATT_IS_EXTERNAL_ONDISK(new_value) || VARATT_IS_EXTERNAL_ORIOLEDB(new_value)) || memcmp((char *) old_value, (char *) new_value, VARSIZE_EXTERNAL(old_value)) != 0) { @@ -331,7 +331,7 @@ toast_delete_external(Relation rel, Datum *values, bool *isnull, if (isnull[i]) continue; - else if (VARATT_IS_EXTERNAL_ONDISK(value)) + else if (VARATT_IS_EXTERNAL_ONDISK(value) || VARATT_IS_EXTERNAL_ORIOLEDB(value)) toast_delete_datum(rel, value, is_speculative); } } diff --git a/src/backend/replication/logical/proto.c b/src/backend/replication/logical/proto.c index 504f94d4a77..03f9a54f587 100644 --- a/src/backend/replication/logical/proto.c +++ b/src/backend/replication/logical/proto.c @@ -814,7 +814,7 @@ logicalrep_write_tuple(StringInfo out, Relation rel, TupleTableSlot *slot, continue; } - if (att->attlen == -1 && VARATT_IS_EXTERNAL_ONDISK(values[i])) + if (att->attlen == -1 && (VARATT_IS_EXTERNAL_ONDISK(values[i]) || VARATT_IS_EXTERNAL_ORIOLEDB(values[i]))) { /* * Unchanged toasted datum. (Note that we don't promise to detect diff --git a/src/backend/replication/pgoutput/pgoutput.c b/src/backend/replication/pgoutput/pgoutput.c index 62e584a2cd1..ed5d83860d3 100644 --- a/src/backend/replication/pgoutput/pgoutput.c +++ b/src/backend/replication/pgoutput/pgoutput.c @@ -1353,8 +1353,8 @@ pgoutput_row_filter(Relation relation, TupleTableSlot *old_slot, * VARTAG_INDIRECT. See ReorderBufferToastReplace. */ if (att->attlen == -1 && - VARATT_IS_EXTERNAL_ONDISK(new_slot->tts_values[i]) && - !VARATT_IS_EXTERNAL_ONDISK(old_slot->tts_values[i])) + (VARATT_IS_EXTERNAL_ONDISK(new_slot->tts_values[i]) || VARATT_IS_EXTERNAL_ORIOLEDB(new_slot->tts_values[i])) && + !(VARATT_IS_EXTERNAL_ONDISK(old_slot->tts_values[i]) || VARATT_IS_EXTERNAL_ORIOLEDB(old_slot->tts_values[i])) ) { if (!tmp_new_slot) { diff --git a/src/include/access/detoast.h b/src/include/access/detoast.h index 908e1fc6919..26ef91e23df 100644 --- a/src/include/access/detoast.h +++ b/src/include/access/detoast.h @@ -63,6 +63,13 @@ extern struct varlena *detoast_attr_slice(struct varlena *attr, int32 sliceoffset, int32 slicelength); +/* ---------- + * toast_decompress_datum - + * + * Decompress a compressed version of a varlena datum + */ +extern struct varlena *toast_decompress_datum(struct varlena *attr); + /* ---------- * toast_raw_datum_size - * @@ -79,4 +86,11 @@ extern Size toast_raw_datum_size(Datum value); */ extern Size toast_datum_size(Datum value); +/* + * for in_memory module + */ +typedef struct varlena* (*ToastFunc) (struct varlena *attr); +extern void register_o_detoast_func(ToastFunc func); +extern void deregister_o_detoast_func(void); + #endif /* DETOAST_H */ diff --git a/src/include/varatt.h b/src/include/varatt.h index e34870526ba..0128c2d1778 100644 --- a/src/include/varatt.h +++ b/src/include/varatt.h @@ -38,6 +38,23 @@ typedef struct varatt_external Oid va_toastrelid; /* RelID of TOAST table containing it */ } varatt_external; +typedef struct OToastExternal +{ + uint16 data_size; /* length of OToastExternal data */ + int16 attnum; + int32 raw_size; /* original data size */ + int32 toasted_size; /* compressed original data size */ + /* for fetching data from TOAST tree */ + CommitSeqNo csn; + /* for finding TOAST tree */ + Oid datoid; + Oid relid; + Oid relnode; + /* for storing primary index tuple */ + uint8 formatFlags; /* primary index tuple flags */ + char data[FLEXIBLE_ARRAY_MEMBER]; /* data (primary index tuple) */ +} OToastExternal; + /* * These macros define the "saved size" portion of va_extinfo. Its remaining * two high-order bits identify the compression method. @@ -86,17 +103,21 @@ typedef enum vartag_external VARTAG_INDIRECT = 1, VARTAG_EXPANDED_RO = 2, VARTAG_EXPANDED_RW = 3, - VARTAG_ONDISK = 18 + VARTAG_ONDISK = 18, + VARTAG_ORIOLEDB = 34 } vartag_external; /* this test relies on the specific tag values above */ #define VARTAG_IS_EXPANDED(tag) \ (((tag) & ~1) == VARTAG_EXPANDED_RO) +#define O_TOAST_EXTERNAL_SZ offsetof(OToastExternal, data) + #define VARTAG_SIZE(tag) \ ((tag) == VARTAG_INDIRECT ? sizeof(varatt_indirect) : \ VARTAG_IS_EXPANDED(tag) ? sizeof(varatt_expanded) : \ (tag) == VARTAG_ONDISK ? sizeof(varatt_external) : \ + (tag) == VARTAG_ORIOLEDB ? O_TOAST_EXTERNAL_SZ : \ (AssertMacro(false), 0)) /* @@ -282,11 +303,16 @@ typedef struct #define VARDATA_SHORT(PTR) VARDATA_1B(PTR) #define VARTAG_EXTERNAL(PTR) VARTAG_1B_E(PTR) -#define VARSIZE_EXTERNAL(PTR) (VARHDRSZ_EXTERNAL + VARTAG_SIZE(VARTAG_EXTERNAL(PTR))) +#define VARSIZE_EXTERNAL(PTR) (VARHDRSZ_EXTERNAL + VARTAG_SIZE(VARTAG_EXTERNAL(PTR)) \ + + (VARATT_IS_EXTERNAL_ORIOLEDB(PTR) ? \ + *((uint16 *) VARDATA_1B_E(PTR)) \ + : 0)) + #define VARDATA_EXTERNAL(PTR) VARDATA_1B_E(PTR) #define VARATT_IS_COMPRESSED(PTR) VARATT_IS_4B_C(PTR) #define VARATT_IS_EXTERNAL(PTR) VARATT_IS_1B_E(PTR) + #define VARATT_IS_EXTERNAL_ONDISK(PTR) \ (VARATT_IS_EXTERNAL(PTR) && VARTAG_EXTERNAL(PTR) == VARTAG_ONDISK) #define VARATT_IS_EXTERNAL_INDIRECT(PTR) \ @@ -299,6 +325,9 @@ typedef struct (VARATT_IS_EXTERNAL(PTR) && VARTAG_IS_EXPANDED(VARTAG_EXTERNAL(PTR))) #define VARATT_IS_EXTERNAL_NON_EXPANDED(PTR) \ (VARATT_IS_EXTERNAL(PTR) && !VARTAG_IS_EXPANDED(VARTAG_EXTERNAL(PTR))) +#define VARATT_IS_EXTERNAL_ORIOLEDB(PTR) \ + (VARATT_IS_EXTERNAL(PTR) && VARTAG_EXTERNAL(PTR) == VARTAG_ORIOLEDB) + #define VARATT_IS_SHORT(PTR) VARATT_IS_1B(PTR) #define VARATT_IS_EXTENDED(PTR) (!VARATT_IS_4B_U(PTR)) diff --git a/src/test/regress/regress.c b/src/test/regress/regress.c index c168081eb4a..fdc51899455 100644 --- a/src/test/regress/regress.c +++ b/src/test/regress/regress.c @@ -606,7 +606,7 @@ make_tuple_indirect(PG_FUNCTION_ARGS) continue; /* copy datum, so it still lives later */ - if (VARATT_IS_EXTERNAL_ONDISK(attr)) + if (VARATT_IS_EXTERNAL_ONDISK(attr) || VARATT_IS_EXTERNAL_ORIOLEDB(attr)) attr = detoast_external_attr(attr); else { From 47917dc2816868c5778a948077e5c484be255a25 Mon Sep 17 00:00:00 2001 From: Alexander Korotkov Date: Thu, 23 Mar 2023 00:12:00 +0300 Subject: [PATCH 005/102] Allow locking updated tuples in tuple_update() and tuple_delete() Discussion: https://postgr.es/m/CAPpHfdua-YFw3XTprfutzGp28xXLigFtzNbuFY8yPhqeq6X5kg%40mail.gmail.com Reviewed-by: Aleksander Alekseev, Pavel Borisov, Vignesh C, Mason Sharp Reviewed-by: Andres Freund, Chris Travers --- src/backend/access/heap/heapam.c | 205 ++++++++++---- src/backend/access/heap/heapam_handler.c | 94 +++++-- src/backend/access/table/tableam.c | 26 +- src/backend/commands/trigger.c | 55 +--- src/backend/executor/execReplication.c | 19 +- src/backend/executor/nodeModifyTable.c | 343 +++++++++-------------- src/include/access/heapam.h | 19 +- src/include/access/tableam.h | 69 +++-- src/include/commands/trigger.h | 4 +- 9 files changed, 482 insertions(+), 352 deletions(-) diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index e6c83334de4..fe51777614d 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -2560,10 +2560,11 @@ xmax_infomask_changed(uint16 new_infomask, uint16 old_infomask) } /* - * heap_delete - delete a tuple + * heap_delete - delete a tuple, optionally fetching it into a slot * * See table_tuple_delete() for an explanation of the parameters, except that - * this routine directly takes a tuple rather than a slot. + * this routine directly takes a tuple rather than a slot. Also, we don't + * place a lock on the tuple in this function, just fetch the existing version. * * In the failure cases, the routine fills *tmfd with the tuple's t_ctid, * t_xmax (resolving a possible MultiXact, if necessary), and t_cmax (the last @@ -2572,8 +2573,9 @@ xmax_infomask_changed(uint16 new_infomask, uint16 old_infomask) */ TM_Result heap_delete(Relation relation, ItemPointer tid, - CommandId cid, Snapshot crosscheck, bool wait, - TM_FailureData *tmfd, bool changingPart) + CommandId cid, Snapshot crosscheck, int options, + TM_FailureData *tmfd, bool changingPart, + TupleTableSlot *oldSlot) { TM_Result result; TransactionId xid = GetCurrentTransactionId(); @@ -2653,7 +2655,7 @@ heap_delete(Relation relation, ItemPointer tid, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), errmsg("attempted to delete invisible tuple"))); } - else if (result == TM_BeingModified && wait) + else if (result == TM_BeingModified && (options & TABLE_MODIFY_WAIT)) { TransactionId xwait; uint16 infomask; @@ -2794,7 +2796,30 @@ heap_delete(Relation relation, ItemPointer tid, tmfd->cmax = HeapTupleHeaderGetCmax(tp.t_data); else tmfd->cmax = InvalidCommandId; - UnlockReleaseBuffer(buffer); + + /* + * If we're asked to lock the updated tuple, we just fetch the + * existing tuple. That let's the caller save some resources on + * placing the lock. + */ + if (result == TM_Updated && + (options & TABLE_MODIFY_LOCK_UPDATED)) + { + BufferHeapTupleTableSlot *bslot; + + Assert(TTS_IS_BUFFERTUPLE(oldSlot)); + bslot = (BufferHeapTupleTableSlot *) oldSlot; + + LockBuffer(buffer, BUFFER_LOCK_UNLOCK); + bslot->base.tupdata = tp; + ExecStorePinnedBufferHeapTuple(&bslot->base.tupdata, + oldSlot, + buffer); + } + else + { + UnlockReleaseBuffer(buffer); + } if (have_tuple_lock) UnlockTupleTuplock(relation, &(tp.t_self), LockTupleExclusive); if (vmbuffer != InvalidBuffer) @@ -2968,8 +2993,24 @@ heap_delete(Relation relation, ItemPointer tid, */ CacheInvalidateHeapTuple(relation, &tp, NULL); - /* Now we can release the buffer */ - ReleaseBuffer(buffer); + /* Fetch the old tuple version if we're asked for that. */ + if (options & TABLE_MODIFY_FETCH_OLD_TUPLE) + { + BufferHeapTupleTableSlot *bslot; + + Assert(TTS_IS_BUFFERTUPLE(oldSlot)); + bslot = (BufferHeapTupleTableSlot *) oldSlot; + + bslot->base.tupdata = tp; + ExecStorePinnedBufferHeapTuple(&bslot->base.tupdata, + oldSlot, + buffer); + } + else + { + /* Now we can release the buffer */ + ReleaseBuffer(buffer); + } /* * Release the lmgr tuple lock, if we had it. @@ -3001,8 +3042,8 @@ simple_heap_delete(Relation relation, ItemPointer tid) result = heap_delete(relation, tid, GetCurrentCommandId(true), InvalidSnapshot, - true /* wait for commit */ , - &tmfd, false /* changingPart */ ); + TABLE_MODIFY_WAIT /* wait for commit */ , + &tmfd, false /* changingPart */ , NULL); switch (result) { case TM_SelfModified: @@ -3029,10 +3070,11 @@ simple_heap_delete(Relation relation, ItemPointer tid) } /* - * heap_update - replace a tuple + * heap_update - replace a tuple, optionally fetching it into a slot * * See table_tuple_update() for an explanation of the parameters, except that - * this routine directly takes a tuple rather than a slot. + * this routine directly takes a tuple rather than a slot. Also, we don't + * place a lock on the tuple in this function, just fetch the existing version. * * In the failure cases, the routine fills *tmfd with the tuple's t_ctid, * t_xmax (resolving a possible MultiXact, if necessary), and t_cmax (the last @@ -3041,9 +3083,9 @@ simple_heap_delete(Relation relation, ItemPointer tid) */ TM_Result heap_update(Relation relation, ItemPointer otid, HeapTuple newtup, - CommandId cid, Snapshot crosscheck, bool wait, + CommandId cid, Snapshot crosscheck, int options, TM_FailureData *tmfd, LockTupleMode *lockmode, - TU_UpdateIndexes *update_indexes) + TU_UpdateIndexes *update_indexes, TupleTableSlot *oldSlot) { TM_Result result; TransactionId xid = GetCurrentTransactionId(); @@ -3270,7 +3312,7 @@ heap_update(Relation relation, ItemPointer otid, HeapTuple newtup, result = HeapTupleSatisfiesUpdate(&oldtup, cid, buffer); /* see below about the "no wait" case */ - Assert(result != TM_BeingModified || wait); + Assert(result != TM_BeingModified || (options & TABLE_MODIFY_WAIT)); if (result == TM_Invisible) { @@ -3279,7 +3321,7 @@ heap_update(Relation relation, ItemPointer otid, HeapTuple newtup, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), errmsg("attempted to update invisible tuple"))); } - else if (result == TM_BeingModified && wait) + else if (result == TM_BeingModified && (options & TABLE_MODIFY_WAIT)) { TransactionId xwait; uint16 infomask; @@ -3483,7 +3525,30 @@ heap_update(Relation relation, ItemPointer otid, HeapTuple newtup, tmfd->cmax = HeapTupleHeaderGetCmax(oldtup.t_data); else tmfd->cmax = InvalidCommandId; - UnlockReleaseBuffer(buffer); + + /* + * If we're asked to lock the updated tuple, we just fetch the + * existing tuple. That let's the caller save some resouces on + * placing the lock. + */ + if (result == TM_Updated && + (options & TABLE_MODIFY_LOCK_UPDATED)) + { + BufferHeapTupleTableSlot *bslot; + + Assert(TTS_IS_BUFFERTUPLE(oldSlot)); + bslot = (BufferHeapTupleTableSlot *) oldSlot; + + LockBuffer(buffer, BUFFER_LOCK_UNLOCK); + bslot->base.tupdata = oldtup; + ExecStorePinnedBufferHeapTuple(&bslot->base.tupdata, + oldSlot, + buffer); + } + else + { + UnlockReleaseBuffer(buffer); + } if (have_tuple_lock) UnlockTupleTuplock(relation, &(oldtup.t_self), *lockmode); if (vmbuffer != InvalidBuffer) @@ -3962,7 +4027,26 @@ heap_update(Relation relation, ItemPointer otid, HeapTuple newtup, /* Now we can release the buffer(s) */ if (newbuf != buffer) ReleaseBuffer(newbuf); - ReleaseBuffer(buffer); + + /* Fetch the old tuple version if we're asked for that. */ + if (options & TABLE_MODIFY_FETCH_OLD_TUPLE) + { + BufferHeapTupleTableSlot *bslot; + + Assert(TTS_IS_BUFFERTUPLE(oldSlot)); + bslot = (BufferHeapTupleTableSlot *) oldSlot; + + bslot->base.tupdata = oldtup; + ExecStorePinnedBufferHeapTuple(&bslot->base.tupdata, + oldSlot, + buffer); + } + else + { + /* Now we can release the buffer */ + ReleaseBuffer(buffer); + } + if (BufferIsValid(vmbuffer_new)) ReleaseBuffer(vmbuffer_new); if (BufferIsValid(vmbuffer)) @@ -4292,8 +4376,8 @@ simple_heap_update(Relation relation, ItemPointer otid, HeapTuple tup, result = heap_update(relation, otid, tup, GetCurrentCommandId(true), InvalidSnapshot, - true /* wait for commit */ , - &tmfd, &lockmode, update_indexes); + TABLE_MODIFY_WAIT /* wait for commit */ , + &tmfd, &lockmode, update_indexes, NULL); switch (result) { case TM_SelfModified: @@ -4356,12 +4440,14 @@ get_mxact_status_for_lock(LockTupleMode mode, bool is_update) * tuples. * * Output parameters: - * *tuple: all fields filled in - * *buffer: set to buffer holding tuple (pinned but not locked at exit) + * *slot: BufferHeapTupleTableSlot filled with tuple * *tmfd: filled in failure cases (see below) * * Function results are the same as the ones for table_tuple_lock(). * + * If *slot already contains the target tuple, it takes advantage on that by + * skipping the ReadBuffer() call. + * * In the failure cases other than TM_Invisible, the routine fills * *tmfd with the tuple's t_ctid, t_xmax (resolving a possible MultiXact, * if necessary), and t_cmax (the last only for TM_SelfModified, @@ -4372,15 +4458,14 @@ get_mxact_status_for_lock(LockTupleMode mode, bool is_update) * See README.tuplock for a thorough explanation of this mechanism. */ TM_Result -heap_lock_tuple(Relation relation, HeapTuple tuple, +heap_lock_tuple(Relation relation, ItemPointer tid, TupleTableSlot *slot, CommandId cid, LockTupleMode mode, LockWaitPolicy wait_policy, - bool follow_updates, - Buffer *buffer, TM_FailureData *tmfd) + bool follow_updates, TM_FailureData *tmfd) { TM_Result result; - ItemPointer tid = &(tuple->t_self); ItemId lp; Page page; + Buffer buffer; Buffer vmbuffer = InvalidBuffer; BlockNumber block; TransactionId xid, @@ -4392,8 +4477,24 @@ heap_lock_tuple(Relation relation, HeapTuple tuple, bool skip_tuple_lock = false; bool have_tuple_lock = false; bool cleared_all_frozen = false; + BufferHeapTupleTableSlot *bslot = (BufferHeapTupleTableSlot *) slot; + HeapTuple tuple = &bslot->base.tupdata; + + Assert(TTS_IS_BUFFERTUPLE(slot)); - *buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(tid)); + /* Take advantage if slot already contains the relevant tuple */ + if (!TTS_EMPTY(slot) && + slot->tts_tableOid == relation->rd_id && + ItemPointerCompare(&slot->tts_tid, tid) == 0 && + BufferIsValid(bslot->buffer)) + { + buffer = bslot->buffer; + IncrBufferRefCount(buffer); + } + else + { + buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(tid)); + } block = ItemPointerGetBlockNumber(tid); /* @@ -4402,21 +4503,22 @@ heap_lock_tuple(Relation relation, HeapTuple tuple, * in the middle of changing this, so we'll need to recheck after we have * the lock. */ - if (PageIsAllVisible(BufferGetPage(*buffer))) + if (PageIsAllVisible(BufferGetPage(buffer))) visibilitymap_pin(relation, block, &vmbuffer); - LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE); + LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE); - page = BufferGetPage(*buffer); + page = BufferGetPage(buffer); lp = PageGetItemId(page, ItemPointerGetOffsetNumber(tid)); Assert(ItemIdIsNormal(lp)); + tuple->t_self = *tid; tuple->t_data = (HeapTupleHeader) PageGetItem(page, lp); tuple->t_len = ItemIdGetLength(lp); tuple->t_tableOid = RelationGetRelid(relation); l3: - result = HeapTupleSatisfiesUpdate(tuple, cid, *buffer); + result = HeapTupleSatisfiesUpdate(tuple, cid, buffer); if (result == TM_Invisible) { @@ -4445,7 +4547,7 @@ heap_lock_tuple(Relation relation, HeapTuple tuple, infomask2 = tuple->t_data->t_infomask2; ItemPointerCopy(&tuple->t_data->t_ctid, &t_ctid); - LockBuffer(*buffer, BUFFER_LOCK_UNLOCK); + LockBuffer(buffer, BUFFER_LOCK_UNLOCK); /* * If any subtransaction of the current top transaction already holds @@ -4597,12 +4699,12 @@ heap_lock_tuple(Relation relation, HeapTuple tuple, { result = res; /* recovery code expects to have buffer lock held */ - LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE); + LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE); goto failed; } } - LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE); + LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE); /* * Make sure it's still an appropriate lock, else start over. @@ -4637,7 +4739,7 @@ heap_lock_tuple(Relation relation, HeapTuple tuple, if (HEAP_XMAX_IS_LOCKED_ONLY(infomask) && !HEAP_XMAX_IS_EXCL_LOCKED(infomask)) { - LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE); + LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE); /* * Make sure it's still an appropriate lock, else start over. @@ -4665,7 +4767,7 @@ heap_lock_tuple(Relation relation, HeapTuple tuple, * No conflict, but if the xmax changed under us in the * meantime, start over. */ - LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE); + LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE); if (xmax_infomask_changed(tuple->t_data->t_infomask, infomask) || !TransactionIdEquals(HeapTupleHeaderGetRawXmax(tuple->t_data), xwait)) @@ -4677,7 +4779,7 @@ heap_lock_tuple(Relation relation, HeapTuple tuple, } else if (HEAP_XMAX_IS_KEYSHR_LOCKED(infomask)) { - LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE); + LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE); /* if the xmax changed in the meantime, start over */ if (xmax_infomask_changed(tuple->t_data->t_infomask, infomask) || @@ -4705,7 +4807,7 @@ heap_lock_tuple(Relation relation, HeapTuple tuple, TransactionIdIsCurrentTransactionId(xwait)) { /* ... but if the xmax changed in the meantime, start over */ - LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE); + LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE); if (xmax_infomask_changed(tuple->t_data->t_infomask, infomask) || !TransactionIdEquals(HeapTupleHeaderGetRawXmax(tuple->t_data), xwait)) @@ -4727,7 +4829,7 @@ heap_lock_tuple(Relation relation, HeapTuple tuple, */ if (require_sleep && (result == TM_Updated || result == TM_Deleted)) { - LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE); + LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE); goto failed; } else if (require_sleep) @@ -4752,7 +4854,7 @@ heap_lock_tuple(Relation relation, HeapTuple tuple, */ result = TM_WouldBlock; /* recovery code expects to have buffer lock held */ - LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE); + LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE); goto failed; } @@ -4778,7 +4880,7 @@ heap_lock_tuple(Relation relation, HeapTuple tuple, { result = TM_WouldBlock; /* recovery code expects to have buffer lock held */ - LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE); + LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE); goto failed; } break; @@ -4818,7 +4920,7 @@ heap_lock_tuple(Relation relation, HeapTuple tuple, { result = TM_WouldBlock; /* recovery code expects to have buffer lock held */ - LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE); + LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE); goto failed; } break; @@ -4844,12 +4946,12 @@ heap_lock_tuple(Relation relation, HeapTuple tuple, { result = res; /* recovery code expects to have buffer lock held */ - LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE); + LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE); goto failed; } } - LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE); + LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE); /* * xwait is done, but if xwait had just locked the tuple then some @@ -4871,7 +4973,7 @@ heap_lock_tuple(Relation relation, HeapTuple tuple, * don't check for this in the multixact case, because some * locker transactions might still be running. */ - UpdateXmaxHintBits(tuple->t_data, *buffer, xwait); + UpdateXmaxHintBits(tuple->t_data, buffer, xwait); } } @@ -4930,9 +5032,9 @@ heap_lock_tuple(Relation relation, HeapTuple tuple, */ if (vmbuffer == InvalidBuffer && PageIsAllVisible(page)) { - LockBuffer(*buffer, BUFFER_LOCK_UNLOCK); + LockBuffer(buffer, BUFFER_LOCK_UNLOCK); visibilitymap_pin(relation, block, &vmbuffer); - LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE); + LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE); goto l3; } @@ -4995,7 +5097,7 @@ heap_lock_tuple(Relation relation, HeapTuple tuple, cleared_all_frozen = true; - MarkBufferDirty(*buffer); + MarkBufferDirty(buffer); /* * XLOG stuff. You might think that we don't need an XLOG record because @@ -5015,7 +5117,7 @@ heap_lock_tuple(Relation relation, HeapTuple tuple, XLogRecPtr recptr; XLogBeginInsert(); - XLogRegisterBuffer(0, *buffer, REGBUF_STANDARD); + XLogRegisterBuffer(0, buffer, REGBUF_STANDARD); xlrec.offnum = ItemPointerGetOffsetNumber(&tuple->t_self); xlrec.xmax = xid; @@ -5036,7 +5138,7 @@ heap_lock_tuple(Relation relation, HeapTuple tuple, result = TM_Ok; out_locked: - LockBuffer(*buffer, BUFFER_LOCK_UNLOCK); + LockBuffer(buffer, BUFFER_LOCK_UNLOCK); out_unlocked: if (BufferIsValid(vmbuffer)) @@ -5054,6 +5156,9 @@ heap_lock_tuple(Relation relation, HeapTuple tuple, if (have_tuple_lock) UnlockTupleTuplock(relation, tid, mode); + /* Put the target tuple to the slot */ + ExecStorePinnedBufferHeapTuple(tuple, slot, buffer); + return result; } diff --git a/src/backend/access/heap/heapam_handler.c b/src/backend/access/heap/heapam_handler.c index 5a17112c91e..6de8868a91c 100644 --- a/src/backend/access/heap/heapam_handler.c +++ b/src/backend/access/heap/heapam_handler.c @@ -45,6 +45,12 @@ #include "utils/builtins.h" #include "utils/rel.h" +static TM_Result heapam_tuple_lock(Relation relation, ItemPointer tid, + Snapshot snapshot, TupleTableSlot *slot, + CommandId cid, LockTupleMode mode, + LockWaitPolicy wait_policy, uint8 flags, + TM_FailureData *tmfd); + static void reform_and_rewrite_tuple(HeapTuple tuple, Relation OldHeap, Relation NewHeap, Datum *values, bool *isnull, RewriteState rwstate); @@ -298,23 +304,55 @@ heapam_tuple_complete_speculative(Relation relation, TupleTableSlot *slot, static TM_Result heapam_tuple_delete(Relation relation, ItemPointer tid, CommandId cid, - Snapshot snapshot, Snapshot crosscheck, bool wait, - TM_FailureData *tmfd, bool changingPart) + Snapshot snapshot, Snapshot crosscheck, int options, + TM_FailureData *tmfd, bool changingPart, + TupleTableSlot *oldSlot) { + TM_Result result; + /* * Currently Deleting of index tuples are handled at vacuum, in case if * the storage itself is cleaning the dead tuples by itself, it is the * time to call the index tuple deletion also. */ - return heap_delete(relation, tid, cid, crosscheck, wait, tmfd, changingPart); + result = heap_delete(relation, tid, cid, crosscheck, options, + tmfd, changingPart, oldSlot); + + /* + * If the tuple has been concurrently updated, then get the lock on it. + * (Do only if caller asked for this by setting the + * TABLE_MODIFY_LOCK_UPDATED option) With the lock held retry of the + * delete should succeed even if there are more concurrent update + * attempts. + */ + if (result == TM_Updated && (options & TABLE_MODIFY_LOCK_UPDATED)) + { + /* + * heapam_tuple_lock() will take advantage of tuple loaded into + * oldSlot by heap_delete(). + */ + result = heapam_tuple_lock(relation, tid, snapshot, + oldSlot, cid, LockTupleExclusive, + (options & TABLE_MODIFY_WAIT) ? + LockWaitBlock : + LockWaitSkip, + TUPLE_LOCK_FLAG_FIND_LAST_VERSION, + tmfd); + + if (result == TM_Ok) + return TM_Updated; + } + + return result; } static TM_Result heapam_tuple_update(Relation relation, ItemPointer otid, TupleTableSlot *slot, CommandId cid, Snapshot snapshot, Snapshot crosscheck, - bool wait, TM_FailureData *tmfd, - LockTupleMode *lockmode, TU_UpdateIndexes *update_indexes) + int options, TM_FailureData *tmfd, + LockTupleMode *lockmode, TU_UpdateIndexes *update_indexes, + TupleTableSlot *oldSlot) { bool shouldFree = true; HeapTuple tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree); @@ -324,8 +362,8 @@ heapam_tuple_update(Relation relation, ItemPointer otid, TupleTableSlot *slot, slot->tts_tableOid = RelationGetRelid(relation); tuple->t_tableOid = slot->tts_tableOid; - result = heap_update(relation, otid, tuple, cid, crosscheck, wait, - tmfd, lockmode, update_indexes); + result = heap_update(relation, otid, tuple, cid, crosscheck, options, + tmfd, lockmode, update_indexes, oldSlot); ItemPointerCopy(&tuple->t_self, &slot->tts_tid); /* @@ -352,6 +390,31 @@ heapam_tuple_update(Relation relation, ItemPointer otid, TupleTableSlot *slot, if (shouldFree) pfree(tuple); + /* + * If the tuple has been concurrently updated, then get the lock on it. + * (Do only if caller asked for this by setting the + * TABLE_MODIFY_LOCK_UPDATED option) With the lock held retry of the + * update should succeed even if there are more concurrent update + * attempts. + */ + if (result == TM_Updated && (options & TABLE_MODIFY_LOCK_UPDATED)) + { + /* + * heapam_tuple_lock() will take advantage of tuple loaded into + * oldSlot by heap_update(). + */ + result = heapam_tuple_lock(relation, otid, snapshot, + oldSlot, cid, *lockmode, + (options & TABLE_MODIFY_WAIT) ? + LockWaitBlock : + LockWaitSkip, + TUPLE_LOCK_FLAG_FIND_LAST_VERSION, + tmfd); + + if (result == TM_Ok) + return TM_Updated; + } + return result; } @@ -363,7 +426,6 @@ heapam_tuple_lock(Relation relation, ItemPointer tid, Snapshot snapshot, { BufferHeapTupleTableSlot *bslot = (BufferHeapTupleTableSlot *) slot; TM_Result result; - Buffer buffer; HeapTuple tuple = &bslot->base.tupdata; bool follow_updates; @@ -373,9 +435,8 @@ heapam_tuple_lock(Relation relation, ItemPointer tid, Snapshot snapshot, Assert(TTS_IS_BUFFERTUPLE(slot)); tuple_lock_retry: - tuple->t_self = *tid; - result = heap_lock_tuple(relation, tuple, cid, mode, wait_policy, - follow_updates, &buffer, tmfd); + result = heap_lock_tuple(relation, tid, slot, cid, mode, wait_policy, + follow_updates, tmfd); if (result == TM_Updated && (flags & TUPLE_LOCK_FLAG_FIND_LAST_VERSION)) @@ -383,8 +444,6 @@ heapam_tuple_lock(Relation relation, ItemPointer tid, Snapshot snapshot, /* Should not encounter speculative tuple on recheck */ Assert(!HeapTupleHeaderIsSpeculative(tuple->t_data)); - ReleaseBuffer(buffer); - if (!ItemPointerEquals(&tmfd->ctid, &tuple->t_self)) { SnapshotData SnapshotDirty; @@ -406,6 +465,8 @@ heapam_tuple_lock(Relation relation, ItemPointer tid, Snapshot snapshot, InitDirtySnapshot(SnapshotDirty); for (;;) { + Buffer buffer = InvalidBuffer; + if (ItemPointerIndicatesMovedPartitions(tid)) ereport(ERROR, (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE), @@ -500,7 +561,7 @@ heapam_tuple_lock(Relation relation, ItemPointer tid, Snapshot snapshot, /* * This is a live tuple, so try to lock it again. */ - ReleaseBuffer(buffer); + ExecStorePinnedBufferHeapTuple(tuple, slot, buffer); goto tuple_lock_retry; } @@ -511,7 +572,7 @@ heapam_tuple_lock(Relation relation, ItemPointer tid, Snapshot snapshot, */ if (tuple->t_data == NULL) { - Assert(!BufferIsValid(buffer)); + ReleaseBuffer(buffer); return TM_Deleted; } @@ -564,9 +625,6 @@ heapam_tuple_lock(Relation relation, ItemPointer tid, Snapshot snapshot, slot->tts_tableOid = RelationGetRelid(relation); tuple->t_tableOid = slot->tts_tableOid; - /* store in slot, transferring existing pin */ - ExecStorePinnedBufferHeapTuple(tuple, slot, buffer); - return result; } diff --git a/src/backend/access/table/tableam.c b/src/backend/access/table/tableam.c index 771438c8cec..3e88cb82cf0 100644 --- a/src/backend/access/table/tableam.c +++ b/src/backend/access/table/tableam.c @@ -297,16 +297,23 @@ simple_table_tuple_insert(Relation rel, TupleTableSlot *slot) * via ereport(). */ void -simple_table_tuple_delete(Relation rel, ItemPointer tid, Snapshot snapshot) +simple_table_tuple_delete(Relation rel, ItemPointer tid, Snapshot snapshot, + TupleTableSlot *oldSlot) { TM_Result result; TM_FailureData tmfd; + int options = TABLE_MODIFY_WAIT; /* wait for commit */ + + /* Fetch old tuple if the relevant slot is provided */ + if (oldSlot) + options |= TABLE_MODIFY_FETCH_OLD_TUPLE; result = table_tuple_delete(rel, tid, GetCurrentCommandId(true), snapshot, InvalidSnapshot, - true /* wait for commit */ , - &tmfd, false /* changingPart */ ); + options, + &tmfd, false /* changingPart */ , + oldSlot); switch (result) { @@ -345,17 +352,24 @@ void simple_table_tuple_update(Relation rel, ItemPointer otid, TupleTableSlot *slot, Snapshot snapshot, - TU_UpdateIndexes *update_indexes) + TU_UpdateIndexes *update_indexes, + TupleTableSlot *oldSlot) { TM_Result result; TM_FailureData tmfd; LockTupleMode lockmode; + int options = TABLE_MODIFY_WAIT; /* wait for commit */ + + /* Fetch old tuple if the relevant slot is provided */ + if (oldSlot) + options |= TABLE_MODIFY_FETCH_OLD_TUPLE; result = table_tuple_update(rel, otid, slot, GetCurrentCommandId(true), snapshot, InvalidSnapshot, - true /* wait for commit */ , - &tmfd, &lockmode, update_indexes); + options, + &tmfd, &lockmode, update_indexes, + oldSlot); switch (result) { diff --git a/src/backend/commands/trigger.c b/src/backend/commands/trigger.c index dda03a3c823..c5e6caf4453 100644 --- a/src/backend/commands/trigger.c +++ b/src/backend/commands/trigger.c @@ -2790,8 +2790,8 @@ ExecBRDeleteTriggers(EState *estate, EPQState *epqstate, void ExecARDeleteTriggers(EState *estate, ResultRelInfo *relinfo, - ItemPointer tupleid, HeapTuple fdw_trigtuple, + TupleTableSlot *slot, TransitionCaptureState *transition_capture, bool is_crosspart_update) { @@ -2809,20 +2809,11 @@ ExecARDeleteTriggers(EState *estate, if ((trigdesc && trigdesc->trig_delete_after_row) || (transition_capture && transition_capture->tcs_delete_old_table)) { - TupleTableSlot *slot = ExecGetTriggerOldSlot(estate, relinfo); - - Assert(HeapTupleIsValid(fdw_trigtuple) ^ ItemPointerIsValid(tupleid)); - if (fdw_trigtuple == NULL) - GetTupleForTrigger(estate, - NULL, - relinfo, - tupleid, - LockTupleExclusive, - slot, - NULL, - NULL, - NULL); - else + /* + * Put the FDW old tuple to the slot. Otherwise, caller is expected + * to have old tuple alredy fetched to the slot. + */ + if (fdw_trigtuple != NULL) ExecForceStoreHeapTuple(fdw_trigtuple, slot, false); AfterTriggerSaveEvent(estate, relinfo, NULL, NULL, @@ -3113,18 +3104,17 @@ ExecBRUpdateTriggers(EState *estate, EPQState *epqstate, * Note: 'src_partinfo' and 'dst_partinfo', when non-NULL, refer to the source * and destination partitions, respectively, of a cross-partition update of * the root partitioned table mentioned in the query, given by 'relinfo'. - * 'tupleid' in that case refers to the ctid of the "old" tuple in the source - * partition, and 'newslot' contains the "new" tuple in the destination - * partition. This interface allows to support the requirements of - * ExecCrossPartitionUpdateForeignKey(); is_crosspart_update must be true in - * that case. + * 'oldslot' contains the "old" tuple in the source partition, and 'newslot' + * contains the "new" tuple in the destination partition. This interface + * allows to support the requirements of ExecCrossPartitionUpdateForeignKey(); + * is_crosspart_update must be true in that case. */ void ExecARUpdateTriggers(EState *estate, ResultRelInfo *relinfo, ResultRelInfo *src_partinfo, ResultRelInfo *dst_partinfo, - ItemPointer tupleid, HeapTuple fdw_trigtuple, + TupleTableSlot *oldslot, TupleTableSlot *newslot, List *recheckIndexes, TransitionCaptureState *transition_capture, @@ -3153,29 +3143,14 @@ ExecARUpdateTriggers(EState *estate, ResultRelInfo *relinfo, * separately for DELETE and INSERT to capture transition table rows. * In such case, either old tuple or new tuple can be NULL. */ - TupleTableSlot *oldslot; - ResultRelInfo *tupsrc; - Assert((src_partinfo != NULL && dst_partinfo != NULL) || !is_crosspart_update); - tupsrc = src_partinfo ? src_partinfo : relinfo; - oldslot = ExecGetTriggerOldSlot(estate, tupsrc); - - if (fdw_trigtuple == NULL && ItemPointerIsValid(tupleid)) - GetTupleForTrigger(estate, - NULL, - tupsrc, - tupleid, - LockTupleExclusive, - oldslot, - NULL, - NULL, - NULL); - else if (fdw_trigtuple != NULL) + if (fdw_trigtuple != NULL) + { + Assert(oldslot); ExecForceStoreHeapTuple(fdw_trigtuple, oldslot, false); - else - ExecClearTuple(oldslot); + } AfterTriggerSaveEvent(estate, relinfo, src_partinfo, dst_partinfo, diff --git a/src/backend/executor/execReplication.c b/src/backend/executor/execReplication.c index 61ddbe548f3..03c705bf987 100644 --- a/src/backend/executor/execReplication.c +++ b/src/backend/executor/execReplication.c @@ -540,6 +540,7 @@ ExecSimpleRelationUpdate(ResultRelInfo *resultRelInfo, { List *recheckIndexes = NIL; TU_UpdateIndexes update_indexes; + TupleTableSlot *oldSlot = NULL; /* Compute stored generated columns */ if (rel->rd_att->constr && @@ -553,8 +554,12 @@ ExecSimpleRelationUpdate(ResultRelInfo *resultRelInfo, if (rel->rd_rel->relispartition) ExecPartitionCheck(resultRelInfo, slot, estate, true); + if (resultRelInfo->ri_TrigDesc && + resultRelInfo->ri_TrigDesc->trig_update_after_row) + oldSlot = ExecGetTriggerOldSlot(estate, resultRelInfo); + simple_table_tuple_update(rel, tid, slot, estate->es_snapshot, - &update_indexes); + &update_indexes, oldSlot); if (resultRelInfo->ri_NumIndices > 0 && (update_indexes != TU_None)) recheckIndexes = ExecInsertIndexTuples(resultRelInfo, @@ -565,7 +570,7 @@ ExecSimpleRelationUpdate(ResultRelInfo *resultRelInfo, /* AFTER ROW UPDATE Triggers */ ExecARUpdateTriggers(estate, resultRelInfo, NULL, NULL, - tid, NULL, slot, + NULL, oldSlot, slot, recheckIndexes, NULL, false); list_free(recheckIndexes); @@ -599,12 +604,18 @@ ExecSimpleRelationDelete(ResultRelInfo *resultRelInfo, if (!skip_tuple) { + TupleTableSlot *oldSlot = NULL; + + if (resultRelInfo->ri_TrigDesc && + resultRelInfo->ri_TrigDesc->trig_delete_after_row) + oldSlot = ExecGetTriggerOldSlot(estate, resultRelInfo); + /* OK, delete the tuple */ - simple_table_tuple_delete(rel, tid, estate->es_snapshot); + simple_table_tuple_delete(rel, tid, estate->es_snapshot, oldSlot); /* AFTER ROW DELETE Triggers */ ExecARDeleteTriggers(estate, resultRelInfo, - tid, NULL, NULL, false); + NULL, oldSlot, NULL, false); } } diff --git a/src/backend/executor/nodeModifyTable.c b/src/backend/executor/nodeModifyTable.c index 6597356c018..67793aa2291 100644 --- a/src/backend/executor/nodeModifyTable.c +++ b/src/backend/executor/nodeModifyTable.c @@ -142,7 +142,7 @@ static void ExecCrossPartitionUpdateForeignKey(ModifyTableContext *context, ResultRelInfo *sourcePartInfo, ResultRelInfo *destPartInfo, ItemPointer tupleid, - TupleTableSlot *oldslot, + TupleTableSlot *oldSlot, TupleTableSlot *newslot); static bool ExecOnConflictUpdate(ModifyTableContext *context, ResultRelInfo *resultRelInfo, @@ -599,6 +599,10 @@ ExecInitInsertProjection(ModifyTableState *mtstate, resultRelInfo->ri_newTupleSlot = table_slot_create(resultRelInfo->ri_RelationDesc, &estate->es_tupleTable); + if (node->onConflictAction == ONCONFLICT_UPDATE) + resultRelInfo->ri_oldTupleSlot = + table_slot_create(resultRelInfo->ri_RelationDesc, + &estate->es_tupleTable); /* Build ProjectionInfo if needed (it probably isn't). */ if (need_projection) @@ -1188,7 +1192,7 @@ ExecInsert(ModifyTableContext *context, ExecARUpdateTriggers(estate, resultRelInfo, NULL, NULL, NULL, - NULL, + resultRelInfo->ri_oldTupleSlot, slot, NULL, mtstate->mt_transition_capture, @@ -1368,7 +1372,8 @@ ExecDeletePrologue(ModifyTableContext *context, ResultRelInfo *resultRelInfo, */ static TM_Result ExecDeleteAct(ModifyTableContext *context, ResultRelInfo *resultRelInfo, - ItemPointer tupleid, bool changingPart) + ItemPointer tupleid, bool changingPart, int options, + TupleTableSlot *oldSlot) { EState *estate = context->estate; @@ -1376,9 +1381,10 @@ ExecDeleteAct(ModifyTableContext *context, ResultRelInfo *resultRelInfo, estate->es_output_cid, estate->es_snapshot, estate->es_crosscheck_snapshot, - true /* wait for commit */ , + options /* wait for commit */ , &context->tmfd, - changingPart); + changingPart, + oldSlot); } /* @@ -1390,7 +1396,8 @@ ExecDeleteAct(ModifyTableContext *context, ResultRelInfo *resultRelInfo, */ static void ExecDeleteEpilogue(ModifyTableContext *context, ResultRelInfo *resultRelInfo, - ItemPointer tupleid, HeapTuple oldtuple, bool changingPart) + ItemPointer tupleid, HeapTuple oldtuple, + TupleTableSlot *slot, bool changingPart) { ModifyTableState *mtstate = context->mtstate; EState *estate = context->estate; @@ -1408,8 +1415,8 @@ ExecDeleteEpilogue(ModifyTableContext *context, ResultRelInfo *resultRelInfo, { ExecARUpdateTriggers(estate, resultRelInfo, NULL, NULL, - tupleid, oldtuple, - NULL, NULL, mtstate->mt_transition_capture, + oldtuple, + slot, NULL, NULL, mtstate->mt_transition_capture, false); /* @@ -1420,10 +1427,30 @@ ExecDeleteEpilogue(ModifyTableContext *context, ResultRelInfo *resultRelInfo, } /* AFTER ROW DELETE Triggers */ - ExecARDeleteTriggers(estate, resultRelInfo, tupleid, oldtuple, + ExecARDeleteTriggers(estate, resultRelInfo, oldtuple, slot, ar_delete_trig_tcs, changingPart); } +/* + * Initializes the tuple slot in a ResultRelInfo for DELETE action. + * + * We mark 'projectNewInfoValid' even though the projections themselves + * are not initialized here. + */ +static void +ExecInitDeleteTupleSlot(ModifyTableState *mtstate, + ResultRelInfo *resultRelInfo) +{ + EState *estate = mtstate->ps.state; + + Assert(!resultRelInfo->ri_projectNewInfoValid); + + resultRelInfo->ri_oldTupleSlot = + table_slot_create(resultRelInfo->ri_RelationDesc, + &estate->es_tupleTable); + resultRelInfo->ri_projectNewInfoValid = true; +} + /* ---------------------------------------------------------------- * ExecDelete * @@ -1451,6 +1478,7 @@ ExecDelete(ModifyTableContext *context, ResultRelInfo *resultRelInfo, ItemPointer tupleid, HeapTuple oldtuple, + TupleTableSlot *oldSlot, bool processReturning, bool changingPart, bool canSetTag, @@ -1514,6 +1542,11 @@ ExecDelete(ModifyTableContext *context, } else { + int options = TABLE_MODIFY_WAIT | TABLE_MODIFY_FETCH_OLD_TUPLE; + + if (!IsolationUsesXactSnapshot()) + options |= TABLE_MODIFY_LOCK_UPDATED; + /* * delete the tuple * @@ -1524,7 +1557,8 @@ ExecDelete(ModifyTableContext *context, * transaction-snapshot mode transactions. */ ldelete: - result = ExecDeleteAct(context, resultRelInfo, tupleid, changingPart); + result = ExecDeleteAct(context, resultRelInfo, tupleid, changingPart, + options, oldSlot); if (tmresult) *tmresult = result; @@ -1571,7 +1605,6 @@ ExecDelete(ModifyTableContext *context, case TM_Updated: { - TupleTableSlot *inputslot; TupleTableSlot *epqslot; if (IsolationUsesXactSnapshot()) @@ -1580,87 +1613,29 @@ ExecDelete(ModifyTableContext *context, errmsg("could not serialize access due to concurrent update"))); /* - * Already know that we're going to need to do EPQ, so - * fetch tuple directly into the right slot. + * We need to do EPQ. The latest tuple is already found + * and locked as a result of TABLE_MODIFY_LOCK_UPDATED. */ - EvalPlanQualBegin(context->epqstate); - inputslot = EvalPlanQualSlot(context->epqstate, resultRelationDesc, - resultRelInfo->ri_RangeTableIndex); - - result = table_tuple_lock(resultRelationDesc, tupleid, - estate->es_snapshot, - inputslot, estate->es_output_cid, - LockTupleExclusive, LockWaitBlock, - TUPLE_LOCK_FLAG_FIND_LAST_VERSION, - &context->tmfd); + Assert(context->tmfd.traversed); + epqslot = EvalPlanQual(context->epqstate, + resultRelationDesc, + resultRelInfo->ri_RangeTableIndex, + oldSlot); + if (TupIsNull(epqslot)) + /* Tuple not passing quals anymore, exiting... */ + return NULL; - switch (result) + /* + * If requested, skip delete and pass back the updated + * row. + */ + if (epqreturnslot) { - case TM_Ok: - Assert(context->tmfd.traversed); - epqslot = EvalPlanQual(context->epqstate, - resultRelationDesc, - resultRelInfo->ri_RangeTableIndex, - inputslot); - if (TupIsNull(epqslot)) - /* Tuple not passing quals anymore, exiting... */ - return NULL; - - /* - * If requested, skip delete and pass back the - * updated row. - */ - if (epqreturnslot) - { - *epqreturnslot = epqslot; - return NULL; - } - else - goto ldelete; - - case TM_SelfModified: - - /* - * This can be reached when following an update - * chain from a tuple updated by another session, - * reaching a tuple that was already updated in - * this transaction. If previously updated by this - * command, ignore the delete, otherwise error - * out. - * - * See also TM_SelfModified response to - * table_tuple_delete() above. - */ - if (context->tmfd.cmax != estate->es_output_cid) - ereport(ERROR, - (errcode(ERRCODE_TRIGGERED_DATA_CHANGE_VIOLATION), - errmsg("tuple to be deleted was already modified by an operation triggered by the current command"), - errhint("Consider using an AFTER trigger instead of a BEFORE trigger to propagate changes to other rows."))); - return NULL; - - case TM_Deleted: - /* tuple already deleted; nothing to do */ - return NULL; - - default: - - /* - * TM_Invisible should be impossible because we're - * waiting for updated row versions, and would - * already have errored out if the first version - * is invisible. - * - * TM_Updated should be impossible, because we're - * locking the latest version via - * TUPLE_LOCK_FLAG_FIND_LAST_VERSION. - */ - elog(ERROR, "unexpected table_tuple_lock status: %u", - result); - return NULL; + *epqreturnslot = epqslot; + return NULL; } - - Assert(false); - break; + else + goto ldelete; } case TM_Deleted: @@ -1694,7 +1669,8 @@ ExecDelete(ModifyTableContext *context, if (tupleDeleted) *tupleDeleted = true; - ExecDeleteEpilogue(context, resultRelInfo, tupleid, oldtuple, changingPart); + ExecDeleteEpilogue(context, resultRelInfo, tupleid, oldtuple, + oldSlot, changingPart); /* Process RETURNING if present and if requested */ if (processReturning && resultRelInfo->ri_projectReturning) @@ -1712,17 +1688,13 @@ ExecDelete(ModifyTableContext *context, } else { + /* Copy old tuple to the returning slot */ slot = ExecGetReturningSlot(estate, resultRelInfo); if (oldtuple != NULL) - { ExecForceStoreHeapTuple(oldtuple, slot, false); - } else - { - if (!table_tuple_fetch_row_version(resultRelationDesc, tupleid, - SnapshotAny, slot)) - elog(ERROR, "failed to fetch deleted tuple for DELETE RETURNING"); - } + ExecCopySlot(slot, oldSlot); + Assert(!TupIsNull(slot)); } rslot = ExecProcessReturning(resultRelInfo, slot, context->planSlot); @@ -1822,12 +1794,16 @@ ExecCrossPartitionUpdate(ModifyTableContext *context, MemoryContextSwitchTo(oldcxt); } + /* Make sure ri_oldTupleSlot is initialized. */ + if (unlikely(!resultRelInfo->ri_projectNewInfoValid)) + ExecInitUpdateProjection(mtstate, resultRelInfo); + /* * Row movement, part 1. Delete the tuple, but skip RETURNING processing. * We want to return rows from INSERT. */ ExecDelete(context, resultRelInfo, - tupleid, oldtuple, + tupleid, oldtuple, resultRelInfo->ri_oldTupleSlot, false, /* processReturning */ true, /* changingPart */ false, /* canSetTag */ @@ -1868,21 +1844,13 @@ ExecCrossPartitionUpdate(ModifyTableContext *context, return true; else { - /* Fetch the most recent version of old tuple. */ - TupleTableSlot *oldSlot; - - /* ... but first, make sure ri_oldTupleSlot is initialized. */ - if (unlikely(!resultRelInfo->ri_projectNewInfoValid)) - ExecInitUpdateProjection(mtstate, resultRelInfo); - oldSlot = resultRelInfo->ri_oldTupleSlot; - if (!table_tuple_fetch_row_version(resultRelInfo->ri_RelationDesc, - tupleid, - SnapshotAny, - oldSlot)) - elog(ERROR, "failed to fetch tuple being updated"); - /* and project the new tuple to retry the UPDATE with */ + /* + * ExecDelete already fetches the most recent version of old tuple + * to resultRelInfo->ri_RelationDesc. So, just project the new + * tuple to retry the UPDATE with. + */ *retry_slot = ExecGetUpdateNewTuple(resultRelInfo, epqslot, - oldSlot); + resultRelInfo->ri_oldTupleSlot); return false; } } @@ -2001,7 +1969,8 @@ ExecUpdatePrepareSlot(ResultRelInfo *resultRelInfo, static TM_Result ExecUpdateAct(ModifyTableContext *context, ResultRelInfo *resultRelInfo, ItemPointer tupleid, HeapTuple oldtuple, TupleTableSlot *slot, - bool canSetTag, UpdateContext *updateCxt) + bool canSetTag, int options, TupleTableSlot *oldSlot, + UpdateContext *updateCxt) { EState *estate = context->estate; Relation resultRelationDesc = resultRelInfo->ri_RelationDesc; @@ -2094,7 +2063,8 @@ ExecUpdateAct(ModifyTableContext *context, ResultRelInfo *resultRelInfo, ExecCrossPartitionUpdateForeignKey(context, resultRelInfo, insert_destrel, - tupleid, slot, + tupleid, + resultRelInfo->ri_oldTupleSlot, inserted_tuple); return TM_Ok; @@ -2137,9 +2107,10 @@ ExecUpdateAct(ModifyTableContext *context, ResultRelInfo *resultRelInfo, estate->es_output_cid, estate->es_snapshot, estate->es_crosscheck_snapshot, - true /* wait for commit */ , + options /* wait for commit */ , &context->tmfd, &updateCxt->lockmode, - &updateCxt->updateIndexes); + &updateCxt->updateIndexes, + oldSlot); if (result == TM_Ok) updateCxt->updated = true; @@ -2155,7 +2126,8 @@ ExecUpdateAct(ModifyTableContext *context, ResultRelInfo *resultRelInfo, static void ExecUpdateEpilogue(ModifyTableContext *context, UpdateContext *updateCxt, ResultRelInfo *resultRelInfo, ItemPointer tupleid, - HeapTuple oldtuple, TupleTableSlot *slot) + HeapTuple oldtuple, TupleTableSlot *slot, + TupleTableSlot *oldSlot) { ModifyTableState *mtstate = context->mtstate; List *recheckIndexes = NIL; @@ -2171,7 +2143,7 @@ ExecUpdateEpilogue(ModifyTableContext *context, UpdateContext *updateCxt, /* AFTER ROW UPDATE Triggers */ ExecARUpdateTriggers(context->estate, resultRelInfo, NULL, NULL, - tupleid, oldtuple, slot, + oldtuple, oldSlot, slot, recheckIndexes, mtstate->operation == CMD_INSERT ? mtstate->mt_oc_transition_capture : @@ -2260,7 +2232,7 @@ ExecCrossPartitionUpdateForeignKey(ModifyTableContext *context, /* Perform the root table's triggers. */ ExecARUpdateTriggers(context->estate, rootRelInfo, sourcePartInfo, destPartInfo, - tupleid, NULL, newslot, NIL, NULL, true); + NULL, oldslot, newslot, NIL, NULL, true); } /* ---------------------------------------------------------------- @@ -2282,6 +2254,7 @@ ExecCrossPartitionUpdateForeignKey(ModifyTableContext *context, * NULL when the foreign table has no relevant triggers. * * slot contains the new tuple value to be stored. + * oldSlot is the slot to store the old tuple. * planSlot is the output of the ModifyTable's subplan; we use it * to access values from other input tables (for RETURNING), * row-ID junk columns, etc. @@ -2294,7 +2267,7 @@ ExecCrossPartitionUpdateForeignKey(ModifyTableContext *context, static TupleTableSlot * ExecUpdate(ModifyTableContext *context, ResultRelInfo *resultRelInfo, ItemPointer tupleid, HeapTuple oldtuple, TupleTableSlot *slot, - bool canSetTag) + TupleTableSlot *oldSlot, bool canSetTag, bool locked) { EState *estate = context->estate; Relation resultRelationDesc = resultRelInfo->ri_RelationDesc; @@ -2349,6 +2322,11 @@ ExecUpdate(ModifyTableContext *context, ResultRelInfo *resultRelInfo, { ItemPointerData lockedtid; + int options = TABLE_MODIFY_WAIT | TABLE_MODIFY_FETCH_OLD_TUPLE; + + if (!locked && !IsolationUsesXactSnapshot()) + options |= TABLE_MODIFY_LOCK_UPDATED; + /* * If we generate a new candidate tuple after EvalPlanQual testing, we * must loop back here to try again. (We don't need to redo triggers, @@ -2359,7 +2337,7 @@ ExecUpdate(ModifyTableContext *context, ResultRelInfo *resultRelInfo, redo_act: lockedtid = *tupleid; result = ExecUpdateAct(context, resultRelInfo, tupleid, oldtuple, slot, - canSetTag, &updateCxt); + canSetTag, options, oldSlot, &updateCxt); /* * If ExecUpdateAct reports that a cross-partition update was done, @@ -2410,96 +2388,39 @@ ExecUpdate(ModifyTableContext *context, ResultRelInfo *resultRelInfo, case TM_Updated: { - TupleTableSlot *inputslot; TupleTableSlot *epqslot; - TupleTableSlot *oldSlot; if (IsolationUsesXactSnapshot()) ereport(ERROR, (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE), errmsg("could not serialize access due to concurrent update"))); + Assert(!locked); /* - * Already know that we're going to need to do EPQ, so - * fetch tuple directly into the right slot. + * We need to do EPQ. The latest tuple is already found + * and locked as a result of TABLE_MODIFY_LOCK_UPDATED. */ - inputslot = EvalPlanQualSlot(context->epqstate, resultRelationDesc, - resultRelInfo->ri_RangeTableIndex); + Assert(context->tmfd.traversed); + epqslot = EvalPlanQual(context->epqstate, + resultRelationDesc, + resultRelInfo->ri_RangeTableIndex, + oldSlot); + if (TupIsNull(epqslot)) + /* Tuple not passing quals anymore, exiting... */ + return NULL; - result = table_tuple_lock(resultRelationDesc, tupleid, - estate->es_snapshot, - inputslot, estate->es_output_cid, - updateCxt.lockmode, LockWaitBlock, - TUPLE_LOCK_FLAG_FIND_LAST_VERSION, - &context->tmfd); - - switch (result) + if (resultRelInfo->ri_needLockTagTuple) { - case TM_Ok: - Assert(context->tmfd.traversed); - - epqslot = EvalPlanQual(context->epqstate, - resultRelationDesc, - resultRelInfo->ri_RangeTableIndex, - inputslot); - if (TupIsNull(epqslot)) - /* Tuple not passing quals anymore, exiting... */ - return NULL; - - /* Make sure ri_oldTupleSlot is initialized. */ - if (unlikely(!resultRelInfo->ri_projectNewInfoValid)) - ExecInitUpdateProjection(context->mtstate, - resultRelInfo); - - if (resultRelInfo->ri_needLockTagTuple) - { - UnlockTuple(resultRelationDesc, - &lockedtid, InplaceUpdateTupleLock); - LockTuple(resultRelationDesc, - tupleid, InplaceUpdateTupleLock); - } - - /* Fetch the most recent version of old tuple. */ - oldSlot = resultRelInfo->ri_oldTupleSlot; - if (!table_tuple_fetch_row_version(resultRelationDesc, - tupleid, - SnapshotAny, - oldSlot)) - elog(ERROR, "failed to fetch tuple being updated"); - slot = ExecGetUpdateNewTuple(resultRelInfo, - epqslot, oldSlot); - goto redo_act; - - case TM_Deleted: - /* tuple already deleted; nothing to do */ - return NULL; - - case TM_SelfModified: - - /* - * This can be reached when following an update - * chain from a tuple updated by another session, - * reaching a tuple that was already updated in - * this transaction. If previously modified by - * this command, ignore the redundant update, - * otherwise error out. - * - * See also TM_SelfModified response to - * table_tuple_update() above. - */ - if (context->tmfd.cmax != estate->es_output_cid) - ereport(ERROR, - (errcode(ERRCODE_TRIGGERED_DATA_CHANGE_VIOLATION), - errmsg("tuple to be updated was already modified by an operation triggered by the current command"), - errhint("Consider using an AFTER trigger instead of a BEFORE trigger to propagate changes to other rows."))); - return NULL; - - default: - /* see table_tuple_lock call in ExecDelete() */ - elog(ERROR, "unexpected table_tuple_lock status: %u", - result); - return NULL; + UnlockTuple(resultRelationDesc, + &lockedtid, InplaceUpdateTupleLock); + LockTuple(resultRelationDesc, + tupleid, InplaceUpdateTupleLock); } + + slot = ExecGetUpdateNewTuple(resultRelInfo, + epqslot, + oldSlot); + goto redo_act; } break; @@ -2523,7 +2444,7 @@ ExecUpdate(ModifyTableContext *context, ResultRelInfo *resultRelInfo, (estate->es_processed)++; ExecUpdateEpilogue(context, &updateCxt, resultRelInfo, tupleid, oldtuple, - slot); + slot, oldSlot); /* Process RETURNING if present */ if (resultRelInfo->ri_projectReturning) @@ -2749,7 +2670,8 @@ ExecOnConflictUpdate(ModifyTableContext *context, *returning = ExecUpdate(context, resultRelInfo, conflictTid, NULL, resultRelInfo->ri_onConflict->oc_ProjSlot, - canSetTag); + existing, + canSetTag, true); /* * Clear out existing tuple, as there might not be another conflict among @@ -2969,7 +2891,8 @@ ExecMergeMatched(ModifyTableContext *context, ResultRelInfo *resultRelInfo, break; /* concurrent update/delete */ } result = ExecUpdateAct(context, resultRelInfo, tupleid, NULL, - newslot, canSetTag, &updateCxt); + newslot, canSetTag, TABLE_MODIFY_WAIT, + NULL, &updateCxt); /* * As in ExecUpdate(), if ExecUpdateAct() reports that a @@ -2987,7 +2910,8 @@ ExecMergeMatched(ModifyTableContext *context, ResultRelInfo *resultRelInfo, if (result == TM_Ok && updateCxt.updated) { ExecUpdateEpilogue(context, &updateCxt, resultRelInfo, - tupleid, NULL, newslot); + tupleid, NULL, newslot, + resultRelInfo->ri_oldTupleSlot); mtstate->mt_merge_updated += 1; } break; @@ -3001,11 +2925,12 @@ ExecMergeMatched(ModifyTableContext *context, ResultRelInfo *resultRelInfo, goto out; /* "do nothing" */ break; /* concurrent update/delete */ } - result = ExecDeleteAct(context, resultRelInfo, tupleid, false); + result = ExecDeleteAct(context, resultRelInfo, tupleid, + false, TABLE_MODIFY_WAIT, NULL); if (result == TM_Ok) { ExecDeleteEpilogue(context, resultRelInfo, tupleid, NULL, - false); + resultRelInfo->ri_oldTupleSlot, false); mtstate->mt_merge_deleted += 1; } break; @@ -4050,15 +3975,21 @@ ExecModifyTable(PlanState *pstate) /* Now apply the update. */ slot = ExecUpdate(&context, resultRelInfo, tupleid, oldtuple, - slot, node->canSetTag); + slot, resultRelInfo->ri_oldTupleSlot, + node->canSetTag, false); if (tuplock) UnlockTuple(resultRelInfo->ri_RelationDesc, tupleid, InplaceUpdateTupleLock); break; case CMD_DELETE: + /* Initialize slot for DELETE to fetch the old tuple */ + if (unlikely(!resultRelInfo->ri_projectNewInfoValid)) + ExecInitDeleteTupleSlot(node, resultRelInfo); + slot = ExecDelete(&context, resultRelInfo, tupleid, oldtuple, - true, false, node->canSetTag, NULL, NULL, NULL); + resultRelInfo->ri_oldTupleSlot, true, false, + node->canSetTag, NULL, NULL, NULL); break; case CMD_MERGE: diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h index edf8ec69b14..864f3b3f8d9 100644 --- a/src/include/access/heapam.h +++ b/src/include/access/heapam.h @@ -242,19 +242,22 @@ extern void heap_multi_insert(Relation relation, struct TupleTableSlot **slots, int ntuples, CommandId cid, int options, BulkInsertState bistate); extern TM_Result heap_delete(Relation relation, ItemPointer tid, - CommandId cid, Snapshot crosscheck, bool wait, - struct TM_FailureData *tmfd, bool changingPart); + CommandId cid, Snapshot crosscheck, int options, + struct TM_FailureData *tmfd, bool changingPart, + TupleTableSlot *oldSlot); extern void heap_finish_speculative(Relation relation, ItemPointer tid); extern void heap_abort_speculative(Relation relation, ItemPointer tid); extern TM_Result heap_update(Relation relation, ItemPointer otid, HeapTuple newtup, - CommandId cid, Snapshot crosscheck, bool wait, + CommandId cid, Snapshot crosscheck, int options, struct TM_FailureData *tmfd, LockTupleMode *lockmode, - TU_UpdateIndexes *update_indexes); -extern TM_Result heap_lock_tuple(Relation relation, HeapTuple tuple, - CommandId cid, LockTupleMode mode, LockWaitPolicy wait_policy, - bool follow_updates, - Buffer *buffer, struct TM_FailureData *tmfd); + TU_UpdateIndexes *update_indexes, + TupleTableSlot *oldSlot); +extern TM_Result heap_lock_tuple(Relation relation, ItemPointer tid, + TupleTableSlot *slot, + CommandId cid, LockTupleMode mode, + LockWaitPolicy wait_policy, bool follow_updates, + struct TM_FailureData *tmfd); extern bool heap_inplace_lock(Relation relation, HeapTuple oldtup_ptr, Buffer buffer, diff --git a/src/include/access/tableam.h b/src/include/access/tableam.h index 1a399e24460..a4f3fca3dc3 100644 --- a/src/include/access/tableam.h +++ b/src/include/access/tableam.h @@ -265,6 +265,11 @@ typedef struct TM_IndexDeleteOp /* Follow update chain and lock latest version of tuple */ #define TUPLE_LOCK_FLAG_FIND_LAST_VERSION (1 << 1) +/* "options" flag bits for table_tuple_update and table_tuple_delete */ +#define TABLE_MODIFY_WAIT 0x0001 +#define TABLE_MODIFY_FETCH_OLD_TUPLE 0x0002 +#define TABLE_MODIFY_LOCK_UPDATED 0x0004 + /* Typedef for callback function for table_index_build_scan */ typedef void (*IndexBuildCallback) (Relation index, @@ -534,9 +539,10 @@ typedef struct TableAmRoutine CommandId cid, Snapshot snapshot, Snapshot crosscheck, - bool wait, + int options, TM_FailureData *tmfd, - bool changingPart); + bool changingPart, + TupleTableSlot *oldSlot); /* see table_tuple_update() for reference about parameters */ TM_Result (*tuple_update) (Relation rel, @@ -545,10 +551,11 @@ typedef struct TableAmRoutine CommandId cid, Snapshot snapshot, Snapshot crosscheck, - bool wait, + int options, TM_FailureData *tmfd, LockTupleMode *lockmode, - TU_UpdateIndexes *update_indexes); + TU_UpdateIndexes *update_indexes, + TupleTableSlot *oldSlot); /* see table_tuple_lock() for reference about parameters */ TM_Result (*tuple_lock) (Relation rel, @@ -1463,7 +1470,7 @@ table_multi_insert(Relation rel, TupleTableSlot **slots, int nslots, } /* - * Delete a tuple. + * Delete a tuple (and optionally lock the last tuple version). * * NB: do not call this directly unless prepared to deal with * concurrent-update conditions. Use simple_table_tuple_delete instead. @@ -1474,11 +1481,21 @@ table_multi_insert(Relation rel, TupleTableSlot **slots, int nslots, * cid - delete command ID (used for visibility test, and stored into * cmax if successful) * crosscheck - if not InvalidSnapshot, also check tuple against this - * wait - true if should wait for any conflicting update to commit/abort + * options: + * If TABLE_MODIFY_WAIT, wait for any conflicting update to commit/abort. + * If TABLE_MODIFY_FETCH_OLD_TUPLE option is given, the existing tuple is + * fetched into oldSlot when the update is successful. + * If TABLE_MODIFY_LOCK_UPDATED option is given and the tuple is + * concurrently updated, then the last tuple version is locked and fetched + * into oldSlot. + * * Output parameters: * tmfd - filled in failure cases (see below) * changingPart - true iff the tuple is being moved to another partition * table due to an update of the partition key. Otherwise, false. + * oldSlot - slot to save the deleted or locked tuple. Can be NULL if none of + * TABLE_MODIFY_FETCH_OLD_TUPLE or TABLE_MODIFY_LOCK_UPDATED options + * is specified. * * Normal, successful return value is TM_Ok, which means we did actually * delete it. Failure return codes are TM_SelfModified, TM_Updated, and @@ -1490,16 +1507,18 @@ table_multi_insert(Relation rel, TupleTableSlot **slots, int nslots, */ static inline TM_Result table_tuple_delete(Relation rel, ItemPointer tid, CommandId cid, - Snapshot snapshot, Snapshot crosscheck, bool wait, - TM_FailureData *tmfd, bool changingPart) + Snapshot snapshot, Snapshot crosscheck, int options, + TM_FailureData *tmfd, bool changingPart, + TupleTableSlot *oldSlot) { return rel->rd_tableam->tuple_delete(rel, tid, cid, snapshot, crosscheck, - wait, tmfd, changingPart); + options, tmfd, changingPart, + oldSlot); } /* - * Update a tuple. + * Update a tuple (and optionally lock the last tuple version). * * NB: do not call this directly unless you are prepared to deal with * concurrent-update conditions. Use simple_table_tuple_update instead. @@ -1511,13 +1530,23 @@ table_tuple_delete(Relation rel, ItemPointer tid, CommandId cid, * cid - update command ID (used for visibility test, and stored into * cmax/cmin if successful) * crosscheck - if not InvalidSnapshot, also check old tuple against this - * wait - true if should wait for any conflicting update to commit/abort + * options: + * If TABLE_MODIFY_WAIT, wait for any conflicting update to commit/abort. + * If TABLE_MODIFY_FETCH_OLD_TUPLE option is given, the existing tuple is + * fetched into oldSlot when the update is successful. + * If TABLE_MODIFY_LOCK_UPDATED option is given and the tuple is + * concurrently updated, then the last tuple version is locked and fetched + * into oldSlot. + * * Output parameters: * tmfd - filled in failure cases (see below) * lockmode - filled with lock mode acquired on tuple * update_indexes - in success cases this is set to true if new index entries * are required for this tuple - * + * oldSlot - slot to save the deleted or locked tuple. Can be NULL if none of + * TABLE_MODIFY_FETCH_OLD_TUPLE or TABLE_MODIFY_LOCK_UPDATED options + * is specified. + * Normal, successful return value is TM_Ok, which means we did actually * update it. Failure return codes are TM_SelfModified, TM_Updated, and * TM_BeingModified (the last only possible if wait == false). @@ -1535,13 +1564,15 @@ table_tuple_delete(Relation rel, ItemPointer tid, CommandId cid, static inline TM_Result table_tuple_update(Relation rel, ItemPointer otid, TupleTableSlot *slot, CommandId cid, Snapshot snapshot, Snapshot crosscheck, - bool wait, TM_FailureData *tmfd, LockTupleMode *lockmode, - TU_UpdateIndexes *update_indexes) + int options, TM_FailureData *tmfd, LockTupleMode *lockmode, + TU_UpdateIndexes *update_indexes, + TupleTableSlot *oldSlot) { return rel->rd_tableam->tuple_update(rel, otid, slot, cid, snapshot, crosscheck, - wait, tmfd, - lockmode, update_indexes); + options, tmfd, + lockmode, update_indexes, + oldSlot); } /* @@ -2059,10 +2090,12 @@ table_scan_sample_next_tuple(TableScanDesc scan, extern void simple_table_tuple_insert(Relation rel, TupleTableSlot *slot); extern void simple_table_tuple_delete(Relation rel, ItemPointer tid, - Snapshot snapshot); + Snapshot snapshot, + TupleTableSlot *oldSlot); extern void simple_table_tuple_update(Relation rel, ItemPointer otid, TupleTableSlot *slot, Snapshot snapshot, - TU_UpdateIndexes *update_indexes); + TU_UpdateIndexes *update_indexes, + TupleTableSlot *oldSlot); /* ---------------------------------------------------------------------------- diff --git a/src/include/commands/trigger.h b/src/include/commands/trigger.h index 430e3ca7ddf..4903b4b7bc2 100644 --- a/src/include/commands/trigger.h +++ b/src/include/commands/trigger.h @@ -216,8 +216,8 @@ extern bool ExecBRDeleteTriggers(EState *estate, TM_FailureData *tmfd); extern void ExecARDeleteTriggers(EState *estate, ResultRelInfo *relinfo, - ItemPointer tupleid, HeapTuple fdw_trigtuple, + TupleTableSlot *slot, TransitionCaptureState *transition_capture, bool is_crosspart_update); extern bool ExecIRDeleteTriggers(EState *estate, @@ -240,8 +240,8 @@ extern void ExecARUpdateTriggers(EState *estate, ResultRelInfo *relinfo, ResultRelInfo *src_partinfo, ResultRelInfo *dst_partinfo, - ItemPointer tupleid, HeapTuple fdw_trigtuple, + TupleTableSlot *oldslot, TupleTableSlot *newslot, List *recheckIndexes, TransitionCaptureState *transition_capture, From 4eb44779aefbda9fc5b77b6921446de140f036f0 Mon Sep 17 00:00:00 2001 From: Andres Freund Date: Wed, 22 Mar 2023 16:47:09 -0700 Subject: [PATCH 006/102] Add EvalPlanQual delete returning isolation test Author: Andres Freund Reviewed-by: Pavel Borisov Discussion: https://www.postgresql.org/message-id/flat/CAPpHfdua-YFw3XTprfutzGp28xXLigFtzNbuFY8yPhqeq6X5kg%40mail.gmail.com --- .../isolation/expected/eval-plan-qual-2.out | 37 +++++++++++++++++++ src/test/isolation/isolation_schedule | 1 + .../isolation/specs/eval-plan-qual-2.spec | 30 +++++++++++++++ 3 files changed, 68 insertions(+) create mode 100644 src/test/isolation/expected/eval-plan-qual-2.out create mode 100644 src/test/isolation/specs/eval-plan-qual-2.spec diff --git a/src/test/isolation/expected/eval-plan-qual-2.out b/src/test/isolation/expected/eval-plan-qual-2.out new file mode 100644 index 00000000000..117a3d3be8d --- /dev/null +++ b/src/test/isolation/expected/eval-plan-qual-2.out @@ -0,0 +1,37 @@ +Parsed test spec with 3 sessions + +starting permutation: read_u wx2 wb1 c2 c1 read_u read +step read_u: SELECT * FROM accounts; +accountid|balance|balance2 +---------+-------+-------- +checking | 600| 1200 +savings | 600| 1200 +(2 rows) + +step wx2: UPDATE accounts SET balance = balance + 450 WHERE accountid = 'checking' RETURNING balance; +balance +------- + 1050 +(1 row) + +step wb1: DELETE FROM accounts WHERE balance = 600 RETURNING *; +step c2: COMMIT; +step wb1: <... completed> +accountid|balance|balance2 +---------+-------+-------- +savings | 600| 1200 +(1 row) + +step c1: COMMIT; +step read_u: SELECT * FROM accounts; +accountid|balance|balance2 +---------+-------+-------- +checking | 1050| 2100 +(1 row) + +step read: SELECT * FROM accounts ORDER BY accountid; +accountid|balance|balance2 +---------+-------+-------- +checking | 1050| 2100 +(1 row) + diff --git a/src/test/isolation/isolation_schedule b/src/test/isolation/isolation_schedule index 9b0bb8a29b3..124c170746c 100644 --- a/src/test/isolation/isolation_schedule +++ b/src/test/isolation/isolation_schedule @@ -36,6 +36,7 @@ test: fk-partitioned-2 test: fk-snapshot test: subxid-overflow test: eval-plan-qual +test: eval-plan-qual-2 test: eval-plan-qual-trigger test: inplace-inval test: intra-grant-inplace diff --git a/src/test/isolation/specs/eval-plan-qual-2.spec b/src/test/isolation/specs/eval-plan-qual-2.spec new file mode 100644 index 00000000000..30447bef24a --- /dev/null +++ b/src/test/isolation/specs/eval-plan-qual-2.spec @@ -0,0 +1,30 @@ +setup +{ + CREATE TABLE accounts (accountid text PRIMARY KEY, balance numeric not null, + balance2 numeric GENERATED ALWAYS AS (balance * 2) STORED); + INSERT INTO accounts VALUES ('checking', 600), ('savings', 600); +} + +teardown +{ + DROP TABLE accounts; +} + +session s1 +setup { BEGIN ISOLATION LEVEL READ COMMITTED; } +step wb1 { DELETE FROM accounts WHERE balance = 600 RETURNING *; } +step c1 { COMMIT; } + +session s2 +setup { BEGIN ISOLATION LEVEL READ COMMITTED; } +step wx2 { UPDATE accounts SET balance = balance + 450 WHERE accountid = 'checking' RETURNING balance; } +step c2 { COMMIT; } + +session s3 +setup { BEGIN ISOLATION LEVEL READ COMMITTED; } +step read { SELECT * FROM accounts ORDER BY accountid; } +step read_u { SELECT * FROM accounts; } + +teardown { COMMIT; } + +permutation read_u wx2 wb1 c2 c1 read_u read From 7fdb30031cc5e9725ac5423a4b06985c485872e8 Mon Sep 17 00:00:00 2001 From: Alexander Korotkov Date: Mon, 13 Dec 2021 00:04:21 +0300 Subject: [PATCH 007/102] Improvements to TableAM API --- src/backend/access/common/detoast.c | 20 +- src/backend/access/common/heaptuple.c | 4 + src/backend/access/common/reloptions.c | 6 +- src/backend/access/heap/heapam_handler.c | 343 +++++++++++++++++- src/backend/access/table/tableam.c | 4 +- src/backend/access/table/tableamapi.c | 23 +- src/backend/catalog/aclchk.c | 2 +- src/backend/commands/analyze.c | 14 +- src/backend/commands/tablecmds.c | 53 +-- src/backend/commands/trigger.c | 237 ++++++++++--- src/backend/executor/execExprInterp.c | 4 +- src/backend/executor/execMain.c | 28 +- src/backend/executor/execReplication.c | 10 +- src/backend/executor/nodeLockRows.c | 17 +- src/backend/executor/nodeModifyTable.c | 433 +++++++---------------- src/backend/executor/nodeTidscan.c | 2 +- src/backend/nodes/read.c | 11 + src/backend/optimizer/plan/planner.c | 16 +- src/backend/optimizer/prep/preptlist.c | 20 +- src/backend/optimizer/util/appendinfo.c | 32 +- src/backend/optimizer/util/inherit.c | 48 ++- src/backend/parser/parse_relation.c | 13 + src/backend/postmaster/autovacuum.c | 4 +- src/backend/rewrite/rewriteHandler.c | 1 + src/backend/utils/adt/ri_triggers.c | 11 +- src/backend/utils/cache/relcache.c | 38 +- src/backend/utils/sort/tuplestore.c | 30 ++ src/include/access/reloptions.h | 2 + src/include/access/sysattr.h | 3 +- src/include/access/tableam.h | 185 +++++++--- src/include/commands/trigger.h | 4 +- src/include/commands/vacuum.h | 3 + src/include/foreign/fdwapi.h | 6 +- src/include/nodes/execnodes.h | 3 + src/include/nodes/parsenodes.h | 1 + src/include/nodes/plannodes.h | 4 +- src/include/nodes/primnodes.h | 7 + src/include/nodes/readfuncs.h | 1 + src/include/optimizer/appendinfo.h | 5 + src/include/optimizer/planner.h | 3 +- src/include/utils/tuplestore.h | 3 + src/include/varatt.h | 2 + 42 files changed, 1120 insertions(+), 536 deletions(-) diff --git a/src/backend/access/common/detoast.c b/src/backend/access/common/detoast.c index 8af80c80865..f54dcb03517 100644 --- a/src/backend/access/common/detoast.c +++ b/src/backend/access/common/detoast.c @@ -28,6 +28,8 @@ static struct varlena *toast_fetch_datum_slice(struct varlena *attr, int32 slicelength); static struct varlena *toast_decompress_datum_slice(struct varlena *attr, int32 slicelength); +static ToastFunc o_detoast_func = NULL; + /* ---------- * detoast_external_attr - * @@ -222,7 +224,14 @@ detoast_attr_slice(struct varlena *attr, else if (pg_add_s32_overflow(sliceoffset, slicelength, &slicelimit)) slicelength = slicelimit = -1; - if (VARATT_IS_EXTERNAL_ONDISK(attr)) + if (VARATT_IS_EXTERNAL_ORIOLEDB(attr)) + { + Assert(o_detoast_func != NULL); + preslice = o_detoast_func(attr); + if (preslice == NULL) + elog(ERROR, "unexpected NULL detoast result"); + } + else if (VARATT_IS_EXTERNAL_ONDISK(attr)) { struct varatt_external toast_pointer; @@ -331,8 +340,6 @@ detoast_attr_slice(struct varlena *attr, return result; } -static ToastFunc o_detoast_func = NULL; - void register_o_detoast_func(ToastFunc func) { @@ -633,7 +640,12 @@ toast_datum_size(Datum value) struct varlena *attr = (struct varlena *) DatumGetPointer(value); Size result; - if (VARATT_IS_EXTERNAL_ONDISK(attr)) + if (VARATT_IS_EXTERNAL_ORIOLEDB(attr)) + { + OToastExternal *toasted = (OToastExternal*) VARDATA_EXTERNAL(attr); + result = toasted->toasted_size - VARHDRSZ; + } + else if (VARATT_IS_EXTERNAL_ONDISK(attr)) { /* * Attribute is stored externally - return the extsize whether diff --git a/src/backend/access/common/heaptuple.c b/src/backend/access/common/heaptuple.c index 6bedbdf07ff..75d9c272177 100644 --- a/src/backend/access/common/heaptuple.c +++ b/src/backend/access/common/heaptuple.c @@ -756,6 +756,10 @@ heap_getsysattr(HeapTuple tup, int attnum, TupleDesc tupleDesc, bool *isnull) case TableOidAttributeNumber: result = ObjectIdGetDatum(tup->t_tableOid); break; + case RowIdAttributeNumber: + *isnull = true; + result = 0; + break; default: elog(ERROR, "invalid attnum: %d", attnum); result = 0; /* keep compiler quiet */ diff --git a/src/backend/access/common/reloptions.c b/src/backend/access/common/reloptions.c index 9648a769580..855b1fd7f27 100644 --- a/src/backend/access/common/reloptions.c +++ b/src/backend/access/common/reloptions.c @@ -24,6 +24,7 @@ #include "access/nbtree.h" #include "access/reloptions.h" #include "access/spgist_private.h" +#include "access/tableam.h" #include "catalog/pg_type.h" #include "commands/defrem.h" #include "commands/tablespace.h" @@ -1388,7 +1389,7 @@ untransformRelOptions(Datum options) */ bytea * extractRelOptions(HeapTuple tuple, TupleDesc tupdesc, - amoptions_function amoptions) + const TableAmRoutine *tableam, amoptions_function amoptions) { bytea *options; bool isnull; @@ -1410,7 +1411,8 @@ extractRelOptions(HeapTuple tuple, TupleDesc tupdesc, case RELKIND_RELATION: case RELKIND_TOASTVALUE: case RELKIND_MATVIEW: - options = heap_reloptions(classForm->relkind, datum, false); + options = tableam_reloptions(tableam, classForm->relkind, + datum, false); break; case RELKIND_PARTITIONED_TABLE: options = partitioned_table_reloptions(datum, false); diff --git a/src/backend/access/heap/heapam_handler.c b/src/backend/access/heap/heapam_handler.c index 6de8868a91c..ea6759e8a7f 100644 --- a/src/backend/access/heap/heapam_handler.c +++ b/src/backend/access/heap/heapam_handler.c @@ -23,6 +23,7 @@ #include "access/heapam.h" #include "access/heaptoast.h" #include "access/multixact.h" +#include "access/reloptions.h" #include "access/rewriteheap.h" #include "access/syncscan.h" #include "access/tableam.h" @@ -45,7 +46,7 @@ #include "utils/builtins.h" #include "utils/rel.h" -static TM_Result heapam_tuple_lock(Relation relation, ItemPointer tid, +static TM_Result heapam_tuple_lock(Relation relation, Datum tid, Snapshot snapshot, TupleTableSlot *slot, CommandId cid, LockTupleMode mode, LockWaitPolicy wait_policy, uint8 flags, @@ -75,6 +76,20 @@ heapam_slot_callbacks(Relation relation) return &TTSOpsBufferHeapTuple; } +static RowRefType +heapam_get_row_ref_type(Relation rel) +{ + return ROW_REF_TID; +} + +static void +heapam_free_rd_amcache(Relation rel) +{ + if (rel->rd_amcache) + pfree(rel->rd_amcache); + rel->rd_amcache = NULL; +} + /* ------------------------------------------------------------------------ * Index Scan Callbacks for heap AM @@ -184,7 +199,7 @@ heapam_index_fetch_tuple(struct IndexFetchTableData *scan, static bool heapam_fetch_row_version(Relation relation, - ItemPointer tid, + Datum tupleid, Snapshot snapshot, TupleTableSlot *slot) { @@ -193,7 +208,7 @@ heapam_fetch_row_version(Relation relation, Assert(TTS_IS_BUFFERTUPLE(slot)); - bslot->base.tupdata.t_self = *tid; + bslot->base.tupdata.t_self = *DatumGetItemPointer(tupleid); if (heap_fetch(relation, snapshot, &bslot->base.tupdata, &buffer, false)) { /* store in slot, transferring existing pin */ @@ -243,7 +258,7 @@ heapam_tuple_satisfies_snapshot(Relation rel, TupleTableSlot *slot, * ---------------------------------------------------------------------------- */ -static void +static TupleTableSlot * heapam_tuple_insert(Relation relation, TupleTableSlot *slot, CommandId cid, int options, BulkInsertState bistate) { @@ -260,6 +275,8 @@ heapam_tuple_insert(Relation relation, TupleTableSlot *slot, CommandId cid, if (shouldFree) pfree(tuple); + + return slot; } static void @@ -302,13 +319,285 @@ heapam_tuple_complete_speculative(Relation relation, TupleTableSlot *slot, pfree(tuple); } +/* + * ExecCheckTupleVisible -- verify tuple is visible + * + * It would not be consistent with guarantees of the higher isolation levels to + * proceed with avoiding insertion (taking speculative insertion's alternative + * path) on the basis of another tuple that is not visible to MVCC snapshot. + * Check for the need to raise a serialization failure, and do so as necessary. + */ +static void +ExecCheckTupleVisible(EState *estate, + Relation rel, + TupleTableSlot *slot) +{ + if (!IsolationUsesXactSnapshot()) + return; + + if (!table_tuple_satisfies_snapshot(rel, slot, estate->es_snapshot)) + { + Datum xminDatum; + TransactionId xmin; + bool isnull; + + xminDatum = slot_getsysattr(slot, MinTransactionIdAttributeNumber, &isnull); + Assert(!isnull); + xmin = DatumGetTransactionId(xminDatum); + + /* + * We should not raise a serialization failure if the conflict is + * against a tuple inserted by our own transaction, even if it's not + * visible to our snapshot. (This would happen, for example, if + * conflicting keys are proposed for insertion in a single command.) + */ + if (!TransactionIdIsCurrentTransactionId(xmin)) + ereport(ERROR, + (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE), + errmsg("could not serialize access due to concurrent update"))); + } +} + +/* + * ExecCheckTIDVisible -- convenience variant of ExecCheckTupleVisible() + */ +static void +ExecCheckTIDVisible(EState *estate, + Relation rel, + ItemPointer tid, + TupleTableSlot *tempSlot) +{ + /* Redundantly check isolation level */ + if (!IsolationUsesXactSnapshot()) + return; + + if (!table_tuple_fetch_row_version(rel, PointerGetDatum(tid), + SnapshotAny, tempSlot)) + elog(ERROR, "failed to fetch conflicting tuple for ON CONFLICT"); + ExecCheckTupleVisible(estate, rel, tempSlot); + ExecClearTuple(tempSlot); +} + +static inline TupleTableSlot * +heapam_tuple_insert_with_arbiter(ResultRelInfo *resultRelInfo, + TupleTableSlot *slot, + CommandId cid, int options, + struct BulkInsertStateData *bistate, + List *arbiterIndexes, + EState *estate, + LockTupleMode lockmode, + TupleTableSlot *lockedSlot, + TupleTableSlot *tempSlot) +{ + Relation rel = resultRelInfo->ri_RelationDesc; + uint32 specToken; + ItemPointerData conflictTid; + bool specConflict; + List *recheckIndexes = NIL; + + while (true) + { + specConflict = false; + if (!ExecCheckIndexConstraints(resultRelInfo, slot, estate, &conflictTid, + arbiterIndexes)) + { + if (lockedSlot) + { + TM_Result test; + TM_FailureData tmfd; + Datum xminDatum; + TransactionId xmin; + bool isnull; + + /* Determine lock mode to use */ + lockmode = ExecUpdateLockMode(estate, resultRelInfo); + + /* + * Lock tuple for update. Don't follow updates when tuple cannot be + * locked without doing so. A row locking conflict here means our + * previous conclusion that the tuple is conclusively committed is not + * true anymore. + */ + test = table_tuple_lock(rel, PointerGetDatum(&conflictTid), + estate->es_snapshot, + lockedSlot, estate->es_output_cid, + lockmode, LockWaitBlock, 0, + &tmfd); + switch (test) + { + case TM_Ok: + /* success! */ + break; + + case TM_Invisible: + + /* + * This can occur when a just inserted tuple is updated again in + * the same command. E.g. because multiple rows with the same + * conflicting key values are inserted. + * + * This is somewhat similar to the ExecUpdate() TM_SelfModified + * case. We do not want to proceed because it would lead to the + * same row being updated a second time in some unspecified order, + * and in contrast to plain UPDATEs there's no historical behavior + * to break. + * + * It is the user's responsibility to prevent this situation from + * occurring. These problems are why the SQL standard similarly + * specifies that for SQL MERGE, an exception must be raised in + * the event of an attempt to update the same row twice. + */ + xminDatum = slot_getsysattr(lockedSlot, + MinTransactionIdAttributeNumber, + &isnull); + Assert(!isnull); + xmin = DatumGetTransactionId(xminDatum); + + if (TransactionIdIsCurrentTransactionId(xmin)) + ereport(ERROR, + (errcode(ERRCODE_CARDINALITY_VIOLATION), + /* translator: %s is a SQL command name */ + errmsg("%s command cannot affect row a second time", + "ON CONFLICT DO UPDATE"), + errhint("Ensure that no rows proposed for insertion within the same command have duplicate constrained values."))); + + /* This shouldn't happen */ + elog(ERROR, "attempted to lock invisible tuple"); + break; + + case TM_SelfModified: + + /* + * This state should never be reached. As a dirty snapshot is used + * to find conflicting tuples, speculative insertion wouldn't have + * seen this row to conflict with. + */ + elog(ERROR, "unexpected self-updated tuple"); + break; + + case TM_Updated: + if (IsolationUsesXactSnapshot()) + ereport(ERROR, + (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE), + errmsg("could not serialize access due to concurrent update"))); + + /* + * As long as we don't support an UPDATE of INSERT ON CONFLICT for + * a partitioned table we shouldn't reach to a case where tuple to + * be lock is moved to another partition due to concurrent update + * of the partition key. + */ + Assert(!ItemPointerIndicatesMovedPartitions(&tmfd.ctid)); + + /* + * Tell caller to try again from the very start. + * + * It does not make sense to use the usual EvalPlanQual() style + * loop here, as the new version of the row might not conflict + * anymore, or the conflicting tuple has actually been deleted. + */ + ExecClearTuple(lockedSlot); + return false; + + case TM_Deleted: + if (IsolationUsesXactSnapshot()) + ereport(ERROR, + (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE), + errmsg("could not serialize access due to concurrent delete"))); + + /* see TM_Updated case */ + Assert(!ItemPointerIndicatesMovedPartitions(&tmfd.ctid)); + ExecClearTuple(lockedSlot); + return false; + + default: + elog(ERROR, "unrecognized table_tuple_lock status: %u", test); + } + + /* Success, the tuple is locked. */ + + /* + * Verify that the tuple is visible to our MVCC snapshot if the current + * isolation level mandates that. + * + * It's not sufficient to rely on the check within ExecUpdate() as e.g. + * CONFLICT ... WHERE clause may prevent us from reaching that. + * + * This means we only ever continue when a new command in the current + * transaction could see the row, even though in READ COMMITTED mode the + * tuple will not be visible according to the current statement's + * snapshot. This is in line with the way UPDATE deals with newer tuple + * versions. + */ + ExecCheckTupleVisible(estate, rel, lockedSlot); + return NULL; + } + else + { + ExecCheckTIDVisible(estate, rel, &conflictTid, tempSlot); + return NULL; + } + } + + /* + * Before we start insertion proper, acquire our "speculative + * insertion lock". Others can use that to wait for us to decide + * if we're going to go ahead with the insertion, instead of + * waiting for the whole transaction to complete. + */ + specToken = SpeculativeInsertionLockAcquire(GetCurrentTransactionId()); + + /* insert the tuple, with the speculative token */ + heapam_tuple_insert_speculative(rel, slot, + estate->es_output_cid, + 0, + NULL, + specToken); + + /* insert index entries for tuple */ + recheckIndexes = ExecInsertIndexTuples(resultRelInfo, + slot, estate, false, true, + &specConflict, + arbiterIndexes, + false); + + /* adjust the tuple's state accordingly */ + heapam_tuple_complete_speculative(rel, slot, + specToken, !specConflict); + + /* + * Wake up anyone waiting for our decision. They will re-check + * the tuple, see that it's no longer speculative, and wait on our + * XID as if this was a regularly inserted tuple all along. Or if + * we killed the tuple, they will see it's dead, and proceed as if + * the tuple never existed. + */ + SpeculativeInsertionLockRelease(GetCurrentTransactionId()); + + /* + * If there was a conflict, start from the beginning. We'll do + * the pre-check again, which will now find the conflicting tuple + * (unless it aborts before we get there). + */ + if (specConflict) + { + list_free(recheckIndexes); + CHECK_FOR_INTERRUPTS(); + continue; + } + + return slot; + } +} + static TM_Result -heapam_tuple_delete(Relation relation, ItemPointer tid, CommandId cid, +heapam_tuple_delete(Relation relation, Datum tupleid, CommandId cid, Snapshot snapshot, Snapshot crosscheck, int options, TM_FailureData *tmfd, bool changingPart, TupleTableSlot *oldSlot) { TM_Result result; + ItemPointer tid = DatumGetItemPointer(tupleid); /* * Currently Deleting of index tuples are handled at vacuum, in case if @@ -331,7 +620,7 @@ heapam_tuple_delete(Relation relation, ItemPointer tid, CommandId cid, * heapam_tuple_lock() will take advantage of tuple loaded into * oldSlot by heap_delete(). */ - result = heapam_tuple_lock(relation, tid, snapshot, + result = heapam_tuple_lock(relation, tupleid, snapshot, oldSlot, cid, LockTupleExclusive, (options & TABLE_MODIFY_WAIT) ? LockWaitBlock : @@ -348,7 +637,7 @@ heapam_tuple_delete(Relation relation, ItemPointer tid, CommandId cid, static TM_Result -heapam_tuple_update(Relation relation, ItemPointer otid, TupleTableSlot *slot, +heapam_tuple_update(Relation relation, Datum tupleid, TupleTableSlot *slot, CommandId cid, Snapshot snapshot, Snapshot crosscheck, int options, TM_FailureData *tmfd, LockTupleMode *lockmode, TU_UpdateIndexes *update_indexes, @@ -357,6 +646,7 @@ heapam_tuple_update(Relation relation, ItemPointer otid, TupleTableSlot *slot, bool shouldFree = true; HeapTuple tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree); TM_Result result; + ItemPointer otid = DatumGetItemPointer(tupleid); /* Update the tuple with table oid */ slot->tts_tableOid = RelationGetRelid(relation); @@ -403,7 +693,7 @@ heapam_tuple_update(Relation relation, ItemPointer otid, TupleTableSlot *slot, * heapam_tuple_lock() will take advantage of tuple loaded into * oldSlot by heap_update(). */ - result = heapam_tuple_lock(relation, otid, snapshot, + result = heapam_tuple_lock(relation, tupleid, snapshot, oldSlot, cid, *lockmode, (options & TABLE_MODIFY_WAIT) ? LockWaitBlock : @@ -419,7 +709,7 @@ heapam_tuple_update(Relation relation, ItemPointer otid, TupleTableSlot *slot, } static TM_Result -heapam_tuple_lock(Relation relation, ItemPointer tid, Snapshot snapshot, +heapam_tuple_lock(Relation relation, Datum tupleid, Snapshot snapshot, TupleTableSlot *slot, CommandId cid, LockTupleMode mode, LockWaitPolicy wait_policy, uint8 flags, TM_FailureData *tmfd) @@ -427,6 +717,7 @@ heapam_tuple_lock(Relation relation, ItemPointer tid, Snapshot snapshot, BufferHeapTupleTableSlot *bslot = (BufferHeapTupleTableSlot *) slot; TM_Result result; HeapTuple tuple = &bslot->base.tupdata; + ItemPointer tid = DatumGetItemPointer(tupleid); bool follow_updates; follow_updates = (flags & TUPLE_LOCK_FLAG_LOCK_UPDATE_IN_PROGRESS) != 0; @@ -2594,6 +2885,29 @@ SampleHeapTupleVisible(TableScanDesc scan, Buffer buffer, } } +static bool +heapam_tuple_is_current(Relation rel, TupleTableSlot *slot) +{ + Datum xminDatum; + TransactionId xmin; + bool isnull; + + xminDatum = slot_getsysattr(slot, MinTransactionIdAttributeNumber, &isnull); + Assert(!isnull); + xmin = DatumGetTransactionId(xminDatum); + return TransactionIdIsCurrentTransactionId(xmin); +} + +static bytea * +heapam_reloptions(char relkind, Datum reloptions, bool validate) +{ + if (relkind == RELKIND_RELATION || + relkind == RELKIND_TOASTVALUE || + relkind == RELKIND_MATVIEW) + return heap_reloptions(relkind, reloptions, validate); + + return NULL; +} /* ------------------------------------------------------------------------ * Definition of the heap table access method. @@ -2604,6 +2918,8 @@ static const TableAmRoutine heapam_methods = { .type = T_TableAmRoutine, .slot_callbacks = heapam_slot_callbacks, + .get_row_ref_type = heapam_get_row_ref_type, + .free_rd_amcache = heapam_free_rd_amcache, .scan_begin = heap_beginscan, .scan_end = heap_endscan, @@ -2623,8 +2939,7 @@ static const TableAmRoutine heapam_methods = { .index_fetch_tuple = heapam_index_fetch_tuple, .tuple_insert = heapam_tuple_insert, - .tuple_insert_speculative = heapam_tuple_insert_speculative, - .tuple_complete_speculative = heapam_tuple_complete_speculative, + .tuple_insert_with_arbiter = heapam_tuple_insert_with_arbiter, .multi_insert = heap_multi_insert, .tuple_delete = heapam_tuple_delete, .tuple_update = heapam_tuple_update, @@ -2656,7 +2971,11 @@ static const TableAmRoutine heapam_methods = { .scan_bitmap_next_block = heapam_scan_bitmap_next_block, .scan_bitmap_next_tuple = heapam_scan_bitmap_next_tuple, .scan_sample_next_block = heapam_scan_sample_next_block, - .scan_sample_next_tuple = heapam_scan_sample_next_tuple + .scan_sample_next_tuple = heapam_scan_sample_next_tuple, + + .tuple_is_current = heapam_tuple_is_current, + + .reloptions = heapam_reloptions }; diff --git a/src/backend/access/table/tableam.c b/src/backend/access/table/tableam.c index 3e88cb82cf0..c8329db8f34 100644 --- a/src/backend/access/table/tableam.c +++ b/src/backend/access/table/tableam.c @@ -308,7 +308,7 @@ simple_table_tuple_delete(Relation rel, ItemPointer tid, Snapshot snapshot, if (oldSlot) options |= TABLE_MODIFY_FETCH_OLD_TUPLE; - result = table_tuple_delete(rel, tid, + result = table_tuple_delete(rel, PointerGetDatum(tid), GetCurrentCommandId(true), snapshot, InvalidSnapshot, options, @@ -364,7 +364,7 @@ simple_table_tuple_update(Relation rel, ItemPointer otid, if (oldSlot) options |= TABLE_MODIFY_FETCH_OLD_TUPLE; - result = table_tuple_update(rel, otid, slot, + result = table_tuple_update(rel, PointerGetDatum(otid), slot, GetCurrentCommandId(true), snapshot, InvalidSnapshot, options, diff --git a/src/backend/access/table/tableamapi.c b/src/backend/access/table/tableamapi.c index d7798b6afb6..26aca18dc50 100644 --- a/src/backend/access/table/tableamapi.c +++ b/src/backend/access/table/tableamapi.c @@ -75,8 +75,7 @@ GetTableAmRoutine(Oid amhandler) * Could be made optional, but would require throwing error during * parse-analysis. */ - Assert(routine->tuple_insert_speculative != NULL); - Assert(routine->tuple_complete_speculative != NULL); + Assert(routine->tuple_insert_with_arbiter != NULL); Assert(routine->multi_insert != NULL); Assert(routine->tuple_delete != NULL); @@ -104,9 +103,29 @@ GetTableAmRoutine(Oid amhandler) Assert(routine->scan_sample_next_block != NULL); Assert(routine->scan_sample_next_tuple != NULL); + Assert(routine->tuple_is_current != NULL); + return routine; } +const TableAmRoutine * +GetTableAmRoutineByAmOid(Oid amoid) +{ + HeapTuple ht_am; + Form_pg_am amrec; + const TableAmRoutine *tableam = NULL; + + ht_am = SearchSysCache1(AMOID, ObjectIdGetDatum(amoid)); + if (!HeapTupleIsValid(ht_am)) + elog(ERROR, "cache lookup failed for access method %u", + amoid); + amrec = (Form_pg_am)GETSTRUCT(ht_am); + + tableam = GetTableAmRoutine(amrec->amhandler); + ReleaseSysCache(ht_am); + return tableam; +} + /* check_hook: validate new default_table_access_method */ bool check_default_table_access_method(char **newval, void **extra, GucSource source) diff --git a/src/backend/catalog/aclchk.c b/src/backend/catalog/aclchk.c index 2d80a04ce9f..30183b487a1 100644 --- a/src/backend/catalog/aclchk.c +++ b/src/backend/catalog/aclchk.c @@ -1618,7 +1618,7 @@ expand_all_col_privileges(Oid table_oid, Form_pg_class classForm, AttrNumber curr_att; Assert(classForm->relnatts - FirstLowInvalidHeapAttributeNumber < num_col_privileges); - for (curr_att = FirstLowInvalidHeapAttributeNumber + 1; + for (curr_att = FirstLowInvalidHeapAttributeNumber + 2; curr_att <= classForm->relnatts; curr_att++) { diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c index bda364552ca..cd4a16a5572 100644 --- a/src/backend/commands/analyze.c +++ b/src/backend/commands/analyze.c @@ -97,9 +97,6 @@ static void compute_index_stats(Relation onerel, double totalrows, MemoryContext col_context); static VacAttrStats *examine_attribute(Relation onerel, int attnum, Node *index_expr); -static int acquire_sample_rows(Relation onerel, int elevel, - HeapTuple *rows, int targrows, - double *totalrows, double *totaldeadrows); static int compare_rows(const void *a, const void *b, void *arg); static int acquire_inherited_sample_rows(Relation onerel, int elevel, HeapTuple *rows, int targrows, @@ -201,10 +198,7 @@ analyze_rel(Oid relid, RangeVar *relation, if (onerel->rd_rel->relkind == RELKIND_RELATION || onerel->rd_rel->relkind == RELKIND_MATVIEW) { - /* Regular table, so we'll use the regular row acquisition function */ - acquirefunc = acquire_sample_rows; - /* Also get regular table's size */ - relpages = RelationGetNumberOfBlocks(onerel); + table_analyze(onerel, &acquirefunc, &relpages); } else if (onerel->rd_rel->relkind == RELKIND_FOREIGN_TABLE) { @@ -1133,7 +1127,7 @@ examine_attribute(Relation onerel, int attnum, Node *index_expr) * block. The previous sampling method put too much credence in the row * density near the start of the table. */ -static int +int acquire_sample_rows(Relation onerel, int elevel, HeapTuple *rows, int targrows, double *totalrows, double *totaldeadrows) @@ -1460,9 +1454,7 @@ acquire_inherited_sample_rows(Relation onerel, int elevel, if (childrel->rd_rel->relkind == RELKIND_RELATION || childrel->rd_rel->relkind == RELKIND_MATVIEW) { - /* Regular table, so use the regular row acquisition function */ - acquirefunc = acquire_sample_rows; - relpages = RelationGetNumberOfBlocks(childrel); + table_analyze(childrel, &acquirefunc, &relpages); } else if (childrel->rd_rel->relkind == RELKIND_FOREIGN_TABLE) { diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c index 7aae9d208c4..0c52b64053c 100644 --- a/src/backend/commands/tablecmds.c +++ b/src/backend/commands/tablecmds.c @@ -701,6 +701,7 @@ DefineRelation(CreateStmt *stmt, char relkind, Oid ownerId, LOCKMODE parentLockmode; const char *accessMethod = NULL; Oid accessMethodId = InvalidOid; + const TableAmRoutine *tableam = NULL; /* * Truncate relname to appropriate length (probably a waste of time, as @@ -836,6 +837,26 @@ DefineRelation(CreateStmt *stmt, char relkind, Oid ownerId, if (!OidIsValid(ownerId)) ownerId = GetUserId(); + /* + * If the statement hasn't specified an access method, but we're defining + * a type of relation that needs one, use the default. + */ + if (stmt->accessMethod != NULL) + { + accessMethod = stmt->accessMethod; + + if (partitioned) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("specifying a table access method is not supported on a partitioned table"))); + } + else if (RELKIND_HAS_TABLE_AM(relkind)) + accessMethod = default_table_access_method; + + /* look up the access method, verify it is for a table */ + if (accessMethod != NULL) + accessMethodId = get_table_am_oid(accessMethod, false); + /* * Parse and validate reloptions, if any. */ @@ -844,6 +865,12 @@ DefineRelation(CreateStmt *stmt, char relkind, Oid ownerId, switch (relkind) { + case RELKIND_RELATION: + case RELKIND_TOASTVALUE: + case RELKIND_MATVIEW: + tableam = GetTableAmRoutineByAmOid(accessMethodId); + (void) tableam_reloptions(tableam, relkind, reloptions, true); + break; case RELKIND_VIEW: (void) view_reloptions(reloptions, true); break; @@ -852,6 +879,7 @@ DefineRelation(CreateStmt *stmt, char relkind, Oid ownerId, break; default: (void) heap_reloptions(relkind, reloptions, true); + break; } if (stmt->ofTypename) @@ -955,26 +983,6 @@ DefineRelation(CreateStmt *stmt, char relkind, Oid ownerId, attr->attstorage = GetAttributeStorage(attr->atttypid, colDef->storage_name); } - /* - * If the statement hasn't specified an access method, but we're defining - * a type of relation that needs one, use the default. - */ - if (stmt->accessMethod != NULL) - { - accessMethod = stmt->accessMethod; - - if (partitioned) - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("specifying a table access method is not supported on a partitioned table"))); - } - else if (RELKIND_HAS_TABLE_AM(relkind)) - accessMethod = default_table_access_method; - - /* look up the access method, verify it is for a table */ - if (accessMethod != NULL) - accessMethodId = get_table_am_oid(accessMethod, false); - /* * Create the relation. Inherited defaults and constraints are passed in * for immediate handling --- since they don't need parsing, they can be @@ -6159,8 +6167,10 @@ ATRewriteTable(AlteredTableInfo *tab, Oid OIDNewHeap, LOCKMODE lockmode) /* Write the tuple out to the new relation */ if (newrel) + { table_tuple_insert(newrel, insertslot, mycid, ti_options, bistate); + } ResetExprContext(econtext); @@ -14629,7 +14639,8 @@ ATExecSetRelOptions(Relation rel, List *defList, AlterTableType operation, case RELKIND_RELATION: case RELKIND_TOASTVALUE: case RELKIND_MATVIEW: - (void) heap_reloptions(rel->rd_rel->relkind, newOptions, true); + (void) table_reloptions(rel, rel->rd_rel->relkind, + newOptions, true); break; case RELKIND_PARTITIONED_TABLE: (void) partitioned_table_reloptions(newOptions, true); diff --git a/src/backend/commands/trigger.c b/src/backend/commands/trigger.c index c5e6caf4453..fdd545dae8f 100644 --- a/src/backend/commands/trigger.c +++ b/src/backend/commands/trigger.c @@ -83,7 +83,7 @@ static void SetTriggerFlags(TriggerDesc *trigdesc, Trigger *trigger); static bool GetTupleForTrigger(EState *estate, EPQState *epqstate, ResultRelInfo *relinfo, - ItemPointer tid, + Datum tupleid, LockTupleMode lockmode, TupleTableSlot *oldslot, TupleTableSlot **epqslot, @@ -2699,7 +2699,7 @@ ExecASDeleteTriggers(EState *estate, ResultRelInfo *relinfo, bool ExecBRDeleteTriggers(EState *estate, EPQState *epqstate, ResultRelInfo *relinfo, - ItemPointer tupleid, + Datum tupleid, HeapTuple fdw_trigtuple, TupleTableSlot **epqslot, TM_Result *tmresult, @@ -2713,7 +2713,7 @@ ExecBRDeleteTriggers(EState *estate, EPQState *epqstate, bool should_free = false; int i; - Assert(HeapTupleIsValid(fdw_trigtuple) ^ ItemPointerIsValid(tupleid)); + Assert(HeapTupleIsValid(fdw_trigtuple) ^ (DatumGetPointer(tupleid) != NULL)); if (fdw_trigtuple == NULL) { TupleTableSlot *epqslot_candidate = NULL; @@ -2950,7 +2950,7 @@ ExecASUpdateTriggers(EState *estate, ResultRelInfo *relinfo, bool ExecBRUpdateTriggers(EState *estate, EPQState *epqstate, ResultRelInfo *relinfo, - ItemPointer tupleid, + Datum tupleid, HeapTuple fdw_trigtuple, TupleTableSlot *newslot, TM_Result *tmresult, @@ -2970,7 +2970,7 @@ ExecBRUpdateTriggers(EState *estate, EPQState *epqstate, /* Determine lock mode to use */ lockmode = ExecUpdateLockMode(estate, relinfo); - Assert(HeapTupleIsValid(fdw_trigtuple) ^ ItemPointerIsValid(tupleid)); + Assert(HeapTupleIsValid(fdw_trigtuple) ^ (DatumGetPointer(tupleid) != NULL)); if (fdw_trigtuple == NULL) { TupleTableSlot *epqslot_candidate = NULL; @@ -3297,7 +3297,7 @@ static bool GetTupleForTrigger(EState *estate, EPQState *epqstate, ResultRelInfo *relinfo, - ItemPointer tid, + Datum tupleid, LockTupleMode lockmode, TupleTableSlot *oldslot, TupleTableSlot **epqslot, @@ -3322,7 +3322,9 @@ GetTupleForTrigger(EState *estate, */ if (!IsolationUsesXactSnapshot()) lockflags |= TUPLE_LOCK_FLAG_FIND_LAST_VERSION; - test = table_tuple_lock(relation, tid, estate->es_snapshot, oldslot, + + test = table_tuple_lock(relation, tupleid, + estate->es_snapshot, oldslot, estate->es_output_cid, lockmode, LockWaitBlock, lockflags, @@ -3418,8 +3420,8 @@ GetTupleForTrigger(EState *estate, * We expect the tuple to be present, thus very simple error handling * suffices. */ - if (!table_tuple_fetch_row_version(relation, tid, SnapshotAny, - oldslot)) + if (!table_tuple_fetch_row_version(relation, tupleid, + SnapshotAny, oldslot)) elog(ERROR, "failed to fetch tuple for trigger"); } @@ -3625,18 +3627,22 @@ typedef SetConstraintStateData *SetConstraintState; * cycles. So we need only ensure that ats_firing_id is zero when attaching * a new event to an existing AfterTriggerSharedData record. */ -typedef uint32 TriggerFlags; +typedef uint64 TriggerFlags; -#define AFTER_TRIGGER_OFFSET 0x07FFFFFF /* must be low-order bits */ -#define AFTER_TRIGGER_DONE 0x80000000 -#define AFTER_TRIGGER_IN_PROGRESS 0x40000000 +#define AFTER_TRIGGER_SIZE UINT64CONST(0xFFFF000000000) /* must be low-order bits */ +#define AFTER_TRIGGER_SIZE_SHIFT (36) +#define AFTER_TRIGGER_OFFSET UINT64CONST(0x000000FFFFFFF) /* must be low-order bits */ +#define AFTER_TRIGGER_DONE UINT64CONST(0x0000800000000) +#define AFTER_TRIGGER_IN_PROGRESS UINT64CONST(0x0000400000000) /* bits describing the size and tuple sources of this event */ -#define AFTER_TRIGGER_FDW_REUSE 0x00000000 -#define AFTER_TRIGGER_FDW_FETCH 0x20000000 -#define AFTER_TRIGGER_1CTID 0x10000000 -#define AFTER_TRIGGER_2CTID 0x30000000 -#define AFTER_TRIGGER_CP_UPDATE 0x08000000 -#define AFTER_TRIGGER_TUP_BITS 0x38000000 +#define AFTER_TRIGGER_FDW_REUSE UINT64CONST(0x0000000000000) +#define AFTER_TRIGGER_FDW_FETCH UINT64CONST(0x0000200000000) +#define AFTER_TRIGGER_1CTID UINT64CONST(0x0000100000000) +#define AFTER_TRIGGER_ROWID1 UINT64CONST(0x0000010000000) +#define AFTER_TRIGGER_2CTID UINT64CONST(0x0000300000000) +#define AFTER_TRIGGER_ROWID2 UINT64CONST(0x0000020000000) +#define AFTER_TRIGGER_CP_UPDATE UINT64CONST(0x0000080000000) +#define AFTER_TRIGGER_TUP_BITS UINT64CONST(0x0000380000000) typedef struct AfterTriggerSharedData *AfterTriggerShared; typedef struct AfterTriggerSharedData @@ -3688,6 +3694,9 @@ typedef struct AfterTriggerEventDataZeroCtids } AfterTriggerEventDataZeroCtids; #define SizeofTriggerEvent(evt) \ + (((evt)->ate_flags & AFTER_TRIGGER_SIZE) >> AFTER_TRIGGER_SIZE_SHIFT) + +#define BasicSizeofTriggerEvent(evt) \ (((evt)->ate_flags & AFTER_TRIGGER_TUP_BITS) == AFTER_TRIGGER_CP_UPDATE ? \ sizeof(AfterTriggerEventData) : \ (((evt)->ate_flags & AFTER_TRIGGER_TUP_BITS) == AFTER_TRIGGER_2CTID ? \ @@ -4033,14 +4042,34 @@ afterTriggerCopyBitmap(Bitmapset *src) */ static void afterTriggerAddEvent(AfterTriggerEventList *events, - AfterTriggerEvent event, AfterTriggerShared evtshared) + AfterTriggerEvent event, AfterTriggerShared evtshared, + bytea *rowid1, bytea *rowid2) { - Size eventsize = SizeofTriggerEvent(event); - Size needed = eventsize + sizeof(AfterTriggerSharedData); + Size basiceventsize = MAXALIGN(BasicSizeofTriggerEvent(event)); + Size eventsize; + Size needed; AfterTriggerEventChunk *chunk; AfterTriggerShared newshared; AfterTriggerEvent newevent; + if (SizeofTriggerEvent(event) == 0) + { + eventsize = basiceventsize; + if (event->ate_flags & AFTER_TRIGGER_ROWID1) + eventsize += MAXALIGN(VARSIZE(rowid1)); + + if (event->ate_flags & AFTER_TRIGGER_ROWID2) + eventsize += MAXALIGN(VARSIZE(rowid2)); + + event->ate_flags |= eventsize << AFTER_TRIGGER_SIZE_SHIFT; + } + else + { + eventsize = SizeofTriggerEvent(event); + } + + needed = eventsize + sizeof(AfterTriggerSharedData); + /* * If empty list or not enough room in the tail chunk, make a new chunk. * We assume here that a new shared record will always be needed. @@ -4073,7 +4102,7 @@ afterTriggerAddEvent(AfterTriggerEventList *events, * sizes used should be MAXALIGN multiples, to ensure that the shared * records will be aligned safely. */ -#define MIN_CHUNK_SIZE 1024 +#define MIN_CHUNK_SIZE (1024*4) #define MAX_CHUNK_SIZE (1024*1024) #if MAX_CHUNK_SIZE > (AFTER_TRIGGER_OFFSET+1) @@ -4092,6 +4121,7 @@ afterTriggerAddEvent(AfterTriggerEventList *events, chunksize *= 2; /* okay, double it */ else chunksize /= 2; /* too many shared records */ + chunksize = Max(chunksize, MIN_CHUNK_SIZE); chunksize = Min(chunksize, MAX_CHUNK_SIZE); } chunk = MemoryContextAlloc(afterTriggers.event_cxt, chunksize); @@ -4137,7 +4167,26 @@ afterTriggerAddEvent(AfterTriggerEventList *events, /* Insert the data */ newevent = (AfterTriggerEvent) chunk->freeptr; - memcpy(newevent, event, eventsize); + if (!rowid1 && !rowid2) + { + memcpy(newevent, event, eventsize); + } + else + { + Pointer ptr = chunk->freeptr; + + memcpy(newevent, event, basiceventsize); + ptr += basiceventsize; + + if (event->ate_flags & AFTER_TRIGGER_ROWID1) + { + memcpy(ptr, rowid1, MAXALIGN(VARSIZE(rowid1))); + ptr += MAXALIGN(VARSIZE(rowid1)); + } + + if (event->ate_flags & AFTER_TRIGGER_ROWID2) + memcpy(ptr, rowid2, MAXALIGN(VARSIZE(rowid2))); + } /* ... and link the new event to its shared record */ newevent->ate_flags &= ~AFTER_TRIGGER_OFFSET; newevent->ate_flags |= (char *) newshared - (char *) newevent; @@ -4297,6 +4346,7 @@ AfterTriggerExecute(EState *estate, int tgindx; bool should_free_trig = false; bool should_free_new = false; + Pointer ptr; /* * Locate trigger in trigdesc. It might not be present, and in fact the @@ -4332,15 +4382,17 @@ AfterTriggerExecute(EState *estate, { Tuplestorestate *fdw_tuplestore = GetCurrentFDWTuplestore(); - if (!tuplestore_gettupleslot(fdw_tuplestore, true, false, - trig_tuple_slot1)) + if (!tuplestore_force_gettupleslot(fdw_tuplestore, true, false, + trig_tuple_slot1)) elog(ERROR, "failed to fetch tuple1 for AFTER trigger"); if ((evtshared->ats_event & TRIGGER_EVENT_OPMASK) == TRIGGER_EVENT_UPDATE && - !tuplestore_gettupleslot(fdw_tuplestore, true, false, - trig_tuple_slot2)) + !tuplestore_force_gettupleslot(fdw_tuplestore, true, false, + trig_tuple_slot2)) elog(ERROR, "failed to fetch tuple2 for AFTER trigger"); + trig_tuple_slot1->tts_tid = event->ate_ctid1; + trig_tuple_slot2->tts_tid = event->ate_ctid2; } /* fall through */ case AFTER_TRIGGER_FDW_REUSE: @@ -4372,13 +4424,26 @@ AfterTriggerExecute(EState *estate, break; default: - if (ItemPointerIsValid(&(event->ate_ctid1))) + ptr = (Pointer) event + MAXALIGN(BasicSizeofTriggerEvent(event)); + if (ItemPointerIsValid(&(event->ate_ctid1)) || + (event->ate_flags & AFTER_TRIGGER_ROWID1)) { + Datum tupleid; + TupleTableSlot *src_slot = ExecGetTriggerOldSlot(estate, src_relInfo); - if (!table_tuple_fetch_row_version(src_rel, - &(event->ate_ctid1), + if (event->ate_flags & AFTER_TRIGGER_ROWID1) + { + tupleid = PointerGetDatum(ptr); + ptr += MAXALIGN(VARSIZE(ptr)); + } + else + { + tupleid = PointerGetDatum(&(event->ate_ctid1)); + } + + if (!table_tuple_fetch_row_version(src_rel, tupleid, SnapshotAny, src_slot)) elog(ERROR, "failed to fetch tuple1 for AFTER trigger"); @@ -4414,13 +4479,23 @@ AfterTriggerExecute(EState *estate, /* don't touch ctid2 if not there */ if (((event->ate_flags & AFTER_TRIGGER_TUP_BITS) == AFTER_TRIGGER_2CTID || (event->ate_flags & AFTER_TRIGGER_CP_UPDATE)) && - ItemPointerIsValid(&(event->ate_ctid2))) + (ItemPointerIsValid(&(event->ate_ctid2)) || + (event->ate_flags & AFTER_TRIGGER_ROWID2))) { + Datum tupleid; + TupleTableSlot *dst_slot = ExecGetTriggerNewSlot(estate, dst_relInfo); - if (!table_tuple_fetch_row_version(dst_rel, - &(event->ate_ctid2), + if (event->ate_flags & AFTER_TRIGGER_ROWID2) + { + tupleid = PointerGetDatum(ptr); + } + else + { + tupleid = PointerGetDatum(&(event->ate_ctid2)); + } + if (!table_tuple_fetch_row_version(dst_rel, tupleid, SnapshotAny, dst_slot)) elog(ERROR, "failed to fetch tuple2 for AFTER trigger"); @@ -4594,7 +4669,7 @@ afterTriggerMarkEvents(AfterTriggerEventList *events, { deferred_found = true; /* add it to move_list */ - afterTriggerAddEvent(move_list, event, evtshared); + afterTriggerAddEvent(move_list, event, evtshared, NULL, NULL); /* mark original copy "done" so we don't do it again */ event->ate_flags |= AFTER_TRIGGER_DONE; } @@ -4698,6 +4773,7 @@ afterTriggerInvokeEvents(AfterTriggerEventList *events, /* caution: trigdesc could be NULL here */ finfo = rInfo->ri_TrigFunctions; instr = rInfo->ri_TrigInstrument; + if (slot1 != NULL) { ExecDropSingleTupleTableSlot(slot1); @@ -6087,6 +6163,8 @@ AfterTriggerSaveEvent(EState *estate, ResultRelInfo *relinfo, int tgtype_level; int i; Tuplestorestate *fdw_tuplestore = NULL; + bytea *rowId1 = NULL; + bytea *rowId2 = NULL; /* * Check state. We use a normal test not Assert because it is possible to @@ -6180,6 +6258,21 @@ AfterTriggerSaveEvent(EState *estate, ResultRelInfo *relinfo, * if so. This preserves the behavior that statement-level triggers fire * just once per statement and fire after row-level triggers. */ + + /* Determine flags */ + if (!(relkind == RELKIND_FOREIGN_TABLE && row_trigger)) + { + if (row_trigger && event == TRIGGER_EVENT_UPDATE) + { + if (relkind == RELKIND_PARTITIONED_TABLE) + new_event.ate_flags = AFTER_TRIGGER_CP_UPDATE; + else + new_event.ate_flags = AFTER_TRIGGER_2CTID; + } + else + new_event.ate_flags = AFTER_TRIGGER_1CTID; + } + switch (event) { case TRIGGER_EVENT_INSERT: @@ -6190,6 +6283,13 @@ AfterTriggerSaveEvent(EState *estate, ResultRelInfo *relinfo, Assert(newslot != NULL); ItemPointerCopy(&(newslot->tts_tid), &(new_event.ate_ctid1)); ItemPointerSetInvalid(&(new_event.ate_ctid2)); + if (table_get_row_ref_type(rel) == ROW_REF_ROWID) + { + bool isnull; + rowId1 = DatumGetByteaP(slot_getsysattr(newslot, RowIdAttributeNumber, &isnull)); + new_event.ate_flags |= AFTER_TRIGGER_ROWID1; + Assert(!isnull); + } } else { @@ -6209,6 +6309,13 @@ AfterTriggerSaveEvent(EState *estate, ResultRelInfo *relinfo, Assert(newslot == NULL); ItemPointerCopy(&(oldslot->tts_tid), &(new_event.ate_ctid1)); ItemPointerSetInvalid(&(new_event.ate_ctid2)); + if (table_get_row_ref_type(rel) == ROW_REF_ROWID) + { + bool isnull; + rowId1 = DatumGetByteaP(slot_getsysattr(oldslot, RowIdAttributeNumber, &isnull)); + new_event.ate_flags |= AFTER_TRIGGER_ROWID1; + Assert(!isnull); + } } else { @@ -6224,10 +6331,54 @@ AfterTriggerSaveEvent(EState *estate, ResultRelInfo *relinfo, tgtype_event = TRIGGER_TYPE_UPDATE; if (row_trigger) { + bool src_rowid = false, + dst_rowid = false; Assert(oldslot != NULL); Assert(newslot != NULL); ItemPointerCopy(&(oldslot->tts_tid), &(new_event.ate_ctid1)); ItemPointerCopy(&(newslot->tts_tid), &(new_event.ate_ctid2)); + if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) + { + Relation src_rel = src_partinfo->ri_RelationDesc; + Relation dst_rel = dst_partinfo->ri_RelationDesc; + + src_rowid = table_get_row_ref_type(src_rel) == + ROW_REF_ROWID; + dst_rowid = table_get_row_ref_type(dst_rel) == + ROW_REF_ROWID; + } + else + { + if (table_get_row_ref_type(rel) == ROW_REF_ROWID) + { + src_rowid = true; + dst_rowid = true; + } + } + + if (src_rowid) + { + Datum val; + bool isnull; + val = slot_getsysattr(oldslot, + RowIdAttributeNumber, + &isnull); + rowId1 = DatumGetByteaP(val); + Assert(!isnull); + new_event.ate_flags |= AFTER_TRIGGER_ROWID1; + } + + if (dst_rowid) + { + Datum val; + bool isnull; + val = slot_getsysattr(newslot, + RowIdAttributeNumber, + &isnull); + rowId2 = DatumGetByteaP(val); + Assert(!isnull); + new_event.ate_flags |= AFTER_TRIGGER_ROWID2; + } /* * Also remember the OIDs of partitions to fetch these tuples @@ -6265,20 +6416,6 @@ AfterTriggerSaveEvent(EState *estate, ResultRelInfo *relinfo, break; } - /* Determine flags */ - if (!(relkind == RELKIND_FOREIGN_TABLE && row_trigger)) - { - if (row_trigger && event == TRIGGER_EVENT_UPDATE) - { - if (relkind == RELKIND_PARTITIONED_TABLE) - new_event.ate_flags = AFTER_TRIGGER_CP_UPDATE; - else - new_event.ate_flags = AFTER_TRIGGER_2CTID; - } - else - new_event.ate_flags = AFTER_TRIGGER_1CTID; - } - /* else, we'll initialize ate_flags for each trigger */ tgtype_level = (row_trigger ? TRIGGER_TYPE_ROW : TRIGGER_TYPE_STATEMENT); @@ -6444,7 +6581,7 @@ AfterTriggerSaveEvent(EState *estate, ResultRelInfo *relinfo, new_shared.ats_modifiedcols = modifiedCols; afterTriggerAddEvent(&afterTriggers.query_stack[afterTriggers.query_depth].events, - &new_event, &new_shared); + &new_event, &new_shared, rowId1, rowId2); } /* diff --git a/src/backend/executor/execExprInterp.c b/src/backend/executor/execExprInterp.c index 30ea9627c75..44179ce1fc9 100644 --- a/src/backend/executor/execExprInterp.c +++ b/src/backend/executor/execExprInterp.c @@ -4400,7 +4400,9 @@ ExecEvalSysVar(ExprState *state, ExprEvalStep *op, ExprContext *econtext, op->resnull); *op->resvalue = d; /* this ought to be unreachable, but it's cheap enough to check */ - if (unlikely(*op->resnull)) + if (op->d.var.attnum != RowIdAttributeNumber && + op->d.var.attnum != SelfItemPointerAttributeNumber && + unlikely(*op->resnull)) elog(ERROR, "failed to fetch attribute from slot"); } diff --git a/src/backend/executor/execMain.c b/src/backend/executor/execMain.c index 6a60fb33377..009951bc626 100644 --- a/src/backend/executor/execMain.c +++ b/src/backend/executor/execMain.c @@ -862,13 +862,15 @@ InitPlan(QueryDesc *queryDesc, int eflags) Oid relid; Relation relation; ExecRowMark *erm; + RangeTblEntry *rangeEntry; /* ignore "parent" rowmarks; they are irrelevant at runtime */ if (rc->isParent) continue; /* get relation's OID (will produce InvalidOid if subquery) */ - relid = exec_rt_fetch(rc->rti, estate)->relid; + rangeEntry = exec_rt_fetch(rc->rti, estate); + relid = rangeEntry->relid; /* open relation, if we need to access it for this mark type */ switch (rc->markType) @@ -901,6 +903,10 @@ InitPlan(QueryDesc *queryDesc, int eflags) erm->prti = rc->prti; erm->rowmarkId = rc->rowmarkId; erm->markType = rc->markType; + if (erm->markType == ROW_MARK_COPY) + erm->refType = ROW_REF_COPY; + else + erm->refType = rangeEntry->reftype; erm->strength = rc->strength; erm->waitPolicy = rc->waitPolicy; erm->ermActive = false; @@ -1336,6 +1342,8 @@ InitResultRelInfo(ResultRelInfo *resultRelInfo, resultRelInfo->ri_ChildToRootMap = NULL; resultRelInfo->ri_ChildToRootMapValid = false; resultRelInfo->ri_CopyMultiInsertBuffer = NULL; + + resultRelInfo->ri_RowRefType = table_get_row_ref_type(resultRelationDesc); } /* @@ -2483,17 +2491,28 @@ ExecBuildAuxRowMark(ExecRowMark *erm, List *targetlist) aerm->rowmark = erm; /* Look up the resjunk columns associated with this rowmark */ - if (erm->markType != ROW_MARK_COPY) + if (erm->refType == ROW_REF_TID) { + Assert(erm->markType != ROW_MARK_COPY); /* need ctid for all methods other than COPY */ snprintf(resname, sizeof(resname), "ctid%u", erm->rowmarkId); aerm->ctidAttNo = ExecFindJunkAttributeInTlist(targetlist, resname); if (!AttributeNumberIsValid(aerm->ctidAttNo)) elog(ERROR, "could not find junk %s column", resname); + } else if (erm->refType == ROW_REF_ROWID) + { + Assert(erm->markType != ROW_MARK_COPY); + /* need ctid for all methods other than COPY */ + snprintf(resname, sizeof(resname), "rowid%u", erm->rowmarkId); + aerm->ctidAttNo = ExecFindJunkAttributeInTlist(targetlist, + resname); + if (!AttributeNumberIsValid(aerm->ctidAttNo)) + elog(ERROR, "could not find junk %s column", resname); } else { + Assert(erm->markType == ROW_MARK_COPY); /* need wholerow if COPY */ snprintf(resname, sizeof(resname), "wholerow%u", erm->rowmarkId); aerm->wholeAttNo = ExecFindJunkAttributeInTlist(targetlist, @@ -2783,8 +2802,9 @@ EvalPlanQualFetchRowMark(EPQState *epqstate, Index rti, TupleTableSlot *slot) { /* ordinary table, fetch the tuple */ if (!table_tuple_fetch_row_version(erm->relation, - (ItemPointer) DatumGetPointer(datum), - SnapshotAny, slot)) + datum, + SnapshotAny, + slot)) elog(ERROR, "failed to fetch tuple for EvalPlanQual recheck"); return true; } diff --git a/src/backend/executor/execReplication.c b/src/backend/executor/execReplication.c index 03c705bf987..c9345cfae04 100644 --- a/src/backend/executor/execReplication.c +++ b/src/backend/executor/execReplication.c @@ -209,7 +209,8 @@ RelationFindReplTupleByIndex(Relation rel, Oid idxoid, PushActiveSnapshot(GetLatestSnapshot()); - res = table_tuple_lock(rel, &(outslot->tts_tid), GetActiveSnapshot(), + res = table_tuple_lock(rel, PointerGetDatum(&(outslot->tts_tid)), + GetActiveSnapshot(), outslot, GetCurrentCommandId(false), lockmode, @@ -393,7 +394,8 @@ RelationFindReplTupleSeq(Relation rel, LockTupleMode lockmode, PushActiveSnapshot(GetLatestSnapshot()); - res = table_tuple_lock(rel, &(outslot->tts_tid), GetActiveSnapshot(), + res = table_tuple_lock(rel, PointerGetDatum(&(outslot->tts_tid)), + GetActiveSnapshot(), outslot, GetCurrentCommandId(false), lockmode, @@ -532,7 +534,7 @@ ExecSimpleRelationUpdate(ResultRelInfo *resultRelInfo, resultRelInfo->ri_TrigDesc->trig_update_before_row) { if (!ExecBRUpdateTriggers(estate, epqstate, resultRelInfo, - tid, NULL, slot, NULL, NULL)) + PointerGetDatum(tid), NULL, slot, NULL, NULL)) skip_tuple = true; /* "do nothing" */ } @@ -599,7 +601,7 @@ ExecSimpleRelationDelete(ResultRelInfo *resultRelInfo, resultRelInfo->ri_TrigDesc->trig_delete_before_row) { skip_tuple = !ExecBRDeleteTriggers(estate, epqstate, resultRelInfo, - tid, NULL, NULL, NULL, NULL); + PointerGetDatum(tid), NULL, NULL, NULL, NULL); } if (!skip_tuple) diff --git a/src/backend/executor/nodeLockRows.c b/src/backend/executor/nodeLockRows.c index e459971d32e..049c9841309 100644 --- a/src/backend/executor/nodeLockRows.c +++ b/src/backend/executor/nodeLockRows.c @@ -27,6 +27,7 @@ #include "executor/nodeLockRows.h" #include "foreign/fdwapi.h" #include "miscadmin.h" +#include "utils/datum.h" #include "utils/rel.h" @@ -157,7 +158,16 @@ ExecLockRows(PlanState *pstate) } /* okay, try to lock (and fetch) the tuple */ - tid = *((ItemPointer) DatumGetPointer(datum)); + if (erm->refType == ROW_REF_TID) + { + tid = *((ItemPointer) DatumGetPointer(datum)); + datum = PointerGetDatum(&tid); + } + else + { + Assert(erm->refType = ROW_REF_ROWID); + datum = datumCopy(datum, false, -1); + } switch (erm->markType) { case ROW_MARK_EXCLUSIVE: @@ -182,12 +192,15 @@ ExecLockRows(PlanState *pstate) if (!IsolationUsesXactSnapshot()) lockflags |= TUPLE_LOCK_FLAG_FIND_LAST_VERSION; - test = table_tuple_lock(erm->relation, &tid, estate->es_snapshot, + test = table_tuple_lock(erm->relation, datum, estate->es_snapshot, markSlot, estate->es_output_cid, lockmode, erm->waitPolicy, lockflags, &tmfd); + if (erm->refType == ROW_REF_ROWID) + pfree(DatumGetPointer(datum)); + switch (test) { case TM_WouldBlock: diff --git a/src/backend/executor/nodeModifyTable.c b/src/backend/executor/nodeModifyTable.c index 67793aa2291..2066fdde3c8 100644 --- a/src/backend/executor/nodeModifyTable.c +++ b/src/backend/executor/nodeModifyTable.c @@ -66,6 +66,7 @@ #include "rewrite/rewriteHandler.h" #include "rewrite/rewriteManip.h" #include "storage/bufmgr.h" +#include "storage/itemptr.h" #include "storage/lmgr.h" #include "utils/builtins.h" #include "utils/datum.h" @@ -141,12 +142,11 @@ static void ExecPendingInserts(EState *estate); static void ExecCrossPartitionUpdateForeignKey(ModifyTableContext *context, ResultRelInfo *sourcePartInfo, ResultRelInfo *destPartInfo, - ItemPointer tupleid, + Datum tupleid, TupleTableSlot *oldSlot, TupleTableSlot *newslot); static bool ExecOnConflictUpdate(ModifyTableContext *context, ResultRelInfo *resultRelInfo, - ItemPointer conflictTid, TupleTableSlot *excludedSlot, bool canSetTag, TupleTableSlot **returning); @@ -159,12 +159,12 @@ static TupleTableSlot *ExecPrepareTupleRouting(ModifyTableState *mtstate, static TupleTableSlot *ExecMerge(ModifyTableContext *context, ResultRelInfo *resultRelInfo, - ItemPointer tupleid, + Datum tupleid, bool canSetTag); static void ExecInitMerge(ModifyTableState *mtstate, EState *estate); static bool ExecMergeMatched(ModifyTableContext *context, ResultRelInfo *resultRelInfo, - ItemPointer tupleid, + Datum tupleid, bool canSetTag); static void ExecMergeNotMatched(ModifyTableContext *context, ResultRelInfo *resultRelInfo, @@ -299,66 +299,6 @@ ExecProcessReturning(ResultRelInfo *resultRelInfo, return ExecProject(projectReturning); } -/* - * ExecCheckTupleVisible -- verify tuple is visible - * - * It would not be consistent with guarantees of the higher isolation levels to - * proceed with avoiding insertion (taking speculative insertion's alternative - * path) on the basis of another tuple that is not visible to MVCC snapshot. - * Check for the need to raise a serialization failure, and do so as necessary. - */ -static void -ExecCheckTupleVisible(EState *estate, - Relation rel, - TupleTableSlot *slot) -{ - if (!IsolationUsesXactSnapshot()) - return; - - if (!table_tuple_satisfies_snapshot(rel, slot, estate->es_snapshot)) - { - Datum xminDatum; - TransactionId xmin; - bool isnull; - - xminDatum = slot_getsysattr(slot, MinTransactionIdAttributeNumber, &isnull); - Assert(!isnull); - xmin = DatumGetTransactionId(xminDatum); - - /* - * We should not raise a serialization failure if the conflict is - * against a tuple inserted by our own transaction, even if it's not - * visible to our snapshot. (This would happen, for example, if - * conflicting keys are proposed for insertion in a single command.) - */ - if (!TransactionIdIsCurrentTransactionId(xmin)) - ereport(ERROR, - (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE), - errmsg("could not serialize access due to concurrent update"))); - } -} - -/* - * ExecCheckTIDVisible -- convenience variant of ExecCheckTupleVisible() - */ -static void -ExecCheckTIDVisible(EState *estate, - ResultRelInfo *relinfo, - ItemPointer tid, - TupleTableSlot *tempSlot) -{ - Relation rel = relinfo->ri_RelationDesc; - - /* Redundantly check isolation level */ - if (!IsolationUsesXactSnapshot()) - return; - - if (!table_tuple_fetch_row_version(rel, tid, SnapshotAny, tempSlot)) - elog(ERROR, "failed to fetch conflicting tuple for ON CONFLICT"); - ExecCheckTupleVisible(estate, rel, tempSlot); - ExecClearTuple(tempSlot); -} - /* * Initialize to compute stored generated columns for a tuple * @@ -1044,12 +984,19 @@ ExecInsert(ModifyTableContext *context, if (onconflict != ONCONFLICT_NONE && resultRelInfo->ri_NumIndices > 0) { /* Perform a speculative insertion. */ - uint32 specToken; - ItemPointerData conflictTid; - bool specConflict; List *arbiterIndexes; + TupleTableSlot *existing = NULL, + *returningSlot, + *inserted; + LockTupleMode lockmode = LockTupleExclusive; arbiterIndexes = resultRelInfo->ri_onConflictArbiterIndexes; + returningSlot = ExecGetReturningSlot(estate, resultRelInfo); + if (onconflict == ONCONFLICT_UPDATE) + { + lockmode = ExecUpdateLockMode(estate, resultRelInfo); + existing = resultRelInfo->ri_onConflict->oc_Existing; + } /* * Do a non-conclusive check for conflicts first. @@ -1066,23 +1013,29 @@ ExecInsert(ModifyTableContext *context, */ vlock: CHECK_FOR_INTERRUPTS(); - specConflict = false; - if (!ExecCheckIndexConstraints(resultRelInfo, slot, estate, - &conflictTid, arbiterIndexes)) + + inserted = table_tuple_insert_with_arbiter(resultRelInfo, + slot, estate->es_output_cid, + 0, NULL, arbiterIndexes, estate, + lockmode, existing, returningSlot); + if (!inserted) { - /* committed conflict tuple found */ if (onconflict == ONCONFLICT_UPDATE) { + TupleTableSlot *returning = NULL; + + if (TTS_EMPTY(existing)) + goto vlock; + /* * In case of ON CONFLICT DO UPDATE, execute the UPDATE * part. Be prepared to retry if the UPDATE fails because * of another concurrent UPDATE/DELETE to the conflict * tuple. */ - TupleTableSlot *returning = NULL; if (ExecOnConflictUpdate(context, resultRelInfo, - &conflictTid, slot, canSetTag, + slot, canSetTag, &returning)) { InstrCountTuples2(&mtstate->ps, 1); @@ -1105,57 +1058,13 @@ ExecInsert(ModifyTableContext *context, * ExecGetReturningSlot() in the DO NOTHING case... */ Assert(onconflict == ONCONFLICT_NOTHING); - ExecCheckTIDVisible(estate, resultRelInfo, &conflictTid, - ExecGetReturningSlot(estate, resultRelInfo)); InstrCountTuples2(&mtstate->ps, 1); return NULL; } } - - /* - * Before we start insertion proper, acquire our "speculative - * insertion lock". Others can use that to wait for us to decide - * if we're going to go ahead with the insertion, instead of - * waiting for the whole transaction to complete. - */ - specToken = SpeculativeInsertionLockAcquire(GetCurrentTransactionId()); - - /* insert the tuple, with the speculative token */ - table_tuple_insert_speculative(resultRelationDesc, slot, - estate->es_output_cid, - 0, - NULL, - specToken); - - /* insert index entries for tuple */ - recheckIndexes = ExecInsertIndexTuples(resultRelInfo, - slot, estate, false, true, - &specConflict, - arbiterIndexes, - false); - - /* adjust the tuple's state accordingly */ - table_tuple_complete_speculative(resultRelationDesc, slot, - specToken, !specConflict); - - /* - * Wake up anyone waiting for our decision. They will re-check - * the tuple, see that it's no longer speculative, and wait on our - * XID as if this was a regularly inserted tuple all along. Or if - * we killed the tuple, they will see it's dead, and proceed as if - * the tuple never existed. - */ - SpeculativeInsertionLockRelease(GetCurrentTransactionId()); - - /* - * If there was a conflict, start from the beginning. We'll do - * the pre-check again, which will now find the conflicting tuple - * (unless it aborts before we get there). - */ - if (specConflict) + else { - list_free(recheckIndexes); - goto vlock; + slot = inserted; } /* Since there was no insertion conflict, we're done */ @@ -1163,9 +1072,9 @@ ExecInsert(ModifyTableContext *context, else { /* insert the tuple normally */ - table_tuple_insert(resultRelationDesc, slot, - estate->es_output_cid, - 0, NULL); + slot = table_tuple_insert(resultRelationDesc, slot, + estate->es_output_cid, + 0, NULL); /* insert index entries for tuple */ if (resultRelInfo->ri_NumIndices > 0) @@ -1341,7 +1250,7 @@ ExecPendingInserts(EState *estate) */ static bool ExecDeletePrologue(ModifyTableContext *context, ResultRelInfo *resultRelInfo, - ItemPointer tupleid, HeapTuple oldtuple, + Datum tupleid, HeapTuple oldtuple, TupleTableSlot **epqreturnslot, TM_Result *result) { if (result) @@ -1372,7 +1281,7 @@ ExecDeletePrologue(ModifyTableContext *context, ResultRelInfo *resultRelInfo, */ static TM_Result ExecDeleteAct(ModifyTableContext *context, ResultRelInfo *resultRelInfo, - ItemPointer tupleid, bool changingPart, int options, + Datum tupleid, bool changingPart, int options, TupleTableSlot *oldSlot) { EState *estate = context->estate; @@ -1396,7 +1305,7 @@ ExecDeleteAct(ModifyTableContext *context, ResultRelInfo *resultRelInfo, */ static void ExecDeleteEpilogue(ModifyTableContext *context, ResultRelInfo *resultRelInfo, - ItemPointer tupleid, HeapTuple oldtuple, + HeapTuple oldtuple, TupleTableSlot *slot, bool changingPart) { ModifyTableState *mtstate = context->mtstate; @@ -1476,7 +1385,7 @@ ExecInitDeleteTupleSlot(ModifyTableState *mtstate, static TupleTableSlot * ExecDelete(ModifyTableContext *context, ResultRelInfo *resultRelInfo, - ItemPointer tupleid, + Datum tupleid, HeapTuple oldtuple, TupleTableSlot *oldSlot, bool processReturning, @@ -1669,7 +1578,7 @@ ExecDelete(ModifyTableContext *context, if (tupleDeleted) *tupleDeleted = true; - ExecDeleteEpilogue(context, resultRelInfo, tupleid, oldtuple, + ExecDeleteEpilogue(context, resultRelInfo, oldtuple, oldSlot, changingPart); /* Process RETURNING if present and if requested */ @@ -1686,7 +1595,7 @@ ExecDelete(ModifyTableContext *context, /* FDW must have provided a slot containing the deleted row */ Assert(!TupIsNull(slot)); } - else + else if (!slot || TupIsNull(slot)) { /* Copy old tuple to the returning slot */ slot = ExecGetReturningSlot(estate, resultRelInfo); @@ -1735,7 +1644,7 @@ ExecDelete(ModifyTableContext *context, static bool ExecCrossPartitionUpdate(ModifyTableContext *context, ResultRelInfo *resultRelInfo, - ItemPointer tupleid, HeapTuple oldtuple, + Datum tupleid, HeapTuple oldtuple, TupleTableSlot *slot, bool canSetTag, UpdateContext *updateCxt, @@ -1891,7 +1800,7 @@ ExecCrossPartitionUpdate(ModifyTableContext *context, */ static bool ExecUpdatePrologue(ModifyTableContext *context, ResultRelInfo *resultRelInfo, - ItemPointer tupleid, HeapTuple oldtuple, TupleTableSlot *slot, + Datum tupleid, HeapTuple oldtuple, TupleTableSlot *slot, TM_Result *result) { Relation resultRelationDesc = resultRelInfo->ri_RelationDesc; @@ -1968,7 +1877,7 @@ ExecUpdatePrepareSlot(ResultRelInfo *resultRelInfo, */ static TM_Result ExecUpdateAct(ModifyTableContext *context, ResultRelInfo *resultRelInfo, - ItemPointer tupleid, HeapTuple oldtuple, TupleTableSlot *slot, + Datum tupleid, HeapTuple oldtuple, TupleTableSlot *slot, bool canSetTag, int options, TupleTableSlot *oldSlot, UpdateContext *updateCxt) { @@ -2125,7 +2034,7 @@ ExecUpdateAct(ModifyTableContext *context, ResultRelInfo *resultRelInfo, */ static void ExecUpdateEpilogue(ModifyTableContext *context, UpdateContext *updateCxt, - ResultRelInfo *resultRelInfo, ItemPointer tupleid, + ResultRelInfo *resultRelInfo, HeapTuple oldtuple, TupleTableSlot *slot, TupleTableSlot *oldSlot) { @@ -2175,7 +2084,7 @@ static void ExecCrossPartitionUpdateForeignKey(ModifyTableContext *context, ResultRelInfo *sourcePartInfo, ResultRelInfo *destPartInfo, - ItemPointer tupleid, + Datum tupleid, TupleTableSlot *oldslot, TupleTableSlot *newslot) { @@ -2266,7 +2175,7 @@ ExecCrossPartitionUpdateForeignKey(ModifyTableContext *context, */ static TupleTableSlot * ExecUpdate(ModifyTableContext *context, ResultRelInfo *resultRelInfo, - ItemPointer tupleid, HeapTuple oldtuple, TupleTableSlot *slot, + Datum tupleid, HeapTuple oldtuple, TupleTableSlot *slot, TupleTableSlot *oldSlot, bool canSetTag, bool locked) { EState *estate = context->estate; @@ -2322,10 +2231,14 @@ ExecUpdate(ModifyTableContext *context, ResultRelInfo *resultRelInfo, { ItemPointerData lockedtid; - int options = TABLE_MODIFY_WAIT | TABLE_MODIFY_FETCH_OLD_TUPLE; + int options = TABLE_MODIFY_WAIT; - if (!locked && !IsolationUsesXactSnapshot()) - options |= TABLE_MODIFY_LOCK_UPDATED; + if (!locked) + { + options |= TABLE_MODIFY_FETCH_OLD_TUPLE; + if (!IsolationUsesXactSnapshot()) + options |= TABLE_MODIFY_LOCK_UPDATED; + } /* * If we generate a new candidate tuple after EvalPlanQual testing, we @@ -2335,7 +2248,11 @@ ExecUpdate(ModifyTableContext *context, ResultRelInfo *resultRelInfo, * to do them again.) */ redo_act: - lockedtid = *tupleid; + if (resultRelInfo->ri_needLockTagTuple) + { + Assert(resultRelInfo->ri_RowRefType == ROW_REF_TID); + lockedtid = *((ItemPointer) tupleid); + } result = ExecUpdateAct(context, resultRelInfo, tupleid, oldtuple, slot, canSetTag, options, oldSlot, &updateCxt); @@ -2414,7 +2331,7 @@ ExecUpdate(ModifyTableContext *context, ResultRelInfo *resultRelInfo, UnlockTuple(resultRelationDesc, &lockedtid, InplaceUpdateTupleLock); LockTuple(resultRelationDesc, - tupleid, InplaceUpdateTupleLock); + (ItemPointer) tupleid, InplaceUpdateTupleLock); } slot = ExecGetUpdateNewTuple(resultRelInfo, @@ -2443,7 +2360,7 @@ ExecUpdate(ModifyTableContext *context, ResultRelInfo *resultRelInfo, if (canSetTag) (estate->es_processed)++; - ExecUpdateEpilogue(context, &updateCxt, resultRelInfo, tupleid, oldtuple, + ExecUpdateEpilogue(context, &updateCxt, resultRelInfo, oldtuple, slot, oldSlot); /* Process RETURNING if present */ @@ -2467,152 +2384,26 @@ ExecUpdate(ModifyTableContext *context, ResultRelInfo *resultRelInfo, static bool ExecOnConflictUpdate(ModifyTableContext *context, ResultRelInfo *resultRelInfo, - ItemPointer conflictTid, TupleTableSlot *excludedSlot, bool canSetTag, TupleTableSlot **returning) { ModifyTableState *mtstate = context->mtstate; ExprContext *econtext = mtstate->ps.ps_ExprContext; - Relation relation = resultRelInfo->ri_RelationDesc; ExprState *onConflictSetWhere = resultRelInfo->ri_onConflict->oc_WhereClause; TupleTableSlot *existing = resultRelInfo->ri_onConflict->oc_Existing; - TM_FailureData tmfd; - LockTupleMode lockmode; - TM_Result test; - Datum xminDatum; - TransactionId xmin; - bool isnull; - - /* - * Parse analysis should have blocked ON CONFLICT for all system - * relations, which includes these. There's no fundamental obstacle to - * supporting this; we'd just need to handle LOCKTAG_TUPLE like the other - * ExecUpdate() caller. - */ - Assert(!resultRelInfo->ri_needLockTagTuple); + Datum tupleid; - /* Determine lock mode to use */ - lockmode = ExecUpdateLockMode(context->estate, resultRelInfo); - - /* - * Lock tuple for update. Don't follow updates when tuple cannot be - * locked without doing so. A row locking conflict here means our - * previous conclusion that the tuple is conclusively committed is not - * true anymore. - */ - test = table_tuple_lock(relation, conflictTid, - context->estate->es_snapshot, - existing, context->estate->es_output_cid, - lockmode, LockWaitBlock, 0, - &tmfd); - switch (test) + if (table_get_row_ref_type(resultRelInfo->ri_RelationDesc) == ROW_REF_ROWID) { - case TM_Ok: - /* success! */ - break; - - case TM_Invisible: - - /* - * This can occur when a just inserted tuple is updated again in - * the same command. E.g. because multiple rows with the same - * conflicting key values are inserted. - * - * This is somewhat similar to the ExecUpdate() TM_SelfModified - * case. We do not want to proceed because it would lead to the - * same row being updated a second time in some unspecified order, - * and in contrast to plain UPDATEs there's no historical behavior - * to break. - * - * It is the user's responsibility to prevent this situation from - * occurring. These problems are why the SQL standard similarly - * specifies that for SQL MERGE, an exception must be raised in - * the event of an attempt to update the same row twice. - */ - xminDatum = slot_getsysattr(existing, - MinTransactionIdAttributeNumber, - &isnull); - Assert(!isnull); - xmin = DatumGetTransactionId(xminDatum); - - if (TransactionIdIsCurrentTransactionId(xmin)) - ereport(ERROR, - (errcode(ERRCODE_CARDINALITY_VIOLATION), - /* translator: %s is a SQL command name */ - errmsg("%s command cannot affect row a second time", - "ON CONFLICT DO UPDATE"), - errhint("Ensure that no rows proposed for insertion within the same command have duplicate constrained values."))); - - /* This shouldn't happen */ - elog(ERROR, "attempted to lock invisible tuple"); - break; - - case TM_SelfModified: - - /* - * This state should never be reached. As a dirty snapshot is used - * to find conflicting tuples, speculative insertion wouldn't have - * seen this row to conflict with. - */ - elog(ERROR, "unexpected self-updated tuple"); - break; - - case TM_Updated: - if (IsolationUsesXactSnapshot()) - ereport(ERROR, - (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE), - errmsg("could not serialize access due to concurrent update"))); - - /* - * As long as we don't support an UPDATE of INSERT ON CONFLICT for - * a partitioned table we shouldn't reach to a case where tuple to - * be lock is moved to another partition due to concurrent update - * of the partition key. - */ - Assert(!ItemPointerIndicatesMovedPartitions(&tmfd.ctid)); - - /* - * Tell caller to try again from the very start. - * - * It does not make sense to use the usual EvalPlanQual() style - * loop here, as the new version of the row might not conflict - * anymore, or the conflicting tuple has actually been deleted. - */ - ExecClearTuple(existing); - return false; - - case TM_Deleted: - if (IsolationUsesXactSnapshot()) - ereport(ERROR, - (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE), - errmsg("could not serialize access due to concurrent delete"))); - - /* see TM_Updated case */ - Assert(!ItemPointerIndicatesMovedPartitions(&tmfd.ctid)); - ExecClearTuple(existing); - return false; - - default: - elog(ERROR, "unrecognized table_tuple_lock status: %u", test); + bool isnull; + tupleid = slot_getsysattr(existing, RowIdAttributeNumber, &isnull); + Assert(!isnull); + } + else + { + tupleid = PointerGetDatum(&existing->tts_tid); } - - /* Success, the tuple is locked. */ - - /* - * Verify that the tuple is visible to our MVCC snapshot if the current - * isolation level mandates that. - * - * It's not sufficient to rely on the check within ExecUpdate() as e.g. - * CONFLICT ... WHERE clause may prevent us from reaching that. - * - * This means we only ever continue when a new command in the current - * transaction could see the row, even though in READ COMMITTED mode the - * tuple will not be visible according to the current statement's - * snapshot. This is in line with the way UPDATE deals with newer tuple - * versions. - */ - ExecCheckTupleVisible(context->estate, relation, existing); /* * Make tuple and any needed join variables available to ExecQual and @@ -2668,7 +2459,7 @@ ExecOnConflictUpdate(ModifyTableContext *context, /* Execute UPDATE with projection */ *returning = ExecUpdate(context, resultRelInfo, - conflictTid, NULL, + tupleid, NULL, resultRelInfo->ri_onConflict->oc_ProjSlot, existing, canSetTag, true); @@ -2687,7 +2478,7 @@ ExecOnConflictUpdate(ModifyTableContext *context, */ static TupleTableSlot * ExecMerge(ModifyTableContext *context, ResultRelInfo *resultRelInfo, - ItemPointer tupleid, bool canSetTag) + Datum tupleid, bool canSetTag) { bool matched; @@ -2734,7 +2525,7 @@ ExecMerge(ModifyTableContext *context, ResultRelInfo *resultRelInfo, * from ExecMergeNotMatched to ExecMergeMatched, there is no risk of a * livelock. */ - matched = tupleid != NULL; + matched = DatumGetPointer(tupleid) != NULL; if (matched) matched = ExecMergeMatched(context, resultRelInfo, tupleid, canSetTag); @@ -2773,7 +2564,7 @@ ExecMerge(ModifyTableContext *context, ResultRelInfo *resultRelInfo, */ static bool ExecMergeMatched(ModifyTableContext *context, ResultRelInfo *resultRelInfo, - ItemPointer tupleid, bool canSetTag) + Datum tupleid, bool canSetTag) { ModifyTableState *mtstate = context->mtstate; ItemPointerData lockedtid; @@ -2810,9 +2601,10 @@ ExecMergeMatched(ModifyTableContext *context, ResultRelInfo *resultRelInfo, * that don't match mas_whenqual. MERGE on system catalogs is a minor * use case, so don't bother optimizing those. */ - LockTuple(resultRelInfo->ri_RelationDesc, tupleid, + Assert(resultRelInfo->ri_RowRefType == ROW_REF_TID); + LockTuple(resultRelInfo->ri_RelationDesc, (ItemPointer) tupleid, InplaceUpdateTupleLock); - lockedtid = *tupleid; + lockedtid = *((ItemPointer) tupleid); } else ItemPointerSetInvalid(&lockedtid); @@ -2910,7 +2702,7 @@ ExecMergeMatched(ModifyTableContext *context, ResultRelInfo *resultRelInfo, if (result == TM_Ok && updateCxt.updated) { ExecUpdateEpilogue(context, &updateCxt, resultRelInfo, - tupleid, NULL, newslot, + NULL, newslot, resultRelInfo->ri_oldTupleSlot); mtstate->mt_merge_updated += 1; } @@ -2929,7 +2721,7 @@ ExecMergeMatched(ModifyTableContext *context, ResultRelInfo *resultRelInfo, false, TABLE_MODIFY_WAIT, NULL); if (result == TM_Ok) { - ExecDeleteEpilogue(context, resultRelInfo, tupleid, NULL, + ExecDeleteEpilogue(context, resultRelInfo, NULL, resultRelInfo->ri_oldTupleSlot, false); mtstate->mt_merge_deleted += 1; } @@ -3053,7 +2845,11 @@ ExecMergeMatched(ModifyTableContext *context, ResultRelInfo *resultRelInfo, goto out; } - (void) ExecGetJunkAttribute(epqslot, + /* + * Update tupleid to that of the new tuple, for + * the refetch we do at the top. + */ + tupleid = ExecGetJunkAttribute(epqslot, resultRelInfo->ri_RowIdAttNo, &isNull); if (isNull) @@ -3088,9 +2884,14 @@ ExecMergeMatched(ModifyTableContext *context, ResultRelInfo *resultRelInfo, * top. */ if (resultRelInfo->ri_needLockTagTuple) + { + Assert(resultRelInfo->ri_RowRefType == ROW_REF_TID); UnlockTuple(resultRelInfo->ri_RelationDesc, &lockedtid, InplaceUpdateTupleLock); + ItemPointerCopy(&context->tmfd.ctid, + (ItemPointer) tupleid); + } goto lmerge_matched; case TM_Deleted: @@ -3690,10 +3491,10 @@ ExecModifyTable(PlanState *pstate) PlanState *subplanstate; TupleTableSlot *slot; TupleTableSlot *oldSlot; + Datum tupleid; ItemPointerData tuple_ctid; HeapTupleData oldtupdata; HeapTuple oldtuple; - ItemPointer tupleid; bool tuplock; CHECK_FOR_INTERRUPTS(); @@ -3743,6 +3544,8 @@ ExecModifyTable(PlanState *pstate) */ for (;;) { + RowRefType refType; + /* * Reset the per-output-tuple exprcontext. This is needed because * triggers expect to use that context as workspace. It's a bit ugly @@ -3792,7 +3595,7 @@ ExecModifyTable(PlanState *pstate) { EvalPlanQualSetSlot(&node->mt_epqstate, context.planSlot); - ExecMerge(&context, node->resultRelInfo, NULL, node->canSetTag); + ExecMerge(&context, node->resultRelInfo, PointerGetDatum(NULL), node->canSetTag); continue; /* no RETURNING support yet */ } @@ -3828,7 +3631,8 @@ ExecModifyTable(PlanState *pstate) EvalPlanQualSetSlot(&node->mt_epqstate, context.planSlot); slot = context.planSlot; - tupleid = NULL; + refType = resultRelInfo->ri_RowRefType; + tupleid = PointerGetDatum(NULL); oldtuple = NULL; /* @@ -3870,16 +3674,32 @@ ExecModifyTable(PlanState *pstate) { EvalPlanQualSetSlot(&node->mt_epqstate, context.planSlot); - ExecMerge(&context, node->resultRelInfo, NULL, node->canSetTag); + ExecMerge(&context, node->resultRelInfo, + PointerGetDatum(NULL), node->canSetTag); continue; /* no RETURNING support yet */ } elog(ERROR, "ctid is NULL"); } - tupleid = (ItemPointer) DatumGetPointer(datum); - tuple_ctid = *tupleid; /* be sure we don't free ctid!! */ - tupleid = &tuple_ctid; + if (refType == ROW_REF_TID) + { + /* shouldn't ever get a null result... */ + if (isNull) + elog(ERROR, "ctid is NULL"); + + tuple_ctid = *((ItemPointer) DatumGetPointer(datum)); /* be sure we don't free ctid!! */ + tupleid = PointerGetDatum(&tuple_ctid); + } + else + { + Assert(refType == ROW_REF_ROWID); + /* shouldn't ever get a null result... */ + if (isNull) + elog(ERROR, "rowid is NULL"); + + tupleid = datumCopy(datum, false, -1); + } } /* @@ -3961,9 +3781,12 @@ ExecModifyTable(PlanState *pstate) if (resultRelInfo->ri_needLockTagTuple) { - LockTuple(relation, tupleid, InplaceUpdateTupleLock); + Assert(resultRelInfo->ri_RowRefType == ROW_REF_TID); + LockTuple(relation, (ItemPointer) tupleid, + InplaceUpdateTupleLock); tuplock = true; } + Assert(DatumGetPointer(tupleid) != NULL); if (!table_tuple_fetch_row_version(relation, tupleid, SnapshotAny, oldSlot)) @@ -3978,7 +3801,8 @@ ExecModifyTable(PlanState *pstate) slot, resultRelInfo->ri_oldTupleSlot, node->canSetTag, false); if (tuplock) - UnlockTuple(resultRelInfo->ri_RelationDesc, tupleid, + UnlockTuple(resultRelInfo->ri_RelationDesc, + (ItemPointer) tupleid, InplaceUpdateTupleLock); break; @@ -4001,6 +3825,9 @@ ExecModifyTable(PlanState *pstate) break; } + if (refType == ROW_REF_ROWID && DatumGetPointer(tupleid) != NULL) + pfree(DatumGetPointer(tupleid)); + /* * If we got a RETURNING result, return it to caller. We'll continue * the work on next call. @@ -4245,10 +4072,20 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags) relkind == RELKIND_MATVIEW || relkind == RELKIND_PARTITIONED_TABLE) { - resultRelInfo->ri_RowIdAttNo = - ExecFindJunkAttributeInTlist(subplan->targetlist, "ctid"); - if (!AttributeNumberIsValid(resultRelInfo->ri_RowIdAttNo)) - elog(ERROR, "could not find junk ctid column"); + if (resultRelInfo->ri_RowRefType == ROW_REF_TID) + { + resultRelInfo->ri_RowIdAttNo = + ExecFindJunkAttributeInTlist(subplan->targetlist, "ctid"); + if (!AttributeNumberIsValid(resultRelInfo->ri_RowIdAttNo)) + elog(ERROR, "could not find junk ctid column"); + } + else + { + resultRelInfo->ri_RowIdAttNo = + ExecFindJunkAttributeInTlist(subplan->targetlist, "rowid"); + if (!AttributeNumberIsValid(resultRelInfo->ri_RowIdAttNo)) + elog(ERROR, "could not find junk rowid column"); + } } else if (relkind == RELKIND_FOREIGN_TABLE) { @@ -4560,6 +4397,8 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags) estate->es_auxmodifytables = lcons(mtstate, estate->es_auxmodifytables); + + return mtstate; } diff --git a/src/backend/executor/nodeTidscan.c b/src/backend/executor/nodeTidscan.c index 3a3f1644265..cf98fe3c2b8 100644 --- a/src/backend/executor/nodeTidscan.c +++ b/src/backend/executor/nodeTidscan.c @@ -378,7 +378,7 @@ TidNext(TidScanState *node) if (node->tss_isCurrentOf) table_tuple_get_latest_tid(scan, &tid); - if (table_tuple_fetch_row_version(heapRelation, &tid, snapshot, slot)) + if (table_tuple_fetch_row_version(heapRelation, PointerGetDatum(&tid), snapshot, slot)) return slot; /* Bad TID or failed snapshot qual; try next */ diff --git a/src/backend/nodes/read.c b/src/backend/nodes/read.c index 5d76f56e4e8..07df92d813c 100644 --- a/src/backend/nodes/read.c +++ b/src/backend/nodes/read.c @@ -205,6 +205,17 @@ pg_strtok(int *length) return ret_str; } +bool +pg_str_hasfield(void) +{ + const char *local_str = pg_strtok_ptr; + + while (*local_str == ' ' || *local_str == '\n' || *local_str == '\t') + local_str++; + + return (*local_str == ':'); +} + /* * debackslash - * create a palloc'd string holding the given token. diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index d7801a13091..ffcbdc7599d 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -2296,6 +2296,7 @@ preprocess_rowmarks(PlannerInfo *root) RowMarkClause *rc = lfirst_node(RowMarkClause, l); RangeTblEntry *rte = rt_fetch(rc->rti, parse->rtable); PlanRowMark *newrc; + RowRefType refType; /* * Currently, it is syntactically impossible to have FOR UPDATE et al @@ -2318,8 +2319,8 @@ preprocess_rowmarks(PlannerInfo *root) newrc = makeNode(PlanRowMark); newrc->rti = newrc->prti = rc->rti; newrc->rowmarkId = ++(root->glob->lastRowMarkId); - newrc->markType = select_rowmark_type(rte, rc->strength); - newrc->allMarkTypes = (1 << newrc->markType); + newrc->markType = select_rowmark_type(rte, rc->strength, &refType); + newrc->allRefTypes = (1 << refType); newrc->strength = rc->strength; newrc->waitPolicy = rc->waitPolicy; newrc->isParent = false; @@ -2335,6 +2336,7 @@ preprocess_rowmarks(PlannerInfo *root) { RangeTblEntry *rte = lfirst_node(RangeTblEntry, l); PlanRowMark *newrc; + RowRefType refType = ROW_REF_TID; i++; if (!bms_is_member(i, rels)) @@ -2343,8 +2345,8 @@ preprocess_rowmarks(PlannerInfo *root) newrc = makeNode(PlanRowMark); newrc->rti = newrc->prti = i; newrc->rowmarkId = ++(root->glob->lastRowMarkId); - newrc->markType = select_rowmark_type(rte, LCS_NONE); - newrc->allMarkTypes = (1 << newrc->markType); + newrc->markType = select_rowmark_type(rte, LCS_NONE, &refType); + newrc->allRefTypes = (1 << refType); newrc->strength = LCS_NONE; newrc->waitPolicy = LockWaitBlock; /* doesn't matter */ newrc->isParent = false; @@ -2359,11 +2361,13 @@ preprocess_rowmarks(PlannerInfo *root) * Select RowMarkType to use for a given table */ RowMarkType -select_rowmark_type(RangeTblEntry *rte, LockClauseStrength strength) +select_rowmark_type(RangeTblEntry *rte, LockClauseStrength strength, + RowRefType *refType) { if (rte->rtekind != RTE_RELATION) { /* If it's not a table at all, use ROW_MARK_COPY */ + *refType = ROW_REF_COPY; return ROW_MARK_COPY; } else if (rte->relkind == RELKIND_FOREIGN_TABLE) @@ -2374,10 +2378,12 @@ select_rowmark_type(RangeTblEntry *rte, LockClauseStrength strength) if (fdwroutine->GetForeignRowMarkType != NULL) return fdwroutine->GetForeignRowMarkType(rte, strength); /* Otherwise, use ROW_MARK_COPY by default */ + *refType = ROW_REF_COPY; return ROW_MARK_COPY; } else { + *refType = rte->reftype; /* Regular table, apply the appropriate lock type */ switch (strength) { diff --git a/src/backend/optimizer/prep/preptlist.c b/src/backend/optimizer/prep/preptlist.c index dfd71dbc763..c1edf20a3bf 100644 --- a/src/backend/optimizer/prep/preptlist.c +++ b/src/backend/optimizer/prep/preptlist.c @@ -213,7 +213,7 @@ preprocess_targetlist(PlannerInfo *root) if (rc->rti != rc->prti) continue; - if (rc->allMarkTypes & ~(1 << ROW_MARK_COPY)) + if (rc->allRefTypes & (1 << ROW_REF_TID)) { /* Need to fetch TID */ var = makeVar(rc->rti, @@ -229,7 +229,23 @@ preprocess_targetlist(PlannerInfo *root) true); tlist = lappend(tlist, tle); } - if (rc->allMarkTypes & (1 << ROW_MARK_COPY)) + if (rc->allRefTypes & (1 << ROW_REF_ROWID)) + { + /* Need to fetch TID */ + var = makeVar(rc->rti, + RowIdAttributeNumber, + BYTEAOID, + -1, + InvalidOid, + 0); + snprintf(resname, sizeof(resname), "rowid%u", rc->rowmarkId); + tle = makeTargetEntry((Expr *) var, + list_length(tlist) + 1, + pstrdup(resname), + true); + tlist = lappend(tlist, tle); + } + if (rc->allRefTypes & (1 << ROW_REF_COPY)) { /* Need the whole row as a junk var */ var = makeWholeRowVar(rt_fetch(rc->rti, range_table), diff --git a/src/backend/optimizer/util/appendinfo.c b/src/backend/optimizer/util/appendinfo.c index f456b3b0a44..43af763f1fe 100644 --- a/src/backend/optimizer/util/appendinfo.c +++ b/src/backend/optimizer/util/appendinfo.c @@ -896,17 +896,35 @@ add_row_identity_columns(PlannerInfo *root, Index rtindex, relkind == RELKIND_MATVIEW || relkind == RELKIND_PARTITIONED_TABLE) { + RowRefType refType = ROW_REF_TID; + + refType = table_get_row_ref_type(target_relation); + /* * Emit CTID so that executor can find the row to merge, update or * delete. */ - var = makeVar(rtindex, - SelfItemPointerAttributeNumber, - TIDOID, - -1, - InvalidOid, - 0); - add_row_identity_var(root, var, rtindex, "ctid"); + if (refType == ROW_REF_TID) + { + var = makeVar(rtindex, + SelfItemPointerAttributeNumber, + TIDOID, + -1, + InvalidOid, + 0); + add_row_identity_var(root, var, rtindex, "ctid"); + } + else + { + Assert(refType == ROW_REF_ROWID); + var = makeVar(rtindex, + RowIdAttributeNumber, + BYTEAOID, + -1, + InvalidOid, + 0); + add_row_identity_var(root, var, rtindex, "rowid"); + } } else if (relkind == RELKIND_FOREIGN_TABLE) { diff --git a/src/backend/optimizer/util/inherit.c b/src/backend/optimizer/util/inherit.c index 5e0bb6a9ac3..9b4c52952ed 100644 --- a/src/backend/optimizer/util/inherit.c +++ b/src/backend/optimizer/util/inherit.c @@ -16,6 +16,7 @@ #include "access/sysattr.h" #include "access/table.h" +#include "access/tableam.h" #include "catalog/partition.h" #include "catalog/pg_inherits.h" #include "catalog/pg_type.h" @@ -91,7 +92,7 @@ expand_inherited_rtentry(PlannerInfo *root, RelOptInfo *rel, LOCKMODE lockmode; PlanRowMark *oldrc; bool old_isParent = false; - int old_allMarkTypes = 0; + int old_allRefTypes = 0; Assert(rte->inh); /* else caller error */ @@ -131,8 +132,8 @@ expand_inherited_rtentry(PlannerInfo *root, RelOptInfo *rel, { old_isParent = oldrc->isParent; oldrc->isParent = true; - /* Save initial value of allMarkTypes before children add to it */ - old_allMarkTypes = oldrc->allMarkTypes; + /* Save initial value of allRefTypes before children add to it */ + old_allRefTypes = oldrc->allRefTypes; } /* Scan the inheritance set and expand it */ @@ -239,15 +240,15 @@ expand_inherited_rtentry(PlannerInfo *root, RelOptInfo *rel, */ if (oldrc) { - int new_allMarkTypes = oldrc->allMarkTypes; + int new_allRefTypes = oldrc->allRefTypes; Var *var; TargetEntry *tle; char resname[32]; List *newvars = NIL; /* Add TID junk Var if needed, unless we had it already */ - if (new_allMarkTypes & ~(1 << ROW_MARK_COPY) && - !(old_allMarkTypes & ~(1 << ROW_MARK_COPY))) + if (new_allRefTypes & (1 << ROW_REF_TID) && + !(old_allRefTypes & (1 << ROW_REF_TID))) { /* Need to fetch TID */ var = makeVar(oldrc->rti, @@ -266,8 +267,8 @@ expand_inherited_rtentry(PlannerInfo *root, RelOptInfo *rel, } /* Add whole-row junk Var if needed, unless we had it already */ - if ((new_allMarkTypes & (1 << ROW_MARK_COPY)) && - !(old_allMarkTypes & (1 << ROW_MARK_COPY))) + if ((new_allRefTypes & (1 << ROW_REF_COPY)) && + !(old_allRefTypes & (1 << ROW_REF_COPY))) { var = makeWholeRowVar(planner_rt_fetch(oldrc->rti, root), oldrc->rti, @@ -282,6 +283,24 @@ expand_inherited_rtentry(PlannerInfo *root, RelOptInfo *rel, newvars = lappend(newvars, var); } + if ((new_allRefTypes & (1 << ROW_REF_ROWID)) && + !(old_allRefTypes & (1 << ROW_REF_ROWID))) + { + var = makeVar(oldrc->rti, + RowIdAttributeNumber, + BYTEAOID, + -1, + InvalidOid, + 0); + snprintf(resname, sizeof(resname), "rowid%u", oldrc->rowmarkId); + tle = makeTargetEntry((Expr *) var, + list_length(root->processed_tlist) + 1, + pstrdup(resname), + true); + root->processed_tlist = lappend(root->processed_tlist, tle); + newvars = lappend(newvars, var); + } + /* Add tableoid junk Var, unless we had it already */ if (!old_isParent) { @@ -449,7 +468,7 @@ expand_partitioned_rtentry(PlannerInfo *root, RelOptInfo *relinfo, * where the hierarchy is flattened during RTE expansion.) * * PlanRowMarks still carry the top-parent's RTI, and the top-parent's - * allMarkTypes field still accumulates values from all descendents. + * allRefTypes field still accumulates values from all descendents. * * "parentrte" and "parentRTindex" are immediate parent's RTE and * RTI. "top_parentrc" is top parent's PlanRowMark. @@ -493,6 +512,7 @@ expand_single_inheritance_child(PlannerInfo *root, RangeTblEntry *parentrte, Assert(parentrte->rtekind == RTE_RELATION); /* else this is dubious */ childrte->relid = childOID; childrte->relkind = childrel->rd_rel->relkind; + childrte->reftype = table_get_row_ref_type(childrel); /* A partitioned child will need to be expanded further. */ if (childrte->relkind == RELKIND_PARTITIONED_TABLE) { @@ -582,14 +602,16 @@ expand_single_inheritance_child(PlannerInfo *root, RangeTblEntry *parentrte, if (top_parentrc) { PlanRowMark *childrc = makeNode(PlanRowMark); + RowRefType refType; childrc->rti = childRTindex; childrc->prti = top_parentrc->rti; childrc->rowmarkId = top_parentrc->rowmarkId; /* Reselect rowmark type, because relkind might not match parent */ childrc->markType = select_rowmark_type(childrte, - top_parentrc->strength); - childrc->allMarkTypes = (1 << childrc->markType); + top_parentrc->strength, + &refType); + childrc->allRefTypes = (1 << refType); childrc->strength = top_parentrc->strength; childrc->waitPolicy = top_parentrc->waitPolicy; @@ -600,8 +622,8 @@ expand_single_inheritance_child(PlannerInfo *root, RangeTblEntry *parentrte, */ childrc->isParent = (childrte->relkind == RELKIND_PARTITIONED_TABLE); - /* Include child's rowmark type in top parent's allMarkTypes */ - top_parentrc->allMarkTypes |= childrc->allMarkTypes; + /* Include child's rowmark type in top parent's allRefTypes */ + top_parentrc->allRefTypes |= childrc->allRefTypes; root->rowMarks = lappend(root->rowMarks, childrc); } diff --git a/src/backend/parser/parse_relation.c b/src/backend/parser/parse_relation.c index 58bc222a8b9..23ef258340a 100644 --- a/src/backend/parser/parse_relation.c +++ b/src/backend/parser/parse_relation.c @@ -20,6 +20,7 @@ #include "access/relation.h" #include "access/sysattr.h" #include "access/table.h" +#include "access/tableam.h" #include "catalog/heap.h" #include "catalog/namespace.h" #include "catalog/pg_type.h" @@ -1502,6 +1503,7 @@ addRangeTableEntry(ParseState *pstate, rte->relid = RelationGetRelid(rel); rte->relkind = rel->rd_rel->relkind; rte->rellockmode = lockmode; + rte->reftype = table_get_row_ref_type(rel); /* * Build the list of effective column names using user-supplied aliases @@ -1587,6 +1589,7 @@ addRangeTableEntryForRelation(ParseState *pstate, rte->relid = RelationGetRelid(rel); rte->relkind = rel->rd_rel->relkind; rte->rellockmode = lockmode; + rte->reftype = table_get_row_ref_type(rel); /* * Build the list of effective column names using user-supplied aliases @@ -1656,6 +1659,7 @@ addRangeTableEntryForSubquery(ParseState *pstate, rte->rtekind = RTE_SUBQUERY; rte->subquery = subquery; rte->alias = alias; + rte->reftype = ROW_REF_COPY; eref = alias ? copyObject(alias) : makeAlias("unnamed_subquery", NIL); numaliases = list_length(eref->colnames); @@ -1764,6 +1768,7 @@ addRangeTableEntryForFunction(ParseState *pstate, rte->functions = NIL; /* we'll fill this list below */ rte->funcordinality = rangefunc->ordinality; rte->alias = alias; + rte->reftype = ROW_REF_COPY; /* * Choose the RTE alias name. We default to using the first function's @@ -2083,6 +2088,7 @@ addRangeTableEntryForTableFunc(ParseState *pstate, rte->coltypmods = tf->coltypmods; rte->colcollations = tf->colcollations; rte->alias = alias; + rte->reftype = ROW_REF_COPY; eref = alias ? copyObject(alias) : makeAlias(refname, NIL); numaliases = list_length(eref->colnames); @@ -2159,6 +2165,7 @@ addRangeTableEntryForValues(ParseState *pstate, rte->coltypmods = coltypmods; rte->colcollations = colcollations; rte->alias = alias; + rte->reftype = ROW_REF_COPY; eref = alias ? copyObject(alias) : makeAlias(refname, NIL); @@ -2256,6 +2263,7 @@ addRangeTableEntryForJoin(ParseState *pstate, rte->joinrightcols = rightcols; rte->join_using_alias = join_using_alias; rte->alias = alias; + rte->reftype = ROW_REF_COPY; eref = alias ? copyObject(alias) : makeAlias("unnamed_join", NIL); numaliases = list_length(eref->colnames); @@ -2337,6 +2345,7 @@ addRangeTableEntryForCTE(ParseState *pstate, rte->rtekind = RTE_CTE; rte->ctename = cte->ctename; rte->ctelevelsup = levelsup; + rte->reftype = ROW_REF_COPY; /* Self-reference if and only if CTE's parse analysis isn't completed */ rte->self_reference = !IsA(cte->ctequery, Query); @@ -2499,6 +2508,7 @@ addRangeTableEntryForENR(ParseState *pstate, * if they access transition tables linked to a table that is altered. */ rte->relid = enrmd->reliddesc; + rte->reftype = ROW_REF_COPY; /* * Build the list of effective column names using user-supplied aliases @@ -3268,6 +3278,9 @@ get_rte_attribute_name(RangeTblEntry *rte, AttrNumber attnum) attnum > 0 && attnum <= list_length(rte->alias->colnames)) return strVal(list_nth(rte->alias->colnames, attnum - 1)); + if (attnum == RowIdAttributeNumber) + return "rowid"; + /* * If the RTE is a relation, go to the system catalogs not the * eref->colnames list. This is a little slower but it will give the diff --git a/src/backend/postmaster/autovacuum.c b/src/backend/postmaster/autovacuum.c index 246ac2bec88..406eb7c58b8 100644 --- a/src/backend/postmaster/autovacuum.c +++ b/src/backend/postmaster/autovacuum.c @@ -2863,7 +2863,9 @@ extract_autovac_opts(HeapTuple tup, TupleDesc pg_class_desc) ((Form_pg_class) GETSTRUCT(tup))->relkind == RELKIND_MATVIEW || ((Form_pg_class) GETSTRUCT(tup))->relkind == RELKIND_TOASTVALUE); - relopts = extractRelOptions(tup, pg_class_desc, NULL); + relopts = extractRelOptions(tup, pg_class_desc, + GetTableAmRoutineByAmOid(((Form_pg_class) GETSTRUCT(tup))->relam), + NULL); if (relopts == NULL) return NULL; diff --git a/src/backend/rewrite/rewriteHandler.c b/src/backend/rewrite/rewriteHandler.c index 57f075de394..c844bc6e957 100644 --- a/src/backend/rewrite/rewriteHandler.c +++ b/src/backend/rewrite/rewriteHandler.c @@ -23,6 +23,7 @@ #include "access/relation.h" #include "access/sysattr.h" #include "access/table.h" +#include "access/tableam.h" #include "catalog/dependency.h" #include "catalog/pg_type.h" #include "commands/trigger.h" diff --git a/src/backend/utils/adt/ri_triggers.c b/src/backend/utils/adt/ri_triggers.c index 6945d99b3d5..9abe334b563 100644 --- a/src/backend/utils/adt/ri_triggers.c +++ b/src/backend/utils/adt/ri_triggers.c @@ -247,6 +247,7 @@ RI_FKey_check(TriggerData *trigdata) TupleTableSlot *newslot; RI_QueryKey qkey; SPIPlanPtr qplan; + Relation rel = trigdata->tg_relation; riinfo = ri_FetchConstraintInfo(trigdata->tg_trigger, trigdata->tg_relation, false); @@ -264,7 +265,7 @@ RI_FKey_check(TriggerData *trigdata) * and lock on the buffer to call HeapTupleSatisfiesVisibility. Caller * should be holding pin, but not lock. */ - if (!table_tuple_satisfies_snapshot(trigdata->tg_relation, newslot, SnapshotSelf)) + if (!table_tuple_satisfies_snapshot(rel, newslot, SnapshotSelf)) return PointerGetDatum(NULL); /* @@ -1263,9 +1264,6 @@ RI_FKey_fk_upd_check_required(Trigger *trigger, Relation fk_rel, { const RI_ConstraintInfo *riinfo; int ri_nullcheck; - Datum xminDatum; - TransactionId xmin; - bool isnull; /* * AfterTriggerSaveEvent() handles things such that this function is never @@ -1333,10 +1331,7 @@ RI_FKey_fk_upd_check_required(Trigger *trigger, Relation fk_rel, * this if we knew the INSERT trigger already fired, but there is no easy * way to know that.) */ - xminDatum = slot_getsysattr(oldslot, MinTransactionIdAttributeNumber, &isnull); - Assert(!isnull); - xmin = DatumGetTransactionId(xminDatum); - if (TransactionIdIsCurrentTransactionId(xmin)) + if (table_tuple_is_current(fk_rel, oldslot)) return true; /* If all old and new key values are equal, no check is needed */ diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c index 34c3cfa8e8c..89d9fd37dc6 100644 --- a/src/backend/utils/cache/relcache.c +++ b/src/backend/utils/cache/relcache.c @@ -34,6 +34,7 @@ #include "access/multixact.h" #include "access/nbtree.h" #include "access/parallel.h" +#include "access/relation.h" #include "access/reloptions.h" #include "access/sysattr.h" #include "access/table.h" @@ -317,6 +318,7 @@ static OpClassCacheEnt *LookupOpclassInfo(Oid operatorClassOid, StrategyNumber numSupport); static void RelationCacheInitFileRemoveInDir(const char *tblspcpath); static void unlink_initfile(const char *initfilename, int elevel); +static void release_rd_amcache(Relation rel); /* @@ -461,8 +463,9 @@ AllocateRelationDesc(Form_pg_class relp) static void RelationParseRelOptions(Relation relation, HeapTuple tuple) { - bytea *options; - amoptions_function amoptsfn; + bytea *options; + amoptions_function amoptsfn; + const TableAmRoutine *tableam = NULL; relation->rd_options = NULL; @@ -474,9 +477,10 @@ RelationParseRelOptions(Relation relation, HeapTuple tuple) { case RELKIND_RELATION: case RELKIND_TOASTVALUE: - case RELKIND_VIEW: case RELKIND_MATVIEW: + case RELKIND_VIEW: case RELKIND_PARTITIONED_TABLE: + tableam = relation->rd_tableam; amoptsfn = NULL; break; case RELKIND_INDEX: @@ -488,11 +492,12 @@ RelationParseRelOptions(Relation relation, HeapTuple tuple) } /* - * Fetch reloptions from tuple; have to use a hardwired descriptor because - * we might not have any other for pg_class yet (consider executing this - * code for pg_class itself) - */ - options = extractRelOptions(tuple, GetPgClassDescriptor(), amoptsfn); + * Fetch reloptions from tuple; have to use a hardwired descriptor because + * we might not have any other for pg_class yet (consider executing this + * code for pg_class itself) + */ + options = extractRelOptions(tuple, GetPgClassDescriptor(), + tableam, amoptsfn); /* * Copy parsed data into CacheMemoryContext. To guard against the @@ -2230,9 +2235,7 @@ RelationReloadIndexInfo(Relation relation) RelationCloseSmgr(relation); /* Must free any AM cached data upon relcache flush */ - if (relation->rd_amcache) - pfree(relation->rd_amcache); - relation->rd_amcache = NULL; + release_rd_amcache(relation); /* * If it's a shared index, we might be called before backend startup has @@ -2452,8 +2455,7 @@ RelationDestroyRelation(Relation relation, bool remember_tupdesc) pfree(relation->rd_options); if (relation->rd_indextuple) pfree(relation->rd_indextuple); - if (relation->rd_amcache) - pfree(relation->rd_amcache); + release_rd_amcache(relation); if (relation->rd_fdwroutine) pfree(relation->rd_fdwroutine); if (relation->rd_indexcxt) @@ -2515,9 +2517,7 @@ RelationClearRelation(Relation relation, bool rebuild) RelationCloseSmgr(relation); /* Free AM cached data, if any */ - if (relation->rd_amcache) - pfree(relation->rd_amcache); - relation->rd_amcache = NULL; + release_rd_amcache(relation); /* * Treat nailed-in system relations separately, they always need to be @@ -6823,3 +6823,9 @@ unlink_initfile(const char *initfilename, int elevel) initfilename))); } } + +static void +release_rd_amcache(Relation rel) +{ + table_free_rd_amcache(rel); +} diff --git a/src/backend/utils/sort/tuplestore.c b/src/backend/utils/sort/tuplestore.c index f60633df241..120db339150 100644 --- a/src/backend/utils/sort/tuplestore.c +++ b/src/backend/utils/sort/tuplestore.c @@ -1100,6 +1100,36 @@ tuplestore_gettupleslot(Tuplestorestate *state, bool forward, } } +/* + * Same as tuplestore_gettupleslot(), but foces tuple storage to slot. Thus, + * it can work with slot types different than minimal tuple. + */ +bool +tuplestore_force_gettupleslot(Tuplestorestate *state, bool forward, + bool copy, TupleTableSlot *slot) +{ + MinimalTuple tuple; + bool should_free; + + tuple = (MinimalTuple) tuplestore_gettuple(state, forward, &should_free); + + if (tuple) + { + if (copy && !should_free) + { + tuple = heap_copy_minimal_tuple(tuple); + should_free = true; + } + ExecForceStoreMinimalTuple(tuple, slot, should_free); + return true; + } + else + { + ExecClearTuple(slot); + return false; + } +} + /* * tuplestore_advance - exported function to adjust position without fetching * diff --git a/src/include/access/reloptions.h b/src/include/access/reloptions.h index 1d5bfa62ffc..4812bc4481d 100644 --- a/src/include/access/reloptions.h +++ b/src/include/access/reloptions.h @@ -21,6 +21,7 @@ #include "access/amapi.h" #include "access/htup.h" +#include "access/tableam.h" #include "access/tupdesc.h" #include "nodes/pg_list.h" #include "storage/lock.h" @@ -224,6 +225,7 @@ extern Datum transformRelOptions(Datum oldOptions, List *defList, bool acceptOidsOff, bool isReset); extern List *untransformRelOptions(Datum options); extern bytea *extractRelOptions(HeapTuple tuple, TupleDesc tupdesc, + const TableAmRoutine *tableam, amoptions_function amoptions); extern void *build_reloptions(Datum reloptions, bool validate, relopt_kind kind, diff --git a/src/include/access/sysattr.h b/src/include/access/sysattr.h index 8f08682750b..d717a7cafec 100644 --- a/src/include/access/sysattr.h +++ b/src/include/access/sysattr.h @@ -24,6 +24,7 @@ #define MaxTransactionIdAttributeNumber (-4) #define MaxCommandIdAttributeNumber (-5) #define TableOidAttributeNumber (-6) -#define FirstLowInvalidHeapAttributeNumber (-7) +#define RowIdAttributeNumber (-7) +#define FirstLowInvalidHeapAttributeNumber (-8) #endif /* SYSATTR_H */ diff --git a/src/include/access/tableam.h b/src/include/access/tableam.h index a4f3fca3dc3..7356d5de507 100644 --- a/src/include/access/tableam.h +++ b/src/include/access/tableam.h @@ -17,10 +17,14 @@ #ifndef TABLEAM_H #define TABLEAM_H +#include "access/amapi.h" #include "access/relscan.h" #include "access/sdir.h" #include "access/xact.h" #include "executor/tuptable.h" +#include "nodes/execnodes.h" +#include "storage/bufmgr.h" +#include "utils/guc.h" #include "utils/rel.h" #include "utils/snapshot.h" @@ -39,6 +43,16 @@ struct TBMIterateResult; struct VacuumParams; struct ValidateIndexState; +typedef int (*AcquireSampleRowsFunc) (Relation relation, int elevel, + HeapTuple *rows, int targrows, + double *totalrows, + double *totaldeadrows); + +/* in commands/analyze.c */ +extern int acquire_sample_rows(Relation onerel, int elevel, + HeapTuple *rows, int targrows, + double *totalrows, double *totaldeadrows); + /* * Bitmask values for the flags argument to the scan_begin callback. */ @@ -306,6 +320,9 @@ typedef struct TableAmRoutine */ const TupleTableSlotOps *(*slot_callbacks) (Relation rel); + RowRefType (*get_row_ref_type) (Relation rel); + + void (*free_rd_amcache) (Relation rel); /* ------------------------------------------------------------------------ * Table scan callbacks. @@ -475,7 +492,7 @@ typedef struct TableAmRoutine * test, returns true, false otherwise. */ bool (*tuple_fetch_row_version) (Relation rel, - ItemPointer tid, + Datum tupleid, Snapshot snapshot, TupleTableSlot *slot); @@ -511,23 +528,19 @@ typedef struct TableAmRoutine */ /* see table_tuple_insert() for reference about parameters */ - void (*tuple_insert) (Relation rel, TupleTableSlot *slot, + TupleTableSlot *(*tuple_insert) (Relation rel, TupleTableSlot *slot, CommandId cid, int options, struct BulkInsertStateData *bistate); - /* see table_tuple_insert_speculative() for reference about parameters */ - void (*tuple_insert_speculative) (Relation rel, - TupleTableSlot *slot, - CommandId cid, - int options, - struct BulkInsertStateData *bistate, - uint32 specToken); - - /* see table_tuple_complete_speculative() for reference about parameters */ - void (*tuple_complete_speculative) (Relation rel, - TupleTableSlot *slot, - uint32 specToken, - bool succeeded); + TupleTableSlot *(*tuple_insert_with_arbiter) (ResultRelInfo *resultRelInfo, + TupleTableSlot *slot, + CommandId cid, int options, + struct BulkInsertStateData *bistate, + List *arbiterIndexes, + EState *estate, + LockTupleMode lockmode, + TupleTableSlot *lockedSlot, + TupleTableSlot *tempSlot); /* see table_multi_insert() for reference about parameters */ void (*multi_insert) (Relation rel, TupleTableSlot **slots, int nslots, @@ -535,7 +548,7 @@ typedef struct TableAmRoutine /* see table_tuple_delete() for reference about parameters */ TM_Result (*tuple_delete) (Relation rel, - ItemPointer tid, + Datum tupleid, CommandId cid, Snapshot snapshot, Snapshot crosscheck, @@ -546,7 +559,7 @@ typedef struct TableAmRoutine /* see table_tuple_update() for reference about parameters */ TM_Result (*tuple_update) (Relation rel, - ItemPointer otid, + Datum tupleid, TupleTableSlot *slot, CommandId cid, Snapshot snapshot, @@ -559,7 +572,7 @@ typedef struct TableAmRoutine /* see table_tuple_lock() for reference about parameters */ TM_Result (*tuple_lock) (Relation rel, - ItemPointer tid, + Datum tupleid, Snapshot snapshot, TupleTableSlot *slot, CommandId cid, @@ -879,6 +892,14 @@ typedef struct TableAmRoutine struct SampleScanState *scanstate, TupleTableSlot *slot); + /* Check if tuple in the slot belongs to the current transaction */ + bool (*tuple_is_current) (Relation rel, TupleTableSlot *slot); + + void (*analyze_table) (Relation relation, + AcquireSampleRowsFunc *func, + BlockNumber *totalpages); + + bytea *(*reloptions) (char relkind, Datum reloptions, bool validate); } TableAmRoutine; @@ -1294,7 +1315,7 @@ extern bool table_index_fetch_tuple_check(Relation rel, */ static inline bool table_tuple_fetch_row_version(Relation rel, - ItemPointer tid, + Datum tupleid, Snapshot snapshot, TupleTableSlot *slot) { @@ -1306,7 +1327,7 @@ table_tuple_fetch_row_version(Relation rel, if (unlikely(TransactionIdIsValid(CheckXidAlive) && !bsysscan)) elog(ERROR, "unexpected table_tuple_fetch_row_version call during logical decoding"); - return rel->rd_tableam->tuple_fetch_row_version(rel, tid, snapshot, slot); + return rel->rd_tableam->tuple_fetch_row_version(rel, tupleid, snapshot, slot); } /* @@ -1406,45 +1427,32 @@ table_index_delete_tuples(Relation rel, TM_IndexDeleteOp *delstate) * insertion. But note that any toasting of fields within the slot is NOT * reflected in the slots contents. */ -static inline void +static inline TupleTableSlot * table_tuple_insert(Relation rel, TupleTableSlot *slot, CommandId cid, int options, struct BulkInsertStateData *bistate) { - rel->rd_tableam->tuple_insert(rel, slot, cid, options, - bistate); + return rel->rd_tableam->tuple_insert(rel, slot, cid, options, bistate); } -/* - * Perform a "speculative insertion". These can be backed out afterwards - * without aborting the whole transaction. Other sessions can wait for the - * speculative insertion to be confirmed, turning it into a regular tuple, or - * aborted, as if it never existed. Speculatively inserted tuples behave as - * "value locks" of short duration, used to implement INSERT .. ON CONFLICT. - * - * A transaction having performed a speculative insertion has to either abort, - * or finish the speculative insertion with - * table_tuple_complete_speculative(succeeded = ...). - */ -static inline void -table_tuple_insert_speculative(Relation rel, TupleTableSlot *slot, - CommandId cid, int options, - struct BulkInsertStateData *bistate, - uint32 specToken) -{ - rel->rd_tableam->tuple_insert_speculative(rel, slot, cid, options, - bistate, specToken); -} - -/* - * Complete "speculative insertion" started in the same transaction. If - * succeeded is true, the tuple is fully inserted, if false, it's removed. - */ -static inline void -table_tuple_complete_speculative(Relation rel, TupleTableSlot *slot, - uint32 specToken, bool succeeded) +static inline TupleTableSlot * +table_tuple_insert_with_arbiter(ResultRelInfo *resultRelInfo, + TupleTableSlot *slot, + CommandId cid, int options, + struct BulkInsertStateData *bistate, + List *arbiterIndexes, + EState *estate, + LockTupleMode lockmode, + TupleTableSlot *lockedSlot, + TupleTableSlot *tempSlot) { - rel->rd_tableam->tuple_complete_speculative(rel, slot, specToken, - succeeded); + Relation rel = resultRelInfo->ri_RelationDesc; + + return rel->rd_tableam->tuple_insert_with_arbiter(resultRelInfo, + slot, cid, options, + bistate, arbiterIndexes, + estate, + lockmode, lockedSlot, + tempSlot); } /* @@ -1506,12 +1514,12 @@ table_multi_insert(Relation rel, TupleTableSlot **slots, int nslots, * TM_FailureData for additional info. */ static inline TM_Result -table_tuple_delete(Relation rel, ItemPointer tid, CommandId cid, +table_tuple_delete(Relation rel, Datum tupleid, CommandId cid, Snapshot snapshot, Snapshot crosscheck, int options, TM_FailureData *tmfd, bool changingPart, TupleTableSlot *oldSlot) { - return rel->rd_tableam->tuple_delete(rel, tid, cid, + return rel->rd_tableam->tuple_delete(rel, tupleid, cid, snapshot, crosscheck, options, tmfd, changingPart, oldSlot); @@ -1562,13 +1570,13 @@ table_tuple_delete(Relation rel, ItemPointer tid, CommandId cid, * for additional info. */ static inline TM_Result -table_tuple_update(Relation rel, ItemPointer otid, TupleTableSlot *slot, +table_tuple_update(Relation rel, Datum tupleid, TupleTableSlot *slot, CommandId cid, Snapshot snapshot, Snapshot crosscheck, int options, TM_FailureData *tmfd, LockTupleMode *lockmode, TU_UpdateIndexes *update_indexes, TupleTableSlot *oldSlot) { - return rel->rd_tableam->tuple_update(rel, otid, slot, + return rel->rd_tableam->tuple_update(rel, tupleid, slot, cid, snapshot, crosscheck, options, tmfd, lockmode, update_indexes, @@ -1611,12 +1619,12 @@ table_tuple_update(Relation rel, ItemPointer otid, TupleTableSlot *slot, * additional info. */ static inline TM_Result -table_tuple_lock(Relation rel, ItemPointer tid, Snapshot snapshot, +table_tuple_lock(Relation rel, Datum tupleid, Snapshot snapshot, TupleTableSlot *slot, CommandId cid, LockTupleMode mode, LockWaitPolicy wait_policy, uint8 flags, TM_FailureData *tmfd) { - return rel->rd_tableam->tuple_lock(rel, tid, snapshot, slot, + return rel->rd_tableam->tuple_lock(rel, tupleid, snapshot, slot, cid, mode, wait_policy, flags, tmfd); } @@ -2082,6 +2090,11 @@ table_scan_sample_next_tuple(TableScanDesc scan, slot); } +static inline bool +table_tuple_is_current(Relation rel, TupleTableSlot *slot) +{ + return rel->rd_tableam->tuple_is_current(rel, slot); +} /* ---------------------------------------------------------------------------- * Functions to make modifications a bit simpler. @@ -2136,6 +2149,60 @@ extern void table_block_relation_estimate_size(Relation rel, */ extern const TableAmRoutine *GetTableAmRoutine(Oid amhandler); +extern const TableAmRoutine *GetTableAmRoutineByAmOid(Oid amoid); extern const TableAmRoutine *GetHeapamTableAmRoutine(void); +static inline RowRefType +table_get_row_ref_type(Relation rel) +{ + if (rel->rd_tableam) + return rel->rd_tableam->get_row_ref_type(rel); + else + return ROW_REF_TID; +} + +static inline void +table_free_rd_amcache(Relation rel) +{ + if (rel->rd_tableam) + { + rel->rd_tableam->free_rd_amcache(rel); + } + else + { + if (rel->rd_amcache) + pfree(rel->rd_amcache); + rel->rd_amcache = NULL; + } +} + +static inline void +table_analyze(Relation relation, AcquireSampleRowsFunc *func, + BlockNumber *totalpages) +{ + if (relation->rd_tableam->analyze_table) + { + relation->rd_tableam->analyze_table(relation, func, totalpages); + } + else + { + *func = acquire_sample_rows; + *totalpages = RelationGetNumberOfBlocks(relation); + } +} + +static inline bytea * +table_reloptions(Relation rel, char relkind, + Datum reloptions, bool validate) +{ + return rel->rd_tableam->reloptions(relkind, reloptions, validate); +} + +static inline bytea * +tableam_reloptions(const TableAmRoutine *tableam, char relkind, + Datum reloptions, bool validate) +{ + return tableam->reloptions(relkind, reloptions, validate); +} + #endif /* TABLEAM_H */ diff --git a/src/include/commands/trigger.h b/src/include/commands/trigger.h index 4903b4b7bc2..15e1fbe7700 100644 --- a/src/include/commands/trigger.h +++ b/src/include/commands/trigger.h @@ -209,7 +209,7 @@ extern void ExecASDeleteTriggers(EState *estate, extern bool ExecBRDeleteTriggers(EState *estate, EPQState *epqstate, ResultRelInfo *relinfo, - ItemPointer tupleid, + Datum tupleid, HeapTuple fdw_trigtuple, TupleTableSlot **epqslot, TM_Result *tmresult, @@ -231,7 +231,7 @@ extern void ExecASUpdateTriggers(EState *estate, extern bool ExecBRUpdateTriggers(EState *estate, EPQState *epqstate, ResultRelInfo *relinfo, - ItemPointer tupleid, + Datum tupleid, HeapTuple fdw_trigtuple, TupleTableSlot *newslot, TM_Result *tmresult, diff --git a/src/include/commands/vacuum.h b/src/include/commands/vacuum.h index 39fbd5f10a5..3a8ee4fbf05 100644 --- a/src/include/commands/vacuum.h +++ b/src/include/commands/vacuum.h @@ -376,6 +376,9 @@ extern void parallel_vacuum_main(dsm_segment *seg, shm_toc *toc); extern void analyze_rel(Oid relid, RangeVar *relation, VacuumParams *params, List *va_cols, bool in_outer_xact, BufferAccessStrategy bstrategy); +extern int acquire_sample_rows(Relation onerel, int elevel, + HeapTuple *rows, int targrows, + double *totalrows, double *totaldeadrows); extern bool std_typanalyze(VacAttrStats *stats); /* in utils/misc/sampling.c --- duplicate of declarations in utils/sampling.h */ diff --git a/src/include/foreign/fdwapi.h b/src/include/foreign/fdwapi.h index 996c62e3055..50a2494c019 100644 --- a/src/include/foreign/fdwapi.h +++ b/src/include/foreign/fdwapi.h @@ -13,6 +13,7 @@ #define FDWAPI_H #include "access/parallel.h" +#include "access/tableam.h" #include "nodes/execnodes.h" #include "nodes/pathnodes.h" @@ -148,11 +149,6 @@ typedef void (*ExplainForeignModify_function) (ModifyTableState *mtstate, typedef void (*ExplainDirectModify_function) (ForeignScanState *node, struct ExplainState *es); -typedef int (*AcquireSampleRowsFunc) (Relation relation, int elevel, - HeapTuple *rows, int targrows, - double *totalrows, - double *totaldeadrows); - typedef bool (*AnalyzeForeignTable_function) (Relation relation, AcquireSampleRowsFunc *func, BlockNumber *totalpages); diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h index 74718fa256d..310115af0f1 100644 --- a/src/include/nodes/execnodes.h +++ b/src/include/nodes/execnodes.h @@ -450,6 +450,8 @@ typedef struct ResultRelInfo /* relation descriptor for result relation */ Relation ri_RelationDesc; + RowRefType ri_RowRefType; + /* # of indices existing on result relation */ int ri_NumIndices; @@ -746,6 +748,7 @@ typedef struct ExecRowMark Index prti; /* parent range table index, if child */ Index rowmarkId; /* unique identifier for resjunk columns */ RowMarkType markType; /* see enum in nodes/plannodes.h */ + RowRefType refType; LockClauseStrength strength; /* LockingClause's strength, or LCS_NONE */ LockWaitPolicy waitPolicy; /* NOWAIT and SKIP LOCKED */ bool ermActive; /* is this mark relevant for current tuple? */ diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h index 9dca3b65287..99173f541d5 100644 --- a/src/include/nodes/parsenodes.h +++ b/src/include/nodes/parsenodes.h @@ -1073,6 +1073,7 @@ typedef struct RangeTblEntry int rellockmode; /* lock level that query requires on the rel */ struct TableSampleClause *tablesample; /* sampling info, or NULL */ Index perminfoindex; + RowRefType reftype; /* * Fields valid for a subquery RTE (else NULL): diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h index d64fe6a328b..77130245e8f 100644 --- a/src/include/nodes/plannodes.h +++ b/src/include/nodes/plannodes.h @@ -1352,7 +1352,7 @@ typedef enum RowMarkType * child relations will also have entries with isParent = true. The child * entries have rti == child rel's RT index and prti == top parent's RT index, * and can therefore be recognized as children by the fact that prti != rti. - * The parent's allMarkTypes field gets the OR of (1< Date: Mon, 13 Dec 2021 00:19:41 +0300 Subject: [PATCH 008/102] Hook for custom error cleanup --- src/backend/access/transam/xact.c | 2 ++ src/backend/postmaster/autovacuum.c | 1 + src/backend/postmaster/auxprocess.c | 1 + src/backend/postmaster/bgwriter.c | 1 + src/backend/postmaster/checkpointer.c | 2 ++ src/backend/postmaster/walwriter.c | 1 + src/backend/replication/walsender.c | 1 + src/backend/storage/lmgr/proc.c | 2 ++ src/backend/utils/error/elog.c | 10 +++++++++- src/include/utils/elog.h | 6 ++++++ 10 files changed, 26 insertions(+), 1 deletion(-) diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c index 4a2ea4adbaf..c0a6dd44e3e 100644 --- a/src/backend/access/transam/xact.c +++ b/src/backend/access/transam/xact.c @@ -2714,6 +2714,7 @@ AbortTransaction(void) * while cleaning up! */ LWLockReleaseAll(); + CustomErrorCleanup(); /* Clear wait information and command progress indicator */ pgstat_report_wait_end(); @@ -5076,6 +5077,7 @@ AbortSubTransaction(void) * Buffer locks, for example? I don't think so but I'm not sure. */ LWLockReleaseAll(); + CustomErrorCleanup(); pgstat_report_wait_end(); pgstat_progress_end_command(); diff --git a/src/backend/postmaster/autovacuum.c b/src/backend/postmaster/autovacuum.c index 406eb7c58b8..5d59b78e282 100644 --- a/src/backend/postmaster/autovacuum.c +++ b/src/backend/postmaster/autovacuum.c @@ -538,6 +538,7 @@ AutoVacLauncherMain(int argc, char *argv[]) * transaction. */ LWLockReleaseAll(); + CustomErrorCleanup(); pgstat_report_wait_end(); UnlockBuffers(); /* this is probably dead code, but let's be safe: */ diff --git a/src/backend/postmaster/auxprocess.c b/src/backend/postmaster/auxprocess.c index cae6feb3562..bc4c3d11359 100644 --- a/src/backend/postmaster/auxprocess.c +++ b/src/backend/postmaster/auxprocess.c @@ -178,6 +178,7 @@ static void ShutdownAuxiliaryProcess(int code, Datum arg) { LWLockReleaseAll(); + CustomErrorCleanup(); ConditionVariableCancelSleep(); pgstat_report_wait_end(); } diff --git a/src/backend/postmaster/bgwriter.c b/src/backend/postmaster/bgwriter.c index f2e4f23d9fc..7963fcd2a38 100644 --- a/src/backend/postmaster/bgwriter.c +++ b/src/backend/postmaster/bgwriter.c @@ -166,6 +166,7 @@ BackgroundWriterMain(void) * about in bgwriter, but we do have LWLocks, buffers, and temp files. */ LWLockReleaseAll(); + CustomErrorCleanup(); ConditionVariableCancelSleep(); UnlockBuffers(); ReleaseAuxProcessResources(false); diff --git a/src/backend/postmaster/checkpointer.c b/src/backend/postmaster/checkpointer.c index 147abe9259f..985b9b73545 100644 --- a/src/backend/postmaster/checkpointer.c +++ b/src/backend/postmaster/checkpointer.c @@ -210,6 +210,7 @@ CheckpointerMain(void) */ pqsignal(SIGCHLD, SIG_DFL); + /* * Initialize so that first time-driven event happens at the correct time. */ @@ -272,6 +273,7 @@ CheckpointerMain(void) * files. */ LWLockReleaseAll(); + CustomErrorCleanup(); ConditionVariableCancelSleep(); pgstat_report_wait_end(); UnlockBuffers(); diff --git a/src/backend/postmaster/walwriter.c b/src/backend/postmaster/walwriter.c index 266fbc23399..4e8a9573006 100644 --- a/src/backend/postmaster/walwriter.c +++ b/src/backend/postmaster/walwriter.c @@ -161,6 +161,7 @@ WalWriterMain(void) * about in walwriter, but we do have LWLocks, and perhaps buffers? */ LWLockReleaseAll(); + CustomErrorCleanup(); ConditionVariableCancelSleep(); pgstat_report_wait_end(); UnlockBuffers(); diff --git a/src/backend/replication/walsender.c b/src/backend/replication/walsender.c index e79f77c8d9f..531f9976fec 100644 --- a/src/backend/replication/walsender.c +++ b/src/backend/replication/walsender.c @@ -332,6 +332,7 @@ void WalSndErrorCleanup(void) { LWLockReleaseAll(); + CustomErrorCleanup(); ConditionVariableCancelSleep(); pgstat_report_wait_end(); diff --git a/src/backend/storage/lmgr/proc.c b/src/backend/storage/lmgr/proc.c index 2c990e50e74..1b38553d43e 100644 --- a/src/backend/storage/lmgr/proc.c +++ b/src/backend/storage/lmgr/proc.c @@ -829,6 +829,7 @@ ProcKill(int code, Datum arg) * facility by releasing our PGPROC ... */ LWLockReleaseAll(); + CustomErrorCleanup(); /* Cancel any pending condition variable sleep, too */ ConditionVariableCancelSleep(); @@ -940,6 +941,7 @@ AuxiliaryProcKill(int code, Datum arg) /* Release any LW locks I am holding (see notes above) */ LWLockReleaseAll(); + CustomErrorCleanup(); /* Cancel any pending condition variable sleep, too */ ConditionVariableCancelSleep(); diff --git a/src/backend/utils/error/elog.c b/src/backend/utils/error/elog.c index 4f99f701ea5..6e1cc18210c 100644 --- a/src/backend/utils/error/elog.c +++ b/src/backend/utils/error/elog.c @@ -3776,7 +3776,6 @@ vwrite_stderr(const char *fmt, va_list ap) #endif } - /* * Write a message to STDERR using only async-signal-safe functions. This can * be used to safely emit a message from a signal handler. @@ -3829,3 +3828,12 @@ trace_recovery(int trace_level) return trace_level; } + +CustomErrorCleanupHookType CustomErrorCleanupHook = NULL; + +void +CustomErrorCleanup(void) +{ + if (CustomErrorCleanupHook) + CustomErrorCleanupHook(); +} diff --git a/src/include/utils/elog.h b/src/include/utils/elog.h index b3e95044174..4f8fbfa4c25 100644 --- a/src/include/utils/elog.h +++ b/src/include/utils/elog.h @@ -544,4 +544,10 @@ extern void vwrite_stderr(const char *fmt, va_list ap) pg_attribute_printf(1, 0) */ extern void write_stderr_signal_safe(const char *fmt); +typedef void (*CustomErrorCleanupHookType) (void); + +extern CustomErrorCleanupHookType CustomErrorCleanupHook; + +extern void CustomErrorCleanup(void); + #endif /* ELOG_H */ From 3e7a3f606c5392b66b0bb6e9837a0d562fa69218 Mon Sep 17 00:00:00 2001 From: Alexander Korotkov Date: Mon, 13 Dec 2021 01:51:03 +0300 Subject: [PATCH 009/102] Snapshot extension and hooks Snapshot have two pairing heap nodes: for data and system undos. --- src/backend/access/transam/xact.c | 11 ++++++++ src/backend/access/transam/xlog.c | 3 ++ src/backend/storage/ipc/procarray.c | 8 ++++++ src/backend/utils/time/snapmgr.c | 44 +++++++++++++++++++++++++++++ src/include/access/transam.h | 10 ++++++- src/include/access/xlog.h | 1 + src/include/storage/proc.h | 1 + src/include/storage/procarray.h | 2 ++ src/include/utils/snapmgr.h | 11 +++++++- src/include/utils/snapshot.h | 13 +++++++++ 10 files changed, 102 insertions(+), 2 deletions(-) diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c index c0a6dd44e3e..8f3adde828d 100644 --- a/src/backend/access/transam/xact.c +++ b/src/backend/access/transam/xact.c @@ -209,6 +209,7 @@ typedef struct TransactionStateData int parallelModeLevel; /* Enter/ExitParallelMode counter */ bool chain; /* start a new block after this one */ bool topXidLogged; /* for a subxact: is top-level XID logged? */ + CommitSeqNo csn; struct TransactionStateData *parent; /* back link to parent */ } TransactionStateData; @@ -242,6 +243,7 @@ static TransactionStateData TopTransactionStateData = { .state = TRANS_DEFAULT, .blockState = TBLOCK_DEFAULT, .topXidLogged = false, + .csn = COMMITSEQNO_INPROGRESS }; /* @@ -2014,6 +2016,7 @@ StartTransaction(void) */ s->state = TRANS_START; s->fullTransactionId = InvalidFullTransactionId; /* until assigned */ + s->csn = COMMITSEQNO_INPROGRESS; /* Determine if statements are logged in this transaction */ xact_is_sampled = log_xact_sample_rate != 0 && @@ -2288,7 +2291,9 @@ CommitTransaction(void) * must be done _before_ releasing locks we hold and _after_ * RecordTransactionCommit. */ + MyProc->lastCommittedCSN = s->csn; ProcArrayEndTransaction(MyProc, latestXid); + s->csn = MyProc->lastCommittedCSN; /* * This is all post-commit cleanup. Note that if an error is raised here, @@ -6269,3 +6274,9 @@ xact_redo(XLogReaderState *record) else elog(PANIC, "xact_redo: unknown op code %u", info); } + +CommitSeqNo +GetCurrentCSN(void) +{ + return TopTransactionStateData.csn; +} diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 70cb88a7fee..f01caa44e9b 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -139,6 +139,7 @@ int wal_retrieve_retry_interval = 5000; int max_slot_wal_keep_size_mb = -1; int wal_decode_buffer_size = 512 * 1024; bool track_wal_io_timing = false; +CommitSeqNo startupCommitSeqNo = COMMITSEQNO_FIRST_NORMAL + 1; #ifdef WAL_DEBUG bool XLOG_DEBUG = false; @@ -4712,6 +4713,7 @@ BootStrapXLOG(void) ShmemVariableCache->nextXid = checkPoint.nextXid; ShmemVariableCache->nextOid = checkPoint.nextOid; ShmemVariableCache->oidCount = 0; + pg_atomic_write_u64(&ShmemVariableCache->nextCommitSeqNo, COMMITSEQNO_FIRST_NORMAL + 1); MultiXactSetNextMXact(checkPoint.nextMulti, checkPoint.nextMultiOffset); AdvanceOldestClogXid(checkPoint.oldestXid); SetTransactionIdLimit(checkPoint.oldestXid, checkPoint.oldestXidDB); @@ -5178,6 +5180,7 @@ StartupXLOG(void) ShmemVariableCache->nextXid = checkPoint.nextXid; ShmemVariableCache->nextOid = checkPoint.nextOid; ShmemVariableCache->oidCount = 0; + pg_atomic_write_u64(&ShmemVariableCache->nextCommitSeqNo, startupCommitSeqNo); MultiXactSetNextMXact(checkPoint.nextMulti, checkPoint.nextMultiOffset); AdvanceOldestClogXid(checkPoint.oldestXid); SetTransactionIdLimit(checkPoint.oldestXid, checkPoint.oldestXidDB); diff --git a/src/backend/storage/ipc/procarray.c b/src/backend/storage/ipc/procarray.c index c81c32d0270..553663b1dfb 100644 --- a/src/backend/storage/ipc/procarray.c +++ b/src/backend/storage/ipc/procarray.c @@ -309,6 +309,8 @@ static GlobalVisState GlobalVisTempRels; */ static TransactionId ComputeXidHorizonsResultLastXmin; +snapshot_hook_type snapshot_hook = NULL; + #ifdef XIDCACHE_DEBUG /* counters for XidCache measurement */ @@ -752,6 +754,7 @@ ProcArrayEndTransactionInternal(PGPROC *proc, TransactionId latestXid) proc->delayChkptFlags = 0; proc->recoveryConflictPending = false; + proc->lastCommittedCSN = pg_atomic_fetch_add_u64(&ShmemVariableCache->nextCommitSeqNo, 1); /* must be cleared with xid/xmin: */ /* avoid unnecessarily dirtying shared cachelines */ @@ -2258,6 +2261,8 @@ GetSnapshotData(Snapshot snapshot) if (GetSnapshotDataReuse(snapshot)) { + if (snapshot_hook) + snapshot_hook(snapshot); LWLockRelease(ProcArrayLock); return snapshot; } @@ -2439,6 +2444,9 @@ GetSnapshotData(Snapshot snapshot) if (!TransactionIdIsValid(MyProc->xmin)) MyProc->xmin = TransactionXmin = xmin; + if (snapshot_hook) + snapshot_hook(snapshot); + LWLockRelease(ProcArrayLock); /* maintain state for GlobalVis* */ diff --git a/src/backend/utils/time/snapmgr.c b/src/backend/utils/time/snapmgr.c index 681ca822558..080e7b15496 100644 --- a/src/backend/utils/time/snapmgr.c +++ b/src/backend/utils/time/snapmgr.c @@ -116,6 +116,10 @@ TransactionId RecentXmin = FirstNormalTransactionId; /* (table, ctid) => (cmin, cmax) mapping during timetravel */ static HTAB *tuplecid_data = NULL; +snapshot_hook_type snapshot_register_hook = NULL; +snapshot_hook_type snapshot_deregister_hook = NULL; +reset_xmin_hook_type reset_xmin_hook = NULL; + /* * Elements of the active snapshot stack. * @@ -192,6 +196,11 @@ typedef struct SerializedSnapshotData CommandId curcid; TimestampTz whenTaken; XLogRecPtr lsn; + CommitSeqNo snapshotcsn; + uint64 undoRegularLocation; + uint64 undoRegularXmin; + uint64 undoSystemLocation; + uint64 undoSystemXmin; } SerializedSnapshotData; Size @@ -298,6 +307,8 @@ GetTransactionSnapshot(void) /* Mark it as "registered" in FirstXactSnapshot */ FirstXactSnapshot->regd_count++; pairingheap_add(&RegisteredSnapshots, &FirstXactSnapshot->ph_node); + if (snapshot_register_hook) + snapshot_register_hook(FirstXactSnapshot); } else CurrentSnapshot = GetSnapshotData(&CurrentSnapshotData); @@ -438,6 +449,8 @@ GetNonHistoricCatalogSnapshot(Oid relid) * CatalogSnapshot pointer is already valid. */ pairingheap_add(&RegisteredSnapshots, &CatalogSnapshot->ph_node); + if (snapshot_register_hook) + snapshot_register_hook(CatalogSnapshot); } return CatalogSnapshot; @@ -459,6 +472,8 @@ InvalidateCatalogSnapshot(void) if (CatalogSnapshot) { pairingheap_remove(&RegisteredSnapshots, &CatalogSnapshot->ph_node); + if (snapshot_deregister_hook) + snapshot_deregister_hook(CatalogSnapshot); CatalogSnapshot = NULL; SnapshotResetXmin(); } @@ -593,6 +608,8 @@ SetTransactionSnapshot(Snapshot sourcesnap, VirtualTransactionId *sourcevxid, /* Mark it as "registered" in FirstXactSnapshot */ FirstXactSnapshot->regd_count++; pairingheap_add(&RegisteredSnapshots, &FirstXactSnapshot->ph_node); + if (snapshot_register_hook) + snapshot_register_hook(FirstXactSnapshot); } FirstSnapshotSet = true; @@ -855,7 +872,11 @@ RegisterSnapshotOnOwner(Snapshot snapshot, ResourceOwner owner) ResourceOwnerRememberSnapshot(owner, snap); if (snap->regd_count == 1) + { pairingheap_add(&RegisteredSnapshots, &snap->ph_node); + if (snapshot_register_hook) + snapshot_register_hook(snap); + } return snap; } @@ -893,7 +914,11 @@ UnregisterSnapshotFromOwner(Snapshot snapshot, ResourceOwner owner) snapshot->regd_count--; if (snapshot->regd_count == 0) + { pairingheap_remove(&RegisteredSnapshots, &snapshot->ph_node); + if (snapshot_deregister_hook) + snapshot_deregister_hook(snapshot); + } if (snapshot->regd_count == 0 && snapshot->active_count == 0) { @@ -945,6 +970,9 @@ SnapshotResetXmin(void) { Snapshot minSnapshot; + if (reset_xmin_hook) + reset_xmin_hook(); + if (ActiveSnapshot != NULL) return; @@ -1038,6 +1066,8 @@ AtEOXact_Snapshot(bool isCommit, bool resetXmin) Assert(FirstXactSnapshot->regd_count > 0); Assert(!pairingheap_is_empty(&RegisteredSnapshots)); pairingheap_remove(&RegisteredSnapshots, &FirstXactSnapshot->ph_node); + if (snapshot_deregister_hook) + snapshot_deregister_hook(FirstXactSnapshot); } FirstXactSnapshot = NULL; @@ -1069,6 +1099,8 @@ AtEOXact_Snapshot(bool isCommit, bool resetXmin) pairingheap_remove(&RegisteredSnapshots, &esnap->snapshot->ph_node); + if (snapshot_deregister_hook) + snapshot_deregister_hook(esnap->snapshot); } exportedSnapshots = NIL; @@ -1196,6 +1228,8 @@ ExportSnapshot(Snapshot snapshot) snapshot->regd_count++; pairingheap_add(&RegisteredSnapshots, &snapshot->ph_node); + if (snapshot_register_hook) + snapshot_register_hook(snapshot); /* * Fill buf with a text serialization of the snapshot, plus identification @@ -2160,6 +2194,11 @@ SerializeSnapshot(Snapshot snapshot, char *start_address) serialized_snapshot.curcid = snapshot->curcid; serialized_snapshot.whenTaken = snapshot->whenTaken; serialized_snapshot.lsn = snapshot->lsn; + serialized_snapshot.snapshotcsn = snapshot->snapshotcsn; + serialized_snapshot.undoRegularXmin = snapshot->undoRegularLocationPhNode.xmin; + serialized_snapshot.undoRegularLocation = snapshot->undoRegularLocationPhNode.undoLocation; + serialized_snapshot.undoSystemXmin = snapshot->undoSystemLocationPhNode.xmin; + serialized_snapshot.undoSystemLocation = snapshot->undoSystemLocationPhNode.undoLocation; /* * Ignore the SubXID array if it has overflowed, unless the snapshot was @@ -2235,6 +2274,11 @@ RestoreSnapshot(char *start_address) snapshot->whenTaken = serialized_snapshot.whenTaken; snapshot->lsn = serialized_snapshot.lsn; snapshot->snapXactCompletionCount = 0; + snapshot->snapshotcsn = serialized_snapshot.snapshotcsn; + snapshot->undoRegularLocationPhNode.xmin = serialized_snapshot.undoRegularXmin; + snapshot->undoRegularLocationPhNode.undoLocation = serialized_snapshot.undoRegularLocation; + snapshot->undoSystemLocationPhNode.xmin = serialized_snapshot.undoSystemXmin; + snapshot->undoSystemLocationPhNode.undoLocation = serialized_snapshot.undoSystemLocation; /* Copy XIDs, if present. */ if (serialized_snapshot.xcnt > 0) diff --git a/src/include/access/transam.h b/src/include/access/transam.h index f7bcb4a8822..ed931c770ec 100644 --- a/src/include/access/transam.h +++ b/src/include/access/transam.h @@ -15,7 +15,9 @@ #define TRANSAM_H #include "access/xlogdefs.h" - +#ifndef FRONTEND +#include "port/atomics.h" +#endif /* ---------------- * Special transaction ID values @@ -268,6 +270,11 @@ typedef struct VariableCacheData */ TransactionId oldestClogXid; /* oldest it's safe to look up in clog */ +#ifndef FRONTEND + pg_atomic_uint64 nextCommitSeqNo; +#else + CommitSeqNo nextCommitSeqNo; +#endif } VariableCacheData; typedef VariableCacheData *VariableCache; @@ -310,6 +317,7 @@ extern void AdvanceOldestClogXid(TransactionId oldest_datfrozenxid); extern bool ForceTransactionIdLimitUpdate(void); extern Oid GetNewObjectId(void); extern void StopGeneratingPinnedObjectIds(void); +extern CommitSeqNo GetCurrentCSN(void); #ifdef USE_ASSERT_CHECKING extern void AssertTransactionIdInAllowableRange(TransactionId xid); diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h index ebb9eaade0a..f53348234f7 100644 --- a/src/include/access/xlog.h +++ b/src/include/access/xlog.h @@ -53,6 +53,7 @@ extern PGDLLIMPORT bool track_wal_io_timing; extern PGDLLIMPORT int wal_decode_buffer_size; extern PGDLLIMPORT int CheckPointSegments; +extern PGDLLIMPORT CommitSeqNo startupCommitSeqNo; /* Archive modes */ typedef enum ArchiveMode diff --git a/src/include/storage/proc.h b/src/include/storage/proc.h index ea3ca9c48a6..c52c3421735 100644 --- a/src/include/storage/proc.h +++ b/src/include/storage/proc.h @@ -293,6 +293,7 @@ struct PGPROC bool fpVXIDLock; /* are we holding a fast-path VXID lock? */ LocalTransactionId fpLocalTransactionId; /* lxid for fast-path VXID * lock */ + CommitSeqNo lastCommittedCSN; /* * Support for lock groups. Use LockHashPartitionLockByProc on the group diff --git a/src/include/storage/procarray.h b/src/include/storage/procarray.h index d8cae3ce1c5..64db8a3aa8b 100644 --- a/src/include/storage/procarray.h +++ b/src/include/storage/procarray.h @@ -96,4 +96,6 @@ extern void ProcArraySetReplicationSlotXmin(TransactionId xmin, extern void ProcArrayGetReplicationSlotXmin(TransactionId *xmin, TransactionId *catalog_xmin); +extern snapshot_hook_type snapshot_hook; + #endif /* PROCARRAY_H */ diff --git a/src/include/utils/snapmgr.h b/src/include/utils/snapmgr.h index 980d37a1947..d05de790428 100644 --- a/src/include/utils/snapmgr.h +++ b/src/include/utils/snapmgr.h @@ -18,6 +18,9 @@ #include "utils/resowner.h" #include "utils/snapshot.h" +#ifndef SNAPSHOT_H +typedef void (*snapshot_hook_type) (Snapshot snapshot); +#endif /* * The structure used to map times to TransactionId values for the "snapshot @@ -120,7 +123,7 @@ extern void PushActiveSnapshotWithLevel(Snapshot snapshot, int snap_level); extern void PushCopiedSnapshot(Snapshot snapshot); extern void UpdateActiveSnapshotCommandId(void); extern void PopActiveSnapshot(void); -extern Snapshot GetActiveSnapshot(void); +extern PGDLLIMPORT Snapshot GetActiveSnapshot(void); extern bool ActiveSnapshotSet(void); extern Snapshot RegisterSnapshot(Snapshot snapshot); @@ -178,4 +181,10 @@ extern void SerializeSnapshot(Snapshot snapshot, char *start_address); extern Snapshot RestoreSnapshot(char *start_address); extern void RestoreTransactionSnapshot(Snapshot snapshot, void *source_pgproc); +typedef void (*reset_xmin_hook_type) (void); + +extern snapshot_hook_type snapshot_register_hook; +extern snapshot_hook_type snapshot_deregister_hook; +extern reset_xmin_hook_type reset_xmin_hook; + #endif /* SNAPMGR_H */ diff --git a/src/include/utils/snapshot.h b/src/include/utils/snapshot.h index 583a667a40a..d4392d7dc04 100644 --- a/src/include/utils/snapshot.h +++ b/src/include/utils/snapshot.h @@ -122,6 +122,13 @@ typedef struct SnapshotData *Snapshot; #define InvalidSnapshot ((Snapshot) NULL) +typedef struct +{ + uint64 undoLocation; /* undo log location retained by this snapshot */ + uint64 xmin; + pairingheap_node ph_node; +} RetainUndoLocationPHNode; + /* * Struct representing all kind of possible snapshots. * @@ -214,6 +221,12 @@ typedef struct SnapshotData * transactions completed since the last GetSnapshotData(). */ uint64 snapXactCompletionCount; + + RetainUndoLocationPHNode undoRegularLocationPhNode; + RetainUndoLocationPHNode undoSystemLocationPhNode; + CommitSeqNo snapshotcsn; } SnapshotData; +typedef void (*snapshot_hook_type) (Snapshot snapshot); + #endif /* SNAPSHOT_H */ From 8bb51d2c28f0727955238253204c4b507f909551 Mon Sep 17 00:00:00 2001 From: Alexander Korotkov Date: Mon, 13 Dec 2021 01:57:10 +0300 Subject: [PATCH 010/102] Hooks for builtin functions and datatypes and orioledb recovery * Added SearchCatCacheInternal_hook, SearchCatCacheList_hook * Added SysCacheGetAttr_hook --- src/backend/commands/indexcmds.c | 4 ++++ src/backend/executor/execExpr.c | 3 +++ src/backend/utils/cache/catcache.c | 25 +++++++++++++++++++++++++ src/backend/utils/cache/syscache.c | 10 ++++++++-- src/backend/utils/cache/typcache.c | 14 ++++++++++++++ src/backend/utils/fmgr/fmgr.c | 4 ++-- src/include/commands/defrem.h | 3 +++ src/include/utils/catcache.h | 23 +++++++++++++++++++++++ src/include/utils/fmgrtab.h | 3 +++ src/include/utils/typcache.h | 5 +++++ 10 files changed, 90 insertions(+), 4 deletions(-) diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c index 80da56d9743..3ba96c63264 100644 --- a/src/backend/commands/indexcmds.c +++ b/src/backend/commands/indexcmds.c @@ -70,6 +70,7 @@ #include "utils/snapmgr.h" #include "utils/syscache.h" +GetDefaultOpClass_hook_type GetDefaultOpClass_hook = NULL; /* non-export function prototypes */ static bool CompareOpclassOptions(Datum *opts1, Datum *opts2, int natts); @@ -2291,6 +2292,9 @@ GetDefaultOpClass(Oid type_id, Oid am_id) /* If it's a domain, look at the base type instead */ type_id = getBaseType(type_id); + if (GetDefaultOpClass_hook) + return GetDefaultOpClass_hook(type_id, am_id); + tcategory = TypeCategory(type_id); /* diff --git a/src/backend/executor/execExpr.c b/src/backend/executor/execExpr.c index ca0d38b3095..0dcc4ba6195 100644 --- a/src/backend/executor/execExpr.c +++ b/src/backend/executor/execExpr.c @@ -48,6 +48,9 @@ #include "utils/array.h" #include "utils/builtins.h" #include "utils/datum.h" +#include "utils/json.h" +#include "utils/jsonb.h" +#include "utils/jsonpath.h" #include "utils/lsyscache.h" #include "utils/typcache.h" diff --git a/src/backend/utils/cache/catcache.c b/src/backend/utils/cache/catcache.c index d3527cbe3f3..105d3c7e53b 100644 --- a/src/backend/utils/cache/catcache.c +++ b/src/backend/utils/cache/catcache.c @@ -83,6 +83,10 @@ static CatCInProgress *catcache_in_progress_stack = NULL; /* Cache management header --- pointer is NULL until created */ static CatCacheHeader *CacheHdr = NULL; +SearchCatCacheInternal_hook_type SearchCatCacheInternal_hook = NULL; +SearchCatCacheList_hook_type SearchCatCacheList_hook = NULL; +GetCatCacheHashValue_hook_type GetCatCacheHashValue_hook = NULL; + static inline HeapTuple SearchCatCacheInternal(CatCache *cache, int nkeys, Datum v1, Datum v2, @@ -1319,6 +1323,14 @@ SearchCatCacheInternal(CatCache *cache, dlist_head *bucket; CatCTup *ct; + if (SearchCatCacheInternal_hook) + { + ct = SearchCatCacheInternal_hook(cache, nkeys, v1, v2, v3, v4); + + if (ct) + return &ct->tuple; + } + /* Make sure we're in an xact, even if this ends up being a cache hit */ Assert(IsTransactionState()); @@ -1604,6 +1616,11 @@ GetCatCacheHashValue(CatCache *cache, Datum v3, Datum v4) { + if (GetCatCacheHashValue_hook) + { + return GetCatCacheHashValue_hook(cache, cache->cc_nkeys, + v1, v2, v3, v4); + } /* * one-time startup overhead for each cache */ @@ -1656,6 +1673,14 @@ SearchCatCacheList(CatCache *cache, CatCInProgress *save_in_progress; CatCInProgress in_progress_ent; + if (SearchCatCacheList_hook) + { + cl = SearchCatCacheList_hook(cache, nkeys, v1, v2, v3); + + if (cl) + return cl; + } + /* * one-time startup overhead for each cache */ diff --git a/src/backend/utils/cache/syscache.c b/src/backend/utils/cache/syscache.c index 40517895d82..0bbd0b5c775 100644 --- a/src/backend/utils/cache/syscache.c +++ b/src/backend/utils/cache/syscache.c @@ -699,6 +699,7 @@ static int SysCacheSupportingRelOidSize; static int oid_compare(const void *a, const void *b); +SysCacheGetAttr_hook_type SysCacheGetAttr_hook = NULL; /* * InitCatalogCache - initialize the caches @@ -1202,6 +1203,7 @@ SysCacheGetAttr(int cacheId, HeapTuple tup, AttrNumber attributeNumber, bool *isNull) { + TupleDesc cc_tupdesc = SysCache[cacheId]->cc_tupdesc; /* * We just need to get the TupleDesc out of the cache entry, and then we * can apply heap_getattr(). Normally the cache control data is already @@ -1211,14 +1213,18 @@ SysCacheGetAttr(int cacheId, HeapTuple tup, if (cacheId < 0 || cacheId >= SysCacheSize || !PointerIsValid(SysCache[cacheId])) elog(ERROR, "invalid cache ID: %d", cacheId); - if (!PointerIsValid(SysCache[cacheId]->cc_tupdesc)) + + if (!PointerIsValid(cc_tupdesc) && SysCacheGetAttr_hook) + cc_tupdesc = SysCacheGetAttr_hook(SysCache[cacheId]); + if (!PointerIsValid(cc_tupdesc)) { InitCatCachePhase2(SysCache[cacheId], false); Assert(PointerIsValid(SysCache[cacheId]->cc_tupdesc)); + cc_tupdesc = SysCache[cacheId]->cc_tupdesc; } return heap_getattr(tup, attributeNumber, - SysCache[cacheId]->cc_tupdesc, + cc_tupdesc, isNull); } diff --git a/src/backend/utils/cache/typcache.c b/src/backend/utils/cache/typcache.c index 608cd5e8e43..71619cf04d0 100644 --- a/src/backend/utils/cache/typcache.c +++ b/src/backend/utils/cache/typcache.c @@ -290,6 +290,8 @@ static int32 NextRecordTypmod = 0; /* number of entries used */ * as identifiers, so we start the counter at INVALID_TUPLEDESC_IDENTIFIER. */ static uint64 tupledesc_id_counter = INVALID_TUPLEDESC_IDENTIFIER; +load_typcache_tupdesc_hook_type load_typcache_tupdesc_hook = NULL; +load_enum_cache_data_hook_type load_enum_cache_data_hook = NULL; static void load_typcache_tupdesc(TypeCacheEntry *typentry); static void load_rangetype_info(TypeCacheEntry *typentry); @@ -879,6 +881,12 @@ load_typcache_tupdesc(TypeCacheEntry *typentry) { Relation rel; + if (load_typcache_tupdesc_hook) + { + load_typcache_tupdesc_hook(typentry); + return; + } + if (!OidIsValid(typentry->typrelid)) /* should not happen */ elog(ERROR, "invalid typrelid for composite type %u", typentry->type_id); @@ -2560,6 +2568,12 @@ load_enum_cache_data(TypeCacheEntry *tcache) int bm_size, start_pos; + if (load_enum_cache_data_hook) + { + load_enum_cache_data_hook(tcache); + return; + } + /* Check that this is actually an enum */ if (tcache->typtype != TYPTYPE_ENUM) ereport(ERROR, diff --git a/src/backend/utils/fmgr/fmgr.c b/src/backend/utils/fmgr/fmgr.c index 9208c31fe06..85811af84ff 100644 --- a/src/backend/utils/fmgr/fmgr.c +++ b/src/backend/utils/fmgr/fmgr.c @@ -72,7 +72,7 @@ extern Datum fmgr_security_definer(PG_FUNCTION_ARGS); * or name, but search by Oid is much faster. */ -static const FmgrBuiltin * +const FmgrBuiltin * fmgr_isbuiltin(Oid id) { uint16 index; @@ -97,7 +97,7 @@ fmgr_isbuiltin(Oid id) * the array with the same name, but they should all point to the same * routine. */ -static const FmgrBuiltin * +const FmgrBuiltin * fmgr_lookupByName(const char *name) { int i; diff --git a/src/include/commands/defrem.h b/src/include/commands/defrem.h index 83423d3ba13..0b0d6763ff6 100644 --- a/src/include/commands/defrem.h +++ b/src/include/commands/defrem.h @@ -158,4 +158,7 @@ extern int defGetTypeLength(DefElem *def); extern List *defGetStringList(DefElem *def); extern void errorConflictingDefElem(DefElem *defel, ParseState *pstate) pg_attribute_noreturn(); +typedef Oid (*GetDefaultOpClass_hook_type)(Oid type_id, Oid am_id); +extern PGDLLIMPORT GetDefaultOpClass_hook_type GetDefaultOpClass_hook; + #endif /* DEFREM_H */ diff --git a/src/include/utils/catcache.h b/src/include/utils/catcache.h index 84ca10704bc..98d442687a1 100644 --- a/src/include/utils/catcache.h +++ b/src/include/utils/catcache.h @@ -233,5 +233,28 @@ extern void PrepareToInvalidateCacheTuple(Relation relation, extern void PrintCatCacheLeakWarning(HeapTuple tuple); extern void PrintCatCacheListLeakWarning(CatCList *list); +typedef CatCTup *(*SearchCatCacheInternal_hook_type)(CatCache *cache, + int nkeys, + Datum v1, Datum v2, + Datum v3, Datum v4); +extern SearchCatCacheInternal_hook_type SearchCatCacheInternal_hook; + +typedef CatCList *(*SearchCatCacheList_hook_type)(CatCache *cache, + int nkeys, + Datum v1, + Datum v2, + Datum v3); +extern SearchCatCacheList_hook_type SearchCatCacheList_hook; + +typedef TupleDesc (*SysCacheGetAttr_hook_type)(CatCache *SysCache); +extern SysCacheGetAttr_hook_type SysCacheGetAttr_hook; + +typedef uint32 (*GetCatCacheHashValue_hook_type)(CatCache *cache, + int nkeys, + Datum v1, + Datum v2, + Datum v3, + Datum v4); +extern GetCatCacheHashValue_hook_type GetCatCacheHashValue_hook; #endif /* CATCACHE_H */ diff --git a/src/include/utils/fmgrtab.h b/src/include/utils/fmgrtab.h index 838ffe3bc1c..f7e416653a6 100644 --- a/src/include/utils/fmgrtab.h +++ b/src/include/utils/fmgrtab.h @@ -46,4 +46,7 @@ extern PGDLLIMPORT const Oid fmgr_last_builtin_oid; /* highest function OID in #define InvalidOidBuiltinMapping PG_UINT16_MAX extern PGDLLIMPORT const uint16 fmgr_builtin_oid_index[]; +extern const FmgrBuiltin *fmgr_isbuiltin(Oid id); +extern const FmgrBuiltin *fmgr_lookupByName(const char *name); + #endif /* FMGRTAB_H */ diff --git a/src/include/utils/typcache.h b/src/include/utils/typcache.h index 95f3a9ee308..77d57927de0 100644 --- a/src/include/utils/typcache.h +++ b/src/include/utils/typcache.h @@ -206,4 +206,9 @@ extern void SharedRecordTypmodRegistryInit(SharedRecordTypmodRegistry *, extern void SharedRecordTypmodRegistryAttach(SharedRecordTypmodRegistry *); +typedef void (*load_typcache_tupdesc_hook_type)(TypeCacheEntry *typentry); +extern PGDLLIMPORT load_typcache_tupdesc_hook_type load_typcache_tupdesc_hook; +typedef void (*load_enum_cache_data_hook_type)(TypeCacheEntry *tcache); +extern PGDLLIMPORT load_enum_cache_data_hook_type load_enum_cache_data_hook; + #endif /* TYPCACHE_H */ From 00d4f624e62f7d5a822d328bc03c49f80d3ff1d5 Mon Sep 17 00:00:00 2001 From: Alexander Korotkov Date: Mon, 13 Dec 2021 02:01:16 +0300 Subject: [PATCH 011/102] Recovery and checkpointer hooks --- src/backend/access/transam/transam.c | 1 + src/backend/access/transam/xact.c | 4 ++++ src/backend/access/transam/xlog.c | 18 ++++++++++++++++++ src/backend/access/transam/xlogrecovery.c | 2 ++ src/backend/storage/buffer/bufmgr.c | 6 +++++- src/include/access/xact.h | 3 +++ src/include/access/xlog.h | 10 ++++++++++ 7 files changed, 43 insertions(+), 1 deletion(-) diff --git a/src/backend/access/transam/transam.c b/src/backend/access/transam/transam.c index 7629904bbf7..d118c5fd61a 100644 --- a/src/backend/access/transam/transam.c +++ b/src/backend/access/transam/transam.c @@ -22,6 +22,7 @@ #include "access/clog.h" #include "access/subtrans.h" #include "access/transam.h" +#include "storage/proc.h" #include "utils/snapmgr.h" /* diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c index 8f3adde828d..dab73df4b2c 100644 --- a/src/backend/access/transam/xact.c +++ b/src/backend/access/transam/xact.c @@ -322,6 +322,7 @@ typedef struct SubXactCallbackItem static SubXactCallbackItem *SubXact_callbacks = NULL; +xact_redo_hook_type xact_redo_hook = NULL; /* local function prototypes */ static void AssignTransactionId(TransactionState s); @@ -5965,6 +5966,9 @@ xact_redo_commit(xl_xact_parsed_commit *parsed, TransactionId max_xid; TimestampTz commit_time; + if (xact_redo_hook) + xact_redo_hook(xid, lsn); + Assert(TransactionIdIsValid(xid)); max_xid = TransactionIdLatest(xid, parsed->nsubxacts, parsed->subxacts); diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index f01caa44e9b..6b4e70c5178 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -147,6 +147,11 @@ bool XLOG_DEBUG = false; int wal_segment_size = DEFAULT_XLOG_SEG_SIZE; +/* Hook for plugins to get control in CheckPointGuts() */ +CheckPoint_hook_type CheckPoint_hook = NULL; +double CheckPointProgress; +after_checkpoint_cleanup_hook_type after_checkpoint_cleanup_hook = NULL; + /* * Number of WAL insertion locks to use. A higher value allows more insertions * to happen concurrently, but adds some CPU overhead to flushing the WAL, @@ -5053,6 +5058,7 @@ StartupXLOG(void) XLogRecPtr missingContrecPtr; TransactionId oldestActiveXID; bool promoted = false; + bool wasInRecovery; /* * We should have an aux process resource owner to use, and we should not @@ -5693,6 +5699,8 @@ StartupXLOG(void) */ PreallocXlogFiles(EndOfLog, newTLI); + wasInRecovery = InRecovery; + /* * Okay, we're officially UP. */ @@ -5771,6 +5779,9 @@ StartupXLOG(void) */ CompleteCommitTsInitialization(); + if (wasInRecovery && after_checkpoint_cleanup_hook) + after_checkpoint_cleanup_hook(EndOfLog, 0); + /* * All done with end-of-recovery actions. * @@ -6913,6 +6924,9 @@ CreateCheckPoint(int flags) if (!RecoveryInProgress()) TruncateSUBTRANS(GetOldestTransactionIdConsideredRunning()); + if (after_checkpoint_cleanup_hook) + after_checkpoint_cleanup_hook(ProcLastRecPtr, flags); + /* Real work is done; log and update stats. */ LogCheckpointEnd(false); @@ -7071,6 +7085,8 @@ CreateOverwriteContrecordRecord(XLogRecPtr aborted_lsn, XLogRecPtr pagePtr, static void CheckPointGuts(XLogRecPtr checkPointRedo, int flags) { + if (CheckPoint_hook) + CheckPoint_hook(checkPointRedo, flags); CheckPointRelationMap(); CheckPointReplicationSlots(); CheckPointSnapBuild(); @@ -9061,3 +9077,5 @@ SetWalWriterSleeping(bool sleeping) XLogCtl->WalWriterSleeping = sleeping; SpinLockRelease(&XLogCtl->info_lck); } + +void (*RedoShutdownHook) (void) = NULL; diff --git a/src/backend/access/transam/xlogrecovery.c b/src/backend/access/transam/xlogrecovery.c index de49bd65c06..6e09937b018 100644 --- a/src/backend/access/transam/xlogrecovery.c +++ b/src/backend/access/transam/xlogrecovery.c @@ -1806,6 +1806,8 @@ PerformWalRecovery(void) * exit with special return code to request shutdown of * postmaster. Log messages issued from postmaster. */ + if (RedoShutdownHook != NULL) + RedoShutdownHook(); proc_exit(3); case RECOVERY_TARGET_ACTION_PAUSE: diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c index e007bd46e93..4ccbc87acf3 100644 --- a/src/backend/storage/buffer/bufmgr.c +++ b/src/backend/storage/buffer/bufmgr.c @@ -2667,6 +2667,7 @@ BufferSync(int flags) BufferDesc *bufHdr = NULL; CkptTsStatus *ts_stat = (CkptTsStatus *) DatumGetPointer(binaryheap_first(ts_heap)); + double progress; buf_id = CkptBufferIds[ts_stat->index].buf_id; Assert(buf_id != -1); @@ -2721,7 +2722,10 @@ BufferSync(int flags) * * (This will check for barrier events even if it doesn't sleep.) */ - CheckpointWriteDelay(flags, (double) num_processed / num_to_scan); + progress = (double) num_processed / num_to_scan; + progress = CheckPointProgress + progress * (1 - CheckPointProgress); + + CheckpointWriteDelay(flags, progress); } /* diff --git a/src/include/access/xact.h b/src/include/access/xact.h index 7d3b9446e62..e8200d55720 100644 --- a/src/include/access/xact.h +++ b/src/include/access/xact.h @@ -527,4 +527,7 @@ extern void EnterParallelMode(void); extern void ExitParallelMode(void); extern bool IsInParallelMode(void); +typedef void (*xact_redo_hook_type) (TransactionId xid, XLogRecPtr lsn); +extern xact_redo_hook_type xact_redo_hook; + #endif /* XACT_H */ diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h index f53348234f7..ec6a6b17fe9 100644 --- a/src/include/access/xlog.h +++ b/src/include/access/xlog.h @@ -301,4 +301,14 @@ extern SessionBackupState get_backup_status(void); /* files to signal promotion to primary */ #define PROMOTE_SIGNAL_FILE "promote" +typedef void (*CheckPoint_hook_type) (XLogRecPtr checkPointRedo, int flags); +extern PGDLLIMPORT CheckPoint_hook_type CheckPoint_hook; +extern double CheckPointProgress; +typedef void (*after_checkpoint_cleanup_hook_type)(XLogRecPtr checkPointRedo, + int flags); +extern PGDLLIMPORT after_checkpoint_cleanup_hook_type + after_checkpoint_cleanup_hook; + +extern void (*RedoShutdownHook) (void); + #endif /* XLOG_H */ From 1bc0c59cb40b1eeb7f3888433480ae550e6774ad Mon Sep 17 00:00:00 2001 From: Alexander Korotkov Date: Mon, 13 Dec 2021 02:07:13 +0300 Subject: [PATCH 012/102] Allow skipping logging for AccessExclusiveLock --- src/backend/storage/lmgr/lock.c | 14 ++++++++++++-- src/include/storage/lock.h | 1 + 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/src/backend/storage/lmgr/lock.c b/src/backend/storage/lmgr/lock.c index ec8113bdbb3..65367b25253 100644 --- a/src/backend/storage/lmgr/lock.c +++ b/src/backend/storage/lmgr/lock.c @@ -797,7 +797,7 @@ LockAcquireExtended(const LOCKTAG *locktag, bool reportMemoryError, LOCALLOCK **locallockp) { - LOCKMETHODID lockmethodid = locktag->locktag_lockmethodid; + LOCKMETHODID lockmethodid; LockMethod lockMethodTable; LOCALLOCKTAG localtag; LOCALLOCK *locallock; @@ -809,6 +809,15 @@ LockAcquireExtended(const LOCKTAG *locktag, LWLock *partitionLock; bool found_conflict; bool log_lock = false; + bool no_log_lock = false; + + if (locktag->locktag_lockmethodid == NO_LOG_LOCKMETHOD) + { + ((LOCKTAG *)locktag)->locktag_lockmethodid = DEFAULT_LOCKMETHOD; + no_log_lock = true; + } + + lockmethodid = locktag->locktag_lockmethodid; if (lockmethodid <= 0 || lockmethodid >= lengthof(LockMethods)) elog(ERROR, "unrecognized lock method: %d", lockmethodid); @@ -923,7 +932,8 @@ LockAcquireExtended(const LOCKTAG *locktag, if (lockmode >= AccessExclusiveLock && locktag->locktag_type == LOCKTAG_RELATION && !RecoveryInProgress() && - XLogStandbyInfoActive()) + XLogStandbyInfoActive() && + !no_log_lock) { LogAccessExclusiveLockPrepare(); log_lock = true; diff --git a/src/include/storage/lock.h b/src/include/storage/lock.h index f67056a82b5..f4b02c12261 100644 --- a/src/include/storage/lock.h +++ b/src/include/storage/lock.h @@ -124,6 +124,7 @@ typedef uint16 LOCKMETHODID; /* These identify the known lock methods */ #define DEFAULT_LOCKMETHOD 1 #define USER_LOCKMETHOD 2 +#define NO_LOG_LOCKMETHOD 255 /* Skip logging of AccessExclusiveLock */ /* * LOCKTAG is the key information needed to look up a LOCK item in the From 93b8bb4c2209384255a5d17f8752a47833a2b0f9 Mon Sep 17 00:00:00 2001 From: Alexander Korotkov Date: Mon, 13 Dec 2021 02:08:21 +0300 Subject: [PATCH 013/102] Add convenience functions IsFatalError() have_backup_in_progress() SnapBuildNextPhaseAt() DoLocalLockExist() --- src/backend/access/transam/xlog.c | 13 +++++++++++++ src/backend/postmaster/postmaster.c | 6 ++++++ src/backend/replication/logical/snapbuild.c | 11 +++++++++++ src/backend/storage/lmgr/lock.c | 21 +++++++++++++++++++++ src/include/access/xlog.h | 1 + src/include/postmaster/postmaster.h | 1 + src/include/replication/snapbuild.h | 1 + src/include/storage/lock.h | 1 + 8 files changed, 55 insertions(+) diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 6b4e70c5178..9857691e417 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -8662,6 +8662,19 @@ get_backup_status(void) return sessionBackupState; } +/* + * Check if there is a backup in progress. + * + * We do this check without lock assuming 32-bit reads are atomic. In fact, + * the false result means that there was at least a moment of time when there + * were no backups. + */ +bool +have_backup_in_progress(void) +{ + return (XLogCtl->Insert.runningBackups > 0); +} + /* * do_pg_backup_stop * diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c index 5d328b9807c..bfc23ffc751 100644 --- a/src/backend/postmaster/postmaster.c +++ b/src/backend/postmaster/postmaster.c @@ -579,6 +579,12 @@ int postmaster_alive_fds[2] = {-1, -1}; HANDLE PostmasterHandle; #endif +bool +IsFatalError(void) +{ + return FatalError; +} + /* * Postmaster main entry point */ diff --git a/src/backend/replication/logical/snapbuild.c b/src/backend/replication/logical/snapbuild.c index 6c49dda75b6..601006db7a7 100644 --- a/src/backend/replication/logical/snapbuild.c +++ b/src/backend/replication/logical/snapbuild.c @@ -404,6 +404,17 @@ SnapBuildCurrentState(SnapBuild *builder) return builder->state; } +/* + * An which transaction id the next phase of initial snapshot building will + * happen? + */ +TransactionId +SnapBuildNextPhaseAt(SnapBuild *builder) +{ + return builder->next_phase_at; +} + + /* * Return the LSN at which the two-phase decoding was first enabled. */ diff --git a/src/backend/storage/lmgr/lock.c b/src/backend/storage/lmgr/lock.c index 65367b25253..f04d6d4d5c8 100644 --- a/src/backend/storage/lmgr/lock.c +++ b/src/backend/storage/lmgr/lock.c @@ -648,6 +648,27 @@ GetLockMethodLocalHash(void) } #endif +/* + * Returns true if any LOCKMODE lock with given locktag exist in LocalMethodLocalHash. + */ +bool +DoLocalLockExist(const LOCKTAG *locktag) +{ + HASH_SEQ_STATUS scan_status; + LOCALLOCK* locallock; + + hash_seq_init(&scan_status, LockMethodLocalHash); + while ((locallock = (LOCALLOCK *) hash_seq_search(&scan_status)) != NULL) + { + if (memcmp(&locallock->tag.lock, locktag, sizeof(LOCKTAG)) == 0) + { + hash_seq_term(&scan_status); + return true; + } + } + return false; +} + /* * LockHasWaiters -- look up 'locktag' and check if releasing this * lock would wake up other processes waiting for it. diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h index ec6a6b17fe9..055bfea2fd6 100644 --- a/src/include/access/xlog.h +++ b/src/include/access/xlog.h @@ -286,6 +286,7 @@ extern void do_pg_backup_start(const char *backupidstr, bool fast, StringInfo tblspcmapfile); extern void do_pg_backup_stop(BackupState *state, bool waitforarchive); extern void do_pg_abort_backup(int code, Datum arg); +extern bool have_backup_in_progress(void); extern void register_persistent_abort_backup_handler(void); extern SessionBackupState get_backup_status(void); diff --git a/src/include/postmaster/postmaster.h b/src/include/postmaster/postmaster.h index 3b3889c58c0..ddc409ec240 100644 --- a/src/include/postmaster/postmaster.h +++ b/src/include/postmaster/postmaster.h @@ -50,6 +50,7 @@ extern PGDLLIMPORT int postmaster_alive_fds[2]; extern PGDLLIMPORT const char *progname; +extern bool IsFatalError(void); extern void PostmasterMain(int argc, char *argv[]) pg_attribute_noreturn(); extern void ClosePostmasterPorts(bool am_syslogger); extern void InitProcessGlobals(void); diff --git a/src/include/replication/snapbuild.h b/src/include/replication/snapbuild.h index f49b941b53e..071fca6d3b5 100644 --- a/src/include/replication/snapbuild.h +++ b/src/include/replication/snapbuild.h @@ -73,6 +73,7 @@ extern void SnapBuildClearExportedSnapshot(void); extern void SnapBuildResetExportedSnapshotState(void); extern SnapBuildState SnapBuildCurrentState(SnapBuild *builder); +extern TransactionId SnapBuildNextPhaseAt(SnapBuild *builder); extern Snapshot SnapBuildGetOrBuildSnapshot(SnapBuild *builder); extern bool SnapBuildXactNeedsSkip(SnapBuild *builder, XLogRecPtr ptr); diff --git a/src/include/storage/lock.h b/src/include/storage/lock.h index f4b02c12261..d7b095b1464 100644 --- a/src/include/storage/lock.h +++ b/src/include/storage/lock.h @@ -551,6 +551,7 @@ extern LockMethod GetLocksMethodTable(const LOCK *lock); extern LockMethod GetLockTagsMethodTable(const LOCKTAG *locktag); extern uint32 LockTagHashCode(const LOCKTAG *locktag); extern bool DoLockModesConflict(LOCKMODE mode1, LOCKMODE mode2); +extern bool DoLocalLockExist(const LOCKTAG *locktag); extern LockAcquireResult LockAcquire(const LOCKTAG *locktag, LOCKMODE lockmode, bool sessionLock, From 9e00ca2937adab1ce0f3b3b9477ac98904ee6033 Mon Sep 17 00:00:00 2001 From: Alexander Korotkov Date: Mon, 13 Dec 2021 02:11:14 +0300 Subject: [PATCH 014/102] PERFORM_DELETION_OF_RELATION flag for object hooks --- src/backend/catalog/dependency.c | 36 +++++++++++++++++++++++++++++++- src/include/catalog/dependency.h | 2 ++ 2 files changed, 37 insertions(+), 1 deletion(-) diff --git a/src/backend/catalog/dependency.c b/src/backend/catalog/dependency.c index 494738824cb..8627810dc23 100644 --- a/src/backend/catalog/dependency.c +++ b/src/backend/catalog/dependency.c @@ -242,6 +242,7 @@ deleteObjectsInList(ObjectAddresses *targetObjects, Relation *depRel, int flags) { int i; + bool *depends_on_relation; /* * Keep track of objects for event triggers, if necessary. @@ -269,6 +270,33 @@ deleteObjectsInList(ObjectAddresses *targetObjects, Relation *depRel, } } + depends_on_relation = palloc0(sizeof(bool) * targetObjects->numrefs); + + for (i = targetObjects->numrefs - 1; i >= 0; i--) + { + ObjectAddressExtra *thisextra = targetObjects->extras + i; + int j; + + if (thisextra->dependee.classId == RelationRelationId && + thisextra->dependee.objectSubId == 0) + { + depends_on_relation[i] = true; + continue; + } + + for (j = i + 1; j < targetObjects->numrefs; j++) + { + ObjectAddress *depobj = targetObjects->refs + j; + if (depobj->classId == thisextra->dependee.classId && + depobj->objectId == thisextra->dependee.objectId && + depobj->objectSubId == thisextra->dependee.objectSubId) + { + depends_on_relation[i] = depends_on_relation[j]; + break; + } + } + } + /* * Delete all the objects in the proper order, except that if told to, we * should skip the original object(s). @@ -277,13 +305,19 @@ deleteObjectsInList(ObjectAddresses *targetObjects, Relation *depRel, { ObjectAddress *thisobj = targetObjects->refs + i; ObjectAddressExtra *thisextra = targetObjects->extras + i; + int temp_flags = flags; if ((flags & PERFORM_DELETION_SKIP_ORIGINAL) && (thisextra->flags & DEPFLAG_ORIGINAL)) continue; - deleteOneObject(thisobj, depRel, flags); + if (depends_on_relation[i]) + temp_flags |= PERFORM_DELETION_OF_RELATION; + + deleteOneObject(thisobj, depRel, temp_flags); } + + pfree(depends_on_relation); } /* diff --git a/src/include/catalog/dependency.h b/src/include/catalog/dependency.h index ffd5e9dc82d..4cf8df01077 100644 --- a/src/include/catalog/dependency.h +++ b/src/include/catalog/dependency.h @@ -140,6 +140,8 @@ typedef enum ObjectClass #define PERFORM_DELETION_SKIP_EXTENSIONS 0x0010 /* keep extensions */ #define PERFORM_DELETION_CONCURRENT_LOCK 0x0020 /* normal drop with * concurrent lock mode */ +#define PERFORM_DELETION_OF_RELATION 0x0040 /* used for orioledb + * extension */ /* in dependency.c */ From a1038406ec855fd36d42cd90533440abcfb5a6d0 Mon Sep 17 00:00:00 2001 From: Alexander Korotkov Date: Mon, 13 Dec 2021 02:20:32 +0300 Subject: [PATCH 015/102] Expose existing planning funcs and structs --- src/backend/catalog/index.c | 5 +---- src/backend/commands/explain.c | 14 +++----------- src/backend/commands/indexcmds.c | 8 ++------ src/backend/optimizer/path/indxpath.c | 13 +------------ src/backend/optimizer/plan/createplan.c | 16 ++++++++++------ src/include/catalog/index.h | 2 ++ src/include/commands/defrem.h | 4 ++++ src/include/commands/explain.h | 8 ++++++++ src/include/optimizer/paths.h | 12 ++++++++++++ src/include/optimizer/planmain.h | 5 +++++ 10 files changed, 48 insertions(+), 39 deletions(-) diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c index bc08ab66bd6..0dacdea43b6 100644 --- a/src/backend/catalog/index.c +++ b/src/backend/catalog/index.c @@ -125,9 +125,6 @@ static void UpdateIndexRelation(Oid indexoid, Oid heapoid, bool immediate, bool isvalid, bool isready); -static void index_update_stats(Relation rel, - bool hasindex, - double reltuples); static void IndexCheckExclusion(Relation heapRelation, Relation indexRelation, IndexInfo *indexInfo); @@ -2807,7 +2804,7 @@ FormIndexDatum(IndexInfo *indexInfo, * index. When updating an index, it's important because some index AMs * expect a relcache flush to occur after REINDEX. */ -static void +void index_update_stats(Relation rel, bool hasindex, double reltuples) diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c index 6c2e5c8a4f9..b3421e6e5a8 100644 --- a/src/backend/commands/explain.c +++ b/src/backend/commands/explain.c @@ -64,9 +64,6 @@ static void report_triggers(ResultRelInfo *rInfo, bool show_relname, ExplainState *es); static double elapsed_time(instr_time *starttime); static bool ExplainPreScanNode(PlanState *planstate, Bitmapset **rels_used); -static void ExplainNode(PlanState *planstate, List *ancestors, - const char *relationship, const char *plan_name, - ExplainState *es); static void show_plan_tlist(PlanState *planstate, List *ancestors, ExplainState *es); static void show_expression(Node *node, const char *qlabel, @@ -75,9 +72,6 @@ static void show_expression(Node *node, const char *qlabel, static void show_qual(List *qual, const char *qlabel, PlanState *planstate, List *ancestors, bool useprefix, ExplainState *es); -static void show_scan_qual(List *qual, const char *qlabel, - PlanState *planstate, List *ancestors, - ExplainState *es); static void show_upper_qual(List *qual, const char *qlabel, PlanState *planstate, List *ancestors, ExplainState *es); @@ -114,8 +108,6 @@ static void show_memoize_info(MemoizeState *mstate, List *ancestors, static void show_hashagg_info(AggState *aggstate, ExplainState *es); static void show_tidbitmap_info(BitmapHeapScanState *planstate, ExplainState *es); -static void show_instrumentation_count(const char *qlabel, int which, - PlanState *planstate, ExplainState *es); static void show_foreignscan_info(ForeignScanState *fsstate, ExplainState *es); static void show_eval_params(Bitmapset *bms_params, ExplainState *es); static const char *explain_get_index_name(Oid indexId); @@ -1174,7 +1166,7 @@ ExplainPreScanNode(PlanState *planstate, Bitmapset **rels_used) * to the nesting depth of logical output groups, and therefore is controlled * by ExplainOpenGroup/ExplainCloseGroup. */ -static void +void ExplainNode(PlanState *planstate, List *ancestors, const char *relationship, const char *plan_name, ExplainState *es) @@ -2346,7 +2338,7 @@ show_qual(List *qual, const char *qlabel, /* * Show a qualifier expression for a scan plan node */ -static void +void show_scan_qual(List *qual, const char *qlabel, PlanState *planstate, List *ancestors, ExplainState *es) @@ -3437,7 +3429,7 @@ show_tidbitmap_info(BitmapHeapScanState *planstate, ExplainState *es) * * "which" identifies which instrumentation counter to print */ -static void +void show_instrumentation_count(const char *qlabel, int which, PlanState *planstate, ExplainState *es) { diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c index 3ba96c63264..90986c605e0 100644 --- a/src/backend/commands/indexcmds.c +++ b/src/backend/commands/indexcmds.c @@ -89,11 +89,7 @@ static void ComputeIndexAttrs(IndexInfo *indexInfo, Oid ddl_userid, int ddl_sec_context, int *ddl_save_nestlevel); -static char *ChooseIndexName(const char *tabname, Oid namespaceId, - List *colnames, List *exclusionOpNames, - bool primary, bool isconstraint); static char *ChooseIndexNameAddition(List *colnames); -static List *ChooseIndexColumnNames(List *indexElems); static void ReindexIndex(RangeVar *indexRelation, ReindexParams *params, bool isTopLevel); static void RangeVarCallbackForReindexIndex(const RangeVar *relation, @@ -2544,7 +2540,7 @@ ChooseRelationName(const char *name1, const char *name2, * * The argument list is pretty ad-hoc :-( */ -static char * +char * ChooseIndexName(const char *tabname, Oid namespaceId, List *colnames, List *exclusionOpNames, bool primary, bool isconstraint) @@ -2633,7 +2629,7 @@ ChooseIndexNameAddition(List *colnames) * * Returns a List of plain strings (char *, not String nodes). */ -static List * +List * ChooseIndexColumnNames(List *indexElems) { List *result = NIL; diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c index 0065c8992bd..bf4968e348b 100644 --- a/src/backend/optimizer/path/indxpath.c +++ b/src/backend/optimizer/path/indxpath.c @@ -48,14 +48,6 @@ typedef enum ST_ANYSCAN /* either is okay */ } ScanTypeControl; -/* Data structure for collecting qual clauses that match an index */ -typedef struct -{ - bool nonempty; /* True if lists are not all empty */ - /* Lists of IndexClause nodes, one list per index column */ - List *indexclauses[INDEX_MAX_KEYS]; -} IndexClauseSet; - /* Per-path data used within choose_bitmap_and() */ typedef struct { @@ -130,9 +122,6 @@ static double adjust_rowcount_for_semijoins(PlannerInfo *root, Index outer_relid, double rowcount); static double approximate_joinrel_size(PlannerInfo *root, Relids relids); -static void match_restriction_clauses_to_index(PlannerInfo *root, - IndexOptInfo *index, - IndexClauseSet *clauseset); static void match_join_clauses_to_index(PlannerInfo *root, RelOptInfo *rel, IndexOptInfo *index, IndexClauseSet *clauseset, @@ -2012,7 +2001,7 @@ approximate_joinrel_size(PlannerInfo *root, Relids relids) * Identify restriction clauses for the rel that match the index. * Matching clauses are added to *clauseset. */ -static void +void match_restriction_clauses_to_index(PlannerInfo *root, IndexOptInfo *index, IndexClauseSet *clauseset) diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index 942392218cf..a1fa1a62e59 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -164,16 +164,12 @@ static MergeJoin *create_mergejoin_plan(PlannerInfo *root, MergePath *best_path) static HashJoin *create_hashjoin_plan(PlannerInfo *root, HashPath *best_path); static Node *replace_nestloop_params(PlannerInfo *root, Node *expr); static Node *replace_nestloop_params_mutator(Node *node, PlannerInfo *root); -static void fix_indexqual_references(PlannerInfo *root, IndexPath *index_path, - List **stripped_indexquals_p, - List **fixed_indexquals_p); static List *fix_indexorderby_references(PlannerInfo *root, IndexPath *index_path); static Node *fix_indexqual_clause(PlannerInfo *root, IndexOptInfo *index, int indexcol, Node *clause, List *indexcolnos); static Node *fix_indexqual_operand(Node *node, IndexOptInfo *index, int indexcol); static List *get_switched_clauses(List *clauses, Relids outerrelids); -static List *order_qual_clauses(PlannerInfo *root, List *clauses); static void copy_generic_path_info(Plan *dest, Path *src); static void copy_plan_costsize(Plan *dest, Plan *src); static void label_sort_with_costsize(PlannerInfo *root, Sort *plan, @@ -4897,6 +4893,14 @@ replace_nestloop_params(PlannerInfo *root, Node *expr) return replace_nestloop_params_mutator(expr, root); } +Node * +replace_nestloop_params_compat(PlannerInfo *root, Node *expr) +{ + /* No setup needed for tree walk, so away we go */ + return replace_nestloop_params_mutator(expr, root); +} + + static Node * replace_nestloop_params_mutator(Node *node, PlannerInfo *root) { @@ -4977,7 +4981,7 @@ replace_nestloop_params_mutator(Node *node, PlannerInfo *root) * are subplans in it (we need two separate copies of the subplan tree, or * things will go awry). */ -static void +void fix_indexqual_references(PlannerInfo *root, IndexPath *index_path, List **stripped_indexquals_p, List **fixed_indexquals_p) { @@ -5270,7 +5274,7 @@ get_switched_clauses(List *clauses, Relids outerrelids) * instead of bare clauses. This is another reason why trying to consider * selectivity in the ordering would likely do the wrong thing. */ -static List * +List * order_qual_clauses(PlannerInfo *root, List *clauses) { typedef struct diff --git a/src/include/catalog/index.h b/src/include/catalog/index.h index c8532fb97c8..3fa15391d83 100644 --- a/src/include/catalog/index.h +++ b/src/include/catalog/index.h @@ -211,4 +211,6 @@ itemptr_decode(ItemPointer itemptr, int64 encoded) ItemPointerSet(itemptr, block, offset); } +extern void index_update_stats(Relation rel, bool hasindex, double reltuples); + #endif /* INDEX_H */ diff --git a/src/include/commands/defrem.h b/src/include/commands/defrem.h index 0b0d6763ff6..cff3842be90 100644 --- a/src/include/commands/defrem.h +++ b/src/include/commands/defrem.h @@ -41,6 +41,10 @@ extern char *makeObjectName(const char *name1, const char *name2, extern char *ChooseRelationName(const char *name1, const char *name2, const char *label, Oid namespaceid, bool isconstraint); +extern List *ChooseIndexColumnNames(List *indexElems); +extern char *ChooseIndexName(const char *tabname, Oid namespaceId, + List *colnames, List *exclusionOpNames, + bool primary, bool isconstraint); extern bool CheckIndexCompatible(Oid oldId, const char *accessMethodName, List *attributeList, diff --git a/src/include/commands/explain.h b/src/include/commands/explain.h index 3d3e632a0cc..ae8b2b63de9 100644 --- a/src/include/commands/explain.h +++ b/src/include/commands/explain.h @@ -93,6 +93,14 @@ extern void ExplainOnePlan(PlannedStmt *plannedstmt, IntoClause *into, ParamListInfo params, QueryEnvironment *queryEnv, const instr_time *planduration, const BufferUsage *bufusage); +extern void ExplainNode(PlanState *planstate, List *ancestors, + const char *relationship, const char *plan_name, + ExplainState *es); +extern void show_scan_qual(List *qual, const char *qlabel, + PlanState *planstate, List *ancestors, + ExplainState *es); +extern void show_instrumentation_count(const char *qlabel, int which, + PlanState *planstate, ExplainState *es); extern void ExplainPrintPlan(ExplainState *es, QueryDesc *queryDesc); extern void ExplainPrintTriggers(ExplainState *es, QueryDesc *queryDesc); diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h index 50bc3b503a6..80abe2467c0 100644 --- a/src/include/optimizer/paths.h +++ b/src/include/optimizer/paths.h @@ -67,6 +67,14 @@ extern void generate_partitionwise_join_paths(PlannerInfo *root, extern void debug_print_rel(PlannerInfo *root, RelOptInfo *rel); #endif +/* Data structure for collecting qual clauses that match an index */ +typedef struct +{ + bool nonempty; /* True if lists are not all empty */ + /* Lists of IndexClause nodes, one list per index column */ + List *indexclauses[INDEX_MAX_KEYS]; +} IndexClauseSet; + /* * indxpath.c * routines to generate index paths @@ -82,6 +90,10 @@ extern bool match_index_to_operand(Node *operand, int indexcol, IndexOptInfo *index); extern void check_index_predicates(PlannerInfo *root, RelOptInfo *rel); +extern void match_restriction_clauses_to_index(PlannerInfo *root, + IndexOptInfo *index, + IndexClauseSet *clauseset); + /* * tidpath.h * routines to generate tid paths diff --git a/src/include/optimizer/planmain.h b/src/include/optimizer/planmain.h index 31c188176b7..8ec52018173 100644 --- a/src/include/optimizer/planmain.h +++ b/src/include/optimizer/planmain.h @@ -39,6 +39,11 @@ extern void preprocess_minmax_aggregates(PlannerInfo *root); * prototypes for plan/createplan.c */ extern Plan *create_plan(PlannerInfo *root, Path *best_path); +extern List *order_qual_clauses(PlannerInfo *root, List *clauses); +extern void fix_indexqual_references(PlannerInfo *root, IndexPath *index_path, + List **stripped_indexquals_p, + List **fixed_indexquals_p); +extern Node *replace_nestloop_params_compat(PlannerInfo *root, Node *expr); extern ForeignScan *make_foreignscan(List *qptlist, List *qpqual, Index scanrelid, List *fdw_exprs, List *fdw_private, List *fdw_scan_tlist, List *fdw_recheck_quals, From 4f4ec85756690feced29010d9a5e52b4045ca881 Mon Sep 17 00:00:00 2001 From: Alexander Korotkov Date: Mon, 13 Dec 2021 02:22:17 +0300 Subject: [PATCH 016/102] Allow locks in checkpointer --- src/backend/postmaster/checkpointer.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/src/backend/postmaster/checkpointer.c b/src/backend/postmaster/checkpointer.c index 985b9b73545..18f1ba5667f 100644 --- a/src/backend/postmaster/checkpointer.c +++ b/src/backend/postmaster/checkpointer.c @@ -53,11 +53,20 @@ #include "storage/proc.h" #include "storage/procsignal.h" #include "storage/shmem.h" +#include "storage/sinvaladt.h" #include "storage/smgr.h" #include "storage/spin.h" #include "utils/guc.h" #include "utils/memutils.h" #include "utils/resowner.h" +#include "utils/syscache.h" + +/* + * Included for InitializeTimeouts and RegisterTimeout functions that + * needed for correct working of OrioleDB checkpoint. + * See comment for InitializeTimeouts call in CheckpointerMain for details. + */ +#include "utils/timeout.h" /*---------- @@ -210,6 +219,20 @@ CheckpointerMain(void) */ pqsignal(SIGCHLD, SIG_DFL); + /* + * To use OrioleDB checkpoint, we must initialize the data for the primary + * lock mechanism (lock.h) to work correctly. Because locks of this type are + * needed by the OrioleDB module for debug events and relation locks, but + * they are not used by the postgres checkpointer and are not initialized + * for it. + */ + InitializeTimeouts(); /* establishes SIGALRM handler */ + InitDeadLockChecking(); + RegisterTimeout(DEADLOCK_TIMEOUT, CheckDeadLockAlert); + RelationCacheInitialize(); + InitCatalogCache(); + SharedInvalBackendInit(false); + /* * Initialize so that first time-driven event happens at the correct time. From 6f2bce1dc8b6d27bab42933dbfe334b0b244a0d6 Mon Sep 17 00:00:00 2001 From: Alexander Korotkov Date: Mon, 13 Dec 2021 03:14:57 +0300 Subject: [PATCH 017/102] Add base_init_startup_hook and HandleStartupProcInterrupts_hook --- src/backend/postmaster/startup.c | 5 +++++ src/backend/utils/init/postinit.c | 5 ++++- src/include/postmaster/postmaster.h | 4 ++++ src/include/postmaster/startup.h | 3 +++ 4 files changed, 16 insertions(+), 1 deletion(-) diff --git a/src/backend/postmaster/startup.c b/src/backend/postmaster/startup.c index 0e7de26bc28..ce79e4f8f43 100644 --- a/src/backend/postmaster/startup.c +++ b/src/backend/postmaster/startup.c @@ -79,6 +79,8 @@ static volatile sig_atomic_t startup_progress_timer_expired = false; */ int log_startup_progress_interval = 10000; /* 10 sec */ +HandleStartupProcInterrupts_hook_type HandleStartupProcInterrupts_hook = NULL; + /* Signal handlers */ static void StartupProcTriggerHandler(SIGNAL_ARGS); static void StartupProcSigHupHandler(SIGNAL_ARGS); @@ -186,6 +188,9 @@ HandleStartupProcInterrupts(void) static uint32 postmaster_poll_count = 0; #endif + if (HandleStartupProcInterrupts_hook) + HandleStartupProcInterrupts_hook(); + /* * Process any requests or signals received recently. */ diff --git a/src/backend/utils/init/postinit.c b/src/backend/utils/init/postinit.c index 35aa077a850..ee22cdcbc2b 100644 --- a/src/backend/utils/init/postinit.c +++ b/src/backend/utils/init/postinit.c @@ -81,7 +81,7 @@ static void ClientCheckTimeoutHandler(void); static bool ThereIsAtLeastOneRole(void); static void process_startup_options(Port *port, bool am_superuser); static void process_settings(Oid databaseid, Oid roleid); - +base_init_startup_hook_type base_init_startup_hook = NULL; /*** InitPostgres support ***/ @@ -644,6 +644,9 @@ BaseInit(void) */ InitFileAccess(); + if (base_init_startup_hook) + base_init_startup_hook(); + /* * Initialize statistics reporting. This needs to happen early to ensure * that pgstat's shutdown callback runs after the shutdown callbacks of diff --git a/src/include/postmaster/postmaster.h b/src/include/postmaster/postmaster.h index ddc409ec240..3d1da495915 100644 --- a/src/include/postmaster/postmaster.h +++ b/src/include/postmaster/postmaster.h @@ -59,6 +59,10 @@ extern int MaxLivePostmasterChildren(void); extern bool PostmasterMarkPIDForWorkerNotify(int); +typedef void (*base_init_startup_hook_type)(void); + +extern PGDLLIMPORT base_init_startup_hook_type base_init_startup_hook; + #ifdef EXEC_BACKEND extern pid_t postmaster_forkexec(int argc, char *argv[]); extern void SubPostmasterMain(int argc, char *argv[]) pg_attribute_noreturn(); diff --git a/src/include/postmaster/startup.h b/src/include/postmaster/startup.h index 6a2e4c4526b..95eb25f9f4d 100644 --- a/src/include/postmaster/startup.h +++ b/src/include/postmaster/startup.h @@ -23,7 +23,10 @@ ereport(LOG, errmsg(msg, secs, (usecs / 10000), __VA_ARGS__ )); \ } while(0) +typedef void (*HandleStartupProcInterrupts_hook_type)(void); + extern PGDLLIMPORT int log_startup_progress_interval; +extern PGDLLIMPORT HandleStartupProcInterrupts_hook_type HandleStartupProcInterrupts_hook; extern void HandleStartupProcInterrupts(void); extern void StartupProcessMain(void) pg_attribute_noreturn(); From 5dd8121b93fed87a0e452423f47dc8b8cab00d92 Mon Sep 17 00:00:00 2001 From: Alexander Korotkov Date: Mon, 13 Dec 2021 03:17:36 +0300 Subject: [PATCH 018/102] Don't cancel recovery processes because of deadlocks --- src/backend/storage/lmgr/proc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/backend/storage/lmgr/proc.c b/src/backend/storage/lmgr/proc.c index 1b38553d43e..d3502f0b51a 100644 --- a/src/backend/storage/lmgr/proc.c +++ b/src/backend/storage/lmgr/proc.c @@ -1192,7 +1192,7 @@ ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable) * If InHotStandby we set lock waits slightly later for clarity with other * code. */ - if (!InHotStandby) + if (!InHotStandby && !InRecovery) { if (LockTimeout > 0) { @@ -1552,7 +1552,7 @@ ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable) * already caused QueryCancelPending to become set, we want the cancel to * be reported as a lock timeout, not a user cancel. */ - if (!InHotStandby) + if (!InHotStandby && !InRecovery) { if (LockTimeout > 0) { From be9c92aaad19a2e64251b4d7c5eba7bb6f1fb1a2 Mon Sep 17 00:00:00 2001 From: Alexander Korotkov Date: Mon, 13 Dec 2021 03:42:18 +0300 Subject: [PATCH 019/102] set_plain_rel_pathlist_hook --- src/backend/optimizer/path/allpaths.c | 7 +++++-- src/include/optimizer/paths.h | 4 ++++ 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c index f75e0f99cb9..c62a407f4ca 100644 --- a/src/backend/optimizer/path/allpaths.c +++ b/src/backend/optimizer/path/allpaths.c @@ -86,6 +86,7 @@ int min_parallel_index_scan_size; /* Hook for plugins to get control in set_rel_pathlist() */ set_rel_pathlist_hook_type set_rel_pathlist_hook = NULL; +set_plain_rel_pathlist_hook_type set_plain_rel_pathlist_hook = NULL; /* Hook for plugins to replace standard_join_search() */ join_search_hook_type join_search_hook = NULL; @@ -775,8 +776,10 @@ set_plain_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte) */ required_outer = rel->lateral_relids; - /* Consider sequential scan */ - add_path(rel, create_seqscan_path(root, rel, required_outer, 0)); + if (!set_plain_rel_pathlist_hook || + set_plain_rel_pathlist_hook(root, rel, rte)) + /* Consider sequential scan */ + add_path(rel, create_seqscan_path(root, rel, required_outer, 0)); /* If appropriate, consider parallel sequential scan */ if (rel->consider_parallel && required_outer == NULL) diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h index 80abe2467c0..6e4b570fce0 100644 --- a/src/include/optimizer/paths.h +++ b/src/include/optimizer/paths.h @@ -31,6 +31,10 @@ typedef void (*set_rel_pathlist_hook_type) (PlannerInfo *root, Index rti, RangeTblEntry *rte); extern PGDLLIMPORT set_rel_pathlist_hook_type set_rel_pathlist_hook; +typedef bool (*set_plain_rel_pathlist_hook_type)(PlannerInfo *root, + RelOptInfo *rel, + RangeTblEntry *rte); +extern PGDLLIMPORT set_plain_rel_pathlist_hook_type set_plain_rel_pathlist_hook; /* Hook for plugins to get control in add_paths_to_joinrel() */ typedef void (*set_join_pathlist_hook_type) (PlannerInfo *root, From 485d3c788a8aa50ebfa268ddd6682c9ff486524f Mon Sep 17 00:00:00 2001 From: Alexander Korotkov Date: Mon, 13 Dec 2021 14:17:57 +0300 Subject: [PATCH 020/102] Let locker tolerate being removed from the waiting queue without obtaining a lock. --- src/backend/storage/lmgr/lock.c | 29 ++++++++++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-) diff --git a/src/backend/storage/lmgr/lock.c b/src/backend/storage/lmgr/lock.c index f04d6d4d5c8..da6871fff8a 100644 --- a/src/backend/storage/lmgr/lock.c +++ b/src/backend/storage/lmgr/lock.c @@ -1165,12 +1165,35 @@ LockAcquireExtended(const LOCKTAG *locktag, */ if (!(proclock->holdMask & LOCKBIT_ON(lockmode))) { + int i; + AbortStrongLockAcquire(); PROCLOCK_PRINT("LockAcquire: INCONSISTENT", proclock); LOCK_PRINT("LockAcquire: INCONSISTENT", lock, lockmode); /* Should we retry ? */ LWLockRelease(partitionLock); - elog(ERROR, "LockAcquire failed"); + /* + * We've been removed from the queue without obtaining a lock. + * That's OK, we're going to return LOCKACQUIRE_NOT_AVAIL, but + * need to release a local lock first. + */ + locallock->nLocks--; + for (i = 0; i < locallock->numLockOwners; i++) + { + if (locallock->lockOwners[i].owner == owner) + { + locallock->lockOwners[i].nLocks--; + if (locallock->lockOwners[i].nLocks == 0) + { + ResourceOwnerForgetLock(owner, locallock); + locallock->lockOwners[i] = locallock->lockOwners[--locallock->numLockOwners]; + } + break; + } + } + + return LOCKACQUIRE_NOT_AVAIL; + } PROCLOCK_PRINT("LockAcquire: granted", proclock); LOCK_PRINT("LockAcquire: granted", lock, lockmode); @@ -4682,8 +4705,8 @@ VirtualXactLock(VirtualTransactionId vxid, bool wait) LWLockRelease(&proc->fpInfoLock); /* Time to wait. */ - (void) LockAcquire(&tag, ShareLock, false, false); - + if (LockAcquire(&tag, ShareLock, false, false) == LOCKACQUIRE_NOT_AVAIL) + return false; LockRelease(&tag, ShareLock, false); return XactLockForVirtualXact(vxid, xid, wait); } From 8c05d60dbcf14fd82d06ba007363ea3e3f05fdef Mon Sep 17 00:00:00 2001 From: Alexander Korotkov Date: Thu, 17 Feb 2022 07:46:49 +0300 Subject: [PATCH 021/102] Count extension wait events in pg_isolation_test_session_is_blocked() --- src/backend/utils/adt/lockfuncs.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/backend/utils/adt/lockfuncs.c b/src/backend/utils/adt/lockfuncs.c index f9b9590997b..6f7bcc4394c 100644 --- a/src/backend/utils/adt/lockfuncs.c +++ b/src/backend/utils/adt/lockfuncs.c @@ -18,8 +18,11 @@ #include "funcapi.h" #include "miscadmin.h" #include "storage/predicate_internals.h" +#include "storage/proc.h" +#include "storage/procarray.h" #include "utils/array.h" #include "utils/builtins.h" +#include "utils/wait_event.h" /* @@ -614,6 +617,7 @@ pg_safe_snapshot_blocking_pids(PG_FUNCTION_ARGS) Datum pg_isolation_test_session_is_blocked(PG_FUNCTION_ARGS) { + PGPROC *blocked_proc; int blocked_pid = PG_GETARG_INT32(0); ArrayType *interesting_pids_a = PG_GETARG_ARRAYTYPE_P(1); ArrayType *blocking_pids_a; @@ -674,6 +678,10 @@ pg_isolation_test_session_is_blocked(PG_FUNCTION_ARGS) if (GetSafeSnapshotBlockingPids(blocked_pid, &dummy, 1) > 0) PG_RETURN_BOOL(true); + blocked_proc = BackendPidGetProc(blocked_pid); + if ((blocked_proc->wait_event_info & 0xFF000000) == PG_WAIT_EXTENSION) + PG_RETURN_BOOL(true); + PG_RETURN_BOOL(false); } From 6253a0da749de78846ec4967cf224fdafaec295b Mon Sep 17 00:00:00 2001 From: Alexander Korotkov Date: Thu, 24 Feb 2022 03:19:39 +0300 Subject: [PATCH 022/102] Support for custom table AM in pgbench --- src/bin/pgbench/pgbench.c | 45 +++++++++++++++++++++++++++++++++++++-- 1 file changed, 43 insertions(+), 2 deletions(-) diff --git a/src/bin/pgbench/pgbench.c b/src/bin/pgbench/pgbench.c index adf6e45953b..2b8ac067420 100644 --- a/src/bin/pgbench/pgbench.c +++ b/src/bin/pgbench/pgbench.c @@ -220,6 +220,11 @@ double throttle_delay = 0; */ int64 latency_limit = 0; +/* + * tableam selection + */ +char *tableam = NULL; + /* * tablespace selection */ @@ -890,6 +895,7 @@ usage(void) " --partition-method=(range|hash)\n" " partition pgbench_accounts with this method (default: range)\n" " --partitions=NUM partition pgbench_accounts into NUM parts (default: 0)\n" + " --tableam=TABLEAM create tables using the specified tableam\n" " --tablespace=TABLESPACE create tables in the specified tablespace\n" " --unlogged-tables create tables as unlogged tables\n" "\nOptions to select what to run:\n" @@ -4780,14 +4786,34 @@ createPartitions(PGconn *con) appendPQExpBufferStr(&query, "maxvalue"); appendPQExpBufferChar(&query, ')'); + + if (tableam != NULL) + { + char *escape_tableam; + + escape_tableam = PQescapeIdentifier(con, tableam, strlen(tableam)); + appendPQExpBuffer(&query, " using %s", escape_tableam); + PQfreemem(escape_tableam); + } } else if (partition_method == PART_HASH) + { printfPQExpBuffer(&query, "create%s table pgbench_accounts_%d\n" " partition of pgbench_accounts\n" " for values with (modulus %d, remainder %d)", unlogged_tables ? " unlogged" : "", p, partitions, p - 1); + + if (tableam != NULL) + { + char *escape_tableam; + + escape_tableam = PQescapeIdentifier(con, tableam, strlen(tableam)); + appendPQExpBuffer(&query, " using %s", escape_tableam); + PQfreemem(escape_tableam); + } + } else /* cannot get there */ Assert(0); @@ -4874,10 +4900,20 @@ initCreateTables(PGconn *con) if (partition_method != PART_NONE && strcmp(ddl->table, "pgbench_accounts") == 0) appendPQExpBuffer(&query, " partition by %s (aid)", PARTITION_METHOD[partition_method]); - else if (ddl->declare_fillfactor) + else { + if (tableam != NULL) + { + char *escape_tableam; + + escape_tableam = PQescapeIdentifier(con, tableam, strlen(tableam)); + appendPQExpBuffer(&query, " using %s", escape_tableam); + PQfreemem(escape_tableam); + } + /* fillfactor is only expected on actual tables */ - appendPQExpBuffer(&query, " with (fillfactor=%d)", fillfactor); + if (ddl->declare_fillfactor) + appendPQExpBuffer(&query, " with (fillfactor=%d)", fillfactor); } if (tablespace != NULL) @@ -6655,6 +6691,7 @@ main(int argc, char **argv) {"failures-detailed", no_argument, NULL, 13}, {"max-tries", required_argument, NULL, 14}, {"verbose-errors", no_argument, NULL, 15}, + {"tableam", required_argument, NULL, 16}, {NULL, 0, NULL, 0} }; @@ -6992,6 +7029,10 @@ main(int argc, char **argv) benchmarking_option_set = true; verbose_errors = true; break; + case 16: /* tableam */ + initialization_option_set = true; + tableam = pg_strdup(optarg); + break; default: /* getopt_long already emitted a complaint */ pg_log_error_hint("Try \"%s --help\" for more information.", progname); From c796bd9c5bb28d6445e6da8b3de528e5bcf7a063 Mon Sep 17 00:00:00 2001 From: Alexander Korotkov Date: Wed, 2 Mar 2022 14:49:29 +0300 Subject: [PATCH 023/102] Support for outline atomics on aarch64 Outline-atomics is a gcc compilation flag that enables runtime detection of CPU support for atomic instructions. Performance on CPUs that do support atomic instructions is improved, while compatibility and performance on CPUs without atomic instructions is not hurt. Discussion: https://postgr.es/m/flat/099F69EE-51D3-4214-934A-1F28C0A1A7A7%40amazon.com Author: Tsahi Zidenberg --- configure | 93 ++++++++++++++++++++++++++++++++++++++++++++++++++++ configure.ac | 4 +++ 2 files changed, 97 insertions(+) diff --git a/configure b/configure index 5e6ea15e876..7fccb0de3c9 100755 --- a/configure +++ b/configure @@ -6657,6 +6657,99 @@ fi if test -n "$NOT_THE_CFLAGS"; then CFLAGS="$CFLAGS -Wno-cast-function-type-strict" fi + if test x"$host_cpu" == x"aarch64"; then + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether ${CC} supports -moutline-atomics, for CFLAGS" >&5 +$as_echo_n "checking whether ${CC} supports -moutline-atomics, for CFLAGS... " >&6; } +if ${pgac_cv_prog_CC_cflags__moutline_atomics+:} false; then : + $as_echo_n "(cached) " >&6 +else + pgac_save_CFLAGS=$CFLAGS +pgac_save_CC=$CC +CC=${CC} +CFLAGS="${CFLAGS} -moutline-atomics" +ac_save_c_werror_flag=$ac_c_werror_flag +ac_c_werror_flag=yes +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + pgac_cv_prog_CC_cflags__moutline_atomics=yes +else + pgac_cv_prog_CC_cflags__moutline_atomics=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +ac_c_werror_flag=$ac_save_c_werror_flag +CFLAGS="$pgac_save_CFLAGS" +CC="$pgac_save_CC" +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv_prog_CC_cflags__moutline_atomics" >&5 +$as_echo "$pgac_cv_prog_CC_cflags__moutline_atomics" >&6; } +if test x"$pgac_cv_prog_CC_cflags__moutline_atomics" = x"yes"; then + CFLAGS="${CFLAGS} -moutline-atomics" +fi + + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether ${CXX} supports -moutline-atomics, for CXXFLAGS" >&5 +$as_echo_n "checking whether ${CXX} supports -moutline-atomics, for CXXFLAGS... " >&6; } +if ${pgac_cv_prog_CXX_cxxflags__moutline_atomics+:} false; then : + $as_echo_n "(cached) " >&6 +else + pgac_save_CXXFLAGS=$CXXFLAGS +pgac_save_CXX=$CXX +CXX=${CXX} +CXXFLAGS="${CXXFLAGS} -moutline-atomics" +ac_save_cxx_werror_flag=$ac_cxx_werror_flag +ac_cxx_werror_flag=yes +ac_ext=cpp +ac_cpp='$CXXCPP $CPPFLAGS' +ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_cxx_compiler_gnu + +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_cxx_try_compile "$LINENO"; then : + pgac_cv_prog_CXX_cxxflags__moutline_atomics=yes +else + pgac_cv_prog_CXX_cxxflags__moutline_atomics=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + +ac_cxx_werror_flag=$ac_save_cxx_werror_flag +CXXFLAGS="$pgac_save_CXXFLAGS" +CXX="$pgac_save_CXX" +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv_prog_CXX_cxxflags__moutline_atomics" >&5 +$as_echo "$pgac_cv_prog_CXX_cxxflags__moutline_atomics" >&6; } +if test x"$pgac_cv_prog_CXX_cxxflags__moutline_atomics" = x"yes"; then + CXXFLAGS="${CXXFLAGS} -moutline-atomics" +fi + + + fi elif test "$ICC" = yes; then # Intel's compiler has a bug/misoptimization in checking for # division by NAN (NaN == 0), -mp1 fixes it, so add it to the CFLAGS. diff --git a/configure.ac b/configure.ac index e2c1dab5198..77a4be7ba0e 100644 --- a/configure.ac +++ b/configure.ac @@ -579,6 +579,10 @@ if test "$GCC" = yes -a "$ICC" = no; then if test -n "$NOT_THE_CFLAGS"; then CFLAGS="$CFLAGS -Wno-cast-function-type-strict" fi + if test x"$host_cpu" == x"aarch64"; then + PGAC_PROG_CC_CFLAGS_OPT([-moutline-atomics]) + PGAC_PROG_CXX_CFLAGS_OPT([-moutline-atomics]) + fi elif test "$ICC" = yes; then # Intel's compiler has a bug/misoptimization in checking for # division by NAN (NaN == 0), -mp1 fixes it, so add it to the CFLAGS. From 62c5f9e3569a1c38f309bd0818eadf1935f52c5d Mon Sep 17 00:00:00 2001 From: Alexander Korotkov Date: Thu, 17 Feb 2022 08:43:32 +0300 Subject: [PATCH 024/102] OrioleDB specific CI --- .github/workflows/build.yml | 31 +++++++++++++++++++++++++++++++ ci/build.sh | 21 +++++++++++++++++++++ ci/check.sh | 11 +++++++++++ ci/check_output.sh | 30 ++++++++++++++++++++++++++++++ ci/prerequisites.sh | 22 ++++++++++++++++++++++ configure | 5 +++++ configure.ac | 4 ++++ meson.build | 1 + src/Makefile.global.in | 3 +++ src/bin/pg_rewind/meson.build | 6 ++++++ src/makefiles/meson.build | 1 + 11 files changed, 135 insertions(+) create mode 100644 .github/workflows/build.yml create mode 100644 ci/build.sh create mode 100644 ci/check.sh create mode 100644 ci/check_output.sh create mode 100644 ci/prerequisites.sh diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml new file mode 100644 index 00000000000..c6f1bef64aa --- /dev/null +++ b/.github/workflows/build.yml @@ -0,0 +1,31 @@ +name: build + +on: + push: + pull_request: + +jobs: + test: + runs-on: + - ubuntu-20.04 + strategy: + fail-fast: false + matrix: + compiler: [clang, gcc] + check_type: [normal, debug] + env: + LLVM_VER: 10 + COMPILER: ${{ matrix.compiler }} + CHECK_TYPE: ${{ matrix.check_type }} + steps: + - name: Checkout code into workspace directory + uses: actions/checkout@v2 + - name: Setup prerequisites + run: bash ./ci/prerequisites.sh + - name: Build + run: bash ./ci/build.sh + - name: Check + run: bash ./ci/check.sh + - name: Check output + run: bash ./ci/check_output.sh + if: ${{ success() || failure() }} diff --git a/ci/build.sh b/ci/build.sh new file mode 100644 index 00000000000..f541929e69c --- /dev/null +++ b/ci/build.sh @@ -0,0 +1,21 @@ +#!/bin/bash + +set -eu + +if [ $COMPILER = "clang" ]; then + export CC=clang-$LLVM_VER +else + export CC=gcc +fi + +# configure & build +if [ $CHECK_TYPE = "debug" ]; then + CFLAGS="-O0" ./configure --enable-debug --enable-cassert --enable-tap-tests --with-icu +else + ./configure --disable-debug --disable-cassert --enable-tap-tests --with-icu +fi + +make -sj4 +cd contrib +make -sj4 +cd .. diff --git a/ci/check.sh b/ci/check.sh new file mode 100644 index 00000000000..faa8c25e84a --- /dev/null +++ b/ci/check.sh @@ -0,0 +1,11 @@ +#!/bin/bash + +set -eu + +# unsets limit for coredumps size +ulimit -c unlimited -S +# sets a coredump file pattern +mkdir -p /tmp/cores-$GITHUB_SHA-$TIMESTAMP +sudo sh -c "echo \"/tmp/cores-$GITHUB_SHA-$TIMESTAMP/%t_%p_%s.core\" > /proc/sys/kernel/core_pattern" + +make check-world -j4 diff --git a/ci/check_output.sh b/ci/check_output.sh new file mode 100644 index 00000000000..ae26cf63d68 --- /dev/null +++ b/ci/check_output.sh @@ -0,0 +1,30 @@ +#!/bin/bash + +set -eu + +status=0 + +# show diff if it exists +for f in ` find . -name regression.diffs ` ; do + echo "========= Contents of $f" + cat $f + status=1 +done + +# check core dumps if any +cores=$(find /tmp/cores-$GITHUB_SHA-$TIMESTAMP/ -name '*.core' 2>/dev/null) + +if [ -n "$cores" ]; then + for corefile in $cores ; do + if [[ $corefile != *_3.core ]]; then + binary=$(gdb -quiet -core $corefile -batch -ex 'info auxv' | grep AT_EXECFN | perl -pe "s/^.*\"(.*)\"\$/\$1/g") + echo dumping $corefile for $binary + gdb --batch --quiet -ex "thread apply all bt full" -ex "quit" $binary $corefile + status=1 + fi + done +fi + +rm -rf /tmp/cores-$GITHUB_SHA-$TIMESTAMP + +exit $status diff --git a/ci/prerequisites.sh b/ci/prerequisites.sh new file mode 100644 index 00000000000..b26251b711c --- /dev/null +++ b/ci/prerequisites.sh @@ -0,0 +1,22 @@ +#!/bin/bash + +set -eu + +# print the hostname to be able to identify runner by logs +echo "HOSTNAME=`hostname`" +TIMESTAMP=$(date +%s) +echo "TIMESTAMP=$TIMESTAMP" >> $GITHUB_ENV +echo "TIMESTAMP=$TIMESTAMP" + +sudo apt-get -y install -qq wget ca-certificates + +sudo apt-get update -qq + +apt_packages="build-essential flex bison pkg-config libreadline-dev make gdb libipc-run-perl libicu-dev python3 python3-dev python3-pip python3-setuptools python3-testresources" + +if [ $COMPILER = "clang" ]; then + apt_packages="$apt_packages llvm-$LLVM_VER clang-$LLVM_VER clang-tools-$LLVM_VER" +fi + +# install required packages +sudo apt-get -o Dpkg::Options::="--force-confdef" -o Dpkg::Options::="--force-confold" -y install -qq $apt_packages diff --git a/configure b/configure index 7fccb0de3c9..48bce6baac9 100755 --- a/configure +++ b/configure @@ -628,6 +628,7 @@ ac_includes_default="\ ac_subst_vars='LTLIBOBJS vpath_build PG_SYSROOT +ORIOLEDB_PATCHSET_VERSION PG_VERSION_NUM LDFLAGS_EX_BE PROVE @@ -19557,6 +19558,10 @@ _ACEOF +# Needed to check postgresql patches git tag during orioledb extension build +ORIOLEDB_PATCHSET_VERSION=`git describe --tags | cut -d'_' -f2` + + # If we are inserting PG_SYSROOT into CPPFLAGS, do so symbolically not # literally, so that it's possible to override it at build time using # a command like "make ... PG_SYSROOT=path". This has to be done after diff --git a/configure.ac b/configure.ac index 77a4be7ba0e..8274103dbad 100644 --- a/configure.ac +++ b/configure.ac @@ -2426,6 +2426,10 @@ $AWK '{printf "%d%04d", $1, $2}'`"] AC_DEFINE_UNQUOTED(PG_VERSION_NUM, $PG_VERSION_NUM, [PostgreSQL version as a number]) AC_SUBST(PG_VERSION_NUM) +# Needed to check postgresql patches git tag during orioledb extension build +[ORIOLEDB_PATCHSET_VERSION=`git describe --tags | cut -d'_' -f2`] +AC_SUBST(ORIOLEDB_PATCHSET_VERSION) + # If we are inserting PG_SYSROOT into CPPFLAGS, do so symbolically not # literally, so that it's possible to override it at build time using # a command like "make ... PG_SYSROOT=path". This has to be done after diff --git a/meson.build b/meson.build index 77e963bd3b5..84eb95ba496 100644 --- a/meson.build +++ b/meson.build @@ -153,6 +153,7 @@ cdata.set('PG_VERSION_NUM', pg_version_num) # PG_VERSION_STR is built later, it depends on compiler test results cdata.set_quoted('CONFIGURE_ARGS', '') +orioledb_patchset_version = '22' ############################################################### diff --git a/src/Makefile.global.in b/src/Makefile.global.in index ce05cc1429a..e40f50e82b3 100644 --- a/src/Makefile.global.in +++ b/src/Makefile.global.in @@ -42,6 +42,9 @@ VERSION_NUM = @PG_VERSION_NUM@ PACKAGE_URL = @PACKAGE_URL@ +# OrioleDB patchset git tag number +ORIOLEDB_PATCHSET_VERSION = @ORIOLEDB_PATCHSET_VERSION@ + # Set top_srcdir, srcdir, and VPATH. ifdef PGXS top_srcdir = $(top_builddir) diff --git a/src/bin/pg_rewind/meson.build b/src/bin/pg_rewind/meson.build index 880346f37b6..1d20d3f357e 100644 --- a/src/bin/pg_rewind/meson.build +++ b/src/bin/pg_rewind/meson.build @@ -2,6 +2,7 @@ pg_rewind_sources = files( 'datapagemap.c', + 'extension.c', 'file_ops.c', 'filemap.c', 'libpq_source.c', @@ -23,6 +24,7 @@ pg_rewind = executable('pg_rewind', pg_rewind_sources, dependencies: [frontend_code, libpq, lz4, zstd], c_args: ['-DFRONTEND'], # needed for xlogreader et al + export_dynamic: true, kwargs: default_bin_args, ) bin_targets += pg_rewind @@ -49,3 +51,7 @@ tests += { } subdir('po', if_found: libintl) + +install_headers( + 'pg_rewind_ext.h' +) \ No newline at end of file diff --git a/src/makefiles/meson.build b/src/makefiles/meson.build index 13045cbd6e4..16ce1650e2e 100644 --- a/src/makefiles/meson.build +++ b/src/makefiles/meson.build @@ -37,6 +37,7 @@ pgxs_kv = { 'PACKAGE_VERSION': pg_version, 'PG_MAJORVERSION': pg_version_major, 'PG_VERSION_NUM': pg_version_num, + 'ORIOLEDB_PATCHSET_VERSION': orioledb_patchset_version, 'configure_input': 'meson', 'vpath_build': 'yes', From f8ac104bef879844f4d54d79987b6f4376abef29 Mon Sep 17 00:00:00 2001 From: Alexander Korotkov Date: Sun, 9 Apr 2023 01:57:21 +0300 Subject: [PATCH 025/102] Close indices in AttachPartitionEnsureIndexes() before DefineIndex() --- src/backend/commands/tablecmds.c | 43 +++++++++++++++++++------------- 1 file changed, 26 insertions(+), 17 deletions(-) diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c index 0c52b64053c..34fe864a069 100644 --- a/src/backend/commands/tablecmds.c +++ b/src/backend/commands/tablecmds.c @@ -18347,6 +18347,7 @@ static void AttachPartitionEnsureIndexes(Relation rel, Relation attachrel) { List *idxes; + List *buildIdxes = NIL; List *attachRelIdxs; Relation *attachrelIdxRels; IndexInfo **attachInfos; @@ -18354,6 +18355,7 @@ AttachPartitionEnsureIndexes(Relation rel, Relation attachrel) ListCell *cell; MemoryContext cxt; MemoryContext oldcxt; + AttrMap *attmap; cxt = AllocSetContextCreate(CurrentMemoryContext, "AttachPartitionEnsureIndexes", @@ -18404,6 +18406,10 @@ AttachPartitionEnsureIndexes(Relation rel, Relation attachrel) goto out; } + attmap = build_attrmap_by_name(RelationGetDescr(attachrel), + RelationGetDescr(rel), + false); + /* * For each index on the partitioned table, find a matching one in the * partition-to-be; if one is not found, create one. @@ -18413,7 +18419,6 @@ AttachPartitionEnsureIndexes(Relation rel, Relation attachrel) Oid idx = lfirst_oid(cell); Relation idxRel = index_open(idx, AccessShareLock); IndexInfo *info; - AttrMap *attmap; bool found = false; Oid constraintOid; @@ -18429,9 +18434,6 @@ AttachPartitionEnsureIndexes(Relation rel, Relation attachrel) /* construct an indexinfo to compare existing indexes against */ info = BuildIndexInfo(idxRel); - attmap = build_attrmap_by_name(RelationGetDescr(attachrel), - RelationGetDescr(rel), - false); constraintOid = get_relation_idx_constraint_oid(RelationGetRelid(rel), idx); /* @@ -18492,19 +18494,7 @@ AttachPartitionEnsureIndexes(Relation rel, Relation attachrel) * now. */ if (!found) - { - IndexStmt *stmt; - Oid conOid; - - stmt = generateClonedIndexStmt(NULL, - idxRel, attmap, - &conOid); - DefineIndex(RelationGetRelid(attachrel), stmt, InvalidOid, - RelationGetRelid(idxRel), - conOid, - -1, - true, false, false, false, false); - } + buildIdxes = lappend_oid(buildIdxes, RelationGetRelid(idxRel)); index_close(idxRel, AccessShareLock); } @@ -18513,6 +18503,25 @@ AttachPartitionEnsureIndexes(Relation rel, Relation attachrel) /* Clean up. */ for (i = 0; i < list_length(attachRelIdxs); i++) index_close(attachrelIdxRels[i], AccessShareLock); + + foreach(cell, buildIdxes) + { + Oid idx = lfirst_oid(cell); + Relation idxRel = index_open(idx, AccessShareLock); + IndexStmt *stmt; + Oid conOid; + + stmt = generateClonedIndexStmt(NULL, + idxRel, attmap, + &conOid); + DefineIndex(RelationGetRelid(attachrel), stmt, InvalidOid, + RelationGetRelid(idxRel), + conOid, + -1, + true, false, false, false, false); + index_close(idxRel, AccessShareLock); + } + MemoryContextSwitchTo(oldcxt); MemoryContextDelete(cxt); } From 8f9009cf2b8f613ac94d8389d045d5942568a147 Mon Sep 17 00:00:00 2001 From: Alexander Korotkov Date: Fri, 30 Jun 2023 01:35:54 +0300 Subject: [PATCH 026/102] New BGWORKER_CLASS_SYSTEM bgworkers class They are allowed to stay during shutdown checkpointing and help checkpointer do its work. --- src/backend/postmaster/postmaster.c | 39 +++++++++++++++++++++-------- src/include/postmaster/bgworker.h | 6 +++++ 2 files changed, 35 insertions(+), 10 deletions(-) diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c index bfc23ffc751..40c840946c9 100644 --- a/src/backend/postmaster/postmaster.c +++ b/src/backend/postmaster/postmaster.c @@ -145,7 +145,8 @@ #define BACKEND_TYPE_AUTOVAC 0x0002 /* autovacuum worker process */ #define BACKEND_TYPE_WALSND 0x0004 /* walsender process */ #define BACKEND_TYPE_BGWORKER 0x0008 /* bgworker process */ -#define BACKEND_TYPE_ALL 0x000F /* OR of all the above */ +#define BACKEND_TYPE_SYSTEM_BGWORKER 0x0010 /* system bgworker process */ +#define BACKEND_TYPE_ALL 0x001F /* OR of all the above */ /* * List of active backends (or child processes anyway; we don't actually @@ -2476,8 +2477,9 @@ processCancelRequest(Port *port, void *pkt) /* * canAcceptConnections --- check to see if database state allows connections * of the specified type. backend_type can be BACKEND_TYPE_NORMAL, - * BACKEND_TYPE_AUTOVAC, or BACKEND_TYPE_BGWORKER. (Note that we don't yet - * know whether a NORMAL connection might turn into a walsender.) + * BACKEND_TYPE_AUTOVAC, BACKEND_TYPE_BGWORKER or BACKEND_TYPE_SYSTEM_BGWORKER. + * (Note that we don't yet know whether a NORMAL connection might turn into + * a walsender.) */ static CAC_state canAcceptConnections(int backend_type) @@ -2491,7 +2493,8 @@ canAcceptConnections(int backend_type) * bgworker_should_start_now() decided whether the DB state allows them. */ if (pmState != PM_RUN && pmState != PM_HOT_STANDBY && - backend_type != BACKEND_TYPE_BGWORKER) + backend_type != BACKEND_TYPE_BGWORKER && + backend_type != BACKEND_TYPE_SYSTEM_BGWORKER) { if (Shutdown > NoShutdown) return CAC_SHUTDOWN; /* shutdown is pending */ @@ -3169,6 +3172,13 @@ process_pm_child_exit(void) if (PgArchPID != 0) signal_child(PgArchPID, SIGUSR2); + /* + * Terminate system background workers since checpoint is + * complete. + */ + SignalSomeChildren(SIGTERM, + BACKEND_TYPE_SYSTEM_BGWORKER); + /* * Waken walsenders for the last time. No regular backends * should be around anymore. @@ -3570,7 +3580,8 @@ HandleChildCrash(int pid, int exitstatus, const char *procname) * Background workers were already processed above; ignore them * here. */ - if (bp->bkend_type == BACKEND_TYPE_BGWORKER) + if (bp->bkend_type == BACKEND_TYPE_BGWORKER || + bp->bkend_type == BACKEND_TYPE_SYSTEM_BGWORKER) continue; if (take_action) @@ -3749,7 +3760,7 @@ PostmasterStateMachine(void) /* Signal all backend children except walsenders */ SignalSomeChildren(SIGTERM, - BACKEND_TYPE_ALL - BACKEND_TYPE_WALSND); + BACKEND_TYPE_ALL - BACKEND_TYPE_WALSND - BACKEND_TYPE_SYSTEM_BGWORKER); /* and the autovac launcher too */ if (AutoVacPID != 0) signal_child(AutoVacPID, SIGTERM); @@ -3787,7 +3798,7 @@ PostmasterStateMachine(void) * and archiver are also disregarded, they will be terminated later * after writing the checkpoint record. */ - if (CountChildren(BACKEND_TYPE_ALL - BACKEND_TYPE_WALSND) == 0 && + if (CountChildren(BACKEND_TYPE_ALL - BACKEND_TYPE_WALSND - BACKEND_TYPE_SYSTEM_BGWORKER) == 0 && StartupPID == 0 && WalReceiverPID == 0 && BgWriterPID == 0 && @@ -5798,16 +5809,20 @@ do_start_bgworker(RegisteredBgWorker *rw) * specified start_time? */ static bool -bgworker_should_start_now(BgWorkerStartTime start_time) +bgworker_should_start_now(BgWorkerStartTime start_time, int flags) { switch (pmState) { case PM_NO_CHILDREN: case PM_WAIT_DEAD_END: case PM_SHUTDOWN_2: + break; + case PM_SHUTDOWN: case PM_WAIT_BACKENDS: case PM_STOP_BACKENDS: + if (flags & BGWORKER_CLASS_SYSTEM) + return true; break; case PM_RUN: @@ -5882,7 +5897,10 @@ assign_backendlist_entry(RegisteredBgWorker *rw) bn->cancel_key = MyCancelKey; bn->child_slot = MyPMChildSlot = AssignPostmasterChildSlot(); - bn->bkend_type = BACKEND_TYPE_BGWORKER; + if (rw->rw_worker.bgw_flags & BGWORKER_CLASS_SYSTEM) + bn->bkend_type = BACKEND_TYPE_SYSTEM_BGWORKER; + else + bn->bkend_type = BACKEND_TYPE_BGWORKER; bn->dead_end = false; bn->bgworker_notify = false; @@ -5980,7 +5998,8 @@ maybe_start_bgworkers(void) } } - if (bgworker_should_start_now(rw->rw_worker.bgw_start_time)) + if (bgworker_should_start_now(rw->rw_worker.bgw_start_time, + rw->rw_worker.bgw_flags)) { /* reset crash time before trying to start worker */ rw->rw_crashed_at = 0; diff --git a/src/include/postmaster/bgworker.h b/src/include/postmaster/bgworker.h index 845d4498e65..e5af3247632 100644 --- a/src/include/postmaster/bgworker.h +++ b/src/include/postmaster/bgworker.h @@ -66,6 +66,12 @@ * background workers should not use this class. */ #define BGWORKER_CLASS_PARALLEL 0x0010 + +/* + * This class of bgworkers are allowed to stay working during shutdown + * checkpointing. + */ +#define BGWORKER_CLASS_SYSTEM 0x0020 /* add additional bgworker classes here */ From 8e8b83e0f0ffd4d65f77e6ecc4f10954295ed0bb Mon Sep 17 00:00:00 2001 From: Ilya Kobets Date: Thu, 7 Sep 2023 21:33:03 +0200 Subject: [PATCH 027/102] Add pg_newlocale_from_collation_hook to perform stricter collation checks --- src/backend/utils/adt/pg_locale.c | 7 ++++++- src/include/utils/pg_locale.h | 2 ++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c index 111a7888779..53a6b5d7c7f 100644 --- a/src/backend/utils/adt/pg_locale.c +++ b/src/backend/utils/adt/pg_locale.c @@ -135,6 +135,7 @@ typedef struct static HTAB *collation_cache = NULL; +pg_newlocale_from_collation_hook_type pg_newlocale_from_collation_hook = NULL; #if defined(WIN32) && defined(LC_MESSAGES) static char *IsoLocaleName(const char *); @@ -1623,6 +1624,7 @@ pg_newlocale_from_collation(Oid collid) { char *actual_versionstr; char *collversionstr; + int level = WARNING; collversionstr = TextDatumGetCString(datum); @@ -1642,8 +1644,11 @@ pg_newlocale_from_collation(Oid collid) NameStr(collform->collname)))); } + if (pg_newlocale_from_collation_hook && pg_newlocale_from_collation_hook()) + level = ERROR; + if (strcmp(actual_versionstr, collversionstr) != 0) - ereport(WARNING, + ereport(level, (errmsg("collation \"%s\" has version mismatch", NameStr(collform->collname)), errdetail("The collation in the database was created using version %s, " diff --git a/src/include/utils/pg_locale.h b/src/include/utils/pg_locale.h index 73d3e6e9310..30b186a9c18 100644 --- a/src/include/utils/pg_locale.h +++ b/src/include/utils/pg_locale.h @@ -107,6 +107,8 @@ extern void make_icu_collator(const char *iculocstr, extern bool pg_locale_deterministic(pg_locale_t locale); extern pg_locale_t pg_newlocale_from_collation(Oid collid); +typedef bool (*pg_newlocale_from_collation_hook_type)(); +extern pg_newlocale_from_collation_hook_type pg_newlocale_from_collation_hook; extern char *get_collation_actual_version(char collprovider, const char *collcollate); extern int pg_strcoll(const char *arg1, const char *arg2, pg_locale_t locale); From b34727e2b751212d1604e6679830de81840397d6 Mon Sep 17 00:00:00 2001 From: Alexander Korotkov Date: Wed, 12 Jul 2023 23:40:12 +0300 Subject: [PATCH 028/102] Archive preload callback --- src/backend/postmaster/pgarch.c | 16 ++++++++++++++++ src/include/archive/archive_module.h | 6 +++++- 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/src/backend/postmaster/pgarch.c b/src/backend/postmaster/pgarch.c index 46af3495644..93ce77683a4 100644 --- a/src/backend/postmaster/pgarch.c +++ b/src/backend/postmaster/pgarch.c @@ -670,6 +670,22 @@ pgarch_readyXlog(char *xlog) for (int i = 0; i < arch_files->arch_files_size; i++) arch_files->arch_files[i] = DatumGetCString(binaryheap_remove_first(arch_files->arch_heap)); + /* + * Preload the WAL files if the relevant callback is provided. + */ + if (ArchiveCallbacks->archive_preload_file_cb) + { + for (int i = 0; i < arch_files->arch_files_size; i++) + { + char *xlog1 = arch_files->arch_files[i]; + char pathname[MAXPGPATH]; + + snprintf(pathname, MAXPGPATH, XLOGDIR "/%s", xlog1); + ArchiveCallbacks->archive_preload_file_cb(archive_module_state, + xlog1, pathname); + } + } + /* Return the highest priority file. */ arch_files->arch_files_size--; strcpy(xlog, arch_files->arch_files[arch_files->arch_files_size]); diff --git a/src/include/archive/archive_module.h b/src/include/archive/archive_module.h index 679ce5a6dbd..2921c0a05f8 100644 --- a/src/include/archive/archive_module.h +++ b/src/include/archive/archive_module.h @@ -37,13 +37,17 @@ typedef struct ArchiveModuleState */ typedef void (*ArchiveStartupCB) (ArchiveModuleState *state); typedef bool (*ArchiveCheckConfiguredCB) (ArchiveModuleState *state); -typedef bool (*ArchiveFileCB) (ArchiveModuleState *state, const char *file, const char *path); +typedef void (*ArchivePreloadFileCB) (ArchiveModuleState *state, + const char *file, const char *path); +typedef bool (*ArchiveFileCB) (ArchiveModuleState *state, + const char *file, const char *path); typedef void (*ArchiveShutdownCB) (ArchiveModuleState *state); typedef struct ArchiveModuleCallbacks { ArchiveStartupCB startup_cb; ArchiveCheckConfiguredCB check_configured_cb; + ArchivePreloadFileCB archive_preload_file_cb; ArchiveFileCB archive_file_cb; ArchiveShutdownCB shutdown_cb; } ArchiveModuleCallbacks; From 6a15109e403eb411abb1ab99f21f306b264e4249 Mon Sep 17 00:00:00 2001 From: Alexander Korotkov Date: Sun, 18 Feb 2024 06:10:50 +0200 Subject: [PATCH 029/102] Remove pthread_is_threaded_np() call To use curl during shared_preload_libraries initialization. --- configure | 2 +- configure.ac | 1 - meson.build | 1 - src/backend/postmaster/postmaster.c | 49 ----------------------------- src/include/pg_config.h.in | 3 -- 5 files changed, 1 insertion(+), 55 deletions(-) diff --git a/configure b/configure index 48bce6baac9..2eef37f3970 100755 --- a/configure +++ b/configure @@ -15809,7 +15809,7 @@ fi LIBS_including_readline="$LIBS" LIBS=`echo "$LIBS" | sed -e 's/-ledit//g' -e 's/-lreadline//g'` -for ac_func in backtrace_symbols copyfile getifaddrs getpeerucred inet_pton kqueue mbstowcs_l posix_fallocate ppoll pthread_is_threaded_np setproctitle setproctitle_fast strsignal syncfs sync_file_range uselocale wcstombs_l +for ac_func in backtrace_symbols copyfile getifaddrs getpeerucred inet_pton kqueue mbstowcs_l posix_fallocate ppoll setproctitle setproctitle_fast strsignal syncfs sync_file_range uselocale wcstombs_l do : as_ac_var=`$as_echo "ac_cv_func_$ac_func" | $as_tr_sh` ac_fn_c_check_func "$LINENO" "$ac_func" "$as_ac_var" diff --git a/configure.ac b/configure.ac index 8274103dbad..973036e4915 100644 --- a/configure.ac +++ b/configure.ac @@ -1809,7 +1809,6 @@ AC_CHECK_FUNCS(m4_normalize([ mbstowcs_l posix_fallocate ppoll - pthread_is_threaded_np setproctitle setproctitle_fast strsignal diff --git a/meson.build b/meson.build index 84eb95ba496..d35c257f14a 100644 --- a/meson.build +++ b/meson.build @@ -2593,7 +2593,6 @@ func_checks = [ ['posix_fallocate'], ['ppoll'], ['pthread_barrier_wait', {'dependencies': [thread_dep]}], - ['pthread_is_threaded_np', {'dependencies': [thread_dep]}], ['sem_init', {'dependencies': [rt_dep, thread_dep], 'skip': sema_kind != 'unnamed_posix', 'define': false}], ['setproctitle', {'dependencies': [util_dep]}], ['setproctitle_fast'], diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c index 40c840946c9..1b98bb17586 100644 --- a/src/backend/postmaster/postmaster.c +++ b/src/backend/postmaster/postmaster.c @@ -85,10 +85,6 @@ #include #endif -#ifdef HAVE_PTHREAD_IS_THREADED_NP -#include -#endif - #include "access/transam.h" #include "access/xlog.h" #include "access/xlogrecovery.h" @@ -1426,26 +1422,6 @@ PostmasterMain(int argc, char *argv[]) */ } -#ifdef HAVE_PTHREAD_IS_THREADED_NP - - /* - * On macOS, libintl replaces setlocale() with a version that calls - * CFLocaleCopyCurrent() when its second argument is "" and every relevant - * environment variable is unset or empty. CFLocaleCopyCurrent() makes - * the process multithreaded. The postmaster calls sigprocmask() and - * calls fork() without an immediate exec(), both of which have undefined - * behavior in a multithreaded program. A multithreaded postmaster is the - * normal case on Windows, which offers neither fork() nor sigprocmask(). - * Currently, macOS is the only platform having pthread_is_threaded_np(), - * so we need not worry whether this HINT is appropriate elsewhere. - */ - if (pthread_is_threaded_np() != 0) - ereport(FATAL, - (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), - errmsg("postmaster became multithreaded during startup"), - errhint("Set the LC_ALL environment variable to a valid locale."))); -#endif - /* * Remember postmaster startup time */ @@ -1863,15 +1839,6 @@ ServerLoop(void) if (StartWorkerNeeded || HaveCrashedWorker) maybe_start_bgworkers(); -#ifdef HAVE_PTHREAD_IS_THREADED_NP - - /* - * With assertions enabled, check regularly for appearance of - * additional threads. All builds check at start and exit. - */ - Assert(pthread_is_threaded_np() == 0); -#endif - /* * Lastly, check to see if it's time to do some things that we don't * want to do every single time through the loop, because they're a @@ -5065,22 +5032,6 @@ SubPostmasterMain(int argc, char *argv[]) static void ExitPostmaster(int status) { -#ifdef HAVE_PTHREAD_IS_THREADED_NP - - /* - * There is no known cause for a postmaster to become multithreaded after - * startup. However, we might reach here via an error exit before - * reaching the test in PostmasterMain, so provide the same hint as there. - * This message uses LOG level, because an unclean shutdown at this point - * would usually not look much different from a clean shutdown. - */ - if (pthread_is_threaded_np() != 0) - ereport(LOG, - (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), - errmsg("postmaster became multithreaded"), - errhint("Set the LC_ALL environment variable to a valid locale."))); -#endif - /* should cleanup shared memory and kill all backends */ /* diff --git a/src/include/pg_config.h.in b/src/include/pg_config.h.in index be27b8abe10..90957f91a96 100644 --- a/src/include/pg_config.h.in +++ b/src/include/pg_config.h.in @@ -348,9 +348,6 @@ /* Define to 1 if you have the `pthread_barrier_wait' function. */ #undef HAVE_PTHREAD_BARRIER_WAIT -/* Define to 1 if you have the `pthread_is_threaded_np' function. */ -#undef HAVE_PTHREAD_IS_THREADED_NP - /* Have PTHREAD_PRIO_INHERIT. */ #undef HAVE_PTHREAD_PRIO_INHERIT From ef59f06701b845ebb44e6fd7c0622bd1efc3f1e1 Mon Sep 17 00:00:00 2001 From: Ilya Kobets Date: Fri, 8 Dec 2023 01:37:02 +0100 Subject: [PATCH 030/102] Added option to pg_rewind to perform extension specific rewind - added option --extension for pg_rewind - extracted SimpleXLogRead from extractPageMap for generic wal iteration in pg_rewind --- doc/src/sgml/ref/pg_rewind.sgml | 10 +++ src/bin/pg_rewind/Makefile | 7 +- src/bin/pg_rewind/extension.c | 132 ++++++++++++++++++++++++++++++ src/bin/pg_rewind/filemap.c | 41 +++++++++- src/bin/pg_rewind/parsexlog.c | 36 +++++--- src/bin/pg_rewind/pg_rewind.c | 16 +++- src/bin/pg_rewind/pg_rewind.h | 10 +++ src/bin/pg_rewind/pg_rewind_ext.h | 44 ++++++++++ 8 files changed, 279 insertions(+), 17 deletions(-) create mode 100644 src/bin/pg_rewind/extension.c create mode 100644 src/bin/pg_rewind/pg_rewind_ext.h diff --git a/doc/src/sgml/ref/pg_rewind.sgml b/doc/src/sgml/ref/pg_rewind.sgml index 2de747ec37f..01d20462e33 100644 --- a/doc/src/sgml/ref/pg_rewind.sgml +++ b/doc/src/sgml/ref/pg_rewind.sgml @@ -284,6 +284,16 @@ PostgreSQL documentation + + + + + + Load shared library that performs custom rewind for postgres extension. The path may be full or relative to PKGLIBDIR. File extension is optional. Multiple extensions can be selected by multiple switches. + + + + diff --git a/src/bin/pg_rewind/Makefile b/src/bin/pg_rewind/Makefile index bed05f1609c..5ff8163b841 100644 --- a/src/bin/pg_rewind/Makefile +++ b/src/bin/pg_rewind/Makefile @@ -21,6 +21,7 @@ LDFLAGS_INTERNAL += -L$(top_builddir)/src/fe_utils -lpgfeutils $(libpq_pgport) OBJS = \ $(WIN32RES) \ datapagemap.o \ + extension.o \ file_ops.o \ filemap.o \ libpq_source.o \ @@ -35,19 +36,21 @@ EXTRA_CLEAN = xlogreader.c all: pg_rewind pg_rewind: $(OBJS) | submake-libpq submake-libpgport - $(CC) $(CFLAGS) $^ $(LDFLAGS) $(LDFLAGS_EX) $(LIBS) -o $@$(X) + $(CC) $(CFLAGS) $^ $(LDFLAGS) $(LDFLAGS_EX) $(LDFLAGS_EX_BE) $(LIBS) -o $@$(X) xlogreader.c: % : $(top_srcdir)/src/backend/access/transam/% rm -f $@ && $(LN_S) $< . install: all installdirs $(INSTALL_PROGRAM) pg_rewind$(X) '$(DESTDIR)$(bindir)/pg_rewind$(X)' + $(INSTALL_DATA) $(srcdir)/pg_rewind_ext.h '$(DESTDIR)$(includedir)' installdirs: - $(MKDIR_P) '$(DESTDIR)$(bindir)' + $(MKDIR_P) '$(DESTDIR)$(bindir)' '$(DESTDIR)$(includedir)' uninstall: rm -f '$(DESTDIR)$(bindir)/pg_rewind$(X)' + rm -f '$(DESTDIR)$(includedir)/pg_rewind_ext.h' clean distclean maintainer-clean: rm -f pg_rewind$(X) $(OBJS) xlogreader.c diff --git a/src/bin/pg_rewind/extension.c b/src/bin/pg_rewind/extension.c new file mode 100644 index 00000000000..29ec4b5a6f6 --- /dev/null +++ b/src/bin/pg_rewind/extension.c @@ -0,0 +1,132 @@ +/*------------------------------------------------------------------------- + * + * extension.c + * Functions for processing shared libraries loaded by pg_rewind. + * + * Copyright (c) 2013-2023, PostgreSQL Global Development Group + * + *------------------------------------------------------------------------- + */ + +#include "postgres_fe.h" + +#ifndef WIN32 +#include + +/* + * On macOS, insists on including . If we're not + * using stdbool, undef bool to undo the damage. + */ +#ifndef PG_USE_STDBOOL +#ifdef bool +#undef bool +#endif +#endif +#endif /* !WIN32 */ + +#include + +#include "access/xlog_internal.h" +#include "pg_rewind.h" + +/* signature for pg_rewind extension library rewind function */ +typedef void (*PG_rewind_t) (const char *datadir_target, char *datadir_source, + char *connstr_source, XLogRecPtr startpoint, + int tliIndex, XLogRecPtr endpoint, + const char *restoreCommand, const char *argv0, + bool debug); + +static bool +file_exists(const char *argv0, const char *name) +{ + struct stat st; + + Assert(name != NULL); + + if (stat(name, &st) == 0) + return !S_ISDIR(st.st_mode); + else if (!(errno == ENOENT || errno == ENOTDIR || errno == EACCES)) + { + const char *progname; + + progname = get_progname(argv0); + pg_log_error("could not access file \"%s\": %m", name); + pg_log_error_hint("Try \"%s --help\" for more information.", progname); + exit(1); + } + + return false; +} + +static char * +expand_dynamic_library_name(const char *argv0, const char *name) +{ + char *full; + char my_exec_path[MAXPGPATH]; + char pkglib_path[MAXPGPATH]; + + Assert(name); + + if (find_my_exec(argv0, my_exec_path) < 0) + pg_fatal("%s: could not locate my own executable path", argv0); + get_pkglib_path(my_exec_path, pkglib_path); + full = palloc(strlen(pkglib_path) + 1 + strlen(name) + 1); + sprintf(full, "%s/%s", pkglib_path, name); + if (file_exists(argv0, full)) + return full; + pfree(full); + + full = palloc(strlen(pkglib_path) + 1 + strlen(name) + 1 + + strlen(DLSUFFIX) + 1); + sprintf(full, "%s/%s%s", pkglib_path, name, DLSUFFIX); + if (file_exists(argv0, full)) + return full; + pfree(full); + + return pstrdup(name); +} + +void +process_extensions(SimpleStringList *extensions, const char *datadir_target, + char *datadir_source, char *connstr_source, + XLogRecPtr startpoint, int tliIndex, XLogRecPtr endpoint, + const char *restoreCommand, const char *argv0, + bool debug) +{ + SimpleStringListCell *cell; + + if (extensions->head == NULL) + return; /* nothing to do */ + + for (cell = extensions->head; cell; cell = cell->next) + { + char *filename = cell->val; + char *fullname; + void *lib_handle; + PG_rewind_t PG_rewind; + char *load_error; + + fullname = expand_dynamic_library_name(argv0, filename); + + lib_handle = dlopen(fullname, RTLD_NOW | RTLD_GLOBAL); + if (lib_handle == NULL) + { + load_error = dlerror(); + pg_fatal("could not load library \"%s\": %s", fullname, load_error); + } + + PG_rewind = dlsym(lib_handle, "_PG_rewind"); + + if (PG_rewind == NULL) + pg_fatal("could not find function \"_PG_rewind\" in \"%s\"", + fullname); + pfree(fullname); + + if (showprogress) + pg_log_info("performing rewind for '%s' extension", filename); + PG_rewind(datadir_target, datadir_source, connstr_source, startpoint, + tliIndex, endpoint, restoreCommand, argv0, debug); + + pg_log_debug("loaded library \"%s\"", filename); + } +} diff --git a/src/bin/pg_rewind/filemap.c b/src/bin/pg_rewind/filemap.c index a2e2110fb1e..1e2c4f0adc2 100644 --- a/src/bin/pg_rewind/filemap.c +++ b/src/bin/pg_rewind/filemap.c @@ -54,6 +54,7 @@ static uint32 hash_string_pointer(const char *s); #define FILEHASH_INITIAL_SIZE 1000 static filehash_hash *filehash; +static SimpleStringList extensions_exclude = {NULL, NULL}; static bool isRelDataFile(const char *path); static char *datasegpath(RelFileLocator rlocator, ForkNumber forknum, @@ -323,6 +324,8 @@ process_target_file(const char *path, file_type_t type, size_t size, * from the target data folder all paths which have been filtered out from * the source data folder when processing the source files. */ + if (check_file_excluded(path, false)) + return; /* * Like in process_source_file, pretend that pg_wal is always a directory. @@ -467,6 +470,31 @@ check_file_excluded(const char *path, bool is_source) } } + /* + * Exclude extensions directories + */ + if (extensions_exclude.head != NULL) + { + SimpleStringListCell *cell; + + for (cell = extensions_exclude.head; cell; cell = cell->next) + { + char *exclude_dir = cell->val; + + snprintf(localpath, sizeof(localpath), "%s/", exclude_dir); + if (strstr(path, localpath) == path) + { + if (is_source) + pg_log_debug("entry \"%s\" excluded from source file list", + path); + else + pg_log_debug("entry \"%s\" excluded from target file list", + path); + return true; + } + } + } + return false; } @@ -892,7 +920,6 @@ decide_file_actions(void) return filemap; } - /* * Helper function for filemap hash table. */ @@ -903,3 +930,15 @@ hash_string_pointer(const char *s) return hash_bytes(ss, strlen(s)); } + +void +extensions_exclude_add(char **exclude_dirs) +{ + int i; + + for (i = 0; exclude_dirs[i] != NULL; i++) + { + simple_string_list_append(&extensions_exclude, + pstrdup(exclude_dirs[i])); + } +} diff --git a/src/bin/pg_rewind/parsexlog.c b/src/bin/pg_rewind/parsexlog.c index 0ed08841523..68437cf1ceb 100644 --- a/src/bin/pg_rewind/parsexlog.c +++ b/src/bin/pg_rewind/parsexlog.c @@ -38,7 +38,7 @@ static const char *RmgrNames[RM_MAX_ID + 1] = { #define RmgrName(rmid) (((rmid) <= RM_MAX_BUILTIN_ID) ? \ RmgrNames[rmid] : "custom") -static void extractPageInfo(XLogReaderState *record); +static void extractPageInfo(XLogReaderState *record, void *arg); static int xlogreadfd = -1; static XLogSegNo xlogreadsegno = 0; @@ -54,17 +54,11 @@ static int SimpleXLogPageRead(XLogReaderState *xlogreader, XLogRecPtr targetPagePtr, int reqLen, XLogRecPtr targetRecPtr, char *readBuf); -/* - * Read WAL from the datadir/pg_wal, starting from 'startpoint' on timeline - * index 'tliIndex' in target timeline history, until 'endpoint'. Make note of - * the data blocks touched by the WAL records, and return them in a page map. - * - * 'endpoint' is the end of the last record to read. The record starting at - * 'endpoint' is the first one that is not read. - */ void -extractPageMap(const char *datadir, XLogRecPtr startpoint, int tliIndex, - XLogRecPtr endpoint, const char *restoreCommand) +SimpleXLogRead(const char *datadir, XLogRecPtr startpoint, int tliIndex, + XLogRecPtr endpoint, const char *restoreCommand, + void (*page_callback) (XLogReaderState *, void *arg), + void *arg) { XLogRecord *record; XLogReaderState *xlogreader; @@ -97,7 +91,7 @@ extractPageMap(const char *datadir, XLogRecPtr startpoint, int tliIndex, LSN_FORMAT_ARGS(errptr)); } - extractPageInfo(xlogreader); + page_callback(xlogreader, arg); } while (xlogreader->EndRecPtr < endpoint); /* @@ -116,6 +110,22 @@ extractPageMap(const char *datadir, XLogRecPtr startpoint, int tliIndex, } } +/* + * Read WAL from the datadir/pg_wal, starting from 'startpoint' on timeline + * index 'tliIndex' in target timeline history, until 'endpoint'. Make note of + * the data blocks touched by the WAL records, and return them in a page map. + * + * 'endpoint' is the end of the last record to read. The record starting at + * 'endpoint' is the first one that is not read. + */ +void +extractPageMap(const char *datadir, XLogRecPtr startpoint, int tliIndex, + XLogRecPtr endpoint, const char *restoreCommand) +{ + SimpleXLogRead(datadir, startpoint, tliIndex, endpoint, restoreCommand, + extractPageInfo, NULL); +} + /* * Reads one WAL record. Returns the end position of the record, without * doing anything with the record itself. @@ -386,7 +396,7 @@ SimpleXLogPageRead(XLogReaderState *xlogreader, XLogRecPtr targetPagePtr, * Extract information on which blocks the current record modifies. */ static void -extractPageInfo(XLogReaderState *record) +extractPageInfo(XLogReaderState *record, void *arg) { int block_id; RmgrId rmid = XLogRecGetRmid(record); diff --git a/src/bin/pg_rewind/pg_rewind.c b/src/bin/pg_rewind/pg_rewind.c index c2c0f0b9f2f..ee0baf7db28 100644 --- a/src/bin/pg_rewind/pg_rewind.c +++ b/src/bin/pg_rewind/pg_rewind.c @@ -75,6 +75,8 @@ bool dry_run = false; bool do_sync = true; bool restore_wal = false; +static SimpleStringList extensions = {NULL, NULL}; + /* Target history */ TimeLineHistoryEntry *targetHistory; int targetNentries; @@ -107,6 +109,7 @@ usage(const char *progname) " file when running target cluster\n")); printf(_(" --debug write a lot of debug messages\n")); printf(_(" --no-ensure-shutdown do not automatically fix unclean shutdown\n")); + printf(_(" -e, --extension=PATH path to library performing rewind for extension\n")); printf(_(" -V, --version output version information, then exit\n")); printf(_(" -?, --help show this help, then exit\n")); printf(_("\nReport bugs to <%s>.\n"), PACKAGE_BUGREPORT); @@ -131,6 +134,7 @@ main(int argc, char **argv) {"no-sync", no_argument, NULL, 'N'}, {"progress", no_argument, NULL, 'P'}, {"debug", no_argument, NULL, 3}, + {"extension", required_argument, NULL, 'e'}, {NULL, 0, NULL, 0} }; int option_index; @@ -169,7 +173,7 @@ main(int argc, char **argv) } } - while ((c = getopt_long(argc, argv, "cD:nNPR", long_options, &option_index)) != -1) + while ((c = getopt_long(argc, argv, "cD:nNPRe", long_options, &option_index)) != -1) { switch (c) { @@ -218,6 +222,10 @@ main(int argc, char **argv) config_file = pg_strdup(optarg); break; + case 'e': /* -e or --extension */ + simple_string_list_append(&extensions, optarg); + break; + default: /* getopt_long already emitted a complaint */ pg_log_error_hint("Try \"%s --help\" for more information.", progname); @@ -457,6 +465,12 @@ main(int argc, char **argv) /* Initialize the hash table to track the status of each file */ filehash_init(); + if (extensions.head != NULL) + process_extensions(&extensions, datadir_target, datadir_source, + connstr_source, chkptrec, lastcommontliIndex, + target_wal_endrec, restore_command, argv[0], + debug); + /* * Collect information about all files in the both data directories. */ diff --git a/src/bin/pg_rewind/pg_rewind.h b/src/bin/pg_rewind/pg_rewind.h index ef8bdc1fbb8..1d42a921246 100644 --- a/src/bin/pg_rewind/pg_rewind.h +++ b/src/bin/pg_rewind/pg_rewind.h @@ -14,7 +14,9 @@ #include "access/timeline.h" #include "common/logging.h" #include "datapagemap.h" +#include "fe_utils/simple_list.h" #include "libpq-fe.h" +#include "pg_rewind_ext.h" #include "storage/block.h" #include "storage/relfilelocator.h" @@ -53,4 +55,12 @@ extern TimeLineHistoryEntry *rewind_parseTimeLineHistory(char *buffer, TimeLineID targetTLI, int *nentries); +/* in extension.c */ +extern void process_extensions(SimpleStringList *extensions, + const char *datadir_target, char *datadir_source, + char *connstr_source, XLogRecPtr startpoint, + int tliIndex, XLogRecPtr endpoint, + const char *restoreCommand, const char *argv0, + bool debug); + #endif /* PG_REWIND_H */ diff --git a/src/bin/pg_rewind/pg_rewind_ext.h b/src/bin/pg_rewind/pg_rewind_ext.h new file mode 100644 index 00000000000..3616d94f588 --- /dev/null +++ b/src/bin/pg_rewind/pg_rewind_ext.h @@ -0,0 +1,44 @@ +/*------------------------------------------------------------------------- + * + * pg_rewind_ext.h + * + * + * Copyright (c) 1996-2023, PostgreSQL Global Development Group + * + *------------------------------------------------------------------------- + */ +#ifndef PG_REWIND_EXT_H +#define PG_REWIND_EXT_H + +#include "access/xlogreader.h" + +/* in parsexlog.c */ +/* + * Read WAL from the datadir/pg_wal, starting from 'startpoint' on timeline + * index 'tliIndex' in target timeline history, until 'endpoint'. + * Pass all WAL records to 'page_callback'. + * + * 'endpoint' is the end of the last record to read. The record starting at + * 'endpoint' is the first one that is not read. + */ +extern void SimpleXLogRead(const char *datadir, XLogRecPtr startpoint, + int tliIndex, XLogRecPtr endpoint, + const char *restoreCommand, + void (*page_callback) (XLogReaderState *, + void *arg), + void *arg); + + +/* in filemap.c */ +/* Add NULL-terminated list of dirs that pg_rewind can skip copying */ +extern void extensions_exclude_add(char **exclude_dirs); + +/* signature for pg_rewind extension library rewind function */ +extern PGDLLEXPORT void _PG_rewind(const char *datadir_target, + char *datadir_source, char *connstr_source, + XLogRecPtr startpoint, int tliIndex, + XLogRecPtr endpoint, + const char *restoreCommand, + const char *argv0, bool debug); + +#endif /* PG_REWIND_EXT_H */ From 4b93302f22964b6e9058607a1d45a0415885a44b Mon Sep 17 00:00:00 2001 From: Ilya Kobets Date: Fri, 3 May 2024 22:05:35 +0200 Subject: [PATCH 031/102] Index scan and index only scan with rowid --- src/backend/access/heap/heapam_handler.c | 3 +- src/backend/access/index/genam.c | 2 + src/backend/access/index/indexam.c | 88 +++++++++++++++++++++--- src/backend/access/table/tableam.c | 2 +- src/backend/commands/constraint.c | 2 +- src/backend/executor/nodeIndexonlyscan.c | 33 +++++++-- src/backend/utils/adt/selfuncs.c | 28 ++++++-- src/include/access/genam.h | 3 + src/include/access/relscan.h | 2 + src/include/access/tableam.h | 6 +- 10 files changed, 147 insertions(+), 22 deletions(-) diff --git a/src/backend/access/heap/heapam_handler.c b/src/backend/access/heap/heapam_handler.c index ea6759e8a7f..a32fc3b69fb 100644 --- a/src/backend/access/heap/heapam_handler.c +++ b/src/backend/access/heap/heapam_handler.c @@ -131,7 +131,7 @@ heapam_index_fetch_end(IndexFetchTableData *scan) static bool heapam_index_fetch_tuple(struct IndexFetchTableData *scan, - ItemPointer tid, + Datum tupleid, Snapshot snapshot, TupleTableSlot *slot, bool *call_again, bool *all_dead) @@ -139,6 +139,7 @@ heapam_index_fetch_tuple(struct IndexFetchTableData *scan, IndexFetchHeapData *hscan = (IndexFetchHeapData *) scan; BufferHeapTupleTableSlot *bslot = (BufferHeapTupleTableSlot *) slot; bool got_heap_tuple; + ItemPointer tid = DatumGetItemPointer(tupleid); Assert(TTS_IS_BUFFERTUPLE(slot)); diff --git a/src/backend/access/index/genam.c b/src/backend/access/index/genam.c index 67246406cff..03752f11830 100644 --- a/src/backend/access/index/genam.c +++ b/src/backend/access/index/genam.c @@ -104,6 +104,7 @@ RelationGetIndexScan(Relation indexRelation, int nkeys, int norderbys) scan->orderByData = NULL; scan->xs_want_itup = false; /* may be set later */ + scan->xs_want_rowid = false; /* may be set later */ /* * During recovery we ignore killed tuples and don't bother to kill them @@ -125,6 +126,7 @@ RelationGetIndexScan(Relation indexRelation, int nkeys, int norderbys) scan->xs_itupdesc = NULL; scan->xs_hitup = NULL; scan->xs_hitupdesc = NULL; + scan->xs_rowid.isnull = true; return scan; } diff --git a/src/backend/access/index/indexam.c b/src/backend/access/index/indexam.c index 715e91e25f0..b19a7b7297e 100644 --- a/src/backend/access/index/indexam.c +++ b/src/backend/access/index/indexam.c @@ -603,6 +603,55 @@ index_getnext_tid(IndexScanDesc scan, ScanDirection direction) return &scan->xs_heaptid; } +/* ---------------- + * index_getnext_rowid - get the next ROWID from a scan + * + * The result is the next ROWID satisfying the scan keys, + * or isnull if no more matching tuples exist. + * ---------------- + */ +NullableDatum +index_getnext_rowid(IndexScanDesc scan, ScanDirection direction) +{ + NullableDatum result; + bool found; + + SCAN_CHECKS; + CHECK_SCAN_PROCEDURE(amgettuple); + + /* XXX: we should assert that a snapshot is pushed or registered */ + Assert(TransactionIdIsValid(RecentXmin)); + + /* + * The AM's amgettuple proc finds the next index entry matching the scan + * keys, and puts the TID into scan->xs_heaptid. It should also set + * scan->xs_recheck and possibly scan->xs_itup/scan->xs_hitup, though we + * pay no attention to those fields here. + */ + found = scan->indexRelation->rd_indam->amgettuple(scan, direction); + + /* Reset kill flag immediately for safety */ + scan->kill_prior_tuple = false; + scan->xs_heap_continue = false; + + /* If we're out of index entries, we're done */ + if (!found) + { + /* release resources (like buffer pins) from table accesses */ + if (scan->xs_heapfetch) + table_index_fetch_reset(scan->xs_heapfetch); + + result.isnull = true; + return result; + } + /* Assert(RowidIsValid(&scan->xs_rowid)); */ + + pgstat_count_index_tuples(scan->indexRelation, 1); + + /* Return the ROWID of the tuple we found. */ + return scan->xs_rowid; +} + /* ---------------- * index_fetch_heap - get the scan's next heap tuple * @@ -626,8 +675,17 @@ index_fetch_heap(IndexScanDesc scan, TupleTableSlot *slot) { bool all_dead = false; bool found; + Datum tupleid; + + if (scan->xs_want_rowid) + { + Assert(!scan->xs_rowid.isnull); + tupleid = scan->xs_rowid.value; + } + else + tupleid = PointerGetDatum(&scan->xs_heaptid); - found = table_index_fetch_tuple(scan->xs_heapfetch, &scan->xs_heaptid, + found = table_index_fetch_tuple(scan->xs_heapfetch, tupleid, scan->xs_snapshot, slot, &scan->xs_heap_continue, &all_dead); @@ -669,16 +727,30 @@ index_getnext_slot(IndexScanDesc scan, ScanDirection direction, TupleTableSlot * { if (!scan->xs_heap_continue) { - ItemPointer tid; + if (scan->xs_want_rowid) + { + NullableDatum rowid; + /* Time to fetch the next TID from the index */ + rowid = index_getnext_rowid(scan, direction); - /* Time to fetch the next TID from the index */ - tid = index_getnext_tid(scan, direction); + /* If we're out of index entries, we're done */ + if (rowid.isnull) + break; - /* If we're out of index entries, we're done */ - if (tid == NULL) - break; + /* Assert(RowidEquals(rowid, &scan->xs_rowid)); */ + } + else + { + ItemPointer tid; + /* Time to fetch the next TID from the index */ + tid = index_getnext_tid(scan, direction); - Assert(ItemPointerEquals(tid, &scan->xs_heaptid)); + /* If we're out of index entries, we're done */ + if (tid == NULL) + break; + + Assert(ItemPointerEquals(tid, &scan->xs_heaptid)); + } } /* diff --git a/src/backend/access/table/tableam.c b/src/backend/access/table/tableam.c index c8329db8f34..67d63e0a6ec 100644 --- a/src/backend/access/table/tableam.c +++ b/src/backend/access/table/tableam.c @@ -227,7 +227,7 @@ table_index_fetch_tuple_check(Relation rel, slot = table_slot_create(rel, NULL); scan = table_index_fetch_begin(rel); - found = table_index_fetch_tuple(scan, tid, snapshot, slot, &call_again, + found = table_index_fetch_tuple(scan, PointerGetDatum(tid), snapshot, slot, &call_again, all_dead); table_index_fetch_end(scan); ExecDropSingleTupleTableSlot(slot); diff --git a/src/backend/commands/constraint.c b/src/backend/commands/constraint.c index 35c4451fc06..982bae9ed42 100644 --- a/src/backend/commands/constraint.c +++ b/src/backend/commands/constraint.c @@ -111,7 +111,7 @@ unique_key_recheck(PG_FUNCTION_ARGS) IndexFetchTableData *scan = table_index_fetch_begin(trigdata->tg_relation); bool call_again = false; - if (!table_index_fetch_tuple(scan, &tmptid, SnapshotSelf, slot, + if (!table_index_fetch_tuple(scan, PointerGetDatum(&tmptid), SnapshotSelf, slot, &call_again, NULL)) { /* diff --git a/src/backend/executor/nodeIndexonlyscan.c b/src/backend/executor/nodeIndexonlyscan.c index 45d1a67a713..6ebddd36c95 100644 --- a/src/backend/executor/nodeIndexonlyscan.c +++ b/src/backend/executor/nodeIndexonlyscan.c @@ -66,7 +66,7 @@ IndexOnlyNext(IndexOnlyScanState *node) ScanDirection direction; IndexScanDesc scandesc; TupleTableSlot *slot; - ItemPointer tid; + ItemPointer tid = NULL; /* * extract necessary information from index scan node @@ -118,12 +118,36 @@ IndexOnlyNext(IndexOnlyScanState *node) /* * OK, now that we have what we need, fetch the next tuple. */ - while ((tid = index_getnext_tid(scandesc, direction)) != NULL) + while (true) { bool tuple_from_heap = false; CHECK_FOR_INTERRUPTS(); + if (scandesc->xs_want_rowid) + { + NullableDatum rowid; + /* Time to fetch the next TID from the index */ + rowid = index_getnext_rowid(scandesc, direction); + + /* If we're out of index entries, we're done */ + if (rowid.isnull) + break; + + /* Assert(RowidEquals(rowid, &scan->xs_rowid)); */ + } + else + { + /* Time to fetch the next TID from the index */ + tid = index_getnext_tid(scandesc, direction); + + /* If we're out of index entries, we're done */ + if (tid == NULL) + break; + + Assert(ItemPointerEquals(tid, &scandesc->xs_heaptid)); + } + /* * We can skip the heap fetch if the TID references a heap page on * which all tuples are known visible to everybody. In any case, @@ -158,7 +182,8 @@ IndexOnlyNext(IndexOnlyScanState *node) * It's worth going through this complexity to avoid needing to lock * the VM buffer, which could cause significant contention. */ - if (!VM_ALL_VISIBLE(scandesc->heapRelation, + if (!scandesc->xs_want_rowid && + !VM_ALL_VISIBLE(scandesc->heapRelation, ItemPointerGetBlockNumber(tid), &node->ioss_VMBuffer)) { @@ -243,7 +268,7 @@ IndexOnlyNext(IndexOnlyScanState *node) * If we didn't access the heap, then we'll need to take a predicate * lock explicitly, as if we had. For now we do that at page level. */ - if (!tuple_from_heap) + if (!tuple_from_heap && !scandesc->xs_want_rowid) PredicateLockPage(scandesc->heapRelation, ItemPointerGetBlockNumber(tid), estate->es_snapshot); diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c index 8f59631a5cf..f62ef539370 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -6283,12 +6283,32 @@ get_actual_variable_endpoint(Relation heapRel, index_scan->xs_want_itup = true; index_rescan(index_scan, scankeys, 1, NULL, 0); - /* Fetch first/next tuple in specified direction */ - while ((tid = index_getnext_tid(index_scan, indexscandir)) != NULL) + while (true) { - BlockNumber block = ItemPointerGetBlockNumber(tid); + BlockNumber block = InvalidBlockNumber; - if (!VM_ALL_VISIBLE(heapRel, + /* Fetch first/next tuple in specified direction */ + if (index_scan->xs_want_rowid) + { + NullableDatum rowid; + rowid = index_getnext_rowid(index_scan, indexscandir); + + if (rowid.isnull) + break; + } + else + { + tid = index_getnext_tid(index_scan, indexscandir); + + if (tid == NULL) + break; + + Assert(ItemPointerEquals(tid, &index_scan->xs_heaptid)); + block = ItemPointerGetBlockNumber(tid); + } + + if (!index_scan->xs_want_rowid && + !VM_ALL_VISIBLE(heapRel, block, &vmbuffer)) { diff --git a/src/include/access/genam.h b/src/include/access/genam.h index 7188d42894e..c9b4ac8c441 100644 --- a/src/include/access/genam.h +++ b/src/include/access/genam.h @@ -173,6 +173,9 @@ extern IndexScanDesc index_beginscan_parallel(Relation heaprel, ParallelIndexScanDesc pscan); extern ItemPointer index_getnext_tid(IndexScanDesc scan, ScanDirection direction); +extern NullableDatum index_getnext_rowid(IndexScanDesc scan, + ScanDirection direction); +extern Datum index_getnext_tupleid(IndexScanDesc scan, ScanDirection direction); struct TupleTableSlot; extern bool index_fetch_heap(IndexScanDesc scan, struct TupleTableSlot *slot); extern bool index_getnext_slot(IndexScanDesc scan, ScanDirection direction, diff --git a/src/include/access/relscan.h b/src/include/access/relscan.h index d03360eac04..ea0913ce6f2 100644 --- a/src/include/access/relscan.h +++ b/src/include/access/relscan.h @@ -122,6 +122,7 @@ typedef struct IndexScanDescData struct ScanKeyData *keyData; /* array of index qualifier descriptors */ struct ScanKeyData *orderByData; /* array of ordering op descriptors */ bool xs_want_itup; /* caller requests index tuples */ + bool xs_want_rowid; /* caller requests index tuples */ bool xs_temp_snap; /* unregister snapshot at scan end? */ /* signaling to index AM about killing index tuples */ @@ -145,6 +146,7 @@ typedef struct IndexScanDescData struct TupleDescData *xs_hitupdesc; /* rowtype descriptor of xs_hitup */ ItemPointerData xs_heaptid; /* result */ + NullableDatum xs_rowid; /* result if xs_want_rowid */ bool xs_heap_continue; /* T if must keep walking, potential * further results */ IndexFetchTableData *xs_heapfetch; diff --git a/src/include/access/tableam.h b/src/include/access/tableam.h index 7356d5de507..702482e0d7b 100644 --- a/src/include/access/tableam.h +++ b/src/include/access/tableam.h @@ -475,7 +475,7 @@ typedef struct TableAmRoutine * future searches. */ bool (*index_fetch_tuple) (struct IndexFetchTableData *scan, - ItemPointer tid, + Datum tupleid, Snapshot snapshot, TupleTableSlot *slot, bool *call_again, bool *all_dead); @@ -1268,7 +1268,7 @@ table_index_fetch_end(struct IndexFetchTableData *scan) */ static inline bool table_index_fetch_tuple(struct IndexFetchTableData *scan, - ItemPointer tid, + Datum tupleid, Snapshot snapshot, TupleTableSlot *slot, bool *call_again, bool *all_dead) @@ -1281,7 +1281,7 @@ table_index_fetch_tuple(struct IndexFetchTableData *scan, if (unlikely(TransactionIdIsValid(CheckXidAlive) && !bsysscan)) elog(ERROR, "unexpected table_index_fetch_tuple call during logical decoding"); - return scan->rel->rd_tableam->index_fetch_tuple(scan, tid, snapshot, + return scan->rel->rd_tableam->index_fetch_tuple(scan, tupleid, snapshot, slot, call_again, all_dead); } From df5399898c8cd37ef2d75263135ba66b81d62f65 Mon Sep 17 00:00:00 2001 From: Ilya Kobets Date: Wed, 8 May 2024 04:09:19 +0200 Subject: [PATCH 032/102] Remove primary index am check --- src/backend/access/index/indexam.c | 3 ++- src/backend/catalog/index.c | 3 --- src/backend/parser/parse_utilcmd.c | 13 ------------- 3 files changed, 2 insertions(+), 17 deletions(-) diff --git a/src/backend/access/index/indexam.c b/src/backend/access/index/indexam.c index b19a7b7297e..70974ce9c17 100644 --- a/src/backend/access/index/indexam.c +++ b/src/backend/access/index/indexam.c @@ -758,7 +758,8 @@ index_getnext_slot(IndexScanDesc scan, ScanDirection direction, TupleTableSlot * * If we don't find anything, loop around and grab the next TID from * the index. */ - Assert(ItemPointerIsValid(&scan->xs_heaptid)); + if (!scan->xs_want_rowid) + Assert(ItemPointerIsValid(&scan->xs_heaptid)); if (index_fetch_heap(scan, slot)) return true; } diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c index 0dacdea43b6..b6a00442109 100644 --- a/src/backend/catalog/index.c +++ b/src/backend/catalog/index.c @@ -2678,9 +2678,6 @@ BuildSpeculativeIndexInfo(Relation index, IndexInfo *ii) */ Assert(ii->ii_Unique); - if (index->rd_rel->relam != BTREE_AM_OID) - elog(ERROR, "unexpected non-btree speculative unique index"); - ii->ii_UniqueOps = (Oid *) palloc(sizeof(Oid) * indnkeyatts); ii->ii_UniqueProcs = (Oid *) palloc(sizeof(Oid) * indnkeyatts); ii->ii_UniqueStrats = (uint16 *) palloc(sizeof(uint16) * indnkeyatts); diff --git a/src/backend/parser/parse_utilcmd.c b/src/backend/parser/parse_utilcmd.c index 8fcaf15cd51..129448f85e3 100644 --- a/src/backend/parser/parse_utilcmd.c +++ b/src/backend/parser/parse_utilcmd.c @@ -2348,19 +2348,6 @@ transformIndexConstraint(Constraint *constraint, CreateStmtContext *cxt) errdetail("Cannot create a non-deferrable constraint using a deferrable index."), parser_errposition(cxt->pstate, constraint->location))); - /* - * Insist on it being a btree. That's the only kind that supports - * uniqueness at the moment anyway; but we must have an index that - * exactly matches what you'd get from plain ADD CONSTRAINT syntax, - * else dump and reload will produce a different index (breaking - * pg_upgrade in particular). - */ - if (index_rel->rd_rel->relam != get_index_am_oid(DEFAULT_INDEX_TYPE, false)) - ereport(ERROR, - (errcode(ERRCODE_WRONG_OBJECT_TYPE), - errmsg("index \"%s\" is not a btree", index_name), - parser_errposition(cxt->pstate, constraint->location))); - /* Must get indclass the hard way */ indclassDatum = SysCacheGetAttrNotNull(INDEXRELID, index_rel->rd_indextuple, From 7f67b57723e2cae7014964a238094c19c0bbf10d Mon Sep 17 00:00:00 2001 From: Ilya Kobets Date: Mon, 13 May 2024 20:33:54 +0200 Subject: [PATCH 033/102] Passing tupleid to insert now --- contrib/bloom/blinsert.c | 3 ++- contrib/bloom/bloom.h | 2 +- src/backend/access/brin/brin.c | 3 ++- src/backend/access/common/toast_internals.c | 2 +- src/backend/access/gin/gininsert.c | 3 ++- src/backend/access/gist/gist.c | 3 ++- src/backend/access/hash/hash.c | 3 ++- src/backend/access/heap/heapam_handler.c | 2 +- src/backend/access/index/indexam.c | 4 ++-- src/backend/access/nbtree/nbtree.c | 3 ++- src/backend/access/spgist/spginsert.c | 3 ++- src/backend/catalog/indexing.c | 2 +- src/backend/commands/constraint.c | 2 +- src/backend/executor/execIndexing.c | 18 +++++++++++++++--- src/backend/executor/nodeModifyTable.c | 4 ++-- src/include/access/amapi.h | 2 +- src/include/access/brin_internal.h | 2 +- src/include/access/genam.h | 2 +- src/include/access/gin_private.h | 2 +- src/include/access/gist_private.h | 2 +- src/include/access/hash.h | 2 +- src/include/access/nbtree.h | 2 +- src/include/access/spgist.h | 2 +- .../modules/dummy_index_am/dummy_index_am.c | 2 +- 24 files changed, 47 insertions(+), 28 deletions(-) diff --git a/contrib/bloom/blinsert.c b/contrib/bloom/blinsert.c index b90145148d4..99aed8f9948 100644 --- a/contrib/bloom/blinsert.c +++ b/contrib/bloom/blinsert.c @@ -172,7 +172,7 @@ blbuildempty(Relation index) */ bool blinsert(Relation index, Datum *values, bool *isnull, - ItemPointer ht_ctid, Relation heapRel, + Datum tupleid, Relation heapRel, IndexUniqueCheck checkUnique, bool indexUnchanged, IndexInfo *indexInfo) @@ -189,6 +189,7 @@ blinsert(Relation index, Datum *values, bool *isnull, BlockNumber blkno = InvalidBlockNumber; OffsetNumber nStart; GenericXLogState *state; + ItemPointer ht_ctid = DatumGetItemPointer(tupleid); insertCtx = AllocSetContextCreate(CurrentMemoryContext, "Bloom insert temporary context", diff --git a/contrib/bloom/bloom.h b/contrib/bloom/bloom.h index 330811ec608..15ef1b9aee2 100644 --- a/contrib/bloom/bloom.h +++ b/contrib/bloom/bloom.h @@ -189,7 +189,7 @@ extern bool blvalidate(Oid opclassoid); /* index access method interface functions */ extern bool blinsert(Relation index, Datum *values, bool *isnull, - ItemPointer ht_ctid, Relation heapRel, + Datum tupleid, Relation heapRel, IndexUniqueCheck checkUnique, bool indexUnchanged, struct IndexInfo *indexInfo); diff --git a/src/backend/access/brin/brin.c b/src/backend/access/brin/brin.c index d17cec3c12a..1db6068f05b 100644 --- a/src/backend/access/brin/brin.c +++ b/src/backend/access/brin/brin.c @@ -154,7 +154,7 @@ brinhandler(PG_FUNCTION_ARGS) */ bool brininsert(Relation idxRel, Datum *values, bool *nulls, - ItemPointer heaptid, Relation heapRel, + Datum tupleid, Relation heapRel, IndexUniqueCheck checkUnique, bool indexUnchanged, IndexInfo *indexInfo) @@ -168,6 +168,7 @@ brininsert(Relation idxRel, Datum *values, bool *nulls, MemoryContext tupcxt = NULL; MemoryContext oldcxt = CurrentMemoryContext; bool autosummarize = BrinGetAutoSummarize(idxRel); + ItemPointer heaptid = DatumGetItemPointer(tupleid); revmap = brinRevmapInitialize(idxRel, &pagesPerRange, NULL); diff --git a/src/backend/access/common/toast_internals.c b/src/backend/access/common/toast_internals.c index f1e63599bf3..c95462c2755 100644 --- a/src/backend/access/common/toast_internals.c +++ b/src/backend/access/common/toast_internals.c @@ -339,7 +339,7 @@ toast_save_datum(Relation rel, Datum value, /* Only index relations marked as ready can be updated */ if (toastidxs[i]->rd_index->indisready) index_insert(toastidxs[i], t_values, t_isnull, - &(toasttup->t_self), + ItemPointerGetDatum(&(toasttup->t_self)), toastrel, toastidxs[i]->rd_index->indisunique ? UNIQUE_CHECK_YES : UNIQUE_CHECK_NO, diff --git a/src/backend/access/gin/gininsert.c b/src/backend/access/gin/gininsert.c index 56968b95acf..36815547151 100644 --- a/src/backend/access/gin/gininsert.c +++ b/src/backend/access/gin/gininsert.c @@ -484,7 +484,7 @@ ginHeapTupleInsert(GinState *ginstate, OffsetNumber attnum, bool gininsert(Relation index, Datum *values, bool *isnull, - ItemPointer ht_ctid, Relation heapRel, + Datum tupleid, Relation heapRel, IndexUniqueCheck checkUnique, bool indexUnchanged, IndexInfo *indexInfo) @@ -493,6 +493,7 @@ gininsert(Relation index, Datum *values, bool *isnull, MemoryContext oldCtx; MemoryContext insertCtx; int i; + ItemPointer ht_ctid = DatumGetItemPointer(tupleid); /* Initialize GinState cache if first call in this statement */ if (ginstate == NULL) diff --git a/src/backend/access/gist/gist.c b/src/backend/access/gist/gist.c index 9a5647eff49..eb39873cd3e 100644 --- a/src/backend/access/gist/gist.c +++ b/src/backend/access/gist/gist.c @@ -156,7 +156,7 @@ gistbuildempty(Relation index) */ bool gistinsert(Relation r, Datum *values, bool *isnull, - ItemPointer ht_ctid, Relation heapRel, + Datum tupleid, Relation heapRel, IndexUniqueCheck checkUnique, bool indexUnchanged, IndexInfo *indexInfo) @@ -164,6 +164,7 @@ gistinsert(Relation r, Datum *values, bool *isnull, GISTSTATE *giststate = (GISTSTATE *) indexInfo->ii_AmCache; IndexTuple itup; MemoryContext oldCxt; + ItemPointer ht_ctid = DatumGetItemPointer(tupleid); /* Initialize GISTSTATE cache if first call in this statement */ if (giststate == NULL) diff --git a/src/backend/access/hash/hash.c b/src/backend/access/hash/hash.c index fc5d97f606e..c8202e9349d 100644 --- a/src/backend/access/hash/hash.c +++ b/src/backend/access/hash/hash.c @@ -247,7 +247,7 @@ hashbuildCallback(Relation index, */ bool hashinsert(Relation rel, Datum *values, bool *isnull, - ItemPointer ht_ctid, Relation heapRel, + Datum tupleid, Relation heapRel, IndexUniqueCheck checkUnique, bool indexUnchanged, IndexInfo *indexInfo) @@ -255,6 +255,7 @@ hashinsert(Relation rel, Datum *values, bool *isnull, Datum index_values[1]; bool index_isnull[1]; IndexTuple itup; + ItemPointer ht_ctid = DatumGetItemPointer(tupleid); /* convert data to a hash key; on failure, do not insert anything */ if (!_hash_convert_tuple(rel, diff --git a/src/backend/access/heap/heapam_handler.c b/src/backend/access/heap/heapam_handler.c index a32fc3b69fb..8f6559c9c3e 100644 --- a/src/backend/access/heap/heapam_handler.c +++ b/src/backend/access/heap/heapam_handler.c @@ -2310,7 +2310,7 @@ heapam_index_validate_scan(Relation heapRelation, index_insert(indexRelation, values, isnull, - &rootTuple, + ItemPointerGetDatum(&rootTuple), heapRelation, indexInfo->ii_Unique ? UNIQUE_CHECK_YES : UNIQUE_CHECK_NO, diff --git a/src/backend/access/index/indexam.c b/src/backend/access/index/indexam.c index 70974ce9c17..72ee401e875 100644 --- a/src/backend/access/index/indexam.c +++ b/src/backend/access/index/indexam.c @@ -218,7 +218,7 @@ bool index_insert(Relation indexRelation, Datum *values, bool *isnull, - ItemPointer heap_t_ctid, + Datum tupleid, Relation heapRelation, IndexUniqueCheck checkUnique, bool indexUnchanged, @@ -233,7 +233,7 @@ index_insert(Relation indexRelation, InvalidBlockNumber); return indexRelation->rd_indam->aminsert(indexRelation, values, isnull, - heap_t_ctid, heapRelation, + tupleid, heapRelation, checkUnique, indexUnchanged, indexInfo); } diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c index 6c5b5c69ce5..32ec09b1ec7 100644 --- a/src/backend/access/nbtree/nbtree.c +++ b/src/backend/access/nbtree/nbtree.c @@ -188,13 +188,14 @@ btbuildempty(Relation index) */ bool btinsert(Relation rel, Datum *values, bool *isnull, - ItemPointer ht_ctid, Relation heapRel, + Datum tupleid, Relation heapRel, IndexUniqueCheck checkUnique, bool indexUnchanged, IndexInfo *indexInfo) { bool result; IndexTuple itup; + ItemPointer ht_ctid = DatumGetItemPointer(tupleid); /* generate an index tuple */ itup = index_form_tuple(RelationGetDescr(rel), values, isnull); diff --git a/src/backend/access/spgist/spginsert.c b/src/backend/access/spgist/spginsert.c index 4443f1918df..1f5c9a930d2 100644 --- a/src/backend/access/spgist/spginsert.c +++ b/src/backend/access/spgist/spginsert.c @@ -198,7 +198,7 @@ spgbuildempty(Relation index) */ bool spginsert(Relation index, Datum *values, bool *isnull, - ItemPointer ht_ctid, Relation heapRel, + Datum tupleid, Relation heapRel, IndexUniqueCheck checkUnique, bool indexUnchanged, IndexInfo *indexInfo) @@ -206,6 +206,7 @@ spginsert(Relation index, Datum *values, bool *isnull, SpGistState spgstate; MemoryContext oldCtx; MemoryContext insertCtx; + ItemPointer ht_ctid = DatumGetItemPointer(tupleid); insertCtx = AllocSetContextCreate(CurrentMemoryContext, "SP-GiST insert temporary context", diff --git a/src/backend/catalog/indexing.c b/src/backend/catalog/indexing.c index 522da0ac855..9846637537c 100644 --- a/src/backend/catalog/indexing.c +++ b/src/backend/catalog/indexing.c @@ -170,7 +170,7 @@ CatalogIndexInsert(CatalogIndexState indstate, HeapTuple heapTuple, index_insert(index, /* index relation */ values, /* array of index Datums */ isnull, /* is-null flags */ - &(heapTuple->t_self), /* tid of heap tuple */ + ItemPointerGetDatum(&(heapTuple->t_self)), /* tid of heap tuple */ heapRelation, index->rd_index->indisunique ? UNIQUE_CHECK_YES : UNIQUE_CHECK_NO, diff --git a/src/backend/commands/constraint.c b/src/backend/commands/constraint.c index 982bae9ed42..04e718c95cd 100644 --- a/src/backend/commands/constraint.c +++ b/src/backend/commands/constraint.c @@ -173,7 +173,7 @@ unique_key_recheck(PG_FUNCTION_ARGS) * the row is now dead, because that is the TID the index will know * about. */ - index_insert(indexRel, values, isnull, &checktid, + index_insert(indexRel, values, isnull, ItemPointerGetDatum(&checktid), trigdata->tg_relation, UNIQUE_CHECK_EXISTING, false, indexInfo); } diff --git a/src/backend/executor/execIndexing.c b/src/backend/executor/execIndexing.c index a11f43d04e9..033962a2721 100644 --- a/src/backend/executor/execIndexing.c +++ b/src/backend/executor/execIndexing.c @@ -299,7 +299,6 @@ ExecInsertIndexTuples(ResultRelInfo *resultRelInfo, List *arbiterIndexes, bool onlySummarizing) { - ItemPointer tupleid = &slot->tts_tid; List *result = NIL; int i; int numIndices; @@ -309,8 +308,20 @@ ExecInsertIndexTuples(ResultRelInfo *resultRelInfo, ExprContext *econtext; Datum values[INDEX_MAX_KEYS]; bool isnull[INDEX_MAX_KEYS]; + Datum tupleid; - Assert(ItemPointerIsValid(tupleid)); + + if (table_get_row_ref_type(resultRelInfo->ri_RelationDesc) == ROW_REF_ROWID) + { + bool isnull; + tupleid = slot_getsysattr(slot, RowIdAttributeNumber, &isnull); + Assert(!isnull); + } + else + { + Assert(ItemPointerIsValid(&slot->tts_tid)); + tupleid = PointerGetDatum(&slot->tts_tid); + } /* * Get information from the result relation info structure. @@ -457,6 +468,7 @@ ExecInsertIndexTuples(ResultRelInfo *resultRelInfo, { bool violationOK; CEOUC_WAIT_MODE waitMode; + ItemPointer raw_tupleid = DatumGetItemPointer(tupleid); if (applyNoDupErr) { @@ -477,7 +489,7 @@ ExecInsertIndexTuples(ResultRelInfo *resultRelInfo, satisfiesConstraint = check_exclusion_or_unique_constraint(heapRelation, indexRelation, indexInfo, - tupleid, values, isnull, + raw_tupleid, values, isnull, estate, false, waitMode, violationOK, NULL); } diff --git a/src/backend/executor/nodeModifyTable.c b/src/backend/executor/nodeModifyTable.c index 2066fdde3c8..05571acbbe5 100644 --- a/src/backend/executor/nodeModifyTable.c +++ b/src/backend/executor/nodeModifyTable.c @@ -1777,8 +1777,8 @@ ExecCrossPartitionUpdate(ModifyTableContext *context, /* Tuple routing starts from the root table. */ context->cpUpdateReturningSlot = - ExecInsert(context, mtstate->rootResultRelInfo, slot, canSetTag, - inserted_tuple, insert_destrel); + ExecInsert(context, mtstate->rootResultRelInfo, + slot, canSetTag, inserted_tuple, insert_destrel); /* * Reset the transition state that may possibly have been written by diff --git a/src/include/access/amapi.h b/src/include/access/amapi.h index 2d70c5678c3..4cd0eb0523a 100644 --- a/src/include/access/amapi.h +++ b/src/include/access/amapi.h @@ -111,7 +111,7 @@ typedef void (*ambuildempty_function) (Relation indexRelation); typedef bool (*aminsert_function) (Relation indexRelation, Datum *values, bool *isnull, - ItemPointer heap_tid, + Datum tupleid, Relation heapRelation, IndexUniqueCheck checkUnique, bool indexUnchanged, diff --git a/src/include/access/brin_internal.h b/src/include/access/brin_internal.h index 97ddc925b27..418b32d5515 100644 --- a/src/include/access/brin_internal.h +++ b/src/include/access/brin_internal.h @@ -92,7 +92,7 @@ extern IndexBuildResult *brinbuild(Relation heap, Relation index, struct IndexInfo *indexInfo); extern void brinbuildempty(Relation index); extern bool brininsert(Relation idxRel, Datum *values, bool *nulls, - ItemPointer heaptid, Relation heapRel, + Datum tupleid, Relation heapRel, IndexUniqueCheck checkUnique, bool indexUnchanged, struct IndexInfo *indexInfo); diff --git a/src/include/access/genam.h b/src/include/access/genam.h index c9b4ac8c441..d0a19c20bdf 100644 --- a/src/include/access/genam.h +++ b/src/include/access/genam.h @@ -144,7 +144,7 @@ extern void index_close(Relation relation, LOCKMODE lockmode); extern bool index_insert(Relation indexRelation, Datum *values, bool *isnull, - ItemPointer heap_t_ctid, + Datum tupleid, Relation heapRelation, IndexUniqueCheck checkUnique, bool indexUnchanged, diff --git a/src/include/access/gin_private.h b/src/include/access/gin_private.h index 6da64928b66..7ba1d4bc999 100644 --- a/src/include/access/gin_private.h +++ b/src/include/access/gin_private.h @@ -114,7 +114,7 @@ extern IndexBuildResult *ginbuild(Relation heap, Relation index, struct IndexInfo *indexInfo); extern void ginbuildempty(Relation index); extern bool gininsert(Relation index, Datum *values, bool *isnull, - ItemPointer ht_ctid, Relation heapRel, + Datum tupleid, Relation heapRel, IndexUniqueCheck checkUnique, bool indexUnchanged, struct IndexInfo *indexInfo); diff --git a/src/include/access/gist_private.h b/src/include/access/gist_private.h index 3edc740a3f3..0cd19757208 100644 --- a/src/include/access/gist_private.h +++ b/src/include/access/gist_private.h @@ -401,7 +401,7 @@ typedef struct GiSTOptions /* gist.c */ extern void gistbuildempty(Relation index); extern bool gistinsert(Relation r, Datum *values, bool *isnull, - ItemPointer ht_ctid, Relation heapRel, + Datum tupleid, Relation heapRel, IndexUniqueCheck checkUnique, bool indexUnchanged, struct IndexInfo *indexInfo); diff --git a/src/include/access/hash.h b/src/include/access/hash.h index 9e035270a16..14fb8e4ce1e 100644 --- a/src/include/access/hash.h +++ b/src/include/access/hash.h @@ -364,7 +364,7 @@ extern IndexBuildResult *hashbuild(Relation heap, Relation index, struct IndexInfo *indexInfo); extern void hashbuildempty(Relation index); extern bool hashinsert(Relation rel, Datum *values, bool *isnull, - ItemPointer ht_ctid, Relation heapRel, + Datum tupleid, Relation heapRel, IndexUniqueCheck checkUnique, bool indexUnchanged, struct IndexInfo *indexInfo); diff --git a/src/include/access/nbtree.h b/src/include/access/nbtree.h index 9020abebc92..3f36ea455aa 100644 --- a/src/include/access/nbtree.h +++ b/src/include/access/nbtree.h @@ -1128,7 +1128,7 @@ typedef struct BTOptions */ extern void btbuildempty(Relation index); extern bool btinsert(Relation rel, Datum *values, bool *isnull, - ItemPointer ht_ctid, Relation heapRel, + Datum tupleid, Relation heapRel, IndexUniqueCheck checkUnique, bool indexUnchanged, struct IndexInfo *indexInfo); diff --git a/src/include/access/spgist.h b/src/include/access/spgist.h index fe31d32dbe9..e44d3561abf 100644 --- a/src/include/access/spgist.h +++ b/src/include/access/spgist.h @@ -197,7 +197,7 @@ extern IndexBuildResult *spgbuild(Relation heap, Relation index, struct IndexInfo *indexInfo); extern void spgbuildempty(Relation index); extern bool spginsert(Relation index, Datum *values, bool *isnull, - ItemPointer ht_ctid, Relation heapRel, + Datum tupleid, Relation heapRel, IndexUniqueCheck checkUnique, bool indexUnchanged, struct IndexInfo *indexInfo); diff --git a/src/test/modules/dummy_index_am/dummy_index_am.c b/src/test/modules/dummy_index_am/dummy_index_am.c index c14e0abe0c6..562a578cad2 100644 --- a/src/test/modules/dummy_index_am/dummy_index_am.c +++ b/src/test/modules/dummy_index_am/dummy_index_am.c @@ -164,7 +164,7 @@ dibuildempty(Relation index) */ static bool diinsert(Relation index, Datum *values, bool *isnull, - ItemPointer ht_ctid, Relation heapRel, + Datum tupleid, Relation heapRel, IndexUniqueCheck checkUnique, bool indexUnchanged, IndexInfo *indexInfo) From dd340f45b5235f40ebfe03f6aa80494f47a9c05f Mon Sep 17 00:00:00 2001 From: Ilya Kobets Date: Fri, 17 May 2024 00:27:02 +0200 Subject: [PATCH 034/102] Methods for index update and delete Also validates compatability of index AM with table AM at index creation --- src/backend/access/index/indexam.c | 60 ++++ src/backend/executor/execIndexing.c | 401 +++++++++++++++++++++++++ src/backend/executor/nodeModifyTable.c | 22 +- src/backend/parser/gram.y | 16 +- src/include/access/amapi.h | 23 ++ src/include/access/genam.h | 15 + src/include/executor/executor.h | 10 + src/include/nodes/parsenodes.h | 1 + 8 files changed, 542 insertions(+), 6 deletions(-) diff --git a/src/backend/access/index/indexam.c b/src/backend/access/index/indexam.c index 72ee401e875..0dabdabca8e 100644 --- a/src/backend/access/index/indexam.c +++ b/src/backend/access/index/indexam.c @@ -238,6 +238,66 @@ index_insert(Relation indexRelation, indexInfo); } +/* ---------------- + * index_update - update an index tuple in a relation + * ---------------- + */ +bool +index_update(Relation indexRelation, + bool new_valid, + bool old_valid, + Datum *values, + bool *isnull, + Datum tupleid, + Datum *valuesOld, + bool *isnullOld, + Datum oldTupleid, + Relation heapRelation, + IndexUniqueCheck checkUnique, + IndexInfo *indexInfo) +{ + RELATION_CHECKS; + CHECK_REL_PROCEDURE(amupdate); + + if (!(indexRelation->rd_indam->ampredlocks)) + CheckForSerializableConflictIn(indexRelation, + (ItemPointer) NULL, + InvalidBlockNumber); + + return indexRelation->rd_indam->amupdate(indexRelation, + new_valid, old_valid, + values, isnull, tupleid, + valuesOld, isnullOld, oldTupleid, + heapRelation, + checkUnique, + indexInfo); +} + + +/* ---------------- + * index_delete - delete an index tuple from a relation + * ---------------- + */ +bool +index_delete(Relation indexRelation, + Datum *values, bool *isnull, Datum tupleid, + Relation heapRelation, + IndexInfo *indexInfo) +{ + RELATION_CHECKS; + CHECK_REL_PROCEDURE(amdelete); + + if (!(indexRelation->rd_indam->ampredlocks)) + CheckForSerializableConflictIn(indexRelation, + (ItemPointer) NULL, + InvalidBlockNumber); + + return indexRelation->rd_indam->amdelete(indexRelation, + values, isnull, tupleid, + heapRelation, + indexInfo); +} + /* * index_beginscan - start a scan of an index with amgettuple * diff --git a/src/backend/executor/execIndexing.c b/src/backend/executor/execIndexing.c index 033962a2721..f18d2c0fe8f 100644 --- a/src/backend/executor/execIndexing.c +++ b/src/backend/executor/execIndexing.c @@ -513,6 +513,407 @@ ExecInsertIndexTuples(ResultRelInfo *resultRelInfo, return result; } +List * +ExecUpdateIndexTuples(ResultRelInfo *resultRelInfo, + TupleTableSlot *slot, + TupleTableSlot *oldSlot, + EState *estate, + bool noDupErr, + bool *specConflict, + List *arbiterIndexes, + bool onlySummarizing) +{ + List *result = NIL; + int i; + int numIndices; + RelationPtr relationDescs; + Relation heapRelation; + IndexInfo **indexInfoArray; + ExprContext *econtext; + Datum values[INDEX_MAX_KEYS]; + bool isnull[INDEX_MAX_KEYS]; + Datum tupleid; + + if (table_get_row_ref_type(resultRelInfo->ri_RelationDesc) == ROW_REF_ROWID) + { + bool isnull; + tupleid = slot_getsysattr(slot, RowIdAttributeNumber, &isnull); + Assert(!isnull); + } + else + { + Assert(ItemPointerIsValid(&slot->tts_tid)); + tupleid = PointerGetDatum(&slot->tts_tid); + } + + /* + * Get information from the result relation info structure. + */ + numIndices = resultRelInfo->ri_NumIndices; + relationDescs = resultRelInfo->ri_IndexRelationDescs; + indexInfoArray = resultRelInfo->ri_IndexRelationInfo; + heapRelation = resultRelInfo->ri_RelationDesc; + + /* Sanity check: slot must belong to the same rel as the resultRelInfo. */ + Assert(slot->tts_tableOid == RelationGetRelid(heapRelation)); + + /* + * for each index, form and insert the index tuple + */ + for (i = 0; i < numIndices; i++) + { + Relation indexRelation = relationDescs[i]; + IndexInfo *indexInfo; + bool applyNoDupErr; + IndexUniqueCheck checkUnique; + bool satisfiesConstraint; + bool new_valid = true; + + if (indexRelation == NULL) + continue; + + indexInfo = indexInfoArray[i]; + + /* If the index is marked as read-only, ignore it */ + if (!indexInfo->ii_ReadyForInserts) + continue; + + /* + * Skip processing of non-summarizing indexes if we only update + * summarizing indexes + */ + if (onlySummarizing && !indexInfo->ii_Summarizing) + continue; + + /* + * We will use the EState's per-tuple context for evaluating predicates + * and index expressions (creating it if it's not already there). + */ + econtext = GetPerTupleExprContext(estate); + + /* Arrange for econtext's scan tuple to be the tuple under test */ + econtext->ecxt_scantuple = slot; + + /* Check for partial index */ + if (indexInfo->ii_Predicate != NIL) + { + ExprState *predicate; + + /* + * If predicate state not set up yet, create it (in the estate's + * per-query context) + */ + predicate = indexInfo->ii_PredicateState; + if (predicate == NULL) + { + predicate = ExecPrepareQual(indexInfo->ii_Predicate, estate); + indexInfo->ii_PredicateState = predicate; + } + + /* Skip this index-update if the predicate isn't satisfied */ + if (!ExecQual(predicate, econtext)) + { + if (!indexRelation->rd_indam->ammvccaware) + continue; + new_valid = false; + } + } + + /* + * FormIndexDatum fills in its values and isnull parameters with the + * appropriate values for the column(s) of the index. + */ + FormIndexDatum(indexInfo, + slot, + estate, + values, + isnull); + + /* Check whether to apply noDupErr to this index */ + applyNoDupErr = noDupErr && + (arbiterIndexes == NIL || + list_member_oid(arbiterIndexes, + indexRelation->rd_index->indexrelid)); + + /* + * The index AM does the actual insertion, plus uniqueness checking. + * + * For an immediate-mode unique index, we just tell the index AM to + * throw error if not unique. + * + * For a deferrable unique index, we tell the index AM to just detect + * possible non-uniqueness, and we add the index OID to the result + * list if further checking is needed. + * + * For a speculative insertion (used by INSERT ... ON CONFLICT), do + * the same as for a deferrable unique index. + */ + if (!indexRelation->rd_index->indisunique) + checkUnique = UNIQUE_CHECK_NO; + else if (applyNoDupErr) + checkUnique = UNIQUE_CHECK_PARTIAL; + else if (indexRelation->rd_index->indimmediate) + checkUnique = UNIQUE_CHECK_YES; + else + checkUnique = UNIQUE_CHECK_PARTIAL; + + if (indexRelation->rd_indam->ammvccaware) + { + Datum valuesOld[INDEX_MAX_KEYS]; + bool isnullOld[INDEX_MAX_KEYS]; + Datum oldTupleid; + bool old_valid = true; + + if (table_get_row_ref_type(resultRelInfo->ri_RelationDesc) == ROW_REF_ROWID) + { + bool isnull; + oldTupleid = slot_getsysattr(oldSlot, RowIdAttributeNumber, &isnull); + Assert(!isnull); + } + else + { + Assert(ItemPointerIsValid(&oldSlot->tts_tid)); + oldTupleid = PointerGetDatum(&oldSlot->tts_tid); + } + + econtext = GetPerTupleExprContext(estate); + econtext->ecxt_scantuple = oldSlot; + + /* Check for partial index */ + if (indexInfo->ii_Predicate != NIL) + { + ExprState *predicate; + + /* + * If predicate state not set up yet, create it (in the estate's + * per-query context) + */ + predicate = indexInfo->ii_PredicateState; + if (predicate == NULL) + { + predicate = ExecPrepareQual(indexInfo->ii_Predicate, estate); + indexInfo->ii_PredicateState = predicate; + } + + /* Skip this index-update if the predicate isn't satisfied */ + if (!ExecQual(predicate, econtext)) + old_valid = false; + } + + FormIndexDatum(indexInfo, + oldSlot, + estate, + valuesOld, + isnullOld); + + satisfiesConstraint = + index_update(indexRelation, /* index relation */ + new_valid, + old_valid, + values, /* array of index Datums */ + isnull, /* null flags */ + tupleid, /* tid of heap tuple */ + valuesOld, + isnullOld, + oldTupleid, + heapRelation, /* heap relation */ + checkUnique, /* type of uniqueness check to do */ + indexInfo); /* index AM may need this */ + + } + else + { + bool indexUnchanged; + /* + * There's definitely going to be an index_insert() call for this + * index. If we're being called as part of an UPDATE statement, + * consider if the 'indexUnchanged' = true hint should be passed. + */ + indexUnchanged = index_unchanged_by_update(resultRelInfo, + estate, + indexInfo, + indexRelation); + + satisfiesConstraint = + index_insert(indexRelation, /* index relation */ + values, /* array of index Datums */ + isnull, /* null flags */ + tupleid, /* tid of heap tuple */ + heapRelation, /* heap relation */ + checkUnique, /* type of uniqueness check to do */ + indexUnchanged, /* UPDATE without logical change? */ + indexInfo); /* index AM may need this */ + } + + /* + * If the index has an associated exclusion constraint, check that. + * This is simpler than the process for uniqueness checks since we + * always insert first and then check. If the constraint is deferred, + * we check now anyway, but don't throw error on violation or wait for + * a conclusive outcome from a concurrent insertion; instead we'll + * queue a recheck event. Similarly, noDupErr callers (speculative + * inserters) will recheck later, and wait for a conclusive outcome + * then. + * + * An index for an exclusion constraint can't also be UNIQUE (not an + * essential property, we just don't allow it in the grammar), so no + * need to preserve the prior state of satisfiesConstraint. + */ + if (indexInfo->ii_ExclusionOps != NULL) + { + bool violationOK; + CEOUC_WAIT_MODE waitMode; + ItemPointer raw_tupleid = DatumGetItemPointer(tupleid); + + if (applyNoDupErr) + { + violationOK = true; + waitMode = CEOUC_LIVELOCK_PREVENTING_WAIT; + } + else if (!indexRelation->rd_index->indimmediate) + { + violationOK = true; + waitMode = CEOUC_NOWAIT; + } + else + { + violationOK = false; + waitMode = CEOUC_WAIT; + } + + satisfiesConstraint = + check_exclusion_or_unique_constraint(heapRelation, + indexRelation, indexInfo, + raw_tupleid, values, isnull, + estate, false, + waitMode, violationOK, NULL); + } + + if ((checkUnique == UNIQUE_CHECK_PARTIAL || + indexInfo->ii_ExclusionOps != NULL) && + !satisfiesConstraint) + { + /* + * The tuple potentially violates the uniqueness or exclusion + * constraint, so make a note of the index so that we can re-check + * it later. Speculative inserters are told if there was a + * speculative conflict, since that always requires a restart. + */ + result = lappend_oid(result, RelationGetRelid(indexRelation)); + if (indexRelation->rd_index->indimmediate && specConflict) + *specConflict = true; + } + } + + return result; +} + +void +ExecDeleteIndexTuples(ResultRelInfo *resultRelInfo, TupleTableSlot *slot, + EState *estate) +{ + int i; + int numIndices; + RelationPtr relationDescs; + Relation heapRelation; + IndexInfo **indexInfoArray; + ExprContext *econtext; + Datum values[INDEX_MAX_KEYS]; + bool isnull[INDEX_MAX_KEYS]; + Datum tupleid; + + if (table_get_row_ref_type(resultRelInfo->ri_RelationDesc) == ROW_REF_ROWID) + { + bool isnull; + tupleid = slot_getsysattr(slot, RowIdAttributeNumber, &isnull); + Assert(!isnull); + } + else + { + Assert(ItemPointerIsValid(&slot->tts_tid)); + tupleid = PointerGetDatum(&slot->tts_tid); + } + + /* + * Get information from the result relation info structure. + */ + numIndices = resultRelInfo->ri_NumIndices; + relationDescs = resultRelInfo->ri_IndexRelationDescs; + indexInfoArray = resultRelInfo->ri_IndexRelationInfo; + heapRelation = resultRelInfo->ri_RelationDesc; + + /* Sanity check: slot must belong to the same rel as the resultRelInfo. */ + Assert(slot->tts_tableOid == RelationGetRelid(heapRelation)); + + /* + * for each index, form and insert the index tuple + */ + for (i = 0; i < numIndices; i++) + { + Relation indexRelation = relationDescs[i]; + IndexInfo *indexInfo; + + if (indexRelation == NULL) + continue; + + indexInfo = indexInfoArray[i]; + + /* If the index is marked as read-only, ignore it */ + if (!indexInfo->ii_ReadyForInserts) + continue; + + if (!indexRelation->rd_indam->ammvccaware) + continue; + + /* + * We will use the EState's per-tuple context for evaluating predicates + * and index expressions (creating it if it's not already there). + */ + econtext = GetPerTupleExprContext(estate); + + /* Arrange for econtext's scan tuple to be the tuple under test */ + econtext->ecxt_scantuple = slot; + + /* Check for partial index */ + if (indexInfo->ii_Predicate != NIL) + { + ExprState *predicate; + + /* + * If predicate state not set up yet, create it (in the estate's + * per-query context) + */ + predicate = indexInfo->ii_PredicateState; + if (predicate == NULL) + { + predicate = ExecPrepareQual(indexInfo->ii_Predicate, estate); + indexInfo->ii_PredicateState = predicate; + } + + /* Skip this index-update if the predicate isn't satisfied */ + if (!ExecQual(predicate, econtext)) + continue; + } + + /* + * FormIndexDatum fills in its values and isnull parameters with the + * appropriate values for the column(s) of the index. + */ + FormIndexDatum(indexInfo, + slot, + estate, + values, + isnull); + + index_delete(indexRelation, /* index relation */ + values, /* array of index Datums */ + isnull, /* null flags */ + tupleid, /* tid of heap tuple */ + heapRelation, /* heap relation */ + indexInfo); /* index AM may need this */ + } +} + /* ---------------------------------------------------------------- * ExecCheckIndexConstraints * diff --git a/src/backend/executor/nodeModifyTable.c b/src/backend/executor/nodeModifyTable.c index 05571acbbe5..c75e4df7cc1 100644 --- a/src/backend/executor/nodeModifyTable.c +++ b/src/backend/executor/nodeModifyTable.c @@ -1256,6 +1256,14 @@ ExecDeletePrologue(ModifyTableContext *context, ResultRelInfo *resultRelInfo, if (result) *result = TM_Ok; + /* + * Open the table's indexes, if we have not done so already, so that we + * can delete index entries. + */ + if (resultRelInfo->ri_RelationDesc->rd_rel->relhasindex && + resultRelInfo->ri_IndexRelationDescs == NULL) + ExecOpenIndices(resultRelInfo, false); + /* BEFORE ROW DELETE triggers */ if (resultRelInfo->ri_TrigDesc && resultRelInfo->ri_TrigDesc->trig_delete_before_row) @@ -1312,6 +1320,10 @@ ExecDeleteEpilogue(ModifyTableContext *context, ResultRelInfo *resultRelInfo, EState *estate = context->estate; TransitionCaptureState *ar_delete_trig_tcs; + /* delete index entries if necessary */ + if (resultRelInfo->ri_NumIndices > 0) + ExecDeleteIndexTuples(resultRelInfo, slot, context->estate); + /* * If this delete is the result of a partition key update that moved the * tuple to a new partition, put this row into the transition OLD TABLE, @@ -2043,11 +2055,15 @@ ExecUpdateEpilogue(ModifyTableContext *context, UpdateContext *updateCxt, /* insert index entries for tuple if necessary */ if (resultRelInfo->ri_NumIndices > 0 && (updateCxt->updateIndexes != TU_None)) - recheckIndexes = ExecInsertIndexTuples(resultRelInfo, - slot, context->estate, - true, false, + { + recheckIndexes = ExecUpdateIndexTuples(resultRelInfo, + slot, + oldSlot, + context->estate, + false, NULL, NIL, (updateCxt->updateIndexes == TU_Summarizing)); + } /* AFTER ROW UPDATE Triggers */ ExecARUpdateTriggers(context->estate, resultRelInfo, diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y index 702522f3d30..aad8d35563c 100644 --- a/src/backend/parser/gram.y +++ b/src/backend/parser/gram.y @@ -371,6 +371,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query); %type OptSchemaEltList parameter_name_list %type am_type +%type opt_for_tableam %type TriggerForSpec TriggerForType %type TriggerActionTime @@ -5753,17 +5754,21 @@ row_security_cmd: /***************************************************************************** * * QUERY: - * CREATE ACCESS METHOD name HANDLER handler_name + * CREATE ACCESS METHOD name TYPE am_type + * [FOR tableam_name] + * HANDLER handler_name * *****************************************************************************/ -CreateAmStmt: CREATE ACCESS METHOD name TYPE_P am_type HANDLER handler_name +CreateAmStmt: CREATE ACCESS METHOD name TYPE_P am_type + opt_for_tableam HANDLER handler_name { CreateAmStmt *n = makeNode(CreateAmStmt); n->amname = $4; - n->handler_name = $8; n->amtype = $6; + n->tableam_name = $7; + n->handler_name = $9; $$ = (Node *) n; } ; @@ -5773,6 +5778,11 @@ am_type: | TABLE { $$ = AMTYPE_TABLE; } ; +opt_for_tableam: + FOR name { $$ = $2; } + | /*EMPTY*/ { $$ = NULL; } + ; + /***************************************************************************** * * QUERIES : diff --git a/src/include/access/amapi.h b/src/include/access/amapi.h index 4cd0eb0523a..d6ee79dcdf5 100644 --- a/src/include/access/amapi.h +++ b/src/include/access/amapi.h @@ -116,6 +116,25 @@ typedef bool (*aminsert_function) (Relation indexRelation, IndexUniqueCheck checkUnique, bool indexUnchanged, struct IndexInfo *indexInfo); +/* update this tuple */ +typedef bool (*amupdate_function) (Relation indexRelation, + bool new_valid, + bool old_valid, + Datum *values, + bool *isnull, + Datum tupleid, + Datum *valuesOld, + bool *isnullOld, + Datum oldTupleid, + Relation heapRelation, + IndexUniqueCheck checkUnique, + struct IndexInfo *indexInfo); +/* delete this tuple */ +typedef bool (*amdelete_function) (Relation indexRelation, + Datum *values, bool *isnull, + Datum tupleid, + Relation heapRelation, + struct IndexInfo *indexInfo); /* bulk delete */ typedef IndexBulkDeleteResult *(*ambulkdelete_function) (IndexVacuumInfo *info, @@ -250,6 +269,8 @@ typedef struct IndexAmRoutine bool amusemaintenanceworkmem; /* does AM store tuple information only at block granularity? */ bool amsummarizing; + /* does AM can provide MVCC */ + bool ammvccaware; /* OR of parallel vacuum flags. See vacuum.h for flags. */ uint8 amparallelvacuumoptions; /* type of data stored in index, or InvalidOid if variable */ @@ -265,6 +286,8 @@ typedef struct IndexAmRoutine ambuild_function ambuild; ambuildempty_function ambuildempty; aminsert_function aminsert; + amupdate_function amupdate; + amdelete_function amdelete; ambulkdelete_function ambulkdelete; amvacuumcleanup_function amvacuumcleanup; amcanreturn_function amcanreturn; /* can be NULL */ diff --git a/src/include/access/genam.h b/src/include/access/genam.h index d0a19c20bdf..908608184a1 100644 --- a/src/include/access/genam.h +++ b/src/include/access/genam.h @@ -149,6 +149,21 @@ extern bool index_insert(Relation indexRelation, IndexUniqueCheck checkUnique, bool indexUnchanged, struct IndexInfo *indexInfo); +extern bool index_update(Relation indexRelation, + bool new_valid, + bool old_valid, + Datum *values, + bool *isnull, + Datum tupleid, + Datum *valuesOld, + bool *isnullOld, + Datum oldTupleid, + Relation heapRelation, + IndexUniqueCheck checkUnique, + struct IndexInfo *indexInfo); +extern bool index_delete(Relation indexRelation, Datum *values, bool *isnull, + Datum tupleid, Relation heapRelation, + struct IndexInfo *indexInfo); extern IndexScanDesc index_beginscan(Relation heapRelation, Relation indexRelation, diff --git a/src/include/executor/executor.h b/src/include/executor/executor.h index baef7e031ee..355e92a361e 100644 --- a/src/include/executor/executor.h +++ b/src/include/executor/executor.h @@ -637,6 +637,16 @@ extern List *ExecInsertIndexTuples(ResultRelInfo *resultRelInfo, bool noDupErr, bool *specConflict, List *arbiterIndexes, bool onlySummarizing); +extern List *ExecUpdateIndexTuples(ResultRelInfo *resultRelInfo, + TupleTableSlot *slot, + TupleTableSlot *oldSlot, + EState *estate, + bool noDupErr, + bool *specConflict, List *arbiterIndexes, + bool onlySummarizing); +extern void ExecDeleteIndexTuples(ResultRelInfo *resultRelInfo, + TupleTableSlot *slot, + EState *estate); extern bool ExecCheckIndexConstraints(ResultRelInfo *resultRelInfo, TupleTableSlot *slot, EState *estate, ItemPointer conflictTid, diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h index 99173f541d5..f51ec29fc92 100644 --- a/src/include/nodes/parsenodes.h +++ b/src/include/nodes/parsenodes.h @@ -2824,6 +2824,7 @@ typedef struct CreateAmStmt char *amname; /* access method name */ List *handler_name; /* handler function name */ char amtype; /* type of access method */ + char *tableam_name; /* table AM name */ } CreateAmStmt; /* ---------------------- From ea15f47d4d0244ac5f4faaa68d4977ff8b1bf65a Mon Sep 17 00:00:00 2001 From: Alexander Korotkov Date: Mon, 12 Aug 2024 12:30:00 +0300 Subject: [PATCH 035/102] Hook to override index AM routine --- src/backend/access/index/amapi.c | 67 ++++++++++++++++++++++++------ src/backend/catalog/index.c | 2 +- src/backend/commands/indexcmds.c | 4 +- src/backend/commands/opclasscmds.c | 9 ++-- src/backend/executor/execAmi.c | 2 +- src/backend/utils/adt/amutils.c | 4 +- src/backend/utils/adt/ruleutils.c | 2 +- src/backend/utils/cache/relcache.c | 2 +- src/include/access/amapi.h | 9 +++- 9 files changed, 75 insertions(+), 26 deletions(-) diff --git a/src/backend/access/index/amapi.c b/src/backend/access/index/amapi.c index 8b02cdbe825..f4526997474 100644 --- a/src/backend/access/index/amapi.c +++ b/src/backend/access/index/amapi.c @@ -16,25 +16,27 @@ #include "access/amapi.h" #include "access/htup_details.h" #include "catalog/pg_am.h" +#include "catalog/pg_class.h" +#include "catalog/pg_index.h" #include "catalog/pg_opclass.h" #include "utils/builtins.h" #include "utils/syscache.h" +IndexAMRoutineHookType IndexAMRoutineHook = NULL; -/* - * GetIndexAmRoutine - call the specified access method handler routine to get - * its IndexAmRoutine struct, which will be palloc'd in the caller's context. - * - * Note that if the amhandler function is built-in, this will not involve - * any catalog access. It's therefore safe to use this while bootstrapping - * indexes for the system catalogs. relcache.c relies on that. - */ IndexAmRoutine * -GetIndexAmRoutine(Oid amhandler) +GetIndexAmRoutineWithTableAM(Oid tamoid, Oid amhandler) { Datum datum; IndexAmRoutine *routine; + if (IndexAMRoutineHook != NULL) + { + routine = IndexAMRoutineHook(tamoid, amhandler); + if (routine) + return routine; + } + datum = OidFunctionCall0(amhandler); routine = (IndexAmRoutine *) DatumGetPointer(datum); @@ -45,6 +47,47 @@ GetIndexAmRoutine(Oid amhandler) return routine; } + +/* + * GetIndexAmRoutine - call the specified access method handler routine to get + * its IndexAmRoutine struct, which will be palloc'd in the caller's context. + * + * Note that if the amhandler function is built-in, this will not involve + * any catalog access. It's therefore safe to use this while bootstrapping + * indexes for the system catalogs. relcache.c relies on that. + */ +IndexAmRoutine * +GetIndexAmRoutine(Oid indoid, Oid amhandler) +{ + HeapTuple ht_idx; + HeapTuple ht_tblrel; + Form_pg_index idxrec; + Form_pg_class tblrelrec; + Oid indrelid; + Oid tamoid; + + if (!OidIsValid((indoid)) || indoid < FirstNormalObjectId) + return GetIndexAmRoutineWithTableAM(HEAP_TABLE_AM_OID, amhandler); + + ht_idx = SearchSysCache1(INDEXRELID, ObjectIdGetDatum(indoid)); + if (!HeapTupleIsValid(ht_idx)) + elog(ERROR, "cache lookup failed for index %u", indoid); + idxrec = (Form_pg_index) GETSTRUCT(ht_idx); + Assert(indoid == idxrec->indexrelid); + indrelid = idxrec->indrelid; + + ht_tblrel = SearchSysCache1(RELOID, ObjectIdGetDatum(indrelid)); + if (!HeapTupleIsValid(ht_tblrel)) + elog(ERROR, "cache lookup failed for relation %u", indrelid); + tblrelrec = (Form_pg_class) GETSTRUCT(ht_tblrel); + tamoid = tblrelrec->relam; + + ReleaseSysCache(ht_tblrel); + ReleaseSysCache(ht_idx); + + return GetIndexAmRoutineWithTableAM(tamoid, amhandler); +} + /* * GetIndexAmRoutineByAmId - look up the handler of the index access method * with the given OID, and get its IndexAmRoutine struct. @@ -53,7 +96,7 @@ GetIndexAmRoutine(Oid amhandler) * noerror is true, else throws error. */ IndexAmRoutine * -GetIndexAmRoutineByAmId(Oid amoid, bool noerror) +GetIndexAmRoutineByAmId(Oid indoid, Oid amoid, bool noerror) { HeapTuple tuple; Form_pg_am amform; @@ -103,7 +146,7 @@ GetIndexAmRoutineByAmId(Oid amoid, bool noerror) ReleaseSysCache(tuple); /* And finally, call the handler function to get the API struct. */ - return GetIndexAmRoutine(amhandler); + return GetIndexAmRoutine(indoid, amhandler); } @@ -129,7 +172,7 @@ amvalidate(PG_FUNCTION_ARGS) ReleaseSysCache(classtup); - amroutine = GetIndexAmRoutineByAmId(amoid, false); + amroutine = GetIndexAmRoutineByAmId(InvalidOid, amoid, false); if (amroutine->amvalidate == NULL) elog(ERROR, "function amvalidate is not defined for index access method %u", diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c index b6a00442109..0f3367f8ff8 100644 --- a/src/backend/catalog/index.c +++ b/src/backend/catalog/index.c @@ -298,7 +298,7 @@ ConstructTupleDescriptor(Relation heapRelation, int i; /* We need access to the index AM's API struct */ - amroutine = GetIndexAmRoutineByAmId(accessMethodObjectId, false); + amroutine = GetIndexAmRoutineByAmId(InvalidOid, accessMethodObjectId, false); /* ... and to the table's tuple descriptor */ heapTupDesc = RelationGetDescr(heapRelation); diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c index 90986c605e0..8e8ffe22ee1 100644 --- a/src/backend/commands/indexcmds.c +++ b/src/backend/commands/indexcmds.c @@ -215,7 +215,7 @@ CheckIndexCompatible(Oid oldId, accessMethodName))); accessMethodForm = (Form_pg_am) GETSTRUCT(tuple); accessMethodId = accessMethodForm->oid; - amRoutine = GetIndexAmRoutine(accessMethodForm->amhandler); + amRoutine = GetIndexAmRoutine(oldId, accessMethodForm->amhandler); ReleaseSysCache(tuple); amcanorder = amRoutine->amcanorder; @@ -845,7 +845,7 @@ DefineIndex(Oid relationId, } accessMethodForm = (Form_pg_am) GETSTRUCT(tuple); accessMethodId = accessMethodForm->oid; - amRoutine = GetIndexAmRoutine(accessMethodForm->amhandler); + amRoutine = GetIndexAmRoutineWithTableAM(rel->rd_rel->relam, accessMethodForm->amhandler); pgstat_progress_update_param(PROGRESS_CREATEIDX_ACCESS_METHOD_OID, accessMethodId); diff --git a/src/backend/commands/opclasscmds.c b/src/backend/commands/opclasscmds.c index 3c0acfafbd2..21a432e821e 100644 --- a/src/backend/commands/opclasscmds.c +++ b/src/backend/commands/opclasscmds.c @@ -43,6 +43,7 @@ #include "parser/parse_func.h" #include "parser/parse_oper.h" #include "parser/parse_type.h" +#include "postgres_ext.h" #include "utils/builtins.h" #include "utils/fmgroids.h" #include "utils/lsyscache.h" @@ -378,7 +379,7 @@ DefineOpClass(CreateOpClassStmt *stmt) amform = (Form_pg_am) GETSTRUCT(tup); amoid = amform->oid; - amroutine = GetIndexAmRoutineByAmId(amoid, false); + amroutine = GetIndexAmRoutineByAmId(InvalidOid, amoid, false); ReleaseSysCache(tup); maxOpNumber = amroutine->amstrategies; @@ -836,7 +837,7 @@ AlterOpFamily(AlterOpFamilyStmt *stmt) amform = (Form_pg_am) GETSTRUCT(tup); amoid = amform->oid; - amroutine = GetIndexAmRoutineByAmId(amoid, false); + amroutine = GetIndexAmRoutineByAmId(InvalidOid, amoid, false); ReleaseSysCache(tup); maxOpNumber = amroutine->amstrategies; @@ -883,7 +884,7 @@ AlterOpFamilyAdd(AlterOpFamilyStmt *stmt, Oid amoid, Oid opfamilyoid, int maxOpNumber, int maxProcNumber, int optsProcNumber, List *items) { - IndexAmRoutine *amroutine = GetIndexAmRoutineByAmId(amoid, false); + IndexAmRoutine *amroutine = GetIndexAmRoutineByAmId(InvalidOid, amoid, false); List *operators; /* OpFamilyMember list for operators */ List *procedures; /* OpFamilyMember list for support procs */ ListCell *l; @@ -1166,7 +1167,7 @@ assignOperTypes(OpFamilyMember *member, Oid amoid, Oid typeoid) * the family has been created but not yet populated with the required * operators.) */ - IndexAmRoutine *amroutine = GetIndexAmRoutineByAmId(amoid, false); + IndexAmRoutine *amroutine = GetIndexAmRoutineByAmId(InvalidOid, amoid, false); if (!amroutine->amcanorderbyop) ereport(ERROR, diff --git a/src/backend/executor/execAmi.c b/src/backend/executor/execAmi.c index 9d18ce8c6b2..286a0f8f222 100644 --- a/src/backend/executor/execAmi.c +++ b/src/backend/executor/execAmi.c @@ -615,7 +615,7 @@ IndexSupportsBackwardScan(Oid indexid) idxrelrec = (Form_pg_class) GETSTRUCT(ht_idxrel); /* Fetch the index AM's API struct */ - amroutine = GetIndexAmRoutineByAmId(idxrelrec->relam, false); + amroutine = GetIndexAmRoutineByAmId(indexid, idxrelrec->relam, false); result = amroutine->amcanbackward; diff --git a/src/backend/utils/adt/amutils.c b/src/backend/utils/adt/amutils.c index 48852bf79e2..265fcfc86c4 100644 --- a/src/backend/utils/adt/amutils.c +++ b/src/backend/utils/adt/amutils.c @@ -195,7 +195,7 @@ indexam_property(FunctionCallInfo fcinfo, /* * Get AM information. If we don't have a valid AM OID, return NULL. */ - routine = GetIndexAmRoutineByAmId(amoid, true); + routine = GetIndexAmRoutineByAmId(index_oid, amoid, true); if (routine == NULL) PG_RETURN_NULL(); @@ -455,7 +455,7 @@ pg_indexam_progress_phasename(PG_FUNCTION_ARGS) IndexAmRoutine *routine; char *name; - routine = GetIndexAmRoutineByAmId(amoid, true); + routine = GetIndexAmRoutineByAmId(InvalidOid, amoid, true); if (routine == NULL || !routine->ambuildphasename) PG_RETURN_NULL(); diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/ruleutils.c index 445a59a10b6..a7ea18c1c11 100644 --- a/src/backend/utils/adt/ruleutils.c +++ b/src/backend/utils/adt/ruleutils.c @@ -1309,7 +1309,7 @@ pg_get_indexdef_worker(Oid indexrelid, int colno, amrec = (Form_pg_am) GETSTRUCT(ht_am); /* Fetch the index AM's API struct */ - amroutine = GetIndexAmRoutine(amrec->amhandler); + amroutine = GetIndexAmRoutine(indexrelid, amrec->amhandler); /* * Get the index expressions, if any. (NOTE: we do not use the relcache diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c index 89d9fd37dc6..1af02a5585d 100644 --- a/src/backend/utils/cache/relcache.c +++ b/src/backend/utils/cache/relcache.c @@ -1404,7 +1404,7 @@ InitIndexAmRoutine(Relation relation) * Call the amhandler in current, short-lived memory context, just in case * it leaks anything (it probably won't, but let's be paranoid). */ - tmp = GetIndexAmRoutine(relation->rd_amhandler); + tmp = GetIndexAmRoutine(relation->rd_id, relation->rd_amhandler); /* OK, now transfer the data into relation's rd_indexcxt. */ cached = (IndexAmRoutine *) MemoryContextAlloc(relation->rd_indexcxt, diff --git a/src/include/access/amapi.h b/src/include/access/amapi.h index d6ee79dcdf5..5c9c9adcc82 100644 --- a/src/include/access/amapi.h +++ b/src/include/access/amapi.h @@ -313,7 +313,12 @@ typedef struct IndexAmRoutine /* Functions in access/index/amapi.c */ -extern IndexAmRoutine *GetIndexAmRoutine(Oid amhandler); -extern IndexAmRoutine *GetIndexAmRoutineByAmId(Oid amoid, bool noerror); +extern IndexAmRoutine *GetIndexAmRoutineWithTableAM(Oid tamoid, Oid amhandler); +extern IndexAmRoutine *GetIndexAmRoutine(Oid indoid, Oid amhandler); +extern IndexAmRoutine *GetIndexAmRoutineByAmId(Oid indoid, Oid amoid, bool noerror); + +typedef IndexAmRoutine *(*IndexAMRoutineHookType) (Oid tamoid, Oid amhandler); + +extern IndexAMRoutineHookType IndexAMRoutineHook; #endif /* AMAPI_H */ From 2ccc0991ac9e061d7165e176a859fc6ef009a6f0 Mon Sep 17 00:00:00 2001 From: Ilya Kobets Date: Thu, 5 Sep 2024 00:03:23 +0200 Subject: [PATCH 036/102] Always building child/root maps for relations with ROW_REF_ROWID --- src/backend/executor/execUtils.c | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/src/backend/executor/execUtils.c b/src/backend/executor/execUtils.c index 3057c0988f9..bb77bfc9b57 100644 --- a/src/backend/executor/execUtils.c +++ b/src/backend/executor/execUtils.c @@ -1242,9 +1242,19 @@ ExecGetChildToRootMap(ResultRelInfo *resultRelInfo) ResultRelInfo *rootRelInfo = resultRelInfo->ri_RootResultRelInfo; if (rootRelInfo) - resultRelInfo->ri_ChildToRootMap = - convert_tuples_by_name(RelationGetDescr(resultRelInfo->ri_RelationDesc), - RelationGetDescr(rootRelInfo->ri_RelationDesc)); + { + TupleDesc indesc = RelationGetDescr(resultRelInfo->ri_RelationDesc); + TupleDesc outdesc = RelationGetDescr(rootRelInfo->ri_RelationDesc); + AttrMap *attrMap; + + if (table_get_row_ref_type(resultRelInfo->ri_RelationDesc) != ROW_REF_ROWID) + attrMap = build_attrmap_by_name_if_req(indesc, outdesc, false); + else + attrMap = build_attrmap_by_name(indesc, outdesc, false); + if (attrMap) + resultRelInfo->ri_ChildToRootMap = + convert_tuples_by_name_attrmap(indesc, outdesc, attrMap); + } else /* this isn't a child result rel */ resultRelInfo->ri_ChildToRootMap = NULL; @@ -1281,8 +1291,10 @@ ExecGetRootToChildMap(ResultRelInfo *resultRelInfo, EState *estate) * to ignore by passing true for missing_ok. */ oldcontext = MemoryContextSwitchTo(estate->es_query_cxt); - attrMap = build_attrmap_by_name_if_req(indesc, outdesc, - !childrel->rd_rel->relispartition); + if (table_get_row_ref_type(resultRelInfo->ri_RelationDesc) != ROW_REF_ROWID) + attrMap = build_attrmap_by_name_if_req(indesc, outdesc, !childrel->rd_rel->relispartition); + else + attrMap = build_attrmap_by_name(indesc, outdesc, !childrel->rd_rel->relispartition); if (attrMap) resultRelInfo->ri_RootToChildMap = convert_tuples_by_name_attrmap(indesc, outdesc, attrMap); From f23841949e82d25cda93d742fbbecf262a498c84 Mon Sep 17 00:00:00 2001 From: Ilya Kobets Date: Tue, 20 Aug 2024 14:09:51 +0200 Subject: [PATCH 037/102] Don't run internal btree _bt_getrootheight on non-btree in get_relation_info --- src/backend/optimizer/util/plancat.c | 3 ++- src/include/optimizer/plancat.h | 3 +++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c index d5f00345548..32682ba46e9 100644 --- a/src/backend/optimizer/util/plancat.c +++ b/src/backend/optimizer/util/plancat.c @@ -60,6 +60,7 @@ int constraint_exclusion = CONSTRAINT_EXCLUSION_PARTITION; /* Hook for plugins to get control in get_relation_info() */ get_relation_info_hook_type get_relation_info_hook = NULL; +skip_tree_height_hook_type skip_tree_height_hook = NULL; static void get_relation_foreign_keys(PlannerInfo *root, RelOptInfo *rel, @@ -457,7 +458,7 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent, info->tuples = rel->tuples; } - if (info->relam == BTREE_AM_OID) + if (info->relam == BTREE_AM_OID && (!skip_tree_height_hook || !skip_tree_height_hook(indexRelation))) { /* * For btrees, get tree height while we have the index diff --git a/src/include/optimizer/plancat.h b/src/include/optimizer/plancat.h index dbb7eb86808..1589f7d07a6 100644 --- a/src/include/optimizer/plancat.h +++ b/src/include/optimizer/plancat.h @@ -24,6 +24,9 @@ typedef void (*get_relation_info_hook_type) (PlannerInfo *root, RelOptInfo *rel); extern PGDLLIMPORT get_relation_info_hook_type get_relation_info_hook; +typedef bool (*skip_tree_height_hook_type) (Relation indexRelation); +extern PGDLLIMPORT skip_tree_height_hook_type skip_tree_height_hook; + extern void get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent, RelOptInfo *rel); From 52eebe9c1a9eda46e8e88cad8199b97b8d3222bd Mon Sep 17 00:00:00 2001 From: Alexander Korotkov Date: Tue, 17 Sep 2024 01:58:24 +0300 Subject: [PATCH 038/102] Fix handling tupleid in logical replication --- src/backend/access/table/tableam.c | 8 ++-- src/backend/executor/execReplication.c | 54 +++++++++++++++++------- src/backend/replication/logical/worker.c | 15 +++---- src/include/access/tableam.h | 4 +- 4 files changed, 50 insertions(+), 31 deletions(-) diff --git a/src/backend/access/table/tableam.c b/src/backend/access/table/tableam.c index 67d63e0a6ec..3f64d70666e 100644 --- a/src/backend/access/table/tableam.c +++ b/src/backend/access/table/tableam.c @@ -297,7 +297,7 @@ simple_table_tuple_insert(Relation rel, TupleTableSlot *slot) * via ereport(). */ void -simple_table_tuple_delete(Relation rel, ItemPointer tid, Snapshot snapshot, +simple_table_tuple_delete(Relation rel, Datum tupleid, Snapshot snapshot, TupleTableSlot *oldSlot) { TM_Result result; @@ -308,7 +308,7 @@ simple_table_tuple_delete(Relation rel, ItemPointer tid, Snapshot snapshot, if (oldSlot) options |= TABLE_MODIFY_FETCH_OLD_TUPLE; - result = table_tuple_delete(rel, PointerGetDatum(tid), + result = table_tuple_delete(rel, tupleid, GetCurrentCommandId(true), snapshot, InvalidSnapshot, options, @@ -349,7 +349,7 @@ simple_table_tuple_delete(Relation rel, ItemPointer tid, Snapshot snapshot, * via ereport(). */ void -simple_table_tuple_update(Relation rel, ItemPointer otid, +simple_table_tuple_update(Relation rel, Datum tupleid, TupleTableSlot *slot, Snapshot snapshot, TU_UpdateIndexes *update_indexes, @@ -364,7 +364,7 @@ simple_table_tuple_update(Relation rel, ItemPointer otid, if (oldSlot) options |= TABLE_MODIFY_FETCH_OLD_TUPLE; - result = table_tuple_update(rel, PointerGetDatum(otid), slot, + result = table_tuple_update(rel, tupleid, slot, GetCurrentCommandId(true), snapshot, InvalidSnapshot, options, diff --git a/src/backend/executor/execReplication.c b/src/backend/executor/execReplication.c index c9345cfae04..84967929c49 100644 --- a/src/backend/executor/execReplication.c +++ b/src/backend/executor/execReplication.c @@ -125,6 +125,25 @@ build_replindex_scan_key(ScanKey skey, Relation rel, Relation idxrel, return skey_attoff; } +static Datum +slot_get_tupleid(Relation rel, TupleTableSlot *slot) +{ + Datum tupleid; + + if (table_get_row_ref_type(rel) == ROW_REF_ROWID) + { + bool isnull; + tupleid = slot_getsysattr(slot, RowIdAttributeNumber, &isnull); + Assert(!isnull); + } + else + { + tupleid = PointerGetDatum(&slot->tts_tid); + } + + return tupleid; +} + /* * Search the relation 'rel' for tuple using the index. * @@ -209,7 +228,7 @@ RelationFindReplTupleByIndex(Relation rel, Oid idxoid, PushActiveSnapshot(GetLatestSnapshot()); - res = table_tuple_lock(rel, PointerGetDatum(&(outslot->tts_tid)), + res = table_tuple_lock(rel, slot_get_tupleid(rel, outslot), GetActiveSnapshot(), outslot, GetCurrentCommandId(false), @@ -394,7 +413,7 @@ RelationFindReplTupleSeq(Relation rel, LockTupleMode lockmode, PushActiveSnapshot(GetLatestSnapshot()); - res = table_tuple_lock(rel, PointerGetDatum(&(outslot->tts_tid)), + res = table_tuple_lock(rel, slot_get_tupleid(rel, outslot), GetActiveSnapshot(), outslot, GetCurrentCommandId(false), @@ -518,7 +537,7 @@ ExecSimpleRelationUpdate(ResultRelInfo *resultRelInfo, { bool skip_tuple = false; Relation rel = resultRelInfo->ri_RelationDesc; - ItemPointer tid = &(searchslot->tts_tid); + Datum tupleid = slot_get_tupleid(rel, searchslot); /* * We support only non-system tables, with @@ -534,7 +553,7 @@ ExecSimpleRelationUpdate(ResultRelInfo *resultRelInfo, resultRelInfo->ri_TrigDesc->trig_update_before_row) { if (!ExecBRUpdateTriggers(estate, epqstate, resultRelInfo, - PointerGetDatum(tid), NULL, slot, NULL, NULL)) + tupleid, NULL, slot, NULL, NULL)) skip_tuple = true; /* "do nothing" */ } @@ -556,16 +575,17 @@ ExecSimpleRelationUpdate(ResultRelInfo *resultRelInfo, if (rel->rd_rel->relispartition) ExecPartitionCheck(resultRelInfo, slot, estate, true); - if (resultRelInfo->ri_TrigDesc && - resultRelInfo->ri_TrigDesc->trig_update_after_row) - oldSlot = ExecGetTriggerOldSlot(estate, resultRelInfo); + oldSlot = ExecGetTriggerOldSlot(estate, resultRelInfo); - simple_table_tuple_update(rel, tid, slot, estate->es_snapshot, + simple_table_tuple_update(rel, tupleid, slot, estate->es_snapshot, &update_indexes, oldSlot); if (resultRelInfo->ri_NumIndices > 0 && (update_indexes != TU_None)) - recheckIndexes = ExecInsertIndexTuples(resultRelInfo, - slot, estate, true, false, + recheckIndexes = ExecUpdateIndexTuples(resultRelInfo, + slot, + oldSlot, + estate, + false, NULL, NIL, (update_indexes == TU_Summarizing)); @@ -592,7 +612,7 @@ ExecSimpleRelationDelete(ResultRelInfo *resultRelInfo, { bool skip_tuple = false; Relation rel = resultRelInfo->ri_RelationDesc; - ItemPointer tid = &searchslot->tts_tid; + Datum tupleid = slot_get_tupleid(rel, searchslot); CheckCmdReplicaIdentity(rel, CMD_DELETE); @@ -601,19 +621,21 @@ ExecSimpleRelationDelete(ResultRelInfo *resultRelInfo, resultRelInfo->ri_TrigDesc->trig_delete_before_row) { skip_tuple = !ExecBRDeleteTriggers(estate, epqstate, resultRelInfo, - PointerGetDatum(tid), NULL, NULL, NULL, NULL); + tupleid, NULL, NULL, NULL, NULL); } if (!skip_tuple) { TupleTableSlot *oldSlot = NULL; - if (resultRelInfo->ri_TrigDesc && - resultRelInfo->ri_TrigDesc->trig_delete_after_row) - oldSlot = ExecGetTriggerOldSlot(estate, resultRelInfo); + oldSlot = ExecGetTriggerOldSlot(estate, resultRelInfo); /* OK, delete the tuple */ - simple_table_tuple_delete(rel, tid, estate->es_snapshot, oldSlot); + simple_table_tuple_delete(rel, tupleid, estate->es_snapshot, oldSlot); + + /* delete index entries if necessary */ + if (resultRelInfo->ri_NumIndices > 0) + ExecDeleteIndexTuples(resultRelInfo, oldSlot, estate); /* AFTER ROW DELETE Triggers */ ExecARDeleteTriggers(estate, resultRelInfo, diff --git a/src/backend/replication/logical/worker.c b/src/backend/replication/logical/worker.c index 9b5c641941f..6c91790ca0a 100644 --- a/src/backend/replication/logical/worker.c +++ b/src/backend/replication/logical/worker.c @@ -2432,9 +2432,8 @@ apply_handle_insert(StringInfo s) /* Initialize the executor state. */ edata = create_edata_for_relation(rel); estate = edata->estate; - remoteslot = ExecInitExtraTupleSlot(estate, - RelationGetDescr(rel->localrel), - &TTSOpsVirtual); + remoteslot = table_slot_create(rel->localrel, + &estate->es_tupleTable); /* Process and store remote tuple in the slot */ oldctx = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate)); @@ -2588,9 +2587,8 @@ apply_handle_update(StringInfo s) /* Initialize the executor state. */ edata = create_edata_for_relation(rel); estate = edata->estate; - remoteslot = ExecInitExtraTupleSlot(estate, - RelationGetDescr(rel->localrel), - &TTSOpsVirtual); + remoteslot = table_slot_create(rel->localrel, + &estate->es_tupleTable); /* * Populate updatedCols so that per-column triggers can fire, and so @@ -2768,9 +2766,8 @@ apply_handle_delete(StringInfo s) /* Initialize the executor state. */ edata = create_edata_for_relation(rel); estate = edata->estate; - remoteslot = ExecInitExtraTupleSlot(estate, - RelationGetDescr(rel->localrel), - &TTSOpsVirtual); + remoteslot = table_slot_create(rel->localrel, + &estate->es_tupleTable); /* Build the search tuple. */ oldctx = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate)); diff --git a/src/include/access/tableam.h b/src/include/access/tableam.h index 702482e0d7b..2348d8a2fbd 100644 --- a/src/include/access/tableam.h +++ b/src/include/access/tableam.h @@ -2102,10 +2102,10 @@ table_tuple_is_current(Relation rel, TupleTableSlot *slot) */ extern void simple_table_tuple_insert(Relation rel, TupleTableSlot *slot); -extern void simple_table_tuple_delete(Relation rel, ItemPointer tid, +extern void simple_table_tuple_delete(Relation rel, Datum tupleid, Snapshot snapshot, TupleTableSlot *oldSlot); -extern void simple_table_tuple_update(Relation rel, ItemPointer otid, +extern void simple_table_tuple_update(Relation rel, Datum tupleid, TupleTableSlot *slot, Snapshot snapshot, TU_UpdateIndexes *update_indexes, TupleTableSlot *oldSlot); From 642a7bbb5205b66875ec5fe3a83999664c2b7d40 Mon Sep 17 00:00:00 2001 From: Ilya Kobets Date: Fri, 27 Sep 2024 14:26:40 +0200 Subject: [PATCH 039/102] New CSN snapshot format * Add xlogptr and xmin to determine right order of transactions when decoding on replica. * Add CSN snapshot data to snapshot builder. * Record CSN to the running xids and restore it during logical decoding to the snapshot builder. * Add function to update CSN snapshot data in snapshot builder. * Update CSN snapshot LSN in snapshot building after each transaction commit. * Restore CSN snapshot data in SnapBuildBuildSnapshot(). --- src/backend/replication/logical/snapbuild.c | 16 ++++++++++++++++ src/backend/storage/ipc/procarray.c | 1 + src/backend/storage/ipc/standby.c | 1 + src/backend/utils/time/snapmgr.c | 10 +++++++--- src/include/replication/snapbuild.h | 2 ++ src/include/storage/standby.h | 1 + src/include/storage/standbydefs.h | 1 + src/include/utils/snapshot.h | 9 ++++++++- 8 files changed, 37 insertions(+), 4 deletions(-) diff --git a/src/backend/replication/logical/snapbuild.c b/src/backend/replication/logical/snapbuild.c index 601006db7a7..2b993f51c90 100644 --- a/src/backend/replication/logical/snapbuild.c +++ b/src/backend/replication/logical/snapbuild.c @@ -207,6 +207,8 @@ struct SnapBuild */ TransactionId next_phase_at; + CSNSnapshotData csnSnapshotData; + /* * Array of transactions which could have catalog changes that committed * between xmin and xmax. @@ -562,6 +564,8 @@ SnapBuildBuildSnapshot(SnapBuild *builder) snapshot->regd_count = 0; snapshot->snapXactCompletionCount = 0; + snapshot->csnSnapshotData = builder->csnSnapshotData; + return snapshot; } @@ -1083,6 +1087,8 @@ SnapBuildCommitTxn(SnapBuild *builder, XLogRecPtr lsn, TransactionId xid, TransactionId xmax = xid; + builder->csnSnapshotData.xlogptr = lsn; + /* * Transactions preceding BUILDING_SNAPSHOT will neither be decoded, nor * will they be part of a snapshot. So we don't need to record anything. @@ -1300,6 +1306,9 @@ SnapBuildProcessRunningXacts(SnapBuild *builder, XLogRecPtr lsn, xl_running_xact * we hit fast paths in heapam_visibility.c. */ builder->xmin = running->oldestRunningXid; + builder->csnSnapshotData.snapshotcsn = running->csn; + builder->csnSnapshotData.xmin = 0; + builder->csnSnapshotData.xlogptr = lsn; /* Remove transactions we don't need to keep track off anymore */ SnapBuildPurgeOlderTxn(builder); @@ -2197,3 +2206,10 @@ CheckPointSnapBuild(void) } FreeDir(snap_dir); } + +void +SnapBuildUpdateCSNSnaphot(SnapBuild *builder, + CSNSnapshotData *csnSnapshotData) +{ + builder->csnSnapshotData = *csnSnapshotData; +} diff --git a/src/backend/storage/ipc/procarray.c b/src/backend/storage/ipc/procarray.c index 553663b1dfb..81acd267799 100644 --- a/src/backend/storage/ipc/procarray.c +++ b/src/backend/storage/ipc/procarray.c @@ -2866,6 +2866,7 @@ GetRunningTransactionData(void) CurrentRunningXacts->nextXid = XidFromFullTransactionId(ShmemVariableCache->nextXid); CurrentRunningXacts->oldestRunningXid = oldestRunningXid; CurrentRunningXacts->latestCompletedXid = latestCompletedXid; + CurrentRunningXacts->csn = pg_atomic_read_u64(&ShmemVariableCache->nextCommitSeqNo); Assert(TransactionIdIsValid(CurrentRunningXacts->nextXid)); Assert(TransactionIdIsValid(CurrentRunningXacts->oldestRunningXid)); diff --git a/src/backend/storage/ipc/standby.c b/src/backend/storage/ipc/standby.c index 3bdc5f7fb6c..1e6760a7c49 100644 --- a/src/backend/storage/ipc/standby.c +++ b/src/backend/storage/ipc/standby.c @@ -1355,6 +1355,7 @@ LogCurrentRunningXacts(RunningTransactions CurrRunningXacts) xlrec.nextXid = CurrRunningXacts->nextXid; xlrec.oldestRunningXid = CurrRunningXacts->oldestRunningXid; xlrec.latestCompletedXid = CurrRunningXacts->latestCompletedXid; + xlrec.csn = CurrRunningXacts->csn; /* Header */ XLogBeginInsert(); diff --git a/src/backend/utils/time/snapmgr.c b/src/backend/utils/time/snapmgr.c index 080e7b15496..4c37292514c 100644 --- a/src/backend/utils/time/snapmgr.c +++ b/src/backend/utils/time/snapmgr.c @@ -196,7 +196,7 @@ typedef struct SerializedSnapshotData CommandId curcid; TimestampTz whenTaken; XLogRecPtr lsn; - CommitSeqNo snapshotcsn; + CSNSnapshotData csnSnapshotData; uint64 undoRegularLocation; uint64 undoRegularXmin; uint64 undoSystemLocation; @@ -2194,7 +2194,9 @@ SerializeSnapshot(Snapshot snapshot, char *start_address) serialized_snapshot.curcid = snapshot->curcid; serialized_snapshot.whenTaken = snapshot->whenTaken; serialized_snapshot.lsn = snapshot->lsn; - serialized_snapshot.snapshotcsn = snapshot->snapshotcsn; + serialized_snapshot.csnSnapshotData.xmin = snapshot->csnSnapshotData.xmin; + serialized_snapshot.csnSnapshotData.snapshotcsn = snapshot->csnSnapshotData.snapshotcsn; + serialized_snapshot.csnSnapshotData.xlogptr = snapshot->csnSnapshotData.xlogptr; serialized_snapshot.undoRegularXmin = snapshot->undoRegularLocationPhNode.xmin; serialized_snapshot.undoRegularLocation = snapshot->undoRegularLocationPhNode.undoLocation; serialized_snapshot.undoSystemXmin = snapshot->undoSystemLocationPhNode.xmin; @@ -2274,7 +2276,9 @@ RestoreSnapshot(char *start_address) snapshot->whenTaken = serialized_snapshot.whenTaken; snapshot->lsn = serialized_snapshot.lsn; snapshot->snapXactCompletionCount = 0; - snapshot->snapshotcsn = serialized_snapshot.snapshotcsn; + snapshot->csnSnapshotData.xmin = serialized_snapshot.csnSnapshotData.xmin; + snapshot->csnSnapshotData.snapshotcsn = serialized_snapshot.csnSnapshotData.snapshotcsn; + snapshot->csnSnapshotData.xlogptr = serialized_snapshot.csnSnapshotData.xlogptr; snapshot->undoRegularLocationPhNode.xmin = serialized_snapshot.undoRegularXmin; snapshot->undoRegularLocationPhNode.undoLocation = serialized_snapshot.undoRegularLocation; snapshot->undoSystemLocationPhNode.xmin = serialized_snapshot.undoSystemXmin; diff --git a/src/include/replication/snapbuild.h b/src/include/replication/snapbuild.h index 071fca6d3b5..a7b793dae3c 100644 --- a/src/include/replication/snapbuild.h +++ b/src/include/replication/snapbuild.h @@ -91,5 +91,7 @@ extern void SnapBuildProcessNewCid(SnapBuild *builder, TransactionId xid, extern void SnapBuildProcessRunningXacts(SnapBuild *builder, XLogRecPtr lsn, struct xl_running_xacts *running); extern void SnapBuildSerializationPoint(SnapBuild *builder, XLogRecPtr lsn); +extern void SnapBuildUpdateCSNSnaphot(SnapBuild *builder, + CSNSnapshotData *csnSnapshotData); #endif /* SNAPBUILD_H */ diff --git a/src/include/storage/standby.h b/src/include/storage/standby.h index bb7d90c7ad6..b97394b4841 100644 --- a/src/include/storage/standby.h +++ b/src/include/storage/standby.h @@ -91,6 +91,7 @@ typedef struct RunningTransactionsData TransactionId nextXid; /* xid from ShmemVariableCache->nextXid */ TransactionId oldestRunningXid; /* *not* oldestXmin */ TransactionId latestCompletedXid; /* so we can set xmax */ + CommitSeqNo csn; /* current csn */ TransactionId *xids; /* array of (sub)xids still running */ } RunningTransactionsData; diff --git a/src/include/storage/standbydefs.h b/src/include/storage/standbydefs.h index 188e348618a..23dddce8d84 100644 --- a/src/include/storage/standbydefs.h +++ b/src/include/storage/standbydefs.h @@ -52,6 +52,7 @@ typedef struct xl_running_xacts TransactionId nextXid; /* xid from ShmemVariableCache->nextXid */ TransactionId oldestRunningXid; /* *not* oldestXmin */ TransactionId latestCompletedXid; /* so we can set xmax */ + CommitSeqNo csn; /* current csn */ TransactionId xids[FLEXIBLE_ARRAY_MEMBER]; } xl_running_xacts; diff --git a/src/include/utils/snapshot.h b/src/include/utils/snapshot.h index d4392d7dc04..01093a33315 100644 --- a/src/include/utils/snapshot.h +++ b/src/include/utils/snapshot.h @@ -129,6 +129,13 @@ typedef struct pairingheap_node ph_node; } RetainUndoLocationPHNode; +typedef struct CSNSnapshotData +{ + uint64 xmin; + CommitSeqNo snapshotcsn; + XLogRecPtr xlogptr; +} CSNSnapshotData; + /* * Struct representing all kind of possible snapshots. * @@ -224,7 +231,7 @@ typedef struct SnapshotData RetainUndoLocationPHNode undoRegularLocationPhNode; RetainUndoLocationPHNode undoSystemLocationPhNode; - CommitSeqNo snapshotcsn; + CSNSnapshotData csnSnapshotData; } SnapshotData; typedef void (*snapshot_hook_type) (Snapshot snapshot); From 9525a762f22e887d17d0c44a8eaf9892dfda3cc6 Mon Sep 17 00:00:00 2001 From: Alexander Korotkov Date: Thu, 3 Oct 2024 13:12:01 +0300 Subject: [PATCH 040/102] Restart archiver during PM_SHUTDOWN postmaster stage That allows S3 mode to finish WAL archiving if needed. --- src/backend/postmaster/postmaster.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c index 1b98bb17586..bd4cd5849ef 100644 --- a/src/backend/postmaster/postmaster.c +++ b/src/backend/postmaster/postmaster.c @@ -448,7 +448,7 @@ static void InitPostmasterDeathWatchHandle(void); * even during recovery. */ #define PgArchStartupAllowed() \ - (((XLogArchivingActive() && pmState == PM_RUN) || \ + (((XLogArchivingActive() && (pmState == PM_RUN || pmState == PM_SHUTDOWN)) || \ (XLogArchivingAlways() && \ (pmState == PM_RECOVERY || pmState == PM_HOT_STANDBY))) && \ PgArchCanRestart()) From 64074f884a7b5050d548b9e3da10381f577fa090 Mon Sep 17 00:00:00 2001 From: Alexander Korotkov Date: Tue, 8 Oct 2024 21:31:33 +0300 Subject: [PATCH 041/102] Add handling of CSN snapshot in some places of snapbuild.c --- src/backend/replication/logical/snapbuild.c | 8 +++++--- src/backend/utils/time/snapmgr.c | 1 + 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/src/backend/replication/logical/snapbuild.c b/src/backend/replication/logical/snapbuild.c index 2b993f51c90..8495bde4fb4 100644 --- a/src/backend/replication/logical/snapbuild.c +++ b/src/backend/replication/logical/snapbuild.c @@ -663,6 +663,7 @@ SnapBuildInitialSnapshot(SnapBuild *builder) snap->snapshot_type = SNAPSHOT_MVCC; snap->xcnt = newxcnt; snap->xip = newxip; + snap->csnSnapshotData = builder->csnSnapshotData; return snap; } @@ -1279,6 +1280,10 @@ SnapBuildProcessRunningXacts(SnapBuild *builder, XLogRecPtr lsn, xl_running_xact ReorderBufferTXN *txn; TransactionId xmin; + builder->csnSnapshotData.snapshotcsn = running->csn; + builder->csnSnapshotData.xmin = 0; + builder->csnSnapshotData.xlogptr = lsn; + /* * If we're not consistent yet, inspect the record to see whether it * allows to get closer to being consistent. If we are consistent, dump @@ -1306,9 +1311,6 @@ SnapBuildProcessRunningXacts(SnapBuild *builder, XLogRecPtr lsn, xl_running_xact * we hit fast paths in heapam_visibility.c. */ builder->xmin = running->oldestRunningXid; - builder->csnSnapshotData.snapshotcsn = running->csn; - builder->csnSnapshotData.xmin = 0; - builder->csnSnapshotData.xlogptr = lsn; /* Remove transactions we don't need to keep track off anymore */ SnapBuildPurgeOlderTxn(builder); diff --git a/src/backend/utils/time/snapmgr.c b/src/backend/utils/time/snapmgr.c index 4c37292514c..b909f311acc 100644 --- a/src/backend/utils/time/snapmgr.c +++ b/src/backend/utils/time/snapmgr.c @@ -551,6 +551,7 @@ SetTransactionSnapshot(Snapshot sourcesnap, VirtualTransactionId *sourcevxid, CurrentSnapshot->xmin = sourcesnap->xmin; CurrentSnapshot->xmax = sourcesnap->xmax; CurrentSnapshot->xcnt = sourcesnap->xcnt; + CurrentSnapshot->csnSnapshotData = sourcesnap->csnSnapshotData; Assert(sourcesnap->xcnt <= GetMaxSnapshotXidCount()); if (sourcesnap->xcnt > 0) memcpy(CurrentSnapshot->xip, sourcesnap->xip, From 2f8616023263512b5261299c335ca2d4eb0bde5e Mon Sep 17 00:00:00 2001 From: Alexander Korotkov Date: Mon, 14 Oct 2024 16:22:14 +0300 Subject: [PATCH 042/102] Move CheckPoint_hook() call after CheckPointBuffers() That allows to process flushed buffers in CheckPoint_hook(). --- src/backend/access/transam/xlog.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 9857691e417..a01292d9898 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -7085,8 +7085,6 @@ CreateOverwriteContrecordRecord(XLogRecPtr aborted_lsn, XLogRecPtr pagePtr, static void CheckPointGuts(XLogRecPtr checkPointRedo, int flags) { - if (CheckPoint_hook) - CheckPoint_hook(checkPointRedo, flags); CheckPointRelationMap(); CheckPointReplicationSlots(); CheckPointSnapBuild(); @@ -7103,6 +7101,9 @@ CheckPointGuts(XLogRecPtr checkPointRedo, int flags) CheckPointPredicate(); CheckPointBuffers(flags); + if (CheckPoint_hook) + CheckPoint_hook(checkPointRedo, flags); + /* Perform all queued up fsyncs */ TRACE_POSTGRESQL_BUFFER_CHECKPOINT_SYNC_START(); CheckpointStats.ckpt_sync_t = GetCurrentTimestamp(); From 78996b47a1ca0bfaaf9ac38abf8b048fe2e9bfb2 Mon Sep 17 00:00:00 2001 From: Pavel Borisov Date: Tue, 15 Oct 2024 22:39:08 +0400 Subject: [PATCH 043/102] Restore GetIndexAmRoutine signature for compatibility with other callers Use GetIndexAmRoutineExtended instead for all Orioledb extensibility. --- src/backend/access/index/amapi.c | 11 ++++++++--- src/backend/commands/indexcmds.c | 2 +- src/backend/utils/adt/ruleutils.c | 2 +- src/backend/utils/cache/relcache.c | 2 +- src/include/access/amapi.h | 3 ++- 5 files changed, 13 insertions(+), 7 deletions(-) diff --git a/src/backend/access/index/amapi.c b/src/backend/access/index/amapi.c index f4526997474..ed2b9fc9e68 100644 --- a/src/backend/access/index/amapi.c +++ b/src/backend/access/index/amapi.c @@ -47,7 +47,6 @@ GetIndexAmRoutineWithTableAM(Oid tamoid, Oid amhandler) return routine; } - /* * GetIndexAmRoutine - call the specified access method handler routine to get * its IndexAmRoutine struct, which will be palloc'd in the caller's context. @@ -57,7 +56,13 @@ GetIndexAmRoutineWithTableAM(Oid tamoid, Oid amhandler) * indexes for the system catalogs. relcache.c relies on that. */ IndexAmRoutine * -GetIndexAmRoutine(Oid indoid, Oid amhandler) +GetIndexAmRoutine(Oid amhandler) +{ + return GetIndexAmRoutineExtended(InvalidOid, amhandler); +} + +IndexAmRoutine * +GetIndexAmRoutineExtended(Oid indoid, Oid amhandler) { HeapTuple ht_idx; HeapTuple ht_tblrel; @@ -146,7 +151,7 @@ GetIndexAmRoutineByAmId(Oid indoid, Oid amoid, bool noerror) ReleaseSysCache(tuple); /* And finally, call the handler function to get the API struct. */ - return GetIndexAmRoutine(indoid, amhandler); + return GetIndexAmRoutineExtended(indoid, amhandler); } diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c index 8e8ffe22ee1..b7c490bab4e 100644 --- a/src/backend/commands/indexcmds.c +++ b/src/backend/commands/indexcmds.c @@ -215,7 +215,7 @@ CheckIndexCompatible(Oid oldId, accessMethodName))); accessMethodForm = (Form_pg_am) GETSTRUCT(tuple); accessMethodId = accessMethodForm->oid; - amRoutine = GetIndexAmRoutine(oldId, accessMethodForm->amhandler); + amRoutine = GetIndexAmRoutineExtended(oldId, accessMethodForm->amhandler); ReleaseSysCache(tuple); amcanorder = amRoutine->amcanorder; diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/ruleutils.c index a7ea18c1c11..9d68d736d2f 100644 --- a/src/backend/utils/adt/ruleutils.c +++ b/src/backend/utils/adt/ruleutils.c @@ -1309,7 +1309,7 @@ pg_get_indexdef_worker(Oid indexrelid, int colno, amrec = (Form_pg_am) GETSTRUCT(ht_am); /* Fetch the index AM's API struct */ - amroutine = GetIndexAmRoutine(indexrelid, amrec->amhandler); + amroutine = GetIndexAmRoutineExtended(indexrelid, amrec->amhandler); /* * Get the index expressions, if any. (NOTE: we do not use the relcache diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c index 1af02a5585d..0e2898579f7 100644 --- a/src/backend/utils/cache/relcache.c +++ b/src/backend/utils/cache/relcache.c @@ -1404,7 +1404,7 @@ InitIndexAmRoutine(Relation relation) * Call the amhandler in current, short-lived memory context, just in case * it leaks anything (it probably won't, but let's be paranoid). */ - tmp = GetIndexAmRoutine(relation->rd_id, relation->rd_amhandler); + tmp = GetIndexAmRoutineExtended(relation->rd_id, relation->rd_amhandler); /* OK, now transfer the data into relation's rd_indexcxt. */ cached = (IndexAmRoutine *) MemoryContextAlloc(relation->rd_indexcxt, diff --git a/src/include/access/amapi.h b/src/include/access/amapi.h index 5c9c9adcc82..03e7411c4b6 100644 --- a/src/include/access/amapi.h +++ b/src/include/access/amapi.h @@ -314,7 +314,8 @@ typedef struct IndexAmRoutine /* Functions in access/index/amapi.c */ extern IndexAmRoutine *GetIndexAmRoutineWithTableAM(Oid tamoid, Oid amhandler); -extern IndexAmRoutine *GetIndexAmRoutine(Oid indoid, Oid amhandler); +extern IndexAmRoutine *GetIndexAmRoutine(Oid amhandler); +extern IndexAmRoutine *GetIndexAmRoutineExtended(Oid indoid, Oid amhandler); extern IndexAmRoutine *GetIndexAmRoutineByAmId(Oid indoid, Oid amoid, bool noerror); typedef IndexAmRoutine *(*IndexAMRoutineHookType) (Oid tamoid, Oid amhandler); From d87f595411b2890ee96f6ef4974151fa7af3c30f Mon Sep 17 00:00:00 2001 From: Pavel Borisov Date: Wed, 16 Oct 2024 19:23:07 +0400 Subject: [PATCH 044/102] Make index insert compatible with outside callers We split aminsert method to aminsert and aminsertextended. aminsert is a method for indexes implemented in other extensions, it accepts ItemPointer tupleid. aminsertextended is for internal Postgres indexes and Orioledb, it accepts Datum tupleid. They are not supposed to call aminsert method, so that it is set NULL for them. We can not rely that extensions are aware of aminsertextended, so index_insert() calls aminsert if it's not NULL preferentially. Signature of index_insert() is reverted so that it could be called by other extensions. Datum tupleid is confined inside index_insert method. --- contrib/bloom/blutils.c | 3 ++- doc/src/sgml/indexam.sgml | 1 + src/backend/access/brin/brin.c | 3 ++- src/backend/access/common/toast_internals.c | 2 +- src/backend/access/gin/ginutil.c | 3 ++- src/backend/access/gist/gist.c | 3 ++- src/backend/access/hash/hash.c | 3 ++- src/backend/access/heap/heapam_handler.c | 2 +- src/backend/access/index/indexam.c | 21 ++++++++++++++++--- src/backend/access/nbtree/nbtree.c | 3 ++- src/backend/access/spgist/spgutils.c | 3 ++- src/backend/catalog/indexing.c | 2 +- src/backend/commands/constraint.c | 2 +- src/backend/executor/execIndexing.c | 20 ++++++++---------- src/include/access/amapi.h | 12 +++++++++++ src/include/access/genam.h | 2 +- .../modules/dummy_index_am/dummy_index_am.c | 3 ++- src/tools/pgindent/typedefs.list | 1 + 18 files changed, 62 insertions(+), 27 deletions(-) diff --git a/contrib/bloom/blutils.c b/contrib/bloom/blutils.c index f23fbb1d9e0..d92858a3433 100644 --- a/contrib/bloom/blutils.c +++ b/contrib/bloom/blutils.c @@ -130,7 +130,8 @@ blhandler(PG_FUNCTION_ARGS) amroutine->ambuild = blbuild; amroutine->ambuildempty = blbuildempty; - amroutine->aminsert = blinsert; + amroutine->aminsert = NULL; + amroutine->aminsertextended = blinsert; amroutine->ambulkdelete = blbulkdelete; amroutine->amvacuumcleanup = blvacuumcleanup; amroutine->amcanreturn = NULL; diff --git a/doc/src/sgml/indexam.sgml b/doc/src/sgml/indexam.sgml index 30eda37afa8..cee79776683 100644 --- a/doc/src/sgml/indexam.sgml +++ b/doc/src/sgml/indexam.sgml @@ -139,6 +139,7 @@ typedef struct IndexAmRoutine ambuild_function ambuild; ambuildempty_function ambuildempty; aminsert_function aminsert; + aminsert_extended_function aminsertextended; ambulkdelete_function ambulkdelete; amvacuumcleanup_function amvacuumcleanup; amcanreturn_function amcanreturn; /* can be NULL */ diff --git a/src/backend/access/brin/brin.c b/src/backend/access/brin/brin.c index 1db6068f05b..edf99d9816c 100644 --- a/src/backend/access/brin/brin.c +++ b/src/backend/access/brin/brin.c @@ -116,7 +116,8 @@ brinhandler(PG_FUNCTION_ARGS) amroutine->ambuild = brinbuild; amroutine->ambuildempty = brinbuildempty; - amroutine->aminsert = brininsert; + amroutine->aminsert = NULL; + amroutine->aminsertextended = brininsert; amroutine->ambulkdelete = brinbulkdelete; amroutine->amvacuumcleanup = brinvacuumcleanup; amroutine->amcanreturn = NULL; diff --git a/src/backend/access/common/toast_internals.c b/src/backend/access/common/toast_internals.c index c95462c2755..f1e63599bf3 100644 --- a/src/backend/access/common/toast_internals.c +++ b/src/backend/access/common/toast_internals.c @@ -339,7 +339,7 @@ toast_save_datum(Relation rel, Datum value, /* Only index relations marked as ready can be updated */ if (toastidxs[i]->rd_index->indisready) index_insert(toastidxs[i], t_values, t_isnull, - ItemPointerGetDatum(&(toasttup->t_self)), + &(toasttup->t_self), toastrel, toastidxs[i]->rd_index->indisunique ? UNIQUE_CHECK_YES : UNIQUE_CHECK_NO, diff --git a/src/backend/access/gin/ginutil.c b/src/backend/access/gin/ginutil.c index 7a4cd93f301..52d9a725fc4 100644 --- a/src/backend/access/gin/ginutil.c +++ b/src/backend/access/gin/ginutil.c @@ -63,7 +63,8 @@ ginhandler(PG_FUNCTION_ARGS) amroutine->ambuild = ginbuild; amroutine->ambuildempty = ginbuildempty; - amroutine->aminsert = gininsert; + amroutine->aminsert = NULL; + amroutine->aminsertextended = gininsert; amroutine->ambulkdelete = ginbulkdelete; amroutine->amvacuumcleanup = ginvacuumcleanup; amroutine->amcanreturn = NULL; diff --git a/src/backend/access/gist/gist.c b/src/backend/access/gist/gist.c index eb39873cd3e..6dcfe1095c1 100644 --- a/src/backend/access/gist/gist.c +++ b/src/backend/access/gist/gist.c @@ -85,7 +85,8 @@ gisthandler(PG_FUNCTION_ARGS) amroutine->ambuild = gistbuild; amroutine->ambuildempty = gistbuildempty; - amroutine->aminsert = gistinsert; + amroutine->aminsert = NULL; + amroutine->aminsertextended = gistinsert; amroutine->ambulkdelete = gistbulkdelete; amroutine->amvacuumcleanup = gistvacuumcleanup; amroutine->amcanreturn = gistcanreturn; diff --git a/src/backend/access/hash/hash.c b/src/backend/access/hash/hash.c index c8202e9349d..ffddf7b900c 100644 --- a/src/backend/access/hash/hash.c +++ b/src/backend/access/hash/hash.c @@ -82,7 +82,8 @@ hashhandler(PG_FUNCTION_ARGS) amroutine->ambuild = hashbuild; amroutine->ambuildempty = hashbuildempty; - amroutine->aminsert = hashinsert; + amroutine->aminsert = NULL; + amroutine->aminsertextended = hashinsert; amroutine->ambulkdelete = hashbulkdelete; amroutine->amvacuumcleanup = hashvacuumcleanup; amroutine->amcanreturn = NULL; diff --git a/src/backend/access/heap/heapam_handler.c b/src/backend/access/heap/heapam_handler.c index 8f6559c9c3e..a32fc3b69fb 100644 --- a/src/backend/access/heap/heapam_handler.c +++ b/src/backend/access/heap/heapam_handler.c @@ -2310,7 +2310,7 @@ heapam_index_validate_scan(Relation heapRelation, index_insert(indexRelation, values, isnull, - ItemPointerGetDatum(&rootTuple), + &rootTuple, heapRelation, indexInfo->ii_Unique ? UNIQUE_CHECK_YES : UNIQUE_CHECK_NO, diff --git a/src/backend/access/index/indexam.c b/src/backend/access/index/indexam.c index 0dabdabca8e..94bdec63666 100644 --- a/src/backend/access/index/indexam.c +++ b/src/backend/access/index/indexam.c @@ -218,24 +218,39 @@ bool index_insert(Relation indexRelation, Datum *values, bool *isnull, - Datum tupleid, + ItemPointer tupleid, Relation heapRelation, IndexUniqueCheck checkUnique, bool indexUnchanged, IndexInfo *indexInfo) { RELATION_CHECKS; - CHECK_REL_PROCEDURE(aminsert); + + if (indexRelation->rd_indam->aminsertextended == NULL && indexRelation->rd_indam->aminsert == NULL ) + elog(ERROR, "at least one function aminsert or aminsertextended should be defined for index \"%s\"", \ + RelationGetRelationName(indexRelation)); if (!(indexRelation->rd_indam->ampredlocks)) CheckForSerializableConflictIn(indexRelation, (ItemPointer) NULL, InvalidBlockNumber); - return indexRelation->rd_indam->aminsert(indexRelation, values, isnull, + if (indexRelation->rd_indam->aminsert) + { + /* compatibility method for extension AM's not aware of aminsertextended */ + return indexRelation->rd_indam->aminsert(indexRelation, values, isnull, tupleid, heapRelation, checkUnique, indexUnchanged, indexInfo); + } + else + { + /* index insert method for internal AM's and Orioledb that are aware of aminsertextended */ + return indexRelation->rd_indam->aminsertextended(indexRelation, values, isnull, + ItemPointerGetDatum(tupleid), heapRelation, + checkUnique, indexUnchanged, + indexInfo); + } } /* ---------------- diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c index 32ec09b1ec7..44daed95baf 100644 --- a/src/backend/access/nbtree/nbtree.c +++ b/src/backend/access/nbtree/nbtree.c @@ -121,7 +121,8 @@ bthandler(PG_FUNCTION_ARGS) amroutine->ambuild = btbuild; amroutine->ambuildempty = btbuildempty; - amroutine->aminsert = btinsert; + amroutine->aminsert = NULL; + amroutine->aminsertextended = btinsert; amroutine->ambulkdelete = btbulkdelete; amroutine->amvacuumcleanup = btvacuumcleanup; amroutine->amcanreturn = btcanreturn; diff --git a/src/backend/access/spgist/spgutils.c b/src/backend/access/spgist/spgutils.c index 5fa9e230c08..127ff3922d1 100644 --- a/src/backend/access/spgist/spgutils.c +++ b/src/backend/access/spgist/spgutils.c @@ -69,7 +69,8 @@ spghandler(PG_FUNCTION_ARGS) amroutine->ambuild = spgbuild; amroutine->ambuildempty = spgbuildempty; - amroutine->aminsert = spginsert; + amroutine->aminsert = NULL; + amroutine->aminsertextended = spginsert; amroutine->ambulkdelete = spgbulkdelete; amroutine->amvacuumcleanup = spgvacuumcleanup; amroutine->amcanreturn = spgcanreturn; diff --git a/src/backend/catalog/indexing.c b/src/backend/catalog/indexing.c index 9846637537c..522da0ac855 100644 --- a/src/backend/catalog/indexing.c +++ b/src/backend/catalog/indexing.c @@ -170,7 +170,7 @@ CatalogIndexInsert(CatalogIndexState indstate, HeapTuple heapTuple, index_insert(index, /* index relation */ values, /* array of index Datums */ isnull, /* is-null flags */ - ItemPointerGetDatum(&(heapTuple->t_self)), /* tid of heap tuple */ + &(heapTuple->t_self), /* tid of heap tuple */ heapRelation, index->rd_index->indisunique ? UNIQUE_CHECK_YES : UNIQUE_CHECK_NO, diff --git a/src/backend/commands/constraint.c b/src/backend/commands/constraint.c index 04e718c95cd..982bae9ed42 100644 --- a/src/backend/commands/constraint.c +++ b/src/backend/commands/constraint.c @@ -173,7 +173,7 @@ unique_key_recheck(PG_FUNCTION_ARGS) * the row is now dead, because that is the TID the index will know * about. */ - index_insert(indexRel, values, isnull, ItemPointerGetDatum(&checktid), + index_insert(indexRel, values, isnull, &checktid, trigdata->tg_relation, UNIQUE_CHECK_EXISTING, false, indexInfo); } diff --git a/src/backend/executor/execIndexing.c b/src/backend/executor/execIndexing.c index f18d2c0fe8f..4a544d2dc14 100644 --- a/src/backend/executor/execIndexing.c +++ b/src/backend/executor/execIndexing.c @@ -308,19 +308,19 @@ ExecInsertIndexTuples(ResultRelInfo *resultRelInfo, ExprContext *econtext; Datum values[INDEX_MAX_KEYS]; bool isnull[INDEX_MAX_KEYS]; - Datum tupleid; + ItemPointer tupleid; if (table_get_row_ref_type(resultRelInfo->ri_RelationDesc) == ROW_REF_ROWID) { bool isnull; - tupleid = slot_getsysattr(slot, RowIdAttributeNumber, &isnull); + tupleid = DatumGetItemPointer(slot_getsysattr(slot, RowIdAttributeNumber, &isnull)); Assert(!isnull); } else { Assert(ItemPointerIsValid(&slot->tts_tid)); - tupleid = PointerGetDatum(&slot->tts_tid); + tupleid = &slot->tts_tid; } /* @@ -468,7 +468,6 @@ ExecInsertIndexTuples(ResultRelInfo *resultRelInfo, { bool violationOK; CEOUC_WAIT_MODE waitMode; - ItemPointer raw_tupleid = DatumGetItemPointer(tupleid); if (applyNoDupErr) { @@ -489,7 +488,7 @@ ExecInsertIndexTuples(ResultRelInfo *resultRelInfo, satisfiesConstraint = check_exclusion_or_unique_constraint(heapRelation, indexRelation, indexInfo, - raw_tupleid, values, isnull, + tupleid, values, isnull, estate, false, waitMode, violationOK, NULL); } @@ -532,18 +531,18 @@ ExecUpdateIndexTuples(ResultRelInfo *resultRelInfo, ExprContext *econtext; Datum values[INDEX_MAX_KEYS]; bool isnull[INDEX_MAX_KEYS]; - Datum tupleid; + ItemPointer tupleid; if (table_get_row_ref_type(resultRelInfo->ri_RelationDesc) == ROW_REF_ROWID) { bool isnull; - tupleid = slot_getsysattr(slot, RowIdAttributeNumber, &isnull); + tupleid = DatumGetItemPointer(slot_getsysattr(slot, RowIdAttributeNumber, &isnull)); Assert(!isnull); } else { Assert(ItemPointerIsValid(&slot->tts_tid)); - tupleid = PointerGetDatum(&slot->tts_tid); + tupleid = &slot->tts_tid; } /* @@ -712,7 +711,7 @@ ExecUpdateIndexTuples(ResultRelInfo *resultRelInfo, old_valid, values, /* array of index Datums */ isnull, /* null flags */ - tupleid, /* tid of heap tuple */ + ItemPointerGetDatum(tupleid), /* tid of heap tuple */ valuesOld, isnullOld, oldTupleid, @@ -763,7 +762,6 @@ ExecUpdateIndexTuples(ResultRelInfo *resultRelInfo, { bool violationOK; CEOUC_WAIT_MODE waitMode; - ItemPointer raw_tupleid = DatumGetItemPointer(tupleid); if (applyNoDupErr) { @@ -784,7 +782,7 @@ ExecUpdateIndexTuples(ResultRelInfo *resultRelInfo, satisfiesConstraint = check_exclusion_or_unique_constraint(heapRelation, indexRelation, indexInfo, - raw_tupleid, values, isnull, + tupleid, values, isnull, estate, false, waitMode, violationOK, NULL); } diff --git a/src/include/access/amapi.h b/src/include/access/amapi.h index 03e7411c4b6..103eed5fa9f 100644 --- a/src/include/access/amapi.h +++ b/src/include/access/amapi.h @@ -109,6 +109,16 @@ typedef void (*ambuildempty_function) (Relation indexRelation); /* insert this tuple */ typedef bool (*aminsert_function) (Relation indexRelation, + Datum *values, + bool *isnull, + ItemPointer tupleid, + Relation heapRelation, + IndexUniqueCheck checkUnique, + bool indexUnchanged, + struct IndexInfo *indexInfo); + +/* extended version of aminsert taking Datum tupleid */ +typedef bool (*aminsert_extended_function) (Relation indexRelation, Datum *values, bool *isnull, Datum tupleid, @@ -116,6 +126,7 @@ typedef bool (*aminsert_function) (Relation indexRelation, IndexUniqueCheck checkUnique, bool indexUnchanged, struct IndexInfo *indexInfo); + /* update this tuple */ typedef bool (*amupdate_function) (Relation indexRelation, bool new_valid, @@ -286,6 +297,7 @@ typedef struct IndexAmRoutine ambuild_function ambuild; ambuildempty_function ambuildempty; aminsert_function aminsert; + aminsert_extended_function aminsertextended; amupdate_function amupdate; amdelete_function amdelete; ambulkdelete_function ambulkdelete; diff --git a/src/include/access/genam.h b/src/include/access/genam.h index 908608184a1..bb17831720b 100644 --- a/src/include/access/genam.h +++ b/src/include/access/genam.h @@ -144,7 +144,7 @@ extern void index_close(Relation relation, LOCKMODE lockmode); extern bool index_insert(Relation indexRelation, Datum *values, bool *isnull, - Datum tupleid, + ItemPointer tupleid, Relation heapRelation, IndexUniqueCheck checkUnique, bool indexUnchanged, diff --git a/src/test/modules/dummy_index_am/dummy_index_am.c b/src/test/modules/dummy_index_am/dummy_index_am.c index 562a578cad2..09c5d20479d 100644 --- a/src/test/modules/dummy_index_am/dummy_index_am.c +++ b/src/test/modules/dummy_index_am/dummy_index_am.c @@ -302,7 +302,8 @@ dihandler(PG_FUNCTION_ARGS) amroutine->ambuild = dibuild; amroutine->ambuildempty = dibuildempty; - amroutine->aminsert = diinsert; + amroutine->aminsert = NULL; + amroutine->aminsertextended = diinsert; amroutine->ambulkdelete = dibulkdelete; amroutine->amvacuumcleanup = divacuumcleanup; amroutine->amcanreturn = NULL; diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index a51888802ae..7773d7f875f 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -3148,6 +3148,7 @@ amgetbitmap_function amgettuple_function aminitparallelscan_function aminsert_function +aminsert_extended_function ammarkpos_function amoptions_function amparallelrescan_function From 29fdfb0052ffae2be59b2b9a9a7884e1a54daaf6 Mon Sep 17 00:00:00 2001 From: Alexander Korotkov Date: Mon, 23 Sep 2024 14:03:54 +0300 Subject: [PATCH 045/102] Transform OR-clauses to SAOP's during index matching Replace "(indexkey op C1) OR (indexkey op C2) ... (indexkey op CN)" with "indexkey op ANY(ARRAY[C1, C2, ...])" (ScalarArrayOpExpr node) during matching a clause to index. Here Ci is an i-th constant or parameters expression, 'expr' is non-constant expression, 'op' is an operator which returns boolean result and has a commuter (for the case of reverse order of constant and non-constant parts of the expression, like 'Cn op expr'). This transformation allows handling long OR-clauses with single IndexScan avoiding slower bitmap scans. Discussion: https://postgr.es/m/567ED6CA.2040504%40sigaev.ru Author: Alena Rybakina Author: Andrey Lepikhov Reviewed-by: Peter Geoghegan Reviewed-by: Ranier Vilela Reviewed-by: Alexander Korotkov Reviewed-by: Robert Haas Reviewed-by: Jian He Reviewed-by: Tom Lane Reviewed-by: Nikolay Shaplov --- src/backend/optimizer/path/indxpath.c | 281 ++++++++++++++++++++- src/test/regress/expected/create_index.out | 270 ++++++++++++++++++-- src/test/regress/expected/join.out | 57 ++++- src/test/regress/expected/rowsecurity.out | 7 + src/test/regress/expected/uuid.out | 31 +++ src/test/regress/sql/create_index.sql | 69 +++++ src/test/regress/sql/join.sql | 9 + src/test/regress/sql/rowsecurity.sql | 1 + src/test/regress/sql/uuid.sql | 12 + 9 files changed, 714 insertions(+), 23 deletions(-) diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c index bf4968e348b..50a21ab38b1 100644 --- a/src/backend/optimizer/path/indxpath.c +++ b/src/backend/optimizer/path/indxpath.c @@ -20,6 +20,7 @@ #include "access/stratnum.h" #include "access/sysattr.h" #include "catalog/pg_am.h" +#include "catalog/pg_amop.h" #include "catalog/pg_operator.h" #include "catalog/pg_opfamily.h" #include "catalog/pg_type.h" @@ -32,8 +33,10 @@ #include "optimizer/paths.h" #include "optimizer/prep.h" #include "optimizer/restrictinfo.h" +#include "utils/array.h" #include "utils/lsyscache.h" #include "utils/selfuncs.h" +#include "utils/syscache.h" /* XXX see PartCollMatchesExprColl */ @@ -167,6 +170,10 @@ static IndexClause *match_rowcompare_to_indexcol(PlannerInfo *root, RestrictInfo *rinfo, int indexcol, IndexOptInfo *index); +static IndexClause *match_orclause_to_indexcol(PlannerInfo *root, + RestrictInfo *rinfo, + int indexcol, + IndexOptInfo *index); static IndexClause *expand_indexqual_rowcompare(PlannerInfo *root, RestrictInfo *rinfo, int indexcol, @@ -2186,7 +2193,10 @@ match_clause_to_index(PlannerInfo *root, * (3) must match the collation of the index, if collation is relevant. * * Our definition of "const" is exceedingly liberal: we allow anything that - * doesn't involve a volatile function or a Var of the index's relation. + * doesn't involve a volatile function or a Var of the index's relation + * except for a boolean OR expression input: due to a trade-off between the + * expected execution speedup and planning complexity, we limit or->saop + * transformation by obvious cases when an index scan can get a profit. * In particular, Vars belonging to other relations of the query are * accepted here, since a clause of that form can be used in a * parameterized indexscan. It's the responsibility of higher code levels @@ -2216,6 +2226,10 @@ match_clause_to_index(PlannerInfo *root, * It is also possible to match ScalarArrayOpExpr clauses to indexes, when * the clause is of the form "indexkey op ANY (arrayconst)". * + * It is also possible to match a list of OR clauses if it might be + * transformed into a single ScalarArrayOpExpr clause. On success, + * the returning index clause will contain a trasformed clause. + * * For boolean indexes, it is also possible to match the clause directly * to the indexkey; or perhaps the clause is (NOT indexkey). * @@ -2265,9 +2279,9 @@ match_clause_to_indexcol(PlannerInfo *root, } /* - * Clause must be an opclause, funcclause, ScalarArrayOpExpr, or - * RowCompareExpr. Or, if the index supports it, we can handle IS - * NULL/NOT NULL clauses. + * Clause must be an opclause, funcclause, ScalarArrayOpExpr, + * RowCompareExpr, or OR-clause that could be converted to SAOP. Or, if + * the index supports it, we can handle IS NULL/NOT NULL clauses. */ if (IsA(clause, OpExpr)) { @@ -2285,6 +2299,10 @@ match_clause_to_indexcol(PlannerInfo *root, { return match_rowcompare_to_indexcol(root, rinfo, indexcol, index); } + else if (restriction_is_or_clause(rinfo)) + { + return match_orclause_to_indexcol(root, rinfo, indexcol, index); + } else if (index->amsearchnulls && IsA(clause, NullTest)) { NullTest *nt = (NullTest *) clause; @@ -2808,6 +2826,261 @@ match_rowcompare_to_indexcol(PlannerInfo *root, return NULL; } +/* + * match_orclause_to_indexcol() + * Handles the OR-expr case for match_clause_to_indexcol() in the case + * when it could be transformed to ScalarArrayOpExpr. + * + * Given a list of OR-clause args, attempts to transform this BoolExpr into + * a single SAOP expression. On success, returns an IndexClause, containing + * the transformed expression or NULL, if failed. + */ +static IndexClause * +match_orclause_to_indexcol(PlannerInfo *root, + RestrictInfo *rinfo, + int indexcol, + IndexOptInfo *index) +{ + ListCell *lc; + BoolExpr *orclause = (BoolExpr *) rinfo->orclause; + Node *indexExpr = NULL; + List *consts = NIL; + Node *arrayNode = NULL; + ScalarArrayOpExpr *saopexpr = NULL; + Oid matchOpno = InvalidOid; + IndexClause *iclause; + Oid consttype = InvalidOid; + Oid arraytype = InvalidOid; + Oid inputcollid = InvalidOid; + bool firstTime = true; + bool have_param = false; + + Assert(IsA(orclause, BoolExpr)); + Assert(orclause->boolop == OR_EXPR); + + /* + * Try to convert a list of OR-clauses to a single SAOP expression. Each + * OR entry must be in the form: (indexkey operator constant) or (constant + * operator indexkey). Operators of all the entries must match. Constant + * might be either Const or Param. To be effective, give up on the first + * non-matching entry. Exit is implemented as a break from the loop, which + * is catched afterwards. + */ + foreach(lc, orclause->args) + { + RestrictInfo *subRinfo; + OpExpr *subClause; + Oid opno; + Node *leftop, + *rightop; + Node *constExpr; + + if (!IsA(lfirst(lc), RestrictInfo)) + break; + + subRinfo = (RestrictInfo *) lfirst(lc); + + /* Only operator clauses can match */ + if (!IsA(subRinfo->clause, OpExpr)) + break; + + subClause = (OpExpr *) subRinfo->clause; + opno = subClause->opno; + + /* Only binary operators can match */ + if (list_length(subClause->args) != 2) + break; + + /* + * The parameters below must match between sub-rinfo and its parent as + * make_restrictinfo() fills them with the same values, and further + * modifications are also the same for the whole subtree. However, + * still make a sanity check. + */ + Assert(subRinfo->is_pushed_down == rinfo->is_pushed_down); + Assert(subRinfo->is_clone == rinfo->is_clone); + Assert(subRinfo->security_level == rinfo->security_level); + Assert(bms_equal(subRinfo->incompatible_relids, rinfo->incompatible_relids)); + Assert(bms_equal(subRinfo->outer_relids, rinfo->outer_relids)); + + /* + * Also, check that required_relids in sub-rinfo is subset of parent's + * required_relids. + */ + Assert(bms_is_subset(subRinfo->required_relids, rinfo->required_relids)); + + /* Only operator returning boolean suits the transformation */ + if (get_op_rettype(opno) != BOOLOID) + break; + + /* + * Check for clauses of the form: (indexkey operator constant) or + * (constant operator indexkey). Determine indexkey side first, check + * the constant later. + */ + leftop = (Node *) linitial(subClause->args); + rightop = (Node *) lsecond(subClause->args); + if (match_index_to_operand(leftop, indexcol, index)) + { + indexExpr = leftop; + constExpr = rightop; + } + else if (match_index_to_operand(rightop, indexcol, index)) + { + opno = get_commutator(opno); + if (!OidIsValid(opno)) + { + /* commutator doesn't exist, we can't reverse the order */ + break; + } + indexExpr = rightop; + constExpr = leftop; + } + else + { + break; + } + + /* + * Ignore any RelabelType node above the operands. This is needed to + * be able to apply indexscanning in binary-compatible-operator cases. + * Note: we can assume there is at most one RelabelType node; + * eval_const_expressions() will have simplified if more than one. + */ + if (IsA(constExpr, RelabelType)) + constExpr = (Node *) ((RelabelType *) constExpr)->arg; + if (IsA(indexExpr, RelabelType)) + indexExpr = (Node *) ((RelabelType *) indexExpr)->arg; + + /* We allow constant to be Const or Param */ + if (!IsA(constExpr, Const) && !IsA(constExpr, Param)) + break; + + /* Forbid transformation for composite types, records. */ + if (type_is_rowtype(exprType(constExpr)) || + type_is_rowtype(exprType(indexExpr))) + break; + + /* + * Save information about the operator, type, and collation for the + * first matching qual. Then, check that subsequent quals match the + * first. + */ + if (firstTime) + { + matchOpno = opno; + consttype = exprType(constExpr); + arraytype = get_array_type(consttype); + inputcollid = subClause->inputcollid; + + /* + * Check that the operator is presented in the opfamily and that + * the expression collation matches the index collation. Also, + * there must be an array type to construct an array later. + */ + if (!IndexCollMatchesExprColl(index->indexcollations[indexcol], inputcollid) || + !op_in_opfamily(matchOpno, index->opfamily[indexcol]) || + !OidIsValid(arraytype)) + break; + firstTime = false; + } + else + { + if (opno != matchOpno || + inputcollid != subClause->inputcollid || + consttype != exprType(constExpr)) + break; + } + + if (IsA(constExpr, Param)) + have_param = true; + consts = lappend(consts, constExpr); + } + + /* + * Catch the break from the loop above. Normally, a foreach() loop ends + * up with a NULL list cell. A non-NULL list cell indicates a break from + * the foreach() loop. Free the consts list and return NULL then. + */ + if (lc != NULL) + { + list_free(consts); + return NULL; + } + + /* + * Assemble an array from the list of constants. It seems more profitable + * to build a const array. But in the presence of parameters, we don't + * have a specific value here and must employ an ArrayExpr instead. + */ + + if (have_param) + { + ArrayExpr *arrayExpr = makeNode(ArrayExpr); + + /* array_collid will be set by parse_collate.c */ + arrayExpr->element_typeid = consttype; + arrayExpr->array_typeid = arraytype; + arrayExpr->multidims = false; + arrayExpr->elements = consts; + arrayExpr->location = -1; + + arrayNode = (Node *) arrayExpr; + } + else + { + int16 typlen; + bool typbyval; + char typalign; + Datum *elems; + int i = 0; + ArrayType *arrayConst; + + get_typlenbyvalalign(consttype, &typlen, &typbyval, &typalign); + + elems = (Datum *) palloc(sizeof(Datum) * list_length(consts)); + foreach(lc, consts) + elems[i++] = ((Const *) lfirst(lc))->constvalue; + + arrayConst = construct_array(elems, i, consttype, + typlen, typbyval, typalign); + arrayNode = (Node *) makeConst(arraytype, -1, inputcollid, + -1, PointerGetDatum(arrayConst), + false, false); + + pfree(elems); + list_free(consts); + } + + /* Build the SAOP expression node */ + saopexpr = makeNode(ScalarArrayOpExpr); + saopexpr->opno = matchOpno; + saopexpr->opfuncid = get_opcode(matchOpno); + saopexpr->hashfuncid = InvalidOid; + saopexpr->negfuncid = InvalidOid; + saopexpr->useOr = true; + saopexpr->inputcollid = inputcollid; + saopexpr->args = list_make2(indexExpr, arrayNode); + saopexpr->location = -1; + + /* + * Finally, build an IndexClause based on the SAOP node. Use + * make_simple_restrictinfo() to get RestrictInfo with clean selectivity + * estimations because it may differ from the estimation made for an OR + * clause. Although it is not a lossy expression, keep the old version of + * rinfo in iclause->rinfo to detect duplicates and recheck the original + * clause. + */ + iclause = makeNode(IndexClause); + iclause->rinfo = rinfo; + iclause->indexquals = list_make1(make_simple_restrictinfo(root, + &saopexpr->xpr)); + iclause->lossy = false; + iclause->indexcol = indexcol; + iclause->indexcols = NIL; + return iclause; +} + /* * expand_indexqual_rowcompare --- expand a single indexqual condition * that is a RowCompareExpr diff --git a/src/test/regress/expected/create_index.out b/src/test/regress/expected/create_index.out index c90d0e40145..abcc34677ca 100644 --- a/src/test/regress/expected/create_index.out +++ b/src/test/regress/expected/create_index.out @@ -1838,18 +1838,11 @@ DROP TABLE onek_with_null; EXPLAIN (COSTS OFF) SELECT * FROM tenk1 WHERE thousand = 42 AND (tenthous = 1 OR tenthous = 3 OR tenthous = 42); - QUERY PLAN ------------------------------------------------------------------------------------------------------------------------------------------ - Bitmap Heap Scan on tenk1 - Recheck Cond: (((thousand = 42) AND (tenthous = 1)) OR ((thousand = 42) AND (tenthous = 3)) OR ((thousand = 42) AND (tenthous = 42))) - -> BitmapOr - -> Bitmap Index Scan on tenk1_thous_tenthous - Index Cond: ((thousand = 42) AND (tenthous = 1)) - -> Bitmap Index Scan on tenk1_thous_tenthous - Index Cond: ((thousand = 42) AND (tenthous = 3)) - -> Bitmap Index Scan on tenk1_thous_tenthous - Index Cond: ((thousand = 42) AND (tenthous = 42)) -(9 rows) + QUERY PLAN +------------------------------------------------------------------------------ + Index Scan using tenk1_thous_tenthous on tenk1 + Index Cond: ((thousand = 42) AND (tenthous = ANY ('{1,3,42}'::integer[]))) +(2 rows) SELECT * FROM tenk1 WHERE thousand = 42 AND (tenthous = 1 OR tenthous = 3 OR tenthous = 42); @@ -1858,6 +1851,24 @@ SELECT * FROM tenk1 42 | 5530 | 0 | 2 | 2 | 2 | 42 | 42 | 42 | 42 | 42 | 84 | 85 | QBAAAA | SEIAAA | OOOOxx (1 row) +EXPLAIN (COSTS OFF) +SELECT * FROM tenk1 + WHERE thousand = 42 AND (tenthous = 1 OR tenthous = (SELECT 1 + 2) OR tenthous = 42); + QUERY PLAN +------------------------------------------------------------------------- + Index Scan using tenk1_thous_tenthous on tenk1 + Index Cond: ((thousand = 42) AND (tenthous = ANY (ARRAY[1, $0, 42]))) + InitPlan 1 (returns $0) + -> Result +(4 rows) + +SELECT * FROM tenk1 + WHERE thousand = 42 AND (tenthous = 1 OR tenthous = (SELECT 1 + 2) OR tenthous = 42); + unique1 | unique2 | two | four | ten | twenty | hundred | thousand | twothousand | fivethous | tenthous | odd | even | stringu1 | stringu2 | string4 +---------+---------+-----+------+-----+--------+---------+----------+-------------+-----------+----------+-----+------+----------+----------+--------- + 42 | 5530 | 0 | 2 | 2 | 2 | 42 | 42 | 42 | 42 | 42 | 84 | 85 | QBAAAA | SEIAAA | OOOOxx +(1 row) + EXPLAIN (COSTS OFF) SELECT count(*) FROM tenk1 WHERE hundred = 42 AND (thousand = 42 OR thousand = 99); @@ -1866,6 +1877,161 @@ SELECT count(*) FROM tenk1 Aggregate -> Bitmap Heap Scan on tenk1 Recheck Cond: ((hundred = 42) AND ((thousand = 42) OR (thousand = 99))) + -> BitmapAnd + -> Bitmap Index Scan on tenk1_hundred + Index Cond: (hundred = 42) + -> Bitmap Index Scan on tenk1_thous_tenthous + Index Cond: (thousand = ANY ('{42,99}'::integer[])) +(8 rows) + +SELECT count(*) FROM tenk1 + WHERE hundred = 42 AND (thousand = 42 OR thousand = 99); + count +------- + 10 +(1 row) + +EXPLAIN (COSTS OFF) +SELECT * FROM tenk1 + WHERE thousand = 42 AND (tenthous = 1 OR tenthous = 3 OR tenthous = 42); + QUERY PLAN +------------------------------------------------------------------------------ + Index Scan using tenk1_thous_tenthous on tenk1 + Index Cond: ((thousand = 42) AND (tenthous = ANY ('{1,3,42}'::integer[]))) +(2 rows) + +SELECT * FROM tenk1 + WHERE thousand = 42 AND (tenthous = 1 OR tenthous = 3 OR tenthous = 42); + unique1 | unique2 | two | four | ten | twenty | hundred | thousand | twothousand | fivethous | tenthous | odd | even | stringu1 | stringu2 | string4 +---------+---------+-----+------+-----+--------+---------+----------+-------------+-----------+----------+-----+------+----------+----------+--------- + 42 | 5530 | 0 | 2 | 2 | 2 | 42 | 42 | 42 | 42 | 42 | 84 | 85 | QBAAAA | SEIAAA | OOOOxx +(1 row) + +EXPLAIN (COSTS OFF) +SELECT * FROM tenk1 + WHERE thousand = 42 AND (tenthous = 1::numeric OR tenthous = 3::int4 OR tenthous = 42::numeric); + QUERY PLAN +------------------------------------------------------------------------------------------------------------- + Bitmap Heap Scan on tenk1 + Recheck Cond: (thousand = 42) + Filter: (((tenthous)::numeric = '1'::numeric) OR (tenthous = 3) OR ((tenthous)::numeric = '42'::numeric)) + -> Bitmap Index Scan on tenk1_thous_tenthous + Index Cond: (thousand = 42) +(5 rows) + +EXPLAIN (COSTS OFF) +SELECT * FROM tenk1 + WHERE tenthous = 1::numeric OR tenthous = 3::int4 OR tenthous = 42::numeric; + QUERY PLAN +------------------------------------------------------------------------------------------------------------- + Seq Scan on tenk1 + Filter: (((tenthous)::numeric = '1'::numeric) OR (tenthous = 3) OR ((tenthous)::numeric = '42'::numeric)) +(2 rows) + +EXPLAIN (COSTS OFF) +SELECT count(*) FROM tenk1 + WHERE hundred = 42 AND (thousand = 42 OR thousand = 99); + QUERY PLAN +--------------------------------------------------------------------------------- + Aggregate + -> Bitmap Heap Scan on tenk1 + Recheck Cond: ((hundred = 42) AND ((thousand = 42) OR (thousand = 99))) + -> BitmapAnd + -> Bitmap Index Scan on tenk1_hundred + Index Cond: (hundred = 42) + -> Bitmap Index Scan on tenk1_thous_tenthous + Index Cond: (thousand = ANY ('{42,99}'::integer[])) +(8 rows) + +SELECT count(*) FROM tenk1 + WHERE hundred = 42 AND (thousand = 42 OR thousand = 99); + count +------- + 10 +(1 row) + +EXPLAIN (COSTS OFF) +SELECT count(*) FROM tenk1 + WHERE hundred = 42 AND (thousand < 42 OR thousand < 99 OR 43 > thousand OR 42 > thousand); + QUERY PLAN +----------------------------------------------------------------------------------------------------------------------- + Aggregate + -> Bitmap Heap Scan on tenk1 + Recheck Cond: ((hundred = 42) AND ((thousand < 42) OR (thousand < 99) OR (43 > thousand) OR (42 > thousand))) + -> BitmapAnd + -> Bitmap Index Scan on tenk1_hundred + Index Cond: (hundred = 42) + -> Bitmap Index Scan on tenk1_thous_tenthous + Index Cond: (thousand < ANY ('{42,99,43,42}'::integer[])) +(8 rows) + +SELECT count(*) FROM tenk1 + WHERE hundred = 42 AND (thousand < 42 OR thousand < 99 OR 43 > thousand OR 42 > thousand); + count +------- + 10 +(1 row) + +EXPLAIN (COSTS OFF) +SELECT count(*) FROM tenk1 + WHERE thousand = 42 AND (tenthous = 1 OR tenthous = 3) OR thousand = 41; + QUERY PLAN +----------------------------------------------------------------------------------------------------- + Aggregate + -> Bitmap Heap Scan on tenk1 + Recheck Cond: (((thousand = 42) AND ((tenthous = 1) OR (tenthous = 3))) OR (thousand = 41)) + -> BitmapOr + -> Bitmap Index Scan on tenk1_thous_tenthous + Index Cond: ((thousand = 42) AND (tenthous = ANY ('{1,3}'::integer[]))) + -> Bitmap Index Scan on tenk1_thous_tenthous + Index Cond: (thousand = 41) +(8 rows) + +SELECT count(*) FROM tenk1 + WHERE thousand = 42 AND (tenthous = 1 OR tenthous = 3) OR thousand = 41; + count +------- + 10 +(1 row) + +EXPLAIN (COSTS OFF) +SELECT count(*) FROM tenk1 + WHERE hundred = 42 AND (thousand = 42 OR thousand = 99 OR tenthous < 2) OR thousand = 41; + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------ + Aggregate + -> Bitmap Heap Scan on tenk1 + Recheck Cond: (((hundred = 42) AND ((thousand = 42) OR (thousand = 99) OR (tenthous < 2))) OR (thousand = 41)) + -> BitmapOr + -> BitmapAnd + -> Bitmap Index Scan on tenk1_hundred + Index Cond: (hundred = 42) + -> BitmapOr + -> Bitmap Index Scan on tenk1_thous_tenthous + Index Cond: (thousand = 42) + -> Bitmap Index Scan on tenk1_thous_tenthous + Index Cond: (thousand = 99) + -> Bitmap Index Scan on tenk1_thous_tenthous + Index Cond: (tenthous < 2) + -> Bitmap Index Scan on tenk1_thous_tenthous + Index Cond: (thousand = 41) +(16 rows) + +SELECT count(*) FROM tenk1 + WHERE hundred = 42 AND (thousand = 42 OR thousand = 99 OR tenthous < 2) OR thousand = 41; + count +------- + 20 +(1 row) + +EXPLAIN (COSTS OFF) +SELECT count(*) FROM tenk1 + WHERE hundred = 42 AND (thousand = 42 OR thousand = 41 OR thousand = 99 AND tenthous = 2); + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------- + Aggregate + -> Bitmap Heap Scan on tenk1 + Recheck Cond: ((hundred = 42) AND ((thousand = 42) OR (thousand = 41) OR ((thousand = 99) AND (tenthous = 2)))) -> BitmapAnd -> Bitmap Index Scan on tenk1_hundred Index Cond: (hundred = 42) @@ -1873,16 +2039,90 @@ SELECT count(*) FROM tenk1 -> Bitmap Index Scan on tenk1_thous_tenthous Index Cond: (thousand = 42) -> Bitmap Index Scan on tenk1_thous_tenthous - Index Cond: (thousand = 99) -(11 rows) + Index Cond: (thousand = 41) + -> Bitmap Index Scan on tenk1_thous_tenthous + Index Cond: ((thousand = 99) AND (tenthous = 2)) +(13 rows) SELECT count(*) FROM tenk1 - WHERE hundred = 42 AND (thousand = 42 OR thousand = 99); + WHERE hundred = 42 AND (thousand = 42 OR thousand = 41 OR thousand = 99 AND tenthous = 2); count ------- 10 (1 row) +EXPLAIN (COSTS OFF) +SELECT count(*) FROM tenk1, tenk2 + WHERE tenk1.hundred = 42 AND (tenk2.thousand = 42 OR tenk1.thousand = 41 OR tenk2.tenthous = 2) AND + tenk2.hundred = tenk1.hundred; + QUERY PLAN +----------------------------------------------------------------------------------------------- + Aggregate + -> Nested Loop + Join Filter: ((tenk2.thousand = 42) OR (tenk1.thousand = 41) OR (tenk2.tenthous = 2)) + -> Bitmap Heap Scan on tenk1 + Recheck Cond: (hundred = 42) + -> Bitmap Index Scan on tenk1_hundred + Index Cond: (hundred = 42) + -> Materialize + -> Bitmap Heap Scan on tenk2 + Recheck Cond: (hundred = 42) + -> Bitmap Index Scan on tenk2_hundred + Index Cond: (hundred = 42) +(12 rows) + +EXPLAIN (COSTS OFF) +SELECT count(*) FROM tenk1, tenk2 + WHERE tenk1.hundred = 42 AND (tenk2.thousand = 42 OR tenk2.thousand = 41 OR tenk2.tenthous = 2) AND + tenk2.hundred = tenk1.hundred; + QUERY PLAN +------------------------------------------------------------------------------ + Aggregate + -> Nested Loop + -> Bitmap Heap Scan on tenk2 + Recheck Cond: (hundred = 42) + Filter: ((thousand = 42) OR (thousand = 41) OR (tenthous = 2)) + -> Bitmap Index Scan on tenk2_hundred + Index Cond: (hundred = 42) + -> Index Only Scan using tenk1_hundred on tenk1 + Index Cond: (hundred = 42) +(9 rows) + +EXPLAIN (COSTS OFF) +SELECT count(*) FROM tenk1 JOIN tenk2 ON + tenk1.hundred = 42 AND (tenk2.thousand = 42 OR tenk2.thousand = 41 OR tenk2.tenthous = 2) AND + tenk2.hundred = tenk1.hundred; + QUERY PLAN +------------------------------------------------------------------------------ + Aggregate + -> Nested Loop + -> Bitmap Heap Scan on tenk2 + Recheck Cond: (hundred = 42) + Filter: ((thousand = 42) OR (thousand = 41) OR (tenthous = 2)) + -> Bitmap Index Scan on tenk2_hundred + Index Cond: (hundred = 42) + -> Index Only Scan using tenk1_hundred on tenk1 + Index Cond: (hundred = 42) +(9 rows) + +EXPLAIN (COSTS OFF) +SELECT count(*) FROM tenk1 LEFT JOIN tenk2 ON + tenk1.hundred = 42 AND (tenk2.thousand = 42 OR tenk2.thousand = 41 OR tenk2.tenthous = 2) AND + tenk2.hundred = tenk1.hundred; + QUERY PLAN +------------------------------------------------------------------------------------ + Aggregate + -> Nested Loop Left Join + Join Filter: (tenk1.hundred = 42) + -> Index Only Scan using tenk1_hundred on tenk1 + -> Memoize + Cache Key: tenk1.hundred + Cache Mode: logical + -> Index Scan using tenk2_hundred on tenk2 + Index Cond: (hundred = tenk1.hundred) + Filter: ((thousand = 42) OR (thousand = 41) OR (tenthous = 2)) +(10 rows) + -- -- Check behavior with duplicate index column contents -- diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out index 84e35981ed8..4422b0610e5 100644 --- a/src/test/regress/expected/join.out +++ b/src/test/regress/expected/join.out @@ -4278,15 +4278,64 @@ select * from tenk1 a join tenk1 b on Index Cond: (hundred = 4) -> Materialize -> Bitmap Heap Scan on tenk1 a - Recheck Cond: ((unique1 = 1) OR (unique2 = 3) OR (unique2 = 7)) + Recheck Cond: ((unique1 = 1) OR ((unique2 = 3) OR (unique2 = 7))) + Filter: ((unique1 = 1) OR (unique2 = 3) OR (unique2 = 7)) -> BitmapOr -> Bitmap Index Scan on tenk1_unique1 Index Cond: (unique1 = 1) -> Bitmap Index Scan on tenk1_unique2 - Index Cond: (unique2 = 3) + Index Cond: (unique2 = ANY ('{3,7}'::integer[])) +(18 rows) + +explain (costs off) +select * from tenk1 a join tenk1 b on + (a.unique1 = 1 and b.unique1 = 2) or + ((a.unique2 = 3 or a.unique2 = 7) and b.hundred = 4); + QUERY PLAN +---------------------------------------------------------------------------------------------------------------------- + Nested Loop + Join Filter: (((a.unique1 = 1) AND (b.unique1 = 2)) OR (((a.unique2 = 3) OR (a.unique2 = 7)) AND (b.hundred = 4))) + -> Bitmap Heap Scan on tenk1 b + Recheck Cond: ((unique1 = 2) OR (hundred = 4)) + -> BitmapOr + -> Bitmap Index Scan on tenk1_unique1 + Index Cond: (unique1 = 2) + -> Bitmap Index Scan on tenk1_hundred + Index Cond: (hundred = 4) + -> Materialize + -> Bitmap Heap Scan on tenk1 a + Recheck Cond: ((unique1 = 1) OR ((unique2 = 3) OR (unique2 = 7))) + Filter: ((unique1 = 1) OR (unique2 = 3) OR (unique2 = 7)) + -> BitmapOr + -> Bitmap Index Scan on tenk1_unique1 + Index Cond: (unique1 = 1) -> Bitmap Index Scan on tenk1_unique2 - Index Cond: (unique2 = 7) -(19 rows) + Index Cond: (unique2 = ANY ('{3,7}'::integer[])) +(18 rows) + +explain (costs off) +select * from tenk1 a join tenk1 b on + (a.unique1 < 20 or a.unique1 = 3 or a.unique1 = 1 and b.unique1 = 2) or + ((a.unique2 = 3 or a.unique2 = 7) and b.hundred = 4); + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------------------------------------- + Nested Loop + Join Filter: ((a.unique1 < 20) OR (a.unique1 = 3) OR ((a.unique1 = 1) AND (b.unique1 = 2)) OR (((a.unique2 = 3) OR (a.unique2 = 7)) AND (b.hundred = 4))) + -> Seq Scan on tenk1 b + -> Materialize + -> Bitmap Heap Scan on tenk1 a + Recheck Cond: ((unique1 < 20) OR (unique1 = 3) OR (unique1 = 1) OR ((unique2 = 3) OR (unique2 = 7))) + Filter: ((unique1 < 20) OR (unique1 = 3) OR (unique1 = 1) OR (unique2 = 3) OR (unique2 = 7)) + -> BitmapOr + -> Bitmap Index Scan on tenk1_unique1 + Index Cond: (unique1 < 20) + -> Bitmap Index Scan on tenk1_unique1 + Index Cond: (unique1 = 3) + -> Bitmap Index Scan on tenk1_unique1 + Index Cond: (unique1 = 1) + -> Bitmap Index Scan on tenk1_unique2 + Index Cond: (unique2 = ANY ('{3,7}'::integer[])) +(16 rows) -- -- test placement of movable quals in a parameterized join tree diff --git a/src/test/regress/expected/rowsecurity.out b/src/test/regress/expected/rowsecurity.out index 39528627f3c..c025548f990 100644 --- a/src/test/regress/expected/rowsecurity.out +++ b/src/test/regress/expected/rowsecurity.out @@ -4421,6 +4421,13 @@ SELECT * FROM rls_tbl WHERE a <<< 1000; --- (0 rows) +EXPLAIN (COSTS OFF) SELECT * FROM rls_tbl WHERE a <<< 1000 or a <<< 900; + QUERY PLAN +----------------------------------------------------- + Seq Scan on rls_tbl + Filter: (false AND ((a <<< 1000) OR (a <<< 900))) +(2 rows) + RESET SESSION AUTHORIZATION; CREATE TABLE rls_child_tbl () INHERITS (rls_tbl); INSERT INTO rls_child_tbl SELECT x/10 FROM generate_series(1, 100) x; diff --git a/src/test/regress/expected/uuid.out b/src/test/regress/expected/uuid.out index 8e7f21910d6..bbe275500b9 100644 --- a/src/test/regress/expected/uuid.out +++ b/src/test/regress/expected/uuid.out @@ -129,6 +129,37 @@ CREATE INDEX guid1_btree ON guid1 USING BTREE (guid_field); CREATE INDEX guid1_hash ON guid1 USING HASH (guid_field); -- unique index test CREATE UNIQUE INDEX guid1_unique_BTREE ON guid1 USING BTREE (guid_field); +EXPLAIN (COSTS OFF) +SELECT COUNT(*) FROM guid1 WHERE guid_field <> '11111111111111111111111111111111' OR + guid_field <> '3f3e3c3b-3a30-3938-3736-353433a2313e'; + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------------------------ + Aggregate + -> Seq Scan on guid1 + Filter: ((guid_field <> '11111111-1111-1111-1111-111111111111'::uuid) OR (guid_field <> '3f3e3c3b-3a30-3938-3736-353433a2313e'::uuid)) +(3 rows) + +EXPLAIN (COSTS OFF) +SELECT COUNT(*) FROM guid1 WHERE guid_field <= '22222222-2222-2222-2222-222222222222' OR + guid_field <= '11111111111111111111111111111111' OR + guid_field <= '3f3e3c3b-3a30-3938-3736-353433a2313e'; + QUERY PLAN +---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Aggregate + -> Seq Scan on guid1 + Filter: ((guid_field <= '22222222-2222-2222-2222-222222222222'::uuid) OR (guid_field <= '11111111-1111-1111-1111-111111111111'::uuid) OR (guid_field <= '3f3e3c3b-3a30-3938-3736-353433a2313e'::uuid)) +(3 rows) + +EXPLAIN (COSTS OFF) +SELECT COUNT(*) FROM guid1 WHERE guid_field = '3f3e3c3b-3a30-3938-3736-353433a2313e' OR + guid_field = '11111111111111111111111111111111'; + QUERY PLAN +---------------------------------------------------------------------------------------------------------------------------------------------- + Aggregate + -> Seq Scan on guid1 + Filter: ((guid_field = '3f3e3c3b-3a30-3938-3736-353433a2313e'::uuid) OR (guid_field = '11111111-1111-1111-1111-111111111111'::uuid)) +(3 rows) + -- should fail INSERT INTO guid1(guid_field) VALUES('11111111-1111-1111-1111-111111111111'); ERROR: duplicate key value violates unique constraint "guid1_unique_btree" diff --git a/src/test/regress/sql/create_index.sql b/src/test/regress/sql/create_index.sql index 24241a40cec..a9eaf14d702 100644 --- a/src/test/regress/sql/create_index.sql +++ b/src/test/regress/sql/create_index.sql @@ -731,12 +731,81 @@ SELECT * FROM tenk1 SELECT * FROM tenk1 WHERE thousand = 42 AND (tenthous = 1 OR tenthous = 3 OR tenthous = 42); +EXPLAIN (COSTS OFF) +SELECT * FROM tenk1 + WHERE thousand = 42 AND (tenthous = 1 OR tenthous = (SELECT 1 + 2) OR tenthous = 42); +SELECT * FROM tenk1 + WHERE thousand = 42 AND (tenthous = 1 OR tenthous = (SELECT 1 + 2) OR tenthous = 42); + EXPLAIN (COSTS OFF) SELECT count(*) FROM tenk1 WHERE hundred = 42 AND (thousand = 42 OR thousand = 99); SELECT count(*) FROM tenk1 WHERE hundred = 42 AND (thousand = 42 OR thousand = 99); +EXPLAIN (COSTS OFF) +SELECT * FROM tenk1 + WHERE thousand = 42 AND (tenthous = 1 OR tenthous = 3 OR tenthous = 42); +SELECT * FROM tenk1 + WHERE thousand = 42 AND (tenthous = 1 OR tenthous = 3 OR tenthous = 42); + +EXPLAIN (COSTS OFF) +SELECT * FROM tenk1 + WHERE thousand = 42 AND (tenthous = 1::numeric OR tenthous = 3::int4 OR tenthous = 42::numeric); + +EXPLAIN (COSTS OFF) +SELECT * FROM tenk1 + WHERE tenthous = 1::numeric OR tenthous = 3::int4 OR tenthous = 42::numeric; + +EXPLAIN (COSTS OFF) +SELECT count(*) FROM tenk1 + WHERE hundred = 42 AND (thousand = 42 OR thousand = 99); +SELECT count(*) FROM tenk1 + WHERE hundred = 42 AND (thousand = 42 OR thousand = 99); + +EXPLAIN (COSTS OFF) +SELECT count(*) FROM tenk1 + WHERE hundred = 42 AND (thousand < 42 OR thousand < 99 OR 43 > thousand OR 42 > thousand); +SELECT count(*) FROM tenk1 + WHERE hundred = 42 AND (thousand < 42 OR thousand < 99 OR 43 > thousand OR 42 > thousand); + +EXPLAIN (COSTS OFF) +SELECT count(*) FROM tenk1 + WHERE thousand = 42 AND (tenthous = 1 OR tenthous = 3) OR thousand = 41; +SELECT count(*) FROM tenk1 + WHERE thousand = 42 AND (tenthous = 1 OR tenthous = 3) OR thousand = 41; + +EXPLAIN (COSTS OFF) +SELECT count(*) FROM tenk1 + WHERE hundred = 42 AND (thousand = 42 OR thousand = 99 OR tenthous < 2) OR thousand = 41; +SELECT count(*) FROM tenk1 + WHERE hundred = 42 AND (thousand = 42 OR thousand = 99 OR tenthous < 2) OR thousand = 41; + +EXPLAIN (COSTS OFF) +SELECT count(*) FROM tenk1 + WHERE hundred = 42 AND (thousand = 42 OR thousand = 41 OR thousand = 99 AND tenthous = 2); +SELECT count(*) FROM tenk1 + WHERE hundred = 42 AND (thousand = 42 OR thousand = 41 OR thousand = 99 AND tenthous = 2); + +EXPLAIN (COSTS OFF) +SELECT count(*) FROM tenk1, tenk2 + WHERE tenk1.hundred = 42 AND (tenk2.thousand = 42 OR tenk1.thousand = 41 OR tenk2.tenthous = 2) AND + tenk2.hundred = tenk1.hundred; + +EXPLAIN (COSTS OFF) +SELECT count(*) FROM tenk1, tenk2 + WHERE tenk1.hundred = 42 AND (tenk2.thousand = 42 OR tenk2.thousand = 41 OR tenk2.tenthous = 2) AND + tenk2.hundred = tenk1.hundred; + +EXPLAIN (COSTS OFF) +SELECT count(*) FROM tenk1 JOIN tenk2 ON + tenk1.hundred = 42 AND (tenk2.thousand = 42 OR tenk2.thousand = 41 OR tenk2.tenthous = 2) AND + tenk2.hundred = tenk1.hundred; + +EXPLAIN (COSTS OFF) +SELECT count(*) FROM tenk1 LEFT JOIN tenk2 ON + tenk1.hundred = 42 AND (tenk2.thousand = 42 OR tenk2.thousand = 41 OR tenk2.tenthous = 2) AND + tenk2.hundred = tenk1.hundred; -- -- Check behavior with duplicate index column contents -- diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql index d6f646a1d50..808ac2ad338 100644 --- a/src/test/regress/sql/join.sql +++ b/src/test/regress/sql/join.sql @@ -1433,6 +1433,15 @@ select * from tenk1 a join tenk1 b on (a.unique1 = 1 and b.unique1 = 2) or ((a.unique2 = 3 or a.unique2 = 7) and b.hundred = 4); +explain (costs off) +select * from tenk1 a join tenk1 b on + (a.unique1 = 1 and b.unique1 = 2) or + ((a.unique2 = 3 or a.unique2 = 7) and b.hundred = 4); +explain (costs off) +select * from tenk1 a join tenk1 b on + (a.unique1 < 20 or a.unique1 = 3 or a.unique1 = 1 and b.unique1 = 2) or + ((a.unique2 = 3 or a.unique2 = 7) and b.hundred = 4); + -- -- test placement of movable quals in a parameterized join tree -- diff --git a/src/test/regress/sql/rowsecurity.sql b/src/test/regress/sql/rowsecurity.sql index 4f5ca1626cf..dfccf19db68 100644 --- a/src/test/regress/sql/rowsecurity.sql +++ b/src/test/regress/sql/rowsecurity.sql @@ -2137,6 +2137,7 @@ CREATE FUNCTION op_leak(int, int) RETURNS bool CREATE OPERATOR <<< (procedure = op_leak, leftarg = int, rightarg = int, restrict = scalarltsel); SELECT * FROM rls_tbl WHERE a <<< 1000; +EXPLAIN (COSTS OFF) SELECT * FROM rls_tbl WHERE a <<< 1000 or a <<< 900; RESET SESSION AUTHORIZATION; CREATE TABLE rls_child_tbl () INHERITS (rls_tbl); diff --git a/src/test/regress/sql/uuid.sql b/src/test/regress/sql/uuid.sql index 9a8f437c7d2..a23e23b62ab 100644 --- a/src/test/regress/sql/uuid.sql +++ b/src/test/regress/sql/uuid.sql @@ -63,6 +63,18 @@ CREATE INDEX guid1_hash ON guid1 USING HASH (guid_field); -- unique index test CREATE UNIQUE INDEX guid1_unique_BTREE ON guid1 USING BTREE (guid_field); + +EXPLAIN (COSTS OFF) +SELECT COUNT(*) FROM guid1 WHERE guid_field <> '11111111111111111111111111111111' OR + guid_field <> '3f3e3c3b-3a30-3938-3736-353433a2313e'; +EXPLAIN (COSTS OFF) +SELECT COUNT(*) FROM guid1 WHERE guid_field <= '22222222-2222-2222-2222-222222222222' OR + guid_field <= '11111111111111111111111111111111' OR + guid_field <= '3f3e3c3b-3a30-3938-3736-353433a2313e'; +EXPLAIN (COSTS OFF) +SELECT COUNT(*) FROM guid1 WHERE guid_field = '3f3e3c3b-3a30-3938-3736-353433a2313e' OR + guid_field = '11111111111111111111111111111111'; + -- should fail INSERT INTO guid1(guid_field) VALUES('11111111-1111-1111-1111-111111111111'); From a7de6438b26e7dbcf6972d23c6528de8e8728843 Mon Sep 17 00:00:00 2001 From: Alexander Korotkov Date: Mon, 23 Sep 2024 14:04:03 +0300 Subject: [PATCH 046/102] Teach bitmap path generation about transforming OR-clauses to SAOP's When optimizer generates bitmap paths, it considers breaking OR-clause arguments one-by-one. But now, a group of similar OR-clauses can be transformed into SAOP during index matching. So, bitmap paths should keep up. This commit teaches bitmap paths generation machinery to group similar OR-clauses into dedicated RestrictInfos. Those RestrictInfos are considered both to match index as a whole (as SAOP), or to match as a set of individual OR-clause argument one-by-one (the old way). Therefore, bitmap path generation will takes advantage of OR-clauses to SAOP's transformation. The old way of handling them is also considered. So, there shouldn't be planning regression. Discussion: https://postgr.es/m/567ED6CA.2040504%40sigaev.ru Reviewed-by: Alexander Korotkov --- src/backend/optimizer/path/indxpath.c | 439 ++++++++++++++++++++- src/backend/optimizer/util/restrictinfo.c | 107 +++-- src/include/optimizer/restrictinfo.h | 11 + src/test/regress/expected/create_index.out | 125 +++++- src/test/regress/expected/join.out | 56 ++- src/test/regress/sql/create_index.sql | 38 ++ src/tools/pgindent/typedefs.list | 1 + 7 files changed, 671 insertions(+), 106 deletions(-) diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c index 50a21ab38b1..73b875e66ea 100644 --- a/src/backend/optimizer/path/indxpath.c +++ b/src/backend/optimizer/path/indxpath.c @@ -1210,6 +1210,383 @@ build_paths_for_OR(PlannerInfo *root, RelOptInfo *rel, return result; } +/* + * Utility structure used to group similar OR-clause arguments in + * group_similar_or_args(). It represents information about the OR-clause + * argument and its matching index key. + */ +typedef struct +{ + int indexnum; /* index of the matching index, or -1 if no + * matching index */ + int colnum; /* index of the matching column, or -1 if no + * matching index */ + Oid opno; /* OID of the OpClause operator, or InvalidOid + * if not an OpExpr */ + Oid inputcollid; /* OID of the OpClause input collation */ + int argindex; /* index of the clause in the list of + * arguments */ +} OrArgIndexMatch; + +/* + * Comparison function for OrArgIndexMatch which provides sort order placing + * similar OR-clause arguments together. + */ +static int +or_arg_index_match_cmp(const void *a, const void *b) +{ + const OrArgIndexMatch *match_a = (const OrArgIndexMatch *) a; + const OrArgIndexMatch *match_b = (const OrArgIndexMatch *) b; + + if (match_a->indexnum < match_b->indexnum) + return -1; + else if (match_a->indexnum > match_b->indexnum) + return 1; + + if (match_a->colnum < match_b->colnum) + return -1; + else if (match_a->colnum > match_b->colnum) + return 1; + + if (match_a->opno < match_b->opno) + return -1; + else if (match_a->opno > match_b->opno) + return 1; + + if (match_a->inputcollid < match_b->inputcollid) + return -1; + else if (match_a->inputcollid > match_b->inputcollid) + return 1; + + if (match_a->argindex < match_b->argindex) + return -1; + else if (match_a->argindex > match_b->argindex) + return 1; + + return 0; +} + +/* + * group_similar_or_args + * Transform incoming OR-restrictinfo into a list of sub-restrictinfos, + * each of them containing a subset of OR-clauses from the source rinfo + * matching the same index column with the same operator and collation, + * It may be employed later, during the match_clause_to_indexcol() to + * transform whole OR-sub-rinfo to an SAOP clause. + * + * Similar arguments clauses of form "indexkey op constant" having same + * indexkey, operator, and collation. Constant may comprise either Const + * or Param. + * + * Returns the processed list of arguments. + */ +static List * +group_similar_or_args(PlannerInfo *root, RelOptInfo *rel, RestrictInfo *rinfo) +{ + int n; + int i; + int group_start; + OrArgIndexMatch *matches; + bool matched = false; + ListCell *lc; + ListCell *lc2; + List *orargs; + List *result = NIL; + + Assert(IsA(rinfo->orclause, BoolExpr)); + orargs = ((BoolExpr *) rinfo->orclause)->args; + n = list_length(orargs); + + /* + * To avoid N^2 behavior, take utility pass along the list of OR-clause + * arguments. For each argument, fill the OrArgIndexMatch structure, + * which will be used to sort these arguments at the next step. + */ + i = -1; + matches = (OrArgIndexMatch *) palloc(sizeof(OrArgIndexMatch) * n); + foreach(lc, orargs) + { + Node *arg = lfirst(lc); + RestrictInfo *argrinfo; + OpExpr *clause; + Oid opno; + Node *leftop, + *rightop; + Node *nonConstExpr; + int indexnum; + int colnum; + + i++; + matches[i].argindex = i; + matches[i].indexnum = -1; + matches[i].colnum = -1; + matches[i].opno = InvalidOid; + matches[i].inputcollid = InvalidOid; + + if (!IsA(arg, RestrictInfo)) + continue; + + argrinfo = castNode(RestrictInfo, arg); + + /* Only operator clauses can match */ + if (!IsA(argrinfo->clause, OpExpr)) + continue; + + clause = (OpExpr *) argrinfo->clause; + opno = clause->opno; + + /* Only binary operators can match */ + if (list_length(clause->args) != 2) + continue; + + /* + * Ignore any RelabelType node above the operands. This is needed to + * be able to apply indexscanning in binary-compatible-operator cases. + * Note: we can assume there is at most one RelabelType node; + * eval_const_expressions() will have simplified if more than one. + */ + leftop = get_leftop(clause); + if (IsA(leftop, RelabelType)) + leftop = (Node *) ((RelabelType *) leftop)->arg; + + rightop = get_rightop(clause); + if (IsA(rightop, RelabelType)) + rightop = (Node *) ((RelabelType *) rightop)->arg; + + /* + * Check for clauses of the form: (indexkey operator constant) or + * (constant operator indexkey). But we don't know a particular index + * yet. First check for a constant, which must be Const or Param. + * That's cheaper than search for an index key among all indexes. + */ + if (IsA(leftop, Const) || IsA(leftop, Param)) + { + opno = get_commutator(opno); + + if (!OidIsValid(opno)) + { + /* commutator doesn't exist, we can't reverse the order */ + continue; + } + nonConstExpr = rightop; + } + else if (IsA(rightop, Const) || IsA(rightop, Param)) + { + nonConstExpr = leftop; + } + else + { + continue; + } + + /* + * Match non-constant part to the index key. It's possible that a + * single non-constant part matches multiple index keys. It's OK, we + * just stop with first matching index key. Given that this choice is + * determined the same for every clause, we will group similar clauses + * together anyway. + */ + indexnum = 0; + foreach(lc2, rel->indexlist) + { + IndexOptInfo *index = (IndexOptInfo *) lfirst(lc2); + + /* Ignore index if it doesn't support bitmap scans */ + if (!index->amhasgetbitmap) + continue; + + for (colnum = 0; colnum < index->nkeycolumns; colnum++) + { + if (match_index_to_operand(nonConstExpr, colnum, index)) + { + matches[i].indexnum = indexnum; + matches[i].colnum = colnum; + matches[i].opno = opno; + matches[i].inputcollid = clause->inputcollid; + matched = true; + break; + } + } + + /* + * Stop looping through the indexes, if we managed to match + * nonConstExpr to any index column. + */ + if (matches[i].indexnum >= 0) + break; + indexnum++; + } + } + + /* + * Fast-path check: if no clause is matching to the index column, we can + * just give up at this stage and return the clause list as-is. + */ + if (!matched) + { + pfree(matches); + return orargs; + } + + /* Sort clauses to make similar clauses go together */ + qsort(matches, n, sizeof(OrArgIndexMatch), or_arg_index_match_cmp); + + /* + * Group similar clauses into single sub-restrictinfo. Side effect: the + * resulting list of restrictions will be sorted by indexnum and colnum. + */ + group_start = 0; + for (i = 1; i <= n; i++) + { + /* Check if it's a group boundary */ + if (group_start >= 0 && + (i == n || + matches[i].indexnum != matches[group_start].indexnum || + matches[i].colnum != matches[group_start].colnum || + matches[i].opno != matches[group_start].opno || + matches[i].inputcollid != matches[group_start].inputcollid || + matches[i].indexnum == -1)) + { + /* + * One clause in group: add it "as is" to the upper-level OR. + */ + if (i - group_start == 1) + { + result = lappend(result, + list_nth(orargs, + matches[group_start].argindex)); + } + else + { + /* + * Two or more clauses in a group: create a nested OR. + */ + List *args = NIL; + List *rargs = NIL; + RestrictInfo *subrinfo; + int j; + + Assert(i - group_start >= 2); + + /* Construct the list of nested OR arguments */ + for (j = group_start; j < i; j++) + { + Node *arg = list_nth(orargs, matches[j].argindex); + + rargs = lappend(rargs, arg); + if (IsA(arg, RestrictInfo)) + args = lappend(args, ((RestrictInfo *) arg)->clause); + else + args = lappend(args, arg); + } + + /* Construct the nested OR and wrap it with RestrictInfo */ + subrinfo = make_plain_restrictinfo(root, + make_orclause(args), + make_orclause(rargs), + rinfo->is_pushed_down, + rinfo->has_clone, + rinfo->is_clone, + rinfo->pseudoconstant, + rinfo->security_level, + rinfo->required_relids, + rinfo->incompatible_relids, + rinfo->outer_relids); + result = lappend(result, subrinfo); + } + + group_start = i; + } + } + pfree(matches); + return result; +} + +/* + * make_bitmap_paths_for_or_group + * Generate bitmap paths for a group of similar OR-clause arguments + * produced by group_similar_or_args(). + * + * This function considers two cases: (1) matching a group of clauses to + * the index as a whole, and (2) matching the individual clauses one-by-one. + * (1) typically comprises an optimal solution. If not, (2) typically + * comprises fair alternative. + * + * Ideally, we could consider all arbitrary splits of arguments into + * subgroups, but that could lead to unacceptable computational complexity. + * This is why we only consider two cases of above. + */ +static List * +make_bitmap_paths_for_or_group(PlannerInfo *root, RelOptInfo *rel, + RestrictInfo *ri, List *other_clauses) +{ + List *jointlist = NIL; + List *splitlist = NIL; + ListCell *lc; + List *orargs; + List *args = ((BoolExpr *) ri->orclause)->args; + Cost jointcost = 0.0, + splitcost = 0.0; + Path *bitmapqual; + List *indlist; + + /* + * First, try to match the whole group to the one index. + */ + orargs = list_make1(ri); + indlist = build_paths_for_OR(root, rel, + orargs, + other_clauses); + if (indlist != NIL) + { + bitmapqual = choose_bitmap_and(root, rel, indlist); + jointcost = bitmapqual->total_cost; + jointlist = list_make1(bitmapqual); + } + + /* + * If we manage to find a bitmap scan, which uses the group of OR-clause + * arguments as a whole, we can skip matching OR-clause arguments + * one-by-one as long as there are no other clauses, which can bring more + * efficiency to one-by-one case. + */ + if (jointlist != NIL && other_clauses == NIL) + return jointlist; + + /* + * Also try to match all containing clauses one-by-one. + */ + foreach(lc, args) + { + orargs = list_make1(lfirst(lc)); + + indlist = build_paths_for_OR(root, rel, + orargs, + other_clauses); + + if (indlist == NIL) + { + splitlist = NIL; + break; + } + + bitmapqual = choose_bitmap_and(root, rel, indlist); + splitcost += bitmapqual->total_cost; + splitlist = lappend(splitlist, bitmapqual); + } + + /* + * Pick the best option. + */ + if (splitlist == NIL) + return jointlist; + else if (jointlist == NIL) + return splitlist; + else + return (jointcost < splitcost) ? jointlist : splitlist; +} + + /* * generate_bitmap_or_paths * Look through the list of clauses to find OR clauses, and generate @@ -1240,6 +1617,8 @@ generate_bitmap_or_paths(PlannerInfo *root, RelOptInfo *rel, List *pathlist; Path *bitmapqual; ListCell *j; + List *groupedArgs; + List *inner_other_clauses = NIL; /* Ignore RestrictInfos that aren't ORs */ if (!restriction_is_or_clause(rinfo)) @@ -1250,7 +1629,28 @@ generate_bitmap_or_paths(PlannerInfo *root, RelOptInfo *rel, * the OR, else we can't use it. */ pathlist = NIL; - foreach(j, ((BoolExpr *) rinfo->orclause)->args) + + /* + * Group the similar OR-clause argument into dedicated RestrictInfos, + * because those RestrictInfos might match to the index as a whole. + */ + groupedArgs = group_similar_or_args(root, rel, rinfo); + + if (groupedArgs != ((BoolExpr *) rinfo->orclause)->args) + { + /* + * Some parts of the rinfo were grouped. In this case, we have a + * set of sub-rinfos that together are an exact duplicate of + * rinfo. Thus, we need to remove the rinfo from other clauses. + * match_clauses_to_index detects duplicated iclauses by comparing + * pointers to original rinfos that would be different. So, we + * must delete rinfo to avoid de-facto duplicated clauses in the + * index clauses list. + */ + inner_other_clauses = list_delete(list_copy(all_clauses), rinfo); + } + + foreach(j, groupedArgs) { Node *orarg = (Node *) lfirst(j); List *indlist; @@ -1270,12 +1670,34 @@ generate_bitmap_or_paths(PlannerInfo *root, RelOptInfo *rel, andargs, all_clauses)); } + else if (restriction_is_or_clause(castNode(RestrictInfo, orarg))) + { + RestrictInfo *ri = castNode(RestrictInfo, orarg); + + /* + * Generate bitmap paths for the group of similar OR-clause + * arguments. + */ + indlist = make_bitmap_paths_for_or_group(root, + rel, ri, + inner_other_clauses); + + if (indlist == NIL) + { + pathlist = NIL; + break; + } + else + { + pathlist = list_concat(pathlist, indlist); + continue; + } + } else { RestrictInfo *ri = castNode(RestrictInfo, orarg); List *orargs; - Assert(!restriction_is_or_clause(ri)); orargs = list_make1(ri); indlist = build_paths_for_OR(root, rel, @@ -1301,6 +1723,9 @@ generate_bitmap_or_paths(PlannerInfo *root, RelOptInfo *rel, pathlist = lappend(pathlist, bitmapqual); } + if (inner_other_clauses != NIL) + list_free(inner_other_clauses); + /* * If we have a match for every arm, then turn them into a * BitmapOrPath, and add to result list. @@ -2478,7 +2903,7 @@ match_opclause_to_indexcol(PlannerInfo *root, /* * Check for clauses of the form: (indexkey operator constant) or - * (constant operator indexkey). See match_clause_to_indexcol's notes + * (constant operator indexkey). See match_clause_to_indexcol()'s notes * about const-ness. * * Note that we don't ask the support function about clauses that don't @@ -3040,7 +3465,13 @@ match_orclause_to_indexcol(PlannerInfo *root, elems = (Datum *) palloc(sizeof(Datum) * list_length(consts)); foreach(lc, consts) - elems[i++] = ((Const *) lfirst(lc))->constvalue; + { + Const *value = (Const *) lfirst(lc); + + Assert(!value->constisnull && value->constvalue); + + elems[i++] = value->constvalue; + } arrayConst = construct_array(elems, i, consttype, typlen, typbyval, typalign); diff --git a/src/backend/optimizer/util/restrictinfo.c b/src/backend/optimizer/util/restrictinfo.c index c1fbbb6bfee..06777d19341 100644 --- a/src/backend/optimizer/util/restrictinfo.c +++ b/src/backend/optimizer/util/restrictinfo.c @@ -21,17 +21,6 @@ #include "optimizer/restrictinfo.h" -static RestrictInfo *make_restrictinfo_internal(PlannerInfo *root, - Expr *clause, - Expr *orclause, - bool is_pushed_down, - bool has_clone, - bool is_clone, - bool pseudoconstant, - Index security_level, - Relids required_relids, - Relids incompatible_relids, - Relids outer_relids); static Expr *make_sub_restrictinfos(PlannerInfo *root, Expr *clause, bool is_pushed_down, @@ -90,36 +79,38 @@ make_restrictinfo(PlannerInfo *root, /* Shouldn't be an AND clause, else AND/OR flattening messed up */ Assert(!is_andclause(clause)); - return make_restrictinfo_internal(root, - clause, - NULL, - is_pushed_down, - has_clone, - is_clone, - pseudoconstant, - security_level, - required_relids, - incompatible_relids, - outer_relids); + return make_plain_restrictinfo(root, + clause, + NULL, + is_pushed_down, + has_clone, + is_clone, + pseudoconstant, + security_level, + required_relids, + incompatible_relids, + outer_relids); } /* - * make_restrictinfo_internal + * make_plain_restrictinfo * - * Common code for the main entry points and the recursive cases. + * Common code for the main entry points and the recursive cases. Also, + * useful while contrucitng RestrictInfos above OR clause, which already has + * RestrictInfos above its subclauses. */ -static RestrictInfo * -make_restrictinfo_internal(PlannerInfo *root, - Expr *clause, - Expr *orclause, - bool is_pushed_down, - bool has_clone, - bool is_clone, - bool pseudoconstant, - Index security_level, - Relids required_relids, - Relids incompatible_relids, - Relids outer_relids) +RestrictInfo * +make_plain_restrictinfo(PlannerInfo *root, + Expr *clause, + Expr *orclause, + bool is_pushed_down, + bool has_clone, + bool is_clone, + bool pseudoconstant, + Index security_level, + Relids required_relids, + Relids incompatible_relids, + Relids outer_relids) { RestrictInfo *restrictinfo = makeNode(RestrictInfo); Relids baserels; @@ -296,17 +287,17 @@ make_sub_restrictinfos(PlannerInfo *root, NULL, incompatible_relids, outer_relids)); - return (Expr *) make_restrictinfo_internal(root, - clause, - make_orclause(orlist), - is_pushed_down, - has_clone, - is_clone, - pseudoconstant, - security_level, - required_relids, - incompatible_relids, - outer_relids); + return (Expr *) make_plain_restrictinfo(root, + clause, + make_orclause(orlist), + is_pushed_down, + has_clone, + is_clone, + pseudoconstant, + security_level, + required_relids, + incompatible_relids, + outer_relids); } else if (is_andclause(clause)) { @@ -328,17 +319,17 @@ make_sub_restrictinfos(PlannerInfo *root, return make_andclause(andlist); } else - return (Expr *) make_restrictinfo_internal(root, - clause, - NULL, - is_pushed_down, - has_clone, - is_clone, - pseudoconstant, - security_level, - required_relids, - incompatible_relids, - outer_relids); + return (Expr *) make_plain_restrictinfo(root, + clause, + NULL, + is_pushed_down, + has_clone, + is_clone, + pseudoconstant, + security_level, + required_relids, + incompatible_relids, + outer_relids); } /* diff --git a/src/include/optimizer/restrictinfo.h b/src/include/optimizer/restrictinfo.h index 14cdce750cb..975b1aacc5a 100644 --- a/src/include/optimizer/restrictinfo.h +++ b/src/include/optimizer/restrictinfo.h @@ -22,6 +22,17 @@ make_restrictinfo(root, clause, true, false, false, false, 0, \ NULL, NULL, NULL) +extern RestrictInfo *make_plain_restrictinfo(PlannerInfo *root, + Expr *clause, + Expr *orclause, + bool is_pushed_down, + bool has_clone, + bool is_clone, + bool pseudoconstant, + Index security_level, + Relids required_relids, + Relids incompatible_relids, + Relids outer_relids); extern RestrictInfo *make_restrictinfo(PlannerInfo *root, Expr *clause, bool is_pushed_down, diff --git a/src/test/regress/expected/create_index.out b/src/test/regress/expected/create_index.out index abcc34677ca..8ccc0f3f390 100644 --- a/src/test/regress/expected/create_index.out +++ b/src/test/regress/expected/create_index.out @@ -1869,6 +1869,60 @@ SELECT * FROM tenk1 42 | 5530 | 0 | 2 | 2 | 2 | 42 | 42 | 42 | 42 | 42 | 84 | 85 | QBAAAA | SEIAAA | OOOOxx (1 row) +EXPLAIN (COSTS OFF) +SELECT * FROM tenk1 + WHERE thousand = 42 AND (tenthous = 1 OR tenthous = 3 OR tenthous = 42 OR tenthous IS NULL); + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------------------- + Bitmap Heap Scan on tenk1 + Recheck Cond: (((thousand = 42) AND (tenthous IS NULL)) OR ((thousand = 42) AND ((tenthous = 1) OR (tenthous = 3) OR (tenthous = 42)))) + Filter: ((tenthous = 1) OR (tenthous = 3) OR (tenthous = 42) OR (tenthous IS NULL)) + -> BitmapOr + -> Bitmap Index Scan on tenk1_thous_tenthous + Index Cond: ((thousand = 42) AND (tenthous IS NULL)) + -> Bitmap Index Scan on tenk1_thous_tenthous + Index Cond: ((thousand = 42) AND (tenthous = ANY ('{1,3,42}'::integer[]))) +(8 rows) + +EXPLAIN (COSTS OFF) +SELECT * FROM tenk1 + WHERE thousand = 42 AND (tenthous = 1::int2 OR tenthous::int2 = 3::int8 OR tenthous = 42::int8); + QUERY PLAN +------------------------------------------------------------------------------------------------------------- + Bitmap Heap Scan on tenk1 + Recheck Cond: (thousand = 42) + Filter: ((tenthous = '1'::smallint) OR ((tenthous)::smallint = '3'::bigint) OR (tenthous = '42'::bigint)) + -> Bitmap Index Scan on tenk1_thous_tenthous + Index Cond: (thousand = 42) +(5 rows) + +EXPLAIN (COSTS OFF) +SELECT * FROM tenk1 + WHERE thousand = 42 AND (tenthous = 1::int2 OR tenthous::int2 = 3::int8 OR tenthous::int2 = 42::int8); + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------- + Bitmap Heap Scan on tenk1 + Recheck Cond: (thousand = 42) + Filter: ((tenthous = '1'::smallint) OR ((tenthous)::smallint = '3'::bigint) OR ((tenthous)::smallint = '42'::bigint)) + -> Bitmap Index Scan on tenk1_thous_tenthous + Index Cond: (thousand = 42) +(5 rows) + +EXPLAIN (COSTS OFF) +SELECT * FROM tenk1 + WHERE thousand = 42 AND (tenthous = 1::int2 OR tenthous = 3::int8 OR tenthous = 42::int8); + QUERY PLAN +----------------------------------------------------------------------------------------------------------------------------------------------------- + Bitmap Heap Scan on tenk1 + Recheck Cond: (((thousand = 42) AND ((tenthous = '3'::bigint) OR (tenthous = '42'::bigint))) OR ((thousand = 42) AND (tenthous = '1'::smallint))) + Filter: ((tenthous = '1'::smallint) OR (tenthous = '3'::bigint) OR (tenthous = '42'::bigint)) + -> BitmapOr + -> Bitmap Index Scan on tenk1_thous_tenthous + Index Cond: ((thousand = 42) AND (tenthous = ANY ('{3,42}'::bigint[]))) + -> Bitmap Index Scan on tenk1_thous_tenthous + Index Cond: ((thousand = 42) AND (tenthous = '1'::smallint)) +(8 rows) + EXPLAIN (COSTS OFF) SELECT count(*) FROM tenk1 WHERE hundred = 42 AND (thousand = 42 OR thousand = 99); @@ -1997,25 +2051,24 @@ SELECT count(*) FROM tenk1 EXPLAIN (COSTS OFF) SELECT count(*) FROM tenk1 WHERE hundred = 42 AND (thousand = 42 OR thousand = 99 OR tenthous < 2) OR thousand = 41; - QUERY PLAN ------------------------------------------------------------------------------------------------------------------------- + QUERY PLAN +-------------------------------------------------------------------------------------------------------------------------- Aggregate -> Bitmap Heap Scan on tenk1 - Recheck Cond: (((hundred = 42) AND ((thousand = 42) OR (thousand = 99) OR (tenthous < 2))) OR (thousand = 41)) + Recheck Cond: (((hundred = 42) AND (((thousand = 42) OR (thousand = 99)) OR (tenthous < 2))) OR (thousand = 41)) + Filter: (((hundred = 42) AND ((thousand = 42) OR (thousand = 99) OR (tenthous < 2))) OR (thousand = 41)) -> BitmapOr -> BitmapAnd -> Bitmap Index Scan on tenk1_hundred Index Cond: (hundred = 42) -> BitmapOr -> Bitmap Index Scan on tenk1_thous_tenthous - Index Cond: (thousand = 42) - -> Bitmap Index Scan on tenk1_thous_tenthous - Index Cond: (thousand = 99) + Index Cond: (thousand = ANY ('{42,99}'::integer[])) -> Bitmap Index Scan on tenk1_thous_tenthous Index Cond: (tenthous < 2) -> Bitmap Index Scan on tenk1_thous_tenthous Index Cond: (thousand = 41) -(16 rows) +(15 rows) SELECT count(*) FROM tenk1 WHERE hundred = 42 AND (thousand = 42 OR thousand = 99 OR tenthous < 2) OR thousand = 41; @@ -2027,22 +2080,21 @@ SELECT count(*) FROM tenk1 EXPLAIN (COSTS OFF) SELECT count(*) FROM tenk1 WHERE hundred = 42 AND (thousand = 42 OR thousand = 41 OR thousand = 99 AND tenthous = 2); - QUERY PLAN -------------------------------------------------------------------------------------------------------------------------- + QUERY PLAN +--------------------------------------------------------------------------------------------------------------------------- Aggregate -> Bitmap Heap Scan on tenk1 - Recheck Cond: ((hundred = 42) AND ((thousand = 42) OR (thousand = 41) OR ((thousand = 99) AND (tenthous = 2)))) + Recheck Cond: ((hundred = 42) AND (((thousand = 99) AND (tenthous = 2)) OR ((thousand = 42) OR (thousand = 41)))) + Filter: ((thousand = 42) OR (thousand = 41) OR ((thousand = 99) AND (tenthous = 2))) -> BitmapAnd -> Bitmap Index Scan on tenk1_hundred Index Cond: (hundred = 42) -> BitmapOr - -> Bitmap Index Scan on tenk1_thous_tenthous - Index Cond: (thousand = 42) - -> Bitmap Index Scan on tenk1_thous_tenthous - Index Cond: (thousand = 41) -> Bitmap Index Scan on tenk1_thous_tenthous Index Cond: ((thousand = 99) AND (tenthous = 2)) -(13 rows) + -> Bitmap Index Scan on tenk1_thous_tenthous + Index Cond: (thousand = ANY ('{42,41}'::integer[])) +(12 rows) SELECT count(*) FROM tenk1 WHERE hundred = 42 AND (thousand = 42 OR thousand = 41 OR thousand = 99 AND tenthous = 2); @@ -3000,6 +3052,49 @@ SELECT b.relname, (2 rows) DROP TABLE concur_temp_tab_1, concur_temp_tab_2, reindex_temp_before; +-- Check bitmap scan can consider similar OR arguments separately without +-- grouping them into SAOP. +CREATE TABLE bitmap_split_or (a int NOT NULL, b int NOT NULL, c int NOT NULL); +INSERT INTO bitmap_split_or (SELECT 1, 1, i FROM generate_series(1, 1000) i); +INSERT INTO bitmap_split_or (select i, 2, 2 FROM generate_series(1, 1000) i); +VACUUM ANALYZE bitmap_split_or; +CREATE INDEX t_b_partial_1_idx ON bitmap_split_or (b) WHERE a = 1; +CREATE INDEX t_b_partial_2_idx ON bitmap_split_or (b) WHERE a = 2; +EXPLAIN (COSTS OFF) +SELECT * FROM bitmap_split_or WHERE (a = 1 OR a = 2) AND b = 2; + QUERY PLAN +------------------------------------------------------------------ + Bitmap Heap Scan on bitmap_split_or + Recheck Cond: (((b = 2) AND (a = 1)) OR ((b = 2) AND (a = 2))) + -> BitmapOr + -> Bitmap Index Scan on t_b_partial_1_idx + Index Cond: (b = 2) + -> Bitmap Index Scan on t_b_partial_2_idx + Index Cond: (b = 2) +(7 rows) + +DROP INDEX t_b_partial_1_idx; +DROP INDEX t_b_partial_2_idx; +CREATE INDEX t_a_b_idx ON bitmap_split_or (a, b); +CREATE INDEX t_b_c_idx ON bitmap_split_or (b, c); +CREATE STATISTICS t_a_b_stat (mcv) ON a, b FROM bitmap_split_or; +CREATE STATISTICS t_b_c_stat (mcv) ON b, c FROM bitmap_split_or; +ANALYZE bitmap_split_or; +EXPLAIN (COSTS OFF) +SELECT * FROM bitmap_split_or WHERE a = 1 AND (b = 1 OR b = 2) AND c = 2; + QUERY PLAN +------------------------------------------------------------------ + Bitmap Heap Scan on bitmap_split_or + Recheck Cond: (((b = 1) AND (c = 2)) OR ((a = 1) AND (b = 2))) + Filter: ((a = 1) AND (c = 2)) + -> BitmapOr + -> Bitmap Index Scan on t_b_c_idx + Index Cond: ((b = 1) AND (c = 2)) + -> Bitmap Index Scan on t_a_b_idx + Index Cond: ((a = 1) AND (b = 2)) +(8 rows) + +DROP TABLE bitmap_split_or; -- -- REINDEX SCHEMA -- diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out index 4422b0610e5..87b3750dd2f 100644 --- a/src/test/regress/expected/join.out +++ b/src/test/regress/expected/join.out @@ -4226,20 +4226,20 @@ select * from tenk1 a join tenk1 b on Nested Loop Join Filter: (((a.unique1 = 1) AND (b.unique1 = 2)) OR ((a.unique2 = 3) AND (b.hundred = 4))) -> Bitmap Heap Scan on tenk1 b - Recheck Cond: ((unique1 = 2) OR (hundred = 4)) + Recheck Cond: ((hundred = 4) OR (unique1 = 2)) -> BitmapOr - -> Bitmap Index Scan on tenk1_unique1 - Index Cond: (unique1 = 2) -> Bitmap Index Scan on tenk1_hundred Index Cond: (hundred = 4) + -> Bitmap Index Scan on tenk1_unique1 + Index Cond: (unique1 = 2) -> Materialize -> Bitmap Heap Scan on tenk1 a - Recheck Cond: ((unique1 = 1) OR (unique2 = 3)) + Recheck Cond: ((unique2 = 3) OR (unique1 = 1)) -> BitmapOr - -> Bitmap Index Scan on tenk1_unique1 - Index Cond: (unique1 = 1) -> Bitmap Index Scan on tenk1_unique2 Index Cond: (unique2 = 3) + -> Bitmap Index Scan on tenk1_unique1 + Index Cond: (unique1 = 1) (17 rows) explain (costs off) @@ -4253,12 +4253,12 @@ select * from tenk1 a join tenk1 b on Filter: ((unique1 = 2) OR (ten = 4)) -> Materialize -> Bitmap Heap Scan on tenk1 a - Recheck Cond: ((unique1 = 1) OR (unique2 = 3)) + Recheck Cond: ((unique2 = 3) OR (unique1 = 1)) -> BitmapOr - -> Bitmap Index Scan on tenk1_unique1 - Index Cond: (unique1 = 1) -> Bitmap Index Scan on tenk1_unique2 Index Cond: (unique2 = 3) + -> Bitmap Index Scan on tenk1_unique1 + Index Cond: (unique1 = 1) (12 rows) explain (costs off) @@ -4270,21 +4270,21 @@ select * from tenk1 a join tenk1 b on Nested Loop Join Filter: (((a.unique1 = 1) AND (b.unique1 = 2)) OR (((a.unique2 = 3) OR (a.unique2 = 7)) AND (b.hundred = 4))) -> Bitmap Heap Scan on tenk1 b - Recheck Cond: ((unique1 = 2) OR (hundred = 4)) + Recheck Cond: ((hundred = 4) OR (unique1 = 2)) -> BitmapOr - -> Bitmap Index Scan on tenk1_unique1 - Index Cond: (unique1 = 2) -> Bitmap Index Scan on tenk1_hundred Index Cond: (hundred = 4) + -> Bitmap Index Scan on tenk1_unique1 + Index Cond: (unique1 = 2) -> Materialize -> Bitmap Heap Scan on tenk1 a - Recheck Cond: ((unique1 = 1) OR ((unique2 = 3) OR (unique2 = 7))) + Recheck Cond: (((unique2 = 3) OR (unique2 = 7)) OR (unique1 = 1)) Filter: ((unique1 = 1) OR (unique2 = 3) OR (unique2 = 7)) -> BitmapOr - -> Bitmap Index Scan on tenk1_unique1 - Index Cond: (unique1 = 1) -> Bitmap Index Scan on tenk1_unique2 Index Cond: (unique2 = ANY ('{3,7}'::integer[])) + -> Bitmap Index Scan on tenk1_unique1 + Index Cond: (unique1 = 1) (18 rows) explain (costs off) @@ -4296,21 +4296,21 @@ select * from tenk1 a join tenk1 b on Nested Loop Join Filter: (((a.unique1 = 1) AND (b.unique1 = 2)) OR (((a.unique2 = 3) OR (a.unique2 = 7)) AND (b.hundred = 4))) -> Bitmap Heap Scan on tenk1 b - Recheck Cond: ((unique1 = 2) OR (hundred = 4)) + Recheck Cond: ((hundred = 4) OR (unique1 = 2)) -> BitmapOr - -> Bitmap Index Scan on tenk1_unique1 - Index Cond: (unique1 = 2) -> Bitmap Index Scan on tenk1_hundred Index Cond: (hundred = 4) + -> Bitmap Index Scan on tenk1_unique1 + Index Cond: (unique1 = 2) -> Materialize -> Bitmap Heap Scan on tenk1 a - Recheck Cond: ((unique1 = 1) OR ((unique2 = 3) OR (unique2 = 7))) + Recheck Cond: (((unique2 = 3) OR (unique2 = 7)) OR (unique1 = 1)) Filter: ((unique1 = 1) OR (unique2 = 3) OR (unique2 = 7)) -> BitmapOr - -> Bitmap Index Scan on tenk1_unique1 - Index Cond: (unique1 = 1) -> Bitmap Index Scan on tenk1_unique2 Index Cond: (unique2 = ANY ('{3,7}'::integer[])) + -> Bitmap Index Scan on tenk1_unique1 + Index Cond: (unique1 = 1) (18 rows) explain (costs off) @@ -4324,18 +4324,16 @@ select * from tenk1 a join tenk1 b on -> Seq Scan on tenk1 b -> Materialize -> Bitmap Heap Scan on tenk1 a - Recheck Cond: ((unique1 < 20) OR (unique1 = 3) OR (unique1 = 1) OR ((unique2 = 3) OR (unique2 = 7))) + Recheck Cond: (((unique2 = 3) OR (unique2 = 7)) OR ((unique1 = 3) OR (unique1 = 1)) OR (unique1 < 20)) Filter: ((unique1 < 20) OR (unique1 = 3) OR (unique1 = 1) OR (unique2 = 3) OR (unique2 = 7)) -> BitmapOr - -> Bitmap Index Scan on tenk1_unique1 - Index Cond: (unique1 < 20) - -> Bitmap Index Scan on tenk1_unique1 - Index Cond: (unique1 = 3) - -> Bitmap Index Scan on tenk1_unique1 - Index Cond: (unique1 = 1) -> Bitmap Index Scan on tenk1_unique2 Index Cond: (unique2 = ANY ('{3,7}'::integer[])) -(16 rows) + -> Bitmap Index Scan on tenk1_unique1 + Index Cond: (unique1 = ANY ('{3,1}'::integer[])) + -> Bitmap Index Scan on tenk1_unique1 + Index Cond: (unique1 < 20) +(14 rows) -- -- test placement of movable quals in a parameterized join tree diff --git a/src/test/regress/sql/create_index.sql b/src/test/regress/sql/create_index.sql index a9eaf14d702..017f5c15d48 100644 --- a/src/test/regress/sql/create_index.sql +++ b/src/test/regress/sql/create_index.sql @@ -737,6 +737,23 @@ SELECT * FROM tenk1 SELECT * FROM tenk1 WHERE thousand = 42 AND (tenthous = 1 OR tenthous = (SELECT 1 + 2) OR tenthous = 42); +EXPLAIN (COSTS OFF) +SELECT * FROM tenk1 + WHERE thousand = 42 AND (tenthous = 1 OR tenthous = 3 OR tenthous = 42 OR tenthous IS NULL); + +EXPLAIN (COSTS OFF) +SELECT * FROM tenk1 + WHERE thousand = 42 AND (tenthous = 1::int2 OR tenthous::int2 = 3::int8 OR tenthous = 42::int8); + +EXPLAIN (COSTS OFF) +SELECT * FROM tenk1 + WHERE thousand = 42 AND (tenthous = 1::int2 OR tenthous::int2 = 3::int8 OR tenthous::int2 = 42::int8); + + +EXPLAIN (COSTS OFF) +SELECT * FROM tenk1 + WHERE thousand = 42 AND (tenthous = 1::int2 OR tenthous = 3::int8 OR tenthous = 42::int8); + EXPLAIN (COSTS OFF) SELECT count(*) FROM tenk1 WHERE hundred = 42 AND (thousand = 42 OR thousand = 99); @@ -1269,6 +1286,27 @@ SELECT b.relname, ORDER BY 1; DROP TABLE concur_temp_tab_1, concur_temp_tab_2, reindex_temp_before; +-- Check bitmap scan can consider similar OR arguments separately without +-- grouping them into SAOP. +CREATE TABLE bitmap_split_or (a int NOT NULL, b int NOT NULL, c int NOT NULL); +INSERT INTO bitmap_split_or (SELECT 1, 1, i FROM generate_series(1, 1000) i); +INSERT INTO bitmap_split_or (select i, 2, 2 FROM generate_series(1, 1000) i); +VACUUM ANALYZE bitmap_split_or; +CREATE INDEX t_b_partial_1_idx ON bitmap_split_or (b) WHERE a = 1; +CREATE INDEX t_b_partial_2_idx ON bitmap_split_or (b) WHERE a = 2; +EXPLAIN (COSTS OFF) +SELECT * FROM bitmap_split_or WHERE (a = 1 OR a = 2) AND b = 2; +DROP INDEX t_b_partial_1_idx; +DROP INDEX t_b_partial_2_idx; +CREATE INDEX t_a_b_idx ON bitmap_split_or (a, b); +CREATE INDEX t_b_c_idx ON bitmap_split_or (b, c); +CREATE STATISTICS t_a_b_stat (mcv) ON a, b FROM bitmap_split_or; +CREATE STATISTICS t_b_c_stat (mcv) ON b, c FROM bitmap_split_or; +ANALYZE bitmap_split_or; +EXPLAIN (COSTS OFF) +SELECT * FROM bitmap_split_or WHERE a = 1 AND (b = 1 OR b = 2) AND c = 2; +DROP TABLE bitmap_split_or; + -- -- REINDEX SCHEMA -- diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index 7773d7f875f..3faaa17168e 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -1681,6 +1681,7 @@ OprCacheKey OprInfo OprProofCacheEntry OprProofCacheKey +OrArgIndexMatch OuterJoinClauseInfo OutputPluginCallbacks OutputPluginOptions From b85ac8140ce66fa9bbf80103d7e8ba31533c0b2b Mon Sep 17 00:00:00 2001 From: Alexander Korotkov Date: Mon, 25 Nov 2024 09:05:26 +0200 Subject: [PATCH 047/102] Remove the wrong assertion from match_orclause_to_indexcol() Obviously, the constant could be zero. Also, add the relevant check to regression tests. Reported-by: Richard Guo Discussion: https://postgr.es/m/CAMbWs4-siKJdtWhcbqk4Y-xG12do2Ckm1qw672GNsSnDqL9FQg%40mail.gmail.com --- src/backend/optimizer/path/indxpath.c | 3 +-- src/test/regress/expected/create_index.out | 10 +++++----- src/test/regress/sql/create_index.sql | 4 ++-- 3 files changed, 8 insertions(+), 9 deletions(-) diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c index 73b875e66ea..37f6e543855 100644 --- a/src/backend/optimizer/path/indxpath.c +++ b/src/backend/optimizer/path/indxpath.c @@ -3468,8 +3468,7 @@ match_orclause_to_indexcol(PlannerInfo *root, { Const *value = (Const *) lfirst(lc); - Assert(!value->constisnull && value->constvalue); - + Assert(!value->constisnull); elems[i++] = value->constvalue; } diff --git a/src/test/regress/expected/create_index.out b/src/test/regress/expected/create_index.out index 8ccc0f3f390..f70bb92c15f 100644 --- a/src/test/regress/expected/create_index.out +++ b/src/test/regress/expected/create_index.out @@ -1837,15 +1837,15 @@ DROP TABLE onek_with_null; -- EXPLAIN (COSTS OFF) SELECT * FROM tenk1 - WHERE thousand = 42 AND (tenthous = 1 OR tenthous = 3 OR tenthous = 42); - QUERY PLAN ------------------------------------------------------------------------------- + WHERE thousand = 42 AND (tenthous = 1 OR tenthous = 3 OR tenthous = 42 OR tenthous = 0); + QUERY PLAN +-------------------------------------------------------------------------------- Index Scan using tenk1_thous_tenthous on tenk1 - Index Cond: ((thousand = 42) AND (tenthous = ANY ('{1,3,42}'::integer[]))) + Index Cond: ((thousand = 42) AND (tenthous = ANY ('{1,3,42,0}'::integer[]))) (2 rows) SELECT * FROM tenk1 - WHERE thousand = 42 AND (tenthous = 1 OR tenthous = 3 OR tenthous = 42); + WHERE thousand = 42 AND (tenthous = 1 OR tenthous = 3 OR tenthous = 42 OR tenthous = 0); unique1 | unique2 | two | four | ten | twenty | hundred | thousand | twothousand | fivethous | tenthous | odd | even | stringu1 | stringu2 | string4 ---------+---------+-----+------+-----+--------+---------+----------+-------------+-----------+----------+-----+------+----------+----------+--------- 42 | 5530 | 0 | 2 | 2 | 2 | 42 | 42 | 42 | 42 | 42 | 84 | 85 | QBAAAA | SEIAAA | OOOOxx diff --git a/src/test/regress/sql/create_index.sql b/src/test/regress/sql/create_index.sql index 017f5c15d48..323c57ed896 100644 --- a/src/test/regress/sql/create_index.sql +++ b/src/test/regress/sql/create_index.sql @@ -727,9 +727,9 @@ DROP TABLE onek_with_null; EXPLAIN (COSTS OFF) SELECT * FROM tenk1 - WHERE thousand = 42 AND (tenthous = 1 OR tenthous = 3 OR tenthous = 42); + WHERE thousand = 42 AND (tenthous = 1 OR tenthous = 3 OR tenthous = 42 OR tenthous = 0); SELECT * FROM tenk1 - WHERE thousand = 42 AND (tenthous = 1 OR tenthous = 3 OR tenthous = 42); + WHERE thousand = 42 AND (tenthous = 1 OR tenthous = 3 OR tenthous = 42 OR tenthous = 0); EXPLAIN (COSTS OFF) SELECT * FROM tenk1 From b8027eff502fa287e551854a30afd136d2410609 Mon Sep 17 00:00:00 2001 From: Alexander Korotkov Date: Fri, 29 Nov 2024 01:46:43 +0200 Subject: [PATCH 048/102] Skip not SOAP-supported indexes while transforming an OR clause into SAOP There is no point in transforming OR-clauses into SAOP's if the target index doesn't support SAOP scans anyway. This commit adds corresponding checks to match_orclause_to_indexcol() and group_similar_or_args(). The first check fixes the actual bug, while the second just saves some cycles. Reported-by: Alexander Lakhin Discussion: https://postgr.es/m/8174de69-9e1a-0827-0e81-ef97f56a5939%40gmail.com Author: Alena Rybakina Reviewed-by: Ranier Vilela, Alexander Korotkov --- src/backend/optimizer/path/indxpath.c | 11 +++++++++-- src/test/regress/expected/create_index.out | 18 ++++++++++++++++++ src/test/regress/sql/create_index.sql | 6 ++++++ 3 files changed, 33 insertions(+), 2 deletions(-) diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c index 37f6e543855..98e56ab21a1 100644 --- a/src/backend/optimizer/path/indxpath.c +++ b/src/backend/optimizer/path/indxpath.c @@ -1391,8 +1391,11 @@ group_similar_or_args(PlannerInfo *root, RelOptInfo *rel, RestrictInfo *rinfo) { IndexOptInfo *index = (IndexOptInfo *) lfirst(lc2); - /* Ignore index if it doesn't support bitmap scans */ - if (!index->amhasgetbitmap) + /* + * Ignore index if it doesn't support bitmap scans or SAOP + * clauses. + */ + if (!index->amhasgetbitmap || !index->amsearcharray) continue; for (colnum = 0; colnum < index->nkeycolumns; colnum++) @@ -3283,6 +3286,10 @@ match_orclause_to_indexcol(PlannerInfo *root, Assert(IsA(orclause, BoolExpr)); Assert(orclause->boolop == OR_EXPR); + /* Ignore index if it doesn't support SAOP clauses */ + if(!index->amsearcharray) + return NULL; + /* * Try to convert a list of OR-clauses to a single SAOP expression. Each * OR entry must be in the form: (indexkey operator constant) or (constant diff --git a/src/test/regress/expected/create_index.out b/src/test/regress/expected/create_index.out index f70bb92c15f..3e11459b91a 100644 --- a/src/test/regress/expected/create_index.out +++ b/src/test/regress/expected/create_index.out @@ -1233,6 +1233,24 @@ SELECT count(*) FROM tenk1 WHERE stringu1 = 'TVAAAA'; 14 (1 row) +-- OR-clauses shouldn't be transformed into SAOP because hash indexes don't +-- support SAOP scans. +SET enable_seqscan = off; +EXPLAIN (COSTS OFF) +SELECT COUNT(*) FROM tenk1 WHERE stringu1 = 'TVAAAA' OR stringu1 = 'TVAAAB'; + QUERY PLAN +------------------------------------------------------------------------------------ + Aggregate + -> Bitmap Heap Scan on tenk1 + Recheck Cond: ((stringu1 = 'TVAAAA'::name) OR (stringu1 = 'TVAAAB'::name)) + -> BitmapOr + -> Bitmap Index Scan on hash_tuplesort_idx + Index Cond: (stringu1 = 'TVAAAA'::name) + -> Bitmap Index Scan on hash_tuplesort_idx + Index Cond: (stringu1 = 'TVAAAB'::name) +(8 rows) + +RESET enable_seqscan; DROP INDEX hash_tuplesort_idx; RESET maintenance_work_mem; -- diff --git a/src/test/regress/sql/create_index.sql b/src/test/regress/sql/create_index.sql index 323c57ed896..6f0da3679b8 100644 --- a/src/test/regress/sql/create_index.sql +++ b/src/test/regress/sql/create_index.sql @@ -372,6 +372,12 @@ CREATE INDEX hash_tuplesort_idx ON tenk1 USING hash (stringu1 name_ops) WITH (fi EXPLAIN (COSTS OFF) SELECT count(*) FROM tenk1 WHERE stringu1 = 'TVAAAA'; SELECT count(*) FROM tenk1 WHERE stringu1 = 'TVAAAA'; +-- OR-clauses shouldn't be transformed into SAOP because hash indexes don't +-- support SAOP scans. +SET enable_seqscan = off; +EXPLAIN (COSTS OFF) +SELECT COUNT(*) FROM tenk1 WHERE stringu1 = 'TVAAAA' OR stringu1 = 'TVAAAB'; +RESET enable_seqscan; DROP INDEX hash_tuplesort_idx; RESET maintenance_work_mem; From e0e673bfff3a17c56e9d63beffd8c60dd4ec28fc Mon Sep 17 00:00:00 2001 From: Alexander Korotkov Date: Wed, 5 Feb 2025 13:07:34 +0200 Subject: [PATCH 049/102] Update SnapshotData: split row-level and page-level undo locations Also, get xmin out of RetainUndoLocationPHNode. --- src/backend/utils/time/snapmgr.c | 15 ++++++--------- src/include/utils/snapshot.h | 4 ++-- 2 files changed, 8 insertions(+), 11 deletions(-) diff --git a/src/backend/utils/time/snapmgr.c b/src/backend/utils/time/snapmgr.c index b909f311acc..080cbe4da38 100644 --- a/src/backend/utils/time/snapmgr.c +++ b/src/backend/utils/time/snapmgr.c @@ -197,10 +197,9 @@ typedef struct SerializedSnapshotData TimestampTz whenTaken; XLogRecPtr lsn; CSNSnapshotData csnSnapshotData; - uint64 undoRegularLocation; - uint64 undoRegularXmin; + uint64 undoRegularRowLocation; + uint64 undoRegularPageLocation; uint64 undoSystemLocation; - uint64 undoSystemXmin; } SerializedSnapshotData; Size @@ -2198,9 +2197,8 @@ SerializeSnapshot(Snapshot snapshot, char *start_address) serialized_snapshot.csnSnapshotData.xmin = snapshot->csnSnapshotData.xmin; serialized_snapshot.csnSnapshotData.snapshotcsn = snapshot->csnSnapshotData.snapshotcsn; serialized_snapshot.csnSnapshotData.xlogptr = snapshot->csnSnapshotData.xlogptr; - serialized_snapshot.undoRegularXmin = snapshot->undoRegularLocationPhNode.xmin; - serialized_snapshot.undoRegularLocation = snapshot->undoRegularLocationPhNode.undoLocation; - serialized_snapshot.undoSystemXmin = snapshot->undoSystemLocationPhNode.xmin; + serialized_snapshot.undoRegularRowLocation = snapshot->undoRegularRowLocationPhNode.undoLocation; + serialized_snapshot.undoRegularPageLocation = snapshot->undoRegularPageLocationPhNode.undoLocation; serialized_snapshot.undoSystemLocation = snapshot->undoSystemLocationPhNode.undoLocation; /* @@ -2280,9 +2278,8 @@ RestoreSnapshot(char *start_address) snapshot->csnSnapshotData.xmin = serialized_snapshot.csnSnapshotData.xmin; snapshot->csnSnapshotData.snapshotcsn = serialized_snapshot.csnSnapshotData.snapshotcsn; snapshot->csnSnapshotData.xlogptr = serialized_snapshot.csnSnapshotData.xlogptr; - snapshot->undoRegularLocationPhNode.xmin = serialized_snapshot.undoRegularXmin; - snapshot->undoRegularLocationPhNode.undoLocation = serialized_snapshot.undoRegularLocation; - snapshot->undoSystemLocationPhNode.xmin = serialized_snapshot.undoSystemXmin; + snapshot->undoRegularRowLocationPhNode.undoLocation = serialized_snapshot.undoRegularRowLocation; + snapshot->undoRegularPageLocationPhNode.undoLocation = serialized_snapshot.undoRegularPageLocation; snapshot->undoSystemLocationPhNode.undoLocation = serialized_snapshot.undoSystemLocation; /* Copy XIDs, if present. */ diff --git a/src/include/utils/snapshot.h b/src/include/utils/snapshot.h index 01093a33315..e645b9c89e9 100644 --- a/src/include/utils/snapshot.h +++ b/src/include/utils/snapshot.h @@ -125,7 +125,6 @@ typedef struct SnapshotData *Snapshot; typedef struct { uint64 undoLocation; /* undo log location retained by this snapshot */ - uint64 xmin; pairingheap_node ph_node; } RetainUndoLocationPHNode; @@ -229,7 +228,8 @@ typedef struct SnapshotData */ uint64 snapXactCompletionCount; - RetainUndoLocationPHNode undoRegularLocationPhNode; + RetainUndoLocationPHNode undoRegularRowLocationPhNode; + RetainUndoLocationPHNode undoRegularPageLocationPhNode; RetainUndoLocationPHNode undoSystemLocationPhNode; CSNSnapshotData csnSnapshotData; } SnapshotData; From 748d9035da7319287c0b33117228008049e62461 Mon Sep 17 00:00:00 2001 From: Ilya Kobets Date: Thu, 3 Apr 2025 19:44:44 +0200 Subject: [PATCH 050/102] Added hint bits horizon --- src/backend/access/heap/heapam_visibility.c | 12 ++++++++++++ src/include/access/heapam.h | 2 ++ 2 files changed, 14 insertions(+) diff --git a/src/backend/access/heap/heapam_visibility.c b/src/backend/access/heap/heapam_visibility.c index a7160013419..47209156f7e 100644 --- a/src/backend/access/heap/heapam_visibility.c +++ b/src/backend/access/heap/heapam_visibility.c @@ -80,6 +80,7 @@ #include "utils/combocid.h" #include "utils/snapmgr.h" +static TransactionId hint_bit_horizon = InvalidTransactionId; /* * SetHintBits() @@ -129,6 +130,11 @@ SetHintBits(HeapTupleHeader tuple, Buffer buffer, } } + if (TransactionIdIsValid(hint_bit_horizon) && + TransactionIdIsValid(xid) && + TransactionIdFollows(xid, hint_bit_horizon)) + return; + tuple->t_infomask |= infomask; MarkBufferDirtyHint(buffer, true); } @@ -1788,3 +1794,9 @@ HeapTupleSatisfiesVisibility(HeapTuple htup, Snapshot snapshot, Buffer buffer) return false; /* keep compiler quiet */ } + +void +SetHintBitsHorizon(TransactionId new_horizon) +{ + hint_bit_horizon = new_horizon; +} diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h index 864f3b3f8d9..0802952e57a 100644 --- a/src/include/access/heapam.h +++ b/src/include/access/heapam.h @@ -328,6 +328,8 @@ extern bool HeapTupleHeaderIsOnlyLocked(HeapTupleHeader tuple); extern bool HeapTupleIsSurelyDead(HeapTuple htup, struct GlobalVisState *vistest); +extern void SetHintBitsHorizon(TransactionId new_horizon); + /* * To avoid leaking too much knowledge about reorderbuffer implementation * details this is implemented in reorderbuffer.c not heapam_visibility.c From d7ac5d70d660c2fe24358ac3f6e4ff8722e4805a Mon Sep 17 00:00:00 2001 From: Artur Zakirov Date: Mon, 28 Apr 2025 14:23:20 +0200 Subject: [PATCH 051/102] Use Ubuntu 24.04 image Github Actions deprecated usage of Ubuntu 20.04 --- .github/workflows/build.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index c6f1bef64aa..f22a5232baf 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -7,14 +7,14 @@ on: jobs: test: runs-on: - - ubuntu-20.04 + - ubuntu-24.04 strategy: fail-fast: false matrix: compiler: [clang, gcc] check_type: [normal, debug] env: - LLVM_VER: 10 + LLVM_VER: 19 COMPILER: ${{ matrix.compiler }} CHECK_TYPE: ${{ matrix.check_type }} steps: From 934f77d2dc7542fa974114ca196395c7b15bba0d Mon Sep 17 00:00:00 2001 From: Egor Ivkov Date: Tue, 15 Apr 2025 19:47:27 +0300 Subject: [PATCH 052/102] Regress check with orioledb storage engine (cherry picked from commit 427ce8b31714d2f9fbb576455b769e5e355a7cc5) --- patches/test_setup_enable_oriole.diff | 27 +++++ src/test/regress/GNUmakefile | 4 + src/test/regress/parallel_schedule_oriole | 129 ++++++++++++++++++++++ src/test/regress/pg_regress.c | 4 +- 4 files changed, 162 insertions(+), 2 deletions(-) create mode 100644 patches/test_setup_enable_oriole.diff create mode 100644 src/test/regress/parallel_schedule_oriole diff --git a/patches/test_setup_enable_oriole.diff b/patches/test_setup_enable_oriole.diff new file mode 100644 index 00000000000..302f616810a --- /dev/null +++ b/patches/test_setup_enable_oriole.diff @@ -0,0 +1,27 @@ +diff --git a/src/test/regress/expected/test_setup.out b/src/test/regress/expected/test_setup.out +index 3d0eeec996..70ce94e21a 100644 +--- a/src/test/regress/expected/test_setup.out ++++ b/src/test/regress/expected/test_setup.out +@@ -21,6 +21,8 @@ GRANT ALL ON SCHEMA public TO public; + -- Create a tablespace we can use in tests. + SET allow_in_place_tablespaces = true; + CREATE TABLESPACE regress_tblspace LOCATION ''; ++-- Enable orioledb extension ++CREATE EXTENSION orioledb; + -- + -- These tables have traditionally been referenced by many tests, + -- so create and populate them. Insert only non-error values here. +diff --git a/src/test/regress/sql/test_setup.sql b/src/test/regress/sql/test_setup.sql +index 06b0e2121f..867ad6a2df 100644 +--- a/src/test/regress/sql/test_setup.sql ++++ b/src/test/regress/sql/test_setup.sql +@@ -27,6 +27,9 @@ GRANT ALL ON SCHEMA public TO public; + SET allow_in_place_tablespaces = true; + CREATE TABLESPACE regress_tblspace LOCATION ''; + ++-- Enable orioledb extension ++CREATE EXTENSION orioledb; ++ + -- + -- These tables have traditionally been referenced by many tests, + -- so create and populate them. Insert only non-error values here. diff --git a/src/test/regress/GNUmakefile b/src/test/regress/GNUmakefile index 38c3a1f85b7..1a9a30cd7af 100644 --- a/src/test/regress/GNUmakefile +++ b/src/test/regress/GNUmakefile @@ -123,6 +123,10 @@ check-tests: all | temp-install installcheck: all $(pg_regress_installcheck) $(REGRESS_OPTS) --schedule=$(srcdir)/parallel_schedule --max-connections=1 $(EXTRA_TESTS) +# Run tests that work with oriole +installcheck-oriole: all + $(pg_regress_installcheck) $(REGRESS_OPTS) --schedule=$(srcdir)/parallel_schedule_oriole --max-connections=1 $(EXTRA_TESTS) + installcheck-parallel: all $(pg_regress_installcheck) $(REGRESS_OPTS) --schedule=$(srcdir)/parallel_schedule $(MAXCONNOPT) $(EXTRA_TESTS) diff --git a/src/test/regress/parallel_schedule_oriole b/src/test/regress/parallel_schedule_oriole new file mode 100644 index 00000000000..dfd35775913 --- /dev/null +++ b/src/test/regress/parallel_schedule_oriole @@ -0,0 +1,129 @@ +# ---------- +# src/test/regress/parallel_schedule +# +# Most test scripts can be run after running just test_setup and possibly +# create_index. Exceptions to this rule are documented below. +# +# By convention, we put no more than twenty tests in any one parallel group; +# this limits the number of connections needed to run the tests. +# ---------- + +# required setup steps +test: test_setup + +# ---------- +# The first group of parallel tests +# ---------- +test: boolean char name varchar text int2 int4 int8 oid float4 float8 bit numeric txid uuid enum money pg_lsn regproc + +# ---------- +# The second group of parallel tests +# multirangetypes depends on rangetypes +# multirangetypes shouldn't run concurrently with type_sanity +# ---------- +test: md5 numerology lseg line path circle date time timetz timestamp timestamptz interval inet macaddr macaddr8 + +# ---------- +# Another group of parallel tests +# geometry depends on point, lseg, line, box, path, polygon, circle +# horology depends on date, time, timetz, timestamp, timestamptz, interval +# ---------- +test: horology tstypes regex comments expressions unicode xid mvcc + +# ---------- +# Load huge amounts of data +# We should split the data files into single files and then +# execute two copy tests in parallel, to check that copy itself +# is concurrent safe. +# ---------- +test: copy copyselect copydml + +# ---------- +# More groups of parallel tests +# Note: many of the tests in later groups depend on create_index +# ---------- +test: create_function_c create_misc create_operator create_procedure create_type create_schema create_view + +# ---------- +# Another group of parallel tests +# ---------- +test: create_aggregate create_function_sql create_cast typed_table drop_if_exists + +# ---------- +# sanity_check does a vacuum, affecting the sort order of SELECT * +# results. So it should not run parallel to other tests. +# ---------- + +# ---------- +# Another group of parallel tests +# aggregates depends on create_aggregate +# join depends on create_misc +# ---------- +test: select_into select_implicit select_having random delete + +# ---------- +# Another group of parallel tests +# ---------- +test: init_privs security_label collate object_address drop_operator password + +# ---------- +# Additional BRIN tests +# ---------- + +# ---------- +# Another group of parallel tests +# psql depends on create_am +# amutils depends on geometry, create_index_spgist, hash_index, brin +# ---------- +test: alter_generic alter_operator misc async dbsize sysviews tsrf create_role + +# collate.linux.utf8 and collate.icu.utf8 tests cannot be run in parallel with each other + +# ---------- +# Run these alone so they don't run out of parallel workers +# select_parallel depends on create_misc +# ---------- + +# no relation related tests can be put in this group +test: subscription + +# ---------- +# Another group of parallel tests +# select_views depends on create_view +# ---------- +test: select_views portals_p2 dependency guc bitmapops xmlmap functional_deps advisory_lock + +# ---------- +# Another group of parallel tests (JSON related) +# ---------- +test: json_encoding jsonpath jsonpath_encoding jsonb_jsonpath sqljson sqljson_queryfuncs sqljson_jsontable + +# ---------- +# Another group of parallel tests +# with depends on create_misc +# NB: temp.sql does a reconnect which transiently uses 2 connections, +# so keep this parallel group to at most 19 tests +# ---------- +test: plancache rangefuncs prepare sequence polymorphism largeobject xml + +# ---------- +# Another group of parallel tests +# +# The stats test resets stats, so nothing else needing stats access can be in +# this group. +# ---------- +test: hash_part predicate + +# event_trigger depends on create_am and cannot run concurrently with +# any test that runs DDL +# oidjoins is read-only, though, and should run late for best coverage +test: oidjoins + +# event_trigger_login cannot run concurrently with any other tests because +# on-login event handling could catch connection of a concurrent test. +test: event_trigger_login + +# this test also uses event triggers, so likewise run it by itself + +# run tablespace test at the end because it drops the tablespace created during +# setup that other tests may use. diff --git a/src/test/regress/pg_regress.c b/src/test/regress/pg_regress.c index e3a0267d5e0..b105fca8450 100644 --- a/src/test/regress/pg_regress.c +++ b/src/test/regress/pg_regress.c @@ -61,8 +61,8 @@ static char *shellprog = SHELLPROG; * Windows-style newlines, but the comparison files might or might not. */ #ifndef WIN32 -const char *basic_diff_opts = ""; -const char *pretty_diff_opts = "-U3"; +const char *basic_diff_opts = "-I \"NOTICE\" -I \"DETAIL\" -I \"WARNING\""; +const char *pretty_diff_opts = "-I \"NOTICE\" -I \"DETAIL\" -I \"WARNING\" -U3"; #else const char *basic_diff_opts = "-w"; const char *pretty_diff_opts = "-w -U3"; From daf861df36a61fa3a219e3b2e845634689c6e10b Mon Sep 17 00:00:00 2001 From: Pavel Borisov Date: Mon, 28 Apr 2025 17:51:50 +0400 Subject: [PATCH 053/102] Remove tests non-existing in PG16 from parallel_schedule_oriole --- src/test/regress/parallel_schedule_oriole | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/test/regress/parallel_schedule_oriole b/src/test/regress/parallel_schedule_oriole index dfd35775913..cc66f1cd8c6 100644 --- a/src/test/regress/parallel_schedule_oriole +++ b/src/test/regress/parallel_schedule_oriole @@ -96,7 +96,7 @@ test: select_views portals_p2 dependency guc bitmapops xmlmap functional_deps ad # ---------- # Another group of parallel tests (JSON related) # ---------- -test: json_encoding jsonpath jsonpath_encoding jsonb_jsonpath sqljson sqljson_queryfuncs sqljson_jsontable +test: json_encoding jsonpath jsonpath_encoding jsonb_jsonpath sqljson # ---------- # Another group of parallel tests @@ -112,17 +112,13 @@ test: plancache rangefuncs prepare sequence polymorphism largeobject xml # The stats test resets stats, so nothing else needing stats access can be in # this group. # ---------- -test: hash_part predicate +test: hash_part # event_trigger depends on create_am and cannot run concurrently with # any test that runs DDL # oidjoins is read-only, though, and should run late for best coverage test: oidjoins -# event_trigger_login cannot run concurrently with any other tests because -# on-login event handling could catch connection of a concurrent test. -test: event_trigger_login - # this test also uses event triggers, so likewise run it by itself # run tablespace test at the end because it drops the tablespace created during From 326f195bf55a3f67f371c8c0959ad0bff40a3160 Mon Sep 17 00:00:00 2001 From: Pavel Borisov Date: Fri, 4 Apr 2025 16:52:43 +0400 Subject: [PATCH 054/102] Add wait event for rewind worker --- src/backend/utils/activity/wait_event.c | 3 +++ src/include/utils/wait_event.h | 1 + 2 files changed, 4 insertions(+) diff --git a/src/backend/utils/activity/wait_event.c b/src/backend/utils/activity/wait_event.c index 05b71f9bc2a..d5e720e6c1d 100644 --- a/src/backend/utils/activity/wait_event.c +++ b/src/backend/utils/activity/wait_event.c @@ -221,6 +221,9 @@ pgstat_get_wait_activity(WaitEventActivity w) case WAIT_EVENT_BGWRITER_MAIN: event_name = "BgWriterMain"; break; + case WAIT_EVENT_REWIND_WORKER_MAIN: + event_name = "RewindWorkerMain"; + break; case WAIT_EVENT_CHECKPOINTER_MAIN: event_name = "CheckpointerMain"; break; diff --git a/src/include/utils/wait_event.h b/src/include/utils/wait_event.h index 2adc5df2e79..acf51475855 100644 --- a/src/include/utils/wait_event.h +++ b/src/include/utils/wait_event.h @@ -39,6 +39,7 @@ typedef enum WAIT_EVENT_AUTOVACUUM_MAIN, WAIT_EVENT_BGWRITER_HIBERNATE, WAIT_EVENT_BGWRITER_MAIN, + WAIT_EVENT_REWIND_WORKER_MAIN, WAIT_EVENT_CHECKPOINTER_MAIN, WAIT_EVENT_LOGICAL_APPLY_MAIN, WAIT_EVENT_LOGICAL_LAUNCHER_MAIN, From 835699d0d1f9d4645fb12b72f871742ccacc20e9 Mon Sep 17 00:00:00 2001 From: Alexander Korotkov Date: Wed, 21 May 2025 03:12:58 +0300 Subject: [PATCH 055/102] Add VacuumHorizonHook instead of just hint bits horizon --- src/backend/access/heap/heapam_visibility.c | 20 +++++++++----------- src/backend/commands/vacuum.c | 16 ++++++++++++++++ src/include/access/heapam.h | 6 ++++-- 3 files changed, 29 insertions(+), 13 deletions(-) diff --git a/src/backend/access/heap/heapam_visibility.c b/src/backend/access/heap/heapam_visibility.c index 47209156f7e..74e1266195b 100644 --- a/src/backend/access/heap/heapam_visibility.c +++ b/src/backend/access/heap/heapam_visibility.c @@ -80,7 +80,7 @@ #include "utils/combocid.h" #include "utils/snapmgr.h" -static TransactionId hint_bit_horizon = InvalidTransactionId; +VacuumHorizonHookType VacuumHorizonHook = NULL; /* * SetHintBits() @@ -130,10 +130,14 @@ SetHintBits(HeapTupleHeader tuple, Buffer buffer, } } - if (TransactionIdIsValid(hint_bit_horizon) && - TransactionIdIsValid(xid) && - TransactionIdFollows(xid, hint_bit_horizon)) - return; + if (TransactionIdIsValid(xid) && VacuumHorizonHook) + { + TransactionId horizon = VacuumHorizonHook(); + + if (TransactionIdIsValid(horizon) && + TransactionIdFollows(xid, horizon)) + return; + } tuple->t_infomask |= infomask; MarkBufferDirtyHint(buffer, true); @@ -1794,9 +1798,3 @@ HeapTupleSatisfiesVisibility(HeapTuple htup, Snapshot snapshot, Buffer buffer) return false; /* keep compiler quiet */ } - -void -SetHintBitsHorizon(TransactionId new_horizon) -{ - hint_bit_horizon = new_horizon; -} diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c index 9ec511979e6..1713757d5e7 100644 --- a/src/backend/commands/vacuum.c +++ b/src/backend/commands/vacuum.c @@ -1119,6 +1119,14 @@ vacuum_get_cutoffs(Relation rel, const VacuumParams *params, * any time, and that each vacuum is always an independent transaction. */ cutoffs->OldestXmin = GetOldestNonRemovableTransactionId(rel); + if (VacuumHorizonHook) + { + TransactionId horizon = VacuumHorizonHook(); + + if (TransactionIdIsValid(horizon) && + TransactionIdFollows(cutoffs->OldestXmin, horizon)) + cutoffs->OldestXmin = horizon; + } if (OldSnapshotThresholdActive()) { @@ -1635,6 +1643,14 @@ vac_update_datfrozenxid(void) * cannot produce a wrong minimum by starting with this. */ newFrozenXid = GetOldestNonRemovableTransactionId(NULL); + if (VacuumHorizonHook) + { + TransactionId horizon = VacuumHorizonHook(); + + if (TransactionIdIsValid(horizon) && + TransactionIdFollows(newFrozenXid, horizon)) + newFrozenXid = horizon; + } /* * Similarly, initialize the MultiXact "min" with the value that would be diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h index 0802952e57a..cf881f67fce 100644 --- a/src/include/access/heapam.h +++ b/src/include/access/heapam.h @@ -314,6 +314,10 @@ extern void heap_vacuum_rel(Relation rel, struct VacuumParams *params, BufferAccessStrategy bstrategy); /* in heap/heapam_visibility.c */ +typedef TransactionId (*VacuumHorizonHookType) (void); + +extern VacuumHorizonHookType VacuumHorizonHook; + extern bool HeapTupleSatisfiesVisibility(HeapTuple htup, Snapshot snapshot, Buffer buffer); extern TM_Result HeapTupleSatisfiesUpdate(HeapTuple htup, CommandId curcid, @@ -328,8 +332,6 @@ extern bool HeapTupleHeaderIsOnlyLocked(HeapTupleHeader tuple); extern bool HeapTupleIsSurelyDead(HeapTuple htup, struct GlobalVisState *vistest); -extern void SetHintBitsHorizon(TransactionId new_horizon); - /* * To avoid leaking too much knowledge about reorderbuffer implementation * details this is implemented in reorderbuffer.c not heapam_visibility.c From 0134238a11e5c750158bccad05c12f87cc5d3013 Mon Sep 17 00:00:00 2001 From: Pavel Borisov Date: Thu, 22 May 2025 17:19:03 +0400 Subject: [PATCH 056/102] Modify CountOtherDBBackends/TerminateOtherDBBackends to process whole cluster by providing invalid databaseId For bgworkers CountOtherDBBackends doesn't add it to nbackends, but check if they are prepared transactions to add to nprepared --- src/backend/storage/ipc/procarray.c | 37 +++++++++++++++++++++++++---- 1 file changed, 33 insertions(+), 4 deletions(-) diff --git a/src/backend/storage/ipc/procarray.c b/src/backend/storage/ipc/procarray.c index 81acd267799..a359afbce9a 100644 --- a/src/backend/storage/ipc/procarray.c +++ b/src/backend/storage/ipc/procarray.c @@ -3703,6 +3703,8 @@ CountUserBackends(Oid roleid) * backend startup. The caller should normally hold an exclusive lock on the * target DB before calling this, which is one reason we mustn't wait * indefinitely. + * + * If databaseId is InvalidOid, count all non-bgworker backends in a cluster. */ bool CountOtherDBBackends(Oid databaseId, int *nbackends, int *nprepared) @@ -3732,8 +3734,22 @@ CountOtherDBBackends(Oid databaseId, int *nbackends, int *nprepared) PGPROC *proc = &allProcs[pgprocno]; uint8 statusFlags = ProcGlobal->statusFlags[index]; - if (proc->databaseId != databaseId) - continue; + if (databaseId != InvalidOid) + { + if (proc->databaseId != databaseId) + continue; + } + else + { + if (proc->isBackgroundWorker) + { + if (proc->pid == 0) + (*nprepared)++; + + continue; /* do not count background workers */ + } + } + if (proc == MyProc) continue; @@ -3781,6 +3797,8 @@ CountOtherDBBackends(Oid databaseId, int *nbackends, int *nprepared) * * If the target database has a prepared transaction or permissions checks * fail for a connection, this fails without terminating anything. + * + * If databaseId is InvalidOid, terminate all backends in a cluster. */ void TerminateOtherDBBackends(Oid databaseId) @@ -3797,7 +3815,7 @@ TerminateOtherDBBackends(Oid databaseId) int pgprocno = arrayP->pgprocnos[i]; PGPROC *proc = &allProcs[pgprocno]; - if (proc->databaseId != databaseId) + if (databaseId != InvalidOid && proc->databaseId != databaseId) continue; if (proc == MyProc) continue; @@ -3811,7 +3829,9 @@ TerminateOtherDBBackends(Oid databaseId) LWLockRelease(ProcArrayLock); if (nprepared > 0) - ereport(ERROR, + { + if (databaseId != InvalidOid) + ereport(ERROR, (errcode(ERRCODE_OBJECT_IN_USE), errmsg("database \"%s\" is being used by prepared transactions", get_database_name(databaseId)), @@ -3819,6 +3839,15 @@ TerminateOtherDBBackends(Oid databaseId) "There are %d prepared transactions using the database.", nprepared, nprepared))); + else + ereport(ERROR, + (errcode(ERRCODE_OBJECT_IN_USE), + errmsg("cluster is being used by prepared transactions"), + errdetail_plural("There is %d prepared transaction using the cluster.", + "There are %d prepared transactions using the cluster.", + nprepared, + nprepared))); + } if (pids) { From 9d36d97b24711cee288d21936ba080445f9afda3 Mon Sep 17 00:00:00 2001 From: Pavel Borisov Date: Mon, 26 May 2025 20:51:29 +0400 Subject: [PATCH 057/102] Disable assert in clog This is required to have rewind functionality on heap tables --- src/backend/access/transam/clog.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/backend/access/transam/clog.c b/src/backend/access/transam/clog.c index 4a431d58767..29663470dac 100644 --- a/src/backend/access/transam/clog.c +++ b/src/backend/access/transam/clog.c @@ -591,10 +591,10 @@ TransactionIdSetStatusBit(TransactionId xid, XidStatus status, XLogRecPtr lsn, i * Current state change should be from 0 or subcommitted to target state * or we should already be there when replaying changes during recovery. */ - Assert(curval == 0 || - (curval == TRANSACTION_STATUS_SUB_COMMITTED && - status != TRANSACTION_STATUS_IN_PROGRESS) || - curval == status); +// Assert(curval == 0 || +// (curval == TRANSACTION_STATUS_SUB_COMMITTED && +// status != TRANSACTION_STATUS_IN_PROGRESS) || +// curval == status); /* note this assumes exclusive access to the clog page */ byteval = *byteptr; From 86464e471016f7038cbbb514e819c2393b335fb0 Mon Sep 17 00:00:00 2001 From: Alexander Korotkov Date: Sun, 29 Jun 2025 17:45:18 +0300 Subject: [PATCH 058/102] Use VacuumHorizonHook in more places --- src/backend/storage/ipc/procarray.c | 40 ++++++++++++++++++++++++----- 1 file changed, 34 insertions(+), 6 deletions(-) diff --git a/src/backend/storage/ipc/procarray.c b/src/backend/storage/ipc/procarray.c index a359afbce9a..0bf6a921924 100644 --- a/src/backend/storage/ipc/procarray.c +++ b/src/backend/storage/ipc/procarray.c @@ -48,6 +48,7 @@ #include #include "access/clog.h" +#include "access/heapam.h" #include "access/subtrans.h" #include "access/transam.h" #include "access/twophase.h" @@ -2004,23 +2005,36 @@ TransactionId GetOldestNonRemovableTransactionId(Relation rel) { ComputeXidHorizonsResult horizons; + TransactionId result = InvalidTransactionId; ComputeXidHorizons(&horizons); switch (GlobalVisHorizonKindForRel(rel)) { case VISHORIZON_SHARED: - return horizons.shared_oldest_nonremovable; + result = horizons.shared_oldest_nonremovable; + break; case VISHORIZON_CATALOG: - return horizons.catalog_oldest_nonremovable; + result = horizons.catalog_oldest_nonremovable; + break; case VISHORIZON_DATA: - return horizons.data_oldest_nonremovable; + result = horizons.data_oldest_nonremovable; + break; case VISHORIZON_TEMP: - return horizons.temp_oldest_nonremovable; + result = horizons.temp_oldest_nonremovable; + break; } - /* just to prevent compiler warnings */ - return InvalidTransactionId; + if (VacuumHorizonHook) + { + TransactionId horizon = VacuumHorizonHook(); + + if (TransactionIdIsValid(horizon) && + TransactionIdFollows(result, horizon)) + result = horizon; + } + + return result; } /* @@ -4114,6 +4128,20 @@ GlobalVisTestFor(Relation rel) break; } + if (VacuumHorizonHook) + { + TransactionId horizon = VacuumHorizonHook(); + if (TransactionIdIsValid(horizon)) + { + FullTransactionId fullHorizon = FullXidRelativeTo(state->definitely_needed, horizon); + + if (FullTransactionIdFollows(state->definitely_needed, fullHorizon)) + state->definitely_needed = fullHorizon; + if (FullTransactionIdFollows(state->maybe_needed, fullHorizon)) + state->maybe_needed = fullHorizon; + } + } + Assert(FullTransactionIdIsValid(state->definitely_needed) && FullTransactionIdIsValid(state->maybe_needed)); From 9fa8ba7de2347e59d2003148f6e89ad4360cecc7 Mon Sep 17 00:00:00 2001 From: Ilya Kobets Date: Wed, 28 May 2025 13:55:49 +0200 Subject: [PATCH 059/102] Serialize reftype for RTE_RELATION Fixes "rules" regress test for running with orioledb --- src/backend/nodes/outfuncs.c | 1 + src/backend/nodes/readfuncs.c | 1 + 2 files changed, 2 insertions(+) diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index e56392e6f9d..1a7ddd41f1b 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -504,6 +504,7 @@ _outRangeTblEntry(StringInfo str, const RangeTblEntry *node) case RTE_RELATION: WRITE_OID_FIELD(relid); WRITE_CHAR_FIELD(relkind); + WRITE_INT_FIELD(reftype); WRITE_INT_FIELD(rellockmode); WRITE_NODE_FIELD(tablesample); WRITE_UINT_FIELD(perminfoindex); diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c index 97e43cbb49c..d0b851d06bf 100644 --- a/src/backend/nodes/readfuncs.c +++ b/src/backend/nodes/readfuncs.c @@ -494,6 +494,7 @@ _readRangeTblEntry(void) case RTE_RELATION: READ_OID_FIELD(relid); READ_CHAR_FIELD(relkind); + READ_INT_FIELD(reftype); READ_INT_FIELD(rellockmode); READ_NODE_FIELD(tablesample); READ_UINT_FIELD(perminfoindex); From 935e22a5bb8568105ae55095fcaaed6aad045c5b Mon Sep 17 00:00:00 2001 From: Artur Zakirov Date: Tue, 26 Aug 2025 14:58:42 +0200 Subject: [PATCH 060/102] Issue #487: Add TableSupportsBackwardScan() and TableAmRoutine->amcanbackward OrioleDB table AM doesn't support BACKWARD SCAN in all cases. A user needs to declare a cursor with SCROLL option. --- src/backend/access/heap/heapam_handler.c | 1 + src/backend/commands/portalcmds.c | 2 +- src/backend/executor/execAmi.c | 52 +++++++++++++++++++++--- src/backend/executor/spi.c | 3 +- src/backend/optimizer/plan/planner.c | 2 +- src/include/access/tableam.h | 2 + src/include/executor/executor.h | 2 +- 7 files changed, 55 insertions(+), 9 deletions(-) diff --git a/src/backend/access/heap/heapam_handler.c b/src/backend/access/heap/heapam_handler.c index a32fc3b69fb..50ca1db23d7 100644 --- a/src/backend/access/heap/heapam_handler.c +++ b/src/backend/access/heap/heapam_handler.c @@ -2917,6 +2917,7 @@ heapam_reloptions(char relkind, Datum reloptions, bool validate) static const TableAmRoutine heapam_methods = { .type = T_TableAmRoutine, + .amcanbackward = true, .slot_callbacks = heapam_slot_callbacks, .get_row_ref_type = heapam_get_row_ref_type, diff --git a/src/backend/commands/portalcmds.c b/src/backend/commands/portalcmds.c index 73ed7aa2f0a..ad5969691cb 100644 --- a/src/backend/commands/portalcmds.c +++ b/src/backend/commands/portalcmds.c @@ -134,7 +134,7 @@ PerformCursorOpen(ParseState *pstate, DeclareCursorStmt *cstmt, ParamListInfo pa if (!(portal->cursorOptions & (CURSOR_OPT_SCROLL | CURSOR_OPT_NO_SCROLL))) { if (plan->rowMarks == NIL && - ExecSupportsBackwardScan(plan->planTree)) + ExecSupportsBackwardScan(plan->planTree, plan->rtable)) portal->cursorOptions |= CURSOR_OPT_SCROLL; else portal->cursorOptions |= CURSOR_OPT_NO_SCROLL; diff --git a/src/backend/executor/execAmi.c b/src/backend/executor/execAmi.c index 286a0f8f222..a534c72ed4a 100644 --- a/src/backend/executor/execAmi.c +++ b/src/backend/executor/execAmi.c @@ -14,7 +14,9 @@ #include "access/amapi.h" #include "access/htup_details.h" +#include "access/tableam.h" #include "executor/execdebug.h" +#include "catalog/pg_class.h" #include "executor/nodeAgg.h" #include "executor/nodeAppend.h" #include "executor/nodeBitmapAnd.h" @@ -61,10 +63,12 @@ #include "nodes/extensible.h" #include "nodes/nodeFuncs.h" #include "nodes/pathnodes.h" +#include "parser/parsetree.h" #include "utils/rel.h" #include "utils/syscache.h" static bool IndexSupportsBackwardScan(Oid indexid); +static bool TableSupportsBackwardScan(Oid tableid); /* @@ -509,7 +513,7 @@ ExecSupportsMarkRestore(Path *pathnode) * children do. Therefore, this routine must be passed a complete plan tree. */ bool -ExecSupportsBackwardScan(Plan *node) +ExecSupportsBackwardScan(Plan *node, List *rtable) { if (node == NULL) return false; @@ -526,7 +530,7 @@ ExecSupportsBackwardScan(Plan *node) { case T_Result: if (outerPlan(node) != NULL) - return ExecSupportsBackwardScan(outerPlan(node)); + return ExecSupportsBackwardScan(outerPlan(node), rtable); else return false; @@ -540,7 +544,7 @@ ExecSupportsBackwardScan(Plan *node) foreach(l, ((Append *) node)->appendplans) { - if (!ExecSupportsBackwardScan((Plan *) lfirst(l))) + if (!ExecSupportsBackwardScan((Plan *) lfirst(l), rtable)) return false; } /* need not check tlist because Append doesn't evaluate it */ @@ -561,7 +565,7 @@ ExecSupportsBackwardScan(Plan *node) return IndexSupportsBackwardScan(((IndexOnlyScan *) node)->indexid); case T_SubqueryScan: - return ExecSupportsBackwardScan(((SubqueryScan *) node)->subplan); + return ExecSupportsBackwardScan(((SubqueryScan *) node)->subplan, rtable); case T_CustomScan: if (((CustomScan *) node)->flags & CUSTOMPATH_SUPPORT_BACKWARD_SCAN) @@ -571,6 +575,16 @@ ExecSupportsBackwardScan(Plan *node) case T_SeqScan: case T_TidScan: case T_TidRangeScan: + { + RangeTblEntry *rte; + + Assert(((Scan *) node)->scanrelid > 0 && + ((Scan *) node)->scanrelid <= list_length(rtable)); + + rte = rt_fetch(((Scan *) node)->scanrelid, rtable); + return TableSupportsBackwardScan(rte->relid); + } + case T_FunctionScan: case T_ValuesScan: case T_CteScan: @@ -589,7 +603,7 @@ ExecSupportsBackwardScan(Plan *node) case T_LockRows: case T_Limit: - return ExecSupportsBackwardScan(outerPlan(node)); + return ExecSupportsBackwardScan(outerPlan(node), rtable); default: return false; @@ -625,6 +639,34 @@ IndexSupportsBackwardScan(Oid indexid) return result; } +/* + * An SeqScan, TidScan or TidRangeScan node supports backward scan only if the + * table's AM does. + */ +static bool +TableSupportsBackwardScan(Oid tableid) +{ + bool result; + HeapTuple ht_tabrel; + Form_pg_class tabrelrec; + const TableAmRoutine *amroutine; + + /* Fetch the pg_class tuple of the index relation */ + ht_tabrel = SearchSysCache1(RELOID, ObjectIdGetDatum(tableid)); + if (!HeapTupleIsValid(ht_tabrel)) + elog(ERROR, "cache lookup failed for relation %u", tableid); + tabrelrec = (Form_pg_class) GETSTRUCT(ht_tabrel); + + /* Fetch the table AM's API struct */ + amroutine = GetTableAmRoutineByAmOid(tabrelrec->relam); + + result = amroutine->amcanbackward; + + ReleaseSysCache(ht_tabrel); + + return result; +} + /* * ExecMaterializesOutput - does a plan type materialize its output? * diff --git a/src/backend/executor/spi.c b/src/backend/executor/spi.c index 99efa36c057..8b196715078 100644 --- a/src/backend/executor/spi.c +++ b/src/backend/executor/spi.c @@ -1698,7 +1698,8 @@ SPI_cursor_open_internal(const char *name, SPIPlanPtr plan, if (list_length(stmt_list) == 1 && linitial_node(PlannedStmt, stmt_list)->commandType != CMD_UTILITY && linitial_node(PlannedStmt, stmt_list)->rowMarks == NIL && - ExecSupportsBackwardScan(linitial_node(PlannedStmt, stmt_list)->planTree)) + ExecSupportsBackwardScan(linitial_node(PlannedStmt, stmt_list)->planTree, + linitial_node(PlannedStmt, stmt_list)->rtable)) portal->cursorOptions |= CURSOR_OPT_SCROLL; else portal->cursorOptions |= CURSOR_OPT_NO_SCROLL; diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index ffcbdc7599d..0fcb2d8cf00 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -426,7 +426,7 @@ standard_planner(Query *parse, const char *query_string, int cursorOptions, */ if (cursorOptions & CURSOR_OPT_SCROLL) { - if (!ExecSupportsBackwardScan(top_plan)) + if (!ExecSupportsBackwardScan(top_plan, root->parse->rtable)) top_plan = materialize_finished_plan(top_plan); } diff --git a/src/include/access/tableam.h b/src/include/access/tableam.h index 2348d8a2fbd..a16fd5eaa2b 100644 --- a/src/include/access/tableam.h +++ b/src/include/access/tableam.h @@ -309,6 +309,8 @@ typedef struct TableAmRoutine /* this must be set to T_TableAmRoutine */ NodeTag type; + /* does AM support backward scanning? */ + bool amcanbackward; /* ------------------------------------------------------------------------ * Slot related callbacks. diff --git a/src/include/executor/executor.h b/src/include/executor/executor.h index 355e92a361e..0ca3a91fc66 100644 --- a/src/include/executor/executor.h +++ b/src/include/executor/executor.h @@ -106,7 +106,7 @@ extern void ExecReScan(PlanState *node); extern void ExecMarkPos(PlanState *node); extern void ExecRestrPos(PlanState *node); extern bool ExecSupportsMarkRestore(struct Path *pathnode); -extern bool ExecSupportsBackwardScan(Plan *node); +extern bool ExecSupportsBackwardScan(Plan *node, List *rtable); extern bool ExecMaterializesOutput(NodeTag plantype); /* From 178e995422aeb78e8291299a67d6b1fc6e73929c Mon Sep 17 00:00:00 2001 From: Pavel Borisov Date: Mon, 4 Aug 2025 20:50:59 +0400 Subject: [PATCH 061/102] Extend tableam->relation_size() functionality to use different calculation methods. This allows to output size metrics for relations and indexes provided by OrioleDB extension using existing PG functions. If extension function doesn't support the requested method it should output negative value and thus asking to fall back to using PG internal calculation e.g. - Orioledb relation_size outputs -1 for bridged indexes to fallback to counting them as PG indexes - PG table_block_relation_size outputs -1 for any method except the only DEFAULT_SIZE that it supports. This API relies on table AM extensibility and doesn't use index AM extensibility yet, which could look more logical, but also more complicated. --- src/backend/access/table/tableam.c | 12 +++- src/backend/utils/adt/dbsize.c | 106 ++++++++++++++++++++++++++++- src/include/access/tableam.h | 16 ++++- 3 files changed, 127 insertions(+), 7 deletions(-) diff --git a/src/backend/access/table/tableam.c b/src/backend/access/table/tableam.c index 3f64d70666e..afe35f79668 100644 --- a/src/backend/access/table/tableam.c +++ b/src/backend/access/table/tableam.c @@ -635,12 +635,18 @@ table_block_parallelscan_nextpage(Relation rel, * is stored, and if it uses them in the expected manner (e.g. the actual data * is in the main fork rather than some other), it can use this implementation * of the relation_size callback rather than implementing its own. + * + * Different counting methods is not supported for this function yet. It's expected + * DEFAULT_SIZE in all cases. */ -uint64 -table_block_relation_size(Relation rel, ForkNumber forkNumber) +int64 +table_block_relation_size(Relation rel, ForkNumber forkNumber, uint8 method) { uint64 nblocks = 0; + if (method != DEFAULT_SIZE) + return -1; + /* InvalidForkNumber indicates returning the size for all forks */ if (forkNumber == InvalidForkNumber) { @@ -650,7 +656,7 @@ table_block_relation_size(Relation rel, ForkNumber forkNumber) else nblocks = smgrnblocks(RelationGetSmgr(rel), forkNumber); - return nblocks * BLCKSZ; + return (int64) nblocks * BLCKSZ; } /* diff --git a/src/backend/utils/adt/dbsize.c b/src/backend/utils/adt/dbsize.c index 9bc8e81c400..5037bcb45ef 100644 --- a/src/backend/utils/adt/dbsize.c +++ b/src/backend/utils/adt/dbsize.c @@ -31,6 +31,7 @@ #include "utils/relfilenumbermap.h" #include "utils/relmapper.h" #include "utils/syscache.h" +#include "access/tableam.h" /* Divide by two and round away from zero */ #define half_rounded(x) (((x) + ((x) < 0 ? -1 : 1)) / 2) @@ -343,6 +344,61 @@ calculate_relation_size(RelFileLocator *rfn, BackendId backend, ForkNumber forkn return totalsize; } +/* + * Try to get size using proper relation_size method in table am. Fallback to previous method + * if rd_tableam->relation_size doesn't support requested counting method or otherwise refuses + * to count (with negative output) + */ +static +int64 try_tableam_relation_size(Relation rel, ForkNumber forkNum, bool allforks, uint8 method) +{ + int64 size = 0; + + if (rel->rd_rel->relkind == RELKIND_INDEX) + { + /* For index check rd_tableam for parent relation */ + Relation tbl; + + tbl = relation_open(rel->rd_index->indrelid, AccessShareLock); + if(tbl->rd_tableam && tbl->rd_tableam->relation_size) + { + /* We call relation_size method for parent relation but provide index relation as an argument. + * Method for index is always RELATION_SIZE + */ + if (allforks) + { + for (ForkNumber i = 0; i <= MAX_FORKNUM; i++) + { + size += tbl->rd_tableam->relation_size(rel, i, RELATION_SIZE); + } + } + else + size = tbl->rd_tableam->relation_size(rel, forkNum, RELATION_SIZE); + + if (size >= 0) + { + relation_close(tbl, AccessShareLock); + return size; + } + } + relation_close(tbl, AccessShareLock); + } + else if (rel->rd_tableam && rel->rd_tableam->relation_size) + { + if (allforks) + { + for (ForkNumber i = 0; i <= MAX_FORKNUM; i++) + size += rel->rd_tableam->relation_size(rel, i, method); + } + else + size = rel->rd_tableam->relation_size(rel, forkNum, method); + + if (size >= 0) + return size; + } + return -1; +} + Datum pg_relation_size(PG_FUNCTION_ARGS) { @@ -363,6 +419,18 @@ pg_relation_size(PG_FUNCTION_ARGS) if (rel == NULL) PG_RETURN_NULL(); + /* + * Try to get size using proper relation_size method in table am. Fallback to previous method + * if rd_tableam->relation_size doesn't support requested counting method or otherwise refuses + * to count (with negative output) + */ + size = try_tableam_relation_size(rel, forkname_to_number(text_to_cstring(forkName)), false, RELATION_SIZE); + if (size >= 0) + { + relation_close(rel, AccessShareLock); + PG_RETURN_INT64(size); + } + size = calculate_relation_size(&(rel->rd_locator), rel->rd_backend, forkname_to_number(text_to_cstring(forkName))); @@ -495,6 +563,18 @@ pg_table_size(PG_FUNCTION_ARGS) if (rel == NULL) PG_RETURN_NULL(); + /* + * Try to get size using proper relation_size method in table am. Fallback to previous method + * if rd_tableam->relation_size doesn't support requested counting method or otherwise refuses + * to count (with negative output) + */ + size = try_tableam_relation_size(rel, InvalidForkNumber, true, TABLE_SIZE); + if (size >= 0) + { + relation_close(rel, AccessShareLock); + PG_RETURN_INT64(size); + } + size = calculate_table_size(rel); relation_close(rel, AccessShareLock); @@ -507,13 +587,25 @@ pg_indexes_size(PG_FUNCTION_ARGS) { Oid relOid = PG_GETARG_OID(0); Relation rel; - int64 size; + int64 size = 0; rel = try_relation_open(relOid, AccessShareLock); if (rel == NULL) PG_RETURN_NULL(); + if (rel->rd_tableam && rel->rd_tableam->relation_size) + { + for (ForkNumber forkNum = 0; forkNum <= MAX_FORKNUM; forkNum++) + size += rel->rd_tableam->relation_size(rel, forkNum , INDEXES_SIZE); + + if(size >= 0) + { + relation_close(rel, AccessShareLock); + PG_RETURN_INT64(size); + } + } + size = calculate_indexes_size(rel); relation_close(rel, AccessShareLock); @@ -556,6 +648,18 @@ pg_total_relation_size(PG_FUNCTION_ARGS) if (rel == NULL) PG_RETURN_NULL(); + /* + * Try to get size using proper relation_size method in table am. Fallback to previous method + * if rd_tableam->relation_size doesn't support requested counting method or otherwise refuses + * to count (with negative output) + */ + size = try_tableam_relation_size(rel, InvalidForkNumber, true, TOTAL_SIZE); + if (size >= 0) + { + relation_close(rel, AccessShareLock); + PG_RETURN_INT64(size); + } + size = calculate_total_relation_size(rel); relation_close(rel, AccessShareLock); diff --git a/src/include/access/tableam.h b/src/include/access/tableam.h index a16fd5eaa2b..63aa0501668 100644 --- a/src/include/access/tableam.h +++ b/src/include/access/tableam.h @@ -284,6 +284,15 @@ typedef struct TM_IndexDeleteOp #define TABLE_MODIFY_FETCH_OLD_TUPLE 0x0002 #define TABLE_MODIFY_LOCK_UPDATED 0x0004 +/* "method" flag bits for relation_size */ +/* Default behavior implemented for heap AM */ +#define DEFAULT_SIZE (0) +/* Extended behavior that AM can provide */ +#define RELATION_SIZE (1) +#define TABLE_SIZE (2) +#define TOTAL_SIZE (3) +#define TOAST_TABLE_SIZE (4) +#define INDEXES_SIZE (5) /* Typedef for callback function for table_index_build_scan */ typedef void (*IndexBuildCallback) (Relation index, @@ -744,7 +753,7 @@ typedef struct TableAmRoutine * probable that we'll need to revise the interface for those at some * point. */ - uint64 (*relation_size) (Relation rel, ForkNumber forkNumber); + int64 (*relation_size) (Relation rel, ForkNumber forkNumber, uint8 method); /* @@ -1913,7 +1922,8 @@ table_index_validate_scan(Relation table_rel, static inline uint64 table_relation_size(Relation rel, ForkNumber forkNumber) { - return rel->rd_tableam->relation_size(rel, forkNumber); + int64 res = rel->rd_tableam->relation_size(rel, forkNumber, DEFAULT_SIZE); + return res >= 0 ? res : 0; } /* @@ -2136,7 +2146,7 @@ extern void table_block_parallelscan_startblock_init(Relation rel, * ---------------------------------------------------------------------------- */ -extern uint64 table_block_relation_size(Relation rel, ForkNumber forkNumber); +extern int64 table_block_relation_size(Relation rel, ForkNumber forkNumber, uint8 method); extern void table_block_relation_estimate_size(Relation rel, int32 *attr_widths, BlockNumber *pages, From 98416012c4250f461a6b385c951539c15a2803bd Mon Sep 17 00:00:00 2001 From: Angus Dippenaar Date: Thu, 19 Dec 2024 00:30:47 +0100 Subject: [PATCH 062/102] fix pg_rewind docs --- doc/src/sgml/ref/pg_rewind.sgml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/doc/src/sgml/ref/pg_rewind.sgml b/doc/src/sgml/ref/pg_rewind.sgml index 01d20462e33..eed7fc83944 100644 --- a/doc/src/sgml/ref/pg_rewind.sgml +++ b/doc/src/sgml/ref/pg_rewind.sgml @@ -289,7 +289,10 @@ PostgreSQL documentation - Load shared library that performs custom rewind for postgres extension. The path may be full or relative to PKGLIBDIR. File extension is optional. Multiple extensions can be selected by multiple switches. + Load shared library that performs custom rewind for postgres extension. + The path may be full or + relative to PKGLIBDIR. File extension is optional. Multiple extensions + can be selected by multiple switches. From 6930465b4976d38c0d5c2715a72fe534e23043cf Mon Sep 17 00:00:00 2001 From: Alexander Korotkov Date: Wed, 24 Sep 2025 01:41:41 +0300 Subject: [PATCH 063/102] Fix copying datum in ExecModifyTable() Save the alignment. --- src/backend/executor/nodeModifyTable.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/backend/executor/nodeModifyTable.c b/src/backend/executor/nodeModifyTable.c index c75e4df7cc1..1c6467fd101 100644 --- a/src/backend/executor/nodeModifyTable.c +++ b/src/backend/executor/nodeModifyTable.c @@ -3714,7 +3714,9 @@ ExecModifyTable(PlanState *pstate) if (isNull) elog(ERROR, "rowid is NULL"); - tupleid = datumCopy(datum, false, -1); + tupleid = PointerGetDatum(PG_DETOAST_DATUM(datum)); + if (tupleid == datum) + tupleid = datumCopy(datum, false, -1); } } From fea656088fe910028f9404480480ad4ef517426b Mon Sep 17 00:00:00 2001 From: Alexander Korotkov Date: Tue, 30 Sep 2025 14:00:37 +0300 Subject: [PATCH 064/102] Add GetReplayXlogPtrHook allowing extensions to adjust replay LSN Useful when extension performs some async operations in the WAL record redo function. --- src/backend/access/transam/xlogfuncs.c | 2 +- src/backend/access/transam/xlogrecovery.c | 22 ++++++++++++++++++++++ src/backend/replication/walreceiver.c | 2 +- src/backend/replication/walreceiverfuncs.c | 2 +- src/include/access/xlogrecovery.h | 6 ++++++ 5 files changed, 31 insertions(+), 3 deletions(-) diff --git a/src/backend/access/transam/xlogfuncs.c b/src/backend/access/transam/xlogfuncs.c index 5044ff06432..3430685b8a2 100644 --- a/src/backend/access/transam/xlogfuncs.c +++ b/src/backend/access/transam/xlogfuncs.c @@ -363,7 +363,7 @@ pg_last_wal_replay_lsn(PG_FUNCTION_ARGS) { XLogRecPtr recptr; - recptr = GetXLogReplayRecPtr(NULL); + recptr = GetEffectiveXlogReplayRecPtr(); if (recptr == 0) PG_RETURN_NULL(); diff --git a/src/backend/access/transam/xlogrecovery.c b/src/backend/access/transam/xlogrecovery.c index 6e09937b018..ebe644973f6 100644 --- a/src/backend/access/transam/xlogrecovery.c +++ b/src/backend/access/transam/xlogrecovery.c @@ -4515,6 +4515,28 @@ GetXLogReplayRecPtr(TimeLineID *replayTLI) return recptr; } +GetReplayXlogPtrHookType GetReplayXlogPtrHook = NULL; + +/* + * Get effective latest redo apply position. + * + * Can be tuned by extensions processing WAL records asyncronously. + */ +XLogRecPtr +GetEffectiveXlogReplayRecPtr(void) +{ + XLogRecPtr recptr = InvalidXLogRecPtr; + + SpinLockAcquire(&XLogRecoveryCtl->info_lck); + if (GetReplayXlogPtrHook) + recptr = GetReplayXlogPtrHook(); + if (recptr == InvalidXLogRecPtr) + recptr = XLogRecoveryCtl->lastReplayedEndRecPtr; + SpinLockRelease(&XLogRecoveryCtl->info_lck); + + return recptr; +} + /* * Get position of last applied, or the record being applied. diff --git a/src/backend/replication/walreceiver.c b/src/backend/replication/walreceiver.c index feff7094351..a99e8d31376 100644 --- a/src/backend/replication/walreceiver.c +++ b/src/backend/replication/walreceiver.c @@ -1130,7 +1130,7 @@ XLogWalRcvSendReply(bool force, bool requestReply) /* Construct a new message */ writePtr = LogstreamResult.Write; flushPtr = LogstreamResult.Flush; - applyPtr = GetXLogReplayRecPtr(NULL); + applyPtr = GetEffectiveXlogReplayRecPtr(); resetStringInfo(&reply_message); pq_sendbyte(&reply_message, 'r'); diff --git a/src/backend/replication/walreceiverfuncs.c b/src/backend/replication/walreceiverfuncs.c index 8305022ce4d..70aa806ebf0 100644 --- a/src/backend/replication/walreceiverfuncs.c +++ b/src/backend/replication/walreceiverfuncs.c @@ -373,7 +373,7 @@ GetReplicationApplyDelay(void) receivePtr = walrcv->flushedUpto; SpinLockRelease(&walrcv->mutex); - replayPtr = GetXLogReplayRecPtr(NULL); + replayPtr = GetEffectiveXlogReplayRecPtr(); if (receivePtr == replayPtr) return 0; diff --git a/src/include/access/xlogrecovery.h b/src/include/access/xlogrecovery.h index 47c29350f5d..cec67fb9c89 100644 --- a/src/include/access/xlogrecovery.h +++ b/src/include/access/xlogrecovery.h @@ -48,6 +48,8 @@ typedef enum RecoveryPauseState RECOVERY_PAUSED /* recovery is paused */ } RecoveryPauseState; +typedef XLogRecPtr (*GetReplayXlogPtrHookType) (void); + /* User-settable GUC parameters */ extern PGDLLIMPORT bool recoveryTargetInclusive; extern PGDLLIMPORT int recoveryTargetAction; @@ -76,6 +78,9 @@ extern PGDLLIMPORT bool reachedConsistency; /* Are we currently in standby mode? */ extern PGDLLIMPORT bool StandbyMode; +/* Hook for extensions to tune replay xlog pointer */ +extern PGDLLIMPORT GetReplayXlogPtrHookType GetReplayXlogPtrHook; + extern Size XLogRecoveryShmemSize(void); extern void XLogRecoveryShmemInit(void); @@ -137,6 +142,7 @@ extern void RemovePromoteSignalFiles(void); extern bool HotStandbyActive(void); extern XLogRecPtr GetXLogReplayRecPtr(TimeLineID *replayTLI); +extern XLogRecPtr GetEffectiveXlogReplayRecPtr(void); extern RecoveryPauseState GetRecoveryPauseState(void); extern void SetRecoveryPause(bool recoveryPause); extern void GetXLogReceiptTime(TimestampTz *rtime, bool *fromStream); From d8ca3f8944335c558c9a0d07c1acb74b74dfa807 Mon Sep 17 00:00:00 2001 From: Alexander Korotkov Date: Sun, 5 Oct 2025 22:14:58 +0300 Subject: [PATCH 065/102] Rework handling of running transactions by extensions. * RunningTransactionsExtension structure as a part of xl_running_xacts and RunningTransactionsData to be filled by extensions. * getRunningTransactionsExtension hook to fill RunningTransactionsExtension. * Add nextXid field to CSNSnapshotData. * SnapBuildGetCSNSnaphot() function to modify CSNSnapshotData in the SnapBuild. * waitSnapshotHook hook to wait for transactions inside the snapshot builder. --- src/backend/replication/logical/snapbuild.c | 26 +++++++++++++++------ src/backend/storage/ipc/procarray.c | 20 +++++++++++++++- src/backend/storage/ipc/standby.c | 2 +- src/backend/utils/time/snapmgr.c | 8 ++----- src/include/replication/snapbuild.h | 8 +++++-- src/include/storage/standby.h | 6 ++++- src/include/storage/standbydefs.h | 13 ++++++++++- src/include/utils/snapshot.h | 1 + 8 files changed, 65 insertions(+), 19 deletions(-) diff --git a/src/backend/replication/logical/snapbuild.c b/src/backend/replication/logical/snapbuild.c index 8495bde4fb4..d9a7c9b1a4f 100644 --- a/src/backend/replication/logical/snapbuild.c +++ b/src/backend/replication/logical/snapbuild.c @@ -1280,8 +1280,9 @@ SnapBuildProcessRunningXacts(SnapBuild *builder, XLogRecPtr lsn, xl_running_xact ReorderBufferTXN *txn; TransactionId xmin; - builder->csnSnapshotData.snapshotcsn = running->csn; - builder->csnSnapshotData.xmin = 0; + builder->csnSnapshotData.snapshotcsn = running->extension.csn; + builder->csnSnapshotData.xmin = running->extension.runXmin; + builder->csnSnapshotData.nextXid = running->extension.nextXid; builder->csnSnapshotData.xlogptr = lsn; /* @@ -1439,7 +1440,8 @@ SnapBuildFindSnapshot(SnapBuild *builder, XLogRecPtr lsn, xl_running_xacts *runn * NB: We might have already started to incrementally assemble a snapshot, * so we need to be careful to deal with that. */ - if (running->oldestRunningXid == running->nextXid) + if (running->oldestRunningXid == running->nextXid && + running->extension.runXmin == running->extension.nextXid) { if (builder->start_decoding_at == InvalidXLogRecPtr || builder->start_decoding_at <= lsn) @@ -1568,6 +1570,11 @@ SnapBuildFindSnapshot(SnapBuild *builder, XLogRecPtr lsn, xl_running_xacts *runn return true; } +/* + * Hook for custom waits in SnapBuildWaitSnapshot() provided by extensions. + */ +WaitSnapshotHookType waitSnapshotHook = NULL; + /* --- * Iterate through xids in record, wait for all older than the cutoff to * finish. Then, if possible, log a new xl_running_xacts record. @@ -1602,6 +1609,12 @@ SnapBuildWaitSnapshot(xl_running_xacts *running, TransactionId cutoff) XactLockTableWait(xid, NULL, NULL, XLTW_None); } + /* + * Give extensions chance for their custom waits. + */ + if (waitSnapshotHook) + waitSnapshotHook(&running->extension); + /* * All transactions we needed to finish finished - try to ensure there is * another xl_running_xacts record in a timely manner, without having to @@ -2209,9 +2222,8 @@ CheckPointSnapBuild(void) FreeDir(snap_dir); } -void -SnapBuildUpdateCSNSnaphot(SnapBuild *builder, - CSNSnapshotData *csnSnapshotData) +CSNSnapshotData * +SnapBuildGetCSNSnaphot(SnapBuild *builder) { - builder->csnSnapshotData = *csnSnapshotData; + return &builder->csnSnapshotData; } diff --git a/src/backend/storage/ipc/procarray.c b/src/backend/storage/ipc/procarray.c index 0bf6a921924..d4b367e8d43 100644 --- a/src/backend/storage/ipc/procarray.c +++ b/src/backend/storage/ipc/procarray.c @@ -2699,6 +2699,11 @@ ProcArrayInstallRestoredXmin(TransactionId xmin, PGPROC *proc) return result; } +/* + * A hook for filling RunningTransactionsExtension structure by extensions. + */ +GetRunningTransactionsExtensionHookType getRunningTransactionsExtension = NULL; + /* * GetRunningTransactionData -- returns information about running transactions. * @@ -2880,7 +2885,20 @@ GetRunningTransactionData(void) CurrentRunningXacts->nextXid = XidFromFullTransactionId(ShmemVariableCache->nextXid); CurrentRunningXacts->oldestRunningXid = oldestRunningXid; CurrentRunningXacts->latestCompletedXid = latestCompletedXid; - CurrentRunningXacts->csn = pg_atomic_read_u64(&ShmemVariableCache->nextCommitSeqNo); + + /* + * Give extensions chance to fill their structs. + */ + if (getRunningTransactionsExtension) + { + getRunningTransactionsExtension(&CurrentRunningXacts->extension); + } + else + { + CurrentRunningXacts->extension.csn = 0; + CurrentRunningXacts->extension.nextXid = 0; + CurrentRunningXacts->extension.runXmin = 0; + } Assert(TransactionIdIsValid(CurrentRunningXacts->nextXid)); Assert(TransactionIdIsValid(CurrentRunningXacts->oldestRunningXid)); diff --git a/src/backend/storage/ipc/standby.c b/src/backend/storage/ipc/standby.c index 1e6760a7c49..23e06c60d44 100644 --- a/src/backend/storage/ipc/standby.c +++ b/src/backend/storage/ipc/standby.c @@ -1355,7 +1355,7 @@ LogCurrentRunningXacts(RunningTransactions CurrRunningXacts) xlrec.nextXid = CurrRunningXacts->nextXid; xlrec.oldestRunningXid = CurrRunningXacts->oldestRunningXid; xlrec.latestCompletedXid = CurrRunningXacts->latestCompletedXid; - xlrec.csn = CurrRunningXacts->csn; + xlrec.extension = CurrRunningXacts->extension; /* Header */ XLogBeginInsert(); diff --git a/src/backend/utils/time/snapmgr.c b/src/backend/utils/time/snapmgr.c index 080cbe4da38..b2df65f0871 100644 --- a/src/backend/utils/time/snapmgr.c +++ b/src/backend/utils/time/snapmgr.c @@ -2194,9 +2194,7 @@ SerializeSnapshot(Snapshot snapshot, char *start_address) serialized_snapshot.curcid = snapshot->curcid; serialized_snapshot.whenTaken = snapshot->whenTaken; serialized_snapshot.lsn = snapshot->lsn; - serialized_snapshot.csnSnapshotData.xmin = snapshot->csnSnapshotData.xmin; - serialized_snapshot.csnSnapshotData.snapshotcsn = snapshot->csnSnapshotData.snapshotcsn; - serialized_snapshot.csnSnapshotData.xlogptr = snapshot->csnSnapshotData.xlogptr; + serialized_snapshot.csnSnapshotData = snapshot->csnSnapshotData; serialized_snapshot.undoRegularRowLocation = snapshot->undoRegularRowLocationPhNode.undoLocation; serialized_snapshot.undoRegularPageLocation = snapshot->undoRegularPageLocationPhNode.undoLocation; serialized_snapshot.undoSystemLocation = snapshot->undoSystemLocationPhNode.undoLocation; @@ -2275,9 +2273,7 @@ RestoreSnapshot(char *start_address) snapshot->whenTaken = serialized_snapshot.whenTaken; snapshot->lsn = serialized_snapshot.lsn; snapshot->snapXactCompletionCount = 0; - snapshot->csnSnapshotData.xmin = serialized_snapshot.csnSnapshotData.xmin; - snapshot->csnSnapshotData.snapshotcsn = serialized_snapshot.csnSnapshotData.snapshotcsn; - snapshot->csnSnapshotData.xlogptr = serialized_snapshot.csnSnapshotData.xlogptr; + snapshot->csnSnapshotData = serialized_snapshot.csnSnapshotData; snapshot->undoRegularRowLocationPhNode.undoLocation = serialized_snapshot.undoRegularRowLocation; snapshot->undoRegularPageLocationPhNode.undoLocation = serialized_snapshot.undoRegularPageLocation; snapshot->undoSystemLocationPhNode.undoLocation = serialized_snapshot.undoSystemLocation; diff --git a/src/include/replication/snapbuild.h b/src/include/replication/snapbuild.h index a7b793dae3c..d639a5de2b7 100644 --- a/src/include/replication/snapbuild.h +++ b/src/include/replication/snapbuild.h @@ -13,6 +13,7 @@ #define SNAPBUILD_H #include "access/xlogdefs.h" +#include "storage/standbydefs.h" #include "utils/snapmgr.h" typedef enum @@ -57,6 +58,10 @@ struct ReorderBuffer; struct xl_heap_new_cid; struct xl_running_xacts; +typedef void (*WaitSnapshotHookType) (RunningTransactionsExtension *extension); + +extern PGDLLIMPORT WaitSnapshotHookType waitSnapshotHook; + extern void CheckPointSnapBuild(void); extern SnapBuild *AllocateSnapshotBuilder(struct ReorderBuffer *reorder, @@ -91,7 +96,6 @@ extern void SnapBuildProcessNewCid(SnapBuild *builder, TransactionId xid, extern void SnapBuildProcessRunningXacts(SnapBuild *builder, XLogRecPtr lsn, struct xl_running_xacts *running); extern void SnapBuildSerializationPoint(SnapBuild *builder, XLogRecPtr lsn); -extern void SnapBuildUpdateCSNSnaphot(SnapBuild *builder, - CSNSnapshotData *csnSnapshotData); +extern CSNSnapshotData *SnapBuildGetCSNSnaphot(SnapBuild *builder); #endif /* SNAPBUILD_H */ diff --git a/src/include/storage/standby.h b/src/include/storage/standby.h index b97394b4841..99a002fb3de 100644 --- a/src/include/storage/standby.h +++ b/src/include/storage/standby.h @@ -91,13 +91,17 @@ typedef struct RunningTransactionsData TransactionId nextXid; /* xid from ShmemVariableCache->nextXid */ TransactionId oldestRunningXid; /* *not* oldestXmin */ TransactionId latestCompletedXid; /* so we can set xmax */ - CommitSeqNo csn; /* current csn */ + RunningTransactionsExtension extension; TransactionId *xids; /* array of (sub)xids still running */ } RunningTransactionsData; typedef RunningTransactionsData *RunningTransactions; +typedef void (*GetRunningTransactionsExtensionHookType) (RunningTransactionsExtension *extension); + +extern PGDLLIMPORT GetRunningTransactionsExtensionHookType getRunningTransactionsExtension; + extern void LogAccessExclusiveLock(Oid dbOid, Oid relOid); extern void LogAccessExclusiveLockPrepare(void); diff --git a/src/include/storage/standbydefs.h b/src/include/storage/standbydefs.h index 23dddce8d84..d49b43fadaf 100644 --- a/src/include/storage/standbydefs.h +++ b/src/include/storage/standbydefs.h @@ -41,6 +41,17 @@ typedef struct xl_standby_locks xl_standby_lock locks[FLEXIBLE_ARRAY_MEMBER]; } xl_standby_locks; +/* + * A part of xl_running_xacts and RunningTransactionsData to be filled by + * extensions. + */ +typedef struct +{ + uint64 nextXid; + uint64 runXmin; + CommitSeqNo csn; /* current csn */ +} RunningTransactionsExtension; + /* * When we write running xact data to WAL, we use this structure. */ @@ -52,7 +63,7 @@ typedef struct xl_running_xacts TransactionId nextXid; /* xid from ShmemVariableCache->nextXid */ TransactionId oldestRunningXid; /* *not* oldestXmin */ TransactionId latestCompletedXid; /* so we can set xmax */ - CommitSeqNo csn; /* current csn */ + RunningTransactionsExtension extension; TransactionId xids[FLEXIBLE_ARRAY_MEMBER]; } xl_running_xacts; diff --git a/src/include/utils/snapshot.h b/src/include/utils/snapshot.h index e645b9c89e9..0cd0ccdeb4d 100644 --- a/src/include/utils/snapshot.h +++ b/src/include/utils/snapshot.h @@ -131,6 +131,7 @@ typedef struct typedef struct CSNSnapshotData { uint64 xmin; + uint64 nextXid; CommitSeqNo snapshotcsn; XLogRecPtr xlogptr; } CSNSnapshotData; From d0de966dc4b629f7a5fca532ba40b4d2999e2479 Mon Sep 17 00:00:00 2001 From: Egor Ivkov Date: Mon, 6 Oct 2025 18:17:04 +0300 Subject: [PATCH 066/102] TableAM: add relation_reindex --- src/backend/access/heap/heapam_handler.c | 7 ++++++ src/backend/catalog/index.c | 29 +++++++++------------ src/backend/commands/cluster.c | 14 ++++++++++- src/backend/commands/indexcmds.c | 32 ++++++++++++++++++++---- src/backend/commands/tablecmds.c | 5 +--- src/include/access/tableam.h | 11 ++++++++ src/include/catalog/index.h | 2 +- 7 files changed, 72 insertions(+), 28 deletions(-) diff --git a/src/backend/access/heap/heapam_handler.c b/src/backend/access/heap/heapam_handler.c index 50ca1db23d7..b343b1b29b9 100644 --- a/src/backend/access/heap/heapam_handler.c +++ b/src/backend/access/heap/heapam_handler.c @@ -982,6 +982,12 @@ heapam_relation_nontransactional_truncate(Relation rel) RelationTruncate(rel, 0); } +static bool +heapam_relation_reindex(Relation rel, int flags, ReindexParams *params) +{ + return reindex_relation(rel, flags, params); +} + static void heapam_relation_copy_data(Relation rel, const RelFileLocator *newrlocator) { @@ -2955,6 +2961,7 @@ static const TableAmRoutine heapam_methods = { .relation_set_new_filelocator = heapam_relation_set_new_filelocator, .relation_nontransactional_truncate = heapam_relation_nontransactional_truncate, + .relation_reindex = heapam_relation_reindex, .relation_copy_data = heapam_relation_copy_data, .relation_copy_for_cluster = heapam_relation_copy_for_cluster, .relation_vacuum = heap_vacuum_rel, diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c index 0f3367f8ff8..579a533c496 100644 --- a/src/backend/catalog/index.c +++ b/src/backend/catalog/index.c @@ -3891,9 +3891,8 @@ reindex_index(Oid indexId, bool skip_constraint_checks, char persistence, * index rebuild. */ bool -reindex_relation(Oid relid, int flags, ReindexParams *params) +reindex_relation(Relation rel, int flags, ReindexParams *params) { - Relation rel; Oid toast_relid; List *indexIds; char persistence; @@ -3901,15 +3900,6 @@ reindex_relation(Oid relid, int flags, ReindexParams *params) ListCell *indexId; int i; - /* - * Open and lock the relation. ShareLock is sufficient since we only need - * to prevent schema and data changes in it. The lock level used here - * should match ReindexTable(). - */ - if ((params->options & REINDEXOPT_MISSING_OK) != 0) - rel = try_table_open(relid, ShareLock); - else - rel = table_open(relid, ShareLock); /* if relation is gone, leave */ if (!rel) @@ -4001,11 +3991,6 @@ reindex_relation(Oid relid, int flags, ReindexParams *params) i++; } - /* - * Close rel, but continue to hold the lock. - */ - table_close(rel, NoLock); - result = (indexIds != NIL); /* @@ -4021,10 +4006,20 @@ reindex_relation(Oid relid, int flags, ReindexParams *params) * This rule is enforced by setting tablespaceOid to InvalidOid. */ ReindexParams newparams = *params; + /* + * Open and lock the relation. ShareLock is sufficient since we only need + * to prevent schema and data changes in it. The lock level used here + * should match ReindexTable(). + */ + Relation toast_rel = table_open(toast_relid, ShareLock); newparams.options &= ~(REINDEXOPT_MISSING_OK); newparams.tablespaceOid = InvalidOid; - result |= reindex_relation(toast_relid, flags, &newparams); + result |= reindex_relation(toast_rel, flags, &newparams); + /* + * Close rel, but continue to hold the lock. + */ + table_close(toast_rel, NoLock); } return result; diff --git a/src/backend/commands/cluster.c b/src/backend/commands/cluster.c index 92c465c05b4..eb06eb7ecc5 100644 --- a/src/backend/commands/cluster.c +++ b/src/backend/commands/cluster.c @@ -1462,6 +1462,7 @@ finish_heap_swap(Oid OIDOldHeap, Oid OIDNewHeap, int reindex_flags; ReindexParams reindex_params = {0}; int i; + Relation oldHeap; /* Report that we are now swapping relation files */ pgstat_progress_update_param(PROGRESS_CLUSTER_PHASE, @@ -1517,8 +1518,19 @@ finish_heap_swap(Oid OIDOldHeap, Oid OIDNewHeap, /* Report that we are now reindexing relations */ pgstat_progress_update_param(PROGRESS_CLUSTER_PHASE, PROGRESS_CLUSTER_PHASE_REBUILD_INDEX); + /* + * Open and lock the relation. ShareLock is sufficient since we only need + * to prevent schema and data changes in it. The lock level used here + * should match ReindexTable(). + */ + oldHeap = table_open(OIDOldHeap, ShareLock); + + reindex_relation(oldHeap, reindex_flags, &reindex_params); + /* + * Close rel, but continue to hold the lock. + */ + table_close(oldHeap, NoLock); - reindex_relation(OIDOldHeap, reindex_flags, &reindex_params); /* Report that we are now doing clean up */ pgstat_progress_update_param(PROGRESS_CLUSTER_PHASE, diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c index b7c490bab4e..17f439e8a34 100644 --- a/src/backend/commands/indexcmds.c +++ b/src/backend/commands/indexcmds.c @@ -2943,12 +2943,23 @@ ReindexTable(RangeVar *relation, ReindexParams *params, bool isTopLevel) else { ReindexParams newparams = *params; + Relation rel; newparams.options |= REINDEXOPT_REPORT_PROGRESS; - result = reindex_relation(heapOid, - REINDEX_REL_PROCESS_TOAST | - REINDEX_REL_CHECK_CONSTRAINTS, - &newparams); + if ((newparams.options & REINDEXOPT_MISSING_OK) != 0) + rel = try_table_open(heapOid, ShareLock); + else + rel = table_open(heapOid, ShareLock); + + result = table_relation_reindex(rel, + REINDEX_REL_PROCESS_TOAST | + REINDEX_REL_CHECK_CONSTRAINTS, + &newparams); + /* + * Close rel, but continue to hold the lock. + */ + table_close(rel, NoLock); + if (!result) ereport(NOTICE, (errmsg("table \"%s\" has no indexes to reindex", @@ -3376,13 +3387,24 @@ ReindexMultipleInternal(List *relids, ReindexParams *params) { bool result; ReindexParams newparams = *params; + Relation rel; newparams.options |= REINDEXOPT_REPORT_PROGRESS | REINDEXOPT_MISSING_OK; - result = reindex_relation(relid, + /* + * Open and lock the relation. ShareLock is sufficient since we only need + * to prevent schema and data changes in it. The lock level used here + * should match ReindexTable(). + */ + rel = try_table_open(relid, ShareLock); + result = table_relation_reindex(rel, REINDEX_REL_PROCESS_TOAST | REINDEX_REL_CHECK_CONSTRAINTS, &newparams); + /* + * Close rel, but continue to hold the lock. + */ + table_close(rel, NoLock); if (result && (params->options & REINDEXOPT_VERBOSE) != 0) ereport(INFO, diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c index 34fe864a069..5775b914581 100644 --- a/src/backend/commands/tablecmds.c +++ b/src/backend/commands/tablecmds.c @@ -2037,7 +2037,6 @@ ExecuteTruncateGuts(List *explicit_rels, } else { - Oid heap_relid; Oid toast_relid; ReindexParams reindex_params = {0}; @@ -2058,8 +2057,6 @@ ExecuteTruncateGuts(List *explicit_rels, */ RelationSetNewRelfilenumber(rel, rel->rd_rel->relpersistence); - heap_relid = RelationGetRelid(rel); - /* * The same for the toast table, if any. */ @@ -2077,7 +2074,7 @@ ExecuteTruncateGuts(List *explicit_rels, /* * Reconstruct the indexes to match, and we're done. */ - reindex_relation(heap_relid, REINDEX_REL_PROCESS_TOAST, + table_relation_reindex(rel, REINDEX_REL_PROCESS_TOAST, &reindex_params); } diff --git a/src/include/access/tableam.h b/src/include/access/tableam.h index 63aa0501668..6c0197178ee 100644 --- a/src/include/access/tableam.h +++ b/src/include/access/tableam.h @@ -27,6 +27,7 @@ #include "utils/guc.h" #include "utils/rel.h" #include "utils/snapshot.h" +#include "catalog/index.h" #define DEFAULT_TABLE_ACCESS_METHOD "heap" @@ -644,6 +645,9 @@ typedef struct TableAmRoutine */ void (*relation_nontransactional_truncate) (Relation rel); + /* See reindex_relation for reference about parameters */ + bool (*relation_reindex) (Relation rel, int flags, ReindexParams *params); + /* * See table_relation_copy_data(). * @@ -1694,6 +1698,13 @@ table_relation_nontransactional_truncate(Relation rel) rel->rd_tableam->relation_nontransactional_truncate(rel); } +/* See reindex_relation for reference about parameters */ +static inline bool +table_relation_reindex(Relation rel, int flags, ReindexParams *params) +{ + return rel->rd_tableam->relation_reindex(rel, flags, params); +} + /* * Copy data from `rel` into the new relfilelocator `newrlocator`. The new * relfilelocator may not have storage associated before this function is diff --git a/src/include/catalog/index.h b/src/include/catalog/index.h index 3fa15391d83..20ac4a4b465 100644 --- a/src/include/catalog/index.h +++ b/src/include/catalog/index.h @@ -158,7 +158,7 @@ extern void reindex_index(Oid indexId, bool skip_constraint_checks, #define REINDEX_REL_FORCE_INDEXES_UNLOGGED 0x08 #define REINDEX_REL_FORCE_INDEXES_PERMANENT 0x10 -extern bool reindex_relation(Oid relid, int flags, ReindexParams *params); +extern bool reindex_relation(Relation rel, int flags, ReindexParams *params); extern bool ReindexIsProcessingHeap(Oid heapOid); extern bool ReindexIsProcessingIndex(Oid indexOid); From e46b2a0b9198e7fb99fae9c742267d08f0414447 Mon Sep 17 00:00:00 2001 From: Egor Ivkov Date: Fri, 10 Oct 2025 20:11:29 +0300 Subject: [PATCH 067/102] IndexAM: Add amreuse --- contrib/bloom/blutils.c | 1 + src/backend/access/brin/brin.c | 1 + src/backend/access/gin/ginutil.c | 1 + src/backend/access/gist/gist.c | 1 + src/backend/access/hash/hash.c | 1 + src/backend/access/nbtree/nbtree.c | 1 + src/backend/access/spgist/spgutils.c | 1 + src/backend/commands/tablecmds.c | 42 +++++++++++++++++++ src/include/access/amapi.h | 4 ++ .../modules/dummy_index_am/dummy_index_am.c | 1 + src/tools/pgindent/typedefs.list | 1 + 11 files changed, 55 insertions(+) diff --git a/contrib/bloom/blutils.c b/contrib/bloom/blutils.c index d92858a3433..6e7e5261708 100644 --- a/contrib/bloom/blutils.c +++ b/contrib/bloom/blutils.c @@ -129,6 +129,7 @@ blhandler(PG_FUNCTION_ARGS) amroutine->amkeytype = InvalidOid; amroutine->ambuild = blbuild; + amroutine->amreuse = NULL; amroutine->ambuildempty = blbuildempty; amroutine->aminsert = NULL; amroutine->aminsertextended = blinsert; diff --git a/src/backend/access/brin/brin.c b/src/backend/access/brin/brin.c index edf99d9816c..9335d137719 100644 --- a/src/backend/access/brin/brin.c +++ b/src/backend/access/brin/brin.c @@ -115,6 +115,7 @@ brinhandler(PG_FUNCTION_ARGS) amroutine->amkeytype = InvalidOid; amroutine->ambuild = brinbuild; + amroutine->amreuse = NULL; amroutine->ambuildempty = brinbuildempty; amroutine->aminsert = NULL; amroutine->aminsertextended = brininsert; diff --git a/src/backend/access/gin/ginutil.c b/src/backend/access/gin/ginutil.c index 52d9a725fc4..7ada7e831e7 100644 --- a/src/backend/access/gin/ginutil.c +++ b/src/backend/access/gin/ginutil.c @@ -62,6 +62,7 @@ ginhandler(PG_FUNCTION_ARGS) amroutine->amkeytype = InvalidOid; amroutine->ambuild = ginbuild; + amroutine->amreuse = NULL; amroutine->ambuildempty = ginbuildempty; amroutine->aminsert = NULL; amroutine->aminsertextended = gininsert; diff --git a/src/backend/access/gist/gist.c b/src/backend/access/gist/gist.c index 6dcfe1095c1..5576ac3c227 100644 --- a/src/backend/access/gist/gist.c +++ b/src/backend/access/gist/gist.c @@ -84,6 +84,7 @@ gisthandler(PG_FUNCTION_ARGS) amroutine->amkeytype = InvalidOid; amroutine->ambuild = gistbuild; + amroutine->amreuse = NULL; amroutine->ambuildempty = gistbuildempty; amroutine->aminsert = NULL; amroutine->aminsertextended = gistinsert; diff --git a/src/backend/access/hash/hash.c b/src/backend/access/hash/hash.c index ffddf7b900c..f59498d54a4 100644 --- a/src/backend/access/hash/hash.c +++ b/src/backend/access/hash/hash.c @@ -81,6 +81,7 @@ hashhandler(PG_FUNCTION_ARGS) amroutine->amkeytype = INT4OID; amroutine->ambuild = hashbuild; + amroutine->amreuse = NULL; amroutine->ambuildempty = hashbuildempty; amroutine->aminsert = NULL; amroutine->aminsertextended = hashinsert; diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c index 44daed95baf..cb388ecfcba 100644 --- a/src/backend/access/nbtree/nbtree.c +++ b/src/backend/access/nbtree/nbtree.c @@ -120,6 +120,7 @@ bthandler(PG_FUNCTION_ARGS) amroutine->amkeytype = InvalidOid; amroutine->ambuild = btbuild; + amroutine->amreuse = NULL; amroutine->ambuildempty = btbuildempty; amroutine->aminsert = NULL; amroutine->aminsertextended = btinsert; diff --git a/src/backend/access/spgist/spgutils.c b/src/backend/access/spgist/spgutils.c index 127ff3922d1..44aa51d0882 100644 --- a/src/backend/access/spgist/spgutils.c +++ b/src/backend/access/spgist/spgutils.c @@ -68,6 +68,7 @@ spghandler(PG_FUNCTION_ARGS) amroutine->amkeytype = InvalidOid; amroutine->ambuild = spgbuild; + amroutine->amreuse = NULL; amroutine->ambuildempty = spgbuildempty; amroutine->aminsert = NULL; amroutine->aminsertextended = spginsert; diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c index 5775b914581..4d93e4848e3 100644 --- a/src/backend/commands/tablecmds.c +++ b/src/backend/commands/tablecmds.c @@ -92,6 +92,7 @@ #include "tcop/utility.h" #include "utils/acl.h" #include "utils/builtins.h" +#include "utils/elog.h" #include "utils/fmgroids.h" #include "utils/inval.h" #include "utils/lsyscache.h" @@ -13942,9 +13943,50 @@ TryReuseIndex(Oid oldId, IndexStmt *stmt) /* If it's a partitioned index, there is no storage to share. */ if (irel->rd_rel->relkind != RELKIND_PARTITIONED_INDEX) { + HeapTuple tuple; + Form_pg_am accessMethodForm; + IndexAmRoutine *amRoutine; + char *accessMethodName; + Oid heapRelId = IndexGetRelation(oldId, false); + Relation heapRel = table_open(heapRelId, ShareLock); + stmt->oldNumber = irel->rd_locator.relNumber; stmt->oldCreateSubid = irel->rd_createSubid; stmt->oldFirstRelfilelocatorSubid = irel->rd_firstRelfilelocatorSubid; + + + /* + * look up the access method to call amreuse + */ + accessMethodName = stmt->accessMethod; + tuple = SearchSysCache1(AMNAME, PointerGetDatum(accessMethodName)); + if (!HeapTupleIsValid(tuple)) + { + /* + * Hack to provide more-or-less-transparent updating of old RTREE + * indexes to GiST: if RTREE is requested and not found, use GIST. + */ + if (strcmp(accessMethodName, "rtree") == 0) + { + ereport(NOTICE, + (errmsg("substituting access method \"gist\" for obsolete method \"rtree\""))); + accessMethodName = "gist"; + tuple = SearchSysCache1(AMNAME, PointerGetDatum(accessMethodName)); + } + + if (!HeapTupleIsValid(tuple)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("access method \"%s\" does not exist", + accessMethodName))); + } + accessMethodForm = (Form_pg_am) GETSTRUCT(tuple); + amRoutine = GetIndexAmRoutineWithTableAM(heapRel->rd_rel->relam, accessMethodForm->amhandler); + table_close(heapRel, NoLock); + + if(amRoutine->amreuse) { + (*amRoutine->amreuse)(irel); + } } index_close(irel, NoLock); } diff --git a/src/include/access/amapi.h b/src/include/access/amapi.h index 103eed5fa9f..55b120575f9 100644 --- a/src/include/access/amapi.h +++ b/src/include/access/amapi.h @@ -104,6 +104,9 @@ typedef IndexBuildResult *(*ambuild_function) (Relation heapRelation, Relation indexRelation, struct IndexInfo *indexInfo); +/* reuse current index - don't drop it */ +typedef void (*amreuse_function) (Relation indexRelation); + /* build empty index */ typedef void (*ambuildempty_function) (Relation indexRelation); @@ -295,6 +298,7 @@ typedef struct IndexAmRoutine /* interface functions */ ambuild_function ambuild; + amreuse_function amreuse; ambuildempty_function ambuildempty; aminsert_function aminsert; aminsert_extended_function aminsertextended; diff --git a/src/test/modules/dummy_index_am/dummy_index_am.c b/src/test/modules/dummy_index_am/dummy_index_am.c index 09c5d20479d..6fbd30b1a71 100644 --- a/src/test/modules/dummy_index_am/dummy_index_am.c +++ b/src/test/modules/dummy_index_am/dummy_index_am.c @@ -301,6 +301,7 @@ dihandler(PG_FUNCTION_ARGS) amroutine->amkeytype = InvalidOid; amroutine->ambuild = dibuild; + amroutine->amreuse = NULL; amroutine->ambuildempty = dibuildempty; amroutine->aminsert = NULL; amroutine->aminsertextended = diinsert; diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index 3faaa17168e..68569becae1 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -3138,6 +3138,7 @@ allocfunc amadjustmembers_function ambeginscan_function ambuild_function +amreuse_function ambuildempty_function ambuildphasename_function ambulkdelete_function From 372c376f22cb64508460bae91f55c08affc313ce Mon Sep 17 00:00:00 2001 From: Ilya Kobets Date: Wed, 22 Oct 2025 18:20:44 +0200 Subject: [PATCH 068/102] Revert "TableAM: add relation_reindex" This reverts commit f8103eda4be299783e7c23552e7c82ff8488e800. --- src/backend/access/heap/heapam_handler.c | 7 ------ src/backend/catalog/index.c | 29 ++++++++++++--------- src/backend/commands/cluster.c | 14 +---------- src/backend/commands/indexcmds.c | 32 ++++-------------------- src/backend/commands/tablecmds.c | 5 +++- src/include/access/tableam.h | 11 -------- src/include/catalog/index.h | 2 +- 7 files changed, 28 insertions(+), 72 deletions(-) diff --git a/src/backend/access/heap/heapam_handler.c b/src/backend/access/heap/heapam_handler.c index b343b1b29b9..50ca1db23d7 100644 --- a/src/backend/access/heap/heapam_handler.c +++ b/src/backend/access/heap/heapam_handler.c @@ -982,12 +982,6 @@ heapam_relation_nontransactional_truncate(Relation rel) RelationTruncate(rel, 0); } -static bool -heapam_relation_reindex(Relation rel, int flags, ReindexParams *params) -{ - return reindex_relation(rel, flags, params); -} - static void heapam_relation_copy_data(Relation rel, const RelFileLocator *newrlocator) { @@ -2961,7 +2955,6 @@ static const TableAmRoutine heapam_methods = { .relation_set_new_filelocator = heapam_relation_set_new_filelocator, .relation_nontransactional_truncate = heapam_relation_nontransactional_truncate, - .relation_reindex = heapam_relation_reindex, .relation_copy_data = heapam_relation_copy_data, .relation_copy_for_cluster = heapam_relation_copy_for_cluster, .relation_vacuum = heap_vacuum_rel, diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c index 579a533c496..0f3367f8ff8 100644 --- a/src/backend/catalog/index.c +++ b/src/backend/catalog/index.c @@ -3891,8 +3891,9 @@ reindex_index(Oid indexId, bool skip_constraint_checks, char persistence, * index rebuild. */ bool -reindex_relation(Relation rel, int flags, ReindexParams *params) +reindex_relation(Oid relid, int flags, ReindexParams *params) { + Relation rel; Oid toast_relid; List *indexIds; char persistence; @@ -3900,6 +3901,15 @@ reindex_relation(Relation rel, int flags, ReindexParams *params) ListCell *indexId; int i; + /* + * Open and lock the relation. ShareLock is sufficient since we only need + * to prevent schema and data changes in it. The lock level used here + * should match ReindexTable(). + */ + if ((params->options & REINDEXOPT_MISSING_OK) != 0) + rel = try_table_open(relid, ShareLock); + else + rel = table_open(relid, ShareLock); /* if relation is gone, leave */ if (!rel) @@ -3991,6 +4001,11 @@ reindex_relation(Relation rel, int flags, ReindexParams *params) i++; } + /* + * Close rel, but continue to hold the lock. + */ + table_close(rel, NoLock); + result = (indexIds != NIL); /* @@ -4006,20 +4021,10 @@ reindex_relation(Relation rel, int flags, ReindexParams *params) * This rule is enforced by setting tablespaceOid to InvalidOid. */ ReindexParams newparams = *params; - /* - * Open and lock the relation. ShareLock is sufficient since we only need - * to prevent schema and data changes in it. The lock level used here - * should match ReindexTable(). - */ - Relation toast_rel = table_open(toast_relid, ShareLock); newparams.options &= ~(REINDEXOPT_MISSING_OK); newparams.tablespaceOid = InvalidOid; - result |= reindex_relation(toast_rel, flags, &newparams); - /* - * Close rel, but continue to hold the lock. - */ - table_close(toast_rel, NoLock); + result |= reindex_relation(toast_relid, flags, &newparams); } return result; diff --git a/src/backend/commands/cluster.c b/src/backend/commands/cluster.c index eb06eb7ecc5..92c465c05b4 100644 --- a/src/backend/commands/cluster.c +++ b/src/backend/commands/cluster.c @@ -1462,7 +1462,6 @@ finish_heap_swap(Oid OIDOldHeap, Oid OIDNewHeap, int reindex_flags; ReindexParams reindex_params = {0}; int i; - Relation oldHeap; /* Report that we are now swapping relation files */ pgstat_progress_update_param(PROGRESS_CLUSTER_PHASE, @@ -1518,19 +1517,8 @@ finish_heap_swap(Oid OIDOldHeap, Oid OIDNewHeap, /* Report that we are now reindexing relations */ pgstat_progress_update_param(PROGRESS_CLUSTER_PHASE, PROGRESS_CLUSTER_PHASE_REBUILD_INDEX); - /* - * Open and lock the relation. ShareLock is sufficient since we only need - * to prevent schema and data changes in it. The lock level used here - * should match ReindexTable(). - */ - oldHeap = table_open(OIDOldHeap, ShareLock); - - reindex_relation(oldHeap, reindex_flags, &reindex_params); - /* - * Close rel, but continue to hold the lock. - */ - table_close(oldHeap, NoLock); + reindex_relation(OIDOldHeap, reindex_flags, &reindex_params); /* Report that we are now doing clean up */ pgstat_progress_update_param(PROGRESS_CLUSTER_PHASE, diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c index 17f439e8a34..b7c490bab4e 100644 --- a/src/backend/commands/indexcmds.c +++ b/src/backend/commands/indexcmds.c @@ -2943,23 +2943,12 @@ ReindexTable(RangeVar *relation, ReindexParams *params, bool isTopLevel) else { ReindexParams newparams = *params; - Relation rel; newparams.options |= REINDEXOPT_REPORT_PROGRESS; - if ((newparams.options & REINDEXOPT_MISSING_OK) != 0) - rel = try_table_open(heapOid, ShareLock); - else - rel = table_open(heapOid, ShareLock); - - result = table_relation_reindex(rel, - REINDEX_REL_PROCESS_TOAST | - REINDEX_REL_CHECK_CONSTRAINTS, - &newparams); - /* - * Close rel, but continue to hold the lock. - */ - table_close(rel, NoLock); - + result = reindex_relation(heapOid, + REINDEX_REL_PROCESS_TOAST | + REINDEX_REL_CHECK_CONSTRAINTS, + &newparams); if (!result) ereport(NOTICE, (errmsg("table \"%s\" has no indexes to reindex", @@ -3387,24 +3376,13 @@ ReindexMultipleInternal(List *relids, ReindexParams *params) { bool result; ReindexParams newparams = *params; - Relation rel; newparams.options |= REINDEXOPT_REPORT_PROGRESS | REINDEXOPT_MISSING_OK; - /* - * Open and lock the relation. ShareLock is sufficient since we only need - * to prevent schema and data changes in it. The lock level used here - * should match ReindexTable(). - */ - rel = try_table_open(relid, ShareLock); - result = table_relation_reindex(rel, + result = reindex_relation(relid, REINDEX_REL_PROCESS_TOAST | REINDEX_REL_CHECK_CONSTRAINTS, &newparams); - /* - * Close rel, but continue to hold the lock. - */ - table_close(rel, NoLock); if (result && (params->options & REINDEXOPT_VERBOSE) != 0) ereport(INFO, diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c index 4d93e4848e3..2630d4f9f47 100644 --- a/src/backend/commands/tablecmds.c +++ b/src/backend/commands/tablecmds.c @@ -2038,6 +2038,7 @@ ExecuteTruncateGuts(List *explicit_rels, } else { + Oid heap_relid; Oid toast_relid; ReindexParams reindex_params = {0}; @@ -2058,6 +2059,8 @@ ExecuteTruncateGuts(List *explicit_rels, */ RelationSetNewRelfilenumber(rel, rel->rd_rel->relpersistence); + heap_relid = RelationGetRelid(rel); + /* * The same for the toast table, if any. */ @@ -2075,7 +2078,7 @@ ExecuteTruncateGuts(List *explicit_rels, /* * Reconstruct the indexes to match, and we're done. */ - table_relation_reindex(rel, REINDEX_REL_PROCESS_TOAST, + reindex_relation(heap_relid, REINDEX_REL_PROCESS_TOAST, &reindex_params); } diff --git a/src/include/access/tableam.h b/src/include/access/tableam.h index 6c0197178ee..63aa0501668 100644 --- a/src/include/access/tableam.h +++ b/src/include/access/tableam.h @@ -27,7 +27,6 @@ #include "utils/guc.h" #include "utils/rel.h" #include "utils/snapshot.h" -#include "catalog/index.h" #define DEFAULT_TABLE_ACCESS_METHOD "heap" @@ -645,9 +644,6 @@ typedef struct TableAmRoutine */ void (*relation_nontransactional_truncate) (Relation rel); - /* See reindex_relation for reference about parameters */ - bool (*relation_reindex) (Relation rel, int flags, ReindexParams *params); - /* * See table_relation_copy_data(). * @@ -1698,13 +1694,6 @@ table_relation_nontransactional_truncate(Relation rel) rel->rd_tableam->relation_nontransactional_truncate(rel); } -/* See reindex_relation for reference about parameters */ -static inline bool -table_relation_reindex(Relation rel, int flags, ReindexParams *params) -{ - return rel->rd_tableam->relation_reindex(rel, flags, params); -} - /* * Copy data from `rel` into the new relfilelocator `newrlocator`. The new * relfilelocator may not have storage associated before this function is diff --git a/src/include/catalog/index.h b/src/include/catalog/index.h index 20ac4a4b465..3fa15391d83 100644 --- a/src/include/catalog/index.h +++ b/src/include/catalog/index.h @@ -158,7 +158,7 @@ extern void reindex_index(Oid indexId, bool skip_constraint_checks, #define REINDEX_REL_FORCE_INDEXES_UNLOGGED 0x08 #define REINDEX_REL_FORCE_INDEXES_PERMANENT 0x10 -extern bool reindex_relation(Relation rel, int flags, ReindexParams *params); +extern bool reindex_relation(Oid relid, int flags, ReindexParams *params); extern bool ReindexIsProcessingHeap(Oid heapOid); extern bool ReindexIsProcessingIndex(Oid indexOid); From 5705b3807b555f8d2a04004fd5f2cb3cb78f3282 Mon Sep 17 00:00:00 2001 From: Alexander Korotkov Date: Sat, 25 Oct 2025 19:50:06 +0300 Subject: [PATCH 069/102] Correctly extract ORIOLEDB_PATCHSET_VERSION from git * Use patchset number is there is an exact match to the tag * Use commit hash otherwise --- configure | 2 +- configure.ac | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/configure b/configure index 2eef37f3970..9e5d5b6af42 100755 --- a/configure +++ b/configure @@ -19559,7 +19559,7 @@ _ACEOF # Needed to check postgresql patches git tag during orioledb extension build -ORIOLEDB_PATCHSET_VERSION=`git describe --tags | cut -d'_' -f2` +ORIOLEDB_PATCHSET_VERSION=`tag=$(git describe --tags --exact-match 2>/dev/null) && echo "$tag" | cut -d'_' -f2 || git rev-parse HEAD` # If we are inserting PG_SYSROOT into CPPFLAGS, do so symbolically not diff --git a/configure.ac b/configure.ac index 973036e4915..de1a3d1b62d 100644 --- a/configure.ac +++ b/configure.ac @@ -2426,7 +2426,7 @@ AC_DEFINE_UNQUOTED(PG_VERSION_NUM, $PG_VERSION_NUM, [PostgreSQL version as a num AC_SUBST(PG_VERSION_NUM) # Needed to check postgresql patches git tag during orioledb extension build -[ORIOLEDB_PATCHSET_VERSION=`git describe --tags | cut -d'_' -f2`] +[ORIOLEDB_PATCHSET_VERSION=`tag=$(git describe --tags --exact-match 2>/dev/null) && echo "$tag" | cut -d'_' -f2 || git rev-parse HEAD`] AC_SUBST(ORIOLEDB_PATCHSET_VERSION) # If we are inserting PG_SYSROOT into CPPFLAGS, do so symbolically not From 15d81f0bdd0dbea26e2b07834c78014fe842c298 Mon Sep 17 00:00:00 2001 From: Alexander Korotkov Date: Sat, 25 Oct 2025 21:04:34 +0300 Subject: [PATCH 070/102] Allow skipping transactions for ReorderBufferAbortOld() Transactions need to be marked as RBTXN_DISTR_SKIP_CLEANUP to be skipped in ReorderBufferAbortOld(). --- src/backend/replication/logical/reorderbuffer.c | 3 ++- src/include/replication/reorderbuffer.h | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/backend/replication/logical/reorderbuffer.c b/src/backend/replication/logical/reorderbuffer.c index 08ef94728eb..e9ae70c1d18 100644 --- a/src/backend/replication/logical/reorderbuffer.c +++ b/src/backend/replication/logical/reorderbuffer.c @@ -2992,7 +2992,8 @@ ReorderBufferAbortOld(ReorderBuffer *rb, TransactionId oldestRunningXid) txn = dlist_container(ReorderBufferTXN, node, it.cur); - if (TransactionIdPrecedes(txn->xid, oldestRunningXid)) + if (!(txn->txn_flags & RBTXN_DISTR_SKIP_CLEANUP) && + TransactionIdPrecedes(txn->xid, oldestRunningXid)) { elog(DEBUG2, "aborting old transaction %u", txn->xid); diff --git a/src/include/replication/reorderbuffer.h b/src/include/replication/reorderbuffer.h index 1606e0ea007..610443ca824 100644 --- a/src/include/replication/reorderbuffer.h +++ b/src/include/replication/reorderbuffer.h @@ -187,6 +187,7 @@ typedef struct ReorderBufferChange #define RBTXN_SKIPPED_PREPARE 0x0080 #define RBTXN_HAS_STREAMABLE_CHANGE 0x0100 #define RBTXN_DISTR_INVAL_OVERFLOWED 0x0200 +#define RBTXN_DISTR_SKIP_CLEANUP 0x0400 /* Does the transaction have catalog changes? */ #define rbtxn_has_catalog_changes(txn) \ From 7f4adbbb6bdb1577956a1bd719a01a24c705e146 Mon Sep 17 00:00:00 2001 From: Egor Ivkov Date: Wed, 5 Nov 2025 14:44:04 +0300 Subject: [PATCH 071/102] Fix warning: resource was not closed: cache pg_am --- src/backend/commands/tablecmds.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c index 2630d4f9f47..c11dcb63d89 100644 --- a/src/backend/commands/tablecmds.c +++ b/src/backend/commands/tablecmds.c @@ -13985,6 +13985,7 @@ TryReuseIndex(Oid oldId, IndexStmt *stmt) } accessMethodForm = (Form_pg_am) GETSTRUCT(tuple); amRoutine = GetIndexAmRoutineWithTableAM(heapRel->rd_rel->relam, accessMethodForm->amhandler); + ReleaseSysCache(tuple); table_close(heapRel, NoLock); if(amRoutine->amreuse) { From 007e9726fba04b8bbf6d10aef011b1d21e961253 Mon Sep 17 00:00:00 2001 From: Egor Ivkov Date: Wed, 5 Nov 2025 14:49:03 +0300 Subject: [PATCH 072/102] Fix formatting for amreuse call --- src/backend/commands/tablecmds.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c index c11dcb63d89..42341e73547 100644 --- a/src/backend/commands/tablecmds.c +++ b/src/backend/commands/tablecmds.c @@ -13950,14 +13950,14 @@ TryReuseIndex(Oid oldId, IndexStmt *stmt) Form_pg_am accessMethodForm; IndexAmRoutine *amRoutine; char *accessMethodName; - Oid heapRelId = IndexGetRelation(oldId, false); - Relation heapRel = table_open(heapRelId, ShareLock); - + Oid heapRelId = IndexGetRelation(oldId, false); + Relation heapRel = table_open(heapRelId, ShareLock); + stmt->oldNumber = irel->rd_locator.relNumber; stmt->oldCreateSubid = irel->rd_createSubid; stmt->oldFirstRelfilelocatorSubid = irel->rd_firstRelfilelocatorSubid; - - + + /* * look up the access method to call amreuse */ @@ -13966,8 +13966,9 @@ TryReuseIndex(Oid oldId, IndexStmt *stmt) if (!HeapTupleIsValid(tuple)) { /* - * Hack to provide more-or-less-transparent updating of old RTREE - * indexes to GiST: if RTREE is requested and not found, use GIST. + * Hack to provide more-or-less-transparent updating of old + * RTREE indexes to GiST: if RTREE is requested and not found, + * use GIST. */ if (strcmp(accessMethodName, "rtree") == 0) { @@ -13987,9 +13988,10 @@ TryReuseIndex(Oid oldId, IndexStmt *stmt) amRoutine = GetIndexAmRoutineWithTableAM(heapRel->rd_rel->relam, accessMethodForm->amhandler); ReleaseSysCache(tuple); table_close(heapRel, NoLock); - - if(amRoutine->amreuse) { - (*amRoutine->amreuse)(irel); + + if (amRoutine->amreuse) + { + (*amRoutine->amreuse) (irel); } } index_close(irel, NoLock); From 54897a990e5d2cd2596bf47ab8a0a103b26212e9 Mon Sep 17 00:00:00 2001 From: Artur Zakirov Date: Tue, 4 Nov 2025 14:28:54 +0100 Subject: [PATCH 073/102] Add RecoveryStopsHook for extensions to handle custom WAL records --- src/backend/access/transam/xlogrecovery.c | 24 ++++++++++++++++++++++- src/include/access/xlogrecovery.h | 10 ++++++++++ 2 files changed, 33 insertions(+), 1 deletion(-) diff --git a/src/backend/access/transam/xlogrecovery.c b/src/backend/access/transam/xlogrecovery.c index ebe644973f6..fecdc0da4f8 100644 --- a/src/backend/access/transam/xlogrecovery.c +++ b/src/backend/access/transam/xlogrecovery.c @@ -2567,8 +2567,28 @@ recoveryStopsBefore(XLogReaderState *record) return true; } + /* Check RecoveryStopsHook for custom records */ + if (RmgrIdIsCustom(XLogRecGetRmid(record)) && (RecoveryStopsBeforeHook != NULL)) + { + stopsHere = RecoveryStopsBeforeHook(record, &recordXid, &recordXtime); + + if (stopsHere) + { + recoveryStopAfter = false; + recoveryStopXid = recordXid; + recoveryStopLSN = InvalidXLogRecPtr; + recoveryStopTime = recordXtime; + recoveryStopName[0] = '\0'; + + ereport(LOG, + (errmsg("recovery stopping by hook before transaction %u, time %s", + recoveryStopXid, + timestamptz_to_str(recoveryStopTime)))); + return true; + } + } /* Otherwise we only consider stopping before COMMIT or ABORT records. */ - if (XLogRecGetRmid(record) != RM_XACT_ID) + else if (XLogRecGetRmid(record) != RM_XACT_ID) return false; xact_info = XLogRecGetInfo(record) & XLOG_XACT_OPMASK; @@ -4517,6 +4537,8 @@ GetXLogReplayRecPtr(TimeLineID *replayTLI) GetReplayXlogPtrHookType GetReplayXlogPtrHook = NULL; +RecoveryStopsBeforeHookType RecoveryStopsBeforeHook = NULL; + /* * Get effective latest redo apply position. * diff --git a/src/include/access/xlogrecovery.h b/src/include/access/xlogrecovery.h index cec67fb9c89..8ab6e99f86d 100644 --- a/src/include/access/xlogrecovery.h +++ b/src/include/access/xlogrecovery.h @@ -50,6 +50,10 @@ typedef enum RecoveryPauseState typedef XLogRecPtr (*GetReplayXlogPtrHookType) (void); +typedef bool (*RecoveryStopsBeforeHookType) (XLogReaderState *record, + TransactionId *recordXid, + TimestampTz *recordXtime); + /* User-settable GUC parameters */ extern PGDLLIMPORT bool recoveryTargetInclusive; extern PGDLLIMPORT int recoveryTargetAction; @@ -81,6 +85,12 @@ extern PGDLLIMPORT bool StandbyMode; /* Hook for extensions to tune replay xlog pointer */ extern PGDLLIMPORT GetReplayXlogPtrHookType GetReplayXlogPtrHook; +/* + * Hook for extensions to be able to decides to stop applying the WAL files + * based on custom WAL records. + */ +extern PGDLLIMPORT RecoveryStopsBeforeHookType RecoveryStopsBeforeHook; + extern Size XLogRecoveryShmemSize(void); extern void XLogRecoveryShmemInit(void); From 96bc5116f59c5d78db7df22766d85c1891091616 Mon Sep 17 00:00:00 2001 From: Artur Zakirov Date: Tue, 16 Dec 2025 15:23:30 +0100 Subject: [PATCH 074/102] Add missing SHAREDINVALUSERCACHE_ID to standby_desc_invalidations --- src/backend/access/rmgrdesc/standbydesc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/backend/access/rmgrdesc/standbydesc.c b/src/backend/access/rmgrdesc/standbydesc.c index f2bce9a37ad..3806fa42cc8 100644 --- a/src/backend/access/rmgrdesc/standbydesc.c +++ b/src/backend/access/rmgrdesc/standbydesc.c @@ -136,6 +136,8 @@ standby_desc_invalidations(StringInfo buf, appendStringInfo(buf, " relmap db %u", msg->rm.dbId); else if (msg->id == SHAREDINVALSNAPSHOT_ID) appendStringInfo(buf, " snapshot %u", msg->sn.relId); + else if (msg->id == SHAREDINVALUSERCACHE_ID) + appendStringInfo(buf, " usercache %u", msg->usr.arg2); else appendStringInfo(buf, " unrecognized id %d", msg->id); } From a76aec9725956de73b5afeaac762137aef0c2cdb Mon Sep 17 00:00:00 2001 From: Ilya Kobets Date: Mon, 25 Aug 2025 21:03:04 +0200 Subject: [PATCH 075/102] Added support for bytea* tupleid for exclusion constraints --- src/backend/access/common/toast_internals.c | 2 +- src/backend/access/heap/heapam_handler.c | 17 ++- src/backend/access/index/indexam.c | 6 +- src/backend/catalog/index.c | 17 ++- src/backend/catalog/indexing.c | 2 +- src/backend/commands/constraint.c | 40 ++++-- src/backend/executor/execIndexing.c | 129 +++++++++++--------- src/include/access/genam.h | 2 +- src/include/executor/executor.h | 22 +++- 9 files changed, 160 insertions(+), 77 deletions(-) diff --git a/src/backend/access/common/toast_internals.c b/src/backend/access/common/toast_internals.c index f1e63599bf3..c95462c2755 100644 --- a/src/backend/access/common/toast_internals.c +++ b/src/backend/access/common/toast_internals.c @@ -339,7 +339,7 @@ toast_save_datum(Relation rel, Datum value, /* Only index relations marked as ready can be updated */ if (toastidxs[i]->rd_index->indisready) index_insert(toastidxs[i], t_values, t_isnull, - &(toasttup->t_self), + ItemPointerGetDatum(&(toasttup->t_self)), toastrel, toastidxs[i]->rd_index->indisunique ? UNIQUE_CHECK_YES : UNIQUE_CHECK_NO, diff --git a/src/backend/access/heap/heapam_handler.c b/src/backend/access/heap/heapam_handler.c index 50ca1db23d7..4db5f5e0e9d 100644 --- a/src/backend/access/heap/heapam_handler.c +++ b/src/backend/access/heap/heapam_handler.c @@ -392,14 +392,25 @@ heapam_tuple_insert_with_arbiter(ResultRelInfo *resultRelInfo, { Relation rel = resultRelInfo->ri_RelationDesc; uint32 specToken; - ItemPointerData conflictTid; + ItemPointerData conflictTid; + Datum conflictTidDatum; bool specConflict; List *recheckIndexes = NIL; while (true) { specConflict = false; - if (!ExecCheckIndexConstraints(resultRelInfo, slot, estate, &conflictTid, + + if (table_get_row_ref_type(resultRelInfo->ri_RelationDesc) == ROW_REF_ROWID) + { + Assert(false); + } + else + { + conflictTidDatum = ItemPointerGetDatum(&conflictTid); + } + + if (!ExecCheckIndexConstraints(resultRelInfo, slot, estate, conflictTidDatum, arbiterIndexes)) { if (lockedSlot) @@ -2310,7 +2321,7 @@ heapam_index_validate_scan(Relation heapRelation, index_insert(indexRelation, values, isnull, - &rootTuple, + ItemPointerGetDatum(&rootTuple), heapRelation, indexInfo->ii_Unique ? UNIQUE_CHECK_YES : UNIQUE_CHECK_NO, diff --git a/src/backend/access/index/indexam.c b/src/backend/access/index/indexam.c index 94bdec63666..32203f20257 100644 --- a/src/backend/access/index/indexam.c +++ b/src/backend/access/index/indexam.c @@ -218,7 +218,7 @@ bool index_insert(Relation indexRelation, Datum *values, bool *isnull, - ItemPointer tupleid, + Datum tupleidDatum, Relation heapRelation, IndexUniqueCheck checkUnique, bool indexUnchanged, @@ -239,7 +239,7 @@ index_insert(Relation indexRelation, { /* compatibility method for extension AM's not aware of aminsertextended */ return indexRelation->rd_indam->aminsert(indexRelation, values, isnull, - tupleid, heapRelation, + DatumGetItemPointer(tupleidDatum), heapRelation, checkUnique, indexUnchanged, indexInfo); } @@ -247,7 +247,7 @@ index_insert(Relation indexRelation, { /* index insert method for internal AM's and Orioledb that are aware of aminsertextended */ return indexRelation->rd_indam->aminsertextended(indexRelation, values, isnull, - ItemPointerGetDatum(tupleid), heapRelation, + tupleidDatum, heapRelation, checkUnique, indexUnchanged, indexInfo); } diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c index 0f3367f8ff8..f02e2fecf87 100644 --- a/src/backend/catalog/index.c +++ b/src/backend/catalog/index.c @@ -3198,6 +3198,8 @@ IndexCheckExclusion(Relation heapRelation, while (table_scan_getnextslot(scan, ForwardScanDirection, slot)) { + Datum tupleidDatum; + CHECK_FOR_INTERRUPTS(); /* @@ -3218,12 +3220,25 @@ IndexCheckExclusion(Relation heapRelation, values, isnull); + if (table_get_row_ref_type(heapRelation) == ROW_REF_ROWID) + { + bool isnull; + tupleidDatum = slot_getsysattr(slot, RowIdAttributeNumber, &isnull); + Assert(!isnull); + } + else + { + Assert(ItemPointerIsValid(&slot->tts_tid)); + tupleidDatum = PointerGetDatum(&slot->tts_tid); + } + /* * Check that this tuple has no conflicts. */ check_exclusion_constraint(heapRelation, indexRelation, indexInfo, - &(slot->tts_tid), values, isnull, + tupleidDatum, + values, isnull, estate, true); MemoryContextReset(econtext->ecxt_per_tuple_memory); diff --git a/src/backend/catalog/indexing.c b/src/backend/catalog/indexing.c index 522da0ac855..9846637537c 100644 --- a/src/backend/catalog/indexing.c +++ b/src/backend/catalog/indexing.c @@ -170,7 +170,7 @@ CatalogIndexInsert(CatalogIndexState indstate, HeapTuple heapTuple, index_insert(index, /* index relation */ values, /* array of index Datums */ isnull, /* is-null flags */ - &(heapTuple->t_self), /* tid of heap tuple */ + ItemPointerGetDatum(&(heapTuple->t_self)), /* tid of heap tuple */ heapRelation, index->rd_index->indisunique ? UNIQUE_CHECK_YES : UNIQUE_CHECK_NO, diff --git a/src/backend/commands/constraint.c b/src/backend/commands/constraint.c index 982bae9ed42..618cdab5872 100644 --- a/src/backend/commands/constraint.c +++ b/src/backend/commands/constraint.c @@ -42,8 +42,8 @@ unique_key_recheck(PG_FUNCTION_ARGS) { TriggerData *trigdata = (TriggerData *) fcinfo->context; const char *funcname = "unique_key_recheck"; - ItemPointerData checktid; - ItemPointerData tmptid; + Datum checktidDatum; + Datum tmptidDatum; Relation indexRel; IndexInfo *indexInfo; EState *estate; @@ -74,16 +74,38 @@ unique_key_recheck(PG_FUNCTION_ARGS) * Get the new data that was inserted/updated. */ if (TRIGGER_FIRED_BY_INSERT(trigdata->tg_event)) - checktid = trigdata->tg_trigslot->tts_tid; + { + if (table_get_row_ref_type(trigdata->tg_relation) == ROW_REF_ROWID) + { + bool isnull; + checktidDatum = slot_getsysattr(trigdata->tg_trigslot, RowIdAttributeNumber, &isnull); + Assert(!isnull); + } + else + { + checktidDatum = ItemPointerGetDatum(&trigdata->tg_trigslot->tts_tid); + } + } else if (TRIGGER_FIRED_BY_UPDATE(trigdata->tg_event)) - checktid = trigdata->tg_newslot->tts_tid; + { + if (table_get_row_ref_type(trigdata->tg_relation) == ROW_REF_ROWID) + { + bool isnull; + checktidDatum = slot_getsysattr(trigdata->tg_trigslot, RowIdAttributeNumber, &isnull); + Assert(!isnull); + } + else + { + checktidDatum = ItemPointerGetDatum(&trigdata->tg_trigslot->tts_tid); + } + } else { ereport(ERROR, (errcode(ERRCODE_E_R_I_E_TRIGGER_PROTOCOL_VIOLATED), errmsg("function \"%s\" must be fired for INSERT or UPDATE", funcname))); - ItemPointerSetInvalid(&checktid); /* keep compiler quiet */ + checktidDatum = 0; } slot = table_slot_create(trigdata->tg_relation, NULL); @@ -106,12 +128,12 @@ unique_key_recheck(PG_FUNCTION_ARGS) * it's possible the index entry has also been marked dead, and even * removed. */ - tmptid = checktid; + tmptidDatum = checktidDatum; { IndexFetchTableData *scan = table_index_fetch_begin(trigdata->tg_relation); bool call_again = false; - if (!table_index_fetch_tuple(scan, PointerGetDatum(&tmptid), SnapshotSelf, slot, + if (!table_index_fetch_tuple(scan, tmptidDatum, SnapshotSelf, slot, &call_again, NULL)) { /* @@ -173,7 +195,7 @@ unique_key_recheck(PG_FUNCTION_ARGS) * the row is now dead, because that is the TID the index will know * about. */ - index_insert(indexRel, values, isnull, &checktid, + index_insert(indexRel, values, isnull, checktidDatum, trigdata->tg_relation, UNIQUE_CHECK_EXISTING, false, indexInfo); } @@ -186,7 +208,7 @@ unique_key_recheck(PG_FUNCTION_ARGS) * the child is a conflict. */ check_exclusion_constraint(trigdata->tg_relation, indexRel, indexInfo, - &tmptid, values, isnull, + tmptidDatum, values, isnull, estate, false); } diff --git a/src/backend/executor/execIndexing.c b/src/backend/executor/execIndexing.c index 4a544d2dc14..e2a10855ec9 100644 --- a/src/backend/executor/execIndexing.c +++ b/src/backend/executor/execIndexing.c @@ -116,23 +116,6 @@ #include "storage/lmgr.h" #include "utils/snapmgr.h" -/* waitMode argument to check_exclusion_or_unique_constraint() */ -typedef enum -{ - CEOUC_WAIT, - CEOUC_NOWAIT, - CEOUC_LIVELOCK_PREVENTING_WAIT -} CEOUC_WAIT_MODE; - -static bool check_exclusion_or_unique_constraint(Relation heap, Relation index, - IndexInfo *indexInfo, - ItemPointer tupleid, - Datum *values, bool *isnull, - EState *estate, bool newIndex, - CEOUC_WAIT_MODE waitMode, - bool violationOK, - ItemPointer conflictTid); - static bool index_recheck_constraint(Relation index, Oid *constr_procs, Datum *existing_values, bool *existing_isnull, Datum *new_values); @@ -308,19 +291,18 @@ ExecInsertIndexTuples(ResultRelInfo *resultRelInfo, ExprContext *econtext; Datum values[INDEX_MAX_KEYS]; bool isnull[INDEX_MAX_KEYS]; - ItemPointer tupleid; - + Datum tupleidDatum; if (table_get_row_ref_type(resultRelInfo->ri_RelationDesc) == ROW_REF_ROWID) { bool isnull; - tupleid = DatumGetItemPointer(slot_getsysattr(slot, RowIdAttributeNumber, &isnull)); + tupleidDatum = slot_getsysattr(slot, RowIdAttributeNumber, &isnull); Assert(!isnull); } else { Assert(ItemPointerIsValid(&slot->tts_tid)); - tupleid = &slot->tts_tid; + tupleidDatum = ItemPointerGetDatum(&slot->tts_tid); } /* @@ -444,7 +426,7 @@ ExecInsertIndexTuples(ResultRelInfo *resultRelInfo, index_insert(indexRelation, /* index relation */ values, /* array of index Datums */ isnull, /* null flags */ - tupleid, /* tid of heap tuple */ + tupleidDatum, /* tid of heap tuple */ heapRelation, /* heap relation */ checkUnique, /* type of uniqueness check to do */ indexUnchanged, /* UPDATE without logical change? */ @@ -488,7 +470,7 @@ ExecInsertIndexTuples(ResultRelInfo *resultRelInfo, satisfiesConstraint = check_exclusion_or_unique_constraint(heapRelation, indexRelation, indexInfo, - tupleid, values, isnull, + tupleidDatum, values, isnull, estate, false, waitMode, violationOK, NULL); } @@ -531,18 +513,18 @@ ExecUpdateIndexTuples(ResultRelInfo *resultRelInfo, ExprContext *econtext; Datum values[INDEX_MAX_KEYS]; bool isnull[INDEX_MAX_KEYS]; - ItemPointer tupleid; + Datum tupleidDatum; if (table_get_row_ref_type(resultRelInfo->ri_RelationDesc) == ROW_REF_ROWID) { bool isnull; - tupleid = DatumGetItemPointer(slot_getsysattr(slot, RowIdAttributeNumber, &isnull)); + tupleidDatum = slot_getsysattr(slot, RowIdAttributeNumber, &isnull); Assert(!isnull); } else { Assert(ItemPointerIsValid(&slot->tts_tid)); - tupleid = &slot->tts_tid; + tupleidDatum = ItemPointerGetDatum(&slot->tts_tid); } /* @@ -660,19 +642,19 @@ ExecUpdateIndexTuples(ResultRelInfo *resultRelInfo, { Datum valuesOld[INDEX_MAX_KEYS]; bool isnullOld[INDEX_MAX_KEYS]; - Datum oldTupleid; + Datum oldTupleidDatum; bool old_valid = true; if (table_get_row_ref_type(resultRelInfo->ri_RelationDesc) == ROW_REF_ROWID) { bool isnull; - oldTupleid = slot_getsysattr(oldSlot, RowIdAttributeNumber, &isnull); + oldTupleidDatum = slot_getsysattr(oldSlot, RowIdAttributeNumber, &isnull); Assert(!isnull); } else { Assert(ItemPointerIsValid(&oldSlot->tts_tid)); - oldTupleid = PointerGetDatum(&oldSlot->tts_tid); + oldTupleidDatum = ItemPointerGetDatum(&oldSlot->tts_tid); } econtext = GetPerTupleExprContext(estate); @@ -711,10 +693,10 @@ ExecUpdateIndexTuples(ResultRelInfo *resultRelInfo, old_valid, values, /* array of index Datums */ isnull, /* null flags */ - ItemPointerGetDatum(tupleid), /* tid of heap tuple */ + tupleidDatum, /* tid of heap tuple */ valuesOld, isnullOld, - oldTupleid, + oldTupleidDatum, heapRelation, /* heap relation */ checkUnique, /* type of uniqueness check to do */ indexInfo); /* index AM may need this */ @@ -737,7 +719,7 @@ ExecUpdateIndexTuples(ResultRelInfo *resultRelInfo, index_insert(indexRelation, /* index relation */ values, /* array of index Datums */ isnull, /* null flags */ - tupleid, /* tid of heap tuple */ + tupleidDatum, /* tid of heap tuple */ heapRelation, /* heap relation */ checkUnique, /* type of uniqueness check to do */ indexUnchanged, /* UPDATE without logical change? */ @@ -782,7 +764,7 @@ ExecUpdateIndexTuples(ResultRelInfo *resultRelInfo, satisfiesConstraint = check_exclusion_or_unique_constraint(heapRelation, indexRelation, indexInfo, - tupleid, values, isnull, + tupleidDatum, values, isnull, estate, false, waitMode, violationOK, NULL); } @@ -931,7 +913,7 @@ ExecDeleteIndexTuples(ResultRelInfo *resultRelInfo, TupleTableSlot *slot, */ bool ExecCheckIndexConstraints(ResultRelInfo *resultRelInfo, TupleTableSlot *slot, - EState *estate, ItemPointer conflictTid, + EState *estate, Datum conflictTidDatum, List *arbiterIndexes) { int i; @@ -943,10 +925,19 @@ ExecCheckIndexConstraints(ResultRelInfo *resultRelInfo, TupleTableSlot *slot, Datum values[INDEX_MAX_KEYS]; bool isnull[INDEX_MAX_KEYS]; ItemPointerData invalidItemPtr; + Datum invalidItemPtrDatum; bool checkedIndex = false; - ItemPointerSetInvalid(conflictTid); - ItemPointerSetInvalid(&invalidItemPtr); + if (table_get_row_ref_type(resultRelInfo->ri_RelationDesc) == ROW_REF_ROWID) + { + Assert(false); + } + else + { + ItemPointerSetInvalid(DatumGetItemPointer(conflictTidDatum)); + ItemPointerSetInvalid(&invalidItemPtr); + invalidItemPtrDatum = ItemPointerGetDatum(&invalidItemPtr); + } /* * Get information from the result relation info structure. @@ -1033,12 +1024,19 @@ ExecCheckIndexConstraints(ResultRelInfo *resultRelInfo, TupleTableSlot *slot, values, isnull); - satisfiesConstraint = - check_exclusion_or_unique_constraint(heapRelation, indexRelation, - indexInfo, &invalidItemPtr, - values, isnull, estate, false, - CEOUC_WAIT, true, - conflictTid); + if (table_get_row_ref_type(resultRelInfo->ri_RelationDesc) == ROW_REF_ROWID) + { + Assert(false); + } + else + { + satisfiesConstraint = + check_exclusion_or_unique_constraint(heapRelation, indexRelation, + indexInfo, invalidItemPtrDatum, + values, isnull, estate, false, + CEOUC_WAIT, true, + &conflictTidDatum); + } if (!satisfiesConstraint) return false; } @@ -1091,15 +1089,15 @@ ExecCheckIndexConstraints(ResultRelInfo *resultRelInfo, TupleTableSlot *slot, * constraint, when doing speculative insertion. Caller may use the returned * conflict TID to take further steps. */ -static bool +bool check_exclusion_or_unique_constraint(Relation heap, Relation index, IndexInfo *indexInfo, - ItemPointer tupleid, + Datum tupleidDatum, Datum *values, bool *isnull, EState *estate, bool newIndex, CEOUC_WAIT_MODE waitMode, bool violationOK, - ItemPointer conflictTid) + Datum *conflictTidDatum) { Oid *constr_procs; uint16 *constr_strats; @@ -1195,14 +1193,21 @@ check_exclusion_or_unique_constraint(Relation heap, Relation index, /* * Ignore the entry for the tuple we're trying to check. */ - if (ItemPointerIsValid(tupleid) && - ItemPointerEquals(tupleid, &existing_slot->tts_tid)) + if (table_get_row_ref_type(heap) != ROW_REF_ROWID) { - if (found_self) /* should not happen */ - elog(ERROR, "found self tuple multiple times in index \"%s\"", - RelationGetRelationName(index)); - found_self = true; - continue; + ItemPointer tupleid = DatumGetItemPointer(tupleidDatum); + + if (ItemPointerIsValid(tupleid) && + ItemPointerEquals(tupleid, &existing_slot->tts_tid)) + { + if (found_self) /* should not happen */ + elog(ERROR, "found self tuple multiple times in index \"%s\"", + RelationGetRelationName(index)); + found_self = true; + continue; + } + } else { + /* TODO: Add same thing for rowid if possible */ } /* @@ -1262,8 +1267,20 @@ check_exclusion_or_unique_constraint(Relation heap, Relation index, if (violationOK) { conflict = true; - if (conflictTid) - *conflictTid = existing_slot->tts_tid; + if (conflictTidDatum) + { + if (table_get_row_ref_type(heap) == ROW_REF_ROWID) + { + bool isnull; + *conflictTidDatum = slot_getsysattr(existing_slot, RowIdAttributeNumber, &isnull); + Assert(!isnull); + } + else + { + ItemPointer conflictTid = DatumGetItemPointer(*conflictTidDatum); + *conflictTid = existing_slot->tts_tid; + } + } break; } @@ -1320,11 +1337,11 @@ check_exclusion_or_unique_constraint(Relation heap, Relation index, void check_exclusion_constraint(Relation heap, Relation index, IndexInfo *indexInfo, - ItemPointer tupleid, + Datum tupleidDatum, Datum *values, bool *isnull, EState *estate, bool newIndex) { - (void) check_exclusion_or_unique_constraint(heap, index, indexInfo, tupleid, + (void) check_exclusion_or_unique_constraint(heap, index, indexInfo, tupleidDatum, values, isnull, estate, newIndex, CEOUC_WAIT, false, NULL); diff --git a/src/include/access/genam.h b/src/include/access/genam.h index bb17831720b..e682b0b2576 100644 --- a/src/include/access/genam.h +++ b/src/include/access/genam.h @@ -144,7 +144,7 @@ extern void index_close(Relation relation, LOCKMODE lockmode); extern bool index_insert(Relation indexRelation, Datum *values, bool *isnull, - ItemPointer tupleid, + Datum tupleidDatum, Relation heapRelation, IndexUniqueCheck checkUnique, bool indexUnchanged, diff --git a/src/include/executor/executor.h b/src/include/executor/executor.h index 0ca3a91fc66..99e0cfffe76 100644 --- a/src/include/executor/executor.h +++ b/src/include/executor/executor.h @@ -649,14 +649,32 @@ extern void ExecDeleteIndexTuples(ResultRelInfo *resultRelInfo, EState *estate); extern bool ExecCheckIndexConstraints(ResultRelInfo *resultRelInfo, TupleTableSlot *slot, - EState *estate, ItemPointer conflictTid, + EState *estate, Datum conflictTidDatum, List *arbiterIndexes); extern void check_exclusion_constraint(Relation heap, Relation index, IndexInfo *indexInfo, - ItemPointer tupleid, + Datum tupleidDatum, Datum *values, bool *isnull, EState *estate, bool newIndex); +/* waitMode argument to check_exclusion_or_unique_constraint() */ +typedef enum +{ + CEOUC_WAIT, + CEOUC_NOWAIT, + CEOUC_LIVELOCK_PREVENTING_WAIT +} CEOUC_WAIT_MODE; + +extern bool +check_exclusion_or_unique_constraint(Relation heap, Relation index, + IndexInfo *indexInfo, + Datum tupleidDatum, + Datum *values, bool *isnull, + EState *estate, bool newIndex, + CEOUC_WAIT_MODE waitMode, + bool violationOK, + Datum *conflictTidDatum); + /* * prototypes from functions in execReplication.c */ From ee3b844df7efa4b59e0e1ae11d045fdd11bd0d1f Mon Sep 17 00:00:00 2001 From: Ilya Kobets Date: Tue, 26 Aug 2025 19:14:44 +0200 Subject: [PATCH 076/102] Add row_ref_equals hook for check_exclusion_or_unique_constraint to work with rowid --- src/backend/access/heap/heapam_handler.c | 9 +++++++++ src/backend/executor/execIndexing.c | 21 ++++++++++++++++++++- src/include/access/tableam.h | 14 ++++++++++++++ 3 files changed, 43 insertions(+), 1 deletion(-) diff --git a/src/backend/access/heap/heapam_handler.c b/src/backend/access/heap/heapam_handler.c index 4db5f5e0e9d..2ddb8ec7d44 100644 --- a/src/backend/access/heap/heapam_handler.c +++ b/src/backend/access/heap/heapam_handler.c @@ -82,6 +82,14 @@ heapam_get_row_ref_type(Relation rel) return ROW_REF_TID; } +static bool +heapam_row_ref_equals(Relation rel, Datum tupleidDatum1, Datum tupleidDatum2) +{ + ItemPointer tupleid1 = DatumGetItemPointer(tupleidDatum1); + ItemPointer tupleid2 = DatumGetItemPointer(tupleidDatum2); + return ItemPointerEquals(tupleid1, tupleid2); +} + static void heapam_free_rd_amcache(Relation rel) { @@ -2932,6 +2940,7 @@ static const TableAmRoutine heapam_methods = { .slot_callbacks = heapam_slot_callbacks, .get_row_ref_type = heapam_get_row_ref_type, + .row_ref_equals = heapam_row_ref_equals, .free_rd_amcache = heapam_free_rd_amcache, .scan_begin = heap_beginscan, diff --git a/src/backend/executor/execIndexing.c b/src/backend/executor/execIndexing.c index e2a10855ec9..a25bb035d2e 100644 --- a/src/backend/executor/execIndexing.c +++ b/src/backend/executor/execIndexing.c @@ -1207,7 +1207,26 @@ check_exclusion_or_unique_constraint(Relation heap, Relation index, continue; } } else { - /* TODO: Add same thing for rowid if possible */ + Assert(tupleidDatum > 0); + Pointer rowid = DatumGetPointer(tupleidDatum); + + if (PointerIsValid(rowid)) + { + bool isnull; + Datum existing_rowid; + + existing_rowid = slot_getsysattr(existing_slot, RowIdAttributeNumber, &isnull); + Assert(!isnull); + + if (table_row_ref_equals(heap, tupleidDatum, existing_rowid)) + { + if (found_self) /* should not happen */ + elog(ERROR, "found self tuple multiple times in index \"%s\"", + RelationGetRelationName(index)); + found_self = true; + continue; + } + } } /* diff --git a/src/include/access/tableam.h b/src/include/access/tableam.h index 63aa0501668..fd0449ec140 100644 --- a/src/include/access/tableam.h +++ b/src/include/access/tableam.h @@ -332,6 +332,7 @@ typedef struct TableAmRoutine const TupleTableSlotOps *(*slot_callbacks) (Relation rel); RowRefType (*get_row_ref_type) (Relation rel); + bool (*row_ref_equals)(Relation rel, Datum tupleidDatum1, Datum tupleidDatum2); void (*free_rd_amcache) (Relation rel); @@ -2173,6 +2174,19 @@ table_get_row_ref_type(Relation rel) return ROW_REF_TID; } +static inline bool +table_row_ref_equals(Relation rel, Datum tupleidDatum1, Datum tupleidDatum2) +{ + if (rel->rd_tableam) + return rel->rd_tableam->row_ref_equals(rel, tupleidDatum1, tupleidDatum2); + else + { + ItemPointer tupleid1 = DatumGetItemPointer(tupleidDatum1); + ItemPointer tupleid2 = DatumGetItemPointer(tupleidDatum2); + return ItemPointerEquals(tupleid1, tupleid2); + } +} + static inline void table_free_rd_amcache(Relation rel) { From 45b14fe9cd03bd2a72682037e38fbe9fac6130d0 Mon Sep 17 00:00:00 2001 From: Ilya Kobets Date: Wed, 5 Nov 2025 02:36:16 +0100 Subject: [PATCH 077/102] Fix for after update trigger and unique defferrable constraints --- src/backend/commands/constraint.c | 4 +- src/include/pg_config_manual.h.bak | 385 +++++++++++++++++++++++++++++ 2 files changed, 387 insertions(+), 2 deletions(-) create mode 100644 src/include/pg_config_manual.h.bak diff --git a/src/backend/commands/constraint.c b/src/backend/commands/constraint.c index 618cdab5872..491a66f47a1 100644 --- a/src/backend/commands/constraint.c +++ b/src/backend/commands/constraint.c @@ -91,12 +91,12 @@ unique_key_recheck(PG_FUNCTION_ARGS) if (table_get_row_ref_type(trigdata->tg_relation) == ROW_REF_ROWID) { bool isnull; - checktidDatum = slot_getsysattr(trigdata->tg_trigslot, RowIdAttributeNumber, &isnull); + checktidDatum = slot_getsysattr(trigdata->tg_newslot, RowIdAttributeNumber, &isnull); Assert(!isnull); } else { - checktidDatum = ItemPointerGetDatum(&trigdata->tg_trigslot->tts_tid); + checktidDatum = ItemPointerGetDatum(&trigdata->tg_newslot->tts_tid); } } else diff --git a/src/include/pg_config_manual.h.bak b/src/include/pg_config_manual.h.bak new file mode 100644 index 00000000000..f941ee2faf8 --- /dev/null +++ b/src/include/pg_config_manual.h.bak @@ -0,0 +1,385 @@ +/*------------------------------------------------------------------------ + * PostgreSQL manual configuration settings + * + * This file contains various configuration symbols and limits. In + * all cases, changing them is only useful in very rare situations or + * for developers. If you edit any of these, be sure to do a *full* + * rebuild (and an initdb if noted). + * + * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/pg_config_manual.h + *------------------------------------------------------------------------ + */ + +/* + * This is the default value for wal_segment_size to be used when initdb is run + * without the --wal-segsize option. It must be a valid segment size. + */ +#define DEFAULT_XLOG_SEG_SIZE (16*1024*1024) + +/* + * Maximum length for identifiers (e.g. table names, column names, + * function names). Names actually are limited to one fewer byte than this, + * because the length must include a trailing zero byte. + * + * Changing this requires an initdb. + */ +#define NAMEDATALEN 64 + +/* + * Maximum number of arguments to a function. + * + * The minimum value is 8 (GIN indexes use 8-argument support functions). + * The maximum possible value is around 600 (limited by index tuple size in + * pg_proc's index; BLCKSZ larger than 8K would allow more). Values larger + * than needed will waste memory and processing time, but do not directly + * cost disk space. + * + * Changing this does not require an initdb, but it does require a full + * backend recompile (including any user-defined C functions). + */ +#define FUNC_MAX_ARGS 100 + +/* + * When creating a product derived from PostgreSQL with changes that cause + * incompatibilities for loadable modules, it is recommended to change this + * string so that dfmgr.c can refuse to load incompatible modules with a clean + * error message. Typical examples that cause incompatibilities are any + * changes to node tags or node structures. (Note that dfmgr.c already + * detects common sources of incompatibilities due to major version + * differences and due to some changed compile-time constants. This setting + * is for catching anything that cannot be detected in a straightforward way.) + * + * There is no prescribed format for the string. The suggestion is to include + * product or company name, and optionally any internally-relevant ABI + * version. Example: "ACME Postgres/1.2". Note that the string will appear + * in a user-facing error message if an ABI mismatch is detected. + */ +#define FMGR_ABI_EXTRA "PostgreSQL" + +/* + * Maximum number of columns in an index. There is little point in making + * this anything but a multiple of 32, because the main cost is associated + * with index tuple header size (see access/itup.h). + * + * Changing this requires an initdb. + */ +#define INDEX_MAX_KEYS 32 + +/* + * Maximum number of columns in a partition key + */ +#define PARTITION_MAX_KEYS 32 + +/* + * Decide whether built-in 8-byte types, including float8, int8, and + * timestamp, are passed by value. This is on by default if sizeof(Datum) >= + * 8 (that is, on 64-bit platforms). If sizeof(Datum) < 8 (32-bit platforms), + * this must be off. We keep this here as an option so that it is easy to + * test the pass-by-reference code paths on 64-bit platforms. + * + * Changing this requires an initdb. + */ +#if SIZEOF_VOID_P >= 8 +#define USE_FLOAT8_BYVAL 1 +#endif + +/* + * When we don't have native spinlocks, we use semaphores to simulate them. + * Decreasing this value reduces consumption of OS resources; increasing it + * may improve performance, but supplying a real spinlock implementation is + * probably far better. + */ +#define NUM_SPINLOCK_SEMAPHORES 128 + +/* + * When we have neither spinlocks nor atomic operations support we're + * implementing atomic operations on top of spinlock on top of semaphores. To + * be safe against atomic operations while holding a spinlock separate + * semaphores have to be used. + */ +#define NUM_ATOMICS_SEMAPHORES 64 + +/* + * MAXPGPATH: standard size of a pathname buffer in PostgreSQL (hence, + * maximum usable pathname length is one less). + * + * We'd use a standard system header symbol for this, if there weren't + * so many to choose from: MAXPATHLEN, MAX_PATH, PATH_MAX are all + * defined by different "standards", and often have different values + * on the same platform! So we just punt and use a reasonably + * generous setting here. + */ +#define MAXPGPATH 1024 + +/* + * You can try changing this if you have a machine with bytes of + * another size, but no guarantee... + */ +#define BITS_PER_BYTE 8 + +/* + * Preferred alignment for disk I/O buffers. On some CPUs, copies between + * user space and kernel space are significantly faster if the user buffer + * is aligned on a larger-than-MAXALIGN boundary. Ideally this should be + * a platform-dependent value, but for now we just hard-wire it. + */ +#define ALIGNOF_BUFFER 32 + +/* + * If EXEC_BACKEND is defined, the postmaster uses an alternative method for + * starting subprocesses: Instead of simply using fork(), as is standard on + * Unix platforms, it uses fork()+exec() or something equivalent on Windows, + * as well as lots of extra code to bring the required global state to those + * new processes. This must be enabled on Windows (because there is no + * fork()). On other platforms, it's only useful for verifying those + * otherwise Windows-specific code paths. + */ +#if defined(WIN32) && !defined(__CYGWIN__) +#define EXEC_BACKEND +#endif + +/* + * USE_POSIX_FADVISE controls whether Postgres will attempt to use the + * posix_fadvise() kernel call. Usually the automatic configure tests are + * sufficient, but some older Linux distributions had broken versions of + * posix_fadvise(). If necessary you can remove the #define here. + */ +#if HAVE_DECL_POSIX_FADVISE && defined(HAVE_POSIX_FADVISE) +#define USE_POSIX_FADVISE +#endif + +/* + * USE_PREFETCH code should be compiled only if we have a way to implement + * prefetching. (This is decoupled from USE_POSIX_FADVISE because there + * might in future be support for alternative low-level prefetch APIs. + * If you change this, you probably need to adjust the error message in + * check_effective_io_concurrency.) + */ +#ifdef USE_POSIX_FADVISE +#define USE_PREFETCH +#endif + +/* + * Default and maximum values for backend_flush_after, bgwriter_flush_after + * and checkpoint_flush_after; measured in blocks. Currently, these are + * enabled by default if sync_file_range() exists, ie, only on Linux. Perhaps + * we could also enable by default if we have mmap and msync(MS_ASYNC)? + */ +#ifdef HAVE_SYNC_FILE_RANGE +#define DEFAULT_BACKEND_FLUSH_AFTER 0 /* never enabled by default */ +#define DEFAULT_BGWRITER_FLUSH_AFTER 64 +#define DEFAULT_CHECKPOINT_FLUSH_AFTER 32 +#else +#define DEFAULT_BACKEND_FLUSH_AFTER 0 +#define DEFAULT_BGWRITER_FLUSH_AFTER 0 +#define DEFAULT_CHECKPOINT_FLUSH_AFTER 0 +#endif +/* upper limit for all three variables */ +#define WRITEBACK_MAX_PENDING_FLUSHES 256 + +/* + * USE_SSL code should be compiled only when compiling with an SSL + * implementation. + */ +#ifdef USE_OPENSSL +#define USE_SSL +#endif + +/* + * This is the default directory in which AF_UNIX socket files are + * placed. Caution: changing this risks breaking your existing client + * applications, which are likely to continue to look in the old + * directory. But if you just hate the idea of sockets in /tmp, + * here's where to twiddle it. You can also override this at runtime + * with the postmaster's -k switch. + * + * If set to an empty string, then AF_UNIX sockets are not used by default: A + * server will not create an AF_UNIX socket unless the run-time configuration + * is changed, a client will connect via TCP/IP by default and will only use + * an AF_UNIX socket if one is explicitly specified. + * + * This is done by default on Windows because there is no good standard + * location for AF_UNIX sockets and many installations on Windows don't + * support them yet. + */ +#ifndef WIN32 +#define DEFAULT_PGSOCKET_DIR "/tmp" +#else +#define DEFAULT_PGSOCKET_DIR "" +#endif + +/* + * This is the default event source for Windows event log. + */ +#define DEFAULT_EVENT_SOURCE "PostgreSQL" + +/* + * Assumed cache line size. This doesn't affect correctness, but can be used + * for low-level optimizations. This is mostly used to pad various data + * structures, to ensure that highly-contended fields are on different cache + * lines. Too small a value can hurt performance due to false sharing, while + * the only downside of too large a value is a few bytes of wasted memory. + * The default is 128, which should be large enough for all supported + * platforms. + */ +#define PG_CACHE_LINE_SIZE 128 + +/* + * Assumed alignment requirement for direct I/O. 4K corresponds to common + * sector and memory page size. + */ +#define PG_IO_ALIGN_SIZE 4096 + +/* + *------------------------------------------------------------------------ + * The following symbols are for enabling debugging code, not for + * controlling user-visible features or resource limits. + *------------------------------------------------------------------------ + */ + +/* + * Force use of the non-recursive JSON parser in all cases. This is useful + * to validate the working of the parser, and the regression tests should + * pass except for some different error messages about the stack limit. + */ +/* #define FORCE_JSON_PSTACK */ + +/* + * Include Valgrind "client requests", mostly in the memory allocator, so + * Valgrind understands PostgreSQL memory contexts. This permits detecting + * memory errors that Valgrind would not detect on a vanilla build. It also + * enables detection of buffer accesses that take place without holding a + * buffer pin (or without holding a buffer lock in the case of index access + * methods that superimpose their own custom client requests on top of the + * generic bufmgr.c requests). + * + * "make installcheck" is significantly slower under Valgrind. The client + * requests fall in hot code paths, so USE_VALGRIND slows execution by a few + * percentage points even when not run under Valgrind. + * + * Do not try to test the server under Valgrind without having built the + * server with USE_VALGRIND; else you will get false positives from sinval + * messaging (see comments in AddCatcacheInvalidationMessage). It's also + * important to use the suppression file src/tools/valgrind.supp to + * exclude other known false positives. + * + * You should normally use MEMORY_CONTEXT_CHECKING with USE_VALGRIND; + * instrumentation of repalloc() is inferior without it. + */ +/* #define USE_VALGRIND */ + +/* + * Define this to cause pfree()'d memory to be cleared immediately, to + * facilitate catching bugs that refer to already-freed values. + * Right now, this gets defined automatically if --enable-cassert. + */ +#ifdef USE_ASSERT_CHECKING +#define CLOBBER_FREED_MEMORY +#endif + +/* + * Define this to check memory allocation errors (scribbling on more + * bytes than were allocated). Right now, this gets defined + * automatically if --enable-cassert or USE_VALGRIND. + */ +#if defined(USE_ASSERT_CHECKING) || defined(USE_VALGRIND) +#define MEMORY_CONTEXT_CHECKING +#endif + +/* + * Define this to cause palloc()'d memory to be filled with random data, to + * facilitate catching code that depends on the contents of uninitialized + * memory. Caution: this is horrendously expensive. + */ +/* #define RANDOMIZE_ALLOCATED_MEMORY */ + +/* + * For cache-invalidation debugging, define DISCARD_CACHES_ENABLED to enable + * use of the debug_discard_caches GUC to aggressively flush syscache/relcache + * entries whenever it's possible to deliver invalidations. See + * AcceptInvalidationMessages() in src/backend/utils/cache/inval.c for + * details. + * + * USE_ASSERT_CHECKING builds default to enabling this. It's possible to use + * DISCARD_CACHES_ENABLED without a cassert build and the implied + * CLOBBER_FREED_MEMORY and MEMORY_CONTEXT_CHECKING options, but it's unlikely + * to be as effective at identifying problems. + */ +/* #define DISCARD_CACHES_ENABLED */ + +#if defined(USE_ASSERT_CHECKING) && !defined(DISCARD_CACHES_ENABLED) +#define DISCARD_CACHES_ENABLED +#endif + +/* + * Backwards compatibility for the older compile-time-only clobber-cache + * macros. + */ +#if !defined(DISCARD_CACHES_ENABLED) && (defined(CLOBBER_CACHE_ALWAYS) || defined(CLOBBER_CACHE_RECURSIVELY)) +#define DISCARD_CACHES_ENABLED +#endif + +/* + * Recover memory used for relcache entries when invalidated. See + * RelationBuildDesc() in src/backend/utils/cache/relcache.c. + * + * This is active automatically for clobber-cache builds when clobbering is + * active, but can be overridden here by explicitly defining + * RECOVER_RELATION_BUILD_MEMORY. Define to 1 to always free relation cache + * memory even when clobber is off, or to 0 to never free relation cache + * memory even when clobbering is on. + */ + /* #define RECOVER_RELATION_BUILD_MEMORY 0 */ /* Force disable */ + /* #define RECOVER_RELATION_BUILD_MEMORY 1 */ /* Force enable */ + +/* + * Define this to force all parse and plan trees to be passed through + * copyObject(), to facilitate catching errors and omissions in + * copyObject(). + */ +/* #define COPY_PARSE_PLAN_TREES */ + +/* + * Define this to force Bitmapset reallocation on each modification. Helps + * to find dangling pointers to Bitmapset's. + */ +/* #define REALLOCATE_BITMAPSETS */ + +/* + * Define this to force all parse and plan trees to be passed through + * outfuncs.c/readfuncs.c, to facilitate catching errors and omissions in + * those modules. + */ +/* #define WRITE_READ_PARSE_PLAN_TREES */ + +/* + * Define this to force all raw parse trees for DML statements to be scanned + * by raw_expression_tree_walker(), to facilitate catching errors and + * omissions in that function. + */ +/* #define RAW_EXPRESSION_COVERAGE_TEST */ + +/* + * Enable debugging print statements for lock-related operations. + */ +/* #define LOCK_DEBUG */ + +/* + * Enable debugging print statements for WAL-related operations; see + * also the wal_debug GUC var. + */ +/* #define WAL_DEBUG */ + +/* + * Enable tracing of resource consumption during sort operations; + * see also the trace_sort GUC var. For 8.1 this is enabled by default. + */ +#define TRACE_SORT 1 + +/* + * Enable tracing of syncscan operations (see also the trace_syncscan GUC var). + */ +/* #define TRACE_SYNCSCAN */ From 424ea5f2f6cb7b6111eb9814a6e0a60f7c059964 Mon Sep 17 00:00:00 2001 From: Ilya Kobets Date: Tue, 25 Nov 2025 03:59:16 +0100 Subject: [PATCH 078/102] Pass indexUnchanged to index_update method --- src/backend/access/index/indexam.c | 2 ++ src/backend/executor/execIndexing.c | 28 +++++++++++++++------------- src/include/access/amapi.h | 1 + src/include/access/genam.h | 1 + 4 files changed, 19 insertions(+), 13 deletions(-) diff --git a/src/backend/access/index/indexam.c b/src/backend/access/index/indexam.c index 32203f20257..9246bb6252d 100644 --- a/src/backend/access/index/indexam.c +++ b/src/backend/access/index/indexam.c @@ -269,6 +269,7 @@ index_update(Relation indexRelation, Datum oldTupleid, Relation heapRelation, IndexUniqueCheck checkUnique, + bool indexUnchanged, IndexInfo *indexInfo) { RELATION_CHECKS; @@ -285,6 +286,7 @@ index_update(Relation indexRelation, valuesOld, isnullOld, oldTupleid, heapRelation, checkUnique, + indexUnchanged, indexInfo); } diff --git a/src/backend/executor/execIndexing.c b/src/backend/executor/execIndexing.c index a25bb035d2e..a9a97cf95b6 100644 --- a/src/backend/executor/execIndexing.c +++ b/src/backend/executor/execIndexing.c @@ -547,6 +547,7 @@ ExecUpdateIndexTuples(ResultRelInfo *resultRelInfo, IndexInfo *indexInfo; bool applyNoDupErr; IndexUniqueCheck checkUnique; + bool indexUnchanged; bool satisfiesConstraint; bool new_valid = true; @@ -638,6 +639,16 @@ ExecUpdateIndexTuples(ResultRelInfo *resultRelInfo, else checkUnique = UNIQUE_CHECK_PARTIAL; + /* + * There's definitely going to be an index_insert() call for this + * index. If we're being called as part of an UPDATE statement, + * consider if the 'indexUnchanged' = true hint should be passed. + */ + indexUnchanged = index_unchanged_by_update(resultRelInfo, + estate, + indexInfo, + indexRelation); + if (indexRelation->rd_indam->ammvccaware) { Datum valuesOld[INDEX_MAX_KEYS]; @@ -699,22 +710,12 @@ ExecUpdateIndexTuples(ResultRelInfo *resultRelInfo, oldTupleidDatum, heapRelation, /* heap relation */ checkUnique, /* type of uniqueness check to do */ + indexUnchanged, /* UPDATE without logical change? */ indexInfo); /* index AM may need this */ } else { - bool indexUnchanged; - /* - * There's definitely going to be an index_insert() call for this - * index. If we're being called as part of an UPDATE statement, - * consider if the 'indexUnchanged' = true hint should be passed. - */ - indexUnchanged = index_unchanged_by_update(resultRelInfo, - estate, - indexInfo, - indexRelation); - satisfiesConstraint = index_insert(indexRelation, /* index relation */ values, /* array of index Datums */ @@ -1207,9 +1208,10 @@ check_exclusion_or_unique_constraint(Relation heap, Relation index, continue; } } else { - Assert(tupleidDatum > 0); - Pointer rowid = DatumGetPointer(tupleidDatum); + Pointer rowid; + Assert(tupleidDatum > 0); + rowid = DatumGetPointer(tupleidDatum); if (PointerIsValid(rowid)) { bool isnull; diff --git a/src/include/access/amapi.h b/src/include/access/amapi.h index 55b120575f9..6667ab79a86 100644 --- a/src/include/access/amapi.h +++ b/src/include/access/amapi.h @@ -142,6 +142,7 @@ typedef bool (*amupdate_function) (Relation indexRelation, Datum oldTupleid, Relation heapRelation, IndexUniqueCheck checkUnique, + bool indexUnchanged, struct IndexInfo *indexInfo); /* delete this tuple */ typedef bool (*amdelete_function) (Relation indexRelation, diff --git a/src/include/access/genam.h b/src/include/access/genam.h index e682b0b2576..92ef552e241 100644 --- a/src/include/access/genam.h +++ b/src/include/access/genam.h @@ -160,6 +160,7 @@ extern bool index_update(Relation indexRelation, Datum oldTupleid, Relation heapRelation, IndexUniqueCheck checkUnique, + bool indexUnchanged, struct IndexInfo *indexInfo); extern bool index_delete(Relation indexRelation, Datum *values, bool *isnull, Datum tupleid, Relation heapRelation, From 6b94bcfee9ae88aaa5afdd9c68eb910419fe8d8b Mon Sep 17 00:00:00 2001 From: Ilya Kobets Date: Thu, 4 Dec 2025 04:47:37 +0100 Subject: [PATCH 079/102] pass indexRel to table_index_fetch_begin to detect index bridging for orioledb --- src/backend/access/heap/heapam_handler.c | 2 +- src/backend/access/index/indexam.c | 4 ++-- src/backend/access/nbtree/nbtinsert.c | 4 ++-- src/backend/access/table/tableam.c | 3 ++- src/backend/commands/constraint.c | 5 ++++- src/backend/executor/execIndexing.c | 1 + src/include/access/tableam.h | 7 ++++--- 7 files changed, 16 insertions(+), 10 deletions(-) diff --git a/src/backend/access/heap/heapam_handler.c b/src/backend/access/heap/heapam_handler.c index 2ddb8ec7d44..be2ce941143 100644 --- a/src/backend/access/heap/heapam_handler.c +++ b/src/backend/access/heap/heapam_handler.c @@ -105,7 +105,7 @@ heapam_free_rd_amcache(Relation rel) */ static IndexFetchTableData * -heapam_index_fetch_begin(Relation rel) +heapam_index_fetch_begin(Relation rel, Relation index) { IndexFetchHeapData *hscan = palloc0(sizeof(IndexFetchHeapData)); diff --git a/src/backend/access/index/indexam.c b/src/backend/access/index/indexam.c index 9246bb6252d..d766b61c1b9 100644 --- a/src/backend/access/index/indexam.c +++ b/src/backend/access/index/indexam.c @@ -340,7 +340,7 @@ index_beginscan(Relation heapRelation, scan->xs_snapshot = snapshot; /* prepare to fetch index matches from table */ - scan->xs_heapfetch = table_index_fetch_begin(heapRelation); + scan->xs_heapfetch = table_index_fetch_begin(heapRelation, indexRelation); return scan; } @@ -628,7 +628,7 @@ index_beginscan_parallel(Relation heaprel, Relation indexrel, int nkeys, scan->xs_snapshot = snapshot; /* prepare to fetch index matches from table */ - scan->xs_heapfetch = table_index_fetch_begin(heaprel); + scan->xs_heapfetch = table_index_fetch_begin(heaprel, indexrel); return scan; } diff --git a/src/backend/access/nbtree/nbtinsert.c b/src/backend/access/nbtree/nbtinsert.c index d33f814a938..a203f46936a 100644 --- a/src/backend/access/nbtree/nbtinsert.c +++ b/src/backend/access/nbtree/nbtinsert.c @@ -557,7 +557,7 @@ _bt_check_unique(Relation rel, BTInsertState insertstate, Relation heapRel, * with optimizations like heap's HOT, we have just a single * index entry for the entire chain. */ - else if (table_index_fetch_tuple_check(heapRel, &htid, + else if (table_index_fetch_tuple_check(heapRel, rel, &htid, &SnapshotDirty, &all_dead)) { @@ -615,7 +615,7 @@ _bt_check_unique(Relation rel, BTInsertState insertstate, Relation heapRel, * entry. */ htid = itup->t_tid; - if (table_index_fetch_tuple_check(heapRel, &htid, + if (table_index_fetch_tuple_check(heapRel, rel, &htid, SnapshotSelf, NULL)) { /* Normal case --- it's still live */ diff --git a/src/backend/access/table/tableam.c b/src/backend/access/table/tableam.c index afe35f79668..338f2222951 100644 --- a/src/backend/access/table/tableam.c +++ b/src/backend/access/table/tableam.c @@ -216,6 +216,7 @@ table_beginscan_parallel(Relation relation, ParallelTableScanDesc pscan) */ bool table_index_fetch_tuple_check(Relation rel, + Relation indexRel, ItemPointer tid, Snapshot snapshot, bool *all_dead) @@ -226,7 +227,7 @@ table_index_fetch_tuple_check(Relation rel, bool found; slot = table_slot_create(rel, NULL); - scan = table_index_fetch_begin(rel); + scan = table_index_fetch_begin(rel, indexRel); found = table_index_fetch_tuple(scan, PointerGetDatum(tid), snapshot, slot, &call_again, all_dead); table_index_fetch_end(scan); diff --git a/src/backend/commands/constraint.c b/src/backend/commands/constraint.c index 491a66f47a1..e08d0c690da 100644 --- a/src/backend/commands/constraint.c +++ b/src/backend/commands/constraint.c @@ -130,9 +130,12 @@ unique_key_recheck(PG_FUNCTION_ARGS) */ tmptidDatum = checktidDatum; { - IndexFetchTableData *scan = table_index_fetch_begin(trigdata->tg_relation); + IndexFetchTableData *scan; bool call_again = false; + indexRel = index_open(trigdata->tg_trigger->tgconstrindid, AccessShareLock); + scan = table_index_fetch_begin(trigdata->tg_relation, indexRel); + index_close(indexRel, AccessShareLock); if (!table_index_fetch_tuple(scan, tmptidDatum, SnapshotSelf, slot, &call_again, NULL)) { diff --git a/src/backend/executor/execIndexing.c b/src/backend/executor/execIndexing.c index a9a97cf95b6..a5b6f9cc34a 100644 --- a/src/backend/executor/execIndexing.c +++ b/src/backend/executor/execIndexing.c @@ -1114,6 +1114,7 @@ check_exclusion_or_unique_constraint(Relation heap, Relation index, TupleTableSlot *existing_slot; TupleTableSlot *save_scantuple; + if (indexInfo->ii_ExclusionOps) { constr_procs = indexInfo->ii_ExclusionProcs; diff --git a/src/include/access/tableam.h b/src/include/access/tableam.h index fd0449ec140..72fb74b7adf 100644 --- a/src/include/access/tableam.h +++ b/src/include/access/tableam.h @@ -453,7 +453,7 @@ typedef struct TableAmRoutine * * Tuples for an index scan can then be fetched via index_fetch_tuple. */ - struct IndexFetchTableData *(*index_fetch_begin) (Relation rel); + struct IndexFetchTableData *(*index_fetch_begin) (Relation rel, Relation index); /* * Reset index fetch. Typically this will release cross index fetch @@ -1230,9 +1230,9 @@ table_parallelscan_reinitialize(Relation rel, ParallelTableScanDesc pscan) * Tuples for an index scan can then be fetched via table_index_fetch_tuple(). */ static inline IndexFetchTableData * -table_index_fetch_begin(Relation rel) +table_index_fetch_begin(Relation rel, Relation index) { - return rel->rd_tableam->index_fetch_begin(rel); + return rel->rd_tableam->index_fetch_begin(rel, index); } /* @@ -1305,6 +1305,7 @@ table_index_fetch_tuple(struct IndexFetchTableData *scan, * unique index. */ extern bool table_index_fetch_tuple_check(Relation rel, + Relation indexRel, ItemPointer tid, Snapshot snapshot, bool *all_dead); From 2a0a87e5f65fc440007983771344506552f44de9 Mon Sep 17 00:00:00 2001 From: Ilya Kobets Date: Wed, 10 Dec 2025 07:30:18 +0100 Subject: [PATCH 080/102] Fixed unique_key_recheck for postgres relations --- src/backend/commands/constraint.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/src/backend/commands/constraint.c b/src/backend/commands/constraint.c index e08d0c690da..adabc5359d3 100644 --- a/src/backend/commands/constraint.c +++ b/src/backend/commands/constraint.c @@ -44,6 +44,7 @@ unique_key_recheck(PG_FUNCTION_ARGS) const char *funcname = "unique_key_recheck"; Datum checktidDatum; Datum tmptidDatum; + ItemPointerData tmptid; Relation indexRel; IndexInfo *indexInfo; EState *estate; @@ -128,7 +129,16 @@ unique_key_recheck(PG_FUNCTION_ARGS) * it's possible the index entry has also been marked dead, and even * removed. */ - tmptidDatum = checktidDatum; + if (table_get_row_ref_type(trigdata->tg_relation) == ROW_REF_ROWID) + { + /* FIX: Probably wrong assumption */ + tmptidDatum = checktidDatum; + } + else + { + tmptid = *DatumGetItemPointer(checktidDatum); + tmptidDatum = ItemPointerGetDatum(&tmptid); + } { IndexFetchTableData *scan; bool call_again = false; From 46362c7aa79f892377ee9debadf638349588646c Mon Sep 17 00:00:00 2001 From: Ilya Kobets Date: Sun, 21 Dec 2025 22:49:25 +0100 Subject: [PATCH 081/102] passing is_rowid to index_fetch_tuple for bridged btrees to work --- src/backend/access/heap/heapam_handler.c | 1 + src/backend/access/index/indexam.c | 4 +++- src/backend/access/table/tableam.c | 2 +- src/backend/commands/constraint.c | 9 ++++----- src/include/access/tableam.h | 4 +++- 5 files changed, 12 insertions(+), 8 deletions(-) diff --git a/src/backend/access/heap/heapam_handler.c b/src/backend/access/heap/heapam_handler.c index be2ce941143..c29b60e9768 100644 --- a/src/backend/access/heap/heapam_handler.c +++ b/src/backend/access/heap/heapam_handler.c @@ -140,6 +140,7 @@ heapam_index_fetch_end(IndexFetchTableData *scan) static bool heapam_index_fetch_tuple(struct IndexFetchTableData *scan, Datum tupleid, + bool is_rowid, Snapshot snapshot, TupleTableSlot *slot, bool *call_again, bool *all_dead) diff --git a/src/backend/access/index/indexam.c b/src/backend/access/index/indexam.c index d766b61c1b9..18c7ee5cf9d 100644 --- a/src/backend/access/index/indexam.c +++ b/src/backend/access/index/indexam.c @@ -753,16 +753,18 @@ index_fetch_heap(IndexScanDesc scan, TupleTableSlot *slot) bool all_dead = false; bool found; Datum tupleid; + bool is_rowid = false; if (scan->xs_want_rowid) { Assert(!scan->xs_rowid.isnull); tupleid = scan->xs_rowid.value; + is_rowid = true; } else tupleid = PointerGetDatum(&scan->xs_heaptid); - found = table_index_fetch_tuple(scan->xs_heapfetch, tupleid, + found = table_index_fetch_tuple(scan->xs_heapfetch, tupleid, is_rowid, scan->xs_snapshot, slot, &scan->xs_heap_continue, &all_dead); diff --git a/src/backend/access/table/tableam.c b/src/backend/access/table/tableam.c index 338f2222951..4c40351d756 100644 --- a/src/backend/access/table/tableam.c +++ b/src/backend/access/table/tableam.c @@ -228,7 +228,7 @@ table_index_fetch_tuple_check(Relation rel, slot = table_slot_create(rel, NULL); scan = table_index_fetch_begin(rel, indexRel); - found = table_index_fetch_tuple(scan, PointerGetDatum(tid), snapshot, slot, &call_again, + found = table_index_fetch_tuple(scan, PointerGetDatum(tid), false, snapshot, slot, &call_again, all_dead); table_index_fetch_end(scan); ExecDropSingleTupleTableSlot(slot); diff --git a/src/backend/commands/constraint.c b/src/backend/commands/constraint.c index adabc5359d3..0eae078128d 100644 --- a/src/backend/commands/constraint.c +++ b/src/backend/commands/constraint.c @@ -44,6 +44,7 @@ unique_key_recheck(PG_FUNCTION_ARGS) const char *funcname = "unique_key_recheck"; Datum checktidDatum; Datum tmptidDatum; + bool is_rowid = false; ItemPointerData tmptid; Relation indexRel; IndexInfo *indexInfo; @@ -81,11 +82,10 @@ unique_key_recheck(PG_FUNCTION_ARGS) bool isnull; checktidDatum = slot_getsysattr(trigdata->tg_trigslot, RowIdAttributeNumber, &isnull); Assert(!isnull); + is_rowid = true; } else - { checktidDatum = ItemPointerGetDatum(&trigdata->tg_trigslot->tts_tid); - } } else if (TRIGGER_FIRED_BY_UPDATE(trigdata->tg_event)) { @@ -94,11 +94,10 @@ unique_key_recheck(PG_FUNCTION_ARGS) bool isnull; checktidDatum = slot_getsysattr(trigdata->tg_newslot, RowIdAttributeNumber, &isnull); Assert(!isnull); + is_rowid = true; } else - { checktidDatum = ItemPointerGetDatum(&trigdata->tg_newslot->tts_tid); - } } else { @@ -146,7 +145,7 @@ unique_key_recheck(PG_FUNCTION_ARGS) indexRel = index_open(trigdata->tg_trigger->tgconstrindid, AccessShareLock); scan = table_index_fetch_begin(trigdata->tg_relation, indexRel); index_close(indexRel, AccessShareLock); - if (!table_index_fetch_tuple(scan, tmptidDatum, SnapshotSelf, slot, + if (!table_index_fetch_tuple(scan, tmptidDatum, is_rowid, SnapshotSelf, slot, &call_again, NULL)) { /* diff --git a/src/include/access/tableam.h b/src/include/access/tableam.h index 72fb74b7adf..165144eb995 100644 --- a/src/include/access/tableam.h +++ b/src/include/access/tableam.h @@ -488,6 +488,7 @@ typedef struct TableAmRoutine */ bool (*index_fetch_tuple) (struct IndexFetchTableData *scan, Datum tupleid, + bool is_rowid, Snapshot snapshot, TupleTableSlot *slot, bool *call_again, bool *all_dead); @@ -1281,6 +1282,7 @@ table_index_fetch_end(struct IndexFetchTableData *scan) static inline bool table_index_fetch_tuple(struct IndexFetchTableData *scan, Datum tupleid, + bool is_rowid, Snapshot snapshot, TupleTableSlot *slot, bool *call_again, bool *all_dead) @@ -1293,7 +1295,7 @@ table_index_fetch_tuple(struct IndexFetchTableData *scan, if (unlikely(TransactionIdIsValid(CheckXidAlive) && !bsysscan)) elog(ERROR, "unexpected table_index_fetch_tuple call during logical decoding"); - return scan->rel->rd_tableam->index_fetch_tuple(scan, tupleid, snapshot, + return scan->rel->rd_tableam->index_fetch_tuple(scan, tupleid, is_rowid, snapshot, slot, call_again, all_dead); } From 68a5c3aa0cf44950f6de69c5b164d01539e54c69 Mon Sep 17 00:00:00 2001 From: Ilya Kobets Date: Fri, 26 Dec 2025 04:35:55 +0100 Subject: [PATCH 082/102] allow invalid rowid in check_exclusion_or_unique_constraint --- src/backend/executor/execIndexing.c | 1 - 1 file changed, 1 deletion(-) diff --git a/src/backend/executor/execIndexing.c b/src/backend/executor/execIndexing.c index a5b6f9cc34a..d204a044134 100644 --- a/src/backend/executor/execIndexing.c +++ b/src/backend/executor/execIndexing.c @@ -1211,7 +1211,6 @@ check_exclusion_or_unique_constraint(Relation heap, Relation index, } else { Pointer rowid; - Assert(tupleidDatum > 0); rowid = DatumGetPointer(tupleidDatum); if (PointerIsValid(rowid)) { From b6a8f579a6a4bb7facc76ae28dc4bdfb27ae5f61 Mon Sep 17 00:00:00 2001 From: Ilya Kobets Date: Fri, 26 Dec 2025 18:32:26 +0100 Subject: [PATCH 083/102] properly process conflictTid for rowid --- src/backend/access/heap/heapam_handler.c | 10 ++----- src/backend/executor/execIndexing.c | 33 +++++++++++------------- 2 files changed, 17 insertions(+), 26 deletions(-) diff --git a/src/backend/access/heap/heapam_handler.c b/src/backend/access/heap/heapam_handler.c index c29b60e9768..7eeff2d568a 100644 --- a/src/backend/access/heap/heapam_handler.c +++ b/src/backend/access/heap/heapam_handler.c @@ -410,14 +410,8 @@ heapam_tuple_insert_with_arbiter(ResultRelInfo *resultRelInfo, { specConflict = false; - if (table_get_row_ref_type(resultRelInfo->ri_RelationDesc) == ROW_REF_ROWID) - { - Assert(false); - } - else - { - conflictTidDatum = ItemPointerGetDatum(&conflictTid); - } + Assert(table_get_row_ref_type(resultRelInfo->ri_RelationDesc) != ROW_REF_ROWID); + conflictTidDatum = ItemPointerGetDatum(&conflictTid); if (!ExecCheckIndexConstraints(resultRelInfo, slot, estate, conflictTidDatum, arbiterIndexes)) diff --git a/src/backend/executor/execIndexing.c b/src/backend/executor/execIndexing.c index d204a044134..9ffd23554c7 100644 --- a/src/backend/executor/execIndexing.c +++ b/src/backend/executor/execIndexing.c @@ -931,7 +931,7 @@ ExecCheckIndexConstraints(ResultRelInfo *resultRelInfo, TupleTableSlot *slot, if (table_get_row_ref_type(resultRelInfo->ri_RelationDesc) == ROW_REF_ROWID) { - Assert(false); + invalidItemPtrDatum = ItemPointerGetDatum(NULL); } else { @@ -1025,19 +1025,13 @@ ExecCheckIndexConstraints(ResultRelInfo *resultRelInfo, TupleTableSlot *slot, values, isnull); - if (table_get_row_ref_type(resultRelInfo->ri_RelationDesc) == ROW_REF_ROWID) - { - Assert(false); - } - else - { - satisfiesConstraint = - check_exclusion_or_unique_constraint(heapRelation, indexRelation, - indexInfo, invalidItemPtrDatum, - values, isnull, estate, false, - CEOUC_WAIT, true, - &conflictTidDatum); - } + satisfiesConstraint = + check_exclusion_or_unique_constraint(heapRelation, indexRelation, + indexInfo, invalidItemPtrDatum, + values, isnull, estate, false, + CEOUC_WAIT, true, + &conflictTidDatum); + if (!satisfiesConstraint) return false; } @@ -1098,7 +1092,7 @@ check_exclusion_or_unique_constraint(Relation heap, Relation index, EState *estate, bool newIndex, CEOUC_WAIT_MODE waitMode, bool violationOK, - Datum *conflictTidDatum) + Datum *conflictTidDatumPtr) { Oid *constr_procs; uint16 *constr_strats; @@ -1288,17 +1282,20 @@ check_exclusion_or_unique_constraint(Relation heap, Relation index, if (violationOK) { conflict = true; - if (conflictTidDatum) + if (conflictTidDatumPtr) { if (table_get_row_ref_type(heap) == ROW_REF_ROWID) { bool isnull; - *conflictTidDatum = slot_getsysattr(existing_slot, RowIdAttributeNumber, &isnull); + Datum conflictRowidPtrDatum = *conflictTidDatumPtr; + Datum *conflictRowidPtr = (Datum *) DatumGetPointer(conflictRowidPtrDatum); + + *conflictRowidPtr = slot_getsysattr(existing_slot, RowIdAttributeNumber, &isnull); Assert(!isnull); } else { - ItemPointer conflictTid = DatumGetItemPointer(*conflictTidDatum); + ItemPointer conflictTid = DatumGetItemPointer(*conflictTidDatumPtr); *conflictTid = existing_slot->tts_tid; } } From fd257adbca241a419e7f86037f6a7a51f9e29901 Mon Sep 17 00:00:00 2001 From: Pavel Borisov Date: Thu, 5 Feb 2026 14:42:37 +0400 Subject: [PATCH 084/102] Fix merge with 5db6df0c0117 --- src/backend/executor/nodeModifyTable.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/backend/executor/nodeModifyTable.c b/src/backend/executor/nodeModifyTable.c index 1c6467fd101..5a748bb6f56 100644 --- a/src/backend/executor/nodeModifyTable.c +++ b/src/backend/executor/nodeModifyTable.c @@ -2883,7 +2883,7 @@ ExecMergeMatched(ModifyTableContext *context, ResultRelInfo *resultRelInfo, * the tuple moved, and setting our current * resultRelInfo to that. */ - if (ItemPointerIndicatesMovedPartitions(tupleid)) + if (ItemPointerIndicatesMovedPartitions(&context->tmfd.ctid)) ereport(ERROR, (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE), errmsg("tuple to be deleted was already moved to another partition due to concurrent update"))); From a7e8ba8e2175678ab493663e70ac07c944308c7b Mon Sep 17 00:00:00 2001 From: Alexander Korotkov Date: Mon, 16 Feb 2026 22:18:05 +0200 Subject: [PATCH 085/102] Use TABLE_MODIFY_LOCK_UPDATED in ExecMergeMatched() --- src/backend/executor/nodeModifyTable.c | 232 +++++++++++-------------- 1 file changed, 104 insertions(+), 128 deletions(-) diff --git a/src/backend/executor/nodeModifyTable.c b/src/backend/executor/nodeModifyTable.c index 5a748bb6f56..f6078d596c7 100644 --- a/src/backend/executor/nodeModifyTable.c +++ b/src/backend/executor/nodeModifyTable.c @@ -2591,6 +2591,7 @@ ExecMergeMatched(ModifyTableContext *context, ResultRelInfo *resultRelInfo, EPQState *epqstate = &mtstate->mt_epqstate; ListCell *l; bool no_further_action = true; + TupleTableSlot *oldSlot = resultRelInfo->ri_oldTupleSlot; /* * If there are no WHEN MATCHED actions, we are done. @@ -2604,7 +2605,7 @@ ExecMergeMatched(ModifyTableContext *context, ResultRelInfo *resultRelInfo, * Again, this target relation's slot is required only in the case of a * MATCHED tuple and UPDATE/DELETE actions. */ - econtext->ecxt_scantuple = resultRelInfo->ri_oldTupleSlot; + econtext->ecxt_scantuple = oldSlot; econtext->ecxt_innertuple = context->planSlot; econtext->ecxt_outertuple = NULL; @@ -2646,6 +2647,10 @@ ExecMergeMatched(ModifyTableContext *context, ResultRelInfo *resultRelInfo, CmdType commandType = relaction->mas_action->commandType; TM_Result result; UpdateContext updateCxt = {0}; + int options = TABLE_MODIFY_WAIT | TABLE_MODIFY_FETCH_OLD_TUPLE; + + if (!IsolationUsesXactSnapshot()) + options |= TABLE_MODIFY_LOCK_UPDATED; /* * Test condition, if any. @@ -2674,7 +2679,7 @@ ExecMergeMatched(ModifyTableContext *context, ResultRelInfo *resultRelInfo, ExecWithCheckOptions(commandType == CMD_UPDATE ? WCO_RLS_MERGE_UPDATE_CHECK : WCO_RLS_MERGE_DELETE_CHECK, resultRelInfo, - resultRelInfo->ri_oldTupleSlot, + oldSlot, context->mtstate->ps.state); } @@ -2694,13 +2699,18 @@ ExecMergeMatched(ModifyTableContext *context, ResultRelInfo *resultRelInfo, if (!ExecUpdatePrologue(context, resultRelInfo, tupleid, NULL, newslot, &result)) { + TupleTableSlot *trigOldSlot = ExecGetTriggerOldSlot(estate, resultRelInfo); + if (result == TM_Ok) goto out; /* "do nothing" */ + + if (trigOldSlot != oldSlot && result != TM_Deleted) + ExecCopySlot(oldSlot, trigOldSlot); break; /* concurrent update/delete */ } result = ExecUpdateAct(context, resultRelInfo, tupleid, NULL, - newslot, canSetTag, TABLE_MODIFY_WAIT, - NULL, &updateCxt); + newslot, canSetTag, options, + oldSlot, &updateCxt); /* * As in ExecUpdate(), if ExecUpdateAct() reports that a @@ -2719,7 +2729,7 @@ ExecMergeMatched(ModifyTableContext *context, ResultRelInfo *resultRelInfo, { ExecUpdateEpilogue(context, &updateCxt, resultRelInfo, NULL, newslot, - resultRelInfo->ri_oldTupleSlot); + oldSlot); mtstate->mt_merge_updated += 1; } break; @@ -2729,16 +2739,22 @@ ExecMergeMatched(ModifyTableContext *context, ResultRelInfo *resultRelInfo, if (!ExecDeletePrologue(context, resultRelInfo, tupleid, NULL, NULL, &result)) { + TupleTableSlot *trigOldSlot = ExecGetTriggerOldSlot(estate, resultRelInfo); + if (result == TM_Ok) goto out; /* "do nothing" */ + + if (trigOldSlot != oldSlot && result != TM_Deleted) + ExecCopySlot(oldSlot, trigOldSlot); + break; /* concurrent update/delete */ } result = ExecDeleteAct(context, resultRelInfo, tupleid, - false, TABLE_MODIFY_WAIT, NULL); + false, options, oldSlot); if (result == TM_Ok) { ExecDeleteEpilogue(context, resultRelInfo, NULL, - resultRelInfo->ri_oldTupleSlot, false); + oldSlot, false); mtstate->mt_merge_deleted += 1; } break; @@ -2815,9 +2831,7 @@ ExecMergeMatched(ModifyTableContext *context, ResultRelInfo *resultRelInfo, case TM_Updated: { Relation resultRelationDesc; - TupleTableSlot *epqslot, - *inputslot; - LockTupleMode lockmode; + TupleTableSlot *epqslot; /* * The target tuple was concurrently updated by some other @@ -2829,132 +2843,94 @@ ExecMergeMatched(ModifyTableContext *context, ResultRelInfo *resultRelInfo, * starting from the top, and execute the first qualifying * action. */ - resultRelationDesc = resultRelInfo->ri_RelationDesc; - lockmode = ExecUpdateLockMode(estate, resultRelInfo); - - inputslot = EvalPlanQualSlot(epqstate, resultRelationDesc, - resultRelInfo->ri_RangeTableIndex); - - result = table_tuple_lock(resultRelationDesc, tupleid, - estate->es_snapshot, - inputslot, estate->es_output_cid, - lockmode, LockWaitBlock, - TUPLE_LOCK_FLAG_FIND_LAST_VERSION, - &context->tmfd); - switch (result) - { - case TM_Ok: - epqslot = EvalPlanQual(epqstate, - resultRelationDesc, - resultRelInfo->ri_RangeTableIndex, - inputslot); + if (IsolationUsesXactSnapshot()) + ereport(ERROR, + (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE), + errmsg("could not serialize access due to concurrent update"))); - /* - * If we got no tuple, or the tuple we get has a - * NULL ctid, go back to caller: this one is not a - * MATCHED tuple anymore, so they can retry with - * NOT MATCHED actions. - */ - if (TupIsNull(epqslot)) - { - no_further_action = false; - goto out; - } + /* + * If the tuple was updated and migrated to + * another partition concurrently, the current + * MERGE implementation can't follow. There's + * probably a better way to handle this case, but + * it'd require recognizing the relation to which + * the tuple moved, and setting our current + * resultRelInfo to that. + */ + if (ItemPointerIndicatesMovedPartitions(&context->tmfd.ctid)) + ereport(ERROR, + (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE), + errmsg("tuple to be merged was already moved to another partition due to concurrent update"))); - /* - * Update tupleid to that of the new tuple, for - * the refetch we do at the top. - */ - tupleid = ExecGetJunkAttribute(epqslot, - resultRelInfo->ri_RowIdAttNo, - &isNull); - if (isNull) - { - no_further_action = false; - goto out; - } + resultRelationDesc = resultRelInfo->ri_RelationDesc; - /* - * When a tuple was updated and migrated to - * another partition concurrently, the current - * MERGE implementation can't follow. There's - * probably a better way to handle this case, but - * it'd require recognizing the relation to which - * the tuple moved, and setting our current - * resultRelInfo to that. - */ - if (ItemPointerIndicatesMovedPartitions(&context->tmfd.ctid)) - ereport(ERROR, - (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE), - errmsg("tuple to be deleted was already moved to another partition due to concurrent update"))); + epqslot = EvalPlanQual(epqstate, + resultRelationDesc, + resultRelInfo->ri_RangeTableIndex, + oldSlot); - /* - * A non-NULL ctid means that we are still dealing - * with MATCHED case. Restart the loop so that we - * apply all the MATCHED rules again, to ensure - * that the first qualifying WHEN MATCHED action - * is executed. - * - * tupleid has been updated to that of the new - * tuple, as required for the refetch we do at the - * top. - */ - if (resultRelInfo->ri_needLockTagTuple) - { - Assert(resultRelInfo->ri_RowRefType == ROW_REF_TID); - UnlockTuple(resultRelInfo->ri_RelationDesc, - &lockedtid, - InplaceUpdateTupleLock); - ItemPointerCopy(&context->tmfd.ctid, - (ItemPointer) tupleid); - } - goto lmerge_matched; - - case TM_Deleted: + /* + * If we got no tuple, or the tuple we get has a + * NULL ctid, go back to caller: this one is not a + * MATCHED tuple anymore, so they can retry with + * NOT MATCHED actions. + */ + if (TupIsNull(epqslot)) + { + no_further_action = false; + goto out; + } - /* - * tuple already deleted; tell caller to run NOT - * MATCHED actions - */ - no_further_action = false; - goto out; + /* + * Update tupleid to that of the new tuple, for + * the refetch we do at the top. + */ + tupleid = ExecGetJunkAttribute(epqslot, + resultRelInfo->ri_RowIdAttNo, + &isNull); + if (isNull) + { + no_further_action = false; + goto out; + } - case TM_SelfModified: - /* - * This can be reached when following an update - * chain from a tuple updated by another session, - * reaching a tuple that was already updated or - * deleted by the current command, or by a later - * command in the current transaction. As above, - * this should always be treated as an error. - */ - if (context->tmfd.cmax != estate->es_output_cid) - ereport(ERROR, - (errcode(ERRCODE_TRIGGERED_DATA_CHANGE_VIOLATION), - errmsg("tuple to be updated or deleted was already modified by an operation triggered by the current command"), - errhint("Consider using an AFTER trigger instead of a BEFORE trigger to propagate changes to other rows."))); - - if (TransactionIdIsCurrentTransactionId(context->tmfd.xmax)) - ereport(ERROR, - (errcode(ERRCODE_CARDINALITY_VIOLATION), - /* translator: %s is a SQL command name */ - errmsg("%s command cannot affect row a second time", - "MERGE"), - errhint("Ensure that not more than one source row matches any one target row."))); - - /* This shouldn't happen */ - elog(ERROR, "attempted to update or delete invisible tuple"); - no_further_action = false; - goto out; - - default: - /* see table_tuple_lock call in ExecDelete() */ - elog(ERROR, "unexpected table_tuple_lock status: %u", - result); - no_further_action = false; - goto out; + /* + * When a tuple was updated and migrated to + * another partition concurrently, the current + * MERGE implementation can't follow. There's + * probably a better way to handle this case, but + * it'd require recognizing the relation to which + * the tuple moved, and setting our current + * resultRelInfo to that. + */ + if (ItemPointerIndicatesMovedPartitions(&context->tmfd.ctid)) + ereport(ERROR, + (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE), + errmsg("tuple to be deleted was already moved to another partition due to concurrent update"))); + + /* + * A non-NULL ctid means that we are still dealing + * with MATCHED case. Restart the loop so that we + * apply all the MATCHED rules again, to ensure + * that the first qualifying WHEN MATCHED action + * is executed. + * + * tupleid has been updated to that of the new + * tuple, as required for the refetch we do at the + * top. + */ + if (resultRelInfo->ri_needLockTagTuple) + { + Assert(resultRelInfo->ri_RowRefType == ROW_REF_TID); + UnlockTuple(resultRelInfo->ri_RelationDesc, + &lockedtid, + InplaceUpdateTupleLock); + ItemPointerCopy(&context->tmfd.ctid, + (ItemPointer) tupleid); } + result = TM_Ok; + goto lmerge_matched; } case TM_Invisible: From 1f4a79afcedf4c658e9dfc4dbf115e647fc54015 Mon Sep 17 00:00:00 2001 From: Pavel Borisov Date: Mon, 23 Feb 2026 19:43:45 +0400 Subject: [PATCH 086/102] Fix recognizing extension wait events in isolation tester Instead of interperting all the extension wait events as process blocker, introduce a special PG_WAIT_EXTENSION_BLOCKED magic number for that purpose. --- src/backend/utils/adt/lockfuncs.c | 16 +++++++++++----- src/include/utils/wait_event.h | 2 ++ 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/src/backend/utils/adt/lockfuncs.c b/src/backend/utils/adt/lockfuncs.c index 6f7bcc4394c..8c13a3df638 100644 --- a/src/backend/utils/adt/lockfuncs.c +++ b/src/backend/utils/adt/lockfuncs.c @@ -602,6 +602,7 @@ pg_safe_snapshot_blocking_pids(PG_FUNCTION_ARGS) PG_RETURN_ARRAYTYPE_P(construct_array_builtin(blocker_datums, num_blockers, INT4OID)); } +#define UINT32_ACCESS_ONCE(var) ((uint32)(*((volatile uint32 *)&(var)))) /* * pg_isolation_test_session_is_blocked - support function for isolationtester @@ -617,9 +618,10 @@ pg_safe_snapshot_blocking_pids(PG_FUNCTION_ARGS) Datum pg_isolation_test_session_is_blocked(PG_FUNCTION_ARGS) { - PGPROC *blocked_proc; int blocked_pid = PG_GETARG_INT32(0); ArrayType *interesting_pids_a = PG_GETARG_ARRAYTYPE_P(1); + PGPROC *proc; + uint32 wait_event_info; ArrayType *blocking_pids_a; int32 *interesting_pids; int32 *blocking_pids; @@ -629,6 +631,14 @@ pg_isolation_test_session_is_blocked(PG_FUNCTION_ARGS) int i, j; + proc = BackendPidGetProc(blocked_pid); + if (proc == NULL) + PG_RETURN_BOOL(false); /* session gone: definitely unblocked */ + + wait_event_info = UINT32_ACCESS_ONCE(proc->wait_event_info); + if (wait_event_info == PG_WAIT_EXTENSION_BLOCKED) + PG_RETURN_BOOL(true); + /* Validate the passed-in array */ Assert(ARR_ELEMTYPE(interesting_pids_a) == INT4OID); if (array_contains_nulls(interesting_pids_a)) @@ -678,10 +688,6 @@ pg_isolation_test_session_is_blocked(PG_FUNCTION_ARGS) if (GetSafeSnapshotBlockingPids(blocked_pid, &dummy, 1) > 0) PG_RETURN_BOOL(true); - blocked_proc = BackendPidGetProc(blocked_pid); - if ((blocked_proc->wait_event_info & 0xFF000000) == PG_WAIT_EXTENSION) - PG_RETURN_BOOL(true); - PG_RETURN_BOOL(false); } diff --git a/src/include/utils/wait_event.h b/src/include/utils/wait_event.h index acf51475855..2d86304bb80 100644 --- a/src/include/utils/wait_event.h +++ b/src/include/utils/wait_event.h @@ -240,6 +240,8 @@ typedef enum } WaitEventIO; +#define PG_WAIT_EXTENSION_BLOCKED 0x07FFFFFFU + extern const char *pgstat_get_wait_event(uint32 wait_event_info); extern const char *pgstat_get_wait_event_type(uint32 wait_event_info); static inline void pgstat_report_wait_start(uint32 wait_event_info); From 651af5d49965d58295ca51f0b2c703067bff805c Mon Sep 17 00:00:00 2001 From: Alexander Korotkov Date: Wed, 18 Feb 2026 14:18:17 +0200 Subject: [PATCH 087/102] Modify the way ExecMergeMatched() detects moved partitions Use tupleid for ROW_REF_TID as in upsteam. Use &context->tmfd.ctid for ROW_REF_ROWID as in OrioleDB. --- src/backend/executor/nodeModifyTable.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/backend/executor/nodeModifyTable.c b/src/backend/executor/nodeModifyTable.c index f6078d596c7..81bccbf3519 100644 --- a/src/backend/executor/nodeModifyTable.c +++ b/src/backend/executor/nodeModifyTable.c @@ -2832,6 +2832,7 @@ ExecMergeMatched(ModifyTableContext *context, ResultRelInfo *resultRelInfo, { Relation resultRelationDesc; TupleTableSlot *epqslot; + ItemPointer movedPartitionCheckCtid; /* * The target tuple was concurrently updated by some other @@ -2857,7 +2858,11 @@ ExecMergeMatched(ModifyTableContext *context, ResultRelInfo *resultRelInfo, * the tuple moved, and setting our current * resultRelInfo to that. */ - if (ItemPointerIndicatesMovedPartitions(&context->tmfd.ctid)) + if (resultRelInfo->ri_RowRefType == ROW_REF_TID) + movedPartitionCheckCtid = (ItemPointer) tupleid; + else + movedPartitionCheckCtid = &context->tmfd.ctid; + if (ItemPointerIndicatesMovedPartitions(movedPartitionCheckCtid)) ereport(ERROR, (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE), errmsg("tuple to be merged was already moved to another partition due to concurrent update"))); From 9080ccecc72c0c1d5cac429b09d3a9ca00171ca9 Mon Sep 17 00:00:00 2001 From: stsargsyan Date: Fri, 27 Feb 2026 17:34:42 +0400 Subject: [PATCH 088/102] Allow pre-initialized catalog cache in single-user mode; fix missing o_unset_syscache_hooks call Single-user mode may run recovery and then start a session in the same process. When OrioleDB is loaded, recovery initializes the catalog cache, which caused session startup to fail because it expected a non-initialized state. Allow the catalog cache to be already initialized when starting a session in single-user mode after recovery. Additionally, fix a missing o_unset_syscache_hooks call, which left OrioleDB syscache hooks installed across the recovery/session boundary and contributed to crashes and inconsistent backend state. This ensures that backend-global state is properly handled when recovery and normal session execution happen within the same process. --- src/backend/utils/cache/relcache.c | 8 +++++++- src/backend/utils/cache/syscache.c | 10 +++++++++- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c index 0e2898579f7..4c0deb6f4e1 100644 --- a/src/backend/utils/cache/relcache.c +++ b/src/backend/utils/cache/relcache.c @@ -132,7 +132,7 @@ typedef struct relidcacheent Relation reldesc; } RelIdCacheEnt; -static HTAB *RelationIdCache; +static HTAB *RelationIdCache = NULL; /* * This flag is false until we have prepared the critical relcache entries @@ -3946,6 +3946,12 @@ RelationCacheInitialize(void) if (!CacheMemoryContext) CreateCacheMemoryContext(); + /* + * nothing to do if it's already done + */ + if (RelationIdCache) + return; + /* * create hashtable that indexes the relcache */ diff --git a/src/backend/utils/cache/syscache.c b/src/backend/utils/cache/syscache.c index 0bbd0b5c775..cc75aa8e636 100644 --- a/src/backend/utils/cache/syscache.c +++ b/src/backend/utils/cache/syscache.c @@ -714,7 +714,15 @@ InitCatalogCache(void) { int cacheId; - Assert(!CacheInitialized); + /* + * In single user mode, recovery might set up + * catalog caches before InitPostgres is called. + */ + Assert(!CacheInitialized || !IsUnderPostmaster); + + /* nothing to do if it's already done */ + if (CacheInitialized) + return; SysCacheRelationOidSize = SysCacheSupportingRelOidSize = 0; From a70df44b09165925c5947047ee141ff75f4cb261 Mon Sep 17 00:00:00 2001 From: GremSnoort Date: Mon, 2 Mar 2026 17:08:50 +0300 Subject: [PATCH 089/102] Enable more regress tests for Oriole --- src/test/regress/parallel_schedule_oriole | 31 ++++++++++++++--------- 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/src/test/regress/parallel_schedule_oriole b/src/test/regress/parallel_schedule_oriole index cc66f1cd8c6..cad4f2be71c 100644 --- a/src/test/regress/parallel_schedule_oriole +++ b/src/test/regress/parallel_schedule_oriole @@ -14,21 +14,21 @@ test: test_setup # ---------- # The first group of parallel tests # ---------- -test: boolean char name varchar text int2 int4 int8 oid float4 float8 bit numeric txid uuid enum money pg_lsn regproc +test: boolean char name varchar text int2 int4 int8 oid float4 float8 bit numeric txid uuid enum money rangetypes pg_lsn regproc # ---------- # The second group of parallel tests # multirangetypes depends on rangetypes # multirangetypes shouldn't run concurrently with type_sanity # ---------- -test: md5 numerology lseg line path circle date time timetz timestamp timestamptz interval inet macaddr macaddr8 +test: md5 numerology point lseg line box path circle date time timetz timestamp timestamptz interval inet macaddr macaddr8 multirangetypes # ---------- # Another group of parallel tests # geometry depends on point, lseg, line, box, path, polygon, circle # horology depends on date, time, timetz, timestamp, timestamptz, interval # ---------- -test: horology tstypes regex comments expressions unicode xid mvcc +test: horology tstypes regex type_sanity misc_sanity comments expressions unicode xid mvcc database # ---------- # Load huge amounts of data @@ -42,29 +42,31 @@ test: copy copyselect copydml # More groups of parallel tests # Note: many of the tests in later groups depend on create_index # ---------- -test: create_function_c create_misc create_operator create_procedure create_type create_schema create_view +test: create_function_c create_misc create_operator create_procedure create_table create_type create_schema +test: create_view # ---------- # Another group of parallel tests # ---------- -test: create_aggregate create_function_sql create_cast typed_table drop_if_exists +test: create_aggregate create_function_sql create_cast typed_table drop_if_exists roleattributes hash_func errors infinite_recurse # ---------- # sanity_check does a vacuum, affecting the sort order of SELECT * # results. So it should not run parallel to other tests. # ---------- +test: sanity_check # ---------- # Another group of parallel tests # aggregates depends on create_aggregate # join depends on create_misc # ---------- -test: select_into select_implicit select_having random delete +test: select_into select_implicit select_having random hash_index delete # ---------- # Another group of parallel tests # ---------- -test: init_privs security_label collate object_address drop_operator password +test: gist init_privs security_label collate lock object_address drop_operator password # ---------- # Additional BRIN tests @@ -78,12 +80,17 @@ test: init_privs security_label collate object_address drop_operator password test: alter_generic alter_operator misc async dbsize sysviews tsrf create_role # collate.linux.utf8 and collate.icu.utf8 tests cannot be run in parallel with each other +test: psql_crosstab collate.linux.utf8 collate.windows.win1252 # ---------- # Run these alone so they don't run out of parallel workers # select_parallel depends on create_misc # ---------- +# Run this alone, because concurrent DROP TABLE would make non-superuser +# "ANALYZE;" fail with "relation with OID $n does not exist". +test: maintain_every + # no relation related tests can be put in this group test: subscription @@ -91,12 +98,12 @@ test: subscription # Another group of parallel tests # select_views depends on create_view # ---------- -test: select_views portals_p2 dependency guc bitmapops xmlmap functional_deps advisory_lock +test: select_views portals_p2 dependency guc bitmapops tsdicts foreign_data xmlmap functional_deps advisory_lock # ---------- # Another group of parallel tests (JSON related) # ---------- -test: json_encoding jsonpath jsonpath_encoding jsonb_jsonpath sqljson +test: json json_encoding jsonpath jsonpath_encoding jsonb_jsonpath sqljson # ---------- # Another group of parallel tests @@ -104,7 +111,7 @@ test: json_encoding jsonpath jsonpath_encoding jsonb_jsonpath sqljson # NB: temp.sql does a reconnect which transiently uses 2 connections, # so keep this parallel group to at most 19 tests # ---------- -test: plancache rangefuncs prepare sequence polymorphism largeobject xml +test: plancache rangefuncs prepare truncate sequence polymorphism largeobject xml # ---------- # Another group of parallel tests @@ -112,12 +119,12 @@ test: plancache rangefuncs prepare sequence polymorphism largeobject xml # The stats test resets stats, so nothing else needing stats access can be in # this group. # ---------- -test: hash_part +test: hash_part partition_aggregate partition_info # event_trigger depends on create_am and cannot run concurrently with # any test that runs DDL # oidjoins is read-only, though, and should run late for best coverage -test: oidjoins +test: oidjoins # this test also uses event triggers, so likewise run it by itself From 332f4571f682a5f5e7a89b57d84be16ad24b7c52 Mon Sep 17 00:00:00 2001 From: Artur Zakirov Date: Tue, 3 Mar 2026 17:12:50 +0100 Subject: [PATCH 090/102] Bump version of actions/checkout to v6 The latest actions/checkout@v6 has improved credential security. --- .github/workflows/build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index f22a5232baf..3f22a6f14f7 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -19,7 +19,7 @@ jobs: CHECK_TYPE: ${{ matrix.check_type }} steps: - name: Checkout code into workspace directory - uses: actions/checkout@v2 + uses: actions/checkout@v6 - name: Setup prerequisites run: bash ./ci/prerequisites.sh - name: Build From 0111abac6cf937e9c7986f5d3e842cfb48748344 Mon Sep 17 00:00:00 2001 From: Reutt Elizaveta Date: Fri, 16 Jan 2026 09:26:43 +0300 Subject: [PATCH 091/102] Add alter table tests to oriole vanila regress --- src/test/regress/expected/alter_table_1.out | 4769 +++++++++++++++++ .../regress/expected/create_index_oriole.out | 6 + src/test/regress/parallel_schedule_oriole | 3 +- src/test/regress/sql/create_index_oriole.sql | 6 + 4 files changed, 4783 insertions(+), 1 deletion(-) create mode 100644 src/test/regress/expected/alter_table_1.out create mode 100644 src/test/regress/expected/create_index_oriole.out create mode 100644 src/test/regress/sql/create_index_oriole.sql diff --git a/src/test/regress/expected/alter_table_1.out b/src/test/regress/expected/alter_table_1.out new file mode 100644 index 00000000000..5db9b8536cf --- /dev/null +++ b/src/test/regress/expected/alter_table_1.out @@ -0,0 +1,4769 @@ +-- +-- ALTER_TABLE +-- +-- Clean up in case a prior regression run failed +SET client_min_messages TO 'warning'; +DROP ROLE IF EXISTS regress_alter_table_user1; +RESET client_min_messages; +CREATE USER regress_alter_table_user1; +-- +-- add attribute +-- +CREATE TABLE attmp (initial int4); +COMMENT ON TABLE attmp_wrong IS 'table comment'; +ERROR: relation "attmp_wrong" does not exist +COMMENT ON TABLE attmp IS 'table comment'; +COMMENT ON TABLE attmp IS NULL; +ALTER TABLE attmp ADD COLUMN xmin integer; -- fails +ERROR: column name "xmin" conflicts with a system column name +ALTER TABLE attmp ADD COLUMN a int4 default 3; +ALTER TABLE attmp ADD COLUMN b name; +ALTER TABLE attmp ADD COLUMN c text; +ALTER TABLE attmp ADD COLUMN d float8; +ALTER TABLE attmp ADD COLUMN e float4; +ALTER TABLE attmp ADD COLUMN f int2; +ALTER TABLE attmp ADD COLUMN g polygon; +ALTER TABLE attmp ADD COLUMN i char; +ALTER TABLE attmp ADD COLUMN k int4; +ALTER TABLE attmp ADD COLUMN l tid; +ALTER TABLE attmp ADD COLUMN m xid; +ALTER TABLE attmp ADD COLUMN n oidvector; +--ALTER TABLE attmp ADD COLUMN o lock; +ALTER TABLE attmp ADD COLUMN p boolean; +ALTER TABLE attmp ADD COLUMN q point; +ALTER TABLE attmp ADD COLUMN r lseg; +ALTER TABLE attmp ADD COLUMN s path; +ALTER TABLE attmp ADD COLUMN t box; +ALTER TABLE attmp ADD COLUMN v timestamp; +ALTER TABLE attmp ADD COLUMN w interval; +ALTER TABLE attmp ADD COLUMN x float8[]; +ALTER TABLE attmp ADD COLUMN y float4[]; +ALTER TABLE attmp ADD COLUMN z int2[]; +INSERT INTO attmp (a, b, c, d, e, f, g, i, k, l, m, n, p, q, r, s, t, + v, w, x, y, z) + VALUES (4, 'name', 'text', 4.1, 4.1, 2, '(4.1,4.1,3.1,3.1)', + 'c', + 314159, '(1,1)', '512', + '1 2 3 4 5 6 7 8', true, '(1.1,1.1)', '(4.1,4.1,3.1,3.1)', + '(0,2,4.1,4.1,3.1,3.1)', '(4.1,4.1,3.1,3.1)', + 'epoch', '01:00:10', '{1.0,2.0,3.0,4.0}', '{1.0,2.0,3.0,4.0}', '{1,2,3,4}'); +SELECT * FROM attmp; + initial | a | b | c | d | e | f | g | i | k | l | m | n | p | q | r | s | t | v | w | x | y | z +---------+---+------+------+-----+-----+---+-----------------------+---+--------+-------+-----+-----------------+---+-----------+-----------------------+-----------------------------+---------------------+--------------------------+------------------+-----------+-----------+----------- + | 4 | name | text | 4.1 | 4.1 | 2 | ((4.1,4.1),(3.1,3.1)) | c | 314159 | (1,1) | 512 | 1 2 3 4 5 6 7 8 | t | (1.1,1.1) | [(4.1,4.1),(3.1,3.1)] | ((0,2),(4.1,4.1),(3.1,3.1)) | (4.1,4.1),(3.1,3.1) | Thu Jan 01 00:00:00 1970 | @ 1 hour 10 secs | {1,2,3,4} | {1,2,3,4} | {1,2,3,4} +(1 row) + +DROP TABLE attmp; +-- the wolf bug - schema mods caused inconsistent row descriptors +CREATE TABLE attmp ( + initial int4 +); +ALTER TABLE attmp ADD COLUMN a int4; +ALTER TABLE attmp ADD COLUMN b name; +ALTER TABLE attmp ADD COLUMN c text; +ALTER TABLE attmp ADD COLUMN d float8; +ALTER TABLE attmp ADD COLUMN e float4; +ALTER TABLE attmp ADD COLUMN f int2; +ALTER TABLE attmp ADD COLUMN g polygon; +ALTER TABLE attmp ADD COLUMN i char; +ALTER TABLE attmp ADD COLUMN k int4; +ALTER TABLE attmp ADD COLUMN l tid; +ALTER TABLE attmp ADD COLUMN m xid; +ALTER TABLE attmp ADD COLUMN n oidvector; +--ALTER TABLE attmp ADD COLUMN o lock; +ALTER TABLE attmp ADD COLUMN p boolean; +ALTER TABLE attmp ADD COLUMN q point; +ALTER TABLE attmp ADD COLUMN r lseg; +ALTER TABLE attmp ADD COLUMN s path; +ALTER TABLE attmp ADD COLUMN t box; +ALTER TABLE attmp ADD COLUMN v timestamp; +ALTER TABLE attmp ADD COLUMN w interval; +ALTER TABLE attmp ADD COLUMN x float8[]; +ALTER TABLE attmp ADD COLUMN y float4[]; +ALTER TABLE attmp ADD COLUMN z int2[]; +INSERT INTO attmp (a, b, c, d, e, f, g, i, k, l, m, n, p, q, r, s, t, + v, w, x, y, z) + VALUES (4, 'name', 'text', 4.1, 4.1, 2, '(4.1,4.1,3.1,3.1)', + 'c', + 314159, '(1,1)', '512', + '1 2 3 4 5 6 7 8', true, '(1.1,1.1)', '(4.1,4.1,3.1,3.1)', + '(0,2,4.1,4.1,3.1,3.1)', '(4.1,4.1,3.1,3.1)', + 'epoch', '01:00:10', '{1.0,2.0,3.0,4.0}', '{1.0,2.0,3.0,4.0}', '{1,2,3,4}'); +SELECT * FROM attmp; + initial | a | b | c | d | e | f | g | i | k | l | m | n | p | q | r | s | t | v | w | x | y | z +---------+---+------+------+-----+-----+---+-----------------------+---+--------+-------+-----+-----------------+---+-----------+-----------------------+-----------------------------+---------------------+--------------------------+------------------+-----------+-----------+----------- + | 4 | name | text | 4.1 | 4.1 | 2 | ((4.1,4.1),(3.1,3.1)) | c | 314159 | (1,1) | 512 | 1 2 3 4 5 6 7 8 | t | (1.1,1.1) | [(4.1,4.1),(3.1,3.1)] | ((0,2),(4.1,4.1),(3.1,3.1)) | (4.1,4.1),(3.1,3.1) | Thu Jan 01 00:00:00 1970 | @ 1 hour 10 secs | {1,2,3,4} | {1,2,3,4} | {1,2,3,4} +(1 row) + +CREATE INDEX attmp_idx ON attmp (a, (d + e), b); +ALTER INDEX attmp_idx ALTER COLUMN 0 SET STATISTICS 1000; +ERROR: column number must be in range from 1 to 32767 +LINE 1: ALTER INDEX attmp_idx ALTER COLUMN 0 SET STATISTICS 1000; + ^ +ALTER INDEX attmp_idx ALTER COLUMN 1 SET STATISTICS 1000; +ERROR: cannot alter statistics on non-expression column "a" of index "attmp_idx" +HINT: Alter statistics on table column instead. +ALTER INDEX attmp_idx ALTER COLUMN 2 SET STATISTICS 1000; +\d+ attmp_idx + Index "public.attmp_idx" + Column | Type | Key? | Definition | Storage | Stats target +--------+------------------+------+------------+---------+-------------- + a | integer | yes | a | plain | + expr | double precision | yes | (d + e) | plain | 1000 + b | cstring | yes | b | plain | +btree, for table "public.attmp" + +ALTER INDEX attmp_idx ALTER COLUMN 3 SET STATISTICS 1000; +ERROR: cannot alter statistics on non-expression column "b" of index "attmp_idx" +HINT: Alter statistics on table column instead. +ALTER INDEX attmp_idx ALTER COLUMN 4 SET STATISTICS 1000; +ERROR: column number 4 of relation "attmp_idx" does not exist +ALTER INDEX attmp_idx ALTER COLUMN 2 SET STATISTICS -1; +DROP TABLE attmp; +-- +-- rename - check on both non-temp and temp tables +-- +CREATE TABLE attmp (regtable int); +CREATE TEMP TABLE attmp (attmptable int); +ALTER TABLE attmp RENAME TO attmp_new; +SELECT * FROM attmp; + regtable +---------- +(0 rows) + +SELECT * FROM attmp_new; + attmptable +------------ +(0 rows) + +ALTER TABLE attmp RENAME TO attmp_new2; +SELECT * FROM attmp; -- should fail +ERROR: relation "attmp" does not exist +LINE 1: SELECT * FROM attmp; + ^ +SELECT * FROM attmp_new; + attmptable +------------ +(0 rows) + +SELECT * FROM attmp_new2; + regtable +---------- +(0 rows) + +DROP TABLE attmp_new; +DROP TABLE attmp_new2; +-- check rename of partitioned tables and indexes also +CREATE TABLE part_attmp (a int primary key) partition by range (a); +CREATE TABLE part_attmp1 PARTITION OF part_attmp FOR VALUES FROM (0) TO (100); +ALTER INDEX part_attmp_pkey RENAME TO part_attmp_index; +ALTER INDEX part_attmp1_pkey RENAME TO part_attmp1_index; +ALTER TABLE part_attmp RENAME TO part_at2tmp; +ALTER TABLE part_attmp1 RENAME TO part_at2tmp1; +SET ROLE regress_alter_table_user1; +ALTER INDEX part_attmp_index RENAME TO fail; +ERROR: must be owner of index part_attmp_index +ALTER INDEX part_attmp1_index RENAME TO fail; +ERROR: must be owner of index part_attmp1_index +ALTER TABLE part_at2tmp RENAME TO fail; +ERROR: must be owner of table part_at2tmp +ALTER TABLE part_at2tmp1 RENAME TO fail; +ERROR: must be owner of table part_at2tmp1 +RESET ROLE; +DROP TABLE part_at2tmp; +-- +-- check renaming to a table's array type's autogenerated name +-- (the array type's name should get out of the way) +-- +CREATE TABLE attmp_array (id int); +CREATE TABLE attmp_array2 (id int); +SELECT typname FROM pg_type WHERE oid = 'attmp_array[]'::regtype; + typname +-------------- + _attmp_array +(1 row) + +SELECT typname FROM pg_type WHERE oid = 'attmp_array2[]'::regtype; + typname +--------------- + _attmp_array2 +(1 row) + +ALTER TABLE attmp_array2 RENAME TO _attmp_array; +SELECT typname FROM pg_type WHERE oid = 'attmp_array[]'::regtype; + typname +--------------- + __attmp_array +(1 row) + +SELECT typname FROM pg_type WHERE oid = '_attmp_array[]'::regtype; + typname +----------------- + __attmp_array_1 +(1 row) + +DROP TABLE _attmp_array; +DROP TABLE attmp_array; +-- renaming to table's own array type's name is an interesting corner case +CREATE TABLE attmp_array (id int); +SELECT typname FROM pg_type WHERE oid = 'attmp_array[]'::regtype; + typname +-------------- + _attmp_array +(1 row) + +ALTER TABLE attmp_array RENAME TO _attmp_array; +SELECT typname FROM pg_type WHERE oid = '_attmp_array[]'::regtype; + typname +--------------- + __attmp_array +(1 row) + +DROP TABLE _attmp_array; +-- ALTER TABLE ... RENAME on non-table relations +-- renaming indexes (FIXME: this should probably test the index's functionality) +ALTER INDEX IF EXISTS __onek_unique1 RENAME TO attmp_onek_unique1; +NOTICE: relation "__onek_unique1" does not exist, skipping +ALTER INDEX IF EXISTS __attmp_onek_unique1 RENAME TO onek_unique1; +NOTICE: relation "__attmp_onek_unique1" does not exist, skipping +ALTER INDEX onek_unique1 RENAME TO attmp_onek_unique1; +ALTER INDEX attmp_onek_unique1 RENAME TO onek_unique1; +SET ROLE regress_alter_table_user1; +ALTER INDEX onek_unique1 RENAME TO fail; -- permission denied +ERROR: must be owner of index onek_unique1 +RESET ROLE; +-- rename statements with mismatching statement and object types +CREATE TABLE alter_idx_rename_test (a INT); +CREATE INDEX alter_idx_rename_test_idx ON alter_idx_rename_test (a); +CREATE TABLE alter_idx_rename_test_parted (a INT) PARTITION BY LIST (a); +CREATE INDEX alter_idx_rename_test_parted_idx ON alter_idx_rename_test_parted (a); +BEGIN; +ALTER INDEX alter_idx_rename_test RENAME TO alter_idx_rename_test_2; +ALTER INDEX alter_idx_rename_test_parted RENAME TO alter_idx_rename_test_parted_2; +SELECT relation::regclass, mode FROM pg_locks +WHERE pid = pg_backend_pid() AND locktype = 'relation' + AND relation::regclass::text LIKE 'alter\_idx%' +ORDER BY relation::regclass::text COLLATE "C"; + relation | mode +--------------------------------+--------------------- + alter_idx_rename_test_2 | AccessExclusiveLock + alter_idx_rename_test_parted_2 | AccessExclusiveLock +(2 rows) + +COMMIT; +BEGIN; +ALTER INDEX alter_idx_rename_test_idx RENAME TO alter_idx_rename_test_idx_2; +ALTER INDEX alter_idx_rename_test_parted_idx RENAME TO alter_idx_rename_test_parted_idx_2; +SELECT relation::regclass, mode FROM pg_locks +WHERE pid = pg_backend_pid() AND locktype = 'relation' + AND relation::regclass::text LIKE 'alter\_idx%' +ORDER BY relation::regclass::text COLLATE "C"; + relation | mode +------------------------------------+-------------------------- + alter_idx_rename_test_idx_2 | ShareUpdateExclusiveLock + alter_idx_rename_test_parted_idx_2 | ShareUpdateExclusiveLock +(2 rows) + +COMMIT; +BEGIN; +ALTER TABLE alter_idx_rename_test_idx_2 RENAME TO alter_idx_rename_test_idx_3; +ALTER TABLE alter_idx_rename_test_parted_idx_2 RENAME TO alter_idx_rename_test_parted_idx_3; +SELECT relation::regclass, mode FROM pg_locks +WHERE pid = pg_backend_pid() AND locktype = 'relation' + AND relation::regclass::text LIKE 'alter\_idx%' +ORDER BY relation::regclass::text COLLATE "C"; + relation | mode +------------------------------------+--------------------- + alter_idx_rename_test_idx_3 | AccessExclusiveLock + alter_idx_rename_test_parted_idx_3 | AccessExclusiveLock +(2 rows) + +COMMIT; +DROP TABLE alter_idx_rename_test_2; +-- renaming views +CREATE VIEW attmp_view (unique1) AS SELECT unique1 FROM tenk1; +ALTER TABLE attmp_view RENAME TO attmp_view_new; +SET ROLE regress_alter_table_user1; +ALTER VIEW attmp_view_new RENAME TO fail; -- permission denied +ERROR: must be owner of view attmp_view_new +RESET ROLE; +-- hack to ensure we get an indexscan here +set enable_seqscan to off; +set enable_bitmapscan to off; +-- 5 values, sorted +SELECT unique1 FROM tenk1 WHERE unique1 < 5; + unique1 +--------- + 0 + 1 + 2 + 3 + 4 +(5 rows) + +reset enable_seqscan; +reset enable_bitmapscan; +DROP VIEW attmp_view_new; +-- toast-like relation name +alter table stud_emp rename to pg_toast_stud_emp; +alter table pg_toast_stud_emp rename to stud_emp; +-- renaming index should rename constraint as well +ALTER TABLE onek ADD CONSTRAINT onek_unique1_constraint UNIQUE (unique1); +ALTER INDEX onek_unique1_constraint RENAME TO onek_unique1_constraint_foo; +ALTER TABLE onek DROP CONSTRAINT onek_unique1_constraint_foo; +-- renaming constraint +ALTER TABLE onek ADD CONSTRAINT onek_check_constraint CHECK (unique1 >= 0); +ALTER TABLE onek RENAME CONSTRAINT onek_check_constraint TO onek_check_constraint_foo; +ALTER TABLE onek DROP CONSTRAINT onek_check_constraint_foo; +-- renaming constraint should rename index as well +ALTER TABLE onek ADD CONSTRAINT onek_unique1_constraint UNIQUE (unique1); +DROP INDEX onek_unique1_constraint; -- to see whether it's there +ERROR: cannot drop index onek_unique1_constraint because constraint onek_unique1_constraint on table onek requires it +HINT: You can drop constraint onek_unique1_constraint on table onek instead. +ALTER TABLE onek RENAME CONSTRAINT onek_unique1_constraint TO onek_unique1_constraint_foo; +DROP INDEX onek_unique1_constraint_foo; -- to see whether it's there +ERROR: cannot drop index onek_unique1_constraint_foo because constraint onek_unique1_constraint_foo on table onek requires it +HINT: You can drop constraint onek_unique1_constraint_foo on table onek instead. +ALTER TABLE onek DROP CONSTRAINT onek_unique1_constraint_foo; +-- renaming constraints vs. inheritance +CREATE TABLE constraint_rename_test (a int CONSTRAINT con1 CHECK (a > 0), b int, c int); +\d constraint_rename_test + Table "public.constraint_rename_test" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + a | integer | | | + b | integer | | | + c | integer | | | +Check constraints: + "con1" CHECK (a > 0) + +CREATE TABLE constraint_rename_test2 (a int CONSTRAINT con1 CHECK (a > 0), d int) INHERITS (constraint_rename_test); +NOTICE: merging column "a" with inherited definition +NOTICE: merging constraint "con1" with inherited definition +\d constraint_rename_test2 + Table "public.constraint_rename_test2" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + a | integer | | | + b | integer | | | + c | integer | | | + d | integer | | | +Check constraints: + "con1" CHECK (a > 0) +Inherits: constraint_rename_test + +ALTER TABLE constraint_rename_test2 RENAME CONSTRAINT con1 TO con1foo; -- fail +ERROR: cannot rename inherited constraint "con1" +ALTER TABLE ONLY constraint_rename_test RENAME CONSTRAINT con1 TO con1foo; -- fail +ERROR: inherited constraint "con1" must be renamed in child tables too +ALTER TABLE constraint_rename_test RENAME CONSTRAINT con1 TO con1foo; -- ok +\d constraint_rename_test + Table "public.constraint_rename_test" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + a | integer | | | + b | integer | | | + c | integer | | | +Check constraints: + "con1foo" CHECK (a > 0) +Number of child tables: 1 (Use \d+ to list them.) + +\d constraint_rename_test2 + Table "public.constraint_rename_test2" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + a | integer | | | + b | integer | | | + c | integer | | | + d | integer | | | +Check constraints: + "con1foo" CHECK (a > 0) +Inherits: constraint_rename_test + +ALTER TABLE constraint_rename_test ADD CONSTRAINT con2 CHECK (b > 0) NO INHERIT; +ALTER TABLE ONLY constraint_rename_test RENAME CONSTRAINT con2 TO con2foo; -- ok +ALTER TABLE constraint_rename_test RENAME CONSTRAINT con2foo TO con2bar; -- ok +\d constraint_rename_test + Table "public.constraint_rename_test" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + a | integer | | | + b | integer | | | + c | integer | | | +Check constraints: + "con1foo" CHECK (a > 0) + "con2bar" CHECK (b > 0) NO INHERIT +Number of child tables: 1 (Use \d+ to list them.) + +\d constraint_rename_test2 + Table "public.constraint_rename_test2" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + a | integer | | | + b | integer | | | + c | integer | | | + d | integer | | | +Check constraints: + "con1foo" CHECK (a > 0) +Inherits: constraint_rename_test + +ALTER TABLE constraint_rename_test ADD CONSTRAINT con3 PRIMARY KEY (a); +ALTER TABLE constraint_rename_test RENAME CONSTRAINT con3 TO con3foo; -- ok +\d constraint_rename_test + Table "public.constraint_rename_test" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + a | integer | | not null | + b | integer | | | + c | integer | | | +Indexes: + "con3foo" PRIMARY KEY, btree (a) +Check constraints: + "con1foo" CHECK (a > 0) + "con2bar" CHECK (b > 0) NO INHERIT +Number of child tables: 1 (Use \d+ to list them.) + +\d constraint_rename_test2 + Table "public.constraint_rename_test2" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + a | integer | | not null | + b | integer | | | + c | integer | | | + d | integer | | | +Check constraints: + "con1foo" CHECK (a > 0) +Inherits: constraint_rename_test + +DROP TABLE constraint_rename_test2; +DROP TABLE constraint_rename_test; +ALTER TABLE IF EXISTS constraint_not_exist RENAME CONSTRAINT con3 TO con3foo; -- ok +NOTICE: relation "constraint_not_exist" does not exist, skipping +ALTER TABLE IF EXISTS constraint_rename_test ADD CONSTRAINT con4 UNIQUE (a); +NOTICE: relation "constraint_rename_test" does not exist, skipping +-- renaming constraints with cache reset of target relation +CREATE TABLE constraint_rename_cache (a int, + CONSTRAINT chk_a CHECK (a > 0), + PRIMARY KEY (a)); +ALTER TABLE constraint_rename_cache + RENAME CONSTRAINT chk_a TO chk_a_new; +ALTER TABLE constraint_rename_cache + RENAME CONSTRAINT constraint_rename_cache_pkey TO constraint_rename_pkey_new; +CREATE TABLE like_constraint_rename_cache + (LIKE constraint_rename_cache INCLUDING ALL); +\d like_constraint_rename_cache + Table "public.like_constraint_rename_cache" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + a | integer | | not null | +Indexes: + "like_constraint_rename_cache_pkey" PRIMARY KEY, btree (a) +Check constraints: + "chk_a_new" CHECK (a > 0) + +DROP TABLE constraint_rename_cache; +DROP TABLE like_constraint_rename_cache; +-- FOREIGN KEY CONSTRAINT adding TEST +CREATE TABLE attmp2 (a int primary key); +CREATE TABLE attmp3 (a int, b int); +CREATE TABLE attmp4 (a int, b int, unique(a,b)); +CREATE TABLE attmp5 (a int, b int); +-- Insert rows into attmp2 (pktable) +INSERT INTO attmp2 values (1); +INSERT INTO attmp2 values (2); +INSERT INTO attmp2 values (3); +INSERT INTO attmp2 values (4); +-- Insert rows into attmp3 +INSERT INTO attmp3 values (1,10); +INSERT INTO attmp3 values (1,20); +INSERT INTO attmp3 values (5,50); +-- Try (and fail) to add constraint due to invalid source columns +ALTER TABLE attmp3 add constraint attmpconstr foreign key(c) references attmp2 match full; +ERROR: column "c" referenced in foreign key constraint does not exist +-- Try (and fail) to add constraint due to invalid destination columns explicitly given +ALTER TABLE attmp3 add constraint attmpconstr foreign key(a) references attmp2(b) match full; +ERROR: column "b" referenced in foreign key constraint does not exist +-- Try (and fail) to add constraint due to invalid data +ALTER TABLE attmp3 add constraint attmpconstr foreign key (a) references attmp2 match full; +ERROR: insert or update on table "attmp3" violates foreign key constraint "attmpconstr" +DETAIL: Key (a)=(5) is not present in table "attmp2". +-- Delete failing row +DELETE FROM attmp3 where a=5; +-- Try (and succeed) +ALTER TABLE attmp3 add constraint attmpconstr foreign key (a) references attmp2 match full; +ALTER TABLE attmp3 drop constraint attmpconstr; +INSERT INTO attmp3 values (5,50); +-- Try NOT VALID and then VALIDATE CONSTRAINT, but fails. Delete failure then re-validate +ALTER TABLE attmp3 add constraint attmpconstr foreign key (a) references attmp2 match full NOT VALID; +ALTER TABLE attmp3 validate constraint attmpconstr; +ERROR: insert or update on table "attmp3" violates foreign key constraint "attmpconstr" +DETAIL: Key (a)=(5) is not present in table "attmp2". +-- Delete failing row +DELETE FROM attmp3 where a=5; +-- Try (and succeed) and repeat to show it works on already valid constraint +ALTER TABLE attmp3 validate constraint attmpconstr; +ALTER TABLE attmp3 validate constraint attmpconstr; +-- Try a non-verified CHECK constraint +ALTER TABLE attmp3 ADD CONSTRAINT b_greater_than_ten CHECK (b > 10); -- fail +ERROR: check constraint "b_greater_than_ten" of relation "attmp3" is violated by some row +ALTER TABLE attmp3 ADD CONSTRAINT b_greater_than_ten CHECK (b > 10) NOT VALID; -- succeeds +ALTER TABLE attmp3 VALIDATE CONSTRAINT b_greater_than_ten; -- fails +ERROR: check constraint "b_greater_than_ten" of relation "attmp3" is violated by some row +DELETE FROM attmp3 WHERE NOT b > 10; +ALTER TABLE attmp3 VALIDATE CONSTRAINT b_greater_than_ten; -- succeeds +ALTER TABLE attmp3 VALIDATE CONSTRAINT b_greater_than_ten; -- succeeds +-- Test inherited NOT VALID CHECK constraints +select * from attmp3; + a | b +---+---- + 1 | 20 +(1 row) + +CREATE TABLE attmp6 () INHERITS (attmp3); +CREATE TABLE attmp7 () INHERITS (attmp3); +INSERT INTO attmp6 VALUES (6, 30), (7, 16); +ALTER TABLE attmp3 ADD CONSTRAINT b_le_20 CHECK (b <= 20) NOT VALID; +ALTER TABLE attmp3 VALIDATE CONSTRAINT b_le_20; -- fails +ERROR: check constraint "b_le_20" of relation "attmp6" is violated by some row +DELETE FROM attmp6 WHERE b > 20; +ALTER TABLE attmp3 VALIDATE CONSTRAINT b_le_20; -- succeeds +-- An already validated constraint must not be revalidated +CREATE FUNCTION boo(int) RETURNS int IMMUTABLE STRICT LANGUAGE plpgsql AS $$ BEGIN RAISE NOTICE 'boo: %', $1; RETURN $1; END; $$; +INSERT INTO attmp7 VALUES (8, 18); +ALTER TABLE attmp7 ADD CONSTRAINT identity CHECK (b = boo(b)); +NOTICE: boo: 18 +ALTER TABLE attmp3 ADD CONSTRAINT IDENTITY check (b = boo(b)) NOT VALID; +NOTICE: merging constraint "identity" with inherited definition +ALTER TABLE attmp3 VALIDATE CONSTRAINT identity; +NOTICE: boo: 20 +NOTICE: boo: 16 +-- A NO INHERIT constraint should not be looked for in children during VALIDATE CONSTRAINT +create table parent_noinh_convalid (a int); +create table child_noinh_convalid () inherits (parent_noinh_convalid); +insert into parent_noinh_convalid values (1); +insert into child_noinh_convalid values (1); +alter table parent_noinh_convalid add constraint check_a_is_2 check (a = 2) no inherit not valid; +-- fail, because of the row in parent +alter table parent_noinh_convalid validate constraint check_a_is_2; +ERROR: check constraint "check_a_is_2" of relation "parent_noinh_convalid" is violated by some row +delete from only parent_noinh_convalid; +-- ok (parent itself contains no violating rows) +alter table parent_noinh_convalid validate constraint check_a_is_2; +select convalidated from pg_constraint where conrelid = 'parent_noinh_convalid'::regclass and conname = 'check_a_is_2'; + convalidated +-------------- + t +(1 row) + +-- cleanup +drop table parent_noinh_convalid, child_noinh_convalid; +-- Try (and fail) to create constraint from attmp5(a) to attmp4(a) - unique constraint on +-- attmp4 is a,b +ALTER TABLE attmp5 add constraint attmpconstr foreign key(a) references attmp4(a) match full; +ERROR: there is no unique constraint matching given keys for referenced table "attmp4" +DROP TABLE attmp7; +DROP TABLE attmp6; +DROP TABLE attmp5; +DROP TABLE attmp4; +DROP TABLE attmp3; +DROP TABLE attmp2; +-- NOT VALID with plan invalidation -- ensure we don't use a constraint for +-- exclusion until validated +set constraint_exclusion TO 'partition'; +create table nv_parent (d date, check (false) no inherit not valid); +-- not valid constraint added at creation time should automatically become valid +\d nv_parent + Table "public.nv_parent" + Column | Type | Collation | Nullable | Default +--------+------+-----------+----------+--------- + d | date | | | +Check constraints: + "nv_parent_check" CHECK (false) NO INHERIT + +create table nv_child_2010 () inherits (nv_parent); +create table nv_child_2011 () inherits (nv_parent); +alter table nv_child_2010 add check (d between '2010-01-01'::date and '2010-12-31'::date) not valid; +alter table nv_child_2011 add check (d between '2011-01-01'::date and '2011-12-31'::date) not valid; +explain (costs off) select * from nv_parent where d between '2011-08-01' and '2011-08-31'; + QUERY PLAN +--------------------------------------------------------------------------- + Append + -> Seq Scan on nv_parent nv_parent_1 + Filter: ((d >= '08-01-2011'::date) AND (d <= '08-31-2011'::date)) + -> Seq Scan on nv_child_2010 nv_parent_2 + Filter: ((d >= '08-01-2011'::date) AND (d <= '08-31-2011'::date)) + -> Seq Scan on nv_child_2011 nv_parent_3 + Filter: ((d >= '08-01-2011'::date) AND (d <= '08-31-2011'::date)) +(7 rows) + +create table nv_child_2009 (check (d between '2009-01-01'::date and '2009-12-31'::date)) inherits (nv_parent); +explain (costs off) select * from nv_parent where d between '2011-08-01'::date and '2011-08-31'::date; + QUERY PLAN +--------------------------------------------------------------------------- + Append + -> Seq Scan on nv_parent nv_parent_1 + Filter: ((d >= '08-01-2011'::date) AND (d <= '08-31-2011'::date)) + -> Seq Scan on nv_child_2010 nv_parent_2 + Filter: ((d >= '08-01-2011'::date) AND (d <= '08-31-2011'::date)) + -> Seq Scan on nv_child_2011 nv_parent_3 + Filter: ((d >= '08-01-2011'::date) AND (d <= '08-31-2011'::date)) +(7 rows) + +explain (costs off) select * from nv_parent where d between '2009-08-01'::date and '2009-08-31'::date; + QUERY PLAN +--------------------------------------------------------------------------- + Append + -> Seq Scan on nv_parent nv_parent_1 + Filter: ((d >= '08-01-2009'::date) AND (d <= '08-31-2009'::date)) + -> Seq Scan on nv_child_2010 nv_parent_2 + Filter: ((d >= '08-01-2009'::date) AND (d <= '08-31-2009'::date)) + -> Seq Scan on nv_child_2011 nv_parent_3 + Filter: ((d >= '08-01-2009'::date) AND (d <= '08-31-2009'::date)) + -> Seq Scan on nv_child_2009 nv_parent_4 + Filter: ((d >= '08-01-2009'::date) AND (d <= '08-31-2009'::date)) +(9 rows) + +-- after validation, the constraint should be used +alter table nv_child_2011 VALIDATE CONSTRAINT nv_child_2011_d_check; +explain (costs off) select * from nv_parent where d between '2009-08-01'::date and '2009-08-31'::date; + QUERY PLAN +--------------------------------------------------------------------------- + Append + -> Seq Scan on nv_parent nv_parent_1 + Filter: ((d >= '08-01-2009'::date) AND (d <= '08-31-2009'::date)) + -> Seq Scan on nv_child_2010 nv_parent_2 + Filter: ((d >= '08-01-2009'::date) AND (d <= '08-31-2009'::date)) + -> Seq Scan on nv_child_2009 nv_parent_3 + Filter: ((d >= '08-01-2009'::date) AND (d <= '08-31-2009'::date)) +(7 rows) + +-- add an inherited NOT VALID constraint +alter table nv_parent add check (d between '2001-01-01'::date and '2099-12-31'::date) not valid; +\d nv_child_2009 + Table "public.nv_child_2009" + Column | Type | Collation | Nullable | Default +--------+------+-----------+----------+--------- + d | date | | | +Check constraints: + "nv_child_2009_d_check" CHECK (d >= '01-01-2009'::date AND d <= '12-31-2009'::date) + "nv_parent_d_check" CHECK (d >= '01-01-2001'::date AND d <= '12-31-2099'::date) NOT VALID +Inherits: nv_parent + +-- we leave nv_parent and children around to help test pg_dump logic +-- Foreign key adding test with mixed types +-- Note: these tables are TEMP to avoid name conflicts when this test +-- is run in parallel with foreign_key.sql. +CREATE TEMP TABLE PKTABLE (ptest1 int PRIMARY KEY); +INSERT INTO PKTABLE VALUES(42); +CREATE TEMP TABLE FKTABLE (ftest1 inet); +-- This next should fail, because int=inet does not exist +ALTER TABLE FKTABLE ADD FOREIGN KEY(ftest1) references pktable; +ERROR: foreign key constraint "fktable_ftest1_fkey" cannot be implemented +DETAIL: Key columns "ftest1" and "ptest1" are of incompatible types: inet and integer. +-- This should also fail for the same reason, but here we +-- give the column name +ALTER TABLE FKTABLE ADD FOREIGN KEY(ftest1) references pktable(ptest1); +ERROR: foreign key constraint "fktable_ftest1_fkey" cannot be implemented +DETAIL: Key columns "ftest1" and "ptest1" are of incompatible types: inet and integer. +DROP TABLE FKTABLE; +-- This should succeed, even though they are different types, +-- because int=int8 exists and is a member of the integer opfamily +CREATE TEMP TABLE FKTABLE (ftest1 int8); +ALTER TABLE FKTABLE ADD FOREIGN KEY(ftest1) references pktable; +-- Check it actually works +INSERT INTO FKTABLE VALUES(42); -- should succeed +INSERT INTO FKTABLE VALUES(43); -- should fail +ERROR: insert or update on table "fktable" violates foreign key constraint "fktable_ftest1_fkey" +DETAIL: Key (ftest1)=(43) is not present in table "pktable". +DROP TABLE FKTABLE; +-- This should fail, because we'd have to cast numeric to int which is +-- not an implicit coercion (or use numeric=numeric, but that's not part +-- of the integer opfamily) +CREATE TEMP TABLE FKTABLE (ftest1 numeric); +ALTER TABLE FKTABLE ADD FOREIGN KEY(ftest1) references pktable; +ERROR: foreign key constraint "fktable_ftest1_fkey" cannot be implemented +DETAIL: Key columns "ftest1" and "ptest1" are of incompatible types: numeric and integer. +DROP TABLE FKTABLE; +DROP TABLE PKTABLE; +-- On the other hand, this should work because int implicitly promotes to +-- numeric, and we allow promotion on the FK side +CREATE TEMP TABLE PKTABLE (ptest1 numeric PRIMARY KEY); +INSERT INTO PKTABLE VALUES(42); +CREATE TEMP TABLE FKTABLE (ftest1 int); +ALTER TABLE FKTABLE ADD FOREIGN KEY(ftest1) references pktable; +-- Check it actually works +INSERT INTO FKTABLE VALUES(42); -- should succeed +INSERT INTO FKTABLE VALUES(43); -- should fail +ERROR: insert or update on table "fktable" violates foreign key constraint "fktable_ftest1_fkey" +DETAIL: Key (ftest1)=(43) is not present in table "pktable". +DROP TABLE FKTABLE; +DROP TABLE PKTABLE; +CREATE TEMP TABLE PKTABLE (ptest1 int, ptest2 inet, + PRIMARY KEY(ptest1, ptest2)); +-- This should fail, because we just chose really odd types +CREATE TEMP TABLE FKTABLE (ftest1 cidr, ftest2 timestamp); +ALTER TABLE FKTABLE ADD FOREIGN KEY(ftest1, ftest2) references pktable; +ERROR: foreign key constraint "fktable_ftest1_ftest2_fkey" cannot be implemented +DETAIL: Key columns "ftest1" and "ptest1" are of incompatible types: cidr and integer. +DROP TABLE FKTABLE; +-- Again, so should this... +CREATE TEMP TABLE FKTABLE (ftest1 cidr, ftest2 timestamp); +ALTER TABLE FKTABLE ADD FOREIGN KEY(ftest1, ftest2) + references pktable(ptest1, ptest2); +ERROR: foreign key constraint "fktable_ftest1_ftest2_fkey" cannot be implemented +DETAIL: Key columns "ftest1" and "ptest1" are of incompatible types: cidr and integer. +DROP TABLE FKTABLE; +-- This fails because we mixed up the column ordering +CREATE TEMP TABLE FKTABLE (ftest1 int, ftest2 inet); +ALTER TABLE FKTABLE ADD FOREIGN KEY(ftest1, ftest2) + references pktable(ptest2, ptest1); +ERROR: foreign key constraint "fktable_ftest1_ftest2_fkey" cannot be implemented +DETAIL: Key columns "ftest1" and "ptest2" are of incompatible types: integer and inet. +-- As does this... +ALTER TABLE FKTABLE ADD FOREIGN KEY(ftest2, ftest1) + references pktable(ptest1, ptest2); +ERROR: foreign key constraint "fktable_ftest2_ftest1_fkey" cannot be implemented +DETAIL: Key columns "ftest2" and "ptest1" are of incompatible types: inet and integer. +DROP TABLE FKTABLE; +DROP TABLE PKTABLE; +-- Test that ALTER CONSTRAINT updates trigger deferrability properly +CREATE TEMP TABLE PKTABLE (ptest1 int primary key); +CREATE TEMP TABLE FKTABLE (ftest1 int); +ALTER TABLE FKTABLE ADD CONSTRAINT fknd FOREIGN KEY(ftest1) REFERENCES pktable + ON DELETE CASCADE ON UPDATE NO ACTION NOT DEFERRABLE; +ALTER TABLE FKTABLE ADD CONSTRAINT fkdd FOREIGN KEY(ftest1) REFERENCES pktable + ON DELETE CASCADE ON UPDATE NO ACTION DEFERRABLE INITIALLY DEFERRED; +ALTER TABLE FKTABLE ADD CONSTRAINT fkdi FOREIGN KEY(ftest1) REFERENCES pktable + ON DELETE CASCADE ON UPDATE NO ACTION DEFERRABLE INITIALLY IMMEDIATE; +ALTER TABLE FKTABLE ADD CONSTRAINT fknd2 FOREIGN KEY(ftest1) REFERENCES pktable + ON DELETE CASCADE ON UPDATE NO ACTION DEFERRABLE INITIALLY DEFERRED; +ALTER TABLE FKTABLE ALTER CONSTRAINT fknd2 NOT DEFERRABLE; +ERROR: unsupported alter table subcommand +DETAIL: Subcommand "ALTER CONSTRAINT" is not supported on OrioleDB tables yet. This will be implemented in future. +ALTER TABLE FKTABLE ADD CONSTRAINT fkdd2 FOREIGN KEY(ftest1) REFERENCES pktable + ON DELETE CASCADE ON UPDATE NO ACTION NOT DEFERRABLE; +ALTER TABLE FKTABLE ALTER CONSTRAINT fkdd2 DEFERRABLE INITIALLY DEFERRED; +ERROR: unsupported alter table subcommand +DETAIL: Subcommand "ALTER CONSTRAINT" is not supported on OrioleDB tables yet. This will be implemented in future. +ALTER TABLE FKTABLE ADD CONSTRAINT fkdi2 FOREIGN KEY(ftest1) REFERENCES pktable + ON DELETE CASCADE ON UPDATE NO ACTION NOT DEFERRABLE; +ALTER TABLE FKTABLE ALTER CONSTRAINT fkdi2 DEFERRABLE INITIALLY IMMEDIATE; +ERROR: unsupported alter table subcommand +DETAIL: Subcommand "ALTER CONSTRAINT" is not supported on OrioleDB tables yet. This will be implemented in future. +SELECT conname, tgfoid::regproc, tgtype, tgdeferrable, tginitdeferred +FROM pg_trigger JOIN pg_constraint con ON con.oid = tgconstraint +WHERE tgrelid = 'pktable'::regclass +ORDER BY 1,2,3; + conname | tgfoid | tgtype | tgdeferrable | tginitdeferred +---------+------------------------+--------+--------------+---------------- + fkdd | "RI_FKey_cascade_del" | 9 | f | f + fkdd | "RI_FKey_noaction_upd" | 17 | t | t + fkdd2 | "RI_FKey_cascade_del" | 9 | f | f + fkdd2 | "RI_FKey_noaction_upd" | 17 | f | f + fkdi | "RI_FKey_cascade_del" | 9 | f | f + fkdi | "RI_FKey_noaction_upd" | 17 | t | f + fkdi2 | "RI_FKey_cascade_del" | 9 | f | f + fkdi2 | "RI_FKey_noaction_upd" | 17 | f | f + fknd | "RI_FKey_cascade_del" | 9 | f | f + fknd | "RI_FKey_noaction_upd" | 17 | f | f + fknd2 | "RI_FKey_cascade_del" | 9 | f | f + fknd2 | "RI_FKey_noaction_upd" | 17 | t | t +(12 rows) + +SELECT conname, tgfoid::regproc, tgtype, tgdeferrable, tginitdeferred +FROM pg_trigger JOIN pg_constraint con ON con.oid = tgconstraint +WHERE tgrelid = 'fktable'::regclass +ORDER BY 1,2,3; + conname | tgfoid | tgtype | tgdeferrable | tginitdeferred +---------+---------------------+--------+--------------+---------------- + fkdd | "RI_FKey_check_ins" | 5 | t | t + fkdd | "RI_FKey_check_upd" | 17 | t | t + fkdd2 | "RI_FKey_check_ins" | 5 | f | f + fkdd2 | "RI_FKey_check_upd" | 17 | f | f + fkdi | "RI_FKey_check_ins" | 5 | t | f + fkdi | "RI_FKey_check_upd" | 17 | t | f + fkdi2 | "RI_FKey_check_ins" | 5 | f | f + fkdi2 | "RI_FKey_check_upd" | 17 | f | f + fknd | "RI_FKey_check_ins" | 5 | f | f + fknd | "RI_FKey_check_upd" | 17 | f | f + fknd2 | "RI_FKey_check_ins" | 5 | t | t + fknd2 | "RI_FKey_check_upd" | 17 | t | t +(12 rows) + +-- temp tables should go away by themselves, need not drop them. +-- test check constraint adding +create table atacc1 ( test int ); +-- add a check constraint +alter table atacc1 add constraint atacc_test1 check (test>3); +-- should fail +insert into atacc1 (test) values (2); +ERROR: new row for relation "atacc1" violates check constraint "atacc_test1" +DETAIL: Failing row contains (2). +-- should succeed +insert into atacc1 (test) values (4); +drop table atacc1; +-- let's do one where the check fails when added +create table atacc1 ( test int ); +-- insert a soon to be failing row +insert into atacc1 (test) values (2); +-- add a check constraint (fails) +alter table atacc1 add constraint atacc_test1 check (test>3); +ERROR: check constraint "atacc_test1" of relation "atacc1" is violated by some row +insert into atacc1 (test) values (4); +drop table atacc1; +-- let's do one where the check fails because the column doesn't exist +create table atacc1 ( test int ); +-- add a check constraint (fails) +alter table atacc1 add constraint atacc_test1 check (test1>3); +ERROR: column "test1" does not exist +HINT: Perhaps you meant to reference the column "atacc1.test". +drop table atacc1; +-- something a little more complicated +create table atacc1 ( test int, test2 int, test3 int); +-- add a check constraint (fails) +alter table atacc1 add constraint atacc_test1 check (test+test23), test2 int); +alter table atacc1 add check (test2>test); +-- should fail for $2 +insert into atacc1 (test2, test) values (3, 4); +ERROR: new row for relation "atacc1" violates check constraint "atacc1_check" +DETAIL: Failing row contains (4, 3). +drop table atacc1; +-- inheritance related tests +create table atacc1 (test int); +create table atacc2 (test2 int); +create table atacc3 (test3 int) inherits (atacc1, atacc2); +alter table atacc2 add constraint foo check (test2>0); +-- fail and then succeed on atacc2 +insert into atacc2 (test2) values (-3); +ERROR: new row for relation "atacc2" violates check constraint "foo" +DETAIL: Failing row contains (-3). +insert into atacc2 (test2) values (3); +-- fail and then succeed on atacc3 +insert into atacc3 (test2) values (-3); +ERROR: new row for relation "atacc3" violates check constraint "foo" +DETAIL: Failing row contains (null, -3, null). +insert into atacc3 (test2) values (3); +drop table atacc3; +drop table atacc2; +drop table atacc1; +-- same things with one created with INHERIT +create table atacc1 (test int); +create table atacc2 (test2 int); +create table atacc3 (test3 int) inherits (atacc1, atacc2); +alter table atacc3 no inherit atacc2; +-- fail +alter table atacc3 no inherit atacc2; +ERROR: relation "atacc2" is not a parent of relation "atacc3" +-- make sure it really isn't a child +insert into atacc3 (test2) values (3); +select test2 from atacc2; + test2 +------- +(0 rows) + +-- fail due to missing constraint +alter table atacc2 add constraint foo check (test2>0); +alter table atacc3 inherit atacc2; +ERROR: child table is missing constraint "foo" +-- fail due to missing column +alter table atacc3 rename test2 to testx; +alter table atacc3 inherit atacc2; +ERROR: child table is missing column "test2" +-- fail due to mismatched data type +alter table atacc3 add test2 bool; +alter table atacc3 inherit atacc2; +ERROR: child table "atacc3" has different type for column "test2" +alter table atacc3 drop test2; +-- succeed +alter table atacc3 add test2 int; +update atacc3 set test2 = 4 where test2 is null; +alter table atacc3 add constraint foo check (test2>0); +alter table atacc3 inherit atacc2; +-- fail due to duplicates and circular inheritance +alter table atacc3 inherit atacc2; +ERROR: relation "atacc2" would be inherited from more than once +alter table atacc2 inherit atacc3; +ERROR: circular inheritance not allowed +DETAIL: "atacc3" is already a child of "atacc2". +alter table atacc2 inherit atacc2; +ERROR: circular inheritance not allowed +DETAIL: "atacc2" is already a child of "atacc2". +-- test that we really are a child now (should see 4 not 3 and cascade should go through) +select test2 from atacc2; + test2 +------- + 4 +(1 row) + +drop table atacc2 cascade; +NOTICE: drop cascades to table atacc3 +drop table atacc1; +-- adding only to a parent is allowed as of 9.2 +create table atacc1 (test int); +create table atacc2 (test2 int) inherits (atacc1); +-- ok: +alter table atacc1 add constraint foo check (test>0) no inherit; +-- check constraint is not there on child +insert into atacc2 (test) values (-3); +-- check constraint is there on parent +insert into atacc1 (test) values (-3); +ERROR: new row for relation "atacc1" violates check constraint "foo" +DETAIL: Failing row contains (-3). +insert into atacc1 (test) values (3); +-- fail, violating row: +alter table atacc2 add constraint foo check (test>0) no inherit; +ERROR: check constraint "foo" of relation "atacc2" is violated by some row +drop table atacc2; +drop table atacc1; +-- test unique constraint adding +create table atacc1 ( test int ) ; +-- add a unique constraint +alter table atacc1 add constraint atacc_test1 unique (test); +-- insert first value +insert into atacc1 (test) values (2); +-- should fail +insert into atacc1 (test) values (2); +ERROR: duplicate key value violates unique constraint "atacc_test1" +DETAIL: Key (test)=('2') already exists. +-- should succeed +insert into atacc1 (test) values (4); +-- try to create duplicates via alter table using - should fail +alter table atacc1 alter column test type integer using 0; +ERROR: could not create unique index "atacc_test1" +DETAIL: Duplicate keys exist. +drop table atacc1; +-- let's do one where the unique constraint fails when added +create table atacc1 ( test int ); +-- insert soon to be failing rows +insert into atacc1 (test) values (2); +insert into atacc1 (test) values (2); +-- add a unique constraint (fails) +alter table atacc1 add constraint atacc_test1 unique (test); +ERROR: could not create unique index "atacc_test1" +DETAIL: Duplicate keys exist. +insert into atacc1 (test) values (3); +drop table atacc1; +-- let's do one where the unique constraint fails +-- because the column doesn't exist +create table atacc1 ( test int ); +-- add a unique constraint (fails) +alter table atacc1 add constraint atacc_test1 unique (test1); +ERROR: column "test1" named in key does not exist +drop table atacc1; +-- something a little more complicated +create table atacc1 ( test int, test2 int); +-- add a unique constraint +alter table atacc1 add constraint atacc_test1 unique (test, test2); +-- insert initial value +insert into atacc1 (test,test2) values (4,4); +-- should fail +insert into atacc1 (test,test2) values (4,4); +ERROR: duplicate key value violates unique constraint "atacc_test1" +DETAIL: Key (test, test2)=('4', '4') already exists. +-- should all succeed +insert into atacc1 (test,test2) values (4,5); +insert into atacc1 (test,test2) values (5,4); +insert into atacc1 (test,test2) values (5,5); +drop table atacc1; +-- lets do some naming tests +create table atacc1 (test int, test2 int, unique(test)); +alter table atacc1 add unique (test2); +-- should fail for @@ second one @@ +insert into atacc1 (test2, test) values (3, 3); +insert into atacc1 (test2, test) values (2, 3); +ERROR: duplicate key value violates unique constraint "atacc1_test_key" +DETAIL: Key (test)=('3') already exists. +drop table atacc1; +-- test primary key constraint adding +create table atacc1 ( id serial, test int) ; +-- add a primary key constraint +alter table atacc1 add constraint atacc_test1 primary key (test); +-- insert first value +insert into atacc1 (test) values (2); +-- should fail +insert into atacc1 (test) values (2); +ERROR: duplicate key value violates unique constraint "atacc_test1" +DETAIL: Key (test)=('2') already exists. +-- should succeed +insert into atacc1 (test) values (4); +-- inserting NULL should fail +insert into atacc1 (test) values(NULL); +ERROR: null value in column "test" of relation "atacc1" violates not-null constraint +DETAIL: Failing row contains (4, null). +-- try adding a second primary key (should fail) +alter table atacc1 add constraint atacc_oid1 primary key(id); +ERROR: multiple primary keys for table "atacc1" are not allowed +-- drop first primary key constraint +alter table atacc1 drop constraint atacc_test1 restrict; +-- try adding a primary key on oid (should succeed) +alter table atacc1 add constraint atacc_oid1 primary key(id); +drop table atacc1; +-- let's do one where the primary key constraint fails when added +create table atacc1 ( test int ); +-- insert soon to be failing rows +insert into atacc1 (test) values (2); +insert into atacc1 (test) values (2); +-- add a primary key (fails) +alter table atacc1 add constraint atacc_test1 primary key (test); +ERROR: could not create unique index "atacc_test1" +DETAIL: Duplicate keys exist. +insert into atacc1 (test) values (3); +drop table atacc1; +-- let's do another one where the primary key constraint fails when added +create table atacc1 ( test int ); +-- insert soon to be failing row +insert into atacc1 (test) values (NULL); +-- add a primary key (fails) +alter table atacc1 add constraint atacc_test1 primary key (test); +ERROR: column "test" of relation "atacc1" contains null values +insert into atacc1 (test) values (3); +drop table atacc1; +-- let's do one where the primary key constraint fails +-- because the column doesn't exist +create table atacc1 ( test int ); +-- add a primary key constraint (fails) +alter table atacc1 add constraint atacc_test1 primary key (test1); +ERROR: column "test1" of relation "atacc1" does not exist +drop table atacc1; +-- adding a new column as primary key to a non-empty table. +-- should fail unless the column has a non-null default value. +create table atacc1 ( test int ); +insert into atacc1 (test) values (0); +-- add a primary key column without a default (fails). +alter table atacc1 add column test2 int primary key; +ERROR: column "test2" of relation "atacc1" contains null values +-- now add a primary key column with a default (succeeds). +alter table atacc1 add column test2 int default 0 primary key; +drop table atacc1; +-- this combination used to have order-of-execution problems (bug #15580) +create table atacc1 (a int); +insert into atacc1 values(1); +alter table atacc1 + add column b float8 not null default random(), + add primary key(a); +drop table atacc1; +-- additionally, we've seen issues with foreign key validation not being +-- properly delayed until after a table rewrite. Check that works ok. +create table atacc1 (a int primary key); +alter table atacc1 add constraint atacc1_fkey foreign key (a) references atacc1 (a) not valid; +alter table atacc1 validate constraint atacc1_fkey, alter a type bigint; +ERROR: unsupported alter table subcommand +DETAIL: Subcommand "VALIDATE CONSTRAINT" is not supported on OrioleDB tables yet. This will be implemented in future. +drop table atacc1; +-- we've also seen issues with check constraints being validated at the wrong +-- time when there's a pending table rewrite. +create table atacc1 (a bigint, b int); +insert into atacc1 values(1,1); +alter table atacc1 add constraint atacc1_chk check(b = 1) not valid; +alter table atacc1 validate constraint atacc1_chk, alter a type int; +ERROR: unsupported alter table subcommand +DETAIL: Subcommand "VALIDATE CONSTRAINT" is not supported on OrioleDB tables yet. This will be implemented in future. +drop table atacc1; +-- same as above, but ensure the constraint violation is detected +create table atacc1 (a bigint, b int); +insert into atacc1 values(1,2); +alter table atacc1 add constraint atacc1_chk check(b = 1) not valid; +alter table atacc1 validate constraint atacc1_chk, alter a type int; +ERROR: unsupported alter table subcommand +DETAIL: Subcommand "VALIDATE CONSTRAINT" is not supported on OrioleDB tables yet. This will be implemented in future. +drop table atacc1; +-- something a little more complicated +create table atacc1 ( test int, test2 int); +-- add a primary key constraint +alter table atacc1 add constraint atacc_test1 primary key (test, test2); +-- try adding a second primary key - should fail +alter table atacc1 add constraint atacc_test2 primary key (test); +ERROR: multiple primary keys for table "atacc1" are not allowed +-- insert initial value +insert into atacc1 (test,test2) values (4,4); +-- should fail +insert into atacc1 (test,test2) values (4,4); +ERROR: duplicate key value violates unique constraint "atacc_test1" +DETAIL: Key (test, test2)=('4', '4') already exists. +insert into atacc1 (test,test2) values (NULL,3); +ERROR: null value in column "test" of relation "atacc1" violates not-null constraint +DETAIL: Failing row contains (null, 3). +insert into atacc1 (test,test2) values (3, NULL); +ERROR: null value in column "test2" of relation "atacc1" violates not-null constraint +DETAIL: Failing row contains (3, null). +insert into atacc1 (test,test2) values (NULL,NULL); +ERROR: null value in column "test" of relation "atacc1" violates not-null constraint +DETAIL: Failing row contains (null, null). +-- should all succeed +insert into atacc1 (test,test2) values (4,5); +insert into atacc1 (test,test2) values (5,4); +insert into atacc1 (test,test2) values (5,5); +drop table atacc1; +-- lets do some naming tests +create table atacc1 (test int, test2 int, primary key(test)); +-- only first should succeed +insert into atacc1 (test2, test) values (3, 3); +insert into atacc1 (test2, test) values (2, 3); +ERROR: duplicate key value violates unique constraint "atacc1_pkey" +DETAIL: Key (test)=('3') already exists. +insert into atacc1 (test2, test) values (1, NULL); +ERROR: null value in column "test" of relation "atacc1" violates not-null constraint +DETAIL: Failing row contains (null, 1). +drop table atacc1; +-- alter table / alter column [set/drop] not null tests +-- try altering system catalogs, should fail +alter table pg_class alter column relname drop not null; +ERROR: permission denied: "pg_class" is a system catalog +alter table pg_class alter relname set not null; +ERROR: permission denied: "pg_class" is a system catalog +-- try altering non-existent table, should fail +alter table non_existent alter column bar set not null; +ERROR: relation "non_existent" does not exist +alter table non_existent alter column bar drop not null; +ERROR: relation "non_existent" does not exist +-- test setting columns to null and not null and vice versa +-- test checking for null values and primary key +create table atacc1 (test int not null); +alter table atacc1 add constraint "atacc1_pkey" primary key (test); +\d atacc1 + Table "public.atacc1" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + test | integer | | not null | +Indexes: + "atacc1_pkey" PRIMARY KEY, btree (test) + +alter table atacc1 alter column test drop not null; +ERROR: column "test" is in a primary key +\d atacc1 + Table "public.atacc1" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + test | integer | | not null | +Indexes: + "atacc1_pkey" PRIMARY KEY, btree (test) + +alter table atacc1 drop constraint "atacc1_pkey"; +alter table atacc1 alter column test drop not null; +\d atacc1 + Table "public.atacc1" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + test | integer | | | + +insert into atacc1 values (null); +alter table atacc1 alter test set not null; +ERROR: column "test" of relation "atacc1" contains null values +delete from atacc1; +alter table atacc1 alter test set not null; +-- try altering a non-existent column, should fail +alter table atacc1 alter bar set not null; +ERROR: column "bar" of relation "atacc1" does not exist +alter table atacc1 alter bar drop not null; +ERROR: column "bar" of relation "atacc1" does not exist +-- try creating a view and altering that, should fail +create view myview as select * from atacc1; +alter table myview alter column test drop not null; +ERROR: ALTER action ALTER COLUMN ... DROP NOT NULL cannot be performed on relation "myview" +DETAIL: This operation is not supported for views. +alter table myview alter column test set not null; +ERROR: ALTER action ALTER COLUMN ... SET NOT NULL cannot be performed on relation "myview" +DETAIL: This operation is not supported for views. +drop view myview; +drop table atacc1; +-- set not null verified by constraints +create table atacc1 (test_a int, test_b int); +insert into atacc1 values (null, 1); +-- constraint not cover all values, should fail +alter table atacc1 add constraint atacc1_constr_or check(test_a is not null or test_b < 10); +alter table atacc1 alter test_a set not null; +ERROR: column "test_a" of relation "atacc1" contains null values +alter table atacc1 drop constraint atacc1_constr_or; +-- not valid constraint, should fail +alter table atacc1 add constraint atacc1_constr_invalid check(test_a is not null) not valid; +alter table atacc1 alter test_a set not null; +ERROR: column "test_a" of relation "atacc1" contains null values +alter table atacc1 drop constraint atacc1_constr_invalid; +-- with valid constraint +update atacc1 set test_a = 1; +alter table atacc1 add constraint atacc1_constr_a_valid check(test_a is not null); +alter table atacc1 alter test_a set not null; +delete from atacc1; +insert into atacc1 values (2, null); +alter table atacc1 alter test_a drop not null; +-- test multiple set not null at same time +-- test_a checked by atacc1_constr_a_valid, test_b should fail by table scan +alter table atacc1 alter test_a set not null, alter test_b set not null; +ERROR: column "test_b" of relation "atacc1" contains null values +-- commands order has no importance +alter table atacc1 alter test_b set not null, alter test_a set not null; +ERROR: column "test_b" of relation "atacc1" contains null values +-- valid one by table scan, one by check constraints +update atacc1 set test_b = 1; +alter table atacc1 alter test_b set not null, alter test_a set not null; +alter table atacc1 alter test_a drop not null, alter test_b drop not null; +-- both column has check constraints +alter table atacc1 add constraint atacc1_constr_b_valid check(test_b is not null); +alter table atacc1 alter test_b set not null, alter test_a set not null; +drop table atacc1; +-- test inheritance +create table parent (a int); +create table child (b varchar(255)) inherits (parent); +alter table parent alter a set not null; +insert into parent values (NULL); +ERROR: null value in column "a" of relation "parent" violates not-null constraint +DETAIL: Failing row contains (null). +insert into child (a, b) values (NULL, 'foo'); +ERROR: null value in column "a" of relation "child" violates not-null constraint +DETAIL: Failing row contains (null, foo). +alter table parent alter a drop not null; +insert into parent values (NULL); +insert into child (a, b) values (NULL, 'foo'); +alter table only parent alter a set not null; +ERROR: column "a" of relation "parent" contains null values +alter table child alter a set not null; +ERROR: column "a" of relation "child" contains null values +delete from parent; +alter table only parent alter a set not null; +insert into parent values (NULL); +ERROR: null value in column "a" of relation "parent" violates not-null constraint +DETAIL: Failing row contains (null). +alter table child alter a set not null; +insert into child (a, b) values (NULL, 'foo'); +ERROR: null value in column "a" of relation "child" violates not-null constraint +DETAIL: Failing row contains (null, foo). +delete from child; +alter table child alter a set not null; +insert into child (a, b) values (NULL, 'foo'); +ERROR: null value in column "a" of relation "child" violates not-null constraint +DETAIL: Failing row contains (null, foo). +drop table child; +drop table parent; +-- test setting and removing default values +create table def_test ( + c1 int4 default 5, + c2 text default 'initial_default' +); +insert into def_test default values; +alter table def_test alter column c1 drop default; +insert into def_test default values; +alter table def_test alter column c2 drop default; +insert into def_test default values; +alter table def_test alter column c1 set default 10; +alter table def_test alter column c2 set default 'new_default'; +insert into def_test default values; +select * from def_test; + c1 | c2 +----+----------------- + 5 | initial_default + | initial_default + | + 10 | new_default +(4 rows) + +-- set defaults to an incorrect type: this should fail +alter table def_test alter column c1 set default 'wrong_datatype'; +ERROR: invalid input syntax for type integer: "wrong_datatype" +alter table def_test alter column c2 set default 20; +-- set defaults on a non-existent column: this should fail +alter table def_test alter column c3 set default 30; +ERROR: column "c3" of relation "def_test" does not exist +-- set defaults on views: we need to create a view, add a rule +-- to allow insertions into it, and then alter the view to add +-- a default +create view def_view_test as select * from def_test; +create rule def_view_test_ins as + on insert to def_view_test + do instead insert into def_test select new.*; +insert into def_view_test default values; +alter table def_view_test alter column c1 set default 45; +insert into def_view_test default values; +alter table def_view_test alter column c2 set default 'view_default'; +insert into def_view_test default values; +select * from def_view_test; + c1 | c2 +----+----------------- + 5 | initial_default + | initial_default + | + 10 | new_default + | + 45 | + 45 | view_default +(7 rows) + +drop rule def_view_test_ins on def_view_test; +drop view def_view_test; +drop table def_test; +-- alter table / drop column tests +-- try altering system catalogs, should fail +alter table pg_class drop column relname; +ERROR: permission denied: "pg_class" is a system catalog +-- try altering non-existent table, should fail +alter table nosuchtable drop column bar; +ERROR: relation "nosuchtable" does not exist +-- test dropping columns +create table atacc1 (a int4 not null, b int4, c int4 not null, d int4); +insert into atacc1 values (1, 2, 3, 4); +alter table atacc1 drop a; +alter table atacc1 drop a; +ERROR: column "a" of relation "atacc1" does not exist +-- SELECTs +select * from atacc1; + b | c | d +---+---+--- + 2 | 3 | 4 +(1 row) + +select * from atacc1 order by a; +ERROR: column "a" does not exist +LINE 1: select * from atacc1 order by a; + ^ +select * from atacc1 order by "........pg.dropped.1........"; +ERROR: column "........pg.dropped.1........" does not exist +LINE 1: select * from atacc1 order by "........pg.dropped.1........"... + ^ +select * from atacc1 group by a; +ERROR: column "a" does not exist +LINE 1: select * from atacc1 group by a; + ^ +select * from atacc1 group by "........pg.dropped.1........"; +ERROR: column "........pg.dropped.1........" does not exist +LINE 1: select * from atacc1 group by "........pg.dropped.1........"... + ^ +select atacc1.* from atacc1; + b | c | d +---+---+--- + 2 | 3 | 4 +(1 row) + +select a from atacc1; +ERROR: column "a" does not exist +LINE 1: select a from atacc1; + ^ +select atacc1.a from atacc1; +ERROR: column atacc1.a does not exist +LINE 1: select atacc1.a from atacc1; + ^ +select b,c,d from atacc1; + b | c | d +---+---+--- + 2 | 3 | 4 +(1 row) + +select a,b,c,d from atacc1; +ERROR: column "a" does not exist +LINE 1: select a,b,c,d from atacc1; + ^ +select * from atacc1 where a = 1; +ERROR: column "a" does not exist +LINE 1: select * from atacc1 where a = 1; + ^ +select "........pg.dropped.1........" from atacc1; +ERROR: column "........pg.dropped.1........" does not exist +LINE 1: select "........pg.dropped.1........" from atacc1; + ^ +select atacc1."........pg.dropped.1........" from atacc1; +ERROR: column atacc1.........pg.dropped.1........ does not exist +LINE 1: select atacc1."........pg.dropped.1........" from atacc1; + ^ +select "........pg.dropped.1........",b,c,d from atacc1; +ERROR: column "........pg.dropped.1........" does not exist +LINE 1: select "........pg.dropped.1........",b,c,d from atacc1; + ^ +select * from atacc1 where "........pg.dropped.1........" = 1; +ERROR: column "........pg.dropped.1........" does not exist +LINE 1: select * from atacc1 where "........pg.dropped.1........" = ... + ^ +-- UPDATEs +update atacc1 set a = 3; +ERROR: column "a" of relation "atacc1" does not exist +LINE 1: update atacc1 set a = 3; + ^ +update atacc1 set b = 2 where a = 3; +ERROR: column "a" does not exist +LINE 1: update atacc1 set b = 2 where a = 3; + ^ +update atacc1 set "........pg.dropped.1........" = 3; +ERROR: column "........pg.dropped.1........" of relation "atacc1" does not exist +LINE 1: update atacc1 set "........pg.dropped.1........" = 3; + ^ +update atacc1 set b = 2 where "........pg.dropped.1........" = 3; +ERROR: column "........pg.dropped.1........" does not exist +LINE 1: update atacc1 set b = 2 where "........pg.dropped.1........"... + ^ +-- INSERTs +insert into atacc1 values (10, 11, 12, 13); +ERROR: INSERT has more expressions than target columns +LINE 1: insert into atacc1 values (10, 11, 12, 13); + ^ +insert into atacc1 values (default, 11, 12, 13); +ERROR: INSERT has more expressions than target columns +LINE 1: insert into atacc1 values (default, 11, 12, 13); + ^ +insert into atacc1 values (11, 12, 13); +insert into atacc1 (a) values (10); +ERROR: column "a" of relation "atacc1" does not exist +LINE 1: insert into atacc1 (a) values (10); + ^ +insert into atacc1 (a) values (default); +ERROR: column "a" of relation "atacc1" does not exist +LINE 1: insert into atacc1 (a) values (default); + ^ +insert into atacc1 (a,b,c,d) values (10,11,12,13); +ERROR: column "a" of relation "atacc1" does not exist +LINE 1: insert into atacc1 (a,b,c,d) values (10,11,12,13); + ^ +insert into atacc1 (a,b,c,d) values (default,11,12,13); +ERROR: column "a" of relation "atacc1" does not exist +LINE 1: insert into atacc1 (a,b,c,d) values (default,11,12,13); + ^ +insert into atacc1 (b,c,d) values (11,12,13); +insert into atacc1 ("........pg.dropped.1........") values (10); +ERROR: column "........pg.dropped.1........" of relation "atacc1" does not exist +LINE 1: insert into atacc1 ("........pg.dropped.1........") values (... + ^ +insert into atacc1 ("........pg.dropped.1........") values (default); +ERROR: column "........pg.dropped.1........" of relation "atacc1" does not exist +LINE 1: insert into atacc1 ("........pg.dropped.1........") values (... + ^ +insert into atacc1 ("........pg.dropped.1........",b,c,d) values (10,11,12,13); +ERROR: column "........pg.dropped.1........" of relation "atacc1" does not exist +LINE 1: insert into atacc1 ("........pg.dropped.1........",b,c,d) va... + ^ +insert into atacc1 ("........pg.dropped.1........",b,c,d) values (default,11,12,13); +ERROR: column "........pg.dropped.1........" of relation "atacc1" does not exist +LINE 1: insert into atacc1 ("........pg.dropped.1........",b,c,d) va... + ^ +-- DELETEs +delete from atacc1 where a = 3; +ERROR: column "a" does not exist +LINE 1: delete from atacc1 where a = 3; + ^ +delete from atacc1 where "........pg.dropped.1........" = 3; +ERROR: column "........pg.dropped.1........" does not exist +LINE 1: delete from atacc1 where "........pg.dropped.1........" = 3; + ^ +delete from atacc1; +-- try dropping a non-existent column, should fail +alter table atacc1 drop bar; +ERROR: column "bar" of relation "atacc1" does not exist +-- try removing an oid column, should succeed (as it's nonexistent) +alter table atacc1 SET WITHOUT OIDS; +ERROR: unsupported alter table subcommand +DETAIL: Subcommand "SET WITHOUT OIDS" is not supported on OrioleDB tables yet. This will be implemented in future. +-- try adding an oid column, should fail (not supported) +alter table atacc1 SET WITH OIDS; +ERROR: syntax error at or near "WITH" +LINE 1: alter table atacc1 SET WITH OIDS; + ^ +-- try dropping the xmin column, should fail +alter table atacc1 drop xmin; +ERROR: cannot drop system column "xmin" +-- try creating a view and altering that, should fail +create view myview as select * from atacc1; +select * from myview; + b | c | d +---+---+--- +(0 rows) + +alter table myview drop d; +ERROR: ALTER action DROP COLUMN cannot be performed on relation "myview" +DETAIL: This operation is not supported for views. +drop view myview; +-- test some commands to make sure they fail on the dropped column +analyze atacc1(a); +ERROR: column "a" of relation "atacc1" does not exist +analyze atacc1("........pg.dropped.1........"); +ERROR: column "........pg.dropped.1........" of relation "atacc1" does not exist +vacuum analyze atacc1(a); +ERROR: column "a" of relation "atacc1" does not exist +vacuum analyze atacc1("........pg.dropped.1........"); +ERROR: column "........pg.dropped.1........" of relation "atacc1" does not exist +comment on column atacc1.a is 'testing'; +ERROR: column "a" of relation "atacc1" does not exist +comment on column atacc1."........pg.dropped.1........" is 'testing'; +ERROR: column "........pg.dropped.1........" of relation "atacc1" does not exist +alter table atacc1 alter a set storage plain; +ERROR: column "a" of relation "atacc1" does not exist +alter table atacc1 alter "........pg.dropped.1........" set storage plain; +ERROR: column "........pg.dropped.1........" of relation "atacc1" does not exist +alter table atacc1 alter a set statistics 0; +ERROR: column "a" of relation "atacc1" does not exist +alter table atacc1 alter "........pg.dropped.1........" set statistics 0; +ERROR: column "........pg.dropped.1........" of relation "atacc1" does not exist +alter table atacc1 alter a set default 3; +ERROR: column "a" of relation "atacc1" does not exist +alter table atacc1 alter "........pg.dropped.1........" set default 3; +ERROR: column "........pg.dropped.1........" of relation "atacc1" does not exist +alter table atacc1 alter a drop default; +ERROR: column "a" of relation "atacc1" does not exist +alter table atacc1 alter "........pg.dropped.1........" drop default; +ERROR: column "........pg.dropped.1........" of relation "atacc1" does not exist +alter table atacc1 alter a set not null; +ERROR: column "a" of relation "atacc1" does not exist +alter table atacc1 alter "........pg.dropped.1........" set not null; +ERROR: column "........pg.dropped.1........" of relation "atacc1" does not exist +alter table atacc1 alter a drop not null; +ERROR: column "a" of relation "atacc1" does not exist +alter table atacc1 alter "........pg.dropped.1........" drop not null; +ERROR: column "........pg.dropped.1........" of relation "atacc1" does not exist +alter table atacc1 rename a to x; +ERROR: column "a" does not exist +alter table atacc1 rename "........pg.dropped.1........" to x; +ERROR: column "........pg.dropped.1........" does not exist +alter table atacc1 add primary key(a); +ERROR: column "a" of relation "atacc1" does not exist +alter table atacc1 add primary key("........pg.dropped.1........"); +ERROR: column "........pg.dropped.1........" of relation "atacc1" does not exist +alter table atacc1 add unique(a); +ERROR: column "a" named in key does not exist +alter table atacc1 add unique("........pg.dropped.1........"); +ERROR: column "........pg.dropped.1........" named in key does not exist +alter table atacc1 add check (a > 3); +ERROR: column "a" does not exist +alter table atacc1 add check ("........pg.dropped.1........" > 3); +ERROR: column "........pg.dropped.1........" does not exist +create table atacc2 (id int4 unique); +alter table atacc1 add foreign key (a) references atacc2(id); +ERROR: column "a" referenced in foreign key constraint does not exist +alter table atacc1 add foreign key ("........pg.dropped.1........") references atacc2(id); +ERROR: column "........pg.dropped.1........" referenced in foreign key constraint does not exist +alter table atacc2 add foreign key (id) references atacc1(a); +ERROR: column "a" referenced in foreign key constraint does not exist +alter table atacc2 add foreign key (id) references atacc1("........pg.dropped.1........"); +ERROR: column "........pg.dropped.1........" referenced in foreign key constraint does not exist +drop table atacc2; +create index "testing_idx" on atacc1(a); +ERROR: column "a" does not exist +create index "testing_idx" on atacc1("........pg.dropped.1........"); +ERROR: column "........pg.dropped.1........" does not exist +-- test create as and select into +insert into atacc1 values (21, 22, 23); +create table attest1 as select * from atacc1; +select * from attest1; + b | c | d +----+----+---- + 21 | 22 | 23 +(1 row) + +drop table attest1; +select * into attest2 from atacc1; +select * from attest2; + b | c | d +----+----+---- + 21 | 22 | 23 +(1 row) + +drop table attest2; +-- try dropping all columns +alter table atacc1 drop c; +alter table atacc1 drop d; +alter table atacc1 drop b; +select * from atacc1; +-- +(1 row) + +drop table atacc1; +-- test constraint error reporting in presence of dropped columns +create table atacc1 (id serial primary key, value int check (value < 10)); +insert into atacc1(value) values (100); +ERROR: new row for relation "atacc1" violates check constraint "atacc1_value_check" +DETAIL: Failing row contains (1, 100). +alter table atacc1 drop column value; +alter table atacc1 add column value int check (value < 10); +insert into atacc1(value) values (100); +ERROR: new row for relation "atacc1" violates check constraint "atacc1_value_check" +DETAIL: Failing row contains (2, 100). +insert into atacc1(id, value) values (null, 0); +ERROR: null value in column "id" of relation "atacc1" violates not-null constraint +DETAIL: Failing row contains (null, 0). +drop table atacc1; +-- test inheritance +create table parent (a int, b int, c int); +insert into parent values (1, 2, 3); +alter table parent drop a; +create table child (d varchar(255)) inherits (parent); +insert into child values (12, 13, 'testing'); +select * from parent; + b | c +----+---- + 2 | 3 + 12 | 13 +(2 rows) + +select * from child; + b | c | d +----+----+--------- + 12 | 13 | testing +(1 row) + +alter table parent drop c; +select * from parent; + b +---- + 2 + 12 +(2 rows) + +select * from child; + b | d +----+--------- + 12 | testing +(1 row) + +drop table child; +drop table parent; +-- check error cases for inheritance column merging +create table parent (a float8, b numeric(10,4), c text collate "C"); +create table child (a float4) inherits (parent); -- fail +NOTICE: merging column "a" with inherited definition +ERROR: column "a" has a type conflict +DETAIL: double precision versus real +create table child (b decimal(10,7)) inherits (parent); -- fail +NOTICE: moving and merging column "b" with inherited definition +DETAIL: User-specified column moved to the position of the inherited column. +ERROR: column "b" has a type conflict +DETAIL: numeric(10,4) versus numeric(10,7) +create table child (c text collate "POSIX") inherits (parent); -- fail +NOTICE: moving and merging column "c" with inherited definition +DETAIL: User-specified column moved to the position of the inherited column. +ERROR: column "c" has a collation conflict +DETAIL: "C" versus "POSIX" +create table child (a double precision, b decimal(10,4)) inherits (parent); +NOTICE: merging column "a" with inherited definition +NOTICE: merging column "b" with inherited definition +drop table child; +drop table parent; +-- test copy in/out +create table attest (a int4, b int4, c int4); +insert into attest values (1,2,3); +alter table attest drop a; +copy attest to stdout; +2 3 +copy attest(a) to stdout; +ERROR: column "a" of relation "attest" does not exist +copy attest("........pg.dropped.1........") to stdout; +ERROR: column "........pg.dropped.1........" of relation "attest" does not exist +copy attest from stdin; +ERROR: extra data after last expected column +CONTEXT: COPY attest, line 1: "10 11 12" +select * from attest; + b | c +---+--- + 2 | 3 +(1 row) + +copy attest from stdin; +select * from attest; + b | c +----+---- + 2 | 3 + 21 | 22 +(2 rows) + +copy attest(a) from stdin; +ERROR: column "a" of relation "attest" does not exist +copy attest("........pg.dropped.1........") from stdin; +ERROR: column "........pg.dropped.1........" of relation "attest" does not exist +copy attest(b,c) from stdin; +select * from attest; + b | c +----+---- + 2 | 3 + 21 | 22 + 31 | 32 +(3 rows) + +drop table attest; +-- test inheritance +create table dropColumn (a int, b int, e int); +create table dropColumnChild (c int) inherits (dropColumn); +create table dropColumnAnother (d int) inherits (dropColumnChild); +-- these two should fail +alter table dropColumnchild drop column a; +ERROR: cannot drop inherited column "a" +alter table only dropColumnChild drop column b; +ERROR: cannot drop inherited column "b" +-- these three should work +alter table only dropColumn drop column e; +alter table dropColumnChild drop column c; +alter table dropColumn drop column a; +create table renameColumn (a int); +create table renameColumnChild (b int) inherits (renameColumn); +create table renameColumnAnother (c int) inherits (renameColumnChild); +-- these three should fail +alter table renameColumnChild rename column a to d; +ERROR: cannot rename inherited column "a" +alter table only renameColumnChild rename column a to d; +ERROR: inherited column "a" must be renamed in child tables too +alter table only renameColumn rename column a to d; +ERROR: inherited column "a" must be renamed in child tables too +-- these should work +alter table renameColumn rename column a to d; +alter table renameColumnChild rename column b to a; +-- these should work +alter table if exists doesnt_exist_tab rename column a to d; +NOTICE: relation "doesnt_exist_tab" does not exist, skipping +alter table if exists doesnt_exist_tab rename column b to a; +NOTICE: relation "doesnt_exist_tab" does not exist, skipping +-- this should work +alter table renameColumn add column w int; +-- this should fail +alter table only renameColumn add column x int; +ERROR: column must be added to child tables too +-- Test corner cases in dropping of inherited columns +create table p1 (f1 int, f2 int); +create table c1 (f1 int not null) inherits(p1); +NOTICE: merging column "f1" with inherited definition +-- should be rejected since c1.f1 is inherited +alter table c1 drop column f1; +ERROR: cannot drop inherited column "f1" +-- should work +alter table p1 drop column f1; +-- c1.f1 is still there, but no longer inherited +select f1 from c1; + f1 +---- +(0 rows) + +alter table c1 drop column f1; +select f1 from c1; +ERROR: column "f1" does not exist +LINE 1: select f1 from c1; + ^ +HINT: Perhaps you meant to reference the column "c1.f2". +drop table p1 cascade; +NOTICE: drop cascades to table c1 +create table p1 (f1 int, f2 int); +create table c1 () inherits(p1); +-- should be rejected since c1.f1 is inherited +alter table c1 drop column f1; +ERROR: cannot drop inherited column "f1" +alter table p1 drop column f1; +-- c1.f1 is dropped now, since there is no local definition for it +select f1 from c1; +ERROR: column "f1" does not exist +LINE 1: select f1 from c1; + ^ +HINT: Perhaps you meant to reference the column "c1.f2". +drop table p1 cascade; +NOTICE: drop cascades to table c1 +create table p1 (f1 int, f2 int); +create table c1 () inherits(p1); +-- should be rejected since c1.f1 is inherited +alter table c1 drop column f1; +ERROR: cannot drop inherited column "f1" +alter table only p1 drop column f1; +-- c1.f1 is NOT dropped, but must now be considered non-inherited +alter table c1 drop column f1; +drop table p1 cascade; +NOTICE: drop cascades to table c1 +create table p1 (f1 int, f2 int); +create table c1 (f1 int not null) inherits(p1); +NOTICE: merging column "f1" with inherited definition +-- should be rejected since c1.f1 is inherited +alter table c1 drop column f1; +ERROR: cannot drop inherited column "f1" +alter table only p1 drop column f1; +-- c1.f1 is still there, but no longer inherited +alter table c1 drop column f1; +drop table p1 cascade; +NOTICE: drop cascades to table c1 +create table p1(id int, name text); +create table p2(id2 int, name text, height int); +create table c1(age int) inherits(p1,p2); +NOTICE: merging multiple inherited definitions of column "name" +create table gc1() inherits (c1); +select relname, attname, attinhcount, attislocal +from pg_class join pg_attribute on (pg_class.oid = pg_attribute.attrelid) +where relname in ('p1','p2','c1','gc1') and attnum > 0 and not attisdropped +order by relname, attnum; + relname | attname | attinhcount | attislocal +---------+---------+-------------+------------ + c1 | id | 1 | f + c1 | name | 2 | f + c1 | id2 | 1 | f + c1 | height | 1 | f + c1 | age | 0 | t + gc1 | id | 1 | f + gc1 | name | 1 | f + gc1 | id2 | 1 | f + gc1 | height | 1 | f + gc1 | age | 1 | f + p1 | id | 0 | t + p1 | name | 0 | t + p2 | id2 | 0 | t + p2 | name | 0 | t + p2 | height | 0 | t +(15 rows) + +-- should work +alter table only p1 drop column name; +-- should work. Now c1.name is local and inhcount is 0. +alter table p2 drop column name; +-- should be rejected since its inherited +alter table gc1 drop column name; +ERROR: cannot drop inherited column "name" +-- should work, and drop gc1.name along +alter table c1 drop column name; +-- should fail: column does not exist +alter table gc1 drop column name; +ERROR: column "name" of relation "gc1" does not exist +-- should work and drop the attribute in all tables +alter table p2 drop column height; +-- IF EXISTS test +create table dropColumnExists (); +alter table dropColumnExists drop column non_existing; --fail +ERROR: column "non_existing" of relation "dropcolumnexists" does not exist +alter table dropColumnExists drop column if exists non_existing; --succeed +NOTICE: column "non_existing" of relation "dropcolumnexists" does not exist, skipping +select relname, attname, attinhcount, attislocal +from pg_class join pg_attribute on (pg_class.oid = pg_attribute.attrelid) +where relname in ('p1','p2','c1','gc1') and attnum > 0 and not attisdropped +order by relname, attnum; + relname | attname | attinhcount | attislocal +---------+---------+-------------+------------ + c1 | id | 1 | f + c1 | id2 | 1 | f + c1 | age | 0 | t + gc1 | id | 1 | f + gc1 | id2 | 1 | f + gc1 | age | 1 | f + p1 | id | 0 | t + p2 | id2 | 0 | t +(8 rows) + +drop table p1, p2 cascade; +NOTICE: drop cascades to 2 other objects +DETAIL: drop cascades to table c1 +drop cascades to table gc1 +-- test attinhcount tracking with merged columns +create table depth0(); +create table depth1(c text) inherits (depth0); +create table depth2() inherits (depth1); +alter table depth0 add c text; +NOTICE: merging definition of column "c" for child "depth1" +select attrelid::regclass, attname, attinhcount, attislocal +from pg_attribute +where attnum > 0 and attrelid::regclass in ('depth0', 'depth1', 'depth2') +order by attrelid::regclass::text, attnum; + attrelid | attname | attinhcount | attislocal +----------+---------+-------------+------------ + depth0 | c | 0 | t + depth1 | c | 1 | t + depth2 | c | 1 | f +(3 rows) + +-- test renumbering of child-table columns in inherited operations +create table p1 (f1 int); +create table c1 (f2 text, f3 int) inherits (p1); +alter table p1 add column a1 int check (a1 > 0); +alter table p1 add column f2 text; +NOTICE: merging definition of column "f2" for child "c1" +insert into p1 values (1,2,'abc'); +insert into c1 values(11,'xyz',33,0); -- should fail +ERROR: new row for relation "c1" violates check constraint "p1_a1_check" +DETAIL: Failing row contains (11, xyz, 33, 0). +insert into c1 values(11,'xyz',33,22); +select * from p1; + f1 | a1 | f2 +----+----+----- + 1 | 2 | abc + 11 | 22 | xyz +(2 rows) + +update p1 set a1 = a1 + 1, f2 = upper(f2); +select * from p1; + f1 | a1 | f2 +----+----+----- + 1 | 3 | ABC + 11 | 23 | XYZ +(2 rows) + +drop table p1 cascade; +NOTICE: drop cascades to table c1 +-- test that operations with a dropped column do not try to reference +-- its datatype +create domain mytype as text; +create temp table foo (f1 text, f2 mytype, f3 text); +insert into foo values('bb','cc','dd'); +select * from foo; + f1 | f2 | f3 +----+----+---- + bb | cc | dd +(1 row) + +drop domain mytype cascade; +NOTICE: drop cascades to column f2 of table foo +select * from foo; + f1 | f3 +----+---- + bb | dd +(1 row) + +insert into foo values('qq','rr'); +select * from foo; + f1 | f3 +----+---- + bb | dd + qq | rr +(2 rows) + +update foo set f3 = 'zz'; +select * from foo; + f1 | f3 +----+---- + bb | zz + qq | zz +(2 rows) + +select f3,max(f1) from foo group by f3; + f3 | max +----+----- + zz | qq +(1 row) + +-- Simple tests for alter table column type +alter table foo alter f1 TYPE integer; -- fails +ERROR: column "f1" cannot be cast automatically to type integer +HINT: You might need to specify "USING f1::integer". +alter table foo alter f1 TYPE varchar(10); +create table anothertab (atcol1 serial8, atcol2 boolean, + constraint anothertab_chk check (atcol1 <= 3)); +insert into anothertab (atcol1, atcol2) values (default, true); +insert into anothertab (atcol1, atcol2) values (default, false); +select * from anothertab; + atcol1 | atcol2 +--------+-------- + 1 | t + 2 | f +(2 rows) + +alter table anothertab alter column atcol1 type boolean; -- fails +ERROR: column "atcol1" cannot be cast automatically to type boolean +HINT: You might need to specify "USING atcol1::boolean". +alter table anothertab alter column atcol1 type boolean using atcol1::int; -- fails +ERROR: result of USING clause for column "atcol1" cannot be cast automatically to type boolean +HINT: You might need to add an explicit cast. +alter table anothertab alter column atcol1 type integer; +select * from anothertab; + atcol1 | atcol2 +--------+-------- + 1 | t + 2 | f +(2 rows) + +insert into anothertab (atcol1, atcol2) values (45, null); -- fails +ERROR: new row for relation "anothertab" violates check constraint "anothertab_chk" +DETAIL: Failing row contains (45, null). +insert into anothertab (atcol1, atcol2) values (default, null); +select * from anothertab; + atcol1 | atcol2 +--------+-------- + 1 | t + 2 | f + 3 | +(3 rows) + +alter table anothertab alter column atcol2 type text + using case when atcol2 is true then 'IT WAS TRUE' + when atcol2 is false then 'IT WAS FALSE' + else 'IT WAS NULL!' end; +select * from anothertab; + atcol1 | atcol2 +--------+-------------- + 1 | IT WAS TRUE + 2 | IT WAS FALSE + 3 | IT WAS NULL! +(3 rows) + +alter table anothertab alter column atcol1 type boolean + using case when atcol1 % 2 = 0 then true else false end; -- fails +ERROR: default for column "atcol1" cannot be cast automatically to type boolean +alter table anothertab alter column atcol1 drop default; +alter table anothertab alter column atcol1 type boolean + using case when atcol1 % 2 = 0 then true else false end; -- fails +ERROR: operator does not exist: boolean <= integer +HINT: No operator matches the given name and argument types. You might need to add explicit type casts. +alter table anothertab drop constraint anothertab_chk; +alter table anothertab drop constraint anothertab_chk; -- fails +ERROR: constraint "anothertab_chk" of relation "anothertab" does not exist +alter table anothertab drop constraint IF EXISTS anothertab_chk; -- succeeds +NOTICE: constraint "anothertab_chk" of relation "anothertab" does not exist, skipping +alter table anothertab alter column atcol1 type boolean + using case when atcol1 % 2 = 0 then true else false end; +select * from anothertab; + atcol1 | atcol2 +--------+-------------- + f | IT WAS TRUE + t | IT WAS FALSE + f | IT WAS NULL! +(3 rows) + +drop table anothertab; +-- Test index handling in alter table column type (cf. bugs #15835, #15865) +create table anothertab(f1 int primary key, f2 int unique, + f3 int, f4 int, f5 int); +alter table anothertab + add exclude using btree (f3 with =); +alter table anothertab + add exclude using btree (f4 with =) where (f4 is not null); +alter table anothertab + add exclude using btree (f4 with =) where (f5 > 0); +alter table anothertab + add unique(f1,f4); +create index on anothertab(f2,f3); +create unique index on anothertab(f4); +\d anothertab + Table "public.anothertab" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + f1 | integer | | not null | + f2 | integer | | | + f3 | integer | | | + f4 | integer | | | + f5 | integer | | | +Indexes: + "anothertab_pkey" PRIMARY KEY, btree (f1) + "anothertab_f1_f4_key" UNIQUE CONSTRAINT, btree (f1, f4) + "anothertab_f2_f3_idx" btree (f2, f3) + "anothertab_f2_key" UNIQUE CONSTRAINT, btree (f2) + "anothertab_f3_excl" EXCLUDE USING btree (f3 WITH =) + "anothertab_f4_excl" EXCLUDE USING btree (f4 WITH =) WHERE (f4 IS NOT NULL) + "anothertab_f4_excl1" EXCLUDE USING btree (f4 WITH =) WHERE (f5 > 0) + "anothertab_f4_idx" UNIQUE, btree (f4) + +alter table anothertab alter column f1 type bigint; +alter table anothertab + alter column f2 type bigint, + alter column f3 type bigint, + alter column f4 type bigint; +alter table anothertab alter column f5 type bigint; +\d anothertab + Table "public.anothertab" + Column | Type | Collation | Nullable | Default +--------+--------+-----------+----------+--------- + f1 | bigint | | not null | + f2 | bigint | | | + f3 | bigint | | | + f4 | bigint | | | + f5 | bigint | | | +Indexes: + "anothertab_pkey" PRIMARY KEY, btree (f1) + "anothertab_f1_f4_key" UNIQUE CONSTRAINT, btree (f1, f4) + "anothertab_f2_f3_idx" btree (f2, f3) + "anothertab_f2_key" UNIQUE CONSTRAINT, btree (f2) + "anothertab_f3_excl" EXCLUDE USING btree (f3 WITH =) + "anothertab_f4_excl" EXCLUDE USING btree (f4 WITH =) WHERE (f4 IS NOT NULL) + "anothertab_f4_excl1" EXCLUDE USING btree (f4 WITH =) WHERE (f5 > 0) + "anothertab_f4_idx" UNIQUE, btree (f4) + +drop table anothertab; +-- test that USING expressions are parsed before column alter type / drop steps +create table another (f1 int, f2 text, f3 text); +insert into another values(1, 'one', 'uno'); +insert into another values(2, 'two', 'due'); +insert into another values(3, 'three', 'tre'); +select * from another; + f1 | f2 | f3 +----+-------+----- + 1 | one | uno + 2 | two | due + 3 | three | tre +(3 rows) + +alter table another + alter f1 type text using f2 || ' and ' || f3 || ' more', + alter f2 type bigint using f1 * 10, + drop column f3; +select * from another; + f1 | f2 +--------------------+---- + one and uno more | 10 + two and due more | 20 + three and tre more | 30 +(3 rows) + +drop table another; +-- Create an index that skips WAL, then perform a SET DATA TYPE that skips +-- rewriting the index. +begin; +create table skip_wal_skip_rewrite_index (c varchar(10) primary key); +alter table skip_wal_skip_rewrite_index alter c type varchar(20); +commit; +-- We disallow changing table's row type if it's used for storage +create table at_tab1 (a int, b text); +create table at_tab2 (x int, y at_tab1); +alter table at_tab1 alter column b type varchar; -- fails +ERROR: cannot alter table "at_tab1" because column "at_tab2.y" uses its row type +drop table at_tab2; +-- Use of row type in an expression is defended differently +create table at_tab2 (x int, y text, check((x,y)::at_tab1 = (1,'42')::at_tab1)); +alter table at_tab1 alter column b type varchar; -- allowed, but ... +insert into at_tab2 values(1,'42'); -- ... this will fail +ERROR: ROW() column has type text instead of type character varying +drop table at_tab1, at_tab2; +-- Check it for a partitioned table, too +create table at_tab1 (a int, b text) partition by list(a); +create table at_tab2 (x int, y at_tab1); +alter table at_tab1 alter column b type varchar; -- fails +ERROR: cannot alter table "at_tab1" because column "at_tab2.y" uses its row type +drop table at_tab1, at_tab2; +-- Alter column type that's part of a partitioned index +create table at_partitioned (a int, b text) partition by range (a); +create table at_part_1 partition of at_partitioned for values from (0) to (1000); +insert into at_partitioned values (512, '0.123'); +create table at_part_2 (b text, a int); +insert into at_part_2 values ('1.234', 1024); +create index on at_partitioned (b); +create index on at_partitioned (a); +\d at_part_1 + Table "public.at_part_1" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + a | integer | | | + b | text | | | +Partition of: at_partitioned FOR VALUES FROM (0) TO (1000) +Indexes: + "at_part_1_a_idx" btree (a) + "at_part_1_b_idx" btree (b) + +\d at_part_2 + Table "public.at_part_2" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + b | text | | | + a | integer | | | + +alter table at_partitioned attach partition at_part_2 for values from (1000) to (2000); +\d at_part_2 + Table "public.at_part_2" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + b | text | | | + a | integer | | | +Partition of: at_partitioned FOR VALUES FROM (1000) TO (2000) +Indexes: + "at_part_2_a_idx" btree (a) + "at_part_2_b_idx" btree (b) + +alter table at_partitioned alter column b type numeric using b::numeric; +\d at_part_1 + Table "public.at_part_1" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + a | integer | | | + b | numeric | | | +Partition of: at_partitioned FOR VALUES FROM (0) TO (1000) +Indexes: + "at_part_1_a_idx" btree (a) + "at_part_1_b_idx" btree (b) + +\d at_part_2 + Table "public.at_part_2" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + b | numeric | | | + a | integer | | | +Partition of: at_partitioned FOR VALUES FROM (1000) TO (2000) +Indexes: + "at_part_2_a_idx" btree (a) + "at_part_2_b_idx" btree (b) + +drop table at_partitioned; +-- Alter column type when no table rewrite is required +-- Also check that comments are preserved +create table at_partitioned(id int, name varchar(64), unique (id, name)) + partition by hash(id); +comment on constraint at_partitioned_id_name_key on at_partitioned is 'parent constraint'; +comment on index at_partitioned_id_name_key is 'parent index'; +create table at_partitioned_0 partition of at_partitioned + for values with (modulus 2, remainder 0); +comment on constraint at_partitioned_0_id_name_key on at_partitioned_0 is 'child 0 constraint'; +comment on index at_partitioned_0_id_name_key is 'child 0 index'; +create table at_partitioned_1 partition of at_partitioned + for values with (modulus 2, remainder 1); +comment on constraint at_partitioned_1_id_name_key on at_partitioned_1 is 'child 1 constraint'; +comment on index at_partitioned_1_id_name_key is 'child 1 index'; +insert into at_partitioned values(1, 'foo'); +insert into at_partitioned values(3, 'bar'); +create temp table old_oids as + select relname, oid as oldoid, relfilenode as oldfilenode + from pg_class where relname like 'at_partitioned%'; +select relname, + c.oid = oldoid as orig_oid, + case relfilenode + when 0 then 'none' + when c.oid then 'own' + when oldfilenode then 'orig' + else 'OTHER' + end as storage, + obj_description(c.oid, 'pg_class') as desc + from pg_class c left join old_oids using (relname) + where relname like 'at_partitioned%' + order by relname; + relname | orig_oid | storage | desc +------------------------------+----------+---------+--------------- + at_partitioned | t | none | + at_partitioned_0 | t | own | + at_partitioned_0_id_name_key | t | own | child 0 index + at_partitioned_1 | t | own | + at_partitioned_1_id_name_key | t | own | child 1 index + at_partitioned_id_name_key | t | none | parent index +(6 rows) + +select conname, obj_description(oid, 'pg_constraint') as desc + from pg_constraint where conname like 'at_partitioned%' + order by conname; + conname | desc +------------------------------+-------------------- + at_partitioned_0_id_name_key | child 0 constraint + at_partitioned_1_id_name_key | child 1 constraint + at_partitioned_id_name_key | parent constraint +(3 rows) + +alter table at_partitioned alter column name type varchar(127); +select relname, + c.oid = oldoid as orig_oid, + case relfilenode + when 0 then 'none' + when c.oid then 'own' + when oldfilenode then 'orig' + else 'OTHER' + end as storage, + obj_description(c.oid, 'pg_class') as desc + from pg_class c left join old_oids using (relname) + where relname like 'at_partitioned%' + order by relname; + relname | orig_oid | storage | desc +------------------------------+----------+---------+-------------- + at_partitioned | t | none | + at_partitioned_0 | t | own | + at_partitioned_0_id_name_key | f | own | + at_partitioned_1 | t | own | + at_partitioned_1_id_name_key | f | own | + at_partitioned_id_name_key | f | none | parent index +(6 rows) + +select conname, obj_description(oid, 'pg_constraint') as desc + from pg_constraint where conname like 'at_partitioned%' + order by conname; + conname | desc +------------------------------+------------------- + at_partitioned_0_id_name_key | + at_partitioned_1_id_name_key | + at_partitioned_id_name_key | parent constraint +(3 rows) + +-- Don't remove this DROP, it exposes bug #15672 +drop table at_partitioned; +-- disallow recursive containment of row types +create temp table recur1 (f1 int); +alter table recur1 add column f2 recur1; -- fails +ERROR: composite type recur1 cannot be made a member of itself +alter table recur1 add column f2 recur1[]; -- fails +ERROR: composite type recur1 cannot be made a member of itself +create domain array_of_recur1 as recur1[]; +alter table recur1 add column f2 array_of_recur1; -- fails +ERROR: composite type recur1 cannot be made a member of itself +create temp table recur2 (f1 int, f2 recur1); +alter table recur1 add column f2 recur2; -- fails +ERROR: composite type recur1 cannot be made a member of itself +alter table recur1 add column f2 int; +alter table recur1 alter column f2 type recur2; -- fails +ERROR: composite type recur1 cannot be made a member of itself +-- SET STORAGE may need to add a TOAST table +create table test_storage (a text, c text storage plain); +select reltoastrelid <> 0 as has_toast_table + from pg_class where oid = 'test_storage'::regclass; + has_toast_table +----------------- + t +(1 row) + +alter table test_storage alter a set storage plain; +-- rewrite table to remove its TOAST table; need a non-constant column default +alter table test_storage add b int default random()::int; +select reltoastrelid <> 0 as has_toast_table + from pg_class where oid = 'test_storage'::regclass; + has_toast_table +----------------- + t +(1 row) + +alter table test_storage alter a set storage default; -- re-add TOAST table +select reltoastrelid <> 0 as has_toast_table + from pg_class where oid = 'test_storage'::regclass; + has_toast_table +----------------- + t +(1 row) + +-- check STORAGE correctness +create table test_storage_failed (a text, b int storage extended); +ERROR: column data type integer can only have storage PLAIN +-- test that SET STORAGE propagates to index correctly +create index test_storage_idx on test_storage (b, a); +alter table test_storage alter column a set storage external; +\d+ test_storage + Table "public.test_storage" + Column | Type | Collation | Nullable | Default | Storage | Stats target | Description +--------+---------+-----------+----------+-------------------+----------+--------------+------------- + a | text | | | | external | | + c | text | | | | plain | | + b | integer | | | random()::integer | plain | | +Indexes: + "test_storage_idx" btree (b, a) + +\d+ test_storage_idx + Index "public.test_storage_idx" + Column | Type | Key? | Definition | Storage | Stats target +--------+---------+------+------------+----------+-------------- + b | integer | yes | b | plain | + a | text | yes | a | external | +btree, for table "public.test_storage" + +-- ALTER COLUMN TYPE with a check constraint and a child table (bug #13779) +CREATE TABLE test_inh_check (a float check (a > 10.2), b float); +CREATE TABLE test_inh_check_child() INHERITS(test_inh_check); +\d test_inh_check + Table "public.test_inh_check" + Column | Type | Collation | Nullable | Default +--------+------------------+-----------+----------+--------- + a | double precision | | | + b | double precision | | | +Check constraints: + "test_inh_check_a_check" CHECK (a > 10.2::double precision) +Number of child tables: 1 (Use \d+ to list them.) + +\d test_inh_check_child + Table "public.test_inh_check_child" + Column | Type | Collation | Nullable | Default +--------+------------------+-----------+----------+--------- + a | double precision | | | + b | double precision | | | +Check constraints: + "test_inh_check_a_check" CHECK (a > 10.2::double precision) +Inherits: test_inh_check + +select relname, conname, coninhcount, conislocal, connoinherit + from pg_constraint c, pg_class r + where relname like 'test_inh_check%' and c.conrelid = r.oid + order by 1, 2; + relname | conname | coninhcount | conislocal | connoinherit +----------------------+------------------------+-------------+------------+-------------- + test_inh_check | test_inh_check_a_check | 0 | t | f + test_inh_check_child | test_inh_check_a_check | 1 | f | f +(2 rows) + +ALTER TABLE test_inh_check ALTER COLUMN a TYPE numeric; +\d test_inh_check + Table "public.test_inh_check" + Column | Type | Collation | Nullable | Default +--------+------------------+-----------+----------+--------- + a | numeric | | | + b | double precision | | | +Check constraints: + "test_inh_check_a_check" CHECK (a::double precision > 10.2::double precision) +Number of child tables: 1 (Use \d+ to list them.) + +\d test_inh_check_child + Table "public.test_inh_check_child" + Column | Type | Collation | Nullable | Default +--------+------------------+-----------+----------+--------- + a | numeric | | | + b | double precision | | | +Check constraints: + "test_inh_check_a_check" CHECK (a::double precision > 10.2::double precision) +Inherits: test_inh_check + +select relname, conname, coninhcount, conislocal, connoinherit + from pg_constraint c, pg_class r + where relname like 'test_inh_check%' and c.conrelid = r.oid + order by 1, 2; + relname | conname | coninhcount | conislocal | connoinherit +----------------------+------------------------+-------------+------------+-------------- + test_inh_check | test_inh_check_a_check | 0 | t | f + test_inh_check_child | test_inh_check_a_check | 1 | f | f +(2 rows) + +-- also try noinherit, local, and local+inherited cases +ALTER TABLE test_inh_check ADD CONSTRAINT bnoinherit CHECK (b > 100) NO INHERIT; +ALTER TABLE test_inh_check_child ADD CONSTRAINT blocal CHECK (b < 1000); +ALTER TABLE test_inh_check_child ADD CONSTRAINT bmerged CHECK (b > 1); +ALTER TABLE test_inh_check ADD CONSTRAINT bmerged CHECK (b > 1); +NOTICE: merging constraint "bmerged" with inherited definition +\d test_inh_check + Table "public.test_inh_check" + Column | Type | Collation | Nullable | Default +--------+------------------+-----------+----------+--------- + a | numeric | | | + b | double precision | | | +Check constraints: + "bmerged" CHECK (b > 1::double precision) + "bnoinherit" CHECK (b > 100::double precision) NO INHERIT + "test_inh_check_a_check" CHECK (a::double precision > 10.2::double precision) +Number of child tables: 1 (Use \d+ to list them.) + +\d test_inh_check_child + Table "public.test_inh_check_child" + Column | Type | Collation | Nullable | Default +--------+------------------+-----------+----------+--------- + a | numeric | | | + b | double precision | | | +Check constraints: + "blocal" CHECK (b < 1000::double precision) + "bmerged" CHECK (b > 1::double precision) + "test_inh_check_a_check" CHECK (a::double precision > 10.2::double precision) +Inherits: test_inh_check + +select relname, conname, coninhcount, conislocal, connoinherit + from pg_constraint c, pg_class r + where relname like 'test_inh_check%' and c.conrelid = r.oid + order by 1, 2; + relname | conname | coninhcount | conislocal | connoinherit +----------------------+------------------------+-------------+------------+-------------- + test_inh_check | bmerged | 0 | t | f + test_inh_check | bnoinherit | 0 | t | t + test_inh_check | test_inh_check_a_check | 0 | t | f + test_inh_check_child | blocal | 0 | t | f + test_inh_check_child | bmerged | 1 | t | f + test_inh_check_child | test_inh_check_a_check | 1 | f | f +(6 rows) + +ALTER TABLE test_inh_check ALTER COLUMN b TYPE numeric; +NOTICE: merging constraint "bmerged" with inherited definition +\d test_inh_check + Table "public.test_inh_check" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + a | numeric | | | + b | numeric | | | +Check constraints: + "bmerged" CHECK (b::double precision > 1::double precision) + "bnoinherit" CHECK (b::double precision > 100::double precision) NO INHERIT + "test_inh_check_a_check" CHECK (a::double precision > 10.2::double precision) +Number of child tables: 1 (Use \d+ to list them.) + +\d test_inh_check_child + Table "public.test_inh_check_child" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + a | numeric | | | + b | numeric | | | +Check constraints: + "blocal" CHECK (b::double precision < 1000::double precision) + "bmerged" CHECK (b::double precision > 1::double precision) + "test_inh_check_a_check" CHECK (a::double precision > 10.2::double precision) +Inherits: test_inh_check + +select relname, conname, coninhcount, conislocal, connoinherit + from pg_constraint c, pg_class r + where relname like 'test_inh_check%' and c.conrelid = r.oid + order by 1, 2; + relname | conname | coninhcount | conislocal | connoinherit +----------------------+------------------------+-------------+------------+-------------- + test_inh_check | bmerged | 0 | t | f + test_inh_check | bnoinherit | 0 | t | t + test_inh_check | test_inh_check_a_check | 0 | t | f + test_inh_check_child | blocal | 0 | t | f + test_inh_check_child | bmerged | 1 | t | f + test_inh_check_child | test_inh_check_a_check | 1 | f | f +(6 rows) + +-- ALTER COLUMN TYPE with different schema in children +-- Bug at https://postgr.es/m/20170102225618.GA10071@telsasoft.com +CREATE TABLE test_type_diff (f1 int); +CREATE TABLE test_type_diff_c (extra smallint) INHERITS (test_type_diff); +ALTER TABLE test_type_diff ADD COLUMN f2 int; +INSERT INTO test_type_diff_c VALUES (1, 2, 3); +ALTER TABLE test_type_diff ALTER COLUMN f2 TYPE bigint USING f2::bigint; +CREATE TABLE test_type_diff2 (int_two int2, int_four int4, int_eight int8); +CREATE TABLE test_type_diff2_c1 (int_four int4, int_eight int8, int_two int2); +CREATE TABLE test_type_diff2_c2 (int_eight int8, int_two int2, int_four int4); +CREATE TABLE test_type_diff2_c3 (int_two int2, int_four int4, int_eight int8); +ALTER TABLE test_type_diff2_c1 INHERIT test_type_diff2; +ALTER TABLE test_type_diff2_c2 INHERIT test_type_diff2; +ALTER TABLE test_type_diff2_c3 INHERIT test_type_diff2; +INSERT INTO test_type_diff2_c1 VALUES (1, 2, 3); +INSERT INTO test_type_diff2_c2 VALUES (4, 5, 6); +INSERT INTO test_type_diff2_c3 VALUES (7, 8, 9); +ALTER TABLE test_type_diff2 ALTER COLUMN int_four TYPE int8 USING int_four::int8; +-- whole-row references are disallowed +ALTER TABLE test_type_diff2 ALTER COLUMN int_four TYPE int4 USING (pg_column_size(test_type_diff2)); +ERROR: cannot convert whole-row table reference +DETAIL: USING expression contains a whole-row table reference. +-- check for rollback of ANALYZE corrupting table property flags (bug #11638) +CREATE TABLE check_fk_presence_1 (id int PRIMARY KEY, t text); +CREATE TABLE check_fk_presence_2 (id int REFERENCES check_fk_presence_1, t text); +BEGIN; +ALTER TABLE check_fk_presence_2 DROP CONSTRAINT check_fk_presence_2_id_fkey; +ANALYZE check_fk_presence_2; +ROLLBACK; +\d check_fk_presence_2 + Table "public.check_fk_presence_2" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + id | integer | | | + t | text | | | +Foreign-key constraints: + "check_fk_presence_2_id_fkey" FOREIGN KEY (id) REFERENCES check_fk_presence_1(id) + +DROP TABLE check_fk_presence_1, check_fk_presence_2; +-- check column addition within a view (bug #14876) +create table at_base_table(id int, stuff text); +insert into at_base_table values (23, 'skidoo'); +create view at_view_1 as select * from at_base_table bt; +create view at_view_2 as select *, to_json(v1) as j from at_view_1 v1; +\d+ at_view_1 + View "public.at_view_1" + Column | Type | Collation | Nullable | Default | Storage | Description +--------+---------+-----------+----------+---------+----------+------------- + id | integer | | | | plain | + stuff | text | | | | extended | +View definition: + SELECT id, + stuff + FROM at_base_table bt; + +\d+ at_view_2 + View "public.at_view_2" + Column | Type | Collation | Nullable | Default | Storage | Description +--------+---------+-----------+----------+---------+----------+------------- + id | integer | | | | plain | + stuff | text | | | | extended | + j | json | | | | extended | +View definition: + SELECT id, + stuff, + to_json(v1.*) AS j + FROM at_view_1 v1; + +explain (verbose, costs off) select * from at_view_2; + QUERY PLAN +---------------------------------------------------------- + Seq Scan on public.at_base_table bt + Output: bt.id, bt.stuff, to_json(ROW(bt.id, bt.stuff)) +(2 rows) + +select * from at_view_2; + id | stuff | j +----+--------+---------------------------- + 23 | skidoo | {"id":23,"stuff":"skidoo"} +(1 row) + +create or replace view at_view_1 as select *, 2+2 as more from at_base_table bt; +\d+ at_view_1 + View "public.at_view_1" + Column | Type | Collation | Nullable | Default | Storage | Description +--------+---------+-----------+----------+---------+----------+------------- + id | integer | | | | plain | + stuff | text | | | | extended | + more | integer | | | | plain | +View definition: + SELECT id, + stuff, + 2 + 2 AS more + FROM at_base_table bt; + +\d+ at_view_2 + View "public.at_view_2" + Column | Type | Collation | Nullable | Default | Storage | Description +--------+---------+-----------+----------+---------+----------+------------- + id | integer | | | | plain | + stuff | text | | | | extended | + j | json | | | | extended | +View definition: + SELECT id, + stuff, + to_json(v1.*) AS j + FROM at_view_1 v1; + +explain (verbose, costs off) select * from at_view_2; + QUERY PLAN +------------------------------------------------------------- + Seq Scan on public.at_base_table bt + Output: bt.id, bt.stuff, to_json(ROW(bt.id, bt.stuff, 4)) +(2 rows) + +select * from at_view_2; + id | stuff | j +----+--------+------------------------------------- + 23 | skidoo | {"id":23,"stuff":"skidoo","more":4} +(1 row) + +drop view at_view_2; +drop view at_view_1; +drop table at_base_table; +-- related case (bug #17811) +begin; +create temp table t1 as select * from int8_tbl; +create temp view v1 as select 1::int8 as q1; +create temp view v2 as select * from v1; +create or replace temp view v1 with (security_barrier = true) + as select * from t1; +create temp table log (q1 int8, q2 int8); +create rule v1_upd_rule as on update to v1 + do also insert into log values (new.*); +update v2 set q1 = q1 + 1 where q1 = 123; +select * from t1; + q1 | q2 +------------------+------------------- + 124 | 456 + 124 | 4567890123456789 + 4567890123456789 | 123 + 4567890123456789 | 4567890123456789 + 4567890123456789 | -4567890123456789 +(5 rows) + +select * from log; + q1 | q2 +-----+------------------ + 124 | 456 + 124 | 4567890123456789 +(2 rows) + +rollback; +-- check adding a column not itself requiring a rewrite, together with +-- a column requiring a default (bug #16038) +-- ensure that rewrites aren't silently optimized away, removing the +-- value of the test +CREATE FUNCTION check_ddl_rewrite(p_tablename regclass, p_ddl text) +RETURNS boolean +LANGUAGE plpgsql AS $$ +DECLARE + v_relfilenode oid; +BEGIN + v_relfilenode := relfilenode FROM pg_class WHERE oid = p_tablename; + + EXECUTE p_ddl; + + RETURN v_relfilenode <> (SELECT relfilenode FROM pg_class WHERE oid = p_tablename); +END; +$$; +CREATE TABLE rewrite_test(col text); +INSERT INTO rewrite_test VALUES ('something'); +INSERT INTO rewrite_test VALUES (NULL); +-- empty[12] don't need rewrite, but notempty[12]_rewrite will force one +SELECT check_ddl_rewrite('rewrite_test', $$ + ALTER TABLE rewrite_test + ADD COLUMN empty1 text, + ADD COLUMN notempty1_rewrite serial; +$$); + check_ddl_rewrite +------------------- + t +(1 row) + +SELECT check_ddl_rewrite('rewrite_test', $$ + ALTER TABLE rewrite_test + ADD COLUMN notempty2_rewrite serial, + ADD COLUMN empty2 text; +$$); + check_ddl_rewrite +------------------- + t +(1 row) + +-- also check that fast defaults cause no problem, first without rewrite +SELECT check_ddl_rewrite('rewrite_test', $$ + ALTER TABLE rewrite_test + ADD COLUMN empty3 text, + ADD COLUMN notempty3_norewrite int default 42; +$$); + check_ddl_rewrite +------------------- + f +(1 row) + +SELECT check_ddl_rewrite('rewrite_test', $$ + ALTER TABLE rewrite_test + ADD COLUMN notempty4_norewrite int default 42, + ADD COLUMN empty4 text; +$$); + check_ddl_rewrite +------------------- + f +(1 row) + +-- then with rewrite +SELECT check_ddl_rewrite('rewrite_test', $$ + ALTER TABLE rewrite_test + ADD COLUMN empty5 text, + ADD COLUMN notempty5_norewrite int default 42, + ADD COLUMN notempty5_rewrite serial; +$$); + check_ddl_rewrite +------------------- + t +(1 row) + +SELECT check_ddl_rewrite('rewrite_test', $$ + ALTER TABLE rewrite_test + ADD COLUMN notempty6_rewrite serial, + ADD COLUMN empty6 text, + ADD COLUMN notempty6_norewrite int default 42; +$$); + check_ddl_rewrite +------------------- + t +(1 row) + +-- cleanup +DROP FUNCTION check_ddl_rewrite(regclass, text); +DROP TABLE rewrite_test; +-- +-- lock levels +-- +drop type lockmodes; +ERROR: type "lockmodes" does not exist +create type lockmodes as enum ( + 'SIReadLock' +,'AccessShareLock' +,'RowShareLock' +,'RowExclusiveLock' +,'ShareUpdateExclusiveLock' +,'ShareLock' +,'ShareRowExclusiveLock' +,'ExclusiveLock' +,'AccessExclusiveLock' +); +drop view my_locks; +ERROR: view "my_locks" does not exist +create or replace view my_locks as +select case when c.relname like 'pg_toast%' then 'pg_toast' else c.relname end, max(mode::lockmodes) as max_lockmode +from pg_locks l join pg_class c on l.relation = c.oid +where virtualtransaction = ( + select virtualtransaction + from pg_locks + where transactionid = pg_current_xact_id()::xid) +and locktype = 'relation' +and relnamespace != (select oid from pg_namespace where nspname = 'pg_catalog') +and c.relname != 'my_locks' +group by c.relname; +create table alterlock (f1 int primary key, f2 text); +insert into alterlock values (1, 'foo'); +create table alterlock2 (f3 int primary key, f1 int); +insert into alterlock2 values (1, 1); +begin; alter table alterlock alter column f2 set statistics 150; +select * from my_locks order by 1; + relname | max_lockmode +----------------+-------------------------- + alterlock | ShareUpdateExclusiveLock + alterlock_pkey | AccessShareLock +(2 rows) + +rollback; +begin; alter table alterlock cluster on alterlock_pkey; +select * from my_locks order by 1; + relname | max_lockmode +----------------+-------------------------- + alterlock | ShareUpdateExclusiveLock + alterlock_pkey | ShareUpdateExclusiveLock +(2 rows) + +commit; +begin; alter table alterlock set without cluster; +select * from my_locks order by 1; + relname | max_lockmode +-----------+-------------------------- + alterlock | ShareUpdateExclusiveLock +(1 row) + +commit; +begin; alter table alterlock set (fillfactor = 100); +select * from my_locks order by 1; + relname | max_lockmode +-----------+-------------------------- + alterlock | ShareUpdateExclusiveLock + pg_toast | ShareUpdateExclusiveLock +(2 rows) + +commit; +begin; alter table alterlock reset (fillfactor); +select * from my_locks order by 1; + relname | max_lockmode +-----------+-------------------------- + alterlock | ShareUpdateExclusiveLock + pg_toast | ShareUpdateExclusiveLock +(2 rows) + +commit; +begin; alter table alterlock set (toast.autovacuum_enabled = off); +select * from my_locks order by 1; + relname | max_lockmode +-----------+-------------------------- + alterlock | ShareUpdateExclusiveLock + pg_toast | ShareUpdateExclusiveLock +(2 rows) + +commit; +begin; alter table alterlock set (autovacuum_enabled = off); +select * from my_locks order by 1; + relname | max_lockmode +-----------+-------------------------- + alterlock | ShareUpdateExclusiveLock + pg_toast | ShareUpdateExclusiveLock +(2 rows) + +commit; +begin; alter table alterlock alter column f2 set (n_distinct = 1); +select * from my_locks order by 1; + relname | max_lockmode +----------------+-------------------------- + alterlock | ShareUpdateExclusiveLock + alterlock_pkey | AccessShareLock +(2 rows) + +rollback; +-- test that mixing options with different lock levels works as expected +begin; alter table alterlock set (autovacuum_enabled = off, fillfactor = 80); +select * from my_locks order by 1; + relname | max_lockmode +-----------+-------------------------- + alterlock | ShareUpdateExclusiveLock + pg_toast | ShareUpdateExclusiveLock +(2 rows) + +commit; +begin; alter table alterlock alter column f2 set storage extended; +select * from my_locks order by 1; + relname | max_lockmode +----------------+--------------------- + alterlock | AccessExclusiveLock + alterlock_pkey | AccessShareLock +(2 rows) + +rollback; +begin; alter table alterlock alter column f2 set default 'x'; +select * from my_locks order by 1; + relname | max_lockmode +-----------+--------------------- + alterlock | AccessExclusiveLock +(1 row) + +rollback; +begin; +create trigger ttdummy + before delete or update on alterlock + for each row + execute procedure + ttdummy (1, 1); +select * from my_locks order by 1; + relname | max_lockmode +-----------+----------------------- + alterlock | ShareRowExclusiveLock +(1 row) + +rollback; +begin; +select * from my_locks order by 1; + relname | max_lockmode +---------+-------------- +(0 rows) + +alter table alterlock2 add foreign key (f1) references alterlock (f1); +select * from my_locks order by 1; + relname | max_lockmode +-----------------+----------------------- + alterlock | ShareRowExclusiveLock + alterlock2 | ShareRowExclusiveLock + alterlock2_pkey | AccessShareLock + alterlock_pkey | AccessShareLock +(4 rows) + +rollback; +begin; +alter table alterlock2 +add constraint alterlock2nv foreign key (f1) references alterlock (f1) NOT VALID; +select * from my_locks order by 1; + relname | max_lockmode +------------+----------------------- + alterlock | ShareRowExclusiveLock + alterlock2 | ShareRowExclusiveLock +(2 rows) + +commit; +begin; +alter table alterlock2 validate constraint alterlock2nv; +select * from my_locks order by 1; + relname | max_lockmode +-----------------+-------------------------- + alterlock | RowShareLock + alterlock2 | ShareUpdateExclusiveLock + alterlock2_pkey | AccessShareLock + alterlock_pkey | AccessShareLock +(4 rows) + +rollback; +create or replace view my_locks as +select case when c.relname like 'pg_toast%' then 'pg_toast' else c.relname end, max(mode::lockmodes) as max_lockmode +from pg_locks l join pg_class c on l.relation = c.oid +where virtualtransaction = ( + select virtualtransaction + from pg_locks + where transactionid = pg_current_xact_id()::xid) +and locktype = 'relation' +and relnamespace != (select oid from pg_namespace where nspname = 'pg_catalog') +and c.relname = 'my_locks' +group by c.relname; +-- raise exception +alter table my_locks set (autovacuum_enabled = false); +ERROR: unrecognized parameter "autovacuum_enabled" +alter view my_locks set (autovacuum_enabled = false); +ERROR: unrecognized parameter "autovacuum_enabled" +alter table my_locks reset (autovacuum_enabled); +alter view my_locks reset (autovacuum_enabled); +begin; +alter view my_locks set (security_barrier=off); +select * from my_locks order by 1; + relname | max_lockmode +----------+--------------------- + my_locks | AccessExclusiveLock +(1 row) + +alter view my_locks reset (security_barrier); +rollback; +-- this test intentionally applies the ALTER TABLE command against a view, but +-- uses a view option so we expect this to succeed. This form of SQL is +-- accepted for historical reasons, as shown in the docs for ALTER VIEW +begin; +alter table my_locks set (security_barrier=off); +select * from my_locks order by 1; + relname | max_lockmode +----------+--------------------- + my_locks | AccessExclusiveLock +(1 row) + +alter table my_locks reset (security_barrier); +rollback; +-- cleanup +drop table alterlock2; +drop table alterlock; +drop view my_locks; +drop type lockmodes; +-- +-- alter function +-- +create function test_strict(text) returns text as + 'select coalesce($1, ''got passed a null'');' + language sql returns null on null input; +select test_strict(NULL); + test_strict +------------- + +(1 row) + +alter function test_strict(text) called on null input; +select test_strict(NULL); + test_strict +------------------- + got passed a null +(1 row) + +create function non_strict(text) returns text as + 'select coalesce($1, ''got passed a null'');' + language sql called on null input; +select non_strict(NULL); + non_strict +------------------- + got passed a null +(1 row) + +alter function non_strict(text) returns null on null input; +select non_strict(NULL); + non_strict +------------ + +(1 row) + +-- +-- alter object set schema +-- +create schema alter1; +create schema alter2; +create table alter1.t1(f1 serial primary key, f2 int check (f2 > 0)); +create view alter1.v1 as select * from alter1.t1; +create function alter1.plus1(int) returns int as 'select $1+1' language sql; +create domain alter1.posint integer check (value > 0); +create type alter1.ctype as (f1 int, f2 text); +create function alter1.same(alter1.ctype, alter1.ctype) returns boolean language sql +as 'select $1.f1 is not distinct from $2.f1 and $1.f2 is not distinct from $2.f2'; +create operator alter1.=(procedure = alter1.same, leftarg = alter1.ctype, rightarg = alter1.ctype); +create operator class alter1.ctype_hash_ops default for type alter1.ctype using hash as + operator 1 alter1.=(alter1.ctype, alter1.ctype); +create conversion alter1.latin1_to_utf8 for 'latin1' to 'utf8' from iso8859_1_to_utf8; +create text search parser alter1.prs(start = prsd_start, gettoken = prsd_nexttoken, end = prsd_end, lextypes = prsd_lextype); +create text search configuration alter1.cfg(parser = alter1.prs); +create text search template alter1.tmpl(init = dsimple_init, lexize = dsimple_lexize); +create text search dictionary alter1.dict(template = alter1.tmpl); +insert into alter1.t1(f2) values(11); +insert into alter1.t1(f2) values(12); +alter table alter1.t1 set schema alter1; -- no-op, same schema +alter table alter1.t1 set schema alter2; +alter table alter1.v1 set schema alter2; +alter function alter1.plus1(int) set schema alter2; +alter domain alter1.posint set schema alter2; +alter operator class alter1.ctype_hash_ops using hash set schema alter2; +alter operator family alter1.ctype_hash_ops using hash set schema alter2; +alter operator alter1.=(alter1.ctype, alter1.ctype) set schema alter2; +alter function alter1.same(alter1.ctype, alter1.ctype) set schema alter2; +alter type alter1.ctype set schema alter1; -- no-op, same schema +alter type alter1.ctype set schema alter2; +alter conversion alter1.latin1_to_utf8 set schema alter2; +alter text search parser alter1.prs set schema alter2; +alter text search configuration alter1.cfg set schema alter2; +alter text search template alter1.tmpl set schema alter2; +alter text search dictionary alter1.dict set schema alter2; +-- this should succeed because nothing is left in alter1 +drop schema alter1; +insert into alter2.t1(f2) values(13); +insert into alter2.t1(f2) values(14); +select * from alter2.t1; + f1 | f2 +----+---- + 1 | 11 + 2 | 12 + 3 | 13 + 4 | 14 +(4 rows) + +select * from alter2.v1; + f1 | f2 +----+---- + 1 | 11 + 2 | 12 + 3 | 13 + 4 | 14 +(4 rows) + +select alter2.plus1(41); + plus1 +------- + 42 +(1 row) + +-- clean up +drop schema alter2 cascade; +NOTICE: drop cascades to 13 other objects +DETAIL: drop cascades to table alter2.t1 +drop cascades to view alter2.v1 +drop cascades to function alter2.plus1(integer) +drop cascades to type alter2.posint +drop cascades to type alter2.ctype +drop cascades to function alter2.same(alter2.ctype,alter2.ctype) +drop cascades to operator alter2.=(alter2.ctype,alter2.ctype) +drop cascades to operator family alter2.ctype_hash_ops for access method hash +drop cascades to conversion alter2.latin1_to_utf8 +drop cascades to text search parser alter2.prs +drop cascades to text search configuration alter2.cfg +drop cascades to text search template alter2.tmpl +drop cascades to text search dictionary alter2.dict +-- +-- composite types +-- +CREATE TYPE test_type AS (a int); +\d test_type + Composite type "public.test_type" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + a | integer | | | + +ALTER TYPE nosuchtype ADD ATTRIBUTE b text; -- fails +ERROR: relation "nosuchtype" does not exist +ALTER TYPE test_type ADD ATTRIBUTE b text; +\d test_type + Composite type "public.test_type" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + a | integer | | | + b | text | | | + +ALTER TYPE test_type ADD ATTRIBUTE b text; -- fails +ERROR: column "b" of relation "test_type" already exists +ALTER TYPE test_type ALTER ATTRIBUTE b SET DATA TYPE varchar; +\d test_type + Composite type "public.test_type" + Column | Type | Collation | Nullable | Default +--------+-------------------+-----------+----------+--------- + a | integer | | | + b | character varying | | | + +ALTER TYPE test_type ALTER ATTRIBUTE b SET DATA TYPE integer; +\d test_type + Composite type "public.test_type" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + a | integer | | | + b | integer | | | + +ALTER TYPE test_type DROP ATTRIBUTE b; +\d test_type + Composite type "public.test_type" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + a | integer | | | + +ALTER TYPE test_type DROP ATTRIBUTE c; -- fails +ERROR: column "c" of relation "test_type" does not exist +ALTER TYPE test_type DROP ATTRIBUTE IF EXISTS c; +NOTICE: column "c" of relation "test_type" does not exist, skipping +ALTER TYPE test_type DROP ATTRIBUTE a, ADD ATTRIBUTE d boolean; +\d test_type + Composite type "public.test_type" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + d | boolean | | | + +ALTER TYPE test_type RENAME ATTRIBUTE a TO aa; +ERROR: column "a" does not exist +ALTER TYPE test_type RENAME ATTRIBUTE d TO dd; +\d test_type + Composite type "public.test_type" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + dd | boolean | | | + +DROP TYPE test_type; +CREATE TYPE test_type1 AS (a int, b text); +CREATE TABLE test_tbl1 (x int, y test_type1); +ALTER TYPE test_type1 ALTER ATTRIBUTE b TYPE varchar; -- fails +ERROR: cannot alter type "test_type1" because column "test_tbl1.y" uses it +DROP TABLE test_tbl1; +CREATE TABLE test_tbl1 (x int, y text); +CREATE INDEX test_tbl1_idx ON test_tbl1((row(x,y)::test_type1)); +ALTER TYPE test_type1 ALTER ATTRIBUTE b TYPE varchar; -- fails +ERROR: cannot alter type "test_type1" because column "test_tbl1_idx.row" uses it +DROP TABLE test_tbl1; +DROP TYPE test_type1; +CREATE TYPE test_type2 AS (a int, b text); +CREATE TABLE test_tbl2 OF test_type2; +CREATE TABLE test_tbl2_subclass () INHERITS (test_tbl2); +\d test_type2 + Composite type "public.test_type2" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + a | integer | | | + b | text | | | + +\d test_tbl2 + Table "public.test_tbl2" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + a | integer | | | + b | text | | | +Number of child tables: 1 (Use \d+ to list them.) +Typed table of type: test_type2 + +ALTER TYPE test_type2 ADD ATTRIBUTE c text; -- fails +ERROR: cannot alter type "test_type2" because it is the type of a typed table +HINT: Use ALTER ... CASCADE to alter the typed tables too. +ALTER TYPE test_type2 ADD ATTRIBUTE c text CASCADE; +\d test_type2 + Composite type "public.test_type2" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + a | integer | | | + b | text | | | + c | text | | | + +\d test_tbl2 + Table "public.test_tbl2" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + a | integer | | | + b | text | | | + c | text | | | +Number of child tables: 1 (Use \d+ to list them.) +Typed table of type: test_type2 + +ALTER TYPE test_type2 ALTER ATTRIBUTE b TYPE varchar; -- fails +ERROR: cannot alter type "test_type2" because it is the type of a typed table +HINT: Use ALTER ... CASCADE to alter the typed tables too. +ALTER TYPE test_type2 ALTER ATTRIBUTE b TYPE varchar CASCADE; +\d test_type2 + Composite type "public.test_type2" + Column | Type | Collation | Nullable | Default +--------+-------------------+-----------+----------+--------- + a | integer | | | + b | character varying | | | + c | text | | | + +\d test_tbl2 + Table "public.test_tbl2" + Column | Type | Collation | Nullable | Default +--------+-------------------+-----------+----------+--------- + a | integer | | | + b | character varying | | | + c | text | | | +Number of child tables: 1 (Use \d+ to list them.) +Typed table of type: test_type2 + +ALTER TYPE test_type2 DROP ATTRIBUTE b; -- fails +ERROR: cannot alter type "test_type2" because it is the type of a typed table +HINT: Use ALTER ... CASCADE to alter the typed tables too. +ALTER TYPE test_type2 DROP ATTRIBUTE b CASCADE; +\d test_type2 + Composite type "public.test_type2" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + a | integer | | | + c | text | | | + +\d test_tbl2 + Table "public.test_tbl2" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + a | integer | | | + c | text | | | +Number of child tables: 1 (Use \d+ to list them.) +Typed table of type: test_type2 + +ALTER TYPE test_type2 RENAME ATTRIBUTE a TO aa; -- fails +ERROR: cannot alter type "test_type2" because it is the type of a typed table +HINT: Use ALTER ... CASCADE to alter the typed tables too. +ALTER TYPE test_type2 RENAME ATTRIBUTE a TO aa CASCADE; +\d test_type2 + Composite type "public.test_type2" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + aa | integer | | | + c | text | | | + +\d test_tbl2 + Table "public.test_tbl2" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + aa | integer | | | + c | text | | | +Number of child tables: 1 (Use \d+ to list them.) +Typed table of type: test_type2 + +\d test_tbl2_subclass + Table "public.test_tbl2_subclass" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + aa | integer | | | + c | text | | | +Inherits: test_tbl2 + +DROP TABLE test_tbl2_subclass, test_tbl2; +DROP TYPE test_type2; +CREATE TYPE test_typex AS (a int, b text); +CREATE TABLE test_tblx (x int, y test_typex check ((y).a > 0)); +ALTER TYPE test_typex DROP ATTRIBUTE a; -- fails +ERROR: cannot drop column a of composite type test_typex because other objects depend on it +DETAIL: constraint test_tblx_y_check on table test_tblx depends on column a of composite type test_typex +HINT: Use DROP ... CASCADE to drop the dependent objects too. +ALTER TYPE test_typex DROP ATTRIBUTE a CASCADE; +NOTICE: drop cascades to constraint test_tblx_y_check on table test_tblx +\d test_tblx + Table "public.test_tblx" + Column | Type | Collation | Nullable | Default +--------+------------+-----------+----------+--------- + x | integer | | | + y | test_typex | | | + +DROP TABLE test_tblx; +DROP TYPE test_typex; +-- This test isn't that interesting on its own, but the purpose is to leave +-- behind a table to test pg_upgrade with. The table has a composite type +-- column in it, and the composite type has a dropped attribute. +CREATE TYPE test_type3 AS (a int); +CREATE TABLE test_tbl3 (c) AS SELECT '(1)'::test_type3; +ALTER TYPE test_type3 DROP ATTRIBUTE a, ADD ATTRIBUTE b int; +CREATE TYPE test_type_empty AS (); +DROP TYPE test_type_empty; +-- +-- typed tables: OF / NOT OF +-- +CREATE TYPE tt_t0 AS (z inet, x int, y numeric(8,2)); +ALTER TYPE tt_t0 DROP ATTRIBUTE z; +CREATE TABLE tt0 (x int NOT NULL, y numeric(8,2)); -- OK +CREATE TABLE tt1 (x int, y bigint); -- wrong base type +CREATE TABLE tt2 (x int, y numeric(9,2)); -- wrong typmod +CREATE TABLE tt3 (y numeric(8,2), x int); -- wrong column order +CREATE TABLE tt4 (x int); -- too few columns +CREATE TABLE tt5 (x int, y numeric(8,2), z int); -- too few columns +CREATE TABLE tt6 () INHERITS (tt0); -- can't have a parent +CREATE TABLE tt7 (x int, q text, y numeric(8,2)); +ALTER TABLE tt7 DROP q; -- OK +ALTER TABLE tt0 OF tt_t0; +ERROR: unsupported alter table subcommand +DETAIL: Subcommand "OF" is not supported on OrioleDB tables yet. This will be implemented in future. +ALTER TABLE tt1 OF tt_t0; +ERROR: unsupported alter table subcommand +DETAIL: Subcommand "OF" is not supported on OrioleDB tables yet. This will be implemented in future. +ALTER TABLE tt2 OF tt_t0; +ERROR: unsupported alter table subcommand +DETAIL: Subcommand "OF" is not supported on OrioleDB tables yet. This will be implemented in future. +ALTER TABLE tt3 OF tt_t0; +ERROR: unsupported alter table subcommand +DETAIL: Subcommand "OF" is not supported on OrioleDB tables yet. This will be implemented in future. +ALTER TABLE tt4 OF tt_t0; +ERROR: unsupported alter table subcommand +DETAIL: Subcommand "OF" is not supported on OrioleDB tables yet. This will be implemented in future. +ALTER TABLE tt5 OF tt_t0; +ERROR: unsupported alter table subcommand +DETAIL: Subcommand "OF" is not supported on OrioleDB tables yet. This will be implemented in future. +ALTER TABLE tt6 OF tt_t0; +ERROR: unsupported alter table subcommand +DETAIL: Subcommand "OF" is not supported on OrioleDB tables yet. This will be implemented in future. +ALTER TABLE tt7 OF tt_t0; +ERROR: unsupported alter table subcommand +DETAIL: Subcommand "OF" is not supported on OrioleDB tables yet. This will be implemented in future. +CREATE TYPE tt_t1 AS (x int, y numeric(8,2)); +ALTER TABLE tt7 OF tt_t1; -- reassign an already-typed table +ERROR: unsupported alter table subcommand +DETAIL: Subcommand "OF" is not supported on OrioleDB tables yet. This will be implemented in future. +ALTER TABLE tt7 NOT OF; +ERROR: unsupported alter table subcommand +DETAIL: Subcommand "NOT OF" is not supported on OrioleDB tables yet. This will be implemented in future. +\d tt7 + Table "public.tt7" + Column | Type | Collation | Nullable | Default +--------+--------------+-----------+----------+--------- + x | integer | | | + y | numeric(8,2) | | | + +-- make sure we can drop a constraint on the parent but it remains on the child +CREATE TABLE test_drop_constr_parent (c text CHECK (c IS NOT NULL)); +CREATE TABLE test_drop_constr_child () INHERITS (test_drop_constr_parent); +ALTER TABLE ONLY test_drop_constr_parent DROP CONSTRAINT "test_drop_constr_parent_c_check"; +-- should fail +INSERT INTO test_drop_constr_child (c) VALUES (NULL); +ERROR: new row for relation "test_drop_constr_child" violates check constraint "test_drop_constr_parent_c_check" +DETAIL: Failing row contains (null). +DROP TABLE test_drop_constr_parent CASCADE; +NOTICE: drop cascades to table test_drop_constr_child +-- +-- IF EXISTS test +-- +ALTER TABLE IF EXISTS tt8 ADD COLUMN f int; +NOTICE: relation "tt8" does not exist, skipping +ALTER TABLE IF EXISTS tt8 ADD CONSTRAINT xxx PRIMARY KEY(f); +NOTICE: relation "tt8" does not exist, skipping +ALTER TABLE IF EXISTS tt8 ADD CHECK (f BETWEEN 0 AND 10); +NOTICE: relation "tt8" does not exist, skipping +ALTER TABLE IF EXISTS tt8 ALTER COLUMN f SET DEFAULT 0; +NOTICE: relation "tt8" does not exist, skipping +ALTER TABLE IF EXISTS tt8 RENAME COLUMN f TO f1; +NOTICE: relation "tt8" does not exist, skipping +ALTER TABLE IF EXISTS tt8 SET SCHEMA alter2; +NOTICE: relation "tt8" does not exist, skipping +CREATE TABLE tt8(a int); +CREATE SCHEMA alter2; +ALTER TABLE IF EXISTS tt8 ADD COLUMN f int; +ALTER TABLE IF EXISTS tt8 ADD CONSTRAINT xxx PRIMARY KEY(f); +ALTER TABLE IF EXISTS tt8 ADD CHECK (f BETWEEN 0 AND 10); +ALTER TABLE IF EXISTS tt8 ALTER COLUMN f SET DEFAULT 0; +ALTER TABLE IF EXISTS tt8 RENAME COLUMN f TO f1; +ALTER TABLE IF EXISTS tt8 SET SCHEMA alter2; +\d alter2.tt8 + Table "alter2.tt8" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + a | integer | | | + f1 | integer | | not null | 0 +Indexes: + "xxx" PRIMARY KEY, btree (f1) +Check constraints: + "tt8_f_check" CHECK (f1 >= 0 AND f1 <= 10) + +DROP TABLE alter2.tt8; +DROP SCHEMA alter2; +-- +-- Check conflicts between index and CHECK constraint names +-- +CREATE TABLE tt9(c integer); +ALTER TABLE tt9 ADD CHECK(c > 1); +ALTER TABLE tt9 ADD CHECK(c > 2); -- picks nonconflicting name +ALTER TABLE tt9 ADD CONSTRAINT foo CHECK(c > 3); +ALTER TABLE tt9 ADD CONSTRAINT foo CHECK(c > 4); -- fail, dup name +ERROR: constraint "foo" for relation "tt9" already exists +ALTER TABLE tt9 ADD UNIQUE(c); +ALTER TABLE tt9 ADD UNIQUE(c); -- picks nonconflicting name +ALTER TABLE tt9 ADD CONSTRAINT tt9_c_key UNIQUE(c); -- fail, dup name +ERROR: relation "tt9_c_key" already exists +ALTER TABLE tt9 ADD CONSTRAINT foo UNIQUE(c); -- fail, dup name +ERROR: constraint "foo" for relation "tt9" already exists +ALTER TABLE tt9 ADD CONSTRAINT tt9_c_key CHECK(c > 5); -- fail, dup name +ERROR: constraint "tt9_c_key" for relation "tt9" already exists +ALTER TABLE tt9 ADD CONSTRAINT tt9_c_key2 CHECK(c > 6); +ALTER TABLE tt9 ADD UNIQUE(c); -- picks nonconflicting name +\d tt9 + Table "public.tt9" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + c | integer | | | +Indexes: + "tt9_c_key" UNIQUE CONSTRAINT, btree (c) + "tt9_c_key1" UNIQUE CONSTRAINT, btree (c) + "tt9_c_key3" UNIQUE CONSTRAINT, btree (c) +Check constraints: + "foo" CHECK (c > 3) + "tt9_c_check" CHECK (c > 1) + "tt9_c_check1" CHECK (c > 2) + "tt9_c_key2" CHECK (c > 6) + +DROP TABLE tt9; +-- Check that comments on constraints and indexes are not lost at ALTER TABLE. +CREATE TABLE comment_test ( + id int, + positive_col int CHECK (positive_col > 0), + indexed_col int, + CONSTRAINT comment_test_pk PRIMARY KEY (id)); +CREATE INDEX comment_test_index ON comment_test(indexed_col); +COMMENT ON COLUMN comment_test.id IS 'Column ''id'' on comment_test'; +COMMENT ON INDEX comment_test_index IS 'Simple index on comment_test'; +COMMENT ON CONSTRAINT comment_test_positive_col_check ON comment_test IS 'CHECK constraint on comment_test.positive_col'; +COMMENT ON CONSTRAINT comment_test_pk ON comment_test IS 'PRIMARY KEY constraint of comment_test'; +COMMENT ON INDEX comment_test_pk IS 'Index backing the PRIMARY KEY of comment_test'; +SELECT col_description('comment_test'::regclass, 1) as comment; + comment +----------------------------- + Column 'id' on comment_test +(1 row) + +SELECT indexrelid::regclass::text as index, obj_description(indexrelid, 'pg_class') as comment FROM pg_index where indrelid = 'comment_test'::regclass ORDER BY 1, 2; + index | comment +--------------------+----------------------------------------------- + comment_test_index | Simple index on comment_test + comment_test_pk | Index backing the PRIMARY KEY of comment_test +(2 rows) + +SELECT conname as constraint, obj_description(oid, 'pg_constraint') as comment FROM pg_constraint where conrelid = 'comment_test'::regclass ORDER BY 1, 2; + constraint | comment +---------------------------------+----------------------------------------------- + comment_test_pk | PRIMARY KEY constraint of comment_test + comment_test_positive_col_check | CHECK constraint on comment_test.positive_col +(2 rows) + +-- Change the datatype of all the columns. ALTER TABLE is optimized to not +-- rebuild an index if the new data type is binary compatible with the old +-- one. Check do a dummy ALTER TABLE that doesn't change the datatype +-- first, to test that no-op codepath, and another one that does. +ALTER TABLE comment_test ALTER COLUMN indexed_col SET DATA TYPE int; +ALTER TABLE comment_test ALTER COLUMN indexed_col SET DATA TYPE text; +ALTER TABLE comment_test ALTER COLUMN id SET DATA TYPE int; +ALTER TABLE comment_test ALTER COLUMN id SET DATA TYPE text; +ALTER TABLE comment_test ALTER COLUMN positive_col SET DATA TYPE int; +ALTER TABLE comment_test ALTER COLUMN positive_col SET DATA TYPE bigint; +-- Check that the comments are intact. +SELECT col_description('comment_test'::regclass, 1) as comment; + comment +----------------------------- + Column 'id' on comment_test +(1 row) + +SELECT indexrelid::regclass::text as index, obj_description(indexrelid, 'pg_class') as comment FROM pg_index where indrelid = 'comment_test'::regclass ORDER BY 1, 2; + index | comment +--------------------+----------------------------------------------- + comment_test_index | Simple index on comment_test + comment_test_pk | Index backing the PRIMARY KEY of comment_test +(2 rows) + +SELECT conname as constraint, obj_description(oid, 'pg_constraint') as comment FROM pg_constraint where conrelid = 'comment_test'::regclass ORDER BY 1, 2; + constraint | comment +---------------------------------+----------------------------------------------- + comment_test_pk | PRIMARY KEY constraint of comment_test + comment_test_positive_col_check | CHECK constraint on comment_test.positive_col +(2 rows) + +-- Check compatibility for foreign keys and comments. This is done +-- separately as rebuilding the column type of the parent leads +-- to an error and would reduce the test scope. +CREATE TABLE comment_test_child ( + id text CONSTRAINT comment_test_child_fk REFERENCES comment_test); +CREATE INDEX comment_test_child_fk ON comment_test_child(id); +COMMENT ON COLUMN comment_test_child.id IS 'Column ''id'' on comment_test_child'; +COMMENT ON INDEX comment_test_child_fk IS 'Index backing the FOREIGN KEY of comment_test_child'; +COMMENT ON CONSTRAINT comment_test_child_fk ON comment_test_child IS 'FOREIGN KEY constraint of comment_test_child'; +-- Change column type of parent +ALTER TABLE comment_test ALTER COLUMN id SET DATA TYPE text; +ALTER TABLE comment_test ALTER COLUMN id SET DATA TYPE int USING id::integer; +ERROR: foreign key constraint "comment_test_child_fk" cannot be implemented +DETAIL: Key columns "id" and "id" are of incompatible types: text and integer. +-- Comments should be intact +SELECT col_description('comment_test_child'::regclass, 1) as comment; + comment +----------------------------------- + Column 'id' on comment_test_child +(1 row) + +SELECT indexrelid::regclass::text as index, obj_description(indexrelid, 'pg_class') as comment FROM pg_index where indrelid = 'comment_test_child'::regclass ORDER BY 1, 2; + index | comment +-----------------------+----------------------------------------------------- + comment_test_child_fk | Index backing the FOREIGN KEY of comment_test_child +(1 row) + +SELECT conname as constraint, obj_description(oid, 'pg_constraint') as comment FROM pg_constraint where conrelid = 'comment_test_child'::regclass ORDER BY 1, 2; + constraint | comment +-----------------------+---------------------------------------------- + comment_test_child_fk | FOREIGN KEY constraint of comment_test_child +(1 row) + +-- Check that we map relation oids to filenodes and back correctly. Only +-- display bad mappings so the test output doesn't change all the time. A +-- filenode function call can return NULL for a relation dropped concurrently +-- with the call's surrounding query, so ignore a NULL mapped_oid for +-- relations that no longer exist after all calls finish. +CREATE TEMP TABLE filenode_mapping AS +SELECT + oid, mapped_oid, reltablespace, relfilenode, relname +FROM pg_class, + pg_filenode_relation(reltablespace, pg_relation_filenode(oid)) AS mapped_oid +WHERE relkind IN ('r', 'i', 'S', 't', 'm') AND mapped_oid IS DISTINCT FROM oid; +SELECT m.* FROM filenode_mapping m LEFT JOIN pg_class c ON c.oid = m.oid +WHERE c.oid IS NOT NULL OR m.mapped_oid IS NOT NULL; + oid | mapped_oid | reltablespace | relfilenode | relname +-----+------------+---------------+-------------+--------- +(0 rows) + +-- Checks on creating and manipulation of user defined relations in +-- pg_catalog. +SHOW allow_system_table_mods; + allow_system_table_mods +------------------------- + off +(1 row) + +-- disallowed because of search_path issues with pg_dump +CREATE TABLE pg_catalog.new_system_table(); +ERROR: permission denied to create "pg_catalog.new_system_table" +DETAIL: System catalog modifications are currently disallowed. +-- instead create in public first, move to catalog +CREATE TABLE new_system_table(id serial primary key, othercol text); +ALTER TABLE new_system_table SET SCHEMA pg_catalog; +ALTER TABLE new_system_table SET SCHEMA public; +ALTER TABLE new_system_table SET SCHEMA pg_catalog; +-- will be ignored -- already there: +ALTER TABLE new_system_table SET SCHEMA pg_catalog; +ALTER TABLE new_system_table RENAME TO old_system_table; +CREATE INDEX old_system_table__othercol ON old_system_table (othercol); +INSERT INTO old_system_table(othercol) VALUES ('somedata'), ('otherdata'); +UPDATE old_system_table SET id = -id; +DELETE FROM old_system_table WHERE othercol = 'somedata'; +TRUNCATE old_system_table; +ALTER TABLE old_system_table DROP CONSTRAINT new_system_table_pkey; +ALTER TABLE old_system_table DROP COLUMN othercol; +DROP TABLE old_system_table; +-- set logged +CREATE UNLOGGED TABLE unlogged1(f1 SERIAL PRIMARY KEY, f2 TEXT); -- has sequence, toast +-- check relpersistence of an unlogged table +SELECT relname, relkind, relpersistence FROM pg_class WHERE relname ~ '^unlogged1' +UNION ALL +SELECT r.relname || ' toast table', t.relkind, t.relpersistence FROM pg_class r JOIN pg_class t ON t.oid = r.reltoastrelid WHERE r.relname ~ '^unlogged1' +UNION ALL +SELECT r.relname || ' toast index', ri.relkind, ri.relpersistence FROM pg_class r join pg_class t ON t.oid = r.reltoastrelid JOIN pg_index i ON i.indrelid = t.oid JOIN pg_class ri ON ri.oid = i.indexrelid WHERE r.relname ~ '^unlogged1' +ORDER BY relname; + relname | relkind | relpersistence +-----------------------+---------+---------------- + unlogged1 | r | u + unlogged1 toast index | i | u + unlogged1 toast table | t | u + unlogged1_f1_seq | S | u + unlogged1_pkey | i | u +(5 rows) + +CREATE UNLOGGED TABLE unlogged2(f1 SERIAL PRIMARY KEY, f2 INTEGER REFERENCES unlogged1); -- foreign key +CREATE UNLOGGED TABLE unlogged3(f1 SERIAL PRIMARY KEY, f2 INTEGER REFERENCES unlogged3); -- self-referencing foreign key +ALTER TABLE unlogged3 SET LOGGED; -- skip self-referencing foreign key +ERROR: unsupported alter table subcommand +DETAIL: Subcommand "SET LOGGED" is not supported on OrioleDB tables yet. This will be implemented in future. +ALTER TABLE unlogged2 SET LOGGED; -- fails because a foreign key to an unlogged table exists +ERROR: unsupported alter table subcommand +DETAIL: Subcommand "SET LOGGED" is not supported on OrioleDB tables yet. This will be implemented in future. +ALTER TABLE unlogged1 SET LOGGED; +ERROR: unsupported alter table subcommand +DETAIL: Subcommand "SET LOGGED" is not supported on OrioleDB tables yet. This will be implemented in future. +-- check relpersistence of an unlogged table after changing to permanent +SELECT relname, relkind, relpersistence FROM pg_class WHERE relname ~ '^unlogged1' +UNION ALL +SELECT r.relname || ' toast table', t.relkind, t.relpersistence FROM pg_class r JOIN pg_class t ON t.oid = r.reltoastrelid WHERE r.relname ~ '^unlogged1' +UNION ALL +SELECT r.relname || ' toast index', ri.relkind, ri.relpersistence FROM pg_class r join pg_class t ON t.oid = r.reltoastrelid JOIN pg_index i ON i.indrelid = t.oid JOIN pg_class ri ON ri.oid = i.indexrelid WHERE r.relname ~ '^unlogged1' +ORDER BY relname; + relname | relkind | relpersistence +-----------------------+---------+---------------- + unlogged1 | r | u + unlogged1 toast index | i | u + unlogged1 toast table | t | u + unlogged1_f1_seq | S | u + unlogged1_pkey | i | u +(5 rows) + +ALTER TABLE unlogged1 SET LOGGED; -- silently do nothing +ERROR: unsupported alter table subcommand +DETAIL: Subcommand "SET LOGGED" is not supported on OrioleDB tables yet. This will be implemented in future. +DROP TABLE unlogged3; +DROP TABLE unlogged2; +DROP TABLE unlogged1; +-- set unlogged +CREATE TABLE logged1(f1 SERIAL PRIMARY KEY, f2 TEXT); -- has sequence, toast +-- check relpersistence of a permanent table +SELECT relname, relkind, relpersistence FROM pg_class WHERE relname ~ '^logged1' +UNION ALL +SELECT r.relname || ' toast table', t.relkind, t.relpersistence FROM pg_class r JOIN pg_class t ON t.oid = r.reltoastrelid WHERE r.relname ~ '^logged1' +UNION ALL +SELECT r.relname ||' toast index', ri.relkind, ri.relpersistence FROM pg_class r join pg_class t ON t.oid = r.reltoastrelid JOIN pg_index i ON i.indrelid = t.oid JOIN pg_class ri ON ri.oid = i.indexrelid WHERE r.relname ~ '^logged1' +ORDER BY relname; + relname | relkind | relpersistence +---------------------+---------+---------------- + logged1 | r | p + logged1 toast index | i | p + logged1 toast table | t | p + logged1_f1_seq | S | p + logged1_pkey | i | p +(5 rows) + +CREATE TABLE logged2(f1 SERIAL PRIMARY KEY, f2 INTEGER REFERENCES logged1); -- foreign key +CREATE TABLE logged3(f1 SERIAL PRIMARY KEY, f2 INTEGER REFERENCES logged3); -- self-referencing foreign key +ALTER TABLE logged1 SET UNLOGGED; -- fails because a foreign key from a permanent table exists +ERROR: unsupported alter table subcommand +DETAIL: Subcommand "SET UNLOGGED" is not supported on OrioleDB tables yet. This will be implemented in future. +ALTER TABLE logged3 SET UNLOGGED; -- skip self-referencing foreign key +ERROR: unsupported alter table subcommand +DETAIL: Subcommand "SET UNLOGGED" is not supported on OrioleDB tables yet. This will be implemented in future. +ALTER TABLE logged2 SET UNLOGGED; +ERROR: unsupported alter table subcommand +DETAIL: Subcommand "SET UNLOGGED" is not supported on OrioleDB tables yet. This will be implemented in future. +ALTER TABLE logged1 SET UNLOGGED; +ERROR: unsupported alter table subcommand +DETAIL: Subcommand "SET UNLOGGED" is not supported on OrioleDB tables yet. This will be implemented in future. +-- check relpersistence of a permanent table after changing to unlogged +SELECT relname, relkind, relpersistence FROM pg_class WHERE relname ~ '^logged1' +UNION ALL +SELECT r.relname || ' toast table', t.relkind, t.relpersistence FROM pg_class r JOIN pg_class t ON t.oid = r.reltoastrelid WHERE r.relname ~ '^logged1' +UNION ALL +SELECT r.relname || ' toast index', ri.relkind, ri.relpersistence FROM pg_class r join pg_class t ON t.oid = r.reltoastrelid JOIN pg_index i ON i.indrelid = t.oid JOIN pg_class ri ON ri.oid = i.indexrelid WHERE r.relname ~ '^logged1' +ORDER BY relname; + relname | relkind | relpersistence +---------------------+---------+---------------- + logged1 | r | p + logged1 toast index | i | p + logged1 toast table | t | p + logged1_f1_seq | S | p + logged1_pkey | i | p +(5 rows) + +ALTER TABLE logged1 SET UNLOGGED; -- silently do nothing +ERROR: unsupported alter table subcommand +DETAIL: Subcommand "SET UNLOGGED" is not supported on OrioleDB tables yet. This will be implemented in future. +DROP TABLE logged3; +DROP TABLE logged2; +DROP TABLE logged1; +-- test ADD COLUMN IF NOT EXISTS +CREATE TABLE test_add_column(c1 integer); +\d test_add_column + Table "public.test_add_column" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + c1 | integer | | | + +ALTER TABLE test_add_column + ADD COLUMN c2 integer; +\d test_add_column + Table "public.test_add_column" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + c1 | integer | | | + c2 | integer | | | + +ALTER TABLE test_add_column + ADD COLUMN c2 integer; -- fail because c2 already exists +ERROR: column "c2" of relation "test_add_column" already exists +ALTER TABLE ONLY test_add_column + ADD COLUMN c2 integer; -- fail because c2 already exists +ERROR: column "c2" of relation "test_add_column" already exists +\d test_add_column + Table "public.test_add_column" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + c1 | integer | | | + c2 | integer | | | + +ALTER TABLE test_add_column + ADD COLUMN IF NOT EXISTS c2 integer; -- skipping because c2 already exists +NOTICE: column "c2" of relation "test_add_column" already exists, skipping +ALTER TABLE ONLY test_add_column + ADD COLUMN IF NOT EXISTS c2 integer; -- skipping because c2 already exists +NOTICE: column "c2" of relation "test_add_column" already exists, skipping +\d test_add_column + Table "public.test_add_column" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + c1 | integer | | | + c2 | integer | | | + +ALTER TABLE test_add_column + ADD COLUMN c2 integer, -- fail because c2 already exists + ADD COLUMN c3 integer primary key; +ERROR: column "c2" of relation "test_add_column" already exists +\d test_add_column + Table "public.test_add_column" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + c1 | integer | | | + c2 | integer | | | + +ALTER TABLE test_add_column + ADD COLUMN IF NOT EXISTS c2 integer, -- skipping because c2 already exists + ADD COLUMN c3 integer primary key; +NOTICE: column "c2" of relation "test_add_column" already exists, skipping +\d test_add_column + Table "public.test_add_column" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + c1 | integer | | | + c2 | integer | | | + c3 | integer | | not null | +Indexes: + "test_add_column_pkey" PRIMARY KEY, btree (c3) + +ALTER TABLE test_add_column + ADD COLUMN IF NOT EXISTS c2 integer, -- skipping because c2 already exists + ADD COLUMN IF NOT EXISTS c3 integer primary key; -- skipping because c3 already exists +NOTICE: column "c2" of relation "test_add_column" already exists, skipping +NOTICE: column "c3" of relation "test_add_column" already exists, skipping +\d test_add_column + Table "public.test_add_column" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + c1 | integer | | | + c2 | integer | | | + c3 | integer | | not null | +Indexes: + "test_add_column_pkey" PRIMARY KEY, btree (c3) + +ALTER TABLE test_add_column + ADD COLUMN IF NOT EXISTS c2 integer, -- skipping because c2 already exists + ADD COLUMN IF NOT EXISTS c3 integer, -- skipping because c3 already exists + ADD COLUMN c4 integer REFERENCES test_add_column; +NOTICE: column "c2" of relation "test_add_column" already exists, skipping +NOTICE: column "c3" of relation "test_add_column" already exists, skipping +\d test_add_column + Table "public.test_add_column" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + c1 | integer | | | + c2 | integer | | | + c3 | integer | | not null | + c4 | integer | | | +Indexes: + "test_add_column_pkey" PRIMARY KEY, btree (c3) +Foreign-key constraints: + "test_add_column_c4_fkey" FOREIGN KEY (c4) REFERENCES test_add_column(c3) +Referenced by: + TABLE "test_add_column" CONSTRAINT "test_add_column_c4_fkey" FOREIGN KEY (c4) REFERENCES test_add_column(c3) + +ALTER TABLE test_add_column + ADD COLUMN IF NOT EXISTS c4 integer REFERENCES test_add_column; +NOTICE: column "c4" of relation "test_add_column" already exists, skipping +\d test_add_column + Table "public.test_add_column" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + c1 | integer | | | + c2 | integer | | | + c3 | integer | | not null | + c4 | integer | | | +Indexes: + "test_add_column_pkey" PRIMARY KEY, btree (c3) +Foreign-key constraints: + "test_add_column_c4_fkey" FOREIGN KEY (c4) REFERENCES test_add_column(c3) +Referenced by: + TABLE "test_add_column" CONSTRAINT "test_add_column_c4_fkey" FOREIGN KEY (c4) REFERENCES test_add_column(c3) + +ALTER TABLE test_add_column + ADD COLUMN IF NOT EXISTS c5 SERIAL CHECK (c5 > 8); +\d test_add_column + Table "public.test_add_column" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------------------------------------------- + c1 | integer | | | + c2 | integer | | | + c3 | integer | | not null | + c4 | integer | | | + c5 | integer | | not null | nextval('test_add_column_c5_seq'::regclass) +Indexes: + "test_add_column_pkey" PRIMARY KEY, btree (c3) +Check constraints: + "test_add_column_c5_check" CHECK (c5 > 8) +Foreign-key constraints: + "test_add_column_c4_fkey" FOREIGN KEY (c4) REFERENCES test_add_column(c3) +Referenced by: + TABLE "test_add_column" CONSTRAINT "test_add_column_c4_fkey" FOREIGN KEY (c4) REFERENCES test_add_column(c3) + +ALTER TABLE test_add_column + ADD COLUMN IF NOT EXISTS c5 SERIAL CHECK (c5 > 10); +NOTICE: column "c5" of relation "test_add_column" already exists, skipping +\d test_add_column* + Table "public.test_add_column" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------------------------------------------- + c1 | integer | | | + c2 | integer | | | + c3 | integer | | not null | + c4 | integer | | | + c5 | integer | | not null | nextval('test_add_column_c5_seq'::regclass) +Indexes: + "test_add_column_pkey" PRIMARY KEY, btree (c3) +Check constraints: + "test_add_column_c5_check" CHECK (c5 > 8) +Foreign-key constraints: + "test_add_column_c4_fkey" FOREIGN KEY (c4) REFERENCES test_add_column(c3) +Referenced by: + TABLE "test_add_column" CONSTRAINT "test_add_column_c4_fkey" FOREIGN KEY (c4) REFERENCES test_add_column(c3) + + Sequence "public.test_add_column_c5_seq" + Type | Start | Minimum | Maximum | Increment | Cycles? | Cache +---------+-------+---------+------------+-----------+---------+------- + integer | 1 | 1 | 2147483647 | 1 | no | 1 +Owned by: public.test_add_column.c5 + + Index "public.test_add_column_pkey" + Column | Type | Key? | Definition +--------+---------+------+------------ + c3 | integer | yes | c3 +primary key, btree, for table "public.test_add_column" + +DROP TABLE test_add_column; +\d test_add_column* +-- assorted cases with multiple ALTER TABLE steps +CREATE TABLE ataddindex(f1 INT); +INSERT INTO ataddindex VALUES (42), (43); +CREATE UNIQUE INDEX ataddindexi0 ON ataddindex(f1); +ALTER TABLE ataddindex + ADD PRIMARY KEY USING INDEX ataddindexi0, + ALTER f1 TYPE BIGINT; +\d ataddindex + Table "public.ataddindex" + Column | Type | Collation | Nullable | Default +--------+--------+-----------+----------+--------- + f1 | bigint | | not null | +Indexes: + "ataddindexi0" PRIMARY KEY, btree (f1) + +DROP TABLE ataddindex; +CREATE TABLE ataddindex(f1 VARCHAR(10)); +INSERT INTO ataddindex(f1) VALUES ('foo'), ('a'); +ALTER TABLE ataddindex + ALTER f1 SET DATA TYPE TEXT, + ADD EXCLUDE ((f1 LIKE 'a') WITH =); +\d ataddindex + Table "public.ataddindex" + Column | Type | Collation | Nullable | Default +--------+------+-----------+----------+--------- + f1 | text | | | +Indexes: + "ataddindex_expr_excl" EXCLUDE USING btree ((f1 ~~ 'a'::text) WITH =) + +DROP TABLE ataddindex; +CREATE TABLE ataddindex(id int, ref_id int); +ALTER TABLE ataddindex + ADD PRIMARY KEY (id), + ADD FOREIGN KEY (ref_id) REFERENCES ataddindex; +\d ataddindex + Table "public.ataddindex" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + id | integer | | not null | + ref_id | integer | | | +Indexes: + "ataddindex_pkey" PRIMARY KEY, btree (id) +Foreign-key constraints: + "ataddindex_ref_id_fkey" FOREIGN KEY (ref_id) REFERENCES ataddindex(id) +Referenced by: + TABLE "ataddindex" CONSTRAINT "ataddindex_ref_id_fkey" FOREIGN KEY (ref_id) REFERENCES ataddindex(id) + +DROP TABLE ataddindex; +CREATE TABLE ataddindex(id int, ref_id int); +ALTER TABLE ataddindex + ADD UNIQUE (id), + ADD FOREIGN KEY (ref_id) REFERENCES ataddindex (id); +\d ataddindex + Table "public.ataddindex" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + id | integer | | | + ref_id | integer | | | +Indexes: + "ataddindex_id_key" UNIQUE CONSTRAINT, btree (id) +Foreign-key constraints: + "ataddindex_ref_id_fkey" FOREIGN KEY (ref_id) REFERENCES ataddindex(id) +Referenced by: + TABLE "ataddindex" CONSTRAINT "ataddindex_ref_id_fkey" FOREIGN KEY (ref_id) REFERENCES ataddindex(id) + +DROP TABLE ataddindex; +CREATE TABLE atnotnull1 (); +ALTER TABLE atnotnull1 + ADD COLUMN a INT, + ALTER a SET NOT NULL; +ALTER TABLE atnotnull1 + ADD COLUMN c INT, + ADD PRIMARY KEY (c); +\d+ atnotnull1 + Table "public.atnotnull1" + Column | Type | Collation | Nullable | Default | Storage | Stats target | Description +--------+---------+-----------+----------+---------+---------+--------------+------------- + a | integer | | not null | | plain | | + c | integer | | not null | | plain | | +Indexes: + "atnotnull1_pkey" PRIMARY KEY, btree (c) + +-- cannot drop column that is part of the partition key +CREATE TABLE partitioned ( + a int, + b int +) PARTITION BY RANGE (a, (a+b+1)); +ALTER TABLE partitioned DROP COLUMN a; +ERROR: cannot drop column "a" because it is part of the partition key of relation "partitioned" +ALTER TABLE partitioned ALTER COLUMN a TYPE char(5); +ERROR: cannot alter column "a" because it is part of the partition key of relation "partitioned" +ALTER TABLE partitioned DROP COLUMN b; +ERROR: cannot drop column "b" because it is part of the partition key of relation "partitioned" +ALTER TABLE partitioned ALTER COLUMN b TYPE char(5); +ERROR: cannot alter column "b" because it is part of the partition key of relation "partitioned" +-- specifying storage parameters for partitioned tables is not supported +ALTER TABLE partitioned SET (fillfactor=100); +ERROR: cannot specify storage parameters for a partitioned table +HINT: Specify storage parameters for its leaf partitions instead. +-- partitioned table cannot participate in regular inheritance +CREATE TABLE nonpartitioned ( + a int, + b int +); +ALTER TABLE partitioned INHERIT nonpartitioned; +ERROR: cannot change inheritance of partitioned table +ALTER TABLE nonpartitioned INHERIT partitioned; +ERROR: cannot inherit from partitioned table "partitioned" +-- cannot add NO INHERIT constraint to partitioned tables +ALTER TABLE partitioned ADD CONSTRAINT chk_a CHECK (a > 0) NO INHERIT; +ERROR: cannot add NO INHERIT constraint to partitioned table "partitioned" +DROP TABLE partitioned, nonpartitioned; +-- +-- ATTACH PARTITION +-- +-- check that target table is partitioned +CREATE TABLE unparted ( + a int +); +CREATE TABLE fail_part (like unparted); +ALTER TABLE unparted ATTACH PARTITION fail_part FOR VALUES IN ('a'); +ERROR: table "unparted" is not partitioned +DROP TABLE unparted, fail_part; +-- check that partition bound is compatible +CREATE TABLE list_parted ( + a int NOT NULL, + b char(2) COLLATE "C", + CONSTRAINT check_a CHECK (a > 0) +) PARTITION BY LIST (a); +CREATE TABLE fail_part (LIKE list_parted); +ALTER TABLE list_parted ATTACH PARTITION fail_part FOR VALUES FROM (1) TO (10); +ERROR: invalid bound specification for a list partition +LINE 1: ...list_parted ATTACH PARTITION fail_part FOR VALUES FROM (1) T... + ^ +DROP TABLE fail_part; +-- check that the table being attached exists +ALTER TABLE list_parted ATTACH PARTITION nonexistent FOR VALUES IN (1); +ERROR: relation "nonexistent" does not exist +-- check ownership of the source table +CREATE ROLE regress_test_me; +CREATE ROLE regress_test_not_me; +CREATE TABLE not_owned_by_me (LIKE list_parted); +ALTER TABLE not_owned_by_me OWNER TO regress_test_not_me; +SET SESSION AUTHORIZATION regress_test_me; +CREATE TABLE owned_by_me ( + a int +) PARTITION BY LIST (a); +ALTER TABLE owned_by_me ATTACH PARTITION not_owned_by_me FOR VALUES IN (1); +ERROR: must be owner of table not_owned_by_me +RESET SESSION AUTHORIZATION; +DROP TABLE owned_by_me, not_owned_by_me; +DROP ROLE regress_test_not_me; +DROP ROLE regress_test_me; +-- check that the table being attached is not part of regular inheritance +CREATE TABLE parent (LIKE list_parted); +CREATE TABLE child () INHERITS (parent); +ALTER TABLE list_parted ATTACH PARTITION child FOR VALUES IN (1); +ERROR: cannot attach inheritance child as partition +ALTER TABLE list_parted ATTACH PARTITION parent FOR VALUES IN (1); +ERROR: cannot attach inheritance parent as partition +DROP TABLE child; +-- now it should work, with a little tweak +ALTER TABLE parent ADD CONSTRAINT check_a CHECK (a > 0); +ALTER TABLE list_parted ATTACH PARTITION parent FOR VALUES IN (1); +-- test insert/update, per bug #18550 +INSERT INTO parent VALUES (1); +UPDATE parent SET a = 2 WHERE a = 1; +ERROR: new row for relation "parent" violates partition constraint +DETAIL: Failing row contains (2, null). +DROP TABLE parent CASCADE; +-- check any TEMP-ness +CREATE TEMP TABLE temp_parted (a int) PARTITION BY LIST (a); +CREATE TABLE perm_part (a int); +ALTER TABLE temp_parted ATTACH PARTITION perm_part FOR VALUES IN (1); +ERROR: cannot attach a permanent relation as partition of temporary relation "temp_parted" +DROP TABLE temp_parted, perm_part; +-- check that the table being attached is not a typed table +CREATE TYPE mytype AS (a int); +CREATE TABLE fail_part OF mytype; +ALTER TABLE list_parted ATTACH PARTITION fail_part FOR VALUES IN (1); +ERROR: cannot attach a typed table as partition +DROP TYPE mytype CASCADE; +NOTICE: drop cascades to table fail_part +-- check that the table being attached has only columns present in the parent +CREATE TABLE fail_part (like list_parted, c int); +ALTER TABLE list_parted ATTACH PARTITION fail_part FOR VALUES IN (1); +ERROR: table "fail_part" contains column "c" not found in parent "list_parted" +DETAIL: The new partition may contain only the columns present in parent. +DROP TABLE fail_part; +-- check that the table being attached has every column of the parent +CREATE TABLE fail_part (a int NOT NULL); +ALTER TABLE list_parted ATTACH PARTITION fail_part FOR VALUES IN (1); +ERROR: child table is missing column "b" +DROP TABLE fail_part; +-- check that columns match in type, collation and NOT NULL status +CREATE TABLE fail_part ( + b char(3), + a int NOT NULL +); +ALTER TABLE list_parted ATTACH PARTITION fail_part FOR VALUES IN (1); +ERROR: child table "fail_part" has different type for column "b" +ALTER TABLE fail_part ALTER b TYPE char (2) COLLATE "POSIX"; +ALTER TABLE list_parted ATTACH PARTITION fail_part FOR VALUES IN (1); +ERROR: child table "fail_part" has different collation for column "b" +DROP TABLE fail_part; +-- check that the table being attached has all constraints of the parent +CREATE TABLE fail_part ( + b char(2) COLLATE "C", + a int NOT NULL +); +ALTER TABLE list_parted ATTACH PARTITION fail_part FOR VALUES IN (1); +ERROR: child table is missing constraint "check_a" +-- check that the constraint matches in definition with parent's constraint +ALTER TABLE fail_part ADD CONSTRAINT check_a CHECK (a >= 0); +ALTER TABLE list_parted ATTACH PARTITION fail_part FOR VALUES IN (1); +ERROR: child table "fail_part" has different definition for check constraint "check_a" +DROP TABLE fail_part; +-- check the attributes and constraints after partition is attached +CREATE TABLE part_1 ( + a int NOT NULL, + b char(2) COLLATE "C", + CONSTRAINT check_a CHECK (a > 0) +); +ALTER TABLE list_parted ATTACH PARTITION part_1 FOR VALUES IN (1); +-- attislocal and conislocal are always false for merged attributes and constraints respectively. +SELECT attislocal, attinhcount FROM pg_attribute WHERE attrelid = 'part_1'::regclass AND attnum > 0; + attislocal | attinhcount +------------+------------- + f | 1 + f | 1 +(2 rows) + +SELECT conislocal, coninhcount FROM pg_constraint WHERE conrelid = 'part_1'::regclass AND conname = 'check_a'; + conislocal | coninhcount +------------+------------- + f | 1 +(1 row) + +-- check that the new partition won't overlap with an existing partition +CREATE TABLE fail_part (LIKE part_1 INCLUDING CONSTRAINTS); +ALTER TABLE list_parted ATTACH PARTITION fail_part FOR VALUES IN (1); +ERROR: partition "fail_part" would overlap partition "part_1" +LINE 1: ...LE list_parted ATTACH PARTITION fail_part FOR VALUES IN (1); + ^ +DROP TABLE fail_part; +-- check that an existing table can be attached as a default partition +CREATE TABLE def_part (LIKE list_parted INCLUDING CONSTRAINTS); +ALTER TABLE list_parted ATTACH PARTITION def_part DEFAULT; +-- check attaching default partition fails if a default partition already +-- exists +CREATE TABLE fail_def_part (LIKE part_1 INCLUDING CONSTRAINTS); +ALTER TABLE list_parted ATTACH PARTITION fail_def_part DEFAULT; +ERROR: partition "fail_def_part" conflicts with existing default partition "def_part" +LINE 1: ...ER TABLE list_parted ATTACH PARTITION fail_def_part DEFAULT; + ^ +-- check validation when attaching list partitions +CREATE TABLE list_parted2 ( + a int, + b char +) PARTITION BY LIST (a); +-- check that violating rows are correctly reported +CREATE TABLE part_2 (LIKE list_parted2); +INSERT INTO part_2 VALUES (3, 'a'); +ALTER TABLE list_parted2 ATTACH PARTITION part_2 FOR VALUES IN (2); +ERROR: partition constraint of relation "part_2" is violated by some row +-- should be ok after deleting the bad row +DELETE FROM part_2; +ALTER TABLE list_parted2 ATTACH PARTITION part_2 FOR VALUES IN (2); +-- check partition cannot be attached if default has some row for its values +CREATE TABLE list_parted2_def PARTITION OF list_parted2 DEFAULT; +INSERT INTO list_parted2_def VALUES (11, 'z'); +CREATE TABLE part_3 (LIKE list_parted2); +ALTER TABLE list_parted2 ATTACH PARTITION part_3 FOR VALUES IN (11); +ERROR: updated partition constraint for default partition "list_parted2_def" would be violated by some row +-- should be ok after deleting the bad row +DELETE FROM list_parted2_def WHERE a = 11; +ALTER TABLE list_parted2 ATTACH PARTITION part_3 FOR VALUES IN (11); +-- adding constraints that describe the desired partition constraint +-- (or more restrictive) will help skip the validation scan +CREATE TABLE part_3_4 ( + LIKE list_parted2, + CONSTRAINT check_a CHECK (a IN (3)) +); +-- however, if a list partition does not accept nulls, there should be +-- an explicit NOT NULL constraint on the partition key column for the +-- validation scan to be skipped; +ALTER TABLE list_parted2 ATTACH PARTITION part_3_4 FOR VALUES IN (3, 4); +-- adding a NOT NULL constraint will cause the scan to be skipped +ALTER TABLE list_parted2 DETACH PARTITION part_3_4; +ALTER TABLE part_3_4 ALTER a SET NOT NULL; +ALTER TABLE list_parted2 ATTACH PARTITION part_3_4 FOR VALUES IN (3, 4); +-- check if default partition scan skipped +ALTER TABLE list_parted2_def ADD CONSTRAINT check_a CHECK (a IN (5, 6)); +CREATE TABLE part_55_66 PARTITION OF list_parted2 FOR VALUES IN (55, 66); +-- check validation when attaching range partitions +CREATE TABLE range_parted ( + a int, + b int +) PARTITION BY RANGE (a, b); +-- check that violating rows are correctly reported +CREATE TABLE part1 ( + a int NOT NULL CHECK (a = 1), + b int NOT NULL CHECK (b >= 1 AND b <= 10) +); +INSERT INTO part1 VALUES (1, 10); +-- Remember the TO bound is exclusive +ALTER TABLE range_parted ATTACH PARTITION part1 FOR VALUES FROM (1, 1) TO (1, 10); +ERROR: partition constraint of relation "part1" is violated by some row +-- should be ok after deleting the bad row +DELETE FROM part1; +ALTER TABLE range_parted ATTACH PARTITION part1 FOR VALUES FROM (1, 1) TO (1, 10); +-- adding constraints that describe the desired partition constraint +-- (or more restrictive) will help skip the validation scan +CREATE TABLE part2 ( + a int NOT NULL CHECK (a = 1), + b int NOT NULL CHECK (b >= 10 AND b < 18) +); +ALTER TABLE range_parted ATTACH PARTITION part2 FOR VALUES FROM (1, 10) TO (1, 20); +-- Create default partition +CREATE TABLE partr_def1 PARTITION OF range_parted DEFAULT; +-- Only one default partition is allowed, hence, following should give error +CREATE TABLE partr_def2 (LIKE part1 INCLUDING CONSTRAINTS); +ALTER TABLE range_parted ATTACH PARTITION partr_def2 DEFAULT; +ERROR: partition "partr_def2" conflicts with existing default partition "partr_def1" +LINE 1: ...LTER TABLE range_parted ATTACH PARTITION partr_def2 DEFAULT; + ^ +-- Overlapping partitions cannot be attached, hence, following should give error +INSERT INTO partr_def1 VALUES (2, 10); +CREATE TABLE part3 (LIKE range_parted); +ALTER TABLE range_parted ATTACH partition part3 FOR VALUES FROM (2, 10) TO (2, 20); +ERROR: updated partition constraint for default partition "partr_def1" would be violated by some row +-- Attaching partitions should be successful when there are no overlapping rows +ALTER TABLE range_parted ATTACH partition part3 FOR VALUES FROM (3, 10) TO (3, 20); +-- check that leaf partitions are scanned when attaching a partitioned +-- table +CREATE TABLE part_5 ( + LIKE list_parted2 +) PARTITION BY LIST (b); +-- check that violating rows are correctly reported +CREATE TABLE part_5_a PARTITION OF part_5 FOR VALUES IN ('a'); +INSERT INTO part_5_a (a, b) VALUES (6, 'a'); +ALTER TABLE list_parted2 ATTACH PARTITION part_5 FOR VALUES IN (5); +ERROR: partition constraint of relation "part_5_a" is violated by some row +-- delete the faulting row and also add a constraint to skip the scan +DELETE FROM part_5_a WHERE a NOT IN (3); +ALTER TABLE part_5 ADD CONSTRAINT check_a CHECK (a IS NOT NULL AND a = 5); +ALTER TABLE list_parted2 ATTACH PARTITION part_5 FOR VALUES IN (5); +ALTER TABLE list_parted2 DETACH PARTITION part_5; +ALTER TABLE part_5 DROP CONSTRAINT check_a; +-- scan should again be skipped, even though NOT NULL is now a column property +ALTER TABLE part_5 ADD CONSTRAINT check_a CHECK (a IN (5)), ALTER a SET NOT NULL; +ALTER TABLE list_parted2 ATTACH PARTITION part_5 FOR VALUES IN (5); +-- Check the case where attnos of the partitioning columns in the table being +-- attached differs from the parent. It should not affect the constraint- +-- checking logic that allows to skip the scan. +CREATE TABLE part_6 ( + c int, + LIKE list_parted2, + CONSTRAINT check_a CHECK (a IS NOT NULL AND a = 6) +); +ALTER TABLE part_6 DROP c; +ALTER TABLE list_parted2 ATTACH PARTITION part_6 FOR VALUES IN (6); +-- Similar to above, but the table being attached is a partitioned table +-- whose partition has still different attnos for the root partitioning +-- columns. +CREATE TABLE part_7 ( + LIKE list_parted2, + CONSTRAINT check_a CHECK (a IS NOT NULL AND a = 7) +) PARTITION BY LIST (b); +CREATE TABLE part_7_a_null ( + c int, + d int, + e int, + LIKE list_parted2, -- 'a' will have attnum = 4 + CONSTRAINT check_b CHECK (b IS NULL OR b = 'a'), + CONSTRAINT check_a CHECK (a IS NOT NULL AND a = 7) +); +ALTER TABLE part_7_a_null DROP c, DROP d, DROP e; +ALTER TABLE part_7 ATTACH PARTITION part_7_a_null FOR VALUES IN ('a', null); +ALTER TABLE list_parted2 ATTACH PARTITION part_7 FOR VALUES IN (7); +-- Same example, but check this time that the constraint correctly detects +-- violating rows +ALTER TABLE list_parted2 DETACH PARTITION part_7; +ALTER TABLE part_7 DROP CONSTRAINT check_a; -- thusly, scan won't be skipped +INSERT INTO part_7 (a, b) VALUES (8, null), (9, 'a'); +SELECT tableoid::regclass, a, b FROM part_7 order by a; + tableoid | a | b +---------------+---+--- + part_7_a_null | 8 | + part_7_a_null | 9 | a +(2 rows) + +ALTER TABLE list_parted2 ATTACH PARTITION part_7 FOR VALUES IN (7); +ERROR: partition constraint of relation "part_7_a_null" is violated by some row +-- check that leaf partitions of default partition are scanned when +-- attaching a partitioned table. +ALTER TABLE part_5 DROP CONSTRAINT check_a; +CREATE TABLE part5_def PARTITION OF part_5 DEFAULT PARTITION BY LIST(a); +CREATE TABLE part5_def_p1 PARTITION OF part5_def FOR VALUES IN (5); +INSERT INTO part5_def_p1 VALUES (5, 'y'); +CREATE TABLE part5_p1 (LIKE part_5); +ALTER TABLE part_5 ATTACH PARTITION part5_p1 FOR VALUES IN ('y'); +ERROR: updated partition constraint for default partition "part5_def_p1" would be violated by some row +-- should be ok after deleting the bad row +DELETE FROM part5_def_p1 WHERE b = 'y'; +ALTER TABLE part_5 ATTACH PARTITION part5_p1 FOR VALUES IN ('y'); +-- check that the table being attached is not already a partition +ALTER TABLE list_parted2 ATTACH PARTITION part_2 FOR VALUES IN (2); +ERROR: "part_2" is already a partition +-- check that circular inheritance is not allowed +ALTER TABLE part_5 ATTACH PARTITION list_parted2 FOR VALUES IN ('b'); +ERROR: circular inheritance not allowed +DETAIL: "part_5" is already a child of "list_parted2". +ALTER TABLE list_parted2 ATTACH PARTITION list_parted2 FOR VALUES IN (0); +ERROR: circular inheritance not allowed +DETAIL: "list_parted2" is already a child of "list_parted2". +-- If a partitioned table being created or an existing table being attached +-- as a partition does not have a constraint that would allow validation scan +-- to be skipped, but an individual partition does, then the partition's +-- validation scan is skipped. +CREATE TABLE quuux (a int, b text) PARTITION BY LIST (a); +CREATE TABLE quuux_default PARTITION OF quuux DEFAULT PARTITION BY LIST (b); +CREATE TABLE quuux_default1 PARTITION OF quuux_default ( + CONSTRAINT check_1 CHECK (a IS NOT NULL AND a = 1) +) FOR VALUES IN ('b'); +CREATE TABLE quuux1 (a int, b text); +ALTER TABLE quuux ATTACH PARTITION quuux1 FOR VALUES IN (1); -- validate! +CREATE TABLE quuux2 (a int, b text); +ALTER TABLE quuux ATTACH PARTITION quuux2 FOR VALUES IN (2); -- skip validation +DROP TABLE quuux1, quuux2; +-- should validate for quuux1, but not for quuux2 +CREATE TABLE quuux1 PARTITION OF quuux FOR VALUES IN (1); +CREATE TABLE quuux2 PARTITION OF quuux FOR VALUES IN (2); +DROP TABLE quuux; +-- check validation when attaching hash partitions +-- Use hand-rolled hash functions and operator class to get predictable result +-- on different machines. part_test_int4_ops is defined in test_setup.sql. +-- check that the new partition won't overlap with an existing partition +CREATE TABLE hash_parted ( + a int, + b int +) PARTITION BY HASH (a part_test_int4_ops); +CREATE TABLE hpart_1 PARTITION OF hash_parted FOR VALUES WITH (MODULUS 4, REMAINDER 0); +CREATE TABLE fail_part (LIKE hpart_1); +ALTER TABLE hash_parted ATTACH PARTITION fail_part FOR VALUES WITH (MODULUS 8, REMAINDER 4); +ERROR: partition "fail_part" would overlap partition "hpart_1" +LINE 1: ...hash_parted ATTACH PARTITION fail_part FOR VALUES WITH (MODU... + ^ +ALTER TABLE hash_parted ATTACH PARTITION fail_part FOR VALUES WITH (MODULUS 8, REMAINDER 0); +ERROR: partition "fail_part" would overlap partition "hpart_1" +LINE 1: ...hash_parted ATTACH PARTITION fail_part FOR VALUES WITH (MODU... + ^ +DROP TABLE fail_part; +-- check validation when attaching hash partitions +-- check that violating rows are correctly reported +CREATE TABLE hpart_2 (LIKE hash_parted); +INSERT INTO hpart_2 VALUES (3, 0); +ALTER TABLE hash_parted ATTACH PARTITION hpart_2 FOR VALUES WITH (MODULUS 4, REMAINDER 1); +ERROR: partition constraint of relation "hpart_2" is violated by some row +-- should be ok after deleting the bad row +DELETE FROM hpart_2; +ALTER TABLE hash_parted ATTACH PARTITION hpart_2 FOR VALUES WITH (MODULUS 4, REMAINDER 1); +-- check that leaf partitions are scanned when attaching a partitioned +-- table +CREATE TABLE hpart_5 ( + LIKE hash_parted +) PARTITION BY LIST (b); +-- check that violating rows are correctly reported +CREATE TABLE hpart_5_a PARTITION OF hpart_5 FOR VALUES IN ('1', '2', '3'); +INSERT INTO hpart_5_a (a, b) VALUES (7, 1); +ALTER TABLE hash_parted ATTACH PARTITION hpart_5 FOR VALUES WITH (MODULUS 4, REMAINDER 2); +ERROR: partition constraint of relation "hpart_5_a" is violated by some row +-- should be ok after deleting the bad row +DELETE FROM hpart_5_a; +ALTER TABLE hash_parted ATTACH PARTITION hpart_5 FOR VALUES WITH (MODULUS 4, REMAINDER 2); +-- check that the table being attach is with valid modulus and remainder value +CREATE TABLE fail_part(LIKE hash_parted); +ALTER TABLE hash_parted ATTACH PARTITION fail_part FOR VALUES WITH (MODULUS 0, REMAINDER 1); +ERROR: modulus for hash partition must be an integer value greater than zero +ALTER TABLE hash_parted ATTACH PARTITION fail_part FOR VALUES WITH (MODULUS 8, REMAINDER 8); +ERROR: remainder for hash partition must be less than modulus +ALTER TABLE hash_parted ATTACH PARTITION fail_part FOR VALUES WITH (MODULUS 3, REMAINDER 2); +ERROR: every hash partition modulus must be a factor of the next larger modulus +DETAIL: The new modulus 3 is not a factor of 4, the modulus of existing partition "hpart_1". +DROP TABLE fail_part; +-- +-- DETACH PARTITION +-- +-- check that the table is partitioned at all +CREATE TABLE regular_table (a int); +ALTER TABLE regular_table DETACH PARTITION any_name; +ERROR: table "regular_table" is not partitioned +DROP TABLE regular_table; +-- check that the partition being detached exists at all +ALTER TABLE list_parted2 DETACH PARTITION part_4; +ERROR: relation "part_4" does not exist +ALTER TABLE hash_parted DETACH PARTITION hpart_4; +ERROR: relation "hpart_4" does not exist +-- check that the partition being detached is actually a partition of the parent +CREATE TABLE not_a_part (a int); +ALTER TABLE list_parted2 DETACH PARTITION not_a_part; +ERROR: relation "not_a_part" is not a partition of relation "list_parted2" +ALTER TABLE list_parted2 DETACH PARTITION part_1; +ERROR: relation "part_1" is not a partition of relation "list_parted2" +ALTER TABLE hash_parted DETACH PARTITION not_a_part; +ERROR: relation "not_a_part" is not a partition of relation "hash_parted" +DROP TABLE not_a_part; +-- check that, after being detached, attinhcount/coninhcount is dropped to 0 and +-- attislocal/conislocal is set to true +ALTER TABLE list_parted2 DETACH PARTITION part_3_4; +SELECT attinhcount, attislocal FROM pg_attribute WHERE attrelid = 'part_3_4'::regclass AND attnum > 0; + attinhcount | attislocal +-------------+------------ + 0 | t + 0 | t +(2 rows) + +SELECT coninhcount, conislocal FROM pg_constraint WHERE conrelid = 'part_3_4'::regclass AND conname = 'check_a'; + coninhcount | conislocal +-------------+------------ + 0 | t +(1 row) + +DROP TABLE part_3_4; +-- check that a detached partition is not dropped on dropping a partitioned table +CREATE TABLE range_parted2 ( + a int +) PARTITION BY RANGE(a); +CREATE TABLE part_rp PARTITION OF range_parted2 FOR VALUES FROM (0) to (100); +ALTER TABLE range_parted2 DETACH PARTITION part_rp; +DROP TABLE range_parted2; +SELECT * from part_rp; + a +--- +(0 rows) + +DROP TABLE part_rp; +-- concurrent detach +CREATE TABLE range_parted2 ( + a int +) PARTITION BY RANGE(a); +CREATE TABLE part_rp PARTITION OF range_parted2 FOR VALUES FROM (0) to (100); +BEGIN; +-- doesn't work in a partition block +ALTER TABLE range_parted2 DETACH PARTITION part_rp CONCURRENTLY; +ERROR: ALTER TABLE ... DETACH CONCURRENTLY cannot run inside a transaction block +COMMIT; +CREATE TABLE part_rpd PARTITION OF range_parted2 DEFAULT; +-- doesn't work if there's a default partition +ALTER TABLE range_parted2 DETACH PARTITION part_rp CONCURRENTLY; +ERROR: cannot detach partitions concurrently when a default partition exists +-- doesn't work for the default partition +ALTER TABLE range_parted2 DETACH PARTITION part_rpd CONCURRENTLY; +ERROR: cannot detach partitions concurrently when a default partition exists +DROP TABLE part_rpd; +-- works fine +ALTER TABLE range_parted2 DETACH PARTITION part_rp CONCURRENTLY; +\d+ range_parted2 + Partitioned table "public.range_parted2" + Column | Type | Collation | Nullable | Default | Storage | Stats target | Description +--------+---------+-----------+----------+---------+---------+--------------+------------- + a | integer | | | | plain | | +Partition key: RANGE (a) +Number of partitions: 0 + +-- constraint should be created +\d part_rp + Table "public.part_rp" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + a | integer | | | +Check constraints: + "part_rp_a_check" CHECK (a IS NOT NULL AND a >= 0 AND a < 100) + +CREATE TABLE part_rp100 PARTITION OF range_parted2 (CHECK (a>=123 AND a<133 AND a IS NOT NULL)) FOR VALUES FROM (100) to (200); +ALTER TABLE range_parted2 DETACH PARTITION part_rp100 CONCURRENTLY; +-- redundant constraint should not be created +\d part_rp100 + Table "public.part_rp100" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + a | integer | | | +Check constraints: + "part_rp100_a_check" CHECK (a >= 123 AND a < 133 AND a IS NOT NULL) + +DROP TABLE range_parted2; +-- Check ALTER TABLE commands for partitioned tables and partitions +-- cannot add/drop column to/from *only* the parent +ALTER TABLE ONLY list_parted2 ADD COLUMN c int; +ERROR: column must be added to child tables too +ALTER TABLE ONLY list_parted2 DROP COLUMN b; +ERROR: cannot drop column from only the partitioned table when partitions exist +HINT: Do not specify the ONLY keyword. +-- cannot add a column to partition or drop an inherited one +ALTER TABLE part_2 ADD COLUMN c text; +ERROR: cannot add column to a partition +ALTER TABLE part_2 DROP COLUMN b; +ERROR: cannot drop inherited column "b" +-- Nor rename, alter type +ALTER TABLE part_2 RENAME COLUMN b to c; +ERROR: cannot rename inherited column "b" +ALTER TABLE part_2 ALTER COLUMN b TYPE text; +ERROR: cannot alter inherited column "b" +-- cannot add/drop NOT NULL or check constraints to *only* the parent, when +-- partitions exist +ALTER TABLE ONLY list_parted2 ALTER b SET NOT NULL; +ERROR: constraint must be added to child tables too +DETAIL: Column "b" of relation "part_2" is not already NOT NULL. +HINT: Do not specify the ONLY keyword. +ALTER TABLE ONLY list_parted2 ADD CONSTRAINT check_b CHECK (b <> 'zz'); +ERROR: constraint must be added to child tables too +ALTER TABLE list_parted2 ALTER b SET NOT NULL; +ALTER TABLE ONLY list_parted2 ALTER b DROP NOT NULL; +ERROR: cannot remove constraint from only the partitioned table when partitions exist +HINT: Do not specify the ONLY keyword. +ALTER TABLE list_parted2 ADD CONSTRAINT check_b CHECK (b <> 'zz'); +ALTER TABLE ONLY list_parted2 DROP CONSTRAINT check_b; +ERROR: cannot remove constraint from only the partitioned table when partitions exist +HINT: Do not specify the ONLY keyword. +-- It's alright though, if no partitions are yet created +CREATE TABLE parted_no_parts (a int) PARTITION BY LIST (a); +ALTER TABLE ONLY parted_no_parts ALTER a SET NOT NULL; +ALTER TABLE ONLY parted_no_parts ADD CONSTRAINT check_a CHECK (a > 0); +ALTER TABLE ONLY parted_no_parts ALTER a DROP NOT NULL; +ALTER TABLE ONLY parted_no_parts DROP CONSTRAINT check_a; +DROP TABLE parted_no_parts; +-- cannot drop inherited NOT NULL or check constraints from partition +ALTER TABLE list_parted2 ALTER b SET NOT NULL, ADD CONSTRAINT check_a2 CHECK (a > 0); +ALTER TABLE part_2 ALTER b DROP NOT NULL; +ERROR: column "b" is marked NOT NULL in parent table +ALTER TABLE part_2 DROP CONSTRAINT check_a2; +ERROR: cannot drop inherited constraint "check_a2" of relation "part_2" +-- Doesn't make sense to add NO INHERIT constraints on partitioned tables +ALTER TABLE list_parted2 add constraint check_b2 check (b <> 'zz') NO INHERIT; +ERROR: cannot add NO INHERIT constraint to partitioned table "list_parted2" +-- check that a partition cannot participate in regular inheritance +CREATE TABLE inh_test () INHERITS (part_2); +ERROR: cannot inherit from partition "part_2" +CREATE TABLE inh_test (LIKE part_2); +ALTER TABLE inh_test INHERIT part_2; +ERROR: cannot inherit from a partition +ALTER TABLE part_2 INHERIT inh_test; +ERROR: cannot change inheritance of a partition +-- cannot drop or alter type of partition key columns of lower level +-- partitioned tables; for example, part_5, which is list_parted2's +-- partition, is partitioned on b; +ALTER TABLE list_parted2 DROP COLUMN b; +ERROR: cannot drop column "b" because it is part of the partition key of relation "part_5" +ALTER TABLE list_parted2 ALTER COLUMN b TYPE text; +ERROR: cannot alter column "b" because it is part of the partition key of relation "part_5" +-- dropping non-partition key columns should be allowed on the parent table. +ALTER TABLE list_parted DROP COLUMN b; +SELECT * FROM list_parted; + a +--- +(0 rows) + +-- cleanup +DROP TABLE list_parted, list_parted2, range_parted; +DROP TABLE fail_def_part; +DROP TABLE hash_parted; +-- more tests for certain multi-level partitioning scenarios +create table p (a int, b int) partition by range (a, b); +create table p1 (b int, a int not null) partition by range (b); +create table p11 (like p1); +alter table p11 drop a; +alter table p11 add a int; +alter table p11 drop a; +alter table p11 add a int not null; +-- attnum for key attribute 'a' is different in p, p1, and p11 +select attrelid::regclass, attname, attnum +from pg_attribute +where attname = 'a' + and (attrelid = 'p'::regclass + or attrelid = 'p1'::regclass + or attrelid = 'p11'::regclass) +order by attrelid::regclass::text; + attrelid | attname | attnum +----------+---------+-------- + p | a | 1 + p1 | a | 2 + p11 | a | 4 +(3 rows) + +alter table p1 attach partition p11 for values from (2) to (5); +insert into p1 (a, b) values (2, 3); +-- check that partition validation scan correctly detects violating rows +alter table p attach partition p1 for values from (1, 2) to (1, 10); +ERROR: partition constraint of relation "p11" is violated by some row +-- cleanup +drop table p; +drop table p1; +-- validate constraint on partitioned tables should only scan leaf partitions +create table parted_validate_test (a int) partition by list (a); +create table parted_validate_test_1 partition of parted_validate_test for values in (0, 1); +alter table parted_validate_test add constraint parted_validate_test_chka check (a > 0) not valid; +alter table parted_validate_test validate constraint parted_validate_test_chka; +drop table parted_validate_test; +-- test alter column options +CREATE TABLE attmp(i integer); +INSERT INTO attmp VALUES (1); +ALTER TABLE attmp ALTER COLUMN i SET (n_distinct = 1, n_distinct_inherited = 2); +ALTER TABLE attmp ALTER COLUMN i RESET (n_distinct_inherited); +ANALYZE attmp; +DROP TABLE attmp; +DROP USER regress_alter_table_user1; +-- check that violating rows are correctly reported when attaching as the +-- default partition +create table defpart_attach_test (a int) partition by list (a); +create table defpart_attach_test1 partition of defpart_attach_test for values in (1); +create table defpart_attach_test_d (b int, a int); +alter table defpart_attach_test_d drop b; +insert into defpart_attach_test_d values (1), (2); +-- error because its constraint as the default partition would be violated +-- by the row containing 1 +alter table defpart_attach_test attach partition defpart_attach_test_d default; +ERROR: partition constraint of relation "defpart_attach_test_d" is violated by some row +delete from defpart_attach_test_d where a = 1; +alter table defpart_attach_test_d add check (a > 1); +-- should be attached successfully and without needing to be scanned +alter table defpart_attach_test attach partition defpart_attach_test_d default; +-- check that attaching a partition correctly reports any rows in the default +-- partition that should not be there for the new partition to be attached +-- successfully +create table defpart_attach_test_2 (like defpart_attach_test_d); +alter table defpart_attach_test attach partition defpart_attach_test_2 for values in (2); +ERROR: updated partition constraint for default partition "defpart_attach_test_d" would be violated by some row +drop table defpart_attach_test; +-- check combinations of temporary and permanent relations when attaching +-- partitions. +create table perm_part_parent (a int) partition by list (a); +create temp table temp_part_parent (a int) partition by list (a); +create table perm_part_child (a int); +create temp table temp_part_child (a int); +alter table temp_part_parent attach partition perm_part_child default; -- error +ERROR: cannot attach a permanent relation as partition of temporary relation "temp_part_parent" +alter table perm_part_parent attach partition temp_part_child default; -- error +ERROR: cannot attach a temporary relation as partition of permanent relation "perm_part_parent" +alter table temp_part_parent attach partition temp_part_child default; -- ok +drop table perm_part_parent cascade; +drop table temp_part_parent cascade; +-- check that attaching partitions to a table while it is being used is +-- prevented +create table tab_part_attach (a int) partition by list (a); +create or replace function func_part_attach() returns trigger + language plpgsql as $$ + begin + execute 'create table tab_part_attach_1 (a int)'; + execute 'alter table tab_part_attach attach partition tab_part_attach_1 for values in (1)'; + return null; + end $$; +create trigger trig_part_attach before insert on tab_part_attach + for each statement execute procedure func_part_attach(); +insert into tab_part_attach values (1); +ERROR: cannot ALTER TABLE "tab_part_attach" because it is being used by active queries in this session +CONTEXT: SQL statement "alter table tab_part_attach attach partition tab_part_attach_1 for values in (1)" +PL/pgSQL function func_part_attach() line 4 at EXECUTE +drop table tab_part_attach; +drop function func_part_attach(); +-- test case where the partitioning operator is a SQL function whose +-- evaluation results in the table's relcache being rebuilt partway through +-- the execution of an ATTACH PARTITION command +create function at_test_sql_partop (int4, int4) returns int language sql +as $$ select case when $1 = $2 then 0 when $1 > $2 then 1 else -1 end; $$; +create operator class at_test_sql_partop for type int4 using btree as + operator 1 < (int4, int4), operator 2 <= (int4, int4), + operator 3 = (int4, int4), operator 4 >= (int4, int4), + operator 5 > (int4, int4), function 1 at_test_sql_partop(int4, int4); +create table at_test_sql_partop (a int) partition by range (a at_test_sql_partop); +create table at_test_sql_partop_1 (a int); +alter table at_test_sql_partop attach partition at_test_sql_partop_1 for values from (0) to (10); +drop table at_test_sql_partop; +drop operator class at_test_sql_partop using btree; +drop function at_test_sql_partop; +/* Test case for bug #16242 */ +-- We create a parent and child where the child has missing +-- non-null attribute values, and arrange to pass them through +-- tuple conversion from the child to the parent tupdesc +create table bar1 (a integer, b integer not null default 1) + partition by range (a); +create table bar2 (a integer); +insert into bar2 values (1); +alter table bar2 add column b integer not null default 1; +-- (at this point bar2 contains tuple with natts=1) +alter table bar1 attach partition bar2 default; +-- this works: +select * from bar1; + a | b +---+--- + 1 | 1 +(1 row) + +-- this exercises tuple conversion: +create function xtrig() + returns trigger language plpgsql +as $$ + declare + r record; + begin + for r in select * from old loop + raise info 'a=%, b=%', r.a, r.b; + end loop; + return NULL; + end; +$$; +create trigger xtrig + after update on bar1 + referencing old table as old + for each statement execute procedure xtrig(); +update bar1 set a = a + 1; +INFO: a=1, b=1 +/* End test case for bug #16242 */ +/* Test case for bug #17409 */ +create table attbl (p1 int constraint pk_attbl primary key); +create table atref (c1 int references attbl(p1)); +cluster attbl using pk_attbl; +ERROR: orioledb tables does not support CLUSTER +DETAIL: CLUSTER makes no much sense for index-organized tables. +alter table attbl alter column p1 set data type bigint; +alter table atref alter column c1 set data type bigint; +drop table attbl, atref; +create table attbl (p1 int constraint pk_attbl primary key); +alter table attbl replica identity using index pk_attbl; +ERROR: replica identity type INDEX is not supported for OrioleDB tables yet +create table atref (c1 int references attbl(p1)); +alter table attbl alter column p1 set data type bigint; +alter table atref alter column c1 set data type bigint; +drop table attbl, atref; +/* End test case for bug #17409 */ +/* Test case for bug #18970 */ +create table attbl(a int); +create table atref(b attbl check ((b).a is not null)); +alter table attbl alter column a type numeric; -- someday this should work +ERROR: cannot alter table "attbl" because column "atref.b" uses its row type +alter table atref drop constraint atref_b_check; +create statistics atref_stat on ((b).a is not null) from atref; +alter table attbl alter column a type numeric; -- someday this should work +ERROR: cannot alter table "attbl" because column "atref.b" uses its row type +drop statistics atref_stat; +create index atref_idx on atref (((b).a)); +alter table attbl alter column a type numeric; -- someday this should work +ERROR: cannot alter table "attbl" because column "atref.b" uses its row type +drop table attbl, atref; +/* End test case for bug #18970 */ +-- Test that ALTER TABLE rewrite preserves a clustered index +-- for normal indexes and indexes on constraints. +create table alttype_cluster (a int); +alter table alttype_cluster add primary key (a); +create index alttype_cluster_ind on alttype_cluster (a); +alter table alttype_cluster cluster on alttype_cluster_ind; +-- Normal index remains clustered. +select indexrelid::regclass, indisclustered from pg_index + where indrelid = 'alttype_cluster'::regclass + order by indexrelid::regclass::text; + indexrelid | indisclustered +----------------------+---------------- + alttype_cluster_ind | t + alttype_cluster_pkey | f +(2 rows) + +alter table alttype_cluster alter a type bigint; +select indexrelid::regclass, indisclustered from pg_index + where indrelid = 'alttype_cluster'::regclass + order by indexrelid::regclass::text; + indexrelid | indisclustered +----------------------+---------------- + alttype_cluster_ind | t + alttype_cluster_pkey | f +(2 rows) + +-- Constraint index remains clustered. +alter table alttype_cluster cluster on alttype_cluster_pkey; +select indexrelid::regclass, indisclustered from pg_index + where indrelid = 'alttype_cluster'::regclass + order by indexrelid::regclass::text; + indexrelid | indisclustered +----------------------+---------------- + alttype_cluster_ind | f + alttype_cluster_pkey | t +(2 rows) + +alter table alttype_cluster alter a type int; +select indexrelid::regclass, indisclustered from pg_index + where indrelid = 'alttype_cluster'::regclass + order by indexrelid::regclass::text; + indexrelid | indisclustered +----------------------+---------------- + alttype_cluster_ind | f + alttype_cluster_pkey | t +(2 rows) + +drop table alttype_cluster; +-- +-- Check that attaching or detaching a partitioned partition correctly leads +-- to its partitions' constraint being updated to reflect the parent's +-- newly added/removed constraint +create table target_parted (a int, b int) partition by list (a); +create table attach_parted (a int, b int) partition by list (b); +create table attach_parted_part1 partition of attach_parted for values in (1); +-- insert a row directly into the leaf partition so that its partition +-- constraint is built and stored in the relcache +insert into attach_parted_part1 values (1, 1); +-- the following better invalidate the partition constraint of the leaf +-- partition too... +alter table target_parted attach partition attach_parted for values in (1); +-- ...such that the following insert fails +insert into attach_parted_part1 values (2, 1); +ERROR: new row for relation "attach_parted_part1" violates partition constraint +DETAIL: Failing row contains (2, 1). +-- ...and doesn't when the partition is detached along with its own partition +alter table target_parted detach partition attach_parted; +insert into attach_parted_part1 values (2, 1); +-- Test altering table having publication +create schema alter1; +create schema alter2; +create table alter1.t1 (a int); +set client_min_messages = 'ERROR'; +create publication pub1 for table alter1.t1, tables in schema alter2; +reset client_min_messages; +alter table alter1.t1 set schema alter2; +\d+ alter2.t1 + Table "alter2.t1" + Column | Type | Collation | Nullable | Default | Storage | Stats target | Description +--------+---------+-----------+----------+---------+---------+--------------+------------- + a | integer | | | | plain | | +Publications: + "pub1" + +drop publication pub1; +drop schema alter1 cascade; +drop schema alter2 cascade; +NOTICE: drop cascades to table alter2.t1 diff --git a/src/test/regress/expected/create_index_oriole.out b/src/test/regress/expected/create_index_oriole.out new file mode 100644 index 00000000000..f0f3d70a20f --- /dev/null +++ b/src/test/regress/expected/create_index_oriole.out @@ -0,0 +1,6 @@ +-- This file is used instead of the original create_index.sql because +-- it is not stable with OrioleDB. After create_index.sql stabilization this one +-- must be replaced with original. Currently it contains only indexes for alter_table.sql +-- script. +CREATE INDEX onek_unique1 ON onek USING btree(unique1 int4_ops); +CREATE INDEX tenk1_unique1 ON tenk1 USING btree(unique1 int4_ops); diff --git a/src/test/regress/parallel_schedule_oriole b/src/test/regress/parallel_schedule_oriole index cad4f2be71c..e0333d6e7c6 100644 --- a/src/test/regress/parallel_schedule_oriole +++ b/src/test/regress/parallel_schedule_oriole @@ -44,6 +44,7 @@ test: copy copyselect copydml # ---------- test: create_function_c create_misc create_operator create_procedure create_table create_type create_schema test: create_view +test: create_index_oriole # ---------- # Another group of parallel tests @@ -111,7 +112,7 @@ test: json json_encoding jsonpath jsonpath_encoding jsonb_jsonpath sqljson # NB: temp.sql does a reconnect which transiently uses 2 connections, # so keep this parallel group to at most 19 tests # ---------- -test: plancache rangefuncs prepare truncate sequence polymorphism largeobject xml +test: plancache rangefuncs prepare truncate alter_table sequence polymorphism largeobject xml # ---------- # Another group of parallel tests diff --git a/src/test/regress/sql/create_index_oriole.sql b/src/test/regress/sql/create_index_oriole.sql new file mode 100644 index 00000000000..f0f3d70a20f --- /dev/null +++ b/src/test/regress/sql/create_index_oriole.sql @@ -0,0 +1,6 @@ +-- This file is used instead of the original create_index.sql because +-- it is not stable with OrioleDB. After create_index.sql stabilization this one +-- must be replaced with original. Currently it contains only indexes for alter_table.sql +-- script. +CREATE INDEX onek_unique1 ON onek USING btree(unique1 int4_ops); +CREATE INDEX tenk1_unique1 ON tenk1 USING btree(unique1 int4_ops); From ffe8b161e30ddc0c332087876b7011b569ac3b5c Mon Sep 17 00:00:00 2001 From: Ilya Kobets Date: Wed, 21 Jan 2026 01:16:52 +0100 Subject: [PATCH 092/102] Enable regular create_index test Also remove some changes for pg_regress and regress tests --- patches/test_setup_enable_oriole.diff | 27 - src/test/regress/expected/alter_table_1.out | 4769 ----------------- .../regress/expected/create_index_oriole.out | 6 - src/test/regress/parallel_schedule_oriole | 3 +- src/test/regress/pg_regress.c | 4 +- src/test/regress/sql/create_index_oriole.sql | 6 - 6 files changed, 3 insertions(+), 4812 deletions(-) delete mode 100644 patches/test_setup_enable_oriole.diff delete mode 100644 src/test/regress/expected/alter_table_1.out delete mode 100644 src/test/regress/expected/create_index_oriole.out delete mode 100644 src/test/regress/sql/create_index_oriole.sql diff --git a/patches/test_setup_enable_oriole.diff b/patches/test_setup_enable_oriole.diff deleted file mode 100644 index 302f616810a..00000000000 --- a/patches/test_setup_enable_oriole.diff +++ /dev/null @@ -1,27 +0,0 @@ -diff --git a/src/test/regress/expected/test_setup.out b/src/test/regress/expected/test_setup.out -index 3d0eeec996..70ce94e21a 100644 ---- a/src/test/regress/expected/test_setup.out -+++ b/src/test/regress/expected/test_setup.out -@@ -21,6 +21,8 @@ GRANT ALL ON SCHEMA public TO public; - -- Create a tablespace we can use in tests. - SET allow_in_place_tablespaces = true; - CREATE TABLESPACE regress_tblspace LOCATION ''; -+-- Enable orioledb extension -+CREATE EXTENSION orioledb; - -- - -- These tables have traditionally been referenced by many tests, - -- so create and populate them. Insert only non-error values here. -diff --git a/src/test/regress/sql/test_setup.sql b/src/test/regress/sql/test_setup.sql -index 06b0e2121f..867ad6a2df 100644 ---- a/src/test/regress/sql/test_setup.sql -+++ b/src/test/regress/sql/test_setup.sql -@@ -27,6 +27,9 @@ GRANT ALL ON SCHEMA public TO public; - SET allow_in_place_tablespaces = true; - CREATE TABLESPACE regress_tblspace LOCATION ''; - -+-- Enable orioledb extension -+CREATE EXTENSION orioledb; -+ - -- - -- These tables have traditionally been referenced by many tests, - -- so create and populate them. Insert only non-error values here. diff --git a/src/test/regress/expected/alter_table_1.out b/src/test/regress/expected/alter_table_1.out deleted file mode 100644 index 5db9b8536cf..00000000000 --- a/src/test/regress/expected/alter_table_1.out +++ /dev/null @@ -1,4769 +0,0 @@ --- --- ALTER_TABLE --- --- Clean up in case a prior regression run failed -SET client_min_messages TO 'warning'; -DROP ROLE IF EXISTS regress_alter_table_user1; -RESET client_min_messages; -CREATE USER regress_alter_table_user1; --- --- add attribute --- -CREATE TABLE attmp (initial int4); -COMMENT ON TABLE attmp_wrong IS 'table comment'; -ERROR: relation "attmp_wrong" does not exist -COMMENT ON TABLE attmp IS 'table comment'; -COMMENT ON TABLE attmp IS NULL; -ALTER TABLE attmp ADD COLUMN xmin integer; -- fails -ERROR: column name "xmin" conflicts with a system column name -ALTER TABLE attmp ADD COLUMN a int4 default 3; -ALTER TABLE attmp ADD COLUMN b name; -ALTER TABLE attmp ADD COLUMN c text; -ALTER TABLE attmp ADD COLUMN d float8; -ALTER TABLE attmp ADD COLUMN e float4; -ALTER TABLE attmp ADD COLUMN f int2; -ALTER TABLE attmp ADD COLUMN g polygon; -ALTER TABLE attmp ADD COLUMN i char; -ALTER TABLE attmp ADD COLUMN k int4; -ALTER TABLE attmp ADD COLUMN l tid; -ALTER TABLE attmp ADD COLUMN m xid; -ALTER TABLE attmp ADD COLUMN n oidvector; ---ALTER TABLE attmp ADD COLUMN o lock; -ALTER TABLE attmp ADD COLUMN p boolean; -ALTER TABLE attmp ADD COLUMN q point; -ALTER TABLE attmp ADD COLUMN r lseg; -ALTER TABLE attmp ADD COLUMN s path; -ALTER TABLE attmp ADD COLUMN t box; -ALTER TABLE attmp ADD COLUMN v timestamp; -ALTER TABLE attmp ADD COLUMN w interval; -ALTER TABLE attmp ADD COLUMN x float8[]; -ALTER TABLE attmp ADD COLUMN y float4[]; -ALTER TABLE attmp ADD COLUMN z int2[]; -INSERT INTO attmp (a, b, c, d, e, f, g, i, k, l, m, n, p, q, r, s, t, - v, w, x, y, z) - VALUES (4, 'name', 'text', 4.1, 4.1, 2, '(4.1,4.1,3.1,3.1)', - 'c', - 314159, '(1,1)', '512', - '1 2 3 4 5 6 7 8', true, '(1.1,1.1)', '(4.1,4.1,3.1,3.1)', - '(0,2,4.1,4.1,3.1,3.1)', '(4.1,4.1,3.1,3.1)', - 'epoch', '01:00:10', '{1.0,2.0,3.0,4.0}', '{1.0,2.0,3.0,4.0}', '{1,2,3,4}'); -SELECT * FROM attmp; - initial | a | b | c | d | e | f | g | i | k | l | m | n | p | q | r | s | t | v | w | x | y | z ----------+---+------+------+-----+-----+---+-----------------------+---+--------+-------+-----+-----------------+---+-----------+-----------------------+-----------------------------+---------------------+--------------------------+------------------+-----------+-----------+----------- - | 4 | name | text | 4.1 | 4.1 | 2 | ((4.1,4.1),(3.1,3.1)) | c | 314159 | (1,1) | 512 | 1 2 3 4 5 6 7 8 | t | (1.1,1.1) | [(4.1,4.1),(3.1,3.1)] | ((0,2),(4.1,4.1),(3.1,3.1)) | (4.1,4.1),(3.1,3.1) | Thu Jan 01 00:00:00 1970 | @ 1 hour 10 secs | {1,2,3,4} | {1,2,3,4} | {1,2,3,4} -(1 row) - -DROP TABLE attmp; --- the wolf bug - schema mods caused inconsistent row descriptors -CREATE TABLE attmp ( - initial int4 -); -ALTER TABLE attmp ADD COLUMN a int4; -ALTER TABLE attmp ADD COLUMN b name; -ALTER TABLE attmp ADD COLUMN c text; -ALTER TABLE attmp ADD COLUMN d float8; -ALTER TABLE attmp ADD COLUMN e float4; -ALTER TABLE attmp ADD COLUMN f int2; -ALTER TABLE attmp ADD COLUMN g polygon; -ALTER TABLE attmp ADD COLUMN i char; -ALTER TABLE attmp ADD COLUMN k int4; -ALTER TABLE attmp ADD COLUMN l tid; -ALTER TABLE attmp ADD COLUMN m xid; -ALTER TABLE attmp ADD COLUMN n oidvector; ---ALTER TABLE attmp ADD COLUMN o lock; -ALTER TABLE attmp ADD COLUMN p boolean; -ALTER TABLE attmp ADD COLUMN q point; -ALTER TABLE attmp ADD COLUMN r lseg; -ALTER TABLE attmp ADD COLUMN s path; -ALTER TABLE attmp ADD COLUMN t box; -ALTER TABLE attmp ADD COLUMN v timestamp; -ALTER TABLE attmp ADD COLUMN w interval; -ALTER TABLE attmp ADD COLUMN x float8[]; -ALTER TABLE attmp ADD COLUMN y float4[]; -ALTER TABLE attmp ADD COLUMN z int2[]; -INSERT INTO attmp (a, b, c, d, e, f, g, i, k, l, m, n, p, q, r, s, t, - v, w, x, y, z) - VALUES (4, 'name', 'text', 4.1, 4.1, 2, '(4.1,4.1,3.1,3.1)', - 'c', - 314159, '(1,1)', '512', - '1 2 3 4 5 6 7 8', true, '(1.1,1.1)', '(4.1,4.1,3.1,3.1)', - '(0,2,4.1,4.1,3.1,3.1)', '(4.1,4.1,3.1,3.1)', - 'epoch', '01:00:10', '{1.0,2.0,3.0,4.0}', '{1.0,2.0,3.0,4.0}', '{1,2,3,4}'); -SELECT * FROM attmp; - initial | a | b | c | d | e | f | g | i | k | l | m | n | p | q | r | s | t | v | w | x | y | z ----------+---+------+------+-----+-----+---+-----------------------+---+--------+-------+-----+-----------------+---+-----------+-----------------------+-----------------------------+---------------------+--------------------------+------------------+-----------+-----------+----------- - | 4 | name | text | 4.1 | 4.1 | 2 | ((4.1,4.1),(3.1,3.1)) | c | 314159 | (1,1) | 512 | 1 2 3 4 5 6 7 8 | t | (1.1,1.1) | [(4.1,4.1),(3.1,3.1)] | ((0,2),(4.1,4.1),(3.1,3.1)) | (4.1,4.1),(3.1,3.1) | Thu Jan 01 00:00:00 1970 | @ 1 hour 10 secs | {1,2,3,4} | {1,2,3,4} | {1,2,3,4} -(1 row) - -CREATE INDEX attmp_idx ON attmp (a, (d + e), b); -ALTER INDEX attmp_idx ALTER COLUMN 0 SET STATISTICS 1000; -ERROR: column number must be in range from 1 to 32767 -LINE 1: ALTER INDEX attmp_idx ALTER COLUMN 0 SET STATISTICS 1000; - ^ -ALTER INDEX attmp_idx ALTER COLUMN 1 SET STATISTICS 1000; -ERROR: cannot alter statistics on non-expression column "a" of index "attmp_idx" -HINT: Alter statistics on table column instead. -ALTER INDEX attmp_idx ALTER COLUMN 2 SET STATISTICS 1000; -\d+ attmp_idx - Index "public.attmp_idx" - Column | Type | Key? | Definition | Storage | Stats target ---------+------------------+------+------------+---------+-------------- - a | integer | yes | a | plain | - expr | double precision | yes | (d + e) | plain | 1000 - b | cstring | yes | b | plain | -btree, for table "public.attmp" - -ALTER INDEX attmp_idx ALTER COLUMN 3 SET STATISTICS 1000; -ERROR: cannot alter statistics on non-expression column "b" of index "attmp_idx" -HINT: Alter statistics on table column instead. -ALTER INDEX attmp_idx ALTER COLUMN 4 SET STATISTICS 1000; -ERROR: column number 4 of relation "attmp_idx" does not exist -ALTER INDEX attmp_idx ALTER COLUMN 2 SET STATISTICS -1; -DROP TABLE attmp; --- --- rename - check on both non-temp and temp tables --- -CREATE TABLE attmp (regtable int); -CREATE TEMP TABLE attmp (attmptable int); -ALTER TABLE attmp RENAME TO attmp_new; -SELECT * FROM attmp; - regtable ----------- -(0 rows) - -SELECT * FROM attmp_new; - attmptable ------------- -(0 rows) - -ALTER TABLE attmp RENAME TO attmp_new2; -SELECT * FROM attmp; -- should fail -ERROR: relation "attmp" does not exist -LINE 1: SELECT * FROM attmp; - ^ -SELECT * FROM attmp_new; - attmptable ------------- -(0 rows) - -SELECT * FROM attmp_new2; - regtable ----------- -(0 rows) - -DROP TABLE attmp_new; -DROP TABLE attmp_new2; --- check rename of partitioned tables and indexes also -CREATE TABLE part_attmp (a int primary key) partition by range (a); -CREATE TABLE part_attmp1 PARTITION OF part_attmp FOR VALUES FROM (0) TO (100); -ALTER INDEX part_attmp_pkey RENAME TO part_attmp_index; -ALTER INDEX part_attmp1_pkey RENAME TO part_attmp1_index; -ALTER TABLE part_attmp RENAME TO part_at2tmp; -ALTER TABLE part_attmp1 RENAME TO part_at2tmp1; -SET ROLE regress_alter_table_user1; -ALTER INDEX part_attmp_index RENAME TO fail; -ERROR: must be owner of index part_attmp_index -ALTER INDEX part_attmp1_index RENAME TO fail; -ERROR: must be owner of index part_attmp1_index -ALTER TABLE part_at2tmp RENAME TO fail; -ERROR: must be owner of table part_at2tmp -ALTER TABLE part_at2tmp1 RENAME TO fail; -ERROR: must be owner of table part_at2tmp1 -RESET ROLE; -DROP TABLE part_at2tmp; --- --- check renaming to a table's array type's autogenerated name --- (the array type's name should get out of the way) --- -CREATE TABLE attmp_array (id int); -CREATE TABLE attmp_array2 (id int); -SELECT typname FROM pg_type WHERE oid = 'attmp_array[]'::regtype; - typname --------------- - _attmp_array -(1 row) - -SELECT typname FROM pg_type WHERE oid = 'attmp_array2[]'::regtype; - typname ---------------- - _attmp_array2 -(1 row) - -ALTER TABLE attmp_array2 RENAME TO _attmp_array; -SELECT typname FROM pg_type WHERE oid = 'attmp_array[]'::regtype; - typname ---------------- - __attmp_array -(1 row) - -SELECT typname FROM pg_type WHERE oid = '_attmp_array[]'::regtype; - typname ------------------ - __attmp_array_1 -(1 row) - -DROP TABLE _attmp_array; -DROP TABLE attmp_array; --- renaming to table's own array type's name is an interesting corner case -CREATE TABLE attmp_array (id int); -SELECT typname FROM pg_type WHERE oid = 'attmp_array[]'::regtype; - typname --------------- - _attmp_array -(1 row) - -ALTER TABLE attmp_array RENAME TO _attmp_array; -SELECT typname FROM pg_type WHERE oid = '_attmp_array[]'::regtype; - typname ---------------- - __attmp_array -(1 row) - -DROP TABLE _attmp_array; --- ALTER TABLE ... RENAME on non-table relations --- renaming indexes (FIXME: this should probably test the index's functionality) -ALTER INDEX IF EXISTS __onek_unique1 RENAME TO attmp_onek_unique1; -NOTICE: relation "__onek_unique1" does not exist, skipping -ALTER INDEX IF EXISTS __attmp_onek_unique1 RENAME TO onek_unique1; -NOTICE: relation "__attmp_onek_unique1" does not exist, skipping -ALTER INDEX onek_unique1 RENAME TO attmp_onek_unique1; -ALTER INDEX attmp_onek_unique1 RENAME TO onek_unique1; -SET ROLE regress_alter_table_user1; -ALTER INDEX onek_unique1 RENAME TO fail; -- permission denied -ERROR: must be owner of index onek_unique1 -RESET ROLE; --- rename statements with mismatching statement and object types -CREATE TABLE alter_idx_rename_test (a INT); -CREATE INDEX alter_idx_rename_test_idx ON alter_idx_rename_test (a); -CREATE TABLE alter_idx_rename_test_parted (a INT) PARTITION BY LIST (a); -CREATE INDEX alter_idx_rename_test_parted_idx ON alter_idx_rename_test_parted (a); -BEGIN; -ALTER INDEX alter_idx_rename_test RENAME TO alter_idx_rename_test_2; -ALTER INDEX alter_idx_rename_test_parted RENAME TO alter_idx_rename_test_parted_2; -SELECT relation::regclass, mode FROM pg_locks -WHERE pid = pg_backend_pid() AND locktype = 'relation' - AND relation::regclass::text LIKE 'alter\_idx%' -ORDER BY relation::regclass::text COLLATE "C"; - relation | mode ---------------------------------+--------------------- - alter_idx_rename_test_2 | AccessExclusiveLock - alter_idx_rename_test_parted_2 | AccessExclusiveLock -(2 rows) - -COMMIT; -BEGIN; -ALTER INDEX alter_idx_rename_test_idx RENAME TO alter_idx_rename_test_idx_2; -ALTER INDEX alter_idx_rename_test_parted_idx RENAME TO alter_idx_rename_test_parted_idx_2; -SELECT relation::regclass, mode FROM pg_locks -WHERE pid = pg_backend_pid() AND locktype = 'relation' - AND relation::regclass::text LIKE 'alter\_idx%' -ORDER BY relation::regclass::text COLLATE "C"; - relation | mode -------------------------------------+-------------------------- - alter_idx_rename_test_idx_2 | ShareUpdateExclusiveLock - alter_idx_rename_test_parted_idx_2 | ShareUpdateExclusiveLock -(2 rows) - -COMMIT; -BEGIN; -ALTER TABLE alter_idx_rename_test_idx_2 RENAME TO alter_idx_rename_test_idx_3; -ALTER TABLE alter_idx_rename_test_parted_idx_2 RENAME TO alter_idx_rename_test_parted_idx_3; -SELECT relation::regclass, mode FROM pg_locks -WHERE pid = pg_backend_pid() AND locktype = 'relation' - AND relation::regclass::text LIKE 'alter\_idx%' -ORDER BY relation::regclass::text COLLATE "C"; - relation | mode -------------------------------------+--------------------- - alter_idx_rename_test_idx_3 | AccessExclusiveLock - alter_idx_rename_test_parted_idx_3 | AccessExclusiveLock -(2 rows) - -COMMIT; -DROP TABLE alter_idx_rename_test_2; --- renaming views -CREATE VIEW attmp_view (unique1) AS SELECT unique1 FROM tenk1; -ALTER TABLE attmp_view RENAME TO attmp_view_new; -SET ROLE regress_alter_table_user1; -ALTER VIEW attmp_view_new RENAME TO fail; -- permission denied -ERROR: must be owner of view attmp_view_new -RESET ROLE; --- hack to ensure we get an indexscan here -set enable_seqscan to off; -set enable_bitmapscan to off; --- 5 values, sorted -SELECT unique1 FROM tenk1 WHERE unique1 < 5; - unique1 ---------- - 0 - 1 - 2 - 3 - 4 -(5 rows) - -reset enable_seqscan; -reset enable_bitmapscan; -DROP VIEW attmp_view_new; --- toast-like relation name -alter table stud_emp rename to pg_toast_stud_emp; -alter table pg_toast_stud_emp rename to stud_emp; --- renaming index should rename constraint as well -ALTER TABLE onek ADD CONSTRAINT onek_unique1_constraint UNIQUE (unique1); -ALTER INDEX onek_unique1_constraint RENAME TO onek_unique1_constraint_foo; -ALTER TABLE onek DROP CONSTRAINT onek_unique1_constraint_foo; --- renaming constraint -ALTER TABLE onek ADD CONSTRAINT onek_check_constraint CHECK (unique1 >= 0); -ALTER TABLE onek RENAME CONSTRAINT onek_check_constraint TO onek_check_constraint_foo; -ALTER TABLE onek DROP CONSTRAINT onek_check_constraint_foo; --- renaming constraint should rename index as well -ALTER TABLE onek ADD CONSTRAINT onek_unique1_constraint UNIQUE (unique1); -DROP INDEX onek_unique1_constraint; -- to see whether it's there -ERROR: cannot drop index onek_unique1_constraint because constraint onek_unique1_constraint on table onek requires it -HINT: You can drop constraint onek_unique1_constraint on table onek instead. -ALTER TABLE onek RENAME CONSTRAINT onek_unique1_constraint TO onek_unique1_constraint_foo; -DROP INDEX onek_unique1_constraint_foo; -- to see whether it's there -ERROR: cannot drop index onek_unique1_constraint_foo because constraint onek_unique1_constraint_foo on table onek requires it -HINT: You can drop constraint onek_unique1_constraint_foo on table onek instead. -ALTER TABLE onek DROP CONSTRAINT onek_unique1_constraint_foo; --- renaming constraints vs. inheritance -CREATE TABLE constraint_rename_test (a int CONSTRAINT con1 CHECK (a > 0), b int, c int); -\d constraint_rename_test - Table "public.constraint_rename_test" - Column | Type | Collation | Nullable | Default ---------+---------+-----------+----------+--------- - a | integer | | | - b | integer | | | - c | integer | | | -Check constraints: - "con1" CHECK (a > 0) - -CREATE TABLE constraint_rename_test2 (a int CONSTRAINT con1 CHECK (a > 0), d int) INHERITS (constraint_rename_test); -NOTICE: merging column "a" with inherited definition -NOTICE: merging constraint "con1" with inherited definition -\d constraint_rename_test2 - Table "public.constraint_rename_test2" - Column | Type | Collation | Nullable | Default ---------+---------+-----------+----------+--------- - a | integer | | | - b | integer | | | - c | integer | | | - d | integer | | | -Check constraints: - "con1" CHECK (a > 0) -Inherits: constraint_rename_test - -ALTER TABLE constraint_rename_test2 RENAME CONSTRAINT con1 TO con1foo; -- fail -ERROR: cannot rename inherited constraint "con1" -ALTER TABLE ONLY constraint_rename_test RENAME CONSTRAINT con1 TO con1foo; -- fail -ERROR: inherited constraint "con1" must be renamed in child tables too -ALTER TABLE constraint_rename_test RENAME CONSTRAINT con1 TO con1foo; -- ok -\d constraint_rename_test - Table "public.constraint_rename_test" - Column | Type | Collation | Nullable | Default ---------+---------+-----------+----------+--------- - a | integer | | | - b | integer | | | - c | integer | | | -Check constraints: - "con1foo" CHECK (a > 0) -Number of child tables: 1 (Use \d+ to list them.) - -\d constraint_rename_test2 - Table "public.constraint_rename_test2" - Column | Type | Collation | Nullable | Default ---------+---------+-----------+----------+--------- - a | integer | | | - b | integer | | | - c | integer | | | - d | integer | | | -Check constraints: - "con1foo" CHECK (a > 0) -Inherits: constraint_rename_test - -ALTER TABLE constraint_rename_test ADD CONSTRAINT con2 CHECK (b > 0) NO INHERIT; -ALTER TABLE ONLY constraint_rename_test RENAME CONSTRAINT con2 TO con2foo; -- ok -ALTER TABLE constraint_rename_test RENAME CONSTRAINT con2foo TO con2bar; -- ok -\d constraint_rename_test - Table "public.constraint_rename_test" - Column | Type | Collation | Nullable | Default ---------+---------+-----------+----------+--------- - a | integer | | | - b | integer | | | - c | integer | | | -Check constraints: - "con1foo" CHECK (a > 0) - "con2bar" CHECK (b > 0) NO INHERIT -Number of child tables: 1 (Use \d+ to list them.) - -\d constraint_rename_test2 - Table "public.constraint_rename_test2" - Column | Type | Collation | Nullable | Default ---------+---------+-----------+----------+--------- - a | integer | | | - b | integer | | | - c | integer | | | - d | integer | | | -Check constraints: - "con1foo" CHECK (a > 0) -Inherits: constraint_rename_test - -ALTER TABLE constraint_rename_test ADD CONSTRAINT con3 PRIMARY KEY (a); -ALTER TABLE constraint_rename_test RENAME CONSTRAINT con3 TO con3foo; -- ok -\d constraint_rename_test - Table "public.constraint_rename_test" - Column | Type | Collation | Nullable | Default ---------+---------+-----------+----------+--------- - a | integer | | not null | - b | integer | | | - c | integer | | | -Indexes: - "con3foo" PRIMARY KEY, btree (a) -Check constraints: - "con1foo" CHECK (a > 0) - "con2bar" CHECK (b > 0) NO INHERIT -Number of child tables: 1 (Use \d+ to list them.) - -\d constraint_rename_test2 - Table "public.constraint_rename_test2" - Column | Type | Collation | Nullable | Default ---------+---------+-----------+----------+--------- - a | integer | | not null | - b | integer | | | - c | integer | | | - d | integer | | | -Check constraints: - "con1foo" CHECK (a > 0) -Inherits: constraint_rename_test - -DROP TABLE constraint_rename_test2; -DROP TABLE constraint_rename_test; -ALTER TABLE IF EXISTS constraint_not_exist RENAME CONSTRAINT con3 TO con3foo; -- ok -NOTICE: relation "constraint_not_exist" does not exist, skipping -ALTER TABLE IF EXISTS constraint_rename_test ADD CONSTRAINT con4 UNIQUE (a); -NOTICE: relation "constraint_rename_test" does not exist, skipping --- renaming constraints with cache reset of target relation -CREATE TABLE constraint_rename_cache (a int, - CONSTRAINT chk_a CHECK (a > 0), - PRIMARY KEY (a)); -ALTER TABLE constraint_rename_cache - RENAME CONSTRAINT chk_a TO chk_a_new; -ALTER TABLE constraint_rename_cache - RENAME CONSTRAINT constraint_rename_cache_pkey TO constraint_rename_pkey_new; -CREATE TABLE like_constraint_rename_cache - (LIKE constraint_rename_cache INCLUDING ALL); -\d like_constraint_rename_cache - Table "public.like_constraint_rename_cache" - Column | Type | Collation | Nullable | Default ---------+---------+-----------+----------+--------- - a | integer | | not null | -Indexes: - "like_constraint_rename_cache_pkey" PRIMARY KEY, btree (a) -Check constraints: - "chk_a_new" CHECK (a > 0) - -DROP TABLE constraint_rename_cache; -DROP TABLE like_constraint_rename_cache; --- FOREIGN KEY CONSTRAINT adding TEST -CREATE TABLE attmp2 (a int primary key); -CREATE TABLE attmp3 (a int, b int); -CREATE TABLE attmp4 (a int, b int, unique(a,b)); -CREATE TABLE attmp5 (a int, b int); --- Insert rows into attmp2 (pktable) -INSERT INTO attmp2 values (1); -INSERT INTO attmp2 values (2); -INSERT INTO attmp2 values (3); -INSERT INTO attmp2 values (4); --- Insert rows into attmp3 -INSERT INTO attmp3 values (1,10); -INSERT INTO attmp3 values (1,20); -INSERT INTO attmp3 values (5,50); --- Try (and fail) to add constraint due to invalid source columns -ALTER TABLE attmp3 add constraint attmpconstr foreign key(c) references attmp2 match full; -ERROR: column "c" referenced in foreign key constraint does not exist --- Try (and fail) to add constraint due to invalid destination columns explicitly given -ALTER TABLE attmp3 add constraint attmpconstr foreign key(a) references attmp2(b) match full; -ERROR: column "b" referenced in foreign key constraint does not exist --- Try (and fail) to add constraint due to invalid data -ALTER TABLE attmp3 add constraint attmpconstr foreign key (a) references attmp2 match full; -ERROR: insert or update on table "attmp3" violates foreign key constraint "attmpconstr" -DETAIL: Key (a)=(5) is not present in table "attmp2". --- Delete failing row -DELETE FROM attmp3 where a=5; --- Try (and succeed) -ALTER TABLE attmp3 add constraint attmpconstr foreign key (a) references attmp2 match full; -ALTER TABLE attmp3 drop constraint attmpconstr; -INSERT INTO attmp3 values (5,50); --- Try NOT VALID and then VALIDATE CONSTRAINT, but fails. Delete failure then re-validate -ALTER TABLE attmp3 add constraint attmpconstr foreign key (a) references attmp2 match full NOT VALID; -ALTER TABLE attmp3 validate constraint attmpconstr; -ERROR: insert or update on table "attmp3" violates foreign key constraint "attmpconstr" -DETAIL: Key (a)=(5) is not present in table "attmp2". --- Delete failing row -DELETE FROM attmp3 where a=5; --- Try (and succeed) and repeat to show it works on already valid constraint -ALTER TABLE attmp3 validate constraint attmpconstr; -ALTER TABLE attmp3 validate constraint attmpconstr; --- Try a non-verified CHECK constraint -ALTER TABLE attmp3 ADD CONSTRAINT b_greater_than_ten CHECK (b > 10); -- fail -ERROR: check constraint "b_greater_than_ten" of relation "attmp3" is violated by some row -ALTER TABLE attmp3 ADD CONSTRAINT b_greater_than_ten CHECK (b > 10) NOT VALID; -- succeeds -ALTER TABLE attmp3 VALIDATE CONSTRAINT b_greater_than_ten; -- fails -ERROR: check constraint "b_greater_than_ten" of relation "attmp3" is violated by some row -DELETE FROM attmp3 WHERE NOT b > 10; -ALTER TABLE attmp3 VALIDATE CONSTRAINT b_greater_than_ten; -- succeeds -ALTER TABLE attmp3 VALIDATE CONSTRAINT b_greater_than_ten; -- succeeds --- Test inherited NOT VALID CHECK constraints -select * from attmp3; - a | b ----+---- - 1 | 20 -(1 row) - -CREATE TABLE attmp6 () INHERITS (attmp3); -CREATE TABLE attmp7 () INHERITS (attmp3); -INSERT INTO attmp6 VALUES (6, 30), (7, 16); -ALTER TABLE attmp3 ADD CONSTRAINT b_le_20 CHECK (b <= 20) NOT VALID; -ALTER TABLE attmp3 VALIDATE CONSTRAINT b_le_20; -- fails -ERROR: check constraint "b_le_20" of relation "attmp6" is violated by some row -DELETE FROM attmp6 WHERE b > 20; -ALTER TABLE attmp3 VALIDATE CONSTRAINT b_le_20; -- succeeds --- An already validated constraint must not be revalidated -CREATE FUNCTION boo(int) RETURNS int IMMUTABLE STRICT LANGUAGE plpgsql AS $$ BEGIN RAISE NOTICE 'boo: %', $1; RETURN $1; END; $$; -INSERT INTO attmp7 VALUES (8, 18); -ALTER TABLE attmp7 ADD CONSTRAINT identity CHECK (b = boo(b)); -NOTICE: boo: 18 -ALTER TABLE attmp3 ADD CONSTRAINT IDENTITY check (b = boo(b)) NOT VALID; -NOTICE: merging constraint "identity" with inherited definition -ALTER TABLE attmp3 VALIDATE CONSTRAINT identity; -NOTICE: boo: 20 -NOTICE: boo: 16 --- A NO INHERIT constraint should not be looked for in children during VALIDATE CONSTRAINT -create table parent_noinh_convalid (a int); -create table child_noinh_convalid () inherits (parent_noinh_convalid); -insert into parent_noinh_convalid values (1); -insert into child_noinh_convalid values (1); -alter table parent_noinh_convalid add constraint check_a_is_2 check (a = 2) no inherit not valid; --- fail, because of the row in parent -alter table parent_noinh_convalid validate constraint check_a_is_2; -ERROR: check constraint "check_a_is_2" of relation "parent_noinh_convalid" is violated by some row -delete from only parent_noinh_convalid; --- ok (parent itself contains no violating rows) -alter table parent_noinh_convalid validate constraint check_a_is_2; -select convalidated from pg_constraint where conrelid = 'parent_noinh_convalid'::regclass and conname = 'check_a_is_2'; - convalidated --------------- - t -(1 row) - --- cleanup -drop table parent_noinh_convalid, child_noinh_convalid; --- Try (and fail) to create constraint from attmp5(a) to attmp4(a) - unique constraint on --- attmp4 is a,b -ALTER TABLE attmp5 add constraint attmpconstr foreign key(a) references attmp4(a) match full; -ERROR: there is no unique constraint matching given keys for referenced table "attmp4" -DROP TABLE attmp7; -DROP TABLE attmp6; -DROP TABLE attmp5; -DROP TABLE attmp4; -DROP TABLE attmp3; -DROP TABLE attmp2; --- NOT VALID with plan invalidation -- ensure we don't use a constraint for --- exclusion until validated -set constraint_exclusion TO 'partition'; -create table nv_parent (d date, check (false) no inherit not valid); --- not valid constraint added at creation time should automatically become valid -\d nv_parent - Table "public.nv_parent" - Column | Type | Collation | Nullable | Default ---------+------+-----------+----------+--------- - d | date | | | -Check constraints: - "nv_parent_check" CHECK (false) NO INHERIT - -create table nv_child_2010 () inherits (nv_parent); -create table nv_child_2011 () inherits (nv_parent); -alter table nv_child_2010 add check (d between '2010-01-01'::date and '2010-12-31'::date) not valid; -alter table nv_child_2011 add check (d between '2011-01-01'::date and '2011-12-31'::date) not valid; -explain (costs off) select * from nv_parent where d between '2011-08-01' and '2011-08-31'; - QUERY PLAN ---------------------------------------------------------------------------- - Append - -> Seq Scan on nv_parent nv_parent_1 - Filter: ((d >= '08-01-2011'::date) AND (d <= '08-31-2011'::date)) - -> Seq Scan on nv_child_2010 nv_parent_2 - Filter: ((d >= '08-01-2011'::date) AND (d <= '08-31-2011'::date)) - -> Seq Scan on nv_child_2011 nv_parent_3 - Filter: ((d >= '08-01-2011'::date) AND (d <= '08-31-2011'::date)) -(7 rows) - -create table nv_child_2009 (check (d between '2009-01-01'::date and '2009-12-31'::date)) inherits (nv_parent); -explain (costs off) select * from nv_parent where d between '2011-08-01'::date and '2011-08-31'::date; - QUERY PLAN ---------------------------------------------------------------------------- - Append - -> Seq Scan on nv_parent nv_parent_1 - Filter: ((d >= '08-01-2011'::date) AND (d <= '08-31-2011'::date)) - -> Seq Scan on nv_child_2010 nv_parent_2 - Filter: ((d >= '08-01-2011'::date) AND (d <= '08-31-2011'::date)) - -> Seq Scan on nv_child_2011 nv_parent_3 - Filter: ((d >= '08-01-2011'::date) AND (d <= '08-31-2011'::date)) -(7 rows) - -explain (costs off) select * from nv_parent where d between '2009-08-01'::date and '2009-08-31'::date; - QUERY PLAN ---------------------------------------------------------------------------- - Append - -> Seq Scan on nv_parent nv_parent_1 - Filter: ((d >= '08-01-2009'::date) AND (d <= '08-31-2009'::date)) - -> Seq Scan on nv_child_2010 nv_parent_2 - Filter: ((d >= '08-01-2009'::date) AND (d <= '08-31-2009'::date)) - -> Seq Scan on nv_child_2011 nv_parent_3 - Filter: ((d >= '08-01-2009'::date) AND (d <= '08-31-2009'::date)) - -> Seq Scan on nv_child_2009 nv_parent_4 - Filter: ((d >= '08-01-2009'::date) AND (d <= '08-31-2009'::date)) -(9 rows) - --- after validation, the constraint should be used -alter table nv_child_2011 VALIDATE CONSTRAINT nv_child_2011_d_check; -explain (costs off) select * from nv_parent where d between '2009-08-01'::date and '2009-08-31'::date; - QUERY PLAN ---------------------------------------------------------------------------- - Append - -> Seq Scan on nv_parent nv_parent_1 - Filter: ((d >= '08-01-2009'::date) AND (d <= '08-31-2009'::date)) - -> Seq Scan on nv_child_2010 nv_parent_2 - Filter: ((d >= '08-01-2009'::date) AND (d <= '08-31-2009'::date)) - -> Seq Scan on nv_child_2009 nv_parent_3 - Filter: ((d >= '08-01-2009'::date) AND (d <= '08-31-2009'::date)) -(7 rows) - --- add an inherited NOT VALID constraint -alter table nv_parent add check (d between '2001-01-01'::date and '2099-12-31'::date) not valid; -\d nv_child_2009 - Table "public.nv_child_2009" - Column | Type | Collation | Nullable | Default ---------+------+-----------+----------+--------- - d | date | | | -Check constraints: - "nv_child_2009_d_check" CHECK (d >= '01-01-2009'::date AND d <= '12-31-2009'::date) - "nv_parent_d_check" CHECK (d >= '01-01-2001'::date AND d <= '12-31-2099'::date) NOT VALID -Inherits: nv_parent - --- we leave nv_parent and children around to help test pg_dump logic --- Foreign key adding test with mixed types --- Note: these tables are TEMP to avoid name conflicts when this test --- is run in parallel with foreign_key.sql. -CREATE TEMP TABLE PKTABLE (ptest1 int PRIMARY KEY); -INSERT INTO PKTABLE VALUES(42); -CREATE TEMP TABLE FKTABLE (ftest1 inet); --- This next should fail, because int=inet does not exist -ALTER TABLE FKTABLE ADD FOREIGN KEY(ftest1) references pktable; -ERROR: foreign key constraint "fktable_ftest1_fkey" cannot be implemented -DETAIL: Key columns "ftest1" and "ptest1" are of incompatible types: inet and integer. --- This should also fail for the same reason, but here we --- give the column name -ALTER TABLE FKTABLE ADD FOREIGN KEY(ftest1) references pktable(ptest1); -ERROR: foreign key constraint "fktable_ftest1_fkey" cannot be implemented -DETAIL: Key columns "ftest1" and "ptest1" are of incompatible types: inet and integer. -DROP TABLE FKTABLE; --- This should succeed, even though they are different types, --- because int=int8 exists and is a member of the integer opfamily -CREATE TEMP TABLE FKTABLE (ftest1 int8); -ALTER TABLE FKTABLE ADD FOREIGN KEY(ftest1) references pktable; --- Check it actually works -INSERT INTO FKTABLE VALUES(42); -- should succeed -INSERT INTO FKTABLE VALUES(43); -- should fail -ERROR: insert or update on table "fktable" violates foreign key constraint "fktable_ftest1_fkey" -DETAIL: Key (ftest1)=(43) is not present in table "pktable". -DROP TABLE FKTABLE; --- This should fail, because we'd have to cast numeric to int which is --- not an implicit coercion (or use numeric=numeric, but that's not part --- of the integer opfamily) -CREATE TEMP TABLE FKTABLE (ftest1 numeric); -ALTER TABLE FKTABLE ADD FOREIGN KEY(ftest1) references pktable; -ERROR: foreign key constraint "fktable_ftest1_fkey" cannot be implemented -DETAIL: Key columns "ftest1" and "ptest1" are of incompatible types: numeric and integer. -DROP TABLE FKTABLE; -DROP TABLE PKTABLE; --- On the other hand, this should work because int implicitly promotes to --- numeric, and we allow promotion on the FK side -CREATE TEMP TABLE PKTABLE (ptest1 numeric PRIMARY KEY); -INSERT INTO PKTABLE VALUES(42); -CREATE TEMP TABLE FKTABLE (ftest1 int); -ALTER TABLE FKTABLE ADD FOREIGN KEY(ftest1) references pktable; --- Check it actually works -INSERT INTO FKTABLE VALUES(42); -- should succeed -INSERT INTO FKTABLE VALUES(43); -- should fail -ERROR: insert or update on table "fktable" violates foreign key constraint "fktable_ftest1_fkey" -DETAIL: Key (ftest1)=(43) is not present in table "pktable". -DROP TABLE FKTABLE; -DROP TABLE PKTABLE; -CREATE TEMP TABLE PKTABLE (ptest1 int, ptest2 inet, - PRIMARY KEY(ptest1, ptest2)); --- This should fail, because we just chose really odd types -CREATE TEMP TABLE FKTABLE (ftest1 cidr, ftest2 timestamp); -ALTER TABLE FKTABLE ADD FOREIGN KEY(ftest1, ftest2) references pktable; -ERROR: foreign key constraint "fktable_ftest1_ftest2_fkey" cannot be implemented -DETAIL: Key columns "ftest1" and "ptest1" are of incompatible types: cidr and integer. -DROP TABLE FKTABLE; --- Again, so should this... -CREATE TEMP TABLE FKTABLE (ftest1 cidr, ftest2 timestamp); -ALTER TABLE FKTABLE ADD FOREIGN KEY(ftest1, ftest2) - references pktable(ptest1, ptest2); -ERROR: foreign key constraint "fktable_ftest1_ftest2_fkey" cannot be implemented -DETAIL: Key columns "ftest1" and "ptest1" are of incompatible types: cidr and integer. -DROP TABLE FKTABLE; --- This fails because we mixed up the column ordering -CREATE TEMP TABLE FKTABLE (ftest1 int, ftest2 inet); -ALTER TABLE FKTABLE ADD FOREIGN KEY(ftest1, ftest2) - references pktable(ptest2, ptest1); -ERROR: foreign key constraint "fktable_ftest1_ftest2_fkey" cannot be implemented -DETAIL: Key columns "ftest1" and "ptest2" are of incompatible types: integer and inet. --- As does this... -ALTER TABLE FKTABLE ADD FOREIGN KEY(ftest2, ftest1) - references pktable(ptest1, ptest2); -ERROR: foreign key constraint "fktable_ftest2_ftest1_fkey" cannot be implemented -DETAIL: Key columns "ftest2" and "ptest1" are of incompatible types: inet and integer. -DROP TABLE FKTABLE; -DROP TABLE PKTABLE; --- Test that ALTER CONSTRAINT updates trigger deferrability properly -CREATE TEMP TABLE PKTABLE (ptest1 int primary key); -CREATE TEMP TABLE FKTABLE (ftest1 int); -ALTER TABLE FKTABLE ADD CONSTRAINT fknd FOREIGN KEY(ftest1) REFERENCES pktable - ON DELETE CASCADE ON UPDATE NO ACTION NOT DEFERRABLE; -ALTER TABLE FKTABLE ADD CONSTRAINT fkdd FOREIGN KEY(ftest1) REFERENCES pktable - ON DELETE CASCADE ON UPDATE NO ACTION DEFERRABLE INITIALLY DEFERRED; -ALTER TABLE FKTABLE ADD CONSTRAINT fkdi FOREIGN KEY(ftest1) REFERENCES pktable - ON DELETE CASCADE ON UPDATE NO ACTION DEFERRABLE INITIALLY IMMEDIATE; -ALTER TABLE FKTABLE ADD CONSTRAINT fknd2 FOREIGN KEY(ftest1) REFERENCES pktable - ON DELETE CASCADE ON UPDATE NO ACTION DEFERRABLE INITIALLY DEFERRED; -ALTER TABLE FKTABLE ALTER CONSTRAINT fknd2 NOT DEFERRABLE; -ERROR: unsupported alter table subcommand -DETAIL: Subcommand "ALTER CONSTRAINT" is not supported on OrioleDB tables yet. This will be implemented in future. -ALTER TABLE FKTABLE ADD CONSTRAINT fkdd2 FOREIGN KEY(ftest1) REFERENCES pktable - ON DELETE CASCADE ON UPDATE NO ACTION NOT DEFERRABLE; -ALTER TABLE FKTABLE ALTER CONSTRAINT fkdd2 DEFERRABLE INITIALLY DEFERRED; -ERROR: unsupported alter table subcommand -DETAIL: Subcommand "ALTER CONSTRAINT" is not supported on OrioleDB tables yet. This will be implemented in future. -ALTER TABLE FKTABLE ADD CONSTRAINT fkdi2 FOREIGN KEY(ftest1) REFERENCES pktable - ON DELETE CASCADE ON UPDATE NO ACTION NOT DEFERRABLE; -ALTER TABLE FKTABLE ALTER CONSTRAINT fkdi2 DEFERRABLE INITIALLY IMMEDIATE; -ERROR: unsupported alter table subcommand -DETAIL: Subcommand "ALTER CONSTRAINT" is not supported on OrioleDB tables yet. This will be implemented in future. -SELECT conname, tgfoid::regproc, tgtype, tgdeferrable, tginitdeferred -FROM pg_trigger JOIN pg_constraint con ON con.oid = tgconstraint -WHERE tgrelid = 'pktable'::regclass -ORDER BY 1,2,3; - conname | tgfoid | tgtype | tgdeferrable | tginitdeferred ----------+------------------------+--------+--------------+---------------- - fkdd | "RI_FKey_cascade_del" | 9 | f | f - fkdd | "RI_FKey_noaction_upd" | 17 | t | t - fkdd2 | "RI_FKey_cascade_del" | 9 | f | f - fkdd2 | "RI_FKey_noaction_upd" | 17 | f | f - fkdi | "RI_FKey_cascade_del" | 9 | f | f - fkdi | "RI_FKey_noaction_upd" | 17 | t | f - fkdi2 | "RI_FKey_cascade_del" | 9 | f | f - fkdi2 | "RI_FKey_noaction_upd" | 17 | f | f - fknd | "RI_FKey_cascade_del" | 9 | f | f - fknd | "RI_FKey_noaction_upd" | 17 | f | f - fknd2 | "RI_FKey_cascade_del" | 9 | f | f - fknd2 | "RI_FKey_noaction_upd" | 17 | t | t -(12 rows) - -SELECT conname, tgfoid::regproc, tgtype, tgdeferrable, tginitdeferred -FROM pg_trigger JOIN pg_constraint con ON con.oid = tgconstraint -WHERE tgrelid = 'fktable'::regclass -ORDER BY 1,2,3; - conname | tgfoid | tgtype | tgdeferrable | tginitdeferred ----------+---------------------+--------+--------------+---------------- - fkdd | "RI_FKey_check_ins" | 5 | t | t - fkdd | "RI_FKey_check_upd" | 17 | t | t - fkdd2 | "RI_FKey_check_ins" | 5 | f | f - fkdd2 | "RI_FKey_check_upd" | 17 | f | f - fkdi | "RI_FKey_check_ins" | 5 | t | f - fkdi | "RI_FKey_check_upd" | 17 | t | f - fkdi2 | "RI_FKey_check_ins" | 5 | f | f - fkdi2 | "RI_FKey_check_upd" | 17 | f | f - fknd | "RI_FKey_check_ins" | 5 | f | f - fknd | "RI_FKey_check_upd" | 17 | f | f - fknd2 | "RI_FKey_check_ins" | 5 | t | t - fknd2 | "RI_FKey_check_upd" | 17 | t | t -(12 rows) - --- temp tables should go away by themselves, need not drop them. --- test check constraint adding -create table atacc1 ( test int ); --- add a check constraint -alter table atacc1 add constraint atacc_test1 check (test>3); --- should fail -insert into atacc1 (test) values (2); -ERROR: new row for relation "atacc1" violates check constraint "atacc_test1" -DETAIL: Failing row contains (2). --- should succeed -insert into atacc1 (test) values (4); -drop table atacc1; --- let's do one where the check fails when added -create table atacc1 ( test int ); --- insert a soon to be failing row -insert into atacc1 (test) values (2); --- add a check constraint (fails) -alter table atacc1 add constraint atacc_test1 check (test>3); -ERROR: check constraint "atacc_test1" of relation "atacc1" is violated by some row -insert into atacc1 (test) values (4); -drop table atacc1; --- let's do one where the check fails because the column doesn't exist -create table atacc1 ( test int ); --- add a check constraint (fails) -alter table atacc1 add constraint atacc_test1 check (test1>3); -ERROR: column "test1" does not exist -HINT: Perhaps you meant to reference the column "atacc1.test". -drop table atacc1; --- something a little more complicated -create table atacc1 ( test int, test2 int, test3 int); --- add a check constraint (fails) -alter table atacc1 add constraint atacc_test1 check (test+test23), test2 int); -alter table atacc1 add check (test2>test); --- should fail for $2 -insert into atacc1 (test2, test) values (3, 4); -ERROR: new row for relation "atacc1" violates check constraint "atacc1_check" -DETAIL: Failing row contains (4, 3). -drop table atacc1; --- inheritance related tests -create table atacc1 (test int); -create table atacc2 (test2 int); -create table atacc3 (test3 int) inherits (atacc1, atacc2); -alter table atacc2 add constraint foo check (test2>0); --- fail and then succeed on atacc2 -insert into atacc2 (test2) values (-3); -ERROR: new row for relation "atacc2" violates check constraint "foo" -DETAIL: Failing row contains (-3). -insert into atacc2 (test2) values (3); --- fail and then succeed on atacc3 -insert into atacc3 (test2) values (-3); -ERROR: new row for relation "atacc3" violates check constraint "foo" -DETAIL: Failing row contains (null, -3, null). -insert into atacc3 (test2) values (3); -drop table atacc3; -drop table atacc2; -drop table atacc1; --- same things with one created with INHERIT -create table atacc1 (test int); -create table atacc2 (test2 int); -create table atacc3 (test3 int) inherits (atacc1, atacc2); -alter table atacc3 no inherit atacc2; --- fail -alter table atacc3 no inherit atacc2; -ERROR: relation "atacc2" is not a parent of relation "atacc3" --- make sure it really isn't a child -insert into atacc3 (test2) values (3); -select test2 from atacc2; - test2 -------- -(0 rows) - --- fail due to missing constraint -alter table atacc2 add constraint foo check (test2>0); -alter table atacc3 inherit atacc2; -ERROR: child table is missing constraint "foo" --- fail due to missing column -alter table atacc3 rename test2 to testx; -alter table atacc3 inherit atacc2; -ERROR: child table is missing column "test2" --- fail due to mismatched data type -alter table atacc3 add test2 bool; -alter table atacc3 inherit atacc2; -ERROR: child table "atacc3" has different type for column "test2" -alter table atacc3 drop test2; --- succeed -alter table atacc3 add test2 int; -update atacc3 set test2 = 4 where test2 is null; -alter table atacc3 add constraint foo check (test2>0); -alter table atacc3 inherit atacc2; --- fail due to duplicates and circular inheritance -alter table atacc3 inherit atacc2; -ERROR: relation "atacc2" would be inherited from more than once -alter table atacc2 inherit atacc3; -ERROR: circular inheritance not allowed -DETAIL: "atacc3" is already a child of "atacc2". -alter table atacc2 inherit atacc2; -ERROR: circular inheritance not allowed -DETAIL: "atacc2" is already a child of "atacc2". --- test that we really are a child now (should see 4 not 3 and cascade should go through) -select test2 from atacc2; - test2 -------- - 4 -(1 row) - -drop table atacc2 cascade; -NOTICE: drop cascades to table atacc3 -drop table atacc1; --- adding only to a parent is allowed as of 9.2 -create table atacc1 (test int); -create table atacc2 (test2 int) inherits (atacc1); --- ok: -alter table atacc1 add constraint foo check (test>0) no inherit; --- check constraint is not there on child -insert into atacc2 (test) values (-3); --- check constraint is there on parent -insert into atacc1 (test) values (-3); -ERROR: new row for relation "atacc1" violates check constraint "foo" -DETAIL: Failing row contains (-3). -insert into atacc1 (test) values (3); --- fail, violating row: -alter table atacc2 add constraint foo check (test>0) no inherit; -ERROR: check constraint "foo" of relation "atacc2" is violated by some row -drop table atacc2; -drop table atacc1; --- test unique constraint adding -create table atacc1 ( test int ) ; --- add a unique constraint -alter table atacc1 add constraint atacc_test1 unique (test); --- insert first value -insert into atacc1 (test) values (2); --- should fail -insert into atacc1 (test) values (2); -ERROR: duplicate key value violates unique constraint "atacc_test1" -DETAIL: Key (test)=('2') already exists. --- should succeed -insert into atacc1 (test) values (4); --- try to create duplicates via alter table using - should fail -alter table atacc1 alter column test type integer using 0; -ERROR: could not create unique index "atacc_test1" -DETAIL: Duplicate keys exist. -drop table atacc1; --- let's do one where the unique constraint fails when added -create table atacc1 ( test int ); --- insert soon to be failing rows -insert into atacc1 (test) values (2); -insert into atacc1 (test) values (2); --- add a unique constraint (fails) -alter table atacc1 add constraint atacc_test1 unique (test); -ERROR: could not create unique index "atacc_test1" -DETAIL: Duplicate keys exist. -insert into atacc1 (test) values (3); -drop table atacc1; --- let's do one where the unique constraint fails --- because the column doesn't exist -create table atacc1 ( test int ); --- add a unique constraint (fails) -alter table atacc1 add constraint atacc_test1 unique (test1); -ERROR: column "test1" named in key does not exist -drop table atacc1; --- something a little more complicated -create table atacc1 ( test int, test2 int); --- add a unique constraint -alter table atacc1 add constraint atacc_test1 unique (test, test2); --- insert initial value -insert into atacc1 (test,test2) values (4,4); --- should fail -insert into atacc1 (test,test2) values (4,4); -ERROR: duplicate key value violates unique constraint "atacc_test1" -DETAIL: Key (test, test2)=('4', '4') already exists. --- should all succeed -insert into atacc1 (test,test2) values (4,5); -insert into atacc1 (test,test2) values (5,4); -insert into atacc1 (test,test2) values (5,5); -drop table atacc1; --- lets do some naming tests -create table atacc1 (test int, test2 int, unique(test)); -alter table atacc1 add unique (test2); --- should fail for @@ second one @@ -insert into atacc1 (test2, test) values (3, 3); -insert into atacc1 (test2, test) values (2, 3); -ERROR: duplicate key value violates unique constraint "atacc1_test_key" -DETAIL: Key (test)=('3') already exists. -drop table atacc1; --- test primary key constraint adding -create table atacc1 ( id serial, test int) ; --- add a primary key constraint -alter table atacc1 add constraint atacc_test1 primary key (test); --- insert first value -insert into atacc1 (test) values (2); --- should fail -insert into atacc1 (test) values (2); -ERROR: duplicate key value violates unique constraint "atacc_test1" -DETAIL: Key (test)=('2') already exists. --- should succeed -insert into atacc1 (test) values (4); --- inserting NULL should fail -insert into atacc1 (test) values(NULL); -ERROR: null value in column "test" of relation "atacc1" violates not-null constraint -DETAIL: Failing row contains (4, null). --- try adding a second primary key (should fail) -alter table atacc1 add constraint atacc_oid1 primary key(id); -ERROR: multiple primary keys for table "atacc1" are not allowed --- drop first primary key constraint -alter table atacc1 drop constraint atacc_test1 restrict; --- try adding a primary key on oid (should succeed) -alter table atacc1 add constraint atacc_oid1 primary key(id); -drop table atacc1; --- let's do one where the primary key constraint fails when added -create table atacc1 ( test int ); --- insert soon to be failing rows -insert into atacc1 (test) values (2); -insert into atacc1 (test) values (2); --- add a primary key (fails) -alter table atacc1 add constraint atacc_test1 primary key (test); -ERROR: could not create unique index "atacc_test1" -DETAIL: Duplicate keys exist. -insert into atacc1 (test) values (3); -drop table atacc1; --- let's do another one where the primary key constraint fails when added -create table atacc1 ( test int ); --- insert soon to be failing row -insert into atacc1 (test) values (NULL); --- add a primary key (fails) -alter table atacc1 add constraint atacc_test1 primary key (test); -ERROR: column "test" of relation "atacc1" contains null values -insert into atacc1 (test) values (3); -drop table atacc1; --- let's do one where the primary key constraint fails --- because the column doesn't exist -create table atacc1 ( test int ); --- add a primary key constraint (fails) -alter table atacc1 add constraint atacc_test1 primary key (test1); -ERROR: column "test1" of relation "atacc1" does not exist -drop table atacc1; --- adding a new column as primary key to a non-empty table. --- should fail unless the column has a non-null default value. -create table atacc1 ( test int ); -insert into atacc1 (test) values (0); --- add a primary key column without a default (fails). -alter table atacc1 add column test2 int primary key; -ERROR: column "test2" of relation "atacc1" contains null values --- now add a primary key column with a default (succeeds). -alter table atacc1 add column test2 int default 0 primary key; -drop table atacc1; --- this combination used to have order-of-execution problems (bug #15580) -create table atacc1 (a int); -insert into atacc1 values(1); -alter table atacc1 - add column b float8 not null default random(), - add primary key(a); -drop table atacc1; --- additionally, we've seen issues with foreign key validation not being --- properly delayed until after a table rewrite. Check that works ok. -create table atacc1 (a int primary key); -alter table atacc1 add constraint atacc1_fkey foreign key (a) references atacc1 (a) not valid; -alter table atacc1 validate constraint atacc1_fkey, alter a type bigint; -ERROR: unsupported alter table subcommand -DETAIL: Subcommand "VALIDATE CONSTRAINT" is not supported on OrioleDB tables yet. This will be implemented in future. -drop table atacc1; --- we've also seen issues with check constraints being validated at the wrong --- time when there's a pending table rewrite. -create table atacc1 (a bigint, b int); -insert into atacc1 values(1,1); -alter table atacc1 add constraint atacc1_chk check(b = 1) not valid; -alter table atacc1 validate constraint atacc1_chk, alter a type int; -ERROR: unsupported alter table subcommand -DETAIL: Subcommand "VALIDATE CONSTRAINT" is not supported on OrioleDB tables yet. This will be implemented in future. -drop table atacc1; --- same as above, but ensure the constraint violation is detected -create table atacc1 (a bigint, b int); -insert into atacc1 values(1,2); -alter table atacc1 add constraint atacc1_chk check(b = 1) not valid; -alter table atacc1 validate constraint atacc1_chk, alter a type int; -ERROR: unsupported alter table subcommand -DETAIL: Subcommand "VALIDATE CONSTRAINT" is not supported on OrioleDB tables yet. This will be implemented in future. -drop table atacc1; --- something a little more complicated -create table atacc1 ( test int, test2 int); --- add a primary key constraint -alter table atacc1 add constraint atacc_test1 primary key (test, test2); --- try adding a second primary key - should fail -alter table atacc1 add constraint atacc_test2 primary key (test); -ERROR: multiple primary keys for table "atacc1" are not allowed --- insert initial value -insert into atacc1 (test,test2) values (4,4); --- should fail -insert into atacc1 (test,test2) values (4,4); -ERROR: duplicate key value violates unique constraint "atacc_test1" -DETAIL: Key (test, test2)=('4', '4') already exists. -insert into atacc1 (test,test2) values (NULL,3); -ERROR: null value in column "test" of relation "atacc1" violates not-null constraint -DETAIL: Failing row contains (null, 3). -insert into atacc1 (test,test2) values (3, NULL); -ERROR: null value in column "test2" of relation "atacc1" violates not-null constraint -DETAIL: Failing row contains (3, null). -insert into atacc1 (test,test2) values (NULL,NULL); -ERROR: null value in column "test" of relation "atacc1" violates not-null constraint -DETAIL: Failing row contains (null, null). --- should all succeed -insert into atacc1 (test,test2) values (4,5); -insert into atacc1 (test,test2) values (5,4); -insert into atacc1 (test,test2) values (5,5); -drop table atacc1; --- lets do some naming tests -create table atacc1 (test int, test2 int, primary key(test)); --- only first should succeed -insert into atacc1 (test2, test) values (3, 3); -insert into atacc1 (test2, test) values (2, 3); -ERROR: duplicate key value violates unique constraint "atacc1_pkey" -DETAIL: Key (test)=('3') already exists. -insert into atacc1 (test2, test) values (1, NULL); -ERROR: null value in column "test" of relation "atacc1" violates not-null constraint -DETAIL: Failing row contains (null, 1). -drop table atacc1; --- alter table / alter column [set/drop] not null tests --- try altering system catalogs, should fail -alter table pg_class alter column relname drop not null; -ERROR: permission denied: "pg_class" is a system catalog -alter table pg_class alter relname set not null; -ERROR: permission denied: "pg_class" is a system catalog --- try altering non-existent table, should fail -alter table non_existent alter column bar set not null; -ERROR: relation "non_existent" does not exist -alter table non_existent alter column bar drop not null; -ERROR: relation "non_existent" does not exist --- test setting columns to null and not null and vice versa --- test checking for null values and primary key -create table atacc1 (test int not null); -alter table atacc1 add constraint "atacc1_pkey" primary key (test); -\d atacc1 - Table "public.atacc1" - Column | Type | Collation | Nullable | Default ---------+---------+-----------+----------+--------- - test | integer | | not null | -Indexes: - "atacc1_pkey" PRIMARY KEY, btree (test) - -alter table atacc1 alter column test drop not null; -ERROR: column "test" is in a primary key -\d atacc1 - Table "public.atacc1" - Column | Type | Collation | Nullable | Default ---------+---------+-----------+----------+--------- - test | integer | | not null | -Indexes: - "atacc1_pkey" PRIMARY KEY, btree (test) - -alter table atacc1 drop constraint "atacc1_pkey"; -alter table atacc1 alter column test drop not null; -\d atacc1 - Table "public.atacc1" - Column | Type | Collation | Nullable | Default ---------+---------+-----------+----------+--------- - test | integer | | | - -insert into atacc1 values (null); -alter table atacc1 alter test set not null; -ERROR: column "test" of relation "atacc1" contains null values -delete from atacc1; -alter table atacc1 alter test set not null; --- try altering a non-existent column, should fail -alter table atacc1 alter bar set not null; -ERROR: column "bar" of relation "atacc1" does not exist -alter table atacc1 alter bar drop not null; -ERROR: column "bar" of relation "atacc1" does not exist --- try creating a view and altering that, should fail -create view myview as select * from atacc1; -alter table myview alter column test drop not null; -ERROR: ALTER action ALTER COLUMN ... DROP NOT NULL cannot be performed on relation "myview" -DETAIL: This operation is not supported for views. -alter table myview alter column test set not null; -ERROR: ALTER action ALTER COLUMN ... SET NOT NULL cannot be performed on relation "myview" -DETAIL: This operation is not supported for views. -drop view myview; -drop table atacc1; --- set not null verified by constraints -create table atacc1 (test_a int, test_b int); -insert into atacc1 values (null, 1); --- constraint not cover all values, should fail -alter table atacc1 add constraint atacc1_constr_or check(test_a is not null or test_b < 10); -alter table atacc1 alter test_a set not null; -ERROR: column "test_a" of relation "atacc1" contains null values -alter table atacc1 drop constraint atacc1_constr_or; --- not valid constraint, should fail -alter table atacc1 add constraint atacc1_constr_invalid check(test_a is not null) not valid; -alter table atacc1 alter test_a set not null; -ERROR: column "test_a" of relation "atacc1" contains null values -alter table atacc1 drop constraint atacc1_constr_invalid; --- with valid constraint -update atacc1 set test_a = 1; -alter table atacc1 add constraint atacc1_constr_a_valid check(test_a is not null); -alter table atacc1 alter test_a set not null; -delete from atacc1; -insert into atacc1 values (2, null); -alter table atacc1 alter test_a drop not null; --- test multiple set not null at same time --- test_a checked by atacc1_constr_a_valid, test_b should fail by table scan -alter table atacc1 alter test_a set not null, alter test_b set not null; -ERROR: column "test_b" of relation "atacc1" contains null values --- commands order has no importance -alter table atacc1 alter test_b set not null, alter test_a set not null; -ERROR: column "test_b" of relation "atacc1" contains null values --- valid one by table scan, one by check constraints -update atacc1 set test_b = 1; -alter table atacc1 alter test_b set not null, alter test_a set not null; -alter table atacc1 alter test_a drop not null, alter test_b drop not null; --- both column has check constraints -alter table atacc1 add constraint atacc1_constr_b_valid check(test_b is not null); -alter table atacc1 alter test_b set not null, alter test_a set not null; -drop table atacc1; --- test inheritance -create table parent (a int); -create table child (b varchar(255)) inherits (parent); -alter table parent alter a set not null; -insert into parent values (NULL); -ERROR: null value in column "a" of relation "parent" violates not-null constraint -DETAIL: Failing row contains (null). -insert into child (a, b) values (NULL, 'foo'); -ERROR: null value in column "a" of relation "child" violates not-null constraint -DETAIL: Failing row contains (null, foo). -alter table parent alter a drop not null; -insert into parent values (NULL); -insert into child (a, b) values (NULL, 'foo'); -alter table only parent alter a set not null; -ERROR: column "a" of relation "parent" contains null values -alter table child alter a set not null; -ERROR: column "a" of relation "child" contains null values -delete from parent; -alter table only parent alter a set not null; -insert into parent values (NULL); -ERROR: null value in column "a" of relation "parent" violates not-null constraint -DETAIL: Failing row contains (null). -alter table child alter a set not null; -insert into child (a, b) values (NULL, 'foo'); -ERROR: null value in column "a" of relation "child" violates not-null constraint -DETAIL: Failing row contains (null, foo). -delete from child; -alter table child alter a set not null; -insert into child (a, b) values (NULL, 'foo'); -ERROR: null value in column "a" of relation "child" violates not-null constraint -DETAIL: Failing row contains (null, foo). -drop table child; -drop table parent; --- test setting and removing default values -create table def_test ( - c1 int4 default 5, - c2 text default 'initial_default' -); -insert into def_test default values; -alter table def_test alter column c1 drop default; -insert into def_test default values; -alter table def_test alter column c2 drop default; -insert into def_test default values; -alter table def_test alter column c1 set default 10; -alter table def_test alter column c2 set default 'new_default'; -insert into def_test default values; -select * from def_test; - c1 | c2 -----+----------------- - 5 | initial_default - | initial_default - | - 10 | new_default -(4 rows) - --- set defaults to an incorrect type: this should fail -alter table def_test alter column c1 set default 'wrong_datatype'; -ERROR: invalid input syntax for type integer: "wrong_datatype" -alter table def_test alter column c2 set default 20; --- set defaults on a non-existent column: this should fail -alter table def_test alter column c3 set default 30; -ERROR: column "c3" of relation "def_test" does not exist --- set defaults on views: we need to create a view, add a rule --- to allow insertions into it, and then alter the view to add --- a default -create view def_view_test as select * from def_test; -create rule def_view_test_ins as - on insert to def_view_test - do instead insert into def_test select new.*; -insert into def_view_test default values; -alter table def_view_test alter column c1 set default 45; -insert into def_view_test default values; -alter table def_view_test alter column c2 set default 'view_default'; -insert into def_view_test default values; -select * from def_view_test; - c1 | c2 -----+----------------- - 5 | initial_default - | initial_default - | - 10 | new_default - | - 45 | - 45 | view_default -(7 rows) - -drop rule def_view_test_ins on def_view_test; -drop view def_view_test; -drop table def_test; --- alter table / drop column tests --- try altering system catalogs, should fail -alter table pg_class drop column relname; -ERROR: permission denied: "pg_class" is a system catalog --- try altering non-existent table, should fail -alter table nosuchtable drop column bar; -ERROR: relation "nosuchtable" does not exist --- test dropping columns -create table atacc1 (a int4 not null, b int4, c int4 not null, d int4); -insert into atacc1 values (1, 2, 3, 4); -alter table atacc1 drop a; -alter table atacc1 drop a; -ERROR: column "a" of relation "atacc1" does not exist --- SELECTs -select * from atacc1; - b | c | d ----+---+--- - 2 | 3 | 4 -(1 row) - -select * from atacc1 order by a; -ERROR: column "a" does not exist -LINE 1: select * from atacc1 order by a; - ^ -select * from atacc1 order by "........pg.dropped.1........"; -ERROR: column "........pg.dropped.1........" does not exist -LINE 1: select * from atacc1 order by "........pg.dropped.1........"... - ^ -select * from atacc1 group by a; -ERROR: column "a" does not exist -LINE 1: select * from atacc1 group by a; - ^ -select * from atacc1 group by "........pg.dropped.1........"; -ERROR: column "........pg.dropped.1........" does not exist -LINE 1: select * from atacc1 group by "........pg.dropped.1........"... - ^ -select atacc1.* from atacc1; - b | c | d ----+---+--- - 2 | 3 | 4 -(1 row) - -select a from atacc1; -ERROR: column "a" does not exist -LINE 1: select a from atacc1; - ^ -select atacc1.a from atacc1; -ERROR: column atacc1.a does not exist -LINE 1: select atacc1.a from atacc1; - ^ -select b,c,d from atacc1; - b | c | d ----+---+--- - 2 | 3 | 4 -(1 row) - -select a,b,c,d from atacc1; -ERROR: column "a" does not exist -LINE 1: select a,b,c,d from atacc1; - ^ -select * from atacc1 where a = 1; -ERROR: column "a" does not exist -LINE 1: select * from atacc1 where a = 1; - ^ -select "........pg.dropped.1........" from atacc1; -ERROR: column "........pg.dropped.1........" does not exist -LINE 1: select "........pg.dropped.1........" from atacc1; - ^ -select atacc1."........pg.dropped.1........" from atacc1; -ERROR: column atacc1.........pg.dropped.1........ does not exist -LINE 1: select atacc1."........pg.dropped.1........" from atacc1; - ^ -select "........pg.dropped.1........",b,c,d from atacc1; -ERROR: column "........pg.dropped.1........" does not exist -LINE 1: select "........pg.dropped.1........",b,c,d from atacc1; - ^ -select * from atacc1 where "........pg.dropped.1........" = 1; -ERROR: column "........pg.dropped.1........" does not exist -LINE 1: select * from atacc1 where "........pg.dropped.1........" = ... - ^ --- UPDATEs -update atacc1 set a = 3; -ERROR: column "a" of relation "atacc1" does not exist -LINE 1: update atacc1 set a = 3; - ^ -update atacc1 set b = 2 where a = 3; -ERROR: column "a" does not exist -LINE 1: update atacc1 set b = 2 where a = 3; - ^ -update atacc1 set "........pg.dropped.1........" = 3; -ERROR: column "........pg.dropped.1........" of relation "atacc1" does not exist -LINE 1: update atacc1 set "........pg.dropped.1........" = 3; - ^ -update atacc1 set b = 2 where "........pg.dropped.1........" = 3; -ERROR: column "........pg.dropped.1........" does not exist -LINE 1: update atacc1 set b = 2 where "........pg.dropped.1........"... - ^ --- INSERTs -insert into atacc1 values (10, 11, 12, 13); -ERROR: INSERT has more expressions than target columns -LINE 1: insert into atacc1 values (10, 11, 12, 13); - ^ -insert into atacc1 values (default, 11, 12, 13); -ERROR: INSERT has more expressions than target columns -LINE 1: insert into atacc1 values (default, 11, 12, 13); - ^ -insert into atacc1 values (11, 12, 13); -insert into atacc1 (a) values (10); -ERROR: column "a" of relation "atacc1" does not exist -LINE 1: insert into atacc1 (a) values (10); - ^ -insert into atacc1 (a) values (default); -ERROR: column "a" of relation "atacc1" does not exist -LINE 1: insert into atacc1 (a) values (default); - ^ -insert into atacc1 (a,b,c,d) values (10,11,12,13); -ERROR: column "a" of relation "atacc1" does not exist -LINE 1: insert into atacc1 (a,b,c,d) values (10,11,12,13); - ^ -insert into atacc1 (a,b,c,d) values (default,11,12,13); -ERROR: column "a" of relation "atacc1" does not exist -LINE 1: insert into atacc1 (a,b,c,d) values (default,11,12,13); - ^ -insert into atacc1 (b,c,d) values (11,12,13); -insert into atacc1 ("........pg.dropped.1........") values (10); -ERROR: column "........pg.dropped.1........" of relation "atacc1" does not exist -LINE 1: insert into atacc1 ("........pg.dropped.1........") values (... - ^ -insert into atacc1 ("........pg.dropped.1........") values (default); -ERROR: column "........pg.dropped.1........" of relation "atacc1" does not exist -LINE 1: insert into atacc1 ("........pg.dropped.1........") values (... - ^ -insert into atacc1 ("........pg.dropped.1........",b,c,d) values (10,11,12,13); -ERROR: column "........pg.dropped.1........" of relation "atacc1" does not exist -LINE 1: insert into atacc1 ("........pg.dropped.1........",b,c,d) va... - ^ -insert into atacc1 ("........pg.dropped.1........",b,c,d) values (default,11,12,13); -ERROR: column "........pg.dropped.1........" of relation "atacc1" does not exist -LINE 1: insert into atacc1 ("........pg.dropped.1........",b,c,d) va... - ^ --- DELETEs -delete from atacc1 where a = 3; -ERROR: column "a" does not exist -LINE 1: delete from atacc1 where a = 3; - ^ -delete from atacc1 where "........pg.dropped.1........" = 3; -ERROR: column "........pg.dropped.1........" does not exist -LINE 1: delete from atacc1 where "........pg.dropped.1........" = 3; - ^ -delete from atacc1; --- try dropping a non-existent column, should fail -alter table atacc1 drop bar; -ERROR: column "bar" of relation "atacc1" does not exist --- try removing an oid column, should succeed (as it's nonexistent) -alter table atacc1 SET WITHOUT OIDS; -ERROR: unsupported alter table subcommand -DETAIL: Subcommand "SET WITHOUT OIDS" is not supported on OrioleDB tables yet. This will be implemented in future. --- try adding an oid column, should fail (not supported) -alter table atacc1 SET WITH OIDS; -ERROR: syntax error at or near "WITH" -LINE 1: alter table atacc1 SET WITH OIDS; - ^ --- try dropping the xmin column, should fail -alter table atacc1 drop xmin; -ERROR: cannot drop system column "xmin" --- try creating a view and altering that, should fail -create view myview as select * from atacc1; -select * from myview; - b | c | d ----+---+--- -(0 rows) - -alter table myview drop d; -ERROR: ALTER action DROP COLUMN cannot be performed on relation "myview" -DETAIL: This operation is not supported for views. -drop view myview; --- test some commands to make sure they fail on the dropped column -analyze atacc1(a); -ERROR: column "a" of relation "atacc1" does not exist -analyze atacc1("........pg.dropped.1........"); -ERROR: column "........pg.dropped.1........" of relation "atacc1" does not exist -vacuum analyze atacc1(a); -ERROR: column "a" of relation "atacc1" does not exist -vacuum analyze atacc1("........pg.dropped.1........"); -ERROR: column "........pg.dropped.1........" of relation "atacc1" does not exist -comment on column atacc1.a is 'testing'; -ERROR: column "a" of relation "atacc1" does not exist -comment on column atacc1."........pg.dropped.1........" is 'testing'; -ERROR: column "........pg.dropped.1........" of relation "atacc1" does not exist -alter table atacc1 alter a set storage plain; -ERROR: column "a" of relation "atacc1" does not exist -alter table atacc1 alter "........pg.dropped.1........" set storage plain; -ERROR: column "........pg.dropped.1........" of relation "atacc1" does not exist -alter table atacc1 alter a set statistics 0; -ERROR: column "a" of relation "atacc1" does not exist -alter table atacc1 alter "........pg.dropped.1........" set statistics 0; -ERROR: column "........pg.dropped.1........" of relation "atacc1" does not exist -alter table atacc1 alter a set default 3; -ERROR: column "a" of relation "atacc1" does not exist -alter table atacc1 alter "........pg.dropped.1........" set default 3; -ERROR: column "........pg.dropped.1........" of relation "atacc1" does not exist -alter table atacc1 alter a drop default; -ERROR: column "a" of relation "atacc1" does not exist -alter table atacc1 alter "........pg.dropped.1........" drop default; -ERROR: column "........pg.dropped.1........" of relation "atacc1" does not exist -alter table atacc1 alter a set not null; -ERROR: column "a" of relation "atacc1" does not exist -alter table atacc1 alter "........pg.dropped.1........" set not null; -ERROR: column "........pg.dropped.1........" of relation "atacc1" does not exist -alter table atacc1 alter a drop not null; -ERROR: column "a" of relation "atacc1" does not exist -alter table atacc1 alter "........pg.dropped.1........" drop not null; -ERROR: column "........pg.dropped.1........" of relation "atacc1" does not exist -alter table atacc1 rename a to x; -ERROR: column "a" does not exist -alter table atacc1 rename "........pg.dropped.1........" to x; -ERROR: column "........pg.dropped.1........" does not exist -alter table atacc1 add primary key(a); -ERROR: column "a" of relation "atacc1" does not exist -alter table atacc1 add primary key("........pg.dropped.1........"); -ERROR: column "........pg.dropped.1........" of relation "atacc1" does not exist -alter table atacc1 add unique(a); -ERROR: column "a" named in key does not exist -alter table atacc1 add unique("........pg.dropped.1........"); -ERROR: column "........pg.dropped.1........" named in key does not exist -alter table atacc1 add check (a > 3); -ERROR: column "a" does not exist -alter table atacc1 add check ("........pg.dropped.1........" > 3); -ERROR: column "........pg.dropped.1........" does not exist -create table atacc2 (id int4 unique); -alter table atacc1 add foreign key (a) references atacc2(id); -ERROR: column "a" referenced in foreign key constraint does not exist -alter table atacc1 add foreign key ("........pg.dropped.1........") references atacc2(id); -ERROR: column "........pg.dropped.1........" referenced in foreign key constraint does not exist -alter table atacc2 add foreign key (id) references atacc1(a); -ERROR: column "a" referenced in foreign key constraint does not exist -alter table atacc2 add foreign key (id) references atacc1("........pg.dropped.1........"); -ERROR: column "........pg.dropped.1........" referenced in foreign key constraint does not exist -drop table atacc2; -create index "testing_idx" on atacc1(a); -ERROR: column "a" does not exist -create index "testing_idx" on atacc1("........pg.dropped.1........"); -ERROR: column "........pg.dropped.1........" does not exist --- test create as and select into -insert into atacc1 values (21, 22, 23); -create table attest1 as select * from atacc1; -select * from attest1; - b | c | d -----+----+---- - 21 | 22 | 23 -(1 row) - -drop table attest1; -select * into attest2 from atacc1; -select * from attest2; - b | c | d -----+----+---- - 21 | 22 | 23 -(1 row) - -drop table attest2; --- try dropping all columns -alter table atacc1 drop c; -alter table atacc1 drop d; -alter table atacc1 drop b; -select * from atacc1; --- -(1 row) - -drop table atacc1; --- test constraint error reporting in presence of dropped columns -create table atacc1 (id serial primary key, value int check (value < 10)); -insert into atacc1(value) values (100); -ERROR: new row for relation "atacc1" violates check constraint "atacc1_value_check" -DETAIL: Failing row contains (1, 100). -alter table atacc1 drop column value; -alter table atacc1 add column value int check (value < 10); -insert into atacc1(value) values (100); -ERROR: new row for relation "atacc1" violates check constraint "atacc1_value_check" -DETAIL: Failing row contains (2, 100). -insert into atacc1(id, value) values (null, 0); -ERROR: null value in column "id" of relation "atacc1" violates not-null constraint -DETAIL: Failing row contains (null, 0). -drop table atacc1; --- test inheritance -create table parent (a int, b int, c int); -insert into parent values (1, 2, 3); -alter table parent drop a; -create table child (d varchar(255)) inherits (parent); -insert into child values (12, 13, 'testing'); -select * from parent; - b | c -----+---- - 2 | 3 - 12 | 13 -(2 rows) - -select * from child; - b | c | d -----+----+--------- - 12 | 13 | testing -(1 row) - -alter table parent drop c; -select * from parent; - b ----- - 2 - 12 -(2 rows) - -select * from child; - b | d -----+--------- - 12 | testing -(1 row) - -drop table child; -drop table parent; --- check error cases for inheritance column merging -create table parent (a float8, b numeric(10,4), c text collate "C"); -create table child (a float4) inherits (parent); -- fail -NOTICE: merging column "a" with inherited definition -ERROR: column "a" has a type conflict -DETAIL: double precision versus real -create table child (b decimal(10,7)) inherits (parent); -- fail -NOTICE: moving and merging column "b" with inherited definition -DETAIL: User-specified column moved to the position of the inherited column. -ERROR: column "b" has a type conflict -DETAIL: numeric(10,4) versus numeric(10,7) -create table child (c text collate "POSIX") inherits (parent); -- fail -NOTICE: moving and merging column "c" with inherited definition -DETAIL: User-specified column moved to the position of the inherited column. -ERROR: column "c" has a collation conflict -DETAIL: "C" versus "POSIX" -create table child (a double precision, b decimal(10,4)) inherits (parent); -NOTICE: merging column "a" with inherited definition -NOTICE: merging column "b" with inherited definition -drop table child; -drop table parent; --- test copy in/out -create table attest (a int4, b int4, c int4); -insert into attest values (1,2,3); -alter table attest drop a; -copy attest to stdout; -2 3 -copy attest(a) to stdout; -ERROR: column "a" of relation "attest" does not exist -copy attest("........pg.dropped.1........") to stdout; -ERROR: column "........pg.dropped.1........" of relation "attest" does not exist -copy attest from stdin; -ERROR: extra data after last expected column -CONTEXT: COPY attest, line 1: "10 11 12" -select * from attest; - b | c ----+--- - 2 | 3 -(1 row) - -copy attest from stdin; -select * from attest; - b | c -----+---- - 2 | 3 - 21 | 22 -(2 rows) - -copy attest(a) from stdin; -ERROR: column "a" of relation "attest" does not exist -copy attest("........pg.dropped.1........") from stdin; -ERROR: column "........pg.dropped.1........" of relation "attest" does not exist -copy attest(b,c) from stdin; -select * from attest; - b | c -----+---- - 2 | 3 - 21 | 22 - 31 | 32 -(3 rows) - -drop table attest; --- test inheritance -create table dropColumn (a int, b int, e int); -create table dropColumnChild (c int) inherits (dropColumn); -create table dropColumnAnother (d int) inherits (dropColumnChild); --- these two should fail -alter table dropColumnchild drop column a; -ERROR: cannot drop inherited column "a" -alter table only dropColumnChild drop column b; -ERROR: cannot drop inherited column "b" --- these three should work -alter table only dropColumn drop column e; -alter table dropColumnChild drop column c; -alter table dropColumn drop column a; -create table renameColumn (a int); -create table renameColumnChild (b int) inherits (renameColumn); -create table renameColumnAnother (c int) inherits (renameColumnChild); --- these three should fail -alter table renameColumnChild rename column a to d; -ERROR: cannot rename inherited column "a" -alter table only renameColumnChild rename column a to d; -ERROR: inherited column "a" must be renamed in child tables too -alter table only renameColumn rename column a to d; -ERROR: inherited column "a" must be renamed in child tables too --- these should work -alter table renameColumn rename column a to d; -alter table renameColumnChild rename column b to a; --- these should work -alter table if exists doesnt_exist_tab rename column a to d; -NOTICE: relation "doesnt_exist_tab" does not exist, skipping -alter table if exists doesnt_exist_tab rename column b to a; -NOTICE: relation "doesnt_exist_tab" does not exist, skipping --- this should work -alter table renameColumn add column w int; --- this should fail -alter table only renameColumn add column x int; -ERROR: column must be added to child tables too --- Test corner cases in dropping of inherited columns -create table p1 (f1 int, f2 int); -create table c1 (f1 int not null) inherits(p1); -NOTICE: merging column "f1" with inherited definition --- should be rejected since c1.f1 is inherited -alter table c1 drop column f1; -ERROR: cannot drop inherited column "f1" --- should work -alter table p1 drop column f1; --- c1.f1 is still there, but no longer inherited -select f1 from c1; - f1 ----- -(0 rows) - -alter table c1 drop column f1; -select f1 from c1; -ERROR: column "f1" does not exist -LINE 1: select f1 from c1; - ^ -HINT: Perhaps you meant to reference the column "c1.f2". -drop table p1 cascade; -NOTICE: drop cascades to table c1 -create table p1 (f1 int, f2 int); -create table c1 () inherits(p1); --- should be rejected since c1.f1 is inherited -alter table c1 drop column f1; -ERROR: cannot drop inherited column "f1" -alter table p1 drop column f1; --- c1.f1 is dropped now, since there is no local definition for it -select f1 from c1; -ERROR: column "f1" does not exist -LINE 1: select f1 from c1; - ^ -HINT: Perhaps you meant to reference the column "c1.f2". -drop table p1 cascade; -NOTICE: drop cascades to table c1 -create table p1 (f1 int, f2 int); -create table c1 () inherits(p1); --- should be rejected since c1.f1 is inherited -alter table c1 drop column f1; -ERROR: cannot drop inherited column "f1" -alter table only p1 drop column f1; --- c1.f1 is NOT dropped, but must now be considered non-inherited -alter table c1 drop column f1; -drop table p1 cascade; -NOTICE: drop cascades to table c1 -create table p1 (f1 int, f2 int); -create table c1 (f1 int not null) inherits(p1); -NOTICE: merging column "f1" with inherited definition --- should be rejected since c1.f1 is inherited -alter table c1 drop column f1; -ERROR: cannot drop inherited column "f1" -alter table only p1 drop column f1; --- c1.f1 is still there, but no longer inherited -alter table c1 drop column f1; -drop table p1 cascade; -NOTICE: drop cascades to table c1 -create table p1(id int, name text); -create table p2(id2 int, name text, height int); -create table c1(age int) inherits(p1,p2); -NOTICE: merging multiple inherited definitions of column "name" -create table gc1() inherits (c1); -select relname, attname, attinhcount, attislocal -from pg_class join pg_attribute on (pg_class.oid = pg_attribute.attrelid) -where relname in ('p1','p2','c1','gc1') and attnum > 0 and not attisdropped -order by relname, attnum; - relname | attname | attinhcount | attislocal ----------+---------+-------------+------------ - c1 | id | 1 | f - c1 | name | 2 | f - c1 | id2 | 1 | f - c1 | height | 1 | f - c1 | age | 0 | t - gc1 | id | 1 | f - gc1 | name | 1 | f - gc1 | id2 | 1 | f - gc1 | height | 1 | f - gc1 | age | 1 | f - p1 | id | 0 | t - p1 | name | 0 | t - p2 | id2 | 0 | t - p2 | name | 0 | t - p2 | height | 0 | t -(15 rows) - --- should work -alter table only p1 drop column name; --- should work. Now c1.name is local and inhcount is 0. -alter table p2 drop column name; --- should be rejected since its inherited -alter table gc1 drop column name; -ERROR: cannot drop inherited column "name" --- should work, and drop gc1.name along -alter table c1 drop column name; --- should fail: column does not exist -alter table gc1 drop column name; -ERROR: column "name" of relation "gc1" does not exist --- should work and drop the attribute in all tables -alter table p2 drop column height; --- IF EXISTS test -create table dropColumnExists (); -alter table dropColumnExists drop column non_existing; --fail -ERROR: column "non_existing" of relation "dropcolumnexists" does not exist -alter table dropColumnExists drop column if exists non_existing; --succeed -NOTICE: column "non_existing" of relation "dropcolumnexists" does not exist, skipping -select relname, attname, attinhcount, attislocal -from pg_class join pg_attribute on (pg_class.oid = pg_attribute.attrelid) -where relname in ('p1','p2','c1','gc1') and attnum > 0 and not attisdropped -order by relname, attnum; - relname | attname | attinhcount | attislocal ----------+---------+-------------+------------ - c1 | id | 1 | f - c1 | id2 | 1 | f - c1 | age | 0 | t - gc1 | id | 1 | f - gc1 | id2 | 1 | f - gc1 | age | 1 | f - p1 | id | 0 | t - p2 | id2 | 0 | t -(8 rows) - -drop table p1, p2 cascade; -NOTICE: drop cascades to 2 other objects -DETAIL: drop cascades to table c1 -drop cascades to table gc1 --- test attinhcount tracking with merged columns -create table depth0(); -create table depth1(c text) inherits (depth0); -create table depth2() inherits (depth1); -alter table depth0 add c text; -NOTICE: merging definition of column "c" for child "depth1" -select attrelid::regclass, attname, attinhcount, attislocal -from pg_attribute -where attnum > 0 and attrelid::regclass in ('depth0', 'depth1', 'depth2') -order by attrelid::regclass::text, attnum; - attrelid | attname | attinhcount | attislocal -----------+---------+-------------+------------ - depth0 | c | 0 | t - depth1 | c | 1 | t - depth2 | c | 1 | f -(3 rows) - --- test renumbering of child-table columns in inherited operations -create table p1 (f1 int); -create table c1 (f2 text, f3 int) inherits (p1); -alter table p1 add column a1 int check (a1 > 0); -alter table p1 add column f2 text; -NOTICE: merging definition of column "f2" for child "c1" -insert into p1 values (1,2,'abc'); -insert into c1 values(11,'xyz',33,0); -- should fail -ERROR: new row for relation "c1" violates check constraint "p1_a1_check" -DETAIL: Failing row contains (11, xyz, 33, 0). -insert into c1 values(11,'xyz',33,22); -select * from p1; - f1 | a1 | f2 -----+----+----- - 1 | 2 | abc - 11 | 22 | xyz -(2 rows) - -update p1 set a1 = a1 + 1, f2 = upper(f2); -select * from p1; - f1 | a1 | f2 -----+----+----- - 1 | 3 | ABC - 11 | 23 | XYZ -(2 rows) - -drop table p1 cascade; -NOTICE: drop cascades to table c1 --- test that operations with a dropped column do not try to reference --- its datatype -create domain mytype as text; -create temp table foo (f1 text, f2 mytype, f3 text); -insert into foo values('bb','cc','dd'); -select * from foo; - f1 | f2 | f3 -----+----+---- - bb | cc | dd -(1 row) - -drop domain mytype cascade; -NOTICE: drop cascades to column f2 of table foo -select * from foo; - f1 | f3 -----+---- - bb | dd -(1 row) - -insert into foo values('qq','rr'); -select * from foo; - f1 | f3 -----+---- - bb | dd - qq | rr -(2 rows) - -update foo set f3 = 'zz'; -select * from foo; - f1 | f3 -----+---- - bb | zz - qq | zz -(2 rows) - -select f3,max(f1) from foo group by f3; - f3 | max -----+----- - zz | qq -(1 row) - --- Simple tests for alter table column type -alter table foo alter f1 TYPE integer; -- fails -ERROR: column "f1" cannot be cast automatically to type integer -HINT: You might need to specify "USING f1::integer". -alter table foo alter f1 TYPE varchar(10); -create table anothertab (atcol1 serial8, atcol2 boolean, - constraint anothertab_chk check (atcol1 <= 3)); -insert into anothertab (atcol1, atcol2) values (default, true); -insert into anothertab (atcol1, atcol2) values (default, false); -select * from anothertab; - atcol1 | atcol2 ---------+-------- - 1 | t - 2 | f -(2 rows) - -alter table anothertab alter column atcol1 type boolean; -- fails -ERROR: column "atcol1" cannot be cast automatically to type boolean -HINT: You might need to specify "USING atcol1::boolean". -alter table anothertab alter column atcol1 type boolean using atcol1::int; -- fails -ERROR: result of USING clause for column "atcol1" cannot be cast automatically to type boolean -HINT: You might need to add an explicit cast. -alter table anothertab alter column atcol1 type integer; -select * from anothertab; - atcol1 | atcol2 ---------+-------- - 1 | t - 2 | f -(2 rows) - -insert into anothertab (atcol1, atcol2) values (45, null); -- fails -ERROR: new row for relation "anothertab" violates check constraint "anothertab_chk" -DETAIL: Failing row contains (45, null). -insert into anothertab (atcol1, atcol2) values (default, null); -select * from anothertab; - atcol1 | atcol2 ---------+-------- - 1 | t - 2 | f - 3 | -(3 rows) - -alter table anothertab alter column atcol2 type text - using case when atcol2 is true then 'IT WAS TRUE' - when atcol2 is false then 'IT WAS FALSE' - else 'IT WAS NULL!' end; -select * from anothertab; - atcol1 | atcol2 ---------+-------------- - 1 | IT WAS TRUE - 2 | IT WAS FALSE - 3 | IT WAS NULL! -(3 rows) - -alter table anothertab alter column atcol1 type boolean - using case when atcol1 % 2 = 0 then true else false end; -- fails -ERROR: default for column "atcol1" cannot be cast automatically to type boolean -alter table anothertab alter column atcol1 drop default; -alter table anothertab alter column atcol1 type boolean - using case when atcol1 % 2 = 0 then true else false end; -- fails -ERROR: operator does not exist: boolean <= integer -HINT: No operator matches the given name and argument types. You might need to add explicit type casts. -alter table anothertab drop constraint anothertab_chk; -alter table anothertab drop constraint anothertab_chk; -- fails -ERROR: constraint "anothertab_chk" of relation "anothertab" does not exist -alter table anothertab drop constraint IF EXISTS anothertab_chk; -- succeeds -NOTICE: constraint "anothertab_chk" of relation "anothertab" does not exist, skipping -alter table anothertab alter column atcol1 type boolean - using case when atcol1 % 2 = 0 then true else false end; -select * from anothertab; - atcol1 | atcol2 ---------+-------------- - f | IT WAS TRUE - t | IT WAS FALSE - f | IT WAS NULL! -(3 rows) - -drop table anothertab; --- Test index handling in alter table column type (cf. bugs #15835, #15865) -create table anothertab(f1 int primary key, f2 int unique, - f3 int, f4 int, f5 int); -alter table anothertab - add exclude using btree (f3 with =); -alter table anothertab - add exclude using btree (f4 with =) where (f4 is not null); -alter table anothertab - add exclude using btree (f4 with =) where (f5 > 0); -alter table anothertab - add unique(f1,f4); -create index on anothertab(f2,f3); -create unique index on anothertab(f4); -\d anothertab - Table "public.anothertab" - Column | Type | Collation | Nullable | Default ---------+---------+-----------+----------+--------- - f1 | integer | | not null | - f2 | integer | | | - f3 | integer | | | - f4 | integer | | | - f5 | integer | | | -Indexes: - "anothertab_pkey" PRIMARY KEY, btree (f1) - "anothertab_f1_f4_key" UNIQUE CONSTRAINT, btree (f1, f4) - "anothertab_f2_f3_idx" btree (f2, f3) - "anothertab_f2_key" UNIQUE CONSTRAINT, btree (f2) - "anothertab_f3_excl" EXCLUDE USING btree (f3 WITH =) - "anothertab_f4_excl" EXCLUDE USING btree (f4 WITH =) WHERE (f4 IS NOT NULL) - "anothertab_f4_excl1" EXCLUDE USING btree (f4 WITH =) WHERE (f5 > 0) - "anothertab_f4_idx" UNIQUE, btree (f4) - -alter table anothertab alter column f1 type bigint; -alter table anothertab - alter column f2 type bigint, - alter column f3 type bigint, - alter column f4 type bigint; -alter table anothertab alter column f5 type bigint; -\d anothertab - Table "public.anothertab" - Column | Type | Collation | Nullable | Default ---------+--------+-----------+----------+--------- - f1 | bigint | | not null | - f2 | bigint | | | - f3 | bigint | | | - f4 | bigint | | | - f5 | bigint | | | -Indexes: - "anothertab_pkey" PRIMARY KEY, btree (f1) - "anothertab_f1_f4_key" UNIQUE CONSTRAINT, btree (f1, f4) - "anothertab_f2_f3_idx" btree (f2, f3) - "anothertab_f2_key" UNIQUE CONSTRAINT, btree (f2) - "anothertab_f3_excl" EXCLUDE USING btree (f3 WITH =) - "anothertab_f4_excl" EXCLUDE USING btree (f4 WITH =) WHERE (f4 IS NOT NULL) - "anothertab_f4_excl1" EXCLUDE USING btree (f4 WITH =) WHERE (f5 > 0) - "anothertab_f4_idx" UNIQUE, btree (f4) - -drop table anothertab; --- test that USING expressions are parsed before column alter type / drop steps -create table another (f1 int, f2 text, f3 text); -insert into another values(1, 'one', 'uno'); -insert into another values(2, 'two', 'due'); -insert into another values(3, 'three', 'tre'); -select * from another; - f1 | f2 | f3 -----+-------+----- - 1 | one | uno - 2 | two | due - 3 | three | tre -(3 rows) - -alter table another - alter f1 type text using f2 || ' and ' || f3 || ' more', - alter f2 type bigint using f1 * 10, - drop column f3; -select * from another; - f1 | f2 ---------------------+---- - one and uno more | 10 - two and due more | 20 - three and tre more | 30 -(3 rows) - -drop table another; --- Create an index that skips WAL, then perform a SET DATA TYPE that skips --- rewriting the index. -begin; -create table skip_wal_skip_rewrite_index (c varchar(10) primary key); -alter table skip_wal_skip_rewrite_index alter c type varchar(20); -commit; --- We disallow changing table's row type if it's used for storage -create table at_tab1 (a int, b text); -create table at_tab2 (x int, y at_tab1); -alter table at_tab1 alter column b type varchar; -- fails -ERROR: cannot alter table "at_tab1" because column "at_tab2.y" uses its row type -drop table at_tab2; --- Use of row type in an expression is defended differently -create table at_tab2 (x int, y text, check((x,y)::at_tab1 = (1,'42')::at_tab1)); -alter table at_tab1 alter column b type varchar; -- allowed, but ... -insert into at_tab2 values(1,'42'); -- ... this will fail -ERROR: ROW() column has type text instead of type character varying -drop table at_tab1, at_tab2; --- Check it for a partitioned table, too -create table at_tab1 (a int, b text) partition by list(a); -create table at_tab2 (x int, y at_tab1); -alter table at_tab1 alter column b type varchar; -- fails -ERROR: cannot alter table "at_tab1" because column "at_tab2.y" uses its row type -drop table at_tab1, at_tab2; --- Alter column type that's part of a partitioned index -create table at_partitioned (a int, b text) partition by range (a); -create table at_part_1 partition of at_partitioned for values from (0) to (1000); -insert into at_partitioned values (512, '0.123'); -create table at_part_2 (b text, a int); -insert into at_part_2 values ('1.234', 1024); -create index on at_partitioned (b); -create index on at_partitioned (a); -\d at_part_1 - Table "public.at_part_1" - Column | Type | Collation | Nullable | Default ---------+---------+-----------+----------+--------- - a | integer | | | - b | text | | | -Partition of: at_partitioned FOR VALUES FROM (0) TO (1000) -Indexes: - "at_part_1_a_idx" btree (a) - "at_part_1_b_idx" btree (b) - -\d at_part_2 - Table "public.at_part_2" - Column | Type | Collation | Nullable | Default ---------+---------+-----------+----------+--------- - b | text | | | - a | integer | | | - -alter table at_partitioned attach partition at_part_2 for values from (1000) to (2000); -\d at_part_2 - Table "public.at_part_2" - Column | Type | Collation | Nullable | Default ---------+---------+-----------+----------+--------- - b | text | | | - a | integer | | | -Partition of: at_partitioned FOR VALUES FROM (1000) TO (2000) -Indexes: - "at_part_2_a_idx" btree (a) - "at_part_2_b_idx" btree (b) - -alter table at_partitioned alter column b type numeric using b::numeric; -\d at_part_1 - Table "public.at_part_1" - Column | Type | Collation | Nullable | Default ---------+---------+-----------+----------+--------- - a | integer | | | - b | numeric | | | -Partition of: at_partitioned FOR VALUES FROM (0) TO (1000) -Indexes: - "at_part_1_a_idx" btree (a) - "at_part_1_b_idx" btree (b) - -\d at_part_2 - Table "public.at_part_2" - Column | Type | Collation | Nullable | Default ---------+---------+-----------+----------+--------- - b | numeric | | | - a | integer | | | -Partition of: at_partitioned FOR VALUES FROM (1000) TO (2000) -Indexes: - "at_part_2_a_idx" btree (a) - "at_part_2_b_idx" btree (b) - -drop table at_partitioned; --- Alter column type when no table rewrite is required --- Also check that comments are preserved -create table at_partitioned(id int, name varchar(64), unique (id, name)) - partition by hash(id); -comment on constraint at_partitioned_id_name_key on at_partitioned is 'parent constraint'; -comment on index at_partitioned_id_name_key is 'parent index'; -create table at_partitioned_0 partition of at_partitioned - for values with (modulus 2, remainder 0); -comment on constraint at_partitioned_0_id_name_key on at_partitioned_0 is 'child 0 constraint'; -comment on index at_partitioned_0_id_name_key is 'child 0 index'; -create table at_partitioned_1 partition of at_partitioned - for values with (modulus 2, remainder 1); -comment on constraint at_partitioned_1_id_name_key on at_partitioned_1 is 'child 1 constraint'; -comment on index at_partitioned_1_id_name_key is 'child 1 index'; -insert into at_partitioned values(1, 'foo'); -insert into at_partitioned values(3, 'bar'); -create temp table old_oids as - select relname, oid as oldoid, relfilenode as oldfilenode - from pg_class where relname like 'at_partitioned%'; -select relname, - c.oid = oldoid as orig_oid, - case relfilenode - when 0 then 'none' - when c.oid then 'own' - when oldfilenode then 'orig' - else 'OTHER' - end as storage, - obj_description(c.oid, 'pg_class') as desc - from pg_class c left join old_oids using (relname) - where relname like 'at_partitioned%' - order by relname; - relname | orig_oid | storage | desc -------------------------------+----------+---------+--------------- - at_partitioned | t | none | - at_partitioned_0 | t | own | - at_partitioned_0_id_name_key | t | own | child 0 index - at_partitioned_1 | t | own | - at_partitioned_1_id_name_key | t | own | child 1 index - at_partitioned_id_name_key | t | none | parent index -(6 rows) - -select conname, obj_description(oid, 'pg_constraint') as desc - from pg_constraint where conname like 'at_partitioned%' - order by conname; - conname | desc -------------------------------+-------------------- - at_partitioned_0_id_name_key | child 0 constraint - at_partitioned_1_id_name_key | child 1 constraint - at_partitioned_id_name_key | parent constraint -(3 rows) - -alter table at_partitioned alter column name type varchar(127); -select relname, - c.oid = oldoid as orig_oid, - case relfilenode - when 0 then 'none' - when c.oid then 'own' - when oldfilenode then 'orig' - else 'OTHER' - end as storage, - obj_description(c.oid, 'pg_class') as desc - from pg_class c left join old_oids using (relname) - where relname like 'at_partitioned%' - order by relname; - relname | orig_oid | storage | desc -------------------------------+----------+---------+-------------- - at_partitioned | t | none | - at_partitioned_0 | t | own | - at_partitioned_0_id_name_key | f | own | - at_partitioned_1 | t | own | - at_partitioned_1_id_name_key | f | own | - at_partitioned_id_name_key | f | none | parent index -(6 rows) - -select conname, obj_description(oid, 'pg_constraint') as desc - from pg_constraint where conname like 'at_partitioned%' - order by conname; - conname | desc -------------------------------+------------------- - at_partitioned_0_id_name_key | - at_partitioned_1_id_name_key | - at_partitioned_id_name_key | parent constraint -(3 rows) - --- Don't remove this DROP, it exposes bug #15672 -drop table at_partitioned; --- disallow recursive containment of row types -create temp table recur1 (f1 int); -alter table recur1 add column f2 recur1; -- fails -ERROR: composite type recur1 cannot be made a member of itself -alter table recur1 add column f2 recur1[]; -- fails -ERROR: composite type recur1 cannot be made a member of itself -create domain array_of_recur1 as recur1[]; -alter table recur1 add column f2 array_of_recur1; -- fails -ERROR: composite type recur1 cannot be made a member of itself -create temp table recur2 (f1 int, f2 recur1); -alter table recur1 add column f2 recur2; -- fails -ERROR: composite type recur1 cannot be made a member of itself -alter table recur1 add column f2 int; -alter table recur1 alter column f2 type recur2; -- fails -ERROR: composite type recur1 cannot be made a member of itself --- SET STORAGE may need to add a TOAST table -create table test_storage (a text, c text storage plain); -select reltoastrelid <> 0 as has_toast_table - from pg_class where oid = 'test_storage'::regclass; - has_toast_table ------------------ - t -(1 row) - -alter table test_storage alter a set storage plain; --- rewrite table to remove its TOAST table; need a non-constant column default -alter table test_storage add b int default random()::int; -select reltoastrelid <> 0 as has_toast_table - from pg_class where oid = 'test_storage'::regclass; - has_toast_table ------------------ - t -(1 row) - -alter table test_storage alter a set storage default; -- re-add TOAST table -select reltoastrelid <> 0 as has_toast_table - from pg_class where oid = 'test_storage'::regclass; - has_toast_table ------------------ - t -(1 row) - --- check STORAGE correctness -create table test_storage_failed (a text, b int storage extended); -ERROR: column data type integer can only have storage PLAIN --- test that SET STORAGE propagates to index correctly -create index test_storage_idx on test_storage (b, a); -alter table test_storage alter column a set storage external; -\d+ test_storage - Table "public.test_storage" - Column | Type | Collation | Nullable | Default | Storage | Stats target | Description ---------+---------+-----------+----------+-------------------+----------+--------------+------------- - a | text | | | | external | | - c | text | | | | plain | | - b | integer | | | random()::integer | plain | | -Indexes: - "test_storage_idx" btree (b, a) - -\d+ test_storage_idx - Index "public.test_storage_idx" - Column | Type | Key? | Definition | Storage | Stats target ---------+---------+------+------------+----------+-------------- - b | integer | yes | b | plain | - a | text | yes | a | external | -btree, for table "public.test_storage" - --- ALTER COLUMN TYPE with a check constraint and a child table (bug #13779) -CREATE TABLE test_inh_check (a float check (a > 10.2), b float); -CREATE TABLE test_inh_check_child() INHERITS(test_inh_check); -\d test_inh_check - Table "public.test_inh_check" - Column | Type | Collation | Nullable | Default ---------+------------------+-----------+----------+--------- - a | double precision | | | - b | double precision | | | -Check constraints: - "test_inh_check_a_check" CHECK (a > 10.2::double precision) -Number of child tables: 1 (Use \d+ to list them.) - -\d test_inh_check_child - Table "public.test_inh_check_child" - Column | Type | Collation | Nullable | Default ---------+------------------+-----------+----------+--------- - a | double precision | | | - b | double precision | | | -Check constraints: - "test_inh_check_a_check" CHECK (a > 10.2::double precision) -Inherits: test_inh_check - -select relname, conname, coninhcount, conislocal, connoinherit - from pg_constraint c, pg_class r - where relname like 'test_inh_check%' and c.conrelid = r.oid - order by 1, 2; - relname | conname | coninhcount | conislocal | connoinherit -----------------------+------------------------+-------------+------------+-------------- - test_inh_check | test_inh_check_a_check | 0 | t | f - test_inh_check_child | test_inh_check_a_check | 1 | f | f -(2 rows) - -ALTER TABLE test_inh_check ALTER COLUMN a TYPE numeric; -\d test_inh_check - Table "public.test_inh_check" - Column | Type | Collation | Nullable | Default ---------+------------------+-----------+----------+--------- - a | numeric | | | - b | double precision | | | -Check constraints: - "test_inh_check_a_check" CHECK (a::double precision > 10.2::double precision) -Number of child tables: 1 (Use \d+ to list them.) - -\d test_inh_check_child - Table "public.test_inh_check_child" - Column | Type | Collation | Nullable | Default ---------+------------------+-----------+----------+--------- - a | numeric | | | - b | double precision | | | -Check constraints: - "test_inh_check_a_check" CHECK (a::double precision > 10.2::double precision) -Inherits: test_inh_check - -select relname, conname, coninhcount, conislocal, connoinherit - from pg_constraint c, pg_class r - where relname like 'test_inh_check%' and c.conrelid = r.oid - order by 1, 2; - relname | conname | coninhcount | conislocal | connoinherit -----------------------+------------------------+-------------+------------+-------------- - test_inh_check | test_inh_check_a_check | 0 | t | f - test_inh_check_child | test_inh_check_a_check | 1 | f | f -(2 rows) - --- also try noinherit, local, and local+inherited cases -ALTER TABLE test_inh_check ADD CONSTRAINT bnoinherit CHECK (b > 100) NO INHERIT; -ALTER TABLE test_inh_check_child ADD CONSTRAINT blocal CHECK (b < 1000); -ALTER TABLE test_inh_check_child ADD CONSTRAINT bmerged CHECK (b > 1); -ALTER TABLE test_inh_check ADD CONSTRAINT bmerged CHECK (b > 1); -NOTICE: merging constraint "bmerged" with inherited definition -\d test_inh_check - Table "public.test_inh_check" - Column | Type | Collation | Nullable | Default ---------+------------------+-----------+----------+--------- - a | numeric | | | - b | double precision | | | -Check constraints: - "bmerged" CHECK (b > 1::double precision) - "bnoinherit" CHECK (b > 100::double precision) NO INHERIT - "test_inh_check_a_check" CHECK (a::double precision > 10.2::double precision) -Number of child tables: 1 (Use \d+ to list them.) - -\d test_inh_check_child - Table "public.test_inh_check_child" - Column | Type | Collation | Nullable | Default ---------+------------------+-----------+----------+--------- - a | numeric | | | - b | double precision | | | -Check constraints: - "blocal" CHECK (b < 1000::double precision) - "bmerged" CHECK (b > 1::double precision) - "test_inh_check_a_check" CHECK (a::double precision > 10.2::double precision) -Inherits: test_inh_check - -select relname, conname, coninhcount, conislocal, connoinherit - from pg_constraint c, pg_class r - where relname like 'test_inh_check%' and c.conrelid = r.oid - order by 1, 2; - relname | conname | coninhcount | conislocal | connoinherit -----------------------+------------------------+-------------+------------+-------------- - test_inh_check | bmerged | 0 | t | f - test_inh_check | bnoinherit | 0 | t | t - test_inh_check | test_inh_check_a_check | 0 | t | f - test_inh_check_child | blocal | 0 | t | f - test_inh_check_child | bmerged | 1 | t | f - test_inh_check_child | test_inh_check_a_check | 1 | f | f -(6 rows) - -ALTER TABLE test_inh_check ALTER COLUMN b TYPE numeric; -NOTICE: merging constraint "bmerged" with inherited definition -\d test_inh_check - Table "public.test_inh_check" - Column | Type | Collation | Nullable | Default ---------+---------+-----------+----------+--------- - a | numeric | | | - b | numeric | | | -Check constraints: - "bmerged" CHECK (b::double precision > 1::double precision) - "bnoinherit" CHECK (b::double precision > 100::double precision) NO INHERIT - "test_inh_check_a_check" CHECK (a::double precision > 10.2::double precision) -Number of child tables: 1 (Use \d+ to list them.) - -\d test_inh_check_child - Table "public.test_inh_check_child" - Column | Type | Collation | Nullable | Default ---------+---------+-----------+----------+--------- - a | numeric | | | - b | numeric | | | -Check constraints: - "blocal" CHECK (b::double precision < 1000::double precision) - "bmerged" CHECK (b::double precision > 1::double precision) - "test_inh_check_a_check" CHECK (a::double precision > 10.2::double precision) -Inherits: test_inh_check - -select relname, conname, coninhcount, conislocal, connoinherit - from pg_constraint c, pg_class r - where relname like 'test_inh_check%' and c.conrelid = r.oid - order by 1, 2; - relname | conname | coninhcount | conislocal | connoinherit -----------------------+------------------------+-------------+------------+-------------- - test_inh_check | bmerged | 0 | t | f - test_inh_check | bnoinherit | 0 | t | t - test_inh_check | test_inh_check_a_check | 0 | t | f - test_inh_check_child | blocal | 0 | t | f - test_inh_check_child | bmerged | 1 | t | f - test_inh_check_child | test_inh_check_a_check | 1 | f | f -(6 rows) - --- ALTER COLUMN TYPE with different schema in children --- Bug at https://postgr.es/m/20170102225618.GA10071@telsasoft.com -CREATE TABLE test_type_diff (f1 int); -CREATE TABLE test_type_diff_c (extra smallint) INHERITS (test_type_diff); -ALTER TABLE test_type_diff ADD COLUMN f2 int; -INSERT INTO test_type_diff_c VALUES (1, 2, 3); -ALTER TABLE test_type_diff ALTER COLUMN f2 TYPE bigint USING f2::bigint; -CREATE TABLE test_type_diff2 (int_two int2, int_four int4, int_eight int8); -CREATE TABLE test_type_diff2_c1 (int_four int4, int_eight int8, int_two int2); -CREATE TABLE test_type_diff2_c2 (int_eight int8, int_two int2, int_four int4); -CREATE TABLE test_type_diff2_c3 (int_two int2, int_four int4, int_eight int8); -ALTER TABLE test_type_diff2_c1 INHERIT test_type_diff2; -ALTER TABLE test_type_diff2_c2 INHERIT test_type_diff2; -ALTER TABLE test_type_diff2_c3 INHERIT test_type_diff2; -INSERT INTO test_type_diff2_c1 VALUES (1, 2, 3); -INSERT INTO test_type_diff2_c2 VALUES (4, 5, 6); -INSERT INTO test_type_diff2_c3 VALUES (7, 8, 9); -ALTER TABLE test_type_diff2 ALTER COLUMN int_four TYPE int8 USING int_four::int8; --- whole-row references are disallowed -ALTER TABLE test_type_diff2 ALTER COLUMN int_four TYPE int4 USING (pg_column_size(test_type_diff2)); -ERROR: cannot convert whole-row table reference -DETAIL: USING expression contains a whole-row table reference. --- check for rollback of ANALYZE corrupting table property flags (bug #11638) -CREATE TABLE check_fk_presence_1 (id int PRIMARY KEY, t text); -CREATE TABLE check_fk_presence_2 (id int REFERENCES check_fk_presence_1, t text); -BEGIN; -ALTER TABLE check_fk_presence_2 DROP CONSTRAINT check_fk_presence_2_id_fkey; -ANALYZE check_fk_presence_2; -ROLLBACK; -\d check_fk_presence_2 - Table "public.check_fk_presence_2" - Column | Type | Collation | Nullable | Default ---------+---------+-----------+----------+--------- - id | integer | | | - t | text | | | -Foreign-key constraints: - "check_fk_presence_2_id_fkey" FOREIGN KEY (id) REFERENCES check_fk_presence_1(id) - -DROP TABLE check_fk_presence_1, check_fk_presence_2; --- check column addition within a view (bug #14876) -create table at_base_table(id int, stuff text); -insert into at_base_table values (23, 'skidoo'); -create view at_view_1 as select * from at_base_table bt; -create view at_view_2 as select *, to_json(v1) as j from at_view_1 v1; -\d+ at_view_1 - View "public.at_view_1" - Column | Type | Collation | Nullable | Default | Storage | Description ---------+---------+-----------+----------+---------+----------+------------- - id | integer | | | | plain | - stuff | text | | | | extended | -View definition: - SELECT id, - stuff - FROM at_base_table bt; - -\d+ at_view_2 - View "public.at_view_2" - Column | Type | Collation | Nullable | Default | Storage | Description ---------+---------+-----------+----------+---------+----------+------------- - id | integer | | | | plain | - stuff | text | | | | extended | - j | json | | | | extended | -View definition: - SELECT id, - stuff, - to_json(v1.*) AS j - FROM at_view_1 v1; - -explain (verbose, costs off) select * from at_view_2; - QUERY PLAN ----------------------------------------------------------- - Seq Scan on public.at_base_table bt - Output: bt.id, bt.stuff, to_json(ROW(bt.id, bt.stuff)) -(2 rows) - -select * from at_view_2; - id | stuff | j -----+--------+---------------------------- - 23 | skidoo | {"id":23,"stuff":"skidoo"} -(1 row) - -create or replace view at_view_1 as select *, 2+2 as more from at_base_table bt; -\d+ at_view_1 - View "public.at_view_1" - Column | Type | Collation | Nullable | Default | Storage | Description ---------+---------+-----------+----------+---------+----------+------------- - id | integer | | | | plain | - stuff | text | | | | extended | - more | integer | | | | plain | -View definition: - SELECT id, - stuff, - 2 + 2 AS more - FROM at_base_table bt; - -\d+ at_view_2 - View "public.at_view_2" - Column | Type | Collation | Nullable | Default | Storage | Description ---------+---------+-----------+----------+---------+----------+------------- - id | integer | | | | plain | - stuff | text | | | | extended | - j | json | | | | extended | -View definition: - SELECT id, - stuff, - to_json(v1.*) AS j - FROM at_view_1 v1; - -explain (verbose, costs off) select * from at_view_2; - QUERY PLAN -------------------------------------------------------------- - Seq Scan on public.at_base_table bt - Output: bt.id, bt.stuff, to_json(ROW(bt.id, bt.stuff, 4)) -(2 rows) - -select * from at_view_2; - id | stuff | j -----+--------+------------------------------------- - 23 | skidoo | {"id":23,"stuff":"skidoo","more":4} -(1 row) - -drop view at_view_2; -drop view at_view_1; -drop table at_base_table; --- related case (bug #17811) -begin; -create temp table t1 as select * from int8_tbl; -create temp view v1 as select 1::int8 as q1; -create temp view v2 as select * from v1; -create or replace temp view v1 with (security_barrier = true) - as select * from t1; -create temp table log (q1 int8, q2 int8); -create rule v1_upd_rule as on update to v1 - do also insert into log values (new.*); -update v2 set q1 = q1 + 1 where q1 = 123; -select * from t1; - q1 | q2 -------------------+------------------- - 124 | 456 - 124 | 4567890123456789 - 4567890123456789 | 123 - 4567890123456789 | 4567890123456789 - 4567890123456789 | -4567890123456789 -(5 rows) - -select * from log; - q1 | q2 ------+------------------ - 124 | 456 - 124 | 4567890123456789 -(2 rows) - -rollback; --- check adding a column not itself requiring a rewrite, together with --- a column requiring a default (bug #16038) --- ensure that rewrites aren't silently optimized away, removing the --- value of the test -CREATE FUNCTION check_ddl_rewrite(p_tablename regclass, p_ddl text) -RETURNS boolean -LANGUAGE plpgsql AS $$ -DECLARE - v_relfilenode oid; -BEGIN - v_relfilenode := relfilenode FROM pg_class WHERE oid = p_tablename; - - EXECUTE p_ddl; - - RETURN v_relfilenode <> (SELECT relfilenode FROM pg_class WHERE oid = p_tablename); -END; -$$; -CREATE TABLE rewrite_test(col text); -INSERT INTO rewrite_test VALUES ('something'); -INSERT INTO rewrite_test VALUES (NULL); --- empty[12] don't need rewrite, but notempty[12]_rewrite will force one -SELECT check_ddl_rewrite('rewrite_test', $$ - ALTER TABLE rewrite_test - ADD COLUMN empty1 text, - ADD COLUMN notempty1_rewrite serial; -$$); - check_ddl_rewrite -------------------- - t -(1 row) - -SELECT check_ddl_rewrite('rewrite_test', $$ - ALTER TABLE rewrite_test - ADD COLUMN notempty2_rewrite serial, - ADD COLUMN empty2 text; -$$); - check_ddl_rewrite -------------------- - t -(1 row) - --- also check that fast defaults cause no problem, first without rewrite -SELECT check_ddl_rewrite('rewrite_test', $$ - ALTER TABLE rewrite_test - ADD COLUMN empty3 text, - ADD COLUMN notempty3_norewrite int default 42; -$$); - check_ddl_rewrite -------------------- - f -(1 row) - -SELECT check_ddl_rewrite('rewrite_test', $$ - ALTER TABLE rewrite_test - ADD COLUMN notempty4_norewrite int default 42, - ADD COLUMN empty4 text; -$$); - check_ddl_rewrite -------------------- - f -(1 row) - --- then with rewrite -SELECT check_ddl_rewrite('rewrite_test', $$ - ALTER TABLE rewrite_test - ADD COLUMN empty5 text, - ADD COLUMN notempty5_norewrite int default 42, - ADD COLUMN notempty5_rewrite serial; -$$); - check_ddl_rewrite -------------------- - t -(1 row) - -SELECT check_ddl_rewrite('rewrite_test', $$ - ALTER TABLE rewrite_test - ADD COLUMN notempty6_rewrite serial, - ADD COLUMN empty6 text, - ADD COLUMN notempty6_norewrite int default 42; -$$); - check_ddl_rewrite -------------------- - t -(1 row) - --- cleanup -DROP FUNCTION check_ddl_rewrite(regclass, text); -DROP TABLE rewrite_test; --- --- lock levels --- -drop type lockmodes; -ERROR: type "lockmodes" does not exist -create type lockmodes as enum ( - 'SIReadLock' -,'AccessShareLock' -,'RowShareLock' -,'RowExclusiveLock' -,'ShareUpdateExclusiveLock' -,'ShareLock' -,'ShareRowExclusiveLock' -,'ExclusiveLock' -,'AccessExclusiveLock' -); -drop view my_locks; -ERROR: view "my_locks" does not exist -create or replace view my_locks as -select case when c.relname like 'pg_toast%' then 'pg_toast' else c.relname end, max(mode::lockmodes) as max_lockmode -from pg_locks l join pg_class c on l.relation = c.oid -where virtualtransaction = ( - select virtualtransaction - from pg_locks - where transactionid = pg_current_xact_id()::xid) -and locktype = 'relation' -and relnamespace != (select oid from pg_namespace where nspname = 'pg_catalog') -and c.relname != 'my_locks' -group by c.relname; -create table alterlock (f1 int primary key, f2 text); -insert into alterlock values (1, 'foo'); -create table alterlock2 (f3 int primary key, f1 int); -insert into alterlock2 values (1, 1); -begin; alter table alterlock alter column f2 set statistics 150; -select * from my_locks order by 1; - relname | max_lockmode -----------------+-------------------------- - alterlock | ShareUpdateExclusiveLock - alterlock_pkey | AccessShareLock -(2 rows) - -rollback; -begin; alter table alterlock cluster on alterlock_pkey; -select * from my_locks order by 1; - relname | max_lockmode -----------------+-------------------------- - alterlock | ShareUpdateExclusiveLock - alterlock_pkey | ShareUpdateExclusiveLock -(2 rows) - -commit; -begin; alter table alterlock set without cluster; -select * from my_locks order by 1; - relname | max_lockmode ------------+-------------------------- - alterlock | ShareUpdateExclusiveLock -(1 row) - -commit; -begin; alter table alterlock set (fillfactor = 100); -select * from my_locks order by 1; - relname | max_lockmode ------------+-------------------------- - alterlock | ShareUpdateExclusiveLock - pg_toast | ShareUpdateExclusiveLock -(2 rows) - -commit; -begin; alter table alterlock reset (fillfactor); -select * from my_locks order by 1; - relname | max_lockmode ------------+-------------------------- - alterlock | ShareUpdateExclusiveLock - pg_toast | ShareUpdateExclusiveLock -(2 rows) - -commit; -begin; alter table alterlock set (toast.autovacuum_enabled = off); -select * from my_locks order by 1; - relname | max_lockmode ------------+-------------------------- - alterlock | ShareUpdateExclusiveLock - pg_toast | ShareUpdateExclusiveLock -(2 rows) - -commit; -begin; alter table alterlock set (autovacuum_enabled = off); -select * from my_locks order by 1; - relname | max_lockmode ------------+-------------------------- - alterlock | ShareUpdateExclusiveLock - pg_toast | ShareUpdateExclusiveLock -(2 rows) - -commit; -begin; alter table alterlock alter column f2 set (n_distinct = 1); -select * from my_locks order by 1; - relname | max_lockmode -----------------+-------------------------- - alterlock | ShareUpdateExclusiveLock - alterlock_pkey | AccessShareLock -(2 rows) - -rollback; --- test that mixing options with different lock levels works as expected -begin; alter table alterlock set (autovacuum_enabled = off, fillfactor = 80); -select * from my_locks order by 1; - relname | max_lockmode ------------+-------------------------- - alterlock | ShareUpdateExclusiveLock - pg_toast | ShareUpdateExclusiveLock -(2 rows) - -commit; -begin; alter table alterlock alter column f2 set storage extended; -select * from my_locks order by 1; - relname | max_lockmode -----------------+--------------------- - alterlock | AccessExclusiveLock - alterlock_pkey | AccessShareLock -(2 rows) - -rollback; -begin; alter table alterlock alter column f2 set default 'x'; -select * from my_locks order by 1; - relname | max_lockmode ------------+--------------------- - alterlock | AccessExclusiveLock -(1 row) - -rollback; -begin; -create trigger ttdummy - before delete or update on alterlock - for each row - execute procedure - ttdummy (1, 1); -select * from my_locks order by 1; - relname | max_lockmode ------------+----------------------- - alterlock | ShareRowExclusiveLock -(1 row) - -rollback; -begin; -select * from my_locks order by 1; - relname | max_lockmode ----------+-------------- -(0 rows) - -alter table alterlock2 add foreign key (f1) references alterlock (f1); -select * from my_locks order by 1; - relname | max_lockmode ------------------+----------------------- - alterlock | ShareRowExclusiveLock - alterlock2 | ShareRowExclusiveLock - alterlock2_pkey | AccessShareLock - alterlock_pkey | AccessShareLock -(4 rows) - -rollback; -begin; -alter table alterlock2 -add constraint alterlock2nv foreign key (f1) references alterlock (f1) NOT VALID; -select * from my_locks order by 1; - relname | max_lockmode -------------+----------------------- - alterlock | ShareRowExclusiveLock - alterlock2 | ShareRowExclusiveLock -(2 rows) - -commit; -begin; -alter table alterlock2 validate constraint alterlock2nv; -select * from my_locks order by 1; - relname | max_lockmode ------------------+-------------------------- - alterlock | RowShareLock - alterlock2 | ShareUpdateExclusiveLock - alterlock2_pkey | AccessShareLock - alterlock_pkey | AccessShareLock -(4 rows) - -rollback; -create or replace view my_locks as -select case when c.relname like 'pg_toast%' then 'pg_toast' else c.relname end, max(mode::lockmodes) as max_lockmode -from pg_locks l join pg_class c on l.relation = c.oid -where virtualtransaction = ( - select virtualtransaction - from pg_locks - where transactionid = pg_current_xact_id()::xid) -and locktype = 'relation' -and relnamespace != (select oid from pg_namespace where nspname = 'pg_catalog') -and c.relname = 'my_locks' -group by c.relname; --- raise exception -alter table my_locks set (autovacuum_enabled = false); -ERROR: unrecognized parameter "autovacuum_enabled" -alter view my_locks set (autovacuum_enabled = false); -ERROR: unrecognized parameter "autovacuum_enabled" -alter table my_locks reset (autovacuum_enabled); -alter view my_locks reset (autovacuum_enabled); -begin; -alter view my_locks set (security_barrier=off); -select * from my_locks order by 1; - relname | max_lockmode -----------+--------------------- - my_locks | AccessExclusiveLock -(1 row) - -alter view my_locks reset (security_barrier); -rollback; --- this test intentionally applies the ALTER TABLE command against a view, but --- uses a view option so we expect this to succeed. This form of SQL is --- accepted for historical reasons, as shown in the docs for ALTER VIEW -begin; -alter table my_locks set (security_barrier=off); -select * from my_locks order by 1; - relname | max_lockmode -----------+--------------------- - my_locks | AccessExclusiveLock -(1 row) - -alter table my_locks reset (security_barrier); -rollback; --- cleanup -drop table alterlock2; -drop table alterlock; -drop view my_locks; -drop type lockmodes; --- --- alter function --- -create function test_strict(text) returns text as - 'select coalesce($1, ''got passed a null'');' - language sql returns null on null input; -select test_strict(NULL); - test_strict -------------- - -(1 row) - -alter function test_strict(text) called on null input; -select test_strict(NULL); - test_strict -------------------- - got passed a null -(1 row) - -create function non_strict(text) returns text as - 'select coalesce($1, ''got passed a null'');' - language sql called on null input; -select non_strict(NULL); - non_strict -------------------- - got passed a null -(1 row) - -alter function non_strict(text) returns null on null input; -select non_strict(NULL); - non_strict ------------- - -(1 row) - --- --- alter object set schema --- -create schema alter1; -create schema alter2; -create table alter1.t1(f1 serial primary key, f2 int check (f2 > 0)); -create view alter1.v1 as select * from alter1.t1; -create function alter1.plus1(int) returns int as 'select $1+1' language sql; -create domain alter1.posint integer check (value > 0); -create type alter1.ctype as (f1 int, f2 text); -create function alter1.same(alter1.ctype, alter1.ctype) returns boolean language sql -as 'select $1.f1 is not distinct from $2.f1 and $1.f2 is not distinct from $2.f2'; -create operator alter1.=(procedure = alter1.same, leftarg = alter1.ctype, rightarg = alter1.ctype); -create operator class alter1.ctype_hash_ops default for type alter1.ctype using hash as - operator 1 alter1.=(alter1.ctype, alter1.ctype); -create conversion alter1.latin1_to_utf8 for 'latin1' to 'utf8' from iso8859_1_to_utf8; -create text search parser alter1.prs(start = prsd_start, gettoken = prsd_nexttoken, end = prsd_end, lextypes = prsd_lextype); -create text search configuration alter1.cfg(parser = alter1.prs); -create text search template alter1.tmpl(init = dsimple_init, lexize = dsimple_lexize); -create text search dictionary alter1.dict(template = alter1.tmpl); -insert into alter1.t1(f2) values(11); -insert into alter1.t1(f2) values(12); -alter table alter1.t1 set schema alter1; -- no-op, same schema -alter table alter1.t1 set schema alter2; -alter table alter1.v1 set schema alter2; -alter function alter1.plus1(int) set schema alter2; -alter domain alter1.posint set schema alter2; -alter operator class alter1.ctype_hash_ops using hash set schema alter2; -alter operator family alter1.ctype_hash_ops using hash set schema alter2; -alter operator alter1.=(alter1.ctype, alter1.ctype) set schema alter2; -alter function alter1.same(alter1.ctype, alter1.ctype) set schema alter2; -alter type alter1.ctype set schema alter1; -- no-op, same schema -alter type alter1.ctype set schema alter2; -alter conversion alter1.latin1_to_utf8 set schema alter2; -alter text search parser alter1.prs set schema alter2; -alter text search configuration alter1.cfg set schema alter2; -alter text search template alter1.tmpl set schema alter2; -alter text search dictionary alter1.dict set schema alter2; --- this should succeed because nothing is left in alter1 -drop schema alter1; -insert into alter2.t1(f2) values(13); -insert into alter2.t1(f2) values(14); -select * from alter2.t1; - f1 | f2 -----+---- - 1 | 11 - 2 | 12 - 3 | 13 - 4 | 14 -(4 rows) - -select * from alter2.v1; - f1 | f2 -----+---- - 1 | 11 - 2 | 12 - 3 | 13 - 4 | 14 -(4 rows) - -select alter2.plus1(41); - plus1 -------- - 42 -(1 row) - --- clean up -drop schema alter2 cascade; -NOTICE: drop cascades to 13 other objects -DETAIL: drop cascades to table alter2.t1 -drop cascades to view alter2.v1 -drop cascades to function alter2.plus1(integer) -drop cascades to type alter2.posint -drop cascades to type alter2.ctype -drop cascades to function alter2.same(alter2.ctype,alter2.ctype) -drop cascades to operator alter2.=(alter2.ctype,alter2.ctype) -drop cascades to operator family alter2.ctype_hash_ops for access method hash -drop cascades to conversion alter2.latin1_to_utf8 -drop cascades to text search parser alter2.prs -drop cascades to text search configuration alter2.cfg -drop cascades to text search template alter2.tmpl -drop cascades to text search dictionary alter2.dict --- --- composite types --- -CREATE TYPE test_type AS (a int); -\d test_type - Composite type "public.test_type" - Column | Type | Collation | Nullable | Default ---------+---------+-----------+----------+--------- - a | integer | | | - -ALTER TYPE nosuchtype ADD ATTRIBUTE b text; -- fails -ERROR: relation "nosuchtype" does not exist -ALTER TYPE test_type ADD ATTRIBUTE b text; -\d test_type - Composite type "public.test_type" - Column | Type | Collation | Nullable | Default ---------+---------+-----------+----------+--------- - a | integer | | | - b | text | | | - -ALTER TYPE test_type ADD ATTRIBUTE b text; -- fails -ERROR: column "b" of relation "test_type" already exists -ALTER TYPE test_type ALTER ATTRIBUTE b SET DATA TYPE varchar; -\d test_type - Composite type "public.test_type" - Column | Type | Collation | Nullable | Default ---------+-------------------+-----------+----------+--------- - a | integer | | | - b | character varying | | | - -ALTER TYPE test_type ALTER ATTRIBUTE b SET DATA TYPE integer; -\d test_type - Composite type "public.test_type" - Column | Type | Collation | Nullable | Default ---------+---------+-----------+----------+--------- - a | integer | | | - b | integer | | | - -ALTER TYPE test_type DROP ATTRIBUTE b; -\d test_type - Composite type "public.test_type" - Column | Type | Collation | Nullable | Default ---------+---------+-----------+----------+--------- - a | integer | | | - -ALTER TYPE test_type DROP ATTRIBUTE c; -- fails -ERROR: column "c" of relation "test_type" does not exist -ALTER TYPE test_type DROP ATTRIBUTE IF EXISTS c; -NOTICE: column "c" of relation "test_type" does not exist, skipping -ALTER TYPE test_type DROP ATTRIBUTE a, ADD ATTRIBUTE d boolean; -\d test_type - Composite type "public.test_type" - Column | Type | Collation | Nullable | Default ---------+---------+-----------+----------+--------- - d | boolean | | | - -ALTER TYPE test_type RENAME ATTRIBUTE a TO aa; -ERROR: column "a" does not exist -ALTER TYPE test_type RENAME ATTRIBUTE d TO dd; -\d test_type - Composite type "public.test_type" - Column | Type | Collation | Nullable | Default ---------+---------+-----------+----------+--------- - dd | boolean | | | - -DROP TYPE test_type; -CREATE TYPE test_type1 AS (a int, b text); -CREATE TABLE test_tbl1 (x int, y test_type1); -ALTER TYPE test_type1 ALTER ATTRIBUTE b TYPE varchar; -- fails -ERROR: cannot alter type "test_type1" because column "test_tbl1.y" uses it -DROP TABLE test_tbl1; -CREATE TABLE test_tbl1 (x int, y text); -CREATE INDEX test_tbl1_idx ON test_tbl1((row(x,y)::test_type1)); -ALTER TYPE test_type1 ALTER ATTRIBUTE b TYPE varchar; -- fails -ERROR: cannot alter type "test_type1" because column "test_tbl1_idx.row" uses it -DROP TABLE test_tbl1; -DROP TYPE test_type1; -CREATE TYPE test_type2 AS (a int, b text); -CREATE TABLE test_tbl2 OF test_type2; -CREATE TABLE test_tbl2_subclass () INHERITS (test_tbl2); -\d test_type2 - Composite type "public.test_type2" - Column | Type | Collation | Nullable | Default ---------+---------+-----------+----------+--------- - a | integer | | | - b | text | | | - -\d test_tbl2 - Table "public.test_tbl2" - Column | Type | Collation | Nullable | Default ---------+---------+-----------+----------+--------- - a | integer | | | - b | text | | | -Number of child tables: 1 (Use \d+ to list them.) -Typed table of type: test_type2 - -ALTER TYPE test_type2 ADD ATTRIBUTE c text; -- fails -ERROR: cannot alter type "test_type2" because it is the type of a typed table -HINT: Use ALTER ... CASCADE to alter the typed tables too. -ALTER TYPE test_type2 ADD ATTRIBUTE c text CASCADE; -\d test_type2 - Composite type "public.test_type2" - Column | Type | Collation | Nullable | Default ---------+---------+-----------+----------+--------- - a | integer | | | - b | text | | | - c | text | | | - -\d test_tbl2 - Table "public.test_tbl2" - Column | Type | Collation | Nullable | Default ---------+---------+-----------+----------+--------- - a | integer | | | - b | text | | | - c | text | | | -Number of child tables: 1 (Use \d+ to list them.) -Typed table of type: test_type2 - -ALTER TYPE test_type2 ALTER ATTRIBUTE b TYPE varchar; -- fails -ERROR: cannot alter type "test_type2" because it is the type of a typed table -HINT: Use ALTER ... CASCADE to alter the typed tables too. -ALTER TYPE test_type2 ALTER ATTRIBUTE b TYPE varchar CASCADE; -\d test_type2 - Composite type "public.test_type2" - Column | Type | Collation | Nullable | Default ---------+-------------------+-----------+----------+--------- - a | integer | | | - b | character varying | | | - c | text | | | - -\d test_tbl2 - Table "public.test_tbl2" - Column | Type | Collation | Nullable | Default ---------+-------------------+-----------+----------+--------- - a | integer | | | - b | character varying | | | - c | text | | | -Number of child tables: 1 (Use \d+ to list them.) -Typed table of type: test_type2 - -ALTER TYPE test_type2 DROP ATTRIBUTE b; -- fails -ERROR: cannot alter type "test_type2" because it is the type of a typed table -HINT: Use ALTER ... CASCADE to alter the typed tables too. -ALTER TYPE test_type2 DROP ATTRIBUTE b CASCADE; -\d test_type2 - Composite type "public.test_type2" - Column | Type | Collation | Nullable | Default ---------+---------+-----------+----------+--------- - a | integer | | | - c | text | | | - -\d test_tbl2 - Table "public.test_tbl2" - Column | Type | Collation | Nullable | Default ---------+---------+-----------+----------+--------- - a | integer | | | - c | text | | | -Number of child tables: 1 (Use \d+ to list them.) -Typed table of type: test_type2 - -ALTER TYPE test_type2 RENAME ATTRIBUTE a TO aa; -- fails -ERROR: cannot alter type "test_type2" because it is the type of a typed table -HINT: Use ALTER ... CASCADE to alter the typed tables too. -ALTER TYPE test_type2 RENAME ATTRIBUTE a TO aa CASCADE; -\d test_type2 - Composite type "public.test_type2" - Column | Type | Collation | Nullable | Default ---------+---------+-----------+----------+--------- - aa | integer | | | - c | text | | | - -\d test_tbl2 - Table "public.test_tbl2" - Column | Type | Collation | Nullable | Default ---------+---------+-----------+----------+--------- - aa | integer | | | - c | text | | | -Number of child tables: 1 (Use \d+ to list them.) -Typed table of type: test_type2 - -\d test_tbl2_subclass - Table "public.test_tbl2_subclass" - Column | Type | Collation | Nullable | Default ---------+---------+-----------+----------+--------- - aa | integer | | | - c | text | | | -Inherits: test_tbl2 - -DROP TABLE test_tbl2_subclass, test_tbl2; -DROP TYPE test_type2; -CREATE TYPE test_typex AS (a int, b text); -CREATE TABLE test_tblx (x int, y test_typex check ((y).a > 0)); -ALTER TYPE test_typex DROP ATTRIBUTE a; -- fails -ERROR: cannot drop column a of composite type test_typex because other objects depend on it -DETAIL: constraint test_tblx_y_check on table test_tblx depends on column a of composite type test_typex -HINT: Use DROP ... CASCADE to drop the dependent objects too. -ALTER TYPE test_typex DROP ATTRIBUTE a CASCADE; -NOTICE: drop cascades to constraint test_tblx_y_check on table test_tblx -\d test_tblx - Table "public.test_tblx" - Column | Type | Collation | Nullable | Default ---------+------------+-----------+----------+--------- - x | integer | | | - y | test_typex | | | - -DROP TABLE test_tblx; -DROP TYPE test_typex; --- This test isn't that interesting on its own, but the purpose is to leave --- behind a table to test pg_upgrade with. The table has a composite type --- column in it, and the composite type has a dropped attribute. -CREATE TYPE test_type3 AS (a int); -CREATE TABLE test_tbl3 (c) AS SELECT '(1)'::test_type3; -ALTER TYPE test_type3 DROP ATTRIBUTE a, ADD ATTRIBUTE b int; -CREATE TYPE test_type_empty AS (); -DROP TYPE test_type_empty; --- --- typed tables: OF / NOT OF --- -CREATE TYPE tt_t0 AS (z inet, x int, y numeric(8,2)); -ALTER TYPE tt_t0 DROP ATTRIBUTE z; -CREATE TABLE tt0 (x int NOT NULL, y numeric(8,2)); -- OK -CREATE TABLE tt1 (x int, y bigint); -- wrong base type -CREATE TABLE tt2 (x int, y numeric(9,2)); -- wrong typmod -CREATE TABLE tt3 (y numeric(8,2), x int); -- wrong column order -CREATE TABLE tt4 (x int); -- too few columns -CREATE TABLE tt5 (x int, y numeric(8,2), z int); -- too few columns -CREATE TABLE tt6 () INHERITS (tt0); -- can't have a parent -CREATE TABLE tt7 (x int, q text, y numeric(8,2)); -ALTER TABLE tt7 DROP q; -- OK -ALTER TABLE tt0 OF tt_t0; -ERROR: unsupported alter table subcommand -DETAIL: Subcommand "OF" is not supported on OrioleDB tables yet. This will be implemented in future. -ALTER TABLE tt1 OF tt_t0; -ERROR: unsupported alter table subcommand -DETAIL: Subcommand "OF" is not supported on OrioleDB tables yet. This will be implemented in future. -ALTER TABLE tt2 OF tt_t0; -ERROR: unsupported alter table subcommand -DETAIL: Subcommand "OF" is not supported on OrioleDB tables yet. This will be implemented in future. -ALTER TABLE tt3 OF tt_t0; -ERROR: unsupported alter table subcommand -DETAIL: Subcommand "OF" is not supported on OrioleDB tables yet. This will be implemented in future. -ALTER TABLE tt4 OF tt_t0; -ERROR: unsupported alter table subcommand -DETAIL: Subcommand "OF" is not supported on OrioleDB tables yet. This will be implemented in future. -ALTER TABLE tt5 OF tt_t0; -ERROR: unsupported alter table subcommand -DETAIL: Subcommand "OF" is not supported on OrioleDB tables yet. This will be implemented in future. -ALTER TABLE tt6 OF tt_t0; -ERROR: unsupported alter table subcommand -DETAIL: Subcommand "OF" is not supported on OrioleDB tables yet. This will be implemented in future. -ALTER TABLE tt7 OF tt_t0; -ERROR: unsupported alter table subcommand -DETAIL: Subcommand "OF" is not supported on OrioleDB tables yet. This will be implemented in future. -CREATE TYPE tt_t1 AS (x int, y numeric(8,2)); -ALTER TABLE tt7 OF tt_t1; -- reassign an already-typed table -ERROR: unsupported alter table subcommand -DETAIL: Subcommand "OF" is not supported on OrioleDB tables yet. This will be implemented in future. -ALTER TABLE tt7 NOT OF; -ERROR: unsupported alter table subcommand -DETAIL: Subcommand "NOT OF" is not supported on OrioleDB tables yet. This will be implemented in future. -\d tt7 - Table "public.tt7" - Column | Type | Collation | Nullable | Default ---------+--------------+-----------+----------+--------- - x | integer | | | - y | numeric(8,2) | | | - --- make sure we can drop a constraint on the parent but it remains on the child -CREATE TABLE test_drop_constr_parent (c text CHECK (c IS NOT NULL)); -CREATE TABLE test_drop_constr_child () INHERITS (test_drop_constr_parent); -ALTER TABLE ONLY test_drop_constr_parent DROP CONSTRAINT "test_drop_constr_parent_c_check"; --- should fail -INSERT INTO test_drop_constr_child (c) VALUES (NULL); -ERROR: new row for relation "test_drop_constr_child" violates check constraint "test_drop_constr_parent_c_check" -DETAIL: Failing row contains (null). -DROP TABLE test_drop_constr_parent CASCADE; -NOTICE: drop cascades to table test_drop_constr_child --- --- IF EXISTS test --- -ALTER TABLE IF EXISTS tt8 ADD COLUMN f int; -NOTICE: relation "tt8" does not exist, skipping -ALTER TABLE IF EXISTS tt8 ADD CONSTRAINT xxx PRIMARY KEY(f); -NOTICE: relation "tt8" does not exist, skipping -ALTER TABLE IF EXISTS tt8 ADD CHECK (f BETWEEN 0 AND 10); -NOTICE: relation "tt8" does not exist, skipping -ALTER TABLE IF EXISTS tt8 ALTER COLUMN f SET DEFAULT 0; -NOTICE: relation "tt8" does not exist, skipping -ALTER TABLE IF EXISTS tt8 RENAME COLUMN f TO f1; -NOTICE: relation "tt8" does not exist, skipping -ALTER TABLE IF EXISTS tt8 SET SCHEMA alter2; -NOTICE: relation "tt8" does not exist, skipping -CREATE TABLE tt8(a int); -CREATE SCHEMA alter2; -ALTER TABLE IF EXISTS tt8 ADD COLUMN f int; -ALTER TABLE IF EXISTS tt8 ADD CONSTRAINT xxx PRIMARY KEY(f); -ALTER TABLE IF EXISTS tt8 ADD CHECK (f BETWEEN 0 AND 10); -ALTER TABLE IF EXISTS tt8 ALTER COLUMN f SET DEFAULT 0; -ALTER TABLE IF EXISTS tt8 RENAME COLUMN f TO f1; -ALTER TABLE IF EXISTS tt8 SET SCHEMA alter2; -\d alter2.tt8 - Table "alter2.tt8" - Column | Type | Collation | Nullable | Default ---------+---------+-----------+----------+--------- - a | integer | | | - f1 | integer | | not null | 0 -Indexes: - "xxx" PRIMARY KEY, btree (f1) -Check constraints: - "tt8_f_check" CHECK (f1 >= 0 AND f1 <= 10) - -DROP TABLE alter2.tt8; -DROP SCHEMA alter2; --- --- Check conflicts between index and CHECK constraint names --- -CREATE TABLE tt9(c integer); -ALTER TABLE tt9 ADD CHECK(c > 1); -ALTER TABLE tt9 ADD CHECK(c > 2); -- picks nonconflicting name -ALTER TABLE tt9 ADD CONSTRAINT foo CHECK(c > 3); -ALTER TABLE tt9 ADD CONSTRAINT foo CHECK(c > 4); -- fail, dup name -ERROR: constraint "foo" for relation "tt9" already exists -ALTER TABLE tt9 ADD UNIQUE(c); -ALTER TABLE tt9 ADD UNIQUE(c); -- picks nonconflicting name -ALTER TABLE tt9 ADD CONSTRAINT tt9_c_key UNIQUE(c); -- fail, dup name -ERROR: relation "tt9_c_key" already exists -ALTER TABLE tt9 ADD CONSTRAINT foo UNIQUE(c); -- fail, dup name -ERROR: constraint "foo" for relation "tt9" already exists -ALTER TABLE tt9 ADD CONSTRAINT tt9_c_key CHECK(c > 5); -- fail, dup name -ERROR: constraint "tt9_c_key" for relation "tt9" already exists -ALTER TABLE tt9 ADD CONSTRAINT tt9_c_key2 CHECK(c > 6); -ALTER TABLE tt9 ADD UNIQUE(c); -- picks nonconflicting name -\d tt9 - Table "public.tt9" - Column | Type | Collation | Nullable | Default ---------+---------+-----------+----------+--------- - c | integer | | | -Indexes: - "tt9_c_key" UNIQUE CONSTRAINT, btree (c) - "tt9_c_key1" UNIQUE CONSTRAINT, btree (c) - "tt9_c_key3" UNIQUE CONSTRAINT, btree (c) -Check constraints: - "foo" CHECK (c > 3) - "tt9_c_check" CHECK (c > 1) - "tt9_c_check1" CHECK (c > 2) - "tt9_c_key2" CHECK (c > 6) - -DROP TABLE tt9; --- Check that comments on constraints and indexes are not lost at ALTER TABLE. -CREATE TABLE comment_test ( - id int, - positive_col int CHECK (positive_col > 0), - indexed_col int, - CONSTRAINT comment_test_pk PRIMARY KEY (id)); -CREATE INDEX comment_test_index ON comment_test(indexed_col); -COMMENT ON COLUMN comment_test.id IS 'Column ''id'' on comment_test'; -COMMENT ON INDEX comment_test_index IS 'Simple index on comment_test'; -COMMENT ON CONSTRAINT comment_test_positive_col_check ON comment_test IS 'CHECK constraint on comment_test.positive_col'; -COMMENT ON CONSTRAINT comment_test_pk ON comment_test IS 'PRIMARY KEY constraint of comment_test'; -COMMENT ON INDEX comment_test_pk IS 'Index backing the PRIMARY KEY of comment_test'; -SELECT col_description('comment_test'::regclass, 1) as comment; - comment ------------------------------ - Column 'id' on comment_test -(1 row) - -SELECT indexrelid::regclass::text as index, obj_description(indexrelid, 'pg_class') as comment FROM pg_index where indrelid = 'comment_test'::regclass ORDER BY 1, 2; - index | comment ---------------------+----------------------------------------------- - comment_test_index | Simple index on comment_test - comment_test_pk | Index backing the PRIMARY KEY of comment_test -(2 rows) - -SELECT conname as constraint, obj_description(oid, 'pg_constraint') as comment FROM pg_constraint where conrelid = 'comment_test'::regclass ORDER BY 1, 2; - constraint | comment ----------------------------------+----------------------------------------------- - comment_test_pk | PRIMARY KEY constraint of comment_test - comment_test_positive_col_check | CHECK constraint on comment_test.positive_col -(2 rows) - --- Change the datatype of all the columns. ALTER TABLE is optimized to not --- rebuild an index if the new data type is binary compatible with the old --- one. Check do a dummy ALTER TABLE that doesn't change the datatype --- first, to test that no-op codepath, and another one that does. -ALTER TABLE comment_test ALTER COLUMN indexed_col SET DATA TYPE int; -ALTER TABLE comment_test ALTER COLUMN indexed_col SET DATA TYPE text; -ALTER TABLE comment_test ALTER COLUMN id SET DATA TYPE int; -ALTER TABLE comment_test ALTER COLUMN id SET DATA TYPE text; -ALTER TABLE comment_test ALTER COLUMN positive_col SET DATA TYPE int; -ALTER TABLE comment_test ALTER COLUMN positive_col SET DATA TYPE bigint; --- Check that the comments are intact. -SELECT col_description('comment_test'::regclass, 1) as comment; - comment ------------------------------ - Column 'id' on comment_test -(1 row) - -SELECT indexrelid::regclass::text as index, obj_description(indexrelid, 'pg_class') as comment FROM pg_index where indrelid = 'comment_test'::regclass ORDER BY 1, 2; - index | comment ---------------------+----------------------------------------------- - comment_test_index | Simple index on comment_test - comment_test_pk | Index backing the PRIMARY KEY of comment_test -(2 rows) - -SELECT conname as constraint, obj_description(oid, 'pg_constraint') as comment FROM pg_constraint where conrelid = 'comment_test'::regclass ORDER BY 1, 2; - constraint | comment ----------------------------------+----------------------------------------------- - comment_test_pk | PRIMARY KEY constraint of comment_test - comment_test_positive_col_check | CHECK constraint on comment_test.positive_col -(2 rows) - --- Check compatibility for foreign keys and comments. This is done --- separately as rebuilding the column type of the parent leads --- to an error and would reduce the test scope. -CREATE TABLE comment_test_child ( - id text CONSTRAINT comment_test_child_fk REFERENCES comment_test); -CREATE INDEX comment_test_child_fk ON comment_test_child(id); -COMMENT ON COLUMN comment_test_child.id IS 'Column ''id'' on comment_test_child'; -COMMENT ON INDEX comment_test_child_fk IS 'Index backing the FOREIGN KEY of comment_test_child'; -COMMENT ON CONSTRAINT comment_test_child_fk ON comment_test_child IS 'FOREIGN KEY constraint of comment_test_child'; --- Change column type of parent -ALTER TABLE comment_test ALTER COLUMN id SET DATA TYPE text; -ALTER TABLE comment_test ALTER COLUMN id SET DATA TYPE int USING id::integer; -ERROR: foreign key constraint "comment_test_child_fk" cannot be implemented -DETAIL: Key columns "id" and "id" are of incompatible types: text and integer. --- Comments should be intact -SELECT col_description('comment_test_child'::regclass, 1) as comment; - comment ------------------------------------ - Column 'id' on comment_test_child -(1 row) - -SELECT indexrelid::regclass::text as index, obj_description(indexrelid, 'pg_class') as comment FROM pg_index where indrelid = 'comment_test_child'::regclass ORDER BY 1, 2; - index | comment ------------------------+----------------------------------------------------- - comment_test_child_fk | Index backing the FOREIGN KEY of comment_test_child -(1 row) - -SELECT conname as constraint, obj_description(oid, 'pg_constraint') as comment FROM pg_constraint where conrelid = 'comment_test_child'::regclass ORDER BY 1, 2; - constraint | comment ------------------------+---------------------------------------------- - comment_test_child_fk | FOREIGN KEY constraint of comment_test_child -(1 row) - --- Check that we map relation oids to filenodes and back correctly. Only --- display bad mappings so the test output doesn't change all the time. A --- filenode function call can return NULL for a relation dropped concurrently --- with the call's surrounding query, so ignore a NULL mapped_oid for --- relations that no longer exist after all calls finish. -CREATE TEMP TABLE filenode_mapping AS -SELECT - oid, mapped_oid, reltablespace, relfilenode, relname -FROM pg_class, - pg_filenode_relation(reltablespace, pg_relation_filenode(oid)) AS mapped_oid -WHERE relkind IN ('r', 'i', 'S', 't', 'm') AND mapped_oid IS DISTINCT FROM oid; -SELECT m.* FROM filenode_mapping m LEFT JOIN pg_class c ON c.oid = m.oid -WHERE c.oid IS NOT NULL OR m.mapped_oid IS NOT NULL; - oid | mapped_oid | reltablespace | relfilenode | relname ------+------------+---------------+-------------+--------- -(0 rows) - --- Checks on creating and manipulation of user defined relations in --- pg_catalog. -SHOW allow_system_table_mods; - allow_system_table_mods -------------------------- - off -(1 row) - --- disallowed because of search_path issues with pg_dump -CREATE TABLE pg_catalog.new_system_table(); -ERROR: permission denied to create "pg_catalog.new_system_table" -DETAIL: System catalog modifications are currently disallowed. --- instead create in public first, move to catalog -CREATE TABLE new_system_table(id serial primary key, othercol text); -ALTER TABLE new_system_table SET SCHEMA pg_catalog; -ALTER TABLE new_system_table SET SCHEMA public; -ALTER TABLE new_system_table SET SCHEMA pg_catalog; --- will be ignored -- already there: -ALTER TABLE new_system_table SET SCHEMA pg_catalog; -ALTER TABLE new_system_table RENAME TO old_system_table; -CREATE INDEX old_system_table__othercol ON old_system_table (othercol); -INSERT INTO old_system_table(othercol) VALUES ('somedata'), ('otherdata'); -UPDATE old_system_table SET id = -id; -DELETE FROM old_system_table WHERE othercol = 'somedata'; -TRUNCATE old_system_table; -ALTER TABLE old_system_table DROP CONSTRAINT new_system_table_pkey; -ALTER TABLE old_system_table DROP COLUMN othercol; -DROP TABLE old_system_table; --- set logged -CREATE UNLOGGED TABLE unlogged1(f1 SERIAL PRIMARY KEY, f2 TEXT); -- has sequence, toast --- check relpersistence of an unlogged table -SELECT relname, relkind, relpersistence FROM pg_class WHERE relname ~ '^unlogged1' -UNION ALL -SELECT r.relname || ' toast table', t.relkind, t.relpersistence FROM pg_class r JOIN pg_class t ON t.oid = r.reltoastrelid WHERE r.relname ~ '^unlogged1' -UNION ALL -SELECT r.relname || ' toast index', ri.relkind, ri.relpersistence FROM pg_class r join pg_class t ON t.oid = r.reltoastrelid JOIN pg_index i ON i.indrelid = t.oid JOIN pg_class ri ON ri.oid = i.indexrelid WHERE r.relname ~ '^unlogged1' -ORDER BY relname; - relname | relkind | relpersistence ------------------------+---------+---------------- - unlogged1 | r | u - unlogged1 toast index | i | u - unlogged1 toast table | t | u - unlogged1_f1_seq | S | u - unlogged1_pkey | i | u -(5 rows) - -CREATE UNLOGGED TABLE unlogged2(f1 SERIAL PRIMARY KEY, f2 INTEGER REFERENCES unlogged1); -- foreign key -CREATE UNLOGGED TABLE unlogged3(f1 SERIAL PRIMARY KEY, f2 INTEGER REFERENCES unlogged3); -- self-referencing foreign key -ALTER TABLE unlogged3 SET LOGGED; -- skip self-referencing foreign key -ERROR: unsupported alter table subcommand -DETAIL: Subcommand "SET LOGGED" is not supported on OrioleDB tables yet. This will be implemented in future. -ALTER TABLE unlogged2 SET LOGGED; -- fails because a foreign key to an unlogged table exists -ERROR: unsupported alter table subcommand -DETAIL: Subcommand "SET LOGGED" is not supported on OrioleDB tables yet. This will be implemented in future. -ALTER TABLE unlogged1 SET LOGGED; -ERROR: unsupported alter table subcommand -DETAIL: Subcommand "SET LOGGED" is not supported on OrioleDB tables yet. This will be implemented in future. --- check relpersistence of an unlogged table after changing to permanent -SELECT relname, relkind, relpersistence FROM pg_class WHERE relname ~ '^unlogged1' -UNION ALL -SELECT r.relname || ' toast table', t.relkind, t.relpersistence FROM pg_class r JOIN pg_class t ON t.oid = r.reltoastrelid WHERE r.relname ~ '^unlogged1' -UNION ALL -SELECT r.relname || ' toast index', ri.relkind, ri.relpersistence FROM pg_class r join pg_class t ON t.oid = r.reltoastrelid JOIN pg_index i ON i.indrelid = t.oid JOIN pg_class ri ON ri.oid = i.indexrelid WHERE r.relname ~ '^unlogged1' -ORDER BY relname; - relname | relkind | relpersistence ------------------------+---------+---------------- - unlogged1 | r | u - unlogged1 toast index | i | u - unlogged1 toast table | t | u - unlogged1_f1_seq | S | u - unlogged1_pkey | i | u -(5 rows) - -ALTER TABLE unlogged1 SET LOGGED; -- silently do nothing -ERROR: unsupported alter table subcommand -DETAIL: Subcommand "SET LOGGED" is not supported on OrioleDB tables yet. This will be implemented in future. -DROP TABLE unlogged3; -DROP TABLE unlogged2; -DROP TABLE unlogged1; --- set unlogged -CREATE TABLE logged1(f1 SERIAL PRIMARY KEY, f2 TEXT); -- has sequence, toast --- check relpersistence of a permanent table -SELECT relname, relkind, relpersistence FROM pg_class WHERE relname ~ '^logged1' -UNION ALL -SELECT r.relname || ' toast table', t.relkind, t.relpersistence FROM pg_class r JOIN pg_class t ON t.oid = r.reltoastrelid WHERE r.relname ~ '^logged1' -UNION ALL -SELECT r.relname ||' toast index', ri.relkind, ri.relpersistence FROM pg_class r join pg_class t ON t.oid = r.reltoastrelid JOIN pg_index i ON i.indrelid = t.oid JOIN pg_class ri ON ri.oid = i.indexrelid WHERE r.relname ~ '^logged1' -ORDER BY relname; - relname | relkind | relpersistence ----------------------+---------+---------------- - logged1 | r | p - logged1 toast index | i | p - logged1 toast table | t | p - logged1_f1_seq | S | p - logged1_pkey | i | p -(5 rows) - -CREATE TABLE logged2(f1 SERIAL PRIMARY KEY, f2 INTEGER REFERENCES logged1); -- foreign key -CREATE TABLE logged3(f1 SERIAL PRIMARY KEY, f2 INTEGER REFERENCES logged3); -- self-referencing foreign key -ALTER TABLE logged1 SET UNLOGGED; -- fails because a foreign key from a permanent table exists -ERROR: unsupported alter table subcommand -DETAIL: Subcommand "SET UNLOGGED" is not supported on OrioleDB tables yet. This will be implemented in future. -ALTER TABLE logged3 SET UNLOGGED; -- skip self-referencing foreign key -ERROR: unsupported alter table subcommand -DETAIL: Subcommand "SET UNLOGGED" is not supported on OrioleDB tables yet. This will be implemented in future. -ALTER TABLE logged2 SET UNLOGGED; -ERROR: unsupported alter table subcommand -DETAIL: Subcommand "SET UNLOGGED" is not supported on OrioleDB tables yet. This will be implemented in future. -ALTER TABLE logged1 SET UNLOGGED; -ERROR: unsupported alter table subcommand -DETAIL: Subcommand "SET UNLOGGED" is not supported on OrioleDB tables yet. This will be implemented in future. --- check relpersistence of a permanent table after changing to unlogged -SELECT relname, relkind, relpersistence FROM pg_class WHERE relname ~ '^logged1' -UNION ALL -SELECT r.relname || ' toast table', t.relkind, t.relpersistence FROM pg_class r JOIN pg_class t ON t.oid = r.reltoastrelid WHERE r.relname ~ '^logged1' -UNION ALL -SELECT r.relname || ' toast index', ri.relkind, ri.relpersistence FROM pg_class r join pg_class t ON t.oid = r.reltoastrelid JOIN pg_index i ON i.indrelid = t.oid JOIN pg_class ri ON ri.oid = i.indexrelid WHERE r.relname ~ '^logged1' -ORDER BY relname; - relname | relkind | relpersistence ----------------------+---------+---------------- - logged1 | r | p - logged1 toast index | i | p - logged1 toast table | t | p - logged1_f1_seq | S | p - logged1_pkey | i | p -(5 rows) - -ALTER TABLE logged1 SET UNLOGGED; -- silently do nothing -ERROR: unsupported alter table subcommand -DETAIL: Subcommand "SET UNLOGGED" is not supported on OrioleDB tables yet. This will be implemented in future. -DROP TABLE logged3; -DROP TABLE logged2; -DROP TABLE logged1; --- test ADD COLUMN IF NOT EXISTS -CREATE TABLE test_add_column(c1 integer); -\d test_add_column - Table "public.test_add_column" - Column | Type | Collation | Nullable | Default ---------+---------+-----------+----------+--------- - c1 | integer | | | - -ALTER TABLE test_add_column - ADD COLUMN c2 integer; -\d test_add_column - Table "public.test_add_column" - Column | Type | Collation | Nullable | Default ---------+---------+-----------+----------+--------- - c1 | integer | | | - c2 | integer | | | - -ALTER TABLE test_add_column - ADD COLUMN c2 integer; -- fail because c2 already exists -ERROR: column "c2" of relation "test_add_column" already exists -ALTER TABLE ONLY test_add_column - ADD COLUMN c2 integer; -- fail because c2 already exists -ERROR: column "c2" of relation "test_add_column" already exists -\d test_add_column - Table "public.test_add_column" - Column | Type | Collation | Nullable | Default ---------+---------+-----------+----------+--------- - c1 | integer | | | - c2 | integer | | | - -ALTER TABLE test_add_column - ADD COLUMN IF NOT EXISTS c2 integer; -- skipping because c2 already exists -NOTICE: column "c2" of relation "test_add_column" already exists, skipping -ALTER TABLE ONLY test_add_column - ADD COLUMN IF NOT EXISTS c2 integer; -- skipping because c2 already exists -NOTICE: column "c2" of relation "test_add_column" already exists, skipping -\d test_add_column - Table "public.test_add_column" - Column | Type | Collation | Nullable | Default ---------+---------+-----------+----------+--------- - c1 | integer | | | - c2 | integer | | | - -ALTER TABLE test_add_column - ADD COLUMN c2 integer, -- fail because c2 already exists - ADD COLUMN c3 integer primary key; -ERROR: column "c2" of relation "test_add_column" already exists -\d test_add_column - Table "public.test_add_column" - Column | Type | Collation | Nullable | Default ---------+---------+-----------+----------+--------- - c1 | integer | | | - c2 | integer | | | - -ALTER TABLE test_add_column - ADD COLUMN IF NOT EXISTS c2 integer, -- skipping because c2 already exists - ADD COLUMN c3 integer primary key; -NOTICE: column "c2" of relation "test_add_column" already exists, skipping -\d test_add_column - Table "public.test_add_column" - Column | Type | Collation | Nullable | Default ---------+---------+-----------+----------+--------- - c1 | integer | | | - c2 | integer | | | - c3 | integer | | not null | -Indexes: - "test_add_column_pkey" PRIMARY KEY, btree (c3) - -ALTER TABLE test_add_column - ADD COLUMN IF NOT EXISTS c2 integer, -- skipping because c2 already exists - ADD COLUMN IF NOT EXISTS c3 integer primary key; -- skipping because c3 already exists -NOTICE: column "c2" of relation "test_add_column" already exists, skipping -NOTICE: column "c3" of relation "test_add_column" already exists, skipping -\d test_add_column - Table "public.test_add_column" - Column | Type | Collation | Nullable | Default ---------+---------+-----------+----------+--------- - c1 | integer | | | - c2 | integer | | | - c3 | integer | | not null | -Indexes: - "test_add_column_pkey" PRIMARY KEY, btree (c3) - -ALTER TABLE test_add_column - ADD COLUMN IF NOT EXISTS c2 integer, -- skipping because c2 already exists - ADD COLUMN IF NOT EXISTS c3 integer, -- skipping because c3 already exists - ADD COLUMN c4 integer REFERENCES test_add_column; -NOTICE: column "c2" of relation "test_add_column" already exists, skipping -NOTICE: column "c3" of relation "test_add_column" already exists, skipping -\d test_add_column - Table "public.test_add_column" - Column | Type | Collation | Nullable | Default ---------+---------+-----------+----------+--------- - c1 | integer | | | - c2 | integer | | | - c3 | integer | | not null | - c4 | integer | | | -Indexes: - "test_add_column_pkey" PRIMARY KEY, btree (c3) -Foreign-key constraints: - "test_add_column_c4_fkey" FOREIGN KEY (c4) REFERENCES test_add_column(c3) -Referenced by: - TABLE "test_add_column" CONSTRAINT "test_add_column_c4_fkey" FOREIGN KEY (c4) REFERENCES test_add_column(c3) - -ALTER TABLE test_add_column - ADD COLUMN IF NOT EXISTS c4 integer REFERENCES test_add_column; -NOTICE: column "c4" of relation "test_add_column" already exists, skipping -\d test_add_column - Table "public.test_add_column" - Column | Type | Collation | Nullable | Default ---------+---------+-----------+----------+--------- - c1 | integer | | | - c2 | integer | | | - c3 | integer | | not null | - c4 | integer | | | -Indexes: - "test_add_column_pkey" PRIMARY KEY, btree (c3) -Foreign-key constraints: - "test_add_column_c4_fkey" FOREIGN KEY (c4) REFERENCES test_add_column(c3) -Referenced by: - TABLE "test_add_column" CONSTRAINT "test_add_column_c4_fkey" FOREIGN KEY (c4) REFERENCES test_add_column(c3) - -ALTER TABLE test_add_column - ADD COLUMN IF NOT EXISTS c5 SERIAL CHECK (c5 > 8); -\d test_add_column - Table "public.test_add_column" - Column | Type | Collation | Nullable | Default ---------+---------+-----------+----------+--------------------------------------------- - c1 | integer | | | - c2 | integer | | | - c3 | integer | | not null | - c4 | integer | | | - c5 | integer | | not null | nextval('test_add_column_c5_seq'::regclass) -Indexes: - "test_add_column_pkey" PRIMARY KEY, btree (c3) -Check constraints: - "test_add_column_c5_check" CHECK (c5 > 8) -Foreign-key constraints: - "test_add_column_c4_fkey" FOREIGN KEY (c4) REFERENCES test_add_column(c3) -Referenced by: - TABLE "test_add_column" CONSTRAINT "test_add_column_c4_fkey" FOREIGN KEY (c4) REFERENCES test_add_column(c3) - -ALTER TABLE test_add_column - ADD COLUMN IF NOT EXISTS c5 SERIAL CHECK (c5 > 10); -NOTICE: column "c5" of relation "test_add_column" already exists, skipping -\d test_add_column* - Table "public.test_add_column" - Column | Type | Collation | Nullable | Default ---------+---------+-----------+----------+--------------------------------------------- - c1 | integer | | | - c2 | integer | | | - c3 | integer | | not null | - c4 | integer | | | - c5 | integer | | not null | nextval('test_add_column_c5_seq'::regclass) -Indexes: - "test_add_column_pkey" PRIMARY KEY, btree (c3) -Check constraints: - "test_add_column_c5_check" CHECK (c5 > 8) -Foreign-key constraints: - "test_add_column_c4_fkey" FOREIGN KEY (c4) REFERENCES test_add_column(c3) -Referenced by: - TABLE "test_add_column" CONSTRAINT "test_add_column_c4_fkey" FOREIGN KEY (c4) REFERENCES test_add_column(c3) - - Sequence "public.test_add_column_c5_seq" - Type | Start | Minimum | Maximum | Increment | Cycles? | Cache ----------+-------+---------+------------+-----------+---------+------- - integer | 1 | 1 | 2147483647 | 1 | no | 1 -Owned by: public.test_add_column.c5 - - Index "public.test_add_column_pkey" - Column | Type | Key? | Definition ---------+---------+------+------------ - c3 | integer | yes | c3 -primary key, btree, for table "public.test_add_column" - -DROP TABLE test_add_column; -\d test_add_column* --- assorted cases with multiple ALTER TABLE steps -CREATE TABLE ataddindex(f1 INT); -INSERT INTO ataddindex VALUES (42), (43); -CREATE UNIQUE INDEX ataddindexi0 ON ataddindex(f1); -ALTER TABLE ataddindex - ADD PRIMARY KEY USING INDEX ataddindexi0, - ALTER f1 TYPE BIGINT; -\d ataddindex - Table "public.ataddindex" - Column | Type | Collation | Nullable | Default ---------+--------+-----------+----------+--------- - f1 | bigint | | not null | -Indexes: - "ataddindexi0" PRIMARY KEY, btree (f1) - -DROP TABLE ataddindex; -CREATE TABLE ataddindex(f1 VARCHAR(10)); -INSERT INTO ataddindex(f1) VALUES ('foo'), ('a'); -ALTER TABLE ataddindex - ALTER f1 SET DATA TYPE TEXT, - ADD EXCLUDE ((f1 LIKE 'a') WITH =); -\d ataddindex - Table "public.ataddindex" - Column | Type | Collation | Nullable | Default ---------+------+-----------+----------+--------- - f1 | text | | | -Indexes: - "ataddindex_expr_excl" EXCLUDE USING btree ((f1 ~~ 'a'::text) WITH =) - -DROP TABLE ataddindex; -CREATE TABLE ataddindex(id int, ref_id int); -ALTER TABLE ataddindex - ADD PRIMARY KEY (id), - ADD FOREIGN KEY (ref_id) REFERENCES ataddindex; -\d ataddindex - Table "public.ataddindex" - Column | Type | Collation | Nullable | Default ---------+---------+-----------+----------+--------- - id | integer | | not null | - ref_id | integer | | | -Indexes: - "ataddindex_pkey" PRIMARY KEY, btree (id) -Foreign-key constraints: - "ataddindex_ref_id_fkey" FOREIGN KEY (ref_id) REFERENCES ataddindex(id) -Referenced by: - TABLE "ataddindex" CONSTRAINT "ataddindex_ref_id_fkey" FOREIGN KEY (ref_id) REFERENCES ataddindex(id) - -DROP TABLE ataddindex; -CREATE TABLE ataddindex(id int, ref_id int); -ALTER TABLE ataddindex - ADD UNIQUE (id), - ADD FOREIGN KEY (ref_id) REFERENCES ataddindex (id); -\d ataddindex - Table "public.ataddindex" - Column | Type | Collation | Nullable | Default ---------+---------+-----------+----------+--------- - id | integer | | | - ref_id | integer | | | -Indexes: - "ataddindex_id_key" UNIQUE CONSTRAINT, btree (id) -Foreign-key constraints: - "ataddindex_ref_id_fkey" FOREIGN KEY (ref_id) REFERENCES ataddindex(id) -Referenced by: - TABLE "ataddindex" CONSTRAINT "ataddindex_ref_id_fkey" FOREIGN KEY (ref_id) REFERENCES ataddindex(id) - -DROP TABLE ataddindex; -CREATE TABLE atnotnull1 (); -ALTER TABLE atnotnull1 - ADD COLUMN a INT, - ALTER a SET NOT NULL; -ALTER TABLE atnotnull1 - ADD COLUMN c INT, - ADD PRIMARY KEY (c); -\d+ atnotnull1 - Table "public.atnotnull1" - Column | Type | Collation | Nullable | Default | Storage | Stats target | Description ---------+---------+-----------+----------+---------+---------+--------------+------------- - a | integer | | not null | | plain | | - c | integer | | not null | | plain | | -Indexes: - "atnotnull1_pkey" PRIMARY KEY, btree (c) - --- cannot drop column that is part of the partition key -CREATE TABLE partitioned ( - a int, - b int -) PARTITION BY RANGE (a, (a+b+1)); -ALTER TABLE partitioned DROP COLUMN a; -ERROR: cannot drop column "a" because it is part of the partition key of relation "partitioned" -ALTER TABLE partitioned ALTER COLUMN a TYPE char(5); -ERROR: cannot alter column "a" because it is part of the partition key of relation "partitioned" -ALTER TABLE partitioned DROP COLUMN b; -ERROR: cannot drop column "b" because it is part of the partition key of relation "partitioned" -ALTER TABLE partitioned ALTER COLUMN b TYPE char(5); -ERROR: cannot alter column "b" because it is part of the partition key of relation "partitioned" --- specifying storage parameters for partitioned tables is not supported -ALTER TABLE partitioned SET (fillfactor=100); -ERROR: cannot specify storage parameters for a partitioned table -HINT: Specify storage parameters for its leaf partitions instead. --- partitioned table cannot participate in regular inheritance -CREATE TABLE nonpartitioned ( - a int, - b int -); -ALTER TABLE partitioned INHERIT nonpartitioned; -ERROR: cannot change inheritance of partitioned table -ALTER TABLE nonpartitioned INHERIT partitioned; -ERROR: cannot inherit from partitioned table "partitioned" --- cannot add NO INHERIT constraint to partitioned tables -ALTER TABLE partitioned ADD CONSTRAINT chk_a CHECK (a > 0) NO INHERIT; -ERROR: cannot add NO INHERIT constraint to partitioned table "partitioned" -DROP TABLE partitioned, nonpartitioned; --- --- ATTACH PARTITION --- --- check that target table is partitioned -CREATE TABLE unparted ( - a int -); -CREATE TABLE fail_part (like unparted); -ALTER TABLE unparted ATTACH PARTITION fail_part FOR VALUES IN ('a'); -ERROR: table "unparted" is not partitioned -DROP TABLE unparted, fail_part; --- check that partition bound is compatible -CREATE TABLE list_parted ( - a int NOT NULL, - b char(2) COLLATE "C", - CONSTRAINT check_a CHECK (a > 0) -) PARTITION BY LIST (a); -CREATE TABLE fail_part (LIKE list_parted); -ALTER TABLE list_parted ATTACH PARTITION fail_part FOR VALUES FROM (1) TO (10); -ERROR: invalid bound specification for a list partition -LINE 1: ...list_parted ATTACH PARTITION fail_part FOR VALUES FROM (1) T... - ^ -DROP TABLE fail_part; --- check that the table being attached exists -ALTER TABLE list_parted ATTACH PARTITION nonexistent FOR VALUES IN (1); -ERROR: relation "nonexistent" does not exist --- check ownership of the source table -CREATE ROLE regress_test_me; -CREATE ROLE regress_test_not_me; -CREATE TABLE not_owned_by_me (LIKE list_parted); -ALTER TABLE not_owned_by_me OWNER TO regress_test_not_me; -SET SESSION AUTHORIZATION regress_test_me; -CREATE TABLE owned_by_me ( - a int -) PARTITION BY LIST (a); -ALTER TABLE owned_by_me ATTACH PARTITION not_owned_by_me FOR VALUES IN (1); -ERROR: must be owner of table not_owned_by_me -RESET SESSION AUTHORIZATION; -DROP TABLE owned_by_me, not_owned_by_me; -DROP ROLE regress_test_not_me; -DROP ROLE regress_test_me; --- check that the table being attached is not part of regular inheritance -CREATE TABLE parent (LIKE list_parted); -CREATE TABLE child () INHERITS (parent); -ALTER TABLE list_parted ATTACH PARTITION child FOR VALUES IN (1); -ERROR: cannot attach inheritance child as partition -ALTER TABLE list_parted ATTACH PARTITION parent FOR VALUES IN (1); -ERROR: cannot attach inheritance parent as partition -DROP TABLE child; --- now it should work, with a little tweak -ALTER TABLE parent ADD CONSTRAINT check_a CHECK (a > 0); -ALTER TABLE list_parted ATTACH PARTITION parent FOR VALUES IN (1); --- test insert/update, per bug #18550 -INSERT INTO parent VALUES (1); -UPDATE parent SET a = 2 WHERE a = 1; -ERROR: new row for relation "parent" violates partition constraint -DETAIL: Failing row contains (2, null). -DROP TABLE parent CASCADE; --- check any TEMP-ness -CREATE TEMP TABLE temp_parted (a int) PARTITION BY LIST (a); -CREATE TABLE perm_part (a int); -ALTER TABLE temp_parted ATTACH PARTITION perm_part FOR VALUES IN (1); -ERROR: cannot attach a permanent relation as partition of temporary relation "temp_parted" -DROP TABLE temp_parted, perm_part; --- check that the table being attached is not a typed table -CREATE TYPE mytype AS (a int); -CREATE TABLE fail_part OF mytype; -ALTER TABLE list_parted ATTACH PARTITION fail_part FOR VALUES IN (1); -ERROR: cannot attach a typed table as partition -DROP TYPE mytype CASCADE; -NOTICE: drop cascades to table fail_part --- check that the table being attached has only columns present in the parent -CREATE TABLE fail_part (like list_parted, c int); -ALTER TABLE list_parted ATTACH PARTITION fail_part FOR VALUES IN (1); -ERROR: table "fail_part" contains column "c" not found in parent "list_parted" -DETAIL: The new partition may contain only the columns present in parent. -DROP TABLE fail_part; --- check that the table being attached has every column of the parent -CREATE TABLE fail_part (a int NOT NULL); -ALTER TABLE list_parted ATTACH PARTITION fail_part FOR VALUES IN (1); -ERROR: child table is missing column "b" -DROP TABLE fail_part; --- check that columns match in type, collation and NOT NULL status -CREATE TABLE fail_part ( - b char(3), - a int NOT NULL -); -ALTER TABLE list_parted ATTACH PARTITION fail_part FOR VALUES IN (1); -ERROR: child table "fail_part" has different type for column "b" -ALTER TABLE fail_part ALTER b TYPE char (2) COLLATE "POSIX"; -ALTER TABLE list_parted ATTACH PARTITION fail_part FOR VALUES IN (1); -ERROR: child table "fail_part" has different collation for column "b" -DROP TABLE fail_part; --- check that the table being attached has all constraints of the parent -CREATE TABLE fail_part ( - b char(2) COLLATE "C", - a int NOT NULL -); -ALTER TABLE list_parted ATTACH PARTITION fail_part FOR VALUES IN (1); -ERROR: child table is missing constraint "check_a" --- check that the constraint matches in definition with parent's constraint -ALTER TABLE fail_part ADD CONSTRAINT check_a CHECK (a >= 0); -ALTER TABLE list_parted ATTACH PARTITION fail_part FOR VALUES IN (1); -ERROR: child table "fail_part" has different definition for check constraint "check_a" -DROP TABLE fail_part; --- check the attributes and constraints after partition is attached -CREATE TABLE part_1 ( - a int NOT NULL, - b char(2) COLLATE "C", - CONSTRAINT check_a CHECK (a > 0) -); -ALTER TABLE list_parted ATTACH PARTITION part_1 FOR VALUES IN (1); --- attislocal and conislocal are always false for merged attributes and constraints respectively. -SELECT attislocal, attinhcount FROM pg_attribute WHERE attrelid = 'part_1'::regclass AND attnum > 0; - attislocal | attinhcount -------------+------------- - f | 1 - f | 1 -(2 rows) - -SELECT conislocal, coninhcount FROM pg_constraint WHERE conrelid = 'part_1'::regclass AND conname = 'check_a'; - conislocal | coninhcount -------------+------------- - f | 1 -(1 row) - --- check that the new partition won't overlap with an existing partition -CREATE TABLE fail_part (LIKE part_1 INCLUDING CONSTRAINTS); -ALTER TABLE list_parted ATTACH PARTITION fail_part FOR VALUES IN (1); -ERROR: partition "fail_part" would overlap partition "part_1" -LINE 1: ...LE list_parted ATTACH PARTITION fail_part FOR VALUES IN (1); - ^ -DROP TABLE fail_part; --- check that an existing table can be attached as a default partition -CREATE TABLE def_part (LIKE list_parted INCLUDING CONSTRAINTS); -ALTER TABLE list_parted ATTACH PARTITION def_part DEFAULT; --- check attaching default partition fails if a default partition already --- exists -CREATE TABLE fail_def_part (LIKE part_1 INCLUDING CONSTRAINTS); -ALTER TABLE list_parted ATTACH PARTITION fail_def_part DEFAULT; -ERROR: partition "fail_def_part" conflicts with existing default partition "def_part" -LINE 1: ...ER TABLE list_parted ATTACH PARTITION fail_def_part DEFAULT; - ^ --- check validation when attaching list partitions -CREATE TABLE list_parted2 ( - a int, - b char -) PARTITION BY LIST (a); --- check that violating rows are correctly reported -CREATE TABLE part_2 (LIKE list_parted2); -INSERT INTO part_2 VALUES (3, 'a'); -ALTER TABLE list_parted2 ATTACH PARTITION part_2 FOR VALUES IN (2); -ERROR: partition constraint of relation "part_2" is violated by some row --- should be ok after deleting the bad row -DELETE FROM part_2; -ALTER TABLE list_parted2 ATTACH PARTITION part_2 FOR VALUES IN (2); --- check partition cannot be attached if default has some row for its values -CREATE TABLE list_parted2_def PARTITION OF list_parted2 DEFAULT; -INSERT INTO list_parted2_def VALUES (11, 'z'); -CREATE TABLE part_3 (LIKE list_parted2); -ALTER TABLE list_parted2 ATTACH PARTITION part_3 FOR VALUES IN (11); -ERROR: updated partition constraint for default partition "list_parted2_def" would be violated by some row --- should be ok after deleting the bad row -DELETE FROM list_parted2_def WHERE a = 11; -ALTER TABLE list_parted2 ATTACH PARTITION part_3 FOR VALUES IN (11); --- adding constraints that describe the desired partition constraint --- (or more restrictive) will help skip the validation scan -CREATE TABLE part_3_4 ( - LIKE list_parted2, - CONSTRAINT check_a CHECK (a IN (3)) -); --- however, if a list partition does not accept nulls, there should be --- an explicit NOT NULL constraint on the partition key column for the --- validation scan to be skipped; -ALTER TABLE list_parted2 ATTACH PARTITION part_3_4 FOR VALUES IN (3, 4); --- adding a NOT NULL constraint will cause the scan to be skipped -ALTER TABLE list_parted2 DETACH PARTITION part_3_4; -ALTER TABLE part_3_4 ALTER a SET NOT NULL; -ALTER TABLE list_parted2 ATTACH PARTITION part_3_4 FOR VALUES IN (3, 4); --- check if default partition scan skipped -ALTER TABLE list_parted2_def ADD CONSTRAINT check_a CHECK (a IN (5, 6)); -CREATE TABLE part_55_66 PARTITION OF list_parted2 FOR VALUES IN (55, 66); --- check validation when attaching range partitions -CREATE TABLE range_parted ( - a int, - b int -) PARTITION BY RANGE (a, b); --- check that violating rows are correctly reported -CREATE TABLE part1 ( - a int NOT NULL CHECK (a = 1), - b int NOT NULL CHECK (b >= 1 AND b <= 10) -); -INSERT INTO part1 VALUES (1, 10); --- Remember the TO bound is exclusive -ALTER TABLE range_parted ATTACH PARTITION part1 FOR VALUES FROM (1, 1) TO (1, 10); -ERROR: partition constraint of relation "part1" is violated by some row --- should be ok after deleting the bad row -DELETE FROM part1; -ALTER TABLE range_parted ATTACH PARTITION part1 FOR VALUES FROM (1, 1) TO (1, 10); --- adding constraints that describe the desired partition constraint --- (or more restrictive) will help skip the validation scan -CREATE TABLE part2 ( - a int NOT NULL CHECK (a = 1), - b int NOT NULL CHECK (b >= 10 AND b < 18) -); -ALTER TABLE range_parted ATTACH PARTITION part2 FOR VALUES FROM (1, 10) TO (1, 20); --- Create default partition -CREATE TABLE partr_def1 PARTITION OF range_parted DEFAULT; --- Only one default partition is allowed, hence, following should give error -CREATE TABLE partr_def2 (LIKE part1 INCLUDING CONSTRAINTS); -ALTER TABLE range_parted ATTACH PARTITION partr_def2 DEFAULT; -ERROR: partition "partr_def2" conflicts with existing default partition "partr_def1" -LINE 1: ...LTER TABLE range_parted ATTACH PARTITION partr_def2 DEFAULT; - ^ --- Overlapping partitions cannot be attached, hence, following should give error -INSERT INTO partr_def1 VALUES (2, 10); -CREATE TABLE part3 (LIKE range_parted); -ALTER TABLE range_parted ATTACH partition part3 FOR VALUES FROM (2, 10) TO (2, 20); -ERROR: updated partition constraint for default partition "partr_def1" would be violated by some row --- Attaching partitions should be successful when there are no overlapping rows -ALTER TABLE range_parted ATTACH partition part3 FOR VALUES FROM (3, 10) TO (3, 20); --- check that leaf partitions are scanned when attaching a partitioned --- table -CREATE TABLE part_5 ( - LIKE list_parted2 -) PARTITION BY LIST (b); --- check that violating rows are correctly reported -CREATE TABLE part_5_a PARTITION OF part_5 FOR VALUES IN ('a'); -INSERT INTO part_5_a (a, b) VALUES (6, 'a'); -ALTER TABLE list_parted2 ATTACH PARTITION part_5 FOR VALUES IN (5); -ERROR: partition constraint of relation "part_5_a" is violated by some row --- delete the faulting row and also add a constraint to skip the scan -DELETE FROM part_5_a WHERE a NOT IN (3); -ALTER TABLE part_5 ADD CONSTRAINT check_a CHECK (a IS NOT NULL AND a = 5); -ALTER TABLE list_parted2 ATTACH PARTITION part_5 FOR VALUES IN (5); -ALTER TABLE list_parted2 DETACH PARTITION part_5; -ALTER TABLE part_5 DROP CONSTRAINT check_a; --- scan should again be skipped, even though NOT NULL is now a column property -ALTER TABLE part_5 ADD CONSTRAINT check_a CHECK (a IN (5)), ALTER a SET NOT NULL; -ALTER TABLE list_parted2 ATTACH PARTITION part_5 FOR VALUES IN (5); --- Check the case where attnos of the partitioning columns in the table being --- attached differs from the parent. It should not affect the constraint- --- checking logic that allows to skip the scan. -CREATE TABLE part_6 ( - c int, - LIKE list_parted2, - CONSTRAINT check_a CHECK (a IS NOT NULL AND a = 6) -); -ALTER TABLE part_6 DROP c; -ALTER TABLE list_parted2 ATTACH PARTITION part_6 FOR VALUES IN (6); --- Similar to above, but the table being attached is a partitioned table --- whose partition has still different attnos for the root partitioning --- columns. -CREATE TABLE part_7 ( - LIKE list_parted2, - CONSTRAINT check_a CHECK (a IS NOT NULL AND a = 7) -) PARTITION BY LIST (b); -CREATE TABLE part_7_a_null ( - c int, - d int, - e int, - LIKE list_parted2, -- 'a' will have attnum = 4 - CONSTRAINT check_b CHECK (b IS NULL OR b = 'a'), - CONSTRAINT check_a CHECK (a IS NOT NULL AND a = 7) -); -ALTER TABLE part_7_a_null DROP c, DROP d, DROP e; -ALTER TABLE part_7 ATTACH PARTITION part_7_a_null FOR VALUES IN ('a', null); -ALTER TABLE list_parted2 ATTACH PARTITION part_7 FOR VALUES IN (7); --- Same example, but check this time that the constraint correctly detects --- violating rows -ALTER TABLE list_parted2 DETACH PARTITION part_7; -ALTER TABLE part_7 DROP CONSTRAINT check_a; -- thusly, scan won't be skipped -INSERT INTO part_7 (a, b) VALUES (8, null), (9, 'a'); -SELECT tableoid::regclass, a, b FROM part_7 order by a; - tableoid | a | b ----------------+---+--- - part_7_a_null | 8 | - part_7_a_null | 9 | a -(2 rows) - -ALTER TABLE list_parted2 ATTACH PARTITION part_7 FOR VALUES IN (7); -ERROR: partition constraint of relation "part_7_a_null" is violated by some row --- check that leaf partitions of default partition are scanned when --- attaching a partitioned table. -ALTER TABLE part_5 DROP CONSTRAINT check_a; -CREATE TABLE part5_def PARTITION OF part_5 DEFAULT PARTITION BY LIST(a); -CREATE TABLE part5_def_p1 PARTITION OF part5_def FOR VALUES IN (5); -INSERT INTO part5_def_p1 VALUES (5, 'y'); -CREATE TABLE part5_p1 (LIKE part_5); -ALTER TABLE part_5 ATTACH PARTITION part5_p1 FOR VALUES IN ('y'); -ERROR: updated partition constraint for default partition "part5_def_p1" would be violated by some row --- should be ok after deleting the bad row -DELETE FROM part5_def_p1 WHERE b = 'y'; -ALTER TABLE part_5 ATTACH PARTITION part5_p1 FOR VALUES IN ('y'); --- check that the table being attached is not already a partition -ALTER TABLE list_parted2 ATTACH PARTITION part_2 FOR VALUES IN (2); -ERROR: "part_2" is already a partition --- check that circular inheritance is not allowed -ALTER TABLE part_5 ATTACH PARTITION list_parted2 FOR VALUES IN ('b'); -ERROR: circular inheritance not allowed -DETAIL: "part_5" is already a child of "list_parted2". -ALTER TABLE list_parted2 ATTACH PARTITION list_parted2 FOR VALUES IN (0); -ERROR: circular inheritance not allowed -DETAIL: "list_parted2" is already a child of "list_parted2". --- If a partitioned table being created or an existing table being attached --- as a partition does not have a constraint that would allow validation scan --- to be skipped, but an individual partition does, then the partition's --- validation scan is skipped. -CREATE TABLE quuux (a int, b text) PARTITION BY LIST (a); -CREATE TABLE quuux_default PARTITION OF quuux DEFAULT PARTITION BY LIST (b); -CREATE TABLE quuux_default1 PARTITION OF quuux_default ( - CONSTRAINT check_1 CHECK (a IS NOT NULL AND a = 1) -) FOR VALUES IN ('b'); -CREATE TABLE quuux1 (a int, b text); -ALTER TABLE quuux ATTACH PARTITION quuux1 FOR VALUES IN (1); -- validate! -CREATE TABLE quuux2 (a int, b text); -ALTER TABLE quuux ATTACH PARTITION quuux2 FOR VALUES IN (2); -- skip validation -DROP TABLE quuux1, quuux2; --- should validate for quuux1, but not for quuux2 -CREATE TABLE quuux1 PARTITION OF quuux FOR VALUES IN (1); -CREATE TABLE quuux2 PARTITION OF quuux FOR VALUES IN (2); -DROP TABLE quuux; --- check validation when attaching hash partitions --- Use hand-rolled hash functions and operator class to get predictable result --- on different machines. part_test_int4_ops is defined in test_setup.sql. --- check that the new partition won't overlap with an existing partition -CREATE TABLE hash_parted ( - a int, - b int -) PARTITION BY HASH (a part_test_int4_ops); -CREATE TABLE hpart_1 PARTITION OF hash_parted FOR VALUES WITH (MODULUS 4, REMAINDER 0); -CREATE TABLE fail_part (LIKE hpart_1); -ALTER TABLE hash_parted ATTACH PARTITION fail_part FOR VALUES WITH (MODULUS 8, REMAINDER 4); -ERROR: partition "fail_part" would overlap partition "hpart_1" -LINE 1: ...hash_parted ATTACH PARTITION fail_part FOR VALUES WITH (MODU... - ^ -ALTER TABLE hash_parted ATTACH PARTITION fail_part FOR VALUES WITH (MODULUS 8, REMAINDER 0); -ERROR: partition "fail_part" would overlap partition "hpart_1" -LINE 1: ...hash_parted ATTACH PARTITION fail_part FOR VALUES WITH (MODU... - ^ -DROP TABLE fail_part; --- check validation when attaching hash partitions --- check that violating rows are correctly reported -CREATE TABLE hpart_2 (LIKE hash_parted); -INSERT INTO hpart_2 VALUES (3, 0); -ALTER TABLE hash_parted ATTACH PARTITION hpart_2 FOR VALUES WITH (MODULUS 4, REMAINDER 1); -ERROR: partition constraint of relation "hpart_2" is violated by some row --- should be ok after deleting the bad row -DELETE FROM hpart_2; -ALTER TABLE hash_parted ATTACH PARTITION hpart_2 FOR VALUES WITH (MODULUS 4, REMAINDER 1); --- check that leaf partitions are scanned when attaching a partitioned --- table -CREATE TABLE hpart_5 ( - LIKE hash_parted -) PARTITION BY LIST (b); --- check that violating rows are correctly reported -CREATE TABLE hpart_5_a PARTITION OF hpart_5 FOR VALUES IN ('1', '2', '3'); -INSERT INTO hpart_5_a (a, b) VALUES (7, 1); -ALTER TABLE hash_parted ATTACH PARTITION hpart_5 FOR VALUES WITH (MODULUS 4, REMAINDER 2); -ERROR: partition constraint of relation "hpart_5_a" is violated by some row --- should be ok after deleting the bad row -DELETE FROM hpart_5_a; -ALTER TABLE hash_parted ATTACH PARTITION hpart_5 FOR VALUES WITH (MODULUS 4, REMAINDER 2); --- check that the table being attach is with valid modulus and remainder value -CREATE TABLE fail_part(LIKE hash_parted); -ALTER TABLE hash_parted ATTACH PARTITION fail_part FOR VALUES WITH (MODULUS 0, REMAINDER 1); -ERROR: modulus for hash partition must be an integer value greater than zero -ALTER TABLE hash_parted ATTACH PARTITION fail_part FOR VALUES WITH (MODULUS 8, REMAINDER 8); -ERROR: remainder for hash partition must be less than modulus -ALTER TABLE hash_parted ATTACH PARTITION fail_part FOR VALUES WITH (MODULUS 3, REMAINDER 2); -ERROR: every hash partition modulus must be a factor of the next larger modulus -DETAIL: The new modulus 3 is not a factor of 4, the modulus of existing partition "hpart_1". -DROP TABLE fail_part; --- --- DETACH PARTITION --- --- check that the table is partitioned at all -CREATE TABLE regular_table (a int); -ALTER TABLE regular_table DETACH PARTITION any_name; -ERROR: table "regular_table" is not partitioned -DROP TABLE regular_table; --- check that the partition being detached exists at all -ALTER TABLE list_parted2 DETACH PARTITION part_4; -ERROR: relation "part_4" does not exist -ALTER TABLE hash_parted DETACH PARTITION hpart_4; -ERROR: relation "hpart_4" does not exist --- check that the partition being detached is actually a partition of the parent -CREATE TABLE not_a_part (a int); -ALTER TABLE list_parted2 DETACH PARTITION not_a_part; -ERROR: relation "not_a_part" is not a partition of relation "list_parted2" -ALTER TABLE list_parted2 DETACH PARTITION part_1; -ERROR: relation "part_1" is not a partition of relation "list_parted2" -ALTER TABLE hash_parted DETACH PARTITION not_a_part; -ERROR: relation "not_a_part" is not a partition of relation "hash_parted" -DROP TABLE not_a_part; --- check that, after being detached, attinhcount/coninhcount is dropped to 0 and --- attislocal/conislocal is set to true -ALTER TABLE list_parted2 DETACH PARTITION part_3_4; -SELECT attinhcount, attislocal FROM pg_attribute WHERE attrelid = 'part_3_4'::regclass AND attnum > 0; - attinhcount | attislocal --------------+------------ - 0 | t - 0 | t -(2 rows) - -SELECT coninhcount, conislocal FROM pg_constraint WHERE conrelid = 'part_3_4'::regclass AND conname = 'check_a'; - coninhcount | conislocal --------------+------------ - 0 | t -(1 row) - -DROP TABLE part_3_4; --- check that a detached partition is not dropped on dropping a partitioned table -CREATE TABLE range_parted2 ( - a int -) PARTITION BY RANGE(a); -CREATE TABLE part_rp PARTITION OF range_parted2 FOR VALUES FROM (0) to (100); -ALTER TABLE range_parted2 DETACH PARTITION part_rp; -DROP TABLE range_parted2; -SELECT * from part_rp; - a ---- -(0 rows) - -DROP TABLE part_rp; --- concurrent detach -CREATE TABLE range_parted2 ( - a int -) PARTITION BY RANGE(a); -CREATE TABLE part_rp PARTITION OF range_parted2 FOR VALUES FROM (0) to (100); -BEGIN; --- doesn't work in a partition block -ALTER TABLE range_parted2 DETACH PARTITION part_rp CONCURRENTLY; -ERROR: ALTER TABLE ... DETACH CONCURRENTLY cannot run inside a transaction block -COMMIT; -CREATE TABLE part_rpd PARTITION OF range_parted2 DEFAULT; --- doesn't work if there's a default partition -ALTER TABLE range_parted2 DETACH PARTITION part_rp CONCURRENTLY; -ERROR: cannot detach partitions concurrently when a default partition exists --- doesn't work for the default partition -ALTER TABLE range_parted2 DETACH PARTITION part_rpd CONCURRENTLY; -ERROR: cannot detach partitions concurrently when a default partition exists -DROP TABLE part_rpd; --- works fine -ALTER TABLE range_parted2 DETACH PARTITION part_rp CONCURRENTLY; -\d+ range_parted2 - Partitioned table "public.range_parted2" - Column | Type | Collation | Nullable | Default | Storage | Stats target | Description ---------+---------+-----------+----------+---------+---------+--------------+------------- - a | integer | | | | plain | | -Partition key: RANGE (a) -Number of partitions: 0 - --- constraint should be created -\d part_rp - Table "public.part_rp" - Column | Type | Collation | Nullable | Default ---------+---------+-----------+----------+--------- - a | integer | | | -Check constraints: - "part_rp_a_check" CHECK (a IS NOT NULL AND a >= 0 AND a < 100) - -CREATE TABLE part_rp100 PARTITION OF range_parted2 (CHECK (a>=123 AND a<133 AND a IS NOT NULL)) FOR VALUES FROM (100) to (200); -ALTER TABLE range_parted2 DETACH PARTITION part_rp100 CONCURRENTLY; --- redundant constraint should not be created -\d part_rp100 - Table "public.part_rp100" - Column | Type | Collation | Nullable | Default ---------+---------+-----------+----------+--------- - a | integer | | | -Check constraints: - "part_rp100_a_check" CHECK (a >= 123 AND a < 133 AND a IS NOT NULL) - -DROP TABLE range_parted2; --- Check ALTER TABLE commands for partitioned tables and partitions --- cannot add/drop column to/from *only* the parent -ALTER TABLE ONLY list_parted2 ADD COLUMN c int; -ERROR: column must be added to child tables too -ALTER TABLE ONLY list_parted2 DROP COLUMN b; -ERROR: cannot drop column from only the partitioned table when partitions exist -HINT: Do not specify the ONLY keyword. --- cannot add a column to partition or drop an inherited one -ALTER TABLE part_2 ADD COLUMN c text; -ERROR: cannot add column to a partition -ALTER TABLE part_2 DROP COLUMN b; -ERROR: cannot drop inherited column "b" --- Nor rename, alter type -ALTER TABLE part_2 RENAME COLUMN b to c; -ERROR: cannot rename inherited column "b" -ALTER TABLE part_2 ALTER COLUMN b TYPE text; -ERROR: cannot alter inherited column "b" --- cannot add/drop NOT NULL or check constraints to *only* the parent, when --- partitions exist -ALTER TABLE ONLY list_parted2 ALTER b SET NOT NULL; -ERROR: constraint must be added to child tables too -DETAIL: Column "b" of relation "part_2" is not already NOT NULL. -HINT: Do not specify the ONLY keyword. -ALTER TABLE ONLY list_parted2 ADD CONSTRAINT check_b CHECK (b <> 'zz'); -ERROR: constraint must be added to child tables too -ALTER TABLE list_parted2 ALTER b SET NOT NULL; -ALTER TABLE ONLY list_parted2 ALTER b DROP NOT NULL; -ERROR: cannot remove constraint from only the partitioned table when partitions exist -HINT: Do not specify the ONLY keyword. -ALTER TABLE list_parted2 ADD CONSTRAINT check_b CHECK (b <> 'zz'); -ALTER TABLE ONLY list_parted2 DROP CONSTRAINT check_b; -ERROR: cannot remove constraint from only the partitioned table when partitions exist -HINT: Do not specify the ONLY keyword. --- It's alright though, if no partitions are yet created -CREATE TABLE parted_no_parts (a int) PARTITION BY LIST (a); -ALTER TABLE ONLY parted_no_parts ALTER a SET NOT NULL; -ALTER TABLE ONLY parted_no_parts ADD CONSTRAINT check_a CHECK (a > 0); -ALTER TABLE ONLY parted_no_parts ALTER a DROP NOT NULL; -ALTER TABLE ONLY parted_no_parts DROP CONSTRAINT check_a; -DROP TABLE parted_no_parts; --- cannot drop inherited NOT NULL or check constraints from partition -ALTER TABLE list_parted2 ALTER b SET NOT NULL, ADD CONSTRAINT check_a2 CHECK (a > 0); -ALTER TABLE part_2 ALTER b DROP NOT NULL; -ERROR: column "b" is marked NOT NULL in parent table -ALTER TABLE part_2 DROP CONSTRAINT check_a2; -ERROR: cannot drop inherited constraint "check_a2" of relation "part_2" --- Doesn't make sense to add NO INHERIT constraints on partitioned tables -ALTER TABLE list_parted2 add constraint check_b2 check (b <> 'zz') NO INHERIT; -ERROR: cannot add NO INHERIT constraint to partitioned table "list_parted2" --- check that a partition cannot participate in regular inheritance -CREATE TABLE inh_test () INHERITS (part_2); -ERROR: cannot inherit from partition "part_2" -CREATE TABLE inh_test (LIKE part_2); -ALTER TABLE inh_test INHERIT part_2; -ERROR: cannot inherit from a partition -ALTER TABLE part_2 INHERIT inh_test; -ERROR: cannot change inheritance of a partition --- cannot drop or alter type of partition key columns of lower level --- partitioned tables; for example, part_5, which is list_parted2's --- partition, is partitioned on b; -ALTER TABLE list_parted2 DROP COLUMN b; -ERROR: cannot drop column "b" because it is part of the partition key of relation "part_5" -ALTER TABLE list_parted2 ALTER COLUMN b TYPE text; -ERROR: cannot alter column "b" because it is part of the partition key of relation "part_5" --- dropping non-partition key columns should be allowed on the parent table. -ALTER TABLE list_parted DROP COLUMN b; -SELECT * FROM list_parted; - a ---- -(0 rows) - --- cleanup -DROP TABLE list_parted, list_parted2, range_parted; -DROP TABLE fail_def_part; -DROP TABLE hash_parted; --- more tests for certain multi-level partitioning scenarios -create table p (a int, b int) partition by range (a, b); -create table p1 (b int, a int not null) partition by range (b); -create table p11 (like p1); -alter table p11 drop a; -alter table p11 add a int; -alter table p11 drop a; -alter table p11 add a int not null; --- attnum for key attribute 'a' is different in p, p1, and p11 -select attrelid::regclass, attname, attnum -from pg_attribute -where attname = 'a' - and (attrelid = 'p'::regclass - or attrelid = 'p1'::regclass - or attrelid = 'p11'::regclass) -order by attrelid::regclass::text; - attrelid | attname | attnum -----------+---------+-------- - p | a | 1 - p1 | a | 2 - p11 | a | 4 -(3 rows) - -alter table p1 attach partition p11 for values from (2) to (5); -insert into p1 (a, b) values (2, 3); --- check that partition validation scan correctly detects violating rows -alter table p attach partition p1 for values from (1, 2) to (1, 10); -ERROR: partition constraint of relation "p11" is violated by some row --- cleanup -drop table p; -drop table p1; --- validate constraint on partitioned tables should only scan leaf partitions -create table parted_validate_test (a int) partition by list (a); -create table parted_validate_test_1 partition of parted_validate_test for values in (0, 1); -alter table parted_validate_test add constraint parted_validate_test_chka check (a > 0) not valid; -alter table parted_validate_test validate constraint parted_validate_test_chka; -drop table parted_validate_test; --- test alter column options -CREATE TABLE attmp(i integer); -INSERT INTO attmp VALUES (1); -ALTER TABLE attmp ALTER COLUMN i SET (n_distinct = 1, n_distinct_inherited = 2); -ALTER TABLE attmp ALTER COLUMN i RESET (n_distinct_inherited); -ANALYZE attmp; -DROP TABLE attmp; -DROP USER regress_alter_table_user1; --- check that violating rows are correctly reported when attaching as the --- default partition -create table defpart_attach_test (a int) partition by list (a); -create table defpart_attach_test1 partition of defpart_attach_test for values in (1); -create table defpart_attach_test_d (b int, a int); -alter table defpart_attach_test_d drop b; -insert into defpart_attach_test_d values (1), (2); --- error because its constraint as the default partition would be violated --- by the row containing 1 -alter table defpart_attach_test attach partition defpart_attach_test_d default; -ERROR: partition constraint of relation "defpart_attach_test_d" is violated by some row -delete from defpart_attach_test_d where a = 1; -alter table defpart_attach_test_d add check (a > 1); --- should be attached successfully and without needing to be scanned -alter table defpart_attach_test attach partition defpart_attach_test_d default; --- check that attaching a partition correctly reports any rows in the default --- partition that should not be there for the new partition to be attached --- successfully -create table defpart_attach_test_2 (like defpart_attach_test_d); -alter table defpart_attach_test attach partition defpart_attach_test_2 for values in (2); -ERROR: updated partition constraint for default partition "defpart_attach_test_d" would be violated by some row -drop table defpart_attach_test; --- check combinations of temporary and permanent relations when attaching --- partitions. -create table perm_part_parent (a int) partition by list (a); -create temp table temp_part_parent (a int) partition by list (a); -create table perm_part_child (a int); -create temp table temp_part_child (a int); -alter table temp_part_parent attach partition perm_part_child default; -- error -ERROR: cannot attach a permanent relation as partition of temporary relation "temp_part_parent" -alter table perm_part_parent attach partition temp_part_child default; -- error -ERROR: cannot attach a temporary relation as partition of permanent relation "perm_part_parent" -alter table temp_part_parent attach partition temp_part_child default; -- ok -drop table perm_part_parent cascade; -drop table temp_part_parent cascade; --- check that attaching partitions to a table while it is being used is --- prevented -create table tab_part_attach (a int) partition by list (a); -create or replace function func_part_attach() returns trigger - language plpgsql as $$ - begin - execute 'create table tab_part_attach_1 (a int)'; - execute 'alter table tab_part_attach attach partition tab_part_attach_1 for values in (1)'; - return null; - end $$; -create trigger trig_part_attach before insert on tab_part_attach - for each statement execute procedure func_part_attach(); -insert into tab_part_attach values (1); -ERROR: cannot ALTER TABLE "tab_part_attach" because it is being used by active queries in this session -CONTEXT: SQL statement "alter table tab_part_attach attach partition tab_part_attach_1 for values in (1)" -PL/pgSQL function func_part_attach() line 4 at EXECUTE -drop table tab_part_attach; -drop function func_part_attach(); --- test case where the partitioning operator is a SQL function whose --- evaluation results in the table's relcache being rebuilt partway through --- the execution of an ATTACH PARTITION command -create function at_test_sql_partop (int4, int4) returns int language sql -as $$ select case when $1 = $2 then 0 when $1 > $2 then 1 else -1 end; $$; -create operator class at_test_sql_partop for type int4 using btree as - operator 1 < (int4, int4), operator 2 <= (int4, int4), - operator 3 = (int4, int4), operator 4 >= (int4, int4), - operator 5 > (int4, int4), function 1 at_test_sql_partop(int4, int4); -create table at_test_sql_partop (a int) partition by range (a at_test_sql_partop); -create table at_test_sql_partop_1 (a int); -alter table at_test_sql_partop attach partition at_test_sql_partop_1 for values from (0) to (10); -drop table at_test_sql_partop; -drop operator class at_test_sql_partop using btree; -drop function at_test_sql_partop; -/* Test case for bug #16242 */ --- We create a parent and child where the child has missing --- non-null attribute values, and arrange to pass them through --- tuple conversion from the child to the parent tupdesc -create table bar1 (a integer, b integer not null default 1) - partition by range (a); -create table bar2 (a integer); -insert into bar2 values (1); -alter table bar2 add column b integer not null default 1; --- (at this point bar2 contains tuple with natts=1) -alter table bar1 attach partition bar2 default; --- this works: -select * from bar1; - a | b ----+--- - 1 | 1 -(1 row) - --- this exercises tuple conversion: -create function xtrig() - returns trigger language plpgsql -as $$ - declare - r record; - begin - for r in select * from old loop - raise info 'a=%, b=%', r.a, r.b; - end loop; - return NULL; - end; -$$; -create trigger xtrig - after update on bar1 - referencing old table as old - for each statement execute procedure xtrig(); -update bar1 set a = a + 1; -INFO: a=1, b=1 -/* End test case for bug #16242 */ -/* Test case for bug #17409 */ -create table attbl (p1 int constraint pk_attbl primary key); -create table atref (c1 int references attbl(p1)); -cluster attbl using pk_attbl; -ERROR: orioledb tables does not support CLUSTER -DETAIL: CLUSTER makes no much sense for index-organized tables. -alter table attbl alter column p1 set data type bigint; -alter table atref alter column c1 set data type bigint; -drop table attbl, atref; -create table attbl (p1 int constraint pk_attbl primary key); -alter table attbl replica identity using index pk_attbl; -ERROR: replica identity type INDEX is not supported for OrioleDB tables yet -create table atref (c1 int references attbl(p1)); -alter table attbl alter column p1 set data type bigint; -alter table atref alter column c1 set data type bigint; -drop table attbl, atref; -/* End test case for bug #17409 */ -/* Test case for bug #18970 */ -create table attbl(a int); -create table atref(b attbl check ((b).a is not null)); -alter table attbl alter column a type numeric; -- someday this should work -ERROR: cannot alter table "attbl" because column "atref.b" uses its row type -alter table atref drop constraint atref_b_check; -create statistics atref_stat on ((b).a is not null) from atref; -alter table attbl alter column a type numeric; -- someday this should work -ERROR: cannot alter table "attbl" because column "atref.b" uses its row type -drop statistics atref_stat; -create index atref_idx on atref (((b).a)); -alter table attbl alter column a type numeric; -- someday this should work -ERROR: cannot alter table "attbl" because column "atref.b" uses its row type -drop table attbl, atref; -/* End test case for bug #18970 */ --- Test that ALTER TABLE rewrite preserves a clustered index --- for normal indexes and indexes on constraints. -create table alttype_cluster (a int); -alter table alttype_cluster add primary key (a); -create index alttype_cluster_ind on alttype_cluster (a); -alter table alttype_cluster cluster on alttype_cluster_ind; --- Normal index remains clustered. -select indexrelid::regclass, indisclustered from pg_index - where indrelid = 'alttype_cluster'::regclass - order by indexrelid::regclass::text; - indexrelid | indisclustered -----------------------+---------------- - alttype_cluster_ind | t - alttype_cluster_pkey | f -(2 rows) - -alter table alttype_cluster alter a type bigint; -select indexrelid::regclass, indisclustered from pg_index - where indrelid = 'alttype_cluster'::regclass - order by indexrelid::regclass::text; - indexrelid | indisclustered -----------------------+---------------- - alttype_cluster_ind | t - alttype_cluster_pkey | f -(2 rows) - --- Constraint index remains clustered. -alter table alttype_cluster cluster on alttype_cluster_pkey; -select indexrelid::regclass, indisclustered from pg_index - where indrelid = 'alttype_cluster'::regclass - order by indexrelid::regclass::text; - indexrelid | indisclustered -----------------------+---------------- - alttype_cluster_ind | f - alttype_cluster_pkey | t -(2 rows) - -alter table alttype_cluster alter a type int; -select indexrelid::regclass, indisclustered from pg_index - where indrelid = 'alttype_cluster'::regclass - order by indexrelid::regclass::text; - indexrelid | indisclustered -----------------------+---------------- - alttype_cluster_ind | f - alttype_cluster_pkey | t -(2 rows) - -drop table alttype_cluster; --- --- Check that attaching or detaching a partitioned partition correctly leads --- to its partitions' constraint being updated to reflect the parent's --- newly added/removed constraint -create table target_parted (a int, b int) partition by list (a); -create table attach_parted (a int, b int) partition by list (b); -create table attach_parted_part1 partition of attach_parted for values in (1); --- insert a row directly into the leaf partition so that its partition --- constraint is built and stored in the relcache -insert into attach_parted_part1 values (1, 1); --- the following better invalidate the partition constraint of the leaf --- partition too... -alter table target_parted attach partition attach_parted for values in (1); --- ...such that the following insert fails -insert into attach_parted_part1 values (2, 1); -ERROR: new row for relation "attach_parted_part1" violates partition constraint -DETAIL: Failing row contains (2, 1). --- ...and doesn't when the partition is detached along with its own partition -alter table target_parted detach partition attach_parted; -insert into attach_parted_part1 values (2, 1); --- Test altering table having publication -create schema alter1; -create schema alter2; -create table alter1.t1 (a int); -set client_min_messages = 'ERROR'; -create publication pub1 for table alter1.t1, tables in schema alter2; -reset client_min_messages; -alter table alter1.t1 set schema alter2; -\d+ alter2.t1 - Table "alter2.t1" - Column | Type | Collation | Nullable | Default | Storage | Stats target | Description ---------+---------+-----------+----------+---------+---------+--------------+------------- - a | integer | | | | plain | | -Publications: - "pub1" - -drop publication pub1; -drop schema alter1 cascade; -drop schema alter2 cascade; -NOTICE: drop cascades to table alter2.t1 diff --git a/src/test/regress/expected/create_index_oriole.out b/src/test/regress/expected/create_index_oriole.out deleted file mode 100644 index f0f3d70a20f..00000000000 --- a/src/test/regress/expected/create_index_oriole.out +++ /dev/null @@ -1,6 +0,0 @@ --- This file is used instead of the original create_index.sql because --- it is not stable with OrioleDB. After create_index.sql stabilization this one --- must be replaced with original. Currently it contains only indexes for alter_table.sql --- script. -CREATE INDEX onek_unique1 ON onek USING btree(unique1 int4_ops); -CREATE INDEX tenk1_unique1 ON tenk1 USING btree(unique1 int4_ops); diff --git a/src/test/regress/parallel_schedule_oriole b/src/test/regress/parallel_schedule_oriole index e0333d6e7c6..6fc1b2c0a57 100644 --- a/src/test/regress/parallel_schedule_oriole +++ b/src/test/regress/parallel_schedule_oriole @@ -43,8 +43,7 @@ test: copy copyselect copydml # Note: many of the tests in later groups depend on create_index # ---------- test: create_function_c create_misc create_operator create_procedure create_table create_type create_schema -test: create_view -test: create_index_oriole +test: create_index create_view # ---------- # Another group of parallel tests diff --git a/src/test/regress/pg_regress.c b/src/test/regress/pg_regress.c index b105fca8450..e3a0267d5e0 100644 --- a/src/test/regress/pg_regress.c +++ b/src/test/regress/pg_regress.c @@ -61,8 +61,8 @@ static char *shellprog = SHELLPROG; * Windows-style newlines, but the comparison files might or might not. */ #ifndef WIN32 -const char *basic_diff_opts = "-I \"NOTICE\" -I \"DETAIL\" -I \"WARNING\""; -const char *pretty_diff_opts = "-I \"NOTICE\" -I \"DETAIL\" -I \"WARNING\" -U3"; +const char *basic_diff_opts = ""; +const char *pretty_diff_opts = "-U3"; #else const char *basic_diff_opts = "-w"; const char *pretty_diff_opts = "-w -U3"; diff --git a/src/test/regress/sql/create_index_oriole.sql b/src/test/regress/sql/create_index_oriole.sql deleted file mode 100644 index f0f3d70a20f..00000000000 --- a/src/test/regress/sql/create_index_oriole.sql +++ /dev/null @@ -1,6 +0,0 @@ --- This file is used instead of the original create_index.sql because --- it is not stable with OrioleDB. After create_index.sql stabilization this one --- must be replaced with original. Currently it contains only indexes for alter_table.sql --- script. -CREATE INDEX onek_unique1 ON onek USING btree(unique1 int4_ops); -CREATE INDEX tenk1_unique1 ON tenk1 USING btree(unique1 int4_ops); From 1cc192f9a1f6587141ae8e414723c17eac13d196 Mon Sep 17 00:00:00 2001 From: Alexander Korotkov Date: Tue, 10 Mar 2026 00:04:42 +0200 Subject: [PATCH 093/102] Call xact_redo_hook on replaying transaction about Add extra argument indicating commit/abort. --- src/backend/access/transam/xact.c | 5 ++++- src/include/access/xact.h | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c index dab73df4b2c..d5653cf0778 100644 --- a/src/backend/access/transam/xact.c +++ b/src/backend/access/transam/xact.c @@ -5967,7 +5967,7 @@ xact_redo_commit(xl_xact_parsed_commit *parsed, TimestampTz commit_time; if (xact_redo_hook) - xact_redo_hook(xid, lsn); + xact_redo_hook(xid, lsn, true); Assert(TransactionIdIsValid(xid)); @@ -6120,6 +6120,9 @@ xact_redo_abort(xl_xact_parsed_abort *parsed, TransactionId xid, { TransactionId max_xid; + if (xact_redo_hook) + xact_redo_hook(xid, lsn, false); + Assert(TransactionIdIsValid(xid)); /* Make sure nextXid is beyond any XID mentioned in the record. */ diff --git a/src/include/access/xact.h b/src/include/access/xact.h index e8200d55720..f5013e4a3ca 100644 --- a/src/include/access/xact.h +++ b/src/include/access/xact.h @@ -527,7 +527,7 @@ extern void EnterParallelMode(void); extern void ExitParallelMode(void); extern bool IsInParallelMode(void); -typedef void (*xact_redo_hook_type) (TransactionId xid, XLogRecPtr lsn); +typedef void (*xact_redo_hook_type) (TransactionId xid, XLogRecPtr lsn, bool commit); extern xact_redo_hook_type xact_redo_hook; #endif /* XACT_H */ From 3bb45d450149386b1ad0dee3fb4c0f094d748a6f Mon Sep 17 00:00:00 2001 From: Pavel Borisov Date: Thu, 5 Mar 2026 18:17:07 +0400 Subject: [PATCH 094/102] Add database_size hook It's needed to count custom extension data for database that is stored in PG datadir or pg_tblspc dir but outside of dir. Size counted by a hook is added to database size counted by existing method --- src/backend/utils/adt/dbsize.c | 6 ++++++ src/include/access/tableam.h | 8 ++++++++ 2 files changed, 14 insertions(+) diff --git a/src/backend/utils/adt/dbsize.c b/src/backend/utils/adt/dbsize.c index 5037bcb45ef..f9bebf3b355 100644 --- a/src/backend/utils/adt/dbsize.c +++ b/src/backend/utils/adt/dbsize.c @@ -71,6 +71,9 @@ static const struct size_bytes_unit_alias size_bytes_aliases[] = { {NULL} }; +/* Hook to calculate extension-custom database size */ +database_size_hook_type database_size_hook = NULL; + /* Return physical size of directory contents, or 0 if dir doesn't exist */ static int64 db_dir_size(const char *path) @@ -163,6 +166,9 @@ calculate_database_size(Oid dbOid) FreeDir(dirdesc); + if (database_size_hook != NULL) + totalsize += database_size_hook(dbOid); + return totalsize; } diff --git a/src/include/access/tableam.h b/src/include/access/tableam.h index 165144eb995..ffddfa5d1be 100644 --- a/src/include/access/tableam.h +++ b/src/include/access/tableam.h @@ -47,6 +47,14 @@ typedef int (*AcquireSampleRowsFunc) (Relation relation, int elevel, HeapTuple *rows, int targrows, double *totalrows, double *totaldeadrows); +/* + * Hook for adding extension-specific part of database size. + * Technically not a part of table AM as database can have tables with different AM's. + * + * TODO: May consider moving to some other header file + */ +typedef int64 (*database_size_hook_type)(Oid dbOid); +extern database_size_hook_type database_size_hook; /* in commands/analyze.c */ extern int acquire_sample_rows(Relation onerel, int elevel, From a464762b787265ce61e925468efefbda16ec3e5d Mon Sep 17 00:00:00 2001 From: Trunov Vladimir Date: Thu, 5 Mar 2026 17:52:40 +0300 Subject: [PATCH 095/102] Enable more regress tests for Oriole (2) --- src/test/regress/parallel_schedule_oriole | 26 +++++++++++------------ 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/src/test/regress/parallel_schedule_oriole b/src/test/regress/parallel_schedule_oriole index 6fc1b2c0a57..d2ed5cd9852 100644 --- a/src/test/regress/parallel_schedule_oriole +++ b/src/test/regress/parallel_schedule_oriole @@ -21,14 +21,14 @@ test: boolean char name varchar text int2 int4 int8 oid float4 float8 bit numeri # multirangetypes depends on rangetypes # multirangetypes shouldn't run concurrently with type_sanity # ---------- -test: md5 numerology point lseg line box path circle date time timetz timestamp timestamptz interval inet macaddr macaddr8 multirangetypes +test: md5 numerology point lseg line box path polygon circle date time timetz timestamp timestamptz interval inet macaddr macaddr8 multirangetypes # ---------- # Another group of parallel tests # geometry depends on point, lseg, line, box, path, polygon, circle # horology depends on date, time, timetz, timestamp, timestamptz, interval # ---------- -test: horology tstypes regex type_sanity misc_sanity comments expressions unicode xid mvcc database +test: geometry horology tstypes regex type_sanity misc_sanity comments expressions unicode xid mvcc database # ---------- # Load huge amounts of data @@ -36,19 +36,19 @@ test: horology tstypes regex type_sanity misc_sanity comments expressions unicod # execute two copy tests in parallel, to check that copy itself # is concurrent safe. # ---------- -test: copy copyselect copydml +test: copy copyselect copydml insert insert_conflict # ---------- # More groups of parallel tests # Note: many of the tests in later groups depend on create_index # ---------- test: create_function_c create_misc create_operator create_procedure create_table create_type create_schema -test: create_index create_view +test: create_index create_index_spgist create_view # ---------- # Another group of parallel tests # ---------- -test: create_aggregate create_function_sql create_cast typed_table drop_if_exists roleattributes hash_func errors infinite_recurse +test: create_aggregate create_function_sql create_cast constraints select typed_table drop_if_exists roleattributes create_am hash_func errors infinite_recurse # ---------- # sanity_check does a vacuum, affecting the sort order of SELECT * @@ -61,12 +61,12 @@ test: sanity_check # aggregates depends on create_aggregate # join depends on create_misc # ---------- -test: select_into select_implicit select_having random hash_index delete +test: select_into select_distinct select_implicit select_having subselect case random arrays btree_index hash_index delete # ---------- # Another group of parallel tests # ---------- -test: gist init_privs security_label collate lock object_address drop_operator password +test: gin gist spgist init_privs security_label collate lock object_address groupingsets drop_operator password # ---------- # Additional BRIN tests @@ -77,10 +77,10 @@ test: gist init_privs security_label collate lock object_address drop_operator p # psql depends on create_am # amutils depends on geometry, create_index_spgist, hash_index, brin # ---------- -test: alter_generic alter_operator misc async dbsize sysviews tsrf create_role +test: create_table_like alter_generic alter_operator misc async dbsize merge sysviews tsrf collate.utf8 create_role # collate.linux.utf8 and collate.icu.utf8 tests cannot be run in parallel with each other -test: psql_crosstab collate.linux.utf8 collate.windows.win1252 +test: rules psql psql_crosstab collate.linux.utf8 collate.windows.win1252 # ---------- # Run these alone so they don't run out of parallel workers @@ -98,12 +98,12 @@ test: subscription # Another group of parallel tests # select_views depends on create_view # ---------- -test: select_views portals_p2 dependency guc bitmapops tsdicts foreign_data xmlmap functional_deps advisory_lock +test: select_views portals_p2 dependency guc bitmapops tsearch tsdicts foreign_data window xmlmap functional_deps advisory_lock equivclass # ---------- # Another group of parallel tests (JSON related) # ---------- -test: json json_encoding jsonpath jsonpath_encoding jsonb_jsonpath sqljson +test: json jsonb json_encoding jsonpath jsonpath_encoding jsonb_jsonpath sqljson # ---------- # Another group of parallel tests @@ -111,7 +111,7 @@ test: json json_encoding jsonpath jsonpath_encoding jsonb_jsonpath sqljson # NB: temp.sql does a reconnect which transiently uses 2 connections, # so keep this parallel group to at most 19 tests # ---------- -test: plancache rangefuncs prepare truncate alter_table sequence polymorphism largeobject xml +test: plancache copy2 domain rangefuncs prepare conversion truncate alter_table sequence polymorphism rowtypes returning largeobject with xml # ---------- # Another group of parallel tests @@ -119,7 +119,7 @@ test: plancache rangefuncs prepare truncate alter_table sequence polymorphism la # The stats test resets stats, so nothing else needing stats access can be in # this group. # ---------- -test: hash_part partition_aggregate partition_info +test: partition_prune hash_part partition_aggregate partition_info explain memoize # event_trigger depends on create_am and cannot run concurrently with # any test that runs DDL From b716d7355fd1a575e36dcda9e65df9549aafe3d2 Mon Sep 17 00:00:00 2001 From: Alexander Korotkov Date: Sun, 22 Mar 2026 00:49:15 +0200 Subject: [PATCH 096/102] Introduce AcceptInvalidationMessagesHook() Using this hook, extension can delay the invalidation messages handling. --- src/backend/utils/cache/inval.c | 5 +++++ src/include/utils/inval.h | 3 +++ 2 files changed, 8 insertions(+) diff --git a/src/backend/utils/cache/inval.c b/src/backend/utils/cache/inval.c index b09c2f7cfd4..ba3dd2c4b76 100644 --- a/src/backend/utils/cache/inval.c +++ b/src/backend/utils/cache/inval.c @@ -282,6 +282,8 @@ static struct USERCACHECALLBACK static int usercache_callback_count = 0; +AcceptInvalidationMessagesHookType AcceptInvalidationMessagesHook = NULL; + /* ---------------------------------------------------------------- * Invalidation subgroup support functions * ---------------------------------------------------------------- @@ -778,6 +780,9 @@ InvalidateSystemCachesExtended(bool debug_discard) void AcceptInvalidationMessages(void) { + if (AcceptInvalidationMessagesHook) + AcceptInvalidationMessagesHook(); + ReceiveSharedInvalidMessages(LocalExecuteInvalidationMessage, InvalidateSystemCaches); diff --git a/src/include/utils/inval.h b/src/include/utils/inval.h index 1461271bbe6..bea4963cb93 100644 --- a/src/include/utils/inval.h +++ b/src/include/utils/inval.h @@ -27,6 +27,9 @@ typedef void (*UsercacheCallbackFunction) (Datum arg, Oid arg1, Oid arg2, Oid ar extern void AcceptInvalidationMessages(void); +typedef void (*AcceptInvalidationMessagesHookType) (void); +extern AcceptInvalidationMessagesHookType AcceptInvalidationMessagesHook; + extern void AtEOXact_Inval(bool isCommit); extern void AtEOSubXact_Inval(bool isCommit); From 4b1fb9bd6667b1b18f1a06d92d688466c4f481f2 Mon Sep 17 00:00:00 2001 From: Trunov Vladimir Date: Tue, 24 Mar 2026 14:45:44 +0300 Subject: [PATCH 097/102] Add more regress tests Enable generated, foreign_key, update postgresql regress tests --- src/test/regress/parallel_schedule_oriole | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/test/regress/parallel_schedule_oriole b/src/test/regress/parallel_schedule_oriole index d2ed5cd9852..f97096da905 100644 --- a/src/test/regress/parallel_schedule_oriole +++ b/src/test/regress/parallel_schedule_oriole @@ -61,12 +61,12 @@ test: sanity_check # aggregates depends on create_aggregate # join depends on create_misc # ---------- -test: select_into select_distinct select_implicit select_having subselect case random arrays btree_index hash_index delete +test: select_into select_distinct select_implicit select_having subselect case random arrays btree_index hash_index update delete # ---------- # Another group of parallel tests # ---------- -test: gin gist spgist init_privs security_label collate lock object_address groupingsets drop_operator password +test: gin gist spgist init_privs security_label collate lock object_address groupingsets drop_operator password generated # ---------- # Additional BRIN tests @@ -98,7 +98,7 @@ test: subscription # Another group of parallel tests # select_views depends on create_view # ---------- -test: select_views portals_p2 dependency guc bitmapops tsearch tsdicts foreign_data window xmlmap functional_deps advisory_lock equivclass +test: select_views portals_p2 foreign_key dependency guc bitmapops tsearch tsdicts foreign_data window xmlmap functional_deps advisory_lock equivclass # ---------- # Another group of parallel tests (JSON related) From d7c2b9190fbda486f98f74c27967be848cc33601 Mon Sep 17 00:00:00 2001 From: Trunov Vladimir Date: Tue, 31 Mar 2026 15:27:26 +0300 Subject: [PATCH 098/102] Added more regress tests fast_default, incremental_sort, aggregates, updatable_views, inherit, sort, misc_functions, vacuum_parallel --- src/test/regress/parallel_schedule_oriole | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/test/regress/parallel_schedule_oriole b/src/test/regress/parallel_schedule_oriole index f97096da905..f7f449b5be8 100644 --- a/src/test/regress/parallel_schedule_oriole +++ b/src/test/regress/parallel_schedule_oriole @@ -21,7 +21,7 @@ test: boolean char name varchar text int2 int4 int8 oid float4 float8 bit numeri # multirangetypes depends on rangetypes # multirangetypes shouldn't run concurrently with type_sanity # ---------- -test: md5 numerology point lseg line box path polygon circle date time timetz timestamp timestamptz interval inet macaddr macaddr8 multirangetypes +test: strings md5 numerology point lseg line box path polygon circle date time timetz timestamp timestamptz interval inet macaddr macaddr8 multirangetypes # ---------- # Another group of parallel tests @@ -48,8 +48,7 @@ test: create_index create_index_spgist create_view # ---------- # Another group of parallel tests # ---------- -test: create_aggregate create_function_sql create_cast constraints select typed_table drop_if_exists roleattributes create_am hash_func errors infinite_recurse - +test: create_aggregate create_function_sql create_cast constraints select inherit typed_table drop_if_exists updatable_views roleattributes create_am hash_func errors infinite_recurse # ---------- # sanity_check does a vacuum, affecting the sort order of SELECT * # results. So it should not run parallel to other tests. @@ -61,8 +60,7 @@ test: sanity_check # aggregates depends on create_aggregate # join depends on create_misc # ---------- -test: select_into select_distinct select_implicit select_having subselect case random arrays btree_index hash_index update delete - +test: select_into select_distinct select_implicit select_having subselect case aggregates random arrays btree_index hash_index update delete # ---------- # Another group of parallel tests # ---------- @@ -77,7 +75,7 @@ test: gin gist spgist init_privs security_label collate lock object_address grou # psql depends on create_am # amutils depends on geometry, create_index_spgist, hash_index, brin # ---------- -test: create_table_like alter_generic alter_operator misc async dbsize merge sysviews tsrf collate.utf8 create_role +test: create_table_like alter_generic alter_operator misc async dbsize merge misc_functions sysviews tsrf collate.utf8 incremental_sort create_role # collate.linux.utf8 and collate.icu.utf8 tests cannot be run in parallel with each other test: rules psql psql_crosstab collate.linux.utf8 collate.windows.win1252 @@ -86,6 +84,7 @@ test: rules psql psql_crosstab collate.linux.utf8 collate.windows.win1252 # Run these alone so they don't run out of parallel workers # select_parallel depends on create_misc # ---------- +test: vacuum_parallel # Run this alone, because concurrent DROP TABLE would make non-superuser # "ANALYZE;" fail with "relation with OID $n does not exist". @@ -127,6 +126,7 @@ test: partition_prune hash_part partition_aggregate partition_info explain memoi test: oidjoins # this test also uses event triggers, so likewise run it by itself +test: fast_default # run tablespace test at the end because it drops the tablespace created during # setup that other tests may use. From ed35d04b79eda548e3048655cf15a57451167cbb Mon Sep 17 00:00:00 2001 From: Ilya Kobets Date: Tue, 7 Apr 2026 00:54:54 +0200 Subject: [PATCH 099/102] enabled priviliges test --- src/test/regress/parallel_schedule_oriole | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/test/regress/parallel_schedule_oriole b/src/test/regress/parallel_schedule_oriole index f7f449b5be8..a790002b452 100644 --- a/src/test/regress/parallel_schedule_oriole +++ b/src/test/regress/parallel_schedule_oriole @@ -64,7 +64,7 @@ test: select_into select_distinct select_implicit select_having subselect case a # ---------- # Another group of parallel tests # ---------- -test: gin gist spgist init_privs security_label collate lock object_address groupingsets drop_operator password generated +test: gin gist spgist privileges init_privs security_label collate lock object_address groupingsets drop_operator password generated # ---------- # Additional BRIN tests From a072f0b83b33bf9e2969b18ab8126fa76ee9633d Mon Sep 17 00:00:00 2001 From: GremSnoort Date: Wed, 18 Mar 2026 17:03:04 +0300 Subject: [PATCH 100/102] Support replication origin advance for xid-less commits Add a transaction-level hook that lets an extension provide a durable local commit LSN for a top-level transaction that reaches commit without a heap XID. Use that LSN in RecordTransactionCommit() to advance replication origin from the no-XID path, and propagate it through the rest of the commit tail for flush, async commit tracking, synchronous replication wait, and XactLastCommitEnd. This keeps commit/origin policy in core PostgreSQL while allowing storage engines with xid-less commit paths to participate through a narrow hook. Co-authored-by: Pavel Borisov --- src/backend/access/transam/xact.c | 20 ++++++++++++++++++++ src/include/access/xact.h | 13 +++++++++++++ 2 files changed, 33 insertions(+) diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c index d5653cf0778..2d71fcd2f01 100644 --- a/src/backend/access/transam/xact.c +++ b/src/backend/access/transam/xact.c @@ -323,6 +323,7 @@ typedef struct SubXactCallbackItem static SubXactCallbackItem *SubXact_callbacks = NULL; xact_redo_hook_type xact_redo_hook = NULL; +get_xidless_commit_lsn_hook_type get_xidless_commit_lsn_hook = NULL; /* local function prototypes */ static void AssignTransactionId(TransactionState s); @@ -1324,6 +1325,8 @@ RecordTransactionCommit(void) */ if (!markXidCommitted) { + bool replorigin; + /* * We expect that every RelationDropStorage is followed by a catalog * update, and hence XID assignment, so we shouldn't get here with any @@ -1356,6 +1359,23 @@ RecordTransactionCommit(void) wrote_xlog = true; /* not strictly necessary */ } + replorigin = (replorigin_session_origin != InvalidRepOriginId && + replorigin_session_origin != DoNotReplicateId); + + /* + * Some storage engines can reach commit without a top-level heap XID + * while still having a durable local commit anchor in PG WAL. Allow + * such engines to surface that LSN here so that replication-origin + * state can still be advanced for xid-less commits. + */ + if (replorigin && get_xidless_commit_lsn_hook) + { + XLogRecPtr local_commit = get_xidless_commit_lsn_hook(&wrote_xlog); + + if (XLogRecPtrIsValid(local_commit)) + replorigin_session_advance(replorigin_session_origin_lsn, Max(local_commit, XactLastRecEnd)); + } + /* * If we didn't create XLOG entries, we're done here; otherwise we * should trigger flushing those entries the same as a commit record diff --git a/src/include/access/xact.h b/src/include/access/xact.h index f5013e4a3ca..6533a64ca7d 100644 --- a/src/include/access/xact.h +++ b/src/include/access/xact.h @@ -148,6 +148,19 @@ typedef enum typedef void (*SubXactCallback) (SubXactEvent event, SubTransactionId mySubid, SubTransactionId parentSubid, void *arg); +/* + * Hook for extensions that can produce a durable local commit LSN for a + * top-level transaction without a heap XID. + * + * The hook is consulted from the xid-less path of RecordTransactionCommit(). + * It should return a valid WAL LSN representing the extension's durable + * local commit anchor for the current transaction, or InvalidXLogRecPtr if + * no such anchor exists. + */ +typedef XLogRecPtr (*get_xidless_commit_lsn_hook_type) (bool *); +extern PGDLLIMPORT get_xidless_commit_lsn_hook_type + get_xidless_commit_lsn_hook; + /* Data structure for Save/RestoreTransactionCharacteristics */ typedef struct SavedTransactionCharacteristics { From dcc116eab56bd3e6267e34615e8ee4e4eb8f58be Mon Sep 17 00:00:00 2001 From: Trunov Vladimir Date: Tue, 14 Apr 2026 22:02:38 +0300 Subject: [PATCH 101/102] Added more regress tests Enables brin_bloom, brin_multi, reloptions, opr_sanity, union, join_hash, collate.icu.utf8, limit, stats_ext, identity. Added patch for regress/limit test that is used in orioledb/orioledb CI pipeline. --- patches/limit.patch | 49 +++++++++++++++++++++++ src/test/regress/parallel_schedule_oriole | 16 ++++---- 2 files changed, 58 insertions(+), 7 deletions(-) create mode 100644 patches/limit.patch diff --git a/patches/limit.patch b/patches/limit.patch new file mode 100644 index 00000000000..fb27f905021 --- /dev/null +++ b/patches/limit.patch @@ -0,0 +1,49 @@ +diff --git a/src/test/regress/sql/limit.sql b/src/test/regress/sql/limit.sql +index 603910fe6d..0cfef4c77b 100644 +--- a/src/test/regress/sql/limit.sql ++++ b/src/test/regress/sql/limit.sql +@@ -39,7 +39,7 @@ select * from int8_tbl offset (case when random() < 0.5 then null::bigint end); + -- Test assorted cases involving backwards fetch from a LIMIT plan node + begin; + +-declare c1 cursor for select * from int8_tbl limit 10; ++declare c1 scroll cursor for select * from int8_tbl limit 10; + fetch all in c1; + fetch 1 in c1; + fetch backward 1 in c1; +@@ -47,7 +47,7 @@ fetch backward all in c1; + fetch backward 1 in c1; + fetch all in c1; + +-declare c2 cursor for select * from int8_tbl limit 3; ++declare c2 scroll cursor for select * from int8_tbl limit 3; + fetch all in c2; + fetch 1 in c2; + fetch backward 1 in c2; +@@ -55,7 +55,7 @@ fetch backward all in c2; + fetch backward 1 in c2; + fetch all in c2; + +-declare c3 cursor for select * from int8_tbl offset 3; ++declare c3 scroll cursor for select * from int8_tbl offset 3; + fetch all in c3; + fetch 1 in c3; + fetch backward 1 in c3; +@@ -63,7 +63,7 @@ fetch backward all in c3; + fetch backward 1 in c3; + fetch all in c3; + +-declare c4 cursor for select * from int8_tbl offset 10; ++declare c4 scroll cursor for select * from int8_tbl offset 10; + fetch all in c4; + fetch 1 in c4; + fetch backward 1 in c4; +@@ -71,7 +71,7 @@ fetch backward all in c4; + fetch backward 1 in c4; + fetch all in c4; + +-declare c5 cursor for select * from int8_tbl order by q1 fetch first 2 rows with ties; ++declare c5 scroll cursor for select * from int8_tbl order by q1 fetch first 2 rows with ties; + fetch all in c5; + fetch 1 in c5; + fetch backward 1 in c5; diff --git a/src/test/regress/parallel_schedule_oriole b/src/test/regress/parallel_schedule_oriole index a790002b452..727a0a4bc3c 100644 --- a/src/test/regress/parallel_schedule_oriole +++ b/src/test/regress/parallel_schedule_oriole @@ -28,7 +28,7 @@ test: strings md5 numerology point lseg line box path polygon circle date time t # geometry depends on point, lseg, line, box, path, polygon, circle # horology depends on date, time, timetz, timestamp, timestamptz, interval # ---------- -test: geometry horology tstypes regex type_sanity misc_sanity comments expressions unicode xid mvcc database +test: geometry horology tstypes regex type_sanity opr_sanity misc_sanity comments expressions unicode xid mvcc database # ---------- # Load huge amounts of data @@ -60,25 +60,27 @@ test: sanity_check # aggregates depends on create_aggregate # join depends on create_misc # ---------- -test: select_into select_distinct select_implicit select_having subselect case aggregates random arrays btree_index hash_index update delete +test: select_into select_distinct select_implicit select_having subselect union case aggregates random arrays btree_index hash_index update delete + # ---------- # Another group of parallel tests # ---------- -test: gin gist spgist privileges init_privs security_label collate lock object_address groupingsets drop_operator password generated +test: gin gist spgist privileges init_privs security_label collate lock object_address groupingsets drop_operator password identity generated join_hash # ---------- # Additional BRIN tests # ---------- +test: brin_bloom brin_multi # ---------- # Another group of parallel tests # psql depends on create_am # amutils depends on geometry, create_index_spgist, hash_index, brin # ---------- -test: create_table_like alter_generic alter_operator misc async dbsize merge misc_functions sysviews tsrf collate.utf8 incremental_sort create_role +test: create_table_like alter_generic alter_operator misc async dbsize merge misc_functions sysviews tsrf collate.utf8 collate.icu.utf8 incremental_sort create_role # collate.linux.utf8 and collate.icu.utf8 tests cannot be run in parallel with each other -test: rules psql psql_crosstab collate.linux.utf8 collate.windows.win1252 +test: rules psql psql_crosstab stats_ext collate.linux.utf8 collate.windows.win1252 # ---------- # Run these alone so they don't run out of parallel workers @@ -110,7 +112,7 @@ test: json jsonb json_encoding jsonpath jsonpath_encoding jsonb_jsonpath sqljson # NB: temp.sql does a reconnect which transiently uses 2 connections, # so keep this parallel group to at most 19 tests # ---------- -test: plancache copy2 domain rangefuncs prepare conversion truncate alter_table sequence polymorphism rowtypes returning largeobject with xml +test: plancache limit copy2 domain rangefuncs prepare conversion truncate alter_table sequence polymorphism rowtypes returning largeobject with xml # ---------- # Another group of parallel tests @@ -118,7 +120,7 @@ test: plancache copy2 domain rangefuncs prepare conversion truncate alter_table # The stats test resets stats, so nothing else needing stats access can be in # this group. # ---------- -test: partition_prune hash_part partition_aggregate partition_info explain memoize +test: partition_prune reloptions hash_part partition_aggregate partition_info explain memoize # event_trigger depends on create_am and cannot run concurrently with # any test that runs DDL From e8905f32f4fd4cc4c232ae43542c43065a9789c6 Mon Sep 17 00:00:00 2001 From: Trunov Vladimir Date: Mon, 20 Apr 2026 16:54:09 +0300 Subject: [PATCH 102/102] Disable collate.utf8 test for pg16 --- src/test/regress/parallel_schedule_oriole | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/test/regress/parallel_schedule_oriole b/src/test/regress/parallel_schedule_oriole index 727a0a4bc3c..3feeafbf883 100644 --- a/src/test/regress/parallel_schedule_oriole +++ b/src/test/regress/parallel_schedule_oriole @@ -77,7 +77,7 @@ test: brin_bloom brin_multi # psql depends on create_am # amutils depends on geometry, create_index_spgist, hash_index, brin # ---------- -test: create_table_like alter_generic alter_operator misc async dbsize merge misc_functions sysviews tsrf collate.utf8 collate.icu.utf8 incremental_sort create_role +test: create_table_like alter_generic alter_operator misc async dbsize merge misc_functions sysviews tsrf collate.icu.utf8 incremental_sort create_role # collate.linux.utf8 and collate.icu.utf8 tests cannot be run in parallel with each other test: rules psql psql_crosstab stats_ext collate.linux.utf8 collate.windows.win1252