diff --git a/src/backend/access/common/heaptuple.c b/src/backend/access/common/heaptuple.c index c45a48812b..d4478a2cba 100644 --- a/src/backend/access/common/heaptuple.c +++ b/src/backend/access/common/heaptuple.c @@ -1556,3 +1556,13 @@ minimal_tuple_from_heap_tuple(HeapTuple htup) result->t_len = len; return result; } + +/* + * This mainly exists so JIT can inline the definition, but it's also + * sometimes useful in debugging sessions. + */ +size_t +varsize_any(void *p) +{ + return VARSIZE_ANY(p); +} diff --git a/src/backend/executor/execExpr.c b/src/backend/executor/execExpr.c index 13bf891cea..e284fd71d7 100644 --- a/src/backend/executor/execExpr.c +++ b/src/backend/executor/execExpr.c @@ -2287,18 +2287,21 @@ ExecPushExprSlots(ExprState *state, LastAttnumInfo *info) { scratch.opcode = EEOP_INNER_FETCHSOME; scratch.d.fetch.last_var = info->last_inner; + scratch.d.fetch.known_desc = NULL; ExprEvalPushStep(state, &scratch); } if (info->last_outer > 0) { scratch.opcode = EEOP_OUTER_FETCHSOME; scratch.d.fetch.last_var = info->last_outer; + scratch.d.fetch.known_desc = NULL; ExprEvalPushStep(state, &scratch); } if (info->last_scan > 0) { scratch.opcode = EEOP_SCAN_FETCHSOME; scratch.d.fetch.last_var = info->last_scan; + scratch.d.fetch.known_desc = NULL; ExprEvalPushStep(state, &scratch); } } @@ -3250,10 +3253,12 @@ ExecBuildGroupingEqual(TupleDesc ldesc, TupleDesc rdesc, /* push deform steps */ scratch.opcode = EEOP_INNER_FETCHSOME; scratch.d.fetch.last_var = maxatt; + scratch.d.fetch.known_desc = ldesc; ExprEvalPushStep(state, &scratch); scratch.opcode = EEOP_OUTER_FETCHSOME; scratch.d.fetch.last_var = maxatt; + scratch.d.fetch.known_desc = rdesc; ExprEvalPushStep(state, &scratch); /* diff --git a/src/backend/executor/execTuples.c b/src/backend/executor/execTuples.c index c46d65cf93..acd1b97b0e 100644 --- a/src/backend/executor/execTuples.c +++ b/src/backend/executor/execTuples.c @@ -896,6 +896,7 @@ ExecInitScanTupleSlot(EState *estate, ScanState *scanstate, TupleDesc tupledesc) { scanstate->ss_ScanTupleSlot = ExecAllocTableSlot(&estate->es_tupleTable, tupledesc); + scanstate->ps.scandesc = tupledesc; } /* ---------------- diff --git a/src/backend/executor/nodeForeignscan.c b/src/backend/executor/nodeForeignscan.c index 0084234b35..a2a28b7ec2 100644 --- a/src/backend/executor/nodeForeignscan.c +++ b/src/backend/executor/nodeForeignscan.c @@ -186,7 +186,11 @@ ExecInitForeignScan(ForeignScan *node, EState *estate, int eflags) } else { - ExecInitScanTupleSlot(estate, &scanstate->ss, RelationGetDescr(currentRelation)); + TupleDesc scan_tupdesc; + + /* don't trust FDWs to return tuples fulfilling NOT NULL constraints */ + scan_tupdesc = CreateTupleDescCopy(RelationGetDescr(currentRelation)); + ExecInitScanTupleSlot(estate, &scanstate->ss, scan_tupdesc); /* Node's targetlist will contain Vars with varno = scanrelid */ tlistvarno = scanrelid; } diff --git a/src/backend/jit/jit.c b/src/backend/jit/jit.c index 971df4f8a5..67a015fb35 100644 --- a/src/backend/jit/jit.c +++ b/src/backend/jit/jit.c @@ -38,6 +38,7 @@ bool jit_debugging_support = false; bool jit_dump_bitcode = false; bool jit_expressions = true; bool jit_profiling_support = false; +bool jit_tuple_deforming = true; double jit_above_cost = 100000; double jit_optimize_above_cost = 500000; diff --git a/src/backend/jit/llvm/Makefile b/src/backend/jit/llvm/Makefile index 79097662d5..d6a1f5f02d 100644 --- a/src/backend/jit/llvm/Makefile +++ b/src/backend/jit/llvm/Makefile @@ -39,7 +39,7 @@ OBJS=$(WIN32RES) # Infrastructure OBJS += llvmjit.o 
llvmjit_error.o llvmjit_wrap.o # Code generation -OBJS += llvmjit_expr.o +OBJS += llvmjit_expr.o llvmjit_deform.o all: all-shared-lib llvmjit_types.bc diff --git a/src/backend/jit/llvm/llvmjit.c b/src/backend/jit/llvm/llvmjit.c index cd3c40c5f1..d73237d002 100644 --- a/src/backend/jit/llvm/llvmjit.c +++ b/src/backend/jit/llvm/llvmjit.c @@ -74,6 +74,7 @@ LLVMTypeRef StructAggStatePerTransData; LLVMValueRef AttributeTemplate; LLVMValueRef FuncStrlen; +LLVMValueRef FuncVarsizeAny; LLVMValueRef FuncSlotGetsomeattrs; LLVMValueRef FuncHeapGetsysattr; LLVMValueRef FuncMakeExpandedObjectReadOnlyInternal; @@ -784,6 +785,7 @@ llvm_create_types(void) AttributeTemplate = LLVMGetNamedFunction(mod, "AttributeTemplate"); FuncStrlen = LLVMGetNamedFunction(mod, "strlen"); + FuncVarsizeAny = LLVMGetNamedFunction(mod, "varsize_any"); FuncSlotGetsomeattrs = LLVMGetNamedFunction(mod, "slot_getsomeattrs"); FuncHeapGetsysattr = LLVMGetNamedFunction(mod, "heap_getsysattr"); FuncMakeExpandedObjectReadOnlyInternal = LLVMGetNamedFunction(mod, "MakeExpandedObjectReadOnlyInternal"); diff --git a/src/backend/jit/llvm/llvmjit_deform.c b/src/backend/jit/llvm/llvmjit_deform.c new file mode 100644 index 0000000000..0762ab6786 --- /dev/null +++ b/src/backend/jit/llvm/llvmjit_deform.c @@ -0,0 +1,729 @@ +/*------------------------------------------------------------------------- + * + * llvmjit_deform.c + * Generate code for deforming a heap tuple. + * + * This gains performance benefits over unJITed deforming from compile-time + * knowledge of the tuple descriptor. Fixed column widths, NOT NULLness, etc + * can be taken advantage of. + * + * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/backend/jit/llvm/llvmjit_deform.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include + +#include "access/htup_details.h" +#include "executor/tuptable.h" +#include "jit/llvmjit.h" +#include "jit/llvmjit_emit.h" + + +static LLVMValueRef get_memset(LLVMModuleRef mod); + + +/* + * Create a function that deforms a tuple of type desc up to natts columns. 
+ */ +LLVMValueRef +slot_compile_deform(LLVMJitContext *context, TupleDesc desc, int natts) +{ + char *funcname; + + LLVMModuleRef mod; + LLVMBuilderRef b; + + LLVMTypeRef deform_sig; + LLVMValueRef v_deform_fn; + + LLVMBasicBlockRef b_entry; + LLVMBasicBlockRef b_adjust_unavail_cols; + LLVMBasicBlockRef b_find_start; + + LLVMBasicBlockRef b_out; + LLVMBasicBlockRef b_dead; + LLVMBasicBlockRef *attcheckattnoblocks; + LLVMBasicBlockRef *attstartblocks; + LLVMBasicBlockRef *attisnullblocks; + LLVMBasicBlockRef *attcheckalignblocks; + LLVMBasicBlockRef *attalignblocks; + LLVMBasicBlockRef *attstoreblocks; + + LLVMValueRef v_offp; + + LLVMValueRef v_tupdata_base; + LLVMValueRef v_tts_values; + LLVMValueRef v_tts_nulls; + LLVMValueRef v_slotoffp; + LLVMValueRef v_slowp; + LLVMValueRef v_nvalidp; + LLVMValueRef v_nvalid; + LLVMValueRef v_maxatt; + + LLVMValueRef v_slot; + + LLVMValueRef v_tupleheaderp; + LLVMValueRef v_tuplep; + LLVMValueRef v_infomask1; + LLVMValueRef v_infomask2; + LLVMValueRef v_bits; + + LLVMValueRef v_hoff; + + LLVMValueRef v_hasnulls; + + /* last column (0 indexed) guaranteed to exist */ + int guaranteed_column_number = -1; + + /* current known alignment */ + int known_alignment = 0; + + /* if true, known_alignment describes definite offset of column */ + bool attguaranteedalign = true; + + int attnum; + + mod = llvm_mutable_module(context); + + funcname = llvm_expand_funcname(context, "deform"); + + /* + * Check which columns do have to exist, so we don't have to check the + * rows natts unnecessarily. + */ + for (attnum = 0; attnum < desc->natts; attnum++) + { + if (TupleDescAttr(desc, attnum)->attnotnull) + { + guaranteed_column_number = attnum; + } + } + + /* Create the signature and function */ + { + LLVMTypeRef param_types[1]; + + param_types[0] = l_ptr(StructTupleTableSlot); + + deform_sig = LLVMFunctionType(LLVMVoidType(), param_types, + lengthof(param_types), 0); + } + v_deform_fn = LLVMAddFunction(mod, funcname, deform_sig); + LLVMSetLinkage(v_deform_fn, LLVMInternalLinkage); + LLVMSetParamAlignment(LLVMGetParam(v_deform_fn, 0), MAXIMUM_ALIGNOF); + llvm_copy_attributes(AttributeTemplate, v_deform_fn); + + b_entry = + LLVMAppendBasicBlock(v_deform_fn, "entry"); + b_adjust_unavail_cols = + LLVMAppendBasicBlock(v_deform_fn, "adjust_unavail_cols"); + b_find_start = + LLVMAppendBasicBlock(v_deform_fn, "find_startblock"); + b_out = + LLVMAppendBasicBlock(v_deform_fn, "outblock"); + b_dead = + LLVMAppendBasicBlock(v_deform_fn, "deadblock"); + + b = LLVMCreateBuilder(); + + attcheckattnoblocks = palloc(sizeof(LLVMBasicBlockRef) * natts); + attstartblocks = palloc(sizeof(LLVMBasicBlockRef) * natts); + attisnullblocks = palloc(sizeof(LLVMBasicBlockRef) * natts); + attcheckalignblocks = palloc(sizeof(LLVMBasicBlockRef) * natts); + attalignblocks = palloc(sizeof(LLVMBasicBlockRef) * natts); + attstoreblocks = palloc(sizeof(LLVMBasicBlockRef) * natts); + + known_alignment = 0; + + LLVMPositionBuilderAtEnd(b, b_entry); + + /* perform allocas first, llvm only converts those to registers */ + v_offp = LLVMBuildAlloca(b, TypeSizeT, "v_offp"); + + v_slot = LLVMGetParam(v_deform_fn, 0); + + v_tts_values = + l_load_struct_gep(b, v_slot, FIELDNO_TUPLETABLESLOT_VALUES, + "tts_values"); + v_tts_nulls = + l_load_struct_gep(b, v_slot, FIELDNO_TUPLETABLESLOT_ISNULL, + "tts_ISNULL"); + + v_slotoffp = LLVMBuildStructGEP(b, v_slot, FIELDNO_TUPLETABLESLOT_OFF, ""); + v_slowp = LLVMBuildStructGEP(b, v_slot, FIELDNO_TUPLETABLESLOT_SLOW, ""); + v_nvalidp = LLVMBuildStructGEP(b, v_slot, 
FIELDNO_TUPLETABLESLOT_NVALID, ""); + + v_tupleheaderp = + l_load_struct_gep(b, v_slot, FIELDNO_TUPLETABLESLOT_TUPLE, + "tupleheader"); + v_tuplep = + l_load_struct_gep(b, v_tupleheaderp, FIELDNO_HEAPTUPLEDATA_DATA, + "tuple"); + v_bits = + LLVMBuildBitCast(b, + LLVMBuildStructGEP(b, v_tuplep, + FIELDNO_HEAPTUPLEHEADERDATA_BITS, + ""), + l_ptr(LLVMInt8Type()), + "t_bits"); + v_infomask1 = + l_load_struct_gep(b, v_tuplep, + FIELDNO_HEAPTUPLEHEADERDATA_INFOMASK, + "infomask1"); + v_infomask2 = + l_load_struct_gep(b, + v_tuplep, FIELDNO_HEAPTUPLEHEADERDATA_INFOMASK2, + "infomask2"); + + /* t_infomask & HEAP_HASNULL */ + v_hasnulls = + LLVMBuildICmp(b, LLVMIntNE, + LLVMBuildAnd(b, + l_int16_const(HEAP_HASNULL), + v_infomask1, ""), + l_int16_const(0), + "hasnulls"); + + /* t_infomask2 & HEAP_NATTS_MASK */ + v_maxatt = LLVMBuildAnd(b, + l_int16_const(HEAP_NATTS_MASK), + v_infomask2, + "maxatt"); + + v_hoff = + l_load_struct_gep(b, v_tuplep, + FIELDNO_HEAPTUPLEHEADERDATA_HOFF, + "t_hoff"); + + v_tupdata_base = + LLVMBuildGEP(b, + LLVMBuildBitCast(b, + v_tuplep, + l_ptr(LLVMInt8Type()), + ""), + &v_hoff, 1, + "v_tupdata_base"); + + /* + * Load tuple start offset from slot. Will be reset below in case there's + * no existing deformed columns in slot. + */ + { + LLVMValueRef v_off_start; + + v_off_start = LLVMBuildLoad(b, v_slotoffp, "v_slot_off"); + v_off_start = LLVMBuildZExt(b, v_off_start, TypeSizeT, ""); + LLVMBuildStore(b, v_off_start, v_offp); + } + + /* build the basic block for each attribute, need them as jump target */ + for (attnum = 0; attnum < natts; attnum++) + { + attcheckattnoblocks[attnum] = + l_bb_append_v(v_deform_fn, "block.attr.%d.attcheckattno", attnum); + attstartblocks[attnum] = + l_bb_append_v(v_deform_fn, "block.attr.%d.start", attnum); + attisnullblocks[attnum] = + l_bb_append_v(v_deform_fn, "block.attr.%d.attisnull", attnum); + attcheckalignblocks[attnum] = + l_bb_append_v(v_deform_fn, "block.attr.%d.attcheckalign", attnum); + attalignblocks[attnum] = + l_bb_append_v(v_deform_fn, "block.attr.%d.align", attnum); + attstoreblocks[attnum] = + l_bb_append_v(v_deform_fn, "block.attr.%d.store", attnum); + } + + /* + * Check if's guaranteed the all the desired attributes are available in + * tuple. If so, we can start deforming. If not, need to make sure + * tts_values/isnull is set appropriately for columns not available in the + * tuple. 
+ */ + if ((natts - 1) <= guaranteed_column_number) + { + /* just skip through unnecessary blocks */ + LLVMBuildBr(b, b_adjust_unavail_cols); + LLVMPositionBuilderAtEnd(b, b_adjust_unavail_cols); + LLVMBuildBr(b, b_find_start); + } + else + { + LLVMValueRef v_set; + LLVMValueRef v_startset; + LLVMValueRef v_params[5]; + + /* branch if not all columns available */ + LLVMBuildCondBr(b, + LLVMBuildICmp(b, LLVMIntULT, + v_maxatt, + l_int16_const(natts), + ""), + b_adjust_unavail_cols, + b_find_start); + + /* if not, memset tts_isnull of relevant cols to true */ + LLVMPositionBuilderAtEnd(b, b_adjust_unavail_cols); + + v_set = LLVMBuildSub(b, + l_int16_const(attnum), + v_maxatt, ""); + + v_startset = LLVMBuildGEP(b, v_tts_nulls, &v_maxatt, 1, ""); + + v_params[0] = v_startset; + v_params[1] = l_int8_const(1); + v_params[2] = LLVMBuildZExt(b, v_set, LLVMInt32Type(), ""); + v_params[3] = l_int32_const(1); + v_params[4] = LLVMConstInt(LLVMInt1Type(), 0, false); + + LLVMBuildCall(b, get_memset(mod), + v_params, lengthof(v_params), ""); + LLVMBuildBr(b, b_find_start); + } + + LLVMPositionBuilderAtEnd(b, b_find_start); + + v_nvalid = LLVMBuildLoad(b, v_nvalidp, ""); + + /* + * Build switch to go from nvalid to the right startblock. Callers + * currently don't have the knowledge, but it'd be good for performance to + * avoid this check when it's known that the slot is empty (e.g. in scan + * nodes). + */ + if (true) + { + LLVMValueRef v_switch = LLVMBuildSwitch(b, v_nvalid, + b_dead, natts); + + for (attnum = 0; attnum < natts; attnum++) + { + LLVMValueRef v_attno = l_int32_const(attnum); + + LLVMAddCase(v_switch, v_attno, attcheckattnoblocks[attnum]); + } + + } + else + { + /* jump from entry block to first block */ + LLVMBuildBr(b, attcheckattnoblocks[0]); + } + + LLVMPositionBuilderAtEnd(b, b_dead); + LLVMBuildUnreachable(b); + + /* + * Iterate over each attribute that needs to be deformed, build code to + * deform it. + */ + for (attnum = 0; attnum < natts; attnum++) + { + Form_pg_attribute att = TupleDescAttr(desc, attnum); + LLVMValueRef v_incby; + int alignto; + LLVMValueRef l_attno = l_int16_const(attnum); + LLVMValueRef v_attdatap; + LLVMValueRef v_resultp; + + /* build block checking whether we did all the necessary attributes */ + LLVMPositionBuilderAtEnd(b, attcheckattnoblocks[attnum]); + + /* + * If this is the first attribute, slot->tts_nvalid was 0. Therefore + * reset offset to 0 to, it be from a previous execution. + */ + if (attnum == 0) + { + LLVMBuildStore(b, l_sizet_const(0), v_offp); + } + + /* + * Build check whether column is available (i.e. whether the tuple has + * that many columns stored). We can avoid the branch if we know + * there's a subsequent NOT NULL column. 
+ */ + if (attnum <= guaranteed_column_number) + { + LLVMBuildBr(b, attstartblocks[attnum]); + } + else + { + LLVMValueRef v_islast; + + v_islast = LLVMBuildICmp(b, LLVMIntEQ, + l_attno, + v_maxatt, + "heap_natts"); + LLVMBuildCondBr(b, v_islast, b_out, attstartblocks[attnum]); + } + LLVMPositionBuilderAtEnd(b, attstartblocks[attnum]); + + /* check for nulls if necessary */ + if (!att->attnotnull) + { + LLVMBasicBlockRef b_ifnotnull; + LLVMBasicBlockRef b_ifnull; + LLVMBasicBlockRef b_next; + LLVMValueRef v_attisnull; + LLVMValueRef v_nullbyteno; + LLVMValueRef v_nullbytemask; + LLVMValueRef v_nullbyte; + LLVMValueRef v_nullbit; + + b_ifnotnull = attcheckalignblocks[attnum]; + b_ifnull = attisnullblocks[attnum]; + + if (attnum + 1 == natts) + b_next = b_out; + else + b_next = attcheckattnoblocks[attnum + 1]; + + v_nullbyteno = l_int32_const(attnum >> 3); + v_nullbytemask = l_int8_const(1 << ((attnum) & 0x07)); + v_nullbyte = l_load_gep1(b, v_bits, v_nullbyteno, "attnullbyte"); + + v_nullbit = LLVMBuildICmp(b, + LLVMIntEQ, + LLVMBuildAnd(b, v_nullbyte, v_nullbytemask, ""), + l_int8_const(0), + "attisnull"); + + v_attisnull = LLVMBuildAnd(b, v_hasnulls, v_nullbit, ""); + + LLVMBuildCondBr(b, v_attisnull, b_ifnull, b_ifnotnull); + + LLVMPositionBuilderAtEnd(b, b_ifnull); + + /* store null-byte */ + LLVMBuildStore(b, + l_int8_const(1), + LLVMBuildGEP(b, v_tts_nulls, &l_attno, 1, "")); + /* store zero datum */ + LLVMBuildStore(b, + l_sizet_const(0), + LLVMBuildGEP(b, v_tts_values, &l_attno, 1, "")); + + LLVMBuildBr(b, b_next); + attguaranteedalign = false; + } + else + { + /* nothing to do */ + LLVMBuildBr(b, attcheckalignblocks[attnum]); + LLVMPositionBuilderAtEnd(b, attisnullblocks[attnum]); + LLVMBuildBr(b, attcheckalignblocks[attnum]); + } + LLVMPositionBuilderAtEnd(b, attcheckalignblocks[attnum]); + + /* determine required alignment */ + if (att->attalign == 'i') + alignto = ALIGNOF_INT; + else if (att->attalign == 'c') + alignto = 1; + else if (att->attalign == 'd') + alignto = ALIGNOF_DOUBLE; + else if (att->attalign == 's') + alignto = ALIGNOF_SHORT; + else + { + elog(ERROR, "unknown alignment"); + alignto = 0; + } + + /* ------ + * Even if alignment is required, we can skip doing it if provably + * unnecessary: + * - first column is guaranteed to be aligned + * - columns following a NOT NULL fixed width datum have known + * alignment, can skip alignment computation if that known alignment + * is compatible with current column. + * ------ + */ + if (alignto > 1 && + (known_alignment < 0 || known_alignment != TYPEALIGN(alignto, known_alignment))) + { + /* + * When accessing a varlena field we have to "peek" to see if we + * are looking at a pad byte or the first byte of a 1-byte-header + * datum. A zero byte must be either a pad byte, or the first + * byte of a correctly aligned 4-byte length word; in either case + * we can align safely. A non-zero byte must be either a 1-byte + * length word, or the first byte of a correctly aligned 4-byte + * length word; in either case we need not align. 
+ */ + if (att->attlen == -1) + { + LLVMValueRef v_possible_padbyte; + LLVMValueRef v_ispad; + LLVMValueRef v_off; + + /* don't know if short varlena or not */ + attguaranteedalign = false; + + v_off = LLVMBuildLoad(b, v_offp, ""); + + v_possible_padbyte = + l_load_gep1(b, v_tupdata_base, v_off, "padbyte"); + v_ispad = + LLVMBuildICmp(b, LLVMIntEQ, + v_possible_padbyte, l_int8_const(0), + "ispadbyte"); + LLVMBuildCondBr(b, v_ispad, + attalignblocks[attnum], + attstoreblocks[attnum]); + } + else + { + LLVMBuildBr(b, attalignblocks[attnum]); + } + + LLVMPositionBuilderAtEnd(b, attalignblocks[attnum]); + + /* translation of alignment code (cf TYPEALIGN()) */ + { + LLVMValueRef v_off_aligned; + LLVMValueRef v_off = LLVMBuildLoad(b, v_offp, ""); + + /* ((ALIGNVAL) - 1) */ + LLVMValueRef v_alignval = l_sizet_const(alignto - 1); + + /* ((uintptr_t) (LEN) + ((ALIGNVAL) - 1)) */ + LLVMValueRef v_lh = LLVMBuildAdd(b, v_off, v_alignval, ""); + + /* ~((uintptr_t) ((ALIGNVAL) - 1)) */ + LLVMValueRef v_rh = l_sizet_const(~(alignto - 1)); + + v_off_aligned = LLVMBuildAnd(b, v_lh, v_rh, "aligned_offset"); + + LLVMBuildStore(b, v_off_aligned, v_offp); + } + + /* + * As alignment either was unnecessary or has been performed, we + * now know the current alignment. This is only safe because this + * value isn't used for varlena and nullable columns. + */ + if (known_alignment >= 0) + { + Assert(known_alignment != 0); + known_alignment = TYPEALIGN(alignto, known_alignment); + } + + LLVMBuildBr(b, attstoreblocks[attnum]); + LLVMPositionBuilderAtEnd(b, attstoreblocks[attnum]); + } + else + { + LLVMPositionBuilderAtEnd(b, attcheckalignblocks[attnum]); + LLVMBuildBr(b, attalignblocks[attnum]); + LLVMPositionBuilderAtEnd(b, attalignblocks[attnum]); + LLVMBuildBr(b, attstoreblocks[attnum]); + } + LLVMPositionBuilderAtEnd(b, attstoreblocks[attnum]); + + /* + * Store the current offset if known to be constant. That allows LLVM + * to generate better code. Without that LLVM can't figure out that + * the offset might be constant due to the jumps for previously + * decoded columns. + */ + if (attguaranteedalign) + { + Assert(known_alignment >= 0); + LLVMBuildStore(b, l_sizet_const(known_alignment), v_offp); + } + + /* compute what following columns are aligned to */ + if (att->attlen < 0) + { + /* can't guarantee any alignment after variable length field */ + known_alignment = -1; + attguaranteedalign = false; + } + else if (att->attnotnull && attguaranteedalign && known_alignment >= 0) + { + /* + * If the offset to the column was previously known a NOT NULL & + * fixed width column guarantees that alignment is just the + * previous alignment plus column width. + */ + Assert(att->attlen > 0); + known_alignment += att->attlen; + } + else if (att->attnotnull && (att->attlen % alignto) == 0) + { + /* + * After a NOT NULL fixed-width column with a length that is a + * multiple of its alignment requirement, we know the following + * column is aligned to at least the current column's alignment. 
+ */ + Assert(att->attlen > 0); + known_alignment = alignto; + Assert(known_alignment > 0); + attguaranteedalign = false; + } + else + { + known_alignment = -1; + attguaranteedalign = false; + } + + + /* compute address to load data from */ + { + LLVMValueRef v_off = LLVMBuildLoad(b, v_offp, ""); + + v_attdatap = + LLVMBuildGEP(b, v_tupdata_base, &v_off, 1, ""); + } + + /* compute address to store value at */ + v_resultp = LLVMBuildGEP(b, v_tts_values, &l_attno, 1, ""); + + /* store null-byte (false) */ + LLVMBuildStore(b, l_int8_const(0), + LLVMBuildGEP(b, v_tts_nulls, &l_attno, 1, "")); + + /* + * Store datum. For byval datums copy the value, extend to Datum's + * width, and store. For byref types, store pointer to data. + */ + if (att->attbyval) + { + LLVMValueRef v_tmp_loaddata; + LLVMTypeRef vartypep = + LLVMPointerType(LLVMIntType(att->attlen * 8), 0); + + v_tmp_loaddata = + LLVMBuildPointerCast(b, v_attdatap, vartypep, ""); + v_tmp_loaddata = LLVMBuildLoad(b, v_tmp_loaddata, "attr_byval"); + v_tmp_loaddata = LLVMBuildZExt(b, v_tmp_loaddata, TypeSizeT, ""); + + LLVMBuildStore(b, v_tmp_loaddata, v_resultp); + } + else + { + LLVMValueRef v_tmp_loaddata; + + /* store pointer */ + v_tmp_loaddata = + LLVMBuildPtrToInt(b, + v_attdatap, + TypeSizeT, + "attr_ptr"); + LLVMBuildStore(b, v_tmp_loaddata, v_resultp); + } + + /* increment data pointer */ + if (att->attlen > 0) + { + v_incby = l_sizet_const(att->attlen); + } + else if (att->attlen == -1) + { + v_incby = LLVMBuildCall(b, + llvm_get_decl(mod, FuncVarsizeAny), + &v_attdatap, 1, + "varsize_any"); + l_callsite_ro(v_incby); + l_callsite_alwaysinline(v_incby); + } + else if (att->attlen == -2) + { + v_incby = LLVMBuildCall(b, + llvm_get_decl(mod, FuncStrlen), + &v_attdatap, 1, "strlen"); + + l_callsite_ro(v_incby); + + /* add 1 for NUL byte */ + v_incby = LLVMBuildAdd(b, v_incby, l_sizet_const(1), ""); + } + else + { + Assert(false); + v_incby = NULL; /* silence compiler */ + } + + if (attguaranteedalign) + { + Assert(known_alignment >= 0); + LLVMBuildStore(b, l_sizet_const(known_alignment), v_offp); + } + else + { + LLVMValueRef v_off = LLVMBuildLoad(b, v_offp, ""); + + v_off = LLVMBuildAdd(b, v_off, v_incby, "increment_offset"); + LLVMBuildStore(b, v_off, v_offp); + } + + /* + * jump to next block, unless last possible column, or all desired + * (available) attributes have been fetched. 
+ */ + if (attnum + 1 == natts) + { + /* jump out */ + LLVMBuildBr(b, b_out); + } + else + { + LLVMBuildBr(b, attcheckattnoblocks[attnum + 1]); + } + } + + + /* build block that returns */ + LLVMPositionBuilderAtEnd(b, b_out); + + { + LLVMValueRef v_off = LLVMBuildLoad(b, v_offp, ""); + + LLVMBuildStore(b, l_int32_const(natts), v_nvalidp); + v_off = LLVMBuildTrunc(b, v_off, LLVMInt32Type(), ""); + LLVMBuildStore(b, v_off, v_slotoffp); + LLVMBuildStore(b, l_int8_const(1), v_slowp); + LLVMBuildRetVoid(b); + } + + LLVMDisposeBuilder(b); + + return v_deform_fn; +} + +static LLVMValueRef +get_memset(LLVMModuleRef mod) +{ + LLVMTypeRef sig; + LLVMValueRef v_fn; + LLVMTypeRef param_types[5]; + const char *nm = "llvm.memset.p0i8.i32"; + + v_fn = LLVMGetNamedFunction(mod, nm); + if (v_fn) + return v_fn; + + param_types[0] = LLVMPointerType(LLVMInt8Type(), 0); /* addr */ + param_types[1] = LLVMInt8Type(); /* val */ + param_types[2] = LLVMInt32Type(); /* len */ + param_types[3] = LLVMInt32Type(); /* align */ + param_types[4] = LLVMInt1Type(); /* volatile */ + + sig = LLVMFunctionType(LLVMVoidType(), param_types, lengthof(param_types), 0); + v_fn = LLVMAddFunction(mod, nm, sig); + + LLVMSetFunctionCallConv(v_fn, LLVMCCallConv); + + Assert(LLVMGetIntrinsicID(v_fn)); + + return v_fn; +} diff --git a/src/backend/jit/llvm/llvmjit_expr.c b/src/backend/jit/llvm/llvmjit_expr.c index 667fb01d3b..2074b067ba 100644 --- a/src/backend/jit/llvm/llvmjit_expr.c +++ b/src/backend/jit/llvm/llvmjit_expr.c @@ -152,7 +152,7 @@ llvm_compile_expr(ExprState *state) param_types[0] = l_ptr(StructExprState); /* state */ param_types[1] = l_ptr(StructExprContext); /* econtext */ - param_types[2] = l_ptr(TypeParamBool); /* isnull */ + param_types[2] = l_ptr(TypeParamBool); /* isnull */ eval_sig = LLVMFunctionType(TypeSizeT, param_types, lengthof(param_types), @@ -272,6 +272,7 @@ llvm_compile_expr(ExprState *state) case EEOP_OUTER_FETCHSOME: case EEOP_SCAN_FETCHSOME: { + TupleDesc desc = NULL; LLVMValueRef v_slot; LLVMBasicBlockRef b_fetch; LLVMValueRef v_nvalid; @@ -279,17 +280,38 @@ llvm_compile_expr(ExprState *state) b_fetch = l_bb_before_v(opblocks[i + 1], "op.%d.fetch", i); + if (op->d.fetch.known_desc) + desc = op->d.fetch.known_desc; + if (opcode == EEOP_INNER_FETCHSOME) { + PlanState *is = innerPlanState(parent); + v_slot = v_innerslot; + + if (!desc && + is && + is->ps_ResultTupleSlot && + is->ps_ResultTupleSlot->tts_fixedTupleDescriptor) + desc = is->ps_ResultTupleSlot->tts_tupleDescriptor; } else if (opcode == EEOP_OUTER_FETCHSOME) { + PlanState *os = outerPlanState(parent); + v_slot = v_outerslot; + + if (!desc && + os && + os->ps_ResultTupleSlot && + os->ps_ResultTupleSlot->tts_fixedTupleDescriptor) + desc = os->ps_ResultTupleSlot->tts_tupleDescriptor; } else { v_slot = v_scanslot; + if (!desc && parent) + desc = parent->scandesc; } /* @@ -308,6 +330,27 @@ llvm_compile_expr(ExprState *state) LLVMPositionBuilderAtEnd(b, b_fetch); + /* + * If the tupledesc of the to-be-deformed tuple is known, + * and JITing of deforming is enabled, build deform + * function specific to tupledesc and the exact number of + * to-be-extracted attributes. 
+ */ + if (desc && (context->base.flags & PGJIT_DEFORM)) + { + LLVMValueRef params[1]; + LLVMValueRef l_jit_deform; + + l_jit_deform = + slot_compile_deform(context, desc, + op->d.fetch.last_var); + params[0] = v_slot; + + LLVMBuildCall(b, l_jit_deform, + params, lengthof(params), ""); + + } + else { LLVMValueRef params[2]; diff --git a/src/backend/jit/llvm/llvmjit_types.c b/src/backend/jit/llvm/llvmjit_types.c index 84bc140737..ad29bafa8f 100644 --- a/src/backend/jit/llvm/llvmjit_types.c +++ b/src/backend/jit/llvm/llvmjit_types.c @@ -96,6 +96,7 @@ FunctionReturningBool(void) void *referenced_functions[] = { strlen, + varsize_any, slot_getsomeattrs, heap_getsysattr, MakeExpandedObjectReadOnlyInternal, diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index 50f858e420..52c21e6870 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -550,6 +550,8 @@ standard_planner(Query *parse, int cursorOptions, ParamListInfo boundParams) */ if (jit_expressions) result->jitFlags |= PGJIT_EXPR; + if (jit_tuple_deforming) + result->jitFlags |= PGJIT_DEFORM; } return result; diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c index e6d79873dd..d075cb139a 100644 --- a/src/backend/utils/misc/guc.c +++ b/src/backend/utils/misc/guc.c @@ -1788,6 +1788,17 @@ static struct config_bool ConfigureNamesBool[] = NULL, NULL, NULL }, + { + {"jit_tuple_deforming", PGC_USERSET, DEVELOPER_OPTIONS, + gettext_noop("Allow JIT compilation of tuple deforming."), + NULL, + GUC_NOT_IN_SAMPLE + }, + &jit_tuple_deforming, + true, + NULL, NULL, NULL + }, + /* End-of-list marker */ { {NULL, 0, 0, NULL, NULL}, NULL, false, NULL, NULL, NULL diff --git a/src/include/access/htup_details.h b/src/include/access/htup_details.h index 3616a17b6f..67342ef63d 100644 --- a/src/include/access/htup_details.h +++ b/src/include/access/htup_details.h @@ -829,5 +829,6 @@ extern void heap_free_minimal_tuple(MinimalTuple mtup); extern MinimalTuple heap_copy_minimal_tuple(MinimalTuple mtup); extern HeapTuple heap_tuple_from_minimal_tuple(MinimalTuple mtup); extern MinimalTuple minimal_tuple_from_heap_tuple(HeapTuple htup); +extern size_t varsize_any(void *p); #endif /* HTUP_DETAILS_H */ diff --git a/src/include/executor/execExpr.h b/src/include/executor/execExpr.h index 6fc4ed640b..f4617a28fa 100644 --- a/src/include/executor/execExpr.h +++ b/src/include/executor/execExpr.h @@ -262,6 +262,7 @@ typedef struct ExprEvalStep { /* attribute number up to which to fetch (inclusive) */ int last_var; + TupleDesc known_desc; } fetch; /* for EEOP_INNER/OUTER/SCAN_[SYS]VAR[_FIRST] */ diff --git a/src/include/jit/jit.h b/src/include/jit/jit.h index 703c5011da..efcd6a52cf 100644 --- a/src/include/jit/jit.h +++ b/src/include/jit/jit.h @@ -21,6 +21,7 @@ #define PGJIT_OPT3 1 << 1 /* reserved for PGJIT_INLINE */ #define PGJIT_EXPR 1 << 3 +#define PGJIT_DEFORM 1 << 4 typedef struct JitContext @@ -67,6 +68,7 @@ extern bool jit_debugging_support; extern bool jit_dump_bitcode; extern bool jit_expressions; extern bool jit_profiling_support; +extern bool jit_tuple_deforming; extern double jit_above_cost; extern double jit_optimize_above_cost; diff --git a/src/include/jit/llvmjit.h b/src/include/jit/llvmjit.h index cc908477e8..9443a568d8 100644 --- a/src/include/jit/llvmjit.h +++ b/src/include/jit/llvmjit.h @@ -32,6 +32,7 @@ extern "C" #include "fmgr.h" #include "jit/jit.h" #include "nodes/pg_list.h" +#include "access/tupdesc.h" typedef struct LLVMJitContext @@ -75,6 +76,7 @@ 
extern LLVMTypeRef StructAggStatePerGroupData; extern LLVMValueRef AttributeTemplate; extern LLVMValueRef FuncStrlen; +extern LLVMValueRef FuncVarsizeAny; extern LLVMValueRef FuncSlotGetsomeattrs; extern LLVMValueRef FuncHeapGetsysattr; extern LLVMValueRef FuncMakeExpandedObjectReadOnlyInternal; @@ -107,6 +109,7 @@ extern LLVMValueRef llvm_function_reference(LLVMJitContext *context, **************************************************************************** */ extern bool llvm_compile_expr(struct ExprState *state); +extern LLVMValueRef slot_compile_deform(struct LLVMJitContext *context, TupleDesc desc, int natts); /* **************************************************************************** diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h index 2c2d2823c0..6070a42b6f 100644 --- a/src/include/nodes/execnodes.h +++ b/src/include/nodes/execnodes.h @@ -920,6 +920,7 @@ typedef struct PlanState ExprState *qual; /* boolean qual condition */ struct PlanState *lefttree; /* input plan tree(s) */ struct PlanState *righttree; + List *initPlan; /* Init SubPlanState nodes (un-correlated expr * subselects) */ List *subPlan; /* SubPlanState nodes in my expressions */ @@ -935,6 +936,13 @@ typedef struct PlanState TupleTableSlot *ps_ResultTupleSlot; /* slot for my result tuples */ ExprContext *ps_ExprContext; /* node's expression-evaluation context */ ProjectionInfo *ps_ProjInfo; /* info for doing tuple projection */ + + /* + * Scanslot's descriptor if known. This is a bit of a hack, but otherwise + * it's hard for expression compilation to optimize based on the + * descriptor, without encoding knowledge about all executor nodes. + */ + TupleDesc scandesc; } PlanState; /* ----------------
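
Note (illustrative, not part of the patch): the header comment of llvmjit_deform.c says the win comes from compile-time knowledge of the tuple descriptor -- fixed column widths, NOT NULL-ness, and so on. As a plain-C sketch of what that means, here is roughly what the function produced by slot_compile_deform() boils down to for a hypothetical descriptor "(a int4 NOT NULL, b int8 NOT NULL)" with natts = 2, written against the pre-v12 TupleTableSlot fields this patch targets. The table, function name, and offsets are invented for illustration; the real output is LLVM IR, not C.

#include "postgres.h"

#include "access/htup_details.h"
#include "executor/tuptable.h"

/*
 * Sketch only: with every column NOT NULL and fixed width, the null-bitmap
 * tests and the alignment arithmetic fold away, so each column sits at a
 * compile-time-constant offset -- none of which the generic
 * slot_deform_tuple() loop can assume.
 */
static void
deform_sketch(TupleTableSlot *slot)
{
	HeapTupleHeader tup = slot->tts_tuple->t_data;
	char	   *data = (char *) tup + tup->t_hoff;

	/* column "a": int4, known not null, constant offset 0 */
	slot->tts_isnull[0] = false;
	slot->tts_values[0] = (Datum) *(uint32 *) (data + 0);	/* zero-extended, as the emitted ZExt does */

	/* column "b": int8, known not null, constant offset 8 (double-aligned) */
	slot->tts_isnull[1] = false;
	slot->tts_values[1] = (Datum) *(uint64 *) (data + 8);

	/* bookkeeping mirroring the generated function's exit block */
	slot->tts_nvalid = 2;
	slot->tts_off = 16;
	slot->tts_slow = true;
}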
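
Note (illustrative, not part of the patch): the block comment above the attlen == -1 handling describes "peeking" at a possible pad byte before a varlena column. Restated as plain C for a typalign 'i' varlena (a sketch; the interpreted path applies the same trick via att_align_pointer() in access/tupmacs.h):

#include "postgres.h"

/*
 * Sketch only: decide whether the offset of a varlena column needs to be
 * aligned by inspecting the byte at the not-yet-aligned position.
 */
static Size
varlena_align_sketch(const char *tupdata, Size off)
{
	/*
	 * A zero byte is either a pad byte or the first byte of a correctly
	 * aligned 4-byte length word: aligning is safe.  A non-zero byte is a
	 * 1-byte varlena header, or the first byte of an already aligned 4-byte
	 * header: no alignment is needed, which is what keeps short varlenas
	 * cheap to store and to deform.
	 */
	if (tupdata[off] == 0)
		off = TYPEALIGN(ALIGNOF_INT, off);

	return off;
}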