Index: doc/src/sgml/errcodes.sgml =================================================================== RCS file: /home/neilc/postgres/cvs_root/pgsql/doc/src/sgml/errcodes.sgml,v retrieving revision 1.22 diff -p -c -r1.22 errcodes.sgml *** doc/src/sgml/errcodes.sgml 31 Jan 2007 20:56:17 -0000 1.22 --- doc/src/sgml/errcodes.sgml 17 May 2007 23:36:59 -0000 *************** *** 440,445 **** --- 440,451 ---- + 2202H + INVALID SAMPLE SIZE + invalid_sample_size + + + 22009 INVALID TIME ZONE DISPLACEMENT VALUE invalid_time_zone_displacement_value Index: doc/src/sgml/ref/select.sgml =================================================================== RCS file: /home/neilc/postgres/cvs_root/pgsql/doc/src/sgml/ref/select.sgml,v retrieving revision 1.100 diff -p -c -r1.100 select.sgml *** doc/src/sgml/ref/select.sgml 15 May 2007 19:13:55 -0000 1.100 --- doc/src/sgml/ref/select.sgml 17 May 2007 23:36:59 -0000 *************** SELECT [ ALL | DISTINCT [ ON ( from_item can be one of: ! [ ONLY ] table_name [ * ] [ [ AS ] alias [ ( column_alias [, ...] ) ] ] ( select ) [ AS ] alias [ ( column_alias [, ...] ) ] function_name ( [ argument [, ...] ] ) [ AS ] alias [ ( column_alias [, ...] | column_definition [, ...] ) ] function_name ( [ argument [, ...] ] ) AS ( column_definition [, ...] ) --- 34,40 ---- where from_item can be one of: ! [ ONLY ] table_name [ * ] [ [ AS ] alias [ ( column_alias [, ...] ) ] ] [ TABLESAMPLE { BERNOULLI | SYSTEM } ( sample_percent ) [ REPEATABLE ( repeat_seed ) ] ] ( select ) [ AS ] alias [ ( column_alias [, ...] ) ] function_name ( [ argument [, ...] ] ) [ AS ] alias [ ( column_alias [, ...] | column_definition [, ...] ) ] function_name ( [ argument [, ...] ] ) AS ( column_definition [, ...] ) *************** where fro *** 215,221 **** ! select --- 215,258 ---- ! ! ! ! TABLESAMPLE ! ! TABLESAMPLE { BERNOULLI | SYSTEM } ( sample_percent ) [ REPEATABLE ( repeat_seed ) ] ! ! ! Indicates that a table should be randomly sampled from, rather ! 
than exhaustively scanned. The scan will visit approximately ! sample_percent percent of ! the blocks of the table. In each sampled block, all the rows ! will be returned. If the number of visible rows in every block ! is approximately equal, this will result in sampling about ! sample_percent ! percent of the rows in the table. ! ! ! ! In PostgreSQL 8.3, only the ! SYSTEM sample method is supported. ! ! ! ! The REPEATABLE clause can be used to ! produce consistent results from a probabilistic table ! scan. The repeat_seed is ! used to seed the random number generator before sampling ! begins. Therefore, the same sequence of numbers will be ! generated whenever the same REPEATABLE ! clause is specified. Note that by itself, this is not ! sufficient to ensure that the sampling query will produce the ! same results: the query's snapshot could have changed, or the ! query might contain non-deterministic elements. ! ! ! ! select Index: src/backend/commands/explain.c =================================================================== RCS file: /home/neilc/postgres/cvs_root/pgsql/src/backend/commands/explain.c,v retrieving revision 1.163 diff -p -c -r1.163 explain.c *** src/backend/commands/explain.c 4 May 2007 21:29:52 -0000 1.163 --- src/backend/commands/explain.c 17 May 2007 23:36:59 -0000 *************** explain_outNode(StringInfo str, *** 525,530 **** --- 525,533 ---- case T_ValuesScan: pname = "Values Scan"; break; + case T_SampleScan: + pname = "Sample Scan"; + break; case T_Material: pname = "Materialize"; break; *************** explain_outNode(StringInfo str, *** 597,602 **** --- 600,606 ---- case T_SeqScan: case T_BitmapHeapScan: case T_TidScan: + case T_SampleScan: if (((Scan *) plan)->scanrelid > 0) { RangeTblEntry *rte = rt_fetch(((Scan *) plan)->scanrelid, *************** explain_outNode(StringInfo str, *** 743,748 **** --- 747,753 ---- case T_SeqScan: case T_FunctionScan: case T_ValuesScan: + case T_SampleScan: show_scan_qual(plan->qual, "Filter", ((Scan 
*) plan)->scanrelid, Index: src/backend/executor/Makefile =================================================================== RCS file: /home/neilc/postgres/cvs_root/pgsql/src/backend/executor/Makefile,v retrieving revision 1.25 diff -p -c -r1.25 Makefile *** src/backend/executor/Makefile 20 Jan 2007 17:16:11 -0000 1.25 --- src/backend/executor/Makefile 17 May 2007 23:36:59 -0000 *************** OBJS = execAmi.o execGrouping.o execJunk *** 18,25 **** nodeBitmapAnd.o nodeBitmapOr.o \ nodeBitmapHeapscan.o nodeBitmapIndexscan.o nodeHash.o \ nodeHashjoin.o nodeIndexscan.o nodeMaterial.o nodeMergejoin.o \ ! nodeNestloop.o nodeFunctionscan.o nodeResult.o nodeSeqscan.o \ ! nodeSetOp.o nodeSort.o nodeUnique.o \ nodeValuesscan.o nodeLimit.o nodeGroup.o \ nodeSubplan.o nodeSubqueryscan.o nodeTidscan.o tstoreReceiver.o spi.o --- 18,25 ---- nodeBitmapAnd.o nodeBitmapOr.o \ nodeBitmapHeapscan.o nodeBitmapIndexscan.o nodeHash.o \ nodeHashjoin.o nodeIndexscan.o nodeMaterial.o nodeMergejoin.o \ ! nodeNestloop.o nodeFunctionscan.o nodeResult.o nodeSamplescan.o \ ! 
nodeSeqscan.o nodeSetOp.o nodeSort.o nodeUnique.o \ nodeValuesscan.o nodeLimit.o nodeGroup.o \ nodeSubplan.o nodeSubqueryscan.o nodeTidscan.o tstoreReceiver.o spi.o Index: src/backend/executor/execAmi.c =================================================================== RCS file: /home/neilc/postgres/cvs_root/pgsql/src/backend/executor/execAmi.c,v retrieving revision 1.92 diff -p -c -r1.92 execAmi.c *** src/backend/executor/execAmi.c 19 Feb 2007 02:23:11 -0000 1.92 --- src/backend/executor/execAmi.c 17 May 2007 23:36:59 -0000 *************** ExecMayReturnRawTuples(PlanState *node) *** 440,445 **** --- 440,446 ---- switch (nodeTag(node)) { /* Table scan nodes */ + case T_SampleScanState: case T_SeqScanState: case T_IndexScanState: case T_BitmapHeapScanState: Index: src/backend/executor/execProcnode.c =================================================================== RCS file: /home/neilc/postgres/cvs_root/pgsql/src/backend/executor/execProcnode.c,v retrieving revision 1.61 diff -p -c -r1.61 execProcnode.c *** src/backend/executor/execProcnode.c 27 Feb 2007 01:11:25 -0000 1.61 --- src/backend/executor/execProcnode.c 17 May 2007 23:36:59 -0000 *************** *** 95,100 **** --- 95,101 ---- #include "executor/nodeMergejoin.h" #include "executor/nodeNestloop.h" #include "executor/nodeResult.h" + #include "executor/nodeSamplescan.h" #include "executor/nodeSeqscan.h" #include "executor/nodeSetOp.h" #include "executor/nodeSort.h" *************** ExecInitNode(Plan *node, EState *estate, *** 200,205 **** --- 201,211 ---- estate, eflags); break; + case T_SampleScan: + result = (PlanState *) ExecInitSampleScan((SampleScan *) node, + estate, eflags); + break; + /* * join nodes */ *************** ExecProcNode(PlanState *node) *** 360,365 **** --- 366,375 ---- result = ExecValuesScan((ValuesScanState *) node); break; + case T_SampleScanState: + result = ExecSampleScan((SampleScanState *) node); + break; + /* * join nodes */ *************** ExecCountSlotsNode(Plan *node) *** 
534,539 **** --- 544,552 ---- case T_ValuesScan: return ExecCountSlotsValuesScan((ValuesScan *) node); + case T_SampleScan: + return ExecCountSlotsSampleScan((SampleScan *) node); + /* * join nodes */ *************** ExecEndNode(PlanState *node) *** 663,668 **** --- 676,685 ---- ExecEndValuesScan((ValuesScanState *) node); break; + case T_SampleScanState: + ExecEndSampleScan((SampleScanState *) node); + break; + /* * join nodes */ Index: src/backend/executor/nodeSamplescan.c =================================================================== RCS file: src/backend/executor/nodeSamplescan.c diff -N src/backend/executor/nodeSamplescan.c *** /dev/null 1 Jan 1970 00:00:00 -0000 --- src/backend/executor/nodeSamplescan.c 21 May 2007 18:13:20 -0000 *************** *** 0 **** --- 1,347 ---- + /*------------------------------------------------------------------------- + * + * nodeSamplescan.c + * Support routines for TABLESAMPLE-based scans of a relation + * + * Copyright (c) 2007, PostgreSQL Global Development Group + * + * + * IDENTIFICATION + * $PostgreSQL$ + * + *------------------------------------------------------------------------- + */ + #include "postgres.h" + + #include + + #include "access/heapam.h" + #include "executor/executor.h" + #include "executor/nodeSamplescan.h" + #include "parser/parsetree.h" + + static TupleTableSlot *SampleGetNext(SampleScanState *node); + static void LoadNextSampleBuffer(SampleScanState *node); + static int get_rand_in_range(int a, int b); + + /* + * Initialize the run-time state of the sample scan on a single + * relation. This requires setting up various executor machinery and + * initializing the state of the PRNG. 
+ */ + SampleScanState * + ExecInitSampleScan(SampleScan *node, EState *estate, int eflags) + { + SampleScanState *scanstate; + Relation rel; + int seed; + + /* We don't expect to have any child plan nodes */ + Assert(outerPlan(node) == NULL); + Assert(innerPlan(node) == NULL); + + scanstate = makeNode(SampleScanState); + scanstate->ss.ps.plan = (Plan *) node; + scanstate->ss.ps.state = estate; + scanstate->cur_buf = InvalidBuffer; + scanstate->cur_offset = FirstOffsetNumber; + scanstate->cur_blkno = InvalidBlockNumber; + scanstate->need_new_buf = true; + + ExecAssignExprContext(estate, &scanstate->ss.ps); + + /* + * Initialize the expression contexts required for evaluating the + * target list and the scan's qualifiers, if any. We don't need to + * do qual evaluation ourselves (ExecScan does it), but we do need + * to do the required initialization. + */ + scanstate->ss.ps.targetlist = (List *) + ExecInitExpr((Expr *) node->scan.plan.targetlist, + (PlanState *) scanstate); + scanstate->ss.ps.qual = (List *) + ExecInitExpr((Expr *) node->scan.plan.qual, + (PlanState *) scanstate); + + #define SAMPLESCAN_NSLOTS 2 + + /* + * Initialize the tuple table slots required by this scan: we need a + * slot for the current result of the scan, and a slot for the + * current scan tuple. + */ + ExecInitResultTupleSlot(estate, &scanstate->ss.ps); + ExecInitScanTupleSlot(estate, &scanstate->ss); + + /* + * Open and lock the heap relation we're going to scan. + * ExecOpenScanRelation() will acquire the appropriate lock, + * depending on whether we're scanning this table with FOR UPDATE, + * FOR SHARE, or in normal mode. + */ + rel = ExecOpenScanRelation(estate, node->scan.scanrelid); + scanstate->ss.ss_currentRelation = rel; + + /* + * Determine the number of blocks in the relation. We need only do + * this once for a given scan: if any new blocks are added to the + * relation, they won't be visible to this transaction anyway. 
+ */ + scanstate->nblocks = RelationGetNumberOfBlocks(rel); + + ExecAssignScanType(&scanstate->ss, RelationGetDescr(rel)); + + scanstate->ss.ps.ps_TupFromTlist = false; + + /* Initialize result tuple type and projection info */ + ExecAssignResultTypeFromTL(&scanstate->ss.ps); + ExecAssignScanProjectionInfo(&scanstate->ss); + + /* + * Setup PRNG state; seed with the REPEATABLE clause, if any. We + * can't just use srandom(), since there could be multiple + * concurrent sample scans. + * + * XXX: using time() to seed the PRNG in the non-repeatable case + * could probably be improved. Different state array sizes could + * also be tried: do we need high-quality random numbers? + */ + if (node->sample_info->is_repeatable) + seed = node->sample_info->repeat_seed; + else + seed = (int) time(NULL); + + scanstate->prev_rand_state = initstate(seed, scanstate->rand_state, + sizeof(scanstate->rand_state)); + setstate(scanstate->prev_rand_state); + + return scanstate; + } + + /* + * Return the next tuple in the sample scan's result set. + * + * The scan's private PRNG state is installed for the duration of the + * call, and the caller's PRNG state is restored before we return, + * whether we return normally or exit via an error. + */ + TupleTableSlot * + ExecSampleScan(SampleScanState *node) + { + char *prev_state; + TupleTableSlot *result; + + /* + * Install our PRNG state. Do this before entering PG_TRY(), so that + * "prev_state" is always set by the time the PG_CATCH() block runs. + */ + prev_state = setstate(node->rand_state); + + PG_TRY(); + { + /* + * ExecScan() provides generic infrastructure for "scan-like" + * executor nodes. It takes a ScanState describing the scan + * and a function pointer to an "access method". The access + * method is invoked repeatedly by ExecScan(); for each call, + * the access method should return the next tuple produced by + * the scan. ExecScan() then handles checking any relevant + * scan qualifiers, performing projection if necessary, and + * then stashing the result tuple in the appropriate + * TupleTableSlot. 
+ * + * Note that we must not "return" from inside the PG_TRY() block: + * that would skip PG_END_TRY(), as well as the code below that + * restores the previous PRNG state. + */ + result = ExecScan((ScanState *) node, + (ExecScanAccessMtd) SampleGetNext); + } + PG_CATCH(); + { + setstate(prev_state); + PG_RE_THROW(); + } + PG_END_TRY(); + + setstate(prev_state); + + return result; + } + + /* + * Access method passed to ExecScan(): fetch the next visible tuple from + * the blocks chosen by LoadNextSampleBuffer() into the scan tuple slot. + * Returns NULL once there are no more blocks to sample. + */ + static TupleTableSlot * + SampleGetNext(SampleScanState *node) + { + EState *estate; + TupleTableSlot *slot; + Relation rel; + Index scanrelid; + + estate = node->ss.ps.state; + slot = node->ss.ss_ScanTupleSlot; + rel = node->ss.ss_currentRelation; + scanrelid = ((SampleScan *) node->ss.ps.plan)->scan.scanrelid; + + while (true) + { + OffsetNumber max_offset; + Page page; + + /* + * If we don't have a valid buffer, choose the next block to + * sample and load it into memory. + */ + if (node->need_new_buf) + { + LoadNextSampleBuffer(node); + + /* + * We're out of blocks in the rel, so we're done. Leave + * "need_new_buf" set, so that we don't try to lock an + * invalid buffer if we are called again after the scan + * has finished. + */ + if (!BufferIsValid(node->cur_buf)) + break; + + node->need_new_buf = false; + } + + /* + * Iterate through the current block, checking for heap tuples + * that are visible to our transaction. Return each such + * candidate match: ExecScan() takes care of checking whether + * the tuple satisfies the scan's quals. + */ + LockBuffer(node->cur_buf, BUFFER_LOCK_SHARE); + page = BufferGetPage(node->cur_buf); + max_offset = PageGetMaxOffsetNumber(page); + while (node->cur_offset <= max_offset) + { + /* + * Postgres uses a somewhat unusual API for specifying the + * location of the tuple we want to fetch. We've already + * allocated space for a HeapTupleData; to indicate the TID + * we want to fetch into the HeapTuple, we fill in its + * "t_self" field, and then ask the heap access manager to + * fetch the tuple's data for us. 
+ */ + ItemPointerSet(&node->cur_tup.t_self, + node->cur_blkno, node->cur_offset); + + node->cur_offset++; + + if (heap_release_fetch(rel, estate->es_snapshot, + &node->cur_tup, &node->cur_buf, + true, NULL)) + { + LockBuffer(node->cur_buf, BUFFER_LOCK_UNLOCK); + + ExecStoreTuple(&node->cur_tup, + slot, + node->cur_buf, + false); + + return slot; + } + } + + /* Out of tuples on this page, so go on to the next one */ + LockBuffer(node->cur_buf, BUFFER_LOCK_UNLOCK); + node->need_new_buf = true; + } + + /* No more blocks to scan, so we're done: clear result slot */ + ExecClearTuple(slot); + return NULL; + } + + /* + * Choose the next block from the relation to sample. This is called + * when (a) we haven't sampled any blocks from the relation yet + * (SampleScanState.cur_buf == InvalidBuffer) (b) we've examined every + * tuple in the block we're currently sampling. + * + * Each remaining block of the heap is considered in turn, and is chosen + * if a random number in [0, 100) is less than "sample_percent". + * + * If we've run out of blocks in the relation, we leave "cur_buf" as + * InvalidBuffer. + */ + static void + LoadNextSampleBuffer(SampleScanState *node) + { + SampleScan *plan_node = (SampleScan *) node->ss.ps.plan; + + while (true) + { + /* + * If this is the first time through, start at the beginning of + * the heap. Otherwise move on to the next block, but never + * advance past the end of the heap. + */ + if (!BlockNumberIsValid(node->cur_blkno)) + node->cur_blkno = 0; + else if (node->cur_blkno < node->nblocks) + node->cur_blkno++; + + /* + * If we've reached the end of the heap, we're done. Make sure + * to unpin the current buffer, if any. This must be checked + * before deciding whether to skip the block: otherwise a sample + * percentage of zero would make us loop forever. + */ + if (node->cur_blkno >= node->nblocks) + { + if (BufferIsValid(node->cur_buf)) + { + ReleaseBuffer(node->cur_buf); + node->cur_buf = InvalidBuffer; + } + + break; + } + + /* Randomly decide whether this block is part of the sample */ + if (get_rand_in_range(0, 100) >= plan_node->sample_info->sample_percent) + continue; + + /* + * Okay, we've chosen another block to read: ask the bufmgr to + * load it into the buffer pool for us, pin it, and release the + * pin we hold on the previous "cur_buf". 
For the case that + * "cur_buf" == InvalidBuffer, ReleaseAndReadBuffer() is + * equivalent to ReadBuffer(). + */ + node->cur_buf = ReleaseAndReadBuffer(node->cur_buf, + node->ss.ss_currentRelation, + node->cur_blkno); + node->cur_offset = FirstOffsetNumber; + break; + } + } + + /* + * Returns a randomly-generated integer x, such that a <= x < b. The + * caller must ensure that a < b. + */ + static int + get_rand_in_range(int a, int b) + { + Assert(a < b); + + /* + * The width of the range is (b - a), not b: reducing modulo b would + * yield values in [a, a + b), which is only correct when a is zero. + * + * XXX: Using modulus takes the low-order bits of the random + * number; since the high-order bits may contain more entropy with + * some PRNGs, we should probably use those instead. + */ + return a + (int) (random() % (b - a)); + } + + /* + * Count the number of tuple table slots required by an instance of + * the SampleScan. + */ + int + ExecCountSlotsSampleScan(SampleScan *node) + { + return SAMPLESCAN_NSLOTS; + } + + /* + * Shutdown this scan. This function should generally be symmetric with + * ExecInitSampleScan(): we ought to clean up after ourselves. + */ + void + ExecEndSampleScan(SampleScanState *node) + { + setstate(node->prev_rand_state); + + ExecFreeExprContext(&node->ss.ps); + + ExecClearTuple(node->ss.ps.ps_ResultTupleSlot); + ExecClearTuple(node->ss.ss_ScanTupleSlot); + + if (BufferIsValid(node->cur_buf)) + { + ReleaseBuffer(node->cur_buf); + node->cur_buf = InvalidBuffer; + } + + /* + * Note that ExecCloseScanRelation() does NOT release the lock we + * acquired on the scan relation: it is held until the end of the + * transaction. 
+ */ + ExecCloseScanRelation(node->ss.ss_currentRelation); + } Index: src/backend/nodes/copyfuncs.c =================================================================== RCS file: /home/neilc/postgres/cvs_root/pgsql/src/backend/nodes/copyfuncs.c,v retrieving revision 1.375 diff -p -c -r1.375 copyfuncs.c *** src/backend/nodes/copyfuncs.c 27 Apr 2007 22:05:47 -0000 1.375 --- src/backend/nodes/copyfuncs.c 17 May 2007 23:36:59 -0000 *************** _copyValuesScan(ValuesScan *from) *** 420,425 **** --- 420,441 ---- } /* + * _copySampleScan + */ + static SampleScan * + _copySampleScan(SampleScan *from) + { + SampleScan *newnode = makeNode(SampleScan); + + /* copy node superclass fields */ + CopyScanFields((Scan *) from, (Scan *) newnode); + + COPY_NODE_FIELD(sample_info); + + return newnode; + } + + /* * CopyJoinFields * * This function copies the fields of the Join node. It is used by *************** _copyRangeVar(RangeVar *from) *** 718,723 **** --- 734,740 ---- COPY_SCALAR_FIELD(inhOpt); COPY_SCALAR_FIELD(istemp); COPY_NODE_FIELD(alias); + COPY_NODE_FIELD(sample_info); return newnode; } *************** _copyFromExpr(FromExpr *from) *** 1350,1355 **** --- 1367,1388 ---- return newnode; } + /* + * _copyTableSampleInfo + */ + static TableSampleInfo * + _copyTableSampleInfo(TableSampleInfo *from) + { + TableSampleInfo *newnode = makeNode(TableSampleInfo); + + COPY_SCALAR_FIELD(sample_method); + COPY_SCALAR_FIELD(sample_percent); + COPY_SCALAR_FIELD(is_repeatable); + COPY_SCALAR_FIELD(repeat_seed); + + return newnode; + } + /* **************************************************************** * relation.h copy functions * *************** _copyRangeTblEntry(RangeTblEntry *from) *** 1476,1481 **** --- 1509,1515 ---- COPY_SCALAR_FIELD(rtekind); COPY_SCALAR_FIELD(relid); + COPY_NODE_FIELD(sample_info); COPY_NODE_FIELD(subquery); COPY_NODE_FIELD(funcexpr); COPY_NODE_FIELD(funccoltypes); *************** copyObject(void *from) *** 3013,3018 **** --- 3047,3055 ---- case T_ValuesScan: 
retval = _copyValuesScan(from); break; + case T_SampleScan: + retval = _copySampleScan(from); + break; case T_Join: retval = _copyJoin(from); break; *************** copyObject(void *from) *** 3170,3175 **** --- 3207,3215 ---- case T_FromExpr: retval = _copyFromExpr(from); break; + case T_TableSampleInfo: + retval = _copyTableSampleInfo(from); + break; /* * RELATION NODES Index: src/backend/nodes/equalfuncs.c =================================================================== RCS file: /home/neilc/postgres/cvs_root/pgsql/src/backend/nodes/equalfuncs.c,v retrieving revision 1.306 diff -p -c -r1.306 equalfuncs.c *** src/backend/nodes/equalfuncs.c 27 Apr 2007 22:05:47 -0000 1.306 --- src/backend/nodes/equalfuncs.c 17 May 2007 23:36:59 -0000 *************** _equalRangeVar(RangeVar *a, RangeVar *b) *** 98,103 **** --- 98,104 ---- COMPARE_SCALAR_FIELD(inhOpt); COMPARE_SCALAR_FIELD(istemp); COMPARE_NODE_FIELD(alias); + COMPARE_NODE_FIELD(sample_info); return true; } *************** _equalFromExpr(FromExpr *a, FromExpr *b) *** 626,631 **** --- 627,643 ---- return true; } + static bool + _equalTableSampleInfo(TableSampleInfo *a, TableSampleInfo *b) + { + COMPARE_SCALAR_FIELD(sample_method); + COMPARE_SCALAR_FIELD(sample_percent); + COMPARE_SCALAR_FIELD(is_repeatable); + COMPARE_SCALAR_FIELD(repeat_seed); + + return true; + } + /* * Stuff from relation.h *************** _equalRangeTblEntry(RangeTblEntry *a, Ra *** 1815,1820 **** --- 1827,1833 ---- { COMPARE_SCALAR_FIELD(rtekind); COMPARE_SCALAR_FIELD(relid); + COMPARE_NODE_FIELD(sample_info); COMPARE_NODE_FIELD(subquery); COMPARE_NODE_FIELD(funcexpr); COMPARE_NODE_FIELD(funccoltypes); *************** equal(void *a, void *b) *** 2114,2119 **** --- 2127,2135 ---- case T_JoinExpr: retval = _equalJoinExpr(a, b); break; + case T_TableSampleInfo: + retval = _equalTableSampleInfo(a, b); + break; /* * RELATION NODES Index: src/backend/nodes/outfuncs.c =================================================================== RCS file: 
/home/neilc/postgres/cvs_root/pgsql/src/backend/nodes/outfuncs.c,v retrieving revision 1.306 diff -p -c -r1.306 outfuncs.c *** src/backend/nodes/outfuncs.c 27 Apr 2007 22:05:47 -0000 1.306 --- src/backend/nodes/outfuncs.c 17 May 2007 23:36:59 -0000 *************** _outValuesScan(StringInfo str, ValuesSca *** 445,450 **** --- 445,460 ---- } static void + _outSampleScan(StringInfo str, SampleScan *node) + { + WRITE_NODE_TYPE("SAMPLESCAN"); + + _outScanInfo(str, (Scan *) node); + + WRITE_NODE_FIELD(sample_info); + } + + static void _outJoin(StringInfo str, Join *node) { WRITE_NODE_TYPE("JOIN"); *************** _outRangeVar(StringInfo str, RangeVar *n *** 652,657 **** --- 662,668 ---- WRITE_ENUM_FIELD(inhOpt, InhOption); WRITE_BOOL_FIELD(istemp); WRITE_NODE_FIELD(alias); + WRITE_NODE_FIELD(sample_info); } static void *************** _outRelOptInfo(StringInfo str, RelOptInf *** 1338,1343 **** --- 1349,1355 ---- WRITE_FLOAT_FIELD(tuples, "%.0f"); WRITE_NODE_FIELD(subplan); WRITE_NODE_FIELD(subrtable); + WRITE_BOOL_FIELD(has_table_sample); WRITE_NODE_FIELD(baserestrictinfo); WRITE_NODE_FIELD(joininfo); WRITE_BOOL_FIELD(has_eclass_joins); *************** _outRangeTblEntry(StringInfo str, RangeT *** 1768,1773 **** --- 1780,1788 ---- switch (node->rtekind) { case RTE_RELATION: + WRITE_OID_FIELD(relid); + WRITE_NODE_FIELD(sample_info); + break; case RTE_SPECIAL: WRITE_OID_FIELD(relid); break; *************** _outFkConstraint(StringInfo str, FkConst *** 2003,2008 **** --- 2018,2034 ---- WRITE_BOOL_FIELD(skip_validation); } + static void + _outTableSampleInfo(StringInfo str, TableSampleInfo *node) + { + WRITE_NODE_TYPE("TABLESAMPLE"); + + WRITE_ENUM_FIELD(sample_method, TableSampleMethod); + WRITE_INT_FIELD(sample_percent); + WRITE_BOOL_FIELD(is_repeatable); + WRITE_INT_FIELD(repeat_seed); + } + /* * _outNode - *************** _outNode(StringInfo str, void *obj) *** 2073,2078 **** --- 2099,2107 ---- case T_ValuesScan: _outValuesScan(str, obj); break; + case T_SampleScan: + 
_outSampleScan(str, obj); + break; case T_Join: _outJoin(str, obj); break; *************** _outNode(StringInfo str, void *obj) *** 2378,2383 **** --- 2407,2415 ---- case T_FkConstraint: _outFkConstraint(str, obj); break; + case T_TableSampleInfo: + _outTableSampleInfo(str, obj); + break; case T_FuncCall: _outFuncCall(str, obj); break; Index: src/backend/nodes/readfuncs.c =================================================================== RCS file: /home/neilc/postgres/cvs_root/pgsql/src/backend/nodes/readfuncs.c,v retrieving revision 1.206 diff -p -c -r1.206 readfuncs.c *** src/backend/nodes/readfuncs.c 27 Apr 2007 22:05:47 -0000 1.206 --- src/backend/nodes/readfuncs.c 17 May 2007 23:36:59 -0000 *************** _readRangeVar(void) *** 278,283 **** --- 278,297 ---- READ_ENUM_FIELD(inhOpt, InhOption); READ_BOOL_FIELD(istemp); READ_NODE_FIELD(alias); + READ_NODE_FIELD(sample_info); + + READ_DONE(); + } + + static TableSampleInfo * + _readTableSampleInfo(void) + { + READ_LOCALS(TableSampleInfo); + + READ_ENUM_FIELD(sample_method, TableSampleMethod); + READ_INT_FIELD(sample_percent); + READ_BOOL_FIELD(is_repeatable); + READ_INT_FIELD(repeat_seed); READ_DONE(); } *************** _readRangeTblEntry(void) *** 945,950 **** --- 959,967 ---- switch (local_node->rtekind) { case RTE_RELATION: + READ_OID_FIELD(relid); + READ_NODE_FIELD(sample_info); + break; case RTE_SPECIAL: READ_OID_FIELD(relid); break; *************** parseNodeString(void) *** 1012,1017 **** --- 1029,1036 ---- return_value = _readAlias(); else if (MATCH("RANGEVAR", 8)) return_value = _readRangeVar(); + else if (MATCH("TABLESAMPLE", 11)) + return_value = _readTableSampleInfo(); else if (MATCH("INTOCLAUSE", 10)) return_value = _readIntoClause(); else if (MATCH("VAR", 3)) Index: src/backend/optimizer/path/allpaths.c =================================================================== RCS file: /home/neilc/postgres/cvs_root/pgsql/src/backend/optimizer/path/allpaths.c,v retrieving revision 1.163 diff -p -c -r1.163 
allpaths.c *** src/backend/optimizer/path/allpaths.c 21 Apr 2007 21:01:44 -0000 1.163 --- src/backend/optimizer/path/allpaths.c 18 May 2007 06:40:54 -0000 *************** set_plain_rel_pathlist(PlannerInfo *root *** 238,253 **** * Note: add_path() will discard any paths that are dominated by another * available path, keeping only those paths that are superior along at * least one dimension of cost or sortedness. */ ! /* Consider sequential scan */ ! add_path(rel, create_seqscan_path(root, rel)); ! ! /* Consider index scans */ ! create_index_paths(root, rel); ! /* Consider TID scans */ ! create_tidscan_paths(root, rel); /* Now find the cheapest of the paths for this rel */ set_cheapest(rel); --- 238,262 ---- * Note: add_path() will discard any paths that are dominated by another * available path, keeping only those paths that are superior along at * least one dimension of cost or sortedness. + * + * If there's a TABLESAMPLE clause, we ONLY consider using a + * SampleScan. This could be improved: in some circumstances it + * might make sense to do an IndexScan and then sample from the + * index scan's result set, for instance. */ + if (rel->has_table_sample) + add_path(rel, create_samplescan_path(root, rel)); + else + { + /* Consider sequential scan */ + add_path(rel, create_seqscan_path(root, rel)); ! /* Consider index scans */ ! create_index_paths(root, rel); ! /* Consider TID scans */ ! create_tidscan_paths(root, rel); ! 
} /* Now find the cheapest of the paths for this rel */ set_cheapest(rel); Index: src/backend/optimizer/path/costsize.c =================================================================== RCS file: /home/neilc/postgres/cvs_root/pgsql/src/backend/optimizer/path/costsize.c,v retrieving revision 1.182 diff -p -c -r1.182 costsize.c *** src/backend/optimizer/path/costsize.c 4 May 2007 01:13:44 -0000 1.182 --- src/backend/optimizer/path/costsize.c 17 May 2007 23:36:59 -0000 *************** cost_valuesscan(Path *path, PlannerInfo *** 905,910 **** --- 905,950 ---- } /* + * cost_samplescan + * Determines the cost of scanning a base relation with a TABLESAMPLE + * clause, and stores it in 'path' (startup_cost and total_cost). + */ + void + cost_samplescan(Path *path, PlannerInfo *root, RelOptInfo *baserel) + { + Cost startup_cost = 0; + Cost run_cost = 0; + Cost cpu_per_tuple; + RangeTblEntry *rte; + int sample_percent; + + /* Should only be applied to base relations */ + Assert(baserel->relid > 0); + Assert(baserel->rtekind == RTE_RELATION); + Assert(path->pathtype == T_SampleScan); + + rte = planner_rt_fetch(baserel->relid, root); + sample_percent = rte->sample_info->sample_percent; + + /* + * Disk costs. When the sample percentage is close to 100, we're + * likely to be doing purely sequential I/O. Conversely, for small + * percentage samples, we're doing random I/O. For now, just be + * conservative and always assume that we need to do a random I/O + * for each sampled block. Of course, this is quite bogus. + */ + run_cost += random_page_cost * baserel->pages * sample_percent / 100; + + /* CPU costs: only tuples on the sampled blocks are examined */ + startup_cost += baserel->baserestrictcost.startup; + cpu_per_tuple = cpu_tuple_cost + baserel->baserestrictcost.per_tuple; + run_cost += cpu_per_tuple * baserel->tuples * sample_percent / 100; + + path->startup_cost = startup_cost; + path->total_cost = startup_cost + run_cost; + } + + /* * cost_sort * Determines and returns the cost of sorting a relation, including * the cost of reading the input data. 
*************** approx_selectivity(PlannerInfo *root, Li *** 2092,2105 **** * * We set the following fields of the rel node: * rows: the estimated number of output tuples (after applying ! * restriction clauses). * width: the estimated average output tuple width in bytes. * baserestrictcost: estimated cost of evaluating baserestrictinfo clauses. */ void set_baserel_size_estimates(PlannerInfo *root, RelOptInfo *rel) { ! double nrows; /* Should only be applied to base relations */ Assert(rel->relid > 0); --- 2132,2146 ---- * * We set the following fields of the rel node: * rows: the estimated number of output tuples (after applying ! * restriction clauses and considering the effect of TABLESAMPLE). * width: the estimated average output tuple width in bytes. * baserestrictcost: estimated cost of evaluating baserestrictinfo clauses. */ void set_baserel_size_estimates(PlannerInfo *root, RelOptInfo *rel) { ! double nrows; ! RangeTblEntry *rte; /* Should only be applied to base relations */ Assert(rel->relid > 0); *************** set_baserel_size_estimates(PlannerInfo * *** 2110,2115 **** --- 2151,2169 ---- 0, JOIN_INNER); + /* + * Consider TABLESAMPLE, if any. We assume that the live heap rows + * are uniformly distributed over the heap: this is a bogus + * simplifying assumption. Note that the executor will apply the + * TABLESAMPLE clause before applying any restrictions, we assume + * that the restrictions have the same selectivity for the sampled + * sub-relation as they do for the entire relation (which is + * likely reasonable). 
+ */ + rte = planner_rt_fetch(rel->relid, root); + if (rte->sample_info) + nrows = nrows * rte->sample_info->sample_percent / 100; + rel->rows = clamp_row_est(nrows); cost_qual_eval(&rel->baserestrictcost, rel->baserestrictinfo, root); Index: src/backend/optimizer/plan/createplan.c =================================================================== RCS file: /home/neilc/postgres/cvs_root/pgsql/src/backend/optimizer/plan/createplan.c,v retrieving revision 1.230 diff -p -c -r1.230 createplan.c *** src/backend/optimizer/plan/createplan.c 4 May 2007 01:13:44 -0000 1.230 --- src/backend/optimizer/plan/createplan.c 17 May 2007 23:36:59 -0000 *************** static FunctionScan *create_functionscan *** 62,67 **** --- 62,69 ---- List *tlist, List *scan_clauses); static ValuesScan *create_valuesscan_plan(PlannerInfo *root, Path *best_path, List *tlist, List *scan_clauses); + static SampleScan *create_samplescan_plan(PlannerInfo *root, Path *best_path, + List *tlist, List *scan_clauses); static NestLoop *create_nestloop_plan(PlannerInfo *root, NestPath *best_path, Plan *outer_plan, Plan *inner_plan); static MergeJoin *create_mergejoin_plan(PlannerInfo *root, MergePath *best_path, *************** static FunctionScan *make_functionscan(L *** 101,106 **** --- 103,110 ---- List *funccoltypes, List *funccoltypmods); static ValuesScan *make_valuesscan(List *qptlist, List *qpqual, Index scanrelid, List *values_lists); + static SampleScan *make_samplescan(PlannerInfo *root, List *tlist, + List *quals, Index scanrelid); static BitmapAnd *make_bitmap_and(List *bitmapplans); static BitmapOr *make_bitmap_or(List *bitmapplans); static NestLoop *make_nestloop(List *tlist, *************** create_plan(PlannerInfo *root, Path *bes *** 155,160 **** --- 159,165 ---- case T_SubqueryScan: case T_FunctionScan: case T_ValuesScan: + case T_SampleScan: plan = create_scan_plan(root, best_path); break; case T_HashJoin: *************** create_scan_plan(PlannerInfo *root, Path *** 278,283 **** --- 
283,295 ---- scan_clauses); break; + case T_SampleScan: + plan = (Plan *) create_samplescan_plan(root, + best_path, + tlist, + scan_clauses); + break; + default: elog(ERROR, "unrecognized node type: %d", (int) best_path->pathtype); *************** create_valuesscan_plan(PlannerInfo *root *** 1406,1411 **** --- 1418,1456 ---- return scan_plan; } + /* + * create_samplescan_plan + * Returns a SampleScan plan node for the base relation scanned by + * 'best_path', with the restriction clauses 'scan_clauses' and the + * targetlist 'tlist'. + */ + static SampleScan * + create_samplescan_plan(PlannerInfo *root, Path *best_path, + List *tlist, List *scan_clauses) + { + SampleScan *scan_plan; + Index scan_relid = best_path->parent->relid; + + /* It should be a base relation */ + Assert(scan_relid > 0); + Assert(best_path->parent->rtekind == RTE_RELATION); + Assert(best_path->pathtype == T_SampleScan); + + /* Sort clauses into best execution order */ + scan_clauses = order_qual_clauses(root, scan_clauses); + + /* Reduce RestrictInfo list to bare expressions; ignore pseudoconstants */ + scan_clauses = extract_actual_clauses(scan_clauses, false); + + scan_plan = make_samplescan(root, tlist, + scan_clauses, + scan_relid); + + copy_path_costsize(&scan_plan->scan.plan, best_path); + + return scan_plan; + } + /***************************************************************************** * * JOIN METHODS *************** make_valuesscan(List *qptlist, *** 2421,2426 **** --- 2466,2507 ---- return node; } + /* + * make_samplescan + * Allocate and return a SampleScan plan node, containing the + * specified information ("tlist", "quals", and "scanrelid"). The + * caller (create_samplescan_plan) fills in the cost of the plan, + * and does the other hard work; we just allocate the node and fill + * it in here. 
+ */ + static SampleScan * + make_samplescan(PlannerInfo *root, + List *tlist, + List *quals, + Index scanrelid) + { + SampleScan *node = makeNode(SampleScan); + Plan *plan = &node->scan.plan; + RangeTblEntry *rte; + + /* cost should be inserted by caller */ + plan->targetlist = tlist; + plan->qual = quals; + plan->lefttree = NULL; + plan->righttree = NULL; + node->scan.scanrelid = scanrelid; + + /* + * For the convenience of nodeSamplescan.c, we stash a pointer to + * the TableSampleInfo for this SampleScan in the SampleScan's + * plan node. + */ + rte = planner_rt_fetch(scanrelid, root); + node->sample_info = rte->sample_info; + + return node; + } + Append * make_append(List *appendplans, bool isTarget, List *tlist) { Index: src/backend/optimizer/plan/setrefs.c =================================================================== RCS file: /home/neilc/postgres/cvs_root/pgsql/src/backend/optimizer/plan/setrefs.c,v retrieving revision 1.135 diff -p -c -r1.135 setrefs.c *** src/backend/optimizer/plan/setrefs.c 30 Apr 2007 00:16:43 -0000 1.135 --- src/backend/optimizer/plan/setrefs.c 17 May 2007 23:36:59 -0000 *************** set_plan_refs(PlannerGlobal *glob, Plan *** 200,207 **** switch (nodeTag(plan)) { case T_SeqScan: { ! SeqScan *splan = (SeqScan *) plan; splan->scanrelid += rtoffset; splan->plan.targetlist = --- 200,208 ---- switch (nodeTag(plan)) { case T_SeqScan: + case T_SampleScan: { ! 
Scan *splan = (Scan *) plan; splan->scanrelid += rtoffset; splan->plan.targetlist = Index: src/backend/optimizer/plan/subselect.c =================================================================== RCS file: /home/neilc/postgres/cvs_root/pgsql/src/backend/optimizer/plan/subselect.c,v retrieving revision 1.122 diff -p -c -r1.122 subselect.c *** src/backend/optimizer/plan/subselect.c 27 Feb 2007 01:11:25 -0000 1.122 --- src/backend/optimizer/plan/subselect.c 17 May 2007 23:36:59 -0000 *************** finalize_plan(PlannerInfo *root, Plan *p *** 1223,1228 **** --- 1223,1229 ---- case T_Hash: case T_Agg: + case T_SampleScan: case T_SeqScan: case T_Material: case T_Sort: Index: src/backend/optimizer/util/pathnode.c =================================================================== RCS file: /home/neilc/postgres/cvs_root/pgsql/src/backend/optimizer/util/pathnode.c,v retrieving revision 1.140 diff -p -c -r1.140 pathnode.c *** src/backend/optimizer/util/pathnode.c 4 May 2007 01:13:44 -0000 1.140 --- src/backend/optimizer/util/pathnode.c 17 May 2007 23:36:59 -0000 *************** create_valuesscan_path(PlannerInfo *root *** 1147,1152 **** --- 1147,1166 ---- return pathnode; } + Path * + create_samplescan_path(PlannerInfo *root, RelOptInfo *rel) + { + Path *pathnode = makeNode(Path); + + pathnode->pathtype = T_SampleScan; + pathnode->parent = rel; + pathnode->pathkeys = NIL; + + cost_samplescan(pathnode, root, rel); + + return pathnode; + } + /* * create_nestloop_path * Creates a pathnode corresponding to a nestloop join between two Index: src/backend/optimizer/util/relnode.c =================================================================== RCS file: /home/neilc/postgres/cvs_root/pgsql/src/backend/optimizer/util/relnode.c,v retrieving revision 1.87 diff -p -c -r1.87 relnode.c *** src/backend/optimizer/util/relnode.c 21 Apr 2007 21:01:45 -0000 1.87 --- src/backend/optimizer/util/relnode.c 17 May 2007 23:36:59 -0000 *************** build_simple_rel(PlannerInfo *root, int 
*** 83,88 **** --- 83,89 ---- rel->tuples = 0; rel->subplan = NULL; rel->subrtable = NIL; + rel->has_table_sample = (rte->sample_info != NULL); rel->baserestrictinfo = NIL; rel->baserestrictcost.startup = 0; rel->baserestrictcost.per_tuple = 0; *************** build_join_rel(PlannerInfo *root, *** 335,340 **** --- 336,342 ---- joinrel->tuples = 0; joinrel->subplan = NULL; joinrel->subrtable = NIL; + joinrel->has_table_sample = false; joinrel->baserestrictinfo = NIL; joinrel->baserestrictcost.startup = 0; joinrel->baserestrictcost.per_tuple = 0; Index: src/backend/parser/analyze.c =================================================================== RCS file: /home/neilc/postgres/cvs_root/pgsql/src/backend/parser/analyze.c,v retrieving revision 1.363 diff -p -c -r1.363 analyze.c *** src/backend/parser/analyze.c 27 Apr 2007 22:05:48 -0000 1.363 --- src/backend/parser/analyze.c 17 May 2007 23:36:59 -0000 *************** transformInsertStmt(ParseState *pstate, *** 469,474 **** --- 469,483 ---- ListCell *attnos; ListCell *lc; + /* + * This should be disallowed by the parser, but check anyway for + * the sake of paranoia. 
+ */ + if (stmt->relation->sample_info) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("TABLESAMPLE cannot be specified for INSERT"))); + qry->commandType = CMD_INSERT; pstate->p_is_insert = true; Index: src/backend/parser/gram.y =================================================================== RCS file: /home/neilc/postgres/cvs_root/pgsql/src/backend/parser/gram.y,v retrieving revision 2.591 diff -p -c -r2.591 gram.y *** src/backend/parser/gram.y 27 Apr 2007 22:05:48 -0000 2.591 --- src/backend/parser/gram.y 18 May 2007 16:52:55 -0000 *************** static Node *makeXmlExpr(XmlExprOp op, c *** 313,319 **** --- 313,323 ---- %type joined_table %type relation_expr %type relation_expr_opt_alias + %type relation_expr_opt_sample %type target_el single_set_clause set_target insert_column_item + %type opt_table_sample + %type sample_method + %type opt_repeatable_clause %type Typename SimpleTypename ConstTypename GenericType Numeric opt_float *************** static Node *makeXmlExpr(XmlExprOp op, c *** 369,375 **** AGGREGATE ALL ALSO ALTER ALWAYS ANALYSE ANALYZE AND ANY ARRAY AS ASC ASSERTION ASSIGNMENT ASYMMETRIC AT AUTHORIZATION ! BACKWARD BEFORE BEGIN_P BETWEEN BIGINT BINARY BIT BOOLEAN_P BOTH BY CACHE CALLED CASCADE CASCADED CASE CAST CHAIN CHAR_P --- 373,379 ---- AGGREGATE ALL ALSO ALTER ALWAYS ANALYSE ANALYZE AND ANY ARRAY AS ASC ASSERTION ASSIGNMENT ASYMMETRIC AT AUTHORIZATION ! BACKWARD BEFORE BEGIN_P BERNOULLI BETWEEN BIGINT BINARY BIT BOOLEAN_P BOTH BY CACHE CALLED CASCADE CASCADED CASE CAST CHAIN CHAR_P *************** static Node *makeXmlExpr(XmlExprOp op, c *** 432,438 **** STATISTICS STDIN STDOUT STORAGE STRICT_P STRIP_P SUBSTRING SUPERUSER_P SYMMETRIC SYSID SYSTEM_P ! TABLE TABLESPACE TEMP TEMPLATE TEMPORARY THEN TIME TIMESTAMP TO TRAILING TRANSACTION TREAT TRIGGER TRIM TRUE_P TRUNCATE TRUSTED TYPE_P --- 436,442 ---- STATISTICS STDIN STDOUT STORAGE STRICT_P STRIP_P SUBSTRING SUPERUSER_P SYMMETRIC SYSID SYSTEM_P ! 
TABLE TABLESAMPLE TABLESPACE TEMP TEMPLATE TEMPORARY THEN TIME TIMESTAMP TO TRAILING TRANSACTION TREAT TRIGGER TRIM TRUE_P TRUNCATE TRUSTED TYPE_P *************** from_list: *** 6259,6269 **** * and joined_table := '(' joined_table ')'. So, we must have the * redundant-looking productions here instead. */ ! table_ref: relation_expr { $$ = (Node *) $1; } ! | relation_expr alias_clause { $1->alias = $2; $$ = (Node *) $1; --- 6263,6273 ---- * and joined_table := '(' joined_table ')'. So, we must have the * redundant-looking productions here instead. */ ! table_ref: relation_expr_opt_sample { $$ = (Node *) $1; } ! | relation_expr_opt_sample alias_clause { $1->alias = $2; $$ = (Node *) $1; *************** join_qual: USING '(' name_list ')' { *** 6491,6496 **** --- 6495,6520 ---- ; + /* + * Pedagogical comment: the "relation_expr" production parses an + * identifier name, optionally schema qualified, and including an + * optional inheritance specification. It is used by the SELECT, + * DELETE, and UPDATE productions, as well as several DDL commands. + * + * We want to allow the TABLESAMPLE clause to be specified for SELECT, + * DELETE, and UPDATE, but not for DDL commands. Therefore, we add a + * new production that is "relation_expr + optional TABLESAMPLE", and + * use that anywhere we'd like to allow a TABLESAMPLE clause to be + * specified. 
+ */ + relation_expr_opt_sample: + relation_expr opt_table_sample + { + $$ = $1; + $$->sample_info = (TableSampleInfo *) $2; + } + ; + relation_expr: qualified_name { *************** relation_expr: *** 6522,6527 **** --- 6546,6599 ---- } ; + opt_table_sample: + TABLESAMPLE sample_method '(' Iconst ')' opt_repeatable_clause + { + TableSampleInfo *n = makeNode(TableSampleInfo); + + if ($2 == true) + n->sample_method = SAMPLE_BERNOULLI; + else + n->sample_method = SAMPLE_SYSTEM; + + n->sample_percent = $4; + if ($4 > 100) + ereport(ERROR, + (errcode(ERRCODE_INVALID_SAMPLE_SIZE), + errmsg("TABLESAMPLE percentage " + "cannot exceed 100"))); + if ($4 <= 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_SAMPLE_SIZE), + errmsg("TABLESAMPLE percentage must " + "be greater than 0"))); + + /* XXX: not supported yet */ + if (n->sample_method == SAMPLE_BERNOULLI) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("BERNOULLI sampling is not supported"))); + + if ($6 != NULL) + { + n->is_repeatable = true; + n->repeat_seed = intVal($6); + } + + $$ = (Node *) n; + } + | /* EMPTY */ { $$ = NULL; } + ; + + sample_method: + BERNOULLI { $$ = true; } + | SYSTEM_P { $$ = false; } + ; + + opt_repeatable_clause: + REPEATABLE '(' Iconst ')' { $$ = makeInteger($3); } + | /* EMPTY */ { $$ = NULL; } + ; /* * Given "UPDATE foo set set ...", we have to decide without looking any *************** relation_expr: *** 6532,6549 **** * has, causing the parser to prefer to reduce, in effect assuming that the * SET is not an alias. */ ! relation_expr_opt_alias: relation_expr %prec UMINUS { $$ = $1; } ! | relation_expr ColId { Alias *alias = makeNode(Alias); alias->aliasname = $2; $1->alias = alias; $$ = $1; } ! | relation_expr AS ColId { Alias *alias = makeNode(Alias); alias->aliasname = $3; --- 6604,6621 ---- * has, causing the parser to prefer to reduce, in effect assuming that the * SET is not an alias. */ ! relation_expr_opt_alias: relation_expr_opt_sample %prec UMINUS { $$ = $1; } ! 
| relation_expr_opt_sample ColId { Alias *alias = makeNode(Alias); alias->aliasname = $2; $1->alias = alias; $$ = $1; } ! | relation_expr_opt_sample AS ColId { Alias *alias = makeNode(Alias); alias->aliasname = $3; *************** unreserved_keyword: *** 8793,8798 **** --- 8865,8871 ---- | BACKWARD | BEFORE | BEGIN_P + | BERNOULLI | BY | CACHE | CALLED *************** unreserved_keyword: *** 8933,8939 **** | RELATIVE_P | RELEASE | RENAME - | REPEATABLE | REPLACE | REPLICA | RESET --- 9006,9011 ---- *************** type_func_name_keyword: *** 9092,9098 **** --- 9164,9172 ---- | OUTER_P | OVERLAPS | RIGHT + | REPEATABLE | SIMILAR + | TABLESAMPLE | VERBOSE ; Index: src/backend/parser/keywords.c =================================================================== RCS file: /home/neilc/postgres/cvs_root/pgsql/src/backend/parser/keywords.c,v retrieving revision 1.187 diff -p -c -r1.187 keywords.c *** src/backend/parser/keywords.c 26 Apr 2007 16:13:12 -0000 1.187 --- src/backend/parser/keywords.c 17 May 2007 23:36:59 -0000 *************** static const ScanKeyword ScanKeywords[] *** 58,63 **** --- 58,64 ---- {"backward", BACKWARD}, {"before", BEFORE}, {"begin", BEGIN_P}, + {"bernoulli", BERNOULLI}, {"between", BETWEEN}, {"bigint", BIGINT}, {"binary", BINARY}, *************** static const ScanKeyword ScanKeywords[] *** 340,345 **** --- 341,347 ---- {"sysid", SYSID}, {"system", SYSTEM_P}, {"table", TABLE}, + {"tablesample", TABLESAMPLE}, {"tablespace", TABLESPACE}, {"temp", TEMP}, {"template", TEMPLATE}, Index: src/backend/parser/parse_clause.c =================================================================== RCS file: /home/neilc/postgres/cvs_root/pgsql/src/backend/parser/parse_clause.c,v retrieving revision 1.165 diff -p -c -r1.165 parse_clause.c *** src/backend/parser/parse_clause.c 27 Apr 2007 22:05:48 -0000 1.165 --- src/backend/parser/parse_clause.c 17 May 2007 23:36:59 -0000 *************** setTargetTable(ParseState *pstate, Range *** 164,169 **** --- 164,175 ---- 
relation->alias, inh, false); pstate->p_target_rangetblentry = rte; + /* + * Minor kludge: addRangeTableEntryForRelation() can't see the + * TABLESAMPLE clause, so attach it to the new RTE manually. + */ + rte->sample_info = relation->sample_info; + /* assume new rte is at end */ rtindex = list_length(pstate->p_rtable); Assert(rte == rt_fetch(rtindex, pstate->p_rtable)); Index: src/backend/parser/parse_relation.c =================================================================== RCS file: /home/neilc/postgres/cvs_root/pgsql/src/backend/parser/parse_relation.c,v retrieving revision 1.127 diff -p -c -r1.127 parse_relation.c *** src/backend/parser/parse_relation.c 5 Jan 2007 22:19:34 -0000 1.127 --- src/backend/parser/parse_relation.c 17 May 2007 23:36:59 -0000 *************** searchRangeTable(ParseState *pstate, Ran *** 209,215 **** if (OidIsValid(relId) && rte->rtekind == RTE_RELATION && ! rte->relid == relId) return rte; if (strcmp(rte->eref->aliasname, refname) == 0) return rte; --- 209,216 ---- if (OidIsValid(relId) && rte->rtekind == RTE_RELATION && ! rte->relid == relId && ! equal(rte->sample_info, relation->sample_info)) return rte; if (strcmp(rte->eref->aliasname, refname) == 0) return rte; *************** addRangeTableEntry(ParseState *pstate, *** 639,644 **** --- 640,646 ---- lockmode = isLockedRel(pstate, refname) ? 
RowShareLock : AccessShareLock; rel = heap_openrv(relation, lockmode); rte->relid = RelationGetRelid(rel); + rte->sample_info = relation->sample_info; /* * Build the list of effective column names using user-supplied aliases *************** addRangeTableEntryForRelation(ParseState *** 699,704 **** --- 701,707 ---- rte->rtekind = RTE_RELATION; rte->alias = alias; rte->relid = RelationGetRelid(rel); + rte->sample_info = NULL; /* * Build the list of effective column names using user-supplied aliases *************** addRangeTableEntryForSubquery(ParseState *** 754,759 **** --- 757,763 ---- rte->rtekind = RTE_SUBQUERY; rte->relid = InvalidOid; + rte->sample_info = NULL; rte->subquery = subquery; rte->alias = alias; *************** addRangeTableEntryForFunction(ParseState *** 833,838 **** --- 837,843 ---- rte->rtekind = RTE_FUNCTION; rte->relid = InvalidOid; + rte->sample_info = NULL; rte->subquery = NULL; rte->funcexpr = funcexpr; rte->funccoltypes = NIL; *************** addRangeTableEntryForValues(ParseState * *** 959,964 **** --- 964,970 ---- rte->rtekind = RTE_VALUES; rte->relid = InvalidOid; + rte->sample_info = NULL; rte->subquery = NULL; rte->values_lists = exprs; rte->alias = alias; *************** addRangeTableEntryForJoin(ParseState *ps *** 1028,1033 **** --- 1034,1040 ---- rte->rtekind = RTE_JOIN; rte->relid = InvalidOid; + rte->sample_info = NULL; rte->subquery = NULL; rte->jointype = jointype; rte->joinaliasvars = aliasvars; Index: src/backend/utils/adt/ruleutils.c =================================================================== RCS file: /home/neilc/postgres/cvs_root/pgsql/src/backend/utils/adt/ruleutils.c,v retrieving revision 1.257 diff -p -c -r1.257 ruleutils.c *** src/backend/utils/adt/ruleutils.c 27 Mar 2007 23:21:10 -0000 1.257 --- src/backend/utils/adt/ruleutils.c 17 May 2007 23:36:59 -0000 *************** deparse_context_for(const char *aliasnam *** 1429,1434 **** --- 1429,1435 ---- rte = makeNode(RangeTblEntry); rte->rtekind = RTE_RELATION; 
rte->relid = relid; + rte->sample_info = NULL; rte->eref = makeAlias(aliasname, NIL); rte->inh = false; rte->inFromCl = true; *************** get_from_clause_item(Node *jtnode, Query *** 4740,4745 **** --- 4741,4775 ---- */ get_from_clause_alias(rte->alias, rte, context); } + + if (rte->sample_info) + { + TableSampleInfo *sample_info = rte->sample_info; + const char *method_name; + + Assert(rte->rtekind == RTE_RELATION); + + switch (sample_info->sample_method) + { + case SAMPLE_BERNOULLI: + method_name = "BERNOULLI"; + break; + case SAMPLE_SYSTEM: + method_name = "SYSTEM"; + break; + default: + elog(ERROR, "unrecognized sample method: %d", + (int) sample_info->sample_method); + method_name = NULL; /* keep compiler quiet */ + } + + appendStringInfo(buf, " TABLESAMPLE %s (%d)", + method_name, sample_info->sample_percent); + + if (sample_info->is_repeatable) + appendStringInfo(buf, " REPEATABLE (%d)", + sample_info->repeat_seed); + } } else if (IsA(jtnode, JoinExpr)) { Index: src/include/executor/nodeSamplescan.h =================================================================== RCS file: src/include/executor/nodeSamplescan.h diff -N src/include/executor/nodeSamplescan.h *** /dev/null 1 Jan 1970 00:00:00 -0000 --- src/include/executor/nodeSamplescan.h 17 May 2007 23:36:59 -0000 *************** *** 0 **** --- 1,22 ---- + /*------------------------------------------------------------------------- + * + * nodeSamplescan.h + * Public interface to the TABLESAMPLE-based executor node + * + * Copyright (c) 2007, PostgreSQL Global Development Group + * + * $PostgreSQL$ + * + *------------------------------------------------------------------------- + */ + #ifndef NODESAMPLESCAN_H + #define NODESAMPLESCAN_H + + #include "nodes/execnodes.h" + + extern SampleScanState *ExecInitSampleScan(SampleScan *node, EState *estate, int eflags); + extern TupleTableSlot *ExecSampleScan(SampleScanState *node); + extern int ExecCountSlotsSampleScan(SampleScan *node); + extern void 
ExecEndSampleScan(SampleScanState *node); + + #endif /* NODESAMPLESCAN_H */ Index: src/include/nodes/execnodes.h =================================================================== RCS file: /home/neilc/postgres/cvs_root/pgsql/src/include/nodes/execnodes.h,v retrieving revision 1.174 diff -p -c -r1.174 execnodes.h *** src/include/nodes/execnodes.h 17 May 2007 19:35:08 -0000 1.174 --- src/include/nodes/execnodes.h 21 May 2007 18:11:42 -0000 *************** typedef struct ValuesScanState *** 1140,1145 **** --- 1140,1192 ---- int marked_idx; } ValuesScanState; + /* + * SampleScanState: the run-time state associated with a single sample + * scan. This is the run-time dual of the SampleScan plan node: for + * each SampleScan in the Plan tree, we create a SampleScanState in + * the corresponding PlanState tree. A PlanState's associated Plan can + * be found via ss.ps.plan. + * + * In addition to the fields of its parent class (ScanState), a + * SampleScanState contains: + * + * cur_buf: the current buffer/page being scanned, if any. The + * sample scan holds a pin on this buffer while it is + * executing, to ensure it isn't evicted from the buffer + * pool while we're using it. InvalidBuffer if we + * haven't started the scan yet, or the scan has + * finished (reached the end of the heap). + * + * cur_offset: the current offset in the buffer being scanned. + * + * cur_blkno: the BlockNumber of cur_buf -- that is, cur_buf's + * position within the heap. + * + * nblocks: the total # of blocks in the relation being scanned. + * Unless the sample percentage is 100, the scan likely + * won't visit this many blocks. + * + * need_new_buf: have we run out of tuples on the current page? + * + * cur_tup: current result tuple. + * + * rand_state: PRNG state. 
+ */ + typedef struct SampleScanState + { + /* parent class; first field is NodeTag */ + ScanState ss; + Buffer cur_buf; + OffsetNumber cur_offset; + BlockNumber cur_blkno; + BlockNumber nblocks; + bool need_new_buf; + HeapTupleData cur_tup; + char rand_state[128]; + char *prev_rand_state; + } SampleScanState; + + /* ---------------------------------------------------------------- * Join State Information * ---------------------------------------------------------------- Index: src/include/nodes/nodes.h =================================================================== RCS file: /home/neilc/postgres/cvs_root/pgsql/src/include/nodes/nodes.h,v retrieving revision 1.199 diff -p -c -r1.199 nodes.h *** src/include/nodes/nodes.h 26 Apr 2007 16:13:14 -0000 1.199 --- src/include/nodes/nodes.h 17 May 2007 23:36:59 -0000 *************** typedef enum NodeTag *** 47,52 **** --- 47,53 ---- T_BitmapAnd, T_BitmapOr, T_Scan, + T_SampleScan, T_SeqScan, T_IndexScan, T_BitmapIndexScan, *************** typedef enum NodeTag *** 79,84 **** --- 80,86 ---- T_BitmapAndState, T_BitmapOrState, T_ScanState, + T_SampleScanState, T_SeqScanState, T_IndexScanState, T_BitmapIndexScanState, *************** typedef enum NodeTag *** 143,148 **** --- 145,151 ---- T_JoinExpr, T_FromExpr, T_IntoClause, + T_TableSampleInfo, /* * TAGS FOR EXPRESSION STATE NODES (execnodes.h) Index: src/include/nodes/parsenodes.h =================================================================== RCS file: /home/neilc/postgres/cvs_root/pgsql/src/include/nodes/parsenodes.h,v retrieving revision 1.348 diff -p -c -r1.348 parsenodes.h *** src/include/nodes/parsenodes.h 27 Apr 2007 22:05:49 -0000 1.348 --- src/include/nodes/parsenodes.h 17 May 2007 23:36:59 -0000 *************** typedef struct RangeTblEntry *** 554,562 **** */ /* ! * Fields valid for a plain relation RTE (else zero): */ ! Oid relid; /* OID of the relation */ /* * Fields valid for a subquery RTE (else NULL): --- 554,563 ---- */ /* ! 
* Fields valid for a plain relation RTE (else zero or NULL): */ ! Oid relid; /* OID of the relation */ ! TableSampleInfo *sample_info; /* TABLESAMPLE clause, if any */ /* * Fields valid for a subquery RTE (else NULL): Index: src/include/nodes/plannodes.h =================================================================== RCS file: /home/neilc/postgres/cvs_root/pgsql/src/include/nodes/plannodes.h,v retrieving revision 1.94 diff -p -c -r1.94 plannodes.h *** src/include/nodes/plannodes.h 27 Apr 2007 22:05:49 -0000 1.94 --- src/include/nodes/plannodes.h 18 May 2007 05:29:08 -0000 *************** typedef struct ValuesScan *** 359,364 **** --- 359,383 ---- List *values_lists; /* list of expression lists */ } ValuesScan; + /* ---------------- + * SampleScan node + * ---------------- + * + * This is the information about a SampleScan that is fixed for a + * given Plan. SampleScanState holds the run-time (executor-time) + * state associated with a given SampleScan node. + * + * In addition to our parent class, we need only a single additional + * piece of information: the information contained in the TABLESAMPLE + * clause that corresponds to this SampleScan. 
+ */ + typedef struct SampleScan + { + Scan scan; + TableSampleInfo *sample_info; + } SampleScan; + + /* * ========== * Join nodes Index: src/include/nodes/primnodes.h =================================================================== RCS file: /home/neilc/postgres/cvs_root/pgsql/src/include/nodes/primnodes.h,v retrieving revision 1.129 diff -p -c -r1.129 primnodes.h *** src/include/nodes/primnodes.h 27 Mar 2007 23:21:12 -0000 1.129 --- src/include/nodes/primnodes.h 17 May 2007 23:36:59 -0000 *************** typedef enum OnCommitAction *** 58,63 **** --- 58,78 ---- ONCOMMIT_DROP /* ON COMMIT DROP */ } OnCommitAction; + typedef enum TableSampleMethod + { + SAMPLE_BERNOULLI, + SAMPLE_SYSTEM + } TableSampleMethod; + + typedef struct TableSampleInfo + { + NodeTag type; + TableSampleMethod sample_method; + int sample_percent; + bool is_repeatable; + int repeat_seed; + } TableSampleInfo; + /* * RangeVar - range variable, used in FROM clauses * *************** typedef struct RangeVar *** 76,81 **** --- 91,97 ---- * on children? */ bool istemp; /* is this a temp relation/sequence? 
*/ Alias *alias; /* table alias & optional column aliases */ + TableSampleInfo *sample_info; /* TABLESAMPLE clause, if any */ } RangeVar; /* Index: src/include/nodes/relation.h =================================================================== RCS file: /home/neilc/postgres/cvs_root/pgsql/src/include/nodes/relation.h,v retrieving revision 1.141 diff -p -c -r1.141 relation.h *** src/include/nodes/relation.h 21 Apr 2007 21:01:45 -0000 1.141 --- src/include/nodes/relation.h 17 May 2007 23:36:59 -0000 *************** typedef struct RelOptInfo *** 340,345 **** --- 340,346 ---- double tuples; struct Plan *subplan; /* if subquery */ List *subrtable; /* if subquery */ + bool has_table_sample; /* used by various scans and joins: */ List *baserestrictinfo; /* RestrictInfo structures (if base Index: src/include/optimizer/cost.h =================================================================== RCS file: /home/neilc/postgres/cvs_root/pgsql/src/include/optimizer/cost.h,v retrieving revision 1.86 diff -p -c -r1.86 cost.h *** src/include/optimizer/cost.h 4 May 2007 01:13:45 -0000 1.86 --- src/include/optimizer/cost.h 17 May 2007 23:36:59 -0000 *************** extern void cost_functionscan(Path *path *** 72,77 **** --- 72,79 ---- RelOptInfo *baserel); extern void cost_valuesscan(Path *path, PlannerInfo *root, RelOptInfo *baserel); + extern void cost_samplescan(Path *path, PlannerInfo *root, + RelOptInfo *baserel); extern void cost_sort(Path *path, PlannerInfo *root, List *pathkeys, Cost input_cost, double tuples, int width, double limit_tuples); Index: src/include/optimizer/pathnode.h =================================================================== RCS file: /home/neilc/postgres/cvs_root/pgsql/src/include/optimizer/pathnode.h,v retrieving revision 1.76 diff -p -c -r1.76 pathnode.h *** src/include/optimizer/pathnode.h 20 Jan 2007 20:45:40 -0000 1.76 --- src/include/optimizer/pathnode.h 17 May 2007 23:36:59 -0000 *************** extern UniquePath *create_unique_path(Pl *** 
54,59 **** --- 54,60 ---- extern Path *create_subqueryscan_path(RelOptInfo *rel, List *pathkeys); extern Path *create_functionscan_path(PlannerInfo *root, RelOptInfo *rel); extern Path *create_valuesscan_path(PlannerInfo *root, RelOptInfo *rel); + extern Path *create_samplescan_path(PlannerInfo *root, RelOptInfo *rel); extern NestPath *create_nestloop_path(PlannerInfo *root, RelOptInfo *joinrel, Index: src/include/utils/errcodes.h =================================================================== RCS file: /home/neilc/postgres/cvs_root/pgsql/src/include/utils/errcodes.h,v retrieving revision 1.23 diff -p -c -r1.23 errcodes.h *** src/include/utils/errcodes.h 3 Feb 2007 14:06:56 -0000 1.23 --- src/include/utils/errcodes.h 17 May 2007 23:36:59 -0000 *************** *** 131,136 **** --- 131,137 ---- #define ERRCODE_INVALID_LIMIT_VALUE MAKE_SQLSTATE('2','2', '0','2','0') #define ERRCODE_INVALID_PARAMETER_VALUE MAKE_SQLSTATE('2','2', '0','2','3') #define ERRCODE_INVALID_REGULAR_EXPRESSION MAKE_SQLSTATE('2','2', '0','1','B') + #define ERRCODE_INVALID_SAMPLE_SIZE MAKE_SQLSTATE('2','2', '0','2','H') #define ERRCODE_INVALID_TIME_ZONE_DISPLACEMENT_VALUE MAKE_SQLSTATE('2','2', '0','0','9') #define ERRCODE_INVALID_USE_OF_ESCAPE_CHARACTER MAKE_SQLSTATE('2','2', '0','0','C') #define ERRCODE_MOST_SPECIFIC_TYPE_MISMATCH MAKE_SQLSTATE('2','2', '0','0','G')