From 5a026747f4b7a5f2abd00f11cfcddc5a6a458fe7 Mon Sep 17 00:00:00 2001 From: Mohamed Sarwat Date: Fri, 10 Jul 2015 11:25:19 -0700 Subject: [PATCH 01/18] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index baf086f..46a823f 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ # Welcome to RecDB RecDB is an Open Source Recommendation Engine Built Entirely Inside PostgreSQL 9.2. RecDB allows application developers to build recommendation applications in a heartbeat through a wide variety of built-in recommendation algorithms like user-user collaborative filtering, item-item collaborative filtering, singular value decomposition. Applications powered by RecDB can produce online and flexible personalized recommendations to end-users. -![RecDB Logo](http://www-users.cs.umn.edu/~sarwat/RecDB/pics/recdblogo.png) current version: ```v0.2-alpha``` +![RecDB Logo](http://www-users.cs.umn.edu/~sarwat/RecDB/pics/recdblogo.png) current version: ```v0.9-beta``` ## How to Get Source Code From 544378cf74b3ba7d9c9b0a401a213b3e701ce80d Mon Sep 17 00:00:00 2001 From: RMoraffah Date: Thu, 10 Nov 2016 15:19:29 +0330 Subject: [PATCH 02/18] =?UTF-8?q?Bugs=20fix.=20(Explain=20Analyze,=20Recom?= =?UTF-8?q?mend=20command=20as=20sub=20query=20and=20=E2=80=A6)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- PostgreSQL/src/backend/commands/explain.c | 3864 ++++----- .../src/backend/optimizer/path/allpaths.c | 3085 +++---- PostgreSQL/src/backend/parser/parse_rec.c | 1862 ++-- PostgreSQL/src/backend/utils/misc/recathon.c | 7655 +++++++++-------- PostgreSQL/src/include/utils/recathon.h | 73 +- 5 files changed, 8309 insertions(+), 8230 deletions(-) diff --git a/PostgreSQL/src/backend/commands/explain.c b/PostgreSQL/src/backend/commands/explain.c index ef7d6dc..0df25f8 100644 --- a/PostgreSQL/src/backend/commands/explain.c +++ b/PostgreSQL/src/backend/commands/explain.c @@ -32,6 +32,8 @@ #include "utils/snapmgr.h" #include "utils/tuplesort.h" #include "utils/xml.h" +//new +#include "utils/recathon.h" /* Hook for plugins to get control in ExplainOneQuery() */ @@ -48,57 +50,57 @@ explain_get_index_name_hook_type explain_get_index_name_hook = NULL; #define X_NOWHITESPACE 4 static void ExplainOneQuery(Query *query, IntoClause *into, ExplainState *es, - const char *queryString, ParamListInfo params); + const char *queryString, ParamListInfo params); static void report_triggers(ResultRelInfo *rInfo, bool show_relname, - ExplainState *es); + ExplainState *es); static double elapsed_time(instr_time *starttime); static void ExplainNode(PlanState *planstate, List *ancestors, - const char *relationship, const char *plan_name, - ExplainState *es); + const char *relationship, const char *plan_name, + ExplainState *es); static void show_plan_tlist(PlanState *planstate, List *ancestors, - ExplainState *es); + ExplainState *es); static void show_expression(Node *node, const char *qlabel, - PlanState *planstate, List *ancestors, - bool useprefix, ExplainState *es); + PlanState *planstate, List *ancestors, + bool useprefix, ExplainState *es); static void show_qual(List *qual, const char *qlabel, - PlanState *planstate, List *ancestors, - bool useprefix, ExplainState *es); + PlanState *planstate, List *ancestors, + bool useprefix, ExplainState *es); static void show_scan_qual(List *qual, const char *qlabel, - PlanState *planstate, List *ancestors, - ExplainState *es); + PlanState *planstate, List *ancestors, + ExplainState *es); static void show_upper_qual(List *qual, const char *qlabel, - PlanState *planstate, List *ancestors, - ExplainState *es); + PlanState *planstate, List *ancestors, + ExplainState *es); static void show_sort_keys(SortState *sortstate, List *ancestors, - ExplainState *es); + ExplainState *es); static void show_merge_append_keys(MergeAppendState *mstate, List *ancestors, - ExplainState *es); + ExplainState *es); static void show_sort_keys_common(PlanState *planstate, - int nkeys, AttrNumber *keycols, - List *ancestors, ExplainState *es); + int nkeys, AttrNumber *keycols, + List *ancestors, ExplainState *es); static void show_sort_info(SortState *sortstate, ExplainState *es); static void show_hash_info(HashState *hashstate, ExplainState *es); static void show_instrumentation_count(const char *qlabel, int which, - PlanState *planstate, ExplainState *es); + PlanState *planstate, ExplainState *es); static void show_foreignscan_info(ForeignScanState *fsstate, ExplainState *es); static const char *explain_get_index_name(Oid indexId); static void ExplainIndexScanDetails(Oid indexid, ScanDirection indexorderdir, - ExplainState *es); + ExplainState *es); static void ExplainScanTarget(Scan *plan, ExplainState *es); static void ExplainModifyTarget(ModifyTable *plan, ExplainState *es); static void ExplainTargetRel(Plan *plan, Index rti, ExplainState *es); static void ExplainMemberNodes(List *plans, PlanState **planstates, - List *ancestors, ExplainState *es); + List *ancestors, ExplainState *es); static void ExplainSubPlans(List *plans, List *ancestors, - const char *relationship, ExplainState *es); + const char *relationship, ExplainState *es); static void ExplainProperty(const char *qlabel, const char *value, - bool numeric, ExplainState *es); + bool numeric, ExplainState *es); static void ExplainOpenGroup(const char *objtype, const char *labelname, - bool labeled, ExplainState *es); + bool labeled, ExplainState *es); static void ExplainCloseGroup(const char *objtype, const char *labelname, - bool labeled, ExplainState *es); + bool labeled, ExplainState *es); static void ExplainDummyGroup(const char *objtype, const char *labelname, - ExplainState *es); + ExplainState *es); static void ExplainXMLTag(const char *tagname, int flags, ExplainState *es); static void ExplainJSONLineEnding(ExplainState *es); static void ExplainYAMLLineStarting(ExplainState *es); @@ -112,130 +114,137 @@ static void escape_yaml(StringInfo buf, const char *str); */ void ExplainQuery(ExplainStmt *stmt, const char *queryString, - ParamListInfo params, DestReceiver *dest) + ParamListInfo params, DestReceiver *dest) { - ExplainState es; - TupOutputState *tstate; - List *rewritten; - ListCell *lc; - bool timing_set = false; - - /* Initialize ExplainState. */ - ExplainInitState(&es); - - /* Parse options list. */ - foreach(lc, stmt->options) - { - DefElem *opt = (DefElem *) lfirst(lc); - - if (strcmp(opt->defname, "analyze") == 0) - es.analyze = defGetBoolean(opt); - else if (strcmp(opt->defname, "verbose") == 0) - es.verbose = defGetBoolean(opt); - else if (strcmp(opt->defname, "costs") == 0) - es.costs = defGetBoolean(opt); - else if (strcmp(opt->defname, "buffers") == 0) - es.buffers = defGetBoolean(opt); - else if (strcmp(opt->defname, "timing") == 0) - { - timing_set = true; - es.timing = defGetBoolean(opt); - } - else if (strcmp(opt->defname, "format") == 0) - { - char *p = defGetString(opt); - - if (strcmp(p, "text") == 0) - es.format = EXPLAIN_FORMAT_TEXT; - else if (strcmp(p, "xml") == 0) - es.format = EXPLAIN_FORMAT_XML; - else if (strcmp(p, "json") == 0) - es.format = EXPLAIN_FORMAT_JSON; - else if (strcmp(p, "yaml") == 0) - es.format = EXPLAIN_FORMAT_YAML; - else - ereport(ERROR, - (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("unrecognized value for EXPLAIN option \"%s\": \"%s\"", - opt->defname, p))); - } - else - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("unrecognized EXPLAIN option \"%s\"", - opt->defname))); - } - - if (es.buffers && !es.analyze) - ereport(ERROR, - (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("EXPLAIN option BUFFERS requires ANALYZE"))); - - /* if the timing was not set explicitly, set default value */ - es.timing = (timing_set) ? es.timing : es.analyze; - - /* check that timing is used with EXPLAIN ANALYZE */ - if (es.timing && !es.analyze) - ereport(ERROR, - (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("EXPLAIN option TIMING requires ANALYZE"))); - - /* - * Parse analysis was done already, but we still have to run the rule - * rewriter. We do not do AcquireRewriteLocks: we assume the query either - * came straight from the parser, or suitable locks were acquired by - * plancache.c. - * - * Because the rewriter and planner tend to scribble on the input, we make - * a preliminary copy of the source querytree. This prevents problems in - * the case that the EXPLAIN is in a portal or plpgsql function and is - * executed repeatedly. (See also the same hack in DECLARE CURSOR and - * PREPARE.) XXX FIXME someday. - */ - Assert(IsA(stmt->query, Query)); - rewritten = QueryRewrite((Query *) copyObject(stmt->query)); - - /* emit opening boilerplate */ - ExplainBeginOutput(&es); - - if (rewritten == NIL) - { - /* - * In the case of an INSTEAD NOTHING, tell at least that. But in - * non-text format, the output is delimited, so this isn't necessary. - */ - if (es.format == EXPLAIN_FORMAT_TEXT) - appendStringInfoString(es.str, "Query rewrites to nothing\n"); - } - else - { - ListCell *l; - - /* Explain every plan */ - foreach(l, rewritten) - { - ExplainOneQuery((Query *) lfirst(l), NULL, &es, - queryString, params); - - /* Separate plans with an appropriate separator */ - if (lnext(l) != NULL) - ExplainSeparatePlans(&es); - } - } - - /* emit closing boilerplate */ - ExplainEndOutput(&es); - Assert(es.indent == 0); - - /* output tuples */ - tstate = begin_tup_output_tupdesc(dest, ExplainResultDesc(stmt)); - if (es.format == EXPLAIN_FORMAT_TEXT) - do_text_output_multiline(tstate, es.str->data); - else - do_text_output_oneline(tstate, es.str->data); - end_tup_output(tstate); - - pfree(es.str->data); + ExplainState es; + TupOutputState *tstate; + List *rewritten; + ListCell *lc; + bool timing_set = false; + + + /* Initialize ExplainState. */ + ExplainInitState(&es); + + /* Parse options list. */ + foreach(lc, stmt->options) + { + DefElem *opt = (DefElem *) lfirst(lc); + + if (strcmp(opt->defname, "analyze") == 0) + es.analyze = defGetBoolean(opt); + else if (strcmp(opt->defname, "verbose") == 0) + es.verbose = defGetBoolean(opt); + else if (strcmp(opt->defname, "costs") == 0) + es.costs = defGetBoolean(opt); + else if (strcmp(opt->defname, "buffers") == 0) + es.buffers = defGetBoolean(opt); + else if (strcmp(opt->defname, "timing") == 0) + { + timing_set = true; + es.timing = defGetBoolean(opt); + } + else if (strcmp(opt->defname, "format") == 0) + { + char *p = defGetString(opt); + + if (strcmp(p, "text") == 0) + es.format = EXPLAIN_FORMAT_TEXT; + else if (strcmp(p, "xml") == 0) + es.format = EXPLAIN_FORMAT_XML; + else if (strcmp(p, "json") == 0) + es.format = EXPLAIN_FORMAT_JSON; + else if (strcmp(p, "yaml") == 0) + es.format = EXPLAIN_FORMAT_YAML; + else + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("unrecognized value for EXPLAIN option \"%s\": \"%s\"", + opt->defname, p))); + } + else + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("unrecognized EXPLAIN option \"%s\"", + opt->defname))); + } + + if (es.buffers && !es.analyze) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("EXPLAIN option BUFFERS requires ANALYZE"))); + + /* if the timing was not set explicitly, set default value */ + es.timing = (timing_set) ? es.timing : es.analyze; + + /* check that timing is used with EXPLAIN ANALYZE */ + if (es.timing && !es.analyze) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("EXPLAIN option TIMING requires ANALYZE"))); + + /* + * Parse analysis was done already, but we still have to run the rule + * rewriter. We do not do AcquireRewriteLocks: we assume the query either + * came straight from the parser, or suitable locks were acquired by + * plancache.c. + * + * Because the rewriter and planner tend to scribble on the input, we make + * a preliminary copy of the source querytree. This prevents problems in + * the case that the EXPLAIN is in a portal or plpgsql function and is + * executed repeatedly. (See also the same hack in DECLARE CURSOR and + * PREPARE.) XXX FIXME someday. + */ + Assert(IsA(stmt->query, Query)); + //NEW FOR RECDB + //Prevent an error from happening while using a recommender in an explain analyze command + Query * temp = (Query *) copyObject(stmt->query); + copyQueryHelper(temp, (Query*)stmt->query); + rewritten = QueryRewrite(temp); + + + /* emit opening boilerplate */ + ExplainBeginOutput(&es); + + if (rewritten == NIL) + { + /* + * In the case of an INSTEAD NOTHING, tell at least that. But in + * non-text format, the output is delimited, so this isn't necessary. + */ + if (es.format == EXPLAIN_FORMAT_TEXT) + appendStringInfoString(es.str, "Query rewrites to nothing\n"); + } + else + { + ListCell *l; + + /* Explain every plan */ + foreach(l, rewritten) + { + + ExplainOneQuery((Query *) lfirst(l), NULL, &es, + queryString, params); + + /* Separate plans with an appropriate separator */ + if (lnext(l) != NULL) + ExplainSeparatePlans(&es); + } + } + + /* emit closing boilerplate */ + ExplainEndOutput(&es); + Assert(es.indent == 0); + + /* output tuples */ + tstate = begin_tup_output_tupdesc(dest, ExplainResultDesc(stmt)); + if (es.format == EXPLAIN_FORMAT_TEXT) + do_text_output_multiline(tstate, es.str->data); + else + do_text_output_oneline(tstate, es.str->data); + end_tup_output(tstate); + + pfree(es.str->data); } /* @@ -244,11 +253,11 @@ ExplainQuery(ExplainStmt *stmt, const char *queryString, void ExplainInitState(ExplainState *es) { - /* Set default options. */ - memset(es, 0, sizeof(ExplainState)); - es->costs = true; - /* Prepare output buffer. */ - es->str = makeStringInfo(); + /* Set default options. */ + memset(es, 0, sizeof(ExplainState)); + es->costs = true; + /* Prepare output buffer. */ + es->str = makeStringInfo(); } /* @@ -258,34 +267,34 @@ ExplainInitState(ExplainState *es) TupleDesc ExplainResultDesc(ExplainStmt *stmt) { - TupleDesc tupdesc; - ListCell *lc; - Oid result_type = TEXTOID; - - /* Check for XML format option */ - foreach(lc, stmt->options) - { - DefElem *opt = (DefElem *) lfirst(lc); - - if (strcmp(opt->defname, "format") == 0) - { - char *p = defGetString(opt); - - if (strcmp(p, "xml") == 0) - result_type = XMLOID; - else if (strcmp(p, "json") == 0) - result_type = JSONOID; - else - result_type = TEXTOID; - /* don't "break", as ExplainQuery will use the last value */ - } - } - - /* Need a tuple descriptor representing a single TEXT or XML column */ - tupdesc = CreateTemplateTupleDesc(1, false); - TupleDescInitEntry(tupdesc, (AttrNumber) 1, "QUERY PLAN", - result_type, -1, 0); - return tupdesc; + TupleDesc tupdesc; + ListCell *lc; + Oid result_type = TEXTOID; + + /* Check for XML format option */ + foreach(lc, stmt->options) + { + DefElem *opt = (DefElem *) lfirst(lc); + + if (strcmp(opt->defname, "format") == 0) + { + char *p = defGetString(opt); + + if (strcmp(p, "xml") == 0) + result_type = XMLOID; + else if (strcmp(p, "json") == 0) + result_type = JSONOID; + else + result_type = TEXTOID; + /* don't "break", as ExplainQuery will use the last value */ + } + } + + /* Need a tuple descriptor representing a single TEXT or XML column */ + tupdesc = CreateTemplateTupleDesc(1, false); + TupleDescInitEntry(tupdesc, (AttrNumber) 1, "QUERY PLAN", + result_type, -1, 0); + return tupdesc; } /* @@ -296,28 +305,29 @@ ExplainResultDesc(ExplainStmt *stmt) */ static void ExplainOneQuery(Query *query, IntoClause *into, ExplainState *es, - const char *queryString, ParamListInfo params) + const char *queryString, ParamListInfo params) { - /* planner will not cope with utility statements */ - if (query->commandType == CMD_UTILITY) - { - ExplainOneUtility(query->utilityStmt, into, es, queryString, params); - return; - } - - /* if an advisor plugin is present, let it manage things */ - if (ExplainOneQuery_hook) - (*ExplainOneQuery_hook) (query, into, es, queryString, params); - else - { - PlannedStmt *plan; - - /* plan the query */ - plan = pg_plan_query(query, 0, params); - - /* run it (if needed) and produce output */ - ExplainOnePlan(plan, into, es, queryString, params); - } + /* planner will not cope with utility statements */ + if (query->commandType == CMD_UTILITY) + { + ExplainOneUtility(query->utilityStmt, into, es, queryString, params); + return; + } + + /* if an advisor plugin is present, let it manage things */ + if (ExplainOneQuery_hook) + (*ExplainOneQuery_hook) (query, into, es, queryString, params); + else + { + PlannedStmt *plan; + + /* plan the query */ + + plan = pg_plan_query(query, 0, params); + + /* run it (if needed) and produce output */ + ExplainOnePlan(plan, into, es, queryString, params); + } } /* @@ -333,45 +343,49 @@ ExplainOneQuery(Query *query, IntoClause *into, ExplainState *es, */ void ExplainOneUtility(Node *utilityStmt, IntoClause *into, ExplainState *es, - const char *queryString, ParamListInfo params) + const char *queryString, ParamListInfo params) { - if (utilityStmt == NULL) - return; - - if (IsA(utilityStmt, CreateTableAsStmt)) - { - /* - * We have to rewrite the contained SELECT and then pass it back to - * ExplainOneQuery. It's probably not really necessary to copy the - * contained parsetree another time, but let's be safe. - */ - CreateTableAsStmt *ctas = (CreateTableAsStmt *) utilityStmt; - List *rewritten; - - Assert(IsA(ctas->query, Query)); - rewritten = QueryRewrite((Query *) copyObject(ctas->query)); - Assert(list_length(rewritten) == 1); - ExplainOneQuery((Query *) linitial(rewritten), ctas->into, es, - queryString, params); - } - else if (IsA(utilityStmt, ExecuteStmt)) - ExplainExecuteQuery((ExecuteStmt *) utilityStmt, into, es, - queryString, params); - else if (IsA(utilityStmt, NotifyStmt)) - { - if (es->format == EXPLAIN_FORMAT_TEXT) - appendStringInfoString(es->str, "NOTIFY\n"); - else - ExplainDummyGroup("Notify", NULL, es); - } - else - { - if (es->format == EXPLAIN_FORMAT_TEXT) - appendStringInfoString(es->str, - "Utility statements have no plan structure\n"); - else - ExplainDummyGroup("Utility Statement", NULL, es); - } + if (utilityStmt == NULL) + return; + + if (IsA(utilityStmt, CreateTableAsStmt)) + { + /* + * We have to rewrite the contained SELECT and then pass it back to + * ExplainOneQuery. It's probably not really necessary to copy the + * contained parsetree another time, but let's be safe. + */ + CreateTableAsStmt *ctas = (CreateTableAsStmt *) utilityStmt; + List *rewritten; + + Assert(IsA(ctas->query, Query)); + //NEW FOR RECDB + //Prevent an error from happening while using a recommender + Query * temp = (Query *) copyObject(ctas->query); + copyQueryHelper(temp, (Query*)ctas->query); + rewritten = QueryRewrite(temp); + Assert(list_length(rewritten) == 1); + ExplainOneQuery((Query *) linitial(rewritten), ctas->into, es, + queryString, params); + } + else if (IsA(utilityStmt, ExecuteStmt)) + ExplainExecuteQuery((ExecuteStmt *) utilityStmt, into, es, + queryString, params); + else if (IsA(utilityStmt, NotifyStmt)) + { + if (es->format == EXPLAIN_FORMAT_TEXT) + appendStringInfoString(es->str, "NOTIFY\n"); + else + ExplainDummyGroup("Notify", NULL, es); + } + else + { + if (es->format == EXPLAIN_FORMAT_TEXT) + appendStringInfoString(es->str, + "Utility statements have no plan structure\n"); + else + ExplainDummyGroup("Utility Statement", NULL, es); + } } /* @@ -393,138 +407,138 @@ ExplainOneUtility(Node *utilityStmt, IntoClause *into, ExplainState *es, */ void ExplainOnePlan(PlannedStmt *plannedstmt, IntoClause *into, ExplainState *es, - const char *queryString, ParamListInfo params) + const char *queryString, ParamListInfo params) { - DestReceiver *dest; - QueryDesc *queryDesc; - instr_time starttime; - double totaltime = 0; - int eflags; - int instrument_option = 0; - - if (es->analyze && es->timing) - instrument_option |= INSTRUMENT_TIMER; - else if (es->analyze) - instrument_option |= INSTRUMENT_ROWS; - - if (es->buffers) - instrument_option |= INSTRUMENT_BUFFERS; - - INSTR_TIME_SET_CURRENT(starttime); - - /* - * Use a snapshot with an updated command ID to ensure this query sees - * results of any previously executed queries. - */ - PushCopiedSnapshot(GetActiveSnapshot()); - UpdateActiveSnapshotCommandId(); - - /* - * Normally we discard the query's output, but if explaining CREATE TABLE - * AS, we'd better use the appropriate tuple receiver. - */ - if (into) - dest = CreateIntoRelDestReceiver(into); - else - dest = None_Receiver; - - /* Create a QueryDesc for the query */ - queryDesc = CreateQueryDesc(plannedstmt, queryString, - GetActiveSnapshot(), InvalidSnapshot, - dest, params, instrument_option); - - /* Select execution options */ - if (es->analyze) - eflags = 0; /* default run-to-completion flags */ - else - eflags = EXEC_FLAG_EXPLAIN_ONLY; - if (into) - eflags |= GetIntoRelEFlags(into); - - /* call ExecutorStart to prepare the plan for execution */ - ExecutorStart(queryDesc, eflags); - - /* Execute the plan for statistics if asked for */ - if (es->analyze) - { - ScanDirection dir; - - /* EXPLAIN ANALYZE CREATE TABLE AS WITH NO DATA is weird */ - if (into && into->skipData) - dir = NoMovementScanDirection; - else - dir = ForwardScanDirection; - - /* run the plan */ - ExecutorRun(queryDesc, dir, 0L); - - /* run cleanup too */ - ExecutorFinish(queryDesc); - - /* We can't run ExecutorEnd 'till we're done printing the stats... */ - totaltime += elapsed_time(&starttime); - } - - ExplainOpenGroup("Query", NULL, true, es); - - /* Create textual dump of plan tree */ - ExplainPrintPlan(es, queryDesc); - - /* Print info about runtime of triggers */ - if (es->analyze) - { - ResultRelInfo *rInfo; - bool show_relname; - int numrels = queryDesc->estate->es_num_result_relations; - List *targrels = queryDesc->estate->es_trig_target_relations; - int nr; - ListCell *l; - - ExplainOpenGroup("Triggers", "Triggers", false, es); - - show_relname = (numrels > 1 || targrels != NIL); - rInfo = queryDesc->estate->es_result_relations; - for (nr = 0; nr < numrels; rInfo++, nr++) - report_triggers(rInfo, show_relname, es); - - foreach(l, targrels) - { - rInfo = (ResultRelInfo *) lfirst(l); - report_triggers(rInfo, show_relname, es); - } - - ExplainCloseGroup("Triggers", "Triggers", false, es); - } - - /* - * Close down the query and free resources. Include time for this in the - * total runtime (although it should be pretty minimal). - */ - INSTR_TIME_SET_CURRENT(starttime); - - ExecutorEnd(queryDesc); - - FreeQueryDesc(queryDesc); - - PopActiveSnapshot(); - - /* We need a CCI just in case query expanded to multiple plans */ - if (es->analyze) - CommandCounterIncrement(); - - totaltime += elapsed_time(&starttime); - - if (es->analyze) - { - if (es->format == EXPLAIN_FORMAT_TEXT) - appendStringInfo(es->str, "Total runtime: %.3f ms\n", - 1000.0 * totaltime); - else - ExplainPropertyFloat("Total Runtime", 1000.0 * totaltime, - 3, es); - } - - ExplainCloseGroup("Query", NULL, true, es); + DestReceiver *dest; + QueryDesc *queryDesc; + instr_time starttime; + double totaltime = 0; + int eflags; + int instrument_option = 0; + + if (es->analyze && es->timing) + instrument_option |= INSTRUMENT_TIMER; + else if (es->analyze) + instrument_option |= INSTRUMENT_ROWS; + + if (es->buffers) + instrument_option |= INSTRUMENT_BUFFERS; + + INSTR_TIME_SET_CURRENT(starttime); + + /* + * Use a snapshot with an updated command ID to ensure this query sees + * results of any previously executed queries. + */ + PushCopiedSnapshot(GetActiveSnapshot()); + UpdateActiveSnapshotCommandId(); + + /* + * Normally we discard the query's output, but if explaining CREATE TABLE + * AS, we'd better use the appropriate tuple receiver. + */ + if (into) + dest = CreateIntoRelDestReceiver(into); + else + dest = None_Receiver; + + /* Create a QueryDesc for the query */ + queryDesc = CreateQueryDesc(plannedstmt, queryString, + GetActiveSnapshot(), InvalidSnapshot, + dest, params, instrument_option); + + /* Select execution options */ + if (es->analyze) + eflags = 0; /* default run-to-completion flags */ + else + eflags = EXEC_FLAG_EXPLAIN_ONLY; + if (into) + eflags |= GetIntoRelEFlags(into); + + /* call ExecutorStart to prepare the plan for execution */ + ExecutorStart(queryDesc, eflags); + + /* Execute the plan for statistics if asked for */ + if (es->analyze) + { + ScanDirection dir; + + /* EXPLAIN ANALYZE CREATE TABLE AS WITH NO DATA is weird */ + if (into && into->skipData) + dir = NoMovementScanDirection; + else + dir = ForwardScanDirection; + + /* run the plan */ + ExecutorRun(queryDesc, dir, 0L); + + /* run cleanup too */ + ExecutorFinish(queryDesc); + + /* We can't run ExecutorEnd 'till we're done printing the stats... */ + totaltime += elapsed_time(&starttime); + } + + ExplainOpenGroup("Query", NULL, true, es); + + /* Create textual dump of plan tree */ + ExplainPrintPlan(es, queryDesc); + + /* Print info about runtime of triggers */ + if (es->analyze) + { + ResultRelInfo *rInfo; + bool show_relname; + int numrels = queryDesc->estate->es_num_result_relations; + List *targrels = queryDesc->estate->es_trig_target_relations; + int nr; + ListCell *l; + + ExplainOpenGroup("Triggers", "Triggers", false, es); + + show_relname = (numrels > 1 || targrels != NIL); + rInfo = queryDesc->estate->es_result_relations; + for (nr = 0; nr < numrels; rInfo++, nr++) + report_triggers(rInfo, show_relname, es); + + foreach(l, targrels) + { + rInfo = (ResultRelInfo *) lfirst(l); + report_triggers(rInfo, show_relname, es); + } + + ExplainCloseGroup("Triggers", "Triggers", false, es); + } + + /* + * Close down the query and free resources. Include time for this in the + * total runtime (although it should be pretty minimal). + */ + INSTR_TIME_SET_CURRENT(starttime); + + ExecutorEnd(queryDesc); + + FreeQueryDesc(queryDesc); + + PopActiveSnapshot(); + + /* We need a CCI just in case query expanded to multiple plans */ + if (es->analyze) + CommandCounterIncrement(); + + totaltime += elapsed_time(&starttime); + + if (es->analyze) + { + if (es->format == EXPLAIN_FORMAT_TEXT) + appendStringInfo(es->str, "Total runtime: %.3f ms\n", + 1000.0 * totaltime); + else + ExplainPropertyFloat("Total Runtime", 1000.0 * totaltime, + 3, es); + } + + ExplainCloseGroup("Query", NULL, true, es); } /* @@ -540,10 +554,10 @@ ExplainOnePlan(PlannedStmt *plannedstmt, IntoClause *into, ExplainState *es, void ExplainPrintPlan(ExplainState *es, QueryDesc *queryDesc) { - Assert(queryDesc->plannedstmt != NULL); - es->pstmt = queryDesc->plannedstmt; - es->rtable = queryDesc->plannedstmt->rtable; - ExplainNode(queryDesc->planstate, NIL, NULL, NULL, es); + Assert(queryDesc->plannedstmt != NULL); + es->pstmt = queryDesc->plannedstmt; + es->rtable = queryDesc->plannedstmt->rtable; + ExplainNode(queryDesc->planstate, NIL, NULL, NULL, es); } /* @@ -557,8 +571,8 @@ ExplainPrintPlan(ExplainState *es, QueryDesc *queryDesc) void ExplainQueryText(ExplainState *es, QueryDesc *queryDesc) { - if (queryDesc->sourceText) - ExplainPropertyText("Query Text", queryDesc->sourceText, es); + if (queryDesc->sourceText) + ExplainPropertyText("Query Text", queryDesc->sourceText, es); } /* @@ -568,77 +582,77 @@ ExplainQueryText(ExplainState *es, QueryDesc *queryDesc) static void report_triggers(ResultRelInfo *rInfo, bool show_relname, ExplainState *es) { - int nt; - - if (!rInfo->ri_TrigDesc || !rInfo->ri_TrigInstrument) - return; - for (nt = 0; nt < rInfo->ri_TrigDesc->numtriggers; nt++) - { - Trigger *trig = rInfo->ri_TrigDesc->triggers + nt; - Instrumentation *instr = rInfo->ri_TrigInstrument + nt; - char *relname; - char *conname = NULL; - - /* Must clean up instrumentation state */ - InstrEndLoop(instr); - - /* - * We ignore triggers that were never invoked; they likely aren't - * relevant to the current query type. - */ - if (instr->ntuples == 0) - continue; - - ExplainOpenGroup("Trigger", NULL, true, es); - - relname = RelationGetRelationName(rInfo->ri_RelationDesc); - if (OidIsValid(trig->tgconstraint)) - conname = get_constraint_name(trig->tgconstraint); - - /* - * In text format, we avoid printing both the trigger name and the - * constraint name unless VERBOSE is specified. In non-text formats - * we just print everything. - */ - if (es->format == EXPLAIN_FORMAT_TEXT) - { - if (es->verbose || conname == NULL) - appendStringInfo(es->str, "Trigger %s", trig->tgname); - else - appendStringInfoString(es->str, "Trigger"); - if (conname) - appendStringInfo(es->str, " for constraint %s", conname); - if (show_relname) - appendStringInfo(es->str, " on %s", relname); - appendStringInfo(es->str, ": time=%.3f calls=%.0f\n", - 1000.0 * instr->total, instr->ntuples); - } - else - { - ExplainPropertyText("Trigger Name", trig->tgname, es); - if (conname) - ExplainPropertyText("Constraint Name", conname, es); - ExplainPropertyText("Relation", relname, es); - ExplainPropertyFloat("Time", 1000.0 * instr->total, 3, es); - ExplainPropertyFloat("Calls", instr->ntuples, 0, es); - } - - if (conname) - pfree(conname); - - ExplainCloseGroup("Trigger", NULL, true, es); - } + int nt; + + if (!rInfo->ri_TrigDesc || !rInfo->ri_TrigInstrument) + return; + for (nt = 0; nt < rInfo->ri_TrigDesc->numtriggers; nt++) + { + Trigger *trig = rInfo->ri_TrigDesc->triggers + nt; + Instrumentation *instr = rInfo->ri_TrigInstrument + nt; + char *relname; + char *conname = NULL; + + /* Must clean up instrumentation state */ + InstrEndLoop(instr); + + /* + * We ignore triggers that were never invoked; they likely aren't + * relevant to the current query type. + */ + if (instr->ntuples == 0) + continue; + + ExplainOpenGroup("Trigger", NULL, true, es); + + relname = RelationGetRelationName(rInfo->ri_RelationDesc); + if (OidIsValid(trig->tgconstraint)) + conname = get_constraint_name(trig->tgconstraint); + + /* + * In text format, we avoid printing both the trigger name and the + * constraint name unless VERBOSE is specified. In non-text formats + * we just print everything. + */ + if (es->format == EXPLAIN_FORMAT_TEXT) + { + if (es->verbose || conname == NULL) + appendStringInfo(es->str, "Trigger %s", trig->tgname); + else + appendStringInfoString(es->str, "Trigger"); + if (conname) + appendStringInfo(es->str, " for constraint %s", conname); + if (show_relname) + appendStringInfo(es->str, " on %s", relname); + appendStringInfo(es->str, ": time=%.3f calls=%.0f\n", + 1000.0 * instr->total, instr->ntuples); + } + else + { + ExplainPropertyText("Trigger Name", trig->tgname, es); + if (conname) + ExplainPropertyText("Constraint Name", conname, es); + ExplainPropertyText("Relation", relname, es); + ExplainPropertyFloat("Time", 1000.0 * instr->total, 3, es); + ExplainPropertyFloat("Calls", instr->ntuples, 0, es); + } + + if (conname) + pfree(conname); + + ExplainCloseGroup("Trigger", NULL, true, es); + } } /* Compute elapsed time in seconds since given timestamp */ static double elapsed_time(instr_time *starttime) { - instr_time endtime; - - INSTR_TIME_SET_CURRENT(endtime); - INSTR_TIME_SUBTRACT(endtime, *starttime); - return INSTR_TIME_GET_DOUBLE(endtime); + instr_time endtime; + + INSTR_TIME_SET_CURRENT(endtime); + INSTR_TIME_SUBTRACT(endtime, *starttime); + return INSTR_TIME_GET_DOUBLE(endtime); } /* @@ -663,800 +677,800 @@ elapsed_time(instr_time *starttime) */ static void ExplainNode(PlanState *planstate, List *ancestors, - const char *relationship, const char *plan_name, - ExplainState *es) + const char *relationship, const char *plan_name, + ExplainState *es) { - Plan *plan = planstate->plan; - const char *pname; /* node type name for text output */ - const char *sname; /* node type name for non-text output */ - const char *strategy = NULL; - const char *operation = NULL; - int save_indent = es->indent; - bool haschildren; - - switch (nodeTag(plan)) - { - case T_Result: - pname = sname = "Result"; - break; - case T_ModifyTable: - sname = "ModifyTable"; - switch (((ModifyTable *) plan)->operation) - { - case CMD_INSERT: - pname = operation = "Insert"; - break; - case CMD_UPDATE: - pname = operation = "Update"; - break; - case CMD_DELETE: - pname = operation = "Delete"; - break; - default: - pname = "???"; - break; - } - break; - case T_Append: - pname = sname = "Append"; - break; - case T_MergeAppend: - pname = sname = "Merge Append"; - break; - case T_RecursiveUnion: - pname = sname = "Recursive Union"; - break; - case T_BitmapAnd: - pname = sname = "BitmapAnd"; - break; - case T_BitmapOr: - pname = sname = "BitmapOr"; - break; - case T_NestLoop: - pname = sname = "Nested Loop"; - break; - case T_MergeJoin: - pname = "Merge"; /* "Join" gets added by jointype switch */ - sname = "Merge Join"; - break; - case T_HashJoin: - pname = "Hash"; /* "Join" gets added by jointype switch */ - sname = "Hash Join"; - break; - /* NEW FOR RECATHON */ - case T_RecJoin: - pname = "Recommend"; /* "Join" gets added by jointype switch */ - sname = "Recommend Join"; - break; - case T_SeqScan: - pname = sname = "Seq Scan"; - break; - case T_IndexScan: - pname = sname = "Index Scan"; - break; - case T_IndexOnlyScan: - pname = sname = "Index Only Scan"; - break; - case T_BitmapIndexScan: - pname = sname = "Bitmap Index Scan"; - break; - case T_BitmapHeapScan: - pname = sname = "Bitmap Heap Scan"; - break; - case T_TidScan: - pname = sname = "Tid Scan"; - break; - case T_SubqueryScan: - pname = sname = "Subquery Scan"; - break; - case T_FunctionScan: - pname = sname = "Function Scan"; - break; - case T_ValuesScan: - pname = sname = "Values Scan"; - break; - case T_CteScan: - pname = sname = "CTE Scan"; - break; - case T_WorkTableScan: - pname = sname = "WorkTable Scan"; - break; - case T_ForeignScan: - pname = sname = "Foreign Scan"; - break; - /* NEW FOR RECATHON */ - case T_RecScan: - { - /* We'll mark a strategy depending on a few - * parameters. */ - RecommendInfo *recInfo; - pname = sname = "Recommend"; - - recInfo = (RecommendInfo*) ((RecScan*)plan)->recommender; - switch(recInfo->opType) { - case OP_GENERATE: - strategy = "GenerateRecommend"; - break; - case OP_JOIN: - strategy = "JoinRecommend"; - break; - case OP_GENERATEJOIN: - strategy = "GenerateJoinRecommend"; - break; - case OP_FILTER: - strategy = "FilterRecommend"; - break; - case OP_NOFILTER: - strategy = "StandardRecommend"; - break; - case OP_INDEX: - default: - strategy = "Recommend ???"; - break; - } - } - break; - case T_Material: - pname = sname = "Materialize"; - break; - case T_Sort: - pname = sname = "Sort"; - break; - case T_Group: - pname = sname = "Group"; - break; - case T_Agg: - sname = "Aggregate"; - switch (((Agg *) plan)->aggstrategy) - { - case AGG_PLAIN: - pname = "Aggregate"; - strategy = "Plain"; - break; - case AGG_SORTED: - pname = "GroupAggregate"; - strategy = "Sorted"; - break; - case AGG_HASHED: - pname = "HashAggregate"; - strategy = "Hashed"; - break; - default: - pname = "Aggregate ???"; - strategy = "???"; - break; - } - break; - case T_WindowAgg: - pname = sname = "WindowAgg"; - break; - case T_Unique: - pname = sname = "Unique"; - break; - case T_SetOp: - sname = "SetOp"; - switch (((SetOp *) plan)->strategy) - { - case SETOP_SORTED: - pname = "SetOp"; - strategy = "Sorted"; - break; - case SETOP_HASHED: - pname = "HashSetOp"; - strategy = "Hashed"; - break; - default: - pname = "SetOp ???"; - strategy = "???"; - break; - } - break; - case T_LockRows: - pname = sname = "LockRows"; - break; - case T_Limit: - pname = sname = "Limit"; - break; - case T_Hash: - pname = sname = "Hash"; - break; - default: - pname = sname = "???"; - break; - } - - ExplainOpenGroup("Plan", - relationship ? NULL : "Plan", - true, es); - - if (es->format == EXPLAIN_FORMAT_TEXT) - { - if (plan_name) - { - appendStringInfoSpaces(es->str, es->indent * 2); - appendStringInfo(es->str, "%s\n", plan_name); - es->indent++; - } - if (es->indent) - { - appendStringInfoSpaces(es->str, es->indent * 2); - appendStringInfoString(es->str, "-> "); - es->indent += 2; - } - appendStringInfoString(es->str, pname); - es->indent++; - } - else - { - ExplainPropertyText("Node Type", sname, es); - if (strategy) - ExplainPropertyText("Strategy", strategy, es); - if (operation) - ExplainPropertyText("Operation", operation, es); - if (relationship) - ExplainPropertyText("Parent Relationship", relationship, es); - if (plan_name) - ExplainPropertyText("Subplan Name", plan_name, es); - } - - switch (nodeTag(plan)) - { - case T_SeqScan: - case T_BitmapHeapScan: - case T_TidScan: - case T_SubqueryScan: - case T_FunctionScan: - case T_ValuesScan: - case T_CteScan: - case T_WorkTableScan: - case T_ForeignScan: - /* NEW FOR RECATHON */ - case T_RecScan: - ExplainScanTarget((Scan *) plan, es); - break; - case T_IndexScan: - { - IndexScan *indexscan = (IndexScan *) plan; - - ExplainIndexScanDetails(indexscan->indexid, - indexscan->indexorderdir, - es); - ExplainScanTarget((Scan *) indexscan, es); - } - break; - case T_IndexOnlyScan: - { - IndexOnlyScan *indexonlyscan = (IndexOnlyScan *) plan; - - ExplainIndexScanDetails(indexonlyscan->indexid, - indexonlyscan->indexorderdir, - es); - ExplainScanTarget((Scan *) indexonlyscan, es); - } - break; - case T_BitmapIndexScan: - { - BitmapIndexScan *bitmapindexscan = (BitmapIndexScan *) plan; - const char *indexname = - explain_get_index_name(bitmapindexscan->indexid); - - if (es->format == EXPLAIN_FORMAT_TEXT) - appendStringInfo(es->str, " on %s", indexname); - else - ExplainPropertyText("Index Name", indexname, es); - } - break; - case T_ModifyTable: - ExplainModifyTarget((ModifyTable *) plan, es); - break; - case T_NestLoop: - case T_MergeJoin: - case T_HashJoin: - /* NEW FOR RECATHON */ - case T_RecJoin: - { - const char *jointype; - - switch (((Join *) plan)->jointype) - { - case JOIN_INNER: - jointype = "Inner"; - break; - case JOIN_LEFT: - jointype = "Left"; - break; - case JOIN_FULL: - jointype = "Full"; - break; - case JOIN_RIGHT: - jointype = "Right"; - break; - case JOIN_SEMI: - jointype = "Semi"; - break; - case JOIN_ANTI: - jointype = "Anti"; - break; - default: - jointype = "???"; - break; - } - if (es->format == EXPLAIN_FORMAT_TEXT) - { - /* - * For historical reasons, the join type is interpolated - * into the node type name... - */ - if (((Join *) plan)->jointype != JOIN_INNER) - appendStringInfo(es->str, " %s Join", jointype); - else if (!IsA(plan, NestLoop)) - appendStringInfo(es->str, " Join"); - } - else - ExplainPropertyText("Join Type", jointype, es); - } - break; - case T_SetOp: - { - const char *setopcmd; - - switch (((SetOp *) plan)->cmd) - { - case SETOPCMD_INTERSECT: - setopcmd = "Intersect"; - break; - case SETOPCMD_INTERSECT_ALL: - setopcmd = "Intersect All"; - break; - case SETOPCMD_EXCEPT: - setopcmd = "Except"; - break; - case SETOPCMD_EXCEPT_ALL: - setopcmd = "Except All"; - break; - default: - setopcmd = "???"; - break; - } - if (es->format == EXPLAIN_FORMAT_TEXT) - appendStringInfo(es->str, " %s", setopcmd); - else - ExplainPropertyText("Command", setopcmd, es); - } - break; - default: - break; - } - - if (es->costs) - { - if (es->format == EXPLAIN_FORMAT_TEXT) - { - appendStringInfo(es->str, " (cost=%.2f..%.2f rows=%.0f width=%d)", - plan->startup_cost, plan->total_cost, - plan->plan_rows, plan->plan_width); - } - else - { - ExplainPropertyFloat("Startup Cost", plan->startup_cost, 2, es); - ExplainPropertyFloat("Total Cost", plan->total_cost, 2, es); - ExplainPropertyFloat("Plan Rows", plan->plan_rows, 0, es); - ExplainPropertyInteger("Plan Width", plan->plan_width, es); - } - } - - /* - * We have to forcibly clean up the instrumentation state because we - * haven't done ExecutorEnd yet. This is pretty grotty ... - */ - if (planstate->instrument) - InstrEndLoop(planstate->instrument); - - if (planstate->instrument && planstate->instrument->nloops > 0) - { - double nloops = planstate->instrument->nloops; - double startup_sec = 1000.0 * planstate->instrument->startup / nloops; - double total_sec = 1000.0 * planstate->instrument->total / nloops; - double rows = planstate->instrument->ntuples / nloops; - - if (es->format == EXPLAIN_FORMAT_TEXT) - { - if (planstate->instrument->need_timer) - appendStringInfo(es->str, - " (actual time=%.3f..%.3f rows=%.0f loops=%.0f)", - startup_sec, total_sec, rows, nloops); - else - appendStringInfo(es->str, - " (actual rows=%.0f loops=%.0f)", - rows, nloops); - } - else - { - if (planstate->instrument->need_timer) - { - ExplainPropertyFloat("Actual Startup Time", startup_sec, 3, es); - ExplainPropertyFloat("Actual Total Time", total_sec, 3, es); - } - ExplainPropertyFloat("Actual Rows", rows, 0, es); - ExplainPropertyFloat("Actual Loops", nloops, 0, es); - } - } - else if (es->analyze) - { - - if (es->format == EXPLAIN_FORMAT_TEXT) - appendStringInfo(es->str, " (never executed)"); - else if (planstate->instrument->need_timer) - { - ExplainPropertyFloat("Actual Startup Time", 0.0, 3, es); - ExplainPropertyFloat("Actual Total Time", 0.0, 3, es); - } - else - { - ExplainPropertyFloat("Actual Rows", 0.0, 0, es); - ExplainPropertyFloat("Actual Loops", 0.0, 0, es); - } - - } - - /* in text format, first line ends here */ - if (es->format == EXPLAIN_FORMAT_TEXT) - appendStringInfoChar(es->str, '\n'); - - /* target list */ - if (es->verbose) - show_plan_tlist(planstate, ancestors, es); - - /* quals, sort keys, etc */ - switch (nodeTag(plan)) - { - case T_IndexScan: - show_scan_qual(((IndexScan *) plan)->indexqualorig, - "Index Cond", planstate, ancestors, es); - if (((IndexScan *) plan)->indexqualorig) - show_instrumentation_count("Rows Removed by Index Recheck", 2, - planstate, es); - show_scan_qual(((IndexScan *) plan)->indexorderbyorig, - "Order By", planstate, ancestors, es); - show_scan_qual(plan->qual, "Filter", planstate, ancestors, es); - if (plan->qual) - show_instrumentation_count("Rows Removed by Filter", 1, - planstate, es); - break; - case T_IndexOnlyScan: - show_scan_qual(((IndexOnlyScan *) plan)->indexqual, - "Index Cond", planstate, ancestors, es); - if (((IndexOnlyScan *) plan)->indexqual) - show_instrumentation_count("Rows Removed by Index Recheck", 2, - planstate, es); - show_scan_qual(((IndexOnlyScan *) plan)->indexorderby, - "Order By", planstate, ancestors, es); - show_scan_qual(plan->qual, "Filter", planstate, ancestors, es); - if (plan->qual) - show_instrumentation_count("Rows Removed by Filter", 1, - planstate, es); - if (es->analyze) - ExplainPropertyLong("Heap Fetches", - ((IndexOnlyScanState *) planstate)->ioss_HeapFetches, es); - break; - case T_BitmapIndexScan: - show_scan_qual(((BitmapIndexScan *) plan)->indexqualorig, - "Index Cond", planstate, ancestors, es); - break; - case T_BitmapHeapScan: - show_scan_qual(((BitmapHeapScan *) plan)->bitmapqualorig, - "Recheck Cond", planstate, ancestors, es); - if (((BitmapHeapScan *) plan)->bitmapqualorig) - show_instrumentation_count("Rows Removed by Index Recheck", 2, - planstate, es); - /* FALL THRU */ - case T_SeqScan: - case T_ValuesScan: - case T_CteScan: - case T_WorkTableScan: - case T_SubqueryScan: - /* NEW FOR RECATHON */ - case T_RecScan: - show_scan_qual(plan->qual, "Filter", planstate, ancestors, es); - if (plan->qual) - show_instrumentation_count("Rows Removed by Filter", 1, - planstate, es); - break; - case T_FunctionScan: - if (es->verbose) - show_expression(((FunctionScan *) plan)->funcexpr, - "Function Call", planstate, ancestors, - es->verbose, es); - show_scan_qual(plan->qual, "Filter", planstate, ancestors, es); - if (plan->qual) - show_instrumentation_count("Rows Removed by Filter", 1, - planstate, es); - break; - case T_TidScan: - { - /* - * The tidquals list has OR semantics, so be sure to show it - * as an OR condition. - */ - List *tidquals = ((TidScan *) plan)->tidquals; - - if (list_length(tidquals) > 1) - tidquals = list_make1(make_orclause(tidquals)); - show_scan_qual(tidquals, "TID Cond", planstate, ancestors, es); - show_scan_qual(plan->qual, "Filter", planstate, ancestors, es); - if (plan->qual) - show_instrumentation_count("Rows Removed by Filter", 1, - planstate, es); - } - break; - case T_ForeignScan: - show_scan_qual(plan->qual, "Filter", planstate, ancestors, es); - if (plan->qual) - show_instrumentation_count("Rows Removed by Filter", 1, - planstate, es); - show_foreignscan_info((ForeignScanState *) planstate, es); - break; - case T_NestLoop: - /* NEW FOR RECATHON */ - case T_RecJoin: - show_upper_qual(((NestLoop *) plan)->join.joinqual, - "Join Filter", planstate, ancestors, es); - if (((NestLoop *) plan)->join.joinqual) - show_instrumentation_count("Rows Removed by Join Filter", 1, - planstate, es); - show_upper_qual(plan->qual, "Filter", planstate, ancestors, es); - if (plan->qual) - show_instrumentation_count("Rows Removed by Filter", 2, - planstate, es); - break; - case T_MergeJoin: - show_upper_qual(((MergeJoin *) plan)->mergeclauses, - "Merge Cond", planstate, ancestors, es); - show_upper_qual(((MergeJoin *) plan)->join.joinqual, - "Join Filter", planstate, ancestors, es); - if (((MergeJoin *) plan)->join.joinqual) - show_instrumentation_count("Rows Removed by Join Filter", 1, - planstate, es); - show_upper_qual(plan->qual, "Filter", planstate, ancestors, es); - if (plan->qual) - show_instrumentation_count("Rows Removed by Filter", 2, - planstate, es); - break; - case T_HashJoin: - show_upper_qual(((HashJoin *) plan)->hashclauses, - "Hash Cond", planstate, ancestors, es); - show_upper_qual(((HashJoin *) plan)->join.joinqual, - "Join Filter", planstate, ancestors, es); - if (((HashJoin *) plan)->join.joinqual) - show_instrumentation_count("Rows Removed by Join Filter", 1, - planstate, es); - show_upper_qual(plan->qual, "Filter", planstate, ancestors, es); - if (plan->qual) - show_instrumentation_count("Rows Removed by Filter", 2, - planstate, es); - break; - case T_Agg: - case T_Group: - show_upper_qual(plan->qual, "Filter", planstate, ancestors, es); - if (plan->qual) - show_instrumentation_count("Rows Removed by Filter", 1, - planstate, es); - break; - case T_Sort: - show_sort_keys((SortState *) planstate, ancestors, es); - show_sort_info((SortState *) planstate, es); - break; - case T_MergeAppend: - show_merge_append_keys((MergeAppendState *) planstate, - ancestors, es); - break; - case T_Result: - show_upper_qual((List *) ((Result *) plan)->resconstantqual, - "One-Time Filter", planstate, ancestors, es); - show_upper_qual(plan->qual, "Filter", planstate, ancestors, es); - if (plan->qual) - show_instrumentation_count("Rows Removed by Filter", 1, - planstate, es); - break; - case T_Hash: - show_hash_info((HashState *) planstate, es); - break; - default: - break; - } - - /* Show buffer usage */ - if (es->buffers) - { - const BufferUsage *usage = &planstate->instrument->bufusage; - - if (es->format == EXPLAIN_FORMAT_TEXT) - { - bool has_shared = (usage->shared_blks_hit > 0 || - usage->shared_blks_read > 0 || - usage->shared_blks_dirtied > 0 || - usage->shared_blks_written > 0); - bool has_local = (usage->local_blks_hit > 0 || - usage->local_blks_read > 0 || - usage->local_blks_dirtied > 0 || - usage->local_blks_written > 0); - bool has_temp = (usage->temp_blks_read > 0 || - usage->temp_blks_written > 0); - bool has_timing = (!INSTR_TIME_IS_ZERO(usage->blk_read_time) || - !INSTR_TIME_IS_ZERO(usage->blk_write_time)); - - /* Show only positive counter values. */ - if (has_shared || has_local || has_temp) - { - appendStringInfoSpaces(es->str, es->indent * 2); - appendStringInfoString(es->str, "Buffers:"); - - if (has_shared) - { - appendStringInfoString(es->str, " shared"); - if (usage->shared_blks_hit > 0) - appendStringInfo(es->str, " hit=%ld", - usage->shared_blks_hit); - if (usage->shared_blks_read > 0) - appendStringInfo(es->str, " read=%ld", - usage->shared_blks_read); - if (usage->shared_blks_dirtied > 0) - appendStringInfo(es->str, " dirtied=%ld", - usage->shared_blks_dirtied); - if (usage->shared_blks_written > 0) - appendStringInfo(es->str, " written=%ld", - usage->shared_blks_written); - if (has_local || has_temp) - appendStringInfoChar(es->str, ','); - } - if (has_local) - { - appendStringInfoString(es->str, " local"); - if (usage->local_blks_hit > 0) - appendStringInfo(es->str, " hit=%ld", - usage->local_blks_hit); - if (usage->local_blks_read > 0) - appendStringInfo(es->str, " read=%ld", - usage->local_blks_read); - if (usage->local_blks_dirtied > 0) - appendStringInfo(es->str, " dirtied=%ld", - usage->local_blks_dirtied); - if (usage->local_blks_written > 0) - appendStringInfo(es->str, " written=%ld", - usage->local_blks_written); - if (has_temp) - appendStringInfoChar(es->str, ','); - } - if (has_temp) - { - appendStringInfoString(es->str, " temp"); - if (usage->temp_blks_read > 0) - appendStringInfo(es->str, " read=%ld", - usage->temp_blks_read); - if (usage->temp_blks_written > 0) - appendStringInfo(es->str, " written=%ld", - usage->temp_blks_written); - } - appendStringInfoChar(es->str, '\n'); - } - - /* As above, show only positive counter values. */ - if (has_timing) - { - appendStringInfoSpaces(es->str, es->indent * 2); - appendStringInfoString(es->str, "I/O Timings:"); - if (!INSTR_TIME_IS_ZERO(usage->blk_read_time)) - appendStringInfo(es->str, " read=%0.3f", - INSTR_TIME_GET_MILLISEC(usage->blk_read_time)); - if (!INSTR_TIME_IS_ZERO(usage->blk_write_time)) - appendStringInfo(es->str, " write=%0.3f", - INSTR_TIME_GET_MILLISEC(usage->blk_write_time)); - appendStringInfoChar(es->str, '\n'); - } - } - else - { - ExplainPropertyLong("Shared Hit Blocks", usage->shared_blks_hit, es); - ExplainPropertyLong("Shared Read Blocks", usage->shared_blks_read, es); - ExplainPropertyLong("Shared Dirtied Blocks", usage->shared_blks_dirtied, es); - ExplainPropertyLong("Shared Written Blocks", usage->shared_blks_written, es); - ExplainPropertyLong("Local Hit Blocks", usage->local_blks_hit, es); - ExplainPropertyLong("Local Read Blocks", usage->local_blks_read, es); - ExplainPropertyLong("Local Dirtied Blocks", usage->local_blks_dirtied, es); - ExplainPropertyLong("Local Written Blocks", usage->local_blks_written, es); - ExplainPropertyLong("Temp Read Blocks", usage->temp_blks_read, es); - ExplainPropertyLong("Temp Written Blocks", usage->temp_blks_written, es); - ExplainPropertyFloat("I/O Read Time", INSTR_TIME_GET_MILLISEC(usage->blk_read_time), 3, es); - ExplainPropertyFloat("I/O Write Time", INSTR_TIME_GET_MILLISEC(usage->blk_write_time), 3, es); - } - } - - /* Get ready to display the child plans */ - haschildren = planstate->initPlan || - outerPlanState(planstate) || - innerPlanState(planstate) || - IsA(plan, ModifyTable) || - IsA(plan, Append) || - IsA(plan, MergeAppend) || - IsA(plan, BitmapAnd) || - IsA(plan, BitmapOr) || - IsA(plan, SubqueryScan) || - planstate->subPlan; - if (haschildren) - { - ExplainOpenGroup("Plans", "Plans", false, es); - /* Pass current PlanState as head of ancestors list for children */ - ancestors = lcons(planstate, ancestors); - } - - /* initPlan-s */ - if (planstate->initPlan) - ExplainSubPlans(planstate->initPlan, ancestors, "InitPlan", es); - - /* lefttree */ - if (outerPlanState(planstate)) - ExplainNode(outerPlanState(planstate), ancestors, - "Outer", NULL, es); - - /* righttree */ - if (innerPlanState(planstate)) - ExplainNode(innerPlanState(planstate), ancestors, - "Inner", NULL, es); - - /* special child plans */ - switch (nodeTag(plan)) - { - case T_ModifyTable: - ExplainMemberNodes(((ModifyTable *) plan)->plans, - ((ModifyTableState *) planstate)->mt_plans, - ancestors, es); - break; - case T_Append: - ExplainMemberNodes(((Append *) plan)->appendplans, - ((AppendState *) planstate)->appendplans, - ancestors, es); - break; - case T_MergeAppend: - ExplainMemberNodes(((MergeAppend *) plan)->mergeplans, - ((MergeAppendState *) planstate)->mergeplans, - ancestors, es); - break; - case T_BitmapAnd: - ExplainMemberNodes(((BitmapAnd *) plan)->bitmapplans, - ((BitmapAndState *) planstate)->bitmapplans, - ancestors, es); - break; - case T_BitmapOr: - ExplainMemberNodes(((BitmapOr *) plan)->bitmapplans, - ((BitmapOrState *) planstate)->bitmapplans, - ancestors, es); - break; - case T_SubqueryScan: - ExplainNode(((SubqueryScanState *) planstate)->subplan, ancestors, - "Subquery", NULL, es); - break; - default: - break; - } - - /* subPlan-s */ - if (planstate->subPlan) - ExplainSubPlans(planstate->subPlan, ancestors, "SubPlan", es); - - /* end of child plans */ - if (haschildren) - { - ancestors = list_delete_first(ancestors); - ExplainCloseGroup("Plans", "Plans", false, es); - } - - /* in text format, undo whatever indentation we added */ - if (es->format == EXPLAIN_FORMAT_TEXT) - es->indent = save_indent; - - ExplainCloseGroup("Plan", - relationship ? NULL : "Plan", - true, es); + Plan *plan = planstate->plan; + const char *pname; /* node type name for text output */ + const char *sname; /* node type name for non-text output */ + const char *strategy = NULL; + const char *operation = NULL; + int save_indent = es->indent; + bool haschildren; + + switch (nodeTag(plan)) + { + case T_Result: + pname = sname = "Result"; + break; + case T_ModifyTable: + sname = "ModifyTable"; + switch (((ModifyTable *) plan)->operation) + { + case CMD_INSERT: + pname = operation = "Insert"; + break; + case CMD_UPDATE: + pname = operation = "Update"; + break; + case CMD_DELETE: + pname = operation = "Delete"; + break; + default: + pname = "???"; + break; + } + break; + case T_Append: + pname = sname = "Append"; + break; + case T_MergeAppend: + pname = sname = "Merge Append"; + break; + case T_RecursiveUnion: + pname = sname = "Recursive Union"; + break; + case T_BitmapAnd: + pname = sname = "BitmapAnd"; + break; + case T_BitmapOr: + pname = sname = "BitmapOr"; + break; + case T_NestLoop: + pname = sname = "Nested Loop"; + break; + case T_MergeJoin: + pname = "Merge"; /* "Join" gets added by jointype switch */ + sname = "Merge Join"; + break; + case T_HashJoin: + pname = "Hash"; /* "Join" gets added by jointype switch */ + sname = "Hash Join"; + break; + /* NEW FOR RECATHON */ + case T_RecJoin: + pname = "Recommend"; /* "Join" gets added by jointype switch */ + sname = "Recommend Join"; + break; + case T_SeqScan: + pname = sname = "Seq Scan"; + break; + case T_IndexScan: + pname = sname = "Index Scan"; + break; + case T_IndexOnlyScan: + pname = sname = "Index Only Scan"; + break; + case T_BitmapIndexScan: + pname = sname = "Bitmap Index Scan"; + break; + case T_BitmapHeapScan: + pname = sname = "Bitmap Heap Scan"; + break; + case T_TidScan: + pname = sname = "Tid Scan"; + break; + case T_SubqueryScan: + pname = sname = "Subquery Scan"; + break; + case T_FunctionScan: + pname = sname = "Function Scan"; + break; + case T_ValuesScan: + pname = sname = "Values Scan"; + break; + case T_CteScan: + pname = sname = "CTE Scan"; + break; + case T_WorkTableScan: + pname = sname = "WorkTable Scan"; + break; + case T_ForeignScan: + pname = sname = "Foreign Scan"; + break; + /* NEW FOR RECATHON */ + case T_RecScan: + { + /* We'll mark a strategy depending on a few + * parameters. */ + RecommendInfo *recInfo; + pname = sname = "Recommend"; + + recInfo = (RecommendInfo*) ((RecScan*)plan)->recommender; + switch(recInfo->opType) { + case OP_GENERATE: + strategy = "GenerateRecommend"; + break; + case OP_JOIN: + strategy = "JoinRecommend"; + break; + case OP_GENERATEJOIN: + strategy = "GenerateJoinRecommend"; + break; + case OP_FILTER: + strategy = "FilterRecommend"; + break; + case OP_NOFILTER: + strategy = "StandardRecommend"; + break; + case OP_INDEX: + default: + strategy = "Recommend ???"; + break; + } + } + break; + case T_Material: + pname = sname = "Materialize"; + break; + case T_Sort: + pname = sname = "Sort"; + break; + case T_Group: + pname = sname = "Group"; + break; + case T_Agg: + sname = "Aggregate"; + switch (((Agg *) plan)->aggstrategy) + { + case AGG_PLAIN: + pname = "Aggregate"; + strategy = "Plain"; + break; + case AGG_SORTED: + pname = "GroupAggregate"; + strategy = "Sorted"; + break; + case AGG_HASHED: + pname = "HashAggregate"; + strategy = "Hashed"; + break; + default: + pname = "Aggregate ???"; + strategy = "???"; + break; + } + break; + case T_WindowAgg: + pname = sname = "WindowAgg"; + break; + case T_Unique: + pname = sname = "Unique"; + break; + case T_SetOp: + sname = "SetOp"; + switch (((SetOp *) plan)->strategy) + { + case SETOP_SORTED: + pname = "SetOp"; + strategy = "Sorted"; + break; + case SETOP_HASHED: + pname = "HashSetOp"; + strategy = "Hashed"; + break; + default: + pname = "SetOp ???"; + strategy = "???"; + break; + } + break; + case T_LockRows: + pname = sname = "LockRows"; + break; + case T_Limit: + pname = sname = "Limit"; + break; + case T_Hash: + pname = sname = "Hash"; + break; + default: + pname = sname = "???"; + break; + } + + ExplainOpenGroup("Plan", + relationship ? NULL : "Plan", + true, es); + + if (es->format == EXPLAIN_FORMAT_TEXT) + { + if (plan_name) + { + appendStringInfoSpaces(es->str, es->indent * 2); + appendStringInfo(es->str, "%s\n", plan_name); + es->indent++; + } + if (es->indent) + { + appendStringInfoSpaces(es->str, es->indent * 2); + appendStringInfoString(es->str, "-> "); + es->indent += 2; + } + appendStringInfoString(es->str, pname); + es->indent++; + } + else + { + ExplainPropertyText("Node Type", sname, es); + if (strategy) + ExplainPropertyText("Strategy", strategy, es); + if (operation) + ExplainPropertyText("Operation", operation, es); + if (relationship) + ExplainPropertyText("Parent Relationship", relationship, es); + if (plan_name) + ExplainPropertyText("Subplan Name", plan_name, es); + } + + switch (nodeTag(plan)) + { + case T_SeqScan: + case T_BitmapHeapScan: + case T_TidScan: + case T_SubqueryScan: + case T_FunctionScan: + case T_ValuesScan: + case T_CteScan: + case T_WorkTableScan: + case T_ForeignScan: + /* NEW FOR RECATHON */ + case T_RecScan: + ExplainScanTarget((Scan *) plan, es); + break; + case T_IndexScan: + { + IndexScan *indexscan = (IndexScan *) plan; + + ExplainIndexScanDetails(indexscan->indexid, + indexscan->indexorderdir, + es); + ExplainScanTarget((Scan *) indexscan, es); + } + break; + case T_IndexOnlyScan: + { + IndexOnlyScan *indexonlyscan = (IndexOnlyScan *) plan; + + ExplainIndexScanDetails(indexonlyscan->indexid, + indexonlyscan->indexorderdir, + es); + ExplainScanTarget((Scan *) indexonlyscan, es); + } + break; + case T_BitmapIndexScan: + { + BitmapIndexScan *bitmapindexscan = (BitmapIndexScan *) plan; + const char *indexname = + explain_get_index_name(bitmapindexscan->indexid); + + if (es->format == EXPLAIN_FORMAT_TEXT) + appendStringInfo(es->str, " on %s", indexname); + else + ExplainPropertyText("Index Name", indexname, es); + } + break; + case T_ModifyTable: + ExplainModifyTarget((ModifyTable *) plan, es); + break; + case T_NestLoop: + case T_MergeJoin: + case T_HashJoin: + /* NEW FOR RECATHON */ + case T_RecJoin: + { + const char *jointype; + + switch (((Join *) plan)->jointype) + { + case JOIN_INNER: + jointype = "Inner"; + break; + case JOIN_LEFT: + jointype = "Left"; + break; + case JOIN_FULL: + jointype = "Full"; + break; + case JOIN_RIGHT: + jointype = "Right"; + break; + case JOIN_SEMI: + jointype = "Semi"; + break; + case JOIN_ANTI: + jointype = "Anti"; + break; + default: + jointype = "???"; + break; + } + if (es->format == EXPLAIN_FORMAT_TEXT) + { + /* + * For historical reasons, the join type is interpolated + * into the node type name... + */ + if (((Join *) plan)->jointype != JOIN_INNER) + appendStringInfo(es->str, " %s Join", jointype); + else if (!IsA(plan, NestLoop)) + appendStringInfo(es->str, " Join"); + } + else + ExplainPropertyText("Join Type", jointype, es); + } + break; + case T_SetOp: + { + const char *setopcmd; + + switch (((SetOp *) plan)->cmd) + { + case SETOPCMD_INTERSECT: + setopcmd = "Intersect"; + break; + case SETOPCMD_INTERSECT_ALL: + setopcmd = "Intersect All"; + break; + case SETOPCMD_EXCEPT: + setopcmd = "Except"; + break; + case SETOPCMD_EXCEPT_ALL: + setopcmd = "Except All"; + break; + default: + setopcmd = "???"; + break; + } + if (es->format == EXPLAIN_FORMAT_TEXT) + appendStringInfo(es->str, " %s", setopcmd); + else + ExplainPropertyText("Command", setopcmd, es); + } + break; + default: + break; + } + + if (es->costs) + { + if (es->format == EXPLAIN_FORMAT_TEXT) + { + appendStringInfo(es->str, " (cost=%.2f..%.2f rows=%.0f width=%d)", + plan->startup_cost, plan->total_cost, + plan->plan_rows, plan->plan_width); + } + else + { + ExplainPropertyFloat("Startup Cost", plan->startup_cost, 2, es); + ExplainPropertyFloat("Total Cost", plan->total_cost, 2, es); + ExplainPropertyFloat("Plan Rows", plan->plan_rows, 0, es); + ExplainPropertyInteger("Plan Width", plan->plan_width, es); + } + } + + /* + * We have to forcibly clean up the instrumentation state because we + * haven't done ExecutorEnd yet. This is pretty grotty ... + */ + if (planstate->instrument) + InstrEndLoop(planstate->instrument); + + if (planstate->instrument && planstate->instrument->nloops > 0) + { + double nloops = planstate->instrument->nloops; + double startup_sec = 1000.0 * planstate->instrument->startup / nloops; + double total_sec = 1000.0 * planstate->instrument->total / nloops; + double rows = planstate->instrument->ntuples / nloops; + + if (es->format == EXPLAIN_FORMAT_TEXT) + { + if (planstate->instrument->need_timer) + appendStringInfo(es->str, + " (actual time=%.3f..%.3f rows=%.0f loops=%.0f)", + startup_sec, total_sec, rows, nloops); + else + appendStringInfo(es->str, + " (actual rows=%.0f loops=%.0f)", + rows, nloops); + } + else + { + if (planstate->instrument->need_timer) + { + ExplainPropertyFloat("Actual Startup Time", startup_sec, 3, es); + ExplainPropertyFloat("Actual Total Time", total_sec, 3, es); + } + ExplainPropertyFloat("Actual Rows", rows, 0, es); + ExplainPropertyFloat("Actual Loops", nloops, 0, es); + } + } + else if (es->analyze) + { + + if (es->format == EXPLAIN_FORMAT_TEXT) + appendStringInfo(es->str, " (never executed)"); + else if (planstate->instrument->need_timer) + { + ExplainPropertyFloat("Actual Startup Time", 0.0, 3, es); + ExplainPropertyFloat("Actual Total Time", 0.0, 3, es); + } + else + { + ExplainPropertyFloat("Actual Rows", 0.0, 0, es); + ExplainPropertyFloat("Actual Loops", 0.0, 0, es); + } + + } + + /* in text format, first line ends here */ + if (es->format == EXPLAIN_FORMAT_TEXT) + appendStringInfoChar(es->str, '\n'); + + /* target list */ + if (es->verbose) + show_plan_tlist(planstate, ancestors, es); + + /* quals, sort keys, etc */ + switch (nodeTag(plan)) + { + case T_IndexScan: + show_scan_qual(((IndexScan *) plan)->indexqualorig, + "Index Cond", planstate, ancestors, es); + if (((IndexScan *) plan)->indexqualorig) + show_instrumentation_count("Rows Removed by Index Recheck", 2, + planstate, es); + show_scan_qual(((IndexScan *) plan)->indexorderbyorig, + "Order By", planstate, ancestors, es); + show_scan_qual(plan->qual, "Filter", planstate, ancestors, es); + if (plan->qual) + show_instrumentation_count("Rows Removed by Filter", 1, + planstate, es); + break; + case T_IndexOnlyScan: + show_scan_qual(((IndexOnlyScan *) plan)->indexqual, + "Index Cond", planstate, ancestors, es); + if (((IndexOnlyScan *) plan)->indexqual) + show_instrumentation_count("Rows Removed by Index Recheck", 2, + planstate, es); + show_scan_qual(((IndexOnlyScan *) plan)->indexorderby, + "Order By", planstate, ancestors, es); + show_scan_qual(plan->qual, "Filter", planstate, ancestors, es); + if (plan->qual) + show_instrumentation_count("Rows Removed by Filter", 1, + planstate, es); + if (es->analyze) + ExplainPropertyLong("Heap Fetches", + ((IndexOnlyScanState *) planstate)->ioss_HeapFetches, es); + break; + case T_BitmapIndexScan: + show_scan_qual(((BitmapIndexScan *) plan)->indexqualorig, + "Index Cond", planstate, ancestors, es); + break; + case T_BitmapHeapScan: + show_scan_qual(((BitmapHeapScan *) plan)->bitmapqualorig, + "Recheck Cond", planstate, ancestors, es); + if (((BitmapHeapScan *) plan)->bitmapqualorig) + show_instrumentation_count("Rows Removed by Index Recheck", 2, + planstate, es); + /* FALL THRU */ + case T_SeqScan: + case T_ValuesScan: + case T_CteScan: + case T_WorkTableScan: + case T_SubqueryScan: + /* NEW FOR RECATHON */ + case T_RecScan: + show_scan_qual(plan->qual, "Filter", planstate, ancestors, es); + if (plan->qual) + show_instrumentation_count("Rows Removed by Filter", 1, + planstate, es); + break; + case T_FunctionScan: + if (es->verbose) + show_expression(((FunctionScan *) plan)->funcexpr, + "Function Call", planstate, ancestors, + es->verbose, es); + show_scan_qual(plan->qual, "Filter", planstate, ancestors, es); + if (plan->qual) + show_instrumentation_count("Rows Removed by Filter", 1, + planstate, es); + break; + case T_TidScan: + { + /* + * The tidquals list has OR semantics, so be sure to show it + * as an OR condition. + */ + List *tidquals = ((TidScan *) plan)->tidquals; + + if (list_length(tidquals) > 1) + tidquals = list_make1(make_orclause(tidquals)); + show_scan_qual(tidquals, "TID Cond", planstate, ancestors, es); + show_scan_qual(plan->qual, "Filter", planstate, ancestors, es); + if (plan->qual) + show_instrumentation_count("Rows Removed by Filter", 1, + planstate, es); + } + break; + case T_ForeignScan: + show_scan_qual(plan->qual, "Filter", planstate, ancestors, es); + if (plan->qual) + show_instrumentation_count("Rows Removed by Filter", 1, + planstate, es); + show_foreignscan_info((ForeignScanState *) planstate, es); + break; + case T_NestLoop: + /* NEW FOR RECATHON */ + case T_RecJoin: + show_upper_qual(((NestLoop *) plan)->join.joinqual, + "Join Filter", planstate, ancestors, es); + if (((NestLoop *) plan)->join.joinqual) + show_instrumentation_count("Rows Removed by Join Filter", 1, + planstate, es); + show_upper_qual(plan->qual, "Filter", planstate, ancestors, es); + if (plan->qual) + show_instrumentation_count("Rows Removed by Filter", 2, + planstate, es); + break; + case T_MergeJoin: + show_upper_qual(((MergeJoin *) plan)->mergeclauses, + "Merge Cond", planstate, ancestors, es); + show_upper_qual(((MergeJoin *) plan)->join.joinqual, + "Join Filter", planstate, ancestors, es); + if (((MergeJoin *) plan)->join.joinqual) + show_instrumentation_count("Rows Removed by Join Filter", 1, + planstate, es); + show_upper_qual(plan->qual, "Filter", planstate, ancestors, es); + if (plan->qual) + show_instrumentation_count("Rows Removed by Filter", 2, + planstate, es); + break; + case T_HashJoin: + show_upper_qual(((HashJoin *) plan)->hashclauses, + "Hash Cond", planstate, ancestors, es); + show_upper_qual(((HashJoin *) plan)->join.joinqual, + "Join Filter", planstate, ancestors, es); + if (((HashJoin *) plan)->join.joinqual) + show_instrumentation_count("Rows Removed by Join Filter", 1, + planstate, es); + show_upper_qual(plan->qual, "Filter", planstate, ancestors, es); + if (plan->qual) + show_instrumentation_count("Rows Removed by Filter", 2, + planstate, es); + break; + case T_Agg: + case T_Group: + show_upper_qual(plan->qual, "Filter", planstate, ancestors, es); + if (plan->qual) + show_instrumentation_count("Rows Removed by Filter", 1, + planstate, es); + break; + case T_Sort: + show_sort_keys((SortState *) planstate, ancestors, es); + show_sort_info((SortState *) planstate, es); + break; + case T_MergeAppend: + show_merge_append_keys((MergeAppendState *) planstate, + ancestors, es); + break; + case T_Result: + show_upper_qual((List *) ((Result *) plan)->resconstantqual, + "One-Time Filter", planstate, ancestors, es); + show_upper_qual(plan->qual, "Filter", planstate, ancestors, es); + if (plan->qual) + show_instrumentation_count("Rows Removed by Filter", 1, + planstate, es); + break; + case T_Hash: + show_hash_info((HashState *) planstate, es); + break; + default: + break; + } + + /* Show buffer usage */ + if (es->buffers) + { + const BufferUsage *usage = &planstate->instrument->bufusage; + + if (es->format == EXPLAIN_FORMAT_TEXT) + { + bool has_shared = (usage->shared_blks_hit > 0 || + usage->shared_blks_read > 0 || + usage->shared_blks_dirtied > 0 || + usage->shared_blks_written > 0); + bool has_local = (usage->local_blks_hit > 0 || + usage->local_blks_read > 0 || + usage->local_blks_dirtied > 0 || + usage->local_blks_written > 0); + bool has_temp = (usage->temp_blks_read > 0 || + usage->temp_blks_written > 0); + bool has_timing = (!INSTR_TIME_IS_ZERO(usage->blk_read_time) || + !INSTR_TIME_IS_ZERO(usage->blk_write_time)); + + /* Show only positive counter values. */ + if (has_shared || has_local || has_temp) + { + appendStringInfoSpaces(es->str, es->indent * 2); + appendStringInfoString(es->str, "Buffers:"); + + if (has_shared) + { + appendStringInfoString(es->str, " shared"); + if (usage->shared_blks_hit > 0) + appendStringInfo(es->str, " hit=%ld", + usage->shared_blks_hit); + if (usage->shared_blks_read > 0) + appendStringInfo(es->str, " read=%ld", + usage->shared_blks_read); + if (usage->shared_blks_dirtied > 0) + appendStringInfo(es->str, " dirtied=%ld", + usage->shared_blks_dirtied); + if (usage->shared_blks_written > 0) + appendStringInfo(es->str, " written=%ld", + usage->shared_blks_written); + if (has_local || has_temp) + appendStringInfoChar(es->str, ','); + } + if (has_local) + { + appendStringInfoString(es->str, " local"); + if (usage->local_blks_hit > 0) + appendStringInfo(es->str, " hit=%ld", + usage->local_blks_hit); + if (usage->local_blks_read > 0) + appendStringInfo(es->str, " read=%ld", + usage->local_blks_read); + if (usage->local_blks_dirtied > 0) + appendStringInfo(es->str, " dirtied=%ld", + usage->local_blks_dirtied); + if (usage->local_blks_written > 0) + appendStringInfo(es->str, " written=%ld", + usage->local_blks_written); + if (has_temp) + appendStringInfoChar(es->str, ','); + } + if (has_temp) + { + appendStringInfoString(es->str, " temp"); + if (usage->temp_blks_read > 0) + appendStringInfo(es->str, " read=%ld", + usage->temp_blks_read); + if (usage->temp_blks_written > 0) + appendStringInfo(es->str, " written=%ld", + usage->temp_blks_written); + } + appendStringInfoChar(es->str, '\n'); + } + + /* As above, show only positive counter values. */ + if (has_timing) + { + appendStringInfoSpaces(es->str, es->indent * 2); + appendStringInfoString(es->str, "I/O Timings:"); + if (!INSTR_TIME_IS_ZERO(usage->blk_read_time)) + appendStringInfo(es->str, " read=%0.3f", + INSTR_TIME_GET_MILLISEC(usage->blk_read_time)); + if (!INSTR_TIME_IS_ZERO(usage->blk_write_time)) + appendStringInfo(es->str, " write=%0.3f", + INSTR_TIME_GET_MILLISEC(usage->blk_write_time)); + appendStringInfoChar(es->str, '\n'); + } + } + else + { + ExplainPropertyLong("Shared Hit Blocks", usage->shared_blks_hit, es); + ExplainPropertyLong("Shared Read Blocks", usage->shared_blks_read, es); + ExplainPropertyLong("Shared Dirtied Blocks", usage->shared_blks_dirtied, es); + ExplainPropertyLong("Shared Written Blocks", usage->shared_blks_written, es); + ExplainPropertyLong("Local Hit Blocks", usage->local_blks_hit, es); + ExplainPropertyLong("Local Read Blocks", usage->local_blks_read, es); + ExplainPropertyLong("Local Dirtied Blocks", usage->local_blks_dirtied, es); + ExplainPropertyLong("Local Written Blocks", usage->local_blks_written, es); + ExplainPropertyLong("Temp Read Blocks", usage->temp_blks_read, es); + ExplainPropertyLong("Temp Written Blocks", usage->temp_blks_written, es); + ExplainPropertyFloat("I/O Read Time", INSTR_TIME_GET_MILLISEC(usage->blk_read_time), 3, es); + ExplainPropertyFloat("I/O Write Time", INSTR_TIME_GET_MILLISEC(usage->blk_write_time), 3, es); + } + } + + /* Get ready to display the child plans */ + haschildren = planstate->initPlan || + outerPlanState(planstate) || + innerPlanState(planstate) || + IsA(plan, ModifyTable) || + IsA(plan, Append) || + IsA(plan, MergeAppend) || + IsA(plan, BitmapAnd) || + IsA(plan, BitmapOr) || + IsA(plan, SubqueryScan) || + planstate->subPlan; + if (haschildren) + { + ExplainOpenGroup("Plans", "Plans", false, es); + /* Pass current PlanState as head of ancestors list for children */ + ancestors = lcons(planstate, ancestors); + } + + /* initPlan-s */ + if (planstate->initPlan) + ExplainSubPlans(planstate->initPlan, ancestors, "InitPlan", es); + + /* lefttree */ + if (outerPlanState(planstate)) + ExplainNode(outerPlanState(planstate), ancestors, + "Outer", NULL, es); + + /* righttree */ + if (innerPlanState(planstate)) + ExplainNode(innerPlanState(planstate), ancestors, + "Inner", NULL, es); + + /* special child plans */ + switch (nodeTag(plan)) + { + case T_ModifyTable: + ExplainMemberNodes(((ModifyTable *) plan)->plans, + ((ModifyTableState *) planstate)->mt_plans, + ancestors, es); + break; + case T_Append: + ExplainMemberNodes(((Append *) plan)->appendplans, + ((AppendState *) planstate)->appendplans, + ancestors, es); + break; + case T_MergeAppend: + ExplainMemberNodes(((MergeAppend *) plan)->mergeplans, + ((MergeAppendState *) planstate)->mergeplans, + ancestors, es); + break; + case T_BitmapAnd: + ExplainMemberNodes(((BitmapAnd *) plan)->bitmapplans, + ((BitmapAndState *) planstate)->bitmapplans, + ancestors, es); + break; + case T_BitmapOr: + ExplainMemberNodes(((BitmapOr *) plan)->bitmapplans, + ((BitmapOrState *) planstate)->bitmapplans, + ancestors, es); + break; + case T_SubqueryScan: + ExplainNode(((SubqueryScanState *) planstate)->subplan, ancestors, + "Subquery", NULL, es); + break; + default: + break; + } + + /* subPlan-s */ + if (planstate->subPlan) + ExplainSubPlans(planstate->subPlan, ancestors, "SubPlan", es); + + /* end of child plans */ + if (haschildren) + { + ancestors = list_delete_first(ancestors); + ExplainCloseGroup("Plans", "Plans", false, es); + } + + /* in text format, undo whatever indentation we added */ + if (es->format == EXPLAIN_FORMAT_TEXT) + es->indent = save_indent; + + ExplainCloseGroup("Plan", + relationship ? NULL : "Plan", + true, es); } /* @@ -1465,42 +1479,42 @@ ExplainNode(PlanState *planstate, List *ancestors, static void show_plan_tlist(PlanState *planstate, List *ancestors, ExplainState *es) { - Plan *plan = planstate->plan; - List *context; - List *result = NIL; - bool useprefix; - ListCell *lc; - - /* No work if empty tlist (this occurs eg in bitmap indexscans) */ - if (plan->targetlist == NIL) - return; - /* The tlist of an Append isn't real helpful, so suppress it */ - if (IsA(plan, Append)) - return; - /* Likewise for MergeAppend and RecursiveUnion */ - if (IsA(plan, MergeAppend)) - return; - if (IsA(plan, RecursiveUnion)) - return; - - /* Set up deparsing context */ - context = deparse_context_for_planstate((Node *) planstate, - ancestors, - es->rtable); - useprefix = list_length(es->rtable) > 1; - - /* Deparse each result column (we now include resjunk ones) */ - foreach(lc, plan->targetlist) - { - TargetEntry *tle = (TargetEntry *) lfirst(lc); - - result = lappend(result, - deparse_expression((Node *) tle->expr, context, - useprefix, false)); - } - - /* Print results */ - ExplainPropertyList("Output", result, es); + Plan *plan = planstate->plan; + List *context; + List *result = NIL; + bool useprefix; + ListCell *lc; + + /* No work if empty tlist (this occurs eg in bitmap indexscans) */ + if (plan->targetlist == NIL) + return; + /* The tlist of an Append isn't real helpful, so suppress it */ + if (IsA(plan, Append)) + return; + /* Likewise for MergeAppend and RecursiveUnion */ + if (IsA(plan, MergeAppend)) + return; + if (IsA(plan, RecursiveUnion)) + return; + + /* Set up deparsing context */ + context = deparse_context_for_planstate((Node *) planstate, + ancestors, + es->rtable); + useprefix = list_length(es->rtable) > 1; + + /* Deparse each result column (we now include resjunk ones) */ + foreach(lc, plan->targetlist) + { + TargetEntry *tle = (TargetEntry *) lfirst(lc); + + result = lappend(result, + deparse_expression((Node *) tle->expr, context, + useprefix, false)); + } + + /* Print results */ + ExplainPropertyList("Output", result, es); } /* @@ -1508,22 +1522,22 @@ show_plan_tlist(PlanState *planstate, List *ancestors, ExplainState *es) */ static void show_expression(Node *node, const char *qlabel, - PlanState *planstate, List *ancestors, - bool useprefix, ExplainState *es) + PlanState *planstate, List *ancestors, + bool useprefix, ExplainState *es) { - List *context; - char *exprstr; - - /* Set up deparsing context */ - context = deparse_context_for_planstate((Node *) planstate, - ancestors, - es->rtable); - - /* Deparse the expression */ - exprstr = deparse_expression(node, context, useprefix, false); - - /* And add to es->str */ - ExplainPropertyText(qlabel, exprstr, es); + List *context; + char *exprstr; + + /* Set up deparsing context */ + context = deparse_context_for_planstate((Node *) planstate, + ancestors, + es->rtable); + + /* Deparse the expression */ + exprstr = deparse_expression(node, context, useprefix, false); + + /* And add to es->str */ + ExplainPropertyText(qlabel, exprstr, es); } /* @@ -1531,20 +1545,20 @@ show_expression(Node *node, const char *qlabel, */ static void show_qual(List *qual, const char *qlabel, - PlanState *planstate, List *ancestors, - bool useprefix, ExplainState *es) + PlanState *planstate, List *ancestors, + bool useprefix, ExplainState *es) { - Node *node; - - /* No work if empty qual */ - if (qual == NIL) - return; - - /* Convert AND list to explicit AND */ - node = (Node *) make_ands_explicit(qual); - - /* And show it */ - show_expression(node, qlabel, planstate, ancestors, useprefix, es); + Node *node; + + /* No work if empty qual */ + if (qual == NIL) + return; + + /* Convert AND list to explicit AND */ + node = (Node *) make_ands_explicit(qual); + + /* And show it */ + show_expression(node, qlabel, planstate, ancestors, useprefix, es); } /* @@ -1552,13 +1566,13 @@ show_qual(List *qual, const char *qlabel, */ static void show_scan_qual(List *qual, const char *qlabel, - PlanState *planstate, List *ancestors, - ExplainState *es) + PlanState *planstate, List *ancestors, + ExplainState *es) { - bool useprefix; - - useprefix = (IsA(planstate->plan, SubqueryScan) ||es->verbose); - show_qual(qual, qlabel, planstate, ancestors, useprefix, es); + bool useprefix; + + useprefix = (IsA(planstate->plan, SubqueryScan) ||es->verbose); + show_qual(qual, qlabel, planstate, ancestors, useprefix, es); } /* @@ -1566,13 +1580,13 @@ show_scan_qual(List *qual, const char *qlabel, */ static void show_upper_qual(List *qual, const char *qlabel, - PlanState *planstate, List *ancestors, - ExplainState *es) + PlanState *planstate, List *ancestors, + ExplainState *es) { - bool useprefix; - - useprefix = (list_length(es->rtable) > 1 || es->verbose); - show_qual(qual, qlabel, planstate, ancestors, useprefix, es); + bool useprefix; + + useprefix = (list_length(es->rtable) > 1 || es->verbose); + show_qual(qual, qlabel, planstate, ancestors, useprefix, es); } /* @@ -1581,11 +1595,11 @@ show_upper_qual(List *qual, const char *qlabel, static void show_sort_keys(SortState *sortstate, List *ancestors, ExplainState *es) { - Sort *plan = (Sort *) sortstate->ss.ps.plan; - - show_sort_keys_common((PlanState *) sortstate, - plan->numCols, plan->sortColIdx, - ancestors, es); + Sort *plan = (Sort *) sortstate->ss.ps.plan; + + show_sort_keys_common((PlanState *) sortstate, + plan->numCols, plan->sortColIdx, + ancestors, es); } /* @@ -1593,51 +1607,51 @@ show_sort_keys(SortState *sortstate, List *ancestors, ExplainState *es) */ static void show_merge_append_keys(MergeAppendState *mstate, List *ancestors, - ExplainState *es) + ExplainState *es) { - MergeAppend *plan = (MergeAppend *) mstate->ps.plan; - - show_sort_keys_common((PlanState *) mstate, - plan->numCols, plan->sortColIdx, - ancestors, es); + MergeAppend *plan = (MergeAppend *) mstate->ps.plan; + + show_sort_keys_common((PlanState *) mstate, + plan->numCols, plan->sortColIdx, + ancestors, es); } static void show_sort_keys_common(PlanState *planstate, int nkeys, AttrNumber *keycols, - List *ancestors, ExplainState *es) + List *ancestors, ExplainState *es) { - Plan *plan = planstate->plan; - List *context; - List *result = NIL; - bool useprefix; - int keyno; - char *exprstr; - - if (nkeys <= 0) - return; - - /* Set up deparsing context */ - context = deparse_context_for_planstate((Node *) planstate, - ancestors, - es->rtable); - useprefix = (list_length(es->rtable) > 1 || es->verbose); - - for (keyno = 0; keyno < nkeys; keyno++) - { - /* find key expression in tlist */ - AttrNumber keyresno = keycols[keyno]; - TargetEntry *target = get_tle_by_resno(plan->targetlist, - keyresno); - - if (!target) - elog(ERROR, "no tlist entry for key %d", keyresno); - /* Deparse the expression, showing any top-level cast */ - exprstr = deparse_expression((Node *) target->expr, context, - useprefix, true); - result = lappend(result, exprstr); - } - - ExplainPropertyList("Sort Key", result, es); + Plan *plan = planstate->plan; + List *context; + List *result = NIL; + bool useprefix; + int keyno; + char *exprstr; + + if (nkeys <= 0) + return; + + /* Set up deparsing context */ + context = deparse_context_for_planstate((Node *) planstate, + ancestors, + es->rtable); + useprefix = (list_length(es->rtable) > 1 || es->verbose); + + for (keyno = 0; keyno < nkeys; keyno++) + { + /* find key expression in tlist */ + AttrNumber keyresno = keycols[keyno]; + TargetEntry *target = get_tle_by_resno(plan->targetlist, + keyresno); + + if (!target) + elog(ERROR, "no tlist entry for key %d", keyresno); + /* Deparse the expression, showing any top-level cast */ + exprstr = deparse_expression((Node *) target->expr, context, + useprefix, true); + result = lappend(result, exprstr); + } + + ExplainPropertyList("Sort Key", result, es); } /* @@ -1646,30 +1660,30 @@ show_sort_keys_common(PlanState *planstate, int nkeys, AttrNumber *keycols, static void show_sort_info(SortState *sortstate, ExplainState *es) { - Assert(IsA(sortstate, SortState)); - if (es->analyze && sortstate->sort_Done && - sortstate->tuplesortstate != NULL) - { - Tuplesortstate *state = (Tuplesortstate *) sortstate->tuplesortstate; - const char *sortMethod; - const char *spaceType; - long spaceUsed; - - tuplesort_get_stats(state, &sortMethod, &spaceType, &spaceUsed); - - if (es->format == EXPLAIN_FORMAT_TEXT) - { - appendStringInfoSpaces(es->str, es->indent * 2); - appendStringInfo(es->str, "Sort Method: %s %s: %ldkB\n", - sortMethod, spaceType, spaceUsed); - } - else - { - ExplainPropertyText("Sort Method", sortMethod, es); - ExplainPropertyLong("Sort Space Used", spaceUsed, es); - ExplainPropertyText("Sort Space Type", spaceType, es); - } - } + Assert(IsA(sortstate, SortState)); + if (es->analyze && sortstate->sort_Done && + sortstate->tuplesortstate != NULL) + { + Tuplesortstate *state = (Tuplesortstate *) sortstate->tuplesortstate; + const char *sortMethod; + const char *spaceType; + long spaceUsed; + + tuplesort_get_stats(state, &sortMethod, &spaceType, &spaceUsed); + + if (es->format == EXPLAIN_FORMAT_TEXT) + { + appendStringInfoSpaces(es->str, es->indent * 2); + appendStringInfo(es->str, "Sort Method: %s %s: %ldkB\n", + sortMethod, spaceType, spaceUsed); + } + else + { + ExplainPropertyText("Sort Method", sortMethod, es); + ExplainPropertyLong("Sort Space Used", spaceUsed, es); + ExplainPropertyText("Sort Space Type", spaceType, es); + } + } } /* @@ -1678,40 +1692,40 @@ show_sort_info(SortState *sortstate, ExplainState *es) static void show_hash_info(HashState *hashstate, ExplainState *es) { - HashJoinTable hashtable; - - Assert(IsA(hashstate, HashState)); - hashtable = hashstate->hashtable; - - if (hashtable) - { - long spacePeakKb = (hashtable->spacePeak + 1023) / 1024; - - if (es->format != EXPLAIN_FORMAT_TEXT) - { - ExplainPropertyLong("Hash Buckets", hashtable->nbuckets, es); - ExplainPropertyLong("Hash Batches", hashtable->nbatch, es); - ExplainPropertyLong("Original Hash Batches", - hashtable->nbatch_original, es); - ExplainPropertyLong("Peak Memory Usage", spacePeakKb, es); - } - else if (hashtable->nbatch_original != hashtable->nbatch) - { - appendStringInfoSpaces(es->str, es->indent * 2); - appendStringInfo(es->str, - "Buckets: %d Batches: %d (originally %d) Memory Usage: %ldkB\n", - hashtable->nbuckets, hashtable->nbatch, - hashtable->nbatch_original, spacePeakKb); - } - else - { - appendStringInfoSpaces(es->str, es->indent * 2); - appendStringInfo(es->str, - "Buckets: %d Batches: %d Memory Usage: %ldkB\n", - hashtable->nbuckets, hashtable->nbatch, - spacePeakKb); - } - } + HashJoinTable hashtable; + + Assert(IsA(hashstate, HashState)); + hashtable = hashstate->hashtable; + + if (hashtable) + { + long spacePeakKb = (hashtable->spacePeak + 1023) / 1024; + + if (es->format != EXPLAIN_FORMAT_TEXT) + { + ExplainPropertyLong("Hash Buckets", hashtable->nbuckets, es); + ExplainPropertyLong("Hash Batches", hashtable->nbatch, es); + ExplainPropertyLong("Original Hash Batches", + hashtable->nbatch_original, es); + ExplainPropertyLong("Peak Memory Usage", spacePeakKb, es); + } + else if (hashtable->nbatch_original != hashtable->nbatch) + { + appendStringInfoSpaces(es->str, es->indent * 2); + appendStringInfo(es->str, + "Buckets: %d Batches: %d (originally %d) Memory Usage: %ldkB\n", + hashtable->nbuckets, hashtable->nbatch, + hashtable->nbatch_original, spacePeakKb); + } + else + { + appendStringInfoSpaces(es->str, es->indent * 2); + appendStringInfo(es->str, + "Buckets: %d Batches: %d Memory Usage: %ldkB\n", + hashtable->nbuckets, hashtable->nbatch, + spacePeakKb); + } + } } /* @@ -1721,28 +1735,28 @@ show_hash_info(HashState *hashstate, ExplainState *es) */ static void show_instrumentation_count(const char *qlabel, int which, - PlanState *planstate, ExplainState *es) + PlanState *planstate, ExplainState *es) { - double nfiltered; - double nloops; - - if (!es->analyze || !planstate->instrument) - return; - - if (which == 2) - nfiltered = planstate->instrument->nfiltered2; - else - nfiltered = planstate->instrument->nfiltered1; - nloops = planstate->instrument->nloops; - - /* In text mode, suppress zero counts; they're not interesting enough */ - if (nfiltered > 0 || es->format != EXPLAIN_FORMAT_TEXT) - { - if (nloops > 0) - ExplainPropertyFloat(qlabel, nfiltered / nloops, 0, es); - else - ExplainPropertyFloat(qlabel, 0.0, 0, es); - } + double nfiltered; + double nloops; + + if (!es->analyze || !planstate->instrument) + return; + + if (which == 2) + nfiltered = planstate->instrument->nfiltered2; + else + nfiltered = planstate->instrument->nfiltered1; + nloops = planstate->instrument->nloops; + + /* In text mode, suppress zero counts; they're not interesting enough */ + if (nfiltered > 0 || es->format != EXPLAIN_FORMAT_TEXT) + { + if (nloops > 0) + ExplainPropertyFloat(qlabel, nfiltered / nloops, 0, es); + else + ExplainPropertyFloat(qlabel, 0.0, 0, es); + } } /* @@ -1751,10 +1765,10 @@ show_instrumentation_count(const char *qlabel, int which, static void show_foreignscan_info(ForeignScanState *fsstate, ExplainState *es) { - FdwRoutine *fdwroutine = fsstate->fdwroutine; - - /* Let the FDW emit whatever fields it wants */ - fdwroutine->ExplainForeignScan(fsstate, es); + FdwRoutine *fdwroutine = fsstate->fdwroutine; + + /* Let the FDW emit whatever fields it wants */ + fdwroutine->ExplainForeignScan(fsstate, es); } /* @@ -1766,21 +1780,21 @@ show_foreignscan_info(ForeignScanState *fsstate, ExplainState *es) static const char * explain_get_index_name(Oid indexId) { - const char *result; - - if (explain_get_index_name_hook) - result = (*explain_get_index_name_hook) (indexId); - else - result = NULL; - if (result == NULL) - { - /* default behavior: look in the catalogs and quote it */ - result = get_rel_name(indexId); - if (result == NULL) - elog(ERROR, "cache lookup failed for index %u", indexId); - result = quote_identifier(result); - } - return result; + const char *result; + + if (explain_get_index_name_hook) + result = (*explain_get_index_name_hook) (indexId); + else + result = NULL; + if (result == NULL) + { + /* default behavior: look in the catalogs and quote it */ + result = get_rel_name(indexId); + if (result == NULL) + elog(ERROR, "cache lookup failed for index %u", indexId); + result = quote_identifier(result); + } + return result; } /* @@ -1788,38 +1802,38 @@ explain_get_index_name(Oid indexId) */ static void ExplainIndexScanDetails(Oid indexid, ScanDirection indexorderdir, - ExplainState *es) + ExplainState *es) { - const char *indexname = explain_get_index_name(indexid); - - if (es->format == EXPLAIN_FORMAT_TEXT) - { - if (ScanDirectionIsBackward(indexorderdir)) - appendStringInfoString(es->str, " Backward"); - appendStringInfo(es->str, " using %s", indexname); - } - else - { - const char *scandir; - - switch (indexorderdir) - { - case BackwardScanDirection: - scandir = "Backward"; - break; - case NoMovementScanDirection: - scandir = "NoMovement"; - break; - case ForwardScanDirection: - scandir = "Forward"; - break; - default: - scandir = "???"; - break; - } - ExplainPropertyText("Scan Direction", scandir, es); - ExplainPropertyText("Index Name", indexname, es); - } + const char *indexname = explain_get_index_name(indexid); + + if (es->format == EXPLAIN_FORMAT_TEXT) + { + if (ScanDirectionIsBackward(indexorderdir)) + appendStringInfoString(es->str, " Backward"); + appendStringInfo(es->str, " using %s", indexname); + } + else + { + const char *scandir; + + switch (indexorderdir) + { + case BackwardScanDirection: + scandir = "Backward"; + break; + case NoMovementScanDirection: + scandir = "NoMovement"; + break; + case ForwardScanDirection: + scandir = "Forward"; + break; + default: + scandir = "???"; + break; + } + ExplainPropertyText("Scan Direction", scandir, es); + ExplainPropertyText("Index Name", indexname, es); + } } /* @@ -1828,7 +1842,7 @@ ExplainIndexScanDetails(Oid indexid, ScanDirection indexorderdir, static void ExplainScanTarget(Scan *plan, ExplainState *es) { - ExplainTargetRel((Plan *) plan, plan->scanrelid, es); + ExplainTargetRel((Plan *) plan, plan->scanrelid, es); } /* @@ -1837,16 +1851,16 @@ ExplainScanTarget(Scan *plan, ExplainState *es) static void ExplainModifyTarget(ModifyTable *plan, ExplainState *es) { - Index rti; - - /* - * We show the name of the first target relation. In multi-target-table - * cases this should always be the parent of the inheritance tree. - */ - Assert(plan->resultRelations != NIL); - rti = linitial_int(plan->resultRelations); - - ExplainTargetRel((Plan *) plan, rti, es); + Index rti; + + /* + * We show the name of the first target relation. In multi-target-table + * cases this should always be the parent of the inheritance tree. + */ + Assert(plan->resultRelations != NIL); + rti = linitial_int(plan->resultRelations); + + ExplainTargetRel((Plan *) plan, rti, es); } /* @@ -1855,99 +1869,99 @@ ExplainModifyTarget(ModifyTable *plan, ExplainState *es) static void ExplainTargetRel(Plan *plan, Index rti, ExplainState *es) { - char *objectname = NULL; - char *namespace = NULL; - const char *objecttag = NULL; - RangeTblEntry *rte; - - rte = rt_fetch(rti, es->rtable); - - switch (nodeTag(plan)) - { - case T_SeqScan: - case T_IndexScan: - case T_IndexOnlyScan: - case T_BitmapHeapScan: - case T_TidScan: - case T_ForeignScan: - case T_ModifyTable: - /* NEW FOR RECATHON */ - case T_RecScan: - /* Assert it's on a real relation */ - Assert(rte->rtekind == RTE_RELATION); - objectname = get_rel_name(rte->relid); - if (es->verbose) - namespace = get_namespace_name(get_rel_namespace(rte->relid)); - objecttag = "Relation Name"; - break; - case T_FunctionScan: - { - Node *funcexpr; - - /* Assert it's on a RangeFunction */ - Assert(rte->rtekind == RTE_FUNCTION); - - /* - * If the expression is still a function call, we can get the - * real name of the function. Otherwise, punt (this can - * happen if the optimizer simplified away the function call, - * for example). - */ - funcexpr = ((FunctionScan *) plan)->funcexpr; - if (funcexpr && IsA(funcexpr, FuncExpr)) - { - Oid funcid = ((FuncExpr *) funcexpr)->funcid; - - objectname = get_func_name(funcid); - if (es->verbose) - namespace = - get_namespace_name(get_func_namespace(funcid)); - } - objecttag = "Function Name"; - } - break; - case T_ValuesScan: - Assert(rte->rtekind == RTE_VALUES); - break; - case T_CteScan: - /* Assert it's on a non-self-reference CTE */ - Assert(rte->rtekind == RTE_CTE); - Assert(!rte->self_reference); - objectname = rte->ctename; - objecttag = "CTE Name"; - break; - case T_WorkTableScan: - /* Assert it's on a self-reference CTE */ - Assert(rte->rtekind == RTE_CTE); - Assert(rte->self_reference); - objectname = rte->ctename; - objecttag = "CTE Name"; - break; - default: - break; - } - - if (es->format == EXPLAIN_FORMAT_TEXT) - { - appendStringInfoString(es->str, " on"); - if (namespace != NULL) - appendStringInfo(es->str, " %s.%s", quote_identifier(namespace), - quote_identifier(objectname)); - else if (objectname != NULL) - appendStringInfo(es->str, " %s", quote_identifier(objectname)); - if (objectname == NULL || - strcmp(rte->eref->aliasname, objectname) != 0) - appendStringInfo(es->str, " %s", - quote_identifier(rte->eref->aliasname)); - } - else - { - if (objecttag != NULL && objectname != NULL) - ExplainPropertyText(objecttag, objectname, es); - if (namespace != NULL) - ExplainPropertyText("Schema", namespace, es); - ExplainPropertyText("Alias", rte->eref->aliasname, es); - } + char *objectname = NULL; + char *namespace = NULL; + const char *objecttag = NULL; + RangeTblEntry *rte; + + rte = rt_fetch(rti, es->rtable); + + switch (nodeTag(plan)) + { + case T_SeqScan: + case T_IndexScan: + case T_IndexOnlyScan: + case T_BitmapHeapScan: + case T_TidScan: + case T_ForeignScan: + case T_ModifyTable: + /* NEW FOR RECATHON */ + case T_RecScan: + /* Assert it's on a real relation */ + Assert(rte->rtekind == RTE_RELATION); + objectname = get_rel_name(rte->relid); + if (es->verbose) + namespace = get_namespace_name(get_rel_namespace(rte->relid)); + objecttag = "Relation Name"; + break; + case T_FunctionScan: + { + Node *funcexpr; + + /* Assert it's on a RangeFunction */ + Assert(rte->rtekind == RTE_FUNCTION); + + /* + * If the expression is still a function call, we can get the + * real name of the function. Otherwise, punt (this can + * happen if the optimizer simplified away the function call, + * for example). + */ + funcexpr = ((FunctionScan *) plan)->funcexpr; + if (funcexpr && IsA(funcexpr, FuncExpr)) + { + Oid funcid = ((FuncExpr *) funcexpr)->funcid; + + objectname = get_func_name(funcid); + if (es->verbose) + namespace = + get_namespace_name(get_func_namespace(funcid)); + } + objecttag = "Function Name"; + } + break; + case T_ValuesScan: + Assert(rte->rtekind == RTE_VALUES); + break; + case T_CteScan: + /* Assert it's on a non-self-reference CTE */ + Assert(rte->rtekind == RTE_CTE); + Assert(!rte->self_reference); + objectname = rte->ctename; + objecttag = "CTE Name"; + break; + case T_WorkTableScan: + /* Assert it's on a self-reference CTE */ + Assert(rte->rtekind == RTE_CTE); + Assert(rte->self_reference); + objectname = rte->ctename; + objecttag = "CTE Name"; + break; + default: + break; + } + + if (es->format == EXPLAIN_FORMAT_TEXT) + { + appendStringInfoString(es->str, " on"); + if (namespace != NULL) + appendStringInfo(es->str, " %s.%s", quote_identifier(namespace), + quote_identifier(objectname)); + else if (objectname != NULL) + appendStringInfo(es->str, " %s", quote_identifier(objectname)); + if (objectname == NULL || + strcmp(rte->eref->aliasname, objectname) != 0) + appendStringInfo(es->str, " %s", + quote_identifier(rte->eref->aliasname)); + } + else + { + if (objecttag != NULL && objectname != NULL) + ExplainPropertyText(objecttag, objectname, es); + if (namespace != NULL) + ExplainPropertyText("Schema", namespace, es); + ExplainPropertyText("Alias", rte->eref->aliasname, es); + } } /* @@ -1962,14 +1976,14 @@ ExplainTargetRel(Plan *plan, Index rti, ExplainState *es) */ static void ExplainMemberNodes(List *plans, PlanState **planstates, - List *ancestors, ExplainState *es) + List *ancestors, ExplainState *es) { - int nplans = list_length(plans); - int j; - - for (j = 0; j < nplans; j++) - ExplainNode(planstates[j], ancestors, - "Member", NULL, es); + int nplans = list_length(plans); + int j; + + for (j = 0; j < nplans; j++) + ExplainNode(planstates[j], ancestors, + "Member", NULL, es); } /* @@ -1980,18 +1994,18 @@ ExplainMemberNodes(List *plans, PlanState **planstates, */ static void ExplainSubPlans(List *plans, List *ancestors, - const char *relationship, ExplainState *es) + const char *relationship, ExplainState *es) { - ListCell *lst; - - foreach(lst, plans) - { - SubPlanState *sps = (SubPlanState *) lfirst(lst); - SubPlan *sp = (SubPlan *) sps->xprstate.expr; - - ExplainNode(sps->planstate, ancestors, - relationship, sp->plan_name, es); - } + ListCell *lst; + + foreach(lst, plans) + { + SubPlanState *sps = (SubPlanState *) lfirst(lst); + SubPlan *sp = (SubPlan *) sps->xprstate.expr; + + ExplainNode(sps->planstate, ancestors, + relationship, sp->plan_name, es); + } } /* @@ -2001,67 +2015,67 @@ ExplainSubPlans(List *plans, List *ancestors, void ExplainPropertyList(const char *qlabel, List *data, ExplainState *es) { - ListCell *lc; - bool first = true; - - switch (es->format) - { - case EXPLAIN_FORMAT_TEXT: - appendStringInfoSpaces(es->str, es->indent * 2); - appendStringInfo(es->str, "%s: ", qlabel); - foreach(lc, data) - { - if (!first) - appendStringInfoString(es->str, ", "); - appendStringInfoString(es->str, (const char *) lfirst(lc)); - first = false; - } - appendStringInfoChar(es->str, '\n'); - break; - - case EXPLAIN_FORMAT_XML: - ExplainXMLTag(qlabel, X_OPENING, es); - foreach(lc, data) - { - char *str; - - appendStringInfoSpaces(es->str, es->indent * 2 + 2); - appendStringInfoString(es->str, ""); - str = escape_xml((const char *) lfirst(lc)); - appendStringInfoString(es->str, str); - pfree(str); - appendStringInfoString(es->str, "\n"); - } - ExplainXMLTag(qlabel, X_CLOSING, es); - break; - - case EXPLAIN_FORMAT_JSON: - ExplainJSONLineEnding(es); - appendStringInfoSpaces(es->str, es->indent * 2); - escape_json(es->str, qlabel); - appendStringInfoString(es->str, ": ["); - foreach(lc, data) - { - if (!first) - appendStringInfoString(es->str, ", "); - escape_json(es->str, (const char *) lfirst(lc)); - first = false; - } - appendStringInfoChar(es->str, ']'); - break; - - case EXPLAIN_FORMAT_YAML: - ExplainYAMLLineStarting(es); - appendStringInfo(es->str, "%s: ", qlabel); - foreach(lc, data) - { - appendStringInfoChar(es->str, '\n'); - appendStringInfoSpaces(es->str, es->indent * 2 + 2); - appendStringInfoString(es->str, "- "); - escape_yaml(es->str, (const char *) lfirst(lc)); - } - break; - } + ListCell *lc; + bool first = true; + + switch (es->format) + { + case EXPLAIN_FORMAT_TEXT: + appendStringInfoSpaces(es->str, es->indent * 2); + appendStringInfo(es->str, "%s: ", qlabel); + foreach(lc, data) + { + if (!first) + appendStringInfoString(es->str, ", "); + appendStringInfoString(es->str, (const char *) lfirst(lc)); + first = false; + } + appendStringInfoChar(es->str, '\n'); + break; + + case EXPLAIN_FORMAT_XML: + ExplainXMLTag(qlabel, X_OPENING, es); + foreach(lc, data) + { + char *str; + + appendStringInfoSpaces(es->str, es->indent * 2 + 2); + appendStringInfoString(es->str, ""); + str = escape_xml((const char *) lfirst(lc)); + appendStringInfoString(es->str, str); + pfree(str); + appendStringInfoString(es->str, "\n"); + } + ExplainXMLTag(qlabel, X_CLOSING, es); + break; + + case EXPLAIN_FORMAT_JSON: + ExplainJSONLineEnding(es); + appendStringInfoSpaces(es->str, es->indent * 2); + escape_json(es->str, qlabel); + appendStringInfoString(es->str, ": ["); + foreach(lc, data) + { + if (!first) + appendStringInfoString(es->str, ", "); + escape_json(es->str, (const char *) lfirst(lc)); + first = false; + } + appendStringInfoChar(es->str, ']'); + break; + + case EXPLAIN_FORMAT_YAML: + ExplainYAMLLineStarting(es); + appendStringInfo(es->str, "%s: ", qlabel); + foreach(lc, data) + { + appendStringInfoChar(es->str, '\n'); + appendStringInfoSpaces(es->str, es->indent * 2 + 2); + appendStringInfoString(es->str, "- "); + escape_yaml(es->str, (const char *) lfirst(lc)); + } + break; + } } /* @@ -2075,49 +2089,49 @@ ExplainPropertyList(const char *qlabel, List *data, ExplainState *es) */ static void ExplainProperty(const char *qlabel, const char *value, bool numeric, - ExplainState *es) + ExplainState *es) { - switch (es->format) - { - case EXPLAIN_FORMAT_TEXT: - appendStringInfoSpaces(es->str, es->indent * 2); - appendStringInfo(es->str, "%s: %s\n", qlabel, value); - break; - - case EXPLAIN_FORMAT_XML: - { - char *str; - - appendStringInfoSpaces(es->str, es->indent * 2); - ExplainXMLTag(qlabel, X_OPENING | X_NOWHITESPACE, es); - str = escape_xml(value); - appendStringInfoString(es->str, str); - pfree(str); - ExplainXMLTag(qlabel, X_CLOSING | X_NOWHITESPACE, es); - appendStringInfoChar(es->str, '\n'); - } - break; - - case EXPLAIN_FORMAT_JSON: - ExplainJSONLineEnding(es); - appendStringInfoSpaces(es->str, es->indent * 2); - escape_json(es->str, qlabel); - appendStringInfoString(es->str, ": "); - if (numeric) - appendStringInfoString(es->str, value); - else - escape_json(es->str, value); - break; - - case EXPLAIN_FORMAT_YAML: - ExplainYAMLLineStarting(es); - appendStringInfo(es->str, "%s: ", qlabel); - if (numeric) - appendStringInfoString(es->str, value); - else - escape_yaml(es->str, value); - break; - } + switch (es->format) + { + case EXPLAIN_FORMAT_TEXT: + appendStringInfoSpaces(es->str, es->indent * 2); + appendStringInfo(es->str, "%s: %s\n", qlabel, value); + break; + + case EXPLAIN_FORMAT_XML: + { + char *str; + + appendStringInfoSpaces(es->str, es->indent * 2); + ExplainXMLTag(qlabel, X_OPENING | X_NOWHITESPACE, es); + str = escape_xml(value); + appendStringInfoString(es->str, str); + pfree(str); + ExplainXMLTag(qlabel, X_CLOSING | X_NOWHITESPACE, es); + appendStringInfoChar(es->str, '\n'); + } + break; + + case EXPLAIN_FORMAT_JSON: + ExplainJSONLineEnding(es); + appendStringInfoSpaces(es->str, es->indent * 2); + escape_json(es->str, qlabel); + appendStringInfoString(es->str, ": "); + if (numeric) + appendStringInfoString(es->str, value); + else + escape_json(es->str, value); + break; + + case EXPLAIN_FORMAT_YAML: + ExplainYAMLLineStarting(es); + appendStringInfo(es->str, "%s: ", qlabel); + if (numeric) + appendStringInfoString(es->str, value); + else + escape_yaml(es->str, value); + break; + } } /* @@ -2126,7 +2140,7 @@ ExplainProperty(const char *qlabel, const char *value, bool numeric, void ExplainPropertyText(const char *qlabel, const char *value, ExplainState *es) { - ExplainProperty(qlabel, value, false, es); + ExplainProperty(qlabel, value, false, es); } /* @@ -2135,10 +2149,10 @@ ExplainPropertyText(const char *qlabel, const char *value, ExplainState *es) void ExplainPropertyInteger(const char *qlabel, int value, ExplainState *es) { - char buf[32]; - - snprintf(buf, sizeof(buf), "%d", value); - ExplainProperty(qlabel, buf, true, es); + char buf[32]; + + snprintf(buf, sizeof(buf), "%d", value); + ExplainProperty(qlabel, buf, true, es); } /* @@ -2147,10 +2161,10 @@ ExplainPropertyInteger(const char *qlabel, int value, ExplainState *es) void ExplainPropertyLong(const char *qlabel, long value, ExplainState *es) { - char buf[32]; - - snprintf(buf, sizeof(buf), "%ld", value); - ExplainProperty(qlabel, buf, true, es); + char buf[32]; + + snprintf(buf, sizeof(buf), "%ld", value); + ExplainProperty(qlabel, buf, true, es); } /* @@ -2159,12 +2173,12 @@ ExplainPropertyLong(const char *qlabel, long value, ExplainState *es) */ void ExplainPropertyFloat(const char *qlabel, double value, int ndigits, - ExplainState *es) + ExplainState *es) { - char buf[256]; - - snprintf(buf, sizeof(buf), "%.*f", ndigits, value); - ExplainProperty(qlabel, buf, true, es); + char buf[256]; + + snprintf(buf, sizeof(buf), "%.*f", ndigits, value); + ExplainProperty(qlabel, buf, true, es); } /* @@ -2178,61 +2192,61 @@ ExplainPropertyFloat(const char *qlabel, double value, int ndigits, */ static void ExplainOpenGroup(const char *objtype, const char *labelname, - bool labeled, ExplainState *es) + bool labeled, ExplainState *es) { - switch (es->format) - { - case EXPLAIN_FORMAT_TEXT: - /* nothing to do */ - break; - - case EXPLAIN_FORMAT_XML: - ExplainXMLTag(objtype, X_OPENING, es); - es->indent++; - break; - - case EXPLAIN_FORMAT_JSON: - ExplainJSONLineEnding(es); - appendStringInfoSpaces(es->str, 2 * es->indent); - if (labelname) - { - escape_json(es->str, labelname); - appendStringInfoString(es->str, ": "); - } - appendStringInfoChar(es->str, labeled ? '{' : '['); - - /* - * In JSON format, the grouping_stack is an integer list. 0 means - * we've emitted nothing at this grouping level, 1 means we've - * emitted something (and so the next item needs a comma). See - * ExplainJSONLineEnding(). - */ - es->grouping_stack = lcons_int(0, es->grouping_stack); - es->indent++; - break; - - case EXPLAIN_FORMAT_YAML: - - /* - * In YAML format, the grouping stack is an integer list. 0 means - * we've emitted nothing at this grouping level AND this grouping - * level is unlabelled and must be marked with "- ". See - * ExplainYAMLLineStarting(). - */ - ExplainYAMLLineStarting(es); - if (labelname) - { - appendStringInfo(es->str, "%s: ", labelname); - es->grouping_stack = lcons_int(1, es->grouping_stack); - } - else - { - appendStringInfoString(es->str, "- "); - es->grouping_stack = lcons_int(0, es->grouping_stack); - } - es->indent++; - break; - } + switch (es->format) + { + case EXPLAIN_FORMAT_TEXT: + /* nothing to do */ + break; + + case EXPLAIN_FORMAT_XML: + ExplainXMLTag(objtype, X_OPENING, es); + es->indent++; + break; + + case EXPLAIN_FORMAT_JSON: + ExplainJSONLineEnding(es); + appendStringInfoSpaces(es->str, 2 * es->indent); + if (labelname) + { + escape_json(es->str, labelname); + appendStringInfoString(es->str, ": "); + } + appendStringInfoChar(es->str, labeled ? '{' : '['); + + /* + * In JSON format, the grouping_stack is an integer list. 0 means + * we've emitted nothing at this grouping level, 1 means we've + * emitted something (and so the next item needs a comma). See + * ExplainJSONLineEnding(). + */ + es->grouping_stack = lcons_int(0, es->grouping_stack); + es->indent++; + break; + + case EXPLAIN_FORMAT_YAML: + + /* + * In YAML format, the grouping stack is an integer list. 0 means + * we've emitted nothing at this grouping level AND this grouping + * level is unlabelled and must be marked with "- ". See + * ExplainYAMLLineStarting(). + */ + ExplainYAMLLineStarting(es); + if (labelname) + { + appendStringInfo(es->str, "%s: ", labelname); + es->grouping_stack = lcons_int(1, es->grouping_stack); + } + else + { + appendStringInfoString(es->str, "- "); + es->grouping_stack = lcons_int(0, es->grouping_stack); + } + es->indent++; + break; + } } /* @@ -2241,32 +2255,32 @@ ExplainOpenGroup(const char *objtype, const char *labelname, */ static void ExplainCloseGroup(const char *objtype, const char *labelname, - bool labeled, ExplainState *es) + bool labeled, ExplainState *es) { - switch (es->format) - { - case EXPLAIN_FORMAT_TEXT: - /* nothing to do */ - break; - - case EXPLAIN_FORMAT_XML: - es->indent--; - ExplainXMLTag(objtype, X_CLOSING, es); - break; - - case EXPLAIN_FORMAT_JSON: - es->indent--; - appendStringInfoChar(es->str, '\n'); - appendStringInfoSpaces(es->str, 2 * es->indent); - appendStringInfoChar(es->str, labeled ? '}' : ']'); - es->grouping_stack = list_delete_first(es->grouping_stack); - break; - - case EXPLAIN_FORMAT_YAML: - es->indent--; - es->grouping_stack = list_delete_first(es->grouping_stack); - break; - } + switch (es->format) + { + case EXPLAIN_FORMAT_TEXT: + /* nothing to do */ + break; + + case EXPLAIN_FORMAT_XML: + es->indent--; + ExplainXMLTag(objtype, X_CLOSING, es); + break; + + case EXPLAIN_FORMAT_JSON: + es->indent--; + appendStringInfoChar(es->str, '\n'); + appendStringInfoSpaces(es->str, 2 * es->indent); + appendStringInfoChar(es->str, labeled ? '}' : ']'); + es->grouping_stack = list_delete_first(es->grouping_stack); + break; + + case EXPLAIN_FORMAT_YAML: + es->indent--; + es->grouping_stack = list_delete_first(es->grouping_stack); + break; + } } /* @@ -2278,41 +2292,41 @@ ExplainCloseGroup(const char *objtype, const char *labelname, static void ExplainDummyGroup(const char *objtype, const char *labelname, ExplainState *es) { - switch (es->format) - { - case EXPLAIN_FORMAT_TEXT: - /* nothing to do */ - break; - - case EXPLAIN_FORMAT_XML: - ExplainXMLTag(objtype, X_CLOSE_IMMEDIATE, es); - break; - - case EXPLAIN_FORMAT_JSON: - ExplainJSONLineEnding(es); - appendStringInfoSpaces(es->str, 2 * es->indent); - if (labelname) - { - escape_json(es->str, labelname); - appendStringInfoString(es->str, ": "); - } - escape_json(es->str, objtype); - break; - - case EXPLAIN_FORMAT_YAML: - ExplainYAMLLineStarting(es); - if (labelname) - { - escape_yaml(es->str, labelname); - appendStringInfoString(es->str, ": "); - } - else - { - appendStringInfoString(es->str, "- "); - } - escape_yaml(es->str, objtype); - break; - } + switch (es->format) + { + case EXPLAIN_FORMAT_TEXT: + /* nothing to do */ + break; + + case EXPLAIN_FORMAT_XML: + ExplainXMLTag(objtype, X_CLOSE_IMMEDIATE, es); + break; + + case EXPLAIN_FORMAT_JSON: + ExplainJSONLineEnding(es); + appendStringInfoSpaces(es->str, 2 * es->indent); + if (labelname) + { + escape_json(es->str, labelname); + appendStringInfoString(es->str, ": "); + } + escape_json(es->str, objtype); + break; + + case EXPLAIN_FORMAT_YAML: + ExplainYAMLLineStarting(es); + if (labelname) + { + escape_yaml(es->str, labelname); + appendStringInfoString(es->str, ": "); + } + else + { + appendStringInfoString(es->str, "- "); + } + escape_yaml(es->str, objtype); + break; + } } /* @@ -2324,29 +2338,29 @@ ExplainDummyGroup(const char *objtype, const char *labelname, ExplainState *es) void ExplainBeginOutput(ExplainState *es) { - switch (es->format) - { - case EXPLAIN_FORMAT_TEXT: - /* nothing to do */ - break; - - case EXPLAIN_FORMAT_XML: - appendStringInfoString(es->str, - "\n"); - es->indent++; - break; - - case EXPLAIN_FORMAT_JSON: - /* top-level structure is an array of plans */ - appendStringInfoChar(es->str, '['); - es->grouping_stack = lcons_int(0, es->grouping_stack); - es->indent++; - break; - - case EXPLAIN_FORMAT_YAML: - es->grouping_stack = lcons_int(0, es->grouping_stack); - break; - } + switch (es->format) + { + case EXPLAIN_FORMAT_TEXT: + /* nothing to do */ + break; + + case EXPLAIN_FORMAT_XML: + appendStringInfoString(es->str, + "\n"); + es->indent++; + break; + + case EXPLAIN_FORMAT_JSON: + /* top-level structure is an array of plans */ + appendStringInfoChar(es->str, '['); + es->grouping_stack = lcons_int(0, es->grouping_stack); + es->indent++; + break; + + case EXPLAIN_FORMAT_YAML: + es->grouping_stack = lcons_int(0, es->grouping_stack); + break; + } } /* @@ -2355,27 +2369,27 @@ ExplainBeginOutput(ExplainState *es) void ExplainEndOutput(ExplainState *es) { - switch (es->format) - { - case EXPLAIN_FORMAT_TEXT: - /* nothing to do */ - break; - - case EXPLAIN_FORMAT_XML: - es->indent--; - appendStringInfoString(es->str, ""); - break; - - case EXPLAIN_FORMAT_JSON: - es->indent--; - appendStringInfoString(es->str, "\n]"); - es->grouping_stack = list_delete_first(es->grouping_stack); - break; - - case EXPLAIN_FORMAT_YAML: - es->grouping_stack = list_delete_first(es->grouping_stack); - break; - } + switch (es->format) + { + case EXPLAIN_FORMAT_TEXT: + /* nothing to do */ + break; + + case EXPLAIN_FORMAT_XML: + es->indent--; + appendStringInfoString(es->str, ""); + break; + + case EXPLAIN_FORMAT_JSON: + es->indent--; + appendStringInfoString(es->str, "\n]"); + es->grouping_stack = list_delete_first(es->grouping_stack); + break; + + case EXPLAIN_FORMAT_YAML: + es->grouping_stack = list_delete_first(es->grouping_stack); + break; + } } /* @@ -2384,19 +2398,19 @@ ExplainEndOutput(ExplainState *es) void ExplainSeparatePlans(ExplainState *es) { - switch (es->format) - { - case EXPLAIN_FORMAT_TEXT: - /* add a blank line */ - appendStringInfoChar(es->str, '\n'); - break; - - case EXPLAIN_FORMAT_XML: - case EXPLAIN_FORMAT_JSON: - case EXPLAIN_FORMAT_YAML: - /* nothing to do */ - break; - } + switch (es->format) + { + case EXPLAIN_FORMAT_TEXT: + /* add a blank line */ + appendStringInfoChar(es->str, '\n'); + break; + + case EXPLAIN_FORMAT_XML: + case EXPLAIN_FORMAT_JSON: + case EXPLAIN_FORMAT_YAML: + /* nothing to do */ + break; + } } /* @@ -2412,20 +2426,20 @@ ExplainSeparatePlans(ExplainState *es) static void ExplainXMLTag(const char *tagname, int flags, ExplainState *es) { - const char *s; - - if ((flags & X_NOWHITESPACE) == 0) - appendStringInfoSpaces(es->str, 2 * es->indent); - appendStringInfoCharMacro(es->str, '<'); - if ((flags & X_CLOSING) != 0) - appendStringInfoCharMacro(es->str, '/'); - for (s = tagname; *s; s++) - appendStringInfoCharMacro(es->str, (*s == ' ') ? '-' : *s); - if ((flags & X_CLOSE_IMMEDIATE) != 0) - appendStringInfoString(es->str, " /"); - appendStringInfoCharMacro(es->str, '>'); - if ((flags & X_NOWHITESPACE) == 0) - appendStringInfoCharMacro(es->str, '\n'); + const char *s; + + if ((flags & X_NOWHITESPACE) == 0) + appendStringInfoSpaces(es->str, 2 * es->indent); + appendStringInfoCharMacro(es->str, '<'); + if ((flags & X_CLOSING) != 0) + appendStringInfoCharMacro(es->str, '/'); + for (s = tagname; *s; s++) + appendStringInfoCharMacro(es->str, (*s == ' ') ? '-' : *s); + if ((flags & X_CLOSE_IMMEDIATE) != 0) + appendStringInfoString(es->str, " /"); + appendStringInfoCharMacro(es->str, '>'); + if ((flags & X_NOWHITESPACE) == 0) + appendStringInfoCharMacro(es->str, '\n'); } /* @@ -2438,12 +2452,12 @@ ExplainXMLTag(const char *tagname, int flags, ExplainState *es) static void ExplainJSONLineEnding(ExplainState *es) { - Assert(es->format == EXPLAIN_FORMAT_JSON); - if (linitial_int(es->grouping_stack) != 0) - appendStringInfoChar(es->str, ','); - else - linitial_int(es->grouping_stack) = 1; - appendStringInfoChar(es->str, '\n'); + Assert(es->format == EXPLAIN_FORMAT_JSON); + if (linitial_int(es->grouping_stack) != 0) + appendStringInfoChar(es->str, ','); + else + linitial_int(es->grouping_stack) = 1; + appendStringInfoChar(es->str, '\n'); } /* @@ -2458,16 +2472,16 @@ ExplainJSONLineEnding(ExplainState *es) static void ExplainYAMLLineStarting(ExplainState *es) { - Assert(es->format == EXPLAIN_FORMAT_YAML); - if (linitial_int(es->grouping_stack) == 0) - { - linitial_int(es->grouping_stack) = 1; - } - else - { - appendStringInfoChar(es->str, '\n'); - appendStringInfoSpaces(es->str, es->indent * 2); - } + Assert(es->format == EXPLAIN_FORMAT_YAML); + if (linitial_int(es->grouping_stack) == 0) + { + linitial_int(es->grouping_stack) = 1; + } + else + { + appendStringInfoChar(es->str, '\n'); + appendStringInfoSpaces(es->str, es->indent * 2); + } } /* @@ -2483,5 +2497,5 @@ ExplainYAMLLineStarting(ExplainState *es) static void escape_yaml(StringInfo buf, const char *str) { - escape_json(buf, str); + escape_json(buf, str); } diff --git a/PostgreSQL/src/backend/optimizer/path/allpaths.c b/PostgreSQL/src/backend/optimizer/path/allpaths.c index 808beaa..f022c70 100644 --- a/PostgreSQL/src/backend/optimizer/path/allpaths.c +++ b/PostgreSQL/src/backend/optimizer/path/allpaths.c @@ -37,6 +37,8 @@ #include "parser/parsetree.h" #include "rewrite/rewriteManip.h" #include "utils/lsyscache.h" +//NEW FOR RECDB +#include "utils/recathon.h" /* These parameters are set by GUC */ @@ -50,49 +52,49 @@ join_search_hook_type join_search_hook = NULL; static void set_base_rel_sizes(PlannerInfo *root); static void set_base_rel_pathlists(PlannerInfo *root); static void set_rel_size(PlannerInfo *root, RelOptInfo *rel, - Index rti, RangeTblEntry *rte); + Index rti, RangeTblEntry *rte); static void set_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, - Index rti, RangeTblEntry *rte); + Index rti, RangeTblEntry *rte); static void set_plain_rel_size(PlannerInfo *root, RelOptInfo *rel, - RangeTblEntry *rte); + RangeTblEntry *rte); static void set_plain_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, - RangeTblEntry *rte); + RangeTblEntry *rte); static void set_foreign_size(PlannerInfo *root, RelOptInfo *rel, - RangeTblEntry *rte); + RangeTblEntry *rte); static void set_foreign_pathlist(PlannerInfo *root, RelOptInfo *rel, - RangeTblEntry *rte); + RangeTblEntry *rte); static void set_append_rel_size(PlannerInfo *root, RelOptInfo *rel, - Index rti, RangeTblEntry *rte); + Index rti, RangeTblEntry *rte); static void set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, - Index rti, RangeTblEntry *rte); + Index rti, RangeTblEntry *rte); static void generate_mergeappend_paths(PlannerInfo *root, RelOptInfo *rel, - List *live_childrels, - List *all_child_pathkeys); + List *live_childrels, + List *all_child_pathkeys); static List *accumulate_append_subpath(List *subpaths, Path *path); static void set_dummy_rel_pathlist(RelOptInfo *rel); static void set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel, - Index rti, RangeTblEntry *rte); + Index rti, RangeTblEntry *rte); static void set_function_pathlist(PlannerInfo *root, RelOptInfo *rel, - RangeTblEntry *rte); + RangeTblEntry *rte); static void set_values_pathlist(PlannerInfo *root, RelOptInfo *rel, - RangeTblEntry *rte); + RangeTblEntry *rte); static void set_cte_pathlist(PlannerInfo *root, RelOptInfo *rel, - RangeTblEntry *rte); + RangeTblEntry *rte); static void set_worktable_pathlist(PlannerInfo *root, RelOptInfo *rel, - RangeTblEntry *rte); + RangeTblEntry *rte); static RelOptInfo *make_rel_from_joinlist(PlannerInfo *root, List *joinlist); static bool subquery_is_pushdown_safe(Query *subquery, Query *topquery, - bool *differentTypes); + bool *differentTypes); static bool recurse_pushdown_safe(Node *setOp, Query *topquery, - bool *differentTypes); + bool *differentTypes); static void compare_tlist_datatypes(List *tlist, List *colTypes, - bool *differentTypes); + bool *differentTypes); static bool qual_is_pushdown_safe(Query *subquery, Index rti, Node *qual, - bool *differentTypes); + bool *differentTypes); static void subquery_push_qual(Query *subquery, - RangeTblEntry *rte, Index rti, Node *qual); + RangeTblEntry *rte, Index rti, Node *qual); static void recurse_push_qual(Node *setOp, Query *topquery, - RangeTblEntry *rte, Index rti, Node *qual); + RangeTblEntry *rte, Index rti, Node *qual); /* @@ -103,47 +105,47 @@ static void recurse_push_qual(Node *setOp, Query *topquery, RelOptInfo * make_one_rel(PlannerInfo *root, List *joinlist) { - RelOptInfo *rel; - Index rti; - - /* - * Construct the all_baserels Relids set. - */ - root->all_baserels = NULL; - for (rti = 1; rti < root->simple_rel_array_size; rti++) - { - RelOptInfo *brel = root->simple_rel_array[rti]; - - /* there may be empty slots corresponding to non-baserel RTEs */ - if (brel == NULL) - continue; - - Assert(brel->relid == rti); /* sanity check on array */ - - /* ignore RTEs that are "other rels" */ - if (brel->reloptkind != RELOPT_BASEREL) - continue; - - root->all_baserels = bms_add_member(root->all_baserels, brel->relid); - } - - /* - * Generate access paths for the base rels. - */ - set_base_rel_sizes(root); - set_base_rel_pathlists(root); - - /* - * Generate access paths for the entire join tree. - */ - rel = make_rel_from_joinlist(root, joinlist); - - /* - * The result should join all and only the query's base rels. - */ - Assert(bms_equal(rel->relids, root->all_baserels)); - - return rel; + RelOptInfo *rel; + Index rti; + + /* + * Construct the all_baserels Relids set. + */ + root->all_baserels = NULL; + for (rti = 1; rti < root->simple_rel_array_size; rti++) + { + RelOptInfo *brel = root->simple_rel_array[rti]; + + /* there may be empty slots corresponding to non-baserel RTEs */ + if (brel == NULL) + continue; + + Assert(brel->relid == rti); /* sanity check on array */ + + /* ignore RTEs that are "other rels" */ + if (brel->reloptkind != RELOPT_BASEREL) + continue; + + root->all_baserels = bms_add_member(root->all_baserels, brel->relid); + } + + /* + * Generate access paths for the base rels. + */ + set_base_rel_sizes(root); + set_base_rel_pathlists(root); + + /* + * Generate access paths for the entire join tree. + */ + rel = make_rel_from_joinlist(root, joinlist); + + /* + * The result should join all and only the query's base rels. + */ + Assert(bms_equal(rel->relids, root->all_baserels)); + + return rel; } /* @@ -156,24 +158,24 @@ make_one_rel(PlannerInfo *root, List *joinlist) static void set_base_rel_sizes(PlannerInfo *root) { - Index rti; - - for (rti = 1; rti < root->simple_rel_array_size; rti++) - { - RelOptInfo *rel = root->simple_rel_array[rti]; - - /* there may be empty slots corresponding to non-baserel RTEs */ - if (rel == NULL) - continue; - - Assert(rel->relid == rti); /* sanity check on array */ - - /* ignore RTEs that are "other rels" */ - if (rel->reloptkind != RELOPT_BASEREL) - continue; - - set_rel_size(root, rel, rti, root->simple_rte_array[rti]); - } + Index rti; + + for (rti = 1; rti < root->simple_rel_array_size; rti++) + { + RelOptInfo *rel = root->simple_rel_array[rti]; + + /* there may be empty slots corresponding to non-baserel RTEs */ + if (rel == NULL) + continue; + + Assert(rel->relid == rti); /* sanity check on array */ + + /* ignore RTEs that are "other rels" */ + if (rel->reloptkind != RELOPT_BASEREL) + continue; + + set_rel_size(root, rel, rti, root->simple_rte_array[rti]); + } } /* @@ -185,24 +187,24 @@ set_base_rel_sizes(PlannerInfo *root) static void set_base_rel_pathlists(PlannerInfo *root) { - Index rti; - - for (rti = 1; rti < root->simple_rel_array_size; rti++) - { - RelOptInfo *rel = root->simple_rel_array[rti]; - - /* there may be empty slots corresponding to non-baserel RTEs */ - if (rel == NULL) - continue; - - Assert(rel->relid == rti); /* sanity check on array */ - - /* ignore RTEs that are "other rels" */ - if (rel->reloptkind != RELOPT_BASEREL) - continue; - - set_rel_pathlist(root, rel, rti, root->simple_rte_array[rti]); - } + Index rti; + + for (rti = 1; rti < root->simple_rel_array_size; rti++) + { + RelOptInfo *rel = root->simple_rel_array[rti]; + + /* there may be empty slots corresponding to non-baserel RTEs */ + if (rel == NULL) + continue; + + Assert(rel->relid == rti); /* sanity check on array */ + + /* ignore RTEs that are "other rels" */ + if (rel->reloptkind != RELOPT_BASEREL) + continue; + + set_rel_pathlist(root, rel, rti, root->simple_rte_array[rti]); + } } /* @@ -211,75 +213,75 @@ set_base_rel_pathlists(PlannerInfo *root) */ static void set_rel_size(PlannerInfo *root, RelOptInfo *rel, - Index rti, RangeTblEntry *rte) + Index rti, RangeTblEntry *rte) { - if (rel->reloptkind == RELOPT_BASEREL && - relation_excluded_by_constraints(root, rel, rte)) - { - /* - * We proved we don't need to scan the rel via constraint exclusion, - * so set up a single dummy path for it. Here we only check this for - * regular baserels; if it's an otherrel, CE was already checked in - * set_append_rel_pathlist(). - * - * In this case, we go ahead and set up the relation's path right away - * instead of leaving it for set_rel_pathlist to do. This is because - * we don't have a convention for marking a rel as dummy except by - * assigning a dummy path to it. - */ - set_dummy_rel_pathlist(rel); - } - else if (rte->inh) - { - /* It's an "append relation", process accordingly */ - set_append_rel_size(root, rel, rti, rte); - } - else - { - switch (rel->rtekind) - { - case RTE_RELATION: - if (rte->relkind == RELKIND_FOREIGN_TABLE) - { - /* Foreign table */ - set_foreign_size(root, rel, rte); - } - else - { - /* Plain relation */ - set_plain_rel_size(root, rel, rte); - } - break; - case RTE_SUBQUERY: - - /* - * Subqueries don't support parameterized paths, so just go - * ahead and build their paths immediately. - */ - set_subquery_pathlist(root, rel, rti, rte); - break; - case RTE_FUNCTION: - set_function_size_estimates(root, rel); - break; - case RTE_VALUES: - set_values_size_estimates(root, rel); - break; - case RTE_CTE: - - /* - * CTEs don't support parameterized paths, so just go ahead - * and build their paths immediately. - */ - if (rte->self_reference) - set_worktable_pathlist(root, rel, rte); - else - set_cte_pathlist(root, rel, rte); - break; - default: - elog(ERROR, "unexpected rtekind: %d", (int) rel->rtekind); - break; - } - } + if (rel->reloptkind == RELOPT_BASEREL && + relation_excluded_by_constraints(root, rel, rte)) + { + /* + * We proved we don't need to scan the rel via constraint exclusion, + * so set up a single dummy path for it. Here we only check this for + * regular baserels; if it's an otherrel, CE was already checked in + * set_append_rel_pathlist(). + * + * In this case, we go ahead and set up the relation's path right away + * instead of leaving it for set_rel_pathlist to do. This is because + * we don't have a convention for marking a rel as dummy except by + * assigning a dummy path to it. + */ + set_dummy_rel_pathlist(rel); + } + else if (rte->inh) + { + /* It's an "append relation", process accordingly */ + set_append_rel_size(root, rel, rti, rte); + } + else + { + switch (rel->rtekind) + { + case RTE_RELATION: + if (rte->relkind == RELKIND_FOREIGN_TABLE) + { + /* Foreign table */ + set_foreign_size(root, rel, rte); + } + else + { + /* Plain relation */ + set_plain_rel_size(root, rel, rte); + } + break; + case RTE_SUBQUERY: + + /* + * Subqueries don't support parameterized paths, so just go + * ahead and build their paths immediately. + */ + set_subquery_pathlist(root, rel, rti, rte); + break; + case RTE_FUNCTION: + set_function_size_estimates(root, rel); + break; + case RTE_VALUES: + set_values_size_estimates(root, rel); + break; + case RTE_CTE: + + /* + * CTEs don't support parameterized paths, so just go ahead + * and build their paths immediately. + */ + if (rte->self_reference) + set_worktable_pathlist(root, rel, rte); + else + set_cte_pathlist(root, rel, rte); + break; + default: + elog(ERROR, "unexpected rtekind: %d", (int) rel->rtekind); + break; + } + } } /* @@ -288,55 +290,55 @@ set_rel_size(PlannerInfo *root, RelOptInfo *rel, */ static void set_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, - Index rti, RangeTblEntry *rte) + Index rti, RangeTblEntry *rte) { - if (IS_DUMMY_REL(rel)) - { - /* We already proved the relation empty, so nothing more to do */ - } - else if (rte->inh) - { - /* It's an "append relation", process accordingly */ - set_append_rel_pathlist(root, rel, rti, rte); - } - else - { - switch (rel->rtekind) - { - case RTE_RELATION: - if (rte->relkind == RELKIND_FOREIGN_TABLE) - { - /* Foreign table */ - set_foreign_pathlist(root, rel, rte); - } - else - { - /* Plain relation */ - set_plain_rel_pathlist(root, rel, rte); - } - break; - case RTE_SUBQUERY: - /* Subquery --- fully handled during set_rel_size */ - break; - case RTE_FUNCTION: - /* RangeFunction */ - set_function_pathlist(root, rel, rte); - break; - case RTE_VALUES: - /* Values list */ - set_values_pathlist(root, rel, rte); - break; - case RTE_CTE: - /* CTE reference --- fully handled during set_rel_size */ - break; - default: - elog(ERROR, "unexpected rtekind: %d", (int) rel->rtekind); - break; - } - } - + if (IS_DUMMY_REL(rel)) + { + /* We already proved the relation empty, so nothing more to do */ + } + else if (rte->inh) + { + /* It's an "append relation", process accordingly */ + set_append_rel_pathlist(root, rel, rti, rte); + } + else + { + switch (rel->rtekind) + { + case RTE_RELATION: + if (rte->relkind == RELKIND_FOREIGN_TABLE) + { + /* Foreign table */ + set_foreign_pathlist(root, rel, rte); + } + else + { + /* Plain relation */ + set_plain_rel_pathlist(root, rel, rte); + } + break; + case RTE_SUBQUERY: + /* Subquery --- fully handled during set_rel_size */ + break; + case RTE_FUNCTION: + /* RangeFunction */ + set_function_pathlist(root, rel, rte); + break; + case RTE_VALUES: + /* Values list */ + set_values_pathlist(root, rel, rte); + break; + case RTE_CTE: + /* CTE reference --- fully handled during set_rel_size */ + break; + default: + elog(ERROR, "unexpected rtekind: %d", (int) rel->rtekind); + break; + } + } + #ifdef OPTIMIZER_DEBUG - debug_print_rel(root, rel); + debug_print_rel(root, rel); #endif } @@ -347,25 +349,25 @@ set_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, static void set_plain_rel_size(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte) { - /* - * Test any partial indexes of rel for applicability. We must do this - * first since partial unique indexes can affect size estimates. - */ - check_partial_indexes(root, rel); - - /* Mark rel with estimated output rows, width, etc */ - set_baserel_size_estimates(root, rel); - - /* - * Check to see if we can extract any restriction conditions from join - * quals that are OR-of-AND structures. If so, add them to the rel's - * restriction list, and redo the above steps. - */ - if (create_or_index_quals(root, rel)) - { - check_partial_indexes(root, rel); - set_baserel_size_estimates(root, rel); - } + /* + * Test any partial indexes of rel for applicability. We must do this + * first since partial unique indexes can affect size estimates. + */ + check_partial_indexes(root, rel); + + /* Mark rel with estimated output rows, width, etc */ + set_baserel_size_estimates(root, rel); + + /* + * Check to see if we can extract any restriction conditions from join + * quals that are OR-of-AND structures. If so, add them to the rel's + * restriction list, and redo the above steps. + */ + if (create_or_index_quals(root, rel)) + { + check_partial_indexes(root, rel); + set_baserel_size_estimates(root, rel); + } } /* @@ -376,45 +378,45 @@ set_plain_rel_size(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte) static void set_plain_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte) { - Path *seqscan_path; - - /* Consider sequential scan */ - seqscan_path = create_seqscan_path(root, rel, NULL); - - /* At this point, we check to see if we're dealing with a RECOMMEND - * query using FilterRecommend or JoinRecommend. If we are, we don't - * need to create any other paths at all, as SeqScan is required. - * Note that we also do this if this is the items table that is being - * used in a RecJoin. */ - if (rel->recommender) { - RecommendInfo *recInfo = (RecommendInfo*) rel->recommender; - if (recInfo->opType != OP_INDEX) { - /* A new type, to make our lives easier. Only do this - * if it's not OP_JOINPARTNER though. */ - if (recInfo->opType != OP_JOINPARTNER) - seqscan_path->pathtype = T_RecScan; - - rel->cheapest_startup_path = seqscan_path; - rel->cheapest_total_path = seqscan_path; - rel->cheapest_unique_path = NULL; - rel->cheapest_parameterized_paths = list_make1(seqscan_path); - - add_path(rel, seqscan_path); - - return; - } - } - - add_path(rel, seqscan_path); - - /* Consider index scans */ - create_index_paths(root, rel); - - /* Consider TID scans */ - create_tidscan_paths(root, rel); - - /* Now find the cheapest of the paths for this rel */ - set_cheapest(rel); + Path *seqscan_path; + + /* Consider sequential scan */ + seqscan_path = create_seqscan_path(root, rel, NULL); + + /* At this point, we check to see if we're dealing with a RECOMMEND + * query using FilterRecommend or JoinRecommend. If we are, we don't + * need to create any other paths at all, as SeqScan is required. + * Note that we also do this if this is the items table that is being + * used in a RecJoin. */ + if (rel->recommender) { + RecommendInfo *recInfo = (RecommendInfo*) rel->recommender; + if (recInfo->opType != OP_INDEX) { + /* A new type, to make our lives easier. Only do this + * if it's not OP_JOINPARTNER though. */ + if (recInfo->opType != OP_JOINPARTNER) + seqscan_path->pathtype = T_RecScan; + + rel->cheapest_startup_path = seqscan_path; + rel->cheapest_total_path = seqscan_path; + rel->cheapest_unique_path = NULL; + rel->cheapest_parameterized_paths = list_make1(seqscan_path); + + add_path(rel, seqscan_path); + + return; + } + } + + add_path(rel, seqscan_path); + + /* Consider index scans */ + create_index_paths(root, rel); + + /* Consider TID scans */ + create_tidscan_paths(root, rel); + + /* Now find the cheapest of the paths for this rel */ + set_cheapest(rel); } /* @@ -424,14 +426,14 @@ set_plain_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte) static void set_foreign_size(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte) { - /* Mark rel with estimated output rows, width, etc */ - set_foreign_size_estimates(root, rel); - - /* Get FDW routine pointers for the rel */ - rel->fdwroutine = GetFdwRoutineByRelId(rte->relid); - - /* Let FDW adjust the size estimates, if it can */ - rel->fdwroutine->GetForeignRelSize(root, rel, rte->relid); + /* Mark rel with estimated output rows, width, etc */ + set_foreign_size_estimates(root, rel); + + /* Get FDW routine pointers for the rel */ + rel->fdwroutine = GetFdwRoutineByRelId(rte->relid); + + /* Let FDW adjust the size estimates, if it can */ + rel->fdwroutine->GetForeignRelSize(root, rel, rte->relid); } /* @@ -441,11 +443,11 @@ set_foreign_size(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte) static void set_foreign_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte) { - /* Call the FDW's GetForeignPaths function to generate path(s) */ - rel->fdwroutine->GetForeignPaths(root, rel, rte->relid); - - /* Select cheapest path */ - set_cheapest(rel); + /* Call the FDW's GetForeignPaths function to generate path(s) */ + rel->fdwroutine->GetForeignPaths(root, rel, rte->relid); + + /* Select cheapest path */ + set_cheapest(rel); } /* @@ -461,221 +463,221 @@ set_foreign_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte) */ static void set_append_rel_size(PlannerInfo *root, RelOptInfo *rel, - Index rti, RangeTblEntry *rte) + Index rti, RangeTblEntry *rte) { - int parentRTindex = rti; - double parent_rows; - double parent_size; - double *parent_attrsizes; - int nattrs; - ListCell *l; - - /* - * Initialize to compute size estimates for whole append relation. - * - * We handle width estimates by weighting the widths of different child - * rels proportionally to their number of rows. This is sensible because - * the use of width estimates is mainly to compute the total relation - * "footprint" if we have to sort or hash it. To do this, we sum the - * total equivalent size (in "double" arithmetic) and then divide by the - * total rowcount estimate. This is done separately for the total rel - * width and each attribute. - * - * Note: if you consider changing this logic, beware that child rels could - * have zero rows and/or width, if they were excluded by constraints. - */ - parent_rows = 0; - parent_size = 0; - nattrs = rel->max_attr - rel->min_attr + 1; - parent_attrsizes = (double *) palloc0(nattrs * sizeof(double)); - - foreach(l, root->append_rel_list) - { - AppendRelInfo *appinfo = (AppendRelInfo *) lfirst(l); - int childRTindex; - RangeTblEntry *childRTE; - RelOptInfo *childrel; - List *childquals; - Node *childqual; - ListCell *parentvars; - ListCell *childvars; - - /* append_rel_list contains all append rels; ignore others */ - if (appinfo->parent_relid != parentRTindex) - continue; - - childRTindex = appinfo->child_relid; - childRTE = root->simple_rte_array[childRTindex]; - - /* - * The child rel's RelOptInfo was already created during - * add_base_rels_to_query. - */ - childrel = find_base_rel(root, childRTindex); - Assert(childrel->reloptkind == RELOPT_OTHER_MEMBER_REL); - - /* - * We have to copy the parent's targetlist and quals to the child, - * with appropriate substitution of variables. However, only the - * baserestrictinfo quals are needed before we can check for - * constraint exclusion; so do that first and then check to see if we - * can disregard this child. - * - * As of 8.4, the child rel's targetlist might contain non-Var - * expressions, which means that substitution into the quals could - * produce opportunities for const-simplification, and perhaps even - * pseudoconstant quals. To deal with this, we strip the RestrictInfo - * nodes, do the substitution, do const-simplification, and then - * reconstitute the RestrictInfo layer. - */ - childquals = get_all_actual_clauses(rel->baserestrictinfo); - childquals = (List *) adjust_appendrel_attrs(root, - (Node *) childquals, - appinfo); - childqual = eval_const_expressions(root, (Node *) - make_ands_explicit(childquals)); - if (childqual && IsA(childqual, Const) && - (((Const *) childqual)->constisnull || - !DatumGetBool(((Const *) childqual)->constvalue))) - { - /* - * Restriction reduces to constant FALSE or constant NULL after - * substitution, so this child need not be scanned. - */ - set_dummy_rel_pathlist(childrel); - continue; - } - childquals = make_ands_implicit((Expr *) childqual); - childquals = make_restrictinfos_from_actual_clauses(root, - childquals); - childrel->baserestrictinfo = childquals; - - if (relation_excluded_by_constraints(root, childrel, childRTE)) - { - /* - * This child need not be scanned, so we can omit it from the - * appendrel. - */ - set_dummy_rel_pathlist(childrel); - continue; - } - - /* - * CE failed, so finish copying/modifying targetlist and join quals. - * - * Note: the resulting childrel->reltargetlist may contain arbitrary - * expressions, which normally would not occur in a reltargetlist. - * That is okay because nothing outside of this routine will look at - * the child rel's reltargetlist. We do have to cope with the case - * while constructing attr_widths estimates below, though. - */ - childrel->joininfo = (List *) - adjust_appendrel_attrs(root, - (Node *) rel->joininfo, - appinfo); - childrel->reltargetlist = (List *) - adjust_appendrel_attrs(root, - (Node *) rel->reltargetlist, - appinfo); - - /* - * We have to make child entries in the EquivalenceClass data - * structures as well. This is needed either if the parent - * participates in some eclass joins (because we will want to consider - * inner-indexscan joins on the individual children) or if the parent - * has useful pathkeys (because we should try to build MergeAppend - * paths that produce those sort orderings). - */ - if (rel->has_eclass_joins || has_useful_pathkeys(root, rel)) - add_child_rel_equivalences(root, appinfo, rel, childrel); - childrel->has_eclass_joins = rel->has_eclass_joins; - - /* - * Note: we could compute appropriate attr_needed data for the child's - * variables, by transforming the parent's attr_needed through the - * translated_vars mapping. However, currently there's no need - * because attr_needed is only examined for base relations not - * otherrels. So we just leave the child's attr_needed empty. - */ - - /* - * Compute the child's size. - */ - set_rel_size(root, childrel, childRTindex, childRTE); - - /* - * It is possible that constraint exclusion detected a contradiction - * within a child subquery, even though we didn't prove one above. If - * so, we can skip this child. - */ - if (IS_DUMMY_REL(childrel)) - continue; - - /* - * Accumulate size information from each live child. - */ - if (childrel->rows > 0) - { - parent_rows += childrel->rows; - parent_size += childrel->width * childrel->rows; - - /* - * Accumulate per-column estimates too. We need not do anything - * for PlaceHolderVars in the parent list. If child expression - * isn't a Var, or we didn't record a width estimate for it, we - * have to fall back on a datatype-based estimate. - * - * By construction, child's reltargetlist is 1-to-1 with parent's. - */ - forboth(parentvars, rel->reltargetlist, - childvars, childrel->reltargetlist) - { - Var *parentvar = (Var *) lfirst(parentvars); - Node *childvar = (Node *) lfirst(childvars); - - if (IsA(parentvar, Var)) - { - int pndx = parentvar->varattno - rel->min_attr; - int32 child_width = 0; - - if (IsA(childvar, Var)) - { - int cndx = ((Var *) childvar)->varattno - childrel->min_attr; - - child_width = childrel->attr_widths[cndx]; - } - if (child_width <= 0) - child_width = get_typavgwidth(exprType(childvar), - exprTypmod(childvar)); - Assert(child_width > 0); - parent_attrsizes[pndx] += child_width * childrel->rows; - } - } - } - } - - /* - * Save the finished size estimates. - */ - rel->rows = parent_rows; - if (parent_rows > 0) - { - int i; - - rel->width = rint(parent_size / parent_rows); - for (i = 0; i < nattrs; i++) - rel->attr_widths[i] = rint(parent_attrsizes[i] / parent_rows); - } - else - rel->width = 0; /* attr_widths should be zero already */ - - /* - * Set "raw tuples" count equal to "rows" for the appendrel; needed - * because some places assume rel->tuples is valid for any baserel. - */ - rel->tuples = parent_rows; - - pfree(parent_attrsizes); + int parentRTindex = rti; + double parent_rows; + double parent_size; + double *parent_attrsizes; + int nattrs; + ListCell *l; + + /* + * Initialize to compute size estimates for whole append relation. + * + * We handle width estimates by weighting the widths of different child + * rels proportionally to their number of rows. This is sensible because + * the use of width estimates is mainly to compute the total relation + * "footprint" if we have to sort or hash it. To do this, we sum the + * total equivalent size (in "double" arithmetic) and then divide by the + * total rowcount estimate. This is done separately for the total rel + * width and each attribute. + * + * Note: if you consider changing this logic, beware that child rels could + * have zero rows and/or width, if they were excluded by constraints. + */ + parent_rows = 0; + parent_size = 0; + nattrs = rel->max_attr - rel->min_attr + 1; + parent_attrsizes = (double *) palloc0(nattrs * sizeof(double)); + + foreach(l, root->append_rel_list) + { + AppendRelInfo *appinfo = (AppendRelInfo *) lfirst(l); + int childRTindex; + RangeTblEntry *childRTE; + RelOptInfo *childrel; + List *childquals; + Node *childqual; + ListCell *parentvars; + ListCell *childvars; + + /* append_rel_list contains all append rels; ignore others */ + if (appinfo->parent_relid != parentRTindex) + continue; + + childRTindex = appinfo->child_relid; + childRTE = root->simple_rte_array[childRTindex]; + + /* + * The child rel's RelOptInfo was already created during + * add_base_rels_to_query. + */ + childrel = find_base_rel(root, childRTindex); + Assert(childrel->reloptkind == RELOPT_OTHER_MEMBER_REL); + + /* + * We have to copy the parent's targetlist and quals to the child, + * with appropriate substitution of variables. However, only the + * baserestrictinfo quals are needed before we can check for + * constraint exclusion; so do that first and then check to see if we + * can disregard this child. + * + * As of 8.4, the child rel's targetlist might contain non-Var + * expressions, which means that substitution into the quals could + * produce opportunities for const-simplification, and perhaps even + * pseudoconstant quals. To deal with this, we strip the RestrictInfo + * nodes, do the substitution, do const-simplification, and then + * reconstitute the RestrictInfo layer. + */ + childquals = get_all_actual_clauses(rel->baserestrictinfo); + childquals = (List *) adjust_appendrel_attrs(root, + (Node *) childquals, + appinfo); + childqual = eval_const_expressions(root, (Node *) + make_ands_explicit(childquals)); + if (childqual && IsA(childqual, Const) && + (((Const *) childqual)->constisnull || + !DatumGetBool(((Const *) childqual)->constvalue))) + { + /* + * Restriction reduces to constant FALSE or constant NULL after + * substitution, so this child need not be scanned. + */ + set_dummy_rel_pathlist(childrel); + continue; + } + childquals = make_ands_implicit((Expr *) childqual); + childquals = make_restrictinfos_from_actual_clauses(root, + childquals); + childrel->baserestrictinfo = childquals; + + if (relation_excluded_by_constraints(root, childrel, childRTE)) + { + /* + * This child need not be scanned, so we can omit it from the + * appendrel. + */ + set_dummy_rel_pathlist(childrel); + continue; + } + + /* + * CE failed, so finish copying/modifying targetlist and join quals. + * + * Note: the resulting childrel->reltargetlist may contain arbitrary + * expressions, which normally would not occur in a reltargetlist. + * That is okay because nothing outside of this routine will look at + * the child rel's reltargetlist. We do have to cope with the case + * while constructing attr_widths estimates below, though. + */ + childrel->joininfo = (List *) + adjust_appendrel_attrs(root, + (Node *) rel->joininfo, + appinfo); + childrel->reltargetlist = (List *) + adjust_appendrel_attrs(root, + (Node *) rel->reltargetlist, + appinfo); + + /* + * We have to make child entries in the EquivalenceClass data + * structures as well. This is needed either if the parent + * participates in some eclass joins (because we will want to consider + * inner-indexscan joins on the individual children) or if the parent + * has useful pathkeys (because we should try to build MergeAppend + * paths that produce those sort orderings). + */ + if (rel->has_eclass_joins || has_useful_pathkeys(root, rel)) + add_child_rel_equivalences(root, appinfo, rel, childrel); + childrel->has_eclass_joins = rel->has_eclass_joins; + + /* + * Note: we could compute appropriate attr_needed data for the child's + * variables, by transforming the parent's attr_needed through the + * translated_vars mapping. However, currently there's no need + * because attr_needed is only examined for base relations not + * otherrels. So we just leave the child's attr_needed empty. + */ + + /* + * Compute the child's size. + */ + set_rel_size(root, childrel, childRTindex, childRTE); + + /* + * It is possible that constraint exclusion detected a contradiction + * within a child subquery, even though we didn't prove one above. If + * so, we can skip this child. + */ + if (IS_DUMMY_REL(childrel)) + continue; + + /* + * Accumulate size information from each live child. + */ + if (childrel->rows > 0) + { + parent_rows += childrel->rows; + parent_size += childrel->width * childrel->rows; + + /* + * Accumulate per-column estimates too. We need not do anything + * for PlaceHolderVars in the parent list. If child expression + * isn't a Var, or we didn't record a width estimate for it, we + * have to fall back on a datatype-based estimate. + * + * By construction, child's reltargetlist is 1-to-1 with parent's. + */ + forboth(parentvars, rel->reltargetlist, + childvars, childrel->reltargetlist) + { + Var *parentvar = (Var *) lfirst(parentvars); + Node *childvar = (Node *) lfirst(childvars); + + if (IsA(parentvar, Var)) + { + int pndx = parentvar->varattno - rel->min_attr; + int32 child_width = 0; + + if (IsA(childvar, Var)) + { + int cndx = ((Var *) childvar)->varattno - childrel->min_attr; + + child_width = childrel->attr_widths[cndx]; + } + if (child_width <= 0) + child_width = get_typavgwidth(exprType(childvar), + exprTypmod(childvar)); + Assert(child_width > 0); + parent_attrsizes[pndx] += child_width * childrel->rows; + } + } + } + } + + /* + * Save the finished size estimates. + */ + rel->rows = parent_rows; + if (parent_rows > 0) + { + int i; + + rel->width = rint(parent_size / parent_rows); + for (i = 0; i < nattrs; i++) + rel->attr_widths[i] = rint(parent_attrsizes[i] / parent_rows); + } + else + rel->width = 0; /* attr_widths should be zero already */ + + /* + * Set "raw tuples" count equal to "rows" for the appendrel; needed + * because some places assume rel->tuples is valid for any baserel. + */ + rel->tuples = parent_rows; + + pfree(parent_attrsizes); } /* @@ -684,192 +686,192 @@ set_append_rel_size(PlannerInfo *root, RelOptInfo *rel, */ static void set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, - Index rti, RangeTblEntry *rte) + Index rti, RangeTblEntry *rte) { - int parentRTindex = rti; - List *live_childrels = NIL; - List *subpaths = NIL; - List *all_child_pathkeys = NIL; - List *all_child_outers = NIL; - ListCell *l; - - /* - * Generate access paths for each member relation, and remember the - * cheapest path for each one. Also, identify all pathkeys (orderings) - * and parameterizations (required_outer sets) available for the member - * relations. - */ - foreach(l, root->append_rel_list) - { - AppendRelInfo *appinfo = (AppendRelInfo *) lfirst(l); - int childRTindex; - RangeTblEntry *childRTE; - RelOptInfo *childrel; - ListCell *lcp; - - /* append_rel_list contains all append rels; ignore others */ - if (appinfo->parent_relid != parentRTindex) - continue; - - /* Re-locate the child RTE and RelOptInfo */ - childRTindex = appinfo->child_relid; - childRTE = root->simple_rte_array[childRTindex]; - childrel = root->simple_rel_array[childRTindex]; - - /* - * Compute the child's access paths. - */ - set_rel_pathlist(root, childrel, childRTindex, childRTE); - - /* - * If child is dummy, ignore it. - */ - if (IS_DUMMY_REL(childrel)) - continue; - - /* - * Child is live, so add its cheapest access path to the Append path - * we are constructing for the parent. - */ - subpaths = accumulate_append_subpath(subpaths, - childrel->cheapest_total_path); - - /* Remember which childrels are live, for logic below */ - live_childrels = lappend(live_childrels, childrel); - - /* - * Collect lists of all the available path orderings and - * parameterizations for all the children. We use these as a - * heuristic to indicate which sort orderings and parameterizations we - * should build Append and MergeAppend paths for. - */ - foreach(lcp, childrel->pathlist) - { - Path *childpath = (Path *) lfirst(lcp); - List *childkeys = childpath->pathkeys; - Relids childouter = PATH_REQ_OUTER(childpath); - - /* Unsorted paths don't contribute to pathkey list */ - if (childkeys != NIL) - { - ListCell *lpk; - bool found = false; - - /* Have we already seen this ordering? */ - foreach(lpk, all_child_pathkeys) - { - List *existing_pathkeys = (List *) lfirst(lpk); - - if (compare_pathkeys(existing_pathkeys, - childkeys) == PATHKEYS_EQUAL) - { - found = true; - break; - } - } - if (!found) - { - /* No, so add it to all_child_pathkeys */ - all_child_pathkeys = lappend(all_child_pathkeys, - childkeys); - } - } - - /* Unparameterized paths don't contribute to param-set list */ - if (childouter) - { - ListCell *lco; - bool found = false; - - /* Have we already seen this param set? */ - foreach(lco, all_child_outers) - { - Relids existing_outers = (Relids) lfirst(lco); - - if (bms_equal(existing_outers, childouter)) - { - found = true; - break; - } - } - if (!found) - { - /* No, so add it to all_child_outers */ - all_child_outers = lappend(all_child_outers, - childouter); - } - } - } - } - - /* - * Next, build an unordered, unparameterized Append path for the rel. - * (Note: this is correct even if we have zero or one live subpath due to - * constraint exclusion.) - */ - add_path(rel, (Path *) create_append_path(rel, subpaths, NULL)); - - /* - * Build unparameterized MergeAppend paths based on the collected list of - * child pathkeys. - */ - generate_mergeappend_paths(root, rel, live_childrels, all_child_pathkeys); - - /* - * Build Append paths for each parameterization seen among the child rels. - * (This may look pretty expensive, but in most cases of practical - * interest, the child rels will expose mostly the same parameterizations, - * so that not that many cases actually get considered here.) - * - * The Append node itself cannot enforce quals, so all qual checking must - * be done in the child paths. This means that to have a parameterized - * Append path, we must have the exact same parameterization for each - * child path; otherwise some children might be failing to check the - * moved-down quals. To make them match up, we can try to increase the - * parameterization of lesser-parameterized paths. - */ - foreach(l, all_child_outers) - { - Relids required_outer = (Relids) lfirst(l); - bool ok = true; - ListCell *lcr; - - /* Select the child paths for an Append with this parameterization */ - subpaths = NIL; - foreach(lcr, live_childrels) - { - RelOptInfo *childrel = (RelOptInfo *) lfirst(lcr); - Path *cheapest_total; - - cheapest_total = - get_cheapest_path_for_pathkeys(childrel->pathlist, - NIL, - required_outer, - TOTAL_COST); - Assert(cheapest_total != NULL); - - /* Children must have exactly the desired parameterization */ - if (!bms_equal(PATH_REQ_OUTER(cheapest_total), required_outer)) - { - cheapest_total = reparameterize_path(root, cheapest_total, - required_outer, 1.0); - if (cheapest_total == NULL) - { - ok = false; - break; - } - } - - subpaths = accumulate_append_subpath(subpaths, cheapest_total); - } - - if (ok) - add_path(rel, (Path *) - create_append_path(rel, subpaths, required_outer)); - } - - /* Select cheapest paths */ - set_cheapest(rel); + int parentRTindex = rti; + List *live_childrels = NIL; + List *subpaths = NIL; + List *all_child_pathkeys = NIL; + List *all_child_outers = NIL; + ListCell *l; + + /* + * Generate access paths for each member relation, and remember the + * cheapest path for each one. Also, identify all pathkeys (orderings) + * and parameterizations (required_outer sets) available for the member + * relations. + */ + foreach(l, root->append_rel_list) + { + AppendRelInfo *appinfo = (AppendRelInfo *) lfirst(l); + int childRTindex; + RangeTblEntry *childRTE; + RelOptInfo *childrel; + ListCell *lcp; + + /* append_rel_list contains all append rels; ignore others */ + if (appinfo->parent_relid != parentRTindex) + continue; + + /* Re-locate the child RTE and RelOptInfo */ + childRTindex = appinfo->child_relid; + childRTE = root->simple_rte_array[childRTindex]; + childrel = root->simple_rel_array[childRTindex]; + + /* + * Compute the child's access paths. + */ + set_rel_pathlist(root, childrel, childRTindex, childRTE); + + /* + * If child is dummy, ignore it. + */ + if (IS_DUMMY_REL(childrel)) + continue; + + /* + * Child is live, so add its cheapest access path to the Append path + * we are constructing for the parent. + */ + subpaths = accumulate_append_subpath(subpaths, + childrel->cheapest_total_path); + + /* Remember which childrels are live, for logic below */ + live_childrels = lappend(live_childrels, childrel); + + /* + * Collect lists of all the available path orderings and + * parameterizations for all the children. We use these as a + * heuristic to indicate which sort orderings and parameterizations we + * should build Append and MergeAppend paths for. + */ + foreach(lcp, childrel->pathlist) + { + Path *childpath = (Path *) lfirst(lcp); + List *childkeys = childpath->pathkeys; + Relids childouter = PATH_REQ_OUTER(childpath); + + /* Unsorted paths don't contribute to pathkey list */ + if (childkeys != NIL) + { + ListCell *lpk; + bool found = false; + + /* Have we already seen this ordering? */ + foreach(lpk, all_child_pathkeys) + { + List *existing_pathkeys = (List *) lfirst(lpk); + + if (compare_pathkeys(existing_pathkeys, + childkeys) == PATHKEYS_EQUAL) + { + found = true; + break; + } + } + if (!found) + { + /* No, so add it to all_child_pathkeys */ + all_child_pathkeys = lappend(all_child_pathkeys, + childkeys); + } + } + + /* Unparameterized paths don't contribute to param-set list */ + if (childouter) + { + ListCell *lco; + bool found = false; + + /* Have we already seen this param set? */ + foreach(lco, all_child_outers) + { + Relids existing_outers = (Relids) lfirst(lco); + + if (bms_equal(existing_outers, childouter)) + { + found = true; + break; + } + } + if (!found) + { + /* No, so add it to all_child_outers */ + all_child_outers = lappend(all_child_outers, + childouter); + } + } + } + } + + /* + * Next, build an unordered, unparameterized Append path for the rel. + * (Note: this is correct even if we have zero or one live subpath due to + * constraint exclusion.) + */ + add_path(rel, (Path *) create_append_path(rel, subpaths, NULL)); + + /* + * Build unparameterized MergeAppend paths based on the collected list of + * child pathkeys. + */ + generate_mergeappend_paths(root, rel, live_childrels, all_child_pathkeys); + + /* + * Build Append paths for each parameterization seen among the child rels. + * (This may look pretty expensive, but in most cases of practical + * interest, the child rels will expose mostly the same parameterizations, + * so that not that many cases actually get considered here.) + * + * The Append node itself cannot enforce quals, so all qual checking must + * be done in the child paths. This means that to have a parameterized + * Append path, we must have the exact same parameterization for each + * child path; otherwise some children might be failing to check the + * moved-down quals. To make them match up, we can try to increase the + * parameterization of lesser-parameterized paths. + */ + foreach(l, all_child_outers) + { + Relids required_outer = (Relids) lfirst(l); + bool ok = true; + ListCell *lcr; + + /* Select the child paths for an Append with this parameterization */ + subpaths = NIL; + foreach(lcr, live_childrels) + { + RelOptInfo *childrel = (RelOptInfo *) lfirst(lcr); + Path *cheapest_total; + + cheapest_total = + get_cheapest_path_for_pathkeys(childrel->pathlist, + NIL, + required_outer, + TOTAL_COST); + Assert(cheapest_total != NULL); + + /* Children must have exactly the desired parameterization */ + if (!bms_equal(PATH_REQ_OUTER(cheapest_total), required_outer)) + { + cheapest_total = reparameterize_path(root, cheapest_total, + required_outer, 1.0); + if (cheapest_total == NULL) + { + ok = false; + break; + } + } + + subpaths = accumulate_append_subpath(subpaths, cheapest_total); + } + + if (ok) + add_path(rel, (Path *) + create_append_path(rel, subpaths, required_outer)); + } + + /* Select cheapest paths */ + set_cheapest(rel); } /* @@ -897,76 +899,76 @@ set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, */ static void generate_mergeappend_paths(PlannerInfo *root, RelOptInfo *rel, - List *live_childrels, - List *all_child_pathkeys) + List *live_childrels, + List *all_child_pathkeys) { - ListCell *lcp; - - foreach(lcp, all_child_pathkeys) - { - List *pathkeys = (List *) lfirst(lcp); - List *startup_subpaths = NIL; - List *total_subpaths = NIL; - bool startup_neq_total = false; - ListCell *lcr; - - /* Select the child paths for this ordering... */ - foreach(lcr, live_childrels) - { - RelOptInfo *childrel = (RelOptInfo *) lfirst(lcr); - Path *cheapest_startup, - *cheapest_total; - - /* Locate the right paths, if they are available. */ - cheapest_startup = - get_cheapest_path_for_pathkeys(childrel->pathlist, - pathkeys, - NULL, - STARTUP_COST); - cheapest_total = - get_cheapest_path_for_pathkeys(childrel->pathlist, - pathkeys, - NULL, - TOTAL_COST); - - /* - * If we can't find any paths with the right order just use the - * cheapest-total path; we'll have to sort it later. - */ - if (cheapest_startup == NULL || cheapest_total == NULL) - { - cheapest_startup = cheapest_total = - childrel->cheapest_total_path; - Assert(cheapest_total != NULL); - } - - /* - * Notice whether we actually have different paths for the - * "cheapest" and "total" cases; frequently there will be no point - * in two create_merge_append_path() calls. - */ - if (cheapest_startup != cheapest_total) - startup_neq_total = true; - - startup_subpaths = - accumulate_append_subpath(startup_subpaths, cheapest_startup); - total_subpaths = - accumulate_append_subpath(total_subpaths, cheapest_total); - } - - /* ... and build the MergeAppend paths */ - add_path(rel, (Path *) create_merge_append_path(root, - rel, - startup_subpaths, - pathkeys, - NULL)); - if (startup_neq_total) - add_path(rel, (Path *) create_merge_append_path(root, - rel, - total_subpaths, - pathkeys, - NULL)); - } + ListCell *lcp; + + foreach(lcp, all_child_pathkeys) + { + List *pathkeys = (List *) lfirst(lcp); + List *startup_subpaths = NIL; + List *total_subpaths = NIL; + bool startup_neq_total = false; + ListCell *lcr; + + /* Select the child paths for this ordering... */ + foreach(lcr, live_childrels) + { + RelOptInfo *childrel = (RelOptInfo *) lfirst(lcr); + Path *cheapest_startup, + *cheapest_total; + + /* Locate the right paths, if they are available. */ + cheapest_startup = + get_cheapest_path_for_pathkeys(childrel->pathlist, + pathkeys, + NULL, + STARTUP_COST); + cheapest_total = + get_cheapest_path_for_pathkeys(childrel->pathlist, + pathkeys, + NULL, + TOTAL_COST); + + /* + * If we can't find any paths with the right order just use the + * cheapest-total path; we'll have to sort it later. + */ + if (cheapest_startup == NULL || cheapest_total == NULL) + { + cheapest_startup = cheapest_total = + childrel->cheapest_total_path; + Assert(cheapest_total != NULL); + } + + /* + * Notice whether we actually have different paths for the + * "cheapest" and "total" cases; frequently there will be no point + * in two create_merge_append_path() calls. + */ + if (cheapest_startup != cheapest_total) + startup_neq_total = true; + + startup_subpaths = + accumulate_append_subpath(startup_subpaths, cheapest_startup); + total_subpaths = + accumulate_append_subpath(total_subpaths, cheapest_total); + } + + /* ... and build the MergeAppend paths */ + add_path(rel, (Path *) create_merge_append_path(root, + rel, + startup_subpaths, + pathkeys, + NULL)); + if (startup_neq_total) + add_path(rel, (Path *) create_merge_append_path(root, + rel, + total_subpaths, + pathkeys, + NULL)); + } } /* @@ -981,15 +983,15 @@ generate_mergeappend_paths(PlannerInfo *root, RelOptInfo *rel, static List * accumulate_append_subpath(List *subpaths, Path *path) { - if (IsA(path, AppendPath)) - { - AppendPath *apath = (AppendPath *) path; - - /* list_copy is important here to avoid sharing list substructure */ - return list_concat(subpaths, list_copy(apath->subpaths)); - } - else - return lappend(subpaths, path); + if (IsA(path, AppendPath)) + { + AppendPath *apath = (AppendPath *) path; + + /* list_copy is important here to avoid sharing list substructure */ + return list_concat(subpaths, list_copy(apath->subpaths)); + } + else + return lappend(subpaths, path); } /* @@ -1002,39 +1004,39 @@ accumulate_append_subpath(List *subpaths, Path *path) static void set_dummy_rel_pathlist(RelOptInfo *rel) { - /* Set dummy size estimates --- we leave attr_widths[] as zeroes */ - rel->rows = 0; - rel->width = 0; - - /* Discard any pre-existing paths; no further need for them */ - rel->pathlist = NIL; - - add_path(rel, (Path *) create_append_path(rel, NIL, NULL)); - - /* Select cheapest path (pretty easy in this case...) */ - set_cheapest(rel); + /* Set dummy size estimates --- we leave attr_widths[] as zeroes */ + rel->rows = 0; + rel->width = 0; + + /* Discard any pre-existing paths; no further need for them */ + rel->pathlist = NIL; + + add_path(rel, (Path *) create_append_path(rel, NIL, NULL)); + + /* Select cheapest path (pretty easy in this case...) */ + set_cheapest(rel); } /* quick-and-dirty test to see if any joining is needed */ static bool has_multiple_baserels(PlannerInfo *root) { - int num_base_rels = 0; - Index rti; - - for (rti = 1; rti < root->simple_rel_array_size; rti++) - { - RelOptInfo *brel = root->simple_rel_array[rti]; - - if (brel == NULL) - continue; - - /* ignore RTEs that are "other rels" */ - if (brel->reloptkind == RELOPT_BASEREL) - if (++num_base_rels > 1) - return true; - } - return false; + int num_base_rels = 0; + Index rti; + + for (rti = 1; rti < root->simple_rel_array_size; rti++) + { + RelOptInfo *brel = root->simple_rel_array[rti]; + + if (brel == NULL) + continue; + + /* ignore RTEs that are "other rels" */ + if (brel->reloptkind == RELOPT_BASEREL) + if (++num_base_rels > 1) + return true; + } + return false; } /* @@ -1046,127 +1048,134 @@ has_multiple_baserels(PlannerInfo *root) */ static void set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel, - Index rti, RangeTblEntry *rte) + Index rti, RangeTblEntry *rte) { - Query *parse = root->parse; - Query *subquery = rte->subquery; - bool *differentTypes; - double tuple_fraction; - PlannerInfo *subroot; - List *pathkeys; - - /* - * Must copy the Query so that planning doesn't mess up the RTE contents - * (really really need to fix the planner to not scribble on its input, - * someday). - */ - subquery = copyObject(subquery); - - /* We need a workspace for keeping track of set-op type coercions */ - differentTypes = (bool *) - palloc0((list_length(subquery->targetList) + 1) * sizeof(bool)); - - /* - * If there are any restriction clauses that have been attached to the - * subquery relation, consider pushing them down to become WHERE or HAVING - * quals of the subquery itself. This transformation is useful because it - * may allow us to generate a better plan for the subquery than evaluating - * all the subquery output rows and then filtering them. - * - * There are several cases where we cannot push down clauses. Restrictions - * involving the subquery are checked by subquery_is_pushdown_safe(). - * Restrictions on individual clauses are checked by - * qual_is_pushdown_safe(). Also, we don't want to push down - * pseudoconstant clauses; better to have the gating node above the - * subquery. - * - * Also, if the sub-query has "security_barrier" flag, it means the - * sub-query originated from a view that must enforce row-level security. - * We must not push down quals in order to avoid information leaks, either - * via side-effects or error output. - * - * Non-pushed-down clauses will get evaluated as qpquals of the - * SubqueryScan node. - * - * XXX Are there any cases where we want to make a policy decision not to - * push down a pushable qual, because it'd result in a worse plan? - */ - if (rel->baserestrictinfo != NIL && - subquery_is_pushdown_safe(subquery, subquery, differentTypes)) - { - /* OK to consider pushing down individual quals */ - List *upperrestrictlist = NIL; - ListCell *l; - - foreach(l, rel->baserestrictinfo) - { - RestrictInfo *rinfo = (RestrictInfo *) lfirst(l); - Node *clause = (Node *) rinfo->clause; - - if (!rinfo->pseudoconstant && - (!rte->security_barrier || - !contain_leaky_functions(clause)) && - qual_is_pushdown_safe(subquery, rti, clause, differentTypes)) - { - /* Push it down */ - subquery_push_qual(subquery, rte, rti, clause); - } - else - { - /* Keep it in the upper query */ - upperrestrictlist = lappend(upperrestrictlist, rinfo); - } - } - rel->baserestrictinfo = upperrestrictlist; - } - - pfree(differentTypes); - - /* - * We can safely pass the outer tuple_fraction down to the subquery if the - * outer level has no joining, aggregation, or sorting to do. Otherwise - * we'd better tell the subquery to plan for full retrieval. (XXX This - * could probably be made more intelligent ...) - */ - if (parse->hasAggs || - parse->groupClause || - parse->havingQual || - parse->distinctClause || - parse->sortClause || - has_multiple_baserels(root)) - tuple_fraction = 0.0; /* default case */ - else - tuple_fraction = root->tuple_fraction; - - /* Generate the plan for the subquery */ - rel->subplan = subquery_planner(root->glob, subquery, - root, - false, tuple_fraction, - &subroot); - rel->subroot = subroot; - - /* - * It's possible that constraint exclusion proved the subquery empty. If - * so, it's convenient to turn it back into a dummy path so that we will - * recognize appropriate optimizations at this level. - */ - if (is_dummy_plan(rel->subplan)) - { - set_dummy_rel_pathlist(rel); - return; - } - - /* Mark rel with estimated output rows, width, etc */ - set_subquery_size_estimates(root, rel); - - /* Convert subquery pathkeys to outer representation */ - pathkeys = convert_subquery_pathkeys(root, rel, subroot->query_pathkeys); - - /* Generate appropriate path */ - add_path(rel, create_subqueryscan_path(root, rel, pathkeys, NULL)); - - /* Select cheapest path (pretty easy in this case...) */ - set_cheapest(rel); + Query *parse = root->parse; + Query *subquery = rte->subquery; + Query * tempQuery; + bool *differentTypes; + double tuple_fraction; + PlannerInfo *subroot; + List *pathkeys; + + /* + * Must copy the Query so that planning doesn't mess up the RTE contents + * (really really need to fix the planner to not scribble on its input, + * someday). + */ + + //NEW FOR RECDB + //Prevent an error from happening while using a recommender in sub query + tempQuery = copyObject(subquery); + copyQueryHelper(tempQuery, subquery); + subquery = tempQuery; + + + /* We need a workspace for keeping track of set-op type coercions */ + differentTypes = (bool *) + palloc0((list_length(subquery->targetList) + 1) * sizeof(bool)); + + /* + * If there are any restriction clauses that have been attached to the + * subquery relation, consider pushing them down to become WHERE or HAVING + * quals of the subquery itself. This transformation is useful because it + * may allow us to generate a better plan for the subquery than evaluating + * all the subquery output rows and then filtering them. + * + * There are several cases where we cannot push down clauses. Restrictions + * involving the subquery are checked by subquery_is_pushdown_safe(). + * Restrictions on individual clauses are checked by + * qual_is_pushdown_safe(). Also, we don't want to push down + * pseudoconstant clauses; better to have the gating node above the + * subquery. + * + * Also, if the sub-query has "security_barrier" flag, it means the + * sub-query originated from a view that must enforce row-level security. + * We must not push down quals in order to avoid information leaks, either + * via side-effects or error output. + * + * Non-pushed-down clauses will get evaluated as qpquals of the + * SubqueryScan node. + * + * XXX Are there any cases where we want to make a policy decision not to + * push down a pushable qual, because it'd result in a worse plan? + */ + if (rel->baserestrictinfo != NIL && + subquery_is_pushdown_safe(subquery, subquery, differentTypes)) + { + /* OK to consider pushing down individual quals */ + List *upperrestrictlist = NIL; + ListCell *l; + + foreach(l, rel->baserestrictinfo) + { + RestrictInfo *rinfo = (RestrictInfo *) lfirst(l); + Node *clause = (Node *) rinfo->clause; + + if (!rinfo->pseudoconstant && + (!rte->security_barrier || + !contain_leaky_functions(clause)) && + qual_is_pushdown_safe(subquery, rti, clause, differentTypes)) + { + /* Push it down */ + subquery_push_qual(subquery, rte, rti, clause); + } + else + { + /* Keep it in the upper query */ + upperrestrictlist = lappend(upperrestrictlist, rinfo); + } + } + rel->baserestrictinfo = upperrestrictlist; + } + + pfree(differentTypes); + + /* + * We can safely pass the outer tuple_fraction down to the subquery if the + * outer level has no joining, aggregation, or sorting to do. Otherwise + * we'd better tell the subquery to plan for full retrieval. (XXX This + * could probably be made more intelligent ...) + */ + if (parse->hasAggs || + parse->groupClause || + parse->havingQual || + parse->distinctClause || + parse->sortClause || + has_multiple_baserels(root)) + tuple_fraction = 0.0; /* default case */ + else + tuple_fraction = root->tuple_fraction; + + /* Generate the plan for the subquery */ + rel->subplan = subquery_planner(root->glob, subquery, + root, + false, tuple_fraction, + &subroot); + rel->subroot = subroot; + + /* + * It's possible that constraint exclusion proved the subquery empty. If + * so, it's convenient to turn it back into a dummy path so that we will + * recognize appropriate optimizations at this level. + */ + if (is_dummy_plan(rel->subplan)) + { + set_dummy_rel_pathlist(rel); + return; + } + + /* Mark rel with estimated output rows, width, etc */ + set_subquery_size_estimates(root, rel); + + /* Convert subquery pathkeys to outer representation */ + pathkeys = convert_subquery_pathkeys(root, rel, subroot->query_pathkeys); + + /* Generate appropriate path */ + add_path(rel, create_subqueryscan_path(root, rel, pathkeys, NULL)); + + /* Select cheapest path (pretty easy in this case...) */ + set_cheapest(rel); } /* @@ -1176,11 +1185,11 @@ set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel, static void set_function_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte) { - /* Generate appropriate path */ - add_path(rel, create_functionscan_path(root, rel)); - - /* Select cheapest path (pretty easy in this case...) */ - set_cheapest(rel); + /* Generate appropriate path */ + add_path(rel, create_functionscan_path(root, rel)); + + /* Select cheapest path (pretty easy in this case...) */ + set_cheapest(rel); } /* @@ -1190,11 +1199,11 @@ set_function_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte) static void set_values_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte) { - /* Generate appropriate path */ - add_path(rel, create_valuesscan_path(root, rel)); - - /* Select cheapest path (pretty easy in this case...) */ - set_cheapest(rel); + /* Generate appropriate path */ + add_path(rel, create_valuesscan_path(root, rel)); + + /* Select cheapest path (pretty easy in this case...) */ + set_cheapest(rel); } /* @@ -1207,55 +1216,55 @@ set_values_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte) static void set_cte_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte) { - Plan *cteplan; - PlannerInfo *cteroot; - Index levelsup; - int ndx; - ListCell *lc; - int plan_id; - - /* - * Find the referenced CTE, and locate the plan previously made for it. - */ - levelsup = rte->ctelevelsup; - cteroot = root; - while (levelsup-- > 0) - { - cteroot = cteroot->parent_root; - if (!cteroot) /* shouldn't happen */ - elog(ERROR, "bad levelsup for CTE \"%s\"", rte->ctename); - } - - /* - * Note: cte_plan_ids can be shorter than cteList, if we are still working - * on planning the CTEs (ie, this is a side-reference from another CTE). - * So we mustn't use forboth here. - */ - ndx = 0; - foreach(lc, cteroot->parse->cteList) - { - CommonTableExpr *cte = (CommonTableExpr *) lfirst(lc); - - if (strcmp(cte->ctename, rte->ctename) == 0) - break; - ndx++; - } - if (lc == NULL) /* shouldn't happen */ - elog(ERROR, "could not find CTE \"%s\"", rte->ctename); - if (ndx >= list_length(cteroot->cte_plan_ids)) - elog(ERROR, "could not find plan for CTE \"%s\"", rte->ctename); - plan_id = list_nth_int(cteroot->cte_plan_ids, ndx); - Assert(plan_id > 0); - cteplan = (Plan *) list_nth(root->glob->subplans, plan_id - 1); - - /* Mark rel with estimated output rows, width, etc */ - set_cte_size_estimates(root, rel, cteplan); - - /* Generate appropriate path */ - add_path(rel, create_ctescan_path(root, rel)); - - /* Select cheapest path (pretty easy in this case...) */ - set_cheapest(rel); + Plan *cteplan; + PlannerInfo *cteroot; + Index levelsup; + int ndx; + ListCell *lc; + int plan_id; + + /* + * Find the referenced CTE, and locate the plan previously made for it. + */ + levelsup = rte->ctelevelsup; + cteroot = root; + while (levelsup-- > 0) + { + cteroot = cteroot->parent_root; + if (!cteroot) /* shouldn't happen */ + elog(ERROR, "bad levelsup for CTE \"%s\"", rte->ctename); + } + + /* + * Note: cte_plan_ids can be shorter than cteList, if we are still working + * on planning the CTEs (ie, this is a side-reference from another CTE). + * So we mustn't use forboth here. + */ + ndx = 0; + foreach(lc, cteroot->parse->cteList) + { + CommonTableExpr *cte = (CommonTableExpr *) lfirst(lc); + + if (strcmp(cte->ctename, rte->ctename) == 0) + break; + ndx++; + } + if (lc == NULL) /* shouldn't happen */ + elog(ERROR, "could not find CTE \"%s\"", rte->ctename); + if (ndx >= list_length(cteroot->cte_plan_ids)) + elog(ERROR, "could not find plan for CTE \"%s\"", rte->ctename); + plan_id = list_nth_int(cteroot->cte_plan_ids, ndx); + Assert(plan_id > 0); + cteplan = (Plan *) list_nth(root->glob->subplans, plan_id - 1); + + /* Mark rel with estimated output rows, width, etc */ + set_cte_size_estimates(root, rel, cteplan); + + /* Generate appropriate path */ + add_path(rel, create_ctescan_path(root, rel)); + + /* Select cheapest path (pretty easy in this case...) */ + set_cheapest(rel); } /* @@ -1268,38 +1277,38 @@ set_cte_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte) static void set_worktable_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte) { - Plan *cteplan; - PlannerInfo *cteroot; - Index levelsup; - - /* - * We need to find the non-recursive term's plan, which is in the plan - * level that's processing the recursive UNION, which is one level *below* - * where the CTE comes from. - */ - levelsup = rte->ctelevelsup; - if (levelsup == 0) /* shouldn't happen */ - elog(ERROR, "bad levelsup for CTE \"%s\"", rte->ctename); - levelsup--; - cteroot = root; - while (levelsup-- > 0) - { - cteroot = cteroot->parent_root; - if (!cteroot) /* shouldn't happen */ - elog(ERROR, "bad levelsup for CTE \"%s\"", rte->ctename); - } - cteplan = cteroot->non_recursive_plan; - if (!cteplan) /* shouldn't happen */ - elog(ERROR, "could not find plan for CTE \"%s\"", rte->ctename); - - /* Mark rel with estimated output rows, width, etc */ - set_cte_size_estimates(root, rel, cteplan); - - /* Generate appropriate path */ - add_path(rel, create_worktablescan_path(root, rel)); - - /* Select cheapest path (pretty easy in this case...) */ - set_cheapest(rel); + Plan *cteplan; + PlannerInfo *cteroot; + Index levelsup; + + /* + * We need to find the non-recursive term's plan, which is in the plan + * level that's processing the recursive UNION, which is one level *below* + * where the CTE comes from. + */ + levelsup = rte->ctelevelsup; + if (levelsup == 0) /* shouldn't happen */ + elog(ERROR, "bad levelsup for CTE \"%s\"", rte->ctename); + levelsup--; + cteroot = root; + while (levelsup-- > 0) + { + cteroot = cteroot->parent_root; + if (!cteroot) /* shouldn't happen */ + elog(ERROR, "bad levelsup for CTE \"%s\"", rte->ctename); + } + cteplan = cteroot->non_recursive_plan; + if (!cteplan) /* shouldn't happen */ + elog(ERROR, "could not find plan for CTE \"%s\"", rte->ctename); + + /* Mark rel with estimated output rows, width, etc */ + set_cte_size_estimates(root, rel, cteplan); + + /* Generate appropriate path */ + add_path(rel, create_worktablescan_path(root, rel)); + + /* Select cheapest path (pretty easy in this case...) */ + set_cheapest(rel); } /* @@ -1312,77 +1321,77 @@ set_worktable_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte) static RelOptInfo * make_rel_from_joinlist(PlannerInfo *root, List *joinlist) { - int levels_needed; - List *initial_rels; - ListCell *jl; - - /* - * Count the number of child joinlist nodes. This is the depth of the - * dynamic-programming algorithm we must employ to consider all ways of - * joining the child nodes. - */ - levels_needed = list_length(joinlist); - - if (levels_needed <= 0) - return NULL; /* nothing to do? */ - - /* - * Construct a list of rels corresponding to the child joinlist nodes. - * This may contain both base rels and rels constructed according to - * sub-joinlists. - */ - initial_rels = NIL; - foreach(jl, joinlist) - { - Node *jlnode = (Node *) lfirst(jl); - RelOptInfo *thisrel; - - if (IsA(jlnode, RangeTblRef)) - { - int varno = ((RangeTblRef *) jlnode)->rtindex; - - thisrel = find_base_rel(root, varno); - } - else if (IsA(jlnode, List)) - { - /* Recurse to handle subproblem */ - thisrel = make_rel_from_joinlist(root, (List *) jlnode); - } - else - { - elog(ERROR, "unrecognized joinlist node type: %d", - (int) nodeTag(jlnode)); - thisrel = NULL; /* keep compiler quiet */ - } - - initial_rels = lappend(initial_rels, thisrel); - } - - if (levels_needed == 1) - { - /* - * Single joinlist node, so we're done. - */ - return (RelOptInfo *) linitial(initial_rels); - } - else - { - /* - * Consider the different orders in which we could join the rels, - * using a plugin, GEQO, or the regular join search code. - * - * We put the initial_rels list into a PlannerInfo field because - * has_legal_joinclause() needs to look at it (ugly :-(). - */ - root->initial_rels = initial_rels; - - if (join_search_hook) - return (*join_search_hook) (root, levels_needed, initial_rels); - else if (enable_geqo && levels_needed >= geqo_threshold) - return geqo(root, levels_needed, initial_rels); - else - return standard_join_search(root, levels_needed, initial_rels); - } + int levels_needed; + List *initial_rels; + ListCell *jl; + + /* + * Count the number of child joinlist nodes. This is the depth of the + * dynamic-programming algorithm we must employ to consider all ways of + * joining the child nodes. + */ + levels_needed = list_length(joinlist); + + if (levels_needed <= 0) + return NULL; /* nothing to do? */ + + /* + * Construct a list of rels corresponding to the child joinlist nodes. + * This may contain both base rels and rels constructed according to + * sub-joinlists. + */ + initial_rels = NIL; + foreach(jl, joinlist) + { + Node *jlnode = (Node *) lfirst(jl); + RelOptInfo *thisrel; + + if (IsA(jlnode, RangeTblRef)) + { + int varno = ((RangeTblRef *) jlnode)->rtindex; + + thisrel = find_base_rel(root, varno); + } + else if (IsA(jlnode, List)) + { + /* Recurse to handle subproblem */ + thisrel = make_rel_from_joinlist(root, (List *) jlnode); + } + else + { + elog(ERROR, "unrecognized joinlist node type: %d", + (int) nodeTag(jlnode)); + thisrel = NULL; /* keep compiler quiet */ + } + + initial_rels = lappend(initial_rels, thisrel); + } + + if (levels_needed == 1) + { + /* + * Single joinlist node, so we're done. + */ + return (RelOptInfo *) linitial(initial_rels); + } + else + { + /* + * Consider the different orders in which we could join the rels, + * using a plugin, GEQO, or the regular join search code. + * + * We put the initial_rels list into a PlannerInfo field because + * has_legal_joinclause() needs to look at it (ugly :-(). + */ + root->initial_rels = initial_rels; + + if (join_search_hook) + return (*join_search_hook) (root, levels_needed, initial_rels); + else if (enable_geqo && levels_needed >= geqo_threshold) + return geqo(root, levels_needed, initial_rels); + else + return standard_join_search(root, levels_needed, initial_rels); + } } /* @@ -1417,69 +1426,69 @@ make_rel_from_joinlist(PlannerInfo *root, List *joinlist) RelOptInfo * standard_join_search(PlannerInfo *root, int levels_needed, List *initial_rels) { - int lev; - RelOptInfo *rel; - - /* - * This function cannot be invoked recursively within any one planning - * problem, so join_rel_level[] can't be in use already. - */ - Assert(root->join_rel_level == NULL); - - /* - * We employ a simple "dynamic programming" algorithm: we first find all - * ways to build joins of two jointree items, then all ways to build joins - * of three items (from two-item joins and single items), then four-item - * joins, and so on until we have considered all ways to join all the - * items into one rel. - * - * root->join_rel_level[j] is a list of all the j-item rels. Initially we - * set root->join_rel_level[1] to represent all the single-jointree-item - * relations. - */ - root->join_rel_level = (List **) palloc0((levels_needed + 1) * sizeof(List *)); - - root->join_rel_level[1] = initial_rels; - - for (lev = 2; lev <= levels_needed; lev++) - { - ListCell *lc; - - /* - * Determine all possible pairs of relations to be joined at this - * level, and build paths for making each one from every available - * pair of lower-level relations. - */ - join_search_one_level(root, lev); - - /* - * Do cleanup work on each just-processed rel. - */ - foreach(lc, root->join_rel_level[lev]) - { - rel = (RelOptInfo *) lfirst(lc); - - /* Find and save the cheapest paths for this rel */ - set_cheapest(rel); - + int lev; + RelOptInfo *rel; + + /* + * This function cannot be invoked recursively within any one planning + * problem, so join_rel_level[] can't be in use already. + */ + Assert(root->join_rel_level == NULL); + + /* + * We employ a simple "dynamic programming" algorithm: we first find all + * ways to build joins of two jointree items, then all ways to build joins + * of three items (from two-item joins and single items), then four-item + * joins, and so on until we have considered all ways to join all the + * items into one rel. + * + * root->join_rel_level[j] is a list of all the j-item rels. Initially we + * set root->join_rel_level[1] to represent all the single-jointree-item + * relations. + */ + root->join_rel_level = (List **) palloc0((levels_needed + 1) * sizeof(List *)); + + root->join_rel_level[1] = initial_rels; + + for (lev = 2; lev <= levels_needed; lev++) + { + ListCell *lc; + + /* + * Determine all possible pairs of relations to be joined at this + * level, and build paths for making each one from every available + * pair of lower-level relations. + */ + join_search_one_level(root, lev); + + /* + * Do cleanup work on each just-processed rel. + */ + foreach(lc, root->join_rel_level[lev]) + { + rel = (RelOptInfo *) lfirst(lc); + + /* Find and save the cheapest paths for this rel */ + set_cheapest(rel); + #ifdef OPTIMIZER_DEBUG - debug_print_rel(root, rel); + debug_print_rel(root, rel); #endif - } - } - - /* - * We should have a single rel at the final level. - */ - if (root->join_rel_level[levels_needed] == NIL) - elog(ERROR, "failed to build any %d-way joins", levels_needed); - Assert(list_length(root->join_rel_level[levels_needed]) == 1); - - rel = (RelOptInfo *) linitial(root->join_rel_level[levels_needed]); - - root->join_rel_level = NULL; - - return rel; + } + } + + /* + * We should have a single rel at the final level. + */ + if (root->join_rel_level[levels_needed] == NIL) + elog(ERROR, "failed to build any %d-way joins", levels_needed); + Assert(list_length(root->join_rel_level[levels_needed]) == 1); + + rel = (RelOptInfo *) linitial(root->join_rel_level[levels_needed]); + + root->join_rel_level = NULL; + + return rel; } /***************************************************************************** @@ -1518,44 +1527,44 @@ standard_join_search(PlannerInfo *root, int levels_needed, List *initial_rels) */ static bool subquery_is_pushdown_safe(Query *subquery, Query *topquery, - bool *differentTypes) + bool *differentTypes) { - SetOperationStmt *topop; - - /* Check point 1 */ - if (subquery->limitOffset != NULL || subquery->limitCount != NULL) - return false; - - /* Check point 2 */ - if (subquery->hasWindowFuncs) - return false; - - /* Check point 3 (new for Recathon) */ - if (subquery->recommendStmt) - return false; - - /* Are we at top level, or looking at a setop component? */ - if (subquery == topquery) - { - /* Top level, so check any component queries */ - if (subquery->setOperations != NULL) - if (!recurse_pushdown_safe(subquery->setOperations, topquery, - differentTypes)) - return false; - } - else - { - /* Setop component must not have more components (too weird) */ - if (subquery->setOperations != NULL) - return false; - /* Check whether setop component output types match top level */ - topop = (SetOperationStmt *) topquery->setOperations; - Assert(topop && IsA(topop, SetOperationStmt)); - compare_tlist_datatypes(subquery->targetList, - topop->colTypes, - differentTypes); - } - return true; + SetOperationStmt *topop; + + /* Check point 1 */ + if (subquery->limitOffset != NULL || subquery->limitCount != NULL) + return false; + + /* Check point 2 */ + if (subquery->hasWindowFuncs) + return false; + + /* Check point 3 (new for Recathon) */ + if (subquery->recommendStmt) + return false; + + /* Are we at top level, or looking at a setop component? */ + if (subquery == topquery) + { + /* Top level, so check any component queries */ + if (subquery->setOperations != NULL) + if (!recurse_pushdown_safe(subquery->setOperations, topquery, + differentTypes)) + return false; + } + else + { + /* Setop component must not have more components (too weird) */ + if (subquery->setOperations != NULL) + return false; + /* Check whether setop component output types match top level */ + topop = (SetOperationStmt *) topquery->setOperations; + Assert(topop && IsA(topop, SetOperationStmt)); + compare_tlist_datatypes(subquery->targetList, + topop->colTypes, + differentTypes); + } + return true; } /* @@ -1563,36 +1572,36 @@ subquery_is_pushdown_safe(Query *subquery, Query *topquery, */ static bool recurse_pushdown_safe(Node *setOp, Query *topquery, - bool *differentTypes) + bool *differentTypes) { - if (IsA(setOp, RangeTblRef)) - { - RangeTblRef *rtr = (RangeTblRef *) setOp; - RangeTblEntry *rte = rt_fetch(rtr->rtindex, topquery->rtable); - Query *subquery = rte->subquery; - - Assert(subquery != NULL); - return subquery_is_pushdown_safe(subquery, topquery, differentTypes); - } - else if (IsA(setOp, SetOperationStmt)) - { - SetOperationStmt *op = (SetOperationStmt *) setOp; - - /* EXCEPT is no good */ - if (op->op == SETOP_EXCEPT) - return false; - /* Else recurse */ - if (!recurse_pushdown_safe(op->larg, topquery, differentTypes)) - return false; - if (!recurse_pushdown_safe(op->rarg, topquery, differentTypes)) - return false; - } - else - { - elog(ERROR, "unrecognized node type: %d", - (int) nodeTag(setOp)); - } - return true; + if (IsA(setOp, RangeTblRef)) + { + RangeTblRef *rtr = (RangeTblRef *) setOp; + RangeTblEntry *rte = rt_fetch(rtr->rtindex, topquery->rtable); + Query *subquery = rte->subquery; + + Assert(subquery != NULL); + return subquery_is_pushdown_safe(subquery, topquery, differentTypes); + } + else if (IsA(setOp, SetOperationStmt)) + { + SetOperationStmt *op = (SetOperationStmt *) setOp; + + /* EXCEPT is no good */ + if (op->op == SETOP_EXCEPT) + return false; + /* Else recurse */ + if (!recurse_pushdown_safe(op->larg, topquery, differentTypes)) + return false; + if (!recurse_pushdown_safe(op->rarg, topquery, differentTypes)) + return false; + } + else + { + elog(ERROR, "unrecognized node type: %d", + (int) nodeTag(setOp)); + } + return true; } /* @@ -1606,25 +1615,25 @@ recurse_pushdown_safe(Node *setOp, Query *topquery, */ static void compare_tlist_datatypes(List *tlist, List *colTypes, - bool *differentTypes) + bool *differentTypes) { - ListCell *l; - ListCell *colType = list_head(colTypes); - - foreach(l, tlist) - { - TargetEntry *tle = (TargetEntry *) lfirst(l); - - if (tle->resjunk) - continue; /* ignore resjunk columns */ - if (colType == NULL) - elog(ERROR, "wrong number of tlist entries"); - if (exprType((Node *) tle->expr) != lfirst_oid(colType)) - differentTypes[tle->resno] = true; - colType = lnext(colType); - } - if (colType != NULL) - elog(ERROR, "wrong number of tlist entries"); + ListCell *l; + ListCell *colType = list_head(colTypes); + + foreach(l, tlist) + { + TargetEntry *tle = (TargetEntry *) lfirst(l); + + if (tle->resjunk) + continue; /* ignore resjunk columns */ + if (colType == NULL) + elog(ERROR, "wrong number of tlist entries"); + if (exprType((Node *) tle->expr) != lfirst_oid(colType)) + differentTypes[tle->resno] = true; + colType = lnext(colType); + } + if (colType != NULL) + elog(ERROR, "wrong number of tlist entries"); } /* @@ -1665,107 +1674,107 @@ compare_tlist_datatypes(List *tlist, List *colTypes, */ static bool qual_is_pushdown_safe(Query *subquery, Index rti, Node *qual, - bool *differentTypes) + bool *differentTypes) { - bool safe = true; - List *vars; - ListCell *vl; - Bitmapset *tested = NULL; - - /* Refuse subselects (point 1) */ - if (contain_subplans(qual)) - return false; - - /* - * It would be unsafe to push down window function calls, but at least for - * the moment we could never see any in a qual anyhow. (The same applies - * to aggregates, which we check for in pull_var_clause below.) - */ - Assert(!contain_window_function(qual)); - - /* - * Examine all Vars used in clause; since it's a restriction clause, all - * such Vars must refer to subselect output columns. - */ - vars = pull_var_clause(qual, - PVC_REJECT_AGGREGATES, - PVC_INCLUDE_PLACEHOLDERS); - foreach(vl, vars) - { - Var *var = (Var *) lfirst(vl); - TargetEntry *tle; - - /* - * XXX Punt if we find any PlaceHolderVars in the restriction clause. - * It's not clear whether a PHV could safely be pushed down, and even - * less clear whether such a situation could arise in any cases of - * practical interest anyway. So for the moment, just refuse to push - * down. - */ - if (!IsA(var, Var)) - { - safe = false; - break; - } - - Assert(var->varno == rti); - - /* Check point 2 */ - if (var->varattno == 0) - { - safe = false; - break; - } - - /* - * We use a bitmapset to avoid testing the same attno more than once. - * (NB: this only works because subquery outputs can't have negative - * attnos.) - */ - if (bms_is_member(var->varattno, tested)) - continue; - tested = bms_add_member(tested, var->varattno); - - /* Check point 3 */ - if (differentTypes[var->varattno]) - { - safe = false; - break; - } - - /* Must find the tlist element referenced by the Var */ - tle = get_tle_by_resno(subquery->targetList, var->varattno); - Assert(tle != NULL); - Assert(!tle->resjunk); - - /* If subquery uses DISTINCT ON, check point 4 */ - if (subquery->hasDistinctOn && - !targetIsInSortList(tle, InvalidOid, subquery->distinctClause)) - { - /* non-DISTINCT column, so fail */ - safe = false; - break; - } - - /* Refuse functions returning sets (point 5) */ - if (expression_returns_set((Node *) tle->expr)) - { - safe = false; - break; - } - - /* Refuse volatile functions (point 6) */ - if (contain_volatile_functions((Node *) tle->expr)) - { - safe = false; - break; - } - } - - list_free(vars); - bms_free(tested); - - return safe; + bool safe = true; + List *vars; + ListCell *vl; + Bitmapset *tested = NULL; + + /* Refuse subselects (point 1) */ + if (contain_subplans(qual)) + return false; + + /* + * It would be unsafe to push down window function calls, but at least for + * the moment we could never see any in a qual anyhow. (The same applies + * to aggregates, which we check for in pull_var_clause below.) + */ + Assert(!contain_window_function(qual)); + + /* + * Examine all Vars used in clause; since it's a restriction clause, all + * such Vars must refer to subselect output columns. + */ + vars = pull_var_clause(qual, + PVC_REJECT_AGGREGATES, + PVC_INCLUDE_PLACEHOLDERS); + foreach(vl, vars) + { + Var *var = (Var *) lfirst(vl); + TargetEntry *tle; + + /* + * XXX Punt if we find any PlaceHolderVars in the restriction clause. + * It's not clear whether a PHV could safely be pushed down, and even + * less clear whether such a situation could arise in any cases of + * practical interest anyway. So for the moment, just refuse to push + * down. + */ + if (!IsA(var, Var)) + { + safe = false; + break; + } + + Assert(var->varno == rti); + + /* Check point 2 */ + if (var->varattno == 0) + { + safe = false; + break; + } + + /* + * We use a bitmapset to avoid testing the same attno more than once. + * (NB: this only works because subquery outputs can't have negative + * attnos.) + */ + if (bms_is_member(var->varattno, tested)) + continue; + tested = bms_add_member(tested, var->varattno); + + /* Check point 3 */ + if (differentTypes[var->varattno]) + { + safe = false; + break; + } + + /* Must find the tlist element referenced by the Var */ + tle = get_tle_by_resno(subquery->targetList, var->varattno); + Assert(tle != NULL); + Assert(!tle->resjunk); + + /* If subquery uses DISTINCT ON, check point 4 */ + if (subquery->hasDistinctOn && + !targetIsInSortList(tle, InvalidOid, subquery->distinctClause)) + { + /* non-DISTINCT column, so fail */ + safe = false; + break; + } + + /* Refuse functions returning sets (point 5) */ + if (expression_returns_set((Node *) tle->expr)) + { + safe = false; + break; + } + + /* Refuse volatile functions (point 6) */ + if (contain_volatile_functions((Node *) tle->expr)) + { + safe = false; + break; + } + } + + list_free(vars); + bms_free(tested); + + return safe; } /* @@ -1774,45 +1783,45 @@ qual_is_pushdown_safe(Query *subquery, Index rti, Node *qual, static void subquery_push_qual(Query *subquery, RangeTblEntry *rte, Index rti, Node *qual) { - if (subquery->setOperations != NULL) - { - /* Recurse to push it separately to each component query */ - recurse_push_qual(subquery->setOperations, subquery, - rte, rti, qual); - } - else - { - /* - * We need to replace Vars in the qual (which must refer to outputs of - * the subquery) with copies of the subquery's targetlist expressions. - * Note that at this point, any uplevel Vars in the qual should have - * been replaced with Params, so they need no work. - * - * This step also ensures that when we are pushing into a setop tree, - * each component query gets its own copy of the qual. - */ - qual = ResolveNew(qual, rti, 0, rte, - subquery->targetList, - CMD_SELECT, 0, - &subquery->hasSubLinks); - - /* - * Now attach the qual to the proper place: normally WHERE, but if the - * subquery uses grouping or aggregation, put it in HAVING (since the - * qual really refers to the group-result rows). - */ - if (subquery->hasAggs || subquery->groupClause || subquery->havingQual) - subquery->havingQual = make_and_qual(subquery->havingQual, qual); - else - subquery->jointree->quals = - make_and_qual(subquery->jointree->quals, qual); - - /* - * We need not change the subquery's hasAggs or hasSublinks flags, - * since we can't be pushing down any aggregates that weren't there - * before, and we don't push down subselects at all. - */ - } + if (subquery->setOperations != NULL) + { + /* Recurse to push it separately to each component query */ + recurse_push_qual(subquery->setOperations, subquery, + rte, rti, qual); + } + else + { + /* + * We need to replace Vars in the qual (which must refer to outputs of + * the subquery) with copies of the subquery's targetlist expressions. + * Note that at this point, any uplevel Vars in the qual should have + * been replaced with Params, so they need no work. + * + * This step also ensures that when we are pushing into a setop tree, + * each component query gets its own copy of the qual. + */ + qual = ResolveNew(qual, rti, 0, rte, + subquery->targetList, + CMD_SELECT, 0, + &subquery->hasSubLinks); + + /* + * Now attach the qual to the proper place: normally WHERE, but if the + * subquery uses grouping or aggregation, put it in HAVING (since the + * qual really refers to the group-result rows). + */ + if (subquery->hasAggs || subquery->groupClause || subquery->havingQual) + subquery->havingQual = make_and_qual(subquery->havingQual, qual); + else + subquery->jointree->quals = + make_and_qual(subquery->jointree->quals, qual); + + /* + * We need not change the subquery's hasAggs or hasSublinks flags, + * since we can't be pushing down any aggregates that weren't there + * before, and we don't push down subselects at all. + */ + } } /* @@ -1820,29 +1829,29 @@ subquery_push_qual(Query *subquery, RangeTblEntry *rte, Index rti, Node *qual) */ static void recurse_push_qual(Node *setOp, Query *topquery, - RangeTblEntry *rte, Index rti, Node *qual) + RangeTblEntry *rte, Index rti, Node *qual) { - if (IsA(setOp, RangeTblRef)) - { - RangeTblRef *rtr = (RangeTblRef *) setOp; - RangeTblEntry *subrte = rt_fetch(rtr->rtindex, topquery->rtable); - Query *subquery = subrte->subquery; - - Assert(subquery != NULL); - subquery_push_qual(subquery, rte, rti, qual); - } - else if (IsA(setOp, SetOperationStmt)) - { - SetOperationStmt *op = (SetOperationStmt *) setOp; - - recurse_push_qual(op->larg, topquery, rte, rti, qual); - recurse_push_qual(op->rarg, topquery, rte, rti, qual); - } - else - { - elog(ERROR, "unrecognized node type: %d", - (int) nodeTag(setOp)); - } + if (IsA(setOp, RangeTblRef)) + { + RangeTblRef *rtr = (RangeTblRef *) setOp; + RangeTblEntry *subrte = rt_fetch(rtr->rtindex, topquery->rtable); + Query *subquery = subrte->subquery; + + Assert(subquery != NULL); + subquery_push_qual(subquery, rte, rti, qual); + } + else if (IsA(setOp, SetOperationStmt)) + { + SetOperationStmt *op = (SetOperationStmt *) setOp; + + recurse_push_qual(op->larg, topquery, rte, rti, qual); + recurse_push_qual(op->rarg, topquery, rte, rti, qual); + } + else + { + elog(ERROR, "unrecognized node type: %d", + (int) nodeTag(setOp)); + } } /***************************************************************************** @@ -1854,183 +1863,183 @@ recurse_push_qual(Node *setOp, Query *topquery, static void print_relids(Relids relids) { - Relids tmprelids; - int x; - bool first = true; - - tmprelids = bms_copy(relids); - while ((x = bms_first_member(tmprelids)) >= 0) - { - if (!first) - printf(" "); - printf("%d", x); - first = false; - } - bms_free(tmprelids); + Relids tmprelids; + int x; + bool first = true; + + tmprelids = bms_copy(relids); + while ((x = bms_first_member(tmprelids)) >= 0) + { + if (!first) + printf(" "); + printf("%d", x); + first = false; + } + bms_free(tmprelids); } static void print_restrictclauses(PlannerInfo *root, List *clauses) { - ListCell *l; - - foreach(l, clauses) - { - RestrictInfo *c = lfirst(l); - - print_expr((Node *) c->clause, root->parse->rtable); - if (lnext(l)) - printf(", "); - } + ListCell *l; + + foreach(l, clauses) + { + RestrictInfo *c = lfirst(l); + + print_expr((Node *) c->clause, root->parse->rtable); + if (lnext(l)) + printf(", "); + } } static void print_path(PlannerInfo *root, Path *path, int indent) { - const char *ptype; - bool join = false; - Path *subpath = NULL; - int i; - - switch (nodeTag(path)) - { - case T_Path: - ptype = "SeqScan"; - break; - case T_IndexPath: - ptype = "IdxScan"; - break; - case T_BitmapHeapPath: - ptype = "BitmapHeapScan"; - break; - case T_BitmapAndPath: - ptype = "BitmapAndPath"; - break; - case T_BitmapOrPath: - ptype = "BitmapOrPath"; - break; - case T_TidPath: - ptype = "TidScan"; - break; - case T_ForeignPath: - ptype = "ForeignScan"; - break; - case T_AppendPath: - ptype = "Append"; - break; - case T_MergeAppendPath: - ptype = "MergeAppend"; - break; - case T_ResultPath: - ptype = "Result"; - break; - case T_MaterialPath: - ptype = "Material"; - subpath = ((MaterialPath *) path)->subpath; - break; - case T_UniquePath: - ptype = "Unique"; - subpath = ((UniquePath *) path)->subpath; - break; - case T_NestPath: - ptype = "NestLoop"; - join = true; - break; - case T_MergePath: - ptype = "MergeJoin"; - join = true; - break; - case T_HashPath: - ptype = "HashJoin"; - join = true; - break; - default: - ptype = "???Path"; - break; - } - - for (i = 0; i < indent; i++) - printf("\t"); - printf("%s", ptype); - - if (path->parent) - { - printf("("); - print_relids(path->parent->relids); - printf(") rows=%.0f", path->parent->rows); - } - printf(" cost=%.2f..%.2f\n", path->startup_cost, path->total_cost); - - if (path->pathkeys) - { - for (i = 0; i < indent; i++) - printf("\t"); - printf(" pathkeys: "); - print_pathkeys(path->pathkeys, root->parse->rtable); - } - - if (join) - { - JoinPath *jp = (JoinPath *) path; - - for (i = 0; i < indent; i++) - printf("\t"); - printf(" clauses: "); - print_restrictclauses(root, jp->joinrestrictinfo); - printf("\n"); - - if (IsA(path, MergePath)) - { - MergePath *mp = (MergePath *) path; - - for (i = 0; i < indent; i++) - printf("\t"); - printf(" sortouter=%d sortinner=%d materializeinner=%d\n", - ((mp->outersortkeys) ? 1 : 0), - ((mp->innersortkeys) ? 1 : 0), - ((mp->materialize_inner) ? 1 : 0)); - } - - print_path(root, jp->outerjoinpath, indent + 1); - print_path(root, jp->innerjoinpath, indent + 1); - } - - if (subpath) - print_path(root, subpath, indent + 1); + const char *ptype; + bool join = false; + Path *subpath = NULL; + int i; + + switch (nodeTag(path)) + { + case T_Path: + ptype = "SeqScan"; + break; + case T_IndexPath: + ptype = "IdxScan"; + break; + case T_BitmapHeapPath: + ptype = "BitmapHeapScan"; + break; + case T_BitmapAndPath: + ptype = "BitmapAndPath"; + break; + case T_BitmapOrPath: + ptype = "BitmapOrPath"; + break; + case T_TidPath: + ptype = "TidScan"; + break; + case T_ForeignPath: + ptype = "ForeignScan"; + break; + case T_AppendPath: + ptype = "Append"; + break; + case T_MergeAppendPath: + ptype = "MergeAppend"; + break; + case T_ResultPath: + ptype = "Result"; + break; + case T_MaterialPath: + ptype = "Material"; + subpath = ((MaterialPath *) path)->subpath; + break; + case T_UniquePath: + ptype = "Unique"; + subpath = ((UniquePath *) path)->subpath; + break; + case T_NestPath: + ptype = "NestLoop"; + join = true; + break; + case T_MergePath: + ptype = "MergeJoin"; + join = true; + break; + case T_HashPath: + ptype = "HashJoin"; + join = true; + break; + default: + ptype = "???Path"; + break; + } + + for (i = 0; i < indent; i++) + printf("\t"); + printf("%s", ptype); + + if (path->parent) + { + printf("("); + print_relids(path->parent->relids); + printf(") rows=%.0f", path->parent->rows); + } + printf(" cost=%.2f..%.2f\n", path->startup_cost, path->total_cost); + + if (path->pathkeys) + { + for (i = 0; i < indent; i++) + printf("\t"); + printf(" pathkeys: "); + print_pathkeys(path->pathkeys, root->parse->rtable); + } + + if (join) + { + JoinPath *jp = (JoinPath *) path; + + for (i = 0; i < indent; i++) + printf("\t"); + printf(" clauses: "); + print_restrictclauses(root, jp->joinrestrictinfo); + printf("\n"); + + if (IsA(path, MergePath)) + { + MergePath *mp = (MergePath *) path; + + for (i = 0; i < indent; i++) + printf("\t"); + printf(" sortouter=%d sortinner=%d materializeinner=%d\n", + ((mp->outersortkeys) ? 1 : 0), + ((mp->innersortkeys) ? 1 : 0), + ((mp->materialize_inner) ? 1 : 0)); + } + + print_path(root, jp->outerjoinpath, indent + 1); + print_path(root, jp->innerjoinpath, indent + 1); + } + + if (subpath) + print_path(root, subpath, indent + 1); } void debug_print_rel(PlannerInfo *root, RelOptInfo *rel) { - ListCell *l; - - printf("RELOPTINFO ("); - print_relids(rel->relids); - printf("): rows=%.0f width=%d\n", rel->rows, rel->width); - - if (rel->baserestrictinfo) - { - printf("\tbaserestrictinfo: "); - print_restrictclauses(root, rel->baserestrictinfo); - printf("\n"); - } - - if (rel->joininfo) - { - printf("\tjoininfo: "); - print_restrictclauses(root, rel->joininfo); - printf("\n"); - } - - printf("\tpath list:\n"); - foreach(l, rel->pathlist) - print_path(root, lfirst(l), 1); - printf("\n\tcheapest startup path:\n"); - print_path(root, rel->cheapest_startup_path, 1); - printf("\n\tcheapest total path:\n"); - print_path(root, rel->cheapest_total_path, 1); - printf("\n"); - fflush(stdout); + ListCell *l; + + printf("RELOPTINFO ("); + print_relids(rel->relids); + printf("): rows=%.0f width=%d\n", rel->rows, rel->width); + + if (rel->baserestrictinfo) + { + printf("\tbaserestrictinfo: "); + print_restrictclauses(root, rel->baserestrictinfo); + printf("\n"); + } + + if (rel->joininfo) + { + printf("\tjoininfo: "); + print_restrictclauses(root, rel->joininfo); + printf("\n"); + } + + printf("\tpath list:\n"); + foreach(l, rel->pathlist) + print_path(root, lfirst(l), 1); + printf("\n\tcheapest startup path:\n"); + print_path(root, rel->cheapest_startup_path, 1); + printf("\n\tcheapest total path:\n"); + print_path(root, rel->cheapest_total_path, 1); + printf("\n"); + fflush(stdout); } #endif /* OPTIMIZER_DEBUG */ diff --git a/PostgreSQL/src/backend/parser/parse_rec.c b/PostgreSQL/src/backend/parser/parse_rec.c index afac2d1..21ee8a3 100644 --- a/PostgreSQL/src/backend/parser/parse_rec.c +++ b/PostgreSQL/src/backend/parser/parse_rec.c @@ -30,7 +30,7 @@ static void validateClauses(SelectStmt *stmt); static RecommendInfo* getEventsTable(List *fromClause, Node *recommendClause); static char* getTableRef(ColumnRef *colref, char **colname); static AttributeInfo* getAttributeInfo(char *eventtable, char *userkey, char *itemkey, char *eventval, - RecommendInfo *recInfo); + RecommendInfo *recInfo); static int checkWhereClause(ColumnRef* attribute, RangeVar* recommender, char* userkey, char* eventval); //static void modifyAExpr(Node *currentExpr, char* recname, char* viewname); //static void modifyTargetList(List *target_list, char *recname, char *viewname); @@ -43,6 +43,7 @@ static bool filterfirstrecurse(Node *whereExpr, RecommendInfo *recInfo); static bool tableMatch(RangeVar* table, char* tablename); static Node *makeTrueConst(); static Node *userWhereClause(Node* whereClause, char *userkey); +static RangeVar* getRecommendVar(RangeVar * var); /* * transformRecommendClause - @@ -54,60 +55,60 @@ static Node *userWhereClause(Node* whereClause, char *userkey); */ SelectStmt * transformRecommendClause(ParseState *pstate, List **targetlist, SelectStmt *stmt, - const char *constructName) + const char *constructName) { - RecommendInfo *recInfo; - Node *userWhere; - recInfo = NULL; - - // We need to do some preprocessing and sanity checks. If any of the sanity - // checks fail, we'll throw an error. - validateClauses(stmt); - - // Now that we know the overall query has the right structure, - // it's time to look at the specific information included. - - // Step one: by using the provided information in the RECOMMEND clause, - // we need to determine what the event table is that we intend to use. - // We also need to make sure that all of the RECOMMEND clause columns - // correspond to said events table. - recInfo = getEventsTable(stmt->fromClause, stmt->recommendClause); - - if (!recInfo) - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("a valid events table has not been provided"))); - - // Step two: look through the WHERE clause to see if there are any RecScore - // restrictions. If there are, we need to mark this specially. - filterfirst(stmt->whereClause, recInfo); - - // Step three: see if we are joining the user ID or item ID to another table, in - // which case we want to perform a RecJoin. -// applyRecJoin(stmt->whereClause, stmt->fromClause, recInfo); - - // Step four: now that we've verified the correctness of our query, we need to - // see if a recommender already exists for a given table and method. If so, we'll - // reference the created RecModel; if not, we'll build it on the fly from the - // schema of the events table. Either way, we make a note of which of our tables - // will be the basis of our recommender schema. - modifyFrom(stmt, recInfo); - - // Step five: we need to scan the WHERE clause and find which elements pertain - // to the user key; this will save us a lot of unnecessary work later on. We'll - // make a copy of it that we can modify for our purposes. - userWhere = userWhereClause(((Node *) copyObject(stmt->whereClause)), recInfo->attributes->userkey); -// userWhere = userWhereClause(stmt->whereClause, recInfo->attributes->userkey); - recInfo->attributes->userWhereClause = userWhere; - - // There's an additional step, where we add the RECOMMEND clause elements into - // the target list if they aren't there, but we can't perform this step until - // the target list and FROM clauses have been processed, so we'll leave that - // for later. - - // With that done, we put in the changed recInfo and return. - stmt->recommendClause = (Node*) recInfo; - return stmt; + RecommendInfo *recInfo; + Node *userWhere; + recInfo = NULL; + + // We need to do some preprocessing and sanity checks. If any of the sanity + // checks fail, we'll throw an error. + validateClauses(stmt); + + // Now that we know the overall query has the right structure, + // it's time to look at the specific information included. + + // Step one: by using the provided information in the RECOMMEND clause, + // we need to determine what the event table is that we intend to use. + // We also need to make sure that all of the RECOMMEND clause columns + // correspond to said events table. + recInfo = getEventsTable(stmt->fromClause, stmt->recommendClause); + + if (!recInfo) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("a valid events table has not been provided"))); + + // Step two: look through the WHERE clause to see if there are any RecScore + // restrictions. If there are, we need to mark this specially. + filterfirst(stmt->whereClause, recInfo); + + // Step three: see if we are joining the user ID or item ID to another table, in + // which case we want to perform a RecJoin. + // applyRecJoin(stmt->whereClause, stmt->fromClause, recInfo); + + // Step four: now that we've verified the correctness of our query, we need to + // see if a recommender already exists for a given table and method. If so, we'll + // reference the created RecModel; if not, we'll build it on the fly from the + // schema of the events table. Either way, we make a note of which of our tables + // will be the basis of our recommender schema. + modifyFrom(stmt, recInfo); + + // Step five: we need to scan the WHERE clause and find which elements pertain + // to the user key; this will save us a lot of unnecessary work later on. We'll + // make a copy of it that we can modify for our purposes. + userWhere = userWhereClause(((Node *) copyObject(stmt->whereClause)), recInfo->attributes->userkey); + // userWhere = userWhereClause(stmt->whereClause, recInfo->attributes->userkey); + recInfo->attributes->userWhereClause = userWhere; + + // There's an additional step, where we add the RECOMMEND clause elements into + // the target list if they aren't there, but we can't perform this step until + // the target list and FROM clauses have been processed, so we'll leave that + // for later. + + // With that done, we put in the changed recInfo and return. + stmt->recommendClause = (Node*) recInfo; + return stmt; } /* @@ -117,42 +118,63 @@ transformRecommendClause(ParseState *pstate, List **targetlist, SelectStmt *stmt */ static void validateClauses(SelectStmt *stmt) { - // A FROM clause is required. - if (!stmt->fromClause) - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("RECOMMEND clause is not allowed without FROM clause"))); - // Numerous other clauses have to not be there. We're very picky. - if (stmt->distinctClause) - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("RECOMMEND clause is not allowed with DISTINCT clause"))); - if (stmt->intoClause) - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("RECOMMEND clause is not allowed with INTO clause"))); - if (stmt->groupClause) - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("RECOMMEND clause is not allowed with GROUP BY clause"))); - if (stmt->havingClause) - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("RECOMMEND clause is not allowed with HAVING clause"))); - if (stmt->windowClause) - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("RECOMMEND clause is not allowed with WINDOW clause"))); - if (stmt->lockingClause) - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("RECOMMEND clause is not allowed with FOR clause"))); - if (stmt->withClause) - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("RECOMMEND clause is not allowed with WITH clause"))); + // A FROM clause is required. + if (!stmt->fromClause) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("RECOMMEND clause is not allowed without FROM clause"))); + // Numerous other clauses have to not be there. We're very picky. + if (stmt->distinctClause) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("RECOMMEND clause is not allowed with DISTINCT clause"))); + if (stmt->intoClause) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("RECOMMEND clause is not allowed with INTO clause"))); + if (stmt->groupClause) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("RECOMMEND clause is not allowed with GROUP BY clause"))); + if (stmt->havingClause) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("RECOMMEND clause is not allowed with HAVING clause"))); + if (stmt->windowClause) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("RECOMMEND clause is not allowed with WINDOW clause"))); + if (stmt->lockingClause) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("RECOMMEND clause is not allowed with FOR clause"))); + if (stmt->withClause) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("RECOMMEND clause is not allowed with WITH clause"))); + + return; +} - return; +/* + * getRecommendVar - + * A helper function for getEventsTable, which prevents + * circular linking from happening. + * + * + */ +static RangeVar* getRecommendVar(RangeVar * var){ + RangeVar * newvar = makeNode(RangeVar); + newvar->catalogname = var->catalogname; + newvar->schemaname = var->schemaname; + newvar->relname = var->relname; + newvar->inhOpt = var->inhOpt; + newvar->relpersistence = var->relpersistence; + newvar->alias = var->alias; + newvar->location = var->location; + newvar->recommender = NULL; + + return newvar; } /* @@ -164,112 +186,114 @@ validateClauses(SelectStmt *stmt) { */ static RecommendInfo* getEventsTable(List *fromClause, Node *recommendClause) { - char *usertref, *itemtref, *eventtref; - char *userkey, *itemkey, *eventval; - char *eventtable = NULL; - AttributeInfo *attributes; - bool elem_match = true; - - RecommendInfo *recInfo = (RecommendInfo*) recommendClause; - ColumnRef *usercr = (ColumnRef*) recInfo->userkey; - ColumnRef *itemcr = (ColumnRef*) recInfo->itemkey; - ColumnRef *eventcr = (ColumnRef*) recInfo->eventval; - - // First off, we'll perform sanity checks on the elements - // of the RECOMMEND clause. We need to make sure their - // table references are valid and matching. - userkey = getTableRef(usercr, &usertref); - itemkey = getTableRef(itemcr, &itemtref); - eventval = getTableRef(eventcr, &eventtref); - - // If one element has no table reference, none of them can have - // table references. - if ((usertref && !itemtref) || (!usertref && itemtref)) - elem_match = false; - if ((usertref && !eventtref) || (!usertref && eventtref)) - elem_match = false; - - // If table references exist, they all have to be the same. - if (usertref && elem_match) { - if ((strcmp(usertref,itemtref) != 0) || - (strcmp(usertref,eventtref) != 0)) - elem_match = false; - } - - if (!elem_match) - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("elements of RECOMMEND clause must have matching table references"))); - - // Now that we've confirmed the correctness of the RECOMMEND - // clause table references, we will use them to find the event table - // referred to in the FROM clause. If there are table references, that makes - // the task easy; if not, then we can manually check to see if each column - // exists in a specific table. - if (usertref) { - // Let's look for a FROM table element that matches. - ListCell *from_cell; - foreach(from_cell,fromClause) { - Node *from_node = lfirst(from_cell); - if (nodeTag(from_node) == T_RangeVar) { - RangeVar *fromVar; - - fromVar = (RangeVar*) from_node; - if (tableMatch(fromVar,usertref)) { - eventtable = fromVar->relname; - recInfo->recommender = fromVar; - fromVar->recommender = (Node*) recInfo; - break; - } - } - } - } else { - // If we can't easily identify the events table via - // table references, we will do so by cross-referencing the - // column names with the name of each table. - ListCell *from_cell; - foreach(from_cell,fromClause) { - Node *from_node = lfirst(from_cell); - if (nodeTag(from_node) == T_RangeVar) { - RangeVar *fromVar; - - fromVar = (RangeVar*) from_node; - // If all the key columns are in this table... - if (columnExistsInRelation(userkey,fromVar) && - columnExistsInRelation(itemkey,fromVar) && - columnExistsInRelation(eventval,fromVar)) { - // Did we already find a table matching all of - // these columns? If so, that's an error. - if (eventtable) { - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("ambiguous references in RECOMMEND clause"))); - } else { - // Make a note of the name, and also - // do cross-storage of the table. - eventtable = fromVar->relname; - recInfo->recommender = fromVar; - fromVar->recommender = (Node*) recInfo; - } - } - } - } - } - - // If we found nothing, return error. - if (!eventtable) - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("valid events table not found"))); - - // Now that we've confirmed the RECOMMEND clause is well-formed, - // we can start assembling our structure. - attributes = getAttributeInfo(eventtable, userkey, itemkey, eventval, - recInfo); - recInfo->attributes = attributes; - - // That's all we need for now, really. - return recInfo; + char *usertref, *itemtref, *eventtref; + char *userkey, *itemkey, *eventval; + char *eventtable = NULL; + AttributeInfo *attributes; + bool elem_match = true; + + RecommendInfo *recInfo = (RecommendInfo*) recommendClause; + ColumnRef *usercr = (ColumnRef*) recInfo->userkey; + ColumnRef *itemcr = (ColumnRef*) recInfo->itemkey; + ColumnRef *eventcr = (ColumnRef*) recInfo->eventval; + + // First off, we'll perform sanity checks on the elements + // of the RECOMMEND clause. We need to make sure their + // table references are valid and matching. + userkey = getTableRef(usercr, &usertref); + itemkey = getTableRef(itemcr, &itemtref); + eventval = getTableRef(eventcr, &eventtref); + + // If one element has no table reference, none of them can have + // table references. + if ((usertref && !itemtref) || (!usertref && itemtref)) + elem_match = false; + if ((usertref && !eventtref) || (!usertref && eventtref)) + elem_match = false; + + // If table references exist, they all have to be the same. + if (usertref && elem_match) { + if ((strcmp(usertref,itemtref) != 0) || + (strcmp(usertref,eventtref) != 0)) + elem_match = false; + } + + if (!elem_match) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("elements of RECOMMEND clause must have matching table references"))); + + // Now that we've confirmed the correctness of the RECOMMEND + // clause table references, we will use them to find the event table + // referred to in the FROM clause. If there are table references, that makes + // the task easy; if not, then we can manually check to see if each column + // exists in a specific table. + if (usertref) { + // Let's look for a FROM table element that matches. + ListCell *from_cell; + foreach(from_cell,fromClause) { + Node *from_node = lfirst(from_cell); + if (nodeTag(from_node) == T_RangeVar) { + RangeVar *fromVar; + + fromVar = (RangeVar*) from_node; + if (tableMatch(fromVar,usertref)) { + eventtable = fromVar->relname; + //Use getRecommendVar to prevent circular linking while using copy functions + recInfo->recommender = getRecommendVar(fromVar); + fromVar->recommender = (Node*) recInfo; + break; + } + } + } + } else { + // If we can't easily identify the events table via + // table references, we will do so by cross-referencing the + // column names with the name of each table. + ListCell *from_cell; + foreach(from_cell,fromClause) { + Node *from_node = lfirst(from_cell); + if (nodeTag(from_node) == T_RangeVar) { + RangeVar *fromVar; + + fromVar = (RangeVar*) from_node; + // If all the key columns are in this table... + if (columnExistsInRelation(userkey,fromVar) && + columnExistsInRelation(itemkey,fromVar) && + columnExistsInRelation(eventval,fromVar)) { + // Did we already find a table matching all of + // these columns? If so, that's an error. + if (eventtable) { + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("ambiguous references in RECOMMEND clause"))); + } else { + // Make a note of the name, and also + // do cross-storage of the table. + eventtable = fromVar->relname; + //Use getRecommendVar to prevent circular linking while using copy functions + recInfo->recommender = getRecommendVar(fromVar); + fromVar->recommender = (Node*) recInfo; + } + } + } + } + } + + // If we found nothing, return error. + if (!eventtable) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("valid events table not found"))); + + // Now that we've confirmed the RECOMMEND clause is well-formed, + // we can start assembling our structure. + attributes = getAttributeInfo(eventtable, userkey, itemkey, eventval, + recInfo); + recInfo->attributes = attributes; + + // That's all we need for now, really. + return recInfo; } /* @@ -279,52 +303,52 @@ getEventsTable(List *fromClause, Node *recommendClause) { */ static char* getTableRef(ColumnRef *colref, char **colname) { - ListCell* col_cell; - Value* col_string; - int i; - - if (!colref) - return NULL; - - if (nodeTag(colref) != T_ColumnRef) { - (*colname) = NULL; - return NULL; - } - - // We take a look at the fields of the ColumnRef. - if (colref->fields->length < 1) { - (*colname) = NULL; - return NULL; - } - - col_cell = colref->fields->head; - // Random error condition: is this field not a string? - // I can't imagine where this would happen. - if (nodeTag(lfirst(col_cell)) != T_String) { - (*colname) = NULL; - return NULL; - } - - if (colref->fields->length < 2) { - (*colname) = NULL; - col_string = (Value*) lfirst(col_cell); - return col_string->val.str; - } - - // Depending on the number of fields, we need to continue - // on until we reach the final two. - for (i = 0; i+2 < colref->fields->length; i++) - col_cell = col_cell->next; - - // Extract the first string value (the table reference) - // and apply it. - col_string = (Value*) lfirst(col_cell); - (*colname) = col_string->val.str; - - // Now extract the actual column name and return it. - col_cell = col_cell->next; - col_string = (Value*) lfirst(col_cell); - return col_string->val.str; + ListCell* col_cell; + Value* col_string; + int i; + + if (!colref) + return NULL; + + if (nodeTag(colref) != T_ColumnRef) { + (*colname) = NULL; + return NULL; + } + + // We take a look at the fields of the ColumnRef. + if (colref->fields->length < 1) { + (*colname) = NULL; + return NULL; + } + + col_cell = colref->fields->head; + // Random error condition: is this field not a string? + // I can't imagine where this would happen. + if (nodeTag(lfirst(col_cell)) != T_String) { + (*colname) = NULL; + return NULL; + } + + if (colref->fields->length < 2) { + (*colname) = NULL; + col_string = (Value*) lfirst(col_cell); + return col_string->val.str; + } + + // Depending on the number of fields, we need to continue + // on until we reach the final two. + for (i = 0; i+2 < colref->fields->length; i++) + col_cell = col_cell->next; + + // Extract the first string value (the table reference) + // and apply it. + col_string = (Value*) lfirst(col_cell); + (*colname) = col_string->val.str; + + // Now extract the actual column name and return it. + col_cell = col_cell->next; + col_string = (Value*) lfirst(col_cell); + return col_string->val.str; } /* @@ -335,28 +359,28 @@ getTableRef(ColumnRef *colref, char **colname) { */ static AttributeInfo* getAttributeInfo(char *eventtable, char *userkey, char *itemkey, char *eventval, - RecommendInfo *recInfo) { - AttributeInfo* attributes = makeNode(AttributeInfo); - attributes->userID = -1; - attributes->recName = NULL; - attributes->usertable = NULL; - attributes->itemtable = NULL; - attributes->eventtable = eventtable; - attributes->userkey = userkey; - attributes->itemkey = itemkey; - attributes->eventval = eventval; - attributes->method = getRecMethod(recInfo->strmethod); - attributes->recIndexName = NULL; - attributes->recModelName = NULL; - attributes->recModelName2 = NULL; - attributes->recViewName = NULL; - attributes->userWhereClause = NULL; - attributes->IDfound = false; - attributes->cellType = CELL_ALPHA; - attributes->opType = recInfo->opType; - attributes->noFilter = false; - - return attributes; + RecommendInfo *recInfo) { + AttributeInfo* attributes = makeNode(AttributeInfo); + attributes->userID = -1; + attributes->recName = NULL; + attributes->usertable = NULL; + attributes->itemtable = NULL; + attributes->eventtable = eventtable; + attributes->userkey = userkey; + attributes->itemkey = itemkey; + attributes->eventval = eventval; + attributes->method = getRecMethod(recInfo->strmethod); + attributes->recIndexName = NULL; + attributes->recModelName = NULL; + attributes->recModelName2 = NULL; + attributes->recViewName = NULL; + attributes->userWhereClause = NULL; + attributes->IDfound = false; + attributes->cellType = CELL_ALPHA; + attributes->opType = recInfo->opType; + attributes->noFilter = false; + + return attributes; } /* @@ -367,13 +391,13 @@ getAttributeInfo(char *eventtable, char *userkey, char *itemkey, char *eventval, */ void addRecTargets(ParseState *pstate, List **targetlist, Node *recClause) { - RecommendInfo *recInfo = (RecommendInfo*) recClause; - - // Add the elements. Note that 3 corresponds to "RECOMMEND_CLAUSE" - // in parse_clause.c. - findTargetlistEntrySQL92(pstate, recInfo->userkey, targetlist, 3); - findTargetlistEntrySQL92(pstate, recInfo->itemkey, targetlist, 3); - findTargetlistEntrySQL92(pstate, recInfo->eventval, targetlist, 3); + RecommendInfo *recInfo = (RecommendInfo*) recClause; + + // Add the elements. Note that 3 corresponds to "RECOMMEND_CLAUSE" + // in parse_clause.c. + findTargetlistEntrySQL92(pstate, recInfo->userkey, targetlist, 3); + findTargetlistEntrySQL92(pstate, recInfo->itemkey, targetlist, 3); + findTargetlistEntrySQL92(pstate, recInfo->eventval, targetlist, 3); } /* @@ -384,66 +408,66 @@ addRecTargets(ParseState *pstate, List **targetlist, Node *recClause) { */ static int checkWhereClause(ColumnRef* attribute, RangeVar* recommender, char* userkey, char *eventval) { - List* attFields; - ListCell* att_cell; - - attFields = attribute->fields; - - // Like before, what we do depends on the length of this list. - switch (attFields->length) { - case 1: - // If there's only one element in the target, it needs - // to be the user ID or RecScore. - att_cell = attFields->head; - - if (nodeTag(lfirst(att_cell)) == T_String) { - Value* att_string = (Value*) lfirst(att_cell); - - // If the userID attribute is matched. - if (strcmp(att_string->val.str,userkey) == 0) - return 1; - // If the event value is matched. - if (strcmp(att_string->val.str,eventval) == 0) - return 2; - } - break; - case 2: - { - // If our target has two elements, then the first needs to - // refer to our recommender, and the second needs to be - // the user ID or eventval - bool go_on = false; - att_cell = attFields->head; - if (nodeTag(lfirst(att_cell)) == T_String) { - Value* att_string = (Value*) lfirst(att_cell); - if (recommender->alias == NULL) { - if (strcmp(att_string->val.str,recommender->relname) == 0) - go_on = true; - } else { - if ((strcmp(att_string->val.str,recommender->relname) == 0) || - (strcmp(att_string->val.str,recommender->alias->aliasname) == 0)) - go_on = true; - } - } - // If the first element does indeed match our recommender, go on. - if (go_on == true) { - att_cell = att_cell->next; - if (nodeTag(lfirst(att_cell)) == T_String) { - Value* att_string = (Value*) lfirst(att_cell); - // If the userID attribute is matched. - if (strcmp(att_string->val.str,userkey) == 0) - return 1; - // If the event value is matched. - if (strcmp(att_string->val.str,eventval) == 0) - return 2; - } - } - break; - } - default: - break; - } - return -1; + List* attFields; + ListCell* att_cell; + + attFields = attribute->fields; + + // Like before, what we do depends on the length of this list. + switch (attFields->length) { + case 1: + // If there's only one element in the target, it needs + // to be the user ID or RecScore. + att_cell = attFields->head; + + if (nodeTag(lfirst(att_cell)) == T_String) { + Value* att_string = (Value*) lfirst(att_cell); + + // If the userID attribute is matched. + if (strcmp(att_string->val.str,userkey) == 0) + return 1; + // If the event value is matched. + if (strcmp(att_string->val.str,eventval) == 0) + return 2; + } + break; + case 2: + { + // If our target has two elements, then the first needs to + // refer to our recommender, and the second needs to be + // the user ID or eventval + bool go_on = false; + att_cell = attFields->head; + if (nodeTag(lfirst(att_cell)) == T_String) { + Value* att_string = (Value*) lfirst(att_cell); + if (recommender->alias == NULL) { + if (strcmp(att_string->val.str,recommender->relname) == 0) + go_on = true; + } else { + if ((strcmp(att_string->val.str,recommender->relname) == 0) || + (strcmp(att_string->val.str,recommender->alias->aliasname) == 0)) + go_on = true; + } + } + // If the first element does indeed match our recommender, go on. + if (go_on == true) { + att_cell = att_cell->next; + if (nodeTag(lfirst(att_cell)) == T_String) { + Value* att_string = (Value*) lfirst(att_cell); + // If the userID attribute is matched. + if (strcmp(att_string->val.str,userkey) == 0) + return 1; + // If the event value is matched. + if (strcmp(att_string->val.str,eventval) == 0) + return 2; + } + } + break; + } + default: + break; + } + return -1; } /* @@ -453,43 +477,43 @@ checkWhereClause(ColumnRef* attribute, RangeVar* recommender, char* userkey, cha * CURRENTLY NOT IN USE. */ /*static void -modifyAExpr(Node *currentExpr, char* recname, char* viewname) { + modifyAExpr(Node *currentExpr, char* recname, char* viewname) { A_Expr *currentAExpr; - + if (!currentExpr) - return; - + return; + currentAExpr = (A_Expr*) currentExpr; // If our expression is an AND, recurse. if (currentAExpr->kind == AEXPR_AND) { - modifyAExpr(currentAExpr->lexpr,recname,viewname); - modifyAExpr(currentAExpr->rexpr,recname,viewname); + modifyAExpr(currentAExpr->lexpr,recname,viewname); + modifyAExpr(currentAExpr->rexpr,recname,viewname); } // If our expression is an =, then do the actual check. if (currentAExpr->kind == AEXPR_OP) { - // We need to check the left argument to see if it matches our recommender. - - // Left should be a recommender attribute. - if (nodeTag(currentAExpr->lexpr) == T_ColumnRef) - modifyColumnRef((ColumnRef*)currentAExpr->lexpr, recname, viewname); + // We need to check the left argument to see if it matches our recommender. + + // Left should be a recommender attribute. + if (nodeTag(currentAExpr->lexpr) == T_ColumnRef) + modifyColumnRef((ColumnRef*)currentAExpr->lexpr, recname, viewname); } -}*/ + }*/ -/* +/* * modifyTargetList - * Scan through the target list and replace one table name for another. * CURRENTLY NOT IN USE. */ /*static void -modifyTargetList(List *target_list, char *recname, char *viewname) { + modifyTargetList(List *target_list, char *recname, char *viewname) { ListCell *select_cell; - + foreach(select_cell,target_list) { - ResTarget* select_target = (ResTarget*) lfirst(select_cell); - ColumnRef* target_val = (ColumnRef*) select_target->val; - modifyColumnRef(target_val, recname, viewname); + ResTarget* select_target = (ResTarget*) lfirst(select_cell); + ColumnRef* target_val = (ColumnRef*) select_target->val; + modifyColumnRef(target_val, recname, viewname); } -}*/ + }*/ /* * modifyColumnRef - @@ -497,28 +521,28 @@ modifyTargetList(List *target_list, char *recname, char *viewname) { * CURRENTLY NOT IN USE. */ /*static void -modifyColumnRef(ColumnRef *attribute, char *recname, char *viewname) { + modifyColumnRef(ColumnRef *attribute, char *recname, char *viewname) { List* attFields = attribute->fields; ListCell* att_cell; // What we do depends on the length of this list. switch (attFields->length) { - case 2: - { - // If our target has two elements, then the first needs to - // refer to our recommender. If it's an alias, we won't need - // to change anything; if it's a direct reference to the name, - // we need to modify it. - att_cell = attFields->head; - if (nodeTag(lfirst(att_cell)) == T_String) { - Value* att_string = (Value*) lfirst(att_cell); - if (strcmp(att_string->val.str,recname) == 0) - att_string->val.str = viewname; - } - } - default: - break; + case 2: + { + // If our target has two elements, then the first needs to + // refer to our recommender. If it's an alias, we won't need + // to change anything; if it's a direct reference to the name, + // we need to modify it. + att_cell = attFields->head; + if (nodeTag(lfirst(att_cell)) == T_String) { + Value* att_string = (Value*) lfirst(att_cell); + if (strcmp(att_string->val.str,recname) == 0) + att_string->val.str = viewname; + } + } + default: + break; } -}*/ + }*/ /* * modifyFrom - @@ -529,127 +553,128 @@ modifyColumnRef(ColumnRef *attribute, char *recname, char *viewname) { */ static void modifyFrom(SelectStmt *stmt, RecommendInfo *recInfo) { - int i; -// char *eventtable; - char *query_string, *recindexname; - char *recmodelname, *recmodelname2, *recviewname; - recMethod method; - // Query information. - QueryDesc *queryDesc; - PlanState *planstate; - TupleTableSlot *slot; - MemoryContext recathoncontext; - - method = (recMethod) recInfo->attributes->method; - // We'll take a look to see if a recommender was already built - // on this table and method. - recindexname = retrieveRecommender(recInfo->attributes->eventtable,recInfo->strmethod); - - // If no recommender turned up, we'll just return right away. - // We'll utilize the events table for our event generation. Though - // we should note if this is a join table. - if (!recindexname) { - if (recInfo->opType == OP_JOIN) { - recInfo->opType = OP_GENERATEJOIN; - recInfo->attributes->opType = OP_GENERATEJOIN; - } - return; - } - - // We're using a different method from the default, which is - // OP_GENERATE. If we changed it to OP_JOIN, though, we'll - // leave it. - if (recInfo->opType == OP_GENERATE) { - recInfo->opType = OP_FILTER; - recInfo->attributes->opType = OP_FILTER; - } - - // If a recommender did turn up, then we need to track down the - // RecView and replace our event table with it. We'll also store - // the model table(s) for later use. - query_string = (char*) palloc(1024*sizeof(char)); - if (method == SVD) - sprintf(query_string,"select r.recusermodelname,r.recitemmodelname,r.recviewname from %s r;", - recindexname); - else - sprintf(query_string,"select r.recmodelname,r.recviewname from %s r;", - recindexname); - - // Now we prep the query. - queryDesc = recathon_queryStart(query_string, &recathoncontext); - planstate = queryDesc->planstate; - - // Now that we have a correct planstate, we can actually get information - // from the table. - slot = ExecProcNode(planstate); - if (TupIsNull(slot)) { - ereport(WARNING, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("recommender is built, but model could not be accessed. Building recommendation on the fly"))); - return; - } - - // Now we grab the appropriate information from the query tuple. - if (method == SVD) { - recmodelname = getTupleString(slot,"recusermodelname"); - recmodelname2 = getTupleString(slot,"recitemmodelname"); - } else { - recmodelname = getTupleString(slot,"recmodelname"); - recmodelname2 = NULL; - } - recviewname = getTupleString(slot,"recviewname"); - - // Now to tidy up. - recathon_queryEnd(queryDesc, recathoncontext); - pfree(query_string); - - // If we get to this point and there's no recmodelname, our query turned - // up no results, and something weird has gone wrong. I can't imagine a - // scenario where this is even possible, since the attributes we're trying - // to obtain are required to not be null, but it can't hurt. - if (recmodelname == NULL || recviewname == NULL) { - ereport(WARNING, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("recommender is built, but data could not be accessed. Building recommendation on the fly"))); - return; - } - - // Convert to lowercase. - for (i = 0; i < strlen(recindexname); i++) - recindexname[i] = tolower(recindexname[i]); - for (i = 0; i < strlen(recmodelname); i++) - recmodelname[i] = tolower(recmodelname[i]); - for (i = 0; i < strlen(recviewname); i++) - recviewname[i] = tolower(recviewname[i]); - if (recmodelname2) { - for (i = 0; i < strlen(recmodelname2); i++) - recmodelname2[i] = tolower(recmodelname2[i]); - } - - // Store the info, so we can use it later for the query. - recInfo->attributes->recIndexName = recindexname; - recInfo->attributes->recModelName = recmodelname; - recInfo->attributes->recModelName2 = recmodelname2; - recInfo->attributes->recViewName = recviewname; -// recInfo->attributes->recViewName = recInfo->attributes->eventtable; - - // When we do find the match, we need to replace our event table from the FROM clause - // with the recviewname we found. We also need to modify everything in the WHERE and - // RECOMMEND clauses if necessary, as well as the target list. At one point, we would - // try to add a condition to the WHERE clause to restrict the user ID, but since we're - // utilizing a custom scan operator, it's easier to do it manually at that stage. -// eventtable = recInfo->attributes->eventtable; -// modifyAExpr(stmt->whereClause,eventtable,recviewname); -// modifyTargetList(stmt->targetList,eventtable,recviewname); -// recInfo->recommender->relname = recviewname; - - // We need to store a pointer to this RecommendInfo struct in the RangeVar - // itself, because we'll be passing it on to future structures and eventually - // to the plan tree. Once we do this, we're done storing it in the recInfo, - // so we'll remove it to avoid the circular linking. That would be disastrous - // if copyObject were ever invoked. - recInfo->recommender->recommender = (Node*) recInfo; - recInfo->recommender = NULL; + int i; + // char *eventtable; + char *query_string, *recindexname; + char *recmodelname, *recmodelname2, *recviewname; + recMethod method; + // Query information. + QueryDesc *queryDesc; + PlanState *planstate; + TupleTableSlot *slot; + MemoryContext recathoncontext; + + method = (recMethod) recInfo->attributes->method; + // We'll take a look to see if a recommender was already built + // on this table and method. + recindexname = retrieveRecommender(recInfo->attributes->eventtable,recInfo->strmethod); + + // If no recommender turned up, we'll just return right away. + // We'll utilize the events table for our event generation. Though + // we should note if this is a join table. + if (!recindexname) { + if (recInfo->opType == OP_JOIN) { + recInfo->opType = OP_GENERATEJOIN; + recInfo->attributes->opType = OP_GENERATEJOIN; + } + return; + } + + // We're using a different method from the default, which is + // OP_GENERATE. If we changed it to OP_JOIN, though, we'll + // leave it. + if (recInfo->opType == OP_GENERATE) { + recInfo->opType = OP_FILTER; + recInfo->attributes->opType = OP_FILTER; + } + + // If a recommender did turn up, then we need to track down the + // RecView and replace our event table with it. We'll also store + // the model table(s) for later use. + query_string = (char*) palloc(1024*sizeof(char)); + if (method == SVD) + sprintf(query_string,"select r.recusermodelname,r.recitemmodelname,r.recviewname from %s r;", + recindexname); + else + sprintf(query_string,"select r.recmodelname,r.recviewname from %s r;", + recindexname); + + // Now we prep the query. + queryDesc = recathon_queryStart(query_string, &recathoncontext); + planstate = queryDesc->planstate; + + // Now that we have a correct planstate, we can actually get information + // from the table. + slot = ExecProcNode(planstate); + if (TupIsNull(slot)) { + ereport(WARNING, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("recommender is built, but model could not be accessed. Building recommendation on the fly"))); + return; + } + + // Now we grab the appropriate information from the query tuple. + if (method == SVD) { + recmodelname = getTupleString(slot,"recusermodelname"); + recmodelname2 = getTupleString(slot,"recitemmodelname"); + } else { + recmodelname = getTupleString(slot,"recmodelname"); + recmodelname2 = NULL; + } + recviewname = getTupleString(slot,"recviewname"); + + // Now to tidy up. + recathon_queryEnd(queryDesc, recathoncontext); + pfree(query_string); + + // If we get to this point and there's no recmodelname, our query turned + // up no results, and something weird has gone wrong. I can't imagine a + // scenario where this is even possible, since the attributes we're trying + // to obtain are required to not be null, but it can't hurt. + if (recmodelname == NULL || recviewname == NULL) { + ereport(WARNING, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("recommender is built, but data could not be accessed. Building recommendation on the fly"))); + return; + } + + // Convert to lowercase. + for (i = 0; i < strlen(recindexname); i++) + recindexname[i] = tolower(recindexname[i]); + for (i = 0; i < strlen(recmodelname); i++) + recmodelname[i] = tolower(recmodelname[i]); + for (i = 0; i < strlen(recviewname); i++) + recviewname[i] = tolower(recviewname[i]); + if (recmodelname2) { + for (i = 0; i < strlen(recmodelname2); i++) + recmodelname2[i] = tolower(recmodelname2[i]); + } + + // Store the info, so we can use it later for the query. + recInfo->attributes->recIndexName = recindexname; + recInfo->attributes->recModelName = recmodelname; + recInfo->attributes->recModelName2 = recmodelname2; + recInfo->attributes->recViewName = recviewname; + // recInfo->attributes->recViewName = recInfo->attributes->eventtable; + + // When we do find the match, we need to replace our event table from the FROM clause + // with the recviewname we found. We also need to modify everything in the WHERE and + // RECOMMEND clauses if necessary, as well as the target list. At one point, we would + // try to add a condition to the WHERE clause to restrict the user ID, but since we're + // utilizing a custom scan operator, it's easier to do it manually at that stage. + // eventtable = recInfo->attributes->eventtable; + // modifyAExpr(stmt->whereClause,eventtable,recviewname); + // modifyTargetList(stmt->targetList,eventtable,recviewname); + // recInfo->recommender->relname = recviewname; + + // We need to store a pointer to this RecommendInfo struct in the RangeVar + // itself, because we'll be passing it on to future structures and eventually + // to the plan tree. Once we do this, we're done storing it in the recInfo, + // so we'll remove it to avoid the circular linking. That would be disastrous + // if copyObject were ever invoked. + + //recInfo->recommender->recommender = (Node*) recInfo; + //recInfo->recommender = NULL; } /* @@ -661,8 +686,8 @@ modifyFrom(SelectStmt *stmt, RecommendInfo *recInfo) { */ static void filterfirst(Node *whereExpr, RecommendInfo *recInfo) { - if (filterfirstrecurse(whereExpr, recInfo)) - recInfo->attributes->noFilter = true; + if (filterfirstrecurse(whereExpr, recInfo)) + recInfo->attributes->noFilter = true; } /* @@ -674,32 +699,32 @@ filterfirst(Node *whereExpr, RecommendInfo *recInfo) { */ static bool filterfirstrecurse(Node *whereExpr, RecommendInfo *recInfo) { - if (!whereExpr) - return false; - - // If we've found a ColumnRef, then we check it to see if it - // matches RecScore for this (or all) recommenders. - if (nodeTag(whereExpr) == T_ColumnRef) { - int retvalue = checkWhereClause((ColumnRef*)whereExpr, recInfo->recommender, recInfo->attributes->userkey, recInfo->attributes->eventval); - - // 2 corresponds to "RecScore". - if (retvalue == 2) - return true; - else - return false; - } - - // If we've found an A_Expr, recurse on the left and right. We only - // need to find the RecScore in one place, so we "or" them together. - if (nodeTag(whereExpr) == T_A_Expr) { - A_Expr *whereAExpr = (A_Expr*) whereExpr; - - return filterfirstrecurse(whereAExpr->lexpr,recInfo) || - filterfirstrecurse(whereAExpr->rexpr,recInfo); - } - - // All other types fail, for now. - return false; + if (!whereExpr) + return false; + + // If we've found a ColumnRef, then we check it to see if it + // matches RecScore for this (or all) recommenders. + if (nodeTag(whereExpr) == T_ColumnRef) { + int retvalue = checkWhereClause((ColumnRef*)whereExpr, recInfo->recommender, recInfo->attributes->userkey, recInfo->attributes->eventval); + + // 2 corresponds to "RecScore". + if (retvalue == 2) + return true; + else + return false; + } + + // If we've found an A_Expr, recurse on the left and right. We only + // need to find the RecScore in one place, so we "or" them together. + if (nodeTag(whereExpr) == T_A_Expr) { + A_Expr *whereAExpr = (A_Expr*) whereExpr; + + return filterfirstrecurse(whereAExpr->lexpr,recInfo) || + filterfirstrecurse(whereAExpr->rexpr,recInfo); + } + + // All other types fail, for now. + return false; } /* @@ -708,27 +733,27 @@ filterfirstrecurse(Node *whereExpr, RecommendInfo *recInfo) { * CURRENTLY NOT IN USE. */ /*static void -applyRecJoin(Node *whereClause, List *fromClause, RecommendInfo *recInfo) { + applyRecJoin(Node *whereClause, List *fromClause, RecommendInfo *recInfo) { RangeVar *partnerTable; AttributeInfo *attributes = recInfo->attributes; RecommendInfo* partnerInfo; - + // It's only possible if we have multiple tables. if (list_length(fromClause) < 2) - return; - + return; + // Start by seeing if there's a match with the item key. partnerTable = locateJoinTable(whereClause, fromClause, - recInfo->recommender, attributes->itemkey); + recInfo->recommender, attributes->itemkey); // Then go to the user key. if (!partnerTable) - partnerTable = locateJoinTable(whereClause, fromClause, - recInfo->recommender, attributes->userkey); - + partnerTable = locateJoinTable(whereClause, fromClause, + recInfo->recommender, attributes->userkey); + // If we found no such table, give up. if (!partnerTable) - return; - + return; + // Otherwise, we found an appropriate table. Make a note. recInfo->opType = OP_JOIN; attributes->opType = OP_JOIN; @@ -736,7 +761,7 @@ applyRecJoin(Node *whereClause, List *fromClause, RecommendInfo *recInfo) { partnerInfo = makeNode(RecommendInfo); partnerInfo->opType = OP_JOINPARTNER; partnerTable->recommender = partnerInfo; -}*/ + }*/ /* @@ -747,106 +772,106 @@ applyRecJoin(Node *whereClause, List *fromClause, RecommendInfo *recInfo) { * CURRENTLY NOT IN USE. */ /*static RangeVar* -locateJoinTable(Node* recExpr, List *fromClause, RangeVar* eventtable, char* key) { + locateJoinTable(Node* recExpr, List *fromClause, RangeVar* eventtable, char* key) { A_Expr *recAExpr; - + if (!recExpr) - return NULL; - + return NULL; + // Turns out this isn't necessarily an A_Expr. if (nodeTag(recExpr) != T_A_Expr) - return NULL; - + return NULL; + recAExpr = (A_Expr*) recExpr; - + // If our expression is an =, then do the actual check. if (recAExpr->kind == AEXPR_OP) { - Value *opVal; - char *opType; - - // It is possible to have this odd error under some circumstances. - if (recAExpr->name->length == 0) - return NULL; - - opVal = (Value*) lfirst(recAExpr->name->head); - opType = opVal->val.str; - - if (strcmp(opType,"=") == 0) { - // We need to check the left and right arguments. - char *leftcol, *lefttable; - char *rightcol, *righttable; - - // Left should be a ColumnRef. If it is, extract the info. - // If we're dealing with the key in question, continue. - if (nodeTag(recAExpr->lexpr) == T_ColumnRef) { - ColumnRef *leftcr = (ColumnRef*) recAExpr->lexpr; - - leftcol = getTableRef(leftcr,&lefttable); - if (strcmp(leftcol,key) != 0) - return NULL; - } else - return NULL; - - // Right should be a ColumnRef too. If it is, extract the info. - // If we're dealing with the key in question, continue. - if (nodeTag(recAExpr->rexpr) == T_ColumnRef) { - ColumnRef *rightcr = (ColumnRef*) recAExpr->rexpr; - - rightcol = getTableRef(rightcr,&righttable); - if (strcmp(rightcol,key) != 0) - return NULL; - } else - return NULL; - - // If either table reference is null, just return NULL. That's - // not a valid WHERE clause anyway. - if (!lefttable || !righttable) - return NULL; - - // So we're dealing with two tables equating the key column. - // If one of the tables matches our ratings table, we will - // return the other one. - if (tableMatch(eventtable,lefttable)) { - ListCell *from_cell; - foreach(from_cell,fromClause) { - Node *from_node = lfirst(from_cell); - if (nodeTag(from_node) == T_RangeVar) { - RangeVar *fromVar; - - fromVar = (RangeVar*) from_node; - if (tableMatch(fromVar,righttable)) - return fromVar; - } - } - } - else if (tableMatch(eventtable,righttable)) { - ListCell *from_cell; - foreach(from_cell,fromClause) { - Node *from_node = lfirst(from_cell); - if (nodeTag(from_node) == T_RangeVar) { - RangeVar *fromVar; - - fromVar = (RangeVar*) from_node; - if (tableMatch(fromVar,lefttable)) - return fromVar; - } - } - } - } + Value *opVal; + char *opType; + + // It is possible to have this odd error under some circumstances. + if (recAExpr->name->length == 0) + return NULL; + + opVal = (Value*) lfirst(recAExpr->name->head); + opType = opVal->val.str; + + if (strcmp(opType,"=") == 0) { + // We need to check the left and right arguments. + char *leftcol, *lefttable; + char *rightcol, *righttable; + + // Left should be a ColumnRef. If it is, extract the info. + // If we're dealing with the key in question, continue. + if (nodeTag(recAExpr->lexpr) == T_ColumnRef) { + ColumnRef *leftcr = (ColumnRef*) recAExpr->lexpr; + + leftcol = getTableRef(leftcr,&lefttable); + if (strcmp(leftcol,key) != 0) + return NULL; + } else + return NULL; + + // Right should be a ColumnRef too. If it is, extract the info. + // If we're dealing with the key in question, continue. + if (nodeTag(recAExpr->rexpr) == T_ColumnRef) { + ColumnRef *rightcr = (ColumnRef*) recAExpr->rexpr; + + rightcol = getTableRef(rightcr,&righttable); + if (strcmp(rightcol,key) != 0) + return NULL; + } else + return NULL; + + // If either table reference is null, just return NULL. That's + // not a valid WHERE clause anyway. + if (!lefttable || !righttable) + return NULL; + + // So we're dealing with two tables equating the key column. + // If one of the tables matches our ratings table, we will + // return the other one. + if (tableMatch(eventtable,lefttable)) { + ListCell *from_cell; + foreach(from_cell,fromClause) { + Node *from_node = lfirst(from_cell); + if (nodeTag(from_node) == T_RangeVar) { + RangeVar *fromVar; + + fromVar = (RangeVar*) from_node; + if (tableMatch(fromVar,righttable)) + return fromVar; + } + } + } + else if (tableMatch(eventtable,righttable)) { + ListCell *from_cell; + foreach(from_cell,fromClause) { + Node *from_node = lfirst(from_cell); + if (nodeTag(from_node) == T_RangeVar) { + RangeVar *fromVar; + + fromVar = (RangeVar*) from_node; + if (tableMatch(fromVar,lefttable)) + return fromVar; + } + } + } + } } // If we didn't find what we're looking for, recurse. else { - RangeVar *rtnvar; - - rtnvar = locateJoinTable(recAExpr->lexpr,fromClause,eventtable,key); - if (rtnvar) return rtnvar; - rtnvar = locateJoinTable(recAExpr->rexpr,fromClause,eventtable,key); - if (rtnvar) return rtnvar; + RangeVar *rtnvar; + + rtnvar = locateJoinTable(recAExpr->lexpr,fromClause,eventtable,key); + if (rtnvar) return rtnvar; + rtnvar = locateJoinTable(recAExpr->rexpr,fromClause,eventtable,key); + if (rtnvar) return rtnvar; } - + // All other kinds fail, at least for now. return NULL; -}*/ + }*/ /* * tableMatch - @@ -855,17 +880,17 @@ locateJoinTable(Node* recExpr, List *fromClause, RangeVar* eventtable, char* key */ static bool tableMatch(RangeVar* table, char* tablename) { - if (table->alias) { - if (strcmp(table->alias->aliasname,tablename) == 0) { - return true; - } - } else { - if (strcmp(table->relname,tablename) == 0) { - return true; - } - } - - return false; + if (table->alias) { + if (strcmp(table->alias->aliasname,tablename) == 0) { + return true; + } + } else { + if (strcmp(table->relname,tablename) == 0) { + return true; + } + } + + return false; } /* @@ -875,20 +900,20 @@ tableMatch(RangeVar* table, char* tablename) { */ static Node* makeTrueConst() { - A_Const *n; - TypeCast *tc; - - n = makeNode(A_Const); - n->val.type = T_String; - n->val.val.str = "t"; - n->location = -1; - - tc = makeNode(TypeCast); - tc->arg = (Node *) n; - tc->typeName = SystemTypeName("bool"); - tc->location = -1; - - return (Node *) tc; + A_Const *n; + TypeCast *tc; + + n = makeNode(A_Const); + n->val.type = T_String; + n->val.val.str = "t"; + n->location = -1; + + tc = makeNode(TypeCast); + tc->arg = (Node *) n; + tc->typeName = SystemTypeName("bool"); + tc->location = -1; + + return (Node *) tc; } /* @@ -900,176 +925,176 @@ makeTrueConst() { */ static int userWhereOp(Node* whereClause, char *userkey) { - A_Expr *recAExpr; - int leftresult = 0, rightresult = 0; - - if (!whereClause) - return 0; - - recAExpr = (A_Expr*) whereClause; - - // If our expression is an OP or IN, then do the actual check. - if (recAExpr->kind == AEXPR_OP || recAExpr->kind == AEXPR_IN) { - char *leftcol, *lefttable; - char *rightcol, *righttable; - bool leftiscol = false, rightiscol = false, userfound = false; - bool leftaexpr = false, rightaexpr = false; - - // It is possible to have this odd error under some circumstances. - if (recAExpr->name->length == 0) - return 0; - - // If the left column is our user key column, that's a good sign. - // Let's just hope we're not joining with some other column. - if (recAExpr->lexpr && nodeTag(recAExpr->lexpr) == T_ColumnRef) { - ColumnRef *leftcr = (ColumnRef*) recAExpr->lexpr; - - leftiscol = true; - leftcol = getTableRef(leftcr,&lefttable); - if (strcmp(leftcol,userkey) == 0) - userfound = true; - } - - // If the right column is our user key column, return right away. - if (recAExpr->rexpr && nodeTag(recAExpr->rexpr) == T_ColumnRef) { - ColumnRef *rightcr = (ColumnRef*) recAExpr->rexpr; - - rightiscol = true; - rightcol = getTableRef(rightcr,&righttable); - if (strcmp(rightcol,userkey) == 0) - userfound = true; - } - - // If this OP/IN doesn't involve the user key at all, then we'll - // replace it with a TRUE boolean constant. Likewise, if we're - // equating it with some other column, that's not useful to us, - // so we'll again replace it with a TRUE constant. - if (leftiscol && rightiscol) - return -1; - else if (!leftiscol && !rightiscol) - return 0; - - // If this OP has more A_Exprs under it, we need to recurse and - // see what's in them. - if (recAExpr->lexpr && nodeTag(recAExpr->lexpr) == T_A_Expr) { - leftaexpr = true; - leftresult = userWhereOp(recAExpr->lexpr,userkey); - } - if (recAExpr->rexpr && nodeTag(recAExpr->rexpr) == T_A_Expr) { - rightaexpr = true; - rightresult = userWhereOp(recAExpr->rexpr, userkey); - } - - // Any AExprs? - if (leftaexpr || rightaexpr) { - // If we found another column anywhere, the whole thing is - // useless. - if (leftresult < 0 || rightresult < 0) - return -1; - - // If we didn't find the user column, it's similarly useless. - if (leftresult == 0 && rightresult == 0) - return 0; - - // Otherwise, if we found the user key, we can use this item. - if (userfound) { - return 1; - } - else - return 0; - } - } - // If our expression is an OP or IN, then do the actual check. - if (recAExpr->kind == AEXPR_OP || recAExpr->kind == AEXPR_IN) { - char *leftcol, *lefttable; - char *rightcol, *righttable; - bool leftiscol = false, rightiscol = false, userfound = false; - bool leftaexpr = false, rightaexpr = false; - - // It is possible to have this odd error under some circumstances. - if (recAExpr->name->length == 0) - return 0; - - // If this OP has more A_Exprs under it, we need to recurse and - // see what's in them. - if (recAExpr->lexpr && nodeTag(recAExpr->lexpr) == T_A_Expr) { - leftaexpr = true; - leftresult = userWhereOp(recAExpr->lexpr,userkey); - } - if (recAExpr->rexpr && nodeTag(recAExpr->rexpr) == T_A_Expr) { - rightaexpr = true; - rightresult = userWhereOp(recAExpr->rexpr, userkey); - } - - // Any AExprs? - if (leftaexpr || rightaexpr) { - // If we found another column anywhere, the whole thing is - // useless. - if (leftresult < 0 || rightresult < 0) - return -1; - - // If we found the user column, though, make a note. - if (leftresult == 1 || rightresult == 1) - userfound = true; - } - - // If at least one isn't an A_Expr, then we check to see if either - // is a ColumnRef. - - // If the left column is our user key column, that's a good sign. - // Let's just hope we're not joining with some other column. - if (recAExpr->lexpr && nodeTag(recAExpr->lexpr) == T_ColumnRef) { - ColumnRef *leftcr = (ColumnRef*) recAExpr->lexpr; - - leftiscol = true; - leftcol = getTableRef(leftcr,&lefttable); - - if (strcmp(leftcol,userkey) == 0) - userfound = true; - else - return -1; - } - - // If the right column is our user key column, return right away. - if (recAExpr->rexpr && nodeTag(recAExpr->rexpr) == T_ColumnRef) { - ColumnRef *rightcr = (ColumnRef*) recAExpr->rexpr; - - rightiscol = true; - rightcol = getTableRef(rightcr,&righttable); - - if (strcmp(rightcol,userkey) == 0) - userfound = true; - else - return -1; - } - - // If both items are columns, that can't possibly be good. - if (leftiscol && rightiscol) - return 0; - - // Otherwise, if we found the user key, we can use this item. - if (userfound) - return 1; - else - return 0; - } - // Recurse in a similar manner if this is an AND/OR/NOT. - else if (recAExpr->kind == AEXPR_AND || recAExpr->kind == AEXPR_OR) { - leftresult = userWhereOp(recAExpr->lexpr,userkey); - rightresult = userWhereOp(recAExpr->rexpr,userkey); - - if (leftresult < 0 || rightresult < 0) - return -1; - else if (leftresult == 0 && rightresult == 0) - return 0; - else - return 1; - } else if (recAExpr->kind == AEXPR_NOT) { - return userWhereOp(recAExpr->rexpr,userkey); - } - - // Return 0 by default. - return 0; + A_Expr *recAExpr; + int leftresult = 0, rightresult = 0; + + if (!whereClause) + return 0; + + recAExpr = (A_Expr*) whereClause; + + // If our expression is an OP or IN, then do the actual check. + if (recAExpr->kind == AEXPR_OP || recAExpr->kind == AEXPR_IN) { + char *leftcol, *lefttable; + char *rightcol, *righttable; + bool leftiscol = false, rightiscol = false, userfound = false; + bool leftaexpr = false, rightaexpr = false; + + // It is possible to have this odd error under some circumstances. + if (recAExpr->name->length == 0) + return 0; + + // If the left column is our user key column, that's a good sign. + // Let's just hope we're not joining with some other column. + if (recAExpr->lexpr && nodeTag(recAExpr->lexpr) == T_ColumnRef) { + ColumnRef *leftcr = (ColumnRef*) recAExpr->lexpr; + + leftiscol = true; + leftcol = getTableRef(leftcr,&lefttable); + if (strcmp(leftcol,userkey) == 0) + userfound = true; + } + + // If the right column is our user key column, return right away. + if (recAExpr->rexpr && nodeTag(recAExpr->rexpr) == T_ColumnRef) { + ColumnRef *rightcr = (ColumnRef*) recAExpr->rexpr; + + rightiscol = true; + rightcol = getTableRef(rightcr,&righttable); + if (strcmp(rightcol,userkey) == 0) + userfound = true; + } + + // If this OP/IN doesn't involve the user key at all, then we'll + // replace it with a TRUE boolean constant. Likewise, if we're + // equating it with some other column, that's not useful to us, + // so we'll again replace it with a TRUE constant. + if (leftiscol && rightiscol) + return -1; + else if (!leftiscol && !rightiscol) + return 0; + + // If this OP has more A_Exprs under it, we need to recurse and + // see what's in them. + if (recAExpr->lexpr && nodeTag(recAExpr->lexpr) == T_A_Expr) { + leftaexpr = true; + leftresult = userWhereOp(recAExpr->lexpr,userkey); + } + if (recAExpr->rexpr && nodeTag(recAExpr->rexpr) == T_A_Expr) { + rightaexpr = true; + rightresult = userWhereOp(recAExpr->rexpr, userkey); + } + + // Any AExprs? + if (leftaexpr || rightaexpr) { + // If we found another column anywhere, the whole thing is + // useless. + if (leftresult < 0 || rightresult < 0) + return -1; + + // If we didn't find the user column, it's similarly useless. + if (leftresult == 0 && rightresult == 0) + return 0; + + // Otherwise, if we found the user key, we can use this item. + if (userfound) { + return 1; + } + else + return 0; + } + } + // If our expression is an OP or IN, then do the actual check. + if (recAExpr->kind == AEXPR_OP || recAExpr->kind == AEXPR_IN) { + char *leftcol, *lefttable; + char *rightcol, *righttable; + bool leftiscol = false, rightiscol = false, userfound = false; + bool leftaexpr = false, rightaexpr = false; + + // It is possible to have this odd error under some circumstances. + if (recAExpr->name->length == 0) + return 0; + + // If this OP has more A_Exprs under it, we need to recurse and + // see what's in them. + if (recAExpr->lexpr && nodeTag(recAExpr->lexpr) == T_A_Expr) { + leftaexpr = true; + leftresult = userWhereOp(recAExpr->lexpr,userkey); + } + if (recAExpr->rexpr && nodeTag(recAExpr->rexpr) == T_A_Expr) { + rightaexpr = true; + rightresult = userWhereOp(recAExpr->rexpr, userkey); + } + + // Any AExprs? + if (leftaexpr || rightaexpr) { + // If we found another column anywhere, the whole thing is + // useless. + if (leftresult < 0 || rightresult < 0) + return -1; + + // If we found the user column, though, make a note. + if (leftresult == 1 || rightresult == 1) + userfound = true; + } + + // If at least one isn't an A_Expr, then we check to see if either + // is a ColumnRef. + + // If the left column is our user key column, that's a good sign. + // Let's just hope we're not joining with some other column. + if (recAExpr->lexpr && nodeTag(recAExpr->lexpr) == T_ColumnRef) { + ColumnRef *leftcr = (ColumnRef*) recAExpr->lexpr; + + leftiscol = true; + leftcol = getTableRef(leftcr,&lefttable); + + if (strcmp(leftcol,userkey) == 0) + userfound = true; + else + return -1; + } + + // If the right column is our user key column, return right away. + if (recAExpr->rexpr && nodeTag(recAExpr->rexpr) == T_ColumnRef) { + ColumnRef *rightcr = (ColumnRef*) recAExpr->rexpr; + + rightiscol = true; + rightcol = getTableRef(rightcr,&righttable); + + if (strcmp(rightcol,userkey) == 0) + userfound = true; + else + return -1; + } + + // If both items are columns, that can't possibly be good. + if (leftiscol && rightiscol) + return 0; + + // Otherwise, if we found the user key, we can use this item. + if (userfound) + return 1; + else + return 0; + } + // Recurse in a similar manner if this is an AND/OR/NOT. + else if (recAExpr->kind == AEXPR_AND || recAExpr->kind == AEXPR_OR) { + leftresult = userWhereOp(recAExpr->lexpr,userkey); + rightresult = userWhereOp(recAExpr->rexpr,userkey); + + if (leftresult < 0 || rightresult < 0) + return -1; + else if (leftresult == 0 && rightresult == 0) + return 0; + else + return 1; + } else if (recAExpr->kind == AEXPR_NOT) { + return userWhereOp(recAExpr->rexpr,userkey); + } + + // Return 0 by default. + return 0; } /* @@ -1083,102 +1108,103 @@ userWhereOp(Node* whereClause, char *userkey) { */ static Node* userWhereClause(Node* whereClause, char *userkey) { - A_Expr *recAExpr; - - if (!whereClause) - return NULL; - - // Turns out this isn't necessarily an A_Expr. - if (nodeTag(whereClause) != T_A_Expr) - return NULL; - - recAExpr = (A_Expr*) whereClause; - - // If our expression is an OP or IN, then do the actual check. - if (recAExpr->kind == AEXPR_OP || recAExpr->kind == AEXPR_IN) { - char *leftcol, *lefttable; - char *rightcol, *righttable; - bool leftiscol = false, rightiscol = false, userfound = false; - bool leftaexpr = false, rightaexpr = false; - int leftresult = 0, rightresult = 0; - - // It is possible to have this odd error under some circumstances. - if (recAExpr->name->length == 0) - return NULL; - - // If this OP has more A_Exprs under it, we need to recurse and - // see what's in them. - if (recAExpr->lexpr && nodeTag(recAExpr->lexpr) == T_A_Expr) { - leftaexpr = true; - leftresult = userWhereOp(recAExpr->lexpr,userkey); - } - if (recAExpr->rexpr && nodeTag(recAExpr->rexpr) == T_A_Expr) { - rightaexpr = true; - rightresult = userWhereOp(recAExpr->rexpr, userkey); - } - - // Any AExprs? - if (leftaexpr || rightaexpr) { - // If we found another column anywhere, the whole thing is - // useless. - if (leftresult < 0 || rightresult < 0) - return makeTrueConst(); - - // If we found the user column, though, make a note. - if (leftresult == 1 || rightresult == 1) - userfound = true; - } - - // If at least one isn't an A_Expr, then we check to see if either - // is a ColumnRef. - - // If the left column is our user key column, that's a good sign. - // Let's just hope we're not joining with some other column. - if (recAExpr->lexpr && nodeTag(recAExpr->lexpr) == T_ColumnRef) { - ColumnRef *leftcr = (ColumnRef*) recAExpr->lexpr; - - leftiscol = true; - leftcol = getTableRef(leftcr,&lefttable); - - if (strcmp(leftcol,userkey) == 0) - userfound = true; - else - return makeTrueConst(); - } - - // If the right column is our user key column, return right away. - if (recAExpr->rexpr && nodeTag(recAExpr->rexpr) == T_ColumnRef) { - ColumnRef *rightcr = (ColumnRef*) recAExpr->rexpr; - - rightiscol = true; - rightcol = getTableRef(rightcr,&righttable); - - if (strcmp(rightcol,userkey) == 0) - userfound = true; - else - return makeTrueConst(); - } - - // If both items are columns, that can't possibly be good. - if (leftiscol && rightiscol) - return makeTrueConst(); - - // Otherwise, if we found the user key, we can use this item. - if (userfound) - return (Node *) recAExpr; - else - return makeTrueConst(); - } - // Recurse if this is an AND/OR/NOT. - else if (recAExpr->kind == AEXPR_AND || recAExpr->kind == AEXPR_OR) { - recAExpr->lexpr = userWhereClause(recAExpr->lexpr,userkey); - recAExpr->rexpr = userWhereClause(recAExpr->rexpr,userkey); - } else if (recAExpr->kind == AEXPR_NOT) { - recAExpr->rexpr = userWhereClause(recAExpr->rexpr,userkey); - } - - // Return the expression. - return (Node *) recAExpr; + A_Expr *recAExpr; + + if (!whereClause) + return NULL; + + // Turns out this isn't necessarily an A_Expr. + if (nodeTag(whereClause) != T_A_Expr) + return NULL; + + recAExpr = (A_Expr*) whereClause; + + // If our expression is an OP or IN, then do the actual check. + if (recAExpr->kind == AEXPR_OP || recAExpr->kind == AEXPR_IN) { + char *leftcol, *lefttable; + char *rightcol, *righttable; + bool leftiscol = false, rightiscol = false, userfound = false; + bool leftaexpr = false, rightaexpr = false; + int leftresult = 0, rightresult = 0; + + // It is possible to have this odd error under some circumstances. + if (recAExpr->name->length == 0) + return NULL; + + // If this OP has more A_Exprs under it, we need to recurse and + // see what's in them. + if (recAExpr->lexpr && nodeTag(recAExpr->lexpr) == T_A_Expr) { + leftaexpr = true; + leftresult = userWhereOp(recAExpr->lexpr,userkey); + } + if (recAExpr->rexpr && nodeTag(recAExpr->rexpr) == T_A_Expr) { + rightaexpr = true; + rightresult = userWhereOp(recAExpr->rexpr, userkey); + } + + // Any AExprs? + if (leftaexpr || rightaexpr) { + // If we found another column anywhere, the whole thing is + // useless. + if (leftresult < 0 || rightresult < 0) + return makeTrueConst(); + + // If we found the user column, though, make a note. + if (leftresult == 1 || rightresult == 1) + userfound = true; + } + + // If at least one isn't an A_Expr, then we check to see if either + // is a ColumnRef. + + // If the left column is our user key column, that's a good sign. + // Let's just hope we're not joining with some other column. + if (recAExpr->lexpr && nodeTag(recAExpr->lexpr) == T_ColumnRef) { + ColumnRef *leftcr = (ColumnRef*) recAExpr->lexpr; + + leftiscol = true; + leftcol = getTableRef(leftcr,&lefttable); + + if (strcmp(leftcol,userkey) == 0) + userfound = true; + else + return makeTrueConst(); + } + + // If the right column is our user key column, return right away. + if (recAExpr->rexpr && nodeTag(recAExpr->rexpr) == T_ColumnRef) { + ColumnRef *rightcr = (ColumnRef*) recAExpr->rexpr; + + rightiscol = true; + rightcol = getTableRef(rightcr,&righttable); + + if (strcmp(rightcol,userkey) == 0) + userfound = true; + else + return makeTrueConst(); + } + + // If both items are columns, that can't possibly be good. + if (leftiscol && rightiscol) + return makeTrueConst(); + + // Otherwise, if we found the user key, we can use this item. + if (userfound) + return (Node *) recAExpr; + else + return makeTrueConst(); + } + // Recurse if this is an AND/OR/NOT. + else if (recAExpr->kind == AEXPR_AND || recAExpr->kind == AEXPR_OR) { + recAExpr->lexpr = userWhereClause(recAExpr->lexpr,userkey); + recAExpr->rexpr = userWhereClause(recAExpr->rexpr,userkey); + } else if (recAExpr->kind == AEXPR_NOT) { + recAExpr->rexpr = userWhereClause(recAExpr->rexpr,userkey); + } + + + // Return the expression. + return (Node *) recAExpr; } /* @@ -1187,17 +1213,19 @@ userWhereClause(Node* whereClause, char *userkey) { */ void userWhereTransform(ParseState *pstate, Node* recommendClause) { - RecommendInfo *recInfo; - Node *userWhere; - - if (!recommendClause) - return; - - recInfo = (RecommendInfo*) recommendClause; - userWhere = recInfo->attributes->userWhereClause; - if (userWhere) { - userWhere = transformExpr(pstate, userWhere); - userWhere = coerce_to_boolean(pstate, userWhere, "USER_WHERE"); - } - recInfo->attributes->userWhereClause = userWhere; + RecommendInfo *recInfo; + Node *userWhere; + + if (!recommendClause) + return; + + recInfo = (RecommendInfo*) recommendClause; + userWhere = recInfo->attributes->userWhereClause; + if (userWhere) { + userWhere = transformExpr(pstate, userWhere); + userWhere = coerce_to_boolean(pstate, userWhere, "USER_WHERE"); + } + recInfo->attributes->userWhereClause = userWhere; + //debug + //printf("type of att userwhere: %u\n", userWhere->type); } diff --git a/PostgreSQL/src/backend/utils/misc/recathon.c b/PostgreSQL/src/backend/utils/misc/recathon.c index bfe1abe..3fe56ff 100644 --- a/PostgreSQL/src/backend/utils/misc/recathon.c +++ b/PostgreSQL/src/backend/utils/misc/recathon.c @@ -57,14 +57,14 @@ static float getUpdateThreshold(); */ sim_node createSimNode(int userid, float event) { - sim_node newnode; - - newnode = (sim_node) palloc(sizeof(struct sim_node_t)); - newnode->id = userid; - newnode->event = event; - newnode->next = NULL; - - return newnode; + sim_node newnode; + + newnode = (sim_node) palloc(sizeof(struct sim_node_t)); + newnode->id = userid; + newnode->event = event; + newnode->next = NULL; + + return newnode; } /* ---------------------------------------------------------------- @@ -76,28 +76,28 @@ createSimNode(int userid, float event) { */ sim_node simInsert(sim_node target, sim_node newnode) { - sim_node tempnode; - - // Base case 1: target is empty. - if (!target) return newnode; - - // Base case 2: target belongs at the head of the list. - tempnode = target; - if (newnode->id <= tempnode->id) { - newnode->next = tempnode; - return newnode; - } - - // Normal case. - while (tempnode->next) { - if (newnode->id <= tempnode->next->id) break; - tempnode = tempnode->next; - } - - newnode->next = tempnode->next; - tempnode->next = newnode; - - return target; + sim_node tempnode; + + // Base case 1: target is empty. + if (!target) return newnode; + + // Base case 2: target belongs at the head of the list. + tempnode = target; + if (newnode->id <= tempnode->id) { + newnode->next = tempnode; + return newnode; + } + + // Normal case. + while (tempnode->next) { + if (newnode->id <= tempnode->next->id) break; + tempnode = tempnode->next; + } + + newnode->next = tempnode->next; + tempnode->next = newnode; + + return target; } /* ---------------------------------------------------------------- @@ -108,12 +108,12 @@ simInsert(sim_node target, sim_node newnode) { */ void freeSimList(sim_node head) { - sim_node temp; - while (head) { - temp = head->next; - pfree(head); - head = temp; - } + sim_node temp; + while (head) { + temp = head->next; + pfree(head); + head = temp; + } } /* ---------------------------------------------------------------- @@ -125,15 +125,15 @@ freeSimList(sim_node head) { */ nbr_node createNbrNode(int item1, int item2, float similarity) { - nbr_node newnode; - - newnode = (nbr_node) palloc(sizeof(struct nbr_node_t)); - newnode->item1 = item1; - newnode->item2 = item2; - newnode->similarity = similarity; - newnode->next = NULL; - - return newnode; + nbr_node newnode; + + newnode = (nbr_node) palloc(sizeof(struct nbr_node_t)); + newnode->item1 = item1; + newnode->item2 = item2; + newnode->similarity = similarity; + newnode->next = NULL; + + return newnode; } /* ---------------------------------------------------------------- @@ -145,43 +145,43 @@ createNbrNode(int item1, int item2, float similarity) { */ nbr_node nbrInsert(nbr_node target, nbr_node newnode, int maxsize) { - int i; - nbr_node tempnode; - bool inserted = false; - - // Base case 1: target is empty. - if (!target) return newnode; - - // Base case 2: target belongs at the head of the list. - if (newnode->similarity >= target->similarity) { - newnode->next = target; - target = newnode; - inserted = true; - } - - tempnode = target; - i = 1; - - // Normal case. - while (tempnode->next && i < maxsize) { - if (newnode->similarity >= tempnode->next->similarity) { - if (!inserted) { - newnode->next = tempnode->next; - tempnode->next = newnode; - inserted = true; - } - } - tempnode = tempnode->next; - i++; - } - - // If we've run out of room on our list. - if (tempnode->next && i >= maxsize) { - pfree(tempnode->next); - tempnode->next = NULL; - } - - return target; + int i; + nbr_node tempnode; + bool inserted = false; + + // Base case 1: target is empty. + if (!target) return newnode; + + // Base case 2: target belongs at the head of the list. + if (newnode->similarity >= target->similarity) { + newnode->next = target; + target = newnode; + inserted = true; + } + + tempnode = target; + i = 1; + + // Normal case. + while (tempnode->next && i < maxsize) { + if (newnode->similarity >= tempnode->next->similarity) { + if (!inserted) { + newnode->next = tempnode->next; + tempnode->next = newnode; + inserted = true; + } + } + tempnode = tempnode->next; + i++; + } + + // If we've run out of room on our list. + if (tempnode->next && i >= maxsize) { + pfree(tempnode->next); + tempnode->next = NULL; + } + + return target; } /* ---------------------------------------------------------------- @@ -192,12 +192,12 @@ nbrInsert(nbr_node target, nbr_node newnode, int maxsize) { */ void freeNbrList(nbr_node head) { - nbr_node temp; - while (head) { - temp = head->next; - pfree(head); - head = temp; - } + nbr_node temp; + while (head) { + temp = head->next; + pfree(head); + head = temp; + } } /* ---------------------------------------------------------------- @@ -212,47 +212,47 @@ freeNbrList(nbr_node head) { */ QueryDesc * recathon_queryStart(char *query_string, MemoryContext *recathoncontext) { - List *parsetree_list, *querytree_list, *plantree_list; - Node *parsetree; - QueryDesc *queryDesc; - MemoryContext newcontext, oldcontext; - - // First we'll create a new memory context to operate in. - newcontext = AllocSetContextCreate(CurrentMemoryContext, - "RecathonQuery", - ALLOCSET_DEFAULT_MINSIZE, - ALLOCSET_DEFAULT_INITSIZE, - ALLOCSET_DEFAULT_MAXSIZE); - oldcontext = MemoryContextSwitchTo(newcontext); - - // Now we parse the query and get a parse tree. - parsetree_list = pg_parse_query(query_string); - - // There should be only one item in the parse tree. - parsetree = lfirst(parsetree_list->head); - - // Now we generate plan trees. - querytree_list = pg_analyze_and_rewrite(parsetree, query_string, NULL, 0); - plantree_list = pg_plan_queries(querytree_list, 0, NULL); - - // Now we need to update the current snapshot. - PushCopiedSnapshot(GetActiveSnapshot()); - UpdateActiveSnapshotCommandId(); - - // We need to do the ExecProcNode stage of the query, which means that we - // need an intact planstate. The following code just creates this state. - queryDesc = CreateQueryDesc((PlannedStmt*) linitial(plantree_list), - query_string, - GetActiveSnapshot(), - InvalidSnapshot, - None_Receiver, NULL, 0); - ExecutorStart(queryDesc, 0); - - // Return the newly created memory context. - MemoryContextSwitchTo(oldcontext); - (*recathoncontext) = newcontext; - - return queryDesc; + List *parsetree_list, *querytree_list, *plantree_list; + Node *parsetree; + QueryDesc *queryDesc; + MemoryContext newcontext, oldcontext; + + // First we'll create a new memory context to operate in. + newcontext = AllocSetContextCreate(CurrentMemoryContext, + "RecathonQuery", + ALLOCSET_DEFAULT_MINSIZE, + ALLOCSET_DEFAULT_INITSIZE, + ALLOCSET_DEFAULT_MAXSIZE); + oldcontext = MemoryContextSwitchTo(newcontext); + + // Now we parse the query and get a parse tree. + parsetree_list = pg_parse_query(query_string); + + // There should be only one item in the parse tree. + parsetree = lfirst(parsetree_list->head); + + // Now we generate plan trees. + querytree_list = pg_analyze_and_rewrite(parsetree, query_string, NULL, 0); + plantree_list = pg_plan_queries(querytree_list, 0, NULL); + + // Now we need to update the current snapshot. + PushCopiedSnapshot(GetActiveSnapshot()); + UpdateActiveSnapshotCommandId(); + + // We need to do the ExecProcNode stage of the query, which means that we + // need an intact planstate. The following code just creates this state. + queryDesc = CreateQueryDesc((PlannedStmt*) linitial(plantree_list), + query_string, + GetActiveSnapshot(), + InvalidSnapshot, + None_Receiver, NULL, 0); + ExecutorStart(queryDesc, 0); + + // Return the newly created memory context. + MemoryContextSwitchTo(oldcontext); + (*recathoncontext) = newcontext; + + return queryDesc; } /* ---------------------------------------------------------------- @@ -263,21 +263,21 @@ recathon_queryStart(char *query_string, MemoryContext *recathoncontext) { */ void recathon_queryEnd(QueryDesc *queryDesc, MemoryContext recathoncontext) { - MemoryContext oldcontext; - - oldcontext = MemoryContextSwitchTo(recathoncontext); - - // End the query. - ExecutorFinish(queryDesc); - ExecutorEnd(queryDesc); - FreeQueryDesc(queryDesc); - - // Pop our snapshot. - PopActiveSnapshot(); - - // Delete our memory context. - MemoryContextSwitchTo(oldcontext); - MemoryContextDelete(recathoncontext); + MemoryContext oldcontext; + + oldcontext = MemoryContextSwitchTo(recathoncontext); + + // End the query. + ExecutorFinish(queryDesc); + ExecutorEnd(queryDesc); + FreeQueryDesc(queryDesc); + + // Pop our snapshot. + PopActiveSnapshot(); + + // Delete our memory context. + MemoryContextSwitchTo(oldcontext); + MemoryContextDelete(recathoncontext); } /* ---------------------------------------------------------------- @@ -288,17 +288,17 @@ recathon_queryEnd(QueryDesc *queryDesc, MemoryContext recathoncontext) { */ void recathon_queryExecute(char *query_string) { - QueryDesc *queryDesc; - MemoryContext recathoncontext, oldcontext; - - // We do the query start and end, and sandwich ExecutorRun in the middle. - queryDesc = recathon_queryStart(query_string, &recathoncontext); - - oldcontext = MemoryContextSwitchTo(recathoncontext); - ExecutorRun(queryDesc, ForwardScanDirection, 0); - MemoryContextSwitchTo(oldcontext); - - recathon_queryEnd(queryDesc, recathoncontext); + QueryDesc *queryDesc; + MemoryContext recathoncontext, oldcontext; + + // We do the query start and end, and sandwich ExecutorRun in the middle. + queryDesc = recathon_queryStart(query_string, &recathoncontext); + + oldcontext = MemoryContextSwitchTo(recathoncontext); + ExecutorRun(queryDesc, ForwardScanDirection, 0); + MemoryContextSwitchTo(oldcontext); + + recathon_queryEnd(queryDesc, recathoncontext); } /* ---------------------------------------------------------------- @@ -311,38 +311,38 @@ recathon_queryExecute(char *query_string) { */ void recathon_utilityExecute(char *query_string) { - List *parsetree_list, *querytree_list, *plantree_list; - MemoryContext recathoncontext, oldcontext; - Node *parsetree, *utilStmt; - - // We do this inside another memory context - // so we can rid ourselves of this memory easily. - recathoncontext = AllocSetContextCreate(CurrentMemoryContext, - "RecathonExecute", - ALLOCSET_DEFAULT_MINSIZE, - ALLOCSET_DEFAULT_INITSIZE, - ALLOCSET_DEFAULT_MAXSIZE); - oldcontext = MemoryContextSwitchTo(recathoncontext); - - // Now we parse the query and get a parse tree. - parsetree_list = pg_parse_query(query_string); - - // There should be only one item in the parse tree. - parsetree = lfirst(parsetree_list->head); - - // Now we generate plan trees. - querytree_list = pg_analyze_and_rewrite(parsetree, query_string, NULL, 0); - plantree_list = pg_plan_queries(querytree_list, 0, NULL); - - // Isolate the statement. - utilStmt = (Node*) lfirst(list_head(plantree_list)); - - // Execute the query. - ProcessUtility(utilStmt, query_string, NULL, true, None_Receiver, NULL); - - // Nothing left to do. - MemoryContextSwitchTo(oldcontext); - MemoryContextDelete(recathoncontext); + List *parsetree_list, *querytree_list, *plantree_list; + MemoryContext recathoncontext, oldcontext; + Node *parsetree, *utilStmt; + + // We do this inside another memory context + // so we can rid ourselves of this memory easily. + recathoncontext = AllocSetContextCreate(CurrentMemoryContext, + "RecathonExecute", + ALLOCSET_DEFAULT_MINSIZE, + ALLOCSET_DEFAULT_INITSIZE, + ALLOCSET_DEFAULT_MAXSIZE); + oldcontext = MemoryContextSwitchTo(recathoncontext); + + // Now we parse the query and get a parse tree. + parsetree_list = pg_parse_query(query_string); + + // There should be only one item in the parse tree. + parsetree = lfirst(parsetree_list->head); + + // Now we generate plan trees. + querytree_list = pg_analyze_and_rewrite(parsetree, query_string, NULL, 0); + plantree_list = pg_plan_queries(querytree_list, 0, NULL); + + // Isolate the statement. + utilStmt = (Node*) lfirst(list_head(plantree_list)); + + // Execute the query. + ProcessUtility(utilStmt, query_string, NULL, true, None_Receiver, NULL); + + // Nothing left to do. + MemoryContextSwitchTo(oldcontext); + MemoryContextDelete(recathoncontext); } /* ---------------------------------------------------------------- @@ -355,16 +355,16 @@ recathon_utilityExecute(char *query_string) { */ RecScan* make_rec_from_scan(Scan *subscan, Node *recommender) { - RecScan *recscan; - - recscan = (RecScan*) makeNode(RecScan); - recscan->scan.plan = subscan->plan; - recscan->scan.scanrelid = subscan->scanrelid; - recscan->scan.plan.type = T_RecScan; - recscan->subscan = subscan; - recscan->recommender = recommender; - - return recscan; + RecScan *recscan; + + recscan = (RecScan*) makeNode(RecScan); + recscan->scan.plan = subscan->plan; + recscan->scan.scanrelid = subscan->scanrelid; + recscan->scan.plan.type = T_RecScan; + recscan->subscan = subscan; + recscan->recommender = recommender; + + return recscan; } /* ---------------------------------------------------------------- @@ -377,16 +377,16 @@ make_rec_from_scan(Scan *subscan, Node *recommender) { */ RecJoin* make_rec_from_join(Join *subjoin) { - RecJoin *recjoin; - - recjoin = (RecJoin*) makeNode(RecJoin); - recjoin->join.plan = subjoin->plan; - recjoin->join.jointype = subjoin->jointype; - recjoin->join.joinqual = subjoin->joinqual; - recjoin->join.plan.type = T_RecJoin; - recjoin->subjoin = subjoin; - - return recjoin; + RecJoin *recjoin; + + recjoin = (RecJoin*) makeNode(RecJoin); + recjoin->join.plan = subjoin->plan; + recjoin->join.jointype = subjoin->jointype; + recjoin->join.joinqual = subjoin->joinqual; + recjoin->join.plan.type = T_RecJoin; + recjoin->subjoin = subjoin; + + return recjoin; } /* ---------------------------------------------------------------- @@ -398,62 +398,62 @@ make_rec_from_join(Join *subjoin) { */ int count_rows(char *tablename) { - int i, numItems, natts; - // Objects for querying. - char *querystring; - QueryDesc *queryDesc; - PlanState *planstate; - TupleTableSlot *slot; - MemoryContext recathoncontext; - - // We start with a simple query to get the number of items. - querystring = (char*) palloc(256*sizeof(char)); - sprintf(querystring,"SELECT COUNT(*) FROM %s;",tablename); - queryDesc = recathon_queryStart(querystring,&recathoncontext); - planstate = queryDesc->planstate; - - slot = ExecProcNode(planstate); - if (TupIsNull(slot)) { - recathon_queryEnd(queryDesc,recathoncontext); - pfree(querystring); - return -1; - } - slot_getallattrs(slot); - natts = slot->tts_tupleDescriptor->natts; - - // Silence the compiler. - numItems = 0; - - for (i = 0; i < natts; i++) { - if (!slot->tts_isnull[i]) { - Datum slot_result; - unsigned int data_type; - - slot_result = slot->tts_values[i]; - data_type = slot->tts_tupleDescriptor->attrs[i]->atttypid; - - switch (data_type) { - case INT8OID: - numItems = (int) DatumGetInt64(slot_result); - break; - case INT2OID: - numItems = (int) DatumGetInt16(slot_result); - break; - case INT4OID: - numItems = (int) DatumGetInt32(slot_result); - break; - default: - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("fatal error in count_rows()"))); - } - } - } - - recathon_queryEnd(queryDesc,recathoncontext); - pfree(querystring); - - return numItems; + int i, numItems, natts; + // Objects for querying. + char *querystring; + QueryDesc *queryDesc; + PlanState *planstate; + TupleTableSlot *slot; + MemoryContext recathoncontext; + + // We start with a simple query to get the number of items. + querystring = (char*) palloc(256*sizeof(char)); + sprintf(querystring,"SELECT COUNT(*) FROM %s;",tablename); + queryDesc = recathon_queryStart(querystring,&recathoncontext); + planstate = queryDesc->planstate; + + slot = ExecProcNode(planstate); + if (TupIsNull(slot)) { + recathon_queryEnd(queryDesc,recathoncontext); + pfree(querystring); + return -1; + } + slot_getallattrs(slot); + natts = slot->tts_tupleDescriptor->natts; + + // Silence the compiler. + numItems = 0; + + for (i = 0; i < natts; i++) { + if (!slot->tts_isnull[i]) { + Datum slot_result; + unsigned int data_type; + + slot_result = slot->tts_values[i]; + data_type = slot->tts_tupleDescriptor->attrs[i]->atttypid; + + switch (data_type) { + case INT8OID: + numItems = (int) DatumGetInt64(slot_result); + break; + case INT2OID: + numItems = (int) DatumGetInt16(slot_result); + break; + case INT4OID: + numItems = (int) DatumGetInt32(slot_result); + break; + default: + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("fatal error in count_rows()"))); + } + } + } + + recathon_queryEnd(queryDesc,recathoncontext); + pfree(querystring); + + return numItems; } /* ---------------------------------------------------------------- @@ -465,44 +465,44 @@ count_rows(char *tablename) { */ int getTupleInt(TupleTableSlot *slot, char *attname) { - int i, natts; - - slot_getallattrs(slot); - natts = slot->tts_tupleDescriptor->natts; - - for (i = 0; i < natts; i++) { - if (!slot->tts_isnull[i]) { - char *col_name; - Datum slot_result; - - col_name = slot->tts_tupleDescriptor->attrs[i]->attname.data; - slot_result = slot->tts_values[i]; - - if (strcmp(col_name, attname) == 0) { - unsigned int data_type = slot->tts_tupleDescriptor->attrs[i]->atttypid; - // The data type will tell us what to do with it. - switch (data_type) { - case INT8OID: - return (int) DatumGetInt64(slot_result); - case INT2OID: - return (int) DatumGetInt16(slot_result); - case INT4OID: - return (int) DatumGetInt32(slot_result); - case FLOAT4OID: - return (int) DatumGetFloat4(slot_result); - case FLOAT8OID: - return (int) DatumGetFloat8(slot_result); - default: - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("type mismatch in getTupleInt()"))); - break; - } - } - } - } - - return -1; + int i, natts; + + slot_getallattrs(slot); + natts = slot->tts_tupleDescriptor->natts; + + for (i = 0; i < natts; i++) { + if (!slot->tts_isnull[i]) { + char *col_name; + Datum slot_result; + + col_name = slot->tts_tupleDescriptor->attrs[i]->attname.data; + slot_result = slot->tts_values[i]; + + if (strcmp(col_name, attname) == 0) { + unsigned int data_type = slot->tts_tupleDescriptor->attrs[i]->atttypid; + // The data type will tell us what to do with it. + switch (data_type) { + case INT8OID: + return (int) DatumGetInt64(slot_result); + case INT2OID: + return (int) DatumGetInt16(slot_result); + case INT4OID: + return (int) DatumGetInt32(slot_result); + case FLOAT4OID: + return (int) DatumGetFloat4(slot_result); + case FLOAT8OID: + return (int) DatumGetFloat8(slot_result); + default: + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("type mismatch in getTupleInt()"))); + break; + } + } + } + } + + return -1; } /* ---------------------------------------------------------------- @@ -514,44 +514,44 @@ getTupleInt(TupleTableSlot *slot, char *attname) { */ float getTupleFloat(TupleTableSlot *slot, char *attname) { - int i, natts; - - slot_getallattrs(slot); - natts = slot->tts_tupleDescriptor->natts; - - for (i = 0; i < natts; i++) { - if (!slot->tts_isnull[i]) { - char *col_name; - Datum slot_result; - - col_name = slot->tts_tupleDescriptor->attrs[i]->attname.data; - slot_result = slot->tts_values[i]; - - if (strcmp(col_name, attname) == 0) { - unsigned int data_type = slot->tts_tupleDescriptor->attrs[i]->atttypid; - // The data type will tell us what to do with it. - switch (data_type) { - case FLOAT8OID: - return (float) DatumGetFloat8(slot_result); - case FLOAT4OID: - return (float) DatumGetFloat4(slot_result); - case INT8OID: - return (float) DatumGetInt64(slot_result); - case INT2OID: - return (float) DatumGetInt16(slot_result); - case INT4OID: - return (float) DatumGetInt32(slot_result); - default: - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("type mismatch in getTupleFloat()"))); - break; - } - } - } - } - - return -1.0; + int i, natts; + + slot_getallattrs(slot); + natts = slot->tts_tupleDescriptor->natts; + + for (i = 0; i < natts; i++) { + if (!slot->tts_isnull[i]) { + char *col_name; + Datum slot_result; + + col_name = slot->tts_tupleDescriptor->attrs[i]->attname.data; + slot_result = slot->tts_values[i]; + + if (strcmp(col_name, attname) == 0) { + unsigned int data_type = slot->tts_tupleDescriptor->attrs[i]->atttypid; + // The data type will tell us what to do with it. + switch (data_type) { + case FLOAT8OID: + return (float) DatumGetFloat8(slot_result); + case FLOAT4OID: + return (float) DatumGetFloat4(slot_result); + case INT8OID: + return (float) DatumGetInt64(slot_result); + case INT2OID: + return (float) DatumGetInt16(slot_result); + case INT4OID: + return (float) DatumGetInt32(slot_result); + default: + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("type mismatch in getTupleFloat()"))); + break; + } + } + } + } + + return -1.0; } /* ---------------------------------------------------------------- @@ -565,77 +565,77 @@ getTupleFloat(TupleTableSlot *slot, char *attname) { */ char* getTupleString(TupleTableSlot *slot, char *attname) { - int i, natts; - // Possible return cases. - int string_int; - float string_float; - bool string_bool; - char *rtn_string; - - slot_getallattrs(slot); - natts = slot->tts_tupleDescriptor->natts; - - for (i = 0; i < natts; i++) { - if (!slot->tts_isnull[i]) { - char *col_name; - Datum slot_result; - - col_name = slot->tts_tupleDescriptor->attrs[i]->attname.data; - slot_result = slot->tts_values[i]; - - if (strcmp(col_name, attname) == 0) { - unsigned int data_type = slot->tts_tupleDescriptor->attrs[i]->atttypid; - - // The data type will tell us what to do with it. - switch(data_type) { - case INT2OID: - string_int = (int) DatumGetInt16(slot_result); - rtn_string = (char*) palloc(32*sizeof(char)); - sprintf(rtn_string,"%d",string_int); - return rtn_string; - case INT4OID: - string_int = (int) DatumGetInt32(slot_result); - rtn_string = (char*) palloc(32*sizeof(char)); - sprintf(rtn_string,"%d",string_int); - return rtn_string; - case INT8OID: - string_int = (int) DatumGetInt64(slot_result); - rtn_string = (char*) palloc(32*sizeof(char)); - sprintf(rtn_string,"%d",string_int); - return rtn_string; - case FLOAT4OID: - string_float = (float) DatumGetFloat4(slot_result); - rtn_string = (char*) palloc(128*sizeof(char)); - snprintf(rtn_string,128,"%f",string_float); - return rtn_string; - case FLOAT8OID: - string_float = (float) DatumGetFloat8(slot_result); - rtn_string = (char*) palloc(128*sizeof(char)); - snprintf(rtn_string,128,"%f",string_float); - return rtn_string; - case BOOLOID: - string_bool = DatumGetBool(slot_result); - rtn_string = (char*) palloc(8*sizeof(char)); - if (string_bool) - sprintf(rtn_string,"true"); - else - sprintf(rtn_string,"false"); - return rtn_string; - case VARCHAROID: - case TEXTOID: - case BPCHAROID: - case BYTEAOID: - return TextDatumGetCString(slot_result); - default: - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("unsupported type in getTupleString()"))); - } - } - } - } - - return NULL; + int i, natts; + // Possible return cases. + int string_int; + float string_float; + bool string_bool; + char *rtn_string; + + slot_getallattrs(slot); + natts = slot->tts_tupleDescriptor->natts; + + for (i = 0; i < natts; i++) { + if (!slot->tts_isnull[i]) { + char *col_name; + Datum slot_result; + + col_name = slot->tts_tupleDescriptor->attrs[i]->attname.data; + slot_result = slot->tts_values[i]; + + if (strcmp(col_name, attname) == 0) { + unsigned int data_type = slot->tts_tupleDescriptor->attrs[i]->atttypid; + + // The data type will tell us what to do with it. + switch(data_type) { + case INT2OID: + string_int = (int) DatumGetInt16(slot_result); + rtn_string = (char*) palloc(32*sizeof(char)); + sprintf(rtn_string,"%d",string_int); + return rtn_string; + case INT4OID: + string_int = (int) DatumGetInt32(slot_result); + rtn_string = (char*) palloc(32*sizeof(char)); + sprintf(rtn_string,"%d",string_int); + return rtn_string; + case INT8OID: + string_int = (int) DatumGetInt64(slot_result); + rtn_string = (char*) palloc(32*sizeof(char)); + sprintf(rtn_string,"%d",string_int); + return rtn_string; + case FLOAT4OID: + string_float = (float) DatumGetFloat4(slot_result); + rtn_string = (char*) palloc(128*sizeof(char)); + snprintf(rtn_string,128,"%f",string_float); + return rtn_string; + case FLOAT8OID: + string_float = (float) DatumGetFloat8(slot_result); + rtn_string = (char*) palloc(128*sizeof(char)); + snprintf(rtn_string,128,"%f",string_float); + return rtn_string; + case BOOLOID: + string_bool = DatumGetBool(slot_result); + rtn_string = (char*) palloc(8*sizeof(char)); + if (string_bool) + sprintf(rtn_string,"true"); + else + sprintf(rtn_string,"false"); + return rtn_string; + case VARCHAROID: + case TEXTOID: + case BPCHAROID: + case BYTEAOID: + return TextDatumGetCString(slot_result); + default: + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("unsupported type in getTupleString()"))); + } + } + } + } + + return NULL; } /* ---------------------------------------------------------------- @@ -647,9 +647,9 @@ getTupleString(TupleTableSlot *slot, char *attname) { */ bool relationExists(RangeVar* relation) { - Oid testOid; - testOid = RangeVarGetRelid(relation,0,true); - return OidIsValid(testOid); + Oid testOid; + testOid = RangeVarGetRelid(relation,0,true); + return OidIsValid(testOid); } /* ---------------------------------------------------------------- @@ -661,38 +661,38 @@ relationExists(RangeVar* relation) { */ bool columnExistsInRelation(char *colname, RangeVar *relation) { - Oid relOid; - Relation newRel; - RangeTblEntry *rte; - ListCell *c; - bool foundColumn; - - // Step 1: build a proper RTE to use. - relOid = RangeVarGetRelid(relation,0,true); - // Double-check to make sure the table exists. - if (!OidIsValid(relOid)) return false; - newRel = relation_open(relOid,NoLock); - rte = addRangeTableEntryForRelation(NULL,newRel,NULL,false,false); - - // Step 2: cross-reference the relation columns and - // our provided column name. - foundColumn = false; - foreach(c, rte->eref->colnames) - { - if (strcmp(strVal(lfirst(c)), colname) == 0) - { - if (foundColumn) { - perror("Ambiguous column request.\n"); - return false; - } else { - foundColumn = true; - } - } - } - // Close the relation to avoid leaks. - relation_close(newRel,NoLock); - pfree(rte); - return foundColumn; + Oid relOid; + Relation newRel; + RangeTblEntry *rte; + ListCell *c; + bool foundColumn; + + // Step 1: build a proper RTE to use. + relOid = RangeVarGetRelid(relation,0,true); + // Double-check to make sure the table exists. + if (!OidIsValid(relOid)) return false; + newRel = relation_open(relOid,NoLock); + rte = addRangeTableEntryForRelation(NULL,newRel,NULL,false,false); + + // Step 2: cross-reference the relation columns and + // our provided column name. + foundColumn = false; + foreach(c, rte->eref->colnames) + { + if (strcmp(strVal(lfirst(c)), colname) == 0) + { + if (foundColumn) { + perror("Ambiguous column request.\n"); + return false; + } else { + foundColumn = true; + } + } + } + // Close the relation to avoid leaks. + relation_close(newRel,NoLock); + pfree(rte); + return foundColumn; } /* ---------------------------------------------------------------- @@ -705,45 +705,45 @@ columnExistsInRelation(char *colname, RangeVar *relation) { */ char* retrieveRecommender(char *eventtable, char *method) { - RangeVar *cataloguerv; - char *querystring, *recindexname; - QueryDesc *queryDesc; - PlanState *planstate; - TupleTableSlot *slot; - MemoryContext recathoncontext; - - // If this fails, there's no RecModelsCatalogue, so - // there are no recommenders. - cataloguerv = makeRangeVar(NULL,"recmodelscatalogue",0); - if (!relationExists(cataloguerv)) { - pfree(cataloguerv); - return NULL; - } - pfree(cataloguerv); - - // If the catalogue does exist, we'll query it looking - // for recommenders based on the given information. - querystring = (char*) palloc(1024*sizeof(char)); - sprintf(querystring,"SELECT recommenderindexname FROM RecModelsCatalogue WHERE eventtable = '%s' AND method = '%s';", - eventtable, method); - queryDesc = recathon_queryStart(querystring,&recathoncontext); - planstate = queryDesc->planstate; - - slot = ExecProcNode(planstate); - // If there are no results, the recommender does not exist. - if (TupIsNull(slot)) { - recathon_queryEnd(queryDesc,recathoncontext); - pfree(querystring); - return NULL; - } - - recindexname = getTupleString(slot,"recommenderindexname"); - - // Query cleanup. - recathon_queryEnd(queryDesc,recathoncontext); - pfree(querystring); - - return recindexname; + RangeVar *cataloguerv; + char *querystring, *recindexname; + QueryDesc *queryDesc; + PlanState *planstate; + TupleTableSlot *slot; + MemoryContext recathoncontext; + + // If this fails, there's no RecModelsCatalogue, so + // there are no recommenders. + cataloguerv = makeRangeVar(NULL,"recmodelscatalogue",0); + if (!relationExists(cataloguerv)) { + pfree(cataloguerv); + return NULL; + } + pfree(cataloguerv); + + // If the catalogue does exist, we'll query it looking + // for recommenders based on the given information. + querystring = (char*) palloc(1024*sizeof(char)); + sprintf(querystring,"SELECT recommenderindexname FROM RecModelsCatalogue WHERE eventtable = '%s' AND method = '%s';", + eventtable, method); + queryDesc = recathon_queryStart(querystring,&recathoncontext); + planstate = queryDesc->planstate; + + slot = ExecProcNode(planstate); + // If there are no results, the recommender does not exist. + if (TupIsNull(slot)) { + recathon_queryEnd(queryDesc,recathoncontext); + pfree(querystring); + return NULL; + } + + recindexname = getTupleString(slot,"recommenderindexname"); + + // Query cleanup. + recathon_queryEnd(queryDesc,recathoncontext); + pfree(querystring); + + return recindexname; } /* ---------------------------------------------------------------- @@ -758,56 +758,56 @@ retrieveRecommender(char *eventtable, char *method) { */ void getRecInfo(char *recindexname, char **ret_eventtable, - char **ret_userkey, char **ret_itemkey, - char **ret_eventval, char **ret_method, int *ret_numatts) { - char *eventtable, *userkey, *itemkey, *eventval, *method; - // Information for query. - char *querystring; - QueryDesc *queryDesc; - PlanState *planstate; - TupleTableSlot *slot; - MemoryContext recathoncontext; - - querystring = (char*) palloc(1024*sizeof(char)); - sprintf(querystring,"SELECT * FROM RecModelsCatalogue WHERE recommenderindexname = '%s';", - recindexname); - queryDesc = recathon_queryStart(querystring,&recathoncontext); - planstate = queryDesc->planstate; - - slot = ExecProcNode(planstate); - // This should never happen. - if (TupIsNull(slot)) - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("fatal error in getRecInfo()"))); - - // Obtain each of the values needed. - if (ret_eventtable) { - eventtable = getTupleString(slot,"eventtable"); - (*ret_eventtable) = eventtable; - } - if (ret_userkey) { - userkey = getTupleString(slot,"userkey"); - (*ret_userkey) = userkey; - } - if (ret_itemkey) { - itemkey = getTupleString(slot,"itemkey"); - (*ret_itemkey) = itemkey; - } - if (ret_eventval) { - eventval = getTupleString(slot,"eventval"); - (*ret_eventval) = eventval; - } - if (ret_method) { - method = getTupleString(slot,"method"); - (*ret_method) = method; - } - if (ret_numatts) - (*ret_numatts) = getTupleInt(slot,"contextattributes"); - - // Cleanup. - recathon_queryEnd(queryDesc,recathoncontext); - pfree(querystring); + char **ret_userkey, char **ret_itemkey, + char **ret_eventval, char **ret_method, int *ret_numatts) { + char *eventtable, *userkey, *itemkey, *eventval, *method; + // Information for query. + char *querystring; + QueryDesc *queryDesc; + PlanState *planstate; + TupleTableSlot *slot; + MemoryContext recathoncontext; + + querystring = (char*) palloc(1024*sizeof(char)); + sprintf(querystring,"SELECT * FROM RecModelsCatalogue WHERE recommenderindexname = '%s';", + recindexname); + queryDesc = recathon_queryStart(querystring,&recathoncontext); + planstate = queryDesc->planstate; + + slot = ExecProcNode(planstate); + // This should never happen. + if (TupIsNull(slot)) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("fatal error in getRecInfo()"))); + + // Obtain each of the values needed. + if (ret_eventtable) { + eventtable = getTupleString(slot,"eventtable"); + (*ret_eventtable) = eventtable; + } + if (ret_userkey) { + userkey = getTupleString(slot,"userkey"); + (*ret_userkey) = userkey; + } + if (ret_itemkey) { + itemkey = getTupleString(slot,"itemkey"); + (*ret_itemkey) = itemkey; + } + if (ret_eventval) { + eventval = getTupleString(slot,"eventval"); + (*ret_eventval) = eventval; + } + if (ret_method) { + method = getTupleString(slot,"method"); + (*ret_method) = method; + } + if (ret_numatts) + (*ret_numatts) = getTupleInt(slot,"contextattributes"); + + // Cleanup. + recathon_queryEnd(queryDesc,recathoncontext); + pfree(querystring); } /* ---------------------------------------------------------------- @@ -820,64 +820,64 @@ getRecInfo(char *recindexname, char **ret_eventtable, */ recMethod validateCreateRStmt(CreateRStmt *recStmt) { - recMethod method; - - // Our first test is to make sure the ratings table exists. - if (!relationExists(recStmt->eventtable)) - ereport(ERROR, - (errcode(ERRCODE_UNDEFINED_TABLE), - errmsg("relation \"%s\" does not exist", - recStmt->eventtable->relname))); - - // Our second test is to see whether or not a recommender has already - // been created with the given events table and method, or name. - if (relationExists(recStmt->recname)) - ereport(ERROR, - (errcode(ERRCODE_UNDEFINED_TABLE), - errmsg("a recommender with name \"%s\" already exists", - recStmt->recname->relname))); - - if (retrieveRecommender(recStmt->eventtable->relname,recStmt->method) != NULL) - ereport(ERROR, - (errcode(ERRCODE_UNDEFINED_TABLE), - errmsg("recommender on table \"%s\" using method \"%s\" already exists", - recStmt->eventtable->relname,recStmt->method))); - - // We next need to test that the provided columns - // exist in the events table. - // Test: user key is in event table. - if (!columnExistsInRelation(recStmt->userkey,recStmt->eventtable)) - ereport(ERROR, - (errcode(ERRCODE_UNDEFINED_COLUMN), - errmsg("column \"%s\" does not exist in relation \"%s\"", - recStmt->userkey,recStmt->eventtable->relname))); - // Test: item key is in event table. - if (!columnExistsInRelation(recStmt->itemkey,recStmt->eventtable)) - ereport(ERROR, - (errcode(ERRCODE_UNDEFINED_COLUMN), - errmsg("column \"%s\" does not exist in relation \"%s\"", - recStmt->itemkey,recStmt->eventtable->relname))); - // Test: event value is in event table. - if (!columnExistsInRelation(recStmt->eventval,recStmt->eventtable)) - ereport(ERROR, - (errcode(ERRCODE_UNDEFINED_COLUMN), - errmsg("column \"%s\" does not exist in relation \"%s\"", - recStmt->eventval,recStmt->eventtable->relname))); - - // Now we convert our method name. - method = itemCosCF; - // To handle the case where no USING clause was provided. - if (recStmt->method) { - method = getRecMethod(recStmt->method); - if (method < 0) - ereport(ERROR, - (errcode(ERRCODE_CASE_NOT_FOUND), - errmsg("recommendation method %s not recognized", - recStmt->method))); - } - - // And return. - return method; + recMethod method; + + // Our first test is to make sure the ratings table exists. + if (!relationExists(recStmt->eventtable)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_TABLE), + errmsg("relation \"%s\" does not exist", + recStmt->eventtable->relname))); + + // Our second test is to see whether or not a recommender has already + // been created with the given events table and method, or name. + if (relationExists(recStmt->recname)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_TABLE), + errmsg("a recommender with name \"%s\" already exists", + recStmt->recname->relname))); + + if (retrieveRecommender(recStmt->eventtable->relname,recStmt->method) != NULL) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_TABLE), + errmsg("recommender on table \"%s\" using method \"%s\" already exists", + recStmt->eventtable->relname,recStmt->method))); + + // We next need to test that the provided columns + // exist in the events table. + // Test: user key is in event table. + if (!columnExistsInRelation(recStmt->userkey,recStmt->eventtable)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_COLUMN), + errmsg("column \"%s\" does not exist in relation \"%s\"", + recStmt->userkey,recStmt->eventtable->relname))); + // Test: item key is in event table. + if (!columnExistsInRelation(recStmt->itemkey,recStmt->eventtable)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_COLUMN), + errmsg("column \"%s\" does not exist in relation \"%s\"", + recStmt->itemkey,recStmt->eventtable->relname))); + // Test: event value is in event table. + if (!columnExistsInRelation(recStmt->eventval,recStmt->eventtable)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_COLUMN), + errmsg("column \"%s\" does not exist in relation \"%s\"", + recStmt->eventval,recStmt->eventtable->relname))); + + // Now we convert our method name. + method = itemCosCF; + // To handle the case where no USING clause was provided. + if (recStmt->method) { + method = getRecMethod(recStmt->method); + if (method < 0) + ereport(ERROR, + (errcode(ERRCODE_CASE_NOT_FOUND), + errmsg("recommendation method %s not recognized", + recStmt->method))); + } + + // And return. + return method; } /* ---------------------------------------------------------------- @@ -888,20 +888,20 @@ validateCreateRStmt(CreateRStmt *recStmt) { */ recMethod getRecMethod(char *method) { - if (!method) return -1; - - if (strcmp("itemcoscf",method) == 0) - return itemCosCF; - else if (strcmp("itempearcf",method) == 0) - return itemPearCF; - else if (strcmp("usercoscf",method) == 0) - return userCosCF; - else if (strcmp("userpearcf",method) == 0) - return userPearCF; - else if (strcmp("svd",method) == 0) - return SVD; - else - return -1; + if (!method) return -1; + + if (strcmp("itemcoscf",method) == 0) + return itemCosCF; + else if (strcmp("itempearcf",method) == 0) + return itemPearCF; + else if (strcmp("usercoscf",method) == 0) + return userCosCF; + else if (strcmp("userpearcf",method) == 0) + return userPearCF; + else if (strcmp("svd",method) == 0) + return SVD; + else + return -1; } /* ---------------------------------------------------------------- @@ -912,40 +912,40 @@ getRecMethod(char *method) { */ static float getUpdateThreshold() { - float threshold = -1; - RangeVar *testrv; - // Query information. - char *querystring; - QueryDesc *queryDesc; - PlanState *planstate; - TupleTableSlot *slot; - MemoryContext recathoncontext; - - testrv = makeRangeVar(NULL,"recdbproperties",0); - if (!relationExists(testrv)) { - pfree(testrv); - return -1; - } - pfree(testrv); - - querystring = (char*) palloc(128*sizeof(char)); - sprintf(querystring,"SELECT update_threshold FROM recdbproperties;"); - queryDesc = recathon_queryStart(querystring,&recathoncontext); - planstate = queryDesc->planstate; - - slot = ExecProcNode(planstate); - if (TupIsNull(slot)) { - recathon_queryEnd(queryDesc,recathoncontext); - pfree(querystring); - return -1; - } - - threshold = getTupleFloat(slot,"update_threshold"); - - recathon_queryEnd(queryDesc,recathoncontext); - pfree(querystring); - - return threshold; + float threshold = -1; + RangeVar *testrv; + // Query information. + char *querystring; + QueryDesc *queryDesc; + PlanState *planstate; + TupleTableSlot *slot; + MemoryContext recathoncontext; + + testrv = makeRangeVar(NULL,"recdbproperties",0); + if (!relationExists(testrv)) { + pfree(testrv); + return -1; + } + pfree(testrv); + + querystring = (char*) palloc(128*sizeof(char)); + sprintf(querystring,"SELECT update_threshold FROM recdbproperties;"); + queryDesc = recathon_queryStart(querystring,&recathoncontext); + planstate = queryDesc->planstate; + + slot = ExecProcNode(planstate); + if (TupIsNull(slot)) { + recathon_queryEnd(queryDesc,recathoncontext); + pfree(querystring); + return -1; + } + + threshold = getTupleFloat(slot,"update_threshold"); + + recathon_queryEnd(queryDesc,recathoncontext); + pfree(querystring); + + return threshold; } /* ---------------------------------------------------------------- @@ -959,247 +959,247 @@ getUpdateThreshold() { */ void updateCellCounter(char *eventtable, TupleTableSlot *insertslot) { - float update_threshold; - RangeVar *cataloguerv; - // Query information. - char *querystring; - QueryDesc *queryDesc; - PlanState *planstate; - TupleTableSlot *slot; - MemoryContext recathoncontext; - - // If this fails, there's no RecModelsCatalogue, so - // there are no recommenders. - cataloguerv = makeRangeVar(NULL,"recmodelscatalogue",0); - if (!relationExists(cataloguerv)) { - pfree(cataloguerv); - return; - } - pfree(cataloguerv); - - // Obtain the update threshold. - update_threshold = getUpdateThreshold(); - - // Now that we've confirmed the RecModelsCatalogue - // exists, let's query it to find the necessary - // information. - querystring = (char*) palloc(1024*sizeof(char)); - sprintf(querystring,"SELECT * FROM RecModelsCatalogue WHERE eventtable = '%s';", - eventtable); - queryDesc = recathon_queryStart(querystring,&recathoncontext); - planstate = queryDesc->planstate; - - for (;;) { - // In case of SVD, recmodelname is the user model, and the other is the - // item model. Otherwise, recmodelname2 is nothing. - char *recindexname, *recmodelname, *recmodelname2; - char *userkey, *itemkey, *eventval, *strmethod; - int updatecounter = -1; - int eventtotal = -1; - recMethod method; - // Query information for our internal query. - char *countquerystring; - QueryDesc *countqueryDesc; - PlanState *countplanstate; - TupleTableSlot *countslot; - MemoryContext countcontext; - - slot = ExecProcNode(planstate); - if (TupIsNull(slot)) break; - - // Acquire the data for this recommender. - recindexname = getTupleString(slot,"recommenderindexname"); - userkey = getTupleString(slot,"userkey"); - itemkey = getTupleString(slot,"itemkey"); - eventval = getTupleString(slot,"eventval"); - strmethod = getTupleString(slot,"method"); - - // Get the recMethod. - method = getRecMethod(strmethod); - pfree(strmethod); - - // Failure case, continue to next tuple. - if (method < 0) { - pfree(recindexname); - pfree(userkey); - pfree(itemkey); - pfree(eventval); - continue; - } - - // We now have all the information necessary to update this - // recommender's cell counter. First we need to acquire it, - // and we might as well get the model name while we're at it. - countquerystring = (char*) palloc(1024*sizeof(char)); - if (method == SVD) - sprintf(countquerystring,"SELECT recusermodelname, recitemmodelname, updatecounter, eventtotal FROM %s;", - recindexname); - else - sprintf(countquerystring,"SELECT recmodelname, updatecounter, eventtotal FROM %s;", - recindexname); - - countqueryDesc = recathon_queryStart(countquerystring,&countcontext); - countplanstate = countqueryDesc->planstate; - - // Go through what should be the only tuple and obtain the data. - countslot = ExecProcNode(countplanstate); - if (TupIsNull(countslot)) { - // More failure conditions. We can't just error out - // because the INSERT still needs to happen. - recathon_queryEnd(countqueryDesc,countcontext); - pfree(countquerystring); - pfree(recindexname); - pfree(userkey); - pfree(itemkey); - pfree(eventval); - continue; - } - - // Get the relevant data. - if (method == SVD) { - recmodelname = getTupleString(countslot,"recusermodelname"); - recmodelname2 = getTupleString(countslot,"recitemmodelname"); - } else { - recmodelname = getTupleString(countslot,"recmodelname"); - recmodelname2 = NULL; - } - updatecounter = getTupleInt(countslot,"updatecounter"); - eventtotal = getTupleInt(countslot,"eventtotal"); - - recathon_queryEnd(countqueryDesc,countcontext); - pfree(countquerystring); - - // Even more failure conditions. - if (updatecounter < 0) { - pfree(recmodelname); - if (recmodelname2) - pfree(recmodelname2); - pfree(recindexname); - pfree(userkey); - pfree(itemkey); - continue; - } - - // With that done, we check the original counter. If the - // number of new events is greater than threshold * the - // number of events currently used in the model, we need - // to trigger an update. Otherwise, just increment. - updatecounter++; - - if (updatecounter >= (int) (update_threshold * eventtotal)) { - int numEvents = 0; - - // What we do depends on the recommendation method. - switch (method) { - case itemCosCF: - { - // Before we update the similarity model, we need to obtain - // a few item-related things. - int numItems; - int *IDs; - float *lengths; - - lengths = vector_lengths(itemkey, eventtable, eventval, - &numItems, &IDs); - - // Now update the similarity model. - numEvents = updateItemCosModel(eventtable, userkey, - itemkey, eventval, recmodelname, - IDs, lengths, numItems, true); - } - break; - case itemPearCF: - { - // Before we update the similarity model, we need to obtain - // a few item-related things. - int numItems; - int *IDs; - float *avgs, *pearsons; - - pearson_info(itemkey, eventtable, eventval, &numItems, - &IDs, &avgs, &pearsons); - - // Now update the similarity model. - numEvents = updateItemPearModel(eventtable, userkey, - itemkey, eventval, recmodelname, - IDs, avgs, pearsons, numItems, true); - } - break; - case userCosCF: - { - // Before we update the similarity model, we need to obtain - // a few user-related things. - int numUsers; - int *IDs; - float *lengths; - - lengths = vector_lengths(userkey, eventtable, eventval, - &numUsers, &IDs); - - // Now update the similarity model. - numEvents = updateUserCosModel(eventtable, userkey, - itemkey, eventval, recmodelname, - IDs, lengths, numUsers, true); - } - break; - case userPearCF: - { - // Before we update the similarity model, we need to obtain - // a few user-related things. - int numUsers; - int *IDs; - float *avgs, *pearsons; - - pearson_info(userkey, eventtable, eventval, &numUsers, - &IDs, &avgs, &pearsons); - - // Now update the similarity model. - numEvents = updateUserPearModel(eventtable, userkey, - itemkey, eventval, recmodelname, - IDs, avgs, pearsons, numUsers, true); - } - break; - case SVD: - // No additional functions, just update the model. - numEvents = SVDtrain(userkey, itemkey, - eventtable, eventval, - recmodelname, recmodelname2, true); - break; - default: - break; - } - - // Finally, we update the cell to indicate how many events were used - // to build it. We'll also reset the updatecounter. - countquerystring = (char*) palloc(1024*sizeof(char)); - sprintf(countquerystring,"UPDATE %s SET updatecounter = 0, eventtotal = %d;", - recindexname,numEvents); - - // Execute normally, we don't need to see results. - recathon_queryExecute(countquerystring); - pfree(countquerystring); - } else { - // Just increment. - countquerystring = (char*) palloc(1024*sizeof(char)); - sprintf(countquerystring,"UPDATE %s SET updatecounter = updatecounter+1;", - recindexname); - // Execute normally, we don't need to see results. - recathon_queryExecute(countquerystring); - pfree(countquerystring); - } - - // Final cleanup. - pfree(recmodelname); - if (recmodelname2) - pfree(recmodelname2); - pfree(recindexname); - pfree(userkey); - pfree(itemkey); - pfree(eventval); - } - - recathon_queryEnd(queryDesc,recathoncontext); - pfree(querystring); + float update_threshold; + RangeVar *cataloguerv; + // Query information. + char *querystring; + QueryDesc *queryDesc; + PlanState *planstate; + TupleTableSlot *slot; + MemoryContext recathoncontext; + + // If this fails, there's no RecModelsCatalogue, so + // there are no recommenders. + cataloguerv = makeRangeVar(NULL,"recmodelscatalogue",0); + if (!relationExists(cataloguerv)) { + pfree(cataloguerv); + return; + } + pfree(cataloguerv); + + // Obtain the update threshold. + update_threshold = getUpdateThreshold(); + + // Now that we've confirmed the RecModelsCatalogue + // exists, let's query it to find the necessary + // information. + querystring = (char*) palloc(1024*sizeof(char)); + sprintf(querystring,"SELECT * FROM RecModelsCatalogue WHERE eventtable = '%s';", + eventtable); + queryDesc = recathon_queryStart(querystring,&recathoncontext); + planstate = queryDesc->planstate; + + for (;;) { + // In case of SVD, recmodelname is the user model, and the other is the + // item model. Otherwise, recmodelname2 is nothing. + char *recindexname, *recmodelname, *recmodelname2; + char *userkey, *itemkey, *eventval, *strmethod; + int updatecounter = -1; + int eventtotal = -1; + recMethod method; + // Query information for our internal query. + char *countquerystring; + QueryDesc *countqueryDesc; + PlanState *countplanstate; + TupleTableSlot *countslot; + MemoryContext countcontext; + + slot = ExecProcNode(planstate); + if (TupIsNull(slot)) break; + + // Acquire the data for this recommender. + recindexname = getTupleString(slot,"recommenderindexname"); + userkey = getTupleString(slot,"userkey"); + itemkey = getTupleString(slot,"itemkey"); + eventval = getTupleString(slot,"eventval"); + strmethod = getTupleString(slot,"method"); + + // Get the recMethod. + method = getRecMethod(strmethod); + pfree(strmethod); + + // Failure case, continue to next tuple. + if (method < 0) { + pfree(recindexname); + pfree(userkey); + pfree(itemkey); + pfree(eventval); + continue; + } + + // We now have all the information necessary to update this + // recommender's cell counter. First we need to acquire it, + // and we might as well get the model name while we're at it. + countquerystring = (char*) palloc(1024*sizeof(char)); + if (method == SVD) + sprintf(countquerystring,"SELECT recusermodelname, recitemmodelname, updatecounter, eventtotal FROM %s;", + recindexname); + else + sprintf(countquerystring,"SELECT recmodelname, updatecounter, eventtotal FROM %s;", + recindexname); + + countqueryDesc = recathon_queryStart(countquerystring,&countcontext); + countplanstate = countqueryDesc->planstate; + + // Go through what should be the only tuple and obtain the data. + countslot = ExecProcNode(countplanstate); + if (TupIsNull(countslot)) { + // More failure conditions. We can't just error out + // because the INSERT still needs to happen. + recathon_queryEnd(countqueryDesc,countcontext); + pfree(countquerystring); + pfree(recindexname); + pfree(userkey); + pfree(itemkey); + pfree(eventval); + continue; + } + + // Get the relevant data. + if (method == SVD) { + recmodelname = getTupleString(countslot,"recusermodelname"); + recmodelname2 = getTupleString(countslot,"recitemmodelname"); + } else { + recmodelname = getTupleString(countslot,"recmodelname"); + recmodelname2 = NULL; + } + updatecounter = getTupleInt(countslot,"updatecounter"); + eventtotal = getTupleInt(countslot,"eventtotal"); + + recathon_queryEnd(countqueryDesc,countcontext); + pfree(countquerystring); + + // Even more failure conditions. + if (updatecounter < 0) { + pfree(recmodelname); + if (recmodelname2) + pfree(recmodelname2); + pfree(recindexname); + pfree(userkey); + pfree(itemkey); + continue; + } + + // With that done, we check the original counter. If the + // number of new events is greater than threshold * the + // number of events currently used in the model, we need + // to trigger an update. Otherwise, just increment. + updatecounter++; + + if (updatecounter >= (int) (update_threshold * eventtotal)) { + int numEvents = 0; + + // What we do depends on the recommendation method. + switch (method) { + case itemCosCF: + { + // Before we update the similarity model, we need to obtain + // a few item-related things. + int numItems; + int *IDs; + float *lengths; + + lengths = vector_lengths(itemkey, eventtable, eventval, + &numItems, &IDs); + + // Now update the similarity model. + numEvents = updateItemCosModel(eventtable, userkey, + itemkey, eventval, recmodelname, + IDs, lengths, numItems, true); + } + break; + case itemPearCF: + { + // Before we update the similarity model, we need to obtain + // a few item-related things. + int numItems; + int *IDs; + float *avgs, *pearsons; + + pearson_info(itemkey, eventtable, eventval, &numItems, + &IDs, &avgs, &pearsons); + + // Now update the similarity model. + numEvents = updateItemPearModel(eventtable, userkey, + itemkey, eventval, recmodelname, + IDs, avgs, pearsons, numItems, true); + } + break; + case userCosCF: + { + // Before we update the similarity model, we need to obtain + // a few user-related things. + int numUsers; + int *IDs; + float *lengths; + + lengths = vector_lengths(userkey, eventtable, eventval, + &numUsers, &IDs); + + // Now update the similarity model. + numEvents = updateUserCosModel(eventtable, userkey, + itemkey, eventval, recmodelname, + IDs, lengths, numUsers, true); + } + break; + case userPearCF: + { + // Before we update the similarity model, we need to obtain + // a few user-related things. + int numUsers; + int *IDs; + float *avgs, *pearsons; + + pearson_info(userkey, eventtable, eventval, &numUsers, + &IDs, &avgs, &pearsons); + + // Now update the similarity model. + numEvents = updateUserPearModel(eventtable, userkey, + itemkey, eventval, recmodelname, + IDs, avgs, pearsons, numUsers, true); + } + break; + case SVD: + // No additional functions, just update the model. + numEvents = SVDtrain(userkey, itemkey, + eventtable, eventval, + recmodelname, recmodelname2, true); + break; + default: + break; + } + + // Finally, we update the cell to indicate how many events were used + // to build it. We'll also reset the updatecounter. + countquerystring = (char*) palloc(1024*sizeof(char)); + sprintf(countquerystring,"UPDATE %s SET updatecounter = 0, eventtotal = %d;", + recindexname,numEvents); + + // Execute normally, we don't need to see results. + recathon_queryExecute(countquerystring); + pfree(countquerystring); + } else { + // Just increment. + countquerystring = (char*) palloc(1024*sizeof(char)); + sprintf(countquerystring,"UPDATE %s SET updatecounter = updatecounter+1;", + recindexname); + // Execute normally, we don't need to see results. + recathon_queryExecute(countquerystring); + pfree(countquerystring); + } + + // Final cleanup. + pfree(recmodelname); + if (recmodelname2) + pfree(recmodelname2); + pfree(recindexname); + pfree(userkey); + pfree(itemkey); + pfree(eventval); + } + + recathon_queryEnd(queryDesc,recathoncontext); + pfree(querystring); } /* ---------------------------------------------------------------- @@ -1211,17 +1211,17 @@ updateCellCounter(char *eventtable, TupleTableSlot *insertslot) { */ int binarySearch(int *array, int value, int lo, int hi) { - int mid; - - mid = (hi + lo) / 2; - if (array[mid] == value) return mid; - // Edge case. - if (mid == lo) return -1; - // Normal recursive case. - if (array[mid] < value) - return binarySearch(array, value, mid, hi); - else - return binarySearch(array, value, lo, mid); + int mid; + + mid = (hi + lo) / 2; + if (array[mid] == value) return mid; + // Edge case. + if (mid == lo) return -1; + // Normal recursive case. + if (array[mid] < value) + return binarySearch(array, value, mid, hi); + else + return binarySearch(array, value, lo, mid); } /* ---------------------------------------------------------------- @@ -1235,76 +1235,76 @@ binarySearch(int *array, int value, int lo, int hi) { */ float* vector_lengths(char *key, char *eventtable, char *eventval, int *totalNum, int **IDlist) { - int *IDs; - float *lengths; - int i, j, numItems, priorID; - // Objects for querying. - char *querystring; - QueryDesc *queryDesc; - PlanState *planstate; - TupleTableSlot *slot; - MemoryContext recathoncontext; - - // We start by getting the number of distinct items in the event table. - querystring = (char*) palloc(1024*sizeof(char)); - sprintf(querystring,"SELECT COUNT(DISTINCT %s) FROM %s;", - key,eventtable); - queryDesc = recathon_queryStart(querystring,&recathoncontext); - planstate = queryDesc->planstate; - slot = ExecProcNode(planstate); - numItems = getTupleInt(slot,"count"); - recathon_queryEnd(queryDesc,recathoncontext); - - // Now that we have the number of items, we can create an array or two. - IDs = (int*) palloc(numItems*sizeof(int)); - lengths = (float*) palloc(numItems*sizeof(float)); - for (j = 0; j < numItems; j++) - lengths[j] = 0.0; - - // Now we need to populate the two arrays. We'll get all the events from - // the events table. - priorID = -1; - sprintf(querystring,"SELECT * FROM %s ORDER BY %s;",eventtable,key); - queryDesc = recathon_queryStart(querystring,&recathoncontext); - planstate = queryDesc->planstate; - i = -1; - - // This query grabs all item IDs, so we can store them. Later we'll calculate - // vector lengths. - for (;;) { - int currentID = 0; - float currentEvent = 0.0; - - slot = ExecProcNode(planstate); - if (TupIsNull(slot)) break; - - currentID = getTupleInt(slot,key); - - // Are we dealing with a new item ID? If so, switch to the next slot. - if (currentID != priorID) { - i++; - priorID = currentID; - IDs[i] = currentID; - } - - currentEvent = getTupleFloat(slot,eventval); - lengths[i] += currentEvent*currentEvent; - } - - // Query cleanup. - recathon_queryEnd(queryDesc,recathoncontext); - pfree(querystring); - - // Now that we've totally queried the events table, we need to - // take the square root of each length and we're done. - for (i = 0; i < numItems; i++) - lengths[i] = sqrtf(lengths[i]); - - // Return data. - (*totalNum) = numItems; - (*IDlist) = IDs; - - return lengths; + int *IDs; + float *lengths; + int i, j, numItems, priorID; + // Objects for querying. + char *querystring; + QueryDesc *queryDesc; + PlanState *planstate; + TupleTableSlot *slot; + MemoryContext recathoncontext; + + // We start by getting the number of distinct items in the event table. + querystring = (char*) palloc(1024*sizeof(char)); + sprintf(querystring,"SELECT COUNT(DISTINCT %s) FROM %s;", + key,eventtable); + queryDesc = recathon_queryStart(querystring,&recathoncontext); + planstate = queryDesc->planstate; + slot = ExecProcNode(planstate); + numItems = getTupleInt(slot,"count"); + recathon_queryEnd(queryDesc,recathoncontext); + + // Now that we have the number of items, we can create an array or two. + IDs = (int*) palloc(numItems*sizeof(int)); + lengths = (float*) palloc(numItems*sizeof(float)); + for (j = 0; j < numItems; j++) + lengths[j] = 0.0; + + // Now we need to populate the two arrays. We'll get all the events from + // the events table. + priorID = -1; + sprintf(querystring,"SELECT * FROM %s ORDER BY %s;",eventtable,key); + queryDesc = recathon_queryStart(querystring,&recathoncontext); + planstate = queryDesc->planstate; + i = -1; + + // This query grabs all item IDs, so we can store them. Later we'll calculate + // vector lengths. + for (;;) { + int currentID = 0; + float currentEvent = 0.0; + + slot = ExecProcNode(planstate); + if (TupIsNull(slot)) break; + + currentID = getTupleInt(slot,key); + + // Are we dealing with a new item ID? If so, switch to the next slot. + if (currentID != priorID) { + i++; + priorID = currentID; + IDs[i] = currentID; + } + + currentEvent = getTupleFloat(slot,eventval); + lengths[i] += currentEvent*currentEvent; + } + + // Query cleanup. + recathon_queryEnd(queryDesc,recathoncontext); + pfree(querystring); + + // Now that we've totally queried the events table, we need to + // take the square root of each length and we're done. + for (i = 0; i < numItems; i++) + lengths[i] = sqrtf(lengths[i]); + + // Return data. + (*totalNum) = numItems; + (*IDlist) = IDs; + + return lengths; } /* ---------------------------------------------------------------- @@ -1318,29 +1318,29 @@ vector_lengths(char *key, char *eventtable, char *eventval, int *totalNum, int * */ float dotProduct(sim_node item1, sim_node item2) { - sim_node temp1, temp2; - float similarity; - - if (item1 == NULL || item2 == NULL) return 0; - - similarity = 0.0; - - // Check every event for the first item, and see how - // many of those users also rated the second item. - temp1 = item1; temp2 = item2; - while (temp1 && temp2) { - if (temp1->id == temp2->id) { - similarity += temp1->event * temp2->event; - temp1 = temp1->next; - temp2 = temp2->next; - } else if (temp1->id > temp2->id) { - temp2 = temp2->next; - } else { - temp1 = temp1->next; - } - } - - return similarity; + sim_node temp1, temp2; + float similarity; + + if (item1 == NULL || item2 == NULL) return 0; + + similarity = 0.0; + + // Check every event for the first item, and see how + // many of those users also rated the second item. + temp1 = item1; temp2 = item2; + while (temp1 && temp2) { + if (temp1->id == temp2->id) { + similarity += temp1->event * temp2->event; + temp1 = temp1->next; + temp2 = temp2->next; + } else if (temp1->id > temp2->id) { + temp2 = temp2->next; + } else { + temp1 = temp1->next; + } + } + + return similarity; } /* ---------------------------------------------------------------- @@ -1352,18 +1352,18 @@ dotProduct(sim_node item1, sim_node item2) { */ float cosineSimilarity(sim_node item1, sim_node item2, float length1, float length2) { - float numerator; - float denominator; - - // Short-circuit check. If one of the items has no events, - // no point checking similarity. This also avoids a possible - // divide-by-zero error. - denominator = length1 * length2; - if (denominator <= 0) return 0; - - numerator = dotProduct(item1,item2); - if (numerator <= 0) return 0; - else return numerator / denominator; + float numerator; + float denominator; + + // Short-circuit check. If one of the items has no events, + // no point checking similarity. This also avoids a possible + // divide-by-zero error. + denominator = length1 * length2; + if (denominator <= 0) return 0; + + numerator = dotProduct(item1,item2); + if (numerator <= 0) return 0; + else return numerator / denominator; } /* ---------------------------------------------------------------- @@ -1377,188 +1377,188 @@ cosineSimilarity(sim_node item1, sim_node item2, float length1, float length2) { */ int updateItemCosModel(char *eventtable, char *userkey, char *itemkey, - char *eventval, char *modelname, int *itemIDs, float *itemLengths, - int numItems, bool update) { - int i, j, priorID; - int numEvents = 0; - char *querystring, *insertstring, *temprecfile; - sim_node *itemEvents; - // Information for other queries. - QueryDesc *simqueryDesc; - PlanState *simplanstate; - TupleTableSlot *simslot; - MemoryContext simcontext; - // Information for writing to file. - FILE *fp; - temprecfile = (char*) palloc(256*sizeof(char)); - sprintf(temprecfile,"recathon_temp_%s.dat",modelname); - - // If this is us updating a cell as opposed to building - // a recommender, we need to drop the existing entries. - if (update) { - char *dropstring; - - dropstring = (char*) palloc(256*sizeof(char)); - sprintf(dropstring,"DELETE FROM %s;",modelname); - recathon_queryExecute(dropstring); - pfree(dropstring); - } - - // With the precomputation done, we need to derive the actual item - // similarities. We can do this in a way that's linear in the number - // of I/Os and also the amount of storage. The complexity is relegated - // to in-memory calculations, which is the most affordable. We need to - // use this data structure here. - itemEvents = (sim_node*) palloc(numItems*sizeof(sim_node)); - for (i = 0; i < numItems; i++) - itemEvents[i] = NULL; - - // With the model created, we need to populate it, which means calculating - // similarity between all item pairs. We need to query the events table - // in order to get the key information. We'll also keep track of the number - // of events used, since we need to store that information. - querystring = (char*) palloc(1024*sizeof(char)); - sprintf(querystring,"SELECT r.%s,r.%s,r.%s FROM %s r ORDER BY r.%s;", - userkey,itemkey,eventval,eventtable,itemkey); - - // Begin extracting data. - priorID = -1; - simqueryDesc = recathon_queryStart(querystring, &simcontext); - simplanstate = simqueryDesc->planstate; - i = -1; - - for (;;) { - int simuser, simitem; - float simevent; - sim_node newnode; - - // Shut the compiler up. - simuser = 0; simitem = 0; simevent = 0.0; - - simslot = ExecProcNode(simplanstate); - if (TupIsNull(simslot)) break; - - simuser = getTupleInt(simslot,userkey); - simitem = getTupleInt(simslot,itemkey); - simevent = getTupleFloat(simslot,eventval); - - // Are we dealing with a new item ID? If so, switch to the next slot. - if (simitem != priorID) { - priorID = simitem; - i++; - } - - // We now have the user, item, and event for this tuple. - // We insert the results as a sim_node into the - // itemEvents table; we'll do calculations later. - newnode = createSimNode(simuser, simevent); - itemEvents[i] = simInsert(itemEvents[i], newnode); - numEvents++; - } - - // Query cleanup. - recathon_queryEnd(simqueryDesc, simcontext); - - // We're going to write out the results to file. - if ((fp = fopen(temprecfile,"w")) == NULL) - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("failed to open temporary file"))); - insertstring = (char*) palloc(128*sizeof(char)); - - // Now we do the similarity calculations. Note that we - // don't include duplicate entries, to save time and space. - // The first item ALWAYS has a lower value than the second. - for (i = 0; i < numItems; i++) { - float length_i; - sim_node item_i; - nbr_node temp_nbr; - nbr_node nbr_list = NULL; - - item_i = itemEvents[i]; - if (!item_i) continue; - length_i = itemLengths[i]; - - for (j = i+1; j < numItems; j++) { - float length_j; - sim_node item_j; - int item1, item2; - float similarity; - - item_j = itemEvents[j]; - if (!item_j) continue; - length_j = itemLengths[j]; - - similarity = cosineSimilarity(item_i, item_j, length_i, length_j); - if (similarity <= 0) continue; - item1 = itemIDs[i]; - item2 = itemIDs[j]; - - // Now we write. - if (NBRHOOD <= 0) { - sprintf(insertstring,"%d;%d;%f\n", - item1,item2,similarity); - fwrite(insertstring,1,strlen(insertstring),fp); - } else { - nbr_node newnbr = createNbrNode(item1,item2,similarity); - nbr_list = nbrInsert(nbr_list,newnbr,NBRHOOD); - } - } - - // If we have a limited neighborhood, we write the results here. - if (NBRHOOD > 0) { - for (temp_nbr = nbr_list; temp_nbr; temp_nbr = temp_nbr->next) { - sprintf(insertstring,"%d;%d;%f\n",temp_nbr->item1, - temp_nbr->item2,temp_nbr->similarity); - fwrite(insertstring,1,strlen(insertstring),fp); - } - freeNbrList(nbr_list); - } - - CHECK_FOR_INTERRUPTS(); - } - - pfree(insertstring); - fclose(fp); - - // If we are updating an existing similarity model, - // we will want to drop the existing primary key - // constraint before doing the copy, to save time. - if (update) { - sprintf(querystring,"ALTER TABLE %s DROP CONSTRAINT %s_pkey;", - modelname,modelname); - recathon_utilityExecute(querystring); - } - - // With all the data written out, we're going to - // issue a COPY FROM command to bulk load the data - // into the database. - sprintf(querystring,"COPY %s FROM '%s' DELIMITERS ';';", - modelname,temprecfile); - recathon_utilityExecute(querystring); - - // Now we add the primary key constraint. It's - // faster to add it after adding the data than - // having it incrementally updated. - sprintf(querystring,"ALTER TABLE %s ADD PRIMARY KEY (item1, item2)",modelname); - recathon_utilityExecute(querystring); - pfree(querystring); - - // We'll delete the temporary file here, to not hold onto - // memory longer than necessary. - if (unlink(temprecfile) < 0) - ereport(WARNING, - (errcode(ERRCODE_WARNING), - errmsg("failed to delete temporary file"))); - - // Free up the lists of sim_nodes and start again. - for (i = 0; i < numItems; i++) { - freeSimList(itemEvents[i]); - itemEvents[i] = NULL; - } - - // Return the number of events we used. - return numEvents; + char *eventval, char *modelname, int *itemIDs, float *itemLengths, + int numItems, bool update) { + int i, j, priorID; + int numEvents = 0; + char *querystring, *insertstring, *temprecfile; + sim_node *itemEvents; + // Information for other queries. + QueryDesc *simqueryDesc; + PlanState *simplanstate; + TupleTableSlot *simslot; + MemoryContext simcontext; + // Information for writing to file. + FILE *fp; + temprecfile = (char*) palloc(256*sizeof(char)); + sprintf(temprecfile,"recathon_temp_%s.dat",modelname); + + // If this is us updating a cell as opposed to building + // a recommender, we need to drop the existing entries. + if (update) { + char *dropstring; + + dropstring = (char*) palloc(256*sizeof(char)); + sprintf(dropstring,"DELETE FROM %s;",modelname); + recathon_queryExecute(dropstring); + pfree(dropstring); + } + + // With the precomputation done, we need to derive the actual item + // similarities. We can do this in a way that's linear in the number + // of I/Os and also the amount of storage. The complexity is relegated + // to in-memory calculations, which is the most affordable. We need to + // use this data structure here. + itemEvents = (sim_node*) palloc(numItems*sizeof(sim_node)); + for (i = 0; i < numItems; i++) + itemEvents[i] = NULL; + + // With the model created, we need to populate it, which means calculating + // similarity between all item pairs. We need to query the events table + // in order to get the key information. We'll also keep track of the number + // of events used, since we need to store that information. + querystring = (char*) palloc(1024*sizeof(char)); + sprintf(querystring,"SELECT r.%s,r.%s,r.%s FROM %s r ORDER BY r.%s;", + userkey,itemkey,eventval,eventtable,itemkey); + + // Begin extracting data. + priorID = -1; + simqueryDesc = recathon_queryStart(querystring, &simcontext); + simplanstate = simqueryDesc->planstate; + i = -1; + + for (;;) { + int simuser, simitem; + float simevent; + sim_node newnode; + + // Shut the compiler up. + simuser = 0; simitem = 0; simevent = 0.0; + + simslot = ExecProcNode(simplanstate); + if (TupIsNull(simslot)) break; + + simuser = getTupleInt(simslot,userkey); + simitem = getTupleInt(simslot,itemkey); + simevent = getTupleFloat(simslot,eventval); + + // Are we dealing with a new item ID? If so, switch to the next slot. + if (simitem != priorID) { + priorID = simitem; + i++; + } + + // We now have the user, item, and event for this tuple. + // We insert the results as a sim_node into the + // itemEvents table; we'll do calculations later. + newnode = createSimNode(simuser, simevent); + itemEvents[i] = simInsert(itemEvents[i], newnode); + numEvents++; + } + + // Query cleanup. + recathon_queryEnd(simqueryDesc, simcontext); + + // We're going to write out the results to file. + if ((fp = fopen(temprecfile,"w")) == NULL) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("failed to open temporary file"))); + insertstring = (char*) palloc(128*sizeof(char)); + + // Now we do the similarity calculations. Note that we + // don't include duplicate entries, to save time and space. + // The first item ALWAYS has a lower value than the second. + for (i = 0; i < numItems; i++) { + float length_i; + sim_node item_i; + nbr_node temp_nbr; + nbr_node nbr_list = NULL; + + item_i = itemEvents[i]; + if (!item_i) continue; + length_i = itemLengths[i]; + + for (j = i+1; j < numItems; j++) { + float length_j; + sim_node item_j; + int item1, item2; + float similarity; + + item_j = itemEvents[j]; + if (!item_j) continue; + length_j = itemLengths[j]; + + similarity = cosineSimilarity(item_i, item_j, length_i, length_j); + if (similarity <= 0) continue; + item1 = itemIDs[i]; + item2 = itemIDs[j]; + + // Now we write. + if (NBRHOOD <= 0) { + sprintf(insertstring,"%d;%d;%f\n", + item1,item2,similarity); + fwrite(insertstring,1,strlen(insertstring),fp); + } else { + nbr_node newnbr = createNbrNode(item1,item2,similarity); + nbr_list = nbrInsert(nbr_list,newnbr,NBRHOOD); + } + } + + // If we have a limited neighborhood, we write the results here. + if (NBRHOOD > 0) { + for (temp_nbr = nbr_list; temp_nbr; temp_nbr = temp_nbr->next) { + sprintf(insertstring,"%d;%d;%f\n",temp_nbr->item1, + temp_nbr->item2,temp_nbr->similarity); + fwrite(insertstring,1,strlen(insertstring),fp); + } + freeNbrList(nbr_list); + } + + CHECK_FOR_INTERRUPTS(); + } + + pfree(insertstring); + fclose(fp); + + // If we are updating an existing similarity model, + // we will want to drop the existing primary key + // constraint before doing the copy, to save time. + if (update) { + sprintf(querystring,"ALTER TABLE %s DROP CONSTRAINT %s_pkey;", + modelname,modelname); + recathon_utilityExecute(querystring); + } + + // With all the data written out, we're going to + // issue a COPY FROM command to bulk load the data + // into the database. + sprintf(querystring,"COPY %s FROM '%s' DELIMITERS ';';", + modelname,temprecfile); + recathon_utilityExecute(querystring); + + // Now we add the primary key constraint. It's + // faster to add it after adding the data than + // having it incrementally updated. + sprintf(querystring,"ALTER TABLE %s ADD PRIMARY KEY (item1, item2)",modelname); + recathon_utilityExecute(querystring); + pfree(querystring); + + // We'll delete the temporary file here, to not hold onto + // memory longer than necessary. + if (unlink(temprecfile) < 0) + ereport(WARNING, + (errcode(ERRCODE_WARNING), + errmsg("failed to delete temporary file"))); + + // Free up the lists of sim_nodes and start again. + for (i = 0; i < numItems; i++) { + freeSimList(itemEvents[i]); + itemEvents[i] = NULL; + } + + // Return the number of events we used. + return numEvents; } /* ---------------------------------------------------------------- @@ -1574,126 +1574,126 @@ updateItemCosModel(char *eventtable, char *userkey, char *itemkey, */ void pearson_info(char *key, char *eventtable, char *eventval, int *totalNum, - int **IDlist, float **avgList, float **pearsonList) { - int *IDs, *counts; - float *avgs, *pearsons; - int i, j, numItems, priorID; - // Objects for querying. - char *querystring; - QueryDesc *queryDesc; - PlanState *planstate; - TupleTableSlot *slot; - MemoryContext recathoncontext; - - // We start by getting the number of items in the event table. - querystring = (char*) palloc(1024*sizeof(char)); - sprintf(querystring,"SELECT COUNT(DISTINCT %s) FROM %s;", - key,eventtable); - queryDesc = recathon_queryStart(querystring,&recathoncontext); - planstate = queryDesc->planstate; - slot = ExecProcNode(planstate); - numItems = getTupleInt(slot,"count"); - recathon_queryEnd(queryDesc,recathoncontext); - - // Now that we have the number of items, we can create an array or two. - IDs = (int*) palloc(numItems*sizeof(int)); - counts = (int*) palloc(numItems*sizeof(int)); - for (j = 0; j < numItems; j++) - counts[j] = 0; - avgs = (float*) palloc(numItems*sizeof(float)); - for (j = 0; j < numItems; j++) - avgs[j] = 0.0; - pearsons = (float*) palloc(numItems*sizeof(float)); - for (j = 0; j < numItems; j++) - pearsons[j] = 0.0; - - // Now we need to populate the four arrays. We'll get all the events from - // the events table. - priorID = -1; - sprintf(querystring,"SELECT * FROM %s ORDER BY %s;",eventtable,key); - queryDesc = recathon_queryStart(querystring,&recathoncontext); - planstate = queryDesc->planstate; - i = -1; - - // This query grabs all item IDs, so we can store them. It also fills in - // some other information we'll need. - for (;;) { - int currentID = 0; - float currentEvent = 0.0; - - slot = ExecProcNode(planstate); - if (TupIsNull(slot)) break; - - currentID = getTupleInt(slot,key); - - // Are we dealing with a new item ID? If so, switch to the next slot. - if (currentID != priorID) { - i++; - priorID = currentID; - IDs[i] = currentID; - } - - currentEvent = getTupleFloat(slot,eventval); - counts[i] += 1; - avgs[i] += currentEvent; - } - - // Query cleanup. - recathon_queryEnd(queryDesc,recathoncontext); - - // Now that we've totally queried the events table, we need to - // obtain the actual averages for each item. - for (i = 0; i < numItems; i++) { - if (counts[i] > 0) - avgs[i] /= ((float)counts[i]); - } - pfree(counts); - - // We can reuse the same query to obtain the events again, and - // calculate Pearsons. - priorID = -1; - queryDesc = recathon_queryStart(querystring,&recathoncontext); - planstate = queryDesc->planstate; - i = -1; - - // We scan through the entire event table once, sorting the events - // based on which item they apply to. - for (;;) { - int currentID = 0; - float currentEvent = 0.0; - float difference = 0.0; - - slot = ExecProcNode(planstate); - if (TupIsNull(slot)) break; - - currentID = getTupleInt(slot,key); - // Are we dealing with a new item ID? If so, switch to the next slot. - if (currentID != priorID) { - priorID = currentID; - i++; - } - currentEvent = getTupleFloat(slot,eventval); - - // We have the item number and event value from this tuple. - // Now we need to update Pearsons. - difference = currentEvent - avgs[i]; - pearsons[i] += difference*difference; - } - - // Query cleanup. - recathon_queryEnd(queryDesc,recathoncontext); - pfree(querystring); - - // Now that we've totally queried the events table, we need to - // take the square root of each Pearson and we're done. - for (i = 0; i < numItems; i++) - pearsons[i] = sqrtf(pearsons[i]); - - // Return data. - (*totalNum) = numItems; - (*IDlist) = IDs; - (*avgList) = avgs; - (*pearsonList) = pearsons; + int **IDlist, float **avgList, float **pearsonList) { + int *IDs, *counts; + float *avgs, *pearsons; + int i, j, numItems, priorID; + // Objects for querying. + char *querystring; + QueryDesc *queryDesc; + PlanState *planstate; + TupleTableSlot *slot; + MemoryContext recathoncontext; + + // We start by getting the number of items in the event table. + querystring = (char*) palloc(1024*sizeof(char)); + sprintf(querystring,"SELECT COUNT(DISTINCT %s) FROM %s;", + key,eventtable); + queryDesc = recathon_queryStart(querystring,&recathoncontext); + planstate = queryDesc->planstate; + slot = ExecProcNode(planstate); + numItems = getTupleInt(slot,"count"); + recathon_queryEnd(queryDesc,recathoncontext); + + // Now that we have the number of items, we can create an array or two. + IDs = (int*) palloc(numItems*sizeof(int)); + counts = (int*) palloc(numItems*sizeof(int)); + for (j = 0; j < numItems; j++) + counts[j] = 0; + avgs = (float*) palloc(numItems*sizeof(float)); + for (j = 0; j < numItems; j++) + avgs[j] = 0.0; + pearsons = (float*) palloc(numItems*sizeof(float)); + for (j = 0; j < numItems; j++) + pearsons[j] = 0.0; + + // Now we need to populate the four arrays. We'll get all the events from + // the events table. + priorID = -1; + sprintf(querystring,"SELECT * FROM %s ORDER BY %s;",eventtable,key); + queryDesc = recathon_queryStart(querystring,&recathoncontext); + planstate = queryDesc->planstate; + i = -1; + + // This query grabs all item IDs, so we can store them. It also fills in + // some other information we'll need. + for (;;) { + int currentID = 0; + float currentEvent = 0.0; + + slot = ExecProcNode(planstate); + if (TupIsNull(slot)) break; + + currentID = getTupleInt(slot,key); + + // Are we dealing with a new item ID? If so, switch to the next slot. + if (currentID != priorID) { + i++; + priorID = currentID; + IDs[i] = currentID; + } + + currentEvent = getTupleFloat(slot,eventval); + counts[i] += 1; + avgs[i] += currentEvent; + } + + // Query cleanup. + recathon_queryEnd(queryDesc,recathoncontext); + + // Now that we've totally queried the events table, we need to + // obtain the actual averages for each item. + for (i = 0; i < numItems; i++) { + if (counts[i] > 0) + avgs[i] /= ((float)counts[i]); + } + pfree(counts); + + // We can reuse the same query to obtain the events again, and + // calculate Pearsons. + priorID = -1; + queryDesc = recathon_queryStart(querystring,&recathoncontext); + planstate = queryDesc->planstate; + i = -1; + + // We scan through the entire event table once, sorting the events + // based on which item they apply to. + for (;;) { + int currentID = 0; + float currentEvent = 0.0; + float difference = 0.0; + + slot = ExecProcNode(planstate); + if (TupIsNull(slot)) break; + + currentID = getTupleInt(slot,key); + // Are we dealing with a new item ID? If so, switch to the next slot. + if (currentID != priorID) { + priorID = currentID; + i++; + } + currentEvent = getTupleFloat(slot,eventval); + + // We have the item number and event value from this tuple. + // Now we need to update Pearsons. + difference = currentEvent - avgs[i]; + pearsons[i] += difference*difference; + } + + // Query cleanup. + recathon_queryEnd(queryDesc,recathoncontext); + pfree(querystring); + + // Now that we've totally queried the events table, we need to + // take the square root of each Pearson and we're done. + for (i = 0; i < numItems; i++) + pearsons[i] = sqrtf(pearsons[i]); + + // Return data. + (*totalNum) = numItems; + (*IDlist) = IDs; + (*avgList) = avgs; + (*pearsonList) = pearsons; } /* ---------------------------------------------------------------- @@ -1707,29 +1707,29 @@ pearson_info(char *key, char *eventtable, char *eventval, int *totalNum, */ float pearsonDotProduct(sim_node item1, sim_node item2, float avg1, float avg2) { - sim_node temp1, temp2; - float similarity; - - if (item1 == NULL || item2 == NULL) return 0.0; - - similarity = 0.0; - - // Check every event for the first item, and see how - // many of those users also rated the second item. - temp1 = item1; temp2 = item2; - while (temp1 && temp2) { - if (temp1->id == temp2->id) { - similarity += (temp1->event - avg1) * (temp2->event - avg2); - temp1 = temp1->next; - temp2 = temp2->next; - } else if (temp1->id > temp2->id) { - temp2 = temp2->next; - } else { - temp1 = temp1->next; - } - } - - return similarity; + sim_node temp1, temp2; + float similarity; + + if (item1 == NULL || item2 == NULL) return 0.0; + + similarity = 0.0; + + // Check every event for the first item, and see how + // many of those users also rated the second item. + temp1 = item1; temp2 = item2; + while (temp1 && temp2) { + if (temp1->id == temp2->id) { + similarity += (temp1->event - avg1) * (temp2->event - avg2); + temp1 = temp1->next; + temp2 = temp2->next; + } else if (temp1->id > temp2->id) { + temp2 = temp2->next; + } else { + temp1 = temp1->next; + } + } + + return similarity; } /* ---------------------------------------------------------------- @@ -1741,19 +1741,19 @@ pearsonDotProduct(sim_node item1, sim_node item2, float avg1, float avg2) { */ float pearsonSimilarity(sim_node item1, sim_node item2, float avg1, float avg2, - float pearson1, float pearson2) { - float numerator; - float denominator; - - // Short-circuit check. If one of the items has no events, - // no point checking similarity. This also avoids a possible - // divide-by-zero error. - denominator = pearson1 * pearson2; - if (denominator == 0.0) return 0.0; - - numerator = pearsonDotProduct(item1,item2,avg1,avg2); - if (numerator == 0.0) return 0.0; - else return numerator / denominator; + float pearson1, float pearson2) { + float numerator; + float denominator; + + // Short-circuit check. If one of the items has no events, + // no point checking similarity. This also avoids a possible + // divide-by-zero error. + denominator = pearson1 * pearson2; + if (denominator == 0.0) return 0.0; + + numerator = pearsonDotProduct(item1,item2,avg1,avg2); + if (numerator == 0.0) return 0.0; + else return numerator / denominator; } /* ---------------------------------------------------------------- @@ -1767,194 +1767,194 @@ pearsonSimilarity(sim_node item1, sim_node item2, float avg1, float avg2, */ int updateItemPearModel(char *eventtable, char *userkey, char *itemkey, - char *eventval, char *modelname, int *itemIDs, float *itemAvgs, - float *itemPearsons, int numItems, bool update) { - int i, j, priorID; - int numEvents = 0; - char *querystring, *insertstring, *temprecfile; - sim_node *itemEvents; - // Information for other queries. - QueryDesc *simqueryDesc; - PlanState *simplanstate; - TupleTableSlot *simslot; - MemoryContext simcontext; - // Information for writing to file. - FILE *fp; - temprecfile = (char*) palloc(256*sizeof(char)); - sprintf(temprecfile,"recathon_temp_%s.dat",modelname); - - // If this is us updating a cell as opposed to building - // a recommender, we need to drop the existing entries. - if (update) { - char *dropstring; - - dropstring = (char*) palloc(256*sizeof(char)); - sprintf(dropstring,"DELETE FROM %s;",modelname); - recathon_queryExecute(dropstring); - pfree(dropstring); - } - - // With the precomputation done, we need to derive the actual item - // similarities. We can do this in a way that's linear in the number - // of I/Os and also the amount of storage. The complexity is relegated - // to in-memory calculations, which is the most affordable. We need to - // use this data structure here. - itemEvents = (sim_node*) palloc(numItems*sizeof(sim_node)); - for (i = 0; i < numItems; i++) - itemEvents[i] = NULL; - - // With the model created, we need to populate it, which means calculating - // similarity between all item pairs. We need to query the events table - // in order to get the key information. - querystring = (char*) palloc(1024*sizeof(char)); - sprintf(querystring,"SELECT r.%s,r.%s,r.%s FROM %s r ORDER BY r.%s;", - userkey,itemkey,eventval,eventtable,itemkey); - - // Begin extracting data. - priorID = -1; - simqueryDesc = recathon_queryStart(querystring, &simcontext); - simplanstate = simqueryDesc->planstate; - i = -1; - - for (;;) { - int simuser, simitem; - float simevent; - sim_node newnode; - - // Shut the compiler up. - simuser = 0; simitem = 0; simevent = 0.0; - - simslot = ExecProcNode(simplanstate); - if (TupIsNull(simslot)) break; - - simuser = getTupleInt(simslot,userkey); - simitem = getTupleInt(simslot,itemkey); - simevent = getTupleFloat(simslot,eventval); - - // Are we dealing with a new item ID? If so, switch to the next slot. - if (simitem != priorID) { - priorID = simitem; - i++; - } - - // We now have the user, item, and event for this tuple. - // We insert the results as a sim_node into the - // itemEvents table; we'll do calculations later. - newnode = createSimNode(simuser, simevent); - itemEvents[i] = simInsert(itemEvents[i], newnode); - numEvents++; - } - - // Query cleanup. - recathon_queryEnd(simqueryDesc, simcontext); - pfree(querystring); - - // We're going to write out the results to file. - if ((fp = fopen(temprecfile,"w")) == NULL) - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("failed to open temporary file"))); - insertstring = (char*) palloc(128*sizeof(char)); - - // Now we do the similarity calculations. Note that we - // don't include duplicate entries, to save time and space. - // The first item ALWAYS has a lower value than the second. - for (i = 0; i < numItems; i++) { - float avg_i, pearson_i; - sim_node item_i; - nbr_node temp_nbr; - nbr_node nbr_list = NULL; - - item_i = itemEvents[i]; - if (!item_i) continue; - avg_i = itemAvgs[i]; - pearson_i = itemPearsons[i]; - - for (j = i+1; j < numItems; j++) { - float avg_j, pearson_j; - sim_node item_j; - int item1, item2; - float similarity; - - item_j = itemEvents[j]; - if (!item_j) continue; - avg_j = itemAvgs[j]; - pearson_j = itemPearsons[j]; - - similarity = pearsonSimilarity(item_i, item_j, avg_i, avg_j, pearson_i, pearson_j); - if (similarity == 0.0) continue; - item1 = itemIDs[i]; - item2 = itemIDs[j]; - - // Now we write. - if (NBRHOOD <= 0) { - sprintf(insertstring,"%d;%d;%f\n",item1,item2,similarity); - fwrite(insertstring,1,strlen(insertstring),fp); - } else { - nbr_node newnbr = createNbrNode(item1,item2,similarity); - nbr_list = nbrInsert(nbr_list,newnbr,NBRHOOD); - } - } - - // If we have a limited neighborhood, we write the results here. - if (NBRHOOD > 0) { - for (temp_nbr = nbr_list; temp_nbr; temp_nbr = temp_nbr->next) { - sprintf(insertstring,"%d;%d;%f\n",temp_nbr->item1, - temp_nbr->item2,temp_nbr->similarity); - fwrite(insertstring,1,strlen(insertstring),fp); - } - freeNbrList(nbr_list); - } - - CHECK_FOR_INTERRUPTS(); - } - - pfree(insertstring); - fclose(fp); - - // If we are updating an existing similarity model, - // we will want to drop the existing primary key - // constraint before doing the copy, to save time. - if (update) { - insertstring = (char*) palloc(1024*sizeof(char)); - sprintf(insertstring,"ALTER TABLE %s DROP CONSTRAINT %s_pkey;", - modelname,modelname); - recathon_utilityExecute(insertstring); - pfree(insertstring); - } - - // With all the data written out, we're going to - // issue a COPY FROM command to bulk load the data - // into the database. - insertstring = (char*) palloc(1024*sizeof(char)); - sprintf(insertstring,"COPY %s FROM '%s' DELIMITERS ';';", - modelname,temprecfile); - recathon_utilityExecute(insertstring); - pfree(insertstring); - - // Now we add the primary key constraint. It's - // faster to add it after adding the data than - // having it incrementally updated. - insertstring = (char*) palloc(1024*sizeof(char)); - sprintf(insertstring,"ALTER TABLE %s ADD PRIMARY KEY (item1, item2)",modelname); - recathon_utilityExecute(insertstring); - pfree(insertstring); - - // We'll delete the temporary file here, to not hold onto - // memory longer than necessary. - if (unlink(temprecfile) < 0) - ereport(WARNING, - (errcode(ERRCODE_WARNING), - errmsg("failed to delete temporary file"))); - - // Free up the lists of sim_nodes and start again. - for (i = 0; i < numItems; i++) { - freeSimList(itemEvents[i]); - itemEvents[i] = NULL; - } - - // Return the number of events we used. - return numEvents; + char *eventval, char *modelname, int *itemIDs, float *itemAvgs, + float *itemPearsons, int numItems, bool update) { + int i, j, priorID; + int numEvents = 0; + char *querystring, *insertstring, *temprecfile; + sim_node *itemEvents; + // Information for other queries. + QueryDesc *simqueryDesc; + PlanState *simplanstate; + TupleTableSlot *simslot; + MemoryContext simcontext; + // Information for writing to file. + FILE *fp; + temprecfile = (char*) palloc(256*sizeof(char)); + sprintf(temprecfile,"recathon_temp_%s.dat",modelname); + + // If this is us updating a cell as opposed to building + // a recommender, we need to drop the existing entries. + if (update) { + char *dropstring; + + dropstring = (char*) palloc(256*sizeof(char)); + sprintf(dropstring,"DELETE FROM %s;",modelname); + recathon_queryExecute(dropstring); + pfree(dropstring); + } + + // With the precomputation done, we need to derive the actual item + // similarities. We can do this in a way that's linear in the number + // of I/Os and also the amount of storage. The complexity is relegated + // to in-memory calculations, which is the most affordable. We need to + // use this data structure here. + itemEvents = (sim_node*) palloc(numItems*sizeof(sim_node)); + for (i = 0; i < numItems; i++) + itemEvents[i] = NULL; + + // With the model created, we need to populate it, which means calculating + // similarity between all item pairs. We need to query the events table + // in order to get the key information. + querystring = (char*) palloc(1024*sizeof(char)); + sprintf(querystring,"SELECT r.%s,r.%s,r.%s FROM %s r ORDER BY r.%s;", + userkey,itemkey,eventval,eventtable,itemkey); + + // Begin extracting data. + priorID = -1; + simqueryDesc = recathon_queryStart(querystring, &simcontext); + simplanstate = simqueryDesc->planstate; + i = -1; + + for (;;) { + int simuser, simitem; + float simevent; + sim_node newnode; + + // Shut the compiler up. + simuser = 0; simitem = 0; simevent = 0.0; + + simslot = ExecProcNode(simplanstate); + if (TupIsNull(simslot)) break; + + simuser = getTupleInt(simslot,userkey); + simitem = getTupleInt(simslot,itemkey); + simevent = getTupleFloat(simslot,eventval); + + // Are we dealing with a new item ID? If so, switch to the next slot. + if (simitem != priorID) { + priorID = simitem; + i++; + } + + // We now have the user, item, and event for this tuple. + // We insert the results as a sim_node into the + // itemEvents table; we'll do calculations later. + newnode = createSimNode(simuser, simevent); + itemEvents[i] = simInsert(itemEvents[i], newnode); + numEvents++; + } + + // Query cleanup. + recathon_queryEnd(simqueryDesc, simcontext); + pfree(querystring); + + // We're going to write out the results to file. + if ((fp = fopen(temprecfile,"w")) == NULL) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("failed to open temporary file"))); + insertstring = (char*) palloc(128*sizeof(char)); + + // Now we do the similarity calculations. Note that we + // don't include duplicate entries, to save time and space. + // The first item ALWAYS has a lower value than the second. + for (i = 0; i < numItems; i++) { + float avg_i, pearson_i; + sim_node item_i; + nbr_node temp_nbr; + nbr_node nbr_list = NULL; + + item_i = itemEvents[i]; + if (!item_i) continue; + avg_i = itemAvgs[i]; + pearson_i = itemPearsons[i]; + + for (j = i+1; j < numItems; j++) { + float avg_j, pearson_j; + sim_node item_j; + int item1, item2; + float similarity; + + item_j = itemEvents[j]; + if (!item_j) continue; + avg_j = itemAvgs[j]; + pearson_j = itemPearsons[j]; + + similarity = pearsonSimilarity(item_i, item_j, avg_i, avg_j, pearson_i, pearson_j); + if (similarity == 0.0) continue; + item1 = itemIDs[i]; + item2 = itemIDs[j]; + + // Now we write. + if (NBRHOOD <= 0) { + sprintf(insertstring,"%d;%d;%f\n",item1,item2,similarity); + fwrite(insertstring,1,strlen(insertstring),fp); + } else { + nbr_node newnbr = createNbrNode(item1,item2,similarity); + nbr_list = nbrInsert(nbr_list,newnbr,NBRHOOD); + } + } + + // If we have a limited neighborhood, we write the results here. + if (NBRHOOD > 0) { + for (temp_nbr = nbr_list; temp_nbr; temp_nbr = temp_nbr->next) { + sprintf(insertstring,"%d;%d;%f\n",temp_nbr->item1, + temp_nbr->item2,temp_nbr->similarity); + fwrite(insertstring,1,strlen(insertstring),fp); + } + freeNbrList(nbr_list); + } + + CHECK_FOR_INTERRUPTS(); + } + + pfree(insertstring); + fclose(fp); + + // If we are updating an existing similarity model, + // we will want to drop the existing primary key + // constraint before doing the copy, to save time. + if (update) { + insertstring = (char*) palloc(1024*sizeof(char)); + sprintf(insertstring,"ALTER TABLE %s DROP CONSTRAINT %s_pkey;", + modelname,modelname); + recathon_utilityExecute(insertstring); + pfree(insertstring); + } + + // With all the data written out, we're going to + // issue a COPY FROM command to bulk load the data + // into the database. + insertstring = (char*) palloc(1024*sizeof(char)); + sprintf(insertstring,"COPY %s FROM '%s' DELIMITERS ';';", + modelname,temprecfile); + recathon_utilityExecute(insertstring); + pfree(insertstring); + + // Now we add the primary key constraint. It's + // faster to add it after adding the data than + // having it incrementally updated. + insertstring = (char*) palloc(1024*sizeof(char)); + sprintf(insertstring,"ALTER TABLE %s ADD PRIMARY KEY (item1, item2)",modelname); + recathon_utilityExecute(insertstring); + pfree(insertstring); + + // We'll delete the temporary file here, to not hold onto + // memory longer than necessary. + if (unlink(temprecfile) < 0) + ereport(WARNING, + (errcode(ERRCODE_WARNING), + errmsg("failed to delete temporary file"))); + + // Free up the lists of sim_nodes and start again. + for (i = 0; i < numItems; i++) { + freeSimList(itemEvents[i]); + itemEvents[i] = NULL; + } + + // Return the number of events we used. + return numEvents; } /* ---------------------------------------------------------------- @@ -1968,192 +1968,192 @@ updateItemPearModel(char *eventtable, char *userkey, char *itemkey, */ int updateUserCosModel(char *eventtable, char *userkey, char *itemkey, - char *eventval, char *modelname, int *userIDs, float *userLengths, - int numUsers, bool update) { - int i, j, priorID; - int numEvents = 0; - char *querystring, *insertstring, *temprecfile; - sim_node *userEvents; - // Information for other queries. - QueryDesc *simqueryDesc; - PlanState *simplanstate; - TupleTableSlot *simslot; - MemoryContext simcontext; - // Information for writing to file. - FILE *fp; - temprecfile = (char*) palloc(256*sizeof(char)); - sprintf(temprecfile,"recathon_temp_%s.dat",modelname); - - // If this is us updating a cell as opposed to building - // a recommender, we need to drop the existing entries. - if (update) { - char *dropstring; - - dropstring = (char*) palloc(256*sizeof(char)); - sprintf(dropstring,"DELETE FROM %s;",modelname); - recathon_queryExecute(dropstring); - pfree(dropstring); - } - - // With the precomputation done, we need to derive the actual user - // similarities. We can do this in a way that's linear in the number - // of I/Os and also the amount of storage. The complexity is relegated - // to in-memory calculations, which is the most affordable. We need to - // use this data structure here. - userEvents = (sim_node*) palloc(numUsers*sizeof(sim_node)); - for (i = 0; i < numUsers; i++) - userEvents[i] = NULL; - - // With the model created, we need to populate it, which means calculating - // similarity between all user pairs. We need to query the events table - // in order to get the key information. - querystring = (char*) palloc(1024*sizeof(char)); - sprintf(querystring,"SELECT r.%s,r.%s,r.%s FROM %s r ORDER BY r.%s;", - userkey,itemkey,eventval,eventtable,userkey); - - // Begin extracting data. - priorID = -1; - simqueryDesc = recathon_queryStart(querystring, &simcontext); - simplanstate = simqueryDesc->planstate; - i = -1; - - for (;;) { - int simuser, simitem; - float simevent; - sim_node newnode; - - // Shut the compiler up. - simuser = 0; simitem = 0; simevent = 0.0; - - simslot = ExecProcNode(simplanstate); - if (TupIsNull(simslot)) break; - - simuser = getTupleInt(simslot,userkey); - simitem = getTupleInt(simslot,itemkey); - simevent = getTupleFloat(simslot,eventval); - - // Are we dealing with a new user ID? If so, switch to the next slot. - if (simuser != priorID) { - priorID = simuser; - i++; - } - - // We now have the user, item, and event for this tuple. - // We insert the results as a sim_node into the - // userEvents table; we'll do calculations later. - newnode = createSimNode(simitem, simevent); - userEvents[i] = simInsert(userEvents[i], newnode); - numEvents++; - } - - // Query cleanup. - recathon_queryEnd(simqueryDesc, simcontext); - pfree(querystring); - - // We're going to write out the results to file. - if ((fp = fopen(temprecfile,"w")) == NULL) - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("failed to open temporary file"))); - insertstring = (char*) palloc(128*sizeof(char)); - - // Now we do the similarity calculations. Note that we - // don't include duplicate entries, to save time and space. - // The first user ALWAYS has a lower value than the second. - for (i = 0; i < numUsers; i++) { - float length_i; - sim_node user_i; - nbr_node temp_nbr; - nbr_node nbr_list = NULL; - - user_i = userEvents[i]; - if (!user_i) continue; - length_i = userLengths[i]; - - for (j = i+1; j < numUsers; j++) { - float length_j; - sim_node user_j; - int user1, user2; - float similarity; - - user_j = userEvents[j]; - if (!user_j) continue; - length_j = userLengths[j]; - - similarity = cosineSimilarity(user_i, user_j, length_i, length_j); - if (similarity <= 0) continue; - user1 = userIDs[i]; - user2 = userIDs[j]; - - // Now we write. - if (NBRHOOD <= 0) { - sprintf(insertstring,"%d;%d;%f\n",user1,user2,similarity); - fwrite(insertstring,1,strlen(insertstring),fp); - } else { - nbr_node newnbr = createNbrNode(user1,user2,similarity); - nbr_list = nbrInsert(nbr_list,newnbr,NBRHOOD); - } - } - - // If we have a limited neighborhood, we write the results here. - if (NBRHOOD > 0) { - for (temp_nbr = nbr_list; temp_nbr; temp_nbr = temp_nbr->next) { - sprintf(insertstring,"%d;%d;%f\n",temp_nbr->item1, - temp_nbr->item2,temp_nbr->similarity); - fwrite(insertstring,1,strlen(insertstring),fp); - } - freeNbrList(nbr_list); - } - - CHECK_FOR_INTERRUPTS(); - } - - pfree(insertstring); - fclose(fp); - - // If we are updating an existing similarity model, - // we will want to drop the existing primary key - // constraint before doing the copy, to save time. - if (update) { - insertstring = (char*) palloc(1024*sizeof(char)); - sprintf(insertstring,"ALTER TABLE %s DROP CONSTRAINT %s_pkey;", - modelname,modelname); - recathon_utilityExecute(insertstring); - pfree(insertstring); - } - - // With all the data written out, we're going to - // issue a COPY FROM command to bulk load the data - // into the database. - insertstring = (char*) palloc(1024*sizeof(char)); - sprintf(insertstring,"COPY %s FROM '%s' DELIMITERS ';';", - modelname,temprecfile); - recathon_utilityExecute(insertstring); - pfree(insertstring); - - // Now we add the primary key constraint. It's - // faster to add it after adding the data than - // having it incrementally updated. - insertstring = (char*) palloc(1024*sizeof(char)); - sprintf(insertstring,"ALTER TABLE %s ADD PRIMARY KEY (user1, user2)",modelname); - recathon_utilityExecute(insertstring); - pfree(insertstring); - - // We'll delete the temporary file here, to not hold onto - // memory longer than necessary. - if (unlink(temprecfile) < 0) - ereport(WARNING, - (errcode(ERRCODE_WARNING), - errmsg("failed to delete temporary file"))); - - // Free up the lists of sim_nodes and start again. - for (i = 0; i < numUsers; i++) { - freeSimList(userEvents[i]); - userEvents[i] = NULL; - } - - // Return the number of events we used. - return numEvents; + char *eventval, char *modelname, int *userIDs, float *userLengths, + int numUsers, bool update) { + int i, j, priorID; + int numEvents = 0; + char *querystring, *insertstring, *temprecfile; + sim_node *userEvents; + // Information for other queries. + QueryDesc *simqueryDesc; + PlanState *simplanstate; + TupleTableSlot *simslot; + MemoryContext simcontext; + // Information for writing to file. + FILE *fp; + temprecfile = (char*) palloc(256*sizeof(char)); + sprintf(temprecfile,"recathon_temp_%s.dat",modelname); + + // If this is us updating a cell as opposed to building + // a recommender, we need to drop the existing entries. + if (update) { + char *dropstring; + + dropstring = (char*) palloc(256*sizeof(char)); + sprintf(dropstring,"DELETE FROM %s;",modelname); + recathon_queryExecute(dropstring); + pfree(dropstring); + } + + // With the precomputation done, we need to derive the actual user + // similarities. We can do this in a way that's linear in the number + // of I/Os and also the amount of storage. The complexity is relegated + // to in-memory calculations, which is the most affordable. We need to + // use this data structure here. + userEvents = (sim_node*) palloc(numUsers*sizeof(sim_node)); + for (i = 0; i < numUsers; i++) + userEvents[i] = NULL; + + // With the model created, we need to populate it, which means calculating + // similarity between all user pairs. We need to query the events table + // in order to get the key information. + querystring = (char*) palloc(1024*sizeof(char)); + sprintf(querystring,"SELECT r.%s,r.%s,r.%s FROM %s r ORDER BY r.%s;", + userkey,itemkey,eventval,eventtable,userkey); + + // Begin extracting data. + priorID = -1; + simqueryDesc = recathon_queryStart(querystring, &simcontext); + simplanstate = simqueryDesc->planstate; + i = -1; + + for (;;) { + int simuser, simitem; + float simevent; + sim_node newnode; + + // Shut the compiler up. + simuser = 0; simitem = 0; simevent = 0.0; + + simslot = ExecProcNode(simplanstate); + if (TupIsNull(simslot)) break; + + simuser = getTupleInt(simslot,userkey); + simitem = getTupleInt(simslot,itemkey); + simevent = getTupleFloat(simslot,eventval); + + // Are we dealing with a new user ID? If so, switch to the next slot. + if (simuser != priorID) { + priorID = simuser; + i++; + } + + // We now have the user, item, and event for this tuple. + // We insert the results as a sim_node into the + // userEvents table; we'll do calculations later. + newnode = createSimNode(simitem, simevent); + userEvents[i] = simInsert(userEvents[i], newnode); + numEvents++; + } + + // Query cleanup. + recathon_queryEnd(simqueryDesc, simcontext); + pfree(querystring); + + // We're going to write out the results to file. + if ((fp = fopen(temprecfile,"w")) == NULL) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("failed to open temporary file"))); + insertstring = (char*) palloc(128*sizeof(char)); + + // Now we do the similarity calculations. Note that we + // don't include duplicate entries, to save time and space. + // The first user ALWAYS has a lower value than the second. + for (i = 0; i < numUsers; i++) { + float length_i; + sim_node user_i; + nbr_node temp_nbr; + nbr_node nbr_list = NULL; + + user_i = userEvents[i]; + if (!user_i) continue; + length_i = userLengths[i]; + + for (j = i+1; j < numUsers; j++) { + float length_j; + sim_node user_j; + int user1, user2; + float similarity; + + user_j = userEvents[j]; + if (!user_j) continue; + length_j = userLengths[j]; + + similarity = cosineSimilarity(user_i, user_j, length_i, length_j); + if (similarity <= 0) continue; + user1 = userIDs[i]; + user2 = userIDs[j]; + + // Now we write. + if (NBRHOOD <= 0) { + sprintf(insertstring,"%d;%d;%f\n",user1,user2,similarity); + fwrite(insertstring,1,strlen(insertstring),fp); + } else { + nbr_node newnbr = createNbrNode(user1,user2,similarity); + nbr_list = nbrInsert(nbr_list,newnbr,NBRHOOD); + } + } + + // If we have a limited neighborhood, we write the results here. + if (NBRHOOD > 0) { + for (temp_nbr = nbr_list; temp_nbr; temp_nbr = temp_nbr->next) { + sprintf(insertstring,"%d;%d;%f\n",temp_nbr->item1, + temp_nbr->item2,temp_nbr->similarity); + fwrite(insertstring,1,strlen(insertstring),fp); + } + freeNbrList(nbr_list); + } + + CHECK_FOR_INTERRUPTS(); + } + + pfree(insertstring); + fclose(fp); + + // If we are updating an existing similarity model, + // we will want to drop the existing primary key + // constraint before doing the copy, to save time. + if (update) { + insertstring = (char*) palloc(1024*sizeof(char)); + sprintf(insertstring,"ALTER TABLE %s DROP CONSTRAINT %s_pkey;", + modelname,modelname); + recathon_utilityExecute(insertstring); + pfree(insertstring); + } + + // With all the data written out, we're going to + // issue a COPY FROM command to bulk load the data + // into the database. + insertstring = (char*) palloc(1024*sizeof(char)); + sprintf(insertstring,"COPY %s FROM '%s' DELIMITERS ';';", + modelname,temprecfile); + recathon_utilityExecute(insertstring); + pfree(insertstring); + + // Now we add the primary key constraint. It's + // faster to add it after adding the data than + // having it incrementally updated. + insertstring = (char*) palloc(1024*sizeof(char)); + sprintf(insertstring,"ALTER TABLE %s ADD PRIMARY KEY (user1, user2)",modelname); + recathon_utilityExecute(insertstring); + pfree(insertstring); + + // We'll delete the temporary file here, to not hold onto + // memory longer than necessary. + if (unlink(temprecfile) < 0) + ereport(WARNING, + (errcode(ERRCODE_WARNING), + errmsg("failed to delete temporary file"))); + + // Free up the lists of sim_nodes and start again. + for (i = 0; i < numUsers; i++) { + freeSimList(userEvents[i]); + userEvents[i] = NULL; + } + + // Return the number of events we used. + return numEvents; } /* ---------------------------------------------------------------- @@ -2167,230 +2167,230 @@ updateUserCosModel(char *eventtable, char *userkey, char *itemkey, */ int updateUserPearModel(char *eventtable, char *userkey, char *itemkey, - char *eventval, char *modelname, int *userIDs, float *userAvgs, - float *userPearsons, int numUsers, bool update) { - int i, j, priorID; - int numEvents = 0; - char *querystring, *insertstring, *temprecfile; - sim_node *userEvents; - // Information for other queries. - QueryDesc *simqueryDesc; - PlanState *simplanstate; - TupleTableSlot *simslot; - MemoryContext simcontext; - // Information for writing to file. - FILE *fp; - temprecfile = (char*) palloc(256*sizeof(char)); - sprintf(temprecfile,"recathon_temp_%s.dat",modelname); + char *eventval, char *modelname, int *userIDs, float *userAvgs, + float *userPearsons, int numUsers, bool update) { + int i, j, priorID; + int numEvents = 0; + char *querystring, *insertstring, *temprecfile; + sim_node *userEvents; + // Information for other queries. + QueryDesc *simqueryDesc; + PlanState *simplanstate; + TupleTableSlot *simslot; + MemoryContext simcontext; + // Information for writing to file. + FILE *fp; + temprecfile = (char*) palloc(256*sizeof(char)); + sprintf(temprecfile,"recathon_temp_%s.dat",modelname); + + // If this is us updating a cell as opposed to building + // a recommender, we need to drop the existing entries. + if (update) { + char *dropstring; + + dropstring = (char*) palloc(256*sizeof(char)); + sprintf(dropstring,"DELETE FROM %s;",modelname); + recathon_queryExecute(dropstring); + pfree(dropstring); + } + + // With the precomputation done, we need to derive the actual item + // similarities. We can do this in a way that's linear in the number + // of I/Os and also the amount of storage. The complexity is relegated + // to in-memory calculations, which is the most affordable. We need to + // use this data structure here. + userEvents = (sim_node*) palloc(numUsers*sizeof(sim_node)); + for (i = 0; i < numUsers; i++) + userEvents[i] = NULL; + + // With the model created, we need to populate it, which means calculating + // similarity between all item pairs. We need to query the events table + // in order to get the key information. + querystring = (char*) palloc(1024*sizeof(char)); + sprintf(querystring,"SELECT r.%s,r.%s,r.%s FROM %s r ORDER BY r.%s;", + userkey,itemkey,eventval,eventtable,userkey); + + // Begin extracting data. + priorID = -1; + simqueryDesc = recathon_queryStart(querystring, &simcontext); + simplanstate = simqueryDesc->planstate; + i = -1; + + for (;;) { + int simuser, simitem; + float simevent; + sim_node newnode; + + // Shut the compiler up. + simuser = 0; simitem = 0; simevent = 0.0; + + simslot = ExecProcNode(simplanstate); + if (TupIsNull(simslot)) break; + + simuser = getTupleInt(simslot,userkey); + simitem = getTupleInt(simslot,itemkey); + simevent = getTupleFloat(simslot,eventval); + + // Are we dealing with a new user ID? If so, switch to the next slot. + if (simuser != priorID) { + priorID = simuser; + i++; + } + + // We now have the user, item, and event for this tuple. + // We insert the results as a sim_node into the + // userEvents table; we'll do calculations later. + newnode = createSimNode(simitem, simevent); + userEvents[i] = simInsert(userEvents[i], newnode); + numEvents++; + } + + // Query cleanup. + recathon_queryEnd(simqueryDesc, simcontext); + pfree(querystring); + + // We're going to write out the results to file. + if ((fp = fopen(temprecfile,"w")) == NULL) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("failed to open temporary file"))); + insertstring = (char*) palloc(128*sizeof(char)); + + // Now we do the similarity calculations. Note that we + // don't include duplicate entries, to save time and space. + // The first item ALWAYS has a lower value than the second. + for (i = 0; i < numUsers; i++) { + float avg_i, pearson_i; + sim_node user_i; + nbr_node temp_nbr; + nbr_node nbr_list = NULL; + + user_i = userEvents[i]; + if (!user_i) continue; + avg_i = userAvgs[i]; + pearson_i = userPearsons[i]; + + for (j = i+1; j < numUsers; j++) { + float avg_j, pearson_j; + sim_node user_j; + int user1, user2; + float similarity; + + user_j = userEvents[j]; + if (!user_j) continue; + avg_j = userAvgs[j]; + pearson_j = userPearsons[j]; + + similarity = pearsonSimilarity(user_i, user_j, avg_i, avg_j, pearson_i, pearson_j); + if (similarity == 0.0) continue; + user1 = userIDs[i]; + user2 = userIDs[j]; + + // Now we write. + if (NBRHOOD <= 0) { + sprintf(insertstring,"%d;%d;%f\n",user1,user2,similarity); + fwrite(insertstring,1,strlen(insertstring),fp); + } else { + nbr_node newnbr = createNbrNode(user1,user2,similarity); + nbr_list = nbrInsert(nbr_list,newnbr,NBRHOOD); + } + } + + // If we have a limited neighborhood, we write the results here. + if (NBRHOOD > 0) { + for (temp_nbr = nbr_list; temp_nbr; temp_nbr = temp_nbr->next) { + sprintf(insertstring,"%d;%d;%f\n",temp_nbr->item1, + temp_nbr->item2,temp_nbr->similarity); + fwrite(insertstring,1,strlen(insertstring),fp); + } + freeNbrList(nbr_list); + } + + CHECK_FOR_INTERRUPTS(); + } + + pfree(insertstring); + fclose(fp); + + // If we are updating an existing similarity model, + // we will want to drop the existing primary key + // constraint before doing the copy, to save time. + if (update) { + insertstring = (char*) palloc(1024*sizeof(char)); + sprintf(insertstring,"ALTER TABLE %s DROP CONSTRAINT %s_pkey;", + modelname,modelname); + recathon_utilityExecute(insertstring); + pfree(insertstring); + } + + // With all the data written out, we're going to + // issue a COPY FROM command to bulk load the data + // into the database. + insertstring = (char*) palloc(1024*sizeof(char)); + sprintf(insertstring,"COPY %s FROM '%s' DELIMITERS ';';", + modelname,temprecfile); + recathon_utilityExecute(insertstring); + pfree(insertstring); + + // Now we add the primary key constraint. It's + // faster to add it after adding the data than + // having it incrementally updated. + insertstring = (char*) palloc(1024*sizeof(char)); + sprintf(insertstring,"ALTER TABLE %s ADD PRIMARY KEY (user1, user2)",modelname); + recathon_utilityExecute(insertstring); + pfree(insertstring); + + // We'll delete the temporary file here, to not hold onto + // memory longer than necessary. + if (unlink(temprecfile) < 0) + ereport(WARNING, + (errcode(ERRCODE_WARNING), + errmsg("failed to delete temporary file"))); + + // Free up the lists of sim_nodes and start again. + for (i = 0; i < numUsers; i++) { + freeSimList(userEvents[i]); + userEvents[i] = NULL; + } + + // Return the number of events we used. + return numEvents; +} - // If this is us updating a cell as opposed to building - // a recommender, we need to drop the existing entries. - if (update) { - char *dropstring; - - dropstring = (char*) palloc(256*sizeof(char)); - sprintf(dropstring,"DELETE FROM %s;",modelname); - recathon_queryExecute(dropstring); - pfree(dropstring); - } - - // With the precomputation done, we need to derive the actual item - // similarities. We can do this in a way that's linear in the number - // of I/Os and also the amount of storage. The complexity is relegated - // to in-memory calculations, which is the most affordable. We need to - // use this data structure here. - userEvents = (sim_node*) palloc(numUsers*sizeof(sim_node)); - for (i = 0; i < numUsers; i++) - userEvents[i] = NULL; - - // With the model created, we need to populate it, which means calculating - // similarity between all item pairs. We need to query the events table - // in order to get the key information. - querystring = (char*) palloc(1024*sizeof(char)); - sprintf(querystring,"SELECT r.%s,r.%s,r.%s FROM %s r ORDER BY r.%s;", - userkey,itemkey,eventval,eventtable,userkey); - - // Begin extracting data. - priorID = -1; - simqueryDesc = recathon_queryStart(querystring, &simcontext); - simplanstate = simqueryDesc->planstate; - i = -1; - - for (;;) { - int simuser, simitem; - float simevent; - sim_node newnode; - - // Shut the compiler up. - simuser = 0; simitem = 0; simevent = 0.0; - - simslot = ExecProcNode(simplanstate); - if (TupIsNull(simslot)) break; - - simuser = getTupleInt(simslot,userkey); - simitem = getTupleInt(simslot,itemkey); - simevent = getTupleFloat(simslot,eventval); - - // Are we dealing with a new user ID? If so, switch to the next slot. - if (simuser != priorID) { - priorID = simuser; - i++; - } - - // We now have the user, item, and event for this tuple. - // We insert the results as a sim_node into the - // userEvents table; we'll do calculations later. - newnode = createSimNode(simitem, simevent); - userEvents[i] = simInsert(userEvents[i], newnode); - numEvents++; - } - - // Query cleanup. - recathon_queryEnd(simqueryDesc, simcontext); - pfree(querystring); - - // We're going to write out the results to file. - if ((fp = fopen(temprecfile,"w")) == NULL) - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("failed to open temporary file"))); - insertstring = (char*) palloc(128*sizeof(char)); - - // Now we do the similarity calculations. Note that we - // don't include duplicate entries, to save time and space. - // The first item ALWAYS has a lower value than the second. - for (i = 0; i < numUsers; i++) { - float avg_i, pearson_i; - sim_node user_i; - nbr_node temp_nbr; - nbr_node nbr_list = NULL; - - user_i = userEvents[i]; - if (!user_i) continue; - avg_i = userAvgs[i]; - pearson_i = userPearsons[i]; - - for (j = i+1; j < numUsers; j++) { - float avg_j, pearson_j; - sim_node user_j; - int user1, user2; - float similarity; - - user_j = userEvents[j]; - if (!user_j) continue; - avg_j = userAvgs[j]; - pearson_j = userPearsons[j]; - - similarity = pearsonSimilarity(user_i, user_j, avg_i, avg_j, pearson_i, pearson_j); - if (similarity == 0.0) continue; - user1 = userIDs[i]; - user2 = userIDs[j]; - - // Now we write. - if (NBRHOOD <= 0) { - sprintf(insertstring,"%d;%d;%f\n",user1,user2,similarity); - fwrite(insertstring,1,strlen(insertstring),fp); - } else { - nbr_node newnbr = createNbrNode(user1,user2,similarity); - nbr_list = nbrInsert(nbr_list,newnbr,NBRHOOD); - } - } - - // If we have a limited neighborhood, we write the results here. - if (NBRHOOD > 0) { - for (temp_nbr = nbr_list; temp_nbr; temp_nbr = temp_nbr->next) { - sprintf(insertstring,"%d;%d;%f\n",temp_nbr->item1, - temp_nbr->item2,temp_nbr->similarity); - fwrite(insertstring,1,strlen(insertstring),fp); - } - freeNbrList(nbr_list); - } - - CHECK_FOR_INTERRUPTS(); - } - - pfree(insertstring); - fclose(fp); - - // If we are updating an existing similarity model, - // we will want to drop the existing primary key - // constraint before doing the copy, to save time. - if (update) { - insertstring = (char*) palloc(1024*sizeof(char)); - sprintf(insertstring,"ALTER TABLE %s DROP CONSTRAINT %s_pkey;", - modelname,modelname); - recathon_utilityExecute(insertstring); - pfree(insertstring); - } - - // With all the data written out, we're going to - // issue a COPY FROM command to bulk load the data - // into the database. - insertstring = (char*) palloc(1024*sizeof(char)); - sprintf(insertstring,"COPY %s FROM '%s' DELIMITERS ';';", - modelname,temprecfile); - recathon_utilityExecute(insertstring); - pfree(insertstring); - - // Now we add the primary key constraint. It's - // faster to add it after adding the data than - // having it incrementally updated. - insertstring = (char*) palloc(1024*sizeof(char)); - sprintf(insertstring,"ALTER TABLE %s ADD PRIMARY KEY (user1, user2)",modelname); - recathon_utilityExecute(insertstring); - pfree(insertstring); - - // We'll delete the temporary file here, to not hold onto - // memory longer than necessary. - if (unlink(temprecfile) < 0) - ereport(WARNING, - (errcode(ERRCODE_WARNING), - errmsg("failed to delete temporary file"))); - - // Free up the lists of sim_nodes and start again. - for (i = 0; i < numUsers; i++) { - freeSimList(userEvents[i]); - userEvents[i] = NULL; - } - - // Return the number of events we used. - return numEvents; -} - -/* ---------------------------------------------------------------- - * createSVDnode - * - * This function creates a new SVD node out of a - * TupleTableSlot. - * ---------------------------------------------------------------- - */ -svd_node createSVDnode(TupleTableSlot *slot, char *userkey, char *itemkey, char *eventval, - int *userIDs, int *itemIDs, int numUsers, int numItems) { - int userid, itemid; - svd_node new_svd; - - // Quiet the compiler. - userid = -1; - itemid = -1; - - new_svd = (svd_node) palloc(sizeof(struct svd_node_t)); - // Default values. - new_svd->userid = -1; - new_svd->itemid = -1; - new_svd->event = -1; - new_svd->residual = 0.0; - - userid = getTupleInt(slot,userkey); - itemid = getTupleInt(slot,itemkey); - new_svd->event = getTupleFloat(slot,eventval); - - // If we convert IDs to indexes in our arrays, it will make - // our lives easier. - new_svd->userid = binarySearch(userIDs,userid,0,numUsers); - new_svd->itemid = binarySearch(itemIDs,itemid,0,numItems); - - return new_svd; -} +/* ---------------------------------------------------------------- + * createSVDnode + * + * This function creates a new SVD node out of a + * TupleTableSlot. + * ---------------------------------------------------------------- + */ +svd_node createSVDnode(TupleTableSlot *slot, char *userkey, char *itemkey, char *eventval, + int *userIDs, int *itemIDs, int numUsers, int numItems) { + int userid, itemid; + svd_node new_svd; + + // Quiet the compiler. + userid = -1; + itemid = -1; + + new_svd = (svd_node) palloc(sizeof(struct svd_node_t)); + // Default values. + new_svd->userid = -1; + new_svd->itemid = -1; + new_svd->event = -1; + new_svd->residual = 0.0; + + userid = getTupleInt(slot,userkey); + itemid = getTupleInt(slot,itemkey); + new_svd->event = getTupleFloat(slot,eventval); + + // If we convert IDs to indexes in our arrays, it will make + // our lives easier. + new_svd->userid = binarySearch(userIDs,userid,0,numUsers); + new_svd->itemid = binarySearch(itemIDs,itemid,0,numItems); + + return new_svd; +} /* ---------------------------------------------------------------- * SVDlists @@ -2401,94 +2401,94 @@ svd_node createSVDnode(TupleTableSlot *slot, char *userkey, char *itemkey, char */ void SVDlists(char *userkey, char *itemkey, char *eventtable, - int **ret_userIDs, int **ret_itemIDs, - int *ret_numUsers, int *ret_numItems) { - int i, numUsers, numItems; - int *userIDs, *itemIDs; - char *querystring; - // Information for other queries. - QueryDesc *queryDesc; - PlanState *planstate; - TupleTableSlot *slot; - MemoryContext recathoncontext; - - querystring = (char*) palloc(1024*sizeof(char)); - - // First, let's get the list of users. We need to count how many - // we're dealing with. - sprintf(querystring,"SELECT COUNT(DISTINCT %s) FROM %s;", - userkey,eventtable); - - queryDesc = recathon_queryStart(querystring, &recathoncontext); - planstate = queryDesc->planstate; - - slot = ExecProcNode(planstate); - if (TupIsNull(slot)) - numUsers = 0; - else - numUsers = getTupleInt(slot,"count"); - recathon_queryEnd(queryDesc, recathoncontext); - userIDs = (int*) palloc(numUsers*sizeof(int)); - - sprintf(querystring,"SELECT DISTINCT %s FROM %s ORDER BY %s;", - userkey,eventtable,userkey); - - queryDesc = recathon_queryStart(querystring, &recathoncontext); - planstate = queryDesc->planstate; - - i = 0; - for (;;) { - slot = ExecProcNode(planstate); - if (TupIsNull(slot)) break; - - userIDs[i] = getTupleInt(slot, userkey); - - i++; - if (i >= numUsers) break; - } - - recathon_queryEnd(queryDesc, recathoncontext); - - // Next, the list of items. - sprintf(querystring,"SELECT COUNT(DISTINCT %s) FROM %s;", - itemkey,eventtable); - - queryDesc = recathon_queryStart(querystring, &recathoncontext); - planstate = queryDesc->planstate; - - slot = ExecProcNode(planstate); - if (TupIsNull(slot)) - numItems = 0; - else - numItems = getTupleInt(slot,"count"); - recathon_queryEnd(queryDesc, recathoncontext); - itemIDs = (int*) palloc(numItems*sizeof(int)); - - querystring = (char*) palloc(1024*sizeof(char)); - sprintf(querystring,"SELECT DISTINCT %s FROM %s ORDER BY %s;", - itemkey, eventtable, itemkey); - queryDesc = recathon_queryStart(querystring, &recathoncontext); - planstate = queryDesc->planstate; - - i = 0; - for (;;) { - slot = ExecProcNode(planstate); - if (TupIsNull(slot)) break; - - itemIDs[i] = getTupleInt(slot, itemkey); - - i++; - if (i >= numItems) break; - } - - recathon_queryEnd(queryDesc, recathoncontext); - pfree(querystring); - - // Now we return the data. - (*ret_userIDs) = userIDs; - (*ret_itemIDs) = itemIDs; - (*ret_numUsers) = numUsers; - (*ret_numItems) = numItems; + int **ret_userIDs, int **ret_itemIDs, + int *ret_numUsers, int *ret_numItems) { + int i, numUsers, numItems; + int *userIDs, *itemIDs; + char *querystring; + // Information for other queries. + QueryDesc *queryDesc; + PlanState *planstate; + TupleTableSlot *slot; + MemoryContext recathoncontext; + + querystring = (char*) palloc(1024*sizeof(char)); + + // First, let's get the list of users. We need to count how many + // we're dealing with. + sprintf(querystring,"SELECT COUNT(DISTINCT %s) FROM %s;", + userkey,eventtable); + + queryDesc = recathon_queryStart(querystring, &recathoncontext); + planstate = queryDesc->planstate; + + slot = ExecProcNode(planstate); + if (TupIsNull(slot)) + numUsers = 0; + else + numUsers = getTupleInt(slot,"count"); + recathon_queryEnd(queryDesc, recathoncontext); + userIDs = (int*) palloc(numUsers*sizeof(int)); + + sprintf(querystring,"SELECT DISTINCT %s FROM %s ORDER BY %s;", + userkey,eventtable,userkey); + + queryDesc = recathon_queryStart(querystring, &recathoncontext); + planstate = queryDesc->planstate; + + i = 0; + for (;;) { + slot = ExecProcNode(planstate); + if (TupIsNull(slot)) break; + + userIDs[i] = getTupleInt(slot, userkey); + + i++; + if (i >= numUsers) break; + } + + recathon_queryEnd(queryDesc, recathoncontext); + + // Next, the list of items. + sprintf(querystring,"SELECT COUNT(DISTINCT %s) FROM %s;", + itemkey,eventtable); + + queryDesc = recathon_queryStart(querystring, &recathoncontext); + planstate = queryDesc->planstate; + + slot = ExecProcNode(planstate); + if (TupIsNull(slot)) + numItems = 0; + else + numItems = getTupleInt(slot,"count"); + recathon_queryEnd(queryDesc, recathoncontext); + itemIDs = (int*) palloc(numItems*sizeof(int)); + + querystring = (char*) palloc(1024*sizeof(char)); + sprintf(querystring,"SELECT DISTINCT %s FROM %s ORDER BY %s;", + itemkey, eventtable, itemkey); + queryDesc = recathon_queryStart(querystring, &recathoncontext); + planstate = queryDesc->planstate; + + i = 0; + for (;;) { + slot = ExecProcNode(planstate); + if (TupIsNull(slot)) break; + + itemIDs[i] = getTupleInt(slot, itemkey); + + i++; + if (i >= numItems) break; + } + + recathon_queryEnd(queryDesc, recathoncontext); + pfree(querystring); + + // Now we return the data. + (*ret_userIDs) = userIDs; + (*ret_itemIDs) = itemIDs; + (*ret_numUsers) = numUsers; + (*ret_numItems) = numItems; } /* ---------------------------------------------------------------- @@ -2501,172 +2501,172 @@ SVDlists(char *userkey, char *itemkey, char *eventtable, */ void SVDaverages(char *userkey, char *itemkey, char *eventtable, char *eventval, - int *userIDs, int *itemIDs, int numUsers, int numItems, - float **ret_itemAvgs, float **ret_userOffsets) { - int i, priorID; - int *userCounts, *itemCounts; - float *userAvgs, *itemAvgs; - float *itemSums; - float *itemSqs; // Squares of sums. Used to calculate variances. - float *itemVars; // Variances. - float globalAvg; - float globalSum = 0.0; - float globalAvgSum = 0.0; - float globalSq = 0.0; - float globalVar; - // Information for other queries. - char *querystring; - QueryDesc *queryDesc; - PlanState *planstate; - TupleTableSlot *slot; - MemoryContext recathoncontext; - - // Initialize arrays. - itemCounts = (int*) palloc(numItems*sizeof(int)); - itemAvgs = (float*) palloc(numItems*sizeof(float)); - itemSums = (float*) palloc(numItems*sizeof(float)); - itemSqs = (float*) palloc(numItems*sizeof(float)); - itemVars = (float*) palloc(numItems*sizeof(float)); - for (i = 0; i < numItems; i++) { - itemCounts[i] = 0; - itemSums[i] = 0.0; - itemSqs[i] = 0.0; - } - - // We need to issue a query to get event information. - querystring = (char*) palloc(256*sizeof(char)); - sprintf(querystring,"SELECT %s,%s FROM %s ORDER BY %s;", - itemkey,eventval,eventtable,itemkey); - - priorID = -1; - queryDesc = recathon_queryStart(querystring,&recathoncontext); - planstate = queryDesc->planstate; - i = -1; - - for (;;) { - int itemnum = 0; - float event = 0.0; - - slot = ExecProcNode(planstate); - if (TupIsNull(slot)) break; - - itemnum = getTupleInt(slot,itemkey); - event = getTupleFloat(slot,eventval); - - // Are we dealing with a new item ID? If so, switch to the next slot. - if (itemnum != priorID) { - priorID = itemnum; - i++; - } - - itemCounts[i] += 1; - itemSums[i] += event; - itemSqs[i] += (event*event); - } - - recathon_queryEnd(queryDesc,recathoncontext); - - // We have enough data to calculate individual item variances. - for (i = 0; i < numItems; i++) { - float sum, sumsqr; - int n; - - n = itemCounts[i]; - sum = itemSums[i]; - sumsqr = itemSqs[i]; - - if (n <= 0) - itemVars[i] = 0; - else - itemVars[i] = (sumsqr - ((sum*sum)/n))/n; - - // We can also start calculating the global variance in this loop. - // Some notation abuse. - globalSum += sum; - if (n > 0) { - sum = sum/n; - globalAvgSum += sum; - globalSq += (sum*sum); - } - } - - // Now we derive the global variance. - globalVar = (globalSq - ((globalAvgSum*globalAvgSum)/numItems))/numItems; - globalAvg = globalSum/count_rows(eventtable); - - // Finally, we can obtain the baseline averages for each item. - for (i = 0; i < numItems; i++) { - float k; - - if (globalVar == 0) - k = 0; - else - k = itemVars[i] / globalVar; - - if ((k + itemCounts[i]) > 0) - itemAvgs[i] = ((globalAvg*k) + itemSums[i]) / (k + itemCounts[i]); - else - itemAvgs[i] = 0; - } - - // With the averages calculated, we can now calculate the average offset - // for each user. This involves querying the user table again. - userCounts = (int*) palloc(numUsers*sizeof(int)); - for (i = 0; i < numUsers; i++) - userCounts[i] = 0; - userAvgs = (float*) palloc(numUsers*sizeof(float)); - for (i = 0; i < numUsers; i++) - userAvgs[i] = 0.0; - - sprintf(querystring,"SELECT r.%s,r.%s,r.%s FROM %s r;", - userkey,itemkey,eventval,eventtable); - - queryDesc = recathon_queryStart(querystring,&recathoncontext); - planstate = queryDesc->planstate; - - for (;;) { - int userindex, itemindex; - int usernum, itemnum; - float event; - - slot = ExecProcNode(planstate); - if (TupIsNull(slot)) break; - - usernum = getTupleInt(slot,userkey); - itemnum = getTupleInt(slot,itemkey); - event = getTupleFloat(slot,eventval); - userindex = binarySearch(userIDs, usernum, 0, numUsers); - itemindex = binarySearch(itemIDs, itemnum, 0, numItems); - - // We need to find the average offset of a user's event from - // the average event. - if (userindex >= 0 && userindex < numUsers) { - userCounts[userindex] += 1; - userAvgs[userindex] += event - itemAvgs[itemindex]; - } - } - - recathon_queryEnd(queryDesc,recathoncontext); - - // Now we just divide by the counts. - for (i = 0; i < numUsers; i++) { - if (userCounts[i] > 0) - userAvgs[i] /= userCounts[i]; - else - userAvgs[i] = 0; - } - - // Free up memory. - pfree(itemCounts); - pfree(itemSums); - pfree(itemSqs); - pfree(itemVars); - pfree(userCounts); - pfree(querystring); - - // With that information calculated, we can finally return. - (*ret_itemAvgs) = itemAvgs; - (*ret_userOffsets) = userAvgs; + int *userIDs, int *itemIDs, int numUsers, int numItems, + float **ret_itemAvgs, float **ret_userOffsets) { + int i, priorID; + int *userCounts, *itemCounts; + float *userAvgs, *itemAvgs; + float *itemSums; + float *itemSqs; // Squares of sums. Used to calculate variances. + float *itemVars; // Variances. + float globalAvg; + float globalSum = 0.0; + float globalAvgSum = 0.0; + float globalSq = 0.0; + float globalVar; + // Information for other queries. + char *querystring; + QueryDesc *queryDesc; + PlanState *planstate; + TupleTableSlot *slot; + MemoryContext recathoncontext; + + // Initialize arrays. + itemCounts = (int*) palloc(numItems*sizeof(int)); + itemAvgs = (float*) palloc(numItems*sizeof(float)); + itemSums = (float*) palloc(numItems*sizeof(float)); + itemSqs = (float*) palloc(numItems*sizeof(float)); + itemVars = (float*) palloc(numItems*sizeof(float)); + for (i = 0; i < numItems; i++) { + itemCounts[i] = 0; + itemSums[i] = 0.0; + itemSqs[i] = 0.0; + } + + // We need to issue a query to get event information. + querystring = (char*) palloc(256*sizeof(char)); + sprintf(querystring,"SELECT %s,%s FROM %s ORDER BY %s;", + itemkey,eventval,eventtable,itemkey); + + priorID = -1; + queryDesc = recathon_queryStart(querystring,&recathoncontext); + planstate = queryDesc->planstate; + i = -1; + + for (;;) { + int itemnum = 0; + float event = 0.0; + + slot = ExecProcNode(planstate); + if (TupIsNull(slot)) break; + + itemnum = getTupleInt(slot,itemkey); + event = getTupleFloat(slot,eventval); + + // Are we dealing with a new item ID? If so, switch to the next slot. + if (itemnum != priorID) { + priorID = itemnum; + i++; + } + + itemCounts[i] += 1; + itemSums[i] += event; + itemSqs[i] += (event*event); + } + + recathon_queryEnd(queryDesc,recathoncontext); + + // We have enough data to calculate individual item variances. + for (i = 0; i < numItems; i++) { + float sum, sumsqr; + int n; + + n = itemCounts[i]; + sum = itemSums[i]; + sumsqr = itemSqs[i]; + + if (n <= 0) + itemVars[i] = 0; + else + itemVars[i] = (sumsqr - ((sum*sum)/n))/n; + + // We can also start calculating the global variance in this loop. + // Some notation abuse. + globalSum += sum; + if (n > 0) { + sum = sum/n; + globalAvgSum += sum; + globalSq += (sum*sum); + } + } + + // Now we derive the global variance. + globalVar = (globalSq - ((globalAvgSum*globalAvgSum)/numItems))/numItems; + globalAvg = globalSum/count_rows(eventtable); + + // Finally, we can obtain the baseline averages for each item. + for (i = 0; i < numItems; i++) { + float k; + + if (globalVar == 0) + k = 0; + else + k = itemVars[i] / globalVar; + + if ((k + itemCounts[i]) > 0) + itemAvgs[i] = ((globalAvg*k) + itemSums[i]) / (k + itemCounts[i]); + else + itemAvgs[i] = 0; + } + + // With the averages calculated, we can now calculate the average offset + // for each user. This involves querying the user table again. + userCounts = (int*) palloc(numUsers*sizeof(int)); + for (i = 0; i < numUsers; i++) + userCounts[i] = 0; + userAvgs = (float*) palloc(numUsers*sizeof(float)); + for (i = 0; i < numUsers; i++) + userAvgs[i] = 0.0; + + sprintf(querystring,"SELECT r.%s,r.%s,r.%s FROM %s r;", + userkey,itemkey,eventval,eventtable); + + queryDesc = recathon_queryStart(querystring,&recathoncontext); + planstate = queryDesc->planstate; + + for (;;) { + int userindex, itemindex; + int usernum, itemnum; + float event; + + slot = ExecProcNode(planstate); + if (TupIsNull(slot)) break; + + usernum = getTupleInt(slot,userkey); + itemnum = getTupleInt(slot,itemkey); + event = getTupleFloat(slot,eventval); + userindex = binarySearch(userIDs, usernum, 0, numUsers); + itemindex = binarySearch(itemIDs, itemnum, 0, numItems); + + // We need to find the average offset of a user's event from + // the average event. + if (userindex >= 0 && userindex < numUsers) { + userCounts[userindex] += 1; + userAvgs[userindex] += event - itemAvgs[itemindex]; + } + } + + recathon_queryEnd(queryDesc,recathoncontext); + + // Now we just divide by the counts. + for (i = 0; i < numUsers; i++) { + if (userCounts[i] > 0) + userAvgs[i] /= userCounts[i]; + else + userAvgs[i] = 0; + } + + // Free up memory. + pfree(itemCounts); + pfree(itemSums); + pfree(itemSqs); + pfree(itemVars); + pfree(userCounts); + pfree(querystring); + + // With that information calculated, we can finally return. + (*ret_itemAvgs) = itemAvgs; + (*ret_userOffsets) = userAvgs; } /* ---------------------------------------------------------------- @@ -2678,15 +2678,15 @@ SVDaverages(char *userkey, char *itemkey, char *eventtable, char *eventval, */ float predictRating(int featurenum, int numFeatures, int userid, int itemid, - float **userFeatures, float **itemFeatures, float residual) { - int i; - float rating; - - rating = residual; - for (i = featurenum; i < numFeatures; i++) - rating += userFeatures[i][userid] * itemFeatures[i][itemid]; - - return rating; + float **userFeatures, float **itemFeatures, float residual) { + int i; + float rating; + + rating = residual; + for (i = featurenum; i < numFeatures; i++) + rating += userFeatures[i][userid] * itemFeatures[i][itemid]; + + return rating; } /* ---------------------------------------------------------------- @@ -2698,245 +2698,245 @@ predictRating(int featurenum, int numFeatures, int userid, int itemid, */ int SVDtrain(char *userkey, char *itemkey, char *eventtable, char *eventval, - char *usermodelname, char *itemmodelname, bool update) { - float **userFeatures, **itemFeatures; - int *userIDs, *itemIDs; - float *itemAvgs, *userOffsets; - int numUsers, numItems; - int i, j, k, numEvents; - int numFeatures = 50; - svd_node *allEvents; - FILE *fp; - char *tempfilename, *insertstring; - // Information for other queries. - char *querystring; - QueryDesc *queryDesc; - PlanState *planstate; - TupleTableSlot *slot; - MemoryContext recathoncontext; - - // If this is us updating a cell as opposed to building - // a recommender, we need to drop the existing entries. - if (update) { - char *dropstring; - - dropstring = (char*) palloc(256*sizeof(char)); - sprintf(dropstring,"DELETE FROM %s;",usermodelname); - recathon_queryExecute(dropstring); - sprintf(dropstring,"DELETE FROM %s;",itemmodelname); - recathon_queryExecute(dropstring); - pfree(dropstring); - } - - // First, we get our lists of users and items. - SVDlists(userkey,itemkey,eventtable, - &userIDs, &itemIDs, &numUsers, &numItems); - - // Then we get information for baseline averages. - SVDaverages(userkey,itemkey,eventtable,eventval, - userIDs,itemIDs,numUsers,numItems, - &itemAvgs,&userOffsets); - - // Initialize our feature arrays. - userFeatures = (float**) palloc(numFeatures*sizeof(float*)); - for (i = 0; i < numFeatures; i++) { - userFeatures[i] = (float*) palloc(numUsers*sizeof(float)); - for (j = 0; j < numUsers; j++) - userFeatures[i][j] = 0.1; - } - itemFeatures = (float**) palloc(numFeatures*sizeof(float*)); - for (i = 0; i < numFeatures; i++) { - itemFeatures[i] = (float*) palloc(numItems*sizeof(float)); - for (j = 0; j < numItems; j++) - itemFeatures[i][j] = 0.1; - } - - // First we need to count the number of events we'll be - // considering. - querystring = (char*) palloc(1024*sizeof(char)); - numEvents = count_rows(eventtable); - - // Initialize the events array. - allEvents = (svd_node*) palloc(numEvents*sizeof(svd_node)); - - sprintf(querystring,"SELECT r.%s,r.%s,r.%s FROM %s r ORDER BY r.%s;", - userkey,itemkey,eventval,eventtable,userkey); - - // Let's acquire all of our events and store them. Sorting initially by - // user ID avoids unnecessary binary searches. - queryDesc = recathon_queryStart(querystring,&recathoncontext); - planstate = queryDesc->planstate; - - i = 0; - for (;;) { - svd_node new_svd; - - slot = ExecProcNode(planstate); - if (TupIsNull(slot)) break; - - new_svd = createSVDnode(slot, userkey, itemkey, eventval, userIDs, itemIDs, numUsers, numItems); - - allEvents[i] = new_svd; - - i++; - if (i >= numEvents) break; - } - - recathon_queryEnd(queryDesc,recathoncontext); - - // We now have all of the events, so we can start training our features. - for (j = 0; j < 100; j++) { - for (i = 0; i < numFeatures; i++) { - float learn = 0.001; - float penalty = 0.002; - float *userVal = userFeatures[i]; - float *itemVal = itemFeatures[i]; - - for (k = 0; k < numEvents; k++) { - int userid; - int itemid; - float event, err, residual, temp; - svd_node current_svd; - - current_svd = allEvents[k]; - userid = current_svd->userid; - itemid = current_svd->itemid; - event = current_svd->event; - // Need to reset residuals for each new - // iteration of the trainer. - if (i == 0) - current_svd->residual = 0; - residual = current_svd->residual; - - if (i == 0 && j == 0) { - err = event - (itemAvgs[itemid] + userOffsets[userid]); - } else { - err = event - predictRating(i, numFeatures, userid, itemid, - userFeatures, itemFeatures, residual); - } - temp = userVal[userid]; - userVal[userid] += learn * ((err * itemVal[itemid]) - (penalty * userVal[userid])); - itemVal[itemid] += learn * ((err * temp) - (penalty * itemVal[itemid])); - - // Store residuals. - if (i == 0) - current_svd->residual = userVal[userid] * itemVal[itemid]; - else - current_svd->residual += userVal[userid] * itemVal[itemid]; - } - - CHECK_FOR_INTERRUPTS(); - } - } - - // With the training finished, we need to write out the data to file, - // so we can put it back. First, the user model. - tempfilename = (char*) palloc(256*sizeof(char)); - sprintf(tempfilename,"recathon_temp_%s.dat",usermodelname); - if ((fp = fopen(tempfilename,"w")) == NULL) - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("failed to open temporary file"))); - insertstring = (char*) palloc(128*sizeof(char)); - - for (i = 0; i < numFeatures; i++) { - for (j = 0; j < numUsers; j++) { - sprintf(insertstring,"%d;%d;%f\n",userIDs[j],i,userFeatures[i][j]); - fwrite(insertstring,1,strlen(insertstring),fp); - } - } - fclose(fp); - - // If we are updating an existing SVD model, - // we will want to drop the existing primary key - // constraint before doing the copy, to save time. - if (update) { - insertstring = (char*) palloc(1024*sizeof(char)); - sprintf(insertstring,"ALTER TABLE %s DROP CONSTRAINT %s_pkey;", - usermodelname,usermodelname); - recathon_utilityExecute(insertstring); - pfree(insertstring); - } - - // We can bulk load the data with COPY FROM. It's faster - // than individual inserts by a good margin. - sprintf(querystring,"COPY %s FROM '%s' DELIMITERS ';';", - usermodelname,tempfilename); - recathon_utilityExecute(querystring); - - // Adding a primary key after the COPY FROM is about 25% faster - // than adding it before. - sprintf(querystring,"ALTER TABLE %s ADD PRIMARY KEY (users, feature);",usermodelname); - recathon_utilityExecute(querystring); - - // Delete the temporary file. - if (unlink(tempfilename) < 0) - ereport(WARNING, - (errcode(ERRCODE_WARNING), - errmsg("failed to delete temporary file"))); - - // Now do it again for the item model. - tempfilename = (char*) palloc(256*sizeof(char)); - sprintf(tempfilename,"recathon_temp_%s.dat",itemmodelname); - if ((fp = fopen(tempfilename,"w")) == NULL) - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("failed to open temporary file"))); - - for (i = 0; i < numFeatures; i++) { - for (j = 0; j < numItems; j++) { - char insertstring[128]; - sprintf(insertstring,"%d;%d;%f\n",itemIDs[j],i,itemFeatures[i][j]); - fwrite(insertstring,1,strlen(insertstring),fp); - } - } - fclose(fp); - - // If we are updating an existing SVD model, - // we will want to drop the existing primary key - // constraint before doing the copy, to save time. - if (update) { - insertstring = (char*) palloc(1024*sizeof(char)); - sprintf(insertstring,"ALTER TABLE %s DROP CONSTRAINT %s_pkey;", - itemmodelname,itemmodelname); - recathon_utilityExecute(insertstring); - pfree(insertstring); - } - - // We can bulk load the data with COPY FROM. It's faster - // than individual inserts by a good margin. - sprintf(querystring,"COPY %s FROM '%s' DELIMITERS ';';", - itemmodelname,tempfilename); - recathon_utilityExecute(querystring); - - // Adding a primary key after the COPY FROM is about 25% faster - // than adding it before. - sprintf(querystring,"ALTER TABLE %s ADD PRIMARY KEY (items, feature);",itemmodelname); - recathon_utilityExecute(querystring); - - // Delete the temporary file. - if (unlink(tempfilename) < 0) - ereport(WARNING, - (errcode(ERRCODE_WARNING), - errmsg("failed to delete temporary file"))); - - // Free up memory. - pfree(querystring); - pfree(userIDs); - pfree(itemIDs); - pfree(itemAvgs); - pfree(userOffsets); - pfree(allEvents); - - for (i = 0; i < numFeatures; i++) - pfree(userFeatures[i]); - pfree(userFeatures); - for (i = 0; i < numFeatures; i++) - pfree(itemFeatures[i]); - pfree(itemFeatures); - - // Return the number of events we used. - return numEvents; + char *usermodelname, char *itemmodelname, bool update) { + float **userFeatures, **itemFeatures; + int *userIDs, *itemIDs; + float *itemAvgs, *userOffsets; + int numUsers, numItems; + int i, j, k, numEvents; + int numFeatures = 50; + svd_node *allEvents; + FILE *fp; + char *tempfilename, *insertstring; + // Information for other queries. + char *querystring; + QueryDesc *queryDesc; + PlanState *planstate; + TupleTableSlot *slot; + MemoryContext recathoncontext; + + // If this is us updating a cell as opposed to building + // a recommender, we need to drop the existing entries. + if (update) { + char *dropstring; + + dropstring = (char*) palloc(256*sizeof(char)); + sprintf(dropstring,"DELETE FROM %s;",usermodelname); + recathon_queryExecute(dropstring); + sprintf(dropstring,"DELETE FROM %s;",itemmodelname); + recathon_queryExecute(dropstring); + pfree(dropstring); + } + + // First, we get our lists of users and items. + SVDlists(userkey,itemkey,eventtable, + &userIDs, &itemIDs, &numUsers, &numItems); + + // Then we get information for baseline averages. + SVDaverages(userkey,itemkey,eventtable,eventval, + userIDs,itemIDs,numUsers,numItems, + &itemAvgs,&userOffsets); + + // Initialize our feature arrays. + userFeatures = (float**) palloc(numFeatures*sizeof(float*)); + for (i = 0; i < numFeatures; i++) { + userFeatures[i] = (float*) palloc(numUsers*sizeof(float)); + for (j = 0; j < numUsers; j++) + userFeatures[i][j] = 0.1; + } + itemFeatures = (float**) palloc(numFeatures*sizeof(float*)); + for (i = 0; i < numFeatures; i++) { + itemFeatures[i] = (float*) palloc(numItems*sizeof(float)); + for (j = 0; j < numItems; j++) + itemFeatures[i][j] = 0.1; + } + + // First we need to count the number of events we'll be + // considering. + querystring = (char*) palloc(1024*sizeof(char)); + numEvents = count_rows(eventtable); + + // Initialize the events array. + allEvents = (svd_node*) palloc(numEvents*sizeof(svd_node)); + + sprintf(querystring,"SELECT r.%s,r.%s,r.%s FROM %s r ORDER BY r.%s;", + userkey,itemkey,eventval,eventtable,userkey); + + // Let's acquire all of our events and store them. Sorting initially by + // user ID avoids unnecessary binary searches. + queryDesc = recathon_queryStart(querystring,&recathoncontext); + planstate = queryDesc->planstate; + + i = 0; + for (;;) { + svd_node new_svd; + + slot = ExecProcNode(planstate); + if (TupIsNull(slot)) break; + + new_svd = createSVDnode(slot, userkey, itemkey, eventval, userIDs, itemIDs, numUsers, numItems); + + allEvents[i] = new_svd; + + i++; + if (i >= numEvents) break; + } + + recathon_queryEnd(queryDesc,recathoncontext); + + // We now have all of the events, so we can start training our features. + for (j = 0; j < 100; j++) { + for (i = 0; i < numFeatures; i++) { + float learn = 0.001; + float penalty = 0.002; + float *userVal = userFeatures[i]; + float *itemVal = itemFeatures[i]; + + for (k = 0; k < numEvents; k++) { + int userid; + int itemid; + float event, err, residual, temp; + svd_node current_svd; + + current_svd = allEvents[k]; + userid = current_svd->userid; + itemid = current_svd->itemid; + event = current_svd->event; + // Need to reset residuals for each new + // iteration of the trainer. + if (i == 0) + current_svd->residual = 0; + residual = current_svd->residual; + + if (i == 0 && j == 0) { + err = event - (itemAvgs[itemid] + userOffsets[userid]); + } else { + err = event - predictRating(i, numFeatures, userid, itemid, + userFeatures, itemFeatures, residual); + } + temp = userVal[userid]; + userVal[userid] += learn * ((err * itemVal[itemid]) - (penalty * userVal[userid])); + itemVal[itemid] += learn * ((err * temp) - (penalty * itemVal[itemid])); + + // Store residuals. + if (i == 0) + current_svd->residual = userVal[userid] * itemVal[itemid]; + else + current_svd->residual += userVal[userid] * itemVal[itemid]; + } + + CHECK_FOR_INTERRUPTS(); + } + } + + // With the training finished, we need to write out the data to file, + // so we can put it back. First, the user model. + tempfilename = (char*) palloc(256*sizeof(char)); + sprintf(tempfilename,"recathon_temp_%s.dat",usermodelname); + if ((fp = fopen(tempfilename,"w")) == NULL) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("failed to open temporary file"))); + insertstring = (char*) palloc(128*sizeof(char)); + + for (i = 0; i < numFeatures; i++) { + for (j = 0; j < numUsers; j++) { + sprintf(insertstring,"%d;%d;%f\n",userIDs[j],i,userFeatures[i][j]); + fwrite(insertstring,1,strlen(insertstring),fp); + } + } + fclose(fp); + + // If we are updating an existing SVD model, + // we will want to drop the existing primary key + // constraint before doing the copy, to save time. + if (update) { + insertstring = (char*) palloc(1024*sizeof(char)); + sprintf(insertstring,"ALTER TABLE %s DROP CONSTRAINT %s_pkey;", + usermodelname,usermodelname); + recathon_utilityExecute(insertstring); + pfree(insertstring); + } + + // We can bulk load the data with COPY FROM. It's faster + // than individual inserts by a good margin. + sprintf(querystring,"COPY %s FROM '%s' DELIMITERS ';';", + usermodelname,tempfilename); + recathon_utilityExecute(querystring); + + // Adding a primary key after the COPY FROM is about 25% faster + // than adding it before. + sprintf(querystring,"ALTER TABLE %s ADD PRIMARY KEY (users, feature);",usermodelname); + recathon_utilityExecute(querystring); + + // Delete the temporary file. + if (unlink(tempfilename) < 0) + ereport(WARNING, + (errcode(ERRCODE_WARNING), + errmsg("failed to delete temporary file"))); + + // Now do it again for the item model. + tempfilename = (char*) palloc(256*sizeof(char)); + sprintf(tempfilename,"recathon_temp_%s.dat",itemmodelname); + if ((fp = fopen(tempfilename,"w")) == NULL) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("failed to open temporary file"))); + + for (i = 0; i < numFeatures; i++) { + for (j = 0; j < numItems; j++) { + char insertstring[128]; + sprintf(insertstring,"%d;%d;%f\n",itemIDs[j],i,itemFeatures[i][j]); + fwrite(insertstring,1,strlen(insertstring),fp); + } + } + fclose(fp); + + // If we are updating an existing SVD model, + // we will want to drop the existing primary key + // constraint before doing the copy, to save time. + if (update) { + insertstring = (char*) palloc(1024*sizeof(char)); + sprintf(insertstring,"ALTER TABLE %s DROP CONSTRAINT %s_pkey;", + itemmodelname,itemmodelname); + recathon_utilityExecute(insertstring); + pfree(insertstring); + } + + // We can bulk load the data with COPY FROM. It's faster + // than individual inserts by a good margin. + sprintf(querystring,"COPY %s FROM '%s' DELIMITERS ';';", + itemmodelname,tempfilename); + recathon_utilityExecute(querystring); + + // Adding a primary key after the COPY FROM is about 25% faster + // than adding it before. + sprintf(querystring,"ALTER TABLE %s ADD PRIMARY KEY (items, feature);",itemmodelname); + recathon_utilityExecute(querystring); + + // Delete the temporary file. + if (unlink(tempfilename) < 0) + ereport(WARNING, + (errcode(ERRCODE_WARNING), + errmsg("failed to delete temporary file"))); + + // Free up memory. + pfree(querystring); + pfree(userIDs); + pfree(itemIDs); + pfree(itemAvgs); + pfree(userOffsets); + pfree(allEvents); + + for (i = 0; i < numFeatures; i++) + pfree(userFeatures[i]); + pfree(userFeatures); + for (i = 0; i < numFeatures; i++) + pfree(itemFeatures[i]); + pfree(itemFeatures); + + // Return the number of events we used. + return numEvents; } /* ---------------------------------------------------------------- @@ -2947,128 +2947,128 @@ SVDtrain(char *userkey, char *itemkey, char *eventtable, char *eventval, */ void generateItemCosModel(RecScanState *recnode) { - int i, j, priorID; - AttributeInfo *attributes; - float **itemmodel; - char *eventtable, *userkey, *itemkey, *eventval; - int numItems; - int *itemIDs; - float *itemLengths; - sim_node *itemEvents; - // Information for other queries. - char *querystring; - QueryDesc *simqueryDesc; - PlanState *simplanstate; - TupleTableSlot *simslot; - MemoryContext simcontext; - - attributes = (AttributeInfo*) recnode->attributes; - eventtable = attributes->eventtable; - userkey = attributes->userkey; - itemkey = attributes->itemkey; - eventval = attributes->eventval; - - /* We start by getting vector lengths. */ - itemLengths = vector_lengths(itemkey,eventtable,eventval,&numItems,&itemIDs); - - /* We have the number of items, so we can initialize our model. */ - itemmodel = (float**) palloc(numItems*sizeof(float*)); - for (i = 0; i < numItems; i++) - itemmodel[i] = (float*) palloc0(numItems*sizeof(float)); - - /* Then we can calculate similarity values for our model. We start by - * storing all the ratings. */ - itemEvents = (sim_node*) palloc(numItems*sizeof(sim_node)); - for (i = 0; i < numItems; i++) - itemEvents[i] = NULL; - - /* With the model created, we need to populate it, which means calculating - * similarity between all item pairs. We need to query the events table - * in order to get the key information. We'll also keep track of the number - * of events used, since we need to store that information. */ - querystring = (char*) palloc(1024*sizeof(char)); - sprintf(querystring,"SELECT r.%s,r.%s,r.%s FROM %s r ORDER BY r.%s;", - userkey,itemkey,eventval,eventtable,itemkey); - - /* Begin extracting data. */ - priorID = -1; - simqueryDesc = recathon_queryStart(querystring, &simcontext); - simplanstate = simqueryDesc->planstate; - i = -1; - - for (;;) { - int simuser, simitem; - float simevent; - sim_node newnode; - - /* Shut the compiler up. */ - simuser = 0; simitem = 0; simevent = 0.0; - - simslot = ExecProcNode(simplanstate); - if (TupIsNull(simslot)) break; - - simuser = getTupleInt(simslot,userkey); - simitem = getTupleInt(simslot,itemkey); - simevent = getTupleFloat(simslot,eventval); - - /* Are we dealing with a new item ID? If so, switch to the next slot. */ - if (simitem != priorID) { - priorID = simitem; - i++; - } - - /* We now have the user, item, and event for this tuple. - * We insert the results as a sim_node into the - * itemEvents table; we'll do calculations later. */ - newnode = createSimNode(simuser, simevent); - itemEvents[i] = simInsert(itemEvents[i], newnode); - } - - /* Query cleanup. */ - recathon_queryEnd(simqueryDesc, simcontext); - - /* Now we do the similarity calculations. Note that we - * don't include duplicate entries, to save time and space. - * The first item ALWAYS has a lower value than the second. */ - for (i = 0; i < numItems; i++) { - float length_i; - sim_node item_i; - - item_i = itemEvents[i]; - if (!item_i) continue; - length_i = itemLengths[i]; - - for (j = i+1; j < numItems; j++) { - float length_j; - sim_node item_j; - float similarity; - - item_j = itemEvents[j]; - if (!item_j) continue; - length_j = itemLengths[j]; - - similarity = cosineSimilarity(item_i, item_j, length_i, length_j); - if (similarity <= 0) continue; - - /* Now we output. Like with the pre-computed model, we'll - * only worry about half the model. This allows us to fill - * in the matrix left-to-right, top-to-bottom. */ - itemmodel[i][j] = similarity; - } - - CHECK_FOR_INTERRUPTS(); - } - - /* Free up the lists of sim_nodes now, since we're done. */ - for (i = 0; i < numItems; i++) { - freeSimList(itemEvents[i]); - itemEvents[i] = NULL; - } - - /* Fill in the appropriate information. */ - recnode->fullTotalItems = numItems; - recnode->fullItemList = itemIDs; - recnode->itemCFmodel = itemmodel; + int i, j, priorID; + AttributeInfo *attributes; + float **itemmodel; + char *eventtable, *userkey, *itemkey, *eventval; + int numItems; + int *itemIDs; + float *itemLengths; + sim_node *itemEvents; + // Information for other queries. + char *querystring; + QueryDesc *simqueryDesc; + PlanState *simplanstate; + TupleTableSlot *simslot; + MemoryContext simcontext; + + attributes = (AttributeInfo*) recnode->attributes; + eventtable = attributes->eventtable; + userkey = attributes->userkey; + itemkey = attributes->itemkey; + eventval = attributes->eventval; + + /* We start by getting vector lengths. */ + itemLengths = vector_lengths(itemkey,eventtable,eventval,&numItems,&itemIDs); + + /* We have the number of items, so we can initialize our model. */ + itemmodel = (float**) palloc(numItems*sizeof(float*)); + for (i = 0; i < numItems; i++) + itemmodel[i] = (float*) palloc0(numItems*sizeof(float)); + + /* Then we can calculate similarity values for our model. We start by + * storing all the ratings. */ + itemEvents = (sim_node*) palloc(numItems*sizeof(sim_node)); + for (i = 0; i < numItems; i++) + itemEvents[i] = NULL; + + /* With the model created, we need to populate it, which means calculating + * similarity between all item pairs. We need to query the events table + * in order to get the key information. We'll also keep track of the number + * of events used, since we need to store that information. */ + querystring = (char*) palloc(1024*sizeof(char)); + sprintf(querystring,"SELECT r.%s,r.%s,r.%s FROM %s r ORDER BY r.%s;", + userkey,itemkey,eventval,eventtable,itemkey); + + /* Begin extracting data. */ + priorID = -1; + simqueryDesc = recathon_queryStart(querystring, &simcontext); + simplanstate = simqueryDesc->planstate; + i = -1; + + for (;;) { + int simuser, simitem; + float simevent; + sim_node newnode; + + /* Shut the compiler up. */ + simuser = 0; simitem = 0; simevent = 0.0; + + simslot = ExecProcNode(simplanstate); + if (TupIsNull(simslot)) break; + + simuser = getTupleInt(simslot,userkey); + simitem = getTupleInt(simslot,itemkey); + simevent = getTupleFloat(simslot,eventval); + + /* Are we dealing with a new item ID? If so, switch to the next slot. */ + if (simitem != priorID) { + priorID = simitem; + i++; + } + + /* We now have the user, item, and event for this tuple. + * We insert the results as a sim_node into the + * itemEvents table; we'll do calculations later. */ + newnode = createSimNode(simuser, simevent); + itemEvents[i] = simInsert(itemEvents[i], newnode); + } + + /* Query cleanup. */ + recathon_queryEnd(simqueryDesc, simcontext); + + /* Now we do the similarity calculations. Note that we + * don't include duplicate entries, to save time and space. + * The first item ALWAYS has a lower value than the second. */ + for (i = 0; i < numItems; i++) { + float length_i; + sim_node item_i; + + item_i = itemEvents[i]; + if (!item_i) continue; + length_i = itemLengths[i]; + + for (j = i+1; j < numItems; j++) { + float length_j; + sim_node item_j; + float similarity; + + item_j = itemEvents[j]; + if (!item_j) continue; + length_j = itemLengths[j]; + + similarity = cosineSimilarity(item_i, item_j, length_i, length_j); + if (similarity <= 0) continue; + + /* Now we output. Like with the pre-computed model, we'll + * only worry about half the model. This allows us to fill + * in the matrix left-to-right, top-to-bottom. */ + itemmodel[i][j] = similarity; + } + + CHECK_FOR_INTERRUPTS(); + } + + /* Free up the lists of sim_nodes now, since we're done. */ + for (i = 0; i < numItems; i++) { + freeSimList(itemEvents[i]); + itemEvents[i] = NULL; + } + + /* Fill in the appropriate information. */ + recnode->fullTotalItems = numItems; + recnode->fullItemList = itemIDs; + recnode->itemCFmodel = itemmodel; } /* ---------------------------------------------------------------- @@ -3079,134 +3079,134 @@ generateItemCosModel(RecScanState *recnode) { */ void generateItemPearModel(RecScanState *recnode) { - int i, j, priorID; - char *querystring; - char *eventtable, *userkey, *itemkey, *eventval; - sim_node *itemEvents; - int numItems; - int *itemIDs; - float *itemAvgs; - float *itemPearsons; - AttributeInfo *attributes; - float **itemmodel; - // Information for other queries. - QueryDesc *simqueryDesc; - PlanState *simplanstate; - TupleTableSlot *simslot; - MemoryContext simcontext; - - attributes = (AttributeInfo*) recnode->attributes; - eventtable = attributes->eventtable; - userkey = attributes->userkey; - itemkey = attributes->itemkey; - eventval = attributes->eventval; - - // First we need to get relevant Pearson information. - pearson_info(itemkey, eventtable, eventval, &numItems, &itemIDs, &itemAvgs, &itemPearsons); - - /* We have the number of items, so we can initialize our model. */ - itemmodel = (float**) palloc(numItems*sizeof(float*)); - for (i = 0; i < numItems; i++) - itemmodel[i] = (float*) palloc0(numItems*sizeof(float)); - - // With the precomputation done, we need to derive the actual item - // similarities. We can do this in a way that's linear in the number - // of I/Os and also the amount of storage. The complexity is relegated - // to in-memory calculations, which is the most affordable. We need to - // use this data structure here. - itemEvents = (sim_node*) palloc(numItems*sizeof(sim_node)); - for (i = 0; i < numItems; i++) - itemEvents[i] = NULL; - - // With the model created, we need to populate it, which means calculating - // similarity between all item pairs. We need to query the events table - // in order to get the key information. - querystring = (char*) palloc(1024*sizeof(char)); - sprintf(querystring,"SELECT r.%s,r.%s,r.%s FROM %s r ORDER BY r.%s;", - userkey,itemkey,eventval,eventtable,itemkey); - - // Begin extracting data. - priorID = -1; - simqueryDesc = recathon_queryStart(querystring, &simcontext); - simplanstate = simqueryDesc->planstate; - i = -1; - - for (;;) { - int simuser, simitem; - float simevent; - sim_node newnode; - - // Shut the compiler up. - simuser = 0; simitem = 0; simevent = 0.0; - - simslot = ExecProcNode(simplanstate); - if (TupIsNull(simslot)) break; - - simuser = getTupleInt(simslot,userkey); - simitem = getTupleInt(simslot,itemkey); - simevent = getTupleFloat(simslot,eventval); - - // Are we dealing with a new item ID? If so, switch to the next slot. - if (simitem != priorID) { - priorID = simitem; - i++; - } - - // We now have the user, item, and event for this tuple. - // We insert the results as a sim_node into the - // itemEvents table; we'll do calculations later. - newnode = createSimNode(simuser, simevent); - itemEvents[i] = simInsert(itemEvents[i], newnode); - } - - // Query cleanup. - recathon_queryEnd(simqueryDesc, simcontext); - pfree(querystring); - - // Now we do the similarity calculations. Note that we - // don't include duplicate entries, to save time and space. - // The first item ALWAYS has a lower value than the second. - for (i = 0; i < numItems; i++) { - float avg_i, pearson_i; - sim_node item_i; - - item_i = itemEvents[i]; - if (!item_i) continue; - avg_i = itemAvgs[i]; - pearson_i = itemPearsons[i]; - - for (j = i+1; j < numItems; j++) { - float avg_j, pearson_j; - sim_node item_j; - float similarity; - - item_j = itemEvents[j]; - if (!item_j) continue; - avg_j = itemAvgs[j]; - pearson_j = itemPearsons[j]; - - similarity = pearsonSimilarity(item_i, item_j, avg_i, avg_j, pearson_i, pearson_j); - if (similarity == 0.0) continue; - - /* Now we output. Like with the pre-computed model, we'll - * only worry about half the model. This allows us to fill - * in the matrix left-to-right, top-to-bottom. */ - itemmodel[i][j] = similarity; - } - - CHECK_FOR_INTERRUPTS(); - } - - // Free up the lists of sim_nodes and we're done. - for (i = 0; i < numItems; i++) { - freeSimList(itemEvents[i]); - itemEvents[i] = NULL; - } - - // Return the relevant information. - recnode->fullTotalItems = numItems; - recnode->fullItemList = itemIDs; - recnode->itemCFmodel = itemmodel; + int i, j, priorID; + char *querystring; + char *eventtable, *userkey, *itemkey, *eventval; + sim_node *itemEvents; + int numItems; + int *itemIDs; + float *itemAvgs; + float *itemPearsons; + AttributeInfo *attributes; + float **itemmodel; + // Information for other queries. + QueryDesc *simqueryDesc; + PlanState *simplanstate; + TupleTableSlot *simslot; + MemoryContext simcontext; + + attributes = (AttributeInfo*) recnode->attributes; + eventtable = attributes->eventtable; + userkey = attributes->userkey; + itemkey = attributes->itemkey; + eventval = attributes->eventval; + + // First we need to get relevant Pearson information. + pearson_info(itemkey, eventtable, eventval, &numItems, &itemIDs, &itemAvgs, &itemPearsons); + + /* We have the number of items, so we can initialize our model. */ + itemmodel = (float**) palloc(numItems*sizeof(float*)); + for (i = 0; i < numItems; i++) + itemmodel[i] = (float*) palloc0(numItems*sizeof(float)); + + // With the precomputation done, we need to derive the actual item + // similarities. We can do this in a way that's linear in the number + // of I/Os and also the amount of storage. The complexity is relegated + // to in-memory calculations, which is the most affordable. We need to + // use this data structure here. + itemEvents = (sim_node*) palloc(numItems*sizeof(sim_node)); + for (i = 0; i < numItems; i++) + itemEvents[i] = NULL; + + // With the model created, we need to populate it, which means calculating + // similarity between all item pairs. We need to query the events table + // in order to get the key information. + querystring = (char*) palloc(1024*sizeof(char)); + sprintf(querystring,"SELECT r.%s,r.%s,r.%s FROM %s r ORDER BY r.%s;", + userkey,itemkey,eventval,eventtable,itemkey); + + // Begin extracting data. + priorID = -1; + simqueryDesc = recathon_queryStart(querystring, &simcontext); + simplanstate = simqueryDesc->planstate; + i = -1; + + for (;;) { + int simuser, simitem; + float simevent; + sim_node newnode; + + // Shut the compiler up. + simuser = 0; simitem = 0; simevent = 0.0; + + simslot = ExecProcNode(simplanstate); + if (TupIsNull(simslot)) break; + + simuser = getTupleInt(simslot,userkey); + simitem = getTupleInt(simslot,itemkey); + simevent = getTupleFloat(simslot,eventval); + + // Are we dealing with a new item ID? If so, switch to the next slot. + if (simitem != priorID) { + priorID = simitem; + i++; + } + + // We now have the user, item, and event for this tuple. + // We insert the results as a sim_node into the + // itemEvents table; we'll do calculations later. + newnode = createSimNode(simuser, simevent); + itemEvents[i] = simInsert(itemEvents[i], newnode); + } + + // Query cleanup. + recathon_queryEnd(simqueryDesc, simcontext); + pfree(querystring); + + // Now we do the similarity calculations. Note that we + // don't include duplicate entries, to save time and space. + // The first item ALWAYS has a lower value than the second. + for (i = 0; i < numItems; i++) { + float avg_i, pearson_i; + sim_node item_i; + + item_i = itemEvents[i]; + if (!item_i) continue; + avg_i = itemAvgs[i]; + pearson_i = itemPearsons[i]; + + for (j = i+1; j < numItems; j++) { + float avg_j, pearson_j; + sim_node item_j; + float similarity; + + item_j = itemEvents[j]; + if (!item_j) continue; + avg_j = itemAvgs[j]; + pearson_j = itemPearsons[j]; + + similarity = pearsonSimilarity(item_i, item_j, avg_i, avg_j, pearson_i, pearson_j); + if (similarity == 0.0) continue; + + /* Now we output. Like with the pre-computed model, we'll + * only worry about half the model. This allows us to fill + * in the matrix left-to-right, top-to-bottom. */ + itemmodel[i][j] = similarity; + } + + CHECK_FOR_INTERRUPTS(); + } + + // Free up the lists of sim_nodes and we're done. + for (i = 0; i < numItems; i++) { + freeSimList(itemEvents[i]); + itemEvents[i] = NULL; + } + + // Return the relevant information. + recnode->fullTotalItems = numItems; + recnode->fullItemList = itemIDs; + recnode->itemCFmodel = itemmodel; } /* ---------------------------------------------------------------- @@ -3217,133 +3217,133 @@ generateItemPearModel(RecScanState *recnode) { */ void generateUserCosModel(RecScanState *recnode) { - int i, j, priorID; - int numEvents = 0; - char *querystring; - sim_node *userEvents; - char *eventtable, *userkey, *itemkey, *eventval; - AttributeInfo *attributes; - float **usermodel; - int numUsers; - int *userIDs; - float *userLengths; - // Information for other queries. - QueryDesc *simqueryDesc; - PlanState *simplanstate; - TupleTableSlot *simslot; - MemoryContext simcontext; - - attributes = (AttributeInfo*) recnode->attributes; - eventtable = attributes->eventtable; - userkey = attributes->userkey; - itemkey = attributes->itemkey; - eventval = attributes->eventval; - - // First we need vector lengths. - userLengths = vector_lengths(userkey, eventtable, eventval, &numUsers, &userIDs); - - /* We have the number of users, so we can initialize our model. */ - usermodel = (float**) palloc(numUsers*sizeof(float*)); - for (i = 0; i < numUsers; i++) - usermodel[i] = (float*) palloc0(numUsers*sizeof(float)); - - // With the precomputation done, we need to derive the actual user - // similarities. We can do this in a way that's linear in the number - // of I/Os and also the amount of storage. The complexity is relegated - // to in-memory calculations, which is the most affordable. We need to - // use this data structure here. - userEvents = (sim_node*) palloc(numUsers*sizeof(sim_node)); - for (i = 0; i < numUsers; i++) - userEvents[i] = NULL; - - // With the model created, we need to populate it, which means calculating - // similarity between all user pairs. We need to query the events table - // in order to get the key information. - querystring = (char*) palloc(1024*sizeof(char)); - sprintf(querystring,"SELECT r.%s,r.%s,r.%s FROM %s r ORDER BY r.%s;", - userkey,itemkey,eventval,eventtable,userkey); - - // Begin extracting data. - priorID = -1; - simqueryDesc = recathon_queryStart(querystring, &simcontext); - simplanstate = simqueryDesc->planstate; - i = -1; - - for (;;) { - int simuser, simitem; - float simevent; - sim_node newnode; - - // Shut the compiler up. - simuser = 0; simitem = 0; simevent = 0.0; - - simslot = ExecProcNode(simplanstate); - if (TupIsNull(simslot)) break; - - simuser = getTupleInt(simslot,userkey); - simitem = getTupleInt(simslot,itemkey); - simevent = getTupleFloat(simslot,eventval); - - // Are we dealing with a new user ID? If so, switch to the next slot. - if (simuser != priorID) { - priorID = simuser; - i++; - } - - // We now have the user, item, and event for this tuple. - // We insert the results as a sim_node into the - // userEvents table; we'll do calculations later. - newnode = createSimNode(simitem, simevent); - userEvents[i] = simInsert(userEvents[i], newnode); - numEvents++; - } - - // Query cleanup. - recathon_queryEnd(simqueryDesc, simcontext); - pfree(querystring); - - // Now we do the similarity calculations. Note that we - // don't include duplicate entries, to save time and space. - // The first user ALWAYS has a lower value than the second. - for (i = 0; i < numUsers; i++) { - float length_i; - sim_node user_i; - - user_i = userEvents[i]; - if (!user_i) continue; - length_i = userLengths[i]; - - for (j = i+1; j < numUsers; j++) { - float length_j; - sim_node user_j; - float similarity; - - user_j = userEvents[j]; - if (!user_j) continue; - length_j = userLengths[j]; - - similarity = cosineSimilarity(user_i, user_j, length_i, length_j); - if (similarity <= 0) continue; - - /* Now we output. Like with the pre-computed model, we'll - * only worry about half the model. This allows us to fill - * in the matrix left-to-right, top-to-bottom. */ - usermodel[i][j] = similarity; - } - - CHECK_FOR_INTERRUPTS(); - } - - // Free up the lists of sim_nodes and we're done. - for (i = 0; i < numUsers; i++) { - freeSimList(userEvents[i]); - userEvents[i] = NULL; - } - - // Return the relevant information. - recnode->totalUsers = numUsers; - recnode->userList = userIDs; - recnode->userCFmodel = usermodel; + int i, j, priorID; + int numEvents = 0; + char *querystring; + sim_node *userEvents; + char *eventtable, *userkey, *itemkey, *eventval; + AttributeInfo *attributes; + float **usermodel; + int numUsers; + int *userIDs; + float *userLengths; + // Information for other queries. + QueryDesc *simqueryDesc; + PlanState *simplanstate; + TupleTableSlot *simslot; + MemoryContext simcontext; + + attributes = (AttributeInfo*) recnode->attributes; + eventtable = attributes->eventtable; + userkey = attributes->userkey; + itemkey = attributes->itemkey; + eventval = attributes->eventval; + + // First we need vector lengths. + userLengths = vector_lengths(userkey, eventtable, eventval, &numUsers, &userIDs); + + /* We have the number of users, so we can initialize our model. */ + usermodel = (float**) palloc(numUsers*sizeof(float*)); + for (i = 0; i < numUsers; i++) + usermodel[i] = (float*) palloc0(numUsers*sizeof(float)); + + // With the precomputation done, we need to derive the actual user + // similarities. We can do this in a way that's linear in the number + // of I/Os and also the amount of storage. The complexity is relegated + // to in-memory calculations, which is the most affordable. We need to + // use this data structure here. + userEvents = (sim_node*) palloc(numUsers*sizeof(sim_node)); + for (i = 0; i < numUsers; i++) + userEvents[i] = NULL; + + // With the model created, we need to populate it, which means calculating + // similarity between all user pairs. We need to query the events table + // in order to get the key information. + querystring = (char*) palloc(1024*sizeof(char)); + sprintf(querystring,"SELECT r.%s,r.%s,r.%s FROM %s r ORDER BY r.%s;", + userkey,itemkey,eventval,eventtable,userkey); + + // Begin extracting data. + priorID = -1; + simqueryDesc = recathon_queryStart(querystring, &simcontext); + simplanstate = simqueryDesc->planstate; + i = -1; + + for (;;) { + int simuser, simitem; + float simevent; + sim_node newnode; + + // Shut the compiler up. + simuser = 0; simitem = 0; simevent = 0.0; + + simslot = ExecProcNode(simplanstate); + if (TupIsNull(simslot)) break; + + simuser = getTupleInt(simslot,userkey); + simitem = getTupleInt(simslot,itemkey); + simevent = getTupleFloat(simslot,eventval); + + // Are we dealing with a new user ID? If so, switch to the next slot. + if (simuser != priorID) { + priorID = simuser; + i++; + } + + // We now have the user, item, and event for this tuple. + // We insert the results as a sim_node into the + // userEvents table; we'll do calculations later. + newnode = createSimNode(simitem, simevent); + userEvents[i] = simInsert(userEvents[i], newnode); + numEvents++; + } + + // Query cleanup. + recathon_queryEnd(simqueryDesc, simcontext); + pfree(querystring); + + // Now we do the similarity calculations. Note that we + // don't include duplicate entries, to save time and space. + // The first user ALWAYS has a lower value than the second. + for (i = 0; i < numUsers; i++) { + float length_i; + sim_node user_i; + + user_i = userEvents[i]; + if (!user_i) continue; + length_i = userLengths[i]; + + for (j = i+1; j < numUsers; j++) { + float length_j; + sim_node user_j; + float similarity; + + user_j = userEvents[j]; + if (!user_j) continue; + length_j = userLengths[j]; + + similarity = cosineSimilarity(user_i, user_j, length_i, length_j); + if (similarity <= 0) continue; + + /* Now we output. Like with the pre-computed model, we'll + * only worry about half the model. This allows us to fill + * in the matrix left-to-right, top-to-bottom. */ + usermodel[i][j] = similarity; + } + + CHECK_FOR_INTERRUPTS(); + } + + // Free up the lists of sim_nodes and we're done. + for (i = 0; i < numUsers; i++) { + freeSimList(userEvents[i]); + userEvents[i] = NULL; + } + + // Return the relevant information. + recnode->totalUsers = numUsers; + recnode->userList = userIDs; + recnode->userCFmodel = usermodel; } /* ---------------------------------------------------------------- @@ -3354,136 +3354,136 @@ generateUserCosModel(RecScanState *recnode) { */ void generateUserPearModel(RecScanState *recnode) { - int i, j, priorID; - int numEvents = 0; - char *querystring; - sim_node *userEvents; - char *eventtable, *userkey, *itemkey, *eventval; - AttributeInfo *attributes; - float **usermodel; - int numUsers; - int *userIDs; - float *userAvgs; - float *userPearsons; - // Information for other queries. - QueryDesc *simqueryDesc; - PlanState *simplanstate; - TupleTableSlot *simslot; - MemoryContext simcontext; - - attributes = (AttributeInfo*) recnode->attributes; - eventtable = attributes->eventtable; - userkey = attributes->userkey; - itemkey = attributes->itemkey; - eventval = attributes->eventval; - - // First, we need Pearson info. - pearson_info(userkey, eventtable, eventval, &numUsers, &userIDs, &userAvgs, &userPearsons); - - /* We have the number of users, so we can initialize our model. */ - usermodel = (float**) palloc(numUsers*sizeof(float*)); - for (i = 0; i < numUsers; i++) - usermodel[i] = (float*) palloc0(numUsers*sizeof(float)); - - // With the precomputation done, we need to derive the actual item - // similarities. We can do this in a way that's linear in the number - // of I/Os and also the amount of storage. The complexity is relegated - // to in-memory calculations, which is the most affordable. We need to - // use this data structure here. - userEvents = (sim_node*) palloc(numUsers*sizeof(sim_node)); - for (i = 0; i < numUsers; i++) - userEvents[i] = NULL; - - // With the model created, we need to populate it, which means calculating - // similarity between all item pairs. We need to query the events table - // in order to get the key information. - querystring = (char*) palloc(1024*sizeof(char)); - sprintf(querystring,"SELECT r.%s,r.%s,r.%s FROM %s r ORDER BY r.%s;", - userkey,itemkey,eventval,eventtable,userkey); - - // Begin extracting data. - priorID = -1; - simqueryDesc = recathon_queryStart(querystring, &simcontext); - simplanstate = simqueryDesc->planstate; - i = -1; - - for (;;) { - int simuser, simitem; - float simevent; - sim_node newnode; - - // Shut the compiler up. - simuser = 0; simitem = 0; simevent = 0.0; - - simslot = ExecProcNode(simplanstate); - if (TupIsNull(simslot)) break; - - simuser = getTupleInt(simslot,userkey); - simitem = getTupleInt(simslot,itemkey); - simevent = getTupleFloat(simslot,eventval); - - // Are we dealing with a new user ID? If so, switch to the next slot. - if (simuser != priorID) { - priorID = simuser; - i++; - } - - // We now have the user, item, and event for this tuple. - // We insert the results as a sim_node into the - // userEvents table; we'll do calculations later. - newnode = createSimNode(simitem, simevent); - userEvents[i] = simInsert(userEvents[i], newnode); - numEvents++; - } - - // Query cleanup. - recathon_queryEnd(simqueryDesc, simcontext); - pfree(querystring); - - // Now we do the similarity calculations. Note that we - // don't include duplicate entries, to save time and space. - // The first item ALWAYS has a lower value than the second. - for (i = 0; i < numUsers; i++) { - float avg_i, pearson_i; - sim_node user_i; - - user_i = userEvents[i]; - if (!user_i) continue; - avg_i = userAvgs[i]; - pearson_i = userPearsons[i]; - - for (j = i+1; j < numUsers; j++) { - float avg_j, pearson_j; - sim_node user_j; - float similarity; - - user_j = userEvents[j]; - if (!user_j) continue; - avg_j = userAvgs[j]; - pearson_j = userPearsons[j]; - - similarity = pearsonSimilarity(user_i, user_j, avg_i, avg_j, pearson_i, pearson_j); - if (similarity == 0.0) continue; - - /* Now we output. Like with the pre-computed model, we'll - * only worry about half the model. This allows us to fill - * in the matrix left-to-right, top-to-bottom. */ - usermodel[i][j] = similarity; - } - - CHECK_FOR_INTERRUPTS(); - } - - // Free up the lists of sim_nodes and we're done. - for (i = 0; i < numUsers; i++) { - freeSimList(userEvents[i]); - userEvents[i] = NULL; - } - - // Return the relevant information. - recnode->totalUsers = numUsers; - recnode->userList = userIDs; - recnode->userCFmodel = usermodel; + int i, j, priorID; + int numEvents = 0; + char *querystring; + sim_node *userEvents; + char *eventtable, *userkey, *itemkey, *eventval; + AttributeInfo *attributes; + float **usermodel; + int numUsers; + int *userIDs; + float *userAvgs; + float *userPearsons; + // Information for other queries. + QueryDesc *simqueryDesc; + PlanState *simplanstate; + TupleTableSlot *simslot; + MemoryContext simcontext; + + attributes = (AttributeInfo*) recnode->attributes; + eventtable = attributes->eventtable; + userkey = attributes->userkey; + itemkey = attributes->itemkey; + eventval = attributes->eventval; + + // First, we need Pearson info. + pearson_info(userkey, eventtable, eventval, &numUsers, &userIDs, &userAvgs, &userPearsons); + + /* We have the number of users, so we can initialize our model. */ + usermodel = (float**) palloc(numUsers*sizeof(float*)); + for (i = 0; i < numUsers; i++) + usermodel[i] = (float*) palloc0(numUsers*sizeof(float)); + + // With the precomputation done, we need to derive the actual item + // similarities. We can do this in a way that's linear in the number + // of I/Os and also the amount of storage. The complexity is relegated + // to in-memory calculations, which is the most affordable. We need to + // use this data structure here. + userEvents = (sim_node*) palloc(numUsers*sizeof(sim_node)); + for (i = 0; i < numUsers; i++) + userEvents[i] = NULL; + + // With the model created, we need to populate it, which means calculating + // similarity between all item pairs. We need to query the events table + // in order to get the key information. + querystring = (char*) palloc(1024*sizeof(char)); + sprintf(querystring,"SELECT r.%s,r.%s,r.%s FROM %s r ORDER BY r.%s;", + userkey,itemkey,eventval,eventtable,userkey); + + // Begin extracting data. + priorID = -1; + simqueryDesc = recathon_queryStart(querystring, &simcontext); + simplanstate = simqueryDesc->planstate; + i = -1; + + for (;;) { + int simuser, simitem; + float simevent; + sim_node newnode; + + // Shut the compiler up. + simuser = 0; simitem = 0; simevent = 0.0; + + simslot = ExecProcNode(simplanstate); + if (TupIsNull(simslot)) break; + + simuser = getTupleInt(simslot,userkey); + simitem = getTupleInt(simslot,itemkey); + simevent = getTupleFloat(simslot,eventval); + + // Are we dealing with a new user ID? If so, switch to the next slot. + if (simuser != priorID) { + priorID = simuser; + i++; + } + + // We now have the user, item, and event for this tuple. + // We insert the results as a sim_node into the + // userEvents table; we'll do calculations later. + newnode = createSimNode(simitem, simevent); + userEvents[i] = simInsert(userEvents[i], newnode); + numEvents++; + } + + // Query cleanup. + recathon_queryEnd(simqueryDesc, simcontext); + pfree(querystring); + + // Now we do the similarity calculations. Note that we + // don't include duplicate entries, to save time and space. + // The first item ALWAYS has a lower value than the second. + for (i = 0; i < numUsers; i++) { + float avg_i, pearson_i; + sim_node user_i; + + user_i = userEvents[i]; + if (!user_i) continue; + avg_i = userAvgs[i]; + pearson_i = userPearsons[i]; + + for (j = i+1; j < numUsers; j++) { + float avg_j, pearson_j; + sim_node user_j; + float similarity; + + user_j = userEvents[j]; + if (!user_j) continue; + avg_j = userAvgs[j]; + pearson_j = userPearsons[j]; + + similarity = pearsonSimilarity(user_i, user_j, avg_i, avg_j, pearson_i, pearson_j); + if (similarity == 0.0) continue; + + /* Now we output. Like with the pre-computed model, we'll + * only worry about half the model. This allows us to fill + * in the matrix left-to-right, top-to-bottom. */ + usermodel[i][j] = similarity; + } + + CHECK_FOR_INTERRUPTS(); + } + + // Free up the lists of sim_nodes and we're done. + for (i = 0; i < numUsers; i++) { + freeSimList(userEvents[i]); + userEvents[i] = NULL; + } + + // Return the relevant information. + recnode->totalUsers = numUsers; + recnode->userList = userIDs; + recnode->userCFmodel = usermodel; } /* ---------------------------------------------------------------- @@ -3494,143 +3494,143 @@ generateUserPearModel(RecScanState *recnode) { */ void generateSVDmodel(RecScanState *recnode) { - float **userFeatures, **itemFeatures; - int *userIDs, *itemIDs; - float *itemAvgs, *userOffsets; - int numUsers, numItems; - int i, j, k, numEvents; - int numFeatures = 50; - svd_node *allEvents; - AttributeInfo *attributes; - char *eventtable, *userkey, *itemkey, *eventval; - // Information for other queries. - char *querystring; - QueryDesc *queryDesc; - PlanState *planstate; - TupleTableSlot *slot; - MemoryContext recathoncontext; - - attributes = (AttributeInfo*) recnode->attributes; - eventtable = attributes->eventtable; - userkey = attributes->userkey; - itemkey = attributes->itemkey; - eventval = attributes->eventval; - - // First, we get our lists of users and items. - SVDlists(userkey,itemkey,eventtable, - &userIDs, &itemIDs, &numUsers, &numItems); - - // Then we get information for baseline averages. - SVDaverages(userkey,itemkey,eventtable,eventval, - userIDs,itemIDs,numUsers,numItems, - &itemAvgs,&userOffsets); - - // Initialize our feature arrays. - userFeatures = (float**) palloc(numFeatures*sizeof(float*)); - for (i = 0; i < numFeatures; i++) { - userFeatures[i] = (float*) palloc(numUsers*sizeof(float)); - for (j = 0; j < numUsers; j++) - userFeatures[i][j] = 0.1; - } - itemFeatures = (float**) palloc(numFeatures*sizeof(float*)); - for (i = 0; i < numFeatures; i++) { - itemFeatures[i] = (float*) palloc(numItems*sizeof(float)); - for (j = 0; j < numItems; j++) - itemFeatures[i][j] = 0.1; - } - - // First we need to count the number of events we'll be - // considering. - querystring = (char*) palloc(1024*sizeof(char)); - numEvents = count_rows(eventtable); - - // Initialize the events array. - allEvents = (svd_node*) palloc(numEvents*sizeof(svd_node)); - - sprintf(querystring,"SELECT r.%s,r.%s,r.%s FROM %s r ORDER BY r.%s;", - userkey,itemkey,eventval,eventtable,userkey); - - // Let's acquire all of our events and store them. Sorting initially by - // user ID avoids unnecessary binary searches. - queryDesc = recathon_queryStart(querystring,&recathoncontext); - planstate = queryDesc->planstate; - - i = 0; - for (;;) { - svd_node new_svd; - - slot = ExecProcNode(planstate); - if (TupIsNull(slot)) break; - - new_svd = createSVDnode(slot, userkey, itemkey, eventval, userIDs, itemIDs, numUsers, numItems); - - allEvents[i] = new_svd; - - i++; - if (i >= numEvents) break; - } - - recathon_queryEnd(queryDesc,recathoncontext); - - // We now have all of the events, so we can start training our features. - for (j = 0; j < 100; j++) { - for (i = 0; i < numFeatures; i++) { - float learn = 0.001; - float penalty = 0.002; - float *userVal = userFeatures[i]; - float *itemVal = itemFeatures[i]; - - for (k = 0; k < numEvents; k++) { - int userid; - int itemid; - float event, err, residual, temp; - svd_node current_svd; - - current_svd = allEvents[k]; - userid = current_svd->userid; - itemid = current_svd->itemid; - event = current_svd->event; - // Need to reset residuals for each new - // iteration of the trainer. - if (i == 0) - current_svd->residual = 0; - residual = current_svd->residual; - - if (i == 0 && j == 0) { - err = event - (itemAvgs[itemid] + userOffsets[userid]); - } else { - err = event - predictRating(i, numFeatures, userid, itemid, - userFeatures, itemFeatures, residual); - } - temp = userVal[userid]; - userVal[userid] += learn * ((err * itemVal[itemid]) - (penalty * userVal[userid])); - itemVal[itemid] += learn * ((err * temp) - (penalty * itemVal[itemid])); - - // Store residuals. - if (i == 0) - current_svd->residual = userVal[userid] * itemVal[itemid]; - else - current_svd->residual += userVal[userid] * itemVal[itemid]; - } - - CHECK_FOR_INTERRUPTS(); - } - } - - // Free up memory. - pfree(querystring); - pfree(itemAvgs); - pfree(userOffsets); - pfree(allEvents); - - // Return the relevant information. - recnode->numFeatures = numFeatures; - recnode->totalUsers = numUsers; - recnode->fullTotalItems = numItems; - recnode->userList = userIDs; - recnode->fullItemList = itemIDs; - recnode->SVDusermodel = userFeatures; - recnode->SVDitemmodel = itemFeatures; + float **userFeatures, **itemFeatures; + int *userIDs, *itemIDs; + float *itemAvgs, *userOffsets; + int numUsers, numItems; + int i, j, k, numEvents; + int numFeatures = 50; + svd_node *allEvents; + AttributeInfo *attributes; + char *eventtable, *userkey, *itemkey, *eventval; + // Information for other queries. + char *querystring; + QueryDesc *queryDesc; + PlanState *planstate; + TupleTableSlot *slot; + MemoryContext recathoncontext; + + attributes = (AttributeInfo*) recnode->attributes; + eventtable = attributes->eventtable; + userkey = attributes->userkey; + itemkey = attributes->itemkey; + eventval = attributes->eventval; + + // First, we get our lists of users and items. + SVDlists(userkey,itemkey,eventtable, + &userIDs, &itemIDs, &numUsers, &numItems); + + // Then we get information for baseline averages. + SVDaverages(userkey,itemkey,eventtable,eventval, + userIDs,itemIDs,numUsers,numItems, + &itemAvgs,&userOffsets); + + // Initialize our feature arrays. + userFeatures = (float**) palloc(numFeatures*sizeof(float*)); + for (i = 0; i < numFeatures; i++) { + userFeatures[i] = (float*) palloc(numUsers*sizeof(float)); + for (j = 0; j < numUsers; j++) + userFeatures[i][j] = 0.1; + } + itemFeatures = (float**) palloc(numFeatures*sizeof(float*)); + for (i = 0; i < numFeatures; i++) { + itemFeatures[i] = (float*) palloc(numItems*sizeof(float)); + for (j = 0; j < numItems; j++) + itemFeatures[i][j] = 0.1; + } + + // First we need to count the number of events we'll be + // considering. + querystring = (char*) palloc(1024*sizeof(char)); + numEvents = count_rows(eventtable); + + // Initialize the events array. + allEvents = (svd_node*) palloc(numEvents*sizeof(svd_node)); + + sprintf(querystring,"SELECT r.%s,r.%s,r.%s FROM %s r ORDER BY r.%s;", + userkey,itemkey,eventval,eventtable,userkey); + + // Let's acquire all of our events and store them. Sorting initially by + // user ID avoids unnecessary binary searches. + queryDesc = recathon_queryStart(querystring,&recathoncontext); + planstate = queryDesc->planstate; + + i = 0; + for (;;) { + svd_node new_svd; + + slot = ExecProcNode(planstate); + if (TupIsNull(slot)) break; + + new_svd = createSVDnode(slot, userkey, itemkey, eventval, userIDs, itemIDs, numUsers, numItems); + + allEvents[i] = new_svd; + + i++; + if (i >= numEvents) break; + } + + recathon_queryEnd(queryDesc,recathoncontext); + + // We now have all of the events, so we can start training our features. + for (j = 0; j < 100; j++) { + for (i = 0; i < numFeatures; i++) { + float learn = 0.001; + float penalty = 0.002; + float *userVal = userFeatures[i]; + float *itemVal = itemFeatures[i]; + + for (k = 0; k < numEvents; k++) { + int userid; + int itemid; + float event, err, residual, temp; + svd_node current_svd; + + current_svd = allEvents[k]; + userid = current_svd->userid; + itemid = current_svd->itemid; + event = current_svd->event; + // Need to reset residuals for each new + // iteration of the trainer. + if (i == 0) + current_svd->residual = 0; + residual = current_svd->residual; + + if (i == 0 && j == 0) { + err = event - (itemAvgs[itemid] + userOffsets[userid]); + } else { + err = event - predictRating(i, numFeatures, userid, itemid, + userFeatures, itemFeatures, residual); + } + temp = userVal[userid]; + userVal[userid] += learn * ((err * itemVal[itemid]) - (penalty * userVal[userid])); + itemVal[itemid] += learn * ((err * temp) - (penalty * itemVal[itemid])); + + // Store residuals. + if (i == 0) + current_svd->residual = userVal[userid] * itemVal[itemid]; + else + current_svd->residual += userVal[userid] * itemVal[itemid]; + } + + CHECK_FOR_INTERRUPTS(); + } + } + + // Free up memory. + pfree(querystring); + pfree(itemAvgs); + pfree(userOffsets); + pfree(allEvents); + + // Return the relevant information. + recnode->numFeatures = numFeatures; + recnode->totalUsers = numUsers; + recnode->fullTotalItems = numItems; + recnode->userList = userIDs; + recnode->fullItemList = itemIDs; + recnode->SVDusermodel = userFeatures; + recnode->SVDitemmodel = itemFeatures; } /* ---------------------------------------------------------------- @@ -3644,45 +3644,45 @@ generateSVDmodel(RecScanState *recnode) { float itemCFgenerate(RecScanState *recnode, int itemid, int itemindex) { - int i; - float recScore; - GenRating *currentItem; - - // First, we grab the GenRating for this item ID. - currentItem = hashFind(recnode->pendingTable, itemid); - // In case there's some error. - if (!currentItem) - return -1; - - // We're going to look through the similarity matrix for the - // numbers that correspond to this item, and find which of those - // also correspond to items this user rated. We will use that - // information to obtain the estimated rating. - - for (i = itemindex+1; i < recnode->fullTotalItems; i++) { - int itemID; - float similarity; - GenRating *ratedItem; - - itemID = recnode->fullItemList[i]; - similarity = recnode->itemCFmodel[itemindex][i]; - - // Find the array slot this item ID corresponds to. - // If -1 is returned, then the item ID corresponds to - // another item we haven't rated, so we don't care. - ratedItem = hashFind(recnode->ratedTable,itemID); - if (ratedItem) { - currentItem->score += similarity*ratedItem->score; - if (similarity < 0) - similarity *= -1; - currentItem->totalSim += similarity; - } - } - - if (currentItem->totalSim == 0) return 0; - - recScore = currentItem->score / currentItem->totalSim; - return recScore; + int i; + float recScore; + GenRating *currentItem; + + // First, we grab the GenRating for this item ID. + currentItem = hashFind(recnode->pendingTable, itemid); + // In case there's some error. + if (!currentItem) + return -1; + + // We're going to look through the similarity matrix for the + // numbers that correspond to this item, and find which of those + // also correspond to items this user rated. We will use that + // information to obtain the estimated rating. + + for (i = itemindex+1; i < recnode->fullTotalItems; i++) { + int itemID; + float similarity; + GenRating *ratedItem; + + itemID = recnode->fullItemList[i]; + similarity = recnode->itemCFmodel[itemindex][i]; + + // Find the array slot this item ID corresponds to. + // If -1 is returned, then the item ID corresponds to + // another item we haven't rated, so we don't care. + ratedItem = hashFind(recnode->ratedTable,itemID); + if (ratedItem) { + currentItem->score += similarity*ratedItem->score; + if (similarity < 0) + similarity *= -1; + currentItem->totalSim += similarity; + } + } + + if (currentItem->totalSim == 0) return 0; + + recScore = currentItem->score / currentItem->totalSim; + return recScore; } /* ---------------------------------------------------------------- @@ -3696,61 +3696,61 @@ itemCFgenerate(RecScanState *recnode, int itemid, int itemindex) float userCFgenerate(RecScanState *recnode, int itemid, int itemindex) { - float event, totalSim, average; - AttributeInfo *attributes; - // Query objects; - char *querystring; - QueryDesc *queryDesc; - PlanState *planstate; - TupleTableSlot *qslot; - MemoryContext recathoncontext; - - attributes = (AttributeInfo*) recnode->attributes; - - event = 0.0; - totalSim = 0.0; - average = recnode->average; - - /* We need to query the events table, so that we can - * find all events for this item and match them up - * with what we have in the similarity matrix. We note - * that it's necessarily true that the user has not - * rated these items. */ - querystring = (char*) palloc(1024*sizeof(char)); - sprintf(querystring,"select * from %s where %s = %d;", - attributes->eventtable,attributes->itemkey,itemid); - queryDesc = recathon_queryStart(querystring,&recathoncontext); - planstate = queryDesc->planstate; - - for (;;) { - int currentUserID; - float currentRating, similarity; - GenRating *currentUser; - - qslot = ExecProcNode(planstate); - if (TupIsNull(qslot)) break; - - currentUserID = getTupleInt(qslot,attributes->userkey); - currentRating = getTupleFloat(qslot,attributes->eventval); - - currentUser = hashFind(recnode->simTable,currentUserID); - if (!currentUser) continue; - similarity = currentUser->totalSim; - - event += (currentRating - average) * similarity; - // Poor man's absolute value of the similarity. - if (similarity < 0) - similarity *= -1; - totalSim += similarity; - } - recathon_queryEnd(queryDesc,recathoncontext); - - if (totalSim == 0.0) return 0.0; - - event /= totalSim; - event += average; - - return event; + float event, totalSim, average; + AttributeInfo *attributes; + // Query objects; + char *querystring; + QueryDesc *queryDesc; + PlanState *planstate; + TupleTableSlot *qslot; + MemoryContext recathoncontext; + + attributes = (AttributeInfo*) recnode->attributes; + + event = 0.0; + totalSim = 0.0; + average = recnode->average; + + /* We need to query the events table, so that we can + * find all events for this item and match them up + * with what we have in the similarity matrix. We note + * that it's necessarily true that the user has not + * rated these items. */ + querystring = (char*) palloc(1024*sizeof(char)); + sprintf(querystring,"select * from %s where %s = %d;", + attributes->eventtable,attributes->itemkey,itemid); + queryDesc = recathon_queryStart(querystring,&recathoncontext); + planstate = queryDesc->planstate; + + for (;;) { + int currentUserID; + float currentRating, similarity; + GenRating *currentUser; + + qslot = ExecProcNode(planstate); + if (TupIsNull(qslot)) break; + + currentUserID = getTupleInt(qslot,attributes->userkey); + currentRating = getTupleFloat(qslot,attributes->eventval); + + currentUser = hashFind(recnode->simTable,currentUserID); + if (!currentUser) continue; + similarity = currentUser->totalSim; + + event += (currentRating - average) * similarity; + // Poor man's absolute value of the similarity. + if (similarity < 0) + similarity *= -1; + totalSim += similarity; + } + recathon_queryEnd(queryDesc,recathoncontext); + + if (totalSim == 0.0) return 0.0; + + event /= totalSim; + event += average; + + return event; } /* ---------------------------------------------------------------- @@ -3764,18 +3764,18 @@ userCFgenerate(RecScanState *recnode, int itemid, int itemindex) float SVDgenerate(RecScanState *recnode, int itemid, int itemindex) { - int i; - float **userFeatures, **itemFeatures; - float recscore = 0.0; - - userFeatures = recnode->SVDusermodel; - itemFeatures = recnode->SVDitemmodel; - - // At this point, our work is easy. - for (i = 0; i < recnode->numFeatures; i++) - recscore += userFeatures[i][recnode->userindex] * itemFeatures[i][itemindex]; - - return recscore; + int i; + float **userFeatures, **itemFeatures; + float recscore = 0.0; + + userFeatures = recnode->SVDusermodel; + itemFeatures = recnode->SVDitemmodel; + + // At this point, our work is easy. + for (i = 0; i < recnode->numFeatures; i++) + recscore += userFeatures[i][recnode->userindex] * itemFeatures[i][itemindex]; + + return recscore; } /* ---------------------------------------------------------------- @@ -3788,47 +3788,47 @@ SVDgenerate(RecScanState *recnode, int itemid, int itemindex) void applyItemSimGenerate(RecScanState *recnode) { - int i, j; - GenHash *ratedTable; - - ratedTable = recnode->ratedTable; - - // For every item we've rated, we need to obtain its similarity - // scores and apply them to the appropriate items. This is - // necessary because we're only storing half of the similarity - // matrix. - for (i = 0; i < ratedTable->hash; i++) { - GenRating *currentItem; - - for (currentItem = ratedTable->table[i]; currentItem; - currentItem = currentItem->next) { - int itemindex = currentItem->index; - - for (j = itemindex+1; j < recnode->fullTotalItems; j++) { - int itemID; - float similarity; - GenRating *pendingItem; - - itemID = recnode->fullItemList[j]; - similarity = recnode->itemCFmodel[itemindex][j]; - - // If the similarity is 0, there's no point. - if (similarity == 0.0) - continue; - - // Find the array slot this item ID corresponds to. - // If -1 is returned, then the item ID corresponds to - // another item we've rated, so we don't care. - pendingItem = hashFind(recnode->pendingTable,itemID); - if (pendingItem) { - pendingItem->score += similarity*currentItem->score; - if (similarity < 0) - similarity *= -1; - pendingItem->totalSim += similarity; - } - } - } - } + int i, j; + GenHash *ratedTable; + + ratedTable = recnode->ratedTable; + + // For every item we've rated, we need to obtain its similarity + // scores and apply them to the appropriate items. This is + // necessary because we're only storing half of the similarity + // matrix. + for (i = 0; i < ratedTable->hash; i++) { + GenRating *currentItem; + + for (currentItem = ratedTable->table[i]; currentItem; + currentItem = currentItem->next) { + int itemindex = currentItem->index; + + for (j = itemindex+1; j < recnode->fullTotalItems; j++) { + int itemID; + float similarity; + GenRating *pendingItem; + + itemID = recnode->fullItemList[j]; + similarity = recnode->itemCFmodel[itemindex][j]; + + // If the similarity is 0, there's no point. + if (similarity == 0.0) + continue; + + // Find the array slot this item ID corresponds to. + // If -1 is returned, then the item ID corresponds to + // another item we've rated, so we don't care. + pendingItem = hashFind(recnode->pendingTable,itemID); + if (pendingItem) { + pendingItem->score += similarity*currentItem->score; + if (similarity < 0) + similarity *= -1; + pendingItem->totalSim += similarity; + } + } + } + } } /* ---------------------------------------------------------------- @@ -3841,281 +3841,281 @@ applyItemSimGenerate(RecScanState *recnode) */ bool prepUserForRating(RecScanState *recstate, int userID) { - int i, userindex; - // Query objects. - char *querystring; - QueryDesc *queryDesc; - PlanState *planstate; - TupleTableSlot *hslot; - MemoryContext recathoncontext; - - AttributeInfo *attributes = (AttributeInfo*) recstate->attributes; - attributes->userID = userID; - - /* First off, we need to delete any existing structures. */ - if (recstate->ratedTable) { - freeHash(recstate->ratedTable); - recstate->ratedTable = NULL; - } - if (recstate->pendingTable) { - freeHash(recstate->pendingTable); - recstate->pendingTable = NULL; - } - if (recstate->simTable) { - freeHash(recstate->simTable); - recstate->simTable = NULL; - } - if (recstate->userFeatures) { - pfree(recstate->userFeatures); - recstate->userFeatures = NULL; - } - - /* INSERT FORMER LIST CODE HERE */ - querystring = (char*) palloc(1024*sizeof(char)); - - switch ((recMethod) attributes->method) { - /* If this is an item-based CF recommender, we can pre-obtain - * the ratings of this user, and add in their contributions to - * the scores of all the other items. */ - case itemCosCF: - case itemPearCF: - /* The rated list is all of the items this user has - * rated already. We store the ratings now and we'll - * use them during calculation. */ - sprintf(querystring,"select count(*) from %s where %s = %d;", - attributes->eventtable,attributes->userkey,userID); - queryDesc = recathon_queryStart(querystring,&recathoncontext); - planstate = queryDesc->planstate; - hslot = ExecProcNode(planstate); - recstate->totalRatings = getTupleInt(hslot,"count"); - recathon_queryEnd(queryDesc,recathoncontext); - - /* It's possible that someone has rated no items. */ - if (recstate->totalRatings <= 0) { - elog(WARNING, "user %d has rated no items, no predictions can be made", - userID); - return false; - } - - recstate->ratedTable = hashCreate(recstate->totalRatings); - - /* Now to acquire the actual ratings. */ - sprintf(querystring,"select * from %s where %s = %d order by %s;", - attributes->eventtable,attributes->userkey, - userID,attributes->itemkey); - queryDesc = recathon_queryStart(querystring,&recathoncontext); - planstate = queryDesc->planstate; - - i = 0; - for (;;) { - int currentItem; - float currentRating; - GenRating *newItem; - - hslot = ExecProcNode(planstate); - if (TupIsNull(hslot)) break; - - currentItem = getTupleInt(hslot,attributes->itemkey); - currentRating = getTupleFloat(hslot,attributes->eventval); - - newItem = (GenRating*) palloc(sizeof(GenRating)); - newItem->ID = currentItem; - newItem->index = binarySearch(recstate->fullItemList,currentItem,0,recstate->fullTotalItems); - newItem->score = currentRating; - newItem->next = NULL; - hashAdd(recstate->ratedTable, newItem); - - i++; - if (i >= recstate->totalRatings) break; - } - recathon_queryEnd(queryDesc,recathoncontext); - - /* Quick error protection. Again, I don't know how this could - * possibly happen, but better safe than sorry. */ - recstate->totalRatings = i; - if (recstate->totalRatings <= 0) { - elog(WARNING, "user %d has rated no items, no predictions can be made", - userID); - return false; - } - - /* The pending list is all of the items we have yet to - * calculate ratings for. We need to maintain partial - * scores and similarity sums for each one. In this version - * of the code, note that we rate all items. */ - recstate->pendingTable = hashCreate(recstate->fullTotalItems); - for (i = 0; i < recstate->fullTotalItems; i++) { - GenRating *newItem; - - newItem = (GenRating*) palloc(sizeof(GenRating)); - newItem->ID = recstate->fullItemList[i]; - /* The pending list doesn't need indexes. */ - newItem->index = -1; - newItem->score = 0.0; - newItem->totalSim = 0.0; - newItem->next = NULL; - hashAdd(recstate->pendingTable, newItem); - } - - /* With another function, we apply the ratings and similarities - * from the rated items to the unrated ones. It's good to get - * this done early, as this will allow the operator to be - * non-blocking, which is important. */ - if (attributes->opType == OP_GENERATE || attributes->opType == OP_GENERATEJOIN) - applyItemSimGenerate(recstate); - else - applyItemSim(recstate, attributes->recModelName); - break; - case userCosCF: - case userPearCF: - userindex = binarySearch(recstate->userList, userID, 0, recstate->totalUsers); - - /* The first thing we'll do is obtain the average rating. */ - sprintf(querystring,"select avg(%s) as average from %s where %s = %d;", - attributes->eventval,attributes->eventtable, - attributes->userkey,userID); - queryDesc = recathon_queryStart(querystring,&recathoncontext); - planstate = queryDesc->planstate; - - hslot = ExecProcNode(planstate); - recstate->average = getTupleFloat(hslot,"average"); - recathon_queryEnd(queryDesc,recathoncontext); - - /* Next, we need to store this user's similarity model - * in a hash table for easier access. We base the table on - * the number of items we have to rate - a close enough - * approximation that we won't have much trouble. */ - recstate->simTable = hashCreate(recstate->fullTotalItems); - - /* We need to find the entire similarity table for this - * user, which will be in two parts. */ - if (attributes->opType == OP_GENERATE || attributes->opType == OP_GENERATEJOIN) { - for (i = 0; i < userindex; i++) { - int currentUser; - float currentSim; - GenRating *newUser; - - currentUser = recstate->userList[i]; - currentSim = recstate->userCFmodel[i][userindex]; - - newUser = (GenRating*) palloc(sizeof(GenRating)); - newUser->ID = currentUser; - newUser->index = i; - newUser->totalSim = currentSim; - newUser->next = NULL; - hashAdd(recstate->simTable, newUser); - } - - for (i = userindex+1; i < recstate->totalUsers; i++) { - int currentUser; - float currentSim; - GenRating *newUser; - - currentUser = recstate->userList[i]; - currentSim = recstate->userCFmodel[userindex][i]; - - newUser = (GenRating*) palloc(sizeof(GenRating)); - newUser->ID = currentUser; - newUser->index = i; - newUser->totalSim = currentSim; - newUser->next = NULL; - hashAdd(recstate->simTable, newUser); - } - } else { - sprintf(querystring,"select * from %s where user1 < %d and user2 = %d;", - attributes->recModelName,attributes->userID, - attributes->userID); - queryDesc = recathon_queryStart(querystring,&recathoncontext); - planstate = queryDesc->planstate; - - for (;;) { - int currentUser; - float currentSim; - GenRating *newUser; - - hslot = ExecProcNode(planstate); - if (TupIsNull(hslot)) break; - - currentUser = getTupleInt(hslot,"user1"); - currentSim = getTupleFloat(hslot,"similarity"); - - newUser = (GenRating*) palloc(sizeof(GenRating)); - newUser->ID = currentUser; - /* Pre-generated recommendation doesn't need - * indexes. */ - newUser->index = -1; - newUser->totalSim = currentSim; - newUser->next = NULL; - hashAdd(recstate->simTable, newUser); - } - recathon_queryEnd(queryDesc,recathoncontext); - - /* Here's the second. */ - sprintf(querystring,"select * from %s where user1 = %d;", - attributes->recModelName,attributes->userID); - queryDesc = recathon_queryStart(querystring,&recathoncontext); - planstate = queryDesc->planstate; - - for (;;) { - int currentUser; - float currentSim; - GenRating *newUser; - - hslot = ExecProcNode(planstate); - if (TupIsNull(hslot)) break; - - currentUser = getTupleInt(hslot,"user2"); - currentSim = getTupleFloat(hslot,"similarity"); - - newUser = (GenRating*) palloc(sizeof(GenRating)); - newUser->ID = currentUser; - /* Pre-generated recommendation doesn't need - * indexes. */ - newUser->index = -1; - newUser->totalSim = currentSim; - newUser->next = NULL; - hashAdd(recstate->simTable, newUser); - } - recathon_queryEnd(queryDesc,recathoncontext); - } - - break; - /* If this is a SVD recommender, we can pre-obtain the user features, - * which stay fixed, and cut the I/O time in half. Of course, if this - * is generated on-the-fly, this is done already. */ - case SVD: - if (attributes->opType != OP_GENERATE && attributes->opType != OP_GENERATEJOIN) { - recstate->userFeatures = (float*) palloc(50*sizeof(float)); - for (i = 0; i < 50; i++) - recstate->userFeatures[i] = 0; - sprintf(querystring,"select * from %s where users = %d;", - attributes->recModelName,userID); - queryDesc = recathon_queryStart(querystring,&recathoncontext); - planstate = queryDesc->planstate; - - for (;;) { - int feature; - float featValue; - - hslot = ExecProcNode(planstate); - if (TupIsNull(hslot)) break; - - feature = getTupleInt(hslot,"feature"); - featValue = getTupleFloat(hslot,"value"); - - recstate->userFeatures[feature] = featValue; - } - - recathon_queryEnd(queryDesc,recathoncontext); - } - break; - default: - elog(ERROR, "invalid recommendation method in prepUserForRating()"); - } - - /* If we've gotten to this point, this is a valid user, so return true. */ - pfree(querystring); - return true; + int i, userindex; + // Query objects. + char *querystring; + QueryDesc *queryDesc; + PlanState *planstate; + TupleTableSlot *hslot; + MemoryContext recathoncontext; + + AttributeInfo *attributes = (AttributeInfo*) recstate->attributes; + attributes->userID = userID; + + /* First off, we need to delete any existing structures. */ + if (recstate->ratedTable) { + freeHash(recstate->ratedTable); + recstate->ratedTable = NULL; + } + if (recstate->pendingTable) { + freeHash(recstate->pendingTable); + recstate->pendingTable = NULL; + } + if (recstate->simTable) { + freeHash(recstate->simTable); + recstate->simTable = NULL; + } + if (recstate->userFeatures) { + pfree(recstate->userFeatures); + recstate->userFeatures = NULL; + } + + /* INSERT FORMER LIST CODE HERE */ + querystring = (char*) palloc(1024*sizeof(char)); + + switch ((recMethod) attributes->method) { + /* If this is an item-based CF recommender, we can pre-obtain + * the ratings of this user, and add in their contributions to + * the scores of all the other items. */ + case itemCosCF: + case itemPearCF: + /* The rated list is all of the items this user has + * rated already. We store the ratings now and we'll + * use them during calculation. */ + sprintf(querystring,"select count(*) from %s where %s = %d;", + attributes->eventtable,attributes->userkey,userID); + queryDesc = recathon_queryStart(querystring,&recathoncontext); + planstate = queryDesc->planstate; + hslot = ExecProcNode(planstate); + recstate->totalRatings = getTupleInt(hslot,"count"); + recathon_queryEnd(queryDesc,recathoncontext); + + /* It's possible that someone has rated no items. */ + if (recstate->totalRatings <= 0) { + elog(WARNING, "user %d has rated no items, no predictions can be made", + userID); + return false; + } + + recstate->ratedTable = hashCreate(recstate->totalRatings); + + /* Now to acquire the actual ratings. */ + sprintf(querystring,"select * from %s where %s = %d order by %s;", + attributes->eventtable,attributes->userkey, + userID,attributes->itemkey); + queryDesc = recathon_queryStart(querystring,&recathoncontext); + planstate = queryDesc->planstate; + + i = 0; + for (;;) { + int currentItem; + float currentRating; + GenRating *newItem; + + hslot = ExecProcNode(planstate); + if (TupIsNull(hslot)) break; + + currentItem = getTupleInt(hslot,attributes->itemkey); + currentRating = getTupleFloat(hslot,attributes->eventval); + + newItem = (GenRating*) palloc(sizeof(GenRating)); + newItem->ID = currentItem; + newItem->index = binarySearch(recstate->fullItemList,currentItem,0,recstate->fullTotalItems); + newItem->score = currentRating; + newItem->next = NULL; + hashAdd(recstate->ratedTable, newItem); + + i++; + if (i >= recstate->totalRatings) break; + } + recathon_queryEnd(queryDesc,recathoncontext); + + /* Quick error protection. Again, I don't know how this could + * possibly happen, but better safe than sorry. */ + recstate->totalRatings = i; + if (recstate->totalRatings <= 0) { + elog(WARNING, "user %d has rated no items, no predictions can be made", + userID); + return false; + } + + /* The pending list is all of the items we have yet to + * calculate ratings for. We need to maintain partial + * scores and similarity sums for each one. In this version + * of the code, note that we rate all items. */ + recstate->pendingTable = hashCreate(recstate->fullTotalItems); + for (i = 0; i < recstate->fullTotalItems; i++) { + GenRating *newItem; + + newItem = (GenRating*) palloc(sizeof(GenRating)); + newItem->ID = recstate->fullItemList[i]; + /* The pending list doesn't need indexes. */ + newItem->index = -1; + newItem->score = 0.0; + newItem->totalSim = 0.0; + newItem->next = NULL; + hashAdd(recstate->pendingTable, newItem); + } + + /* With another function, we apply the ratings and similarities + * from the rated items to the unrated ones. It's good to get + * this done early, as this will allow the operator to be + * non-blocking, which is important. */ + if (attributes->opType == OP_GENERATE || attributes->opType == OP_GENERATEJOIN) + applyItemSimGenerate(recstate); + else + applyItemSim(recstate, attributes->recModelName); + break; + case userCosCF: + case userPearCF: + userindex = binarySearch(recstate->userList, userID, 0, recstate->totalUsers); + + /* The first thing we'll do is obtain the average rating. */ + sprintf(querystring,"select avg(%s) as average from %s where %s = %d;", + attributes->eventval,attributes->eventtable, + attributes->userkey,userID); + queryDesc = recathon_queryStart(querystring,&recathoncontext); + planstate = queryDesc->planstate; + + hslot = ExecProcNode(planstate); + recstate->average = getTupleFloat(hslot,"average"); + recathon_queryEnd(queryDesc,recathoncontext); + + /* Next, we need to store this user's similarity model + * in a hash table for easier access. We base the table on + * the number of items we have to rate - a close enough + * approximation that we won't have much trouble. */ + recstate->simTable = hashCreate(recstate->fullTotalItems); + + /* We need to find the entire similarity table for this + * user, which will be in two parts. */ + if (attributes->opType == OP_GENERATE || attributes->opType == OP_GENERATEJOIN) { + for (i = 0; i < userindex; i++) { + int currentUser; + float currentSim; + GenRating *newUser; + + currentUser = recstate->userList[i]; + currentSim = recstate->userCFmodel[i][userindex]; + + newUser = (GenRating*) palloc(sizeof(GenRating)); + newUser->ID = currentUser; + newUser->index = i; + newUser->totalSim = currentSim; + newUser->next = NULL; + hashAdd(recstate->simTable, newUser); + } + + for (i = userindex+1; i < recstate->totalUsers; i++) { + int currentUser; + float currentSim; + GenRating *newUser; + + currentUser = recstate->userList[i]; + currentSim = recstate->userCFmodel[userindex][i]; + + newUser = (GenRating*) palloc(sizeof(GenRating)); + newUser->ID = currentUser; + newUser->index = i; + newUser->totalSim = currentSim; + newUser->next = NULL; + hashAdd(recstate->simTable, newUser); + } + } else { + sprintf(querystring,"select * from %s where user1 < %d and user2 = %d;", + attributes->recModelName,attributes->userID, + attributes->userID); + queryDesc = recathon_queryStart(querystring,&recathoncontext); + planstate = queryDesc->planstate; + + for (;;) { + int currentUser; + float currentSim; + GenRating *newUser; + + hslot = ExecProcNode(planstate); + if (TupIsNull(hslot)) break; + + currentUser = getTupleInt(hslot,"user1"); + currentSim = getTupleFloat(hslot,"similarity"); + + newUser = (GenRating*) palloc(sizeof(GenRating)); + newUser->ID = currentUser; + /* Pre-generated recommendation doesn't need + * indexes. */ + newUser->index = -1; + newUser->totalSim = currentSim; + newUser->next = NULL; + hashAdd(recstate->simTable, newUser); + } + recathon_queryEnd(queryDesc,recathoncontext); + + /* Here's the second. */ + sprintf(querystring,"select * from %s where user1 = %d;", + attributes->recModelName,attributes->userID); + queryDesc = recathon_queryStart(querystring,&recathoncontext); + planstate = queryDesc->planstate; + + for (;;) { + int currentUser; + float currentSim; + GenRating *newUser; + + hslot = ExecProcNode(planstate); + if (TupIsNull(hslot)) break; + + currentUser = getTupleInt(hslot,"user2"); + currentSim = getTupleFloat(hslot,"similarity"); + + newUser = (GenRating*) palloc(sizeof(GenRating)); + newUser->ID = currentUser; + /* Pre-generated recommendation doesn't need + * indexes. */ + newUser->index = -1; + newUser->totalSim = currentSim; + newUser->next = NULL; + hashAdd(recstate->simTable, newUser); + } + recathon_queryEnd(queryDesc,recathoncontext); + } + + break; + /* If this is a SVD recommender, we can pre-obtain the user features, + * which stay fixed, and cut the I/O time in half. Of course, if this + * is generated on-the-fly, this is done already. */ + case SVD: + if (attributes->opType != OP_GENERATE && attributes->opType != OP_GENERATEJOIN) { + recstate->userFeatures = (float*) palloc(50*sizeof(float)); + for (i = 0; i < 50; i++) + recstate->userFeatures[i] = 0; + sprintf(querystring,"select * from %s where users = %d;", + attributes->recModelName,userID); + queryDesc = recathon_queryStart(querystring,&recathoncontext); + planstate = queryDesc->planstate; + + for (;;) { + int feature; + float featValue; + + hslot = ExecProcNode(planstate); + if (TupIsNull(hslot)) break; + + feature = getTupleInt(hslot,"feature"); + featValue = getTupleFloat(hslot,"value"); + + recstate->userFeatures[feature] = featValue; + } + + recathon_queryEnd(queryDesc,recathoncontext); + } + break; + default: + elog(ERROR, "invalid recommendation method in prepUserForRating()"); + } + + /* If we've gotten to this point, this is a valid user, so return true. */ + pfree(querystring); + return true; } /* ---------------------------------------------------------------- @@ -4127,17 +4127,17 @@ prepUserForRating(RecScanState *recstate, int userID) { GenHash* hashCreate(int totalItems) { - int hash; - GenHash *newHashTable; - - if (totalItems <= 10) hash = totalItems; - else hash = totalItems / 3; - - newHashTable = (GenHash*) palloc(sizeof(GenHash)); - newHashTable->hash = hash; - newHashTable->table = (GenRating**) palloc0(hash*sizeof(GenRating)); - - return newHashTable; + int hash; + GenHash *newHashTable; + + if (totalItems <= 10) hash = totalItems; + else hash = totalItems / 3; + + newHashTable = (GenHash*) palloc(sizeof(GenHash)); + newHashTable->hash = hash; + newHashTable->table = (GenRating**) palloc0(hash*sizeof(GenRating)); + + return newHashTable; } /* ---------------------------------------------------------------- @@ -4152,21 +4152,21 @@ hashCreate(int totalItems) void hashAdd(GenHash *table, GenRating *item) { - int hashval; - GenRating *tempRating; - - hashval = item->ID % table->hash; - tempRating = table->table[hashval]; - - if (!tempRating) { - table->table[hashval] = item; - return; - } - - while (tempRating->next) - tempRating = tempRating->next; - - tempRating->next = item; + int hashval; + GenRating *tempRating; + + hashval = item->ID % table->hash; + tempRating = table->table[hashval]; + + if (!tempRating) { + table->table[hashval] = item; + return; + } + + while (tempRating->next) + tempRating = tempRating->next; + + tempRating->next = item; } /* ---------------------------------------------------------------- @@ -4179,21 +4179,21 @@ hashAdd(GenHash *table, GenRating *item) GenRating* hashFind(GenHash *table, int itemID) { - int hashval; - GenRating *tempRating; - - hashval = itemID % table->hash; - tempRating = table->table[hashval]; - - while (tempRating) { - if (tempRating->ID == itemID) - return tempRating; - if (tempRating->ID > itemID) - return NULL; - tempRating = tempRating->next; - } - - return NULL; + int hashval; + GenRating *tempRating; + + hashval = itemID % table->hash; + tempRating = table->table[hashval]; + + while (tempRating) { + if (tempRating->ID == itemID) + return tempRating; + if (tempRating->ID > itemID) + return NULL; + tempRating = tempRating->next; + } + + return NULL; } /* ---------------------------------------------------------------- @@ -4204,24 +4204,24 @@ hashFind(GenHash *table, int itemID) */ void freeHash(GenHash *table) { - int i; - - if (!table) - return; - - for (i = 0; i < table->hash; i++) { - GenRating *tempRating; - - tempRating = table->table[i]; - while (tempRating) { - GenRating *tempRating2 = tempRating->next; - pfree(tempRating); - tempRating = tempRating2; - } - } - - pfree(table->table); - pfree(table); + int i; + + if (!table) + return; + + for (i = 0; i < table->hash; i++) { + GenRating *tempRating; + + tempRating = table->table[i]; + while (tempRating) { + GenRating *tempRating2 = tempRating->next; + pfree(tempRating); + tempRating = tempRating2; + } + } + + pfree(table->table); + pfree(table); } /* ---------------------------------------------------------------- @@ -4235,63 +4235,63 @@ freeHash(GenHash *table) { float itemCFpredict(RecScanState *recnode, char *itemmodel, int itemid) { - float recScore; - GenRating *currentItem; - // Query objects. - char *querystring; - QueryDesc *queryDesc; - PlanState *planstate; - TupleTableSlot *slot; - MemoryContext recathoncontext; - - // First, we grab the GenRating for this item ID. - currentItem = hashFind(recnode->pendingTable, itemid); - // In case there's some error. - if (!currentItem) - return -1; - - querystring = (char*) palloc(1024*sizeof(char)); - - // We're going to look through the similarity matrix for the - // numbers that correspond to this item, and find which of those - // also correspond to items this user rated. We will use that - // information to obtain the estimated rating. - sprintf(querystring,"select * from %s where item1 = %d;", - itemmodel,currentItem->ID); - queryDesc = recathon_queryStart(querystring,&recathoncontext); - planstate = queryDesc->planstate; - - for (;;) { - int itemID; - float similarity; - GenRating *ratedItem; - - slot = ExecProcNode(planstate); - if (TupIsNull(slot)) break; - - itemID = getTupleInt(slot,"item2"); - similarity = getTupleFloat(slot,"similarity"); - - // Find the array slot this item ID corresponds to. - // If -1 is returned, then the item ID corresponds to - // another item we haven't rated, so we don't care. - ratedItem = hashFind(recnode->ratedTable,itemID); - if (ratedItem) { - currentItem->score += similarity*ratedItem->score; - if (similarity < 0) - similarity *= -1; - currentItem->totalSim += similarity; - } - } - - // Cleanup. - recathon_queryEnd(queryDesc,recathoncontext); - pfree(querystring); - - if (currentItem->totalSim == 0) return 0; - - recScore = currentItem->score / currentItem->totalSim; - return recScore; + float recScore; + GenRating *currentItem; + // Query objects. + char *querystring; + QueryDesc *queryDesc; + PlanState *planstate; + TupleTableSlot *slot; + MemoryContext recathoncontext; + + // First, we grab the GenRating for this item ID. + currentItem = hashFind(recnode->pendingTable, itemid); + // In case there's some error. + if (!currentItem) + return -1; + + querystring = (char*) palloc(1024*sizeof(char)); + + // We're going to look through the similarity matrix for the + // numbers that correspond to this item, and find which of those + // also correspond to items this user rated. We will use that + // information to obtain the estimated rating. + sprintf(querystring,"select * from %s where item1 = %d;", + itemmodel,currentItem->ID); + queryDesc = recathon_queryStart(querystring,&recathoncontext); + planstate = queryDesc->planstate; + + for (;;) { + int itemID; + float similarity; + GenRating *ratedItem; + + slot = ExecProcNode(planstate); + if (TupIsNull(slot)) break; + + itemID = getTupleInt(slot,"item2"); + similarity = getTupleFloat(slot,"similarity"); + + // Find the array slot this item ID corresponds to. + // If -1 is returned, then the item ID corresponds to + // another item we haven't rated, so we don't care. + ratedItem = hashFind(recnode->ratedTable,itemID); + if (ratedItem) { + currentItem->score += similarity*ratedItem->score; + if (similarity < 0) + similarity *= -1; + currentItem->totalSim += similarity; + } + } + + // Cleanup. + recathon_queryEnd(queryDesc,recathoncontext); + pfree(querystring); + + if (currentItem->totalSim == 0) return 0; + + recScore = currentItem->score / currentItem->totalSim; + return recScore; } /* ---------------------------------------------------------------- @@ -4305,61 +4305,61 @@ itemCFpredict(RecScanState *recnode, char *itemmodel, int itemid) float userCFpredict(RecScanState *recnode, char *eventval, int itemid) { - float event, totalSim, average; - AttributeInfo *attributes; - // Query objects; - char *querystring; - QueryDesc *queryDesc; - PlanState *planstate; - TupleTableSlot *qslot; - MemoryContext recathoncontext; - - attributes = (AttributeInfo*) recnode->attributes; - - event = 0.0; - totalSim = 0.0; - average = recnode->average; - - /* We need to query the events table, so that we can - * find all events for this item and match them up - * with what we have in the similarity matrix. We note - * that it's necessarily true that the user has not - * rated these items. */ - querystring = (char*) palloc(1024*sizeof(char)); - sprintf(querystring,"select * from %s where %s = %d;", - attributes->eventtable,attributes->itemkey,itemid); - queryDesc = recathon_queryStart(querystring,&recathoncontext); - planstate = queryDesc->planstate; - - for (;;) { - int currentUserID; - float currentRating, similarity; - GenRating *currentUser; - - qslot = ExecProcNode(planstate); - if (TupIsNull(qslot)) break; - - currentUserID = getTupleInt(qslot,attributes->userkey); - currentRating = getTupleFloat(qslot,eventval); - - currentUser = hashFind(recnode->simTable,currentUserID); - if (!currentUser) continue; - similarity = currentUser->totalSim; - - event += (currentRating - average) * similarity; - // Poor man's absolute value of the similarity. - if (similarity < 0) - similarity *= -1; - totalSim += similarity; - } - recathon_queryEnd(queryDesc,recathoncontext); - - if (totalSim == 0.0) return 0.0; - - event /= totalSim; - event += average; - - return event; + float event, totalSim, average; + AttributeInfo *attributes; + // Query objects; + char *querystring; + QueryDesc *queryDesc; + PlanState *planstate; + TupleTableSlot *qslot; + MemoryContext recathoncontext; + + attributes = (AttributeInfo*) recnode->attributes; + + event = 0.0; + totalSim = 0.0; + average = recnode->average; + + /* We need to query the events table, so that we can + * find all events for this item and match them up + * with what we have in the similarity matrix. We note + * that it's necessarily true that the user has not + * rated these items. */ + querystring = (char*) palloc(1024*sizeof(char)); + sprintf(querystring,"select * from %s where %s = %d;", + attributes->eventtable,attributes->itemkey,itemid); + queryDesc = recathon_queryStart(querystring,&recathoncontext); + planstate = queryDesc->planstate; + + for (;;) { + int currentUserID; + float currentRating, similarity; + GenRating *currentUser; + + qslot = ExecProcNode(planstate); + if (TupIsNull(qslot)) break; + + currentUserID = getTupleInt(qslot,attributes->userkey); + currentRating = getTupleFloat(qslot,eventval); + + currentUser = hashFind(recnode->simTable,currentUserID); + if (!currentUser) continue; + similarity = currentUser->totalSim; + + event += (currentRating - average) * similarity; + // Poor man's absolute value of the similarity. + if (similarity < 0) + similarity *= -1; + totalSim += similarity; + } + recathon_queryEnd(queryDesc,recathoncontext); + + if (totalSim == 0.0) return 0.0; + + event /= totalSim; + event += average; + + return event; } /* ---------------------------------------------------------------- @@ -4372,64 +4372,64 @@ userCFpredict(RecScanState *recnode, char *eventval, int itemid) float SVDpredict(RecScanState *recnode, char *itemmodel, int itemid) { - float *userFeatures; - float recscore = 0.0; - // Query objects; - char *querystring; - QueryDesc *queryDesc; - PlanState *planstate; - TupleTableSlot *qslot; - MemoryContext recathoncontext; - - userFeatures = recnode->userFeatures; - - querystring = (char*) palloc(1024*sizeof(char)); - sprintf(querystring,"select * from %s where items = %d;", - itemmodel,itemid); - - queryDesc = recathon_queryStart(querystring,&recathoncontext); - planstate = queryDesc->planstate; - - // Here we don't use the simpler methods, because they're slightly - // less efficient. Since we'll be doing this several thousand times, - // we'll take what we can get. - for (;;) { - int i, natts; - int feature = -1; - float featValue = 0; - - qslot = ExecProcNode(planstate); - if (TupIsNull(qslot)) break; - slot_getallattrs(qslot); - natts = qslot->tts_tupleDescriptor->natts; - - for (i = 0; i < natts; i++) { - if (!qslot->tts_isnull[i]) { - char *col_name; - Datum slot_result; - // What we do depends on the column name. - col_name = qslot->tts_tupleDescriptor->attrs[i]->attname.data; - slot_result = qslot->tts_values[i]; - - if (strcmp(col_name,"feature") == 0) - feature = DatumGetInt32(slot_result); - else if (strcmp(col_name,"value") == 0) - featValue = DatumGetFloat4(slot_result); - } - } - - // If there's an error and we didn't find the column. - if (feature < 0) continue; - - // Add it into the event and continue. - recscore += featValue * userFeatures[feature]; - } - - // Cleanup. - recathon_queryEnd(queryDesc,recathoncontext); - pfree(querystring); - - return recscore; + float *userFeatures; + float recscore = 0.0; + // Query objects; + char *querystring; + QueryDesc *queryDesc; + PlanState *planstate; + TupleTableSlot *qslot; + MemoryContext recathoncontext; + + userFeatures = recnode->userFeatures; + + querystring = (char*) palloc(1024*sizeof(char)); + sprintf(querystring,"select * from %s where items = %d;", + itemmodel,itemid); + + queryDesc = recathon_queryStart(querystring,&recathoncontext); + planstate = queryDesc->planstate; + + // Here we don't use the simpler methods, because they're slightly + // less efficient. Since we'll be doing this several thousand times, + // we'll take what we can get. + for (;;) { + int i, natts; + int feature = -1; + float featValue = 0; + + qslot = ExecProcNode(planstate); + if (TupIsNull(qslot)) break; + slot_getallattrs(qslot); + natts = qslot->tts_tupleDescriptor->natts; + + for (i = 0; i < natts; i++) { + if (!qslot->tts_isnull[i]) { + char *col_name; + Datum slot_result; + // What we do depends on the column name. + col_name = qslot->tts_tupleDescriptor->attrs[i]->attname.data; + slot_result = qslot->tts_values[i]; + + if (strcmp(col_name,"feature") == 0) + feature = DatumGetInt32(slot_result); + else if (strcmp(col_name,"value") == 0) + featValue = DatumGetFloat4(slot_result); + } + } + + // If there's an error and we didn't find the column. + if (feature < 0) continue; + + // Add it into the event and continue. + recscore += featValue * userFeatures[feature]; + } + + // Cleanup. + recathon_queryEnd(queryDesc,recathoncontext); + pfree(querystring); + + return recscore; } /* ---------------------------------------------------------------- @@ -4442,39 +4442,39 @@ SVDpredict(RecScanState *recnode, char *itemmodel, int itemid) void applyRecScore(RecScanState *recnode, TupleTableSlot *slot, int itemid, int itemindex) { - float recscore; - AttributeInfo *attributes; - - attributes = (AttributeInfo*) recnode->attributes; - - switch ((recMethod)attributes->method) { - case itemCosCF: - case itemPearCF: - if (attributes->opType == OP_GENERATE || attributes->opType == OP_GENERATEJOIN) - recscore = itemCFgenerate(recnode,itemid,itemindex); - else - recscore = itemCFpredict(recnode,attributes->recModelName,itemid); - break; - case userCosCF: - case userPearCF: - if (attributes->opType == OP_GENERATE || attributes->opType == OP_GENERATEJOIN) - recscore = userCFgenerate(recnode,itemid,itemindex); - else - recscore = userCFpredict(recnode,attributes->eventval,itemid); - break; - case SVD: - if (attributes->opType == OP_GENERATE || attributes->opType == OP_GENERATEJOIN) - recscore = SVDgenerate(recnode,itemid,itemindex); - else - recscore = SVDpredict(recnode,attributes->recModelName2,itemid); - break; - default: - recscore = -1; - break; - } - - slot->tts_values[recnode->eventatt] = Float4GetDatum(recscore); - slot->tts_isnull[recnode->eventatt] = false; + float recscore; + AttributeInfo *attributes; + + attributes = (AttributeInfo*) recnode->attributes; + + switch ((recMethod)attributes->method) { + case itemCosCF: + case itemPearCF: + if (attributes->opType == OP_GENERATE || attributes->opType == OP_GENERATEJOIN) + recscore = itemCFgenerate(recnode,itemid,itemindex); + else + recscore = itemCFpredict(recnode,attributes->recModelName,itemid); + break; + case userCosCF: + case userPearCF: + if (attributes->opType == OP_GENERATE || attributes->opType == OP_GENERATEJOIN) + recscore = userCFgenerate(recnode,itemid,itemindex); + else + recscore = userCFpredict(recnode,attributes->eventval,itemid); + break; + case SVD: + if (attributes->opType == OP_GENERATE || attributes->opType == OP_GENERATEJOIN) + recscore = SVDgenerate(recnode,itemid,itemindex); + else + recscore = SVDpredict(recnode,attributes->recModelName2,itemid); + break; + default: + recscore = -1; + break; + } + + slot->tts_values[recnode->eventatt] = Float4GetDatum(recscore); + slot->tts_isnull[recnode->eventatt] = false; } /* ---------------------------------------------------------------- @@ -4489,59 +4489,84 @@ applyRecScore(RecScanState *recnode, TupleTableSlot *slot, int itemid, int itemi void applyItemSim(RecScanState *recnode, char *itemmodel) { - int i; - GenHash *ratedTable; - // Query objects. - char *querystring; - QueryDesc *queryDesc; - PlanState *planstate; - TupleTableSlot *slot; - MemoryContext recathoncontext; - - ratedTable = recnode->ratedTable; - - querystring = (char*) palloc(1024*sizeof(char)); - - // For every item we've rated, we need to obtain its similarity - // scores and apply them to the appropriate items. This is - // necessary because we're only storing half of the similarity - // matrix. - for (i = 0; i < ratedTable->hash; i++) { - GenRating *currentItem; - - for (currentItem = ratedTable->table[i]; currentItem; - currentItem = currentItem->next) { - sprintf(querystring,"select * from %s where item1 = %d;", - itemmodel,currentItem->ID); - queryDesc = recathon_queryStart(querystring,&recathoncontext); - planstate = queryDesc->planstate; - - for (;;) { - int itemID; - float similarity; - GenRating *pendingItem; - - slot = ExecProcNode(planstate); - if (TupIsNull(slot)) break; - - itemID = getTupleInt(slot,"item2"); - similarity = getTupleFloat(slot,"similarity"); - - // Find the array slot this item ID corresponds to. - // If -1 is returned, then the item ID corresponds to - // another item we've rated, so we don't care. - pendingItem = hashFind(recnode->pendingTable,itemID); - if (pendingItem) { - pendingItem->score += similarity*currentItem->score; - if (similarity < 0) - similarity *= -1; - pendingItem->totalSim += similarity; - } - } + int i; + GenHash *ratedTable; + // Query objects. + char *querystring; + QueryDesc *queryDesc; + PlanState *planstate; + TupleTableSlot *slot; + MemoryContext recathoncontext; + + ratedTable = recnode->ratedTable; + + querystring = (char*) palloc(1024*sizeof(char)); + + // For every item we've rated, we need to obtain its similarity + // scores and apply them to the appropriate items. This is + // necessary because we're only storing half of the similarity + // matrix. + for (i = 0; i < ratedTable->hash; i++) { + GenRating *currentItem; + + for (currentItem = ratedTable->table[i]; currentItem; + currentItem = currentItem->next) { + sprintf(querystring,"select * from %s where item1 = %d;", + itemmodel,currentItem->ID); + queryDesc = recathon_queryStart(querystring,&recathoncontext); + planstate = queryDesc->planstate; + + for (;;) { + int itemID; + float similarity; + GenRating *pendingItem; + + slot = ExecProcNode(planstate); + if (TupIsNull(slot)) break; + + itemID = getTupleInt(slot,"item2"); + similarity = getTupleFloat(slot,"similarity"); + + // Find the array slot this item ID corresponds to. + // If -1 is returned, then the item ID corresponds to + // another item we've rated, so we don't care. + pendingItem = hashFind(recnode->pendingTable,itemID); + if (pendingItem) { + pendingItem->score += similarity*currentItem->score; + if (similarity < 0) + similarity *= -1; + pendingItem->totalSim += similarity; + } + } + + recathon_queryEnd(queryDesc,recathoncontext); + } + } + + pfree(querystring); +} - recathon_queryEnd(queryDesc,recathoncontext); - } - } - pfree(querystring); +/* **************************************************************** + * _copyQuery function helper + * **************************************************************** + */ +void +copyQueryHelper(Query *query, Query *mainQuery) +{ + + ListCell * l; + ListCell *curr_old = mainQuery->rtable->head; + + if(mainQuery->recommendStmt != NULL) + query->recommendStmt = (Node*)(mainQuery->recommendStmt); + if(list_length(query->rtable) >= 1){ + forboth(l, query->rtable, curr_old, mainQuery->rtable){ + if(((RangeTblEntry*)l)->recommender != NULL){ + lfirst(l) = lfirst(curr_old); + } + } + } + + return; } diff --git a/PostgreSQL/src/include/utils/recathon.h b/PostgreSQL/src/include/utils/recathon.h index e08b9c8..1ea123b 100644 --- a/PostgreSQL/src/include/utils/recathon.h +++ b/PostgreSQL/src/include/utils/recathon.h @@ -21,38 +21,38 @@ /* An enum to list all of our recommendation methods. */ typedef enum { - itemCosCF, - itemPearCF, - userCosCF, - userPearCF, - SVD + itemCosCF, + itemPearCF, + userCosCF, + userPearCF, + SVD } recMethod; /* Structures for a linked list of similarity cells. */ struct sim_node_t { - int id; - float event; - struct sim_node_t *next; + int id; + float event; + struct sim_node_t *next; }; typedef struct sim_node_t* sim_node; /* Structures for a linked list of neighbor nodes. * Used when we have a specific neighborhood size. */ struct nbr_node_t { - int item1; - int item2; - float similarity; - struct nbr_node_t *next; + int item1; + int item2; + float similarity; + struct nbr_node_t *next; }; typedef struct nbr_node_t* nbr_node; /* Structure to hold event information for SVD * training. Includes space for residual information. */ struct svd_node_t { - int userid; - int itemid; - float event; - float residual; + int userid; + int itemid; + float event; + float residual; }; typedef struct svd_node_t* svd_node; @@ -89,8 +89,8 @@ extern char* retrieveRecommender(char *eventtable, char *method); /* Functions for getting recommender data. */ extern void getRecInfo(char *recindexname, char **ret_eventtable, - char **ret_userkey, char **ret_itemkey, - char **ret_eventval, char **ret_method, int *ret_numatts); + char **ret_userkey, char **ret_itemkey, + char **ret_eventval, char **ret_method, int *ret_numatts); /* Functions for parsing CreateRStmt data. */ extern recMethod validateCreateRStmt(CreateRStmt *recStmt); @@ -105,43 +105,43 @@ extern void updateCellCounter(char *eventtable, TupleTableSlot *insertslot); extern int binarySearch(int *array, int value, int lo, int hi); extern int *getAllUsers(int numusers, char* usertable); extern float *vector_lengths(char *key, char *eventtable, char *eventval, - int *totalNum, int **IDlist); + int *totalNum, int **IDlist); extern float dotProduct(sim_node item1, sim_node item2); extern float cosineSimilarity(sim_node item1, sim_node item2, float length1, float length2); extern int updateItemCosModel(char *eventtable, char *userkey, char *itemkey, - char *eventval, char *modelname, int *itemIDs, float *itemLengths, - int numItems, bool update); + char *eventval, char *modelname, int *itemIDs, float *itemLengths, + int numItems, bool update); /* Functions for building a recommender based on itemPearCF. */ extern void pearson_info(char *key, char *eventtable, char *eventval, int *totalNum, - int **IDlist, float **avgList, float **pearsonList); + int **IDlist, float **avgList, float **pearsonList); extern float pearsonDotProduct(sim_node item1, sim_node item2, float avg1, float avg2); extern float pearsonSimilarity(sim_node item1, sim_node item2, float avg1, float avg2, - float pearson1, float pearson2); + float pearson1, float pearson2); extern int updateItemPearModel(char *eventtable, char *userkey, char *itemkey, - char *eventval, char *modelname, int *itemIDs, float *itemAvgs, - float *itemPearsons, int numItems, bool update); + char *eventval, char *modelname, int *itemIDs, float *itemAvgs, + float *itemPearsons, int numItems, bool update); /* Functions for building a user-based recommender. */ extern int updateUserCosModel(char *eventtable, char *userkey, char *itemkey, - char *eventval, char *modelname, int *userIDs, float *userLengths, - int numUsers, bool update); + char *eventval, char *modelname, int *userIDs, float *userLengths, + int numUsers, bool update); extern int updateUserPearModel(char *eventtable, char *userkey, char *itemkey, - char *eventval, char *modelname, int *userIDs, float *userAvgs, - float *userPearsons, int numUsers, bool update); + char *eventval, char *modelname, int *userIDs, float *userAvgs, + float *userPearsons, int numUsers, bool update); /* Functions for building a SVD recommender. */ extern svd_node createSVDnode(TupleTableSlot *slot, char *userkey, char *itemkey, char *eventval, - int *userIDs, int *itemIDs, int numUsers, int numItems); + int *userIDs, int *itemIDs, int numUsers, int numItems); extern void SVDlists(char *userkey, char *itemkey, char *eventtable, - int **ret_userIDs, int **ret_itemIDs, int *ret_numUsers, int *ret_numItems); + int **ret_userIDs, int **ret_itemIDs, int *ret_numUsers, int *ret_numItems); extern void SVDaverages(char *userkey, char *itemkey, char *eventtable, char *eventval, - int *userIDs, int *itemIDs, int numUsers, int numItems, - float **ret_itemAvgs, float **ret_userOffsets); + int *userIDs, int *itemIDs, int numUsers, int numItems, + float **ret_itemAvgs, float **ret_userOffsets); extern float predictRating(int featurenum, int numFeatures, int userid, int itemid, - float **userFeatures, float **itemFeatures, float redisual); + float **userFeatures, float **itemFeatures, float redisual); extern int SVDtrain(char *userkey, char *itemkey, char *eventtable, char *eventval, - char *usermodelname, char *itemmodelname, bool update); + char *usermodelname, char *itemmodelname, bool update); /* Functions for building and querying recommenders on-the-fly. */ extern void generateItemCosModel(RecScanState *recnode); @@ -166,4 +166,7 @@ extern float SVDpredict(RecScanState *recnode, char *itemmodel, int itemid); extern void applyRecScore(RecScanState *recnode, TupleTableSlot *slot, int itemid, int itemindex); extern void applyItemSim(RecScanState *recnode, char *itemmodel); +/* Functions for copyQuery function. */ +extern void copyQueryHelper(Query *query, Query *mainQuery); + #endif /* RECATHON_H */ From b28bba36db07f496250a5029423198d6a8e7d051 Mon Sep 17 00:00:00 2001 From: RMoraffah Date: Thu, 10 Nov 2016 15:21:28 +0330 Subject: [PATCH 03/18] Add tests for fixed bugs --- examples/MoiveLens/script2.py | 217 ++++++++++++++++++++++++++++++++++ 1 file changed, 217 insertions(+) create mode 100644 examples/MoiveLens/script2.py diff --git a/examples/MoiveLens/script2.py b/examples/MoiveLens/script2.py new file mode 100644 index 0000000..bae3c76 --- /dev/null +++ b/examples/MoiveLens/script2.py @@ -0,0 +1,217 @@ +####Movie Lens 1m### +#Tests for fixed bugs (Explain, Explain Analyze, ..)# + + +import psycopg2 +import sys +import time +import os +def main(): + host = raw_input("Enter the host address of postgresql: ") + + dbname = raw_input("Enter the database name: ") + + user = raw_input("Enter the username of postgresql: ") + + password = raw_input("Enter the password of postgresql: ") + + #First data set + UserDataPath = raw_input("Enter the abs path for the first set of user data(.dat file): ") + + ItemDataPath = raw_input("Enter the abs path for the first set of item data(.dat file): ") + + RatingDataPath = raw_input("Enter the abs path for the first set of ratings data(.dat file): ") + + #Second recommender data set path + + RatingDataPath2 = raw_input("Enter the abs path for the MoiveTweets ratings data(.csv file): ") + + + #Define our connection string + conn_string = "host='"+host+"' dbname='"+dbname+"' user='"+user+"' password='"+password+"'" + + # print the connection string we will use to connect + print "Connecting to database\n ->%s" % (conn_string) + + # get a connection, if a connect cannot be made an exception will be raised here + conn = psycopg2.connect(conn_string) + cursor = conn.cursor() + + print "Data copying from .csv file to postgresql database" + + import os + path = os.getcwd() + + executionStart = time.time() + + cursor.execute(" set client_encoding = LATIN1;"); + conn.commit() + + cursor.execute("create table if not exists users( userid int, age varchar, gender varchar, job varchar, zipcode varchar);"); + conn.commit() + executionTime = time.time() - executionStart + print "\n Execution time is :-" + print executionTime + + executionStart = time.time() + query = "COPY users(userid,gender,age,job,zipcode) from "+UserDataPath+"DELIMITERS ';';" + cursor.execute(query); + conn.commit() + executionTime = time.time() - executionStart + print "\n Execution time is :-" + print executionTime + + cursor.execute("create table if not exists moive( itemid int, name varchar, genre varchar);"); + conn.commit() + + query = "COPY moive(itemid,name,genre) from "+ItemDataPath+" DELIMITERS ';';" + cursor.execute(query); + conn.commit() + + + cursor.execute("create table if not exists ratings ( userid int, itemid int, rating real ,garbage varchar);"); + conn.commit() + + query = "COPY ratings(userid,itemid,rating,garbage) from "+RatingDataPath+" DELIMITERS ';';" + cursor.execute(query); + conn.commit() + + #Second recommender informations + cursor.execute("create table if not exists gsratings (userid int, itemid int, rating int);"); + conn.commit() + executionTime = time.time() - executionStart + + query = "copy gsratings(userid, itemid, rating) from "+RatingDataPath2+" DELIMITER ':';" + cursor.execute(query); + conn.commit() + + + + + print "Connected!\n" + + ############### + print "\n \n Creating Recommender for join of two recommendation queries.." + executionStart = time.time() + cursor.execute("CREATE RECOMMENDER mtitemcos on gsratings Users FROM userid Items FROM itemid Events FROM rating Using ItemCosCF;"); + conn.commit() + executionTime = time.time() - executionStart + print "\n" + print (" Execution time is :-", executionTime) + + + ############### + + print "Recommendation query being shooted with ItemCosCF technique" + + ############### + + print "\n \n Creating Recommender.." + executionStart = time.time() + cursor.execute("CREATE RECOMMENDER mlRecItemCos on ratings Users FROM userid Items FROM itemid Events FROM rating Using ItemCosCF;"); + conn.commit() + executionTime = time.time() - executionStart + print " Execution time is :-" + print executionTime + + ############### + + print "\n \n Explain the selection of movie for single user.." + executionStart = time.time() + cursor.execute("explain select itemid from ratings RECOMMEND itemid to userid ON rating Using ItemCosCF where userid =21;"); + conn.commit() + executionTime = time.time() - executionStart + print " Execution time is :-" + print executionTime + + ############### + + print "\n \n Explain Analyze for the selection of movie for single user.." + executionStart = time.time() + cursor.execute("explain analyze select itemid from ratings RECOMMEND itemid to userid ON rating Using ItemCosCF where userid =21;"); + conn.commit() + executionTime = time.time() - executionStart + print " Execution time is :-" + print executionTime + + ################ + + print "\n \n Explain the Join Query.." + executionStart = time.time() + cursor.execute("explain select r.itemid, i.name, i,genre, r.rating , r.userid, b.age from ratings r, moive i, users b Recommend r.itemid to r.userid On r.rating Using ItemCosCF where r.userid = 1 and r.userid = b.userid and r.itemid = i.itemid AND i.genre ILIKE '%action%' ;"); + conn.commit() + executionTime = time.time() - executionStart + print " Execution time is :-" + print executionTime + + ################ + + print "\n \n Explain Analyze the Join Query.." + executionStart = time.time() + cursor.execute("explain analyze select r.itemid, i.name, i,genre, r.rating , r.userid, b.age from ratings r, moive i, users b Recommend r.itemid to r.userid On r.rating Using ItemCosCF where r.userid = 1 and r.userid = b.userid and r.itemid = i.itemid AND i.genre ILIKE '%action%' ;"); + conn.commit() + executionTime = time.time() - executionStart + print " Execution time is :-" + print executionTime + + + + ################ + + print "\n \n Join of Two Recommendations.." + executionStart = time.time() + cursor.execute("select t1.itemid, t1.userid, t2.itemid, t2.userid FROM (select * from ratings r Recommend r.itemid TO r.userid ON r.rating USING ItemCosCF where r.userid =15 limit 10) t1 join (select * from gsratings g RECOMMEND g.itemid TO g.userid ON g.rating USING ItemCosCF where g.userid between 10 and 20 AND g.itemid between 90000 and 100000) t2 ON t1.userid =t2.userid limit 10;"); + conn.commit() + executionTime = time.time() - executionStart + print " Execution time is :-" + print executionTime + + ############### + + print "\n \n Union of Two Recommendations.." + executionStart = time.time() + cursor.execute("(select r.userid from ratings r RECOMMEND r.itemid TO r.userid ON r.rating USING ItemCosCF where r.userid =1 ) union (select g.userid from gsratings g RECOMMEND g.itemid TO g.userid ON g.rating USING ItemCosCF where g.userid =21);"); + conn.commit() + executionTime = time.time() - executionStart + print "Execution time is :-" + print executionTime + + ############### + + print "\n \n Recommender in subselect" + executionStart = time.time() + cursor.execute("select u.age, sub.rating from users u, (select * FROM ratings r RECOMMEND r.itemid TO r.userid ON r.rating USING ItemCosCF WHERE r.itemid in (1,2,3) limit 10) sub WHERE u.userid = sub.userid;"); + conn.commit() + executionTime = time.time() - executionStart + print " Execution time is :-" + print executionTime + + ############### + + print "\n \n Recommender and IN" + executionStart = time.time() + cursor.execute("select * from moive m where m.itemid IN (SELECT r.itemid FROM ratings r RECOMMEND r.itemid TO r.userid ON r.rating USING ItemCosCF WHERE r.userid=1 limit 10);"); + conn.commit() + executionTime = time.time() - executionStart + print " Execution time is :-" + print executionTime + + ################ + + cursor.execute("drop table ratings;"); + conn.commit() + + cursor.execute("drop table users;"); + conn.commit() + + cursor.execute("drop table moive;"); + conn.commit() + + cursor.execute("drop table gsratings;"); + conn.commit() + + + + +if __name__ == "__main__": + main() From 8bf728f92a47c014a1177e6728e09bd9cc49a2e1 Mon Sep 17 00:00:00 2001 From: RMoraffah Date: Sun, 13 Nov 2016 03:25:11 +0330 Subject: [PATCH 04/18] Bugs fix.(Explain Analyze,..)prevent probable side effects --- PostgreSQL/src/backend/utils/misc/recathon.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/PostgreSQL/src/backend/utils/misc/recathon.c b/PostgreSQL/src/backend/utils/misc/recathon.c index 3fe56ff..9b3282b 100644 --- a/PostgreSQL/src/backend/utils/misc/recathon.c +++ b/PostgreSQL/src/backend/utils/misc/recathon.c @@ -4557,13 +4557,13 @@ copyQueryHelper(Query *query, Query *mainQuery) ListCell * l; ListCell *curr_old = mainQuery->rtable->head; - + if(mainQuery->recommendStmt != NULL) query->recommendStmt = (Node*)(mainQuery->recommendStmt); if(list_length(query->rtable) >= 1){ forboth(l, query->rtable, curr_old, mainQuery->rtable){ - if(((RangeTblEntry*)l)->recommender != NULL){ - lfirst(l) = lfirst(curr_old); + if(((RangeTblEntry*)lfirst(l))->recommender != NULL){ + ((RangeTblEntry*)lfirst(l))->recommender = ((RangeTblEntry*)lfirst(curr_old))->recommender; } } } From 151ce10d9821ad5834c833c37e57551e1b1e31b7 Mon Sep 17 00:00:00 2001 From: RMoraffah Date: Mon, 14 Nov 2016 17:03:20 +0330 Subject: [PATCH 05/18] Bugs fix. prevent all probable side effects of changes for fixed bugs from happening. --- PostgreSQL/src/backend/commands/explain.c | 3864 +++++---- .../src/backend/optimizer/path/allpaths.c | 3085 ++++--- .../src/backend/optimizer/plan/planner.c | 25 +- PostgreSQL/src/backend/utils/misc/recathon.c | 7655 ++++++++--------- PostgreSQL/src/include/utils/recathon.h | 73 +- 5 files changed, 7330 insertions(+), 7372 deletions(-) diff --git a/PostgreSQL/src/backend/commands/explain.c b/PostgreSQL/src/backend/commands/explain.c index 0df25f8..ef7d6dc 100644 --- a/PostgreSQL/src/backend/commands/explain.c +++ b/PostgreSQL/src/backend/commands/explain.c @@ -32,8 +32,6 @@ #include "utils/snapmgr.h" #include "utils/tuplesort.h" #include "utils/xml.h" -//new -#include "utils/recathon.h" /* Hook for plugins to get control in ExplainOneQuery() */ @@ -50,57 +48,57 @@ explain_get_index_name_hook_type explain_get_index_name_hook = NULL; #define X_NOWHITESPACE 4 static void ExplainOneQuery(Query *query, IntoClause *into, ExplainState *es, - const char *queryString, ParamListInfo params); + const char *queryString, ParamListInfo params); static void report_triggers(ResultRelInfo *rInfo, bool show_relname, - ExplainState *es); + ExplainState *es); static double elapsed_time(instr_time *starttime); static void ExplainNode(PlanState *planstate, List *ancestors, - const char *relationship, const char *plan_name, - ExplainState *es); + const char *relationship, const char *plan_name, + ExplainState *es); static void show_plan_tlist(PlanState *planstate, List *ancestors, - ExplainState *es); + ExplainState *es); static void show_expression(Node *node, const char *qlabel, - PlanState *planstate, List *ancestors, - bool useprefix, ExplainState *es); + PlanState *planstate, List *ancestors, + bool useprefix, ExplainState *es); static void show_qual(List *qual, const char *qlabel, - PlanState *planstate, List *ancestors, - bool useprefix, ExplainState *es); + PlanState *planstate, List *ancestors, + bool useprefix, ExplainState *es); static void show_scan_qual(List *qual, const char *qlabel, - PlanState *planstate, List *ancestors, - ExplainState *es); + PlanState *planstate, List *ancestors, + ExplainState *es); static void show_upper_qual(List *qual, const char *qlabel, - PlanState *planstate, List *ancestors, - ExplainState *es); + PlanState *planstate, List *ancestors, + ExplainState *es); static void show_sort_keys(SortState *sortstate, List *ancestors, - ExplainState *es); + ExplainState *es); static void show_merge_append_keys(MergeAppendState *mstate, List *ancestors, - ExplainState *es); + ExplainState *es); static void show_sort_keys_common(PlanState *planstate, - int nkeys, AttrNumber *keycols, - List *ancestors, ExplainState *es); + int nkeys, AttrNumber *keycols, + List *ancestors, ExplainState *es); static void show_sort_info(SortState *sortstate, ExplainState *es); static void show_hash_info(HashState *hashstate, ExplainState *es); static void show_instrumentation_count(const char *qlabel, int which, - PlanState *planstate, ExplainState *es); + PlanState *planstate, ExplainState *es); static void show_foreignscan_info(ForeignScanState *fsstate, ExplainState *es); static const char *explain_get_index_name(Oid indexId); static void ExplainIndexScanDetails(Oid indexid, ScanDirection indexorderdir, - ExplainState *es); + ExplainState *es); static void ExplainScanTarget(Scan *plan, ExplainState *es); static void ExplainModifyTarget(ModifyTable *plan, ExplainState *es); static void ExplainTargetRel(Plan *plan, Index rti, ExplainState *es); static void ExplainMemberNodes(List *plans, PlanState **planstates, - List *ancestors, ExplainState *es); + List *ancestors, ExplainState *es); static void ExplainSubPlans(List *plans, List *ancestors, - const char *relationship, ExplainState *es); + const char *relationship, ExplainState *es); static void ExplainProperty(const char *qlabel, const char *value, - bool numeric, ExplainState *es); + bool numeric, ExplainState *es); static void ExplainOpenGroup(const char *objtype, const char *labelname, - bool labeled, ExplainState *es); + bool labeled, ExplainState *es); static void ExplainCloseGroup(const char *objtype, const char *labelname, - bool labeled, ExplainState *es); + bool labeled, ExplainState *es); static void ExplainDummyGroup(const char *objtype, const char *labelname, - ExplainState *es); + ExplainState *es); static void ExplainXMLTag(const char *tagname, int flags, ExplainState *es); static void ExplainJSONLineEnding(ExplainState *es); static void ExplainYAMLLineStarting(ExplainState *es); @@ -114,137 +112,130 @@ static void escape_yaml(StringInfo buf, const char *str); */ void ExplainQuery(ExplainStmt *stmt, const char *queryString, - ParamListInfo params, DestReceiver *dest) + ParamListInfo params, DestReceiver *dest) { - ExplainState es; - TupOutputState *tstate; - List *rewritten; - ListCell *lc; - bool timing_set = false; - - - /* Initialize ExplainState. */ - ExplainInitState(&es); - - /* Parse options list. */ - foreach(lc, stmt->options) - { - DefElem *opt = (DefElem *) lfirst(lc); - - if (strcmp(opt->defname, "analyze") == 0) - es.analyze = defGetBoolean(opt); - else if (strcmp(opt->defname, "verbose") == 0) - es.verbose = defGetBoolean(opt); - else if (strcmp(opt->defname, "costs") == 0) - es.costs = defGetBoolean(opt); - else if (strcmp(opt->defname, "buffers") == 0) - es.buffers = defGetBoolean(opt); - else if (strcmp(opt->defname, "timing") == 0) - { - timing_set = true; - es.timing = defGetBoolean(opt); - } - else if (strcmp(opt->defname, "format") == 0) - { - char *p = defGetString(opt); - - if (strcmp(p, "text") == 0) - es.format = EXPLAIN_FORMAT_TEXT; - else if (strcmp(p, "xml") == 0) - es.format = EXPLAIN_FORMAT_XML; - else if (strcmp(p, "json") == 0) - es.format = EXPLAIN_FORMAT_JSON; - else if (strcmp(p, "yaml") == 0) - es.format = EXPLAIN_FORMAT_YAML; - else - ereport(ERROR, - (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("unrecognized value for EXPLAIN option \"%s\": \"%s\"", - opt->defname, p))); - } - else - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("unrecognized EXPLAIN option \"%s\"", - opt->defname))); - } - - if (es.buffers && !es.analyze) - ereport(ERROR, - (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("EXPLAIN option BUFFERS requires ANALYZE"))); - - /* if the timing was not set explicitly, set default value */ - es.timing = (timing_set) ? es.timing : es.analyze; - - /* check that timing is used with EXPLAIN ANALYZE */ - if (es.timing && !es.analyze) - ereport(ERROR, - (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("EXPLAIN option TIMING requires ANALYZE"))); - - /* - * Parse analysis was done already, but we still have to run the rule - * rewriter. We do not do AcquireRewriteLocks: we assume the query either - * came straight from the parser, or suitable locks were acquired by - * plancache.c. - * - * Because the rewriter and planner tend to scribble on the input, we make - * a preliminary copy of the source querytree. This prevents problems in - * the case that the EXPLAIN is in a portal or plpgsql function and is - * executed repeatedly. (See also the same hack in DECLARE CURSOR and - * PREPARE.) XXX FIXME someday. - */ - Assert(IsA(stmt->query, Query)); - //NEW FOR RECDB - //Prevent an error from happening while using a recommender in an explain analyze command - Query * temp = (Query *) copyObject(stmt->query); - copyQueryHelper(temp, (Query*)stmt->query); - rewritten = QueryRewrite(temp); - - - /* emit opening boilerplate */ - ExplainBeginOutput(&es); - - if (rewritten == NIL) - { - /* - * In the case of an INSTEAD NOTHING, tell at least that. But in - * non-text format, the output is delimited, so this isn't necessary. - */ - if (es.format == EXPLAIN_FORMAT_TEXT) - appendStringInfoString(es.str, "Query rewrites to nothing\n"); - } - else - { - ListCell *l; - - /* Explain every plan */ - foreach(l, rewritten) - { - - ExplainOneQuery((Query *) lfirst(l), NULL, &es, - queryString, params); - - /* Separate plans with an appropriate separator */ - if (lnext(l) != NULL) - ExplainSeparatePlans(&es); - } - } - - /* emit closing boilerplate */ - ExplainEndOutput(&es); - Assert(es.indent == 0); - - /* output tuples */ - tstate = begin_tup_output_tupdesc(dest, ExplainResultDesc(stmt)); - if (es.format == EXPLAIN_FORMAT_TEXT) - do_text_output_multiline(tstate, es.str->data); - else - do_text_output_oneline(tstate, es.str->data); - end_tup_output(tstate); - - pfree(es.str->data); + ExplainState es; + TupOutputState *tstate; + List *rewritten; + ListCell *lc; + bool timing_set = false; + + /* Initialize ExplainState. */ + ExplainInitState(&es); + + /* Parse options list. */ + foreach(lc, stmt->options) + { + DefElem *opt = (DefElem *) lfirst(lc); + + if (strcmp(opt->defname, "analyze") == 0) + es.analyze = defGetBoolean(opt); + else if (strcmp(opt->defname, "verbose") == 0) + es.verbose = defGetBoolean(opt); + else if (strcmp(opt->defname, "costs") == 0) + es.costs = defGetBoolean(opt); + else if (strcmp(opt->defname, "buffers") == 0) + es.buffers = defGetBoolean(opt); + else if (strcmp(opt->defname, "timing") == 0) + { + timing_set = true; + es.timing = defGetBoolean(opt); + } + else if (strcmp(opt->defname, "format") == 0) + { + char *p = defGetString(opt); + + if (strcmp(p, "text") == 0) + es.format = EXPLAIN_FORMAT_TEXT; + else if (strcmp(p, "xml") == 0) + es.format = EXPLAIN_FORMAT_XML; + else if (strcmp(p, "json") == 0) + es.format = EXPLAIN_FORMAT_JSON; + else if (strcmp(p, "yaml") == 0) + es.format = EXPLAIN_FORMAT_YAML; + else + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("unrecognized value for EXPLAIN option \"%s\": \"%s\"", + opt->defname, p))); + } + else + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("unrecognized EXPLAIN option \"%s\"", + opt->defname))); + } + + if (es.buffers && !es.analyze) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("EXPLAIN option BUFFERS requires ANALYZE"))); + + /* if the timing was not set explicitly, set default value */ + es.timing = (timing_set) ? es.timing : es.analyze; + + /* check that timing is used with EXPLAIN ANALYZE */ + if (es.timing && !es.analyze) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("EXPLAIN option TIMING requires ANALYZE"))); + + /* + * Parse analysis was done already, but we still have to run the rule + * rewriter. We do not do AcquireRewriteLocks: we assume the query either + * came straight from the parser, or suitable locks were acquired by + * plancache.c. + * + * Because the rewriter and planner tend to scribble on the input, we make + * a preliminary copy of the source querytree. This prevents problems in + * the case that the EXPLAIN is in a portal or plpgsql function and is + * executed repeatedly. (See also the same hack in DECLARE CURSOR and + * PREPARE.) XXX FIXME someday. + */ + Assert(IsA(stmt->query, Query)); + rewritten = QueryRewrite((Query *) copyObject(stmt->query)); + + /* emit opening boilerplate */ + ExplainBeginOutput(&es); + + if (rewritten == NIL) + { + /* + * In the case of an INSTEAD NOTHING, tell at least that. But in + * non-text format, the output is delimited, so this isn't necessary. + */ + if (es.format == EXPLAIN_FORMAT_TEXT) + appendStringInfoString(es.str, "Query rewrites to nothing\n"); + } + else + { + ListCell *l; + + /* Explain every plan */ + foreach(l, rewritten) + { + ExplainOneQuery((Query *) lfirst(l), NULL, &es, + queryString, params); + + /* Separate plans with an appropriate separator */ + if (lnext(l) != NULL) + ExplainSeparatePlans(&es); + } + } + + /* emit closing boilerplate */ + ExplainEndOutput(&es); + Assert(es.indent == 0); + + /* output tuples */ + tstate = begin_tup_output_tupdesc(dest, ExplainResultDesc(stmt)); + if (es.format == EXPLAIN_FORMAT_TEXT) + do_text_output_multiline(tstate, es.str->data); + else + do_text_output_oneline(tstate, es.str->data); + end_tup_output(tstate); + + pfree(es.str->data); } /* @@ -253,11 +244,11 @@ ExplainQuery(ExplainStmt *stmt, const char *queryString, void ExplainInitState(ExplainState *es) { - /* Set default options. */ - memset(es, 0, sizeof(ExplainState)); - es->costs = true; - /* Prepare output buffer. */ - es->str = makeStringInfo(); + /* Set default options. */ + memset(es, 0, sizeof(ExplainState)); + es->costs = true; + /* Prepare output buffer. */ + es->str = makeStringInfo(); } /* @@ -267,34 +258,34 @@ ExplainInitState(ExplainState *es) TupleDesc ExplainResultDesc(ExplainStmt *stmt) { - TupleDesc tupdesc; - ListCell *lc; - Oid result_type = TEXTOID; - - /* Check for XML format option */ - foreach(lc, stmt->options) - { - DefElem *opt = (DefElem *) lfirst(lc); - - if (strcmp(opt->defname, "format") == 0) - { - char *p = defGetString(opt); - - if (strcmp(p, "xml") == 0) - result_type = XMLOID; - else if (strcmp(p, "json") == 0) - result_type = JSONOID; - else - result_type = TEXTOID; - /* don't "break", as ExplainQuery will use the last value */ - } - } - - /* Need a tuple descriptor representing a single TEXT or XML column */ - tupdesc = CreateTemplateTupleDesc(1, false); - TupleDescInitEntry(tupdesc, (AttrNumber) 1, "QUERY PLAN", - result_type, -1, 0); - return tupdesc; + TupleDesc tupdesc; + ListCell *lc; + Oid result_type = TEXTOID; + + /* Check for XML format option */ + foreach(lc, stmt->options) + { + DefElem *opt = (DefElem *) lfirst(lc); + + if (strcmp(opt->defname, "format") == 0) + { + char *p = defGetString(opt); + + if (strcmp(p, "xml") == 0) + result_type = XMLOID; + else if (strcmp(p, "json") == 0) + result_type = JSONOID; + else + result_type = TEXTOID; + /* don't "break", as ExplainQuery will use the last value */ + } + } + + /* Need a tuple descriptor representing a single TEXT or XML column */ + tupdesc = CreateTemplateTupleDesc(1, false); + TupleDescInitEntry(tupdesc, (AttrNumber) 1, "QUERY PLAN", + result_type, -1, 0); + return tupdesc; } /* @@ -305,29 +296,28 @@ ExplainResultDesc(ExplainStmt *stmt) */ static void ExplainOneQuery(Query *query, IntoClause *into, ExplainState *es, - const char *queryString, ParamListInfo params) + const char *queryString, ParamListInfo params) { - /* planner will not cope with utility statements */ - if (query->commandType == CMD_UTILITY) - { - ExplainOneUtility(query->utilityStmt, into, es, queryString, params); - return; - } - - /* if an advisor plugin is present, let it manage things */ - if (ExplainOneQuery_hook) - (*ExplainOneQuery_hook) (query, into, es, queryString, params); - else - { - PlannedStmt *plan; - - /* plan the query */ - - plan = pg_plan_query(query, 0, params); - - /* run it (if needed) and produce output */ - ExplainOnePlan(plan, into, es, queryString, params); - } + /* planner will not cope with utility statements */ + if (query->commandType == CMD_UTILITY) + { + ExplainOneUtility(query->utilityStmt, into, es, queryString, params); + return; + } + + /* if an advisor plugin is present, let it manage things */ + if (ExplainOneQuery_hook) + (*ExplainOneQuery_hook) (query, into, es, queryString, params); + else + { + PlannedStmt *plan; + + /* plan the query */ + plan = pg_plan_query(query, 0, params); + + /* run it (if needed) and produce output */ + ExplainOnePlan(plan, into, es, queryString, params); + } } /* @@ -343,49 +333,45 @@ ExplainOneQuery(Query *query, IntoClause *into, ExplainState *es, */ void ExplainOneUtility(Node *utilityStmt, IntoClause *into, ExplainState *es, - const char *queryString, ParamListInfo params) + const char *queryString, ParamListInfo params) { - if (utilityStmt == NULL) - return; - - if (IsA(utilityStmt, CreateTableAsStmt)) - { - /* - * We have to rewrite the contained SELECT and then pass it back to - * ExplainOneQuery. It's probably not really necessary to copy the - * contained parsetree another time, but let's be safe. - */ - CreateTableAsStmt *ctas = (CreateTableAsStmt *) utilityStmt; - List *rewritten; - - Assert(IsA(ctas->query, Query)); - //NEW FOR RECDB - //Prevent an error from happening while using a recommender - Query * temp = (Query *) copyObject(ctas->query); - copyQueryHelper(temp, (Query*)ctas->query); - rewritten = QueryRewrite(temp); - Assert(list_length(rewritten) == 1); - ExplainOneQuery((Query *) linitial(rewritten), ctas->into, es, - queryString, params); - } - else if (IsA(utilityStmt, ExecuteStmt)) - ExplainExecuteQuery((ExecuteStmt *) utilityStmt, into, es, - queryString, params); - else if (IsA(utilityStmt, NotifyStmt)) - { - if (es->format == EXPLAIN_FORMAT_TEXT) - appendStringInfoString(es->str, "NOTIFY\n"); - else - ExplainDummyGroup("Notify", NULL, es); - } - else - { - if (es->format == EXPLAIN_FORMAT_TEXT) - appendStringInfoString(es->str, - "Utility statements have no plan structure\n"); - else - ExplainDummyGroup("Utility Statement", NULL, es); - } + if (utilityStmt == NULL) + return; + + if (IsA(utilityStmt, CreateTableAsStmt)) + { + /* + * We have to rewrite the contained SELECT and then pass it back to + * ExplainOneQuery. It's probably not really necessary to copy the + * contained parsetree another time, but let's be safe. + */ + CreateTableAsStmt *ctas = (CreateTableAsStmt *) utilityStmt; + List *rewritten; + + Assert(IsA(ctas->query, Query)); + rewritten = QueryRewrite((Query *) copyObject(ctas->query)); + Assert(list_length(rewritten) == 1); + ExplainOneQuery((Query *) linitial(rewritten), ctas->into, es, + queryString, params); + } + else if (IsA(utilityStmt, ExecuteStmt)) + ExplainExecuteQuery((ExecuteStmt *) utilityStmt, into, es, + queryString, params); + else if (IsA(utilityStmt, NotifyStmt)) + { + if (es->format == EXPLAIN_FORMAT_TEXT) + appendStringInfoString(es->str, "NOTIFY\n"); + else + ExplainDummyGroup("Notify", NULL, es); + } + else + { + if (es->format == EXPLAIN_FORMAT_TEXT) + appendStringInfoString(es->str, + "Utility statements have no plan structure\n"); + else + ExplainDummyGroup("Utility Statement", NULL, es); + } } /* @@ -407,138 +393,138 @@ ExplainOneUtility(Node *utilityStmt, IntoClause *into, ExplainState *es, */ void ExplainOnePlan(PlannedStmt *plannedstmt, IntoClause *into, ExplainState *es, - const char *queryString, ParamListInfo params) + const char *queryString, ParamListInfo params) { - DestReceiver *dest; - QueryDesc *queryDesc; - instr_time starttime; - double totaltime = 0; - int eflags; - int instrument_option = 0; - - if (es->analyze && es->timing) - instrument_option |= INSTRUMENT_TIMER; - else if (es->analyze) - instrument_option |= INSTRUMENT_ROWS; - - if (es->buffers) - instrument_option |= INSTRUMENT_BUFFERS; - - INSTR_TIME_SET_CURRENT(starttime); - - /* - * Use a snapshot with an updated command ID to ensure this query sees - * results of any previously executed queries. - */ - PushCopiedSnapshot(GetActiveSnapshot()); - UpdateActiveSnapshotCommandId(); - - /* - * Normally we discard the query's output, but if explaining CREATE TABLE - * AS, we'd better use the appropriate tuple receiver. - */ - if (into) - dest = CreateIntoRelDestReceiver(into); - else - dest = None_Receiver; - - /* Create a QueryDesc for the query */ - queryDesc = CreateQueryDesc(plannedstmt, queryString, - GetActiveSnapshot(), InvalidSnapshot, - dest, params, instrument_option); - - /* Select execution options */ - if (es->analyze) - eflags = 0; /* default run-to-completion flags */ - else - eflags = EXEC_FLAG_EXPLAIN_ONLY; - if (into) - eflags |= GetIntoRelEFlags(into); - - /* call ExecutorStart to prepare the plan for execution */ - ExecutorStart(queryDesc, eflags); - - /* Execute the plan for statistics if asked for */ - if (es->analyze) - { - ScanDirection dir; - - /* EXPLAIN ANALYZE CREATE TABLE AS WITH NO DATA is weird */ - if (into && into->skipData) - dir = NoMovementScanDirection; - else - dir = ForwardScanDirection; - - /* run the plan */ - ExecutorRun(queryDesc, dir, 0L); - - /* run cleanup too */ - ExecutorFinish(queryDesc); - - /* We can't run ExecutorEnd 'till we're done printing the stats... */ - totaltime += elapsed_time(&starttime); - } - - ExplainOpenGroup("Query", NULL, true, es); - - /* Create textual dump of plan tree */ - ExplainPrintPlan(es, queryDesc); - - /* Print info about runtime of triggers */ - if (es->analyze) - { - ResultRelInfo *rInfo; - bool show_relname; - int numrels = queryDesc->estate->es_num_result_relations; - List *targrels = queryDesc->estate->es_trig_target_relations; - int nr; - ListCell *l; - - ExplainOpenGroup("Triggers", "Triggers", false, es); - - show_relname = (numrels > 1 || targrels != NIL); - rInfo = queryDesc->estate->es_result_relations; - for (nr = 0; nr < numrels; rInfo++, nr++) - report_triggers(rInfo, show_relname, es); - - foreach(l, targrels) - { - rInfo = (ResultRelInfo *) lfirst(l); - report_triggers(rInfo, show_relname, es); - } - - ExplainCloseGroup("Triggers", "Triggers", false, es); - } - - /* - * Close down the query and free resources. Include time for this in the - * total runtime (although it should be pretty minimal). - */ - INSTR_TIME_SET_CURRENT(starttime); - - ExecutorEnd(queryDesc); - - FreeQueryDesc(queryDesc); - - PopActiveSnapshot(); - - /* We need a CCI just in case query expanded to multiple plans */ - if (es->analyze) - CommandCounterIncrement(); - - totaltime += elapsed_time(&starttime); - - if (es->analyze) - { - if (es->format == EXPLAIN_FORMAT_TEXT) - appendStringInfo(es->str, "Total runtime: %.3f ms\n", - 1000.0 * totaltime); - else - ExplainPropertyFloat("Total Runtime", 1000.0 * totaltime, - 3, es); - } - - ExplainCloseGroup("Query", NULL, true, es); + DestReceiver *dest; + QueryDesc *queryDesc; + instr_time starttime; + double totaltime = 0; + int eflags; + int instrument_option = 0; + + if (es->analyze && es->timing) + instrument_option |= INSTRUMENT_TIMER; + else if (es->analyze) + instrument_option |= INSTRUMENT_ROWS; + + if (es->buffers) + instrument_option |= INSTRUMENT_BUFFERS; + + INSTR_TIME_SET_CURRENT(starttime); + + /* + * Use a snapshot with an updated command ID to ensure this query sees + * results of any previously executed queries. + */ + PushCopiedSnapshot(GetActiveSnapshot()); + UpdateActiveSnapshotCommandId(); + + /* + * Normally we discard the query's output, but if explaining CREATE TABLE + * AS, we'd better use the appropriate tuple receiver. + */ + if (into) + dest = CreateIntoRelDestReceiver(into); + else + dest = None_Receiver; + + /* Create a QueryDesc for the query */ + queryDesc = CreateQueryDesc(plannedstmt, queryString, + GetActiveSnapshot(), InvalidSnapshot, + dest, params, instrument_option); + + /* Select execution options */ + if (es->analyze) + eflags = 0; /* default run-to-completion flags */ + else + eflags = EXEC_FLAG_EXPLAIN_ONLY; + if (into) + eflags |= GetIntoRelEFlags(into); + + /* call ExecutorStart to prepare the plan for execution */ + ExecutorStart(queryDesc, eflags); + + /* Execute the plan for statistics if asked for */ + if (es->analyze) + { + ScanDirection dir; + + /* EXPLAIN ANALYZE CREATE TABLE AS WITH NO DATA is weird */ + if (into && into->skipData) + dir = NoMovementScanDirection; + else + dir = ForwardScanDirection; + + /* run the plan */ + ExecutorRun(queryDesc, dir, 0L); + + /* run cleanup too */ + ExecutorFinish(queryDesc); + + /* We can't run ExecutorEnd 'till we're done printing the stats... */ + totaltime += elapsed_time(&starttime); + } + + ExplainOpenGroup("Query", NULL, true, es); + + /* Create textual dump of plan tree */ + ExplainPrintPlan(es, queryDesc); + + /* Print info about runtime of triggers */ + if (es->analyze) + { + ResultRelInfo *rInfo; + bool show_relname; + int numrels = queryDesc->estate->es_num_result_relations; + List *targrels = queryDesc->estate->es_trig_target_relations; + int nr; + ListCell *l; + + ExplainOpenGroup("Triggers", "Triggers", false, es); + + show_relname = (numrels > 1 || targrels != NIL); + rInfo = queryDesc->estate->es_result_relations; + for (nr = 0; nr < numrels; rInfo++, nr++) + report_triggers(rInfo, show_relname, es); + + foreach(l, targrels) + { + rInfo = (ResultRelInfo *) lfirst(l); + report_triggers(rInfo, show_relname, es); + } + + ExplainCloseGroup("Triggers", "Triggers", false, es); + } + + /* + * Close down the query and free resources. Include time for this in the + * total runtime (although it should be pretty minimal). + */ + INSTR_TIME_SET_CURRENT(starttime); + + ExecutorEnd(queryDesc); + + FreeQueryDesc(queryDesc); + + PopActiveSnapshot(); + + /* We need a CCI just in case query expanded to multiple plans */ + if (es->analyze) + CommandCounterIncrement(); + + totaltime += elapsed_time(&starttime); + + if (es->analyze) + { + if (es->format == EXPLAIN_FORMAT_TEXT) + appendStringInfo(es->str, "Total runtime: %.3f ms\n", + 1000.0 * totaltime); + else + ExplainPropertyFloat("Total Runtime", 1000.0 * totaltime, + 3, es); + } + + ExplainCloseGroup("Query", NULL, true, es); } /* @@ -554,10 +540,10 @@ ExplainOnePlan(PlannedStmt *plannedstmt, IntoClause *into, ExplainState *es, void ExplainPrintPlan(ExplainState *es, QueryDesc *queryDesc) { - Assert(queryDesc->plannedstmt != NULL); - es->pstmt = queryDesc->plannedstmt; - es->rtable = queryDesc->plannedstmt->rtable; - ExplainNode(queryDesc->planstate, NIL, NULL, NULL, es); + Assert(queryDesc->plannedstmt != NULL); + es->pstmt = queryDesc->plannedstmt; + es->rtable = queryDesc->plannedstmt->rtable; + ExplainNode(queryDesc->planstate, NIL, NULL, NULL, es); } /* @@ -571,8 +557,8 @@ ExplainPrintPlan(ExplainState *es, QueryDesc *queryDesc) void ExplainQueryText(ExplainState *es, QueryDesc *queryDesc) { - if (queryDesc->sourceText) - ExplainPropertyText("Query Text", queryDesc->sourceText, es); + if (queryDesc->sourceText) + ExplainPropertyText("Query Text", queryDesc->sourceText, es); } /* @@ -582,77 +568,77 @@ ExplainQueryText(ExplainState *es, QueryDesc *queryDesc) static void report_triggers(ResultRelInfo *rInfo, bool show_relname, ExplainState *es) { - int nt; - - if (!rInfo->ri_TrigDesc || !rInfo->ri_TrigInstrument) - return; - for (nt = 0; nt < rInfo->ri_TrigDesc->numtriggers; nt++) - { - Trigger *trig = rInfo->ri_TrigDesc->triggers + nt; - Instrumentation *instr = rInfo->ri_TrigInstrument + nt; - char *relname; - char *conname = NULL; - - /* Must clean up instrumentation state */ - InstrEndLoop(instr); - - /* - * We ignore triggers that were never invoked; they likely aren't - * relevant to the current query type. - */ - if (instr->ntuples == 0) - continue; - - ExplainOpenGroup("Trigger", NULL, true, es); - - relname = RelationGetRelationName(rInfo->ri_RelationDesc); - if (OidIsValid(trig->tgconstraint)) - conname = get_constraint_name(trig->tgconstraint); - - /* - * In text format, we avoid printing both the trigger name and the - * constraint name unless VERBOSE is specified. In non-text formats - * we just print everything. - */ - if (es->format == EXPLAIN_FORMAT_TEXT) - { - if (es->verbose || conname == NULL) - appendStringInfo(es->str, "Trigger %s", trig->tgname); - else - appendStringInfoString(es->str, "Trigger"); - if (conname) - appendStringInfo(es->str, " for constraint %s", conname); - if (show_relname) - appendStringInfo(es->str, " on %s", relname); - appendStringInfo(es->str, ": time=%.3f calls=%.0f\n", - 1000.0 * instr->total, instr->ntuples); - } - else - { - ExplainPropertyText("Trigger Name", trig->tgname, es); - if (conname) - ExplainPropertyText("Constraint Name", conname, es); - ExplainPropertyText("Relation", relname, es); - ExplainPropertyFloat("Time", 1000.0 * instr->total, 3, es); - ExplainPropertyFloat("Calls", instr->ntuples, 0, es); - } - - if (conname) - pfree(conname); - - ExplainCloseGroup("Trigger", NULL, true, es); - } + int nt; + + if (!rInfo->ri_TrigDesc || !rInfo->ri_TrigInstrument) + return; + for (nt = 0; nt < rInfo->ri_TrigDesc->numtriggers; nt++) + { + Trigger *trig = rInfo->ri_TrigDesc->triggers + nt; + Instrumentation *instr = rInfo->ri_TrigInstrument + nt; + char *relname; + char *conname = NULL; + + /* Must clean up instrumentation state */ + InstrEndLoop(instr); + + /* + * We ignore triggers that were never invoked; they likely aren't + * relevant to the current query type. + */ + if (instr->ntuples == 0) + continue; + + ExplainOpenGroup("Trigger", NULL, true, es); + + relname = RelationGetRelationName(rInfo->ri_RelationDesc); + if (OidIsValid(trig->tgconstraint)) + conname = get_constraint_name(trig->tgconstraint); + + /* + * In text format, we avoid printing both the trigger name and the + * constraint name unless VERBOSE is specified. In non-text formats + * we just print everything. + */ + if (es->format == EXPLAIN_FORMAT_TEXT) + { + if (es->verbose || conname == NULL) + appendStringInfo(es->str, "Trigger %s", trig->tgname); + else + appendStringInfoString(es->str, "Trigger"); + if (conname) + appendStringInfo(es->str, " for constraint %s", conname); + if (show_relname) + appendStringInfo(es->str, " on %s", relname); + appendStringInfo(es->str, ": time=%.3f calls=%.0f\n", + 1000.0 * instr->total, instr->ntuples); + } + else + { + ExplainPropertyText("Trigger Name", trig->tgname, es); + if (conname) + ExplainPropertyText("Constraint Name", conname, es); + ExplainPropertyText("Relation", relname, es); + ExplainPropertyFloat("Time", 1000.0 * instr->total, 3, es); + ExplainPropertyFloat("Calls", instr->ntuples, 0, es); + } + + if (conname) + pfree(conname); + + ExplainCloseGroup("Trigger", NULL, true, es); + } } /* Compute elapsed time in seconds since given timestamp */ static double elapsed_time(instr_time *starttime) { - instr_time endtime; - - INSTR_TIME_SET_CURRENT(endtime); - INSTR_TIME_SUBTRACT(endtime, *starttime); - return INSTR_TIME_GET_DOUBLE(endtime); + instr_time endtime; + + INSTR_TIME_SET_CURRENT(endtime); + INSTR_TIME_SUBTRACT(endtime, *starttime); + return INSTR_TIME_GET_DOUBLE(endtime); } /* @@ -677,800 +663,800 @@ elapsed_time(instr_time *starttime) */ static void ExplainNode(PlanState *planstate, List *ancestors, - const char *relationship, const char *plan_name, - ExplainState *es) + const char *relationship, const char *plan_name, + ExplainState *es) { - Plan *plan = planstate->plan; - const char *pname; /* node type name for text output */ - const char *sname; /* node type name for non-text output */ - const char *strategy = NULL; - const char *operation = NULL; - int save_indent = es->indent; - bool haschildren; - - switch (nodeTag(plan)) - { - case T_Result: - pname = sname = "Result"; - break; - case T_ModifyTable: - sname = "ModifyTable"; - switch (((ModifyTable *) plan)->operation) - { - case CMD_INSERT: - pname = operation = "Insert"; - break; - case CMD_UPDATE: - pname = operation = "Update"; - break; - case CMD_DELETE: - pname = operation = "Delete"; - break; - default: - pname = "???"; - break; - } - break; - case T_Append: - pname = sname = "Append"; - break; - case T_MergeAppend: - pname = sname = "Merge Append"; - break; - case T_RecursiveUnion: - pname = sname = "Recursive Union"; - break; - case T_BitmapAnd: - pname = sname = "BitmapAnd"; - break; - case T_BitmapOr: - pname = sname = "BitmapOr"; - break; - case T_NestLoop: - pname = sname = "Nested Loop"; - break; - case T_MergeJoin: - pname = "Merge"; /* "Join" gets added by jointype switch */ - sname = "Merge Join"; - break; - case T_HashJoin: - pname = "Hash"; /* "Join" gets added by jointype switch */ - sname = "Hash Join"; - break; - /* NEW FOR RECATHON */ - case T_RecJoin: - pname = "Recommend"; /* "Join" gets added by jointype switch */ - sname = "Recommend Join"; - break; - case T_SeqScan: - pname = sname = "Seq Scan"; - break; - case T_IndexScan: - pname = sname = "Index Scan"; - break; - case T_IndexOnlyScan: - pname = sname = "Index Only Scan"; - break; - case T_BitmapIndexScan: - pname = sname = "Bitmap Index Scan"; - break; - case T_BitmapHeapScan: - pname = sname = "Bitmap Heap Scan"; - break; - case T_TidScan: - pname = sname = "Tid Scan"; - break; - case T_SubqueryScan: - pname = sname = "Subquery Scan"; - break; - case T_FunctionScan: - pname = sname = "Function Scan"; - break; - case T_ValuesScan: - pname = sname = "Values Scan"; - break; - case T_CteScan: - pname = sname = "CTE Scan"; - break; - case T_WorkTableScan: - pname = sname = "WorkTable Scan"; - break; - case T_ForeignScan: - pname = sname = "Foreign Scan"; - break; - /* NEW FOR RECATHON */ - case T_RecScan: - { - /* We'll mark a strategy depending on a few - * parameters. */ - RecommendInfo *recInfo; - pname = sname = "Recommend"; - - recInfo = (RecommendInfo*) ((RecScan*)plan)->recommender; - switch(recInfo->opType) { - case OP_GENERATE: - strategy = "GenerateRecommend"; - break; - case OP_JOIN: - strategy = "JoinRecommend"; - break; - case OP_GENERATEJOIN: - strategy = "GenerateJoinRecommend"; - break; - case OP_FILTER: - strategy = "FilterRecommend"; - break; - case OP_NOFILTER: - strategy = "StandardRecommend"; - break; - case OP_INDEX: - default: - strategy = "Recommend ???"; - break; - } - } - break; - case T_Material: - pname = sname = "Materialize"; - break; - case T_Sort: - pname = sname = "Sort"; - break; - case T_Group: - pname = sname = "Group"; - break; - case T_Agg: - sname = "Aggregate"; - switch (((Agg *) plan)->aggstrategy) - { - case AGG_PLAIN: - pname = "Aggregate"; - strategy = "Plain"; - break; - case AGG_SORTED: - pname = "GroupAggregate"; - strategy = "Sorted"; - break; - case AGG_HASHED: - pname = "HashAggregate"; - strategy = "Hashed"; - break; - default: - pname = "Aggregate ???"; - strategy = "???"; - break; - } - break; - case T_WindowAgg: - pname = sname = "WindowAgg"; - break; - case T_Unique: - pname = sname = "Unique"; - break; - case T_SetOp: - sname = "SetOp"; - switch (((SetOp *) plan)->strategy) - { - case SETOP_SORTED: - pname = "SetOp"; - strategy = "Sorted"; - break; - case SETOP_HASHED: - pname = "HashSetOp"; - strategy = "Hashed"; - break; - default: - pname = "SetOp ???"; - strategy = "???"; - break; - } - break; - case T_LockRows: - pname = sname = "LockRows"; - break; - case T_Limit: - pname = sname = "Limit"; - break; - case T_Hash: - pname = sname = "Hash"; - break; - default: - pname = sname = "???"; - break; - } - - ExplainOpenGroup("Plan", - relationship ? NULL : "Plan", - true, es); - - if (es->format == EXPLAIN_FORMAT_TEXT) - { - if (plan_name) - { - appendStringInfoSpaces(es->str, es->indent * 2); - appendStringInfo(es->str, "%s\n", plan_name); - es->indent++; - } - if (es->indent) - { - appendStringInfoSpaces(es->str, es->indent * 2); - appendStringInfoString(es->str, "-> "); - es->indent += 2; - } - appendStringInfoString(es->str, pname); - es->indent++; - } - else - { - ExplainPropertyText("Node Type", sname, es); - if (strategy) - ExplainPropertyText("Strategy", strategy, es); - if (operation) - ExplainPropertyText("Operation", operation, es); - if (relationship) - ExplainPropertyText("Parent Relationship", relationship, es); - if (plan_name) - ExplainPropertyText("Subplan Name", plan_name, es); - } - - switch (nodeTag(plan)) - { - case T_SeqScan: - case T_BitmapHeapScan: - case T_TidScan: - case T_SubqueryScan: - case T_FunctionScan: - case T_ValuesScan: - case T_CteScan: - case T_WorkTableScan: - case T_ForeignScan: - /* NEW FOR RECATHON */ - case T_RecScan: - ExplainScanTarget((Scan *) plan, es); - break; - case T_IndexScan: - { - IndexScan *indexscan = (IndexScan *) plan; - - ExplainIndexScanDetails(indexscan->indexid, - indexscan->indexorderdir, - es); - ExplainScanTarget((Scan *) indexscan, es); - } - break; - case T_IndexOnlyScan: - { - IndexOnlyScan *indexonlyscan = (IndexOnlyScan *) plan; - - ExplainIndexScanDetails(indexonlyscan->indexid, - indexonlyscan->indexorderdir, - es); - ExplainScanTarget((Scan *) indexonlyscan, es); - } - break; - case T_BitmapIndexScan: - { - BitmapIndexScan *bitmapindexscan = (BitmapIndexScan *) plan; - const char *indexname = - explain_get_index_name(bitmapindexscan->indexid); - - if (es->format == EXPLAIN_FORMAT_TEXT) - appendStringInfo(es->str, " on %s", indexname); - else - ExplainPropertyText("Index Name", indexname, es); - } - break; - case T_ModifyTable: - ExplainModifyTarget((ModifyTable *) plan, es); - break; - case T_NestLoop: - case T_MergeJoin: - case T_HashJoin: - /* NEW FOR RECATHON */ - case T_RecJoin: - { - const char *jointype; - - switch (((Join *) plan)->jointype) - { - case JOIN_INNER: - jointype = "Inner"; - break; - case JOIN_LEFT: - jointype = "Left"; - break; - case JOIN_FULL: - jointype = "Full"; - break; - case JOIN_RIGHT: - jointype = "Right"; - break; - case JOIN_SEMI: - jointype = "Semi"; - break; - case JOIN_ANTI: - jointype = "Anti"; - break; - default: - jointype = "???"; - break; - } - if (es->format == EXPLAIN_FORMAT_TEXT) - { - /* - * For historical reasons, the join type is interpolated - * into the node type name... - */ - if (((Join *) plan)->jointype != JOIN_INNER) - appendStringInfo(es->str, " %s Join", jointype); - else if (!IsA(plan, NestLoop)) - appendStringInfo(es->str, " Join"); - } - else - ExplainPropertyText("Join Type", jointype, es); - } - break; - case T_SetOp: - { - const char *setopcmd; - - switch (((SetOp *) plan)->cmd) - { - case SETOPCMD_INTERSECT: - setopcmd = "Intersect"; - break; - case SETOPCMD_INTERSECT_ALL: - setopcmd = "Intersect All"; - break; - case SETOPCMD_EXCEPT: - setopcmd = "Except"; - break; - case SETOPCMD_EXCEPT_ALL: - setopcmd = "Except All"; - break; - default: - setopcmd = "???"; - break; - } - if (es->format == EXPLAIN_FORMAT_TEXT) - appendStringInfo(es->str, " %s", setopcmd); - else - ExplainPropertyText("Command", setopcmd, es); - } - break; - default: - break; - } - - if (es->costs) - { - if (es->format == EXPLAIN_FORMAT_TEXT) - { - appendStringInfo(es->str, " (cost=%.2f..%.2f rows=%.0f width=%d)", - plan->startup_cost, plan->total_cost, - plan->plan_rows, plan->plan_width); - } - else - { - ExplainPropertyFloat("Startup Cost", plan->startup_cost, 2, es); - ExplainPropertyFloat("Total Cost", plan->total_cost, 2, es); - ExplainPropertyFloat("Plan Rows", plan->plan_rows, 0, es); - ExplainPropertyInteger("Plan Width", plan->plan_width, es); - } - } - - /* - * We have to forcibly clean up the instrumentation state because we - * haven't done ExecutorEnd yet. This is pretty grotty ... - */ - if (planstate->instrument) - InstrEndLoop(planstate->instrument); - - if (planstate->instrument && planstate->instrument->nloops > 0) - { - double nloops = planstate->instrument->nloops; - double startup_sec = 1000.0 * planstate->instrument->startup / nloops; - double total_sec = 1000.0 * planstate->instrument->total / nloops; - double rows = planstate->instrument->ntuples / nloops; - - if (es->format == EXPLAIN_FORMAT_TEXT) - { - if (planstate->instrument->need_timer) - appendStringInfo(es->str, - " (actual time=%.3f..%.3f rows=%.0f loops=%.0f)", - startup_sec, total_sec, rows, nloops); - else - appendStringInfo(es->str, - " (actual rows=%.0f loops=%.0f)", - rows, nloops); - } - else - { - if (planstate->instrument->need_timer) - { - ExplainPropertyFloat("Actual Startup Time", startup_sec, 3, es); - ExplainPropertyFloat("Actual Total Time", total_sec, 3, es); - } - ExplainPropertyFloat("Actual Rows", rows, 0, es); - ExplainPropertyFloat("Actual Loops", nloops, 0, es); - } - } - else if (es->analyze) - { - - if (es->format == EXPLAIN_FORMAT_TEXT) - appendStringInfo(es->str, " (never executed)"); - else if (planstate->instrument->need_timer) - { - ExplainPropertyFloat("Actual Startup Time", 0.0, 3, es); - ExplainPropertyFloat("Actual Total Time", 0.0, 3, es); - } - else - { - ExplainPropertyFloat("Actual Rows", 0.0, 0, es); - ExplainPropertyFloat("Actual Loops", 0.0, 0, es); - } - - } - - /* in text format, first line ends here */ - if (es->format == EXPLAIN_FORMAT_TEXT) - appendStringInfoChar(es->str, '\n'); - - /* target list */ - if (es->verbose) - show_plan_tlist(planstate, ancestors, es); - - /* quals, sort keys, etc */ - switch (nodeTag(plan)) - { - case T_IndexScan: - show_scan_qual(((IndexScan *) plan)->indexqualorig, - "Index Cond", planstate, ancestors, es); - if (((IndexScan *) plan)->indexqualorig) - show_instrumentation_count("Rows Removed by Index Recheck", 2, - planstate, es); - show_scan_qual(((IndexScan *) plan)->indexorderbyorig, - "Order By", planstate, ancestors, es); - show_scan_qual(plan->qual, "Filter", planstate, ancestors, es); - if (plan->qual) - show_instrumentation_count("Rows Removed by Filter", 1, - planstate, es); - break; - case T_IndexOnlyScan: - show_scan_qual(((IndexOnlyScan *) plan)->indexqual, - "Index Cond", planstate, ancestors, es); - if (((IndexOnlyScan *) plan)->indexqual) - show_instrumentation_count("Rows Removed by Index Recheck", 2, - planstate, es); - show_scan_qual(((IndexOnlyScan *) plan)->indexorderby, - "Order By", planstate, ancestors, es); - show_scan_qual(plan->qual, "Filter", planstate, ancestors, es); - if (plan->qual) - show_instrumentation_count("Rows Removed by Filter", 1, - planstate, es); - if (es->analyze) - ExplainPropertyLong("Heap Fetches", - ((IndexOnlyScanState *) planstate)->ioss_HeapFetches, es); - break; - case T_BitmapIndexScan: - show_scan_qual(((BitmapIndexScan *) plan)->indexqualorig, - "Index Cond", planstate, ancestors, es); - break; - case T_BitmapHeapScan: - show_scan_qual(((BitmapHeapScan *) plan)->bitmapqualorig, - "Recheck Cond", planstate, ancestors, es); - if (((BitmapHeapScan *) plan)->bitmapqualorig) - show_instrumentation_count("Rows Removed by Index Recheck", 2, - planstate, es); - /* FALL THRU */ - case T_SeqScan: - case T_ValuesScan: - case T_CteScan: - case T_WorkTableScan: - case T_SubqueryScan: - /* NEW FOR RECATHON */ - case T_RecScan: - show_scan_qual(plan->qual, "Filter", planstate, ancestors, es); - if (plan->qual) - show_instrumentation_count("Rows Removed by Filter", 1, - planstate, es); - break; - case T_FunctionScan: - if (es->verbose) - show_expression(((FunctionScan *) plan)->funcexpr, - "Function Call", planstate, ancestors, - es->verbose, es); - show_scan_qual(plan->qual, "Filter", planstate, ancestors, es); - if (plan->qual) - show_instrumentation_count("Rows Removed by Filter", 1, - planstate, es); - break; - case T_TidScan: - { - /* - * The tidquals list has OR semantics, so be sure to show it - * as an OR condition. - */ - List *tidquals = ((TidScan *) plan)->tidquals; - - if (list_length(tidquals) > 1) - tidquals = list_make1(make_orclause(tidquals)); - show_scan_qual(tidquals, "TID Cond", planstate, ancestors, es); - show_scan_qual(plan->qual, "Filter", planstate, ancestors, es); - if (plan->qual) - show_instrumentation_count("Rows Removed by Filter", 1, - planstate, es); - } - break; - case T_ForeignScan: - show_scan_qual(plan->qual, "Filter", planstate, ancestors, es); - if (plan->qual) - show_instrumentation_count("Rows Removed by Filter", 1, - planstate, es); - show_foreignscan_info((ForeignScanState *) planstate, es); - break; - case T_NestLoop: - /* NEW FOR RECATHON */ - case T_RecJoin: - show_upper_qual(((NestLoop *) plan)->join.joinqual, - "Join Filter", planstate, ancestors, es); - if (((NestLoop *) plan)->join.joinqual) - show_instrumentation_count("Rows Removed by Join Filter", 1, - planstate, es); - show_upper_qual(plan->qual, "Filter", planstate, ancestors, es); - if (plan->qual) - show_instrumentation_count("Rows Removed by Filter", 2, - planstate, es); - break; - case T_MergeJoin: - show_upper_qual(((MergeJoin *) plan)->mergeclauses, - "Merge Cond", planstate, ancestors, es); - show_upper_qual(((MergeJoin *) plan)->join.joinqual, - "Join Filter", planstate, ancestors, es); - if (((MergeJoin *) plan)->join.joinqual) - show_instrumentation_count("Rows Removed by Join Filter", 1, - planstate, es); - show_upper_qual(plan->qual, "Filter", planstate, ancestors, es); - if (plan->qual) - show_instrumentation_count("Rows Removed by Filter", 2, - planstate, es); - break; - case T_HashJoin: - show_upper_qual(((HashJoin *) plan)->hashclauses, - "Hash Cond", planstate, ancestors, es); - show_upper_qual(((HashJoin *) plan)->join.joinqual, - "Join Filter", planstate, ancestors, es); - if (((HashJoin *) plan)->join.joinqual) - show_instrumentation_count("Rows Removed by Join Filter", 1, - planstate, es); - show_upper_qual(plan->qual, "Filter", planstate, ancestors, es); - if (plan->qual) - show_instrumentation_count("Rows Removed by Filter", 2, - planstate, es); - break; - case T_Agg: - case T_Group: - show_upper_qual(plan->qual, "Filter", planstate, ancestors, es); - if (plan->qual) - show_instrumentation_count("Rows Removed by Filter", 1, - planstate, es); - break; - case T_Sort: - show_sort_keys((SortState *) planstate, ancestors, es); - show_sort_info((SortState *) planstate, es); - break; - case T_MergeAppend: - show_merge_append_keys((MergeAppendState *) planstate, - ancestors, es); - break; - case T_Result: - show_upper_qual((List *) ((Result *) plan)->resconstantqual, - "One-Time Filter", planstate, ancestors, es); - show_upper_qual(plan->qual, "Filter", planstate, ancestors, es); - if (plan->qual) - show_instrumentation_count("Rows Removed by Filter", 1, - planstate, es); - break; - case T_Hash: - show_hash_info((HashState *) planstate, es); - break; - default: - break; - } - - /* Show buffer usage */ - if (es->buffers) - { - const BufferUsage *usage = &planstate->instrument->bufusage; - - if (es->format == EXPLAIN_FORMAT_TEXT) - { - bool has_shared = (usage->shared_blks_hit > 0 || - usage->shared_blks_read > 0 || - usage->shared_blks_dirtied > 0 || - usage->shared_blks_written > 0); - bool has_local = (usage->local_blks_hit > 0 || - usage->local_blks_read > 0 || - usage->local_blks_dirtied > 0 || - usage->local_blks_written > 0); - bool has_temp = (usage->temp_blks_read > 0 || - usage->temp_blks_written > 0); - bool has_timing = (!INSTR_TIME_IS_ZERO(usage->blk_read_time) || - !INSTR_TIME_IS_ZERO(usage->blk_write_time)); - - /* Show only positive counter values. */ - if (has_shared || has_local || has_temp) - { - appendStringInfoSpaces(es->str, es->indent * 2); - appendStringInfoString(es->str, "Buffers:"); - - if (has_shared) - { - appendStringInfoString(es->str, " shared"); - if (usage->shared_blks_hit > 0) - appendStringInfo(es->str, " hit=%ld", - usage->shared_blks_hit); - if (usage->shared_blks_read > 0) - appendStringInfo(es->str, " read=%ld", - usage->shared_blks_read); - if (usage->shared_blks_dirtied > 0) - appendStringInfo(es->str, " dirtied=%ld", - usage->shared_blks_dirtied); - if (usage->shared_blks_written > 0) - appendStringInfo(es->str, " written=%ld", - usage->shared_blks_written); - if (has_local || has_temp) - appendStringInfoChar(es->str, ','); - } - if (has_local) - { - appendStringInfoString(es->str, " local"); - if (usage->local_blks_hit > 0) - appendStringInfo(es->str, " hit=%ld", - usage->local_blks_hit); - if (usage->local_blks_read > 0) - appendStringInfo(es->str, " read=%ld", - usage->local_blks_read); - if (usage->local_blks_dirtied > 0) - appendStringInfo(es->str, " dirtied=%ld", - usage->local_blks_dirtied); - if (usage->local_blks_written > 0) - appendStringInfo(es->str, " written=%ld", - usage->local_blks_written); - if (has_temp) - appendStringInfoChar(es->str, ','); - } - if (has_temp) - { - appendStringInfoString(es->str, " temp"); - if (usage->temp_blks_read > 0) - appendStringInfo(es->str, " read=%ld", - usage->temp_blks_read); - if (usage->temp_blks_written > 0) - appendStringInfo(es->str, " written=%ld", - usage->temp_blks_written); - } - appendStringInfoChar(es->str, '\n'); - } - - /* As above, show only positive counter values. */ - if (has_timing) - { - appendStringInfoSpaces(es->str, es->indent * 2); - appendStringInfoString(es->str, "I/O Timings:"); - if (!INSTR_TIME_IS_ZERO(usage->blk_read_time)) - appendStringInfo(es->str, " read=%0.3f", - INSTR_TIME_GET_MILLISEC(usage->blk_read_time)); - if (!INSTR_TIME_IS_ZERO(usage->blk_write_time)) - appendStringInfo(es->str, " write=%0.3f", - INSTR_TIME_GET_MILLISEC(usage->blk_write_time)); - appendStringInfoChar(es->str, '\n'); - } - } - else - { - ExplainPropertyLong("Shared Hit Blocks", usage->shared_blks_hit, es); - ExplainPropertyLong("Shared Read Blocks", usage->shared_blks_read, es); - ExplainPropertyLong("Shared Dirtied Blocks", usage->shared_blks_dirtied, es); - ExplainPropertyLong("Shared Written Blocks", usage->shared_blks_written, es); - ExplainPropertyLong("Local Hit Blocks", usage->local_blks_hit, es); - ExplainPropertyLong("Local Read Blocks", usage->local_blks_read, es); - ExplainPropertyLong("Local Dirtied Blocks", usage->local_blks_dirtied, es); - ExplainPropertyLong("Local Written Blocks", usage->local_blks_written, es); - ExplainPropertyLong("Temp Read Blocks", usage->temp_blks_read, es); - ExplainPropertyLong("Temp Written Blocks", usage->temp_blks_written, es); - ExplainPropertyFloat("I/O Read Time", INSTR_TIME_GET_MILLISEC(usage->blk_read_time), 3, es); - ExplainPropertyFloat("I/O Write Time", INSTR_TIME_GET_MILLISEC(usage->blk_write_time), 3, es); - } - } - - /* Get ready to display the child plans */ - haschildren = planstate->initPlan || - outerPlanState(planstate) || - innerPlanState(planstate) || - IsA(plan, ModifyTable) || - IsA(plan, Append) || - IsA(plan, MergeAppend) || - IsA(plan, BitmapAnd) || - IsA(plan, BitmapOr) || - IsA(plan, SubqueryScan) || - planstate->subPlan; - if (haschildren) - { - ExplainOpenGroup("Plans", "Plans", false, es); - /* Pass current PlanState as head of ancestors list for children */ - ancestors = lcons(planstate, ancestors); - } - - /* initPlan-s */ - if (planstate->initPlan) - ExplainSubPlans(planstate->initPlan, ancestors, "InitPlan", es); - - /* lefttree */ - if (outerPlanState(planstate)) - ExplainNode(outerPlanState(planstate), ancestors, - "Outer", NULL, es); - - /* righttree */ - if (innerPlanState(planstate)) - ExplainNode(innerPlanState(planstate), ancestors, - "Inner", NULL, es); - - /* special child plans */ - switch (nodeTag(plan)) - { - case T_ModifyTable: - ExplainMemberNodes(((ModifyTable *) plan)->plans, - ((ModifyTableState *) planstate)->mt_plans, - ancestors, es); - break; - case T_Append: - ExplainMemberNodes(((Append *) plan)->appendplans, - ((AppendState *) planstate)->appendplans, - ancestors, es); - break; - case T_MergeAppend: - ExplainMemberNodes(((MergeAppend *) plan)->mergeplans, - ((MergeAppendState *) planstate)->mergeplans, - ancestors, es); - break; - case T_BitmapAnd: - ExplainMemberNodes(((BitmapAnd *) plan)->bitmapplans, - ((BitmapAndState *) planstate)->bitmapplans, - ancestors, es); - break; - case T_BitmapOr: - ExplainMemberNodes(((BitmapOr *) plan)->bitmapplans, - ((BitmapOrState *) planstate)->bitmapplans, - ancestors, es); - break; - case T_SubqueryScan: - ExplainNode(((SubqueryScanState *) planstate)->subplan, ancestors, - "Subquery", NULL, es); - break; - default: - break; - } - - /* subPlan-s */ - if (planstate->subPlan) - ExplainSubPlans(planstate->subPlan, ancestors, "SubPlan", es); - - /* end of child plans */ - if (haschildren) - { - ancestors = list_delete_first(ancestors); - ExplainCloseGroup("Plans", "Plans", false, es); - } - - /* in text format, undo whatever indentation we added */ - if (es->format == EXPLAIN_FORMAT_TEXT) - es->indent = save_indent; - - ExplainCloseGroup("Plan", - relationship ? NULL : "Plan", - true, es); + Plan *plan = planstate->plan; + const char *pname; /* node type name for text output */ + const char *sname; /* node type name for non-text output */ + const char *strategy = NULL; + const char *operation = NULL; + int save_indent = es->indent; + bool haschildren; + + switch (nodeTag(plan)) + { + case T_Result: + pname = sname = "Result"; + break; + case T_ModifyTable: + sname = "ModifyTable"; + switch (((ModifyTable *) plan)->operation) + { + case CMD_INSERT: + pname = operation = "Insert"; + break; + case CMD_UPDATE: + pname = operation = "Update"; + break; + case CMD_DELETE: + pname = operation = "Delete"; + break; + default: + pname = "???"; + break; + } + break; + case T_Append: + pname = sname = "Append"; + break; + case T_MergeAppend: + pname = sname = "Merge Append"; + break; + case T_RecursiveUnion: + pname = sname = "Recursive Union"; + break; + case T_BitmapAnd: + pname = sname = "BitmapAnd"; + break; + case T_BitmapOr: + pname = sname = "BitmapOr"; + break; + case T_NestLoop: + pname = sname = "Nested Loop"; + break; + case T_MergeJoin: + pname = "Merge"; /* "Join" gets added by jointype switch */ + sname = "Merge Join"; + break; + case T_HashJoin: + pname = "Hash"; /* "Join" gets added by jointype switch */ + sname = "Hash Join"; + break; + /* NEW FOR RECATHON */ + case T_RecJoin: + pname = "Recommend"; /* "Join" gets added by jointype switch */ + sname = "Recommend Join"; + break; + case T_SeqScan: + pname = sname = "Seq Scan"; + break; + case T_IndexScan: + pname = sname = "Index Scan"; + break; + case T_IndexOnlyScan: + pname = sname = "Index Only Scan"; + break; + case T_BitmapIndexScan: + pname = sname = "Bitmap Index Scan"; + break; + case T_BitmapHeapScan: + pname = sname = "Bitmap Heap Scan"; + break; + case T_TidScan: + pname = sname = "Tid Scan"; + break; + case T_SubqueryScan: + pname = sname = "Subquery Scan"; + break; + case T_FunctionScan: + pname = sname = "Function Scan"; + break; + case T_ValuesScan: + pname = sname = "Values Scan"; + break; + case T_CteScan: + pname = sname = "CTE Scan"; + break; + case T_WorkTableScan: + pname = sname = "WorkTable Scan"; + break; + case T_ForeignScan: + pname = sname = "Foreign Scan"; + break; + /* NEW FOR RECATHON */ + case T_RecScan: + { + /* We'll mark a strategy depending on a few + * parameters. */ + RecommendInfo *recInfo; + pname = sname = "Recommend"; + + recInfo = (RecommendInfo*) ((RecScan*)plan)->recommender; + switch(recInfo->opType) { + case OP_GENERATE: + strategy = "GenerateRecommend"; + break; + case OP_JOIN: + strategy = "JoinRecommend"; + break; + case OP_GENERATEJOIN: + strategy = "GenerateJoinRecommend"; + break; + case OP_FILTER: + strategy = "FilterRecommend"; + break; + case OP_NOFILTER: + strategy = "StandardRecommend"; + break; + case OP_INDEX: + default: + strategy = "Recommend ???"; + break; + } + } + break; + case T_Material: + pname = sname = "Materialize"; + break; + case T_Sort: + pname = sname = "Sort"; + break; + case T_Group: + pname = sname = "Group"; + break; + case T_Agg: + sname = "Aggregate"; + switch (((Agg *) plan)->aggstrategy) + { + case AGG_PLAIN: + pname = "Aggregate"; + strategy = "Plain"; + break; + case AGG_SORTED: + pname = "GroupAggregate"; + strategy = "Sorted"; + break; + case AGG_HASHED: + pname = "HashAggregate"; + strategy = "Hashed"; + break; + default: + pname = "Aggregate ???"; + strategy = "???"; + break; + } + break; + case T_WindowAgg: + pname = sname = "WindowAgg"; + break; + case T_Unique: + pname = sname = "Unique"; + break; + case T_SetOp: + sname = "SetOp"; + switch (((SetOp *) plan)->strategy) + { + case SETOP_SORTED: + pname = "SetOp"; + strategy = "Sorted"; + break; + case SETOP_HASHED: + pname = "HashSetOp"; + strategy = "Hashed"; + break; + default: + pname = "SetOp ???"; + strategy = "???"; + break; + } + break; + case T_LockRows: + pname = sname = "LockRows"; + break; + case T_Limit: + pname = sname = "Limit"; + break; + case T_Hash: + pname = sname = "Hash"; + break; + default: + pname = sname = "???"; + break; + } + + ExplainOpenGroup("Plan", + relationship ? NULL : "Plan", + true, es); + + if (es->format == EXPLAIN_FORMAT_TEXT) + { + if (plan_name) + { + appendStringInfoSpaces(es->str, es->indent * 2); + appendStringInfo(es->str, "%s\n", plan_name); + es->indent++; + } + if (es->indent) + { + appendStringInfoSpaces(es->str, es->indent * 2); + appendStringInfoString(es->str, "-> "); + es->indent += 2; + } + appendStringInfoString(es->str, pname); + es->indent++; + } + else + { + ExplainPropertyText("Node Type", sname, es); + if (strategy) + ExplainPropertyText("Strategy", strategy, es); + if (operation) + ExplainPropertyText("Operation", operation, es); + if (relationship) + ExplainPropertyText("Parent Relationship", relationship, es); + if (plan_name) + ExplainPropertyText("Subplan Name", plan_name, es); + } + + switch (nodeTag(plan)) + { + case T_SeqScan: + case T_BitmapHeapScan: + case T_TidScan: + case T_SubqueryScan: + case T_FunctionScan: + case T_ValuesScan: + case T_CteScan: + case T_WorkTableScan: + case T_ForeignScan: + /* NEW FOR RECATHON */ + case T_RecScan: + ExplainScanTarget((Scan *) plan, es); + break; + case T_IndexScan: + { + IndexScan *indexscan = (IndexScan *) plan; + + ExplainIndexScanDetails(indexscan->indexid, + indexscan->indexorderdir, + es); + ExplainScanTarget((Scan *) indexscan, es); + } + break; + case T_IndexOnlyScan: + { + IndexOnlyScan *indexonlyscan = (IndexOnlyScan *) plan; + + ExplainIndexScanDetails(indexonlyscan->indexid, + indexonlyscan->indexorderdir, + es); + ExplainScanTarget((Scan *) indexonlyscan, es); + } + break; + case T_BitmapIndexScan: + { + BitmapIndexScan *bitmapindexscan = (BitmapIndexScan *) plan; + const char *indexname = + explain_get_index_name(bitmapindexscan->indexid); + + if (es->format == EXPLAIN_FORMAT_TEXT) + appendStringInfo(es->str, " on %s", indexname); + else + ExplainPropertyText("Index Name", indexname, es); + } + break; + case T_ModifyTable: + ExplainModifyTarget((ModifyTable *) plan, es); + break; + case T_NestLoop: + case T_MergeJoin: + case T_HashJoin: + /* NEW FOR RECATHON */ + case T_RecJoin: + { + const char *jointype; + + switch (((Join *) plan)->jointype) + { + case JOIN_INNER: + jointype = "Inner"; + break; + case JOIN_LEFT: + jointype = "Left"; + break; + case JOIN_FULL: + jointype = "Full"; + break; + case JOIN_RIGHT: + jointype = "Right"; + break; + case JOIN_SEMI: + jointype = "Semi"; + break; + case JOIN_ANTI: + jointype = "Anti"; + break; + default: + jointype = "???"; + break; + } + if (es->format == EXPLAIN_FORMAT_TEXT) + { + /* + * For historical reasons, the join type is interpolated + * into the node type name... + */ + if (((Join *) plan)->jointype != JOIN_INNER) + appendStringInfo(es->str, " %s Join", jointype); + else if (!IsA(plan, NestLoop)) + appendStringInfo(es->str, " Join"); + } + else + ExplainPropertyText("Join Type", jointype, es); + } + break; + case T_SetOp: + { + const char *setopcmd; + + switch (((SetOp *) plan)->cmd) + { + case SETOPCMD_INTERSECT: + setopcmd = "Intersect"; + break; + case SETOPCMD_INTERSECT_ALL: + setopcmd = "Intersect All"; + break; + case SETOPCMD_EXCEPT: + setopcmd = "Except"; + break; + case SETOPCMD_EXCEPT_ALL: + setopcmd = "Except All"; + break; + default: + setopcmd = "???"; + break; + } + if (es->format == EXPLAIN_FORMAT_TEXT) + appendStringInfo(es->str, " %s", setopcmd); + else + ExplainPropertyText("Command", setopcmd, es); + } + break; + default: + break; + } + + if (es->costs) + { + if (es->format == EXPLAIN_FORMAT_TEXT) + { + appendStringInfo(es->str, " (cost=%.2f..%.2f rows=%.0f width=%d)", + plan->startup_cost, plan->total_cost, + plan->plan_rows, plan->plan_width); + } + else + { + ExplainPropertyFloat("Startup Cost", plan->startup_cost, 2, es); + ExplainPropertyFloat("Total Cost", plan->total_cost, 2, es); + ExplainPropertyFloat("Plan Rows", plan->plan_rows, 0, es); + ExplainPropertyInteger("Plan Width", plan->plan_width, es); + } + } + + /* + * We have to forcibly clean up the instrumentation state because we + * haven't done ExecutorEnd yet. This is pretty grotty ... + */ + if (planstate->instrument) + InstrEndLoop(planstate->instrument); + + if (planstate->instrument && planstate->instrument->nloops > 0) + { + double nloops = planstate->instrument->nloops; + double startup_sec = 1000.0 * planstate->instrument->startup / nloops; + double total_sec = 1000.0 * planstate->instrument->total / nloops; + double rows = planstate->instrument->ntuples / nloops; + + if (es->format == EXPLAIN_FORMAT_TEXT) + { + if (planstate->instrument->need_timer) + appendStringInfo(es->str, + " (actual time=%.3f..%.3f rows=%.0f loops=%.0f)", + startup_sec, total_sec, rows, nloops); + else + appendStringInfo(es->str, + " (actual rows=%.0f loops=%.0f)", + rows, nloops); + } + else + { + if (planstate->instrument->need_timer) + { + ExplainPropertyFloat("Actual Startup Time", startup_sec, 3, es); + ExplainPropertyFloat("Actual Total Time", total_sec, 3, es); + } + ExplainPropertyFloat("Actual Rows", rows, 0, es); + ExplainPropertyFloat("Actual Loops", nloops, 0, es); + } + } + else if (es->analyze) + { + + if (es->format == EXPLAIN_FORMAT_TEXT) + appendStringInfo(es->str, " (never executed)"); + else if (planstate->instrument->need_timer) + { + ExplainPropertyFloat("Actual Startup Time", 0.0, 3, es); + ExplainPropertyFloat("Actual Total Time", 0.0, 3, es); + } + else + { + ExplainPropertyFloat("Actual Rows", 0.0, 0, es); + ExplainPropertyFloat("Actual Loops", 0.0, 0, es); + } + + } + + /* in text format, first line ends here */ + if (es->format == EXPLAIN_FORMAT_TEXT) + appendStringInfoChar(es->str, '\n'); + + /* target list */ + if (es->verbose) + show_plan_tlist(planstate, ancestors, es); + + /* quals, sort keys, etc */ + switch (nodeTag(plan)) + { + case T_IndexScan: + show_scan_qual(((IndexScan *) plan)->indexqualorig, + "Index Cond", planstate, ancestors, es); + if (((IndexScan *) plan)->indexqualorig) + show_instrumentation_count("Rows Removed by Index Recheck", 2, + planstate, es); + show_scan_qual(((IndexScan *) plan)->indexorderbyorig, + "Order By", planstate, ancestors, es); + show_scan_qual(plan->qual, "Filter", planstate, ancestors, es); + if (plan->qual) + show_instrumentation_count("Rows Removed by Filter", 1, + planstate, es); + break; + case T_IndexOnlyScan: + show_scan_qual(((IndexOnlyScan *) plan)->indexqual, + "Index Cond", planstate, ancestors, es); + if (((IndexOnlyScan *) plan)->indexqual) + show_instrumentation_count("Rows Removed by Index Recheck", 2, + planstate, es); + show_scan_qual(((IndexOnlyScan *) plan)->indexorderby, + "Order By", planstate, ancestors, es); + show_scan_qual(plan->qual, "Filter", planstate, ancestors, es); + if (plan->qual) + show_instrumentation_count("Rows Removed by Filter", 1, + planstate, es); + if (es->analyze) + ExplainPropertyLong("Heap Fetches", + ((IndexOnlyScanState *) planstate)->ioss_HeapFetches, es); + break; + case T_BitmapIndexScan: + show_scan_qual(((BitmapIndexScan *) plan)->indexqualorig, + "Index Cond", planstate, ancestors, es); + break; + case T_BitmapHeapScan: + show_scan_qual(((BitmapHeapScan *) plan)->bitmapqualorig, + "Recheck Cond", planstate, ancestors, es); + if (((BitmapHeapScan *) plan)->bitmapqualorig) + show_instrumentation_count("Rows Removed by Index Recheck", 2, + planstate, es); + /* FALL THRU */ + case T_SeqScan: + case T_ValuesScan: + case T_CteScan: + case T_WorkTableScan: + case T_SubqueryScan: + /* NEW FOR RECATHON */ + case T_RecScan: + show_scan_qual(plan->qual, "Filter", planstate, ancestors, es); + if (plan->qual) + show_instrumentation_count("Rows Removed by Filter", 1, + planstate, es); + break; + case T_FunctionScan: + if (es->verbose) + show_expression(((FunctionScan *) plan)->funcexpr, + "Function Call", planstate, ancestors, + es->verbose, es); + show_scan_qual(plan->qual, "Filter", planstate, ancestors, es); + if (plan->qual) + show_instrumentation_count("Rows Removed by Filter", 1, + planstate, es); + break; + case T_TidScan: + { + /* + * The tidquals list has OR semantics, so be sure to show it + * as an OR condition. + */ + List *tidquals = ((TidScan *) plan)->tidquals; + + if (list_length(tidquals) > 1) + tidquals = list_make1(make_orclause(tidquals)); + show_scan_qual(tidquals, "TID Cond", planstate, ancestors, es); + show_scan_qual(plan->qual, "Filter", planstate, ancestors, es); + if (plan->qual) + show_instrumentation_count("Rows Removed by Filter", 1, + planstate, es); + } + break; + case T_ForeignScan: + show_scan_qual(plan->qual, "Filter", planstate, ancestors, es); + if (plan->qual) + show_instrumentation_count("Rows Removed by Filter", 1, + planstate, es); + show_foreignscan_info((ForeignScanState *) planstate, es); + break; + case T_NestLoop: + /* NEW FOR RECATHON */ + case T_RecJoin: + show_upper_qual(((NestLoop *) plan)->join.joinqual, + "Join Filter", planstate, ancestors, es); + if (((NestLoop *) plan)->join.joinqual) + show_instrumentation_count("Rows Removed by Join Filter", 1, + planstate, es); + show_upper_qual(plan->qual, "Filter", planstate, ancestors, es); + if (plan->qual) + show_instrumentation_count("Rows Removed by Filter", 2, + planstate, es); + break; + case T_MergeJoin: + show_upper_qual(((MergeJoin *) plan)->mergeclauses, + "Merge Cond", planstate, ancestors, es); + show_upper_qual(((MergeJoin *) plan)->join.joinqual, + "Join Filter", planstate, ancestors, es); + if (((MergeJoin *) plan)->join.joinqual) + show_instrumentation_count("Rows Removed by Join Filter", 1, + planstate, es); + show_upper_qual(plan->qual, "Filter", planstate, ancestors, es); + if (plan->qual) + show_instrumentation_count("Rows Removed by Filter", 2, + planstate, es); + break; + case T_HashJoin: + show_upper_qual(((HashJoin *) plan)->hashclauses, + "Hash Cond", planstate, ancestors, es); + show_upper_qual(((HashJoin *) plan)->join.joinqual, + "Join Filter", planstate, ancestors, es); + if (((HashJoin *) plan)->join.joinqual) + show_instrumentation_count("Rows Removed by Join Filter", 1, + planstate, es); + show_upper_qual(plan->qual, "Filter", planstate, ancestors, es); + if (plan->qual) + show_instrumentation_count("Rows Removed by Filter", 2, + planstate, es); + break; + case T_Agg: + case T_Group: + show_upper_qual(plan->qual, "Filter", planstate, ancestors, es); + if (plan->qual) + show_instrumentation_count("Rows Removed by Filter", 1, + planstate, es); + break; + case T_Sort: + show_sort_keys((SortState *) planstate, ancestors, es); + show_sort_info((SortState *) planstate, es); + break; + case T_MergeAppend: + show_merge_append_keys((MergeAppendState *) planstate, + ancestors, es); + break; + case T_Result: + show_upper_qual((List *) ((Result *) plan)->resconstantqual, + "One-Time Filter", planstate, ancestors, es); + show_upper_qual(plan->qual, "Filter", planstate, ancestors, es); + if (plan->qual) + show_instrumentation_count("Rows Removed by Filter", 1, + planstate, es); + break; + case T_Hash: + show_hash_info((HashState *) planstate, es); + break; + default: + break; + } + + /* Show buffer usage */ + if (es->buffers) + { + const BufferUsage *usage = &planstate->instrument->bufusage; + + if (es->format == EXPLAIN_FORMAT_TEXT) + { + bool has_shared = (usage->shared_blks_hit > 0 || + usage->shared_blks_read > 0 || + usage->shared_blks_dirtied > 0 || + usage->shared_blks_written > 0); + bool has_local = (usage->local_blks_hit > 0 || + usage->local_blks_read > 0 || + usage->local_blks_dirtied > 0 || + usage->local_blks_written > 0); + bool has_temp = (usage->temp_blks_read > 0 || + usage->temp_blks_written > 0); + bool has_timing = (!INSTR_TIME_IS_ZERO(usage->blk_read_time) || + !INSTR_TIME_IS_ZERO(usage->blk_write_time)); + + /* Show only positive counter values. */ + if (has_shared || has_local || has_temp) + { + appendStringInfoSpaces(es->str, es->indent * 2); + appendStringInfoString(es->str, "Buffers:"); + + if (has_shared) + { + appendStringInfoString(es->str, " shared"); + if (usage->shared_blks_hit > 0) + appendStringInfo(es->str, " hit=%ld", + usage->shared_blks_hit); + if (usage->shared_blks_read > 0) + appendStringInfo(es->str, " read=%ld", + usage->shared_blks_read); + if (usage->shared_blks_dirtied > 0) + appendStringInfo(es->str, " dirtied=%ld", + usage->shared_blks_dirtied); + if (usage->shared_blks_written > 0) + appendStringInfo(es->str, " written=%ld", + usage->shared_blks_written); + if (has_local || has_temp) + appendStringInfoChar(es->str, ','); + } + if (has_local) + { + appendStringInfoString(es->str, " local"); + if (usage->local_blks_hit > 0) + appendStringInfo(es->str, " hit=%ld", + usage->local_blks_hit); + if (usage->local_blks_read > 0) + appendStringInfo(es->str, " read=%ld", + usage->local_blks_read); + if (usage->local_blks_dirtied > 0) + appendStringInfo(es->str, " dirtied=%ld", + usage->local_blks_dirtied); + if (usage->local_blks_written > 0) + appendStringInfo(es->str, " written=%ld", + usage->local_blks_written); + if (has_temp) + appendStringInfoChar(es->str, ','); + } + if (has_temp) + { + appendStringInfoString(es->str, " temp"); + if (usage->temp_blks_read > 0) + appendStringInfo(es->str, " read=%ld", + usage->temp_blks_read); + if (usage->temp_blks_written > 0) + appendStringInfo(es->str, " written=%ld", + usage->temp_blks_written); + } + appendStringInfoChar(es->str, '\n'); + } + + /* As above, show only positive counter values. */ + if (has_timing) + { + appendStringInfoSpaces(es->str, es->indent * 2); + appendStringInfoString(es->str, "I/O Timings:"); + if (!INSTR_TIME_IS_ZERO(usage->blk_read_time)) + appendStringInfo(es->str, " read=%0.3f", + INSTR_TIME_GET_MILLISEC(usage->blk_read_time)); + if (!INSTR_TIME_IS_ZERO(usage->blk_write_time)) + appendStringInfo(es->str, " write=%0.3f", + INSTR_TIME_GET_MILLISEC(usage->blk_write_time)); + appendStringInfoChar(es->str, '\n'); + } + } + else + { + ExplainPropertyLong("Shared Hit Blocks", usage->shared_blks_hit, es); + ExplainPropertyLong("Shared Read Blocks", usage->shared_blks_read, es); + ExplainPropertyLong("Shared Dirtied Blocks", usage->shared_blks_dirtied, es); + ExplainPropertyLong("Shared Written Blocks", usage->shared_blks_written, es); + ExplainPropertyLong("Local Hit Blocks", usage->local_blks_hit, es); + ExplainPropertyLong("Local Read Blocks", usage->local_blks_read, es); + ExplainPropertyLong("Local Dirtied Blocks", usage->local_blks_dirtied, es); + ExplainPropertyLong("Local Written Blocks", usage->local_blks_written, es); + ExplainPropertyLong("Temp Read Blocks", usage->temp_blks_read, es); + ExplainPropertyLong("Temp Written Blocks", usage->temp_blks_written, es); + ExplainPropertyFloat("I/O Read Time", INSTR_TIME_GET_MILLISEC(usage->blk_read_time), 3, es); + ExplainPropertyFloat("I/O Write Time", INSTR_TIME_GET_MILLISEC(usage->blk_write_time), 3, es); + } + } + + /* Get ready to display the child plans */ + haschildren = planstate->initPlan || + outerPlanState(planstate) || + innerPlanState(planstate) || + IsA(plan, ModifyTable) || + IsA(plan, Append) || + IsA(plan, MergeAppend) || + IsA(plan, BitmapAnd) || + IsA(plan, BitmapOr) || + IsA(plan, SubqueryScan) || + planstate->subPlan; + if (haschildren) + { + ExplainOpenGroup("Plans", "Plans", false, es); + /* Pass current PlanState as head of ancestors list for children */ + ancestors = lcons(planstate, ancestors); + } + + /* initPlan-s */ + if (planstate->initPlan) + ExplainSubPlans(planstate->initPlan, ancestors, "InitPlan", es); + + /* lefttree */ + if (outerPlanState(planstate)) + ExplainNode(outerPlanState(planstate), ancestors, + "Outer", NULL, es); + + /* righttree */ + if (innerPlanState(planstate)) + ExplainNode(innerPlanState(planstate), ancestors, + "Inner", NULL, es); + + /* special child plans */ + switch (nodeTag(plan)) + { + case T_ModifyTable: + ExplainMemberNodes(((ModifyTable *) plan)->plans, + ((ModifyTableState *) planstate)->mt_plans, + ancestors, es); + break; + case T_Append: + ExplainMemberNodes(((Append *) plan)->appendplans, + ((AppendState *) planstate)->appendplans, + ancestors, es); + break; + case T_MergeAppend: + ExplainMemberNodes(((MergeAppend *) plan)->mergeplans, + ((MergeAppendState *) planstate)->mergeplans, + ancestors, es); + break; + case T_BitmapAnd: + ExplainMemberNodes(((BitmapAnd *) plan)->bitmapplans, + ((BitmapAndState *) planstate)->bitmapplans, + ancestors, es); + break; + case T_BitmapOr: + ExplainMemberNodes(((BitmapOr *) plan)->bitmapplans, + ((BitmapOrState *) planstate)->bitmapplans, + ancestors, es); + break; + case T_SubqueryScan: + ExplainNode(((SubqueryScanState *) planstate)->subplan, ancestors, + "Subquery", NULL, es); + break; + default: + break; + } + + /* subPlan-s */ + if (planstate->subPlan) + ExplainSubPlans(planstate->subPlan, ancestors, "SubPlan", es); + + /* end of child plans */ + if (haschildren) + { + ancestors = list_delete_first(ancestors); + ExplainCloseGroup("Plans", "Plans", false, es); + } + + /* in text format, undo whatever indentation we added */ + if (es->format == EXPLAIN_FORMAT_TEXT) + es->indent = save_indent; + + ExplainCloseGroup("Plan", + relationship ? NULL : "Plan", + true, es); } /* @@ -1479,42 +1465,42 @@ ExplainNode(PlanState *planstate, List *ancestors, static void show_plan_tlist(PlanState *planstate, List *ancestors, ExplainState *es) { - Plan *plan = planstate->plan; - List *context; - List *result = NIL; - bool useprefix; - ListCell *lc; - - /* No work if empty tlist (this occurs eg in bitmap indexscans) */ - if (plan->targetlist == NIL) - return; - /* The tlist of an Append isn't real helpful, so suppress it */ - if (IsA(plan, Append)) - return; - /* Likewise for MergeAppend and RecursiveUnion */ - if (IsA(plan, MergeAppend)) - return; - if (IsA(plan, RecursiveUnion)) - return; - - /* Set up deparsing context */ - context = deparse_context_for_planstate((Node *) planstate, - ancestors, - es->rtable); - useprefix = list_length(es->rtable) > 1; - - /* Deparse each result column (we now include resjunk ones) */ - foreach(lc, plan->targetlist) - { - TargetEntry *tle = (TargetEntry *) lfirst(lc); - - result = lappend(result, - deparse_expression((Node *) tle->expr, context, - useprefix, false)); - } - - /* Print results */ - ExplainPropertyList("Output", result, es); + Plan *plan = planstate->plan; + List *context; + List *result = NIL; + bool useprefix; + ListCell *lc; + + /* No work if empty tlist (this occurs eg in bitmap indexscans) */ + if (plan->targetlist == NIL) + return; + /* The tlist of an Append isn't real helpful, so suppress it */ + if (IsA(plan, Append)) + return; + /* Likewise for MergeAppend and RecursiveUnion */ + if (IsA(plan, MergeAppend)) + return; + if (IsA(plan, RecursiveUnion)) + return; + + /* Set up deparsing context */ + context = deparse_context_for_planstate((Node *) planstate, + ancestors, + es->rtable); + useprefix = list_length(es->rtable) > 1; + + /* Deparse each result column (we now include resjunk ones) */ + foreach(lc, plan->targetlist) + { + TargetEntry *tle = (TargetEntry *) lfirst(lc); + + result = lappend(result, + deparse_expression((Node *) tle->expr, context, + useprefix, false)); + } + + /* Print results */ + ExplainPropertyList("Output", result, es); } /* @@ -1522,22 +1508,22 @@ show_plan_tlist(PlanState *planstate, List *ancestors, ExplainState *es) */ static void show_expression(Node *node, const char *qlabel, - PlanState *planstate, List *ancestors, - bool useprefix, ExplainState *es) + PlanState *planstate, List *ancestors, + bool useprefix, ExplainState *es) { - List *context; - char *exprstr; - - /* Set up deparsing context */ - context = deparse_context_for_planstate((Node *) planstate, - ancestors, - es->rtable); - - /* Deparse the expression */ - exprstr = deparse_expression(node, context, useprefix, false); - - /* And add to es->str */ - ExplainPropertyText(qlabel, exprstr, es); + List *context; + char *exprstr; + + /* Set up deparsing context */ + context = deparse_context_for_planstate((Node *) planstate, + ancestors, + es->rtable); + + /* Deparse the expression */ + exprstr = deparse_expression(node, context, useprefix, false); + + /* And add to es->str */ + ExplainPropertyText(qlabel, exprstr, es); } /* @@ -1545,20 +1531,20 @@ show_expression(Node *node, const char *qlabel, */ static void show_qual(List *qual, const char *qlabel, - PlanState *planstate, List *ancestors, - bool useprefix, ExplainState *es) + PlanState *planstate, List *ancestors, + bool useprefix, ExplainState *es) { - Node *node; - - /* No work if empty qual */ - if (qual == NIL) - return; - - /* Convert AND list to explicit AND */ - node = (Node *) make_ands_explicit(qual); - - /* And show it */ - show_expression(node, qlabel, planstate, ancestors, useprefix, es); + Node *node; + + /* No work if empty qual */ + if (qual == NIL) + return; + + /* Convert AND list to explicit AND */ + node = (Node *) make_ands_explicit(qual); + + /* And show it */ + show_expression(node, qlabel, planstate, ancestors, useprefix, es); } /* @@ -1566,13 +1552,13 @@ show_qual(List *qual, const char *qlabel, */ static void show_scan_qual(List *qual, const char *qlabel, - PlanState *planstate, List *ancestors, - ExplainState *es) + PlanState *planstate, List *ancestors, + ExplainState *es) { - bool useprefix; - - useprefix = (IsA(planstate->plan, SubqueryScan) ||es->verbose); - show_qual(qual, qlabel, planstate, ancestors, useprefix, es); + bool useprefix; + + useprefix = (IsA(planstate->plan, SubqueryScan) ||es->verbose); + show_qual(qual, qlabel, planstate, ancestors, useprefix, es); } /* @@ -1580,13 +1566,13 @@ show_scan_qual(List *qual, const char *qlabel, */ static void show_upper_qual(List *qual, const char *qlabel, - PlanState *planstate, List *ancestors, - ExplainState *es) + PlanState *planstate, List *ancestors, + ExplainState *es) { - bool useprefix; - - useprefix = (list_length(es->rtable) > 1 || es->verbose); - show_qual(qual, qlabel, planstate, ancestors, useprefix, es); + bool useprefix; + + useprefix = (list_length(es->rtable) > 1 || es->verbose); + show_qual(qual, qlabel, planstate, ancestors, useprefix, es); } /* @@ -1595,11 +1581,11 @@ show_upper_qual(List *qual, const char *qlabel, static void show_sort_keys(SortState *sortstate, List *ancestors, ExplainState *es) { - Sort *plan = (Sort *) sortstate->ss.ps.plan; - - show_sort_keys_common((PlanState *) sortstate, - plan->numCols, plan->sortColIdx, - ancestors, es); + Sort *plan = (Sort *) sortstate->ss.ps.plan; + + show_sort_keys_common((PlanState *) sortstate, + plan->numCols, plan->sortColIdx, + ancestors, es); } /* @@ -1607,51 +1593,51 @@ show_sort_keys(SortState *sortstate, List *ancestors, ExplainState *es) */ static void show_merge_append_keys(MergeAppendState *mstate, List *ancestors, - ExplainState *es) + ExplainState *es) { - MergeAppend *plan = (MergeAppend *) mstate->ps.plan; - - show_sort_keys_common((PlanState *) mstate, - plan->numCols, plan->sortColIdx, - ancestors, es); + MergeAppend *plan = (MergeAppend *) mstate->ps.plan; + + show_sort_keys_common((PlanState *) mstate, + plan->numCols, plan->sortColIdx, + ancestors, es); } static void show_sort_keys_common(PlanState *planstate, int nkeys, AttrNumber *keycols, - List *ancestors, ExplainState *es) + List *ancestors, ExplainState *es) { - Plan *plan = planstate->plan; - List *context; - List *result = NIL; - bool useprefix; - int keyno; - char *exprstr; - - if (nkeys <= 0) - return; - - /* Set up deparsing context */ - context = deparse_context_for_planstate((Node *) planstate, - ancestors, - es->rtable); - useprefix = (list_length(es->rtable) > 1 || es->verbose); - - for (keyno = 0; keyno < nkeys; keyno++) - { - /* find key expression in tlist */ - AttrNumber keyresno = keycols[keyno]; - TargetEntry *target = get_tle_by_resno(plan->targetlist, - keyresno); - - if (!target) - elog(ERROR, "no tlist entry for key %d", keyresno); - /* Deparse the expression, showing any top-level cast */ - exprstr = deparse_expression((Node *) target->expr, context, - useprefix, true); - result = lappend(result, exprstr); - } - - ExplainPropertyList("Sort Key", result, es); + Plan *plan = planstate->plan; + List *context; + List *result = NIL; + bool useprefix; + int keyno; + char *exprstr; + + if (nkeys <= 0) + return; + + /* Set up deparsing context */ + context = deparse_context_for_planstate((Node *) planstate, + ancestors, + es->rtable); + useprefix = (list_length(es->rtable) > 1 || es->verbose); + + for (keyno = 0; keyno < nkeys; keyno++) + { + /* find key expression in tlist */ + AttrNumber keyresno = keycols[keyno]; + TargetEntry *target = get_tle_by_resno(plan->targetlist, + keyresno); + + if (!target) + elog(ERROR, "no tlist entry for key %d", keyresno); + /* Deparse the expression, showing any top-level cast */ + exprstr = deparse_expression((Node *) target->expr, context, + useprefix, true); + result = lappend(result, exprstr); + } + + ExplainPropertyList("Sort Key", result, es); } /* @@ -1660,30 +1646,30 @@ show_sort_keys_common(PlanState *planstate, int nkeys, AttrNumber *keycols, static void show_sort_info(SortState *sortstate, ExplainState *es) { - Assert(IsA(sortstate, SortState)); - if (es->analyze && sortstate->sort_Done && - sortstate->tuplesortstate != NULL) - { - Tuplesortstate *state = (Tuplesortstate *) sortstate->tuplesortstate; - const char *sortMethod; - const char *spaceType; - long spaceUsed; - - tuplesort_get_stats(state, &sortMethod, &spaceType, &spaceUsed); - - if (es->format == EXPLAIN_FORMAT_TEXT) - { - appendStringInfoSpaces(es->str, es->indent * 2); - appendStringInfo(es->str, "Sort Method: %s %s: %ldkB\n", - sortMethod, spaceType, spaceUsed); - } - else - { - ExplainPropertyText("Sort Method", sortMethod, es); - ExplainPropertyLong("Sort Space Used", spaceUsed, es); - ExplainPropertyText("Sort Space Type", spaceType, es); - } - } + Assert(IsA(sortstate, SortState)); + if (es->analyze && sortstate->sort_Done && + sortstate->tuplesortstate != NULL) + { + Tuplesortstate *state = (Tuplesortstate *) sortstate->tuplesortstate; + const char *sortMethod; + const char *spaceType; + long spaceUsed; + + tuplesort_get_stats(state, &sortMethod, &spaceType, &spaceUsed); + + if (es->format == EXPLAIN_FORMAT_TEXT) + { + appendStringInfoSpaces(es->str, es->indent * 2); + appendStringInfo(es->str, "Sort Method: %s %s: %ldkB\n", + sortMethod, spaceType, spaceUsed); + } + else + { + ExplainPropertyText("Sort Method", sortMethod, es); + ExplainPropertyLong("Sort Space Used", spaceUsed, es); + ExplainPropertyText("Sort Space Type", spaceType, es); + } + } } /* @@ -1692,40 +1678,40 @@ show_sort_info(SortState *sortstate, ExplainState *es) static void show_hash_info(HashState *hashstate, ExplainState *es) { - HashJoinTable hashtable; - - Assert(IsA(hashstate, HashState)); - hashtable = hashstate->hashtable; - - if (hashtable) - { - long spacePeakKb = (hashtable->spacePeak + 1023) / 1024; - - if (es->format != EXPLAIN_FORMAT_TEXT) - { - ExplainPropertyLong("Hash Buckets", hashtable->nbuckets, es); - ExplainPropertyLong("Hash Batches", hashtable->nbatch, es); - ExplainPropertyLong("Original Hash Batches", - hashtable->nbatch_original, es); - ExplainPropertyLong("Peak Memory Usage", spacePeakKb, es); - } - else if (hashtable->nbatch_original != hashtable->nbatch) - { - appendStringInfoSpaces(es->str, es->indent * 2); - appendStringInfo(es->str, - "Buckets: %d Batches: %d (originally %d) Memory Usage: %ldkB\n", - hashtable->nbuckets, hashtable->nbatch, - hashtable->nbatch_original, spacePeakKb); - } - else - { - appendStringInfoSpaces(es->str, es->indent * 2); - appendStringInfo(es->str, - "Buckets: %d Batches: %d Memory Usage: %ldkB\n", - hashtable->nbuckets, hashtable->nbatch, - spacePeakKb); - } - } + HashJoinTable hashtable; + + Assert(IsA(hashstate, HashState)); + hashtable = hashstate->hashtable; + + if (hashtable) + { + long spacePeakKb = (hashtable->spacePeak + 1023) / 1024; + + if (es->format != EXPLAIN_FORMAT_TEXT) + { + ExplainPropertyLong("Hash Buckets", hashtable->nbuckets, es); + ExplainPropertyLong("Hash Batches", hashtable->nbatch, es); + ExplainPropertyLong("Original Hash Batches", + hashtable->nbatch_original, es); + ExplainPropertyLong("Peak Memory Usage", spacePeakKb, es); + } + else if (hashtable->nbatch_original != hashtable->nbatch) + { + appendStringInfoSpaces(es->str, es->indent * 2); + appendStringInfo(es->str, + "Buckets: %d Batches: %d (originally %d) Memory Usage: %ldkB\n", + hashtable->nbuckets, hashtable->nbatch, + hashtable->nbatch_original, spacePeakKb); + } + else + { + appendStringInfoSpaces(es->str, es->indent * 2); + appendStringInfo(es->str, + "Buckets: %d Batches: %d Memory Usage: %ldkB\n", + hashtable->nbuckets, hashtable->nbatch, + spacePeakKb); + } + } } /* @@ -1735,28 +1721,28 @@ show_hash_info(HashState *hashstate, ExplainState *es) */ static void show_instrumentation_count(const char *qlabel, int which, - PlanState *planstate, ExplainState *es) + PlanState *planstate, ExplainState *es) { - double nfiltered; - double nloops; - - if (!es->analyze || !planstate->instrument) - return; - - if (which == 2) - nfiltered = planstate->instrument->nfiltered2; - else - nfiltered = planstate->instrument->nfiltered1; - nloops = planstate->instrument->nloops; - - /* In text mode, suppress zero counts; they're not interesting enough */ - if (nfiltered > 0 || es->format != EXPLAIN_FORMAT_TEXT) - { - if (nloops > 0) - ExplainPropertyFloat(qlabel, nfiltered / nloops, 0, es); - else - ExplainPropertyFloat(qlabel, 0.0, 0, es); - } + double nfiltered; + double nloops; + + if (!es->analyze || !planstate->instrument) + return; + + if (which == 2) + nfiltered = planstate->instrument->nfiltered2; + else + nfiltered = planstate->instrument->nfiltered1; + nloops = planstate->instrument->nloops; + + /* In text mode, suppress zero counts; they're not interesting enough */ + if (nfiltered > 0 || es->format != EXPLAIN_FORMAT_TEXT) + { + if (nloops > 0) + ExplainPropertyFloat(qlabel, nfiltered / nloops, 0, es); + else + ExplainPropertyFloat(qlabel, 0.0, 0, es); + } } /* @@ -1765,10 +1751,10 @@ show_instrumentation_count(const char *qlabel, int which, static void show_foreignscan_info(ForeignScanState *fsstate, ExplainState *es) { - FdwRoutine *fdwroutine = fsstate->fdwroutine; - - /* Let the FDW emit whatever fields it wants */ - fdwroutine->ExplainForeignScan(fsstate, es); + FdwRoutine *fdwroutine = fsstate->fdwroutine; + + /* Let the FDW emit whatever fields it wants */ + fdwroutine->ExplainForeignScan(fsstate, es); } /* @@ -1780,21 +1766,21 @@ show_foreignscan_info(ForeignScanState *fsstate, ExplainState *es) static const char * explain_get_index_name(Oid indexId) { - const char *result; - - if (explain_get_index_name_hook) - result = (*explain_get_index_name_hook) (indexId); - else - result = NULL; - if (result == NULL) - { - /* default behavior: look in the catalogs and quote it */ - result = get_rel_name(indexId); - if (result == NULL) - elog(ERROR, "cache lookup failed for index %u", indexId); - result = quote_identifier(result); - } - return result; + const char *result; + + if (explain_get_index_name_hook) + result = (*explain_get_index_name_hook) (indexId); + else + result = NULL; + if (result == NULL) + { + /* default behavior: look in the catalogs and quote it */ + result = get_rel_name(indexId); + if (result == NULL) + elog(ERROR, "cache lookup failed for index %u", indexId); + result = quote_identifier(result); + } + return result; } /* @@ -1802,38 +1788,38 @@ explain_get_index_name(Oid indexId) */ static void ExplainIndexScanDetails(Oid indexid, ScanDirection indexorderdir, - ExplainState *es) + ExplainState *es) { - const char *indexname = explain_get_index_name(indexid); - - if (es->format == EXPLAIN_FORMAT_TEXT) - { - if (ScanDirectionIsBackward(indexorderdir)) - appendStringInfoString(es->str, " Backward"); - appendStringInfo(es->str, " using %s", indexname); - } - else - { - const char *scandir; - - switch (indexorderdir) - { - case BackwardScanDirection: - scandir = "Backward"; - break; - case NoMovementScanDirection: - scandir = "NoMovement"; - break; - case ForwardScanDirection: - scandir = "Forward"; - break; - default: - scandir = "???"; - break; - } - ExplainPropertyText("Scan Direction", scandir, es); - ExplainPropertyText("Index Name", indexname, es); - } + const char *indexname = explain_get_index_name(indexid); + + if (es->format == EXPLAIN_FORMAT_TEXT) + { + if (ScanDirectionIsBackward(indexorderdir)) + appendStringInfoString(es->str, " Backward"); + appendStringInfo(es->str, " using %s", indexname); + } + else + { + const char *scandir; + + switch (indexorderdir) + { + case BackwardScanDirection: + scandir = "Backward"; + break; + case NoMovementScanDirection: + scandir = "NoMovement"; + break; + case ForwardScanDirection: + scandir = "Forward"; + break; + default: + scandir = "???"; + break; + } + ExplainPropertyText("Scan Direction", scandir, es); + ExplainPropertyText("Index Name", indexname, es); + } } /* @@ -1842,7 +1828,7 @@ ExplainIndexScanDetails(Oid indexid, ScanDirection indexorderdir, static void ExplainScanTarget(Scan *plan, ExplainState *es) { - ExplainTargetRel((Plan *) plan, plan->scanrelid, es); + ExplainTargetRel((Plan *) plan, plan->scanrelid, es); } /* @@ -1851,16 +1837,16 @@ ExplainScanTarget(Scan *plan, ExplainState *es) static void ExplainModifyTarget(ModifyTable *plan, ExplainState *es) { - Index rti; - - /* - * We show the name of the first target relation. In multi-target-table - * cases this should always be the parent of the inheritance tree. - */ - Assert(plan->resultRelations != NIL); - rti = linitial_int(plan->resultRelations); - - ExplainTargetRel((Plan *) plan, rti, es); + Index rti; + + /* + * We show the name of the first target relation. In multi-target-table + * cases this should always be the parent of the inheritance tree. + */ + Assert(plan->resultRelations != NIL); + rti = linitial_int(plan->resultRelations); + + ExplainTargetRel((Plan *) plan, rti, es); } /* @@ -1869,99 +1855,99 @@ ExplainModifyTarget(ModifyTable *plan, ExplainState *es) static void ExplainTargetRel(Plan *plan, Index rti, ExplainState *es) { - char *objectname = NULL; - char *namespace = NULL; - const char *objecttag = NULL; - RangeTblEntry *rte; - - rte = rt_fetch(rti, es->rtable); - - switch (nodeTag(plan)) - { - case T_SeqScan: - case T_IndexScan: - case T_IndexOnlyScan: - case T_BitmapHeapScan: - case T_TidScan: - case T_ForeignScan: - case T_ModifyTable: - /* NEW FOR RECATHON */ - case T_RecScan: - /* Assert it's on a real relation */ - Assert(rte->rtekind == RTE_RELATION); - objectname = get_rel_name(rte->relid); - if (es->verbose) - namespace = get_namespace_name(get_rel_namespace(rte->relid)); - objecttag = "Relation Name"; - break; - case T_FunctionScan: - { - Node *funcexpr; - - /* Assert it's on a RangeFunction */ - Assert(rte->rtekind == RTE_FUNCTION); - - /* - * If the expression is still a function call, we can get the - * real name of the function. Otherwise, punt (this can - * happen if the optimizer simplified away the function call, - * for example). - */ - funcexpr = ((FunctionScan *) plan)->funcexpr; - if (funcexpr && IsA(funcexpr, FuncExpr)) - { - Oid funcid = ((FuncExpr *) funcexpr)->funcid; - - objectname = get_func_name(funcid); - if (es->verbose) - namespace = - get_namespace_name(get_func_namespace(funcid)); - } - objecttag = "Function Name"; - } - break; - case T_ValuesScan: - Assert(rte->rtekind == RTE_VALUES); - break; - case T_CteScan: - /* Assert it's on a non-self-reference CTE */ - Assert(rte->rtekind == RTE_CTE); - Assert(!rte->self_reference); - objectname = rte->ctename; - objecttag = "CTE Name"; - break; - case T_WorkTableScan: - /* Assert it's on a self-reference CTE */ - Assert(rte->rtekind == RTE_CTE); - Assert(rte->self_reference); - objectname = rte->ctename; - objecttag = "CTE Name"; - break; - default: - break; - } - - if (es->format == EXPLAIN_FORMAT_TEXT) - { - appendStringInfoString(es->str, " on"); - if (namespace != NULL) - appendStringInfo(es->str, " %s.%s", quote_identifier(namespace), - quote_identifier(objectname)); - else if (objectname != NULL) - appendStringInfo(es->str, " %s", quote_identifier(objectname)); - if (objectname == NULL || - strcmp(rte->eref->aliasname, objectname) != 0) - appendStringInfo(es->str, " %s", - quote_identifier(rte->eref->aliasname)); - } - else - { - if (objecttag != NULL && objectname != NULL) - ExplainPropertyText(objecttag, objectname, es); - if (namespace != NULL) - ExplainPropertyText("Schema", namespace, es); - ExplainPropertyText("Alias", rte->eref->aliasname, es); - } + char *objectname = NULL; + char *namespace = NULL; + const char *objecttag = NULL; + RangeTblEntry *rte; + + rte = rt_fetch(rti, es->rtable); + + switch (nodeTag(plan)) + { + case T_SeqScan: + case T_IndexScan: + case T_IndexOnlyScan: + case T_BitmapHeapScan: + case T_TidScan: + case T_ForeignScan: + case T_ModifyTable: + /* NEW FOR RECATHON */ + case T_RecScan: + /* Assert it's on a real relation */ + Assert(rte->rtekind == RTE_RELATION); + objectname = get_rel_name(rte->relid); + if (es->verbose) + namespace = get_namespace_name(get_rel_namespace(rte->relid)); + objecttag = "Relation Name"; + break; + case T_FunctionScan: + { + Node *funcexpr; + + /* Assert it's on a RangeFunction */ + Assert(rte->rtekind == RTE_FUNCTION); + + /* + * If the expression is still a function call, we can get the + * real name of the function. Otherwise, punt (this can + * happen if the optimizer simplified away the function call, + * for example). + */ + funcexpr = ((FunctionScan *) plan)->funcexpr; + if (funcexpr && IsA(funcexpr, FuncExpr)) + { + Oid funcid = ((FuncExpr *) funcexpr)->funcid; + + objectname = get_func_name(funcid); + if (es->verbose) + namespace = + get_namespace_name(get_func_namespace(funcid)); + } + objecttag = "Function Name"; + } + break; + case T_ValuesScan: + Assert(rte->rtekind == RTE_VALUES); + break; + case T_CteScan: + /* Assert it's on a non-self-reference CTE */ + Assert(rte->rtekind == RTE_CTE); + Assert(!rte->self_reference); + objectname = rte->ctename; + objecttag = "CTE Name"; + break; + case T_WorkTableScan: + /* Assert it's on a self-reference CTE */ + Assert(rte->rtekind == RTE_CTE); + Assert(rte->self_reference); + objectname = rte->ctename; + objecttag = "CTE Name"; + break; + default: + break; + } + + if (es->format == EXPLAIN_FORMAT_TEXT) + { + appendStringInfoString(es->str, " on"); + if (namespace != NULL) + appendStringInfo(es->str, " %s.%s", quote_identifier(namespace), + quote_identifier(objectname)); + else if (objectname != NULL) + appendStringInfo(es->str, " %s", quote_identifier(objectname)); + if (objectname == NULL || + strcmp(rte->eref->aliasname, objectname) != 0) + appendStringInfo(es->str, " %s", + quote_identifier(rte->eref->aliasname)); + } + else + { + if (objecttag != NULL && objectname != NULL) + ExplainPropertyText(objecttag, objectname, es); + if (namespace != NULL) + ExplainPropertyText("Schema", namespace, es); + ExplainPropertyText("Alias", rte->eref->aliasname, es); + } } /* @@ -1976,14 +1962,14 @@ ExplainTargetRel(Plan *plan, Index rti, ExplainState *es) */ static void ExplainMemberNodes(List *plans, PlanState **planstates, - List *ancestors, ExplainState *es) + List *ancestors, ExplainState *es) { - int nplans = list_length(plans); - int j; - - for (j = 0; j < nplans; j++) - ExplainNode(planstates[j], ancestors, - "Member", NULL, es); + int nplans = list_length(plans); + int j; + + for (j = 0; j < nplans; j++) + ExplainNode(planstates[j], ancestors, + "Member", NULL, es); } /* @@ -1994,18 +1980,18 @@ ExplainMemberNodes(List *plans, PlanState **planstates, */ static void ExplainSubPlans(List *plans, List *ancestors, - const char *relationship, ExplainState *es) + const char *relationship, ExplainState *es) { - ListCell *lst; - - foreach(lst, plans) - { - SubPlanState *sps = (SubPlanState *) lfirst(lst); - SubPlan *sp = (SubPlan *) sps->xprstate.expr; - - ExplainNode(sps->planstate, ancestors, - relationship, sp->plan_name, es); - } + ListCell *lst; + + foreach(lst, plans) + { + SubPlanState *sps = (SubPlanState *) lfirst(lst); + SubPlan *sp = (SubPlan *) sps->xprstate.expr; + + ExplainNode(sps->planstate, ancestors, + relationship, sp->plan_name, es); + } } /* @@ -2015,67 +2001,67 @@ ExplainSubPlans(List *plans, List *ancestors, void ExplainPropertyList(const char *qlabel, List *data, ExplainState *es) { - ListCell *lc; - bool first = true; - - switch (es->format) - { - case EXPLAIN_FORMAT_TEXT: - appendStringInfoSpaces(es->str, es->indent * 2); - appendStringInfo(es->str, "%s: ", qlabel); - foreach(lc, data) - { - if (!first) - appendStringInfoString(es->str, ", "); - appendStringInfoString(es->str, (const char *) lfirst(lc)); - first = false; - } - appendStringInfoChar(es->str, '\n'); - break; - - case EXPLAIN_FORMAT_XML: - ExplainXMLTag(qlabel, X_OPENING, es); - foreach(lc, data) - { - char *str; - - appendStringInfoSpaces(es->str, es->indent * 2 + 2); - appendStringInfoString(es->str, ""); - str = escape_xml((const char *) lfirst(lc)); - appendStringInfoString(es->str, str); - pfree(str); - appendStringInfoString(es->str, "\n"); - } - ExplainXMLTag(qlabel, X_CLOSING, es); - break; - - case EXPLAIN_FORMAT_JSON: - ExplainJSONLineEnding(es); - appendStringInfoSpaces(es->str, es->indent * 2); - escape_json(es->str, qlabel); - appendStringInfoString(es->str, ": ["); - foreach(lc, data) - { - if (!first) - appendStringInfoString(es->str, ", "); - escape_json(es->str, (const char *) lfirst(lc)); - first = false; - } - appendStringInfoChar(es->str, ']'); - break; - - case EXPLAIN_FORMAT_YAML: - ExplainYAMLLineStarting(es); - appendStringInfo(es->str, "%s: ", qlabel); - foreach(lc, data) - { - appendStringInfoChar(es->str, '\n'); - appendStringInfoSpaces(es->str, es->indent * 2 + 2); - appendStringInfoString(es->str, "- "); - escape_yaml(es->str, (const char *) lfirst(lc)); - } - break; - } + ListCell *lc; + bool first = true; + + switch (es->format) + { + case EXPLAIN_FORMAT_TEXT: + appendStringInfoSpaces(es->str, es->indent * 2); + appendStringInfo(es->str, "%s: ", qlabel); + foreach(lc, data) + { + if (!first) + appendStringInfoString(es->str, ", "); + appendStringInfoString(es->str, (const char *) lfirst(lc)); + first = false; + } + appendStringInfoChar(es->str, '\n'); + break; + + case EXPLAIN_FORMAT_XML: + ExplainXMLTag(qlabel, X_OPENING, es); + foreach(lc, data) + { + char *str; + + appendStringInfoSpaces(es->str, es->indent * 2 + 2); + appendStringInfoString(es->str, ""); + str = escape_xml((const char *) lfirst(lc)); + appendStringInfoString(es->str, str); + pfree(str); + appendStringInfoString(es->str, "\n"); + } + ExplainXMLTag(qlabel, X_CLOSING, es); + break; + + case EXPLAIN_FORMAT_JSON: + ExplainJSONLineEnding(es); + appendStringInfoSpaces(es->str, es->indent * 2); + escape_json(es->str, qlabel); + appendStringInfoString(es->str, ": ["); + foreach(lc, data) + { + if (!first) + appendStringInfoString(es->str, ", "); + escape_json(es->str, (const char *) lfirst(lc)); + first = false; + } + appendStringInfoChar(es->str, ']'); + break; + + case EXPLAIN_FORMAT_YAML: + ExplainYAMLLineStarting(es); + appendStringInfo(es->str, "%s: ", qlabel); + foreach(lc, data) + { + appendStringInfoChar(es->str, '\n'); + appendStringInfoSpaces(es->str, es->indent * 2 + 2); + appendStringInfoString(es->str, "- "); + escape_yaml(es->str, (const char *) lfirst(lc)); + } + break; + } } /* @@ -2089,49 +2075,49 @@ ExplainPropertyList(const char *qlabel, List *data, ExplainState *es) */ static void ExplainProperty(const char *qlabel, const char *value, bool numeric, - ExplainState *es) + ExplainState *es) { - switch (es->format) - { - case EXPLAIN_FORMAT_TEXT: - appendStringInfoSpaces(es->str, es->indent * 2); - appendStringInfo(es->str, "%s: %s\n", qlabel, value); - break; - - case EXPLAIN_FORMAT_XML: - { - char *str; - - appendStringInfoSpaces(es->str, es->indent * 2); - ExplainXMLTag(qlabel, X_OPENING | X_NOWHITESPACE, es); - str = escape_xml(value); - appendStringInfoString(es->str, str); - pfree(str); - ExplainXMLTag(qlabel, X_CLOSING | X_NOWHITESPACE, es); - appendStringInfoChar(es->str, '\n'); - } - break; - - case EXPLAIN_FORMAT_JSON: - ExplainJSONLineEnding(es); - appendStringInfoSpaces(es->str, es->indent * 2); - escape_json(es->str, qlabel); - appendStringInfoString(es->str, ": "); - if (numeric) - appendStringInfoString(es->str, value); - else - escape_json(es->str, value); - break; - - case EXPLAIN_FORMAT_YAML: - ExplainYAMLLineStarting(es); - appendStringInfo(es->str, "%s: ", qlabel); - if (numeric) - appendStringInfoString(es->str, value); - else - escape_yaml(es->str, value); - break; - } + switch (es->format) + { + case EXPLAIN_FORMAT_TEXT: + appendStringInfoSpaces(es->str, es->indent * 2); + appendStringInfo(es->str, "%s: %s\n", qlabel, value); + break; + + case EXPLAIN_FORMAT_XML: + { + char *str; + + appendStringInfoSpaces(es->str, es->indent * 2); + ExplainXMLTag(qlabel, X_OPENING | X_NOWHITESPACE, es); + str = escape_xml(value); + appendStringInfoString(es->str, str); + pfree(str); + ExplainXMLTag(qlabel, X_CLOSING | X_NOWHITESPACE, es); + appendStringInfoChar(es->str, '\n'); + } + break; + + case EXPLAIN_FORMAT_JSON: + ExplainJSONLineEnding(es); + appendStringInfoSpaces(es->str, es->indent * 2); + escape_json(es->str, qlabel); + appendStringInfoString(es->str, ": "); + if (numeric) + appendStringInfoString(es->str, value); + else + escape_json(es->str, value); + break; + + case EXPLAIN_FORMAT_YAML: + ExplainYAMLLineStarting(es); + appendStringInfo(es->str, "%s: ", qlabel); + if (numeric) + appendStringInfoString(es->str, value); + else + escape_yaml(es->str, value); + break; + } } /* @@ -2140,7 +2126,7 @@ ExplainProperty(const char *qlabel, const char *value, bool numeric, void ExplainPropertyText(const char *qlabel, const char *value, ExplainState *es) { - ExplainProperty(qlabel, value, false, es); + ExplainProperty(qlabel, value, false, es); } /* @@ -2149,10 +2135,10 @@ ExplainPropertyText(const char *qlabel, const char *value, ExplainState *es) void ExplainPropertyInteger(const char *qlabel, int value, ExplainState *es) { - char buf[32]; - - snprintf(buf, sizeof(buf), "%d", value); - ExplainProperty(qlabel, buf, true, es); + char buf[32]; + + snprintf(buf, sizeof(buf), "%d", value); + ExplainProperty(qlabel, buf, true, es); } /* @@ -2161,10 +2147,10 @@ ExplainPropertyInteger(const char *qlabel, int value, ExplainState *es) void ExplainPropertyLong(const char *qlabel, long value, ExplainState *es) { - char buf[32]; - - snprintf(buf, sizeof(buf), "%ld", value); - ExplainProperty(qlabel, buf, true, es); + char buf[32]; + + snprintf(buf, sizeof(buf), "%ld", value); + ExplainProperty(qlabel, buf, true, es); } /* @@ -2173,12 +2159,12 @@ ExplainPropertyLong(const char *qlabel, long value, ExplainState *es) */ void ExplainPropertyFloat(const char *qlabel, double value, int ndigits, - ExplainState *es) + ExplainState *es) { - char buf[256]; - - snprintf(buf, sizeof(buf), "%.*f", ndigits, value); - ExplainProperty(qlabel, buf, true, es); + char buf[256]; + + snprintf(buf, sizeof(buf), "%.*f", ndigits, value); + ExplainProperty(qlabel, buf, true, es); } /* @@ -2192,61 +2178,61 @@ ExplainPropertyFloat(const char *qlabel, double value, int ndigits, */ static void ExplainOpenGroup(const char *objtype, const char *labelname, - bool labeled, ExplainState *es) + bool labeled, ExplainState *es) { - switch (es->format) - { - case EXPLAIN_FORMAT_TEXT: - /* nothing to do */ - break; - - case EXPLAIN_FORMAT_XML: - ExplainXMLTag(objtype, X_OPENING, es); - es->indent++; - break; - - case EXPLAIN_FORMAT_JSON: - ExplainJSONLineEnding(es); - appendStringInfoSpaces(es->str, 2 * es->indent); - if (labelname) - { - escape_json(es->str, labelname); - appendStringInfoString(es->str, ": "); - } - appendStringInfoChar(es->str, labeled ? '{' : '['); - - /* - * In JSON format, the grouping_stack is an integer list. 0 means - * we've emitted nothing at this grouping level, 1 means we've - * emitted something (and so the next item needs a comma). See - * ExplainJSONLineEnding(). - */ - es->grouping_stack = lcons_int(0, es->grouping_stack); - es->indent++; - break; - - case EXPLAIN_FORMAT_YAML: - - /* - * In YAML format, the grouping stack is an integer list. 0 means - * we've emitted nothing at this grouping level AND this grouping - * level is unlabelled and must be marked with "- ". See - * ExplainYAMLLineStarting(). - */ - ExplainYAMLLineStarting(es); - if (labelname) - { - appendStringInfo(es->str, "%s: ", labelname); - es->grouping_stack = lcons_int(1, es->grouping_stack); - } - else - { - appendStringInfoString(es->str, "- "); - es->grouping_stack = lcons_int(0, es->grouping_stack); - } - es->indent++; - break; - } + switch (es->format) + { + case EXPLAIN_FORMAT_TEXT: + /* nothing to do */ + break; + + case EXPLAIN_FORMAT_XML: + ExplainXMLTag(objtype, X_OPENING, es); + es->indent++; + break; + + case EXPLAIN_FORMAT_JSON: + ExplainJSONLineEnding(es); + appendStringInfoSpaces(es->str, 2 * es->indent); + if (labelname) + { + escape_json(es->str, labelname); + appendStringInfoString(es->str, ": "); + } + appendStringInfoChar(es->str, labeled ? '{' : '['); + + /* + * In JSON format, the grouping_stack is an integer list. 0 means + * we've emitted nothing at this grouping level, 1 means we've + * emitted something (and so the next item needs a comma). See + * ExplainJSONLineEnding(). + */ + es->grouping_stack = lcons_int(0, es->grouping_stack); + es->indent++; + break; + + case EXPLAIN_FORMAT_YAML: + + /* + * In YAML format, the grouping stack is an integer list. 0 means + * we've emitted nothing at this grouping level AND this grouping + * level is unlabelled and must be marked with "- ". See + * ExplainYAMLLineStarting(). + */ + ExplainYAMLLineStarting(es); + if (labelname) + { + appendStringInfo(es->str, "%s: ", labelname); + es->grouping_stack = lcons_int(1, es->grouping_stack); + } + else + { + appendStringInfoString(es->str, "- "); + es->grouping_stack = lcons_int(0, es->grouping_stack); + } + es->indent++; + break; + } } /* @@ -2255,32 +2241,32 @@ ExplainOpenGroup(const char *objtype, const char *labelname, */ static void ExplainCloseGroup(const char *objtype, const char *labelname, - bool labeled, ExplainState *es) + bool labeled, ExplainState *es) { - switch (es->format) - { - case EXPLAIN_FORMAT_TEXT: - /* nothing to do */ - break; - - case EXPLAIN_FORMAT_XML: - es->indent--; - ExplainXMLTag(objtype, X_CLOSING, es); - break; - - case EXPLAIN_FORMAT_JSON: - es->indent--; - appendStringInfoChar(es->str, '\n'); - appendStringInfoSpaces(es->str, 2 * es->indent); - appendStringInfoChar(es->str, labeled ? '}' : ']'); - es->grouping_stack = list_delete_first(es->grouping_stack); - break; - - case EXPLAIN_FORMAT_YAML: - es->indent--; - es->grouping_stack = list_delete_first(es->grouping_stack); - break; - } + switch (es->format) + { + case EXPLAIN_FORMAT_TEXT: + /* nothing to do */ + break; + + case EXPLAIN_FORMAT_XML: + es->indent--; + ExplainXMLTag(objtype, X_CLOSING, es); + break; + + case EXPLAIN_FORMAT_JSON: + es->indent--; + appendStringInfoChar(es->str, '\n'); + appendStringInfoSpaces(es->str, 2 * es->indent); + appendStringInfoChar(es->str, labeled ? '}' : ']'); + es->grouping_stack = list_delete_first(es->grouping_stack); + break; + + case EXPLAIN_FORMAT_YAML: + es->indent--; + es->grouping_stack = list_delete_first(es->grouping_stack); + break; + } } /* @@ -2292,41 +2278,41 @@ ExplainCloseGroup(const char *objtype, const char *labelname, static void ExplainDummyGroup(const char *objtype, const char *labelname, ExplainState *es) { - switch (es->format) - { - case EXPLAIN_FORMAT_TEXT: - /* nothing to do */ - break; - - case EXPLAIN_FORMAT_XML: - ExplainXMLTag(objtype, X_CLOSE_IMMEDIATE, es); - break; - - case EXPLAIN_FORMAT_JSON: - ExplainJSONLineEnding(es); - appendStringInfoSpaces(es->str, 2 * es->indent); - if (labelname) - { - escape_json(es->str, labelname); - appendStringInfoString(es->str, ": "); - } - escape_json(es->str, objtype); - break; - - case EXPLAIN_FORMAT_YAML: - ExplainYAMLLineStarting(es); - if (labelname) - { - escape_yaml(es->str, labelname); - appendStringInfoString(es->str, ": "); - } - else - { - appendStringInfoString(es->str, "- "); - } - escape_yaml(es->str, objtype); - break; - } + switch (es->format) + { + case EXPLAIN_FORMAT_TEXT: + /* nothing to do */ + break; + + case EXPLAIN_FORMAT_XML: + ExplainXMLTag(objtype, X_CLOSE_IMMEDIATE, es); + break; + + case EXPLAIN_FORMAT_JSON: + ExplainJSONLineEnding(es); + appendStringInfoSpaces(es->str, 2 * es->indent); + if (labelname) + { + escape_json(es->str, labelname); + appendStringInfoString(es->str, ": "); + } + escape_json(es->str, objtype); + break; + + case EXPLAIN_FORMAT_YAML: + ExplainYAMLLineStarting(es); + if (labelname) + { + escape_yaml(es->str, labelname); + appendStringInfoString(es->str, ": "); + } + else + { + appendStringInfoString(es->str, "- "); + } + escape_yaml(es->str, objtype); + break; + } } /* @@ -2338,29 +2324,29 @@ ExplainDummyGroup(const char *objtype, const char *labelname, ExplainState *es) void ExplainBeginOutput(ExplainState *es) { - switch (es->format) - { - case EXPLAIN_FORMAT_TEXT: - /* nothing to do */ - break; - - case EXPLAIN_FORMAT_XML: - appendStringInfoString(es->str, - "\n"); - es->indent++; - break; - - case EXPLAIN_FORMAT_JSON: - /* top-level structure is an array of plans */ - appendStringInfoChar(es->str, '['); - es->grouping_stack = lcons_int(0, es->grouping_stack); - es->indent++; - break; - - case EXPLAIN_FORMAT_YAML: - es->grouping_stack = lcons_int(0, es->grouping_stack); - break; - } + switch (es->format) + { + case EXPLAIN_FORMAT_TEXT: + /* nothing to do */ + break; + + case EXPLAIN_FORMAT_XML: + appendStringInfoString(es->str, + "\n"); + es->indent++; + break; + + case EXPLAIN_FORMAT_JSON: + /* top-level structure is an array of plans */ + appendStringInfoChar(es->str, '['); + es->grouping_stack = lcons_int(0, es->grouping_stack); + es->indent++; + break; + + case EXPLAIN_FORMAT_YAML: + es->grouping_stack = lcons_int(0, es->grouping_stack); + break; + } } /* @@ -2369,27 +2355,27 @@ ExplainBeginOutput(ExplainState *es) void ExplainEndOutput(ExplainState *es) { - switch (es->format) - { - case EXPLAIN_FORMAT_TEXT: - /* nothing to do */ - break; - - case EXPLAIN_FORMAT_XML: - es->indent--; - appendStringInfoString(es->str, ""); - break; - - case EXPLAIN_FORMAT_JSON: - es->indent--; - appendStringInfoString(es->str, "\n]"); - es->grouping_stack = list_delete_first(es->grouping_stack); - break; - - case EXPLAIN_FORMAT_YAML: - es->grouping_stack = list_delete_first(es->grouping_stack); - break; - } + switch (es->format) + { + case EXPLAIN_FORMAT_TEXT: + /* nothing to do */ + break; + + case EXPLAIN_FORMAT_XML: + es->indent--; + appendStringInfoString(es->str, ""); + break; + + case EXPLAIN_FORMAT_JSON: + es->indent--; + appendStringInfoString(es->str, "\n]"); + es->grouping_stack = list_delete_first(es->grouping_stack); + break; + + case EXPLAIN_FORMAT_YAML: + es->grouping_stack = list_delete_first(es->grouping_stack); + break; + } } /* @@ -2398,19 +2384,19 @@ ExplainEndOutput(ExplainState *es) void ExplainSeparatePlans(ExplainState *es) { - switch (es->format) - { - case EXPLAIN_FORMAT_TEXT: - /* add a blank line */ - appendStringInfoChar(es->str, '\n'); - break; - - case EXPLAIN_FORMAT_XML: - case EXPLAIN_FORMAT_JSON: - case EXPLAIN_FORMAT_YAML: - /* nothing to do */ - break; - } + switch (es->format) + { + case EXPLAIN_FORMAT_TEXT: + /* add a blank line */ + appendStringInfoChar(es->str, '\n'); + break; + + case EXPLAIN_FORMAT_XML: + case EXPLAIN_FORMAT_JSON: + case EXPLAIN_FORMAT_YAML: + /* nothing to do */ + break; + } } /* @@ -2426,20 +2412,20 @@ ExplainSeparatePlans(ExplainState *es) static void ExplainXMLTag(const char *tagname, int flags, ExplainState *es) { - const char *s; - - if ((flags & X_NOWHITESPACE) == 0) - appendStringInfoSpaces(es->str, 2 * es->indent); - appendStringInfoCharMacro(es->str, '<'); - if ((flags & X_CLOSING) != 0) - appendStringInfoCharMacro(es->str, '/'); - for (s = tagname; *s; s++) - appendStringInfoCharMacro(es->str, (*s == ' ') ? '-' : *s); - if ((flags & X_CLOSE_IMMEDIATE) != 0) - appendStringInfoString(es->str, " /"); - appendStringInfoCharMacro(es->str, '>'); - if ((flags & X_NOWHITESPACE) == 0) - appendStringInfoCharMacro(es->str, '\n'); + const char *s; + + if ((flags & X_NOWHITESPACE) == 0) + appendStringInfoSpaces(es->str, 2 * es->indent); + appendStringInfoCharMacro(es->str, '<'); + if ((flags & X_CLOSING) != 0) + appendStringInfoCharMacro(es->str, '/'); + for (s = tagname; *s; s++) + appendStringInfoCharMacro(es->str, (*s == ' ') ? '-' : *s); + if ((flags & X_CLOSE_IMMEDIATE) != 0) + appendStringInfoString(es->str, " /"); + appendStringInfoCharMacro(es->str, '>'); + if ((flags & X_NOWHITESPACE) == 0) + appendStringInfoCharMacro(es->str, '\n'); } /* @@ -2452,12 +2438,12 @@ ExplainXMLTag(const char *tagname, int flags, ExplainState *es) static void ExplainJSONLineEnding(ExplainState *es) { - Assert(es->format == EXPLAIN_FORMAT_JSON); - if (linitial_int(es->grouping_stack) != 0) - appendStringInfoChar(es->str, ','); - else - linitial_int(es->grouping_stack) = 1; - appendStringInfoChar(es->str, '\n'); + Assert(es->format == EXPLAIN_FORMAT_JSON); + if (linitial_int(es->grouping_stack) != 0) + appendStringInfoChar(es->str, ','); + else + linitial_int(es->grouping_stack) = 1; + appendStringInfoChar(es->str, '\n'); } /* @@ -2472,16 +2458,16 @@ ExplainJSONLineEnding(ExplainState *es) static void ExplainYAMLLineStarting(ExplainState *es) { - Assert(es->format == EXPLAIN_FORMAT_YAML); - if (linitial_int(es->grouping_stack) == 0) - { - linitial_int(es->grouping_stack) = 1; - } - else - { - appendStringInfoChar(es->str, '\n'); - appendStringInfoSpaces(es->str, es->indent * 2); - } + Assert(es->format == EXPLAIN_FORMAT_YAML); + if (linitial_int(es->grouping_stack) == 0) + { + linitial_int(es->grouping_stack) = 1; + } + else + { + appendStringInfoChar(es->str, '\n'); + appendStringInfoSpaces(es->str, es->indent * 2); + } } /* @@ -2497,5 +2483,5 @@ ExplainYAMLLineStarting(ExplainState *es) static void escape_yaml(StringInfo buf, const char *str) { - escape_json(buf, str); + escape_json(buf, str); } diff --git a/PostgreSQL/src/backend/optimizer/path/allpaths.c b/PostgreSQL/src/backend/optimizer/path/allpaths.c index f022c70..808beaa 100644 --- a/PostgreSQL/src/backend/optimizer/path/allpaths.c +++ b/PostgreSQL/src/backend/optimizer/path/allpaths.c @@ -37,8 +37,6 @@ #include "parser/parsetree.h" #include "rewrite/rewriteManip.h" #include "utils/lsyscache.h" -//NEW FOR RECDB -#include "utils/recathon.h" /* These parameters are set by GUC */ @@ -52,49 +50,49 @@ join_search_hook_type join_search_hook = NULL; static void set_base_rel_sizes(PlannerInfo *root); static void set_base_rel_pathlists(PlannerInfo *root); static void set_rel_size(PlannerInfo *root, RelOptInfo *rel, - Index rti, RangeTblEntry *rte); + Index rti, RangeTblEntry *rte); static void set_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, - Index rti, RangeTblEntry *rte); + Index rti, RangeTblEntry *rte); static void set_plain_rel_size(PlannerInfo *root, RelOptInfo *rel, - RangeTblEntry *rte); + RangeTblEntry *rte); static void set_plain_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, - RangeTblEntry *rte); + RangeTblEntry *rte); static void set_foreign_size(PlannerInfo *root, RelOptInfo *rel, - RangeTblEntry *rte); + RangeTblEntry *rte); static void set_foreign_pathlist(PlannerInfo *root, RelOptInfo *rel, - RangeTblEntry *rte); + RangeTblEntry *rte); static void set_append_rel_size(PlannerInfo *root, RelOptInfo *rel, - Index rti, RangeTblEntry *rte); + Index rti, RangeTblEntry *rte); static void set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, - Index rti, RangeTblEntry *rte); + Index rti, RangeTblEntry *rte); static void generate_mergeappend_paths(PlannerInfo *root, RelOptInfo *rel, - List *live_childrels, - List *all_child_pathkeys); + List *live_childrels, + List *all_child_pathkeys); static List *accumulate_append_subpath(List *subpaths, Path *path); static void set_dummy_rel_pathlist(RelOptInfo *rel); static void set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel, - Index rti, RangeTblEntry *rte); + Index rti, RangeTblEntry *rte); static void set_function_pathlist(PlannerInfo *root, RelOptInfo *rel, - RangeTblEntry *rte); + RangeTblEntry *rte); static void set_values_pathlist(PlannerInfo *root, RelOptInfo *rel, - RangeTblEntry *rte); + RangeTblEntry *rte); static void set_cte_pathlist(PlannerInfo *root, RelOptInfo *rel, - RangeTblEntry *rte); + RangeTblEntry *rte); static void set_worktable_pathlist(PlannerInfo *root, RelOptInfo *rel, - RangeTblEntry *rte); + RangeTblEntry *rte); static RelOptInfo *make_rel_from_joinlist(PlannerInfo *root, List *joinlist); static bool subquery_is_pushdown_safe(Query *subquery, Query *topquery, - bool *differentTypes); + bool *differentTypes); static bool recurse_pushdown_safe(Node *setOp, Query *topquery, - bool *differentTypes); + bool *differentTypes); static void compare_tlist_datatypes(List *tlist, List *colTypes, - bool *differentTypes); + bool *differentTypes); static bool qual_is_pushdown_safe(Query *subquery, Index rti, Node *qual, - bool *differentTypes); + bool *differentTypes); static void subquery_push_qual(Query *subquery, - RangeTblEntry *rte, Index rti, Node *qual); + RangeTblEntry *rte, Index rti, Node *qual); static void recurse_push_qual(Node *setOp, Query *topquery, - RangeTblEntry *rte, Index rti, Node *qual); + RangeTblEntry *rte, Index rti, Node *qual); /* @@ -105,47 +103,47 @@ static void recurse_push_qual(Node *setOp, Query *topquery, RelOptInfo * make_one_rel(PlannerInfo *root, List *joinlist) { - RelOptInfo *rel; - Index rti; - - /* - * Construct the all_baserels Relids set. - */ - root->all_baserels = NULL; - for (rti = 1; rti < root->simple_rel_array_size; rti++) - { - RelOptInfo *brel = root->simple_rel_array[rti]; - - /* there may be empty slots corresponding to non-baserel RTEs */ - if (brel == NULL) - continue; - - Assert(brel->relid == rti); /* sanity check on array */ - - /* ignore RTEs that are "other rels" */ - if (brel->reloptkind != RELOPT_BASEREL) - continue; - - root->all_baserels = bms_add_member(root->all_baserels, brel->relid); - } - - /* - * Generate access paths for the base rels. - */ - set_base_rel_sizes(root); - set_base_rel_pathlists(root); - - /* - * Generate access paths for the entire join tree. - */ - rel = make_rel_from_joinlist(root, joinlist); - - /* - * The result should join all and only the query's base rels. - */ - Assert(bms_equal(rel->relids, root->all_baserels)); - - return rel; + RelOptInfo *rel; + Index rti; + + /* + * Construct the all_baserels Relids set. + */ + root->all_baserels = NULL; + for (rti = 1; rti < root->simple_rel_array_size; rti++) + { + RelOptInfo *brel = root->simple_rel_array[rti]; + + /* there may be empty slots corresponding to non-baserel RTEs */ + if (brel == NULL) + continue; + + Assert(brel->relid == rti); /* sanity check on array */ + + /* ignore RTEs that are "other rels" */ + if (brel->reloptkind != RELOPT_BASEREL) + continue; + + root->all_baserels = bms_add_member(root->all_baserels, brel->relid); + } + + /* + * Generate access paths for the base rels. + */ + set_base_rel_sizes(root); + set_base_rel_pathlists(root); + + /* + * Generate access paths for the entire join tree. + */ + rel = make_rel_from_joinlist(root, joinlist); + + /* + * The result should join all and only the query's base rels. + */ + Assert(bms_equal(rel->relids, root->all_baserels)); + + return rel; } /* @@ -158,24 +156,24 @@ make_one_rel(PlannerInfo *root, List *joinlist) static void set_base_rel_sizes(PlannerInfo *root) { - Index rti; - - for (rti = 1; rti < root->simple_rel_array_size; rti++) - { - RelOptInfo *rel = root->simple_rel_array[rti]; - - /* there may be empty slots corresponding to non-baserel RTEs */ - if (rel == NULL) - continue; - - Assert(rel->relid == rti); /* sanity check on array */ - - /* ignore RTEs that are "other rels" */ - if (rel->reloptkind != RELOPT_BASEREL) - continue; - - set_rel_size(root, rel, rti, root->simple_rte_array[rti]); - } + Index rti; + + for (rti = 1; rti < root->simple_rel_array_size; rti++) + { + RelOptInfo *rel = root->simple_rel_array[rti]; + + /* there may be empty slots corresponding to non-baserel RTEs */ + if (rel == NULL) + continue; + + Assert(rel->relid == rti); /* sanity check on array */ + + /* ignore RTEs that are "other rels" */ + if (rel->reloptkind != RELOPT_BASEREL) + continue; + + set_rel_size(root, rel, rti, root->simple_rte_array[rti]); + } } /* @@ -187,24 +185,24 @@ set_base_rel_sizes(PlannerInfo *root) static void set_base_rel_pathlists(PlannerInfo *root) { - Index rti; - - for (rti = 1; rti < root->simple_rel_array_size; rti++) - { - RelOptInfo *rel = root->simple_rel_array[rti]; - - /* there may be empty slots corresponding to non-baserel RTEs */ - if (rel == NULL) - continue; - - Assert(rel->relid == rti); /* sanity check on array */ - - /* ignore RTEs that are "other rels" */ - if (rel->reloptkind != RELOPT_BASEREL) - continue; - - set_rel_pathlist(root, rel, rti, root->simple_rte_array[rti]); - } + Index rti; + + for (rti = 1; rti < root->simple_rel_array_size; rti++) + { + RelOptInfo *rel = root->simple_rel_array[rti]; + + /* there may be empty slots corresponding to non-baserel RTEs */ + if (rel == NULL) + continue; + + Assert(rel->relid == rti); /* sanity check on array */ + + /* ignore RTEs that are "other rels" */ + if (rel->reloptkind != RELOPT_BASEREL) + continue; + + set_rel_pathlist(root, rel, rti, root->simple_rte_array[rti]); + } } /* @@ -213,75 +211,75 @@ set_base_rel_pathlists(PlannerInfo *root) */ static void set_rel_size(PlannerInfo *root, RelOptInfo *rel, - Index rti, RangeTblEntry *rte) + Index rti, RangeTblEntry *rte) { - if (rel->reloptkind == RELOPT_BASEREL && - relation_excluded_by_constraints(root, rel, rte)) - { - /* - * We proved we don't need to scan the rel via constraint exclusion, - * so set up a single dummy path for it. Here we only check this for - * regular baserels; if it's an otherrel, CE was already checked in - * set_append_rel_pathlist(). - * - * In this case, we go ahead and set up the relation's path right away - * instead of leaving it for set_rel_pathlist to do. This is because - * we don't have a convention for marking a rel as dummy except by - * assigning a dummy path to it. - */ - set_dummy_rel_pathlist(rel); - } - else if (rte->inh) - { - /* It's an "append relation", process accordingly */ - set_append_rel_size(root, rel, rti, rte); - } - else - { - switch (rel->rtekind) - { - case RTE_RELATION: - if (rte->relkind == RELKIND_FOREIGN_TABLE) - { - /* Foreign table */ - set_foreign_size(root, rel, rte); - } - else - { - /* Plain relation */ - set_plain_rel_size(root, rel, rte); - } - break; - case RTE_SUBQUERY: - - /* - * Subqueries don't support parameterized paths, so just go - * ahead and build their paths immediately. - */ - set_subquery_pathlist(root, rel, rti, rte); - break; - case RTE_FUNCTION: - set_function_size_estimates(root, rel); - break; - case RTE_VALUES: - set_values_size_estimates(root, rel); - break; - case RTE_CTE: - - /* - * CTEs don't support parameterized paths, so just go ahead - * and build their paths immediately. - */ - if (rte->self_reference) - set_worktable_pathlist(root, rel, rte); - else - set_cte_pathlist(root, rel, rte); - break; - default: - elog(ERROR, "unexpected rtekind: %d", (int) rel->rtekind); - break; - } - } + if (rel->reloptkind == RELOPT_BASEREL && + relation_excluded_by_constraints(root, rel, rte)) + { + /* + * We proved we don't need to scan the rel via constraint exclusion, + * so set up a single dummy path for it. Here we only check this for + * regular baserels; if it's an otherrel, CE was already checked in + * set_append_rel_pathlist(). + * + * In this case, we go ahead and set up the relation's path right away + * instead of leaving it for set_rel_pathlist to do. This is because + * we don't have a convention for marking a rel as dummy except by + * assigning a dummy path to it. + */ + set_dummy_rel_pathlist(rel); + } + else if (rte->inh) + { + /* It's an "append relation", process accordingly */ + set_append_rel_size(root, rel, rti, rte); + } + else + { + switch (rel->rtekind) + { + case RTE_RELATION: + if (rte->relkind == RELKIND_FOREIGN_TABLE) + { + /* Foreign table */ + set_foreign_size(root, rel, rte); + } + else + { + /* Plain relation */ + set_plain_rel_size(root, rel, rte); + } + break; + case RTE_SUBQUERY: + + /* + * Subqueries don't support parameterized paths, so just go + * ahead and build their paths immediately. + */ + set_subquery_pathlist(root, rel, rti, rte); + break; + case RTE_FUNCTION: + set_function_size_estimates(root, rel); + break; + case RTE_VALUES: + set_values_size_estimates(root, rel); + break; + case RTE_CTE: + + /* + * CTEs don't support parameterized paths, so just go ahead + * and build their paths immediately. + */ + if (rte->self_reference) + set_worktable_pathlist(root, rel, rte); + else + set_cte_pathlist(root, rel, rte); + break; + default: + elog(ERROR, "unexpected rtekind: %d", (int) rel->rtekind); + break; + } + } } /* @@ -290,55 +288,55 @@ set_rel_size(PlannerInfo *root, RelOptInfo *rel, */ static void set_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, - Index rti, RangeTblEntry *rte) + Index rti, RangeTblEntry *rte) { - if (IS_DUMMY_REL(rel)) - { - /* We already proved the relation empty, so nothing more to do */ - } - else if (rte->inh) - { - /* It's an "append relation", process accordingly */ - set_append_rel_pathlist(root, rel, rti, rte); - } - else - { - switch (rel->rtekind) - { - case RTE_RELATION: - if (rte->relkind == RELKIND_FOREIGN_TABLE) - { - /* Foreign table */ - set_foreign_pathlist(root, rel, rte); - } - else - { - /* Plain relation */ - set_plain_rel_pathlist(root, rel, rte); - } - break; - case RTE_SUBQUERY: - /* Subquery --- fully handled during set_rel_size */ - break; - case RTE_FUNCTION: - /* RangeFunction */ - set_function_pathlist(root, rel, rte); - break; - case RTE_VALUES: - /* Values list */ - set_values_pathlist(root, rel, rte); - break; - case RTE_CTE: - /* CTE reference --- fully handled during set_rel_size */ - break; - default: - elog(ERROR, "unexpected rtekind: %d", (int) rel->rtekind); - break; - } - } - + if (IS_DUMMY_REL(rel)) + { + /* We already proved the relation empty, so nothing more to do */ + } + else if (rte->inh) + { + /* It's an "append relation", process accordingly */ + set_append_rel_pathlist(root, rel, rti, rte); + } + else + { + switch (rel->rtekind) + { + case RTE_RELATION: + if (rte->relkind == RELKIND_FOREIGN_TABLE) + { + /* Foreign table */ + set_foreign_pathlist(root, rel, rte); + } + else + { + /* Plain relation */ + set_plain_rel_pathlist(root, rel, rte); + } + break; + case RTE_SUBQUERY: + /* Subquery --- fully handled during set_rel_size */ + break; + case RTE_FUNCTION: + /* RangeFunction */ + set_function_pathlist(root, rel, rte); + break; + case RTE_VALUES: + /* Values list */ + set_values_pathlist(root, rel, rte); + break; + case RTE_CTE: + /* CTE reference --- fully handled during set_rel_size */ + break; + default: + elog(ERROR, "unexpected rtekind: %d", (int) rel->rtekind); + break; + } + } + #ifdef OPTIMIZER_DEBUG - debug_print_rel(root, rel); + debug_print_rel(root, rel); #endif } @@ -349,25 +347,25 @@ set_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, static void set_plain_rel_size(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte) { - /* - * Test any partial indexes of rel for applicability. We must do this - * first since partial unique indexes can affect size estimates. - */ - check_partial_indexes(root, rel); - - /* Mark rel with estimated output rows, width, etc */ - set_baserel_size_estimates(root, rel); - - /* - * Check to see if we can extract any restriction conditions from join - * quals that are OR-of-AND structures. If so, add them to the rel's - * restriction list, and redo the above steps. - */ - if (create_or_index_quals(root, rel)) - { - check_partial_indexes(root, rel); - set_baserel_size_estimates(root, rel); - } + /* + * Test any partial indexes of rel for applicability. We must do this + * first since partial unique indexes can affect size estimates. + */ + check_partial_indexes(root, rel); + + /* Mark rel with estimated output rows, width, etc */ + set_baserel_size_estimates(root, rel); + + /* + * Check to see if we can extract any restriction conditions from join + * quals that are OR-of-AND structures. If so, add them to the rel's + * restriction list, and redo the above steps. + */ + if (create_or_index_quals(root, rel)) + { + check_partial_indexes(root, rel); + set_baserel_size_estimates(root, rel); + } } /* @@ -378,45 +376,45 @@ set_plain_rel_size(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte) static void set_plain_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte) { - Path *seqscan_path; - - /* Consider sequential scan */ - seqscan_path = create_seqscan_path(root, rel, NULL); - - /* At this point, we check to see if we're dealing with a RECOMMEND - * query using FilterRecommend or JoinRecommend. If we are, we don't - * need to create any other paths at all, as SeqScan is required. - * Note that we also do this if this is the items table that is being - * used in a RecJoin. */ - if (rel->recommender) { - RecommendInfo *recInfo = (RecommendInfo*) rel->recommender; - if (recInfo->opType != OP_INDEX) { - /* A new type, to make our lives easier. Only do this - * if it's not OP_JOINPARTNER though. */ - if (recInfo->opType != OP_JOINPARTNER) - seqscan_path->pathtype = T_RecScan; - - rel->cheapest_startup_path = seqscan_path; - rel->cheapest_total_path = seqscan_path; - rel->cheapest_unique_path = NULL; - rel->cheapest_parameterized_paths = list_make1(seqscan_path); - - add_path(rel, seqscan_path); - - return; - } - } - - add_path(rel, seqscan_path); - - /* Consider index scans */ - create_index_paths(root, rel); - - /* Consider TID scans */ - create_tidscan_paths(root, rel); - - /* Now find the cheapest of the paths for this rel */ - set_cheapest(rel); + Path *seqscan_path; + + /* Consider sequential scan */ + seqscan_path = create_seqscan_path(root, rel, NULL); + + /* At this point, we check to see if we're dealing with a RECOMMEND + * query using FilterRecommend or JoinRecommend. If we are, we don't + * need to create any other paths at all, as SeqScan is required. + * Note that we also do this if this is the items table that is being + * used in a RecJoin. */ + if (rel->recommender) { + RecommendInfo *recInfo = (RecommendInfo*) rel->recommender; + if (recInfo->opType != OP_INDEX) { + /* A new type, to make our lives easier. Only do this + * if it's not OP_JOINPARTNER though. */ + if (recInfo->opType != OP_JOINPARTNER) + seqscan_path->pathtype = T_RecScan; + + rel->cheapest_startup_path = seqscan_path; + rel->cheapest_total_path = seqscan_path; + rel->cheapest_unique_path = NULL; + rel->cheapest_parameterized_paths = list_make1(seqscan_path); + + add_path(rel, seqscan_path); + + return; + } + } + + add_path(rel, seqscan_path); + + /* Consider index scans */ + create_index_paths(root, rel); + + /* Consider TID scans */ + create_tidscan_paths(root, rel); + + /* Now find the cheapest of the paths for this rel */ + set_cheapest(rel); } /* @@ -426,14 +424,14 @@ set_plain_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte) static void set_foreign_size(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte) { - /* Mark rel with estimated output rows, width, etc */ - set_foreign_size_estimates(root, rel); - - /* Get FDW routine pointers for the rel */ - rel->fdwroutine = GetFdwRoutineByRelId(rte->relid); - - /* Let FDW adjust the size estimates, if it can */ - rel->fdwroutine->GetForeignRelSize(root, rel, rte->relid); + /* Mark rel with estimated output rows, width, etc */ + set_foreign_size_estimates(root, rel); + + /* Get FDW routine pointers for the rel */ + rel->fdwroutine = GetFdwRoutineByRelId(rte->relid); + + /* Let FDW adjust the size estimates, if it can */ + rel->fdwroutine->GetForeignRelSize(root, rel, rte->relid); } /* @@ -443,11 +441,11 @@ set_foreign_size(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte) static void set_foreign_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte) { - /* Call the FDW's GetForeignPaths function to generate path(s) */ - rel->fdwroutine->GetForeignPaths(root, rel, rte->relid); - - /* Select cheapest path */ - set_cheapest(rel); + /* Call the FDW's GetForeignPaths function to generate path(s) */ + rel->fdwroutine->GetForeignPaths(root, rel, rte->relid); + + /* Select cheapest path */ + set_cheapest(rel); } /* @@ -463,221 +461,221 @@ set_foreign_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte) */ static void set_append_rel_size(PlannerInfo *root, RelOptInfo *rel, - Index rti, RangeTblEntry *rte) + Index rti, RangeTblEntry *rte) { - int parentRTindex = rti; - double parent_rows; - double parent_size; - double *parent_attrsizes; - int nattrs; - ListCell *l; - - /* - * Initialize to compute size estimates for whole append relation. - * - * We handle width estimates by weighting the widths of different child - * rels proportionally to their number of rows. This is sensible because - * the use of width estimates is mainly to compute the total relation - * "footprint" if we have to sort or hash it. To do this, we sum the - * total equivalent size (in "double" arithmetic) and then divide by the - * total rowcount estimate. This is done separately for the total rel - * width and each attribute. - * - * Note: if you consider changing this logic, beware that child rels could - * have zero rows and/or width, if they were excluded by constraints. - */ - parent_rows = 0; - parent_size = 0; - nattrs = rel->max_attr - rel->min_attr + 1; - parent_attrsizes = (double *) palloc0(nattrs * sizeof(double)); - - foreach(l, root->append_rel_list) - { - AppendRelInfo *appinfo = (AppendRelInfo *) lfirst(l); - int childRTindex; - RangeTblEntry *childRTE; - RelOptInfo *childrel; - List *childquals; - Node *childqual; - ListCell *parentvars; - ListCell *childvars; - - /* append_rel_list contains all append rels; ignore others */ - if (appinfo->parent_relid != parentRTindex) - continue; - - childRTindex = appinfo->child_relid; - childRTE = root->simple_rte_array[childRTindex]; - - /* - * The child rel's RelOptInfo was already created during - * add_base_rels_to_query. - */ - childrel = find_base_rel(root, childRTindex); - Assert(childrel->reloptkind == RELOPT_OTHER_MEMBER_REL); - - /* - * We have to copy the parent's targetlist and quals to the child, - * with appropriate substitution of variables. However, only the - * baserestrictinfo quals are needed before we can check for - * constraint exclusion; so do that first and then check to see if we - * can disregard this child. - * - * As of 8.4, the child rel's targetlist might contain non-Var - * expressions, which means that substitution into the quals could - * produce opportunities for const-simplification, and perhaps even - * pseudoconstant quals. To deal with this, we strip the RestrictInfo - * nodes, do the substitution, do const-simplification, and then - * reconstitute the RestrictInfo layer. - */ - childquals = get_all_actual_clauses(rel->baserestrictinfo); - childquals = (List *) adjust_appendrel_attrs(root, - (Node *) childquals, - appinfo); - childqual = eval_const_expressions(root, (Node *) - make_ands_explicit(childquals)); - if (childqual && IsA(childqual, Const) && - (((Const *) childqual)->constisnull || - !DatumGetBool(((Const *) childqual)->constvalue))) - { - /* - * Restriction reduces to constant FALSE or constant NULL after - * substitution, so this child need not be scanned. - */ - set_dummy_rel_pathlist(childrel); - continue; - } - childquals = make_ands_implicit((Expr *) childqual); - childquals = make_restrictinfos_from_actual_clauses(root, - childquals); - childrel->baserestrictinfo = childquals; - - if (relation_excluded_by_constraints(root, childrel, childRTE)) - { - /* - * This child need not be scanned, so we can omit it from the - * appendrel. - */ - set_dummy_rel_pathlist(childrel); - continue; - } - - /* - * CE failed, so finish copying/modifying targetlist and join quals. - * - * Note: the resulting childrel->reltargetlist may contain arbitrary - * expressions, which normally would not occur in a reltargetlist. - * That is okay because nothing outside of this routine will look at - * the child rel's reltargetlist. We do have to cope with the case - * while constructing attr_widths estimates below, though. - */ - childrel->joininfo = (List *) - adjust_appendrel_attrs(root, - (Node *) rel->joininfo, - appinfo); - childrel->reltargetlist = (List *) - adjust_appendrel_attrs(root, - (Node *) rel->reltargetlist, - appinfo); - - /* - * We have to make child entries in the EquivalenceClass data - * structures as well. This is needed either if the parent - * participates in some eclass joins (because we will want to consider - * inner-indexscan joins on the individual children) or if the parent - * has useful pathkeys (because we should try to build MergeAppend - * paths that produce those sort orderings). - */ - if (rel->has_eclass_joins || has_useful_pathkeys(root, rel)) - add_child_rel_equivalences(root, appinfo, rel, childrel); - childrel->has_eclass_joins = rel->has_eclass_joins; - - /* - * Note: we could compute appropriate attr_needed data for the child's - * variables, by transforming the parent's attr_needed through the - * translated_vars mapping. However, currently there's no need - * because attr_needed is only examined for base relations not - * otherrels. So we just leave the child's attr_needed empty. - */ - - /* - * Compute the child's size. - */ - set_rel_size(root, childrel, childRTindex, childRTE); - - /* - * It is possible that constraint exclusion detected a contradiction - * within a child subquery, even though we didn't prove one above. If - * so, we can skip this child. - */ - if (IS_DUMMY_REL(childrel)) - continue; - - /* - * Accumulate size information from each live child. - */ - if (childrel->rows > 0) - { - parent_rows += childrel->rows; - parent_size += childrel->width * childrel->rows; - - /* - * Accumulate per-column estimates too. We need not do anything - * for PlaceHolderVars in the parent list. If child expression - * isn't a Var, or we didn't record a width estimate for it, we - * have to fall back on a datatype-based estimate. - * - * By construction, child's reltargetlist is 1-to-1 with parent's. - */ - forboth(parentvars, rel->reltargetlist, - childvars, childrel->reltargetlist) - { - Var *parentvar = (Var *) lfirst(parentvars); - Node *childvar = (Node *) lfirst(childvars); - - if (IsA(parentvar, Var)) - { - int pndx = parentvar->varattno - rel->min_attr; - int32 child_width = 0; - - if (IsA(childvar, Var)) - { - int cndx = ((Var *) childvar)->varattno - childrel->min_attr; - - child_width = childrel->attr_widths[cndx]; - } - if (child_width <= 0) - child_width = get_typavgwidth(exprType(childvar), - exprTypmod(childvar)); - Assert(child_width > 0); - parent_attrsizes[pndx] += child_width * childrel->rows; - } - } - } - } - - /* - * Save the finished size estimates. - */ - rel->rows = parent_rows; - if (parent_rows > 0) - { - int i; - - rel->width = rint(parent_size / parent_rows); - for (i = 0; i < nattrs; i++) - rel->attr_widths[i] = rint(parent_attrsizes[i] / parent_rows); - } - else - rel->width = 0; /* attr_widths should be zero already */ - - /* - * Set "raw tuples" count equal to "rows" for the appendrel; needed - * because some places assume rel->tuples is valid for any baserel. - */ - rel->tuples = parent_rows; - - pfree(parent_attrsizes); + int parentRTindex = rti; + double parent_rows; + double parent_size; + double *parent_attrsizes; + int nattrs; + ListCell *l; + + /* + * Initialize to compute size estimates for whole append relation. + * + * We handle width estimates by weighting the widths of different child + * rels proportionally to their number of rows. This is sensible because + * the use of width estimates is mainly to compute the total relation + * "footprint" if we have to sort or hash it. To do this, we sum the + * total equivalent size (in "double" arithmetic) and then divide by the + * total rowcount estimate. This is done separately for the total rel + * width and each attribute. + * + * Note: if you consider changing this logic, beware that child rels could + * have zero rows and/or width, if they were excluded by constraints. + */ + parent_rows = 0; + parent_size = 0; + nattrs = rel->max_attr - rel->min_attr + 1; + parent_attrsizes = (double *) palloc0(nattrs * sizeof(double)); + + foreach(l, root->append_rel_list) + { + AppendRelInfo *appinfo = (AppendRelInfo *) lfirst(l); + int childRTindex; + RangeTblEntry *childRTE; + RelOptInfo *childrel; + List *childquals; + Node *childqual; + ListCell *parentvars; + ListCell *childvars; + + /* append_rel_list contains all append rels; ignore others */ + if (appinfo->parent_relid != parentRTindex) + continue; + + childRTindex = appinfo->child_relid; + childRTE = root->simple_rte_array[childRTindex]; + + /* + * The child rel's RelOptInfo was already created during + * add_base_rels_to_query. + */ + childrel = find_base_rel(root, childRTindex); + Assert(childrel->reloptkind == RELOPT_OTHER_MEMBER_REL); + + /* + * We have to copy the parent's targetlist and quals to the child, + * with appropriate substitution of variables. However, only the + * baserestrictinfo quals are needed before we can check for + * constraint exclusion; so do that first and then check to see if we + * can disregard this child. + * + * As of 8.4, the child rel's targetlist might contain non-Var + * expressions, which means that substitution into the quals could + * produce opportunities for const-simplification, and perhaps even + * pseudoconstant quals. To deal with this, we strip the RestrictInfo + * nodes, do the substitution, do const-simplification, and then + * reconstitute the RestrictInfo layer. + */ + childquals = get_all_actual_clauses(rel->baserestrictinfo); + childquals = (List *) adjust_appendrel_attrs(root, + (Node *) childquals, + appinfo); + childqual = eval_const_expressions(root, (Node *) + make_ands_explicit(childquals)); + if (childqual && IsA(childqual, Const) && + (((Const *) childqual)->constisnull || + !DatumGetBool(((Const *) childqual)->constvalue))) + { + /* + * Restriction reduces to constant FALSE or constant NULL after + * substitution, so this child need not be scanned. + */ + set_dummy_rel_pathlist(childrel); + continue; + } + childquals = make_ands_implicit((Expr *) childqual); + childquals = make_restrictinfos_from_actual_clauses(root, + childquals); + childrel->baserestrictinfo = childquals; + + if (relation_excluded_by_constraints(root, childrel, childRTE)) + { + /* + * This child need not be scanned, so we can omit it from the + * appendrel. + */ + set_dummy_rel_pathlist(childrel); + continue; + } + + /* + * CE failed, so finish copying/modifying targetlist and join quals. + * + * Note: the resulting childrel->reltargetlist may contain arbitrary + * expressions, which normally would not occur in a reltargetlist. + * That is okay because nothing outside of this routine will look at + * the child rel's reltargetlist. We do have to cope with the case + * while constructing attr_widths estimates below, though. + */ + childrel->joininfo = (List *) + adjust_appendrel_attrs(root, + (Node *) rel->joininfo, + appinfo); + childrel->reltargetlist = (List *) + adjust_appendrel_attrs(root, + (Node *) rel->reltargetlist, + appinfo); + + /* + * We have to make child entries in the EquivalenceClass data + * structures as well. This is needed either if the parent + * participates in some eclass joins (because we will want to consider + * inner-indexscan joins on the individual children) or if the parent + * has useful pathkeys (because we should try to build MergeAppend + * paths that produce those sort orderings). + */ + if (rel->has_eclass_joins || has_useful_pathkeys(root, rel)) + add_child_rel_equivalences(root, appinfo, rel, childrel); + childrel->has_eclass_joins = rel->has_eclass_joins; + + /* + * Note: we could compute appropriate attr_needed data for the child's + * variables, by transforming the parent's attr_needed through the + * translated_vars mapping. However, currently there's no need + * because attr_needed is only examined for base relations not + * otherrels. So we just leave the child's attr_needed empty. + */ + + /* + * Compute the child's size. + */ + set_rel_size(root, childrel, childRTindex, childRTE); + + /* + * It is possible that constraint exclusion detected a contradiction + * within a child subquery, even though we didn't prove one above. If + * so, we can skip this child. + */ + if (IS_DUMMY_REL(childrel)) + continue; + + /* + * Accumulate size information from each live child. + */ + if (childrel->rows > 0) + { + parent_rows += childrel->rows; + parent_size += childrel->width * childrel->rows; + + /* + * Accumulate per-column estimates too. We need not do anything + * for PlaceHolderVars in the parent list. If child expression + * isn't a Var, or we didn't record a width estimate for it, we + * have to fall back on a datatype-based estimate. + * + * By construction, child's reltargetlist is 1-to-1 with parent's. + */ + forboth(parentvars, rel->reltargetlist, + childvars, childrel->reltargetlist) + { + Var *parentvar = (Var *) lfirst(parentvars); + Node *childvar = (Node *) lfirst(childvars); + + if (IsA(parentvar, Var)) + { + int pndx = parentvar->varattno - rel->min_attr; + int32 child_width = 0; + + if (IsA(childvar, Var)) + { + int cndx = ((Var *) childvar)->varattno - childrel->min_attr; + + child_width = childrel->attr_widths[cndx]; + } + if (child_width <= 0) + child_width = get_typavgwidth(exprType(childvar), + exprTypmod(childvar)); + Assert(child_width > 0); + parent_attrsizes[pndx] += child_width * childrel->rows; + } + } + } + } + + /* + * Save the finished size estimates. + */ + rel->rows = parent_rows; + if (parent_rows > 0) + { + int i; + + rel->width = rint(parent_size / parent_rows); + for (i = 0; i < nattrs; i++) + rel->attr_widths[i] = rint(parent_attrsizes[i] / parent_rows); + } + else + rel->width = 0; /* attr_widths should be zero already */ + + /* + * Set "raw tuples" count equal to "rows" for the appendrel; needed + * because some places assume rel->tuples is valid for any baserel. + */ + rel->tuples = parent_rows; + + pfree(parent_attrsizes); } /* @@ -686,192 +684,192 @@ set_append_rel_size(PlannerInfo *root, RelOptInfo *rel, */ static void set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, - Index rti, RangeTblEntry *rte) + Index rti, RangeTblEntry *rte) { - int parentRTindex = rti; - List *live_childrels = NIL; - List *subpaths = NIL; - List *all_child_pathkeys = NIL; - List *all_child_outers = NIL; - ListCell *l; - - /* - * Generate access paths for each member relation, and remember the - * cheapest path for each one. Also, identify all pathkeys (orderings) - * and parameterizations (required_outer sets) available for the member - * relations. - */ - foreach(l, root->append_rel_list) - { - AppendRelInfo *appinfo = (AppendRelInfo *) lfirst(l); - int childRTindex; - RangeTblEntry *childRTE; - RelOptInfo *childrel; - ListCell *lcp; - - /* append_rel_list contains all append rels; ignore others */ - if (appinfo->parent_relid != parentRTindex) - continue; - - /* Re-locate the child RTE and RelOptInfo */ - childRTindex = appinfo->child_relid; - childRTE = root->simple_rte_array[childRTindex]; - childrel = root->simple_rel_array[childRTindex]; - - /* - * Compute the child's access paths. - */ - set_rel_pathlist(root, childrel, childRTindex, childRTE); - - /* - * If child is dummy, ignore it. - */ - if (IS_DUMMY_REL(childrel)) - continue; - - /* - * Child is live, so add its cheapest access path to the Append path - * we are constructing for the parent. - */ - subpaths = accumulate_append_subpath(subpaths, - childrel->cheapest_total_path); - - /* Remember which childrels are live, for logic below */ - live_childrels = lappend(live_childrels, childrel); - - /* - * Collect lists of all the available path orderings and - * parameterizations for all the children. We use these as a - * heuristic to indicate which sort orderings and parameterizations we - * should build Append and MergeAppend paths for. - */ - foreach(lcp, childrel->pathlist) - { - Path *childpath = (Path *) lfirst(lcp); - List *childkeys = childpath->pathkeys; - Relids childouter = PATH_REQ_OUTER(childpath); - - /* Unsorted paths don't contribute to pathkey list */ - if (childkeys != NIL) - { - ListCell *lpk; - bool found = false; - - /* Have we already seen this ordering? */ - foreach(lpk, all_child_pathkeys) - { - List *existing_pathkeys = (List *) lfirst(lpk); - - if (compare_pathkeys(existing_pathkeys, - childkeys) == PATHKEYS_EQUAL) - { - found = true; - break; - } - } - if (!found) - { - /* No, so add it to all_child_pathkeys */ - all_child_pathkeys = lappend(all_child_pathkeys, - childkeys); - } - } - - /* Unparameterized paths don't contribute to param-set list */ - if (childouter) - { - ListCell *lco; - bool found = false; - - /* Have we already seen this param set? */ - foreach(lco, all_child_outers) - { - Relids existing_outers = (Relids) lfirst(lco); - - if (bms_equal(existing_outers, childouter)) - { - found = true; - break; - } - } - if (!found) - { - /* No, so add it to all_child_outers */ - all_child_outers = lappend(all_child_outers, - childouter); - } - } - } - } - - /* - * Next, build an unordered, unparameterized Append path for the rel. - * (Note: this is correct even if we have zero or one live subpath due to - * constraint exclusion.) - */ - add_path(rel, (Path *) create_append_path(rel, subpaths, NULL)); - - /* - * Build unparameterized MergeAppend paths based on the collected list of - * child pathkeys. - */ - generate_mergeappend_paths(root, rel, live_childrels, all_child_pathkeys); - - /* - * Build Append paths for each parameterization seen among the child rels. - * (This may look pretty expensive, but in most cases of practical - * interest, the child rels will expose mostly the same parameterizations, - * so that not that many cases actually get considered here.) - * - * The Append node itself cannot enforce quals, so all qual checking must - * be done in the child paths. This means that to have a parameterized - * Append path, we must have the exact same parameterization for each - * child path; otherwise some children might be failing to check the - * moved-down quals. To make them match up, we can try to increase the - * parameterization of lesser-parameterized paths. - */ - foreach(l, all_child_outers) - { - Relids required_outer = (Relids) lfirst(l); - bool ok = true; - ListCell *lcr; - - /* Select the child paths for an Append with this parameterization */ - subpaths = NIL; - foreach(lcr, live_childrels) - { - RelOptInfo *childrel = (RelOptInfo *) lfirst(lcr); - Path *cheapest_total; - - cheapest_total = - get_cheapest_path_for_pathkeys(childrel->pathlist, - NIL, - required_outer, - TOTAL_COST); - Assert(cheapest_total != NULL); - - /* Children must have exactly the desired parameterization */ - if (!bms_equal(PATH_REQ_OUTER(cheapest_total), required_outer)) - { - cheapest_total = reparameterize_path(root, cheapest_total, - required_outer, 1.0); - if (cheapest_total == NULL) - { - ok = false; - break; - } - } - - subpaths = accumulate_append_subpath(subpaths, cheapest_total); - } - - if (ok) - add_path(rel, (Path *) - create_append_path(rel, subpaths, required_outer)); - } - - /* Select cheapest paths */ - set_cheapest(rel); + int parentRTindex = rti; + List *live_childrels = NIL; + List *subpaths = NIL; + List *all_child_pathkeys = NIL; + List *all_child_outers = NIL; + ListCell *l; + + /* + * Generate access paths for each member relation, and remember the + * cheapest path for each one. Also, identify all pathkeys (orderings) + * and parameterizations (required_outer sets) available for the member + * relations. + */ + foreach(l, root->append_rel_list) + { + AppendRelInfo *appinfo = (AppendRelInfo *) lfirst(l); + int childRTindex; + RangeTblEntry *childRTE; + RelOptInfo *childrel; + ListCell *lcp; + + /* append_rel_list contains all append rels; ignore others */ + if (appinfo->parent_relid != parentRTindex) + continue; + + /* Re-locate the child RTE and RelOptInfo */ + childRTindex = appinfo->child_relid; + childRTE = root->simple_rte_array[childRTindex]; + childrel = root->simple_rel_array[childRTindex]; + + /* + * Compute the child's access paths. + */ + set_rel_pathlist(root, childrel, childRTindex, childRTE); + + /* + * If child is dummy, ignore it. + */ + if (IS_DUMMY_REL(childrel)) + continue; + + /* + * Child is live, so add its cheapest access path to the Append path + * we are constructing for the parent. + */ + subpaths = accumulate_append_subpath(subpaths, + childrel->cheapest_total_path); + + /* Remember which childrels are live, for logic below */ + live_childrels = lappend(live_childrels, childrel); + + /* + * Collect lists of all the available path orderings and + * parameterizations for all the children. We use these as a + * heuristic to indicate which sort orderings and parameterizations we + * should build Append and MergeAppend paths for. + */ + foreach(lcp, childrel->pathlist) + { + Path *childpath = (Path *) lfirst(lcp); + List *childkeys = childpath->pathkeys; + Relids childouter = PATH_REQ_OUTER(childpath); + + /* Unsorted paths don't contribute to pathkey list */ + if (childkeys != NIL) + { + ListCell *lpk; + bool found = false; + + /* Have we already seen this ordering? */ + foreach(lpk, all_child_pathkeys) + { + List *existing_pathkeys = (List *) lfirst(lpk); + + if (compare_pathkeys(existing_pathkeys, + childkeys) == PATHKEYS_EQUAL) + { + found = true; + break; + } + } + if (!found) + { + /* No, so add it to all_child_pathkeys */ + all_child_pathkeys = lappend(all_child_pathkeys, + childkeys); + } + } + + /* Unparameterized paths don't contribute to param-set list */ + if (childouter) + { + ListCell *lco; + bool found = false; + + /* Have we already seen this param set? */ + foreach(lco, all_child_outers) + { + Relids existing_outers = (Relids) lfirst(lco); + + if (bms_equal(existing_outers, childouter)) + { + found = true; + break; + } + } + if (!found) + { + /* No, so add it to all_child_outers */ + all_child_outers = lappend(all_child_outers, + childouter); + } + } + } + } + + /* + * Next, build an unordered, unparameterized Append path for the rel. + * (Note: this is correct even if we have zero or one live subpath due to + * constraint exclusion.) + */ + add_path(rel, (Path *) create_append_path(rel, subpaths, NULL)); + + /* + * Build unparameterized MergeAppend paths based on the collected list of + * child pathkeys. + */ + generate_mergeappend_paths(root, rel, live_childrels, all_child_pathkeys); + + /* + * Build Append paths for each parameterization seen among the child rels. + * (This may look pretty expensive, but in most cases of practical + * interest, the child rels will expose mostly the same parameterizations, + * so that not that many cases actually get considered here.) + * + * The Append node itself cannot enforce quals, so all qual checking must + * be done in the child paths. This means that to have a parameterized + * Append path, we must have the exact same parameterization for each + * child path; otherwise some children might be failing to check the + * moved-down quals. To make them match up, we can try to increase the + * parameterization of lesser-parameterized paths. + */ + foreach(l, all_child_outers) + { + Relids required_outer = (Relids) lfirst(l); + bool ok = true; + ListCell *lcr; + + /* Select the child paths for an Append with this parameterization */ + subpaths = NIL; + foreach(lcr, live_childrels) + { + RelOptInfo *childrel = (RelOptInfo *) lfirst(lcr); + Path *cheapest_total; + + cheapest_total = + get_cheapest_path_for_pathkeys(childrel->pathlist, + NIL, + required_outer, + TOTAL_COST); + Assert(cheapest_total != NULL); + + /* Children must have exactly the desired parameterization */ + if (!bms_equal(PATH_REQ_OUTER(cheapest_total), required_outer)) + { + cheapest_total = reparameterize_path(root, cheapest_total, + required_outer, 1.0); + if (cheapest_total == NULL) + { + ok = false; + break; + } + } + + subpaths = accumulate_append_subpath(subpaths, cheapest_total); + } + + if (ok) + add_path(rel, (Path *) + create_append_path(rel, subpaths, required_outer)); + } + + /* Select cheapest paths */ + set_cheapest(rel); } /* @@ -899,76 +897,76 @@ set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, */ static void generate_mergeappend_paths(PlannerInfo *root, RelOptInfo *rel, - List *live_childrels, - List *all_child_pathkeys) + List *live_childrels, + List *all_child_pathkeys) { - ListCell *lcp; - - foreach(lcp, all_child_pathkeys) - { - List *pathkeys = (List *) lfirst(lcp); - List *startup_subpaths = NIL; - List *total_subpaths = NIL; - bool startup_neq_total = false; - ListCell *lcr; - - /* Select the child paths for this ordering... */ - foreach(lcr, live_childrels) - { - RelOptInfo *childrel = (RelOptInfo *) lfirst(lcr); - Path *cheapest_startup, - *cheapest_total; - - /* Locate the right paths, if they are available. */ - cheapest_startup = - get_cheapest_path_for_pathkeys(childrel->pathlist, - pathkeys, - NULL, - STARTUP_COST); - cheapest_total = - get_cheapest_path_for_pathkeys(childrel->pathlist, - pathkeys, - NULL, - TOTAL_COST); - - /* - * If we can't find any paths with the right order just use the - * cheapest-total path; we'll have to sort it later. - */ - if (cheapest_startup == NULL || cheapest_total == NULL) - { - cheapest_startup = cheapest_total = - childrel->cheapest_total_path; - Assert(cheapest_total != NULL); - } - - /* - * Notice whether we actually have different paths for the - * "cheapest" and "total" cases; frequently there will be no point - * in two create_merge_append_path() calls. - */ - if (cheapest_startup != cheapest_total) - startup_neq_total = true; - - startup_subpaths = - accumulate_append_subpath(startup_subpaths, cheapest_startup); - total_subpaths = - accumulate_append_subpath(total_subpaths, cheapest_total); - } - - /* ... and build the MergeAppend paths */ - add_path(rel, (Path *) create_merge_append_path(root, - rel, - startup_subpaths, - pathkeys, - NULL)); - if (startup_neq_total) - add_path(rel, (Path *) create_merge_append_path(root, - rel, - total_subpaths, - pathkeys, - NULL)); - } + ListCell *lcp; + + foreach(lcp, all_child_pathkeys) + { + List *pathkeys = (List *) lfirst(lcp); + List *startup_subpaths = NIL; + List *total_subpaths = NIL; + bool startup_neq_total = false; + ListCell *lcr; + + /* Select the child paths for this ordering... */ + foreach(lcr, live_childrels) + { + RelOptInfo *childrel = (RelOptInfo *) lfirst(lcr); + Path *cheapest_startup, + *cheapest_total; + + /* Locate the right paths, if they are available. */ + cheapest_startup = + get_cheapest_path_for_pathkeys(childrel->pathlist, + pathkeys, + NULL, + STARTUP_COST); + cheapest_total = + get_cheapest_path_for_pathkeys(childrel->pathlist, + pathkeys, + NULL, + TOTAL_COST); + + /* + * If we can't find any paths with the right order just use the + * cheapest-total path; we'll have to sort it later. + */ + if (cheapest_startup == NULL || cheapest_total == NULL) + { + cheapest_startup = cheapest_total = + childrel->cheapest_total_path; + Assert(cheapest_total != NULL); + } + + /* + * Notice whether we actually have different paths for the + * "cheapest" and "total" cases; frequently there will be no point + * in two create_merge_append_path() calls. + */ + if (cheapest_startup != cheapest_total) + startup_neq_total = true; + + startup_subpaths = + accumulate_append_subpath(startup_subpaths, cheapest_startup); + total_subpaths = + accumulate_append_subpath(total_subpaths, cheapest_total); + } + + /* ... and build the MergeAppend paths */ + add_path(rel, (Path *) create_merge_append_path(root, + rel, + startup_subpaths, + pathkeys, + NULL)); + if (startup_neq_total) + add_path(rel, (Path *) create_merge_append_path(root, + rel, + total_subpaths, + pathkeys, + NULL)); + } } /* @@ -983,15 +981,15 @@ generate_mergeappend_paths(PlannerInfo *root, RelOptInfo *rel, static List * accumulate_append_subpath(List *subpaths, Path *path) { - if (IsA(path, AppendPath)) - { - AppendPath *apath = (AppendPath *) path; - - /* list_copy is important here to avoid sharing list substructure */ - return list_concat(subpaths, list_copy(apath->subpaths)); - } - else - return lappend(subpaths, path); + if (IsA(path, AppendPath)) + { + AppendPath *apath = (AppendPath *) path; + + /* list_copy is important here to avoid sharing list substructure */ + return list_concat(subpaths, list_copy(apath->subpaths)); + } + else + return lappend(subpaths, path); } /* @@ -1004,39 +1002,39 @@ accumulate_append_subpath(List *subpaths, Path *path) static void set_dummy_rel_pathlist(RelOptInfo *rel) { - /* Set dummy size estimates --- we leave attr_widths[] as zeroes */ - rel->rows = 0; - rel->width = 0; - - /* Discard any pre-existing paths; no further need for them */ - rel->pathlist = NIL; - - add_path(rel, (Path *) create_append_path(rel, NIL, NULL)); - - /* Select cheapest path (pretty easy in this case...) */ - set_cheapest(rel); + /* Set dummy size estimates --- we leave attr_widths[] as zeroes */ + rel->rows = 0; + rel->width = 0; + + /* Discard any pre-existing paths; no further need for them */ + rel->pathlist = NIL; + + add_path(rel, (Path *) create_append_path(rel, NIL, NULL)); + + /* Select cheapest path (pretty easy in this case...) */ + set_cheapest(rel); } /* quick-and-dirty test to see if any joining is needed */ static bool has_multiple_baserels(PlannerInfo *root) { - int num_base_rels = 0; - Index rti; - - for (rti = 1; rti < root->simple_rel_array_size; rti++) - { - RelOptInfo *brel = root->simple_rel_array[rti]; - - if (brel == NULL) - continue; - - /* ignore RTEs that are "other rels" */ - if (brel->reloptkind == RELOPT_BASEREL) - if (++num_base_rels > 1) - return true; - } - return false; + int num_base_rels = 0; + Index rti; + + for (rti = 1; rti < root->simple_rel_array_size; rti++) + { + RelOptInfo *brel = root->simple_rel_array[rti]; + + if (brel == NULL) + continue; + + /* ignore RTEs that are "other rels" */ + if (brel->reloptkind == RELOPT_BASEREL) + if (++num_base_rels > 1) + return true; + } + return false; } /* @@ -1048,134 +1046,127 @@ has_multiple_baserels(PlannerInfo *root) */ static void set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel, - Index rti, RangeTblEntry *rte) + Index rti, RangeTblEntry *rte) { - Query *parse = root->parse; - Query *subquery = rte->subquery; - Query * tempQuery; - bool *differentTypes; - double tuple_fraction; - PlannerInfo *subroot; - List *pathkeys; - - /* - * Must copy the Query so that planning doesn't mess up the RTE contents - * (really really need to fix the planner to not scribble on its input, - * someday). - */ - - //NEW FOR RECDB - //Prevent an error from happening while using a recommender in sub query - tempQuery = copyObject(subquery); - copyQueryHelper(tempQuery, subquery); - subquery = tempQuery; - - - /* We need a workspace for keeping track of set-op type coercions */ - differentTypes = (bool *) - palloc0((list_length(subquery->targetList) + 1) * sizeof(bool)); - - /* - * If there are any restriction clauses that have been attached to the - * subquery relation, consider pushing them down to become WHERE or HAVING - * quals of the subquery itself. This transformation is useful because it - * may allow us to generate a better plan for the subquery than evaluating - * all the subquery output rows and then filtering them. - * - * There are several cases where we cannot push down clauses. Restrictions - * involving the subquery are checked by subquery_is_pushdown_safe(). - * Restrictions on individual clauses are checked by - * qual_is_pushdown_safe(). Also, we don't want to push down - * pseudoconstant clauses; better to have the gating node above the - * subquery. - * - * Also, if the sub-query has "security_barrier" flag, it means the - * sub-query originated from a view that must enforce row-level security. - * We must not push down quals in order to avoid information leaks, either - * via side-effects or error output. - * - * Non-pushed-down clauses will get evaluated as qpquals of the - * SubqueryScan node. - * - * XXX Are there any cases where we want to make a policy decision not to - * push down a pushable qual, because it'd result in a worse plan? - */ - if (rel->baserestrictinfo != NIL && - subquery_is_pushdown_safe(subquery, subquery, differentTypes)) - { - /* OK to consider pushing down individual quals */ - List *upperrestrictlist = NIL; - ListCell *l; - - foreach(l, rel->baserestrictinfo) - { - RestrictInfo *rinfo = (RestrictInfo *) lfirst(l); - Node *clause = (Node *) rinfo->clause; - - if (!rinfo->pseudoconstant && - (!rte->security_barrier || - !contain_leaky_functions(clause)) && - qual_is_pushdown_safe(subquery, rti, clause, differentTypes)) - { - /* Push it down */ - subquery_push_qual(subquery, rte, rti, clause); - } - else - { - /* Keep it in the upper query */ - upperrestrictlist = lappend(upperrestrictlist, rinfo); - } - } - rel->baserestrictinfo = upperrestrictlist; - } - - pfree(differentTypes); - - /* - * We can safely pass the outer tuple_fraction down to the subquery if the - * outer level has no joining, aggregation, or sorting to do. Otherwise - * we'd better tell the subquery to plan for full retrieval. (XXX This - * could probably be made more intelligent ...) - */ - if (parse->hasAggs || - parse->groupClause || - parse->havingQual || - parse->distinctClause || - parse->sortClause || - has_multiple_baserels(root)) - tuple_fraction = 0.0; /* default case */ - else - tuple_fraction = root->tuple_fraction; - - /* Generate the plan for the subquery */ - rel->subplan = subquery_planner(root->glob, subquery, - root, - false, tuple_fraction, - &subroot); - rel->subroot = subroot; - - /* - * It's possible that constraint exclusion proved the subquery empty. If - * so, it's convenient to turn it back into a dummy path so that we will - * recognize appropriate optimizations at this level. - */ - if (is_dummy_plan(rel->subplan)) - { - set_dummy_rel_pathlist(rel); - return; - } - - /* Mark rel with estimated output rows, width, etc */ - set_subquery_size_estimates(root, rel); - - /* Convert subquery pathkeys to outer representation */ - pathkeys = convert_subquery_pathkeys(root, rel, subroot->query_pathkeys); - - /* Generate appropriate path */ - add_path(rel, create_subqueryscan_path(root, rel, pathkeys, NULL)); - - /* Select cheapest path (pretty easy in this case...) */ - set_cheapest(rel); + Query *parse = root->parse; + Query *subquery = rte->subquery; + bool *differentTypes; + double tuple_fraction; + PlannerInfo *subroot; + List *pathkeys; + + /* + * Must copy the Query so that planning doesn't mess up the RTE contents + * (really really need to fix the planner to not scribble on its input, + * someday). + */ + subquery = copyObject(subquery); + + /* We need a workspace for keeping track of set-op type coercions */ + differentTypes = (bool *) + palloc0((list_length(subquery->targetList) + 1) * sizeof(bool)); + + /* + * If there are any restriction clauses that have been attached to the + * subquery relation, consider pushing them down to become WHERE or HAVING + * quals of the subquery itself. This transformation is useful because it + * may allow us to generate a better plan for the subquery than evaluating + * all the subquery output rows and then filtering them. + * + * There are several cases where we cannot push down clauses. Restrictions + * involving the subquery are checked by subquery_is_pushdown_safe(). + * Restrictions on individual clauses are checked by + * qual_is_pushdown_safe(). Also, we don't want to push down + * pseudoconstant clauses; better to have the gating node above the + * subquery. + * + * Also, if the sub-query has "security_barrier" flag, it means the + * sub-query originated from a view that must enforce row-level security. + * We must not push down quals in order to avoid information leaks, either + * via side-effects or error output. + * + * Non-pushed-down clauses will get evaluated as qpquals of the + * SubqueryScan node. + * + * XXX Are there any cases where we want to make a policy decision not to + * push down a pushable qual, because it'd result in a worse plan? + */ + if (rel->baserestrictinfo != NIL && + subquery_is_pushdown_safe(subquery, subquery, differentTypes)) + { + /* OK to consider pushing down individual quals */ + List *upperrestrictlist = NIL; + ListCell *l; + + foreach(l, rel->baserestrictinfo) + { + RestrictInfo *rinfo = (RestrictInfo *) lfirst(l); + Node *clause = (Node *) rinfo->clause; + + if (!rinfo->pseudoconstant && + (!rte->security_barrier || + !contain_leaky_functions(clause)) && + qual_is_pushdown_safe(subquery, rti, clause, differentTypes)) + { + /* Push it down */ + subquery_push_qual(subquery, rte, rti, clause); + } + else + { + /* Keep it in the upper query */ + upperrestrictlist = lappend(upperrestrictlist, rinfo); + } + } + rel->baserestrictinfo = upperrestrictlist; + } + + pfree(differentTypes); + + /* + * We can safely pass the outer tuple_fraction down to the subquery if the + * outer level has no joining, aggregation, or sorting to do. Otherwise + * we'd better tell the subquery to plan for full retrieval. (XXX This + * could probably be made more intelligent ...) + */ + if (parse->hasAggs || + parse->groupClause || + parse->havingQual || + parse->distinctClause || + parse->sortClause || + has_multiple_baserels(root)) + tuple_fraction = 0.0; /* default case */ + else + tuple_fraction = root->tuple_fraction; + + /* Generate the plan for the subquery */ + rel->subplan = subquery_planner(root->glob, subquery, + root, + false, tuple_fraction, + &subroot); + rel->subroot = subroot; + + /* + * It's possible that constraint exclusion proved the subquery empty. If + * so, it's convenient to turn it back into a dummy path so that we will + * recognize appropriate optimizations at this level. + */ + if (is_dummy_plan(rel->subplan)) + { + set_dummy_rel_pathlist(rel); + return; + } + + /* Mark rel with estimated output rows, width, etc */ + set_subquery_size_estimates(root, rel); + + /* Convert subquery pathkeys to outer representation */ + pathkeys = convert_subquery_pathkeys(root, rel, subroot->query_pathkeys); + + /* Generate appropriate path */ + add_path(rel, create_subqueryscan_path(root, rel, pathkeys, NULL)); + + /* Select cheapest path (pretty easy in this case...) */ + set_cheapest(rel); } /* @@ -1185,11 +1176,11 @@ set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel, static void set_function_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte) { - /* Generate appropriate path */ - add_path(rel, create_functionscan_path(root, rel)); - - /* Select cheapest path (pretty easy in this case...) */ - set_cheapest(rel); + /* Generate appropriate path */ + add_path(rel, create_functionscan_path(root, rel)); + + /* Select cheapest path (pretty easy in this case...) */ + set_cheapest(rel); } /* @@ -1199,11 +1190,11 @@ set_function_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte) static void set_values_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte) { - /* Generate appropriate path */ - add_path(rel, create_valuesscan_path(root, rel)); - - /* Select cheapest path (pretty easy in this case...) */ - set_cheapest(rel); + /* Generate appropriate path */ + add_path(rel, create_valuesscan_path(root, rel)); + + /* Select cheapest path (pretty easy in this case...) */ + set_cheapest(rel); } /* @@ -1216,55 +1207,55 @@ set_values_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte) static void set_cte_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte) { - Plan *cteplan; - PlannerInfo *cteroot; - Index levelsup; - int ndx; - ListCell *lc; - int plan_id; - - /* - * Find the referenced CTE, and locate the plan previously made for it. - */ - levelsup = rte->ctelevelsup; - cteroot = root; - while (levelsup-- > 0) - { - cteroot = cteroot->parent_root; - if (!cteroot) /* shouldn't happen */ - elog(ERROR, "bad levelsup for CTE \"%s\"", rte->ctename); - } - - /* - * Note: cte_plan_ids can be shorter than cteList, if we are still working - * on planning the CTEs (ie, this is a side-reference from another CTE). - * So we mustn't use forboth here. - */ - ndx = 0; - foreach(lc, cteroot->parse->cteList) - { - CommonTableExpr *cte = (CommonTableExpr *) lfirst(lc); - - if (strcmp(cte->ctename, rte->ctename) == 0) - break; - ndx++; - } - if (lc == NULL) /* shouldn't happen */ - elog(ERROR, "could not find CTE \"%s\"", rte->ctename); - if (ndx >= list_length(cteroot->cte_plan_ids)) - elog(ERROR, "could not find plan for CTE \"%s\"", rte->ctename); - plan_id = list_nth_int(cteroot->cte_plan_ids, ndx); - Assert(plan_id > 0); - cteplan = (Plan *) list_nth(root->glob->subplans, plan_id - 1); - - /* Mark rel with estimated output rows, width, etc */ - set_cte_size_estimates(root, rel, cteplan); - - /* Generate appropriate path */ - add_path(rel, create_ctescan_path(root, rel)); - - /* Select cheapest path (pretty easy in this case...) */ - set_cheapest(rel); + Plan *cteplan; + PlannerInfo *cteroot; + Index levelsup; + int ndx; + ListCell *lc; + int plan_id; + + /* + * Find the referenced CTE, and locate the plan previously made for it. + */ + levelsup = rte->ctelevelsup; + cteroot = root; + while (levelsup-- > 0) + { + cteroot = cteroot->parent_root; + if (!cteroot) /* shouldn't happen */ + elog(ERROR, "bad levelsup for CTE \"%s\"", rte->ctename); + } + + /* + * Note: cte_plan_ids can be shorter than cteList, if we are still working + * on planning the CTEs (ie, this is a side-reference from another CTE). + * So we mustn't use forboth here. + */ + ndx = 0; + foreach(lc, cteroot->parse->cteList) + { + CommonTableExpr *cte = (CommonTableExpr *) lfirst(lc); + + if (strcmp(cte->ctename, rte->ctename) == 0) + break; + ndx++; + } + if (lc == NULL) /* shouldn't happen */ + elog(ERROR, "could not find CTE \"%s\"", rte->ctename); + if (ndx >= list_length(cteroot->cte_plan_ids)) + elog(ERROR, "could not find plan for CTE \"%s\"", rte->ctename); + plan_id = list_nth_int(cteroot->cte_plan_ids, ndx); + Assert(plan_id > 0); + cteplan = (Plan *) list_nth(root->glob->subplans, plan_id - 1); + + /* Mark rel with estimated output rows, width, etc */ + set_cte_size_estimates(root, rel, cteplan); + + /* Generate appropriate path */ + add_path(rel, create_ctescan_path(root, rel)); + + /* Select cheapest path (pretty easy in this case...) */ + set_cheapest(rel); } /* @@ -1277,38 +1268,38 @@ set_cte_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte) static void set_worktable_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte) { - Plan *cteplan; - PlannerInfo *cteroot; - Index levelsup; - - /* - * We need to find the non-recursive term's plan, which is in the plan - * level that's processing the recursive UNION, which is one level *below* - * where the CTE comes from. - */ - levelsup = rte->ctelevelsup; - if (levelsup == 0) /* shouldn't happen */ - elog(ERROR, "bad levelsup for CTE \"%s\"", rte->ctename); - levelsup--; - cteroot = root; - while (levelsup-- > 0) - { - cteroot = cteroot->parent_root; - if (!cteroot) /* shouldn't happen */ - elog(ERROR, "bad levelsup for CTE \"%s\"", rte->ctename); - } - cteplan = cteroot->non_recursive_plan; - if (!cteplan) /* shouldn't happen */ - elog(ERROR, "could not find plan for CTE \"%s\"", rte->ctename); - - /* Mark rel with estimated output rows, width, etc */ - set_cte_size_estimates(root, rel, cteplan); - - /* Generate appropriate path */ - add_path(rel, create_worktablescan_path(root, rel)); - - /* Select cheapest path (pretty easy in this case...) */ - set_cheapest(rel); + Plan *cteplan; + PlannerInfo *cteroot; + Index levelsup; + + /* + * We need to find the non-recursive term's plan, which is in the plan + * level that's processing the recursive UNION, which is one level *below* + * where the CTE comes from. + */ + levelsup = rte->ctelevelsup; + if (levelsup == 0) /* shouldn't happen */ + elog(ERROR, "bad levelsup for CTE \"%s\"", rte->ctename); + levelsup--; + cteroot = root; + while (levelsup-- > 0) + { + cteroot = cteroot->parent_root; + if (!cteroot) /* shouldn't happen */ + elog(ERROR, "bad levelsup for CTE \"%s\"", rte->ctename); + } + cteplan = cteroot->non_recursive_plan; + if (!cteplan) /* shouldn't happen */ + elog(ERROR, "could not find plan for CTE \"%s\"", rte->ctename); + + /* Mark rel with estimated output rows, width, etc */ + set_cte_size_estimates(root, rel, cteplan); + + /* Generate appropriate path */ + add_path(rel, create_worktablescan_path(root, rel)); + + /* Select cheapest path (pretty easy in this case...) */ + set_cheapest(rel); } /* @@ -1321,77 +1312,77 @@ set_worktable_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte) static RelOptInfo * make_rel_from_joinlist(PlannerInfo *root, List *joinlist) { - int levels_needed; - List *initial_rels; - ListCell *jl; - - /* - * Count the number of child joinlist nodes. This is the depth of the - * dynamic-programming algorithm we must employ to consider all ways of - * joining the child nodes. - */ - levels_needed = list_length(joinlist); - - if (levels_needed <= 0) - return NULL; /* nothing to do? */ - - /* - * Construct a list of rels corresponding to the child joinlist nodes. - * This may contain both base rels and rels constructed according to - * sub-joinlists. - */ - initial_rels = NIL; - foreach(jl, joinlist) - { - Node *jlnode = (Node *) lfirst(jl); - RelOptInfo *thisrel; - - if (IsA(jlnode, RangeTblRef)) - { - int varno = ((RangeTblRef *) jlnode)->rtindex; - - thisrel = find_base_rel(root, varno); - } - else if (IsA(jlnode, List)) - { - /* Recurse to handle subproblem */ - thisrel = make_rel_from_joinlist(root, (List *) jlnode); - } - else - { - elog(ERROR, "unrecognized joinlist node type: %d", - (int) nodeTag(jlnode)); - thisrel = NULL; /* keep compiler quiet */ - } - - initial_rels = lappend(initial_rels, thisrel); - } - - if (levels_needed == 1) - { - /* - * Single joinlist node, so we're done. - */ - return (RelOptInfo *) linitial(initial_rels); - } - else - { - /* - * Consider the different orders in which we could join the rels, - * using a plugin, GEQO, or the regular join search code. - * - * We put the initial_rels list into a PlannerInfo field because - * has_legal_joinclause() needs to look at it (ugly :-(). - */ - root->initial_rels = initial_rels; - - if (join_search_hook) - return (*join_search_hook) (root, levels_needed, initial_rels); - else if (enable_geqo && levels_needed >= geqo_threshold) - return geqo(root, levels_needed, initial_rels); - else - return standard_join_search(root, levels_needed, initial_rels); - } + int levels_needed; + List *initial_rels; + ListCell *jl; + + /* + * Count the number of child joinlist nodes. This is the depth of the + * dynamic-programming algorithm we must employ to consider all ways of + * joining the child nodes. + */ + levels_needed = list_length(joinlist); + + if (levels_needed <= 0) + return NULL; /* nothing to do? */ + + /* + * Construct a list of rels corresponding to the child joinlist nodes. + * This may contain both base rels and rels constructed according to + * sub-joinlists. + */ + initial_rels = NIL; + foreach(jl, joinlist) + { + Node *jlnode = (Node *) lfirst(jl); + RelOptInfo *thisrel; + + if (IsA(jlnode, RangeTblRef)) + { + int varno = ((RangeTblRef *) jlnode)->rtindex; + + thisrel = find_base_rel(root, varno); + } + else if (IsA(jlnode, List)) + { + /* Recurse to handle subproblem */ + thisrel = make_rel_from_joinlist(root, (List *) jlnode); + } + else + { + elog(ERROR, "unrecognized joinlist node type: %d", + (int) nodeTag(jlnode)); + thisrel = NULL; /* keep compiler quiet */ + } + + initial_rels = lappend(initial_rels, thisrel); + } + + if (levels_needed == 1) + { + /* + * Single joinlist node, so we're done. + */ + return (RelOptInfo *) linitial(initial_rels); + } + else + { + /* + * Consider the different orders in which we could join the rels, + * using a plugin, GEQO, or the regular join search code. + * + * We put the initial_rels list into a PlannerInfo field because + * has_legal_joinclause() needs to look at it (ugly :-(). + */ + root->initial_rels = initial_rels; + + if (join_search_hook) + return (*join_search_hook) (root, levels_needed, initial_rels); + else if (enable_geqo && levels_needed >= geqo_threshold) + return geqo(root, levels_needed, initial_rels); + else + return standard_join_search(root, levels_needed, initial_rels); + } } /* @@ -1426,69 +1417,69 @@ make_rel_from_joinlist(PlannerInfo *root, List *joinlist) RelOptInfo * standard_join_search(PlannerInfo *root, int levels_needed, List *initial_rels) { - int lev; - RelOptInfo *rel; - - /* - * This function cannot be invoked recursively within any one planning - * problem, so join_rel_level[] can't be in use already. - */ - Assert(root->join_rel_level == NULL); - - /* - * We employ a simple "dynamic programming" algorithm: we first find all - * ways to build joins of two jointree items, then all ways to build joins - * of three items (from two-item joins and single items), then four-item - * joins, and so on until we have considered all ways to join all the - * items into one rel. - * - * root->join_rel_level[j] is a list of all the j-item rels. Initially we - * set root->join_rel_level[1] to represent all the single-jointree-item - * relations. - */ - root->join_rel_level = (List **) palloc0((levels_needed + 1) * sizeof(List *)); - - root->join_rel_level[1] = initial_rels; - - for (lev = 2; lev <= levels_needed; lev++) - { - ListCell *lc; - - /* - * Determine all possible pairs of relations to be joined at this - * level, and build paths for making each one from every available - * pair of lower-level relations. - */ - join_search_one_level(root, lev); - - /* - * Do cleanup work on each just-processed rel. - */ - foreach(lc, root->join_rel_level[lev]) - { - rel = (RelOptInfo *) lfirst(lc); - - /* Find and save the cheapest paths for this rel */ - set_cheapest(rel); - + int lev; + RelOptInfo *rel; + + /* + * This function cannot be invoked recursively within any one planning + * problem, so join_rel_level[] can't be in use already. + */ + Assert(root->join_rel_level == NULL); + + /* + * We employ a simple "dynamic programming" algorithm: we first find all + * ways to build joins of two jointree items, then all ways to build joins + * of three items (from two-item joins and single items), then four-item + * joins, and so on until we have considered all ways to join all the + * items into one rel. + * + * root->join_rel_level[j] is a list of all the j-item rels. Initially we + * set root->join_rel_level[1] to represent all the single-jointree-item + * relations. + */ + root->join_rel_level = (List **) palloc0((levels_needed + 1) * sizeof(List *)); + + root->join_rel_level[1] = initial_rels; + + for (lev = 2; lev <= levels_needed; lev++) + { + ListCell *lc; + + /* + * Determine all possible pairs of relations to be joined at this + * level, and build paths for making each one from every available + * pair of lower-level relations. + */ + join_search_one_level(root, lev); + + /* + * Do cleanup work on each just-processed rel. + */ + foreach(lc, root->join_rel_level[lev]) + { + rel = (RelOptInfo *) lfirst(lc); + + /* Find and save the cheapest paths for this rel */ + set_cheapest(rel); + #ifdef OPTIMIZER_DEBUG - debug_print_rel(root, rel); + debug_print_rel(root, rel); #endif - } - } - - /* - * We should have a single rel at the final level. - */ - if (root->join_rel_level[levels_needed] == NIL) - elog(ERROR, "failed to build any %d-way joins", levels_needed); - Assert(list_length(root->join_rel_level[levels_needed]) == 1); - - rel = (RelOptInfo *) linitial(root->join_rel_level[levels_needed]); - - root->join_rel_level = NULL; - - return rel; + } + } + + /* + * We should have a single rel at the final level. + */ + if (root->join_rel_level[levels_needed] == NIL) + elog(ERROR, "failed to build any %d-way joins", levels_needed); + Assert(list_length(root->join_rel_level[levels_needed]) == 1); + + rel = (RelOptInfo *) linitial(root->join_rel_level[levels_needed]); + + root->join_rel_level = NULL; + + return rel; } /***************************************************************************** @@ -1527,44 +1518,44 @@ standard_join_search(PlannerInfo *root, int levels_needed, List *initial_rels) */ static bool subquery_is_pushdown_safe(Query *subquery, Query *topquery, - bool *differentTypes) + bool *differentTypes) { - SetOperationStmt *topop; - - /* Check point 1 */ - if (subquery->limitOffset != NULL || subquery->limitCount != NULL) - return false; - - /* Check point 2 */ - if (subquery->hasWindowFuncs) - return false; - - /* Check point 3 (new for Recathon) */ - if (subquery->recommendStmt) - return false; - - /* Are we at top level, or looking at a setop component? */ - if (subquery == topquery) - { - /* Top level, so check any component queries */ - if (subquery->setOperations != NULL) - if (!recurse_pushdown_safe(subquery->setOperations, topquery, - differentTypes)) - return false; - } - else - { - /* Setop component must not have more components (too weird) */ - if (subquery->setOperations != NULL) - return false; - /* Check whether setop component output types match top level */ - topop = (SetOperationStmt *) topquery->setOperations; - Assert(topop && IsA(topop, SetOperationStmt)); - compare_tlist_datatypes(subquery->targetList, - topop->colTypes, - differentTypes); - } - return true; + SetOperationStmt *topop; + + /* Check point 1 */ + if (subquery->limitOffset != NULL || subquery->limitCount != NULL) + return false; + + /* Check point 2 */ + if (subquery->hasWindowFuncs) + return false; + + /* Check point 3 (new for Recathon) */ + if (subquery->recommendStmt) + return false; + + /* Are we at top level, or looking at a setop component? */ + if (subquery == topquery) + { + /* Top level, so check any component queries */ + if (subquery->setOperations != NULL) + if (!recurse_pushdown_safe(subquery->setOperations, topquery, + differentTypes)) + return false; + } + else + { + /* Setop component must not have more components (too weird) */ + if (subquery->setOperations != NULL) + return false; + /* Check whether setop component output types match top level */ + topop = (SetOperationStmt *) topquery->setOperations; + Assert(topop && IsA(topop, SetOperationStmt)); + compare_tlist_datatypes(subquery->targetList, + topop->colTypes, + differentTypes); + } + return true; } /* @@ -1572,36 +1563,36 @@ subquery_is_pushdown_safe(Query *subquery, Query *topquery, */ static bool recurse_pushdown_safe(Node *setOp, Query *topquery, - bool *differentTypes) + bool *differentTypes) { - if (IsA(setOp, RangeTblRef)) - { - RangeTblRef *rtr = (RangeTblRef *) setOp; - RangeTblEntry *rte = rt_fetch(rtr->rtindex, topquery->rtable); - Query *subquery = rte->subquery; - - Assert(subquery != NULL); - return subquery_is_pushdown_safe(subquery, topquery, differentTypes); - } - else if (IsA(setOp, SetOperationStmt)) - { - SetOperationStmt *op = (SetOperationStmt *) setOp; - - /* EXCEPT is no good */ - if (op->op == SETOP_EXCEPT) - return false; - /* Else recurse */ - if (!recurse_pushdown_safe(op->larg, topquery, differentTypes)) - return false; - if (!recurse_pushdown_safe(op->rarg, topquery, differentTypes)) - return false; - } - else - { - elog(ERROR, "unrecognized node type: %d", - (int) nodeTag(setOp)); - } - return true; + if (IsA(setOp, RangeTblRef)) + { + RangeTblRef *rtr = (RangeTblRef *) setOp; + RangeTblEntry *rte = rt_fetch(rtr->rtindex, topquery->rtable); + Query *subquery = rte->subquery; + + Assert(subquery != NULL); + return subquery_is_pushdown_safe(subquery, topquery, differentTypes); + } + else if (IsA(setOp, SetOperationStmt)) + { + SetOperationStmt *op = (SetOperationStmt *) setOp; + + /* EXCEPT is no good */ + if (op->op == SETOP_EXCEPT) + return false; + /* Else recurse */ + if (!recurse_pushdown_safe(op->larg, topquery, differentTypes)) + return false; + if (!recurse_pushdown_safe(op->rarg, topquery, differentTypes)) + return false; + } + else + { + elog(ERROR, "unrecognized node type: %d", + (int) nodeTag(setOp)); + } + return true; } /* @@ -1615,25 +1606,25 @@ recurse_pushdown_safe(Node *setOp, Query *topquery, */ static void compare_tlist_datatypes(List *tlist, List *colTypes, - bool *differentTypes) + bool *differentTypes) { - ListCell *l; - ListCell *colType = list_head(colTypes); - - foreach(l, tlist) - { - TargetEntry *tle = (TargetEntry *) lfirst(l); - - if (tle->resjunk) - continue; /* ignore resjunk columns */ - if (colType == NULL) - elog(ERROR, "wrong number of tlist entries"); - if (exprType((Node *) tle->expr) != lfirst_oid(colType)) - differentTypes[tle->resno] = true; - colType = lnext(colType); - } - if (colType != NULL) - elog(ERROR, "wrong number of tlist entries"); + ListCell *l; + ListCell *colType = list_head(colTypes); + + foreach(l, tlist) + { + TargetEntry *tle = (TargetEntry *) lfirst(l); + + if (tle->resjunk) + continue; /* ignore resjunk columns */ + if (colType == NULL) + elog(ERROR, "wrong number of tlist entries"); + if (exprType((Node *) tle->expr) != lfirst_oid(colType)) + differentTypes[tle->resno] = true; + colType = lnext(colType); + } + if (colType != NULL) + elog(ERROR, "wrong number of tlist entries"); } /* @@ -1674,107 +1665,107 @@ compare_tlist_datatypes(List *tlist, List *colTypes, */ static bool qual_is_pushdown_safe(Query *subquery, Index rti, Node *qual, - bool *differentTypes) + bool *differentTypes) { - bool safe = true; - List *vars; - ListCell *vl; - Bitmapset *tested = NULL; - - /* Refuse subselects (point 1) */ - if (contain_subplans(qual)) - return false; - - /* - * It would be unsafe to push down window function calls, but at least for - * the moment we could never see any in a qual anyhow. (The same applies - * to aggregates, which we check for in pull_var_clause below.) - */ - Assert(!contain_window_function(qual)); - - /* - * Examine all Vars used in clause; since it's a restriction clause, all - * such Vars must refer to subselect output columns. - */ - vars = pull_var_clause(qual, - PVC_REJECT_AGGREGATES, - PVC_INCLUDE_PLACEHOLDERS); - foreach(vl, vars) - { - Var *var = (Var *) lfirst(vl); - TargetEntry *tle; - - /* - * XXX Punt if we find any PlaceHolderVars in the restriction clause. - * It's not clear whether a PHV could safely be pushed down, and even - * less clear whether such a situation could arise in any cases of - * practical interest anyway. So for the moment, just refuse to push - * down. - */ - if (!IsA(var, Var)) - { - safe = false; - break; - } - - Assert(var->varno == rti); - - /* Check point 2 */ - if (var->varattno == 0) - { - safe = false; - break; - } - - /* - * We use a bitmapset to avoid testing the same attno more than once. - * (NB: this only works because subquery outputs can't have negative - * attnos.) - */ - if (bms_is_member(var->varattno, tested)) - continue; - tested = bms_add_member(tested, var->varattno); - - /* Check point 3 */ - if (differentTypes[var->varattno]) - { - safe = false; - break; - } - - /* Must find the tlist element referenced by the Var */ - tle = get_tle_by_resno(subquery->targetList, var->varattno); - Assert(tle != NULL); - Assert(!tle->resjunk); - - /* If subquery uses DISTINCT ON, check point 4 */ - if (subquery->hasDistinctOn && - !targetIsInSortList(tle, InvalidOid, subquery->distinctClause)) - { - /* non-DISTINCT column, so fail */ - safe = false; - break; - } - - /* Refuse functions returning sets (point 5) */ - if (expression_returns_set((Node *) tle->expr)) - { - safe = false; - break; - } - - /* Refuse volatile functions (point 6) */ - if (contain_volatile_functions((Node *) tle->expr)) - { - safe = false; - break; - } - } - - list_free(vars); - bms_free(tested); - - return safe; + bool safe = true; + List *vars; + ListCell *vl; + Bitmapset *tested = NULL; + + /* Refuse subselects (point 1) */ + if (contain_subplans(qual)) + return false; + + /* + * It would be unsafe to push down window function calls, but at least for + * the moment we could never see any in a qual anyhow. (The same applies + * to aggregates, which we check for in pull_var_clause below.) + */ + Assert(!contain_window_function(qual)); + + /* + * Examine all Vars used in clause; since it's a restriction clause, all + * such Vars must refer to subselect output columns. + */ + vars = pull_var_clause(qual, + PVC_REJECT_AGGREGATES, + PVC_INCLUDE_PLACEHOLDERS); + foreach(vl, vars) + { + Var *var = (Var *) lfirst(vl); + TargetEntry *tle; + + /* + * XXX Punt if we find any PlaceHolderVars in the restriction clause. + * It's not clear whether a PHV could safely be pushed down, and even + * less clear whether such a situation could arise in any cases of + * practical interest anyway. So for the moment, just refuse to push + * down. + */ + if (!IsA(var, Var)) + { + safe = false; + break; + } + + Assert(var->varno == rti); + + /* Check point 2 */ + if (var->varattno == 0) + { + safe = false; + break; + } + + /* + * We use a bitmapset to avoid testing the same attno more than once. + * (NB: this only works because subquery outputs can't have negative + * attnos.) + */ + if (bms_is_member(var->varattno, tested)) + continue; + tested = bms_add_member(tested, var->varattno); + + /* Check point 3 */ + if (differentTypes[var->varattno]) + { + safe = false; + break; + } + + /* Must find the tlist element referenced by the Var */ + tle = get_tle_by_resno(subquery->targetList, var->varattno); + Assert(tle != NULL); + Assert(!tle->resjunk); + + /* If subquery uses DISTINCT ON, check point 4 */ + if (subquery->hasDistinctOn && + !targetIsInSortList(tle, InvalidOid, subquery->distinctClause)) + { + /* non-DISTINCT column, so fail */ + safe = false; + break; + } + + /* Refuse functions returning sets (point 5) */ + if (expression_returns_set((Node *) tle->expr)) + { + safe = false; + break; + } + + /* Refuse volatile functions (point 6) */ + if (contain_volatile_functions((Node *) tle->expr)) + { + safe = false; + break; + } + } + + list_free(vars); + bms_free(tested); + + return safe; } /* @@ -1783,45 +1774,45 @@ qual_is_pushdown_safe(Query *subquery, Index rti, Node *qual, static void subquery_push_qual(Query *subquery, RangeTblEntry *rte, Index rti, Node *qual) { - if (subquery->setOperations != NULL) - { - /* Recurse to push it separately to each component query */ - recurse_push_qual(subquery->setOperations, subquery, - rte, rti, qual); - } - else - { - /* - * We need to replace Vars in the qual (which must refer to outputs of - * the subquery) with copies of the subquery's targetlist expressions. - * Note that at this point, any uplevel Vars in the qual should have - * been replaced with Params, so they need no work. - * - * This step also ensures that when we are pushing into a setop tree, - * each component query gets its own copy of the qual. - */ - qual = ResolveNew(qual, rti, 0, rte, - subquery->targetList, - CMD_SELECT, 0, - &subquery->hasSubLinks); - - /* - * Now attach the qual to the proper place: normally WHERE, but if the - * subquery uses grouping or aggregation, put it in HAVING (since the - * qual really refers to the group-result rows). - */ - if (subquery->hasAggs || subquery->groupClause || subquery->havingQual) - subquery->havingQual = make_and_qual(subquery->havingQual, qual); - else - subquery->jointree->quals = - make_and_qual(subquery->jointree->quals, qual); - - /* - * We need not change the subquery's hasAggs or hasSublinks flags, - * since we can't be pushing down any aggregates that weren't there - * before, and we don't push down subselects at all. - */ - } + if (subquery->setOperations != NULL) + { + /* Recurse to push it separately to each component query */ + recurse_push_qual(subquery->setOperations, subquery, + rte, rti, qual); + } + else + { + /* + * We need to replace Vars in the qual (which must refer to outputs of + * the subquery) with copies of the subquery's targetlist expressions. + * Note that at this point, any uplevel Vars in the qual should have + * been replaced with Params, so they need no work. + * + * This step also ensures that when we are pushing into a setop tree, + * each component query gets its own copy of the qual. + */ + qual = ResolveNew(qual, rti, 0, rte, + subquery->targetList, + CMD_SELECT, 0, + &subquery->hasSubLinks); + + /* + * Now attach the qual to the proper place: normally WHERE, but if the + * subquery uses grouping or aggregation, put it in HAVING (since the + * qual really refers to the group-result rows). + */ + if (subquery->hasAggs || subquery->groupClause || subquery->havingQual) + subquery->havingQual = make_and_qual(subquery->havingQual, qual); + else + subquery->jointree->quals = + make_and_qual(subquery->jointree->quals, qual); + + /* + * We need not change the subquery's hasAggs or hasSublinks flags, + * since we can't be pushing down any aggregates that weren't there + * before, and we don't push down subselects at all. + */ + } } /* @@ -1829,29 +1820,29 @@ subquery_push_qual(Query *subquery, RangeTblEntry *rte, Index rti, Node *qual) */ static void recurse_push_qual(Node *setOp, Query *topquery, - RangeTblEntry *rte, Index rti, Node *qual) + RangeTblEntry *rte, Index rti, Node *qual) { - if (IsA(setOp, RangeTblRef)) - { - RangeTblRef *rtr = (RangeTblRef *) setOp; - RangeTblEntry *subrte = rt_fetch(rtr->rtindex, topquery->rtable); - Query *subquery = subrte->subquery; - - Assert(subquery != NULL); - subquery_push_qual(subquery, rte, rti, qual); - } - else if (IsA(setOp, SetOperationStmt)) - { - SetOperationStmt *op = (SetOperationStmt *) setOp; - - recurse_push_qual(op->larg, topquery, rte, rti, qual); - recurse_push_qual(op->rarg, topquery, rte, rti, qual); - } - else - { - elog(ERROR, "unrecognized node type: %d", - (int) nodeTag(setOp)); - } + if (IsA(setOp, RangeTblRef)) + { + RangeTblRef *rtr = (RangeTblRef *) setOp; + RangeTblEntry *subrte = rt_fetch(rtr->rtindex, topquery->rtable); + Query *subquery = subrte->subquery; + + Assert(subquery != NULL); + subquery_push_qual(subquery, rte, rti, qual); + } + else if (IsA(setOp, SetOperationStmt)) + { + SetOperationStmt *op = (SetOperationStmt *) setOp; + + recurse_push_qual(op->larg, topquery, rte, rti, qual); + recurse_push_qual(op->rarg, topquery, rte, rti, qual); + } + else + { + elog(ERROR, "unrecognized node type: %d", + (int) nodeTag(setOp)); + } } /***************************************************************************** @@ -1863,183 +1854,183 @@ recurse_push_qual(Node *setOp, Query *topquery, static void print_relids(Relids relids) { - Relids tmprelids; - int x; - bool first = true; - - tmprelids = bms_copy(relids); - while ((x = bms_first_member(tmprelids)) >= 0) - { - if (!first) - printf(" "); - printf("%d", x); - first = false; - } - bms_free(tmprelids); + Relids tmprelids; + int x; + bool first = true; + + tmprelids = bms_copy(relids); + while ((x = bms_first_member(tmprelids)) >= 0) + { + if (!first) + printf(" "); + printf("%d", x); + first = false; + } + bms_free(tmprelids); } static void print_restrictclauses(PlannerInfo *root, List *clauses) { - ListCell *l; - - foreach(l, clauses) - { - RestrictInfo *c = lfirst(l); - - print_expr((Node *) c->clause, root->parse->rtable); - if (lnext(l)) - printf(", "); - } + ListCell *l; + + foreach(l, clauses) + { + RestrictInfo *c = lfirst(l); + + print_expr((Node *) c->clause, root->parse->rtable); + if (lnext(l)) + printf(", "); + } } static void print_path(PlannerInfo *root, Path *path, int indent) { - const char *ptype; - bool join = false; - Path *subpath = NULL; - int i; - - switch (nodeTag(path)) - { - case T_Path: - ptype = "SeqScan"; - break; - case T_IndexPath: - ptype = "IdxScan"; - break; - case T_BitmapHeapPath: - ptype = "BitmapHeapScan"; - break; - case T_BitmapAndPath: - ptype = "BitmapAndPath"; - break; - case T_BitmapOrPath: - ptype = "BitmapOrPath"; - break; - case T_TidPath: - ptype = "TidScan"; - break; - case T_ForeignPath: - ptype = "ForeignScan"; - break; - case T_AppendPath: - ptype = "Append"; - break; - case T_MergeAppendPath: - ptype = "MergeAppend"; - break; - case T_ResultPath: - ptype = "Result"; - break; - case T_MaterialPath: - ptype = "Material"; - subpath = ((MaterialPath *) path)->subpath; - break; - case T_UniquePath: - ptype = "Unique"; - subpath = ((UniquePath *) path)->subpath; - break; - case T_NestPath: - ptype = "NestLoop"; - join = true; - break; - case T_MergePath: - ptype = "MergeJoin"; - join = true; - break; - case T_HashPath: - ptype = "HashJoin"; - join = true; - break; - default: - ptype = "???Path"; - break; - } - - for (i = 0; i < indent; i++) - printf("\t"); - printf("%s", ptype); - - if (path->parent) - { - printf("("); - print_relids(path->parent->relids); - printf(") rows=%.0f", path->parent->rows); - } - printf(" cost=%.2f..%.2f\n", path->startup_cost, path->total_cost); - - if (path->pathkeys) - { - for (i = 0; i < indent; i++) - printf("\t"); - printf(" pathkeys: "); - print_pathkeys(path->pathkeys, root->parse->rtable); - } - - if (join) - { - JoinPath *jp = (JoinPath *) path; - - for (i = 0; i < indent; i++) - printf("\t"); - printf(" clauses: "); - print_restrictclauses(root, jp->joinrestrictinfo); - printf("\n"); - - if (IsA(path, MergePath)) - { - MergePath *mp = (MergePath *) path; - - for (i = 0; i < indent; i++) - printf("\t"); - printf(" sortouter=%d sortinner=%d materializeinner=%d\n", - ((mp->outersortkeys) ? 1 : 0), - ((mp->innersortkeys) ? 1 : 0), - ((mp->materialize_inner) ? 1 : 0)); - } - - print_path(root, jp->outerjoinpath, indent + 1); - print_path(root, jp->innerjoinpath, indent + 1); - } - - if (subpath) - print_path(root, subpath, indent + 1); + const char *ptype; + bool join = false; + Path *subpath = NULL; + int i; + + switch (nodeTag(path)) + { + case T_Path: + ptype = "SeqScan"; + break; + case T_IndexPath: + ptype = "IdxScan"; + break; + case T_BitmapHeapPath: + ptype = "BitmapHeapScan"; + break; + case T_BitmapAndPath: + ptype = "BitmapAndPath"; + break; + case T_BitmapOrPath: + ptype = "BitmapOrPath"; + break; + case T_TidPath: + ptype = "TidScan"; + break; + case T_ForeignPath: + ptype = "ForeignScan"; + break; + case T_AppendPath: + ptype = "Append"; + break; + case T_MergeAppendPath: + ptype = "MergeAppend"; + break; + case T_ResultPath: + ptype = "Result"; + break; + case T_MaterialPath: + ptype = "Material"; + subpath = ((MaterialPath *) path)->subpath; + break; + case T_UniquePath: + ptype = "Unique"; + subpath = ((UniquePath *) path)->subpath; + break; + case T_NestPath: + ptype = "NestLoop"; + join = true; + break; + case T_MergePath: + ptype = "MergeJoin"; + join = true; + break; + case T_HashPath: + ptype = "HashJoin"; + join = true; + break; + default: + ptype = "???Path"; + break; + } + + for (i = 0; i < indent; i++) + printf("\t"); + printf("%s", ptype); + + if (path->parent) + { + printf("("); + print_relids(path->parent->relids); + printf(") rows=%.0f", path->parent->rows); + } + printf(" cost=%.2f..%.2f\n", path->startup_cost, path->total_cost); + + if (path->pathkeys) + { + for (i = 0; i < indent; i++) + printf("\t"); + printf(" pathkeys: "); + print_pathkeys(path->pathkeys, root->parse->rtable); + } + + if (join) + { + JoinPath *jp = (JoinPath *) path; + + for (i = 0; i < indent; i++) + printf("\t"); + printf(" clauses: "); + print_restrictclauses(root, jp->joinrestrictinfo); + printf("\n"); + + if (IsA(path, MergePath)) + { + MergePath *mp = (MergePath *) path; + + for (i = 0; i < indent; i++) + printf("\t"); + printf(" sortouter=%d sortinner=%d materializeinner=%d\n", + ((mp->outersortkeys) ? 1 : 0), + ((mp->innersortkeys) ? 1 : 0), + ((mp->materialize_inner) ? 1 : 0)); + } + + print_path(root, jp->outerjoinpath, indent + 1); + print_path(root, jp->innerjoinpath, indent + 1); + } + + if (subpath) + print_path(root, subpath, indent + 1); } void debug_print_rel(PlannerInfo *root, RelOptInfo *rel) { - ListCell *l; - - printf("RELOPTINFO ("); - print_relids(rel->relids); - printf("): rows=%.0f width=%d\n", rel->rows, rel->width); - - if (rel->baserestrictinfo) - { - printf("\tbaserestrictinfo: "); - print_restrictclauses(root, rel->baserestrictinfo); - printf("\n"); - } - - if (rel->joininfo) - { - printf("\tjoininfo: "); - print_restrictclauses(root, rel->joininfo); - printf("\n"); - } - - printf("\tpath list:\n"); - foreach(l, rel->pathlist) - print_path(root, lfirst(l), 1); - printf("\n\tcheapest startup path:\n"); - print_path(root, rel->cheapest_startup_path, 1); - printf("\n\tcheapest total path:\n"); - print_path(root, rel->cheapest_total_path, 1); - printf("\n"); - fflush(stdout); + ListCell *l; + + printf("RELOPTINFO ("); + print_relids(rel->relids); + printf("): rows=%.0f width=%d\n", rel->rows, rel->width); + + if (rel->baserestrictinfo) + { + printf("\tbaserestrictinfo: "); + print_restrictclauses(root, rel->baserestrictinfo); + printf("\n"); + } + + if (rel->joininfo) + { + printf("\tjoininfo: "); + print_restrictclauses(root, rel->joininfo); + printf("\n"); + } + + printf("\tpath list:\n"); + foreach(l, rel->pathlist) + print_path(root, lfirst(l), 1); + printf("\n\tcheapest startup path:\n"); + print_path(root, rel->cheapest_startup_path, 1); + printf("\n\tcheapest total path:\n"); + print_path(root, rel->cheapest_total_path, 1); + printf("\n"); + fflush(stdout); } #endif /* OPTIMIZER_DEBUG */ diff --git a/PostgreSQL/src/backend/optimizer/plan/planner.c b/PostgreSQL/src/backend/optimizer/plan/planner.c index 1c991d3..d9c657f 100644 --- a/PostgreSQL/src/backend/optimizer/plan/planner.c +++ b/PostgreSQL/src/backend/optimizer/plan/planner.c @@ -418,16 +418,25 @@ subquery_planner(PlannerGlobal *glob, Query *parse, /* NEW FOR RECDB */ /* Do expression preprocessing on the user-focused WHERE clause as well. */ - if (parse->recommendStmt) { - RecommendInfo *recInfo; - AttributeInfo *attInfo; + /*In order to Prevent an error from happening while using copy functions + * we do expression preprocessing on range table entries directly*/ - recInfo = (RecommendInfo*) parse->recommendStmt; - attInfo = recInfo->attributes; + foreach(l, parse->rtable) + { + RangeTblEntry *rte = (RangeTblEntry *) lfirst(l); - attInfo->userWhereClause = preprocess_expression(root, - attInfo->userWhereClause, EXPRKIND_QUAL); - } + if (rte->recommender != NULL){ + RecommendInfo *recInfo; + AttributeInfo *attInfo; + + recInfo = (RecommendInfo*) rte->recommender; + attInfo = recInfo->attributes; + + attInfo->userWhereClause = preprocess_expression(root, + attInfo->userWhereClause, EXPRKIND_QUAL); + } + + } foreach(l, parse->windowClause) { diff --git a/PostgreSQL/src/backend/utils/misc/recathon.c b/PostgreSQL/src/backend/utils/misc/recathon.c index 3fe56ff..bfe1abe 100644 --- a/PostgreSQL/src/backend/utils/misc/recathon.c +++ b/PostgreSQL/src/backend/utils/misc/recathon.c @@ -57,14 +57,14 @@ static float getUpdateThreshold(); */ sim_node createSimNode(int userid, float event) { - sim_node newnode; - - newnode = (sim_node) palloc(sizeof(struct sim_node_t)); - newnode->id = userid; - newnode->event = event; - newnode->next = NULL; - - return newnode; + sim_node newnode; + + newnode = (sim_node) palloc(sizeof(struct sim_node_t)); + newnode->id = userid; + newnode->event = event; + newnode->next = NULL; + + return newnode; } /* ---------------------------------------------------------------- @@ -76,28 +76,28 @@ createSimNode(int userid, float event) { */ sim_node simInsert(sim_node target, sim_node newnode) { - sim_node tempnode; - - // Base case 1: target is empty. - if (!target) return newnode; - - // Base case 2: target belongs at the head of the list. - tempnode = target; - if (newnode->id <= tempnode->id) { - newnode->next = tempnode; - return newnode; - } - - // Normal case. - while (tempnode->next) { - if (newnode->id <= tempnode->next->id) break; - tempnode = tempnode->next; - } - - newnode->next = tempnode->next; - tempnode->next = newnode; - - return target; + sim_node tempnode; + + // Base case 1: target is empty. + if (!target) return newnode; + + // Base case 2: target belongs at the head of the list. + tempnode = target; + if (newnode->id <= tempnode->id) { + newnode->next = tempnode; + return newnode; + } + + // Normal case. + while (tempnode->next) { + if (newnode->id <= tempnode->next->id) break; + tempnode = tempnode->next; + } + + newnode->next = tempnode->next; + tempnode->next = newnode; + + return target; } /* ---------------------------------------------------------------- @@ -108,12 +108,12 @@ simInsert(sim_node target, sim_node newnode) { */ void freeSimList(sim_node head) { - sim_node temp; - while (head) { - temp = head->next; - pfree(head); - head = temp; - } + sim_node temp; + while (head) { + temp = head->next; + pfree(head); + head = temp; + } } /* ---------------------------------------------------------------- @@ -125,15 +125,15 @@ freeSimList(sim_node head) { */ nbr_node createNbrNode(int item1, int item2, float similarity) { - nbr_node newnode; - - newnode = (nbr_node) palloc(sizeof(struct nbr_node_t)); - newnode->item1 = item1; - newnode->item2 = item2; - newnode->similarity = similarity; - newnode->next = NULL; - - return newnode; + nbr_node newnode; + + newnode = (nbr_node) palloc(sizeof(struct nbr_node_t)); + newnode->item1 = item1; + newnode->item2 = item2; + newnode->similarity = similarity; + newnode->next = NULL; + + return newnode; } /* ---------------------------------------------------------------- @@ -145,43 +145,43 @@ createNbrNode(int item1, int item2, float similarity) { */ nbr_node nbrInsert(nbr_node target, nbr_node newnode, int maxsize) { - int i; - nbr_node tempnode; - bool inserted = false; - - // Base case 1: target is empty. - if (!target) return newnode; - - // Base case 2: target belongs at the head of the list. - if (newnode->similarity >= target->similarity) { - newnode->next = target; - target = newnode; - inserted = true; - } - - tempnode = target; - i = 1; - - // Normal case. - while (tempnode->next && i < maxsize) { - if (newnode->similarity >= tempnode->next->similarity) { - if (!inserted) { - newnode->next = tempnode->next; - tempnode->next = newnode; - inserted = true; - } - } - tempnode = tempnode->next; - i++; - } - - // If we've run out of room on our list. - if (tempnode->next && i >= maxsize) { - pfree(tempnode->next); - tempnode->next = NULL; - } - - return target; + int i; + nbr_node tempnode; + bool inserted = false; + + // Base case 1: target is empty. + if (!target) return newnode; + + // Base case 2: target belongs at the head of the list. + if (newnode->similarity >= target->similarity) { + newnode->next = target; + target = newnode; + inserted = true; + } + + tempnode = target; + i = 1; + + // Normal case. + while (tempnode->next && i < maxsize) { + if (newnode->similarity >= tempnode->next->similarity) { + if (!inserted) { + newnode->next = tempnode->next; + tempnode->next = newnode; + inserted = true; + } + } + tempnode = tempnode->next; + i++; + } + + // If we've run out of room on our list. + if (tempnode->next && i >= maxsize) { + pfree(tempnode->next); + tempnode->next = NULL; + } + + return target; } /* ---------------------------------------------------------------- @@ -192,12 +192,12 @@ nbrInsert(nbr_node target, nbr_node newnode, int maxsize) { */ void freeNbrList(nbr_node head) { - nbr_node temp; - while (head) { - temp = head->next; - pfree(head); - head = temp; - } + nbr_node temp; + while (head) { + temp = head->next; + pfree(head); + head = temp; + } } /* ---------------------------------------------------------------- @@ -212,47 +212,47 @@ freeNbrList(nbr_node head) { */ QueryDesc * recathon_queryStart(char *query_string, MemoryContext *recathoncontext) { - List *parsetree_list, *querytree_list, *plantree_list; - Node *parsetree; - QueryDesc *queryDesc; - MemoryContext newcontext, oldcontext; - - // First we'll create a new memory context to operate in. - newcontext = AllocSetContextCreate(CurrentMemoryContext, - "RecathonQuery", - ALLOCSET_DEFAULT_MINSIZE, - ALLOCSET_DEFAULT_INITSIZE, - ALLOCSET_DEFAULT_MAXSIZE); - oldcontext = MemoryContextSwitchTo(newcontext); - - // Now we parse the query and get a parse tree. - parsetree_list = pg_parse_query(query_string); - - // There should be only one item in the parse tree. - parsetree = lfirst(parsetree_list->head); - - // Now we generate plan trees. - querytree_list = pg_analyze_and_rewrite(parsetree, query_string, NULL, 0); - plantree_list = pg_plan_queries(querytree_list, 0, NULL); - - // Now we need to update the current snapshot. - PushCopiedSnapshot(GetActiveSnapshot()); - UpdateActiveSnapshotCommandId(); - - // We need to do the ExecProcNode stage of the query, which means that we - // need an intact planstate. The following code just creates this state. - queryDesc = CreateQueryDesc((PlannedStmt*) linitial(plantree_list), - query_string, - GetActiveSnapshot(), - InvalidSnapshot, - None_Receiver, NULL, 0); - ExecutorStart(queryDesc, 0); - - // Return the newly created memory context. - MemoryContextSwitchTo(oldcontext); - (*recathoncontext) = newcontext; - - return queryDesc; + List *parsetree_list, *querytree_list, *plantree_list; + Node *parsetree; + QueryDesc *queryDesc; + MemoryContext newcontext, oldcontext; + + // First we'll create a new memory context to operate in. + newcontext = AllocSetContextCreate(CurrentMemoryContext, + "RecathonQuery", + ALLOCSET_DEFAULT_MINSIZE, + ALLOCSET_DEFAULT_INITSIZE, + ALLOCSET_DEFAULT_MAXSIZE); + oldcontext = MemoryContextSwitchTo(newcontext); + + // Now we parse the query and get a parse tree. + parsetree_list = pg_parse_query(query_string); + + // There should be only one item in the parse tree. + parsetree = lfirst(parsetree_list->head); + + // Now we generate plan trees. + querytree_list = pg_analyze_and_rewrite(parsetree, query_string, NULL, 0); + plantree_list = pg_plan_queries(querytree_list, 0, NULL); + + // Now we need to update the current snapshot. + PushCopiedSnapshot(GetActiveSnapshot()); + UpdateActiveSnapshotCommandId(); + + // We need to do the ExecProcNode stage of the query, which means that we + // need an intact planstate. The following code just creates this state. + queryDesc = CreateQueryDesc((PlannedStmt*) linitial(plantree_list), + query_string, + GetActiveSnapshot(), + InvalidSnapshot, + None_Receiver, NULL, 0); + ExecutorStart(queryDesc, 0); + + // Return the newly created memory context. + MemoryContextSwitchTo(oldcontext); + (*recathoncontext) = newcontext; + + return queryDesc; } /* ---------------------------------------------------------------- @@ -263,21 +263,21 @@ recathon_queryStart(char *query_string, MemoryContext *recathoncontext) { */ void recathon_queryEnd(QueryDesc *queryDesc, MemoryContext recathoncontext) { - MemoryContext oldcontext; - - oldcontext = MemoryContextSwitchTo(recathoncontext); - - // End the query. - ExecutorFinish(queryDesc); - ExecutorEnd(queryDesc); - FreeQueryDesc(queryDesc); - - // Pop our snapshot. - PopActiveSnapshot(); - - // Delete our memory context. - MemoryContextSwitchTo(oldcontext); - MemoryContextDelete(recathoncontext); + MemoryContext oldcontext; + + oldcontext = MemoryContextSwitchTo(recathoncontext); + + // End the query. + ExecutorFinish(queryDesc); + ExecutorEnd(queryDesc); + FreeQueryDesc(queryDesc); + + // Pop our snapshot. + PopActiveSnapshot(); + + // Delete our memory context. + MemoryContextSwitchTo(oldcontext); + MemoryContextDelete(recathoncontext); } /* ---------------------------------------------------------------- @@ -288,17 +288,17 @@ recathon_queryEnd(QueryDesc *queryDesc, MemoryContext recathoncontext) { */ void recathon_queryExecute(char *query_string) { - QueryDesc *queryDesc; - MemoryContext recathoncontext, oldcontext; - - // We do the query start and end, and sandwich ExecutorRun in the middle. - queryDesc = recathon_queryStart(query_string, &recathoncontext); - - oldcontext = MemoryContextSwitchTo(recathoncontext); - ExecutorRun(queryDesc, ForwardScanDirection, 0); - MemoryContextSwitchTo(oldcontext); - - recathon_queryEnd(queryDesc, recathoncontext); + QueryDesc *queryDesc; + MemoryContext recathoncontext, oldcontext; + + // We do the query start and end, and sandwich ExecutorRun in the middle. + queryDesc = recathon_queryStart(query_string, &recathoncontext); + + oldcontext = MemoryContextSwitchTo(recathoncontext); + ExecutorRun(queryDesc, ForwardScanDirection, 0); + MemoryContextSwitchTo(oldcontext); + + recathon_queryEnd(queryDesc, recathoncontext); } /* ---------------------------------------------------------------- @@ -311,38 +311,38 @@ recathon_queryExecute(char *query_string) { */ void recathon_utilityExecute(char *query_string) { - List *parsetree_list, *querytree_list, *plantree_list; - MemoryContext recathoncontext, oldcontext; - Node *parsetree, *utilStmt; - - // We do this inside another memory context - // so we can rid ourselves of this memory easily. - recathoncontext = AllocSetContextCreate(CurrentMemoryContext, - "RecathonExecute", - ALLOCSET_DEFAULT_MINSIZE, - ALLOCSET_DEFAULT_INITSIZE, - ALLOCSET_DEFAULT_MAXSIZE); - oldcontext = MemoryContextSwitchTo(recathoncontext); - - // Now we parse the query and get a parse tree. - parsetree_list = pg_parse_query(query_string); - - // There should be only one item in the parse tree. - parsetree = lfirst(parsetree_list->head); - - // Now we generate plan trees. - querytree_list = pg_analyze_and_rewrite(parsetree, query_string, NULL, 0); - plantree_list = pg_plan_queries(querytree_list, 0, NULL); - - // Isolate the statement. - utilStmt = (Node*) lfirst(list_head(plantree_list)); - - // Execute the query. - ProcessUtility(utilStmt, query_string, NULL, true, None_Receiver, NULL); - - // Nothing left to do. - MemoryContextSwitchTo(oldcontext); - MemoryContextDelete(recathoncontext); + List *parsetree_list, *querytree_list, *plantree_list; + MemoryContext recathoncontext, oldcontext; + Node *parsetree, *utilStmt; + + // We do this inside another memory context + // so we can rid ourselves of this memory easily. + recathoncontext = AllocSetContextCreate(CurrentMemoryContext, + "RecathonExecute", + ALLOCSET_DEFAULT_MINSIZE, + ALLOCSET_DEFAULT_INITSIZE, + ALLOCSET_DEFAULT_MAXSIZE); + oldcontext = MemoryContextSwitchTo(recathoncontext); + + // Now we parse the query and get a parse tree. + parsetree_list = pg_parse_query(query_string); + + // There should be only one item in the parse tree. + parsetree = lfirst(parsetree_list->head); + + // Now we generate plan trees. + querytree_list = pg_analyze_and_rewrite(parsetree, query_string, NULL, 0); + plantree_list = pg_plan_queries(querytree_list, 0, NULL); + + // Isolate the statement. + utilStmt = (Node*) lfirst(list_head(plantree_list)); + + // Execute the query. + ProcessUtility(utilStmt, query_string, NULL, true, None_Receiver, NULL); + + // Nothing left to do. + MemoryContextSwitchTo(oldcontext); + MemoryContextDelete(recathoncontext); } /* ---------------------------------------------------------------- @@ -355,16 +355,16 @@ recathon_utilityExecute(char *query_string) { */ RecScan* make_rec_from_scan(Scan *subscan, Node *recommender) { - RecScan *recscan; - - recscan = (RecScan*) makeNode(RecScan); - recscan->scan.plan = subscan->plan; - recscan->scan.scanrelid = subscan->scanrelid; - recscan->scan.plan.type = T_RecScan; - recscan->subscan = subscan; - recscan->recommender = recommender; - - return recscan; + RecScan *recscan; + + recscan = (RecScan*) makeNode(RecScan); + recscan->scan.plan = subscan->plan; + recscan->scan.scanrelid = subscan->scanrelid; + recscan->scan.plan.type = T_RecScan; + recscan->subscan = subscan; + recscan->recommender = recommender; + + return recscan; } /* ---------------------------------------------------------------- @@ -377,16 +377,16 @@ make_rec_from_scan(Scan *subscan, Node *recommender) { */ RecJoin* make_rec_from_join(Join *subjoin) { - RecJoin *recjoin; - - recjoin = (RecJoin*) makeNode(RecJoin); - recjoin->join.plan = subjoin->plan; - recjoin->join.jointype = subjoin->jointype; - recjoin->join.joinqual = subjoin->joinqual; - recjoin->join.plan.type = T_RecJoin; - recjoin->subjoin = subjoin; - - return recjoin; + RecJoin *recjoin; + + recjoin = (RecJoin*) makeNode(RecJoin); + recjoin->join.plan = subjoin->plan; + recjoin->join.jointype = subjoin->jointype; + recjoin->join.joinqual = subjoin->joinqual; + recjoin->join.plan.type = T_RecJoin; + recjoin->subjoin = subjoin; + + return recjoin; } /* ---------------------------------------------------------------- @@ -398,62 +398,62 @@ make_rec_from_join(Join *subjoin) { */ int count_rows(char *tablename) { - int i, numItems, natts; - // Objects for querying. - char *querystring; - QueryDesc *queryDesc; - PlanState *planstate; - TupleTableSlot *slot; - MemoryContext recathoncontext; - - // We start with a simple query to get the number of items. - querystring = (char*) palloc(256*sizeof(char)); - sprintf(querystring,"SELECT COUNT(*) FROM %s;",tablename); - queryDesc = recathon_queryStart(querystring,&recathoncontext); - planstate = queryDesc->planstate; - - slot = ExecProcNode(planstate); - if (TupIsNull(slot)) { - recathon_queryEnd(queryDesc,recathoncontext); - pfree(querystring); - return -1; - } - slot_getallattrs(slot); - natts = slot->tts_tupleDescriptor->natts; - - // Silence the compiler. - numItems = 0; - - for (i = 0; i < natts; i++) { - if (!slot->tts_isnull[i]) { - Datum slot_result; - unsigned int data_type; - - slot_result = slot->tts_values[i]; - data_type = slot->tts_tupleDescriptor->attrs[i]->atttypid; - - switch (data_type) { - case INT8OID: - numItems = (int) DatumGetInt64(slot_result); - break; - case INT2OID: - numItems = (int) DatumGetInt16(slot_result); - break; - case INT4OID: - numItems = (int) DatumGetInt32(slot_result); - break; - default: - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("fatal error in count_rows()"))); - } - } - } - - recathon_queryEnd(queryDesc,recathoncontext); - pfree(querystring); - - return numItems; + int i, numItems, natts; + // Objects for querying. + char *querystring; + QueryDesc *queryDesc; + PlanState *planstate; + TupleTableSlot *slot; + MemoryContext recathoncontext; + + // We start with a simple query to get the number of items. + querystring = (char*) palloc(256*sizeof(char)); + sprintf(querystring,"SELECT COUNT(*) FROM %s;",tablename); + queryDesc = recathon_queryStart(querystring,&recathoncontext); + planstate = queryDesc->planstate; + + slot = ExecProcNode(planstate); + if (TupIsNull(slot)) { + recathon_queryEnd(queryDesc,recathoncontext); + pfree(querystring); + return -1; + } + slot_getallattrs(slot); + natts = slot->tts_tupleDescriptor->natts; + + // Silence the compiler. + numItems = 0; + + for (i = 0; i < natts; i++) { + if (!slot->tts_isnull[i]) { + Datum slot_result; + unsigned int data_type; + + slot_result = slot->tts_values[i]; + data_type = slot->tts_tupleDescriptor->attrs[i]->atttypid; + + switch (data_type) { + case INT8OID: + numItems = (int) DatumGetInt64(slot_result); + break; + case INT2OID: + numItems = (int) DatumGetInt16(slot_result); + break; + case INT4OID: + numItems = (int) DatumGetInt32(slot_result); + break; + default: + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("fatal error in count_rows()"))); + } + } + } + + recathon_queryEnd(queryDesc,recathoncontext); + pfree(querystring); + + return numItems; } /* ---------------------------------------------------------------- @@ -465,44 +465,44 @@ count_rows(char *tablename) { */ int getTupleInt(TupleTableSlot *slot, char *attname) { - int i, natts; - - slot_getallattrs(slot); - natts = slot->tts_tupleDescriptor->natts; - - for (i = 0; i < natts; i++) { - if (!slot->tts_isnull[i]) { - char *col_name; - Datum slot_result; - - col_name = slot->tts_tupleDescriptor->attrs[i]->attname.data; - slot_result = slot->tts_values[i]; - - if (strcmp(col_name, attname) == 0) { - unsigned int data_type = slot->tts_tupleDescriptor->attrs[i]->atttypid; - // The data type will tell us what to do with it. - switch (data_type) { - case INT8OID: - return (int) DatumGetInt64(slot_result); - case INT2OID: - return (int) DatumGetInt16(slot_result); - case INT4OID: - return (int) DatumGetInt32(slot_result); - case FLOAT4OID: - return (int) DatumGetFloat4(slot_result); - case FLOAT8OID: - return (int) DatumGetFloat8(slot_result); - default: - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("type mismatch in getTupleInt()"))); - break; - } - } - } - } - - return -1; + int i, natts; + + slot_getallattrs(slot); + natts = slot->tts_tupleDescriptor->natts; + + for (i = 0; i < natts; i++) { + if (!slot->tts_isnull[i]) { + char *col_name; + Datum slot_result; + + col_name = slot->tts_tupleDescriptor->attrs[i]->attname.data; + slot_result = slot->tts_values[i]; + + if (strcmp(col_name, attname) == 0) { + unsigned int data_type = slot->tts_tupleDescriptor->attrs[i]->atttypid; + // The data type will tell us what to do with it. + switch (data_type) { + case INT8OID: + return (int) DatumGetInt64(slot_result); + case INT2OID: + return (int) DatumGetInt16(slot_result); + case INT4OID: + return (int) DatumGetInt32(slot_result); + case FLOAT4OID: + return (int) DatumGetFloat4(slot_result); + case FLOAT8OID: + return (int) DatumGetFloat8(slot_result); + default: + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("type mismatch in getTupleInt()"))); + break; + } + } + } + } + + return -1; } /* ---------------------------------------------------------------- @@ -514,44 +514,44 @@ getTupleInt(TupleTableSlot *slot, char *attname) { */ float getTupleFloat(TupleTableSlot *slot, char *attname) { - int i, natts; - - slot_getallattrs(slot); - natts = slot->tts_tupleDescriptor->natts; - - for (i = 0; i < natts; i++) { - if (!slot->tts_isnull[i]) { - char *col_name; - Datum slot_result; - - col_name = slot->tts_tupleDescriptor->attrs[i]->attname.data; - slot_result = slot->tts_values[i]; - - if (strcmp(col_name, attname) == 0) { - unsigned int data_type = slot->tts_tupleDescriptor->attrs[i]->atttypid; - // The data type will tell us what to do with it. - switch (data_type) { - case FLOAT8OID: - return (float) DatumGetFloat8(slot_result); - case FLOAT4OID: - return (float) DatumGetFloat4(slot_result); - case INT8OID: - return (float) DatumGetInt64(slot_result); - case INT2OID: - return (float) DatumGetInt16(slot_result); - case INT4OID: - return (float) DatumGetInt32(slot_result); - default: - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("type mismatch in getTupleFloat()"))); - break; - } - } - } - } - - return -1.0; + int i, natts; + + slot_getallattrs(slot); + natts = slot->tts_tupleDescriptor->natts; + + for (i = 0; i < natts; i++) { + if (!slot->tts_isnull[i]) { + char *col_name; + Datum slot_result; + + col_name = slot->tts_tupleDescriptor->attrs[i]->attname.data; + slot_result = slot->tts_values[i]; + + if (strcmp(col_name, attname) == 0) { + unsigned int data_type = slot->tts_tupleDescriptor->attrs[i]->atttypid; + // The data type will tell us what to do with it. + switch (data_type) { + case FLOAT8OID: + return (float) DatumGetFloat8(slot_result); + case FLOAT4OID: + return (float) DatumGetFloat4(slot_result); + case INT8OID: + return (float) DatumGetInt64(slot_result); + case INT2OID: + return (float) DatumGetInt16(slot_result); + case INT4OID: + return (float) DatumGetInt32(slot_result); + default: + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("type mismatch in getTupleFloat()"))); + break; + } + } + } + } + + return -1.0; } /* ---------------------------------------------------------------- @@ -565,77 +565,77 @@ getTupleFloat(TupleTableSlot *slot, char *attname) { */ char* getTupleString(TupleTableSlot *slot, char *attname) { - int i, natts; - // Possible return cases. - int string_int; - float string_float; - bool string_bool; - char *rtn_string; - - slot_getallattrs(slot); - natts = slot->tts_tupleDescriptor->natts; - - for (i = 0; i < natts; i++) { - if (!slot->tts_isnull[i]) { - char *col_name; - Datum slot_result; - - col_name = slot->tts_tupleDescriptor->attrs[i]->attname.data; - slot_result = slot->tts_values[i]; - - if (strcmp(col_name, attname) == 0) { - unsigned int data_type = slot->tts_tupleDescriptor->attrs[i]->atttypid; - - // The data type will tell us what to do with it. - switch(data_type) { - case INT2OID: - string_int = (int) DatumGetInt16(slot_result); - rtn_string = (char*) palloc(32*sizeof(char)); - sprintf(rtn_string,"%d",string_int); - return rtn_string; - case INT4OID: - string_int = (int) DatumGetInt32(slot_result); - rtn_string = (char*) palloc(32*sizeof(char)); - sprintf(rtn_string,"%d",string_int); - return rtn_string; - case INT8OID: - string_int = (int) DatumGetInt64(slot_result); - rtn_string = (char*) palloc(32*sizeof(char)); - sprintf(rtn_string,"%d",string_int); - return rtn_string; - case FLOAT4OID: - string_float = (float) DatumGetFloat4(slot_result); - rtn_string = (char*) palloc(128*sizeof(char)); - snprintf(rtn_string,128,"%f",string_float); - return rtn_string; - case FLOAT8OID: - string_float = (float) DatumGetFloat8(slot_result); - rtn_string = (char*) palloc(128*sizeof(char)); - snprintf(rtn_string,128,"%f",string_float); - return rtn_string; - case BOOLOID: - string_bool = DatumGetBool(slot_result); - rtn_string = (char*) palloc(8*sizeof(char)); - if (string_bool) - sprintf(rtn_string,"true"); - else - sprintf(rtn_string,"false"); - return rtn_string; - case VARCHAROID: - case TEXTOID: - case BPCHAROID: - case BYTEAOID: - return TextDatumGetCString(slot_result); - default: - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("unsupported type in getTupleString()"))); - } - } - } - } - - return NULL; + int i, natts; + // Possible return cases. + int string_int; + float string_float; + bool string_bool; + char *rtn_string; + + slot_getallattrs(slot); + natts = slot->tts_tupleDescriptor->natts; + + for (i = 0; i < natts; i++) { + if (!slot->tts_isnull[i]) { + char *col_name; + Datum slot_result; + + col_name = slot->tts_tupleDescriptor->attrs[i]->attname.data; + slot_result = slot->tts_values[i]; + + if (strcmp(col_name, attname) == 0) { + unsigned int data_type = slot->tts_tupleDescriptor->attrs[i]->atttypid; + + // The data type will tell us what to do with it. + switch(data_type) { + case INT2OID: + string_int = (int) DatumGetInt16(slot_result); + rtn_string = (char*) palloc(32*sizeof(char)); + sprintf(rtn_string,"%d",string_int); + return rtn_string; + case INT4OID: + string_int = (int) DatumGetInt32(slot_result); + rtn_string = (char*) palloc(32*sizeof(char)); + sprintf(rtn_string,"%d",string_int); + return rtn_string; + case INT8OID: + string_int = (int) DatumGetInt64(slot_result); + rtn_string = (char*) palloc(32*sizeof(char)); + sprintf(rtn_string,"%d",string_int); + return rtn_string; + case FLOAT4OID: + string_float = (float) DatumGetFloat4(slot_result); + rtn_string = (char*) palloc(128*sizeof(char)); + snprintf(rtn_string,128,"%f",string_float); + return rtn_string; + case FLOAT8OID: + string_float = (float) DatumGetFloat8(slot_result); + rtn_string = (char*) palloc(128*sizeof(char)); + snprintf(rtn_string,128,"%f",string_float); + return rtn_string; + case BOOLOID: + string_bool = DatumGetBool(slot_result); + rtn_string = (char*) palloc(8*sizeof(char)); + if (string_bool) + sprintf(rtn_string,"true"); + else + sprintf(rtn_string,"false"); + return rtn_string; + case VARCHAROID: + case TEXTOID: + case BPCHAROID: + case BYTEAOID: + return TextDatumGetCString(slot_result); + default: + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("unsupported type in getTupleString()"))); + } + } + } + } + + return NULL; } /* ---------------------------------------------------------------- @@ -647,9 +647,9 @@ getTupleString(TupleTableSlot *slot, char *attname) { */ bool relationExists(RangeVar* relation) { - Oid testOid; - testOid = RangeVarGetRelid(relation,0,true); - return OidIsValid(testOid); + Oid testOid; + testOid = RangeVarGetRelid(relation,0,true); + return OidIsValid(testOid); } /* ---------------------------------------------------------------- @@ -661,38 +661,38 @@ relationExists(RangeVar* relation) { */ bool columnExistsInRelation(char *colname, RangeVar *relation) { - Oid relOid; - Relation newRel; - RangeTblEntry *rte; - ListCell *c; - bool foundColumn; - - // Step 1: build a proper RTE to use. - relOid = RangeVarGetRelid(relation,0,true); - // Double-check to make sure the table exists. - if (!OidIsValid(relOid)) return false; - newRel = relation_open(relOid,NoLock); - rte = addRangeTableEntryForRelation(NULL,newRel,NULL,false,false); - - // Step 2: cross-reference the relation columns and - // our provided column name. - foundColumn = false; - foreach(c, rte->eref->colnames) - { - if (strcmp(strVal(lfirst(c)), colname) == 0) - { - if (foundColumn) { - perror("Ambiguous column request.\n"); - return false; - } else { - foundColumn = true; - } - } - } - // Close the relation to avoid leaks. - relation_close(newRel,NoLock); - pfree(rte); - return foundColumn; + Oid relOid; + Relation newRel; + RangeTblEntry *rte; + ListCell *c; + bool foundColumn; + + // Step 1: build a proper RTE to use. + relOid = RangeVarGetRelid(relation,0,true); + // Double-check to make sure the table exists. + if (!OidIsValid(relOid)) return false; + newRel = relation_open(relOid,NoLock); + rte = addRangeTableEntryForRelation(NULL,newRel,NULL,false,false); + + // Step 2: cross-reference the relation columns and + // our provided column name. + foundColumn = false; + foreach(c, rte->eref->colnames) + { + if (strcmp(strVal(lfirst(c)), colname) == 0) + { + if (foundColumn) { + perror("Ambiguous column request.\n"); + return false; + } else { + foundColumn = true; + } + } + } + // Close the relation to avoid leaks. + relation_close(newRel,NoLock); + pfree(rte); + return foundColumn; } /* ---------------------------------------------------------------- @@ -705,45 +705,45 @@ columnExistsInRelation(char *colname, RangeVar *relation) { */ char* retrieveRecommender(char *eventtable, char *method) { - RangeVar *cataloguerv; - char *querystring, *recindexname; - QueryDesc *queryDesc; - PlanState *planstate; - TupleTableSlot *slot; - MemoryContext recathoncontext; - - // If this fails, there's no RecModelsCatalogue, so - // there are no recommenders. - cataloguerv = makeRangeVar(NULL,"recmodelscatalogue",0); - if (!relationExists(cataloguerv)) { - pfree(cataloguerv); - return NULL; - } - pfree(cataloguerv); - - // If the catalogue does exist, we'll query it looking - // for recommenders based on the given information. - querystring = (char*) palloc(1024*sizeof(char)); - sprintf(querystring,"SELECT recommenderindexname FROM RecModelsCatalogue WHERE eventtable = '%s' AND method = '%s';", - eventtable, method); - queryDesc = recathon_queryStart(querystring,&recathoncontext); - planstate = queryDesc->planstate; - - slot = ExecProcNode(planstate); - // If there are no results, the recommender does not exist. - if (TupIsNull(slot)) { - recathon_queryEnd(queryDesc,recathoncontext); - pfree(querystring); - return NULL; - } - - recindexname = getTupleString(slot,"recommenderindexname"); - - // Query cleanup. - recathon_queryEnd(queryDesc,recathoncontext); - pfree(querystring); - - return recindexname; + RangeVar *cataloguerv; + char *querystring, *recindexname; + QueryDesc *queryDesc; + PlanState *planstate; + TupleTableSlot *slot; + MemoryContext recathoncontext; + + // If this fails, there's no RecModelsCatalogue, so + // there are no recommenders. + cataloguerv = makeRangeVar(NULL,"recmodelscatalogue",0); + if (!relationExists(cataloguerv)) { + pfree(cataloguerv); + return NULL; + } + pfree(cataloguerv); + + // If the catalogue does exist, we'll query it looking + // for recommenders based on the given information. + querystring = (char*) palloc(1024*sizeof(char)); + sprintf(querystring,"SELECT recommenderindexname FROM RecModelsCatalogue WHERE eventtable = '%s' AND method = '%s';", + eventtable, method); + queryDesc = recathon_queryStart(querystring,&recathoncontext); + planstate = queryDesc->planstate; + + slot = ExecProcNode(planstate); + // If there are no results, the recommender does not exist. + if (TupIsNull(slot)) { + recathon_queryEnd(queryDesc,recathoncontext); + pfree(querystring); + return NULL; + } + + recindexname = getTupleString(slot,"recommenderindexname"); + + // Query cleanup. + recathon_queryEnd(queryDesc,recathoncontext); + pfree(querystring); + + return recindexname; } /* ---------------------------------------------------------------- @@ -758,56 +758,56 @@ retrieveRecommender(char *eventtable, char *method) { */ void getRecInfo(char *recindexname, char **ret_eventtable, - char **ret_userkey, char **ret_itemkey, - char **ret_eventval, char **ret_method, int *ret_numatts) { - char *eventtable, *userkey, *itemkey, *eventval, *method; - // Information for query. - char *querystring; - QueryDesc *queryDesc; - PlanState *planstate; - TupleTableSlot *slot; - MemoryContext recathoncontext; - - querystring = (char*) palloc(1024*sizeof(char)); - sprintf(querystring,"SELECT * FROM RecModelsCatalogue WHERE recommenderindexname = '%s';", - recindexname); - queryDesc = recathon_queryStart(querystring,&recathoncontext); - planstate = queryDesc->planstate; - - slot = ExecProcNode(planstate); - // This should never happen. - if (TupIsNull(slot)) - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("fatal error in getRecInfo()"))); - - // Obtain each of the values needed. - if (ret_eventtable) { - eventtable = getTupleString(slot,"eventtable"); - (*ret_eventtable) = eventtable; - } - if (ret_userkey) { - userkey = getTupleString(slot,"userkey"); - (*ret_userkey) = userkey; - } - if (ret_itemkey) { - itemkey = getTupleString(slot,"itemkey"); - (*ret_itemkey) = itemkey; - } - if (ret_eventval) { - eventval = getTupleString(slot,"eventval"); - (*ret_eventval) = eventval; - } - if (ret_method) { - method = getTupleString(slot,"method"); - (*ret_method) = method; - } - if (ret_numatts) - (*ret_numatts) = getTupleInt(slot,"contextattributes"); - - // Cleanup. - recathon_queryEnd(queryDesc,recathoncontext); - pfree(querystring); + char **ret_userkey, char **ret_itemkey, + char **ret_eventval, char **ret_method, int *ret_numatts) { + char *eventtable, *userkey, *itemkey, *eventval, *method; + // Information for query. + char *querystring; + QueryDesc *queryDesc; + PlanState *planstate; + TupleTableSlot *slot; + MemoryContext recathoncontext; + + querystring = (char*) palloc(1024*sizeof(char)); + sprintf(querystring,"SELECT * FROM RecModelsCatalogue WHERE recommenderindexname = '%s';", + recindexname); + queryDesc = recathon_queryStart(querystring,&recathoncontext); + planstate = queryDesc->planstate; + + slot = ExecProcNode(planstate); + // This should never happen. + if (TupIsNull(slot)) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("fatal error in getRecInfo()"))); + + // Obtain each of the values needed. + if (ret_eventtable) { + eventtable = getTupleString(slot,"eventtable"); + (*ret_eventtable) = eventtable; + } + if (ret_userkey) { + userkey = getTupleString(slot,"userkey"); + (*ret_userkey) = userkey; + } + if (ret_itemkey) { + itemkey = getTupleString(slot,"itemkey"); + (*ret_itemkey) = itemkey; + } + if (ret_eventval) { + eventval = getTupleString(slot,"eventval"); + (*ret_eventval) = eventval; + } + if (ret_method) { + method = getTupleString(slot,"method"); + (*ret_method) = method; + } + if (ret_numatts) + (*ret_numatts) = getTupleInt(slot,"contextattributes"); + + // Cleanup. + recathon_queryEnd(queryDesc,recathoncontext); + pfree(querystring); } /* ---------------------------------------------------------------- @@ -820,64 +820,64 @@ getRecInfo(char *recindexname, char **ret_eventtable, */ recMethod validateCreateRStmt(CreateRStmt *recStmt) { - recMethod method; - - // Our first test is to make sure the ratings table exists. - if (!relationExists(recStmt->eventtable)) - ereport(ERROR, - (errcode(ERRCODE_UNDEFINED_TABLE), - errmsg("relation \"%s\" does not exist", - recStmt->eventtable->relname))); - - // Our second test is to see whether or not a recommender has already - // been created with the given events table and method, or name. - if (relationExists(recStmt->recname)) - ereport(ERROR, - (errcode(ERRCODE_UNDEFINED_TABLE), - errmsg("a recommender with name \"%s\" already exists", - recStmt->recname->relname))); - - if (retrieveRecommender(recStmt->eventtable->relname,recStmt->method) != NULL) - ereport(ERROR, - (errcode(ERRCODE_UNDEFINED_TABLE), - errmsg("recommender on table \"%s\" using method \"%s\" already exists", - recStmt->eventtable->relname,recStmt->method))); - - // We next need to test that the provided columns - // exist in the events table. - // Test: user key is in event table. - if (!columnExistsInRelation(recStmt->userkey,recStmt->eventtable)) - ereport(ERROR, - (errcode(ERRCODE_UNDEFINED_COLUMN), - errmsg("column \"%s\" does not exist in relation \"%s\"", - recStmt->userkey,recStmt->eventtable->relname))); - // Test: item key is in event table. - if (!columnExistsInRelation(recStmt->itemkey,recStmt->eventtable)) - ereport(ERROR, - (errcode(ERRCODE_UNDEFINED_COLUMN), - errmsg("column \"%s\" does not exist in relation \"%s\"", - recStmt->itemkey,recStmt->eventtable->relname))); - // Test: event value is in event table. - if (!columnExistsInRelation(recStmt->eventval,recStmt->eventtable)) - ereport(ERROR, - (errcode(ERRCODE_UNDEFINED_COLUMN), - errmsg("column \"%s\" does not exist in relation \"%s\"", - recStmt->eventval,recStmt->eventtable->relname))); - - // Now we convert our method name. - method = itemCosCF; - // To handle the case where no USING clause was provided. - if (recStmt->method) { - method = getRecMethod(recStmt->method); - if (method < 0) - ereport(ERROR, - (errcode(ERRCODE_CASE_NOT_FOUND), - errmsg("recommendation method %s not recognized", - recStmt->method))); - } - - // And return. - return method; + recMethod method; + + // Our first test is to make sure the ratings table exists. + if (!relationExists(recStmt->eventtable)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_TABLE), + errmsg("relation \"%s\" does not exist", + recStmt->eventtable->relname))); + + // Our second test is to see whether or not a recommender has already + // been created with the given events table and method, or name. + if (relationExists(recStmt->recname)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_TABLE), + errmsg("a recommender with name \"%s\" already exists", + recStmt->recname->relname))); + + if (retrieveRecommender(recStmt->eventtable->relname,recStmt->method) != NULL) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_TABLE), + errmsg("recommender on table \"%s\" using method \"%s\" already exists", + recStmt->eventtable->relname,recStmt->method))); + + // We next need to test that the provided columns + // exist in the events table. + // Test: user key is in event table. + if (!columnExistsInRelation(recStmt->userkey,recStmt->eventtable)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_COLUMN), + errmsg("column \"%s\" does not exist in relation \"%s\"", + recStmt->userkey,recStmt->eventtable->relname))); + // Test: item key is in event table. + if (!columnExistsInRelation(recStmt->itemkey,recStmt->eventtable)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_COLUMN), + errmsg("column \"%s\" does not exist in relation \"%s\"", + recStmt->itemkey,recStmt->eventtable->relname))); + // Test: event value is in event table. + if (!columnExistsInRelation(recStmt->eventval,recStmt->eventtable)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_COLUMN), + errmsg("column \"%s\" does not exist in relation \"%s\"", + recStmt->eventval,recStmt->eventtable->relname))); + + // Now we convert our method name. + method = itemCosCF; + // To handle the case where no USING clause was provided. + if (recStmt->method) { + method = getRecMethod(recStmt->method); + if (method < 0) + ereport(ERROR, + (errcode(ERRCODE_CASE_NOT_FOUND), + errmsg("recommendation method %s not recognized", + recStmt->method))); + } + + // And return. + return method; } /* ---------------------------------------------------------------- @@ -888,20 +888,20 @@ validateCreateRStmt(CreateRStmt *recStmt) { */ recMethod getRecMethod(char *method) { - if (!method) return -1; - - if (strcmp("itemcoscf",method) == 0) - return itemCosCF; - else if (strcmp("itempearcf",method) == 0) - return itemPearCF; - else if (strcmp("usercoscf",method) == 0) - return userCosCF; - else if (strcmp("userpearcf",method) == 0) - return userPearCF; - else if (strcmp("svd",method) == 0) - return SVD; - else - return -1; + if (!method) return -1; + + if (strcmp("itemcoscf",method) == 0) + return itemCosCF; + else if (strcmp("itempearcf",method) == 0) + return itemPearCF; + else if (strcmp("usercoscf",method) == 0) + return userCosCF; + else if (strcmp("userpearcf",method) == 0) + return userPearCF; + else if (strcmp("svd",method) == 0) + return SVD; + else + return -1; } /* ---------------------------------------------------------------- @@ -912,40 +912,40 @@ getRecMethod(char *method) { */ static float getUpdateThreshold() { - float threshold = -1; - RangeVar *testrv; - // Query information. - char *querystring; - QueryDesc *queryDesc; - PlanState *planstate; - TupleTableSlot *slot; - MemoryContext recathoncontext; - - testrv = makeRangeVar(NULL,"recdbproperties",0); - if (!relationExists(testrv)) { - pfree(testrv); - return -1; - } - pfree(testrv); - - querystring = (char*) palloc(128*sizeof(char)); - sprintf(querystring,"SELECT update_threshold FROM recdbproperties;"); - queryDesc = recathon_queryStart(querystring,&recathoncontext); - planstate = queryDesc->planstate; - - slot = ExecProcNode(planstate); - if (TupIsNull(slot)) { - recathon_queryEnd(queryDesc,recathoncontext); - pfree(querystring); - return -1; - } - - threshold = getTupleFloat(slot,"update_threshold"); - - recathon_queryEnd(queryDesc,recathoncontext); - pfree(querystring); - - return threshold; + float threshold = -1; + RangeVar *testrv; + // Query information. + char *querystring; + QueryDesc *queryDesc; + PlanState *planstate; + TupleTableSlot *slot; + MemoryContext recathoncontext; + + testrv = makeRangeVar(NULL,"recdbproperties",0); + if (!relationExists(testrv)) { + pfree(testrv); + return -1; + } + pfree(testrv); + + querystring = (char*) palloc(128*sizeof(char)); + sprintf(querystring,"SELECT update_threshold FROM recdbproperties;"); + queryDesc = recathon_queryStart(querystring,&recathoncontext); + planstate = queryDesc->planstate; + + slot = ExecProcNode(planstate); + if (TupIsNull(slot)) { + recathon_queryEnd(queryDesc,recathoncontext); + pfree(querystring); + return -1; + } + + threshold = getTupleFloat(slot,"update_threshold"); + + recathon_queryEnd(queryDesc,recathoncontext); + pfree(querystring); + + return threshold; } /* ---------------------------------------------------------------- @@ -959,247 +959,247 @@ getUpdateThreshold() { */ void updateCellCounter(char *eventtable, TupleTableSlot *insertslot) { - float update_threshold; - RangeVar *cataloguerv; - // Query information. - char *querystring; - QueryDesc *queryDesc; - PlanState *planstate; - TupleTableSlot *slot; - MemoryContext recathoncontext; - - // If this fails, there's no RecModelsCatalogue, so - // there are no recommenders. - cataloguerv = makeRangeVar(NULL,"recmodelscatalogue",0); - if (!relationExists(cataloguerv)) { - pfree(cataloguerv); - return; - } - pfree(cataloguerv); - - // Obtain the update threshold. - update_threshold = getUpdateThreshold(); - - // Now that we've confirmed the RecModelsCatalogue - // exists, let's query it to find the necessary - // information. - querystring = (char*) palloc(1024*sizeof(char)); - sprintf(querystring,"SELECT * FROM RecModelsCatalogue WHERE eventtable = '%s';", - eventtable); - queryDesc = recathon_queryStart(querystring,&recathoncontext); - planstate = queryDesc->planstate; - - for (;;) { - // In case of SVD, recmodelname is the user model, and the other is the - // item model. Otherwise, recmodelname2 is nothing. - char *recindexname, *recmodelname, *recmodelname2; - char *userkey, *itemkey, *eventval, *strmethod; - int updatecounter = -1; - int eventtotal = -1; - recMethod method; - // Query information for our internal query. - char *countquerystring; - QueryDesc *countqueryDesc; - PlanState *countplanstate; - TupleTableSlot *countslot; - MemoryContext countcontext; - - slot = ExecProcNode(planstate); - if (TupIsNull(slot)) break; - - // Acquire the data for this recommender. - recindexname = getTupleString(slot,"recommenderindexname"); - userkey = getTupleString(slot,"userkey"); - itemkey = getTupleString(slot,"itemkey"); - eventval = getTupleString(slot,"eventval"); - strmethod = getTupleString(slot,"method"); - - // Get the recMethod. - method = getRecMethod(strmethod); - pfree(strmethod); - - // Failure case, continue to next tuple. - if (method < 0) { - pfree(recindexname); - pfree(userkey); - pfree(itemkey); - pfree(eventval); - continue; - } - - // We now have all the information necessary to update this - // recommender's cell counter. First we need to acquire it, - // and we might as well get the model name while we're at it. - countquerystring = (char*) palloc(1024*sizeof(char)); - if (method == SVD) - sprintf(countquerystring,"SELECT recusermodelname, recitemmodelname, updatecounter, eventtotal FROM %s;", - recindexname); - else - sprintf(countquerystring,"SELECT recmodelname, updatecounter, eventtotal FROM %s;", - recindexname); - - countqueryDesc = recathon_queryStart(countquerystring,&countcontext); - countplanstate = countqueryDesc->planstate; - - // Go through what should be the only tuple and obtain the data. - countslot = ExecProcNode(countplanstate); - if (TupIsNull(countslot)) { - // More failure conditions. We can't just error out - // because the INSERT still needs to happen. - recathon_queryEnd(countqueryDesc,countcontext); - pfree(countquerystring); - pfree(recindexname); - pfree(userkey); - pfree(itemkey); - pfree(eventval); - continue; - } - - // Get the relevant data. - if (method == SVD) { - recmodelname = getTupleString(countslot,"recusermodelname"); - recmodelname2 = getTupleString(countslot,"recitemmodelname"); - } else { - recmodelname = getTupleString(countslot,"recmodelname"); - recmodelname2 = NULL; - } - updatecounter = getTupleInt(countslot,"updatecounter"); - eventtotal = getTupleInt(countslot,"eventtotal"); - - recathon_queryEnd(countqueryDesc,countcontext); - pfree(countquerystring); - - // Even more failure conditions. - if (updatecounter < 0) { - pfree(recmodelname); - if (recmodelname2) - pfree(recmodelname2); - pfree(recindexname); - pfree(userkey); - pfree(itemkey); - continue; - } - - // With that done, we check the original counter. If the - // number of new events is greater than threshold * the - // number of events currently used in the model, we need - // to trigger an update. Otherwise, just increment. - updatecounter++; - - if (updatecounter >= (int) (update_threshold * eventtotal)) { - int numEvents = 0; - - // What we do depends on the recommendation method. - switch (method) { - case itemCosCF: - { - // Before we update the similarity model, we need to obtain - // a few item-related things. - int numItems; - int *IDs; - float *lengths; - - lengths = vector_lengths(itemkey, eventtable, eventval, - &numItems, &IDs); - - // Now update the similarity model. - numEvents = updateItemCosModel(eventtable, userkey, - itemkey, eventval, recmodelname, - IDs, lengths, numItems, true); - } - break; - case itemPearCF: - { - // Before we update the similarity model, we need to obtain - // a few item-related things. - int numItems; - int *IDs; - float *avgs, *pearsons; - - pearson_info(itemkey, eventtable, eventval, &numItems, - &IDs, &avgs, &pearsons); - - // Now update the similarity model. - numEvents = updateItemPearModel(eventtable, userkey, - itemkey, eventval, recmodelname, - IDs, avgs, pearsons, numItems, true); - } - break; - case userCosCF: - { - // Before we update the similarity model, we need to obtain - // a few user-related things. - int numUsers; - int *IDs; - float *lengths; - - lengths = vector_lengths(userkey, eventtable, eventval, - &numUsers, &IDs); - - // Now update the similarity model. - numEvents = updateUserCosModel(eventtable, userkey, - itemkey, eventval, recmodelname, - IDs, lengths, numUsers, true); - } - break; - case userPearCF: - { - // Before we update the similarity model, we need to obtain - // a few user-related things. - int numUsers; - int *IDs; - float *avgs, *pearsons; - - pearson_info(userkey, eventtable, eventval, &numUsers, - &IDs, &avgs, &pearsons); - - // Now update the similarity model. - numEvents = updateUserPearModel(eventtable, userkey, - itemkey, eventval, recmodelname, - IDs, avgs, pearsons, numUsers, true); - } - break; - case SVD: - // No additional functions, just update the model. - numEvents = SVDtrain(userkey, itemkey, - eventtable, eventval, - recmodelname, recmodelname2, true); - break; - default: - break; - } - - // Finally, we update the cell to indicate how many events were used - // to build it. We'll also reset the updatecounter. - countquerystring = (char*) palloc(1024*sizeof(char)); - sprintf(countquerystring,"UPDATE %s SET updatecounter = 0, eventtotal = %d;", - recindexname,numEvents); - - // Execute normally, we don't need to see results. - recathon_queryExecute(countquerystring); - pfree(countquerystring); - } else { - // Just increment. - countquerystring = (char*) palloc(1024*sizeof(char)); - sprintf(countquerystring,"UPDATE %s SET updatecounter = updatecounter+1;", - recindexname); - // Execute normally, we don't need to see results. - recathon_queryExecute(countquerystring); - pfree(countquerystring); - } - - // Final cleanup. - pfree(recmodelname); - if (recmodelname2) - pfree(recmodelname2); - pfree(recindexname); - pfree(userkey); - pfree(itemkey); - pfree(eventval); - } - - recathon_queryEnd(queryDesc,recathoncontext); - pfree(querystring); + float update_threshold; + RangeVar *cataloguerv; + // Query information. + char *querystring; + QueryDesc *queryDesc; + PlanState *planstate; + TupleTableSlot *slot; + MemoryContext recathoncontext; + + // If this fails, there's no RecModelsCatalogue, so + // there are no recommenders. + cataloguerv = makeRangeVar(NULL,"recmodelscatalogue",0); + if (!relationExists(cataloguerv)) { + pfree(cataloguerv); + return; + } + pfree(cataloguerv); + + // Obtain the update threshold. + update_threshold = getUpdateThreshold(); + + // Now that we've confirmed the RecModelsCatalogue + // exists, let's query it to find the necessary + // information. + querystring = (char*) palloc(1024*sizeof(char)); + sprintf(querystring,"SELECT * FROM RecModelsCatalogue WHERE eventtable = '%s';", + eventtable); + queryDesc = recathon_queryStart(querystring,&recathoncontext); + planstate = queryDesc->planstate; + + for (;;) { + // In case of SVD, recmodelname is the user model, and the other is the + // item model. Otherwise, recmodelname2 is nothing. + char *recindexname, *recmodelname, *recmodelname2; + char *userkey, *itemkey, *eventval, *strmethod; + int updatecounter = -1; + int eventtotal = -1; + recMethod method; + // Query information for our internal query. + char *countquerystring; + QueryDesc *countqueryDesc; + PlanState *countplanstate; + TupleTableSlot *countslot; + MemoryContext countcontext; + + slot = ExecProcNode(planstate); + if (TupIsNull(slot)) break; + + // Acquire the data for this recommender. + recindexname = getTupleString(slot,"recommenderindexname"); + userkey = getTupleString(slot,"userkey"); + itemkey = getTupleString(slot,"itemkey"); + eventval = getTupleString(slot,"eventval"); + strmethod = getTupleString(slot,"method"); + + // Get the recMethod. + method = getRecMethod(strmethod); + pfree(strmethod); + + // Failure case, continue to next tuple. + if (method < 0) { + pfree(recindexname); + pfree(userkey); + pfree(itemkey); + pfree(eventval); + continue; + } + + // We now have all the information necessary to update this + // recommender's cell counter. First we need to acquire it, + // and we might as well get the model name while we're at it. + countquerystring = (char*) palloc(1024*sizeof(char)); + if (method == SVD) + sprintf(countquerystring,"SELECT recusermodelname, recitemmodelname, updatecounter, eventtotal FROM %s;", + recindexname); + else + sprintf(countquerystring,"SELECT recmodelname, updatecounter, eventtotal FROM %s;", + recindexname); + + countqueryDesc = recathon_queryStart(countquerystring,&countcontext); + countplanstate = countqueryDesc->planstate; + + // Go through what should be the only tuple and obtain the data. + countslot = ExecProcNode(countplanstate); + if (TupIsNull(countslot)) { + // More failure conditions. We can't just error out + // because the INSERT still needs to happen. + recathon_queryEnd(countqueryDesc,countcontext); + pfree(countquerystring); + pfree(recindexname); + pfree(userkey); + pfree(itemkey); + pfree(eventval); + continue; + } + + // Get the relevant data. + if (method == SVD) { + recmodelname = getTupleString(countslot,"recusermodelname"); + recmodelname2 = getTupleString(countslot,"recitemmodelname"); + } else { + recmodelname = getTupleString(countslot,"recmodelname"); + recmodelname2 = NULL; + } + updatecounter = getTupleInt(countslot,"updatecounter"); + eventtotal = getTupleInt(countslot,"eventtotal"); + + recathon_queryEnd(countqueryDesc,countcontext); + pfree(countquerystring); + + // Even more failure conditions. + if (updatecounter < 0) { + pfree(recmodelname); + if (recmodelname2) + pfree(recmodelname2); + pfree(recindexname); + pfree(userkey); + pfree(itemkey); + continue; + } + + // With that done, we check the original counter. If the + // number of new events is greater than threshold * the + // number of events currently used in the model, we need + // to trigger an update. Otherwise, just increment. + updatecounter++; + + if (updatecounter >= (int) (update_threshold * eventtotal)) { + int numEvents = 0; + + // What we do depends on the recommendation method. + switch (method) { + case itemCosCF: + { + // Before we update the similarity model, we need to obtain + // a few item-related things. + int numItems; + int *IDs; + float *lengths; + + lengths = vector_lengths(itemkey, eventtable, eventval, + &numItems, &IDs); + + // Now update the similarity model. + numEvents = updateItemCosModel(eventtable, userkey, + itemkey, eventval, recmodelname, + IDs, lengths, numItems, true); + } + break; + case itemPearCF: + { + // Before we update the similarity model, we need to obtain + // a few item-related things. + int numItems; + int *IDs; + float *avgs, *pearsons; + + pearson_info(itemkey, eventtable, eventval, &numItems, + &IDs, &avgs, &pearsons); + + // Now update the similarity model. + numEvents = updateItemPearModel(eventtable, userkey, + itemkey, eventval, recmodelname, + IDs, avgs, pearsons, numItems, true); + } + break; + case userCosCF: + { + // Before we update the similarity model, we need to obtain + // a few user-related things. + int numUsers; + int *IDs; + float *lengths; + + lengths = vector_lengths(userkey, eventtable, eventval, + &numUsers, &IDs); + + // Now update the similarity model. + numEvents = updateUserCosModel(eventtable, userkey, + itemkey, eventval, recmodelname, + IDs, lengths, numUsers, true); + } + break; + case userPearCF: + { + // Before we update the similarity model, we need to obtain + // a few user-related things. + int numUsers; + int *IDs; + float *avgs, *pearsons; + + pearson_info(userkey, eventtable, eventval, &numUsers, + &IDs, &avgs, &pearsons); + + // Now update the similarity model. + numEvents = updateUserPearModel(eventtable, userkey, + itemkey, eventval, recmodelname, + IDs, avgs, pearsons, numUsers, true); + } + break; + case SVD: + // No additional functions, just update the model. + numEvents = SVDtrain(userkey, itemkey, + eventtable, eventval, + recmodelname, recmodelname2, true); + break; + default: + break; + } + + // Finally, we update the cell to indicate how many events were used + // to build it. We'll also reset the updatecounter. + countquerystring = (char*) palloc(1024*sizeof(char)); + sprintf(countquerystring,"UPDATE %s SET updatecounter = 0, eventtotal = %d;", + recindexname,numEvents); + + // Execute normally, we don't need to see results. + recathon_queryExecute(countquerystring); + pfree(countquerystring); + } else { + // Just increment. + countquerystring = (char*) palloc(1024*sizeof(char)); + sprintf(countquerystring,"UPDATE %s SET updatecounter = updatecounter+1;", + recindexname); + // Execute normally, we don't need to see results. + recathon_queryExecute(countquerystring); + pfree(countquerystring); + } + + // Final cleanup. + pfree(recmodelname); + if (recmodelname2) + pfree(recmodelname2); + pfree(recindexname); + pfree(userkey); + pfree(itemkey); + pfree(eventval); + } + + recathon_queryEnd(queryDesc,recathoncontext); + pfree(querystring); } /* ---------------------------------------------------------------- @@ -1211,17 +1211,17 @@ updateCellCounter(char *eventtable, TupleTableSlot *insertslot) { */ int binarySearch(int *array, int value, int lo, int hi) { - int mid; - - mid = (hi + lo) / 2; - if (array[mid] == value) return mid; - // Edge case. - if (mid == lo) return -1; - // Normal recursive case. - if (array[mid] < value) - return binarySearch(array, value, mid, hi); - else - return binarySearch(array, value, lo, mid); + int mid; + + mid = (hi + lo) / 2; + if (array[mid] == value) return mid; + // Edge case. + if (mid == lo) return -1; + // Normal recursive case. + if (array[mid] < value) + return binarySearch(array, value, mid, hi); + else + return binarySearch(array, value, lo, mid); } /* ---------------------------------------------------------------- @@ -1235,76 +1235,76 @@ binarySearch(int *array, int value, int lo, int hi) { */ float* vector_lengths(char *key, char *eventtable, char *eventval, int *totalNum, int **IDlist) { - int *IDs; - float *lengths; - int i, j, numItems, priorID; - // Objects for querying. - char *querystring; - QueryDesc *queryDesc; - PlanState *planstate; - TupleTableSlot *slot; - MemoryContext recathoncontext; - - // We start by getting the number of distinct items in the event table. - querystring = (char*) palloc(1024*sizeof(char)); - sprintf(querystring,"SELECT COUNT(DISTINCT %s) FROM %s;", - key,eventtable); - queryDesc = recathon_queryStart(querystring,&recathoncontext); - planstate = queryDesc->planstate; - slot = ExecProcNode(planstate); - numItems = getTupleInt(slot,"count"); - recathon_queryEnd(queryDesc,recathoncontext); - - // Now that we have the number of items, we can create an array or two. - IDs = (int*) palloc(numItems*sizeof(int)); - lengths = (float*) palloc(numItems*sizeof(float)); - for (j = 0; j < numItems; j++) - lengths[j] = 0.0; - - // Now we need to populate the two arrays. We'll get all the events from - // the events table. - priorID = -1; - sprintf(querystring,"SELECT * FROM %s ORDER BY %s;",eventtable,key); - queryDesc = recathon_queryStart(querystring,&recathoncontext); - planstate = queryDesc->planstate; - i = -1; - - // This query grabs all item IDs, so we can store them. Later we'll calculate - // vector lengths. - for (;;) { - int currentID = 0; - float currentEvent = 0.0; - - slot = ExecProcNode(planstate); - if (TupIsNull(slot)) break; - - currentID = getTupleInt(slot,key); - - // Are we dealing with a new item ID? If so, switch to the next slot. - if (currentID != priorID) { - i++; - priorID = currentID; - IDs[i] = currentID; - } - - currentEvent = getTupleFloat(slot,eventval); - lengths[i] += currentEvent*currentEvent; - } - - // Query cleanup. - recathon_queryEnd(queryDesc,recathoncontext); - pfree(querystring); - - // Now that we've totally queried the events table, we need to - // take the square root of each length and we're done. - for (i = 0; i < numItems; i++) - lengths[i] = sqrtf(lengths[i]); - - // Return data. - (*totalNum) = numItems; - (*IDlist) = IDs; - - return lengths; + int *IDs; + float *lengths; + int i, j, numItems, priorID; + // Objects for querying. + char *querystring; + QueryDesc *queryDesc; + PlanState *planstate; + TupleTableSlot *slot; + MemoryContext recathoncontext; + + // We start by getting the number of distinct items in the event table. + querystring = (char*) palloc(1024*sizeof(char)); + sprintf(querystring,"SELECT COUNT(DISTINCT %s) FROM %s;", + key,eventtable); + queryDesc = recathon_queryStart(querystring,&recathoncontext); + planstate = queryDesc->planstate; + slot = ExecProcNode(planstate); + numItems = getTupleInt(slot,"count"); + recathon_queryEnd(queryDesc,recathoncontext); + + // Now that we have the number of items, we can create an array or two. + IDs = (int*) palloc(numItems*sizeof(int)); + lengths = (float*) palloc(numItems*sizeof(float)); + for (j = 0; j < numItems; j++) + lengths[j] = 0.0; + + // Now we need to populate the two arrays. We'll get all the events from + // the events table. + priorID = -1; + sprintf(querystring,"SELECT * FROM %s ORDER BY %s;",eventtable,key); + queryDesc = recathon_queryStart(querystring,&recathoncontext); + planstate = queryDesc->planstate; + i = -1; + + // This query grabs all item IDs, so we can store them. Later we'll calculate + // vector lengths. + for (;;) { + int currentID = 0; + float currentEvent = 0.0; + + slot = ExecProcNode(planstate); + if (TupIsNull(slot)) break; + + currentID = getTupleInt(slot,key); + + // Are we dealing with a new item ID? If so, switch to the next slot. + if (currentID != priorID) { + i++; + priorID = currentID; + IDs[i] = currentID; + } + + currentEvent = getTupleFloat(slot,eventval); + lengths[i] += currentEvent*currentEvent; + } + + // Query cleanup. + recathon_queryEnd(queryDesc,recathoncontext); + pfree(querystring); + + // Now that we've totally queried the events table, we need to + // take the square root of each length and we're done. + for (i = 0; i < numItems; i++) + lengths[i] = sqrtf(lengths[i]); + + // Return data. + (*totalNum) = numItems; + (*IDlist) = IDs; + + return lengths; } /* ---------------------------------------------------------------- @@ -1318,29 +1318,29 @@ vector_lengths(char *key, char *eventtable, char *eventval, int *totalNum, int * */ float dotProduct(sim_node item1, sim_node item2) { - sim_node temp1, temp2; - float similarity; - - if (item1 == NULL || item2 == NULL) return 0; - - similarity = 0.0; - - // Check every event for the first item, and see how - // many of those users also rated the second item. - temp1 = item1; temp2 = item2; - while (temp1 && temp2) { - if (temp1->id == temp2->id) { - similarity += temp1->event * temp2->event; - temp1 = temp1->next; - temp2 = temp2->next; - } else if (temp1->id > temp2->id) { - temp2 = temp2->next; - } else { - temp1 = temp1->next; - } - } - - return similarity; + sim_node temp1, temp2; + float similarity; + + if (item1 == NULL || item2 == NULL) return 0; + + similarity = 0.0; + + // Check every event for the first item, and see how + // many of those users also rated the second item. + temp1 = item1; temp2 = item2; + while (temp1 && temp2) { + if (temp1->id == temp2->id) { + similarity += temp1->event * temp2->event; + temp1 = temp1->next; + temp2 = temp2->next; + } else if (temp1->id > temp2->id) { + temp2 = temp2->next; + } else { + temp1 = temp1->next; + } + } + + return similarity; } /* ---------------------------------------------------------------- @@ -1352,18 +1352,18 @@ dotProduct(sim_node item1, sim_node item2) { */ float cosineSimilarity(sim_node item1, sim_node item2, float length1, float length2) { - float numerator; - float denominator; - - // Short-circuit check. If one of the items has no events, - // no point checking similarity. This also avoids a possible - // divide-by-zero error. - denominator = length1 * length2; - if (denominator <= 0) return 0; - - numerator = dotProduct(item1,item2); - if (numerator <= 0) return 0; - else return numerator / denominator; + float numerator; + float denominator; + + // Short-circuit check. If one of the items has no events, + // no point checking similarity. This also avoids a possible + // divide-by-zero error. + denominator = length1 * length2; + if (denominator <= 0) return 0; + + numerator = dotProduct(item1,item2); + if (numerator <= 0) return 0; + else return numerator / denominator; } /* ---------------------------------------------------------------- @@ -1377,188 +1377,188 @@ cosineSimilarity(sim_node item1, sim_node item2, float length1, float length2) { */ int updateItemCosModel(char *eventtable, char *userkey, char *itemkey, - char *eventval, char *modelname, int *itemIDs, float *itemLengths, - int numItems, bool update) { - int i, j, priorID; - int numEvents = 0; - char *querystring, *insertstring, *temprecfile; - sim_node *itemEvents; - // Information for other queries. - QueryDesc *simqueryDesc; - PlanState *simplanstate; - TupleTableSlot *simslot; - MemoryContext simcontext; - // Information for writing to file. - FILE *fp; - temprecfile = (char*) palloc(256*sizeof(char)); - sprintf(temprecfile,"recathon_temp_%s.dat",modelname); - - // If this is us updating a cell as opposed to building - // a recommender, we need to drop the existing entries. - if (update) { - char *dropstring; - - dropstring = (char*) palloc(256*sizeof(char)); - sprintf(dropstring,"DELETE FROM %s;",modelname); - recathon_queryExecute(dropstring); - pfree(dropstring); - } - - // With the precomputation done, we need to derive the actual item - // similarities. We can do this in a way that's linear in the number - // of I/Os and also the amount of storage. The complexity is relegated - // to in-memory calculations, which is the most affordable. We need to - // use this data structure here. - itemEvents = (sim_node*) palloc(numItems*sizeof(sim_node)); - for (i = 0; i < numItems; i++) - itemEvents[i] = NULL; - - // With the model created, we need to populate it, which means calculating - // similarity between all item pairs. We need to query the events table - // in order to get the key information. We'll also keep track of the number - // of events used, since we need to store that information. - querystring = (char*) palloc(1024*sizeof(char)); - sprintf(querystring,"SELECT r.%s,r.%s,r.%s FROM %s r ORDER BY r.%s;", - userkey,itemkey,eventval,eventtable,itemkey); - - // Begin extracting data. - priorID = -1; - simqueryDesc = recathon_queryStart(querystring, &simcontext); - simplanstate = simqueryDesc->planstate; - i = -1; - - for (;;) { - int simuser, simitem; - float simevent; - sim_node newnode; - - // Shut the compiler up. - simuser = 0; simitem = 0; simevent = 0.0; - - simslot = ExecProcNode(simplanstate); - if (TupIsNull(simslot)) break; - - simuser = getTupleInt(simslot,userkey); - simitem = getTupleInt(simslot,itemkey); - simevent = getTupleFloat(simslot,eventval); - - // Are we dealing with a new item ID? If so, switch to the next slot. - if (simitem != priorID) { - priorID = simitem; - i++; - } - - // We now have the user, item, and event for this tuple. - // We insert the results as a sim_node into the - // itemEvents table; we'll do calculations later. - newnode = createSimNode(simuser, simevent); - itemEvents[i] = simInsert(itemEvents[i], newnode); - numEvents++; - } - - // Query cleanup. - recathon_queryEnd(simqueryDesc, simcontext); - - // We're going to write out the results to file. - if ((fp = fopen(temprecfile,"w")) == NULL) - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("failed to open temporary file"))); - insertstring = (char*) palloc(128*sizeof(char)); - - // Now we do the similarity calculations. Note that we - // don't include duplicate entries, to save time and space. - // The first item ALWAYS has a lower value than the second. - for (i = 0; i < numItems; i++) { - float length_i; - sim_node item_i; - nbr_node temp_nbr; - nbr_node nbr_list = NULL; - - item_i = itemEvents[i]; - if (!item_i) continue; - length_i = itemLengths[i]; - - for (j = i+1; j < numItems; j++) { - float length_j; - sim_node item_j; - int item1, item2; - float similarity; - - item_j = itemEvents[j]; - if (!item_j) continue; - length_j = itemLengths[j]; - - similarity = cosineSimilarity(item_i, item_j, length_i, length_j); - if (similarity <= 0) continue; - item1 = itemIDs[i]; - item2 = itemIDs[j]; - - // Now we write. - if (NBRHOOD <= 0) { - sprintf(insertstring,"%d;%d;%f\n", - item1,item2,similarity); - fwrite(insertstring,1,strlen(insertstring),fp); - } else { - nbr_node newnbr = createNbrNode(item1,item2,similarity); - nbr_list = nbrInsert(nbr_list,newnbr,NBRHOOD); - } - } - - // If we have a limited neighborhood, we write the results here. - if (NBRHOOD > 0) { - for (temp_nbr = nbr_list; temp_nbr; temp_nbr = temp_nbr->next) { - sprintf(insertstring,"%d;%d;%f\n",temp_nbr->item1, - temp_nbr->item2,temp_nbr->similarity); - fwrite(insertstring,1,strlen(insertstring),fp); - } - freeNbrList(nbr_list); - } - - CHECK_FOR_INTERRUPTS(); - } - - pfree(insertstring); - fclose(fp); - - // If we are updating an existing similarity model, - // we will want to drop the existing primary key - // constraint before doing the copy, to save time. - if (update) { - sprintf(querystring,"ALTER TABLE %s DROP CONSTRAINT %s_pkey;", - modelname,modelname); - recathon_utilityExecute(querystring); - } - - // With all the data written out, we're going to - // issue a COPY FROM command to bulk load the data - // into the database. - sprintf(querystring,"COPY %s FROM '%s' DELIMITERS ';';", - modelname,temprecfile); - recathon_utilityExecute(querystring); - - // Now we add the primary key constraint. It's - // faster to add it after adding the data than - // having it incrementally updated. - sprintf(querystring,"ALTER TABLE %s ADD PRIMARY KEY (item1, item2)",modelname); - recathon_utilityExecute(querystring); - pfree(querystring); - - // We'll delete the temporary file here, to not hold onto - // memory longer than necessary. - if (unlink(temprecfile) < 0) - ereport(WARNING, - (errcode(ERRCODE_WARNING), - errmsg("failed to delete temporary file"))); - - // Free up the lists of sim_nodes and start again. - for (i = 0; i < numItems; i++) { - freeSimList(itemEvents[i]); - itemEvents[i] = NULL; - } - - // Return the number of events we used. - return numEvents; + char *eventval, char *modelname, int *itemIDs, float *itemLengths, + int numItems, bool update) { + int i, j, priorID; + int numEvents = 0; + char *querystring, *insertstring, *temprecfile; + sim_node *itemEvents; + // Information for other queries. + QueryDesc *simqueryDesc; + PlanState *simplanstate; + TupleTableSlot *simslot; + MemoryContext simcontext; + // Information for writing to file. + FILE *fp; + temprecfile = (char*) palloc(256*sizeof(char)); + sprintf(temprecfile,"recathon_temp_%s.dat",modelname); + + // If this is us updating a cell as opposed to building + // a recommender, we need to drop the existing entries. + if (update) { + char *dropstring; + + dropstring = (char*) palloc(256*sizeof(char)); + sprintf(dropstring,"DELETE FROM %s;",modelname); + recathon_queryExecute(dropstring); + pfree(dropstring); + } + + // With the precomputation done, we need to derive the actual item + // similarities. We can do this in a way that's linear in the number + // of I/Os and also the amount of storage. The complexity is relegated + // to in-memory calculations, which is the most affordable. We need to + // use this data structure here. + itemEvents = (sim_node*) palloc(numItems*sizeof(sim_node)); + for (i = 0; i < numItems; i++) + itemEvents[i] = NULL; + + // With the model created, we need to populate it, which means calculating + // similarity between all item pairs. We need to query the events table + // in order to get the key information. We'll also keep track of the number + // of events used, since we need to store that information. + querystring = (char*) palloc(1024*sizeof(char)); + sprintf(querystring,"SELECT r.%s,r.%s,r.%s FROM %s r ORDER BY r.%s;", + userkey,itemkey,eventval,eventtable,itemkey); + + // Begin extracting data. + priorID = -1; + simqueryDesc = recathon_queryStart(querystring, &simcontext); + simplanstate = simqueryDesc->planstate; + i = -1; + + for (;;) { + int simuser, simitem; + float simevent; + sim_node newnode; + + // Shut the compiler up. + simuser = 0; simitem = 0; simevent = 0.0; + + simslot = ExecProcNode(simplanstate); + if (TupIsNull(simslot)) break; + + simuser = getTupleInt(simslot,userkey); + simitem = getTupleInt(simslot,itemkey); + simevent = getTupleFloat(simslot,eventval); + + // Are we dealing with a new item ID? If so, switch to the next slot. + if (simitem != priorID) { + priorID = simitem; + i++; + } + + // We now have the user, item, and event for this tuple. + // We insert the results as a sim_node into the + // itemEvents table; we'll do calculations later. + newnode = createSimNode(simuser, simevent); + itemEvents[i] = simInsert(itemEvents[i], newnode); + numEvents++; + } + + // Query cleanup. + recathon_queryEnd(simqueryDesc, simcontext); + + // We're going to write out the results to file. + if ((fp = fopen(temprecfile,"w")) == NULL) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("failed to open temporary file"))); + insertstring = (char*) palloc(128*sizeof(char)); + + // Now we do the similarity calculations. Note that we + // don't include duplicate entries, to save time and space. + // The first item ALWAYS has a lower value than the second. + for (i = 0; i < numItems; i++) { + float length_i; + sim_node item_i; + nbr_node temp_nbr; + nbr_node nbr_list = NULL; + + item_i = itemEvents[i]; + if (!item_i) continue; + length_i = itemLengths[i]; + + for (j = i+1; j < numItems; j++) { + float length_j; + sim_node item_j; + int item1, item2; + float similarity; + + item_j = itemEvents[j]; + if (!item_j) continue; + length_j = itemLengths[j]; + + similarity = cosineSimilarity(item_i, item_j, length_i, length_j); + if (similarity <= 0) continue; + item1 = itemIDs[i]; + item2 = itemIDs[j]; + + // Now we write. + if (NBRHOOD <= 0) { + sprintf(insertstring,"%d;%d;%f\n", + item1,item2,similarity); + fwrite(insertstring,1,strlen(insertstring),fp); + } else { + nbr_node newnbr = createNbrNode(item1,item2,similarity); + nbr_list = nbrInsert(nbr_list,newnbr,NBRHOOD); + } + } + + // If we have a limited neighborhood, we write the results here. + if (NBRHOOD > 0) { + for (temp_nbr = nbr_list; temp_nbr; temp_nbr = temp_nbr->next) { + sprintf(insertstring,"%d;%d;%f\n",temp_nbr->item1, + temp_nbr->item2,temp_nbr->similarity); + fwrite(insertstring,1,strlen(insertstring),fp); + } + freeNbrList(nbr_list); + } + + CHECK_FOR_INTERRUPTS(); + } + + pfree(insertstring); + fclose(fp); + + // If we are updating an existing similarity model, + // we will want to drop the existing primary key + // constraint before doing the copy, to save time. + if (update) { + sprintf(querystring,"ALTER TABLE %s DROP CONSTRAINT %s_pkey;", + modelname,modelname); + recathon_utilityExecute(querystring); + } + + // With all the data written out, we're going to + // issue a COPY FROM command to bulk load the data + // into the database. + sprintf(querystring,"COPY %s FROM '%s' DELIMITERS ';';", + modelname,temprecfile); + recathon_utilityExecute(querystring); + + // Now we add the primary key constraint. It's + // faster to add it after adding the data than + // having it incrementally updated. + sprintf(querystring,"ALTER TABLE %s ADD PRIMARY KEY (item1, item2)",modelname); + recathon_utilityExecute(querystring); + pfree(querystring); + + // We'll delete the temporary file here, to not hold onto + // memory longer than necessary. + if (unlink(temprecfile) < 0) + ereport(WARNING, + (errcode(ERRCODE_WARNING), + errmsg("failed to delete temporary file"))); + + // Free up the lists of sim_nodes and start again. + for (i = 0; i < numItems; i++) { + freeSimList(itemEvents[i]); + itemEvents[i] = NULL; + } + + // Return the number of events we used. + return numEvents; } /* ---------------------------------------------------------------- @@ -1574,126 +1574,126 @@ updateItemCosModel(char *eventtable, char *userkey, char *itemkey, */ void pearson_info(char *key, char *eventtable, char *eventval, int *totalNum, - int **IDlist, float **avgList, float **pearsonList) { - int *IDs, *counts; - float *avgs, *pearsons; - int i, j, numItems, priorID; - // Objects for querying. - char *querystring; - QueryDesc *queryDesc; - PlanState *planstate; - TupleTableSlot *slot; - MemoryContext recathoncontext; - - // We start by getting the number of items in the event table. - querystring = (char*) palloc(1024*sizeof(char)); - sprintf(querystring,"SELECT COUNT(DISTINCT %s) FROM %s;", - key,eventtable); - queryDesc = recathon_queryStart(querystring,&recathoncontext); - planstate = queryDesc->planstate; - slot = ExecProcNode(planstate); - numItems = getTupleInt(slot,"count"); - recathon_queryEnd(queryDesc,recathoncontext); - - // Now that we have the number of items, we can create an array or two. - IDs = (int*) palloc(numItems*sizeof(int)); - counts = (int*) palloc(numItems*sizeof(int)); - for (j = 0; j < numItems; j++) - counts[j] = 0; - avgs = (float*) palloc(numItems*sizeof(float)); - for (j = 0; j < numItems; j++) - avgs[j] = 0.0; - pearsons = (float*) palloc(numItems*sizeof(float)); - for (j = 0; j < numItems; j++) - pearsons[j] = 0.0; - - // Now we need to populate the four arrays. We'll get all the events from - // the events table. - priorID = -1; - sprintf(querystring,"SELECT * FROM %s ORDER BY %s;",eventtable,key); - queryDesc = recathon_queryStart(querystring,&recathoncontext); - planstate = queryDesc->planstate; - i = -1; - - // This query grabs all item IDs, so we can store them. It also fills in - // some other information we'll need. - for (;;) { - int currentID = 0; - float currentEvent = 0.0; - - slot = ExecProcNode(planstate); - if (TupIsNull(slot)) break; - - currentID = getTupleInt(slot,key); - - // Are we dealing with a new item ID? If so, switch to the next slot. - if (currentID != priorID) { - i++; - priorID = currentID; - IDs[i] = currentID; - } - - currentEvent = getTupleFloat(slot,eventval); - counts[i] += 1; - avgs[i] += currentEvent; - } - - // Query cleanup. - recathon_queryEnd(queryDesc,recathoncontext); - - // Now that we've totally queried the events table, we need to - // obtain the actual averages for each item. - for (i = 0; i < numItems; i++) { - if (counts[i] > 0) - avgs[i] /= ((float)counts[i]); - } - pfree(counts); - - // We can reuse the same query to obtain the events again, and - // calculate Pearsons. - priorID = -1; - queryDesc = recathon_queryStart(querystring,&recathoncontext); - planstate = queryDesc->planstate; - i = -1; - - // We scan through the entire event table once, sorting the events - // based on which item they apply to. - for (;;) { - int currentID = 0; - float currentEvent = 0.0; - float difference = 0.0; - - slot = ExecProcNode(planstate); - if (TupIsNull(slot)) break; - - currentID = getTupleInt(slot,key); - // Are we dealing with a new item ID? If so, switch to the next slot. - if (currentID != priorID) { - priorID = currentID; - i++; - } - currentEvent = getTupleFloat(slot,eventval); - - // We have the item number and event value from this tuple. - // Now we need to update Pearsons. - difference = currentEvent - avgs[i]; - pearsons[i] += difference*difference; - } - - // Query cleanup. - recathon_queryEnd(queryDesc,recathoncontext); - pfree(querystring); - - // Now that we've totally queried the events table, we need to - // take the square root of each Pearson and we're done. - for (i = 0; i < numItems; i++) - pearsons[i] = sqrtf(pearsons[i]); - - // Return data. - (*totalNum) = numItems; - (*IDlist) = IDs; - (*avgList) = avgs; - (*pearsonList) = pearsons; + int **IDlist, float **avgList, float **pearsonList) { + int *IDs, *counts; + float *avgs, *pearsons; + int i, j, numItems, priorID; + // Objects for querying. + char *querystring; + QueryDesc *queryDesc; + PlanState *planstate; + TupleTableSlot *slot; + MemoryContext recathoncontext; + + // We start by getting the number of items in the event table. + querystring = (char*) palloc(1024*sizeof(char)); + sprintf(querystring,"SELECT COUNT(DISTINCT %s) FROM %s;", + key,eventtable); + queryDesc = recathon_queryStart(querystring,&recathoncontext); + planstate = queryDesc->planstate; + slot = ExecProcNode(planstate); + numItems = getTupleInt(slot,"count"); + recathon_queryEnd(queryDesc,recathoncontext); + + // Now that we have the number of items, we can create an array or two. + IDs = (int*) palloc(numItems*sizeof(int)); + counts = (int*) palloc(numItems*sizeof(int)); + for (j = 0; j < numItems; j++) + counts[j] = 0; + avgs = (float*) palloc(numItems*sizeof(float)); + for (j = 0; j < numItems; j++) + avgs[j] = 0.0; + pearsons = (float*) palloc(numItems*sizeof(float)); + for (j = 0; j < numItems; j++) + pearsons[j] = 0.0; + + // Now we need to populate the four arrays. We'll get all the events from + // the events table. + priorID = -1; + sprintf(querystring,"SELECT * FROM %s ORDER BY %s;",eventtable,key); + queryDesc = recathon_queryStart(querystring,&recathoncontext); + planstate = queryDesc->planstate; + i = -1; + + // This query grabs all item IDs, so we can store them. It also fills in + // some other information we'll need. + for (;;) { + int currentID = 0; + float currentEvent = 0.0; + + slot = ExecProcNode(planstate); + if (TupIsNull(slot)) break; + + currentID = getTupleInt(slot,key); + + // Are we dealing with a new item ID? If so, switch to the next slot. + if (currentID != priorID) { + i++; + priorID = currentID; + IDs[i] = currentID; + } + + currentEvent = getTupleFloat(slot,eventval); + counts[i] += 1; + avgs[i] += currentEvent; + } + + // Query cleanup. + recathon_queryEnd(queryDesc,recathoncontext); + + // Now that we've totally queried the events table, we need to + // obtain the actual averages for each item. + for (i = 0; i < numItems; i++) { + if (counts[i] > 0) + avgs[i] /= ((float)counts[i]); + } + pfree(counts); + + // We can reuse the same query to obtain the events again, and + // calculate Pearsons. + priorID = -1; + queryDesc = recathon_queryStart(querystring,&recathoncontext); + planstate = queryDesc->planstate; + i = -1; + + // We scan through the entire event table once, sorting the events + // based on which item they apply to. + for (;;) { + int currentID = 0; + float currentEvent = 0.0; + float difference = 0.0; + + slot = ExecProcNode(planstate); + if (TupIsNull(slot)) break; + + currentID = getTupleInt(slot,key); + // Are we dealing with a new item ID? If so, switch to the next slot. + if (currentID != priorID) { + priorID = currentID; + i++; + } + currentEvent = getTupleFloat(slot,eventval); + + // We have the item number and event value from this tuple. + // Now we need to update Pearsons. + difference = currentEvent - avgs[i]; + pearsons[i] += difference*difference; + } + + // Query cleanup. + recathon_queryEnd(queryDesc,recathoncontext); + pfree(querystring); + + // Now that we've totally queried the events table, we need to + // take the square root of each Pearson and we're done. + for (i = 0; i < numItems; i++) + pearsons[i] = sqrtf(pearsons[i]); + + // Return data. + (*totalNum) = numItems; + (*IDlist) = IDs; + (*avgList) = avgs; + (*pearsonList) = pearsons; } /* ---------------------------------------------------------------- @@ -1707,29 +1707,29 @@ pearson_info(char *key, char *eventtable, char *eventval, int *totalNum, */ float pearsonDotProduct(sim_node item1, sim_node item2, float avg1, float avg2) { - sim_node temp1, temp2; - float similarity; - - if (item1 == NULL || item2 == NULL) return 0.0; - - similarity = 0.0; - - // Check every event for the first item, and see how - // many of those users also rated the second item. - temp1 = item1; temp2 = item2; - while (temp1 && temp2) { - if (temp1->id == temp2->id) { - similarity += (temp1->event - avg1) * (temp2->event - avg2); - temp1 = temp1->next; - temp2 = temp2->next; - } else if (temp1->id > temp2->id) { - temp2 = temp2->next; - } else { - temp1 = temp1->next; - } - } - - return similarity; + sim_node temp1, temp2; + float similarity; + + if (item1 == NULL || item2 == NULL) return 0.0; + + similarity = 0.0; + + // Check every event for the first item, and see how + // many of those users also rated the second item. + temp1 = item1; temp2 = item2; + while (temp1 && temp2) { + if (temp1->id == temp2->id) { + similarity += (temp1->event - avg1) * (temp2->event - avg2); + temp1 = temp1->next; + temp2 = temp2->next; + } else if (temp1->id > temp2->id) { + temp2 = temp2->next; + } else { + temp1 = temp1->next; + } + } + + return similarity; } /* ---------------------------------------------------------------- @@ -1741,19 +1741,19 @@ pearsonDotProduct(sim_node item1, sim_node item2, float avg1, float avg2) { */ float pearsonSimilarity(sim_node item1, sim_node item2, float avg1, float avg2, - float pearson1, float pearson2) { - float numerator; - float denominator; - - // Short-circuit check. If one of the items has no events, - // no point checking similarity. This also avoids a possible - // divide-by-zero error. - denominator = pearson1 * pearson2; - if (denominator == 0.0) return 0.0; - - numerator = pearsonDotProduct(item1,item2,avg1,avg2); - if (numerator == 0.0) return 0.0; - else return numerator / denominator; + float pearson1, float pearson2) { + float numerator; + float denominator; + + // Short-circuit check. If one of the items has no events, + // no point checking similarity. This also avoids a possible + // divide-by-zero error. + denominator = pearson1 * pearson2; + if (denominator == 0.0) return 0.0; + + numerator = pearsonDotProduct(item1,item2,avg1,avg2); + if (numerator == 0.0) return 0.0; + else return numerator / denominator; } /* ---------------------------------------------------------------- @@ -1767,194 +1767,194 @@ pearsonSimilarity(sim_node item1, sim_node item2, float avg1, float avg2, */ int updateItemPearModel(char *eventtable, char *userkey, char *itemkey, - char *eventval, char *modelname, int *itemIDs, float *itemAvgs, - float *itemPearsons, int numItems, bool update) { - int i, j, priorID; - int numEvents = 0; - char *querystring, *insertstring, *temprecfile; - sim_node *itemEvents; - // Information for other queries. - QueryDesc *simqueryDesc; - PlanState *simplanstate; - TupleTableSlot *simslot; - MemoryContext simcontext; - // Information for writing to file. - FILE *fp; - temprecfile = (char*) palloc(256*sizeof(char)); - sprintf(temprecfile,"recathon_temp_%s.dat",modelname); - - // If this is us updating a cell as opposed to building - // a recommender, we need to drop the existing entries. - if (update) { - char *dropstring; - - dropstring = (char*) palloc(256*sizeof(char)); - sprintf(dropstring,"DELETE FROM %s;",modelname); - recathon_queryExecute(dropstring); - pfree(dropstring); - } - - // With the precomputation done, we need to derive the actual item - // similarities. We can do this in a way that's linear in the number - // of I/Os and also the amount of storage. The complexity is relegated - // to in-memory calculations, which is the most affordable. We need to - // use this data structure here. - itemEvents = (sim_node*) palloc(numItems*sizeof(sim_node)); - for (i = 0; i < numItems; i++) - itemEvents[i] = NULL; - - // With the model created, we need to populate it, which means calculating - // similarity between all item pairs. We need to query the events table - // in order to get the key information. - querystring = (char*) palloc(1024*sizeof(char)); - sprintf(querystring,"SELECT r.%s,r.%s,r.%s FROM %s r ORDER BY r.%s;", - userkey,itemkey,eventval,eventtable,itemkey); - - // Begin extracting data. - priorID = -1; - simqueryDesc = recathon_queryStart(querystring, &simcontext); - simplanstate = simqueryDesc->planstate; - i = -1; - - for (;;) { - int simuser, simitem; - float simevent; - sim_node newnode; - - // Shut the compiler up. - simuser = 0; simitem = 0; simevent = 0.0; - - simslot = ExecProcNode(simplanstate); - if (TupIsNull(simslot)) break; - - simuser = getTupleInt(simslot,userkey); - simitem = getTupleInt(simslot,itemkey); - simevent = getTupleFloat(simslot,eventval); - - // Are we dealing with a new item ID? If so, switch to the next slot. - if (simitem != priorID) { - priorID = simitem; - i++; - } - - // We now have the user, item, and event for this tuple. - // We insert the results as a sim_node into the - // itemEvents table; we'll do calculations later. - newnode = createSimNode(simuser, simevent); - itemEvents[i] = simInsert(itemEvents[i], newnode); - numEvents++; - } - - // Query cleanup. - recathon_queryEnd(simqueryDesc, simcontext); - pfree(querystring); - - // We're going to write out the results to file. - if ((fp = fopen(temprecfile,"w")) == NULL) - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("failed to open temporary file"))); - insertstring = (char*) palloc(128*sizeof(char)); - - // Now we do the similarity calculations. Note that we - // don't include duplicate entries, to save time and space. - // The first item ALWAYS has a lower value than the second. - for (i = 0; i < numItems; i++) { - float avg_i, pearson_i; - sim_node item_i; - nbr_node temp_nbr; - nbr_node nbr_list = NULL; - - item_i = itemEvents[i]; - if (!item_i) continue; - avg_i = itemAvgs[i]; - pearson_i = itemPearsons[i]; - - for (j = i+1; j < numItems; j++) { - float avg_j, pearson_j; - sim_node item_j; - int item1, item2; - float similarity; - - item_j = itemEvents[j]; - if (!item_j) continue; - avg_j = itemAvgs[j]; - pearson_j = itemPearsons[j]; - - similarity = pearsonSimilarity(item_i, item_j, avg_i, avg_j, pearson_i, pearson_j); - if (similarity == 0.0) continue; - item1 = itemIDs[i]; - item2 = itemIDs[j]; - - // Now we write. - if (NBRHOOD <= 0) { - sprintf(insertstring,"%d;%d;%f\n",item1,item2,similarity); - fwrite(insertstring,1,strlen(insertstring),fp); - } else { - nbr_node newnbr = createNbrNode(item1,item2,similarity); - nbr_list = nbrInsert(nbr_list,newnbr,NBRHOOD); - } - } - - // If we have a limited neighborhood, we write the results here. - if (NBRHOOD > 0) { - for (temp_nbr = nbr_list; temp_nbr; temp_nbr = temp_nbr->next) { - sprintf(insertstring,"%d;%d;%f\n",temp_nbr->item1, - temp_nbr->item2,temp_nbr->similarity); - fwrite(insertstring,1,strlen(insertstring),fp); - } - freeNbrList(nbr_list); - } - - CHECK_FOR_INTERRUPTS(); - } - - pfree(insertstring); - fclose(fp); - - // If we are updating an existing similarity model, - // we will want to drop the existing primary key - // constraint before doing the copy, to save time. - if (update) { - insertstring = (char*) palloc(1024*sizeof(char)); - sprintf(insertstring,"ALTER TABLE %s DROP CONSTRAINT %s_pkey;", - modelname,modelname); - recathon_utilityExecute(insertstring); - pfree(insertstring); - } - - // With all the data written out, we're going to - // issue a COPY FROM command to bulk load the data - // into the database. - insertstring = (char*) palloc(1024*sizeof(char)); - sprintf(insertstring,"COPY %s FROM '%s' DELIMITERS ';';", - modelname,temprecfile); - recathon_utilityExecute(insertstring); - pfree(insertstring); - - // Now we add the primary key constraint. It's - // faster to add it after adding the data than - // having it incrementally updated. - insertstring = (char*) palloc(1024*sizeof(char)); - sprintf(insertstring,"ALTER TABLE %s ADD PRIMARY KEY (item1, item2)",modelname); - recathon_utilityExecute(insertstring); - pfree(insertstring); - - // We'll delete the temporary file here, to not hold onto - // memory longer than necessary. - if (unlink(temprecfile) < 0) - ereport(WARNING, - (errcode(ERRCODE_WARNING), - errmsg("failed to delete temporary file"))); - - // Free up the lists of sim_nodes and start again. - for (i = 0; i < numItems; i++) { - freeSimList(itemEvents[i]); - itemEvents[i] = NULL; - } - - // Return the number of events we used. - return numEvents; + char *eventval, char *modelname, int *itemIDs, float *itemAvgs, + float *itemPearsons, int numItems, bool update) { + int i, j, priorID; + int numEvents = 0; + char *querystring, *insertstring, *temprecfile; + sim_node *itemEvents; + // Information for other queries. + QueryDesc *simqueryDesc; + PlanState *simplanstate; + TupleTableSlot *simslot; + MemoryContext simcontext; + // Information for writing to file. + FILE *fp; + temprecfile = (char*) palloc(256*sizeof(char)); + sprintf(temprecfile,"recathon_temp_%s.dat",modelname); + + // If this is us updating a cell as opposed to building + // a recommender, we need to drop the existing entries. + if (update) { + char *dropstring; + + dropstring = (char*) palloc(256*sizeof(char)); + sprintf(dropstring,"DELETE FROM %s;",modelname); + recathon_queryExecute(dropstring); + pfree(dropstring); + } + + // With the precomputation done, we need to derive the actual item + // similarities. We can do this in a way that's linear in the number + // of I/Os and also the amount of storage. The complexity is relegated + // to in-memory calculations, which is the most affordable. We need to + // use this data structure here. + itemEvents = (sim_node*) palloc(numItems*sizeof(sim_node)); + for (i = 0; i < numItems; i++) + itemEvents[i] = NULL; + + // With the model created, we need to populate it, which means calculating + // similarity between all item pairs. We need to query the events table + // in order to get the key information. + querystring = (char*) palloc(1024*sizeof(char)); + sprintf(querystring,"SELECT r.%s,r.%s,r.%s FROM %s r ORDER BY r.%s;", + userkey,itemkey,eventval,eventtable,itemkey); + + // Begin extracting data. + priorID = -1; + simqueryDesc = recathon_queryStart(querystring, &simcontext); + simplanstate = simqueryDesc->planstate; + i = -1; + + for (;;) { + int simuser, simitem; + float simevent; + sim_node newnode; + + // Shut the compiler up. + simuser = 0; simitem = 0; simevent = 0.0; + + simslot = ExecProcNode(simplanstate); + if (TupIsNull(simslot)) break; + + simuser = getTupleInt(simslot,userkey); + simitem = getTupleInt(simslot,itemkey); + simevent = getTupleFloat(simslot,eventval); + + // Are we dealing with a new item ID? If so, switch to the next slot. + if (simitem != priorID) { + priorID = simitem; + i++; + } + + // We now have the user, item, and event for this tuple. + // We insert the results as a sim_node into the + // itemEvents table; we'll do calculations later. + newnode = createSimNode(simuser, simevent); + itemEvents[i] = simInsert(itemEvents[i], newnode); + numEvents++; + } + + // Query cleanup. + recathon_queryEnd(simqueryDesc, simcontext); + pfree(querystring); + + // We're going to write out the results to file. + if ((fp = fopen(temprecfile,"w")) == NULL) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("failed to open temporary file"))); + insertstring = (char*) palloc(128*sizeof(char)); + + // Now we do the similarity calculations. Note that we + // don't include duplicate entries, to save time and space. + // The first item ALWAYS has a lower value than the second. + for (i = 0; i < numItems; i++) { + float avg_i, pearson_i; + sim_node item_i; + nbr_node temp_nbr; + nbr_node nbr_list = NULL; + + item_i = itemEvents[i]; + if (!item_i) continue; + avg_i = itemAvgs[i]; + pearson_i = itemPearsons[i]; + + for (j = i+1; j < numItems; j++) { + float avg_j, pearson_j; + sim_node item_j; + int item1, item2; + float similarity; + + item_j = itemEvents[j]; + if (!item_j) continue; + avg_j = itemAvgs[j]; + pearson_j = itemPearsons[j]; + + similarity = pearsonSimilarity(item_i, item_j, avg_i, avg_j, pearson_i, pearson_j); + if (similarity == 0.0) continue; + item1 = itemIDs[i]; + item2 = itemIDs[j]; + + // Now we write. + if (NBRHOOD <= 0) { + sprintf(insertstring,"%d;%d;%f\n",item1,item2,similarity); + fwrite(insertstring,1,strlen(insertstring),fp); + } else { + nbr_node newnbr = createNbrNode(item1,item2,similarity); + nbr_list = nbrInsert(nbr_list,newnbr,NBRHOOD); + } + } + + // If we have a limited neighborhood, we write the results here. + if (NBRHOOD > 0) { + for (temp_nbr = nbr_list; temp_nbr; temp_nbr = temp_nbr->next) { + sprintf(insertstring,"%d;%d;%f\n",temp_nbr->item1, + temp_nbr->item2,temp_nbr->similarity); + fwrite(insertstring,1,strlen(insertstring),fp); + } + freeNbrList(nbr_list); + } + + CHECK_FOR_INTERRUPTS(); + } + + pfree(insertstring); + fclose(fp); + + // If we are updating an existing similarity model, + // we will want to drop the existing primary key + // constraint before doing the copy, to save time. + if (update) { + insertstring = (char*) palloc(1024*sizeof(char)); + sprintf(insertstring,"ALTER TABLE %s DROP CONSTRAINT %s_pkey;", + modelname,modelname); + recathon_utilityExecute(insertstring); + pfree(insertstring); + } + + // With all the data written out, we're going to + // issue a COPY FROM command to bulk load the data + // into the database. + insertstring = (char*) palloc(1024*sizeof(char)); + sprintf(insertstring,"COPY %s FROM '%s' DELIMITERS ';';", + modelname,temprecfile); + recathon_utilityExecute(insertstring); + pfree(insertstring); + + // Now we add the primary key constraint. It's + // faster to add it after adding the data than + // having it incrementally updated. + insertstring = (char*) palloc(1024*sizeof(char)); + sprintf(insertstring,"ALTER TABLE %s ADD PRIMARY KEY (item1, item2)",modelname); + recathon_utilityExecute(insertstring); + pfree(insertstring); + + // We'll delete the temporary file here, to not hold onto + // memory longer than necessary. + if (unlink(temprecfile) < 0) + ereport(WARNING, + (errcode(ERRCODE_WARNING), + errmsg("failed to delete temporary file"))); + + // Free up the lists of sim_nodes and start again. + for (i = 0; i < numItems; i++) { + freeSimList(itemEvents[i]); + itemEvents[i] = NULL; + } + + // Return the number of events we used. + return numEvents; } /* ---------------------------------------------------------------- @@ -1968,192 +1968,192 @@ updateItemPearModel(char *eventtable, char *userkey, char *itemkey, */ int updateUserCosModel(char *eventtable, char *userkey, char *itemkey, - char *eventval, char *modelname, int *userIDs, float *userLengths, - int numUsers, bool update) { - int i, j, priorID; - int numEvents = 0; - char *querystring, *insertstring, *temprecfile; - sim_node *userEvents; - // Information for other queries. - QueryDesc *simqueryDesc; - PlanState *simplanstate; - TupleTableSlot *simslot; - MemoryContext simcontext; - // Information for writing to file. - FILE *fp; - temprecfile = (char*) palloc(256*sizeof(char)); - sprintf(temprecfile,"recathon_temp_%s.dat",modelname); - - // If this is us updating a cell as opposed to building - // a recommender, we need to drop the existing entries. - if (update) { - char *dropstring; - - dropstring = (char*) palloc(256*sizeof(char)); - sprintf(dropstring,"DELETE FROM %s;",modelname); - recathon_queryExecute(dropstring); - pfree(dropstring); - } - - // With the precomputation done, we need to derive the actual user - // similarities. We can do this in a way that's linear in the number - // of I/Os and also the amount of storage. The complexity is relegated - // to in-memory calculations, which is the most affordable. We need to - // use this data structure here. - userEvents = (sim_node*) palloc(numUsers*sizeof(sim_node)); - for (i = 0; i < numUsers; i++) - userEvents[i] = NULL; - - // With the model created, we need to populate it, which means calculating - // similarity between all user pairs. We need to query the events table - // in order to get the key information. - querystring = (char*) palloc(1024*sizeof(char)); - sprintf(querystring,"SELECT r.%s,r.%s,r.%s FROM %s r ORDER BY r.%s;", - userkey,itemkey,eventval,eventtable,userkey); - - // Begin extracting data. - priorID = -1; - simqueryDesc = recathon_queryStart(querystring, &simcontext); - simplanstate = simqueryDesc->planstate; - i = -1; - - for (;;) { - int simuser, simitem; - float simevent; - sim_node newnode; - - // Shut the compiler up. - simuser = 0; simitem = 0; simevent = 0.0; - - simslot = ExecProcNode(simplanstate); - if (TupIsNull(simslot)) break; - - simuser = getTupleInt(simslot,userkey); - simitem = getTupleInt(simslot,itemkey); - simevent = getTupleFloat(simslot,eventval); - - // Are we dealing with a new user ID? If so, switch to the next slot. - if (simuser != priorID) { - priorID = simuser; - i++; - } - - // We now have the user, item, and event for this tuple. - // We insert the results as a sim_node into the - // userEvents table; we'll do calculations later. - newnode = createSimNode(simitem, simevent); - userEvents[i] = simInsert(userEvents[i], newnode); - numEvents++; - } - - // Query cleanup. - recathon_queryEnd(simqueryDesc, simcontext); - pfree(querystring); - - // We're going to write out the results to file. - if ((fp = fopen(temprecfile,"w")) == NULL) - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("failed to open temporary file"))); - insertstring = (char*) palloc(128*sizeof(char)); - - // Now we do the similarity calculations. Note that we - // don't include duplicate entries, to save time and space. - // The first user ALWAYS has a lower value than the second. - for (i = 0; i < numUsers; i++) { - float length_i; - sim_node user_i; - nbr_node temp_nbr; - nbr_node nbr_list = NULL; - - user_i = userEvents[i]; - if (!user_i) continue; - length_i = userLengths[i]; - - for (j = i+1; j < numUsers; j++) { - float length_j; - sim_node user_j; - int user1, user2; - float similarity; - - user_j = userEvents[j]; - if (!user_j) continue; - length_j = userLengths[j]; - - similarity = cosineSimilarity(user_i, user_j, length_i, length_j); - if (similarity <= 0) continue; - user1 = userIDs[i]; - user2 = userIDs[j]; - - // Now we write. - if (NBRHOOD <= 0) { - sprintf(insertstring,"%d;%d;%f\n",user1,user2,similarity); - fwrite(insertstring,1,strlen(insertstring),fp); - } else { - nbr_node newnbr = createNbrNode(user1,user2,similarity); - nbr_list = nbrInsert(nbr_list,newnbr,NBRHOOD); - } - } - - // If we have a limited neighborhood, we write the results here. - if (NBRHOOD > 0) { - for (temp_nbr = nbr_list; temp_nbr; temp_nbr = temp_nbr->next) { - sprintf(insertstring,"%d;%d;%f\n",temp_nbr->item1, - temp_nbr->item2,temp_nbr->similarity); - fwrite(insertstring,1,strlen(insertstring),fp); - } - freeNbrList(nbr_list); - } - - CHECK_FOR_INTERRUPTS(); - } - - pfree(insertstring); - fclose(fp); - - // If we are updating an existing similarity model, - // we will want to drop the existing primary key - // constraint before doing the copy, to save time. - if (update) { - insertstring = (char*) palloc(1024*sizeof(char)); - sprintf(insertstring,"ALTER TABLE %s DROP CONSTRAINT %s_pkey;", - modelname,modelname); - recathon_utilityExecute(insertstring); - pfree(insertstring); - } - - // With all the data written out, we're going to - // issue a COPY FROM command to bulk load the data - // into the database. - insertstring = (char*) palloc(1024*sizeof(char)); - sprintf(insertstring,"COPY %s FROM '%s' DELIMITERS ';';", - modelname,temprecfile); - recathon_utilityExecute(insertstring); - pfree(insertstring); - - // Now we add the primary key constraint. It's - // faster to add it after adding the data than - // having it incrementally updated. - insertstring = (char*) palloc(1024*sizeof(char)); - sprintf(insertstring,"ALTER TABLE %s ADD PRIMARY KEY (user1, user2)",modelname); - recathon_utilityExecute(insertstring); - pfree(insertstring); - - // We'll delete the temporary file here, to not hold onto - // memory longer than necessary. - if (unlink(temprecfile) < 0) - ereport(WARNING, - (errcode(ERRCODE_WARNING), - errmsg("failed to delete temporary file"))); - - // Free up the lists of sim_nodes and start again. - for (i = 0; i < numUsers; i++) { - freeSimList(userEvents[i]); - userEvents[i] = NULL; - } - - // Return the number of events we used. - return numEvents; + char *eventval, char *modelname, int *userIDs, float *userLengths, + int numUsers, bool update) { + int i, j, priorID; + int numEvents = 0; + char *querystring, *insertstring, *temprecfile; + sim_node *userEvents; + // Information for other queries. + QueryDesc *simqueryDesc; + PlanState *simplanstate; + TupleTableSlot *simslot; + MemoryContext simcontext; + // Information for writing to file. + FILE *fp; + temprecfile = (char*) palloc(256*sizeof(char)); + sprintf(temprecfile,"recathon_temp_%s.dat",modelname); + + // If this is us updating a cell as opposed to building + // a recommender, we need to drop the existing entries. + if (update) { + char *dropstring; + + dropstring = (char*) palloc(256*sizeof(char)); + sprintf(dropstring,"DELETE FROM %s;",modelname); + recathon_queryExecute(dropstring); + pfree(dropstring); + } + + // With the precomputation done, we need to derive the actual user + // similarities. We can do this in a way that's linear in the number + // of I/Os and also the amount of storage. The complexity is relegated + // to in-memory calculations, which is the most affordable. We need to + // use this data structure here. + userEvents = (sim_node*) palloc(numUsers*sizeof(sim_node)); + for (i = 0; i < numUsers; i++) + userEvents[i] = NULL; + + // With the model created, we need to populate it, which means calculating + // similarity between all user pairs. We need to query the events table + // in order to get the key information. + querystring = (char*) palloc(1024*sizeof(char)); + sprintf(querystring,"SELECT r.%s,r.%s,r.%s FROM %s r ORDER BY r.%s;", + userkey,itemkey,eventval,eventtable,userkey); + + // Begin extracting data. + priorID = -1; + simqueryDesc = recathon_queryStart(querystring, &simcontext); + simplanstate = simqueryDesc->planstate; + i = -1; + + for (;;) { + int simuser, simitem; + float simevent; + sim_node newnode; + + // Shut the compiler up. + simuser = 0; simitem = 0; simevent = 0.0; + + simslot = ExecProcNode(simplanstate); + if (TupIsNull(simslot)) break; + + simuser = getTupleInt(simslot,userkey); + simitem = getTupleInt(simslot,itemkey); + simevent = getTupleFloat(simslot,eventval); + + // Are we dealing with a new user ID? If so, switch to the next slot. + if (simuser != priorID) { + priorID = simuser; + i++; + } + + // We now have the user, item, and event for this tuple. + // We insert the results as a sim_node into the + // userEvents table; we'll do calculations later. + newnode = createSimNode(simitem, simevent); + userEvents[i] = simInsert(userEvents[i], newnode); + numEvents++; + } + + // Query cleanup. + recathon_queryEnd(simqueryDesc, simcontext); + pfree(querystring); + + // We're going to write out the results to file. + if ((fp = fopen(temprecfile,"w")) == NULL) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("failed to open temporary file"))); + insertstring = (char*) palloc(128*sizeof(char)); + + // Now we do the similarity calculations. Note that we + // don't include duplicate entries, to save time and space. + // The first user ALWAYS has a lower value than the second. + for (i = 0; i < numUsers; i++) { + float length_i; + sim_node user_i; + nbr_node temp_nbr; + nbr_node nbr_list = NULL; + + user_i = userEvents[i]; + if (!user_i) continue; + length_i = userLengths[i]; + + for (j = i+1; j < numUsers; j++) { + float length_j; + sim_node user_j; + int user1, user2; + float similarity; + + user_j = userEvents[j]; + if (!user_j) continue; + length_j = userLengths[j]; + + similarity = cosineSimilarity(user_i, user_j, length_i, length_j); + if (similarity <= 0) continue; + user1 = userIDs[i]; + user2 = userIDs[j]; + + // Now we write. + if (NBRHOOD <= 0) { + sprintf(insertstring,"%d;%d;%f\n",user1,user2,similarity); + fwrite(insertstring,1,strlen(insertstring),fp); + } else { + nbr_node newnbr = createNbrNode(user1,user2,similarity); + nbr_list = nbrInsert(nbr_list,newnbr,NBRHOOD); + } + } + + // If we have a limited neighborhood, we write the results here. + if (NBRHOOD > 0) { + for (temp_nbr = nbr_list; temp_nbr; temp_nbr = temp_nbr->next) { + sprintf(insertstring,"%d;%d;%f\n",temp_nbr->item1, + temp_nbr->item2,temp_nbr->similarity); + fwrite(insertstring,1,strlen(insertstring),fp); + } + freeNbrList(nbr_list); + } + + CHECK_FOR_INTERRUPTS(); + } + + pfree(insertstring); + fclose(fp); + + // If we are updating an existing similarity model, + // we will want to drop the existing primary key + // constraint before doing the copy, to save time. + if (update) { + insertstring = (char*) palloc(1024*sizeof(char)); + sprintf(insertstring,"ALTER TABLE %s DROP CONSTRAINT %s_pkey;", + modelname,modelname); + recathon_utilityExecute(insertstring); + pfree(insertstring); + } + + // With all the data written out, we're going to + // issue a COPY FROM command to bulk load the data + // into the database. + insertstring = (char*) palloc(1024*sizeof(char)); + sprintf(insertstring,"COPY %s FROM '%s' DELIMITERS ';';", + modelname,temprecfile); + recathon_utilityExecute(insertstring); + pfree(insertstring); + + // Now we add the primary key constraint. It's + // faster to add it after adding the data than + // having it incrementally updated. + insertstring = (char*) palloc(1024*sizeof(char)); + sprintf(insertstring,"ALTER TABLE %s ADD PRIMARY KEY (user1, user2)",modelname); + recathon_utilityExecute(insertstring); + pfree(insertstring); + + // We'll delete the temporary file here, to not hold onto + // memory longer than necessary. + if (unlink(temprecfile) < 0) + ereport(WARNING, + (errcode(ERRCODE_WARNING), + errmsg("failed to delete temporary file"))); + + // Free up the lists of sim_nodes and start again. + for (i = 0; i < numUsers; i++) { + freeSimList(userEvents[i]); + userEvents[i] = NULL; + } + + // Return the number of events we used. + return numEvents; } /* ---------------------------------------------------------------- @@ -2167,230 +2167,230 @@ updateUserCosModel(char *eventtable, char *userkey, char *itemkey, */ int updateUserPearModel(char *eventtable, char *userkey, char *itemkey, - char *eventval, char *modelname, int *userIDs, float *userAvgs, - float *userPearsons, int numUsers, bool update) { - int i, j, priorID; - int numEvents = 0; - char *querystring, *insertstring, *temprecfile; - sim_node *userEvents; - // Information for other queries. - QueryDesc *simqueryDesc; - PlanState *simplanstate; - TupleTableSlot *simslot; - MemoryContext simcontext; - // Information for writing to file. - FILE *fp; - temprecfile = (char*) palloc(256*sizeof(char)); - sprintf(temprecfile,"recathon_temp_%s.dat",modelname); - - // If this is us updating a cell as opposed to building - // a recommender, we need to drop the existing entries. - if (update) { - char *dropstring; - - dropstring = (char*) palloc(256*sizeof(char)); - sprintf(dropstring,"DELETE FROM %s;",modelname); - recathon_queryExecute(dropstring); - pfree(dropstring); - } - - // With the precomputation done, we need to derive the actual item - // similarities. We can do this in a way that's linear in the number - // of I/Os and also the amount of storage. The complexity is relegated - // to in-memory calculations, which is the most affordable. We need to - // use this data structure here. - userEvents = (sim_node*) palloc(numUsers*sizeof(sim_node)); - for (i = 0; i < numUsers; i++) - userEvents[i] = NULL; - - // With the model created, we need to populate it, which means calculating - // similarity between all item pairs. We need to query the events table - // in order to get the key information. - querystring = (char*) palloc(1024*sizeof(char)); - sprintf(querystring,"SELECT r.%s,r.%s,r.%s FROM %s r ORDER BY r.%s;", - userkey,itemkey,eventval,eventtable,userkey); - - // Begin extracting data. - priorID = -1; - simqueryDesc = recathon_queryStart(querystring, &simcontext); - simplanstate = simqueryDesc->planstate; - i = -1; - - for (;;) { - int simuser, simitem; - float simevent; - sim_node newnode; - - // Shut the compiler up. - simuser = 0; simitem = 0; simevent = 0.0; - - simslot = ExecProcNode(simplanstate); - if (TupIsNull(simslot)) break; - - simuser = getTupleInt(simslot,userkey); - simitem = getTupleInt(simslot,itemkey); - simevent = getTupleFloat(simslot,eventval); - - // Are we dealing with a new user ID? If so, switch to the next slot. - if (simuser != priorID) { - priorID = simuser; - i++; - } - - // We now have the user, item, and event for this tuple. - // We insert the results as a sim_node into the - // userEvents table; we'll do calculations later. - newnode = createSimNode(simitem, simevent); - userEvents[i] = simInsert(userEvents[i], newnode); - numEvents++; - } - - // Query cleanup. - recathon_queryEnd(simqueryDesc, simcontext); - pfree(querystring); - - // We're going to write out the results to file. - if ((fp = fopen(temprecfile,"w")) == NULL) - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("failed to open temporary file"))); - insertstring = (char*) palloc(128*sizeof(char)); - - // Now we do the similarity calculations. Note that we - // don't include duplicate entries, to save time and space. - // The first item ALWAYS has a lower value than the second. - for (i = 0; i < numUsers; i++) { - float avg_i, pearson_i; - sim_node user_i; - nbr_node temp_nbr; - nbr_node nbr_list = NULL; - - user_i = userEvents[i]; - if (!user_i) continue; - avg_i = userAvgs[i]; - pearson_i = userPearsons[i]; - - for (j = i+1; j < numUsers; j++) { - float avg_j, pearson_j; - sim_node user_j; - int user1, user2; - float similarity; - - user_j = userEvents[j]; - if (!user_j) continue; - avg_j = userAvgs[j]; - pearson_j = userPearsons[j]; - - similarity = pearsonSimilarity(user_i, user_j, avg_i, avg_j, pearson_i, pearson_j); - if (similarity == 0.0) continue; - user1 = userIDs[i]; - user2 = userIDs[j]; - - // Now we write. - if (NBRHOOD <= 0) { - sprintf(insertstring,"%d;%d;%f\n",user1,user2,similarity); - fwrite(insertstring,1,strlen(insertstring),fp); - } else { - nbr_node newnbr = createNbrNode(user1,user2,similarity); - nbr_list = nbrInsert(nbr_list,newnbr,NBRHOOD); - } - } - - // If we have a limited neighborhood, we write the results here. - if (NBRHOOD > 0) { - for (temp_nbr = nbr_list; temp_nbr; temp_nbr = temp_nbr->next) { - sprintf(insertstring,"%d;%d;%f\n",temp_nbr->item1, - temp_nbr->item2,temp_nbr->similarity); - fwrite(insertstring,1,strlen(insertstring),fp); - } - freeNbrList(nbr_list); - } - - CHECK_FOR_INTERRUPTS(); - } - - pfree(insertstring); - fclose(fp); - - // If we are updating an existing similarity model, - // we will want to drop the existing primary key - // constraint before doing the copy, to save time. - if (update) { - insertstring = (char*) palloc(1024*sizeof(char)); - sprintf(insertstring,"ALTER TABLE %s DROP CONSTRAINT %s_pkey;", - modelname,modelname); - recathon_utilityExecute(insertstring); - pfree(insertstring); - } - - // With all the data written out, we're going to - // issue a COPY FROM command to bulk load the data - // into the database. - insertstring = (char*) palloc(1024*sizeof(char)); - sprintf(insertstring,"COPY %s FROM '%s' DELIMITERS ';';", - modelname,temprecfile); - recathon_utilityExecute(insertstring); - pfree(insertstring); - - // Now we add the primary key constraint. It's - // faster to add it after adding the data than - // having it incrementally updated. - insertstring = (char*) palloc(1024*sizeof(char)); - sprintf(insertstring,"ALTER TABLE %s ADD PRIMARY KEY (user1, user2)",modelname); - recathon_utilityExecute(insertstring); - pfree(insertstring); - - // We'll delete the temporary file here, to not hold onto - // memory longer than necessary. - if (unlink(temprecfile) < 0) - ereport(WARNING, - (errcode(ERRCODE_WARNING), - errmsg("failed to delete temporary file"))); - - // Free up the lists of sim_nodes and start again. - for (i = 0; i < numUsers; i++) { - freeSimList(userEvents[i]); - userEvents[i] = NULL; - } - - // Return the number of events we used. - return numEvents; -} + char *eventval, char *modelname, int *userIDs, float *userAvgs, + float *userPearsons, int numUsers, bool update) { + int i, j, priorID; + int numEvents = 0; + char *querystring, *insertstring, *temprecfile; + sim_node *userEvents; + // Information for other queries. + QueryDesc *simqueryDesc; + PlanState *simplanstate; + TupleTableSlot *simslot; + MemoryContext simcontext; + // Information for writing to file. + FILE *fp; + temprecfile = (char*) palloc(256*sizeof(char)); + sprintf(temprecfile,"recathon_temp_%s.dat",modelname); -/* ---------------------------------------------------------------- - * createSVDnode - * - * This function creates a new SVD node out of a - * TupleTableSlot. - * ---------------------------------------------------------------- - */ -svd_node createSVDnode(TupleTableSlot *slot, char *userkey, char *itemkey, char *eventval, - int *userIDs, int *itemIDs, int numUsers, int numItems) { - int userid, itemid; - svd_node new_svd; - - // Quiet the compiler. - userid = -1; - itemid = -1; - - new_svd = (svd_node) palloc(sizeof(struct svd_node_t)); - // Default values. - new_svd->userid = -1; - new_svd->itemid = -1; - new_svd->event = -1; - new_svd->residual = 0.0; - - userid = getTupleInt(slot,userkey); - itemid = getTupleInt(slot,itemkey); - new_svd->event = getTupleFloat(slot,eventval); - - // If we convert IDs to indexes in our arrays, it will make - // our lives easier. - new_svd->userid = binarySearch(userIDs,userid,0,numUsers); - new_svd->itemid = binarySearch(itemIDs,itemid,0,numItems); - - return new_svd; -} + // If this is us updating a cell as opposed to building + // a recommender, we need to drop the existing entries. + if (update) { + char *dropstring; + + dropstring = (char*) palloc(256*sizeof(char)); + sprintf(dropstring,"DELETE FROM %s;",modelname); + recathon_queryExecute(dropstring); + pfree(dropstring); + } + + // With the precomputation done, we need to derive the actual item + // similarities. We can do this in a way that's linear in the number + // of I/Os and also the amount of storage. The complexity is relegated + // to in-memory calculations, which is the most affordable. We need to + // use this data structure here. + userEvents = (sim_node*) palloc(numUsers*sizeof(sim_node)); + for (i = 0; i < numUsers; i++) + userEvents[i] = NULL; + + // With the model created, we need to populate it, which means calculating + // similarity between all item pairs. We need to query the events table + // in order to get the key information. + querystring = (char*) palloc(1024*sizeof(char)); + sprintf(querystring,"SELECT r.%s,r.%s,r.%s FROM %s r ORDER BY r.%s;", + userkey,itemkey,eventval,eventtable,userkey); + + // Begin extracting data. + priorID = -1; + simqueryDesc = recathon_queryStart(querystring, &simcontext); + simplanstate = simqueryDesc->planstate; + i = -1; + + for (;;) { + int simuser, simitem; + float simevent; + sim_node newnode; + + // Shut the compiler up. + simuser = 0; simitem = 0; simevent = 0.0; + + simslot = ExecProcNode(simplanstate); + if (TupIsNull(simslot)) break; + + simuser = getTupleInt(simslot,userkey); + simitem = getTupleInt(simslot,itemkey); + simevent = getTupleFloat(simslot,eventval); + + // Are we dealing with a new user ID? If so, switch to the next slot. + if (simuser != priorID) { + priorID = simuser; + i++; + } + + // We now have the user, item, and event for this tuple. + // We insert the results as a sim_node into the + // userEvents table; we'll do calculations later. + newnode = createSimNode(simitem, simevent); + userEvents[i] = simInsert(userEvents[i], newnode); + numEvents++; + } + + // Query cleanup. + recathon_queryEnd(simqueryDesc, simcontext); + pfree(querystring); + + // We're going to write out the results to file. + if ((fp = fopen(temprecfile,"w")) == NULL) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("failed to open temporary file"))); + insertstring = (char*) palloc(128*sizeof(char)); + + // Now we do the similarity calculations. Note that we + // don't include duplicate entries, to save time and space. + // The first item ALWAYS has a lower value than the second. + for (i = 0; i < numUsers; i++) { + float avg_i, pearson_i; + sim_node user_i; + nbr_node temp_nbr; + nbr_node nbr_list = NULL; + + user_i = userEvents[i]; + if (!user_i) continue; + avg_i = userAvgs[i]; + pearson_i = userPearsons[i]; + + for (j = i+1; j < numUsers; j++) { + float avg_j, pearson_j; + sim_node user_j; + int user1, user2; + float similarity; + + user_j = userEvents[j]; + if (!user_j) continue; + avg_j = userAvgs[j]; + pearson_j = userPearsons[j]; + + similarity = pearsonSimilarity(user_i, user_j, avg_i, avg_j, pearson_i, pearson_j); + if (similarity == 0.0) continue; + user1 = userIDs[i]; + user2 = userIDs[j]; + + // Now we write. + if (NBRHOOD <= 0) { + sprintf(insertstring,"%d;%d;%f\n",user1,user2,similarity); + fwrite(insertstring,1,strlen(insertstring),fp); + } else { + nbr_node newnbr = createNbrNode(user1,user2,similarity); + nbr_list = nbrInsert(nbr_list,newnbr,NBRHOOD); + } + } + + // If we have a limited neighborhood, we write the results here. + if (NBRHOOD > 0) { + for (temp_nbr = nbr_list; temp_nbr; temp_nbr = temp_nbr->next) { + sprintf(insertstring,"%d;%d;%f\n",temp_nbr->item1, + temp_nbr->item2,temp_nbr->similarity); + fwrite(insertstring,1,strlen(insertstring),fp); + } + freeNbrList(nbr_list); + } + + CHECK_FOR_INTERRUPTS(); + } + + pfree(insertstring); + fclose(fp); + + // If we are updating an existing similarity model, + // we will want to drop the existing primary key + // constraint before doing the copy, to save time. + if (update) { + insertstring = (char*) palloc(1024*sizeof(char)); + sprintf(insertstring,"ALTER TABLE %s DROP CONSTRAINT %s_pkey;", + modelname,modelname); + recathon_utilityExecute(insertstring); + pfree(insertstring); + } + + // With all the data written out, we're going to + // issue a COPY FROM command to bulk load the data + // into the database. + insertstring = (char*) palloc(1024*sizeof(char)); + sprintf(insertstring,"COPY %s FROM '%s' DELIMITERS ';';", + modelname,temprecfile); + recathon_utilityExecute(insertstring); + pfree(insertstring); + + // Now we add the primary key constraint. It's + // faster to add it after adding the data than + // having it incrementally updated. + insertstring = (char*) palloc(1024*sizeof(char)); + sprintf(insertstring,"ALTER TABLE %s ADD PRIMARY KEY (user1, user2)",modelname); + recathon_utilityExecute(insertstring); + pfree(insertstring); + + // We'll delete the temporary file here, to not hold onto + // memory longer than necessary. + if (unlink(temprecfile) < 0) + ereport(WARNING, + (errcode(ERRCODE_WARNING), + errmsg("failed to delete temporary file"))); + + // Free up the lists of sim_nodes and start again. + for (i = 0; i < numUsers; i++) { + freeSimList(userEvents[i]); + userEvents[i] = NULL; + } + + // Return the number of events we used. + return numEvents; +} + +/* ---------------------------------------------------------------- + * createSVDnode + * + * This function creates a new SVD node out of a + * TupleTableSlot. + * ---------------------------------------------------------------- + */ +svd_node createSVDnode(TupleTableSlot *slot, char *userkey, char *itemkey, char *eventval, + int *userIDs, int *itemIDs, int numUsers, int numItems) { + int userid, itemid; + svd_node new_svd; + + // Quiet the compiler. + userid = -1; + itemid = -1; + + new_svd = (svd_node) palloc(sizeof(struct svd_node_t)); + // Default values. + new_svd->userid = -1; + new_svd->itemid = -1; + new_svd->event = -1; + new_svd->residual = 0.0; + + userid = getTupleInt(slot,userkey); + itemid = getTupleInt(slot,itemkey); + new_svd->event = getTupleFloat(slot,eventval); + + // If we convert IDs to indexes in our arrays, it will make + // our lives easier. + new_svd->userid = binarySearch(userIDs,userid,0,numUsers); + new_svd->itemid = binarySearch(itemIDs,itemid,0,numItems); + + return new_svd; +} /* ---------------------------------------------------------------- * SVDlists @@ -2401,94 +2401,94 @@ svd_node createSVDnode(TupleTableSlot *slot, char *userkey, char *itemkey, char */ void SVDlists(char *userkey, char *itemkey, char *eventtable, - int **ret_userIDs, int **ret_itemIDs, - int *ret_numUsers, int *ret_numItems) { - int i, numUsers, numItems; - int *userIDs, *itemIDs; - char *querystring; - // Information for other queries. - QueryDesc *queryDesc; - PlanState *planstate; - TupleTableSlot *slot; - MemoryContext recathoncontext; - - querystring = (char*) palloc(1024*sizeof(char)); - - // First, let's get the list of users. We need to count how many - // we're dealing with. - sprintf(querystring,"SELECT COUNT(DISTINCT %s) FROM %s;", - userkey,eventtable); - - queryDesc = recathon_queryStart(querystring, &recathoncontext); - planstate = queryDesc->planstate; - - slot = ExecProcNode(planstate); - if (TupIsNull(slot)) - numUsers = 0; - else - numUsers = getTupleInt(slot,"count"); - recathon_queryEnd(queryDesc, recathoncontext); - userIDs = (int*) palloc(numUsers*sizeof(int)); - - sprintf(querystring,"SELECT DISTINCT %s FROM %s ORDER BY %s;", - userkey,eventtable,userkey); - - queryDesc = recathon_queryStart(querystring, &recathoncontext); - planstate = queryDesc->planstate; - - i = 0; - for (;;) { - slot = ExecProcNode(planstate); - if (TupIsNull(slot)) break; - - userIDs[i] = getTupleInt(slot, userkey); - - i++; - if (i >= numUsers) break; - } - - recathon_queryEnd(queryDesc, recathoncontext); - - // Next, the list of items. - sprintf(querystring,"SELECT COUNT(DISTINCT %s) FROM %s;", - itemkey,eventtable); - - queryDesc = recathon_queryStart(querystring, &recathoncontext); - planstate = queryDesc->planstate; - - slot = ExecProcNode(planstate); - if (TupIsNull(slot)) - numItems = 0; - else - numItems = getTupleInt(slot,"count"); - recathon_queryEnd(queryDesc, recathoncontext); - itemIDs = (int*) palloc(numItems*sizeof(int)); - - querystring = (char*) palloc(1024*sizeof(char)); - sprintf(querystring,"SELECT DISTINCT %s FROM %s ORDER BY %s;", - itemkey, eventtable, itemkey); - queryDesc = recathon_queryStart(querystring, &recathoncontext); - planstate = queryDesc->planstate; - - i = 0; - for (;;) { - slot = ExecProcNode(planstate); - if (TupIsNull(slot)) break; - - itemIDs[i] = getTupleInt(slot, itemkey); - - i++; - if (i >= numItems) break; - } - - recathon_queryEnd(queryDesc, recathoncontext); - pfree(querystring); - - // Now we return the data. - (*ret_userIDs) = userIDs; - (*ret_itemIDs) = itemIDs; - (*ret_numUsers) = numUsers; - (*ret_numItems) = numItems; + int **ret_userIDs, int **ret_itemIDs, + int *ret_numUsers, int *ret_numItems) { + int i, numUsers, numItems; + int *userIDs, *itemIDs; + char *querystring; + // Information for other queries. + QueryDesc *queryDesc; + PlanState *planstate; + TupleTableSlot *slot; + MemoryContext recathoncontext; + + querystring = (char*) palloc(1024*sizeof(char)); + + // First, let's get the list of users. We need to count how many + // we're dealing with. + sprintf(querystring,"SELECT COUNT(DISTINCT %s) FROM %s;", + userkey,eventtable); + + queryDesc = recathon_queryStart(querystring, &recathoncontext); + planstate = queryDesc->planstate; + + slot = ExecProcNode(planstate); + if (TupIsNull(slot)) + numUsers = 0; + else + numUsers = getTupleInt(slot,"count"); + recathon_queryEnd(queryDesc, recathoncontext); + userIDs = (int*) palloc(numUsers*sizeof(int)); + + sprintf(querystring,"SELECT DISTINCT %s FROM %s ORDER BY %s;", + userkey,eventtable,userkey); + + queryDesc = recathon_queryStart(querystring, &recathoncontext); + planstate = queryDesc->planstate; + + i = 0; + for (;;) { + slot = ExecProcNode(planstate); + if (TupIsNull(slot)) break; + + userIDs[i] = getTupleInt(slot, userkey); + + i++; + if (i >= numUsers) break; + } + + recathon_queryEnd(queryDesc, recathoncontext); + + // Next, the list of items. + sprintf(querystring,"SELECT COUNT(DISTINCT %s) FROM %s;", + itemkey,eventtable); + + queryDesc = recathon_queryStart(querystring, &recathoncontext); + planstate = queryDesc->planstate; + + slot = ExecProcNode(planstate); + if (TupIsNull(slot)) + numItems = 0; + else + numItems = getTupleInt(slot,"count"); + recathon_queryEnd(queryDesc, recathoncontext); + itemIDs = (int*) palloc(numItems*sizeof(int)); + + querystring = (char*) palloc(1024*sizeof(char)); + sprintf(querystring,"SELECT DISTINCT %s FROM %s ORDER BY %s;", + itemkey, eventtable, itemkey); + queryDesc = recathon_queryStart(querystring, &recathoncontext); + planstate = queryDesc->planstate; + + i = 0; + for (;;) { + slot = ExecProcNode(planstate); + if (TupIsNull(slot)) break; + + itemIDs[i] = getTupleInt(slot, itemkey); + + i++; + if (i >= numItems) break; + } + + recathon_queryEnd(queryDesc, recathoncontext); + pfree(querystring); + + // Now we return the data. + (*ret_userIDs) = userIDs; + (*ret_itemIDs) = itemIDs; + (*ret_numUsers) = numUsers; + (*ret_numItems) = numItems; } /* ---------------------------------------------------------------- @@ -2501,172 +2501,172 @@ SVDlists(char *userkey, char *itemkey, char *eventtable, */ void SVDaverages(char *userkey, char *itemkey, char *eventtable, char *eventval, - int *userIDs, int *itemIDs, int numUsers, int numItems, - float **ret_itemAvgs, float **ret_userOffsets) { - int i, priorID; - int *userCounts, *itemCounts; - float *userAvgs, *itemAvgs; - float *itemSums; - float *itemSqs; // Squares of sums. Used to calculate variances. - float *itemVars; // Variances. - float globalAvg; - float globalSum = 0.0; - float globalAvgSum = 0.0; - float globalSq = 0.0; - float globalVar; - // Information for other queries. - char *querystring; - QueryDesc *queryDesc; - PlanState *planstate; - TupleTableSlot *slot; - MemoryContext recathoncontext; - - // Initialize arrays. - itemCounts = (int*) palloc(numItems*sizeof(int)); - itemAvgs = (float*) palloc(numItems*sizeof(float)); - itemSums = (float*) palloc(numItems*sizeof(float)); - itemSqs = (float*) palloc(numItems*sizeof(float)); - itemVars = (float*) palloc(numItems*sizeof(float)); - for (i = 0; i < numItems; i++) { - itemCounts[i] = 0; - itemSums[i] = 0.0; - itemSqs[i] = 0.0; - } - - // We need to issue a query to get event information. - querystring = (char*) palloc(256*sizeof(char)); - sprintf(querystring,"SELECT %s,%s FROM %s ORDER BY %s;", - itemkey,eventval,eventtable,itemkey); - - priorID = -1; - queryDesc = recathon_queryStart(querystring,&recathoncontext); - planstate = queryDesc->planstate; - i = -1; - - for (;;) { - int itemnum = 0; - float event = 0.0; - - slot = ExecProcNode(planstate); - if (TupIsNull(slot)) break; - - itemnum = getTupleInt(slot,itemkey); - event = getTupleFloat(slot,eventval); - - // Are we dealing with a new item ID? If so, switch to the next slot. - if (itemnum != priorID) { - priorID = itemnum; - i++; - } - - itemCounts[i] += 1; - itemSums[i] += event; - itemSqs[i] += (event*event); - } - - recathon_queryEnd(queryDesc,recathoncontext); - - // We have enough data to calculate individual item variances. - for (i = 0; i < numItems; i++) { - float sum, sumsqr; - int n; - - n = itemCounts[i]; - sum = itemSums[i]; - sumsqr = itemSqs[i]; - - if (n <= 0) - itemVars[i] = 0; - else - itemVars[i] = (sumsqr - ((sum*sum)/n))/n; - - // We can also start calculating the global variance in this loop. - // Some notation abuse. - globalSum += sum; - if (n > 0) { - sum = sum/n; - globalAvgSum += sum; - globalSq += (sum*sum); - } - } - - // Now we derive the global variance. - globalVar = (globalSq - ((globalAvgSum*globalAvgSum)/numItems))/numItems; - globalAvg = globalSum/count_rows(eventtable); - - // Finally, we can obtain the baseline averages for each item. - for (i = 0; i < numItems; i++) { - float k; - - if (globalVar == 0) - k = 0; - else - k = itemVars[i] / globalVar; - - if ((k + itemCounts[i]) > 0) - itemAvgs[i] = ((globalAvg*k) + itemSums[i]) / (k + itemCounts[i]); - else - itemAvgs[i] = 0; - } - - // With the averages calculated, we can now calculate the average offset - // for each user. This involves querying the user table again. - userCounts = (int*) palloc(numUsers*sizeof(int)); - for (i = 0; i < numUsers; i++) - userCounts[i] = 0; - userAvgs = (float*) palloc(numUsers*sizeof(float)); - for (i = 0; i < numUsers; i++) - userAvgs[i] = 0.0; - - sprintf(querystring,"SELECT r.%s,r.%s,r.%s FROM %s r;", - userkey,itemkey,eventval,eventtable); - - queryDesc = recathon_queryStart(querystring,&recathoncontext); - planstate = queryDesc->planstate; - - for (;;) { - int userindex, itemindex; - int usernum, itemnum; - float event; - - slot = ExecProcNode(planstate); - if (TupIsNull(slot)) break; - - usernum = getTupleInt(slot,userkey); - itemnum = getTupleInt(slot,itemkey); - event = getTupleFloat(slot,eventval); - userindex = binarySearch(userIDs, usernum, 0, numUsers); - itemindex = binarySearch(itemIDs, itemnum, 0, numItems); - - // We need to find the average offset of a user's event from - // the average event. - if (userindex >= 0 && userindex < numUsers) { - userCounts[userindex] += 1; - userAvgs[userindex] += event - itemAvgs[itemindex]; - } - } - - recathon_queryEnd(queryDesc,recathoncontext); - - // Now we just divide by the counts. - for (i = 0; i < numUsers; i++) { - if (userCounts[i] > 0) - userAvgs[i] /= userCounts[i]; - else - userAvgs[i] = 0; - } - - // Free up memory. - pfree(itemCounts); - pfree(itemSums); - pfree(itemSqs); - pfree(itemVars); - pfree(userCounts); - pfree(querystring); - - // With that information calculated, we can finally return. - (*ret_itemAvgs) = itemAvgs; - (*ret_userOffsets) = userAvgs; + int *userIDs, int *itemIDs, int numUsers, int numItems, + float **ret_itemAvgs, float **ret_userOffsets) { + int i, priorID; + int *userCounts, *itemCounts; + float *userAvgs, *itemAvgs; + float *itemSums; + float *itemSqs; // Squares of sums. Used to calculate variances. + float *itemVars; // Variances. + float globalAvg; + float globalSum = 0.0; + float globalAvgSum = 0.0; + float globalSq = 0.0; + float globalVar; + // Information for other queries. + char *querystring; + QueryDesc *queryDesc; + PlanState *planstate; + TupleTableSlot *slot; + MemoryContext recathoncontext; + + // Initialize arrays. + itemCounts = (int*) palloc(numItems*sizeof(int)); + itemAvgs = (float*) palloc(numItems*sizeof(float)); + itemSums = (float*) palloc(numItems*sizeof(float)); + itemSqs = (float*) palloc(numItems*sizeof(float)); + itemVars = (float*) palloc(numItems*sizeof(float)); + for (i = 0; i < numItems; i++) { + itemCounts[i] = 0; + itemSums[i] = 0.0; + itemSqs[i] = 0.0; + } + + // We need to issue a query to get event information. + querystring = (char*) palloc(256*sizeof(char)); + sprintf(querystring,"SELECT %s,%s FROM %s ORDER BY %s;", + itemkey,eventval,eventtable,itemkey); + + priorID = -1; + queryDesc = recathon_queryStart(querystring,&recathoncontext); + planstate = queryDesc->planstate; + i = -1; + + for (;;) { + int itemnum = 0; + float event = 0.0; + + slot = ExecProcNode(planstate); + if (TupIsNull(slot)) break; + + itemnum = getTupleInt(slot,itemkey); + event = getTupleFloat(slot,eventval); + + // Are we dealing with a new item ID? If so, switch to the next slot. + if (itemnum != priorID) { + priorID = itemnum; + i++; + } + + itemCounts[i] += 1; + itemSums[i] += event; + itemSqs[i] += (event*event); + } + + recathon_queryEnd(queryDesc,recathoncontext); + + // We have enough data to calculate individual item variances. + for (i = 0; i < numItems; i++) { + float sum, sumsqr; + int n; + + n = itemCounts[i]; + sum = itemSums[i]; + sumsqr = itemSqs[i]; + + if (n <= 0) + itemVars[i] = 0; + else + itemVars[i] = (sumsqr - ((sum*sum)/n))/n; + + // We can also start calculating the global variance in this loop. + // Some notation abuse. + globalSum += sum; + if (n > 0) { + sum = sum/n; + globalAvgSum += sum; + globalSq += (sum*sum); + } + } + + // Now we derive the global variance. + globalVar = (globalSq - ((globalAvgSum*globalAvgSum)/numItems))/numItems; + globalAvg = globalSum/count_rows(eventtable); + + // Finally, we can obtain the baseline averages for each item. + for (i = 0; i < numItems; i++) { + float k; + + if (globalVar == 0) + k = 0; + else + k = itemVars[i] / globalVar; + + if ((k + itemCounts[i]) > 0) + itemAvgs[i] = ((globalAvg*k) + itemSums[i]) / (k + itemCounts[i]); + else + itemAvgs[i] = 0; + } + + // With the averages calculated, we can now calculate the average offset + // for each user. This involves querying the user table again. + userCounts = (int*) palloc(numUsers*sizeof(int)); + for (i = 0; i < numUsers; i++) + userCounts[i] = 0; + userAvgs = (float*) palloc(numUsers*sizeof(float)); + for (i = 0; i < numUsers; i++) + userAvgs[i] = 0.0; + + sprintf(querystring,"SELECT r.%s,r.%s,r.%s FROM %s r;", + userkey,itemkey,eventval,eventtable); + + queryDesc = recathon_queryStart(querystring,&recathoncontext); + planstate = queryDesc->planstate; + + for (;;) { + int userindex, itemindex; + int usernum, itemnum; + float event; + + slot = ExecProcNode(planstate); + if (TupIsNull(slot)) break; + + usernum = getTupleInt(slot,userkey); + itemnum = getTupleInt(slot,itemkey); + event = getTupleFloat(slot,eventval); + userindex = binarySearch(userIDs, usernum, 0, numUsers); + itemindex = binarySearch(itemIDs, itemnum, 0, numItems); + + // We need to find the average offset of a user's event from + // the average event. + if (userindex >= 0 && userindex < numUsers) { + userCounts[userindex] += 1; + userAvgs[userindex] += event - itemAvgs[itemindex]; + } + } + + recathon_queryEnd(queryDesc,recathoncontext); + + // Now we just divide by the counts. + for (i = 0; i < numUsers; i++) { + if (userCounts[i] > 0) + userAvgs[i] /= userCounts[i]; + else + userAvgs[i] = 0; + } + + // Free up memory. + pfree(itemCounts); + pfree(itemSums); + pfree(itemSqs); + pfree(itemVars); + pfree(userCounts); + pfree(querystring); + + // With that information calculated, we can finally return. + (*ret_itemAvgs) = itemAvgs; + (*ret_userOffsets) = userAvgs; } /* ---------------------------------------------------------------- @@ -2678,15 +2678,15 @@ SVDaverages(char *userkey, char *itemkey, char *eventtable, char *eventval, */ float predictRating(int featurenum, int numFeatures, int userid, int itemid, - float **userFeatures, float **itemFeatures, float residual) { - int i; - float rating; - - rating = residual; - for (i = featurenum; i < numFeatures; i++) - rating += userFeatures[i][userid] * itemFeatures[i][itemid]; - - return rating; + float **userFeatures, float **itemFeatures, float residual) { + int i; + float rating; + + rating = residual; + for (i = featurenum; i < numFeatures; i++) + rating += userFeatures[i][userid] * itemFeatures[i][itemid]; + + return rating; } /* ---------------------------------------------------------------- @@ -2698,245 +2698,245 @@ predictRating(int featurenum, int numFeatures, int userid, int itemid, */ int SVDtrain(char *userkey, char *itemkey, char *eventtable, char *eventval, - char *usermodelname, char *itemmodelname, bool update) { - float **userFeatures, **itemFeatures; - int *userIDs, *itemIDs; - float *itemAvgs, *userOffsets; - int numUsers, numItems; - int i, j, k, numEvents; - int numFeatures = 50; - svd_node *allEvents; - FILE *fp; - char *tempfilename, *insertstring; - // Information for other queries. - char *querystring; - QueryDesc *queryDesc; - PlanState *planstate; - TupleTableSlot *slot; - MemoryContext recathoncontext; - - // If this is us updating a cell as opposed to building - // a recommender, we need to drop the existing entries. - if (update) { - char *dropstring; - - dropstring = (char*) palloc(256*sizeof(char)); - sprintf(dropstring,"DELETE FROM %s;",usermodelname); - recathon_queryExecute(dropstring); - sprintf(dropstring,"DELETE FROM %s;",itemmodelname); - recathon_queryExecute(dropstring); - pfree(dropstring); - } - - // First, we get our lists of users and items. - SVDlists(userkey,itemkey,eventtable, - &userIDs, &itemIDs, &numUsers, &numItems); - - // Then we get information for baseline averages. - SVDaverages(userkey,itemkey,eventtable,eventval, - userIDs,itemIDs,numUsers,numItems, - &itemAvgs,&userOffsets); - - // Initialize our feature arrays. - userFeatures = (float**) palloc(numFeatures*sizeof(float*)); - for (i = 0; i < numFeatures; i++) { - userFeatures[i] = (float*) palloc(numUsers*sizeof(float)); - for (j = 0; j < numUsers; j++) - userFeatures[i][j] = 0.1; - } - itemFeatures = (float**) palloc(numFeatures*sizeof(float*)); - for (i = 0; i < numFeatures; i++) { - itemFeatures[i] = (float*) palloc(numItems*sizeof(float)); - for (j = 0; j < numItems; j++) - itemFeatures[i][j] = 0.1; - } - - // First we need to count the number of events we'll be - // considering. - querystring = (char*) palloc(1024*sizeof(char)); - numEvents = count_rows(eventtable); - - // Initialize the events array. - allEvents = (svd_node*) palloc(numEvents*sizeof(svd_node)); - - sprintf(querystring,"SELECT r.%s,r.%s,r.%s FROM %s r ORDER BY r.%s;", - userkey,itemkey,eventval,eventtable,userkey); - - // Let's acquire all of our events and store them. Sorting initially by - // user ID avoids unnecessary binary searches. - queryDesc = recathon_queryStart(querystring,&recathoncontext); - planstate = queryDesc->planstate; - - i = 0; - for (;;) { - svd_node new_svd; - - slot = ExecProcNode(planstate); - if (TupIsNull(slot)) break; - - new_svd = createSVDnode(slot, userkey, itemkey, eventval, userIDs, itemIDs, numUsers, numItems); - - allEvents[i] = new_svd; - - i++; - if (i >= numEvents) break; - } - - recathon_queryEnd(queryDesc,recathoncontext); - - // We now have all of the events, so we can start training our features. - for (j = 0; j < 100; j++) { - for (i = 0; i < numFeatures; i++) { - float learn = 0.001; - float penalty = 0.002; - float *userVal = userFeatures[i]; - float *itemVal = itemFeatures[i]; - - for (k = 0; k < numEvents; k++) { - int userid; - int itemid; - float event, err, residual, temp; - svd_node current_svd; - - current_svd = allEvents[k]; - userid = current_svd->userid; - itemid = current_svd->itemid; - event = current_svd->event; - // Need to reset residuals for each new - // iteration of the trainer. - if (i == 0) - current_svd->residual = 0; - residual = current_svd->residual; - - if (i == 0 && j == 0) { - err = event - (itemAvgs[itemid] + userOffsets[userid]); - } else { - err = event - predictRating(i, numFeatures, userid, itemid, - userFeatures, itemFeatures, residual); - } - temp = userVal[userid]; - userVal[userid] += learn * ((err * itemVal[itemid]) - (penalty * userVal[userid])); - itemVal[itemid] += learn * ((err * temp) - (penalty * itemVal[itemid])); - - // Store residuals. - if (i == 0) - current_svd->residual = userVal[userid] * itemVal[itemid]; - else - current_svd->residual += userVal[userid] * itemVal[itemid]; - } - - CHECK_FOR_INTERRUPTS(); - } - } - - // With the training finished, we need to write out the data to file, - // so we can put it back. First, the user model. - tempfilename = (char*) palloc(256*sizeof(char)); - sprintf(tempfilename,"recathon_temp_%s.dat",usermodelname); - if ((fp = fopen(tempfilename,"w")) == NULL) - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("failed to open temporary file"))); - insertstring = (char*) palloc(128*sizeof(char)); - - for (i = 0; i < numFeatures; i++) { - for (j = 0; j < numUsers; j++) { - sprintf(insertstring,"%d;%d;%f\n",userIDs[j],i,userFeatures[i][j]); - fwrite(insertstring,1,strlen(insertstring),fp); - } - } - fclose(fp); - - // If we are updating an existing SVD model, - // we will want to drop the existing primary key - // constraint before doing the copy, to save time. - if (update) { - insertstring = (char*) palloc(1024*sizeof(char)); - sprintf(insertstring,"ALTER TABLE %s DROP CONSTRAINT %s_pkey;", - usermodelname,usermodelname); - recathon_utilityExecute(insertstring); - pfree(insertstring); - } - - // We can bulk load the data with COPY FROM. It's faster - // than individual inserts by a good margin. - sprintf(querystring,"COPY %s FROM '%s' DELIMITERS ';';", - usermodelname,tempfilename); - recathon_utilityExecute(querystring); - - // Adding a primary key after the COPY FROM is about 25% faster - // than adding it before. - sprintf(querystring,"ALTER TABLE %s ADD PRIMARY KEY (users, feature);",usermodelname); - recathon_utilityExecute(querystring); - - // Delete the temporary file. - if (unlink(tempfilename) < 0) - ereport(WARNING, - (errcode(ERRCODE_WARNING), - errmsg("failed to delete temporary file"))); - - // Now do it again for the item model. - tempfilename = (char*) palloc(256*sizeof(char)); - sprintf(tempfilename,"recathon_temp_%s.dat",itemmodelname); - if ((fp = fopen(tempfilename,"w")) == NULL) - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("failed to open temporary file"))); - - for (i = 0; i < numFeatures; i++) { - for (j = 0; j < numItems; j++) { - char insertstring[128]; - sprintf(insertstring,"%d;%d;%f\n",itemIDs[j],i,itemFeatures[i][j]); - fwrite(insertstring,1,strlen(insertstring),fp); - } - } - fclose(fp); - - // If we are updating an existing SVD model, - // we will want to drop the existing primary key - // constraint before doing the copy, to save time. - if (update) { - insertstring = (char*) palloc(1024*sizeof(char)); - sprintf(insertstring,"ALTER TABLE %s DROP CONSTRAINT %s_pkey;", - itemmodelname,itemmodelname); - recathon_utilityExecute(insertstring); - pfree(insertstring); - } - - // We can bulk load the data with COPY FROM. It's faster - // than individual inserts by a good margin. - sprintf(querystring,"COPY %s FROM '%s' DELIMITERS ';';", - itemmodelname,tempfilename); - recathon_utilityExecute(querystring); - - // Adding a primary key after the COPY FROM is about 25% faster - // than adding it before. - sprintf(querystring,"ALTER TABLE %s ADD PRIMARY KEY (items, feature);",itemmodelname); - recathon_utilityExecute(querystring); - - // Delete the temporary file. - if (unlink(tempfilename) < 0) - ereport(WARNING, - (errcode(ERRCODE_WARNING), - errmsg("failed to delete temporary file"))); - - // Free up memory. - pfree(querystring); - pfree(userIDs); - pfree(itemIDs); - pfree(itemAvgs); - pfree(userOffsets); - pfree(allEvents); - - for (i = 0; i < numFeatures; i++) - pfree(userFeatures[i]); - pfree(userFeatures); - for (i = 0; i < numFeatures; i++) - pfree(itemFeatures[i]); - pfree(itemFeatures); - - // Return the number of events we used. - return numEvents; + char *usermodelname, char *itemmodelname, bool update) { + float **userFeatures, **itemFeatures; + int *userIDs, *itemIDs; + float *itemAvgs, *userOffsets; + int numUsers, numItems; + int i, j, k, numEvents; + int numFeatures = 50; + svd_node *allEvents; + FILE *fp; + char *tempfilename, *insertstring; + // Information for other queries. + char *querystring; + QueryDesc *queryDesc; + PlanState *planstate; + TupleTableSlot *slot; + MemoryContext recathoncontext; + + // If this is us updating a cell as opposed to building + // a recommender, we need to drop the existing entries. + if (update) { + char *dropstring; + + dropstring = (char*) palloc(256*sizeof(char)); + sprintf(dropstring,"DELETE FROM %s;",usermodelname); + recathon_queryExecute(dropstring); + sprintf(dropstring,"DELETE FROM %s;",itemmodelname); + recathon_queryExecute(dropstring); + pfree(dropstring); + } + + // First, we get our lists of users and items. + SVDlists(userkey,itemkey,eventtable, + &userIDs, &itemIDs, &numUsers, &numItems); + + // Then we get information for baseline averages. + SVDaverages(userkey,itemkey,eventtable,eventval, + userIDs,itemIDs,numUsers,numItems, + &itemAvgs,&userOffsets); + + // Initialize our feature arrays. + userFeatures = (float**) palloc(numFeatures*sizeof(float*)); + for (i = 0; i < numFeatures; i++) { + userFeatures[i] = (float*) palloc(numUsers*sizeof(float)); + for (j = 0; j < numUsers; j++) + userFeatures[i][j] = 0.1; + } + itemFeatures = (float**) palloc(numFeatures*sizeof(float*)); + for (i = 0; i < numFeatures; i++) { + itemFeatures[i] = (float*) palloc(numItems*sizeof(float)); + for (j = 0; j < numItems; j++) + itemFeatures[i][j] = 0.1; + } + + // First we need to count the number of events we'll be + // considering. + querystring = (char*) palloc(1024*sizeof(char)); + numEvents = count_rows(eventtable); + + // Initialize the events array. + allEvents = (svd_node*) palloc(numEvents*sizeof(svd_node)); + + sprintf(querystring,"SELECT r.%s,r.%s,r.%s FROM %s r ORDER BY r.%s;", + userkey,itemkey,eventval,eventtable,userkey); + + // Let's acquire all of our events and store them. Sorting initially by + // user ID avoids unnecessary binary searches. + queryDesc = recathon_queryStart(querystring,&recathoncontext); + planstate = queryDesc->planstate; + + i = 0; + for (;;) { + svd_node new_svd; + + slot = ExecProcNode(planstate); + if (TupIsNull(slot)) break; + + new_svd = createSVDnode(slot, userkey, itemkey, eventval, userIDs, itemIDs, numUsers, numItems); + + allEvents[i] = new_svd; + + i++; + if (i >= numEvents) break; + } + + recathon_queryEnd(queryDesc,recathoncontext); + + // We now have all of the events, so we can start training our features. + for (j = 0; j < 100; j++) { + for (i = 0; i < numFeatures; i++) { + float learn = 0.001; + float penalty = 0.002; + float *userVal = userFeatures[i]; + float *itemVal = itemFeatures[i]; + + for (k = 0; k < numEvents; k++) { + int userid; + int itemid; + float event, err, residual, temp; + svd_node current_svd; + + current_svd = allEvents[k]; + userid = current_svd->userid; + itemid = current_svd->itemid; + event = current_svd->event; + // Need to reset residuals for each new + // iteration of the trainer. + if (i == 0) + current_svd->residual = 0; + residual = current_svd->residual; + + if (i == 0 && j == 0) { + err = event - (itemAvgs[itemid] + userOffsets[userid]); + } else { + err = event - predictRating(i, numFeatures, userid, itemid, + userFeatures, itemFeatures, residual); + } + temp = userVal[userid]; + userVal[userid] += learn * ((err * itemVal[itemid]) - (penalty * userVal[userid])); + itemVal[itemid] += learn * ((err * temp) - (penalty * itemVal[itemid])); + + // Store residuals. + if (i == 0) + current_svd->residual = userVal[userid] * itemVal[itemid]; + else + current_svd->residual += userVal[userid] * itemVal[itemid]; + } + + CHECK_FOR_INTERRUPTS(); + } + } + + // With the training finished, we need to write out the data to file, + // so we can put it back. First, the user model. + tempfilename = (char*) palloc(256*sizeof(char)); + sprintf(tempfilename,"recathon_temp_%s.dat",usermodelname); + if ((fp = fopen(tempfilename,"w")) == NULL) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("failed to open temporary file"))); + insertstring = (char*) palloc(128*sizeof(char)); + + for (i = 0; i < numFeatures; i++) { + for (j = 0; j < numUsers; j++) { + sprintf(insertstring,"%d;%d;%f\n",userIDs[j],i,userFeatures[i][j]); + fwrite(insertstring,1,strlen(insertstring),fp); + } + } + fclose(fp); + + // If we are updating an existing SVD model, + // we will want to drop the existing primary key + // constraint before doing the copy, to save time. + if (update) { + insertstring = (char*) palloc(1024*sizeof(char)); + sprintf(insertstring,"ALTER TABLE %s DROP CONSTRAINT %s_pkey;", + usermodelname,usermodelname); + recathon_utilityExecute(insertstring); + pfree(insertstring); + } + + // We can bulk load the data with COPY FROM. It's faster + // than individual inserts by a good margin. + sprintf(querystring,"COPY %s FROM '%s' DELIMITERS ';';", + usermodelname,tempfilename); + recathon_utilityExecute(querystring); + + // Adding a primary key after the COPY FROM is about 25% faster + // than adding it before. + sprintf(querystring,"ALTER TABLE %s ADD PRIMARY KEY (users, feature);",usermodelname); + recathon_utilityExecute(querystring); + + // Delete the temporary file. + if (unlink(tempfilename) < 0) + ereport(WARNING, + (errcode(ERRCODE_WARNING), + errmsg("failed to delete temporary file"))); + + // Now do it again for the item model. + tempfilename = (char*) palloc(256*sizeof(char)); + sprintf(tempfilename,"recathon_temp_%s.dat",itemmodelname); + if ((fp = fopen(tempfilename,"w")) == NULL) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("failed to open temporary file"))); + + for (i = 0; i < numFeatures; i++) { + for (j = 0; j < numItems; j++) { + char insertstring[128]; + sprintf(insertstring,"%d;%d;%f\n",itemIDs[j],i,itemFeatures[i][j]); + fwrite(insertstring,1,strlen(insertstring),fp); + } + } + fclose(fp); + + // If we are updating an existing SVD model, + // we will want to drop the existing primary key + // constraint before doing the copy, to save time. + if (update) { + insertstring = (char*) palloc(1024*sizeof(char)); + sprintf(insertstring,"ALTER TABLE %s DROP CONSTRAINT %s_pkey;", + itemmodelname,itemmodelname); + recathon_utilityExecute(insertstring); + pfree(insertstring); + } + + // We can bulk load the data with COPY FROM. It's faster + // than individual inserts by a good margin. + sprintf(querystring,"COPY %s FROM '%s' DELIMITERS ';';", + itemmodelname,tempfilename); + recathon_utilityExecute(querystring); + + // Adding a primary key after the COPY FROM is about 25% faster + // than adding it before. + sprintf(querystring,"ALTER TABLE %s ADD PRIMARY KEY (items, feature);",itemmodelname); + recathon_utilityExecute(querystring); + + // Delete the temporary file. + if (unlink(tempfilename) < 0) + ereport(WARNING, + (errcode(ERRCODE_WARNING), + errmsg("failed to delete temporary file"))); + + // Free up memory. + pfree(querystring); + pfree(userIDs); + pfree(itemIDs); + pfree(itemAvgs); + pfree(userOffsets); + pfree(allEvents); + + for (i = 0; i < numFeatures; i++) + pfree(userFeatures[i]); + pfree(userFeatures); + for (i = 0; i < numFeatures; i++) + pfree(itemFeatures[i]); + pfree(itemFeatures); + + // Return the number of events we used. + return numEvents; } /* ---------------------------------------------------------------- @@ -2947,128 +2947,128 @@ SVDtrain(char *userkey, char *itemkey, char *eventtable, char *eventval, */ void generateItemCosModel(RecScanState *recnode) { - int i, j, priorID; - AttributeInfo *attributes; - float **itemmodel; - char *eventtable, *userkey, *itemkey, *eventval; - int numItems; - int *itemIDs; - float *itemLengths; - sim_node *itemEvents; - // Information for other queries. - char *querystring; - QueryDesc *simqueryDesc; - PlanState *simplanstate; - TupleTableSlot *simslot; - MemoryContext simcontext; - - attributes = (AttributeInfo*) recnode->attributes; - eventtable = attributes->eventtable; - userkey = attributes->userkey; - itemkey = attributes->itemkey; - eventval = attributes->eventval; - - /* We start by getting vector lengths. */ - itemLengths = vector_lengths(itemkey,eventtable,eventval,&numItems,&itemIDs); - - /* We have the number of items, so we can initialize our model. */ - itemmodel = (float**) palloc(numItems*sizeof(float*)); - for (i = 0; i < numItems; i++) - itemmodel[i] = (float*) palloc0(numItems*sizeof(float)); - - /* Then we can calculate similarity values for our model. We start by - * storing all the ratings. */ - itemEvents = (sim_node*) palloc(numItems*sizeof(sim_node)); - for (i = 0; i < numItems; i++) - itemEvents[i] = NULL; - - /* With the model created, we need to populate it, which means calculating - * similarity between all item pairs. We need to query the events table - * in order to get the key information. We'll also keep track of the number - * of events used, since we need to store that information. */ - querystring = (char*) palloc(1024*sizeof(char)); - sprintf(querystring,"SELECT r.%s,r.%s,r.%s FROM %s r ORDER BY r.%s;", - userkey,itemkey,eventval,eventtable,itemkey); - - /* Begin extracting data. */ - priorID = -1; - simqueryDesc = recathon_queryStart(querystring, &simcontext); - simplanstate = simqueryDesc->planstate; - i = -1; - - for (;;) { - int simuser, simitem; - float simevent; - sim_node newnode; - - /* Shut the compiler up. */ - simuser = 0; simitem = 0; simevent = 0.0; - - simslot = ExecProcNode(simplanstate); - if (TupIsNull(simslot)) break; - - simuser = getTupleInt(simslot,userkey); - simitem = getTupleInt(simslot,itemkey); - simevent = getTupleFloat(simslot,eventval); - - /* Are we dealing with a new item ID? If so, switch to the next slot. */ - if (simitem != priorID) { - priorID = simitem; - i++; - } - - /* We now have the user, item, and event for this tuple. - * We insert the results as a sim_node into the - * itemEvents table; we'll do calculations later. */ - newnode = createSimNode(simuser, simevent); - itemEvents[i] = simInsert(itemEvents[i], newnode); - } - - /* Query cleanup. */ - recathon_queryEnd(simqueryDesc, simcontext); - - /* Now we do the similarity calculations. Note that we - * don't include duplicate entries, to save time and space. - * The first item ALWAYS has a lower value than the second. */ - for (i = 0; i < numItems; i++) { - float length_i; - sim_node item_i; - - item_i = itemEvents[i]; - if (!item_i) continue; - length_i = itemLengths[i]; - - for (j = i+1; j < numItems; j++) { - float length_j; - sim_node item_j; - float similarity; - - item_j = itemEvents[j]; - if (!item_j) continue; - length_j = itemLengths[j]; - - similarity = cosineSimilarity(item_i, item_j, length_i, length_j); - if (similarity <= 0) continue; - - /* Now we output. Like with the pre-computed model, we'll - * only worry about half the model. This allows us to fill - * in the matrix left-to-right, top-to-bottom. */ - itemmodel[i][j] = similarity; - } - - CHECK_FOR_INTERRUPTS(); - } - - /* Free up the lists of sim_nodes now, since we're done. */ - for (i = 0; i < numItems; i++) { - freeSimList(itemEvents[i]); - itemEvents[i] = NULL; - } - - /* Fill in the appropriate information. */ - recnode->fullTotalItems = numItems; - recnode->fullItemList = itemIDs; - recnode->itemCFmodel = itemmodel; + int i, j, priorID; + AttributeInfo *attributes; + float **itemmodel; + char *eventtable, *userkey, *itemkey, *eventval; + int numItems; + int *itemIDs; + float *itemLengths; + sim_node *itemEvents; + // Information for other queries. + char *querystring; + QueryDesc *simqueryDesc; + PlanState *simplanstate; + TupleTableSlot *simslot; + MemoryContext simcontext; + + attributes = (AttributeInfo*) recnode->attributes; + eventtable = attributes->eventtable; + userkey = attributes->userkey; + itemkey = attributes->itemkey; + eventval = attributes->eventval; + + /* We start by getting vector lengths. */ + itemLengths = vector_lengths(itemkey,eventtable,eventval,&numItems,&itemIDs); + + /* We have the number of items, so we can initialize our model. */ + itemmodel = (float**) palloc(numItems*sizeof(float*)); + for (i = 0; i < numItems; i++) + itemmodel[i] = (float*) palloc0(numItems*sizeof(float)); + + /* Then we can calculate similarity values for our model. We start by + * storing all the ratings. */ + itemEvents = (sim_node*) palloc(numItems*sizeof(sim_node)); + for (i = 0; i < numItems; i++) + itemEvents[i] = NULL; + + /* With the model created, we need to populate it, which means calculating + * similarity between all item pairs. We need to query the events table + * in order to get the key information. We'll also keep track of the number + * of events used, since we need to store that information. */ + querystring = (char*) palloc(1024*sizeof(char)); + sprintf(querystring,"SELECT r.%s,r.%s,r.%s FROM %s r ORDER BY r.%s;", + userkey,itemkey,eventval,eventtable,itemkey); + + /* Begin extracting data. */ + priorID = -1; + simqueryDesc = recathon_queryStart(querystring, &simcontext); + simplanstate = simqueryDesc->planstate; + i = -1; + + for (;;) { + int simuser, simitem; + float simevent; + sim_node newnode; + + /* Shut the compiler up. */ + simuser = 0; simitem = 0; simevent = 0.0; + + simslot = ExecProcNode(simplanstate); + if (TupIsNull(simslot)) break; + + simuser = getTupleInt(simslot,userkey); + simitem = getTupleInt(simslot,itemkey); + simevent = getTupleFloat(simslot,eventval); + + /* Are we dealing with a new item ID? If so, switch to the next slot. */ + if (simitem != priorID) { + priorID = simitem; + i++; + } + + /* We now have the user, item, and event for this tuple. + * We insert the results as a sim_node into the + * itemEvents table; we'll do calculations later. */ + newnode = createSimNode(simuser, simevent); + itemEvents[i] = simInsert(itemEvents[i], newnode); + } + + /* Query cleanup. */ + recathon_queryEnd(simqueryDesc, simcontext); + + /* Now we do the similarity calculations. Note that we + * don't include duplicate entries, to save time and space. + * The first item ALWAYS has a lower value than the second. */ + for (i = 0; i < numItems; i++) { + float length_i; + sim_node item_i; + + item_i = itemEvents[i]; + if (!item_i) continue; + length_i = itemLengths[i]; + + for (j = i+1; j < numItems; j++) { + float length_j; + sim_node item_j; + float similarity; + + item_j = itemEvents[j]; + if (!item_j) continue; + length_j = itemLengths[j]; + + similarity = cosineSimilarity(item_i, item_j, length_i, length_j); + if (similarity <= 0) continue; + + /* Now we output. Like with the pre-computed model, we'll + * only worry about half the model. This allows us to fill + * in the matrix left-to-right, top-to-bottom. */ + itemmodel[i][j] = similarity; + } + + CHECK_FOR_INTERRUPTS(); + } + + /* Free up the lists of sim_nodes now, since we're done. */ + for (i = 0; i < numItems; i++) { + freeSimList(itemEvents[i]); + itemEvents[i] = NULL; + } + + /* Fill in the appropriate information. */ + recnode->fullTotalItems = numItems; + recnode->fullItemList = itemIDs; + recnode->itemCFmodel = itemmodel; } /* ---------------------------------------------------------------- @@ -3079,134 +3079,134 @@ generateItemCosModel(RecScanState *recnode) { */ void generateItemPearModel(RecScanState *recnode) { - int i, j, priorID; - char *querystring; - char *eventtable, *userkey, *itemkey, *eventval; - sim_node *itemEvents; - int numItems; - int *itemIDs; - float *itemAvgs; - float *itemPearsons; - AttributeInfo *attributes; - float **itemmodel; - // Information for other queries. - QueryDesc *simqueryDesc; - PlanState *simplanstate; - TupleTableSlot *simslot; - MemoryContext simcontext; - - attributes = (AttributeInfo*) recnode->attributes; - eventtable = attributes->eventtable; - userkey = attributes->userkey; - itemkey = attributes->itemkey; - eventval = attributes->eventval; - - // First we need to get relevant Pearson information. - pearson_info(itemkey, eventtable, eventval, &numItems, &itemIDs, &itemAvgs, &itemPearsons); - - /* We have the number of items, so we can initialize our model. */ - itemmodel = (float**) palloc(numItems*sizeof(float*)); - for (i = 0; i < numItems; i++) - itemmodel[i] = (float*) palloc0(numItems*sizeof(float)); - - // With the precomputation done, we need to derive the actual item - // similarities. We can do this in a way that's linear in the number - // of I/Os and also the amount of storage. The complexity is relegated - // to in-memory calculations, which is the most affordable. We need to - // use this data structure here. - itemEvents = (sim_node*) palloc(numItems*sizeof(sim_node)); - for (i = 0; i < numItems; i++) - itemEvents[i] = NULL; - - // With the model created, we need to populate it, which means calculating - // similarity between all item pairs. We need to query the events table - // in order to get the key information. - querystring = (char*) palloc(1024*sizeof(char)); - sprintf(querystring,"SELECT r.%s,r.%s,r.%s FROM %s r ORDER BY r.%s;", - userkey,itemkey,eventval,eventtable,itemkey); - - // Begin extracting data. - priorID = -1; - simqueryDesc = recathon_queryStart(querystring, &simcontext); - simplanstate = simqueryDesc->planstate; - i = -1; - - for (;;) { - int simuser, simitem; - float simevent; - sim_node newnode; - - // Shut the compiler up. - simuser = 0; simitem = 0; simevent = 0.0; - - simslot = ExecProcNode(simplanstate); - if (TupIsNull(simslot)) break; - - simuser = getTupleInt(simslot,userkey); - simitem = getTupleInt(simslot,itemkey); - simevent = getTupleFloat(simslot,eventval); - - // Are we dealing with a new item ID? If so, switch to the next slot. - if (simitem != priorID) { - priorID = simitem; - i++; - } - - // We now have the user, item, and event for this tuple. - // We insert the results as a sim_node into the - // itemEvents table; we'll do calculations later. - newnode = createSimNode(simuser, simevent); - itemEvents[i] = simInsert(itemEvents[i], newnode); - } - - // Query cleanup. - recathon_queryEnd(simqueryDesc, simcontext); - pfree(querystring); - - // Now we do the similarity calculations. Note that we - // don't include duplicate entries, to save time and space. - // The first item ALWAYS has a lower value than the second. - for (i = 0; i < numItems; i++) { - float avg_i, pearson_i; - sim_node item_i; - - item_i = itemEvents[i]; - if (!item_i) continue; - avg_i = itemAvgs[i]; - pearson_i = itemPearsons[i]; - - for (j = i+1; j < numItems; j++) { - float avg_j, pearson_j; - sim_node item_j; - float similarity; - - item_j = itemEvents[j]; - if (!item_j) continue; - avg_j = itemAvgs[j]; - pearson_j = itemPearsons[j]; - - similarity = pearsonSimilarity(item_i, item_j, avg_i, avg_j, pearson_i, pearson_j); - if (similarity == 0.0) continue; - - /* Now we output. Like with the pre-computed model, we'll - * only worry about half the model. This allows us to fill - * in the matrix left-to-right, top-to-bottom. */ - itemmodel[i][j] = similarity; - } - - CHECK_FOR_INTERRUPTS(); - } - - // Free up the lists of sim_nodes and we're done. - for (i = 0; i < numItems; i++) { - freeSimList(itemEvents[i]); - itemEvents[i] = NULL; - } - - // Return the relevant information. - recnode->fullTotalItems = numItems; - recnode->fullItemList = itemIDs; - recnode->itemCFmodel = itemmodel; + int i, j, priorID; + char *querystring; + char *eventtable, *userkey, *itemkey, *eventval; + sim_node *itemEvents; + int numItems; + int *itemIDs; + float *itemAvgs; + float *itemPearsons; + AttributeInfo *attributes; + float **itemmodel; + // Information for other queries. + QueryDesc *simqueryDesc; + PlanState *simplanstate; + TupleTableSlot *simslot; + MemoryContext simcontext; + + attributes = (AttributeInfo*) recnode->attributes; + eventtable = attributes->eventtable; + userkey = attributes->userkey; + itemkey = attributes->itemkey; + eventval = attributes->eventval; + + // First we need to get relevant Pearson information. + pearson_info(itemkey, eventtable, eventval, &numItems, &itemIDs, &itemAvgs, &itemPearsons); + + /* We have the number of items, so we can initialize our model. */ + itemmodel = (float**) palloc(numItems*sizeof(float*)); + for (i = 0; i < numItems; i++) + itemmodel[i] = (float*) palloc0(numItems*sizeof(float)); + + // With the precomputation done, we need to derive the actual item + // similarities. We can do this in a way that's linear in the number + // of I/Os and also the amount of storage. The complexity is relegated + // to in-memory calculations, which is the most affordable. We need to + // use this data structure here. + itemEvents = (sim_node*) palloc(numItems*sizeof(sim_node)); + for (i = 0; i < numItems; i++) + itemEvents[i] = NULL; + + // With the model created, we need to populate it, which means calculating + // similarity between all item pairs. We need to query the events table + // in order to get the key information. + querystring = (char*) palloc(1024*sizeof(char)); + sprintf(querystring,"SELECT r.%s,r.%s,r.%s FROM %s r ORDER BY r.%s;", + userkey,itemkey,eventval,eventtable,itemkey); + + // Begin extracting data. + priorID = -1; + simqueryDesc = recathon_queryStart(querystring, &simcontext); + simplanstate = simqueryDesc->planstate; + i = -1; + + for (;;) { + int simuser, simitem; + float simevent; + sim_node newnode; + + // Shut the compiler up. + simuser = 0; simitem = 0; simevent = 0.0; + + simslot = ExecProcNode(simplanstate); + if (TupIsNull(simslot)) break; + + simuser = getTupleInt(simslot,userkey); + simitem = getTupleInt(simslot,itemkey); + simevent = getTupleFloat(simslot,eventval); + + // Are we dealing with a new item ID? If so, switch to the next slot. + if (simitem != priorID) { + priorID = simitem; + i++; + } + + // We now have the user, item, and event for this tuple. + // We insert the results as a sim_node into the + // itemEvents table; we'll do calculations later. + newnode = createSimNode(simuser, simevent); + itemEvents[i] = simInsert(itemEvents[i], newnode); + } + + // Query cleanup. + recathon_queryEnd(simqueryDesc, simcontext); + pfree(querystring); + + // Now we do the similarity calculations. Note that we + // don't include duplicate entries, to save time and space. + // The first item ALWAYS has a lower value than the second. + for (i = 0; i < numItems; i++) { + float avg_i, pearson_i; + sim_node item_i; + + item_i = itemEvents[i]; + if (!item_i) continue; + avg_i = itemAvgs[i]; + pearson_i = itemPearsons[i]; + + for (j = i+1; j < numItems; j++) { + float avg_j, pearson_j; + sim_node item_j; + float similarity; + + item_j = itemEvents[j]; + if (!item_j) continue; + avg_j = itemAvgs[j]; + pearson_j = itemPearsons[j]; + + similarity = pearsonSimilarity(item_i, item_j, avg_i, avg_j, pearson_i, pearson_j); + if (similarity == 0.0) continue; + + /* Now we output. Like with the pre-computed model, we'll + * only worry about half the model. This allows us to fill + * in the matrix left-to-right, top-to-bottom. */ + itemmodel[i][j] = similarity; + } + + CHECK_FOR_INTERRUPTS(); + } + + // Free up the lists of sim_nodes and we're done. + for (i = 0; i < numItems; i++) { + freeSimList(itemEvents[i]); + itemEvents[i] = NULL; + } + + // Return the relevant information. + recnode->fullTotalItems = numItems; + recnode->fullItemList = itemIDs; + recnode->itemCFmodel = itemmodel; } /* ---------------------------------------------------------------- @@ -3217,133 +3217,133 @@ generateItemPearModel(RecScanState *recnode) { */ void generateUserCosModel(RecScanState *recnode) { - int i, j, priorID; - int numEvents = 0; - char *querystring; - sim_node *userEvents; - char *eventtable, *userkey, *itemkey, *eventval; - AttributeInfo *attributes; - float **usermodel; - int numUsers; - int *userIDs; - float *userLengths; - // Information for other queries. - QueryDesc *simqueryDesc; - PlanState *simplanstate; - TupleTableSlot *simslot; - MemoryContext simcontext; - - attributes = (AttributeInfo*) recnode->attributes; - eventtable = attributes->eventtable; - userkey = attributes->userkey; - itemkey = attributes->itemkey; - eventval = attributes->eventval; - - // First we need vector lengths. - userLengths = vector_lengths(userkey, eventtable, eventval, &numUsers, &userIDs); - - /* We have the number of users, so we can initialize our model. */ - usermodel = (float**) palloc(numUsers*sizeof(float*)); - for (i = 0; i < numUsers; i++) - usermodel[i] = (float*) palloc0(numUsers*sizeof(float)); - - // With the precomputation done, we need to derive the actual user - // similarities. We can do this in a way that's linear in the number - // of I/Os and also the amount of storage. The complexity is relegated - // to in-memory calculations, which is the most affordable. We need to - // use this data structure here. - userEvents = (sim_node*) palloc(numUsers*sizeof(sim_node)); - for (i = 0; i < numUsers; i++) - userEvents[i] = NULL; - - // With the model created, we need to populate it, which means calculating - // similarity between all user pairs. We need to query the events table - // in order to get the key information. - querystring = (char*) palloc(1024*sizeof(char)); - sprintf(querystring,"SELECT r.%s,r.%s,r.%s FROM %s r ORDER BY r.%s;", - userkey,itemkey,eventval,eventtable,userkey); - - // Begin extracting data. - priorID = -1; - simqueryDesc = recathon_queryStart(querystring, &simcontext); - simplanstate = simqueryDesc->planstate; - i = -1; - - for (;;) { - int simuser, simitem; - float simevent; - sim_node newnode; - - // Shut the compiler up. - simuser = 0; simitem = 0; simevent = 0.0; - - simslot = ExecProcNode(simplanstate); - if (TupIsNull(simslot)) break; - - simuser = getTupleInt(simslot,userkey); - simitem = getTupleInt(simslot,itemkey); - simevent = getTupleFloat(simslot,eventval); - - // Are we dealing with a new user ID? If so, switch to the next slot. - if (simuser != priorID) { - priorID = simuser; - i++; - } - - // We now have the user, item, and event for this tuple. - // We insert the results as a sim_node into the - // userEvents table; we'll do calculations later. - newnode = createSimNode(simitem, simevent); - userEvents[i] = simInsert(userEvents[i], newnode); - numEvents++; - } - - // Query cleanup. - recathon_queryEnd(simqueryDesc, simcontext); - pfree(querystring); - - // Now we do the similarity calculations. Note that we - // don't include duplicate entries, to save time and space. - // The first user ALWAYS has a lower value than the second. - for (i = 0; i < numUsers; i++) { - float length_i; - sim_node user_i; - - user_i = userEvents[i]; - if (!user_i) continue; - length_i = userLengths[i]; - - for (j = i+1; j < numUsers; j++) { - float length_j; - sim_node user_j; - float similarity; - - user_j = userEvents[j]; - if (!user_j) continue; - length_j = userLengths[j]; - - similarity = cosineSimilarity(user_i, user_j, length_i, length_j); - if (similarity <= 0) continue; - - /* Now we output. Like with the pre-computed model, we'll - * only worry about half the model. This allows us to fill - * in the matrix left-to-right, top-to-bottom. */ - usermodel[i][j] = similarity; - } - - CHECK_FOR_INTERRUPTS(); - } - - // Free up the lists of sim_nodes and we're done. - for (i = 0; i < numUsers; i++) { - freeSimList(userEvents[i]); - userEvents[i] = NULL; - } - - // Return the relevant information. - recnode->totalUsers = numUsers; - recnode->userList = userIDs; - recnode->userCFmodel = usermodel; + int i, j, priorID; + int numEvents = 0; + char *querystring; + sim_node *userEvents; + char *eventtable, *userkey, *itemkey, *eventval; + AttributeInfo *attributes; + float **usermodel; + int numUsers; + int *userIDs; + float *userLengths; + // Information for other queries. + QueryDesc *simqueryDesc; + PlanState *simplanstate; + TupleTableSlot *simslot; + MemoryContext simcontext; + + attributes = (AttributeInfo*) recnode->attributes; + eventtable = attributes->eventtable; + userkey = attributes->userkey; + itemkey = attributes->itemkey; + eventval = attributes->eventval; + + // First we need vector lengths. + userLengths = vector_lengths(userkey, eventtable, eventval, &numUsers, &userIDs); + + /* We have the number of users, so we can initialize our model. */ + usermodel = (float**) palloc(numUsers*sizeof(float*)); + for (i = 0; i < numUsers; i++) + usermodel[i] = (float*) palloc0(numUsers*sizeof(float)); + + // With the precomputation done, we need to derive the actual user + // similarities. We can do this in a way that's linear in the number + // of I/Os and also the amount of storage. The complexity is relegated + // to in-memory calculations, which is the most affordable. We need to + // use this data structure here. + userEvents = (sim_node*) palloc(numUsers*sizeof(sim_node)); + for (i = 0; i < numUsers; i++) + userEvents[i] = NULL; + + // With the model created, we need to populate it, which means calculating + // similarity between all user pairs. We need to query the events table + // in order to get the key information. + querystring = (char*) palloc(1024*sizeof(char)); + sprintf(querystring,"SELECT r.%s,r.%s,r.%s FROM %s r ORDER BY r.%s;", + userkey,itemkey,eventval,eventtable,userkey); + + // Begin extracting data. + priorID = -1; + simqueryDesc = recathon_queryStart(querystring, &simcontext); + simplanstate = simqueryDesc->planstate; + i = -1; + + for (;;) { + int simuser, simitem; + float simevent; + sim_node newnode; + + // Shut the compiler up. + simuser = 0; simitem = 0; simevent = 0.0; + + simslot = ExecProcNode(simplanstate); + if (TupIsNull(simslot)) break; + + simuser = getTupleInt(simslot,userkey); + simitem = getTupleInt(simslot,itemkey); + simevent = getTupleFloat(simslot,eventval); + + // Are we dealing with a new user ID? If so, switch to the next slot. + if (simuser != priorID) { + priorID = simuser; + i++; + } + + // We now have the user, item, and event for this tuple. + // We insert the results as a sim_node into the + // userEvents table; we'll do calculations later. + newnode = createSimNode(simitem, simevent); + userEvents[i] = simInsert(userEvents[i], newnode); + numEvents++; + } + + // Query cleanup. + recathon_queryEnd(simqueryDesc, simcontext); + pfree(querystring); + + // Now we do the similarity calculations. Note that we + // don't include duplicate entries, to save time and space. + // The first user ALWAYS has a lower value than the second. + for (i = 0; i < numUsers; i++) { + float length_i; + sim_node user_i; + + user_i = userEvents[i]; + if (!user_i) continue; + length_i = userLengths[i]; + + for (j = i+1; j < numUsers; j++) { + float length_j; + sim_node user_j; + float similarity; + + user_j = userEvents[j]; + if (!user_j) continue; + length_j = userLengths[j]; + + similarity = cosineSimilarity(user_i, user_j, length_i, length_j); + if (similarity <= 0) continue; + + /* Now we output. Like with the pre-computed model, we'll + * only worry about half the model. This allows us to fill + * in the matrix left-to-right, top-to-bottom. */ + usermodel[i][j] = similarity; + } + + CHECK_FOR_INTERRUPTS(); + } + + // Free up the lists of sim_nodes and we're done. + for (i = 0; i < numUsers; i++) { + freeSimList(userEvents[i]); + userEvents[i] = NULL; + } + + // Return the relevant information. + recnode->totalUsers = numUsers; + recnode->userList = userIDs; + recnode->userCFmodel = usermodel; } /* ---------------------------------------------------------------- @@ -3354,136 +3354,136 @@ generateUserCosModel(RecScanState *recnode) { */ void generateUserPearModel(RecScanState *recnode) { - int i, j, priorID; - int numEvents = 0; - char *querystring; - sim_node *userEvents; - char *eventtable, *userkey, *itemkey, *eventval; - AttributeInfo *attributes; - float **usermodel; - int numUsers; - int *userIDs; - float *userAvgs; - float *userPearsons; - // Information for other queries. - QueryDesc *simqueryDesc; - PlanState *simplanstate; - TupleTableSlot *simslot; - MemoryContext simcontext; - - attributes = (AttributeInfo*) recnode->attributes; - eventtable = attributes->eventtable; - userkey = attributes->userkey; - itemkey = attributes->itemkey; - eventval = attributes->eventval; - - // First, we need Pearson info. - pearson_info(userkey, eventtable, eventval, &numUsers, &userIDs, &userAvgs, &userPearsons); - - /* We have the number of users, so we can initialize our model. */ - usermodel = (float**) palloc(numUsers*sizeof(float*)); - for (i = 0; i < numUsers; i++) - usermodel[i] = (float*) palloc0(numUsers*sizeof(float)); - - // With the precomputation done, we need to derive the actual item - // similarities. We can do this in a way that's linear in the number - // of I/Os and also the amount of storage. The complexity is relegated - // to in-memory calculations, which is the most affordable. We need to - // use this data structure here. - userEvents = (sim_node*) palloc(numUsers*sizeof(sim_node)); - for (i = 0; i < numUsers; i++) - userEvents[i] = NULL; - - // With the model created, we need to populate it, which means calculating - // similarity between all item pairs. We need to query the events table - // in order to get the key information. - querystring = (char*) palloc(1024*sizeof(char)); - sprintf(querystring,"SELECT r.%s,r.%s,r.%s FROM %s r ORDER BY r.%s;", - userkey,itemkey,eventval,eventtable,userkey); - - // Begin extracting data. - priorID = -1; - simqueryDesc = recathon_queryStart(querystring, &simcontext); - simplanstate = simqueryDesc->planstate; - i = -1; - - for (;;) { - int simuser, simitem; - float simevent; - sim_node newnode; - - // Shut the compiler up. - simuser = 0; simitem = 0; simevent = 0.0; - - simslot = ExecProcNode(simplanstate); - if (TupIsNull(simslot)) break; - - simuser = getTupleInt(simslot,userkey); - simitem = getTupleInt(simslot,itemkey); - simevent = getTupleFloat(simslot,eventval); - - // Are we dealing with a new user ID? If so, switch to the next slot. - if (simuser != priorID) { - priorID = simuser; - i++; - } - - // We now have the user, item, and event for this tuple. - // We insert the results as a sim_node into the - // userEvents table; we'll do calculations later. - newnode = createSimNode(simitem, simevent); - userEvents[i] = simInsert(userEvents[i], newnode); - numEvents++; - } - - // Query cleanup. - recathon_queryEnd(simqueryDesc, simcontext); - pfree(querystring); - - // Now we do the similarity calculations. Note that we - // don't include duplicate entries, to save time and space. - // The first item ALWAYS has a lower value than the second. - for (i = 0; i < numUsers; i++) { - float avg_i, pearson_i; - sim_node user_i; - - user_i = userEvents[i]; - if (!user_i) continue; - avg_i = userAvgs[i]; - pearson_i = userPearsons[i]; - - for (j = i+1; j < numUsers; j++) { - float avg_j, pearson_j; - sim_node user_j; - float similarity; - - user_j = userEvents[j]; - if (!user_j) continue; - avg_j = userAvgs[j]; - pearson_j = userPearsons[j]; - - similarity = pearsonSimilarity(user_i, user_j, avg_i, avg_j, pearson_i, pearson_j); - if (similarity == 0.0) continue; - - /* Now we output. Like with the pre-computed model, we'll - * only worry about half the model. This allows us to fill - * in the matrix left-to-right, top-to-bottom. */ - usermodel[i][j] = similarity; - } - - CHECK_FOR_INTERRUPTS(); - } - - // Free up the lists of sim_nodes and we're done. - for (i = 0; i < numUsers; i++) { - freeSimList(userEvents[i]); - userEvents[i] = NULL; - } - - // Return the relevant information. - recnode->totalUsers = numUsers; - recnode->userList = userIDs; - recnode->userCFmodel = usermodel; + int i, j, priorID; + int numEvents = 0; + char *querystring; + sim_node *userEvents; + char *eventtable, *userkey, *itemkey, *eventval; + AttributeInfo *attributes; + float **usermodel; + int numUsers; + int *userIDs; + float *userAvgs; + float *userPearsons; + // Information for other queries. + QueryDesc *simqueryDesc; + PlanState *simplanstate; + TupleTableSlot *simslot; + MemoryContext simcontext; + + attributes = (AttributeInfo*) recnode->attributes; + eventtable = attributes->eventtable; + userkey = attributes->userkey; + itemkey = attributes->itemkey; + eventval = attributes->eventval; + + // First, we need Pearson info. + pearson_info(userkey, eventtable, eventval, &numUsers, &userIDs, &userAvgs, &userPearsons); + + /* We have the number of users, so we can initialize our model. */ + usermodel = (float**) palloc(numUsers*sizeof(float*)); + for (i = 0; i < numUsers; i++) + usermodel[i] = (float*) palloc0(numUsers*sizeof(float)); + + // With the precomputation done, we need to derive the actual item + // similarities. We can do this in a way that's linear in the number + // of I/Os and also the amount of storage. The complexity is relegated + // to in-memory calculations, which is the most affordable. We need to + // use this data structure here. + userEvents = (sim_node*) palloc(numUsers*sizeof(sim_node)); + for (i = 0; i < numUsers; i++) + userEvents[i] = NULL; + + // With the model created, we need to populate it, which means calculating + // similarity between all item pairs. We need to query the events table + // in order to get the key information. + querystring = (char*) palloc(1024*sizeof(char)); + sprintf(querystring,"SELECT r.%s,r.%s,r.%s FROM %s r ORDER BY r.%s;", + userkey,itemkey,eventval,eventtable,userkey); + + // Begin extracting data. + priorID = -1; + simqueryDesc = recathon_queryStart(querystring, &simcontext); + simplanstate = simqueryDesc->planstate; + i = -1; + + for (;;) { + int simuser, simitem; + float simevent; + sim_node newnode; + + // Shut the compiler up. + simuser = 0; simitem = 0; simevent = 0.0; + + simslot = ExecProcNode(simplanstate); + if (TupIsNull(simslot)) break; + + simuser = getTupleInt(simslot,userkey); + simitem = getTupleInt(simslot,itemkey); + simevent = getTupleFloat(simslot,eventval); + + // Are we dealing with a new user ID? If so, switch to the next slot. + if (simuser != priorID) { + priorID = simuser; + i++; + } + + // We now have the user, item, and event for this tuple. + // We insert the results as a sim_node into the + // userEvents table; we'll do calculations later. + newnode = createSimNode(simitem, simevent); + userEvents[i] = simInsert(userEvents[i], newnode); + numEvents++; + } + + // Query cleanup. + recathon_queryEnd(simqueryDesc, simcontext); + pfree(querystring); + + // Now we do the similarity calculations. Note that we + // don't include duplicate entries, to save time and space. + // The first item ALWAYS has a lower value than the second. + for (i = 0; i < numUsers; i++) { + float avg_i, pearson_i; + sim_node user_i; + + user_i = userEvents[i]; + if (!user_i) continue; + avg_i = userAvgs[i]; + pearson_i = userPearsons[i]; + + for (j = i+1; j < numUsers; j++) { + float avg_j, pearson_j; + sim_node user_j; + float similarity; + + user_j = userEvents[j]; + if (!user_j) continue; + avg_j = userAvgs[j]; + pearson_j = userPearsons[j]; + + similarity = pearsonSimilarity(user_i, user_j, avg_i, avg_j, pearson_i, pearson_j); + if (similarity == 0.0) continue; + + /* Now we output. Like with the pre-computed model, we'll + * only worry about half the model. This allows us to fill + * in the matrix left-to-right, top-to-bottom. */ + usermodel[i][j] = similarity; + } + + CHECK_FOR_INTERRUPTS(); + } + + // Free up the lists of sim_nodes and we're done. + for (i = 0; i < numUsers; i++) { + freeSimList(userEvents[i]); + userEvents[i] = NULL; + } + + // Return the relevant information. + recnode->totalUsers = numUsers; + recnode->userList = userIDs; + recnode->userCFmodel = usermodel; } /* ---------------------------------------------------------------- @@ -3494,143 +3494,143 @@ generateUserPearModel(RecScanState *recnode) { */ void generateSVDmodel(RecScanState *recnode) { - float **userFeatures, **itemFeatures; - int *userIDs, *itemIDs; - float *itemAvgs, *userOffsets; - int numUsers, numItems; - int i, j, k, numEvents; - int numFeatures = 50; - svd_node *allEvents; - AttributeInfo *attributes; - char *eventtable, *userkey, *itemkey, *eventval; - // Information for other queries. - char *querystring; - QueryDesc *queryDesc; - PlanState *planstate; - TupleTableSlot *slot; - MemoryContext recathoncontext; - - attributes = (AttributeInfo*) recnode->attributes; - eventtable = attributes->eventtable; - userkey = attributes->userkey; - itemkey = attributes->itemkey; - eventval = attributes->eventval; - - // First, we get our lists of users and items. - SVDlists(userkey,itemkey,eventtable, - &userIDs, &itemIDs, &numUsers, &numItems); - - // Then we get information for baseline averages. - SVDaverages(userkey,itemkey,eventtable,eventval, - userIDs,itemIDs,numUsers,numItems, - &itemAvgs,&userOffsets); - - // Initialize our feature arrays. - userFeatures = (float**) palloc(numFeatures*sizeof(float*)); - for (i = 0; i < numFeatures; i++) { - userFeatures[i] = (float*) palloc(numUsers*sizeof(float)); - for (j = 0; j < numUsers; j++) - userFeatures[i][j] = 0.1; - } - itemFeatures = (float**) palloc(numFeatures*sizeof(float*)); - for (i = 0; i < numFeatures; i++) { - itemFeatures[i] = (float*) palloc(numItems*sizeof(float)); - for (j = 0; j < numItems; j++) - itemFeatures[i][j] = 0.1; - } - - // First we need to count the number of events we'll be - // considering. - querystring = (char*) palloc(1024*sizeof(char)); - numEvents = count_rows(eventtable); - - // Initialize the events array. - allEvents = (svd_node*) palloc(numEvents*sizeof(svd_node)); - - sprintf(querystring,"SELECT r.%s,r.%s,r.%s FROM %s r ORDER BY r.%s;", - userkey,itemkey,eventval,eventtable,userkey); - - // Let's acquire all of our events and store them. Sorting initially by - // user ID avoids unnecessary binary searches. - queryDesc = recathon_queryStart(querystring,&recathoncontext); - planstate = queryDesc->planstate; - - i = 0; - for (;;) { - svd_node new_svd; - - slot = ExecProcNode(planstate); - if (TupIsNull(slot)) break; - - new_svd = createSVDnode(slot, userkey, itemkey, eventval, userIDs, itemIDs, numUsers, numItems); - - allEvents[i] = new_svd; - - i++; - if (i >= numEvents) break; - } - - recathon_queryEnd(queryDesc,recathoncontext); - - // We now have all of the events, so we can start training our features. - for (j = 0; j < 100; j++) { - for (i = 0; i < numFeatures; i++) { - float learn = 0.001; - float penalty = 0.002; - float *userVal = userFeatures[i]; - float *itemVal = itemFeatures[i]; - - for (k = 0; k < numEvents; k++) { - int userid; - int itemid; - float event, err, residual, temp; - svd_node current_svd; - - current_svd = allEvents[k]; - userid = current_svd->userid; - itemid = current_svd->itemid; - event = current_svd->event; - // Need to reset residuals for each new - // iteration of the trainer. - if (i == 0) - current_svd->residual = 0; - residual = current_svd->residual; - - if (i == 0 && j == 0) { - err = event - (itemAvgs[itemid] + userOffsets[userid]); - } else { - err = event - predictRating(i, numFeatures, userid, itemid, - userFeatures, itemFeatures, residual); - } - temp = userVal[userid]; - userVal[userid] += learn * ((err * itemVal[itemid]) - (penalty * userVal[userid])); - itemVal[itemid] += learn * ((err * temp) - (penalty * itemVal[itemid])); - - // Store residuals. - if (i == 0) - current_svd->residual = userVal[userid] * itemVal[itemid]; - else - current_svd->residual += userVal[userid] * itemVal[itemid]; - } - - CHECK_FOR_INTERRUPTS(); - } - } - - // Free up memory. - pfree(querystring); - pfree(itemAvgs); - pfree(userOffsets); - pfree(allEvents); - - // Return the relevant information. - recnode->numFeatures = numFeatures; - recnode->totalUsers = numUsers; - recnode->fullTotalItems = numItems; - recnode->userList = userIDs; - recnode->fullItemList = itemIDs; - recnode->SVDusermodel = userFeatures; - recnode->SVDitemmodel = itemFeatures; + float **userFeatures, **itemFeatures; + int *userIDs, *itemIDs; + float *itemAvgs, *userOffsets; + int numUsers, numItems; + int i, j, k, numEvents; + int numFeatures = 50; + svd_node *allEvents; + AttributeInfo *attributes; + char *eventtable, *userkey, *itemkey, *eventval; + // Information for other queries. + char *querystring; + QueryDesc *queryDesc; + PlanState *planstate; + TupleTableSlot *slot; + MemoryContext recathoncontext; + + attributes = (AttributeInfo*) recnode->attributes; + eventtable = attributes->eventtable; + userkey = attributes->userkey; + itemkey = attributes->itemkey; + eventval = attributes->eventval; + + // First, we get our lists of users and items. + SVDlists(userkey,itemkey,eventtable, + &userIDs, &itemIDs, &numUsers, &numItems); + + // Then we get information for baseline averages. + SVDaverages(userkey,itemkey,eventtable,eventval, + userIDs,itemIDs,numUsers,numItems, + &itemAvgs,&userOffsets); + + // Initialize our feature arrays. + userFeatures = (float**) palloc(numFeatures*sizeof(float*)); + for (i = 0; i < numFeatures; i++) { + userFeatures[i] = (float*) palloc(numUsers*sizeof(float)); + for (j = 0; j < numUsers; j++) + userFeatures[i][j] = 0.1; + } + itemFeatures = (float**) palloc(numFeatures*sizeof(float*)); + for (i = 0; i < numFeatures; i++) { + itemFeatures[i] = (float*) palloc(numItems*sizeof(float)); + for (j = 0; j < numItems; j++) + itemFeatures[i][j] = 0.1; + } + + // First we need to count the number of events we'll be + // considering. + querystring = (char*) palloc(1024*sizeof(char)); + numEvents = count_rows(eventtable); + + // Initialize the events array. + allEvents = (svd_node*) palloc(numEvents*sizeof(svd_node)); + + sprintf(querystring,"SELECT r.%s,r.%s,r.%s FROM %s r ORDER BY r.%s;", + userkey,itemkey,eventval,eventtable,userkey); + + // Let's acquire all of our events and store them. Sorting initially by + // user ID avoids unnecessary binary searches. + queryDesc = recathon_queryStart(querystring,&recathoncontext); + planstate = queryDesc->planstate; + + i = 0; + for (;;) { + svd_node new_svd; + + slot = ExecProcNode(planstate); + if (TupIsNull(slot)) break; + + new_svd = createSVDnode(slot, userkey, itemkey, eventval, userIDs, itemIDs, numUsers, numItems); + + allEvents[i] = new_svd; + + i++; + if (i >= numEvents) break; + } + + recathon_queryEnd(queryDesc,recathoncontext); + + // We now have all of the events, so we can start training our features. + for (j = 0; j < 100; j++) { + for (i = 0; i < numFeatures; i++) { + float learn = 0.001; + float penalty = 0.002; + float *userVal = userFeatures[i]; + float *itemVal = itemFeatures[i]; + + for (k = 0; k < numEvents; k++) { + int userid; + int itemid; + float event, err, residual, temp; + svd_node current_svd; + + current_svd = allEvents[k]; + userid = current_svd->userid; + itemid = current_svd->itemid; + event = current_svd->event; + // Need to reset residuals for each new + // iteration of the trainer. + if (i == 0) + current_svd->residual = 0; + residual = current_svd->residual; + + if (i == 0 && j == 0) { + err = event - (itemAvgs[itemid] + userOffsets[userid]); + } else { + err = event - predictRating(i, numFeatures, userid, itemid, + userFeatures, itemFeatures, residual); + } + temp = userVal[userid]; + userVal[userid] += learn * ((err * itemVal[itemid]) - (penalty * userVal[userid])); + itemVal[itemid] += learn * ((err * temp) - (penalty * itemVal[itemid])); + + // Store residuals. + if (i == 0) + current_svd->residual = userVal[userid] * itemVal[itemid]; + else + current_svd->residual += userVal[userid] * itemVal[itemid]; + } + + CHECK_FOR_INTERRUPTS(); + } + } + + // Free up memory. + pfree(querystring); + pfree(itemAvgs); + pfree(userOffsets); + pfree(allEvents); + + // Return the relevant information. + recnode->numFeatures = numFeatures; + recnode->totalUsers = numUsers; + recnode->fullTotalItems = numItems; + recnode->userList = userIDs; + recnode->fullItemList = itemIDs; + recnode->SVDusermodel = userFeatures; + recnode->SVDitemmodel = itemFeatures; } /* ---------------------------------------------------------------- @@ -3644,45 +3644,45 @@ generateSVDmodel(RecScanState *recnode) { float itemCFgenerate(RecScanState *recnode, int itemid, int itemindex) { - int i; - float recScore; - GenRating *currentItem; - - // First, we grab the GenRating for this item ID. - currentItem = hashFind(recnode->pendingTable, itemid); - // In case there's some error. - if (!currentItem) - return -1; - - // We're going to look through the similarity matrix for the - // numbers that correspond to this item, and find which of those - // also correspond to items this user rated. We will use that - // information to obtain the estimated rating. - - for (i = itemindex+1; i < recnode->fullTotalItems; i++) { - int itemID; - float similarity; - GenRating *ratedItem; - - itemID = recnode->fullItemList[i]; - similarity = recnode->itemCFmodel[itemindex][i]; - - // Find the array slot this item ID corresponds to. - // If -1 is returned, then the item ID corresponds to - // another item we haven't rated, so we don't care. - ratedItem = hashFind(recnode->ratedTable,itemID); - if (ratedItem) { - currentItem->score += similarity*ratedItem->score; - if (similarity < 0) - similarity *= -1; - currentItem->totalSim += similarity; - } - } - - if (currentItem->totalSim == 0) return 0; - - recScore = currentItem->score / currentItem->totalSim; - return recScore; + int i; + float recScore; + GenRating *currentItem; + + // First, we grab the GenRating for this item ID. + currentItem = hashFind(recnode->pendingTable, itemid); + // In case there's some error. + if (!currentItem) + return -1; + + // We're going to look through the similarity matrix for the + // numbers that correspond to this item, and find which of those + // also correspond to items this user rated. We will use that + // information to obtain the estimated rating. + + for (i = itemindex+1; i < recnode->fullTotalItems; i++) { + int itemID; + float similarity; + GenRating *ratedItem; + + itemID = recnode->fullItemList[i]; + similarity = recnode->itemCFmodel[itemindex][i]; + + // Find the array slot this item ID corresponds to. + // If -1 is returned, then the item ID corresponds to + // another item we haven't rated, so we don't care. + ratedItem = hashFind(recnode->ratedTable,itemID); + if (ratedItem) { + currentItem->score += similarity*ratedItem->score; + if (similarity < 0) + similarity *= -1; + currentItem->totalSim += similarity; + } + } + + if (currentItem->totalSim == 0) return 0; + + recScore = currentItem->score / currentItem->totalSim; + return recScore; } /* ---------------------------------------------------------------- @@ -3696,61 +3696,61 @@ itemCFgenerate(RecScanState *recnode, int itemid, int itemindex) float userCFgenerate(RecScanState *recnode, int itemid, int itemindex) { - float event, totalSim, average; - AttributeInfo *attributes; - // Query objects; - char *querystring; - QueryDesc *queryDesc; - PlanState *planstate; - TupleTableSlot *qslot; - MemoryContext recathoncontext; - - attributes = (AttributeInfo*) recnode->attributes; - - event = 0.0; - totalSim = 0.0; - average = recnode->average; - - /* We need to query the events table, so that we can - * find all events for this item and match them up - * with what we have in the similarity matrix. We note - * that it's necessarily true that the user has not - * rated these items. */ - querystring = (char*) palloc(1024*sizeof(char)); - sprintf(querystring,"select * from %s where %s = %d;", - attributes->eventtable,attributes->itemkey,itemid); - queryDesc = recathon_queryStart(querystring,&recathoncontext); - planstate = queryDesc->planstate; - - for (;;) { - int currentUserID; - float currentRating, similarity; - GenRating *currentUser; - - qslot = ExecProcNode(planstate); - if (TupIsNull(qslot)) break; - - currentUserID = getTupleInt(qslot,attributes->userkey); - currentRating = getTupleFloat(qslot,attributes->eventval); - - currentUser = hashFind(recnode->simTable,currentUserID); - if (!currentUser) continue; - similarity = currentUser->totalSim; - - event += (currentRating - average) * similarity; - // Poor man's absolute value of the similarity. - if (similarity < 0) - similarity *= -1; - totalSim += similarity; - } - recathon_queryEnd(queryDesc,recathoncontext); - - if (totalSim == 0.0) return 0.0; - - event /= totalSim; - event += average; - - return event; + float event, totalSim, average; + AttributeInfo *attributes; + // Query objects; + char *querystring; + QueryDesc *queryDesc; + PlanState *planstate; + TupleTableSlot *qslot; + MemoryContext recathoncontext; + + attributes = (AttributeInfo*) recnode->attributes; + + event = 0.0; + totalSim = 0.0; + average = recnode->average; + + /* We need to query the events table, so that we can + * find all events for this item and match them up + * with what we have in the similarity matrix. We note + * that it's necessarily true that the user has not + * rated these items. */ + querystring = (char*) palloc(1024*sizeof(char)); + sprintf(querystring,"select * from %s where %s = %d;", + attributes->eventtable,attributes->itemkey,itemid); + queryDesc = recathon_queryStart(querystring,&recathoncontext); + planstate = queryDesc->planstate; + + for (;;) { + int currentUserID; + float currentRating, similarity; + GenRating *currentUser; + + qslot = ExecProcNode(planstate); + if (TupIsNull(qslot)) break; + + currentUserID = getTupleInt(qslot,attributes->userkey); + currentRating = getTupleFloat(qslot,attributes->eventval); + + currentUser = hashFind(recnode->simTable,currentUserID); + if (!currentUser) continue; + similarity = currentUser->totalSim; + + event += (currentRating - average) * similarity; + // Poor man's absolute value of the similarity. + if (similarity < 0) + similarity *= -1; + totalSim += similarity; + } + recathon_queryEnd(queryDesc,recathoncontext); + + if (totalSim == 0.0) return 0.0; + + event /= totalSim; + event += average; + + return event; } /* ---------------------------------------------------------------- @@ -3764,18 +3764,18 @@ userCFgenerate(RecScanState *recnode, int itemid, int itemindex) float SVDgenerate(RecScanState *recnode, int itemid, int itemindex) { - int i; - float **userFeatures, **itemFeatures; - float recscore = 0.0; - - userFeatures = recnode->SVDusermodel; - itemFeatures = recnode->SVDitemmodel; - - // At this point, our work is easy. - for (i = 0; i < recnode->numFeatures; i++) - recscore += userFeatures[i][recnode->userindex] * itemFeatures[i][itemindex]; - - return recscore; + int i; + float **userFeatures, **itemFeatures; + float recscore = 0.0; + + userFeatures = recnode->SVDusermodel; + itemFeatures = recnode->SVDitemmodel; + + // At this point, our work is easy. + for (i = 0; i < recnode->numFeatures; i++) + recscore += userFeatures[i][recnode->userindex] * itemFeatures[i][itemindex]; + + return recscore; } /* ---------------------------------------------------------------- @@ -3788,47 +3788,47 @@ SVDgenerate(RecScanState *recnode, int itemid, int itemindex) void applyItemSimGenerate(RecScanState *recnode) { - int i, j; - GenHash *ratedTable; - - ratedTable = recnode->ratedTable; - - // For every item we've rated, we need to obtain its similarity - // scores and apply them to the appropriate items. This is - // necessary because we're only storing half of the similarity - // matrix. - for (i = 0; i < ratedTable->hash; i++) { - GenRating *currentItem; - - for (currentItem = ratedTable->table[i]; currentItem; - currentItem = currentItem->next) { - int itemindex = currentItem->index; - - for (j = itemindex+1; j < recnode->fullTotalItems; j++) { - int itemID; - float similarity; - GenRating *pendingItem; - - itemID = recnode->fullItemList[j]; - similarity = recnode->itemCFmodel[itemindex][j]; - - // If the similarity is 0, there's no point. - if (similarity == 0.0) - continue; - - // Find the array slot this item ID corresponds to. - // If -1 is returned, then the item ID corresponds to - // another item we've rated, so we don't care. - pendingItem = hashFind(recnode->pendingTable,itemID); - if (pendingItem) { - pendingItem->score += similarity*currentItem->score; - if (similarity < 0) - similarity *= -1; - pendingItem->totalSim += similarity; - } - } - } - } + int i, j; + GenHash *ratedTable; + + ratedTable = recnode->ratedTable; + + // For every item we've rated, we need to obtain its similarity + // scores and apply them to the appropriate items. This is + // necessary because we're only storing half of the similarity + // matrix. + for (i = 0; i < ratedTable->hash; i++) { + GenRating *currentItem; + + for (currentItem = ratedTable->table[i]; currentItem; + currentItem = currentItem->next) { + int itemindex = currentItem->index; + + for (j = itemindex+1; j < recnode->fullTotalItems; j++) { + int itemID; + float similarity; + GenRating *pendingItem; + + itemID = recnode->fullItemList[j]; + similarity = recnode->itemCFmodel[itemindex][j]; + + // If the similarity is 0, there's no point. + if (similarity == 0.0) + continue; + + // Find the array slot this item ID corresponds to. + // If -1 is returned, then the item ID corresponds to + // another item we've rated, so we don't care. + pendingItem = hashFind(recnode->pendingTable,itemID); + if (pendingItem) { + pendingItem->score += similarity*currentItem->score; + if (similarity < 0) + similarity *= -1; + pendingItem->totalSim += similarity; + } + } + } + } } /* ---------------------------------------------------------------- @@ -3841,281 +3841,281 @@ applyItemSimGenerate(RecScanState *recnode) */ bool prepUserForRating(RecScanState *recstate, int userID) { - int i, userindex; - // Query objects. - char *querystring; - QueryDesc *queryDesc; - PlanState *planstate; - TupleTableSlot *hslot; - MemoryContext recathoncontext; - - AttributeInfo *attributes = (AttributeInfo*) recstate->attributes; - attributes->userID = userID; - - /* First off, we need to delete any existing structures. */ - if (recstate->ratedTable) { - freeHash(recstate->ratedTable); - recstate->ratedTable = NULL; - } - if (recstate->pendingTable) { - freeHash(recstate->pendingTable); - recstate->pendingTable = NULL; - } - if (recstate->simTable) { - freeHash(recstate->simTable); - recstate->simTable = NULL; - } - if (recstate->userFeatures) { - pfree(recstate->userFeatures); - recstate->userFeatures = NULL; - } - - /* INSERT FORMER LIST CODE HERE */ - querystring = (char*) palloc(1024*sizeof(char)); - - switch ((recMethod) attributes->method) { - /* If this is an item-based CF recommender, we can pre-obtain - * the ratings of this user, and add in their contributions to - * the scores of all the other items. */ - case itemCosCF: - case itemPearCF: - /* The rated list is all of the items this user has - * rated already. We store the ratings now and we'll - * use them during calculation. */ - sprintf(querystring,"select count(*) from %s where %s = %d;", - attributes->eventtable,attributes->userkey,userID); - queryDesc = recathon_queryStart(querystring,&recathoncontext); - planstate = queryDesc->planstate; - hslot = ExecProcNode(planstate); - recstate->totalRatings = getTupleInt(hslot,"count"); - recathon_queryEnd(queryDesc,recathoncontext); - - /* It's possible that someone has rated no items. */ - if (recstate->totalRatings <= 0) { - elog(WARNING, "user %d has rated no items, no predictions can be made", - userID); - return false; - } - - recstate->ratedTable = hashCreate(recstate->totalRatings); - - /* Now to acquire the actual ratings. */ - sprintf(querystring,"select * from %s where %s = %d order by %s;", - attributes->eventtable,attributes->userkey, - userID,attributes->itemkey); - queryDesc = recathon_queryStart(querystring,&recathoncontext); - planstate = queryDesc->planstate; - - i = 0; - for (;;) { - int currentItem; - float currentRating; - GenRating *newItem; - - hslot = ExecProcNode(planstate); - if (TupIsNull(hslot)) break; - - currentItem = getTupleInt(hslot,attributes->itemkey); - currentRating = getTupleFloat(hslot,attributes->eventval); - - newItem = (GenRating*) palloc(sizeof(GenRating)); - newItem->ID = currentItem; - newItem->index = binarySearch(recstate->fullItemList,currentItem,0,recstate->fullTotalItems); - newItem->score = currentRating; - newItem->next = NULL; - hashAdd(recstate->ratedTable, newItem); - - i++; - if (i >= recstate->totalRatings) break; - } - recathon_queryEnd(queryDesc,recathoncontext); - - /* Quick error protection. Again, I don't know how this could - * possibly happen, but better safe than sorry. */ - recstate->totalRatings = i; - if (recstate->totalRatings <= 0) { - elog(WARNING, "user %d has rated no items, no predictions can be made", - userID); - return false; - } - - /* The pending list is all of the items we have yet to - * calculate ratings for. We need to maintain partial - * scores and similarity sums for each one. In this version - * of the code, note that we rate all items. */ - recstate->pendingTable = hashCreate(recstate->fullTotalItems); - for (i = 0; i < recstate->fullTotalItems; i++) { - GenRating *newItem; - - newItem = (GenRating*) palloc(sizeof(GenRating)); - newItem->ID = recstate->fullItemList[i]; - /* The pending list doesn't need indexes. */ - newItem->index = -1; - newItem->score = 0.0; - newItem->totalSim = 0.0; - newItem->next = NULL; - hashAdd(recstate->pendingTable, newItem); - } - - /* With another function, we apply the ratings and similarities - * from the rated items to the unrated ones. It's good to get - * this done early, as this will allow the operator to be - * non-blocking, which is important. */ - if (attributes->opType == OP_GENERATE || attributes->opType == OP_GENERATEJOIN) - applyItemSimGenerate(recstate); - else - applyItemSim(recstate, attributes->recModelName); - break; - case userCosCF: - case userPearCF: - userindex = binarySearch(recstate->userList, userID, 0, recstate->totalUsers); - - /* The first thing we'll do is obtain the average rating. */ - sprintf(querystring,"select avg(%s) as average from %s where %s = %d;", - attributes->eventval,attributes->eventtable, - attributes->userkey,userID); - queryDesc = recathon_queryStart(querystring,&recathoncontext); - planstate = queryDesc->planstate; - - hslot = ExecProcNode(planstate); - recstate->average = getTupleFloat(hslot,"average"); - recathon_queryEnd(queryDesc,recathoncontext); - - /* Next, we need to store this user's similarity model - * in a hash table for easier access. We base the table on - * the number of items we have to rate - a close enough - * approximation that we won't have much trouble. */ - recstate->simTable = hashCreate(recstate->fullTotalItems); - - /* We need to find the entire similarity table for this - * user, which will be in two parts. */ - if (attributes->opType == OP_GENERATE || attributes->opType == OP_GENERATEJOIN) { - for (i = 0; i < userindex; i++) { - int currentUser; - float currentSim; - GenRating *newUser; - - currentUser = recstate->userList[i]; - currentSim = recstate->userCFmodel[i][userindex]; - - newUser = (GenRating*) palloc(sizeof(GenRating)); - newUser->ID = currentUser; - newUser->index = i; - newUser->totalSim = currentSim; - newUser->next = NULL; - hashAdd(recstate->simTable, newUser); - } - - for (i = userindex+1; i < recstate->totalUsers; i++) { - int currentUser; - float currentSim; - GenRating *newUser; - - currentUser = recstate->userList[i]; - currentSim = recstate->userCFmodel[userindex][i]; - - newUser = (GenRating*) palloc(sizeof(GenRating)); - newUser->ID = currentUser; - newUser->index = i; - newUser->totalSim = currentSim; - newUser->next = NULL; - hashAdd(recstate->simTable, newUser); - } - } else { - sprintf(querystring,"select * from %s where user1 < %d and user2 = %d;", - attributes->recModelName,attributes->userID, - attributes->userID); - queryDesc = recathon_queryStart(querystring,&recathoncontext); - planstate = queryDesc->planstate; - - for (;;) { - int currentUser; - float currentSim; - GenRating *newUser; - - hslot = ExecProcNode(planstate); - if (TupIsNull(hslot)) break; - - currentUser = getTupleInt(hslot,"user1"); - currentSim = getTupleFloat(hslot,"similarity"); - - newUser = (GenRating*) palloc(sizeof(GenRating)); - newUser->ID = currentUser; - /* Pre-generated recommendation doesn't need - * indexes. */ - newUser->index = -1; - newUser->totalSim = currentSim; - newUser->next = NULL; - hashAdd(recstate->simTable, newUser); - } - recathon_queryEnd(queryDesc,recathoncontext); - - /* Here's the second. */ - sprintf(querystring,"select * from %s where user1 = %d;", - attributes->recModelName,attributes->userID); - queryDesc = recathon_queryStart(querystring,&recathoncontext); - planstate = queryDesc->planstate; - - for (;;) { - int currentUser; - float currentSim; - GenRating *newUser; - - hslot = ExecProcNode(planstate); - if (TupIsNull(hslot)) break; - - currentUser = getTupleInt(hslot,"user2"); - currentSim = getTupleFloat(hslot,"similarity"); - - newUser = (GenRating*) palloc(sizeof(GenRating)); - newUser->ID = currentUser; - /* Pre-generated recommendation doesn't need - * indexes. */ - newUser->index = -1; - newUser->totalSim = currentSim; - newUser->next = NULL; - hashAdd(recstate->simTable, newUser); - } - recathon_queryEnd(queryDesc,recathoncontext); - } - - break; - /* If this is a SVD recommender, we can pre-obtain the user features, - * which stay fixed, and cut the I/O time in half. Of course, if this - * is generated on-the-fly, this is done already. */ - case SVD: - if (attributes->opType != OP_GENERATE && attributes->opType != OP_GENERATEJOIN) { - recstate->userFeatures = (float*) palloc(50*sizeof(float)); - for (i = 0; i < 50; i++) - recstate->userFeatures[i] = 0; - sprintf(querystring,"select * from %s where users = %d;", - attributes->recModelName,userID); - queryDesc = recathon_queryStart(querystring,&recathoncontext); - planstate = queryDesc->planstate; - - for (;;) { - int feature; - float featValue; - - hslot = ExecProcNode(planstate); - if (TupIsNull(hslot)) break; - - feature = getTupleInt(hslot,"feature"); - featValue = getTupleFloat(hslot,"value"); - - recstate->userFeatures[feature] = featValue; - } - - recathon_queryEnd(queryDesc,recathoncontext); - } - break; - default: - elog(ERROR, "invalid recommendation method in prepUserForRating()"); - } - - /* If we've gotten to this point, this is a valid user, so return true. */ - pfree(querystring); - return true; + int i, userindex; + // Query objects. + char *querystring; + QueryDesc *queryDesc; + PlanState *planstate; + TupleTableSlot *hslot; + MemoryContext recathoncontext; + + AttributeInfo *attributes = (AttributeInfo*) recstate->attributes; + attributes->userID = userID; + + /* First off, we need to delete any existing structures. */ + if (recstate->ratedTable) { + freeHash(recstate->ratedTable); + recstate->ratedTable = NULL; + } + if (recstate->pendingTable) { + freeHash(recstate->pendingTable); + recstate->pendingTable = NULL; + } + if (recstate->simTable) { + freeHash(recstate->simTable); + recstate->simTable = NULL; + } + if (recstate->userFeatures) { + pfree(recstate->userFeatures); + recstate->userFeatures = NULL; + } + + /* INSERT FORMER LIST CODE HERE */ + querystring = (char*) palloc(1024*sizeof(char)); + + switch ((recMethod) attributes->method) { + /* If this is an item-based CF recommender, we can pre-obtain + * the ratings of this user, and add in their contributions to + * the scores of all the other items. */ + case itemCosCF: + case itemPearCF: + /* The rated list is all of the items this user has + * rated already. We store the ratings now and we'll + * use them during calculation. */ + sprintf(querystring,"select count(*) from %s where %s = %d;", + attributes->eventtable,attributes->userkey,userID); + queryDesc = recathon_queryStart(querystring,&recathoncontext); + planstate = queryDesc->planstate; + hslot = ExecProcNode(planstate); + recstate->totalRatings = getTupleInt(hslot,"count"); + recathon_queryEnd(queryDesc,recathoncontext); + + /* It's possible that someone has rated no items. */ + if (recstate->totalRatings <= 0) { + elog(WARNING, "user %d has rated no items, no predictions can be made", + userID); + return false; + } + + recstate->ratedTable = hashCreate(recstate->totalRatings); + + /* Now to acquire the actual ratings. */ + sprintf(querystring,"select * from %s where %s = %d order by %s;", + attributes->eventtable,attributes->userkey, + userID,attributes->itemkey); + queryDesc = recathon_queryStart(querystring,&recathoncontext); + planstate = queryDesc->planstate; + + i = 0; + for (;;) { + int currentItem; + float currentRating; + GenRating *newItem; + + hslot = ExecProcNode(planstate); + if (TupIsNull(hslot)) break; + + currentItem = getTupleInt(hslot,attributes->itemkey); + currentRating = getTupleFloat(hslot,attributes->eventval); + + newItem = (GenRating*) palloc(sizeof(GenRating)); + newItem->ID = currentItem; + newItem->index = binarySearch(recstate->fullItemList,currentItem,0,recstate->fullTotalItems); + newItem->score = currentRating; + newItem->next = NULL; + hashAdd(recstate->ratedTable, newItem); + + i++; + if (i >= recstate->totalRatings) break; + } + recathon_queryEnd(queryDesc,recathoncontext); + + /* Quick error protection. Again, I don't know how this could + * possibly happen, but better safe than sorry. */ + recstate->totalRatings = i; + if (recstate->totalRatings <= 0) { + elog(WARNING, "user %d has rated no items, no predictions can be made", + userID); + return false; + } + + /* The pending list is all of the items we have yet to + * calculate ratings for. We need to maintain partial + * scores and similarity sums for each one. In this version + * of the code, note that we rate all items. */ + recstate->pendingTable = hashCreate(recstate->fullTotalItems); + for (i = 0; i < recstate->fullTotalItems; i++) { + GenRating *newItem; + + newItem = (GenRating*) palloc(sizeof(GenRating)); + newItem->ID = recstate->fullItemList[i]; + /* The pending list doesn't need indexes. */ + newItem->index = -1; + newItem->score = 0.0; + newItem->totalSim = 0.0; + newItem->next = NULL; + hashAdd(recstate->pendingTable, newItem); + } + + /* With another function, we apply the ratings and similarities + * from the rated items to the unrated ones. It's good to get + * this done early, as this will allow the operator to be + * non-blocking, which is important. */ + if (attributes->opType == OP_GENERATE || attributes->opType == OP_GENERATEJOIN) + applyItemSimGenerate(recstate); + else + applyItemSim(recstate, attributes->recModelName); + break; + case userCosCF: + case userPearCF: + userindex = binarySearch(recstate->userList, userID, 0, recstate->totalUsers); + + /* The first thing we'll do is obtain the average rating. */ + sprintf(querystring,"select avg(%s) as average from %s where %s = %d;", + attributes->eventval,attributes->eventtable, + attributes->userkey,userID); + queryDesc = recathon_queryStart(querystring,&recathoncontext); + planstate = queryDesc->planstate; + + hslot = ExecProcNode(planstate); + recstate->average = getTupleFloat(hslot,"average"); + recathon_queryEnd(queryDesc,recathoncontext); + + /* Next, we need to store this user's similarity model + * in a hash table for easier access. We base the table on + * the number of items we have to rate - a close enough + * approximation that we won't have much trouble. */ + recstate->simTable = hashCreate(recstate->fullTotalItems); + + /* We need to find the entire similarity table for this + * user, which will be in two parts. */ + if (attributes->opType == OP_GENERATE || attributes->opType == OP_GENERATEJOIN) { + for (i = 0; i < userindex; i++) { + int currentUser; + float currentSim; + GenRating *newUser; + + currentUser = recstate->userList[i]; + currentSim = recstate->userCFmodel[i][userindex]; + + newUser = (GenRating*) palloc(sizeof(GenRating)); + newUser->ID = currentUser; + newUser->index = i; + newUser->totalSim = currentSim; + newUser->next = NULL; + hashAdd(recstate->simTable, newUser); + } + + for (i = userindex+1; i < recstate->totalUsers; i++) { + int currentUser; + float currentSim; + GenRating *newUser; + + currentUser = recstate->userList[i]; + currentSim = recstate->userCFmodel[userindex][i]; + + newUser = (GenRating*) palloc(sizeof(GenRating)); + newUser->ID = currentUser; + newUser->index = i; + newUser->totalSim = currentSim; + newUser->next = NULL; + hashAdd(recstate->simTable, newUser); + } + } else { + sprintf(querystring,"select * from %s where user1 < %d and user2 = %d;", + attributes->recModelName,attributes->userID, + attributes->userID); + queryDesc = recathon_queryStart(querystring,&recathoncontext); + planstate = queryDesc->planstate; + + for (;;) { + int currentUser; + float currentSim; + GenRating *newUser; + + hslot = ExecProcNode(planstate); + if (TupIsNull(hslot)) break; + + currentUser = getTupleInt(hslot,"user1"); + currentSim = getTupleFloat(hslot,"similarity"); + + newUser = (GenRating*) palloc(sizeof(GenRating)); + newUser->ID = currentUser; + /* Pre-generated recommendation doesn't need + * indexes. */ + newUser->index = -1; + newUser->totalSim = currentSim; + newUser->next = NULL; + hashAdd(recstate->simTable, newUser); + } + recathon_queryEnd(queryDesc,recathoncontext); + + /* Here's the second. */ + sprintf(querystring,"select * from %s where user1 = %d;", + attributes->recModelName,attributes->userID); + queryDesc = recathon_queryStart(querystring,&recathoncontext); + planstate = queryDesc->planstate; + + for (;;) { + int currentUser; + float currentSim; + GenRating *newUser; + + hslot = ExecProcNode(planstate); + if (TupIsNull(hslot)) break; + + currentUser = getTupleInt(hslot,"user2"); + currentSim = getTupleFloat(hslot,"similarity"); + + newUser = (GenRating*) palloc(sizeof(GenRating)); + newUser->ID = currentUser; + /* Pre-generated recommendation doesn't need + * indexes. */ + newUser->index = -1; + newUser->totalSim = currentSim; + newUser->next = NULL; + hashAdd(recstate->simTable, newUser); + } + recathon_queryEnd(queryDesc,recathoncontext); + } + + break; + /* If this is a SVD recommender, we can pre-obtain the user features, + * which stay fixed, and cut the I/O time in half. Of course, if this + * is generated on-the-fly, this is done already. */ + case SVD: + if (attributes->opType != OP_GENERATE && attributes->opType != OP_GENERATEJOIN) { + recstate->userFeatures = (float*) palloc(50*sizeof(float)); + for (i = 0; i < 50; i++) + recstate->userFeatures[i] = 0; + sprintf(querystring,"select * from %s where users = %d;", + attributes->recModelName,userID); + queryDesc = recathon_queryStart(querystring,&recathoncontext); + planstate = queryDesc->planstate; + + for (;;) { + int feature; + float featValue; + + hslot = ExecProcNode(planstate); + if (TupIsNull(hslot)) break; + + feature = getTupleInt(hslot,"feature"); + featValue = getTupleFloat(hslot,"value"); + + recstate->userFeatures[feature] = featValue; + } + + recathon_queryEnd(queryDesc,recathoncontext); + } + break; + default: + elog(ERROR, "invalid recommendation method in prepUserForRating()"); + } + + /* If we've gotten to this point, this is a valid user, so return true. */ + pfree(querystring); + return true; } /* ---------------------------------------------------------------- @@ -4127,17 +4127,17 @@ prepUserForRating(RecScanState *recstate, int userID) { GenHash* hashCreate(int totalItems) { - int hash; - GenHash *newHashTable; - - if (totalItems <= 10) hash = totalItems; - else hash = totalItems / 3; - - newHashTable = (GenHash*) palloc(sizeof(GenHash)); - newHashTable->hash = hash; - newHashTable->table = (GenRating**) palloc0(hash*sizeof(GenRating)); - - return newHashTable; + int hash; + GenHash *newHashTable; + + if (totalItems <= 10) hash = totalItems; + else hash = totalItems / 3; + + newHashTable = (GenHash*) palloc(sizeof(GenHash)); + newHashTable->hash = hash; + newHashTable->table = (GenRating**) palloc0(hash*sizeof(GenRating)); + + return newHashTable; } /* ---------------------------------------------------------------- @@ -4152,21 +4152,21 @@ hashCreate(int totalItems) void hashAdd(GenHash *table, GenRating *item) { - int hashval; - GenRating *tempRating; - - hashval = item->ID % table->hash; - tempRating = table->table[hashval]; - - if (!tempRating) { - table->table[hashval] = item; - return; - } - - while (tempRating->next) - tempRating = tempRating->next; - - tempRating->next = item; + int hashval; + GenRating *tempRating; + + hashval = item->ID % table->hash; + tempRating = table->table[hashval]; + + if (!tempRating) { + table->table[hashval] = item; + return; + } + + while (tempRating->next) + tempRating = tempRating->next; + + tempRating->next = item; } /* ---------------------------------------------------------------- @@ -4179,21 +4179,21 @@ hashAdd(GenHash *table, GenRating *item) GenRating* hashFind(GenHash *table, int itemID) { - int hashval; - GenRating *tempRating; - - hashval = itemID % table->hash; - tempRating = table->table[hashval]; - - while (tempRating) { - if (tempRating->ID == itemID) - return tempRating; - if (tempRating->ID > itemID) - return NULL; - tempRating = tempRating->next; - } - - return NULL; + int hashval; + GenRating *tempRating; + + hashval = itemID % table->hash; + tempRating = table->table[hashval]; + + while (tempRating) { + if (tempRating->ID == itemID) + return tempRating; + if (tempRating->ID > itemID) + return NULL; + tempRating = tempRating->next; + } + + return NULL; } /* ---------------------------------------------------------------- @@ -4204,24 +4204,24 @@ hashFind(GenHash *table, int itemID) */ void freeHash(GenHash *table) { - int i; - - if (!table) - return; - - for (i = 0; i < table->hash; i++) { - GenRating *tempRating; - - tempRating = table->table[i]; - while (tempRating) { - GenRating *tempRating2 = tempRating->next; - pfree(tempRating); - tempRating = tempRating2; - } - } - - pfree(table->table); - pfree(table); + int i; + + if (!table) + return; + + for (i = 0; i < table->hash; i++) { + GenRating *tempRating; + + tempRating = table->table[i]; + while (tempRating) { + GenRating *tempRating2 = tempRating->next; + pfree(tempRating); + tempRating = tempRating2; + } + } + + pfree(table->table); + pfree(table); } /* ---------------------------------------------------------------- @@ -4235,63 +4235,63 @@ freeHash(GenHash *table) { float itemCFpredict(RecScanState *recnode, char *itemmodel, int itemid) { - float recScore; - GenRating *currentItem; - // Query objects. - char *querystring; - QueryDesc *queryDesc; - PlanState *planstate; - TupleTableSlot *slot; - MemoryContext recathoncontext; - - // First, we grab the GenRating for this item ID. - currentItem = hashFind(recnode->pendingTable, itemid); - // In case there's some error. - if (!currentItem) - return -1; - - querystring = (char*) palloc(1024*sizeof(char)); - - // We're going to look through the similarity matrix for the - // numbers that correspond to this item, and find which of those - // also correspond to items this user rated. We will use that - // information to obtain the estimated rating. - sprintf(querystring,"select * from %s where item1 = %d;", - itemmodel,currentItem->ID); - queryDesc = recathon_queryStart(querystring,&recathoncontext); - planstate = queryDesc->planstate; - - for (;;) { - int itemID; - float similarity; - GenRating *ratedItem; - - slot = ExecProcNode(planstate); - if (TupIsNull(slot)) break; - - itemID = getTupleInt(slot,"item2"); - similarity = getTupleFloat(slot,"similarity"); - - // Find the array slot this item ID corresponds to. - // If -1 is returned, then the item ID corresponds to - // another item we haven't rated, so we don't care. - ratedItem = hashFind(recnode->ratedTable,itemID); - if (ratedItem) { - currentItem->score += similarity*ratedItem->score; - if (similarity < 0) - similarity *= -1; - currentItem->totalSim += similarity; - } - } - - // Cleanup. - recathon_queryEnd(queryDesc,recathoncontext); - pfree(querystring); - - if (currentItem->totalSim == 0) return 0; - - recScore = currentItem->score / currentItem->totalSim; - return recScore; + float recScore; + GenRating *currentItem; + // Query objects. + char *querystring; + QueryDesc *queryDesc; + PlanState *planstate; + TupleTableSlot *slot; + MemoryContext recathoncontext; + + // First, we grab the GenRating for this item ID. + currentItem = hashFind(recnode->pendingTable, itemid); + // In case there's some error. + if (!currentItem) + return -1; + + querystring = (char*) palloc(1024*sizeof(char)); + + // We're going to look through the similarity matrix for the + // numbers that correspond to this item, and find which of those + // also correspond to items this user rated. We will use that + // information to obtain the estimated rating. + sprintf(querystring,"select * from %s where item1 = %d;", + itemmodel,currentItem->ID); + queryDesc = recathon_queryStart(querystring,&recathoncontext); + planstate = queryDesc->planstate; + + for (;;) { + int itemID; + float similarity; + GenRating *ratedItem; + + slot = ExecProcNode(planstate); + if (TupIsNull(slot)) break; + + itemID = getTupleInt(slot,"item2"); + similarity = getTupleFloat(slot,"similarity"); + + // Find the array slot this item ID corresponds to. + // If -1 is returned, then the item ID corresponds to + // another item we haven't rated, so we don't care. + ratedItem = hashFind(recnode->ratedTable,itemID); + if (ratedItem) { + currentItem->score += similarity*ratedItem->score; + if (similarity < 0) + similarity *= -1; + currentItem->totalSim += similarity; + } + } + + // Cleanup. + recathon_queryEnd(queryDesc,recathoncontext); + pfree(querystring); + + if (currentItem->totalSim == 0) return 0; + + recScore = currentItem->score / currentItem->totalSim; + return recScore; } /* ---------------------------------------------------------------- @@ -4305,61 +4305,61 @@ itemCFpredict(RecScanState *recnode, char *itemmodel, int itemid) float userCFpredict(RecScanState *recnode, char *eventval, int itemid) { - float event, totalSim, average; - AttributeInfo *attributes; - // Query objects; - char *querystring; - QueryDesc *queryDesc; - PlanState *planstate; - TupleTableSlot *qslot; - MemoryContext recathoncontext; - - attributes = (AttributeInfo*) recnode->attributes; - - event = 0.0; - totalSim = 0.0; - average = recnode->average; - - /* We need to query the events table, so that we can - * find all events for this item and match them up - * with what we have in the similarity matrix. We note - * that it's necessarily true that the user has not - * rated these items. */ - querystring = (char*) palloc(1024*sizeof(char)); - sprintf(querystring,"select * from %s where %s = %d;", - attributes->eventtable,attributes->itemkey,itemid); - queryDesc = recathon_queryStart(querystring,&recathoncontext); - planstate = queryDesc->planstate; - - for (;;) { - int currentUserID; - float currentRating, similarity; - GenRating *currentUser; - - qslot = ExecProcNode(planstate); - if (TupIsNull(qslot)) break; - - currentUserID = getTupleInt(qslot,attributes->userkey); - currentRating = getTupleFloat(qslot,eventval); - - currentUser = hashFind(recnode->simTable,currentUserID); - if (!currentUser) continue; - similarity = currentUser->totalSim; - - event += (currentRating - average) * similarity; - // Poor man's absolute value of the similarity. - if (similarity < 0) - similarity *= -1; - totalSim += similarity; - } - recathon_queryEnd(queryDesc,recathoncontext); - - if (totalSim == 0.0) return 0.0; - - event /= totalSim; - event += average; - - return event; + float event, totalSim, average; + AttributeInfo *attributes; + // Query objects; + char *querystring; + QueryDesc *queryDesc; + PlanState *planstate; + TupleTableSlot *qslot; + MemoryContext recathoncontext; + + attributes = (AttributeInfo*) recnode->attributes; + + event = 0.0; + totalSim = 0.0; + average = recnode->average; + + /* We need to query the events table, so that we can + * find all events for this item and match them up + * with what we have in the similarity matrix. We note + * that it's necessarily true that the user has not + * rated these items. */ + querystring = (char*) palloc(1024*sizeof(char)); + sprintf(querystring,"select * from %s where %s = %d;", + attributes->eventtable,attributes->itemkey,itemid); + queryDesc = recathon_queryStart(querystring,&recathoncontext); + planstate = queryDesc->planstate; + + for (;;) { + int currentUserID; + float currentRating, similarity; + GenRating *currentUser; + + qslot = ExecProcNode(planstate); + if (TupIsNull(qslot)) break; + + currentUserID = getTupleInt(qslot,attributes->userkey); + currentRating = getTupleFloat(qslot,eventval); + + currentUser = hashFind(recnode->simTable,currentUserID); + if (!currentUser) continue; + similarity = currentUser->totalSim; + + event += (currentRating - average) * similarity; + // Poor man's absolute value of the similarity. + if (similarity < 0) + similarity *= -1; + totalSim += similarity; + } + recathon_queryEnd(queryDesc,recathoncontext); + + if (totalSim == 0.0) return 0.0; + + event /= totalSim; + event += average; + + return event; } /* ---------------------------------------------------------------- @@ -4372,64 +4372,64 @@ userCFpredict(RecScanState *recnode, char *eventval, int itemid) float SVDpredict(RecScanState *recnode, char *itemmodel, int itemid) { - float *userFeatures; - float recscore = 0.0; - // Query objects; - char *querystring; - QueryDesc *queryDesc; - PlanState *planstate; - TupleTableSlot *qslot; - MemoryContext recathoncontext; - - userFeatures = recnode->userFeatures; - - querystring = (char*) palloc(1024*sizeof(char)); - sprintf(querystring,"select * from %s where items = %d;", - itemmodel,itemid); - - queryDesc = recathon_queryStart(querystring,&recathoncontext); - planstate = queryDesc->planstate; - - // Here we don't use the simpler methods, because they're slightly - // less efficient. Since we'll be doing this several thousand times, - // we'll take what we can get. - for (;;) { - int i, natts; - int feature = -1; - float featValue = 0; - - qslot = ExecProcNode(planstate); - if (TupIsNull(qslot)) break; - slot_getallattrs(qslot); - natts = qslot->tts_tupleDescriptor->natts; - - for (i = 0; i < natts; i++) { - if (!qslot->tts_isnull[i]) { - char *col_name; - Datum slot_result; - // What we do depends on the column name. - col_name = qslot->tts_tupleDescriptor->attrs[i]->attname.data; - slot_result = qslot->tts_values[i]; - - if (strcmp(col_name,"feature") == 0) - feature = DatumGetInt32(slot_result); - else if (strcmp(col_name,"value") == 0) - featValue = DatumGetFloat4(slot_result); - } - } - - // If there's an error and we didn't find the column. - if (feature < 0) continue; - - // Add it into the event and continue. - recscore += featValue * userFeatures[feature]; - } - - // Cleanup. - recathon_queryEnd(queryDesc,recathoncontext); - pfree(querystring); - - return recscore; + float *userFeatures; + float recscore = 0.0; + // Query objects; + char *querystring; + QueryDesc *queryDesc; + PlanState *planstate; + TupleTableSlot *qslot; + MemoryContext recathoncontext; + + userFeatures = recnode->userFeatures; + + querystring = (char*) palloc(1024*sizeof(char)); + sprintf(querystring,"select * from %s where items = %d;", + itemmodel,itemid); + + queryDesc = recathon_queryStart(querystring,&recathoncontext); + planstate = queryDesc->planstate; + + // Here we don't use the simpler methods, because they're slightly + // less efficient. Since we'll be doing this several thousand times, + // we'll take what we can get. + for (;;) { + int i, natts; + int feature = -1; + float featValue = 0; + + qslot = ExecProcNode(planstate); + if (TupIsNull(qslot)) break; + slot_getallattrs(qslot); + natts = qslot->tts_tupleDescriptor->natts; + + for (i = 0; i < natts; i++) { + if (!qslot->tts_isnull[i]) { + char *col_name; + Datum slot_result; + // What we do depends on the column name. + col_name = qslot->tts_tupleDescriptor->attrs[i]->attname.data; + slot_result = qslot->tts_values[i]; + + if (strcmp(col_name,"feature") == 0) + feature = DatumGetInt32(slot_result); + else if (strcmp(col_name,"value") == 0) + featValue = DatumGetFloat4(slot_result); + } + } + + // If there's an error and we didn't find the column. + if (feature < 0) continue; + + // Add it into the event and continue. + recscore += featValue * userFeatures[feature]; + } + + // Cleanup. + recathon_queryEnd(queryDesc,recathoncontext); + pfree(querystring); + + return recscore; } /* ---------------------------------------------------------------- @@ -4442,39 +4442,39 @@ SVDpredict(RecScanState *recnode, char *itemmodel, int itemid) void applyRecScore(RecScanState *recnode, TupleTableSlot *slot, int itemid, int itemindex) { - float recscore; - AttributeInfo *attributes; - - attributes = (AttributeInfo*) recnode->attributes; - - switch ((recMethod)attributes->method) { - case itemCosCF: - case itemPearCF: - if (attributes->opType == OP_GENERATE || attributes->opType == OP_GENERATEJOIN) - recscore = itemCFgenerate(recnode,itemid,itemindex); - else - recscore = itemCFpredict(recnode,attributes->recModelName,itemid); - break; - case userCosCF: - case userPearCF: - if (attributes->opType == OP_GENERATE || attributes->opType == OP_GENERATEJOIN) - recscore = userCFgenerate(recnode,itemid,itemindex); - else - recscore = userCFpredict(recnode,attributes->eventval,itemid); - break; - case SVD: - if (attributes->opType == OP_GENERATE || attributes->opType == OP_GENERATEJOIN) - recscore = SVDgenerate(recnode,itemid,itemindex); - else - recscore = SVDpredict(recnode,attributes->recModelName2,itemid); - break; - default: - recscore = -1; - break; - } - - slot->tts_values[recnode->eventatt] = Float4GetDatum(recscore); - slot->tts_isnull[recnode->eventatt] = false; + float recscore; + AttributeInfo *attributes; + + attributes = (AttributeInfo*) recnode->attributes; + + switch ((recMethod)attributes->method) { + case itemCosCF: + case itemPearCF: + if (attributes->opType == OP_GENERATE || attributes->opType == OP_GENERATEJOIN) + recscore = itemCFgenerate(recnode,itemid,itemindex); + else + recscore = itemCFpredict(recnode,attributes->recModelName,itemid); + break; + case userCosCF: + case userPearCF: + if (attributes->opType == OP_GENERATE || attributes->opType == OP_GENERATEJOIN) + recscore = userCFgenerate(recnode,itemid,itemindex); + else + recscore = userCFpredict(recnode,attributes->eventval,itemid); + break; + case SVD: + if (attributes->opType == OP_GENERATE || attributes->opType == OP_GENERATEJOIN) + recscore = SVDgenerate(recnode,itemid,itemindex); + else + recscore = SVDpredict(recnode,attributes->recModelName2,itemid); + break; + default: + recscore = -1; + break; + } + + slot->tts_values[recnode->eventatt] = Float4GetDatum(recscore); + slot->tts_isnull[recnode->eventatt] = false; } /* ---------------------------------------------------------------- @@ -4489,84 +4489,59 @@ applyRecScore(RecScanState *recnode, TupleTableSlot *slot, int itemid, int itemi void applyItemSim(RecScanState *recnode, char *itemmodel) { - int i; - GenHash *ratedTable; - // Query objects. - char *querystring; - QueryDesc *queryDesc; - PlanState *planstate; - TupleTableSlot *slot; - MemoryContext recathoncontext; - - ratedTable = recnode->ratedTable; - - querystring = (char*) palloc(1024*sizeof(char)); - - // For every item we've rated, we need to obtain its similarity - // scores and apply them to the appropriate items. This is - // necessary because we're only storing half of the similarity - // matrix. - for (i = 0; i < ratedTable->hash; i++) { - GenRating *currentItem; - - for (currentItem = ratedTable->table[i]; currentItem; - currentItem = currentItem->next) { - sprintf(querystring,"select * from %s where item1 = %d;", - itemmodel,currentItem->ID); - queryDesc = recathon_queryStart(querystring,&recathoncontext); - planstate = queryDesc->planstate; - - for (;;) { - int itemID; - float similarity; - GenRating *pendingItem; - - slot = ExecProcNode(planstate); - if (TupIsNull(slot)) break; - - itemID = getTupleInt(slot,"item2"); - similarity = getTupleFloat(slot,"similarity"); - - // Find the array slot this item ID corresponds to. - // If -1 is returned, then the item ID corresponds to - // another item we've rated, so we don't care. - pendingItem = hashFind(recnode->pendingTable,itemID); - if (pendingItem) { - pendingItem->score += similarity*currentItem->score; - if (similarity < 0) - similarity *= -1; - pendingItem->totalSim += similarity; - } - } - - recathon_queryEnd(queryDesc,recathoncontext); - } - } - - pfree(querystring); -} + int i; + GenHash *ratedTable; + // Query objects. + char *querystring; + QueryDesc *queryDesc; + PlanState *planstate; + TupleTableSlot *slot; + MemoryContext recathoncontext; + ratedTable = recnode->ratedTable; -/* **************************************************************** - * _copyQuery function helper - * **************************************************************** - */ -void -copyQueryHelper(Query *query, Query *mainQuery) -{ - - ListCell * l; - ListCell *curr_old = mainQuery->rtable->head; - - if(mainQuery->recommendStmt != NULL) - query->recommendStmt = (Node*)(mainQuery->recommendStmt); - if(list_length(query->rtable) >= 1){ - forboth(l, query->rtable, curr_old, mainQuery->rtable){ - if(((RangeTblEntry*)l)->recommender != NULL){ - lfirst(l) = lfirst(curr_old); - } - } - } - - return; + querystring = (char*) palloc(1024*sizeof(char)); + + // For every item we've rated, we need to obtain its similarity + // scores and apply them to the appropriate items. This is + // necessary because we're only storing half of the similarity + // matrix. + for (i = 0; i < ratedTable->hash; i++) { + GenRating *currentItem; + + for (currentItem = ratedTable->table[i]; currentItem; + currentItem = currentItem->next) { + sprintf(querystring,"select * from %s where item1 = %d;", + itemmodel,currentItem->ID); + queryDesc = recathon_queryStart(querystring,&recathoncontext); + planstate = queryDesc->planstate; + + for (;;) { + int itemID; + float similarity; + GenRating *pendingItem; + + slot = ExecProcNode(planstate); + if (TupIsNull(slot)) break; + + itemID = getTupleInt(slot,"item2"); + similarity = getTupleFloat(slot,"similarity"); + + // Find the array slot this item ID corresponds to. + // If -1 is returned, then the item ID corresponds to + // another item we've rated, so we don't care. + pendingItem = hashFind(recnode->pendingTable,itemID); + if (pendingItem) { + pendingItem->score += similarity*currentItem->score; + if (similarity < 0) + similarity *= -1; + pendingItem->totalSim += similarity; + } + } + + recathon_queryEnd(queryDesc,recathoncontext); + } + } + + pfree(querystring); } diff --git a/PostgreSQL/src/include/utils/recathon.h b/PostgreSQL/src/include/utils/recathon.h index 1ea123b..e08b9c8 100644 --- a/PostgreSQL/src/include/utils/recathon.h +++ b/PostgreSQL/src/include/utils/recathon.h @@ -21,38 +21,38 @@ /* An enum to list all of our recommendation methods. */ typedef enum { - itemCosCF, - itemPearCF, - userCosCF, - userPearCF, - SVD + itemCosCF, + itemPearCF, + userCosCF, + userPearCF, + SVD } recMethod; /* Structures for a linked list of similarity cells. */ struct sim_node_t { - int id; - float event; - struct sim_node_t *next; + int id; + float event; + struct sim_node_t *next; }; typedef struct sim_node_t* sim_node; /* Structures for a linked list of neighbor nodes. * Used when we have a specific neighborhood size. */ struct nbr_node_t { - int item1; - int item2; - float similarity; - struct nbr_node_t *next; + int item1; + int item2; + float similarity; + struct nbr_node_t *next; }; typedef struct nbr_node_t* nbr_node; /* Structure to hold event information for SVD * training. Includes space for residual information. */ struct svd_node_t { - int userid; - int itemid; - float event; - float residual; + int userid; + int itemid; + float event; + float residual; }; typedef struct svd_node_t* svd_node; @@ -89,8 +89,8 @@ extern char* retrieveRecommender(char *eventtable, char *method); /* Functions for getting recommender data. */ extern void getRecInfo(char *recindexname, char **ret_eventtable, - char **ret_userkey, char **ret_itemkey, - char **ret_eventval, char **ret_method, int *ret_numatts); + char **ret_userkey, char **ret_itemkey, + char **ret_eventval, char **ret_method, int *ret_numatts); /* Functions for parsing CreateRStmt data. */ extern recMethod validateCreateRStmt(CreateRStmt *recStmt); @@ -105,43 +105,43 @@ extern void updateCellCounter(char *eventtable, TupleTableSlot *insertslot); extern int binarySearch(int *array, int value, int lo, int hi); extern int *getAllUsers(int numusers, char* usertable); extern float *vector_lengths(char *key, char *eventtable, char *eventval, - int *totalNum, int **IDlist); + int *totalNum, int **IDlist); extern float dotProduct(sim_node item1, sim_node item2); extern float cosineSimilarity(sim_node item1, sim_node item2, float length1, float length2); extern int updateItemCosModel(char *eventtable, char *userkey, char *itemkey, - char *eventval, char *modelname, int *itemIDs, float *itemLengths, - int numItems, bool update); + char *eventval, char *modelname, int *itemIDs, float *itemLengths, + int numItems, bool update); /* Functions for building a recommender based on itemPearCF. */ extern void pearson_info(char *key, char *eventtable, char *eventval, int *totalNum, - int **IDlist, float **avgList, float **pearsonList); + int **IDlist, float **avgList, float **pearsonList); extern float pearsonDotProduct(sim_node item1, sim_node item2, float avg1, float avg2); extern float pearsonSimilarity(sim_node item1, sim_node item2, float avg1, float avg2, - float pearson1, float pearson2); + float pearson1, float pearson2); extern int updateItemPearModel(char *eventtable, char *userkey, char *itemkey, - char *eventval, char *modelname, int *itemIDs, float *itemAvgs, - float *itemPearsons, int numItems, bool update); + char *eventval, char *modelname, int *itemIDs, float *itemAvgs, + float *itemPearsons, int numItems, bool update); /* Functions for building a user-based recommender. */ extern int updateUserCosModel(char *eventtable, char *userkey, char *itemkey, - char *eventval, char *modelname, int *userIDs, float *userLengths, - int numUsers, bool update); + char *eventval, char *modelname, int *userIDs, float *userLengths, + int numUsers, bool update); extern int updateUserPearModel(char *eventtable, char *userkey, char *itemkey, - char *eventval, char *modelname, int *userIDs, float *userAvgs, - float *userPearsons, int numUsers, bool update); + char *eventval, char *modelname, int *userIDs, float *userAvgs, + float *userPearsons, int numUsers, bool update); /* Functions for building a SVD recommender. */ extern svd_node createSVDnode(TupleTableSlot *slot, char *userkey, char *itemkey, char *eventval, - int *userIDs, int *itemIDs, int numUsers, int numItems); + int *userIDs, int *itemIDs, int numUsers, int numItems); extern void SVDlists(char *userkey, char *itemkey, char *eventtable, - int **ret_userIDs, int **ret_itemIDs, int *ret_numUsers, int *ret_numItems); + int **ret_userIDs, int **ret_itemIDs, int *ret_numUsers, int *ret_numItems); extern void SVDaverages(char *userkey, char *itemkey, char *eventtable, char *eventval, - int *userIDs, int *itemIDs, int numUsers, int numItems, - float **ret_itemAvgs, float **ret_userOffsets); + int *userIDs, int *itemIDs, int numUsers, int numItems, + float **ret_itemAvgs, float **ret_userOffsets); extern float predictRating(int featurenum, int numFeatures, int userid, int itemid, - float **userFeatures, float **itemFeatures, float redisual); + float **userFeatures, float **itemFeatures, float redisual); extern int SVDtrain(char *userkey, char *itemkey, char *eventtable, char *eventval, - char *usermodelname, char *itemmodelname, bool update); + char *usermodelname, char *itemmodelname, bool update); /* Functions for building and querying recommenders on-the-fly. */ extern void generateItemCosModel(RecScanState *recnode); @@ -166,7 +166,4 @@ extern float SVDpredict(RecScanState *recnode, char *itemmodel, int itemid); extern void applyRecScore(RecScanState *recnode, TupleTableSlot *slot, int itemid, int itemindex); extern void applyItemSim(RecScanState *recnode, char *itemmodel); -/* Functions for copyQuery function. */ -extern void copyQueryHelper(Query *query, Query *mainQuery); - #endif /* RECATHON_H */ From 2d46e08a9deabf981f996ccf6e9427792a0f9e1d Mon Sep 17 00:00:00 2001 From: RMoraffah Date: Mon, 14 Nov 2016 18:28:08 +0330 Subject: [PATCH 06/18] Bugs fix. prevent all probable side effects of changes for fixed bugs from happening. --- PostgreSQL/src/backend/utils/misc/recathon.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/PostgreSQL/src/backend/utils/misc/recathon.c b/PostgreSQL/src/backend/utils/misc/recathon.c index b60eb4c..477f639 100644 --- a/PostgreSQL/src/backend/utils/misc/recathon.c +++ b/PostgreSQL/src/backend/utils/misc/recathon.c @@ -4500,7 +4500,7 @@ applyItemSim(RecScanState *recnode, char *itemmodel) ratedTable = recnode->ratedTable; -<<<<<<< HEAD + querystring = (char*) palloc(1024*sizeof(char)); // For every item we've rated, we need to obtain its similarity From be8cdc1ecb17ab9204658093c36aa26d9c5f33d1 Mon Sep 17 00:00:00 2001 From: RMoraffah Date: Thu, 23 Mar 2017 10:28:02 -0700 Subject: [PATCH 07/18] adding postgis extension --- PostgreSQL/src/backend/parser/parse_rec.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/PostgreSQL/src/backend/parser/parse_rec.c b/PostgreSQL/src/backend/parser/parse_rec.c index 21ee8a3..16fdb27 100644 --- a/PostgreSQL/src/backend/parser/parse_rec.c +++ b/PostgreSQL/src/backend/parser/parse_rec.c @@ -1113,7 +1113,10 @@ userWhereClause(Node* whereClause, char *userkey) { if (!whereClause) return NULL; - // Turns out this isn't necessarily an A_Expr. + // Turns out this isn't necessarily an A_Expr. POSTGIS Support + if (nodeTag(whereClause) == T_FuncCall) + return makeTrueConst(); + if (nodeTag(whereClause) != T_A_Expr) return NULL; From 0e926ee7873893c3f4676176856d30d247b89002 Mon Sep 17 00:00:00 2001 From: RMoraffah Date: Mon, 27 Mar 2017 10:56:39 -0700 Subject: [PATCH 08/18] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 46a823f..f87fd07 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ # Welcome to RecDB RecDB is an Open Source Recommendation Engine Built Entirely Inside PostgreSQL 9.2. RecDB allows application developers to build recommendation applications in a heartbeat through a wide variety of built-in recommendation algorithms like user-user collaborative filtering, item-item collaborative filtering, singular value decomposition. Applications powered by RecDB can produce online and flexible personalized recommendations to end-users. -![RecDB Logo](http://www-users.cs.umn.edu/~sarwat/RecDB/pics/recdblogo.png) current version: ```v0.9-beta``` +![RecDB Logo](http://www-users.cs.umn.edu/~sarwat/RecDB/pics/recdblogo.png) current version: ```v1``` ## How to Get Source Code From 8098a2f4a2f154fb5927a9812460188aa9e05a46 Mon Sep 17 00:00:00 2001 From: RMoraffah Date: Mon, 27 Mar 2017 11:27:44 -0700 Subject: [PATCH 09/18] adding JDBC features --- PostgreSQL/src/backend/nodes/copyfuncs.c | 36 ++++++++++++++++++++++-- 1 file changed, 33 insertions(+), 3 deletions(-) diff --git a/PostgreSQL/src/backend/nodes/copyfuncs.c b/PostgreSQL/src/backend/nodes/copyfuncs.c index f5b2cd5..06f3cd5 100644 --- a/PostgreSQL/src/backend/nodes/copyfuncs.c +++ b/PostgreSQL/src/backend/nodes/copyfuncs.c @@ -28,6 +28,7 @@ #include "nodes/plannodes.h" #include "nodes/relation.h" #include "utils/datum.h" +#include "../../include/nodes/plannodes.h" /* @@ -402,6 +403,32 @@ _copyIndexOnlyScan(const IndexOnlyScan *from) return newnode; } +/* + * _copyIndexOnlyScan + */ +static RecScan * +_copyRecScan(const RecScan *from) +{ + RecScan *newnode = makeNode(RecScan); + + /* + * copy node superclass fields + */ + + + CopyScanFields((const Scan *) from, (Scan *) newnode); + + /* + * copy remainder of node + */ + + COPY_NODE_FIELD(recommender); + // CopyScanFields((const Scan *) from, (Scan *) newnode); + COPY_NODE_FIELD(subscan); + + return newnode; +} + /* * _copyBitmapIndexScan */ @@ -3829,9 +3856,9 @@ copyObject(const void *from) switch (nodeTag(from)) { - /* - * PLAN NODES - */ + /* + * PLAN NODES + */ case T_PlannedStmt: retval = _copyPlannedStmt(from); break; @@ -3949,6 +3976,9 @@ copyObject(const void *from) case T_PlanInvalItem: retval = _copyPlanInvalItem(from); break; + case T_RecScan: + retval = _copyRecScan(from); + break; /* * PRIMITIVE NODES From ac4a6b729094d62ccc9faadb7cc23a3c4b0f2b2d Mon Sep 17 00:00:00 2001 From: RMoraffah Date: Mon, 27 Mar 2017 11:29:12 -0700 Subject: [PATCH 10/18] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index f87fd07..a7799e5 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ # Welcome to RecDB RecDB is an Open Source Recommendation Engine Built Entirely Inside PostgreSQL 9.2. RecDB allows application developers to build recommendation applications in a heartbeat through a wide variety of built-in recommendation algorithms like user-user collaborative filtering, item-item collaborative filtering, singular value decomposition. Applications powered by RecDB can produce online and flexible personalized recommendations to end-users. -![RecDB Logo](http://www-users.cs.umn.edu/~sarwat/RecDB/pics/recdblogo.png) current version: ```v1``` +![RecDB Logo](http://www-users.cs.umn.edu/~sarwat/RecDB/pics/recdblogo.png) current version: ```v1.1``` ## How to Get Source Code From 69b5e94f1127daa91c448ad60c062c06f7fdfd46 Mon Sep 17 00:00:00 2001 From: Mohamed Sarwat Date: Thu, 23 Nov 2017 20:28:42 -0700 Subject: [PATCH 11/18] Update README.md --- README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/README.md b/README.md index a7799e5..b1fd9a0 100644 --- a/README.md +++ b/README.md @@ -148,6 +148,10 @@ LIMIT 10 ## Publications +* [Database System Support for Personalized Recommendation Applications] (http://ieeexplore.ieee.org/document/7930070/) +Mohamed Sarwat, Raha Moraffah, Mohamed F. Mokbel, James L. Avery: +Proceedings of the IEEE International Conference on Data Engineering, ICDE 2017: 1320-1331 + * [Recdb in Action: Recommendation Made Easy in Relational Databases](http://dl.acm.org/citation.cfm?id=2536286). Mohamed Sarwat, James L. Avery, Mohamed F. Mokbel. Proceedings of the Very Large Databases Endowment, PVLDB 6 (12), 1242-1245, 2013 From b24138f4246abe5ee6a7e777675b1814a3df9095 Mon Sep 17 00:00:00 2001 From: Mohamed Sarwat Date: Thu, 23 Nov 2017 20:29:08 -0700 Subject: [PATCH 12/18] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index b1fd9a0..5be9565 100644 --- a/README.md +++ b/README.md @@ -148,7 +148,7 @@ LIMIT 10 ## Publications -* [Database System Support for Personalized Recommendation Applications] (http://ieeexplore.ieee.org/document/7930070/) +* [Database System Support for Personalized Recommendation Applications](http://ieeexplore.ieee.org/document/7930070/) Mohamed Sarwat, Raha Moraffah, Mohamed F. Mokbel, James L. Avery: Proceedings of the IEEE International Conference on Data Engineering, ICDE 2017: 1320-1331 From 00cc8eada603df0eb0ac98b21ff1ef24a00537f8 Mon Sep 17 00:00:00 2001 From: Mohamed Sarwat Date: Thu, 23 Nov 2017 20:29:34 -0700 Subject: [PATCH 13/18] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 5be9565..9b00c6b 100644 --- a/README.md +++ b/README.md @@ -149,7 +149,7 @@ LIMIT 10 ## Publications * [Database System Support for Personalized Recommendation Applications](http://ieeexplore.ieee.org/document/7930070/) -Mohamed Sarwat, Raha Moraffah, Mohamed F. Mokbel, James L. Avery: +Mohamed Sarwat, Raha Moraffah, Mohamed F. Mokbel, James L. Avery. Proceedings of the IEEE International Conference on Data Engineering, ICDE 2017: 1320-1331 * [Recdb in Action: Recommendation Made Easy in Relational Databases](http://dl.acm.org/citation.cfm?id=2536286). From 746f0a4728092820d11d14fbaeecfd0b36f1243a Mon Sep 17 00:00:00 2001 From: Mohamed Sarwat Date: Thu, 15 Mar 2018 10:44:05 -0700 Subject: [PATCH 14/18] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 9b00c6b..2c9e8bb 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ # Welcome to RecDB RecDB is an Open Source Recommendation Engine Built Entirely Inside PostgreSQL 9.2. RecDB allows application developers to build recommendation applications in a heartbeat through a wide variety of built-in recommendation algorithms like user-user collaborative filtering, item-item collaborative filtering, singular value decomposition. Applications powered by RecDB can produce online and flexible personalized recommendations to end-users. -![RecDB Logo](http://www-users.cs.umn.edu/~sarwat/RecDB/pics/recdblogo.png) current version: ```v1.1``` +![RecDB Logo](http://faculty.engineering.asu.edu/sarwat/wp-content/uploads/2014/09/Untitled.png) current version: ```v1.1``` ## How to Get Source Code From 28ad9be9ce60b81ad86b778c681e892d662044a6 Mon Sep 17 00:00:00 2001 From: Mohamed Sarwat Date: Thu, 6 Sep 2018 10:38:59 -0700 Subject: [PATCH 15/18] Update movies.dat --- PostgreSQL/moviedata/MovieLens1M/movies.dat | 84 ++++++++++----------- 1 file changed, 42 insertions(+), 42 deletions(-) diff --git a/PostgreSQL/moviedata/MovieLens1M/movies.dat b/PostgreSQL/moviedata/MovieLens1M/movies.dat index cadebf6..a8b0628 100644 --- a/PostgreSQL/moviedata/MovieLens1M/movies.dat +++ b/PostgreSQL/moviedata/MovieLens1M/movies.dat @@ -352,7 +352,7 @@ 355;Flintstones, The (1994);Children's|Comedy 356;Forrest Gump (1994);Comedy|Romance|War 357;Four Weddings and a Funeral (1994);Comedy|Romance -358;Higher Learning (1995);Drama +358;Higher Learning (1995);Drama 359;I Like It Like That (1994);Comedy|Drama|Romance 360;I Love Trouble (1994);Action|Comedy 361;It Could Happen to You (1994);Drama|Romance @@ -1191,7 +1191,7 @@ 1208;Apocalypse Now (1979);Drama|War 1209;Once Upon a Time in the West (1969);Western 1210;Star Wars: Episode VI - Return of the Jedi (1983);Action|Adventure|Romance|Sci-Fi|War -1211;Wings of Desire (Der Himmel über Berlin) (1987);Comedy|Drama|Romance +1211;Wings of Desire (Der Himmel über Berlin) (1987);Comedy|Drama|Romance 1212;Third Man, The (1949);Mystery|Thriller 1213;GoodFellas (1990);Crime|Drama 1214;Alien (1979);Action|Horror|Sci-Fi|Thriller @@ -1298,7 +1298,7 @@ 1317;I'm Not Rappaport (1996);Comedy 1318;Blue Juice (1995);Comedy|Drama 1319;Kids of Survival (1993);Documentary -1320;Alien³ (1992);Action|Horror|Sci-Fi|Thriller +1320;Alien³ (1992);Action|Horror|Sci-Fi|Thriller 1321;American Werewolf in London, An (1981);Horror 1322;Amityville 1992: It's About Time (1992);Horror 1323;Amityville 3-D (1983);Horror @@ -1381,7 +1381,7 @@ 1401;Ghosts of Mississippi (1996);Drama 1404;Night Falls on Manhattan (1997);Crime|Drama 1405;Beavis and Butt-head Do America (1996);Animation|Comedy -1406;Cérémonie, La (1995);Drama +1406;Cérémonie, La (1995);Drama 1407;Scream (1996);Horror|Thriller 1408;Last of the Mohicans, The (1992);Action|Romance|War 1409;Michael (1996);Comedy|Romance @@ -1530,7 +1530,7 @@ 1569;My Best Friend's Wedding (1997);Comedy|Romance 1570;Tetsuo II: Body Hammer (1992);Sci-Fi 1571;When the Cats Away (Chacun cherche son chat) (1996);Comedy|Romance -1572;Contempt (Le Mépris) (1963);Drama +1572;Contempt (Le Mépris) (1963);Drama 1573;Face/Off (1997);Action|Sci-Fi|Thriller 1574;Fall (1997);Romance 1575;Gabbeh (1996);Drama @@ -1616,7 +1616,7 @@ 1661;Switchback (1997);Thriller 1662;Gang Related (1997);Crime 1663;Stripes (1981);Comedy -1664;Nénette et Boni (1996);Drama +1664;Nénette et Boni (1996);Drama 1665;Bean (1997);Comedy 1666;Hugo Pool (1997);Romance 1667;Mad City (1997);Action|Drama @@ -1688,7 +1688,7 @@ 1738;Vermin (1998);Comedy 1739;3 Ninjas: High Noon On Mega Mountain (1998);Action|Children's 1740;Men of Means (1998);Action|Drama -1741;Midaq Alley (Callejón de los milagros, El) (1995);Drama +1741;Midaq Alley (Callejón de los milagros, El) (1995);Drama 1742;Caught Up (1998);Crime 1743;Arguing the World (1996);Documentary 1744;Firestorm (1998);Action|Adventure|Thriller @@ -1733,7 +1733,7 @@ 1792;U.S. Marshalls (1998);Action|Thriller 1793;Welcome to Woop-Woop (1997);Comedy 1794;Love and Death on Long Island (1997);Comedy|Drama -1795;Callejón de los milagros, El (1995);Drama +1795;Callejón de los milagros, El (1995);Drama 1796;In God's Hands (1998);Action|Drama 1797;Everest (1998);Documentary 1798;Hush (1998);Thriller @@ -1802,7 +1802,7 @@ 1870;Dancer, Texas Pop. 81 (1998);Comedy|Drama 1871;Friend of the Deceased, A (1997);Comedy|Drama 1872;Go Now (1995);Drama -1873;Misérables, Les (1998);Drama +1873;Misérables, Les (1998);Drama 1874;Still Breathing (1997);Comedy|Romance 1875;Clockwatchers (1997);Comedy 1876;Deep Impact (1998);Action|Drama|Sci-Fi|Thriller @@ -1862,7 +1862,7 @@ 1930;Cavalcade (1933);Drama 1931;Mutiny on the Bounty (1935);Adventure 1932;Great Ziegfeld, The (1936);Musical -1933;Life of Émile Zola, The (1937);Drama +1933;Life of Émile Zola, The (1937);Drama 1934;You Can't Take It With You (1938);Comedy 1935;How Green Was My Valley (1941);Drama 1936;Mrs. Miniver (1942);Drama|War @@ -1992,7 +1992,7 @@ 2060;BASEketball (1998);Comedy 2061;Full Tilt Boogie (1997);Documentary 2062;Governess, The (1998);Drama|Romance -2063;Seventh Heaven (Le Septième ciel) (1997);Drama|Romance +2063;Seventh Heaven (Le Septième ciel) (1997);Drama|Romance 2064;Roger & Me (1989);Comedy|Documentary 2065;Purple Rose of Cairo, The (1985);Comedy|Drama|Romance 2066;Out of the Past (1947);Film-Noir @@ -2060,7 +2060,7 @@ 2128;Safe Men (1998);Comedy 2129;Saltmen of Tibet, The (1997);Documentary 2130;Atlantic City (1980);Crime|Drama|Romance -2131;Autumn Sonata (Höstsonaten ) (1978);Drama +2131;Autumn Sonata (Höstsonaten ) (1978);Drama 2132;Who's Afraid of Virginia Woolf? (1966);Drama 2133;Adventures in Babysitting (1987);Adventure|Comedy 2134;Weird Science (1985);Comedy @@ -2104,7 +2104,7 @@ 2172;Strike! (a.k.a. All I Wanna Do, The Hairy Bird) (1998);Comedy 2173;Navigator: A Mediaeval Odyssey, The (1988);Adventure|Fantasy|Sci-Fi 2174;Beetlejuice (1988);Comedy|Fantasy -2175;Déjà Vu (1997);Drama|Romance +2175;Déjà Vu (1997);Drama|Romance 2176;Rope (1948);Thriller 2177;Family Plot (1976);Comedy|Thriller 2178;Frenzy (1972);Thriller @@ -2253,7 +2253,7 @@ 2321;Pleasantville (1998);Comedy 2322;Soldier (1998);Action|Adventure|Sci-Fi|Thriller|War 2323;Cruise, The (1998);Documentary -2324;Life Is Beautiful (La Vita è bella) (1997);Comedy|Drama +2324;Life Is Beautiful (La Vita è bella) (1997);Comedy|Drama 2325;Orgazmo (1997);Comedy 2326;Shattered Image (1998);Drama|Thriller 2327;Tales from the Darkside: The Movie (1990);Horror @@ -2409,10 +2409,10 @@ 2477;Firewalker (1986);Adventure 2478;Three Amigos! (1986);Comedy|Western 2479;Gloria (1999);Drama|Thriller -2480;Dry Cleaning (Nettoyage à sec) (1997);Drama +2480;Dry Cleaning (Nettoyage à sec) (1997);Drama 2481;My Name Is Joe (1998);Drama|Romance 2482;Still Crazy (1998);Comedy|Romance -2483;Day of the Beast, The (El Día de la bestia) (1995);Comedy|Horror|Thriller +2483;Day of the Beast, The (El Día de la bestia) (1995);Comedy|Horror|Thriller 2484;Tinseltown (1998);Comedy 2485;She's All That (1999);Comedy|Romance 2486;24-hour Woman (1998);Drama @@ -2424,7 +2424,7 @@ 2492;20 Dates (1998);Comedy 2493;Harmonists, The (1997);Drama 2494;Last Days, The (1998);Documentary -2495;Fantastic Planet, The (La Planète sauvage) (1973);Animation|Sci-Fi +2495;Fantastic Planet, The (La Planète sauvage) (1973);Animation|Sci-Fi 2496;Blast from the Past (1999);Comedy|Romance 2497;Message in a Bottle (1999);Romance 2498;My Favorite Martian (1999);Comedy|Sci-Fi @@ -2473,7 +2473,7 @@ 2541;Cruel Intentions (1999);Drama 2542;Lock, Stock & Two Smoking Barrels (1998);Comedy|Crime|Thriller 2543;Six Ways to Sunday (1997);Comedy -2544;School of Flesh, The (L' École de la chair) (1998);Drama +2544;School of Flesh, The (L' École de la chair) (1998);Drama 2545;Relax... It's Just Sex (1998);Comedy 2546;Deep End of the Ocean, The (1999);Drama 2547;Harvest (1998);Drama @@ -2504,7 +2504,7 @@ 2572;10 Things I Hate About You (1999);Comedy|Romance 2573;Tango (1998);Drama 2574;Out-of-Towners, The (1999);Comedy -2575;Dreamlife of Angels, The (La Vie rêvée des anges) (1998);Drama +2575;Dreamlife of Angels, The (La Vie rêvée des anges) (1998);Drama 2576;Love, etc. (1996);Drama 2577;Metroland (1997);Comedy|Drama 2578;Sticky Fingers of Time, The (1997);Sci-Fi @@ -2514,13 +2514,13 @@ 2582;Twin Dragons (Shuang long hui) (1992);Action|Comedy 2583;Cookie's Fortune (1999);Mystery 2584;Foolish (1999);Comedy -2585;Lovers of the Arctic Circle, The (Los Amantes del Círculo Polar) (1998);Drama|Romance +2585;Lovers of the Arctic Circle, The (Los Amantes del Círculo Polar) (1998);Drama|Romance 2586;Goodbye, Lover (1999);Comedy|Crime|Thriller 2587;Life (1999);Comedy 2588;Clubland (1998);Drama 2589;Friends & Lovers (1999);Comedy|Drama|Romance 2590;Hideous Kinky (1998);Drama -2591;Jeanne and the Perfect Guy (Jeanne et le garçon formidable) (1998);Comedy|Romance +2591;Jeanne and the Perfect Guy (Jeanne et le garçon formidable) (1998);Comedy|Romance 2592;Joyriders, The (1999);Drama 2593;Monster, The (Il Mostro) (1994);Comedy 2594;Open Your Eyes (Abre los ojos) (1997);Drama|Romance|Sci-Fi @@ -2532,7 +2532,7 @@ 2600;eXistenZ (1999);Action|Sci-Fi|Thriller 2601;Little Bit of Soul, A (1998);Comedy 2602;Mighty Peking Man (Hsing hsing wang) (1977);Adventure|Sci-Fi -2603;Nô (1998);Drama +2603;Nô (1998);Drama 2604;Let it Come Down: The Life of Paul Bowles (1998);Documentary 2605;Entrapment (1999);Crime|Thriller 2606;Idle Hands (1999);Comedy|Horror @@ -2625,7 +2625,7 @@ 2693;Trekkies (1997);Documentary 2694;Big Daddy (1999);Comedy 2695;Boys, The (1997);Drama -2696;Dinner Game, The (Le Dîner de cons) (1998);Comedy +2696;Dinner Game, The (Le Dîner de cons) (1998);Comedy 2697;My Son the Fanatic (1998);Comedy|Drama|Romance 2698;Zone 39 (1997);Sci-Fi 2699;Arachnophobia (1990);Action|Comedy|Sci-Fi|Thriller @@ -2634,7 +2634,7 @@ 2702;Summer of Sam (1999);Drama 2703;Broken Vessels (1998);Drama 2704;Lovers on the Bridge, The (Les Amants du Pont-Neuf) (1991);Drama|Romance -2705;Late August, Early September (Fin août, début septembre) (1998);Drama +2705;Late August, Early September (Fin août, début septembre) (1998);Drama 2706;American Pie (1999);Comedy 2707;Arlington Road (1999);Thriller 2708;Autumn Tale, An (Conte d'automne) (1998);Romance @@ -2671,7 +2671,7 @@ 2739;Color Purple, The (1985);Drama 2740;Kindred, The (1986);Horror 2741;No Mercy (1986);Action|Thriller -2742;Ménage (Tenue de soirée) (1986);Comedy|Drama +2742;Ménage (Tenue de soirée) (1986);Comedy|Drama 2743;Native Son (1986);Drama 2744;Otello (1986);Drama 2745;Mission, The (1986);Drama @@ -2689,7 +2689,7 @@ 2757;Frances (1982);Drama 2758;Plenty (1985);Drama 2759;Dick (1999);Comedy -2760;Gambler, The (A Játékos) (1997);Drama +2760;Gambler, The (A Játékos) (1997);Drama 2761;Iron Giant, The (1999);Animation|Children's 2762;Sixth Sense, The (1999);Thriller 2763;Thomas Crown Affair, The (1999);Action|Thriller @@ -2798,7 +2798,7 @@ 2866;Buddy Holly Story, The (1978);Drama 2867;Fright Night (1985);Comedy|Horror 2868;Fright Night Part II (1989);Horror -2869;Separation, The (La Séparation) (1994);Drama +2869;Separation, The (La Séparation) (1994);Drama 2870;Barefoot in the Park (1967);Comedy 2871;Deliverance (1972);Adventure|Thriller 2872;Excalibur (1981);Action|Drama|Fantasy|Romance @@ -2983,7 +2983,7 @@ 3051;Anywhere But Here (1999);Drama 3052;Dogma (1999);Comedy 3053;Messenger: The Story of Joan of Arc, The (1999);Drama|War -3054;Pokémon: The First Movie (1998);Animation|Children's +3054;Pokémon: The First Movie (1998);Animation|Children's 3055;Felicia's Journey (1999);Thriller 3056;Oxygen (1999);Thriller 3057;Where's Marlowe? (1999);Comedy @@ -3156,7 +3156,7 @@ 3224;Woman in the Dunes (Suna no onna) (1964);Drama 3225;Down to You (2000);Comedy|Romance 3226;Hellhounds on My Trail (1999);Documentary -3227;Not Love, Just Frenzy (Más que amor, frenesí) (1996);Comedy|Drama|Thriller +3227;Not Love, Just Frenzy (Más que amor, frenesí) (1996);Comedy|Drama|Thriller 3228;Wirey Spindell (1999);Comedy 3229;Another Man's Poison (1952);Crime|Drama 3230;Odessa File, The (1974);Thriller @@ -3166,11 +3166,11 @@ 3234;Train Ride to Hollywood (1978);Comedy 3235;Where the Buffalo Roam (1980);Comedy 3236;Zachariah (1971);Western -3237;Kestrel's Eye (Falkens öga) (1998);Documentary +3237;Kestrel's Eye (Falkens öga) (1998);Documentary 3238;Eye of the Beholder (1999);Thriller 3239;Isn't She Great? (2000);Comedy 3240;Big Tease, The (1999);Comedy -3241;Cup, The (Phörpa) (1999);Comedy +3241;Cup, The (Phörpa) (1999);Comedy 3242;Santitos (1997);Comedy 3243;Encino Man (1992);Comedy 3244;Goodbye Girl, The (1977);Comedy|Romance @@ -3195,7 +3195,7 @@ 3263;White Men Can't Jump (1992);Comedy 3264;Buffy the Vampire Slayer (1992);Comedy|Horror 3265;Hard-Boiled (Lashou shentan) (1992);Action|Crime -3266;Man Bites Dog (C'est arrivé près de chez vous) (1992);Action|Comedy|Crime|Drama +3266;Man Bites Dog (C'est arrivé près de chez vous) (1992);Action|Comedy|Crime|Drama 3267;Mariachi, El (1992);Action|Thriller 3268;Stop! Or My Mom Will Shoot (1992);Action|Comedy 3269;Forever Young (1992);Adventure|Romance|Sci-Fi @@ -3298,7 +3298,7 @@ 3366;Where Eagles Dare (1969);Action|Adventure|War 3367;Devil's Brigade, The (1968);War 3368;Big Country, The (1958);Romance|Western -3369;Any Number Can Win (Mélodie en sous-sol ) (1963);Crime +3369;Any Number Can Win (Mélodie en sous-sol ) (1963);Crime 3370;Betrayed (1988);Drama|Thriller 3371;Bound for Glory (1976);Drama 3372;Bridge at Remagen, The (1969);Action|War @@ -3345,7 +3345,7 @@ 3413;Impact (1949);Crime|Drama 3414;Love Is a Many-Splendored Thing (1955);Romance 3415;Mirror, The (Zerkalo) (1975);Drama -3416;Trial, The (Le Procès) (1963);Drama +3416;Trial, The (Le Procès) (1963);Drama 3417;Crimson Pirate, The (1952);Adventure|Comedy|Sci-Fi 3418;Thelma & Louise (1991);Action|Drama 3419;Something for Everyone (1970);Comedy|Crime @@ -3461,7 +3461,7 @@ 3529;Postman Always Rings Twice, The (1981);Crime|Thriller 3530;Smoking/No Smoking (1993);Comedy 3531;All the Vermeers in New York (1990);Comedy|Drama|Romance -3532;Freedom for Us (À nous la liberté ) (1931);Comedy +3532;Freedom for Us (À nous la liberté ) (1931);Comedy 3533;Actor's Revenge, An (Yukinojo Henge) (1963);Drama 3534;28 Days (2000);Comedy 3535;American Psycho (2000);Comedy|Horror|Thriller @@ -3521,7 +3521,7 @@ 3589;Kill, Baby... Kill! (Operazione Paura) (1966);Horror 3590;Lords of Flatbush, The (1974);Comedy 3591;Mr. Mom (1983);Comedy|Drama -3592;Time Masters (Les Maîtres du Temps) (1982);Animation|Sci-Fi +3592;Time Masters (Les Maîtres du Temps) (1982);Animation|Sci-Fi 3593;Battlefield Earth (2000);Action|Sci-Fi 3594;Center Stage (2000);Drama 3595;Held Up (2000);Comedy @@ -3574,14 +3574,14 @@ 3642;In Old California (1942);Western 3643;Fighting Seabees, The (1944);Action|Drama|War 3644;Dark Command (1940);Western -3645;Cleo From 5 to 7 (Cléo de 5 à 7) (1962);Drama +3645;Cleo From 5 to 7 (Cléo de 5 à 7) (1962);Drama 3646;Big Momma's House (2000);Comedy 3647;Running Free (2000);Drama 3648;Abominable Snowman, The (1957);Horror|Sci-Fi 3649;American Gigolo (1980);Drama 3650;Anguish (Angustia) (1986);Horror 3651;Blood Spattered Bride, The (La Novia Ensangrentada) (1972);Horror -3652;City of the Living Dead (Paura nella città dei morti viventi) (1980);Horror +3652;City of the Living Dead (Paura nella città dei morti viventi) (1980);Horror 3653;Endless Summer, The (1966);Documentary 3654;Guns of Navarone, The (1961);Action|Drama|War 3655;Blow-Out (La Grande Bouffe) (1973);Drama @@ -3678,7 +3678,7 @@ 3746;Butterfly (La Lengua de las Mariposas) (2000);Drama|War 3747;Jesus' Son (1999);Drama 3748;Match, The (1999);Comedy|Romance -3749;Time Regained (Le Temps Retrouvé) (1999);Drama +3749;Time Regained (Le Temps Retrouvé) (1999);Drama 3750;Boricua's Bond (2000);Drama 3751;Chicken Run (2000);Animation|Children's|Comedy 3752;Me, Myself and Irene (2000);Comedy @@ -3728,7 +3728,7 @@ 3796;Wisdom of Crocodiles, The (a.k.a. Immortality) (2000);Romance|Thriller 3797;In Crowd, The (2000);Thriller 3798;What Lies Beneath (2000);Thriller -3799;Pokémon the Movie 2000 (2000);Animation|Children's +3799;Pokémon the Movie 2000 (2000);Animation|Children's 3800;Criminal Lovers (Les Amants Criminels) (1999);Drama|Romance 3801;Anatomy of a Murder (1959);Drama|Mystery 3802;Freejack (1992);Action|Sci-Fi @@ -3745,7 +3745,7 @@ 3813;Interiors (1978);Drama 3814;Love and Death (1975);Comedy 3816;Official Story, The (La Historia Oficial) (1985);Drama -3817;Other Side of Sunday, The (Søndagsengler) (1996);Comedy|Drama +3817;Other Side of Sunday, The (Søndagsengler) (1996);Comedy|Drama 3818;Pot O' Gold (1941);Comedy|Musical 3819;Tampopo (1986);Comedy 3820;Thomas and the Magic Railroad (2000);Children's @@ -3782,7 +3782,7 @@ 3851;I'm the One That I Want (2000);Comedy 3852;Tao of Steve, The (2000);Comedy 3853;Tic Code, The (1998);Drama -3854;Aimée & Jaguar (1999);Drama|Romance +3854;Aimée & Jaguar (1999);Drama|Romance 3855;Affair of Love, An (Une Liaison Pornographique) (1999);Drama|Romance 3856;Autumn Heart (1999);Drama 3857;Bless the Child (2000);Thriller From fd9e5ad0c5a68113e20783d266a188a7bf3b4735 Mon Sep 17 00:00:00 2001 From: Mohamed Sarwat Date: Thu, 6 Sep 2018 10:39:34 -0700 Subject: [PATCH 16/18] Update README.md --- examples/README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/examples/README.md b/examples/README.md index 8693d69..5577194 100644 --- a/examples/README.md +++ b/examples/README.md @@ -1,4 +1,5 @@ #Examples + This directory contains scripts along-with the .csv files of all datasets of RecDB's examples. In all, we have written scripts on four large datasets i.e MoiveTweet, MoiveLens 100k, MoiveLens 1M and yelps academic dataset. Following is the brief discription of the datasets: 1. MovieTweet: a rich data set consisting of ratings on movies that were contained in well-structured tweets on twitter. This data set consists 11495 ratings for 4730 movies rated from 5788 users. (1) users (userid, name):It consists all information of the registered user. (2) movies (movieid, name, year, genre): Information about movies are stored in this table. (3) ratings (userid, itemid, rating): Each tuple in the ratings table represents how much a user liked a movie after watching it. For further information about MovieTweet, please visit the following [https://github.com/sidooms/MovieTweetings](link). From 6ebb75778089e8fb4a0af4c41fa2750642527e23 Mon Sep 17 00:00:00 2001 From: Mohamed Sarwat Date: Mon, 11 May 2020 08:31:19 -0700 Subject: [PATCH 17/18] Add files via upload --- 8F51D3FB-A4C0-4236-8598-A084DCDBCF9D.jpeg | Bin 0 -> 19938 bytes 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 8F51D3FB-A4C0-4236-8598-A084DCDBCF9D.jpeg diff --git a/8F51D3FB-A4C0-4236-8598-A084DCDBCF9D.jpeg b/8F51D3FB-A4C0-4236-8598-A084DCDBCF9D.jpeg new file mode 100644 index 0000000000000000000000000000000000000000..3798aac8c2f950a81b2df93f1cd7296ec613dc6a GIT binary patch literal 19938 zcmce-cUV(hvp2kH0s_*Dw4ihlP?RDH5tSw)f+9_bf=Ck(=?Do$dJ_;3P$EqP#7LJK zkSbNW^dulE&4dyIY2Uu@=bYy`*K^Jv=XvVOPy6c4QO9BF=PI#2eAE3%l=LBOiCorrU5m~zarRQZjvs; z={Ux7CReV&Zd#ffnp`*ddr*ur_a8rg^w*cYy#1e88eNvMv9*(8#R9D04;LT=NZoh# zd#r!+=Jmh&{LjCC_y5li;`HBR2PRbh>T5tWD@qo@W2T+UKQ*>481>KF{%7Lo0~bGM z@Dc?)T$~?2@dpL*0HmdZ{2%{C$Ah%M6L3%<{pv5;^&j-dzi6j_(A|IEbKCL?c+WSG zmU4Ex5B9-80_jsu|K+^vf1&^5x!^4T$5nT~$B*uZNZma9*T4UFo4-pi$Oiyi@p&Bb z#NE}+UrHa8l8cmyx3lUgDfQE5v;g2Qng8RdyB2@Tl0j(bU+1Ei0YDc`r_)>hb?zD( z0II=n9&P&9IR$M1;N<{-{-($GpWOeuKPK>#@gb;fJ7oaCWeWiOU%~I#`#n6X{`YkT z&dUJ6vP7rv$^igtCIC>M)9K_QI-Ob!089%2(BVz*0C?Ge8b&oHhLZpzF9Q=V1HBu7 zfZ}Cg_`CgM;Nbs^Ow24tSlQT*a)39~^8kztOiYZ-Oe`#aDJMfXcpYHoW#Kz4j|4m#aGkEywUOXPurzu^$x@7C9~|D<`j@sB}(4Q%n22&XubMhDOFF z*KXamx?>Hp>-@mwp{tv_ho65yU{G*KX!Ns~*tqzF#Ei@rS=l)+bMuOdOG@9Cy(_P% zsjaJTX#CXF{JE>Ur?;>F%fR@=V)(Jbid;7#cq=Uo1_+kK< z{=pXb`47(ijW1r1FGgl&CT7;Z_+nrT`ipU1W|k9Yj_~PQu-^COKdBzcCU7~uu)337 z`m7~h(CNw8Q6ZUgSXshfto_Z||BkV!|0B-+$=JX6ng_0ctNpLW2>voLGJ;dZ1U6=n z1+cLkVfniq`H#l>cVqjjvH!Et!A}0}fsv6J{AEAFa^#=a{?`P35j1Af^f`c&i2+n5 zCSCvv&?pK;iNJq5Ds&3U-?}w;z1z?_V`pEslpSRAHQY9yvDMXr`VRfjK@w5xK?itS z)jXwW?DbD&e7?Lr@;c6WQ2})pkL*Q7JL-M1H)jlPF_`|2XwDWU&YXfrDT?In4e?GV zv4Ky!Bw%3x{J%O7^soQUW~%XpP~tEEcSLmRBQRCaT2>1G92@1#imfXFn)=&|Hs*p2 zmnn9rb3O5}IXx}MoryP6rc7m|J4}>jP6yn%NowX40qDx+A0bJ$=|?m{wX>$q(tsK%JLPNpon0c0E#Y?^*cNGCxI)Fma1)-d(B2zkZVx<1I7v`$^_U!%GYK z2xZsw#_u)_k;zV{sy$z`a=iu7Y=>24T!!Bl+Kw;j| zNqYlWBm1yvTvuD1C#r6TvfZKs3032zhHIG6Jj_K4!YwR13g>>&ylQq%a6LsS7s_mD zzgK1T+{6DX>FMQ$CewLksk4>Wpwshs)c)4Tl zhl2s6Sh2yTdH+U!`vsek()bRB+Y;~ID5XYK=Ql&l%=g%e5|v*?6eTJE|BntP`q;lK zPR@}C=+GdITZWqQL63_z-_B31Zk6}&+CwGUSb`L*?ns1Xct4+)I>3>q8u(bAS$1`_X0*_-p3kU0AYOJt+sfHY)tfUf4g5 zd{Jv&+K~?E7Mt}wsf4W_4W0NjQf5JzNvRJ|tk@84R0>UyPmf!*YjWyIwOP%(rpQK? zCJIlE8<&QI-(T*2R?wPYo!A=Uu3A#U8?dbJ8-h9MK@%3 zgzeNA#(dAc%zFP*6zdhcKBwuv!`2xF!WCR&ms*q~TN^2dkkl<26w|8v_MK;ysZ~?z z_%u(q((mj$x5ZvYFr1gc_`%}f{Gb|SLos<;?ILY6aD_!uQiqmi_Kumv+xpU4jn(vW zpV~RI2Y*hRoLyl{Iq9tzEd$pi31bmkE_8tPYFKC-uCK4_DSm}`4IfzYY}B@|mW9pGl!;-CuzmSr?-_3VD7GvT&J}jyD~&(w^u&p=unX80k#mchpBH!q z+p22Va4AnrKEGp<$q3Cb3lcneRG}#1A4l<#h@*;+-MVfvTA0R`biQFsRQO*WcSTQ6 z0xvAF<^QJS5odc;r{6FadXV*JE zd~t|Rd+dnq>z5G~tTLIAuoMJ88uLCcCnd}a`dRJU*^WO5evMBF){}`k?qkG-!p9@g zN(BCc4_z$x;_O>;h)E(7R$ZHItaEd3wxZ7y7DsK-J(VPN|wQuh~0u< zyMuY+M>-G{8O%XEKH*ADp#vCXX!u*CUojmh-f%qWZ(BmPOQ}&363N5(tzA2~?UN;( zs{5!|9+gt@?yEH-YX2f-LbEWcbgQxrdUoRgE`sQEp~=lkAaIaw^fAi0S~z^zJKZN) z^0C9oob-v@gb#PrW|71V{>@tIhes|=bfCaXJsoM*l17V$4MDo0$q1o#mG!i$Fbjfa zIgTUunCY^E>FMLN^;$2B%{NC$eZ(>XcYK%_Ubq`BjSqA}C$dVs4mgs}mZ#>rNEwG;D1D#M5hc$1>;&e~X%tlIOvBk`WzXj;EGEJU?bj z#j*-tEYM2wiQ6+j8f+)e`lo1oZRh@Wk^7V%ZHFc(9)Gutm8?JqIvb#ny=C6H;03P%9)a-;Ab$Xob-gv)sV{#+~*2?rj`5VH=JZZUFYjYT{sW!_&v`$sAX|MEp zN6NHMwFuB%?0vQLKy%sim+$!;79vD$>9i4Z3@U;SxFP4$R{nIGDbB0}rS3a~s78zI z1;4ddmY4W5Klk3)Sz;)kdR~L@r)!zhUpVY&EbfM!?kg#lii}LZ?d2XeAt~FJ z)6mLFWA6jiqaBajqLyte36Yuz$DqK*<4b*p_+*P{4cgfAnV?J0cAzm^Ren;evI8k^ zQN4)SA2|8VmX*r1Xav7MV$*Z$jMd2$fPC@!G3kuOh@%7^Jxd*bDc(rp!ud`@&&cg+ z_0^2ofc=?5!d;^t6bIX6m%$CSpb45dEhkKud;y#oBRw9s;$B4c?>-K-#AX5Ux5#jv zy9ok*^`h1V7t^V;D|%U52qrY9D$fO-l8I^Ds>VJ|i`7%Q!%KY|;9J(;7hxZ~%;r2+ z1G+K|$_H{J7~v@n*{K_T@fJR*TbL$+zRaELeD&Ak@^62cEb1HPQKUk>@m=5m{d38#{&b&bRH%$6s zW-9($xssLjDdYmeJTNFbFQ&wr$X9~DktFlPurqxu)+#GjpZN(JW-5^uKnGmuK+>`( z6!Q{RXArj;O*Vw-)#I1D{aEnCkE`h(GU4%q8G(JDV)Ddko6mpk6d;PyO5tHsX-#gR z3ws%=jljHz*1>uZv)05D1g&0VR8Y11(#eVKx}RR#a#0tu44PUJk3)kL6if`I9y|^~ z;ePZ$@z%ddiqNFe5H;GV^@c(^5ZkM4dP%R~Dx1*{*mNUBxRPd(y8=C=17mbx56guB z>fZ~+ezb$!Uq|9^qUO=IPIEcEYQkSy-ig)^aDS07c=80a(hUN?*USjkJwr=ncI|Xv zWoq}rX{%Fq z)87%OA_Mo)E9^v>63`T$Algw2%&@A1-TdcJc=Pk9Vac2d`!8nz*YBy$?9Q7_dQ%4` zD`+5i0~?gMP2nKO8@7Gbs5~<3=lt$6akuxWcS?u0ZrWuXep5rC>$GC%LvGUf8wd5u zKF4S?OB@Ma4^BYr@N1iW$d&BghfZDO-t#rAS9O&SBT2f1e5@xEK23+O4M_~p| zL)VQnc*TSI+l1_$t_X?O*+1WBzFzn(=UCsN)R>u(36BXc=X(arWBROxM_$_=|F7Hd zzta$P_#9N=q66NXXi7>U1a+wQ9Bag)xtBeJ)XhY)H44~rzt*|N0;zG)u+O7OXiOav ze+zP)&l@hk;J&`xRc3257rCeC15S!0YAcg=)QS!$&`2PUPT8Q{FWG{vz12B7%{I

jIH0g3{^->#Hm8PQ1fz|nSKOV&u^rI#~(V7Iy0nkcm;1x zV?P0IU<}kZ6!uce4U!AS9C~T`jeVuqeAVUNS@{^+M#UHHOxA|mRzjO;Ox4s+pmK0& z>TAUHpoF^z`=xE$v?<>;Hrsi@XMaR=8d3xwH;P*KY=y44vwD$x!pbD#n^PT?7c|UC zC$uO6KTPX9t1%bstI(Z3mA^KB{1kN={Q=_+bEPIDJh^9=gj-27_+1D5ev!CX)0V^* z6fdWO2UU5`P53(1i=+$sd}8pqc8;c0NRp0T3)5z zehKawCT%~?$i%`WAzGQUTs(^VGM5fI+J_{^N)a-}#cc+B_(cbf>S--G1m|NPz5n=f zRYhYhRb*wj?xAcR%Q|s2J^3ZEK#W|0FH59ote`mEY3J}>dpZS~74v;Tu0h_NxIFgT zI>yP`N(1_WnJLM6)o54*8t;%cpHF;AO`<^Y-=VH5fhtGL`G+fHDJXXz|ILQVM^3ZE zXX&?&-1)*(psW069sOdNbp8+R68Z>qW92R#_P6+QdhNwm{1kcY_*kq4)u+D%Yqq%-);9#Fm7PX!|| z=&t>kdNjYIY|QgdnFH~|a=vZ&dF#|0+~f5cHM%miIYvIm(Cq*bT6k+OD8GM-)+A$Y1!Aq*j zg;WKn^(>Q4s8Ix*v4&}SXm@NrL}+fl+*OoieZf3J{o4G{M=(4xo_a|X`0#A*3{4Qx z>5ImcAUL+2$*`0nf>{xd-Mm9}6y~%<^4O`fX`ioA=cs823@$ASDyX3hV}VbF%2T97 zUw5%h+^G+28khLevHRrW#y#^uKB@JQB6PB@so7+mm*I-hw95Co*1Gh<0`{@uY(*Yy z`dEaEi!?J2o32NeyudLwI$&L;c6&mjO>^0H%-gB8wk1U_Dp=w{qRqk)BO^BD?g8;_C+0USw<2LVFg^t46@({DdL_lR%kd~7FdfGQz0qDU zJ$Ha9nAtjW`MZ!;#9*DaSj0lGbRKOMj5}Thd4f&^yANTIn9@L9Dty;AoYETon~AI0 zzB76z;e!p;bH^DA-Q*%a2y>mlP3b`>x>Fxcl>((xLrx>Yk`;ecMDB%KYaCqe-#H`= zUEP>Fon+$a)7%%$A+l>Tn%*A?I zKqtiw?^uq%{N>!DdC#53y#0QN>%``%LG@V7MU+gkmEY%-{8#wma@6|SJ_!E$=o}ru zKAH=xCm=iXdXX3qNI?;oO5eQKDV=)#{EoY9s#s9aC_|9f2?mwuM!|z&f)O@o6T6}| zC%3y2^clgz&FTJizRR`D?(VhS!4r)vHwvG~xN-!X?!6@M#zYzv9cbkIzF(mvBC@O1 zitf9~&60=2)xBC{X#Ua`JB6M*9I3@nhvPc1ZvraUXxtsmmFv?)#Awl)-xVnh8h$Ic{D zKu~}smzu4nCq%hQ))iXRzU7+^u}`fxVVh;3LyZPEs#N$0RP3S_p|7hs zFw;{r>q+d|RQ**h-Qw4^9b~~5ahIPKT+*%V#Wi6=Zc{5#Y&L|$!*!2u;9GrFZ%7Ug zyr>Ix-`Ih2hTX!Z@dPV(&&5SkY(0o}1zEnbRg10BT;gMAtZ(f}6ETM_1o{XYA5!f#mn{&1Oy@ohP8XM_gyl{W^u!y6&ee zlxicMy>2REm9TB-c?h8*DF%KxIuI$zL9p5tMp}H`BPWRLtI0N>g>>i90ueq*A$nkl ztpsPGh`jV_n;aj&^igb0ALf1{hD^dWsA3dB{9XYcNeioXbV##3zu0E}^qyPQ7#pPr zp}v+1wJ9HLJWCnU%x)@f?Sc7y2UJH#LJ9n`)($&U`m{L!Za9|KUd1UovQWI_ifq2#gy>%%I_c{-5s z^bH;OrZYnau2v~-nq8?ubsmc8XsMxi{1m^JU~hy2L1sFIVYzO6DZ5IZB^ohOg~~itEX{L0RDt`}``;?wPN@ zi0Ns+hvtK-H{+XvbQ#O#OTE1jrHD;?T$NEDq?!(VUKZZqbo_0BFG#z%IV)T8R6D6! z{#ErC)LZ3fk~K!+EY`e>`&gLj*o_S+*RSlnyR0HZfzRJ7`ECnqEJYS5AMs0oNy5SC zpz+6MBw6lEEooz=3TiSLxb2`W=UGW)eGVV=uDSMx`0!@?KIIRh(-t&{jWpSMf@&pM z9o}PC_O0+^P~CKxwMWOm?g?ethCIr%+fe0&w3i=Go&=Vb2O0_yh)NIs^||*u4h{=V z8SZO=8MYFsrWdMD4Vhb-X!yoGndIKe?}gx>m~Yw^->pYK3pYx8MPR=B%yL+?*o0YN&_eFMk~JlPK~jj0J6QW26mHafqF%!8$7ZNmYoi|n zQkB^q0@;2eSJYD9GO=wpcr(^CWahV#(hJ}HJ3!)xtJWu><5!5_ijT^LchiCTQi>yl z1YJoonqGsbX1-IlSw6FuDi%@TEEB6Ovvc{#}+Im<+BoPt^EB&n3eQVasgWxJ!i-jc z8p5sm{o~jDHv69O-fKS$CraEBBtgHJCoMoG^kF*;#ABjgqFug~&ELoW>A7WuY#nxr zlWYki(gG;=EUf!e&ht_`f-lgHU82SXe6i#f`st|rHAwup)y^#LZd|U+4c1xk;dQk>cABri3?WFxqMi3P} zkcT&>XdcdQPb}w z?r6}wvuNocs{Z@H$f$FaEcA#Dq!&G9ChAe&sa92?s!D3wQXa-yDG>UZlodrzyuj;A z=$HR|pPcpsLOU7_T2^h67a^!i2iXfgZr3EPt3b2PsJZEDm4LzhN*0#f`|WR49WK{b=ylSt5Laj|$vG{uYDMQ+7P5Th|jG!?47Lpj*m!d*S z^U+EdM8|unsOD=QnWobE9&L`XD4#7=^zNvz??n5{;WCw7*SUvWksl&de1*#_;L6k$0l` z9d*g`z~1}wwkf$JXK^D1^HjGvyY(txhu(mF%CNUU}-08dBw1b&2&cR_g~j zgAcc*Vb^DN<6y4vB}hYGx6l0O%G79Y8st)*SL^Aj)XcY+*NitG6+d)I3jtb_ha8PM z7WyIt6d)pe0uQFg?8FS*;K|@@NN;Hjj*=ABJ}c}2YqzKEOf+5N`KAN&wYR><4kLNmmUMbSlg?JXLEY=$H8 zIYcsvK!yaz1pZo*Cf_E0Y-GS$yWV7=?bna4)>9@ZwsqDHlAlY)qkL2bOa zjyS&Wz;e-4D4}wx#eLht(spS_d*zga4UpX5y(QzE>I-`JJZKCG*1+1UCTgqNgewo@ z#l|(qJJ^$)1X6>mOe0U2xGXF8muECRX5UoafJSRa)7bb4oQvGq4o(;&N(Pg~h; zBB;)E+r2JLEyhxx1Wfw6`eS9Ab}zmxt7w zS~r4&#u8@v98t2a^N7&D*q^a9?R%JMz}av<(AnU!^^fMKo>N}mY?8mM9S?U^n9mr7 zw;)FjMnX!85~-Xd{=-N4xGRWG(2QbUAda<~;WMOND#I?f)`!Dx*RiQF2bs$3*4@Cq zxc;#$i~ZhZN#8uu72*eK6~gs*{2_OoKliM%B`MkmH$UHRuw7oc5i~o0;+xaX@KF5r zNXNE;!Cl{1m~eLthcVhqjdXH~4n&;G{6RuuPYvsQKL5NlH>9{@BSGas0PKz(MU*5T z;NnF{`>d7ojjZEo_w-9azQJeyJj#s1pyKU-iYZv@vr>fK%mJi$ZT7XDM~V`x>xHwA zDwv)ogSLeQH@$3HN4kLjuydzi*KFKd)ivX??YFF0uAv(yr~11yr_uZfSDzRyw0Qf4 zU?Z$VRJPYstA_e8?iTjP6K9AvUHp5f83;*js(uGP3FmJG!!WJb$t%CInlb~SXDq>G z%B#oiHtncRNZU#w03o8n5$6sH8*_VwxA=M;v7NftA9IF35FE>X$lJN9xBP{=xz>kq zNgCBn+-_eDHzGf^v!#7q##^dAgVtEbO$6p(T^c+;O=P9mBqiueG|YdFCZK-)V7O!; z!qLI>QIJAFbjpTlRMN&^^{k*%GLP!zX^?Nu9yC*j&}UNJuIa?}$1ubOlWd7(^L?H$Ky#FwXqh2c@)52VOi39;Ko85LE3eunrY3 zu9U)$$%QY3FiS3} zJj^i1q%rGSBT@Ht)w5@kpYjwaQ-_t&u$Ou5)s>{UJ54k7!= zyJi^4<|eG0$hE2A`Z^$~Fh;eN2|bNqs4c!^Lj0_^=6!d~Xvz~bJKwR=RhOGOd9-;@^D~z5#^Lfr zi3X@B94tI|A9iEXYe;!jg4leOFLLdQ_bS(}_JK=2;H-dD1;d^J)TS6HB5i6B9k`dq z)8~UReKtjr@OvUR*?J*R;ELWRYw*Ll2T@+=fKKIC-iCryDs321XWnH#SBv=&N0`T| z%r=ef_Xkz9Gf(fHnAd0>9GP``Y!!Dyo!yc>p+;_YyPos~d_p(1cHev!O1zr+na14W zX)Z*$l4mCQ*==N2W$Utcd()Lk_1-(O(pe!^aVOCaH%QOGeK(CJ=NMc}oC1UCuJ9k5 z1%mh?&gxZ+`8%^yHJZBDWUl;Vxq7Fh#{;3K#28We)p`+$zk&FMvZuBp+!xg_ZS~#R zqAIQC<(`dWoNet$CIy6=ROG1-g8~ntm^5*0mde;`WLKgpwY^;j-r3xX#&Jj9{ACt` z)4tc(oMTzAPK?m}Wc+p=ijTeEtE;1^N;w<9x45rlTqh|ChpVf(o9X&WbFWb-r2(8~ zk1*??a$3LRIehZwbvvUZnc&Dx)%RNqky}kBhj|B?v}3-coG?{Aeu{YGlZGJ;6%T*b z*z~YUHxtNaY!1Y3FvtG5y**^$N5VxD97Kc^3!-LADsB0eQvs$4s=rmYMOte|&kAQ4 zIe&TRWb)ol`6p~v8AC1GVj|z9=#ga0`5NHzGw_~DNTQ?Lwb5tXsB48e*EChbM^W8hGVlwYfYwZD;`6XppSsI zRRk>8fOC!)PdJfaE)_bQ8ogz{0YPWU)fsmmfJGw!5j9~ZZ9KlmB{!r0b!(-rraDGt z3zKlU#NuipFRu}U)g|Cq#GM$Y)uFq&2hbHdKsu>L;WzJVemdKSC-^JAym$APaAV-O z+9yd*qrPU|E!Z0v0B0v@5q(H!u;@rIU$9a{a_qezcIT6e`)hug*5rx`hi}JjHNtx`^hyAhz7b^&ozd(e6u4A?7Z8|U>h}yj+ z!Q#%9a^hqCe~TGVyZ)%XBv4k?hyfkFV~B6Hu9Ou1Z}QcDwgfD|)DJ=)hWA^`e+b21 z-;m<2tDcdPZ%XEngG4|_(C_VNAZ&_RgCr2`mbaMj8(4`KpG_|67A|^dDIE*cE7d7d zlt>+PSiY=O*krLlbK5Ax8+2zEXn)-Nk)VlwMUS_#MJ7MYGX0ql-aw=z934JRbH$2praJE%?BfC?h<)} z2YNn`I>lzTh-G`4S-S`j(ATu@5Y~D02gcAY=OJX>!Ozvu<|#OI?5VQZqZ9s;Ro~I% z`ino)+|pK6(f$Zb7P>Bn!dHxeCG54C6SMG!<&b12C1--_yTMBLz+KmbleLNGJn!km zow>1Y7}hzB2GJx>wQn0MKQ^rh@-)GmzSq*5!u-mn&LK=8)7D?hJG18=0Udly$}Ije zM=a8jITeW>KMp?zEB8qrBV}UkA>v`sDX)MwG0S8#&!;X*!_57N1PON5ue+U?T?YdM zl9j>-CNND`Am5x<402+_ES_qNc0u?YWOPOfHtjBGy?7=X)BI{P_>+1}NA_v`gD75F z^y5*MAw|DO8!}H69t22LDjsL%?PPw!<*e{+z@l1Kz{1kl!a#t5KJ~8(9okcb&k${J z8GlRd*>e2@sm{{iXGUe~Pf`V5rp=Zu(gE*XFfkWlz1Bz(8pYI5&QNRC2-B}c94_?T z;aT^sp-%Yn?5F~|>eq^qe>VR7pdBUPkJB{ZeWJan(kpB0OI+yzEe^%1&hdgx7HKxN z`Rhe!b%aNy8`v|HBs232;pU|QpTHqvW$#Tk?5t@=**m(ThoZ#N-e?fRxN=Wv-e*-Q zEk4)`Ly3N0wi*2HwSrneXzf|(qkc}0{>VHOAe=;eR-^-4S%f%ekq3~ zF}114_2<^RGqA5n3O|4M5;OHZ+WEHxqEo9~%CR)H{4@wdy7&#Uy5uWd$oI_RQ%}8i zeouoGLde97cBe%^cs00V&_d_F_*cPhJg=0A*vwK79OUp1e)Z&iEAMZZSeKQR(muXk zPtR5D_F?e`DK)>`FjlUh`n%~^aWZ~1wjO#sIOhgFHtu-Au2pmF2-{4?_zm1Srgbwo zh=c{T>X*6khe?xIU8F+7&KD2*DX?x=vQ-t0z75OKSb^XTz$ywOLSG*jjF?7>sD#AQ zj?d-SXIqfbjXJf`{B+>@N$VGf5^vFZ&_`)=YL?Uru)JiMFU-A_OQN9=npyU_PtPjl z)6Uk+`fVQ{;>(HmwA%dMGjka&`%l$A<(aO}-Y!*HZ&AA%)?*-G{S7Gte~su=paUKx z=tIPu-SQQ}gJ>ErR9wkjw5T5uB-Y}zgpFKb8?r)w3w*Bz=HEAS5o8JY#DsiYp&~Xs z)VJcNC2H~ZE|ZB5^7my51NHM;wD!PvKiH)8W(oqMMp9bd^m8Wbg}I@5N%PpA{gUdX z)7-wYuWl5|bxM8A=J{$m|M<>DT)VWc4^uqtYvLc7CfCEWTk02_-Kd7pSZ zft%HTcgqn7*#G3jPk;S3C&p_|E2IO9&;j(64|axWP&vu8BMjAz{@v z7s<9@qGREMB^Rq*L&k6S-bdekPX2<&vP;%wG@r}(3T6YEX~)3L5$nbH#~9pRlN>y9 zl;ihdT)(V&kXC6U5i38(@@RPHY{eB@%)Or&^_o{v+)~jCys*Jhig+PR<_LF$(c)5@3BoU9LshXVAp6PkQ&x4G4kDYorX*h zvh*cfzDel}YZOf+Zfb|#)a@JsefJg8{@0S+pJwjg<$8i{s$YMva* zf+D`n3SytwZ#x^<733_f%#$2!TkOKkFR8AH%u4YX&fP%Zzk|+&z1JWxZ0Usomd#iMPTa+{Dpz|_bw7&y8-usrBg3z*=gUI6nUpka?w z1D!K7#LsLH5W>J;(}6Dz@Q+4Z9TFW8G^ykA_L+8a_I zzPM+oY;(p=v-Dxr{7@85k?jy?0s9sXhaft`p)SML;M&@}z04CP|7gZ;rptbGt2Fhr zXWztJshF7CBRhM-t;rk|^D#IFd=|+;QzT{U3*{FFXY|@PaT{+f z=?i=aPZzHy3huo18$Q7hdFME5v|T=AgXjH+!mr=WrBa_+&g{5?K{4o_M{Ne5?W8e{ zqg~VHJ+`01HsdoUObIs)3p5)p2`TuDpB`84MJv3vidK-k$1Lpg3)u;&gKg3Qg7USr zyQJ`ya8}#8tSj%cLt|QtS!X;m^x|0OK6> zzxl(ig9boaQ^}Ymt8-tS<%#OiN)X`C)B5Th+Z$^Tp2-M;Q_stmU2LZXhlw+w}HU zr$nLQP|Ol+w%`x0aVsW(@Z(maN)bh?CdAV4Zt=SHPrUsb`Q#EqckaiR4`*<$pUs~w z^XlnPjK}r(!W728PlxG$xY0n&u0Cd)W*zu5z36*M;OC+!lMd9LH#Gw43}vT%Uaoaa z#~WQrSJRL(+pd47w6c|#+z~R)x%>SMqjuz~VN!wL)y3y#vk;G&9WWOCHwYea$BOb6 zoVii-4hsp)g1|(7&>~$dxQ1~5GcZ37YItWhSfB`QK&-O4QA(t|nNxkhD+YY2_+@zxAkX%;Ki0V#f%dRIibbft3vL z$j9o)gIi#CM@ihD9p%8;4s#&&*(XA`pj@7zUcLEen1-yIUfP?97PL4jvftCmvV{Lc zVfoNOphc<%f$=*M97GJ#oCLwkDPl?Cg^uA3_VX{!m{u&EtTB>GXm{JfanfuaOUyQ^ ze-N<7vUmy>*4>G=wVEt`!q$n}1Fc<@e{hQpVR;jRPwH`$dYG}Z@wwS8YyS87RF>p3 zx`Qmt1qY~IU%Y%|m^I-_@0?MW596FJu^zQ_rR>{$ull(8W869Mz%O5PBV_Q(RQ+}) z%Bcw&YZyO1t+Prcx)bXqIcREa5wSKcn1Fr2h-W|GC`=iq%{o~*B%nre7alrFm zClUKk-uEwE-tGI1c^CsqMMava3Sn$@@E1^QK}`wxJ;#AvjO&FxE2Ij4u?=b-urLg*N@2ru50A%%4l{mE#c29PPP0+tLlK$=PDXaYu{ds-p({R9{~> zrg}2~SU0n7M@iygPb z^4N_RI`!RTMnx`C5O^jUTQOPR1ANPi6NE-c^>{>7U5@&?uxAKo#$AKsUyP57#81W9 zKA5lf>%3^acf_cdWQ(D`l?!}vrxIOOPP!4$8Jri4c8yC}Q9=6L8bo7q&>w$yo2Mao zRcN})9*ufjc=Kp)_>^7~C(<>~_RYbZn7cUumUK9r7%0j(ZPEjO0Uv|mZ$rNEufV#M zmt&#F+r_`EK-qtxzT0{BSJw?;(xpGg4Q5HpZ`TZP1kCrN5coopDJV&n9DC{uIO~Qv z78Z>@Cf!i?sbQ?4C!p$r_ndSfbJYu%WA^UuB`03496vyIhn7+J$wZZz>y8{GrWG|o zQrwM9o`v;huP@K~<6>=9{f-}BRsS4r0EgwSkyF@Ac@VF(OU?!p@Ntw*NK4*)(NwUZ9_C=O_$|fwSn6%FcYQD=H%~bD>P{PJ1XGVU3 zjHYdYu^#jKSA;7hE>C_-5YOMejL!|zBrug&IJ{2LXc=OUukCx5p*45=w3SkqCuh22 z72!fB0&l@hQik@-OPJrrzaEWk)o>=ai2QcWp13}N^NV{?H-Nf0q(`tib!qO+QK~ex zNsl!cN(3Q~C_M)XA0A#ttv`|y;>3&8Si;SGiDMr`t;*x&I@`3fK!Ai^I zO26CXYM~c?lG}d_t@)R){IYuCw)2@|F?%`A`QRM5B#2jbczyGz{b0A|@Szz!v5#PR zXjEjFA)&5-8IO3jmT^Mng3)~XX?c^uBM?`aXLEwN%iEX;Py7N?bRw;a#v1HOd{PH~ z=R8U4Go&6~6;K##-(6cPaE^;uZL7tf{?cL*CB7mJn;p1856h-($Q< z^$pv=)iY8BSvw60WL9@aR(u;O^5QMBoxxi-p#$S{i|Sh^%$GE;^@+5sR_iimm<6Ap zzC>W+53LX3l>|?_$s#7b>n=&Wzjkc5Km6!K*ZWZTVZ?iAW3-cbjMkC{A)i}y<|aDU zM21f?#tx?L-By&qzK`njJ9_*!uMM;FQOEPY?@8 z(gdGwi;cjXz%NG~N7L#LevrVj7pAob0rzf;ub_XUHp>V`-4HP-=xno4tO*X~wUhOW zN*wtw8@43f@1A|PS;O0A^vA{;_a_1z&fEs=C=)@lYX!D-5Fi(cwb$gc4fcjC^p9n5 zXP;y~INHUqbot@^J_)0}6D96C4aD8ShM@cN(J zVgrHOY!qKm!uyDUvll_{33dpjy*h!y2%#s`h`*?rr_f%bvyC>?*8viN=3G6Mqps)C ziHqL0dS4!AMx1m_mYCP1Vv~R5QAFuL{JmBP9WVerSWx+&(}5ia_v6Zb2%@M9e6scCkLHG-%_go{`zgPtLE!>S8MGTq>WB_GgupmxPV2BtjxFNoo+$D zP=mgC;S3Q(>T{I^SerowwUik3P_}uJkImMJ#)$he| zL*#41JCiYsb*{%d-or?-9Gzfczby*f5ly)1fb4YZ&zaW{FiggqLerASMzl`EF}T#? z4f#p0y2)`3{#PGcCfMtrHk$~=skqk!g}@mUC-rXYOc`PP>P$a z^PTgio?CC?T`fACpXD6C@~VR7V13xUVJDY_b2#~J}E-PSli|2)3m}5BcnK@?i8H`J9nD&)V+94=2Rk9Sn{PUfcnMJi@-lh>rFB( z1M3q*2i z@o>3YF#svKG+)#Fq1caTZ?^t~JN@BF=jahhLx`RD^H=HQ=Fq&TwYj;SCDRN2({3)7 zp!C#hz)5A50u#%Vi4}>wp$w6vTb2CMB#^DBT12OKs~=fCxYCM5CTqL;Hq_64dKxh`z_1hdMGiQ&9MKUg_F@%Wj5e`W0SXww`(VAh?#7NXu@vl0 z26R^ImWusw@BB?#xp86{j^LJhOx<4Q10vDh(7MsdT&6-=a?8#zd{}^t{4sx>*YLf= zZN|{&=QXI;g6RN_E0vVs^<)c=!nMB+VHoM2+WY0T5Xwm08RDL~cW9m71J1-4d?w|L zZCD7F<;D=#0rb!4Y}{_ent@;OK`+l|$PgP-=t>@K1A$pY$Ac^5CA>ooGy8p6>shP8 zJDX?nS&0~!m?K^lCAq4vHfx7ftB3E9`9@3=3HG3+bAv&j2o zMntEs9zXce%JDu}3maQiv7sIug%fMLymBk!Y_rYH-nR-xS8Zn8SHrq74+ysuPrAqq zqe+U^lw$G6-kKUj1#Vv7~8)q`ZJ_x$BQ?<3?Mq;t3NaYgI% zssRDjie*LcJ>_83S`rwlN8k5~=Es^R`!v=pxmB3s#uiaKMd}|f-+^X&BoxMsI0zsg zD<6h|C6v+5?drvl&E+J~Q%Uo~6W$;|v{I|#0)jB|WPPD-q(GRC@T9Fn=7Be;v#&v& zU30k~bIeKra!=_Rt+Wn8(vjj5rxl{#!d%cHXA%rh(X8Q5sfWn z;A}h`^SXgK{d2%Osfc&pNd=x{AaL{Q$4iCMmjta00j3B*98hw&(cSzyZ5TiG9~TQtPuB7)n8a=Jx*p6;WaWzJ@Rj&WVbTgw>(TaqO&-N2xvvW$;w2*yGo5l0 z(p@@_P{OK0F|%rE9^dLorPx1`C};n&y}+uFbd(M>553jqBSH=s(Y^SC4u}pv&n-PW zxcmS0a;?!&r(t}OmRuqgn=&INEOtk(4YTIRk>r*|h?&`PNsY2L8ZrzSOD>H|Q6x4- zhP5+p%_^BRqo_1tWJbm{6*p}qdjH#KL^htvZ6YIG!K1h6!KZ-n%*84vpC$RMaBB_B|Uv{lte*yZfS( z=@Dl*=F}xFoB&t5o>gTI!pTcJwNXoQb){fc8AF%f(etLqQf-@Fmhx525XSQUylth` z8Pe%nMLD=~G+0ne$Et(Dv5y!ivaX9;&^qQE(My4BJ z3C=XS(t>Fom_p@xW=kq_n^N{bI!v*y^AJ?nJ$==sQPrD$yoMJHVeviDR zrBj){On*aRgGRuR-X@Ro4ptu=u&3!BpFGa{@my(HM1KQ&5uCuPSckMw5GcPkU#y!f zrs`IPjEcVYOS3UKIX^>Iz#1vjhGUhaY3Q)|Bn2d^(9?qbf)NBM+*sV#L3Eo~!jSzx`8%>H|1Amqm^ZAC^q)40q^o{1##rE4x<&D9BqYx1pK0@zA z{Xn1Vp-yxABxYr+)qP9OQ$yFmDA6rVjh;6S0q|7IDlG#~t2SMN>K=unHNpe*CYid# zgzO;dXKQ20kVg@SQRZDcZX>qlI+|lv+yotQoH(ZYpIaV|q$W4yEQf)vT7^VT#$SqM%YO+kn zhu%IWiCl&SHJnly36I&x>3(G+(Qj31rug&b_=yw8Hnu7fq7m?<-7y23Cr!l8;&Y5l zF_^h7{vO*a&Z&o;7BSgX2a9hHW?h@O>tklXez9EOS@}J_P(fHv2MEy` zt=PH~8>ZvPA>=5>L4~pa72OTZLKk^jE|=hiYd|*`z7vRM#6E!gaZ&Lo$GL+ko#^5v z#jOxb?c*l&fnvsF8N}9ag6Pdbm~r9*1HRQjdyL*l4&{@=gs+!RzOeLiA~rAW0rSm< zFR&Co*9rMsQ)ddKeixaXb%+2a4hFi<4^Z0GV!Awh|h+f5;1b?r^aBQk$^L$&T>{&nH_xDyh00vIPQtSmLxj>0>DU RQtRIm6JN^af7w|V{0R|R>h}Nu literal 0 HcmV?d00001 From f925b844cf8aa714914eeaaea6de12eed4dd5e1c Mon Sep 17 00:00:00 2001 From: Mohamed Sarwat Date: Mon, 11 May 2020 08:32:17 -0700 Subject: [PATCH 18/18] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 2c9e8bb..ebcc2b8 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ # Welcome to RecDB RecDB is an Open Source Recommendation Engine Built Entirely Inside PostgreSQL 9.2. RecDB allows application developers to build recommendation applications in a heartbeat through a wide variety of built-in recommendation algorithms like user-user collaborative filtering, item-item collaborative filtering, singular value decomposition. Applications powered by RecDB can produce online and flexible personalized recommendations to end-users. -![RecDB Logo](http://faculty.engineering.asu.edu/sarwat/wp-content/uploads/2014/09/Untitled.png) current version: ```v1.1``` +![RecDB Logo](8F51D3FB-A4C0-4236-8598-A084DCDBCF9D.jpeg) current version: ```v1.1``` ## How to Get Source Code