diff --git a/contrib/file_fdw/data/agg.csv.gz b/contrib/file_fdw/data/agg.csv.gz
new file mode 100644
index 0000000000000..83773d7f3c83f
Binary files /dev/null and b/contrib/file_fdw/data/agg.csv.gz differ
diff --git a/contrib/file_fdw/data/it's_ok.csv.gz b/contrib/file_fdw/data/it's_ok.csv.gz
new file mode 100644
index 0000000000000..83773d7f3c83f
Binary files /dev/null and b/contrib/file_fdw/data/it's_ok.csv.gz differ
diff --git a/contrib/file_fdw/file_fdw.c b/contrib/file_fdw/file_fdw.c
index c5c797c1a4c76..65a5bf9d7c889 100644
--- a/contrib/file_fdw/file_fdw.c
+++ b/contrib/file_fdw/file_fdw.c
@@ -46,6 +46,9 @@ struct FileFdwOption
Oid optcontext; /* Oid of catalog in which option may appear */
};
+/* Totally made-up compression ratio */
+static const double program_compression_ratio = 2.7708899835032f;
+
/*
* Valid options for file_fdw.
* These options are based on the options for COPY FROM command.
@@ -68,6 +71,7 @@ static const struct FileFdwOption valid_options[] = {
{"escape", ForeignTableRelationId},
{"null", ForeignTableRelationId},
{"encoding", ForeignTableRelationId},
+ {"decompressor", ForeignTableRelationId},
{"force_not_null", AttributeRelationId},
/*
@@ -84,6 +88,7 @@ static const struct FileFdwOption valid_options[] = {
typedef struct FileFdwPlanState
{
char *filename; /* file to read */
+ bool is_program; /* whether a program is used to read the file */
List *options; /* merged COPY options, excluding filename */
BlockNumber pages; /* estimate of file's physical size */
double ntuples; /* estimate of number of rows in file */
@@ -95,6 +100,7 @@ typedef struct FileFdwPlanState
typedef struct FileFdwExecutionState
{
char *filename; /* file to read */
+ char *program; /* optional program to use to read file */
List *options; /* merged COPY options, excluding filename */
CopyState cstate; /* state of reading file */
} FileFdwExecutionState;
@@ -137,7 +143,7 @@ static bool fileAnalyzeForeignTable(Relation relation,
*/
static bool is_valid_option(const char *option, Oid context);
static void fileGetOptions(Oid foreigntableid,
- char **filename, List **other_options);
+ char **filename, char **program, List **other_options);
static List *get_file_fdw_attribute_options(Oid relid);
static bool check_selective_binary_conversion(RelOptInfo *baserel,
Oid foreigntableid,
@@ -186,6 +192,7 @@ file_fdw_validator(PG_FUNCTION_ARGS)
List *options_list = untransformRelOptions(PG_GETARG_DATUM(0));
Oid catalog = PG_GETARG_OID(1);
char *filename = NULL;
+ char *decompressor = NULL;
DefElem *force_not_null = NULL;
List *other_options = NIL;
ListCell *cell;
@@ -243,9 +250,9 @@ file_fdw_validator(PG_FUNCTION_ARGS)
}
/*
- * Separate out filename and force_not_null, since ProcessCopyOptions
- * won't accept them. (force_not_null only comes in a boolean
- * per-column flavor here.)
+ * Separate out filename, decompressor, and force_not_null, since
+ * ProcessCopyOptions won't accept them. (force_not_null only comes in
+ * a boolean per-column flavor here.)
*/
if (strcmp(def->defname, "filename") == 0)
{
@@ -255,6 +262,14 @@ file_fdw_validator(PG_FUNCTION_ARGS)
errmsg("conflicting or redundant options")));
filename = defGetString(def);
}
+ else if (strcmp(def->defname, "decompressor") == 0)
+ {
+ if (decompressor)
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("conflicting or redundant options")));
+ decompressor = defGetString(def);
+ }
else if (strcmp(def->defname, "force_not_null") == 0)
{
if (force_not_null)
@@ -274,13 +289,28 @@ file_fdw_validator(PG_FUNCTION_ARGS)
*/
ProcessCopyOptions(NULL, true, other_options);
- /*
- * Filename option is required for file_fdw foreign tables.
- */
- if (catalog == ForeignTableRelationId && filename == NULL)
- ereport(ERROR,
- (errcode(ERRCODE_FDW_DYNAMIC_PARAMETER_VALUE_NEEDED),
- errmsg("filename is required for file_fdw foreign tables")));
+ if (catalog == ForeignTableRelationId)
+ {
+ /*
+ * Filename option is required for file_fdw foreign tables.
+ */
+ if (filename == NULL)
+ ereport(ERROR,
+ (errcode(ERRCODE_FDW_DYNAMIC_PARAMETER_VALUE_NEEDED),
+ errmsg("filename is required for file_fdw foreign tables")));
+
+
+ /*
+ * Decompressors must be executable.
+ */
+ if (decompressor && (access(decompressor, R_OK | X_OK) != 0))
+ {
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("decompressor must be readable/executable \"%s\": %m",
+ decompressor)));
+ }
+ }
PG_RETURN_VOID();
}
@@ -305,12 +335,14 @@ is_valid_option(const char *option, Oid context)
/*
* Fetch the options for a file_fdw foreign table.
*
- * We have to separate out "filename" from the other options because
- * it must not appear in the options list passed to the core COPY code.
+ * We have to separate out "filename" and "decompressor" from the other options
+ * because they must not appear in the options passed to the core COPY code. If
+ * a decompressor is present, a string consisting of it concatenated to the
+ * escaped file name is stored at `program`.
*/
static void
fileGetOptions(Oid foreigntableid,
- char **filename, List **other_options)
+ char **filename, char **program, List **other_options)
{
ForeignTable *table;
ForeignServer *server;
@@ -319,6 +351,9 @@ fileGetOptions(Oid foreigntableid,
ListCell *lc,
*prev;
+ char *decompressor;
+ char *write_ptr, *token, *input, *read_ptr;
+
/*
* Extract options from FDW objects. We ignore user mappings because
* file_fdw doesn't have any options that can be specified there.
@@ -352,6 +387,27 @@ fileGetOptions(Oid foreigntableid,
options = list_delete_cell(options, lc, prev);
break;
}
+
+ prev = lc;
+ }
+
+ /*
+ * Separate out the decompressor, which will be used to calculate program.
+ */
+ decompressor = NULL;
+ *program = NULL;
+ prev = NULL;
+ foreach(lc, options)
+ {
+ DefElem *def = (DefElem *) lfirst(lc);
+
+ if (strcmp(def->defname, "decompressor") == 0)
+ {
+ decompressor = defGetString(def);
+ options = list_delete_cell(options, lc, prev);
+ break;
+ }
+
prev = lc;
}
@@ -362,6 +418,41 @@ fileGetOptions(Oid foreigntableid,
if (*filename == NULL)
elog(ERROR, "filename is required for file_fdw foreign tables");
+ /*
+ * Set up the decompressor if present.
+ */
+ if (decompressor != NULL)
+ {
+ /*
+ * We will escape the filename by wrapping it in single quotes. To deal
+ * with single quotes in the name itself, we will replace all single
+ * quotes with the string "'\''", which is four characters long. Strings
+ * of only single quotes will need four times as much space, plus the
+ * room for the quotes, a space, and a null terminator.
+ */
+ *program = palloc0(
+ (strlen(decompressor) + (4 * strlen(*filename)) + 4)
+ * sizeof(char));
+
+ write_ptr = stpcpy(*program, decompressor);
+ write_ptr = stpcpy(write_ptr, " '");
+
+ /* We're mutating filename so copy it */
+ input = read_ptr = pstrdup(*filename);
+
+ write_ptr = stpcpy(write_ptr, strsep(&read_ptr, "'"));
+
+ while ((token = strsep(&read_ptr, "'")) != NULL)
+ {
+ write_ptr = stpcpy(write_ptr, "'\\''");
+ write_ptr = stpcpy(write_ptr, token);
+ }
+
+ stpcpy(write_ptr, "'");
+
+ pfree(input);
+ }
+
*other_options = options;
}
@@ -433,14 +524,16 @@ fileGetForeignRelSize(PlannerInfo *root,
Oid foreigntableid)
{
FileFdwPlanState *fdw_private;
+ char *program;
/*
* Fetch options. We only need filename at this point, but we might as
* well get everything and not need to re-fetch it later in planning.
*/
fdw_private = (FileFdwPlanState *) palloc(sizeof(FileFdwPlanState));
- fileGetOptions(foreigntableid,
- &fdw_private->filename, &fdw_private->options);
+ fileGetOptions(foreigntableid, &fdw_private->filename, &program,
+ &fdw_private->options);
+ fdw_private->is_program = (program != NULL);
baserel->fdw_private = (void *) fdw_private;
/* Estimate relation size */
@@ -537,14 +630,22 @@ static void
fileExplainForeignScan(ForeignScanState *node, ExplainState *es)
{
char *filename;
+ char *program;
List *options;
/* Fetch options --- we only need filename at this point */
fileGetOptions(RelationGetRelid(node->ss.ss_currentRelation),
- &filename, &options);
+ &filename, &program, &options);
ExplainPropertyText("Foreign File", filename, es);
+ if (program != NULL)
+ {
+ ExplainPropertyText("Foreign Program", program, es);
+ ExplainPropertyFloat("Foreign Program Compression Est.",
+ program_compression_ratio, 4, es);
+ }
+
/* Suppress file size if we're not showing cost details */
if (es->costs)
{
@@ -565,6 +666,7 @@ fileBeginForeignScan(ForeignScanState *node, int eflags)
{
ForeignScan *plan = (ForeignScan *) node->ss.ps.plan;
char *filename;
+ char *program;
List *options;
CopyState cstate;
FileFdwExecutionState *festate;
@@ -577,7 +679,7 @@ fileBeginForeignScan(ForeignScanState *node, int eflags)
/* Fetch options of foreign table */
fileGetOptions(RelationGetRelid(node->ss.ss_currentRelation),
- &filename, &options);
+ &filename, &program, &options);
/* Add any options from the plan (currently only convert_selectively) */
options = list_concat(options, plan->fdw_private);
@@ -587,8 +689,8 @@ fileBeginForeignScan(ForeignScanState *node, int eflags)
* as to match the expected ScanTupleSlot signature.
*/
cstate = BeginCopyFrom(node->ss.ss_currentRelation,
- filename,
- false,
+ (program != NULL) ? program : filename,
+ (program != NULL),
NIL,
options);
@@ -598,6 +700,7 @@ fileBeginForeignScan(ForeignScanState *node, int eflags)
*/
festate = (FileFdwExecutionState *) palloc(sizeof(FileFdwExecutionState));
festate->filename = filename;
+ festate->program = program;
festate->options = options;
festate->cstate = cstate;
@@ -656,12 +759,16 @@ static void
fileReScanForeignScan(ForeignScanState *node)
{
FileFdwExecutionState *festate = (FileFdwExecutionState *) node->fdw_state;
+ char *filename_or_program;
EndCopyFrom(festate->cstate);
+ filename_or_program =
+ (festate->program != NULL) ? festate->program : festate->filename;
+
festate->cstate = BeginCopyFrom(node->ss.ss_currentRelation,
- festate->filename,
- false,
+ filename_or_program,
+ (festate->program != NULL),
NIL,
festate->options);
}
@@ -690,11 +797,12 @@ fileAnalyzeForeignTable(Relation relation,
BlockNumber *totalpages)
{
char *filename;
+ char *program;
List *options;
struct stat stat_buf;
/* Fetch options of foreign table */
- fileGetOptions(RelationGetRelid(relation), &filename, &options);
+ fileGetOptions(RelationGetRelid(relation), &filename, &program, &options);
/*
* Get size of the file. (XXX if we fail here, would it be better to just
@@ -900,6 +1008,8 @@ estimate_size(PlannerInfo *root, RelOptInfo *baserel,
MAXALIGN(sizeof(HeapTupleHeaderData));
ntuples = clamp_row_est((double) stat_buf.st_size /
(double) tuple_width);
+ if (fdw_private->is_program)
+ ntuples *= program_compression_ratio;
}
fdw_private->ntuples = ntuples;
@@ -976,6 +1086,7 @@ file_acquire_sample_rows(Relation onerel, int elevel,
bool *nulls;
bool found;
char *filename;
+ char *program;
List *options;
CopyState cstate;
ErrorContextCallback errcallback;
@@ -990,12 +1101,13 @@ file_acquire_sample_rows(Relation onerel, int elevel,
nulls = (bool *) palloc(tupDesc->natts * sizeof(bool));
/* Fetch options of foreign table */
- fileGetOptions(RelationGetRelid(onerel), &filename, &options);
+ fileGetOptions(RelationGetRelid(onerel), &filename, &program, &options);
/*
* Create CopyState from FDW options.
*/
- cstate = BeginCopyFrom(onerel, filename, false, NIL, options);
+ cstate = BeginCopyFrom(onerel, (program != NULL) ? program : filename,
+ (program != NULL), NIL, options);
/*
* Use per-tuple memory context to prevent leak of memory used to read
diff --git a/contrib/file_fdw/input/file_fdw.source b/contrib/file_fdw/input/file_fdw.source
index f7fd28d44d7be..7980619b6ecc8 100644
--- a/contrib/file_fdw/input/file_fdw.source
+++ b/contrib/file_fdw/input/file_fdw.source
@@ -72,6 +72,16 @@ CREATE FOREIGN TABLE agg_csv (
b float4
) SERVER file_server
OPTIONS (format 'csv', filename '@abs_srcdir@/data/agg.csv', header 'true', delimiter ';', quote '@', escape '"', null '');
+CREATE FOREIGN TABLE agg_csv_gz (
+ a int2,
+ b float4
+) SERVER file_server
+OPTIONS (format 'csv', filename '@abs_srcdir@/data/agg.csv.gz', header 'true', delimiter ';', quote '@', escape '"', null '', decompressor '@abs_srcdir@/scripts/gunzip.pl');
+CREATE FOREIGN TABLE it_is_ok (
+ a int2,
+ b float4
+) SERVER file_server
+OPTIONS (format 'csv', filename '@abs_srcdir@/data/it''s_ok.csv.gz', header 'true', delimiter ';', quote '@', escape '"', null '', decompressor '@abs_srcdir@/scripts/gunzip.pl');
CREATE FOREIGN TABLE agg_bad (
a int2,
b float4
@@ -97,7 +107,9 @@ CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (force_not_null '*'); --
-- basic query tests
SELECT * FROM agg_text WHERE b > 10.0 ORDER BY a;
SELECT * FROM agg_csv ORDER BY a;
+SELECT * FROM agg_csv_gz ORDER BY a;
SELECT * FROM agg_csv c JOIN agg_text t ON (t.a = c.a) ORDER BY c.a;
+SELECT * FROM agg_csv_gz c JOIN agg_text t ON (t.a = c.a) ORDER BY c.a;
-- error context report tests
SELECT * FROM agg_bad; -- ERROR
@@ -111,6 +123,18 @@ EXECUTE st(100);
EXECUTE st(100);
DEALLOCATE st;
+\t on
+EXPLAIN (VERBOSE, COSTS FALSE) SELECT * FROM agg_csv_gz;
+\t off
+PREPARE st(int) AS SELECT * FROM agg_csv_gz WHERE a = $1;
+EXECUTE st(100);
+EXECUTE st(100);
+DEALLOCATE st;
+
+\t on
+EXPLAIN (VERBOSE, COSTS FALSE) SELECT * FROM it_is_ok;
+\t off
+
-- tableoid
SELECT tableoid::regclass, b FROM agg_csv;
diff --git a/contrib/file_fdw/output/file_fdw.source b/contrib/file_fdw/output/file_fdw.source
index 4f90baebd6b09..c08d7bd19be44 100644
--- a/contrib/file_fdw/output/file_fdw.source
+++ b/contrib/file_fdw/output/file_fdw.source
@@ -88,6 +88,16 @@ CREATE FOREIGN TABLE agg_csv (
b float4
) SERVER file_server
OPTIONS (format 'csv', filename '@abs_srcdir@/data/agg.csv', header 'true', delimiter ';', quote '@', escape '"', null '');
+CREATE FOREIGN TABLE agg_csv_gz (
+ a int2,
+ b float4
+) SERVER file_server
+OPTIONS (format 'csv', filename '@abs_srcdir@/data/agg.csv.gz', header 'true', delimiter ';', quote '@', escape '"', null '', decompressor '@abs_srcdir@/scripts/gunzip.pl');
+CREATE FOREIGN TABLE it_is_ok (
+ a int2,
+ b float4
+) SERVER file_server
+OPTIONS (format 'csv', filename '@abs_srcdir@/data/it''s_ok.csv.gz', header 'true', delimiter ';', quote '@', escape '"', null '', decompressor '@abs_srcdir@/scripts/gunzip.pl');
CREATE FOREIGN TABLE agg_bad (
a int2,
b float4
@@ -123,7 +133,7 @@ ERROR: invalid option "force_not_null"
HINT: There are no valid options in this context.
CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (force_not_null '*'); -- ERROR
ERROR: invalid option "force_not_null"
-HINT: Valid options in this context are: filename, format, header, delimiter, quote, escape, null, encoding
+HINT: Valid options in this context are: filename, format, header, delimiter, quote, escape, null, encoding, decompressor
-- basic query tests
SELECT * FROM agg_text WHERE b > 10.0 ORDER BY a;
a | b
@@ -140,6 +150,14 @@ SELECT * FROM agg_csv ORDER BY a;
100 | 99.097
(3 rows)
+SELECT * FROM agg_csv_gz ORDER BY a;
+ a | b
+-----+---------
+ 0 | 0.09561
+ 42 | 324.78
+ 100 | 99.097
+(3 rows)
+
SELECT * FROM agg_csv c JOIN agg_text t ON (t.a = c.a) ORDER BY c.a;
a | b | a | b
-----+---------+-----+---------
@@ -148,6 +166,14 @@ SELECT * FROM agg_csv c JOIN agg_text t ON (t.a = c.a) ORDER BY c.a;
100 | 99.097 | 100 | 99.097
(3 rows)
+SELECT * FROM agg_csv_gz c JOIN agg_text t ON (t.a = c.a) ORDER BY c.a;
+ a | b | a | b
+-----+---------+-----+---------
+ 0 | 0.09561 | 0 | 0.09561
+ 42 | 324.78 | 42 | 324.78
+ 100 | 99.097 | 100 | 99.097
+(3 rows)
+
-- error context report tests
SELECT * FROM agg_bad; -- ERROR
ERROR: invalid input syntax for type real: "aaa"
@@ -174,6 +200,38 @@ EXECUTE st(100);
(1 row)
DEALLOCATE st;
+\t on
+EXPLAIN (VERBOSE, COSTS FALSE) SELECT * FROM agg_csv_gz;
+ Foreign Scan on public.agg_csv_gz
+ Output: a, b
+ Foreign File: @abs_srcdir@/data/agg.csv.gz
+ Foreign Program: @abs_srcdir@/scripts/gunzip.pl '@abs_srcdir@/data/agg.csv.gz'
+ Foreign Program Compression Est.: 2.7709
+
+\t off
+PREPARE st(int) AS SELECT * FROM agg_csv_gz WHERE a = $1;
+EXECUTE st(100);
+ a | b
+-----+--------
+ 100 | 99.097
+(1 row)
+
+EXECUTE st(100);
+ a | b
+-----+--------
+ 100 | 99.097
+(1 row)
+
+DEALLOCATE st;
+\t on
+EXPLAIN (VERBOSE, COSTS FALSE) SELECT * FROM it_is_ok;
+ Foreign Scan on public.it_is_ok
+ Output: a, b
+ Foreign File: @abs_srcdir@/data/it's_ok.csv.gz
+ Foreign Program: @abs_srcdir@/scripts/gunzip.pl '@abs_srcdir@/data/it'\''s_ok.csv.gz'
+ Foreign Program Compression Est.: 2.7709
+
+\t off
-- tableoid
SELECT tableoid::regclass, b FROM agg_csv;
tableoid | b
@@ -243,13 +301,15 @@ SET ROLE file_fdw_superuser;
-- cleanup
RESET ROLE;
DROP EXTENSION file_fdw CASCADE;
-NOTICE: drop cascades to 8 other objects
+NOTICE: drop cascades to 10 other objects
DETAIL: drop cascades to server file_server
drop cascades to user mapping for file_fdw_user
drop cascades to user mapping for file_fdw_superuser
drop cascades to user mapping for no_priv_user
drop cascades to foreign table agg_text
drop cascades to foreign table agg_csv
+drop cascades to foreign table agg_csv_gz
+drop cascades to foreign table it_is_ok
drop cascades to foreign table agg_bad
drop cascades to foreign table text_csv
DROP ROLE file_fdw_superuser, file_fdw_user, no_priv_user;
diff --git a/contrib/file_fdw/scripts/gunzip.pl b/contrib/file_fdw/scripts/gunzip.pl
new file mode 100755
index 0000000000000..14b1e6d15182c
--- /dev/null
+++ b/contrib/file_fdw/scripts/gunzip.pl
@@ -0,0 +1,10 @@
+#!/usr/bin/perl
+
+# Decompress the gzipped file at the path specified by the ARGV[0]
+# Usage: gunzip.pl /path/to/compressed/file.gz
+
+use strict;
+
+use IO::Uncompress::Gunzip qw(gunzip $GunzipError) ;
+
+gunzip $ARGV[0] => '-' or die "could not decompress: GunzipError\n";
diff --git a/doc/src/sgml/file-fdw.sgml b/doc/src/sgml/file-fdw.sgml
index 9385b26d34d51..4bfdca0aaafdf 100644
--- a/doc/src/sgml/file-fdw.sgml
+++ b/doc/src/sgml/file-fdw.sgml
@@ -32,6 +32,18 @@
+
+ decompressor
+
+
+
+ Specifies an external program to be used to decompress the file. Such
+ programs should accept the filename as an argument and decompress data to
+ stdout. The program must be readable and executable by the server process.
+
+
+
+
format