Skip to content

Add file_fdw support for external decompressors #4

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 9 commits into from
Prev Previous commit
Next Next commit
Add decompressor tests to file_fdw suite
Duplicates the agg.csv-based tests, but using a decompressor. Includes
a Perl-based decompressor since the codebase already depends on Perl
and I didn't want to hardcode a path to the gunzip executable.
  • Loading branch information
jasonmp85 committed Dec 31, 2013
commit 32ab3e2388a5ca69a4a57f2e0217557bcc2311d8
Binary file added contrib/file_fdw/data/agg.csv.gz
Binary file not shown.
15 changes: 15 additions & 0 deletions contrib/file_fdw/input/file_fdw.source
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,11 @@ CREATE FOREIGN TABLE agg_csv (
b float4
) SERVER file_server
OPTIONS (format 'csv', filename '@abs_srcdir@/data/agg.csv', header 'true', delimiter ';', quote '@', escape '"', null '');
CREATE FOREIGN TABLE agg_csv_gz (
a int2,
b float4
) SERVER file_server
OPTIONS (format 'csv', filename '@abs_srcdir@/data/agg.csv.gz', header 'true', delimiter ';', quote '@', escape '"', null '', decompressor '@abs_srcdir@/scripts/gunzip.pl');
CREATE FOREIGN TABLE agg_bad (
a int2,
b float4
Expand All @@ -97,7 +102,9 @@ CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (force_not_null '*'); --
-- basic query tests
SELECT * FROM agg_text WHERE b > 10.0 ORDER BY a;
SELECT * FROM agg_csv ORDER BY a;
SELECT * FROM agg_csv_gz ORDER BY a;
SELECT * FROM agg_csv c JOIN agg_text t ON (t.a = c.a) ORDER BY c.a;
SELECT * FROM agg_csv_gz c JOIN agg_text t ON (t.a = c.a) ORDER BY c.a;

-- error context report tests
SELECT * FROM agg_bad; -- ERROR
Expand All @@ -111,6 +118,14 @@ EXECUTE st(100);
EXECUTE st(100);
DEALLOCATE st;

\t on
EXPLAIN (VERBOSE, COSTS FALSE) SELECT * FROM agg_csv_gz;
\t off
PREPARE st(int) AS SELECT * FROM agg_csv_gz WHERE a = $1;
EXECUTE st(100);
EXECUTE st(100);
DEALLOCATE st;

-- tableoid
SELECT tableoid::regclass, b FROM agg_csv;

Expand Down
47 changes: 46 additions & 1 deletion contrib/file_fdw/output/file_fdw.source
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,11 @@ CREATE FOREIGN TABLE agg_csv (
b float4
) SERVER file_server
OPTIONS (format 'csv', filename '@abs_srcdir@/data/agg.csv', header 'true', delimiter ';', quote '@', escape '"', null '');
CREATE FOREIGN TABLE agg_csv_gz (
a int2,
b float4
) SERVER file_server
OPTIONS (format 'csv', filename '@abs_srcdir@/data/agg.csv.gz', header 'true', delimiter ';', quote '@', escape '"', null '', decompressor '@abs_srcdir@/scripts/gunzip.pl');
CREATE FOREIGN TABLE agg_bad (
a int2,
b float4
Expand Down Expand Up @@ -140,6 +145,14 @@ SELECT * FROM agg_csv ORDER BY a;
100 | 99.097
(3 rows)

SELECT * FROM agg_csv_gz ORDER BY a;
a | b
-----+---------
0 | 0.09561
42 | 324.78
100 | 99.097
(3 rows)

SELECT * FROM agg_csv c JOIN agg_text t ON (t.a = c.a) ORDER BY c.a;
a | b | a | b
-----+---------+-----+---------
Expand All @@ -148,6 +161,14 @@ SELECT * FROM agg_csv c JOIN agg_text t ON (t.a = c.a) ORDER BY c.a;
100 | 99.097 | 100 | 99.097
(3 rows)

SELECT * FROM agg_csv_gz c JOIN agg_text t ON (t.a = c.a) ORDER BY c.a;
a | b | a | b
-----+---------+-----+---------
0 | 0.09561 | 0 | 0.09561
42 | 324.78 | 42 | 324.78
100 | 99.097 | 100 | 99.097
(3 rows)

-- error context report tests
SELECT * FROM agg_bad; -- ERROR
ERROR: invalid input syntax for type real: "aaa"
Expand All @@ -173,6 +194,29 @@ EXECUTE st(100);
100 | 99.097
(1 row)

DEALLOCATE st;
\t on
EXPLAIN (VERBOSE, COSTS FALSE) SELECT * FROM agg_csv_gz;
Foreign Scan on public.agg_csv_gz
Output: a, b
Foreign File: @abs_srcdir@/data/agg.csv.gz
Foreign Program: @abs_srcdir@/scripts/gunzip.pl '@abs_srcdir@/data/agg.csv.gz'
Foreign Program Compression Est.: 2.7709

\t off
PREPARE st(int) AS SELECT * FROM agg_csv_gz WHERE a = $1;
EXECUTE st(100);
a | b
-----+--------
100 | 99.097
(1 row)

EXECUTE st(100);
a | b
-----+--------
100 | 99.097
(1 row)

DEALLOCATE st;
-- tableoid
SELECT tableoid::regclass, b FROM agg_csv;
Expand Down Expand Up @@ -243,13 +287,14 @@ SET ROLE file_fdw_superuser;
-- cleanup
RESET ROLE;
DROP EXTENSION file_fdw CASCADE;
NOTICE: drop cascades to 8 other objects
NOTICE: drop cascades to 9 other objects
DETAIL: drop cascades to server file_server
drop cascades to user mapping for file_fdw_user
drop cascades to user mapping for file_fdw_superuser
drop cascades to user mapping for no_priv_user
drop cascades to foreign table agg_text
drop cascades to foreign table agg_csv
drop cascades to foreign table agg_csv_gz
drop cascades to foreign table agg_bad
drop cascades to foreign table text_csv
DROP ROLE file_fdw_superuser, file_fdw_user, no_priv_user;
10 changes: 10 additions & 0 deletions contrib/file_fdw/scripts/gunzip.pl
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
#!/usr/bin/perl

# Decompress the gzipped file at the path specified by the ARGV[0]
# Usage: gunzip.pl /path/to/compressed/file.gz

use strict;

use IO::Uncompress::Gunzip qw(gunzip $GunzipError) ;

gunzip $ARGV[0] => '-' or die "could not decompress: GunzipError\n";
12 changes: 12 additions & 0 deletions doc/src/sgml/file-fdw.sgml
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,18 @@
</listitem>
</varlistentry>

<varlistentry>
<term><literal>decompressor</literal></term>

<listitem>
<para>
Specifies an external program to be used to decompress the file. Such
programs should accept the filename as an argument and decompress data to
stdout. The program must be readable and executable by the server process.
</para>
</listitem>
</varlistentry>

<varlistentry>
<term><literal>format</literal></term>

Expand Down