CREATE FUNCTION lib.get_csvfile(
file text,
delim_char char(1) = ',',
quote_char char(1) = '"'
) RETURNS setof text[] AS $f$
import csv
return csv.reader(
open(file, 'rb'),
quotechar=quote_char,
delimiter=delim_char,
skipinitialspace=True,
escapechar='\\'
)
$f$ immutable language PLpythonU;
CREATE FUNCTION lib.get_csvline(
_line text,
_delim_char char(1) = ',',
_quote_char char(1) = '"'
) RETURNS text[] AS $f$
import csv
return [row for row in csv.reader(
[_line],
quotechar = _quote_char,
delimiter = _delim_char,
skipinitialspace = True,
escapechar = '\\'
)][0]
$f$ immutable language PLpythonU;
CREATE SCHEMA csv; -- a library, easy to drop
CREATE FUNCTION csv.split_csv2(
line text, -- the input CSV UTF-8 line
delim_char char(1) = ',', -- can be also E'\t', ';', '|', etc.
quote_char char(1) = '"' -- field quotation
) RETURNS setof text[] AS $f$
import csv
return csv.reader(
line.splitlines(),
quotechar=quote_char,
delimiter=delim_char,
skipinitialspace=True,
escapechar='\\'
)
$f$ IMMUTABLE language PLpythonU;
CREATE FUNCTION csv.split_csv_line2(
text, char(1) DEFAULT ',', char(1) DEFAULT '"'
) RETURNS text[] AS $f$
SELECT x FROM csv.split_csv($1,$2,$3) x LIMIT 1;
$f$ language SQL IMMUTABLE;
-- not reliable but fast? http://stackoverflow.com/a/42926265/287948
-- compare performance with csv.split_csv_line2()
-- ideal a C implementation
CREATE OR REPLACE FUNCTION csv.split_csv_line(
line text, -- the input CSV string
delim_char char(1) = ',', -- can be also E'\t', ';', '|', etc.
quote_char char(1) = '"', -- field quotation
OUT result text[]
) LANGUAGE plpgsql AS
$f$DECLARE
i integer;
t text := '';
c char(1);
/*
* 0 means unquoted
* 1 means quoted
* 2 means quoted, and we just read a quote
*/
q integer := 0;
BEGIN
/* loop through the characters */
FOR i IN 1..length(line) LOOP
/* get i-th character */
c := substring(line FROM i FOR 1);
/* end of string is at an unquoted delimiter */
IF c = delim_char AND q <> 1 THEN
result := result || t;
t := '';
q := 0;
ELSIF c = quote_char THEN
CASE q
WHEN 0 THEN
q := 1;
WHEN 1 THEN
IF c = quote_char THEN
q := 2;
ELSE
t := t || c;
END IF;
WHEN 2 THEN
q := 1;
t := t || quote_char;
END CASE;
ELSE
IF q = 2 THEN
q := 0;
END IF;
t := t || c;
END IF;
END LOOP;
/* add the last string */
result := result || t;
END;$f$;
CREATE EXTENSION file_fdw WITH SCHEMA csv;
CREATE SERVER files FOREIGN DATA WRAPPER file_fdw;
CREATE or replace FUNCTION csv.ftable_drop(tname text) RETURNS void AS $f$
BEGIN
EXECUTE format('DROP VIEW IF EXISTS %s', 'csv.'||$1);
EXECUTE format('DROP FOREIGN TABLE IF EXISTS %s CASCADE', 'csv.fgn_'||$1);
END
$f$ language PLpgSQL;
CREATE or replace FUNCTION csv.ftable_create(
tname text, -- table name for scan text and "csv_tname"
fpath text, -- file path to the input CSV
delim_char char(1) = ',', -- can be also E'\t', ';', '|', etc.
quote_char char(1) = '"', -- field quotation
p_check boolean default true -- false to not check-and-drop.
) RETURNS text AS $f$
DECLARE
fgn_tname text;
csv_tname text;
BEGIN
fgn_tname := 'csv.fgn_'||$1;
csv_tname := 'csv.'||$1;
IF p_check THEN PERFORM csv.ftable_drop($1); END IF;
EXECUTE format(E'CREATE FOREIGN TABLE %s (line text) SERVER files OPTIONS ( filename %L, format \'text\')', fgn_tname, $2);
EXECUTE format('CREATE VIEW %s AS SELECT csv.split_csv_line(line,%L,%L) as c FROM %s', csv_tname, delim_char,quote_char, fgn_tname);
RETURN format(E'\ntables %I and %I created\n',fgn_tname,csv_tname);
END
$f$ language PLpgSQL;
---- TESTING ----
SELECT csv.ftable_create('my','/tmp/TSE2/lix.csv',';');