Spool header
When downloading the data of a result set into a file, for example a CSV file (an operation also known as spooling), it is useful to add a header, i.e. to write the field names in the first row.
Suppose that our statement is
SELECT name, color, quantity, when_eat
FROM fruit
If the database you are using does not support this feature out of the box, the common trick is to prepend a statement that adds a single row with the header, something like
SELECT 'name', 'color', 'quantity', 'when_eat'
UNION
SELECT name, color, quantity, when_eat
FROM fruit
To transform the first statement into the second one, you can do the following:
var sql2json = require('sql92-json').parse
var json2sql = require('sql92-json').stringify
// Prepend a header row to a query, via a UNION.
function addHeader (header, query) {
  return {
    SELECT: header,
    UNION: query
  }
}
var sql = `
SELECT name, color, quantity, when_eat
FROM fruit
`
// Parse the SQL statement into its JSON representation.
var query = sql2json(sql)
// Enclose fields with single quotes.
var header = query.SELECT.map((field) => "'" + field + "'")
var queryWithHeader = addHeader(header, query)
// Stringify the generated JSON back into SQL.
var sqlWithHeader = json2sql(queryWithHeader)
console.log(sqlWithHeader)
// SELECT 'name', 'color', 'quantity', 'when_eat'
// UNION
// SELECT name, color, quantity, when_eat
// FROM fruit
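For completeness, here is a minimal sketch of how the generated statement could be spooled into a CSV file. It assumes a Postgres compatible database reachable through the pg package; the DATABASE_URL connection string and the fruit.csv file name are hypothetical.
var fs = require('fs')
var pg = require('pg')

// Hypothetical connection, e.g. DATABASE_URL=postgres://user:pass@host:5432/db
var client = new pg.Client({ connectionString: process.env.DATABASE_URL })

client.connect()
  .then(() => client.query({ text: sqlWithHeader, rowMode: 'array' }))
  .then((result) => {
    // With rowMode 'array' every row, header included, is an array of values.
    var csv = result.rows.map((row) => row.join(',')).join('\n')
    fs.writeFileSync('fruit.csv', csv)
    return client.end()
  })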
Actually, my use case is more complex, because I need to add a header to a SQL statement that could also be something like SELECT * FROM fruit. Furthermore, I am using an UNLOAD statement on Redshift: the data file is spooled to S3, but the statement does not return any data, so it is not even possible to read the first row to get the field names.
I am using the following function to generate the UNLOAD statement.
var json2sql = require('sql92-json').stringify
function generateUnloadSQL (s3File, queryJSON) {
  var AWS_SECRET_ACCESS_KEY = process.env.AWS_SECRET_ACCESS_KEY
  var AWS_ACCESS_KEY_ID = process.env.AWS_ACCESS_KEY_ID
  var credentials = `'aws_access_key_id=${AWS_ACCESS_KEY_ID};aws_secret_access_key=${AWS_SECRET_ACCESS_KEY}'`
  // Escape single quotes, since the whole query is enclosed in quotes inside UNLOAD.
  var querySQL = json2sql(queryJSON).replace(/'/g, "\\'")
  var s3Bucket = s3File.Bucket
  // The UNLOAD command appends '000.gz' to the given prefix, hence it must be stripped from the key.
  var s3FileKey = s3File.Key.replace(/\.000\.gz$/, '')
  return `
UNLOAD('${querySQL}')
TO 's3://${s3Bucket}/${s3FileKey}.'
CREDENTIALS ${credentials}
DELIMITER AS ';'
ADDQUOTES
ALLOWOVERWRITE
PARALLEL FALSE
GZIP
`
}
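For instance, assuming the AWS credentials are set in the environment, and given a hypothetical bucket and key, the function can be called like this
// Both Bucket and Key are hypothetical; the '.000.gz' suffix will be stripped.
var s3File = { Bucket: 'my-bucket', Key: 'fruit.000.gz' }
var queryJSON = {
  SELECT: ['name', 'color', 'quantity', 'when_eat'],
  FROM: ['fruit']
}
console.log(generateUnloadSQL(s3File, queryJSON))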
A generic solution to this use case is to fetch the fields from a query like SELECT * FROM fruit LIMIT 1, then apply the previous transformation to get a statement that returns the header in the first row. This is as easy as the following snippet:
var sqlStar = `
SELECT *
FROM fruit
`
// Add LIMIT clause.
var queryStar = sql2json(sqlStar)
queryStar.LIMIT = 1
console.log(json2sql(queryStar))
// SELECT *
// FROM fruit
// LIMIT 1
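To actually get the field names, it is not even necessary to read any data: most drivers expose the fields metadata of the result set. A minimal sketch, reusing the hypothetical pg client defined above:
client.query(json2sql(queryStar))
  .then((result) => {
    // The fields metadata is available even when the table is empty.
    var fields = result.fields.map((field) => field.name)
    console.log(fields)
    // [ 'name', 'color', 'quantity', 'when_eat' ]
  })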
Since both sides of a UNION must have the same number of fields with compatible data types, it is necessary to cast every field to VARCHAR. Furthermore, since a UNION does not guarantee any ordering, the header could end up not being in the first row. As suggested by this answer on Stack Overflow, the statement we need is something like the following
SELECT foo, bar
FROM (
  SELECT 1 AS i, 'foo' AS foo, 'bar' AS bar
  UNION
  SELECT 2 AS i, foo, bar
  FROM mytable
)
ORDER BY i
Assuming that we know the fields involved, the final solution is the following.
var fields = [ 'name', 'color', 'quantity', 'when_eat' ]
var table = 'fruit'
function spool (table, fields) {
  // Quote every field name and alias it to the field itself, e.g. 'name' AS name.
  var header = fields.map((field) => {
    var alias = { AS: {} }
    alias.AS[field] = `'${field}'`
    return alias
  })
  return {
    SELECT: fields,
    FROM: [{
      // The i column forces the header row to come first.
      SELECT: [{ AS: { i: 1 } }].concat(header),
      UNION: {
        // Cast every field to VARCHAR so the UNION data types match.
        SELECT: [{ AS: { i: 2 } }].concat(fields.map((field) => `${field}::VARCHAR`)),
        FROM: [table]
      }
    }],
    'ORDER BY': ['i']
  }
}
console.log(json2sql(spool(table, fields)))
// SELECT name, color, quantity, when_eat
// FROM (
// SELECT 1 AS i, 'name' AS name, 'color' AS color, 'quantity' AS quantity, 'when_eat' AS when_eat
// UNION
// SELECT 2 AS i, name::VARCHAR, color::VARCHAR, quantity::VARCHAR, when_eat::VARCHAR
// FROM fruit
// )
// ORDER BY i
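Putting it all together, the JSON generated by spool can be fed straight into the generateUnloadSQL function defined above.
var unloadWithHeaderSQL = generateUnloadSQL(
  { Bucket: 'my-bucket', Key: 'fruit.000.gz' }, // Hypothetical S3 file.
  spool(table, fields)
)
// Running unloadWithHeaderSQL on Redshift spools the data to S3, header included.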