docs.sheetjs.com/docz/docs/03-demos/07-data/10-sql.md
2023-02-28 06:40:44 -05:00

10 KiB

title pagination_prev pagination_next sidebar_custom_props
SQL Connectors demos/desktop/index demos/local/index
sql
true

import Tabs from '@theme/Tabs'; import TabItem from '@theme/TabItem';

Generating Tables

This example will fetch https://sheetjs.com/data/cd.xls, scan the columns of the first worksheet to determine data types, and generate 6 PostgreSQL statements.

Explanation (click to show)

The relevant generate_sql function takes a worksheet object and a table name:

// define mapping between determined types and PostgreSQL types
const PG = { "n": "float8", "s": "text", "b": "boolean" };

/**
 * Build a CREATE TABLE statement followed by one INSERT statement for each
 * non-empty row of a worksheet.
 *
 * @param {object} ws     worksheet object handed to XLSX.utils.sheet_to_json
 * @param {string} wsname name of the table to create and populate
 * @returns {string[]} the CREATE TABLE statement followed by the INSERTs
 */
function generate_sql(ws, wsname) {

  // PostgreSQL quotes identifiers with double quotes (backticks are a syntax
  // error).  A double quote inside the identifier is escaped by doubling it.
  const ident = (name) => `"${String(name).replaceAll('"', '""')}"`;

  // generate an array of objects from the data
  const aoo = XLSX.utils.sheet_to_json(ws);

  // types maps column headers to types.  A Map keeps insertion order, so it
  // doubles as the ordered header list, and unlike a plain object it is not
  // confused by headers such as "constructor" or "toString"
  const types = new Map();

  // loop across each [key, value] pair of each row object
  for(const row of aoo) {
    for(const [k, v] of Object.entries(row)) {

      // If this is first time seeing key, mark the column as unknown
      if(!types.has(k)) types.set(k, "?");

      // skip null and undefined
      if(v == null) continue;

      // check and resolve type
      switch(typeof v) {
        case "string": // strings are the broadest type
          types.set(k, "s"); break;
        case "number": // if column is not string, number is the broadest type
          if(types.get(k) !== "s") types.set(k, "n"); break;
        case "boolean": // only mark boolean if column is unknown or boolean
          if("?b".includes(types.get(k))) types.set(k, "b"); break;
        default: types.set(k, "s"); break; // default to string type
      }
    }
  }

  // columns whose type was never resolved (only null values) fall back to text
  const columns = [...types].map(([h, t]) => `${ident(h)} ${PG[t] || "text"}`);
  const create = `CREATE TABLE ${ident(wsname)} (${columns.join(", ")});`;

  // generate INSERT query for each row
  const inserts = [];
  for(const row of aoo) {
    // fields will hold the column names and values will hold the values
    const fields = [], values = [];
    for(const [k, v] of Object.entries(row)) {
      // skip null / undefined
      if(v == null) continue;
      fields.push(ident(k));
      // when the field type is numeric, `true` -> 1 and `false` -> 0
      if(types.get(k) === "n") values.push(typeof v === "boolean" ? (v ? 1 : 0) : v);
      // otherwise, emit a string literal with single quotes doubled
      else values.push(`'${v.toString().replaceAll("'", "''")}'`);
    }
    // rows without any usable value are skipped
    if(fields.length) inserts.push(`INSERT INTO ${ident(wsname)} (${fields.join(", ")}) VALUES (${values.join(", ")})`);
  }

  // The final array consists of the CREATE TABLE query and a series of INSERTs
  return [create, ...inserts];
}
function SheetJSQLWriter() {
  // define mapping between determined types and PostgreSQL types
  const PG = { "n": "float8", "s": "text", "b": "boolean" };
  function generate_sql(ws, wsname) {
    const aoo = XLSX.utils.sheet_to_json(ws);
    const types = {}, hdr = [];
    // loop across each key in each column
    aoo.forEach(row => Object.entries(row).forEach(([k,v]) => {
      // set up type if header hasn't been seen
      if(!types[k]) { types[k] = "?"; hdr.push(k); }
      // check and resolve type
      switch(typeof v) {
        case "string": types[k] = "s"; break;
        case "number": if(types[k] != "s") types[k] = "n"; break;
        case "boolean": if("?b".includes(types[k])) types[k] = "b"; break;
        default: types[k] = "s"; break;
      }
    }));
    return [
      // generate CREATE TABLE query and return batch
      `CREATE TABLE \`${wsname}\` (${hdr.map(h => `\`${h}\` ${PG[types[h]]}`).join(", ")});`
    ].concat(aoo.map(row => {
      const entries = Object.entries(row);
      const fields = [], values = [];
      entries.forEach(([k,v]) => {
        if(v == null) return;
        fields.push(`\`${k}\``);
        if(types[k] == "n") values.push(typeof v == "boolean" ? (v ? 1 : 0) : v);
        else values.push(`'${v.toString().replaceAll("'", "''")}'`);
      })
      if(fields.length) return `INSERT INTO \`${wsname}\` (${fields.join(", ")}) VALUES (${values.join(", ")})`;
    })).filter(x => x).slice(0, 6);
  }
  const [url, setUrl] = React.useState("https://sheetjs.com/data/cd.xls");
  const set_url = React.useCallback((evt) => setUrl(evt.target.value));
  const [out, setOut] = React.useState("");
  const xport = React.useCallback(async() => {
    const ab = await (await fetch(url)).arrayBuffer();
    const wb = XLSX.read(ab), wsname = wb.SheetNames[0];
    setOut(generate_sql(wb.Sheets[wsname], wsname).join("\n"));
  });

  return ( <> {out && ( <><a href={url}>{url}</a><pre>{out}</pre></> )}
    <b>URL: </b><input type="text" value={url} onChange={set_url} size="50"/>
    <br/><button onClick={xport}><b>Fetch!</b></button>
  </> );
}

Databases

Query Builders

Query builders are designed to simplify query generation and normalize field types and other database minutiae.

Knex

The result of a SELECT statement is an array of objects:

// the awaited SELECT result is an array of objects ("aoo"), one per row
const aoo = await connection.select("*").from("DataTable");
// pass that same array to json_to_sheet
const worksheet = XLSX.utils.json_to_sheet(aoo);

Knex wraps primitive types when creating a table. generate_sql takes a knex connection object and uses the API:

Generating a Table (click to show)
// define mapping between determined types and Knex types
const PG = { "n": "float", "s": "text", "b": "boolean" };

/**
 * Recreate the table `wsname` through a Knex connection and insert every
 * non-empty worksheet row into it.
 *
 * A failed insert is reported with console.error and the remaining rows are
 * still attempted.
 *
 * @param {object} knex   knex connection object
 * @param {object} ws     worksheet object handed to XLSX.utils.sheet_to_json
 * @param {string} wsname name of the table to drop, create and populate
 * @returns {Promise<object>} resolves to the same knex object
 */
async function generate_sql(knex, ws, wsname) {

  // generate an array of objects from the data
  const aoo = XLSX.utils.sheet_to_json(ws);

  // types maps column headers to types.  A Map keeps insertion order, so it
  // doubles as the ordered header list, and unlike a plain object it is not
  // confused by headers such as "constructor" or "toString"
  const types = new Map();

  // loop across each [key, value] pair of each row object
  for(const row of aoo) {
    for(const [k, v] of Object.entries(row)) {

      // If this is first time seeing key, mark the column as unknown
      if(!types.has(k)) types.set(k, "?");

      // skip null and undefined
      if(v == null) continue;

      // check and resolve type
      switch(typeof v) {
        case "string": // strings are the broadest type
          types.set(k, "s"); break;
        case "number": // if column is not string, number is the broadest type
          if(types.get(k) !== "s") types.set(k, "n"); break;
        case "boolean": // only mark boolean if column is unknown or boolean
          if("?b".includes(types.get(k))) types.set(k, "b"); break;
        default: types.set(k, "s"); break; // default to string type
      }
    }
  }

  // start from a clean table
  await knex.schema.dropTableIfExists(wsname);

  // call the table builder method named by the PG mapping for each column;
  // columns whose type was never resolved fall back to "text"
  await knex.schema.createTable(wsname, (table) => {
    for(const [h, t] of types) table[PG[t] || "text"](h);
  });

  // insert rows one at a time, skipping empty rows
  for(const row of aoo) {
    if(!row || !Object.keys(row).length) continue;
    try { await knex.insert(row).into(wsname); }
    catch(e) {
      // best effort: keep going, but do not hide the failure
      console.error(`Failed to insert row into ${wsname}:`, e);
    }
  }
  return knex;
}

Other SQL Databases

The generate_sql function from "Generating Tables" can be adapted to generate SQL statements for a variety of databases, including:

PostgreSQL

The pg connector library was tested against the generate_sql output as-is.

The rows property of a query result is an array of objects that plays nice with json_to_sheet:

// await the query first, then read `rows` from the resolved result
// (without the parentheses, `.rows` would be read from the pending Promise)
const aoo = (await connection.query(`SELECT * FROM DataTable`)).rows;
const worksheet = XLSX.utils.json_to_sheet(aoo);

MySQL / MariaDB

The mysql2 connector library was tested. The differences are shown below, primarily stemming from the different quoting requirements and field types.

Differences (click to show)
// highlight-start
// define mapping between determined types and MySQL types
const PG = { "n": "REAL", "s": "TEXT", "b": "TINYINT" };
// highlight-end

/**
 * Build a CREATE TABLE statement followed by one INSERT statement for each
 * non-empty row of a worksheet, using MySQL quoting rules.
 *
 * @param {object} ws     worksheet object handed to XLSX.utils.sheet_to_json
 * @param {string} wsname name of the table to create and populate
 * @returns {string[]} the CREATE TABLE statement followed by the INSERTs
 */
function generate_sql(ws, wsname) {

  // MySQL quotes identifiers with backticks; an embedded backtick is doubled.
  // Quoting every identifier keeps names with spaces or reserved words valid.
  // highlight-next-line
  const ident = (name) => `\`${String(name).replaceAll("`", "``")}\``;

  // generate an array of objects from the data
  const aoo = XLSX.utils.sheet_to_json(ws);

  // types maps column headers to types.  A Map keeps insertion order, so it
  // doubles as the ordered header list, and unlike a plain object it is not
  // confused by headers such as "constructor" or "toString"
  const types = new Map();

  // loop across each [key, value] pair of each row object
  for(const row of aoo) {
    for(const [k, v] of Object.entries(row)) {

      // If this is first time seeing key, mark the column as unknown
      if(!types.has(k)) types.set(k, "?");

      // skip null and undefined
      if(v == null) continue;

      // check and resolve type
      switch(typeof v) {
        case "string": // strings are the broadest type
          types.set(k, "s"); break;
        case "number": // if column is not string, number is the broadest type
          if(types.get(k) !== "s") types.set(k, "n"); break;
        case "boolean": // only mark boolean if column is unknown or boolean
          if("?b".includes(types.get(k))) types.set(k, "b"); break;
        default: types.set(k, "s"); break; // default to string type
      }
    }
  }

  // columns whose type was never resolved (only null values) fall back to TEXT
  const columns = [...types].map(([h, t]) => `${ident(h)} ${PG[t] || "TEXT"}`);
  const create = `CREATE TABLE ${ident(wsname)} (${columns.join(", ")});`;

  // generate INSERT query for each row
  const inserts = [];
  for(const row of aoo) {
    // fields will hold the column names and values will hold the values
    const fields = [], values = [];
    for(const [k, v] of Object.entries(row)) {
      // skip null / undefined
      if(v == null) continue;
      fields.push(ident(k));
      const t = types.get(k);
      // numeric and boolean (TINYINT) columns take bare numbers:
      // `true` -> 1 and `false` -> 0
      if(t === "n" || t === "b") values.push(typeof v === "boolean" ? (v ? 1 : 0) : v);
      // otherwise, emit a double-quoted string.  Backslashes are doubled first
      // because MySQL treats them as escapes unless NO_BACKSLASH_ESCAPES is set
      // highlight-next-line
      else values.push(`"${v.toString().replaceAll("\\", "\\\\").replaceAll('"', '""')}"`);
    }
    // rows without any usable value are skipped
    if(fields.length) inserts.push(`INSERT INTO ${ident(wsname)} (${fields.join(", ")}) VALUES (${values.join(", ")})`);
  }

  // The final array consists of the CREATE TABLE query and a series of INSERTs
  return [create, ...inserts];
}

The first element of a query result is an array of objects that plays nice with json_to_sheet:

// await the query first, then take the first element of the resolved result
// (indexing before awaiting would read `[0]` from the pending Promise)
const [aoo] = await connection.query(`SELECT * FROM DataTable`);
const worksheet = XLSX.utils.json_to_sheet(aoo);