From 4a2314409e1a89d2828340186508b7e1deaf4252 Mon Sep 17 00:00:00 2001 From: SheetJS Date: Sat, 11 Feb 2023 23:20:11 -0500 Subject: [PATCH] github --- .../{45-git.md => 44-hosting/02-github.md} | 64 +++++++++---------- docz/docs/03-demos/index.md | 2 +- docz/docusaurus.config.js | 1 + 3 files changed, 34 insertions(+), 33 deletions(-) rename docz/docs/03-demos/{45-git.md => 44-hosting/02-github.md} (85%) diff --git a/docz/docs/03-demos/45-git.md b/docz/docs/03-demos/44-hosting/02-github.md similarity index 85% rename from docz/docs/03-demos/45-git.md rename to docz/docs/03-demos/44-hosting/02-github.md index a518a6d..88eaa2a 100644 --- a/docz/docs/03-demos/45-git.md +++ b/docz/docs/03-demos/44-hosting/02-github.md @@ -1,40 +1,38 @@ --- -title: Data in Version Control -pagination_prev: demos/hosting/index +title: GitHub +pagination_prev: demos/ml pagination_next: solutions/input --- -Git is a popular system for organizing a historical record of source code and -changes. Git can also store and track binary data artifacts, but data tools -are more effective in processing data stored in plain text formats like CSV. - Many official data releases by governments and organizations include XLSX or -XLS files. SheetJS trivializes the conversion to CSV. For example, in NodeJS: +XLS files. Unfortunately some data sources do not retain older versions. -```js -const XLSX = require("xlsx"); +Git is a popular system for organizing a historical record of source code and +changes. Git can also store and track binary data artifacts. -(async() => { - /* Download Data */ - const f = await fetch("https://docs.sheetjs.com/pres.xlsx"); - const data = await f.arrayBuffer(); +GitHub is a popular host for Git repositories. GitHub's "Flat Data" project +explores storing and comparing versions of structured CSV and JSON data. 
The +official "Excel to CSV" example uses SheetJS to generate CSV data from files: - /* Parse workbook */ - // highlight-next-line - const wb = XLSX.read(data); - - /* Convert first worksheet to CSV */ - const ws = wb.Sheets[wb.SheetNames[0]]; - // highlight-next-line - const csv = XLSX.utils.sheet_to_csv(ws); - console.log(csv); -})(); +```mermaid +sequenceDiagram + autonumber + participant R as GH Repo + participant A as GH Action + participant S as Data Source + loop Regular Interval (cron) + A->>R: clone repo + R->>A: old repo + A->>S: fetch file + S->>A: spreadsheet + Note over A: SheetJS<br/>convert to CSV + alt Data changed + Note over A: commit new data + A->>R: push new commit + end + end ``` -GitHub's "Flat Data" project explores storing and comparing versions of -structured CSV and JSON data. The official "Excel to CSV" example uses SheetJS -under the hood to generate CSV data from an XLSX file. - This demo covers implementation details elided in the official write-up. ## Flat Data @@ -49,7 +47,7 @@ As a project from the company, the entire lifecycle uses GitHub offerings: :::caution -A GitHub account is required. At the time of writing (2022 November 08), free +A GitHub account is required. At the time of writing (2023 February 11), free GitHub accounts have no Actions usage limits for public repositories. Using private GitHub repositories is not recommended because the Flat Viewer @@ -79,14 +77,16 @@ The `githubocto/flat` action can be added as a step in a workflow: postprocess: ./postprocess.ts ``` -The `http_url` will be fetched and saved to `downloaded_filename` in the repo. +This action performs the following steps: + +1) `http_url` will be fetched and saved to `downloaded_filename` in the repo. This can be approximated with the following command: ```bash curl -L -o data.xlsx https://docs.sheetjs.com/pres.xlsx ``` -After saving, the `postprocess` script will be run. When a `.ts` file is the +2) After saving, the `postprocess` script will be run. When a `.ts` file is the script, it will run the script in the Deno runtime. The `postprocess` script is expected to read the downloaded file and create or overwrite files in the repo. This can be approximated with the following command: @@ -95,7 +95,7 @@ This can be approximated with the following command: deno run -A ./postprocess.ts data.xlsx ``` -The action will then compare the contents of the repo, creating a new commit if +3) The action will compare the contents of the repo, creating a new commit if the source data or artifacts from the `postprocess` script changed.
@@ -153,7 +153,7 @@ Deno.writeFileSync(out_file, new TextEncoder().encode(csv)); :::note -This was tested on 2022 November 08 using the GitHub UI. +This was tested on 2023 February 11 using the GitHub UI. ::: diff --git a/docz/docs/03-demos/index.md b/docz/docs/03-demos/index.md index 6fb9ffe..97628f3 100644 --- a/docz/docs/03-demos/index.md +++ b/docz/docs/03-demos/index.md @@ -81,7 +81,7 @@ run in the web browser, demos will include interactive examples. ### File Hosting Services - [`Dropbox`](/docs/demos/hosting/dropbox) -- [`Git`](/docs/demos/git) +- [`GitHub`](/docs/demos/hosting/github) ### Platforms and Integrations diff --git a/docz/docusaurus.config.js b/docz/docusaurus.config.js index 70cd4df..9968cc2 100644 --- a/docz/docusaurus.config.js +++ b/docz/docusaurus.config.js @@ -172,6 +172,7 @@ const config = { { from: '/docs/getting-started/demos/', to: '/docs/demos/' }, { from: '/docs/getting-started/demos/excel', to: '/docs/demos/' }, { from: '/docs/demos/content', to: '/docs/demos/static/' }, + { from: '/docs/demos/git', to: '/docs/demos/hosting/github/' }, /* frontend */ { from: '/docs/demos/angular', to: '/docs/demos/frontend/angular/' }, { from: '/docs/demos/react', to: '/docs/demos/frontend/react/' },