docs.sheetjs.com/docz/static/pandas/sheetjs.py
2023-07-30 02:07:38 -04:00

137 lines
3.5 KiB
Python

from base64 import b64encode, b64decode
from contextlib import contextmanager
from STPyV8 import JSContext, JSArray, JSObject
from functools import wraps
from os.path import splitext
def to_py(method):
# `convert` from STPyV8 tests/test_Wrapper.py
def convert(obj):
if isinstance(obj, JSArray):
return [convert(v) for v in obj]
if isinstance(obj, JSObject):
return dict([[str(k), convert(obj.__getattr__(str(k)))] for k in obj.__dir__()])
return obj
@wraps(method)
def func(self, *args, **kwargs):
res = method(self, *args, **kwargs)
return convert(res)
return func
class SheetJSWorksheet:
ws = None
ctxt = None
def __init__(self, ctxt, ws):
self.ctxt = ctxt
self.ws = ws
def js(self): return self.ws
@to_py
def get_rows(self):
return self.ctxt.eval("(ws => XLSX.utils.sheet_to_json(ws))")(self.ws)
class SheetJSWorkbook:
wb = None
ctxt = None
def __init__(self, ctxt, wb):
self.ctxt = ctxt
self.wb = wb
def js(self): return self.wb
@to_py
def sheet_names(self):
return self.wb.SheetNames
def get_sheet(self, name):
return SheetJSWorksheet(self.ctxt, self.wb.Sheets[name])
def to_file(self, path, book_type=""):
b64ify = self.ctxt.eval("((wb, bT) => XLSX.write(wb, {type:'base64', bookType:bT}))")
if not book_type: book_type = splitext(path)[1][1:]
b64 = b64ify(self.wb, book_type)
raw = b64decode(b64)
with open(path, mode="wb") as f:
f.write(raw)
class SheetJSWrapper:
ctxt = None
def __init__(self, ctx):
self.ctxt = ctx
with open("xlsx.full.min.js") as f: self.ctxt.eval(f.read())
def version(self):
return self.ctxt.eval("XLSX.version")
def read_binary(self, data):
read = self.ctxt.eval("(b64 => XLSX.read(b64, {type: 'base64', dense: true}))")
return SheetJSWorkbook(self.ctxt, read(b64encode(data)))
def read_file(self, path):
with open(path, mode="rb") as f:
return self.read_binary(f.read())
def sheet_from_json(self, json):
jsonify = self.ctxt.eval("(json => XLSX.utils.json_to_sheet(JSON.parse(json)) )")
return SheetJSWorksheet(self.ctxt, jsonify(json))
def book_new(self):
booknew = self.ctxt.eval("XLSX.utils.book_new()")
return SheetJSWorkbook(self.ctxt, booknew)
def book_append_sheet(self, book, sheet, wsname):
bas = self.ctxt.eval("((wb, ws, wsname) => XLSX.utils.book_append_sheet(wb, ws, wsname))")
bas(book.js(), sheet.js(), wsname)
def book_from_json(self, json, wsname = "Sheet1"):
booknew = self.book_new()
sheet = self.sheet_from_json(json)
self.book_append_sheet(booknew, sheet, wsname)
return booknew
def book_from_df(self, df):
# convert from dataframe to JSON string
json = df.to_json(orient="records")
return self.book_from_json(json)
@contextmanager
def SheetJS():
"""
SheetJS Library context manager
Returns an instance of the SheetJSWrapper class
Reading data from file to Pandas DataFrame:
```py
with SheetJS() as sheetjs:
# read data from file
wb = sheetjs.read_file(argv[1])
# get first worksheet
first_ws_name = wb.sheet_names()[0]
ws = wb.get_sheet(wsname)
# get data from first worksheet (list of dicts)
rows = ws.get_rows()
# generate pandas DataFrame
df = pd.DataFrame.from_records(rows)
```
Writing data from Pandas DataFrame to file:
```py
with SheetJS() as sheetjs:
sheetjs.book_from_df(df).to_file(outf)
```
"""
with JSContext() as ctxt:
yield SheetJSWrapper(ctxt)