docs.sheetjs.com/docz/static/pandas/sheetjs.py

154 lines
4.4 KiB
Python
Raw Permalink Normal View History

2024-01-30 09:27:22 +00:00
# File name of the Duktape shared library that ctypes loads below.
lib = "libduktape.207.20700.so"
from ctypes import CDLL, byref, string_at, c_int, c_void_p, c_char_p, c_size_t
from json import loads
from io import StringIO
from pandas import read_csv
# Handle to the loaded library; every `duk.duk_*` call in this module goes
# through it. Functions without an explicit `restype` use ctypes' default
# (C int) return type.
duk = CDLL(lib)
def str_to_c(s):
    """Encode *s* as UTF-8 and wrap it for passing to a C function.

    Returns a two-item list ``[pointer, size]`` where ``pointer`` is a
    ``c_char_p`` over the encoded bytes and ``size`` is the number of
    encoded bytes (which may exceed ``len(s)`` for non-ASCII text).
    """
    encoded = s.encode("utf8")
    pointer = c_char_p(encoded)
    size = len(encoded)
    return [pointer, size]
def duk_create_heap_default():
    """Create a Duktape heap, passing NULL for all five arguments.

    Returns the value ctypes produces for a ``c_void_p`` result: an integer
    address, or ``None`` when the library returns a NULL pointer.
    """
    create_heap = duk.duk_create_heap
    # Without an explicit restype ctypes would read the pointer as a C int.
    create_heap.restype = c_void_p
    return create_heap(None, None, None, None, None)
def duk_eval_string_noresult(ctx, cmd):
    """Evaluate the JavaScript source *cmd* in *ctx* through ``duk_eval_raw``.

    Returns the integer that ``duk_eval_raw`` returns.
    """
    source, length = str_to_c(cmd)
    # NOTE(review): bits 3, 8, 9, 10 and 11 look like duktape.h's
    # DUK_COMPILE_EVAL, NORESULT, NOSOURCE, STRLEN and NOFILENAME, with the
    # low bit being an argument count -- confirm against the linked header.
    flags = 1 | (1 << 3) | (1 << 8) | (1 << 9) | (1 << 10) | (1 << 11)
    return duk.duk_eval_raw(ctx, source, length, flags)
def duk_eval_string(ctx, cmd):
    """Evaluate the JavaScript source *cmd* in *ctx* through ``duk_eval_raw``.

    Callers in this module read the result from stack index -1 afterwards
    and pop it themselves. Returns the integer that ``duk_eval_raw`` returns.
    """
    source, length = str_to_c(cmd)
    # NOTE(review): bits 3, 9, 10 and 11 look like duktape.h's
    # DUK_COMPILE_EVAL, NOSOURCE, STRLEN and NOFILENAME -- confirm against
    # the linked header.
    flags = (1 << 3) | (1 << 9) | (1 << 10) | (1 << 11)
    return duk.duk_eval_raw(ctx, source, length, flags)
def duk_peval(ctx):
    """Call ``duk_eval_raw`` with no source pointer and a zero length.

    The caller is expected to have pushed the source onto the value stack
    first (see how ``eval_file`` uses it). Returns the integer status from
    ``duk_eval_raw``.
    """
    # NOTE(review): bits 3, 7 and 11 look like duktape.h's DUK_COMPILE_EVAL,
    # SAFE and NOFILENAME, with the low bit being an argument count of one --
    # confirm against the linked header.
    flags = 1 | (1 << 3) | (1 << 7) | (1 << 11)
    return duk.duk_eval_raw(ctx, None, 0, flags)
def duk_get_string(ctx, idx):
    """Return the value at stack index *idx* of *ctx* as a Python ``str``.

    The bytes returned by the library are decoded as UTF-8. If the library
    returns a NULL pointer, ctypes yields ``None`` and the ``decode`` call
    raises ``AttributeError``.
    """
    get_string = duk.duk_get_string
    # Have ctypes convert the returned char pointer to a bytes object.
    get_string.restype = c_char_p
    raw = get_string(ctx, idx)
    return raw.decode("utf8")
def eval_file(ctx, path):
    """Evaluate the JavaScript file at *path* inside the Duktape context.

    The source is pushed onto the value stack, evaluated with ``duk_peval``
    and the value left by the evaluation is popped again.

    Returns the integer status from ``duk_peval``.
    """
    # Decode explicitly as UTF-8: str_to_c re-encodes the text as UTF-8, so
    # relying on the locale's default encoding could corrupt non-ASCII source.
    with open(path, "r", encoding="utf-8") as f:
        code = f.read()
    [s, l] = str_to_c(code)
    # Pass the length as size_t; a bare Python int would go through ctypes'
    # default C int conversion.
    duk.duk_push_lstring(ctx, s, c_size_t(l))
    retval = duk_peval(ctx)
    # Discard whatever the evaluation left on the stack (result or error).
    duk.duk_pop(ctx)
    return retval
def load_file(ctx, path, var):
    """Expose the raw bytes of the file at *path* as the JS global *var*.

    A buffer is pushed onto the stack, configured to point at the Python
    ``bytes`` object read from the file, and stored under the global name
    *var*.

    Returns the ``bytes`` object. The Duktape buffer refers to this memory
    rather than copying it, so the caller must keep the return value alive
    for as long as the JS side may read the buffer.
    """
    with open(path, "rb") as f:
        data = f.read()
    ptr = c_char_p(data)
    # NOTE(review): 1 | 2 looks like DUK_BUF_FLAG_DYNAMIC | DUK_BUF_FLAG_EXTERNAL
    # from duktape.h -- confirm against the linked header.
    duk.duk_push_buffer_raw(ctx, 0, 1 | 2)
    # Pass the length as size_t; a bare Python int would go through ctypes'
    # default C int conversion and be masked to fit.
    duk.duk_config_buffer(ctx, -1, ptr, c_size_t(len(data)))
    duk.duk_put_global_string(ctx, str_to_c(var)[0])
    return data
def save_file(ctx, path, var):
    """Write the buffer stored in the JS global *var* to the file at *path*.

    The global is pushed onto the value stack, its bytes are copied into a
    Python ``bytes`` object, the stack entry is popped, and the bytes are
    written to *path* in binary mode.
    """
    duk.duk_get_global_string(ctx, str_to_c(var)[0])
    sz = c_size_t()
    # Without an explicit restype ctypes would read the pointer as a C int.
    duk.duk_get_buffer_data.restype = c_void_p
    buf = duk.duk_get_buffer_data(ctx, -1, byref(sz))
    # string_at copies the bytes, so the stack entry can be released next.
    s = string_at(buf, sz.value)
    # Pop the global pushed above so repeated calls do not grow the stack.
    duk.duk_pop(ctx)
    with open(path, "wb") as f:
        f.write(s)
def initialize():
    """Create a Duktape heap and load the SheetJS scripts into it.

    Evaluates ``shim.min.js`` and ``xlsx.full.min.js`` (paths relative to
    the current working directory) and prints the library version.

    Returns:
        ``[context, ctx]`` where ``context`` is the raw heap address and
        ``ctx`` is the same address wrapped in ``c_void_p``.

    Raises:
        RuntimeError: if the heap cannot be created or either script fails
            to evaluate. The heap is destroyed before raising in the
            latter case.
    """
    # initialize
    context = duk_create_heap_default()
    if not context:
        raise RuntimeError("duk_create_heap returned NULL")
    ctx = c_void_p(context)
    # duktape does not expose a standard "global" by default
    duk_eval_string_noresult(ctx, "var global = (function(){ return this; }).call(null);")
    # load library; a non-zero status from the protected eval means failure,
    # and continuing would only fail later in an unprotected eval
    for script in ("shim.min.js", "xlsx.full.min.js"):
        if eval_file(ctx, script) != 0:
            duk.duk_destroy_heap(ctx)
            raise RuntimeError(f"failed to evaluate {script}")
    # get version string
    duk_eval_string(ctx, "XLSX.version")
    print(f"SheetJS Library Version {duk_get_string(ctx, -1)}")
    duk.duk_pop(ctx)
    return [context, ctx]
def parse_file(ctx, path, name):
    """Read the file at *path* and parse it into the JS global *name*.

    The file bytes are exposed to JS as the global ``buf`` and handed to
    ``XLSX.read`` as a slice covering the whole buffer.
    """
    # NOTE: the returned bytes are bound to a local on purpose so they are
    # not garbage-collected while the JS side reads them.
    _keepalive = load_file(ctx, path, "buf")
    script = f"{name} = XLSX.read(buf.slice(0, buf.length));"
    duk_eval_string_noresult(ctx, script)
def get_sheet_names(ctx, wb):
    """Return the ``SheetNames`` of the JS workbook variable *wb*.

    The names are serialized to JSON on the JS side and decoded here with
    ``json.loads``; the evaluation result is popped from the stack before
    returning.
    """
    expression = f"JSON.stringify({wb}.SheetNames)"
    duk_eval_string(ctx, expression)
    serialized = duk_get_string(ctx, -1)
    sheet_names = loads(serialized)
    duk.duk_pop(ctx)
    return sheet_names
def get_csv_from_wb(ctx, wb, sheet_name=None):
    """Return one worksheet of the JS workbook variable *wb* as CSV text.

    Args:
        ctx: Duktape context.
        wb: name of the JS variable holding the workbook.
        sheet_name: worksheet to export; when falsy, the first entry of
            ``SheetNames`` is used.
    """
    from json import dumps

    if not sheet_name:
        key = f"{wb}.SheetNames[0]"
    else:
        # Emit the name as an escaped string literal so quotes or
        # backslashes in a sheet name cannot break the generated JS.
        key = dumps(str(sheet_name))
    duk_eval_string(ctx, f"XLSX.utils.sheet_to_csv({wb}.Sheets[{key}])")
    csv = duk_get_string(ctx, -1)
    duk.duk_pop(ctx)
    return csv
def export_df_to_wb(ctx, df, path, sheet_name="Sheet1", book_type=None):
    """Write the DataFrame *df* to a workbook file at *path*.

    The frame is serialized with ``df.to_json(orient="records")``, stored in
    the JS global ``json``, converted to a worksheet and workbook on the JS
    side, and the resulting buffer (JS global ``newbuf``) is saved to *path*.

    Args:
        ctx: Duktape context.
        df: object providing ``to_json(orient="records")``.
        path: output file path.
        sheet_name: worksheet name; ``None`` or empty falls back to
            ``"Sheet1"``.
        book_type: value for the ``bookType`` write option; when falsy it
            is taken from the text after the last ``.`` in *path*.
    """
    from json import dumps

    # Callers may pass None explicitly; do not let it become the literal
    # sheet name "None".
    if not sheet_name:
        sheet_name = "Sheet1"
    if not book_type:
        book_type = path.split(".")[-1]

    payload = df.to_json(orient="records")
    [s, l] = str_to_c(payload)
    # Pass the length as size_t; a bare Python int would go through ctypes'
    # default C int conversion.
    duk.duk_push_lstring(ctx, s, c_size_t(l))
    duk.duk_put_global_string(ctx, str_to_c("json")[0])

    # Escaped string literals keep quotes in the values from breaking the JS.
    ws_literal = dumps(str(sheet_name))
    type_literal = dumps(str(book_type))
    duk_eval_string_noresult(ctx, f"""
aoo = JSON.parse(json);
newws = XLSX.utils.json_to_sheet(aoo);
newwb = XLSX.utils.book_new(newws, {ws_literal});
newbuf = XLSX.write(newwb, {{type:'buffer', bookType:{type_literal}}});
""")
    save_file(ctx, path, "newbuf")
def get_df_from_wb(ctx, wb, sheet_name=None):
    """Return one worksheet of the JS workbook variable *wb* as a DataFrame.

    The worksheet is exported to CSV text by ``get_csv_from_wb`` and that
    text is parsed with ``pandas.read_csv``.
    """
    csv_text = get_csv_from_wb(ctx, wb, sheet_name)
    stream = StringIO(csv_text)
    return read_csv(stream)
class SheetJSWorkbook(object):
    """Handle to a workbook held in a JS variable inside a Duktape context."""

    def __init__(self, sheetjs, wb):
        """Store the context of *sheetjs* and the JS variable name *wb*."""
        self.ctx = sheetjs.ctx
        self.wb = wb

    def get_sheet_names(self):
        """Return the workbook's sheet names."""
        return get_sheet_names(self.ctx, self.wb)

    def get_df(self, sheet_name=None):
        """Return a worksheet as a DataFrame.

        When *sheet_name* is ``None`` the first sheet name is used.
        """
        target = self.get_sheet_names()[0] if sheet_name is None else sheet_name
        return get_df_from_wb(self.ctx, self.wb, target)
2023-07-30 03:17:31 +00:00
2024-01-30 09:27:22 +00:00
class SheetJS(object):
    """Read and write workbooks through a Duktape context running SheetJS."""

    def __init__(self, ctx):
        """Store *ctx* and start with no workbook variables allocated."""
        self.ctx = ctx
        # Names of the JS variables created by read_file, in creation order.
        self.wb_names = []

    def read_file(self, path):
        """Parse the file at *path* and return a ``SheetJSWorkbook`` for it.

        Each call stores the workbook in a fresh JS variable named
        ``wb<N>``, where N is the number of workbooks read so far.
        """
        self.wb_names.append(f"wb{len(self.wb_names)}")
        parse_file(self.ctx, path, self.wb_names[-1])
        return SheetJSWorkbook(self, self.wb_names[-1])

    def write_df(self, df, path, sheet_name=None):
        """Write the DataFrame *df* to a workbook file at *path*.

        When *sheet_name* is ``None`` the default sheet name of
        ``export_df_to_wb`` applies.
        """
        if sheet_name is None:
            # Forwarding None would override the callee's default and end up
            # as the literal sheet name "None".
            export_df_to_wb(self.ctx, df, path)
        else:
            export_df_to_wb(self.ctx, df, path, sheet_name)
class SheetJSWrapper(object):
    """Context manager that owns a Duktape heap for the ``with`` block."""

    def __enter__(self):
        """Initialize the engine and return a ``SheetJS`` bound to it."""
        self.context, self.ctx = initialize()
        return SheetJS(self.ctx)

    def __exit__(self, exc_type, exc_value, traceback):
        """Destroy the heap created in ``__enter__``."""
        duk.duk_destroy_heap(self.ctx)