# docs.sheetjs.com/docz/static/pandas/sheetjs.py

# File name of the shared library that is handed to ctypes.CDLL below.
# NOTE(review): the name suggests a Duktape 2.7.0 build -- confirm it matches
# the library actually installed next to this script.
lib = "libduktape.207.20700.so"

from ctypes import CDLL, byref, string_at, c_int, c_void_p, c_char_p, c_size_t
from json import loads
from io import StringIO
from pandas import read_csv

# Load the shared library named by `lib`; every `duk.duk_*` call in this file
# goes through this handle.
duk = CDLL(lib)

def str_to_c(s):
  """Encode a Python string as UTF-8 for passing to a C function.

  Returns a two-element list: a ``c_char_p`` wrapping the encoded bytes,
  followed by the length of the encoding in bytes (not in characters).
  """
  encoded = s.encode("utf8")
  pointer = c_char_p(encoded)
  return [pointer, len(encoded)]

def duk_create_heap_default():
  """Create a Duktape heap, passing None for all five duk_create_heap arguments.

  Returns the heap pointer as ctypes converts it for a ``c_void_p`` restype.
  """
  create_heap = duk.duk_create_heap
  # Declare the pointer return type; the ctypes default return type is C int.
  create_heap.restype = c_void_p
  return create_heap(None, None, None, None, None)

def duk_eval_string_noresult(ctx, cmd):
  """Evaluate the JavaScript source text ``cmd`` through ``duk_eval_raw``.

  ``cmd`` is encoded as UTF-8 and passed with its byte length. Returns the
  value reported by ``duk_eval_raw``.
  """
  source, length = str_to_c(cmd)
  # NOTE(review): the bit positions presumably mirror the DUK_COMPILE_* flags
  # in duktape.h, with bit 8 being the "no result" flag -- confirm against the
  # header shipped with the loaded library.
  flags = 1 | (1<<3) | (1<<8) | (1<<9) | (1<<10) | (1<<11)
  return duk.duk_eval_raw(ctx, source, length, flags)

def duk_eval_string(ctx, cmd):
  """Evaluate the JavaScript source text ``cmd`` through ``duk_eval_raw``.

  Callers in this file read the outcome from stack index -1 and then pop it.
  Returns the value reported by ``duk_eval_raw``.
  """
  source, length = str_to_c(cmd)
  # NOTE(review): the bit positions presumably mirror the DUK_COMPILE_* flags
  # in duktape.h -- confirm against the header shipped with the library.
  flags = 0 | (1<<3) | (1<<9) | (1<<10) | (1<<11)
  return duk.duk_eval_raw(ctx, source, length, flags)

def duk_peval(ctx):
  """Call ``duk_eval_raw`` with no source buffer (None pointer, length 0).

  eval_file pushes the source string onto the stack before calling this.
  Returns the value reported by ``duk_eval_raw``.
  """
  # NOTE(review): bit 7 is presumably the "safe" (protected call) flag and the
  # low bit the stack-argument count -- confirm against duktape.h.
  flags = 1 | (1<<3) | (1<<7) | (1<<11)
  return duk.duk_eval_raw(ctx, None, 0, flags)

def duk_get_string(ctx, idx):
  """Fetch the value at stack index ``idx`` as a Python ``str``.

  The C result is read as a ``c_char_p`` and decoded as UTF-8.
  NOTE(review): ctypes maps a NULL ``c_char_p`` result to None, in which case
  the decode below raises AttributeError -- confirm callers only use this on
  string values.
  """
  getter = duk.duk_get_string
  getter.restype = c_char_p
  raw = getter(ctx, idx)
  return raw.decode("utf8")

def eval_file(ctx, path):
  """Read the script at ``path`` and evaluate it in the Duktape context.

  The source text is pushed onto the value stack as a string, evaluated with
  duk_peval, and the value left on the stack is popped afterwards.

  Returns the status reported by duk_peval (presumably 0 on success per the
  Duktape API -- confirm). Raises whatever ``open``/``read`` raise when the
  file is missing, unreadable, or not valid UTF-8.
  """
  # Decode explicitly as UTF-8: the text is re-encoded as UTF-8 by str_to_c,
  # so relying on the platform default encoding would corrupt any non-ASCII
  # characters in the script on non-UTF-8 locales.
  with open(path, "r", encoding="utf8") as f:
    code = f.read()
  # The file is closed before evaluation; only the in-memory text is needed.
  [s, l] = str_to_c(code)

  duk.duk_push_lstring(ctx, s, l)
  retval = duk_peval(ctx)
  duk.duk_pop(ctx)
  return retval

def load_file(ctx, path, var):
  """Expose the raw bytes of the file at ``path`` as the JS global ``var``.

  A buffer is pushed, configured to point at the bytes read from disk, and
  stored under the global name ``var``. The bytes object is returned so the
  caller can keep it referenced (parse_file notes this is to avoid GC).
  """
  with open(path, "rb") as handle:
    payload = handle.read()
  payload_ptr = c_char_p(payload)
  # NOTE(review): flags 1 | 2 are presumably DUK_BUF_FLAG_DYNAMIC |
  # DUK_BUF_FLAG_EXTERNAL -- confirm against duktape.h.
  duk.duk_push_buffer_raw(ctx, 0, 1 | 2)
  duk.duk_config_buffer(ctx, -1, payload_ptr, len(payload))
  name_ptr = str_to_c(var)[0]
  duk.duk_put_global_string(ctx, name_ptr)
  return payload

def save_file(ctx, path, var):
  """Write the buffer held in the JS global ``var`` to the file at ``path``.

  The global is pushed onto the value stack, its data pointer and size are
  read with duk_get_buffer_data, and the bytes are copied into Python before
  the stack entry is popped again.
  """
  duk.duk_get_global_string(ctx, str_to_c(var)[0])
  sz = c_size_t()
  duk.duk_get_buffer_data.restype = c_void_p
  buf = duk.duk_get_buffer_data(ctx, -1, byref(sz))
  # Copy the bytes out while the value is still on the stack.
  s = string_at(buf, sz.value)
  # Pop the value pushed by duk_get_global_string; without this every call
  # left one extra entry on the Duktape value stack.
  duk.duk_pop(ctx)
  with open(path, "wb") as f:
    f.write(s)

def initialize():
  """Create a Duktape heap and load the SheetJS scripts into it.

  Evaluates ``shim.min.js`` and ``xlsx.full.min.js`` (paths as given, so they
  are resolved by ``open`` relative to the working directory), prints the
  library version, and returns ``[context, ctx]`` where ``context`` is the
  raw heap value and ``ctx`` is the same value wrapped in ``c_void_p``.
  """
  heap = duk_create_heap_default()
  handle = c_void_p(heap)

  # duktape does not expose a standard "global" by default
  duk_eval_string_noresult(handle, "var global = (function(){ return this; }).call(null);")

  # load the shim first, then the library itself
  for script in ("shim.min.js", "xlsx.full.min.js"):
    eval_file(handle, script)

  # report the version string, then drop it from the stack
  duk_eval_string(handle, "XLSX.version")
  version = duk_get_string(handle, -1)
  print(f"SheetJS Library Version {version}")
  duk.duk_pop(handle)
  return [heap, handle]

def parse_file(ctx, path, name):
  """Parse the spreadsheet at ``path`` and store the workbook in JS global ``name``.

  The file bytes are exposed to JS as the global ``buf`` and a copy of them
  (``buf.slice``) is handed to ``XLSX.read``.
  """
  # NOTE: the returned bytes are bound to a local so they are not garbage
  # collected while the script below runs.
  data = load_file(ctx, path, "buf")

  script = f"{name} = XLSX.read(buf.slice(0, buf.length));"
  duk_eval_string_noresult(ctx, script)

def get_sheet_names(ctx, wb):
  """Return the ``SheetNames`` of the workbook stored in JS global ``wb``.

  The names are serialized to JSON inside the JS engine and decoded here, so
  the result is whatever ``json.loads`` yields for that text.
  """
  duk_eval_string(ctx, f"JSON.stringify({wb}.SheetNames)")
  serialized = duk_get_string(ctx, -1)
  sheet_names = loads(serialized)
  # drop the JSON text from the stack once it has been decoded
  duk.duk_pop(ctx)
  return sheet_names

def get_csv_from_wb(ctx, wb, sheet_name=None):
  """Return one worksheet of the workbook in JS global ``wb`` as CSV text.

  ``sheet_name`` selects the worksheet; when it is falsy the first entry of
  the workbook's ``SheetNames`` is used. The CSV is produced by
  ``XLSX.utils.sheet_to_csv`` and popped from the stack after being read.
  """
  from json import dumps

  if not sheet_name:
    key = f"{wb}.SheetNames[0]"
  else:
    # Emit the name as a JSON string literal so quotes, backslashes and
    # non-ASCII characters in it cannot break the generated script.
    key = dumps(str(sheet_name))
  duk_eval_string(ctx, f"XLSX.utils.sheet_to_csv({wb}.Sheets[{key}])")
  csv = duk_get_string(ctx, -1)
  duk.duk_pop(ctx)
  return csv

def export_df_to_wb(ctx, df, path, sheet_name="Sheet1", book_type=None):
  """Write the DataFrame ``df`` to a spreadsheet file at ``path``.

  The frame is serialized with ``df.to_json(orient="records")``, exposed to
  JS as the global ``json``, converted to a worksheet and workbook there, and
  the written buffer (JS global ``newbuf``) is saved to ``path``.

  ``sheet_name`` names the worksheet; None is treated as "Sheet1".
  ``book_type`` is passed as SheetJS ``bookType``; when falsy it is taken
  from the text after the last "." in ``path``.
  """
  from json import dumps

  # A caller forwarding its own None default should get the documented
  # default name, not a sheet literally called "None".
  if sheet_name is None: sheet_name = "Sheet1"
  payload = df.to_json(orient="records")
  [s, l] = str_to_c(payload)
  duk.duk_push_lstring(ctx, s, l)
  duk.duk_put_global_string(ctx, str_to_c("json")[0])
  if not book_type: book_type = path.split(".")[-1]
  # Both values are emitted as JSON string literals so quotes or backslashes
  # in them cannot break the generated script.
  duk_eval_string_noresult(ctx, f"""
    aoo = JSON.parse(json);
    newws = XLSX.utils.json_to_sheet(aoo);
    newwb = XLSX.utils.book_new(newws, {dumps(str(sheet_name))});
    newbuf = XLSX.write(newwb, {{type:'buffer', bookType:{dumps(str(book_type))}}});
  """)
  save_file(ctx, path, "newbuf")

def get_df_from_wb(ctx, wb, sheet_name=None):
  """Return a worksheet of the workbook in JS global ``wb`` as a DataFrame.

  The worksheet is fetched as CSV text via get_csv_from_wb and parsed with
  ``pandas.read_csv``.
  """
  csv_text = get_csv_from_wb(ctx, wb, sheet_name)
  stream = StringIO(csv_text)
  return read_csv(stream)

class SheetJSWorkbook(object):
  """Handle for one parsed workbook living in the JS engine.

  ``wb`` is the name of the JS global that holds the workbook; ``ctx`` is
  taken from the owning object's ``ctx`` attribute.
  """

  def __init__(self, sheetjs, wb):
    self.ctx, self.wb = sheetjs.ctx, wb

  def get_sheet_names(self):
    """Return the workbook's sheet names."""
    return get_sheet_names(self.ctx, self.wb)

  def get_df(self, sheet_name=None):
    """Return a worksheet as a DataFrame; defaults to the first sheet."""
    target = self.get_sheet_names()[0] if sheet_name is None else sheet_name
    return get_df_from_wb(self.ctx, self.wb, target)

class SheetJS(object):
  """Front end for reading and writing spreadsheets in one Duktape context.

  ``wb_names`` records the JS global names (``wb0``, ``wb1``, ...) assigned
  to workbooks parsed through read_file.
  """

  def __init__(self, ctx):
    self.ctx = ctx
    self.wb_names = []

  def read_file(self, path):
    """Parse the file at ``path`` and return a SheetJSWorkbook for it."""
    self.wb_names.append(f"wb{len(self.wb_names)}")
    parse_file(self.ctx, path, self.wb_names[-1])
    return SheetJSWorkbook(self, self.wb_names[-1])

  def write_df(self, df, path, sheet_name = None):
    """Write the DataFrame ``df`` to ``path``.

    When ``sheet_name`` is None the argument is omitted so that
    export_df_to_wb applies its own default sheet name; forwarding None
    would name the worksheet "None".
    """
    if sheet_name is None:
      export_df_to_wb(self.ctx, df, path)
    else:
      export_df_to_wb(self.ctx, df, path, sheet_name)

class SheetJSWrapper(object):
  """Context manager that owns a Duktape heap for the duration of a ``with``.

  Entering initializes the engine and yields a SheetJS object; exiting
  destroys the heap. Exceptions raised inside the block are not suppressed.
  """

  def __enter__(self):
    self.context, self.ctx = initialize()
    return SheetJS(self.ctx)

  def __exit__(self, exc_type, exc_value, traceback):
    # Deliberately no return value: returning the C call's result could be
    # truthy and would then swallow exceptions from the with-block.
    duk.duk_destroy_heap(self.ctx)