2024-01-30 09:27:22 +00:00
|
|
|
# Shared-library filename for Duktape 2.7.0 (soname version 207.20700).
lib = "libduktape.207.20700.so"
|
|
|
|
|
|
|
|
from ctypes import CDLL, byref, string_at, c_int, c_void_p, c_char_p, c_size_t
|
|
|
|
from json import loads
|
|
|
|
from io import StringIO
|
|
|
|
from pandas import read_csv
|
|
|
|
|
|
|
|
# Handle to the Duktape shared library; every duk_* C call goes through this.
duk = CDLL(lib)
|
|
|
|
|
|
|
|
def str_to_c(s):
    """Encode *s* as UTF-8 and pair the pointer with its byte length.

    Returns ``[c_char_p, int]``, the (pointer, length) pair expected by
    Duktape's lstring-style C entry points.
    """
    encoded = s.encode("utf8")
    return [c_char_p(encoded), len(encoded)]
|
|
|
|
|
|
|
|
def duk_create_heap_default():
    """Create a Duktape heap with default alloc/realloc/free/fatal handlers.

    Mirrors the C convenience macro ``duk_create_heap_default()``; the
    context pointer comes back as a plain Python int (restype c_void_p).
    """
    duk.duk_create_heap.restype = c_void_p
    heap = duk.duk_create_heap(None, None, None, None, None)
    return heap
|
|
|
|
|
|
|
|
def duk_eval_string_noresult(ctx, cmd):
    """Evaluate JS source *cmd*, discarding the result.

    Equivalent of the C macro ``duk_eval_string_noresult``; the flag word
    is the same DUK_COMPILE_* bit combination the macro expands to.
    """
    ptr, length = str_to_c(cmd)
    flags = 1 | (1 << 3) | (1 << 9) | (1 << 10) | (1 << 8) | (1 << 11)
    return duk.duk_eval_raw(ctx, ptr, length, flags)
|
|
|
|
|
|
|
|
def duk_eval_string(ctx, cmd):
    """Evaluate JS source *cmd*, leaving the result on the value stack.

    Equivalent of the C macro ``duk_eval_string``; caller must pop the
    result when done.
    """
    ptr, length = str_to_c(cmd)
    flags = 0 | (1 << 3) | (1 << 9) | (1 << 10) | (1 << 11)
    return duk.duk_eval_raw(ctx, ptr, length, flags)
|
|
|
|
|
|
|
|
def duk_peval(ctx):
    """Protected eval of the string on top of the value stack.

    Equivalent of the C macro ``duk_peval``; returns 0 on success,
    nonzero when the evaluation threw.
    """
    flags = 1 | (1 << 3) | (1 << 7) | (1 << 11)
    return duk.duk_eval_raw(ctx, None, 0, flags)
|
|
|
|
|
|
|
|
def duk_get_string(ctx, idx):
    """Return the string at value-stack index *idx*, decoded as UTF-8.

    Fix: ``duk_get_string()`` returns NULL when the slot does not hold a
    string, which ctypes maps to None; previously that crashed with
    AttributeError on ``.decode``. Now returns None in that case.
    """
    duk.duk_get_string.restype = c_char_p
    raw = duk.duk_get_string(ctx, idx)
    if raw is None:
        # Non-string value (or invalid index) at idx.
        return None
    return raw.decode("utf8")
|
|
|
|
|
|
|
|
def eval_file(ctx, path):
    """Read the JS source file at *path* and evaluate it in the context.

    Returns the duk_peval status code (0 on success). The eval result
    (or error object) is popped before returning.
    """
    with open(path, "r") as f:
        source = f.read()

    ptr, length = str_to_c(source)
    duk.duk_push_lstring(ctx, ptr, length)
    status = duk_peval(ctx)
    duk.duk_pop(ctx)  # discard the result / error object
    return status
|
|
|
|
|
|
|
|
def load_file(ctx, path, var):
    """Expose the bytes of the file at *path* to JS as the global *var*.

    Returns the raw bytes. The caller MUST keep the returned object alive
    for as long as JS code may touch the buffer: Duktape stores only a
    pointer into it, not a copy.
    """
    with open(path, "rb") as f:
        data = f.read()
    # Pointer into `data`'s byte storage; valid while `data` is referenced.
    ptr = c_char_p(data)
    # Flags 1 | 2 — presumably DUK_BUF_FLAG_DYNAMIC | DUK_BUF_FLAG_EXTERNAL;
    # verify against duktape.h.
    duk.duk_push_buffer_raw(ctx, 0, 1 | 2)
    # Point the external buffer at our Python-owned bytes (zero-copy).
    duk.duk_config_buffer(ctx, -1, ptr, len(data))
    # Pops the buffer and binds it to the global name in `var`.
    duk.duk_put_global_string(ctx, str_to_c(var)[0])
    # Returned so the caller can pin the bytes against GC.
    return data
|
|
|
|
|
|
|
|
def save_file(ctx, path, var):
    """Write the JS global buffer *var* out to the local file at *path*.

    Fix: ``duk_get_global_string`` pushes the value onto the Duktape
    value stack; previously it was never popped, so every call leaked
    one stack slot. The bytes are copied out before the pop, so the
    write is safe.
    """
    duk.duk_get_global_string(ctx, str_to_c(var)[0])
    sz = c_size_t()
    duk.duk_get_buffer_data.restype = c_void_p
    buf = duk.duk_get_buffer_data(ctx, -1, byref(sz))
    # Copy the buffer contents into Python memory before releasing it.
    payload = string_at(buf, sz.value)
    duk.duk_pop(ctx)  # balance the value stack (was leaked before)
    with open(path, "wb") as f:
        f.write(payload)
|
|
|
|
|
|
|
|
def initialize():
    """Create a Duktape heap, install `global`, and load the SheetJS library.

    Returns ``[raw_context, c_void_p_context]``; the raw int is retained
    so the heap can be destroyed later.
    """
    raw_ctx = duk_create_heap_default()
    ctx = c_void_p(raw_ctx)

    # Duktape does not expose a standard "global" binding by default.
    duk_eval_string_noresult(ctx, "var global = (function(){ return this; }).call(null);")

    # Load the ES5 shim followed by the full SheetJS build.
    for script in ("shim.min.js", "xlsx.full.min.js"):
        eval_file(ctx, script)

    # Report the loaded library version, then clean up the stack.
    duk_eval_string(ctx, "XLSX.version")
    print(f"SheetJS Library Version {duk_get_string(ctx, -1)}")
    duk.duk_pop(ctx)

    return [raw_ctx, ctx]
|
|
|
|
|
|
|
|
def parse_file(ctx, path, name):
    """Read the workbook file at *path* and parse it into the JS global *name*."""
    # Keep a reference to the bytes so the external buffer stays valid
    # while XLSX.read consumes it (avoids Python GC reclaiming it).
    data = load_file(ctx, path, "buf")

    # Slice first so the parser operates on a plain (non-external) copy.
    duk_eval_string_noresult(ctx, f"{name} = XLSX.read(buf.slice(0, buf.length));")
|
|
|
|
|
|
|
|
def get_sheet_names(ctx, wb):
    """Return the sheet names of the JS workbook *wb* as a Python list."""
    duk_eval_string(ctx, f"JSON.stringify({wb}.SheetNames)")
    names = loads(duk_get_string(ctx, -1))
    duk.duk_pop(ctx)
    return names
|
|
|
|
|
|
|
|
def get_csv_from_wb(ctx, wb, sheet_name=None):
    """Render one worksheet of *wb* as CSV text (first sheet when unset)."""
    if not sheet_name:
        # JS expression picking the first sheet's name.
        sheet_name = f"{wb}.SheetNames[0]"
    else:
        # Quote the literal sheet name for JS.
        sheet_name = f"'{sheet_name}'"
    duk_eval_string(ctx, f"XLSX.utils.sheet_to_csv({wb}.Sheets[{sheet_name}])")
    text = duk_get_string(ctx, -1)
    duk.duk_pop(ctx)
    return text
|
|
|
|
|
|
|
|
def export_df_to_wb(ctx, df, path, sheet_name="Sheet1", book_type=None):
    """Write a pandas DataFrame to a spreadsheet file via SheetJS.

    Fix: when a caller passes ``sheet_name=None`` explicitly (bypassing
    the default), the literal string 'None' used to leak into the JS
    template as the worksheet name; falsy values now fall back to
    "Sheet1". ``book_type`` defaults to the file extension of *path*.
    """
    if not sheet_name:
        sheet_name = "Sheet1"
    json = df.to_json(orient="records")
    [s, l] = str_to_c(json)
    # Hand the JSON text to JS as the global "json".
    duk.duk_push_lstring(ctx, s, l)
    duk.duk_put_global_string(ctx, str_to_c("json")[0])
    if not book_type:
        book_type = path.split(".")[-1]
    duk_eval_string_noresult(ctx, f"""
aoo = JSON.parse(json);
newws = XLSX.utils.json_to_sheet(aoo);
newwb = XLSX.utils.book_new(newws, '{sheet_name}');
newbuf = XLSX.write(newwb, {{type:'buffer', bookType:'{book_type}'}});
""")
    # Pull the written buffer back out of JS and persist it.
    save_file(ctx, path, "newbuf")
|
|
|
|
|
|
|
|
def get_df_from_wb(ctx, wb, sheet_name=None):
    """Extract one worksheet of *wb* as a pandas DataFrame."""
    buffer = StringIO(get_csv_from_wb(ctx, wb, sheet_name))
    return read_csv(buffer)
|
|
|
|
|
|
|
|
class SheetJSWorkbook(object):
    """Handle to a parsed workbook living inside the Duktape context."""

    def __init__(self, sheetjs, wb):
        # Share the parent's context; remember the JS global variable name.
        self.ctx = sheetjs.ctx
        self.wb = wb

    def get_sheet_names(self):
        """List the worksheet names in this workbook."""
        return get_sheet_names(self.ctx, self.wb)

    def get_df(self, sheet_name=None):
        """Return one worksheet as a DataFrame (first sheet by default)."""
        if sheet_name is None:
            sheet_name = self.get_sheet_names()[0]
        return get_df_from_wb(self.ctx, self.wb, sheet_name)
|
2023-07-30 03:17:31 +00:00
|
|
|
|
2024-01-30 09:27:22 +00:00
|
|
|
class SheetJS(object):
    """High-level wrapper around a Duktape context with SheetJS loaded."""

    def __init__(self, ctx):
        self.ctx = ctx
        # JS global names of workbooks parsed so far (wb0, wb1, ...).
        self.wb_names = []

    def read_file(self, path):
        """Parse the workbook at *path*; returns a SheetJSWorkbook handle."""
        self.wb_names.append(f"wb{len(self.wb_names)}")
        parse_file(self.ctx, path, self.wb_names[-1])
        return SheetJSWorkbook(self, self.wb_names[-1])

    def write_df(self, df, path, sheet_name=None):
        """Export *df* to the spreadsheet file at *path*.

        Fix: previously ``sheet_name=None`` was forwarded unconditionally,
        overriding export_df_to_wb's "Sheet1" default so the worksheet was
        literally named 'None'. Forward it only when the caller supplied one.
        """
        if sheet_name is None:
            export_df_to_wb(self.ctx, df, path)
        else:
            export_df_to_wb(self.ctx, df, path, sheet_name)
|
|
|
|
|
|
|
|
class SheetJSWrapper(object):
    """Context manager owning the Duktape heap lifetime."""

    def __enter__(self):
        # Keep both the raw pointer and the c_void_p wrapper alive.
        context, ctx = initialize()
        self.context = context
        self.ctx = ctx
        return SheetJS(ctx)

    def __exit__(self, exc_type, exc_value, traceback):
        # Tear down the heap; exceptions propagate (nothing suppressed).
        duk.duk_destroy_heap(self.ctx)
|