import { Document } from "@langchain/core/documents";
import { BufferLoader } from "langchain/document_loaders/fs/buffer";
import { read, utils } from "xlsx";

/**
 * Document loader that uses SheetJS to load documents.
 *
 * Each worksheet is parsed into an array of row objects using the SheetJS
 * `sheet_to_json` method and projected to a `Document`. Metadata includes
 * original sheet name, row data, and row index
 */
export default class LoadOfSheet extends BufferLoader {
  /** @type {import("langchain/chains/query_constructor").AttributeInfo[]}  */
  attributes = [];

  /**
   * Document loader that uses SheetJS to load documents.
   *
   * @param {string|Blob} filePathOrBlob Source Data
   */
  constructor(filePathOrBlob) {
    super(filePathOrBlob);
    this.attributes = [];
  }

  /**
   * Parse document
   *
   * NOTE: column labels in multiple sheets are not disambiguated!
   *
   * @param {Buffer} raw Raw data Buffer
   * @param {Document["metadata"]} metadata Document metadata
   * @returns {Promise<Document[]>} Array of Documents
   */
  async parse(raw, metadata) {
    /** @type {Document[]} */
    const result = [];

    this.attributes = [
      { name: "worksheet", description: "Sheet or Worksheet Name", type: "string" },
      { name: "rowNum", description: "Row index", type: "number" }
    ];

    const wb = read(raw, {type: "buffer", WTF:1});
    for(let name of wb.SheetNames) {
      const fields = {};
      const ws = wb.Sheets[name];
      if(!ws) return;

      const aoo = utils.sheet_to_json(ws);
      aoo.forEach((row, idx) => {
        result.push({
          pageContent: "Row " + (idx + 1) + " has the following content: \n" + Object.entries(row).map(kv => `- ${kv[0]}: ${kv[1]}`).join("\n") + "\n",
          metadata: {
            worksheet: name,
            rowNum: row["__rowNum__"],
            ...metadata,
            ...row
          }
        });
        Object.entries(row).forEach(([k,v]) => { if(v != null) (fields[k] || (fields[k] = {}))[v instanceof Date ? "date" : typeof v] = true } );
      });
      Object.entries(fields).forEach(([k,v]) => this.attributes.push({
        name: k, description: k, type: Object.keys(v).join(" or ")
      }));
    }

    return result;
  }
};