import * as XLSX from "xlsx";
import { createWorker } from "tesseract.js";
import * as pdfjs from "pdfjs-dist";
import { createExtractionConfig } from "../config/extraction";

// Alternative worker source (unpkg), kept for reference in case the cdnjs URL
// assigned below fails to load:
// pdfjs.GlobalWorkerOptions.workerSrc = `//unpkg.com/pdfjs-dist@${pdfjs.version}/build/pdf.worker.min.js`;

// Tell pdf.js where to load its worker script from. The path embeds
// `pdfjs.version` so the worker matches the imported library, and the URL is
// protocol-relative (it starts with `//`), so it uses the hosting page's scheme.
pdfjs.GlobalWorkerOptions.workerSrc = `//cdnjs.cloudflare.com/ajax/libs/pdf.js/${pdfjs.version}/pdf.worker.min.js`;

/**
 * Converters keyed by the `type` name used in column configurations. Each one
 * takes a raw cell value and returns its normalised form:
 *
 * - `string`:  the value's text form; `null`, `undefined` and "" become "".
 * - `number`:  `Number(value)`, or 0 when that is NaN.
 * - `date`:    a "YYYY-MM-DD" string, or `null` for empty/unparseable input.
 * - `boolean`: the value's truthiness.
 */
export const DATA_TYPES = {
  string: (value) => value?.toString() || "",
  number: (value) => {
    const num = Number(value);
    return Number.isNaN(num) ? 0 : num;
  },
  date: (value) => {
    if (!value) return null;

    // Numbers are treated as Excel serial dates; 25569 is the serial that
    // maps to the Unix epoch (1970-01-01).
    const parsed =
      typeof value === "number"
        ? new Date((value - 25569) * 86400 * 1000)
        : new Date(value);

    // Out-of-range serials and unparseable strings produce an invalid Date,
    // on which toISOString() would throw a RangeError.
    if (Number.isNaN(parsed.getTime())) return null;

    if (typeof value === "string") {
      const text = value.trim();
      // ISO date-only strings and strings with an explicit zone designator
      // are anchored to UTC. Every other form is parsed in local time, so it
      // must be formatted with local getters to keep the written day.
      const isUtcAnchored =
        /^\d{4}-\d{2}-\d{2}$/.test(text) ||
        /(?:Z|UTC|GMT|[+-]\d{2}:\d{2})$/i.test(text);

      if (!isUtcAnchored) {
        const year = String(parsed.getFullYear()).padStart(4, "0");
        const month = String(parsed.getMonth() + 1).padStart(2, "0");
        const day = String(parsed.getDate()).padStart(2, "0");
        return `${year}-${month}-${day}`;
      }
    }

    return parsed.toISOString().split("T")[0];
  },
  boolean: (value) => Boolean(value),
};

/**
 * Reads an Excel workbook from a browser `File`/`Blob` and returns each sheet
 * as a 2-D array of cell values.
 *
 * The file is read as an ArrayBuffer and handed to `XLSX.read` as a byte
 * array. Sheets are converted with `header: 1` (rows as arrays) and
 * `defval: ""` (empty cells become empty strings). Sheet names that have no
 * worksheet object are skipped with a warning.
 *
 * @param {Blob} file - The workbook file to read.
 * @returns {Promise<Array<{ name: string, data: Array<Array<*>> }>>}
 *   Resolves with one entry per readable sheet. Rejects with an Error when
 *   the file cannot be read, cannot be parsed, or contains no sheets.
 */
export const handleExcelUpload = (file) => {
  return new Promise((resolve, reject) => {
    const reader = new FileReader();

    reader.onload = (event) => {
      try {
        const bytes = new Uint8Array(event.target.result);
        const workbook = XLSX.read(bytes, { type: "array" });

        if (!workbook.SheetNames || workbook.SheetNames.length === 0) {
          throw new Error("No sheets found in the workbook");
        }

        const sheets = [];
        for (const sheetName of workbook.SheetNames) {
          const worksheet = workbook.Sheets[sheetName];
          if (!worksheet) {
            console.warn(`Worksheet ${sheetName} is undefined`);
            continue;
          }

          sheets.push({
            name: sheetName,
            data: XLSX.utils.sheet_to_json(worksheet, {
              header: 1,
              defval: "",
            }),
          });
        }

        resolve(sheets);
      } catch (error) {
        console.error("Error processing Excel file:", error);
        reject(error);
      }
    };

    reader.onerror = () => {
      // The handler argument is a ProgressEvent with no `message`; the actual
      // failure is exposed on `reader.error`, so reject with that instead.
      const error = reader.error ?? new Error("Failed to read the Excel file");
      console.error("FileReader error:", error);
      reject(error);
    };

    reader.readAsArrayBuffer(file);
  });
};

/**
 * Reads a PDF `File`/`Blob` into memory and creates an object URL for it.
 *
 * The object URL stays alive until it is released, so the caller should pass
 * it to `URL.revokeObjectURL` once it is no longer needed.
 *
 * @param {Blob} file - The PDF file to read.
 * @returns {Promise<{ arrayBuffer: ArrayBuffer, url: string }>} Resolves with
 *   the raw bytes and a `blob:` URL typed as `application/pdf`. Rejects when
 *   the file cannot be read or the URL cannot be created.
 */
export const handlePdfUpload = (file) => {
  return new Promise((resolve, reject) => {
    const reader = new FileReader();

    reader.onload = (e) => {
      try {
        const arrayBuffer = e.target.result;
        const blob = new Blob([arrayBuffer], { type: "application/pdf" });
        resolve({ arrayBuffer, url: URL.createObjectURL(blob) });
      } catch (error) {
        reject(error);
      }
    };

    // Without this handler a failed read would leave the promise pending forever.
    reader.onerror = () => {
      reject(reader.error ?? new Error("Failed to read the PDF file"));
    };

    reader.readAsArrayBuffer(file);
  });
};

/**
 * Reads an image `File`/`Blob` as a data URL and runs English OCR over it.
 *
 * NOTE(review): the recognised text is not returned or stored; only the data
 * URL is resolved. Confirm whether the OCR pass is still wanted here.
 *
 * @param {Blob} file - The image file to read.
 * @returns {Promise<string>} Resolves with the image's data URL after the OCR
 *   worker has been terminated. Rejects when the file cannot be read or any
 *   OCR step fails.
 */
export const handleImageUpload = (file) => {
  // Runs the OCR pass; the worker is always terminated, even when a step throws.
  const runOcr = async (imageUrl) => {
    const worker = await createWorker();
    try {
      await worker.load();
      await worker.loadLanguage("eng");
      await worker.initialize("eng");
      await worker.recognize(imageUrl);
    } finally {
      await worker.terminate();
    }
  };

  return new Promise((resolve, reject) => {
    const reader = new FileReader();

    reader.onload = (event) => {
      const imageUrl = event.target.result;
      // Route OCR failures into the returned promise rather than leaving it
      // pending with an unhandled rejection inside the event handler.
      runOcr(imageUrl).then(() => resolve(imageUrl), reject);
    };

    reader.onerror = () => {
      reject(reader.error ?? new Error("Failed to read the image file"));
    };

    reader.readAsDataURL(file);
  });
};

/**
 * Extracts the text of every page of a PDF, one page at a time.
 *
 * The text items of a page are joined with single spaces and each page is
 * followed by a newline.
 *
 * @param {ArrayBuffer} arrayBuffer - Raw PDF bytes, passed to pdf.js as `data`.
 * @returns {Promise<string>} The concatenated page text, or "" when loading
 *   or reading the document fails (the error is logged, not rethrown).
 */
export const extractTextFromPdf = async (arrayBuffer) => {
  try {
    const pdf = await pdfjs.getDocument({ data: arrayBuffer }).promise;
    const pageTexts = [];

    let pageNumber = 1;
    while (pageNumber <= pdf.numPages) {
      const page = await pdf.getPage(pageNumber);
      const { items } = await page.getTextContent();
      pageTexts.push(items.map((item) => item.str).join(" "));
      pageNumber += 1;
    }

    // Terminate every page, including the last one, with a newline.
    return pageTexts.map((pageText) => `${pageText}\n`).join("");
  } catch (error) {
    console.error("Error extracting text from PDF:", error);
    return "";
  }
};

/**
 * Extracts typed row objects from a sheet given as an array of rows.
 *
 * The header row is the first row with a cell whose text equals
 * `config.headerIdentifier`, ignoring case. Rows after it are kept when
 * `hasValidData` accepts them and are converted with `extractRowData`.
 *
 * @param {Array<Array<*>>} sheetData - Sheet rows, each an array of cells.
 * @param {{ headerIdentifier: string, columns: Object }} config - Section config.
 * @returns {Array<Object>} Extracted rows, or [] when no header row is found.
 */
const extractSheetData = (sheetData, config) => {
  const isHeaderIdentifier = (cell) =>
    cell?.toString().toLowerCase() === config.headerIdentifier.toLowerCase();

  const headerRowIndex = sheetData.findIndex((row) =>
    row.some((cell) => isHeaderIdentifier(cell))
  );
  if (headerRowIndex < 0) {
    return [];
  }

  // Resolve each configured field to its column position in the header row.
  const columnIndices = mapColumnIndices(
    sheetData[headerRowIndex],
    config.columns
  );

  const rowsBelowHeader = sheetData.slice(headerRowIndex + 1);
  return rowsBelowHeader.reduce((extracted, row) => {
    if (hasValidData(row, columnIndices)) {
      extracted.push(extractRowData(row, columnIndices, config.columns));
    }
    return extracted;
  }, []);
};

/**
 * Resolves each configured field to the position of its column header.
 *
 * A header matches when its text equals the field's `header` setting,
 * ignoring case.
 *
 * @param {Array<*>} headers - Cells of the header row.
 * @param {Object<string, { header: string }>} columnConfig - Per-field config.
 * @returns {Object<string, number>} Field name to column index, -1 when absent.
 */
const mapColumnIndices = (headers, columnConfig) => {
  const indices = {};

  for (const [field, fieldConfig] of Object.entries(columnConfig)) {
    const matchesConfiguredHeader = (header) =>
      header?.toString().toLowerCase() === fieldConfig.header.toLowerCase();

    indices[field] = headers.findIndex((header) =>
      matchesConfiguredHeader(header)
    );
  }

  return indices;
};

/**
 * Tells whether a row holds a value in at least one mapped column.
 *
 * Columns mapped to -1 are ignored; `undefined`, `null` and "" count as empty.
 *
 * @param {Array<*>} row - Cells of the row.
 * @param {Object<string, number>} columnIndices - Field name to column index.
 * @returns {boolean} True when some mapped cell is non-empty.
 */
const hasValidData = (row, columnIndices) => {
  const emptyValues = [undefined, null, ""];

  for (const index of Object.values(columnIndices)) {
    if (index === -1) {
      continue;
    }
    if (!emptyValues.includes(row[index])) {
      return true;
    }
  }

  return false;
};

/**
 * Builds an object from one row, converting each mapped cell by its field type.
 *
 * For every field mapped to a column (index other than -1) the cell is passed
 * through the `DATA_TYPES` converter named by the field's `type`, then through
 * the field's `transform` when one is configured. When the column config, the
 * field's entry, or a converter for its `type` is missing, the raw cell value
 * is used instead of throwing.
 *
 * @param {Array<*>} row - Cells of the row.
 * @param {Object<string, number>} columnIndices - Field name to column index.
 * @param {Object<string, { type?: string, transform?: Function }>} [columnConfig={}]
 *   Per-field conversion settings.
 * @returns {Object} Field name to converted value, for mapped fields only.
 */
const extractRowData = (row, columnIndices, columnConfig = {}) => {
  const rowData = {};

  for (const [field, index] of Object.entries(columnIndices)) {
    if (index === -1) continue;

    const fieldConfig = columnConfig?.[field] ?? {};
    const rawValue = row[index];

    // Only own entries count as converters, so a type such as "constructor"
    // cannot pick up an inherited Object.prototype member.
    const convert = Object.prototype.hasOwnProperty.call(
      DATA_TYPES,
      fieldConfig.type
    )
      ? DATA_TYPES[fieldConfig.type]
      : undefined;

    let value = typeof convert === "function" ? convert(rawValue) : rawValue;

    // Apply custom transformation if defined
    if (typeof fieldConfig.transform === "function") {
      value = fieldConfig.transform(value);
    }

    rowData[field] = value;
  }

  return rowData;
};

/**
 * Builds one sheet (header row plus value rows) from a list of records.
 *
 * Column names come from the keys of the first record. A column's header is
 * its configured `label` when one is set, otherwise the column name; values
 * pass through the column's `transform` when one is configured.
 */
const buildSectionSheet = (sectionName, records, sectionConfig) => {
  const columns = sectionConfig.columns ?? {};
  const dataColumns = Object.keys(records[0] ?? {});

  const headers = dataColumns.map(
    (column) => columns[column]?.label || column
  );

  const rows = records.map((record) =>
    dataColumns.map((column) => {
      const value = record?.[column];
      const transform = columns[column]?.transform;
      return transform ? transform(value) : value;
    })
  );

  return {
    name: sectionName,
    data: [headers, ...rows],
    config: sectionConfig,
  };
};

/**
 * Converts structured section data into sheet-shaped tables.
 *
 * Array input is returned unchanged, and any other non-object input yields [].
 * Otherwise each section named in `config` that has data becomes one sheet:
 * a collection section (`isCollection` with array data) gets one row per
 * item, and a non-collection section with object data gets a single row.
 * A section config without `columns` is allowed; raw column names and values
 * are then used.
 *
 * @param {Array|Object} data - Sheet array, or an object keyed by section name.
 * @param {Object<string, { isCollection?: boolean, columns?: Object }>} config
 *   Per-section configuration.
 * @returns {Array} The input array, or `{ name, data, config }` sheet objects.
 */
export const extractSpreadsheetData = (data, config) => {
  // If data is already in array format, return it
  if (Array.isArray(data)) return data;
  if (!data || typeof data !== "object") return [];

  const sheets = [];

  for (const [sectionName, sectionConfig] of Object.entries(config ?? {})) {
    const sectionData = data[sectionName];
    if (!sectionData) continue;

    if (sectionConfig.isCollection && Array.isArray(sectionData)) {
      if (sectionData.length === 0) continue;
      sheets.push(buildSectionSheet(sectionName, sectionData, sectionConfig));
    } else if (!sectionConfig.isCollection && typeof sectionData === "object") {
      // A single object (like LOAD) is rendered as a one-row table.
      sheets.push(buildSectionSheet(sectionName, [sectionData], sectionConfig));
    }
  }

  return sheets;
};

/**
 * Reads label/value metadata from the first ten rows of a sheet.
 *
 * For each configured field, the first row containing a cell equal to the
 * field's `header` (ignoring case) supplies the value. The value is taken
 * `adjacentIndex` cells to the right of the label (1 when unset or 0),
 * converted with the `DATA_TYPES` converter for the field's `type`, then
 * passed through `transform` when one is configured.
 *
 * @param {Array<Array<*>>} sheetData - Sheet rows, each an array of cells.
 * @param {{ columns: Object }} config - Field definitions keyed by field name.
 * @returns {Object} Field name to converted value, for the labels found.
 */
const extractMetadata = (sheetData, config) => {
  const candidateRows = sheetData.slice(0, 10);
  const metadata = {};

  for (const [field, columnConfig] of Object.entries(config.columns)) {
    const isLabelCell = (cell) =>
      cell?.toString().toLowerCase() === columnConfig.header.toLowerCase();

    for (const row of candidateRows) {
      const labelPosition = row.findIndex((cell) => isLabelCell(cell));
      if (labelPosition === -1) {
        continue;
      }

      const offset = columnConfig.adjacentIndex || 1;
      const converted = DATA_TYPES[columnConfig.type](
        row[labelPosition + offset]
      );

      metadata[field] = columnConfig.transform
        ? columnConfig.transform(converted)
        : converted;

      // Only the first matching row is used for a field.
      break;
    }
  }

  return metadata;
};

/**
 * Extracts order objects from the rows below the orders header row.
 *
 * The header row is the first row with a cell equal to
 * `config.headerIdentifier`, ignoring case. Each configured field is matched
 * to a header cell by its `header` text, also ignoring case. Blank cells are
 * skipped; other cells are converted with the `DATA_TYPES` converter for the
 * field's `type` and then `transform` when configured. Fully blank rows and
 * rows that yield no fields are dropped.
 *
 * @param {Array<Array<*>>} sheetData - Sheet rows, each an array of cells.
 * @param {{ headerIdentifier: string, columns: Object }} config - Orders config.
 * @returns {Array<Object>} Extracted orders, or [] when no header row is found.
 */
const extractOrders = (sheetData, config) => {
  const isBlank = (cell) => cell == null || cell === "";
  const isHeaderIdentifier = (cell) =>
    cell?.toString().toLowerCase() === config.headerIdentifier.toLowerCase();

  const headerRowIndex = sheetData.findIndex((row) =>
    row.some((cell) => isHeaderIdentifier(cell))
  );

  if (headerRowIndex === -1) {
    console.warn(
      `No header row found for orders with identifier: ${config.headerIdentifier}`
    );
    return [];
  }

  const headers = sheetData[headerRowIndex];

  // Keep only the fields whose header text is present in the header row.
  const columnMap = {};
  for (const [field, columnConfig] of Object.entries(config.columns)) {
    const index = headers.findIndex(
      (header) =>
        header?.toString().toLowerCase() === columnConfig.header.toLowerCase()
    );
    if (index !== -1) {
      columnMap[field] = { index, config: columnConfig };
    }
  }
  const mappedColumns = Object.entries(columnMap);

  const orders = [];
  sheetData.slice(headerRowIndex + 1).forEach((row) => {
    // Skip rows in which every cell is blank.
    if (row.every((cell) => isBlank(cell))) {
      return;
    }

    const orderData = {};
    for (const [field, { index, config: fieldConfig }] of mappedColumns) {
      const rawValue = row[index];
      if (isBlank(rawValue)) {
        continue;
      }

      const typedValue = DATA_TYPES[fieldConfig.type](rawValue);
      orderData[field] = fieldConfig.transform
        ? fieldConfig.transform(typedValue)
        : typedValue;
    }

    // Drop rows that produced no mapped values.
    if (Object.keys(orderData).length > 0) {
      orders.push(orderData);
    }
  });

  return orders;
};

/**
 * Finds the first row that has a cell whose text contains any identifier.
 *
 * Matching is a case-insensitive substring test. Entries of `sheet` that are
 * not arrays, and falsy cells, never match.
 *
 * @param {Array<*>} sheet - Sheet rows.
 * @param {Array<string>} identifierColumns - Texts that mark a header row.
 * @returns {number} Index of the first matching row, or -1 when none match.
 */
const findHeaderRowIndex = (sheet, identifierColumns) => {
  const mentionsIdentifier = (cell) => {
    if (!cell) {
      return false;
    }
    return identifierColumns.some((identifier) =>
      cell.toString().toLowerCase().includes(identifier.toLowerCase())
    );
  };

  for (let rowIndex = 0; rowIndex < sheet.length; rowIndex += 1) {
    const row = sheet[rowIndex];
    if (Array.isArray(row) && row.some((cell) => mentionsIdentifier(cell))) {
      return rowIndex;
    }
  }

  return -1;
};

/**
 * Resolves each configured key to the position of the first matching header.
 *
 * A header matches when its text contains any of the key's possible names,
 * ignoring case. `null`/`undefined` header cells are skipped rather than
 * dereferenced, so a partially blank header row does not throw.
 *
 * @param {Array<*>} headers - Cells of the header row.
 * @param {Object<string, Array<string>>} columnConfig - Key to candidate names.
 * @returns {Object<string, number>} Key to column index, -1 when not found.
 */
const getColumnIndices = (headers, columnConfig) => {
  return Object.fromEntries(
    Object.entries(columnConfig).map(([key, possibleNames]) => [
      key,
      headers.findIndex((header) => {
        if (header == null) return false;
        const headerText = header.toString().toLowerCase();
        return possibleNames.some((name) =>
          headerText.includes(name.toLowerCase())
        );
      }),
    ])
  );
};

/**
 * Extracts mapped cells from sheet rows using a plain column-index map.
 *
 * Cells are copied as-is: this function receives only column positions, not
 * the per-column type configuration that a typed conversion would need.
 *
 * @param {Array<Array<*>>} rows - Data rows, each an array of cells.
 * @param {Object<string, number>} columnIndices - Field name to column index;
 *   fields mapped to -1 are left out.
 * @param {"multipleRows"|"singleRow"} extractionMethod - Result shape.
 * @returns {Array<Object>|Object} For "multipleRows", one object per row
 *   (objects with no fields are dropped). For "singleRow", an object for the
 *   first row, or {} when there are no rows. For any other method, [] after
 *   logging an error.
 */
const extractDataFromSheet = (rows, columnIndices, extractionMethod) => {
  // Copy the cell of every mapped column into an object keyed by field name.
  const pickMappedCells = (row) => {
    const picked = {};
    for (const [field, index] of Object.entries(columnIndices)) {
      if (index !== -1) {
        picked[field] = row?.[index];
      }
    }
    return picked;
  };

  switch (extractionMethod) {
    case "multipleRows":
      return rows
        .map((row) => pickMappedCells(row))
        .filter((data) => Object.keys(data).length > 0);
    case "singleRow":
      return rows.length > 0 ? pickMappedCells(rows[0]) : {};
    default:
      console.error(`Unknown extraction method: ${extractionMethod}`);
      return [];
  }
};

/**
 * Runs PDF extraction once per section of the extraction config.
 *
 * @param {string} text - Text previously extracted from the PDF.
 * @param {Object<string, Object>} extractionConfig - Section type to config.
 * @param {*} dataStructure - Passed through unchanged to each extraction.
 * @returns {Object<string, *>} Section type to that section's extraction result.
 */
export const extractPDFData = (text, extractionConfig, dataStructure) => {
  const extractedData = {};

  for (const [sectionType, sectionConfig] of Object.entries(extractionConfig)) {
    const sectionResult = extractDataFromPDF(text, sectionConfig, dataStructure);
    extractedData[sectionType] = sectionResult;
  }

  return extractedData;
};

// Helper functions

/**
 * Finds the first header whose text contains any of the given names.
 *
 * Matching is a case-insensitive substring test. `null`/`undefined` header
 * cells are skipped rather than dereferenced, so a partially blank header
 * row does not throw.
 *
 * @param {Array<*>} headers - Cells of the header row.
 * @param {Array<string>} possibleNames - Candidate names for the column.
 * @returns {number} Index of the first matching header, or -1 when none match.
 */
const findColumnIndex = (headers, possibleNames) => {
  return headers.findIndex((header) => {
    if (header == null) return false;
    const headerText = header.toString().toLowerCase();
    return possibleNames.some((name) =>
      headerText.includes(name.toLowerCase())
    );
  });
};

/**
 * Finds the first own key of `headers` that contains any of the given names.
 *
 * Matching is a case-insensitive substring test. Despite the function's name,
 * the matching key itself is returned, not a numeric position.
 *
 * @param {Object} headers - Object whose keys are inspected.
 * @param {Array<string>} possibleNames - Candidate names for the column.
 * @returns {string|undefined} The first matching key, or undefined when none match.
 */
const findColumnKeyIndex = (headers, possibleNames) => {
  for (const key of Object.keys(headers)) {
    const loweredKey = key.toString().toLowerCase();
    const isMatch = possibleNames.some((name) =>
      loweredKey.includes(name.toLowerCase())
    );
    if (isMatch) {
      return key;
    }
  }

  return undefined;
};

/**
 * Parses "key <separator> value" lines of PDF text into categorised data.
 *
 * The text is split on the configured line separator, and each line is
 * trimmed; empty lines are dropped. A line containing a section-start
 * identifier only switches the current section. Any other line is split at
 * the first configured separator it contains. When the key is listed in a
 * field's `aliases`, the value is stored under the field's `category`
 * (falling back to the current section), keyed by `mapTo` or the key text.
 * Values that look like dates are first passed through `formatDateString`.
 * Collection categories receive all of their values in a single first item.
 *
 * @param {string} text - Text previously extracted from the PDF.
 * @param {{ parsing: { pdf: Object }, categories: Object, fields: Object }} config
 *   Section extraction config.
 * @param {*} dataStructure - Not read by this function.
 * @returns {Object} Category name to extracted object, or array for collections.
 */
const extractDataFromPDF = (text, config, dataStructure) => {
  const { categories } = config;
  const parsingConfig = config.parsing.pdf;

  // Start every category empty: a list for collections, an object otherwise.
  const extractedData = {};
  for (const category of Object.keys(categories)) {
    extractedData[category] = categories[category].isCollection ? [] : {};
  }

  const lines = text
    .split(parsingConfig.lineSeparator)
    .map((rawLine) => rawLine.trim())
    .filter((line) => Boolean(line));

  const looksLikeDate = (candidate) =>
    parsingConfig.dateDelimiters.some(
      (delim) =>
        candidate.split(delim).length > 2 &&
        !Number.isNaN(Date.parse(candidate))
    );

  let currentSection = null;

  for (const line of lines) {
    // A section identifier switches the section and consumes the line.
    const sectionStart = parsingConfig.sectionIdentifiers.start.find(
      (identifier) => line.includes(identifier)
    );
    if (sectionStart) {
      currentSection = sectionStart;
      continue;
    }

    const separator = parsingConfig.keyValueSeparators.find((sep) =>
      line.includes(sep)
    );
    if (!separator) {
      continue;
    }

    // Cut at the first separator only; later occurrences stay in the value.
    const cutAt = line.indexOf(separator);
    const trimmedKey = line.slice(0, cutAt).trim();
    const rawValue = line.slice(cutAt + separator.length).trim();
    const value = looksLikeDate(rawValue)
      ? formatDateString(rawValue)
      : rawValue;

    const fieldConfig = Object.values(config.fields).find((field) =>
      field.aliases?.includes(trimmedKey)
    );
    if (!fieldConfig) {
      continue;
    }

    const category = fieldConfig.category || currentSection;
    const categoryConfig = categories[category];
    if (!categoryConfig) {
      continue;
    }

    const transformedValue = fieldConfig.transform
      ? fieldConfig.transform(value)
      : value;
    const targetKey = fieldConfig.mapTo || trimmedKey;

    if (!categoryConfig.isCollection) {
      extractedData[category][targetKey] = transformedValue;
      continue;
    }

    if (!extractedData[category][0]) {
      extractedData[category][0] = {};
    }
    extractedData[category][0][targetKey] = transformedValue;
  }

  return extractedData;
};

/**
 * Normalises a date string to "YYYY-MM-DD".
 *
 * ISO date-only strings and strings with an explicit zone designator are
 * anchored to UTC and formatted from the UTC date. Every other form (for
 * example "01/02/2024") is parsed in local time, so it is formatted with
 * local getters; formatting it through UTC would shift the day in time zones
 * ahead of UTC.
 *
 * @param {string} dateStr - The date text to normalise.
 * @returns {string} The "YYYY-MM-DD" form, or the input unchanged when it
 *   cannot be parsed as a date.
 */
const formatDateString = (dateStr) => {
  const date = new Date(dateStr);
  if (Number.isNaN(date.getTime())) {
    return dateStr; // Return original string if parsing fails
  }

  const text = typeof dateStr === "string" ? dateStr.trim() : "";
  const isUtcAnchored =
    typeof dateStr !== "string" ||
    /^\d{4}-\d{2}-\d{2}$/.test(text) ||
    /(?:Z|UTC|GMT|[+-]\d{2}:\d{2})$/i.test(text);

  if (isUtcAnchored) {
    return date.toISOString().split("T")[0];
  }

  const year = String(date.getFullYear()).padStart(4, "0");
  const month = String(date.getMonth() + 1).padStart(2, "0");
  const day = String(date.getDate()).padStart(2, "0");
  return `${year}-${month}-${day}`;
};

/**
 * Extracts sheet data from an uploaded spreadsheet file.
 *
 * A file counts as a spreadsheet when its MIME type mentions "spreadsheet" or
 * "excel", or when its name ends in ".xlsx"/".xls" in any letter case.
 *
 * NOTE(review): other file types are not handled here; the function resolves
 * with `undefined` for them.
 *
 * @param {File} file - The uploaded file; its `type` and `name` are inspected.
 * @param {*} companyId - Not read by this function.
 * @param {Object} dataStructure - Passed to `createExtractionConfig` and
 *   `extractSpreadsheetData`; also the fallback `extractedData` on error.
 * @returns {Promise<Object|undefined>} `{ sheetData, extractedData }` for
 *   spreadsheets, or `{ error: true, message, sheetData: [], extractedData }`
 *   when anything throws. Errors are logged and never rethrown.
 */
export const extractFileData = async (file, companyId, dataStructure) => {
  try {
    const config = createExtractionConfig(dataStructure);

    // Compare in lower case so names such as "REPORT.XLSX" are recognised too.
    const mimeType = (file.type ?? "").toLowerCase();
    const fileName = (file.name ?? "").toLowerCase();
    const isSpreadsheet =
      mimeType.includes("spreadsheet") ||
      mimeType.includes("excel") ||
      fileName.endsWith(".xlsx") ||
      fileName.endsWith(".xls");

    if (isSpreadsheet) {
      const content = await handleExcelUpload(file);

      const formattedSheets = content.map((sheet, index) => ({
        name: sheet.name || `Sheet ${index + 1}`,
        data: sheet.data || [],
      }));

      const extractedData = extractSpreadsheetData(
        formattedSheets,
        config,
        dataStructure
      );

      return {
        sheetData: formattedSheets,
        extractedData,
      };
    }

    // ... handle other file types ...
  } catch (error) {
    console.error("Error in extractFileData:", error);
    return {
      error: true,
      // Rejections are not always Error instances (for example FileReader
      // events), so fall back to a string form to keep `message` defined.
      message: error?.message ?? String(error),
      sheetData: [],
      extractedData: dataStructure || {},
    };
  }
};
