"""CsvDatasetReader class."""

__copyright__ = "(C) Copyright Aquaveo 2025"
__license__ = "All rights reserved"

# 1. Standard Python modules
import csv
from pathlib import Path

# 2. Third party modules

# 3. Aquaveo modules
from xms.datasets.dataset_reader import DatasetReader
from xms.datasets.dataset_writer import DatasetWriter

# 4. Local modules
from xms.mf6.data import time_util
from xms.mf6.data.time_util import datetime_from_arbitrary_string
from xms.mf6.misc import util


def dataset_from_csv(csv_filepath: Path | str) -> DatasetReader | None:
    """Build a dataset from the contents of a .csv file.

    Convenience wrapper that constructs a CsvDatasetReader for the file and returns the result of its read().

    See https://www.xmswiki.com/wiki/GMS:File_Import_Wizard_Supported_File_Formats#Dataset

    Args:
        csv_filepath: The .csv filepath.

    Returns:
        Whatever CsvDatasetReader.read() returns: a DatasetReader, or None if reading failed with a ValueError.
    """
    return CsvDatasetReader(csv_filepath).read()


class CsvDatasetReader:
    """Reads a .csv file containing a dataset.

    See https://www.xmswiki.com/wiki/GMS:File_Import_Wizard_Supported_File_Formats#Dataset

    The heading row holds one "<dataset name>|<time>" cell per column. Every following row holds one value per
    column, so each column of the file becomes one timestep of the dataset.
    """
    def __init__(self, csv_filepath: Path | str):
        """Initializer.

        Args:
            csv_filepath: The .csv filepath.
        """
        self._csv_filepath = csv_filepath
        self._names = []  # Dataset name from each heading cell (one per column)
        self._time_strings = []  # Time text from each heading cell (one per column)
        self._time_floats = []  # Times converted to floats (one per column)
        self._ref_time = None  # Set only when the heading times are dates/times rather than numbers
        self._ts_values = []  # 2D list of values, 1st dimension is the column (timestep)
        self._time_units = 'Days'  # The csv file doesn't include time units, so we assume 'Days'
        self._dataset_writer = None

    def read(self) -> DatasetReader | None:
        """Read a .csv file and return a dataset.

        See https://www.xmswiki.com/wiki/GMS:File_Import_Wizard_Supported_File_Formats#Dataset
        Assumes only one dataset is in the csv file.

        Returns:
            The dataset, or None if a ValueError was raised while reading (e.g. the file is empty, a heading cell
            has no "|" separator, a value is not a number, or the rows do not all have the same number of columns).
            Errors opening the file are not caught here.
        """
        self._reset_state()  # So that calling read() again does not append to the results of an earlier call
        try:
            self._read_csv_file()
            self._convert_time_strings_to_floats()
            self._write_dataset()
            return self._create_dataset_reader()
        except ValueError:
            return None

    def _reset_state(self) -> None:
        """Clear everything gathered by a previous read."""
        self._names = []
        self._time_strings = []
        self._time_floats = []
        self._ref_time = None
        self._ts_values = []
        self._dataset_writer = None

    def _read_csv_file(self) -> None:
        """Read the csv file into RAM."""
        # The csv module documentation says files given to csv.reader should be opened with newline=''
        with open(self._csv_filepath, 'r', newline='') as fp:
            csv_reader = csv.reader(fp)
            self._read_heading_row(csv_reader)
            self._read_values(csv_reader)

    def _read_heading_row(self, csv_reader) -> tuple[list[str], list[str]]:
        """Read the heading row and return list of dataset names and time strings (both size of num columns).

        Args:
            csv_reader: The csv reader.

        Returns:
            Tuple of (dataset names, time strings).

        Raises:
            ValueError: If there is no heading row, it is blank, or a heading cell has no "|" separator.
        """
        row = next(csv_reader, None)  # Read heading row. None means the file had no rows at all.
        if not row:
            raise ValueError('The csv file has no heading row.')
        for heading in row:
            words = heading.split("|")
            if len(words) < 2:
                raise ValueError(f'Heading "{heading}" is not in the form "name|time".')
            name, ts_time = words[0], words[1]
            self._names.append(name)
            self._time_strings.append(ts_time)
        return self._names, self._time_strings

    def _read_values(self, csv_reader) -> None:
        """Read the values from the csv file into a 2D list of values, with 1st dimension the column (timestep).

        Blank rows are skipped.

        Args:
            csv_reader: The csv reader, positioned after the heading row.

        Raises:
            ValueError: If a value cannot be converted to float, or a row does not have the same number of columns
                as the first row of values.
        """
        for row in csv_reader:
            if not row:
                continue  # csv.reader gives an empty list for a blank line
            if not self._ts_values:
                self._ts_values = [[] for _ in row]
            elif len(row) != len(self._ts_values):
                raise ValueError(f'Expected {len(self._ts_values)} columns but found {len(row)}.')
            for column_values, cell in zip(self._ts_values, row):
                column_values.append(float(cell))

    def _convert_time_strings_to_floats(self) -> None:
        """Convert the time strings to floats and store them in self._time_floats.

        If the first time string is not a number, it is parsed as a date/time and used as the reference time; every
        time is then stored as the duration from the reference time, in self._time_units. Otherwise the time strings
        are converted to floats directly.

        Raises:
            ValueError: If there are no time strings, or a time string cannot be converted to float.
        """
        if not self._time_strings:
            raise ValueError('No times were read from the heading row.')

        # If the first time string is a date/time, use it for ref_time
        first_time_str = self._time_strings[0]
        if util.is_number(first_time_str):
            self._ref_time = None
        else:
            self._ref_time = datetime_from_arbitrary_string(first_time_str)

        units = self._time_units.upper()
        for time_str in self._time_strings:
            if self._ref_time:
                ts_datetime = datetime_from_arbitrary_string(time_str)
                self._time_floats.append(time_util.time_duration(self._ref_time, ts_datetime, units))
            else:
                self._time_floats.append(float(time_str))

    def _write_dataset(self) -> None:
        """Write the dataset to disk.

        Raises:
            ValueError: If there are more columns of values than there are heading columns.
        """
        if len(self._ts_values) > len(self._time_floats):
            raise ValueError('The csv file has more columns of values than heading columns.')
        self._dataset_writer = DatasetWriter(name=self._names[0], ref_time=self._ref_time, time_units=self._time_units)
        # One timestep per column of values. zip stops at the last column of values.
        for ts_time, values in zip(self._time_floats, self._ts_values):
            self._dataset_writer.append_timestep(ts_time, values)
        self._dataset_writer.appending_finished()

    def _create_dataset_reader(self) -> DatasetReader:
        """Create and return a DatasetReader for the file written by the dataset writer."""
        return DatasetReader(self._dataset_writer.h5_filename, dset_name=self._names[0])
