"""This module is a reader for ASCII dataset .dat files."""

# 1. Standard Python modules
from datetime import datetime, timedelta
from itertools import islice
import logging
from pathlib import Path
from random import Random
import re
import uuid

# 2. Third party modules

# 3. Aquaveo modules
from xms.core.filesystem import filesystem

# 4. Local modules
from xms.datasets.dataset_reader import DatasetReader
from xms.datasets.dataset_writer import DatasetWriter

log = logging.getLogger('xms.datasets')
COLUMNS_IN_TIMESTEP_ROW = 3
TIMESTEP_ROW_REGEX = r'TS\s+[01]\s+([\d\.+e]*)\n'
repeatable_random: Random | None = None  # Can be set to Random() and used to create repeatable random numbers and uuids


# Copied from xmscoverage because we can't depend on xmscoverage
def _mfc_date_number_to_datetime(mfc_date_number: float) -> datetime:
    """Converts the mfc_date_number float to a datetime.

    The DATE type is implemented using an 8-byte floating-point number. Days are represented by whole number
    increments starting with 30 December 1899, midnight as time zero. Hour values are expressed as the absolute
    value of the fractional part of the number.
    """
    base_date = datetime(year=1899, month=12, day=30, hour=0)
    delta = timedelta(days=mfc_date_number)
    return base_date + delta


# Copied from xmsguipy because we can't depend on xmsguipy
def new_uuid() -> str:
    """Create a version 4 uuid and return it as a string.

    When the module-level ``repeatable_random`` is set, the uuid is built from the next 128 bits it produces so
    that the sequence of uuids can be repeated when testing. Otherwise a regular random uuid is generated.
    """
    if not repeatable_random:
        return str(uuid.uuid4())
    seeded_bits = repeatable_random.getrandbits(128)
    return str(uuid.UUID(int=seeded_bits, version=4))


def parse_dat_header(file_path: Path | str) -> dict:
    """Read the header cards of a .dat file into a dict.

    Only the first 1024 characters of the file are read. That chunk must contain the whole header, the first
    timestep row, and the first line of data, because the first line of data is used to determine the
    dimensionality of the dataset.

    Args:
        file_path: path to .dat file

    Returns:
        dict of card/value pairs from the header
    """
    with open(file_path, 'r') as dat_file:
        leading_chunk = dat_file.read(1024)
    header = _parse_header(leading_chunk)
    return header


def _parse_header(text) -> dict:
    """Helper function for parsing .dat file headers.

    Args:
        text: the content of the first chunk of the file

    Returns:
        dict of card/value pairs from the header
    """
    matches = re.split(TIMESTEP_ROW_REGEX, text)
    if len(matches) < 3:
        raise ValueError('The file is not formatted correctly')
    header = [row.split() for row in matches[0].splitlines()]
    header = {row[0].upper(): (' '.join(row[1:]).strip('" ') if len(row) > 1 else '') for row in header}
    dimensionality = len(matches[2].splitlines()[0].strip().split())
    header['DIM'] = dimensionality
    return header


def parse_dat_file(file_path: Path | str, **kwargs) -> DatasetReader:
    """Parse a .dat file into a XMDF .h5 file and return a DatasetReader.

    Since .dat files don't include all the information that XMDF files do, you can use the optional kwargs to supply
    missing information. The kwargs are a subset of those from DatasetWriter. See DatasetWriter and the list below.
    The kwargs will be used instead of the data from the .dat file if both are present.

    Args:
        file_path: path to .dat file

    Keyword Args:
        h5_filename (Path | str): Path to the .h5 file to write. If omitted or None, the .h5 file will be created in
                the temp dir.
        name (str): Name of the dataset (will be used to build the dataset's group path)
        dset_uuid (str): UUID of the dataset
        geom_uuid (str): UUID of the dataset's geometry
        ref_time (float | datetime): The dataset's reference time. Either a Julian float or a Python datetime
        time_units (str): The dataset's time units. One of: 'Seconds', 'Minutes', 'Hours', 'Days'
        units (str): Units of the dataset values
        location (str): Location of the dataset values. One of the XMDF_DATA_LOCATIONS keys. Note that this
                does not usually need to be set because XMS is going to ignore it in most cases. XMS will try to
                determine the dataset location based on the geometries currently loaded (number of nodes, number of
                points). Here for historical reasons.
        overwrite (bool): If True and the file already exists, it will be overwritten

    Returns:
        DatasetReader.

    Raises:
        ValueError: If the header of the file cannot be parsed.
        KeyError: If the header has no ND card.
        RuntimeError: If a timestep row is malformed or a timestep has fewer than ND rows.
    """
    header = parse_dat_header(file_path)

    # An explicit h5_filename=None means "use a temp file", the same as leaving the argument out
    h5_filename = kwargs.get('h5_filename')
    if h5_filename is None:
        h5_filename = filesystem.temp_filename(suffix='.h5')
    h5_filename = str(h5_filename)
    name = kwargs.get('name', header.get('NAME'))

    # Create the DatasetWriter
    dataset_writer = DatasetWriter(
        h5_filename=h5_filename,
        name=name,
        # new_uuid() is called even when dset_uuid is supplied so repeatable uuid sequences are unchanged
        dset_uuid=kwargs.get('dset_uuid', new_uuid()),
        geom_uuid=kwargs.get('geom_uuid', ''),
        num_components=header['DIM'],
        ref_time=kwargs.get('ref_time', _get_reftime(header)),
        time_units=kwargs.get('time_units', header.get('TIMEUNITS', 'Days')),
        units=kwargs.get('units', ''),
        location=kwargs.get('location', 'points'),
        overwrite=kwargs.get('overwrite', True)
    )

    nd = int(header['ND'])  # number of data values
    times, values, activity = _read_dat_timesteps(file_path, nd, 'BEGSCL' in header)

    # Append the timesteps
    for index, time in enumerate(times):
        dataset_writer.append_timestep(time, values[index], activity[index] if activity else None)
    dataset_writer.appending_finished()

    # Return a DatasetReader
    return DatasetReader(h5_filename, name)


def _read_dat_timesteps(file_path: Path | str, nd: int, is_scalar: bool) -> tuple[list, list, list | None]:
    """Read every timestep of a .dat file.

    Args:
        file_path: path to .dat file
        nd: number of rows in each activity block and each value block
        is_scalar: True if each value row holds a single float, False if it holds several

    Returns:
        (times, values, activity), one entry per timestep. activity is None if no timestep in the file provided
        activity; otherwise timesteps without activity get a list of nd ones.

    Raises:
        RuntimeError: If a timestep row is malformed or a timestep has fewer than nd rows.
    """
    times = []
    values = []
    activity = []
    activity_found = False
    in_header = True

    with Path(file_path).open() as file:
        for line in file:
            words = line.split()
            if not words:
                continue  # Blank lines (e.g. at the end of the file) hold no data
            if in_header:
                # Find the first timestep row instead of counting header rows, so repeated cards and blank
                # lines in the header cannot throw off the position
                if words[0] != 'TS':
                    continue
                in_header = False
            if words[0] == 'ENDDS':
                break
            _ensure(words[0] == 'TS' and len(words) >= COLUMNS_IN_TIMESTEP_ROW, 'Invalid timestep row in file')
            times.append(float(words[2]))

            # Read or create activity
            if words[1] == '1':  # Activity precedes values
                activity_found = True
                rows = list(islice(file, nd))  # Read nd lines
                _ensure(len(rows) == nd, 'Lines missing in file')
                activity.append([int(row) for row in rows])
            else:
                activity.append([1] * nd)  # Create activity even if none was provided

            # Read the values
            rows = list(islice(file, nd))  # Read nd lines
            _ensure(len(rows) == nd, 'Lines missing in file')
            if is_scalar:
                values.append([float(row) for row in rows])
            else:
                values.append([[float(word) for word in row.split()] for row in rows])

    return times, values, activity if activity_found else None


def _get_reftime(header: dict) -> float:
    """Return the reftime float."""
    ref_time = None
    if 'REFTIME' in header:
        ref_time = _mfc_date_number_to_datetime(float(header['REFTIME']))
    elif 'RT_JULIAN' in header:
        ref_time = float(header['RT_JULIAN'])
    return ref_time


def _ensure(condition, msg: str) -> None:
    """Helper function to avoid many nested if statements, and to raise an exception with 1 line instead of 2."""
    if not condition:
        raise RuntimeError(msg)
