"""Class to read TUFLOWFV BC curves."""
# 1. Standard python modules
import logging

# 2. Third party modules
import numpy as np
import pandas as pd

# 3. Aquaveo modules

# 4. Local modules
from xms.tuflowfv.file_io import io_util


"""
Callbacks used with the pandas CSV reader.
"""


def parse_float(token):
    """Parse a string into a float.

    Args:
        token (str): The string to parse

    Returns:
        Union[float, None]: The parsed float or None if not a float
    """
    try:
        return float(token)
    except (TypeError, ValueError):
        # ValueError: text that is not a number. TypeError: a non-string/non-numeric token such as None.
        # Either way the token is "not a float", which this function reports as None.
        return None


def parse_isodate_seconds(token):
    """Convert a TUFLOWFV 'dd/mm/YYYY HH:MM:SS' string into a pandas Timestamp.

    Args:
        token (str): The string to parse

    Returns:
        Union[pd.Timestamp, None]: The parsed datetime, or None if the token does not match the format
    """
    # TUFLOWFV calls this ISODATE, but it is day-first and slash separated (not a true ISO 8601 date).
    fmt = '%d/%m/%Y %H:%M:%S'
    try:
        stamp = pd.to_datetime(token, format=fmt)
    except ValueError:
        stamp = None
    return stamp


def parse_isodate_minutes(token):
    """Convert a TUFLOWFV 'dd/mm/YYYY HH:MM' string into a pandas Timestamp.

    Args:
        token (str): The string to parse

    Returns:
        Union[pd.Timestamp, None]: The parsed datetime, or None if the token does not match the format
    """
    # Same day-first TUFLOWFV format as the seconds variant, with the seconds field truncated.
    fmt = '%d/%m/%Y %H:%M'
    try:
        stamp = pd.to_datetime(token, format=fmt)
    except ValueError:
        stamp = None
    return stamp


def parse_isodate_hours(token):
    """Convert a TUFLOWFV 'dd/mm/YYYY HH' string into a pandas Timestamp.

    Args:
        token (str): The string to parse

    Returns:
        Union[pd.Timestamp, None]: The parsed datetime, or None if the token does not match the format
    """
    # Same day-first TUFLOWFV format, with both the minutes and seconds fields truncated.
    fmt = '%d/%m/%Y %H'
    try:
        stamp = pd.to_datetime(token, format=fmt)
    except ValueError:
        stamp = None
    return stamp


def parse_isodate_days(token):
    """Convert a TUFLOWFV 'dd/mm/YYYY' date-only string into a pandas Timestamp.

    Args:
        token (str): The string to parse

    Returns:
        Union[pd.Timestamp, None]: The parsed datetime, or None if the token does not match the format
    """
    # Same day-first TUFLOWFV format, with the whole time-of-day portion omitted.
    fmt = '%d/%m/%Y'
    try:
        stamp = pd.to_datetime(token, format=fmt)
    except ValueError:
        stamp = None
    return stamp


def parse_tuflowfv_time(token, time_format=None):
    """Parse a TUFLOWFV time value, which may be an absolute datetime string or a float offset.

    Parsing is attempted in this order, returning the first success:
        1. The caller supplied strptime format (only if `time_format` is truthy)
        2. A float offset
        3. The TUFLOWFV absolute formats at second, minute, hour, then day resolution

    Args:
        token (str): The datetime/offset string to parse
        time_format (str): The strptime format to use if not standard TUFLOWFV format (GUI)

    Returns:
        Union[pd.Timestamp, float, None]: The parsed datetime, float offset, or None if unable to parse
    """
    if time_format:
        try:
            return pd.to_datetime(token, format=time_format)
        except ValueError:
            pass  # Custom format did not match, fall through to the standard parsers
    # Float offsets are tried before the absolute formats, from finest to coarsest resolution.
    fallback_parsers = (
        parse_float,
        parse_isodate_seconds,
        parse_isodate_minutes,
        parse_isodate_hours,
        parse_isodate_days,
    )
    for attempt in fallback_parsers:
        parsed = attempt(token)
        if parsed is not None:
            return parsed
    return None


class BcCsvReader:
    """Class to read TUFLOWFV BC curves from CSV files."""

    def __init__(self, filename, default_columns, user_columns, x_is_time):
        """Constructor.

        Args:
            filename (str): CSV filename
            default_columns (Sequence[str]): Default column names in order as defined by TUFLOWFV
            user_columns (list[str]): User defined column names in same order as `default_columns`
            x_is_time (bool): True if the X column is time
        """
        self._logger = logging.getLogger('xms.tuflowfv')
        self._filename = filename
        self._default_columns = default_columns
        # TODO: The number of columns in the CSV change based on the different modules being used. As we support these
        #       new modules, the default column definition passed in will need to have the additional variables appended
        #       to the base default columns and the number of user columns should match. For now slice off any user
        #       defined columns for modules we do not support so we can still import what we know about.
        self._user_columns = user_columns[:len(self._default_columns)]
        self._file_columns = []  # Header tokens exactly as they appear in the file
        self._x_is_time = x_is_time
        self._missing_columns = []  # Indices of columns that do not appear in the header
        self._time_parser = None  # Datetime parser that matched the time column, cached once determined
        self.uses_isodate = False  # True if the time column holds absolute datetimes instead of float offsets

    def _find_exact_column_names(self):
        """Find the exact column names from user provided names.

        We need these because we need to be case insensitive with user provided column names. The 'usecols' kwarg to
        pandas.read_csv allows passing a callable, but subsequent operations on the DataFrame do not.

        Returns:
            list[str]: The column names exactly as they appear in the header (matches case). If a header is not found,
                that column will not be read from the file and will be filled with NaN in the curve. These values should
                be replaced by a BC default command.

        Raises:
            ValueError: If the first (X/time) column is not found in the header
        """
        self._logger.info('Finding user specified column names in the BC CSV header...')
        with open(self._filename, 'r') as f:
            line = f.readline()
        self._file_columns = line.rstrip('\r\n').split(',')
        # Rebuild the missing column list on every call. Otherwise a second read() would append the same indices
        # again and the NaN placeholder columns would be inserted twice.
        self._missing_columns = []
        exact_column_names = []
        for idx, user_column in enumerate(self._user_columns):
            upper_user_column = user_column.upper()
            match = None
            for token in self._file_columns:
                # TUFLOWFV is case insensitive with header column names, also matches with/without trailing whitespace.
                upper_token = token.upper()
                if upper_user_column in (upper_token, upper_token.strip()):
                    match = token
                    break
            if match is not None:
                exact_column_names.append(match)
            elif idx == 0:  # Don't think the X column can be missing
                raise ValueError(
                    f'{"Time" if self._x_is_time else "X"} column not found in the CSV header.\n'
                    f'CSV file: {io_util.logging_filename(self._filename)}\n'
                    f'{"Time" if self._x_is_time else "X"} column specified by user: {self._user_columns[0]}\n'
                    f'Columns in CSV header: {self._file_columns}\n'
                )
            else:
                self._missing_columns.append(idx)
        return exact_column_names

    def _parse_tuflowfv_isodate(self, token):
        """Parse TUFLOWFV absolute datetimes, remembering which resolution matched.

        The formats are tried from finest (seconds) to coarsest (days). The first parser that succeeds is cached in
        `self._time_parser` and used directly for all later calls.

        Args:
            token (Union[str, pd.Series]): The datetime string to parse, or a whole column of them. A column only
                parses successfully if every value matches the same format.

        Returns:
            Union[pd.Timestamp, pd.Series, None]: The parsed datetime(s), or None if unable to parse
        """
        if self._time_parser is not None:  # Already determined the format
            return self._time_parser(token)

        for parser in (parse_isodate_seconds, parse_isodate_minutes, parse_isodate_hours, parse_isodate_days):
            value = parser(token)
            if value is not None:
                self._time_parser = parser
                return value
        return None

    def _uses_isodate(self, time_column):
        """Test the first row of data to see if the curve uses isotime or float offsets.

        The result is also stored in `self.uses_isodate` so callers can access it after reading.

        Args:
            time_column (str): The time column name exactly as it appears in the CSV header

        Returns:
            bool: True if the first time value is not a float (assumed to be an absolute datetime), False if it is a
                float offset or the file contains no data rows
        """
        self._logger.info('Determining time format...')
        # Find out the index of the time column, falling back to the first column if it is not in the header.
        try:
            idx = self._file_columns.index(time_column)
        except ValueError:
            idx = 0
        # Read the first line of data. Blank lines are skipped because pandas.read_csv skips them by default, so
        # they are not the first row of the curve.
        with open(self._filename, 'r') as f:
            f.readline()  # Header
            line = next((row for row in f if row.strip()), '')
        self.uses_isodate = False
        if not line:  # Header only, there is no time value to inspect
            return False
        # Try casting the time to a float to see if it is an offset
        first_time = line.split(',')[idx].strip()
        try:
            float(first_time)
        except ValueError:
            self.uses_isodate = True  # Store this flag so callers can access
        return self.uses_isodate

    def _cleanup_dataframe(self, df, columns):
        """Cleanup the DataFrame curve read from the file.

        Reorders the columns to TUFLOWFV default order, adds NaN columns for any that were missing in the file,
        switches to 1-based row labels, renames the columns to the TUFLOWFV defaults, and converts everything except
        an absolute datetime X column to float64.

        Args:
            df (pd.DataFrame): The curve DataFrame
            columns (list[str]): The columns read from the file (column labels in df)

        Returns:
            pd.DataFrame: The cleaned DataFrame
        """
        # Allow columns to be in any order in file. Copy so the edits below act on our own frame, not a selection.
        df = df[columns].copy()
        # If there are any columns missing in the CSV, TUFLOWFV allows the user to specify a BC default command to
        # fill in the column with a constant value.
        num_rows = len(df.index)
        for idx in self._missing_columns:
            # Duplicate labels are tolerated here because every column is renamed to the TUFLOWFV default below. A
            # placeholder may share its name with a user named column that was read from the file.
            df.insert(idx, self._default_columns[idx], np.full(num_rows, np.nan), allow_duplicates=True)
        df.index += 1  # Switch to 1-base rows
        df.columns = self._default_columns  # Replace column names with TUFLOWFV defaults
        # If X column is time and the dtype is not a datetime, convert them to floats.
        for idx, column in enumerate(df.columns):
            if idx == 0 and self._x_is_time and self.uses_isodate:
                continue  # If this is the x-column and x is time and we are using ISODATE format leave as pd.Timestamp
            df[column] = df[column].astype(np.float64)
        return df

    def read(self):
        """Read a TUFLOWFV BC curve from a CSV file.

        Any error encountered is logged and results in None being returned rather than an exception.

        Returns:
            Union[pd.DataFrame, None]: The imported curve, or None if the file could not be read. Columns are named and
                ordered in TUFLOWFV default format. Columns missing from the file are filled with NaN.
        """
        try:
            columns = self._find_exact_column_names()
            self._logger.info('Parsing BC CSV file...')
            if self._x_is_time and self._uses_isodate(columns[0]):
                time_column = columns[0]
                # Read the time column as text and convert it ourselves. The 'date_parser' kwarg of
                # pandas.read_csv was deprecated in pandas 2.0, so it is not relied on here.
                df = pd.read_csv(self._filename, header=0, usecols=columns, dtype={time_column: str})
                parsed_times = self._parse_tuflowfv_isodate(df[time_column])
                if parsed_times is None:
                    raise ValueError(f'Unable to parse the values in column "{time_column}" as TUFLOWFV datetimes.')
                df[time_column] = parsed_times
            else:
                df = pd.read_csv(self._filename, header=0, usecols=columns, parse_dates=False)
            return self._cleanup_dataframe(df, columns)
        except Exception as e:
            self._logger.error(f'Errors reading CSV file: {str(e)}')
        return None
