"""Class for reading XMDF data set files."""

# 1. Standard Python modules
import datetime
import os
from typing import Optional, Union
import uuid

# 2. Third party modules
import h5py
import numpy as np

# 3. Aquaveo modules
from xms.core.time import julian_to_datetime

# 4. Local modules
from xms.datasets.dataset_io import DSET_NULL_VALUE
from xms.datasets.dataset_metadata import XMDF_DATA_LOCATIONS

# Name of the H5 group that the standard dataset path ('Datasets/<dset_name>') is rooted at.
MULTI_DATASETS_GROUP = 'Datasets'
# Sentinel reference time: the dataset had a reftime, but it could not be represented (e.g. a BCE date).
INVALID_REFTIME = datetime.datetime(1, 1, 1)


class DatasetReader:
    """Class for reading XMDF data set files."""
    def __init__(self, h5_filename: str, dset_name: str = None, group_path: str = None) -> None:
        """Constructor.

        Args:
            h5_filename (:obj:`str`): Path to the H5 file to write
            dset_name (:obj:`str`): Name of the dataset. If provided, H5 group path to the dataset is assumed to be the
                standard: 'Datasets/<dset_name>'
            group_path (:obj:`str`): H5 group path to the dataset
        """
        if not os.path.isfile(h5_filename):
            raise ValueError(f'Dataset file not found: {h5_filename}')
        if (not dset_name and not group_path) or (dset_name and group_path):
            raise ValueError('Must provide either the dataset name or the H5 group path to the dataset.')

        self.geom_uuid = ''
        self.uuid = ''
        self.ref_time = None  # None if reference time not defined, INVALID_REFTIME if it was defined but invalid.
        self.null_value = None  # None if not using a null value
        self.time_units = ''
        self.h5_filename = h5_filename
        self.group_path = group_path if group_path else f'Datasets/{dset_name}'
        self.location = 'points'
        self.active_timestep = -1
        self._h5file = None  # File handle to the H5 file
        self._values_dset = None
        self._activity_dset = None
        self._min_dset = None
        self._max_dset = None
        self._times_dset = None
        self._load_metadata()  # Populate attributes from file

        # If point data with cell activity set this. Must define calc(np.ndarray) that returns a np.ndarray activity
        # mask that matches the number of data values. See CellToPointActivityCalculator class in xmsconstraint.
        self.activity_calculator = None

    def _requires_activity_calculator(self):
        """Returns True if there is a mismatch in the number of data values and activity values."""
        if self.num_activity_values is not None and self.num_activity_values != self.num_values:
            return True
        return False

    def _load_metadata(self) -> None:
        """Load dataset metadata from the file."""
        self._find_geom_uuid()
        self._read_active_timestep()
        self._load_dataset_attrs()
        self._load_dataset_properties()

    def _read_active_timestep(self):
        """Load the active timestep index from the file, if it exists."""
        dset_path = f'{self.group_path}/Active Function'
        if dset_path in self.h5file:
            self.active_timestep = self.h5file[dset_path][0].item()

    def _geom_uuid_group(self):
        """Find the geometry uuid group in the h5 file.

        Returns:
            (:obj:`str`) : the path to this group
        """
        # Find the multi-datasets group
        geom_guid_group = ''
        multi_group = os.path.dirname(self.group_path)
        while multi_group:
            grouptype = self.h5file[multi_group].attrs.get('Grouptype')
            if grouptype and grouptype.item().decode() == 'MULTI DATASETS':
                break
            multi_group = os.path.dirname(multi_group)
        if multi_group:  # Standard XMDF places the geom UUID under the multi-datasets group.
            guid_group = f'{multi_group}/Guid'
            if guid_group in self.h5file:  # At the MultiDatasets group level, where it should be
                geom_guid_group = guid_group

        if not geom_guid_group:
            # Check for it in the geometry PROPERTIES group. Some files have it here if the datasets
            # are in the same file as the geometry.
            guid_group = f'{os.path.dirname(multi_group)}/PROPERTIES/GUID'
            if guid_group in self.h5file:
                geom_guid_group = guid_group
        return geom_guid_group

    def _find_geom_uuid(self):
        """Look for the dataset geometry UUID."""
        geom_guid_group = self._geom_uuid_group()
        if geom_guid_group in self.h5file:  # At the MultiDatasets group level, where it should be
            self.geom_uuid = self.h5file[geom_guid_group][0].decode()

    def _load_dataset_attrs(self):
        """Load attributes on the dataset group."""
        # Load the reference time, if it exists.
        dset = self.h5file[self.group_path]
        if 'Reftime' in dset.attrs:
            ref_time = dset.attrs['Reftime'].item()
            self.ref_time = julian_to_datetime(ref_time)
            if self.ref_time is None and ref_time != DSET_NULL_VALUE:
                # Python's datetime object only supports dates down to year 1, but some outside code writes BC dates.
                # DSET_NULL_VALUE is before year 1 and equivalent to no reftime, but other values are errors. We raise
                # an exception here so higher-level code can report the problem to the user to fix it.
                raise ValueError('ref time is out of range')
        # Load time units.
        if 'TimeUnits' in dset.attrs:
            self.time_units = str(dset.attrs['TimeUnits'][0].decode())
        self.location = 'points'
        key = ''
        if 'DatasetLocation' in dset.attrs:
            key = 'DatasetLocation'
        elif 'DatasetLocationI' in dset.attrs:
            key = 'DatasetLocationI'
        if key:
            int_loc = dset.attrs[key].item()
            try:
                self.location = next(key for key, value in XMDF_DATA_LOCATIONS.items() if value == int_loc)
            except StopIteration:
                pass

    def _dataset_uuid_group(self):
        """Gets the path to the data set uuid.

        Returns:
            (:obj:`str`): see description
        """
        # PROPERTIES group
        prop_group = f'{self.group_path}/PROPERTIES'
        # Load the dataset UUID, if it exists.
        return f'{prop_group}/GUID'

    def _load_dataset_properties(self):
        """Load dataset attributes from the PROPERTIES group."""
        # PROPERTIES group
        prop_group = f'{self.group_path}/PROPERTIES'
        # Load the dataset UUID, if it exists.
        dset_uuid_prop = self._dataset_uuid_group()
        if dset_uuid_prop in self.h5file:
            self.uuid = self.h5file[dset_uuid_prop][0].decode()
        else:
            dset_uuid_prop = f'{self.group_path}/GUID'
            if dset_uuid_prop in self.h5file:
                self.uuid = self.h5file[dset_uuid_prop][0].decode()

        dset_prop_null = f'{prop_group}/nullvalue'
        if dset_prop_null in self.h5file:
            self.null_value = self.h5file[dset_prop_null][0].item()

    @property
    def h5file(self) -> h5py.File:
        """File handle to the H5 file."""
        if self._h5file is None:
            self._h5file = _H5FileLatin1(self.h5_filename, 'r')
        return self._h5file

    @property
    def name(self) -> str:
        """Returns the name of the dataset."""
        return os.path.basename(self.group_path)

    @property
    def values(self) -> h5py.Dataset:
        """h5py.Dataset of the XMDF Dataset values."""
        if self._values_dset is None:
            self._values_dset = self.h5file[f'{self.group_path}/Values']
        return self._values_dset

    @property
    def activity(self) -> h5py.Dataset:
        """h5py.Dataset of the XMDF Dataset activity array."""
        if self._activity_dset is None:
            activity_group = f'{self.group_path}/Active'
            if activity_group in self.h5file:
                self._activity_dset = self.h5file[activity_group]
        return self._activity_dset

    @property
    def times(self) -> h5py.Dataset:
        """h5py.Dataset of the XMDF Dataset times array."""
        if self._times_dset is None:
            self._times_dset = self.h5file[f'{self.group_path}/Times']
        return self._times_dset

    @property
    def mins(self) -> h5py.Dataset:
        """h5py.Dataset of the XMDF Dataset minimum value array."""
        if self._min_dset is None:
            self._min_dset = self.h5file[f'{self.group_path}/Mins']
        return self._min_dset

    @property
    def maxs(self) -> h5py.Dataset:
        """h5py.Dataset of the XMDF Dataset maximum value array."""
        if self._max_dset is None:
            self._max_dset = self.h5file[f'{self.group_path}/Maxs']
        return self._max_dset

    @property
    def num_times(self):
        """Returns the number of timesteps in the dataset."""
        return len(self.times)

    @property
    def num_values(self) -> int:
        """Get the number of dataset values.

        Returns:
            (:obj:`int`): The number of the dataset values
        """
        num_values = 0
        if len(self.values.shape) > 1:  # Has a timestep with something in it
            num_values = self.values.shape[1]
        return num_values

    @property
    def num_activity_values(self) -> Union[int, None]:
        """Get the number of activity values.

        Datasets may or may not have activity arrays. If a dataset has an activity array, it may or may not match
        the number of dataset values (e.g. node-based dataset with cell-based activity).

        Returns:
            (:obj:`int`): The number activity values in the dataset or None if no activity array is present.
        """
        num_active = None
        if self.activity is not None and len(self.activity) > 0:
            # Get the size of the first timestep's activity array. Assuming all timesteps have uniform
            # activity array sizes.
            num_active = len(self.activity[0])
        return num_active

    @property
    def num_components(self) -> int:
        """Get the number of dataset dimensions (e.g. scalar=1, 2D vector=2).

        Returns:
            (:obj:`int`): The number of the dataset components
        """
        num_comps = 1
        if len(self.values.shape) > 2:  # Vector dataset, get number of components from last dimension
            num_comps = self.values.shape[-1]
        return num_comps

    def timestep_offset(self, ts_idx):
        """Get a timestep's offset from the dataset zero time or reference time if defined.

        Args:
            ts_idx (:obj:`int`): 0-based index of the timestep to get offset for

        Returns:
            (:obj:`Union[None, datetime.timedelta]`): See description. None if ts_idx out of range or no time units
            defined for the dataset.
        """
        if abs(ts_idx) > self.num_times:  # Allow negative indices
            return None

        if not self.time_units or self.time_units == 'None':
            return None  # No time units defined, have to access the times dataset directly.

        # Lowercase versions of XMDF time units strings match timedelta constructor kwarg names.
        time_units = self.time_units.lower()
        time_value = self.times[ts_idx].item()
        if time_units == 'years':
            time_units = 'days'
            time_value = time_value * 365.25
        return datetime.timedelta(**{time_units: time_value})

    def timestep_with_activity(self, tsidx, nan_null_values=True, nan_activity=False):
        """Replace inactive/null values with numpy nan.

        Args:
            tsidx (:obj:`int`): Index of the desired timestep
            nan_null_values (:obj:`bool`): If True and the dataset has a defined null value, null values will be
                replaced with nan
            nan_activity (:obj:`bool`): If True, the dataset doesn't have a null value but has an activity array,
                dataset values that are made inactive by the activity array will be replaced with nan.

        Returns:
            (:obj:`tuple(numpy.ndarray, numpy.ndarray)`): The dataset timestep with activity applied, and the activity
            array for this timestep (or None if not present).
        """
        activity_array = None
        # There may be a mismatch in the activity and data locations, so we need to make a local mask for indexing
        # into the data array if we are nulling out inactive locations.
        nanactivity_array = None
        timestep = self.values[tsidx]
        if self.null_value is not None:
            if nan_null_values:
                timestep[timestep == self.null_value] = np.nan
        elif self.num_activity_values == self.num_values:
            activity_array = self.activity[tsidx]
            nanactivity_array = activity_array
        elif self._requires_activity_calculator():
            if self.activity_calculator is None:
                raise RuntimeError('Must define activity calculator if activity array does not match number of values.')
            nanactivity_array = self.activity_calculator.calc(self.activity[tsidx])
            activity_array = self.activity[tsidx]
        if nanactivity_array is not None and nan_activity:
            timestep[nanactivity_array == 0] = np.nan
        return timestep, activity_array

    def duplicate_to_new_geometry(
        self,
        new_geom_uuid: str,
        new_file_name: str,
        dataset_name: Optional[str] = None,
        dataset_uuid: Optional[str] = None
    ):
        """Copies the data set to new_file_name and sets new uuid for the geometry and a new uuid for the data set.

        Args:
            new_geom_uuid (:obj:`str`): uuid of the new geometry
            new_file_name (:obj:`str`): the file name
            dataset_name (:obj:`str`): optional name of dataset (can be nested path).
            dataset_uuid (:obj:`str`): optional name of dataset (can be nested path).

        Returns:
            (:obj:`DatasetReader`): a new reader of the new data set file
        """
        if dataset_name is None:
            dataset_name = self.name
        with _H5FileLatin1(new_file_name, 'w') as f:
            ds_path = f'Datasets/{dataset_name}'
            grp_datasets = f.create_group('Datasets')
            multi_datasets = 'MULTI DATASETS'
            ascii_list = [multi_datasets.encode("ascii", "ignore")]
            grp_datasets.attrs.create('Grouptype', data=ascii_list, shape=(1, ), dtype='S15')
            self.h5file.copy(self._geom_uuid_group(), f, name='Datasets/Guid')
            f['Datasets/Guid'][0] = new_geom_uuid
            self.h5file.copy(self.group_path, f, name=ds_path)
            self.h5file.copy('File Type', f)
            self.h5file.copy('File Version', f)
            ds_uuid_group = f'Datasets/{dataset_name}/PROPERTIES/GUID'
            if ds_uuid_group in f:
                if dataset_uuid is None:
                    dataset_uuid = str(uuid.uuid4())
                f[ds_uuid_group][0] = dataset_uuid
        return DatasetReader(new_file_name, group_path=ds_path)

    @staticmethod
    def get_reasonable_null_value():
        """Get the null value constant (``DSET_NULL_VALUE``) imported from ``xms.datasets.dataset_io``."""
        null_value = DSET_NULL_VALUE
        return null_value


class _H5FileLatin1(h5py.File):
    """H5py file class that allows latin-1 encoded strings to be used as keys.

    String keys are encoded as latin-1 bytes for both item lookup (``file[key]``) and membership tests
    (``key in file``), so the two operations agree on which member a name refers to.
    """
    @staticmethod
    def _encode_key(key):
        """Encode a string key as latin-1 bytes.

        Args:
            key: The key to encode.

        Returns:
            The latin-1 encoded bytes if key is a string that can be represented in latin-1, otherwise key unchanged
            (so h5py applies its own handling).
        """
        if isinstance(key, str):
            try:
                return key.encode('latin-1')
            except UnicodeEncodeError:
                return key
        return key

    def __getitem__(self, item):
        """Get an item from the file.

        Args:
            item: The item to get from the file.

        Returns:
            The item from the file.
        """
        return super().__getitem__(self._encode_key(item))

    def __contains__(self, item):
        """Test whether an item exists in the file, using the same key encoding as item lookup.

        Args:
            item: The item to look for in the file.

        Returns:
            (:obj:`bool`): True if the item exists in the file.
        """
        return super().__contains__(self._encode_key(item))

    # NOTE(review): the setter below was disabled in the original; confirm whether latin-1 keys are needed for writes.
    # def __setitem__(self, key, value):
    #     """Set an item in the file.
    #
    #     Args:
    #         key: The key to set.
    #         value: The value to set.
    #     """
    #     if isinstance(key, str):
    #         key = key.encode('latin-1')
    #     super().__setitem__(key, value)
