"""This module is a group of utility functions for dealing with files."""

__copyright__ = "(C) Copyright Aquaveo 2020"
__license__ = "All rights reserved"

# 1. Standard Python modules
import os
from pathlib import Path
import shutil

# 2. Third party modules
import h5py
import numpy as np

# 3. Aquaveo modules
from xms.api.dmi import XmsEnvironment as XmEnv
from xms.core.filesystem import filesystem

# 4. Local modules


def paths_are_equal(path1, path2):
    """Check whether two filesystem paths refer to the same existing file or directory.

    Args:
        path1 (str): The first path.
        path2 (str): The second path.

    Returns:
        bool: True if both paths exist and refer to the same file, False otherwise (including when either
        path does not exist).

    """
    # os.path.samefile raises for missing paths, so only call it once both are known to exist.
    both_exist = os.path.exists(path1) and os.path.exists(path2)
    return both_exist and os.path.samefile(path1, path2)


def copyfile(src: str | Path, dest: str | Path):
    """Copy a file, ignoring shutil.SameFileError.

    Args:
        src: Source file path.
        dest: Destination file path.
    """
    try:
        shutil.copyfile(src, dest)
    except shutil.SameFileError:
        pass


def remove(file):
    """Best-effort deletion of a file; any error raised while deleting is swallowed.

    Args:
        file (str): File path to delete.

    """
    try:
        os.remove(file)
    except Exception:
        # Deliberately ignored: a file that cannot be deleted (or is already gone) is not treated as an error.
        return


def delete_h5_groups(filename, groups):
    """Remove groups or datasets from an H5 or NetCDF file in place.

    H5 calls are used because the xarray and Python NetCDF libraries do not provide
    a convenient enough way to overwrite existing datasets in a file without overwriting
    the entire file.

    Args:
        filename (str): File path to the H5/NetCDF file
        groups (list): List of paths in the file to remove

    """
    with h5py.File(filename, 'a') as h5_file:
        for group_path in groups:
            try:
                del h5_file[group_path]
            except Exception:
                # Best effort: keep going so the remaining groups still get cleaned up.
                continue


def convert_to_relative(path, base_path):
    """Express a path relative to a base path.

    If the base path is an existing file, its containing directory is used as the base instead.
    Errors are intentionally not caught here; callers are expected to handle and report them.

    Args:
        path: The absolute path to convert.
        base_path: The base path to make the absolute path relative to

    Returns:
        (str): The absolute path converted to relative from the base path.

    """
    # A file cannot be the start of a relative path, so fall back to the directory that holds it.
    start_dir = os.path.dirname(base_path) if Path(base_path).is_file() else base_path
    return os.path.relpath(path, start_dir)


def comp_uuid_from_mainfile_path(mainfile_path):
    """Get a component's UUID from the absolute path to its mainfile.

    The UUID is taken to be the name of the directory that directly contains the mainfile.

    Args:
        mainfile_path (str): Path to the component's mainfile.

    Returns:
        (str): Name of the mainfile's parent directory.
    """
    component_dir = os.path.dirname(mainfile_path)
    return os.path.basename(component_dir)


def does_file_exist(file, proj_dir):
    """Check whether a file referenced by our persistent data is still on disk.

    Relative paths are resolved against the project directory before the check.

    Args:
       file (str): Relative or absolute file path to check the existence of
       proj_dir (str): Project path to resolve relative paths to

    Returns:
        (bool): True if the file exists. False if it does not, or if anything goes wrong while checking.

    """
    try:
        if os.path.isabs(file):
            resolved = file
        else:
            # Convert relative to absolute
            resolved = filesystem.resolve_relative_path(proj_dir, file)
        return os.path.exists(resolved)
    except Exception:
        return False


def compare_text_files(base_path, out_path):
    """
    Compare two files as text, ignoring differences in line ending.

    Args:
        base_path: Path to the base file.
        out_path: Path to the out file.

    Returns:
        Whether the files are the same.
    """
    with open(base_path) as base_file, open(out_path) as out_file:
        for base_line, out_line in zip(base_file, out_file, strict=True):
            assert base_line == out_line


def compare_h5_files(output_file, base_file, to_ignore) -> None:
    """
    Compare two HDF5 files to see if they are the same.

    If they are different, an assertion error is raised. Otherwise, nothing happens.

    Args:
        output_file: The output file.
        base_file: The base file.
        to_ignore: List of groups to ignore. Any group with a name in this list will be skipped. These are not full
            paths; putting `GUID` in here will skip `/GUID`, `/group/GUID` and `/group/subgroup/GUID`. Putting
            `/group/subgroup/GUID` will have no effect since a group with that name isn't allowed.
            The list is not modified. None is treated as an empty list.

    Raises:
        AssertionError: If the files differ, ignoring things in to_ignore. The message describes the first
            difference found.
    """
    # Names that are always skipped in addition to the caller's list.
    # NOTE(review): these look like NetCDF / HDF5 dimension-scale bookkeeping names - confirm before editing.
    implicit_ignore = [
        'dim_0',
        'DIMENSION_LIST',
        'REFERENCE_LIST',
        '_Netcdf4Dimid',
        '_NCProperties',
        '_Netcdf4Coordinates',
        '_nc3_strict',
        'CLASS',
        'NAME',
        '_FillValue',
    ]
    # Build a fresh list: extending `to_ignore` in place would leak the implicit names into the caller's list
    # and grow it on every call.
    ignore = [*(to_ignore or ()), *implicit_ignore]

    with h5py.File(output_file, 'r') as f1, h5py.File(base_file, 'r') as f2:
        match, error = _compare_h5_groups('/', f1, f2, ignore)
    if not match:
        raise AssertionError(error)


def _compare_h5_groups(path, group_out, group_base, to_ignore) -> tuple[bool, str]:
    """Compares two HDF5 groups. It will recurse into subgroups.

    Args:
        path: Path of the groups being compared, ending in '/'. Only used to build error messages.
        group_out: Group from the output file.
        group_base: Group from the base file.
        to_ignore: Names of groups, datasets and attributes to skip, at any depth.

    Returns:
        If the groups are equal, returns a tuple of (True, ''). Otherwise, returns a tuple of (False, message) where
        message is some error message.
    """
    # Build the lookup set once; recursive calls pass the frozenset straight through.
    ignored = to_ignore if isinstance(to_ignore, frozenset) else frozenset(to_ignore)

    # Anything in exactly one group, minus ignored things.
    g_bad = (set(group_base.keys()) ^ set(group_out.keys())) - ignored
    if g_bad:
        return False, f'{path}: Groups in only one file: {g_bad!r}'

    # Iterate the keys directly (not a set) so the first reported difference does not depend on hash ordering.
    for name in group_base.keys():
        sub_path = f'{path}{name}/'

        # pandas uses 'index' while xarray uses 'dim_0', both work equally well,
        # and the values in them are not relevant to our tests.
        # Check to make sure at least one of them is present.
        if name in ('index', 'dim_0'):
            if 'index' not in group_out and 'dim_0' not in group_out:
                return False, f'{sub_path}: Dataset missing index.'
            continue

        # Skip ignored names before touching the output group: an ignored item is allowed to exist in only one
        # of the two files, in either direction.
        if name in ignored:
            continue

        if name not in group_out:
            return False, f'{sub_path}: Dataset missing.'

        item_out = group_out[name]
        item_base = group_base[name]

        result = _compare_h5_attrs(sub_path, item_out, item_base, ignored)
        if not result[0]:
            return result

        is_dset_out = isinstance(item_out, h5py.Dataset)
        is_dset_base = isinstance(item_base, h5py.Dataset)
        if is_dset_out and is_dset_base:
            result = _compare_h5_datasets(sub_path, item_out, item_base)
        elif not is_dset_out and not is_dset_base:
            # recurse
            result = _compare_h5_groups(sub_path, item_out, item_base, ignored)
        else:
            result = (False, f'{sub_path}: Dataset/group mismatch.')
        if not result[0]:
            return result
    return True, ""


def _attr_element_type(value):
    """Describe the type of an attribute's first element, or return None for an empty/falsy attribute."""
    if isinstance(value, np.ndarray) and value.size != 1:
        # bool() cannot be used here: it raises ValueError for multi-element arrays and is not reliable for
        # empty ones, so decide by size instead.
        if value.size == 0:
            return None
        return str(type(value[0]))
    if not value:
        return None
    try:
        first = value[0]
    except (IndexError, TypeError):
        # Scalar attribute values cannot be indexed; describe the value itself.
        first = value
    return str(type(first))


def _compare_h5_attrs(sub_path, item_out, item_base, ignored) -> tuple[bool, str]:
    """Compare the attributes of two HDF5 objects, skipping attribute names in `ignored`."""
    attrs_out = item_out.attrs
    attrs_base = item_base.attrs
    a_bad = (set(attrs_base.keys()) ^ set(attrs_out.keys())) - ignored
    if a_bad:
        return False, f'{sub_path}: Attributes in only one file: {a_bad}'

    for att in attrs_base.keys():
        if att in ignored:
            continue
        attr_out = attrs_out[att]
        attr_base = attrs_base[att]
        if not np.array_equal(attr_out, attr_base):
            return False, f'{sub_path}: Attribute value mismatch ({att}))'
        # Equal values can still be stored with different types (e.g. 1 and 1.0), so check the type as well.
        type_out = _attr_element_type(attr_out)
        type_base = _attr_element_type(attr_base)
        if type_out != type_base:
            return False, f'{sub_path}: Attribute type mismatch ({type_out} != {type_base})'
    return True, ""


def _compare_h5_datasets(sub_path, dset_out, dset_base) -> tuple[bool, str]:
    """Compare the shape, dtype and values of two HDF5 datasets."""
    if dset_out.shape != dset_base.shape:
        return False, f'{sub_path}: Dataset shape mismatch.'
    if dset_out.dtype != dset_base.dtype:
        return False, f'{sub_path}: Dataset type mismatch.'

    array_out = dset_out[()]
    array_base = dset_base[()]
    if array_base.dtype == array_out.dtype and np.issubdtype(array_base.dtype, np.inexact):
        # Floats (of any width) and complex values can be NaN, which compares unequal to itself, so we have to
        # pass equal_nan for them.
        equal = np.array_equal(array_base, array_out, equal_nan=True)
    else:
        # Passing equal_nan on integers and strings crashes, so we can't pass equal_nan for them.
        # If the types mismatch, NumPy should be able to tell us they're unequal whether there are NaNs or not.
        equal = np.array_equal(array_base, array_out)
    if not equal:
        return False, f'{sub_path}: Dataset value mismatch.'
    return True, ""


def logging_filename(abs_path):
    """Get the filename to use in log messages (only the basename when running tests).

    Args:
        abs_path (str): Absolute path to the file to log

    Returns:
        str: abs_path, or the basename of abs_path if testing
    """
    running_tests = XmEnv.xms_environ_running_tests() == 'TRUE'
    return os.path.basename(abs_path) if running_tests else abs_path
