"""This module is a group of utility functions for dealing with files."""

__copyright__ = "(C) Copyright Aquaveo 2025"
__license__ = "All rights reserved"

# 1. Standard Python modules
import filecmp
import os
from pathlib import Path
import shutil
import subprocess
import sys
import time

# 2. Third party modules
import h5py
import numpy
import xarray as xr


# 3. Aquaveo modules

# 4. Local modules


def run_executable_with_tee(arguments, file1_path, file2_path, child_process_file=None) -> int:
    """Run an executable and write its combined stdout/stderr to two files at once.

    Output is copied line by line as soon as the child produces it, and both files are
    flushed after every line so they can be followed while the process is running.

    Args:
        arguments: A list representing the command and its arguments to be run by the subprocess.
        file1_path: The file path where the first copy of the command's output should be written.
        file2_path: The file path where the second copy of the command's output should be written.
        child_process_file: Optional file to write the child process ID to.

    Returns:
        The process return code, or -1 if the process could not be started or an error
        occurred while copying its output (a message is printed in that case).
    """
    return_code = -1
    try:
        with open(file1_path, 'w') as first_copy, open(file2_path, 'w') as second_copy:
            # Popen as a context manager closes the pipe and waits for the child on exit.
            with subprocess.Popen(
                arguments, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True
            ) as process:
                try:
                    # Write child process ID if needed
                    if child_process_file:
                        with open(child_process_file, 'w') as f:
                            f.write(f'{process.pid}\n')

                    # Iterating the pipe blocks until a line is available and stops at EOF,
                    # so no polling or sleeping is needed (sleeping per line throttled output).
                    for line in process.stdout:
                        for sink in (first_copy, second_copy):
                            sink.write(line)
                            sink.flush()
                except BaseException:
                    # Leaving the ``with`` block waits for the child; stop it first so a
                    # failure on our side can neither hang here nor leave it running.
                    process.kill()
                    raise
            return_code = process.returncode
    except FileNotFoundError as e:
        print(f'Executable not found: {e}')
    except Exception as e:
        print(f'Something went wrong: {e}')
    return return_code


def copyfile(src: str | Path, dest: str | Path):
    """Copy a file, ignoring shutil.SameFileError.

    Args:
        src: Source file path.
        dest: Destination file path.
    """
    try:
        shutil.copyfile(src, dest)
    except shutil.SameFileError:
        pass


def remove(file: Path | str):
    """Delete a file, ignoring any system errors.

    Args:
        file: File path to delete.

    """
    try:
        os.remove(file)
    except Exception:
        pass


def compare_h5_files(output_file: Path | str, base_file: Path | str, to_ignore):
    """Compare two HDF5 files to see if they are the same.

    Every group, dataset and attribute found in the base file is looked up and compared
    in the output file. A fixed set of bookkeeping names (dimension scales, netCDF
    properties, fill values, ...) is always ignored in addition to ``to_ignore``.

    Args:
        output_file: The new file to be tested.
        base_file: The file to test against.
        to_ignore (list): A list of datasets and attributes to not compare. May be None or
            empty. The list is not modified.

    Returns:
        A tuple ``(match, message)``. ``match`` is True if the files match. ``message``
        holds the first mismatch (empty when matching) followed by both file paths, or the
        exception text if the comparison itself failed.
    """
    implicit_ignore = [
        'dim_0', 'DIMENSION_LIST', 'REFERENCE_LIST', '_Netcdf4Dimid', '_NCProperties',
        '_Netcdf4Coordinates', '_nc3_strict', 'CLASS', 'NAME', '_FillValue'
    ]
    with h5py.File(output_file, 'r') as f1, h5py.File(base_file, 'r') as f2:
        try:
            # Build a new list: extending ``to_ignore`` in place would leak the implicit
            # names into the caller's list and grow it on every call.
            ignore = list(to_ignore or []) + implicit_ignore
            result = _compare_h5_groups('/', f1, f2, ignore)
            message = f'{result[1]}\n{output_file}\n{base_file}\n'
            return result[0], message
        except Exception as e:
            return False, f'compare_h5_files exception: {str(e)}'


def _compare_h5_groups(path, group_out, group_base, to_ignore):
    """Compare two HDF5 groups, recursing into subgroups.

    Only the items and attributes present in ``group_base`` are checked; anything that
    exists only in ``group_out`` is not reported. Items are visited in sorted name order
    so the first reported mismatch is the same on every run.

    Args:
        path: HDF5 path of the groups being compared, ending in '/'. Used in error messages.
        group_out: The group (or file) from the new output.
        group_base: The group (or file) from the baseline.
        to_ignore: Names of datasets, groups and attributes to skip.

    Returns:
        A tuple ``(match, error)``: ``(True, "")`` if everything matches, otherwise False
        and a description of the first mismatch found.
    """
    for name in sorted(group_base.keys()):
        sub_path = f'{path}{name}/'

        # pandas uses 'index' while xarray uses 'dim_0', both work equally well,
        # and the values in them are not relevant to our tests.
        # Check to make sure at least one of them is present.
        if name in ('index', 'dim_0'):
            if 'index' not in group_out and 'dim_0' not in group_out:
                return False, f'{sub_path}: Dataset missing index.'
            continue
        if name not in group_out:
            return False, f'{sub_path}: Dataset missing.'
        if name in to_ignore:
            continue

        item_out = group_out[name]
        item_base = group_base[name]

        # check the attributes
        for att in sorted(item_base.attrs.keys()):
            if att in to_ignore:
                continue
            if att not in item_out.attrs:
                return False, f'{sub_path}: Attribute missing ({att})'
            attr_out = item_out.attrs[att]
            attr_base = item_base.attrs[att]
            if not _h5_values_equal(attr_out, attr_base):
                return False, f'{sub_path}: Attribute value mismatch ({att}))'
            type_out = _h5_element_type(attr_out)
            type_base = _h5_element_type(attr_base)
            if type_out != type_base:
                return False, f'{sub_path}: Attribute type mismatch ({type_out} != {type_base})'

        is_dset_out = isinstance(item_out, h5py.Dataset)
        is_dset_base = isinstance(item_base, h5py.Dataset)
        if is_dset_out and is_dset_base:
            if item_out.shape != item_base.shape:
                return False, f'{sub_path}: Dataset shape mismatch.'
            if item_out.dtype != item_base.dtype:
                return False, f'{sub_path}: Dataset type mismatch.'
            if not _h5_values_equal(item_out[()], item_base[()]):
                return False, f'{sub_path}: Dataset value mismatch.'
        elif not is_dset_out and not is_dset_base:
            # recurse
            result = _compare_h5_groups(sub_path, item_out, item_base, to_ignore)
            if not result[0]:
                return result
        else:
            return False, f'{sub_path}: Dataset/group mismatch.'
    return True, ""


def _h5_values_equal(value_out, value_base) -> bool:
    """Return True if two dataset/attribute values have the same shape and equal elements.

    Works for scalars, strings and arrays of any rank. For floating point data, NaN in the
    same position on both sides counts as equal.
    """
    array_out = numpy.asarray(value_out)
    array_base = numpy.asarray(value_base)
    if array_out.shape != array_base.shape:
        return False
    # asarray: comparing incompatible types may yield a plain bool instead of an array.
    matches = numpy.asarray(array_out == array_base)
    both_float = (
        numpy.issubdtype(array_out.dtype, numpy.floating)
        and numpy.issubdtype(array_base.dtype, numpy.floating)
    )
    if both_float:
        matches = matches | (numpy.isnan(array_out) & numpy.isnan(array_base))
    return bool(matches.all())


def _h5_element_type(value):
    """Return the type of the first element of an attribute value as a string.

    Returns None for empty values, which have no element to inspect.
    """
    array = numpy.asarray(value)
    if array.size == 0:
        return None
    return str(type(array.reshape(-1)[0]))


def ensure_empty_directory(directory: Path):
    """
    Guarantees that an empty directory exists at the given path.

    Anything already at that location is deleted first (including all of
     its contents), then the directory and any missing parents are created.

    Args:
        directory (Path): The path of the directory to create.
    """
    already_present = directory.exists()
    if already_present:
        # Start from a clean slate: drop the old tree with everything in it.
        shutil.rmtree(directory)
    directory.mkdir(parents=True, exist_ok=True)


def create_netcdf_file(
        filename: Path | str, file_type: str | None, version: str | None, dataset: xr.Dataset | None = None
):
    """
    Creates a NetCDF file containing an 'info' group and, optionally, a dataset.

    The 'info' group carries the 'FILE_TYPE' and 'VERSION' attributes; an attribute is
    only written when its value is non-empty. The file is created in write mode, and the
    dataset (if given) is then appended to it.

    Args:
        filename: The name of the NetCDF file to create.
        file_type: The value for the 'FILE_TYPE' attribute.
        version: The value for the 'VERSION' attribute.
        dataset: The data to write to the file.
    """
    info = xr.Dataset()

    # Add global attributes, skipping any that were not supplied
    for attr_name, attr_value in (('FILE_TYPE', file_type), ('VERSION', version)):
        if attr_value:
            info.attrs[attr_name] = attr_value

    # Write the info group first (this creates the file), then append the data
    info.to_netcdf(filename, group='info', mode='w')
    if dataset is None:
        return
    dataset.to_netcdf(filename, mode='a')


def _is_pycharm_available() -> bool:
    """Check if PyCharm diff tool is available.

    Each candidate launcher is probed by running it with ``--help``. The candidates are,
    in order: the ``PYCHARM_PATH`` environment variable (the command the PyCharm launcher
    in this module uses), the launcher names on PATH, and the default Windows and macOS
    install locations.

    Returns:
        True if any candidate could be started, False otherwise.
    """
    candidates = []

    configured = os.environ.get('PYCHARM_PATH')
    if configured:
        candidates.append(configured)
    candidates.extend(["pycharm", "pycharm.exe"])

    # The Windows install directory name contains the version, so it has to be expanded
    # here: subprocess passes arguments literally and never expands '*' itself.
    jetbrains_dir = Path(r"C:\Program Files\JetBrains")
    for pattern in (
        "PyCharm Community Edition*/bin/pycharm.exe",
        "PyCharm Professional*/bin/pycharm.exe",
    ):
        candidates.extend(str(match) for match in sorted(jetbrains_dir.glob(pattern)))

    candidates.extend([
        "/Applications/PyCharm CE.app/Contents/bin/pycharm",
        "/Applications/PyCharm.app/Contents/bin/pycharm",
    ])

    for path in candidates:
        try:
            subprocess.run([path, "--help"], capture_output=True, timeout=5)
            return True
        except (subprocess.TimeoutExpired, subprocess.CalledProcessError, OSError):
            # Not installed here (or not responding); try the next candidate.
            continue
    return False


def _is_kdiff3_available() -> bool:
    """Check if KDiff3 is available.

    Returns:
        True if ``kdiff3 --help`` could be run, False if it could not be started or did
        not finish within 5 seconds.
    """
    probe_command = ["kdiff3", "--help"]
    try:
        subprocess.run(probe_command, capture_output=True, timeout=5)
    except (subprocess.TimeoutExpired, subprocess.CalledProcessError, OSError):
        return False
    return True


def _is_vscode_available() -> bool:
    """Check if Visual Studio Code is available.

    Each candidate launcher is probed by running it with ``--version``. The candidates
    are, in order: the ``VSCODE_PATH`` environment variable (the command the VS Code
    launcher in this module uses), the launcher names on PATH, and the default Windows,
    macOS and Linux install locations.

    Returns:
        True if any candidate could be started, False otherwise.
    """
    candidates = []

    configured = os.environ.get('VSCODE_PATH')
    if configured:
        candidates.append(configured)

    candidates.extend([
        "code",
        "code.exe",
        r"C:\Program Files\Microsoft VS Code\bin\code.cmd",
        r"C:\Program Files (x86)\Microsoft VS Code\bin\code.cmd",
        "/Applications/Visual Studio Code.app/Contents/Resources/app/bin/code",
        "/usr/local/bin/code",
    ])

    for path in candidates:
        try:
            subprocess.run([path, "--version"], capture_output=True, timeout=5)
            return True
        except (subprocess.TimeoutExpired, subprocess.CalledProcessError, OSError):
            # Not installed here (or not responding); try the next candidate.
            continue
    return False


def _launch_pycharm_diff(baseline_dir: Path, output_dir: Path):
    """Launch PyCharm diff for directory comparison.

    The PyCharm command is taken from the ``PYCHARM_PATH`` environment variable and
    defaults to ``pycharm``. Failures are printed, not raised.

    Args:
        baseline_dir: Path to baseline directory
        output_dir: Path to output directory
    """
    pycharm_cmd = os.environ.get('PYCHARM_PATH', 'pycharm')

    try:
        subprocess.run([
            pycharm_cmd,
            "diff",
            str(baseline_dir),
            str(output_dir)
        ], check=True)
    except subprocess.CalledProcessError as e:
        print(f"Failed to launch PyCharm diff: {e}")
    except FileNotFoundError:
        # Same reporting as the custom diff tool launchers when the executable is missing.
        print(f"PyCharm not found at: {pycharm_cmd}")


def _launch_kdiff3_diff(baseline_dir: Path, output_dir: Path):
    """Launch KDiff3 for directory comparison.

    ``kdiff3`` is looked up on PATH. Failures are printed, not raised.

    Args:
        baseline_dir: Path to baseline directory
        output_dir: Path to output directory
    """
    try:
        subprocess.run([
            "kdiff3",
            str(baseline_dir),
            str(output_dir)
        ], check=True)
    except subprocess.CalledProcessError as e:
        print(f"Failed to launch KDiff3: {e}")
    except FileNotFoundError:
        # Same reporting as the custom diff tool launchers when the executable is missing.
        print("KDiff3 not found at: kdiff3")


def _launch_vscode_diff(baseline_dir: Path, output_dir: Path):
    """Launch Visual Studio Code diff for directory comparison.

    The VS Code command is taken from the ``VSCODE_PATH`` environment variable and
    defaults to ``code``. Failures are printed, not raised.

    Args:
        baseline_dir: Path to baseline directory
        output_dir: Path to output directory
    """
    vscode_cmd = os.environ.get('VSCODE_PATH', 'code')

    try:
        subprocess.run([
            vscode_cmd,
            "--diff",
            str(baseline_dir),
            str(output_dir)
        ], check=True)
    except subprocess.CalledProcessError as e:
        print(f"Failed to launch VS Code diff: {e}")
    except FileNotFoundError:
        # Same reporting as the custom diff tool launchers when the executable is missing.
        print(f"VS Code not found at: {vscode_cmd}")


def _launch_custom_diff_tool(diff_tool_path: str, baseline_dir: Path, output_dir: Path):
    """Run a user-supplied diff tool on two directories.

    The tool is invoked as ``<tool> <baseline> <output>``. A missing executable or a
    non-zero exit status is reported with a printed message instead of an exception.

    Args:
        diff_tool_path: Path to the custom diff tool executable
        baseline_dir: Path to baseline directory
        output_dir: Path to output directory
    """
    command = [diff_tool_path, str(baseline_dir), str(output_dir)]
    try:
        subprocess.run(command, check=True)
    except FileNotFoundError:
        print(f"Custom diff tool not found at: {diff_tool_path}")
    except subprocess.CalledProcessError as e:
        print(f"Failed to launch custom diff tool: {e}")


def _launch_custom_file_diff(diff_tool_path: str, baseline_file: Path, output_file: Path):
    """Run a user-supplied diff tool on two files.

    The tool is invoked as ``<tool> <baseline> <output>``. A missing executable or a
    non-zero exit status is reported with a printed message instead of an exception.

    Args:
        diff_tool_path: Path to the custom diff tool executable
        baseline_file: Path to baseline file
        output_file: Path to output file
    """
    # The command line and error reporting are identical for files and directories,
    # so reuse the directory launcher.
    _launch_custom_diff_tool(diff_tool_path, baseline_file, output_file)


def launch_diff_tool(baseline_dir: Path, output_dir: Path):
    """Open an external diff tool on two directories.

    The tool is chosen from the environment: ``DIFF_TOOL_PATH`` (a custom executable)
    wins if set; otherwise ``DIFF_TOOL`` selects 'pycharm', 'vscode' or 'kdiff3'. With
    ``DIFF_TOOL`` unset or 'auto', the first of those three that is detected is used.
    Errors are printed, never raised.

    Args:
        baseline_dir: Path to baseline directory
        output_dir: Path to output directory
    """
    requested = os.environ.get('DIFF_TOOL', 'auto')
    custom_tool = os.environ.get('DIFF_TOOL_PATH')
    auto_detect = requested == 'auto'

    try:
        # A custom diff tool path takes precedence over everything else
        if custom_tool:
            _launch_custom_diff_tool(custom_tool, baseline_dir, output_dir)
            return
        if requested == 'pycharm' or (auto_detect and _is_pycharm_available()):
            _launch_pycharm_diff(baseline_dir, output_dir)
            return
        if requested == 'vscode' or (auto_detect and _is_vscode_available()):
            _launch_vscode_diff(baseline_dir, output_dir)
            return
        if requested == 'kdiff3' or (auto_detect and _is_kdiff3_available()):
            _launch_kdiff3_diff(baseline_dir, output_dir)
            return

        print("No supported diff tool found. Set DIFF_TOOL environment variable.")
        print("Supported tools: pycharm, vscode, kdiff3")
        print("Or set DIFF_TOOL_PATH to specify a custom diff tool executable.")
    except Exception as e:
        print(f"Failed to launch diff tool: {e}")


def launch_file_diff(baseline_file: Path, output_file: Path):
    """Open a diff tool on two files.

    The tool is chosen from the environment: ``DIFF_TOOL_PATH`` (a custom executable)
    wins if set; otherwise ``DIFF_TOOL`` selects 'pycharm', 'vscode' or 'kdiff3'. With
    ``DIFF_TOOL`` unset or 'auto', the first of those three that is detected is used.
    If none applies, the system command line diff is run (``fc`` on Windows, ``diff``
    elsewhere). Errors are printed, never raised.

    Args:
        baseline_file: Path to baseline file
        output_file: Path to output file
    """
    requested = os.environ.get('DIFF_TOOL', 'auto')
    custom_tool = os.environ.get('DIFF_TOOL_PATH')
    auto_detect = requested == 'auto'

    try:
        # A custom diff tool path takes precedence over everything else
        if custom_tool:
            _launch_custom_file_diff(custom_tool, baseline_file, output_file)
            return

        file_pair = [str(baseline_file), str(output_file)]
        if requested == 'pycharm' or (auto_detect and _is_pycharm_available()):
            command = [os.environ.get('PYCHARM_PATH', 'pycharm'), "diff", *file_pair]
        elif requested == 'vscode' or (auto_detect and _is_vscode_available()):
            command = [os.environ.get('VSCODE_PATH', 'code'), "--diff", *file_pair]
        elif requested == 'kdiff3' or (auto_detect and _is_kdiff3_available()):
            command = ["kdiff3", *file_pair]
        else:
            print(f"Launching system default diff for {baseline_file.name}")
            if sys.platform == "win32":
                command = ["fc", "/N", *file_pair]
            else:
                command = ["diff", "-u", *file_pair]
        subprocess.run(command)
    except Exception as e:
        print(f"Failed to launch diff for {baseline_file.name}: {e}")


def compare_directories(baseline_dir: Path, output_dir: Path) -> tuple[bool, list[str]]:
    """Compare two directories and return differences.

    Only regular files directly inside the two directories are compared (by content);
    subdirectories are not descended into. Differences are listed in file name order:
    first the baseline files that differ or are missing, then the extra output files.

    If the ``LAUNCH_FILE_DIFF`` environment variable is '1', a diff tool is launched for
    every file whose content differs.

    Args:
        baseline_dir: Path to baseline directory
        output_dir: Path to output directory

    Returns:
        Tuple of (all_match, differences) where all_match is True if directories match
        and differences is a list of difference descriptions
    """
    if not baseline_dir.exists():
        return False, ["Baseline directory does not exist"]
    # Without this check a missing output directory would raise from iterdir() below.
    if not output_dir.is_dir():
        return False, ["Output directory does not exist"]

    differences = []
    launch_diffs = os.environ.get('LAUNCH_FILE_DIFF') == '1'

    # Compare files that exist in baseline (sorted so the report order is stable)
    for baseline_file in sorted(baseline_dir.iterdir()):
        if not baseline_file.is_file():
            continue
        output_file = output_dir / baseline_file.name
        if not output_file.exists():
            differences.append(f"Missing file: {baseline_file.name}")
        elif not filecmp.cmp(str(baseline_file), str(output_file), shallow=False):
            differences.append(f"Content differs: {baseline_file.name}")
            # Optionally launch diff for each differing file
            if launch_diffs:
                launch_file_diff(baseline_file, output_file)

    # Check for extra files in output
    for output_file in sorted(output_dir.iterdir()):
        if output_file.is_file() and not (baseline_dir / output_file.name).exists():
            differences.append(f"Extra file: {output_file.name}")

    return not differences, differences


def update_baseline(baseline_dir: Path, output_dir: Path):
    """Update baseline directory with new output.

    Any existing baseline directory is deleted and replaced by a full copy of
    ``output_dir``. Missing parent directories of the baseline are created.

    Args:
        baseline_dir: Path to baseline directory to update
        output_dir: Directory containing the new output to use as baseline

    Raises:
        FileNotFoundError: If ``output_dir`` does not exist.
        NotADirectoryError: If ``output_dir`` is not a directory.
    """
    # Validate the source before touching the baseline: otherwise a bad output path
    # would delete the existing baseline and then fail to replace it.
    if not output_dir.exists():
        raise FileNotFoundError(f"Output directory does not exist: {output_dir}")
    if not output_dir.is_dir():
        raise NotADirectoryError(f"Output path is not a directory: {output_dir}")

    # Create parent directory if it doesn't exist
    baseline_dir.parent.mkdir(parents=True, exist_ok=True)

    if baseline_dir.exists():
        shutil.rmtree(baseline_dir)

    shutil.copytree(output_dir, baseline_dir)
    print(f"Updated baseline at {baseline_dir}")


def update_baseline_for_test(test_name: str, output_dir: Path):
    """Update the baseline of a named test (legacy function for backward compatibility).

    The baseline location is derived from the test name, relative to the directory three
    levels above this file:
      - names starting with "test_global" or containing "import" or "export" are IO tests
        and use tests/io_tests/baselines/<test_name>
      - all other tests use tests/baseline/<test_name>

    Args:
        test_name: Name of the test
        output_dir: Directory containing the new output to use as baseline
    """
    project_root = Path(__file__).parent.parent.parent

    is_io_test = test_name.startswith("test_global") or "import" in test_name or "export" in test_name
    baseline_parts = ("io_tests", "baselines") if is_io_test else ("baseline",)
    baseline_dir = project_root.joinpath("tests", *baseline_parts, test_name)

    update_baseline(baseline_dir, output_dir)


def compare_with_baseline(baseline_dir: Path, output_dir: Path, test_name: str | None = None) -> bool:
    """Compare output directory with baseline and optionally update or launch diff tool.

    This function encapsulates the common pattern of:
    1. Checking if baseline should be updated (UPDATE_BASELINE env var)
    2. Comparing directories
    3. Launching diff tool if differences found (LAUNCH_DIFF_TOOL env var)
    4. Failing the test if differences found

    Args:
        baseline_dir: Path to baseline directory
        output_dir: Path to output directory to compare
        test_name: Optional test name included in the failure message. Defaults to the
            name of the baseline directory.

    Returns:
        True if directories match or baseline was updated. When differences are found the
        test is failed through ``pytest.fail`` instead of returning.

    Raises:
        The exception raised by ``pytest.fail`` if differences are found and the baseline
        is not being updated.
    """
    # Imported here so the rest of the module can be used without pytest installed.
    import pytest

    test_name = test_name or baseline_dir.name

    # Check if we should update baseline
    if os.environ.get('UPDATE_BASELINE') == '1':
        update_baseline(baseline_dir, output_dir)
        return True

    # Compare with baseline
    all_match, differences = compare_directories(baseline_dir, output_dir)

    if all_match:
        return True

    # Differences found - provide structured output for parsing
    print("Differences found:")
    for diff in differences:
        print(f"  - {diff}")

    # Print paths in a parseable format
    print(f"BASELINE_PATH: {baseline_dir}")
    print(f"OUTPUT_PATH: {output_dir}")

    # Launch diff tool if requested
    if os.environ.get('LAUNCH_DIFF_TOOL') == '1':
        launch_diff_tool(baseline_dir, output_dir)

    # Create simple diff message; the test name is appended so the original leading
    # lines of the message stay unchanged.
    diff_message = (
        f"Output differs from baseline: {differences}\nBaseline: {baseline_dir}\nOutput: {output_dir}"
        f"\nTest: {test_name}"
    )

    # Fail the test with detailed diff information
    pytest.fail(diff_message)
    return False  # Not reached: pytest.fail always raises.
