"""PackageReaderBase class."""

__copyright__ = "(C) Copyright Aquaveo 2025"
__license__ = "All rights reserved"

# 1. Standard Python modules
import os
from pathlib import Path
import re
import shlex
import sys

# 2. Third party modules

# 3. Aquaveo modules
from xms.core.filesystem import filesystem as fs

# 4. Local modules
from xms.mf6.data import package_factory
from xms.mf6.data.array import Array
from xms.mf6.file_io import io_util
from xms.mf6.file_io.text_file_context_manager import TextFileContextManager
from xms.mf6.misc import log_util, util

# Number of leading characters sliced off a GMS comment line before it is stored: len('#GMSCOMMENT ').
GMS_COMMENT_CARD_LEN = 12


def read_external_list(filename):
    """Returns all the lines of an external list file.

    Args:
        filename (str): Path of the external file.

    Returns:
        (list): The lines returned by the file object's ``readlines()``.
    """
    # Start with an empty result so there is always something to return after the 'with' block.
    contents = []
    with TextFileContextManager(filename) as external_file:
        contents = external_file.readlines()
    return contents


class PackageReaderBase:
    """Base class for package reader classes.

    ``read`` opens the file, finds each ``BEGIN <block>`` line and passes the lines of the block to a reader
    method. The method is looked up on the class by name (``_read_<block name in lowercase>``) unless
    ``_block_methods`` maps the block name to another method name.
    """

    importing = False  # flag to indicate if external lists and arrays have been formatted for pandas
    # Temp files created when importing. This is a single class-level list: list_to_external_file() appends to it
    # through PackageReaderBase, so all reader instances share it.
    temp_file_list: list[str] = []

    def __init__(self):
        """Initializes the class."""
        self.ftype = ''  # The file type used in the GWF name file (e.g. 'WEL6')
        self._data = None  # A package data class.
        self._file = None  # The file object
        self._line_number = 0  # Current line number we're reading in the file
        self._block_begin_line = ''  # line with BEGIN in it
        self._curr_block_name = ''  # Current block name
        self._curr_block_number = -1  # Current block number (e.g. 'BEGIN PERIOD 2')
        self._stop_after_block = ''  # name of block to end import operation
        self._blocks_to_skip = set()  # Blocks that we will ignore
        self._block_methods = {}  # Block name -> reader method name, overriding the '_read_<block>' default
        self._resolve_paths: bool = True  # If True, relative file paths are resolved to absolute paths
        self._log = log_util.get_logger()

    def _store_comment(self, line, gms_comment: bool):
        """Stores the line as a comment.

        Args:
            line (str): A line from the file.
            gms_comment (bool): True if it's a GMS comment (starts out '#GMSCOMMENT ')
        """
        text = line.rstrip('\n')
        if gms_comment:
            # Drop the '#GMSCOMMENT ' card and keep the rest
            self._data.gms_comments.append(text[GMS_COMMENT_CARD_LEN:])
        else:
            # Strip the valid comment characters (and spaces) from the front
            self._data.comments.append(text.lstrip("#!/ "))

    def _on_begin_block(self, block_name):
        """Called when a BEGIN [block] line is found.

        Args:
            block_name (str): Name of the current block.
        """
        pass

    def _on_end_block(self, block_name):
        """Called by _read_block when the END line is found or the reader method reports the block is finished.

        Does nothing by default, like the other hooks. Not called for unrecognized blocks that get skipped.

        Args:
            block_name (str): Name of the current block.
        """
        pass

    def _on_begin_file(self) -> None:
        """Called after initializing the package and before reading anything."""
        pass

    def _on_end_file(self):
        """Called after closing the file."""
        pass

    def _find_method(self, block_name):
        """Finds and returns the method that reads the block.

        The method is looked up on the class, not the instance, so the caller must pass the instance explicitly
        (``method(self, line)``). If no callable with the expected name exists, a warning is logged.

        Args:
            block_name (str): Name of the block

        Returns:
            method (callable): The method to call, or None.
        """
        if block_name in self._block_methods:
            method_name = self._block_methods[block_name]
        else:
            method_name = f'_read_{block_name.lower()}'

        method = getattr(self.__class__, method_name, None)
        if callable(method):
            return method

        fname = self._data.filename if self._data else ''
        self._log.warning(
            f'Unrecognized block: "{block_name}" found in file: {os.path.basename(fname)}. '
            f'Block skipped.'
        )
        self._log.warning(f'Full path to file: {fname}.')
        return None

    def _read_block(self, block_name, block_number=''):
        """Reads one block from the current file, passing each line to the block's reader method.

        Args:
            block_name (str): Name of the block.
            block_number (str): Some blocks have numbers like 'BEGIN PERIOD 1'

        Raises:
            IOError: If the file ends before the end of the block is found.
        """
        self._curr_block_name = block_name
        self._curr_block_number = int(block_number) if block_number else -1
        self._on_begin_block(block_name)
        method = self._find_method(block_name)
        end_found = False
        # Use "while True" syntax and not "for line in file" cause the latter doesn't work with TextFile
        while True:
            line = self._file.readline()
            if not line:
                break

            self._line_number += 1
            if io_util.is_end_block_line(line, block_name):
                self._on_end_block(block_name)
                end_found = True
                break

            if not method:
                # Unrecognized block (already logged by _find_method). Skip the rest of it.
                io_util.skip_block(self._file)
                end_found = True
                break

            if method(self, line) is True:
                # The method finished reading the block. Stop reading.
                end_found = True
                self._on_end_block(block_name)
                break

        if not end_found:
            raise IOError(f'Error reading file {self._data.filename} on or around line {self._line_number}.')

    def _is_open_close_line(self, words):
        """Returns true if the line is an OPEN/CLOSE line.

        Args:
            words (list of str): List of words from the line after splitting.

        Returns:
            (bool): True if there are at least two words and the first is 'OPEN/CLOSE' (case insensitive).
        """
        return bool(words) and len(words) > 1 and words[0].upper() == 'OPEN/CLOSE'

    def _list_to_external_file(self, list_lines):
        """Cleans up the lines from list data and writes them to an external file that pandas can read.

        The column count comes from the column info of the current block.

        Args:
            list_lines (list): lines that make up list data

        Returns:
            (str): Path of the file that was written.
        """
        names, _, _ = self._data.get_column_info(self._curr_block_name)
        return self.list_to_external_file(list_lines, len(names))

    def join_line(self, words, column_count, separator=io_util.mfsep):
        """Joins the words into a line and returns the line.

        Can be overridden.

        Args:
            words (list[str]): The words.
            column_count (int): Number of columns that should be written for this line.
            separator (str): Separator character.

        Returns:
            (str): The line.
        """
        extra = ''
        if column_count > 0 and len(words) < column_count:
            # Add separators at the end for missing columns so CSV reader works
            extra = separator * (column_count - len(words))
        return f'{separator.join([str(x) for x in words])}{extra}'

    def list_to_external_file(self, list_lines, column_count):
        """Cleans up the lines from list data and writes them to an external file that pandas can read.

        The new file's name is also appended to PackageReaderBase.temp_file_list.

        Args:
            list_lines (list): lines that make up list data
            column_count: Number of columns.

        Returns:
            (str): Path of the temp file that was written.
        """
        # Split on single spaces but keep quoted strings together. shlex would do this but it is SLOW, so use a
        # regex: https://stackoverflow.com/questions/79968. Compiled once here instead of once per line.
        splitter = re.compile("( |\\\".*?\\\"|'.*?')")
        with open(io_util.get_temp_filename(suffix='.mf6_tmp'), mode='wt') as new_file:
            for line in list_lines:
                # "One or more spaces, or a single comma optionally combined with
                # spaces, must separate adjacent values." mf6io.pdf page 19.
                line = line.strip().replace(',', ' ').replace('\t', ' ')
                words = [p for p in splitter.split(line) if p.strip()]
                new_file.write(f'{self.join_line(words, column_count)}\n')
            PackageReaderBase.temp_file_list.append(new_file.name)
            return new_file.name

    def _read_array(self, line: str, array_names: list[str]) -> Array:
        """Reads the array.

        Args:
            line : A line read from the file
            array_names : strings of valid array names

        Returns:
            array : array data.
        """
        numeric_types = {name: 'int' if self._data.is_int_array(name) else 'float' for name in array_names}
        return Array.read(
            self._file, line, array_names, self._data.array_size_and_layers, self._data.filename, numeric_types,
            PackageReaderBase.importing
        )

    def _handle_array_block(self, line: str) -> bool:
        """Reads one array of the current block and adds it to the block.

        Args:
            line: A line from the file.

        Returns:
            bool : Always False, so _read_block keeps reading (there may be more arrays in the block).
        """
        array = self._read_array(line, self._data.block(self._curr_block_name).names)
        if array:
            self._data.block(self._curr_block_name).add_array(array)
        return False  # There may be more arrays to follow in the block

    def _handle_list_block(self, line: str) -> bool:
        """Reads other list blocks besides perioddata.

        Args:
            line (str): line from file

        Returns:
            bool : Always True, because _read_list_block reads through to the end of the block.
        """
        external_filename = self._read_list_block(line, self._curr_block_name)
        self._data.list_blocks[self._curr_block_name] = external_filename
        return True

    def _read_list_block(self, line, block_name):
        """Reads a list block, starting with line and continuing until the END line (or end of file).

        Args:
            line (str): line from file
            block_name (str): block name from file

        Returns:
            external_filename (str) : file where the list is stored. May be '' (see comments below).
        """
        external_filename = ''
        list_lines = []
        while line:
            if io_util.is_end_block_line(line, block_name):
                break

            words = line.split()
            if self._is_open_close_line(words):
                # Paths are always relative to the mfsim.nam file, but with components, the
                # mfsim.nam file is always unlocked in the system Temp folder but the packages
                # may still be locked somewhere else. We should get the correct result by
                # using the package filename to resolve the relative path. This was causing
                # a bug when reading the .exg package when saving the simulation after having
                # saved the project.
                external_filename = fs.resolve_relative_path(self._data.filename, words[1])
                if PackageReaderBase.importing:
                    list_lines = read_external_list(external_filename)
            else:
                list_lines.append(line)
            line = self._file.readline()

        if list_lines:
            if self._data.tree_node:
                # We are reading from the tree, meaning we've already imported this sim, so the list_lines should
                # already be in an external file. However, OC6 is special because it cannot use OPEN/CLOSE, so the
                # list lines remain in the file. But the data will be in the periods.db database, so again we do not
                # need to create an external file from the lines that are read here. And doing so messes up tests.
                external_filename = ''
            else:
                external_filename = self._list_to_external_file(list_lines)

        return external_filename

    def _read_option_line(self, filename, line):
        """Reads the line in an options block and stores the option in the dict.

        Args:
            filename (str): Filepath of file we are reading.
            line (str): A line from the file.

        Returns:
            (tuple): None if the line has no words, otherwise a tuple containing:
            - key: str
            - value: str or may be list(str) or may be something else
        """
        # Non-POSIX splitting is only meant for Windows. Don't test with '"win" in sys.platform' because that
        # also matches 'darwin' and 'cygwin'.
        words = shlex.split(line.strip(), posix=not sys.platform.startswith('win'))
        if not words:
            return None

        options = self._data.options_block.dict()
        first = words[0].upper()
        second = words[1].upper() if len(words) > 1 else ''
        if second == 'PRINT_FORMAT':
            # Handle special case of
            # [HEAD PRINT_FORMAT COLUMNS <columns> WIDTH <width> DIGITS <digits> <format>]
            key_upper = f'{first} {second}'
            options[key_upper] = ' '.join(words[2:])  # Don't use a list
        elif len(words) > 2 and second in {'FILEIN', 'FILEOUT'}:
            # Handle FILEIN and FILEOUT.
            key_upper = f'{first} {second}'
            fileinout_name = words[2].strip('"\'')
            if self._resolve_paths:
                fileinout_name = fs.resolve_relative_path(os.path.dirname(filename), fileinout_name)
            if second == 'FILEIN':  # We use a list because there can be multiple time series files.
                if key_upper in options:
                    options[key_upper].append(fileinout_name)
                else:
                    options[key_upper] = [fileinout_name]
            else:  # Don't use a list
                options[key_upper] = os.path.basename(fileinout_name) if self._resolve_paths else fileinout_name
        elif first == 'AUXILIARY':
            key_upper = first
            options[key_upper] = words[1:]  # Put values into a list
        else:
            key_upper = first
            options[key_upper] = ' '.join(words[1:])  # Don't use a list

        return key_upper, options[key_upper]

    def _read_options(self, line):
        """Reads a line from the options block.

        Args:
            line (str): A line from the file.

        Returns:
            (tuple(str, str)): tuple containing:
            - key: str, or None if the line had no words
            - value: str or may be list(str) or may be something else, or None if the line had no words
        """
        result = self._read_option_line(self._data.filename, line)
        if result is None:
            # Nothing on the line. Return a pair anyway so callers can always unpack the result.
            return None, None
        return result

    def _read_settings(self):
        """Reads the package settings file if it exists."""
        pass

    def _init_package(self, **kwargs):
        """Initializes the data class.

        Args:
            **kwargs: Arbitrary keyword arguments.

        Keyword Args:
            mfsim (MfsimData): The simulation.
            model (GwfData or GwtData): The GWF/GWT model. Will be None for TDIS, IMS, Exchanges (things below mfsim)
            grid_info (GridInfo): Information about the grid. Only used when testing individual packages. Otherwise,
             it comes from model and dis
            filename (str): Filepath.
            parent_package_ftype (str): Stored on the data class if it has that attribute. Defaults to 'GWF6'.
        """
        self._data = package_factory.package_from_ftype(ftype=self.ftype, **kwargs)

        if hasattr(self._data, 'parent_package_ftype'):
            self._data.parent_package_ftype = kwargs.get('parent_package_ftype', 'GWF6')

    def invalid_line_in_file(self, line):
        """Logs info about invalid line in file.

        Args:
            line (str): the line from the file
        """
        self._log.info(f'Invalid line in file skipped. Line number: {self._line_number}. Line: "{line}".')

    def _read_top_comments(self, filename):
        """Reads the top comments from the file and stores them.

        Reading stops at the first BEGIN line (or the end of the file). Every line read is added to the line count.

        Args:
            filename (str): File path.
        """
        with open(filename, 'r') as self._file:
            for line in self._file:
                self._line_number += 1
                if io_util.is_comment(line):
                    self._store_comment(line, io_util.is_gms_comment(line))
                elif io_util.is_begin_line(line):
                    break

    def read(self, filename: str | Path, **kwargs):
        """Reads a file and returns a data object.

        Any exception raised while reading is logged and not propagated.

        Args:
            filename: Path to file.
            **kwargs: Arbitrary keyword arguments.

        Keyword Args:
            mfsim (MfsimData): The simulation.
            model (GwfData or GwtData): The GWF/GWT model. Will be None for TDIS, IMS, Exchanges (things below mfsim)
            grid_info (GridInfo): Information about the grid. Only used when testing individual packages. Otherwise,
             it comes from model and dis
            resolve_paths (bool): If True, relative file paths are resolved to absolute paths.

        Returns:
            The package data class, or None if there was an error.
        """
        try:
            self._log.info(f'Reading \"{str(filename)}\"')

            self._line_number = 0  # Start over so a reader that is used again doesn't report accumulated lines
            kwargs['filename'] = str(filename)  # Add filename to kwargs
            self._resolve_paths = kwargs.get('resolve_paths', True)
            self._init_package(**kwargs)
            self._on_begin_file()
            self._read_top_comments(self._data.filename)
            # _read_top_comments stored top comments so now use TextFile to skip comments and blank lines (it's faster)
            with TextFileContextManager(self._data.filename) as self._file:
                while True:
                    line = self._file.readline()
                    if not line:
                        break

                    self._line_number += 1  # This doesn't include comment lines
                    if not io_util.is_begin_line(line):
                        continue

                    self._block_begin_line = line
                    words = line.split()
                    if len(words) < 2:
                        continue  # BEGIN with no block name

                    block_name = words[1].upper()
                    number = words[2] if len(words) > 2 and util.is_number(words[2]) else ''
                    if block_name in self._blocks_to_skip:
                        io_util.skip_block(self._file)
                    else:
                        self._read_block(block_name, number)

                    if self._stop_after_block == block_name:
                        break

            self._on_end_file()
            self._read_settings()
            return self._data

        except Exception as error:
            self._log.error(f'Error reading file {str(filename)} on or around line {self._line_number}: {str(error)}')
            self._log.exception(error)
            self._data = None

        return None
