"""Reader for fort.14 geometry files."""

__copyright__ = "(C) Copyright Aquaveo 2023"
__license__ = "All rights reserved"

# 1. Standard Python modules
from array import array
from pathlib import Path
from typing import Sequence

# 2. Third party modules
import numpy as np

# 3. Aquaveo modules
from xms.grid.ugrid import UGrid

# 4. Local modules
from xms.ptmio.file_reader import FileReader
from xms.ptmio.fort14 import Boundary, Geometry


def read_geometry(path: Path | str) -> Geometry:
    """
    Read a .fort14 geometry file.

    This function assumes the file contains geometry, but no datasets. Boundary condition definitions will be expected
    and read, and an exception will be raised if they are absent or malformed.

    Each node in the file's UGrid has an ID associated with it, and it's possible these IDs are out of order or have
    gaps. If either of these is true, their order in the resulting datasets is unspecified, but guaranteed to be
    consistent with the dataset reader. In other words, if a .fort.14 dataset and .fort.14 geometry file use the same
    set of IDs, then it is guaranteed that the value at location N in the dataset(s) will be the value that belongs with
    node N in the UGrid.

    Args:
        path: The file to read.

    Returns:
        The file's parsed, validated content.
    """
    with FileReader(path, comment_markers=['!', '=']) as reader:
        header = reader.line
        reader.next_line()

        ugrid = _read_domain(reader)
        open_boundaries = _read_boundary_set(reader)
        closed_boundaries = _read_boundary_set(reader)

    return Geometry(header=header, ugrid=ugrid, open_boundaries=open_boundaries, closed_boundaries=closed_boundaries)


def _read_domain(reader: FileReader) -> UGrid:
    """Read the domain out of the file."""
    num_elements = reader.read_int()
    num_nodes = reader.read_int()
    reader.next_line()
    ugrid = UGrid()

    locations, mapping = _parse_locations(reader, num_nodes)
    ugrid.locations = locations

    cell_stream = _parse_cell_stream(reader, mapping, num_elements)
    ugrid.cellstream = cell_stream

    return ugrid


def _parse_locations(reader: FileReader, num_nodes: int) -> tuple[Sequence[tuple[float, float, float]], dict[int, int]]:
    """
    Read all the locations out of the file.

    Args:
        reader: Where to read the locations from.
        num_nodes: Number of locations to expect.
    """
    table = np.full((num_nodes, 4), 0.0, dtype=float)

    for index in range(num_nodes):
        node_id = reader.read_int()
        x = reader.read_float()
        y = reader.read_float()
        z = reader.read_float()
        reader.next_line()
        table[index] = (node_id, x, y, z)

    # It's possible for a geometry and dataset file to have IDs in different orders. We sort the locations by ID here
    # so they still match up.
    sorted_by_id = table[table[:, 0].argsort()]

    mapping = {}
    for index, node_id in enumerate(sorted_by_id[:, 0]):
        mapping[int(node_id)] = index

    locations = sorted_by_id[:, [1, 2, 3]]
    return locations, mapping


def _parse_cell_stream(reader: FileReader, mapping: dict[int, int], num_elements: int) -> Sequence[int]:
    """
    Read the connectivity out of the file.

    Args:
        reader: Where to read the cell stream from.
        mapping: Mapping from node_id -> index_in_file. Used to renumber nodes.
        num_elements: Number of elements to expect.
    """
    cell_stream = array('l')
    for _ in range(num_elements):
        reader.read_int()  # The cell's ID. Unused since nothing actually references it.
        length = reader.read_int()

        if not (3 <= length <= 4):
            raise reader.error('Only triangles and quads supported.')

        node_indexes = []
        for _ in range(length):
            node_id = reader.read_int()
            try:
                node_index = mapping[node_id]
            except KeyError:
                raise reader.error(f'Reference to undefined node {node_id}.')
            node_indexes.append(node_index)

        cell_type = UGrid.cell_type_enum.QUAD if length == 4 else UGrid.cell_type_enum.TRIANGLE
        cell_stream.extend((cell_type, length, *node_indexes))
        reader.next_line()

    return cell_stream


def _read_boundary_set(reader: FileReader) -> list[Boundary]:
    """
    Read a set of boundaries from the file.

    Reads either all the open boundaries or all the closed boundaries (whichever group is next in the file).
    """
    num_boundaries = reader.read_int()
    reader.next_line()
    boundaries = []
    reader.next_line()  # Number of nodes in set. Models probably uses it for pre-allocation; we don't need it.
    for _ in range(num_boundaries):
        boundary = _parse_boundary(reader)
        boundaries.append(boundary)

    return boundaries


def _parse_boundary(reader: FileReader) -> Boundary:
    """
    Parse a single boundary out of the file.

    Args:
        reader: Reader to parse from.
    """
    nodes = array('l')
    num_nodes = reader.read_int()
    flag = reader.read_int(optional=True)
    reader.next_line()
    if flag is None:
        flag = 0

    for _ in range(num_nodes):
        nodes.append(reader.read_int())
        reader.next_line()
    boundary = Boundary(boundary_type=flag, nodes=nodes)
    return boundary
