import csv
import hashlib
import os
# from helpers import get_absolute_path


class Person:
    """Base class for CSV-backed person datasets keyed by their ``Index`` column.

    Creating any instance lazily loads ``Spouses.csv`` and ``Base.csv`` from
    the directory containing this module into class-level caches shared by all
    instances. Subclasses are expected to set ``person_type`` and call
    :meth:`create_data`, which populates ``data`` and ``count``.
    """

    data_details = None   # shared cache for Base.csv: {Index: row dict}
    spouses = None        # shared cache for Spouses.csv: {Index: row dict}

    def __init__(self):
        """Populate the shared caches on first use.

        Raises:
            OSError: if ``Spouses.csv`` or ``Base.csv`` cannot be opened.
            KeyError: if a non-blank row has no ``Index`` column.
        """
        app_dir = os.path.dirname(os.path.abspath(__file__))
        # Each cache is published only after its file was read successfully,
        # so a failed load is retried by the next instance instead of leaving
        # an empty dict behind that looks like a loaded cache.
        if Person.spouses is None:
            Person.spouses = self._read_indexed_csv(
                os.path.join(app_dir, "Spouses.csv"))
        if Person.data_details is None:
            Person.data_details = self._read_indexed_csv(
                os.path.join(app_dir, "Base.csv"))

    @staticmethod
    def _read_indexed_csv(path):
        """Read the CSV at *path* into ``{row['Index']: row_dict}``.

        The first row supplies the column names. Blank lines are skipped and
        an empty file yields an empty dict. When two rows share an ``Index``
        the later row wins.
        """
        records = {}
        # newline="" lets the csv module do its own newline handling.
        with open(path, encoding="utf-8", newline="") as f:
            reader = csv.reader(f)
            cols = next(reader, None)
            if cols is None:
                return records
            for row in reader:
                if not row:
                    # csv.reader yields [] for a blank line; there is no
                    # record to store.
                    continue
                record = dict(zip(cols, row))
                records[record['Index']] = record
        return records

    def list_all(self):
        """Return all Index keys from this person's dataset, sorted as strings."""
        return sorted(self.data)

    def get_relatives(self, index):
        """Get related persons depending on type.

        For ``person_type == "students"`` returns the Index of every row in
        the parents dataset whose ``Students_qry.Index`` equals *index*.
        For ``person_type == "parents"`` returns a list holding the
        ``Students_qry.Index`` of the parent row *index* (empty if there is no
        such row). Any other ``person_type`` yields an empty list.

        Note that every call builds a fresh ``Parents()``, which re-reads its
        CSV file.
        """
        index = str(index)
        if self.person_type == "students":
            parents = Parents().data
            return [p_idx for p_idx, p in parents.items()
                    if p['Students_qry.Index'] == index]
        if self.person_type == "parents":
            # Keys are unique, so a direct lookup replaces the full scan.
            parent = Parents().data.get(index)
            return [] if parent is None else [parent['Students_qry.Index']]
        return []

    def get_person(self, index):
        """Return one record by index, or ``None`` if it is not present."""
        return self.data.get(str(index))

    def create_data(self, filename):
        """Read CSV into dict keyed by Index for faster lookups.

        Sets ``self.data`` to the loaded mapping and ``self.count`` to its
        size. A relative *filename* that does not exist as given is also
        looked up next to this module, matching where ``__init__`` finds the
        shared CSV files.

        Raises:
            OSError: if the file cannot be opened.
            KeyError: if a non-blank row has no ``Index`` column.
        """
        path = filename
        if (isinstance(path, (str, os.PathLike))
                and not os.path.isabs(path)
                and not os.path.exists(path)):
            app_dir = os.path.dirname(os.path.abspath(__file__))
            candidate = os.path.join(app_dir, path)
            if os.path.exists(candidate):
                path = candidate
        self.data = self._read_indexed_csv(path)
        self.count = len(self.data)


class Parents(Person):
    """Person dataset whose records are read from ``Parents.csv``."""

    def __init__(self):
        """Run the ``Person`` initialiser, tag the type, then load the CSV."""
        super().__init__()
        kind, source = "parents", "Parents.csv"
        self.person_type = kind
        self.create_data(source)

class Students(Person):
    """Person dataset read from ``Students.csv``, with parent-profile helpers."""

    def __init__(self):
        super().__init__()
        self.person_type = "students"
        self.create_data("Students.csv")

    @staticmethod
    def _resolve_image_path(path):
        """Return an absolute form of *path* for comparing and opening images.

        Uses a module-level ``get_absolute_path`` when one is in scope (the
        project helper this code was written against); otherwise falls back
        to ``os.path.abspath`` so the lookup cannot fail with ``NameError``.
        NOTE(review): the fallback resolves against the current working
        directory - confirm that matches the helper's behaviour.
        """
        resolver = globals().get("get_absolute_path")
        if callable(resolver):
            return resolver(path)
        return os.path.abspath(path)

    def unique_images(self, imgs):
        """Return the paths in *imgs* whose file contents are not repeated.

        Files are compared by the MD5 digest of their bytes; the first path
        seen for each digest is kept, in input order. A path that cannot be
        read is reported on stdout and left out of the result.
        """
        seen = set()
        unique = []

        for path in imgs:
            hasher = hashlib.md5()
            try:
                # Read in chunks so large images are never held in memory whole.
                with open(path, 'rb') as f:
                    for chunk in iter(lambda: f.read(8192), b''):
                        hasher.update(chunk)
            except OSError as e:  # includes FileNotFoundError
                print(f"Error reading image {path}: {e}")
                continue

            digest = hasher.hexdigest()
            if digest not in seen:
                seen.add(digest)
                unique.append(path)

        return unique

    def get_parent_profiles(self, index):
        """Get details from Base.csv for a student's parents, deduplicated.

        Returns one dict per relative reported by ``get_relatives``. The first
        profile is complete; every later profile drops each field whose value
        equals the first profile's. A ``Path`` is then blanked (set to ``""``)
        when its image is unreadable or its content or path already appeared
        in an earlier profile. The returned dicts are copies, so the shared
        ``Person.data_details`` cache is not modified.

        Raises:
            KeyError: if a relative's index is missing from ``Person.data_details``.
        """
        relatives = self.get_relatives(str(index))

        # Shallow copies so the edits below never touch Person.data_details.
        parent_profiles = [Person.data_details[r].copy() for r in relatives]
        if len(parent_profiles) < 2:
            return parent_profiles

        reference = parent_profiles[0]
        for profile in parent_profiles[1:]:
            # Iterate over a snapshot of the keys because entries are deleted.
            for key in list(profile):
                if key in reference and reference[key] == profile[key]:
                    del profile[key]

        # Resolve each non-empty Path once; None marks "no image".
        resolved = [
            self._resolve_image_path(profile['Path']) if profile.get('Path') else None
            for profile in parent_profiles
        ]
        unique_imgs = set(
            self.unique_images([path for path in resolved if path is not None])
        )

        # Keep only the first profile that claims each unique image, so a path
        # repeated in several later profiles is cleared as well.
        claimed = set()
        for profile, abs_path in zip(parent_profiles, resolved):
            if abs_path is None:
                continue
            if abs_path in unique_imgs and abs_path not in claimed:
                claimed.add(abs_path)
            else:
                profile['Path'] = ""

        return parent_profiles


if __name__ == "__main__":
    # Manual smoke run: constructing either dataset also fills the shared
    # Person.data_details (Base.csv) and Person.spouses (Spouses.csv) caches.
    students = Students()
    parents = Parents()

    # Show the deduplicated parent profiles for the student with Index '7'.
    profiles = students.get_parent_profiles('7')
    print(profiles)
