from pyogrio import read_dataframe, write_dataframe
import glob
import pandas as pd
import numpy as np
import geopandas as gpd
import json
from joblib import Parallel, delayed

from tools.spatial_indexed_intersection import geom_overlay


# Column-name constants. Neither is referenced in the part of the script
# visible here.
# NOTE(review): presumably the grid-cell ID column and the administrative
# district (EMD) code column of the input layers - confirm against the data.
GRID_INDEX = "GID"
LOCALE_INDEX = "EMD_CD"

# [region name, code string] pairs for the 11 regions handled by this project.
# Not referenced in the part of the script visible here.
# NOTE(review): the codes look like 5-digit administrative (SIG) codes - confirm.
SIG_CODE = [
    ["경산", "47290"],
    ["경주", "47130"],
    ["구미", "47190"],
    ["김천", "47150"],
    ["안동", "47170"],
    ["영주", "47210"],
    ["영천", "47230"],
    ["예천", "47900"],
    ["칠곡", "47850"],
    ["포항_남구", "47111"],
    ["포항_북구", "47113"]
]


# Every GeoPackage matched by this pattern is an input file; each one is
# passed to process_region() at the bottom of the script.
gpkg_datas = glob.glob("DATA/processed/점수산출_등급화전_전체데이터/v4/*.gpkg")

# Path of a per-grid building summary layer and the columns of interest in it.
# Neither name is referenced in the part of the script visible here.
house_ratio_grid_path = "DATA/processed/건물연면적/buildings_summary_per_grid.gpkg"
house_ratio_grid_columns_to_extract = ["BLDG_CNT", "FLOOR_AREA_SUM","HOUSE_FLOOR_AREA_SUM", "HOUSE_FLOOR_AREA_RATIO"]


def range_creator(df, column, num_ranges):
    """
    Build consecutive percentile-based value ranges for a column.

    The 0-100 percentile span is divided into ``num_ranges`` equal steps and
    the column value at every step is taken as a cut point. Each pair of
    neighbouring cut points forms one range, so adjacent ranges share their
    boundary value.

    Parameters:
    df (pd.DataFrame): The input dataframe.
    column (str): Name of the column whose values define the cut points.
    num_ranges (int): How many ranges to produce.

    Returns:
    list of tuples: ``(start, end)`` pairs, ordered from the lowest range
                    to the highest.
    """
    steps = np.linspace(0, 100, num_ranges + 1)
    cut_points = np.percentile(df[column], steps)
    # Pair every cut point with its successor: (c0, c1), (c1, c2), ...
    return list(zip(cut_points[:-1], cut_points[1:]))

def map_value_to_range(df, column, ranges):
    """
    Assign each value of a column the 1-based position of its range.

    Ranges are tested in the order given and both bounds are inclusive, so a
    value equal to a boundary shared by two ranges gets the earlier one.

    Parameters:
    df (pd.DataFrame): The input dataframe.
    column (str): Name of the column whose values are mapped.
    ranges (list of tuples): ``(start, end)`` pairs to test against.

    Returns:
    pd.Series: One entry per row of ``df``: the 1-based position of the
               first range containing the value, or NaN when no range
               contains it.
    """
    def first_matching_grade(value):
        matches = (
            grade
            for grade, (low, high) in enumerate(ranges, start=1)
            if low <= value <= high
        )
        # The first hit wins; NaN marks a value outside every range.
        return next(matches, np.nan)

    return df[column].apply(first_matching_grade)

def export_geojson_into_superset_compat_data(df, name):
    """
    Write a frame to ``<name>.csv`` with its geometry as JSON coordinate text.

    The frame is reprojected to EPSG:4326 and missing values are replaced
    with 0. Each geometry is then reduced to ``coordinates[0][0]`` of its
    ``__geo_interface__`` mapping and serialized with ``json.dumps`` before
    the CSV is written (intended for Superset visualization).

    The printed message (in Korean) asks the user to double-check that every
    row holds exactly one shape, because no exception handling is done here.

    Parameters:
    df: Frame with a "geometry" column; must support ``to_crs``.
    name (str): Output path without the ".csv" extension.

    Returns:
    None
    """
    print("폴리곤 데이터가 한 줄에 한 도형인지 다시 확인하세요! 별도의 예외처리가 없습니다!")

    def first_ring(shape):
        # NOTE(review): for a MultiPolygon-style mapping this should be the
        # outer ring of the first polygon; for a plain Polygon it would be a
        # single vertex - confirm the geometry type of the inputs.
        return shape.__geo_interface__["coordinates"][0][0]

    def as_json_text(coords):
        # JSON output uses double quotes; per the original author's note,
        # Mapbox does not accept single-quoted text.
        return json.dumps(coords, indent=4)

    df = df.to_crs("epsg:4326").fillna(0)
    df["geometry"] = df["geometry"].apply(first_ring)
    df["geometry"] = df["geometry"].apply(as_json_text)

    df.to_csv(f"{name}.csv")

def process_region(gpkg):
    """
    Score, grade and export the grid cells of one region.

    Reads the GeoPackage at ``gpkg``, replaces missing values with 0, drops
    the cells excluded by the rules below, computes the factor columns and
    the weighted final index ("최종지수"), grades that index into 100
    percentile ranges ("최종지수등급") and writes three files under
    ``DATA/processed/최종데이터/``:

    * ``<name>_격자.gpkg`` - the scored cells,
    * ``<name>_격자_메타데이터.txt`` - the two means, both lists of
      percentile ranges and the number of cells per grade,
    * ``<name>_격자_표준좌표계.csv`` - the cells reprojected to EPSG:4326
      with an additional ``centroid`` column.

    ``<name>`` is the input file name up to its first dot.

    Parameters:
    gpkg (str): '/'-separated path of the input GeoPackage. The layer must
                provide the columns read below (총인구, BLDG_CNT, 산지영역,
                아파트단지영역, CCTV_감시영역, 범죄취약점수, area_ratio,
                IH_RATIO, 취약인구_비율, HOUSE_FLOOR_AREA_RATIO).

    Returns:
    None
    """
    df = read_dataframe(gpkg).fillna(0)

    region_name = gpkg.split('/')[-1].split('.')[0]
    save_loc = f"DATA/processed/최종데이터/{region_name}_격자"

    # Exclusion rules: keep a cell only if it has population or buildings,
    # and its 산지영역 (mountain area) and 아파트단지영역 (apartment-complex
    # area) values are below the thresholds.
    # NOTE(review): the thresholds look like square metres of a 10,000 m2
    # cell - confirm the units.
    occupied = (df["총인구"] > 0) | (df["BLDG_CNT"] > 0)
    keep = occupied & (df["산지영역"] < 9000) & (df["아파트단지영역"] < 1000)
    # Copy so the column assignments below write to an independent frame
    # instead of a filtered selection (avoids SettingWithCopyWarning).
    df = df[keep].copy()

    # Factor columns: each input column scaled by its fixed coefficient.
    df["감시취약지수"] = (10000 - df["CCTV_감시영역"]) * 0.00352
    df["범죄특성요인"] = df["범죄취약점수"] * 17.6
    df["범죄예방요인"] = df["area_ratio"] * 14.7
    df["환경요인"] = df["IH_RATIO"] * 2.9
    # Population is first graded into 10 percentile ranges, then scaled.
    population_ranges = range_creator(df, "총인구", 10)
    df["인구밀집요인"] = map_value_to_range(df, "총인구", population_ranges) * 1.76

    # Weight flags: 1 where the cell is above the mean of the remaining cells.
    vulnerable_ratio_mean = df["취약인구_비율"].mean()
    house_ratio_mean = df["HOUSE_FLOOR_AREA_RATIO"].mean()
    df["가중치1"] = (df["취약인구_비율"] > vulnerable_ratio_mean).astype(int)
    df["가중치2"] = (df["HOUSE_FLOOR_AREA_RATIO"] > house_ratio_mean).astype(int)

    # Final index = sum of the five factors, boosted by 9.5% and/or 4.1%
    # when the corresponding flag is set. The operands are added in the same
    # order as a per-row calculation would, so the floats come out the same.
    factor_total = (
        df["감시취약지수"]
        + df["범죄특성요인"]
        + df["범죄예방요인"]
        + df["환경요인"]
        + df["인구밀집요인"]
    )
    weight = 1 + 0.095 * df["가중치1"] + 0.041 * df["가중치2"]
    df["최종지수"] = factor_total * weight

    final_ranges = range_creator(df, "최종지수", 100)
    df["최종지수등급"] = map_value_to_range(df, "최종지수", final_ranges)

    # NOTE(review): duplicates are removed only after the percentile ranges
    # were computed, so duplicated rows still influence the grading.
    df = df.drop_duplicates(keep="first")
    write_dataframe(df, f"{save_loc}.gpkg")

    grade_counts = df["최종지수등급"].value_counts().sort_index()
    # Lift the row limit only while formatting, so every grade is printed
    # without changing pandas display options for the rest of the process.
    with pd.option_context("display.max_rows", None):
        metadata_lines = [
            f"취약인구비율_평균 : {vulnerable_ratio_mean}\n",
            f"단독주택연면적_평균 : {house_ratio_mean}\n",
            f"인구밀집요인등급화 : {population_ranges}\n",
            f"최종지수등급화 : {final_ranges}\n",
            f"등급별_격자수 : {grade_counts}\n",
        ]
    with open(f"{save_loc}_메타데이터.txt", 'w', encoding='utf-8') as file:
        file.writelines(metadata_lines)

    # Take centroids in the layer's own CRS and reproject the points
    # afterwards: geopandas warns that centroids computed directly in a
    # geographic CRS such as EPSG:4326 are likely incorrect.
    centroids = df["geometry"].centroid.to_crs("epsg:4326")
    df = df.to_crs("epsg:4326")
    df["centroid"] = centroids
    df.to_csv(f"{save_loc}_표준좌표계.csv")


# Run process_region once per input GeoPackage, using up to 11 joblib workers.
_region_tasks = (delayed(process_region)(path) for path in gpkg_datas)
Parallel(n_jobs=11)(_region_tasks)