import pandas as pd
import glob
import os
from tools.geocode.naver_geocode import Naver_Map
from time import sleep


# Directory holding the split CSV files that still need geocoding.
INPUT_DIR = "DATA/refined/건축물대장/지오코딩전_파일_분리"
# Directory where geocoded CSV files are written.
OUTPUT_DIR = "DATA/refined/건축물대장/지오코드"
# Marker inserted before the extension of every geocoded output file.
GEOCODED_SUFFIX = "_geocoded"
# Placeholder stored when the geocoder returns no usable result.
INVALID_COORDINATE = "INVALID"
# Pause after each geocoder call, in seconds.
REQUEST_DELAY_SECONDS = 0.5


def adjust_filename(filename):
    """Return the source-file basename that corresponds to a geocoded file.

    The directory part is dropped and a trailing ``_geocoded`` marker (just
    before the extension) is removed; the extension itself is kept so the
    result can be compared with ``os.path.basename`` of an input file.
    """
    stem, ext = os.path.splitext(os.path.basename(filename))
    # Only strip the marker at the very end so that a source file whose own
    # name happens to contain "_geocoded" is still matched correctly.
    if stem.endswith(GEOCODED_SUFFIX):
        stem = stem[:-len(GEOCODED_SUFFIX)]
    return stem + ext


def geocoded_output_path(source_path):
    """Return the path under ``OUTPUT_DIR`` where the geocoded copy is saved."""
    stem, ext = os.path.splitext(os.path.basename(source_path))
    return os.path.join(OUTPUT_DIR, f"{stem}{GEOCODED_SUFFIX}{ext}")


def format_lot_number(value):
    """Format one lot-number component ('번' or '지') for use in an address.

    Returns an empty string when the component is missing (NaN/None), blank,
    or zero. Numeric text is normalised ("0012" -> "12", 12.0 -> "12").
    Text that cannot be parsed as a number is returned stripped but otherwise
    unchanged.
    """
    # pd.read_csv loads empty cells as NaN, which would otherwise end up in
    # the address as the literal text "nan".
    if pd.isna(value):
        return ""
    text = str(value).strip()
    if not text:
        return ""
    try:
        number = pd.to_numeric(text, downcast="integer")
    except (ValueError, TypeError):
        # Not a number: keep the text as it is.
        return text
    if pd.isna(number) or number == 0:
        return ""
    return str(number)


def build_address(row):
    """Build the address string for one record.

    ``row`` is any mapping with the keys '시도', '시군구', '법정동', '번'
    and '지'. The main lot number is appended after a space and the sub lot
    number after a hyphen; either one is omitted when it is empty or zero.
    """
    address = f"{row['시도']} {row['시군구']} {row['법정동']}"
    main_number = format_lot_number(row['번'])
    sub_number = format_lot_number(row['지'])
    if main_number:
        address += f" {main_number}"
    if sub_number:
        address += f"-{sub_number}"
    return address


def geocode_addresses(geocoder, addresses, delay=REQUEST_DELAY_SECONDS):
    """Geocode ``addresses`` in order and return ``(latitudes, longitudes)``.

    ``geocoder.geocoding(addr)`` is expected to return an indexable whose
    first two items are latitude and longitude, or a falsy value on failure;
    failures are recorded as "INVALID". An address equal to the one directly
    before it reuses the previous result without calling the geocoder again.
    """
    latitudes = []
    longitudes = []
    total = len(addresses)
    previous_addr = None
    lat = lon = INVALID_COORDINATE
    for i, addr in enumerate(addresses):
        print(f"{i}/{total} : {addr}")
        # Consecutive duplicates happen when several buildings (e.g. an
        # apartment complex built as one block) share the same lot address,
        # so the previous coordinates are reused without an API call.
        if i == 0 or addr != previous_addr:
            response = geocoder.geocoding(addr)
            if response:
                lat, lon = response[0], response[1]
            else:
                lat = lon = INVALID_COORDINATE
            sleep(delay)
        latitudes.append(lat)
        longitudes.append(lon)
        previous_addr = addr
    return latitudes, longitudes


def main():
    """Geocode every input CSV that has no geocoded counterpart yet."""
    all_files = glob.glob(f"{INPUT_DIR}/*.csv")
    completed_files = glob.glob(f"{OUTPUT_DIR}/*.csv")
    completed_names = {adjust_filename(f) for f in completed_files}
    remaining_files = [
        f for f in all_files if os.path.basename(f) not in completed_names
    ]

    # to_csv does not create missing directories.
    os.makedirs(OUTPUT_DIR, exist_ok=True)

    naver_geocode = Naver_Map()
    for source_path in remaining_files:
        df = pd.read_csv(source_path)

        # Built from records rather than df.apply(axis=1), which returns a
        # DataFrame instead of a Series when the file has no rows.
        df["address"] = [build_address(row) for row in df.to_dict("records")]

        latitudes, longitudes = geocode_addresses(naver_geocode, df["address"])
        df['latitude'] = latitudes
        df['longitude'] = longitudes

        df.to_csv(geocoded_output_path(source_path), index=False)


if __name__ == "__main__":
    main()