import pandas as pd
import geopandas as gpd
from pyogrio import read_dataframe, write_dataframe
from shapely.geometry import Point


# Load the raw store CSV. low_memory=False makes pandas parse the file in a
# single pass, so each column gets one consistently inferred dtype.
buildings_df = pd.read_csv(
    "DATA/raw/상가정보/소상공인시장진흥공단_상가(상권)정보_경북_202403.csv",
    low_memory=False,
)

# Load the grid layer and reproject it to EPSG:4326 so that it can be joined
# against the longitude/latitude points built from the CSV.
grid_df = read_dataframe(
    'DATA/refined/geopackage/시군구읍면동_경상북도_100x100.gpkg',
    encoding='utf-8',
).to_crs("epsg:4326")

def is_float(x):
    """Return True if ``float(x)`` succeeds, False otherwise.

    Never raises for unconvertible input: values of an unsupported type
    (e.g. ``None``, lists) and integers too large for a float are reported
    as False, just like unparsable strings.

    Note that NaN and infinity are valid floats, so ``float("nan")`` and the
    string ``"nan"`` both yield True.
    """
    try:
        float(x)
    except (TypeError, ValueError, OverflowError):
        # ValueError: unparsable string; TypeError: unsupported type such as
        # None; OverflowError: int too large to be represented as a float.
        return False
    return True

# Keep only the rows whose longitude ('경도') and latitude ('위도') both
# convert to float. .copy() detaches the result from buildings_df, so adding
# the geometry column below writes to an independent frame rather than to a
# slice (which would trigger pandas' SettingWithCopyWarning).
has_valid_coords = buildings_df['경도'].apply(is_float) & buildings_df['위도'].apply(is_float)
valid_buildings_df = buildings_df[has_valid_coords].copy()

# Build every point in one vectorized call (x = longitude, y = latitude)
# instead of constructing shapely Points row by row.
valid_buildings_df['geometry'] = gpd.points_from_xy(
    valid_buildings_df['경도'].map(float),
    valid_buildings_df['위도'].map(float),
)

# The CSV coordinates carry no CRS of their own; they are tagged with the
# grid's CRS (EPSG:4326 after the reprojection above) so both layers match.
# NOTE(review): this presumes the CSV coordinates are WGS84 lon/lat - confirm.
buildings_gdf = gpd.GeoDataFrame(valid_buildings_df, geometry='geometry', crs=grid_df.crs)

# Attach to each store the attributes of the grid cell that contains it.
# `predicate` replaces the `op` keyword, which was deprecated in geopandas
# 0.10 and removed in 1.0.
joined_df = gpd.sjoin(buildings_gdf, grid_df, how='inner', predicate='within')

# groupby(...).size() returns a Series (which has no columns), so each result
# is given an explicit name; that name becomes its CSV column header.
stores = joined_df.groupby('SPO_NO_CD').size()
stores.name = '상가_수'

# buildings_area_sum = joined_df.groupby('SPO_NO_CD')['연면적(㎡)'].sum()
# buildings_area_sum.name = '연면적(㎡)합'

# Rows whose mid-level business category is either '일반 숙박' or '주점'.
selection_slice = joined_df["상권업종중분류명"].isin(['일반 숙박', '주점'])

inn_and_hedonic = joined_df[selection_slice].groupby('SPO_NO_CD').size()
inn_and_hedonic.name = '숙박 및 유해업소수'

# divide() aligns both Series on SPO_NO_CD; cells without any selected store
# come out as NaN. Cells with fewer than 5 stores are forced to a ratio of 0.
ratio_of_inn_and_hedonic = inn_and_hedonic.divide(stores).where(stores >= 5, 0)
ratio_of_inn_and_hedonic.name = '숙박및 유해업소 비율'

stores.to_csv('store_counts.csv')

inn_and_hedonic.to_csv('inn_and_hedonic.csv')

ratio_of_inn_and_hedonic.to_csv('inn_and_hedonic_ratio.csv')

# One joined row per SPO_NO_CD.
# NOTE(review): final_gdf is not used by anything below - confirm whether it
# is still needed.
final_gdf = joined_df.drop_duplicates(subset=['SPO_NO_CD'])

summary_df = pd.DataFrame({'STORE_CNT': stores, 'IH_CNT': inn_and_hedonic, 'IH_RATIO': ratio_of_inn_and_hedonic}).reset_index()

# fillna() returns a new frame, so the result must be assigned; otherwise the
# NaNs (cells with stores but no selected store) end up in the exported file.
summary_df = summary_df.fillna(0)

# Default (inner) merge: only grid cells that contain at least one store are kept.
summary_gdf = grid_df.merge(summary_df, on='SPO_NO_CD')

# Export to GeoPackage
write_dataframe(summary_gdf, 'DATA/processed/유흥_숙박업소/inn_and_hedonic.gpkg')

# Do not save files with geopandas: it corrupts non-Latin characters by assigning them the wrong encoding (always latin-1).
# summary_gdf.to_file('DATA/processed/건물연면적/buildings_summary_per_grid.shp', encoding='utf-8')