import pandas as pd
import numpy as np

# When True, only the first 7000 input rows are processed (quick trial run).
debug = False

# Column layout of the per-district summary written by this script.
OUTPUT_COLUMNS = ["기초지자체", "연면적합", "연면적_단독주택합", "비율"]


def summarize_detached_house_ratio(df, *, verbose=False):
    """Return per-district floor-area totals and the detached-house share.

    Rows are grouped by the key ``"<시도>_<시군구>"`` (spaces replaced with
    underscores). For each group the result holds the summed ``연면적(㎡)``,
    the summed ``연면적(㎡)`` of rows whose ``주용도`` equals ``"단독주택"``,
    and the quotient of the two.

    Args:
        df: DataFrame with the columns ``시도``, ``시군구``, ``주용도`` and
            ``연면적(㎡)``. It is not modified.
        verbose: When True, print the row count of every group as it is
            processed.

    Returns:
        A DataFrame with the columns in ``OUTPUT_COLUMNS``, one row per
        group, ordered by group key. Empty (header only) when ``df`` yields
        no groups.
    """
    # Build the grouping key as a standalone Series so the caller's frame is
    # left untouched. Rows where either part is missing get a NaN key, which
    # pandas' groupby leaves out by default.
    address = (df["시도"] + "_" + df["시군구"]).str.replace(" ", "_", regex=False)

    # Collect plain dict rows and build the frame once at the end; growing a
    # DataFrame with pd.concat inside the loop re-copies all earlier rows on
    # every iteration and starts from an empty all-object frame.
    rows = []
    for addr, df_gr in df.groupby(address):
        if verbose:
            print(len(df_gr))
        total_area = df_gr["연면적(㎡)"].sum()
        is_detached = df_gr["주용도"] == "단독주택"
        detached_area = df_gr.loc[is_detached, "연면적(㎡)"].sum()
        rows.append(
            {
                "기초지자체": addr,
                "연면적합": total_area,
                "연면적_단독주택합": detached_area,
                # NOTE: a group whose total area is 0 is not special-cased.
                "비율": detached_area / total_area,
            }
        )
    return pd.DataFrame(rows, columns=OUTPUT_COLUMNS)


def main():
    """Read the filtered building register, summarize it, and write the CSV."""
    df = pd.read_csv("DATA/refined/건축물대장/건축물대장_전국_필터.csv")
    print(len(df))
    if debug:
        df = df.iloc[:7000]

    out = summarize_detached_house_ratio(df, verbose=True)
    out.to_csv("DATA/processed/전국_기초지자체_단독주택연면적_비율.csv", index=False)


if __name__ == "__main__":
    main()