import pandas as pd
from datetime import datetime, date
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score ,confusion_matrix# 정확도 함수

# Default CSV data file read by both analyses; override with ``csv_path``.
_DEFAULT_CSV_PATH = r'D:\takensoft\project7\약상자송신데이터.csv'
# A user with fewer rows than this is reported as not analyzable.
_MIN_USER_ROWS = 30
# Message printed and returned when a user has too few rows.
_NOT_ANALYZABLE = "분석불가"
# Flag columns that are concatenated and read as one base-2 number per row.
_TYPE_COLUMNS = ['type1', 'type2', 'type3', 'type4', 'type5']
_TAKE_COLUMNS = ['take1', 'take2', 'take3', 'take4', 'take5']
# Model inputs shared by both classifiers.
_FEATURE_COLUMNS = ['temp1', 'temp2', 'temp3', 'temp4', 'weekday',
                    'prev_humantypes', 'prev_taketypes']


def _encode_bits(frame, columns):
    """Join ``columns`` as text per row and parse the result as a base-2 integer."""
    bits = frame[columns[0]].astype(str)
    for column in columns[1:]:
        bits = bits + frame[column].astype(str)
    return bits.map(lambda text: int(text, 2))


def _load_user_features(user_id, csv_path):
    """Read the CSV and build the modelling frame for a single user.

    Args:
        user_id: Value matched against the ``user_id`` column.
        csv_path: Path of the euc-kr encoded CSV file to read.

    Returns:
        A DataFrame holding the user's rows plus the derived columns
        ``humantypes``, ``taketypes``, ``weekday``, ``error``,
        ``prev_humantypes`` and ``prev_taketypes``; or ``None`` when the user
        has fewer than ``_MIN_USER_ROWS`` rows.
    """
    raw = pd.read_csv(csv_path, encoding='euc-kr')

    # Filter first so only this user's rows are parsed: a malformed row that
    # belongs to another user can no longer abort the analysis.
    frame = raw.loc[raw['user_id'] == user_id].reset_index(drop=True)
    if len(frame) < _MIN_USER_ROWS:
        return None

    frame['humantypes'] = _encode_bits(frame, _TYPE_COLUMNS)
    frame['taketypes'] = _encode_bits(frame, _TAKE_COLUMNS)

    frame['date_info'] = pd.to_datetime(frame['date_info'])
    frame['weekday'] = frame['date_info'].dt.weekday  # Monday=0 ... Sunday=6

    frame['error'] = frame['humantypes'] != frame['taketypes']

    # NOTE(review): shift() uses file order, so "previous" is only the
    # chronologically previous record if the CSV rows are already sorted by
    # date for each user -- confirm against the data source.
    frame['prev_humantypes'] = frame['humantypes'].shift()
    frame['prev_taketypes'] = frame['taketypes'].shift()

    # Only the first row lacks a previous record (shift() leaves NaN there),
    # so drop just that one. The last row has complete features and is kept.
    return frame.iloc[1:].reset_index(drop=True)


def _fit_and_report(frame, target_column):
    """Fit a random forest on an 80/20 split and print the test accuracy."""
    features = frame[_FEATURE_COLUMNS]
    target = frame[target_column]
    # Split training and evaluation data 8:2 with a fixed seed.
    train_x, test_x, train_y, test_y = train_test_split(
        features, target, test_size=0.2, random_state=42)
    clf = RandomForestClassifier(n_estimators=20, max_depth=5, random_state=0)
    clf.fit(train_x, train_y)
    print(accuracy_score(test_y, clf.predict(test_x)))


def taking_TF(user_id, csv_path=_DEFAULT_CSV_PATH):
    """Model whether a user's ``taketypes`` differs from ``humantypes``.

    Trains a random forest on the boolean ``error`` column
    (``humantypes != taketypes``) and prints the accuracy on the held-out
    20% of rows.

    Args:
        user_id: Value matched against the ``user_id`` column.
        csv_path: CSV file to read; defaults to the original fixed location.

    Returns:
        The string "분석불가" (also printed) when the user has fewer than 30
        rows; otherwise ``None``.
    """
    frame = _load_user_features(user_id, csv_path)
    if frame is None:
        print(_NOT_ANALYZABLE)
        return _NOT_ANALYZABLE
    _fit_and_report(frame, 'error')


def taking_class(user_id, csv_path=_DEFAULT_CSV_PATH):
    """Model a user's ``taketypes`` value as a multi-class target.

    Trains a random forest on the integer ``taketypes`` column and prints the
    accuracy on the held-out 20% of rows.

    Args:
        user_id: Value matched against the ``user_id`` column.
        csv_path: CSV file to read; defaults to the original fixed location.

    Returns:
        The string "분석불가" (also printed) when the user has fewer than 30
        rows; otherwise ``None``.
    """
    frame = _load_user_features(user_id, csv_path)
    if frame is None:
        print(_NOT_ANALYZABLE)
        return _NOT_ANALYZABLE
    _fit_and_report(frame, 'taketypes')



# Run both analyses for the same sample member whenever this file is loaded.
_SAMPLE_USER_ID = 'MEMBER_000000000000584'
taking_TF(_SAMPLE_USER_ID)
taking_class(_SAMPLE_USER_ID)