import os
import shutil
import tempfile

import openai
import streamlit as st
from llama_index import ServiceContext, SimpleDirectoryReader, VectorStoreIndex, download_loader
from llama_index.llms import OpenAI


def create_vector_embedding_from_pdf(uploaded_pdf):
    """Parse an uploaded PDF into LlamaIndex documents and build a vector index over them."""
    # Persist the in-memory upload to a temporary file so the PDF loader can read it from disk.
    with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as temp_file:
        shutil.copyfileobj(uploaded_pdf, temp_file)
        temp_file_path = temp_file.name  # Get the file path

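    # Fetch the PyMuPDF-based loader from LlamaHub and parse the PDF into Documents.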
    PyMuPDFReader = download_loader("PyMuPDFReader")
    loader = PyMuPDFReader()
    documents = loader.load(temp_file_path, metadata=True)
    index = VectorStoreIndex.from_documents(documents)

    os.remove(temp_file_path)  # Clean up the temporary file now that the index is built.

    return documents, index  # TODO: extend this to also return a dataframe of the parsed content


st.set_page_config(page_title="Chat with the Streamlit docs, powered by LlamaIndex", page_icon="🦙", layout="centered",
                   initial_sidebar_state="auto", menu_items=None)
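# The OpenAI key is read from Streamlit secrets; this assumes an `openai_key`
# entry in .streamlit/secrets.toml.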
openai.api_key = st.secrets.openai_key
st.title("Chat with the Streamlit docs, powered by LlamaIndex 💬🦙")
st.info(
    "Check out the full tutorial to build this app in our [blog post](https://blog.streamlit.io/build-a-chatbot-with-custom-data-sources-powered-by-llamaindex/)",
    icon="📃")

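# Cache the built index as a resource so the docs are loaded and embedded only
# once per process; show_spinner is disabled because a custom spinner is shown below.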
@st.cache_resource(show_spinner=False)
def load_data():
    with st.spinner(text="Loading and indexing the Streamlit docs – hang tight! This should take 1-2 minutes."):
        reader = SimpleDirectoryReader(input_dir="./data", recursive=True)
        docs = reader.load_data()
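        # Bundle the LLM and prompt defaults that LlamaIndex will use at index and query time.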
        service_context = ServiceContext.from_defaults(
            llm=OpenAI(model="gpt-3.5-turbo", temperature=0.5,
                       system_prompt="You are an expert on the Streamlit Python library and your job is to answer technical questions."
                                     " Assume that all questions are related to the Streamlit Python library. Keep your answers technical and based on facts – do not hallucinate features.")
        )
        index = VectorStoreIndex.from_documents(docs, service_context=service_context)
        return index

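# Build (or fetch from the cache) the index over the bundled Streamlit docs.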
streamlit_docs_index = load_data()

uploaded_file = st.file_uploader('Choose your .pdf file', type="pdf")
if uploaded_file is not None:
    documents, pdf_index = create_vector_embedding_from_pdf(uploaded_file)
    # VectorStoreIndex.insert() takes a single Document and mutates the index in
    # place (it returns None), so insert each parsed document rather than
    # reassigning the return value.
    for doc in documents:
        streamlit_docs_index.insert(doc)
    combined_index = streamlit_docs_index
else:
    combined_index = streamlit_docs_index

if "messages" not in st.session_state.keys():  # Initialize the chat messages history
    st.session_state.messages = [
        {"role": "assistant", "content": "Ask me a question about Streamlit's open-source Python library!"}
    ]


if "chat_engine" not in st.session_state.keys():  # Initialize the chat engine
    st.session_state.chat_engine = combined_index.as_chat_engine(chat_mode="condense_question", verbose=True)
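# A possible refinement (a sketch, not part of the original tutorial): rebuild the
# engine whenever a new PDF arrives so its content is queryable mid-session, e.g.:
#     if uploaded_file is not None:
#         st.session_state.chat_engine = combined_index.as_chat_engine(
#             chat_mode="condense_question", verbose=True)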

if prompt := st.chat_input("Your question"):  # Prompt for user input and save to chat history
    st.session_state.messages.append({"role": "user", "content": prompt})

for message in st.session_state.messages:  # Display the prior chat messages
    with st.chat_message(message["role"]):
        st.write(message["content"])


# If last message is not from assistant, generate a new response
if st.session_state.messages[-1]["role"] != "assistant":
    with st.chat_message("assistant"):
        with st.spinner("Thinking..."):
            response = st.session_state.chat_engine.chat(prompt)
            st.write(response.response)
            message = {"role": "assistant", "content": response.response}
            st.session_state.messages.append(message)  # Add response to message history