from llama_index import SimpleDirectoryReader, StorageContext, ServiceContext
from llama_index.indices.vector_store import VectorStoreIndex
from llama_index.vector_stores import PGVectorStore
from llama_index.embeddings import HuggingFaceEmbedding
from llama_index.llms.vllm import VllmServer
import psycopg2
import json
import urllib.parse  # `import urllib` alone does not expose urllib.parse

vllm_instance = VllmServer(
    api_url="http://takenaiassistent.iptime.org:14220/generate", max_new_tokens=2000, temperature=0.2
)
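# Quick sanity check that the vLLM server is reachable: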
# print(vllm_instance.complete("hello?"))

# max_length must be set explicitly (512 here) or embedding can crash for
# high-dimension multilingual models; this is not mentioned in the docs. See:
# https://github.com/run-llama/llama_index/issues/9272
# The embedding dimension is also set manually instead of letting llama_index
# infer it, because its defaults assume OpenAI embeddings; this model emits
# 384-dim vectors.
DENSE_EMBED_DIMENSION = 384
embed_model = HuggingFaceEmbedding(
    model_name='sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2',
    max_length=512,  # token truncation length, distinct from the embedding dimension
    # device="cuda:1",  # optionally pin the embedding model to a specific GPU
)
service_context = ServiceContext.from_defaults(llm=vllm_instance, embed_model=embed_model)
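# Note: llama_index >= 0.10 deprecates ServiceContext in favor of the global
# Settings object; this script targets the older ServiceContext API.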

documents = SimpleDirectoryReader("./data/paul_graham").load_data()
print("Document ID:", documents[0].doc_id)

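# db_config.json is expected to look roughly like this (illustrative
# placeholder values, not real credentials):
# {
#     "host": "localhost",
#     "port": 5432,
#     "user": "postgres",
#     "password": "secret",
#     "dbname": "vector_db",
#     "tablename": "paul_graham_essay"
# }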
with open('db_config.json', 'r') as file:
    db_config = json.load(file)
table_name = db_config.pop('tablename')  # remaining keys are psycopg2 connect kwargs
db_name = db_config["dbname"]

conn = psycopg2.connect(**db_config)
conn.autocommit = True

with conn.cursor() as c:
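    # pgvector must be enabled in the target database before the vector type
    # can be used (this assumes the extension is installed on the server):
    c.execute("CREATE EXTENSION IF NOT EXISTS vector")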
    c.execute(f"DROP TABLE IF EXISTS {table_name}")
    c.execute(f"CREATE TABLE {table_name} (id bigserial PRIMARY KEY, embedding vector({DENSE_EMBED_DIMENSION}))")

vector_store = PGVectorStore.from_params(
    database=db_name,
    host=db_config['host'],
    password=urllib.parse.quote_plus(db_config['password']),  # quote special characters for the connection URL
    port=db_config['port'],
    user=db_config['user'],
    table_name=table_name,
    embed_dim=DENSE_EMBED_DIMENSION
)
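# Note: PGVectorStore manages its own schema; in the llama_index versions this
# script targets, it writes to a table named with a "data_" prefix (i.e.
# f"data_{table_name}") rather than to the table created manually above.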

storage_context = StorageContext.from_defaults(vector_store=vector_store)
index = VectorStoreIndex.from_documents(
    documents,
    storage_context=storage_context,
    service_context=service_context,  # already carries both the LLM and the embed model
    show_progress=True,
)
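
# Optional sanity check: count the rows llama_index wrote to Postgres. This
# assumes PGVectorStore's default "data_" table prefix noted above.
with conn.cursor() as c:
    c.execute(f"SELECT count(*) FROM data_{table_name}")
    print("Rows in vector table:", c.fetchone()[0])

# Retrieval can be tuned when building the query engine, e.g.
# index.as_query_engine(similarity_top_k=3); similarity_top_k is a standard
# llama_index parameter, the value here is illustrative.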
query_engine = index.as_query_engine()

response = query_engine.query("what did author do?")
print(response)
response = query_engine.query("What happened in the mid 1980s?")
print(response)
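
# Inspect which chunks were retrieved for the last answer; source_nodes is part
# of llama_index's Response object (scores may be None for some vector stores).
for source_node in response.source_nodes:
    print(source_node.score, source_node.node.get_text()[:100])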