yjyoon / RAG-minimal-example
RAG-minimal-example / document_embedding.py
File name                     Commit message     Commit date
data/paul_graham              Hello Yona         2024-01-25
README.md                     Update README.md   2024-01-25
db_config.json                Hello Yona         2024-01-25
document_embedding.py         Hello Yona         2024-01-25
document_load_embedding.py    Hello Yona         2024-01-25
Latest commit f8a3f99 by juni (2024-01-25): Hello Yona
from llama_index import SimpleDirectoryReader, StorageContext, ServiceContext
from llama_index.indices.vector_store import VectorStoreIndex
from llama_index.vector_stores import PGVectorStore
from llama_index.embeddings import HuggingFaceEmbedding
from llama_index.llms.vllm import VllmServer
import psycopg2
import json
import urllib.parse

# LLM served by a remote vLLM instance.
vllm_instance = VllmServer(
    api_url="http://takenaiassistent.iptime.org:14220/generate",
    max_new_tokens=2000,
    temperature=0.2,
)
# print(vllm_instance.complete("hello?"))

# max_length must be set explicitly (the linked issue mentions 512), or high-dimension
# multilingual models crash; the documentation does not mention this.
# https://github.com/run-llama/llama_index/issues/9272
# The embedding dimension is also set manually instead of letting llama_index decide,
# because its defaults are tuned for OpenAI embeddings.
DENSE_EMBED_DIMENSION = 384
embed_model = HuggingFaceEmbedding(
    model_name="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
    max_length=DENSE_EMBED_DIMENSION,
)  # optionally: device="cuda:1"

service_context = ServiceContext.from_defaults(llm=vllm_instance, embed_model=embed_model)

documents = SimpleDirectoryReader("./data/paul_graham").load_data()
print("Document ID:", documents[0].doc_id)

# Database settings come from db_config.json; the table name is stripped out
# before the remaining keys are passed to psycopg2.connect.
with open("db_config.json", "r") as file:
    db_config = json.load(file)
table_name = db_config["tablename"]
db_config.pop("tablename")
db_name = db_config["dbname"]

# Reset the embedding table (the vector type requires the pgvector extension).
conn = psycopg2.connect(**db_config)
conn.autocommit = True
with conn.cursor() as c:
    c.execute(f"DROP TABLE IF EXISTS {table_name}")
    c.execute(
        f"CREATE TABLE {table_name} "
        f"(id bigserial PRIMARY KEY, embedding vector({DENSE_EMBED_DIMENSION}))"
    )

vector_store = PGVectorStore.from_params(
    database=db_name,
    host=db_config["host"],
    password=urllib.parse.quote_plus(db_config["password"]),  # handle special characters
    port=db_config["port"],
    user=db_config["user"],
    table_name=table_name,
    embed_dim=DENSE_EMBED_DIMENSION,
)

storage_context = StorageContext.from_defaults(vector_store=vector_store)
index = VectorStoreIndex.from_documents(
    documents,
    storage_context=storage_context,
    service_context=service_context,
    show_progress=True,
    embed_model=embed_model,
)

query_engine = index.as_query_engine()
response = query_engine.query("what did author do?")
print(response)
response = query_engine.query("What happened in the mid 1980s?")
print(response)
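The script reads its PostgreSQL connection settings from db_config.json; the keys it consumes are tablename, dbname, host, port, user, and password. A minimal sketch of a helper that writes such a file follows; every value is an illustrative placeholder, not taken from the repository.

# Hypothetical helper that writes a db_config.json with the keys document_embedding.py expects.
# All values below are placeholder assumptions.
import json

sample_config = {
    "tablename": "paul_graham",   # table that will hold the pgvector embeddings
    "dbname": "ragdb",            # target PostgreSQL database
    "host": "localhost",
    "port": 5432,
    "user": "postgres",
    "password": "change-me",
}

with open("db_config.json", "w") as f:
    json.dump(sample_config, f, indent=2)

After tablename is popped, the remaining keys are passed directly to psycopg2.connect, so they must all be valid libpq connection parameters.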
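The listing also contains document_load_embedding.py, whose contents are not shown on this page. A minimal sketch of how such a loader could reattach to the same pgvector table and query it without re-embedding, assuming the same config and models as above, might look like the following; this is an illustration, not the repository's actual file.

# Sketch: rebuild an index over embeddings already stored in pgvector.
from llama_index import ServiceContext
from llama_index.indices.vector_store import VectorStoreIndex
from llama_index.vector_stores import PGVectorStore
from llama_index.embeddings import HuggingFaceEmbedding
from llama_index.llms.vllm import VllmServer
import json
import urllib.parse

DENSE_EMBED_DIMENSION = 384

with open("db_config.json", "r") as file:
    db_config = json.load(file)
table_name = db_config.pop("tablename")

embed_model = HuggingFaceEmbedding(
    model_name="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
    max_length=DENSE_EMBED_DIMENSION,
)
vllm_instance = VllmServer(
    api_url="http://takenaiassistent.iptime.org:14220/generate",
    max_new_tokens=2000,
    temperature=0.2,
)
service_context = ServiceContext.from_defaults(llm=vllm_instance, embed_model=embed_model)

vector_store = PGVectorStore.from_params(
    database=db_config["dbname"],
    host=db_config["host"],
    password=urllib.parse.quote_plus(db_config["password"]),
    port=db_config["port"],
    user=db_config["user"],
    table_name=table_name,
    embed_dim=DENSE_EMBED_DIMENSION,
)

# from_vector_store attaches to the existing store instead of re-reading the documents.
index = VectorStoreIndex.from_vector_store(vector_store, service_context=service_context)
print(index.as_query_engine().query("what did author do?"))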