Skip to content
Snippets Groups Projects
Commit 9c26b60b authored by Jakub Zovak
Browse files

mark my code contribution

parent 37396df2
Branches
No related tags found
No related merge requests found
Showing
with 52 additions and 91 deletions
...@@ -6,8 +6,10 @@ ENV PYTHONFAULTHANDLER=1 \ ...@@ -6,8 +6,10 @@ ENV PYTHONFAULTHANDLER=1 \
PIP_NO_CACHE_DIR=off \ PIP_NO_CACHE_DIR=off \
PIP_DISABLE_PIP_VERSION_CHECK=on \ PIP_DISABLE_PIP_VERSION_CHECK=on \
PIP_DEFAULT_TIMEOUT=100 \ PIP_DEFAULT_TIMEOUT=100 \
# LVD MODIFICATION START
POETRY_VERSION=1.5.1 \ POETRY_VERSION=1.5.1 \
PEP517_BUILD_BACKEND=setuptools.build_meta PEP517_BUILD_BACKEND=setuptools.build_meta
# LVD MODIFICATION END
RUN pip install "poetry==$POETRY_VERSION" RUN pip install "poetry==$POETRY_VERSION"
...@@ -21,6 +23,7 @@ RUN poetry config virtualenvs.create false \ ...@@ -21,6 +23,7 @@ RUN poetry config virtualenvs.create false \
# Creating folders, and files for a project: # Creating folders, and files for a project:
COPY . /code COPY . /code
# LVD MODIFICATION START
RUN chmod 777 /code RUN chmod 777 /code
RUN chmod +x /code/start_all.sh RUN chmod +x /code/start_all.sh
RUN chmod +x /code/start_all_no_filters.sh RUN chmod +x /code/start_all_no_filters.sh
...@@ -36,3 +39,4 @@ RUN chmod +x /code/start_qdrant.sh ...@@ -36,3 +39,4 @@ RUN chmod +x /code/start_qdrant.sh
RUN chmod +x /code/start_qdrant_no_filters.sh RUN chmod +x /code/start_qdrant_no_filters.sh
CMD ["./start_qdrant_no_filters.sh"] CMD ["./start_qdrant_no_filters.sh"]
# LVD MODIFICATION END
\ No newline at end of file
...@@ -9,14 +9,11 @@ BASE_DIRECTORY = Path(__file__).parent.parent ...@@ -9,14 +9,11 @@ BASE_DIRECTORY = Path(__file__).parent.parent
DATASETS_DIR = BASE_DIRECTORY / "datasets" DATASETS_DIR = BASE_DIRECTORY / "datasets"
CODE_DIR = os.path.dirname(__file__) CODE_DIR = os.path.dirname(__file__)
ROOT_DIR = Path(os.path.dirname(CODE_DIR)) ROOT_DIR = Path(os.path.dirname(CODE_DIR))
# LVD MODIFICATION START
KUBE_DIR = "/pvc" KUBE_DIR = "/pvc"
def copy_directory(src, dst): def copy_directory(src, dst):
"""
Recursively copy a directory from src to dst,
handling the case where dst may already exist.
"""
if not os.path.exists(dst): if not os.path.exists(dst):
os.makedirs(dst) os.makedirs(dst)
...@@ -24,6 +21,7 @@ def copy_directory(src, dst): ...@@ -24,6 +21,7 @@ def copy_directory(src, dst):
s = os.path.join(src, item) s = os.path.join(src, item)
d = os.path.join(dst, item) d = os.path.join(dst, item)
if os.path.isdir(s): if os.path.isdir(s):
copy_directory(s, d) # Recursive call copy_directory(s, d)
else: else:
shutil.copy2(s, d) shutil.copy2(s, d)
\ No newline at end of file # LVD MODIFICATION END
# LVD MODIFICATION START
from engine.clients.chroma.configure import ChromaConfigurator from engine.clients.chroma.configure import ChromaConfigurator
from engine.clients.chroma.search import ChromaSearcher from engine.clients.chroma.search import ChromaSearcher
from engine.clients.chroma.upload import ChromaUploader from engine.clients.chroma.upload import ChromaUploader
# LVD MODIFICATION END
# LVD MODIFICATION START
import os import os
CHROMA_COLLECTION_NAME = os.getenv("CHROMA_COLLECTION_NAME", "benchmark") CHROMA_COLLECTION_NAME = os.getenv("CHROMA_COLLECTION_NAME", "benchmark")
CHROMA_PORT = 8000 CHROMA_PORT = 8000
# LVD MODIFICATION END
# LVD MODIFICATION START
from benchmark.dataset import Dataset from benchmark.dataset import Dataset
from engine.base_client.configure import BaseConfigurator from engine.base_client.configure import BaseConfigurator
from engine.clients.chroma.config import CHROMA_COLLECTION_NAME from engine.clients.chroma.config import CHROMA_COLLECTION_NAME
...@@ -35,3 +36,4 @@ class ChromaConfigurator(BaseConfigurator): ...@@ -35,3 +36,4 @@ class ChromaConfigurator(BaseConfigurator):
}, },
get_or_create=True, # If True, return the existing collection if it exists. get_or_create=True, # If True, return the existing collection if it exists.
) )
# LVD MODIFICATION END
# LVD MODIFICATION START
from typing import Any, Dict, List, Optional from typing import Any, Dict, List, Optional
from engine.base_client import IncompatibilityError from engine.base_client import IncompatibilityError
...@@ -40,3 +41,4 @@ class ChromaConditionParser(BaseConditionParser): ...@@ -40,3 +41,4 @@ class ChromaConditionParser(BaseConditionParser):
return { return {
field_name: value field_name: value
} }
# LVD MODIFICATION END
\ No newline at end of file
# import multiprocessing as mp # LVD MODIFICATION START
from typing import List, Tuple from typing import List, Tuple
from engine.base_client.search import BaseSearcher from engine.base_client.search import BaseSearcher
...@@ -25,32 +25,12 @@ class ChromaSearcher(BaseSearcher): ...@@ -25,32 +25,12 @@ class ChromaSearcher(BaseSearcher):
@classmethod @classmethod
def init_client(cls, host, distance, connection_params: dict, search_params: dict): def init_client(cls, host, distance, connection_params: dict, search_params: dict):
# cls.client: QdrantClient = QdrantClient(
# host,
# prefer_grpc=True,
# limits=httpx.Limits(max_connections=None, max_keepalive_connections=0),
# **connection_params
# )
cls.client = HttpClient(host=host, port=CHROMA_PORT) cls.client = HttpClient(host=host, port=CHROMA_PORT)
cls.collection = cls.client.get_collection(CHROMA_COLLECTION_NAME) cls.collection = cls.client.get_collection(CHROMA_COLLECTION_NAME)
cls.search_params = search_params cls.search_params = search_params
# Uncomment for gRPC
# @classmethod
# def get_mp_start_method(cls):
# return "forkserver" if "forkserver" in mp.get_all_start_methods() else "spawn"
@classmethod @classmethod
def search_one(cls, vector, meta_conditions, top) -> List[Tuple[int, float]]: def search_one(cls, vector, meta_conditions, top) -> List[Tuple[int, float]]:
# res = cls.client.search(
# collection_name=QDRANT_COLLECTION_NAME,
# query_vector=vector,
# query_filter=cls.parser.parse(meta_conditions),
# limit=top,
# search_params=rest.SearchParams(
# **cls.search_params.get("search_params", {})
# ),
# )
res = cls.collection.query( res = cls.collection.query(
query_embeddings=[vector], query_embeddings=[vector],
where=cls.parser.parse(meta_conditions), where=cls.parser.parse(meta_conditions),
...@@ -58,3 +38,4 @@ class ChromaSearcher(BaseSearcher): ...@@ -58,3 +38,4 @@ class ChromaSearcher(BaseSearcher):
) )
return list(zip(map(int, res.get("ids")[0]), res.get("distances")[0])) return list(zip(map(int, res.get("ids")[0]), res.get("distances")[0]))
# LVD MODIFICATION END
# import time # LVD MODIFICATION START
# from typing import List, Optional
# from qdrant_client import QdrantClient
# from qdrant_client.http.models import Batch, CollectionStatus
from typing import List, Optional from typing import List, Optional
from engine.base_client.upload import BaseUploader from engine.base_client.upload import BaseUploader
...@@ -10,8 +6,6 @@ from engine.base_client.upload import BaseUploader ...@@ -10,8 +6,6 @@ from engine.base_client.upload import BaseUploader
from engine.clients.chroma.config import CHROMA_COLLECTION_NAME from engine.clients.chroma.config import CHROMA_COLLECTION_NAME
from engine.clients.chroma.config import CHROMA_PORT from engine.clients.chroma.config import CHROMA_PORT
# from engine.clients.qdrant.config import QDRANT_COLLECTION_NAME
from chromadb.api.models.Collection import Collection from chromadb.api.models.Collection import Collection
from chromadb.config import Settings from chromadb.config import Settings
from chromadb import HttpClient from chromadb import HttpClient
...@@ -29,19 +23,6 @@ class ChromaUploader(BaseUploader): ...@@ -29,19 +23,6 @@ class ChromaUploader(BaseUploader):
cls.collection = cls.client.get_collection(CHROMA_COLLECTION_NAME) cls.collection = cls.client.get_collection(CHROMA_COLLECTION_NAME)
cls.upload_params = upload_params cls.upload_params = upload_params
# @classmethod
# def upload_batch(
# cls, ids: List[int], vectors: List[list], metadata: Optional[List[dict]]
# ):
# cls.client.upsert(
# collection_name=QDRANT_COLLECTION_NAME,
# points=Batch.construct(
# ids=ids,
# vectors=vectors,
# payloads=[payload or {} for payload in metadata],
# ),
# )
@classmethod @classmethod
def upload_batch( def upload_batch(
cls, ids: List[int], vectors: List[list], metadata: Optional[List[dict]] cls, ids: List[int], vectors: List[list], metadata: Optional[List[dict]]
...@@ -56,37 +37,9 @@ class ChromaUploader(BaseUploader): ...@@ -56,37 +37,9 @@ class ChromaUploader(BaseUploader):
metadatas=metadata, metadatas=metadata,
ids=id_strings, ids=id_strings,
) )
# cls.client.upsert(
# collection_name=QDRANT_COLLECTION_NAME,
# points=Batch.construct(
# ids=ids,
# vectors=vectors,
# payloads=[payload or {} for payload in metadata],
# ),
# )
# @classmethod
# def post_upload(cls, _distance):
# cls.wait_collection_green()
# return {}
# @classmethod
# def wait_collection_green(cls):
# wait_time = 5.0
# total = 0
# while True:
# time.sleep(wait_time)
# total += wait_time
# collection_info = cls.client.get_collection(QDRANT_COLLECTION_NAME)
# if collection_info.status != CollectionStatus.GREEN:
# continue
# time.sleep(wait_time)
# collection_info = cls.client.get_collection(QDRANT_COLLECTION_NAME)
# if collection_info.status == CollectionStatus.GREEN:
# break
# return total
@classmethod @classmethod
def delete_client(cls): def delete_client(cls):
if cls.client is not None: if cls.client is not None:
del cls.client del cls.client
# LVD MODIFICATION END
\ No newline at end of file
...@@ -42,8 +42,10 @@ ENGINE_CONFIGURATORS = { ...@@ -42,8 +42,10 @@ ENGINE_CONFIGURATORS = {
"opensearch": OpenSearchConfigurator, "opensearch": OpenSearchConfigurator,
"redis": RedisConfigurator, "redis": RedisConfigurator,
"pgvector": PgVectorConfigurator, "pgvector": PgVectorConfigurator,
# LVD MODIFICATION START
"chroma": ChromaConfigurator, "chroma": ChromaConfigurator,
"lvd": LVDConfigurator, "lvd": LVDConfigurator,
# LVD MODIFICATION END
} }
ENGINE_UPLOADERS = { ENGINE_UPLOADERS = {
...@@ -54,8 +56,10 @@ ENGINE_UPLOADERS = { ...@@ -54,8 +56,10 @@ ENGINE_UPLOADERS = {
"opensearch": OpenSearchUploader, "opensearch": OpenSearchUploader,
"redis": RedisUploader, "redis": RedisUploader,
"pgvector": PgVectorUploader, "pgvector": PgVectorUploader,
# LVD MODIFICATION START
"chroma": ChromaUploader, "chroma": ChromaUploader,
"lvd": LVDUploader "lvd": LVDUploader,
# LVD MODIFICATION END
} }
ENGINE_SEARCHERS = { ENGINE_SEARCHERS = {
...@@ -66,8 +70,10 @@ ENGINE_SEARCHERS = { ...@@ -66,8 +70,10 @@ ENGINE_SEARCHERS = {
"opensearch": OpenSearchSearcher, "opensearch": OpenSearchSearcher,
"redis": RedisSearcher, "redis": RedisSearcher,
"pgvector": PgVectorSearcher, "pgvector": PgVectorSearcher,
# LVD MODIFICATION START
"chroma": ChromaSearcher, "chroma": ChromaSearcher,
"lvd": LVDSearcher "lvd": LVDSearcher
# LVD MODIFICATION END
} }
......
# LVD MODIFICATION START
from engine.clients.lvd.configure import LVDConfigurator from engine.clients.lvd.configure import LVDConfigurator
from engine.clients.lvd.search import LVDSearcher from engine.clients.lvd.search import LVDSearcher
from engine.clients.lvd.upload import LVDUploader from engine.clients.lvd.upload import LVDUploader
# LVD MODIFICATION END
# LVD MODIFICATION START
import os import os
LVD_COLLECTION_NAME = os.getenv("LVD_COLLECTION_NAME", "benchmark") LVD_COLLECTION_NAME = os.getenv("LVD_COLLECTION_NAME", "benchmark")
LVD_PORT = 8000 LVD_PORT = 8000
# LVD MODIFICATION END
# LVD MODIFICATION START
from benchmark.dataset import Dataset from benchmark.dataset import Dataset
from engine.base_client.configure import BaseConfigurator from engine.base_client.configure import BaseConfigurator
from engine.clients.lvd.config import LVD_COLLECTION_NAME from engine.clients.lvd.config import LVD_COLLECTION_NAME
...@@ -36,3 +37,4 @@ class LVDConfigurator(BaseConfigurator): ...@@ -36,3 +37,4 @@ class LVDConfigurator(BaseConfigurator):
}, },
get_or_create=True, # If True, return the existing collection if it exists. get_or_create=True, # If True, return the existing collection if it exists.
) )
# LVD MODIFICATION END
# LVD MODIFICATION START
from typing import Any, Dict, List, Optional from typing import Any, Dict, List, Optional
from engine.base_client import IncompatibilityError from engine.base_client import IncompatibilityError
...@@ -40,3 +41,4 @@ class LVDConditionParser(BaseConditionParser): ...@@ -40,3 +41,4 @@ class LVDConditionParser(BaseConditionParser):
return { return {
field_name: value field_name: value
} }
# LVD MODIFICATION END
# LVD MODIFICATION START
from typing import List, Tuple from typing import List, Tuple
import requests import requests
from engine.base_client.search import BaseSearcher from engine.base_client.search import BaseSearcher
...@@ -44,16 +45,6 @@ class LVDSearcher(BaseSearcher): ...@@ -44,16 +45,6 @@ class LVDSearcher(BaseSearcher):
url = f"http://{cls.upload_host}:{cls.upload_port}/api/v1/collections/{cls.collection.id}/query" url = f"http://{cls.upload_host}:{cls.upload_port}/api/v1/collections/{cls.collection.id}/query"
res = requests.post(url, json=data, headers={}, verify=False) res = requests.post(url, json=data, headers={}, verify=False)
res = res.json() res = res.json()
# res = cls.collection.query(
# query_embeddings=[vector],
# include=["distances"],
# where=cls.parser.parse(meta_conditions),
# n_results=top,
# n_buckets=cls.search_params["n_buckets"],
# bruteforce_threshold=cls.search_params["bruteforce_threshold"],
# constraint_weight=cls.search_params["constraint_weight"],
# search_until_bucket_not_empty=,
# )
return list(zip(map(int, res.get("ids")[0]), res.get("distances")[0])) return list(zip(map(int, res.get("ids")[0]), res.get("distances")[0]))
# LVD MODIFICATION END
# LVD MODIFICATION START
from typing import List, Optional from typing import List, Optional
from engine.base_client.upload import BaseUploader from engine.base_client.upload import BaseUploader
import requests import requests
...@@ -53,3 +54,4 @@ class LVDUploader(BaseUploader): ...@@ -53,3 +54,4 @@ class LVDUploader(BaseUploader):
def delete_client(cls): def delete_client(cls):
if cls.client is not None: if cls.client is not None:
del cls.client del cls.client
# LVD MODIFICATION END
...@@ -22,7 +22,9 @@ class QdrantSearcher(BaseSearcher): ...@@ -22,7 +22,9 @@ class QdrantSearcher(BaseSearcher):
os.environ["GRPC_POLL_STRATEGY"] = "epoll,poll" os.environ["GRPC_POLL_STRATEGY"] = "epoll,poll"
cls.client: QdrantClient = QdrantClient( cls.client: QdrantClient = QdrantClient(
host, host,
# LVD MODIFICATION START
prefer_grpc=False, prefer_grpc=False,
# LVD MODIFICATION END
limits=httpx.Limits(max_connections=None, max_keepalive_connections=0), limits=httpx.Limits(max_connections=None, max_keepalive_connections=0),
**connection_params **connection_params
) )
......
...@@ -17,7 +17,9 @@ class QdrantUploader(BaseUploader): ...@@ -17,7 +17,9 @@ class QdrantUploader(BaseUploader):
def init_client(cls, host, distance, connection_params, upload_params): def init_client(cls, host, distance, connection_params, upload_params):
os.environ["GRPC_ENABLE_FORK_SUPPORT"] = "true" os.environ["GRPC_ENABLE_FORK_SUPPORT"] = "true"
os.environ["GRPC_POLL_STRATEGY"] = "epoll,poll" os.environ["GRPC_POLL_STRATEGY"] = "epoll,poll"
# LVD MODIFICATION START
cls.client = QdrantClient(host=host, prefer_grpc=False, **connection_params) cls.client = QdrantClient(host=host, prefer_grpc=False, **connection_params)
# LVD MODIFICATION END
cls.upload_params = upload_params cls.upload_params = upload_params
@classmethod @classmethod
......
# LVD MODIFICATION START
apiVersion: networking.k8s.io/v1 apiVersion: networking.k8s.io/v1
kind: Ingress kind: Ingress
metadata: metadata:
...@@ -75,4 +76,5 @@ spec: ...@@ -75,4 +76,5 @@ spec:
resources: resources:
limits: limits:
cpu: 1 cpu: 1
memory: 16Gi memory: 16Gi
\ No newline at end of file # LVD MODIFICATION END
# LVD MODIFICATION START
version: '3.9' version: '3.9'
services: services:
chroma: chroma:
image: chromadb/chroma image: chromadb/chroma
ports: ports:
- "8000:8000" - "8000:8000"
\ No newline at end of file # LVD MODIFICATION END
\ No newline at end of file
# LVD MODIFICATION START
apiVersion: networking.k8s.io/v1 apiVersion: networking.k8s.io/v1
kind: Ingress kind: Ingress
metadata: metadata:
...@@ -75,4 +76,5 @@ spec: ...@@ -75,4 +76,5 @@ spec:
resources: resources:
limits: limits:
cpu: 1 cpu: 1
memory: 16Gi memory: 16Gi
\ No newline at end of file # LVD MODIFICATION END
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment