9 changes: 5 additions & 4 deletions Dockerfile
@@ -1,4 +1,4 @@
FROM python:3.10-slim
FROM python:3.13-slim
LABEL authors="Oleksandr Khoroshevskyi, Nathan Sheffield"

RUN apt-get update
@@ -29,9 +29,10 @@ RUN apt-get install -y build-essential
RUN pip install uv

# Install CPU-only pytorch, eliminating huge nvidia dependencies
RUN pip install torch==2.3.1+cpu -f https://download.pytorch.org/whl/torch_stable.html
RUN pip install https://github.com/pepkit/pipestat/archive/refs/heads/dev.zip
#pip install torch==2.3.1+cpu -f https://download.pytorch.org/whl/torch_stable.html
RUN uv pip install torch --index-url https://download.pytorch.org/whl/cpu --system
# RUN uv pip install https://github.com/pepkit/pipestat/archive/refs/heads/dev.zip --system

RUN uv pip install -r requirements/requirements-all.txt --no-cache-dir --system

CMD ["uvicorn", "bedhost.main:app", "--host", "0.0.0.0", "--port", "80"]
CMD ["uvicorn", "bedhost.main:app", "--host", "0.0.0.0", "--port", "80"]
2 changes: 1 addition & 1 deletion bedhost/_version.py
@@ -1 +1 @@
__version__ = "0.12.0"
__version__ = "0.12.1"
77 changes: 47 additions & 30 deletions bedhost/routers/bed_api.py
@@ -65,7 +65,9 @@ async def get_example_bed_record():
response_model=BedListResult,
)
async def list_beds(
limit: int = 1000,
limit: int = Query(
1000, ge=1, le=10000, description="Limit (1-10000), default 1000"
),
offset: int = 0,
genome: str = Query(
default=None, description="filter by genome of the bed file. e.g. 'hg38'"
@@ -77,6 +79,7 @@ async def list_beds(
"""
Returns list of BED files in the database with optional filters.
"""

return bbagent.bed.get_ids_list(
limit=limit, offset=offset, genome=genome, bed_compliance=bed_compliance
)
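
For reference, the ge/le bounds declared in Query are enforced by FastAPI before the handler runs, so an out-of-range value such as limit=50000 now fails with a 422 validation error instead of reaching the database. A minimal standalone sketch of the same pattern (illustrative route name, not the actual bedhost router):

# minimal sketch of the bounded-limit pattern shown above; not the bedhost code itself
from fastapi import FastAPI, Query

app = FastAPI()

@app.get("/beds")
async def list_beds(
    limit: int = Query(1000, ge=1, le=10000, description="Limit (1-10000), default 1000"),
    offset: int = 0,
):
    # limit is guaranteed to be within 1..10000 here; out-of-range values
    # were rejected with a 422 before this body executed
    return {"limit": limit, "offset": offset}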
@@ -388,7 +391,7 @@ async def text_to_bed_search(
assay: Optional[Union[str, None]] = None,
limit: int = 10,
offset: int = 0,
test_request: bool = test_query_parameter,
test_request: bool = test_query_parameter, # needed for usage tracking in @count_requests
):
"""
Search for a BedFile by a text query.
@@ -398,31 +401,8 @@
"""

_LOGGER.info(
f"Searching for: '{query}' with limit='{limit}' and offset='{offset}' and genome='{genome}'"
f"Searching for: '{query}' with limit='{limit}' and offset='{offset}' and genome='{genome}' and assay='{assay}'"
)
#
# # results_sql = bbagent.bed.sql_search(
# # query, limit=round(limit / 2, 0), offset=round(offset / 2, 0)
# # )
# #
# # if results_sql.count > results_sql.offset:
# # qdrant_offset = offset - results_sql.offset
# # else:
# # qdrant_offset = offset - results_sql.count
# #
# # results_qdr = bbagent.bed.text_to_bed_search(
# # query, limit=limit, offset=qdrant_offset - 1 if qdrant_offset > 0 else 0
# # )
# #
# # results = BedListSearchResult(
# # count=results_qdr.count,
# # limit=limit,
# # offset=offset,
# # results=(results_sql.results + results_qdr.results)[0:limit],
# # )
# query = query.strip()
#
# if not genome or genome == "hg38":

spaceless_query = query.replace(" ", "")
if len(spaceless_query) == 32 and spaceless_query == query:
@@ -483,17 +463,54 @@
if result.count != 0:
return result

results = bbagent.bed.semantic_search(
# # Basic semantic search
# results = bbagent.bed.semantic_search(
# query,
# genome_alias=genome,
# assay=assay,
# limit=limit,
# offset=offset,
# )

# # Hybrid search
results = bbagent.bed.hybrid_search(
query,
genome_alias=genome,
assay=assay,
limit=limit,
offset=offset,
)
return results

if results:
return results
raise HTTPException(status_code=404, detail="No records found")
# # # Bi-vec search
#
# # This is disabled for now, as it is sql search mix, which we don't want to mix
# # results_sql = bbagent.bed.sql_search(
# # query, limit=round(limit / 2, 0), offset=round(offset / 2, 0)
# # )
# #
# # if results_sql.count > results_sql.offset:
# # qdrant_offset = offset - results_sql.offset
# # else:
# # qdrant_offset = offset - results_sql.count
# # results_qdr = bbagent.bed.text_to_bed_search(
# # query, limit=limit, offset=qdrant_offset - 1 if qdrant_offset > 0 else 0
# # )
# # results = BedListSearchResult(
# # count=results_qdr.count,
# # limit=limit,
# # offset=offset,
# # )
# # print("results:", results_qdr)
# #
# # raise HTTPException(status_code=404, detail="No records found")
#
#
# results_qdr = bbagent.bed.text_to_bed_search(
# query, limit=limit, offset=offset
# )

return results_qdr


@router.get(
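The search endpoint now calls bbagent.bed.hybrid_search instead of semantic_search, pairing the dense text2vec embeddings with the SPLADE sparse model added to the configs below. The diff does not show how the two rankings are combined; a common choice for a dense-plus-sparse setup is reciprocal rank fusion (RRF), and the sketch below illustrates that idea only. It is an assumption about the fusion step, with made-up record IDs, not the bbconf implementation.

# Illustrative reciprocal-rank fusion of a dense and a sparse result list.
# Assumption: hybrid_search fuses the two rankings with something RRF-like.
from collections import defaultdict

def rrf(rankings: list[list[str]], k: int = 60) -> list[str]:
    # IDs that rank highly in any input list accumulate the largest scores
    scores: dict[str, float] = defaultdict(float)
    for ranking in rankings:
        for rank, record_id in enumerate(ranking, start=1):
            scores[record_id] += 1.0 / (k + rank)
    return sorted(scores, key=scores.get, reverse=True)

dense_hits = ["bed_a1", "bed_b2", "bed_c3"]   # hypothetical hits from the MiniLM dense search
sparse_hits = ["bed_b2", "bed_d4", "bed_a1"]  # hypothetical hits from the SPLADE sparse search
print(rrf([dense_hits, sparse_hits]))         # ['bed_b2', 'bed_a1', 'bed_d4', 'bed_c3']
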
4 changes: 3 additions & 1 deletion deployment/config/api-dev.bedbase.org.yaml
@@ -4,7 +4,8 @@ path:
# region2vec: databio/r2v-pretrained-for-search
region2vec: databio/r2v-encode-hg38
vec2vec: 'databio/v2v-sentencetransformers-encode'
umap_model: "https://huggingface.co/databio/bedbase-umap/resolve/main/hg38_umap_umap_model.joblib"
umap_model: "https://huggingface.co/databio/bedbase-umap/resolve/main/hg38_umap_umap_model_3_13.joblib"
sparse_model: "prithivida/Splade_PP_en_v2"
database:
host: $POSTGRES_HOST
port: 5432
@@ -17,6 +18,7 @@ qdrant:
api_key: $QDRANT_API_KEY
file_collection: bedbase
text_collection: bed_text
hybrid_collection: bedbase_sparse_collection
server:
host: 0.0.0.0
port: 8000
8 changes: 5 additions & 3 deletions deployment/config/api.bedbase.org.yaml
@@ -1,10 +1,11 @@
path:
remote_url_base: http://data.bedbase.org/
text2vec: "sentence-transformers/all-MiniLM-L6-v2"
text2vec: 'sentence-transformers/all-MiniLM-L6-v2'
# region2vec: databio/r2v-pretrained-for-search
region2vec: databio/r2v-encode-hg38
vec2vec: "databio/v2v-sentencetransformers-encode"
umap_model: "https://huggingface.co/databio/bedbase-umap/resolve/main/hg38_umap_umap_model.joblib"
vec2vec: 'databio/v2v-sentencetransformers-encode'
umap_model: "https://huggingface.co/databio/bedbase-umap/resolve/main/hg38_umap_umap_model_3_13.joblib"
sparse_model: "prithivida/Splade_PP_en_v2"
database:
host: $POSTGRES_HOST
port: 5432
@@ -17,6 +18,7 @@ qdrant:
api_key: $QDRANT_API_KEY
file_collection: bedbase
text_collection: bed_text
hybrid_collection: bedbase_sparse_collection
server:
host: 0.0.0.0
port: 8000
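Both configs gain a sparse_model and a qdrant hybrid_collection entry, which implies a Qdrant collection holding a dense vector alongside a SPLADE sparse vector. A minimal sketch of what creating such a collection with qdrant-client could look like; the vector names, the 384-dimensional size (all-MiniLM-L6-v2), and the distance metric are assumptions, since bbconf manages the real schema:

# Hypothetical schema for the hybrid collection; bbconf owns the actual setup.
from qdrant_client import QdrantClient, models

client = QdrantClient(url="http://localhost:6333")  # the deployed config supplies the real host and $QDRANT_API_KEY
client.create_collection(
    collection_name="bedbase_sparse_collection",
    vectors_config={
        # assumed dense vector: all-MiniLM-L6-v2 embeddings are 384-dimensional
        "dense": models.VectorParams(size=384, distance=models.Distance.COSINE),
    },
    sparse_vectors_config={
        # assumed sparse vector filled from prithivida/Splade_PP_en_v2 term weights
        "sparse": models.SparseVectorParams(),
    },
)
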
7 changes: 4 additions & 3 deletions dev.Dockerfile
@@ -1,4 +1,4 @@
FROM python:3.10-slim
FROM python:3.13-slim
LABEL authors="Oleksandr Khoroshevskyi, Nathan Sheffield"

RUN apt-get update
@@ -29,8 +29,9 @@ RUN apt-get install -y build-essential
RUN pip install uv

# Install CPU-only pytorch, eliminating huge nvidia dependencies
RUN pip install torch==2.3.1+cpu -f https://download.pytorch.org/whl/torch_stable.html
RUN pip install https://github.com/pepkit/pipestat/archive/refs/heads/dev.zip
#pip install torch==2.3.1+cpu -f https://download.pytorch.org/whl/torch_stable.html
RUN uv pip install torch --index-url https://download.pytorch.org/whl/cpu --system
# RUN uv pip install https://github.com/pepkit/pipestat/archive/refs/heads/dev.zip --system

RUN uv pip install -r requirements/requirements-all.txt --no-cache-dir --system

4 changes: 2 additions & 2 deletions requirements/requirements-all.txt
@@ -1,5 +1,5 @@
# bbconf @ git+https://github.com/databio/bbconf.git@umap#egg=bbconf
bbconf>=0.13.0
# bbconf @ git+https://github.com/databio/bbconf.git@dev#egg=bbconf
bbconf>=0.14.1
fastapi>=0.103.0
logmuse>=0.2.7
markdown