9 changes: 5 additions & 4 deletions Dockerfile
@@ -1,4 +1,4 @@
FROM python:3.10-slim
FROM python:3.13-slim
LABEL authors="Oleksandr Khoroshevskyi, Nathan Sheffield"

RUN apt-get update
@@ -29,9 +29,10 @@ RUN apt-get install -y build-essential
RUN pip install uv

# Install CPU-only pytorch, eliminating huge nvidia dependencies
RUN pip install torch==2.3.1+cpu -f https://download.pytorch.org/whl/torch_stable.html
RUN pip install https://github.com/pepkit/pipestat/archive/refs/heads/dev.zip
#pip install torch==2.3.1+cpu -f https://download.pytorch.org/whl/torch_stable.html
RUN uv pip install torch --index-url https://download.pytorch.org/whl/cpu --system
# RUN uv pip install https://github.com/pepkit/pipestat/archive/refs/heads/dev.zip --system

RUN uv pip install -r requirements/requirements-all.txt --no-cache-dir --system

CMD ["uvicorn", "bedhost.main:app", "--host", "0.0.0.0", "--port", "80"]
CMD ["uvicorn", "bedhost.main:app", "--host", "0.0.0.0", "--port", "80"]
2 changes: 1 addition & 1 deletion bedhost/_version.py
@@ -1 +1 @@
__version__ = "0.12.0"
__version__ = "0.12.1"
77 changes: 47 additions & 30 deletions bedhost/routers/bed_api.py
@@ -65,7 +65,9 @@ async def get_example_bed_record():
response_model=BedListResult,
)
async def list_beds(
limit: int = 1000,
limit: int = Query(
1000, ge=1, le=10000, description="Limit (1-10000), default 1000"
),
offset: int = 0,
genome: str = Query(
default=None, description="filter by genome of the bed file. e.g. 'hg38'"
@@ -77,6 +79,7 @@ async def list_beds(
"""
Returns list of BED files in the database with optional filters.
"""

return bbagent.bed.get_ids_list(
limit=limit, offset=offset, genome=genome, bed_compliance=bed_compliance
)
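
For reference, the ge/le bounds declared in Query are enforced by FastAPI before the handler runs, so an out-of-range value such as limit=50000 now fails with a 422 validation error instead of reaching the database. A minimal standalone sketch of the same pattern (illustrative route name, not the actual bedhost router):

# minimal sketch of the bounded-limit pattern shown above; not the bedhost code itself
from fastapi import FastAPI, Query

app = FastAPI()

@app.get("/beds")
async def list_beds(
    limit: int = Query(1000, ge=1, le=10000, description="Limit (1-10000), default 1000"),
    offset: int = 0,
):
    # limit is guaranteed to be within 1..10000 here; out-of-range values
    # were rejected with a 422 before this body executed
    return {"limit": limit, "offset": offset}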
@@ -388,7 +391,7 @@ async def text_to_bed_search(
assay: Optional[Union[str, None]] = None,
limit: int = 10,
offset: int = 0,
test_request: bool = test_query_parameter,
test_request: bool = test_query_parameter, # needed for usage tracking in @count_requests
):
"""
Search for a BedFile by a text query.
@@ -398,31 +401,8 @@
"""

_LOGGER.info(
f"Searching for: '{query}' with limit='{limit}' and offset='{offset}' and genome='{genome}'"
f"Searching for: '{query}' with limit='{limit}' and offset='{offset}' and genome='{genome}' and assay='{assay}'"
)
#
# # results_sql = bbagent.bed.sql_search(
# # query, limit=round(limit / 2, 0), offset=round(offset / 2, 0)
# # )
# #
# # if results_sql.count > results_sql.offset:
# # qdrant_offset = offset - results_sql.offset
# # else:
# # qdrant_offset = offset - results_sql.count
# #
# # results_qdr = bbagent.bed.text_to_bed_search(
# # query, limit=limit, offset=qdrant_offset - 1 if qdrant_offset > 0 else 0
# # )
# #
# # results = BedListSearchResult(
# # count=results_qdr.count,
# # limit=limit,
# # offset=offset,
# # results=(results_sql.results + results_qdr.results)[0:limit],
# # )
# query = query.strip()
#
# if not genome or genome == "hg38":

spaceless_query = query.replace(" ", "")
if len(spaceless_query) == 32 and spaceless_query == query:
@@ -483,17 +463,54 @@
if result.count != 0:
return result

results = bbagent.bed.semantic_search(
# # Basic semantic search
# results = bbagent.bed.semantic_search(
# query,
# genome_alias=genome,
# assay=assay,
# limit=limit,
# offset=offset,
# )

# # Hybrid search
results = bbagent.bed.hybrid_search(
query,
genome_alias=genome,
assay=assay,
limit=limit,
offset=offset,
)
return results

if results:
return results
raise HTTPException(status_code=404, detail="No records found")
# # # Bi-vec search
#
# # This is disabled for now, as it is sql search mix, which we don't want to mix
# # results_sql = bbagent.bed.sql_search(
# # query, limit=round(limit / 2, 0), offset=round(offset / 2, 0)
# # )
# #
# # if results_sql.count > results_sql.offset:
# # qdrant_offset = offset - results_sql.offset
# # else:
# # qdrant_offset = offset - results_sql.count
# # results_qdr = bbagent.bed.text_to_bed_search(
# # query, limit=limit, offset=qdrant_offset - 1 if qdrant_offset > 0 else 0
# # )
# # results = BedListSearchResult(
# # count=results_qdr.count,
# # limit=limit,
# # offset=offset,
# # )
# # print("results:", results_qdr)
# #
# # raise HTTPException(status_code=404, detail="No records found")
#
#
# results_qdr = bbagent.bed.text_to_bed_search(
# query, limit=limit, offset=offset
# )

return results_qdr


@router.get(
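The search endpoint now calls bbagent.bed.hybrid_search instead of semantic_search, pairing the dense text2vec embeddings with the SPLADE sparse model added to the configs below. The diff does not show how the two rankings are combined; a common choice for a dense-plus-sparse setup is reciprocal rank fusion (RRF), and the sketch below illustrates that idea only. It is an assumption about the fusion step, with made-up record IDs, not the bbconf implementation.

# Illustrative reciprocal-rank fusion of a dense and a sparse result list.
# Assumption: hybrid_search fuses the two rankings with something RRF-like.
from collections import defaultdict

def rrf(rankings: list[list[str]], k: int = 60) -> list[str]:
    # IDs that rank highly in any input list accumulate the largest scores
    scores: dict[str, float] = defaultdict(float)
    for ranking in rankings:
        for rank, record_id in enumerate(ranking, start=1):
            scores[record_id] += 1.0 / (k + rank)
    return sorted(scores, key=scores.get, reverse=True)

dense_hits = ["bed_a1", "bed_b2", "bed_c3"]   # hypothetical hits from the MiniLM dense search
sparse_hits = ["bed_b2", "bed_d4", "bed_a1"]  # hypothetical hits from the SPLADE sparse search
print(rrf([dense_hits, sparse_hits]))         # ['bed_b2', 'bed_a1', 'bed_d4', 'bed_c3']
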
4 changes: 3 additions & 1 deletion deployment/config/api-dev.bedbase.org.yaml
@@ -4,7 +4,8 @@ path:
# region2vec: databio/r2v-pretrained-for-search
region2vec: databio/r2v-encode-hg38
vec2vec: 'databio/v2v-sentencetransformers-encode'
umap_model: "https://huggingface.co/databio/bedbase-umap/resolve/main/hg38_umap_umap_model.joblib"
umap_model: "https://huggingface.co/databio/bedbase-umap/resolve/main/hg38_umap_umap_model_3_13.joblib"
sparse_model: "prithivida/Splade_PP_en_v2"
database:
host: $POSTGRES_HOST
port: 5432
@@ -17,6 +18,7 @@ qdrant:
api_key: $QDRANT_API_KEY
file_collection: bedbase
text_collection: bed_text
hybrid_collection: bedbase_sparse_collection
server:
host: 0.0.0.0
port: 8000
8 changes: 5 additions & 3 deletions deployment/config/api.bedbase.org.yaml
@@ -1,10 +1,11 @@
path:
remote_url_base: http://data.bedbase.org/
text2vec: "sentence-transformers/all-MiniLM-L6-v2"
text2vec: 'sentence-transformers/all-MiniLM-L6-v2'
# region2vec: databio/r2v-pretrained-for-search
region2vec: databio/r2v-encode-hg38
vec2vec: "databio/v2v-sentencetransformers-encode"
umap_model: "https://huggingface.co/databio/bedbase-umap/resolve/main/hg38_umap_umap_model.joblib"
vec2vec: 'databio/v2v-sentencetransformers-encode'
umap_model: "https://huggingface.co/databio/bedbase-umap/resolve/main/hg38_umap_umap_model_3_13.joblib"
sparse_model: "prithivida/Splade_PP_en_v2"
database:
host: $POSTGRES_HOST
port: 5432
@@ -17,6 +18,7 @@ qdrant:
api_key: $QDRANT_API_KEY
file_collection: bedbase
text_collection: bed_text
hybrid_collection: bedbase_sparse_collection
server:
host: 0.0.0.0
port: 8000
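Both configs gain a sparse_model and a qdrant hybrid_collection entry, which implies a Qdrant collection holding a dense vector alongside a SPLADE sparse vector. A minimal sketch of what creating such a collection with qdrant-client could look like; the vector names, the 384-dimensional size (all-MiniLM-L6-v2), and the distance metric are assumptions, since bbconf manages the real schema:

# Hypothetical schema for the hybrid collection; bbconf owns the actual setup.
from qdrant_client import QdrantClient, models

client = QdrantClient(url="http://localhost:6333")  # the deployed config supplies the real host and $QDRANT_API_KEY
client.create_collection(
    collection_name="bedbase_sparse_collection",
    vectors_config={
        # assumed dense vector: all-MiniLM-L6-v2 embeddings are 384-dimensional
        "dense": models.VectorParams(size=384, distance=models.Distance.COSINE),
    },
    sparse_vectors_config={
        # assumed sparse vector filled from prithivida/Splade_PP_en_v2 term weights
        "sparse": models.SparseVectorParams(),
    },
)
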
7 changes: 4 additions & 3 deletions dev.Dockerfile
@@ -1,4 +1,4 @@
FROM python:3.10-slim
FROM python:3.13-slim
LABEL authors="Oleksandr Khoroshevskyi, Nathan Sheffield"

RUN apt-get update
@@ -29,8 +29,9 @@ RUN apt-get install -y build-essential
RUN pip install uv

# Install CPU-only pytorch, eliminating huge nvidia dependencies
RUN pip install torch==2.3.1+cpu -f https://download.pytorch.org/whl/torch_stable.html
RUN pip install https://github.com/pepkit/pipestat/archive/refs/heads/dev.zip
#pip install torch==2.3.1+cpu -f https://download.pytorch.org/whl/torch_stable.html
RUN uv pip install torch --index-url https://download.pytorch.org/whl/cpu --system
# RUN uv pip install https://github.com/pepkit/pipestat/archive/refs/heads/dev.zip --system

RUN uv pip install -r requirements/requirements-all.txt --no-cache-dir --system

4 changes: 2 additions & 2 deletions requirements/requirements-all.txt
@@ -1,5 +1,5 @@
# bbconf @ git+https://github.com/databio/bbconf.git@umap#egg=bbconf
bbconf>=0.13.0
# bbconf @ git+https://github.com/databio/bbconf.git@dev#egg=bbconf
bbconf>=0.14.1
fastapi>=0.103.0
logmuse>=0.2.7
markdown