Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
60 commits
Select commit Hold shift + click to select a range
83374b0
updated trackhub file for bedset
khoroshevskyi Nov 11, 2024
1358cb8
excluded from schema trackhub endpoint
khoroshevskyi Nov 11, 2024
8e15412
fix search table stretched link in safari
sanghoonio Nov 12, 2024
b76fc2a
1. Fixed #153
khoroshevskyi Nov 13, 2024
d465967
ui update for bed description
khoroshevskyi Nov 13, 2024
891e877
ui - disabled link if genome digest is not provided
khoroshevskyi Nov 13, 2024
082c766
Merge branch 'dev' into dev_sam
sanghoonio Nov 14, 2024
b43828a
bootstrap detail cleanup
sanghoonio Nov 14, 2024
86e9e37
address #153 and #154, and clean up more ui related details
sanghoonio Nov 14, 2024
52543cd
ui detail cleanup
sanghoonio Nov 15, 2024
1f9eb9d
description placeholder text for bed splash
sanghoonio Nov 15, 2024
9e999c8
bed sets table height consistency
sanghoonio Nov 15, 2024
44f1592
bedsets typo
sanghoonio Nov 15, 2024
0fc1b37
address comments
sanghoonio Nov 15, 2024
6afecec
Merge pull request #155 from databio/dev_sam
sanghoonio Nov 15, 2024
7b8c0d1
prettier
nleroy917 Nov 15, 2024
1c3c395
ui - work on mobile version
khoroshevskyi Nov 17, 2024
07f7f19
ui - work on mobile version 2
khoroshevskyi Nov 17, 2024
fafaf56
mobile improvement
sanghoonio Nov 18, 2024
d6a1f18
type error
sanghoonio Nov 18, 2024
ce3f370
header details
sanghoonio Nov 18, 2024
8ffd0c5
header details again
sanghoonio Nov 18, 2024
d62b1c7
fixed scroll bar
khoroshevskyi Nov 18, 2024
8267496
rolled back combined text search
khoroshevskyi Nov 18, 2024
94ff8da
mobile and safari ui improvements
sanghoonio Nov 18, 2024
2a2a962
ui merge
sanghoonio Nov 18, 2024
9bfee65
home page detail
sanghoonio Nov 18, 2024
2f16394
home page typo
sanghoonio Nov 18, 2024
ea59540
fix safari homepage again
sanghoonio Nov 18, 2024
4c83562
Added endpoint and ui modal that creates bedsets
khoroshevskyi Nov 21, 2024
2b3a231
bedset modal cleanup
sanghoonio Nov 22, 2024
fb440fa
Added id search if text2bed
khoroshevskyi Nov 25, 2024
a489543
Merge remote-tracking branch 'origin/bedset_creation' into bedset_cre…
khoroshevskyi Nov 25, 2024
4a4db2c
fixed types
khoroshevskyi Nov 25, 2024
0a5e8d5
update cart page details
sanghoonio Nov 25, 2024
fe964e6
bedset modal detail fix
sanghoonio Nov 25, 2024
3b75f43
bed splash header detail
sanghoonio Nov 25, 2024
d0f5a71
bed cart detail
sanghoonio Nov 25, 2024
9c9a357
work on adding ui for creating bedsets
khoroshevskyi Nov 25, 2024
4dfd439
Merge remote-tracking branch 'origin/bedset_creation' into bedset_cre…
khoroshevskyi Nov 25, 2024
d338779
Fixed error type
khoroshevskyi Nov 25, 2024
b71762b
Work on bedset erroring
khoroshevskyi Nov 26, 2024
d669d63
Added bedset metadata
khoroshevskyi Nov 26, 2024
94be5c4
Merge pull request #157 from databio/bedset_creation
khoroshevskyi Nov 26, 2024
babb5e1
bedset creation modal details
sanghoonio Nov 27, 2024
f80e2a8
some tests
nsheff Nov 27, 2024
876ce2d
Merge branch 'dev' of github.com:databio/bedhost into dev
nsheff Nov 27, 2024
f7b3675
Fixed #103
khoroshevskyi Dec 2, 2024
ea3c195
bedset header ui details
sanghoonio Dec 2, 2024
a676ad2
type error
sanghoonio Dec 2, 2024
2e3251f
fixed tooltip and lint
khoroshevskyi Dec 2, 2024
7d354a1
Merge remote-tracking branch 'origin/dev' into dev
khoroshevskyi Dec 2, 2024
08f2743
fixed tooltip
khoroshevskyi Dec 2, 2024
f772a81
added geniml version
khoroshevskyi Dec 3, 2024
ac59324
remove footer again from nonempty cart page
sanghoonio Dec 11, 2024
a41e47d
Added reference validation endpoint
khoroshevskyi Dec 19, 2024
ee8aee8
Merge remote-tracking branch 'origin/dev' into dev
khoroshevskyi Dec 19, 2024
dd09342
Fixed #156
khoroshevskyi Dec 19, 2024
0c31b81
Fixed previous commit
khoroshevskyi Dec 19, 2024
1215618
Fixed comments
khoroshevskyi Jan 3, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion bedhost/_version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.8.0"
__version__ = "0.9.0"
4 changes: 4 additions & 0 deletions bedhost/data_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,3 +77,7 @@ class BaseListResponse(BaseModel):
limit: int
offset: int
results: list


# Request body model for the bedset creation endpoint (`create_bedset`,
# which receives it as its `bedset` parameter).
class CreateBEDsetRequest(BaseModel):
    # Registry path of the PEPhub project the bedset is built from.
    # `create_bedset` checks it with `is_registry_path` and rejects
    # values that do not pass.
    registry_path: str
93 changes: 91 additions & 2 deletions bedhost/routers/bed_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@
BEDFileNotFoundError,
TokenizeFileNotExistError,
)
from bbconf.models.bed_models import BedClassification # BedPEPHub,
from bbconf.models.bed_models import (
BedClassification, # BedPEPHub,
BedEmbeddingResult,
BedFiles,
BedListResult,
Expand All @@ -27,6 +27,8 @@
BedStatsModel,
TokenizedBedResponse,
TokenizedPathResponse,
QdrantSearchResult,
RefGenValidReturnModel,
)
from fastapi import APIRouter, File, HTTPException, Query, UploadFile
from fastapi.responses import PlainTextResponse
Expand Down Expand Up @@ -193,6 +195,27 @@ async def get_bed_pephub(
)


@router.get(
    "/{bed_id}/neighbours",
    summary="Get nearest neighbours for a single BED record",
    response_model=BedListSearchResult,
    response_model_by_alias=False,
    description=f"Returns most similar BED files in the database. "
    f"Example\n bed_id: {EXAMPLE_BED}",
)
async def get_bed_neighbours(
    bed_id: str = BedDigest,
    limit: int = 10,
    offset: int = 0,
):
    """
    Return the BED records most similar to the given one.

    :param bed_id: identifier of the BED record whose neighbours are requested
    :param limit: maximum number of results, forwarded to the neighbour search
    :param offset: number of results to skip, forwarded to the neighbour search
    :return: the result of ``bbagent.bed.get_neighbours`` (declared response
        model: ``BedListSearchResult``)
    :raises HTTPException: 404 when the BED record is not found
    """
    try:
        return bbagent.bed.get_neighbours(bed_id, limit=limit, offset=offset)
    except BEDFileNotFoundError:
        # Report which record is missing, matching the other 404 responses
        # of this router, instead of a bare status code with no detail.
        raise HTTPException(
            status_code=404,
            detail=f"Bed file {bed_id} not found",
        )


@router.get(
"/{bed_id}/embedding",
summary="Get embeddings for a single BED record",
Expand Down Expand Up @@ -335,7 +358,52 @@ async def text_to_bed_search(query, limit: int = 10, offset: int = 0):
Example: query="cancer"
"""
_LOGGER.info(f"Searching for: {query}")
results = bbagent.bed.text_to_bed_search(query, limit=limit, offset=offset)

# results_sql = bbagent.bed.sql_search(
# query, limit=round(limit / 2, 0), offset=round(offset / 2, 0)
# )
#
# if results_sql.count > results_sql.offset:
# qdrant_offset = offset - results_sql.offset
# else:
# qdrant_offset = offset - results_sql.count
#
# results_qdr = bbagent.bed.text_to_bed_search(
# query, limit=limit, offset=qdrant_offset - 1 if qdrant_offset > 0 else 0
# )
#
# results = BedListSearchResult(
# count=results_qdr.count,
# limit=limit,
# offset=offset,
# results=(results_sql.results + results_qdr.results)[0:limit],
# )
spaceless_query = query.replace(" ", "")
if len(spaceless_query) == 32 and spaceless_query == query:
try:
similar_results = bbagent.bed.get_neighbours(
query, limit=limit, offset=offset
)

if similar_results.results and offset == 0:

result = QdrantSearchResult(
id=query,
payload={},
score=1.0,
metadata=bbagent.bed.get(query),
)

similar_results.results.insert(0, result)
return similar_results
except Exception as _:
pass

results = bbagent.bed.text_to_bed_search(
query,
limit=limit,
offset=offset,
)

if results:
return results
Expand Down Expand Up @@ -414,3 +482,24 @@ async def get_tokens(
status_code=404,
detail="Tokenized file not found",
)


@router.get(
    "/{bed_id}/genome-stats",
    summary="Get reference genome validation results",
    response_model=RefGenValidReturnModel,
)
async def get_ref_gen_results(bed_id: str):
    """
    Return reference genome validation results for a bed file
    Example: bed: 0dcdf8986a72a3d85805bbc9493a1302
    """
    # NOTE: the docstring text above is kept verbatim; with no `description=`
    # on the route, FastAPI publishes it as the operation description.
    try:
        validation = bbagent.bed.get_reference_validation(bed_id)
    except BEDFileNotFoundError:
        # Unknown BED id -> 404 naming the missing record.
        raise HTTPException(status_code=404, detail=f"Bed file {bed_id} not found")
    return validation
88 changes: 73 additions & 15 deletions bedhost/routers/bedset_api.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,19 @@
import logging

from bbconf.exceptions import BedSetNotFoundError
from bbconf.exceptions import BedSetNotFoundError, BedSetTrackHubLimitError
from bbconf.models.bedset_models import (
BedSetBedFiles,
BedSetListResult,
BedSetMetadata,
BedSetPlots,
BedSetStats,
)
from pephubclient.helpers import is_registry_path, unwrap_registry_path
from fastapi import APIRouter, HTTPException, Request, Response

from ..const import EXAMPLE_BEDSET, PKG_NAME
from ..main import bbagent
from ..data_models import CreateBEDsetRequest
from ..utils import zip_pep

router = APIRouter(prefix="/v1/bedset", tags=["bedset"])
Expand Down Expand Up @@ -165,22 +167,78 @@ async def get_trackDb_file_bedset(bedset_id: str):
"""
Generate trackDb file for the BED set track hub
"""
# Response should be this type:
# trackDb_txt = (
# trackDb_txt + f"track\t {metadata.name}\n"
# "type\t bigBed\n"
# f"bigDataUrl\t {metadata.files.bigbed_file.access_methods[0].access_url.url} \n"
# f"shortLabel\t {metadata.name}\n"
# f"longLabel\t {metadata.description}\n"
# "visibility\t full\n\n"
# )
try:
trackDb_txt = bbagent.bedset.get_track_hub_file(bedset_id)
except BedSetTrackHubLimitError as _:
raise HTTPException(
status_code=400,
detail="Track hub limit reached. Please try smaller BEDset.",
)

return Response(trackDb_txt, media_type="text/plain")

hit = bbagent.bedset.get_bedset_bedfiles(bedset_id)

trackDb_txt = ""
for bed in hit.results:
metadata = bbagent.bed.get(bed.id, full=True)
# NOTE(review): this span comes from a flattened diff view. Indentation is
# lost and a few lines that appear to be removed lines of the previous
# trackDb implementation are interleaved with the new endpoint. They are
# flagged individually below and left untouched.
@router.post(
"/create/",
description="Create a new bedset by providing registry path to the PEPhub project",
)
async def create_bedset(bedset: CreateBEDsetRequest):
"""
Create a new bedset from a PEPhub project.

Validates the registry path, restricts creation to a fixed list of
namespaces, loads the project through the agent's PEPhub client and
creates a bedset named after the project.

:param bedset: request body carrying the PEPhub registry path
:return: {"status": "success"} once the bedset has been created
:raises HTTPException: 406 if the registry path is invalid; 403 if the
    namespace is not in the allowed list; 404 if the project cannot be
    loaded; 409 if a bedset with the project name already exists; 400 if
    creation fails
"""
# Validate the PEPhub project string
if not is_registry_path(bedset.registry_path):
raise HTTPException(status_code=406, detail="Invalid registry path")

# Split the registry path into its parts; only `.namespace` is used below.
project_reg_path = unwrap_registry_path(bedset.registry_path)

# NOTE(review): the next line references `metadata`, which create_bedset never
# defines -- presumably a removed line of the old trackDb loop; confirm.
if metadata.files.bigbed_file:
# Only projects from these hard-coded namespaces may create bedsets.
if project_reg_path.namespace not in ["databio", "bedbase", "pepkit"]:
raise HTTPException(status_code=403, detail="User is not in admin list")

# NOTE(review): the `trackDb_txt = (...)` statement below also looks like
# removed-side diff residue from the old trackDb generator, not part of this
# endpoint; confirm.
trackDb_txt = (
trackDb_txt + f"track\t {metadata.name}\n"
"type\t bigBed\n"
f"bigDataUrl\t {metadata.files.bigbed_file.access_methods[0].access_url.url} \n"
f"shortLabel\t {metadata.name}\n"
f"longLabel\t {metadata.description}\n"
"visibility\t full\n\n"
)
# Any failure while loading the project is reported to the client as 404.
try:
project = bbagent.config.phc.load_project(bedset.registry_path)
except Exception as _:
raise HTTPException(
status_code=404, detail=f"Project: '{bedset.registry_path}' not found"
)

# One BED id per sample: the "record_identifier" value when it is truthy,
# otherwise the sample's `sample_name`.
bedfiles_list = [
bedfile_id.get("record_identifier") or bedfile_id.sample_name
for bedfile_id in project.samples
]

# The project name doubles as the bedset identifier, so refuse duplicates.
if bbagent.bedset.exists(identifier=project.name):
raise HTTPException(
status_code=409,
detail=f"BEDset with identifier {project.name} already exists",
)

# NOTE(review): the next `return Response(...)` uses `trackDb_txt` and would
# end the function early -- presumably the removed return of the old trackDb
# endpoint; confirm.
return Response(trackDb_txt, media_type="text/plain")
# Create the bedset; any error from the create call is surfaced as a 400
# carrying the error text.
try:
bbagent.bedset.create(
identifier=project.name,
name=project.name,
bedid_list=bedfiles_list,
statistics=True,
description=project.description,
annotation={
"source": project.config.get("source", ""),
# Falls back to the project's namespace when the config has no "author".
"author": project.config.get("author", project_reg_path.namespace),
},
no_fail=False,
overwrite=False,
)
except Exception as err:
raise HTTPException(
status_code=400, detail=f"Unable to create bedset. Error: {err}"
)

return {"status": "success"}
56 changes: 56 additions & 0 deletions interactive.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
# Scratch script for exploring a BedBaseAgent and several text-embedding
# libraries by hand. Imports are deliberately left next to the code that
# uses them rather than hoisted to the top: the script singles out one
# import-dependent statement as the "culprit", so statement order is kept.
import bbconf

bba = bbconf.BedBaseAgent("deployment/config/api-dev.bedbase.org.yaml")

# Re-run the config's private initialisers and store the results on the
# config object.
bba.config._b2bsi = bba.config._init_b2bsi_object()
bba.config._r2v = bba.config._init_r2v_object()
bba.config._bivec = bba.config._init_bivec_object()


# Here's some code to test the BiVectorSearchInterface

from geniml.search.interfaces import BiVectorSearchInterface
from geniml.search.backends import BiVectorBackend

from geniml.search.query2vec import Text2Vec

# The original referenced `self._qdrant_text_engine` / `self._qdrant_engine`,
# which raises NameError at module level (there is no `self` here).
# NOTE(review): assumes these engines live on `bba.config`, the object whose
# `_init_*` methods are called above -- confirm against bbconf.
search_backend = BiVectorBackend(
    metadata_backend=bba.config._qdrant_text_engine,
    bed_backend=bba.config._qdrant_engine,
)

t2v = Text2Vec("sentence-transformers/all-MiniLM-L6-v2", v2v=None)

# The original called BiVectorSearchInterface() with no arguments and left
# `search_backend` and `t2v` unused.
# NOTE(review): keyword names `backend` / `query2vec` are assumed from the
# geniml API -- confirm against the installed version.
bvsi = BiVectorSearchInterface(backend=search_backend, query2vec=t2v)

from langchain_huggingface.embeddings import HuggingFaceEmbeddings
import logging
from typing import Union

import numpy as np

from geniml.text2bednn import Vec2VecFNN
from geniml.search.query2vec.abstract import Query2Vec

# culprit:
te = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# Testing the sentence transformers:


from sentence_transformers import SentenceTransformer

sentences = ["This is an example sentence", "Each sentence is converted"]

model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
embeddings = model.encode(sentences)
print(embeddings)


# Same model name and sentences, embedded with fastembed instead.
from fastembed import TextEmbedding

model = TextEmbedding(
    model_name="sentence-transformers/all-MiniLM-L6-v2", max_length=512
)
sentences = ["This is an example sentence", "Each sentence is converted"]
# `list(...)` collects everything `embed` yields into a list.
embeddings = list(model.embed(sentences))
4 changes: 2 additions & 2 deletions requirements/requirements-all.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# bbconf @ git+https://github.com/databio/bbconf.git@dev#egg=bbconf
bbconf>=0.9.0
bbconf>=0.10.0
fastapi>=0.103.0
logmuse>=0.2.7
markdown
Expand All @@ -9,4 +9,4 @@ uvicorn
yacman>=0.9.2
pephubclient>=0.4.1
psycopg[binary,pool]
python-multipart>=0.0.9
python-multipart>=0.0.9
Loading
Loading