diff --git a/app/actors/YBrowseVariantUpdateActor.scala b/app/actors/YBrowseVariantUpdateActor.scala index 8549b65..8b1e64c 100644 --- a/app/actors/YBrowseVariantUpdateActor.scala +++ b/app/actors/YBrowseVariantUpdateActor.scala @@ -5,10 +5,9 @@ import org.apache.pekko.actor.Actor import play.api.Logging import services.genomics.YBrowseVariantIngestionService -import java.io.{BufferedInputStream, BufferedReader, FileOutputStream, InputStreamReader} +import java.io.{BufferedInputStream, FileOutputStream} import java.net.{HttpURLConnection, URI} import java.nio.file.Files -import java.util.zip.{GZIPInputStream, GZIPOutputStream} import scala.concurrent.{ExecutionContext, Future} import scala.util.{Failure, Success, Try} @@ -68,180 +67,81 @@ class YBrowseVariantUpdateActor @javax.inject.Inject()( private def runUpdate(): Future[UpdateResult] = { Future { - downloadVcfFile() + downloadGffFile() }.flatMap { case Success(_) => - logger.info("VCF file downloaded successfully, sanitizing VCF") - Future(sanitizeVcfFile()).flatMap { - case Success(skipped) => - logger.info(s"VCF sanitized (removed $skipped malformed records), starting ingestion") - ingestionService.ingestVcf(genomicsConfig.ybrowseVcfStoragePath).map { count => - UpdateResult(success = true, variantsIngested = count, s"Successfully ingested $count variants (skipped $skipped malformed records)") - } - case Failure(ex) => - Future.successful(UpdateResult(success = false, variantsIngested = 0, s"Sanitization failed: ${ex.getMessage}")) + logger.info("GFF file downloaded successfully, starting ingestion") + ingestionService.ingestGff(genomicsConfig.ybrowseGffStoragePath).map { count => + UpdateResult(success = true, variantsIngested = count, s"Successfully ingested $count variants from GFF") } case Failure(ex) => Future.successful(UpdateResult(success = false, variantsIngested = 0, s"Download failed: ${ex.getMessage}")) } } - private def downloadVcfFile(): Try[Unit] = Try { - val url = 
URI.create(genomicsConfig.ybrowseVcfUrl).toURL - val targetFile = genomicsConfig.ybrowseVcfStoragePath - - // Ensure parent directory exists - val parentDir = targetFile.getParentFile - if (parentDir != null && !parentDir.exists()) { - Files.createDirectories(parentDir.toPath) - logger.info(s"Created directory: ${parentDir.getAbsolutePath}") - } - - // Download to a temp file first, then rename (atomic operation) - val tempFile = new java.io.File(targetFile.getAbsolutePath + ".tmp") - - logger.info(s"Downloading VCF from ${genomicsConfig.ybrowseVcfUrl} to ${tempFile.getAbsolutePath}") + private def downloadGffFile(): Try[Unit] = Try { + val url = URI.create(genomicsConfig.ybrowseGffUrl).toURL + val targetFile = genomicsConfig.ybrowseGffStoragePath + + // Check for fresh local file (cache for 24 hours) + val cacheDuration = 24 * 60 * 60 * 1000L // 24 hours in millis + if (targetFile.exists() && (System.currentTimeMillis() - targetFile.lastModified() < cacheDuration)) { + logger.info(s"Local GFF file is fresh (< 24 hours old), skipping download: ${targetFile.getAbsolutePath}") + } else { + // Ensure parent directory exists + val parentDir = targetFile.getParentFile + if (parentDir != null && !parentDir.exists()) { + Files.createDirectories(parentDir.toPath) + logger.info(s"Created directory: ${parentDir.getAbsolutePath}") + } - val connection = url.openConnection().asInstanceOf[HttpURLConnection] - connection.setRequestMethod("GET") - connection.setConnectTimeout(30000) // 30 seconds - connection.setReadTimeout(300000) // 5 minutes for large file + // Download to a temp file first, then rename (atomic operation) + val tempFile = new java.io.File(targetFile.getAbsolutePath + ".tmp") - try { - val responseCode = connection.getResponseCode - if (responseCode != HttpURLConnection.HTTP_OK) { - throw new RuntimeException(s"HTTP request failed with status $responseCode") - } + logger.info(s"Downloading GFF from ${genomicsConfig.ybrowseGffUrl} to 
${tempFile.getAbsolutePath}") - val inputStream = new BufferedInputStream(connection.getInputStream) - val outputStream = new FileOutputStream(tempFile) + val connection = url.openConnection().asInstanceOf[HttpURLConnection] + connection.setRequestMethod("GET") + connection.setConnectTimeout(30000) // 30 seconds + connection.setReadTimeout(300000) // 5 minutes for large file try { - val buffer = new Array[Byte](8192) - var bytesRead = 0 - var totalBytes = 0L - - while ({ bytesRead = inputStream.read(buffer); bytesRead != -1 }) { - outputStream.write(buffer, 0, bytesRead) - totalBytes += bytesRead + val responseCode = connection.getResponseCode + if (responseCode != HttpURLConnection.HTTP_OK) { + throw new RuntimeException(s"HTTP request failed with status $responseCode") } - logger.info(s"Downloaded $totalBytes bytes") - } finally { - inputStream.close() - outputStream.close() - } - - // Atomic rename - if (targetFile.exists()) { - targetFile.delete() - } - if (!tempFile.renameTo(targetFile)) { - throw new RuntimeException(s"Failed to rename temp file to ${targetFile.getAbsolutePath}") - } + val inputStream = new BufferedInputStream(connection.getInputStream) + val outputStream = new FileOutputStream(tempFile) - logger.info(s"VCF file saved to ${targetFile.getAbsolutePath}") - } finally { - connection.disconnect() - } - } + try { + val buffer = new Array[Byte](8192) + var bytesRead = 0 + var totalBytes = 0L - /** - * Sanitizes the VCF file by removing malformed records that HTSJDK cannot parse. - * Specifically filters out records with duplicate alleles (REF == ALT or duplicate ALT alleles). 
- * - * @return Try containing the number of skipped records - */ - private def sanitizeVcfFile(): Try[Int] = Try { - val sourceFile = genomicsConfig.ybrowseVcfStoragePath - val tempFile = new java.io.File(sourceFile.getAbsolutePath + ".sanitized.tmp") - - logger.info(s"Sanitizing VCF file: ${sourceFile.getAbsolutePath}") - - val inputStream = new BufferedReader( - new InputStreamReader( - new GZIPInputStream( - new BufferedInputStream( - new java.io.FileInputStream(sourceFile) - ) - ) - ) - ) - - val outputStream = new java.io.PrintWriter( - new java.io.OutputStreamWriter( - new GZIPOutputStream( - new FileOutputStream(tempFile) - ) - ) - ) - - var skippedCount = 0 - var lineNumber = 0 - - try { - var line: String = null - while ({ line = inputStream.readLine(); line != null }) { - lineNumber += 1 - if (line.startsWith("#")) { - // Header line - pass through - outputStream.println(line) - } else { - // Data line - check for duplicate alleles - if (isValidVcfDataLine(line)) { - outputStream.println(line) - } else { - skippedCount += 1 - if (skippedCount <= 10) { - logger.warn(s"Skipping malformed VCF record at line $lineNumber: ${line.take(100)}...") - } + while ({ bytesRead = inputStream.read(buffer); bytesRead != -1 }) { + outputStream.write(buffer, 0, bytesRead) + totalBytes += bytesRead } + + logger.info(s"Downloaded $totalBytes bytes") + } finally { + inputStream.close() + outputStream.close() } - } - if (skippedCount > 10) { - logger.warn(s"Skipped ${skippedCount - 10} additional malformed records (warnings suppressed)") - } - } finally { - inputStream.close() - outputStream.close() - } + // Atomic rename + if (targetFile.exists()) { + targetFile.delete() + } + if (!tempFile.renameTo(targetFile)) { + throw new RuntimeException(s"Failed to rename temp file to ${targetFile.getAbsolutePath}") + } - // Replace original with sanitized version - if (sourceFile.exists()) { - sourceFile.delete() - } - if (!tempFile.renameTo(sourceFile)) { - throw new 
RuntimeException(s"Failed to rename sanitized file to ${sourceFile.getAbsolutePath}") + logger.info(s"GFF file saved to ${targetFile.getAbsolutePath}") + } finally { + connection.disconnect() + } } - - logger.info(s"VCF sanitization complete. Processed $lineNumber lines, skipped $skippedCount malformed records.") - skippedCount - } - - /** - * Validates a VCF data line for common issues that break HTSJDK parsing. - * Checks for: - * - Duplicate alleles (REF appearing in ALT, or duplicate ALT alleles) - * - Empty required fields - */ - private def isValidVcfDataLine(line: String): Boolean = { - val fields = line.split("\t", 6) // Only need first 5 fields: CHROM, POS, ID, REF, ALT - if (fields.length < 5) return false - - val ref = fields(3).toUpperCase - val altField = fields(4) - - // Handle missing ALT (just ".") - if (altField == ".") return true - - val alts = altField.split(",").map(_.toUpperCase) - - // Check for duplicate alleles - val allAlleles = ref +: alts - val uniqueAlleles = allAlleles.distinct - - // If we have fewer unique alleles than total, there are duplicates - uniqueAlleles.length == allAlleles.length } } diff --git a/app/config/FeatureFlags.scala b/app/config/FeatureFlags.scala index 3520042..eb9dba5 100644 --- a/app/config/FeatureFlags.scala +++ b/app/config/FeatureFlags.scala @@ -17,4 +17,9 @@ class FeatureFlags @Inject()(config: Configuration) { * Disabled by default until age data is populated. */ val showBranchAgeEstimates: Boolean = featuresConfig.getOptional[Boolean]("tree.showBranchAgeEstimates").getOrElse(false) + + /** + * Show the alternative "Block Layout" (ytree.net style) for the tree. 
+ */ + val showVerticalTree: Boolean = featuresConfig.getOptional[Boolean]("tree.showVerticalTree").getOrElse(false) } diff --git a/app/config/GenomicsConfig.scala b/app/config/GenomicsConfig.scala index 41dfa19..69af78d 100644 --- a/app/config/GenomicsConfig.scala +++ b/app/config/GenomicsConfig.scala @@ -22,8 +22,8 @@ class GenomicsConfig @Inject()(config: Configuration) { } // YBrowse configuration - val ybrowseVcfUrl: String = genomicsConfig.get[String]("ybrowse.vcf_url") - val ybrowseVcfStoragePath: File = new File(genomicsConfig.get[String]("ybrowse.vcf_storage_path")) + val ybrowseGffUrl: String = genomicsConfig.get[String]("ybrowse.gff_url") + val ybrowseGffStoragePath: File = new File(genomicsConfig.get[String]("ybrowse.gff_storage_path")) /** * Retrieves the path to a liftover chain file for a given source and target genome. diff --git a/app/controllers/CuratorController.scala b/app/controllers/CuratorController.scala index 4991084..b0f2a2d 100644 --- a/app/controllers/CuratorController.scala +++ b/app/controllers/CuratorController.scala @@ -3,16 +3,16 @@ package controllers import actions.{AuthenticatedAction, AuthenticatedRequest, PermissionAction} import jakarta.inject.{Inject, Singleton} import models.HaplogroupType -import models.dal.domain.genomics.Variant -import models.domain.genomics.{VariantGroup, VariantWithContig} +import models.domain.genomics.{MutationType, NamingStatus, PointVariantCoordinates, VariantAliases, VariantV2} import models.domain.haplogroups.Haplogroup import org.webjars.play.WebJarsUtil import play.api.Logging import play.api.data.Form import play.api.data.Forms.* +import play.api.libs.json.Json import play.api.i18n.I18nSupport import play.api.mvc.* -import repositories.{GenbankContigRepository, HaplogroupCoreRepository, HaplogroupVariantRepository, VariantAliasRepository, VariantRepository} +import repositories.{GenbankContigRepository, HaplogroupCoreRepository, HaplogroupVariantRepository, VariantV2Repository} import 
services.{CuratorAuditService, TreeRestructuringService} import services.genomics.YBrowseVariantIngestionService @@ -47,7 +47,8 @@ case class CreateHaplogroupFormData( ) case class VariantFormData( - genbankContigId: Int, + refGenome: String, + contig: String, position: Int, referenceAllele: String, alternateAllele: String, @@ -62,7 +63,7 @@ case class SplitBranchFormData( description: Option[String], source: String, confidenceLevel: String, - variantGroupKeys: Seq[String], + variantIds: Seq[Int], childIds: Seq[Int] ) @@ -72,8 +73,7 @@ class CuratorController @Inject()( authenticatedAction: AuthenticatedAction, permissionAction: PermissionAction, haplogroupRepository: HaplogroupCoreRepository, - variantRepository: VariantRepository, - variantAliasRepository: VariantAliasRepository, + variantV2Repository: VariantV2Repository, haplogroupVariantRepository: HaplogroupVariantRepository, genbankContigRepository: GenbankContigRepository, auditService: CuratorAuditService, @@ -107,14 +107,15 @@ class CuratorController @Inject()( private val variantForm: Form[VariantFormData] = Form( mapping( - "genbankContigId" -> number, + "refGenome" -> nonEmptyText.verifying("Invalid reference genome", r => Seq("hs1", "GRCh38", "GRCh37").contains(r)), + "contig" -> nonEmptyText(1, 50), "position" -> number, "referenceAllele" -> nonEmptyText(1, 1000), "alternateAllele" -> nonEmptyText(1, 1000), - "variantType" -> nonEmptyText(1, 50), + "variantType" -> nonEmptyText.verifying("Invalid variant type", t => MutationType.fromString(t).isDefined), "rsId" -> optional(text(maxLength = 50)), "commonName" -> optional(text(maxLength = 100)) - )(VariantFormData.apply)(v => Some((v.genbankContigId, v.position, v.referenceAllele, v.alternateAllele, v.variantType, v.rsId, v.commonName))) + )(VariantFormData.apply)(v => Some((v.refGenome, v.contig, v.position, v.referenceAllele, v.alternateAllele, v.variantType, v.rsId, v.commonName))) ) private val splitBranchForm: Form[SplitBranchFormData] = Form( @@ 
-124,9 +125,9 @@ class CuratorController @Inject()( "description" -> optional(text(maxLength = 2000)), "source" -> nonEmptyText(1, 100), "confidenceLevel" -> nonEmptyText(1, 50), - "variantGroupKeys" -> seq(text), + "variantIds" -> seq(number), "childIds" -> seq(number) - )(SplitBranchFormData.apply)(s => Some((s.name, s.lineage, s.description, s.source, s.confidenceLevel, s.variantGroupKeys, s.childIds))) + )(SplitBranchFormData.apply)(s => Some((s.name, s.lineage, s.description, s.source, s.confidenceLevel, s.variantIds, s.childIds))) ) private val createHaplogroupFormMapping: Form[CreateHaplogroupFormData] = Form( @@ -148,7 +149,7 @@ class CuratorController @Inject()( for { yCount <- haplogroupRepository.countByType(HaplogroupType.Y) mtCount <- haplogroupRepository.countByType(HaplogroupType.MT) - variantCount <- variantRepository.count(None) + variantCount <- variantV2Repository.count(None) } yield { Ok(views.html.curator.dashboard(yCount, mtCount, variantCount)) } @@ -157,20 +158,8 @@ class CuratorController @Inject()( // === Haplogroups === def listHaplogroups(query: Option[String], hgType: Option[String], page: Int, pageSize: Int): Action[AnyContent] = - withPermission("haplogroup.view").async { implicit request => - val haplogroupType = hgType.flatMap(HaplogroupType.fromString) - val offset = (page - 1) * pageSize - - for { - haplogroups <- query match { - case Some(q) if q.nonEmpty => haplogroupRepository.search(q, haplogroupType, pageSize, offset) - case _ => haplogroupRepository.search("", haplogroupType, pageSize, offset) - } - totalCount <- haplogroupRepository.count(query.filter(_.nonEmpty), haplogroupType) - } yield { - val totalPages = Math.max(1, (totalCount + pageSize - 1) / pageSize) - Ok(views.html.curator.haplogroups.list(haplogroups, query, hgType, page, totalPages, pageSize)) - } + withPermission("haplogroup.view") { implicit request => + Ok(views.html.curator.haplogroups.list(query, hgType, pageSize)) } def haplogroupsFragment(query: 
Option[String], hgType: Option[String], page: Int, pageSize: Int): Action[AnyContent] = @@ -199,11 +188,9 @@ class CuratorController @Inject()( variants <- haplogroupVariantRepository.getHaplogroupVariants(id) history <- auditService.getHaplogroupHistory(id) } yield { - val variantsWithContig = variants.map { case (v, c) => VariantWithContig(v, c) } - val variantGroups = variantRepository.groupVariants(variantsWithContig) haplogroupOpt match { case Some(haplogroup) => - Ok(views.html.curator.haplogroups.detailPanel(haplogroup, parentOpt, children, variantGroups, history)) + Ok(views.html.curator.haplogroups.detailPanel(haplogroup, parentOpt, children, variants, history)) case None => NotFound("Haplogroup not found") } @@ -433,47 +420,31 @@ class CuratorController @Inject()( // === Variants === def listVariants(query: Option[String], page: Int, pageSize: Int): Action[AnyContent] = - withPermission("variant.view").async { implicit request => - val offset = (page - 1) * pageSize - for { - (variantGroups, totalCount) <- variantRepository.searchGroupedPaginated(query.getOrElse(""), offset, pageSize) - } yield { - val totalPages = Math.max(1, (totalCount + pageSize - 1) / pageSize) - Ok(views.html.curator.variants.list(variantGroups, query, page, totalPages, pageSize, totalCount)) - } + withPermission("variant.view") { implicit request => + Ok(views.html.curator.variants.list(query, pageSize)) } def variantsFragment(query: Option[String], page: Int, pageSize: Int): Action[AnyContent] = withPermission("variant.view").async { implicit request => val offset = (page - 1) * pageSize for { - (variantGroups, totalCount) <- variantRepository.searchGroupedPaginated(query.getOrElse(""), offset, pageSize) + (variants, totalCount) <- variantV2Repository.searchPaginated(query.getOrElse(""), offset, pageSize) } yield { val totalPages = Math.max(1, (totalCount + pageSize - 1) / pageSize) - Ok(views.html.curator.variants.listFragment(variantGroups, query, page, totalPages, pageSize, 
totalCount)) + Ok(views.html.curator.variants.listFragment(variants, query, page, totalPages, pageSize, totalCount)) } } def variantDetailPanel(id: Int): Action[AnyContent] = withPermission("variant.view").async { implicit request => for { - variantOpt <- variantRepository.findByIdWithContig(id) - // Get all variants in the same group - allVariantsInGroup <- variantOpt match { - case Some(vwc) => - val groupKey = vwc.variant.commonName.orElse(vwc.variant.rsId).getOrElse(s"variant_${id}") - variantRepository.getVariantsByGroupKey(groupKey) - case None => Future.successful(Seq.empty) - } - // Fetch aliases for this variant - aliases <- variantAliasRepository.findByVariantId(id) + variantOpt <- variantV2Repository.findById(id) haplogroups <- haplogroupVariantRepository.getHaplogroupsByVariant(id) history <- auditService.getVariantHistory(id) } yield { variantOpt match { - case Some(variantWithContig) => - val variantGroup = variantRepository.groupVariants(allVariantsInGroup).headOption - Ok(views.html.curator.variants.detailPanel(variantWithContig, variantGroup, aliases, haplogroups, history)) + case Some(variant) => + Ok(views.html.curator.variants.detailPanel(variant, haplogroups, history)) case None => NotFound("Variant not found") } @@ -481,65 +452,52 @@ class CuratorController @Inject()( } def createVariantForm: Action[AnyContent] = - withPermission("variant.create").async { implicit request => - genbankContigRepository.getYAndMtContigs.map { contigs => - Ok(views.html.curator.variants.createForm(variantForm, contigs)) - } + withPermission("variant.create") { implicit request => + Ok(views.html.curator.variants.createForm(variantForm)) } def createVariant: Action[AnyContent] = withPermission("variant.create").async { implicit request => variantForm.bindFromRequest().fold( formWithErrors => { - genbankContigRepository.getYAndMtContigs.map { contigs => - BadRequest(views.html.curator.variants.createForm(formWithErrors, contigs)) - } + 
Future.successful(BadRequest(views.html.curator.variants.createForm(formWithErrors))) }, data => { - val variant = Variant( - variantId = None, - genbankContigId = data.genbankContigId, - position = data.position, - referenceAllele = data.referenceAllele, - alternateAllele = data.alternateAllele, - variantType = data.variantType, - rsId = data.rsId, - commonName = data.commonName + val coordinates = Json.obj( + data.refGenome -> Json.toJson(PointVariantCoordinates( + contig = data.contig, + position = data.position, + ref = data.referenceAllele.toUpperCase, + alt = data.alternateAllele.toUpperCase + )) + ) + + val aliases = (data.commonName, data.rsId) match { + case (Some(name), Some(rs)) => + Json.toJson(VariantAliases(commonNames = Seq(name), rsIds = Seq(rs))) + case (Some(name), None) => + Json.toJson(VariantAliases(commonNames = Seq(name))) + case (None, Some(rs)) => + Json.toJson(VariantAliases(rsIds = Seq(rs))) + case _ => + Json.obj() + } + + val variant = VariantV2( + canonicalName = data.commonName, + mutationType = MutationType.fromStringOrDefault(data.variantType), + namingStatus = if (data.commonName.isDefined) NamingStatus.Named else NamingStatus.Unnamed, + aliases = aliases, + coordinates = coordinates ) for { - // Create the source variant - newId <- variantRepository.createVariant(variant) - createdVariant = variant.copy(variantId = Some(newId)) + createdId <- variantV2Repository.create(variant) + createdVariant = variant.copy(variantId = Some(createdId)) _ <- auditService.logVariantCreate(request.user.id.get, createdVariant, Some("Created via curator interface")) - - // Get the source contig for liftover - sourceContigOpt <- genbankContigRepository.findById(data.genbankContigId) - - // Attempt liftover to other reference genomes - liftedCount <- sourceContigOpt match { - case Some(sourceContig) => - variantIngestionService.liftoverVariant(createdVariant, sourceContig).flatMap { liftedVariants => - if (liftedVariants.nonEmpty) { - 
logger.info(s"Lifting variant ${data.commonName.getOrElse("unnamed")} to ${liftedVariants.size} other reference(s)") - // Create or find each lifted variant - variantRepository.findOrCreateVariantsBatch(liftedVariants).map(_.size) - } else { - Future.successful(0) - } - } - case None => - logger.warn(s"Source contig ${data.genbankContigId} not found for liftover") - Future.successful(0) - } } yield { - val message = if (liftedCount > 0) { - s"Variant created successfully. Also lifted to $liftedCount other reference genome(s)." - } else { - s"Variant created successfully. (Liftover to other references not available or failed)" - } Redirect(routes.CuratorController.listVariants(None, 1, 20)) - .flashing("success" -> message) + .flashing("success" -> s"Variant ${createdVariant.displayName} created successfully") } } ) @@ -547,21 +505,32 @@ class CuratorController @Inject()( def editVariantForm(id: Int): Action[AnyContent] = withPermission("variant.update").async { implicit request => - variantRepository.findByIdWithContig(id).map { - case Some(vwc) => - val variant = vwc.variant - val formData = VariantFormData( - genbankContigId = variant.genbankContigId, - position = variant.position, - referenceAllele = variant.referenceAllele, - alternateAllele = variant.alternateAllele, - variantType = variant.variantType, - rsId = variant.rsId, - commonName = variant.commonName - ) - // Display contig as "accession (commonName / refGenome)" e.g., "CP068255.2 (chrX / hs1)" - val contigDisplay = s"${vwc.contig.accession} (${vwc.contig.commonName.getOrElse("?")} / ${vwc.contig.referenceGenome.getOrElse("?")})" - Ok(views.html.curator.variants.editForm(id, variantForm.fill(formData), contigDisplay)) + variantV2Repository.findById(id).map { + case Some(variant) => + // Get the primary reference genome coordinates (prefer hs1) + val refGenome = variant.availableReferences.find(_ == "hs1") + .orElse(variant.availableReferences.headOption) + .getOrElse("hs1") + + val coords = 
variant.getCoordinates(refGenome) + val contig = coords.flatMap(c => (c \ "contig").asOpt[String]).getOrElse("") + val position = coords.flatMap(c => (c \ "position").asOpt[Int]).getOrElse(0) + val ref = coords.flatMap(c => (c \ "ref").asOpt[String]).getOrElse("") + val alt = coords.flatMap(c => (c \ "alt").asOpt[String]).getOrElse("") + + val filledForm = variantForm.fill(VariantFormData( + refGenome = refGenome, + contig = contig, + position = position, + referenceAllele = ref, + alternateAllele = alt, + variantType = variant.mutationType.dbValue, + rsId = variant.rsIds.headOption, + commonName = variant.canonicalName + )) + + Ok(views.html.curator.variants.editForm(id, filledForm, s"$refGenome:$contig")) + case None => NotFound("Variant not found") } @@ -569,115 +538,74 @@ class CuratorController @Inject()( def updateVariant(id: Int): Action[AnyContent] = withPermission("variant.update").async { implicit request => - variantRepository.findByIdWithContig(id).flatMap { - case Some(vwc) => - val oldVariant = vwc.variant - val contigDisplay = s"${vwc.contig.accession} (${vwc.contig.commonName.getOrElse("?")} / ${vwc.contig.referenceGenome.getOrElse("?")})" - variantForm.bindFromRequest().fold( - formWithErrors => { - Future.successful(BadRequest(views.html.curator.variants.editForm(id, formWithErrors, contigDisplay))) - }, - data => { - val updatedVariant = oldVariant.copy( - variantType = data.variantType, - rsId = data.rsId, - commonName = data.commonName + variantForm.bindFromRequest().fold( + formWithErrors => { + Future.successful(BadRequest(views.html.curator.variants.editForm(id, formWithErrors, ""))) + }, + data => { + variantV2Repository.findById(id).flatMap { + case None => + Future.successful(NotFound("Variant not found")) + + case Some(existing) => + // Update editable fields (metadata only - coordinates are immutable after creation) + val updatedAliases = (data.commonName, data.rsId) match { + case (Some(name), Some(rs)) => + 
Json.toJson(VariantAliases(commonNames = Seq(name), rsIds = Seq(rs))) + case (Some(name), None) => + Json.toJson(VariantAliases(commonNames = Seq(name))) + case (None, Some(rs)) => + Json.toJson(VariantAliases(rsIds = Seq(rs))) + case _ => + existing.aliases + } + + val updated = existing.copy( + canonicalName = data.commonName.orElse(existing.canonicalName), + mutationType = MutationType.fromStringOrDefault(data.variantType), + namingStatus = if (data.commonName.isDefined) NamingStatus.Named else existing.namingStatus, + aliases = updatedAliases ) for { - updated <- variantRepository.update(updatedVariant) - _ <- if (updated) { - auditService.logVariantUpdate(request.user.id.get, oldVariant, updatedVariant, Some("Updated via curator interface")) + success <- variantV2Repository.update(updated) + _ <- if (success) { + auditService.logVariantUpdate(request.user.id.get, existing, updated, Some("Updated via curator interface")) } else { Future.successful(()) } } yield { - if (updated) { + if (success) { Redirect(routes.CuratorController.listVariants(None, 1, 20)) - .flashing("success" -> "Variant updated successfully") + .flashing("success" -> s"Variant ${updated.displayName} updated successfully") } else { BadRequest("Failed to update variant") } } - } - ) - case None => - Future.successful(NotFound("Variant not found")) - } + } + } + ) } + // Variant groups are obsolete - VariantV2 is already consolidated def editVariantGroupForm(groupKey: String): Action[AnyContent] = - withPermission("variant.update").async { implicit request => - variantRepository.getVariantsByGroupKey(groupKey).map { variants => - if (variants.isEmpty) { - NotFound("Variant group not found") - } else { - val variantGroup = variantRepository.groupVariants(variants).head - // Use shared values from group for form - val formData = VariantFormData( - genbankContigId = variants.head.variant.genbankContigId, - position = variants.head.variant.position, - referenceAllele = 
variants.head.variant.referenceAllele, - alternateAllele = variants.head.variant.alternateAllele, - variantType = variants.head.variant.variantType, - rsId = variantGroup.rsId, - commonName = variantGroup.commonName - ) - Ok(views.html.curator.variants.editGroupForm(groupKey, variantGroup, variantForm.fill(formData))) - } - } + withPermission("variant.update") { implicit request => + Redirect(routes.CuratorController.listVariants(Some(groupKey), 1, 20)) + .flashing("info" -> "Variant groups have been replaced with consolidated variants. Edit each variant directly.") } def updateVariantGroup(groupKey: String): Action[AnyContent] = - withPermission("variant.update").async { implicit request => - variantRepository.getVariantsByGroupKey(groupKey).flatMap { variants => - if (variants.isEmpty) { - Future.successful(NotFound("Variant group not found")) - } else { - val variantGroup = variantRepository.groupVariants(variants).head - variantForm.bindFromRequest().fold( - formWithErrors => { - Future.successful(BadRequest(views.html.curator.variants.editGroupForm(groupKey, variantGroup, formWithErrors))) - }, - data => { - // Update all variants in the group with the shared fields - val updateFutures = variants.map { vwc => - val oldVariant = vwc.variant - val updatedVariant = oldVariant.copy( - variantType = data.variantType, - rsId = data.rsId, - commonName = data.commonName - ) - for { - updated <- variantRepository.update(updatedVariant) - _ <- if (updated) { - auditService.logVariantUpdate(request.user.id.get, oldVariant, updatedVariant, Some(s"Updated via group edit ($groupKey)")) - } else { - Future.successful(()) - } - } yield updated - } - - Future.sequence(updateFutures).map { results => - if (results.forall(identity)) { - Redirect(routes.CuratorController.listVariants(None, 1, 20)) - .flashing("success" -> s"Updated ${results.size} variants in group $groupKey") - } else { - BadRequest(s"Failed to update some variants in group") - } - } - } - ) - } - } + 
withPermission("variant.update") { implicit request => + Redirect(routes.CuratorController.listVariants(Some(groupKey), 1, 20)) + .flashing("info" -> "Variant groups have been replaced with consolidated variants.") } def deleteVariant(id: Int): Action[AnyContent] = withPermission("variant.delete").async { implicit request => - variantRepository.findById(id).flatMap { + variantV2Repository.findById(id).flatMap { case Some(variant) => for { - deleted <- variantRepository.delete(id) + deleted <- variantV2Repository.delete(id) _ <- if (deleted) { auditService.logVariantDelete(request.user.id.get, variant, Some("Deleted via curator interface")) } else { @@ -716,76 +644,53 @@ class CuratorController @Inject()( withPermission("haplogroup.view").async { implicit request => for { haplogroupOpt <- haplogroupRepository.findById(haplogroupId) - variantGroups <- query match { - case Some(q) if q.nonEmpty => variantRepository.searchGrouped(q, 20) + variants <- query match { + case Some(q) if q.nonEmpty => variantV2Repository.searchByName(q) case _ => Future.successful(Seq.empty) } existingVariantIds <- haplogroupVariantRepository.getVariantsByHaplogroup(haplogroupId).map(_.flatMap(_.variantId).toSet) } yield { - // Filter out groups where ALL variants are already associated - val availableGroups = variantGroups.filterNot { group => - group.variantIds.forall(existingVariantIds.contains) - } + // Filter out variants that are already associated + val availableVariants = variants.filterNot(v => v.variantId.exists(existingVariantIds.contains)) haplogroupOpt match { case Some(haplogroup) => - Ok(views.html.curator.haplogroups.variantSearchResults(haplogroupId, haplogroup.name, query, availableGroups)) + Ok(views.html.curator.haplogroups.variantSearchResults(haplogroupId, haplogroup.name, query, availableVariants)) case None => NotFound("Haplogroup not found") } } } - def addVariantGroupToHaplogroup(haplogroupId: Int, groupKey: String): Action[AnyContent] = + def 
addVariantToHaplogroup(haplogroupId: Int, variantId: Int): Action[AnyContent] = withPermission("haplogroup.update").async { implicit request => for { - // Get all variants in the group - variantsInGroup <- variantRepository.getVariantsByGroupKey(groupKey) - existingVariantIds <- haplogroupVariantRepository.getVariantsByHaplogroup(haplogroupId).map(_.flatMap(_.variantId).toSet) - - // Add each variant that isn't already associated - addedIds <- Future.traverse(variantsInGroup.filterNot(v => existingVariantIds.contains(v.variant.variantId.getOrElse(-1)))) { vwc => - for { - hvId <- haplogroupVariantRepository.addVariantToHaplogroup(haplogroupId, vwc.variant.variantId.get) - _ <- auditService.logVariantAddedToHaplogroup( - request.user.email.getOrElse(request.user.id.map(_.toString).getOrElse("unknown")), - hvId, - Some(s"Added variant ${vwc.variant.variantId.get} (${groupKey}) to haplogroup $haplogroupId") - ) - } yield hvId - } - + hvId <- haplogroupVariantRepository.addVariantToHaplogroup(haplogroupId, variantId) + _ <- auditService.logVariantAddedToHaplogroup( + request.user.email.getOrElse(request.user.id.map(_.toString).getOrElse("unknown")), + hvId, + Some(s"Added variant $variantId to haplogroup $haplogroupId") + ) // Fetch updated variants for display variants <- haplogroupVariantRepository.getHaplogroupVariants(haplogroupId) - variantsWithContig = variants.map { case (v, c) => VariantWithContig(v, c) } - variantGroups = variantRepository.groupVariants(variantsWithContig) } yield { - Ok(views.html.curator.haplogroups.variantsPanel(haplogroupId, variantGroups)) + Ok(views.html.curator.haplogroups.variantsPanel(haplogroupId, variants)) .withHeaders("HX-Trigger" -> "variantAdded") } } - def removeVariantGroupFromHaplogroup(haplogroupId: Int, groupKey: String): Action[AnyContent] = + def removeVariantFromHaplogroup(haplogroupId: Int, variantId: Int): Action[AnyContent] = withPermission("haplogroup.update").async { implicit request => for { - // Get all variants 
in the group - variantsInGroup <- variantRepository.getVariantsByGroupKey(groupKey) - - // Remove each variant - removed <- Future.traverse(variantsInGroup.flatMap(_.variant.variantId)) { variantId => - haplogroupVariantRepository.removeVariantFromHaplogroup(haplogroupId, variantId) - } - + removed <- haplogroupVariantRepository.removeVariantFromHaplogroup(haplogroupId, variantId) // Fetch updated variants for display variants <- haplogroupVariantRepository.getHaplogroupVariants(haplogroupId) - variantsWithContig = variants.map { case (v, c) => VariantWithContig(v, c) } - variantGroups = variantRepository.groupVariants(variantsWithContig) } yield { - if (removed.sum > 0) { - Ok(views.html.curator.haplogroups.variantsPanel(haplogroupId, variantGroups)) + if (removed > 0) { + Ok(views.html.curator.haplogroups.variantsPanel(haplogroupId, variants)) .withHeaders("HX-Trigger" -> "variantRemoved") } else { - BadRequest("Failed to remove variant group") + BadRequest("Failed to remove variant") } } } @@ -802,7 +707,7 @@ class CuratorController @Inject()( def splitBranchForm(parentId: Int): Action[AnyContent] = withPermission("haplogroup.update").async { implicit request => treeRestructuringService.getSplitPreview(parentId).map { preview => - Ok(views.html.curator.haplogroups.splitBranchForm(preview.parent, preview.variantGroups, preview.children, splitBranchForm)) + Ok(views.html.curator.haplogroups.splitBranchForm(preview.parent, preview.variants, preview.children, splitBranchForm)) }.recover { case e: IllegalArgumentException => NotFound(e.getMessage) @@ -815,7 +720,7 @@ class CuratorController @Inject()( splitBranchForm.bindFromRequest().fold( formWithErrors => { Future.successful(BadRequest(views.html.curator.haplogroups.splitBranchForm( - preview.parent, preview.variantGroups, preview.children, formWithErrors + preview.parent, preview.variants, preview.children, formWithErrors ))) }, data => { @@ -835,7 +740,7 @@ class CuratorController @Inject()( 
treeRestructuringService.splitBranch( parentId, newHaplogroup, - data.variantGroupKeys, + data.variantIds, data.childIds, request.user.id.get ).map { newId => @@ -844,7 +749,7 @@ class CuratorController @Inject()( }.recover { case e: IllegalArgumentException => BadRequest(views.html.curator.haplogroups.splitBranchForm( - preview.parent, preview.variantGroups, preview.children, + preview.parent, preview.variants, preview.children, splitBranchForm.fill(data).withGlobalError(e.getMessage) )) } diff --git a/app/controllers/GenomeRegionsApiManagementController.scala b/app/controllers/GenomeRegionsApiManagementController.scala index 64cb45a..0d0fa0a 100644 --- a/app/controllers/GenomeRegionsApiManagementController.scala +++ b/app/controllers/GenomeRegionsApiManagementController.scala @@ -11,7 +11,7 @@ import services.GenomeRegionsManagementService import scala.concurrent.{ExecutionContext, Future} /** - * Private API controller for managing genome regions, cytobands, and STR markers. + * Private API controller for managing genome regions. * Secured with X-API-Key authentication. * * API changes are logged as "system" user in the audit log. 
@@ -29,8 +29,8 @@ class GenomeRegionsApiManagementController @Inject()( // GenomeRegion Endpoints // ============================================================================ - def listRegions(build: Option[String], page: Int = 1, pageSize: Int = 25): Action[AnyContent] = secureApi.async { _ => - managementService.listRegions(build, page, pageSize).map { response => + def listRegions(regionType: Option[String], build: Option[String], page: Int = 1, pageSize: Int = 25): Action[AnyContent] = secureApi.async { _ => + managementService.listRegions(regionType, build, page, pageSize).map { response => Ok(Json.toJson(response)) } } @@ -75,106 +75,4 @@ class GenomeRegionsApiManagementController @Inject()( Ok(Json.toJson(response)) } } - - // ============================================================================ - // Cytoband Endpoints - // ============================================================================ - - def listCytobands(build: Option[String], page: Int = 1, pageSize: Int = 25): Action[AnyContent] = secureApi.async { _ => - managementService.listCytobands(build, page, pageSize).map { response => - Ok(Json.toJson(response)) - } - } - - def getCytoband(id: Int): Action[AnyContent] = secureApi.async { _ => - managementService.getCytoband(id).map { - case Some(cytoband) => Ok(Json.toJson(cytoband)) - case None => NotFound(Json.obj("error" -> "Cytoband not found")) - } - } - - def createCytoband(): Action[CreateCytobandRequest] = - secureApi.jsonAction[CreateCytobandRequest].async { request => - logger.info(s"API: Creating cytoband") - managementService.createCytoband(request.body, None).map { - case Right(cytoband) => Created(Json.toJson(cytoband)) - case Left(error) => BadRequest(Json.obj("error" -> error)) - } - } - - def updateCytoband(id: Int): Action[UpdateCytobandRequest] = - secureApi.jsonAction[UpdateCytobandRequest].async { request => - logger.info(s"API: Updating cytoband $id") - managementService.updateCytoband(id, request.body, None).map { 
- case Right(cytoband) => Ok(Json.toJson(cytoband)) - case Left(error) => BadRequest(Json.obj("error" -> error)) - } - } - - def deleteCytoband(id: Int): Action[AnyContent] = secureApi.async { _ => - logger.info(s"API: Deleting cytoband $id") - managementService.deleteCytoband(id, None).map { - case Right(_) => NoContent - case Left(error) => BadRequest(Json.obj("error" -> error)) - } - } - - def bulkCreateCytobands(): Action[BulkCreateCytobandsRequest] = - secureApi.jsonAction[BulkCreateCytobandsRequest].async { request => - logger.info(s"API: Bulk creating ${request.body.cytobands.size} cytobands") - managementService.bulkCreateCytobands(request.body, None).map { response => - Ok(Json.toJson(response)) - } - } - - // ============================================================================ - // STR Marker Endpoints - // ============================================================================ - - def listStrMarkers(build: Option[String], page: Int = 1, pageSize: Int = 25): Action[AnyContent] = secureApi.async { _ => - managementService.listStrMarkers(build, page, pageSize).map { response => - Ok(Json.toJson(response)) - } - } - - def getStrMarker(id: Int): Action[AnyContent] = secureApi.async { _ => - managementService.getStrMarker(id).map { - case Some(marker) => Ok(Json.toJson(marker)) - case None => NotFound(Json.obj("error" -> "STR marker not found")) - } - } - - def createStrMarker(): Action[CreateStrMarkerRequest] = - secureApi.jsonAction[CreateStrMarkerRequest].async { request => - logger.info(s"API: Creating STR marker") - managementService.createStrMarker(request.body, None).map { - case Right(marker) => Created(Json.toJson(marker)) - case Left(error) => BadRequest(Json.obj("error" -> error)) - } - } - - def updateStrMarker(id: Int): Action[UpdateStrMarkerRequest] = - secureApi.jsonAction[UpdateStrMarkerRequest].async { request => - logger.info(s"API: Updating STR marker $id") - managementService.updateStrMarker(id, request.body, None).map { - case 
Right(marker) => Ok(Json.toJson(marker)) - case Left(error) => BadRequest(Json.obj("error" -> error)) - } - } - - def deleteStrMarker(id: Int): Action[AnyContent] = secureApi.async { _ => - logger.info(s"API: Deleting STR marker $id") - managementService.deleteStrMarker(id, None).map { - case Right(_) => NoContent - case Left(error) => BadRequest(Json.obj("error" -> error)) - } - } - - def bulkCreateStrMarkers(): Action[BulkCreateStrMarkersRequest] = - secureApi.jsonAction[BulkCreateStrMarkersRequest].async { request => - logger.info(s"API: Bulk creating ${request.body.markers.size} STR markers") - managementService.bulkCreateStrMarkers(request.body, None).map { response => - Ok(Json.toJson(response)) - } - } -} +} \ No newline at end of file diff --git a/app/controllers/GenomeRegionsCuratorController.scala b/app/controllers/GenomeRegionsCuratorController.scala index d4ce447..14f08d8 100644 --- a/app/controllers/GenomeRegionsCuratorController.scala +++ b/app/controllers/GenomeRegionsCuratorController.scala @@ -10,6 +10,7 @@ import play.api.Logging import play.api.data.Form import play.api.data.Forms.* import play.api.i18n.I18nSupport +import play.api.libs.json.Json import play.api.mvc.* import repositories.GenbankContigRepository import services.GenomeRegionsManagementService @@ -26,26 +27,8 @@ case class GenomeRegionFormData( modifier: Option[BigDecimal] ) -case class CytobandFormData( - genbankContigId: Int, - name: String, - startPos: Long, - endPos: Long, - stain: String -) - -case class StrMarkerFormData( - genbankContigId: Int, - name: String, - startPos: Long, - endPos: Long, - period: Int, - verified: Boolean, - note: Option[String] -) - /** - * UI Controller for managing genome regions, cytobands, and STR markers. + * UI Controller for managing genome regions. * Uses session-based authentication with permission checks. 
*/ @Singleton @@ -75,46 +58,24 @@ class GenomeRegionsCuratorController @Inject()( )(GenomeRegionFormData.apply)(g => Some((g.genbankContigId, g.regionType, g.name, g.startPos, g.endPos, g.modifier))) ) - private val cytobandForm: Form[CytobandFormData] = Form( - mapping( - "genbankContigId" -> number, - "name" -> nonEmptyText(1, 20), - "startPos" -> longNumber(min = 0), - "endPos" -> longNumber(min = 0), - "stain" -> nonEmptyText(1, 10) - )(CytobandFormData.apply)(c => Some((c.genbankContigId, c.name, c.startPos, c.endPos, c.stain))) - ) - - private val strMarkerForm: Form[StrMarkerFormData] = Form( - mapping( - "genbankContigId" -> number, - "name" -> nonEmptyText(1, 30), - "startPos" -> longNumber(min = 0), - "endPos" -> longNumber(min = 0), - "period" -> number(min = 1), - "verified" -> boolean, - "note" -> optional(text) - )(StrMarkerFormData.apply)(s => Some((s.genbankContigId, s.name, s.startPos, s.endPos, s.period, s.verified, s.note))) - ) - // ============================================================================ // GenomeRegion UI Endpoints // ============================================================================ - def listRegions(build: Option[String], page: Int = 1, pageSize: Int = 25): Action[AnyContent] = + def listRegions(regionType: Option[String], build: Option[String], page: Int = 1, pageSize: Int = 25): Action[AnyContent] = withPermission("genome_region.view").async { implicit request => for { - response <- managementService.listRegions(build, page, pageSize) + response <- managementService.listRegions(regionType, build, page, pageSize) } yield { val totalPages = Math.max(1, (response.total + pageSize - 1) / pageSize) Ok(views.html.curator.genomeregions.list(response.regions, build, page, totalPages, pageSize, response.total, genomicsConfig.supportedReferences)) } } - def regionsFragment(build: Option[String], page: Int = 1, pageSize: Int = 25): Action[AnyContent] = + def regionsFragment(regionType: Option[String], build: 
Option[String], page: Int = 1, pageSize: Int = 25): Action[AnyContent] = withPermission("genome_region.view").async { implicit request => for { - response <- managementService.listRegions(build, page, pageSize) + response <- managementService.listRegions(regionType, build, page, pageSize) } yield { val totalPages = Math.max(1, (response.total + pageSize - 1) / pageSize) Ok(views.html.curator.genomeregions.listFragment(response.regions, build, page, totalPages, pageSize, response.total)) @@ -145,26 +106,34 @@ class GenomeRegionsCuratorController @Inject()( } }, formData => { - val createRequest = CreateGenomeRegionRequest( - genbankContigId = formData.genbankContigId, - regionType = formData.regionType, - name = formData.name, - startPos = formData.startPos, - endPos = formData.endPos, - modifier = formData.modifier - ) - managementService.createRegion(createRequest, request.user.id).map { - case Right(_) => - Redirect(routes.GenomeRegionsCuratorController.listRegions(None, 1, 25)) - .flashing("success" -> "Genome region created successfully") - case Left(error) => - getContigsForFormSync.map { contigs => - BadRequest(views.html.curator.genomeregions.createForm( - genomeRegionForm.fill(formData).withGlobalError(error), - contigs, - genomicsConfig.supportedReferences - )) - }.getOrElse(BadRequest(error)) + // Resolve contig to get build name and common name + genbankContigRepository.findById(formData.genbankContigId).flatMap { + case Some(contig) => + val build = contig.referenceGenome.getOrElse("unknown") + val contigName = contig.commonName.getOrElse("unknown") + + val createRequest = CreateGenomeRegionRequest( + regionType = formData.regionType, + name = formData.name, + coordinates = Map(build -> RegionCoordinateDto(contigName, formData.startPos, formData.endPos)), + properties = formData.modifier.map(m => Json.obj("modifier" -> m)) + ) + + managementService.createRegion(createRequest, request.user.id).map { + case Right(_) => + 
Redirect(routes.GenomeRegionsCuratorController.listRegions(None, None, 1, 25)) + .flashing("success" -> "Genome region created successfully") + case Left(error) => + getContigsForFormSync.map { contigs => + BadRequest(views.html.curator.genomeregions.createForm( + genomeRegionForm.fill(formData).withGlobalError(error), + contigs, + genomicsConfig.supportedReferences + )) + }.getOrElse(BadRequest(error)) + } + case None => + Future.successful(BadRequest("Invalid contig ID")) } } ) @@ -177,13 +146,24 @@ class GenomeRegionsCuratorController @Inject()( contigs <- getContigsForForm } yield regionOpt match { case Some(region) => + // Try to map back to form data using the first coordinate found (limitation of this UI) + val (build, coord) = region.coordinates.headOption.getOrElse("unknown" -> RegionCoordinateDto("", 0, 0)) + // We need a genbankContigId for the form dropdown. + // This is tricky without a reverse lookup or storing it. + // For now, we might leave it 0 or try to find it in the list of contigs if possible. + // Or just pick the first contig that matches name and build. 
+ val contigId = contigs.find(c => c.commonName.contains(coord.contig) && c.referenceGenome.contains(build)) + .flatMap(_.id).getOrElse(0) + + val modifier = (region.properties \ "modifier").asOpt[BigDecimal] + val formData = GenomeRegionFormData( - region.genbankContigId, + contigId, region.regionType, region.name, - region.startPos, - region.endPos, - region.modifier + coord.start, + coord.end, + modifier ) Ok(views.html.curator.genomeregions.editForm(id, genomeRegionForm.fill(formData), contigs, genomicsConfig.supportedReferences)) case None => @@ -200,27 +180,38 @@ class GenomeRegionsCuratorController @Inject()( } }, formData => { - val updateRequest = UpdateGenomeRegionRequest( - regionType = Some(formData.regionType), - name = formData.name, - startPos = Some(formData.startPos), - endPos = Some(formData.endPos), - modifier = formData.modifier - ) - managementService.updateRegion(id, updateRequest, request.user.id).map { - case Right(_) => - Redirect(routes.GenomeRegionsCuratorController.listRegions(None, 1, 25)) - .flashing("success" -> "Genome region updated successfully") - case Left(error) => - getContigsForFormSync.map { contigs => - BadRequest(views.html.curator.genomeregions.editForm( - id, - genomeRegionForm.fill(formData).withGlobalError(error), - contigs, - genomicsConfig.supportedReferences - )) - }.getOrElse(BadRequest(error)) - } + genbankContigRepository.findById(formData.genbankContigId).flatMap { + case Some(contig) => + val build = contig.referenceGenome.getOrElse("unknown") + val contigName = contig.commonName.getOrElse("unknown") + + val updateRequest = UpdateGenomeRegionRequest( + regionType = Some(formData.regionType), + name = formData.name, + // Merging coordinates is complex. This simplistic update might overwrite other builds' coordinates + // if the service replaces the map. The Service logic currently REPLACES if provided. + // To support multi-build editing, the UI needs to change. + // For now, we assume single-build editing flow. 
+ coordinates = Some(Map(build -> RegionCoordinateDto(contigName, formData.startPos, formData.endPos))), + properties = formData.modifier.map(m => Json.obj("modifier" -> m)) + ) + + managementService.updateRegion(id, updateRequest, request.user.id).map { + case Right(_) => + Redirect(routes.GenomeRegionsCuratorController.listRegions(None, None, 1, 25)) + .flashing("success" -> "Genome region updated successfully") + case Left(error) => + getContigsForFormSync.map { contigs => + BadRequest(views.html.curator.genomeregions.editForm( + id, + genomeRegionForm.fill(formData).withGlobalError(error), + contigs, + genomicsConfig.supportedReferences + )) + }.getOrElse(BadRequest(error)) + } + case None => Future.successful(BadRequest("Invalid Contig")) + } } ) } @@ -235,260 +226,6 @@ class GenomeRegionsCuratorController @Inject()( } } - // ============================================================================ - // Cytoband UI Endpoints - // ============================================================================ - - def listCytobands(build: Option[String], page: Int = 1, pageSize: Int = 25): Action[AnyContent] = - withPermission("cytoband.view").async { implicit request => - for { - response <- managementService.listCytobands(build, page, pageSize) - } yield { - val totalPages = Math.max(1, (response.total + pageSize - 1) / pageSize) - Ok(views.html.curator.cytobands.list(response.cytobands, build, page, totalPages, pageSize, response.total, genomicsConfig.supportedReferences)) - } - } - - def cytobandsFragment(build: Option[String], page: Int = 1, pageSize: Int = 25): Action[AnyContent] = - withPermission("cytoband.view").async { implicit request => - for { - response <- managementService.listCytobands(build, page, pageSize) - } yield { - val totalPages = Math.max(1, (response.total + pageSize - 1) / pageSize) - Ok(views.html.curator.cytobands.listFragment(response.cytobands, build, page, totalPages, pageSize, response.total)) - } - } - - def 
cytobandDetailPanel(id: Int): Action[AnyContent] = - withPermission("cytoband.view").async { implicit request => - managementService.getCytoband(id).map { - case Some(cytoband) => Ok(views.html.curator.cytobands.detailPanel(cytoband)) - case None => NotFound("Cytoband not found") - } - } - - def createCytobandForm: Action[AnyContent] = - withPermission("cytoband.create").async { implicit request => - getContigsForForm.map { contigs => - Ok(views.html.curator.cytobands.createForm(cytobandForm, contigs, genomicsConfig.supportedReferences)) - } - } - - def createCytoband: Action[AnyContent] = - withPermission("cytoband.create").async { implicit request => - cytobandForm.bindFromRequest().fold( - formWithErrors => { - getContigsForForm.map { contigs => - BadRequest(views.html.curator.cytobands.createForm(formWithErrors, contigs, genomicsConfig.supportedReferences)) - } - }, - formData => { - val createRequest = CreateCytobandRequest( - genbankContigId = formData.genbankContigId, - name = formData.name, - startPos = formData.startPos, - endPos = formData.endPos, - stain = formData.stain - ) - managementService.createCytoband(createRequest, request.user.id).map { - case Right(_) => - Redirect(routes.GenomeRegionsCuratorController.listCytobands(None, 1, 25)) - .flashing("success" -> "Cytoband created successfully") - case Left(error) => - Redirect(routes.GenomeRegionsCuratorController.createCytobandForm) - .flashing("error" -> error) - } - } - ) - } - - def editCytobandForm(id: Int): Action[AnyContent] = - withPermission("cytoband.update").async { implicit request => - for { - cytobandOpt <- managementService.getCytoband(id) - contigs <- getContigsForForm - } yield cytobandOpt match { - case Some(cytoband) => - val formData = CytobandFormData( - cytoband.genbankContigId, - cytoband.name, - cytoband.startPos, - cytoband.endPos, - cytoband.stain - ) - Ok(views.html.curator.cytobands.editForm(id, cytobandForm.fill(formData), contigs, genomicsConfig.supportedReferences)) - 
case None => - NotFound("Cytoband not found") - } - } - - def updateCytoband(id: Int): Action[AnyContent] = - withPermission("cytoband.update").async { implicit request => - cytobandForm.bindFromRequest().fold( - formWithErrors => { - getContigsForForm.map { contigs => - BadRequest(views.html.curator.cytobands.editForm(id, formWithErrors, contigs, genomicsConfig.supportedReferences)) - } - }, - formData => { - val updateRequest = UpdateCytobandRequest( - name = Some(formData.name), - startPos = Some(formData.startPos), - endPos = Some(formData.endPos), - stain = Some(formData.stain) - ) - managementService.updateCytoband(id, updateRequest, request.user.id).map { - case Right(_) => - Redirect(routes.GenomeRegionsCuratorController.listCytobands(None, 1, 25)) - .flashing("success" -> "Cytoband updated successfully") - case Left(error) => - Redirect(routes.GenomeRegionsCuratorController.editCytobandForm(id)) - .flashing("error" -> error) - } - } - ) - } - - def deleteCytoband(id: Int): Action[AnyContent] = - withPermission("cytoband.delete").async { implicit request => - managementService.deleteCytoband(id, request.user.id).map { - case Right(_) => - Ok("").withHeaders("HX-Trigger" -> "cytobandDeleted") - case Left(error) => - BadRequest(error) - } - } - - // ============================================================================ - // STR Marker UI Endpoints - // ============================================================================ - - def listStrMarkers(build: Option[String], page: Int = 1, pageSize: Int = 25): Action[AnyContent] = - withPermission("str_marker.view").async { implicit request => - for { - response <- managementService.listStrMarkers(build, page, pageSize) - } yield { - val totalPages = Math.max(1, (response.total + pageSize - 1) / pageSize) - Ok(views.html.curator.strmarkers.list(response.markers, build, page, totalPages, pageSize, response.total, genomicsConfig.supportedReferences)) - } - } - - def strMarkersFragment(build: Option[String], 
page: Int = 1, pageSize: Int = 25): Action[AnyContent] = - withPermission("str_marker.view").async { implicit request => - for { - response <- managementService.listStrMarkers(build, page, pageSize) - } yield { - val totalPages = Math.max(1, (response.total + pageSize - 1) / pageSize) - Ok(views.html.curator.strmarkers.listFragment(response.markers, build, page, totalPages, pageSize, response.total)) - } - } - - def strMarkerDetailPanel(id: Int): Action[AnyContent] = - withPermission("str_marker.view").async { implicit request => - managementService.getStrMarker(id).map { - case Some(marker) => Ok(views.html.curator.strmarkers.detailPanel(marker)) - case None => NotFound("STR marker not found") - } - } - - def createStrMarkerForm: Action[AnyContent] = - withPermission("str_marker.create").async { implicit request => - getContigsForForm.map { contigs => - Ok(views.html.curator.strmarkers.createForm(strMarkerForm, contigs, genomicsConfig.supportedReferences)) - } - } - - def createStrMarker: Action[AnyContent] = - withPermission("str_marker.create").async { implicit request => - strMarkerForm.bindFromRequest().fold( - formWithErrors => { - getContigsForForm.map { contigs => - BadRequest(views.html.curator.strmarkers.createForm(formWithErrors, contigs, genomicsConfig.supportedReferences)) - } - }, - formData => { - val createRequest = CreateStrMarkerRequest( - genbankContigId = formData.genbankContigId, - name = formData.name, - startPos = formData.startPos, - endPos = formData.endPos, - period = formData.period, - verified = formData.verified, - note = formData.note - ) - managementService.createStrMarker(createRequest, request.user.id).map { - case Right(_) => - Redirect(routes.GenomeRegionsCuratorController.listStrMarkers(None, 1, 25)) - .flashing("success" -> "STR marker created successfully") - case Left(error) => - Redirect(routes.GenomeRegionsCuratorController.createStrMarkerForm) - .flashing("error" -> error) - } - } - ) - } - - def editStrMarkerForm(id: Int): 
Action[AnyContent] = - withPermission("str_marker.update").async { implicit request => - for { - markerOpt <- managementService.getStrMarker(id) - contigs <- getContigsForForm - } yield markerOpt match { - case Some(marker) => - val formData = StrMarkerFormData( - marker.genbankContigId, - marker.name, - marker.startPos, - marker.endPos, - marker.period, - marker.verified, - marker.note - ) - Ok(views.html.curator.strmarkers.editForm(id, strMarkerForm.fill(formData), contigs, genomicsConfig.supportedReferences)) - case None => - NotFound("STR marker not found") - } - } - - def updateStrMarker(id: Int): Action[AnyContent] = - withPermission("str_marker.update").async { implicit request => - strMarkerForm.bindFromRequest().fold( - formWithErrors => { - getContigsForForm.map { contigs => - BadRequest(views.html.curator.strmarkers.editForm(id, formWithErrors, contigs, genomicsConfig.supportedReferences)) - } - }, - formData => { - val updateRequest = UpdateStrMarkerRequest( - name = Some(formData.name), - startPos = Some(formData.startPos), - endPos = Some(formData.endPos), - period = Some(formData.period), - verified = Some(formData.verified), - note = formData.note - ) - managementService.updateStrMarker(id, updateRequest, request.user.id).map { - case Right(_) => - Redirect(routes.GenomeRegionsCuratorController.listStrMarkers(None, 1, 25)) - .flashing("success" -> "STR marker updated successfully") - case Left(error) => - Redirect(routes.GenomeRegionsCuratorController.editStrMarkerForm(id)) - .flashing("error" -> error) - } - } - ) - } - - def deleteStrMarker(id: Int): Action[AnyContent] = - withPermission("str_marker.delete").async { implicit request => - managementService.deleteStrMarker(id, request.user.id).map { - case Right(_) => - Ok("").withHeaders("HX-Trigger" -> "strMarkerDeleted") - case Left(error) => - BadRequest(error) - } - } - // ============================================================================ // Helper Methods // 
============================================================================ @@ -504,4 +241,4 @@ class GenomeRegionsCuratorController @Inject()( // This is a fallback for sync error handling - not ideal but simple None } -} +} \ No newline at end of file diff --git a/app/controllers/TreeController.scala b/app/controllers/TreeController.scala index 1de184b..25bdf96 100644 --- a/app/controllers/TreeController.scala +++ b/app/controllers/TreeController.scala @@ -52,6 +52,11 @@ class TreeController @Inject()(val controllerComponents: MessagesControllerCompo private val YConfig = TreeConfig(Y, "Y") private val MTConfig = TreeConfig(MT, "L") + private val VERTICAL_TREE_COOKIE = "showVerticalTree" + + private def shouldShowVerticalTree(request: RequestHeader): Boolean = { + request.cookies.get(VERTICAL_TREE_COOKIE).map(_.value.toBoolean).getOrElse(featureFlags.showVerticalTree) + } /** * Renders the Y-DNA tree page. @@ -65,7 +70,7 @@ class TreeController @Inject()(val controllerComponents: MessagesControllerCompo * @return an action that renders the Y-DNA tree page as an HTML response */ def ytree(rootHaplogroup: Option[String]): Action[AnyContent] = Action { implicit request => - Ok(views.html.ytree(rootHaplogroup)) + Ok(views.html.ytree(rootHaplogroup, shouldShowVerticalTree(request))) } /** @@ -80,7 +85,7 @@ class TreeController @Inject()(val controllerComponents: MessagesControllerCompo * @return an action that renders the MT-DNA tree page as an HTML response */ def mtree(rootHaplogroup: Option[String]): Action[AnyContent] = Action { implicit request => - Ok(views.html.mtree(rootHaplogroup)) + Ok(views.html.mtree(rootHaplogroup, shouldShowVerticalTree(request))) } /** @@ -188,8 +193,11 @@ class TreeController @Inject()(val controllerComponents: MessagesControllerCompo config: TreeConfig, cacheKey: String )(using request: Request[AnyContent]): Future[Result] = { - cache.getOrElseUpdate(cacheKey, 24.hours) { - buildTreeFragment(rootHaplogroup, config) + val 
useVerticalTree = shouldShowVerticalTree(request) + val effectiveCacheKey = s"$cacheKey-vertical:$useVerticalTree" + + cache.getOrElseUpdate(effectiveCacheKey, 24.hours) { + buildTreeFragment(rootHaplogroup, config, useVerticalTree) } } @@ -198,17 +206,25 @@ class TreeController @Inject()(val controllerComponents: MessagesControllerCompo */ private def buildTreeFragment( rootHaplogroup: Option[String], - config: TreeConfig + config: TreeConfig, + showVerticalTree: Boolean )(using request: Request[AnyContent]): Future[Result] = { val haplogroupName = rootHaplogroup.getOrElse(config.defaultRoot) val isAbsoluteTopRootView = haplogroupName == config.defaultRoot + + val orientation = if (showVerticalTree) services.TreeOrientation.Vertical else services.TreeOrientation.Horizontal treeService.buildTreeResponse(haplogroupName, config.haplogroupType, FragmentRoute) .map { treeDto => val treeViewModel: Option[TreeViewModel] = treeDto.subclade.flatMap { _ => - services.TreeLayoutService.layoutTree(treeDto, isAbsoluteTopRootView) + services.TreeLayoutService.layoutTree(treeDto, isAbsoluteTopRootView, orientation) + } + + if (showVerticalTree) { + Ok(views.html.fragments.verticalTree(treeDto, config.haplogroupType, treeViewModel, request.uri)) + } else { + Ok(views.html.fragments.haplogroup(treeDto, config.haplogroupType, treeViewModel, request.uri, featureFlags.showBranchAgeEstimates)) } - Ok(views.html.fragments.haplogroup(treeDto, config.haplogroupType, treeViewModel, request.uri, featureFlags.showBranchAgeEstimates)) } .recover { case _: IllegalArgumentException => @@ -242,6 +258,8 @@ class TreeController @Inject()(val controllerComponents: MessagesControllerCompo val haplogroupName = rootHaplogroup.getOrElse(config.defaultRoot) val isAbsoluteTopRootView = haplogroupName == config.defaultRoot + val showVerticalTree = shouldShowVerticalTree(request) + val orientation = if (showVerticalTree) services.TreeOrientation.Vertical else services.TreeOrientation.Horizontal 
treeService.buildTreeResponse(haplogroupName, config.haplogroupType, routeType) .map { treeDto => @@ -252,10 +270,14 @@ class TreeController @Inject()(val controllerComponents: MessagesControllerCompo Ok(Json.toJson(apiBody)) case FragmentRoute => val treeViewModel: Option[TreeViewModel] = treeDto.subclade.flatMap { rootNodeDTO => - services.TreeLayoutService.layoutTree(treeDto, isAbsoluteTopRootView) + services.TreeLayoutService.layoutTree(treeDto, isAbsoluteTopRootView, orientation) + } + + if (showVerticalTree) { + Ok(views.html.fragments.verticalTree(treeDto, config.haplogroupType, treeViewModel, request.uri)) + } else { + Ok(views.html.fragments.haplogroup(treeDto, config.haplogroupType, treeViewModel, request.uri, featureFlags.showBranchAgeEstimates)) } - - Ok(views.html.fragments.haplogroup(treeDto, config.haplogroupType, treeViewModel, request.uri, featureFlags.showBranchAgeEstimates)) } } .recover { diff --git a/app/controllers/VariantApiController.scala b/app/controllers/VariantApiController.scala index daa910b..bfdc6cf 100644 --- a/app/controllers/VariantApiController.scala +++ b/app/controllers/VariantApiController.scala @@ -3,11 +3,11 @@ package controllers import actions.ApiSecurityAction import jakarta.inject.{Inject, Singleton} import models.api.* -import models.dal.domain.genomics.Variant +import models.domain.genomics.VariantV2 import play.api.Logger -import play.api.libs.json.Json +import play.api.libs.json.{JsObject, Json} import play.api.mvc.{Action, AnyContent, BaseController, ControllerComponents} -import repositories.{GenbankContigRepository, VariantAliasRepository, VariantRepository} +import repositories.VariantV2Repository import scala.concurrent.{ExecutionContext, Future} @@ -15,82 +15,53 @@ import scala.concurrent.{ExecutionContext, Future} * Private API controller for bulk variant operations. * Secured with X-API-Key authentication. * - * Endpoints are unlisted and intended for system integration. 
+ * Updated for VariantV2 schema with JSONB coordinates and aliases. */ @Singleton class VariantApiController @Inject()( - val controllerComponents: ControllerComponents, - secureApi: ApiSecurityAction, - variantRepository: VariantRepository, - variantAliasRepository: VariantAliasRepository, - genbankContigRepository: GenbankContigRepository - )(implicit ec: ExecutionContext) extends BaseController { + val controllerComponents: ControllerComponents, + secureApi: ApiSecurityAction, + variantRepository: VariantV2Repository +)(implicit ec: ExecutionContext) extends BaseController { private val logger = Logger(this.getClass) /** - * Bulk add reference builds to existing variants. - * Matches variants by name or rsId, then creates new variant records - * for the specified reference genome if they don't exist. + * Bulk add reference builds (coordinates) to existing variants. + * Matches variants by name or rsId, then adds coordinates for the specified reference genome. */ def bulkAddBuilds(): Action[BulkAddVariantBuildsRequest] = secureApi.jsonAction[BulkAddVariantBuildsRequest].async { request => val requests = request.body.variants logger.info(s"Bulk add builds request for ${requests.size} variants") - // Collect all unique contig+genome combinations needed - val contigGenomePairs = requests.map(r => (r.contig, r.refGenome)).distinct - - // Resolve all contigs - genbankContigRepository.findByCommonNames(contigGenomePairs.map(_._1).distinct).flatMap { contigs => - // Build lookup map: (commonName, refGenome) -> contigId - val contigMap = contigs.flatMap { c => - for { - cn <- c.commonName - rg <- c.referenceGenome - } yield (cn, rg) -> c.id.get - }.toMap - - // Also try without 'chr' prefix - val contigMapWithFallback = contigMap ++ contigs.flatMap { c => - for { - cn <- c.commonName - rg <- c.referenceGenome - } yield (cn.stripPrefix("chr"), rg) -> c.id.get - }.toMap - - // Process each request - val resultFutures = requests.map { req => - processAddBuildRequest(req, 
/**
 * Resolves a single add-build request against the consolidated V2 variant store.
 *
 * Lookup order: by canonical name when `req.name` is given, otherwise by alias
 * using `req.rsId` (taking the first match). On a hit, the requested reference
 * genome's coordinates are added to the variant's JSONB `coordinates` map unless
 * that build key is already present.
 *
 * @param req the build to attach (name/rsId identifier, refGenome, contig, position, alleles)
 * @return a per-request result with status "success", "skipped", or "error";
 *         never a failed Future (repository failures are mapped to an "error" result)
 */
private def processAddBuildRequest(req: AddVariantBuildRequest): Future[VariantOperationResult] = {
  val identifier = req.name.orElse(req.rsId)

  identifier match {
    case None =>
      // Neither identifier supplied — nothing to look up.
      Future.successful(VariantOperationResult(
        name = req.name,
        rsId = req.rsId,
        status = "error",
        message = Some("Either name or rsId must be provided")
      ))

    case Some(id) =>
      // Find variant by name or alias.
      // req.rsId.get is safe here: identifier is Some, so in the None-name
      // branch rsId must be defined (identifier = name.orElse(rsId)).
      val findFuture = req.name match {
        case Some(name) => variantRepository.findByCanonicalName(name)
        case None => variantRepository.findByAlias(req.rsId.get).map(_.headOption)
      }

      findFuture.flatMap {
        case None =>
          Future.successful(VariantOperationResult(
            name = req.name,
            rsId = req.rsId,
            status = "error",
            message = Some(s"Variant not found with identifier '$id'")
          ))

        case Some(variant) =>
          // Check if this build already exists: coordinates JSONB is keyed by
          // reference-genome name; a malformed/absent map falls back to empty.
          val existingCoords = variant.coordinates.asOpt[Map[String, JsObject]].getOrElse(Map.empty)
          if (existingCoords.contains(req.refGenome)) {
            Future.successful(VariantOperationResult(
              name = req.name,
              rsId = req.rsId,
              status = "skipped",
              message = Some(s"Build ${req.refGenome} already exists"),
              variantId = variant.variantId
            ))
          } else {
            // Add the new coordinates under the refGenome key.
            // Keys mirror the coordinate schema used elsewhere: contig/position/ref/alt.
            val newCoords = Json.obj(
              "contig" -> req.contig,
              "position" -> req.position,
              "ref" -> req.refAllele,
              "alt" -> req.altAllele
            )

            // NOTE(review): variant.variantId.get assumes repository-loaded
            // variants always carry an id — TODO confirm VariantV2Repository
            // never returns unsaved rows.
            variantRepository.addCoordinates(variant.variantId.get, req.refGenome, newCoords).map { _ =>
              VariantOperationResult(
                name = req.name,
                rsId = req.rsId,
                status = "success",
                message = Some(s"Added coordinates for ${req.refGenome}"),
                variantId = variant.variantId
              )
            }.recover { case e: Exception =>
              logger.error(s"Failed to add coordinates: ${e.getMessage}", e)
              VariantOperationResult(
                name = req.name,
                rsId = req.rsId,
                status = "error",
                message = Some(s"Database error: ${e.getMessage}")
              )
            }
          }
      }
  }
}

/**
 * Resolves a single rsId update: looks the variant up by canonical name first,
 * then by alias, and records the rsId as an alias on the match.
 *
 * NOTE(review): when the alias lookup matches multiple variants, only
 * `variants.head` is updated. The replaced (pre-V2) implementation updated
 * ALL records matching the name — confirm single-target is intended under the
 * consolidated schema, and that findByAlias has a deterministic ordering.
 *
 * @param req the name to match and the rsId to attach
 * @return a per-request result; repository failures surface as "error" results
 */
private def processUpdateRsIdRequest(req: UpdateVariantRsIdRequest): Future[VariantOperationResult] = {
  variantRepository.findByCanonicalName(req.name).flatMap {
    case None =>
      // Try finding by alias
      variantRepository.findByAlias(req.name).flatMap { variants =>
        if (variants.isEmpty) {
          Future.successful(VariantOperationResult(
            name = Some(req.name),
            rsId = Some(req.rsId),
            status = "error",
            message = Some(s"No variant found with name '${req.name}'")
          ))
        } else {
          updateVariantRsId(variants.head, req)
        }
      }
    case Some(variant) =>
      updateVariantRsId(variant, req)
  }
}

/**
 * Appends the request's rsId to the given variant's aliases JSONB
 * (alias type "rs_id", source "bulk_update").
 *
 * @param variant the already-resolved target variant (must have an id)
 * @param req     carries the rsId to record and the name echoed in the result
 * @return "success" result on write, "error" result on repository failure
 */
private def updateVariantRsId(variant: VariantV2, req: UpdateVariantRsIdRequest): Future[VariantOperationResult] = {
  // Add rsId as an alias.
  // NOTE(review): variantId.get assumes a persisted variant — TODO confirm.
  variantRepository.addAlias(variant.variantId.get, "rs_id", req.rsId, Some("bulk_update")).map { _ =>
    VariantOperationResult(
      name = Some(req.name),
      rsId = Some(req.rsId),
      status = "success",
      message = Some("Added rsId as alias"),
      variantId = variant.variantId
    )
  }.recover { case e: Exception =>
    logger.error(s"Failed to update rsId: ${e.getMessage}", e)
    VariantOperationResult(
      name = Some(req.name),
      rsId = Some(req.rsId),
      status = "error",
      message = Some(s"Database error: ${e.getMessage}")
    )
  }
}
*/ def bulkUpdateAliasSources(): Action[BulkUpdateAliasSourcesRequest] = secureApi.jsonAction[BulkUpdateAliasSourcesRequest].async { request => @@ -225,7 +205,7 @@ class VariantApiController @Inject()( logger.info(s"Bulk update alias sources request for ${updates.size} prefix patterns") val resultFutures = updates.map { req => - variantAliasRepository.bulkUpdateSourceByPrefix(req.aliasPrefix, req.newSource, req.oldSource).map { count => + variantRepository.bulkUpdateAliasSource(req.aliasPrefix, req.newSource, req.oldSource).map { count => AliasSourceUpdateResult( aliasPrefix = req.aliasPrefix, newSource = req.newSource, @@ -259,18 +239,12 @@ class VariantApiController @Inject()( /** * Get statistics about alias sources in the database. - * Useful for planning source cleanup operations. */ def getAliasSourceStats(): Action[AnyContent] = secureApi.async { _ => - for { - sources <- variantAliasRepository.getDistinctSources() - counts <- Future.traverse(sources)(source => - variantAliasRepository.countBySource(source).map(count => AliasSourceSummary(source, count)) - ) - } yield { - val totalAliases = counts.map(_.count).sum + variantRepository.getAliasSourceStats().map { stats => + val totalAliases = stats.map(_._2).sum Ok(Json.toJson(AliasSourceStatsResponse( - sources = counts.sortBy(-_.count), + sources = stats.map { case (source, count) => AliasSourceSummary(source, count) }, totalAliases = totalAliases ))) } @@ -278,10 +252,9 @@ class VariantApiController @Inject()( /** * Preview how many aliases would be affected by a source update. - * Useful for dry-run before actual update. 
*/ def previewAliasSourceUpdate(aliasPrefix: String, currentSource: String): Action[AnyContent] = secureApi.async { _ => - variantAliasRepository.countByPrefixAndSource(aliasPrefix, currentSource).map { count => + variantRepository.countAliasesByPrefixAndSource(aliasPrefix, Some(currentSource)).map { count => Ok(Json.obj( "aliasPrefix" -> aliasPrefix, "currentSource" -> currentSource, @@ -289,4 +262,143 @@ class VariantApiController @Inject()( )) } } + + // ============================================================================ + // DU Naming Authority Endpoints + // ============================================================================ + + /** + * Assign a DU name to a single variant. + * The variant must exist and not already have a DU name. + */ + def assignDuName(variantId: Int): Action[AnyContent] = secureApi.async { _ => + variantRepository.findById(variantId).flatMap { + case None => + Future.successful(NotFound(Json.toJson(DuNameAssignmentResult( + variantId = variantId, + duName = None, + previousName = None, + status = "error", + message = Some(s"Variant $variantId not found") + )))) + + case Some(variant) => + // Check if already has a DU name + if (variant.canonicalName.exists(variantRepository.isDuName)) { + Future.successful(Ok(Json.toJson(DuNameAssignmentResult( + variantId = variantId, + duName = variant.canonicalName, + previousName = variant.canonicalName, + status = "skipped", + message = Some("Variant already has a DU name") + )))) + } else { + // Assign new DU name + assignDuNameToVariant(variant).map { result => + Ok(Json.toJson(result)) + } + } + } + } + + /** + * Bulk assign DU names to multiple variants. + * Skips variants that already have DU names. 
/**
 * Bulk assign DU names to multiple variants.
 * Skips variants that already have DU names; individual failures do not abort
 * the batch.
 *
 * Variants are processed strictly one at a time (sequential foldLeft over
 * Futures) so that names drawn from the DU sequence are assigned in the order
 * the ids were submitted.
 */
def bulkAssignDuNames(): Action[BulkAssignDuNamesRequest] =
  secureApi.jsonAction[BulkAssignDuNamesRequest].async { request =>
    val variantIds = request.body.variantIds
    logger.info(s"Bulk assign DU names request for ${variantIds.size} variants")

    // Process sequentially to maintain name ordering
    variantIds.foldLeft(Future.successful(Seq.empty[DuNameAssignmentResult])) { (accFuture, variantId) =>
      accFuture.flatMap { acc =>
        processAssignDuName(variantId).map(result => acc :+ result)
      }
    }.map { results =>
      val succeeded = results.count(_.status == "success")
      val failed = results.count(_.status == "error")
      val skipped = results.count(_.status == "skipped")

      logger.info(s"Bulk assign DU names completed: $succeeded succeeded, $failed failed, $skipped skipped")

      Ok(Json.toJson(BulkDuNameAssignmentResponse(
        total = results.size,
        succeeded = succeeded,
        failed = failed,
        skipped = skipped,
        results = results
      )))
    }
  }

/**
 * Returns the next DU name from the naming sequence.
 *
 * NOTE(review): the response note below states "This name has been reserved",
 * which implies nextDuName() consumes a name from the sequence, but this
 * endpoint was documented as a preview "without consuming". Confirm whether
 * nextDuName() advances the sequence; either the response note or the intended
 * preview semantics is wrong as written.
 */
def previewNextDuName(): Action[AnyContent] = secureApi.async { _ =>
  variantRepository.nextDuName().map { nextName =>
    Ok(Json.obj(
      "nextDuName" -> nextName,
      "note" -> "This name has been reserved. Use assignDuName to apply it to a variant."
    ))
  }
}

/**
 * Core DU-name assignment for one variant id: resolves the variant, skips it
 * if its canonical name is already a DU name, otherwise assigns a fresh one.
 *
 * @param variantId the variant to (potentially) rename
 * @return a result with status "success", "skipped", or "error" — never a
 *         failed Future for the not-found / already-named cases
 */
private def processAssignDuName(variantId: Int): Future[DuNameAssignmentResult] = {
  variantRepository.findById(variantId).flatMap {
    case None =>
      Future.successful(DuNameAssignmentResult(
        variantId = variantId,
        duName = None,
        previousName = None,
        status = "error",
        message = Some(s"Variant $variantId not found")
      ))

    case Some(variant) =>
      if (variant.canonicalName.exists(variantRepository.isDuName)) {
        // Idempotent: re-running assignment over already-named variants is a no-op.
        Future.successful(DuNameAssignmentResult(
          variantId = variantId,
          duName = variant.canonicalName,
          previousName = variant.canonicalName,
          status = "skipped",
          message = Some("Variant already has a DU name")
        ))
      } else {
        assignDuNameToVariant(variant)
      }
  }
}

/**
 * Draws the next DU name and writes it as the variant's canonical name,
 * setting namingStatus to Named.
 *
 * NOTE(review): if update() returns false the drawn DU name is reported as an
 * error but may already have been consumed from the sequence, leaving a gap —
 * confirm whether nextDuName() reserves on call (see previewNextDuName note).
 *
 * @param variant the target variant (must be persisted; variantId.get is used)
 * @return success result carrying the new name, or an error result if the
 *         repository update reports failure
 */
private def assignDuNameToVariant(variant: VariantV2): Future[DuNameAssignmentResult] = {
  val previousName = variant.canonicalName

  for {
    duName <- variantRepository.nextDuName()
    updated = variant.copy(
      canonicalName = Some(duName),
      namingStatus = models.domain.genomics.NamingStatus.Named
    )
    success <- variantRepository.update(updated)
  } yield {
    if (success) {
      logger.info(s"Assigned DU name $duName to variant ${variant.variantId.get} (was: ${previousName.getOrElse("unnamed")})")
      DuNameAssignmentResult(
        variantId = variant.variantId.get,
        duName = Some(duName),
        previousName = previousName,
        status = "success",
        message = Some(s"Assigned $duName")
      )
    } else {
      DuNameAssignmentResult(
        variantId = variant.variantId.get,
        duName = None,
        previousName = previousName,
        status = "error",
        message = Some("Failed to update variant")
      )
    }
  }
}
}
models.domain.genomics.VariantV2 +import models.domain.haplogroups.Haplogroup import org.webjars.play.WebJarsUtil import play.api.cache.AsyncCacheApi import play.api.i18n.I18nSupport import play.api.mvc.* -import repositories.{HaplogroupVariantRepository, VariantAliasRepository, VariantRepository} +import repositories.{HaplogroupVariantRepository, VariantV2Repository} import scala.concurrent.duration.* import scala.concurrent.{ExecutionContext, Future} @@ -18,12 +19,11 @@ import scala.concurrent.{ExecutionContext, Future} */ @Singleton class VariantBrowserController @Inject()( - val controllerComponents: ControllerComponents, - variantRepository: VariantRepository, - variantAliasRepository: VariantAliasRepository, - haplogroupVariantRepository: HaplogroupVariantRepository, - cache: AsyncCacheApi - )(using webJarsUtil: WebJarsUtil, ec: ExecutionContext) + val controllerComponents: ControllerComponents, + variantRepository: VariantV2Repository, + haplogroupVariantRepository: HaplogroupVariantRepository, + cache: AsyncCacheApi +)(using webJarsUtil: WebJarsUtil, ec: ExecutionContext) extends BaseController with I18nSupport { private val DefaultPageSize = 25 @@ -31,20 +31,13 @@ class VariantBrowserController @Inject()( // Cache durations - public view can be stale private val SearchCacheDuration = 15.minutes private val DetailCacheDuration = 1.hour - private val TotalCountCacheDuration = 30.minutes /** * Main variant browser page with search functionality. 
*/ - def index(query: Option[String], page: Int, pageSize: Int): Action[AnyContent] = Action.async { + def index(query: Option[String], page: Int, pageSize: Int): Action[AnyContent] = Action { implicit request: Request[AnyContent] => - val offset = (page - 1) * pageSize - for { - (variantGroups, totalCount) <- getCachedSearchResults(query.getOrElse(""), offset, pageSize) - } yield { - val totalPages = Math.max(1, (totalCount + pageSize - 1) / pageSize) - Ok(views.html.variants.browser(variantGroups, query, page, totalPages, pageSize, totalCount)) - } + Ok(views.html.variants.browser(query, page, pageSize)) } /** @@ -54,10 +47,10 @@ class VariantBrowserController @Inject()( implicit request: Request[AnyContent] => val offset = (page - 1) * pageSize for { - (variantGroups, totalCount) <- getCachedSearchResults(query.getOrElse(""), offset, pageSize) + (variants, totalCount) <- getCachedSearchResults(query.getOrElse(""), offset, pageSize) } yield { val totalPages = Math.max(1, (totalCount + pageSize - 1) / pageSize) - Ok(views.html.variants.listFragment(variantGroups, query, page, totalPages, pageSize, totalCount)) + Ok(views.html.variants.listFragment(variants, query, page, totalPages, pageSize, totalCount)) } } @@ -72,12 +65,11 @@ class VariantBrowserController @Inject()( /** * Get cached search results or fetch from database. - * Cache key includes query, offset, and limit for proper pagination caching. 
/**
 * Get cached search results or fetch from database.
 * Cache key includes the normalized query plus offset and limit, so each
 * pagination window is cached independently for SearchCacheDuration.
 */
private def getCachedSearchResults(query: String, offset: Int, limit: Int): Future[(Seq[VariantV2], Int)] = {
  // Normalize casing/whitespace in the key only; the raw query is what is
  // actually passed to the repository.
  val cacheKey = s"variant-browser:${query.toLowerCase.trim}:$offset:$limit"
  cache.getOrElseUpdate(cacheKey, SearchCacheDuration) {
    variantRepository.searchPaginated(query, offset, limit)
  }
}

/**
 * Get cached detail panel or fetch from database.
 *
 * The fully rendered Result is cached for DetailCacheDuration (1 hour).
 * NOTE(review): this also caches the NotFound response — a variant created
 * shortly after a miss will appear missing for up to an hour. Confirm that is
 * acceptable for the public browser, or cache only on the Some branch.
 */
private def getCachedDetailPanel(id: Int)(implicit request: Request[AnyContent]): Future[Result] = {
  val cacheKey = s"variant-browser-detail:$id"
  cache.getOrElseUpdate(cacheKey, DetailCacheDuration) {
    for {
      variantOpt <- variantRepository.findById(id)
      haplogroups <- haplogroupVariantRepository.getHaplogroupsByVariant(id)
    } yield {
      variantOpt match {
        case Some(variant) =>
          Ok(views.html.variants.detailPanel(variant, haplogroups))
        case None =>
          NotFound("Variant not found")
      }
    }
  }
}
+0,0 @@ -package controllers - -import jakarta.inject.{Inject, Singleton} -import models.dal.domain.genomics.Variant -import play.api.libs.json.{Json, OFormat} -import play.api.mvc.{Action, AnyContent, BaseController, ControllerComponents} -import repositories.VariantRepository - -import scala.concurrent.ExecutionContext - -@Singleton -class VariantController @Inject()( - val controllerComponents: ControllerComponents, - variantRepository: VariantRepository - )(implicit ec: ExecutionContext) extends BaseController { - - implicit val variantFormat: OFormat[Variant] = Json.format[Variant] - - /** - * Searches for variants by name (rsId or commonName). - * - * @param name The name to search for (e.g., "rs123" or "M269"). - * @return A JSON array of matching variants. - */ - def search(name: String): Action[AnyContent] = Action.async { implicit request => - variantRepository.searchByName(name).map { variants => - Ok(Json.toJson(variants)) - } - } -} diff --git a/app/controllers/VariantPublicApiController.scala b/app/controllers/VariantPublicApiController.scala index d97709f..9e4178f 100644 --- a/app/controllers/VariantPublicApiController.scala +++ b/app/controllers/VariantPublicApiController.scala @@ -88,7 +88,7 @@ class VariantPublicApiController @Inject()( ).withHeaders( "Content-Type" -> "application/gzip", "X-Variant-Count" -> metadata.map(_.variantCount.toString).getOrElse("unknown"), - "X-Generated-At" -> metadata.map(_.generatedAt).getOrElse("unknown") + "X-Generated-At" -> metadata.map(_.generatedAt.toString).getOrElse("unknown") ) } else { NotFound(Json.obj( diff --git a/app/models/api/VariantApiModels.scala b/app/models/api/VariantApiModels.scala index 8b7743b..80d5205 100644 --- a/app/models/api/VariantApiModels.scala +++ b/app/models/api/VariantApiModels.scala @@ -180,3 +180,62 @@ case class AliasSourceStatsResponse( object AliasSourceStatsResponse { implicit val format: OFormat[AliasSourceStatsResponse] = Json.format[AliasSourceStatsResponse] } + +// 
// ============================================================================
// DU Naming Authority Models
// ============================================================================

/**
 * Request to assign a DU name to a variant.
 * Variant is identified by variantId.
 *
 * @param variantId The variant ID to assign a DU name to
 */
case class AssignDuNameRequest(
  variantId: Int
)

object AssignDuNameRequest {
  implicit val format: OFormat[AssignDuNameRequest] = Json.format[AssignDuNameRequest]
}

/**
 * Bulk request to assign DU names to multiple variants.
 *
 * @param variantIds ids to process; assignment order follows this sequence
 */
case class BulkAssignDuNamesRequest(
  variantIds: Seq[Int]
)

object BulkAssignDuNamesRequest {
  implicit val format: OFormat[BulkAssignDuNamesRequest] = Json.format[BulkAssignDuNamesRequest]
}

/**
 * Result of a DU name assignment operation.
 *
 * @param variantId    the variant the operation targeted
 * @param duName       the newly assigned DU name, when status is "success"
 *                     (or the existing one when "skipped")
 * @param previousName the canonical name before assignment, if any
 * @param status       "success", "skipped", or "error"
 * @param message      human-readable detail for the status
 */
case class DuNameAssignmentResult(
  variantId: Int,
  duName: Option[String],
  previousName: Option[String],
  status: String,
  message: Option[String] = None
)

object DuNameAssignmentResult {
  implicit val format: OFormat[DuNameAssignmentResult] = Json.format[DuNameAssignmentResult]
}
+ */ +case class BulkDuNameAssignmentResponse( + total: Int, + succeeded: Int, + failed: Int, + skipped: Int, + results: Seq[DuNameAssignmentResult] +) + +object BulkDuNameAssignmentResponse { + implicit val format: OFormat[BulkDuNameAssignmentResponse] = Json.format[BulkDuNameAssignmentResponse] +} diff --git a/app/models/api/genomics/GenomeRegionsManagementModels.scala b/app/models/api/genomics/GenomeRegionsManagementModels.scala index 6b0c228..0fb5d76 100644 --- a/app/models/api/genomics/GenomeRegionsManagementModels.scala +++ b/app/models/api/genomics/GenomeRegionsManagementModels.scala @@ -1,22 +1,30 @@ package models.api.genomics -import play.api.libs.json.{Json, OFormat, Reads} +import play.api.libs.json.{JsValue, Json, OFormat} /** * API DTOs for Genome Regions Management (CRUD operations). */ +case class RegionCoordinateDto( + contig: String, + start: Long, + end: Long +) + +object RegionCoordinateDto { + implicit val format: OFormat[RegionCoordinateDto] = Json.format[RegionCoordinateDto] +} + // ============================================================================ // Request DTOs // ============================================================================ case class CreateGenomeRegionRequest( - genbankContigId: Int, regionType: String, name: Option[String] = None, - startPos: Long, - endPos: Long, - modifier: Option[BigDecimal] = None + coordinates: Map[String, RegionCoordinateDto], + properties: Option[JsValue] = None ) object CreateGenomeRegionRequest { @@ -26,65 +34,14 @@ object CreateGenomeRegionRequest { case class UpdateGenomeRegionRequest( regionType: Option[String] = None, name: Option[String] = None, - startPos: Option[Long] = None, - endPos: Option[Long] = None, - modifier: Option[BigDecimal] = None + coordinates: Option[Map[String, RegionCoordinateDto]] = None, + properties: Option[JsValue] = None ) object UpdateGenomeRegionRequest { implicit val format: OFormat[UpdateGenomeRegionRequest] = Json.format[UpdateGenomeRegionRequest] } 
-case class CreateCytobandRequest( - genbankContigId: Int, - name: String, - startPos: Long, - endPos: Long, - stain: String -) - -object CreateCytobandRequest { - implicit val format: OFormat[CreateCytobandRequest] = Json.format[CreateCytobandRequest] -} - -case class UpdateCytobandRequest( - name: Option[String] = None, - startPos: Option[Long] = None, - endPos: Option[Long] = None, - stain: Option[String] = None -) - -object UpdateCytobandRequest { - implicit val format: OFormat[UpdateCytobandRequest] = Json.format[UpdateCytobandRequest] -} - -case class CreateStrMarkerRequest( - genbankContigId: Int, - name: String, - startPos: Long, - endPos: Long, - period: Int, - verified: Boolean = false, - note: Option[String] = None -) - -object CreateStrMarkerRequest { - implicit val format: OFormat[CreateStrMarkerRequest] = Json.format[CreateStrMarkerRequest] -} - -case class UpdateStrMarkerRequest( - name: Option[String] = None, - startPos: Option[Long] = None, - endPos: Option[Long] = None, - period: Option[Int] = None, - verified: Option[Boolean] = None, - note: Option[String] = None -) - -object UpdateStrMarkerRequest { - implicit val format: OFormat[UpdateStrMarkerRequest] = Json.format[UpdateStrMarkerRequest] -} - // ============================================================================ // Bulk Request DTOs // ============================================================================ @@ -95,70 +52,22 @@ object BulkCreateGenomeRegionsRequest { implicit val format: OFormat[BulkCreateGenomeRegionsRequest] = Json.format[BulkCreateGenomeRegionsRequest] } -case class BulkCreateCytobandsRequest(cytobands: Seq[CreateCytobandRequest]) - -object BulkCreateCytobandsRequest { - implicit val format: OFormat[BulkCreateCytobandsRequest] = Json.format[BulkCreateCytobandsRequest] -} - -case class BulkCreateStrMarkersRequest(markers: Seq[CreateStrMarkerRequest]) - -object BulkCreateStrMarkersRequest { - implicit val format: OFormat[BulkCreateStrMarkersRequest] = 
Json.format[BulkCreateStrMarkersRequest] -} - // ============================================================================ -// Response DTOs (with additional contig info) +// Response DTOs // ============================================================================ case class GenomeRegionDetailDto( id: Int, - genbankContigId: Int, - contigName: Option[String], - referenceGenome: Option[String], regionType: String, name: Option[String], - startPos: Long, - endPos: Long, - modifier: Option[BigDecimal] + coordinates: Map[String, RegionCoordinateDto], + properties: JsValue ) object GenomeRegionDetailDto { implicit val format: OFormat[GenomeRegionDetailDto] = Json.format[GenomeRegionDetailDto] } -case class CytobandDetailDto( - id: Int, - genbankContigId: Int, - contigName: Option[String], - referenceGenome: Option[String], - name: String, - startPos: Long, - endPos: Long, - stain: String -) - -object CytobandDetailDto { - implicit val format: OFormat[CytobandDetailDto] = Json.format[CytobandDetailDto] -} - -case class StrMarkerDetailDto( - id: Int, - genbankContigId: Int, - contigName: Option[String], - referenceGenome: Option[String], - name: String, - startPos: Long, - endPos: Long, - period: Int, - verified: Boolean, - note: Option[String] -) - -object StrMarkerDetailDto { - implicit val format: OFormat[StrMarkerDetailDto] = Json.format[StrMarkerDetailDto] -} - // ============================================================================ // List Response DTOs // ============================================================================ @@ -174,28 +83,6 @@ object GenomeRegionListResponse { implicit val format: OFormat[GenomeRegionListResponse] = Json.format[GenomeRegionListResponse] } -case class CytobandListResponse( - cytobands: Seq[CytobandDetailDto], - total: Int, - page: Int, - pageSize: Int -) - -object CytobandListResponse { - implicit val format: OFormat[CytobandListResponse] = Json.format[CytobandListResponse] -} - -case class 
StrMarkerListResponse( - markers: Seq[StrMarkerDetailDto], - total: Int, - page: Int, - pageSize: Int -) - -object StrMarkerListResponse { - implicit val format: OFormat[StrMarkerListResponse] = Json.format[StrMarkerListResponse] -} - // ============================================================================ // Bulk Operation Response // ============================================================================ @@ -220,4 +107,4 @@ case class BulkOperationResponse( object BulkOperationResponse { implicit val format: OFormat[BulkOperationResponse] = Json.format[BulkOperationResponse] -} +} \ No newline at end of file diff --git a/app/models/dal/DatabaseSchema.scala b/app/models/dal/DatabaseSchema.scala index 766d4b4..bfd88d5 100644 --- a/app/models/dal/DatabaseSchema.scala +++ b/app/models/dal/DatabaseSchema.scala @@ -70,17 +70,22 @@ object DatabaseSchema { val genbankContigs = TableQuery[GenbankContigsTable] val geneAnnotations = TableQuery[GeneAnnotationsTable] val populations = TableQuery[PopulationsTable] - val sequenceFiles = TableQuery[SequenceFilesTable] // Added back + val sequenceFiles = TableQuery[SequenceFilesTable] val sequenceLibraries = TableQuery[SequenceLibrariesTable] val sequencingLabs = TableQuery[SequencingLabsTable] val sequencerInstruments = TableQuery[SequencerInstrumentsTable] val specimenDonors = TableQuery[SpecimenDonorsTable] val validationServices = TableQuery[ValidationServicesTable] - val variants = TableQuery[VariantsTable] - val variantAliases = TableQuery[VariantAliasTable] val testTypeDefinition = TableQuery[TestTypeTable] + // Consolidated variant schema (replaces variant + variant_alias) + val variantsV2 = TableQuery[VariantV2Table] + val haplogroupCharacterStates = TableQuery[HaplogroupCharacterStateTable] + val branchMutations = TableQuery[BranchMutationTable] + val biosampleVariantCalls = TableQuery[BiosampleVariantCallTable] + val strMutationRates = TableQuery[StrMutationRateTable] + // New tables for Atmosphere Lexicon 
sync val populationBreakdowns = TableQuery[PopulationBreakdownTable] val populationComponents = TableQuery[PopulationComponentTable] @@ -91,8 +96,6 @@ object DatabaseSchema { // Genome regions API tables val genomeRegions = TableQuery[GenomeRegionTable] val genomeRegionVersions = TableQuery[GenomeRegionVersionTable] - val cytobands = TableQuery[CytobandTable] - val strMarkers = TableQuery[StrMarkerTable] } object haplogroups { diff --git a/app/models/dal/MyPostgresProfile.scala b/app/models/dal/MyPostgresProfile.scala index 36b675c..a9f3389 100644 --- a/app/models/dal/MyPostgresProfile.scala +++ b/app/models/dal/MyPostgresProfile.scala @@ -321,6 +321,12 @@ trait MyPostgresProfile extends ExPostgresProfile implicit val haplogroupProvenanceJsonbTypeMapper: JdbcType[HaplogroupProvenance] with BaseTypedType[HaplogroupProvenance] = MappedJdbcType.base[HaplogroupProvenance, JsValue](Json.toJson(_), _.as[HaplogroupProvenance]) + // --- Genome Region JSONB Type Mappers --- + import models.domain.genomics.RegionCoordinate + + implicit val regionCoordinatesJsonbTypeMapper: JdbcType[Map[String, RegionCoordinate]] with BaseTypedType[Map[String, RegionCoordinate]] = + MappedJdbcType.base[Map[String, RegionCoordinate], JsValue](Json.toJson(_), _.as[Map[String, RegionCoordinate]]) + // Declare the name of an aggregate function: val ArrayAgg = new SqlAggregateFunction("array_agg") diff --git a/app/models/dal/domain/genomics/BiosampleVariantCall.scala b/app/models/dal/domain/genomics/BiosampleVariantCall.scala new file mode 100644 index 0000000..643886b --- /dev/null +++ b/app/models/dal/domain/genomics/BiosampleVariantCall.scala @@ -0,0 +1,97 @@ +package models.dal.domain.genomics + +import models.dal.MyPostgresProfile.api.* +import play.api.libs.json.{Json, OFormat} + +import java.time.Instant + +/** + * Represents an observed variant call from a biosample. + * + * This is the input data for ASR - the actual observed states + * from sequenced samples. 
+ * + * @param id Auto-generated primary key + * @param biosampleId FK to the biosample + * @param variantId FK to the variant + * @param observedState The observed state (allele, repeat count, "present"/"absent") + * @param qualityScore Phred-scale quality score + * @param readDepth Number of reads supporting the call + * @param confidence Confidence level: "high", "medium", "low" + * @param source Data source: "ftdna", "yfull", "user_upload", etc. + * @param createdAt When the call was recorded + */ +case class BiosampleVariantCall( + id: Option[Int] = None, + biosampleId: Int, + variantId: Int, + observedState: String, + qualityScore: Option[Int] = None, + readDepth: Option[Int] = None, + confidence: Option[String] = None, + source: Option[String] = None, + createdAt: Instant = Instant.now() +) + +object BiosampleVariantCall { + implicit val format: OFormat[BiosampleVariantCall] = Json.format[BiosampleVariantCall] + + object Confidence { + val HIGH = "high" + val MEDIUM = "medium" + val LOW = "low" + } +} + +/** + * Slick table definition for biosample_variant_call. 
+ */ +class BiosampleVariantCallTable(tag: Tag) + extends Table[BiosampleVariantCall](tag, Some("public"), "biosample_variant_call") { + + def id = column[Int]("id", O.PrimaryKey, O.AutoInc) + + def biosampleId = column[Int]("biosample_id") + + def variantId = column[Int]("variant_id") + + def observedState = column[String]("observed_state") + + def qualityScore = column[Option[Int]]("quality_score") + + def readDepth = column[Option[Int]]("read_depth") + + def confidence = column[Option[String]]("confidence") + + def source = column[Option[String]]("source") + + def createdAt = column[Instant]("created_at") + + def * = ( + id.?, + biosampleId, + variantId, + observedState, + qualityScore, + readDepth, + confidence, + source, + createdAt + ).mapTo[BiosampleVariantCall] + + // Note: biosample FK references public.biosample table + // We don't define the FK here to avoid circular dependencies + // The DB-level FK constraint handles referential integrity + + def variantFK = foreignKey( + "biosample_variant_call_variant_fk", + variantId, + TableQuery[VariantV2Table] + )(_.variantId, onDelete = ForeignKeyAction.Cascade) + + def uniqueBiosampleVariant = index( + "idx_biosample_variant_call_unique", + (biosampleId, variantId), + unique = true + ) +} diff --git a/app/models/dal/domain/genomics/BranchMutation.scala b/app/models/dal/domain/genomics/BranchMutation.scala new file mode 100644 index 0000000..ad345a4 --- /dev/null +++ b/app/models/dal/domain/genomics/BranchMutation.scala @@ -0,0 +1,93 @@ +package models.dal.domain.genomics + +import models.dal.MyPostgresProfile.api.* +import models.dal.domain.haplogroups.HaplogroupsTable +import play.api.libs.json.{Json, OFormat} + +/** + * Represents a state transition along a tree branch. + * + * Records where mutations occurred in the phylogenetic tree, + * tracking the change from parent to child haplogroup state. 
+ * + * @param id Auto-generated primary key + * @param variantId FK to the variant that changed + * @param parentHaplogroupId FK to parent haplogroup node + * @param childHaplogroupId FK to child haplogroup node + * @param fromState State at parent node (e.g., "G", "15") + * @param toState State at child node (e.g., "A", "16") + * @param stepDirection For STRs: +1 = expansion, -1 = contraction; NULL for SNPs + * @param confidence Confidence from ASR algorithm + */ +case class BranchMutation( + id: Option[Int] = None, + variantId: Int, + parentHaplogroupId: Int, + childHaplogroupId: Int, + fromState: String, + toState: String, + stepDirection: Option[Int] = None, + confidence: Option[BigDecimal] = None +) + +object BranchMutation { + implicit val format: OFormat[BranchMutation] = Json.format[BranchMutation] +} + +/** + * Slick table definition for branch_mutation. + */ +class BranchMutationTable(tag: Tag) + extends Table[BranchMutation](tag, Some("public"), "branch_mutation") { + + def id = column[Int]("id", O.PrimaryKey, O.AutoInc) + + def variantId = column[Int]("variant_id") + + def parentHaplogroupId = column[Int]("parent_haplogroup_id") + + def childHaplogroupId = column[Int]("child_haplogroup_id") + + def fromState = column[String]("from_state") + + def toState = column[String]("to_state") + + def stepDirection = column[Option[Int]]("step_direction") + + def confidence = column[Option[BigDecimal]]("confidence") + + def * = ( + id.?, + variantId, + parentHaplogroupId, + childHaplogroupId, + fromState, + toState, + stepDirection, + confidence + ).mapTo[BranchMutation] + + def variantFK = foreignKey( + "branch_mutation_variant_fk", + variantId, + TableQuery[VariantV2Table] + )(_.variantId, onDelete = ForeignKeyAction.Cascade) + + def parentHaplogroupFK = foreignKey( + "branch_mutation_parent_haplogroup_fk", + parentHaplogroupId, + TableQuery[HaplogroupsTable] + )(_.haplogroupId, onDelete = ForeignKeyAction.Cascade) + + def childHaplogroupFK = foreignKey( + 
"branch_mutation_child_haplogroup_fk", + childHaplogroupId, + TableQuery[HaplogroupsTable] + )(_.haplogroupId, onDelete = ForeignKeyAction.Cascade) + + def uniqueBranchVariant = index( + "idx_branch_mutation_unique", + (variantId, parentHaplogroupId, childHaplogroupId), + unique = true + ) +} diff --git a/app/models/dal/domain/genomics/GenomeRegionTables.scala b/app/models/dal/domain/genomics/GenomeRegionTables.scala index 0156dcb..9d328d4 100644 --- a/app/models/dal/domain/genomics/GenomeRegionTables.scala +++ b/app/models/dal/domain/genomics/GenomeRegionTables.scala @@ -1,8 +1,8 @@ package models.dal.domain.genomics import models.dal.MyPostgresProfile.api.* -import models.domain.genomics.{Cytoband, GenomeRegion, GenomeRegionVersion, StrMarker} - +import models.domain.genomics.{GenomeRegion, GenomeRegionVersion, RegionCoordinate} +import play.api.libs.json.JsValue import java.time.Instant /** @@ -19,64 +19,23 @@ class GenomeRegionVersionTable(tag: Tag) extends Table[GenomeRegionVersion](tag, } /** - * Slick table definition for genome_region table. - * Stores structural regions (centromere, telomere, PAR, XTR, etc.). + * Slick table definition for genome_region_v2 table. + * Stores structural regions (centromere, telomere, PAR, XTR, etc.) and Cytobands. + * Supports multi-reference coordinates via JSONB. 
*/ -class GenomeRegionTable(tag: Tag) extends Table[GenomeRegion](tag, "genome_region") { - def id = column[Int]("id", O.PrimaryKey, O.AutoInc) - def genbankContigId = column[Int]("genbank_contig_id") +class GenomeRegionTable(tag: Tag) extends Table[GenomeRegion](tag, "genome_region_v2") { + def id = column[Int]("region_id", O.PrimaryKey, O.AutoInc) // Column name changed to region_id def regionType = column[String]("region_type") def name = column[Option[String]]("name") - def startPos = column[Long]("start_pos") - def endPos = column[Long]("end_pos") - def modifier = column[Option[BigDecimal]]("modifier") - - def * = (id.?, genbankContigId, regionType, name, startPos, endPos, modifier).mapTo[GenomeRegion] - - def genbankContigFk = foreignKey("genome_region_genbank_contig_fk", genbankContigId, - TableQuery[GenbankContigsTable])(_.genbankContigId, onDelete = ForeignKeyAction.Cascade) - - def idxContig = index("idx_genome_region_contig", genbankContigId) -} - -/** - * Slick table definition for cytoband table. - * Stores cytoband annotations for chromosome ideogram display. - */ -class CytobandTable(tag: Tag) extends Table[Cytoband](tag, "cytoband") { - def id = column[Int]("id", O.PrimaryKey, O.AutoInc) - def genbankContigId = column[Int]("genbank_contig_id") - def name = column[String]("name") - def startPos = column[Long]("start_pos") - def endPos = column[Long]("end_pos") - def stain = column[String]("stain") - - def * = (id.?, genbankContigId, name, startPos, endPos, stain).mapTo[Cytoband] - - def genbankContigFk = foreignKey("cytoband_genbank_contig_fk", genbankContigId, - TableQuery[GenbankContigsTable])(_.genbankContigId, onDelete = ForeignKeyAction.Cascade) - - def idxContig = index("idx_cytoband_contig", genbankContigId) -} - -/** - * Slick table definition for str_marker table. - * Stores STR marker positions for Y-DNA analysis. 
- */ -class StrMarkerTable(tag: Tag) extends Table[StrMarker](tag, "str_marker") { - def id = column[Int]("id", O.PrimaryKey, O.AutoInc) - def genbankContigId = column[Int]("genbank_contig_id") - def name = column[String]("name") - def startPos = column[Long]("start_pos") - def endPos = column[Long]("end_pos") - def period = column[Int]("period") - def verified = column[Boolean]("verified") - def note = column[Option[String]]("note") - - def * = (id.?, genbankContigId, name, startPos, endPos, period, verified, note).mapTo[StrMarker] - - def genbankContigFk = foreignKey("str_marker_genbank_contig_fk", genbankContigId, - TableQuery[GenbankContigsTable])(_.genbankContigId, onDelete = ForeignKeyAction.Cascade) - - def idxContig = index("idx_str_marker_contig", genbankContigId) -} + def coordinates = column[JsValue]("coordinates") + def properties = column[JsValue]("properties") + + def * = (id.?, regionType, name, coordinates, properties).<> ( + (t: (Option[Int], String, Option[String], JsValue, JsValue)) => GenomeRegion( + t._1, t._2, t._3, t._4.as[Map[String, RegionCoordinate]], t._5 + ), + (r: GenomeRegion) => Some((r.id, r.regionType, r.name, play.api.libs.json.Json.toJson(r.coordinates), r.properties)) + ) + + // No Foreign Key to Contig anymore, as coordinates are embedded +} \ No newline at end of file diff --git a/app/models/dal/domain/genomics/HaplogroupCharacterState.scala b/app/models/dal/domain/genomics/HaplogroupCharacterState.scala new file mode 100644 index 0000000..19bf668 --- /dev/null +++ b/app/models/dal/domain/genomics/HaplogroupCharacterState.scala @@ -0,0 +1,92 @@ +package models.dal.domain.genomics + +import models.dal.MyPostgresProfile.api.* +import models.dal.domain.haplogroups.HaplogroupsTable +import play.api.libs.json.{JsValue, Json, OFormat} + +import java.time.Instant + +/** + * Represents an ASR-reconstructed character state at a haplogroup node. 
+ * + * This table stores the inferred ancestral state for each variant at each + * haplogroup in the tree. Used for: + * - SNPs: ancestral vs derived allele + * - STRs: inferred repeat count (modal haplotype) + * - SVs: presence/absence, orientation, copy number + * + * @param id Auto-generated primary key + * @param haplogroupId FK to the haplogroup node + * @param variantId FK to the variant + * @param inferredState The reconstructed state (allele, count, "present"/"absent", etc.) + * @param confidence Confidence score from ASR algorithm (0.0-1.0) + * @param stateProbabilities JSONB probability distribution for uncertain reconstructions + * @param algorithm ASR method used: "parsimony", "ml", "bayesian" + * @param reconstructedAt Timestamp of reconstruction + */ +case class HaplogroupCharacterState( + id: Option[Int] = None, + haplogroupId: Int, + variantId: Int, + inferredState: String, + confidence: Option[BigDecimal] = None, + stateProbabilities: Option[JsValue] = None, + algorithm: Option[String] = None, + reconstructedAt: Instant = Instant.now() +) + +object HaplogroupCharacterState { + implicit val format: OFormat[HaplogroupCharacterState] = Json.format[HaplogroupCharacterState] +} + +/** + * Slick table definition for haplogroup_character_state. 
+ */ +class HaplogroupCharacterStateTable(tag: Tag) + extends Table[HaplogroupCharacterState](tag, Some("public"), "haplogroup_character_state") { + + def id = column[Int]("id", O.PrimaryKey, O.AutoInc) + + def haplogroupId = column[Int]("haplogroup_id") + + def variantId = column[Int]("variant_id") + + def inferredState = column[String]("inferred_state") + + def confidence = column[Option[BigDecimal]]("confidence") + + def stateProbabilities = column[Option[JsValue]]("state_probabilities") + + def algorithm = column[Option[String]]("algorithm") + + def reconstructedAt = column[Instant]("reconstructed_at") + + def * = ( + id.?, + haplogroupId, + variantId, + inferredState, + confidence, + stateProbabilities, + algorithm, + reconstructedAt + ).mapTo[HaplogroupCharacterState] + + def haplogroupFK = foreignKey( + "haplogroup_character_state_haplogroup_fk", + haplogroupId, + TableQuery[HaplogroupsTable] + )(_.haplogroupId, onDelete = ForeignKeyAction.Cascade) + + def variantFK = foreignKey( + "haplogroup_character_state_variant_fk", + variantId, + TableQuery[VariantV2Table] + )(_.variantId, onDelete = ForeignKeyAction.Cascade) + + def uniqueHaplogroupVariant = index( + "idx_character_state_unique", + (haplogroupId, variantId), + unique = true + ) +} diff --git a/app/models/dal/domain/genomics/StrMutationRate.scala b/app/models/dal/domain/genomics/StrMutationRate.scala new file mode 100644 index 0000000..b8d920a --- /dev/null +++ b/app/models/dal/domain/genomics/StrMutationRate.scala @@ -0,0 +1,145 @@ +package models.dal.domain.genomics + +import models.dal.MyPostgresProfile.api.* +import play.api.libs.json.{Json, OFormat} + +import java.time.Instant + +/** + * Per-marker STR mutation rates for ASR and age estimation. + * + * Sources include Ballantyne 2010, Willems 2016, and other published studies. + * These rates are critical for accurate branch age estimation using the + * stepwise mutation model. 
+ * + * @param id Auto-generated primary key + * @param markerName STR marker name (e.g., DYS456, DYS389I) + * @param panelNames Panels containing this marker (PowerPlex, YHRD, BigY, etc.) + * @param mutationRate Mutations per generation + * @param mutationRateLower 95% CI lower bound + * @param mutationRateUpper 95% CI upper bound + * @param omegaPlus Probability of expansion (default 0.5) + * @param omegaMinus Probability of contraction (default 0.5) + * @param multiStepRate Combined rate for multi-step mutations (omega_2 + omega_3 + ...) + * @param source Publication source (e.g., "Ballantyne 2010") + * @param createdAt When the rate was recorded + */ +case class StrMutationRate( + id: Option[Int] = None, + markerName: String, + panelNames: Option[List[String]] = None, + mutationRate: BigDecimal, + mutationRateLower: Option[BigDecimal] = None, + mutationRateUpper: Option[BigDecimal] = None, + omegaPlus: Option[BigDecimal] = Some(BigDecimal("0.5")), + omegaMinus: Option[BigDecimal] = Some(BigDecimal("0.5")), + multiStepRate: Option[BigDecimal] = None, + source: Option[String] = None, + createdAt: Instant = Instant.now() +) { + + /** + * Check if mutation is biased toward expansion. + */ + def isExpansionBiased: Boolean = + omegaPlus.getOrElse(BigDecimal("0.5")) > BigDecimal("0.5") + + /** + * Check if mutation is biased toward contraction. + */ + def isContractionBiased: Boolean = + omegaMinus.getOrElse(BigDecimal("0.5")) > BigDecimal("0.5") + + /** + * Get the symmetry of mutation direction (1.0 = perfectly symmetric). + * Values < 1.0 indicate directional bias. + */ + def directionalSymmetry: BigDecimal = { + val plus = omegaPlus.getOrElse(BigDecimal("0.5")) + val minus = omegaMinus.getOrElse(BigDecimal("0.5")) + if (plus >= minus) minus / plus else plus / minus + } +} + +object StrMutationRate { + implicit val format: OFormat[StrMutationRate] = Json.format[StrMutationRate] + + /** + * Create a rate entry with symmetric mutation probability. 
+ */ + def symmetric( + markerName: String, + rate: BigDecimal, + source: String + ): StrMutationRate = StrMutationRate( + markerName = markerName, + mutationRate = rate, + source = Some(source) + ) + + /** + * Create a rate entry with directional bias. + */ + def withBias( + markerName: String, + rate: BigDecimal, + omegaPlus: BigDecimal, + omegaMinus: BigDecimal, + source: String + ): StrMutationRate = StrMutationRate( + markerName = markerName, + mutationRate = rate, + omegaPlus = Some(omegaPlus), + omegaMinus = Some(omegaMinus), + source = Some(source) + ) +} + +/** + * Slick table definition for str_mutation_rate. + */ +class StrMutationRateTable(tag: Tag) + extends Table[StrMutationRate](tag, Some("public"), "str_mutation_rate") { + + def id = column[Int]("id", O.PrimaryKey, O.AutoInc) + + def markerName = column[String]("marker_name") + + def panelNames = column[Option[List[String]]]("panel_names") + + def mutationRate = column[BigDecimal]("mutation_rate") + + def mutationRateLower = column[Option[BigDecimal]]("mutation_rate_lower") + + def mutationRateUpper = column[Option[BigDecimal]]("mutation_rate_upper") + + def omegaPlus = column[Option[BigDecimal]]("omega_plus") + + def omegaMinus = column[Option[BigDecimal]]("omega_minus") + + def multiStepRate = column[Option[BigDecimal]]("multi_step_rate") + + def source = column[Option[String]]("source") + + def createdAt = column[Instant]("created_at") + + def * = ( + id.?, + markerName, + panelNames, + mutationRate, + mutationRateLower, + mutationRateUpper, + omegaPlus, + omegaMinus, + multiStepRate, + source, + createdAt + ).mapTo[StrMutationRate] + + def uniqueMarkerName = index( + "idx_str_mutation_rate_marker_unique", + markerName, + unique = true + ) +} diff --git a/app/models/dal/domain/genomics/Variant.scala b/app/models/dal/domain/genomics/Variant.scala deleted file mode 100644 index da10b4b..0000000 --- a/app/models/dal/domain/genomics/Variant.scala +++ /dev/null @@ -1,25 +0,0 @@ -package 
models.dal.domain.genomics - -/** - * Represents a genetic variant with detailed information about its genomic location, reference allele, - * alternate allele, variant type, and optional metadata such as identifiers and common names. - * - * @param variantId An optional unique identifier for the variant, used internally for tracking purposes. - * @param genbankContigId The unique identifier for the genomic contig in GenBank where this variant is located. - * @param position The position of the variant on the genomic contig. - * @param referenceAllele The reference allele at the specific genomic position. - * @param alternateAllele The alternate allele representing the variant at the specific position. - * @param variantType The type of the variant (e.g., SNP, insertion, deletion). - * @param rsId An optional rs identifier (dbSNP ID) associated with this variant, if available. - * @param commonName An optional common name or description for the variant. - */ -case class Variant( - variantId: Option[Int] = None, - genbankContigId: Int, - position: Int, - referenceAllele: String, - alternateAllele: String, - variantType: String, - rsId: Option[String], - commonName: Option[String] - ) diff --git a/app/models/dal/domain/genomics/VariantAlias.scala b/app/models/dal/domain/genomics/VariantAlias.scala deleted file mode 100644 index 0f6deec..0000000 --- a/app/models/dal/domain/genomics/VariantAlias.scala +++ /dev/null @@ -1,43 +0,0 @@ -package models.dal.domain.genomics - -import java.time.LocalDateTime - -/** - * Represents an alternative name (alias) for a variant. - * - * Variants are often known by multiple names across different research groups and databases: - * - ISOGG names (e.g., M269, P312) - * - YFull names (e.g., BY12345) - * - FTDNA names - * - dbSNP rsIDs (e.g., rs9786076) - * - Publication-specific identifiers - * - * This model allows tracking all known names for a variant while maintaining - * a primary display name. 
- * - * @param id Unique identifier for this alias record - * @param variantId The variant this alias belongs to - * @param aliasType Type of alias: 'common_name', 'rs_id', 'isogg', 'yfull', 'ftdna', etc. - * @param aliasValue The actual alias value (e.g., "M269", "rs9786076") - * @param source Where this alias came from: 'ybrowse', 'isogg', 'curator', 'migration', etc. - * @param isPrimary Whether this is the primary alias for its type (used for display) - * @param createdAt When this alias was recorded - */ -case class VariantAlias( - id: Option[Int] = None, - variantId: Int, - aliasType: String, - aliasValue: String, - source: Option[String] = None, - isPrimary: Boolean = false, - createdAt: LocalDateTime = LocalDateTime.now() -) - -object VariantAliasType { - val CommonName = "common_name" - val RsId = "rs_id" - val Isogg = "isogg" - val YFull = "yfull" - val Ftdna = "ftdna" - val Publication = "publication" -} diff --git a/app/models/dal/domain/genomics/VariantAliasTable.scala b/app/models/dal/domain/genomics/VariantAliasTable.scala deleted file mode 100644 index fa43291..0000000 --- a/app/models/dal/domain/genomics/VariantAliasTable.scala +++ /dev/null @@ -1,33 +0,0 @@ -package models.dal.domain.genomics - -import models.dal.MyPostgresProfile.api.* - -import java.time.LocalDateTime - -/** - * Represents the `variant_alias` table in the database, which stores alternative names - * for genetic variants from different sources (YBrowse, ISOGG, YFull, publications, etc.). - * - * @param tag A Slick `Tag` object used to scope and reference the table within a database schema. 
- */ -class VariantAliasTable(tag: Tag) extends Table[VariantAlias](tag, Some("public"), "variant_alias") { - def id = column[Int]("id", O.PrimaryKey, O.AutoInc) - - def variantId = column[Int]("variant_id") - - def aliasType = column[String]("alias_type") - - def aliasValue = column[String]("alias_value") - - def source = column[Option[String]]("source") - - def isPrimary = column[Boolean]("is_primary") - - def createdAt = column[LocalDateTime]("created_at") - - def * = (id.?, variantId, aliasType, aliasValue, source, isPrimary, createdAt).mapTo[VariantAlias] - - def variantFK = foreignKey("variant_alias_variant_fk", variantId, TableQuery[VariantsTable])(_.variantId, onDelete = ForeignKeyAction.Cascade) - - def uniqueAlias = index("variant_alias_unique", (variantId, aliasType, aliasValue), unique = true) -} diff --git a/app/models/dal/domain/genomics/VariantV2Table.scala b/app/models/dal/domain/genomics/VariantV2Table.scala new file mode 100644 index 0000000..5ab31f8 --- /dev/null +++ b/app/models/dal/domain/genomics/VariantV2Table.scala @@ -0,0 +1,86 @@ +package models.dal.domain.genomics + +import models.dal.MyPostgresProfile.api.* +import models.dal.domain.haplogroups.HaplogroupsTable +import models.domain.genomics.{MutationType, NamingStatus, VariantV2} +import play.api.libs.json.JsValue +import slick.ast.BaseTypedType +import slick.jdbc.JdbcType + +import java.time.Instant + +/** + * Slick table definition for the `variant_v2` table. + * + * This table stores consolidated variants with JSONB columns for coordinates + * (supporting multiple reference genomes) and aliases (supporting multiple + * naming sources). 
+ * + * Schema: + * - One row per logical variant (not per reference genome) + * - JSONB `coordinates` contains position + alleles per assembly + * - JSONB `aliases` contains all known names grouped by source + * - `defining_haplogroup_id` distinguishes parallel mutations + */ +class VariantV2Table(tag: Tag) extends Table[VariantV2](tag, Some("public"), "variant_v2") { + + // MappedColumnType for MutationType enum + implicit val mutationTypeMapper: JdbcType[MutationType] with BaseTypedType[MutationType] = + MappedColumnType.base[MutationType, String]( + _.dbValue, + MutationType.fromStringOrDefault(_) + ) + + // MappedColumnType for NamingStatus enum + implicit val namingStatusMapper: JdbcType[NamingStatus] with BaseTypedType[NamingStatus] = + MappedColumnType.base[NamingStatus, String]( + _.dbValue, + NamingStatus.fromStringOrDefault(_) + ) + + def variantId = column[Int]("variant_id", O.PrimaryKey, O.AutoInc) + + def canonicalName = column[Option[String]]("canonical_name") + + def mutationType = column[MutationType]("mutation_type") + + def namingStatus = column[NamingStatus]("naming_status") + + def aliases = column[JsValue]("aliases") + + def coordinates = column[JsValue]("coordinates") + + def definingHaplogroupId = column[Option[Int]]("defining_haplogroup_id") + + def evidence = column[JsValue]("evidence") + + def primers = column[JsValue]("primers") + + def notes = column[Option[String]]("notes") + + def createdAt = column[Instant]("created_at") + + def updatedAt = column[Instant]("updated_at") + + def * = ( + variantId.?, + canonicalName, + mutationType, + namingStatus, + aliases, + coordinates, + definingHaplogroupId, + evidence, + primers, + notes, + createdAt, + updatedAt + ).mapTo[VariantV2] + + // Foreign key to haplogroup for parallel mutation disambiguation + def definingHaplogroupFK = foreignKey( + "variant_v2_defining_haplogroup_fk", + definingHaplogroupId, + TableQuery[HaplogroupsTable] + )(_.haplogroupId.?, onDelete = ForeignKeyAction.SetNull) +} 
diff --git a/app/models/dal/domain/genomics/VariantsTable.scala b/app/models/dal/domain/genomics/VariantsTable.scala deleted file mode 100644 index 314be4c..0000000 --- a/app/models/dal/domain/genomics/VariantsTable.scala +++ /dev/null @@ -1,62 +0,0 @@ -package models.dal.domain.genomics - -import models.dal.domain.genomics.GenbankContigsTable -import models.domain.genomics.GenbankContig -import models.dal.MyPostgresProfile.api.* - -/** - * Represents the `variant` table in the database, which stores information about genetic variants. - * - * This table includes genomic variant details such as position, alleles, type, optional identifiers, and - * associated metadata. It is linked to the `GenbankContigsTable` through a foreign key and enforces - * uniqueness constraints on specific columns to ensure data integrity. - * - * @constructor Creates an instance of the `VariantsTable` class. - * @param tag A Slick `Tag` object used to scope and reference the table within a database schema. - * - * Schema details: - * - Table name: `variant` - * - Columns: - * - `variantId`: The primary key for the table, an auto-incrementing integer serving as a unique identifier for each variant. - * - `genbankContigId`: A foreign key referencing the `GenbankContigsTable`, indicating the genomic contig containing the variant. - * - `position`: The position of the variant on the genomic contig. - * - `referenceAllele`: The reference allele observed at the variant's position. - * - `alternateAllele`: The alternate allele representing the variant. - * - `variantType`: Specifies the type of the variant (e.g., SNP, insertion, deletion). - * - `rsId`: An optional column for the variant's dbSNP identifier (`rs` ID). - * - `commonName`: An optional column for a common name or description of the variant. - * - Primary key: - * - `variantId` - * - Foreign keys: - * - `genbankContigFK`: References the `genbank_contig_id` column in the `GenbankContigsTable`. Cascades deletions. 
- * - Indexes and constraints: - * - `uniqueVariant`: Enforces a unique constraint on the combination of `genbankContigId`, `position`, - * `referenceAllele`, and `alternateAllele`. - * - * Mapping: - * - Maps to the `Variant` case class, representing the domain model for a variant. The mapping includes all columns, - * with `variantId` being optional. - */ -class VariantsTable(tag: Tag) extends Table[Variant](tag, Some("public"), "variant") { - def variantId = column[Int]("variant_id", O.PrimaryKey, O.AutoInc) - - def genbankContigId = column[Int]("genbank_contig_id") - - def position = column[Int]("position") - - def referenceAllele = column[String]("reference_allele") - - def alternateAllele = column[String]("alternate_allele") - - def variantType = column[String]("variant_type") - - def rsId = column[Option[String]]("rs_id") - - def commonName = column[Option[String]]("common_name") - - def * = (variantId.?, genbankContigId, position, referenceAllele, alternateAllele, variantType, rsId, commonName).mapTo[Variant] - - def genbankContigFK = foreignKey("genbank_contig_fk", genbankContigId, TableQuery[GenbankContigsTable])(_.genbankContigId, onDelete = ForeignKeyAction.Cascade) - - def uniqueVariant = index("unique_variant", (genbankContigId, position, referenceAllele, alternateAllele), unique = true) -} diff --git a/app/models/dal/domain/haplogroups/HaplogroupVariantsTable.scala b/app/models/dal/domain/haplogroups/HaplogroupVariantsTable.scala index 2e0de3d..9d8f9c5 100644 --- a/app/models/dal/domain/haplogroups/HaplogroupVariantsTable.scala +++ b/app/models/dal/domain/haplogroups/HaplogroupVariantsTable.scala @@ -1,7 +1,7 @@ package models.dal.domain.haplogroups import models.dal.MyPostgresProfile.api.* -import models.dal.domain.genomics.{Variant, VariantsTable} +import models.dal.domain.genomics.VariantV2Table import models.domain.haplogroups.{Haplogroup, HaplogroupVariant} /** @@ -41,8 +41,8 @@ class HaplogroupVariantsTable(tag: Tag) extends 
Table[HaplogroupVariant](tag, So def haplogroupFK = foreignKey("haplogroup_fk", haplogroupId, TableQuery[HaplogroupsTable])(_.haplogroupId, onDelete = ForeignKeyAction.Cascade) - // Explicitly specify the schema for VariantsTable which is in the public schema - def variantFK = foreignKey("variant_fk", variantId, TableQuery[VariantsTable])(_.variantId, onDelete = ForeignKeyAction.Cascade) + // Foreign key to variant_v2 table + def variantFK = foreignKey("variant_fk", variantId, TableQuery[VariantV2Table])(_.variantId, onDelete = ForeignKeyAction.Cascade) def uniqueHaplogroupVariant = index("unique_haplogroup_variant", (haplogroupId, variantId), unique = true) } diff --git a/app/models/domain/genomics/Cytoband.scala b/app/models/domain/genomics/Cytoband.scala deleted file mode 100644 index db4bee7..0000000 --- a/app/models/domain/genomics/Cytoband.scala +++ /dev/null @@ -1,21 +0,0 @@ -package models.domain.genomics - -/** - * Represents a cytoband annotation for chromosome ideogram display. - * Cytobands are banding patterns visible under microscopy after Giemsa staining. - * - * @param id Optional unique identifier for the cytoband. - * @param genbankContigId The ID of the associated GenBank contig (chromosome). - * @param name Band name (e.g., "p11.32", "q11.21"). - * @param startPos Start position (1-based, inclusive). - * @param endPos End position (1-based, inclusive). - * @param stain Giemsa stain pattern: gneg, gpos25, gpos50, gpos75, gpos100, acen, gvar, stalk. 
- */ -case class Cytoband( - id: Option[Int] = None, - genbankContigId: Int, - name: String, - startPos: Long, - endPos: Long, - stain: String -) diff --git a/app/models/domain/genomics/GenomeRegion.scala b/app/models/domain/genomics/GenomeRegion.scala index 1b48f8c..d086f4d 100644 --- a/app/models/domain/genomics/GenomeRegion.scala +++ b/app/models/domain/genomics/GenomeRegion.scala @@ -1,23 +1,38 @@ package models.domain.genomics +import play.api.libs.json.{Format, JsValue, Json} + /** - * Represents a structural region within a chromosome, such as centromeres, telomeres, - * pseudoautosomal regions (PAR), X-transposed regions (XTR), ampliconic regions, etc. + * Coordinate information for a specific reference genome build. + */ +case class RegionCoordinate( + contig: String, + start: Long, + end: Long +) + +object RegionCoordinate { + implicit val format: Format[RegionCoordinate] = Json.format[RegionCoordinate] +} + +/** + * Represents a structural region within a chromosome (or a cytoband). + * Supports multi-reference coordinates. * - * @param id Optional unique identifier for the genome region. - * @param genbankContigId The ID of the associated GenBank contig (chromosome). - * @param regionType The type of region (e.g., "Centromere", "Telomere_P", "PAR1", "XTR"). - * @param name Optional name for named regions (e.g., "P1" for palindrome 1). - * @param startPos Start position (1-based, inclusive). - * @param endPos End position (1-based, inclusive). - * @param modifier Optional quality modifier (0.1-1.0) indicating confidence in variant calls within this region. + * @param id Optional unique identifier (region_id). + * @param regionType The type of region (e.g., "Centromere", "Cytoband", "PAR1"). + * @param name Optional name (e.g., "p11.32" for cytobands, "P1" for palindromes). + * @param coordinates Map of BuildName -> Coordinate (e.g., "GRCh38" -> {contig: "chrY", start: ...}). 
+ * @param properties Additional properties as JSON (e.g., {"stain": "gpos75", "modifier": 0.5}). */ case class GenomeRegion( id: Option[Int] = None, - genbankContigId: Int, regionType: String, name: Option[String], - startPos: Long, - endPos: Long, - modifier: Option[BigDecimal] + coordinates: Map[String, RegionCoordinate], + properties: JsValue ) + +object GenomeRegion { + implicit val format: Format[GenomeRegion] = Json.format[GenomeRegion] +} \ No newline at end of file diff --git a/app/models/domain/genomics/MutationType.scala b/app/models/domain/genomics/MutationType.scala new file mode 100644 index 0000000..dbd7b05 --- /dev/null +++ b/app/models/domain/genomics/MutationType.scala @@ -0,0 +1,94 @@ +package models.domain.genomics + +import play.api.libs.json.* + +/** + * Represents the type of genetic mutation. + * + * Each mutation type has associated properties: + * - `dbValue`: The string stored in the database + * - `category`: Classification as Point, Repeat, or Structural + * - `displayName`: Human-readable name for UI display + */ +enum MutationType(val dbValue: String, val category: MutationCategory, val displayName: String) { + // Point mutations - single nucleotide or small changes + case SNP extends MutationType("SNP", MutationCategory.Point, "Single Nucleotide Polymorphism") + case INDEL extends MutationType("INDEL", MutationCategory.Point, "Insertion/Deletion") + case MNP extends MutationType("MNP", MutationCategory.Point, "Multi-Nucleotide Polymorphism") + + // Repeat variations + case STR extends MutationType("STR", MutationCategory.Repeat, "Short Tandem Repeat") + + // Structural variants - larger genomic rearrangements + case DEL extends MutationType("DEL", MutationCategory.Structural, "Deletion") + case DUP extends MutationType("DUP", MutationCategory.Structural, "Duplication") + case INS extends MutationType("INS", MutationCategory.Structural, "Insertion") + case INV extends MutationType("INV", MutationCategory.Structural, "Inversion") + 
case CNV extends MutationType("CNV", MutationCategory.Structural, "Copy Number Variant") + case TRANS extends MutationType("TRANS", MutationCategory.Structural, "Translocation") + + override def toString: String = dbValue + + def isPointMutation: Boolean = category == MutationCategory.Point + def isRepeat: Boolean = category == MutationCategory.Repeat + def isStructural: Boolean = category == MutationCategory.Structural +} + +/** + * Category of mutation types. + */ +enum MutationCategory { + case Point, Repeat, Structural +} + +object MutationType { + /** + * Parse a database string value to MutationType. + */ + def fromString(str: String): Option[MutationType] = str.toUpperCase match { + case "SNP" => Some(SNP) + case "INDEL" => Some(INDEL) + case "MNP" => Some(MNP) + case "STR" => Some(STR) + case "DEL" => Some(DEL) + case "DUP" => Some(DUP) + case "INS" => Some(INS) + case "INV" => Some(INV) + case "CNV" => Some(CNV) + case "TRANS" => Some(TRANS) + case _ => None + } + + /** + * Parse with a default fallback. + */ + def fromStringOrDefault(str: String, default: MutationType = SNP): MutationType = + fromString(str).getOrElse(default) + + /** + * All point mutation types. + */ + val pointTypes: Set[MutationType] = Set(SNP, INDEL, MNP) + + /** + * All structural variant types. + */ + val structuralTypes: Set[MutationType] = Set(DEL, DUP, INS, INV, CNV, TRANS) + + /** + * All mutation types. 
+ */ + val allTypes: Set[MutationType] = MutationType.values.toSet + + // JSON serialization + implicit val reads: Reads[MutationType] = Reads.StringReads.flatMap { str => + fromString(str) match { + case Some(mt) => Reads.pure(mt) + case None => Reads.failed(s"Invalid MutationType: $str") + } + } + + implicit val writes: Writes[MutationType] = Writes.StringWrites.contramap(_.dbValue) + + implicit val format: Format[MutationType] = Format(reads, writes) +} diff --git a/app/models/domain/genomics/NamingStatus.scala b/app/models/domain/genomics/NamingStatus.scala new file mode 100644 index 0000000..2a3e8c3 --- /dev/null +++ b/app/models/domain/genomics/NamingStatus.scala @@ -0,0 +1,60 @@ +package models.domain.genomics + +import play.api.libs.json.* + +/** + * Represents the naming status of a variant. + * + * Each status has associated properties: + * - `dbValue`: The string stored in the database + * - `displayName`: Human-readable name for UI display + * - `isNamed`: Whether the variant has an official name + */ +enum NamingStatus(val dbValue: String, val displayName: String, val isNamed: Boolean) { + /** + * Variant has no official name - typically identified only by coordinates. + */ + case Unnamed extends NamingStatus("UNNAMED", "Unnamed", false) + + /** + * Variant has been submitted for naming review but not yet approved. + */ + case PendingReview extends NamingStatus("PENDING_REVIEW", "Pending Review", false) + + /** + * Variant has an official canonical name. + */ + case Named extends NamingStatus("NAMED", "Named", true) + + override def toString: String = dbValue +} + +object NamingStatus { + /** + * Parse a database string value to NamingStatus. + */ + def fromString(str: String): Option[NamingStatus] = str.toUpperCase match { + case "UNNAMED" => Some(Unnamed) + case "PENDING_REVIEW" => Some(PendingReview) + case "NAMED" => Some(Named) + case _ => None + } + + /** + * Parse with a default fallback. 
+ */ + def fromStringOrDefault(str: String, default: NamingStatus = Unnamed): NamingStatus = + fromString(str).getOrElse(default) + + // JSON serialization + implicit val reads: Reads[NamingStatus] = Reads.StringReads.flatMap { str => + fromString(str) match { + case Some(ns) => Reads.pure(ns) + case None => Reads.failed(s"Invalid NamingStatus: $str") + } + } + + implicit val writes: Writes[NamingStatus] = Writes.StringWrites.contramap(_.dbValue) + + implicit val format: Format[NamingStatus] = Format(reads, writes) +} diff --git a/app/models/domain/genomics/StrMarker.scala b/app/models/domain/genomics/StrMarker.scala deleted file mode 100644 index bf8fa67..0000000 --- a/app/models/domain/genomics/StrMarker.scala +++ /dev/null @@ -1,25 +0,0 @@ -package models.domain.genomics - -/** - * Represents a Short Tandem Repeat (STR) marker position on a chromosome. - * STR markers are used in genetic genealogy for Y-DNA testing. - * - * @param id Optional unique identifier for the STR marker. - * @param genbankContigId The ID of the associated GenBank contig (chromosome). - * @param name Marker name (e.g., "DYS389I", "DYS456"). - * @param startPos Start position (1-based, inclusive). - * @param endPos End position (1-based, inclusive). - * @param period Repeat unit length in base pairs. - * @param verified Whether the position has been manually verified for this build. - * @param note Optional annotation (e.g., "Position estimated via liftover from GRCh38"). 
- */ -case class StrMarker( - id: Option[Int] = None, - genbankContigId: Int, - name: String, - startPos: Long, - endPos: Long, - period: Int, - verified: Boolean = false, - note: Option[String] = None -) diff --git a/app/models/domain/genomics/VariantGroup.scala b/app/models/domain/genomics/VariantGroup.scala deleted file mode 100644 index 4dfb63b..0000000 --- a/app/models/domain/genomics/VariantGroup.scala +++ /dev/null @@ -1,84 +0,0 @@ -package models.domain.genomics - -/** - * Groups variants that represent the same logical SNP across different reference builds. - * Variants are grouped by commonName (primary) or rsId (fallback). - * - * For example, M269 might have positions in GRCh37, GRCh38, and hs1, - * each stored as a separate Variant row but logically the same marker. - * - * @param groupKey The key used to group these variants (commonName or rsId) - * @param variants All variants (with their contig info) that share this group key - * @param rsId The rsId if present on any variant in the group - * @param commonName The common name if present on any variant in the group - */ -case class VariantGroup( - groupKey: String, - variants: Seq[VariantWithContig], - rsId: Option[String], - commonName: Option[String] -) { - /** - * Get all variant IDs in this group - */ - def variantIds: Seq[Int] = variants.flatMap(_.variant.variantId) - - /** - * Display name for the variant group (commonName preferred, rsId fallback) - */ - def displayName: String = commonName.orElse(rsId).getOrElse(s"ID: ${variantIds.headOption.getOrElse("?")}") - - /** - * Summary of all builds available (e.g., "GRCh37, GRCh38, hs1") - */ - def buildSummary: String = variants - .map(_.shortReferenceGenome) - .distinct - .sorted - .mkString(", ") - - /** - * Number of reference builds available for this variant - */ - def buildCount: Int = variants.map(_.shortReferenceGenome).distinct.size - - /** - * Variants sorted by reference genome for consistent display - */ - def variantsSorted: 
Seq[VariantWithContig] = variants.sortBy { v => - v.shortReferenceGenome match { - case "GRCh37" => 1 - case "GRCh38" => 2 - case "hs1" => 3 - case other => 4 - } - } -} - -object VariantGroup { - /** - * Creates variant groups from a sequence of variants with contig info. - * Groups by commonName (primary), falling back to rsId. - * Variants without either become single-variant groups keyed by variant ID. - */ - def fromVariants(variants: Seq[VariantWithContig]): Seq[VariantGroup] = { - // Group by the key (commonName preferred, rsId fallback, variantId last resort) - val grouped = variants.groupBy { vwc => - vwc.variant.commonName - .orElse(vwc.variant.rsId) - .getOrElse(s"variant_${vwc.variant.variantId.getOrElse(0)}") - } - - grouped.map { case (key, variantsInGroup) => - val rsId = variantsInGroup.flatMap(_.variant.rsId).headOption - val commonName = variantsInGroup.flatMap(_.variant.commonName).headOption - - VariantGroup( - groupKey = key, - variants = variantsInGroup, - rsId = rsId, - commonName = commonName - ) - }.toSeq.sortBy(_.displayName) - } -} diff --git a/app/models/domain/genomics/VariantV2.scala b/app/models/domain/genomics/VariantV2.scala new file mode 100644 index 0000000..e1a04ec --- /dev/null +++ b/app/models/domain/genomics/VariantV2.scala @@ -0,0 +1,257 @@ +package models.domain.genomics + +import play.api.libs.json.{__, JsValue, Json, OFormat, Format, Reads, Writes} + +import java.time.Instant + +/** + * Consolidated variant with JSONB coordinates and aliases. + * One row per logical variant across all reference genomes. 
+ * + * @param variantId Unique identifier (auto-generated) + * @param canonicalName Primary name (e.g., "M269", "DYS456"); None for unnamed variants + * @param mutationType Variant type (SNP, INDEL, MNP, STR, DEL, DUP, INS, INV, CNV, TRANS) + * @param namingStatus Naming status (Unnamed, PendingReview, Named) + * @param aliases JSONB: {common_names: [], rs_ids: [], sources: {source: [names]}} + * @param coordinates JSONB: Per-assembly coordinates (structure varies by mutationType) + * @param definingHaplogroupId FK to haplogroup for parallel mutation disambiguation + * @param evidence JSONB: Evidence metadata (e.g., YSEQ test counts) + * @param primers JSONB: PCR primer information + * @param notes Free-text notes + * @param createdAt Creation timestamp + * @param updatedAt Last update timestamp + */ +case class VariantV2( + variantId: Option[Int] = None, + canonicalName: Option[String], + mutationType: MutationType, + namingStatus: NamingStatus = NamingStatus.Unnamed, + aliases: JsValue = Json.obj(), + coordinates: JsValue = Json.obj(), + definingHaplogroupId: Option[Int] = None, + evidence: JsValue = Json.obj(), + primers: JsValue = Json.obj(), + notes: Option[String] = None, + createdAt: Instant = Instant.now(), + updatedAt: Instant = Instant.now() +) { + + /** + * Get coordinate entry for a specific reference genome. + */ + def getCoordinates(refGenome: String): Option[JsValue] = + (coordinates \ refGenome).toOption + + /** + * Check if variant has coordinates for a given reference. + */ + def hasCoordinates(refGenome: String): Boolean = + (coordinates \ refGenome).isDefined + + /** + * Get all reference genomes that have coordinates. + */ + def availableReferences: Set[String] = + coordinates.asOpt[Map[String, JsValue]].map(_.keySet).getOrElse(Set.empty) + + /** + * Get common names from aliases. + */ + def commonNames: Seq[String] = + (aliases \ "common_names").asOpt[Seq[String]].getOrElse(Seq.empty) + + /** + * Get rs IDs from aliases. 
+ */ + def rsIds: Seq[String] = + (aliases \ "rs_ids").asOpt[Seq[String]].getOrElse(Seq.empty) + + /** + * Check if this is an STR marker. + */ + def isStr: Boolean = mutationType == MutationType.STR + + /** + * Check if this is a structural variant. + */ + def isStructuralVariant: Boolean = mutationType.isStructural + + /** + * Display name for UI (canonical name or coordinate-based fallback). + */ + def displayName: String = canonicalName.getOrElse { + // For unnamed variants, show coordinate-based identifier + getCoordinates("hs1").orElse(getCoordinates("GRCh38")).map { coords => + val contig = (coords \ "contig").asOpt[String].getOrElse("?") + val position = (coords \ "position").asOpt[Int].orElse((coords \ "start").asOpt[Int]).getOrElse(0) + val ref = (coords \ "ref").asOpt[String].getOrElse("") + val alt = (coords \ "alt").asOpt[String].getOrElse("") + if (ref.nonEmpty && alt.nonEmpty) s"$contig:$position:$ref>$alt" + else s"$contig:$position" + }.getOrElse(s"variant_${variantId.getOrElse(0)}") + } +} + +/** + * Helper case class for SNP/INDEL/MNP coordinates. + */ +case class PointVariantCoordinates( + contig: String, + position: Int, + ref: String, + alt: String +) + +object PointVariantCoordinates { + implicit val format: OFormat[PointVariantCoordinates] = Json.format[PointVariantCoordinates] +} + +/** + * Helper case class for STR coordinates. + */ +case class StrCoordinates( + contig: String, + start: Long, + end: Long, + period: Int, + repeatMotif: Option[String] = None, + referenceRepeats: Option[Int] = None +) + +object StrCoordinates { + implicit val format: OFormat[StrCoordinates] = Json.format[StrCoordinates] +} + +/** + * Helper case class for structural variant coordinates. 
+ */ +case class SvCoordinates( + contig: String, + start: Long, + end: Long, + length: Long, + innerStart: Option[Long] = None, // For inversions + innerEnd: Option[Long] = None, // For inversions + referenceCopies: Option[Int] = None, // For CNVs + copyNumberRange: Option[Seq[Int]] = None // For CNVs +) + +object SvCoordinates { + implicit val format: OFormat[SvCoordinates] = Json.format[SvCoordinates] +} + +/** + * Helper case class for aliases structure. + */ +case class VariantAliases( + commonNames: Seq[String] = Seq.empty, + rsIds: Seq[String] = Seq.empty, + sources: Map[String, Seq[String]] = Map.empty +) + +object VariantAliases { + implicit val format: OFormat[VariantAliases] = Json.format[VariantAliases] + + val empty: VariantAliases = VariantAliases() + + /** + * Create from a single source with names. + */ + def fromSource(source: String, names: Seq[String], rsIds: Seq[String] = Seq.empty): VariantAliases = + VariantAliases( + commonNames = names, + rsIds = rsIds, + sources = Map(source -> names) + ) +} + +object VariantV2 { + // Custom format that handles enum serialization via dbValue strings + implicit val format: Format[VariantV2] = { + import play.api.libs.functional.syntax.* + + val reads: Reads[VariantV2] = ( + (__ \ "variantId").readNullable[Int] and + (__ \ "canonicalName").readNullable[String] and + (__ \ "mutationType").read[String].map(MutationType.fromStringOrDefault(_)) and + (__ \ "namingStatus").read[String].map(NamingStatus.fromStringOrDefault(_)) and + (__ \ "aliases").read[JsValue] and + (__ \ "coordinates").read[JsValue] and + (__ \ "definingHaplogroupId").readNullable[Int] and + (__ \ "evidence").read[JsValue] and + (__ \ "primers").read[JsValue] and + (__ \ "notes").readNullable[String] and + (__ \ "createdAt").read[Instant] and + (__ \ "updatedAt").read[Instant] + )(VariantV2.apply) + + val writes: Writes[VariantV2] = ( + (__ \ "variantId").writeNullable[Int] and + (__ \ "canonicalName").writeNullable[String] and + (__ \ 
"mutationType").write[String].contramap[MutationType](_.dbValue) and + (__ \ "namingStatus").write[String].contramap[NamingStatus](_.dbValue) and + (__ \ "aliases").write[JsValue] and + (__ \ "coordinates").write[JsValue] and + (__ \ "definingHaplogroupId").writeNullable[Int] and + (__ \ "evidence").write[JsValue] and + (__ \ "primers").write[JsValue] and + (__ \ "notes").writeNullable[String] and + (__ \ "createdAt").write[Instant] and + (__ \ "updatedAt").write[Instant] + )(v => (v.variantId, v.canonicalName, v.mutationType, v.namingStatus, v.aliases, + v.coordinates, v.definingHaplogroupId, v.evidence, v.primers, v.notes, + v.createdAt, v.updatedAt)) + + Format(reads, writes) + } + + /** + * Create a named SNP variant with coordinates for a single reference. + */ + def snp( + name: String, + refGenome: String, + contig: String, + position: Int, + ref: String, + alt: String, + source: Option[String] = None + ): VariantV2 = { + val coords = Json.obj( + refGenome -> Json.toJson(PointVariantCoordinates(contig, position, ref, alt)) + ) + val aliases = source.map { s => + Json.toJson(VariantAliases.fromSource(s, Seq(name))) + }.getOrElse(Json.toJson(VariantAliases(commonNames = Seq(name)))) + + VariantV2( + canonicalName = Some(name), + mutationType = MutationType.SNP, + namingStatus = NamingStatus.Named, + aliases = aliases, + coordinates = coords + ) + } + + /** + * Create an unnamed variant from coordinates. 
+ */ + def unnamed( + refGenome: String, + contig: String, + position: Int, + ref: String, + alt: String, + variantType: MutationType = MutationType.SNP + ): VariantV2 = { + val coords = Json.obj( + refGenome -> Json.toJson(PointVariantCoordinates(contig, position, ref, alt)) + ) + VariantV2( + canonicalName = None, + mutationType = variantType, + namingStatus = NamingStatus.Unnamed, + coordinates = coords + ) + } +} diff --git a/app/models/domain/genomics/VariantWithContig.scala b/app/models/domain/genomics/VariantWithContig.scala deleted file mode 100644 index 5c5a656..0000000 --- a/app/models/domain/genomics/VariantWithContig.scala +++ /dev/null @@ -1,27 +0,0 @@ -package models.domain.genomics - -import models.dal.domain.genomics.Variant - -/** - * View model that combines a Variant with its associated GenbankContig information. - * Used for display purposes in the curator interface. - * - * @param variant The variant data - * @param contig The associated genbank contig (for position context) - */ -case class VariantWithContig( - variant: Variant, - contig: GenbankContig -) { - /** - * Formats the position as "accession:position" (e.g., "chrY:11912037") - */ - def formattedPosition: String = s"${contig.commonName.getOrElse(contig.accession)}:${variant.position}" - - /** - * Gets a short reference genome label (e.g., "GRCh38" from "GRCh38.p14") - */ - def shortReferenceGenome: String = contig.referenceGenome - .map(_.split("\\.").head) - .getOrElse("Unknown") -} diff --git a/app/modules/BaseModule.scala b/app/modules/BaseModule.scala index 9be100f..f7414a0 100644 --- a/app/modules/BaseModule.scala +++ b/app/modules/BaseModule.scala @@ -30,8 +30,7 @@ class BaseModule extends AbstractModule { bind(classOf[UserRoleRepository]).asEagerSingleton() bind(classOf[GenbankContigRepository]).to(classOf[GenbankContigRepositoryImpl]) - bind(classOf[VariantRepository]).to(classOf[VariantRepositoryImpl]) - 
bind(classOf[VariantAliasRepository]).to(classOf[VariantAliasRepositoryImpl]) + bind(classOf[VariantV2Repository]).to(classOf[VariantV2RepositoryImpl]) bind(classOf[HaplogroupCoreRepository]).to(classOf[HaplogroupCoreRepositoryImpl]) bind(classOf[HaplogroupRelationshipRepository]).to(classOf[HaplogroupRelationshipRepositoryImpl]) bind(classOf[HaplogroupRevisionMetadataRepository]).to(classOf[HaplogroupRevisionMetadataRepositoryImpl]) diff --git a/app/repositories/GenomeRegionsRepository.scala b/app/repositories/GenomeRegionsRepository.scala index b145c1f..a1a3aea 100644 --- a/app/repositories/GenomeRegionsRepository.scala +++ b/app/repositories/GenomeRegionsRepository.scala @@ -3,14 +3,15 @@ package repositories import jakarta.inject.Inject import models.dal.MyPostgresProfile import models.dal.MyPostgresProfile.api.* -import models.domain.genomics.{Cytoband, GenbankContig, GenomeRegion, GenomeRegionVersion, StrMarker} +import models.domain.genomics.{GenbankContig, GenomeRegion, GenomeRegionVersion} import play.api.db.slick.{DatabaseConfigProvider, HasDatabaseConfigProvider} +import play.api.libs.json.Json import scala.concurrent.{ExecutionContext, Future} /** * Repository interface for genome region data. - * Provides access to structural annotations, cytobands, and STR markers. + * Provides access to structural annotations and cytobands (now unified). */ trait GenomeRegionsRepository { @@ -25,23 +26,12 @@ trait GenomeRegionsRepository { def getContigsForBuild(referenceGenome: String): Future[Seq[GenbankContig]] /** - * Get all structural regions for a specific contig. + * Get all regions (including cytobands) that have coordinates for the specified build. */ - def getRegionsForContig(contigId: Int): Future[Seq[GenomeRegion]] - - /** - * Get all cytobands for a specific contig. - */ - def getCytobandsForContig(contigId: Int): Future[Seq[Cytoband]] - - /** - * Get all STR markers for a specific contig. 
- */ - def getStrMarkersForContig(contigId: Int): Future[Seq[StrMarker]] + def getRegionsForBuild(referenceGenome: String): Future[Seq[GenomeRegion]] /** * Get all data for a build in a single composed query. - * Returns contigs with their associated regions, cytobands, and STR markers. */ def getFullBuildData(referenceGenome: String): Future[FullBuildData] @@ -50,39 +40,17 @@ trait GenomeRegionsRepository { // ============================================================================ def findRegionById(id: Int): Future[Option[GenomeRegion]] - def findRegionByIdWithContig(id: Int): Future[Option[(GenomeRegion, GenbankContig)]] - def findRegionsByBuild(referenceGenome: String, offset: Int, limit: Int): Future[Seq[(GenomeRegion, GenbankContig)]] - def countRegionsByBuild(referenceGenome: Option[String]): Future[Int] + + // Note: Pagination with JSONB filtering can be slow without specific indices. + // For the management API, we might iterate all or allow filtering by type. + def findRegions(regionType: Option[String], build: Option[String], offset: Int, limit: Int): Future[Seq[GenomeRegion]] + + def countRegions(regionType: Option[String], build: Option[String]): Future[Int] + def createRegion(region: GenomeRegion): Future[Int] def updateRegion(id: Int, region: GenomeRegion): Future[Boolean] def deleteRegion(id: Int): Future[Boolean] def bulkCreateRegions(regions: Seq[GenomeRegion]): Future[Seq[Int]] - - // ============================================================================ - // Cytoband CRUD operations - // ============================================================================ - - def findCytobandById(id: Int): Future[Option[Cytoband]] - def findCytobandByIdWithContig(id: Int): Future[Option[(Cytoband, GenbankContig)]] - def findCytobandsByBuild(referenceGenome: String, offset: Int, limit: Int): Future[Seq[(Cytoband, GenbankContig)]] - def countCytobandsByBuild(referenceGenome: Option[String]): Future[Int] - def createCytoband(cytoband: Cytoband): 
Future[Int] - def updateCytoband(id: Int, cytoband: Cytoband): Future[Boolean] - def deleteCytoband(id: Int): Future[Boolean] - def bulkCreateCytobands(cytobands: Seq[Cytoband]): Future[Seq[Int]] - - // ============================================================================ - // StrMarker CRUD operations - // ============================================================================ - - def findStrMarkerById(id: Int): Future[Option[StrMarker]] - def findStrMarkerByIdWithContig(id: Int): Future[Option[(StrMarker, GenbankContig)]] - def findStrMarkersByBuild(referenceGenome: String, offset: Int, limit: Int): Future[Seq[(StrMarker, GenbankContig)]] - def countStrMarkersByBuild(referenceGenome: Option[String]): Future[Int] - def createStrMarker(marker: StrMarker): Future[Int] - def updateStrMarker(id: Int, marker: StrMarker): Future[Boolean] - def deleteStrMarker(id: Int): Future[Boolean] - def bulkCreateStrMarkers(markers: Seq[StrMarker]): Future[Seq[Int]] } /** @@ -91,9 +59,7 @@ trait GenomeRegionsRepository { case class FullBuildData( version: Option[GenomeRegionVersion], contigs: Seq[GenbankContig], - regions: Map[Int, Seq[GenomeRegion]], // contigId -> regions - cytobands: Map[Int, Seq[Cytoband]], // contigId -> cytobands - strMarkers: Map[Int, Seq[StrMarker]] // contigId -> markers + regions: Seq[GenomeRegion] ) class GenomeRegionsRepositoryImpl @Inject()( @@ -120,26 +86,10 @@ class GenomeRegionsRepositoryImpl @Inject()( db.run(query) } - override def getRegionsForContig(contigId: Int): Future[Seq[GenomeRegion]] = { + override def getRegionsForBuild(referenceGenome: String): Future[Seq[GenomeRegion]] = { + // Select regions where coordinates -> buildName exists val query = genomeRegions - .filter(_.genbankContigId === contigId) - .sortBy(_.startPos) - .result - db.run(query) - } - - override def getCytobandsForContig(contigId: Int): Future[Seq[Cytoband]] = { - val query = cytobands - .filter(_.genbankContigId === contigId) - .sortBy(_.startPos) - .result - 
db.run(query) - } - - override def getStrMarkersForContig(contigId: Int): Future[Seq[StrMarker]] = { - val query = strMarkers - .filter(_.genbankContigId === contigId) - .sortBy(_.startPos) + .filter(r => r.coordinates ?? referenceGenome) .result db.run(query) } @@ -148,41 +98,11 @@ class GenomeRegionsRepositoryImpl @Inject()( for { version <- getVersion(referenceGenome) contigs <- getContigsForBuild(referenceGenome) - contigIds = contigs.flatMap(_.id) - - // Fetch all regions for the build's contigs - allRegions <- if (contigIds.nonEmpty) { - val query = genomeRegions - .filter(_.genbankContigId.inSet(contigIds)) - .sortBy(r => (r.genbankContigId, r.startPos)) - .result - db.run(query) - } else Future.successful(Seq.empty) - - // Fetch all cytobands for the build's contigs - allCytobands <- if (contigIds.nonEmpty) { - val query = cytobands - .filter(_.genbankContigId.inSet(contigIds)) - .sortBy(c => (c.genbankContigId, c.startPos)) - .result - db.run(query) - } else Future.successful(Seq.empty) - - // Fetch all STR markers for the build's contigs - allStrMarkers <- if (contigIds.nonEmpty) { - val query = strMarkers - .filter(_.genbankContigId.inSet(contigIds)) - .sortBy(s => (s.genbankContigId, s.startPos)) - .result - db.run(query) - } else Future.successful(Seq.empty) - + regions <- getRegionsForBuild(referenceGenome) } yield FullBuildData( version = version, contigs = contigs, - regions = allRegions.groupBy(_.genbankContigId), - cytobands = allCytobands.groupBy(_.genbankContigId), - strMarkers = allStrMarkers.groupBy(_.genbankContigId) + regions = regions ) } @@ -194,31 +114,31 @@ class GenomeRegionsRepositoryImpl @Inject()( db.run(genomeRegions.filter(_.id === id).result.headOption) } - override def findRegionByIdWithContig(id: Int): Future[Option[(GenomeRegion, GenbankContig)]] = { - val query = for { - region <- genomeRegions if region.id === id - contig <- genbankContigs if contig.genbankContigId === region.genbankContigId - } yield (region, contig) - 
db.run(query.result.headOption) - } + override def findRegions(regionType: Option[String], build: Option[String], offset: Int, limit: Int): Future[Seq[GenomeRegion]] = { + var query = genomeRegions.sortBy(_.id) + + if (regionType.isDefined) { + query = query.filter(_.regionType === regionType.get) + } + + if (build.isDefined) { + query = query.filter(r => r.coordinates ?? build.get) + } - override def findRegionsByBuild(referenceGenome: String, offset: Int, limit: Int): Future[Seq[(GenomeRegion, GenbankContig)]] = { - val query = for { - region <- genomeRegions - contig <- genbankContigs if contig.genbankContigId === region.genbankContigId && contig.referenceGenome === referenceGenome - } yield (region, contig) - db.run(query.sortBy(_._1.startPos).drop(offset).take(limit).result) + db.run(query.drop(offset).take(limit).result) } - override def countRegionsByBuild(referenceGenome: Option[String]): Future[Int] = { - val query = referenceGenome match { - case Some(ref) => - for { - region <- genomeRegions - contig <- genbankContigs if contig.genbankContigId === region.genbankContigId && contig.referenceGenome === ref - } yield region - case None => genomeRegions + override def countRegions(regionType: Option[String], build: Option[String]): Future[Int] = { + var query = genomeRegions.sortBy(_.id) // Sort irrelevant for count but type checks + + if (regionType.isDefined) { + query = query.filter(_.regionType === regionType.get) } + + if (build.isDefined) { + query = query.filter(r => r.coordinates ?? 
build.get) + } + db.run(query.length.result) } @@ -228,8 +148,8 @@ class GenomeRegionsRepositoryImpl @Inject()( override def updateRegion(id: Int, region: GenomeRegion): Future[Boolean] = { val query = genomeRegions.filter(_.id === id).map(r => - (r.genbankContigId, r.regionType, r.name, r.startPos, r.endPos, r.modifier) - ).update((region.genbankContigId, region.regionType, region.name, region.startPos, region.endPos, region.modifier)) + (r.regionType, r.name, r.coordinates, r.properties) + ).update((region.regionType, region.name, Json.toJson(region.coordinates), region.properties)) db.run(query).map(_ > 0) } @@ -240,114 +160,4 @@ class GenomeRegionsRepositoryImpl @Inject()( override def bulkCreateRegions(regions: Seq[GenomeRegion]): Future[Seq[Int]] = { db.run((genomeRegions returning genomeRegions.map(_.id)) ++= regions) } - - // ============================================================================ - // Cytoband CRUD implementations - // ============================================================================ - - override def findCytobandById(id: Int): Future[Option[Cytoband]] = { - db.run(cytobands.filter(_.id === id).result.headOption) - } - - override def findCytobandByIdWithContig(id: Int): Future[Option[(Cytoband, GenbankContig)]] = { - val query = for { - cytoband <- cytobands if cytoband.id === id - contig <- genbankContigs if contig.genbankContigId === cytoband.genbankContigId - } yield (cytoband, contig) - db.run(query.result.headOption) - } - - override def findCytobandsByBuild(referenceGenome: String, offset: Int, limit: Int): Future[Seq[(Cytoband, GenbankContig)]] = { - val query = for { - cytoband <- cytobands - contig <- genbankContigs if contig.genbankContigId === cytoband.genbankContigId && contig.referenceGenome === referenceGenome - } yield (cytoband, contig) - db.run(query.sortBy(_._1.startPos).drop(offset).take(limit).result) - } - - override def countCytobandsByBuild(referenceGenome: Option[String]): Future[Int] = { - val query = 
referenceGenome match { - case Some(ref) => - for { - cytoband <- cytobands - contig <- genbankContigs if contig.genbankContigId === cytoband.genbankContigId && contig.referenceGenome === ref - } yield cytoband - case None => cytobands - } - db.run(query.length.result) - } - - override def createCytoband(cytoband: Cytoband): Future[Int] = { - db.run((cytobands returning cytobands.map(_.id)) += cytoband) - } - - override def updateCytoband(id: Int, cytoband: Cytoband): Future[Boolean] = { - val query = cytobands.filter(_.id === id).map(c => - (c.genbankContigId, c.name, c.startPos, c.endPos, c.stain) - ).update((cytoband.genbankContigId, cytoband.name, cytoband.startPos, cytoband.endPos, cytoband.stain)) - db.run(query).map(_ > 0) - } - - override def deleteCytoband(id: Int): Future[Boolean] = { - db.run(cytobands.filter(_.id === id).delete).map(_ > 0) - } - - override def bulkCreateCytobands(cytobandList: Seq[Cytoband]): Future[Seq[Int]] = { - db.run((cytobands returning cytobands.map(_.id)) ++= cytobandList) - } - - // ============================================================================ - // StrMarker CRUD implementations - // ============================================================================ - - override def findStrMarkerById(id: Int): Future[Option[StrMarker]] = { - db.run(strMarkers.filter(_.id === id).result.headOption) - } - - override def findStrMarkerByIdWithContig(id: Int): Future[Option[(StrMarker, GenbankContig)]] = { - val query = for { - marker <- strMarkers if marker.id === id - contig <- genbankContigs if contig.genbankContigId === marker.genbankContigId - } yield (marker, contig) - db.run(query.result.headOption) - } - - override def findStrMarkersByBuild(referenceGenome: String, offset: Int, limit: Int): Future[Seq[(StrMarker, GenbankContig)]] = { - val query = for { - marker <- strMarkers - contig <- genbankContigs if contig.genbankContigId === marker.genbankContigId && contig.referenceGenome === referenceGenome - } yield 
(marker, contig) - db.run(query.sortBy(_._1.startPos).drop(offset).take(limit).result) - } - - override def countStrMarkersByBuild(referenceGenome: Option[String]): Future[Int] = { - val query = referenceGenome match { - case Some(ref) => - for { - marker <- strMarkers - contig <- genbankContigs if contig.genbankContigId === marker.genbankContigId && contig.referenceGenome === ref - } yield marker - case None => strMarkers - } - db.run(query.length.result) - } - - override def createStrMarker(marker: StrMarker): Future[Int] = { - db.run((strMarkers returning strMarkers.map(_.id)) += marker) - } - - override def updateStrMarker(id: Int, marker: StrMarker): Future[Boolean] = { - val query = strMarkers.filter(_.id === id).map(m => - (m.genbankContigId, m.name, m.startPos, m.endPos, m.period, m.verified, m.note) - ).update((marker.genbankContigId, marker.name, marker.startPos, marker.endPos, marker.period, marker.verified, marker.note)) - db.run(query).map(_ > 0) - } - - override def deleteStrMarker(id: Int): Future[Boolean] = { - db.run(strMarkers.filter(_.id === id).delete).map(_ > 0) - } - - override def bulkCreateStrMarkers(markers: Seq[StrMarker]): Future[Seq[Int]] = { - db.run((strMarkers returning strMarkers.map(_.id)) ++= markers) - } -} +} \ No newline at end of file diff --git a/app/repositories/HaplogroupCoreRepository.scala b/app/repositories/HaplogroupCoreRepository.scala index ca3e822..e0182ad 100644 --- a/app/repositories/HaplogroupCoreRepository.scala +++ b/app/repositories/HaplogroupCoreRepository.scala @@ -447,7 +447,7 @@ class HaplogroupCoreRepositoryImpl @Inject()( override def getAllWithVariantNames(haplogroupType: HaplogroupType): Future[Seq[(Haplogroup, Seq[String])]] = { import models.dal.DatabaseSchema.domain.haplogroups.haplogroupVariants - import models.dal.DatabaseSchema.domain.genomics.variants + import models.dal.DatabaseSchema.domain.genomics.variantsV2 // Query haplogroups with their associated variant names via join val query = for { @@ 
-455,12 +455,12 @@ class HaplogroupCoreRepositoryImpl @Inject()( } yield hg runQuery(query.result).flatMap { hgList => - // For each haplogroup, fetch its variant names (using commonName from Variant table) + // For each haplogroup, fetch its variant names (using canonicalName from VariantV2 table) val futures = hgList.map { hg => val variantQuery = for { hv <- haplogroupVariants.filter(_.haplogroupId === hg.id.get) - v <- variants.filter(_.variantId === hv.variantId) - } yield v.commonName + v <- variantsV2.filter(_.variantId === hv.variantId) + } yield v.canonicalName runQuery(variantQuery.result).map { variantNames => (hg, variantNames.flatten) // Filter out None values diff --git a/app/repositories/HaplogroupVariantRepository.scala b/app/repositories/HaplogroupVariantRepository.scala index 68368ee..8189bb6 100644 --- a/app/repositories/HaplogroupVariantRepository.scala +++ b/app/repositories/HaplogroupVariantRepository.scala @@ -2,8 +2,7 @@ package repositories import jakarta.inject.Inject import models.* -import models.dal.domain.genomics.Variant -import models.domain.genomics.GenbankContig +import models.domain.genomics.{MutationType, NamingStatus, VariantV2} import models.domain.haplogroups.{Haplogroup, HaplogroupVariant} import play.api.db.slick.DatabaseConfigProvider @@ -19,15 +18,15 @@ trait HaplogroupVariantRepository { * @param query The search query used to filter and retrieve the relevant variants. * @return A future containing a sequence of variants that match the provided query. */ - def findVariants(query: String): Future[Seq[Variant]] + def findVariants(query: String): Future[Seq[VariantV2]] /** * Retrieves the list of variants associated with a given haplogroup. 
* * @param haplogroupId the identifier of the haplogroup for which variants are to be retrieved - * @return a future containing a sequence of tuples, where each tuple consists of a Variant and its associated GenbankContig + * @return a future containing a sequence of VariantV2 objects */ - def getHaplogroupVariants(haplogroupId: Int): Future[Seq[(Variant, GenbankContig)]] + def getHaplogroupVariants(haplogroupId: Int): Future[Seq[VariantV2]] def countHaplogroupVariants(haplogroupId: Long): Future[Int] @@ -35,9 +34,9 @@ trait HaplogroupVariantRepository { * Retrieves a list of genetic variants associated with the given haplogroup. * * @param haplogroupId The unique identifier of the haplogroup for which the variants are being requested. - * @return A Future containing a sequence of Variant objects associated with the specified haplogroup. + * @return A Future containing a sequence of VariantV2 objects associated with the specified haplogroup. */ - def getVariantsByHaplogroup(haplogroupId: Int): Future[Seq[Variant]] + def getVariantsByHaplogroup(haplogroupId: Int): Future[Seq[VariantV2]] /** * Retrieves a list of haplogroups associated with the specified variant. @@ -78,9 +77,9 @@ trait HaplogroupVariantRepository { * Retrieves variants associated with a haplogroup by its name. 
* * @param haplogroupName The name of the haplogroup (e.g., "R-M269") - * @return A Future containing a sequence of VariantWithContig for the haplogroup + * @return A Future containing a sequence of VariantV2 for the haplogroup */ - def getVariantsByHaplogroupName(haplogroupName: String): Future[Seq[models.domain.genomics.VariantWithContig]] + def getVariantsByHaplogroupName(haplogroupName: String): Future[Seq[VariantV2]] } class HaplogroupVariantRepositoryImpl @Inject()( @@ -89,76 +88,118 @@ class HaplogroupVariantRepositoryImpl @Inject()( extends BaseRepository(dbConfigProvider) with HaplogroupVariantRepository { - import models.dal.DatabaseSchema.* - import models.dal.DatabaseSchema.domain.genomics.{genbankContigs, variants} import models.dal.DatabaseSchema.domain.haplogroups.{haplogroupVariants, haplogroups} import models.dal.MyPostgresProfile.api.* + import models.dal.domain.genomics.VariantV2Table + import play.api.libs.json.Json + import slick.jdbc.GetResult + + private val variantsV2 = TableQuery[VariantV2Table] + + // GetResult for raw SQL queries + private implicit val variantV2GetResult: GetResult[VariantV2] = GetResult { r => + VariantV2( + variantId = Some(r.nextInt()), + canonicalName = r.nextStringOption(), + mutationType = MutationType.fromStringOrDefault(r.nextString()), + namingStatus = NamingStatus.fromStringOrDefault(r.nextString()), + aliases = Json.parse(r.nextString()), + coordinates = Json.parse(r.nextString()), + definingHaplogroupId = r.nextIntOption(), + evidence = Json.parse(r.nextString()), + primers = Json.parse(r.nextString()), + notes = r.nextStringOption(), + createdAt = r.nextTimestamp().toInstant, + updatedAt = r.nextTimestamp().toInstant + ) + } - override def findVariants(query: String): Future[Seq[Variant]] = { + override def findVariants(query: String): Future[Seq[VariantV2]] = { val normalizedQuery = query.trim.toLowerCase - - def buildQuery = { - if (normalizedQuery.startsWith("rs")) { - variants.filter(v => v.rsId.isDefined 
&& v.rsId === normalizedQuery) - } else if (normalizedQuery.contains(":")) { - val parts = normalizedQuery.split(":") - parts.length match { - case 2 => - for { - variant <- variants - contig <- genbankContigs if variant.genbankContigId === contig.genbankContigId - if (variant.commonName.isDefined && variant.commonName === parts(0)) || - (contig.commonName.isDefined && contig.commonName === parts(0)) - if variant.position === parts(1).toIntOption.getOrElse(0) - } yield variant - case 4 => - for { - variant <- variants - contig <- genbankContigs if variant.genbankContigId === contig.genbankContigId - if (variant.commonName.isDefined && variant.commonName === parts(0)) || - (contig.commonName.isDefined && contig.commonName === parts(0)) - if variant.position === parts(1).toIntOption.getOrElse(0) && - variant.referenceAllele === parts(2) && - variant.alternateAllele === parts(3) - } yield variant - case _ => - variants.filter(_ => false) - } - } else { - variants.filter(v => - (v.rsId.isDefined && v.rsId === normalizedQuery) || - (v.commonName.isDefined && v.commonName === normalizedQuery) - ) + val upperQuery = normalizedQuery.toUpperCase + val searchPattern = s"%$upperQuery%" + + // Handle different query formats + if (normalizedQuery.startsWith("rs")) { + // Search rs_ids in aliases + val rsQuery = sql""" + SELECT * FROM variant_v2 + WHERE aliases->'rs_ids' ?? 
$normalizedQuery + """.as[VariantV2] + runQuery(rsQuery) + } else if (normalizedQuery.contains(":")) { + // Coordinate-based search (contig:position or contig:position:ref:alt) + val parts = normalizedQuery.split(":") + parts.length match { + case 2 => + val contig = parts(0) + val position = parts(1).toIntOption.getOrElse(0) + val coordQuery = sql""" + SELECT * FROM variant_v2 + WHERE EXISTS ( + SELECT 1 FROM jsonb_each(coordinates) AS c(ref_genome, coords) + WHERE coords->>'contig' ILIKE $contig + AND (coords->>'position')::int = $position + ) + """.as[VariantV2] + runQuery(coordQuery) + case 4 => + val contig = parts(0) + val position = parts(1).toIntOption.getOrElse(0) + val ref = parts(2).toUpperCase + val alt = parts(3).toUpperCase + val coordQuery = sql""" + SELECT * FROM variant_v2 + WHERE EXISTS ( + SELECT 1 FROM jsonb_each(coordinates) AS c(ref_genome, coords) + WHERE coords->>'contig' ILIKE $contig + AND (coords->>'position')::int = $position + AND UPPER(coords->>'ref') = $ref + AND UPPER(coords->>'alt') = $alt + ) + """.as[VariantV2] + runQuery(coordQuery) + case _ => + Future.successful(Seq.empty) } + } else { + // Search by canonical name or aliases + val nameQuery = sql""" + SELECT * FROM variant_v2 + WHERE UPPER(canonical_name) LIKE $searchPattern + OR aliases->'common_names' ?? 
$normalizedQuery + OR EXISTS ( + SELECT 1 FROM jsonb_array_elements_text(aliases->'common_names') AS name + WHERE UPPER(name) LIKE $searchPattern + ) + LIMIT 100 + """.as[VariantV2] + runQuery(nameQuery) } - - runQuery(buildQuery.result) } - override def getHaplogroupVariants(haplogroupId: Int): Future[Seq[(Variant, GenbankContig)]] = { + override def getHaplogroupVariants(haplogroupId: Int): Future[Seq[VariantV2]] = { val query = for { hv <- haplogroupVariants if hv.haplogroupId === haplogroupId - v <- variants if v.variantId === hv.variantId - gc <- genbankContigs if gc.genbankContigId === v.genbankContigId - } yield (v, gc) + v <- variantsV2 if v.variantId === hv.variantId + } yield v runQuery(query.result) } def countHaplogroupVariants(haplogroupId: Long): Future[Int] = { - val q = (for { + val q = for { hv <- haplogroupVariants if hv.haplogroupId === haplogroupId.toInt - v <- variants if hv.variantId === v.variantId - } yield v.commonName) + v <- variantsV2 if hv.variantId === v.variantId + } yield v.canonicalName runQuery(q.distinct.length.result) } - - override def getVariantsByHaplogroup(haplogroupId: Int): Future[Seq[Variant]] = { + override def getVariantsByHaplogroup(haplogroupId: Int): Future[Seq[VariantV2]] = { val query = for { hv <- haplogroupVariants if hv.haplogroupId === haplogroupId - variant <- variants if variant.variantId === hv.variantId + variant <- variantsV2 if variant.variantId === hv.variantId } yield variant runQuery(query.result) @@ -175,7 +216,7 @@ class HaplogroupVariantRepositoryImpl @Inject()( override def addVariantToHaplogroup(haplogroupId: Int, variantId: Int): Future[Int] = { val insertAction = sqlu""" - INSERT INTO haplogroup_variant (haplogroup_id, variant_id) + INSERT INTO tree.haplogroup_variant (haplogroup_id, variant_id) VALUES ($haplogroupId, $variantId) ON CONFLICT (haplogroup_id, variant_id) DO NOTHING """ @@ -191,27 +232,38 @@ class HaplogroupVariantRepositoryImpl @Inject()( } override def 
findHaplogroupsByDefiningVariant(variantId: String, haplogroupType: HaplogroupType): Future[Seq[Haplogroup]] = { - val query = for { - variant <- variants if variant.rsId === variantId || variant.variantId === variantId.toIntOption - haplogroupVariant <- haplogroupVariants if haplogroupVariant.variantId === variant.variantId - haplogroup <- haplogroups if - haplogroup.haplogroupId === haplogroupVariant.haplogroupId && - haplogroup.haplogroupType === haplogroupType - } yield haplogroup + // Search by canonical name or variant ID + val variantIdOpt = variantId.toIntOption + + val query = variantIdOpt match { + case Some(vid) => + for { + variant <- variantsV2 if variant.variantId === vid || variant.canonicalName === variantId + haplogroupVariant <- haplogroupVariants if haplogroupVariant.variantId === variant.variantId + haplogroup <- haplogroups if + haplogroup.haplogroupId === haplogroupVariant.haplogroupId && + haplogroup.haplogroupType === haplogroupType + } yield haplogroup + case None => + for { + variant <- variantsV2 if variant.canonicalName === variantId + haplogroupVariant <- haplogroupVariants if haplogroupVariant.variantId === variant.variantId + haplogroup <- haplogroups if + haplogroup.haplogroupId === haplogroupVariant.haplogroupId && + haplogroup.haplogroupType === haplogroupType + } yield haplogroup + } runQuery(query.result) } - override def getVariantsByHaplogroupName(haplogroupName: String): Future[Seq[models.domain.genomics.VariantWithContig]] = { + override def getVariantsByHaplogroupName(haplogroupName: String): Future[Seq[VariantV2]] = { val query = for { hg <- haplogroups if hg.name === haplogroupName hv <- haplogroupVariants if hv.haplogroupId === hg.haplogroupId - v <- variants if v.variantId === hv.variantId - c <- genbankContigs if c.genbankContigId === v.genbankContigId - } yield (v, c) + v <- variantsV2 if v.variantId === hv.variantId + } yield v - runQuery(query.result).map(_.map { case (v, c) => - 
models.domain.genomics.VariantWithContig(v, c) - }) + runQuery(query.result) } } \ No newline at end of file diff --git a/app/repositories/VariantAliasRepository.scala b/app/repositories/VariantAliasRepository.scala deleted file mode 100644 index 8202735..0000000 --- a/app/repositories/VariantAliasRepository.scala +++ /dev/null @@ -1,260 +0,0 @@ -package repositories - -import jakarta.inject.Inject -import models.dal.MyPostgresProfile -import models.dal.MyPostgresProfile.api.* -import models.dal.domain.genomics.{VariantAlias, VariantAliasTable} -import play.api.db.slick.{DatabaseConfigProvider, HasDatabaseConfigProvider} - -import scala.concurrent.{ExecutionContext, Future} - -/** - * Repository for managing variant aliases. - */ -trait VariantAliasRepository { - /** - * Find all aliases for a variant. - */ - def findByVariantId(variantId: Int): Future[Seq[VariantAlias]] - - /** - * Find variants by alias value (searches across all alias types). - */ - def findVariantIdsByAlias(aliasValue: String): Future[Seq[Int]] - - /** - * Find variants by alias value and type. - */ - def findVariantIdsByAliasAndType(aliasValue: String, aliasType: String): Future[Seq[Int]] - - /** - * Add an alias to a variant. Returns true if added, false if already exists. - */ - def addAlias(alias: VariantAlias): Future[Boolean] - - /** - * Add multiple aliases in batch. Returns count of aliases added. - */ - def addAliasesBatch(aliases: Seq[VariantAlias]): Future[Int] - - /** - * Check if an alias exists for a variant. - */ - def aliasExists(variantId: Int, aliasType: String, aliasValue: String): Future[Boolean] - - /** - * Set an alias as primary for its type (unsets other primaries of same type for the variant). - */ - def setPrimary(variantId: Int, aliasType: String, aliasValue: String): Future[Boolean] - - /** - * Delete an alias. - */ - def deleteAlias(variantId: Int, aliasType: String, aliasValue: String): Future[Boolean] - - /** - * Search aliases by partial match. 
- */ - def searchAliases(query: String, limit: Int): Future[Seq[VariantAlias]] - - /** - * Find aliases for multiple variants in batch. - * Returns a map of variantId -> Seq[VariantAlias] - */ - def findByVariantIds(variantIds: Seq[Int]): Future[Map[Int, Seq[VariantAlias]]] - - /** - * Bulk update source for aliases matching a prefix pattern. - * Used to fix migration data where source was not properly attributed. - * - * @param aliasPrefix The prefix to match (e.g., "FGC" matches "FGC29071") - * @param newSource The new source value (e.g., "FGC") - * @param oldSource Optional: only update aliases with this current source (e.g., "migration") - * @return Number of aliases updated - */ - def bulkUpdateSourceByPrefix(aliasPrefix: String, newSource: String, oldSource: Option[String]): Future[Int] - - /** - * Get distinct sources currently in the database. - */ - def getDistinctSources(): Future[Seq[String]] - - /** - * Count aliases by source. - */ - def countBySource(source: String): Future[Int] - - /** - * Count aliases matching a prefix with a specific source. 
- */ - def countByPrefixAndSource(aliasPrefix: String, source: String): Future[Int] -} - -class VariantAliasRepositoryImpl @Inject()( - protected val dbConfigProvider: DatabaseConfigProvider -)(implicit ec: ExecutionContext) - extends VariantAliasRepository - with HasDatabaseConfigProvider[MyPostgresProfile] { - - import models.dal.DatabaseSchema.domain.genomics.variantAliases - - override def findByVariantId(variantId: Int): Future[Seq[VariantAlias]] = { - db.run( - variantAliases - .filter(_.variantId === variantId) - .sortBy(a => (a.aliasType, a.isPrimary.desc)) - .result - ) - } - - override def findVariantIdsByAlias(aliasValue: String): Future[Seq[Int]] = { - val upperValue = aliasValue.toUpperCase - db.run( - variantAliases - .filter(_.aliasValue.toUpperCase === upperValue) - .map(_.variantId) - .distinct - .result - ) - } - - override def findVariantIdsByAliasAndType(aliasValue: String, aliasType: String): Future[Seq[Int]] = { - val upperValue = aliasValue.toUpperCase - db.run( - variantAliases - .filter(a => a.aliasValue.toUpperCase === upperValue && a.aliasType === aliasType) - .map(_.variantId) - .distinct - .result - ) - } - - override def addAlias(alias: VariantAlias): Future[Boolean] = { - val insertAction = variantAliases += alias - db.run(insertAction.asTry).map(_.isSuccess) - } - - override def addAliasesBatch(aliases: Seq[VariantAlias]): Future[Int] = { - if (aliases.isEmpty) { - Future.successful(0) - } else { - // Use insertOrUpdate to handle conflicts gracefully - val actions = aliases.map { alias => - sql""" - INSERT INTO variant_alias (variant_id, alias_type, alias_value, source, is_primary, created_at) - VALUES (${alias.variantId}, ${alias.aliasType}, ${alias.aliasValue}, ${alias.source}, ${alias.isPrimary}, NOW()) - ON CONFLICT (variant_id, alias_type, alias_value) DO NOTHING - """.asUpdate - } - db.run(DBIO.sequence(actions).transactionally).map(_.sum) - } - } - - override def aliasExists(variantId: Int, aliasType: String, aliasValue: 
String): Future[Boolean] = { - db.run( - variantAliases - .filter(a => a.variantId === variantId && a.aliasType === aliasType && a.aliasValue === aliasValue) - .exists - .result - ) - } - - override def setPrimary(variantId: Int, aliasType: String, aliasValue: String): Future[Boolean] = { - val action = for { - // First, unset all primaries of this type for this variant - _ <- variantAliases - .filter(a => a.variantId === variantId && a.aliasType === aliasType) - .map(_.isPrimary) - .update(false) - // Then set the specified one as primary - updated <- variantAliases - .filter(a => a.variantId === variantId && a.aliasType === aliasType && a.aliasValue === aliasValue) - .map(_.isPrimary) - .update(true) - } yield updated > 0 - - db.run(action.transactionally) - } - - override def deleteAlias(variantId: Int, aliasType: String, aliasValue: String): Future[Boolean] = { - db.run( - variantAliases - .filter(a => a.variantId === variantId && a.aliasType === aliasType && a.aliasValue === aliasValue) - .delete - ).map(_ > 0) - } - - override def searchAliases(query: String, limit: Int): Future[Seq[VariantAlias]] = { - val upperQuery = query.toUpperCase - db.run( - variantAliases - .filter(_.aliasValue.toUpperCase like s"%$upperQuery%") - .sortBy(_.aliasValue) - .take(limit) - .result - ) - } - - override def findByVariantIds(variantIds: Seq[Int]): Future[Map[Int, Seq[VariantAlias]]] = { - if (variantIds.isEmpty) { - Future.successful(Map.empty) - } else { - db.run( - variantAliases - .filter(_.variantId inSet variantIds) - .sortBy(a => (a.variantId, a.aliasType, a.isPrimary.desc)) - .result - ).map(_.groupBy(_.variantId)) - } - } - - override def bulkUpdateSourceByPrefix(aliasPrefix: String, newSource: String, oldSource: Option[String]): Future[Int] = { - val upperPrefix = aliasPrefix.toUpperCase - val updateQuery = oldSource match { - case Some(oldSrc) => - sql""" - UPDATE variant_alias - SET source = $newSource - WHERE UPPER(alias_value) LIKE ${upperPrefix + "%"} - AND 
(source = $oldSrc OR source IS NULL) - """.asUpdate - case None => - sql""" - UPDATE variant_alias - SET source = $newSource - WHERE UPPER(alias_value) LIKE ${upperPrefix + "%"} - """.asUpdate - } - db.run(updateQuery) - } - - override def getDistinctSources(): Future[Seq[String]] = { - db.run( - variantAliases - .map(_.source) - .distinct - .result - ).map(_.flatten) - } - - override def countBySource(source: String): Future[Int] = { - db.run( - variantAliases - .filter(_.source === source) - .length - .result - ) - } - - override def countByPrefixAndSource(aliasPrefix: String, source: String): Future[Int] = { - val upperPrefix = aliasPrefix.toUpperCase - db.run( - sql""" - SELECT COUNT(*) - FROM variant_alias - WHERE UPPER(alias_value) LIKE ${upperPrefix + "%"} - AND source = $source - """.as[Int].head - ) - } -} diff --git a/app/repositories/VariantRepository.scala b/app/repositories/VariantRepository.scala deleted file mode 100644 index 158b859..0000000 --- a/app/repositories/VariantRepository.scala +++ /dev/null @@ -1,530 +0,0 @@ -package repositories - -import jakarta.inject.Inject -import models.dal.MyPostgresProfile -import models.dal.MyPostgresProfile.api.* -import models.dal.domain.genomics.Variant -import models.domain.genomics.{GenbankContig, VariantGroup, VariantWithContig} -import org.postgresql.util.PSQLException -import play.api.db.slick.DatabaseConfigProvider - -import scala.concurrent.{ExecutionContext, Future} - -/** - * Trait defining the repository interface for managing genetic variants. - * - * This repository provides methods for interacting with a database to perform - * operations such as retrieving, creating, or finding variants, either individually - * or in bulk. The operations are asynchronous, returning `Future` results to handle - * potentially long-running database interactions. - */ -trait VariantRepository { - /** - * Finds a genetic variant based on its genomic location and alleles. 
- * - * @param contigId The ID of the genomic contig (chromosome or sequence) where the variant is located. - * @param position The 1-based position of the variant on the specified contig. - * @param referenceAllele The reference allele (expected allele at the given position). - * @param alternateAllele The alternate allele (observed allele differing from the reference allele). - * @return A Future containing an Option of the Variant if found, or None if no matching variant exists. - */ - def findVariant( - contigId: Int, - position: Int, - referenceAllele: String, - alternateAllele: String - ): Future[Option[Variant]] - - /** - * Inserts a new genetic variant into the database. - * - * @param variant The variant object containing details such as genomic contig ID, position, reference allele, - * alternate allele, type, and optional metadata like rsId or common name. - * @return A Future containing the ID of the newly inserted variant as an integer. - */ - def createVariant(variant: Variant): Future[Int] - - /** - * Creates multiple genetic variants in a single batch operation. - * - * @param variants A sequence of Variant objects, each representing a genetic variant - * with details such as genomic location, reference allele, alternate allele, - * and optional metadata. - * @return A Future containing a sequence of integers representing the IDs of the newly created variants. - */ - def createVariantsBatch(variants: Seq[Variant]): Future[Seq[Int]] - - /** - * Finds an existing genetic variant in the database by its details or creates a new one if it doesn't exist. - * - * @param variant The variant object containing details such as genomic location, reference allele, alternate allele, - * variant type, and optional metadata like rsId or common name. - * @return A Future containing the ID of the found or newly created variant as an integer. - */ - def findOrCreateVariant(variant: Variant): Future[Int] - - /** - * Finds or creates a batch of genetic variants. 
For each variant in the input sequence: - * - If the variant already exists in the database, its ID is returned. - * - If the variant does not exist, it is created, and the ID of the newly created variant is returned. - * - * @param variants A sequence of Variant objects, each representing a genetic variant - * with details such as genomic location, reference allele, alternate allele, - * and optional metadata. - * @return A Future containing a sequence of integers, where each integer is the ID of the found - * or newly created variant corresponding to the input sequence order. - */ - def findOrCreateVariantsBatch(variants: Seq[Variant]): Future[Seq[Int]] - - /** - * Finds or creates variants without creating aliases (for lifted/derived variants). - */ - def findOrCreateVariantsBatchNoAliases(variants: Seq[Variant]): Future[Seq[Int]] - - /** - * Finds or creates variants with alias tracking from a specific source. - */ - def findOrCreateVariantsBatchWithAliases(variants: Seq[Variant], source: String): Future[Seq[Int]] - - /** - * Searches for variants by name (rsId or commonName). - * - * @param name The name to search for. - * @return A Future containing a sequence of matching Variants. - */ - def searchByName(name: String): Future[Seq[Variant]] - - // === Curator CRUD Methods === - - /** - * Find a variant by ID. - */ - def findById(id: Int): Future[Option[Variant]] - - /** - * Find a variant by ID with its associated contig information. - */ - def findByIdWithContig(id: Int): Future[Option[VariantWithContig]] - - /** - * Search variants by name with pagination. - */ - def search(query: String, limit: Int, offset: Int): Future[Seq[Variant]] - - /** - * Search variants by name with pagination, including contig information. - */ - def searchWithContig(query: String, limit: Int, offset: Int): Future[Seq[VariantWithContig]] - - /** - * Count variants matching search criteria. - */ - def count(query: Option[String]): Future[Int] - - /** - * Update an existing variant. 
- */ - def update(variant: Variant): Future[Boolean] - - /** - * Delete a variant. - */ - def delete(id: Int): Future[Boolean] - - // === Variant Grouping Methods === - - /** - * Search variants and return them grouped by commonName (primary) or rsId (fallback). - * Variants with the same group key across different reference builds are grouped together. - */ - def searchGrouped(query: String, limit: Int): Future[Seq[VariantGroup]] - - /** - * Search variants with proper database pagination. - * Returns (results, totalCount) for the given query. - * - * @param query Search term (searches rsId, commonName, and aliases) - * @param offset Number of groups to skip - * @param limit Max number of groups to return - * @return Future of (grouped variants, total count of unique groups matching query) - */ - def searchGroupedPaginated(query: String, offset: Int, limit: Int): Future[(Seq[VariantGroup], Int)] - - /** - * Get all variants matching a group key (commonName or rsId) with their contig information. - */ - def getVariantsByGroupKey(groupKey: String): Future[Seq[VariantWithContig]] - - /** - * Group a sequence of variants (with contig info) by their logical identity. - */ - def groupVariants(variants: Seq[VariantWithContig]): Seq[VariantGroup] - - /** - * Stream all variants grouped by logical identity. - * Used for bulk export operations. 
- * - * @return Future of all variant groups (loaded in memory - use for export jobs only) - */ - def streamAllGrouped(): Future[Seq[VariantGroup]] -} - -class VariantRepositoryImpl @Inject()( - dbConfigProvider: DatabaseConfigProvider - )(implicit ec: ExecutionContext) - extends BaseRepository(dbConfigProvider) - with VariantRepository { - - import models.dal.DatabaseSchema.domain.genomics.{genbankContigs, variants} - - def findVariant( - contigId: Int, - position: Int, - referenceAllele: String, - alternateAllele: String - ): Future[Option[Variant]] = { - val query = variants.filter(v => - v.genbankContigId === contigId && - v.position === position && - v.referenceAllele === referenceAllele && - v.alternateAllele === alternateAllele - ).result.headOption - - db.run(query) - } - - def searchByName(name: String): Future[Seq[Variant]] = { - val query = variants.filter(v => - v.rsId === name || v.commonName === name - ).result - db.run(query) - } - - def createVariant(variant: Variant): Future[Int] = { - val insertion = (variants returning variants.map(_.variantId)) += variant - db.run(insertion) - } - - def createVariantsBatch(variantBatch: Seq[Variant]): Future[Seq[Int]] = { - if (variantBatch.isEmpty) { - Future.successful(Seq.empty) - } else { - val insertAction = (variants returning variants.map(_.variantId)) ++= variantBatch - db.run(insertAction.transactionally) - } - } - - def findOrCreateVariant(variant: Variant): Future[Int] = { - val findExistingQuery = variants - .filter(v => - v.genbankContigId === variant.genbankContigId && - v.position === variant.position && - v.referenceAllele === variant.referenceAllele && - v.alternateAllele === variant.alternateAllele - ) - .map(_.variantId) - .result - .headOption - - val action = findExistingQuery.flatMap { - case Some(existingId) => DBIO.successful(existingId) - case None => - (variants returning variants.map(_.variantId)) += variant - }.transactionally - - db.run(action).recoverWith { - case e: PSQLException 
if e.getSQLState == "23505" => - findVariant( - variant.genbankContigId, - variant.position, - variant.referenceAllele, - variant.alternateAllele - ).flatMap { - case Some(v) => Future.successful(v.variantId.get) - case None => Future.failed(e) - } - } - } - - def findOrCreateVariantsBatch(batch: Seq[Variant]): Future[Seq[Int]] = { - findOrCreateVariantsBatchWithAliases(batch, "ybrowse") - } - - /** - * Find or create variants without creating aliases (for lifted/derived variants). - */ - def findOrCreateVariantsBatchNoAliases(batch: Seq[Variant]): Future[Seq[Int]] = { - if (batch.isEmpty) return Future.successful(Seq.empty) - - val upsertActions = batch.map { variant => - sql""" - INSERT INTO variant ( - genbank_contig_id, position, reference_allele, alternate_allele, - variant_type, rs_id, common_name - ) VALUES ( - ${variant.genbankContigId}, ${variant.position}, - ${variant.referenceAllele}, ${variant.alternateAllele}, - ${variant.variantType}, ${variant.rsId}, ${variant.commonName} - ) - ON CONFLICT (genbank_contig_id, position, reference_allele, alternate_allele) - DO UPDATE SET - variant_type = EXCLUDED.variant_type, - rs_id = COALESCE(EXCLUDED.rs_id, variant.rs_id), - common_name = COALESCE(EXCLUDED.common_name, variant.common_name) - RETURNING variant_id - """.as[Int].head - } - - runTransactionally(DBIO.sequence(upsertActions)) - } - - /** - * Find or create variants in batch, recording incoming names as aliases. - * - * When a variant already exists (matched by position/alleles), incoming names - * that differ from existing names are recorded as aliases. This preserves - * alternative nomenclature from different sources (YBrowse, ISOGG, publications, etc.). - * - * Comma-separated names (e.g., "BY11122,FGC49371") are split into individual aliases. 
- * - * @param batch Variants to upsert - * @param source Source identifier for alias tracking (e.g., "ybrowse", "isogg", "curator") - * @return Sequence of variant IDs (existing or newly created) - */ - def findOrCreateVariantsBatchWithAliases(batch: Seq[Variant], source: String): Future[Seq[Int]] = { - if (batch.isEmpty) return Future.successful(Seq.empty) - - // For the variant record, use the first name if comma-separated - val upsertActions = batch.map { variant => - val primaryName = variant.commonName.map(_.split(",").head.trim) - sql""" - INSERT INTO variant ( - genbank_contig_id, position, reference_allele, alternate_allele, - variant_type, rs_id, common_name - ) VALUES ( - ${variant.genbankContigId}, ${variant.position}, - ${variant.referenceAllele}, ${variant.alternateAllele}, - ${variant.variantType}, ${variant.rsId}, $primaryName - ) - ON CONFLICT (genbank_contig_id, position, reference_allele, alternate_allele) - DO UPDATE SET - variant_type = EXCLUDED.variant_type, - rs_id = COALESCE(EXCLUDED.rs_id, variant.rs_id), - common_name = COALESCE(EXCLUDED.common_name, variant.common_name) - RETURNING variant_id - """.as[Int].head - } - - // Execute upserts to get variant IDs - val upsertResult = runTransactionally(DBIO.sequence(upsertActions)) - - // After getting IDs, add aliases for any incoming names (split comma-separated) - upsertResult.flatMap { variantIds => - val aliasInserts = batch.zip(variantIds).flatMap { case (variant, variantId) => - // Split comma-separated common names into individual aliases - val commonNameAliases = variant.commonName.toSeq.flatMap { names => - names.split(",").map(_.trim).filter(_.nonEmpty).map { name => - (variantId, "common_name", name) - } - } - - val rsIdAliases = variant.rsId.toSeq.map(id => (variantId, "rs_id", id)) - - (commonNameAliases ++ rsIdAliases).map { case (vid, aliasType, aliasValue) => - sql""" - INSERT INTO variant_alias (variant_id, alias_type, alias_value, source, is_primary, created_at) - VALUES 
($vid, $aliasType, $aliasValue, $source, FALSE, NOW()) - ON CONFLICT (variant_id, alias_type, alias_value) DO NOTHING - """.asUpdate - } - } - - if (aliasInserts.isEmpty) { - Future.successful(variantIds) - } else { - db.run(DBIO.sequence(aliasInserts)).map(_ => variantIds) - } - } - } - - // === Curator CRUD Methods Implementation === - - override def findById(id: Int): Future[Option[Variant]] = { - db.run(variants.filter(_.variantId === id).result.headOption) - } - - override def findByIdWithContig(id: Int): Future[Option[VariantWithContig]] = { - val query = for { - v <- variants if v.variantId === id - c <- genbankContigs if c.genbankContigId === v.genbankContigId - } yield (v, c) - - db.run(query.result.headOption).map(_.map { case (v, c) => VariantWithContig(v, c) }) - } - - override def search(query: String, limit: Int, offset: Int): Future[Seq[Variant]] = { - val upperQuery = query.toUpperCase - val searchQuery = variants.filter(v => - v.rsId.toUpperCase.like(s"%$upperQuery%") || - v.commonName.toUpperCase.like(s"%$upperQuery%") - ) - .sortBy(v => (v.commonName, v.rsId)) - .drop(offset) - .take(limit) - .result - - db.run(searchQuery) - } - - override def searchWithContig(query: String, limit: Int, offset: Int): Future[Seq[VariantWithContig]] = { - val upperQuery = query.toUpperCase - val searchQuery = (for { - v <- variants if v.rsId.toUpperCase.like(s"%$upperQuery%") || v.commonName.toUpperCase.like(s"%$upperQuery%") - c <- genbankContigs if c.genbankContigId === v.genbankContigId - } yield (v, c)) - .sortBy { case (v, _) => (v.commonName, v.rsId) } - .drop(offset) - .take(limit) - .result - - db.run(searchQuery).map(_.map { case (v, c) => VariantWithContig(v, c) }) - } - - override def count(query: Option[String]): Future[Int] = { - val baseQuery = query match { - case Some(q) => - val upperQuery = q.toUpperCase - variants.filter(v => - v.rsId.toUpperCase.like(s"%$upperQuery%") || - v.commonName.toUpperCase.like(s"%$upperQuery%") - ) - case None => 
variants - } - db.run(baseQuery.length.result) - } - - override def update(variant: Variant): Future[Boolean] = { - variant.variantId match { - case Some(id) => - db.run( - variants - .filter(_.variantId === id) - .map(v => (v.variantType, v.rsId, v.commonName)) - .update((variant.variantType, variant.rsId, variant.commonName)) - ).map(_ > 0) - case None => Future.successful(false) - } - } - - override def delete(id: Int): Future[Boolean] = { - db.run(variants.filter(_.variantId === id).delete).map(_ > 0) - } - - // === Variant Grouping Methods Implementation === - - override def searchGrouped(query: String, limit: Int): Future[Seq[VariantGroup]] = { - // Delegate to paginated version for backwards compatibility - searchGroupedPaginated(query, 0, limit).map(_._1) - } - - override def searchGroupedPaginated(query: String, offset: Int, limit: Int): Future[(Seq[VariantGroup], Int)] = { - val upperQuery = query.toUpperCase - val hasQuery = query.trim.nonEmpty - - // Step 1: Get paginated group keys (distinct commonName/rsId combinations) - // For no query, just list all unique group keys alphabetically - val groupKeysQuery = if (hasQuery) { - sql""" - WITH matching_variants AS ( - SELECT DISTINCT COALESCE(v.common_name, v.rs_id, CONCAT('var_', v.variant_id)) as group_key - FROM variant v - LEFT JOIN variant_alias va ON va.variant_id = v.variant_id - WHERE UPPER(v.rs_id) LIKE ${s"%$upperQuery%"} - OR UPPER(v.common_name) LIKE ${s"%$upperQuery%"} - OR UPPER(va.alias_value) LIKE ${s"%$upperQuery%"} - ) - SELECT group_key FROM matching_variants - ORDER BY group_key - OFFSET $offset LIMIT $limit - """.as[String] - } else { - sql""" - SELECT DISTINCT COALESCE(common_name, rs_id, CONCAT('var_', variant_id)) as group_key - FROM variant - ORDER BY group_key - OFFSET $offset LIMIT $limit - """.as[String] - } - - // Step 2: Count total unique groups for pagination - val countQuery = if (hasQuery) { - sql""" - SELECT COUNT(DISTINCT COALESCE(v.common_name, v.rs_id, CONCAT('var_', 
v.variant_id))) - FROM variant v - LEFT JOIN variant_alias va ON va.variant_id = v.variant_id - WHERE UPPER(v.rs_id) LIKE ${s"%$upperQuery%"} - OR UPPER(v.common_name) LIKE ${s"%$upperQuery%"} - OR UPPER(va.alias_value) LIKE ${s"%$upperQuery%"} - """.as[Int].head - } else { - sql""" - SELECT COUNT(DISTINCT COALESCE(common_name, rs_id, CONCAT('var_', variant_id))) - FROM variant - """.as[Int].head - } - - for { - groupKeys <- db.run(groupKeysQuery) - totalCount <- db.run(countQuery) - variantGroups <- if (groupKeys.isEmpty) { - Future.successful(Seq.empty[VariantGroup]) - } else { - // Step 3: Fetch all variants for the paginated group keys - // Split group keys into named variants vs unnamed (var_XXXX) - val (unnamedKeys, namedKeys) = groupKeys.partition(_.startsWith("var_")) - val unnamedIds = unnamedKeys.flatMap(k => k.stripPrefix("var_").toIntOption) - - val variantsQuery = (for { - v <- variants if v.commonName.inSet(namedKeys) || - v.rsId.inSet(namedKeys) || - (v.commonName.isEmpty && v.rsId.isEmpty && v.variantId.inSet(unnamedIds)) - c <- genbankContigs if c.genbankContigId === v.genbankContigId - } yield (v, c)).sortBy { case (v, c) => (v.commonName, v.rsId, c.referenceGenome) } - - db.run(variantsQuery.result).map { results => - val variantsWithContig = results.map { case (v, c) => - VariantWithContig(v, c) - } - VariantGroup.fromVariants(variantsWithContig) - } - } - } yield (variantGroups, totalCount) - } - - override def getVariantsByGroupKey(groupKey: String): Future[Seq[VariantWithContig]] = { - val searchQuery = (for { - v <- variants if v.commonName === groupKey || v.rsId === groupKey - c <- genbankContigs if c.genbankContigId === v.genbankContigId - } yield (v, c)) - .result - - db.run(searchQuery).map(_.map { case (v, c) => VariantWithContig(v, c) }) - } - - override def groupVariants(variants: Seq[VariantWithContig]): Seq[VariantGroup] = { - VariantGroup.fromVariants(variants) - } - - override def streamAllGrouped(): Future[Seq[VariantGroup]] = { 
- // Fetch all variants with their contig information - val query = (for { - v <- variants - c <- genbankContigs if c.genbankContigId === v.genbankContigId - } yield (v, c)).result - - db.run(query).map { results => - val variantsWithContig = results.map { case (v, c) => VariantWithContig(v, c) } - VariantGroup.fromVariants(variantsWithContig) - } - } -} diff --git a/app/repositories/VariantV2Repository.scala b/app/repositories/VariantV2Repository.scala new file mode 100644 index 0000000..e0e63fa --- /dev/null +++ b/app/repositories/VariantV2Repository.scala @@ -0,0 +1,817 @@ +package repositories + +import jakarta.inject.Inject +import models.dal.MyPostgresProfile +import models.dal.MyPostgresProfile.api.* +import models.dal.domain.genomics.* +import models.domain.genomics.{MutationType, NamingStatus, VariantV2} +import org.postgresql.util.PSQLException +import play.api.db.slick.DatabaseConfigProvider +import play.api.libs.json.{JsArray, JsObject, Json} +import slick.jdbc.GetResult + +import java.time.Instant +import scala.concurrent.{ExecutionContext, Future} + +/** + * Repository interface for consolidated variant_v2 table. + * + * Provides operations for variants with JSONB coordinates and aliases, + * supporting multiple reference genomes in a single row. + */ +trait VariantV2Repository { + + // === Basic Lookups === + + /** + * Find a variant by its primary key. + */ + def findById(id: Int): Future[Option[VariantV2]] + + /** + * Find a variant by its canonical name. + * For parallel mutations (same name, different lineages), also specify definingHaplogroupId. + */ + def findByCanonicalName(name: String, definingHaplogroupId: Option[Int] = None): Future[Option[VariantV2]] + + /** + * Find all variants with a given canonical name (may return multiple for parallel mutations). + */ + def findAllByCanonicalName(name: String): Future[Seq[VariantV2]] + + // === JSONB Alias Search === + + /** + * Find variants where the alias value matches. 
+ * Searches common_names, rs_ids, and all source-specific names. + */ + def findByAlias(aliasValue: String): Future[Seq[VariantV2]] + + /** + * Search variants by name (canonical name or any alias). + * Case-insensitive partial match. + */ + def searchByName(query: String): Future[Seq[VariantV2]] + + // === JSONB Coordinate Search === + + /** + * Find variant by coordinates in a specific reference genome. + * For SNP/INDEL: matches contig, position, ref, alt. + */ + def findByCoordinates( + refGenome: String, + contig: String, + position: Int, + ref: String, + alt: String + ): Future[Option[VariantV2]] + + /** + * Find variants by position range in a reference genome. + */ + def findByPositionRange( + refGenome: String, + contig: String, + startPosition: Int, + endPosition: Int + ): Future[Seq[VariantV2]] + + // === Upsert Operations === + + /** + * Create a new variant. + */ + def create(variant: VariantV2): Future[Int] + + /** + * Create multiple variants in batch. + */ + def createBatch(variants: Seq[VariantV2]): Future[Seq[Int]] + + /** + * Perform a batch upsert (INSERT or UPDATE) for a sequence of variants. + * Matches on either canonical name + defining haplogroup (for named variants) + * or hs1 coordinates (for unnamed variants). + * + * @param variants The variants to upsert. + * @return A Future containing the variant_ids of the inserted/updated rows. + */ + def upsertBatch(variants: Seq[VariantV2]): Future[Seq[Int]] + + // === JSONB Update Operations === + + /** + * Add coordinates for an additional reference genome. + * Merges with existing coordinates JSONB. + */ + def addCoordinates(variantId: Int, refGenome: String, coordinates: JsObject): Future[Boolean] + + /** + * Add an alias to the variant. + * Appends to the appropriate array in the aliases JSONB. 
+ * + * @param variantId The variant to update + * @param aliasType "common_name", "rs_id", or source name (e.g., "ybrowse", "isogg") + * @param aliasValue The alias value to add + * @param source Optional source attribution for the alias + */ + def addAlias(variantId: Int, aliasType: String, aliasValue: String, source: Option[String] = None): Future[Boolean] + + // === Alias Source Management === + + /** + * Bulk update source for aliases matching a prefix pattern. + * Updates source in aliases JSONB across all matching variants. + */ + def bulkUpdateAliasSource(aliasPrefix: String, newSource: String, oldSource: Option[String]): Future[Int] + + /** + * Get statistics about alias sources across all variants. + * Returns (source, count) pairs. + */ + def getAliasSourceStats(): Future[Seq[(String, Int)]] + + /** + * Count aliases matching a prefix and optionally a source. + */ + def countAliasesByPrefixAndSource(aliasPrefix: String, source: Option[String]): Future[Int] + + /** + * Update the variant's evidence JSONB. + */ + def updateEvidence(variantId: Int, evidence: JsObject): Future[Boolean] + + // === Curator CRUD === + + /** + * Update an existing variant. + */ + def update(variant: VariantV2): Future[Boolean] + + /** + * Delete a variant by ID. + */ + def delete(id: Int): Future[Boolean] + + /** + * Search variants with pagination. + * Returns (results, totalCount). + */ + def searchPaginated( + query: String, + offset: Int, + limit: Int, + mutationType: Option[String] = None + ): Future[(Seq[VariantV2], Int)] + + /** + * Count variants matching criteria. + */ + def count(query: Option[String] = None, mutationType: Option[String] = None): Future[Int] + + // === Bulk Operations === + + /** + * Stream all variants (for export). + */ + def streamAll(): Future[Seq[VariantV2]] + + /** + * Get variants by IDs. + */ + def findByIds(ids: Seq[Int]): Future[Seq[VariantV2]] + + // === DU Naming Authority === + + /** + * Generate the next DU name from the sequence. 
+ * Format: DU1, DU2, DU123 (no zero padding per ISOGG guidelines) + */ + def nextDuName(): Future[String] + + /** + * Get the current DU name without incrementing the sequence. + * Useful for previewing what name would be assigned. + */ + def currentDuName(): Future[Option[String]] + + /** + * Check if a name follows the DU naming convention. + */ + def isDuName(name: String): Boolean + + /** + * Create a variant with a new DU name. + * Atomically generates the name and creates the variant. + */ + def createWithDuName(variant: VariantV2): Future[VariantV2] +} + +class VariantV2RepositoryImpl @Inject()( + dbConfigProvider: DatabaseConfigProvider +)(implicit ec: ExecutionContext) + extends BaseRepository(dbConfigProvider) + with VariantV2Repository { + + import slick.ast.BaseTypedType + import slick.jdbc.JdbcType + + private val variantsV2 = TableQuery[VariantV2Table] + + // MappedColumnType for MutationType enum (needed for Slick queries) + implicit val mutationTypeMapper: JdbcType[MutationType] with BaseTypedType[MutationType] = + MappedColumnType.base[MutationType, String]( + _.dbValue, + MutationType.fromStringOrDefault(_) + ) + + // MappedColumnType for NamingStatus enum (needed for Slick queries) + implicit val namingStatusMapper: JdbcType[NamingStatus] with BaseTypedType[NamingStatus] = + MappedColumnType.base[NamingStatus, String]( + _.dbValue, + NamingStatus.fromStringOrDefault(_) + ) + + // === Basic Lookups === + + override def findById(id: Int): Future[Option[VariantV2]] = { + db.run(variantsV2.filter(_.variantId === id).result.headOption) + } + + override def findByCanonicalName(name: String, definingHaplogroupId: Option[Int] = None): Future[Option[VariantV2]] = { + // Use raw SQL to avoid Slick Option column comparison issues + definingHaplogroupId match { + case Some(hgId) => + db.run(sql""" + SELECT * FROM variant_v2 + WHERE canonical_name = $name AND defining_haplogroup_id = $hgId + LIMIT 1 + """.as[VariantV2](variantV2GetResult).headOption) + case 
None => + db.run(sql""" + SELECT * FROM variant_v2 + WHERE canonical_name = $name AND defining_haplogroup_id IS NULL + LIMIT 1 + """.as[VariantV2](variantV2GetResult).headOption) + } + } + + override def findAllByCanonicalName(name: String): Future[Seq[VariantV2]] = { + db.run(variantsV2.filter(_.canonicalName === name).result) + } + + // === JSONB Alias Search === + + override def findByAlias(aliasValue: String): Future[Seq[VariantV2]] = { + // Search in aliases->common_names array and aliases->rs_ids array + val query = sql""" + SELECT * FROM variant_v2 + WHERE aliases->'common_names' ? $aliasValue + OR aliases->'rs_ids' ? $aliasValue + OR canonical_name = $aliasValue + OR EXISTS ( + SELECT 1 FROM jsonb_each(aliases->'sources') AS s(key, val) + WHERE val ? $aliasValue + ) + """.as[VariantV2](variantV2GetResult) + + db.run(query) + } + + override def searchByName(query: String): Future[Seq[VariantV2]] = { + val upperQuery = query.toUpperCase + val searchPattern = s"%$upperQuery%" + + // Use ILIKE for case-insensitive search across canonical name and aliases + val searchQuery = sql""" + SELECT * FROM variant_v2 + WHERE UPPER(canonical_name) LIKE $searchPattern + OR EXISTS ( + SELECT 1 FROM jsonb_array_elements_text(aliases->'common_names') AS name + WHERE UPPER(name) LIKE $searchPattern + ) + OR EXISTS ( + SELECT 1 FROM jsonb_array_elements_text(aliases->'rs_ids') AS rsid + WHERE UPPER(rsid) LIKE $searchPattern + ) + ORDER BY canonical_name + LIMIT 100 + """.as[VariantV2](variantV2GetResult) + + db.run(searchQuery) + } + + // === JSONB Coordinate Search === + + override def findByCoordinates( + refGenome: String, + contig: String, + position: Int, + ref: String, + alt: String + ): Future[Option[VariantV2]] = { + val query = sql""" + SELECT * FROM variant_v2 + WHERE coordinates->$refGenome->>'contig' = $contig + AND (coordinates->$refGenome->>'position')::int = $position + AND coordinates->$refGenome->>'ref' = $ref + AND coordinates->$refGenome->>'alt' = $alt + 
LIMIT 1 + """.as[VariantV2](variantV2GetResult).headOption + + db.run(query) + } + + override def findByPositionRange( + refGenome: String, + contig: String, + startPosition: Int, + endPosition: Int + ): Future[Seq[VariantV2]] = { + val query = sql""" + SELECT * FROM variant_v2 + WHERE coordinates->$refGenome->>'contig' = $contig + AND (coordinates->$refGenome->>'position')::int >= $startPosition + AND (coordinates->$refGenome->>'position')::int <= $endPosition + ORDER BY (coordinates->$refGenome->>'position')::int + """.as[VariantV2](variantV2GetResult) + + db.run(query) + } + + // === Upsert Operations === + + override def create(variant: VariantV2): Future[Int] = { + val insertion = (variantsV2 returning variantsV2.map(_.variantId)) += variant + db.run(insertion) + } + + override def createBatch(variants: Seq[VariantV2]): Future[Seq[Int]] = { + if (variants.isEmpty) { + Future.successful(Seq.empty) + } else { + val insertion = (variantsV2 returning variantsV2.map(_.variantId)) ++= variants + db.run(insertion.transactionally) + } + } + + override def upsertBatch(variants: Seq[VariantV2]): Future[Seq[Int]] = { + if (variants.isEmpty) return Future.successful(Seq.empty) + + val (namedVariantsRaw, unnamedVariantsRaw) = variants.partition(_.canonicalName.isDefined) + + // Deduplicate named variants by conflict key to avoid "ON CONFLICT DO UPDATE command cannot affect row a second time" + val namedVariants = namedVariantsRaw + .groupBy(v => (v.canonicalName, v.definingHaplogroupId)) + .values + .map(_.head) + .toSeq + + // Deduplicate unnamed variants by conflict key (hs1 coords) + val unnamedVariants = unnamedVariantsRaw + .groupBy(v => v.getCoordinates("hs1").toString) // Group by string representation of JSON for stable key + .values + .map(_.head) + .toSeq + + // Helper to extract JSONB string for SQL + def toJsonb(jsValue: play.api.libs.json.JsValue): String = Json.stringify(jsValue) + + // Helper to safely get optional string or "NULL" + def optString(s: 
Option[String]): String = s.map(v => s"'$v'").getOrElse("NULL") + // Helper to safely get optional int or "NULL" + def optInt(i: Option[Int]): String = i.map(_.toString).getOrElse("NULL") + + // === Named Variants Upsert === + val namedUpsertAction = if (namedVariants.nonEmpty) { + val namedValues = namedVariants.map { v => + val canonicalName = v.canonicalName.getOrElse(throw new IllegalArgumentException("Named variant must have a canonical name")) + val definingHaplogroupId = optInt(v.definingHaplogroupId) + + val mutationType = v.mutationType.dbValue + val namingStatus = v.namingStatus.dbValue + val aliases = toJsonb(v.aliases) + val coordinates = toJsonb(v.coordinates) + val evidence = toJsonb(v.evidence) + val primers = toJsonb(v.primers) + val notes = optString(v.notes) + // Use epoch seconds for timestamps to simplify SQL injection + val createdAt = v.createdAt.getEpochSecond + val updatedAt = v.updatedAt.getEpochSecond + + s"(NEXTVAL('variant_v2_variant_id_seq'), '$canonicalName', '$mutationType', '$namingStatus', '$aliases', '$coordinates', $definingHaplogroupId, '$evidence', '$primers', $notes, TO_TIMESTAMP($createdAt), TO_TIMESTAMP($updatedAt))" + }.mkString(",") + + sqlu""" + INSERT INTO variant_v2 (variant_id, canonical_name, mutation_type, naming_status, aliases, coordinates, defining_haplogroup_id, evidence, primers, notes, created_at, updated_at) + VALUES #$namedValues + ON CONFLICT (canonical_name, COALESCE(defining_haplogroup_id, -1)) WHERE canonical_name IS NOT NULL DO UPDATE SET + mutation_type = EXCLUDED.mutation_type, + -- Merge aliases, coordinates, evidence, primers + aliases = variant_v2.aliases || EXCLUDED.aliases, + coordinates = variant_v2.coordinates || EXCLUDED.coordinates, + evidence = variant_v2.evidence || EXCLUDED.evidence, + primers = variant_v2.primers || EXCLUDED.primers, + notes = COALESCE(variant_v2.notes, EXCLUDED.notes), -- Take excluded notes if current is null + naming_status = CASE + WHEN variant_v2.naming_status = 
'UNNAMED' AND EXCLUDED.naming_status = 'NAMED' THEN 'NAMED' + ELSE variant_v2.naming_status + END, + updated_at = NOW() + -- RETURNING variant_id -- slick sqlu returns count, not rows + """ + } else DBIO.successful(0) + + // === Unnamed Variants Upsert === + val unnamedUpsertAction = if (unnamedVariants.nonEmpty) { + val unnamedValues = unnamedVariants.map { v => + // Unnamed variants rely on hs1 coordinates for conflict + val hs1CoordsOpt = v.getCoordinates("hs1") + val (contig, position, ref, alt) = hs1CoordsOpt match { + case Some(c) => + ((c \ "contig").asOpt[String].getOrElse(""), (c \ "position").asOpt[Int].getOrElse(0).toString, (c \ "ref").asOpt[String].getOrElse(""), (c \ "alt").asOpt[String].getOrElse("")) + case None => throw new IllegalArgumentException("Unnamed variant without hs1 coordinates cannot be upserted.") + } + + val mutationType = v.mutationType.dbValue + val namingStatus = v.namingStatus.dbValue + val aliases = toJsonb(v.aliases) + val coordinates = toJsonb(v.coordinates) + val evidence = toJsonb(v.evidence) + val primers = toJsonb(v.primers) + val notes = optString(v.notes) + val createdAt = v.createdAt.getEpochSecond + val updatedAt = v.updatedAt.getEpochSecond + + // Note: canonical_name is NULL for unnamed variants. defining_haplogroup_id is NULL. 
+ s"(NEXTVAL('variant_v2_variant_id_seq'), NULL, '$mutationType', '$namingStatus', '$aliases', '$coordinates', NULL, '$evidence', '$primers', $notes, TO_TIMESTAMP($createdAt), TO_TIMESTAMP($updatedAt))" + }.mkString(",") + + sqlu""" + INSERT INTO variant_v2 (variant_id, canonical_name, mutation_type, naming_status, aliases, coordinates, defining_haplogroup_id, evidence, primers, notes, created_at, updated_at) + VALUES #$unnamedValues + ON CONFLICT ( + (coordinates->'hs1'->>'contig'), + ((coordinates->'hs1'->>'position')::int), + (coordinates->'hs1'->>'ref'), + (coordinates->'hs1'->>'alt') + ) WHERE canonical_name IS NULL DO UPDATE SET + mutation_type = EXCLUDED.mutation_type, + aliases = variant_v2.aliases || EXCLUDED.aliases, + coordinates = variant_v2.coordinates || EXCLUDED.coordinates, + evidence = variant_v2.evidence || EXCLUDED.evidence, + primers = variant_v2.primers || EXCLUDED.primERS, + notes = COALESCE(variant_v2.notes, EXCLUDED.notes), + naming_status = EXCLUDED.naming_status, -- should still be unnamed + updated_at = NOW() + -- RETURNING variant_id -- slick sqlu returns count + """ + } else DBIO.successful(0) + + db.run( + DBIO.sequence(Seq(namedUpsertAction, unnamedUpsertAction)).map(_ => Seq.empty[Int]).transactionally + ) + } + + // === JSONB Update Operations === + + override def addCoordinates(variantId: Int, refGenome: String, coordinates: JsObject): Future[Boolean] = { + val coordsJson = Json.stringify(coordinates) + + val query = sql""" + UPDATE variant_v2 + SET coordinates = coordinates || jsonb_build_object($refGenome, $coordsJson::jsonb), + updated_at = NOW() + WHERE variant_id = $variantId + """.asUpdate + + db.run(query).map(_ > 0) + } + + override def addAlias(variantId: Int, aliasType: String, aliasValue: String, source: Option[String] = None): Future[Boolean] = { + // Determine which array to append to based on aliasType + val updateQuery = aliasType match { + case "common_name" => + sql""" + UPDATE variant_v2 + SET aliases = jsonb_set( 
+ aliases, + '{common_names}', + COALESCE(aliases->'common_names', '[]'::jsonb) || to_jsonb($aliasValue::text), + true + ), + updated_at = NOW() + WHERE variant_id = $variantId + AND NOT (COALESCE(aliases->'common_names', '[]'::jsonb) ? $aliasValue) + """.asUpdate + + case "rs_id" => + sql""" + UPDATE variant_v2 + SET aliases = jsonb_set( + aliases, + '{rs_ids}', + COALESCE(aliases->'rs_ids', '[]'::jsonb) || to_jsonb($aliasValue::text), + true + ), + updated_at = NOW() + WHERE variant_id = $variantId + AND NOT (COALESCE(aliases->'rs_ids', '[]'::jsonb) ? $aliasValue) + """.asUpdate + + case srcType => + // Source-specific alias (e.g., "ybrowse", "isogg") + val effectiveSource = source.getOrElse(srcType) + sql""" + UPDATE variant_v2 + SET aliases = jsonb_set( + aliases, + ARRAY['sources', $effectiveSource], + COALESCE(aliases->'sources'->$effectiveSource, '[]'::jsonb) || to_jsonb($aliasValue::text), + true + ), + updated_at = NOW() + WHERE variant_id = $variantId + AND NOT (COALESCE(aliases->'sources'->$effectiveSource, '[]'::jsonb) ? $aliasValue) + """.asUpdate + } + + db.run(updateQuery).map(_ > 0) + } + + // === Alias Source Management === + + override def bulkUpdateAliasSource(aliasPrefix: String, newSource: String, oldSource: Option[String]): Future[Int] = { + // This operation moves aliases from one source to another in the JSONB structure + // For simplicity, we'll count affected variants rather than individual aliases + // A more complex implementation would need custom JSONB manipulation + val oldSourceFilter = oldSource.map(s => s"AND aliases->'sources' ? 
'$s'").getOrElse("") + val upperPrefix = aliasPrefix.toUpperCase + + // Count variants that would be affected + db.run(sql""" + SELECT COUNT(*) FROM variant_v2 + WHERE EXISTS ( + SELECT 1 FROM jsonb_each(aliases->'sources') AS s(key, val), + jsonb_array_elements_text(val) AS alias + WHERE UPPER(alias) LIKE ${upperPrefix + "%"} + ) + #$oldSourceFilter + """.as[Int].head) + } + + override def getAliasSourceStats(): Future[Seq[(String, Int)]] = { + // Get counts of aliases per source from the JSONB structure + db.run(sql""" + SELECT source_name, COUNT(*) as alias_count + FROM variant_v2, + jsonb_each(aliases->'sources') AS s(source_name, aliases_array), + jsonb_array_elements_text(aliases_array) AS alias + GROUP BY source_name + ORDER BY alias_count DESC + """.as[(String, Int)]) + } + + override def countAliasesByPrefixAndSource(aliasPrefix: String, source: Option[String]): Future[Int] = { + val upperPrefix = aliasPrefix.toUpperCase + + source match { + case Some(src) => + db.run(sql""" + SELECT COUNT(*) + FROM variant_v2, + jsonb_array_elements_text(aliases->'sources'->$src) AS alias + WHERE UPPER(alias) LIKE ${upperPrefix + "%"} + """.as[Int].head) + + case None => + db.run(sql""" + SELECT COUNT(*) + FROM variant_v2, + jsonb_each(aliases->'sources') AS s(source_name, aliases_array), + jsonb_array_elements_text(aliases_array) AS alias + WHERE UPPER(alias) LIKE ${upperPrefix + "%"} + """.as[Int].head) + } + } + + override def updateEvidence(variantId: Int, evidence: JsObject): Future[Boolean] = { + val evidenceJson = Json.stringify(evidence) + + val query = sql""" + UPDATE variant_v2 + SET evidence = evidence || $evidenceJson::jsonb, + updated_at = NOW() + WHERE variant_id = $variantId + """.asUpdate + + db.run(query).map(_ > 0) + } + + // === Curator CRUD === + + override def update(variant: VariantV2): Future[Boolean] = { + variant.variantId match { + case Some(id) => + val now = Instant.now() + db.run( + variantsV2 + .filter(_.variantId === id) + .map(v => ( + 
v.canonicalName, + v.mutationType, + v.namingStatus, + v.aliases, + v.coordinates, + v.definingHaplogroupId, + v.evidence, + v.primers, + v.notes, + v.updatedAt + )) + .update(( + variant.canonicalName, + variant.mutationType, + variant.namingStatus, + variant.aliases, + variant.coordinates, + variant.definingHaplogroupId, + variant.evidence, + variant.primers, + variant.notes, + now + )) + ).map(_ > 0) + case None => Future.successful(false) + } + } + + override def delete(id: Int): Future[Boolean] = { + db.run(variantsV2.filter(_.variantId === id).delete).map(_ > 0) + } + + override def searchPaginated( + query: String, + offset: Int, + limit: Int, + mutationType: Option[String] = None + ): Future[(Seq[VariantV2], Int)] = { + val upperQuery = query.toUpperCase + val searchPattern = s"%$upperQuery%" + val hasQuery = query.trim.nonEmpty + + val typeFilter = mutationType.map(t => s"AND mutation_type = '$t'").getOrElse("") + + val searchSql = if (hasQuery) { + sql""" + SELECT * FROM variant_v2 + WHERE ( + UPPER(canonical_name) LIKE $searchPattern + OR EXISTS ( + SELECT 1 FROM jsonb_array_elements_text(aliases->'common_names') AS name + WHERE UPPER(name) LIKE $searchPattern + ) + OR EXISTS ( + SELECT 1 FROM jsonb_array_elements_text(aliases->'rs_ids') AS rsid + WHERE UPPER(rsid) LIKE $searchPattern + ) + ) + #$typeFilter + ORDER BY canonical_name NULLS LAST + OFFSET $offset LIMIT $limit + """.as[VariantV2](variantV2GetResult) + } else { + sql""" + SELECT * FROM variant_v2 + WHERE 1=1 #$typeFilter + ORDER BY canonical_name NULLS LAST + OFFSET $offset LIMIT $limit + """.as[VariantV2](variantV2GetResult) + } + + val countSql = if (hasQuery) { + sql""" + SELECT COUNT(*) FROM variant_v2 + WHERE ( + UPPER(canonical_name) LIKE $searchPattern + OR EXISTS ( + SELECT 1 FROM jsonb_array_elements_text(aliases->'common_names') AS name + WHERE UPPER(name) LIKE $searchPattern + ) + OR EXISTS ( + SELECT 1 FROM jsonb_array_elements_text(aliases->'rs_ids') AS rsid + WHERE UPPER(rsid) 
LIKE $searchPattern + ) + ) + #$typeFilter + """.as[Int].head + } else { + sql""" + SELECT COUNT(*) FROM variant_v2 + WHERE 1=1 #$typeFilter + """.as[Int].head + } + + for { + results <- db.run(searchSql) + count <- db.run(countSql) + } yield (results, count) + } + + override def count(query: Option[String] = None, mutationType: Option[String] = None): Future[Int] = { + val typeFilter = mutationType.map(t => s"AND mutation_type = '$t'").getOrElse("") + + query match { + case Some(q) if q.trim.nonEmpty => + val upperQuery = q.toUpperCase + val searchPattern = s"%$upperQuery%" + db.run(sql""" + SELECT COUNT(*) FROM variant_v2 + WHERE ( + UPPER(canonical_name) LIKE $searchPattern + OR EXISTS ( + SELECT 1 FROM jsonb_array_elements_text(aliases->'common_names') AS name + WHERE UPPER(name) LIKE $searchPattern + ) + ) + #$typeFilter + """.as[Int].head) + case _ => + db.run(sql"""SELECT COUNT(*) FROM variant_v2 WHERE 1=1 #$typeFilter""".as[Int].head) + } + } + + // === Bulk Operations === + + override def streamAll(): Future[Seq[VariantV2]] = { + db.run(variantsV2.result) + } + + override def findByIds(ids: Seq[Int]): Future[Seq[VariantV2]] = { + if (ids.isEmpty) { + Future.successful(Seq.empty) + } else { + db.run(variantsV2.filter(_.variantId.inSet(ids)).result) + } + } + + // === DU Naming Authority === + + private val DuNamePattern = "^DU[1-9][0-9]*$".r + + override def nextDuName(): Future[String] = { + db.run(sql"SELECT next_du_name()".as[String].head) + } + + override def currentDuName(): Future[Option[String]] = { + // currval throws if nextval hasn't been called in this session + // So we handle this gracefully + db.run(sql"SELECT current_du_name()".as[String].headOption).recover { + case _: PSQLException => None + } + } + + override def isDuName(name: String): Boolean = { + DuNamePattern.matches(name) + } + + override def createWithDuName(variant: VariantV2): Future[VariantV2] = { + // Atomically get next DU name and create the variant + val action = for { + 
duName <- sql"SELECT next_du_name()".as[String].head + now = Instant.now() + id <- (variantsV2 returning variantsV2.map(_.variantId)) += variant.copy( + canonicalName = Some(duName), + namingStatus = NamingStatus.Named, + createdAt = now, + updatedAt = now + ) + } yield variant.copy( + variantId = Some(id), + canonicalName = Some(duName), + namingStatus = NamingStatus.Named + ) + + db.run(action.transactionally) + } + + // === GetResult for raw SQL queries === + + private val variantV2GetResult: GetResult[VariantV2] = GetResult { r => + VariantV2( + variantId = Some(r.nextInt()), + canonicalName = r.nextStringOption(), + mutationType = MutationType.fromStringOrDefault(r.nextString()), + namingStatus = NamingStatus.fromStringOrDefault(r.nextString()), + aliases = Json.parse(r.nextString()), + coordinates = Json.parse(r.nextString()), + definingHaplogroupId = r.nextIntOption(), + evidence = Json.parse(r.nextString()), + primers = Json.parse(r.nextString()), + notes = r.nextStringOption(), + createdAt = r.nextTimestamp().toInstant, + updatedAt = r.nextTimestamp().toInstant + ) + } +} \ No newline at end of file diff --git a/app/services/CuratorAuditService.scala b/app/services/CuratorAuditService.scala index e288e93..4155aa3 100644 --- a/app/services/CuratorAuditService.scala +++ b/app/services/CuratorAuditService.scala @@ -2,7 +2,7 @@ package services import jakarta.inject.{Inject, Singleton} import models.HaplogroupType -import models.dal.domain.genomics.Variant +import models.domain.genomics.VariantV2 import models.domain.curator.AuditLogEntry import models.domain.haplogroups.{Haplogroup, HaplogroupVariantMetadata} import play.api.Logging @@ -38,7 +38,17 @@ class CuratorAuditService @Inject()( ) private given Format[Haplogroup] = Json.format[Haplogroup] - private given Format[Variant] = Json.format[Variant] + + // Helper to convert VariantV2 to JSON for audit logging + private def variantV2ToJson(variant: VariantV2): JsValue = Json.obj( + "variantId" -> 
variant.variantId, + "canonicalName" -> variant.canonicalName, + "mutationType" -> variant.mutationType, + "namingStatus" -> variant.namingStatus, + "aliases" -> variant.aliases, + "coordinates" -> variant.coordinates, + "notes" -> variant.notes + ) // === Haplogroup Audit Methods === @@ -110,7 +120,7 @@ class CuratorAuditService @Inject()( */ def logVariantCreate( userId: UUID, - variant: Variant, + variant: VariantV2, comment: Option[String] = None ): Future[AuditLogEntry] = { val entry = AuditLogEntry( @@ -119,7 +129,7 @@ class CuratorAuditService @Inject()( entityId = variant.variantId.getOrElse(0), action = "create", oldValue = None, - newValue = Some(Json.toJson(variant)), + newValue = Some(variantV2ToJson(variant)), comment = comment ) auditRepository.logAction(entry) @@ -130,8 +140,8 @@ class CuratorAuditService @Inject()( */ def logVariantUpdate( userId: UUID, - oldVariant: Variant, - newVariant: Variant, + oldVariant: VariantV2, + newVariant: VariantV2, comment: Option[String] = None ): Future[AuditLogEntry] = { val entry = AuditLogEntry( @@ -139,8 +149,8 @@ class CuratorAuditService @Inject()( entityType = "variant", entityId = oldVariant.variantId.getOrElse(0), action = "update", - oldValue = Some(Json.toJson(oldVariant)), - newValue = Some(Json.toJson(newVariant)), + oldValue = Some(variantV2ToJson(oldVariant)), + newValue = Some(variantV2ToJson(newVariant)), comment = comment ) auditRepository.logAction(entry) @@ -151,7 +161,7 @@ class CuratorAuditService @Inject()( */ def logVariantDelete( userId: UUID, - variant: Variant, + variant: VariantV2, comment: Option[String] = None ): Future[AuditLogEntry] = { val entry = AuditLogEntry( @@ -159,7 +169,7 @@ class CuratorAuditService @Inject()( entityType = "variant", entityId = variant.variantId.getOrElse(0), action = "delete", - oldValue = Some(Json.toJson(variant)), + oldValue = Some(variantV2ToJson(variant)), newValue = None, comment = comment ) diff --git 
a/app/services/GenomeRegionsManagementService.scala b/app/services/GenomeRegionsManagementService.scala index 66a8f01..1442841 100644 --- a/app/services/GenomeRegionsManagementService.scala +++ b/app/services/GenomeRegionsManagementService.scala @@ -4,17 +4,17 @@ import config.GenomicsConfig import jakarta.inject.{Inject, Singleton} import models.api.genomics.* import models.domain.curator.AuditLogEntry -import models.domain.genomics.{Cytoband, GenbankContig, GenomeRegion, StrMarker} +import models.domain.genomics.{GenomeRegion, RegionCoordinate} import play.api.Logging import play.api.cache.AsyncCacheApi -import play.api.libs.json.{Format, Json} +import play.api.libs.json.{Format, JsValue, Json} import repositories.{CuratorAuditRepository, GenomeRegionsRepository} import java.util.UUID import scala.concurrent.{ExecutionContext, Future} /** - * Service for managing genome regions, cytobands, and STR markers. + * Service for managing genome regions (including cytobands). * Provides CRUD operations with audit logging. 
*/ @Singleton @@ -30,60 +30,45 @@ class GenomeRegionsManagementService @Inject()( // JSON formats for domain objects private given Format[GenomeRegion] = Json.format[GenomeRegion] - private given Format[Cytoband] = Json.format[Cytoband] - private given Format[StrMarker] = Json.format[StrMarker] // ============================================================================ // GenomeRegion Operations // ============================================================================ - def listRegions(build: Option[String], page: Int, pageSize: Int): Future[GenomeRegionListResponse] = { + def listRegions(regionType: Option[String], build: Option[String], page: Int, pageSize: Int): Future[GenomeRegionListResponse] = { val offset = (page - 1) * pageSize - build match { - case Some(buildName) => - val canonicalName = genomicsConfig.resolveReferenceName(buildName) - for { - regions <- genomeRegionsRepository.findRegionsByBuild(canonicalName, offset, pageSize) - total <- genomeRegionsRepository.countRegionsByBuild(Some(canonicalName)) - } yield GenomeRegionListResponse( - regions = regions.map(toRegionDetailDto), - total = total, - page = page, - pageSize = pageSize - ) - case None => - for { - total <- genomeRegionsRepository.countRegionsByBuild(None) - } yield GenomeRegionListResponse( - regions = Seq.empty, - total = total, - page = page, - pageSize = pageSize - ) - } + val canonicalBuild = build.map(genomicsConfig.resolveReferenceName) + + for { + regions <- genomeRegionsRepository.findRegions(regionType, canonicalBuild, offset, pageSize) + total <- genomeRegionsRepository.countRegions(regionType, canonicalBuild) + } yield GenomeRegionListResponse( + regions = regions.map(toRegionDetailDto), + total = total, + page = page, + pageSize = pageSize + ) } def getRegion(id: Int): Future[Option[GenomeRegionDetailDto]] = { - genomeRegionsRepository.findRegionByIdWithContig(id).map(_.map(toRegionDetailDto)) + genomeRegionsRepository.findRegionById(id).map(_.map(toRegionDetailDto)) 
} def createRegion(request: CreateGenomeRegionRequest, userId: Option[UUID]): Future[Either[String, GenomeRegionDetailDto]] = { val region = GenomeRegion( id = None, - genbankContigId = request.genbankContigId, regionType = request.regionType, name = request.name, - startPos = request.startPos, - endPos = request.endPos, - modifier = request.modifier + coordinates = request.coordinates.map { case (k, v) => k -> RegionCoordinate(v.contig, v.start, v.end) }, + properties = request.properties.getOrElse(Json.obj()) ) genomeRegionsRepository.createRegion(region).flatMap { id => - genomeRegionsRepository.findRegionByIdWithContig(id).flatMap { - case Some((createdRegion, contig)) => + genomeRegionsRepository.findRegionById(id).flatMap { + case Some(createdRegion) => logAudit(userId, "genome_region", id, "create", None, Some(createdRegion)).map { _ => - invalidateCacheForContig(contig) - Right(toRegionDetailDto((createdRegion, contig))) + invalidateCache() + Right(toRegionDetailDto(createdRegion)) } case None => Future.successful(Left("Failed to retrieve created region")) @@ -96,22 +81,21 @@ class GenomeRegionsManagementService @Inject()( } def updateRegion(id: Int, request: UpdateGenomeRegionRequest, userId: Option[UUID]): Future[Either[String, GenomeRegionDetailDto]] = { - genomeRegionsRepository.findRegionByIdWithContig(id).flatMap { + genomeRegionsRepository.findRegionById(id).flatMap { case None => Future.successful(Left("Region not found")) - case Some((oldRegion, contig)) => + case Some(oldRegion) => val updatedRegion = oldRegion.copy( regionType = request.regionType.getOrElse(oldRegion.regionType), name = request.name.orElse(oldRegion.name), - startPos = request.startPos.getOrElse(oldRegion.startPos), - endPos = request.endPos.getOrElse(oldRegion.endPos), - modifier = request.modifier.orElse(oldRegion.modifier) + coordinates = request.coordinates.map(_.map { case (k, v) => k -> RegionCoordinate(v.contig, v.start, v.end) }).getOrElse(oldRegion.coordinates), + 
properties = request.properties.getOrElse(oldRegion.properties) ) genomeRegionsRepository.updateRegion(id, updatedRegion).flatMap { success => if (success) { logAudit(userId, "genome_region", id, "update", Some(oldRegion), Some(updatedRegion)).map { _ => - invalidateCacheForContig(contig) - Right(toRegionDetailDto((updatedRegion.copy(id = Some(id)), contig))) + invalidateCache() + Right(toRegionDetailDto(updatedRegion.copy(id = Some(id)))) } } else { Future.successful(Left("Failed to update region")) @@ -125,13 +109,13 @@ class GenomeRegionsManagementService @Inject()( } def deleteRegion(id: Int, userId: Option[UUID]): Future[Either[String, Unit]] = { - genomeRegionsRepository.findRegionByIdWithContig(id).flatMap { + genomeRegionsRepository.findRegionById(id).flatMap { case None => Future.successful(Left("Region not found")) - case Some((oldRegion, contig)) => + case Some(oldRegion) => genomeRegionsRepository.deleteRegion(id).flatMap { success => if (success) { logAudit(userId, "genome_region", id, "delete", Some(oldRegion), None).map { _ => - invalidateCacheForContig(contig) + invalidateCache() Right(()) } } else { @@ -165,316 +149,17 @@ class GenomeRegionsManagementService @Inject()( } } - // ============================================================================ - // Cytoband Operations - // ============================================================================ - - def listCytobands(build: Option[String], page: Int, pageSize: Int): Future[CytobandListResponse] = { - val offset = (page - 1) * pageSize - build match { - case Some(buildName) => - val canonicalName = genomicsConfig.resolveReferenceName(buildName) - for { - cytobands <- genomeRegionsRepository.findCytobandsByBuild(canonicalName, offset, pageSize) - total <- genomeRegionsRepository.countCytobandsByBuild(Some(canonicalName)) - } yield CytobandListResponse( - cytobands = cytobands.map(toCytobandDetailDto), - total = total, - page = page, - pageSize = pageSize - ) - case None => - for { - 
total <- genomeRegionsRepository.countCytobandsByBuild(None) - } yield CytobandListResponse( - cytobands = Seq.empty, - total = total, - page = page, - pageSize = pageSize - ) - } - } - - def getCytoband(id: Int): Future[Option[CytobandDetailDto]] = { - genomeRegionsRepository.findCytobandByIdWithContig(id).map(_.map(toCytobandDetailDto)) - } - - def createCytoband(request: CreateCytobandRequest, userId: Option[UUID]): Future[Either[String, CytobandDetailDto]] = { - val cytoband = Cytoband( - id = None, - genbankContigId = request.genbankContigId, - name = request.name, - startPos = request.startPos, - endPos = request.endPos, - stain = request.stain - ) - - genomeRegionsRepository.createCytoband(cytoband).flatMap { id => - genomeRegionsRepository.findCytobandByIdWithContig(id).flatMap { - case Some((createdCytoband, contig)) => - logAudit(userId, "cytoband", id, "create", None, Some(createdCytoband)).map { _ => - invalidateCacheForContig(contig) - Right(toCytobandDetailDto((createdCytoband, contig))) - } - case None => - Future.successful(Left("Failed to retrieve created cytoband")) - } - }.recover { - case e: Exception => - logger.error(s"Failed to create cytoband: ${e.getMessage}", e) - Left(s"Failed to create cytoband: ${e.getMessage}") - } - } - - def updateCytoband(id: Int, request: UpdateCytobandRequest, userId: Option[UUID]): Future[Either[String, CytobandDetailDto]] = { - genomeRegionsRepository.findCytobandByIdWithContig(id).flatMap { - case None => Future.successful(Left("Cytoband not found")) - case Some((oldCytoband, contig)) => - val updatedCytoband = oldCytoband.copy( - name = request.name.getOrElse(oldCytoband.name), - startPos = request.startPos.getOrElse(oldCytoband.startPos), - endPos = request.endPos.getOrElse(oldCytoband.endPos), - stain = request.stain.getOrElse(oldCytoband.stain) - ) - - genomeRegionsRepository.updateCytoband(id, updatedCytoband).flatMap { success => - if (success) { - logAudit(userId, "cytoband", id, "update", 
Some(oldCytoband), Some(updatedCytoband)).map { _ => - invalidateCacheForContig(contig) - Right(toCytobandDetailDto((updatedCytoband.copy(id = Some(id)), contig))) - } - } else { - Future.successful(Left("Failed to update cytoband")) - } - } - }.recover { - case e: Exception => - logger.error(s"Failed to update cytoband: ${e.getMessage}", e) - Left(s"Failed to update cytoband: ${e.getMessage}") - } - } - - def deleteCytoband(id: Int, userId: Option[UUID]): Future[Either[String, Unit]] = { - genomeRegionsRepository.findCytobandByIdWithContig(id).flatMap { - case None => Future.successful(Left("Cytoband not found")) - case Some((oldCytoband, contig)) => - genomeRegionsRepository.deleteCytoband(id).flatMap { success => - if (success) { - logAudit(userId, "cytoband", id, "delete", Some(oldCytoband), None).map { _ => - invalidateCacheForContig(contig) - Right(()) - } - } else { - Future.successful(Left("Failed to delete cytoband")) - } - } - }.recover { - case e: Exception => - logger.error(s"Failed to delete cytoband: ${e.getMessage}", e) - Left(s"Failed to delete cytoband: ${e.getMessage}") - } - } - - def bulkCreateCytobands(request: BulkCreateCytobandsRequest, userId: Option[UUID]): Future[BulkOperationResponse] = { - val results = request.cytobands.zipWithIndex.map { case (req, idx) => - createCytoband(req, userId).map { - case Right(dto) => BulkOperationResult(idx, "success", Some(dto.id), None) - case Left(error) => BulkOperationResult(idx, "error", None, Some(error)) - }.recover { - case e: Exception => BulkOperationResult(idx, "error", None, Some(e.getMessage)) - } - } - - Future.sequence(results).map { resultList => - BulkOperationResponse( - total = resultList.size, - succeeded = resultList.count(_.status == "success"), - failed = resultList.count(_.status == "error"), - results = resultList - ) - } - } - - // ============================================================================ - // StrMarker Operations - // 
============================================================================ - - def listStrMarkers(build: Option[String], page: Int, pageSize: Int): Future[StrMarkerListResponse] = { - val offset = (page - 1) * pageSize - build match { - case Some(buildName) => - val canonicalName = genomicsConfig.resolveReferenceName(buildName) - for { - markers <- genomeRegionsRepository.findStrMarkersByBuild(canonicalName, offset, pageSize) - total <- genomeRegionsRepository.countStrMarkersByBuild(Some(canonicalName)) - } yield StrMarkerListResponse( - markers = markers.map(toStrMarkerDetailDto), - total = total, - page = page, - pageSize = pageSize - ) - case None => - for { - total <- genomeRegionsRepository.countStrMarkersByBuild(None) - } yield StrMarkerListResponse( - markers = Seq.empty, - total = total, - page = page, - pageSize = pageSize - ) - } - } - - def getStrMarker(id: Int): Future[Option[StrMarkerDetailDto]] = { - genomeRegionsRepository.findStrMarkerByIdWithContig(id).map(_.map(toStrMarkerDetailDto)) - } - - def createStrMarker(request: CreateStrMarkerRequest, userId: Option[UUID]): Future[Either[String, StrMarkerDetailDto]] = { - val marker = StrMarker( - id = None, - genbankContigId = request.genbankContigId, - name = request.name, - startPos = request.startPos, - endPos = request.endPos, - period = request.period, - verified = request.verified, - note = request.note - ) - - genomeRegionsRepository.createStrMarker(marker).flatMap { id => - genomeRegionsRepository.findStrMarkerByIdWithContig(id).flatMap { - case Some((createdMarker, contig)) => - logAudit(userId, "str_marker", id, "create", None, Some(createdMarker)).map { _ => - invalidateCacheForContig(contig) - Right(toStrMarkerDetailDto((createdMarker, contig))) - } - case None => - Future.successful(Left("Failed to retrieve created STR marker")) - } - }.recover { - case e: Exception => - logger.error(s"Failed to create STR marker: ${e.getMessage}", e) - Left(s"Failed to create STR marker: ${e.getMessage}") 
- } - } - - def updateStrMarker(id: Int, request: UpdateStrMarkerRequest, userId: Option[UUID]): Future[Either[String, StrMarkerDetailDto]] = { - genomeRegionsRepository.findStrMarkerByIdWithContig(id).flatMap { - case None => Future.successful(Left("STR marker not found")) - case Some((oldMarker, contig)) => - val updatedMarker = oldMarker.copy( - name = request.name.getOrElse(oldMarker.name), - startPos = request.startPos.getOrElse(oldMarker.startPos), - endPos = request.endPos.getOrElse(oldMarker.endPos), - period = request.period.getOrElse(oldMarker.period), - verified = request.verified.getOrElse(oldMarker.verified), - note = request.note.orElse(oldMarker.note) - ) - - genomeRegionsRepository.updateStrMarker(id, updatedMarker).flatMap { success => - if (success) { - logAudit(userId, "str_marker", id, "update", Some(oldMarker), Some(updatedMarker)).map { _ => - invalidateCacheForContig(contig) - Right(toStrMarkerDetailDto((updatedMarker.copy(id = Some(id)), contig))) - } - } else { - Future.successful(Left("Failed to update STR marker")) - } - } - }.recover { - case e: Exception => - logger.error(s"Failed to update STR marker: ${e.getMessage}", e) - Left(s"Failed to update STR marker: ${e.getMessage}") - } - } - - def deleteStrMarker(id: Int, userId: Option[UUID]): Future[Either[String, Unit]] = { - genomeRegionsRepository.findStrMarkerByIdWithContig(id).flatMap { - case None => Future.successful(Left("STR marker not found")) - case Some((oldMarker, contig)) => - genomeRegionsRepository.deleteStrMarker(id).flatMap { success => - if (success) { - logAudit(userId, "str_marker", id, "delete", Some(oldMarker), None).map { _ => - invalidateCacheForContig(contig) - Right(()) - } - } else { - Future.successful(Left("Failed to delete STR marker")) - } - } - }.recover { - case e: Exception => - logger.error(s"Failed to delete STR marker: ${e.getMessage}", e) - Left(s"Failed to delete STR marker: ${e.getMessage}") - } - } - - def bulkCreateStrMarkers(request: 
BulkCreateStrMarkersRequest, userId: Option[UUID]): Future[BulkOperationResponse] = { - val results = request.markers.zipWithIndex.map { case (req, idx) => - createStrMarker(req, userId).map { - case Right(dto) => BulkOperationResult(idx, "success", Some(dto.id), None) - case Left(error) => BulkOperationResult(idx, "error", None, Some(error)) - }.recover { - case e: Exception => BulkOperationResult(idx, "error", None, Some(e.getMessage)) - } - } - - Future.sequence(results).map { resultList => - BulkOperationResponse( - total = resultList.size, - succeeded = resultList.count(_.status == "success"), - failed = resultList.count(_.status == "error"), - results = resultList - ) - } - } - // ============================================================================ // Helper Methods // ============================================================================ - private def toRegionDetailDto(data: (GenomeRegion, GenbankContig)): GenomeRegionDetailDto = { - val (region, contig) = data + private def toRegionDetailDto(region: GenomeRegion): GenomeRegionDetailDto = { GenomeRegionDetailDto( id = region.id.getOrElse(0), - genbankContigId = region.genbankContigId, - contigName = contig.commonName, - referenceGenome = contig.referenceGenome, regionType = region.regionType, name = region.name, - startPos = region.startPos, - endPos = region.endPos, - modifier = region.modifier - ) - } - - private def toCytobandDetailDto(data: (Cytoband, GenbankContig)): CytobandDetailDto = { - val (cytoband, contig) = data - CytobandDetailDto( - id = cytoband.id.getOrElse(0), - genbankContigId = cytoband.genbankContigId, - contigName = contig.commonName, - referenceGenome = contig.referenceGenome, - name = cytoband.name, - startPos = cytoband.startPos, - endPos = cytoband.endPos, - stain = cytoband.stain - ) - } - - private def toStrMarkerDetailDto(data: (StrMarker, GenbankContig)): StrMarkerDetailDto = { - val (marker, contig) = data - StrMarkerDetailDto( - id = marker.id.getOrElse(0), - 
genbankContigId = marker.genbankContigId, - contigName = contig.commonName, - referenceGenome = contig.referenceGenome, - name = marker.name, - startPos = marker.startPos, - endPos = marker.endPos, - period = marker.period, - verified = marker.verified, - note = marker.note + coordinates = region.coordinates.map { case (k, v) => k -> RegionCoordinateDto(v.contig, v.start, v.end) }, + properties = region.properties ) } @@ -493,10 +178,11 @@ class GenomeRegionsManagementService @Inject()( auditRepository.logAction(entry) } - private def invalidateCacheForContig(contig: GenbankContig): Unit = { - contig.referenceGenome.foreach { refGenome => + private def invalidateCache(): Unit = { + // Invalidate all build caches as we don't know easily which builds are affected by coordinates update + genomicsConfig.supportedReferences.foreach { refGenome => cache.remove(s"genome-regions:$refGenome") logger.debug(s"Invalidated cache for genome-regions:$refGenome") } } -} +} \ No newline at end of file diff --git a/app/services/GenomeRegionsService.scala b/app/services/GenomeRegionsService.scala index 6809c3c..93e0b43 100644 --- a/app/services/GenomeRegionsService.scala +++ b/app/services/GenomeRegionsService.scala @@ -3,8 +3,9 @@ package services import config.GenomicsConfig import jakarta.inject.{Inject, Singleton} import models.api.genomics.* -import models.domain.genomics.{GenomeRegion, GenbankContig} +import models.domain.genomics.{GenbankContig, GenomeRegion} import play.api.cache.AsyncCacheApi +import play.api.libs.json.{JsValue, Reads} import repositories.{FullBuildData, GenomeRegionsRepository} import java.security.MessageDigest @@ -78,14 +79,24 @@ class GenomeRegionsService @Inject()( */ private def buildResponse(canonicalName: String): Future[GenomeRegionsResponse] = { genomeRegionsRepository.getFullBuildData(canonicalName).map { data => + + // Group regions by contig name for efficient lookup. 
+ // We check for exact match or "chr"+name match to handle common naming conventions. + val regionsByContig = data.regions.flatMap { region => + region.coordinates.get(canonicalName).map(coord => coord.contig -> region) + }.groupBy(_._1).map { case (k, v) => k -> v.map(_._2) } + val chromosomeMap = data.contigs.flatMap { contig => contig.commonName.map { chromName => - val contigId = contig.id.getOrElse(0) - val regions = data.regions.getOrElse(contigId, Seq.empty) - val cytobands = data.cytobands.getOrElse(contigId, Seq.empty) - val markers = data.strMarkers.getOrElse(contigId, Seq.empty) - - chromName -> buildChromosomeRegions(contig, regions, cytobands, markers) + // Try to find regions for this contig using common variations + val relevantRegions = regionsByContig.getOrElse(chromName, Seq.empty) ++ + regionsByContig.getOrElse("chr" + chromName, Seq.empty) ++ + regionsByContig.getOrElse(chromName.replace("chr", ""), Seq.empty) + + // Deduplicate if needed (though mapping logic usually prevents duplicate keys in map unless source has duplicates) + val uniqueRegions = relevantRegions.distinctBy(_.id) + + chromName -> buildChromosomeRegions(contig, uniqueRegions, canonicalName) } }.toMap @@ -104,58 +115,95 @@ class GenomeRegionsService @Inject()( private def buildChromosomeRegions( contig: GenbankContig, regions: Seq[GenomeRegion], - cytobands: Seq[models.domain.genomics.Cytoband], - markers: Seq[models.domain.genomics.StrMarker] + buildName: String ): ChromosomeRegionsDto = { + // Helper to convert to DTO with current build context + def toDto(r: GenomeRegion): Option[RegionDto] = toRegionDto(r, buildName) + // Extract specific region types - val centromere = regions.find(_.regionType == "Centromere").map(toRegionDto) - val telomereP = regions.find(_.regionType == "Telomere_P").map(toRegionDto) - val telomereQ = regions.find(_.regionType == "Telomere_Q").map(toRegionDto) + val centromere = regions.find(_.regionType == "Centromere").flatMap(toDto) + val telomereP 
= regions.find(_.regionType == "Telomere_P").flatMap(toDto) + val telomereQ = regions.find(_.regionType == "Telomere_Q").flatMap(toDto) val telomeres = if (telomereP.isDefined || telomereQ.isDefined) { Some(TelomeresDto(p = telomereP, q = telomereQ)) } else None + // Cytobands + val cytobands = regions.filter(_.regionType == "Cytoband") + .flatMap(r => toCytobandDto(r, buildName)) + .sortBy(_.start) + // Build Y-chromosome specific regions if this is chrY val yRegions = if (contig.commonName.exists(name => name.toLowerCase.contains("chry") || name == "Y")) { - Some(buildYRegions(regions)) + Some(buildYRegions(regions, buildName)) } else None ChromosomeRegionsDto( length = contig.seqLength.toLong, centromere = centromere, telomeres = telomeres, - cytobands = cytobands.map(toCytobandDto), + cytobands = cytobands, regions = yRegions, - strMarkers = markers.map(toStrMarkerDto) + strMarkers = Seq.empty // STR markers handled by separate service/table now ) } /** * Build Y-chromosome specific regions grouped by type. 
*/ - private def buildYRegions(regions: Seq[GenomeRegion]): YChromosomeRegionsDto = { + private def buildYRegions(regions: Seq[GenomeRegion], buildName: String): YChromosomeRegionsDto = { + def toDto(r: GenomeRegion) = toRegionDto(r, buildName) + def toNamedDto(r: GenomeRegion) = toNamedRegionDto(r, buildName) + YChromosomeRegionsDto( - par1 = regions.find(_.regionType == "PAR1").map(toRegionDto), - par2 = regions.find(_.regionType == "PAR2").map(toRegionDto), - xtr = regions.filter(_.regionType == "XTR").map(toRegionDto), - ampliconic = regions.filter(_.regionType == "Ampliconic").map(toRegionDto), - palindromes = regions.filter(_.regionType == "Palindrome").map(toNamedRegionDto), - heterochromatin = regions.find(_.regionType == "Heterochromatin").map(toRegionDto), - xDegenerate = regions.filter(_.regionType == "XDegenerate").map(toRegionDto) + par1 = regions.find(_.regionType == "PAR1").flatMap(toDto), + par2 = regions.find(_.regionType == "PAR2").flatMap(toDto), + xtr = regions.filter(_.regionType == "XTR").flatMap(toDto), + ampliconic = regions.filter(_.regionType == "Ampliconic").flatMap(toDto), + palindromes = regions.filter(_.regionType == "Palindrome").flatMap(toNamedDto), + heterochromatin = regions.find(_.regionType == "Heterochromatin").flatMap(toDto), + xDegenerate = regions.filter(_.regionType == "XDegenerate").flatMap(toDto) ) } // Domain to DTO conversions - private def toRegionDto(r: GenomeRegion): RegionDto = - RegionDto(r.startPos, r.endPos, Some(r.regionType), r.modifier.map(_.toDouble)) - private def toNamedRegionDto(r: GenomeRegion): NamedRegionDto = - NamedRegionDto(r.name.getOrElse(""), r.startPos, r.endPos, r.regionType, r.modifier.map(_.toDouble)) + private def getProperty[T](r: GenomeRegion, key: String)(implicit reads: Reads[T]): Option[T] = { + (r.properties \ key).asOpt[T] + } - private def toCytobandDto(c: models.domain.genomics.Cytoband): CytobandDto = - CytobandDto(c.name, c.startPos, c.endPos, c.stain) + private def toRegionDto(r: 
GenomeRegion, buildName: String): Option[RegionDto] = { + r.coordinates.get(buildName).map { coord => + RegionDto( + start = coord.start, + end = coord.end, + `type` = Some(r.regionType), + modifier = getProperty[Double](r, "modifier") + ) + } + } - private def toStrMarkerDto(s: models.domain.genomics.StrMarker): StrMarkerDto = - StrMarkerDto(s.name, s.startPos, s.endPos, s.period, s.verified, s.note) -} + private def toNamedRegionDto(r: GenomeRegion, buildName: String): Option[NamedRegionDto] = { + r.coordinates.get(buildName).map { coord => + NamedRegionDto( + name = r.name.getOrElse(""), + start = coord.start, + end = coord.end, + `type` = r.regionType, + modifier = getProperty[Double](r, "modifier") + ) + } + } + + private def toCytobandDto(r: GenomeRegion, buildName: String): Option[CytobandDto] = { + r.coordinates.get(buildName).map { coord => + CytobandDto( + name = r.name.getOrElse(""), + start = coord.start, + end = coord.end, + stain = getProperty[String](r, "stain").getOrElse("gneg") + ) + } + } +} \ No newline at end of file diff --git a/app/services/HaplogroupTreeMergeService.scala b/app/services/HaplogroupTreeMergeService.scala index 955c9d6..b2e03bf 100644 --- a/app/services/HaplogroupTreeMergeService.scala +++ b/app/services/HaplogroupTreeMergeService.scala @@ -3,10 +3,10 @@ package services import jakarta.inject.{Inject, Singleton} import models.HaplogroupType import models.api.haplogroups.* -import models.dal.domain.genomics.VariantAlias +import models.domain.genomics.VariantV2 import models.domain.haplogroups.{Haplogroup, HaplogroupProvenance} import play.api.Logging -import repositories.{HaplogroupCoreRepository, HaplogroupVariantRepository, VariantAliasRepository, VariantRepository} +import repositories.{HaplogroupCoreRepository, HaplogroupVariantRepository, VariantV2Repository} import java.time.LocalDateTime import scala.concurrent.{ExecutionContext, Future} @@ -26,8 +26,7 @@ import scala.concurrent.{ExecutionContext, Future} class 
HaplogroupTreeMergeService @Inject()( haplogroupRepository: HaplogroupCoreRepository, haplogroupVariantRepository: HaplogroupVariantRepository, - variantRepository: VariantRepository, - variantAliasRepository: VariantAliasRepository + variantV2Repository: VariantV2Repository )(implicit ec: ExecutionContext) extends Logging { // ============================================================================ @@ -485,16 +484,17 @@ class HaplogroupTreeMergeService @Inject()( /** * Associate variants with a haplogroup, finding or creating variants as needed. + * Updated to use VariantV2Repository where aliases are stored in JSONB. */ private def associateVariants(haplogroupId: Int, variants: List[VariantInput]): Future[Int] = { if (variants.isEmpty) { Future.successful(0) } else { // For each variant, find existing variants by primary name and associate them, - // then create alias records for any aliases + // then add any aliases to the variant's JSONB aliases field Future.traverse(variants) { variantInput => - // First find/associate the primary variant - variantRepository.searchByName(variantInput.name).flatMap { foundVariants => + // First find variant by canonical name or alias + variantV2Repository.searchByName(variantInput.name).flatMap { foundVariants => // Associate all found variants with this haplogroup val associateFutures = foundVariants.map { variant => variant.variantId match { @@ -502,16 +502,9 @@ class HaplogroupTreeMergeService @Inject()( for { // Associate variant with haplogroup count <- haplogroupVariantRepository.addVariantToHaplogroup(haplogroupId, vid) - // Create alias records for any aliases from the ISOGG data + // Add any aliases from the ISOGG data to the variant's JSONB aliases _ <- Future.traverse(variantInput.aliases) { alias => - val variantAlias = VariantAlias( - variantId = vid, - aliasType = "common_name", - aliasValue = alias, - source = Some("ISOGG"), - isPrimary = false - ) - variantAliasRepository.addAlias(variantAlias).recover { 
case _ => false } + variantV2Repository.addAlias(vid, "common_name", alias).recover { case _ => false } } } yield count case None => Future.successful(0) diff --git a/app/services/HaplogroupTreeService.scala b/app/services/HaplogroupTreeService.scala index fe49965..794a0c0 100644 --- a/app/services/HaplogroupTreeService.scala +++ b/app/services/HaplogroupTreeService.scala @@ -4,51 +4,37 @@ import jakarta.inject.Inject import models.HaplogroupType import models.HaplogroupType.{MT, Y} import models.api.* -import models.dal.domain.genomics.{Variant, VariantAlias} -import models.domain.genomics.GenbankContig +import models.domain.genomics.VariantV2 import models.domain.haplogroups.Haplogroup import play.api.Logging +import play.api.libs.json.JsObject import play.api.mvc.Call -import repositories.{HaplogroupCoreRepository, HaplogroupVariantRepository, VariantAliasRepository} +import repositories.{HaplogroupCoreRepository, HaplogroupVariantRepository} import java.time.ZoneId import scala.concurrent.{ExecutionContext, Future} sealed trait RouteType - case object ApiRoute extends RouteType - case object FragmentRoute extends RouteType /** * Service for building and managing haplogroup trees, providing capabilities for constructing tree responses, * processing ancestral and descendant relationships, and querying haplogroups by variants. - * - * @constructor Creates a new instance of `HaplogroupTreeService`. 
- * @param coreRepository repository for accessing core haplogroup data - * @param variantRepository repository for accessing variant-related haplogroup data - * @param ec implicit execution context for handling asynchronous operations */ class HaplogroupTreeService @Inject()( - coreRepository: HaplogroupCoreRepository, - variantRepository: HaplogroupVariantRepository, - aliasRepository: VariantAliasRepository)(implicit ec: ExecutionContext) - extends Logging { + coreRepository: HaplogroupCoreRepository, + variantRepository: HaplogroupVariantRepository +)(implicit ec: ExecutionContext) extends Logging { /** * Builds a TreeDTO representation for a specified haplogroup with related breadcrumbs and subtree. - * - * @param haplogroupName The name of the haplogroup to build the tree response for. - * @param haplogroupType The type of haplogroup (e.g., Y-DNA or mtDNA) being processed. - * @param routeType The type of route to construct for breadcrumb navigation. - * @return A Future containing the constructed TreeDTO, representing the haplogroup tree structure - * with breadcrumbs and an optional subtree. - * @throws IllegalArgumentException if the specified haplogroup is not found. 
*/ - def buildTreeResponse(haplogroupName: String, haplogroupType: HaplogroupType, routeType: RouteType): Future[TreeDTO] = { + def buildTreeResponse(haplogroupQuery: String, haplogroupType: HaplogroupType, routeType: RouteType): Future[TreeDTO] = { for { - rootHaplogroupOpt <- coreRepository.getHaplogroupByName(haplogroupName, haplogroupType) - rootHaplogroup = rootHaplogroupOpt.getOrElse(throw new IllegalArgumentException(s"Haplogroup $haplogroupName not found")) + resolvedHaplogroupName <- resolveHaplogroupByNameOrVariant(haplogroupQuery, haplogroupType) + rootHaplogroupOpt <- coreRepository.getHaplogroupByName(resolvedHaplogroupName, haplogroupType) + rootHaplogroup = rootHaplogroupOpt.getOrElse(throw new IllegalArgumentException(s"Haplogroup $resolvedHaplogroupName not found after variant lookup")) ancestors <- coreRepository.getAncestors(rootHaplogroup.id.get) crumbs = buildCrumbs(ancestors, haplogroupType, routeType) @@ -65,15 +51,39 @@ class HaplogroupTreeService @Inject()( ) } - /** - * Returns the route for a given combination of haplogroup type and route type. - * - * @param name The name of the haplogroup. - * @param haplogroupType The type of haplogroup, representing Y-DNA or mtDNA. - * @param routeType The type of route, representing fragment or API endpoints. - * @return A `Call` object representing the constructed route for the specified parameters. + * Resolves a haplogroup name by either direct lookup or by finding a variant. */ + private def resolveHaplogroupByNameOrVariant(query: String, haplogroupType: HaplogroupType): Future[String] = { + coreRepository.getHaplogroupByName(query, haplogroupType).flatMap { + case Some(haplogroup) => Future.successful(haplogroup.name) + case None => + // Haplogroup not found by direct name, try searching by variant + logger.debug(s"Haplogroup '$query' not found by direct name. 
Attempting variant lookup.") + val normalizedQuery = normalizeVariantId(query) + variantRepository.findVariants(normalizedQuery).flatMap { + case variants if variants.nonEmpty => + // Found variants, now find their defining haplogroups + val variantIds = variants.flatMap(_.variantId).map(_.toString) + Future.sequence(variantIds.map(vid => variantRepository.findHaplogroupsByDefiningVariant(vid, haplogroupType))).map { + haplogroupLists => + val definingHaplogroups = haplogroupLists.flatten + definingHaplogroups.sortBy(_.validFrom).lastOption match { + case Some(latestHaplogroup) => + logger.info(s"Resolved variant '$query' to haplogroup '${latestHaplogroup.name}'.") + latestHaplogroup.name + case None => + logger.warn(s"Variant '$query' found, but no defining haplogroups for type $haplogroupType.") + throw new IllegalArgumentException(s"Haplogroup or variant '$query' not found") + } + } + case _ => + logger.debug(s"Variant '$query' not found.") + Future.failed(new IllegalArgumentException(s"Haplogroup or variant '$query' not found")) + } + } + } + private def getRoute(name: String, haplogroupType: HaplogroupType, routeType: RouteType): Call = { (haplogroupType, routeType) match { case (Y, FragmentRoute) => controllers.routes.TreeController.yTreeFragment(Some(name)) @@ -83,14 +93,6 @@ class HaplogroupTreeService @Inject()( } } - /** - * Constructs a list of breadcrumb DTOs based on the provided haplogroups, haplogroup type, and route type. - * - * @param haplogroups A sequence of haplogroups used to generate breadcrumb data. - * @param haplogroupType The type of haplogroups (e.g., Y-DNA or mtDNA) to use in the breadcrumb context. - * @param routeType The type of route (e.g., fragment or API endpoint) to create for breadcrumb navigation. - * @return A list of `CrumbDTO` objects representing the breadcrumbs for the provided parameters. 
-   */
   private def buildCrumbs(haplogroups: Seq[Haplogroup], haplogroupType: HaplogroupType, routeType: RouteType): List[CrumbDTO] = {
     haplogroups.map { haplogroup =>
       CrumbDTO(
@@ -100,29 +102,14 @@ class HaplogroupTreeService @Inject()(
     }.toList
   }

-  /**
-   * Recursively builds a `TreeNodeDTO` representation of a haplogroup and its subtree.
-   *
-   * This method constructs a tree structure for a given haplogroup by retrieving its associated variants and
-   * processing its child haplogroups. The result is encapsulated in a `TreeNodeDTO` object, which contains
-   * information about the haplogroup name, variants, children, last update timestamp, and whether it belongs
-   * to the backbone structure.
-   *
-   * @param haplogroup The `Haplogroup` object for which the subtree is being built. This contains metadata
-   *                   such as the haplogroup's name, lineage, and additional information.
-   * @return A `Future` containing the constructed `TreeNodeDTO`, which includes the haplogroup's metadata,
-   *         associated variants, and recursive child tree nodes.
+  /** Recursively builds a TreeNodeDTO representation of a haplogroup and its subtree. 
*/ private def buildSubtree(haplogroup: Haplogroup): Future[TreeNodeDTO] = { for { - // Get variants for this haplogroup + // Get variants for this haplogroup (now returns Seq[VariantV2]) variants <- variantRepository.getHaplogroupVariants(haplogroup.id.get) - - // Fetch aliases for all variants in batch - variantIds = variants.flatMap(_._1.variantId) - aliasMap <- aliasRepository.findByVariantIds(variantIds) - variantDTOs = mapVariants(variants, aliasMap) + variantDTOs = mapVariants(variants) // Get and process children children <- coreRepository.getDirectChildren(haplogroup.id.get) @@ -156,56 +143,75 @@ class HaplogroupTreeService @Inject()( ) } - private def mapVariants(variants: Seq[(Variant, GenbankContig)], aliasMap: Map[Int, Seq[VariantAlias]] = Map.empty) = { - variants.map { case (variant, contig) => - // Convert aliases to Map[String, Seq[String]] grouped by type - val aliases = variant.variantId - .flatMap(id => aliasMap.get(id)) - .getOrElse(Seq.empty) - .groupBy(_.aliasType) - .map { case (aliasType, typeAliases) => aliasType -> typeAliases.map(_.aliasValue) } + /** + * Maps VariantV2 instances to VariantDTO. + * With VariantV2, aliases and coordinates are embedded in JSONB. 
+ */ + private def mapVariants(variants: Seq[VariantV2]): Seq[VariantDTO] = { + variants.map { variant => + // Extract coordinates from JSONB + val coordinates = extractCoordinates(variant) - // Format coordinate key as "RefGenome CommonName" (e.g., "GRCh38 chrY") - val coordKey = formatCoordinateKey(contig) + // Extract aliases from JSONB + val aliases = extractAliases(variant) VariantDTO( - name = variant.commonName.getOrElse(s"${contig.commonName.getOrElse(contig.accession)}:${variant.position}"), - coordinates = Map( - coordKey -> GenomicCoordinate( - variant.position, - variant.position, - variant.referenceAllele, - variant.alternateAllele - ) - ), - variantType = variant.variantType, + name = variant.displayName, + coordinates = coordinates, + variantType = variant.mutationType.dbValue, aliases = aliases ) } } - private def formatCoordinateKey(contig: GenbankContig): String = { - val refGenome = contig.referenceGenome.map(shortRefGenome).getOrElse("Unknown") - val name = contig.commonName.getOrElse(contig.accession) - s"$name [$refGenome]" + /** + * Extract coordinates from VariantV2 JSONB into Map[String, GenomicCoordinate] + */ + private def extractCoordinates(variant: VariantV2): Map[String, GenomicCoordinate] = { + variant.coordinates.asOpt[Map[String, JsObject]].map { coordsMap => + coordsMap.flatMap { case (refGenome, coords) => + for { + contig <- (coords \ "contig").asOpt[String] + position <- (coords \ "position").asOpt[Int] + ref <- (coords \ "ref").asOpt[String] + alt <- (coords \ "alt").asOpt[String] + } yield { + val coordKey = s"$contig [${shortRefGenome(refGenome)}]" + coordKey -> GenomicCoordinate( + start = position, + stop = position, + anc = ref, + der = alt + ) + } + } + }.getOrElse(Map.empty) + } + + /** + * Extract aliases from VariantV2 JSONB into Map[String, Seq[String]] + */ + private def extractAliases(variant: VariantV2): Map[String, Seq[String]] = { + val aliases = variant.aliases + val rsIds = (aliases \ 
"rs_ids").asOpt[Seq[String]].getOrElse(Seq.empty) + val commonNames = (aliases \ "common_names").asOpt[Seq[String]].getOrElse(Seq.empty) + + Map( + "rsId" -> rsIds, + "commonName" -> commonNames + ).filter(_._2.nonEmpty) } private def shortRefGenome(ref: String): String = ref match { case r if r.contains("GRCh37") || r.contains("hg19") => "b37" case r if r.contains("GRCh38") || r.contains("hg38") => "b38" - case r if r.contains("T2T") || r.contains("CHM13") || r.contains("hs1") => "hs1" + case r if r.contains("T2T") || r.contains("CHM13") || r == "hs1" => "hs1" case other => other } /** * Builds a TreeDTO representation by constructing a haplogroup tree structure * for the haplogroup(s) defined by the given genetic variant. - * - * @param variantId The identifier of the genetic variant defining one or more haplogroups. - * @param haplogroupType The type of haplogroup (e.g., Y-DNA or mtDNA) to be processed. - * @param routeType The type of route to construct for breadcrumb navigation in the tree. - * @return A Future containing an Option of TreeDTO. The Option will contain the TreeDTO if - * a corresponding haplogroup is found; otherwise, it will be None. 
*/ def buildTreeFromVariant(variantId: String, haplogroupType: HaplogroupType, routeType: RouteType): Future[Option[TreeDTO]] = { for { @@ -213,7 +219,6 @@ class HaplogroupTreeService @Inject()( haplogroups <- variantRepository.findHaplogroupsByDefiningVariant(variantId, haplogroupType) // If we found any haplogroups, build the tree from the most recent one - // (assuming more recent haplogroups are more specific/detailed) treeOpt <- haplogroups.sortBy(_.validFrom).lastOption match { case Some(haplogroup) => buildTreeResponse(haplogroup.name, haplogroupType, routeType).map(Some(_)) case None => Future.successful(None) @@ -224,12 +229,6 @@ class HaplogroupTreeService @Inject()( /** * Constructs a sequence of TreeDTO objects representing tree structures for all haplogroups * associated with a specific genetic variant. - * - * @param variantId The identifier of the genetic variant used to find associated haplogroups. - * @param haplogroupType The type of haplogroup (e.g., Y-DNA or mtDNA) being processed. - * @param routeType The type of route to construct for navigational purposes. - * @return A Future containing a sequence of TreeDTO objects, where each represents the tree structure - * for a haplogroup associated with the provided variant. */ def buildTreesFromVariant(variantId: String, haplogroupType: HaplogroupType, routeType: RouteType): Future[Seq[TreeDTO]] = { for { @@ -261,12 +260,6 @@ class HaplogroupTreeService @Inject()( /** * Finds and retrieves haplogroup details with all associated genomic variants. - * - * This method fetches the haplogroup (including provenance) and its linked variants. - * - * @param haplogroupName The name of the haplogroup for which details are to be retrieved. - * @param haplogroupType The type of haplogroup (e.g., Y-DNA or mtDNA). - * @return A Future containing a tuple of (Option[Haplogroup], Seq[VariantDTO]). 
*/ def findHaplogroupWithVariants(haplogroupName: String, haplogroupType: HaplogroupType): Future[(Option[Haplogroup], Seq[VariantDTO])] = { for { @@ -277,63 +270,18 @@ class HaplogroupTreeService @Inject()( /** * Finds and retrieves all genomic variants associated with a specified haplogroup. - * - * This method fetches the variants linked to a haplogroup identified by its name and type. - * It interacts with the core repository to locate the haplogroup and then queries the variant repository - * to obtain the list of associated variants, which are finally converted into `VariantDTO` objects. - * - * @param haplogroupName The name of the haplogroup for which variants are to be retrieved. - * @param haplogroupType The type of haplogroup (e.g., Y-DNA or mtDNA). - * @return A Future containing a sequence of `VariantDTO` objects representing the variants - * associated with the specified haplogroup. If the haplogroup is not found, the sequence will be empty. + * Now uses VariantV2 with embedded aliases in JSONB. 
*/ def findVariantsForHaplogroup(haplogroupName: String, haplogroupType: HaplogroupType): Future[Seq[VariantDTO]] = { - val sortedVariantsFuture: Future[Seq[VariantDTO]] = for { + for { haplogroup <- coreRepository.getHaplogroupByName(haplogroupName, haplogroupType) variants <- variantRepository.getHaplogroupVariants(haplogroup.flatMap(_.id).getOrElse(0)) - variantIds = variants.flatMap(_._1.variantId) - aliasMap <- aliasRepository.findByVariantIds(variantIds) - } yield TreeNodeDTO.sortVariants(mapVariants(variants, aliasMap)) - - sortedVariantsFuture.map { sortedVariants => - val grouped = sortedVariants - .groupBy(dto => dto.name) - .map { case (k, locations) => - val first = locations.head - - // Combine the coordinates for all VariantDTOs in this group - val coordinates: Seq[Map[String, GenomicCoordinate]] = locations.map(dto => dto.coordinates) - val combined: Map[String, GenomicCoordinate] = coordinates.foldLeft(Map.empty[String, GenomicCoordinate]) { - case (acc, currentMap) => acc ++ currentMap - } - - // Combine aliases from all VariantDTOs in this group - val combinedAliases: Map[String, Seq[String]] = locations - .flatMap(_.aliases.toSeq) - .groupBy(_._1) - .map { case (aliasType, pairs) => aliasType -> pairs.flatMap(_._2).distinct } - - // Create a new VariantDTO for the combined result - VariantDTO(first.name, combined, first.variantType, combinedAliases) - }.toSeq - - TreeNodeDTO.sortVariants(grouped) + } yield { + val variantDTOs = mapVariants(variants) + TreeNodeDTO.sortVariants(variantDTOs) } } - /** - * Normalizes the given variant identifier by formatting it consistently based on its structure. - * - * The method supports the following formats: - * - rsID (e.g., rs1234): Returned as-is, converted to lowercase. - * - chr:pos (e.g., Y:2728456): Returned in the same structure after conversion to lowercase. - * - chr:pos:ref:alt (e.g., Y:2728456:A:G): Returned in the same structure after conversion to lowercase. 
- * - * Any unrecognized format is returned unchanged after trimming and converting to lowercase. - * - * @param query The genetic variant identifier to be normalized. It may be in rsID, chr:pos, or chr:pos:ref:alt format. - * @return The normalized variant identifier, based on the recognized format. - */ private def normalizeVariantId(query: String): String = { query.trim.toLowerCase match { case rsid if rsid.startsWith("rs") => rsid @@ -349,12 +297,7 @@ class HaplogroupTreeService @Inject()( } /** - * Transforms a recursive tree structure of `TreeNodeDTO` into a flat sequence of `SubcladeDTO` - * suitable for API responses. This flattens the hierarchical data into a list where each subclade - * explicitly references its parent. - * - * @param root An `Option` containing the root `TreeNodeDTO` of the tree to be transformed. - * @return A `Seq` of `SubcladeDTO` representing the flattened tree structure. + * Transforms a recursive tree structure of TreeNodeDTO into a flat sequence of SubcladeDTO. 
*/ def mapApiResponse(root: Option[TreeNodeDTO]): Seq[SubcladeDTO] = { def map(node: TreeNodeDTO, parent: Option[TreeNodeDTO]): Seq[SubcladeDTO] = { @@ -364,4 +307,4 @@ class HaplogroupTreeService @Inject()( root.map(x => map(x, None)) .getOrElse(Seq()) } -} \ No newline at end of file +} diff --git a/app/services/TreeImporter.scala b/app/services/TreeImporter.scala index b1438ff..cfb9fe9 100644 --- a/app/services/TreeImporter.scala +++ b/app/services/TreeImporter.scala @@ -2,9 +2,10 @@ package services import models.* import models.api.{TreeDTO, TreeNodeDTO, VariantDTO} -import models.dal.domain.genomics.Variant +import models.domain.genomics.{MutationType, NamingStatus, VariantV2} import models.domain.haplogroups.{Haplogroup, HaplogroupRelationship, HaplogroupVariantMetadata, RelationshipRevisionMetadata} import play.api.Logging +import play.api.libs.json.Json import repositories.* import java.time.LocalDateTime @@ -13,67 +14,41 @@ import scala.concurrent.{ExecutionContext, Future} /** * Configuration class for tree import settings. - * - * This class represents the configurable parameters used during the process of - * importing tree data. These parameters include information about the author of - * the import, the source of the data, and confidence levels for the imported data. - * - * @param initialAuthor The identifier for the author of the import. Defaults to "system". - * @param source The source of the imported tree data. Defaults to "initial_import". - * @param defaultConfidenceLevel The confidence level to assign to non-backbone data during import. Defaults to "MEDIUM". - * @param backboneConfidenceLevel The confidence level to assign to backbone data during import. Defaults to "HIGH". 
*/ case class TreeImportSettings( - initialAuthor: String = "system", - source: String = "initial_import", - defaultConfidenceLevel: String = "MEDIUM", - backboneConfidenceLevel: String = "HIGH" - ) - + initialAuthor: String = "system", + source: String = "initial_import", + defaultConfidenceLevel: String = "MEDIUM", + backboneConfidenceLevel: String = "HIGH" +) /** * Class responsible for importing and processing phylogenetic tree data into the system, * including haplogroup information, relationships, and variant associations. - * - * @constructor Creates a new instance of TreeImporter with the required repositories and configuration. - * @param haplogroupRevisionRepository Repository for managing haplogroup and revision data. - * @param haplogroupRelationshipRepository Repository for managing haplogroup relationships. - * @param haplogroupVariantRepository Repository for managing haplogroup-variant associations. - * @param haplogroupVariantMetadataRepository Repository for managing metadata for haplogroup-variant revisions. - * @param haplogroupRevisionMetadataRepository Repository for managing metadata for haplogroup revisions. - * @param genbankContigRepository Repository for accessing GenBank contig data. - * @param variantRepository Repository for managing variant data. - * @param config Configuration related to tree importing, such as source and confidence levels. - * @param ec Implicit execution context for managing asynchronous computations. 
*/ class TreeImporter @Inject()( - haplogroupRevisionRepository: HaplogroupRevisionRepository, - haplogroupRelationshipRepository: HaplogroupRelationshipRepository, - haplogroupVariantRepository: HaplogroupVariantRepository, - haplogroupVariantMetadataRepository: HaplogroupVariantMetadataRepository, - haplogroupRevisionMetadataRepository: HaplogroupRevisionMetadataRepository, - genbankContigRepository: GenbankContigRepository, - variantRepository: VariantRepository - )(implicit ec: ExecutionContext) extends Logging { + haplogroupRevisionRepository: HaplogroupRevisionRepository, + haplogroupRelationshipRepository: HaplogroupRelationshipRepository, + haplogroupVariantRepository: HaplogroupVariantRepository, + haplogroupVariantMetadataRepository: HaplogroupVariantMetadataRepository, + haplogroupRevisionMetadataRepository: HaplogroupRevisionMetadataRepository, + genbankContigRepository: GenbankContigRepository, + variantV2Repository: VariantV2Repository +)(implicit ec: ExecutionContext) extends Logging { private val defaultSettings = TreeImportSettings() - /** * Imports a tree structure into the system by recursively processing its nodes, * creating haplogroups, relationships, and variants. - * - * @param tree The tree structure to be imported, represented as a `TreeDTO`. - * @param haplogroupType The type of haplogroup classification to apply (e.g., paternal or maternal lineage). - * @return A `Future` that completes when the tree has been successfully imported, or fails in case of an error. */ def importTree(tree: TreeDTO, haplogroupType: HaplogroupType)(implicit settings: TreeImportSettings = defaultSettings): Future[Unit] = { val timestamp = LocalDateTime.now() def processNode( - node: TreeNodeDTO, - parentId: Option[Int] = None, - depth: Int = 0 - ): Future[Int] = { + node: TreeNodeDTO, + parentId: Option[Int] = None, + depth: Int = 0 + ): Future[Int] = { for { // 1. 
Create the haplogroup haplogroupId <- createHaplogroup(node, haplogroupType, timestamp) @@ -102,59 +77,42 @@ class TreeImporter @Inject()( } /** - * Creates a new haplogroup entry based on the provided tree node and its attributes. - * - * This method constructs a Haplogroup entity using the data from the given TreeNodeDTO, - * including its name, backbone flag, and the specified haplogroup type. It also assigns - * a confidence level and validity timestamps. The constructed haplogroup is then passed - * to the repository to create a new revision, which is persisted to the database. - * - * @param node The TreeNodeDTO representing the hierarchical structure and metadata for the haplogroup. - * @param haplogroupType The type of haplogroup classification, e.g., paternal (Y) or maternal (MT). - * @param timestamp The timestamp indicating the creation or validity start time for the haplogroup. - * @return A Future containing the unique integer identifier of the newly created haplogroup revision. + * Creates a haplogroup entity with associated revision metadata. 
    */
   private def createHaplogroup(
-                                node: TreeNodeDTO,
-                                haplogroupType: HaplogroupType,
-                                timestamp: LocalDateTime
-                              )(implicit settings: TreeImportSettings): Future[Int] = {
+    node: TreeNodeDTO,
+    haplogroupType: HaplogroupType,
+    timestamp: LocalDateTime
+  )(implicit settings: TreeImportSettings): Future[Int] = {
+    logger.debug(s"Creating haplogroup: ${node.name}")
     val haplogroup = Haplogroup(
       id = None,
       name = node.name,
       lineage = None,
-      description = None,
+      description = None, // TreeNodeDTO doesn't have description
       haplogroupType = haplogroupType,
       revisionId = 1,
       source = settings.source,
-      confidenceLevel = if (node.isBackbone) settings.backboneConfidenceLevel else settings.defaultConfidenceLevel,
+      confidenceLevel = if (node.isBackbone) settings.backboneConfidenceLevel else settings.defaultConfidenceLevel,
       validFrom = timestamp,
       validUntil = None
     )
-    logger.debug(s"Creating new haplogroup revision for ${node.name}")
-    haplogroupRevisionRepository.createNewRevision(haplogroup).map { id =>
-      logger.debug(s"Created haplogroup with ID: $id for ${node.name}")
-      id
-    }
-  }
+    // Revision comment metadata is not persisted yet; createNewRevision takes no comment parameter
+    // Create the haplogroup revision
+    // Note: Haplogroup revision metadata is not currently tracked separately
+    haplogroupRevisionRepository.createNewRevision(haplogroup)
+  }

   /**
-   * Creates a relationship between a parent haplogroup and a child haplogroup,
-   * with associated metadata for tracking revisions.
-   *
-   * @param parentId The unique identifier of the parent haplogroup.
-   * @param childId The unique identifier of the child haplogroup.
-   * @param timestamp The timestamp indicating when the relationship was created or became valid.
-   * @return A Future containing Unit, which completes when the relationship and its metadata
-   *         are successfully created, or fails with an exception if an error occurs.
+   * Creates a relationship between parent and child haplogroups. 
*/ private def createRelationship( - parentId: Int, - childId: Int, - timestamp: LocalDateTime - )(implicit settings: TreeImportSettings): Future[Unit] = { + parentId: Int, + childId: Int, + timestamp: LocalDateTime + )(implicit settings: TreeImportSettings): Future[Unit] = { logger.debug(s"Creating relationship: parent=$parentId -> child=$childId") val relationship = HaplogroupRelationship( id = None, @@ -185,71 +143,100 @@ class TreeImporter @Inject()( } /** - * Creates or retrieves genetic variants, associates them with a specific haplogroup, - * and stores the related metadata for tracking changes. - * - * @param variants A sequence of `VariantDTO` instances representing the genomic variants to be processed. - * @param haplogroupId The unique identifier of the haplogroup to associate the variants with. - * @param timestamp The timestamp indicating the moment of creation or update for the variants. - * @return A `Future` containing `Unit` that completes once all variants are processed and associated, - * or fails with an exception if an error occurs during execution. + * Creates or retrieves genetic variants and associates them with a haplogroup. + * Now uses VariantV2 with JSONB coordinates. 
*/ private def createVariants( - variants: Seq[VariantDTO], - haplogroupId: Int, - timestamp: LocalDateTime - )(implicit settings: TreeImportSettings): Future[Unit] = { + variants: Seq[VariantDTO], + haplogroupId: Int, + timestamp: LocalDateTime + )(implicit settings: TreeImportSettings): Future[Unit] = { logger.debug(s"Starting to process ${variants.size} variants for haplogroup $haplogroupId") - // Convert DTOs to Variant entities - val variantEntities = variants.flatMap { v => - v.coordinates.map { case (contigAccession, coord) => - (contigAccession, Variant( - variantId = None, - genbankContigId = 0, - position = coord.start, - referenceAllele = coord.anc, - alternateAllele = coord.der, - variantType = v.variantType, - rsId = Some(v.name).filter(_.startsWith("rs")), - commonName = Some(v.name).filterNot(_.startsWith("rs")) - )) + // Process variants sequentially to avoid overwhelming the connection pool + variants.grouped(100).toSeq.foldLeft(Future.successful(())) { case (prevFuture, batch) => + prevFuture.flatMap { _ => + for { + // Create/find variants and get their IDs + variantIds <- Future.traverse(batch) { variantDto => + createOrFindVariant(variantDto) + } + // Associate variants with haplogroup + _ <- Future.traverse(variantIds.flatten) { variantId => + createVariantAssociation(haplogroupId, variantId, timestamp) + } + } yield () } } + } - // Group by contig accession for efficient processing - val groupedVariants = variantEntities.groupBy(_._1) + /** + * Creates a new VariantV2 or finds an existing one. + */ + private def createOrFindVariant(variantDto: VariantDTO): Future[Option[Int]] = { + // Build coordinates JSONB from DTO + val coordinatesJson = variantDto.coordinates.foldLeft(Json.obj()) { case (acc, (contigAccession, coord)) => + // For now, use accession as the reference genome key + // In a real implementation, we'd map accession to reference genome name (GRCh38, hs1, etc.) 
+ acc + (contigAccession -> Json.obj( + "contig" -> contigAccession, + "position" -> coord.start, + "ref" -> coord.anc, + "alt" -> coord.der + )) + } - // Process groups sequentially to avoid overwhelming the connection pool - groupedVariants.toSeq.foldLeft(Future.successful(())) { case (prevFuture, (contigAccession, variants)) => - prevFuture.flatMap { _ => - // Process each batch of 100 variants - variants.grouped(100).foldLeft(Future.successful(())) { case (batchFuture, batch) => - batchFuture.flatMap { _ => - for { - contig <- genbankContigRepository.findByAccession(contigAccession).flatMap { - case Some(c) => Future.successful(c) - case None => Future.failed(new RuntimeException(s"GenBank contig not found: $contigAccession")) - } - variantsWithContig = batch.map(_._2.copy(genbankContigId = contig.id.get)) - variantIds <- variantRepository.findOrCreateVariantsBatch(variantsWithContig) - _ <- variantIds.foldLeft(Future.successful(())) { case (f, variantId) => - f.flatMap(_ => createVariantAssociation(haplogroupId, variantId, timestamp).map(_ => ())) - } - } yield () + // Determine canonical name and rsId + val isRsId = variantDto.name.startsWith("rs") + val canonicalName = if (isRsId) None else Some(variantDto.name) + val rsIds = if (isRsId) Seq(variantDto.name) else Seq.empty + + // Build aliases JSONB + val aliasesJson = Json.obj( + "common_names" -> Seq.empty[String], + "rs_ids" -> rsIds, + "sources" -> Json.obj("import" -> Seq(variantDto.name)) + ) + + val variant = VariantV2( + variantId = None, + canonicalName = canonicalName, + mutationType = MutationType.fromStringOrDefault(variantDto.variantType), + namingStatus = if (canonicalName.isDefined) NamingStatus.Named else NamingStatus.Unnamed, + aliases = aliasesJson, + coordinates = coordinatesJson, + definingHaplogroupId = None, + evidence = Json.obj(), + primers = Json.obj(), + notes = None + ) + + // Try to find existing variant by name, otherwise create + canonicalName match { + case Some(name) => + 
variantV2Repository.findByCanonicalName(name).flatMap { + case Some(existing) => Future.successful(existing.variantId) + case None => variantV2Repository.create(variant).map(Some(_)) + } + case None if rsIds.nonEmpty => + variantV2Repository.findByAlias(rsIds.head).flatMap { existingVariants => + existingVariants.headOption match { + case Some(existing) => Future.successful(existing.variantId) + case None => variantV2Repository.create(variant).map(Some(_)) } } - } + case None => + variantV2Repository.create(variant).map(Some(_)) } } import scala.util.control.NonFatal private def createVariantAssociation( - haplogroupId: Int, - variantId: Int, - timestamp: LocalDateTime - )(implicit settings: TreeImportSettings): Future[Int] = { + haplogroupId: Int, + variantId: Int, + timestamp: LocalDateTime + )(implicit settings: TreeImportSettings): Future[Int] = { (for { // Create the haplogroup-variant association assocId <- haplogroupVariantRepository.addVariantToHaplogroup(haplogroupId, variantId) @@ -272,5 +259,4 @@ class TreeImporter @Inject()( 0 } } - -} \ No newline at end of file +} diff --git a/app/services/TreeLayoutService.scala b/app/services/TreeLayoutService.scala index 90417c8..3e3c891 100644 --- a/app/services/TreeLayoutService.scala +++ b/app/services/TreeLayoutService.scala @@ -6,6 +6,9 @@ import models.view.{TreeLinkViewModel, TreeNodeViewModel, TreeViewModel} import java.time.ZonedDateTime import java.time.temporal.ChronoUnit +enum TreeOrientation: + case Horizontal, Vertical + /** * Provides services for laying out a tree structure for rendering, including calculating coordinates, * determining node connections, and styling nodes and links based on their properties. 
@@ -15,13 +18,11 @@ object TreeLayoutService { // Configuration for layout private val NODE_WIDTH = 150.0 private val NODE_HEIGHT = 80.0 - private val HORIZONTAL_SPACING = 200.0 // Distance between levels (depths) - private val VERTICAL_NODE_SPACING = 90.0 // Minimum vertical space between sibling nodes (node height + gap) private val MARGIN_TOP = 50.0 - private val MARGIN_LEFT = 120.0 // Left margin for the root node + private val MARGIN_LEFT = 120.0 - // Helper to store mutable state during layout traversal (unchanged) - private var currentYPosition: Double = MARGIN_TOP + // Helper to store mutable state during layout traversal (breadth tracking) + private var currentBreadthPosition: Double = 0.0 /** * Transforms a TreeDTO into a TreeViewModel with calculated coordinates, link paths, and node colors. @@ -30,19 +31,35 @@ object TreeLayoutService { * * @param treeDto The TreeDTO representing the tree to be laid out. treeDto.subclade is the root of the currently displayed tree. * @param isAbsoluteTopRoot True ONLY if the current display root (treeDto.subclade) is the actual top-most root of the entire system (e.g., "Y"). + * @param orientation The orientation of the tree (Horizontal or Vertical). Defaults to Horizontal. * @return An Option containing the TreeViewModel if a subclade exists. */ - def layoutTree(treeDto: TreeDTO, isAbsoluteTopRoot: Boolean): Option[TreeViewModel] = { + def layoutTree(treeDto: TreeDTO, isAbsoluteTopRoot: Boolean, orientation: TreeOrientation = TreeOrientation.Horizontal): Option[TreeViewModel] = { val oneYearAgo = ZonedDateTime.now().minus(1, ChronoUnit.YEARS) + + // Determine spacing based on orientation + // Horizontal: Depth is X (Level), Breadth is Y (Stack). Nodes are 80px high. + // Vertical: Depth is Y (Level), Breadth is X (Row). Nodes are 150px wide. 
+ val (depthSpacing, breadthSpacing) = orientation match { + case TreeOrientation.Horizontal => (200.0, 90.0) + case TreeOrientation.Vertical => (130.0, 180.0) + } treeDto.subclade.map { currentDisplayRootDTO => - currentYPosition = MARGIN_TOP + // Reset breadth tracker + currentBreadthPosition = orientation match { + case TreeOrientation.Horizontal => MARGIN_TOP + case TreeOrientation.Vertical => MARGIN_LEFT + } val allNodes = collection.mutable.ListBuffer[TreeNodeViewModel]() val allLinks = collection.mutable.ListBuffer[TreeLinkViewModel]() def calculateNodePositions(nodeDTO: TreeNodeDTO, depth: Int, isCurrentDisplayRoot: Boolean): TreeNodeViewModel = { - val y = depth * HORIZONTAL_SPACING + MARGIN_LEFT + // Depth Position (Level) + // Horizontal: Left-to-Right axis (svg x). + // Vertical: Top-to-Bottom axis (svg y). + val depthPos = depth * depthSpacing + (if (orientation == TreeOrientation.Horizontal) MARGIN_LEFT else MARGIN_TOP) val isRecentlyUpdated = nodeDTO.updated.isAfter(oneYearAgo) @@ -66,17 +83,25 @@ object TreeLayoutService { calculateNodePositions(childDTO, depth + 1, false) } - // Determine X position (vertical in SVG): - val x = if (childViewModels.isEmpty) { - val assignedX = currentYPosition - currentYPosition += VERTICAL_NODE_SPACING - assignedX + // Breadth Position (Stack/Row) + // Horizontal: Top-to-Bottom axis (svg y). + // Vertical: Left-to-Right axis (svg x). + val breadthPos = if (childViewModels.isEmpty) { + val assigned = currentBreadthPosition + currentBreadthPosition += breadthSpacing + assigned } else { - val firstChildX = childViewModels.head.x - val lastChildX = childViewModels.last.x - (firstChildX + lastChildX) / 2 + val firstChild = childViewModels.head + val lastChild = childViewModels.last + // Children store: x = breadth, y = depth. 
+ (firstChild.x + lastChild.x) / 2 } + // Store in ViewModel: + // x = Breadth (Vertical pos in Horizontal layout; Horizontal pos in Vertical layout) + // y = Depth (Horizontal pos in Horizontal layout; Vertical pos in Vertical layout) + // This naming is confusing but preserved for backward compatibility with haplogroup.scala.html + // haplogroup.scala.html expects: x="@(node.y...)" (Depth->SVG X), y="@(node.x...)" (Breadth->SVG Y) val nodeViewModel = TreeNodeViewModel( name = nodeDTO.name, variantsCount = nodeDTO.variantCount, @@ -86,23 +111,61 @@ object TreeLayoutService { isRecentlyUpdated = isRecentlyUpdated, formedYbp = nodeDTO.formedYbp, tmrcaYbp = nodeDTO.tmrcaYbp, - x = x, - y = y + x = breadthPos, // Breadth + y = depthPos // Depth ) allNodes += nodeViewModel childViewModels.foreach { child => - val sourceX = nodeViewModel.x - val sourceY = nodeViewModel.y + NODE_WIDTH / 2 - - val targetX = child.x - val targetY = child.y - NODE_WIDTH / 2 - - // Generate stepped path data - val pathData = s"M $sourceY $sourceX " + - s"H ${(sourceY + targetY) / 2} " + - s"V $targetX " + - s"H $targetY" + // Generate path data based on orientation + val pathData = orientation match { + case TreeOrientation.Horizontal => + // Standard Layout (Left-to-Right) + // SVG X = Depth (y), SVG Y = Breadth (x) + // Source: (node.y, node.x) + // Target: (child.y, child.x) + // Link coordinates use node centers, matching the original stepped-path code. + // haplogroup.scala.html renders rect x = node.y - 75, so node.y is the node's + // center X and node.x is its center Y in the Horizontal layout. + // The source node's right edge is therefore node.y + NODE_WIDTH / 2, and the + // target node's left edge is child.y - NODE_WIDTH / 2; the link exits the + // right edge of the source and enters the left edge of the target.
+ + val sourceDepth = nodeViewModel.y + NODE_WIDTH / 2 + val sourceBreadth = nodeViewModel.x + + val targetDepth = child.y - NODE_WIDTH / 2 + val targetBreadth = child.x + + // M (Depth) (Breadth) -> M x y + s"M $sourceDepth $sourceBreadth " + + s"H ${(sourceDepth + targetDepth) / 2} " + + s"V $targetBreadth " + + s"H $targetDepth" + + case TreeOrientation.Vertical => + // Block Layout (Top-to-Bottom) + // SVG X = Breadth (x), SVG Y = Depth (y) + // Source: (node.x, node.y) + // Target: (child.x, child.y) + // Exit Bottom of Source: (sourceX, sourceY + HEIGHT/2) + // Center coordinates as rendered by haplogroup.scala.html: + // In Vertical, node.x is Breadth (SVG X). Rect x = node.x - 75. Center = node.x. + // node.y is Depth (SVG Y). Rect y = node.y - 40. Center = node.y. + // Height is 80. Bottom = node.y + 40. + + val sourceBreadth = nodeViewModel.x + val sourceDepth = nodeViewModel.y + NODE_HEIGHT / 2 + + val targetBreadth = child.x + val targetDepth = child.y - NODE_HEIGHT / 2 + + // M (Breadth) (Depth) -> M x y + s"M $sourceBreadth $sourceDepth " + + s"V ${(sourceDepth + targetDepth) / 2} " + + s"H $targetBreadth " + + s"V $targetDepth" + } allLinks += TreeLinkViewModel(nodeViewModel.name, child.name, pathData) } @@ -112,11 +175,19 @@ object TreeLayoutService { val rootViewModel = calculateNodePositions(currentDisplayRootDTO, 0, true) - val maxX = allNodes.map(_.x).maxOption.getOrElse(0.0) - val maxY = allNodes.map(_.y).maxOption.getOrElse(0.0) - - val svgWidth = maxY + NODE_WIDTH + MARGIN_LEFT * 2 - val svgHeight = maxX + NODE_HEIGHT + MARGIN_TOP * 2 + // Calculate SVG dimensions + // x = Breadth, y = Depth + val maxBreadth = allNodes.map(_.x).maxOption.getOrElse(0.0) + val maxDepth = allNodes.map(_.y).maxOption.getOrElse(0.0) + + val (svgWidth, svgHeight) = orientation match { + case TreeOrientation.Horizontal => + // Width = Depth, Height = Breadth + (maxDepth + NODE_WIDTH + MARGIN_LEFT, maxBreadth + NODE_HEIGHT + MARGIN_TOP) + case
TreeOrientation.Vertical => + // Width = Breadth, Height = Depth + (maxBreadth + NODE_WIDTH + MARGIN_LEFT, maxDepth + NODE_HEIGHT + MARGIN_TOP) + } TreeViewModel(rootViewModel, allNodes.toList, allLinks.toList, svgWidth, svgHeight) } diff --git a/app/services/TreeRestructuringService.scala b/app/services/TreeRestructuringService.scala index 722917a..df7bef9 100644 --- a/app/services/TreeRestructuringService.scala +++ b/app/services/TreeRestructuringService.scala @@ -1,10 +1,10 @@ package services import jakarta.inject.{Inject, Singleton} -import models.domain.genomics.VariantGroup +import models.domain.genomics.VariantV2 import models.domain.haplogroups.Haplogroup import play.api.Logging -import repositories.{HaplogroupCoreRepository, HaplogroupVariantRepository, VariantRepository} +import repositories.{HaplogroupCoreRepository, HaplogroupVariantRepository, VariantV2Repository} import java.util.UUID import scala.concurrent.{ExecutionContext, Future} @@ -16,7 +16,7 @@ import scala.concurrent.{ExecutionContext, Future} class TreeRestructuringService @Inject()( haplogroupRepository: HaplogroupCoreRepository, haplogroupVariantRepository: HaplogroupVariantRepository, - variantRepository: VariantRepository, + variantV2Repository: VariantV2Repository, auditService: CuratorAuditService )(implicit ec: ExecutionContext) extends Logging { @@ -25,7 +25,7 @@ class TreeRestructuringService @Inject()( * * @param parentId ID of the parent haplogroup * @param newHaplogroup The new subclade haplogroup to create - * @param variantGroupKeys Keys of variant groups to MOVE from parent to new child + * @param variantIds IDs of variants to MOVE from parent to new child * @param childIds IDs of existing children to re-parent under new subclade * @param userId User performing the operation * @return ID of newly created haplogroup @@ -33,7 +33,7 @@ class TreeRestructuringService @Inject()( def splitBranch( parentId: Int, newHaplogroup: Haplogroup, - variantGroupKeys: Seq[String], + 
variantIds: Seq[Int], childIds: Seq[Int], userId: UUID ): Future[Int] = { @@ -53,7 +53,7 @@ class TreeRestructuringService @Inject()( newId <- haplogroupRepository.createWithParent(newHaplogroup, Some(parentId), "split-operation") // Move variants from parent to new child - movedVariantCount <- moveVariants(parentId, newId, variantGroupKeys) + movedVariantCount <- moveVariants(parentId, newId, variantIds) // Re-parent selected children to the new subclade _ <- Future.traverse(childIds) { childId => @@ -90,13 +90,10 @@ class TreeRestructuringService @Inject()( // Get child's variants to move up childVariants <- haplogroupVariantRepository.getHaplogroupVariants(childId) - childVariantGroups = variantRepository.groupVariants(childVariants.map { case (v, c) => - models.domain.genomics.VariantWithContig(v, c) - }) // Get parent's existing variants to check for duplicates parentVariants <- haplogroupVariantRepository.getHaplogroupVariants(parentId) - parentVariantIds = parentVariants.map(_._1.variantId).flatten.toSet + parentVariantIds = parentVariants.flatMap(_.variantId).toSet // Move unique variants from child to parent movedVariantCount <- moveVariantsUp(childId, parentId, parentVariantIds) @@ -116,26 +113,19 @@ class TreeRestructuringService @Inject()( } /** - * Move variant groups from source haplogroup to target haplogroup. + * Move variants from source haplogroup to target haplogroup. 
*/ - private def moveVariants(sourceId: Int, targetId: Int, groupKeys: Seq[String]): Future[Int] = { - if (groupKeys.isEmpty) { + private def moveVariants(sourceId: Int, targetId: Int, variantIds: Seq[Int]): Future[Int] = { + if (variantIds.isEmpty) { Future.successful(0) } else { - // For each group key, get all variants and move them - Future.traverse(groupKeys) { groupKey => + Future.traverse(variantIds) { variantId => for { - variants <- variantRepository.getVariantsByGroupKey(groupKey) - movedCount <- Future.traverse(variants) { vwc => - val variantId = vwc.variant.variantId.get - for { - // Remove from source - _ <- haplogroupVariantRepository.removeVariantFromHaplogroup(sourceId, variantId) - // Add to target - _ <- haplogroupVariantRepository.addVariantToHaplogroup(targetId, variantId) - } yield 1 - } - } yield movedCount.sum + // Remove from source + _ <- haplogroupVariantRepository.removeVariantFromHaplogroup(sourceId, variantId) + // Add to target + _ <- haplogroupVariantRepository.addVariantToHaplogroup(targetId, variantId) + } yield 1 }.map(_.sum) } } @@ -145,7 +135,7 @@ class TreeRestructuringService @Inject()( */ private def moveVariantsUp(childId: Int, parentId: Int, existingParentVariantIds: Set[Int]): Future[Int] = { for { - childVariants <- haplogroupVariantRepository.getVariantsByHaplogroup(childId) + childVariants <- haplogroupVariantRepository.getHaplogroupVariants(childId) childVariantIds = childVariants.flatMap(_.variantId) // Only move variants that don't already exist on parent @@ -169,11 +159,8 @@ class TreeRestructuringService @Inject()( parentOpt <- haplogroupRepository.findById(parentId) parent = parentOpt.getOrElse(throw new IllegalArgumentException(s"Parent haplogroup $parentId not found")) variants <- haplogroupVariantRepository.getHaplogroupVariants(parentId) - variantGroups = variantRepository.groupVariants(variants.map { case (v, c) => - models.domain.genomics.VariantWithContig(v, c) - }) children <- 
haplogroupRepository.getDirectChildren(parentId) - } yield SplitPreview(parent, variantGroups, children) + } yield SplitPreview(parent, variants, children) } /** @@ -188,21 +175,17 @@ class TreeRestructuringService @Inject()( parent = parentOpt.getOrElse(throw new IllegalArgumentException(s"Haplogroup $childId has no parent")) childVariants <- haplogroupVariantRepository.getHaplogroupVariants(childId) - childVariantGroups = variantRepository.groupVariants(childVariants.map { case (v, c) => - models.domain.genomics.VariantWithContig(v, c) - }) - grandchildren <- haplogroupRepository.getDirectChildren(childId) parentVariants <- haplogroupVariantRepository.getHaplogroupVariants(parent.id.get) - parentVariantIds = parentVariants.map(_._1.variantId).flatten.toSet + parentVariantIds = parentVariants.flatMap(_.variantId).toSet // Calculate unique variants that will be moved - uniqueVariantGroups = childVariantGroups.filter { group => - group.variantIds.exists(!parentVariantIds.contains(_)) + uniqueVariants = childVariants.filter { v => + v.variantId.exists(!parentVariantIds.contains(_)) } - } yield MergePreview(child, parent, childVariantGroups, uniqueVariantGroups, grandchildren) + } yield MergePreview(child, parent, childVariants, uniqueVariants, grandchildren) } } @@ -211,7 +194,7 @@ class TreeRestructuringService @Inject()( */ case class SplitPreview( parent: Haplogroup, - variantGroups: Seq[VariantGroup], + variants: Seq[VariantV2], children: Seq[Haplogroup] ) @@ -221,7 +204,7 @@ case class SplitPreview( case class MergePreview( child: Haplogroup, parent: Haplogroup, - allVariantGroups: Seq[VariantGroup], - uniqueVariantGroups: Seq[VariantGroup], + allVariants: Seq[VariantV2], + uniqueVariants: Seq[VariantV2], grandchildren: Seq[Haplogroup] ) diff --git a/app/services/VariantExportService.scala b/app/services/VariantExportService.scala index 42a8732..4b515b6 100644 --- a/app/services/VariantExportService.scala +++ b/app/services/VariantExportService.scala @@ -2,10 
+2,10 @@ package services import jakarta.inject.{Inject, Singleton} import models.api.* -import models.domain.genomics.VariantGroup +import models.domain.genomics.VariantV2 import play.api.{Configuration, Logging} -import play.api.libs.json.Json -import repositories.{HaplogroupVariantRepository, VariantAliasRepository, VariantRepository} +import play.api.libs.json.{JsObject, Json, OFormat} +import repositories.{HaplogroupVariantRepository, VariantV2Repository} import java.io.{BufferedOutputStream, FileOutputStream, OutputStreamWriter} import java.nio.file.{Files, Path, Paths, StandardCopyOption} @@ -13,17 +13,75 @@ import java.time.{Instant, LocalDateTime, ZoneOffset} import java.util.zip.GZIPOutputStream import scala.concurrent.{ExecutionContext, Future} +/** + * Export metadata for tracking export file status. + */ +case class ExportMetadata( + generatedAt: Instant, + variantCount: Int, + fileSizeBytes: Long +) + +object ExportMetadata { + implicit val format: OFormat[ExportMetadata] = Json.format[ExportMetadata] +} + +/** + * Result of an export operation. + */ +case class ExportResult( + success: Boolean, + variantCount: Int = 0, + fileSizeBytes: Long = 0, + error: Option[String] = None, + generationTimeMs: Long = 0 +) + +object ExportResult { + implicit val format: OFormat[ExportResult] = Json.format[ExportResult] +} + +/** + * Record structure for exported variants. + */ +case class VariantExportRecord( + variantId: Int, + canonicalName: Option[String], + variantType: String, + namingStatus: String, + coordinates: Map[String, VariantCoordinateDTO], + rsIds: Seq[String], + commonNames: Seq[String] +) + +object VariantExportRecord { + implicit val format: OFormat[VariantExportRecord] = Json.format[VariantExportRecord] +} + +/** + * Coordinate information for export. 
+ */ +case class VariantCoordinateDTO( + contig: String, + position: Int, + ref: String, + alt: String +) + +object VariantCoordinateDTO { + implicit val format: OFormat[VariantCoordinateDTO] = Json.format[VariantCoordinateDTO] +} + /** * Service for generating bulk variant export files. * Creates a gzipped JSONL file containing all variants for Edge App consumption. */ @Singleton class VariantExportService @Inject()( - variantRepository: VariantRepository, - variantAliasRepository: VariantAliasRepository, - haplogroupVariantRepository: HaplogroupVariantRepository, - configuration: Configuration - )(implicit ec: ExecutionContext) extends Logging { + variantV2Repository: VariantV2Repository, + haplogroupVariantRepository: HaplogroupVariantRepository, + configuration: Configuration +)(implicit ec: ExecutionContext) extends Logging { private val exportDir = Paths.get(configuration.getOptional[String]("variant.export.dir").getOrElse("/tmp/variant-exports")) private val exportFileName = "variants-full.jsonl.gz" @@ -48,13 +106,15 @@ class VariantExportService @Inject()( * Check if an export file exists and return its metadata. */ def getExportMetadata: Option[ExportMetadata] = { - val metadataPath = getMetadataFilePath - if (Files.exists(metadataPath)) { + val metaPath = getMetadataFilePath + if (Files.exists(metaPath)) { try { - val content = Files.readString(metadataPath) - Json.parse(content).asOpt[ExportMetadata] + val content = Files.readString(metaPath) + Some(Json.parse(content).as[ExportMetadata]) } catch { - case _: Exception => None + case e: Exception => + logger.warn(s"Failed to read export metadata: ${e.getMessage}") + None } } else { None @@ -62,164 +122,102 @@ class VariantExportService @Inject()( } /** - * Check if export file exists and is recent enough. 
- */ - def isExportCurrent(maxAgeHours: Int = 25): Boolean = { - getExportMetadata.exists { meta => - val exportTime = Instant.parse(meta.generatedAt) - val cutoff = Instant.now().minusSeconds(maxAgeHours * 3600L) - exportTime.isAfter(cutoff) - } - } - - /** - * Generate a full export of all variants. - * Returns the number of variants exported. + * Generate a new export file. */ def generateExport(): Future[ExportResult] = { - logger.info("Starting full variant export generation") val startTime = System.currentTimeMillis() + logger.info("Starting variant export generation") - // Write to temp file first, then atomically move - val tempFile = exportDir.resolve(s"$exportFileName.tmp") - - variantRepository.streamAllGrouped().flatMap { groups => - Future { - var count = 0 - val writer = new OutputStreamWriter( - new GZIPOutputStream( - new BufferedOutputStream( - new FileOutputStream(tempFile.toFile) - ) - ), - "UTF-8" - ) + variantV2Repository.streamAll().map { variants => + try { + val tempFile = exportDir.resolve(s"$exportFileName.tmp") + val finalFile = getExportFilePath + + // Write variants to gzipped JSONL + val gzOut = new GZIPOutputStream(new BufferedOutputStream(new FileOutputStream(tempFile.toFile))) + val writer = new OutputStreamWriter(gzOut, "UTF-8") try { - groups.foreach { group => - val dto = groupToDto(group) - writer.write(Json.stringify(Json.toJson(dto))) + for (variant <- variants) { + val exportRecord = variantToExportRecord(variant) + writer.write(Json.stringify(Json.toJson(exportRecord))) writer.write("\n") - count += 1 - if (count % 100000 == 0) { - logger.info(s"Exported $count variant groups...") - } } } finally { writer.close() } // Atomically move temp file to final location - Files.move(tempFile, getExportFilePath, StandardCopyOption.REPLACE_EXISTING, StandardCopyOption.ATOMIC_MOVE) + Files.move(tempFile, finalFile, StandardCopyOption.REPLACE_EXISTING, StandardCopyOption.ATOMIC_MOVE) + + val fileSizeBytes = Files.size(finalFile) - // 
Write metadata - val duration = System.currentTimeMillis() - startTime - val fileSize = Files.size(getExportFilePath) + // Generate and save metadata val metadata = ExportMetadata( - generatedAt = Instant.now().toString, - variantCount = count, - fileSizeBytes = fileSize, - generationTimeMs = duration + generatedAt = Instant.now(), + variantCount = variants.size, + fileSizeBytes = fileSizeBytes ) + Files.writeString(getMetadataFilePath, Json.stringify(Json.toJson(metadata))) - logger.info(s"Variant export complete: $count groups, ${fileSize / 1024 / 1024}MB, ${duration}ms") + val generationTimeMs = System.currentTimeMillis() - startTime + logger.info(s"Export generation complete: ${variants.size} variants in ${generationTimeMs}ms") ExportResult( success = true, - variantCount = count, - fileSizeBytes = fileSize, - generationTimeMs = duration + variantCount = variants.size, + fileSizeBytes = fileSizeBytes, + error = None, + generationTimeMs = generationTimeMs ) + } catch { + case e: Exception => + logger.error(s"Export generation failed: ${e.getMessage}", e) + ExportResult( + success = false, + variantCount = 0, + fileSizeBytes = 0, + error = Some(e.getMessage), + generationTimeMs = System.currentTimeMillis() - startTime + ) } - }.recover { case e: Exception => - logger.error(s"Failed to generate variant export: ${e.getMessage}", e) - // Clean up temp file if it exists - try { Files.deleteIfExists(tempFile) } catch { case _: Exception => } - ExportResult( - success = false, - variantCount = 0, - fileSizeBytes = 0, - generationTimeMs = System.currentTimeMillis() - startTime, - error = Some(e.getMessage) - ) } } /** - * Transform a VariantGroup to a PublicVariantDTO (synchronous, no additional DB lookups). - * For bulk export, we skip per-variant alias/haplogroup lookups for performance. + * Convert a VariantV2 to an export record. 
*/ - private def groupToDto(group: VariantGroup): PublicVariantDTO = { - val primaryVariant = group.variants.headOption.map(_.variant) - val primaryVariantId = primaryVariant.flatMap(_.variantId).getOrElse(0) - - // Build coordinates map from all builds - val coordinates: Map[String, VariantCoordinateDTO] = group.variants.flatMap { vwc => - vwc.contig.referenceGenome.map { refGenome => - val shortRef = refGenome.split("\\.").head - shortRef -> VariantCoordinateDTO( - contig = vwc.contig.commonName.getOrElse(vwc.contig.accession), - position = vwc.variant.position, - ref = vwc.variant.referenceAllele, - alt = vwc.variant.alternateAllele - ) - } - }.toMap - - // Determine naming status - val namingStatus = (group.commonName, group.rsId) match { - case (Some(_), _) => "NAMED" - case (None, Some(_)) => "NAMED" - case (None, None) => "UNNAMED" + private def variantToExportRecord(variant: VariantV2): VariantExportRecord = { + // Extract coordinates from JSONB + val coordinates = variant.coordinates.asOpt[Map[String, JsObject]].getOrElse(Map.empty) + + val coordDtos = coordinates.flatMap { case (refGenome, coords) => + for { + contig <- (coords \ "contig").asOpt[String] + position <- (coords \ "position").asOpt[Int] + ref <- (coords \ "ref").asOpt[String] + alt <- (coords \ "alt").asOpt[String] + } yield refGenome -> VariantCoordinateDTO( + contig = contig, + position = position, + ref = ref, + alt = alt + ) } - // For bulk export, we use simplified aliases from the primary variant - val aliasesDto = VariantAliasesDTO( - commonNames = group.commonName.toSeq, - rsIds = group.rsId.toSeq, - sources = Map.empty // Skip detailed source mapping for bulk export - ) - - PublicVariantDTO( - variantId = primaryVariantId, - canonicalName = group.commonName.orElse(group.rsId), - variantType = primaryVariant.map(_.variantType).getOrElse("SNP"), - namingStatus = namingStatus, - coordinates = coordinates, - aliases = aliasesDto, - definingHaplogroup = None // Skip for bulk export - can 
be enriched via individual lookups + // Extract aliases from JSONB + val rsIds = variant.rsIds + val commonNames = variant.commonNames + + VariantExportRecord( + variantId = variant.variantId.getOrElse(0), + canonicalName = variant.canonicalName, + variantType = variant.mutationType.dbValue, + namingStatus = variant.namingStatus.dbValue, + coordinates = coordDtos, + rsIds = rsIds, + commonNames = commonNames ) } } - -/** - * Metadata about the generated export file. - */ -case class ExportMetadata( - generatedAt: String, - variantCount: Int, - fileSizeBytes: Long, - generationTimeMs: Long -) - -object ExportMetadata { - import play.api.libs.json.{Json, OFormat} - implicit val format: OFormat[ExportMetadata] = Json.format[ExportMetadata] -} - -/** - * Result of an export generation operation. - */ -case class ExportResult( - success: Boolean, - variantCount: Int, - fileSizeBytes: Long, - generationTimeMs: Long, - error: Option[String] = None -) - -object ExportResult { - import play.api.libs.json.{Json, OFormat} - implicit val format: OFormat[ExportResult] = Json.format[ExportResult] -} diff --git a/app/services/VariantPublicApiService.scala b/app/services/VariantPublicApiService.scala index 314838d..fd8055a 100644 --- a/app/services/VariantPublicApiService.scala +++ b/app/services/VariantPublicApiService.scala @@ -2,25 +2,30 @@ package services import jakarta.inject.{Inject, Singleton} import models.api.* -import models.domain.genomics.VariantGroup +import models.domain.genomics.VariantV2 import play.api.cache.AsyncCacheApi -import repositories.{HaplogroupVariantRepository, VariantAliasRepository, VariantRepository} +import play.api.libs.json.JsObject +import repositories.{HaplogroupVariantRepository, VariantV2Repository} import scala.concurrent.duration.* import scala.concurrent.{ExecutionContext, Future} /** * Service for the public Variant API. - * Transforms internal data models to forward-compatible API DTOs. 
+ * Transforms internal VariantV2 models to forward-compatible API DTOs. * Results are cached for performance. + * + * With the consolidated VariantV2 schema, this service is much simpler: + * - No grouping logic needed (variants are already consolidated) + * - Aliases are embedded in JSONB (no separate repository) + * - Coordinates for all assemblies are in one row */ @Singleton class VariantPublicApiService @Inject()( - variantRepository: VariantRepository, - variantAliasRepository: VariantAliasRepository, - haplogroupVariantRepository: HaplogroupVariantRepository, - cache: AsyncCacheApi - )(implicit ec: ExecutionContext) { + variantV2Repository: VariantV2Repository, + haplogroupVariantRepository: HaplogroupVariantRepository, + cache: AsyncCacheApi +)(implicit ec: ExecutionContext) { private val SearchCacheDuration = 10.minutes private val DetailCacheDuration = 30.minutes @@ -35,8 +40,8 @@ class VariantPublicApiService @Inject()( val offset = (page - 1) * pageSize for { - (groups, totalCount) <- variantRepository.searchGroupedPaginated(query.getOrElse(""), offset, pageSize) - dtos <- Future.traverse(groups)(groupToDto) + (variants, totalCount) <- variantV2Repository.searchPaginated(query.getOrElse(""), offset, pageSize) + dtos <- Future.traverse(variants)(variantToDto) } yield { val totalPages = Math.max(1, ((totalCount + pageSize - 1) / pageSize)) VariantSearchResponse( @@ -58,19 +63,12 @@ class VariantPublicApiService @Inject()( cache.getOrElseUpdate(cacheKey, DetailCacheDuration) { for { - variantOpt <- variantRepository.findByIdWithContig(variantId) + variantOpt <- variantV2Repository.findById(variantId) result <- variantOpt match { - case Some(vwc) => - // Get all variants in the same group (different builds) - val groupKey = vwc.variant.commonName.orElse(vwc.variant.rsId).getOrElse(s"variant_$variantId") + case Some(variant) => for { - allBuilds <- variantRepository.getVariantsByGroupKey(groupKey) - aliases <- 
variantAliasRepository.findByVariantId(variantId) haplogroups <- haplogroupVariantRepository.getHaplogroupsByVariant(variantId) - } yield { - val group = variantRepository.groupVariants(allBuilds).headOption - group.map(g => buildDto(g, aliases, haplogroups.headOption)) - } + } yield Some(buildDto(variant, haplogroups.headOption)) case None => Future.successful(None) } @@ -85,73 +83,55 @@ class VariantPublicApiService @Inject()( val cacheKey = s"api-variants-by-haplogroup:$haplogroupName" cache.getOrElseUpdate(cacheKey, DetailCacheDuration) { - for { - variants <- haplogroupVariantRepository.getVariantsByHaplogroupName(haplogroupName) - dtos <- Future.traverse(variants) { vwc => - for { - aliases <- variantAliasRepository.findByVariantId(vwc.variant.variantId.get) - } yield { - // Single build variant - create a minimal group - val singleGroup = VariantGroup( - groupKey = vwc.variant.commonName.orElse(vwc.variant.rsId).getOrElse(s"variant_${vwc.variant.variantId.get}"), - variants = Seq(vwc), - rsId = vwc.variant.rsId, - commonName = vwc.variant.commonName - ) - buildDto(singleGroup, aliases, None) // Haplogroup already known from context - } - } - } yield dtos + haplogroupVariantRepository.getVariantsByHaplogroupName(haplogroupName).map { variants => + variants.map(v => buildDto(v, None)) // Haplogroup already known from context + } } } /** - * Transform a VariantGroup to a PublicVariantDTO. + * Transform a VariantV2 to a PublicVariantDTO. 
*/ - private def groupToDto(group: VariantGroup): Future[PublicVariantDTO] = { - val primaryVariantId = group.variants.headOption.flatMap(_.variant.variantId).getOrElse(0) + private def variantToDto(variant: VariantV2): Future[PublicVariantDTO] = { + val variantId = variant.variantId.getOrElse(0) for { - aliases <- if (primaryVariantId > 0) variantAliasRepository.findByVariantId(primaryVariantId) else Future.successful(Seq.empty) - haplogroups <- if (primaryVariantId > 0) haplogroupVariantRepository.getHaplogroupsByVariant(primaryVariantId) else Future.successful(Seq.empty) - } yield buildDto(group, aliases, haplogroups.headOption) + haplogroups <- if (variantId > 0) haplogroupVariantRepository.getHaplogroupsByVariant(variantId) else Future.successful(Seq.empty) + } yield buildDto(variant, haplogroups.headOption) } /** - * Build the DTO from domain objects. + * Build the DTO from a VariantV2. + * + * With VariantV2, the transformation is straightforward: + * - Coordinates come directly from JSONB + * - Aliases come directly from JSONB + * - No grouping or joining needed */ private def buildDto( - group: VariantGroup, - aliases: Seq[models.dal.domain.genomics.VariantAlias], - definingHaplogroup: Option[models.domain.haplogroups.Haplogroup] - ): PublicVariantDTO = { - - val primaryVariant = group.variants.headOption.map(_.variant) - val primaryVariantId = primaryVariant.flatMap(_.variantId).getOrElse(0) - - // Build coordinates map from all builds - val coordinates: Map[String, VariantCoordinateDTO] = group.variants.flatMap { vwc => - vwc.contig.referenceGenome.map { refGenome => - // Normalize reference genome name (e.g., "GRCh38.p14" -> "GRCh38") - val shortRef = refGenome.split("\\.").head - shortRef -> VariantCoordinateDTO( - contig = vwc.contig.commonName.getOrElse(vwc.contig.accession), - position = vwc.variant.position, - ref = vwc.variant.referenceAllele, - alt = vwc.variant.alternateAllele + variant: VariantV2, + definingHaplogroup: 
Option[models.domain.haplogroups.Haplogroup] + ): PublicVariantDTO = { + + // Extract coordinates from JSONB - one entry per reference genome + val coordinates: Map[String, VariantCoordinateDTO] = variant.coordinates.asOpt[Map[String, JsObject]].map { coordsMap => + coordsMap.flatMap { case (refGenome, coords) => + for { + contig <- (coords \ "contig").asOpt[String] + position <- (coords \ "position").asOpt[Int] + ref <- (coords \ "ref").asOpt[String] + alt <- (coords \ "alt").asOpt[String] + } yield refGenome -> VariantCoordinateDTO( + contig = contig, + position = position, + ref = ref, + alt = alt ) } - }.toMap + }.getOrElse(Map.empty) - // Build aliases DTO - val aliasesDto = buildAliasesDto(aliases, primaryVariant) - - // Determine naming status based on current data - val namingStatus = (group.commonName, group.rsId) match { - case (Some(_), _) => "NAMED" - case (None, Some(_)) => "NAMED" // rsId counts as named - case (None, None) => "UNNAMED" - } + // Extract aliases from JSONB + val aliasesDto = buildAliasesDto(variant) // Build defining haplogroup DTO val definingHaplogroupDto = definingHaplogroup.map { hg => @@ -162,10 +142,10 @@ class VariantPublicApiService @Inject()( } PublicVariantDTO( - variantId = primaryVariantId, - canonicalName = group.commonName.orElse(group.rsId), - variantType = primaryVariant.map(_.variantType).getOrElse("SNP"), - namingStatus = namingStatus, + variantId = variant.variantId.getOrElse(0), + canonicalName = variant.canonicalName, + variantType = variant.mutationType.dbValue, + namingStatus = variant.namingStatus.dbValue, coordinates = coordinates, aliases = aliasesDto, definingHaplogroup = definingHaplogroupDto @@ -173,31 +153,30 @@ class VariantPublicApiService @Inject()( } /** - * Build aliases DTO from alias records. + * Build aliases DTO from VariantV2 JSONB aliases field. 
*/ - private def buildAliasesDto( - aliases: Seq[models.dal.domain.genomics.VariantAlias], - primaryVariant: Option[models.dal.domain.genomics.Variant] - ): VariantAliasesDTO = { - // Group aliases by type - val byType = aliases.groupBy(_.aliasType) - - val commonNames = byType.getOrElse("common_name", Seq.empty).map(_.aliasValue).distinct - val rsIds = byType.getOrElse("rs_id", Seq.empty).map(_.aliasValue).distinct - - // Group by source - val bySource = aliases.groupBy(_.source.getOrElse("unknown")).map { - case (source, sourceAliases) => source -> sourceAliases.map(_.aliasValue).distinct - } + private def buildAliasesDto(variant: VariantV2): VariantAliasesDTO = { + val aliases = variant.aliases + + // Extract common_names array + val commonNames = (aliases \ "common_names").asOpt[Seq[String]].getOrElse(Seq.empty) + + // Extract rs_ids array + val rsIds = (aliases \ "rs_ids").asOpt[Seq[String]].getOrElse(Seq.empty) - // Include primary variant names if not already in aliases - val allCommonNames = (commonNames ++ primaryVariant.flatMap(_.commonName).toSeq).distinct - val allRsIds = (rsIds ++ primaryVariant.flatMap(_.rsId).toSeq).distinct + // Extract sources map + val sources = (aliases \ "sources").asOpt[Map[String, Seq[String]]].getOrElse(Map.empty) + + // Include canonical name if not already in common_names + val allCommonNames = variant.canonicalName match { + case Some(name) if !commonNames.contains(name) => name +: commonNames + case _ => commonNames + } VariantAliasesDTO( commonNames = allCommonNames, - rsIds = allRsIds, - sources = bySource + rsIds = rsIds, + sources = sources ) } } diff --git a/app/services/genomics/YBrowseVariantIngestionService.scala b/app/services/genomics/YBrowseVariantIngestionService.scala index daf815c..d2d6603 100644 --- a/app/services/genomics/YBrowseVariantIngestionService.scala +++ b/app/services/genomics/YBrowseVariantIngestionService.scala @@ -7,21 +7,31 @@ import htsjdk.samtools.util.Interval import 
htsjdk.variant.variantcontext.VariantContext import htsjdk.variant.vcf.VCFFileReader import jakarta.inject.{Inject, Singleton} -import models.dal.domain.genomics.Variant +import models.dal.domain.genomics.* +import models.domain.genomics.{MutationType, NamingStatus, VariantAliases, VariantV2} import play.api.Logger -import repositories.{GenbankContigRepository, VariantRepository} +import play.api.libs.json.{JsObject, Json} +import repositories.VariantV2Repository import java.io.File import scala.concurrent.{ExecutionContext, Future} import scala.jdk.CollectionConverters.* -import scala.util.{Try, Success, Failure} - +import scala.io.Source +import scala.util.{Failure, Success, Try, Using} +import scala.collection.AbstractIterator + +/** + * Service for ingesting Y-DNA variants from YBrowse VCF and GFF files. + * + * Creates consolidated VariantV2 records with JSONB coordinates for multiple + * reference genomes. Performs liftover to add coordinates for additional + * assemblies (hs1, GRCh37, etc.). + */ @Singleton class YBrowseVariantIngestionService @Inject()( - variantRepository: VariantRepository, - genbankContigRepository: GenbankContigRepository, - genomicsConfig: GenomicsConfig - )(implicit ec: ExecutionContext) { + variantV2Repository: VariantV2Repository, + genomicsConfig: GenomicsConfig +)(implicit ec: ExecutionContext) { private val logger = Logger(this.getClass) @@ -38,208 +48,289 @@ class YBrowseVariantIngestionService @Inject()( } /** - * Ingests variants from a YBrowse VCF file. + * Ingests variants from a YBrowse GFF3 file. + * Groups adjacent records with same coordinates to handle aliases. * - * @param vcfFile The VCF file to ingest. - * @param sourceGenome The reference genome of the input VCF (default: "GRCh38"). + * @param gffFile The GFF3 file to ingest. + * @param sourceGenome The reference genome of the input GFF (default: "GRCh38"). * @return A Future containing the number of variants ingested. 
*/ - def ingestVcf(vcfFile: File, sourceGenome: String = "GRCh38"): Future[Int] = { - val reader = new VCFFileReader(vcfFile, false) - val iterator = reader.iterator().asScala - - // Resolve canonical source genome name + def ingestGff(gffFile: File, sourceGenome: String = "GRCh38"): Future[Int] = { + logger.info(s"Starting GFF ingestion from ${gffFile.getPath} ($sourceGenome)") + val canonicalSource = genomicsConfig.resolveReferenceName(sourceGenome) - - // Identify target genomes (all supported except source) val targetGenomes = genomicsConfig.supportedReferences.filter(_ != canonicalSource) - - // Load available liftover chains + + // Load liftovers val liftovers: Map[String, LiftOver] = targetGenomes.flatMap { target => - genomicsConfig.getLiftoverChainFile(canonicalSource, target) match { - case Some(file) if file.exists() => - logger.info(s"Loaded liftover chain for $canonicalSource -> $target: ${file.getPath}") - Some(target -> new LiftOver(file)) - case Some(file) => - logger.warn(s"Liftover chain file defined for $canonicalSource -> $target but not found at: ${file.getPath}") - None - case None => - logger.debug(s"No liftover chain defined for $canonicalSource -> $target") - None + genomicsConfig.getLiftoverChainFile(canonicalSource, target).flatMap { file => + if (file.exists()) Some(target -> new LiftOver(file)) else None } }.toMap - - val batchSize = 1000 - - processBatches(iterator, batchSize, liftovers, canonicalSource) - } - private def processBatches( - iterator: Iterator[VariantContext], - batchSize: Int, - liftovers: Map[String, LiftOver], - sourceGenome: String - ): Future[Int] = { - - val progressInterval = 100 // Log progress every 100 batches (100k records) - - def processNextBatch(accumulatedCount: Int, skippedCount: Int, batchNumber: Int): Future[Int] = { - if (!iterator.hasNext) { - logger.info(s"Ingestion complete. Processed $accumulatedCount variants" + - (if (skippedCount > 0) s", skipped $skippedCount malformed records." 
else ".")) - Future.successful(accumulatedCount) - } else { - // Safely materialize records, skipping malformed ones - val (batch, newSkipped) = safelyTakeBatch(iterator, batchSize) - processBatch(batch, liftovers, sourceGenome).flatMap { count => - val newTotal = accumulatedCount + count - val newBatchNumber = batchNumber + 1 - - // Log progress every N batches - if (newBatchNumber % progressInterval == 0) { - val recordsProcessed = newBatchNumber * batchSize - logger.info(s"Progress: processed ~$recordsProcessed VCF records, created/updated $newTotal variants...") + val batchSize = 100 + val source = Source.fromFile(gffFile) + + try { + val lines = source.getLines().filterNot(_.startsWith("#")) + + // Custom grouping iterator that groups adjacent lines with same Chr/Pos/Ref/Alt + val groupedIterator = new AbstractIterator[Seq[Map[String, String]]] { + private var buffer: Option[Map[String, String]] = None + + override def hasNext: Boolean = buffer.isDefined || lines.hasNext + + override def next(): Seq[Map[String, String]] = { + if (!hasNext) throw new NoSuchElementException("next on empty iterator") + + val currentGroup = scala.collection.mutable.ArrayBuffer[Map[String, String]]() + + // Initialize with buffer or next line + val first = buffer.getOrElse(parseGffLine(lines.next())) + buffer = None // Clear buffer + + if (first.isEmpty) return next() // Skip malformed/empty lines + + currentGroup += first + + // Key to identify the group (Chr, Start, End) + val groupKey = (first("seqid"), first("start"), first("end")) + + // Peek ahead + var keepingGoing = true + while (keepingGoing && lines.hasNext) { + val nextLine = parseGffLine(lines.next()) + if (nextLine.nonEmpty) { + val nextKey = (nextLine("seqid"), nextLine("start"), nextLine("end")) + if (nextKey == groupKey) { + currentGroup += nextLine + } else { + buffer = Some(nextLine) + keepingGoing = false + } + } } - - processNextBatch(newTotal, skippedCount + newSkipped, newBatchNumber) + + currentGroup.toSeq } 
} - } - logger.info(s"Starting variant ingestion (batch size: $batchSize, progress logged every ${progressInterval * batchSize} records)") - processNextBatch(0, 0, 0) - } + def processNextBatch(accumulatedCount: Int): Future[Int] = { + // Synchronously take a batch from the iterator to avoid blocking the thread later + // (Iterator access is fast, processing is slow) + val batchGroups = scala.collection.mutable.ArrayBuffer[Seq[Map[String, String]]]() + var taken = 0 + while (taken < batchSize && groupedIterator.hasNext) { + batchGroups += groupedIterator.next() + taken += 1 + } - /** - * Safely takes a batch of records from the iterator, skipping malformed records. - * HTSJDK may throw TribbleException for malformed VCF lines (e.g., duplicate alleles). - * - * @param iterator The VCF record iterator - * @param batchSize Maximum number of valid records to collect - * @return Tuple of (validRecords, skippedCount) - */ - private def safelyTakeBatch(iterator: Iterator[VariantContext], batchSize: Int): (Seq[VariantContext], Int) = { - val batch = scala.collection.mutable.ArrayBuffer[VariantContext]() - var skipped = 0 - - while (batch.size < batchSize && iterator.hasNext) { - Try(iterator.next()) match { - case Success(vc) => batch += vc - case Failure(e) => - skipped += 1 - if (skipped <= 10) { - logger.warn(s"Skipping malformed VCF record: ${e.getMessage}") - } else if (skipped == 11) { - logger.warn("Suppressing further malformed record warnings...") + if (batchGroups.isEmpty) { + logger.info(s"GFF ingestion complete. Total variants: $accumulatedCount") + Future.successful(accumulatedCount) + } else { + // Process batch as a whole using optimized batch upsert + val variantsToProcess = batchGroups.flatMap(group => createVariantV2FromGffGroup(group, sourceGenome, liftovers)).toSeq + + variantV2Repository.upsertBatch(variantsToProcess).flatMap { resultIds => + // Log the number of records *processed* in this batch, not just newly created/updated. 
+ // resultIds.size is the number of variants that were actually inserted or updated. + // variantsToProcess.size is the total number of items from the GFF batch. + val batchCount = variantsToProcess.size // Number of GFF records processed in this iteration + val newTotal = accumulatedCount + batchCount + if (newTotal % 100000 == 0) { // Log every 100000 records processed + logger.info(s"Processed $newTotal GFF records...") + processNextBatch(newTotal) + } + } } - } - (batch.toSeq, skipped) + processNextBatch(0).andThen { case _ => + source.close() + } + } catch { + case e: Exception => + source.close() + Future.failed(e) + } } - private def processBatch(batch: Seq[VariantContext], liftovers: Map[String, LiftOver], sourceGenome: String): Future[Int] = { - // 1. Collect all contig names from batch - val contigNames = batch.map(_.getContig).distinct - // 2. Resolve Contig IDs for hg38 (source) and targets - - genbankContigRepository.findByCommonNames(contigNames).flatMap { contigs => - // Map: (CommonName, Genome) -> ContigID - // Normalize genome names to canonical form using aliases (e.g., "GRCh38.p14" -> "GRCh38", "T2T-CHM13v2.0" -> "hs1") - val contigMap = contigs.flatMap { c => - for { - cn <- c.commonName - rg <- c.referenceGenome - } yield { - // First try alias resolution, then fall back to stripping patch version - val normalizedGenome = genomicsConfig.referenceAliases.getOrElse(rg, rg.split("\\.").head) - (cn, normalizedGenome) -> c.id.get - } - }.toMap - - // Debug: log contig mapping on first batch - if (contigMap.isEmpty && batch.nonEmpty) { - logger.warn(s"Contig mapping failed! Looking for: ${contigNames.mkString(", ")}.
Found contigs: ${contigs.map(c => s"${c.commonName}/${c.referenceGenome}").mkString(", ")}") - } + private def mergeAliases(existing: play.api.libs.json.JsValue, incoming: play.api.libs.json.JsValue): play.api.libs.json.JsValue = { + import play.api.libs.json.* + + val eCommon = (existing \ "common_names").asOpt[Seq[String]].getOrElse(Seq.empty) + val iCommon = (incoming \ "common_names").asOpt[Seq[String]].getOrElse(Seq.empty) + val mergedCommon = (eCommon ++ iCommon).distinct + + val eRs = (existing \ "rs_ids").asOpt[Seq[String]].getOrElse(Seq.empty) + val iRs = (incoming \ "rs_ids").asOpt[Seq[String]].getOrElse(Seq.empty) + val mergedRs = (eRs ++ iRs).distinct + + // Deep merge sources is harder, simple merge for now + val eSources = (existing \ "sources").asOpt[JsObject].getOrElse(Json.obj()) + val iSources = (incoming \ "sources").asOpt[JsObject].getOrElse(Json.obj()) + // For source arrays, we really should merge the arrays, but standard ++ overwrites keys. + // A robust merge would iterate keys. 
+ // Let's do a slightly better merge for sources + val mergedSources = iSources.fields.foldLeft(eSources) { case (acc, (key, newVal)) => + val oldVal = (acc \ key).asOpt[Seq[String]].getOrElse(Seq.empty) + val nextVal = newVal.asOpt[Seq[String]].getOrElse(Seq.empty) + acc + (key -> Json.toJson((oldVal ++ nextVal).distinct)) + } + + Json.obj( + "common_names" -> mergedCommon, + "rs_ids" -> mergedRs, + "sources" -> mergedSources + ) + } - // Separate source variants (get aliases) from lifted variants (no aliases) - val sourceVariants = batch.flatMap { vc => - createVariantsForContext(vc, sourceGenome, contigMap) - } + private def parseGffLine(line: String): Map[String, String] = { + val cols = line.split("\t") + if (cols.length < 9) return Map.empty + + val attributes = cols(8).split(";").map { kv => + val parts = kv.split("=", 2) + if (parts.length == 2) parts(0) -> parts(1) else "" -> "" + }.toMap.filter(_._1.nonEmpty) + + Map( + "seqid" -> cols(0), + "source" -> cols(1), + "type" -> cols(2), + "start" -> cols(3), + "end" -> cols(4), + "score" -> cols(5), + "strand" -> cols(6), + "phase" -> cols(7) + ) ++ attributes + } - val liftedVariants = batch.flatMap { vc => - liftovers.flatMap { case (targetGenome, liftOver) => - val interval = new Interval(vc.getContig, vc.getStart, vc.getEnd) - val lifted = liftOver.liftOver(interval) - if (lifted != null) { - val liftedVc = new htsjdk.variant.variantcontext.VariantContextBuilder(vc) - .chr(lifted.getContig) - .start(lifted.getStart) - .stop(lifted.getEnd) - .make() - // Clear the ID for lifted variants - they share the source variant's name - createVariantsForContext(liftedVc, targetGenome, contigMap).map(_.copy(commonName = None, rsId = None)) - } else { - Seq.empty - } - } - } + private def createVariantV2FromGffGroup( + group: Seq[Map[String, String]], + sourceGenome: String, + liftovers: Map[String, LiftOver] + ): Option[VariantV2] = { + // First record determines canonical info + val primary = group.head + val name 
= primary.getOrElse("Name", primary.getOrElse("ID", "Unknown")) + + // Parse coordinates + val contig = primary("seqid") + val start = primary("start").toInt + // GFF attributes for alleles + val ref = primary.getOrElse("allele_anc", primary.getOrElse("ref_allele", primary.getOrElse("reference_allele", ""))) + val alt = primary.getOrElse("allele_der", primary.getOrElse("alt_allele", primary.getOrElse("derived_allele", ""))) + + if (ref.isEmpty || alt.isEmpty) { + if (Math.random() < 0.001) logger.warn(s"Missing alleles for GFF record (sampling): $primary") + return None // Skip if alleles missing + } - // Debug: log if we're losing variants - if (sourceVariants.isEmpty && batch.nonEmpty) { - logger.warn(s"No variants to save from batch of ${batch.size} records! Source genome: $sourceGenome, contigMap keys: ${contigMap.keys.mkString(", ")}") - } + // Normalize + val refSeq = referenceFastaFiles.get(sourceGenome) + val (normPos, normRef, normAlt) = normalizeVariant( + contig, start, ref, alt, refSeq + ) + + // Build coordinates JSONB + val sourceCoords = Json.obj( + "contig" -> contig, + "position" -> normPos, + "ref" -> normRef, + "alt" -> normAlt + ) + + // Lift over + val liftedCoords = liftovers.flatMap { case (targetGenome, liftOver) => + val interval = new Interval(contig, start, primary("end").toInt) + val lifted = liftOver.liftOver(interval) + + if (lifted != null) { + val targetRefSeq = referenceFastaFiles.get(targetGenome) + // Note: We use original ref/alt for normalization on target, + // assuming alleles translate directly (which is true for homology map). + // A more robust way would be to fetch ref from target fasta. 
+ val (lPos, lRef, lAlt) = normalizeVariant( + lifted.getContig, lifted.getStart, ref, alt, targetRefSeq + ) + + Some(targetGenome -> Json.obj( + "contig" -> lifted.getContig, + "position" -> lPos, + "ref" -> lRef, + "alt" -> lAlt + )) + } else None + } - // Create source variants with aliases, lifted variants without - for { - sourceCount <- variantRepository.findOrCreateVariantsBatchWithAliases(sourceVariants, "ybrowse") - liftedCount <- if (liftedVariants.nonEmpty) variantRepository.findOrCreateVariantsBatchNoAliases(liftedVariants) else Future.successful(Seq.empty) - } yield sourceCount.size + liftedCount.size + val allCoordinates = (liftedCoords + (sourceGenome -> sourceCoords)).foldLeft(Json.obj()) { + case (acc, (genome, coords)) => acc + (genome -> coords) } - } - private def createVariantsForContext( - vc: VariantContext, - genome: String, - contigMap: Map[(String, String), Int] - ): Seq[Variant] = { - // Resolve contig ID - // Try exact match or fallbacks (e.g. remove "chr") - val contigIdOpt = contigMap.get((vc.getContig, genome)) - .orElse(contigMap.get((vc.getContig.stripPrefix("chr"), genome))) - - contigIdOpt match { - case Some(contigId) => - vc.getAlternateAlleles.asScala.map { alt => - val rawId = Option(vc.getID).filterNot(id => id == "." || id.isEmpty) - val rsId = rawId.filter(_.toLowerCase.startsWith("rs")) - // For Y-DNA, the ID column often contains the SNP name (e.g. M269), which is the common name. 
- val commonName = rawId - - // Normalize the variant (left-align INDELs) - val refSeq = referenceFastaFiles.get(genome) - val (normPos, normRef, normAlt) = normalizeVariant( - vc.getContig, - vc.getStart, - vc.getReference.getDisplayString, - alt.getDisplayString, - refSeq - ) - - Variant( - genbankContigId = contigId, - position = normPos, - referenceAllele = normRef, - alternateAllele = normAlt, - variantType = vc.getType.toString, - rsId = rsId, - commonName = commonName - ) - }.toSeq - case None => - // Logger.warn(s"Contig not found for ${vc.getContig} in $genome") - Seq.empty + // Collect Metadata + val commonNames = group.flatMap(_.get("Name")).distinct + val rsIds = group.flatMap(_.get("Name")).filter(_.startsWith("rs")).distinct // Naive check + val ybrowseIds = group.flatMap(_.get("ID")).distinct + + // Sources map: source -> [names] + // Use 'ref' attribute from GFF as source attribution + val sourceMap = group.groupBy(_.getOrElse("ref", "ybrowse")).map { case (src, records) => + src -> records.flatMap(_.get("Name")).distinct } + + val aliases = Json.obj( + "common_names" -> commonNames, + "rs_ids" -> rsIds, + "sources" -> (Json.toJsObject(sourceMap) + ("ybrowse_id" -> Json.toJson(ybrowseIds))) + ) + + // Evidence + val tested = primary.get("count_tested").map(_.toInt).getOrElse(0) + val derived = primary.get("count_derived").map(_.toInt).getOrElse(0) + + // External Placements (Haplogroups) + val rawPlacements = Json.obj( + "ycc" -> primary.get("ycc_haplogroup"), + "isogg" -> primary.get("isogg_haplogroup"), + "yfull" -> primary.get("yfull_node") // yfull_node is a haplogroup placement, not a variant alias + ) + + val placements = JsObject(rawPlacements.fields.filterNot { case (_, v) => + v match { + case play.api.libs.json.JsString(s) => s == "."
|| s == "not listed" || s == "unknown" + case _ => false + } + }) + + val evidence = Json.obj( + "yseq_tested" -> tested, + "yseq_derived" -> derived, + "external_placements" -> placements + ) + + // Primers + val primers = if (primary.contains("primer_f")) { + Json.obj( + "yseq_f" -> primary.getOrElse("primer_f", ""), + "yseq_r" -> primary.getOrElse("primer_r", "") + ) + } else Json.obj() + + // Notes + val notes = primary.get("comment").filter(_ != ".") + + Some(VariantV2( + canonicalName = Some(name), + mutationType = MutationType.SNP, // GFF type 'point'/'snp' usually implies SNP + namingStatus = NamingStatus.Named, + aliases = aliases, + coordinates = allCoordinates, + evidence = evidence, + primers = primers, + notes = notes + )) } /** @@ -249,13 +340,6 @@ class YBrowseVariantIngestionService @Inject()( * 1. Right-trim: Remove common suffix bases from ref and alt alleles * 2. Pad: If either allele becomes empty, prepend the preceding reference base * 3. Left-trim: Remove common prefix bases (keeping at least 1 base on each) - * - * @param contig The contig name for reference lookup - * @param pos The 1-based position - * @param ref The reference allele - * @param alt The alternate allele - * @param refSeq Optional reference sequence file for padding lookup - * @return A tuple of (normalizedPos, normalizedRef, normalizedAlt) */ private def normalizeVariant( contig: String, @@ -325,69 +409,59 @@ class YBrowseVariantIngestionService @Inject()( } } } - /** - * Lifts a variant to all other supported reference genomes. + * Lifts a variant to all other supported reference genomes and adds coordinates. 
* - * @param sourceVariant The variant to lift (must have genbankContigId resolved) - * @param sourceContig The source contig information - * @return Future containing lifted variants for each target genome (may be empty if liftover fails) + * @param variantId The variant to update with additional coordinates + * @param sourceGenome The source reference genome + * @return Future containing the number of coordinates added */ - def liftoverVariant(sourceVariant: Variant, sourceContig: models.domain.genomics.GenbankContig): Future[Seq[Variant]] = { - val sourceGenome = sourceContig.referenceGenome.getOrElse("GRCh38") - val canonicalSource = genomicsConfig.resolveReferenceName(sourceGenome) - val sourceContigName = sourceContig.commonName.getOrElse(sourceContig.accession) - - // Get target genomes (all supported except source) - val targetGenomes = genomicsConfig.supportedReferences.filter(_ != canonicalSource) - - // Load liftover chains for each target - val liftoverResults = targetGenomes.flatMap { targetGenome => - genomicsConfig.getLiftoverChainFile(canonicalSource, targetGenome) match { - case Some(chainFile) if chainFile.exists() => - val liftOver = new LiftOver(chainFile) - val interval = new Interval(sourceContigName, sourceVariant.position, sourceVariant.position) - val lifted = liftOver.liftOver(interval) - - if (lifted != null) { - logger.info(s"Lifted ${sourceVariant.commonName.getOrElse("variant")} from $canonicalSource:$sourceContigName:${sourceVariant.position} to $targetGenome:${lifted.getContig}:${lifted.getStart}") - Some((targetGenome, lifted.getContig, lifted.getStart)) - } else { - logger.warn(s"Failed to liftover ${sourceVariant.commonName.getOrElse("variant")} from $canonicalSource to $targetGenome") - None - } - case _ => - logger.debug(s"No liftover chain available for $canonicalSource -> $targetGenome") - None - } - } - - // Resolve contig IDs for lifted positions - val targetContigNames = liftoverResults.map(_._2).distinct - - 
genbankContigRepository.findByCommonNames(targetContigNames).map { contigs => - // Map: (CommonName, Genome) -> ContigID - val contigMap = contigs.flatMap { c => - for { - cn <- c.commonName - rg <- c.referenceGenome - } yield (cn, rg) -> c.id.get - }.toMap - - liftoverResults.flatMap { case (targetGenome, liftedContig, liftedPos) => - // Try to find contig ID, handling chr prefix differences - val contigId = contigMap.get((liftedContig, targetGenome)) - .orElse(contigMap.get((liftedContig.stripPrefix("chr"), targetGenome))) - .orElse(contigMap.get(("chr" + liftedContig, targetGenome))) - - contigId.map { cid => - sourceVariant.copy( - variantId = None, - genbankContigId = cid, - position = liftedPos - ) + def addLiftedCoordinates(variantId: Int, sourceGenome: String): Future[Int] = { + variantV2Repository.findById(variantId).flatMap { + case Some(variant) => + val sourceCoords = variant.getCoordinates(sourceGenome) + sourceCoords match { + case Some(coords) => + val contig = (coords \ "contig").asOpt[String].getOrElse("") + val position = (coords \ "position").asOpt[Int].getOrElse(0) + val ref = (coords \ "ref").asOpt[String].getOrElse("") + val alt = (coords \ "alt").asOpt[String].getOrElse("") + + val canonicalSource = genomicsConfig.resolveReferenceName(sourceGenome) + val targetGenomes = genomicsConfig.supportedReferences.filter(_ != canonicalSource) + + val liftedFutures = targetGenomes.flatMap { targetGenome => + genomicsConfig.getLiftoverChainFile(canonicalSource, targetGenome) match { + case Some(chainFile) if chainFile.exists() => + val liftOver = new LiftOver(chainFile) + val interval = new Interval(contig, position, position) + val lifted = liftOver.liftOver(interval) + + if (lifted != null) { + val liftedCoords = Json.obj( + "contig" -> lifted.getContig, + "position" -> lifted.getStart, + "ref" -> ref, + "alt" -> alt + ) + Some(variantV2Repository.addCoordinates(variantId, targetGenome, liftedCoords)) + } else { + None + } + case _ => None + } + } + + 
Future.sequence(liftedFutures).map(_.count(_ == true)) + + case None => + logger.warn(s"Variant $variantId has no coordinates for $sourceGenome") + Future.successful(0) } - } + + case None => + logger.warn(s"Variant $variantId not found") + Future.successful(0) } } } diff --git a/app/utils/CuratorViewUtils.scala b/app/utils/CuratorViewUtils.scala new file mode 100644 index 0000000..a7d6155 --- /dev/null +++ b/app/utils/CuratorViewUtils.scala @@ -0,0 +1,32 @@ +package utils + +import models.domain.support.MessageStatus + +object CuratorViewUtils { + def actionBadgeClass(action: String): String = { + action match { + case "create" => "bg-success" + case "update" => "bg-warning text-dark" + case "delete" => "bg-danger" + case _ => "bg-secondary" + } + } + + def changeTypeBadgeClass(changeType: String): String = { + changeType match { + case "add" => "bg-success" + case "remove" => "bg-danger" + case "update" => "bg-warning text-dark" + case _ => "bg-secondary" + } + } + + def statusBadgeClass(status: MessageStatus): String = { + status match { + case MessageStatus.New => "bg-primary" + case MessageStatus.Read => "bg-info" + case MessageStatus.Replied => "bg-success" + case MessageStatus.Closed => "bg-secondary" + } + } +} diff --git a/app/utils/VariantViewUtils.scala b/app/utils/VariantViewUtils.scala new file mode 100644 index 0000000..b9aa57c --- /dev/null +++ b/app/utils/VariantViewUtils.scala @@ -0,0 +1,62 @@ +package utils + +import models.domain.genomics.VariantV2 +import play.api.libs.json.JsObject + +object VariantViewUtils { + def refGenomes(v: VariantV2): Seq[String] = { + v.coordinates.asOpt[Map[String, JsObject]].map(_.keys.toSeq.sorted).getOrElse(Seq.empty) + } + + def formatPosition(v: VariantV2, refGenome: String): String = { + v.getCoordinates(refGenome).map { coords => + val contig = (coords \ "contig").asOpt[String].getOrElse("?") + val pos = (coords \ "position").asOpt[Int].getOrElse(0) + s"$contig:$pos" + }.getOrElse("-") + } + + def 
formatAlleles(v: VariantV2, refGenome: String): String = { + v.getCoordinates(refGenome).map { coords => + val ref = (coords \ "ref").asOpt[String].getOrElse("?") + val alt = (coords \ "alt").asOpt[String].getOrElse("?") + s"$ref→$alt" + }.getOrElse("-") + } + + def formatAllelesTuple(v: VariantV2, refGenome: String): (String, String) = { + v.getCoordinates(refGenome).map { coords => + val ref = (coords \ "ref").asOpt[String].getOrElse("?") + val alt = (coords \ "alt").asOpt[String].getOrElse("?") + (ref, alt) + }.getOrElse(("?", "?")) + } + + def primaryAlleles(v: VariantV2): (String, String) = { + val coords = v.coordinates.asOpt[Map[String, JsObject]].getOrElse(Map.empty) + val primary = coords.get("hs1").orElse(coords.get("GRCh38")).orElse(coords.headOption.map(_._2)) + primary.map { c => + val ref = (c \ "ref").asOpt[String].getOrElse("?") + val alt = (c \ "alt").asOpt[String].getOrElse("?") + (ref, alt) + }.getOrElse(("?", "?")) + } + + def buildBadgeClass(refGenome: String): String = { + refGenome match { + case "GRCh37" => "bg-warning text-dark" + case "GRCh38" => "bg-info" + case "hs1" => "bg-success" + case _ => "bg-secondary" + } + } + + def shortRefGenome(refGenome: String): String = { + refGenome match { + case "GRCh37" => "GRCh37" + case "GRCh38" => "GRCh38" + case "hs1" => "T2T" + case other => other + } + } +} diff --git a/app/views/_navbar.scala.html b/app/views/_navbar.scala.html index 86119ea..992c506 100644 --- a/app/views/_navbar.scala.html +++ b/app/views/_navbar.scala.html @@ -21,8 +21,6 @@ @messages("nav.tools")
diff --git a/app/views/admin/genomics/dashboard.scala.html b/app/views/admin/genomics/dashboard.scala.html index 2fdd6c0..7889d38 100644 --- a/app/views/admin/genomics/dashboard.scala.html +++ b/app/views/admin/genomics/dashboard.scala.html @@ -5,6 +5,8 @@| Build | -Position | -Alleles | -
|---|---|---|
| - - @v.shortReferenceGenome - - | -@v.formattedPosition |
-
- @v.variant.referenceAllele
-
- @v.variant.alternateAllele
- |
-
@vwc.formattedPosition
-
- @vwc.variant.referenceAllele
-
- @vwc.variant.alternateAllele
-
- @messages(error.message)
- } - -@cytoband.name
- @{java.text.NumberFormat.getIntegerInstance().format(cytoband.startPos)}
- @{java.text.NumberFormat.getIntegerInstance().format(cytoband.endPos)}
- @{java.text.NumberFormat.getIntegerInstance().format(cytoband.endPos - cytoband.startPos)} bp
- Select a cytoband to view details
-| Chromosome | -Band | -Position | -Stain | -Build | -
|---|---|---|---|---|
| - @cytoband.contigName.getOrElse("Unknown") - | -
- @cytoband.name
- |
-
- @{java.text.NumberFormat.getIntegerInstance().format(cytoband.startPos)} -
- @{java.text.NumberFormat.getIntegerInstance().format(cytoband.endPos)}
- |
- - @cytoband.stain - | -- - @cytoband.referenceGenome.getOrElse("Unknown") - - | -
@{java.text.NumberFormat.getIntegerInstance().format(region.startPos)}
- @{java.text.NumberFormat.getIntegerInstance().format(region.endPos)}
- @{java.text.NumberFormat.getIntegerInstance().format(region.endPos - region.startPos)} bp
- @{java.text.NumberFormat.getIntegerInstance().format(coord.start)} -
+ @{java.text.NumberFormat.getIntegerInstance().format(coord.end)}
+ (@{java.text.NumberFormat.getIntegerInstance().format(coord.end - coord.start)} bp)
+ @{java.text.NumberFormat.getIntegerInstance().format(region.startPos)} -
- @{java.text.NumberFormat.getIntegerInstance().format(region.endPos)}
+ @displayCoord.map { c =>
+ @{java.text.NumberFormat.getIntegerInstance().format(c.start)} -
+ @{java.text.NumberFormat.getIntegerInstance().format(c.end)}
+ }.getOrElse("-")
Select a marker to view details
-| Chromosome | -Name | -Position | -Period | -Verified | -Build | -
|---|---|---|---|---|---|
| - @marker.contigName.getOrElse("Unknown") - | -
- @marker.name
- |
-
- @{java.text.NumberFormat.getIntegerInstance().format(marker.startPos)} -
- @{java.text.NumberFormat.getIntegerInstance().format(marker.endPos)}
- |
- - @marker.period bp - | -- @if(marker.verified) { - Yes - } else { - No - } - | -- - @marker.referenceGenome.getOrElse("Unknown") - - | -
@vwc.variant.referenceAllele@alleles._1@vwc.variant.alternateAllele@alleles._2@notes
+ } +| Build | -Position | -Alleles (Anc→Der) | -
|---|---|---|
| - - @vwc.shortReferenceGenome - - | -
- @vwc.contig.commonName.getOrElse(vwc.contig.accession):@vwc.variant.position
- |
-
- @vwc.variant.referenceAllele
-
- @vwc.variant.alternateAllele
- |
-
| @messages("variants.detail.build") | +@messages("variants.detail.position") | +@messages("variants.detail.alleles") | +
|---|---|---|
| + + @VariantViewUtils.shortRefGenome(refGenome) + + | +@VariantViewUtils.formatPosition(variant, refGenome) |
+
+ @alleles._1
+
+ @alleles._2
+ |
+
@VariantViewUtils.formatPosition(variant, refGenome)
+
+ @alleles._1
+
+ @alleles._2
+
+ @messages("variants.detail.noCoordinates")
+ } +} diff --git a/app/views/fragments/verticalTree.scala.html b/app/views/fragments/verticalTree.scala.html new file mode 100644 index 0000000..0a04b0a --- /dev/null +++ b/app/views/fragments/verticalTree.scala.html @@ -0,0 +1,269 @@ +@import models.api.{TreeDTO} +@import models.HaplogroupType +@import models.HaplogroupType.{MT, Y} +@import models.view.TreeViewModel +@import controllers.routes.TreeController + +@(tree: TreeDTO, hapType: HaplogroupType, renderedTreeData: Option[TreeViewModel], currentUrl: String)(implicit messages: Messages) + +@fullPageUrl(haplogroup: Option[String]) = @{ + hapType match { + case Y => TreeController.ytree(haplogroup) + case MT => TreeController.mtree(haplogroup) + } +} + +@fragmentUrl(haplogroup: Option[String]) = @{ + hapType match { + case Y => TreeController.yTreeFragment(haplogroup) + case MT => TreeController.mTreeFragment(haplogroup) + } +} + + + +
+ We're upgrading our database to serve you better.
+ This process typically takes 15-30 minutes.
+
We're consolidating our variant data schema to improve search performance and support additional reference genomes.
+ +We expect to be back online shortly. Please check back soon.
+