Skip to content

Commit f73396b

Browse files
committed
Hotfix for .filterGenomes()
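Fixes a column name mismatch in BV-BRC data queries that occurred when the fallback-to-cache option was triggered. Updated .filterGenomes() to reference the cache table's `genome.`-prefixed columns (`genome.taxon_id`, `genome.species`, `genome.genome_id`) instead of the unprefixed names.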
1 parent 20d7386 commit f73396b

1 file changed

Lines changed: 19 additions & 7 deletions

File tree

R/data_curation.R

Lines changed: 19 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -953,7 +953,7 @@ retrieveMetadata <- function(user_bacs,
953953
NULL
954954
}, add = TRUE)
955955

956-
# Happy path: metadata present -> apply AMR-aware filters
956+
# Good metadata present? Apply AMR filters
957957
if ("metadata" %in% DBI::dbListTables(con)) {
958958
if (isTRUE(verbose)) message("Loading metadata for filtering.")
959959
initial_metadata <- DBI::dbReadTable(con, "metadata")
@@ -1014,25 +1014,37 @@ retrieveMetadata <- function(user_bacs,
10141014

10151015
bv <- tibble::as_tibble(DBI::dbReadTable(con_cache, "bvbrc_bac_data"))
10161016

1017+
10171018
# Derive matches from user_bacs (taxon IDs or species)
10181019
sel <- tibble::tibble(`genome.genome_id` = character())
1020+
10191021
for (v in user_bacs) {
10201022
if (suppressWarnings(!is.na(as.numeric(v)))) {
1021-
# taxon id match
1022-
matches <- bv[bv$taxon_id == v | bv$taxon_id == as.numeric(v), , drop = FALSE]
1023+
# numeric taxon_id match
1024+
matches <- bv[bv$`genome.taxon_id` == v |
1025+
bv$`genome.taxon_id` == as.numeric(v), , drop = FALSE]
10231026
} else {
1024-
# species substring (case-insensitive) match
1025-
matches <- bv[stringr::str_detect(bv$species, stringr::fixed(v, ignore_case = TRUE)), , drop = FALSE]
1027+
# species substring (case-insensitive)
1028+
matches <- bv[stringr::str_detect(
1029+
bv$`genome.species`,
1030+
stringr::fixed(v, ignore_case = TRUE)
1031+
), , drop = FALSE]
10261032
}
1033+
10271034
if (nrow(matches)) {
1028-
sel <- dplyr::bind_rows(sel, tibble::tibble(`genome.genome_id` = as.character(matches$genome_id)))
1035+
sel <- dplyr::bind_rows(
1036+
sel,
1037+
tibble::tibble(
1038+
`genome.genome_id` = as.character(matches$`genome.genome_id`)
1039+
)
1040+
)
10291041
}
10301042
}
10311043
sel <- dplyr::distinct(sel)
10321044

10331045
if (nrow(sel) == 0L) {
10341046
DBI::dbDisconnect(con, shutdown = TRUE)
1035-
stop("No genomes matched user_bacs in BV-BRC cache. (Cache present but no hits.)")
1047+
stop("No genomes matched user_bacs in BV-BRC cache.")
10361048
}
10371049

10381050
# Minimal 'filtered' for downstream steps (downloaders & genomeList)

0 commit comments

Comments
 (0)