library(readr); library(dplyr); library(gt); library(stringr); library(tidyr)# ---------- helpers robusti ----------
# Add a canonical set of columns to a variant table.
#
# For each canonical target name the first alias found among the columns of
# `df` is copied into a column with the target name. Aliases are matched
# exactly first (in the order listed below); only when no exact match exists
# is a case-insensitive match attempted, so headers such as "GENE" or
# "Coverage" are still recognised. Targets with no matching alias are created
# as all-NA columns. Original columns are kept untouched.
#
# Args:
#   df: a data.frame / tibble of variants.
# Returns:
#   `df` with the columns gene, variant, protein_change, classification,
#   escat, tier_amp_asco_cap, vaf, depth and notes guaranteed to exist.
standardize_cols <- function(df) {
  # Map: canonical target -> header spellings seen in practice.
  mapping <- list(
    gene = c("gene", "Gene", "Hugo_Symbol", "Symbol"),
    variant = c(
      "variant", "Variant", "cDNA", "cdna_change", "hgvsc", "hgvsc_short",
      "c_change", "mutation", "c_variant", "c_hgvs"
    ),
    protein_change = c(
      "protein_change", "Protein", "hgvsp", "hgvsp_short", "p_change",
      "aa_change", "p_hgvs", "protein"
    ),
    classification = c(
      "classification", "class", "pathogenicity", "clin_sig",
      "clinical_significance", "significance"
    ),
    escat = c("escat", "ESCAT"),
    tier_amp_asco_cap = c(
      "tier_amp_asco_cap", "tier", "tier_amp", "amp_asco_cap_tier",
      "amp_asco_cap"
    ),
    vaf = c(
      "vaf", "VAF", "vaf_fraction", "allele_fraction",
      "variant_allele_frequency", "AF"
    ),
    depth = c("depth", "coverage", "dp", "read_depth", "DP"),
    notes = c(
      "notes", "note", "comment", "comments", "interpretation", "report_note"
    )
  )

  # A bare `NA` cannot be assigned to a zero-row base data.frame, so size the
  # filler to the table; non-data.frame inputs keep the scalar-NA behaviour.
  na_fill <- if (is.data.frame(df)) rep(NA, nrow(df)) else NA

  for (tgt in names(mapping)) {
    cands <- mapping[[tgt]]
    current <- names(df)

    # Exact match first, respecting the candidate order.
    hit <- cands[cands %in% current]

    # Fallback: case-insensitive match, still in candidate order.
    if (length(hit) == 0L) {
      idx <- match(tolower(cands), tolower(current))
      hit <- current[idx[!is.na(idx)]]
    }

    if (length(hit) >= 1L) {
      df[[tgt]] <- df[[hit[1L]]]
    } else {
      df[[tgt]] <- na_fill
    }
  }
  df
}
# Add review flags to a somatic variant table.
#
# Each flag is computed only when its source column exists in `df`; otherwise
# the flag column is filled with NA.
#
#   pathogenic_like    <- classification mentions "patho" or "likely" and is
#                         not a benign / conflicting call (so "Likely benign"
#                         is not flagged).
#   high_actionability <- escat holds tier I or II (optionally with an A/B/C
#                         sub-level) as a whole token; III, IV, V, X do not
#                         match.
#   qc_low_vaf         <- vaf parses as a number strictly below `vaf_min`.
#   qc_low_depth       <- depth parses as a number strictly below `depth_min`.
#
# Missing or non-numeric values yield FALSE, never NA, when the source column
# is present.
#
# Args:
#   df:        a data.frame / tibble of variants.
#   vaf_min:   VAF threshold, on the same scale as the `vaf` column.
#   depth_min: read-depth threshold.
# Returns:
#   `df` with the four logical flag columns added (or overwritten).
safe_flag_cols_somatic <- function(df, vaf_min = 0.05, depth_min = 250) {
  cols <- names(df)
  no_source <- rep(NA, nrow(df))

  # Lower-cased text with NA mapped to "" so pattern matching returns FALSE.
  as_text <- function(x) {
    txt <- tolower(as.character(x))
    txt[is.na(txt)] <- ""
    txt
  }

  # TRUE where the value parses as a number strictly below `threshold`.
  # as.character() first so factors are parsed by label, not by level code.
  is_below <- function(x, threshold) {
    num <- suppressWarnings(as.numeric(as.character(x)))
    !is.na(num) & num < threshold
  }

  df[["pathogenic_like"]] <- if ("classification" %in% cols) {
    cls <- as_text(df[["classification"]])
    grepl("patho|likely", cls) & !grepl("benign|conflicting", cls)
  } else {
    no_source
  }

  df[["high_actionability"]] <- if ("escat" %in% cols) {
    # One or two "i" as a standalone token, optional sub-level letter.
    grepl(
      "(^|[^[:alnum:]])i{1,2}[-_ ]?[abc]?($|[^[:alnum:]])",
      as_text(df[["escat"]])
    )
  } else {
    no_source
  }

  df[["qc_low_vaf"]] <- if ("vaf" %in% cols) {
    is_below(df[["vaf"]], vaf_min)
  } else {
    no_source
  }

  df[["qc_low_depth"]] <- if ("depth" %in% cols) {
    is_below(df[["depth"]], depth_min)
  } else {
    no_source
  }

  df
}
# Add the `needs_cascade` flag to a germline variant table.
#
# A variant is flagged when its classification mentions "patho" or "likely"
# and is not a benign / conflicting call, so "Likely benign" is not flagged.
# Missing classifications yield FALSE. When `df` has no `classification`
# column the flag is filled with NA.
#
# Args:
#   df: a data.frame / tibble of variants.
# Returns:
#   `df` with the logical column `needs_cascade` added (or overwritten).
safe_flag_cols_germline <- function(df) {
  df[["needs_cascade"]] <- if ("classification" %in% names(df)) {
    cls <- tolower(as.character(df[["classification"]]))
    cls[is.na(cls)] <- ""
    grepl("patho|likely", cls) & !grepl("benign|conflicting", cls)
  } else {
    rep(NA, nrow(df))
  }
  df
}
# Load the somatic and germline variant tables from the data/ directory.
som <- readr::read_csv(file = "data/ovary_variants_somatic.csv")
germ <- readr::read_csv(file = "data/ovary_variants_germline.csv")
# enrich with useful flags when the relevant columns are present
som