#############################################################################
# Copyright (C) 2023 Mitchell Kluesner (mkluesne@fredhutch.org)
#
# This file is part of multiEditR (Multiple Edit Deconvolution by Inference of Traces in R)
#
# Please only copy and/or distribute this script with proper citation of
# multiEditR publication
#############################################################################
README.txt
Welcome! This Rmarkdown file allows for the batch analysis of base editing from sanger sequencing files
example_analysis
directory
example_analysis
README.txt
multiEditRBatch.Rmd
(This file)global.R
parameters.xlsx
files
.ab1
or .fasta
files to be used in the
analysisoutput
example_analysis_analysis_data.tsv
,
example_analysis_model_fit.tsv
, and
analysis.html
will be created after running
EditRBatch.Rmd
How to use:
Make a copy of the parent example_analysis
directory, and rename as relevant
MGK001
, etc.Open global.R
and install all the packages under
libraries if not already installed
Open parameters.xlsx
, edit and save the input
data
sample_name
column will be used to identify data in
analysisMove all files referenced in parameters.xlsx
to the
files
directory
Open EditRBatch.Rmd
in RStudio
Edit analysis_directory = "SYSTEM_PATH"
to reflect
the system path to the parent analysis directory
Knit EditRBatch.Rmd
Open example_analysis/output/analysis.html
to
observe results. Scroll down to Individual results to observe sample
wise analysis.
Disclaimer: This is a preliminary script, with virtually no error handling, so extensive trouble shooting may be required
analysis_directory = "/Users/kluesner/Desktop/Research/For\ Others/James\ Thomas/example_analysis"
analysis_name = tail(unlist(strsplit(analysis_directory, "/")), 1)
source(paste0(analysis_directory, "/global.R"))
print(analysis_directory)
## [1] "/Users/kluesner/Desktop/Research/For Others/James Thomas/example_analysis"
print(analysis_name)
## [1] "example_analysis"
params.tbl = readxl::read_excel(paste0(analysis_directory, "/parameters.xlsx"),
col_names = c("sample_name", "sample_file", "ctrl_file", "motif",
"motif_fwd","wt", "edit", "ctrl_seq_fwd", "use_ctrl_seq",
"p_value"),
skip = 1) %>%
rowwise() %>%
mutate(sample_file = paste0(analysis_directory, "/files/", sample_file),
ctrl_file = paste0(analysis_directory, "/files/", ctrl_file)) %>%
rowid_to_column("id") %>%
ungroup
analysis = lapply(X = 1:nrow(params.tbl), FUN = runEditRBatch, params = params.tbl)
## Test1 started.
## initialized.
## samples loaded.
## samples filtered.
## sample aligned.
## Indels removed.
## Joining with `by = join_by(ctrl_post_aligned_index)`
## Indices adjusted.
## Motifs of interest mapped and subsetted.
## GAMLSS-RS iteration 1: Global Deviance = 2997
## GAMLSS-RS iteration 2: Global Deviance = 2997
## GAMLSS-RS iteration 1: Global Deviance = 2125
## GAMLSS-RS iteration 2: Global Deviance = 2125
## GAMLSS-RS iteration 1: Global Deviance = 2398
## GAMLSS-RS iteration 2: Global Deviance = 2398
## GAMLSS-RS iteration 1: Global Deviance = 2252
## GAMLSS-RS iteration 2: Global Deviance = 2252
## Joining with `by = join_by(x)`
## Joining with `by = join_by(x)`
## Joining with `by = join_by(base)`
## Joining with `by = join_by(x)`
## Joining with `by = join_by(x)`
## Joining with `by = join_by(base)`
## 7.8 seconds elapsed.
## Test2 started.
## initialized.
## samples loaded.
## samples filtered.
## sample aligned.
## Indels removed.
## Joining with `by = join_by(ctrl_post_aligned_index)`
## Indices adjusted.
## Motifs of interest mapped and subsetted.
## GAMLSS-RS iteration 1: Global Deviance = 2997
## GAMLSS-RS iteration 2: Global Deviance = 2997
## GAMLSS-RS iteration 1: Global Deviance = 2121
## GAMLSS-RS iteration 2: Global Deviance = 2121
## GAMLSS-RS iteration 1: Global Deviance = 2391
## GAMLSS-RS iteration 2: Global Deviance = 2391
## GAMLSS-RS iteration 1: Global Deviance = 2246
## GAMLSS-RS iteration 2: Global Deviance = 2246
## Joining with `by = join_by(x)`
## Joining with `by = join_by(x)`
## Joining with `by = join_by(base)`
## 4.52 seconds elapsed.
## Test3 started.
## initialized.
## samples loaded.
## samples filtered.
## sample aligned.
## Indels removed.
## Joining with `by = join_by(ctrl_post_aligned_index)`
## Indices adjusted.
## Motifs of interest mapped and subsetted.
## GAMLSS-RS iteration 1: Global Deviance = 3000
## GAMLSS-RS iteration 2: Global Deviance = 3000
## GAMLSS-RS iteration 1: Global Deviance = 2131
## GAMLSS-RS iteration 2: Global Deviance = 2131
## GAMLSS-RS iteration 1: Global Deviance = 2504
## GAMLSS-RS iteration 2: Global Deviance = 2504
## GAMLSS-RS iteration 1: Global Deviance = 2271
## GAMLSS-RS iteration 2: Global Deviance = 2271
## Joining with `by = join_by(x)`
## Joining with `by = join_by(x)`
## Joining with `by = join_by(base)`
## Joining with `by = join_by(x)`
## Joining with `by = join_by(x)`
## Joining with `by = join_by(base)`
## 6.94 seconds elapsed.
## Test4 started.
## initialized.
## samples loaded.
## samples filtered.
## sample aligned.
## Indels removed.
## Joining with `by = join_by(ctrl_post_aligned_index)`
## Indices adjusted.
## Motifs of interest mapped and subsetted.
## GAMLSS-RS iteration 1: Global Deviance = 3000
## GAMLSS-RS iteration 2: Global Deviance = 3000
## GAMLSS-RS iteration 1: Global Deviance = 2127
## GAMLSS-RS iteration 2: Global Deviance = 2127
## GAMLSS-RS iteration 1: Global Deviance = 2497
## GAMLSS-RS iteration 2: Global Deviance = 2497
## GAMLSS-RS iteration 1: Global Deviance = 2265
## GAMLSS-RS iteration 2: Global Deviance = 2265
## Joining with `by = join_by(x)`
## Joining with `by = join_by(x)`
## Joining with `by = join_by(base)`
## 4.07 seconds elapsed.
## Test5 started.
## initialized.
## samples loaded.
## samples filtered.
## sample aligned.
## Indels removed.
## Joining with `by = join_by(ctrl_post_aligned_index)`
## Indices adjusted.
## Motifs of interest mapped and subsetted.
## GAMLSS-RS iteration 1: Global Deviance = 3018
## GAMLSS-RS iteration 2: Global Deviance = 3018
## GAMLSS-RS iteration 1: Global Deviance = 2156
## GAMLSS-RS iteration 2: Global Deviance = 2156
## GAMLSS-RS iteration 1: Global Deviance = 2419
## GAMLSS-RS iteration 2: Global Deviance = 2419
## GAMLSS-RS iteration 1: Global Deviance = 2281
## GAMLSS-RS iteration 2: Global Deviance = 2281
## Joining with `by = join_by(x)`
## Joining with `by = join_by(x)`
## Joining with `by = join_by(base)`
## Joining with `by = join_by(x)`
## Joining with `by = join_by(x)`
## Joining with `by = join_by(base)`
## 5.71 seconds elapsed.
## Test6 started.
## initialized.
## samples loaded.
## samples filtered.
## sample aligned.
## Indels removed.
## Joining with `by = join_by(ctrl_post_aligned_index)`
## Indices adjusted.
## Motifs of interest mapped and subsetted.
## GAMLSS-RS iteration 1: Global Deviance = 2997
## GAMLSS-RS iteration 2: Global Deviance = 2997
## GAMLSS-RS iteration 1: Global Deviance = 2121
## GAMLSS-RS iteration 2: Global Deviance = 2121
## GAMLSS-RS iteration 1: Global Deviance = 2391
## GAMLSS-RS iteration 2: Global Deviance = 2391
## GAMLSS-RS iteration 1: Global Deviance = 2246
## GAMLSS-RS iteration 2: Global Deviance = 2246
## Joining with `by = join_by(x)`
## Joining with `by = join_by(x)`
## Joining with `by = join_by(base)`
## 4.54 seconds elapsed.
## Test7 started.
## initialized.
## samples loaded.
## samples filtered.
## sample aligned.
## Indels removed.
## Joining with `by = join_by(ctrl_post_aligned_index)`
## Indices adjusted.
## Motifs of interest mapped and subsetted.
## GAMLSS-RS iteration 1: Global Deviance = 3021
## GAMLSS-RS iteration 2: Global Deviance = 3021
## GAMLSS-RS iteration 1: Global Deviance = 2163
## GAMLSS-RS iteration 2: Global Deviance = 2163
## GAMLSS-RS iteration 1: Global Deviance = 2525
## GAMLSS-RS iteration 2: Global Deviance = 2525
## GAMLSS-RS iteration 1: Global Deviance = 2300
## GAMLSS-RS iteration 2: Global Deviance = 2300
## Joining with `by = join_by(x)`
## Joining with `by = join_by(x)`
## Joining with `by = join_by(base)`
## Joining with `by = join_by(x)`
## Joining with `by = join_by(x)`
## Joining with `by = join_by(base)`
## 6.26 seconds elapsed.
## Test8 started.
## initialized.
## samples loaded.
## samples filtered.
## sample aligned.
## Indels removed.
## Joining with `by = join_by(ctrl_post_aligned_index)`
## Indices adjusted.
## Motifs of interest mapped and subsetted.
## GAMLSS-RS iteration 1: Global Deviance = 3000
## GAMLSS-RS iteration 2: Global Deviance = 3000
## GAMLSS-RS iteration 1: Global Deviance = 2127
## GAMLSS-RS iteration 2: Global Deviance = 2127
## GAMLSS-RS iteration 1: Global Deviance = 2497
## GAMLSS-RS iteration 2: Global Deviance = 2497
## GAMLSS-RS iteration 1: Global Deviance = 2265
## GAMLSS-RS iteration 2: Global Deviance = 2265
## Joining with `by = join_by(x)`
## Joining with `by = join_by(x)`
## Joining with `by = join_by(base)`
## 6.34 seconds elapsed.
## Test9 started.
## initialized.
## samples loaded.
## samples filtered.
## sample aligned.
## Indels removed.
## Joining with `by = join_by(ctrl_post_aligned_index)`
## Indices adjusted.
## Motifs of interest mapped and subsetted.
## GAMLSS-RS iteration 1: Global Deviance = 1732
## GAMLSS-RS iteration 2: Global Deviance = 1732
## GAMLSS-RS iteration 1: Global Deviance = 1857
## GAMLSS-RS iteration 2: Global Deviance = 1857
## GAMLSS-RS iteration 1: Global Deviance = 1802
## GAMLSS-RS iteration 2: Global Deviance = 1802
## GAMLSS-RS iteration 1: Global Deviance = 1881
## GAMLSS-RS iteration 2: Global Deviance = 1881
## Joining with `by = join_by(x)`
## Joining with `by = join_by(x)`
## Joining with `by = join_by(base)`
## 4.25 seconds elapsed.
## Test10 started.
## initialized.
## samples loaded.
## samples filtered.
## sample aligned.
## Indels removed.
## Joining with `by = join_by(ctrl_post_aligned_index)`
## Indices adjusted.
## Motifs of interest mapped and subsetted.
## GAMLSS-RS iteration 1: Global Deviance = 1732
## GAMLSS-RS iteration 2: Global Deviance = 1732
## GAMLSS-RS iteration 1: Global Deviance = 1857
## GAMLSS-RS iteration 2: Global Deviance = 1857
## GAMLSS-RS iteration 1: Global Deviance = 1802
## GAMLSS-RS iteration 2: Global Deviance = 1802
## GAMLSS-RS iteration 1: Global Deviance = 1881
## GAMLSS-RS iteration 2: Global Deviance = 1881
## Joining with `by = join_by(x)`
## Joining with `by = join_by(x)`
## Joining with `by = join_by(base)`
## 4.54 seconds elapsed.
# analysis = lapply(X = 1:2, FUN = runEditRBatch, params = params.tbl)
data.tbl = analysis %>% lapply(., "[[", 1) %>%
plyr::ldply(., "tibble")
write_tsv(data.tbl, paste0(analysis_directory, "/output/summary_data.tsv"))
data.tbl %>%
mutate_if(is.numeric, round, digits=3) %>%
DT::datatable()
model_fit.tbl = analysis %>% lapply(., "[[", 2) %>%
plyr::ldply(., "tibble") %>%
mutate(base = paste0(base, " fillibens coef.")) %>%
dplyr::select(sample_name, base, fillibens) %>%
pivot_wider(names_from = base, values_from = fillibens)
write_tsv(model_fit.tbl, paste0(analysis_directory, "/output/model_fit_data.tsv"))
model_fit.tbl %>%
mutate_if(is.numeric, round, digits=3) %>%
DT::datatable()
sample file: /files/RP272_cdna_wt.ab1
control file: /files/RP272_cdna_ko.ab1
sgRNA: AGTAGCTGGGATTACAGATG
sample file: /files/RP272_cdna_wt.ab1
control file: /files/RP272_cdna_ko.fasta
sgRNA: AGTAGCTGGGATTACAGATG
NULL
sample file: /files/RP272_cdna_wt.ab1
control file: /files/RP272_cdna_ko.ab1
sgRNA: CGTATTTTTGTTAGAGATGG
sample file: /files/RP272_cdna_wt.ab1
control file: /files/RP272_cdna_ko.fasta
sgRNA: CGTATTTTTGTTAGAGATGG
NULL
sample file: /files/RP272_cdna_wt.ab1
control file: /files/RP272_cdna_ko.ab1
sgRNA: CATCTGTAATCCCAGCTACT
sample file: /files/RP272_cdna_wt.ab1
control file: /files/RP272_cdna_ko.fasta
sgRNA: CATCTGTAATCCCAGCTACT
NULL
sample file: /files/RP272_cdna_wt.ab1
control file: /files/RP272_cdna_ko.ab1
sgRNA: CCATCTCTAACAAAAATACG
sample file: /files/RP272_cdna_wt.ab1
control file: /files/RP272_cdna_ko.fasta
sgRNA: CCATCTCTAACAAAAATACG
NULL
sample file: /files/4 Round 1_B01.ab1
control file: /files/4 Round 1_B01_ctrl.fasta
sgRNA: AGCCCAGGCAAGGGCAGCTT
NULL
sample file: /files/4 Round 1_B01.ab1
control file: /files/4 Round 1_B01_ctrl.fasta
sgRNA: AGCCCAGGCAAGGGCAGCTT
NULL