#############################################################################
# Copyright (C) 2023 Mitchell Kluesner (mkluesne@fredhutch.org)
#  
# This file is part of multiEditR (Multiple Edit Deconvolution by Inference of Traces in R)
# 
# Please only copy and/or distribute this script with proper citation of 
# multiEditR publication
#############################################################################

README

README.txt

Welcome! This Rmarkdown file allows for the batch analysis of base editing from sanger sequencing files

  • To use set up a directory structured like the example_analysis directory
    • example_analysis
      • README.txt
      • multiEditRBatch.Rmd (This file)
      • global.R
      • parameters.xlsx
      • files
      • .ab1 or .fasta files to be used in the analysis
      • output
        • example_analysis_analysis_data.tsv, example_analysis_model_fit.tsv, and analysis.html will be created after running EditRBatch.Rmd
  • Do not change the directory structure, as it will disrupt the analysis!

How to use:

  1. Make a copy of the parent example_analysis directory, and rename as relevant

    • for e.g. rename to MGK001, etc.
  2. Open global.R and install all the packages under libraries if not already installed

  3. Open parameters.xlsx, edit and save the input data

    • sample_name column will be used to identify data in analysis
    • For file names, just use the terminal file name as they’d appear in the final directory
  4. Move all files referenced in parameters.xlsx to the files directory

  5. Open EditRBatch.Rmd in RStudio

  6. Edit analysis_directory = "SYSTEM_PATH" to reflect the system path to the parent analysis directory

  7. Knit EditRBatch.Rmd

  8. Open example_analysis/output/analysis.html to observe results. Scroll down to Individual results to observe sample wise analysis.

Disclaimer: This is a preliminary script, with virtually no error handling, so extensive trouble shooting may be required

Set analysis directory

analysis_directory = "/Users/kluesner/Desktop/Research/For\ Others/James\ Thomas/example_analysis"

analysis_name = tail(unlist(strsplit(analysis_directory, "/")), 1)
source(paste0(analysis_directory, "/global.R"))
print(analysis_directory)
## [1] "/Users/kluesner/Desktop/Research/For Others/James Thomas/example_analysis"
print(analysis_name)
## [1] "example_analysis"

Analysis

params.tbl = readxl::read_excel(paste0(analysis_directory, "/parameters.xlsx"),
                            col_names = c("sample_name", "sample_file", "ctrl_file", "motif",
                                          "motif_fwd","wt", "edit", "ctrl_seq_fwd", "use_ctrl_seq",
                                          "p_value"), 
                            skip = 1) %>%
  rowwise() %>%
  mutate(sample_file = paste0(analysis_directory, "/files/", sample_file),
         ctrl_file = paste0(analysis_directory, "/files/", ctrl_file)) %>%
  rowid_to_column("id") %>%
  ungroup

analysis = lapply(X = 1:nrow(params.tbl), FUN = runEditRBatch, params = params.tbl)
## Test1 started.
## initialized.
## samples loaded.
## samples filtered.
## sample aligned.
## Indels removed.
## Joining with `by = join_by(ctrl_post_aligned_index)`
## Indices adjusted.
## Motifs of interest mapped and subsetted.
## GAMLSS-RS iteration 1: Global Deviance = 2997 
## GAMLSS-RS iteration 2: Global Deviance = 2997 
## GAMLSS-RS iteration 1: Global Deviance = 2125 
## GAMLSS-RS iteration 2: Global Deviance = 2125 
## GAMLSS-RS iteration 1: Global Deviance = 2398 
## GAMLSS-RS iteration 2: Global Deviance = 2398 
## GAMLSS-RS iteration 1: Global Deviance = 2252 
## GAMLSS-RS iteration 2: Global Deviance = 2252
## Joining with `by = join_by(x)`
## Joining with `by = join_by(x)`
## Joining with `by = join_by(base)`
## Joining with `by = join_by(x)`
## Joining with `by = join_by(x)`
## Joining with `by = join_by(base)`
## 7.8 seconds elapsed.
## Test2 started.
## initialized.
## samples loaded.
## samples filtered.
## sample aligned.
## Indels removed.
## Joining with `by = join_by(ctrl_post_aligned_index)`
## Indices adjusted.
## Motifs of interest mapped and subsetted.
## GAMLSS-RS iteration 1: Global Deviance = 2997 
## GAMLSS-RS iteration 2: Global Deviance = 2997 
## GAMLSS-RS iteration 1: Global Deviance = 2121 
## GAMLSS-RS iteration 2: Global Deviance = 2121 
## GAMLSS-RS iteration 1: Global Deviance = 2391 
## GAMLSS-RS iteration 2: Global Deviance = 2391 
## GAMLSS-RS iteration 1: Global Deviance = 2246 
## GAMLSS-RS iteration 2: Global Deviance = 2246
## Joining with `by = join_by(x)`
## Joining with `by = join_by(x)`
## Joining with `by = join_by(base)`
## 4.52 seconds elapsed.
## Test3 started.
## initialized.
## samples loaded.
## samples filtered.
## sample aligned.
## Indels removed.
## Joining with `by = join_by(ctrl_post_aligned_index)`
## Indices adjusted.
## Motifs of interest mapped and subsetted.
## GAMLSS-RS iteration 1: Global Deviance = 3000 
## GAMLSS-RS iteration 2: Global Deviance = 3000 
## GAMLSS-RS iteration 1: Global Deviance = 2131 
## GAMLSS-RS iteration 2: Global Deviance = 2131 
## GAMLSS-RS iteration 1: Global Deviance = 2504 
## GAMLSS-RS iteration 2: Global Deviance = 2504 
## GAMLSS-RS iteration 1: Global Deviance = 2271 
## GAMLSS-RS iteration 2: Global Deviance = 2271
## Joining with `by = join_by(x)`
## Joining with `by = join_by(x)`
## Joining with `by = join_by(base)`
## Joining with `by = join_by(x)`
## Joining with `by = join_by(x)`
## Joining with `by = join_by(base)`
## 6.94 seconds elapsed.
## Test4 started.
## initialized.
## samples loaded.
## samples filtered.
## sample aligned.
## Indels removed.
## Joining with `by = join_by(ctrl_post_aligned_index)`
## Indices adjusted.
## Motifs of interest mapped and subsetted.
## GAMLSS-RS iteration 1: Global Deviance = 3000 
## GAMLSS-RS iteration 2: Global Deviance = 3000 
## GAMLSS-RS iteration 1: Global Deviance = 2127 
## GAMLSS-RS iteration 2: Global Deviance = 2127 
## GAMLSS-RS iteration 1: Global Deviance = 2497 
## GAMLSS-RS iteration 2: Global Deviance = 2497 
## GAMLSS-RS iteration 1: Global Deviance = 2265 
## GAMLSS-RS iteration 2: Global Deviance = 2265
## Joining with `by = join_by(x)`
## Joining with `by = join_by(x)`
## Joining with `by = join_by(base)`
## 4.07 seconds elapsed.
## Test5 started.
## initialized.
## samples loaded.
## samples filtered.
## sample aligned.
## Indels removed.
## Joining with `by = join_by(ctrl_post_aligned_index)`
## Indices adjusted.
## Motifs of interest mapped and subsetted.
## GAMLSS-RS iteration 1: Global Deviance = 3018 
## GAMLSS-RS iteration 2: Global Deviance = 3018 
## GAMLSS-RS iteration 1: Global Deviance = 2156 
## GAMLSS-RS iteration 2: Global Deviance = 2156 
## GAMLSS-RS iteration 1: Global Deviance = 2419 
## GAMLSS-RS iteration 2: Global Deviance = 2419 
## GAMLSS-RS iteration 1: Global Deviance = 2281 
## GAMLSS-RS iteration 2: Global Deviance = 2281
## Joining with `by = join_by(x)`
## Joining with `by = join_by(x)`
## Joining with `by = join_by(base)`
## Joining with `by = join_by(x)`
## Joining with `by = join_by(x)`
## Joining with `by = join_by(base)`
## 5.71 seconds elapsed.
## Test6 started.
## initialized.
## samples loaded.
## samples filtered.
## sample aligned.
## Indels removed.
## Joining with `by = join_by(ctrl_post_aligned_index)`
## Indices adjusted.
## Motifs of interest mapped and subsetted.
## GAMLSS-RS iteration 1: Global Deviance = 2997 
## GAMLSS-RS iteration 2: Global Deviance = 2997 
## GAMLSS-RS iteration 1: Global Deviance = 2121 
## GAMLSS-RS iteration 2: Global Deviance = 2121 
## GAMLSS-RS iteration 1: Global Deviance = 2391 
## GAMLSS-RS iteration 2: Global Deviance = 2391 
## GAMLSS-RS iteration 1: Global Deviance = 2246 
## GAMLSS-RS iteration 2: Global Deviance = 2246
## Joining with `by = join_by(x)`
## Joining with `by = join_by(x)`
## Joining with `by = join_by(base)`
## 4.54 seconds elapsed.
## Test7 started.
## initialized.
## samples loaded.
## samples filtered.
## sample aligned.
## Indels removed.
## Joining with `by = join_by(ctrl_post_aligned_index)`
## Indices adjusted.
## Motifs of interest mapped and subsetted.
## GAMLSS-RS iteration 1: Global Deviance = 3021 
## GAMLSS-RS iteration 2: Global Deviance = 3021 
## GAMLSS-RS iteration 1: Global Deviance = 2163 
## GAMLSS-RS iteration 2: Global Deviance = 2163 
## GAMLSS-RS iteration 1: Global Deviance = 2525 
## GAMLSS-RS iteration 2: Global Deviance = 2525 
## GAMLSS-RS iteration 1: Global Deviance = 2300 
## GAMLSS-RS iteration 2: Global Deviance = 2300
## Joining with `by = join_by(x)`
## Joining with `by = join_by(x)`
## Joining with `by = join_by(base)`
## Joining with `by = join_by(x)`
## Joining with `by = join_by(x)`
## Joining with `by = join_by(base)`
## 6.26 seconds elapsed.
## Test8 started.
## initialized.
## samples loaded.
## samples filtered.
## sample aligned.
## Indels removed.
## Joining with `by = join_by(ctrl_post_aligned_index)`
## Indices adjusted.
## Motifs of interest mapped and subsetted.
## GAMLSS-RS iteration 1: Global Deviance = 3000 
## GAMLSS-RS iteration 2: Global Deviance = 3000 
## GAMLSS-RS iteration 1: Global Deviance = 2127 
## GAMLSS-RS iteration 2: Global Deviance = 2127 
## GAMLSS-RS iteration 1: Global Deviance = 2497 
## GAMLSS-RS iteration 2: Global Deviance = 2497 
## GAMLSS-RS iteration 1: Global Deviance = 2265 
## GAMLSS-RS iteration 2: Global Deviance = 2265
## Joining with `by = join_by(x)`
## Joining with `by = join_by(x)`
## Joining with `by = join_by(base)`
## 6.34 seconds elapsed.
## Test9 started.
## initialized.
## samples loaded.
## samples filtered.
## sample aligned.
## Indels removed.
## Joining with `by = join_by(ctrl_post_aligned_index)`
## Indices adjusted.
## Motifs of interest mapped and subsetted.
## GAMLSS-RS iteration 1: Global Deviance = 1732 
## GAMLSS-RS iteration 2: Global Deviance = 1732 
## GAMLSS-RS iteration 1: Global Deviance = 1857 
## GAMLSS-RS iteration 2: Global Deviance = 1857 
## GAMLSS-RS iteration 1: Global Deviance = 1802 
## GAMLSS-RS iteration 2: Global Deviance = 1802 
## GAMLSS-RS iteration 1: Global Deviance = 1881 
## GAMLSS-RS iteration 2: Global Deviance = 1881
## Joining with `by = join_by(x)`
## Joining with `by = join_by(x)`
## Joining with `by = join_by(base)`
## 4.25 seconds elapsed.
## Test10 started.
## initialized.
## samples loaded.
## samples filtered.
## sample aligned.
## Indels removed.
## Joining with `by = join_by(ctrl_post_aligned_index)`
## Indices adjusted.
## Motifs of interest mapped and subsetted.
## GAMLSS-RS iteration 1: Global Deviance = 1732 
## GAMLSS-RS iteration 2: Global Deviance = 1732 
## GAMLSS-RS iteration 1: Global Deviance = 1857 
## GAMLSS-RS iteration 2: Global Deviance = 1857 
## GAMLSS-RS iteration 1: Global Deviance = 1802 
## GAMLSS-RS iteration 2: Global Deviance = 1802 
## GAMLSS-RS iteration 1: Global Deviance = 1881 
## GAMLSS-RS iteration 2: Global Deviance = 1881
## Joining with `by = join_by(x)`
## Joining with `by = join_by(x)`
## Joining with `by = join_by(base)`
## 4.54 seconds elapsed.
# analysis = lapply(X = 1:2, FUN = runEditRBatch, params = params.tbl)

Compiled Results

Editing data

data.tbl = analysis %>% lapply(., "[[", 1) %>%
  plyr::ldply(., "tibble")

write_tsv(data.tbl, paste0(analysis_directory, "/output/summary_data.tsv"))

data.tbl %>%
  mutate_if(is.numeric, round, digits=3) %>%
  DT::datatable()

Model fit data

model_fit.tbl = analysis %>% lapply(., "[[", 2) %>%
  plyr::ldply(., "tibble") %>%
  mutate(base = paste0(base, " fillibens coef.")) %>%
  dplyr::select(sample_name, base, fillibens) %>%
  pivot_wider(names_from = base, values_from = fillibens)

write_tsv(model_fit.tbl, paste0(analysis_directory, "/output/model_fit_data.tsv"))

model_fit.tbl %>%
  mutate_if(is.numeric, round, digits=3) %>%
  DT::datatable()

Individual Results

Test1

sample file: /files/RP272_cdna_wt.ab1

control file: /files/RP272_cdna_ko.ab1

sgRNA: AGTAGCTGGGATTACAGATG

Chromatograms

Editing Data

Model parameters

Test2

sample file: /files/RP272_cdna_wt.ab1

control file: /files/RP272_cdna_ko.fasta

sgRNA: AGTAGCTGGGATTACAGATG

Chromatograms

NULL

Editing Data

Model parameters

Test3

sample file: /files/RP272_cdna_wt.ab1

control file: /files/RP272_cdna_ko.ab1

sgRNA: CGTATTTTTGTTAGAGATGG

Chromatograms

Editing Data

Model parameters

Test4

sample file: /files/RP272_cdna_wt.ab1

control file: /files/RP272_cdna_ko.fasta

sgRNA: CGTATTTTTGTTAGAGATGG

Chromatograms

NULL

Editing Data

Model parameters

Test5

sample file: /files/RP272_cdna_wt.ab1

control file: /files/RP272_cdna_ko.ab1

sgRNA: CATCTGTAATCCCAGCTACT

Chromatograms

Editing Data

Model parameters

Test6

sample file: /files/RP272_cdna_wt.ab1

control file: /files/RP272_cdna_ko.fasta

sgRNA: CATCTGTAATCCCAGCTACT

Chromatograms

NULL

Editing Data

Model parameters

Test7

sample file: /files/RP272_cdna_wt.ab1

control file: /files/RP272_cdna_ko.ab1

sgRNA: CCATCTCTAACAAAAATACG

Chromatograms

Editing Data

Model parameters

Test8

sample file: /files/RP272_cdna_wt.ab1

control file: /files/RP272_cdna_ko.fasta

sgRNA: CCATCTCTAACAAAAATACG

Chromatograms

NULL

Editing Data

Model parameters

Test9

sample file: /files/4 Round 1_B01.ab1

control file: /files/4 Round 1_B01_ctrl.fasta

sgRNA: AGCCCAGGCAAGGGCAGCTT

Chromatograms

NULL

Editing Data

Model parameters

Test10

sample file: /files/4 Round 1_B01.ab1

control file: /files/4 Round 1_B01_ctrl.fasta

sgRNA: AGCCCAGGCAAGGGCAGCTT

Chromatograms

NULL

Editing Data

Model parameters