import os
import warnings
from collections import Counter
import numpy as np
from azimuth import model_comparison
from .helpers import load_cfd_scoring_matrix, rev_comp
from .mmej import generate_mmej_patterns
from .off_targets import calculate_cfd_score, calculate_ot_sum_score, run_bowtie
# suppress warnings from azimuth
warnings.filterwarnings("ignore")
[docs]
class Guide:
def __init__(
self,
sequence,
pam_position,
pam_len,
strand="+",
max_flanking_length=75,
cut_offset=3,
chromosome="seq",
start=0,
):
"""Guide class.
Defines a gRNA and its properties.
Parameters
----------
sequence : str
gRNA sequence
pam_position : int
Position of PAM sequence in gRNA sequence
pam_len : int
Length of PAM sequence
strand : str, optional
Strand of the gRNA, by default "+"
max_flanking_length : int, optional
Max flanking length taken into account for MMEJ calculation, by default 75
cut_offset : int, optional
Cut offset from the beginning of PAM, by default 3
chromosome : str, optional
Chromosome where the gRNA is located, by default "seq"
start : int, optional
Start position of the gRNA on the chromosome, by default 0
"""
length = len(sequence)
cut_pos = pam_position - cut_offset
relative_guide_start = cut_pos - 17
relative_guide_end = cut_pos + cut_offset + pam_len
guide_seq = sequence[relative_guide_start:relative_guide_end]
guide_long_seq = sequence[relative_guide_start - 4 : relative_guide_end + 3]
if strand == "-":
cut_pos = pam_position + pam_len + cut_offset
relative_guide_start = pam_position
relative_guide_end = cut_pos + 17
guide_seq = rev_comp(sequence[relative_guide_start:relative_guide_end])
guide_long_seq = rev_comp(
sequence[relative_guide_start - 3 : relative_guide_end + 4]
)
absolute_cut_pos = start + cut_pos
absolute_guide_start = start + relative_guide_start
absolute_guide_end = start + relative_guide_end
left_slice = cut_pos - max_flanking_length
right_slice = cut_pos + max_flanking_length
relative_cut_pos = max_flanking_length
if left_slice < 0:
left_slice = 0
relative_cut_pos = cut_pos
if right_slice > length:
right_slice = length
# Guide properties
self.locus_seq = sequence[slice(left_slice, right_slice)]
self.guide_seq = guide_seq
self.guide_long_seq = guide_long_seq
self.guide_pam = guide_seq[-pam_len:]
self.guide_chrom = chromosome
self.guide_start = absolute_guide_start
self.guide_end = absolute_guide_end - 1 # 1-based (last position inclusive)
self.guide_strand = strand
self.relative_cut_pos = relative_cut_pos
self.absolute_cut_pos = absolute_cut_pos
self.rank_score = 0.0
self.rank = 0
self.id = ""
# MMEJ properties
self.mmej_patterns = []
# Off-targets
self.off_targets = []
self.off_target_str = ""
# Layers
self._layers = {}
[docs]
def __repr__(self):
if self.id:
name = self.id
else:
name = "gRNA"
return f"{name}({self.guide_seq}|{self.guide_chrom}:{self.guide_start}-{self.guide_end}|{self.guide_strand}|)"
[docs]
def __getattr__(self, attr):
return self[attr]
@property
[docs]
def location(self):
"""Returns the location of the guide in the format: chr:start-end.
Returns
-------
str
String representation of gRNA location
"""
return f"{self.guide_chrom}:{self.guide_start}-{self.guide_end}"
[docs]
def _create_mmej_oof_string(self, mmej_patterns):
return "|".join([p["frame_shift"] for p in mmej_patterns])
[docs]
def simulate_end_joining(self, n_patterns=5, length_weight=20):
"""Simulate Microhomology-Mediated End Joining (MMEJ) events for the
gRNA.
MMEJ scoring is based on the Bae et al. 2014 paper (https://doi.org/10.1038/nmeth.3015)
Parameters
----------
n_patterns : int, optional
Number of top-scoring MMEJ patterns to keep, by default 5
length_weight : int, optional
Lengeth weight, by default 20
"""
if "N" not in self.locus_seq:
mmej_patterns = generate_mmej_patterns(
self.relative_cut_pos, self.locus_seq, length_weight
)
if mmej_patterns:
sorted_mmej_patterns = sorted(
mmej_patterns, key=lambda x: -x["pattern_score"]
)[:n_patterns]
oof_sum = sum(
[
p["pattern_score"]
for p in sorted_mmej_patterns
if p["frame_shift"] == "+"
]
)
sum_score = sum([p["pattern_score"] for p in sorted_mmej_patterns])
oof_score = oof_sum / sum_score * 100
else:
sorted_mmej_patterns = None
sum_score = None
oof_score = None
# TODO: refactor
self.mmej_patterns = sorted_mmej_patterns
self.mmej_str = self._create_mmej_oof_string(sorted_mmej_patterns)
self.add_layer("mmej_sum_score", sum_score)
self.add_layer("mmej_oof_score", oof_score)
else:
self.mmej_patterns = None
self.mmej_str = None
self.add_layer("mmej_sum_score", 0)
self.add_layer("mmej_oof_score", 0)
[docs]
def find_off_targets(self, genome, **kwargs):
"""Finds off-targets for the guide. The off-targets are found using
Bowtie. Bowtie index for the genome must be built before running this
function.
Notes
-----
The off-targets are stored in the `off_targets` attribute. Based on the
off-targets, the following layers are added to the guide:
- ot_sum_score: sum of the off-target scores - the lower the better
- ot_cfd_score_mean: mean of the CFD scores of the off-targets
- ot_cfd_score_max: max CFD scores of the off-targets
- ot_cfd_score_sum: sum CFD scores of the off-targets
Parameters
----------
genome : Genome
Genome object with the Bowtie index built
"""
if genome:
index_path = genome.bowtie_index
else:
raise ValueError("No genome / locus specified.")
if index_path:
guides_offtargets = run_bowtie(
guides=[self], genome_index_path=index_path, **kwargs
)
self.off_targets = guides_offtargets[0]
self.add_layer("ot_sum_score", calculate_ot_sum_score(self.off_targets))
# Load scoring matrices for CFD score calculation
mm_scores, pam_scores = load_cfd_scoring_matrix()
# Calculate CFD scores
cfd_scores = calculate_cfd_score(
self, self.off_targets, mm_scores, pam_scores
)
for ix, cfd in enumerate(cfd_scores.tolist()):
self.off_targets[ix]["cfd_score"] = cfd
self.add_layer("ot_cfd_score_mean", cfd_scores.mean())
self.add_layer("ot_cfd_score_max", cfd_scores.max())
self.add_layer("ot_cfd_score_sum", cfd_scores.sum())
else:
raise ValueError("Bowtie index is not built for the genome / locus.")
@property
[docs]
def off_targets_string(self):
"""Returns a string representation of the off-targets.
The string representatio captures the number of off-targets with certain number
of mismatches: n0|n1|n2|n3|n4|n5 (total), where n0 is the number of off-targets
with 0 mismatches, n1 is the number of off-targets with 1 mismatch, etc.
For example, if there are 3 off-targets with 0 mismatches, 2 with 1 mismatch,
1 with 2 mismatches, 0 with 3 mismatches, 5 with 4 mismatches and 1 with 5 mismatches
the string representation will be "3|2|1|0|5|1 (13)". In the parenthesis, the total
number of off-targets is given.
Returns
-------
str
String representation of off-targets.
"""
# if there are no off-targets, return 0
if not self.off_targets:
return "0"
# count the number of off-targets with certain number of mismatches
counts = Counter(
[
len(ot["mismatches"].keys()) - self.guide_pam.count("N")
for ot in self.off_targets
]
)
# create a string representation
ot_str = "|".join([str(counts[i]) for i in range(max(counts.keys()) + 1)])
ot_str += f" ({len(self.off_targets)})"
return ot_str
@property
[docs]
def layers(self):
return self._layers
[docs]
def add_layer(self, name, layer_data):
"""_summary_
Parameters
----------
name : str
_description_
layer_data : float
_description_
"""
self._layers[name] = layer_data
[docs]
def layer(self, key):
"""_summary_
Parameters
----------
key : _type_
_description_
Returns
-------
_type_
_description_
Raises
------
ValueError
_description_
"""
if key in self._layers.keys():
return self._layers[key]
else:
raise ValueError(f"Layer {key} was not added to the gRNA.")
[docs]
def add_azimuth_score(self, model_file="V3_model_nopos.pickle"):
"""Apply Azimuth score to a list of guides.
Azimuth is a machine learning-based predictive modelling of CRISPR/Cas9 guide efficiency.
Sometimes its reffered to as Doench 2016 score.
Described in https://doi.org/10.1038/nbt.3437 (Doench et al., 2016)
Returns
-------
float
Azimuth score.
"""
# Azimuth model requires 30bp sequences
if (
len(self.guide_long_seq) == 30
and "N" not in self.guide_long_seq
and self.guide_pam.endswith("GG")
and len(self.guide_pam) == 3
):
score_azimuth = float(
model_comparison.predict(
np.array([self.guide_long_seq]),
length_audit=True,
pam_audit=True,
model_file=os.path.dirname(os.path.realpath(__file__))
+ "/data/"
+ model_file,
)[0]
)
else:
score_azimuth = 0.0
# add layer to gRNA
self.add_layer("azimuth_score", score_azimuth)
return score_azimuth