Source code for src.seqvar.auto_ps1_pm5

"""Implementation of PS1 and PM5 prediction for sequence variants."""

import re
from typing import Optional, Tuple

from loguru import logger

from src.defs.annonars_variant import AnnonarsVariantResponse, VariantResult
from src.defs.auto_acmg import (
    PS1PM5,
    AminoAcid,
    AutoACMGCriteria,
    AutoACMGPrediction,
    AutoACMGSeqVarData,
    AutoACMGStrength,
)
from src.defs.exceptions import AlgorithmError, AutoAcmgBaseException
from src.defs.seqvar import SeqVar
from src.utils import AutoACMGHelper

#: DNA bases
DNA_BASES = ["A", "C", "G", "T"]

#: Regular expression for parsing pHGVSp
REGEX_HGVSP = re.compile(r"p\.(\D+)(\d+)(\D+)")


[docs] class AutoPS1PM5(AutoACMGHelper): """Class for PS1 and PM5 prediction.""" def __init__(self): super().__init__() #: Prediction result. self.prediction_ps1pm5: Optional[PS1PM5] = None #: Comment to store the prediction explanation. self.comment_ps1pm5: str = ""
[docs] def _get_var_info(self, seqvar: SeqVar) -> Optional[AnnonarsVariantResponse]: """Get variant information from Annonars. Args: seqvar: The sequence variant. Returns: Optional[AnnonarsVariantResponse]: Annonars response if the variant is found, None otherwise. """ try: return self.annonars_client.get_variant_info(seqvar) except AutoAcmgBaseException: return None
[docs] def _parse_HGVSp(self, pHGVSp: str) -> Optional[AminoAcid]: """Parse the pHGVSp from VEP into its components. Args: pHGVSp: The protein change in HGVS format. Returns: Optional[AminoAcid]: The amino acid change if the pHGVSp is valid, None otherwise. """ try: # If multiple pHGVSp values are present, take the first one if ";" in pHGVSp: pHGVSp = pHGVSp.split(";")[0] match = re.match(REGEX_HGVSP, pHGVSp) if match: return AminoAcid[match.group(3)] else: return None except AutoAcmgBaseException: logger.debug("Invalid pHGVSp: {}", pHGVSp) return None
[docs] def _is_pathogenic(self, variant_info: VariantResult) -> bool: """Check if the variant is pathogenic based on ClinVar data. Args: variant_info: Annonars variant information Returns: bool: True if the variant is pathogenic """ if variant_info.clinvar and variant_info.clinvar.records: for rec in variant_info.clinvar.records: if ( (r := rec.classifications) and (g := r.germlineClassification) and g.description in ["Pathogenic", "Likely pathogenic"] ): return True return False
[docs] def _is_missense(self, var_data: AutoACMGSeqVarData) -> bool: """ Check if the variant's consequence is missense. Args: var_data: The variant information. Returns: bool: True if the variant is a missense variant, False otherwise. """ if "missense" in var_data.consequence.cadd: return True if any("missense" in cons for cons in var_data.consequence.mehari): return True return False
[docs] def _is_splice_affecting(self, var_data: AutoACMGSeqVarData) -> bool: """ Check if the variant is a splice-affecting variant. Args: var_data: The variant information. Returns: bool: True if the variant is a splice-affecting variant, False otherwise. """ if "splice" in var_data.consequence.cadd: return True if any("splice" in cons for cons in var_data.consequence.mehari): return True return False
[docs] def _affect_splicing(self, var_data: AutoACMGSeqVarData) -> bool: """ Check if the variant affects splicing. If any of spliceAI scores are above the threshold, the variant is considered to affect splicing. Args: var_data: The variant information. Returns: bool: True if the variant affects splicing, False otherwise. """ score_checks = { "spliceAI_acceptor_gain": var_data.thresholds.spliceAI_acceptor_gain, "spliceAI_acceptor_loss": var_data.thresholds.spliceAI_acceptor_loss, "spliceAI_donor_gain": var_data.thresholds.spliceAI_donor_gain, "spliceAI_donor_loss": var_data.thresholds.spliceAI_donor_loss, } return any( (getattr(var_data.scores.cadd, score_name) or 0) > threshold for score_name, threshold in score_checks.items() )
[docs] def verify_ps1pm5( self, seqvar: SeqVar, var_data: AutoACMGSeqVarData ) -> Tuple[Optional[PS1PM5], str]: """ Predicts the criteria PS1 and PM5 for the provided sequence variant. Implementation of the rule PS1 and PM5: The method implements the rule by: - Getting the primary variant information & parsing the primary amino acid change. - Iterating over all possible alternative bases & getting the alternative variant information. - Parsing the alternative amino acid change & checking if the alternative variant is pathogenic. - If the alternative variant is pathogenic and the amino acid change is the same as the primary variant, then PS1 is set to True. - If the alternative variant is pathogenic and the amino acid change is different from the primary variant, then PM5 is set to True. Note: Rules: PS1: Same amino acid change as a previously established pathogenic variant regardless of nucleotide change. PM5: Novel missense change at an amino acid residue where a different missense change determined to be pathogenic has been seen before. Returns: Tuple[Optional[PS1PM5], str]: Prediction result and the comment with the explanation. """ self.prediction_ps1pm5 = PS1PM5() self.comment_ps1pm5 = "" try: if not self._is_missense(var_data) and not self._is_splice_affecting(var_data): raise AlgorithmError( "Variant is not a missense or splice-affecting variant. PS1/PM5 not applicable." ) primary_aa_change = self._parse_HGVSp(var_data.pHGVS) if not primary_aa_change: raise AlgorithmError("No valid primary amino acid change for PS1/PM5 prediction.") self.comment_ps1pm5 = f"Primary amino acid change: {primary_aa_change.name}. " for alt_base in DNA_BASES: # Skip the same base insert if alt_base == seqvar.insert: continue self.comment_ps1pm5 += f"Analysing alternative variant with base: {alt_base}. " alt_seqvar = SeqVar( genome_release=seqvar.genome_release, chrom=seqvar.chrom, pos=seqvar.pos, delete=seqvar.delete, insert=alt_base, ) alt_info = self._get_var_info(alt_seqvar) if alt_info and alt_info.result.dbnsfp and alt_info.result.dbnsfp.HGVSp_VEP: alt_aa_change = self._parse_HGVSp(alt_info.result.dbnsfp.HGVSp_VEP) self.comment_ps1pm5 += ( "Alternative amino acid change: " f"{alt_aa_change.name if alt_aa_change else "N/A"}. " ) if alt_aa_change and self._is_pathogenic(alt_info.result): if primary_aa_change == alt_aa_change: self.comment_ps1pm5 += ( "Alternative variant is pathogenic " "and amino acid change is the same as primary variant." "Result: PS1 is met." ) self.prediction_ps1pm5.PS1 = True if primary_aa_change != alt_aa_change: self.comment_ps1pm5 += ( "Alternative variant is pathogenic " "and amino acid change is different from primary variant." "Result: PM5 is met." ) self.prediction_ps1pm5.PM5 = True else: self.comment_ps1pm5 += "Missing dbnsfp data." self.comment_ps1pm5 += f"Failed to get variant information for {alt_seqvar}. " except AutoAcmgBaseException as e: self.comment_ps1pm5 = f"Error occurred during PS1/PM5 prediction. Error: {e}" self.prediction_ps1pm5 = None return self.prediction_ps1pm5, self.comment_ps1pm5
[docs] def predict_ps1pm5( self, seqvar: SeqVar, var_data: AutoACMGSeqVarData ) -> Tuple[AutoACMGCriteria, AutoACMGCriteria]: """Predict PS1 and PM5 criteria.""" logger.info("Predict PS1 and PM5") pred, comment = self.verify_ps1pm5(seqvar, var_data) if pred: ps1_pred = ( AutoACMGPrediction.Applicable if pred.PS1 else ( AutoACMGPrediction.NotApplicable if pred.PS1 is False else AutoACMGPrediction.Failed ) ) pm5_pred = ( AutoACMGPrediction.Applicable if pred.PM5 else ( AutoACMGPrediction.NotApplicable if pred.PM5 is False else AutoACMGPrediction.Failed ) ) ps1_strength = pred.PS1_strength pm5_strength = pred.PM5_strength else: ps1_pred = AutoACMGPrediction.Failed pm5_pred = AutoACMGPrediction.Failed ps1_strength = AutoACMGStrength.PathogenicStrong pm5_strength = AutoACMGStrength.PathogenicModerate return ( AutoACMGCriteria( name="PS1", prediction=ps1_pred, strength=ps1_strength, summary=comment, ), AutoACMGCriteria( name="PM5", prediction=pm5_pred, strength=pm5_strength, summary=comment, ), )