Irwiny
/

PepGLAD

Model card Files Files and versions

PepGLAD / evaluation /seq_metric.py

Irwiny123's picture

添加PepGLAD初始代码

52007f8 5 months ago

history blame contribute delete

1.97 kB

	#!/usr/bin/python
	# -- coding:utf-8 --
	from math import sqrt

	from Bio.Align import substitution_matrices, PairwiseAligner


	def aar(candidate, reference):
	hit = 0
	for a, b in zip(candidate, reference):
	if a == b:
	hit += 1
	return hit / len(reference)


	def align_sequences(sequence_A, sequence_B, **kwargs):
	"""
	Performs a global pairwise alignment between two sequences
	using the BLOSUM62 matrix and the Needleman-Wunsch algorithm
	as implemented in Biopython. Returns the alignment, the sequence
	identity and the residue mapping between both original sequences.
	"""

	sub_matrice = substitution_matrices.load('BLOSUM62')
	aligner = PairwiseAligner()
	aligner.substitution_matrix = sub_matrice
	alns = aligner.align(sequence_A, sequence_B)

	best_aln = alns[0]
	aligned_A, aligned_B = best_aln

	base = sqrt(aligner.score(sequence_A, sequence_A) * aligner.score(sequence_B, sequence_B))
	seq_id = aligner.score(sequence_A, sequence_B) / base
	return (aligned_A, aligned_B), seq_id


	def slide_aar(candidate, reference, aar_func):
	'''
	e.g.
	candidate: AILPV
	reference: ILPVH

	should be matched as
	AILPV
	ILPVH

	To do this, we slide the candidate and calculate the maximum aar:
	A
	AI
	AIL
	AILP
	AILPV
	ILPV
	LPV
	PV
	V
	'''
	special_token = ' '
	ref_len = len(reference)
	padded_candidate = special_token * (ref_len - 1) + candidate + special_token * (ref_len - 1)
	value = 0
	for start in range(len(padded_candidate) - ref_len + 1):
	value = max(value, aar_func(padded_candidate[start:start + ref_len], reference))
	return value


	if __name__ == '__main__':
	print(align_sequences('PKGYAAPSA', 'KPAVYKFTL'))
	print(align_sequences('KPAVYKFTL', 'PKGYAAPSA'))
	print(align_sequences('PKGYAAPSA', 'PKGYAAPSA'))
	print(align_sequences('KPAVYKFTL', 'KPAVYKFTL'))