UNIVERSITY OF MAURITIUS
FACULTY OF AGRICULTURE
BSc (Hons) Biotechnology
AGRI 2081Y (3) - COMPUTATIONAL BIOLOGY
Name of Student: Marie Natacha Meunier
Student I.D: 1712892
Date: 25th May 2020
Lecturer Name: Dr Shakuntala Baichoo
# Primary structure of the protein chain, stored one fragment per line.
chain_a = """SSSVPSQKTYQGSYGFRLGFLHSGTAKSVTCTYSPALNKM
FCQLAKTCPVQLWVDSTPPPGTRVRAMAIYKQSQHMTEVV
RRCPHHERCSDSDGLAPPQHLIRVEGNLRVEYLDDRNTFR
HSVVVPYEPPEVGSDCTTIHYNYMCNSSCMGGMNRRPILT
IITLEDSSGNLLGRNSFEVRVCACPGRDRRTEEENLRKKG
EPHHELPPGSTKRALPNNT"""

#Question 1 a
# Count the lines themselves: counting "\n" alone would miss the last line,
# which has no trailing newline.
num_lines = len(chain_a.splitlines())
print(num_lines)

#Question 1 b
# The sequence length excludes the newline characters used for layout.
length = len(chain_a) - chain_a.count("\n")
print("length sequence: ", length)

#Question 1 c
new_chain = chain_a.replace("\n", "")
print("New Chain:", new_chain)

#Question 1 d
# str.count does the same job as a manual character-by-character loop.
count = new_chain.count("C")
print("Number of C:", count)

#Question 1 e
# Search the newline-free sequence so that (1) a motif spanning a line break
# is still found and (2) the reported 0-based position is a position in the
# sequence rather than in the multi-line layout.
motif = "NLRVEYLDDRN"
pos = new_chain.find(motif)
if pos != -1:
    print("yes found")
    print("Starting position :", pos)
#Question 2
dna_seq = """GGGCTTGTGGCGCGAGCTTCTGAAACTAGGCGGCAGAGGCGGAGCCGCT
GTGGCACTGCTGCGCCTCTGCTGCGCCTCGGGTGTCTTTT
GCGGCGGTGGGTCGCCGCCGGGAGAAGCGTGAGGGGACAG
ATTTGTGACCGGCGCGGTTTTTGTCAGCTTACTCCGGCCA AAAAAGAACTGCACCTCTGGAGCGG"""
# Drop every whitespace character (line breaks and the stray space in the
# last line): they are layout, not nucleotides, and would otherwise distort
# the length, the GC content and the slice offsets used below.
dna_seq = "".join(dna_seq.split())

#Question 2 a
# Count the number of C's in DNA sequence
no_c = dna_seq.count("C")
# Count the number of G's in DNA sequence
no_g = dna_seq.count("G")
#determine the length of the DNA sequence
dna_length = len(dna_seq)
#compute the GC content as a fraction of the sequence length (0.0 - 1.0)
gc_cont = (no_g + no_c) / dna_length

#Question 2 b
# Transcription: every thymine (T) becomes uracil (U).
rna_seq = dna_seq.replace("T", "U")

#Question 2 c
# The intron spans nucleotides 51..156 (1-based, inclusive), i.e. the
# half-open 0-based slice [50:156]; the exons are what surrounds it.
exon1 = dna_seq[:50]
intron = dna_seq[50:156]
exon2 = dna_seq[156:]
spliced = exon1 + exon2
#Question 3
import re

clusters = """\
>Cluster 0
0 >YLR106C at 100.00%
>Cluster 50
0 >YPL082C at 100.00%
>Cluster 54
0 >YHL009W-A at 90.80%
1 >YHL009W-B at 100.00%
2 >YJL113W at 98.77%
3 >YJL114W at 97.35%
>Cluster 52
0 >YBR208C at 100.00%
"""

# Header lines look like ">Cluster 54": capture only the digits.
CLUSTER_ID_RE = re.compile(r"^>Cluster\s+(\d+)", re.MULTILINE)
# Member lines look like "1 >YHL009W-B at 100.00%": capture the name and the
# percentage. Anchoring on the leading index keeps header lines from matching,
# so no text substitution is needed beforehand.
MEMBER_RE = re.compile(r"^\d+\s+>(\S+)\s+at\s+([\d.]+)%", re.MULTILINE)

#Question 3 a
# Cluster IDs, in order of appearance.
cluster_ids = CLUSTER_ID_RE.findall(clusters)

#Question 3 b
# One (name, percentage) tuple per member line.
members = MEMBER_RE.findall(clusters)
# All member names, in order of appearance.
result = [name for name, _ in members]
# All member names followed by all percentages, as one flat list.
per = result + [pct for _, pct in members]

# Print one [name, percentage] pair per member line; header and blank lines
# produce no output.
for name, pct in members:
    print([name, pct])
#Question 4
# Ratio for each pair of nucleotides, keyed by the pair as a tuple.
ratios = {
    ("A", "T"): 10.0 / 5.0,
    ("A", "C"): 10.0 / 7.0,
    ("A", "G"): 10.0 / 6.0,
    ("T", "C"): 5.0 / 7.0,
    ("T", "G"): 5.0 / 6.0,
    ("C", "G"): 7.0 / 6.0,
}

#Question 4 a
# len(ratios), len(ratios.keys()), len(ratios.values()) and
# len(ratios.items()) all give the same number: a dictionary holds exactly
# one key, one value and one (key, value) item per entry.
print(len(ratios))
print(len(ratios.keys()))
print(len(ratios.values()))
print(len(ratios.items()))

#Question 4 b
# Membership on a dict tests its keys. The message is built from the key
# being tested so the text always names the key that was actually checked.
for pair in (("A", "T"), ("C", "G")):
    label = ", ".join(pair)
    if pair in ratios:
        print("yes '{}' is found in ratios".format(label))
    else:
        print("No '{}' is not found in ratios".format(label))

#Question 4 c
# Values are floats; 2 == 2.0 in Python, so the integer test works as is.
contains_2 = 2 in ratios.values()
print(contains_2)
contains_3 = 3 in ratios.values()
print(contains_3)

#Question 4 d
# A key-value pair is present when the (key, value) tuple is among the items.
has_at_2 = (("A", "T"), 2) in ratios.items()
print(has_at_2)
has_cg_1000 = (("C", "G"), 1000) in ratios.items()
print(has_cg_1000)

#Question 4 e
keys = [key for key, _ in ratios.items()]
values = [value for _, value in ratios.items()]
#Question 5
#translate the list:
# Named `bases` rather than `list` so the builtin `list` is not shadowed.
bases = ["A", "T", "T", "A", "G", "T", "C"]
# Single-letter base code -> three-letter name used in the expected output.
translation = {"A": "ade", "T": "tym", "G": "gua", "C": "cyt"}
# Translate every base by dictionary lookup instead of hard-coding the result.
translated = [translation[base] for base in bases]
# Space-separated form of the translation.
translated_str = " ".join(translated)
print(translated_str)
#Question 6
# A Python program to read the file data.fasta
# Six FASTA-style records held inline as one string: each record is a header
# line starting with ">" followed by a single sequence line.
text=""">2HMI:A|PDBID|CHAIN|SEQUENCE
PISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKI
>2HMI:B|PDBID|CHAIN|SEQUENCE
PISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKI
>2HMI:C|PDBID|CHAIN|SEQUENCE
DIQMTQTTSSLSASLGDRVTISCSASQDISSYLNWYQQKPEGTVKLLIYY
>2HMI:D|PDBID|CHAIN|SEQUENCE
QITLKESGPGIVQPSQPFRLTCTFSGFSLSTSGIGVTWIRQPSGKGLEWL
>2HMI:E|PDBID|CHAIN|SEQUENCE
ATGGCGCCCGAACAGGGAC
>2HMI:F|PDBID|CHAIN|SEQUENCE
GTCCCTGTTCGGGCGCCA"""
# Open 'fasta_file.txt' for reading (default text mode); the handle is not
# closed within the lines visible here.
# NOTE(review): the heading above names data.fasta while this opens
# fasta_file.txt, and `text` already holds FASTA content - confirm which
# source is intended.
fastaFile = open('fasta_file.txt')