"""This program ask the user to enter a number of DNA sequences and finds the
consensus sequence. The output is the consensus.
Add the corresponding code to accomplish the requested tasks
"""
##### ADD YOUR NAME, Student ID, and Section number #######
# NAME: Daniella Vargas Figueroa
# STUDENT ID:802228453
# SECTION:096
###########################################################
# Auxiliary functions
# The function valid_seq() checks whether the given sequence is valid.
# seq: a string containing the sequence entered by the user
def valid_seq(seq):
    """Return True when seq is a non-empty run of uppercase DNA bases.

    seq: the sequence entered by the user. Every symbol is compared
    against 'A', 'C', 'T' and 'G'; lowercase letters or any other symbol
    make the sequence invalid, and an empty sequence is invalid as well.
    """
    checked_any = False
    for base in seq:
        # Stop at the first symbol that is not a nucleotide.
        if base not in ("A", "C", "T", "G"):
            return False
        checked_any = True
    # Still False here only when the loop never ran (empty input).
    return checked_any
# The function max_nuc() takes four inputs, the frequency of each nucleotide in
# a column, determines which nucleotide is the most frequent, and returns a
# list with two elements: the nucleotide with the maximum frequency and its
# frequency.
# A, G, C, T: the number of occurrences of each nucleotide in the column
def max_nuc(A, G, C, T):
    """Return the most frequent nucleotide of a column and its count.

    A, G, C, T: number of times each nucleotide occurs in the column.

    Returns a two-element list [nucleotide, frequency]. When several
    nucleotides share the highest count, the first one in the order
    A, G, C, T is returned, so every column gets an answer (strict '>'
    comparisons alone would leave a tied column without any result).
    """
    counts = [("A", A), ("G", G), ("C", C), ("T", T)]
    # max() keeps the first maximal item it meets, which yields the
    # A, G, C, T tie-break described above.
    nucleotide, frequency = max(counts, key=lambda pair: pair[1])
    return [nucleotide, frequency]
#########################
# The function load_data() takes the number of sequences as its argument, reads
# the DNA sequences from the user, saves them in a list and returns the list.
# a: the number of sequences to be entered
def load_data(a):
    """Prompt for DNA sequences until `a` acceptable ones have been entered.

    a: number of sequences to read.

    Each entry is checked with valid_seq() and must also be as long as the
    first accepted sequence, because the column-by-column analysis reads
    the same positions from every sequence. A rejected entry does not
    count toward `a`; the user is simply asked again.

    Returns the list of accepted sequences in the order they were entered
    (an empty list when `a` is zero or negative).
    """
    sequences = []
    while len(sequences) < a:
        seq = input("DNA sequence: ")
        if not valid_seq(seq):
            print("Invalid Input. Try again")
        elif sequences and len(seq) != len(sequences[0]):
            # A shorter sequence would raise IndexError later and a longer
            # one would be silently truncated, so reject it up front.
            print("Invalid Input. Sequences must have the same length. Try again")
        else:
            sequences.append(seq)
    # Everything stored was validated on entry; no second pass is needed.
    return sequences
# input sequences
# validate sequences
# save list
# return list
# New function to sort the frequencies from greatest to least for the
# challenge.
def order(l):
    """Sort [label, frequency] entries in place from greatest to least.

    l: list whose elements are lists. Entries are compared as if each one
    were written backwards (last item first), so the frequency is the
    primary key and the label breaks ties.

    Returns the same list object, now in descending order.
    """
    # Ascending sort on the reversed view of every entry ...
    l.sort(key=lambda entry: entry[::-1])
    # ... then flip the whole list to obtain greatest-to-least.
    l.reverse()
    return l
# The function count_nucl_freq() takes the list returned by load_data() and
# computes the frequencies of the nucleotides for each column.
# a: a list of DNA sequences
def count_nucl_freq(a):
    """Count nucleotide frequencies column by column.

    a: list of DNA sequences. The number of columns is taken from the
    first sequence.

    Returns a tuple (frequencies, bono):
      frequencies - one max_nuc() result per column.
      bono        - per column, the four [label, count] pairs as arranged
                    by order(), used for the challenge display.
    Both lists are empty when no sequences are given.
    """
    frequencies = []
    bono = []
    # Nothing to analyse; indexing a[0] below would fail on an empty list.
    if not a:
        return frequencies, bono
    for i in range(len(a[0])):
        # Tallies for the current column.
        count_a = count_g = count_c = count_t = 0
        for seq in a:
            let = seq[i]
            if let == "A":
                count_a += 1
            elif let == "G":
                count_g += 1
            elif let == "C":
                count_c += 1
            else:
                # Any symbol that is not A, G or C is tallied as T.
                count_t += 1
        # Per-column counts arranged by order() for the challenge display.
        bono.append(order([["A:", count_a], ["G:", count_g],
                           ["C:", count_c], ["T:", count_t]]))
        # Most frequent nucleotide of this column.
        frequencies.append(max_nuc(count_a, count_g, count_c, count_t))
    return frequencies, bono
# analyze the list by columns
# find nucleotide frequencies
# find the nucleotide with the maximum number of repetitions for each column
# append the output from the max_nuc() function to a list Result
# The function find_consensus() takes the frequency list returned by
# count_nucl_freq() and returns the consensus sequence.
# a: the list of per-column maximum frequencies returned by count_nucl_freq()
def find_consensus(a):
    """Build the consensus string from the per-column results.

    a: list of entries whose item at index 0 is the nucleotide chosen for
    that column.

    Returns the nucleotides joined in column order ("" for an empty list).
    """
    return "".join(column[0] for column in a)
# The function main() is where the program starts; it calls the functions above.
def main():
    """Run the program: read the sequences, then print the results.

    Asks how many sequences to read, loads them, prints the consensus and
    then, under "Challenge:", one line per column with the four nucleotide
    counts in the order stored by count_nucl_freq().
    """
    # Ask for the number of DNA sequences.
    n_seq = int(input('Number of DNA sequences: '))
    # Chain the helper functions: load, count, build the consensus.
    list_seq = load_data(n_seq)
    list_freq, list_bono = count_nucl_freq(list_seq)
    consensus = find_consensus(list_freq)
    # Display the DNA consensus.
    print("Consensus:", consensus)
    # Display the challenge section, one line per column (numbered from 1).
    print("Challenge:")
    for col_number, col in enumerate(list_bono, start=1):
        # Each of the four entries is a [label, count] pair, e.g. "A:" and 2.
        cells = [str(col[k][0]) + str(col[k][1]) for k in range(4)]
        print("Col " + str(col_number) + ": " + " ".join(cells))
# Start the program only when this file is executed as a script.
if __name__ == "__main__":
    main()