I am trying to write the code so that i eliminate the duplicate in my result file, which is the line with “N/A” and line underneath it. Here is what i have:
with open('false_'+uniprotID+'.txt','w') as fileinput:
for index, (start, end) in enumerate(searchPFAM(fname)):
for item in lookup[uniprotID]:
for names in wholelookup[uniprotID]:
if re.search(r'\d+',names).group(0)==item and start <= int(item) <= end:
result = str(int(item) - start + 1)
try:
fileinput.write(">{0} | at position {1} | start= {2}, end= {3} | description: {4}\n".format(uniprotID, result, start, end, names))
fileinput.write(''.join(makeList[start-1:end]))
textwrap.wrap(''.join(makeList[start-1:end]),width = 60)
fileinput.write('\n')
except ErrorIO as e:
break
else:
fileinput.write(">{0} | N/A | start= {1}, end= {2} | description: {3} \n".format(uniprotID, start, end, names))
fileinput.write(''.join(makeList[start-1:end]))
textwrap.wrap(''.join(makeList[start-1:end]),width = 60)
fileinput.write('\n')
My result file look like this:
Q14591 | at position 4 | start= 174, end= 196 | description: A177T
YQCRHCSKSFSQRSDLVKHQRIH
Q14591 | N/A | start= 174, end= 196 | description: M418T
YQCRHCSKSFSQRSDLVKHQRIH
Q14591 | at position 21 | start= 398, end= 420 | description: M418T
YACSDCTKSFSRRSDLVKHQRIHQ14591 | N/A | start= 398, end= 420 | description: M418T
YACSDCTKSFSRRSDLVKHQRIH
Why don’t you just filter them out afterwards?