What I have is a long list of codes that involves reading different files and in the end putting everything into different .csv
This is all my codes
import csv
import os.path
#open files + readlines
with open("C:/Users/Ivan Wong/Desktop/Placement/Lists of targets/Mouse/UCSC to Ensembl.csv", "r") as f:
reader = csv.reader(f, delimiter = ',')
#find files with the name in 1st row
for row in reader:
graph_filename = os.path.join("C:/Python27/Scripts/My scripts/Selenoprotein/NMD targets",row[0]+"_nt_counts.txt.png")
if os.path.exists(graph_filename):
y = row[0]+'_nt_counts.txt'
r = open('C:/Users/Ivan Wong/Desktop/Placement/fp_mesc_nochx/'+y, 'r')
k = r.readlines()
r.close
del k[:1]
k = map(lambda s: s.strip(), k)
interger = map(int, k)
import itertools
#adding the numbers for every 3 rows
def grouper(n, iterable, fillvalue=None):
"grouper(3, 'ABCDEFG', 'x') --> ABC DEF Gxx"
args = [iter(iterable)] * n
return itertools.izip_longest(*args, fillvalue=fillvalue)
result = map(sum, grouper(3, interger, 0))
e = row[1]
cDNA = open('C:/Users/Ivan Wong/Desktop/Placement/Downloaded seq/Mouse/cDNA.txt', 'r')
seq = cDNA.readlines()
# get all lines that have a gene name
lineNum = 0;
lineGenes = []
for line in seq:
lineNum = lineNum +1
if '>' in line:
lineGenes.append(str(lineNum))
if '>'+e in line:
lineBegin = lineNum
cDNA.close
# which gene is this
index1 = lineGenes.index(str(lineBegin))
lineEnd = lineGenes[index1+1]
# linebegin and lineEnd now give you, where to look for your sequence, all that
# you have to do is to read the lines between lineBegin and lineEnd in the file
# and make it into a single string.
lineEnd = lineGenes[index1+1]
Lastline = int(lineEnd) -1
# in your code you have already made a list with all the lines (q), first delete
# \n and other symbols, then combine all lines into a big string of nucleotides (like this)
qq = seq[lineBegin:Lastline]
qq = map(lambda s: s.strip(), qq)
string = ''
for i in range(len(qq)):
string = string + qq[i]
# now you want to get a list of triplets, again you can use the for loop:
# first get the length of the string
lenString = len(string);
# this is your list codons
listCodon = []
for i in range(0,lenString/3):
listCodon.append(string[0+i*3:3+i*3])
with open(e+'.csv','wb') as outfile:
outfile.writelines(str(result)+'\n'+str(listCodon))
My problem here is the file produced looks like this:
0 0 0
'GCA' 'CTT' 'GGT'
I want to make it like this:
0 GCA
0 CTT
0 GGT
What can I do in my code to achieve this?
print result:
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 1, 2, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 3, 3, 0, 3, 1, 2, 1, 2, 1, 0, 1, 0, 1, 2, 1, 0, 5, 0, 0, 0, 0, 6, 0, 1, 0, 0, 2, 0, 1, 0, 0, 1, 1, 0, 1, 6, 34, 35, 32, 1, 1, 0, 4, 1, 0, 1, 0, 0, 0, 0, 1, 6, 0, 0, 0, 0, 1, 3, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
print listCodon:
['gtt', 'gaa', 'aca', 'gag', 'aca', 'tgt', 'tct', 'gga', 'gat', 'gag', 'ctg', 'tgg', 'gca', 'gaa', 'gga', 'cag', 'gcc', 'taa', 'gca', 'cag', 'gca', 'gca', 'gag', 'ctt', 'tga', 'tct', 'ctt', 'ggt', 'gat', 'cgg', 'tgg', 'ggg', 'atc', 'cgg', 'tgg', 'cct', 'agc', 'ttg', 'tgc', 'caa', 'gga', 'agc', 'tgc', 'tca', 'gct', 'ggg', 'aaa', 'gaa', 'ggt', 'ggc', 'tgt', 'ggc', 'tga', 'cta', 'tgt', 'gga', 'acc', 'ttc', 'tcc', 'ccg', 'agg', 'cac', 'caa', 'gtg', 'ggg', 'cct', 'tgg', 'tgg', 'cac', 'ctg', 'tgt', 'caa', 'cgt', 'ggg', 'ttg', 'cat', 'acc', 'caa', 'gaa', 'gct', 'gat', 'gca', 'tca', 'ggc', 'tgc', 'act', 'gct', 'ggg', 'ggg', 'cat', 'gat', 'cag', 'aga', 'tgc', 'tca', 'cca', 'cta', 'tgg', 'ctg', 'gga', 'ggt', 'ggc', 'cca', 'gcc', 'tgt', 'cca', 'aca', 'caa', 'ctg', 'gtg', 'aga', 'gag', 'aag', 'ccc', 'ttg', 'ccc', 'tct', 'gca', 'ggt', 'ccc', 'att', 'gaa', 'agg', 'aga', 'ggt', 'ttg', 'ctc', 'tct', 'gcc', 'act', 'cat', 'ctg', 'taa', 'ccg', 'tga', 'gct', 'ttt', 'cca', 'ccc', 'ggc', 'ctc', 'ctc', 'ttt', 'gat', 'ccc', 'aga', 'ata', 'atg', 'act', 'ctg', 'aga', 'ctt', 'ctt', 'atg', 'tat', 'gaa', 'taa', 'atg', 'cct', 'ggg', 'cca', 'aaa', 'acc']


picture on the left is what Marek’s code helped me to achieve, I want to make an improvement so it arrange like the picture on the right
You can use
zip()to zip together two iterators. So if you havethen you can do
or, for your example: