I am writing a program in C++ which involves parsing a lot of text files. I am facing a lot of errors when I run my program with valgrind.
I am using Ubuntu 11 and the compiler is g++.
The goal of the program is to receive as arguments an optional reference file, a main file to work on (containing more than 10 million lines), and the root name of the 14 files to generate.
I will start with the first main problem. Because the program behaves strangely (sometimes it works and sometimes it does not, depending on the files and on the strings passed as arguments), I decided to check it with valgrind. It reports a huge number of memory errors, starting with the variable argv.
#include <stdio.h>
#include <stdlib.h>
#include "string.h"
#include <cctype>
#include <fstream>
#include <iostream>
#include <sstream>
#include <string>
#include "coverSamMulti.h"
using namespace std;
int main(int argc, char **argv) {
bool mutYN;
char * filerefgen; // The reference genome files
char * filemap; // the mapping file
char * fileroot; // the root name of all the generated files
// two or three arguments?
if (argc == 3)
{
mutYN= false;
filemap = new char[strlen(argv[1])+1];
fileroot = new char[strlen(argv[2])+1];
strcpy(filemap,argv[1]);
strcpy(fileroot,argv[2]);
}
else if (argc == 4)
{
mutYN= true;
filerefgen = new char[strlen(argv[1])+1];
filemap = new char[strlen(argv[2])+1];
fileroot = new char[strlen(argv[3])+1];
strcpy(filerefgen,argv[1]);
strcpy(filemap,argv[2]);
strcpy(fileroot,argv[3]);
}
else {
cout << " \n Less or or too much parameter passed \n";
return 0;
}
//call of the main function
GenF_Inf genfinf = ScanReadsMap(filemap);
// We will store for each positions and in 14 files respectively the coverage, the coverage of A, G, C and T,
// the number mismatches,the number of first position alignment, the number of first position alignment on the reverse strand,
// and the consensus genome of the sample, the segments size, the insertfs, the firsts, the ins and the del,
char * filecov = (char *)malloc(sizeof(char)*(strlen(fileroot)+10));
char * filecov_A = (char *)malloc(sizeof(char)*(strlen(fileroot)+12));
char * filecov_G = (char *)malloc(sizeof(char)*(strlen(fileroot)+12));
char * filecov_C = (char *)malloc(sizeof(char)*(strlen(fileroot)+12));
char * filecov_T = (char *)malloc(sizeof(char)*(strlen(fileroot)+12));
char * filemis= (char *)malloc(sizeof(char)*(strlen(fileroot)+10));
char * filefirst= (char *)malloc(sizeof(char)*(strlen(fileroot)+10));
char * filefirstr = (char *)malloc(sizeof(char)*(strlen(fileroot)+9));
char * filecons = (char *)malloc(sizeof(char)*(strlen(fileroot)+9));
char * fileseg = (char *)malloc(sizeof(char)*(strlen(fileroot)+9));
char * fileinsert = (char *)malloc(sizeof(char)*(strlen(fileroot)+12)); // the different insertsize
char * filefirsts = (char *)malloc(sizeof(char)*(strlen(fileroot)+12)); // the first position on single mapped reads
char * fileins = (char *)malloc(sizeof(char)*(strlen(fileroot)+9)); // the different insertsize
char * filedel = (char *)malloc(sizeof(char)*(strlen(fileroot)+9)); // the first position on single mapped reads
//building files' name;
strcpy(filecov,fileroot);
strcpy(filecov_A,fileroot);
strcpy(filecov_C,fileroot);
strcpy(filecov_G,fileroot);
strcpy(filecov_T,fileroot);
strcpy(filemis,fileroot);
strcpy(filefirst,fileroot);
strcpy(filefirstr,fileroot);
strcpy(filecons,fileroot);
strcpy(fileseg,fileroot);
strcpy(fileinsert,fileroot);
strcpy(filefirsts,fileroot);
strcpy(fileins,fileroot);
strcpy(filedel,fileroot);
strcat(filecov, "_cov.txt");
strcat(filecov_A, "_cov_A.txt");
strcat(filecov_C, "_cov_C.txt");
strcat(filecov_G, "_cov_G.txt");
strcat(filecov_T, "_cov_T.txt");
strcat(filemis, "_mis.txt");
strcat(filefirst, "_first.txt");
strcat(filefirstr, "_firstr.txt");
strcat(filecons, "_cons.txt");
strcat(fileseg, "_seg.txt");
strcat(fileinsert, "_insert.txt");
strcat(filefirsts, "_firsts.txt");
strcat(fileins, "_ins.txt");
strcat(filedel, "_del.txt");
ofstream covf(filecov);
ofstream covf_A(filecov_A);
ofstream covf_C(filecov_C);
ofstream covf_G(filecov_G);
ofstream covf_T(filecov_T);
ofstream misf(filemis);
ofstream firstf(filefirst);
ofstream firstrevf(filefirstr);
ofstream consf(filecons);
ofstream segf(fileseg);
ofstream insertf(fileinsert);
ofstream firstsf(filefirsts);
ofstream insf(fileins);
ofstream delf(filedel);
// generating the files
cout << "\n ====================================== \n ";
cout << "\n Generating the files \n ";
cout << "\n ====================================== \n ";
for(int j=0;j < NSEG; j++)
{ segf << genfinf.start[j];
for ( int i =0; i <genfinf.lenght[j]; i++)
{ if ((j!=NSEG-1) && (i!= genfinf.lenght[NSEG-1]-1)){
firstf << genfinf.mapfinf[j].firstposcov[i] << ",";
misf << genfinf.mapfinf[j].nbmismatch[i]<< ",";
covf << genfinf.mapfinf[j].poscov[i] << ",";
covf_A << genfinf.mapfinf[j].poscov_A[i]<< ",";
covf_C << genfinf.mapfinf[j].poscov_C[i]<< ",";
covf_G << genfinf.mapfinf[j].poscov_G[i]<< ",";
covf_T << genfinf.mapfinf[j].poscov_T[i]<< ",";
firstrevf<< genfinf.mapfinf[j].first_rev[i]<< ",";
insf << genfinf.mapfinf[j].ins[i]<< ",";
delf << genfinf.mapfinf[j].del[i]<< ",";
firstsf << genfinf.mapfinf[j].firstposcovsingle[i]<< ",";
}}
}
for(int j=0;j < 999; j++) insertf << genfinf.insert[j]<< ",";
int j = NSEG-1;int i = genfinf.lenght[NSEG-1]-1;
firstf << genfinf.mapfinf[j].firstposcov[i] ;
misf << genfinf.mapfinf[j].nbmismatch[i];
covf << genfinf.mapfinf[j].poscov[i] ;
covf_A << genfinf.mapfinf[j].poscov_A[i];
covf_C << genfinf.mapfinf[j].poscov_C[i];
covf_G << genfinf.mapfinf[j].poscov_G[i];
covf_T << genfinf.mapfinf[j].poscov_T[i];
firstrevf<< genfinf.mapfinf[j].first_rev[i];
insf << genfinf.mapfinf[j].ins[i];
delf << genfinf.mapfinf[j].del[i];
firstsf << genfinf.mapfinf[j].firstposcovsingle[i];
j =999;
insertf << genfinf.insert[j];
//building the consensus genome
cout << "\n ====================================== \n ";
cout << "\n building the consensus \n ";
cout << "\n ====================================== \n ";
int A,G,C,T;
for(int j=0;j < NSEG; j++)
{ consf<<">segment_"<<j+1<<"\n";
int k = 0; // for newline after 60 bp;
for ( i =0; i <genfinf.lenght[j]; i++)
{
A=genfinf.mapfinf[j].poscov_A[i]; C=genfinf.mapfinf[j].poscov_C[i];
G=genfinf.mapfinf[j].poscov_G[i]; T=genfinf.mapfinf[j].poscov_T[i];
if ((A>=G)&&(A>=C)&&(A>=T)) {if (A!=0)consf<<"A"; else consf<<"N";}
else if ((C>=A)&&(C>=G)&&(C>=T)) consf<<"C";
else if ((G>=A)&&(G>=C)&&(G>=T)) consf<<"G";
else if (((T>=A)&&(T>=G)&&(T>=C)))consf<<"T";
k++;
if (k ==59 ){consf<<"\n";k=0;}
}
consf<<"\n";
}
// closing all files
covf.close();
covf_A.close();
covf_C.close();
covf_G.close();
covf_T.close();
misf.close();
firstf.close();
firstrevf.close();
consf.close();
segf.close();
insertf.close();
firstsf.close();
insf.close();
delf.close();
// finding mutations
if (mutYN)
{
cout << "\n ====================================== \n ";
cout << "\n computing mutations \n ";
cout << "\n ====================================== \n ";
// Preparing the mutations file.
char * filemut = (char *)malloc(sizeof(char)*(strlen(fileroot)+9));
strcpy(filemut,fileroot);
strcat(filemut, "_mut.txt");
ofstream mutf(filemut);
char chrg, chcg;// Char from the reference genome, Char from the consensus genome
string strheader; // the first line of the reference genome file
ifstream refgen(filerefgen);
ifstream consf(filecons);
if ( !refgen )
{
puts("Cannot open open the file") ;
refgen.close() ;
exit(0);
}
else
{
getline(refgen, strheader);
// read in the file without catching any non alpha caracter like space, tab, etc...
do {chrg = refgen.get();} while (!isalpha(chrg) && (chrg !=EOF ));
do {chcg=consf.get(); } while (!isalpha(chcg)&& (chcg !=EOF ));
int i =0;
while (( chrg !=EOF )&&(chcg !=EOF ))
{ if (chrg != chcg)
{
mutf<< "Position "<< i << " : \n";
mutf << "\t"<<chrg<<" by "<<chcg<<"\n";
}
do {chrg = refgen.get();} while (!isalpha(chrg) && (chrg !=EOF ));
do {chcg=consf.get(); } while (!isalpha(chcg)&& (chcg !=EOF ));
i++;
}
}
cout << "\n Mutations computed \n ";
mutf.close();
}
cout << "\n ====================================== \n ";
cout << "\n all files generated successfuly \n ";
cout << "\n ====================================== \n ";
return 0;
}
And here is just a small portion of the Valgrind output, the first lines:
==28950== Memcheck, a memory error detector
==28950== Copyright (C) 2002-2011, and GNU GPL'd, by Julian Seward et al.
==28950== Using Valgrind-3.7.0 and LibVEX; rerun with -h for copyright info
==28950== Command: ./echantillon Ftooshort.sam covsam/echan1
==28950== Parent PID: 1928
==28950==
==28950== Warning: client switching stacks? SP change: 0xbeee91f8 --> 0xbeb8b910
==28950== to suppress, use: --max-stackframe=3528936 or greater
==28950== Invalid write of size 4
==28950== at 0x8049000: main (echantillon.cpp:11)
==28950== Location 0xbeb8b92c is 0 bytes inside local var "argv"
==28950== declared at echantillon.cpp:11, in frame #0 of thread 1
==28950==
==28950== Invalid write of size 4
==28950== at 0x804900D: main (echantillon.cpp:11)
==28950== Address 0xbeee91ec is on thread 1's stack
==28950==
==28950== Invalid write of size 1
==28950== at 0x804901B: main (echantillon.cpp:20)
==28950== Location 0xbeee7f8d is 0 bytes inside local var "mutYN"
==28950== declared at echantillon.cpp:12, in frame #0 of thread 1
==28950==
==28950== Invalid read of size 4
==28950== at 0x8049022: main (echantillon.cpp:21)
==28950== Location 0xbeb8b92c is 0 bytes inside local var "argv"
==28950== declared at echantillon.cpp:11, in frame #0 of thread 1
==28950==
==28950== Invalid write of size 4
==28950== at 0x804902D: main (echantillon.cpp:21)
==28950== Address 0xbeb8b910 is on thread 1's stack
==28950==
==28950== Invalid read of size 4
==28950== at 0x402A225: strlen (mc_replace_strmem.c:390)
==28950== by 0x8049034: main (echantillon.cpp:21)
==28950== Location 0xbeb8b910 is 0 bytes inside local var "str"
==28950== declared at mc_replace_strmem.c:390, in frame #0 of thread 1
==28950==
==28950== Invalid write of size 4
==28950== at 0x8049038: main (echantillon.cpp:21)
==28950== Address 0xbeb8b910 is on thread 1's stack
==28950==
==28950== Invalid read of size 4
==28950== at 0x4029149: operator new[](unsigned int) (vg_replace_malloc.c:343)
==28950== by 0x804903F: main (echantillon.cpp:21)
==28950== Location 0xbeb8b910 is 0 bytes inside local var "n"
==28950== declared at vg_replace_malloc.c:343, in frame #0 of thread 1
==28950==
==28950== Invalid write of size 4
==28950== at 0x8049040: main (echantillon.cpp:21)
==28950== Location 0xbeee7f18 is 0 bytes inside local var "filemap"
==28950== declared at echantillon.cpp:14, in frame #0 of thread 1
==28950==
==28950== Invalid read of size 4
==28950== at 0x8049046: main (echantillon.cpp:22)
==28950== Location 0xbeb8b92c is 0 bytes inside local var "argv"
==28950== declared at echantillon.cpp:11, in frame #0 of thread 1
==28950==
==28950== Invalid write of size 4
==28950== at 0x8049051: main (echantillon.cpp:22)
==28950== Address 0xbeb8b910 is on thread 1's stack
==28950==
==28950== Invalid read of size 4
==28950== at 0x402A225: strlen (mc_replace_strmem.c:390)
==28950== by 0x8049058: main (echantillon.cpp:22)
==28950== Location 0xbeb8b910 is 0 bytes inside local var "str"
==28950== declared at mc_replace_strmem.c:390, in frame #0 of thread 1
==28950==
==28950== Invalid write of size 4
==28950== at 0x804905C: main (echantillon.cpp:22)
==28950== Address 0xbeb8b910 is on thread 1's stack
==28950==
==28950== Invalid read of size 4
==28950== at 0x4029149: operator new[](unsigned int) (vg_replace_malloc.c:343)
==28950== by 0x8049063: main (echantillon.cpp:22)
==28950== Location 0xbeb8b910 is 0 bytes inside local var "n"
==28950== declared at vg_replace_malloc.c:343, in frame #0 of thread 1
==28950==
==28950== Invalid write of size 4
==28950== at 0x8049064: main (echantillon.cpp:22)
==28950== Location 0xbeee7f1c is 0 bytes inside local var "fileroot"
==28950== declared at echantillon.cpp:15, in frame #0 of thread 1
==28950==
==28950== Invalid read of size 4
==28950== at 0x804906A: main (echantillon.cpp:23)
==28950== Location 0xbeb8b92c is 0 bytes inside local var "argv"
==28950== declared at echantillon.cpp:11, in frame #0 of thread 1
==28950==
==28950== Invalid write of size 4
==28950== at 0x8049075: main (echantillon.cpp:23)
==28950== Address 0xbeb8b914 is on thread 1's stack
==28950==
==28950== Invalid read of size 4
==28950== at 0x8049079: main (echantillon.cpp:23)
==28950== Location 0xbeee7f18 is 0 bytes inside local var "filemap"
==28950== declared at echantillon.cpp:14, in frame #0 of thread 1
==28950==
==28950== Invalid write of size 4
==28950== at 0x804907F: main (echantillon.cpp:23)
==28950== Address 0xbeb8b910 is on thread 1's stack
==28950==
==28950== Invalid read of size 4
==28950== at 0x402A269: strcpy (mc_replace_strmem.c:429)
==28950== by 0x8049086: main (echantillon.cpp:23)
==28950== Location 0xbeb8b914 is 0 bytes inside local var "src"
==28950== declared at mc_replace_strmem.c:429, in frame #0 of thread 1
==28950==
==28950== Invalid read of size 4
==28950== at 0x402A26C: strcpy (mc_replace_strmem.c:429)
==28950== by 0x8049086: main (echantillon.cpp:23)
==28950== Location 0xbeb8b910 is 0 bytes inside local var "dst"
==28950== declared at mc_replace_strmem.c:429, in frame #0 of thread 1
==28950==
==28950== Invalid read of size 4
==28950== at 0x8049087: main (echantillon.cpp:24)
==28950== Location 0xbeb8b92c is 0 bytes inside local var "argv"
==28950== declared at echantillon.cpp:11, in frame #0 of thread 1
==28950==
==28950== Invalid write of size 4
==28950== at 0x8049092: main (echantillon.cpp:24)
==28950== Address 0xbeb8b914 is on thread 1's stack
==28950==
==28950== Invalid read of size 4
==28950== at 0x8049096: main (echantillon.cpp:24)
==28950== Location 0xbeee7f1c is 0 bytes inside local var "fileroot"
==28950== declared at echantillon.cpp:15, in frame #0 of thread 1
==28950==
==28950== Invalid write of size 4
==28950== at 0x804909C: main (echantillon.cpp:24)
==28950== Address 0xbeb8b910 is on thread 1's stack
==28950==
==28950== Invalid read of size 4
==28950== at 0x402A269: strcpy (mc_replace_strmem.c:429)
==28950== by 0x80490A3: main (echantillon.cpp:24)
==28950== Location 0xbeb8b914 is 0 bytes inside local var "src"
==28950== declared at mc_replace_strmem.c:429, in frame #0 of thread 1
==28950==
==28950== Invalid read of size 4
==28950== at 0x402A26C: strcpy (mc_replace_strmem.c:429)
==28950== by 0x80490A3: main (echantillon.cpp:24)
==28950== Location 0xbeb8b910 is 0 bytes inside local var "dst"
==28950== declared at mc_replace_strmem.c:429, in frame #0 of thread 1
==28950==
// And many others almost 30 per each mentioning char * or file I mean fileroot, filemap, filecons, filedes, and so on.
==28950==
==28950== Invalid read of size 4
==28950== at 0x804D29C: std::operator|(std::_Ios_Openmode, std::_Ios_Openmode) (ios_base.h:122)
==28950== by 0x8049867: main (echantillon.cpp:99)
==28950== Location 0xbeb8b914 is 0 bytes inside local var "__b"
==28950== declared at ios_base.h:121, in frame #0 of thread 1
==28950==
==28950== Invalid write of size 4
==28950== at 0x8049868: main (echantillon.cpp:99)
==28950== Address 0xbeb8b918 is on thread 1's stack
==28950==
==28950== Invalid read of size 4
==28950== at 0x804986C: main (echantillon.cpp:99)
==28950== Location 0xbeee7f3c is 0 bytes inside local var "filecov"
==28950== declared at echantillon.cpp:52, in frame #0 of thread 1
==28950==
==28950== Invalid write of size 4
==28950== at 0x8049872: main (echantillon.cpp:99)
==28950== Address 0xbeb8b914 is on thread 1's stack
So, could anybody please help me sort out this issue?
From the valgrind docs:
Try fixing the "client switching stacks?" warning first, and see if the subsequent errors go away. The log already tells you how to deal with it: rerun valgrind with --max-stackframe=3528936 or greater.
If that doesn’t fix your problem, please post the minimal compilable code that reproduces it – your OP above is huge and looks mostly irrelevant.