The goal of this code is to quickly read some data into memory from fields in a tab delimited file and sort them. I have found that when I run this code, I get a segmentation fault. I assume it’s something to do with my limited knowledge of strtok. I know it would be easier to use some c++ functions for tokenizing strings, however, I would like to have this code run as fast as possible. It seems like most c++ code would have me unnecessarily allocating space for new objects. Ideally, the code will be run on files containing 100’s of millions of lines. So, it needs to be fast.
#include <stdlib.h>
#include <stdio.h>
#include <string>
#include <iostream>
#include <vector>
#include <algorithm>
using namespace std;
class Node
{
public:
string name;
int position1;
int position2;
string desc;
float value;
bool operator<(const Node& T) const;
};
bool Node::operator<(const Node &T) const
{
int result;
result = name.compare(T.name);
if (result !=0) return(result);
if (position1 != T.position1) return(position1 < T.position1);
if (position2 != T.position2) return(position2 < T.position2);
return(false);
}
class NodeList
{
public:
vector<Node> nodes;
};
int main(void)
{
string filename = "table.txt";
FILE* infile = fopen(filename.c_str(), "r");
int buflen = 1000;
char buffer[buflen];
NodeList K;
Node T;
while(fgets(buffer,buflen,infile) != NULL)
{
cout<< buffer << endl;
T.name = string(strtok(buffer, "\t\n"));
T.position1 = atoi (strtok(NULL , "\t\n"));
T.position2 = atoi (strtok(NULL , "\t\n"));
T.desc = string(strtok(NULL , "\t\n"));
T.value = atof (strtok(NULL , "\t\n"));
K.nodes.push_back(T);
}
sort(K.nodes.begin(),K.nodes.end());
return(0);
}
EDIT: The segfault occurs in the sort command. Without the sort command the code runs normally. Edited to take comments into account. Here is the output from the debugger:
Program received signal EXC_BAD_ACCESS, Could not access memory.
Reason: KERN_INVALID_ADDRESS at address: 0xffffffffffffffe8 0x00007fff83a078bb in std::string::compare ()
(gdb) bt
#0 0x00007fff83a078bb in std::string::compare ()
#1 0x0000000100001333 in Node::operator< (this=0x7fff5fbfeef0, T=@0x1001fffe0) at test.cpp:27
#2 0x000000010000274e in std::__unguarded_linear_insert<__gnu_cxx::__normal_iterator<Node*, std::vector<Node, std::allocator<Node> > >, Node> (__last={_M_current = 0x100200000}, __val=@0x7fff5fbfeef0) at stl_algo.h:2309
#3 0x0000000100003f28 in std::__unguarded_insertion_sort<__gnu_cxx::__normal_iterator<Node*, std::vector<Node, std::allocator<Node> > > > (__first={_M_current = 0x100200200}, __last={_M_current = 0x1002581e0}) at stl_algo.h:2406
#4 0x000000010000437b in std::__final_insertion_sort<__gnu_cxx::__normal_iterator<Node*, std::vector<Node, std::allocator<Node> > > > (__first={_M_current = 0x100200000}, __last={_M_current = 0x1002581e0}) at stl_algo.h:2439
#5 0x0000000100004422 in std::sort<__gnu_cxx::__normal_iterator<Node*, std::vector<Node, std::allocator<Node> > > > (__first={_M_current = 0x100200000}, __last={_M_current = 0x1002581e0}) at stl_algo.h:2831
#6 0x00000001000019e8 in main () at test.cpp:76
If I go up one level and look at the values, I get this:
(gdb) print T
$1 = (const Node &) @0x1001fffe0: {
name = {
_M_dataplus = {
<std::allocator<char>> = {
<__gnu_cxx::new_allocator<char>> = {<No data fields>}, <No data fields>},
members of std::basic_string<char,std::char_traits<char>,std::allocator<char> >::_Alloc_hider:
_M_p = 0x0
}
},
position1 = 0,
position2 = 0,
desc = {
_M_dataplus = {
<std::allocator<char>> = {
<__gnu_cxx::new_allocator<char>> = {<No data fields>}, <No data fields>},
members of std::basic_string<char,std::char_traits<char>,std::allocator<char> >::_Alloc_hider:
_M_p = 0x0
}
},
value = 0
}
The values for this.name etc look like they come from the file, but whatever it is being compared to has values that are all 0’s or NULL.
Compiling with
g++ -Wall -g, I see that you need to includestring.hto getstrtok, and youroperator<needs to return something if none of the earlierifstatements were true. After that…You aren’t checking the return value of
fopen, so the first segfault I found was when I hadn’t created atable.txtto test with.You aren’t checking the return value of
strtokeither, so if no matching column exists, then you can passNULLtoatoi, and get a segfault there.You need to use
gdb‘sbtcommand when your program crashes to find out what line triggered the crash.