TL;DR: How can a large
std::vector<std::string> achieve such fast deallocation when compiled with release settings on Visual Studio 2012 RC?
I have written a class strung that behaves similarly to std::string as an exercise, implementing basic copy- and move semantics.
/**
 * Minimal heap-backed character buffer written as an exercise in copy and
 * move semantics.
 *
 * The characters are stored WITHOUT a terminating NUL; `length_` is the only
 * record of the buffer size. An empty or moved-from object always has
 * `length_ == 0`, so `begin() == end()` and copying it is safe.
 */
class strung
{
private:
    size_t length_;  // number of characters stored in data_
    char* data_;     // owned buffer of length_ chars, or nullptr when empty

public:
    // -------- Constructors --------

    /// Creates an empty string that owns no buffer.
    strung() noexcept : length_(0), data_(nullptr) {}

    /// Copies the characters of a NUL-terminated C string (the terminator is
    /// not stored). A null pointer is treated as an empty string.
    strung(const char* c_str)
        : length_(c_str ? strlen(c_str) : 0), data_(new char[length_])
    {
        ::std::copy(c_str, c_str + length_, data_);
    }

    /// Allocates `length` characters and leaves their values uninitialized.
    explicit strung(size_t length) : length_(length), data_(new char[length])
    {
    }

    /// Allocates `length` characters, each set to `value`.
    strung(size_t length, char value) : length_(length), data_(new char[length])
    {
        ::std::fill(data_, data_ + length_, value);
    }

    // -------- Copy/move-constructors --------

    /// Deep-copies the characters of `old` into a fresh buffer.
    strung(const strung& old)
        : length_(old.length_), data_(new char[old.length_])
    {
        ::std::copy(old.data_, old.data_ + old.length_, data_);
    }

    /// Takes over the buffer of `old` and leaves `old` empty.
    /// Declared noexcept so std::vector can use it when relocating elements.
    strung(strung&& old) noexcept : length_(old.length_), data_(old.data_)
    {
        // The destructor of `old` still runs; null the pointer so the buffer
        // is not freed twice, and zero the length so `old` stays consistent.
        old.data_ = nullptr;
        old.length_ = 0;
    }

    // -------- Assignment operators --------

    /// Replaces the contents with a deep copy of `old`.
    strung& operator=(const strung& old)
    {
        if (this != &old)
        {
            // Allocate and fill the new buffer first: if `new` throws, this
            // object still owns its previous, valid buffer.
            char* fresh = new char[old.length_];
            ::std::copy(old.data_, old.data_ + old.length_, fresh);
            delete[] data_;
            data_ = fresh;
            length_ = old.length_;
        }
        return *this;
    }

    /// Releases the current buffer, takes over the buffer of `old`, and
    /// leaves `old` empty.
    strung& operator=(strung&& old) noexcept
    {
        if (this != &old)
        {
            delete[] data_;
            data_ = old.data_;
            length_ = old.length_;
            old.data_ = nullptr;
            old.length_ = 0;
        }
        return *this;
    }

    // -------- Array operators (no bounds checking by design) --------

    /// Returns a mutable reference to the character at `pos` (unchecked).
    char& operator[](size_t pos)
    {
        return data_[pos];
    }

    /// Returns a read-only reference to the character at `pos` (unchecked).
    const char& operator[](size_t pos) const
    {
        return data_[pos];
    }

    // -------- Insertion operator for `ostream`s --------

    /// Writes exactly `length()` characters of `source` to `out`.
    friend ::std::ostream& operator<<(::std::ostream& out, const strung& source)
    {
        out.write(source.data_, static_cast<::std::streamsize>(source.length_));
        return out;
    }

    // -------- Various functions --------

    /// Returns the number of stored characters.
    size_t length() const noexcept
    {
        return length_;
    }

    // -------- Poor man's iterators --------

    /// Pointer to the first character (nullptr when no buffer is owned).
    char* begin() noexcept
    {
        return data_;
    }

    /// Pointer one past the last character.
    char* end() noexcept
    {
        return data_ + length_;
    }

    /// Read-only counterpart of begin(), so const objects can be iterated.
    const char* begin() const noexcept
    {
        return data_;
    }

    /// Read-only counterpart of end().
    const char* end() const noexcept
    {
        return data_ + length_;
    }

    // -------- Destructor --------

    /// Frees the owned buffer; a no-op for empty or moved-from objects.
    ~strung()
    {
        delete[] data_;
    }
};
I tried comparing the performance of std::string and strung using this code:
/**
 * Runs `func` once and returns how long the call took, in milliseconds.
 *
 * The interval is taken from std::chrono::steady_clock, which is monotonic,
 * so the result cannot be skewed or go negative if the system clock is
 * adjusted while `func` runs.
 *
 * @param func  Callable to execute and measure.
 * @return      Elapsed time of the call as fractional milliseconds.
 */
double time(const std::function<void(void)> &func)
{
    using namespace std::chrono;
    const auto start = steady_clock::now();
    func();
    const auto elapsed = steady_clock::now() - start;
    // Convert in the duration's type system rather than dividing a raw
    // nanosecond count by a hand-written constant.
    return duration<double, std::milli>(elapsed).count();
}
/**
 * Benchmarks a std::vector<T> holding `num` short random strings.
 *
 * T must be constructible from (length, fill character) and expose
 * begin()/end() yielding mutable characters. Prints the total time, the
 * time spent filling the vector, and their difference.
 */
template<typename T>
void test(const int num)
{
    // Size of the alphabet the random characters are drawn from ('A'..'Z').
    const int letter_count = 'Z' - 'A' + 1;

    double allocation_time;
    double full_time;

    // The outer measurement spans the whole lifetime of the vector, which is
    // destroyed when the outer lambda returns; the inner one covers only the
    // fill phase.
    full_time = time([&] {
        std::vector<T> container;
        allocation_time = time([&] {
            container.reserve(num);
            for (int remaining = num; remaining > 0; --remaining)
            {
                // 1 to 10 characters, zero-filled and then overwritten below.
                container.emplace_back(rand() % 10 + 1, '\0');
                T &fresh = container.back();
                for (auto it = fresh.begin(), last = fresh.end(); it != last; ++it)
                    *it = ('A' + rand() % letter_count);
            }
        });
    });

    // Whatever the fill phase did not account for is reported as deallocation.
    const double deallocation_time = full_time - allocation_time;
    std::cout << "Full time: " << full_time << " miliseconds" << std::endl;
    std::cout << "Allocation time: " << allocation_time << " miliseconds" << std::endl;
    std::cout << "Deallocation time: " << deallocation_time << " miliseconds" << std::endl;
}
/// Runs the same benchmark for std::string and for strung, each under its own banner.
int main()
{
    // Number of strings created per run; identical for both types.
    const int string_count = 500000;

    std::cout << "-------- std::string --------" << std::endl;
    test<std::string>(string_count);

    std::cout << "-------- strung --------" << std::endl;
    test<strung>(string_count);

    return EXIT_SUCCESS;
}
And these were the results:
Debug (x86-64)
-------- std::string --------
Full time: 51050.9 miliseconds
Allocation time: 1853.11 miliseconds
Deallocation time: 49197.8 miliseconds
-------- strung --------
Full time: 52404 miliseconds
Allocation time: 4886.28 miliseconds
Deallocation time: 47517.7 miliseconds
Release (x86-64):
-------- std::string --------
Full time: 113.007 miliseconds
Allocation time: 107.006 miliseconds
Deallocation time: 6.0004 miliseconds
-------- strung --------
Full time: 47771.7 miliseconds
Allocation time: 356.02 miliseconds
Deallocation time: 47415.7 miliseconds
Allocation speeds are understandable, since I didn’t really do much optimization on the class, but deallocation speeds are more intriguing.
Testing with Debug settings indicates that deallocation is similarly expensive for both std::string and strung (and very slow for both), but with Release settings deallocation for std::string becomes very, very fast, while strung stays exactly the same. What does std::string do to achieve such fast deallocation, considering that strung's destructor is almost trivial?
At first I thought that std::string is optimized into a nop, so deallocation is not performed at all, but when I removed strung's destructor, the latter was still much faster, so this is probably not the case.
I would like my deallocation to be fast, so what am I to do to achieve similar deallocation speeds?
Microsoft’s std::string implementation uses something called “small string optimization”. What this means is that std::string actually contains a 15-character string (a char[16]). If it is given a string shorter than 16 characters, then it stores it in that internal memory. So there is no dynamic memory allocation done in these cases.
Your strung always dynamically allocates the string. Which means that its destructor will always deallocate it. std::string, if small enough, will do neither.