UPDATE 2021
I've modified the benchmark code as follows:
- std::chrono used for timing measurements instead of boost
- C++11
<random>
used instead of rand()
- Avoid repeated operations that can get hoisted out. The base parameter is ever-changing.
I get the following results with GCC 10 -O2 (in seconds):
exp c++ pow c pow x*x*x...
2 0.204243 1.39962 0.0902527
3 1.36162 1.38291 0.107679
4 1.37717 1.38197 0.106103
5 1.3815 1.39139 0.117097
GCC 10 -O3 is almost identical to GCC 10 -O2.
With GCC 10 -O2 -ffast-math:
exp c++ pow c pow x*x*x...
2 0.203625 1.4056 0.0913414
3 0.11094 1.39938 0.108027
4 0.201593 1.38618 0.101585
5 0.102141 1.38212 0.10662
With GCC 10 -O3 -ffast-math:
exp c++ pow c pow x*x*x...
2 0.0451995 1.175 0.0450497
3 0.0470842 1.20226 0.051399
4 0.0475239 1.18033 0.0473844
5 0.0522424 1.16817 0.0522291
With Clang 12 -O2:
exp c++ pow c pow x*x*x...
2 0.106242 0.105435 0.105533
3 1.45909 1.4425 0.102235
4 1.45629 1.44262 0.108861
5 1.45837 1.44483 0.1116
Clang 12 -O3 is almost identical to Clang 12 -O2.
With Clang 12 -O2 -ffast-math:
exp c++ pow c pow x*x*x...
2 0.0233731 0.0232457 0.0231076
3 0.0271074 0.0266663 0.0278415
4 0.026897 0.0270698 0.0268115
5 0.0312481 0.0296402 0.029811
Clang 12 -O3 -ffast-math is almost identical to Clang 12 -O2 -ffast-math.
Machine is Intel Core i7-7700K on Linux 5.4.0-73-generic x86_64.
Conclusions:
- With GCC 10 (no -ffast-math),
x*x*x...
is always faster
- With GCC 10 -O2 -ffast-math,
std::pow
is as fast as x*x*x...
for odd exponents
- With GCC 10 -O3 -ffast-math,
std::pow
is as fast as x*x*x...
for all test cases, and is around twice as fast as -O2.
- With GCC 10, C's
pow(double, double)
is always much slower
- With Clang 12 (no -ffast-math),
x*x*x...
is faster for exponents greater than 2
- With Clang 12 -ffast-math, all methods produce similar results
- With Clang 12,
pow(double, double)
is as fast as std::pow
for integral exponents
- Writing benchmarks without having the compiler outsmart you is hard.
I'll eventually get around to installing a more recent version of GCC on my machine and will update my results when I do so.
Here's the updated benchmark code:
#include <cmath>
#include <chrono>
#include <iostream>
#include <random>
using Moment = std::chrono::high_resolution_clock::time_point;
using FloatSecs = std::chrono::duration<double>;
inline Moment now()
{
return std::chrono::high_resolution_clock::now();
}
#define TEST(num, expression)
double test##num(double b, long loops)
{
double x = 0.0;
auto startTime = now();
for (long i=0; i<loops; ++i)
{
x += expression;
b += 1.0;
}
auto elapsed = now() - startTime;
auto seconds = std::chrono::duration_cast<FloatSecs>(elapsed);
std::cout << seconds.count() << "";
return x;
}
TEST(2, b*b)
TEST(3, b*b*b)
TEST(4, b*b*b*b)
TEST(5, b*b*b*b*b)
template <int exponent>
double testCppPow(double base, long loops)
{
double x = 0.0;
auto startTime = now();
for (long i=0; i<loops; ++i)
{
x += std::pow(base, exponent);
base += 1.0;
}
auto elapsed = now() - startTime;
auto seconds = std::chrono::duration_cast<FloatSecs>(elapsed);
std::cout << seconds.count() << "";
return x;
}
double testCPow(double base, double exponent, long loops)
{
double x = 0.0;
auto startTime = now();
for (long i=0; i<loops; ++i)
{
x += ::pow(base, exponent);
base += 1.0;
}
auto elapsed = now() - startTime;
auto seconds = std::chrono::duration_cast<FloatSecs>(elapsed);
std::cout << seconds.count() << "";
return x;
}
int main()
{
using std::cout;
long loops = 100000000l;
double x = 0;
std::random_device rd;
std::default_random_engine re(rd());
std::uniform_real_distribution<double> dist(1.1, 1.2);
cout << "expc++ powc powx*x*x...";
cout << "
2";
double b = dist(re);
x += testCppPow<2>(b, loops);
x += testCPow(b, 2.0, loops);
x += test2(b, loops);
cout << "
3";
b = dist(re);
x += testCppPow<3>(b, loops);
x += testCPow(b, 3.0, loops);
x += test3(b, loops);
cout << "
4";
b = dist(re);
x += testCppPow<4>(b, loops);
x += testCPow(b, 4.0, loops);
x += test4(b, loops);
cout << "
5";
b = dist(re);
x += testCppPow<5>(b, loops);
x += testCPow(b, 5.0, loops);
x += test5(b, loops);
std::cout << "
" << x << "
";
}
Old Answer, 2010
I tested the performance difference between x*x*...
vs pow(x,i)
for small i
using this code:
#include <cstdlib>
#include <cmath>
#include <boost/date_time/posix_time/posix_time.hpp>
inline boost::posix_time::ptime now()
{
return boost::posix_time::microsec_clock::local_time();
}
#define TEST(num, expression)
double test##num(double b, long loops)
{
double x = 0.0;
boost::posix_time::ptime startTime = now();
for (long i=0; i<loops; ++i)
{
x += expression;
x += expression;
x += expression;
x += expression;
x += expression;
x += expression;
x += expression;
x += expression;
x += expression;
x += expression;
}
boost::posix_time::time_duration elapsed = now() - startTime;
std::cout << elapsed << " ";
return x;
}
TEST(1, b)
TEST(2, b*b)
TEST(3, b*b*b)
TEST(4, b*b*b*b)
TEST(5, b*b*b*b*b)
template <int exponent>
double testpow(double base, long loops)
{
double x = 0.0;
boost::posix_time::ptime startTime = now();
for (long i=0; i<loops; ++i)
{
x += std::pow(base, exponent);
x += std::pow(base, exponent);
x += std::pow(base, exponent);
x += std::pow(base, exponent);
x += std::pow(base, exponent);
x += std::pow(base, exponent);
x += std::pow(base, exponent);
x += std::pow(base, exponent);
x += std::pow(base, exponent);
x += std::pow(base, exponent);
}
boost::posix_time::time_duration elapsed = now() - startTime;
std::cout << elapsed << " ";
return x;
}
int main()
{
using std::cout;
long loops = 100000000l;
double x = 0.0;
cout << "1 ";
x += testpow<1>(rand(), loops);
x += test1(rand(), loops);
cout << "
2 ";
x += testpow<2>(rand(), loops);
x += test2(rand(), loops);
cout << "
3 ";
x += testpow<3>(rand(), loops);
x += test3(rand(), loops);
cout << "
4 ";
x += testpow<4>(rand(), loops);
x += test4(rand(), loops);
cout << "
5 ";
x += testpow<5>(rand(), loops);
x += test5(rand(), loops);
cout << "
" << x << "
";
}
Results are:
1 00:00:01.126008 00:00:01.128338
2 00:00:01.125832 00:00:01.127227
3 00:00:01.125563 00:00:01.126590
4 00:00:01.126289 00:00:01.126086
5 00:00:01.126570 00:00:01.125930
2.45829e+54
Note that I accumulate the result of every pow calculation to make sure the compiler doesn't optimize it away.
If I use the std::pow(double, double)
version, and loops = 1000000l
, I get:
1 00:00:00.011339 00:00:00.011262
2 00:00:00.011259 00:00:00.011254
3 00:00:00.975658 00:00:00.011254
4 00:00:00.976427 00:00:00.011254
5 00:00:00.973029 00:00:00.011254
2.45829e+52
This is on an Intel Core Duo running Ubuntu 9.10 64bit. Compiled using gcc 4.4.1 with -o2 optimization.
So in C, yes x*x*x
will be faster than pow(x, 3)
, because there is no pow(double, int)
overload. In C++, it will be the roughly same. (Assuming the methodology in my testing is correct.)
This is in response to the comment made by An Markm:
Even if a using namespace std
directive was issued, if the second parameter to pow
is an int
, then the std::pow(double, int)
overload from <cmath>
will be called instead of ::pow(double, double)
from <math.h>
.
This test code confirms that behavior:
#include <iostream>
namespace foo
{
double bar(double x, int i)
{
std::cout << "foo::bar
";
return x*i;
}
}
double bar(double x, double y)
{
std::cout << "::bar
";
return x*y;
}
using namespace foo;
int main()
{
double a = bar(1.2, 3); // Prints "foo::bar"
std::cout << a << "
";
return 0;
}