请查看此代码:
#include <iostream>
#include <thread>
#include <numeric>
#include <algorithm>
#include <vector>
#include <chrono>
template<typename Iterator, typename T>
struct accumulate_block
{
void operator()(Iterator begin, Iterator end, T& result)
{
result = std::accumulate(begin, end, result);
}
};
template<typename Iterator, typename T>
int accumulate_all(Iterator begin, Iterator end, T& init)
{
auto numOfThreads = std::thread::hardware_concurrency();
std::vector<std::thread> threads(numOfThreads);
auto step = std::distance(begin, end) / numOfThreads;
std::vector<int> results(numOfThreads,0);
for(int i=0; i<numOfThreads-1; ++i)
{
auto block_end = begin;
std::advance(block_end, step);
threads[i] = std::thread(accumulate_block<Iterator, T>(), begin, block_end, std::ref(results[i]));
begin = block_end;
}
threads[numOfThreads-1] = std::thread(accumulate_block<Iterator, T>(), begin, end, std::ref(results[numOfThreads-1]));
for_each(threads.begin(), threads.end(), std::mem_fn(&std::thread::join));
return accumulate(results.begin(), results.end(), 0);
}
int main()
{
int x=0;
std::vector<int> V(20000000,1);
auto t1 = std::chrono::high_resolution_clock::now();
//std::accumulate(std::begin(V), std::end(V), x); singe threaded option
std::cout<<accumulate_all(std::begin(V), std::end(V), x);
auto t2 = std::chrono::high_resolution_clock::now();
std::cout << "process took: "
<< std::chrono::duration_cast<std::chrono::nanoseconds>(t2 - t1).count()
<< " nanoseconds\n";
return 0;
}
当我在并发版本上运行时(基本上是在8个线程上运行,因为
std::thread::hardware_concurrency();
返回8)
输出为:处理时间:
8895404 nanoseconds
。
但单线程选项的输出是:
process took: 124 nanoseconds
有人能解释这种奇怪的行为吗??