Impala
Impala is the open source, native analytic database for Apache Hadoop.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
thread-create-benchmark.cc
Go to the documentation of this file.
1 // Copyright 2012 Cloudera Inc.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include <stdlib.h>
16 #include <stdio.h>
17 #include <iostream>
18 #include "util/cpu-info.h"
19 #include "util/debug-util.h"
20 #include "util/pretty-printer.h"
21 #include "util/thread.h"
22 #include "util/stopwatch.h"
23 
24 #include <boost/thread/thread.hpp>
25 #include <boost/foreach.hpp>
26 #include <boost/bind.hpp>
27 
28 #include "common/names.h"
29 
30 using namespace impala;
31 
32 // Benchmark for thread creation time using native threads and
33 // Impala's Thread class.
34 
35 // -----------------Benchmark 1: Single-threaded thread creation
36 // (Native):Time to start up 1 * 1 = 1 threads: 1136K clock cycles
37 // (Native):Time to start up 1 * 5 = 5 threads: 918K clock cycles
38 // (Native):Time to start up 1 * 50 = 50 threads: 4ms
39 // (Native):Time to start up 1 * 500 = 500 threads: 37ms
40 // (Native):Time to start up 1 * 5000 = 5000 threads: 237ms
41 // Total time (Native): 280ms
42 
43 // (Impala):Time to start up 1 * 1 = 1 threads: 861K clock cycles
44 // (Impala):Time to start up 1 * 5 = 5 threads: 936K clock cycles
45 // (Impala):Time to start up 1 * 50 = 50 threads: 7ms
46 // (Impala):Time to start up 1 * 500 = 500 threads: 31ms
47 // (Impala):Time to start up 1 * 5000 = 5000 threads: 461ms
48 // Total time (IMPALA): 502ms
49 
50 // Impala thread overhead: 221ms, which is 78.9033%
51 
52 // -----------------Benchmark 2: Multi-threaded thread creation
53 // (Native):Time to start up 20 * 1 = 20 threads: 2ms
54 // (Native):Time to start up 20 * 5 = 100 threads: 28ms
55 // (Native):Time to start up 20 * 50 = 1000 threads: 89ms
56 // (Native):Time to start up 20 * 500 = 10000 threads: 977ms
57 // Total time (Native): 1s098ms
58 
59 // (Impala):Time to start up 20 * 1 = 20 threads: 3ms
60 // (Impala):Time to start up 20 * 5 = 100 threads: 7ms
61 // (Impala):Time to start up 20 * 50 = 1000 threads: 97ms
62 // (Impala):Time to start up 20 * 500 = 10000 threads: 1s088ms
63 // Total time (IMPALA): 1s196ms
64 
65 // Impala thread overhead: 98ms, which is 8.94135%
66 
67 // The difference between Impala and native thread creation throughput is explained
68 // almost entirely by Impala thread creation blocking until the thread ID is available
69 // before returning (hence the difference is less marked in the multi-threaded creation
70 // case, where another creation thread is usually available to do work). See
71 // Thread::StartThread() for more details. Without blocking, thread creation benchmark
72 // times are always within ~5% of each other.
73 
// No-op thread body: the benchmark measures thread start-up and join cost only,
// so the spawned threads deliberately do no work.
void EmptyThread() {}
76 
77 // Runs N native threads, each executing 'f'
78 void NativeThreadStarter(int num_threads, const function<void ()>& f) {
79  thread_group threads;
80  for (int i = 0; i < num_threads; ++i) {
81  threads.add_thread(new thread(f));
82  }
83  threads.join_all();
84 }
85 
86 // Runs N Impala Threads, each executing 'f'
87 void ImpalaThreadStarter(int num_threads, const function<void ()>& f) {
88  vector<Thread*> threads;
89  threads.reserve(num_threads);
90  for (int i=0; i < num_threads; ++i) {
91  threads.push_back(new Thread("mythreadgroup", "thread", f));
92  }
93  BOOST_FOREACH(Thread* thread, threads) {
94  thread->Join();
95  delete thread;
96  }
97 }
98 
99 // Times how long it takes to run num_threads 'executors', each of
100 // which spawns num_threads_per_executor empty threads, and to wait
101 // for all of them to finish.
102 void TimeParallelExecutors(int num_threads, int num_threads_per_executor,
103  bool use_native_threads = true) {
104  StopWatch sw;
105  sw.Start();
106  if (use_native_threads) {
107  function<void ()> f =
108  bind(NativeThreadStarter, num_threads_per_executor, EmptyThread);
109  NativeThreadStarter(num_threads, f);
110  } else {
111  function<void ()> f =
112  bind(ImpalaThreadStarter, num_threads_per_executor, EmptyThread);
113  ImpalaThreadStarter(num_threads, f);
114  }
115  sw.Stop();
116  cout << (use_native_threads ? "(Native):" : "(Impala):")
117  << "Time to start up " << num_threads << " * " << num_threads_per_executor << " = "
118  << num_threads * num_threads_per_executor << " threads: "
119  << PrettyPrinter::Print(sw.ElapsedTime(), TUnit::CPU_TICKS) << endl;
120 }
121 
122 int main(int argc, char **argv) {
123  google::InitGoogleLogging(argv[0]);
124  CpuInfo::Init();
126 
127  cout << "-----------------Benchmark 1: Single-threaded thread creation" << endl;
128 
129  // Measure how long it takes to start up a bunch of threads
130  StopWatch total_time;
131  total_time.Start();
132 
133  TimeParallelExecutors(1, 1);
134  TimeParallelExecutors(1, 5);
135  TimeParallelExecutors(1, 50);
136  TimeParallelExecutors(1, 500);
137  TimeParallelExecutors(1, 5000);
138 
139  total_time.Stop();
140 
141  cout << "Total time (Native): "
142  << PrettyPrinter::Print(total_time.ElapsedTime(), TUnit::CPU_TICKS)
143  << endl << endl;
144 
145  // Measure how long it takes to start up a bunch of threads
146  StopWatch total_time_imp;
147  total_time_imp.Start();
148 
149  TimeParallelExecutors(1, 1, false);
150  TimeParallelExecutors(1, 5, false);
151  TimeParallelExecutors(1, 50, false);
152  TimeParallelExecutors(1, 500, false);
153  TimeParallelExecutors(1, 5000, false);
154 
155  total_time_imp.Stop();
156 
157  cout << "Total time (IMPALA): "
158  << PrettyPrinter::Print(total_time_imp.ElapsedTime(), TUnit::CPU_TICKS)
159  << endl << endl;
160 
161  int64_t difference = total_time_imp.ElapsedTime() - total_time.ElapsedTime();
162  cout << "Impala thread overhead: "
163  << PrettyPrinter::Print(difference, TUnit::CPU_TICKS)
164  << ", which is " << (difference * 100.0 / total_time.ElapsedTime())
165  << "%" << endl << endl;
166 
167 
168  cout << "-----------------Benchmark 2: Multi-threaded thread creation" << endl;
169 
170  // Measure how long it takes to start up a bunch of threads
171  StopWatch total_time_parallel_native;
172  total_time_parallel_native.Start();
173 
174  TimeParallelExecutors(20, 1);
175  TimeParallelExecutors(20, 5);
176  TimeParallelExecutors(20, 50);
177  TimeParallelExecutors(20, 500);
178 
179  total_time_parallel_native.Stop();
180 
181  cout << "Total time (Native): "
182  << PrettyPrinter::Print(total_time_parallel_native.ElapsedTime(),
183  TUnit::CPU_TICKS)
184  << endl << endl;
185 
186  // Measure how long it takes to start up a bunch of threads
187  StopWatch total_time_parallel_impala;
188  total_time_parallel_impala.Start();
189 
190  TimeParallelExecutors(20, 1, false);
191  TimeParallelExecutors(20, 5, false);
192  TimeParallelExecutors(20, 50, false);
193  TimeParallelExecutors(20, 500, false);
194 
195  total_time_parallel_impala.Stop();
196 
197  cout << "Total time (IMPALA): "
198  << PrettyPrinter::Print(total_time_parallel_impala.ElapsedTime(),
199  TUnit::CPU_TICKS)
200  << endl;
201 
202  difference = total_time_parallel_impala.ElapsedTime()
203  - total_time_parallel_native.ElapsedTime() ;
204  cout << "Impala thread overhead: "
205  << PrettyPrinter::Print(difference, TUnit::CPU_TICKS)
206  << ", which is " << (difference * 100.0 / total_time_parallel_native.ElapsedTime())
207  << "%" << endl;
208 
209  return 0;
210 }
uint64_t ElapsedTime() const
Returns time in cpu ticks.
Definition: stopwatch.h:50
int main(int argc, char **argv)
TODO: Consider allowing fragment IDs as category parameters.
Definition: thread.h:45
void NativeThreadStarter(int num_threads, const function< void()> &f)
void TimeParallelExecutors(int num_threads, int num_threads_per_executor, bool use_native_threads=true)
static std::string Print(bool value, TUnit::type ignored, bool verbose=false)
void InitThreading()
Initialises the threading subsystem. Must be called before a Thread is created.
Definition: thread.cc:261
void EmptyThread()
static void Init()
Initialize CpuInfo.
Definition: cpu-info.cc:75
void ImpalaThreadStarter(int num_threads, const function< void()> &f)
void Join() const
Definition: thread.h:102