Impala
Impala is the open source, native analytic database for Apache Hadoop.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
init.cc
Go to the documentation of this file.
1 // Copyright 2012 Cloudera Inc.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "common/init.h"
16 
17 #include <google/heap-profiler.h>
18 #include <google/malloc_extension.h>
19 
20 #include "common/logging.h"
21 #include "common/status.h"
22 #include "exprs/expr.h"
23 #include "util/cpu-info.h"
24 #include "util/debug-util.h"
25 #include "util/disk-info.h"
26 #include "util/logging-support.h"
27 #include "util/mem-info.h"
28 #include "util/network-util.h"
29 #include "util/os-info.h"
30 #include "util/redactor.h"
31 #include "util/test-info.h"
32 #include "runtime/decimal-value.h"
33 #include "runtime/exec-env.h"
34 #include "runtime/hdfs-fs-cache.h"
35 #include "runtime/lib-cache.h"
36 #include "runtime/mem-tracker.h"
38 #include "rpc/authentication.h"
39 #include "rpc/thrift-util.h"
40 #include "util/thread.h"
41 
42 #include "common/names.h"
43 
// Command-line flags read by this file. The DECLARE_* lines only declare flags;
// max_log_files is the only flag this file defines itself.
44 DECLARE_string(hostname);
45 DECLARE_string(redaction_rules_file);
46 // TODO: rename this to be more generic when we have a good CM release to do so.
47 DECLARE_int32(logbufsecs);
48 DECLARE_string(heap_profile_dir);
49 DECLARE_bool(enable_process_lifetime_heap_profiling);
50 
// Passed to CheckAndRotateLogFiles() on every pass of the maintenance thread.
51 DEFINE_int32(max_log_files, 10, "Maximum number of log files to retain per severity "
52  "level. The most recent log files are retained. If set to 0, all log files are "
53  "retained.");
54 
55 // Defined by glog. This allows users to specify the log level using a glob. For
56 // example -vmodule=*scanner*=3 would enable full logging for scanners. If redaction
57 // is enabled, this option won't be allowed because some logging dumps table data
58 // in ways the authors of redaction rules can't anticipate.
59 DECLARE_string(vmodule);
60 
61 // tcmalloc will hold on to freed memory. We will periodically release the memory back
62 // to the OS if the extra memory is too high. If the memory used by the application
63 // is less than this fraction of the total reserved memory, free it back to the OS.
64 static const float TCMALLOC_RELEASE_FREE_MEMORY_FRACTION = 0.5f;
65 
66 using std::string;
67 
68 // Maintenance thread that runs periodically. It does a few things:
69 // 1) flushes glog every logbufsecs sec. glog flushes the log file only if
70 // logbufsecs has passed since the previous flush when a new log is written. That means
71 // that on a quiet system, logs will be buffered indefinitely.
72 // 2) checks that tcmalloc has not left too much memory in its pageheap
// 3) calls Release(0) on the process mem tracker, when one is available
// 4) checks for log rotation, passing FLAGS_max_log_files
// Steps 2 and 3 are compiled out when ADDRESS_SANITIZER is defined. When
// TestInfo::is_test() returns true, only step 1 runs.
//
// Handle for the thread that runs MaintenanceThread(); it is assigned in
// InitCommonRuntime().
73 shared_ptr<impala::Thread> maintenance_thread;
// Body of the maintenance thread. The loop has no exit condition.
74 static void MaintenanceThread() {
75  while(true) {
  // NOTE(review): with logbufsecs == 0 this sleep would presumably return at once and
  // the loop would spin - confirm the flag value is validated elsewhere.
76  sleep(FLAGS_logbufsecs);
77 
78  google::FlushLogFiles(google::GLOG_INFO);
79 
80  // Tests don't need to run the maintenance thread. It causes issues
81  // on teardown.
  // The glog flush above still runs in tests; only the work below is skipped.
82  if (impala::TestInfo::is_test()) continue;
83 
84 #ifndef ADDRESS_SANITIZER
85  // Required to ensure memory gets released back to the OS, even if tcmalloc doesn't do
86  // it for us. This is because tcmalloc releases memory based on the
87  // TCMALLOC_RELEASE_RATE property, which is not actually a rate but a divisor based
88  // on the number of blocks that have been deleted. When tcmalloc does decide to
89  // release memory, it removes a single span from the PageHeap. This means there are
90  // certain allocation patterns that can lead to OOM due to not enough memory being
91  // released by tcmalloc, even when that memory is no longer being used.
92  // One example is continually resizing a vector which results in many allocations.
93  // Even after the vector goes out of scope, all the memory will not be released
94  // unless there are enough other deletions that are occurring in the system.
95  // This can eventually lead to OOM/crashes (see IMPALA-818).
96  // See: http://google-perftools.googlecode.com/svn/trunk/doc/tcmalloc.html#runtime
  // The results of the GetNumericProperty() calls are not checked; both counters
  // start at 0, in which case the comparison below is false and nothing is released.
97  size_t bytes_used = 0;
98  size_t bytes_in_pageheap = 0;
99  MallocExtension::instance()->GetNumericProperty(
100  "generic.current_allocated_bytes", &bytes_used);
101  MallocExtension::instance()->GetNumericProperty(
102  "generic.heap_size", &bytes_in_pageheap);
103  if (bytes_used < bytes_in_pageheap * TCMALLOC_RELEASE_FREE_MEMORY_FRACTION) {
104  MallocExtension::instance()->ReleaseFreeMemory();
105  }
106 
107  // When using tcmalloc, the process limit as measured by our trackers will
108  // be out of sync with the process usage. Update the process tracker periodically.
  // NOTE(review): 'env' is not declared anywhere in this listing (the numbering jumps
  // from 108 to 110); presumably line 109 fetches it via ExecEnv::GetInstance() -
  // confirm against the real source file.
110  if (env != NULL && env->process_mem_tracker() != NULL) {
111  env->process_mem_tracker()->Release(0);
112  }
113 #endif
114  // TODO: we should also update the process mem tracker with the reported JVM
115  // mem usage.
116 
117  // Check for log rotation in every interval of the maintenance thread
118  impala::CheckAndRotateLogFiles(FLAGS_max_log_files);
119  }
120 }
121 
// Performs process-wide start-up work, in this order: initializes the CPU, disk,
// memory and OS info helpers, the decimal tables and TestInfo; verifies CPU
// requirements; sets the default hostname; parses command-line flags; loads redaction
// rules if a rules file was given; starts the maintenance thread; logs version, host
// and machine information; optionally initializes JNI; optionally starts the heap
// profiler.
//
// argc, argv: the process arguments; passed by address to
//     google::ParseCommandLineFlags().
// init_jvm: when true, JniUtil::Init() is called, wrapped in EXIT_IF_ERROR.
// test_mode: forwarded to TestInfo::Init().
//
// NOTE(review): this listing omits several source lines (the numbering skips 149-152,
// 160, 162, 171-172 and 176), so some steps the real function performs are not
// visible here - confirm against the real source file.
122 void impala::InitCommonRuntime(int argc, char** argv, bool init_jvm,
123  TestInfo::Mode test_mode) {
124  CpuInfo::Init();
125  DiskInfo::Init();
126  MemInfo::Init();
127  OsInfo::Init();
128  DecimalUtil::InitMaxUnscaledDecimal();
129  TestInfo::Init(test_mode);
130 
131  // Verify CPU meets the minimum requirements before calling InitGoogleLoggingSafe()
132  // which might use SSSE3 instructions (see IMPALA-160).
133  CpuInfo::VerifyCpuRequirements();
134 
135  // Set the default hostname. The user can override this with the hostname flag.
  // This runs before flag parsing, so a hostname given on the command line replaces it.
  // NOTE(review): GetHostname() returns a Status that is discarded here - confirm that
  // ignoring a lookup failure is intended.
136  GetHostname(&FLAGS_hostname);
137 
138  google::SetVersionString(impala::GetBuildVersion());
139  google::ParseCommandLineFlags(&argc, &argv, true);
  // Redaction is configured only when a rules file was supplied, and is rejected when
  // row-level logging or -vmodule is enabled.
140  if (!FLAGS_redaction_rules_file.empty()) {
141  if (VLOG_ROW_IS_ON || !FLAGS_vmodule.empty()) {
142  EXIT_WITH_ERROR("Redaction cannot be used in combination with log level 3 or "
143  "higher or the -vmodule option because these log levels may log data in "
144  "ways redaction rules may not anticipate.");
145  }
  // A non-empty return value is treated as an error message.
146  const string& error_message = SetRedactionRulesFromFile(FLAGS_redaction_rules_file);
147  if (!error_message.empty()) EXIT_WITH_ERROR(error_message);
148  }
153 
  // NOTE(review): the InitGoogleLoggingSafe / InitThreading calls mentioned below are
  // not visible in this listing; presumably they sit in the omitted lines 149-152.
154  // Initialize maintenance_thread after InitGoogleLoggingSafe and InitThreading.
155  maintenance_thread.reset(
156  new Thread("common", "maintenance-thread", &MaintenanceThread));
157 
158  LOG(INFO) << impala::GetVersionString();
159  LOG(INFO) << "Using hostname: " << FLAGS_hostname;
161 
163 
164  LOG(INFO) << CpuInfo::DebugString();
165  LOG(INFO) << DiskInfo::DebugString();
166  LOG(INFO) << MemInfo::DebugString();
167  LOG(INFO) << OsInfo::DebugString();
168  LOG(INFO) << "Process ID: " << getpid();
169 
170  // Required for the FE's Catalog
  // NOTE(review): the statement the comment above refers to is not visible in this
  // listing (lines 171-172 are omitted).
173 
174  if (init_jvm) {
175  EXIT_IF_ERROR(JniUtil::Init());
177  }
178 
  // argc is never expected to be -1; the branch exists only so that the reference to
  // Expr::InitBuiltinsDummy() is present in the code.
179  if (argc == -1) {
180  // Should not be called. We need BuiltinsInit() so the builtin symbols are
181  // not stripped.
182  DCHECK(false);
183  Expr::InitBuiltinsDummy();
184  }
185 
186 #ifndef ADDRESS_SANITIZER
187  // tcmalloc and address sanitizer can not be used together
  // Heap profiling starts only when the flag is set; the profile location comes
  // from FLAGS_heap_profile_dir.
188  if (FLAGS_enable_process_lifetime_heap_profiling) {
189  HeapProfilerStart(FLAGS_heap_profile_dir.c_str());
190  }
191 #endif
192 }
void InitThriftLogging()
Redirects all Thrift logging to VLOG(1)
Definition: thrift-util.cc:112
#define EXIT_WITH_ERROR(msg)
Definition: status.h:256
TODO: Consider allowing fragment IDs as category parameters.
Definition: thread.h:45
string GetVersionString(bool compact)
Returns "<program short name> version <GetBuildVersion(compact)>".
Definition: debug-util.cc:239
void InitCommonRuntime(int argc, char **argv, bool init_jvm, TestInfo::Mode m=TestInfo::NON_TEST)
Definition: init.cc:122
DECLARE_string(hostname)
string GetBuildVersion(bool compact)
Definition: debug-util.cc:223
string SetRedactionRulesFromFile(const string &rules_file_path)
Definition: redactor.cc:260
MemTracker * process_mem_tracker()
Definition: exec-env.h:86
void InitGoogleLoggingSafe(const char *arg)
Definition: logging.cc:55
void CheckAndRotateLogFiles(int max_log_files)
Definition: logging.cc:159
DEFINE_int32(max_log_files, 10,"Maximum number of log files to retain per severity ""level. The most recent log files are retained. If set to 0, all log files are ""retained.")
void InitThreading()
Initialises the threading subsystem. Must be called before a Thread is created.
Definition: thread.cc:261
void InitJvmLoggingSupport()
std::string DebugString(const T &val)
Definition: udf-debug.h:27
DECLARE_bool(enable_process_lifetime_heap_profiling)
#define EXIT_IF_ERROR(stmt)
Definition: status.h:248
void Release(int64_t bytes)
Decreases consumption of this tracker and its ancestors by 'bytes'.
Definition: mem-tracker.h:209
Status GetHostname(string *hostname)
Definition: network-util.cc:40
DECLARE_int32(logbufsecs)
static ExecEnv * GetInstance()
Definition: exec-env.h:63
static bool is_test()
Definition: test-info.h:34
static const float TCMALLOC_RELEASE_FREE_MEMORY_FRACTION
Definition: init.cc:64
static Status Init()
Initializes the libcache. Must be called before any other APIs.
Definition: lib-cache.cc:100
void LogCommandLineFlags()
Writes all command-line flags to the log at level INFO.
Definition: logging.cc:154
static void Init()
Initializes the cache. Must be called before any other APIs.
#define VLOG_ROW_IS_ON
Definition: logging.h:66
static void MaintenanceThread()
Definition: init.cc:74
Status InitAuth(const std::string &appname)
shared_ptr< impala::Thread > maintenance_thread
Definition: init.cc:73