Impala
Impala is the open source, native analytic database for Apache Hadoop.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
webserver.cc
Go to the documentation of this file.
1 // Copyright 2012 Cloudera Inc.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "util/webserver.h"
16 
17 #include <boost/algorithm/string.hpp>
18 #include <boost/bind.hpp>
19 #include <boost/filesystem.hpp>
20 #include <boost/foreach.hpp>
21 #include <boost/lexical_cast.hpp>
22 #include <boost/mem_fn.hpp>
23 #include <boost/thread/locks.hpp>
24 #include <gutil/strings/substitute.h>
25 #include <map>
26 #include <fstream>
27 #include <stdio.h>
28 #include <signal.h>
29 #include <string>
30 #include <mustache/mustache.h>
31 #include <rapidjson/stringbuffer.h>
32 #include <rapidjson/prettywriter.h>
33 
34 #include "common/logging.h"
35 #include "util/cpu-info.h"
36 #include "util/disk-info.h"
37 #include "util/mem-info.h"
38 #include "util/os-info.h"
40 #include "util/url-coding.h"
41 #include "util/debug-util.h"
42 #include "util/pretty-printer.h"
43 #include "util/stopwatch.h"
44 #include "rpc/thrift-util.h"
45 
46 #include "common/names.h"
47 
48 using boost::algorithm::is_any_of;
49 using boost::algorithm::split;
50 using boost::algorithm::to_lower;
51 using boost::filesystem::exists;
52 using boost::upgrade_to_unique_lock;
53 using namespace google;
54 using namespace strings;
55 using namespace rapidjson;
56 using namespace mustache;
57 
58 const char* GetDefaultDocumentRoot();
59 
60 DEFINE_int32(webserver_port, 25000, "Port to start debug webserver on");
61 DEFINE_string(webserver_interface, "",
62  "Interface to start debug webserver on. If blank, webserver binds to 0.0.0.0");
63 DEFINE_string(webserver_doc_root, GetDefaultDocumentRoot(),
64  "Files under <webserver_doc_root>/www are accessible via the debug webserver. "
65  "Defaults to $IMPALA_HOME, or if $IMPALA_HOME is not set, disables the document "
66  "root");
67 DEFINE_bool(enable_webserver_doc_root, true,
68  "If true, webserver may serve static files from the webserver_doc_root");
69 
70 DEFINE_string(webserver_certificate_file, "",
71  "The location of the debug webserver's SSL certificate file, in .pem format. If "
72  "empty, webserver SSL support is not enabled");
73 DEFINE_string(webserver_authentication_domain, "",
74  "Domain used for debug webserver authentication");
75 DEFINE_string(webserver_password_file, "",
76  "(Optional) Location of .htpasswd file containing user names and hashed passwords for"
77  " debug webserver authentication");
78 
// URI prefix (and sub-directory of the document root) under which static files and
// page templates live, together with its cached length for prefix comparisons.
static const char* DOC_FOLDER = "/www/";
static const int DOC_FOLDER_LEN = strlen(DOC_FOLDER);

// Readable names for the values the Squeasel callbacks return: 1 once a request has
// been handled here, 0 when it was not.
static const uint32_t PROCESSING_COMPLETE = 1;
static const uint32_t NOT_PROCESSED = 0;

// Key under which the common (per-page) json object is added to every document handed
// to a template. The templates refer to this key, so the two must stay in sync.
static const char* COMMON_JSON_KEY = "__common__";

// Key used to carry an error message in the argument map given to the webserver's
// error handler.
static const char* ERROR_KEY = "__error_msg__";
93 
// Returns the default value for --webserver_doc_root: the value of $IMPALA_HOME if that
// environment variable is set, otherwise the empty string. An empty document root
// means static files are not served.
//
// When $IMPALA_HOME is set, the returned pointer refers to a heap-allocated copy of the
// value that is never freed.
const char* GetDefaultDocumentRoot() {
  const char* impala_home = getenv("IMPALA_HOME");
  // Empty document root means don't serve static files
  if (impala_home == NULL) return "";

  // Deliberate memory leak, but this should be called exactly once.
  std::string* doc_root = new std::string(impala_home);
  return doc_root->c_str();
}
108 
109 namespace impala {
110 
111 const char* Webserver::ENABLE_RAW_JSON_KEY = "__raw__";
112 
// Supported HTTP response codes. Each enumerator's value is the numeric HTTP status
// code, which BuildHeaderString() substitutes directly into the status line.
enum ResponseCode {
  OK = 200,
  NOT_FOUND = 404
};
118 
// Supported HTTP content types. BuildHeaderString() renders HTML as "text/html" and
// anything else as "text/plain".
// NOTE(review): the enumerator list is reconstructed from the uses of HTML and PLAIN in
// this file; confirm the declaration order against the upstream source.
enum ContentType {
  HTML,
  PLAIN
};
124 
125 // Builds a valid HTTP header given the response code and a content type.
126 string BuildHeaderString(ResponseCode response, ContentType content_type) {
127  static const string RESPONSE_TEMPLATE = "HTTP/1.1 $0 $1\r\n"
128  "Content-Type: text/$2\r\n"
129  "Content-Length: %d\r\n"
130  "\r\n";
131 
132  return Substitute(RESPONSE_TEMPLATE, response, response == OK ? "OK" : "Not found",
133  content_type == HTML ? "html" : "plain");
134 }
135 
136 Webserver::Webserver()
137  : context_(NULL),
138  error_handler_(UrlHandler(bind<void>(&Webserver::ErrorHandler, this, _1, _2),
139  "error.tmpl", false)) {
141  FLAGS_webserver_interface.empty() ? "0.0.0.0" : FLAGS_webserver_interface,
142  FLAGS_webserver_port);
143 }
144 
145 Webserver::Webserver(const int port)
146  : context_(NULL),
147  error_handler_(UrlHandler(bind<void>(&Webserver::ErrorHandler, this, _1, _2),
148  "error.tmpl", false)) {
149  http_address_ = MakeNetworkAddress("0.0.0.0", port);
150 }
151 
153  Stop();
154 }
155 
156 void Webserver::RootHandler(const ArgumentMap& args, Document* document) {
157  Value version(GetVersionString().c_str(), document->GetAllocator());
158  document->AddMember("version", version, document->GetAllocator());
159  Value cpu_info(CpuInfo::DebugString().c_str(), document->GetAllocator());
160  document->AddMember("cpu_info", cpu_info, document->GetAllocator());
161  Value mem_info(MemInfo::DebugString().c_str(), document->GetAllocator());
162  document->AddMember("mem_info", mem_info, document->GetAllocator());
163  Value disk_info(DiskInfo::DebugString().c_str(), document->GetAllocator());
164  document->AddMember("disk_info", disk_info, document->GetAllocator());
165  Value os_info(OsInfo::DebugString().c_str(), document->GetAllocator());
166  document->AddMember("os_info", os_info, document->GetAllocator());
167  Value process_state_info(ProcessStateInfo().DebugString().c_str(),
168  document->GetAllocator());
169  document->AddMember("process_state_info", process_state_info,
170  document->GetAllocator());
171 }
172 
173 void Webserver::ErrorHandler(const ArgumentMap& args, Document* document) {
174  ArgumentMap::const_iterator it = args.find(ERROR_KEY);
175  if (it == args.end()) return;
176 
177  Value error(it->second.c_str(), document->GetAllocator());
178  document->AddMember("error", error, document->GetAllocator());
179 }
180 
181 void Webserver::BuildArgumentMap(const string& args, ArgumentMap* output) {
182  vector<string> arg_pairs;
183  split(arg_pairs, args, is_any_of("&"));
184 
185  BOOST_FOREACH(const string& arg_pair, arg_pairs) {
186  vector<string> key_value;
187  split(key_value, arg_pair, is_any_of("="));
188  if (key_value.empty()) continue;
189 
190  string key;
191  if (!UrlDecode(key_value[0], &key)) continue;
192  string value;
193  if (!UrlDecode((key_value.size() >= 2 ? key_value[1] : ""), &value)) continue;
194  to_lower(key);
195  (*output)[key] = value;
196  }
197 }
198 
199 bool Webserver::IsSecure() const {
200  return !FLAGS_webserver_certificate_file.empty();
201 }
202 
204  LOG(INFO) << "Starting webserver on " << http_address_;
205 
206  stringstream listening_spec;
207  listening_spec << http_address_;
208 
209  if (IsSecure()) {
210  LOG(INFO) << "Webserver: Enabling HTTPS support";
211  // Squeasel makes sockets with 's' suffixes accept SSL traffic only
212  listening_spec << "s";
213  }
214  string listening_str = listening_spec.str();
215  vector<const char*> options;
216 
217  if (!FLAGS_webserver_doc_root.empty() && FLAGS_enable_webserver_doc_root) {
218  LOG(INFO) << "Document root: " << FLAGS_webserver_doc_root;
219  options.push_back("document_root");
220  options.push_back(FLAGS_webserver_doc_root.c_str());
221  } else {
222  LOG(INFO)<< "Document root disabled";
223  }
224 
225  if (IsSecure()) {
226  options.push_back("ssl_certificate");
227  options.push_back(FLAGS_webserver_certificate_file.c_str());
228  }
229 
230  if (!FLAGS_webserver_authentication_domain.empty()) {
231  options.push_back("authentication_domain");
232  options.push_back(FLAGS_webserver_authentication_domain.c_str());
233  }
234 
235  if (!FLAGS_webserver_password_file.empty()) {
236  // Squeasel doesn't log anything if it can't stat the password file (but will if it
237  // can't open it, which it tries to do during a request)
238  if (!exists(FLAGS_webserver_password_file)) {
239  stringstream ss;
240  ss << "Webserver: Password file does not exist: " << FLAGS_webserver_password_file;
241  return Status(ss.str());
242  }
243  LOG(INFO) << "Webserver: Password file is " << FLAGS_webserver_password_file;
244  options.push_back("global_auth_file");
245  options.push_back(FLAGS_webserver_password_file.c_str());
246  }
247 
248  options.push_back("listening_ports");
249  options.push_back(listening_str.c_str());
250 
251  // Options must be a NULL-terminated list
252  options.push_back(NULL);
253 
254  // squeasel ignores SIGCHLD and we need it to run kinit. This means that since
255  // squeasel does not reap its own children CGI programs must be avoided.
256  // Save the signal handler so we can restore it after squeasel sets it to be ignored.
257  sighandler_t sig_chld = signal(SIGCHLD, SIG_DFL);
258 
259  sq_callbacks callbacks;
260  memset(&callbacks, 0, sizeof(callbacks));
261  callbacks.begin_request = &Webserver::BeginRequestCallbackStatic;
262  callbacks.log_message = &Webserver::LogMessageCallbackStatic;
263 
264  // To work around not being able to pass member functions as C callbacks, we store a
265  // pointer to this server in the per-server state, and register a static method as the
266  // default callback. That method unpacks the pointer to this and calls the real
267  // callback.
268  context_ = sq_start(&callbacks, reinterpret_cast<void*>(this), &options[0]);
269 
270  // Restore the child signal handler so wait() works properly.
271  signal(SIGCHLD, sig_chld);
272 
273  if (context_ == NULL) {
274  stringstream error_msg;
275  error_msg << "Webserver: Could not start on address " << http_address_;
276  return Status(error_msg.str());
277  }
278 
279  UrlCallback default_callback =
280  bind<void>(mem_fn(&Webserver::RootHandler), this, _1, _2);
281 
282  RegisterUrlCallback("/", "root.tmpl", default_callback, false);
283 
284  LOG(INFO) << "Webserver started";
285  return Status::OK;
286 }
287 
289  if (context_ != NULL) {
290  sq_stop(context_);
291  context_ = NULL;
292  }
293 }
294 
295 void Webserver::GetCommonJson(Document* document) {
296  DCHECK(document != NULL);
297  Value obj(kObjectType);
298  obj.AddMember("process-name", google::ProgramInvocationShortName(),
299  document->GetAllocator());
300 
301  Value lst(kArrayType);
302  BOOST_FOREACH(const UrlHandlerMap::value_type& handler, url_handlers_) {
303  if (handler.second.is_on_nav_bar()) {
304  Value obj(kObjectType);
305  obj.AddMember("link", handler.first.c_str(), document->GetAllocator());
306  obj.AddMember("title", handler.first.c_str(), document->GetAllocator());
307  lst.PushBack(obj, document->GetAllocator());
308  }
309  }
310 
311  obj.AddMember("navbar", lst, document->GetAllocator());
312  document->AddMember(COMMON_JSON_KEY, obj, document->GetAllocator());
313 }
314 
315 int Webserver::LogMessageCallbackStatic(const struct sq_connection* connection,
316  const char* message) {
317  if (message != NULL) {
318  LOG(INFO) << "Webserver: " << message;
319  }
320  return PROCESSING_COMPLETE;
321 }
322 
323 int Webserver::BeginRequestCallbackStatic(struct sq_connection* connection) {
324  struct sq_request_info* request_info = sq_get_request_info(connection);
325  Webserver* instance = reinterpret_cast<Webserver*>(request_info->user_data);
326  return instance->BeginRequestCallback(connection, request_info);
327 }
328 
329 int Webserver::BeginRequestCallback(struct sq_connection* connection,
330  struct sq_request_info* request_info) {
331  if (!FLAGS_webserver_doc_root.empty() && FLAGS_enable_webserver_doc_root) {
332  if (strncmp(DOC_FOLDER, request_info->uri, DOC_FOLDER_LEN) == 0) {
333  VLOG(2) << "HTTP File access: " << request_info->uri;
334  // Let Squeasel deal with this request; returning NULL will fall through
335  // to the default handler which will serve files.
336  return NOT_PROCESSED;
337  }
338  }
339 
340  map<string, string> arguments;
341  if (request_info->query_string != NULL) {
342  BuildArgumentMap(request_info->query_string, &arguments);
343  }
344 
345  shared_lock<shared_mutex> lock(url_handlers_lock_);
346  UrlHandlerMap::const_iterator it = url_handlers_.find(request_info->uri);
347  ResponseCode response = OK;
348  ContentType content_type = HTML;
349  const UrlHandler* url_handler = NULL;
350  if (it == url_handlers_.end()) {
351  response = NOT_FOUND;
352  arguments[ERROR_KEY] = Substitute("No URI handler for '$0'", request_info->uri);
353  url_handler = &error_handler_;
354  } else {
355  url_handler = &it->second;
356  }
357 
359  sw.Start();
360 
361  Document document;
362  document.SetObject();
363  GetCommonJson(&document);
364 
365  // The output of this page is accumulated into this stringstream.
366  stringstream output;
367  bool raw_json = (arguments.find("json") != arguments.end());
368  url_handler->callback()(arguments, &document);
369  if (raw_json) {
370  // Callbacks may optionally be rendered as a text-only, pretty-printed Json document
371  // (mostly for debugging or integration with third-party tools).
372  StringBuffer strbuf;
373  PrettyWriter<StringBuffer> writer(strbuf);
374  document.Accept(writer);
375  output << strbuf.GetString();
376  content_type = PLAIN;
377  } else {
378  if (arguments.find("raw") != arguments.end()) {
379  document.AddMember(ENABLE_RAW_JSON_KEY, "true", document.GetAllocator());
380  }
381  if (document.HasMember(ENABLE_RAW_JSON_KEY)) {
382  content_type = PLAIN;
383  }
384 
385  const string& full_template_path =
386  Substitute("$0/$1/$2", FLAGS_webserver_doc_root, DOC_FOLDER,
387  url_handler->template_filename());
388  ifstream tmpl(full_template_path.c_str());
389  if (!tmpl.is_open()) {
390  output << "Could not open template: " << full_template_path;
391  content_type = PLAIN;
392  } else {
393  stringstream buffer;
394  buffer << tmpl.rdbuf();
395  RenderTemplate(buffer.str(), Substitute("$0/", FLAGS_webserver_doc_root), document,
396  &output);
397  }
398  }
399 
400  VLOG(3) << "Rendering page " << request_info->uri << " took "
401  << PrettyPrinter::Print(sw.ElapsedTime(), TUnit::CPU_TICKS);
402 
403  const string& str = output.str();
404  const string& headers = BuildHeaderString(response, content_type);
405  sq_printf(connection, headers.c_str(), (int)str.length());
406 
407  // Make sure to use sq_write for printing the body; sq_printf truncates at 8kb
408  sq_write(connection, str.c_str(), str.length());
409  return PROCESSING_COMPLETE;
410 }
411 
413  const string& template_filename, const UrlCallback& callback, bool is_on_nav_bar) {
414  upgrade_lock<shared_mutex> lock(url_handlers_lock_);
415  upgrade_to_unique_lock<shared_mutex> writer_lock(lock);
416  DCHECK(url_handlers_.find(path) == url_handlers_.end())
417  << "Duplicate Url handler for: " << path;
418 
419  url_handlers_.insert(
420  make_pair(path, UrlHandler(callback, template_filename, is_on_nav_bar)));
421 }
422 
423 }
struct sq_context * context_
Handle to Squeasel context; owned and freed by Squeasel internally.
Definition: webserver.h:145
static const char * ENABLE_RAW_JSON_KEY
Definition: webserver.h:43
string path("/usr/lib/sasl2:/usr/lib64/sasl2:/usr/local/lib/sasl2:/usr/lib/x86_64-linux-gnu/sasl2")
const char * GetDefaultDocumentRoot()
Definition: webserver.cc:95
static const int DOC_FOLDER_LEN
Definition: webserver.cc:80
boost::function< void(const ArgumentMap &args, rapidjson::Document *json)> UrlCallback
Definition: webserver.h:38
UrlHandler error_handler_
Catch-all handler for error messages.
Definition: webserver.h:148
string GetVersionString(bool compact)
Returns "<program short name> version <GetBuildVersion(compact)>".
Definition: debug-util.cc:239
static const uint32_t PROCESSING_COMPLETE
Definition: webserver.cc:83
DEFINE_string(webserver_interface,"","Interface to start debug webserver on. If blank, webserver binds to 0.0.0.0")
Webserver()
Uses FLAGS_webserver_{port, interface}.
Definition: webserver.cc:136
void Stop()
Stops the webserver synchronously.
Definition: webserver.cc:288
void ErrorHandler(const ArgumentMap &args, rapidjson::Document *document)
Called when an error is encountered, e.g. when a handler for a URI cannot be found.
Definition: webserver.cc:173
void RegisterUrlCallback(const std::string &path, const std::string &template_filename, const UrlCallback &callback, bool is_on_nav_bar=true)
Only one callback may be registered per URL.
Definition: webserver.cc:412
TNetworkAddress MakeNetworkAddress(const string &hostname, int port)
Definition: network-util.cc:96
static const char * COMMON_JSON_KEY
Definition: webserver.cc:88
bool UrlDecode(const string &in, string *out, bool hive_compat)
Definition: url-coding.cc:84
static std::string Print(bool value, TUnit::type ignored, bool verbose=false)
std::map< std::string, std::string > ArgumentMap
Definition: webserver.h:36
bool IsSecure() const
True if serving all traffic over SSL, false otherwise.
Definition: webserver.cc:199
TNetworkAddress http_address_
The address of the interface on which to run this webserver.
Definition: webserver.h:142
static int LogMessageCallbackStatic(const struct sq_connection *connection, const char *message)
Squeasel callback for log events. Returns squeasel success code.
Definition: webserver.cc:315
static std::string DebugString()
Definition: mem-info.cc:96
DEFINE_int32(webserver_port, 25000,"Port to start debug webserver on")
static const char * DOC_FOLDER
Definition: webserver.cc:79
std::string DebugString(const T &val)
Definition: udf-debug.h:27
const UrlCallback & callback() const
Definition: webserver.h:90
void RootHandler(const ArgumentMap &args, rapidjson::Document *document)
Registered to handle "/", populates document with various system-wide information.
Definition: webserver.cc:156
static std::string DebugString()
Definition: disk-info.cc:127
static std::string DebugString()
Definition: cpu-info.cc:155
ResponseCode
Definition: webserver.cc:114
uint64_t ElapsedTime() const
Returns time in nanosecond.
Definition: stopwatch.h:105
void GetCommonJson(rapidjson::Document *document)
Definition: webserver.cc:295
UrlHandlerMap url_handlers_
Definition: webserver.h:139
static const Status OK
Definition: status.h:87
DEFINE_bool(enable_webserver_doc_root, true,"If true, webserver may serve static files from the webserver_doc_root")
static std::string DebugString()
Definition: os-info.cc:41
string BuildHeaderString(ResponseCode response, ContentType content_type)
Definition: webserver.cc:126
static const char * ERROR_KEY
Definition: webserver.cc:92
void BuildArgumentMap(const std::string &args, ArgumentMap *output)
Definition: webserver.cc:181
const std::string & template_filename() const
Definition: webserver.h:91
boost::shared_mutex url_handlers_lock_
Lock guarding the path_handlers_ map.
Definition: webserver.h:133
static int BeginRequestCallbackStatic(struct sq_connection *connection)
Definition: webserver.cc:323
static const uint32_t NOT_PROCESSED
Definition: webserver.cc:84
int BeginRequestCallback(struct sq_connection *connection, struct sq_request_info *request_info)
Dispatch point for all incoming requests. Returns squeasel success code.
Definition: webserver.cc:329