Impala
Impala is the open source, native analytic database for Apache Hadoop.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
impala-server.h
Go to the documentation of this file.
1 // Copyright 2012 Cloudera Inc.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #ifndef IMPALA_SERVICE_IMPALA_SERVER_H
16 #define IMPALA_SERVICE_IMPALA_SERVER_H
17 
18 #include <boost/thread/mutex.hpp>
19 #include <boost/shared_ptr.hpp>
20 #include <boost/scoped_ptr.hpp>
21 #include <boost/unordered_map.hpp>
22 #include <boost/unordered_set.hpp>
23 #include <boost/uuid/uuid.hpp>
24 #include <boost/uuid/uuid_generators.hpp>
25 #include <boost/uuid/uuid_io.hpp>
26 
27 #include "gen-cpp/ImpalaService.h"
28 #include "gen-cpp/ImpalaHiveServer2Service.h"
29 #include "gen-cpp/ImpalaInternalService.h"
30 #include "gen-cpp/Frontend_types.h"
31 #include "rpc/thrift-server.h"
32 #include "common/status.h"
33 #include "service/frontend.h"
34 #include "util/metrics.h"
35 #include "util/runtime-profile.h"
36 #include "util/simple-logger.h"
37 #include "util/thread-pool.h"
38 #include "util/time.h"
39 #include "util/uid-util.h"
40 #include "runtime/coordinator.h"
41 #include "runtime/runtime-state.h"
43 #include "runtime/types.h"
44 #include "rapidjson/rapidjson.h"
45 
46 namespace impala {
47 
48 class ExecEnv;
49 class DataSink;
50 class CancellationWork;
51 class Coordinator;
52 class RowDescriptor;
53 class TCatalogUpdate;
54 class TPlanExecRequest;
55 class TPlanExecParams;
56 class TInsertResult;
57 class TReportExecStatusArgs;
58 class TReportExecStatusResult;
59 class TTransmitDataArgs;
60 class TTransmitDataResult;
61 class TNetworkAddress;
62 class TClientRequest;
63 class TExecRequest;
64 class TSessionState;
65 class TQueryOptions;
66 class TGetExecSummaryResp;
67 class TGetExecSummaryReq;
68 
74 //
84  public:
85  ImpalaServer(ExecEnv* exec_env);
86  ~ImpalaServer();
87 
89  virtual void query(beeswax::QueryHandle& query_handle, const beeswax::Query& query);
90  virtual void executeAndWait(beeswax::QueryHandle& query_handle,
91  const beeswax::Query& query, const beeswax::LogContextId& client_ctx);
92  virtual void explain(beeswax::QueryExplanation& query_explanation,
93  const beeswax::Query& query);
94  virtual void fetch(beeswax::Results& query_results,
95  const beeswax::QueryHandle& query_handle, const bool start_over,
96  const int32_t fetch_size);
97  virtual void get_results_metadata(beeswax::ResultsMetadata& results_metadata,
98  const beeswax::QueryHandle& handle);
99  virtual void close(const beeswax::QueryHandle& handle);
100  virtual beeswax::QueryState::type get_state(const beeswax::QueryHandle& handle);
101  virtual void echo(std::string& echo_string, const std::string& input_string);
102  virtual void clean(const beeswax::LogContextId& log_context);
103  virtual void get_log(std::string& log, const beeswax::LogContextId& context);
104 
109  virtual void get_default_configuration(
110  std::vector<beeswax::ConfigVariable>& configurations, const bool include_hadoop);
111 
114  virtual void dump_config(std::string& config);
115 
117  virtual void Cancel(impala::TStatus& status, const beeswax::QueryHandle& query_id);
118  virtual void CloseInsert(impala::TInsertResult& insert_result,
119  const beeswax::QueryHandle& query_handle);
120 
122  virtual void PingImpalaService(TPingImpalaServiceResp& return_val);
123 
124  virtual void GetRuntimeProfile(std::string& profile_output,
125  const beeswax::QueryHandle& query_id);
126 
127  virtual void GetExecSummary(impala::TExecSummary& result,
128  const beeswax::QueryHandle& query_id);
129 
131  virtual void ResetCatalog(impala::TStatus& status);
132 
135  virtual void ResetTable(impala::TStatus& status, const TResetTableReq& request);
136 
139  virtual void OpenSession(
140  apache::hive::service::cli::thrift::TOpenSessionResp& return_val,
141  const apache::hive::service::cli::thrift::TOpenSessionReq& request);
142  virtual void CloseSession(
143  apache::hive::service::cli::thrift::TCloseSessionResp& return_val,
144  const apache::hive::service::cli::thrift::TCloseSessionReq& request);
145  virtual void GetInfo(
146  apache::hive::service::cli::thrift::TGetInfoResp& return_val,
147  const apache::hive::service::cli::thrift::TGetInfoReq& request);
148  virtual void ExecuteStatement(
149  apache::hive::service::cli::thrift::TExecuteStatementResp& return_val,
150  const apache::hive::service::cli::thrift::TExecuteStatementReq& request);
151  virtual void GetTypeInfo(
152  apache::hive::service::cli::thrift::TGetTypeInfoResp& return_val,
153  const apache::hive::service::cli::thrift::TGetTypeInfoReq& request);
154  virtual void GetCatalogs(
155  apache::hive::service::cli::thrift::TGetCatalogsResp& return_val,
156  const apache::hive::service::cli::thrift::TGetCatalogsReq& request);
157  virtual void GetSchemas(
158  apache::hive::service::cli::thrift::TGetSchemasResp& return_val,
159  const apache::hive::service::cli::thrift::TGetSchemasReq& request);
160  virtual void GetTables(
161  apache::hive::service::cli::thrift::TGetTablesResp& return_val,
162  const apache::hive::service::cli::thrift::TGetTablesReq& request);
163  virtual void GetTableTypes(
164  apache::hive::service::cli::thrift::TGetTableTypesResp& return_val,
165  const apache::hive::service::cli::thrift::TGetTableTypesReq& request);
166  virtual void GetColumns(
167  apache::hive::service::cli::thrift::TGetColumnsResp& return_val,
168  const apache::hive::service::cli::thrift::TGetColumnsReq& request);
169  virtual void GetFunctions(
170  apache::hive::service::cli::thrift::TGetFunctionsResp& return_val,
171  const apache::hive::service::cli::thrift::TGetFunctionsReq& request);
172  virtual void GetOperationStatus(
173  apache::hive::service::cli::thrift::TGetOperationStatusResp& return_val,
174  const apache::hive::service::cli::thrift::TGetOperationStatusReq& request);
175  virtual void CancelOperation(
176  apache::hive::service::cli::thrift::TCancelOperationResp& return_val,
177  const apache::hive::service::cli::thrift::TCancelOperationReq& request);
178  virtual void CloseOperation(
179  apache::hive::service::cli::thrift::TCloseOperationResp& return_val,
180  const apache::hive::service::cli::thrift::TCloseOperationReq& request);
181  virtual void GetResultSetMetadata(
182  apache::hive::service::cli::thrift::TGetResultSetMetadataResp& return_val,
183  const apache::hive::service::cli::thrift::TGetResultSetMetadataReq& request);
184  virtual void FetchResults(
185  apache::hive::service::cli::thrift::TFetchResultsResp& return_val,
186  const apache::hive::service::cli::thrift::TFetchResultsReq& request);
187  virtual void GetLog(apache::hive::service::cli::thrift::TGetLogResp& return_val,
188  const apache::hive::service::cli::thrift::TGetLogReq& request);
189  virtual void GetExecSummary(TGetExecSummaryResp& return_val,
190  const TGetExecSummaryReq& request);
191  virtual void GetRuntimeProfile(TGetRuntimeProfileResp& return_val,
192  const TGetRuntimeProfileReq& request);
193  virtual void GetDelegationToken(
194  apache::hive::service::cli::thrift::TGetDelegationTokenResp& return_val,
195  const apache::hive::service::cli::thrift::TGetDelegationTokenReq& req);
196  virtual void CancelDelegationToken(
197  apache::hive::service::cli::thrift::TCancelDelegationTokenResp& return_val,
198  const apache::hive::service::cli::thrift::TCancelDelegationTokenReq& req);
199  virtual void RenewDelegationToken(
200  apache::hive::service::cli::thrift::TRenewDelegationTokenResp& return_val,
201  const apache::hive::service::cli::thrift::TRenewDelegationTokenReq& req);
202 
205  void ReportExecStatus(TReportExecStatusResult& return_val,
206  const TReportExecStatusParams& params);
207  void TransmitData(TTransmitDataResult& return_val,
208  const TTransmitDataParams& params);
209 
214  static void PrepareQueryContext(TQueryCtx* query_ctx);
215 
217 
221  virtual void ConnectionStart(const ThriftServer::ConnectionContext& session_context);
222 
225  virtual void ConnectionEnd(const ThriftServer::ConnectionContext& session_context);
226 
233  incoming_topic_deltas, std::vector<TTopicDelta>* subscriber_topic_updates);
234 
236  std::vector<TTopicDelta>* topic_updates);
237 
// Returns the current value of is_offline_. The flag is read while holding
// is_offline_lock_, the mutex that protects is_offline_.
239  bool IsOffline() {
240  boost::lock_guard<boost::mutex> l(is_offline_lock_);
241  return is_offline_;
242  }
243 
246 
247  private:
248  friend class ChildQuery;
249 
254  public:
256  virtual ~QueryResultSet() {}
257 
262  virtual Status AddOneRow(
263  const std::vector<void*>& row, const std::vector<int>& scales) = 0;
264 
267  virtual Status AddOneRow(const TResultRow& row) = 0;
268 
272  virtual int AddRows(const QueryResultSet* other, int start_idx, int num_rows) = 0;
273 
// Convenience overload: forwards to the pure-virtual ByteSize(start_idx, num_rows)
// with start_idx = 0 and num_rows = size(), i.e. covering every row in the set.
275  int64_t ByteSize() { return ByteSize(0, size()); }
276 
278  virtual int64_t ByteSize(int start_idx, int num_rows) = 0;
279 
281  virtual size_t size() = 0;
282  };
283 
285  class AsciiQueryResultSet;
286  class HS2RowOrientedResultSet;
287  class HS2ColumnarResultSet;
288 
289  struct SessionState;
290 
293 
297  static const char* SQLSTATE_GENERAL_ERROR;
299 
301  static const int ASCII_PRECISION;
302 
304  apache::hive::service::cli::thrift::TProtocolVersion::type version,
305  const TResultSetMetadata& metadata,
306  apache::hive::service::cli::thrift::TRowSet* rowset = NULL);
307 
311  boost::shared_ptr<QueryExecState> GetQueryExecState(
312  const TUniqueId& query_id, bool lock);
313 
316  bool GetSessionIdForQuery(const TUniqueId& query_id, TUniqueId* session_id);
317 
320 
329  Status Execute(TQueryCtx* query_ctx,
330  boost::shared_ptr<SessionState> session_state,
331  boost::shared_ptr<QueryExecState>* exec_state);
332 
334  Status ExecuteInternal(const TQueryCtx& query_ctx,
335  boost::shared_ptr<SessionState> session_state,
336  bool* registered_exec_state,
337  boost::shared_ptr<QueryExecState>* exec_state);
338 
342  Status RegisterQuery(boost::shared_ptr<SessionState> session_state,
343  const boost::shared_ptr<QueryExecState>& exec_state);
344 
354  Status SetQueryInflight(boost::shared_ptr<SessionState> session_state,
355  const boost::shared_ptr<QueryExecState>& exec_state);
356 
362  Status UnregisterQuery(const TUniqueId& query_id, bool check_inflight,
363  const Status* cause = NULL);
364 
371  Status CancelInternal(const TUniqueId& query_id, bool check_inflight,
372  const Status* cause = NULL);
373 
378  Status CloseSessionInternal(const TUniqueId& session_id, bool ignore_if_absent);
379 
387  Status GetRuntimeProfileStr(const TUniqueId& query_id, bool base64_encoded,
388  std::stringstream* output);
389 
391  Status GetExecSummary(const TUniqueId& query_id, TExecSummary* result);
392 
396  rapidjson::Document* document);
397 
400  //
425  rapidjson::Document* document);
426 
430  rapidjson::Document* document);
431 
443  void QuerySummaryCallback(bool include_plan_json, bool include_summary,
444  const Webserver::ArgumentMap& args, rapidjson::Document* document);
445 
448  rapidjson::Document* document);
449 
453  rapidjson::Document* document);
454 
458  rapidjson::Document* document);
459 
479  rapidjson::Document* document);
480 
482  void CatalogUrlCallback(const Webserver::ArgumentMap& args, rapidjson::Document* output);
483 
486  rapidjson::Document* output);
487 
492 
494  void RegisterWebserverCallbacks(Webserver* webserver);
495 
502 
508 
514 
517  static Status InitLoggingDir(const std::string& log_dir);
518 
521 
522  Status LogAuditRecord(const QueryExecState& exec_state, const TExecRequest& request);
523 
524  Status LogLineageRecord(const TExecRequest& request);
525 
527  void LogQueryEvents(const QueryExecState& exec_state);
528 
530  void LogFileFlushThread();
531 
534 
537 
541  void ArchiveQuery(const QueryExecState& query);
542 
546  Status AuthorizeProxyUser(const std::string& user, const std::string& do_as_user);
547 
551  std::string profile_str;
552 
554  std::string encoded_profile_str;
555 
557  TUniqueId id;
558 
562  std::string effective_user;
563 
565  std::string default_db;
566 
568  std::string stmt;
569 
571  std::string plan;
572 
574  TStmtType::type stmt_type;
575 
577  bool has_coord;
578 
581 
584 
587 
589  beeswax::QueryState::type query_state;
590 
593 
595  TExecSummary exec_summary;
596 
598 
600  TEventSequence event_sequence;
601 
// Plan fragments recorded for this query state record. Spelled std::vector so
// the header does not rely on a using-directive leaking in from another include;
// this matches every other vector use in this file.
604  std::vector<TPlanFragment> fragments;
605 
611  QueryStateRecord(const QueryExecState& exec_state, bool copy_profile = false,
612  const std::string& encoded_str = "");
613 
616 
618  bool operator() (const QueryStateRecord& lhs, const QueryStateRecord& rhs) const;
619  };
620 
624  rapidjson::Value* value, rapidjson::Document* document);
625 
627 
629  Status QueryToTQueryContext(const beeswax::Query& query, TQueryCtx* query_ctx);
630  void TUniqueIdToQueryHandle(const TUniqueId& query_id, beeswax::QueryHandle* handle);
631  void QueryHandleToTUniqueId(const beeswax::QueryHandle& handle, TUniqueId* query_id);
632 
634  void RaiseBeeswaxException(const std::string& msg, const char* sql_state);
635 
637  Status FetchInternal(const TUniqueId& query_id, bool start_over,
638  int32_t fetch_size, beeswax::Results* query_results);
639 
642  Status CloseInsertInternal(const TUniqueId& query_id, TInsertResult* insert_result);
643 
645 
652  void ExecuteMetadataOp(
653  const apache::hive::service::cli::thrift::THandleIdentifier& session_handle,
654  TMetadataOpRequest* request,
655  apache::hive::service::cli::thrift::TOperationHandle* handle,
656  apache::hive::service::cli::thrift::TStatus* status);
657 
661  Status FetchInternal(const TUniqueId& query_id, int32_t fetch_size, bool fetch_first,
662  apache::hive::service::cli::thrift::TFetchResultsResp* fetch_results);
663 
665 
668  const apache::hive::service::cli::thrift::THandleIdentifier& handle,
669  TUniqueId* unique_id, TUniqueId* secret);
670  static void TUniqueIdToTHandleIdentifier(
671  const TUniqueId& unique_id, const TUniqueId& secret,
672  apache::hive::service::cli::thrift::THandleIdentifier* handle);
674  const apache::hive::service::cli::thrift::TExecuteStatementReq execute_request,
675  TQueryCtx* query_ctx);
676 
681  void CancelFromThreadPool(uint32_t thread_id,
682  const CancellationWork& cancellation_work);
683 
697  Status ProcessCatalogUpdateResult(const TCatalogUpdateResult& catalog_update_result,
698  bool wait_for_all_subscribers);
699 
704  void ExpireSessions();
705 
709  void ExpireQueries();
710 
715  void DetectNmFailures();
716 
718  void SetOffline(bool offline);
719 
721  boost::mutex query_log_lock_;
722 
724  typedef std::list<QueryStateRecord> QueryLog;
726 
728  typedef boost::unordered_map<TUniqueId, QueryLog::iterator> QueryLogIndex;
730 
733  boost::scoped_ptr<SimpleLogger> profile_logger_;
734 
737  boost::scoped_ptr<SimpleLogger> audit_event_logger_;
738 
741  boost::scoped_ptr<SimpleLogger> lineage_logger_;
742 
744  boost::scoped_ptr<Thread> profile_log_file_flush_thread_;
745 
747  boost::scoped_ptr<Thread> audit_event_logger_flush_thread_;
748 
750  boost::scoped_ptr<Thread> lineage_logger_flush_thread_;
751 
753  ExecEnv* exec_env_; // not owned
754 
757  boost::scoped_ptr<ThreadPool<CancellationWork> > cancellation_thread_pool_;
758 
760  boost::scoped_ptr<Thread> session_timeout_thread_;
761 
764  typedef boost::unordered_map<TUniqueId, boost::shared_ptr<QueryExecState> >
767  boost::mutex query_exec_state_map_lock_; // protects query_exec_state_map_
768 
770  TQueryOptions default_query_options_;
771  std::vector<beeswax::ConfigVariable> default_configs_;
772 
// Per-session state record. Instances are held through
// boost::shared_ptr<SessionState> by the session map and by ScopedSessionState.
776  struct SessionState {
// Initialises closed and expired to false, hs2_version to
// HIVE_CLI_SERVICE_PROTOCOL_V1 and ref_count to 0.
// NOTE(review): session_type is not in the initialiser list, so it is left
// uninitialised by this constructor — confirm every creator assigns it.
781  SessionState() : closed(false), expired(false), hs2_version(
782  apache::hive::service::cli::thrift::
783  TProtocolVersion::HIVE_CLI_SERVICE_PROTOCOL_V1), ref_count(0) { }
784 
785  TSessionType::type session_type;
786 
789 
// Connected user for this session, i.e. the user which originated this session.
791  std::string connected_user;
792 
// NOTE(review): presumably the user being impersonated via proxy-user
// delegation (cf. AuthorizeProxyUser) — confirm.
794  std::string do_as_user;
795 
797  TNetworkAddress network_address;
798 
// Mutex for this session; MarkSessionInactive() holds it while updating
// ref_count and last_accessed_ms.
801  boost::mutex lock;
802 
804  bool closed;
805 
810  bool expired;
811 
813  std::string database;
814 
// The default query options of this session.
816  TQueryOptions default_query_options;
817 
// For HS2 only, the protocol version this session is expecting.
819  apache::hive::service::cli::thrift::TProtocolVersion::type hs2_version;
820 
// Inflight queries belonging to this session.
822  boost::unordered_set<TUniqueId> inflight_queries;
823 
826 
// Count of outstanding users of this session; decremented under 'lock' by
// MarkSessionInactive(), which DCHECKs that it is > 0 first.
831  uint32_t ref_count;
832 
// NOTE(review): presumably fills *session_state with a Thrift representation
// of this session keyed by session_id; defined outside this header — confirm.
835  void ToThrift(const TUniqueId& session_id, TSessionState* session_state);
836  };
837 
843  public:
844  ScopedSessionState(ImpalaServer* impala) : impala_(impala) { }
845 
// Looks up the session identified by session_id and retains it in session_.
// DCHECKs that this object is not already holding a session. The lookup goes
// through ImpalaServer::GetSessionState() with mark_active = true; if it
// fails, the error Status is returned to the caller via RETURN_IF_ERROR.
// If 'session' is non-NULL, the retained shared_ptr is also copied into
// *session. Returns Status::OK on success.
849  Status WithSession(const TUniqueId& session_id,
850  boost::shared_ptr<SessionState>* session = NULL) {
851  DCHECK(session_.get() == NULL);
852  RETURN_IF_ERROR(impala_->GetSessionState(session_id, &session_, true));
853  if (session != NULL) (*session) = session_;
854  return Status::OK;
855  }
856 
859  if (session_.get() != NULL) {
861  }
862  }
863 
864  private:
866  boost::shared_ptr<SessionState> session_;
867 
870  };
871 
873  friend class ScopedSessionState;
874 
879 
881  typedef boost::unordered_map<TUniqueId, boost::shared_ptr<SessionState> >
884 
887 
892  typedef boost::unordered_map<TUniqueId, std::vector<TUniqueId> >
895 
900  Status GetSessionState(const TUniqueId& session_id,
901  boost::shared_ptr<SessionState>* session_state, bool mark_active = false);
902 
// Releases one reference on 'session'. While holding session->lock it DCHECKs
// that ref_count is positive, decrements it, and stamps last_accessed_ms with
// the current UnixMillis() value.
905  inline void MarkSessionInactive(boost::shared_ptr<SessionState> session) {
906  boost::lock_guard<boost::mutex> l(session->lock);
907  DCHECK_GT(session->ref_count, 0);
908  --session->ref_count;
909  session->last_accessed_ms = UnixMillis();
910  }
911 
914  boost::mutex query_locations_lock_;
915 
917  typedef boost::unordered_map<TNetworkAddress, boost::unordered_set<TUniqueId> >
920 
930  typedef boost::unordered_map<std::string, TNetworkAddress> BackendAddressMap;
932 
934  boost::uuids::random_generator uuid_generator_;
935 
937  boost::mutex uuid_lock_;
938 
941  boost::mutex catalog_version_lock_;
942 
944  boost::condition_variable catalog_version_update_cv_;
945 
949  catalog_version(0L),
951  }
952 
959  };
960 
963 
968 
972  typedef boost::unordered_map<std::string, boost::unordered_set<std::string> >
975 
978 
981  typedef std::pair<int64_t, TUniqueId> ExpirationEvent;
982 
// Strict weak ordering over ExpirationEvent (pair<int64_t, TUniqueId>):
// orders by the timestamp in .first, breaking ties with the TUniqueId in
// .second. Declared const because this comparator parameterises
// std::set (ExpirationQueue), which must be able to invoke it on a const
// comparator object; the body reads only its arguments.
985  bool operator()(const ExpirationEvent& t1, const ExpirationEvent& t2) const {
986  if (t1.first < t2.first) return true;
987  if (t2.first < t1.first) return false;
988  return t1.second < t2.second;
989  }
990  };
991 
997  //
1000  //
1004  //
1011  typedef std::set<ExpirationEvent, ExpirationEventComparator> ExpirationQueue;
1013 
1015  boost::scoped_ptr<Thread> query_expiration_thread_;
1016 
1018  boost::scoped_ptr<Thread> nm_failure_detection_thread_;
1019 
1021  boost::mutex is_offline_lock_;
1022 
1025 };
1026 
// Free function declared in namespace impala; the definition is not in this
// header. Takes the ExecEnv, the Beeswax, HiveServer2 and backend port
// numbers, and pointer-to-pointer parameters for three ThriftServers and the
// ImpalaServer. Returns a Status.
// NOTE(review): the ThriftServer** / ImpalaServer** parameters are presumably
// outputs set to newly created objects; ownership is not visible here — confirm.
1039 Status CreateImpalaServer(ExecEnv* exec_env, int beeswax_port, int hs2_port,
1040  int be_port, ThriftServer** beeswax_server, ThriftServer** hs2_server,
1041  ThriftServer** be_server, ImpalaServer** impala_server);
1042 
1043 }
1044 
1045 #endif
virtual void GetTables(apache::hive::service::cli::thrift::TGetTablesResp &return_val, const apache::hive::service::cli::thrift::TGetTablesReq &request)
void TransmitData(TTransmitDataResult &return_val, const TTransmitDataParams &params)
boost::scoped_ptr< ThreadPool< CancellationWork > > cancellation_thread_pool_
boost::scoped_ptr< Thread > audit_event_logger_flush_thread_
If audit event logging is enabled, wakes once every 5s to flush audit events to disk.
virtual void query(beeswax::QueryHandle &query_handle, const beeswax::Query &query)
ImpalaService rpcs: Beeswax API (implemented in impala-beeswax-server.cc)
boost::mutex query_expiration_lock_
Guards queries_by_timestamp_. Must not be acquired before a session state lock.
virtual void CloseOperation(apache::hive::service::cli::thrift::TCloseOperationResp &return_val, const apache::hive::service::cli::thrift::TCloseOperationReq &request)
virtual void explain(beeswax::QueryExplanation &query_explanation, const beeswax::Query &query)
Status TExecuteStatementReqToTQueryContext(const apache::hive::service::cli::thrift::TExecuteStatementReq execute_request, TQueryCtx *query_ctx)
void CatalogObjectsUrlCallback(const Webserver::ArgumentMap &args, rapidjson::Document *output)
Webserver callback that allows for dumping information on objects in the catalog. ...
virtual void echo(std::string &echo_string, const std::string &input_string)
std::string stmt
SQL statement text.
boost::mutex query_exec_state_map_lock_
Status CloseInsertInternal(const TUniqueId &query_id, TInsertResult *insert_result)
void QuerySummaryCallback(bool include_plan_json, bool include_summary, const Webserver::ArgumentMap &args, rapidjson::Document *document)
bool operator()(const ExpirationEvent &t1, const ExpirationEvent &t2)
void MembershipCallback(const StatestoreSubscriber::TopicDeltaMap &incoming_topic_deltas, std::vector< TTopicDelta > *subscriber_topic_updates)
boost::unordered_map< TUniqueId, QueryLog::iterator > QueryLogIndex
Index that allows lookup via TUniqueId into the query log.
virtual void CloseInsert(impala::TInsertResult &insert_result, const beeswax::QueryHandle &query_handle)
void ToThrift(const TUniqueId &session_id, TSessionState *session_state)
boost::uuids::random_generator uuid_generator_
Generate unique session id for HiveServer2 session.
static const char * SQLSTATE_SYNTAX_ERROR_OR_ACCESS_VIOLATION
virtual void RenewDelegationToken(apache::hive::service::cli::thrift::TRenewDelegationTokenResp &return_val, const apache::hive::service::cli::thrift::TRenewDelegationTokenReq &req)
void SetOffline(bool offline)
Set is_offline_ to the argument's value.
boost::mutex session_state_map_lock_
virtual void GetTypeInfo(apache::hive::service::cli::thrift::TGetTypeInfoResp &return_val, const apache::hive::service::cli::thrift::TGetTypeInfoReq &request)
boost::unordered_map< TUniqueId, std::vector< TUniqueId > > ConnectionToSessionMap
int64_t num_complete_fragments
The number of fragments that have completed.
virtual void get_default_configuration(std::vector< beeswax::ConfigVariable > &configurations, const bool include_hadoop)
ProxyUserMap authorized_proxy_user_config_
boost::mutex query_log_lock_
Guards query_log_ and query_log_index_.
int64_t num_rows_fetched
The number of rows fetched by the client.
boost::mutex connection_to_sessions_map_lock_
Protects connection_to_sessions_map_. May be taken before session_state_map_lock_.
const TUniqueId & query_id() const
Definition: coordinator.h:152
void QueryStateToJson(const ImpalaServer::QueryStateRecord &record, rapidjson::Value *value, rapidjson::Document *document)
Status CancelInternal(const TUniqueId &query_id, bool check_inflight, const Status *cause=NULL)
int64_t min_subscriber_catalog_topic_version_
virtual void get_results_metadata(beeswax::ResultsMetadata &results_metadata, const beeswax::QueryHandle &handle)
void SessionsUrlCallback(const Webserver::ArgumentMap &args, rapidjson::Document *document)
#define RETURN_IF_ERROR(stmt)
some generally useful macros
Definition: status.h:242
void CancelFromThreadPool(uint32_t thread_id, const CancellationWork &cancellation_work)
virtual void ExecuteStatement(apache::hive::service::cli::thrift::TExecuteStatementResp &return_val, const apache::hive::service::cli::thrift::TExecuteStatementReq &request)
virtual void PingImpalaService(TPingImpalaServiceResp &return_val)
Pings the Impala service and gets the server version string.
TUniqueId catalog_service_id
The CatalogService ID that this catalog version is from.
void LogQueryEvents(const QueryExecState &exec_state)
Log audit and column lineage events.
void RegisterWebserverCallbacks(Webserver *webserver)
Registers all the per-Impalad webserver callbacks.
boost::unordered_map< std::string, boost::unordered_set< std::string > > ProxyUserMap
Status RegisterQuery(boost::shared_ptr< SessionState > session_state, const boost::shared_ptr< QueryExecState > &exec_state)
int64_t catalog_topic_version
The statestore catalog topic version this update was received in.
std::pair< int64_t, TUniqueId > ExpirationEvent
virtual void ResetCatalog(impala::TStatus &status)
Performs a full catalog metadata reset, invalidating all table and database metadata.
virtual size_t size()=0
Returns the size of this result set in number of rows.
beeswax::QueryState::type query_state
The state of the query as of this snapshot.
virtual Status AddOneRow(const std::vector< void * > &row, const std::vector< int > &scales)=0
void CatalogUrlCallback(const Webserver::ArgumentMap &args, rapidjson::Document *output)
Webserver callback that prints a list of all known databases and tables.
QueryResultSet * CreateHS2ResultSet(apache::hive::service::cli::thrift::TProtocolVersion::type version, const TResultSetMetadata &metadata, apache::hive::service::cli::thrift::TRowSet *rowset=NULL)
boost::scoped_ptr< Thread > query_expiration_thread_
Container for a thread that runs ExpireQueries() if FLAGS_idle_query_timeout is set.
virtual void Cancel(impala::TStatus &status, const beeswax::QueryHandle &query_id)
ImpalaService rpcs: extensions over Beeswax (implemented in impala-beeswax-server.cc)
void InflightQueryIdsUrlCallback(const Webserver::ArgumentMap &args, rapidjson::Document *document)
virtual void fetch(beeswax::Results &query_results, const beeswax::QueryHandle &query_handle, const bool start_over, const int32_t fetch_size)
Interface class for receiving connection creation / termination events.
Definition: thrift-server.h:53
static void PrepareQueryContext(TQueryCtx *query_ctx)
boost::condition_variable catalog_version_update_cv_
Variable to signal when the catalog version has been modified.
void QueryStateUrlCallback(const Webserver::ArgumentMap &args, rapidjson::Document *document)
virtual void GetFunctions(apache::hive::service::cli::thrift::TGetFunctionsResp &return_val, const apache::hive::service::cli::thrift::TGetFunctionsReq &request)
std::set< ExpirationEvent, ExpirationEventComparator > ExpirationQueue
static const char * SQLSTATE_GENERAL_ERROR
Status Execute(TQueryCtx *query_ctx, boost::shared_ptr< SessionState > session_state, boost::shared_ptr< QueryExecState > *exec_state)
bool IsAuditEventLoggingEnabled()
Returns true if audit event logging is enabled, false otherwise.
bool IsLineageLoggingEnabled()
Returns true if lineage logging is enabled, false otherwise.
virtual void CancelOperation(apache::hive::service::cli::thrift::TCancelOperationResp &return_val, const apache::hive::service::cli::thrift::TCancelOperationReq &request)
Status CloseSessionInternal(const TUniqueId &session_id, bool ignore_if_absent)
QueryStateRecord()
Default constructor used only when participating in collections.
boost::mutex query_locations_lock_
std::map< std::string, std::string > ArgumentMap
Definition: webserver.h:36
static Status InitLoggingDir(const std::string &log_dir)
int64_t total_fragments
The total number of fragments.
ImpalaServer(ExecEnv *exec_env)
ExpirationQueue queries_by_timestamp_
int64_t UnixMillis()
Definition: time.h:51
Status InitAuditEventLogging()
Status FetchInternal(const TUniqueId &query_id, bool start_over, int32_t fetch_size, beeswax::Results *query_results)
Executes the fetch logic. Doesn't clean up the exec state if an error occurs.
virtual void GetRuntimeProfile(std::string &profile_output, const beeswax::QueryHandle &query_id)
static void TUniqueIdToTHandleIdentifier(const TUniqueId &unique_id, const TUniqueId &secret, apache::hive::service::cli::thrift::THandleIdentifier *handle)
TStmtType::type stmt_type
DDL, DML etc.
boost::unordered_map< TNetworkAddress, boost::unordered_set< TUniqueId > > QueryLocations
A map from backend to the list of queries currently running there.
bool operator()(const QueryStateRecord &lhs, const QueryStateRecord &rhs) const
Comparator that sorts by start time.
boost::scoped_ptr< Thread > session_timeout_thread_
Thread that runs ExpireSessions if FLAGS_idle_session_timeout > 0.
Status ProcessCatalogUpdateResult(const TCatalogUpdateResult &catalog_update_result, bool wait_for_all_subscribers)
boost::scoped_ptr< SimpleLogger > lineage_logger_
TimestampValue start_time
Time the session was created.
void HadoopVarzUrlCallback(const Webserver::ArgumentMap &args, rapidjson::Document *document)
virtual void CancelDelegationToken(apache::hive::service::cli::thrift::TCancelDelegationTokenResp &return_val, const apache::hive::service::cli::thrift::TCancelDelegationTokenReq &req)
Status WithSession(const TUniqueId &session_id, boost::shared_ptr< SessionState > *session=NULL)
TEventSequence event_sequence
Timeline of important query events.
virtual void CloseSession(apache::hive::service::cli::thrift::TCloseSessionResp &return_val, const apache::hive::service::cli::thrift::TCloseSessionReq &request)
QueryLocations query_locations_
void ExecuteMetadataOp(const apache::hive::service::cli::thrift::THandleIdentifier &session_handle, TMetadataOpRequest *request, apache::hive::service::cli::thrift::TOperationHandle *handle, apache::hive::service::cli::thrift::TStatus *status)
HiveServer2 private methods (implemented in impala-hs2-server.cc)
boost::mutex is_offline_lock_
Protects is_offline_.
TQueryOptions default_query_options
The default query options of this session.
virtual void FetchResults(apache::hive::service::cli::thrift::TFetchResultsResp &return_val, const apache::hive::service::cli::thrift::TFetchResultsReq &request)
virtual void GetExecSummary(impala::TExecSummary &result, const beeswax::QueryHandle &query_id)
Status GetSessionState(const TUniqueId &session_id, boost::shared_ptr< SessionState > *session_state, bool mark_active=false)
void CatalogUpdateCallback(const StatestoreSubscriber::TopicDeltaMap &topic_deltas, std::vector< TTopicDelta > *topic_updates)
ConnectionToSessionMap connection_to_sessions_map_
std::string encoded_profile_str
Base64 encoded runtime profile.
virtual void GetTableTypes(apache::hive::service::cli::thrift::TGetTableTypesResp &return_val, const apache::hive::service::cli::thrift::TGetTableTypesReq &request)
virtual void get_log(std::string &log, const beeswax::LogContextId &context)
virtual void ConnectionStart(const ThriftServer::ConnectionContext &session_context)
SessionHandlerIf methods.
bool GetSessionIdForQuery(const TUniqueId &query_id, TUniqueId *session_id)
void CancelQueryUrlCallback(const Webserver::ArgumentMap &args, rapidjson::Document *document)
Webserver callback. Cancels an in-flight query and writes the result to 'document'.
boost::shared_ptr< SessionState > session_
Reference-counted pointer to the session state object.
boost::unordered_set< TUniqueId > inflight_queries
Inflight queries belonging to this session.
Per-connection information.
Definition: thrift-server.h:45
SessionStateMap session_state_map_
boost::scoped_ptr< SimpleLogger > audit_event_logger_
boost::scoped_ptr< SimpleLogger > profile_logger_
Status GetRuntimeProfileStr(const TUniqueId &query_id, bool base64_encoded, std::stringstream *output)
virtual int AddRows(const QueryResultSet *other, int start_idx, int num_rows)=0
apache::hive::service::cli::thrift::TProtocolVersion::type hs2_version
For HS2 only, the protocol version this session is expecting.
virtual void dump_config(std::string &config)
static const int ASCII_PRECISION
Ascii output precision for double/float.
TExecSummary exec_summary
Summary of execution for this query.
std::vector< beeswax::ConfigVariable > default_configs_
void MarkSessionInactive(boost::shared_ptr< SessionState > session)
std::map< Statestore::TopicId, TTopicDelta > TopicDeltaMap
A TopicDeltaMap is passed to each callback. See UpdateCallback for more details.
static Status THandleIdentifierToTUniqueId(const apache::hive::service::cli::thrift::THandleIdentifier &handle, TUniqueId *unique_id, TUniqueId *secret)
Helper functions to translate between HiveServer2 and Impala structs.
ImpalaServer * impala_
Saved so that we can access ImpalaServer methods to get / return session state.
std::string connected_user
Connected user for this session, i.e. the user which originated this session.
virtual beeswax::QueryState::type get_state(const beeswax::QueryHandle &handle)
virtual void GetSchemas(apache::hive::service::cli::thrift::TGetSchemasResp &return_val, const apache::hive::service::cli::thrift::TGetSchemasReq &request)
virtual void executeAndWait(beeswax::QueryHandle &query_handle, const beeswax::Query &query, const beeswax::LogContextId &client_ctx)
QueryLogIndex query_log_index_
CatalogUpdateVersionInfo catalog_update_info_
The version information from the last successful call to UpdateCatalog().
boost::shared_ptr< QueryExecState > GetQueryExecState(const TUniqueId &query_id, bool lock)
Snapshot of a query's state, archived in the query log.
void QueryProfileEncodedUrlCallback(const Webserver::ArgumentMap &args, rapidjson::Document *document)
void ReportExecStatus(TReportExecStatusResult &return_val, const TReportExecStatusParams &params)
Status SetQueryInflight(boost::shared_ptr< SessionState > session_state, const boost::shared_ptr< QueryExecState > &exec_state)
QueryExecStateMap query_exec_state_map_
void TUniqueIdToQueryHandle(const TUniqueId &query_id, beeswax::QueryHandle *handle)
bool is_offline_
True if Impala server is offline, false otherwise.
static const char * SQLSTATE_OPTIONAL_FEATURE_NOT_IMPLEMENTED
std::string default_db
default db for this query
boost::unordered_map< std::string, TNetworkAddress > BackendAddressMap
void LineageLoggerFlushThread()
Runs once every 5s to flush the lineage log file to disk.
Status CreateImpalaServer(ExecEnv *exec_env, int beeswax_port, int hs2_port, int be_port, ThriftServer **beeswax_server, ThriftServer **hs2_server, ThriftServer **be_server, ImpalaServer **impala_server)
std::string database
The default database (changed as a result of 'use' query execution)
std::list< QueryStateRecord > QueryLog
FIFO list of query records, which are written after the query finishes executing. ...
~ScopedSessionState()
Decrements the reference count so the session can be expired correctly.
int64_t ByteSize()
Returns the approximate size of this result set in bytes.
std::string plan
Text representation of plan.
bool closed
If true, the session has been closed.
Status UpdateCatalogMetrics()
Updates the number of databases / tables metrics from the FE catalog.
static const Status OK
Definition: status.h:87
Comparator that breaks ties when two queries have identical expiration deadlines. ...
bool IsOffline()
Returns true if Impala is offline (and not accepting queries), false otherwise.
bool has_coord
True if the query required a coordinator fragment.
Status LogLineageRecord(const TExecRequest &request)
int64_t catalog_version
The last catalog version returned from UpdateCatalog()
virtual void GetColumns(apache::hive::service::cli::thrift::TGetColumnsResp &return_val, const apache::hive::service::cli::thrift::TGetColumnsReq &request)
boost::scoped_ptr< Thread > nm_failure_detection_thread_
Container thread for DetectNmFailures().
boost::scoped_ptr< Thread > profile_log_file_flush_thread_
If profile logging is enabled, wakes once every 5s to flush query profiles to disk.
virtual void GetInfo(apache::hive::service::cli::thrift::TGetInfoResp &return_val, const apache::hive::service::cli::thrift::TGetInfoReq &request)
void QueryHandleToTUniqueId(const beeswax::QueryHandle &handle, TUniqueId *query_id)
std::string profile_str
Pretty-printed runtime profile. TODO: Copy actual profile object.
void ArchiveQuery(const QueryExecState &query)
TQueryOptions default_query_options_
Default query options in the form of TQueryOptions and beeswax::ConfigVariable.
ExecEnv * exec_env_
global, per-server state
virtual void GetCatalogs(apache::hive::service::cli::thrift::TGetCatalogsResp &return_val, const apache::hive::service::cli::thrift::TGetCatalogsReq &request)
virtual void OpenSession(apache::hive::service::cli::thrift::TOpenSessionResp &return_val, const apache::hive::service::cli::thrift::TOpenSessionReq &request)
BackendAddressMap known_backends_
Status QueryToTQueryContext(const beeswax::Query &query, TQueryCtx *query_ctx)
Beeswax private methods.
boost::mutex uuid_lock_
Lock to protect uuid_generator.
boost::unordered_map< TUniqueId, boost::shared_ptr< QueryExecState > > QueryExecStateMap
virtual void GetLog(apache::hive::service::cli::thrift::TGetLogResp &return_val, const apache::hive::service::cli::thrift::TGetLogReq &request)
TNetworkAddress network_address
Client network address.
int64_t last_accessed_ms
Time the session was last accessed.
void QueryProfileUrlCallback(const Webserver::ArgumentMap &args, rapidjson::Document *document)
Contains details on the version information of a catalog update.
virtual void close(const beeswax::QueryHandle &handle)
virtual void clean(const beeswax::LogContextId &log_context)
TimestampValue start_time
Start and end time of the query.
virtual void ResetTable(impala::TStatus &status, const TResetTableReq &request)
virtual void GetResultSetMetadata(apache::hive::service::cli::thrift::TGetResultSetMetadataResp &return_val, const apache::hive::service::cli::thrift::TGetResultSetMetadataReq &request)
virtual void GetDelegationToken(apache::hive::service::cli::thrift::TGetDelegationTokenResp &return_val, const apache::hive::service::cli::thrift::TGetDelegationTokenReq &req)
std::string do_as_user
The user to delegate to. Empty for no delegation.
boost::scoped_ptr< Thread > lineage_logger_flush_thread_
If lineage logging is enabled, wakes once every 5s to flush lineage events to disk.
void AuditEventLoggerFlushThread()
Runs once every 5s to flush the audit log file to disk.
virtual void ConnectionEnd(const ThriftServer::ConnectionContext &session_context)
virtual void GetOperationStatus(apache::hive::service::cli::thrift::TGetOperationStatusResp &return_val, const apache::hive::service::cli::thrift::TGetOperationStatusReq &request)
boost::mutex catalog_version_lock_
Status ExecuteInternal(const TQueryCtx &query_ctx, boost::shared_ptr< SessionState > session_state, bool *registered_exec_state, boost::shared_ptr< QueryExecState > *exec_state)
Implements Execute() logic, but doesn't unregister query on error.
void LogFileFlushThread()
Runs once every 5s to flush the profile log file to disk.
boost::unordered_map< TUniqueId, boost::shared_ptr< SessionState > > SessionStateMap
A map from session identifier to a structure containing per-session information.
void RaiseBeeswaxException(const std::string &msg, const char *sql_state)
Helper function to raise BeeswaxException.
Status AuthorizeProxyUser(const std::string &user, const std::string &do_as_user)
Status UnregisterQuery(const TUniqueId &query_id, bool check_inflight, const Status *cause=NULL)
Status LogAuditRecord(const QueryExecState &exec_state, const TExecRequest &request)