diff --git a/src/alerts/alert_structs.rs b/src/alerts/alert_structs.rs index c37f30d53..fa7210f11 100644 --- a/src/alerts/alert_structs.rs +++ b/src/alerts/alert_structs.rs @@ -77,7 +77,7 @@ pub struct BasicAlertFields { #[derive(Debug)] pub struct Alerts { - pub alerts: RwLock>>, + pub alerts: RwLock>>>, pub sender: mpsc::Sender, } @@ -288,7 +288,7 @@ pub struct AlertRequest { } impl AlertRequest { - pub async fn into(self) -> Result { + pub async fn into(self, tenant_id: Option) -> Result { // Validate that other_fields doesn't contain reserved field names let other_fields = if let Some(mut other_fields) = self.other_fields { // Limit other_fields to maximum 10 fields @@ -316,7 +316,7 @@ impl AlertRequest { // Validate that all target IDs exist for id in &self.targets { - TARGETS.get_target_by_id(id).await?; + TARGETS.get_target_by_id(id, &tenant_id).await?; } let datasets = resolve_stream_names(&self.query)?; @@ -369,6 +369,7 @@ impl AlertRequest { tags: self.tags, last_triggered_at: None, other_fields, + tenant_id, }; Ok(config) @@ -399,6 +400,7 @@ pub struct AlertConfig { pub last_triggered_at: Option>, #[serde(flatten)] pub other_fields: Option>, + pub tenant_id: Option, } #[derive(Debug, serde::Serialize, serde::Deserialize, Clone)] @@ -711,6 +713,7 @@ pub struct DailyMTTRStats { pub struct MTTRHistory { /// Array of daily MTTR statistics pub daily_stats: Vec, + pub tenant_id: Option, } /// Query parameters for MTTR API endpoint @@ -883,7 +886,7 @@ impl MetastoreObject for AlertConfig { } fn get_object_path(&self) -> String { - alert_json_path(self.id).to_string() + alert_json_path(self.id, &self.tenant_id).to_string() } } @@ -893,6 +896,6 @@ impl MetastoreObject for MTTRHistory { } fn get_object_path(&self) -> String { - mttr_json_path().to_string() + mttr_json_path(&self.tenant_id).to_string() } } diff --git a/src/alerts/alert_traits.rs b/src/alerts/alert_traits.rs index c686f27f2..992f6455b 100644 --- a/src/alerts/alert_traits.rs +++ b/src/alerts/alert_traits.rs @@ -76,6 +76,7 @@ pub trait AlertTrait: Debug + Send + Sync + MetastoreObject { fn get_datasets(&self) -> &[String]; fn to_alert_config(&self) -> AlertConfig; fn clone_box(&self) -> Box; + fn get_tenant_id(&self) -> &Option; } #[async_trait] @@ -86,25 +87,38 @@ pub trait AlertManagerTrait: Send + Sync { session: SessionKey, tags: Vec, ) -> Result, AlertError>; - async fn get_alert_by_id(&self, id: Ulid) -> Result, AlertError>; + async fn get_alert_by_id( + &self, + id: Ulid, + tenant_id: &Option, + ) -> Result, AlertError>; async fn update(&self, alert: &dyn AlertTrait); async fn update_state( &self, alert_id: Ulid, new_state: AlertState, trigger_notif: Option, + tenant_id: &Option, ) -> Result<(), AlertError>; async fn update_notification_state( &self, alert_id: Ulid, new_notification_state: NotificationState, + tenant_id: &Option, ) -> Result<(), AlertError>; - async fn delete(&self, alert_id: Ulid) -> Result<(), AlertError>; - async fn get_state(&self, alert_id: Ulid) -> Result; + async fn delete(&self, alert_id: Ulid, tenant_id: &Option) -> Result<(), AlertError>; + async fn get_state( + &self, + alert_id: Ulid, + tenant_id: &Option, + ) -> Result; async fn start_task(&self, alert: Box) -> Result<(), AlertError>; async fn delete_task(&self, alert_id: Ulid) -> Result<(), AlertError>; - async fn list_tags(&self) -> Vec; - async fn get_all_alerts(&self) -> HashMap>; + async fn list_tags(&self, tenant_id: &Option) -> Vec; + async fn get_all_alerts( + &self, + tenant_id: &Option, + ) -> HashMap>; } #[async_trait] diff 
--git a/src/alerts/alert_types.rs b/src/alerts/alert_types.rs index d1494f658..faee43232 100644 --- a/src/alerts/alert_types.rs +++ b/src/alerts/alert_types.rs @@ -39,7 +39,10 @@ use crate::{ metastore::metastore_traits::MetastoreObject, parseable::PARSEABLE, query::resolve_stream_names, - rbac::map::SessionKey, + rbac::{ + map::{SessionKey, roles, users}, + role::model::DefaultPrivilege, + }, storage::object_storage::alert_json_path, utils::user_auth_for_query, }; @@ -68,11 +71,12 @@ pub struct ThresholdAlert { pub last_triggered_at: Option>, #[serde(flatten)] pub other_fields: Option>, + pub tenant_id: Option, } impl MetastoreObject for ThresholdAlert { fn get_object_path(&self) -> String { - alert_json_path(self.id).to_string() + alert_json_path(self.id, &self.tenant_id).to_string() } fn get_object_id(&self) -> String { @@ -84,7 +88,39 @@ impl MetastoreObject for ThresholdAlert { impl AlertTrait for ThresholdAlert { async fn eval_alert(&self) -> Result, AlertError> { let time_range = extract_time_range(&self.eval_config)?; - let query_result = execute_alert_query(self.get_query(), &time_range).await?; + let auth = if let Some(tenant) = &self.tenant_id + && let Some(tenant_users) = users().get(tenant) + && let Some(tenant_roles) = roles().get(tenant) + && let Some(user) = tenant_users.iter().find_map(|(_, user)| { + let mut res = None; + for role in &user.roles { + if let Some(role) = tenant_roles.get(role) + && role.contains(&DefaultPrivilege::Admin) + { + res = Some(user.clone()); + break; + } + } + res + }) { + // fetch admin credentials for tenant + match user.ty { + crate::rbac::user::UserType::Native(basic) => { + // Create a protected user whose details can't be edited + // save that user's basic auth + // use that to send request + None + } + crate::rbac::user::UserType::OAuth(_) => { + tracing::warn!("admin user is oauth"); + None + } + } + } else { + None + }; + let query_result = + execute_alert_query(auth, self.get_query(), &time_range, &self.tenant_id).await?; if query_result.is_simple_query { // Handle simple queries @@ -164,7 +200,7 @@ impl AlertTrait for ThresholdAlert { "No tables found in query".into(), )); } - create_streams_for_distributed(tables) + create_streams_for_distributed(tables, &self.tenant_id) .await .map_err(|_| AlertError::InvalidAlertQuery("Invalid tables".into()))?; @@ -191,7 +227,7 @@ impl AlertTrait for ThresholdAlert { // update on disk PARSEABLE .metastore - .put_alert(&self.to_alert_config()) + .put_alert(&self.to_alert_config(), &self.tenant_id) .await?; Ok(()) } @@ -217,12 +253,12 @@ impl AlertTrait for ThresholdAlert { // update on disk PARSEABLE .metastore - .put_alert(&self.to_alert_config()) + .put_alert(&self.to_alert_config(), &self.tenant_id) .await?; let state_entry = AlertStateEntry::new(self.id, self.state); PARSEABLE .metastore - .put_alert_state(&state_entry as &dyn MetastoreObject) + .put_alert_state(&state_entry as &dyn MetastoreObject, &self.tenant_id) .await?; return Ok(()); } @@ -257,13 +293,13 @@ impl AlertTrait for ThresholdAlert { // update on disk PARSEABLE .metastore - .put_alert(&self.to_alert_config()) + .put_alert(&self.to_alert_config(), &self.tenant_id) .await?; let state_entry = AlertStateEntry::new(self.id, self.state); PARSEABLE .metastore - .put_alert_state(&state_entry as &dyn MetastoreObject) + .put_alert_state(&state_entry as &dyn MetastoreObject, &self.tenant_id) .await?; if let Some(trigger_notif) = trigger_notif @@ -337,6 +373,10 @@ impl AlertTrait for ThresholdAlert { &self.datasets } + fn 
get_tenant_id(&self) -> &Option { + &self.tenant_id + } + fn to_alert_config(&self) -> AlertConfig { let clone = self.clone(); clone.into() @@ -414,6 +454,7 @@ impl From for ThresholdAlert { datasets: value.datasets, last_triggered_at: value.last_triggered_at, other_fields: value.other_fields, + tenant_id: value.tenant_id, } } } @@ -438,6 +479,7 @@ impl From for AlertConfig { datasets: val.datasets, last_triggered_at: val.last_triggered_at, other_fields: val.other_fields, + tenant_id: val.tenant_id, } } } diff --git a/src/alerts/alerts_utils.rs b/src/alerts/alerts_utils.rs index 0d5552f31..08f8ebffe 100644 --- a/src/alerts/alerts_utils.rs +++ b/src/alerts/alerts_utils.rs @@ -75,12 +75,14 @@ pub fn extract_time_range(eval_config: &super::EvalConfig) -> Result, query: &str, time_range: &TimeRange, + tenant_id: &Option, ) -> Result { match PARSEABLE.options.mode { - Mode::All | Mode::Query => execute_local_query(query, time_range).await, - Mode::Prism => execute_remote_query(query, time_range).await, + Mode::All | Mode::Query => execute_local_query(query, time_range, tenant_id).await, + Mode::Prism => execute_remote_query(auth_token, query, time_range, tenant_id).await, _ => Err(AlertError::CustomError(format!( "Unsupported mode '{:?}' for alert evaluation", PARSEABLE.options.mode @@ -92,11 +94,12 @@ pub async fn execute_alert_query( async fn execute_local_query( query: &str, time_range: &TimeRange, + tenant_id: &Option, ) -> Result { - let session_state = QUERY_SESSION.state(); + let session_state = QUERY_SESSION.get_ctx().state(); let tables = resolve_stream_names(query)?; - create_streams_for_distributed(tables.clone()) + create_streams_for_distributed(tables.clone(), tenant_id) .await .map_err(|err| AlertError::CustomError(format!("Failed to create streams: {err}")))?; @@ -107,7 +110,7 @@ async fn execute_local_query( filter_tag: None, }; - let (records, _) = execute(query, false) + let (records, _) = execute(query, false, tenant_id) .await .map_err(|err| AlertError::CustomError(format!("Failed to execute query: {err}")))?; @@ -125,10 +128,12 @@ async fn execute_local_query( /// Execute alert query remotely (Prism mode) async fn execute_remote_query( + auth_token: Option, query: &str, time_range: &TimeRange, + tenant_id: &Option, ) -> Result { - let session_state = QUERY_SESSION.state(); + let session_state = QUERY_SESSION.get_ctx().state(); let raw_logical_plan = session_state.create_logical_plan(query).await?; let query_request = Query { @@ -141,7 +146,7 @@ async fn execute_remote_query( filter_tags: None, }; - let (result_value, _) = send_query_request(&query_request) + let (result_value, _) = send_query_request(None, &query_request, tenant_id) .await .map_err(|err| AlertError::CustomError(format!("Failed to send query request: {err}")))?; @@ -280,19 +285,34 @@ async fn update_alert_state( // Now perform the state update if let Some(msg) = message { alerts - .update_state(*alert.get_id(), AlertState::Triggered, Some(msg)) + .update_state( + *alert.get_id(), + AlertState::Triggered, + Some(msg), + alert.get_tenant_id(), + ) .await } else if alerts - .get_state(*alert.get_id()) + .get_state(*alert.get_id(), alert.get_tenant_id()) .await? 
.eq(&AlertState::Triggered) { alerts - .update_state(*alert.get_id(), AlertState::NotTriggered, Some("".into())) + .update_state( + *alert.get_id(), + AlertState::NotTriggered, + Some("".into()), + alert.get_tenant_id(), + ) .await } else { alerts - .update_state(*alert.get_id(), AlertState::NotTriggered, None) + .update_state( + *alert.get_id(), + AlertState::NotTriggered, + None, + alert.get_tenant_id(), + ) .await } } diff --git a/src/alerts/mod.rs b/src/alerts/mod.rs index 8fe01fcd2..7627e9bc6 100644 --- a/src/alerts/mod.rs +++ b/src/alerts/mod.rs @@ -58,14 +58,14 @@ use crate::alerts::alert_types::ThresholdAlert; use crate::alerts::target::{NotificationConfig, TARGETS}; use crate::handlers::http::fetch_schema; use crate::metastore::MetastoreError; -use crate::parseable::{PARSEABLE, StreamNotFound}; +use crate::parseable::{DEFAULT_TENANT, PARSEABLE, StreamNotFound}; use crate::query::{QUERY_SESSION, resolve_stream_names}; use crate::rbac::map::{SessionKey, sessions}; use crate::sse::{SSE_HANDLER, SSEAlertInfo, SSEEvent}; use crate::storage; use crate::storage::ObjectStorageError; use crate::sync::alert_runtime; -use crate::utils::user_auth_for_query; +use crate::utils::{get_tenant_id_from_key, user_auth_for_query}; // these types describe the scheduled task for an alert pub type ScheduledTaskHandlers = (JoinHandle<()>, Receiver<()>, Sender<()>); @@ -104,11 +104,14 @@ pub fn create_default_alerts_manager() -> Alerts { impl AlertConfig { /// Migration function to convert v1 alerts to v2 structure - pub async fn migrate_from_v1(alert_json: &JsonValue) -> Result { + pub async fn migrate_from_v1( + alert_json: &JsonValue, + tenant_id: &Option, + ) -> Result { let basic_fields = Self::parse_basic_fields(alert_json)?; let alert_info = format!("Alert '{}' (ID: {})", basic_fields.title, basic_fields.id); - let query = Self::build_query_from_v1(alert_json, &alert_info).await?; + let query = Self::build_query_from_v1(alert_json, &alert_info, tenant_id).await?; let datasets = resolve_stream_names(&query)?; let threshold_config = Self::extract_threshold_config(alert_json, &alert_info)?; let eval_config = Self::extract_eval_config(alert_json, &alert_info)?; @@ -134,10 +137,14 @@ impl AlertConfig { tags: None, last_triggered_at: None, other_fields: None, + tenant_id: tenant_id.clone(), }; // Save the migrated alert back to storage - PARSEABLE.metastore.put_alert(&migrated_alert).await?; + PARSEABLE + .metastore + .put_alert(&migrated_alert, tenant_id) + .await?; Ok(migrated_alert) } @@ -180,6 +187,7 @@ impl AlertConfig { async fn build_query_from_v1( alert_json: &JsonValue, alert_info: &str, + tenant_id: &Option, ) -> Result { let stream = alert_json["stream"].as_str().ok_or_else(|| { AlertError::CustomError(format!("Missing stream in v1 alert for {alert_info}")) @@ -192,7 +200,8 @@ impl AlertConfig { let base_query = Self::build_base_query(&aggregate_function, aggregate_config, stream, alert_info)?; let final_query = - Self::add_where_conditions(base_query, aggregate_config, stream, alert_info).await?; + Self::add_where_conditions(base_query, aggregate_config, stream, alert_info, tenant_id) + .await?; Ok(final_query) } @@ -267,6 +276,7 @@ impl AlertConfig { aggregate_config: &JsonValue, stream: &str, alert_info: &str, + tenant_id: &Option, ) -> Result { let Some(conditions) = aggregate_config["conditions"].as_object() else { return Ok(base_query); @@ -281,7 +291,7 @@ impl AlertConfig { } // Fetch the stream schema for data type conversion - let schema = match fetch_schema(stream).await { + let 
schema = match fetch_schema(stream, tenant_id).await { Ok(schema) => schema, Err(e) => { return Err(AlertError::CustomError(format!( @@ -608,7 +618,7 @@ impl AlertConfig { context.message.clone_from(&message); for target_id in &self.targets { - let target = TARGETS.get_target_by_id(target_id).await?; + let target = TARGETS.get_target_by_id(target_id, &self.tenant_id).await?; trace!("Target (trigger_notifications)-\n{target:?}"); target.call(context.clone()); } @@ -722,7 +732,7 @@ impl AlertConfig { /// Check if a query is an aggregate query that returns a single value without executing it pub async fn get_number_of_agg_exprs(query: &str) -> Result { - let session_state = QUERY_SESSION.state(); + let session_state = QUERY_SESSION.get_ctx().state(); // Parse the query into a logical plan let logical_plan = session_state @@ -736,7 +746,7 @@ pub async fn get_number_of_agg_exprs(query: &str) -> Result { /// Extract the projection which deals with aggregation pub async fn get_aggregate_projection(query: &str) -> Result { - let session_state = QUERY_SESSION.state(); + let session_state = QUERY_SESSION.get_ctx().state(); // Parse the query into a logical plan let logical_plan = session_state @@ -1032,92 +1042,104 @@ impl AlertManagerTrait for Alerts { let mut map = self.alerts.write().await; - for raw_bytes in raw_objects { - // First, try to parse as JSON Value to check version - let json_value: JsonValue = match serde_json::from_slice(&raw_bytes) { - Ok(val) => val, - Err(e) => { - error!("Failed to parse alert JSON: {e}"); - continue; - } + for (tenant_id, raw_bytes) in raw_objects { + let tenant = if tenant_id.is_empty() { + &None + } else { + &Some(tenant_id.clone()) }; + for alert_bytes in raw_bytes { + // First, try to parse as JSON Value to check version + let json_value: JsonValue = match serde_json::from_slice(&alert_bytes) { + Ok(val) => val, + Err(e) => { + error!("Failed to parse alert JSON: {e}"); + continue; + } + }; - // Check version and handle migration - let alert = if let Some(version_str) = json_value["version"].as_str() { - if version_str == "v1" - || json_value["query"].is_null() - || json_value.get("stream").is_some() - { - // This is a v1 alert that needs migration - match AlertConfig::migrate_from_v1(&json_value).await { - Ok(migrated) => migrated, - Err(e) => { - error!("Failed to migrate v1 alert: {e}"); - continue; + // Check version and handle migration + let mut alert = if let Some(version_str) = json_value["version"].as_str() { + if version_str == "v1" + || json_value["query"].is_null() + || json_value.get("stream").is_some() + { + // This is a v1 alert that needs migration + match AlertConfig::migrate_from_v1(&json_value, tenant).await { + Ok(migrated) => migrated, + Err(e) => { + error!("Failed to migrate v1 alert: {e}"); + continue; + } + } + } else { + // Try to parse as v2 + match serde_json::from_value::(json_value) { + Ok(alert) => alert, + Err(e) => { + error!("Failed to parse v2 alert: {e}"); + continue; + } } } } else { - // Try to parse as v2 - match serde_json::from_value::(json_value) { - Ok(alert) => alert, + // No version field, assume v1 and migrate + warn!("Found alert without version field, assuming v1 and migrating"); + match AlertConfig::migrate_from_v1(&json_value, tenant).await { + Ok(migrated) => migrated, Err(e) => { - error!("Failed to parse v2 alert: {e}"); + error!("Failed to migrate alert without version: {e}"); continue; } } - } - } else { - // No version field, assume v1 and migrate - warn!("Found alert without version field, assuming 
v1 and migrating"); - match AlertConfig::migrate_from_v1(&json_value).await { - Ok(migrated) => migrated, - Err(e) => { - error!("Failed to migrate alert without version: {e}"); - continue; + }; + + // ensure that alert config's tenant is correctly set + alert.tenant_id = tenant.clone(); + + let alert: Box = match &alert.alert_type { + AlertType::Threshold => { + Box::new(ThresholdAlert::from(alert)) as Box } - } - }; + AlertType::Anomaly(_) => { + return Err(anyhow::Error::msg( + AlertError::NotPresentInOSS("anomaly").to_string(), + )); + } + AlertType::Forecast(_) => { + return Err(anyhow::Error::msg( + AlertError::NotPresentInOSS("forecast").to_string(), + )); + } + }; - let alert: Box = match &alert.alert_type { - AlertType::Threshold => { - Box::new(ThresholdAlert::from(alert)) as Box - } - AlertType::Anomaly(_) => { - return Err(anyhow::Error::msg( - AlertError::NotPresentInOSS("anomaly").to_string(), - )); - } - AlertType::Forecast(_) => { - return Err(anyhow::Error::msg( - AlertError::NotPresentInOSS("forecast").to_string(), - )); + // Create alert task iff alert's state is not paused + if alert.get_state().eq(&AlertState::Disabled) { + map.entry(tenant_id.clone()) + .or_default() + .insert(*alert.get_id(), alert); + continue; } - }; - // Create alert task iff alert's state is not paused - if alert.get_state().eq(&AlertState::Disabled) { - map.insert(*alert.get_id(), alert); - continue; - } - - match self.sender.send(AlertTask::Create(alert.clone_box())).await { - Ok(_) => {} - Err(e) => { - warn!("Failed to create alert task: {e}\nRetrying..."); - // Retry sending the task - match self.sender.send(AlertTask::Create(alert.clone_box())).await { - Ok(_) => {} - Err(e) => { - error!("Failed to create alert task: {e}"); - continue; + match self.sender.send(AlertTask::Create(alert.clone_box())).await { + Ok(_) => {} + Err(e) => { + warn!("Failed to create alert task: {e}\nRetrying..."); + // Retry sending the task + match self.sender.send(AlertTask::Create(alert.clone_box())).await { + Ok(_) => {} + Err(e) => { + error!("Failed to create alert task: {e}"); + continue; + } } } - } - }; - - map.insert(*alert.get_id(), alert); + }; + map.entry(tenant_id.clone()) + .or_default() + .insert(*alert.get_id(), alert); + } } - Ok(()) } @@ -1127,13 +1149,23 @@ impl AlertManagerTrait for Alerts { session: SessionKey, tags: Vec, ) -> Result, AlertError> { + let tenant_id = get_tenant_id_from_key(&session); + let tenant = tenant_id.as_ref().map_or(DEFAULT_TENANT, |v| v); // First, collect all alerts without performing auth checks to avoid holding the lock let all_alerts: Vec = { let alerts_guard = self.alerts.read().await; - alerts_guard - .values() - .map(|alert| alert.to_alert_config()) - .collect() + if let Some(alerts) = alerts_guard.get(tenant) { + alerts + .values() + .map(|alert| alert.to_alert_config()) + .collect() + } else { + vec![] + } + // alerts_guard + // .values() + // .map(|alert| alert.to_alert_config()) + // .collect() }; // Lock is released here, now perform expensive auth checks @@ -1189,9 +1221,16 @@ impl AlertManagerTrait for Alerts { } /// Returns a single alert that the user has access to (based on query auth) - async fn get_alert_by_id(&self, id: Ulid) -> Result, AlertError> { + async fn get_alert_by_id( + &self, + id: Ulid, + tenant_id: &Option, + ) -> Result, AlertError> { + let tenant = tenant_id.as_ref().map_or(DEFAULT_TENANT, |v| v); let read_access = self.alerts.read().await; - if let Some(alert) = read_access.get(&id) { + if let Some(alerts) = read_access.get(tenant) 
+ && let Some(alert) = alerts.get(&id) + { Ok(alert.clone_box()) } else { Err(AlertError::CustomError(format!( @@ -1202,9 +1241,12 @@ impl AlertManagerTrait for Alerts { /// Update the in-mem vector of alerts async fn update(&self, alert: &dyn AlertTrait) { + let tenant = alert.get_tenant_id().as_ref().map_or(DEFAULT_TENANT, |v| v); self.alerts .write() .await + .entry(tenant.to_owned()) + .or_default() .insert(*alert.get_id(), alert.clone_box()); } @@ -1214,10 +1256,14 @@ impl AlertManagerTrait for Alerts { alert_id: Ulid, new_state: AlertState, trigger_notif: Option, + tenant_id: &Option, ) -> Result<(), AlertError> { let (mut alert, should_delete_task, should_create_task) = { let read_access = self.alerts.read().await; - let alert = if let Some(alert) = read_access.get(&alert_id) { + let alert = if let Some(alerts) = + read_access.get(tenant_id.as_ref().map_or(DEFAULT_TENANT, |v| v)) + && let Some(alert) = alerts.get(&alert_id) + { match &alert.get_alert_type() { AlertType::Threshold => Box::new(ThresholdAlert::from(alert.to_alert_config())) as Box, @@ -1233,6 +1279,22 @@ impl AlertManagerTrait for Alerts { "No alert found for the given ID- {alert_id}" ))); }; + // let alert = if let Some(alert) = read_access.get(&alert_id) { + // match &alert.get_alert_type() { + // AlertType::Threshold => Box::new(ThresholdAlert::from(alert.to_alert_config())) + // as Box, + // AlertType::Anomaly(_) => { + // return Err(AlertError::NotPresentInOSS("anomaly")); + // } + // AlertType::Forecast(_) => { + // return Err(AlertError::NotPresentInOSS("forecast")); + // } + // } + // } else { + // return Err(AlertError::CustomError(format!( + // "No alert found for the given ID- {alert_id}" + // ))); + // }; let current_state = *alert.get_state(); let should_delete_task = @@ -1268,7 +1330,12 @@ impl AlertManagerTrait for Alerts { // Finally, update the in-memory state with a brief write lock { let mut write_access = self.alerts.write().await; - write_access.insert(*alert.get_id(), alert.clone_box()); + + let tenant = alert.get_tenant_id().as_ref().map_or(DEFAULT_TENANT, |v| v); + if let Some(alerts) = write_access.get_mut(tenant) { + alerts.insert(*alert.get_id(), alert.clone_box()); + } + // write_access.insert(*alert.get_id(), alert.clone_box()); } Ok(()) @@ -1279,10 +1346,14 @@ impl AlertManagerTrait for Alerts { &self, alert_id: Ulid, new_notification_state: NotificationState, + tenant_id: &Option, ) -> Result<(), AlertError> { // read and modify alert let mut write_access = self.alerts.write().await; - let mut alert: Box = if let Some(alert) = write_access.get(&alert_id) { + let tenant = tenant_id.as_ref().map_or(DEFAULT_TENANT, |v| v); + let mut alert: Box = if let Some(alerts) = write_access.get(tenant) + && let Some(alert) = alerts.get(&alert_id) + { match &alert.get_alert_type() { AlertType::Threshold => { Box::new(ThresholdAlert::from(alert.to_alert_config())) as Box @@ -1303,26 +1374,44 @@ impl AlertManagerTrait for Alerts { alert .update_notification_state(new_notification_state) .await?; - write_access.insert(*alert.get_id(), alert.clone_box()); + if let Some(alerts) = write_access.get_mut(tenant) { + alerts.insert(*alert.get_id(), alert.clone_box()); + } + // write_access.insert(*alert.get_id(), alert.clone_box()); Ok(()) } /// Remove alert and scheduled task from disk and memory - async fn delete(&self, alert_id: Ulid) -> Result<(), AlertError> { - if self.alerts.write().await.remove(&alert_id).is_some() { + async fn delete(&self, alert_id: Ulid, tenant_id: &Option) -> Result<(), AlertError> { + 
let tenant = tenant_id.as_ref().map_or(DEFAULT_TENANT, |v| v); + if let Some(alerts) = self.alerts.write().await.get_mut(tenant) + && let Some(_) = alerts.remove(&alert_id) + { trace!("removed alert from memory"); } else { warn!("Alert ID- {alert_id} not found in memory!"); } + // if self.alerts.write().await.remove(&alert_id).is_some() { + // trace!("removed alert from memory"); + // } else { + // warn!("Alert ID- {alert_id} not found in memory!"); + // } Ok(()) } /// Get state of alert using alert_id - async fn get_state(&self, alert_id: Ulid) -> Result { + async fn get_state( + &self, + alert_id: Ulid, + tenant_id: &Option, + ) -> Result { + let tenant = tenant_id.as_ref().map_or(DEFAULT_TENANT, |v| v); let read_access = self.alerts.read().await; - if let Some(alert) = read_access.get(&alert_id) { + if let Some(alerts) = read_access.get(tenant) + && let Some(alert) = alerts.get(&alert_id) + { Ok(*alert.get_state()) } else { let msg = format!("No alert present for ID- {alert_id}"); @@ -1351,21 +1440,35 @@ impl AlertManagerTrait for Alerts { /// List tags from all alerts /// This function returns a list of unique tags from all alerts - async fn list_tags(&self) -> Vec { - let alerts = self.alerts.read().await; - let mut tags = alerts - .iter() - .filter_map(|(_, alert)| alert.get_tags().as_ref()) - .flat_map(|t| t.iter().cloned()) - .collect::>(); + async fn list_tags(&self, tenant_id: &Option) -> Vec { + let tenant = tenant_id.as_ref().map_or(DEFAULT_TENANT, |v| v); + // let alerts = self.alerts.read().await; + let mut tags = if let Some(alerts) = self.alerts.read().await.get(tenant) { + alerts + .iter() + .filter_map(|(_, alert)| alert.get_tags().as_ref()) + .flat_map(|t| t.iter().cloned()) + .collect::>() + } else { + vec![] + }; tags.sort(); tags.dedup(); tags } - async fn get_all_alerts(&self) -> HashMap> { - let alerts = self.alerts.read().await; - alerts.iter().map(|(k, v)| (*k, v.clone_box())).collect() + async fn get_all_alerts( + &self, + tenant_id: &Option, + ) -> HashMap> { + let tenant = tenant_id.as_ref().map_or(DEFAULT_TENANT, |v| v); + if let Some(alerts) = self.alerts.read().await.get(tenant) { + alerts.iter().map(|(k, v)| (*k, v.clone_box())).collect() + } else { + HashMap::new() + } + // let alerts = self.alerts.read().await; + // alerts.iter().map(|(k, v)| (*k, v.clone_box())).collect() } } diff --git a/src/alerts/target.rs b/src/alerts/target.rs index be7504c87..7258847b5 100644 --- a/src/alerts/target.rs +++ b/src/alerts/target.rs @@ -38,7 +38,7 @@ use url::Url; use crate::{ alerts::{AlertError, AlertState, Context, alert_traits::CallableTarget}, metastore::metastore_traits::MetastoreObject, - parseable::PARSEABLE, + parseable::{DEFAULT_TENANT, PARSEABLE}, storage::object_storage::target_json_path, }; @@ -50,7 +50,7 @@ pub static TARGETS: Lazy = Lazy::new(|| TargetConfigs { #[derive(Debug)] pub struct TargetConfigs { - pub target_configs: RwLock>, + pub target_configs: RwLock>>, } impl TargetConfigs { @@ -58,44 +58,82 @@ impl TargetConfigs { pub async fn load(&self) -> anyhow::Result<()> { let targets = PARSEABLE.metastore.get_targets().await?; let mut map = self.target_configs.write().await; - for target in targets { - map.insert(target.id, target); + for (tenant_id, targets) in targets { + let inner = targets + .into_iter() + .map(|mut t| { + t.tenant = Some(tenant_id.clone()); + (t.id, t) + }) + .collect(); + map.insert(tenant_id, inner); } Ok(()) } pub async fn update(&self, target: Target) -> Result<(), AlertError> { - 
PARSEABLE.metastore.put_target(&target).await?; + PARSEABLE + .metastore + .put_target(&target, &target.tenant) + .await?; let mut map = self.target_configs.write().await; - map.insert(target.id, target.clone()); + let tenant_id = target.tenant.as_ref().map_or(DEFAULT_TENANT, |v| v); + map.entry(tenant_id.to_owned()) + .or_default() + .insert(target.id, target); + // map.insert(target.id, target.clone()); Ok(()) } - pub async fn list(&self) -> Result, AlertError> { - let targets = self - .target_configs - .read() - .await - .values() - .cloned() - .collect_vec(); + pub async fn list(&self, tenant_id: &Option) -> Result, AlertError> { + let tenant_id = tenant_id.as_ref().map_or(DEFAULT_TENANT, |v| v); + let targets = if let Some(targets) = self.target_configs.read().await.get(tenant_id) { + targets.values().cloned().collect_vec() + } else { + vec![] + }; + // let targets = self + // .target_configs + // .read() + // .await + // .values() + // .cloned() + // .collect_vec(); Ok(targets) } - pub async fn get_target_by_id(&self, target_id: &Ulid) -> Result { - let target = self - .target_configs - .read() - .await - .get(target_id) - .ok_or(AlertError::InvalidTargetID(target_id.to_string())) - .cloned()?; + pub async fn get_target_by_id( + &self, + target_id: &Ulid, + tenant_id: &Option, + ) -> Result { + let tenant_id = tenant_id.as_ref().map_or(DEFAULT_TENANT, |v| v); + let target = if let Some(targets) = self.target_configs.read().await.get(tenant_id) { + targets + .get(target_id) + .ok_or(AlertError::InvalidTargetID(target_id.to_string())) + .cloned()? + } else { + return Err(AlertError::InvalidTargetID(target_id.to_string())); + }; + // let target = self + // .target_configs + // .read() + // .await + // .get(target_id) + // .ok_or(AlertError::InvalidTargetID(target_id.to_string())) + // .cloned()?; Ok(target) } - pub async fn delete(&self, target_id: &Ulid) -> Result { + pub async fn delete( + &self, + target_id: &Ulid, + tenant_id: &Option, + ) -> Result { + let tenant = tenant_id.as_ref().map_or(DEFAULT_TENANT, |v| v); // ensure that the target is not being used by any alert let guard = ALERTS.read().await; let alerts = if let Some(alerts) = guard.as_ref() { @@ -104,18 +142,28 @@ impl TargetConfigs { return Err(AlertError::CustomError("No AlertManager set".into())); }; - for (_, alert) in alerts.get_all_alerts().await.iter() { + for (_, alert) in alerts.get_all_alerts(tenant_id).await.iter() { if alert.get_targets().contains(target_id) { return Err(AlertError::TargetInUse); } } - let target = self - .target_configs - .write() - .await - .remove(target_id) - .ok_or(AlertError::InvalidTargetID(target_id.to_string()))?; - PARSEABLE.metastore.delete_target(&target).await?; + let target = if let Some(targets) = self.target_configs.write().await.get_mut(tenant) { + targets + .remove(target_id) + .ok_or(AlertError::InvalidTargetID(target_id.to_string()))? 
+ } else { + return Err(AlertError::InvalidTargetID(target_id.to_string())); + }; + // let target = self + // .target_configs + // .write() + // .await + // .remove(target_id) + // .ok_or(AlertError::InvalidTargetID(target_id.to_string()))?; + PARSEABLE + .metastore + .delete_target(&target, tenant_id) + .await?; Ok(target) } } @@ -143,6 +191,7 @@ pub struct Target { pub target: TargetType, #[serde(default = "Ulid::new")] pub id: Ulid, + pub tenant: Option, } impl Target { @@ -259,6 +308,7 @@ impl Target { let timeout = target_timeout.interval; let target = self.target.clone(); let alert_id = alert_context.alert_info.alert_id; + let tenant_id = self.tenant.clone(); let sleep_and_check_if_call = move |timeout_state: Arc>, current_state: AlertState| async move { @@ -291,7 +341,9 @@ impl Target { match retry { Retry::Infinite => loop { - let current_state = if let Ok(state) = alerts.get_state(alert_id).await { + let current_state = if let Ok(state) = + alerts.get_state(alert_id, &tenant_id).await + { state } else { *state.lock().unwrap() = TimeoutState::default(); @@ -309,7 +361,9 @@ impl Target { }, Retry::Finite(times) => { for _ in 0..(times - 1) { - let current_state = if let Ok(state) = alerts.get_state(alert_id).await { + let current_state = if let Ok(state) = + alerts.get_state(alert_id, &tenant_id).await + { state } else { *state.lock().unwrap() = TimeoutState::default(); @@ -363,6 +417,7 @@ pub struct TargetVerifier { pub notification_config: Option, #[serde(default = "Ulid::new")] pub id: Ulid, + pub tenant_id: Option, } impl TryFrom for Target { @@ -392,6 +447,7 @@ impl TryFrom for Target { name: value.name, target: value.target, id: value.id, + tenant: value.tenant_id, }) } } diff --git a/src/analytics.rs b/src/analytics.rs index 9e5de3d77..d31b6d947 100644 --- a/src/analytics.rs +++ b/src/analytics.rs @@ -119,7 +119,8 @@ impl Report { // check liveness of indexers // get the count of active and inactive indexers - let indexer_infos: Vec = cluster::get_node_info(NodeType::Indexer).await?; + let indexer_infos: Vec = + cluster::get_node_info(NodeType::Indexer, &None).await?; for indexer in indexer_infos { if check_liveness(&indexer.domain_name).await { active_indexers += 1; @@ -130,7 +131,8 @@ impl Report { // check liveness of queriers // get the count of active and inactive queriers - let query_infos: Vec = cluster::get_node_info(NodeType::Querier).await?; + let query_infos: Vec = + cluster::get_node_info(NodeType::Querier, &None).await?; for query in query_infos { if check_liveness(&query.domain_name).await { active_queriers += 1; @@ -204,21 +206,29 @@ fn total_event_stats() -> (Stats, Stats, Stats) { let mut deleted_parquet_bytes: u64 = 0; let mut deleted_json_bytes: u64 = 0; - for stream in PARSEABLE.streams.list() { - let Some(stats) = stats::get_current_stats(&stream, "json") else { - continue; - }; - total_events += stats.lifetime_stats.events; - total_parquet_bytes += stats.lifetime_stats.storage; - total_json_bytes += stats.lifetime_stats.ingestion; - - current_events += stats.current_stats.events; - current_parquet_bytes += stats.current_stats.storage; - current_json_bytes += stats.current_stats.ingestion; - - deleted_events += stats.deleted_stats.events; - deleted_parquet_bytes += stats.deleted_stats.storage; - deleted_json_bytes += stats.deleted_stats.ingestion; + let tenants = if let Some(tenants) = PARSEABLE.list_tenants() { + tenants.into_iter().map(|v| Some(v)).collect() + } else { + vec![None] + }; + + for tenant_id in tenants { + for stream in 
PARSEABLE.streams.list(&tenant_id) { + let Some(stats) = stats::get_current_stats(&stream, "json", &tenant_id) else { + continue; + }; + total_events += stats.lifetime_stats.events; + total_parquet_bytes += stats.lifetime_stats.storage; + total_json_bytes += stats.lifetime_stats.ingestion; + + current_events += stats.current_stats.events; + current_parquet_bytes += stats.current_stats.storage; + current_json_bytes += stats.current_stats.ingestion; + + deleted_events += stats.deleted_stats.events; + deleted_parquet_bytes += stats.deleted_stats.storage; + deleted_json_bytes += stats.deleted_stats.ingestion; + } } ( @@ -266,7 +276,8 @@ async fn fetch_ingestors_metrics() // send analytics for ingest servers // ingestor infos should be valid here, if not some thing is wrong - let ingestor_infos: Vec = cluster::get_node_info(NodeType::Ingestor).await?; + let ingestor_infos: Vec = + cluster::get_node_info(NodeType::Ingestor, &None).await?; for im in ingestor_infos { if !check_liveness(&im.domain_name).await { diff --git a/src/banner.rs b/src/banner.rs index 28565abe0..dd1775687 100644 --- a/src/banner.rs +++ b/src/banner.rs @@ -95,7 +95,7 @@ fn status_info(config: &Parseable, scheme: &str, id: Uid) { /// - Store (path where the data is stored and its latency) async fn storage_info(config: &Parseable) { let storage = config.storage(); - let latency = storage.get_object_store().get_latency().await; + let latency = storage.get_object_store().get_latency(&None).await; eprintln!( " diff --git a/src/catalog/mod.rs b/src/catalog/mod.rs index ec9c5f17d..3d8d1a628 100644 --- a/src/catalog/mod.rs +++ b/src/catalog/mod.rs @@ -31,11 +31,11 @@ use crate::{ event::DEFAULT_TIMESTAMP_KEY, handlers::{ self, - http::{base_path_without_preceding_slash, cluster::for_each_live_ingestor}, + http::{base_path_without_preceding_slash, cluster::for_each_live_node}, }, metrics::{EVENTS_INGESTED_DATE, EVENTS_INGESTED_SIZE_DATE, EVENTS_STORAGE_SIZE_DATE}, option::Mode, - parseable::PARSEABLE, + parseable::{DEFAULT_TENANT, PARSEABLE}, query::PartialTimeFilter, stats::{event_labels_date, get_current_stats, storage_size_labels_date, update_deleted_stats}, storage::{ @@ -108,6 +108,7 @@ fn get_file_bounds( pub async fn update_snapshot( stream_name: &str, changes: Vec, + tenant_id: &Option, ) -> Result<(), ObjectStorageError> { if changes.is_empty() { return Ok(()); @@ -116,16 +117,16 @@ pub async fn update_snapshot( let mut meta: ObjectStoreFormat = serde_json::from_slice( &PARSEABLE .metastore - .get_stream_json(stream_name, false) + .get_stream_json(stream_name, false, tenant_id) .await .map_err(|e| ObjectStorageError::MetastoreError(Box::new(e.to_detail())))?, )?; let partition_groups = group_changes_by_partition(changes, &meta.time_partition); let new_manifest_entries = - process_partition_groups(partition_groups, &mut meta, stream_name).await?; + process_partition_groups(partition_groups, &mut meta, stream_name, tenant_id).await?; - finalize_snapshot_update(meta, new_manifest_entries, stream_name).await + finalize_snapshot_update(meta, new_manifest_entries, stream_name, tenant_id).await } /// Groups manifest file changes by time partitions using Rayon for parallel processing @@ -186,10 +187,14 @@ fn create_partition_bounds(lower_bound: DateTime) -> (DateTime, DateTi } /// Extracts statistics from live metrics for a given partition date -fn extract_partition_metrics(stream_name: &str, partition_lower: DateTime) -> (u64, u64, u64) { +fn extract_partition_metrics( + stream_name: &str, + partition_lower: DateTime, + tenant_id: 
&str, +) -> (u64, u64, u64) { let date_str = partition_lower.date_naive().to_string(); - let event_labels = event_labels_date(stream_name, "json", &date_str); - let storage_labels = storage_size_labels_date(stream_name, &date_str); + let event_labels = event_labels_date(stream_name, "json", &date_str, tenant_id); + let storage_labels = storage_size_labels_date(stream_name, &date_str, tenant_id); let events_ingested = EVENTS_INGESTED_DATE .get_metric_with_label_values(&event_labels) @@ -214,12 +219,16 @@ async fn process_partition_groups( partition_groups: HashMap<(DateTime, DateTime), Vec>, meta: &mut ObjectStoreFormat, stream_name: &str, + tenant_id: &Option, ) -> Result, ObjectStorageError> { let mut new_manifest_entries = Vec::new(); for ((partition_lower, _partition_upper), partition_changes) in partition_groups { - let (events_ingested, ingestion_size, storage_size) = - extract_partition_metrics(stream_name, partition_lower); + let (events_ingested, ingestion_size, storage_size) = extract_partition_metrics( + stream_name, + partition_lower, + tenant_id.as_ref().map_or(DEFAULT_TENANT, |v| v), + ); let manifest_entry = process_single_partition( partition_lower, @@ -229,6 +238,7 @@ async fn process_partition_groups( events_ingested, ingestion_size, storage_size, + tenant_id, ) .await?; @@ -250,6 +260,7 @@ async fn process_single_partition( events_ingested: u64, ingestion_size: u64, storage_size: u64, + tenant_id: &Option, ) -> Result, ObjectStorageError> { let pos = meta.snapshot.manifest_list.iter().position(|item| { item.time_lower_bound <= partition_lower && partition_lower < item.time_upper_bound @@ -265,6 +276,7 @@ async fn process_single_partition( ingestion_size, storage_size, partition_lower, + tenant_id, ) .await } else { @@ -278,6 +290,7 @@ async fn process_single_partition( events_ingested, ingestion_size, storage_size, + tenant_id, ) .await } @@ -294,6 +307,7 @@ async fn handle_existing_partition( ingestion_size: u64, storage_size: u64, partition_lower: DateTime, + tenant_id: &Option, ) -> Result, ObjectStorageError> { let manifests = &mut meta.snapshot.manifest_list; @@ -308,6 +322,7 @@ async fn handle_existing_partition( manifests[pos].time_lower_bound, manifests[pos].time_upper_bound, Some(manifests[pos].manifest_path.clone()), + tenant_id, ) .await .map_err(|e| ObjectStorageError::MetastoreError(Box::new(e.to_detail())))? 
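
Reviewer note on the metrics hunk above: `extract_partition_metrics` now takes the tenant as a plain `&str` and threads it into the per-date label helpers. Below is a minimal, self-contained sketch of that tenant-scoped label pattern; the metric name, label order, and helper signature are assumptions for illustration — only `EVENTS_INGESTED_DATE`, `event_labels_date`, and `get_metric_with_label_values` are taken from the diff.

```rust
// Illustrative sketch only: assumes the counter vec is registered with an extra
// "tenant" label; the real metric lives in src/metrics and may differ.
use once_cell::sync::Lazy;
use prometheus::{IntCounterVec, Opts};

static EVENTS_INGESTED_DATE: Lazy<IntCounterVec> = Lazy::new(|| {
    IntCounterVec::new(
        Opts::new(
            "parseable_events_ingested_date",
            "Events ingested per stream, format, date and tenant",
        ),
        &["stream", "format", "date", "tenant"],
    )
    .expect("metric can be created")
});

/// Hypothetical stand-in for `event_labels_date(stream, "json", &date_str, tenant_id)`.
fn event_labels_date<'a>(
    stream: &'a str,
    format: &'a str,
    date: &'a str,
    tenant: &'a str,
) -> [&'a str; 4] {
    [stream, format, date, tenant]
}

/// Reads the per-date counter for one tenant; an unused label combination
/// simply reads as zero.
fn events_ingested_on(stream: &str, date: &str, tenant: &str) -> u64 {
    let labels = event_labels_date(stream, "json", date, tenant);
    EVENTS_INGESTED_DATE
        .get_metric_with_label_values(&labels)
        .map(|counter| counter.get())
        .unwrap_or_default()
}

fn main() {
    EVENTS_INGESTED_DATE
        .with_label_values(&["app_logs", "json", "2024-01-01", "acme"])
        .inc_by(42);
    assert_eq!(events_ingested_on("app_logs", "2024-01-01", "acme"), 42);
    assert_eq!(events_ingested_on("app_logs", "2024-01-01", "globex"), 0);
}
```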
@@ -323,6 +338,7 @@ async fn handle_existing_partition( stream_name, manifests[pos].time_lower_bound, manifests[pos].time_upper_bound, + tenant_id, ) .await .map_err(|e| ObjectStorageError::MetastoreError(Box::new(e.to_detail())))?; @@ -342,6 +358,7 @@ async fn handle_existing_partition( events_ingested, ingestion_size, storage_size, + tenant_id, ) .await } @@ -356,6 +373,7 @@ async fn handle_existing_partition( events_ingested, ingestion_size, storage_size, + tenant_id, ) .await } @@ -366,17 +384,18 @@ async fn finalize_snapshot_update( mut meta: ObjectStoreFormat, new_manifest_entries: Vec, stream_name: &str, + tenant_id: &Option, ) -> Result<(), ObjectStorageError> { // Add all new manifest entries to the snapshot meta.snapshot.manifest_list.extend(new_manifest_entries); - let stats = get_current_stats(stream_name, "json"); + let stats = get_current_stats(stream_name, "json", tenant_id); if let Some(stats) = stats { meta.stats = stats; } PARSEABLE .metastore - .put_stream_json(&meta, stream_name) + .put_stream_json(&meta, stream_name, tenant_id) .await .map_err(|e| ObjectStorageError::MetastoreError(Box::new(e.to_detail())))?; Ok(()) @@ -392,6 +411,7 @@ async fn create_manifest( events_ingested: u64, ingestion_size: u64, storage_size: u64, + tenant_id: &Option, ) -> Result, ObjectStorageError> { let lower_bound = lower_bound.date_naive().and_time(NaiveTime::MIN).and_utc(); let upper_bound = lower_bound @@ -406,7 +426,9 @@ async fn create_manifest( files: changes, ..Manifest::default() }; - let mut first_event_at = PARSEABLE.get_stream(stream_name)?.get_first_event(); + let mut first_event_at = PARSEABLE + .get_stream(stream_name, tenant_id)? + .get_first_event(); if first_event_at.is_none() && let Some(first_event) = manifest.files.first() { @@ -423,7 +445,7 @@ async fn create_manifest( } }; first_event_at = Some(lower_bound.with_timezone(&Local).to_rfc3339()); - match PARSEABLE.get_stream(stream_name) { + match PARSEABLE.get_stream(stream_name, tenant_id) { Ok(stream) => stream.set_first_event_at(first_event_at.as_ref().unwrap()), Err(err) => error!( "Failed to update first_event_at in streaminfo for stream {stream_name:?}, error = {err:?}" @@ -433,15 +455,16 @@ async fn create_manifest( PARSEABLE .metastore - .put_manifest(&manifest, stream_name, lower_bound, upper_bound) + .put_manifest(&manifest, stream_name, lower_bound, upper_bound, tenant_id) .await .map_err(|e| ObjectStorageError::MetastoreError(Box::new(e.to_detail())))?; let path_url = &PARSEABLE .metastore - .get_manifest_path(stream_name, lower_bound, upper_bound) + .get_manifest_path(stream_name, lower_bound, upper_bound, tenant_id) .await .map_err(|e| ObjectStorageError::MetastoreError(Box::new(e.to_detail())))?; + tracing::warn!("manifest path_url= {path_url}"); let new_snapshot_entry = snapshot::ManifestItem { manifest_path: path_url.to_owned(), time_lower_bound: lower_bound, @@ -455,7 +478,7 @@ async fn create_manifest( let mut manifests = meta.snapshot.manifest_list; manifests.push(new_snapshot_entry.clone()); meta.snapshot.manifest_list = manifests; - let stats = get_current_stats(stream_name, "json"); + let stats = get_current_stats(stream_name, "json", tenant_id); if let Some(stats) = stats { meta.stats = stats; } @@ -463,7 +486,7 @@ async fn create_manifest( PARSEABLE .metastore - .put_stream_json(&meta, stream_name) + .put_stream_json(&meta, stream_name, tenant_id) .await .map_err(|e| ObjectStorageError::MetastoreError(Box::new(e.to_detail())))?; @@ -474,35 +497,47 @@ async fn create_manifest( } pub async fn 
remove_manifest_from_snapshot( - storage: Arc, + storage: &Arc, stream_name: &str, dates: Vec, + tenant_id: &Option, ) -> Result<(), ObjectStorageError> { if !dates.is_empty() { // get current snapshot let mut meta: ObjectStoreFormat = serde_json::from_slice( &PARSEABLE .metastore - .get_stream_json(stream_name, false) + .get_stream_json(stream_name, false, tenant_id) .await .map_err(|e| ObjectStorageError::MetastoreError(Box::new(e.to_detail())))?, )?; let meta_for_stats = meta.clone(); - update_deleted_stats(storage.clone(), stream_name, meta_for_stats, dates.clone()).await?; + update_deleted_stats( + storage, + stream_name, + meta_for_stats, + dates.clone(), + tenant_id, + ) + .await?; let manifests = &mut meta.snapshot.manifest_list; // Filter out items whose manifest_path contains any of the dates_to_delete manifests.retain(|item| !dates.iter().any(|date| item.manifest_path.contains(date))); - PARSEABLE.get_stream(stream_name)?.reset_first_event_at(); + PARSEABLE + .get_stream(stream_name, tenant_id)? + .reset_first_event_at(); meta.first_event_at = None; - storage.put_snapshot(stream_name, meta.snapshot).await?; + storage + .put_snapshot(stream_name, meta.snapshot, tenant_id) + .await?; } if !dates.is_empty() && matches!(PARSEABLE.options.mode, Mode::Query | Mode::Prism) { let stream_name_clone = stream_name.to_string(); let dates_clone = dates.clone(); - for_each_live_ingestor(move |ingestor| { + for_each_live_node(tenant_id, move |ingestor| { let stream_name = stream_name_clone.clone(); let dates = dates_clone.clone(); async move { @@ -529,12 +564,14 @@ pub fn partition_path( stream: &str, lower_bound: DateTime, upper_bound: DateTime, + tenant_id: &Option, ) -> RelativePathBuf { + let root = tenant_id.as_ref().map_or("", |v| v); let lower = lower_bound.date_naive().format("%Y-%m-%d").to_string(); let upper = upper_bound.date_naive().format("%Y-%m-%d").to_string(); if lower == upper { - RelativePathBuf::from_iter([stream, &format!("date={lower}")]) + RelativePathBuf::from_iter([root, stream, &format!("date={lower}")]) } else { - RelativePathBuf::from_iter([stream, &format!("date={lower}:{upper}")]) + RelativePathBuf::from_iter([root, stream, &format!("date={lower}:{upper}")]) } } diff --git a/src/cli.rs b/src/cli.rs index 61b6fa5b7..0c2e2280a 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -156,6 +156,14 @@ pub struct Options { )] pub domain_address: Option, + #[arg( + long, + env = "P_MULTI_TENANCY", + default_value = "false", + help = "Mode of parseable instance" + )] + pub multi_tenancy: bool, + #[arg( long, env = "P_MODE", @@ -522,8 +530,16 @@ pub struct OidcConfig { } impl Options { - pub fn local_stream_data_path(&self, stream_name: &str) -> PathBuf { - self.local_staging_path.join(stream_name) + pub fn local_stream_data_path(&self, stream_name: &str, tenant_id: &Option) -> PathBuf { + if let Some(tenant_id) = tenant_id.as_ref() { + self.local_staging_path.join(tenant_id).join(stream_name) + } else { + self.local_staging_path.join(stream_name) + } + } + + pub fn is_multi_tenant(&self) -> bool { + self.multi_tenancy } pub fn get_scheme(&self) -> String { diff --git a/src/correlation.rs b/src/correlation.rs index a26ef4ca5..cc1800359 100644 --- a/src/correlation.rs +++ b/src/correlation.rs @@ -36,12 +36,12 @@ use crate::{ users::{CORRELATION_DIR, USERS_ROOT_DIR}, }, metastore::{MetastoreError, metastore_traits::MetastoreObject}, - parseable::PARSEABLE, + parseable::{DEFAULT_TENANT, PARSEABLE}, query::QUERY_SESSION, rbac::{Users, map::SessionKey}, storage::ObjectStorageError, 
users::filters::FilterQuery, - utils::{get_hash, user_auth_for_datasets}, + utils::{get_hash, get_tenant_id_from_key, user_auth_for_datasets}, }; pub static CORRELATIONS: Lazy = Lazy::new(Correlations::default); @@ -49,7 +49,7 @@ pub static CORRELATIONS: Lazy = Lazy::new(Correlations::default); type CorrelationMap = HashMap; #[derive(Debug, Default, derive_more::Deref)] -pub struct Correlations(RwLock); +pub struct Correlations(RwLock>); impl Correlations { // Load correlations from storage @@ -58,17 +58,20 @@ impl Correlations { let mut guard = self.write().await; - for correlations_bytes in all_correlations { - let correlation = match serde_json::from_slice::(&correlations_bytes) - { - Ok(c) => c, - Err(e) => { - error!("Unable to load correlation file : {e}"); - continue; - } - }; + for (tenant_id, correlations_bytes) in all_correlations { + let mut corrs = HashMap::new(); + for corr in correlations_bytes { + let correlation = match serde_json::from_slice::(&corr) { + Ok(c) => c, + Err(e) => { + error!("Unable to load correlation file : {e}"); + continue; + } + }; + corrs.insert(correlation.id.clone(), correlation); + } - guard.insert(correlation.id.to_owned(), correlation); + guard.insert(tenant_id, corrs); } Ok(()) @@ -80,15 +83,21 @@ impl Correlations { ) -> Result, CorrelationError> { let mut user_correlations = vec![]; let permissions = Users.get_permissions(session_key); - - for correlation in self.read().await.values() { - let tables = &correlation - .table_configs - .iter() - .map(|t| t.table_name.clone()) - .collect_vec(); - if user_auth_for_datasets(&permissions, tables).await.is_ok() { - user_correlations.push(correlation.clone()); + let tenant_id = get_tenant_id_from_key(session_key); + let tenant = tenant_id.as_ref().map_or(DEFAULT_TENANT, |v| v); + if let Some(corrs) = self.read().await.get(tenant) { + for correlation in corrs.values() { + let tables = &correlation + .table_configs + .iter() + .map(|t| t.table_name.clone()) + .collect_vec(); + if user_auth_for_datasets(&permissions, tables, &tenant_id) + .await + .is_ok() + { + user_correlations.push(correlation.clone()); + } } } @@ -98,16 +107,20 @@ impl Correlations { pub async fn get_correlation( &self, correlation_id: &str, + tenant_id: &Option, ) -> Result { - self.read() - .await - .get(correlation_id) - .cloned() - .ok_or_else(|| { + let tenant_id = tenant_id.as_ref().map_or(DEFAULT_TENANT, |v| v); + if let Some(corrs) = self.read().await.get(tenant_id) { + corrs.get(correlation_id).cloned().ok_or_else(|| { CorrelationError::AnyhowError(anyhow::Error::msg(format!( "Unable to find correlation with ID- {correlation_id}" ))) }) + } else { + return Err(CorrelationError::AnyhowError(anyhow::Error::msg(format!( + "Unable to find correlation with ID- {correlation_id}" + )))); + } } /// Create correlation associated with the user @@ -118,14 +131,17 @@ impl Correlations { ) -> Result { correlation.id = get_hash(Utc::now().timestamp_micros().to_string().as_str()); correlation.validate(session_key).await?; - + let tenant_id = get_tenant_id_from_key(session_key); // Update in metastore - PARSEABLE.metastore.put_correlation(&correlation).await?; - + PARSEABLE + .metastore + .put_correlation(&correlation, &tenant_id) + .await?; + let tenant = tenant_id.as_ref().map_or(DEFAULT_TENANT, |v| v); // Update in memory - self.write() - .await - .insert(correlation.id.to_owned(), correlation.clone()); + if let Some(corrs) = self.write().await.get_mut(tenant) { + corrs.insert(correlation.id.to_owned(), correlation.clone()); + } 
Ok(correlation) } @@ -136,8 +152,11 @@ impl Correlations { mut updated_correlation: CorrelationConfig, session_key: &SessionKey, ) -> Result { + let tenant_id = get_tenant_id_from_key(session_key); // validate whether user has access to this correlation object or not - let correlation = self.get_correlation(&updated_correlation.id).await?; + let correlation = self + .get_correlation(&updated_correlation.id, &tenant_id) + .await?; if correlation.user_id != updated_correlation.user_id { return Err(CorrelationError::AnyhowError(anyhow::Error::msg(format!( r#"User "{}" isn't authorized to update correlation with ID - {}"#, @@ -151,14 +170,17 @@ impl Correlations { // Update in metastore PARSEABLE .metastore - .put_correlation(&updated_correlation) + .put_correlation(&updated_correlation, &tenant_id) .await?; + let tenant = tenant_id.as_ref().map_or(DEFAULT_TENANT, |v| v); // Update in memory - self.write().await.insert( - updated_correlation.id.to_owned(), - updated_correlation.clone(), - ); + if let Some(corrs) = self.write().await.get_mut(tenant) { + corrs.insert( + updated_correlation.id.to_owned(), + updated_correlation.clone(), + ); + } Ok(updated_correlation) } @@ -168,8 +190,11 @@ impl Correlations { &self, correlation_id: &str, user_id: &str, + tenant_id: &Option, ) -> Result<(), CorrelationError> { - let correlation = CORRELATIONS.get_correlation(correlation_id).await?; + let correlation = CORRELATIONS + .get_correlation(correlation_id, tenant_id) + .await?; if correlation.user_id != user_id { return Err(CorrelationError::AnyhowError(anyhow::Error::msg(format!( r#"User "{user_id}" isn't authorized to delete correlation with ID - {correlation_id}"# @@ -177,7 +202,10 @@ impl Correlations { } // Delete from storage - PARSEABLE.metastore.delete_correlation(&correlation).await?; + PARSEABLE + .metastore + .delete_correlation(&correlation, tenant_id) + .await?; // Delete from memory self.write().await.remove(&correlation.id); @@ -244,8 +272,8 @@ impl CorrelationConfig { /// This function will validate the TableConfigs, JoinConfig, and user auth pub async fn validate(&self, session_key: &SessionKey) -> Result<(), CorrelationError> { - let ctx = &QUERY_SESSION; - + let ctx = &QUERY_SESSION.get_ctx(); + let tenant_id = get_tenant_id_from_key(session_key); let h1: HashSet<&String> = self.table_configs.iter().map(|t| &t.table_name).collect(); let h2: HashSet<&String> = self .join_config @@ -277,7 +305,7 @@ impl CorrelationConfig { .map(|t| t.table_name.clone()) .collect_vec(); - user_auth_for_datasets(&permissions, tables).await?; + user_auth_for_datasets(&permissions, tables, &tenant_id).await?; // to validate table config, we need to check whether the mentioned fields // are present in the table or not diff --git a/src/enterprise/utils.rs b/src/enterprise/utils.rs index 94cd255b0..d93c60009 100644 --- a/src/enterprise/utils.rs +++ b/src/enterprise/utils.rs @@ -79,11 +79,12 @@ pub fn create_time_filter( pub async fn fetch_parquet_file_paths( stream: &str, time_range: &TimeRange, + tenant_id: &Option, ) -> Result>, ObjectStorageError> { let object_store_format: ObjectStoreFormat = serde_json::from_slice( &PARSEABLE .metastore - .get_stream_json(stream, false) + .get_stream_json(stream, false, tenant_id) .await .map_err(|e| ObjectStorageError::MetastoreError(Box::new(e.to_detail())))?, )?; @@ -96,7 +97,10 @@ pub async fn fetch_parquet_file_paths( let mut merged_snapshot: snapshot::Snapshot = snapshot::Snapshot::default(); - let obs = PARSEABLE.metastore.get_all_stream_jsons(stream, None).await; + 
let obs = PARSEABLE + .metastore + .get_all_stream_jsons(stream, None, tenant_id) + .await; if let Ok(obs) = obs { for ob in obs { if let Ok(object_store_format) = serde_json::from_slice::(&ob) { @@ -119,6 +123,7 @@ pub async fn fetch_parquet_file_paths( manifest_item.time_lower_bound, manifest_item.time_upper_bound, Some(manifest_item.manifest_path), + tenant_id, ) .await .map_err(|e| ObjectStorageError::MetastoreError(Box::new(e.to_detail())))? diff --git a/src/event/format/json.rs b/src/event/format/json.rs index b0628197a..192e114ca 100644 --- a/src/event/format/json.rs +++ b/src/event/format/json.rs @@ -157,6 +157,7 @@ impl EventFormat for Event { stream_type: StreamType, p_custom_fields: &HashMap, telemetry_type: TelemetryType, + tenant_id: &Option, ) -> Result { let custom_partition_values = match custom_partitions.as_ref() { Some(custom_partition) => { @@ -190,6 +191,7 @@ impl EventFormat for Event { custom_partition_values, stream_type, telemetry_type, + tenant_id: tenant_id.to_owned(), }) } } diff --git a/src/event/format/mod.rs b/src/event/format/mod.rs index 4157627b6..6384d0548 100644 --- a/src/event/format/mod.rs +++ b/src/event/format/mod.rs @@ -231,6 +231,7 @@ pub trait EventFormat: Sized { stream_type: StreamType, p_custom_fields: &HashMap, telemetry_type: TelemetryType, + tenant_id: &Option, ) -> Result; } diff --git a/src/event/mod.rs b/src/event/mod.rs index 110ce2828..afcca300c 100644 --- a/src/event/mod.rs +++ b/src/event/mod.rs @@ -30,8 +30,9 @@ use crate::{ handlers::TelemetryType, metadata::update_stats, metrics::{increment_events_ingested_by_date, increment_events_ingested_size_by_date}, - parseable::{PARSEABLE, StagingError}, + parseable::{DEFAULT_TENANT, PARSEABLE, StagingError, StreamNotFound}, storage::StreamType, + tenants::TenantNotFound, }; use chrono::NaiveDateTime; use std::collections::HashMap; @@ -54,6 +55,7 @@ pub struct Event { pub custom_partition_values: HashMap, pub stream_type: StreamType, pub telemetry_type: TelemetryType, + pub tenant_id: Option, } // Events holds the schema related to a each event for a single log stream @@ -72,29 +74,38 @@ impl Event { } if self.is_first_event { - commit_schema(&self.stream_name, self.rb.schema())?; + commit_schema(&self.stream_name, self.rb.schema(), &self.tenant_id)?; } - PARSEABLE.get_or_create_stream(&self.stream_name).push( - &key, - &self.rb, - self.parsed_timestamp, - &self.custom_partition_values, - self.stream_type, - )?; - + PARSEABLE + .get_or_create_stream(&self.stream_name, &self.tenant_id) + .push( + &key, + &self.rb, + self.parsed_timestamp, + &self.custom_partition_values, + self.stream_type, + )?; + + let tenant = self.tenant_id.as_ref().map_or(DEFAULT_TENANT, |v| v); update_stats( &self.stream_name, self.origin_format, self.origin_size, self.rb.num_rows(), self.parsed_timestamp.date(), + tenant, ); // Track billing metrics for event ingestion let date_string = self.parsed_timestamp.date().to_string(); - increment_events_ingested_by_date(self.rb.num_rows() as u64, &date_string); - increment_events_ingested_size_by_date(self.origin_size, &date_string, self.telemetry_type); + increment_events_ingested_by_date(self.rb.num_rows() as u64, &date_string, tenant); + increment_events_ingested_size_by_date( + self.origin_size, + &date_string, + self.telemetry_type, + tenant, + ); crate::livetail::LIVETAIL.process(&self.stream_name, &self.rb); @@ -104,13 +115,15 @@ impl Event { pub fn process_unchecked(&self) -> Result<(), EventError> { let key = get_schema_key(&self.rb.schema().fields); - 
PARSEABLE.get_or_create_stream(&self.stream_name).push( - &key, - &self.rb, - self.parsed_timestamp, - &self.custom_partition_values, - self.stream_type, - )?; + PARSEABLE + .get_or_create_stream(&self.stream_name, &self.tenant_id) + .push( + &key, + &self.rb, + self.parsed_timestamp, + &self.custom_partition_values, + self.stream_type, + )?; Ok(()) } @@ -126,12 +139,18 @@ pub fn get_schema_key(fields: &[Arc]) -> String { format!("{hash:x}") } -pub fn commit_schema(stream_name: &str, schema: Arc) -> Result<(), StagingError> { +pub fn commit_schema( + stream_name: &str, + schema: Arc, + tenant_id: &Option, +) -> Result<(), StagingError> { let mut stream_metadata = PARSEABLE.streams.write().expect("lock poisoned"); - + let tenant_id = tenant_id.as_ref().map_or(DEFAULT_TENANT, |v| v); let map = &mut stream_metadata + .get_mut(tenant_id) + .ok_or_else(|| TenantNotFound(tenant_id.to_owned()))? .get_mut(stream_name) - .ok_or_else(|| StagingError::NotFound(stream_name.to_string()))? + .ok_or_else(|| StreamNotFound(stream_name.to_string()))? .metadata .write() .expect(LOCK_EXPECT) diff --git a/src/handlers/airplane.rs b/src/handlers/airplane.rs index 3c8a81d7b..88b4955ae 100644 --- a/src/handlers/airplane.rs +++ b/src/handlers/airplane.rs @@ -111,7 +111,7 @@ impl FlightService for AirServiceImpl { let table_name = table_name[0].clone(); let schema = PARSEABLE - .get_stream(&table_name) + .get_stream(&table_name, &None) .map_err(|err| Status::failed_precondition(err.to_string()))? .get_schema(); @@ -136,7 +136,7 @@ impl FlightService for AirServiceImpl { info!("query requested to airplane: {:?}", ticket); // get the query session_state - let session_state = QUERY_SESSION.state(); + let session_state = QUERY_SESSION.get_ctx().state(); let time_range = TimeRange::parse_human_time(&ticket.start_time, &ticket.end_time) .map_err(|e| Status::internal(e.to_string()))?; @@ -166,7 +166,7 @@ impl FlightService for AirServiceImpl { }) .to_string(); - let ingester_metadatas: Vec = get_node_info(NodeType::Ingestor) + let ingester_metadatas: Vec = get_node_info(NodeType::Ingestor, &None) .await .map_err(|err| Status::failed_precondition(err.to_string()))?; let mut minute_result: Vec = vec![]; @@ -194,18 +194,19 @@ impl FlightService for AirServiceImpl { rbac::Response::ReloadRequired => { return Err(Status::unauthenticated("reload required")); } + rbac::Response::Suspended(_) => return Err(Status::permission_denied("Suspended")), } let permissions = Users.get_permissions(&key); - user_auth_for_datasets(&permissions, &streams) + user_auth_for_datasets(&permissions, &streams, &None) .await .map_err(|_| { Status::permission_denied("User Does not have permission to access this") })?; let time = Instant::now(); - let (records, _) = execute(query, false) + let (records, _) = execute(query, false, &None) .await .map_err(|err| Status::internal(err.to_string()))?; @@ -234,7 +235,7 @@ impl FlightService for AirServiceImpl { if event.is_some() { // Clear staging of stream once airplane has taxied - PARSEABLE.get_or_create_stream(&stream_name).clear(); + PARSEABLE.get_or_create_stream(&stream_name, &None).clear(); } let time = time.elapsed().as_secs_f64(); diff --git a/src/handlers/http/alerts.rs b/src/handlers/http/alerts.rs index e1ce57669..c7d9e5cd5 100644 --- a/src/handlers/http/alerts.rs +++ b/src/handlers/http/alerts.rs @@ -29,7 +29,7 @@ use crate::{ }, metastore::metastore_traits::MetastoreObject, parseable::PARSEABLE, - utils::{actix::extract_session_key_from_req, user_auth_for_query}, + 
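The reworked `commit_schema` resolves the tenant bucket before the stream entry and reports the two failures separately (`TenantNotFound` vs `StreamNotFound`). A simplified, self-contained version of that two-level lookup:

```rust
use std::collections::HashMap;

#[derive(Debug)]
enum LookupError {
    TenantNotFound(String),
    StreamNotFound(String),
}

// Simplified: the real maps hold stream handles guarded by their own locks.
fn stream_entry<'a, V>(
    streams: &'a mut HashMap<String, HashMap<String, V>>,
    tenant: &str,
    stream: &str,
) -> Result<&'a mut V, LookupError> {
    streams
        .get_mut(tenant)
        .ok_or_else(|| LookupError::TenantNotFound(tenant.to_owned()))?
        .get_mut(stream)
        .ok_or_else(|| LookupError::StreamNotFound(stream.to_owned()))
}
```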
utils::{actix::extract_session_key_from_req, get_tenant_id_from_request, user_auth_for_query}, }; use actix_web::{ HttpRequest, Responder, @@ -248,7 +248,8 @@ pub async fn post( req: HttpRequest, Json(alert): Json, ) -> Result { - let mut alert: AlertConfig = alert.into().await?; + let tenant_id = get_tenant_id_from_request(&req); + let mut alert: AlertConfig = alert.into(tenant_id.clone()).await?; if alert.notification_config.interval > alert.get_eval_frequency() { return Err(AlertError::ValidationFailure( @@ -308,14 +309,14 @@ pub async fn post( // update persistent storage first PARSEABLE .metastore - .put_alert(&alert.to_alert_config()) + .put_alert(&alert.to_alert_config(), &tenant_id) .await?; // create initial alert state entry (default to NotTriggered) let state_entry = AlertStateEntry::new(*alert.get_id(), AlertState::NotTriggered); PARSEABLE .metastore - .put_alert_state(&state_entry as &dyn MetastoreObject) + .put_alert_state(&state_entry as &dyn MetastoreObject, &tenant_id) .await?; // update in memory @@ -331,7 +332,7 @@ pub async fn post( pub async fn get(req: HttpRequest, alert_id: Path) -> Result { let session_key = extract_session_key_from_req(&req)?; let alert_id = alert_id.into_inner(); - + let tenant_id = get_tenant_id_from_request(&req); let guard = ALERTS.read().await; let alerts = if let Some(alerts) = guard.as_ref() { alerts @@ -339,7 +340,7 @@ pub async fn get(req: HttpRequest, alert_id: Path) -> Result) -> Result) -> Result { let session_key = extract_session_key_from_req(&req)?; let alert_id = alert_id.into_inner(); - + let tenant_id = get_tenant_id_from_request(&req); let guard = ALERTS.write().await; let alerts = if let Some(alerts) = guard.as_ref() { alerts @@ -359,22 +360,25 @@ pub async fn delete(req: HttpRequest, alert_id: Path) -> Result Result { let session_key = extract_session_key_from_req(&req)?; let alert_id = alert_id.into_inner(); - + let tenant_id = get_tenant_id_from_request(&req); let new_notification_state = match new_notification_state.state.as_str() { "notify" => NotificationState::Notify, "indefinite" => NotificationState::Mute("indefinite".into()), @@ -428,14 +432,14 @@ pub async fn update_notification_state( }; // check if alert id exists in map - let alert = alerts.get_alert_by_id(alert_id).await?; + let alert = alerts.get_alert_by_id(alert_id, &tenant_id).await?; // validate that the user has access to the tables mentioned in the query user_auth_for_query(&session_key, alert.get_query()).await?; alerts - .update_notification_state(alert_id, new_notification_state) + .update_notification_state(alert_id, new_notification_state, &tenant_id) .await?; - let alert = alerts.get_alert_by_id(alert_id).await?; + let alert = alerts.get_alert_by_id(alert_id, &tenant_id).await?; Ok(web::Json(alert.to_alert_config().to_response())) } @@ -449,7 +453,7 @@ pub async fn disable_alert( ) -> Result { let session_key = extract_session_key_from_req(&req)?; let alert_id = alert_id.into_inner(); - + let tenant_id = get_tenant_id_from_request(&req); let guard = ALERTS.write().await; let alerts = if let Some(alerts) = guard.as_ref() { alerts @@ -458,14 +462,14 @@ pub async fn disable_alert( }; // check if alert id exists in map - let alert = alerts.get_alert_by_id(alert_id).await?; + let alert = alerts.get_alert_by_id(alert_id, &tenant_id).await?; // validate that the user has access to the tables mentioned in the query user_auth_for_query(&session_key, alert.get_query()).await?; alerts - .update_state(alert_id, AlertState::Disabled, Some("".into())) + 
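Every alert handler below now starts by deriving the tenant from the request before touching `ALERTS` or the metastore. The helper itself is not part of this diff; given that the middleware hunk later in the patch injects a `tenant` header, a plausible sketch is the following (an assumption, the real `crate::utils::get_tenant_id_from_request` may also consult the session):

```rust
use actix_web::HttpRequest;

// Assumed shape of crate::utils::get_tenant_id_from_request.
fn get_tenant_id_from_request(req: &HttpRequest) -> Option<String> {
    req.headers()
        .get("tenant")
        .and_then(|value| value.to_str().ok())
        .map(str::to_owned)
}
```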
.update_state(alert_id, AlertState::Disabled, Some("".into()), &tenant_id) .await?; - let alert = alerts.get_alert_by_id(alert_id).await?; + let alert = alerts.get_alert_by_id(alert_id, &tenant_id).await?; Ok(web::Json(alert.to_alert_config().to_response())) } @@ -479,7 +483,7 @@ pub async fn enable_alert( ) -> Result { let session_key = extract_session_key_from_req(&req)?; let alert_id = alert_id.into_inner(); - + let tenant_id = get_tenant_id_from_request(&req); let guard = ALERTS.write().await; let alerts = if let Some(alerts) = guard.as_ref() { alerts @@ -488,7 +492,7 @@ pub async fn enable_alert( }; // check if alert id exists in map - let alert = alerts.get_alert_by_id(alert_id).await?; + let alert = alerts.get_alert_by_id(alert_id, &tenant_id).await?; // only run if alert is disabled if alert.get_state().ne(&AlertState::Disabled) { @@ -501,9 +505,14 @@ pub async fn enable_alert( user_auth_for_query(&session_key, alert.get_query()).await?; alerts - .update_state(alert_id, AlertState::NotTriggered, Some("".into())) + .update_state( + alert_id, + AlertState::NotTriggered, + Some("".into()), + &tenant_id, + ) .await?; - let alert = alerts.get_alert_by_id(alert_id).await?; + let alert = alerts.get_alert_by_id(alert_id, &tenant_id).await?; Ok(web::Json(alert.to_alert_config().to_response())) } @@ -518,7 +527,7 @@ pub async fn modify_alert( ) -> Result { let session_key = extract_session_key_from_req(&req)?; let alert_id = alert_id.into_inner(); - + let tenant_id = get_tenant_id_from_request(&req); // Get alerts manager reference without holding the global lock let alerts = { let guard = ALERTS.read().await; @@ -530,10 +539,10 @@ pub async fn modify_alert( }; // Validate and prepare the new alert - let alert = alerts.get_alert_by_id(alert_id).await?; + let alert = alerts.get_alert_by_id(alert_id, &tenant_id).await?; user_auth_for_query(&session_key, alert.get_query()).await?; - let mut new_config = alert_request.into().await?; + let mut new_config = alert_request.into(tenant_id.clone()).await?; if &new_config.alert_type != alert.get_alert_type() { return Err(AlertError::InvalidAlertModifyRequest); } @@ -577,13 +586,13 @@ pub async fn modify_alert( // Perform I/O operations PARSEABLE .metastore - .put_alert(&new_alert.to_alert_config()) + .put_alert(&new_alert.to_alert_config(), &tenant_id) .await?; let is_disabled = new_alert.get_state().eq(&AlertState::Disabled); // Now perform the atomic operations alerts.delete_task(alert_id).await?; - alerts.delete(alert_id).await?; + alerts.delete(alert_id, &tenant_id).await?; alerts.update(&*new_alert).await; // only restart the task if the state was not set to disabled @@ -602,7 +611,7 @@ pub async fn evaluate_alert( ) -> Result { let session_key = extract_session_key_from_req(&req)?; let alert_id = alert_id.into_inner(); - + let tenant_id = get_tenant_id_from_request(&req); let guard = ALERTS.write().await; let alerts = if let Some(alerts) = guard.as_ref() { alerts @@ -610,7 +619,7 @@ pub async fn evaluate_alert( return Err(AlertError::CustomError("No AlertManager set".into())); }; - let alert = alerts.get_alert_by_id(alert_id).await?; + let alert = alerts.get_alert_by_id(alert_id, &tenant_id).await?; user_auth_for_query(&session_key, alert.get_query()).await?; @@ -625,13 +634,14 @@ pub async fn evaluate_alert( Ok(Json(config)) } -pub async fn list_tags() -> Result { +pub async fn list_tags(req: HttpRequest) -> Result { let guard = ALERTS.read().await; let alerts = if let Some(alerts) = guard.as_ref() { alerts } else { return 
Err(AlertError::CustomError("No AlertManager set".into())); }; - let tags = alerts.list_tags().await; + let tenant_id = get_tenant_id_from_request(&req); + let tags = alerts.list_tags(&tenant_id).await; Ok(web::Json(tags)) } diff --git a/src/handlers/http/cluster/mod.rs b/src/handlers/http/cluster/mod.rs index 8429c6e72..3635110b1 100644 --- a/src/handlers/http/cluster/mod.rs +++ b/src/handlers/http/cluster/mod.rs @@ -17,7 +17,10 @@ */ pub mod utils; +use actix_web::http::StatusCode; +use actix_web::http::header::HeaderMap; use futures::{StreamExt, future, stream}; +use http::header; use lazy_static::lazy_static; use std::collections::{HashMap, HashSet}; use std::future::Future; @@ -25,13 +28,10 @@ use std::sync::Arc; use std::time::{Duration, Instant}; use tokio::sync::{RwLock, Semaphore}; -use actix_web::Responder; -use actix_web::http::StatusCode; -use actix_web::http::header::HeaderMap; use actix_web::web::Path; +use actix_web::{HttpRequest, Responder}; use bytes::Bytes; use chrono::Utc; -use http::header; use itertools::Itertools; use serde::de::{DeserializeOwned, Error}; use serde_json::error::Error as SerdeError; @@ -41,14 +41,16 @@ use url::Url; use utils::{IngestionStats, QueriedStats, StorageStats, check_liveness, to_url_string}; use crate::INTRA_CLUSTER_CLIENT; +use crate::handlers::http::modal::ingest::SyncRole; use crate::handlers::http::query::{Query, QueryError, TIME_ELAPSED_HEADER}; use crate::metrics::prom_utils::Metrics; use crate::option::Mode; -use crate::parseable::PARSEABLE; +use crate::parseable::{DEFAULT_TENANT, PARSEABLE}; use crate::rbac::role::model::DefaultPrivilege; use crate::rbac::user::User; use crate::stats::Stats; use crate::storage::{ObjectStorageError, ObjectStoreFormat}; +use crate::utils::get_tenant_id_from_request; use super::base_path_without_preceding_slash; use super::ingest::PostError; @@ -326,29 +328,41 @@ impl BillingMetricsCollector { } } -pub async fn for_each_live_ingestor(api_fn: F) -> Result<(), E> +pub async fn for_each_live_node(tenant_id: &Option, api_fn: F) -> Result<(), E> where F: Fn(NodeMetadata) -> Fut + Clone + Send + Sync + 'static, Fut: Future> + Send, E: From + Send + Sync + 'static, { - let ingestor_infos: Vec = - get_node_info(NodeType::Ingestor).await.map_err(|err| { + let mut nodes = Vec::new(); + + let ingestor_infos: Vec = get_node_info(NodeType::Ingestor, tenant_id) + .await + .map_err(|err| { error!("Fatal: failed to get ingestor info: {:?}", err); E::from(err) })?; + nodes.extend(ingestor_infos); - let mut live_ingestors = Vec::new(); - for ingestor in ingestor_infos { - if utils::check_liveness(&ingestor.domain_name).await { - live_ingestors.push(ingestor); + let querier_infos: Vec = get_node_info(NodeType::Querier, tenant_id) + .await + .map_err(|err| { + error!("Fatal: failed to get querier info: {:?}", err); + E::from(err) + })?; + nodes.extend(querier_infos); + + let mut live_nodes = Vec::new(); + for node in nodes { + if utils::check_liveness(&node.domain_name).await { + live_nodes.push(node); } else { - warn!("Ingestor {} is not live", ingestor.domain_name); + warn!("Node {} is not live", node.domain_name); } } // Process all live ingestors in parallel - let results = futures::future::join_all(live_ingestors.into_iter().map(|ingestor| { + let results = futures::future::join_all(live_nodes.into_iter().map(|ingestor| { let api_fn = api_fn.clone(); async move { api_fn(ingestor).await } })) @@ -367,6 +381,7 @@ pub async fn sync_streams_with_ingestors( headers: HeaderMap, body: Bytes, stream_name: &str, + tenant_id: 
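`for_each_live_node` generalises the old ingestor-only helper: it gathers ingestor and querier metadata, drops nodes that fail the liveness probe, and runs the supplied closure against the survivors concurrently. The core fan-out, reduced to a standalone sketch with node discovery and liveness stubbed out:

```rust
/// Run `api_fn` against every live node concurrently and surface the first failure,
/// mirroring how the helper in the hunk above propagates errors.
async fn fan_out<F, Fut, E>(live_nodes: Vec<String>, api_fn: F) -> Result<(), E>
where
    F: Fn(String) -> Fut + Clone,
    Fut: std::future::Future<Output = Result<(), E>>,
{
    let results = futures::future::join_all(live_nodes.into_iter().map(|node| {
        let api_fn = api_fn.clone();
        async move { api_fn(node).await }
    }))
    .await;

    results.into_iter().collect()
}
```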
&Option, ) -> Result<(), StreamError> { let mut reqwest_headers = reqwest::header::HeaderMap::new(); @@ -383,7 +398,7 @@ pub async fn sync_streams_with_ingestors( let stream_name = stream_name.to_string(); let reqwest_headers_clone = reqwest_headers.clone(); - for_each_live_ingestor( + for_each_live_node(tenant_id, move |ingestor| { let url = format!( "{}{}/logstream/{}/sync", @@ -423,9 +438,13 @@ pub async fn sync_streams_with_ingestors( } // forward the demo data request to one of the live ingestor -pub async fn get_demo_data_from_ingestor(action: &str) -> Result<(), PostError> { - let ingestor_infos: Vec = - get_node_info(NodeType::Ingestor).await.map_err(|err| { +pub async fn get_demo_data_from_ingestor( + action: &str, + tenant_id: &Option, +) -> Result<(), PostError> { + let ingestor_infos: Vec = get_node_info(NodeType::Ingestor, tenant_id) + .await + .map_err(|err| { error!("Fatal: failed to get ingestor info: {:?}", err); PostError::Invalid(err) })?; @@ -483,6 +502,7 @@ pub async fn sync_users_with_roles_with_ingestors( userid: &str, role: &HashSet, operation: &str, + tenant_id: &Option, ) -> Result<(), RBACError> { match operation { "add" | "remove" => {} @@ -498,7 +518,7 @@ pub async fn sync_users_with_roles_with_ingestors( let op = operation.to_string(); - for_each_live_ingestor(move |ingestor| { + for_each_live_node(tenant_id, move |ingestor| { let url = format!( "{}{}/user/{}/role/sync/{}", ingestor.domain_name, @@ -540,10 +560,13 @@ pub async fn sync_users_with_roles_with_ingestors( } // forward the delete user request to all ingestors to keep them in sync -pub async fn sync_user_deletion_with_ingestors(userid: &str) -> Result<(), RBACError> { +pub async fn sync_user_deletion_with_ingestors( + userid: &str, + tenant_id: &Option, +) -> Result<(), RBACError> { let userid = userid.to_owned(); - for_each_live_ingestor(move |ingestor| { + for_each_live_node(tenant_id, move |ingestor| { let url = format!( "{}{}/user/{}/sync", ingestor.domain_name, @@ -579,10 +602,11 @@ pub async fn sync_user_deletion_with_ingestors(userid: &str) -> Result<(), RBACE .await } -// forward the create user request to all ingestors to keep them in sync -pub async fn sync_user_creation_with_ingestors( +// forward the create user request to all ingestors and queriers to keep them in sync +pub async fn sync_user_creation( user: User, role: &Option>, + tenant_id: &Option, ) -> Result<(), RBACError> { let mut user = user.clone(); @@ -598,10 +622,10 @@ pub async fn sync_user_creation_with_ingestors( let userid = userid.to_string(); - for_each_live_ingestor(move |ingestor| { + for_each_live_node(tenant_id, move |node| { let url = format!( "{}{}/user/{}/sync", - ingestor.domain_name, + node.domain_name, base_path_without_preceding_slash(), userid ); @@ -611,23 +635,23 @@ pub async fn sync_user_creation_with_ingestors( async move { let res = INTRA_CLUSTER_CLIENT .post(url) - .header(header::AUTHORIZATION, &ingestor.token) + .header(header::AUTHORIZATION, &node.token) .header(header::CONTENT_TYPE, "application/json") .body(user_data) .send() .await .map_err(|err| { error!( - "Fatal: failed to forward request to ingestor: {}\n Error: {:?}", - ingestor.domain_name, err + "Fatal: failed to forward request to node: {}\n Error: {:?}", + node.domain_name, err ); RBACError::Network(err) })?; if !res.status().is_success() { error!( - "failed to forward request to ingestor: {}\nResponse Returned: {:?}", - ingestor.domain_name, + "failed to forward request to node: {}\nResponse Returned: {:?}", + node.domain_name, 
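Each of these sync helpers follows the same shape: build the peer URL from the node's `domain_name`, POST the payload with the node's token, and log (rather than fail on) a non-success response. Boiled down to a sketch, with the shared `INTRA_CLUSTER_CLIENT` replaced by an explicit client parameter:

```rust
async fn forward_to_node(
    client: &reqwest::Client,
    domain_name: &str,
    token: &str,
    path: &str,
    body: String,
) -> Result<(), reqwest::Error> {
    let url = format!("{domain_name}{path}");
    let res = client
        .post(url)
        .header(reqwest::header::AUTHORIZATION, token)
        .header(reqwest::header::CONTENT_TYPE, "application/json")
        .body(body)
        .send()
        .await?;
    if !res.status().is_success() {
        // The real helpers log the response body here and keep going.
        tracing::error!("failed to forward request to node: {domain_name}");
    }
    Ok(())
}
```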
res.text().await ); } @@ -639,10 +663,13 @@ pub async fn sync_user_creation_with_ingestors( } // forward the password reset request to all ingestors to keep them in sync -pub async fn sync_password_reset_with_ingestors(username: &str) -> Result<(), RBACError> { +pub async fn sync_password_reset_with_ingestors( + req: HttpRequest, + username: &str, +) -> Result<(), RBACError> { let username = username.to_owned(); - - for_each_live_ingestor(move |ingestor| { + let tenant_id = get_tenant_id_from_request(&req); + for_each_live_node(&tenant_id, move |ingestor| { let url = format!( "{}{}/user/{}/generate-new-password/sync", ingestor.domain_name, @@ -679,41 +706,45 @@ pub async fn sync_password_reset_with_ingestors(username: &str) -> Result<(), RB .await } -// forward the put role request to all ingestors to keep them in sync -pub async fn sync_role_update_with_ingestors( +// forward the put role request to all ingestors and queriers to keep them in sync +pub async fn sync_role_update( + req: HttpRequest, name: String, privileges: Vec, + tenant_id: &Option, ) -> Result<(), RoleError> { - for_each_live_ingestor(move |ingestor| { + let tenant = tenant_id.as_deref().unwrap_or(DEFAULT_TENANT).to_string(); + for_each_live_node(tenant_id, move |node| { let url = format!( "{}{}/role/{}/sync", - ingestor.domain_name, + node.domain_name, base_path_without_preceding_slash(), name ); let privileges = privileges.clone(); + let tenant = tenant.clone(); async move { let res = INTRA_CLUSTER_CLIENT .put(url) - .header(header::AUTHORIZATION, &ingestor.token) + .header(header::AUTHORIZATION, &node.token) .header(header::CONTENT_TYPE, "application/json") - .json(&privileges) + .json(&SyncRole::new(privileges, tenant.clone())) .send() .await .map_err(|err| { error!( - "Fatal: failed to forward request to ingestor: {}\n Error: {:?}", - ingestor.domain_name, err + "Fatal: failed to forward request to node: {}\n Error: {:?}", + node.domain_name, err ); RoleError::Network(err) })?; if !res.status().is_success() { error!( - "failed to forward request to ingestor: {}\nResponse Returned: {:?}", - ingestor.domain_name, + "failed to forward request to node: {}\nResponse Returned: {:?}", + node.domain_name, res.text().await ); } @@ -754,10 +785,11 @@ pub fn fetch_daily_stats( /// get the cumulative stats from all ingestors pub async fn fetch_stats_from_ingestors( stream_name: &str, + tenant_id: &Option, ) -> Result, StreamError> { let obs = PARSEABLE .metastore - .get_all_stream_jsons(stream_name, Some(Mode::Ingest)) + .get_all_stream_jsons(stream_name, Some(Mode::Ingest), tenant_id) .await?; let mut ingestion_size = 0u64; @@ -883,13 +915,14 @@ pub async fn send_retention_cleanup_request( } /// Fetches cluster information for all nodes (ingestor, indexer, querier and prism) -pub async fn get_cluster_info() -> Result { +pub async fn get_cluster_info(req: HttpRequest) -> Result { + let tenant_id = &get_tenant_id_from_request(&req); // Get querier, ingestor and indexer metadata concurrently let (prism_result, querier_result, ingestor_result, indexer_result) = future::join4( - get_node_info(NodeType::Prism), - get_node_info(NodeType::Querier), - get_node_info(NodeType::Ingestor), - get_node_info(NodeType::Indexer), + get_node_info(NodeType::Prism, tenant_id), + get_node_info(NodeType::Querier, tenant_id), + get_node_info(NodeType::Ingestor, tenant_id), + get_node_info(NodeType::Indexer, tenant_id), ) .await; @@ -1028,8 +1061,9 @@ async fn fetch_nodes_info( Ok(infos) } -pub async fn get_cluster_metrics() -> Result { - let dresses 
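The role sync payload changes from a bare privilege list to a `SyncRole` that also carries the tenant. `SyncRole` lives in `handlers::http::modal::ingest` and its fields are not visible in this diff, so the shape below is only an assumption for illustration:

```rust
// Hypothetical shape, inferred from `SyncRole::new(privileges, tenant)`;
// the real struct may name or order its fields differently.
#[derive(serde::Serialize, serde::Deserialize)]
struct SyncRole<P> {
    privileges: Vec<P>,
    tenant_id: String,
}

impl<P> SyncRole<P> {
    fn new(privileges: Vec<P>, tenant_id: String) -> Self {
        Self { privileges, tenant_id }
    }
}
```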
= fetch_cluster_metrics().await.map_err(|err| { +pub async fn get_cluster_metrics(req: HttpRequest) -> Result { + let tenant_id = &get_tenant_id_from_request(&req); + let dresses = fetch_cluster_metrics(tenant_id).await.map_err(|err| { error!("Fatal: failed to fetch cluster metrics: {:?}", err); PostError::Invalid(err.into()) })?; @@ -1042,10 +1076,11 @@ pub async fn get_cluster_metrics() -> Result { /// it will return the metadata for all nodes of that type pub async fn get_node_info( node_type: NodeType, + tenant_id: &Option, ) -> anyhow::Result> { let metadata = PARSEABLE .metastore - .get_node_metadata(node_type) + .get_node_metadata(node_type, tenant_id) .await? .iter() .filter_map(|x| match serde_json::from_slice::(x) { @@ -1199,13 +1234,13 @@ where /// fetches node info for all nodes /// fetches metrics for all nodes /// combines all metrics into a single vector -async fn fetch_cluster_metrics() -> Result, PostError> { +async fn fetch_cluster_metrics(tenant_id: &Option) -> Result, PostError> { // Get ingestor and indexer metadata concurrently let (prism_result, querier_result, ingestor_result, indexer_result) = future::join4( - get_node_info(NodeType::Prism), - get_node_info(NodeType::Querier), - get_node_info(NodeType::Ingestor), - get_node_info(NodeType::Indexer), + get_node_info(NodeType::Prism, tenant_id), + get_node_info(NodeType::Querier, tenant_id), + get_node_info(NodeType::Ingestor, tenant_id), + get_node_info(NodeType::Indexer, tenant_id), ) .await; @@ -1573,13 +1608,15 @@ where } /// Main function to fetch billing metrics from all nodes -pub async fn fetch_cluster_billing_metrics() -> Result, PostError> { +pub async fn fetch_cluster_billing_metrics( + tenant_id: &Option, +) -> Result, PostError> { // Get all node types metadata concurrently let (prism_result, querier_result, ingestor_result, indexer_result) = future::join4( - get_node_info(NodeType::Prism), - get_node_info(NodeType::Querier), - get_node_info(NodeType::Ingestor), - get_node_info(NodeType::Indexer), + get_node_info(NodeType::Prism, tenant_id), + get_node_info(NodeType::Querier, tenant_id), + get_node_info(NodeType::Ingestor, tenant_id), + get_node_info(NodeType::Indexer, tenant_id), ) .await; @@ -1647,9 +1684,11 @@ struct QuerierStatus { last_used: Option, } -pub async fn get_available_querier() -> Result { +pub async fn get_available_querier( + tenant_id: &Option, +) -> Result { // Get all querier metadata - let querier_metadata: Vec = get_node_info(NodeType::Querier).await?; + let querier_metadata: Vec = get_node_info(NodeType::Querier, tenant_id).await?; // No queriers found if querier_metadata.is_empty() { @@ -1833,8 +1872,12 @@ pub async fn mark_querier_available(domain_name: &str) { } } -pub async fn send_query_request(query_request: &Query) -> Result<(JsonValue, String), QueryError> { - let querier = get_available_querier().await?; +pub async fn send_query_request( + auth_token: Option, + query_request: &Query, + tenant_id: &Option, +) -> Result<(JsonValue, String), QueryError> { + let querier = get_available_querier(tenant_id).await?; let domain_name = querier.domain_name.clone(); // Perform the query request @@ -1854,10 +1897,26 @@ pub async fn send_query_request(query_request: &Query) -> Result<(JsonValue, Str } }; + let mut map = reqwest::header::HeaderMap::new(); + + if let Some(auth) = auth_token { + for (key, value) in auth.iter() { + if let Ok(name) = reqwest::header::HeaderName::from_bytes(key.as_str().as_bytes()) + && let Ok(val) = reqwest::header::HeaderValue::from_bytes(value.as_bytes()) 
+ { + map.insert(name, val); + } + } + } else { + map.insert( + reqwest::header::AUTHORIZATION, + reqwest::header::HeaderValue::from_str(&querier.token).unwrap(), + ); + }; let res = match INTRA_CLUSTER_CLIENT .post(uri) .timeout(Duration::from_secs(300)) - .header(header::AUTHORIZATION, &querier.token) + .headers(map) .header(header::CONTENT_TYPE, "application/json") .body(body) .send() diff --git a/src/handlers/http/correlation.rs b/src/handlers/http/correlation.rs index 16522969c..adcb4157c 100644 --- a/src/handlers/http/correlation.rs +++ b/src/handlers/http/correlation.rs @@ -23,7 +23,9 @@ use itertools::Itertools; use crate::rbac::Users; use crate::utils::actix::extract_session_key_from_req; -use crate::utils::{get_hash, get_user_from_request, user_auth_for_datasets}; +use crate::utils::{ + get_hash, get_tenant_id_from_request, get_user_and_tenant_from_request, user_auth_for_datasets, +}; use crate::correlation::{CORRELATIONS, CorrelationConfig, CorrelationError}; @@ -40,11 +42,14 @@ pub async fn get( req: HttpRequest, correlation_id: Path, ) -> Result { + let tenant_id = get_tenant_id_from_request(&req); let correlation_id = correlation_id.into_inner(); let session_key = extract_session_key_from_req(&req) .map_err(|err| CorrelationError::AnyhowError(Error::msg(err.to_string())))?; - let correlation = CORRELATIONS.get_correlation(&correlation_id).await?; + let correlation = CORRELATIONS + .get_correlation(&correlation_id, &tenant_id) + .await?; let permissions = Users.get_permissions(&session_key); @@ -54,7 +59,7 @@ pub async fn get( .map(|t| t.table_name.clone()) .collect_vec(); - user_auth_for_datasets(&permissions, tables).await?; + user_auth_for_datasets(&permissions, tables, &tenant_id).await?; Ok(web::Json(correlation)) } @@ -65,8 +70,8 @@ pub async fn post( ) -> Result { let session_key = extract_session_key_from_req(&req) .map_err(|err| CorrelationError::AnyhowError(anyhow::Error::msg(err.to_string())))?; - let user_id = get_user_from_request(&req) - .map(|s| get_hash(&s.to_string())) + let user_id = get_user_and_tenant_from_request(&req) + .map(|(s, _)| get_hash(&s.to_string())) .map_err(|err| CorrelationError::AnyhowError(Error::msg(err.to_string())))?; correlation.user_id = user_id; @@ -81,8 +86,8 @@ pub async fn modify( Json(mut correlation): Json, ) -> Result { correlation.id = correlation_id.into_inner(); - correlation.user_id = get_user_from_request(&req) - .map(|s| get_hash(&s.to_string())) + correlation.user_id = get_user_and_tenant_from_request(&req) + .map(|(s, _)| get_hash(&s.to_string())) .map_err(|err| CorrelationError::AnyhowError(Error::msg(err.to_string())))?; let session_key = extract_session_key_from_req(&req) @@ -98,11 +103,13 @@ pub async fn delete( correlation_id: Path, ) -> Result { let correlation_id = correlation_id.into_inner(); - let user_id = get_user_from_request(&req) - .map(|s| get_hash(&s.to_string())) + let (user_id, tenant_id) = get_user_and_tenant_from_request(&req) + .map(|(s, t)| (get_hash(&s.to_string()), t)) .map_err(|err| CorrelationError::AnyhowError(Error::msg(err.to_string())))?; - CORRELATIONS.delete(&correlation_id, &user_id).await?; + CORRELATIONS + .delete(&correlation_id, &user_id, &tenant_id) + .await?; Ok(HttpResponse::Ok().finish()) } diff --git a/src/handlers/http/demo_data.rs b/src/handlers/http/demo_data.rs index 71103d385..19f10b1f9 100644 --- a/src/handlers/http/demo_data.rs +++ b/src/handlers/http/demo_data.rs @@ -20,6 +20,7 @@ use crate::{ handlers::http::{cluster::get_demo_data_from_ingestor, ingest::PostError}, 
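In `send_query_request`, the forwarder now reuses the caller's auth headers when they are supplied and only falls back to the querier's stored token otherwise. The header assembly extracted into a sketch (the real code converts from actix header types and unwraps the fallback value):

```rust
fn build_forward_headers(
    auth: Option<Vec<(String, Vec<u8>)>>,
    querier_token: &str,
) -> reqwest::header::HeaderMap {
    let mut map = reqwest::header::HeaderMap::new();
    if let Some(auth) = auth {
        for (key, value) in auth {
            if let (Ok(name), Ok(val)) = (
                reqwest::header::HeaderName::from_bytes(key.as_bytes()),
                reqwest::header::HeaderValue::from_bytes(&value),
            ) {
                map.insert(name, val);
            }
        }
    } else if let Ok(val) = reqwest::header::HeaderValue::from_str(querier_token) {
        map.insert(reqwest::header::AUTHORIZATION, val);
    }
    map
}
```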
option::Mode, parseable::PARSEABLE, + utils::get_tenant_id_from_request, }; use actix_web::{HttpRequest, HttpResponse, web}; use std::{collections::HashMap, fs, process::Command}; @@ -48,7 +49,7 @@ pub async fn get_demo_data(req: HttpRequest) -> Result let password = &PARSEABLE.options.password; let scheme = PARSEABLE.options.get_scheme(); let url = format!("{scheme}://{url}"); - + let tenant_id = get_tenant_id_from_request(&req); match action.as_str() { "ingest" => match PARSEABLE.options.mode { Mode::Ingest | Mode::All => { @@ -61,7 +62,7 @@ pub async fn get_demo_data(req: HttpRequest) -> Result } Mode::Query | Mode::Prism => { // Forward the request to ingestor asynchronously - match get_demo_data_from_ingestor(&action).await { + match get_demo_data_from_ingestor(&action, &tenant_id).await { Ok(()) => Ok(HttpResponse::Accepted().finish()), Err(e) => Err(e), } diff --git a/src/handlers/http/health_check.rs b/src/handlers/http/health_check.rs index 90e0b07a3..e27eaddbf 100644 --- a/src/handlers/http/health_check.rs +++ b/src/handlers/http/health_check.rs @@ -18,6 +18,7 @@ use std::sync::Arc; +use actix_web::HttpRequest; use actix_web::http::StatusCode; use actix_web::{ HttpResponse, @@ -31,6 +32,7 @@ use once_cell::sync::Lazy; use tokio::{sync::Mutex, task::JoinSet}; use tracing::{error, info}; +use crate::utils::get_tenant_id_from_request; use crate::{parseable::PARSEABLE, storage::object_storage::sync_all_streams}; // Create a global variable to store signal status @@ -115,9 +117,16 @@ async fn perform_object_store_sync() { } } -pub async fn readiness() -> HttpResponse { +pub async fn readiness(req: HttpRequest) -> HttpResponse { + let tenant_id = get_tenant_id_from_request(&req); // Check the object store connection - if PARSEABLE.storage.get_object_store().check().await.is_ok() { + if PARSEABLE + .storage + .get_object_store() + .check(&tenant_id) + .await + .is_ok() + { HttpResponse::new(StatusCode::OK) } else { HttpResponse::new(StatusCode::SERVICE_UNAVAILABLE) diff --git a/src/handlers/http/ingest.rs b/src/handlers/http/ingest.rs index 4438f7f27..bd8a06d37 100644 --- a/src/handlers/http/ingest.rs +++ b/src/handlers/http/ingest.rs @@ -43,6 +43,7 @@ use crate::otel::metrics::OTEL_METRICS_KNOWN_FIELD_LIST; use crate::otel::traces::OTEL_TRACES_KNOWN_FIELD_LIST; use crate::parseable::{PARSEABLE, StreamNotFound}; use crate::storage::{ObjectStorageError, StreamType}; +use crate::utils::get_tenant_id_from_request; use crate::utils::header_parsing::ParseHeaderError; use crate::utils::json::{flatten::JsonFlattenError, strict::StrictValue}; @@ -61,9 +62,9 @@ pub async fn ingest( let Some(stream_name) = req.headers().get(STREAM_NAME_HEADER_KEY) else { return Err(PostError::Header(ParseHeaderError::MissingStreamName)); }; - + let tenant_id = get_tenant_id_from_request(&req); let stream_name = stream_name.to_str().unwrap().to_owned(); - let internal_stream_names = PARSEABLE.streams.list_internal_streams(); + let internal_stream_names = PARSEABLE.streams.list_internal_streams(&tenant_id); if internal_stream_names.contains(&stream_name) { return Err(PostError::InternalStream(stream_name)); } @@ -115,15 +116,16 @@ pub async fn ingest( None, vec![log_source_entry.clone()], telemetry_type, + &tenant_id, ) .await?; //if stream exists, fetch the stream log source //return error if the stream log source is otel traces or otel metrics - validate_stream_for_ingestion(&stream_name)?; + validate_stream_for_ingestion(&stream_name, &tenant_id)?; PARSEABLE - .add_update_log_source(&stream_name, 
log_source_entry) + .add_update_log_source(&stream_name, log_source_entry, &tenant_id) .await?; flatten_and_push_logs( @@ -133,16 +135,23 @@ pub async fn ingest( &p_custom_fields, None, telemetry_type, + &tenant_id, ) .await?; Ok(HttpResponse::Ok().finish()) } -pub async fn ingest_internal_stream(stream_name: String, body: Bytes) -> Result<(), PostError> { +pub async fn ingest_internal_stream( + stream_name: String, + body: Bytes, + tenant_id: &Option, +) -> Result<(), PostError> { let size: usize = body.len(); let json: StrictValue = serde_json::from_slice(&body)?; - let schema = PARSEABLE.get_stream(&stream_name)?.get_schema_raw(); + let schema = PARSEABLE + .get_stream(&stream_name, tenant_id)? + .get_schema_raw(); let mut p_custom_fields = HashMap::new(); p_custom_fields.insert(USER_AGENT_KEY.to_string(), "parseable".to_string()); p_custom_fields.insert(FORMAT_KEY.to_string(), LogSource::Json.to_string()); @@ -159,6 +168,7 @@ pub async fn ingest_internal_stream(stream_name: String, body: Bytes) -> Result< StreamType::Internal, &p_custom_fields, TelemetryType::Logs, + tenant_id, )? .process()?; @@ -195,6 +205,7 @@ pub async fn setup_otel_stream( known_fields.iter().map(|&s| s.to_string()).collect(), ); + let tenant_id = get_tenant_id_from_request(req); PARSEABLE .create_stream_if_not_exists( &stream_name, @@ -202,11 +213,12 @@ pub async fn setup_otel_stream( None, vec![log_source_entry.clone()], telemetry_type, + &tenant_id, ) .await?; let mut time_partition = None; // Validate stream compatibility - if let Ok(stream) = PARSEABLE.get_stream(&stream_name) { + if let Ok(stream) = PARSEABLE.get_stream(&stream_name, &tenant_id) { match log_source { LogSource::OtelLogs => { // For logs, reject if stream is metrics or traces @@ -236,7 +248,7 @@ pub async fn setup_otel_stream( } PARSEABLE - .add_update_log_source(&stream_name, log_source_entry.clone()) + .add_update_log_source(&stream_name, log_source_entry.clone(), &tenant_id) .await?; Ok((stream_name, log_source, log_source_entry, time_partition)) @@ -258,6 +270,7 @@ async fn process_otel_content( .and_then(|h| h.to_str().ok()) { Some(content_type) => { + let tenant_id = get_tenant_id_from_request(req); if content_type == CONTENT_TYPE_JSON { flatten_and_push_logs( serde_json::from_slice(&body)?, @@ -266,6 +279,7 @@ async fn process_otel_content( &p_custom_fields, None, telemetry_type, + &tenant_id, ) .await?; } else if content_type == CONTENT_TYPE_PROTOBUF { @@ -365,18 +379,18 @@ pub async fn post_event( Json(json): Json, ) -> Result { let stream_name = stream_name.into_inner(); - - let internal_stream_names = PARSEABLE.streams.list_internal_streams(); + let tenant_id = get_tenant_id_from_request(&req); + let internal_stream_names = PARSEABLE.streams.list_internal_streams(&tenant_id); if internal_stream_names.contains(&stream_name) { return Err(PostError::InternalStream(stream_name)); } - if !PARSEABLE.streams.contains(&stream_name) { + if !PARSEABLE.streams.contains(&stream_name, &tenant_id) { // For distributed deployments, if the stream not found in memory map, - //check if it exists in the storage - //create stream and schema from storage + // check if it exists in the storage + // create stream and schema from storage if PARSEABLE.options.mode != Mode::All { match PARSEABLE - .create_stream_and_schema_from_storage(&stream_name) + .create_stream_and_schema_from_storage(&stream_name, &tenant_id) .await { Ok(true) => {} @@ -414,9 +428,9 @@ pub async fn post_event( _ => {} } - //if stream exists, fetch the stream log source - //return error 
if the stream log source is otel traces or otel metrics - validate_stream_for_ingestion(&stream_name)?; + // if stream exists, fetch the stream log source + // return error if the stream log source is otel traces or otel metrics + validate_stream_for_ingestion(&stream_name, &tenant_id)?; flatten_and_push_logs( json, @@ -425,6 +439,7 @@ pub async fn post_event( &p_custom_fields, None, TelemetryType::Logs, + &tenant_id, ) .await?; @@ -446,6 +461,7 @@ pub async fn push_logs_unchecked( custom_partition_values: HashMap::new(), // should be an empty map for unchecked push stream_type: StreamType::UserDefined, telemetry_type: TelemetryType::Logs, + tenant_id: None, }; unchecked_event.process_unchecked()?; diff --git a/src/handlers/http/llm.rs b/src/handlers/http/llm.rs index 1df640051..feaeff164 100644 --- a/src/handlers/http/llm.rs +++ b/src/handlers/http/llm.rs @@ -16,14 +16,20 @@ * */ -use actix_web::http::StatusCode; -use actix_web::{HttpResponse, Result, http::header::ContentType, web}; +use actix_web::{ + HttpRequest, HttpResponse, Result, + http::{StatusCode, header::ContentType}, + web, +}; use http::header; use itertools::Itertools; use reqwest; use serde_json::{Value, json}; -use crate::{parseable::PARSEABLE, parseable::StreamNotFound}; +use crate::{ + parseable::{PARSEABLE, StreamNotFound}, + utils::get_tenant_id_from_request, +}; const OPEN_AI_URL: &str = "https://api.openai.com/v1/chat/completions"; @@ -83,14 +89,18 @@ fn build_request_body(ai_prompt: String) -> impl serde::Serialize { }) } -pub async fn make_llm_request(body: web::Json) -> Result { +pub async fn make_llm_request( + req: HttpRequest, + body: web::Json, +) -> Result { let api_key = match &PARSEABLE.options.open_ai_key { Some(api_key) if api_key.len() > 3 => api_key, _ => return Err(LLMError::InvalidAPIKey), }; let stream_name = &body.stream; - let schema = PARSEABLE.get_stream(stream_name)?.get_schema(); + let tenant_id = get_tenant_id_from_request(&req); + let schema = PARSEABLE.get_stream(stream_name, &tenant_id)?.get_schema(); let filtered_schema = schema .flattened_fields() .into_iter() diff --git a/src/handlers/http/logstream.rs b/src/handlers/http/logstream.rs index 79549b86f..5202a736b 100644 --- a/src/handlers/http/logstream.rs +++ b/src/handlers/http/logstream.rs @@ -23,13 +23,15 @@ use crate::event::format::override_data_type; use crate::hottier::{CURRENT_HOT_TIER_VERSION, HotTierManager, StreamHotTier}; use crate::metadata::SchemaVersion; use crate::metrics::{EVENTS_INGESTED_DATE, EVENTS_INGESTED_SIZE_DATE, EVENTS_STORAGE_SIZE_DATE}; -use crate::parseable::{PARSEABLE, StreamNotFound}; +use crate::parseable::{DEFAULT_TENANT, PARSEABLE, StreamNotFound}; use crate::rbac::Users; use crate::rbac::role::Action; use crate::stats::{Stats, event_labels_date, storage_size_labels_date}; use crate::storage::retention::Retention; use crate::storage::{ObjectStoreFormat, StreamInfo, StreamType}; +use crate::tenants::TenantNotFound; use crate::utils::actix::extract_session_key_from_req; +use crate::utils::get_tenant_id_from_request; use crate::utils::json::flatten::{ self, convert_to_array, generic_flattening, has_more_than_max_allowed_levels, }; @@ -47,19 +49,26 @@ use std::fs; use std::sync::Arc; use tracing::warn; -pub async fn delete(stream_name: Path) -> Result { +pub async fn delete( + req: HttpRequest, + stream_name: Path, +) -> Result { let stream_name = stream_name.into_inner(); // Error out if stream doesn't exist in memory, or in the case of query node, in storage as well - if 
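The ingest and logstream handlers repeat one guard: check the in-memory map for the (tenant, stream) pair first, and only then try to hydrate the stream from storage in distributed modes. As a standalone sketch, with the two `PARSEABLE` calls replaced by parameters:

```rust
/// `in_memory` stands in for `PARSEABLE.streams.contains(..)` and `hydrate`
/// for `PARSEABLE.create_stream_and_schema_from_storage(..)`.
async fn ensure_stream_loaded(
    in_memory: bool,
    hydrate: impl std::future::Future<Output = anyhow::Result<bool>>,
    stream_name: &str,
) -> anyhow::Result<()> {
    if in_memory {
        return Ok(());
    }
    if hydrate.await.unwrap_or(false) {
        return Ok(());
    }
    anyhow::bail!("stream {stream_name} not found")
}
```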
!PARSEABLE.check_or_load_stream(&stream_name).await { + let tenant_id = get_tenant_id_from_request(&req); + if !PARSEABLE + .check_or_load_stream(&stream_name, &tenant_id) + .await + { return Err(StreamNotFound(stream_name).into()); } let objectstore = PARSEABLE.storage.get_object_store(); // Delete from storage - objectstore.delete_stream(&stream_name).await?; + objectstore.delete_stream(&stream_name, &tenant_id).await?; // Delete from staging - let stream_dir = PARSEABLE.get_or_create_stream(&stream_name); + let stream_dir = PARSEABLE.get_or_create_stream(&stream_name, &tenant_id); if let Err(err) = fs::remove_dir_all(&stream_dir.data_path) { warn!( "failed to delete local data for stream {} with error {err}. Clean {} manually", @@ -69,14 +78,16 @@ pub async fn delete(stream_name: Path) -> Result Result { let key = extract_session_key_from_req(&req) .map_err(|err| StreamError::Anyhow(anyhow::Error::msg(err.to_string())))?; + let tenant_id = get_tenant_id_from_request(&req); // list all streams from storage let res = PARSEABLE .metastore - .list_streams() + .list_streams(&tenant_id) .await? .into_iter() .filter(|logstream| { @@ -159,16 +171,22 @@ pub async fn detect_schema(Json(json): Json) -> Result) -> Result { +pub async fn get_schema( + req: HttpRequest, + stream_name: Path, +) -> Result { let stream_name = stream_name.into_inner(); - + let tenant_id = get_tenant_id_from_request(&req); // Ensure parseable is aware of stream in distributed mode - if !PARSEABLE.check_or_load_stream(&stream_name).await { + if !PARSEABLE + .check_or_load_stream(&stream_name, &tenant_id) + .await + { return Err(StreamNotFound(stream_name.clone()).into()); } - let stream = PARSEABLE.get_stream(&stream_name)?; - match update_schema_when_distributed(&vec![stream_name.clone()]).await { + let stream = PARSEABLE.get_stream(&stream_name, &tenant_id)?; + match update_schema_when_distributed(&vec![stream_name.clone()], &tenant_id).await { Ok(_) => { let schema = stream.get_schema(); Ok((web::Json(schema), StatusCode::OK)) @@ -186,49 +204,64 @@ pub async fn put_stream( body: Bytes, ) -> Result { let stream_name = stream_name.into_inner(); + let tenant_id = get_tenant_id_from_request(&req); + // tracing::warn!(put_stream_req=?req); PARSEABLE - .create_update_stream(req.headers(), &body, &stream_name) + .create_update_stream(req.headers(), &body, &stream_name, &tenant_id) .await?; Ok(("Log stream created", StatusCode::OK)) } -pub async fn get_retention(stream_name: Path) -> Result { +pub async fn get_retention( + req: HttpRequest, + stream_name: Path, +) -> Result { let stream_name = stream_name.into_inner(); + let tenant_id = get_tenant_id_from_request(&req); // For query mode, if the stream not found in memory map, //check if it exists in the storage //create stream and schema from storage - if !PARSEABLE.check_or_load_stream(&stream_name).await { + if !PARSEABLE + .check_or_load_stream(&stream_name, &tenant_id) + .await + { return Err(StreamNotFound(stream_name.clone()).into()); } let retention = PARSEABLE - .get_stream(&stream_name)? + .get_stream(&stream_name, &tenant_id)? 
.get_retention() .unwrap_or_default(); Ok((web::Json(retention), StatusCode::OK)) } pub async fn put_retention( + req: HttpRequest, stream_name: Path, Json(retention): Json, ) -> Result { let stream_name = stream_name.into_inner(); - + let tenant_id = get_tenant_id_from_request(&req); // For query mode, if the stream not found in memory map, //check if it exists in the storage //create stream and schema from storage - if !PARSEABLE.check_or_load_stream(&stream_name).await { + if !PARSEABLE + .check_or_load_stream(&stream_name, &tenant_id) + .await + { return Err(StreamNotFound(stream_name).into()); } PARSEABLE .storage .get_object_store() - .put_retention(&stream_name, &retention) + .put_retention(&stream_name, &retention, &tenant_id) .await?; - PARSEABLE.get_stream(&stream_name)?.set_retention(retention); + PARSEABLE + .get_stream(&stream_name, &tenant_id)? + .set_retention(retention); Ok(( format!("set retention configuration for log stream {stream_name}"), @@ -236,9 +269,14 @@ pub async fn put_retention( )) } -pub async fn get_stats_date(stream_name: &str, date: &str) -> Result { - let event_labels = event_labels_date(stream_name, "json", date); - let storage_size_labels = storage_size_labels_date(stream_name, date); +pub async fn get_stats_date( + stream_name: &str, + date: &str, + tenant_id: &Option, +) -> Result { + let tenant = tenant_id.as_ref().map_or(DEFAULT_TENANT, |v| v); + let event_labels = event_labels_date(stream_name, "json", date, tenant); + let storage_size_labels = storage_size_labels_date(stream_name, date, tenant); let events_ingested = EVENTS_INGESTED_DATE .get_metric_with_label_values(&event_labels) .unwrap() @@ -265,11 +303,14 @@ pub async fn get_stats( stream_name: Path, ) -> Result { let stream_name = stream_name.into_inner(); - + let tenant_id = get_tenant_id_from_request(&req); // For query mode, if the stream not found in memory map, //check if it exists in the storage //create stream and schema from storage - if !PARSEABLE.check_or_load_stream(&stream_name).await { + if !PARSEABLE + .check_or_load_stream(&stream_name, &tenant_id) + .await + { return Err(StreamNotFound(stream_name.clone()).into()); } @@ -286,13 +327,13 @@ pub async fn get_stats( } if !date_value.is_empty() { - let stats = get_stats_date(&stream_name, date_value).await?; + let stats = get_stats_date(&stream_name, date_value, &tenant_id).await?; let stats = serde_json::to_value(stats)?; return Ok((web::Json(stats), StatusCode::OK)); } } - let stats = stats::get_current_stats(&stream_name, "json") + let stats = stats::get_current_stats(&stream_name, "json", &tenant_id) .ok_or_else(|| StreamNotFound(stream_name.clone()))?; let time = Utc::now(); @@ -322,12 +363,19 @@ pub async fn get_stats( Ok((web::Json(stats), StatusCode::OK)) } -pub async fn get_stream_info(stream_name: Path) -> Result { +pub async fn get_stream_info( + req: HttpRequest, + stream_name: Path, +) -> Result { let stream_name = stream_name.into_inner(); + let tenant_id = get_tenant_id_from_request(&req); // For query mode, if the stream not found in memory map, //check if it exists in the storage //create stream and schema from storage - if !PARSEABLE.check_or_load_stream(&stream_name).await { + if !PARSEABLE + .check_or_load_stream(&stream_name, &tenant_id) + .await + { return Err(StreamNotFound(stream_name.clone()).into()); } @@ -335,7 +383,7 @@ pub async fn get_stream_info(stream_name: Path) -> Result result, @@ -348,8 +396,11 @@ pub async fn get_stream_info(stream_name: Path) -> Result) -> Result, Json(mut hottier): Json, ) -> 
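`get_stats_date` now builds its label sets with the tenant as an extra dimension before looking up the per-date counters. The metric vectors themselves are declared elsewhere in `crate::metrics`; a simplified registration showing the added label (metric name and label ordering here are assumptions):

```rust
use prometheus::{IntCounterVec, Opts};

// Assumed label set; the real EVENTS_INGESTED_DATE vector is defined in crate::metrics.
fn events_ingested_date() -> IntCounterVec {
    IntCounterVec::new(
        Opts::new("parseable_events_ingested_date", "events ingested per day"),
        &["stream", "format", "date", "tenant"],
    )
    .expect("valid metric definition")
}
```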
Result { let stream_name = stream_name.into_inner(); + let tenant_id = get_tenant_id_from_request(&req); // For query mode, if the stream not found in memory map, //check if it exists in the storage //create stream and schema from storage - if !PARSEABLE.check_or_load_stream(&stream_name).await { + if !PARSEABLE + .check_or_load_stream(&stream_name, &tenant_id) + .await + { return Err(StreamNotFound(stream_name).into()); } - let stream = PARSEABLE.get_stream(&stream_name)?; + let stream = PARSEABLE.get_stream(&stream_name, &tenant_id)?; if stream.get_stream_type() == StreamType::Internal { return Err(StreamError::Custom { @@ -385,24 +441,25 @@ pub async fn put_stream_hot_tier( validator::hot_tier(&hottier.size.to_string())?; + // TODO tenants stream.set_hot_tier(Some(hottier.clone())); let Some(hot_tier_manager) = HotTierManager::global() else { return Err(StreamError::HotTierNotEnabled(stream_name)); }; let existing_hot_tier_used_size = hot_tier_manager - .validate_hot_tier_size(&stream_name, hottier.size) + .validate_hot_tier_size(&stream_name, hottier.size, &tenant_id) .await?; hottier.used_size = existing_hot_tier_used_size; hottier.available_size = hottier.size; hottier.version = Some(CURRENT_HOT_TIER_VERSION.to_string()); hot_tier_manager - .put_hot_tier(&stream_name, &mut hottier) + .put_hot_tier(&stream_name, &mut hottier, &tenant_id) .await?; let mut stream_metadata: ObjectStoreFormat = serde_json::from_slice( &PARSEABLE .metastore - .get_stream_json(&stream_name, false) + .get_stream_json(&stream_name, false, &tenant_id) .await?, )?; stream_metadata.hot_tier_enabled = true; @@ -410,7 +467,7 @@ pub async fn put_stream_hot_tier( PARSEABLE .metastore - .put_stream_json(&stream_metadata, &stream_name) + .put_stream_json(&stream_metadata, &stream_name, &tenant_id) .await?; Ok(( @@ -419,37 +476,53 @@ pub async fn put_stream_hot_tier( )) } -pub async fn get_stream_hot_tier(stream_name: Path) -> Result { +pub async fn get_stream_hot_tier( + req: HttpRequest, + stream_name: Path, +) -> Result { let stream_name = stream_name.into_inner(); - + let tenant_id = get_tenant_id_from_request(&req); // For query mode, if the stream not found in memory map, //check if it exists in the storage //create stream and schema from storage - if !PARSEABLE.check_or_load_stream(&stream_name).await { + if !PARSEABLE + .check_or_load_stream(&stream_name, &tenant_id) + .await + { return Err(StreamNotFound(stream_name.clone()).into()); } let Some(hot_tier_manager) = HotTierManager::global() else { return Err(StreamError::HotTierNotEnabled(stream_name)); }; - let meta = hot_tier_manager.get_hot_tier(&stream_name).await?; + let meta = hot_tier_manager + .get_hot_tier(&stream_name, &tenant_id) + .await?; Ok((web::Json(meta), StatusCode::OK)) } pub async fn delete_stream_hot_tier( + req: HttpRequest, stream_name: Path, ) -> Result { let stream_name = stream_name.into_inner(); - + let tenant_id = get_tenant_id_from_request(&req); // For query mode, if the stream not found in memory map, //check if it exists in the storage //create stream and schema from storage - if !PARSEABLE.check_or_load_stream(&stream_name).await { + if !PARSEABLE + .check_or_load_stream(&stream_name, &tenant_id) + .await + { return Err(StreamNotFound(stream_name).into()); } - if PARSEABLE.get_stream(&stream_name)?.get_stream_type() == StreamType::Internal { + if PARSEABLE + .get_stream(&stream_name, &tenant_id)? 
+ .get_stream_type() + == StreamType::Internal + { return Err(StreamError::Custom { msg: "Hot tier can not be deleted for internal stream".to_string(), status: StatusCode::BAD_REQUEST, @@ -460,12 +533,14 @@ pub async fn delete_stream_hot_tier( return Err(StreamError::HotTierNotEnabled(stream_name)); }; - hot_tier_manager.delete_hot_tier(&stream_name).await?; + hot_tier_manager + .delete_hot_tier(&stream_name, &tenant_id) + .await?; let mut stream_metadata: ObjectStoreFormat = serde_json::from_slice( &PARSEABLE .metastore - .get_stream_json(&stream_name, false) + .get_stream_json(&stream_name, false, &tenant_id) .await?, )?; stream_metadata.hot_tier_enabled = false; @@ -473,7 +548,7 @@ pub async fn delete_stream_hot_tier( PARSEABLE .metastore - .put_stream_json(&stream_metadata, &stream_name) + .put_stream_json(&stream_metadata, &stream_name, &tenant_id) .await?; Ok(( @@ -502,6 +577,7 @@ pub mod error { metastore::MetastoreError, parseable::StreamNotFound, storage::ObjectStorageError, + tenants::TenantNotFound, validator::error::{ AlertValidationError, HotTierValidationError, StreamNameValidationError, }, @@ -574,6 +650,8 @@ pub mod error { InvalidQueryParameter(String), #[error(transparent)] MetastoreError(#[from] MetastoreError), + #[error("{0}")] + TenantNotFoundError(#[from] TenantNotFound), } impl actix_web::ResponseError for StreamError { @@ -592,6 +670,7 @@ pub mod error { StatusCode::BAD_REQUEST } StreamError::StreamNotFound(_) => StatusCode::NOT_FOUND, + StreamError::TenantNotFoundError(_) => StatusCode::NOT_FOUND, StreamError::Custom { status, .. } => *status, StreamError::UninitializedLogstream => StatusCode::METHOD_NOT_ALLOWED, StreamError::Storage(_) => StatusCode::INTERNAL_SERVER_ERROR, diff --git a/src/handlers/http/middleware.rs b/src/handlers/http/middleware.rs index 280f8894c..6314dfe97 100644 --- a/src/handlers/http/middleware.rs +++ b/src/handlers/http/middleware.rs @@ -20,10 +20,10 @@ use std::future::{Ready, ready}; use actix_web::{ - Error, HttpMessage, Route, + Error, HttpMessage, HttpRequest, Route, dev::{Service, ServiceRequest, ServiceResponse, Transform, forward_ready}, error::{ErrorBadRequest, ErrorForbidden, ErrorUnauthorized}, - http::header::{self, HeaderName}, + http::header::{self, HeaderName, HeaderValue}, }; use chrono::{Duration, Utc}; use futures_util::future::LocalBoxFuture; @@ -31,17 +31,17 @@ use futures_util::future::LocalBoxFuture; use crate::{ handlers::{ AUTHORIZATION_KEY, KINESIS_COMMON_ATTRIBUTES_KEY, LOG_SOURCE_KEY, LOG_SOURCE_KINESIS, - STREAM_NAME_HEADER_KEY, - http::{modal::OIDC_CLIENT, rbac::RBACError}, + STREAM_NAME_HEADER_KEY, http::modal::OIDC_CLIENT, }, option::Mode, - parseable::PARSEABLE, + parseable::{DEFAULT_TENANT, PARSEABLE}, rbac::{ EXPIRY_DURATION, map::{SessionKey, mut_sessions, mut_users, sessions, users}, roles_to_permission, user, }, - utils::get_user_from_request, + tenants::TENANT_METADATA, + utils::get_user_and_tenant_from_request, }; use crate::{ rbac::Users, @@ -163,14 +163,26 @@ where header::HeaderValue::from_static(LOG_SOURCE_KINESIS), ); } - /* ## Section end */ + // append tenant id if present + let user_and_tenant_id = match get_user_and_tenant_from_request(req.request()) { + Ok((uid, tid)) => { + if tid.is_some() { + req.headers_mut().insert( + HeaderName::from_static("tenant"), + HeaderValue::from_str(&tid.as_ref().unwrap()).unwrap(), + ); + } + + Ok((uid, tid)) + } + Err(e) => Err(e), + }; + // tracing::warn!("incomin request- {req:?}"); let auth_result: Result<_, Error> = (self.auth_method)(&mut req, 
self.action); - let http_req = req.request().clone(); let key: Result = extract_session_key(&mut req); - let userid: Result = get_user_from_request(&http_req); let fut = self.service.call(req); Box::pin(async move { @@ -185,10 +197,13 @@ where let oidc_client = OIDC_CLIENT.get(); if let Some(client) = oidc_client - && let Ok(userid) = userid + && let Ok((userid, tenant_id)) = user_and_tenant_id { let bearer_to_refresh = { - if let Some(user) = users().get(&userid) { + if let Some(users) = + users().get(tenant_id.as_ref().map_or(DEFAULT_TENANT, |v| v)) + && let Some(user) = users.get(&userid) + { match &user.ty { user::UserType::OAuth(oauth) if oauth.bearer.is_some() => { Some(oauth.clone()) @@ -233,7 +248,10 @@ where let user_roles = { let mut users_guard = mut_users(); - if let Some(user) = users_guard.get_mut(&userid) { + if let Some(users) = users_guard + .get_mut(tenant_id.as_ref().map_or(DEFAULT_TENANT, |v| v)) + && let Some(user) = users.get_mut(&userid) + { if let user::UserType::OAuth(oauth) = &mut user.ty { oauth.bearer = Some(refreshed_token); } @@ -249,14 +267,25 @@ where userid.clone(), key.clone(), Utc::now() + expires_in, - roles_to_permission(user_roles), + roles_to_permission( + user_roles, + tenant_id.as_ref().map_or(DEFAULT_TENANT, |v| v), + ), + &tenant_id, ); - } else if let Some(user) = users().get(&userid) { + } else if let Some(users) = + users().get(tenant_id.as_ref().map_or(DEFAULT_TENANT, |v| v)) + && let Some(user) = users.get(&userid) + { mut_sessions().track_new( userid.clone(), key.clone(), Utc::now() + EXPIRY_DURATION, - roles_to_permission(user.roles()), + roles_to_permission( + user.roles(), + tenant_id.as_ref().map_or(DEFAULT_TENANT, |v| v), + ), + &tenant_id, ); } } @@ -273,6 +302,9 @@ where "Your session has expired or is no longer valid. 
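The OAuth refresh path now has to index into the user and role maps per tenant before it can reach the individual user, falling back to the default tenant when none is present. The lookup reduced to a sketch (map shapes simplified; the real maps sit behind the `users()` and `roles()` guards):

```rust
use std::collections::HashMap;

fn find_user<'a, U>(
    users: &'a HashMap<String, HashMap<String, U>>,
    tenant_id: &Option<String>,
    userid: &str,
) -> Option<&'a U> {
    // "default" stands in for DEFAULT_TENANT, as in the map_or calls above.
    let tenant = tenant_id.as_deref().unwrap_or("default");
    users.get(tenant)?.get(userid)
}
```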
Please re-authenticate to access this resource.", )); } + rbac::Response::Suspended(msg) => { + return Err(ErrorBadRequest(msg)); + } _ => {} } @@ -281,7 +313,25 @@ where } } +pub fn check_suspension(req: &HttpRequest, action: Action) -> rbac::Response { + if let Some(tenant) = req.headers().get("tenant") + && let Ok(tenant) = tenant.to_str() + { + if let Ok(Some(suspension)) = TENANT_METADATA.is_action_suspended(tenant, &action) { + return rbac::Response::Suspended(suspension); + } else { + // tenant does not exist + } + } + rbac::Response::Authorized +} + pub fn auth_no_context(req: &mut ServiceRequest, action: Action) -> Result { + // check if tenant is suspended + match check_suspension(req.request(), action) { + rbac::Response::Suspended(msg) => return Ok(rbac::Response::Suspended(msg)), + _ => {} + } let creds = extract_session_key(req); creds.map(|key| Users.authorize(key, action, None, None)) } @@ -290,6 +340,11 @@ pub fn auth_resource_context( req: &mut ServiceRequest, action: Action, ) -> Result { + // check if tenant is suspended + match check_suspension(req.request(), action) { + rbac::Response::Suspended(msg) => return Ok(rbac::Response::Suspended(msg)), + _ => {} + } let creds = extract_session_key(req); let usergroup = req.match_info().get("usergroup"); let llmid = req.match_info().get("llmid"); @@ -312,6 +367,11 @@ pub fn auth_user_context( req: &mut ServiceRequest, action: Action, ) -> Result { + // check if tenant is suspended + match check_suspension(req.request(), action) { + rbac::Response::Suspended(msg) => return Ok(rbac::Response::Suspended(msg)), + _ => {} + } let creds = extract_session_key(req); let user = req.match_info().get("username"); creds.map(|key| Users.authorize(key, action, None, user)) diff --git a/src/handlers/http/mod.rs b/src/handlers/http/mod.rs index ad10cb28a..993a40b10 100644 --- a/src/handlers/http/mod.rs +++ b/src/handlers/http/mod.rs @@ -86,8 +86,14 @@ pub fn base_path_without_preceding_slash() -> String { /// # Returns /// /// An `anyhow::Result` containing the `arrow_schema::Schema` for the specified stream. 
-pub async fn fetch_schema(stream_name: &str) -> anyhow::Result { - let res: Vec = PARSEABLE.metastore.get_all_schemas(stream_name).await?; +pub async fn fetch_schema( + stream_name: &str, + tenant_id: &Option, +) -> anyhow::Result { + let res: Vec = PARSEABLE + .metastore + .get_all_schemas(stream_name, tenant_id) + .await?; let new_schema = Schema::try_merge(res)?; Ok(new_schema) @@ -95,10 +101,13 @@ pub async fn fetch_schema(stream_name: &str) -> anyhow::Result anyhow::Result> { +pub async fn send_query_request_to_ingestor( + query: &Query, + tenant_id: &Option, +) -> anyhow::Result> { // send the query request to the ingestor let mut res = vec![]; - let ima: Vec = get_node_info(NodeType::Ingestor).await?; + let ima: Vec = get_node_info(NodeType::Ingestor, tenant_id).await?; for im in ima.iter() { let uri = format!( diff --git a/src/handlers/http/modal/ingest/ingestor_logstream.rs b/src/handlers/http/modal/ingest/ingestor_logstream.rs index 49cea29a3..9f7135068 100644 --- a/src/handlers/http/modal/ingest/ingestor_logstream.rs +++ b/src/handlers/http/modal/ingest/ingestor_logstream.rs @@ -31,27 +31,31 @@ use crate::{ handlers::http::logstream::error::StreamError, parseable::{PARSEABLE, StreamNotFound}, stats, + utils::get_tenant_id_from_request, }; pub async fn retention_cleanup( + req: HttpRequest, stream_name: Path, Json(date_list): Json>, ) -> Result { let stream_name = stream_name.into_inner(); let storage = PARSEABLE.storage().get_object_store(); + let tenant_id = get_tenant_id_from_request(&req); // if the stream not found in memory map, //check if it exists in the storage //create stream and schema from storage - if !PARSEABLE.streams.contains(&stream_name) + if !PARSEABLE.streams.contains(&stream_name, &tenant_id) && !PARSEABLE - .create_stream_and_schema_from_storage(&stream_name) + .create_stream_and_schema_from_storage(&stream_name, &tenant_id) .await .unwrap_or(false) { return Err(StreamNotFound(stream_name.clone()).into()); } - if let Err(err) = remove_manifest_from_snapshot(storage.clone(), &stream_name, date_list).await + if let Err(err) = + remove_manifest_from_snapshot(&storage, &stream_name, date_list, &tenant_id).await { return Err(StreamError::Custom { msg: format!( @@ -65,11 +69,14 @@ pub async fn retention_cleanup( Ok(actix_web::HttpResponse::NoContent().finish()) } -pub async fn delete(stream_name: Path) -> Result { +pub async fn delete( + req: HttpRequest, + stream_name: Path, +) -> Result { let stream_name = stream_name.into_inner(); - + let tenant_id = get_tenant_id_from_request(&req); // Delete from staging - let stream_dir = PARSEABLE.get_stream(&stream_name)?; + let stream_dir = PARSEABLE.get_stream(&stream_name, &tenant_id)?; if let Err(err) = fs::remove_dir_all(&stream_dir.data_path) { warn!( "failed to delete local data for stream {} with error {err}. 
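`fetch_schema` still merges the per-node schema fragments with `Schema::try_merge`; the only change is that the metastore lookup is tenant-scoped. A small, self-contained illustration of the merge step itself:

```rust
use arrow_schema::{ArrowError, DataType, Field, Schema};

fn merge_example() -> Result<Schema, ArrowError> {
    // Two fragments as they might come back from different nodes for one stream.
    let a = Schema::new(vec![Field::new("ts", DataType::Utf8, true)]);
    let b = Schema::new(vec![
        Field::new("ts", DataType::Utf8, true),
        Field::new("level", DataType::Utf8, true),
    ]);
    // try_merge unifies the field sets and errors on conflicting types.
    Schema::try_merge(vec![a, b])
}
```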
Clean {} manually", @@ -79,8 +86,8 @@ pub async fn delete(stream_name: Path) -> Result Result { let stream_name = stream_name.into_inner(); + let tenant_id = get_tenant_id_from_request(&req); PARSEABLE - .create_update_stream(req.headers(), &body, &stream_name) + .create_update_stream(req.headers(), &body, &stream_name, &tenant_id) .await?; Ok(("Log stream created", StatusCode::OK)) diff --git a/src/handlers/http/modal/ingest/ingestor_rbac.rs b/src/handlers/http/modal/ingest/ingestor_rbac.rs index 761472f4b..c74ada629 100644 --- a/src/handlers/http/modal/ingest/ingestor_rbac.rs +++ b/src/handlers/http/modal/ingest/ingestor_rbac.rs @@ -18,71 +18,88 @@ use std::collections::HashSet; -use actix_web::http::StatusCode; -use actix_web::{HttpResponse, web}; +use actix_web::{HttpRequest, HttpResponse, http::StatusCode, web}; use crate::{ handlers::http::{ modal::utils::rbac_utils::get_metadata, rbac::{RBACError, UPDATE_LOCK}, }, + parseable::DEFAULT_TENANT, rbac::{ Users, map::roles, user::{self, User as ParseableUser}, }, storage, + utils::get_tenant_id_from_request, }; // Handler for POST /api/v1/user/{username} // Creates a new user by username if it does not exists pub async fn post_user( + req: HttpRequest, username: web::Path, body: Option>, ) -> Result { let username = username.into_inner(); - let metadata = get_metadata().await?; if let Some(body) = body { let user: ParseableUser = serde_json::from_value(body.into_inner())?; - let _ = storage::put_staging_metadata(&metadata); + let req_tenant_id = get_tenant_id_from_request(&req); + let req_tenant = req_tenant_id.as_ref().map_or(DEFAULT_TENANT, |v| v); + if req_tenant.ne(DEFAULT_TENANT) + && (req_tenant.eq(user.tenant.as_ref().map_or(DEFAULT_TENANT, |v| v))) + { + return Err(RBACError::Anyhow(anyhow::Error::msg( + "non super-admin user trying to create user for another tenant", + ))); + } + let req_tenant_id = &user.tenant; + let metadata = get_metadata(req_tenant_id).await?; + let _ = storage::put_staging_metadata(&metadata, req_tenant_id); let created_role = user.roles.clone(); Users.put_user(user.clone()); - Users.add_roles(&username, created_role.clone()); + Users.add_roles(&username, created_role.clone(), req_tenant_id); } Ok(HttpResponse::Ok().status(StatusCode::OK).finish()) } // Handler for DELETE /api/v1/user/delete/{userid} -pub async fn delete_user(userid: web::Path) -> Result { +pub async fn delete_user( + req: HttpRequest, + userid: web::Path, +) -> Result { let userid = userid.into_inner(); + let tenant_id = get_tenant_id_from_request(&req); let _guard = UPDATE_LOCK.lock().await; // fail this request if the user does not exists - if !Users.contains(&userid) { + if !Users.contains(&userid, &tenant_id) { return Err(RBACError::UserDoesNotExist); }; // delete from parseable.json first - let mut metadata = get_metadata().await?; + let mut metadata = get_metadata(&tenant_id).await?; metadata.users.retain(|user| user.userid() != userid); - let _ = storage::put_staging_metadata(&metadata); + let _ = storage::put_staging_metadata(&metadata, &tenant_id); // update in mem table - Users.delete_user(&userid); + Users.delete_user(&userid, &tenant_id); Ok(HttpResponse::Ok().status(StatusCode::OK).finish()) } // Handler PATCH /user/{userid}/role/sync/add => Add roles to a user pub async fn add_roles_to_user( + req: HttpRequest, userid: web::Path, roles_to_add: web::Json>, ) -> Result { let userid = userid.into_inner(); let roles_to_add = roles_to_add.into_inner(); - - if !Users.contains(&userid) { + let tenant_id = 
get_tenant_id_from_request(&req); + if !Users.contains(&userid, &tenant_id) { return Err(RBACError::UserDoesNotExist); }; @@ -99,7 +116,7 @@ pub async fn add_roles_to_user( } // update parseable.json first - let mut metadata = get_metadata().await?; + let mut metadata = get_metadata(&tenant_id).await?; if let Some(user) = metadata .users .iter_mut() @@ -111,21 +128,22 @@ pub async fn add_roles_to_user( return Err(RBACError::UserDoesNotExist); } - let _ = storage::put_staging_metadata(&metadata); + let _ = storage::put_staging_metadata(&metadata, &tenant_id); // update in mem table - Users.add_roles(&userid.clone(), roles_to_add.clone()); + Users.add_roles(&userid.clone(), roles_to_add.clone(), &tenant_id); Ok(HttpResponse::Ok().status(StatusCode::OK).finish()) } // Handler PATCH /user/{userid}/role/sync/remove => Remove roles to a user pub async fn remove_roles_from_user( + req: HttpRequest, userid: web::Path, roles_to_remove: web::Json>, ) -> Result { let userid = userid.into_inner(); let roles_to_remove = roles_to_remove.into_inner(); - - if !Users.contains(&userid) { + let tenant_id = get_tenant_id_from_request(&req); + if !Users.contains(&userid, &tenant_id) { return Err(RBACError::UserDoesNotExist); }; @@ -142,7 +160,7 @@ pub async fn remove_roles_from_user( } // check that user actually has these roles - let user_roles: HashSet = HashSet::from_iter(Users.get_role(&userid)); + let user_roles: HashSet = HashSet::from_iter(Users.get_role(&userid, &tenant_id)); let roles_not_with_user: HashSet = HashSet::from_iter(roles_to_remove.difference(&user_roles).cloned()); @@ -153,7 +171,7 @@ pub async fn remove_roles_from_user( } // update parseable.json in staging first - let mut metadata = get_metadata().await?; + let mut metadata = get_metadata(&tenant_id).await?; if let Some(user) = metadata .users .iter_mut() @@ -167,21 +185,25 @@ pub async fn remove_roles_from_user( return Err(RBACError::UserDoesNotExist); } - let _ = storage::put_staging_metadata(&metadata); + let _ = storage::put_staging_metadata(&metadata, &tenant_id); // update in mem table - Users.remove_roles(&userid.clone(), roles_to_remove.clone()); + Users.remove_roles(&userid.clone(), roles_to_remove.clone(), &tenant_id); Ok(HttpResponse::Ok().status(StatusCode::OK).finish()) } // Handler for POST /api/v1/user/{username}/generate-new-password // Resets password for the user to a newly generated one and returns it -pub async fn post_gen_password(username: web::Path) -> Result { +pub async fn post_gen_password( + req: HttpRequest, + username: web::Path, +) -> Result { let username = username.into_inner(); + let tenant_id = get_tenant_id_from_request(&req); let mut new_hash = String::default(); - let mut metadata = get_metadata().await?; + let mut metadata = get_metadata(&tenant_id).await?; - let _ = storage::put_staging_metadata(&metadata); + let _ = storage::put_staging_metadata(&metadata, &tenant_id); if let Some(user) = metadata .users .iter_mut() @@ -195,6 +217,6 @@ pub async fn post_gen_password(username: web::Path) -> Result, - Json(privileges): Json>, + Json(sync_req): Json, ) -> Result { let name = name.into_inner(); - let mut metadata = get_metadata().await?; - metadata.roles.insert(name.clone(), privileges.clone()); + let req_tenant_id = get_tenant_id_from_request(&req); + let req_tenant = req_tenant_id.as_ref().map_or(DEFAULT_TENANT, |v| v); + if req_tenant.ne(DEFAULT_TENANT) && (req_tenant.eq(&sync_req.tenant_id)) { + return Err(RoleError::Anyhow(anyhow::Error::msg( + "non super-admin user trying to create role for 
another tenant", + ))); + } + let req_tenant_id = &Some(sync_req.tenant_id); + let mut metadata = get_metadata(req_tenant_id).await?; + metadata + .roles + .insert(name.clone(), sync_req.privileges.clone()); - let _ = storage::put_staging_metadata(&metadata); - mut_roles().insert(name.clone(), privileges); + let _ = storage::put_staging_metadata(&metadata, req_tenant_id); + let tenant_id = req_tenant_id + .as_ref() + .map_or(DEFAULT_TENANT, |v| v) + .to_owned(); + mut_roles() + .entry(tenant_id.clone()) + .or_default() + .insert(name.clone(), sync_req.privileges); + // mut_roles().insert(name.clone(), privileges); // refresh the sessions of all users using this role // for this, iterate over all user_groups and users and create a hashset of users let mut session_refresh_users: HashSet = HashSet::new(); - for user_group in read_user_groups().values() { - if user_group.roles.contains(&name) { - session_refresh_users.extend(user_group.users.iter().map(|u| u.userid().to_string())); + if let Some(groups) = read_user_groups().get(&tenant_id) { + for user_group in groups.values() { + if user_group.roles.contains(&name) { + session_refresh_users + .extend(user_group.users.iter().map(|u| u.userid().to_string())); + } } } // iterate over all users to see if they have this role - for user in users().values() { - if user.roles.contains(&name) { - session_refresh_users.insert(user.userid().to_string()); + if let Some(users) = users().get(&tenant_id) { + for user in users.values() { + if user.roles.contains(&name) { + session_refresh_users.insert(user.userid().to_string()); + } } } for userid in session_refresh_users { - mut_sessions().remove_user(&userid); + mut_sessions().remove_user(&userid, &tenant_id); } Ok(HttpResponse::Ok().finish()) diff --git a/src/handlers/http/modal/ingest/mod.rs b/src/handlers/http/modal/ingest/mod.rs index 66ae910da..4dd225481 100644 --- a/src/handlers/http/modal/ingest/mod.rs +++ b/src/handlers/http/modal/ingest/mod.rs @@ -16,6 +16,25 @@ * */ +use serde::{Deserialize, Serialize}; + +use crate::rbac::role::model::DefaultPrivilege; + pub mod ingestor_logstream; pub mod ingestor_rbac; pub mod ingestor_role; + +#[derive(Deserialize, Serialize)] +pub struct SyncRole { + privileges: Vec, + tenant_id: String, +} + +impl SyncRole { + pub fn new(privileges: Vec, tenant_id: String) -> Self { + Self { + privileges, + tenant_id, + } + } +} diff --git a/src/handlers/http/modal/ingest_server.rs b/src/handlers/http/modal/ingest_server.rs index fe2638aa6..c389bbafd 100644 --- a/src/handlers/http/modal/ingest_server.rs +++ b/src/handlers/http/modal/ingest_server.rs @@ -110,7 +110,7 @@ impl ParseableServer for IngestServer { // write the ingestor metadata to storage INGESTOR_META .get_or_init(|| async { - IngestorMetadata::load_node_metadata(NodeType::Ingestor) + IngestorMetadata::load_node_metadata(NodeType::Ingestor, &None) .await .expect("Ingestor Metadata should be set in ingestor mode") }) @@ -173,7 +173,7 @@ impl IngestServer { ) .service( web::resource("/{name}/sync") - .route(web::put().to(ingestor_role::put).authorize(Action::PutRole)), + .route(web::put().to(ingestor_role::put).authorize(Action::PutRole)), // .route(web::delete().to(ingestor_role::delete).authorize(Action::DeleteRole)), ) } // get the user webscope @@ -293,7 +293,7 @@ pub async fn check_querier_state() -> anyhow::Result, ObjectStorag // i.e the querier will create the `.parseable.json` file let parseable_json = PARSEABLE .metastore - .get_parseable_metadata() + .get_parseable_metadata(&None) .await .map_err(|e| 
ObjectStorageError::MetastoreError(Box::new(e.to_detail())))?; diff --git a/src/handlers/http/modal/mod.rs b/src/handlers/http/modal/mod.rs index f1ec5206b..471483d6d 100644 --- a/src/handlers/http/modal/mod.rs +++ b/src/handlers/http/modal/mod.rs @@ -42,7 +42,7 @@ use crate::{ metastore::metastore_traits::MetastoreObject, oidc::{Claims, DiscoveredClient}, option::Mode, - parseable::PARSEABLE, + parseable::{DEFAULT_TENANT, PARSEABLE}, storage::{ObjectStorageProvider, PARSEABLE_ROOT_DIRECTORY}, users::{dashboards::DASHBOARDS, filters::FILTERS}, utils::get_node_id, @@ -328,7 +328,10 @@ impl NodeMetadata { } } - pub async fn load_node_metadata(node_type: NodeType) -> anyhow::Result> { + pub async fn load_node_metadata( + node_type: NodeType, + tenant_id: &Option, + ) -> anyhow::Result> { let staging_path = PARSEABLE.options.staging_dir(); let node_type_str = node_type.as_str(); @@ -339,7 +342,7 @@ impl NodeMetadata { } // Attempt to load metadata from storage - let storage_metas = Self::load_from_storage(node_type.clone()).await; + let storage_metas = Self::load_from_storage(node_type.clone(), tenant_id).await; let url = PARSEABLE.options.get_url(node_type.to_mode()); let port = url.port().unwrap_or(80).to_string(); let url = url.to_string(); @@ -381,8 +384,14 @@ impl NodeMetadata { Ok(Arc::new(meta)) } - async fn load_from_storage(node_type: NodeType) -> Vec { - let obs = PARSEABLE.metastore.get_node_metadata(node_type).await; + async fn load_from_storage( + node_type: NodeType, + tenant_id: &Option, + ) -> Vec { + let obs = PARSEABLE + .metastore + .get_node_metadata(node_type, tenant_id) + .await; let mut metadata = vec![]; if let Ok(obs) = obs { @@ -620,25 +629,47 @@ pub async fn initialize_hot_tier_metadata_on_startup( hot_tier_manager: &HotTierManager, ) -> anyhow::Result<()> { // Collect hot tier configurations from streams before doing async operations - let hot_tier_configs: Vec<(String, StreamHotTier)> = { - let streams_guard = PARSEABLE.streams.read().unwrap(); - streams_guard + let hot_tier_configs: Vec<(String, Option, StreamHotTier)> = { + let tenants_guard = PARSEABLE.streams.read().unwrap(); + tenants_guard .iter() - .filter_map(|(stream_name, stream)| { - // Skip if hot tier metadata file already exists for this stream - if hot_tier_manager.check_stream_hot_tier_exists(stream_name) { - return None; - } + .flat_map(|(tenant_id, streams)| { + streams.iter().filter_map(|(stream_name, stream)| { + // Skip if hot tier metadata file already exists for this stream + let tenant_id = if tenant_id.eq(DEFAULT_TENANT) { + None + } else { + Some(tenant_id.clone()) + }; + if hot_tier_manager.check_stream_hot_tier_exists(stream_name, &tenant_id) { + return None; + } - // Get the hot tier configuration from the in-memory stream metadata - stream - .get_hot_tier() - .map(|config| (stream_name.clone(), config)) + // Get the hot tier configuration from the in-memory stream metadata + stream + .get_hot_tier() + .map(|config| (stream_name.clone(), tenant_id, config.clone())) + }) }) .collect() + // let streams_guard = PARSEABLE.streams.read().unwrap(); + // streams_guard + // .iter() + // .filter_map(|(stream_name, stream)| { + // // Skip if hot tier metadata file already exists for this stream + // if hot_tier_manager.check_stream_hot_tier_exists(stream_name) { + // return None; + // } + + // // Get the hot tier configuration from the in-memory stream metadata + // stream + // .get_hot_tier() + // .map(|config| (stream_name.clone(), config)) + // }) + // .collect() }; - for (stream_name, 
hot_tier_config) in hot_tier_configs { + for (stream_name, tenant_id, hot_tier_config) in hot_tier_configs { // Create the hot tier metadata file with the configuration from stream metadata let mut hot_tier_metadata = hot_tier_config; hot_tier_metadata.used_size = 0; @@ -649,7 +680,7 @@ pub async fn initialize_hot_tier_metadata_on_startup( } if let Err(e) = hot_tier_manager - .put_hot_tier(&stream_name, &mut hot_tier_metadata) + .put_hot_tier(&stream_name, &mut hot_tier_metadata, &tenant_id) .await { warn!( diff --git a/src/handlers/http/modal/query/querier_logstream.rs b/src/handlers/http/modal/query/querier_logstream.rs index a0b6204e0..855feba2f 100644 --- a/src/handlers/http/modal/query/querier_logstream.rs +++ b/src/handlers/http/modal/query/querier_logstream.rs @@ -48,18 +48,22 @@ use crate::{ parseable::{PARSEABLE, StreamNotFound}, stats, storage::{ObjectStoreFormat, StreamType}, + utils::get_tenant_id_from_request, }; const STATS_DATE_QUERY_PARAM: &str = "date"; -pub async fn delete(stream_name: Path) -> Result { +pub async fn delete( + req: HttpRequest, + stream_name: Path, +) -> Result { let stream_name = stream_name.into_inner(); - + let tenant_id = get_tenant_id_from_request(&req); // if the stream not found in memory map, //check if it exists in the storage //create stream and schema from storage - if !PARSEABLE.streams.contains(&stream_name) + if !PARSEABLE.streams.contains(&stream_name, &tenant_id) && !PARSEABLE - .create_stream_and_schema_from_storage(&stream_name) + .create_stream_and_schema_from_storage(&stream_name, &tenant_id) .await .unwrap_or(false) { @@ -68,8 +72,8 @@ pub async fn delete(stream_name: Path) -> Result) -> Result = cluster::get_node_info(NodeType::Ingestor) - .await - .map_err(|err| { - error!("Fatal: failed to get ingestor info: {:?}", err); - err - })?; + let ingestor_metadata: Vec = + cluster::get_node_info(NodeType::Ingestor, &tenant_id) + .await + .map_err(|err| { + error!("Fatal: failed to get ingestor info: {:?}", err); + err + })?; for ingestor in ingestor_metadata { let url = format!( @@ -104,8 +111,8 @@ pub async fn delete(stream_name: Path) -> Result Result { let stream_name = stream_name.into_inner(); + let tenant_id = get_tenant_id_from_request(&req); let _guard = CREATE_STREAM_LOCK.lock().await; let headers = PARSEABLE - .create_update_stream(req.headers(), &body, &stream_name) + .create_update_stream(req.headers(), &body, &stream_name, &tenant_id) .await?; let is_update = if let Some(val) = headers.get(UPDATE_STREAM_KEY) { @@ -128,7 +136,7 @@ pub async fn put_stream( false }; - sync_streams_with_ingestors(headers, body, &stream_name).await?; + sync_streams_with_ingestors(headers, body, &stream_name, &tenant_id).await?; if is_update { Ok(("Log stream updated", StatusCode::OK)) @@ -142,12 +150,13 @@ pub async fn get_stats( stream_name: Path, ) -> Result { let stream_name = stream_name.into_inner(); + let tenant_id = get_tenant_id_from_request(&req); // if the stream not found in memory map, //check if it exists in the storage //create stream and schema from storage - if !PARSEABLE.streams.contains(&stream_name) + if !PARSEABLE.streams.contains(&stream_name, &tenant_id) && !PARSEABLE - .create_stream_and_schema_from_storage(&stream_name) + .create_stream_and_schema_from_storage(&stream_name, &tenant_id) .await .unwrap_or(false) { @@ -165,7 +174,7 @@ pub async fn get_stats( if !date_value.is_empty() { let obs = PARSEABLE .metastore - .get_all_stream_jsons(&stream_name, None) + .get_all_stream_jsons(&stream_name, None, &tenant_id) .await?; let 
mut stream_jsons = Vec::new(); @@ -188,14 +197,14 @@ pub async fn get_stats( } } - let stats = stats::get_current_stats(&stream_name, "json") + let stats = stats::get_current_stats(&stream_name, "json", &tenant_id) .ok_or_else(|| StreamNotFound(stream_name.clone()))?; let ingestor_stats = if PARSEABLE - .get_stream(&stream_name) + .get_stream(&stream_name, &tenant_id) .is_ok_and(|stream| stream.get_stream_type() == StreamType::UserDefined) { - Some(fetch_stats_from_ingestors(&stream_name).await?) + Some(fetch_stats_from_ingestors(&stream_name, &tenant_id).await?) } else { None }; diff --git a/src/handlers/http/modal/query/querier_rbac.rs b/src/handlers/http/modal/query/querier_rbac.rs index cd2e3e1f1..82a9da62e 100644 --- a/src/handlers/http/modal/query/querier_rbac.rs +++ b/src/handlers/http/modal/query/querier_rbac.rs @@ -18,34 +18,38 @@ use std::collections::HashSet; -use actix_web::{HttpResponse, Responder, web}; +use actix_web::{HttpRequest, HttpResponse, Responder, web}; use crate::{ handlers::http::{ cluster::{ - sync_password_reset_with_ingestors, sync_user_creation_with_ingestors, + sync_password_reset_with_ingestors, sync_user_creation, sync_user_deletion_with_ingestors, sync_users_with_roles_with_ingestors, }, modal::utils::rbac_utils::{get_metadata, put_metadata}, rbac::{RBACError, UPDATE_LOCK}, }, + parseable::DEFAULT_TENANT, rbac::{ Users, map::{roles, users, write_user_groups}, user::{self, UserType}, }, + utils::get_tenant_id_from_request, validator, }; // Handler for POST /api/v1/user/{username} // Creates a new user by username if it does not exists pub async fn post_user( + req: HttpRequest, username: web::Path, body: Option>, ) -> Result { let username = username.into_inner(); + let tenant_id = get_tenant_id_from_request(&req); validator::user_role_name(&username)?; - let mut metadata = get_metadata().await?; + let mut metadata = get_metadata(&tenant_id).await?; let user_roles: HashSet = if let Some(body) = body { serde_json::from_value(body.into_inner())? 
@@ -63,7 +67,7 @@ pub async fn post_user( return Err(RBACError::RolesDoNotExist(non_existent_roles)); } let _guard = UPDATE_LOCK.lock().await; - if Users.contains(&username) + if Users.contains(&username, &tenant_id) || metadata .users .iter() @@ -72,17 +76,18 @@ pub async fn post_user( return Err(RBACError::UserExists(username)); } - let (user, password) = user::User::new_basic(username.clone()); + let (user, password) = user::User::new_basic(username.clone(), None); metadata.users.push(user.clone()); - put_metadata(&metadata).await?; + put_metadata(&metadata, &tenant_id).await?; let created_role = user_roles.clone(); Users.put_user(user.clone()); - sync_user_creation_with_ingestors(user, &Some(user_roles)).await?; + sync_user_creation(user, &Some(user_roles), &tenant_id).await?; if !created_role.is_empty() { add_roles_to_user( + req, web::Path::::from(username.clone()), web::Json(created_role), ) @@ -93,32 +98,41 @@ pub async fn post_user( } // Handler for DELETE /api/v1/user/{userid} -pub async fn delete_user(userid: web::Path) -> Result { +pub async fn delete_user( + req: HttpRequest, + userid: web::Path, +) -> Result { let userid = userid.into_inner(); - + let tenant_id = get_tenant_id_from_request(&req); + let tenant = tenant_id.as_ref().map_or(DEFAULT_TENANT, |v| v); let _guard = UPDATE_LOCK.lock().await; - // fail this request if the user does not exists - if !Users.contains(&userid) { + // fail this request if the user does not exist + if !Users.contains(&userid, &tenant_id) { return Err(RBACError::UserDoesNotExist); }; // find username by userid, for native users, username is userid, for oauth users, we need to look up - let username = if let Some(user) = users().get(&userid) { + let username = if let Some(users) = users().get(tenant) + && let Some(user) = users.get(&userid) + { user.username_by_userid() } else { return Err(RBACError::UserDoesNotExist); }; // delete from parseable.json first - let mut metadata = get_metadata().await?; + let mut metadata = get_metadata(&tenant_id).await?; metadata.users.retain(|user| user.userid() != userid); // also delete from user groups - let user_groups = Users.get_user_groups(&userid); + let user_groups = Users.get_user_groups(&userid, &tenant_id); let mut groups_to_update = Vec::new(); + for user_group in user_groups { - if let Some(ug) = write_user_groups().get_mut(&user_group) - && let Some(user) = users().get(&userid) + if let Some(groups) = write_user_groups().get_mut(tenant) + && let Some(ug) = groups.get_mut(&user_group) + && let Some(users) = users().get(tenant) + && let Some(user) = users.get(&userid) { let userid = match &user.ty { UserType::Native(basic) => basic.username.clone(), @@ -144,29 +158,33 @@ pub async fn delete_user(userid: web::Path) -> Result Add roles to a user pub async fn add_roles_to_user( + req: HttpRequest, userid: web::Path, roles_to_add: web::Json>, ) -> Result { let userid = userid.into_inner(); let roles_to_add = roles_to_add.into_inner(); - - if !Users.contains(&userid) { + let tenant_id = get_tenant_id_from_request(&req); + if !Users.contains(&userid, &tenant_id) { return Err(RBACError::UserDoesNotExist); }; + let tenant = tenant_id.as_ref().map_or(DEFAULT_TENANT, |v| v); // find username by userid, for native users, username is userid, for oauth users, we need to look up - let username = if let Some(user) = users().get(&userid) { + let username = if let Some(users) = users().get(tenant) + && let Some(user) = users.get(&userid) + { user.username_by_userid() } else { return Err(RBACError::UserDoesNotExist); 
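[Editor's note, not part of the patch] Across these RBAC handlers the global maps (`users()`, `roles()`, `read_user_groups()`) gain an outer tenant key, and a request that carries no tenant falls back to `DEFAULT_TENANT`. A small sketch of that lookup/insert pattern with simplified stand-in types (the real inner maps hold `User`, privilege lists and groups):

use std::collections::HashMap;

const DEFAULT_TENANT: &str = "default";

type Roles = HashMap<String, Vec<String>>; // role name -> privileges (simplified)
type TenantRoles = HashMap<String, Roles>; // tenant id -> that tenant's roles

fn lookup_role<'a>(
    all: &'a TenantRoles,
    tenant_id: &Option<String>,
    role_name: &str,
) -> Option<&'a Vec<String>> {
    let tenant = tenant_id.as_deref().unwrap_or(DEFAULT_TENANT);
    all.get(tenant)?.get(role_name)
}

fn insert_role(all: &mut TenantRoles, tenant_id: &Option<String>, name: &str, privileges: Vec<String>) {
    let tenant = tenant_id.as_deref().unwrap_or(DEFAULT_TENANT).to_owned();
    // Mirrors the `mut_roles().entry(tenant).or_default().insert(..)` pattern in the patch.
    all.entry(tenant).or_default().insert(name.to_owned(), privileges);
}

fn main() {
    let mut roles = TenantRoles::new();
    insert_role(&mut roles, &Some("acme".into()), "reader", vec!["Query".into()]);
    insert_role(&mut roles, &None, "admin", vec!["All".into()]);

    assert!(lookup_role(&roles, &Some("acme".into()), "reader").is_some());
    // A request without a tenant resolves against the default tenant.
    assert!(lookup_role(&roles, &None, "admin").is_some());
    // Tenants do not see each other's roles.
    assert!(lookup_role(&roles, &Some("acme".into()), "admin").is_none());
}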
@@ -186,7 +204,7 @@ pub async fn add_roles_to_user( } // update parseable.json first - let mut metadata = get_metadata().await?; + let mut metadata = get_metadata(&tenant_id).await?; if let Some(user) = metadata .users .iter_mut() @@ -198,31 +216,35 @@ pub async fn add_roles_to_user( return Err(RBACError::UserDoesNotExist); } - put_metadata(&metadata).await?; + put_metadata(&metadata, &tenant_id).await?; // update in mem table - Users.add_roles(&userid.clone(), roles_to_add.clone()); + Users.add_roles(&userid.clone(), roles_to_add.clone(), &tenant_id); - sync_users_with_roles_with_ingestors(&userid, &roles_to_add, "add").await?; + sync_users_with_roles_with_ingestors(&userid, &roles_to_add, "add", &tenant_id).await?; Ok(format!("Roles updated successfully for {username}")) } // Handler PATCH /user/{userid}/role/remove => Remove roles from a user pub async fn remove_roles_from_user( + req: HttpRequest, userid: web::Path, roles_to_remove: web::Json>, ) -> Result { let userid = userid.into_inner(); let roles_to_remove = roles_to_remove.into_inner(); - + let tenant_id = get_tenant_id_from_request(&req); + let tenant = tenant_id.as_ref().map_or(DEFAULT_TENANT, |v| v); let _guard = UPDATE_LOCK.lock().await; - if !Users.contains(&userid) { + if !Users.contains(&userid, &tenant_id) { return Err(RBACError::UserDoesNotExist); }; // find username by userid, for native users, username is userid, for oauth users, we need to look up - let username = if let Some(user) = users().get(&userid) { + let username = if let Some(users) = users().get(tenant) + && let Some(user) = users.get(&userid) + { user.username_by_userid() } else { return Err(RBACError::UserDoesNotExist); @@ -242,7 +264,7 @@ pub async fn remove_roles_from_user( } // check for role not present with user - let user_roles: HashSet = HashSet::from_iter(Users.get_role(&userid)); + let user_roles: HashSet = HashSet::from_iter(Users.get_role(&userid, &tenant_id)); let roles_not_with_user: HashSet = HashSet::from_iter(roles_to_remove.difference(&user_roles).cloned()); if !roles_not_with_user.is_empty() { @@ -252,7 +274,7 @@ pub async fn remove_roles_from_user( } // update parseable.json first - let mut metadata = get_metadata().await?; + let mut metadata = get_metadata(&tenant_id).await?; if let Some(user) = metadata .users .iter_mut() @@ -266,22 +288,26 @@ pub async fn remove_roles_from_user( return Err(RBACError::UserDoesNotExist); } - put_metadata(&metadata).await?; + put_metadata(&metadata, &tenant_id).await?; // update in mem table - Users.remove_roles(&userid.clone(), roles_to_remove.clone()); + Users.remove_roles(&userid.clone(), roles_to_remove.clone(), &tenant_id); - sync_users_with_roles_with_ingestors(&userid, &roles_to_remove, "remove").await?; + sync_users_with_roles_with_ingestors(&userid, &roles_to_remove, "remove", &tenant_id).await?; Ok(HttpResponse::Ok().json(format!("Roles updated successfully for {username}"))) } // Handler for POST /api/v1/user/{username}/generate-new-password // Resets password for the user to a newly generated one and returns it -pub async fn post_gen_password(username: web::Path) -> Result { +pub async fn post_gen_password( + req: HttpRequest, + username: web::Path, +) -> Result { let username = username.into_inner(); let mut new_password = String::default(); let mut new_hash = String::default(); - let mut metadata = get_metadata().await?; + let tenant_id = get_tenant_id_from_request(&req); + let mut metadata = get_metadata(&tenant_id).await?; let _guard = UPDATE_LOCK.lock().await; let user::PassCode { 
password, hash } = user::Basic::gen_new_password(); @@ -300,10 +326,10 @@ pub async fn post_gen_password(username: web::Path) -> Result, Json(privileges): Json>, ) -> Result { let name = name.into_inner(); + let tenant_id = get_tenant_id_from_request(&req); + let tenant = tenant_id.as_ref().map_or(DEFAULT_TENANT, |v| v); // validate the role name validator::user_role_name(&name).map_err(RoleError::ValidationError)?; - let mut metadata = get_metadata().await?; + let mut metadata = get_metadata(&tenant_id).await?; metadata.roles.insert(name.clone(), privileges.clone()); - put_metadata(&metadata).await?; - mut_roles().insert(name.clone(), privileges.clone()); + put_metadata(&metadata, &tenant_id).await?; + mut_roles() + .entry(tenant.to_owned()) + .or_default() + .insert(name.clone(), privileges.clone()); + // mut_roles().insert(name.clone(), privileges.clone()); // refresh the sessions of all users using this role // for this, iterate over all user_groups and users and create a hashset of users let mut session_refresh_users: HashSet = HashSet::new(); - for user_group in read_user_groups().values() { - if user_group.roles.contains(&name) { - session_refresh_users.extend(user_group.users.iter().map(|u| u.userid().to_string())); + if let Some(groups) = read_user_groups().get(tenant) { + for user_group in groups.values() { + if user_group.roles.contains(&name) { + session_refresh_users + .extend(user_group.users.iter().map(|u| u.userid().to_string())); + } } } // iterate over all users to see if they have this role - for user in users().values() { - if user.roles.contains(&name) { - session_refresh_users.insert(user.userid().to_string()); + if let Some(users) = users().get(tenant) { + for user in users.values() { + if user.roles.contains(&name) { + session_refresh_users.insert(user.userid().to_string()); + } } } for userid in session_refresh_users { - mut_sessions().remove_user(&userid); + mut_sessions().remove_user(&userid, tenant); } - sync_role_update_with_ingestors(name.clone(), privileges.clone()).await?; + sync_role_update(req, name.clone(), privileges.clone(), &tenant_id).await?; Ok(HttpResponse::Ok().finish()) } diff --git a/src/handlers/http/modal/query_server.rs b/src/handlers/http/modal/query_server.rs index de7a55f88..38cbfff20 100644 --- a/src/handlers/http/modal/query_server.rs +++ b/src/handlers/http/modal/query_server.rs @@ -97,7 +97,7 @@ impl ParseableServer for QueryServer { } let mut parseable_json = PARSEABLE.validate_storage().await?; - migration::run_metadata_migration(&PARSEABLE, &mut parseable_json).await?; + migration::run_metadata_migration(&PARSEABLE, &mut parseable_json, &None).await?; Ok(parseable_json) } @@ -110,14 +110,15 @@ impl ParseableServer for QueryServer { // write the ingestor metadata to storage QUERIER_META .get_or_init(|| async { - QuerierMetadata::load_node_metadata(NodeType::Querier) + QuerierMetadata::load_node_metadata(NodeType::Querier, &None) .await .expect("Querier Metadata should be set in ingestor mode") }) .await; migration::run_migration(&PARSEABLE).await?; - //create internal stream at server start + // create internal stream at server start + // Multi-tenant mode is not allowed in OSS hence no need to have knowledge of tenant id PARSEABLE.create_internal_stream_if_not_exists().await?; // load on init load_on_init().await?; diff --git a/src/handlers/http/modal/server.rs b/src/handlers/http/modal/server.rs index c288e9d5d..4396c7682 100644 --- a/src/handlers/http/modal/server.rs +++ b/src/handlers/http/modal/server.rs @@ -118,7 +118,7 @@ impl 
ParseableServer for Server { //TODO: removed file migration //deprecated support for deployments < v1.0.0 let mut parseable_json = PARSEABLE.validate_storage().await?; - migration::run_metadata_migration(&PARSEABLE, &mut parseable_json).await?; + migration::run_metadata_migration(&PARSEABLE, &mut parseable_json, &None).await?; Ok(parseable_json) } diff --git a/src/handlers/http/modal/utils/ingest_utils.rs b/src/handlers/http/modal/utils/ingest_utils.rs index dcfd83abb..3916f2deb 100644 --- a/src/handlers/http/modal/utils/ingest_utils.rs +++ b/src/handlers/http/modal/utils/ingest_utils.rs @@ -39,7 +39,7 @@ use crate::{ }, }, otel::{logs::flatten_otel_logs, metrics::flatten_otel_metrics, traces::flatten_otel_traces}, - parseable::PARSEABLE, + parseable::{DEFAULT_TENANT, PARSEABLE}, storage::StreamType, utils::json::{convert_array_to_object, flatten::convert_to_array}, }; @@ -55,9 +55,11 @@ pub async fn flatten_and_push_logs( p_custom_fields: &HashMap, time_partition: Option, telemetry_type: TelemetryType, + tenant_id: &Option, ) -> Result<(), PostError> { + let tenant_str = tenant_id.as_deref().unwrap_or(DEFAULT_TENANT); // Verify the dataset fields count - verify_dataset_fields_count(stream_name)?; + verify_dataset_fields_count(stream_name, tenant_id)?; match log_source { LogSource::Kinesis => { @@ -72,13 +74,14 @@ pub async fn flatten_and_push_logs( p_custom_fields, time_partition, telemetry_type, + tenant_id, ) .await?; } LogSource::OtelLogs => { //custom flattening required for otel logs let logs: LogsData = serde_json::from_value(json)?; - for record in flatten_otel_logs(&logs) { + for record in flatten_otel_logs(&logs, tenant_str) { push_logs( stream_name, record, @@ -86,6 +89,7 @@ pub async fn flatten_and_push_logs( p_custom_fields, time_partition.clone(), telemetry_type, + tenant_id, ) .await?; } @@ -93,7 +97,7 @@ pub async fn flatten_and_push_logs( LogSource::OtelTraces => { //custom flattening required for otel traces let traces: TracesData = serde_json::from_value(json)?; - for record in flatten_otel_traces(&traces) { + for record in flatten_otel_traces(&traces, tenant_str) { push_logs( stream_name, record, @@ -101,6 +105,7 @@ pub async fn flatten_and_push_logs( p_custom_fields, time_partition.clone(), telemetry_type, + tenant_id, ) .await?; } @@ -108,7 +113,7 @@ pub async fn flatten_and_push_logs( LogSource::OtelMetrics => { //custom flattening required for otel metrics let metrics: MetricsData = serde_json::from_value(json)?; - for record in flatten_otel_metrics(metrics) { + for record in flatten_otel_metrics(metrics, tenant_str) { push_logs( stream_name, record, @@ -116,6 +121,7 @@ pub async fn flatten_and_push_logs( p_custom_fields, time_partition.clone(), telemetry_type, + tenant_id, ) .await?; } @@ -128,6 +134,7 @@ pub async fn flatten_and_push_logs( p_custom_fields, time_partition, telemetry_type, + tenant_id, ) .await? } @@ -143,10 +150,11 @@ pub async fn push_logs( p_custom_fields: &HashMap, time_partition: Option, telemetry_type: TelemetryType, + tenant_id: &Option, ) -> Result<(), PostError> { - let stream = PARSEABLE.get_stream(stream_name)?; + let stream = PARSEABLE.get_stream(stream_name, tenant_id)?; let time_partition_limit = PARSEABLE - .get_stream(stream_name)? + .get_stream(stream_name, tenant_id)? 
.get_time_partition_limit(); let static_schema_flag = stream.get_static_schema_flag(); let custom_partition = stream.get_custom_partition(); @@ -164,7 +172,9 @@ pub async fn push_logs( for json in data { let origin_size = serde_json::to_vec(&json).unwrap().len() as u64; // string length need not be the same as byte length - let schema = PARSEABLE.get_stream(stream_name)?.get_schema_raw(); + let schema = PARSEABLE + .get_stream(stream_name, tenant_id)? + .get_schema_raw(); json::Event { json, p_timestamp } .into_event( stream_name.to_owned(), @@ -177,6 +187,7 @@ pub async fn push_logs( StreamType::UserDefined, p_custom_fields, telemetry_type, + tenant_id, )? .process()?; } @@ -244,9 +255,12 @@ pub fn get_custom_fields_from_header(req: &HttpRequest) -> HashMap Result<(), PostError> { +fn verify_dataset_fields_count( + stream_name: &str, + tenant_id: &Option, +) -> Result<(), PostError> { let fields_count = PARSEABLE - .get_stream(stream_name)? + .get_stream(stream_name, tenant_id)? .get_schema() .fields() .len(); @@ -276,8 +290,11 @@ fn verify_dataset_fields_count(stream_name: &str) -> Result<(), PostError> { Ok(()) } -pub fn validate_stream_for_ingestion(stream_name: &str) -> Result<(), PostError> { - let stream = PARSEABLE.get_stream(stream_name)?; +pub fn validate_stream_for_ingestion( + stream_name: &str, + tenant_id: &Option, +) -> Result<(), PostError> { + let stream = PARSEABLE.get_stream(stream_name, tenant_id)?; // Validate that the stream's log source is compatible stream diff --git a/src/handlers/http/modal/utils/logstream_utils.rs b/src/handlers/http/modal/utils/logstream_utils.rs index 49177c873..ef4907bf2 100644 --- a/src/handlers/http/modal/utils/logstream_utils.rs +++ b/src/handlers/http/modal/utils/logstream_utils.rs @@ -16,8 +16,6 @@ * */ -use actix_web::http::header::HeaderMap; - use crate::{ event::format::LogSource, handlers::{ @@ -27,6 +25,7 @@ use crate::{ }, storage::StreamType, }; +use actix_web::http::header::HeaderMap; #[derive(Debug, Default)] pub struct PutStreamHeaders { diff --git a/src/handlers/http/modal/utils/rbac_utils.rs b/src/handlers/http/modal/utils/rbac_utils.rs index e1593c12f..38da0cf03 100644 --- a/src/handlers/http/modal/utils/rbac_utils.rs +++ b/src/handlers/http/modal/utils/rbac_utils.rs @@ -21,18 +21,23 @@ use crate::{ storage::{self, ObjectStorageError, StorageMetadata}, }; -pub async fn get_metadata() -> Result { +pub async fn get_metadata( + tenant_id: &Option, +) -> Result { let metadata = PARSEABLE .metastore - .get_parseable_metadata() + .get_parseable_metadata(tenant_id) .await .map_err(|e| ObjectStorageError::MetastoreError(Box::new(e.to_detail())))? .ok_or_else(|| ObjectStorageError::Custom("parseable metadata not initialized".into()))?; Ok(serde_json::from_slice::(&metadata)?) 
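[Editor's note, not part of the patch] `get_tenant_id_from_request` is called by nearly every handler in this patch, but its body is not part of the diff. A plausible sketch, assuming it simply reads the same `tenant` header that `check_suspension` inspects; the real helper in `crate::utils` may differ.

use actix_web::{HttpRequest, test::TestRequest};

fn get_tenant_id_from_request(req: &HttpRequest) -> Option<String> {
    // Absent or non-UTF-8 header -> None, which downstream code maps to DEFAULT_TENANT.
    req.headers()
        .get("tenant")
        .and_then(|v| v.to_str().ok())
        .map(|s| s.to_owned())
}

fn main() {
    let req = TestRequest::default()
        .insert_header(("tenant", "acme"))
        .to_http_request();
    assert_eq!(get_tenant_id_from_request(&req), Some("acme".to_string()));

    let req = TestRequest::default().to_http_request();
    assert_eq!(get_tenant_id_from_request(&req), None);
}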
} -pub async fn put_metadata(metadata: &StorageMetadata) -> Result<(), ObjectStorageError> { - storage::put_remote_metadata(metadata).await?; - storage::put_staging_metadata(metadata)?; +pub async fn put_metadata( + metadata: &StorageMetadata, + tenant_id: &Option, +) -> Result<(), ObjectStorageError> { + storage::put_remote_metadata(metadata, tenant_id).await?; + storage::put_staging_metadata(metadata, tenant_id)?; Ok(()) } diff --git a/src/handlers/http/oidc.rs b/src/handlers/http/oidc.rs index ee73fdf9e..f0c6d2222 100644 --- a/src/handlers/http/oidc.rs +++ b/src/handlers/http/oidc.rs @@ -22,34 +22,40 @@ use actix_web::http::StatusCode; use actix_web::{ HttpRequest, HttpResponse, cookie::{Cookie, SameSite, time}, - http::header::{self, ContentType}, + http::header::ContentType, web, }; use chrono::{Duration, TimeDelta}; +use http::header; use openid::{Bearer, Options, Token, Userinfo}; use regex::Regex; use serde::Deserialize; +use serde_json::json; use tokio::sync::RwLock; use ulid::Ulid; use url::Url; use crate::{ + INTRA_CLUSTER_CLIENT, handlers::{ COOKIE_AGE_DAYS, SESSION_COOKIE_NAME, USER_COOKIE_NAME, USER_ID_COOKIE_NAME, http::{ - API_BASE_PATH, API_VERSION, + API_BASE_PATH, API_VERSION, base_path_without_preceding_slash, + cluster::for_each_live_node, modal::{GlobalClient, OIDC_CLIENT}, }, }, oidc::{Claims, DiscoveredClient}, - parseable::PARSEABLE, + parseable::{DEFAULT_TENANT, PARSEABLE}, rbac::{ self, EXPIRY_DURATION, Users, map::{DEFAULT_ROLE, SessionKey}, user::{self, GroupUser, User, UserType}, }, storage::{self, ObjectStorageError, StorageMetadata}, - utils::actix::extract_session_key_from_req, + utils::{ + actix::extract_session_key_from_req, get_tenant_id_from_key, get_tenant_id_from_request, + }, }; /// Struct representing query params returned from oidc provider @@ -73,7 +79,6 @@ pub async fn login( ) -> Result { let conn = req.connection_info().clone(); let base_url_without_scheme = format!("{}/", conn.host()); - if !is_valid_redirect_url(&base_url_without_scheme, query.redirect.as_str()) { return Err(OIDCError::BadRequest( "Bad Request, Invalid Redirect URL!".to_string(), @@ -94,16 +99,21 @@ pub async fn login( } (Some(session_key), client) => (session_key, client), }; + // if control flow is here then it is most likely basic auth // try authorize match Users.authorize(session_key.clone(), rbac::role::Action::Login, None, None) { rbac::Response::Authorized => (), - rbac::Response::UnAuthorized | rbac::Response::ReloadRequired => { + rbac::Response::UnAuthorized + | rbac::Response::ReloadRequired + | rbac::Response::Suspended(_) => { return Err(OIDCError::Unauthorized); } } + let tenant_id = get_tenant_id_from_key(&session_key); match session_key { // We can exchange basic auth for session cookie - SessionKey::BasicAuth { username, password } => match Users.get_user(&username) { + SessionKey::BasicAuth { username, password } => match Users.get_user(&username, &tenant_id) + { Some( ref user @ User { ty: UserType::Native(ref basic), @@ -117,6 +127,36 @@ pub async fn login( SessionKey::BasicAuth { username, password }, EXPIRY_DURATION, ); + let _session = session_cookie.value().to_owned(); + let _user = user.clone(); + let r = for_each_live_node(&tenant_id, move |node| { + let url = format!( + "{}{}/o/login/sync", + node.domain_name, + base_path_without_preceding_slash(), + ); + let _session = _session.clone(); + let _user = _user.clone(); + + async move { + INTRA_CLUSTER_CLIENT + .post(url) + .header(header::AUTHORIZATION, node.token) + .header(header::CONTENT_TYPE, 
"application/json") + .json(&json!( + { + "sessionCookie": _session, + "user": _user, + "expiry": EXPIRY_DURATION + } + )) + .send() + .await?; + Ok::<(), anyhow::Error>(()) + } + }) + .await; + tracing::warn!(login_sync=?r); Ok(redirect_to_client( query.redirect.as_str(), [user_cookie, user_id_cookie, session_cookie], @@ -151,6 +191,7 @@ pub async fn logout(req: HttpRequest, query: web::Query) -> let Some(session) = extract_session_key_from_req(&req).ok() else { return redirect_to_client(query.redirect.as_str(), None); }; + let tenant_id = get_tenant_id_from_key(&session); let user = Users.remove_session(&session); let logout_endpoint = if let Some(client) = oidc_client { client @@ -166,7 +207,7 @@ pub async fn logout(req: HttpRequest, query: web::Query) -> match (user, logout_endpoint) { (Some(username), Some(logout_endpoint)) - if Users.is_oauth(&username).unwrap_or_default() => + if Users.is_oauth(&username, &tenant_id).unwrap_or_default() => { redirect_to_oidc_logout(logout_endpoint, &query.redirect) } @@ -176,12 +217,16 @@ pub async fn logout(req: HttpRequest, query: web::Query) -> /// Handler for code callback /// User should be redirected to page they were trying to access with cookie -pub async fn reply_login(login_query: web::Query) -> Result { +pub async fn reply_login( + req: HttpRequest, + login_query: web::Query, +) -> Result { let oidc_client = if let Some(oidc_client) = OIDC_CLIENT.get() { oidc_client } else { return Err(OIDCError::Unauthorized); }; + let tenant_id = get_tenant_id_from_request(&req); let (mut claims, user_info, bearer) = match request_token(oidc_client, &login_query).await { Ok(v) => v, @@ -214,7 +259,7 @@ pub async fn reply_login(login_query: web::Query) -> Result) -> Result { // For existing users: keep existing roles + add new valid OIDC roles @@ -267,7 +322,8 @@ pub async fn reply_login(login_query: web::Query) -> Result update_user_if_changed(user, roles, user_info, bearer).await?, - (None, roles) => put_user(&user_id, roles, user_info, bearer).await?, + // LET TENANT BE NONE FOR NOW!!! 
+ (None, roles) => put_user(&user_id, roles, user_info, bearer, None).await?, }; let id = Ulid::new(); @@ -288,23 +344,23 @@ pub async fn reply_login(login_query: web::Query) -> Result Option { +fn find_existing_user(user_info: &user::UserInfo, tenant_id: Option) -> Option { if let Some(sub) = &user_info.sub - && let Some(user) = Users.get_user(sub) + && let Some(user) = Users.get_user(sub, &tenant_id) && matches!(user.ty, UserType::OAuth(_)) { return Some(user); } if let Some(name) = &user_info.name - && let Some(user) = Users.get_user(name) + && let Some(user) = Users.get_user(name, &tenant_id) && matches!(user.ty, UserType::OAuth(_)) { return Some(user); } if let Some(email) = &user_info.email - && let Some(user) = Users.get_user(email) + && let Some(user) = Users.get_user(email, &tenant_id) && matches!(user.ty, UserType::OAuth(_)) { return Some(user); @@ -338,15 +394,18 @@ fn redirect_to_oidc( let mut url: String = auth_url.into(); url.push_str("&access_type=offline&prompt=consent"); HttpResponse::TemporaryRedirect() - .insert_header((header::LOCATION, url)) + .insert_header((actix_web::http::header::LOCATION, url)) .finish() } fn redirect_to_oidc_logout(mut logout_endpoint: Url, redirect: &Url) -> HttpResponse { logout_endpoint.set_query(Some(&format!("post_logout_redirect_uri={redirect}"))); HttpResponse::TemporaryRedirect() - .insert_header((header::CACHE_CONTROL, "no-store")) - .insert_header((header::LOCATION, logout_endpoint.to_string())) + .insert_header((actix_web::http::header::CACHE_CONTROL, "no-store")) + .insert_header(( + actix_web::http::header::LOCATION, + logout_endpoint.to_string(), + )) .finish() } @@ -355,20 +414,21 @@ pub fn redirect_to_client( cookies: impl IntoIterator>, ) -> HttpResponse { let mut response = HttpResponse::MovedPermanently(); - response.insert_header((header::LOCATION, url)); + response.insert_header((actix_web::http::header::LOCATION, url)); for cookie in cookies { response.cookie(cookie); } - response.insert_header((header::CACHE_CONTROL, "no-store")); + response.insert_header((actix_web::http::header::CACHE_CONTROL, "no-store")); - response.finish() + let res = response.finish(); + res } fn redirect_no_oauth_setup(mut url: Url) -> HttpResponse { url.set_path("oidc-not-configured"); let mut response = HttpResponse::MovedPermanently(); - response.insert_header((header::LOCATION, url.as_str())); - response.insert_header((header::CACHE_CONTROL, "no-store")); + response.insert_header((actix_web::http::header::LOCATION, url.as_str())); + response.insert_header((actix_web::http::header::CACHE_CONTROL, "no-store")); response.finish() } @@ -441,15 +501,16 @@ pub async fn request_token( Ok((claims, userinfo, bearer)) } -// put new user in metadata if does not exits +// put new user in metadata if does not exit // update local cache pub async fn put_user( userid: &str, group: HashSet, user_info: user::UserInfo, bearer: Bearer, + tenant: Option, ) -> Result { - let mut metadata = get_metadata().await?; + let mut metadata = get_metadata(&tenant).await?; let mut user = metadata .users @@ -457,12 +518,12 @@ pub async fn put_user( .find(|user| user.userid() == userid) .cloned() .unwrap_or_else(|| { - let user = User::new_oauth(userid.to_owned(), group, user_info, None); + let user = User::new_oauth(userid.to_owned(), group, user_info, None, tenant.clone()); metadata.users.push(user.clone()); user }); - put_metadata(&metadata).await?; + put_metadata(&metadata, &tenant).await?; // modify before storing if let user::UserType::OAuth(oauth) = &mut user.ty { @@ 
-509,7 +570,7 @@ pub async fn update_user_if_changed( oauth_user.userid.clone_from(sub); } - let mut metadata = get_metadata().await?; + let mut metadata = get_metadata(&user.tenant).await?; // Find the user entry using the old username (before migration) if let Some(entry) = metadata @@ -524,8 +585,8 @@ pub async fn update_user_if_changed( group.users.insert(GroupUser::from_user(&user)); } } - put_metadata(&metadata).await?; - Users.delete_user(&old_username); + put_metadata(&metadata, &user.tenant).await?; + Users.delete_user(&old_username, &user.tenant); // update oauth bearer if let user::UserType::OAuth(oauth) = &mut user.ty { oauth.bearer = Some(bearer); @@ -534,19 +595,24 @@ pub async fn update_user_if_changed( Ok(user) } -async fn get_metadata() -> Result { +async fn get_metadata( + tenant_id: &Option, +) -> Result { let metadata = PARSEABLE .metastore - .get_parseable_metadata() + .get_parseable_metadata(tenant_id) .await .map_err(|e| ObjectStorageError::MetastoreError(Box::new(e.to_detail())))? .ok_or_else(|| ObjectStorageError::Custom("parseable metadata not initialized".into()))?; Ok(serde_json::from_slice::(&metadata)?) } -async fn put_metadata(metadata: &StorageMetadata) -> Result<(), ObjectStorageError> { - storage::put_remote_metadata(metadata).await?; - storage::put_staging_metadata(metadata)?; +async fn put_metadata( + metadata: &StorageMetadata, + tenant_id: &Option, +) -> Result<(), ObjectStorageError> { + storage::put_remote_metadata(metadata, tenant_id).await?; + storage::put_staging_metadata(metadata, tenant_id)?; Ok(()) } diff --git a/src/handlers/http/prism_home.rs b/src/handlers/http/prism_home.rs index 17b193d9c..fa1b9964c 100644 --- a/src/handlers/http/prism_home.rs +++ b/src/handlers/http/prism_home.rs @@ -40,7 +40,6 @@ pub async fn home_api(req: HttpRequest) -> Result) -> Result { - let prism_logstream_info = get_prism_logstream_info(&stream_name).await?; +pub async fn get_info( + req: HttpRequest, + stream_name: Path, +) -> Result { + let prism_logstream_info = + get_prism_logstream_info(&stream_name, &get_tenant_id_from_request(&req)).await?; Ok(web::Json(prism_logstream_info)) } diff --git a/src/handlers/http/query.rs b/src/handlers/http/query.rs index 91bfae1dd..c3c8d55dc 100644 --- a/src/handlers/http/query.rs +++ b/src/handlers/http/query.rs @@ -47,16 +47,16 @@ use tracing::{error, warn}; use crate::event::{DEFAULT_TIMESTAMP_KEY, commit_schema}; use crate::metrics::{QUERY_EXECUTE_TIME, increment_query_calls_by_date}; -use crate::parseable::{PARSEABLE, StreamNotFound}; +use crate::parseable::{DEFAULT_TENANT, PARSEABLE, StreamNotFound}; use crate::query::error::ExecuteError; -use crate::query::{CountsRequest, Query as LogicalQuery, execute}; -use crate::query::{QUERY_SESSION, resolve_stream_names}; +use crate::query::resolve_stream_names; +use crate::query::{CountsRequest, QUERY_SESSION, Query as LogicalQuery, execute}; use crate::rbac::Users; use crate::response::QueryResponse; use crate::storage::ObjectStorageError; use crate::utils::actix::extract_session_key_from_req; use crate::utils::time::{TimeParseError, TimeRange}; -use crate::utils::user_auth_for_datasets; +use crate::utils::{get_tenant_id_from_request, user_auth_for_datasets}; pub const TIME_ELAPSED_HEADER: &str = "p-time-elapsed"; /// Query Request through http endpoint. 
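[Editor's note, not part of the patch] In the `query` handler in the hunk below, the session state's default catalog schema is pointed at the tenant id (falling back to "public") so that unqualified table names resolve inside that tenant's schema. Roughly the same effect, sketched against a fresh DataFusion context rather than the shared `QUERY_SESSION`:

use datafusion::prelude::{SessionConfig, SessionContext};

fn session_for_tenant(tenant_id: &Option<String>) -> SessionContext {
    // "public" mirrors the fallback used when no tenant header is present.
    let schema = tenant_id.clone().unwrap_or_else(|| "public".to_string());
    let config = SessionConfig::new().with_default_catalog_and_schema("datafusion", schema);
    SessionContext::new_with_config(config)
}

fn main() {
    // SQL planned on this context resolves bare table names as datafusion.acme.<table>,
    // i.e. inside the tenant's schema.
    let _ctx = session_for_tenant(&Some("acme".to_string()));
}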
@@ -82,21 +82,22 @@ pub struct Query { pub async fn get_records_and_fields( query_request: &Query, creds: &SessionKey, + tenant_id: &Option, ) -> Result<(Option>, Option>), QueryError> { - let session_state = QUERY_SESSION.state(); + let session_state = QUERY_SESSION.get_ctx().state(); let time_range = TimeRange::parse_human_time(&query_request.start_time, &query_request.end_time)?; let tables = resolve_stream_names(&query_request.query)?; //check or load streams in memory - create_streams_for_distributed(tables.clone()).await?; + create_streams_for_distributed(tables.clone(), tenant_id).await?; let query: LogicalQuery = into_query(query_request, &session_state, time_range).await?; let permissions = Users.get_permissions(creds); - user_auth_for_datasets(&permissions, &tables).await?; + user_auth_for_datasets(&permissions, &tables, tenant_id).await?; - let (records, fields) = execute(query, false).await?; + let (records, fields) = execute(query, false, tenant_id).await?; let records = match records { Either::Left(vec_rb) => vec_rb, @@ -109,23 +110,33 @@ pub async fn get_records_and_fields( } pub async fn query(req: HttpRequest, query_request: Query) -> Result { - let session_state = QUERY_SESSION.state(); + let mut session_state = QUERY_SESSION.get_ctx().state(); let time_range = TimeRange::parse_human_time(&query_request.start_time, &query_request.end_time)?; let tables = resolve_stream_names(&query_request.query)?; - //check or load streams in memory - create_streams_for_distributed(tables.clone()).await?; + // check or load streams in memory + create_streams_for_distributed(tables.clone(), &get_tenant_id_from_request(&req)).await?; + + let tenant_id = get_tenant_id_from_request(&req); + session_state + .config_mut() + .options_mut() + .catalog + .default_schema = tenant_id.as_ref().map_or("public".into(), |v| v.to_owned()); let query: LogicalQuery = into_query(&query_request, &session_state, time_range).await?; let creds = extract_session_key_from_req(&req)?; let permissions = Users.get_permissions(&creds); - user_auth_for_datasets(&permissions, &tables).await?; + user_auth_for_datasets(&permissions, &tables, &tenant_id).await?; let time = Instant::now(); // Track billing metrics for query calls let current_date = chrono::Utc::now().date_naive().to_string(); - increment_query_calls_by_date(¤t_date); + increment_query_calls_by_date( + ¤t_date, + tenant_id.as_ref().map_or(DEFAULT_TENANT, |v| v), + ); // if the query is `select count(*) from ` // we use the `get_bin_density` method to get the count of records in the dataset @@ -134,18 +145,18 @@ pub async fn query(req: HttpRequest, query_request: Query) -> Result`) @@ -167,6 +178,7 @@ async fn handle_count_query( table_name: &str, column_name: &str, time: Instant, + tenant_id: &Option, ) -> Result { let counts_req = CountsRequest { stream: table_name.to_string(), @@ -175,7 +187,7 @@ async fn handle_count_query( num_bins: Some(1), conditions: None, }; - let count_records = counts_req.get_bin_density().await?; + let count_records = counts_req.get_bin_density(tenant_id).await?; let count = count_records[0].count; let response = if query_request.fields { json!({ @@ -190,7 +202,7 @@ async fn handle_count_query( let time = time.elapsed().as_secs_f64(); QUERY_EXECUTE_TIME - .with_label_values(&[table_name]) + .with_label_values(&[table_name, tenant_id.as_deref().unwrap_or(DEFAULT_TENANT)]) .observe(time); Ok(HttpResponse::Ok() @@ -217,9 +229,10 @@ async fn handle_non_streaming_query( table_name: Vec, query_request: &Query, time: Instant, + 
tenant_id: &Option, ) -> Result { let first_table_name = table_name[0].clone(); - let (records, fields) = execute(query, query_request.streaming).await?; + let (records, fields) = execute(query, query_request.streaming, tenant_id).await?; let records = match records { Either::Left(rbs) => rbs, Either::Right(_) => { @@ -232,7 +245,10 @@ async fn handle_non_streaming_query( let time = time.elapsed().as_secs_f64(); QUERY_EXECUTE_TIME - .with_label_values(&[&first_table_name]) + .with_label_values(&[ + &first_table_name, + tenant_id.as_deref().unwrap_or(DEFAULT_TENANT), + ]) .observe(time); let response = QueryResponse { records, @@ -266,9 +282,10 @@ async fn handle_streaming_query( table_name: Vec, query_request: &Query, time: Instant, + tenant_id: &Option, ) -> Result { let first_table_name = table_name[0].clone(); - let (records_stream, fields) = execute(query, query_request.streaming).await?; + let (records_stream, fields) = execute(query, query_request.streaming, tenant_id).await?; let records_stream = match records_stream { Either::Left(_) => { return Err(QueryError::MalformedQuery( @@ -280,7 +297,10 @@ async fn handle_streaming_query( let total_time = format!("{:?}", time.elapsed()); let time = time.elapsed().as_secs_f64(); QUERY_EXECUTE_TIME - .with_label_values(&[&first_table_name]) + .with_label_values(&[ + &first_table_name, + tenant_id.as_deref().unwrap_or(DEFAULT_TENANT), + ]) .observe(time); let send_null = query_request.send_null; @@ -347,19 +367,22 @@ pub async fn get_counts( ) -> Result { let creds = extract_session_key_from_req(&req)?; let permissions = Users.get_permissions(&creds); - + let tenant_id = get_tenant_id_from_request(&req); let body = counts_request.into_inner(); // does user have access to table? - user_auth_for_datasets(&permissions, std::slice::from_ref(&body.stream)).await?; + user_auth_for_datasets(&permissions, std::slice::from_ref(&body.stream), &tenant_id).await?; // Track billing metrics for query calls let current_date = chrono::Utc::now().date_naive().to_string(); - increment_query_calls_by_date(¤t_date); + increment_query_calls_by_date( + ¤t_date, + tenant_id.as_ref().map_or(DEFAULT_TENANT, |v| v), + ); // if the user has given a sql query (counts call with filters applied), then use this flow // this could include filters or group by if body.conditions.is_some() { let time_partition = PARSEABLE - .get_stream(&body.stream)? + .get_stream(&body.stream, &tenant_id)? 
.get_time_partition() .unwrap_or_else(|| DEFAULT_TIMESTAMP_KEY.into()); @@ -377,7 +400,7 @@ pub async fn get_counts( let creds = extract_session_key_from_req(&req)?; - let (records, _) = get_records_and_fields(&query_request, &creds).await?; + let (records, _) = get_records_and_fields(&query_request, &creds, &tenant_id).await?; if let Some(records) = records { let json_records = record_batches_to_json(&records)?; @@ -396,7 +419,7 @@ pub async fn get_counts( } } - let records = body.get_bin_density().await?; + let records = body.get_bin_density(&tenant_id).await?; let res = json!({ "fields": vec!["start_time", "endTime", "count"], "records": records, @@ -404,14 +427,17 @@ pub async fn get_counts( Ok(web::Json(res)) } -pub async fn update_schema_when_distributed(tables: &Vec) -> Result<(), EventError> { +pub async fn update_schema_when_distributed( + tables: &Vec, + tenant_id: &Option, +) -> Result<(), EventError> { // if the mode is query or prism, we need to update the schema in memory // no need to commit schema to storage // as the schema is read from memory everytime if PARSEABLE.options.mode == Mode::Query || PARSEABLE.options.mode == Mode::Prism { for table in tables { - if let Ok(new_schema) = fetch_schema(table).await { - commit_schema(table, Arc::new(new_schema))?; + if let Ok(new_schema) = fetch_schema(table, tenant_id).await { + commit_schema(table, Arc::new(new_schema), tenant_id)?; } } } @@ -421,15 +447,19 @@ pub async fn update_schema_when_distributed(tables: &Vec) -> Result<(), /// Create streams for querier if they do not exist /// get list of streams from memory and storage /// create streams for memory from storage if they do not exist -pub async fn create_streams_for_distributed(streams: Vec) -> Result<(), QueryError> { +pub async fn create_streams_for_distributed( + streams: Vec, + tenant_id: &Option, +) -> Result<(), QueryError> { if PARSEABLE.options.mode != Mode::Query && PARSEABLE.options.mode != Mode::Prism { return Ok(()); } let mut join_set = JoinSet::new(); for stream_name in streams { + let id = tenant_id.to_owned(); join_set.spawn(async move { let result = PARSEABLE - .create_stream_and_schema_from_storage(&stream_name) + .create_stream_and_schema_from_storage(&stream_name, &id) .await; if let Err(e) = &result { diff --git a/src/handlers/http/rbac.rs b/src/handlers/http/rbac.rs index 684ae4fa2..f351c34de 100644 --- a/src/handlers/http/rbac.rs +++ b/src/handlers/http/rbac.rs @@ -19,6 +19,7 @@ use std::collections::{HashMap, HashSet}; use crate::{ + parseable::DEFAULT_TENANT, rbac::{ self, Users, map::{read_user_groups, roles, users}, @@ -27,11 +28,12 @@ use crate::{ utils::to_prism_user, }, storage::ObjectStorageError, + utils::get_tenant_id_from_request, validator::{self, error::UsernameValidationError}, }; use actix_web::http::StatusCode; use actix_web::{ - HttpResponse, Responder, + HttpRequest, HttpResponse, Responder, http::header::ContentType, web::{self, Path}, }; @@ -67,25 +69,36 @@ impl From<&user::User> for User { // Handler for GET /api/v1/user // returns list of all registered users -pub async fn list_users() -> impl Responder { - web::Json(Users.collect_user::()) +pub async fn list_users(req: HttpRequest) -> impl Responder { + let tenant_id = get_tenant_id_from_request(&req); + web::Json(Users.collect_user::(&tenant_id)) } /// Handler for GET /api/v1/users /// returns list of all registered users along with their roles and other info -pub async fn list_users_prism() -> impl Responder { +pub async fn list_users_prism(req: HttpRequest) -> impl 
Responder { + let tenant_id = get_tenant_id_from_request(&req); + let tenant_id = tenant_id.as_ref().map_or(DEFAULT_TENANT, |v| v); // get all users - let prism_users = rbac::map::users().values().map(to_prism_user).collect_vec(); - + let prism_users = match rbac::map::users().get(tenant_id) { + Some(users) => users.values().map(to_prism_user).collect_vec(), + None => vec![], + }; web::Json(prism_users) } /// Function for GET /users/{username} -pub async fn get_prism_user(username: Path) -> Result { +pub async fn get_prism_user( + req: HttpRequest, + username: Path, +) -> Result { let username = username.into_inner(); + let tenant_id = get_tenant_id_from_request(&req); // First check if the user exists let users = rbac::map::users(); - if let Some(user) = users.get(&username) { + if let Some(users) = users.get(tenant_id.as_ref().map_or(DEFAULT_TENANT, |v| v)) + && let Some(user) = users.get(&username) + { // Create UsersPrism for the found user only let prism_user = to_prism_user(user); Ok(web::Json(prism_user)) @@ -97,12 +110,14 @@ pub async fn get_prism_user(username: Path) -> Result, body: Option>, ) -> Result { let username = username.into_inner(); + let tenant_id = get_tenant_id_from_request(&req); validator::user_role_name(&username)?; - let mut metadata = get_metadata().await?; + let mut metadata = get_metadata(&tenant_id).await?; let user_roles: HashSet = if let Some(body) = body { serde_json::from_value(body.into_inner())? @@ -120,7 +135,7 @@ pub async fn post_user( return Err(RBACError::RolesDoNotExist(non_existent_roles)); } let _guard = UPDATE_LOCK.lock().await; - if Users.contains(&username) + if Users.contains(&username, &tenant_id) || metadata.users.iter().any(|user| match &user.ty { UserType::Native(basic) => basic.username == username, UserType::OAuth(_) => false, // OAuth users should be created differently @@ -129,15 +144,17 @@ pub async fn post_user( return Err(RBACError::UserExists(username)); } - let (user, password) = user::User::new_basic(username.clone()); + // LET TENANT BE NONE FOR NOW!!! 
+ let (user, password) = user::User::new_basic(username.clone(), tenant_id.clone()); metadata.users.push(user.clone()); - put_metadata(&metadata).await?; + put_metadata(&metadata, &tenant_id).await?; let created_role = user_roles.clone(); Users.put_user(user.clone()); if !created_role.is_empty() { add_roles_to_user( + req, web::Path::::from(username.clone()), web::Json(created_role), ) @@ -149,11 +166,15 @@ pub async fn post_user( // Handler for POST /api/v1/user/{username}/generate-new-password // Resets password for the user to a newly generated one and returns it -pub async fn post_gen_password(username: web::Path) -> Result { +pub async fn post_gen_password( + req: HttpRequest, + username: web::Path, +) -> Result { let username = username.into_inner(); + let tenant_id = get_tenant_id_from_request(&req); let mut new_password = String::default(); let mut new_hash = String::default(); - let mut metadata = get_metadata().await?; + let mut metadata = get_metadata(&tenant_id).await?; let _guard = UPDATE_LOCK.lock().await; let user::PassCode { password, hash } = user::Basic::gen_new_password(); @@ -172,40 +193,58 @@ pub async fn post_gen_password(username: web::Path) -> Result) -> Result { +pub async fn get_role( + req: HttpRequest, + userid: web::Path, +) -> Result { let userid = userid.into_inner(); - if !Users.contains(&userid) { + let tenant_id = get_tenant_id_from_request(&req); + let tenant = tenant_id.as_ref().map_or(DEFAULT_TENANT, |v| v); + if !Users.contains(&userid, &tenant_id) { return Err(RBACError::UserDoesNotExist); }; let direct_roles: HashMap> = Users - .get_role(&userid) + .get_role(&userid, &tenant_id) .iter() .filter_map(|role_name| { - roles() - .get(role_name) - .map(|role| (role_name.to_owned(), role.clone())) + if let Some(roles) = roles().get(tenant) + && let Some(role) = roles.get(role_name) + { + Some((role_name.to_owned(), role.clone())) + } else { + None + } + // roles() + // .get(role_name) + // .map(|role| (role_name.to_owned(), role.clone())) }) .collect(); let mut group_roles: HashMap>> = HashMap::new(); // user might be part of some user groups, fetch the roles from there as well - for user_group in Users.get_user_groups(&userid) { - if let Some(group) = read_user_groups().get(&user_group) { + for user_group in Users.get_user_groups(&userid, &tenant_id) { + if let Some(groups) = read_user_groups().get(tenant) + && let Some(group) = groups.get(&user_group) + { let ug_roles: HashMap> = group .roles .iter() .filter_map(|role_name| { - roles() - .get(role_name) - .map(|role| (role_name.to_owned(), role.clone())) + if let Some(roles) = roles().get(tenant) + && let Some(role) = roles.get(role_name) + { + Some((role_name.to_owned(), role.clone())) + } else { + None + } }) .collect(); group_roles.insert(group.name.clone(), ug_roles); @@ -219,51 +258,62 @@ pub async fn get_role(userid: web::Path) -> Result) -> Result { +pub async fn delete_user( + req: HttpRequest, + userid: web::Path, +) -> Result { let userid = userid.into_inner(); + let tenant_id = get_tenant_id_from_request(&req); let _guard = UPDATE_LOCK.lock().await; // if user is a part of any groups then don't allow deletion - if !Users.get_user_groups(&userid).is_empty() { + if !Users.get_user_groups(&userid, &tenant_id).is_empty() { return Err(RBACError::InvalidDeletionRequest(format!( "User: {userid} should not be a part of any groups" ))); } // fail this request if the user does not exists - if !Users.contains(&userid) { + if !Users.contains(&userid, &tenant_id) { return Err(RBACError::UserDoesNotExist); 
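// Aside: get_role above and the delete_user/add_roles_to_user/remove_roles_from_user
// handlers that follow all repeat the same two-level lookup — outer map keyed by tenant
// (falling back to the default tenant), inner map keyed by user or role id. A
// self-contained sketch of that pattern with stand-in types (`lookup` is a hypothetical
// helper, not an existing crate function):
use std::collections::HashMap;

fn lookup<'a, V>(
    by_tenant: &'a HashMap<String, HashMap<String, V>>,
    tenant_id: &Option<String>,
    key: &str,
    default_tenant: &str,
) -> Option<&'a V> {
    // Resolve the outer key first, then fall through to the per-tenant map.
    let tenant = tenant_id.as_deref().unwrap_or(default_tenant);
    by_tenant.get(tenant)?.get(key)
}

fn main() {
    let mut users = HashMap::new();
    users.insert(
        "default".to_owned(),
        HashMap::from([("alice".to_owned(), 1u32)]),
    );
    assert_eq!(lookup(&users, &None, "alice", "default"), Some(&1));
    assert_eq!(lookup(&users, &Some("acme".to_owned()), "alice", "default"), None);
}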
}; // find username by userid, for native users, username is userid, for oauth users, we need to look up - let username = if let Some(user) = users().get(&userid) { + let username = if let Some(users) = + users().get(tenant_id.as_ref().map_or(DEFAULT_TENANT, |v| v)) + && let Some(user) = users.get(&userid) + { user.username_by_userid() } else { return Err(RBACError::UserDoesNotExist); }; // delete from parseable.json first - let mut metadata = get_metadata().await?; + let mut metadata = get_metadata(&tenant_id).await?; metadata.users.retain(|user| user.userid() != userid); - put_metadata(&metadata).await?; + put_metadata(&metadata, &tenant_id).await?; // update in mem table - Users.delete_user(&userid); + Users.delete_user(&userid, &tenant_id); Ok(HttpResponse::Ok().json(format!("deleted user: {username}"))) } // Handler PATCH /user/{userid}/role/add => Add roles to a user pub async fn add_roles_to_user( + req: HttpRequest, userid: web::Path, roles_to_add: web::Json>, ) -> Result { let userid = userid.into_inner(); let roles_to_add = roles_to_add.into_inner(); - - if !Users.contains(&userid) { + let tenant_id = get_tenant_id_from_request(&req); + if !Users.contains(&userid, &tenant_id) { return Err(RBACError::UserDoesNotExist); }; // find username by userid, for native users, username is userid, for oauth users, we need to look up - let username = if let Some(user) = users().get(&userid) { + let username = if let Some(users) = + users().get(tenant_id.as_ref().map_or(DEFAULT_TENANT, |v| v)) + && let Some(user) = users.get(&userid) + { user.username_by_userid() } else { return Err(RBACError::UserDoesNotExist); @@ -283,7 +333,7 @@ pub async fn add_roles_to_user( } // update parseable.json first - let mut metadata = get_metadata().await?; + let mut metadata = get_metadata(&tenant_id).await?; if let Some(user) = metadata .users .iter_mut() @@ -295,27 +345,31 @@ pub async fn add_roles_to_user( return Err(RBACError::UserDoesNotExist); } - put_metadata(&metadata).await?; + put_metadata(&metadata, &tenant_id).await?; // update in mem table - Users.add_roles(&userid.clone(), roles_to_add); + Users.add_roles(&userid.clone(), roles_to_add, &tenant_id); Ok(HttpResponse::Ok().json(format!("Roles updated successfully for {username}"))) } // Handler PATCH /user/{userid}/role/remove => Remove roles from a user pub async fn remove_roles_from_user( + req: HttpRequest, userid: web::Path, roles_to_remove: web::Json>, ) -> Result { let userid = userid.into_inner(); let roles_to_remove = roles_to_remove.into_inner(); - - if !Users.contains(&userid) { + let tenant_id = get_tenant_id_from_request(&req); + if !Users.contains(&userid, &tenant_id) { return Err(RBACError::UserDoesNotExist); }; // find username by userid, for native users, username is userid, for oauth users, we need to look up - let username = if let Some(user) = users().get(&userid) { + let username = if let Some(users) = + users().get(tenant_id.as_ref().map_or(DEFAULT_TENANT, |v| v)) + && let Some(user) = users.get(&userid) + { user.username_by_userid() } else { return Err(RBACError::UserDoesNotExist); @@ -335,7 +389,7 @@ pub async fn remove_roles_from_user( } // check for role not present with user - let user_roles: HashSet = HashSet::from_iter(Users.get_role(&userid)); + let user_roles: HashSet = HashSet::from_iter(Users.get_role(&userid, &tenant_id)); let roles_not_with_user: HashSet = HashSet::from_iter(roles_to_remove.difference(&user_roles).cloned()); if !roles_not_with_user.is_empty() { @@ -345,7 +399,7 @@ pub async fn remove_roles_from_user( 
} // update parseable.json first - let mut metadata = get_metadata().await?; + let mut metadata = get_metadata(&tenant_id).await?; if let Some(user) = metadata .users .iter_mut() @@ -359,9 +413,9 @@ pub async fn remove_roles_from_user( return Err(RBACError::UserDoesNotExist); } - put_metadata(&metadata).await?; + put_metadata(&metadata, &tenant_id).await?; // update in mem table - Users.remove_roles(&userid.clone(), roles_to_remove); + Users.remove_roles(&userid.clone(), roles_to_remove, &tenant_id); Ok(HttpResponse::Ok().json(format!("Roles updated successfully for {username}"))) } diff --git a/src/handlers/http/role.rs b/src/handlers/http/role.rs index 3a228f667..8d78d61f6 100644 --- a/src/handlers/http/role.rs +++ b/src/handlers/http/role.rs @@ -20,54 +20,69 @@ use std::collections::HashSet; use actix_web::http::StatusCode; use actix_web::{ - HttpResponse, Responder, + HttpRequest, HttpResponse, Responder, http::header::ContentType, web::{self, Json}, }; use crate::{ - parseable::PARSEABLE, + parseable::{DEFAULT_TENANT, PARSEABLE}, rbac::{ map::{DEFAULT_ROLE, mut_roles, mut_sessions, read_user_groups, users}, role::model::DefaultPrivilege, }, storage::{self, ObjectStorageError, StorageMetadata}, + utils::get_tenant_id_from_request, validator::{self, error::UsernameValidationError}, }; // Handler for PUT /api/v1/role/{name} // Creates a new role or update existing one pub async fn put( + req: HttpRequest, name: web::Path, Json(privileges): Json>, ) -> Result { let name = name.into_inner(); + let tenant_id = get_tenant_id_from_request(&req); // validate the role name validator::user_role_name(&name).map_err(RoleError::ValidationError)?; - let mut metadata = get_metadata().await?; + + let mut metadata = get_metadata(&tenant_id).await?; metadata.roles.insert(name.clone(), privileges.clone()); - put_metadata(&metadata).await?; - mut_roles().insert(name.clone(), privileges.clone()); + put_metadata(&metadata, &tenant_id).await?; + + let tenant_id = tenant_id.as_ref().map_or(DEFAULT_TENANT, |v| v); + mut_roles() + .entry(tenant_id.to_owned()) + .or_default() + .insert(name.clone(), privileges.clone()); + // mut_roles().insert(name.clone(), privileges.clone()); // refresh the sessions of all users using this role // for this, iterate over all user_groups and users and create a hashset of users let mut session_refresh_users: HashSet = HashSet::new(); - for user_group in read_user_groups().values() { - if user_group.roles.contains(&name) { - session_refresh_users.extend(user_group.users.iter().map(|u| u.userid().to_string())); + if let Some(groups) = read_user_groups().get(tenant_id) { + for user_group in groups.values() { + if user_group.roles.contains(&name) { + session_refresh_users + .extend(user_group.users.iter().map(|u| u.userid().to_string())); + } } } // iterate over all users to see if they have this role - for user in users().values() { - if user.roles.contains(&name) { - session_refresh_users.insert(user.userid().to_string()); + if let Some(users) = users().get(tenant_id) { + for user in users.values() { + if user.roles.contains(&name) { + session_refresh_users.insert(user.userid().to_string()); + } } } for userid in session_refresh_users { - mut_sessions().remove_user(&userid); + mut_sessions().remove_user(&userid, tenant_id); } Ok(HttpResponse::Ok().finish()) @@ -75,35 +90,42 @@ pub async fn put( // Handler for GET /api/v1/role/{name} // Fetch role by name -pub async fn get(name: web::Path) -> Result { +pub async fn get(req: HttpRequest, name: web::Path) -> Result { let name = 
name.into_inner(); - let metadata = get_metadata().await?; + let tenant_id = get_tenant_id_from_request(&req); + let metadata = get_metadata(&tenant_id).await?; let privileges = metadata.roles.get(&name).cloned().unwrap_or_default(); Ok(web::Json(privileges)) } // Handler for GET /api/v1/role // Fetch all roles in the system -pub async fn list() -> Result { - let metadata = get_metadata().await?; +pub async fn list(req: HttpRequest) -> Result { + let tenant_id = get_tenant_id_from_request(&req); + let metadata = get_metadata(&tenant_id).await?; let roles: Vec = metadata.roles.keys().cloned().collect(); Ok(web::Json(roles)) } // Handler for GET /api/v1/roles // Fetch all roles in the system -pub async fn list_roles() -> Result { - let metadata = get_metadata().await?; +pub async fn list_roles(req: HttpRequest) -> Result { + let tenant_id = get_tenant_id_from_request(&req); + let metadata = get_metadata(&tenant_id).await?; let roles = metadata.roles.clone(); Ok(web::Json(roles)) } // Handler for DELETE /api/v1/role/{name} // Delete existing role -pub async fn delete(name: web::Path) -> Result { +pub async fn delete( + req: HttpRequest, + name: web::Path, +) -> Result { let name = name.into_inner(); + let tenant_id = get_tenant_id_from_request(&req); // check if the role is being used by any user or group - let mut metadata = get_metadata().await?; + let mut metadata = get_metadata(&tenant_id).await?; if metadata.users.iter().any(|user| user.roles.contains(&name)) { return Err(RoleError::RoleInUse); } @@ -115,47 +137,79 @@ pub async fn delete(name: web::Path) -> Result) -> Result { +pub async fn put_default( + req: HttpRequest, + name: web::Json, +) -> Result { let name = name.into_inner(); - let mut metadata = get_metadata().await?; + let tenant_id = get_tenant_id_from_request(&req); + let mut metadata = get_metadata(&tenant_id).await?; metadata.default_role = Some(name.clone()); - *DEFAULT_ROLE.lock().unwrap() = Some(name); - put_metadata(&metadata).await?; + DEFAULT_ROLE.write().unwrap().insert( + tenant_id.as_ref().map_or(DEFAULT_TENANT, |v| v).to_owned(), + Some(name), + ); + // *DEFAULT_ROLE.lock().unwrap() = Some(name); + put_metadata(&metadata, &tenant_id).await?; Ok(HttpResponse::Ok().finish()) } // Handler for GET /api/v1/role/default // Delete existing role -pub async fn get_default() -> Result { - let res = match DEFAULT_ROLE.lock().unwrap().clone() { - Some(role) => serde_json::Value::String(role), - None => serde_json::Value::Null, +pub async fn get_default(req: HttpRequest) -> Result { + let tenant_id = get_tenant_id_from_request(&req); + let tenant_id = tenant_id.as_ref().map_or(DEFAULT_TENANT, |v| v); + let res = if let Some(role) = DEFAULT_ROLE.read().unwrap().get(tenant_id) + && let Some(role) = role + { + serde_json::Value::String(role.to_string()) + } else { + serde_json::Value::Null }; + // let res = match DEFAULT_ROLE + // .read() + // .unwrap() + // .get() + // { + // Some(role) => serde_json::Value::String(role), + // None => serde_json::Value::Null, + // }; Ok(web::Json(res)) } -async fn get_metadata() -> Result { +async fn get_metadata( + tenant_id: &Option, +) -> Result { let metadata = PARSEABLE .metastore - .get_parseable_metadata() + .get_parseable_metadata(tenant_id) .await .map_err(|e| ObjectStorageError::MetastoreError(Box::new(e.to_detail())))? .ok_or_else(|| ObjectStorageError::Custom("parseable metadata not initialized".into()))?; Ok(serde_json::from_slice::(&metadata)?) 
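// Aside: put_default/get_default above move DEFAULT_ROLE from a single Mutex<Option<String>>
// to a lock over a per-tenant map. A reduced, standalone sketch of that shape and of the
// read path used in get_default (the real static uses the crate's own lazy wrapper;
// std::sync::LazyLock appears here only to keep the example self-contained):
use std::collections::HashMap;
use std::sync::{LazyLock, RwLock};

static DEFAULT_ROLE: LazyLock<RwLock<HashMap<String, Option<String>>>> =
    LazyLock::new(|| RwLock::new(HashMap::new()));

fn set_default_role(tenant: &str, role: String) {
    DEFAULT_ROLE
        .write()
        .unwrap()
        .insert(tenant.to_owned(), Some(role));
}

fn default_role(tenant: &str) -> Option<String> {
    // Two layers of Option: "tenant unknown" vs "tenant known but no default role set".
    DEFAULT_ROLE.read().unwrap().get(tenant).cloned().flatten()
}

fn main() {
    assert_eq!(default_role("acme"), None);
    set_default_role("acme", "admin".to_owned());
    assert_eq!(default_role("acme"), Some("admin".to_owned()));
}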
} -async fn put_metadata(metadata: &StorageMetadata) -> Result<(), ObjectStorageError> { - storage::put_remote_metadata(metadata).await?; - storage::put_staging_metadata(metadata)?; +async fn put_metadata( + metadata: &StorageMetadata, + tenant_id: &Option, +) -> Result<(), ObjectStorageError> { + storage::put_remote_metadata(metadata, tenant_id).await?; + storage::put_staging_metadata(metadata, tenant_id)?; Ok(()) } diff --git a/src/handlers/http/targets.rs b/src/handlers/http/targets.rs index 410d6ccc1..988489d7d 100644 --- a/src/handlers/http/targets.rs +++ b/src/handlers/http/targets.rs @@ -23,9 +23,12 @@ use actix_web::{ use itertools::Itertools; use ulid::Ulid; -use crate::alerts::{ - AlertError, - target::{TARGETS, Target}, +use crate::{ + alerts::{ + AlertError, + target::{TARGETS, Target}, + }, + utils::get_tenant_id_from_request, }; // POST /targets @@ -42,10 +45,11 @@ pub async fn post( } // GET /targets -pub async fn list(_req: HttpRequest) -> Result { +pub async fn list(req: HttpRequest) -> Result { + let tenant_id = get_tenant_id_from_request(&req); // add to the map let list = TARGETS - .list() + .list(&tenant_id) .await? .into_iter() // .map(|t| t.mask()) @@ -55,10 +59,10 @@ pub async fn list(_req: HttpRequest) -> Result { } // GET /targets/{target_id} -pub async fn get(_req: HttpRequest, target_id: Path) -> Result { +pub async fn get(req: HttpRequest, target_id: Path) -> Result { let target_id = target_id.into_inner(); - - let target = TARGETS.get_target_by_id(&target_id).await?; + let tenant_id = get_tenant_id_from_request(&req); + let target = TARGETS.get_target_by_id(&target_id, &tenant_id).await?; // Ok(web::Json(target.mask())) Ok(web::Json(target)) @@ -66,14 +70,14 @@ pub async fn get(_req: HttpRequest, target_id: Path) -> Result, Json(mut target): Json, ) -> Result { let target_id = target_id.into_inner(); - + let tenant_id = get_tenant_id_from_request(&req); // if target_id does not exist, error - let old_target = TARGETS.get_target_by_id(&target_id).await?; + let old_target = TARGETS.get_target_by_id(&target_id, &tenant_id).await?; // do not allow modifying name if old_target.name != target.name { @@ -94,13 +98,10 @@ pub async fn update( } // DELETE /targets/{target_id} -pub async fn delete( - _req: HttpRequest, - target_id: Path, -) -> Result { +pub async fn delete(req: HttpRequest, target_id: Path) -> Result { let target_id = target_id.into_inner(); - - let target = TARGETS.delete(&target_id).await?; + let tenant_id = get_tenant_id_from_request(&req); + let target = TARGETS.delete(&target_id, &tenant_id).await?; // Ok(web::Json(target.mask())) Ok(web::Json(target)) diff --git a/src/handlers/http/users/dashboards.rs b/src/handlers/http/users/dashboards.rs index b1184e812..2c8c56c48 100644 --- a/src/handlers/http/users/dashboards.rs +++ b/src/handlers/http/users/dashboards.rs @@ -23,7 +23,7 @@ use crate::{ metastore::MetastoreError, storage::ObjectStorageError, users::dashboards::{DASHBOARDS, Dashboard, Tile, validate_dashboard_id}, - utils::{get_hash, get_user_from_request, is_admin}, + utils::{get_hash, get_tenant_id_from_request, get_user_and_tenant_from_request, is_admin}, }; use actix_web::http::StatusCode; use actix_web::{ @@ -34,6 +34,7 @@ use actix_web::{ use serde_json::Error as SerdeError; pub async fn list_dashboards(req: HttpRequest) -> Result { + let tenant_id = get_tenant_id_from_request(&req); let query_map = web::Query::>::from_query(req.query_string()) .map_err(|_| DashboardError::InvalidQueryParameter)?; let mut dashboard_limit = 0; @@ -55,7 
+56,7 @@ pub async fn list_dashboards(req: HttpRequest) -> Result Result Result) -> Result { +pub async fn get_dashboard( + req: HttpRequest, + dashboard_id: Path, +) -> Result { let dashboard_id = validate_dashboard_id(dashboard_id.into_inner())?; - + let tenant_id = get_tenant_id_from_request(&req); let dashboard = DASHBOARDS - .get_dashboard(dashboard_id) + .get_dashboard(dashboard_id, &tenant_id) .await .ok_or_else(|| DashboardError::Metadata("Dashboard does not exist"))?; @@ -90,10 +96,12 @@ pub async fn create_dashboard( if dashboard.title.is_empty() { return Err(DashboardError::Metadata("Title must be provided")); } + let (user_id, tenant_id) = get_user_and_tenant_from_request(&req)?; + let user_id = get_hash(&user_id); - let user_id = get_hash(&get_user_from_request(&req)?); - - DASHBOARDS.create(&user_id, &mut dashboard).await?; + DASHBOARDS + .create(&user_id, &mut dashboard, &tenant_id) + .await?; Ok((web::Json(dashboard), StatusCode::OK)) } @@ -102,12 +110,12 @@ pub async fn update_dashboard( dashboard_id: Path, dashboard: Option>, ) -> Result { - let user_id = get_hash(&get_user_from_request(&req)?); + let (user_id, tenant_id) = get_user_and_tenant_from_request(&req)?; + let user_id = get_hash(&user_id); let dashboard_id = validate_dashboard_id(dashboard_id.into_inner())?; let is_admin = is_admin(&req).map_err(|e| DashboardError::Custom(e.to_string()))?; - let mut existing_dashboard = DASHBOARDS - .get_dashboard_by_user(dashboard_id, &user_id, is_admin) + .get_dashboard_by_user(dashboard_id, &user_id, is_admin, &tenant_id) .await .ok_or(DashboardError::Metadata( "Dashboard does not exist or user is not authorized", @@ -180,7 +188,7 @@ pub async fn update_dashboard( }; DASHBOARDS - .update(&user_id, dashboard_id, &mut final_dashboard) + .update(&user_id, dashboard_id, &mut final_dashboard, &tenant_id) .await?; Ok((web::Json(final_dashboard), StatusCode::OK)) @@ -190,13 +198,14 @@ pub async fn delete_dashboard( req: HttpRequest, dashboard_id: Path, ) -> Result { - let user_id = get_hash(&get_user_from_request(&req)?); + let (user_id, tenant_id) = get_user_and_tenant_from_request(&req)?; + let user_id = get_hash(&user_id); let is_admin = is_admin(&req).map_err(|e| DashboardError::Custom(e.to_string()))?; let dashboard_id = validate_dashboard_id(dashboard_id.into_inner())?; DASHBOARDS - .delete_dashboard(&user_id, dashboard_id, is_admin) + .delete_dashboard(&user_id, dashboard_id, is_admin, &tenant_id) .await?; Ok(HttpResponse::Ok().finish()) @@ -211,12 +220,13 @@ pub async fn add_tile( return Err(DashboardError::Metadata("Tile ID must be provided")); } - let user_id = get_hash(&get_user_from_request(&req)?); + let (user_id, tenant_id) = get_user_and_tenant_from_request(&req)?; + let user_id = get_hash(&user_id); let dashboard_id = validate_dashboard_id(dashboard_id.into_inner())?; let is_admin = is_admin(&req).map_err(|e| DashboardError::Custom(e.to_string()))?; let mut dashboard = DASHBOARDS - .get_dashboard_by_user(dashboard_id, &user_id, is_admin) + .get_dashboard_by_user(dashboard_id, &user_id, is_admin, &tenant_id) .await .ok_or(DashboardError::Unauthorized)?; @@ -229,14 +239,16 @@ pub async fn add_tile( tiles.push(tile); DASHBOARDS - .update(&user_id, dashboard_id, &mut dashboard) + .update(&user_id, dashboard_id, &mut dashboard, &tenant_id) .await?; Ok((web::Json(dashboard), StatusCode::OK)) } -pub async fn list_tags() -> Result { - let tags = DASHBOARDS.list_tags().await; +pub async fn list_tags(req: HttpRequest) -> Result { + let tags = DASHBOARDS + 
.list_tags(&get_tenant_id_from_request(&req)) + .await; Ok((web::Json(tags), StatusCode::OK)) } diff --git a/src/handlers/http/users/filters.rs b/src/handlers/http/users/filters.rs index 0bbf3375f..4999c858e 100644 --- a/src/handlers/http/users/filters.rs +++ b/src/handlers/http/users/filters.rs @@ -22,7 +22,9 @@ use crate::{ parseable::PARSEABLE, storage::ObjectStorageError, users::filters::{CURRENT_FILTER_VERSION, FILTERS, Filter}, - utils::{actix::extract_session_key_from_req, get_hash, get_user_from_request, is_admin}, + utils::{ + actix::extract_session_key_from_req, get_hash, get_user_and_tenant_from_request, is_admin, + }, }; use actix_web::http::StatusCode; use actix_web::{ @@ -44,11 +46,11 @@ pub async fn get( req: HttpRequest, filter_id: Path, ) -> Result { - let user_id = get_user_from_request(&req)?; + let (user_id, tenant_id) = get_user_and_tenant_from_request(&req)?; let filter_id = filter_id.into_inner(); let is_admin = is_admin(&req).map_err(|e| FiltersError::Custom(e.to_string()))?; if let Some(filter) = FILTERS - .get_filter(&filter_id, &get_hash(&user_id), is_admin) + .get_filter(&filter_id, &get_hash(&user_id), is_admin, &tenant_id) .await { return Ok((web::Json(filter), StatusCode::OK)); @@ -63,15 +65,14 @@ pub async fn post( req: HttpRequest, Json(mut filter): Json, ) -> Result { - let mut user_id = get_user_from_request(&req)?; + let (mut user_id, tenant_id) = get_user_and_tenant_from_request(&req)?; user_id = get_hash(&user_id); let filter_id = Ulid::new().to_string(); filter.filter_id = Some(filter_id.clone()); filter.user_id = Some(user_id.clone()); filter.version = Some(CURRENT_FILTER_VERSION.to_string()); - - PARSEABLE.metastore.put_filter(&filter).await?; - FILTERS.update(&filter).await; + PARSEABLE.metastore.put_filter(&filter, &tenant_id).await?; + FILTERS.update(&filter, &tenant_id).await; Ok((web::Json(filter), StatusCode::OK)) } @@ -81,13 +82,13 @@ pub async fn update( filter_id: Path, Json(mut filter): Json, ) -> Result { - let mut user_id = get_user_from_request(&req)?; + let (mut user_id, tenant_id) = get_user_and_tenant_from_request(&req)?; user_id = get_hash(&user_id); let filter_id = filter_id.into_inner(); let is_admin = is_admin(&req).map_err(|e| FiltersError::Custom(e.to_string()))?; if FILTERS - .get_filter(&filter_id, &user_id, is_admin) + .get_filter(&filter_id, &user_id, is_admin, &tenant_id) .await .is_none() { @@ -99,8 +100,8 @@ pub async fn update( filter.user_id = Some(user_id.clone()); filter.version = Some(CURRENT_FILTER_VERSION.to_string()); - PARSEABLE.metastore.put_filter(&filter).await?; - FILTERS.update(&filter).await; + PARSEABLE.metastore.put_filter(&filter, &tenant_id).await?; + FILTERS.update(&filter, &tenant_id).await; Ok((web::Json(filter), StatusCode::OK)) } @@ -109,19 +110,22 @@ pub async fn delete( req: HttpRequest, filter_id: Path, ) -> Result { - let mut user_id = get_user_from_request(&req)?; + let (mut user_id, tenant_id) = get_user_and_tenant_from_request(&req)?; user_id = get_hash(&user_id); let filter_id = filter_id.into_inner(); let is_admin = is_admin(&req).map_err(|e| FiltersError::Custom(e.to_string()))?; let filter = FILTERS - .get_filter(&filter_id, &user_id, is_admin) + .get_filter(&filter_id, &user_id, is_admin, &tenant_id) .await .ok_or(FiltersError::Metadata( "Filter does not exist or user is not authorized", ))?; - PARSEABLE.metastore.delete_filter(&filter).await?; - FILTERS.delete_filter(&filter_id).await; + PARSEABLE + .metastore + .delete_filter(&filter, &tenant_id) + .await?; + 
FILTERS.delete_filter(&filter_id, &tenant_id).await; Ok(HttpResponse::Ok().finish()) } diff --git a/src/handlers/livetail.rs b/src/handlers/livetail.rs index 4df8a2bf5..f4b98e965 100644 --- a/src/handlers/livetail.rs +++ b/src/handlers/livetail.rs @@ -113,10 +113,11 @@ impl FlightService for FlightServiceImpl { rbac::Response::ReloadRequired => { return Err(Status::unauthenticated("reload required")); } + rbac::Response::Suspended(_) => return Err(Status::permission_denied("Suspended")), } let schema = PARSEABLE - .get_stream(stream) + .get_stream(stream, &None) .map_err(|err| Status::failed_precondition(err.to_string()))? .get_schema(); diff --git a/src/hottier.rs b/src/hottier.rs index 010a55016..a5d176d03 100644 --- a/src/hottier.rs +++ b/src/hottier.rs @@ -89,20 +89,32 @@ impl HotTierManager { .map(|hot_tier_path| INSTANCE.get_or_init(|| HotTierManager::new(hot_tier_path))) } - ///get the total hot tier size for all streams + /// get the total hot tier size for all streams pub async fn get_hot_tiers_size( &self, current_stream: &str, + current_tenant_id: &Option, ) -> Result<(u64, u64), HotTierError> { let mut total_hot_tier_size = 0; let mut total_hot_tier_used_size = 0; - for stream in PARSEABLE.streams.list() { - if self.check_stream_hot_tier_exists(&stream) && stream != current_stream { - let stream_hot_tier = self.get_hot_tier(&stream).await?; - total_hot_tier_size += &stream_hot_tier.size; - total_hot_tier_used_size += stream_hot_tier.used_size; + let tenants = if let Some(tenants) = PARSEABLE.list_tenants() { + tenants.into_iter().map(|v| Some(v)).collect() + } else { + vec![None] + }; + for tenant_id in tenants { + for stream in PARSEABLE.streams.list(&tenant_id) { + if self.check_stream_hot_tier_exists(&stream, &tenant_id) + && stream != current_stream + && tenant_id != *current_tenant_id + { + let stream_hot_tier = self.get_hot_tier(&stream, &tenant_id).await?; + total_hot_tier_size += &stream_hot_tier.size; + total_hot_tier_used_size += stream_hot_tier.used_size; + } } } + Ok((total_hot_tier_size, total_hot_tier_used_size)) } @@ -114,11 +126,12 @@ impl HotTierManager { &self, stream: &str, stream_hot_tier_size: u64, + tenant_id: &Option, ) -> Result { let mut existing_hot_tier_used_size = 0; - if self.check_stream_hot_tier_exists(stream) { + if self.check_stream_hot_tier_exists(stream, tenant_id) { //delete existing hot tier if its size is less than the updated hot tier size else return error - let existing_hot_tier = self.get_hot_tier(stream).await?; + let existing_hot_tier = self.get_hot_tier(stream, tenant_id).await?; existing_hot_tier_used_size = existing_hot_tier.used_size; if stream_hot_tier_size < existing_hot_tier_used_size { @@ -141,7 +154,7 @@ impl HotTierManager { .expect("Codepath should only be hit if hottier is enabled"); let (total_hot_tier_size, total_hot_tier_used_size) = - self.get_hot_tiers_size(stream).await?; + self.get_hot_tiers_size(stream, tenant_id).await?; let disk_threshold = (PARSEABLE.options.max_disk_usage * total_space as f64) / 100.0; let max_allowed_hot_tier_size = disk_threshold - total_hot_tier_size as f64 @@ -170,12 +183,16 @@ impl HotTierManager { Ok(existing_hot_tier_used_size) } - ///get the hot tier metadata file for the stream - pub async fn get_hot_tier(&self, stream: &str) -> Result { - if !self.check_stream_hot_tier_exists(stream) { + /// get the hot tier metadata file for the stream + pub async fn get_hot_tier( + &self, + stream: &str, + tenant_id: &Option, + ) -> Result { + if !self.check_stream_hot_tier_exists(stream, tenant_id) 
{ return Err(HotTierValidationError::NotFound(stream.to_owned()).into()); } - let path = self.hot_tier_file_path(stream)?; + let path = self.hot_tier_file_path(stream, tenant_id)?; let bytes = self .filesystem .get(&path) @@ -188,8 +205,12 @@ impl HotTierManager { Ok(stream_hot_tier) } - pub async fn delete_hot_tier(&self, stream: &str) -> Result<(), HotTierError> { - if !self.check_stream_hot_tier_exists(stream) { + pub async fn delete_hot_tier( + &self, + stream: &str, + tenant_id: &Option, + ) -> Result<(), HotTierError> { + if !self.check_stream_hot_tier_exists(stream, tenant_id) { return Err(HotTierValidationError::NotFound(stream.to_owned()).into()); } let path = self.hot_tier_path.join(stream); @@ -204,8 +225,9 @@ impl HotTierManager { &self, stream: &str, hot_tier: &mut StreamHotTier, + tenant_id: &Option, ) -> Result<(), HotTierError> { - let path = self.hot_tier_file_path(stream)?; + let path = self.hot_tier_file_path(stream, tenant_id)?; let bytes = serde_json::to_vec(&hot_tier)?.into(); self.filesystem.put(&path, bytes).await?; Ok(()) @@ -215,11 +237,22 @@ impl HotTierManager { pub fn hot_tier_file_path( &self, stream: &str, + tenant_id: &Option, ) -> Result { - let path = self - .hot_tier_path - .join(stream) - .join(STREAM_HOT_TIER_FILENAME); + // let path = self + // .hot_tier_path + // .join(stream) + // .join(STREAM_HOT_TIER_FILENAME); + let path = if let Some(tenant_id) = tenant_id.as_ref() { + self.hot_tier_path + .join(tenant_id) + .join(stream) + .join(STREAM_HOT_TIER_FILENAME) + } else { + self.hot_tier_path + .join(stream) + .join(STREAM_HOT_TIER_FILENAME) + }; let path = object_store::path::Path::from_absolute_path(path)?; Ok(path) @@ -257,9 +290,16 @@ impl HotTierManager { } let mut sync_hot_tier_tasks = FuturesUnordered::new(); - for stream in PARSEABLE.streams.list() { - if self.check_stream_hot_tier_exists(&stream) { - sync_hot_tier_tasks.push(self.process_stream(stream)); + let tenants = if let Some(tenants) = PARSEABLE.list_tenants() { + tenants.into_iter().map(|v| Some(v)).collect() + } else { + vec![None] + }; + for tenant_id in tenants { + for stream in PARSEABLE.streams.list(&tenant_id) { + if self.check_stream_hot_tier_exists(&stream, &tenant_id) { + sync_hot_tier_tasks.push(self.process_stream(stream, tenant_id.to_owned())); + } } } @@ -274,22 +314,31 @@ impl HotTierManager { /// process the hot tier files for the stream /// delete the files from the hot tier directory if the available date range is outside the hot tier range - async fn process_stream(&self, stream: String) -> Result<(), HotTierError> { - let stream_hot_tier = self.get_hot_tier(&stream).await?; + async fn process_stream( + &self, + stream: String, + tenant_id: Option, + ) -> Result<(), HotTierError> { + let stream_hot_tier = self.get_hot_tier(&stream, &tenant_id).await?; let mut parquet_file_size = stream_hot_tier.used_size; let mut s3_manifest_file_list = PARSEABLE .metastore - .get_all_manifest_files(&stream) + .get_all_manifest_files(&stream, &tenant_id) .await .map_err(|e| { - HotTierError::ObjectStorageError(ObjectStorageError::MetastoreError(Box::new( - e.to_detail(), - ))) - })?; - - self.process_manifest(&stream, &mut s3_manifest_file_list, &mut parquet_file_size) - .await?; + HotTierError::ObjectStorageError(ObjectStorageError::MetastoreError(Box::new( + e.to_detail(), + ))) + })?; + + self.process_manifest( + &stream, + &mut s3_manifest_file_list, + &mut parquet_file_size, + &tenant_id, + ) + .await?; Ok(()) } @@ -303,6 +352,7 @@ impl HotTierManager { stream: &str, 
manifest_files_to_download: &mut BTreeMap>, parquet_file_size: &mut u64, + tenant_id: &Option, ) -> Result<(), HotTierError> { if manifest_files_to_download.is_empty() { return Ok(()); @@ -335,6 +385,7 @@ impl HotTierManager { parquet_file_size, parquet_path, date, + tenant_id, ) .await? { @@ -363,9 +414,10 @@ impl HotTierManager { parquet_file_size: &mut u64, parquet_path: PathBuf, date: NaiveDate, + tenant_id: &Option, ) -> Result { let mut file_processed = false; - let mut stream_hot_tier = self.get_hot_tier(stream).await?; + let mut stream_hot_tier = self.get_hot_tier(stream, tenant_id).await?; if !self.is_disk_available(parquet_file.file_size).await? || stream_hot_tier.available_size <= parquet_file.file_size { @@ -375,6 +427,7 @@ impl HotTierManager { &mut stream_hot_tier, &parquet_path, parquet_file.file_size, + tenant_id, ) .await? { @@ -388,14 +441,15 @@ impl HotTierManager { let parquet_data = PARSEABLE .storage .get_object_store() - .get_object(&parquet_file_path) + .get_object(&parquet_file_path, tenant_id) .await?; file.write_all(&parquet_data).await?; *parquet_file_size += parquet_file.file_size; stream_hot_tier.used_size = *parquet_file_size; stream_hot_tier.available_size -= parquet_file.file_size; - self.put_hot_tier(stream, &mut stream_hot_tier).await?; + self.put_hot_tier(stream, &mut stream_hot_tier, tenant_id) + .await?; file_processed = true; let path = self.get_stream_path_for_date(stream, &date); let mut hot_tier_manifest = HotTierManager::get_hot_tier_manifest_from_path(path).await?; @@ -539,12 +593,13 @@ impl HotTierManager { } ///check if the hot tier metadata file exists for the stream - pub fn check_stream_hot_tier_exists(&self, stream: &str) -> bool { - let path = self - .hot_tier_path - .join(stream) - .join(STREAM_HOT_TIER_FILENAME); - path.exists() + pub fn check_stream_hot_tier_exists(&self, stream: &str, tenant_id: &Option) -> bool { + // let path = self + // .hot_tier_path + // .join(stream) + // .join(STREAM_HOT_TIER_FILENAME); + let path = self.hot_tier_file_path(stream, tenant_id).unwrap(); + PathBuf::from(path.to_string()).exists() } ///delete the parquet file from the hot tier directory for the stream @@ -559,6 +614,7 @@ impl HotTierManager { stream_hot_tier: &mut StreamHotTier, download_file_path: &Path, parquet_file_size: u64, + tenant_id: &Option, ) -> Result { let mut delete_successful = false; let dates = self.fetch_hot_tier_dates(stream).await?; @@ -607,7 +663,8 @@ impl HotTierManager { stream_hot_tier.used_size -= file_size; stream_hot_tier.available_size += file_size; - self.put_hot_tier(stream, stream_hot_tier).await?; + self.put_hot_tier(stream, stream_hot_tier, tenant_id) + .await?; delete_successful = true; if stream_hot_tier.available_size <= parquet_file_size { @@ -697,40 +754,55 @@ impl HotTierManager { } pub async fn put_internal_stream_hot_tier(&self) -> Result<(), HotTierError> { - if !self.check_stream_hot_tier_exists(PMETA_STREAM_NAME) { - let mut stream_hot_tier = StreamHotTier { - version: Some(CURRENT_HOT_TIER_VERSION.to_string()), - size: INTERNAL_STREAM_HOT_TIER_SIZE_BYTES, - used_size: 0, - available_size: INTERNAL_STREAM_HOT_TIER_SIZE_BYTES, - oldest_date_time_entry: None, - }; - self.put_hot_tier(PMETA_STREAM_NAME, &mut stream_hot_tier) - .await?; - } - Ok(()) - } - - /// Creates hot tier for pstats internal stream if the stream exists in storage - async fn create_pstats_hot_tier(&self) -> Result<(), HotTierError> { - // Check if pstats hot tier already exists - if 
!self.check_stream_hot_tier_exists(DATASET_STATS_STREAM_NAME) { - // Check if pstats stream exists in storage by attempting to load it - if PARSEABLE - .check_or_load_stream(DATASET_STATS_STREAM_NAME) - .await - { + let tenants = if let Some(tenants) = PARSEABLE.list_tenants() { + tenants.into_iter().map(|v| Some(v)).collect() + } else { + vec![None] + }; + + for tenant_id in tenants { + if !self.check_stream_hot_tier_exists(PMETA_STREAM_NAME, &tenant_id) { let mut stream_hot_tier = StreamHotTier { version: Some(CURRENT_HOT_TIER_VERSION.to_string()), - size: MIN_STREAM_HOT_TIER_SIZE_BYTES, + size: INTERNAL_STREAM_HOT_TIER_SIZE_BYTES, used_size: 0, - available_size: MIN_STREAM_HOT_TIER_SIZE_BYTES, + available_size: INTERNAL_STREAM_HOT_TIER_SIZE_BYTES, oldest_date_time_entry: None, }; - self.put_hot_tier(DATASET_STATS_STREAM_NAME, &mut stream_hot_tier) + self.put_hot_tier(PMETA_STREAM_NAME, &mut stream_hot_tier, &tenant_id) .await?; } } + Ok(()) + } + + /// Creates hot tier for pstats internal stream if the stream exists in storage + async fn create_pstats_hot_tier(&self) -> Result<(), HotTierError> { + let tenants = if let Some(tenants) = PARSEABLE.list_tenants() { + tenants.into_iter().map(|v| Some(v)).collect() + } else { + vec![None] + }; + for tenant_id in tenants { + // Check if pstats hot tier already exists + if !self.check_stream_hot_tier_exists(DATASET_STATS_STREAM_NAME, &tenant_id) { + // Check if pstats stream exists in storage by attempting to load it + if PARSEABLE + .check_or_load_stream(DATASET_STATS_STREAM_NAME, &tenant_id) + .await + { + let mut stream_hot_tier = StreamHotTier { + version: Some(CURRENT_HOT_TIER_VERSION.to_string()), + size: MIN_STREAM_HOT_TIER_SIZE_BYTES, + used_size: 0, + available_size: MIN_STREAM_HOT_TIER_SIZE_BYTES, + oldest_date_time_entry: None, + }; + self.put_hot_tier(DATASET_STATS_STREAM_NAME, &mut stream_hot_tier, &tenant_id) + .await?; + } + } + } Ok(()) } diff --git a/src/lib.rs b/src/lib.rs index 725bed5f9..b6d15205e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -47,10 +47,10 @@ mod static_schema; mod stats; pub mod storage; pub mod sync; +pub mod tenants; pub mod users; pub mod utils; pub mod validator; - use std::time::Duration; // Public re-exports of crates being used in enterprise @@ -59,6 +59,7 @@ pub use handlers::http::modal::{ ParseableServer, ingest_server::IngestServer, query_server::QueryServer, server::Server, }; use once_cell::sync::Lazy; +pub use openid; pub use opentelemetry_proto; use parseable::PARSEABLE; use reqwest::{Client, ClientBuilder}; diff --git a/src/main.rs b/src/main.rs index f8577fb15..42cba34f5 100644 --- a/src/main.rs +++ b/src/main.rs @@ -62,7 +62,7 @@ async fn main() -> anyhow::Result<()> { // load metadata from persistence let parseable_json = server.load_metadata().await?; - let metadata = storage::resolve_parseable_metadata(&parseable_json).await?; + let metadata = storage::resolve_parseable_metadata(&parseable_json, &None).await?; banner::print(&PARSEABLE, &metadata).await; // initialize the rbac map rbac::map::init(&metadata); diff --git a/src/metadata.rs b/src/metadata.rs index 2bc0940fc..a934b037d 100644 --- a/src/metadata.rs +++ b/src/metadata.rs @@ -40,25 +40,26 @@ pub fn update_stats( size: u64, num_rows: usize, parsed_date: NaiveDate, + tenant_id: &str, ) { let parsed_date = parsed_date.to_string(); EVENTS_INGESTED - .with_label_values(&[stream_name, origin]) + .with_label_values(&[stream_name, origin, tenant_id]) .add(num_rows as i64); EVENTS_INGESTED_DATE - .with_label_values(&[stream_name, origin, 
&parsed_date]) + .with_label_values(&[stream_name, origin, &parsed_date, tenant_id]) .inc_by(num_rows as u64); EVENTS_INGESTED_SIZE - .with_label_values(&[stream_name, origin]) + .with_label_values(&[stream_name, origin, tenant_id]) .add(size as i64); EVENTS_INGESTED_SIZE_DATE - .with_label_values(&[stream_name, origin, &parsed_date]) + .with_label_values(&[stream_name, origin, &parsed_date, tenant_id]) .inc_by(size); LIFETIME_EVENTS_INGESTED - .with_label_values(&[stream_name, origin]) + .with_label_values(&[stream_name, origin, tenant_id]) .add(num_rows as i64); LIFETIME_EVENTS_INGESTED_SIZE - .with_label_values(&[stream_name, origin]) + .with_label_values(&[stream_name, origin, tenant_id]) .add(size as i64); } @@ -136,7 +137,7 @@ impl LogStreamMetadata { } } -///this function updates the data type of time partition field +/// this function updates the data type of time partition field /// from utf-8 to timestamp if it is not already timestamp /// and updates the schema in the storage /// required only when migrating from version 1.2.0 and below @@ -167,20 +168,20 @@ pub async fn update_data_type_time_partition( Ok(()) } -pub fn load_daily_metrics(manifests: &Vec, stream_name: &str) { +pub fn load_daily_metrics(manifests: &Vec, stream_name: &str, tenant_id: &str) { for manifest in manifests { let manifest_date = manifest.time_lower_bound.date_naive().to_string(); let events_ingested = manifest.events_ingested; let ingestion_size = manifest.ingestion_size; let storage_size = manifest.storage_size; EVENTS_INGESTED_DATE - .with_label_values(&[stream_name, "json", &manifest_date]) + .with_label_values(&[stream_name, "json", &manifest_date, tenant_id]) .inc_by(events_ingested); EVENTS_INGESTED_SIZE_DATE - .with_label_values(&[stream_name, "json", &manifest_date]) + .with_label_values(&[stream_name, "json", &manifest_date, tenant_id]) .inc_by(ingestion_size); EVENTS_STORAGE_SIZE_DATE - .with_label_values(&["data", stream_name, "parquet", &manifest_date]) + .with_label_values(&["data", stream_name, "parquet", &manifest_date, tenant_id]) .inc_by(storage_size); } } diff --git a/src/metastore/metastore_traits.rs b/src/metastore/metastore_traits.rs index 23608fb3a..1113980d0 100644 --- a/src/metastore/metastore_traits.rs +++ b/src/metastore/metastore_traits.rs @@ -45,74 +45,177 @@ use crate::{ #[async_trait] pub trait Metastore: std::fmt::Debug + Send + Sync { async fn initiate_connection(&self) -> Result<(), MetastoreError>; - async fn get_objects(&self, parent_path: &str) -> Result, MetastoreError>; + async fn get_objects( + &self, + parent_path: &str, + tenant_id: &Option, + ) -> Result, MetastoreError>; /// overview - async fn get_overviews(&self) -> Result>, MetastoreError>; + async fn get_overviews( + &self, + tenant_id: &Option, + ) -> Result>, MetastoreError>; async fn put_overview( &self, obj: &dyn MetastoreObject, stream: &str, + tenant_id: &Option, + ) -> Result<(), MetastoreError>; + async fn delete_overview( + &self, + stream: &str, + tenant_id: &Option, ) -> Result<(), MetastoreError>; - async fn delete_overview(&self, stream: &str) -> Result<(), MetastoreError>; /// keystone - async fn get_keystones(&self) -> Result, MetastoreError>; - async fn put_keystone(&self, obj: &dyn MetastoreObject) -> Result<(), MetastoreError>; - async fn delete_keystone(&self, obj: &dyn MetastoreObject) -> Result<(), MetastoreError>; + async fn get_keystones(&self) -> Result>, MetastoreError>; + async fn put_keystone( + &self, + obj: &dyn MetastoreObject, + tenant_id: &Option, + ) -> Result<(), 
MetastoreError>; + async fn delete_keystone( + &self, + obj: &dyn MetastoreObject, + tenant_id: &Option, + ) -> Result<(), MetastoreError>; /// conversations - async fn get_conversations(&self) -> Result, MetastoreError>; - async fn put_conversation(&self, obj: &dyn MetastoreObject) -> Result<(), MetastoreError>; - async fn delete_conversation(&self, obj: &dyn MetastoreObject) -> Result<(), MetastoreError>; + async fn get_conversations(&self) -> Result>, MetastoreError>; + async fn put_conversation( + &self, + obj: &dyn MetastoreObject, + tenant_id: &Option, + ) -> Result<(), MetastoreError>; + async fn delete_conversation( + &self, + obj: &dyn MetastoreObject, + tenant_id: &Option, + ) -> Result<(), MetastoreError>; /// alerts - async fn get_alerts(&self) -> Result, MetastoreError>; - async fn put_alert(&self, obj: &dyn MetastoreObject) -> Result<(), MetastoreError>; - async fn delete_alert(&self, obj: &dyn MetastoreObject) -> Result<(), MetastoreError>; + async fn get_alerts(&self) -> Result>, MetastoreError>; + async fn put_alert( + &self, + obj: &dyn MetastoreObject, + tenant_id: &Option, + ) -> Result<(), MetastoreError>; + async fn delete_alert( + &self, + obj: &dyn MetastoreObject, + tenant_id: &Option, + ) -> Result<(), MetastoreError>; /// alerts state - async fn get_alert_states(&self) -> Result, MetastoreError>; + async fn get_alert_states( + &self, + tenant_id: &Option, + ) -> Result, MetastoreError>; async fn get_alert_state_entry( &self, alert_id: &Ulid, + tenant_id: &Option, ) -> Result, MetastoreError>; - async fn put_alert_state(&self, obj: &dyn MetastoreObject) -> Result<(), MetastoreError>; - async fn delete_alert_state(&self, obj: &dyn MetastoreObject) -> Result<(), MetastoreError>; + async fn put_alert_state( + &self, + obj: &dyn MetastoreObject, + tenant_id: &Option, + ) -> Result<(), MetastoreError>; + async fn delete_alert_state( + &self, + obj: &dyn MetastoreObject, + tenant_id: &Option, + ) -> Result<(), MetastoreError>; /// mttr history - async fn get_mttr_history(&self) -> Result, MetastoreError>; - async fn put_mttr_history(&self, obj: &dyn MetastoreObject) -> Result<(), MetastoreError>; + async fn get_mttr_history( + &self, + tenant_id: &Option, + ) -> Result, MetastoreError>; + async fn put_mttr_history( + &self, + obj: &dyn MetastoreObject, + tenant_id: &Option, + ) -> Result<(), MetastoreError>; /// llmconfig - async fn get_llmconfigs(&self) -> Result, MetastoreError>; - async fn put_llmconfig(&self, obj: &dyn MetastoreObject) -> Result<(), MetastoreError>; - async fn delete_llmconfig(&self, obj: &dyn MetastoreObject) -> Result<(), MetastoreError>; + async fn get_llmconfigs(&self) -> Result>, MetastoreError>; + async fn put_llmconfig( + &self, + obj: &dyn MetastoreObject, + tenant_id: &Option, + ) -> Result<(), MetastoreError>; + async fn delete_llmconfig( + &self, + obj: &dyn MetastoreObject, + tenant_id: &Option, + ) -> Result<(), MetastoreError>; /// targets - async fn get_targets(&self) -> Result, MetastoreError>; - async fn put_target(&self, obj: &dyn MetastoreObject) -> Result<(), MetastoreError>; - async fn delete_target(&self, obj: &dyn MetastoreObject) -> Result<(), MetastoreError>; + async fn get_targets(&self) -> Result>, MetastoreError>; + async fn put_target( + &self, + obj: &dyn MetastoreObject, + tenant_id: &Option, + ) -> Result<(), MetastoreError>; + async fn delete_target( + &self, + obj: &dyn MetastoreObject, + tenant_id: &Option, + ) -> Result<(), MetastoreError>; /// dashboards - async fn get_dashboards(&self) -> Result, 
MetastoreError>; - async fn put_dashboard(&self, obj: &dyn MetastoreObject) -> Result<(), MetastoreError>; - async fn delete_dashboard(&self, obj: &dyn MetastoreObject) -> Result<(), MetastoreError>; + async fn get_dashboards(&self) -> Result>, MetastoreError>; + async fn put_dashboard( + &self, + obj: &dyn MetastoreObject, + tenant_id: &Option, + ) -> Result<(), MetastoreError>; + async fn delete_dashboard( + &self, + obj: &dyn MetastoreObject, + tenant_id: &Option, + ) -> Result<(), MetastoreError>; /// chats async fn get_chats(&self) -> Result>, MetastoreError>; - async fn put_chat(&self, obj: &dyn MetastoreObject) -> Result<(), MetastoreError>; - async fn delete_chat(&self, obj: &dyn MetastoreObject) -> Result<(), MetastoreError>; + async fn put_chat( + &self, + obj: &dyn MetastoreObject, + tenant_id: &Option, + ) -> Result<(), MetastoreError>; + async fn delete_chat( + &self, + obj: &dyn MetastoreObject, + tenant_id: &Option, + ) -> Result<(), MetastoreError>; /// filters - async fn get_filters(&self) -> Result, MetastoreError>; - async fn put_filter(&self, obj: &dyn MetastoreObject) -> Result<(), MetastoreError>; - async fn delete_filter(&self, obj: &dyn MetastoreObject) -> Result<(), MetastoreError>; + async fn get_filters(&self) -> Result>, MetastoreError>; + async fn put_filter( + &self, + obj: &dyn MetastoreObject, + tenant_id: &Option, + ) -> Result<(), MetastoreError>; + async fn delete_filter( + &self, + obj: &dyn MetastoreObject, + tenant_id: &Option, + ) -> Result<(), MetastoreError>; /// correlations - async fn get_correlations(&self) -> Result, MetastoreError>; - async fn put_correlation(&self, obj: &dyn MetastoreObject) -> Result<(), MetastoreError>; - async fn delete_correlation(&self, obj: &dyn MetastoreObject) -> Result<(), MetastoreError>; + async fn get_correlations(&self) -> Result>, MetastoreError>; + async fn put_correlation( + &self, + obj: &dyn MetastoreObject, + tenant_id: &Option, + ) -> Result<(), MetastoreError>; + async fn delete_correlation( + &self, + obj: &dyn MetastoreObject, + tenant_id: &Option, + ) -> Result<(), MetastoreError>; /// stream metadata /// `get_base` when set to true, will fetch the stream.json present at the base of @@ -123,11 +226,13 @@ pub trait Metastore: std::fmt::Debug + Send + Sync { &self, stream_name: &str, get_base: bool, + tenant_id: &Option, ) -> Result; async fn put_stream_json( &self, obj: &dyn MetastoreObject, stream_name: &str, + tenant_id: &Option, ) -> Result<(), MetastoreError>; /// This function will fetch multiple stream jsons /// @@ -138,12 +243,14 @@ pub trait Metastore: std::fmt::Debug + Send + Sync { &self, stream_name: &str, mode: Option, + tenant_id: &Option, ) -> Result, MetastoreError>; /// manifest async fn get_all_manifest_files( &self, stream_name: &str, + tenant_id: &Option, ) -> Result>, MetastoreError>; async fn get_manifest( &self, @@ -151,6 +258,7 @@ pub trait Metastore: std::fmt::Debug + Send + Sync { lower_bound: DateTime, upper_bound: DateTime, manifest_url: Option, + tenant_id: &Option, ) -> Result, MetastoreError>; async fn put_manifest( &self, @@ -158,41 +266,72 @@ pub trait Metastore: std::fmt::Debug + Send + Sync { stream_name: &str, lower_bound: DateTime, upper_bound: DateTime, + tenant_id: &Option, ) -> Result<(), MetastoreError>; async fn delete_manifest( &self, stream_name: &str, lower_bound: DateTime, upper_bound: DateTime, + tenant_id: &Option, ) -> Result<(), MetastoreError>; async fn get_manifest_path( &self, stream_name: &str, lower_bound: DateTime, upper_bound: DateTime, + tenant_id: 
&Option, ) -> Result; /// schema /// This function will fetch all schemas for the given stream - async fn get_all_schemas(&self, stream_name: &str) -> Result, MetastoreError>; - async fn get_schema(&self, stream_name: &str) -> Result; - async fn put_schema(&self, obj: Schema, stream_name: &str) -> Result<(), MetastoreError>; + async fn get_all_schemas( + &self, + stream_name: &str, + tenant_id: &Option, + ) -> Result, MetastoreError>; + async fn get_schema( + &self, + stream_name: &str, + tenant_id: &Option, + ) -> Result; + async fn put_schema( + &self, + obj: Schema, + stream_name: &str, + tenant_id: &Option, + ) -> Result<(), MetastoreError>; /// parseable metadata - async fn get_parseable_metadata(&self) -> Result, MetastoreError>; + async fn get_parseable_metadata( + &self, + tenant_id: &Option, + ) -> Result, MetastoreError>; async fn get_ingestor_metadata(&self) -> Result, MetastoreError>; - async fn put_parseable_metadata(&self, obj: &dyn MetastoreObject) - -> Result<(), MetastoreError>; + async fn put_parseable_metadata( + &self, + obj: &dyn MetastoreObject, + tenant_id: &Option, + ) -> Result<(), MetastoreError>; + async fn delete_tenant(&self, tenant_id: &str) -> Result<(), MetastoreError>; + // async fn put_tenant_metadata(&self, obj: &dyn MetastoreObject) -> Result<(), MetastoreError>; /// node metadata - async fn get_node_metadata(&self, node_type: NodeType) -> Result, MetastoreError>; + async fn get_node_metadata( + &self, + node_type: NodeType, + tenant_id: &Option, + ) -> Result, MetastoreError>; async fn delete_node_metadata( &self, domain_name: &str, node_type: NodeType, ) -> Result; async fn put_node_metadata(&self, obj: &dyn MetastoreObject) -> Result<(), MetastoreError>; - async fn list_streams(&self) -> Result, MetastoreError>; + async fn list_streams( + &self, + tenant_id: &Option, + ) -> Result, MetastoreError>; } /// This trait allows a struct to get treated as a Metastore Object diff --git a/src/metastore/metastores/object_store_metastore.rs b/src/metastore/metastores/object_store_metastore.rs index 35f84f80b..98fba5625 100644 --- a/src/metastore/metastores/object_store_metastore.rs +++ b/src/metastore/metastores/object_store_metastore.rs @@ -46,7 +46,7 @@ use crate::{ metastore_traits::{Metastore, MetastoreObject}, }, option::Mode, - parseable::PARSEABLE, + parseable::{DEFAULT_TENANT, PARSEABLE}, storage::{ ALERTS_ROOT_DIRECTORY, ObjectStorage, ObjectStorageError, PARSEABLE_ROOT_DIRECTORY, SETTINGS_ROOT_DIRECTORY, STREAM_METADATA_FILE_NAME, STREAM_ROOT_DIRECTORY, @@ -73,26 +73,35 @@ impl Metastore for ObjectStoreMetastore { } /// Fetch mutiple .json objects - async fn get_objects(&self, parent_path: &str) -> Result, MetastoreError> { + async fn get_objects( + &self, + parent_path: &str, + tenant_id: &Option, + ) -> Result, MetastoreError> { Ok(self .storage .get_objects( Some(&RelativePathBuf::from(parent_path)), Box::new(|file_name| file_name.ends_with(".json")), + tenant_id, ) .await?) 
} /// This function fetches all the overviews from the underlying object store - async fn get_overviews(&self) -> Result>, MetastoreError> { - let streams = self.list_streams().await?; + async fn get_overviews( + &self, + tenant_id: &Option, + ) -> Result>, MetastoreError> { + let streams = self.list_streams(tenant_id).await?; let mut all_overviews = HashMap::new(); for stream in streams { - let overview_path = RelativePathBuf::from_iter([&stream, "overview"]); + let root = tenant_id.as_ref().map_or("", |v| v); + let overview_path = RelativePathBuf::from_iter([root, &stream, "overview"]); // if the file doesn't exist, load an empty overview - let overview = (self.storage.get_object(&overview_path).await).ok(); + let overview = (self.storage.get_object(&overview_path, tenant_id).await).ok(); all_overviews.insert(stream, overview); } @@ -105,117 +114,212 @@ impl Metastore for ObjectStoreMetastore { &self, obj: &dyn MetastoreObject, stream: &str, + tenant_id: &Option, ) -> Result<(), MetastoreError> { - let path = RelativePathBuf::from_iter([stream, "overview"]); - Ok(self.storage.put_object(&path, to_bytes(obj)).await?) + let path = if let Some(tenant_id) = tenant_id.as_ref() { + RelativePathBuf::from_iter([tenant_id, stream, "overview"]) + } else { + RelativePathBuf::from_iter([stream, "overview"]) + }; + Ok(self + .storage + .put_object(&path, to_bytes(obj), tenant_id) + .await?) } /// Delete an overview - async fn delete_overview(&self, stream: &str) -> Result<(), MetastoreError> { - let path = RelativePathBuf::from_iter([stream, "overview"]); - Ok(self.storage.delete_object(&path).await?) + async fn delete_overview( + &self, + stream: &str, + tenant_id: &Option, + ) -> Result<(), MetastoreError> { + let path = if let Some(tenant_id) = tenant_id.as_ref() { + RelativePathBuf::from_iter([tenant_id, stream, "overview"]) + } else { + RelativePathBuf::from_iter([stream, "overview"]) + }; + Ok(self.storage.delete_object(&path, tenant_id).await?) } /// This function fetches all the keystones from the underlying object store - async fn get_keystones(&self) -> Result, MetastoreError> { - let keystone_path = RelativePathBuf::from_iter([".keystone"]); - let keystones = self - .storage - .get_objects( - Some(&keystone_path), - Box::new(|file_name| { - file_name.ends_with(".json") && !file_name.starts_with("conv_") - }), - ) - .await?; + async fn get_keystones(&self) -> Result>, MetastoreError> { + let base_paths = PARSEABLE.list_tenants().map_or(vec!["".into()], |v| v); + let mut keystones = HashMap::new(); + for mut tenant in base_paths { + let keystone_path = RelativePathBuf::from_iter([&tenant, ".keystone"]); + let objs = self + .storage + .get_objects( + Some(&keystone_path), + Box::new(|file_name| { + file_name.ends_with(".json") && !file_name.starts_with("conv_") + }), + &Some(tenant.clone()), + ) + .await?; + if tenant.eq(&mut "") { + tenant.clone_from(&DEFAULT_TENANT.to_string()); + } + keystones.insert(tenant, objs); + } Ok(keystones) } /// This function puts a keystone in the object store at the given path - async fn put_keystone(&self, obj: &dyn MetastoreObject) -> Result<(), MetastoreError> { + async fn put_keystone( + &self, + obj: &dyn MetastoreObject, + tenant_id: &Option, + ) -> Result<(), MetastoreError> { let id = obj.get_object_id(); - let path = RelativePathBuf::from_iter([".keystone", &format!("{id}.json")]); - Ok(self.storage.put_object(&path, to_bytes(obj)).await?) 
+ let path = if let Some(tenant_id) = tenant_id.as_ref() { + RelativePathBuf::from_iter([tenant_id, ".keystone", &format!("{id}.json")]) + } else { + RelativePathBuf::from_iter([".keystone", &format!("{id}.json")]) + }; + Ok(self + .storage + .put_object(&path, to_bytes(obj), tenant_id) + .await?) } /// Delete a keystone - async fn delete_keystone(&self, obj: &dyn MetastoreObject) -> Result<(), MetastoreError> { + async fn delete_keystone( + &self, + obj: &dyn MetastoreObject, + tenant_id: &Option, + ) -> Result<(), MetastoreError> { let id = obj.get_object_id(); - let path = RelativePathBuf::from_iter([".keystone", &format!("{id}.json")]); - Ok(self.storage.delete_object(&path).await?) + let path = if let Some(tenant_id) = tenant_id.as_ref() { + RelativePathBuf::from_iter([tenant_id, ".keystone", &format!("{id}.json")]) + } else { + RelativePathBuf::from_iter([".keystone", &format!("{id}.json")]) + }; + Ok(self.storage.delete_object(&path, tenant_id).await?) } /// This function fetches all the conversations from the underlying object store - async fn get_conversations(&self) -> Result, MetastoreError> { - let keystone_path = RelativePathBuf::from_iter([".keystone"]); - let conversations = self - .storage - .get_objects( - Some(&keystone_path), - Box::new(|file_name| { - file_name.ends_with(".json") && file_name.starts_with("conv_") - }), - ) - .await?; + async fn get_conversations(&self) -> Result>, MetastoreError> { + let base_paths = PARSEABLE.list_tenants().map_or(vec!["".into()], |v| v); + let mut conversations = HashMap::new(); + for mut tenant in base_paths { + let conv_path = RelativePathBuf::from_iter([&tenant, ".keystone"]); + let objs = self + .storage + .get_objects( + Some(&conv_path), + Box::new(|file_name| { + file_name.ends_with(".json") && file_name.starts_with("conv_") + }), + &Some(tenant.clone()), + ) + .await?; + if tenant.eq(&mut "") { + tenant.clone_from(&DEFAULT_TENANT.to_string()); + } + conversations.insert(tenant, objs); + } Ok(conversations) } /// This function puts a conversation in the object store at the given path - async fn put_conversation(&self, obj: &dyn MetastoreObject) -> Result<(), MetastoreError> { + async fn put_conversation( + &self, + obj: &dyn MetastoreObject, + tenant_id: &Option, + ) -> Result<(), MetastoreError> { let id = obj.get_object_id(); - let path = RelativePathBuf::from_iter([".keystone", &format!("conv_{id}.json")]); - Ok(self.storage.put_object(&path, to_bytes(obj)).await?) + let path = if let Some(tenant_id) = tenant_id.as_ref() { + RelativePathBuf::from_iter([tenant_id, ".keystone", &format!("conv_{id}.json")]) + } else { + RelativePathBuf::from_iter([".keystone", &format!("conv_{id}.json")]) + }; + Ok(self + .storage + .put_object(&path, to_bytes(obj), tenant_id) + .await?) } /// Delete a conversation - async fn delete_conversation(&self, obj: &dyn MetastoreObject) -> Result<(), MetastoreError> { + async fn delete_conversation( + &self, + obj: &dyn MetastoreObject, + tenant_id: &Option, + ) -> Result<(), MetastoreError> { let id = obj.get_object_id(); - let path = RelativePathBuf::from_iter([".keystone", &format!("conv_{id}.json")]); - Ok(self.storage.delete_object(&path).await?) + let path = if let Some(tenant_id) = tenant_id.as_ref() { + RelativePathBuf::from_iter([tenant_id, ".keystone", &format!("conv_{id}.json")]) + } else { + RelativePathBuf::from_iter([".keystone", &format!("conv_{id}.json")]) + }; + Ok(self.storage.delete_object(&path, tenant_id).await?) 
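// Sketch of the per-tenant fan-out that get_keystones and get_conversations (and,
// further down, get_alerts, get_llmconfigs, get_targets, and friends) repeat:
// enumerate the configured tenants, fall back to a single empty prefix for the
// legacy layout, fetch that tenant's objects, and key the result map by the
// default tenant name when the prefix is empty. `collect_per_tenant` and
// `fetch_for_tenant` are illustrative stand-ins, and the value of DEFAULT_TENANT
// below is an assumption; the real constant comes from `parseable::DEFAULT_TENANT`.
use std::collections::HashMap;

const DEFAULT_TENANT: &str = "default"; // assumed placeholder value

fn collect_per_tenant<T>(
    tenants: Option<Vec<String>>,
    mut fetch_for_tenant: impl FnMut(&str) -> Vec<T>,
) -> HashMap<String, Vec<T>> {
    let mut out = HashMap::new();
    for tenant in tenants.unwrap_or_else(|| vec![String::new()]) {
        let objs = fetch_for_tenant(&tenant);
        // the empty prefix denotes the legacy layout; expose it under the default key
        let key = if tenant.is_empty() {
            DEFAULT_TENANT.to_string()
        } else {
            tenant
        };
        out.insert(key, objs);
    }
    out
}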
} /// This function fetches all the alerts from the underlying object store - async fn get_alerts(&self) -> Result, MetastoreError> { - let alerts_path = RelativePathBuf::from(ALERTS_ROOT_DIRECTORY); - let alerts = self - .storage - .get_objects( - Some(&alerts_path), - Box::new(|file_name| { - !file_name.starts_with("alert_state_") && file_name.ends_with(".json") - }), - ) - .await?; - - Ok(alerts) + async fn get_alerts(&self) -> Result>, MetastoreError> { + let base_paths = PARSEABLE.list_tenants().map_or(vec!["".into()], |v| v); + let mut all_alerts = HashMap::new(); + for mut tenant in base_paths { + let alerts_path = RelativePathBuf::from_iter([&tenant, ALERTS_ROOT_DIRECTORY]); + let alerts = self + .storage + .get_objects( + Some(&alerts_path), + Box::new(|file_name| { + !file_name.starts_with("alert_state_") && file_name.ends_with(".json") + }), + &Some(tenant.clone()), + ) + .await?; + if tenant.eq(&mut "") { + tenant.clone_from(&DEFAULT_TENANT.to_string()); + } + all_alerts.insert(tenant, alerts); + } + Ok(all_alerts) } /// This function puts an alert in the object store at the given path - async fn put_alert(&self, obj: &dyn MetastoreObject) -> Result<(), MetastoreError> { + async fn put_alert( + &self, + obj: &dyn MetastoreObject, + tenant_id: &Option, + ) -> Result<(), MetastoreError> { let id = Ulid::from_string(&obj.get_object_id()).map_err(|e| MetastoreError::Error { status_code: StatusCode::BAD_REQUEST, message: e.to_string(), flow: "put_alert".into(), })?; - let path = alert_json_path(id); + let path = alert_json_path(id, tenant_id); - Ok(self.storage.put_object(&path, to_bytes(obj)).await?) + Ok(self + .storage + .put_object(&path, to_bytes(obj), tenant_id) + .await?) } /// Delete an alert - async fn delete_alert(&self, obj: &dyn MetastoreObject) -> Result<(), MetastoreError> { + async fn delete_alert( + &self, + obj: &dyn MetastoreObject, + tenant_id: &Option, + ) -> Result<(), MetastoreError> { let path = obj.get_object_path(); Ok(self .storage - .delete_object(&RelativePathBuf::from(path)) + .delete_object(&RelativePathBuf::from(path), tenant_id) .await?) 
} /// alerts state - async fn get_alert_states(&self) -> Result, MetastoreError> { - let base_path = RelativePathBuf::from_iter([ALERTS_ROOT_DIRECTORY]); + async fn get_alert_states( + &self, + tenant_id: &Option, + ) -> Result, MetastoreError> { + let tenant = tenant_id.as_ref().map_or(DEFAULT_TENANT, |v| v); + let base_path = RelativePathBuf::from_iter([&tenant, ALERTS_ROOT_DIRECTORY]); let alert_state_bytes = self .storage .get_objects( @@ -223,6 +327,7 @@ impl Metastore for ObjectStoreMetastore { Box::new(|file_name| { file_name.starts_with("alert_state_") && file_name.ends_with(".json") }), + tenant_id, ) .await?; @@ -232,15 +337,17 @@ impl Metastore for ObjectStoreMetastore { alert_states.push(entry); } } + Ok(alert_states) } async fn get_alert_state_entry( &self, alert_id: &Ulid, + tenant_id: &Option, ) -> Result, MetastoreError> { let path = alert_state_json_path(*alert_id); - match self.storage.get_object(&path).await { + match self.storage.get_object(&path, tenant_id).await { Ok(bytes) => { if let Ok(entry) = serde_json::from_slice::(&bytes) { Ok(Some(entry)) @@ -253,7 +360,11 @@ impl Metastore for ObjectStoreMetastore { } } - async fn put_alert_state(&self, obj: &dyn MetastoreObject) -> Result<(), MetastoreError> { + async fn put_alert_state( + &self, + obj: &dyn MetastoreObject, + tenant_id: &Option, + ) -> Result<(), MetastoreError> { let id = Ulid::from_string(&obj.get_object_id()).map_err(|e| MetastoreError::Error { status_code: StatusCode::BAD_REQUEST, message: e.to_string(), @@ -272,7 +383,7 @@ impl Metastore for ObjectStoreMetastore { .state; // Try to read and parse existing file - if let Ok(existing_bytes) = self.storage.get_object(&path).await { + if let Ok(existing_bytes) = self.storage.get_object(&path, tenant_id).await { // File exists - try to parse and update if let Ok(mut existing_entry) = serde_json::from_slice::(&existing_bytes) @@ -284,7 +395,9 @@ impl Metastore for ObjectStoreMetastore { let updated_bytes = serde_json::to_vec(&existing_entry) .map_err(MetastoreError::JsonParseError)?; - self.storage.put_object(&path, updated_bytes.into()).await?; + self.storage + .put_object(&path, updated_bytes.into(), tenant_id) + .await?; } return Ok(()); } @@ -294,26 +407,38 @@ impl Metastore for ObjectStoreMetastore { let new_entry = AlertStateEntry::new(id, new_state); let new_bytes = serde_json::to_vec(&new_entry).map_err(MetastoreError::JsonParseError)?; - self.storage.put_object(&path, new_bytes.into()).await?; + self.storage + .put_object(&path, new_bytes.into(), tenant_id) + .await?; Ok(()) } /// Delete an alert state file - async fn delete_alert_state(&self, obj: &dyn MetastoreObject) -> Result<(), MetastoreError> { + async fn delete_alert_state( + &self, + obj: &dyn MetastoreObject, + tenant_id: &Option, + ) -> Result<(), MetastoreError> { let path = obj.get_object_path(); Ok(self .storage - .delete_object(&RelativePathBuf::from(path)) + .delete_object(&RelativePathBuf::from(path), tenant_id) .await?) 
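// Reduced illustration of the read-modify-write that put_alert_state performs,
// with the object store replaced by an in-memory map so only the control flow
// remains: update the existing entry if one can be read and parsed, otherwise
// write a fresh AlertStateEntry. The struct and function here are simplified
// stand-ins, not the real types.
use std::collections::HashMap;

struct AlertStateEntry {
    id: u128,
    state: String,
}

fn upsert_alert_state(store: &mut HashMap<u128, AlertStateEntry>, id: u128, new_state: &str) {
    match store.get_mut(&id) {
        // existing entry parsed successfully: update the state in place
        Some(entry) => entry.state = new_state.to_string(),
        // no existing entry: create a new one, mirroring the fallback path
        None => {
            store.insert(
                id,
                AlertStateEntry {
                    id,
                    state: new_state.to_string(),
                },
            );
        }
    }
}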
} /// Get MTTR history from storage - async fn get_mttr_history(&self) -> Result, MetastoreError> { - let path = mttr_json_path(); - match self.storage.get_object(&path).await { + async fn get_mttr_history( + &self, + tenant_id: &Option, + ) -> Result, MetastoreError> { + let path = mttr_json_path(tenant_id); + match self.storage.get_object(&path, tenant_id).await { Ok(bytes) => { - if let Ok(history) = serde_json::from_slice::(&bytes) { + if let Ok(mut history) = serde_json::from_slice::(&bytes) { + if let Some(tenant) = tenant_id.as_ref() { + history.tenant_id = Some(tenant.clone()); + } Ok(Some(history)) } else { Ok(None) @@ -325,82 +450,125 @@ impl Metastore for ObjectStoreMetastore { } /// Put MTTR history to storage - async fn put_mttr_history(&self, obj: &dyn MetastoreObject) -> Result<(), MetastoreError> { + async fn put_mttr_history( + &self, + obj: &dyn MetastoreObject, + tenant_id: &Option, + ) -> Result<(), MetastoreError> { let path = RelativePathBuf::from(obj.get_object_path()); - Ok(self.storage.put_object(&path, to_bytes(obj)).await?) + Ok(self + .storage + .put_object(&path, to_bytes(obj), tenant_id) + .await?) } /// This function fetches all the llmconfigs from the underlying object store - async fn get_llmconfigs(&self) -> Result, MetastoreError> { - let base_path = RelativePathBuf::from_iter([SETTINGS_ROOT_DIRECTORY, "llmconfigs"]); - let conf_bytes = self - .storage - .get_objects( - Some(&base_path), - Box::new(|file_name| file_name.ends_with(".json")), - ) - .await?; - - Ok(conf_bytes) + async fn get_llmconfigs(&self) -> Result>, MetastoreError> { + let base_paths = PARSEABLE.list_tenants().map_or(vec!["".into()], |v| v); + let mut all_configs = HashMap::new(); + for mut tenant in base_paths { + let base_path = + RelativePathBuf::from_iter([&tenant, SETTINGS_ROOT_DIRECTORY, "llmconfigs"]); + let conf_bytes = self + .storage + .get_objects( + Some(&base_path), + Box::new(|file_name| file_name.ends_with(".json")), + &Some(tenant.clone()), + ) + .await?; + if tenant.eq(&mut "") { + tenant.clone_from(&DEFAULT_TENANT.to_string()); + } + all_configs.insert(tenant, conf_bytes); + } + Ok(all_configs) } /// This function puts an llmconfig in the object store at the given path - async fn put_llmconfig(&self, obj: &dyn MetastoreObject) -> Result<(), MetastoreError> { + async fn put_llmconfig( + &self, + obj: &dyn MetastoreObject, + tenant_id: &Option, + ) -> Result<(), MetastoreError> { let path = obj.get_object_path(); Ok(self .storage - .put_object(&RelativePathBuf::from(path), to_bytes(obj)) + .put_object(&RelativePathBuf::from(path), to_bytes(obj), tenant_id) .await?) } /// Delete an llmconfig - async fn delete_llmconfig(&self, obj: &dyn MetastoreObject) -> Result<(), MetastoreError> { + async fn delete_llmconfig( + &self, + obj: &dyn MetastoreObject, + tenant_id: &Option, + ) -> Result<(), MetastoreError> { let path = obj.get_object_path(); Ok(self .storage - .delete_object(&RelativePathBuf::from(path)) + .delete_object(&RelativePathBuf::from(path), tenant_id) .await?) } /// Fetch all dashboards - async fn get_dashboards(&self) -> Result, MetastoreError> { - let mut dashboards = Vec::new(); - - let users_dir = RelativePathBuf::from(USERS_ROOT_DIR); - for user in self.storage.list_dirs_relative(&users_dir).await? 
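// get_mttr_history (above) now stamps the caller's tenant onto the deserialized
// history, presumably so a later write of the same struct resolves to the
// tenant-scoped path. A reduced illustration with a stand-in struct; only the
// field name mirrors the patch, the struct itself is not the real one.
#[derive(Default)]
struct MttrHistorySketch {
    tenant_id: Option<String>,
}

fn stamp_tenant(mut history: MttrHistorySketch, tenant_id: &Option<String>) -> MttrHistorySketch {
    if let Some(tenant) = tenant_id.as_ref() {
        history.tenant_id = Some(tenant.clone());
    }
    history
}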
{ - let dashboards_path = users_dir.join(&user).join("dashboards"); - let dashboard_bytes = self + async fn get_dashboards(&self) -> Result>, MetastoreError> { + let mut dashboards = HashMap::new(); + let base_paths = PARSEABLE.list_tenants().map_or(vec!["".into()], |v| v); + for mut tenant in base_paths { + let tenant_id = &Some(tenant.clone()); + let users_dir = RelativePathBuf::from_iter([&tenant, USERS_ROOT_DIR]); + for user in self .storage - .get_objects( - Some(&dashboards_path), - Box::new(|file_name| file_name.ends_with(".json")), - ) - .await?; - - dashboards.extend(dashboard_bytes); + .list_dirs_relative(&users_dir, tenant_id) + .await? + { + let dashboards_path = users_dir.join(&user).join("dashboards"); + let dashboard_bytes = self + .storage + .get_objects( + Some(&dashboards_path), + Box::new(|file_name| file_name.ends_with(".json")), + tenant_id, + ) + .await?; + if tenant.eq(&mut "") { + tenant.clone_from(&DEFAULT_TENANT.to_string()); + } + dashboards.insert(tenant.to_owned(), dashboard_bytes); + // dashboards.extend(dashboard_bytes); + } } Ok(dashboards) } /// Save a dashboard - async fn put_dashboard(&self, obj: &dyn MetastoreObject) -> Result<(), MetastoreError> { + async fn put_dashboard( + &self, + obj: &dyn MetastoreObject, + tenant_id: &Option, + ) -> Result<(), MetastoreError> { // we need the path to store in obj store let path = obj.get_object_path(); Ok(self .storage - .put_object(&RelativePathBuf::from(path), to_bytes(obj)) + .put_object(&RelativePathBuf::from(path), to_bytes(obj), tenant_id) .await?) } /// Delete a dashboard - async fn delete_dashboard(&self, obj: &dyn MetastoreObject) -> Result<(), MetastoreError> { + async fn delete_dashboard( + &self, + obj: &dyn MetastoreObject, + tenant_id: &Option, + ) -> Result<(), MetastoreError> { let path = obj.get_object_path(); Ok(self .storage - .delete_object(&RelativePathBuf::from(path)) + .delete_object(&RelativePathBuf::from(path), tenant_id) .await?) } @@ -409,7 +577,7 @@ impl Metastore for ObjectStoreMetastore { let all_user_chats = DashMap::new(); let users_dir = RelativePathBuf::from(USERS_ROOT_DIR); - for user in self.storage.list_dirs_relative(&users_dir).await? { + for user in self.storage.list_dirs_relative(&users_dir, &None).await? { if user.starts_with(".") { continue; } @@ -420,6 +588,7 @@ impl Metastore for ObjectStoreMetastore { .get_objects( Some(&chats_path), Box::new(|file_name| file_name.ends_with(".json")), + &None, ) .await?; for chat in user_chats { @@ -433,156 +602,215 @@ impl Metastore for ObjectStoreMetastore { } /// Save a chat - async fn put_chat(&self, obj: &dyn MetastoreObject) -> Result<(), MetastoreError> { + async fn put_chat( + &self, + obj: &dyn MetastoreObject, + tenant_id: &Option, + ) -> Result<(), MetastoreError> { // we need the path to store in obj store let path = obj.get_object_path(); Ok(self .storage - .put_object(&RelativePathBuf::from(path), to_bytes(obj)) + .put_object(&RelativePathBuf::from(path), to_bytes(obj), tenant_id) .await?) } /// Delete a chat - async fn delete_chat(&self, obj: &dyn MetastoreObject) -> Result<(), MetastoreError> { + async fn delete_chat( + &self, + obj: &dyn MetastoreObject, + tenant_id: &Option, + ) -> Result<(), MetastoreError> { let path = obj.get_object_path(); Ok(self .storage - .delete_object(&RelativePathBuf::from(path)) + .delete_object(&RelativePathBuf::from(path), tenant_id) .await?) 
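// Dashboards (above) and, further down, filters and correlations all share the
// same nested walk: tenant prefix -> users root -> per-user sub-folder -> *.json
// objects. The sketch below shows only the prefix shapes; the literal segment
// names ("users", and the per-kind folder passed as `kind`) are illustrative, the
// real code uses USERS_ROOT_DIR, and the storage calls are elided.
fn per_user_prefixes(
    tenants: &[String],
    users_by_tenant: impl Fn(&str) -> Vec<String>,
    kind: &str,
) -> Vec<String> {
    let mut prefixes = Vec::new();
    for tenant in tenants {
        for user in users_by_tenant(tenant) {
            if tenant.is_empty() {
                // legacy single-tenant layout: no leading tenant segment
                prefixes.push(format!("users/{user}/{kind}"));
            } else {
                prefixes.push(format!("{tenant}/users/{user}/{kind}"));
            }
        }
    }
    prefixes
}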
} // for get filters, take care of migration and removal of incorrect/old filters // return deserialized filter - async fn get_filters(&self) -> Result, MetastoreError> { - let mut this = Vec::new(); - - let users_dir = RelativePathBuf::from(USERS_ROOT_DIR); - - for user in self.storage.list_dirs_relative(&users_dir).await? { - let stream_dir = users_dir.join(&user).join("filters"); - - for stream in self.storage.list_dirs_relative(&stream_dir).await? { - let filters_path = stream_dir.join(&stream); + async fn get_filters(&self) -> Result>, MetastoreError> { + let mut this = HashMap::new(); + let base_paths = PARSEABLE.list_tenants().map_or(vec!["".into()], |v| v); + + for mut tenant in base_paths { + let users_dir = RelativePathBuf::from_iter([&tenant, USERS_ROOT_DIR]); + let tenant_id = &Some(tenant.clone()); + let mut filters = Vec::new(); + for user in self + .storage + .list_dirs_relative(&users_dir, tenant_id) + .await? + { + let stream_dir = users_dir.join(&user).join("filters"); - // read filter object - let filter_bytes = self + for stream in self .storage - .get_objects( - Some(&filters_path), - Box::new(|file_name| file_name.ends_with(".json")), - ) - .await?; - - for filter in filter_bytes { - // deserialize into Value - let mut filter_value = serde_json::from_slice::(&filter)?; - - if let Some(meta) = filter_value.clone().as_object() { - let version = meta.get("version").and_then(|version| version.as_str()); - - if version == Some("v1") { - // delete older version of the filter - self.storage.delete_object(&filters_path).await?; - - filter_value = migrate_v1_v2(filter_value); - let user_id = filter_value - .as_object() - .unwrap() - .get("user_id") - .and_then(|user_id| user_id.as_str()); - let filter_id = filter_value - .as_object() - .unwrap() - .get("filter_id") - .and_then(|filter_id| filter_id.as_str()); - let stream_name = filter_value - .as_object() - .unwrap() - .get("stream_name") - .and_then(|stream_name| stream_name.as_str()); - - // if these values are present, create a new file - if let (Some(user_id), Some(stream_name), Some(filter_id)) = - (user_id, stream_name, filter_id) - { - let path = - filter_path(user_id, stream_name, &format!("{filter_id}.json")); - let filter_bytes = to_bytes(&filter_value); - self.storage.put_object(&path, filter_bytes.clone()).await?; + .list_dirs_relative(&stream_dir, tenant_id) + .await? 
+ { + let filters_path = stream_dir.join(&stream); + + // read filter object + let filter_bytes = self + .storage + .get_objects( + Some(&filters_path), + Box::new(|file_name| file_name.ends_with(".json")), + tenant_id, + ) + .await?; + + for filter in filter_bytes { + // deserialize into Value + let mut filter_value = + serde_json::from_slice::(&filter)?; + + if let Some(meta) = filter_value.clone().as_object() { + let version = meta.get("version").and_then(|version| version.as_str()); + + if version == Some("v1") { + // delete older version of the filter + self.storage.delete_object(&filters_path, tenant_id).await?; + + filter_value = migrate_v1_v2(filter_value); + let user_id = filter_value + .as_object() + .unwrap() + .get("user_id") + .and_then(|user_id| user_id.as_str()); + let filter_id = filter_value + .as_object() + .unwrap() + .get("filter_id") + .and_then(|filter_id| filter_id.as_str()); + let stream_name = filter_value + .as_object() + .unwrap() + .get("stream_name") + .and_then(|stream_name| stream_name.as_str()); + + // if these values are present, create a new file + if let (Some(user_id), Some(stream_name), Some(filter_id)) = + (user_id, stream_name, filter_id) + { + let path = filter_path( + user_id, + stream_name, + &format!("{filter_id}.json"), + ); + let filter_bytes = to_bytes(&filter_value); + self.storage + .put_object(&path, filter_bytes.clone(), tenant_id) + .await?; + } } - } - if let Ok(filter) = serde_json::from_value::(filter_value) { - this.retain(|f: &Filter| f.filter_id != filter.filter_id); - this.push(filter); + if let Ok(filter) = serde_json::from_value::(filter_value) { + filters.retain(|f: &Filter| f.filter_id != filter.filter_id); + filters.push(filter); + } } } } } + if tenant.eq(&mut "") { + tenant.clone_from(&DEFAULT_TENANT.to_string()); + } + this.insert(tenant, filters); } Ok(this) } /// Save a filter - async fn put_filter(&self, obj: &dyn MetastoreObject) -> Result<(), MetastoreError> { + async fn put_filter( + &self, + obj: &dyn MetastoreObject, + tenant_id: &Option, + ) -> Result<(), MetastoreError> { // we need the path to store in obj store let path = obj.get_object_path(); Ok(self .storage - .put_object(&RelativePathBuf::from(path), to_bytes(obj)) + .put_object(&RelativePathBuf::from(path), to_bytes(obj), tenant_id) .await?) } /// Delete a filter - async fn delete_filter(&self, obj: &dyn MetastoreObject) -> Result<(), MetastoreError> { + async fn delete_filter( + &self, + obj: &dyn MetastoreObject, + tenant_id: &Option, + ) -> Result<(), MetastoreError> { let path = obj.get_object_path(); Ok(self .storage - .delete_object(&RelativePathBuf::from(path)) + .delete_object(&RelativePathBuf::from(path), tenant_id) .await?) } /// Get all correlations - async fn get_correlations(&self) -> Result, MetastoreError> { - let mut correlations = Vec::new(); - - let users_dir = RelativePathBuf::from(USERS_ROOT_DIR); - for user in self.storage.list_dirs_relative(&users_dir).await? 
{ - let correlations_path = users_dir.join(&user).join("correlations"); - let correlation_bytes = self + async fn get_correlations(&self) -> Result>, MetastoreError> { + let mut correlations = HashMap::new(); + let base_paths = PARSEABLE.list_tenants().map_or(vec!["".into()], |v| v); + for mut tenant in base_paths { + let tenant_id = &Some(tenant.clone()); + let mut corrs = Vec::new(); + let users_dir = RelativePathBuf::from_iter([&tenant, USERS_ROOT_DIR]); + for user in self .storage - .get_objects( - Some(&correlations_path), - Box::new(|file_name| file_name.ends_with(".json")), - ) - .await?; + .list_dirs_relative(&users_dir, tenant_id) + .await? + { + let correlations_path = users_dir.join(&user).join("correlations"); + let correlation_bytes = self + .storage + .get_objects( + Some(&correlations_path), + Box::new(|file_name| file_name.ends_with(".json")), + tenant_id, + ) + .await?; - correlations.extend(correlation_bytes); + corrs.extend(correlation_bytes); + } + if tenant.eq(&mut "") { + tenant.clone_from(&DEFAULT_TENANT.to_string()); + } + correlations.insert(tenant, corrs); } - Ok(correlations) } /// Save a correlation - async fn put_correlation(&self, obj: &dyn MetastoreObject) -> Result<(), MetastoreError> { + async fn put_correlation( + &self, + obj: &dyn MetastoreObject, + tenant_id: &Option, + ) -> Result<(), MetastoreError> { let path = obj.get_object_path(); Ok(self .storage - .put_object(&RelativePathBuf::from(path), to_bytes(obj)) + .put_object(&RelativePathBuf::from(path), to_bytes(obj), tenant_id) .await?) } /// Delete a correlation - async fn delete_correlation(&self, obj: &dyn MetastoreObject) -> Result<(), MetastoreError> { + async fn delete_correlation( + &self, + obj: &dyn MetastoreObject, + tenant_id: &Option, + ) -> Result<(), MetastoreError> { let path = obj.get_object_path(); Ok(self .storage - .delete_object(&RelativePathBuf::from(path)) + .delete_object(&RelativePathBuf::from(path), tenant_id) .await?) } @@ -593,17 +821,20 @@ impl Metastore for ObjectStoreMetastore { &self, stream_name: &str, get_base: bool, + tenant_id: &Option, ) -> Result { + let tenant = tenant_id.as_ref().map_or("", |v| v); let path = if get_base { RelativePathBuf::from_iter([ + tenant, stream_name, STREAM_ROOT_DIRECTORY, STREAM_METADATA_FILE_NAME, ]) } else { - stream_json_path(stream_name) + stream_json_path(stream_name, tenant_id) }; - Ok(self.storage.get_object(&path).await?) + Ok(self.storage.get_object(&path, tenant_id).await?) } /// Fetch all `ObjectStoreFormat` present in a stream folder @@ -611,8 +842,10 @@ impl Metastore for ObjectStoreMetastore { &self, stream_name: &str, mode: Option, + tenant_id: &Option, ) -> Result, MetastoreError> { - let path = RelativePathBuf::from_iter([stream_name, STREAM_ROOT_DIRECTORY]); + let root = tenant_id.as_ref().map_or("", |v| v); + let path = RelativePathBuf::from_iter([root, stream_name, STREAM_ROOT_DIRECTORY]); if let Some(mode) = mode { if mode.eq(&Mode::Ingest) { Ok(self @@ -622,6 +855,7 @@ impl Metastore for ObjectStoreMetastore { Box::new(|file_name| { file_name.starts_with(".ingestor") && file_name.ends_with("stream.json") }), + tenant_id, ) .await?) } else { @@ -638,6 +872,7 @@ impl Metastore for ObjectStoreMetastore { .get_objects( Some(&path), Box::new(|file_name| file_name.ends_with("stream.json")), + tenant_id, ) .await?) 
} @@ -648,10 +883,13 @@ impl Metastore for ObjectStoreMetastore { &self, obj: &dyn MetastoreObject, stream_name: &str, + tenant_id: &Option, ) -> Result<(), MetastoreError> { + let path = stream_json_path(stream_name, tenant_id); + // tracing::warn!(put_stream_json_path=?path); Ok(self .storage - .put_object(&stream_json_path(stream_name), to_bytes(obj)) + .put_object(&path, to_bytes(obj), tenant_id) .await?) } @@ -659,12 +897,15 @@ impl Metastore for ObjectStoreMetastore { async fn get_all_manifest_files( &self, stream_name: &str, + tenant_id: &Option, ) -> Result>, MetastoreError> { let mut result_file_list: BTreeMap> = BTreeMap::new(); - let resp = self - .storage - .list_with_delimiter(Some(stream_name.into())) - .await?; + let root = if let Some(tenant) = tenant_id { + format!("{tenant}/{stream_name}") + } else { + stream_name.into() + }; + let resp = self.storage.list_with_delimiter(Some(root.into())).await?; let dates = resp .common_prefixes @@ -688,7 +929,7 @@ impl Metastore for ObjectStoreMetastore { for path in manifest_paths { let bytes = self .storage - .get_object(&RelativePathBuf::from(path)) + .get_object(&RelativePathBuf::from(path), tenant_id) .await?; result_file_list @@ -707,15 +948,16 @@ impl Metastore for ObjectStoreMetastore { lower_bound: DateTime, upper_bound: DateTime, manifest_url: Option, + tenant_id: &Option, ) -> Result, MetastoreError> { let path = match manifest_url { Some(url) => RelativePathBuf::from(url), None => { - let path = partition_path(stream_name, lower_bound, upper_bound); + let path = partition_path(stream_name, lower_bound, upper_bound, tenant_id); manifest_path(path.as_str()) } }; - match self.storage.get_object(&path).await { + match self.storage.get_object(&path, tenant_id).await { Ok(bytes) => { let manifest = serde_json::from_slice(&bytes)?; Ok(Some(manifest)) @@ -743,8 +985,9 @@ impl Metastore for ObjectStoreMetastore { stream_name: &str, lower_bound: DateTime, upper_bound: DateTime, + tenant_id: &Option, ) -> Result { - let path = partition_path(stream_name, lower_bound, upper_bound); + let path = partition_path(stream_name, lower_bound, upper_bound, tenant_id); Ok(self .storage .absolute_url(&manifest_path(path.as_str())) @@ -757,10 +1000,16 @@ impl Metastore for ObjectStoreMetastore { stream_name: &str, lower_bound: DateTime, upper_bound: DateTime, + tenant_id: &Option, ) -> Result<(), MetastoreError> { let manifest_file_name = manifest_path("").to_string(); - let path = partition_path(stream_name, lower_bound, upper_bound).join(&manifest_file_name); - Ok(self.storage.put_object(&path, to_bytes(obj)).await?) + let path = partition_path(stream_name, lower_bound, upper_bound, tenant_id) + .join(&manifest_file_name); + // tracing::warn!(put_manifest_path=?path); + Ok(self + .storage + .put_object(&path, to_bytes(obj), tenant_id) + .await?) } async fn delete_manifest( @@ -768,62 +1017,93 @@ impl Metastore for ObjectStoreMetastore { stream_name: &str, lower_bound: DateTime, upper_bound: DateTime, + tenant_id: &Option, ) -> Result<(), MetastoreError> { let manifest_file_name = manifest_path("").to_string(); - let path = partition_path(stream_name, lower_bound, upper_bound).join(&manifest_file_name); - Ok(self.storage.delete_object(&path).await?) + let path = partition_path(stream_name, lower_bound, upper_bound, tenant_id) + .join(&manifest_file_name); + Ok(self.storage.delete_object(&path, tenant_id).await?) 
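// get_all_manifest_files now scopes its list_with_delimiter call under the tenant.
// The root prefix it computes reduces to the following, shown as a small
// standalone helper for clarity (the patch builds the string inline):
fn manifest_listing_root(tenant_id: &Option<String>, stream_name: &str) -> String {
    match tenant_id {
        // multi-tenant layout: "<tenant>/<stream>"
        Some(tenant) => format!("{tenant}/{stream_name}"),
        // legacy layout: just the stream name
        None => stream_name.to_string(),
    }
}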
} /// targets - async fn get_targets(&self) -> Result, MetastoreError> { - let targets_path = - RelativePathBuf::from_iter([SETTINGS_ROOT_DIRECTORY, TARGETS_ROOT_DIRECTORY]); - let targets = self - .storage - .get_objects( - Some(&targets_path), - Box::new(|file_name| file_name.ends_with(".json")), - ) - .await? - .iter() - .filter_map(|bytes| { - serde_json::from_slice(bytes) - .inspect_err(|err| warn!("Expected compatible json, error = {err}")) - .ok() - }) - .collect(); - - Ok(targets) + async fn get_targets(&self) -> Result>, MetastoreError> { + let base_paths = PARSEABLE.list_tenants().map_or(vec!["".into()], |v| v); + let mut all_targets = HashMap::new(); + for mut tenant in base_paths { + let targets_path = RelativePathBuf::from_iter([ + &tenant, + SETTINGS_ROOT_DIRECTORY, + TARGETS_ROOT_DIRECTORY, + ]); + let targets = self + .storage + .get_objects( + Some(&targets_path), + Box::new(|file_name| file_name.ends_with(".json")), + &Some(tenant.clone()), + ) + .await? + .iter() + .filter_map(|bytes| { + serde_json::from_slice(bytes) + .inspect_err(|err| warn!("Expected compatible json, error = {err}")) + .ok() + }) + .collect(); + if tenant.eq(&mut "") { + tenant.clone_from(&DEFAULT_TENANT.to_string()); + } + all_targets.insert(tenant, targets); + } + Ok(all_targets) } - async fn put_target(&self, obj: &dyn MetastoreObject) -> Result<(), MetastoreError> { + async fn put_target( + &self, + obj: &dyn MetastoreObject, + tenant_id: &Option, + ) -> Result<(), MetastoreError> { // we need the path to store in obj store let path = obj.get_object_path(); Ok(self .storage - .put_object(&RelativePathBuf::from(path), to_bytes(obj)) + .put_object(&RelativePathBuf::from(path), to_bytes(obj), tenant_id) .await?) } - async fn delete_target(&self, obj: &dyn MetastoreObject) -> Result<(), MetastoreError> { + async fn delete_target( + &self, + obj: &dyn MetastoreObject, + tenant_id: &Option, + ) -> Result<(), MetastoreError> { // we need the path to store in obj store let path = obj.get_object_path(); Ok(self .storage - .delete_object(&RelativePathBuf::from(path)) + .delete_object(&RelativePathBuf::from(path), tenant_id) .await?) } - async fn get_all_schemas(&self, stream_name: &str) -> Result, MetastoreError> { - let path_prefix = - relative_path::RelativePathBuf::from(format!("{stream_name}/{STREAM_ROOT_DIRECTORY}")); + async fn get_all_schemas( + &self, + stream_name: &str, + tenant_id: &Option, + ) -> Result, MetastoreError> { + let path_prefix = if let Some(tenant) = tenant_id { + relative_path::RelativePathBuf::from(format!( + "{tenant}/{stream_name}/{STREAM_ROOT_DIRECTORY}" + )) + } else { + relative_path::RelativePathBuf::from(format!("{stream_name}/{STREAM_ROOT_DIRECTORY}")) + }; Ok(self .storage .get_objects( Some(&path_prefix), Box::new(|file_name: String| file_name.contains(".schema")), + tenant_id, ) .await? .iter() @@ -835,18 +1115,42 @@ impl Metastore for ObjectStoreMetastore { .collect()) } - async fn get_schema(&self, stream_name: &str) -> Result { - Ok(self.storage.get_object(&schema_path(stream_name)).await?) + async fn get_schema( + &self, + stream_name: &str, + tenant_id: &Option, + ) -> Result { + Ok(self + .storage + .get_object(&schema_path(stream_name, tenant_id), tenant_id) + .await?) } - async fn put_schema(&self, obj: Schema, stream_name: &str) -> Result<(), MetastoreError> { - let path = schema_path(stream_name); - Ok(self.storage.put_object(&path, to_bytes(&obj)).await?) 
+ async fn put_schema( + &self, + obj: Schema, + stream_name: &str, + tenant_id: &Option, + ) -> Result<(), MetastoreError> { + let path = schema_path(stream_name, tenant_id); + Ok(self + .storage + .put_object(&path, to_bytes(&obj), tenant_id) + .await?) } - async fn get_parseable_metadata(&self) -> Result, MetastoreError> { + async fn get_parseable_metadata( + &self, + tenant_id: &Option, + ) -> Result, MetastoreError> { + let path = if let Some(tenant_id) = tenant_id.as_ref() { + RelativePathBuf::from_iter([tenant_id, ".parseable.json"]) + } else { + parseable_json_path() + }; + let parseable_metadata: Option = - match self.storage.get_object(&parseable_json_path()).await { + match self.storage.get_object(&path, tenant_id).await { Ok(bytes) => Some(bytes), Err(err) => { if matches!(err, ObjectStorageError::NoSuchKey(_)) { @@ -860,6 +1164,13 @@ impl Metastore for ObjectStoreMetastore { Ok(parseable_metadata) } + async fn delete_tenant(&self, tenant_id: &str) -> Result<(), MetastoreError> { + self.storage + .delete_prefix(&RelativePathBuf::from(tenant_id), &None) + .await + .map_err(MetastoreError::ObjectStorageError) + } + async fn get_ingestor_metadata(&self) -> Result, MetastoreError> { let base_path = RelativePathBuf::from(PARSEABLE_ROOT_DIRECTORY); Ok(self @@ -867,6 +1178,7 @@ impl Metastore for ObjectStoreMetastore { .get_objects( Some(&base_path), Box::new(|file_name| file_name.starts_with("ingestor")), + &None, ) .await?) } @@ -874,14 +1186,25 @@ impl Metastore for ObjectStoreMetastore { async fn put_parseable_metadata( &self, obj: &dyn MetastoreObject, + tenant_id: &Option, ) -> Result<(), MetastoreError> { + let path = if let Some(tenant_id) = tenant_id.as_ref() { + RelativePathBuf::from_iter([tenant_id, ".parseable.json"]) + } else { + parseable_json_path() + }; + self.storage - .put_object(&parseable_json_path(), to_bytes(obj)) + .put_object(&path, to_bytes(obj), tenant_id) .await .map_err(MetastoreError::ObjectStorageError) } - async fn get_node_metadata(&self, node_type: NodeType) -> Result, MetastoreError> { + async fn get_node_metadata( + &self, + node_type: NodeType, + tenant_id: &Option, + ) -> Result, MetastoreError> { let root_path = RelativePathBuf::from(PARSEABLE_ROOT_DIRECTORY); let prefix_owned = node_type.to_string(); @@ -890,6 +1213,7 @@ impl Metastore for ObjectStoreMetastore { .get_objects( Some(&root_path), Box::new(move |file_name| file_name.starts_with(&prefix_owned)), // Use the owned copy + tenant_id, ) .await? 
.into_iter() @@ -901,7 +1225,7 @@ impl Metastore for ObjectStoreMetastore { async fn put_node_metadata(&self, obj: &dyn MetastoreObject) -> Result<(), MetastoreError> { let path = obj.get_object_path(); self.storage - .put_object(&RelativePathBuf::from(path), to_bytes(obj)) + .put_object(&RelativePathBuf::from(path), to_bytes(obj), &None) .await?; Ok(()) } @@ -916,6 +1240,7 @@ impl Metastore for ObjectStoreMetastore { .get_objects( Some(&RelativePathBuf::from(PARSEABLE_ROOT_DIRECTORY)), Box::new(move |file_name| file_name.starts_with(&node_type.to_string())), + &None, ) .await?; @@ -934,7 +1259,7 @@ impl Metastore for ObjectStoreMetastore { let node_meta_filename = node_metadatas[0].file_path().to_string(); let file = RelativePathBuf::from(&node_meta_filename); - match self.storage.delete_object(&file).await { + match self.storage.delete_object(&file, &None).await { Ok(_) => Ok(true), Err(err) => { if matches!(err, ObjectStorageError::IoError(_)) { @@ -946,7 +1271,10 @@ impl Metastore for ObjectStoreMetastore { } } - async fn list_streams(&self) -> Result, MetastoreError> { + async fn list_streams( + &self, + tenant_id: &Option, + ) -> Result, MetastoreError> { // using LocalFS list_streams because it doesn't implement list_with_delimiter if PARSEABLE.storage.name() == "drive" { PARSEABLE @@ -958,32 +1286,59 @@ impl Metastore for ObjectStoreMetastore { } else { // not local-disk, object storage let mut result_file_list = HashSet::new(); - let resp = self.storage.list_with_delimiter(None).await?; + let root = if let Some(tenant) = tenant_id { + Some(object_store::path::Path::from_iter([tenant.clone()])) + } else { + None + }; + // tracing::warn!("list_streams root- {root:?}"); + // let dirs = self.storage.list_dirs().await?; + // tracing::warn!("list_streams dirs- {dirs:?}"); + + let resp = self.storage.list_with_delimiter(root.clone()).await?; + // tracing::warn!("list_streams resp- {resp:?}"); + // let dirs_relative = self.storage.list_dirs_relative(&RelativePathBuf::from_iter([root.unwrap_or("".into()).to_string()])).await?; + // tracing::warn!("list_streams dirs_relative- {dirs_relative:?}"); let streams = resp .common_prefixes .iter() - .flat_map(|path| path.parts()) + .flat_map(|path| { + // tracing::warn!("list_streams path- {path}"); + path.parts() + }) .map(|name| name.as_ref().to_string()) .filter(|name| { + // tracing::warn!("list_streams name- {name}"); name != PARSEABLE_ROOT_DIRECTORY && name != USERS_ROOT_DIR && name != SETTINGS_ROOT_DIRECTORY && name != ALERTS_ROOT_DIRECTORY }) .collect::>(); - + // tracing::warn!("list_streams streams- {streams:?}"); for stream in streams { - let stream_path = object_store::path::Path::from(format!( - "{}/{}", - &stream, STREAM_ROOT_DIRECTORY - )); + let stream_path = if let Some(root) = root.as_ref() { + object_store::path::Path::from_iter([ + &root.to_string(), + &stream, + STREAM_ROOT_DIRECTORY, + ]) + } else { + object_store::path::Path::from(format!("{}/{}", &stream, STREAM_ROOT_DIRECTORY)) + }; + // let stream_path = object_store::path::Path::from(format!( + // "{}/{}", + // &stream, STREAM_ROOT_DIRECTORY + // )); + // tracing::warn!("list_streams stream_path- {stream_path}"); let resp = self.storage.list_with_delimiter(Some(stream_path)).await?; - if resp - .objects - .iter() - .any(|name| name.location.filename().unwrap().ends_with("stream.json")) - { + // tracing::warn!("list_streams streams resp- {resp:?}"); + if resp.objects.iter().any(|name| { + // tracing::warn!("list_streams streams resp name- {name:?}"); + 
name.location.filename().unwrap().ends_with("stream.json") + }) { + // tracing::warn!("inserting to list_streams- {stream}"); result_file_list.insert(stream); } } diff --git a/src/metrics/mod.rs b/src/metrics/mod.rs index b038a241a..9c9ff86fa 100644 --- a/src/metrics/mod.rs +++ b/src/metrics/mod.rs @@ -38,7 +38,7 @@ pub static METRICS_REGISTRY: Lazy = Lazy::new(|| { pub static EVENTS_INGESTED: Lazy = Lazy::new(|| { IntGaugeVec::new( Opts::new("events_ingested", "Events ingested for a stream").namespace(METRICS_NAMESPACE), - &["stream", "format"], + &["stream", "format", "tenant_id"], ) .expect("metric can be created") }); @@ -50,7 +50,7 @@ pub static EVENTS_INGESTED_SIZE: Lazy = Lazy::new(|| { "Events ingested size bytes for a stream", ) .namespace(METRICS_NAMESPACE), - &["stream", "format"], + &["stream", "format", "tenant_id"], ) .expect("metric can be created") }); @@ -58,7 +58,7 @@ pub static EVENTS_INGESTED_SIZE: Lazy = Lazy::new(|| { pub static STORAGE_SIZE: Lazy = Lazy::new(|| { IntGaugeVec::new( Opts::new("storage_size", "Storage size bytes for a stream").namespace(METRICS_NAMESPACE), - &["type", "stream", "format"], + &["type", "stream", "format", "tenant_id"], ) .expect("metric can be created") }); @@ -66,7 +66,7 @@ pub static STORAGE_SIZE: Lazy = Lazy::new(|| { pub static EVENTS_DELETED: Lazy = Lazy::new(|| { IntGaugeVec::new( Opts::new("events_deleted", "Events deleted for a stream").namespace(METRICS_NAMESPACE), - &["stream", "format"], + &["stream", "format", "tenant_id"], ) .expect("metric can be created") }); @@ -78,7 +78,7 @@ pub static EVENTS_DELETED_SIZE: Lazy = Lazy::new(|| { "Events deleted size bytes for a stream", ) .namespace(METRICS_NAMESPACE), - &["stream", "format"], + &["stream", "format", "tenant_id"], ) .expect("metric can be created") }); @@ -90,7 +90,7 @@ pub static DELETED_EVENTS_STORAGE_SIZE: Lazy = Lazy::new(|| { "Deleted events storage size bytes for a stream", ) .namespace(METRICS_NAMESPACE), - &["type", "stream", "format"], + &["type", "stream", "format", "tenant_id"], ) .expect("metric can be created") }); @@ -102,7 +102,7 @@ pub static LIFETIME_EVENTS_INGESTED: Lazy = Lazy::new(|| { "Lifetime events ingested for a stream", ) .namespace(METRICS_NAMESPACE), - &["stream", "format"], + &["stream", "format", "tenant_id"], ) .expect("metric can be created") }); @@ -114,7 +114,7 @@ pub static LIFETIME_EVENTS_INGESTED_SIZE: Lazy = Lazy::new(|| { "Lifetime events ingested size bytes for a stream", ) .namespace(METRICS_NAMESPACE), - &["stream", "format"], + &["stream", "format", "tenant_id"], ) .expect("metric can be created") }); @@ -126,7 +126,7 @@ pub static LIFETIME_EVENTS_STORAGE_SIZE: Lazy = Lazy::new(|| { "Lifetime events storage size bytes for a stream", ) .namespace(METRICS_NAMESPACE), - &["type", "stream", "format"], + &["type", "stream", "format", "tenant_id"], ) .expect("metric can be created") }); @@ -138,7 +138,7 @@ pub static EVENTS_INGESTED_DATE: Lazy = Lazy::new(|| { "Events ingested for a stream on a particular date", ) .namespace(METRICS_NAMESPACE), - &["stream", "format", "date"], + &["stream", "format", "date", "tenant_id"], ) .expect("metric can be created") }); @@ -150,7 +150,7 @@ pub static EVENTS_INGESTED_SIZE_DATE: Lazy = Lazy::new(|| { "Events ingested size in bytes for a stream on a particular date", ) .namespace(METRICS_NAMESPACE), - &["stream", "format", "date"], + &["stream", "format", "date", "tenant_id"], ) .expect("metric can be created") }); @@ -162,7 +162,7 @@ pub static EVENTS_STORAGE_SIZE_DATE: Lazy = Lazy::new(|| { 
"Events storage size in bytes for a stream on a particular date", ) .namespace(METRICS_NAMESPACE), - &["type", "stream", "format", "date"], + &["type", "stream", "format", "date", "tenant_id"], ) .expect("metric can be created") }); @@ -170,7 +170,7 @@ pub static EVENTS_STORAGE_SIZE_DATE: Lazy = Lazy::new(|| { pub static STAGING_FILES: Lazy = Lazy::new(|| { IntGaugeVec::new( Opts::new("staging_files", "Active Staging files").namespace(METRICS_NAMESPACE), - &["stream"], + &["stream", "tenant_id"], ) .expect("metric can be created") }); @@ -178,7 +178,7 @@ pub static STAGING_FILES: Lazy = Lazy::new(|| { pub static QUERY_EXECUTE_TIME: Lazy = Lazy::new(|| { HistogramVec::new( HistogramOpts::new("query_execute_time", "Query execute time").namespace(METRICS_NAMESPACE), - &["stream"], + &["stream", "tenant_id"], ) .expect("metric can be created") }); @@ -186,7 +186,7 @@ pub static QUERY_EXECUTE_TIME: Lazy = Lazy::new(|| { pub static QUERY_CACHE_HIT: Lazy = Lazy::new(|| { IntCounterVec::new( Opts::new("QUERY_CACHE_HIT", "Full Cache hit").namespace(METRICS_NAMESPACE), - &["stream"], + &["stream", "tenant_id"], ) .expect("metric can be created") }); @@ -194,7 +194,7 @@ pub static QUERY_CACHE_HIT: Lazy = Lazy::new(|| { pub static ALERTS_STATES: Lazy = Lazy::new(|| { IntCounterVec::new( Opts::new("alerts_states", "Alerts States").namespace(METRICS_NAMESPACE), - &["stream", "name", "state"], + &["stream", "name", "state", "tenant_id"], ) .expect("metric can be created") }); @@ -207,7 +207,7 @@ pub static TOTAL_EVENTS_INGESTED_BY_DATE: Lazy = Lazy::new(|| { "Total events ingested by date", ) .namespace(METRICS_NAMESPACE), - &["date"], + &["date", "tenant_id"], ) .expect("metric can be created") }); @@ -219,7 +219,7 @@ pub static TOTAL_EVENTS_INGESTED_SIZE_BY_DATE: Lazy = Lazy::new(| "Total events ingested size in bytes by date", ) .namespace(METRICS_NAMESPACE), - &["date"], + &["date", "tenant_id"], ) .expect("metric can be created") }); @@ -231,7 +231,7 @@ pub static TOTAL_PARQUETS_STORED_BY_DATE: Lazy = Lazy::new(|| { "Total parquet files stored by date", ) .namespace(METRICS_NAMESPACE), - &["date"], + &["date", "tenant_id"], ) .expect("metric can be created") }); @@ -243,7 +243,7 @@ pub static TOTAL_PARQUETS_STORED_SIZE_BY_DATE: Lazy = Lazy::new(| "Total parquet files stored size in bytes by date", ) .namespace(METRICS_NAMESPACE), - &["date"], + &["date", "tenant_id"], ) .expect("metric can be created") }); @@ -252,7 +252,7 @@ pub static TOTAL_QUERY_CALLS_BY_DATE: Lazy = Lazy::new(|| { IntCounterVec::new( Opts::new("total_query_calls_by_date", "Total query calls by date") .namespace(METRICS_NAMESPACE), - &["date"], + &["date", "tenant_id"], ) .expect("metric can be created") }); @@ -264,7 +264,7 @@ pub static TOTAL_FILES_SCANNED_IN_QUERY_BY_DATE: Lazy = Lazy::new "Total files scanned in queries by date", ) .namespace(METRICS_NAMESPACE), - &["date"], + &["date", "tenant_id"], ) .expect("metric can be created") }); @@ -276,7 +276,7 @@ pub static TOTAL_BYTES_SCANNED_IN_QUERY_BY_DATE: Lazy = Lazy::new "Total bytes scanned in queries by date", ) .namespace(METRICS_NAMESPACE), - &["date"], + &["date", "tenant_id"], ) .expect("metric can be created") }); @@ -288,7 +288,7 @@ pub static TOTAL_OBJECT_STORE_CALLS_BY_DATE: Lazy = Lazy::new(|| "Total object store calls by date", ) .namespace(METRICS_NAMESPACE), - &["method", "date"], + &["method", "date", "tenant_id"], ) .expect("metric can be created") }); @@ -301,7 +301,7 @@ pub static TOTAL_FILES_SCANNED_IN_OBJECT_STORE_CALLS_BY_DATE: Lazy = Lazy::new(|| { 
"Total input LLM tokens used by date", ) .namespace(METRICS_NAMESPACE), - &["provider", "model", "date"], + &["provider", "model", "date", "tenant_id"], ) .expect("metric can be created") }); @@ -338,7 +338,7 @@ pub static TOTAL_OUTPUT_LLM_TOKENS_BY_DATE: Lazy = Lazy::new(|| { "Total output LLM tokens used by date", ) .namespace(METRICS_NAMESPACE), - &["provider", "model", "date"], + &["provider", "model", "date", "tenant_id"], ) .expect("metric can be created") }); @@ -350,7 +350,7 @@ pub static TOTAL_CACHED_LLM_TOKENS_BY_DATE: Lazy = Lazy::new(|| { "Total cached LLM tokens used by date", ) .namespace(METRICS_NAMESPACE), - &["provider", "model", "date"], + &["provider", "model", "date", "tenant_id"], ) .expect("metric can be created") }); @@ -362,7 +362,7 @@ pub static TOTAL_REASONING_LLM_TOKENS_BY_DATE: Lazy = Lazy::new(| "Total reasoning LLM tokens used by date", ) .namespace(METRICS_NAMESPACE), - &["provider", "model", "date"], + &["provider", "model", "date", "tenant_id"], ) .expect("metric can be created") }); @@ -383,7 +383,7 @@ pub static TOTAL_METRICS_COLLECTED_BY_DATE: Lazy = Lazy::new(|| { "Total metrics collected by date", ) .namespace(METRICS_NAMESPACE), - &["team", "date"], + &["team", "date", "tenant_id"], ) .expect("metric can be created") }); @@ -395,7 +395,7 @@ pub static TOTAL_METRICS_COLLECTED_SIZE_BY_DATE: Lazy = Lazy::new "Total metrics collected size in bytes by date", ) .namespace(METRICS_NAMESPACE), - &["team", "date"], + &["team", "date", "tenant_id"], ) .expect("metric can be created") }); @@ -407,7 +407,7 @@ pub static TOTAL_LOGS_COLLECTED_BY_DATE: Lazy = Lazy::new(|| { "Total logs collected by date", ) .namespace(METRICS_NAMESPACE), - &["team", "date"], + &["team", "date", "tenant_id"], ) .expect("metric can be created") }); @@ -419,7 +419,7 @@ pub static TOTAL_LOGS_COLLECTED_SIZE_BY_DATE: Lazy = Lazy::new(|| "Total logs collected size in bytes by date", ) .namespace(METRICS_NAMESPACE), - &["team", "date"], + &["team", "date", "tenant_id"], ) .expect("metric can be created") }); @@ -431,7 +431,7 @@ pub static TOTAL_TRACES_COLLECTED_BY_DATE: Lazy = Lazy::new(|| { "Total traces collected by date", ) .namespace(METRICS_NAMESPACE), - &["team", "date"], + &["team", "date", "tenant_id"], ) .expect("metric can be created") }); @@ -443,7 +443,7 @@ pub static TOTAL_TRACES_COLLECTED_SIZE_BY_DATE: Lazy = Lazy::new( "Total traces collected size in bytes by date", ) .namespace(METRICS_NAMESPACE), - &["team", "date"], + &["team", "date", "tenant_id"], ) .expect("metric can be created") }); @@ -593,41 +593,41 @@ fn prom_process_metrics(metrics: &PrometheusMetrics) { #[cfg(not(target_os = "linux"))] fn prom_process_metrics(_metrics: &PrometheusMetrics) {} -pub async fn fetch_stats_from_storage(stream_name: &str, stats: FullStats) { +pub async fn fetch_stats_from_storage(stream_name: &str, stats: FullStats, tenant_id: &str) { EVENTS_INGESTED - .with_label_values(&[stream_name, "json"]) + .with_label_values(&[stream_name, "json", tenant_id]) .set(stats.current_stats.events as i64); EVENTS_INGESTED_SIZE - .with_label_values(&[stream_name, "json"]) + .with_label_values(&[stream_name, "json", tenant_id]) .set(stats.current_stats.ingestion as i64); STORAGE_SIZE - .with_label_values(&["data", stream_name, "parquet"]) + .with_label_values(&["data", stream_name, "parquet", tenant_id]) .set(stats.current_stats.storage as i64); EVENTS_DELETED - .with_label_values(&[stream_name, "json"]) + .with_label_values(&[stream_name, "json", tenant_id]) .set(stats.deleted_stats.events as i64); 
EVENTS_DELETED_SIZE - .with_label_values(&[stream_name, "json"]) + .with_label_values(&[stream_name, "json", tenant_id]) .set(stats.deleted_stats.ingestion as i64); DELETED_EVENTS_STORAGE_SIZE - .with_label_values(&["data", stream_name, "parquet"]) + .with_label_values(&["data", stream_name, "parquet", tenant_id]) .set(stats.deleted_stats.storage as i64); LIFETIME_EVENTS_INGESTED - .with_label_values(&[stream_name, "json"]) + .with_label_values(&[stream_name, "json", tenant_id]) .set(stats.lifetime_stats.events as i64); LIFETIME_EVENTS_INGESTED_SIZE - .with_label_values(&[stream_name, "json"]) + .with_label_values(&[stream_name, "json", tenant_id]) .set(stats.lifetime_stats.ingestion as i64); LIFETIME_EVENTS_STORAGE_SIZE - .with_label_values(&["data", stream_name, "parquet"]) + .with_label_values(&["data", stream_name, "parquet", tenant_id]) .set(stats.lifetime_stats.storage as i64); } // Helper functions for tracking billing metrics -pub fn increment_events_ingested_by_date(count: u64, date: &str) { +pub fn increment_events_ingested_by_date(count: u64, date: &str, tenant_id: &str) { TOTAL_EVENTS_INGESTED_BY_DATE - .with_label_values(&[date]) + .with_label_values(&[date, tenant_id]) .inc_by(count); } @@ -635,90 +635,121 @@ pub fn increment_events_ingested_size_by_date( size: u64, date: &str, telemetry_type: TelemetryType, + tenant_id: &str, ) { TOTAL_EVENTS_INGESTED_SIZE_BY_DATE - .with_label_values(&[date]) + .with_label_values(&[date, tenant_id]) .inc_by(size); match telemetry_type { TelemetryType::Logs | TelemetryType::Events => { TOTAL_LOGS_COLLECTED_SIZE_BY_DATE - .with_label_values(&["all", date]) + .with_label_values(&["all", date, tenant_id]) .inc_by(size); } TelemetryType::Metrics => { TOTAL_METRICS_COLLECTED_SIZE_BY_DATE - .with_label_values(&["all", date]) + .with_label_values(&["all", date, tenant_id]) .inc_by(size); } TelemetryType::Traces => { TOTAL_TRACES_COLLECTED_SIZE_BY_DATE - .with_label_values(&["all", date]) + .with_label_values(&["all", date, tenant_id]) .inc_by(size); } } } -pub fn increment_parquets_stored_by_date(date: &str) { +pub fn increment_parquets_stored_by_date(date: &str, tenant_id: &str) { TOTAL_PARQUETS_STORED_BY_DATE - .with_label_values(&[date]) + .with_label_values(&[date, tenant_id]) .inc(); } -pub fn increment_parquets_stored_size_by_date(size: u64, date: &str) { +pub fn increment_parquets_stored_size_by_date(size: u64, date: &str, tenant_id: &str) { TOTAL_PARQUETS_STORED_SIZE_BY_DATE - .with_label_values(&[date]) + .with_label_values(&[date, tenant_id]) .inc_by(size); } -pub fn increment_query_calls_by_date(date: &str) { - TOTAL_QUERY_CALLS_BY_DATE.with_label_values(&[date]).inc(); +pub fn increment_query_calls_by_date(date: &str, tenant_id: &str) { + TOTAL_QUERY_CALLS_BY_DATE + .with_label_values(&[date, tenant_id]) + .inc(); } -pub fn increment_files_scanned_in_query_by_date(count: u64, date: &str) { +pub fn increment_files_scanned_in_query_by_date(count: u64, date: &str, tenant_id: &str) { TOTAL_FILES_SCANNED_IN_QUERY_BY_DATE - .with_label_values(&[date]) + .with_label_values(&[date, tenant_id]) .inc_by(count); } -pub fn increment_bytes_scanned_in_query_by_date(bytes: u64, date: &str) { +pub fn increment_bytes_scanned_in_query_by_date(bytes: u64, date: &str, tenant_id: &str) { TOTAL_BYTES_SCANNED_IN_QUERY_BY_DATE - .with_label_values(&[date]) + .with_label_values(&[date, tenant_id]) .inc_by(bytes); } -pub fn increment_object_store_calls_by_date(method: &str, date: &str) { +pub fn increment_object_store_calls_by_date(method: &str, date: &str, 
tenant_id: &str) { TOTAL_OBJECT_STORE_CALLS_BY_DATE - .with_label_values(&[method, date]) + .with_label_values(&[method, date, tenant_id]) .inc(); } -pub fn increment_files_scanned_in_object_store_calls_by_date(method: &str, count: u64, date: &str) { +pub fn increment_files_scanned_in_object_store_calls_by_date( + method: &str, + count: u64, + date: &str, + tenant_id: &str, +) { TOTAL_FILES_SCANNED_IN_OBJECT_STORE_CALLS_BY_DATE - .with_label_values(&[method, date]) + .with_label_values(&[method, date, tenant_id]) .inc_by(count); } -pub fn increment_bytes_scanned_in_object_store_calls_by_date(method: &str, bytes: u64, date: &str) { +pub fn increment_bytes_scanned_in_object_store_calls_by_date( + method: &str, + bytes: u64, + date: &str, + tenant_id: &str, +) { TOTAL_BYTES_SCANNED_IN_OBJECT_STORE_CALLS_BY_DATE - .with_label_values(&[method, date]) + .with_label_values(&[method, date, tenant_id]) .inc_by(bytes); } -pub fn increment_input_llm_tokens_by_date(provider: &str, model: &str, tokens: u64, date: &str) { +pub fn increment_input_llm_tokens_by_date( + provider: &str, + model: &str, + tokens: u64, + date: &str, + tenant_id: &str, +) { TOTAL_INPUT_LLM_TOKENS_BY_DATE - .with_label_values(&[provider, model, date]) + .with_label_values(&[provider, model, date, tenant_id]) .inc_by(tokens); } -pub fn increment_output_llm_tokens_by_date(provider: &str, model: &str, tokens: u64, date: &str) { +pub fn increment_output_llm_tokens_by_date( + provider: &str, + model: &str, + tokens: u64, + date: &str, + tenant_id: &str, +) { TOTAL_OUTPUT_LLM_TOKENS_BY_DATE - .with_label_values(&[provider, model, date]) + .with_label_values(&[provider, model, date, tenant_id]) .inc_by(tokens); } -pub fn increment_cached_llm_tokens_by_date(provider: &str, model: &str, tokens: u64, date: &str) { +pub fn increment_cached_llm_tokens_by_date( + provider: &str, + model: &str, + tokens: u64, + date: &str, + tenant_id: &str, +) { TOTAL_CACHED_LLM_TOKENS_BY_DATE - .with_label_values(&[provider, model, date]) + .with_label_values(&[provider, model, date, tenant_id]) .inc_by(tokens); } @@ -727,27 +758,28 @@ pub fn increment_reasoning_llm_tokens_by_date( model: &str, tokens: u64, date: &str, + tenant_id: &str, ) { TOTAL_REASONING_LLM_TOKENS_BY_DATE - .with_label_values(&[provider, model, date]) + .with_label_values(&[provider, model, date, tenant_id]) .inc_by(tokens); } -pub fn increment_metrics_collected_by_date(count: u64, date: &str) { +pub fn increment_metrics_collected_by_date(count: u64, date: &str, tenant_id: &str) { TOTAL_METRICS_COLLECTED_BY_DATE - .with_label_values(&["all", date]) + .with_label_values(&["all", date, tenant_id]) .inc_by(count); } -pub fn increment_logs_collected_by_date(count: u64, date: &str) { +pub fn increment_logs_collected_by_date(count: u64, date: &str, tenant_id: &str) { TOTAL_LOGS_COLLECTED_BY_DATE - .with_label_values(&["all", date]) + .with_label_values(&["all", date, tenant_id]) .inc_by(count); } -pub fn increment_traces_collected_by_date(count: u64, date: &str) { +pub fn increment_traces_collected_by_date(count: u64, date: &str, tenant_id: &str) { TOTAL_TRACES_COLLECTED_BY_DATE - .with_label_values(&["all", date]) + .with_label_values(&["all", date, tenant_id]) .inc_by(count); } diff --git a/src/migration/mod.rs b/src/migration/mod.rs index 2e4d1a394..da66ace08 100644 --- a/src/migration/mod.rs +++ b/src/migration/mod.rs @@ -34,7 +34,7 @@ use crate::{ metadata::{LogStreamMetadata, load_daily_metrics, update_data_type_time_partition}, metrics::fetch_stats_from_storage, option::Mode, - 
parseable::{PARSEABLE, Parseable}, + parseable::{DEFAULT_TENANT, PARSEABLE, Parseable}, storage::{ObjectStorage, ObjectStoreFormat, PARSEABLE_METADATA_FILE_NAME, StorageMetadata}, }; @@ -50,6 +50,7 @@ fn get_version(metadata: &serde_json::Value) -> Option<&str> { pub async fn run_metadata_migration( config: &Parseable, parseable_json: &mut Option, + tenant_id: &Option, ) -> anyhow::Result<()> { let mut storage_metadata: Option = None; if parseable_json.is_some() { @@ -69,7 +70,7 @@ pub async fn run_metadata_migration( metadata = metadata_migration::remove_querier_metadata(metadata); let _metadata: Bytes = serde_json::to_vec(&metadata)?.into(); *parseable_json = Some(_metadata); - put_remote_metadata(metadata).await?; + put_remote_metadata(metadata, tenant_id).await?; } Some("v2") => { let mut metadata = metadata_migration::v2_v3(storage_metadata); @@ -79,7 +80,7 @@ pub async fn run_metadata_migration( metadata = metadata_migration::remove_querier_metadata(metadata); let _metadata: Bytes = serde_json::to_vec(&metadata)?.into(); *parseable_json = Some(_metadata); - put_remote_metadata(metadata).await?; + put_remote_metadata(metadata, tenant_id).await?; } Some("v3") => { let mut metadata = metadata_migration::v3_v4(storage_metadata); @@ -88,7 +89,7 @@ pub async fn run_metadata_migration( metadata = metadata_migration::remove_querier_metadata(metadata); let _metadata: Bytes = serde_json::to_vec(&metadata)?.into(); *parseable_json = Some(_metadata); - put_remote_metadata(metadata).await?; + put_remote_metadata(metadata, tenant_id).await?; } Some("v4") => { let mut metadata = metadata_migration::v4_v5(storage_metadata); @@ -96,53 +97,57 @@ pub async fn run_metadata_migration( metadata = metadata_migration::remove_querier_metadata(metadata); let _metadata: Bytes = serde_json::to_vec(&metadata)?.into(); *parseable_json = Some(_metadata); - put_remote_metadata(metadata).await?; + put_remote_metadata(metadata, tenant_id).await?; } Some("v5") => { let metadata = metadata_migration::v5_v6(storage_metadata); let _metadata: Bytes = serde_json::to_vec(&metadata)?.into(); *parseable_json = Some(_metadata); - put_remote_metadata(metadata).await?; + put_remote_metadata(metadata, tenant_id).await?; } _ => { let metadata = metadata_migration::remove_querier_metadata(storage_metadata); - put_remote_metadata(metadata).await?; + put_remote_metadata(metadata, tenant_id).await?; } } } // if staging metadata is none do nothing if let Some(staging_metadata) = staging_metadata { - migrate_staging(config, staging_metadata)?; + migrate_staging(config, staging_metadata, tenant_id)?; } Ok(()) } -fn migrate_staging(config: &Parseable, staging_metadata: Value) -> anyhow::Result<()> { +fn migrate_staging( + config: &Parseable, + staging_metadata: Value, + tenant_id: &Option, +) -> anyhow::Result<()> { match get_version(&staging_metadata) { Some("v1") => { let mut metadata = metadata_migration::v1_v3(staging_metadata); metadata = metadata_migration::v3_v4(metadata); - put_staging_metadata(config, &metadata)?; + put_staging_metadata(config, &metadata, tenant_id)?; } Some("v2") => { let mut metadata = metadata_migration::v2_v3(staging_metadata); metadata = metadata_migration::v3_v4(metadata); - put_staging_metadata(config, &metadata)?; + put_staging_metadata(config, &metadata, tenant_id)?; } Some("v3") => { let metadata = metadata_migration::v3_v4(staging_metadata); - put_staging_metadata(config, &metadata)?; + put_staging_metadata(config, &metadata, tenant_id)?; } Some("v4") => { let metadata = 
metadata_migration::v4_v5(staging_metadata); let metadata = metadata_migration::v5_v6(metadata); - put_staging_metadata(config, &metadata)?; + put_staging_metadata(config, &metadata, tenant_id)?; } Some("v5") => { let metadata = metadata_migration::v5_v6(staging_metadata); - put_staging_metadata(config, &metadata)?; + put_staging_metadata(config, &metadata, tenant_id)?; } _ => (), } @@ -153,31 +158,45 @@ fn migrate_staging(config: &Parseable, staging_metadata: Value) -> anyhow::Resul pub async fn run_migration(config: &Parseable) -> anyhow::Result<()> { let storage = config.storage.get_object_store(); - // Get all stream names - let stream_names = PARSEABLE.metastore.list_streams().await?; - - // Create futures for each stream migration - let futures = stream_names.into_iter().map(|stream_name| { - let storage = storage.clone(); - async move { - match migration_stream(&stream_name, &*storage).await { - Ok(Some(metadata)) => { - // Apply the metadata update - config - .get_or_create_stream(&stream_name) - .set_metadata(metadata) - .await; - Ok(()) - } - Ok(None) => Ok(()), - Err(e) => { - // Optionally log error but continue with other streams - warn!("Error migrating stream {}: {:?}", stream_name, e); - Err(e) + let tenants = if let Some(tenants) = PARSEABLE.list_tenants() { + tenants.into_iter().map(|t| Some(t)).collect() + } else { + vec![None] + }; + let mut futures = Vec::new(); + + for tenant_id in tenants { + // Get all stream names + let stream_names = PARSEABLE.metastore.list_streams(&tenant_id).await?; + + // Create futures for each stream migration + let f = stream_names.into_iter().map(|stream_name| { + let storage = storage.clone(); + let id = tenant_id.clone(); + async move { + match migration_stream(&stream_name, &*storage, &id).await { + Ok(Some(metadata)) => { + // Apply the metadata update + config + .get_or_create_stream(&stream_name, &id) + .set_metadata(metadata) + .await; + Ok(()) + } + Ok(None) => Ok(()), + Err(e) => { + // Optionally log error but continue with other streams + warn!( + "Error migrating stream {}: tenant: {:?} {:?}", + stream_name, id, e + ); + Err(e) + } } } - } - }); + }); + futures.extend(f); + } // Execute all migrations concurrently let results = futures::future::join_all(futures).await; @@ -199,11 +218,14 @@ pub async fn run_migration(config: &Parseable) -> anyhow::Result<()> { async fn migration_stream( stream: &str, storage: &dyn ObjectStorage, + tenant_id: &Option, ) -> anyhow::Result> { let mut arrow_schema: Schema = Schema::empty(); - let schema = storage.create_schema_from_metastore(stream).await?; - let stream_metadata = fetch_or_create_stream_metadata(stream, storage).await?; + let schema = storage + .create_schema_from_metastore(stream, tenant_id) + .await?; + let stream_metadata = fetch_or_create_stream_metadata(stream, storage, tenant_id).await?; let mut stream_meta_found = true; if stream_metadata.is_empty() { @@ -218,7 +240,7 @@ async fn migration_stream( stream_metadata_value = serde_json::from_slice(&stream_metadata).expect("stream.json is valid json"); stream_metadata_value = - migrate_stream_metadata(stream_metadata_value, stream, &schema).await?; + migrate_stream_metadata(stream_metadata_value, stream, &schema, tenant_id).await?; } if arrow_schema.fields().is_empty() { @@ -226,26 +248,32 @@ async fn migration_stream( } let metadata = - setup_logstream_metadata(stream, &mut arrow_schema, stream_metadata_value).await?; + setup_logstream_metadata(stream, &mut arrow_schema, stream_metadata_value, tenant_id) + .await?; 
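// Illustrative sketch of the fan-out pattern `run_migration` uses above and that
// recurs throughout this patch: tenant-scoped work is keyed by Option<String>,
// with `None` standing in for a single-tenant deployment. `list_tenants()`
// returning Option<Vec<String>> matches the accessor added to `Parseable` later
// in this diff.
fn tenant_scopes(tenants: Option<Vec<String>>) -> Vec<Option<String>> {
    match tenants {
        Some(ids) => ids.into_iter().map(Some).collect(),
        None => vec![None],
    }
}

// e.g. tenant_scopes(Some(vec!["acme".into()])) yields vec![Some("acme".into())],
// while tenant_scopes(None) yields vec![None] for the default tenant.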
Ok(Some(metadata)) } async fn fetch_or_create_stream_metadata( stream: &str, storage: &dyn ObjectStorage, + tenant_id: &Option, ) -> anyhow::Result { - if let Ok(stream_metadata) = PARSEABLE.metastore.get_stream_json(stream, false).await { + if let Ok(stream_metadata) = PARSEABLE + .metastore + .get_stream_json(stream, false, tenant_id) + .await + { Ok(stream_metadata) } else { let querier_stream = storage - .create_stream_from_querier(stream) + .create_stream_from_querier(stream, tenant_id) .await .unwrap_or_default(); if !querier_stream.is_empty() { Ok(querier_stream) } else { Ok(storage - .create_stream_from_ingestor(stream) + .create_stream_from_ingestor(stream, tenant_id) .await .unwrap_or_default()) } @@ -256,6 +284,7 @@ async fn migrate_stream_metadata( mut stream_metadata_value: Value, stream: &str, schema: &Bytes, + tenant_id: &Option, ) -> anyhow::Result { let version = stream_metadata_value .as_object() @@ -273,12 +302,15 @@ async fn migrate_stream_metadata( serde_json::from_value(stream_metadata_value.clone())?; PARSEABLE .metastore - .put_stream_json(&stream_json, stream) + .put_stream_json(&stream_json, stream, tenant_id) .await?; let schema = serde_json::from_slice(schema).ok(); let arrow_schema = schema_migration::v1_v4(schema)?; - PARSEABLE.metastore.put_schema(arrow_schema, stream).await?; + PARSEABLE + .metastore + .put_schema(arrow_schema, stream, tenant_id) + .await?; } Some("v2") => { stream_metadata_value = stream_metadata_migration::v2_v4(stream_metadata_value); @@ -290,12 +322,15 @@ async fn migrate_stream_metadata( serde_json::from_value(stream_metadata_value.clone())?; PARSEABLE .metastore - .put_stream_json(&stream_json, stream) + .put_stream_json(&stream_json, stream, tenant_id) .await?; let schema = serde_json::from_slice(schema)?; let arrow_schema = schema_migration::v2_v4(schema)?; - PARSEABLE.metastore.put_schema(arrow_schema, stream).await?; + PARSEABLE + .metastore + .put_schema(arrow_schema, stream, tenant_id) + .await?; } Some("v3") => { stream_metadata_value = stream_metadata_migration::v3_v4(stream_metadata_value); @@ -307,7 +342,7 @@ async fn migrate_stream_metadata( serde_json::from_value(stream_metadata_value.clone())?; PARSEABLE .metastore - .put_stream_json(&stream_json, stream) + .put_stream_json(&stream_json, stream, tenant_id) .await?; } Some("v4") => { @@ -319,7 +354,7 @@ async fn migrate_stream_metadata( serde_json::from_value(stream_metadata_value.clone())?; PARSEABLE .metastore - .put_stream_json(&stream_json, stream) + .put_stream_json(&stream_json, stream, tenant_id) .await?; } Some("v5") => { @@ -329,7 +364,7 @@ async fn migrate_stream_metadata( serde_json::from_value(stream_metadata_value.clone())?; PARSEABLE .metastore - .put_stream_json(&stream_json, stream) + .put_stream_json(&stream_json, stream, tenant_id) .await?; } Some("v6") => { @@ -338,7 +373,7 @@ async fn migrate_stream_metadata( serde_json::from_value(stream_metadata_value.clone())?; PARSEABLE .metastore - .put_stream_json(&stream_json, stream) + .put_stream_json(&stream_json, stream, tenant_id) .await?; } _ => { @@ -354,6 +389,7 @@ async fn setup_logstream_metadata( stream: &str, arrow_schema: &mut Schema, stream_metadata_value: Value, + tenant_id: &Option, ) -> anyhow::Result { let ObjectStoreFormat { schema_version, @@ -377,13 +413,22 @@ async fn setup_logstream_metadata( update_data_type_time_partition(arrow_schema, time_partition.as_ref()).await?; PARSEABLE .metastore - .put_schema(arrow_schema.clone(), stream) + .put_schema(arrow_schema.clone(), stream, tenant_id) 
.await?; - fetch_stats_from_storage(stream, stats).await; - load_daily_metrics(&snapshot.manifest_list, stream); + fetch_stats_from_storage( + stream, + stats, + tenant_id.as_ref().map_or(DEFAULT_TENANT, |v| v), + ) + .await; + load_daily_metrics( + &snapshot.manifest_list, + stream, + tenant_id.as_ref().map_or(DEFAULT_TENANT, |v| v), + ); let schema = PARSEABLE - .get_or_create_stream(stream) + .get_or_create_stream(stream, tenant_id) .updated_schema(arrow_schema.clone()); let schema = HashMap::from_iter( schema @@ -434,11 +479,14 @@ pub fn get_staging_metadata(config: &Parseable) -> anyhow::Result anyhow::Result<()> { +pub async fn put_remote_metadata( + metadata: serde_json::Value, + tenant_id: &Option, +) -> anyhow::Result<()> { let metadata: StorageMetadata = serde_json::from_value(metadata)?; PARSEABLE .metastore - .put_parseable_metadata(&metadata) + .put_parseable_metadata(&metadata, tenant_id) .await?; Ok(()) } @@ -446,8 +494,17 @@ pub async fn put_remote_metadata(metadata: serde_json::Value) -> anyhow::Result< pub fn put_staging_metadata( config: &Parseable, metadata: &serde_json::Value, + tenant_id: &Option, ) -> anyhow::Result<()> { - let path = config.options.staging_dir().join(".parseable.json"); + let path = if let Some(tenant) = tenant_id.as_ref() { + config + .options + .staging_dir() + .join(tenant) + .join(".parseable.json") + } else { + config.options.staging_dir().join(".parseable.json") + }; let mut file = OpenOptions::new() .create(true) .truncate(true) diff --git a/src/otel/logs.rs b/src/otel/logs.rs index ae1abeb4e..84f7b61b5 100644 --- a/src/otel/logs.rs +++ b/src/otel/logs.rs @@ -120,7 +120,7 @@ pub fn flatten_log_record(log_record: &LogRecord) -> Map { /// this function flattens the `ScopeLogs` object /// and returns a `Vec` of `Map` of the flattened json -fn flatten_scope_log(scope_log: &ScopeLogs) -> Vec> { +fn flatten_scope_log(scope_log: &ScopeLogs, tenant_id: &str) -> Vec> { let mut vec_scope_log_json = Vec::new(); let mut scope_log_json = Map::new(); if let Some(scope) = &scope_log.scope { @@ -148,7 +148,7 @@ fn flatten_scope_log(scope_log: &ScopeLogs) -> Vec> { } let date = chrono::Utc::now().date_naive().to_string(); - increment_logs_collected_by_date(scope_log.log_records.len() as u64, &date); + increment_logs_collected_by_date(scope_log.log_records.len() as u64, &date, tenant_id); vec_scope_log_json } @@ -159,6 +159,7 @@ fn process_resource_logs( get_resource: fn(&T) -> Option<&opentelemetry_proto::tonic::resource::v1::Resource>, get_scope_logs: fn(&T) -> &[ScopeLogs], get_schema_url: fn(&T) -> &str, + tenant_id: &str, ) -> Vec where T: std::fmt::Debug, @@ -181,7 +182,7 @@ where let scope_logs = get_scope_logs(resource_log); for scope_log in scope_logs { - vec_resource_logs_json.extend(flatten_scope_log(scope_log)); + vec_resource_logs_json.extend(flatten_scope_log(scope_log, tenant_id)); } resource_log_json.insert( @@ -198,22 +199,24 @@ where vec_otel_json } -pub fn flatten_otel_protobuf(message: &ExportLogsServiceRequest) -> Vec { +pub fn flatten_otel_protobuf(message: &ExportLogsServiceRequest, tenant_id: &str) -> Vec { process_resource_logs( &message.resource_logs, |record| record.resource.as_ref(), |record| &record.scope_logs, |record| &record.schema_url, + tenant_id, ) } /// this function performs the custom flattening of the otel logs /// and returns a `Vec` of `Value::Object` of the flattened json -pub fn flatten_otel_logs(message: &LogsData) -> Vec { +pub fn flatten_otel_logs(message: &LogsData, tenant_id: &str) -> Vec { process_resource_logs( 
&message.resource_logs, |record| record.resource.as_ref(), |record| &record.scope_logs, |record| &record.schema_url, + tenant_id, ) } diff --git a/src/otel/metrics.rs b/src/otel/metrics.rs index 588d23416..0af72ec3a 100644 --- a/src/otel/metrics.rs +++ b/src/otel/metrics.rs @@ -514,6 +514,7 @@ fn process_resource_metrics( get_scope_schema_url: fn(&S) -> &str, get_metrics: fn(&S) -> &[M], get_metric: fn(&M) -> &Metric, + tenant_id: &str, ) -> Vec { let mut vec_otel_json = Vec::new(); @@ -541,7 +542,7 @@ fn process_resource_metrics( } let date = chrono::Utc::now().date_naive().to_string(); - increment_metrics_collected_by_date(metrics.len() as u64, &date); + increment_metrics_collected_by_date(metrics.len() as u64, &date, tenant_id); if let Some(scope) = get_scope(scope_metric) { scope_metrics_json @@ -588,7 +589,7 @@ fn process_resource_metrics( /// this function performs the custom flattening of the otel metrics /// and returns a `Vec` of `Value::Object` of the flattened json -pub fn flatten_otel_metrics(message: MetricsData) -> Vec { +pub fn flatten_otel_metrics(message: MetricsData, tenant_id: &str) -> Vec { process_resource_metrics( &message.resource_metrics, |record| record.resource.as_ref(), @@ -598,11 +599,15 @@ pub fn flatten_otel_metrics(message: MetricsData) -> Vec { |scope_metric| &scope_metric.schema_url, |scope_metric| &scope_metric.metrics, |metric| metric, + tenant_id, ) } /// Flattens OpenTelemetry metrics from protobuf format -pub fn flatten_otel_metrics_protobuf(message: &ExportMetricsServiceRequest) -> Vec { +pub fn flatten_otel_metrics_protobuf( + message: &ExportMetricsServiceRequest, + tenant_id: &str, +) -> Vec { process_resource_metrics( &message.resource_metrics, |record| record.resource.as_ref(), @@ -612,6 +617,7 @@ pub fn flatten_otel_metrics_protobuf(message: &ExportMetricsServiceRequest) -> V |scope_metric| &scope_metric.schema_url, |scope_metric| &scope_metric.metrics, |metric| metric, + tenant_id, ) } diff --git a/src/otel/traces.rs b/src/otel/traces.rs index 34eaed13d..b8fc9ff99 100644 --- a/src/otel/traces.rs +++ b/src/otel/traces.rs @@ -67,7 +67,7 @@ pub const OTEL_TRACES_KNOWN_FIELD_LIST: [&str; 32] = [ ]; /// this function flattens the `ScopeSpans` object /// and returns a `Vec` of `Map` of the flattened json -fn flatten_scope_span(scope_span: &ScopeSpans) -> Vec> { +fn flatten_scope_span(scope_span: &ScopeSpans, tenant_id: &str) -> Vec> { let mut vec_scope_span_json = Vec::new(); let mut scope_span_json = Map::new(); for span in &scope_span.spans { @@ -76,7 +76,7 @@ fn flatten_scope_span(scope_span: &ScopeSpans) -> Vec> { } let date = chrono::Utc::now().date_naive().to_string(); - increment_traces_collected_by_date(scope_span.spans.len() as u64, &date); + increment_traces_collected_by_date(scope_span.spans.len() as u64, &date, tenant_id); if let Some(scope) = &scope_span.scope { scope_span_json.insert("scope_name".to_string(), Value::String(scope.name.clone())); @@ -113,6 +113,7 @@ fn process_resource_spans( get_resource: fn(&T) -> Option<&opentelemetry_proto::tonic::resource::v1::Resource>, get_scope_spans: fn(&T) -> &[ScopeSpans], get_schema_url: fn(&T) -> &str, + tenant_id: &str, ) -> Vec where T: std::fmt::Debug, @@ -134,7 +135,7 @@ where // Process scope spans let mut vec_resource_spans_json = Vec::new(); for scope_span in get_scope_spans(resource_span) { - let scope_span_json = flatten_scope_span(scope_span); + let scope_span_json = flatten_scope_span(scope_span, tenant_id); vec_resource_spans_json.extend(scope_span_json); } @@ -155,23 +156,28 @@ 
where } /// Flattens OpenTelemetry traces from protobuf format -pub fn flatten_otel_traces_protobuf(message: &ExportTraceServiceRequest) -> Vec { +pub fn flatten_otel_traces_protobuf( + message: &ExportTraceServiceRequest, + tenant_id: &str, +) -> Vec { process_resource_spans( &message.resource_spans, |rs| rs.resource.as_ref(), |rs| &rs.scope_spans, |rs| &rs.schema_url, + tenant_id, ) } /// this function performs the custom flattening of the otel traces event /// and returns a `Vec` of `Value::Object` of the flattened json -pub fn flatten_otel_traces(message: &TracesData) -> Vec { +pub fn flatten_otel_traces(message: &TracesData, tenant_id: &str) -> Vec { process_resource_spans( &message.resource_spans, |rs| rs.resource.as_ref(), |rs| &rs.scope_spans, |rs| &rs.schema_url, + tenant_id, ) } diff --git a/src/parseable/mod.rs b/src/parseable/mod.rs index 088fdeb91..492f42658 100644 --- a/src/parseable/mod.rs +++ b/src/parseable/mod.rs @@ -22,22 +22,27 @@ use std::{ num::NonZeroU32, path::PathBuf, str::FromStr, - sync::Arc, + sync::{Arc, RwLock}, }; -use actix_web::http::StatusCode; -use actix_web::http::header::{CONTENT_TYPE, HeaderMap, HeaderName, HeaderValue}; +use actix_web::http::{ + StatusCode, + header::{CONTENT_TYPE, HeaderMap, HeaderName, HeaderValue}, +}; use arrow_schema::{Field, Schema}; use bytes::Bytes; use chrono::Utc; use clap::{Parser, error::ErrorKind}; use once_cell::sync::Lazy; +use relative_path::RelativePathBuf; pub use staging::StagingError; use streams::StreamRef; pub use streams::{Stream, StreamNotFound, Streams}; use tokio::try_join; use tracing::error; +pub const DEFAULT_TENANT: &str = "DEFAULT_TENANT"; + #[cfg(feature = "kafka")] use crate::connectors::kafka::config::KafkaConfig; use crate::{ @@ -54,7 +59,10 @@ use crate::{ }, ingest::PostError, logstream::error::{CreateStreamError, StreamError}, - modal::{ingest_server::INGESTOR_META, utils::logstream_utils::PutStreamHeaders}, + modal::{ + ingest_server::INGESTOR_META, + utils::{logstream_utils::PutStreamHeaders, rbac_utils::get_metadata}, + }, }, }, metadata::{LogStreamMetadata, SchemaVersion}, @@ -62,11 +70,16 @@ use crate::{ metastore_traits::Metastore, metastores::object_store_metastore::ObjectStoreMetastore, }, option::Mode, + rbac::{ + Users, + map::{mut_roles, mut_users}, + }, static_schema::{StaticSchema, convert_static_schema_to_arrow_schema}, storage::{ ObjectStorageError, ObjectStorageProvider, ObjectStoreFormat, Owner, Permisssion, - StreamType, + StorageMetadata, StreamType, put_remote_metadata, }, + tenants::{Service, TENANT_METADATA}, validator, }; @@ -166,9 +179,12 @@ pub struct Parseable { pub options: Arc, /// Storage engine backing parseable pub storage: Arc, - /// Metadata and staging realting to each logstreams + // /// ObjectStorageProvider for each tenant + // pub tenant_storage: Arc>>, + /// Metadata and staging relating to each logstreams /// A globally shared mapping of `Streams` that parseable is aware of. pub streams: Streams, + pub tenants: Arc>>, /// metastore pub metastore: Arc, /// Used to configure the kafka connector @@ -188,23 +204,31 @@ impl Parseable { storage, metastore, streams: Streams::default(), + tenants: Arc::new(RwLock::new(vec![])), #[cfg(feature = "kafka")] kafka_config, } } /// Try to get the handle of a stream in staging, if it doesn't exist return `None`. 
- pub fn get_stream(&self, stream_name: &str) -> Result { + pub fn get_stream( + &self, + stream_name: &str, + tenant_id: &Option, + ) -> Result { + let tenant_id = tenant_id.as_ref().map_or(DEFAULT_TENANT, |v| v); self.streams .read() .unwrap() - .get(stream_name) + .get(tenant_id) + .ok_or_else(|| StreamNotFound(format!("{stream_name} with tenant {tenant_id}"))) + .map(|v| v.get(stream_name))? .ok_or_else(|| StreamNotFound(stream_name.to_owned())) .cloned() } /// Get the handle to a stream in staging, create one if it doesn't exist - pub fn get_or_create_stream(&self, stream_name: &str) -> StreamRef { - if let Ok(staging) = self.get_stream(stream_name) { + pub fn get_or_create_stream(&self, stream_name: &str, tenant_id: &Option) -> StreamRef { + if let Ok(staging) = self.get_stream(stream_name, tenant_id) { return staging; } @@ -218,19 +242,26 @@ impl Parseable { stream_name.to_owned(), LogStreamMetadata::default(), ingestor_id, + tenant_id, ) } /// Checks for the stream in memory, or loads it from storage when in distributed mode /// return true if stream exists in memory or loaded from storage /// return false if stream doesn't exist in memory and not loaded from storage - pub async fn check_or_load_stream(&self, stream_name: &str) -> bool { - if self.streams.contains(stream_name) { + pub async fn check_or_load_stream( + &self, + stream_name: &str, + tenant_id: &Option, + ) -> bool { + tracing::warn!("check or load streams- {stream_name}, {tenant_id:?}"); + if self.streams.contains(stream_name, tenant_id) { return true; } + tracing::warn!("check or load streams not present, creating {stream_name}, {tenant_id:?}"); (self.options.mode == Mode::Query || self.options.mode == Mode::Prism) && self - .create_stream_and_schema_from_storage(stream_name) + .create_stream_and_schema_from_storage(stream_name, tenant_id) .await .unwrap_or_default() } @@ -242,7 +273,7 @@ impl Parseable { let mut has_parseable_json = false; let parseable_json_result = self .metastore - .get_parseable_metadata() + .get_parseable_metadata(&None) // load the server meta .await .map_err(|e| ObjectStorageError::MetastoreError(Box::new(e.to_detail())))?; @@ -252,16 +283,29 @@ impl Parseable { // Lists all the directories in the root of the bucket/directory // can be a stream (if it contains .stream.json file) or not - let has_dirs = match obj_store.list_dirs().await { + let has_dirs = match obj_store.list_dirs(&None).await { Ok(dirs) => !dirs.is_empty(), Err(_) => false, }; - let has_streams = PARSEABLE.metastore.list_streams().await.is_ok(); if !has_dirs && !has_parseable_json { return Ok(None); } - if has_streams { + let has_stream = if let Some(tenants) = PARSEABLE.list_tenants() { + let mut has_stream = true; + for tenant in tenants { + if let Err(e) = PARSEABLE.metastore.list_streams(&Some(tenant)).await { + tracing::error!("{e}"); + has_stream = false; + break; + }; + } + has_stream + } else { + PARSEABLE.metastore.list_streams(&None).await.is_ok() + }; + + if has_stream { return Ok(parseable_json_result); } @@ -281,6 +325,16 @@ impl Parseable { ))) } + /// this function only gets called from enterprise main + /// If the server has traces of multi-tenancy AND is started with multi-tenant flag, then proceed + /// otherwise fail with error + /// + /// if the server doesn't have traces of multi-tenancy AND is started without the flag, then proceed + /// otherwise fail with error + pub async fn validate_multi_tenancy(&self) -> Result, anyhow::Error> { + self.load_tenants().await + } + pub fn storage(&self) -> Arc { 
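// Illustrative sketch of the tenant-scoped lookup `get_stream` now performs above:
// the outer map is keyed by tenant id (falling back to DEFAULT_TENANT when the
// caller passes None), the inner map by stream name. A String stands in for the
// crate's in-memory stream handle, and locking is omitted to keep the sketch small.
use std::collections::HashMap;

const DEFAULT_TENANT: &str = "DEFAULT_TENANT";

fn lookup_stream<'a>(
    streams: &'a HashMap<String, HashMap<String, String>>,
    stream_name: &str,
    tenant_id: &Option<String>,
) -> Option<&'a String> {
    let tenant = tenant_id.as_deref().unwrap_or(DEFAULT_TENANT);
    streams.get(tenant)?.get(stream_name)
}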
self.storage.clone() } @@ -322,17 +376,21 @@ impl Parseable { pub async fn create_stream_and_schema_from_storage( &self, stream_name: &str, + tenant_id: &Option, ) -> Result { // Proceed to create log stream if it doesn't exist let storage = self.storage.get_object_store(); - let streams = PARSEABLE.metastore.list_streams().await?; + let streams = PARSEABLE.metastore.list_streams(tenant_id).await?; if !streams.contains(stream_name) { + tracing::warn!("returning ok(false) for {stream_name} , {tenant_id:?}"); return Ok(false); } let (stream_metadata_bytes, schema_bytes) = try_join!( - storage.create_stream_from_ingestor(stream_name), - storage.create_schema_from_metastore(stream_name) + storage.create_stream_from_ingestor(stream_name, tenant_id), + storage.create_schema_from_metastore(stream_name, tenant_id) )?; + tracing::warn!(stream_metadata_bytes=?stream_metadata_bytes); + tracing::warn!(schema_bytes=?schema_bytes); let stream_metadata = if stream_metadata_bytes.is_empty() { ObjectStoreFormat::default() @@ -392,77 +450,97 @@ impl Parseable { stream_name.to_owned(), metadata, ingestor_id, + tenant_id, ); // Set hot tier configuration in memory based on stored metadata if let Some(hot_tier_config) = hot_tier { stream.set_hot_tier(Some(hot_tier_config)); } - - //commit schema in memory - commit_schema(stream_name, schema).map_err(|e| StreamError::Anyhow(e.into()))?; + tracing::warn!(commit_schema=?schema); + // commit schema in memory + commit_schema(stream_name, schema, tenant_id).map_err(|e| StreamError::Anyhow(e.into()))?; Ok(true) } pub async fn create_internal_stream_if_not_exists(&self) -> Result<(), StreamError> { let log_source_entry = LogSourceEntry::new(LogSource::Pmeta, HashSet::new()); - let internal_stream_result = self - .create_stream_if_not_exists( - PMETA_STREAM_NAME, - StreamType::Internal, - None, - vec![log_source_entry], - TelemetryType::Logs, - ) - .await; - - let log_source_entry = LogSourceEntry::new(LogSource::Json, HashSet::new()); - let billing_stream_result = self - .create_stream_if_not_exists( - BILLING_METRICS_STREAM_NAME, - StreamType::Internal, - None, - vec![log_source_entry], - TelemetryType::Logs, - ) - .await; + let tenants = if let Some(tenants) = PARSEABLE.list_tenants() { + tenants.into_iter().map(|t| Some(t)).collect() + } else { + vec![None] + }; + for tenant_id in tenants { + let internal_stream_result = self + .create_stream_if_not_exists( + PMETA_STREAM_NAME, + StreamType::Internal, + None, + vec![log_source_entry.clone()], + TelemetryType::Logs, + &tenant_id, + ) + .await; - // Check if either stream creation failed - if let Err(e) = &internal_stream_result { - tracing::error!("Failed to create pmeta stream: {:?}", e); - } - if let Err(e) = &billing_stream_result { - tracing::error!("Failed to create billing stream: {:?}", e); - } + let log_source_entry = LogSourceEntry::new(LogSource::Json, HashSet::new()); + let billing_stream_result = self + .create_stream_if_not_exists( + BILLING_METRICS_STREAM_NAME, + StreamType::Internal, + None, + vec![log_source_entry], + TelemetryType::Logs, + &tenant_id, + ) + .await; - // Check if both streams already existed - if matches!(internal_stream_result, Ok(true)) && matches!(billing_stream_result, Ok(true)) { - return Ok(()); - } + // Check if either stream creation failed + if let Err(e) = &internal_stream_result { + tracing::error!("Failed to create pmeta stream: {:?}", e); + } + if let Err(e) = &billing_stream_result { + tracing::error!("Failed to create billing stream: {:?}", e); + } - let mut header_map 
= HeaderMap::new(); - header_map.insert( - HeaderName::from_str(STREAM_TYPE_KEY).unwrap(), - HeaderValue::from_str(&StreamType::Internal.to_string()).unwrap(), - ); - header_map.insert(CONTENT_TYPE, HeaderValue::from_static("application/json")); + // Check if both streams already existed + if matches!(internal_stream_result, Ok(true)) + && matches!(billing_stream_result, Ok(true)) + { + continue; + } - // Sync only the streams that were created successfully - if matches!(internal_stream_result, Ok(false)) - && let Err(e) = - sync_streams_with_ingestors(header_map.clone(), Bytes::new(), PMETA_STREAM_NAME) - .await - { - tracing::error!("Failed to sync pmeta stream with ingestors: {:?}", e); - } + let mut header_map = HeaderMap::new(); + header_map.insert( + HeaderName::from_str(STREAM_TYPE_KEY).unwrap(), + HeaderValue::from_str(&StreamType::Internal.to_string()).unwrap(), + ); + header_map.insert(CONTENT_TYPE, HeaderValue::from_static("application/json")); + + // Sync only the streams that were created successfully + if matches!(internal_stream_result, Ok(false)) + && let Err(e) = sync_streams_with_ingestors( + header_map.clone(), + Bytes::new(), + PMETA_STREAM_NAME, + &tenant_id, + ) + .await + { + tracing::error!("Failed to sync pmeta stream with ingestors: {:?}", e); + } - if matches!(billing_stream_result, Ok(false)) - && let Err(e) = - sync_streams_with_ingestors(header_map, Bytes::new(), BILLING_METRICS_STREAM_NAME) - .await - { - tracing::error!("Failed to sync billing stream with ingestors: {:?}", e); + if matches!(billing_stream_result, Ok(false)) + && let Err(e) = sync_streams_with_ingestors( + header_map, + Bytes::new(), + BILLING_METRICS_STREAM_NAME, + &tenant_id, + ) + .await + { + tracing::error!("Failed to sync billing stream with ingestors: {:?}", e); + } } Ok(()) @@ -476,8 +554,9 @@ impl Parseable { custom_partition: Option<&String>, log_source: Vec, telemetry_type: TelemetryType, + tenant_id: &Option, ) -> Result { - if self.streams.contains(stream_name) { + if self.streams.contains(stream_name, tenant_id) { return Ok(true); } @@ -487,11 +566,11 @@ impl Parseable { } // For distributed deployments, if the stream not found in memory map, - //check if it exists in the storage - //create stream and schema from storage + // check if it exists in the storage + // create stream and schema from storage if self.options.mode != Mode::All && self - .create_stream_and_schema_from_storage(stream_name) + .create_stream_and_schema_from_storage(stream_name, tenant_id) .await? 
{ return Ok(true); @@ -507,6 +586,7 @@ impl Parseable { stream_type, log_source, telemetry_type, + tenant_id, ) .await?; @@ -517,8 +597,11 @@ impl Parseable { &self, stream_name: &str, log_source: LogSourceEntry, + tenant_id: &Option, ) -> Result<(), StreamError> { - let stream = self.get_stream(stream_name).expect(STREAM_EXISTS); + let stream = self + .get_stream(stream_name, tenant_id) + .expect(STREAM_EXISTS); let mut log_sources = stream.get_log_source(); let mut changed = false; @@ -554,7 +637,7 @@ impl Parseable { let storage = self.storage.get_object_store(); if let Err(err) = storage - .update_log_source_in_stream(stream_name, &log_sources) + .update_log_source_in_stream(stream_name, &log_sources, tenant_id) .await { return Err(StreamError::Storage(err)); @@ -569,6 +652,7 @@ impl Parseable { headers: &HeaderMap, body: &Bytes, stream_name: &str, + tenant_id: &Option, ) -> Result { let PutStreamHeaders { time_partition, @@ -582,14 +666,15 @@ impl Parseable { } = headers.into(); let stream_in_memory_dont_update = - self.streams.contains(stream_name) && !update_stream_flag; + self.streams.contains(stream_name, tenant_id) && !update_stream_flag; + tracing::warn!(stream_in_memory_dont_update=?stream_in_memory_dont_update); // check if stream in storage only if not in memory // for Parseable OSS, create_update_stream is called only from query node // for Parseable Enterprise, create_update_stream is called from prism node - let stream_in_storage_only_for_query_node = !self.streams.contains(stream_name) + let stream_in_storage_only_for_query_node = !self.streams.contains(stream_name, tenant_id) && (self.options.mode == Mode::Query || self.options.mode == Mode::Prism) && self - .create_stream_and_schema_from_storage(stream_name) + .create_stream_and_schema_from_storage(stream_name, tenant_id) .await?; if stream_in_memory_dont_update || stream_in_storage_only_for_query_node { return Err(StreamError::Custom { @@ -609,6 +694,7 @@ impl Parseable { static_schema_flag, &time_partition_limit, custom_partition.as_ref(), + tenant_id, ) .await; } @@ -637,6 +723,7 @@ impl Parseable { custom_partition.as_ref(), static_schema_flag, )?; + tracing::warn!("validated static schema"); let log_source_entry = LogSourceEntry::new(log_source, HashSet::new()); self.create_stream( stream_name.to_string(), @@ -648,9 +735,10 @@ impl Parseable { stream_type, vec![log_source_entry], telemetry_type, + tenant_id, ) .await?; - + tracing::warn!("created stream"); Ok(headers.clone()) } @@ -662,8 +750,9 @@ impl Parseable { static_schema_flag: bool, time_partition_limit: &str, custom_partition: Option<&String>, + tenant_id: &Option, ) -> Result { - if !self.streams.contains(stream_name) { + if !self.streams.contains(stream_name, tenant_id) { return Err(StreamNotFound(stream_name.to_string()).into()); } if !time_partition.is_empty() { @@ -683,11 +772,12 @@ impl Parseable { self.update_time_partition_limit_in_stream( stream_name.to_string(), time_partition_days, + tenant_id, ) .await?; return Ok(headers.clone()); } - self.validate_and_update_custom_partition(stream_name, custom_partition) + self.validate_and_update_custom_partition(stream_name, custom_partition, tenant_id) .await?; Ok(headers.clone()) @@ -705,6 +795,7 @@ impl Parseable { stream_type: StreamType, log_source: Vec, telemetry_type: TelemetryType, + tenant_id: &Option, ) -> Result<(), CreateStreamError> { // fail to proceed if invalid stream name if stream_type != StreamType::Internal { @@ -713,6 +804,7 @@ impl Parseable { // Proceed to create log stream if it 
doesn't exist let storage = self.storage.get_object_store(); + // update owner and permissions let meta = ObjectStoreFormat { created_at: Utc::now().to_rfc3339(), permissions: vec![Permisssion::new(PARSEABLE.options.username.clone())], @@ -731,11 +823,16 @@ impl Parseable { ..Default::default() }; + tracing::warn!(meta=?meta); match storage - .create_stream(&stream_name, meta, schema.clone()) + .create_stream(&stream_name, meta, schema.clone(), tenant_id) .await { Ok(created_at) => { + tracing::warn!(created_stream_at=?created_at); + tracing::warn!(stream_name=?stream_name); + tracing::warn!(schema=?schema); + tracing::warn!(tenant_id=?tenant_id); let mut static_schema: HashMap> = HashMap::new(); for (field_name, field) in schema @@ -768,6 +865,7 @@ impl Parseable { stream_name.to_owned(), metadata, ingestor_id, + tenant_id, ); } Err(err) => { @@ -781,8 +879,11 @@ impl Parseable { &self, stream_name: &str, custom_partition: Option<&String>, + tenant_id: &Option, ) -> Result<(), StreamError> { - let stream = self.get_stream(stream_name).expect(STREAM_EXISTS); + let stream = self + .get_stream(stream_name, tenant_id) + .expect(STREAM_EXISTS); if stream.get_time_partition().is_some() { return Err(StreamError::Custom { msg: "Cannot set both time partition and custom partition".to_string(), @@ -793,8 +894,12 @@ impl Parseable { validate_custom_partition(custom_partition)?; } - self.update_custom_partition_in_stream(stream_name.to_string(), custom_partition) - .await?; + self.update_custom_partition_in_stream( + stream_name.to_string(), + custom_partition, + tenant_id, + ) + .await?; Ok(()) } @@ -803,16 +908,17 @@ impl Parseable { &self, stream_name: String, time_partition_limit: NonZeroU32, + tenant_id: &Option, ) -> Result<(), CreateStreamError> { let storage = self.storage.get_object_store(); if let Err(err) = storage - .update_time_partition_limit_in_stream(&stream_name, time_partition_limit) + .update_time_partition_limit_in_stream(&stream_name, time_partition_limit, tenant_id) .await { return Err(CreateStreamError::Storage { stream_name, err }); } - if let Ok(stream) = self.get_stream(&stream_name) { + if let Ok(stream) = self.get_stream(&stream_name, tenant_id) { stream.set_time_partition_limit(time_partition_limit) } else { return Err(CreateStreamError::Custom { @@ -828,8 +934,11 @@ impl Parseable { &self, stream_name: String, custom_partition: Option<&String>, + tenant_id: &Option, ) -> Result<(), CreateStreamError> { - let stream = self.get_stream(&stream_name).expect(STREAM_EXISTS); + let stream = self + .get_stream(&stream_name, tenant_id) + .expect(STREAM_EXISTS); let static_schema_flag = stream.get_static_schema_flag(); let time_partition = stream.get_time_partition(); if static_schema_flag { @@ -869,7 +978,7 @@ impl Parseable { } let storage = self.storage.get_object_store(); if let Err(err) = storage - .update_custom_partition_in_stream(&stream_name, custom_partition) + .update_custom_partition_in_stream(&stream_name, custom_partition, tenant_id) .await { return Err(CreateStreamError::Storage { stream_name, err }); @@ -915,10 +1024,11 @@ impl Parseable { &self, stream_name: &str, first_event_at: &str, + tenant_id: &Option, ) -> Option { let storage = self.storage.get_object_store(); if let Err(err) = storage - .update_first_event_in_stream(stream_name, first_event_at) + .update_first_event_in_stream(stream_name, first_event_at, tenant_id) .await { error!( @@ -926,7 +1036,7 @@ impl Parseable { ); } - match self.get_stream(stream_name) { + match self.get_stream(stream_name, 
tenant_id) { Ok(stream) => stream.set_first_event_at(first_event_at), Err(err) => error!( "Failed to update first_event_at in stream info for stream {stream_name:?}: {err:?}" @@ -940,20 +1050,161 @@ impl Parseable { &self, stream_name: &str, log_source: Vec, + tenant_id: &Option, ) -> Result<(), StreamError> { let storage = self.storage.get_object_store(); if let Err(err) = storage - .update_log_source_in_stream(stream_name, &log_source) + .update_log_source_in_stream(stream_name, &log_source, tenant_id) .await { return Err(StreamError::Storage(err)); } - let stream = self.get_stream(stream_name).expect(STREAM_EXISTS); + let stream = self + .get_stream(stream_name, tenant_id) + .expect(STREAM_EXISTS); stream.set_log_source(log_source); Ok(()) } + + pub fn add_tenant( + &self, + tenant_id: String, + tenant_meta: StorageMetadata, + ) -> Result<(), anyhow::Error> { + if !self.options.is_multi_tenant() { + return Err(anyhow::Error::msg("P_MULTI_TENANCY is set to false")); + } + + if self.tenants.read().unwrap().contains(&tenant_id) { + return Err(anyhow::Error::msg(format!( + "Tenant with id- {tenant_id} already exists" + ))); + } else { + self.tenants.write().unwrap().push(tenant_id.clone()); + TENANT_METADATA.insert_tenant(tenant_id, tenant_meta); + } + + Ok(()) + } + + pub async fn suspend_tenant_service( + &self, + tenant_id: String, + service: Service, + ) -> Result<(), anyhow::Error> { + TENANT_METADATA.suspend_service(&tenant_id, service.clone()); + + // write to disk + let tenant_id = &Some(tenant_id); + let mut meta = get_metadata(tenant_id).await?; + if let Some(sus) = meta.suspended_services.as_mut() { + sus.insert(service); + } else { + meta.suspended_services = Some(HashSet::from_iter([service])); + } + + put_remote_metadata(&meta, tenant_id).await?; + Ok(()) + } + + pub async fn resume_tenant_service( + &self, + tenant_id: String, + service: Service, + ) -> Result<(), anyhow::Error> { + TENANT_METADATA.resume_service(&tenant_id, service.clone()); + + // write to disk + let tenant_id = &Some(tenant_id); + let mut meta = get_metadata(tenant_id).await?; + if let Some(sus) = meta.suspended_services.as_mut() { + sus.remove(&service); + } + + put_remote_metadata(&meta, tenant_id).await?; + Ok(()) + } + + pub fn delete_tenant(&self, tenant_id: &str) -> Result<(), anyhow::Error> { + // let mut metadata = get_metadata(&Some(tenant_id.to_owned())).await?; + // delete users and sessions + let users = mut_users().remove(tenant_id); + if let Some(users) = users { + tracing::warn!("found tenant users, deleting"); + for (userid, user) in users { + // metadata + // .users + // .retain(|u| u.tenant.eq(&Some(tenant_id.to_owned()))); + + Users.delete_user(&userid, &user.tenant); + } + } + + // delete roles + mut_roles().remove(tenant_id); + // if let Some(roles) = mut_roles().remove(tenant_id) { + // for (role, _) in roles { + // // metadata.roles.retain(|r, _| !role.eq(r)); + // } + // } + + // delete resources + + // delete from in-mem + TENANT_METADATA.delete_tenant(&tenant_id); + Ok(()) + } + + async fn load_tenants(&self) -> Result, anyhow::Error> { + let is_multi_tenant = self.options.is_multi_tenant(); + + let obj_store = self.storage().get_object_store(); + let dirs: Vec = obj_store + .list_dirs_relative(&RelativePathBuf::from_iter([""]), &None) + .await? 
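// Illustrative sketch of the suspend/resume bookkeeping shown above: suspended
// services live in an Option<HashSet<_>> on the tenant's storage metadata;
// suspending inserts into (or initializes) the set, resuming removes from it,
// and the updated metadata is then written back with put_remote_metadata.
// `String` stands in here for the crate's Service type.
use std::collections::HashSet;

fn suspend_service(services: &mut Option<HashSet<String>>, service: String) {
    services.get_or_insert_with(HashSet::new).insert(service);
}

fn resume_service(services: &mut Option<HashSet<String>>, service: &str) {
    if let Some(set) = services.as_mut() {
        set.remove(service);
    }
}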
+ .into_iter() + .filter(|d| !d.starts_with(".")) + .collect(); + tracing::warn!("multi-tenant dirs- {dirs:?}"); + // validate the possible presence of tenant storage metadata + for tenant_id in dirs.iter() { + if let Some(meta) = PARSEABLE + .metastore + .get_parseable_metadata(&Some(tenant_id.clone())) + .await? + && is_multi_tenant + { + let metadata: StorageMetadata = serde_json::from_slice(&meta)?; + tracing::warn!("inserting tenant data- {metadata:?} for tenant- {tenant_id}"); + TENANT_METADATA.insert_tenant(tenant_id.clone(), metadata.clone()); + } else if !is_multi_tenant { + } else { + return Err(anyhow::Error::msg(format!( + "Found invalid tenant directory with multi-tenant mode- {tenant_id}.\nExiting." + ))); + } + } + + if let Ok(mut t) = self.tenants.write() { + t.extend(dirs); + Ok(Some(())) + } else { + Ok(None) + } + } + + pub fn list_tenants(&self) -> Option> { + if let Ok(t) = self.tenants.as_ref().read() + && !t.is_empty() + { + let t = t.clone(); + Some(t) + } else { + None + } + } } pub fn validate_static_schema( diff --git a/src/parseable/staging/mod.rs b/src/parseable/staging/mod.rs index fddd4d647..f5bd90cee 100644 --- a/src/parseable/staging/mod.rs +++ b/src/parseable/staging/mod.rs @@ -17,6 +17,8 @@ * */ +use crate::{parseable::StreamNotFound, tenants::TenantNotFound}; + pub mod reader; pub mod writer; @@ -30,8 +32,10 @@ pub enum StagingError { ObjectStorage(#[from] std::io::Error), #[error("Could not generate parquet file")] Create, - #[error("Could not find stream {0}")] - NotFound(String), + #[error("{0}")] + StreamNotFound(#[from] StreamNotFound), + #[error("{0}")] + TenantNotFound(#[from] TenantNotFound), // #[error("Metadata Error: {0}")] // Metadata(#[from] MetadataError), } diff --git a/src/parseable/streams.rs b/src/parseable/streams.rs index f341539af..e3c2f5cc1 100644 --- a/src/parseable/streams.rs +++ b/src/parseable/streams.rs @@ -56,6 +56,7 @@ use crate::{ metadata::{LogStreamMetadata, SchemaVersion}, metrics, option::Mode, + parseable::{DEFAULT_TENANT, PARSEABLE}, storage::{StreamType, object_storage::to_bytes, retention::Retention}, utils::time::{Minute, TimeRange}, }; @@ -119,9 +120,10 @@ impl Stream { stream_name: impl Into, metadata: LogStreamMetadata, ingestor_id: Option, + tenant_id: &Option, ) -> StreamRef { let stream_name = stream_name.into(); - let data_path = options.local_stream_data_path(&stream_name); + let data_path = options.local_stream_data_path(&stream_name, tenant_id); Arc::new(Self { stream_name: stream_name.clone(), @@ -408,13 +410,15 @@ impl Stream { return vec![]; }; - dir.flatten() + let dirs = dir + .flatten() .map(|file| file.path()) .filter(|file| { file.extension().is_some_and(|ext| ext.eq("parquet")) && Self::is_valid_parquet_file(file, &self.stream_name) }) - .collect() + .collect(); + dirs } pub fn schema_files(&self) -> Vec { @@ -461,6 +465,7 @@ impl Stream { &self, init_signal: bool, shutdown_signal: bool, + tenant_id: &Option, ) -> Result<(), StagingError> { info!( "Starting arrow_conversion job for stream- {}", @@ -477,6 +482,7 @@ impl Stream { custom_partition.as_ref(), init_signal, shutdown_signal, + tenant_id, )?; // check if there is already a schema file in staging pertaining to this stream // if yes, then merge them and save @@ -578,22 +584,28 @@ impl Stream { props.set_sorting_columns(Some(sorting_column_vec)).build() } - fn reset_staging_metrics(&self) { + fn reset_staging_metrics(&self, tenant_id: &Option) { + let tenant_str = tenant_id.as_deref().unwrap_or(DEFAULT_TENANT); metrics::STAGING_FILES - 
.with_label_values(&[&self.stream_name]) + .with_label_values(&[&self.stream_name, tenant_str]) .set(0); metrics::STORAGE_SIZE - .with_label_values(&["staging", &self.stream_name, "arrows"]) + .with_label_values(&["staging", &self.stream_name, "arrows", tenant_str]) .set(0); metrics::STORAGE_SIZE - .with_label_values(&["staging", &self.stream_name, "parquet"]) + .with_label_values(&["staging", &self.stream_name, "parquet", tenant_str]) .set(0); } - fn update_staging_metrics(&self, staging_files: &HashMap>) { + fn update_staging_metrics( + &self, + staging_files: &HashMap>, + tenant_id: &Option, + ) { + let tenant_str = tenant_id.as_deref().unwrap_or(DEFAULT_TENANT); let total_arrow_files = staging_files.values().map(|v| v.len()).sum::(); metrics::STAGING_FILES - .with_label_values(&[&self.stream_name]) + .with_label_values(&[&self.stream_name, tenant_str]) .set(total_arrow_files as i64); let total_arrow_files_size = staging_files @@ -605,7 +617,7 @@ impl Stream { }) .sum::(); metrics::STORAGE_SIZE - .with_label_values(&["staging", &self.stream_name, "arrows"]) + .with_label_values(&["staging", &self.stream_name, "arrows", tenant_str]) .set(total_arrow_files_size as i64); } @@ -618,6 +630,7 @@ impl Stream { custom_partition: Option<&String>, init_signal: bool, shutdown_signal: bool, + tenant_id: &Option, ) -> Result, StagingError> { let mut schemas = Vec::new(); @@ -626,11 +639,11 @@ impl Stream { let staging_files = self.arrow_files_grouped_exclude_time(now, group_minute, init_signal, shutdown_signal); if staging_files.is_empty() { - self.reset_staging_metrics(); + self.reset_staging_metrics(tenant_id); return Ok(None); } - self.update_staging_metrics(&staging_files); + self.update_staging_metrics(&staging_files, tenant_id); for (parquet_path, arrow_files) in staging_files { let record_reader = MergedReverseRecordReader::try_new(&arrow_files); if record_reader.readers.is_empty() { @@ -642,6 +655,7 @@ impl Stream { let schema = Arc::new(merged_schema); let part_path = parquet_path.with_extension("part"); + tracing::warn!(part_path=?part_path); if !self.write_parquet_part_file( &part_path, record_reader, @@ -655,7 +669,7 @@ impl Stream { if let Err(e) = std::fs::rename(&part_path, &parquet_path) { error!("Couldn't rename part file: {part_path:?} -> {parquet_path:?}, error = {e}"); } else { - self.cleanup_arrow_files_and_dir(&arrow_files); + self.cleanup_arrow_files_and_dir(&arrow_files, tenant_id); } } @@ -740,7 +754,8 @@ impl Stream { } } - fn cleanup_arrow_files_and_dir(&self, arrow_files: &[PathBuf]) { + fn cleanup_arrow_files_and_dir(&self, arrow_files: &[PathBuf], tenant_id: &Option) { + let tenant_str = tenant_id.as_deref().unwrap_or(DEFAULT_TENANT); for (i, file) in arrow_files.iter().enumerate() { match file.metadata() { Ok(meta) => { @@ -752,6 +767,7 @@ impl Stream { "staging", &self.stream_name, ARROW_FILE_EXTENSION, + tenant_str, ]) .sub(file_size as i64); } @@ -999,6 +1015,7 @@ impl Stream { &self, init_signal: bool, shutdown_signal: bool, + tenant_id: &Option, ) -> Result<(), StagingError> { let start_flush = Instant::now(); // Force flush for init or shutdown signals to convert all .part files to .arrows @@ -1013,7 +1030,7 @@ impl Stream { let start_convert = Instant::now(); - self.prepare_parquet(init_signal, shutdown_signal)?; + self.prepare_parquet(init_signal, shutdown_signal, tenant_id)?; trace!( "Converting arrows to parquet on stream ({}) took: {}s", self.stream_name, @@ -1024,8 +1041,11 @@ impl Stream { } } +// #[derive(Deref, DerefMut, Default)] +// pub struct 
Streams(RwLock>); + #[derive(Deref, DerefMut, Default)] -pub struct Streams(RwLock>); +pub struct Streams(RwLock>>); // PARSEABLE.streams should be updated // 1. During server start up @@ -1042,30 +1062,64 @@ impl Streams { stream_name: String, metadata: LogStreamMetadata, ingestor_id: Option, + tenant_id: &Option, ) -> StreamRef { let mut guard = self.write().expect(LOCK_EXPECT); - if let Some(stream) = guard.get(&stream_name) { + tracing::warn!( + "get_or_create\nstream- {stream_name}\ntenant- {tenant_id:?}\nmetadata- {metadata:?}\noptions- {options:?}" + ); + let tenant = tenant_id.as_ref().map_or(DEFAULT_TENANT, |v| v); + + if let Some(tenant_streams) = guard.get(tenant) + && let Some(stream) = tenant_streams.get(&stream_name) + { return stream.clone(); } - let stream = Stream::new(options, &stream_name, metadata, ingestor_id); - guard.insert(stream_name, stream.clone()); - + // if let Some(stream) = guard.get(&stream_name) { + // return stream.clone(); + // } + // guard.insert(stream_name, stream.clone()); + + let stream = Stream::new(options, &stream_name, metadata, ingestor_id, tenant_id); + tracing::warn!("creating new stream- {stream_name}"); + guard + .entry(tenant.to_owned()) + .or_default() + .insert(stream_name, stream.clone()); + tracing::warn!("inserted stream in mem"); stream } /// TODO: validate possibility of stream continuing to exist despite being deleted - pub fn delete(&self, stream_name: &str) { - self.write().expect(LOCK_EXPECT).remove(stream_name); + pub fn delete(&self, stream_name: &str, tenant_id: &Option) { + let tenant_id = tenant_id.as_ref().map_or(DEFAULT_TENANT, |v| v); + let mut guard = self.write().expect(LOCK_EXPECT); + if let Some(tenant_streams) = guard.get_mut(tenant_id) { + tenant_streams.remove(stream_name); + } + // self.write().expect(LOCK_EXPECT).remove(stream_name); } - pub fn contains(&self, stream_name: &str) -> bool { - self.read().expect(LOCK_EXPECT).contains_key(stream_name) + pub fn contains(&self, stream_name: &str, tenant_id: &Option) -> bool { + let tenant_id = tenant_id.as_ref().map_or(DEFAULT_TENANT, |v| v); + if let Some(tenant) = self.read().expect(LOCK_EXPECT).get(tenant_id) { + tenant.contains_key(stream_name) + } else { + tracing::warn!( + "Tenant with id {tenant_id} does not exist! 
Shouldn't happen (stream- {stream_name})" + ); + false + } } /// Returns the number of logstreams that parseable is aware of pub fn len(&self) -> usize { - self.read().expect(LOCK_EXPECT).len() + self.read() + .expect(LOCK_EXPECT) + .iter() + .map(|map| map.1.len()) + .sum() } /// Returns true if parseable is not aware of any streams @@ -1073,24 +1127,41 @@ impl Streams { self.len() == 0 } - /// Listing of logstream names that parseable is aware of - pub fn list(&self) -> Vec { - self.read() - .expect(LOCK_EXPECT) - .keys() - .map(String::clone) - .collect() + /// Listing of logstream names for a given tenant that parseable is aware of + pub fn list(&self, tenant_id: &Option) -> Vec { + let tenant_id = tenant_id.as_ref().map_or(DEFAULT_TENANT, |v| v); + + let guard = self.read().expect(LOCK_EXPECT); + if let Some(tenant_streams) = guard.get(tenant_id) { + tenant_streams.keys().map(String::clone).collect() + } else { + vec![] + } + + // self.read() + // .expect(LOCK_EXPECT) + // .get(&tenant_id) + // .and_then(|v|v.keys()) + // .map(f) + // .keys() + // .map(String::clone) + // .collect() } - pub fn list_internal_streams(&self) -> Vec { + pub fn list_internal_streams(&self, tenant_id: &Option) -> Vec { let map = self.read().expect(LOCK_EXPECT); - - map.iter() - .filter(|(_, stream)| { - stream.metadata.read().expect(LOCK_EXPECT).stream_type == StreamType::Internal - }) - .map(|(k, _)| k.clone()) - .collect() + let tenant_id = tenant_id.as_ref().map_or(DEFAULT_TENANT, |v| v); + if let Some(tenant_streams) = map.get(tenant_id) { + tenant_streams + .iter() + .filter(|(_, stream)| { + stream.metadata.read().expect(LOCK_EXPECT).stream_type == StreamType::Internal + }) + .map(|(k, _)| k.clone()) + .collect() + } else { + vec![] + } } /// Asynchronously flushes arrows and compacts into parquet data on all streams in staging, @@ -1101,14 +1172,26 @@ impl Streams { init_signal: bool, shutdown_signal: bool, ) { - let streams: Vec> = self - .read() - .expect(LOCK_EXPECT) - .values() - .map(Arc::clone) - .collect(); - for stream in streams { - joinset.spawn(async move { stream.flush_and_convert(init_signal, shutdown_signal) }); + let tenants = if let Some(tenants) = PARSEABLE.list_tenants() { + tenants + } else { + vec![DEFAULT_TENANT.to_owned()] + }; + tracing::warn!(flush_and_convert_tenants=?tenants); + tracing::warn!(parseable_streams_tenants=?self.read().unwrap().keys()); + for tenant_id in tenants { + let guard = self.read().expect(LOCK_EXPECT); + let streams: Vec> = if let Some(tenant_streams) = guard.get(&tenant_id) { + tenant_streams.values().map(Arc::clone).collect() + } else { + vec![] + }; + for stream in streams { + let tenant = tenant_id.clone(); + joinset.spawn(async move { + stream.flush_and_convert(init_signal, shutdown_signal, &Some(tenant)) + }); + } } } } diff --git a/src/prism/home/mod.rs b/src/prism/home/mod.rs index d3b0d36b6..cc9fbc8a6 100644 --- a/src/prism/home/mod.rs +++ b/src/prism/home/mod.rs @@ -38,6 +38,7 @@ use crate::{ stats::Stats, storage::{ObjectStorageError, ObjectStoreFormat, StreamType}, users::{dashboards::DASHBOARDS, filters::FILTERS}, + utils::get_tenant_id_from_key, }; type StreamMetadataResponse = Result< @@ -101,13 +102,12 @@ pub async fn generate_home_response( key: &SessionKey, include_internal: bool, ) -> Result { + let tenant_id = &get_tenant_id_from_key(key); // Execute these operations concurrently let (stream_titles_result, alerts_summary_result) = - tokio::join!(get_stream_titles(key), get_alerts_summary(key)); - + tokio::join!(get_stream_titles(key, 
tenant_id), get_alerts_summary(key)); let stream_titles = stream_titles_result?; let alerts_summary = alerts_summary_result?; - // Generate dates for date-wise stats let mut dates = (0..7) .map(|i| { @@ -122,7 +122,7 @@ pub async fn generate_home_response( // Process stream metadata concurrently let stream_metadata_futures = stream_titles .iter() - .map(|stream| get_stream_metadata(stream.clone())); + .map(|stream| get_stream_metadata(stream.clone(), tenant_id)); let stream_metadata_results: Vec = futures::future::join_all(stream_metadata_futures).await; @@ -216,6 +216,7 @@ fn get_top_5_streams_by_ingestion( async fn get_stream_metadata( stream: String, + tenant_id: &Option, ) -> Result< ( String, @@ -227,9 +228,8 @@ async fn get_stream_metadata( > { let obs = PARSEABLE .metastore - .get_all_stream_jsons(&stream, None) + .get_all_stream_jsons(&stream, None, tenant_id) .await?; - let mut stream_jsons = Vec::new(); for ob in obs { let stream_metadata: ObjectStoreFormat = match serde_json::from_slice(&ob) { @@ -303,12 +303,13 @@ pub async fn generate_home_search_response( query_value: &str, ) -> Result { let mut resources = Vec::new(); + let tenant_id = &get_tenant_id_from_key(key); let (alert_titles, correlation_titles, dashboard_titles, filter_titles, stream_titles) = tokio::join!( get_alert_titles(key, query_value), get_correlation_titles(key, query_value), - get_dashboard_titles(query_value), + get_dashboard_titles(query_value, tenant_id), get_filter_titles(key, query_value), - get_stream_titles(key) + get_stream_titles(key, tenant_id) ); let alerts = alert_titles?; @@ -334,10 +335,13 @@ pub async fn generate_home_search_response( } // Helper functions to split the work -async fn get_stream_titles(key: &SessionKey) -> Result, PrismHomeError> { +async fn get_stream_titles( + key: &SessionKey, + tenant_id: &Option, +) -> Result, PrismHomeError> { let stream_titles: Vec = PARSEABLE .metastore - .list_streams() + .list_streams(tenant_id) .await .map_err(|e| PrismHomeError::Anyhow(anyhow::Error::new(e)))? 
.into_iter() @@ -411,9 +415,12 @@ async fn get_correlation_titles( Ok(correlations) } -async fn get_dashboard_titles(query_value: &str) -> Result, PrismHomeError> { +async fn get_dashboard_titles( + query_value: &str, + tenant_id: &Option, +) -> Result, PrismHomeError> { let dashboard_titles = DASHBOARDS - .list_dashboards(0) + .list_dashboards(0, tenant_id) .await .iter() .filter_map(|dashboard| { diff --git a/src/prism/logstream/mod.rs b/src/prism/logstream/mod.rs index a6fdc1174..eed2598d7 100644 --- a/src/prism/logstream/mod.rs +++ b/src/prism/logstream/mod.rs @@ -35,13 +35,15 @@ use crate::{ logstream::error::StreamError, query::{QueryError, update_schema_when_distributed}, }, - hottier::HotTierError, - parseable::{PARSEABLE, StreamNotFound}, + hottier::{HotTierError, HotTierManager, StreamHotTier}, + parseable::{DEFAULT_TENANT, PARSEABLE, StreamNotFound}, query::{CountsRequest, CountsResponse, error::ExecuteError}, rbac::{Users, map::SessionKey, role::Action}, stats, storage::{StreamInfo, StreamType, retention::Retention}, - utils::time::TimeParseError, + tenants::TenantNotFound, + utils::{get_tenant_id_from_key, time::TimeParseError}, + validator::error::HotTierValidationError, }; #[derive(Serialize)] @@ -54,20 +56,25 @@ pub struct PrismLogstreamInfo { pub async fn get_prism_logstream_info( stream_name: &str, + tenant_id: &Option, ) -> Result { let (info, schema, stats) = tokio::join!( - get_stream_info_helper(stream_name), - get_stream_schema_helper(stream_name), - get_stats(stream_name), + get_stream_info_helper(stream_name, tenant_id), + get_stream_schema_helper(stream_name, tenant_id), + get_stats(stream_name, tenant_id), ); - + tracing::warn!("starting dataset info"); let info = info?; + tracing::warn!("got info"); let schema = schema?; - let stats = stats?; + tracing::warn!("got schema"); + // let stats = stats?; + let stats = QueriedStats::default(); + tracing::warn!("got FAKE stats"); // get retention let retention = PARSEABLE - .get_stream(stream_name)? + .get_stream(stream_name, tenant_id)? 
.get_retention() .unwrap_or_default(); @@ -79,14 +86,17 @@ pub async fn get_prism_logstream_info( }) } -async fn get_stream_schema_helper(stream_name: &str) -> Result, StreamError> { +async fn get_stream_schema_helper( + stream_name: &str, + tenant_id: &Option, +) -> Result, StreamError> { // Ensure parseable is aware of stream in distributed mode - if !PARSEABLE.check_or_load_stream(stream_name).await { + if !PARSEABLE.check_or_load_stream(stream_name, tenant_id).await { return Err(StreamNotFound(stream_name.to_owned()).into()); } - let stream = PARSEABLE.get_stream(stream_name)?; - match update_schema_when_distributed(&vec![stream_name.to_owned()]).await { + let stream = PARSEABLE.get_stream(stream_name, tenant_id)?; + match update_schema_when_distributed(&vec![stream_name.to_owned()], tenant_id).await { Ok(_) => { let schema = stream.get_schema(); Ok(schema) @@ -98,15 +108,19 @@ async fn get_stream_schema_helper(stream_name: &str) -> Result, Stre } } -async fn get_stats(stream_name: &str) -> Result { - let stats = stats::get_current_stats(stream_name, "json") +async fn get_stats( + stream_name: &str, + tenant_id: &Option, +) -> Result { + tracing::warn!("starting stats"); + let stats = stats::get_current_stats(stream_name, "json", tenant_id) .ok_or_else(|| StreamNotFound(stream_name.to_owned()))?; let ingestor_stats = if PARSEABLE - .get_stream(stream_name) + .get_stream(stream_name, tenant_id) .is_ok_and(|stream| stream.get_stream_type() == StreamType::UserDefined) { - Some(fetch_stats_from_ingestors(stream_name).await?) + Some(fetch_stats_from_ingestors(stream_name, tenant_id).await?) } else { None }; @@ -143,11 +157,14 @@ async fn get_stats(stream_name: &str) -> Result Result { +pub async fn get_stream_info_helper( + stream_name: &str, + tenant_id: &Option, +) -> Result { // For query mode, if the stream not found in memory map, - //check if it exists in the storage - //create stream and schema from storage - if !PARSEABLE.check_or_load_stream(stream_name).await { + // check if it exists in the storage + // create stream and schema from storage + if !PARSEABLE.check_or_load_stream(stream_name, tenant_id).await { return Err(StreamNotFound(stream_name.to_owned()).into()); } @@ -155,7 +172,7 @@ pub async fn get_stream_info_helper(stream_name: &str) -> Result result, @@ -168,13 +185,8 @@ pub async fn get_stream_info_helper(stream_name: &str) -> Result Result Result, PrismLogstreamError> { + let tenant_id = get_tenant_id_from_key(&key); if self.streams.is_empty() { - self.streams = PARSEABLE.streams.list(); + self.streams = PARSEABLE.streams.list(&tenant_id); } + tracing::warn!(get_datasets_streams=?self.streams); // Process streams concurrently let results = futures::future::join_all( self.streams .iter() - .map(|stream| self.process_stream(stream.clone(), key.clone())), + .map(|stream| self.process_stream(stream.clone(), key.clone(), &tenant_id)), ) .await; @@ -264,20 +278,25 @@ impl PrismDatasetRequest { &self, stream: String, key: SessionKey, + tenant_id: &Option, ) -> Result, PrismLogstreamError> { // Skip unauthorized streams if !self.is_authorized(&stream, &key) { + tracing::warn!("not authorized for datasets"); return Ok(None); } // Skip streams that don't exist - if !PARSEABLE.check_or_load_stream(&stream).await { + if !PARSEABLE.check_or_load_stream(&stream, tenant_id).await { + tracing::warn!("unable to load stream {stream} for tenant {tenant_id:?}"); return Ok(None); } // Process stream data - match get_prism_logstream_info(&stream).await { - Ok(info) => 
Ok(Some(self.build_dataset_response(stream, info).await?)), + match get_prism_logstream_info(&stream, tenant_id).await { + Ok(info) => Ok(Some( + self.build_dataset_response(stream, info, tenant_id).await?, + )), Err(err) => Err(err), } } @@ -297,21 +316,47 @@ impl PrismDatasetRequest { &self, stream: String, info: PrismLogstreamInfo, + tenant_id: &Option, ) -> Result { - // Get counts - let counts = self.get_counts(&stream).await?; + // Get hot tier info + let hottier = self.get_hot_tier_info(&stream, tenant_id).await?; - Ok(PrismDatasetResponse { + // Get counts + let counts = self.get_counts(&stream, tenant_id).await?; + tracing::warn!("got counts"); + let res = PrismDatasetResponse { stream, info: info.info, schema: info.schema, stats: info.stats, retention: info.retention, counts, - }) + }; + tracing::warn!(prism_logstream_res=?res); + Ok(res) + } + async fn get_hot_tier_info( + &self, + stream: &str, + tenant_id: &Option, + ) -> Result, PrismLogstreamError> { + match HotTierManager::global() { + Some(manager) => match manager.get_hot_tier(stream, tenant_id).await { + Ok(stats) => Ok(Some(stats)), + Err(HotTierError::HotTierValidationError(HotTierValidationError::NotFound(_))) => { + Ok(None) + } + Err(err) => Err(err.into()), + }, + None => Ok(None), + } } - async fn get_counts(&self, stream: &str) -> Result { + async fn get_counts( + &self, + stream: &str, + tenant_id: &Option, + ) -> Result { let count_request = CountsRequest { stream: stream.to_owned(), start_time: "1h".to_owned(), @@ -320,7 +365,7 @@ impl PrismDatasetRequest { conditions: None, }; - let records = count_request.get_bin_density().await?; + let records = count_request.get_bin_density(tenant_id).await?; Ok(CountsResponse { fields: vec!["start_time".into(), "end_time".into(), "count".into()], records, diff --git a/src/query/listing_table_builder.rs b/src/query/listing_table_builder.rs index ecdeae636..0b106e34f 100644 --- a/src/query/listing_table_builder.rs +++ b/src/query/listing_table_builder.rs @@ -98,7 +98,8 @@ impl ListingTableBuilder { // Use storage.list_dirs_relative for all prefixes and flatten results let mut listing = Vec::new(); for prefix in prefixes { - match storage.list_dirs_relative(&prefix).await { + // None because no new data will be created using this method + match storage.list_dirs_relative(&prefix, &None).await { Ok(paths) => { listing.extend(paths.into_iter().map(|p| prefix.join(p).to_string())); } diff --git a/src/query/mod.rs b/src/query/mod.rs index 9f74ead70..da092bcdd 100644 --- a/src/query/mod.rs +++ b/src/query/mod.rs @@ -50,8 +50,8 @@ use serde::{Deserialize, Serialize}; use serde_json::{Value, json}; use std::ops::Bound; use std::pin::Pin; -use std::sync::Arc; use std::sync::atomic::{AtomicUsize, Ordering}; +use std::sync::{Arc, RwLock}; use std::task::{Context, Poll}; use sysinfo::System; use tokio::runtime::Runtime; @@ -69,12 +69,12 @@ use crate::event::DEFAULT_TIMESTAMP_KEY; use crate::handlers::http::query::QueryError; use crate::metrics::increment_bytes_scanned_in_query_by_date; use crate::option::Mode; -use crate::parseable::PARSEABLE; +use crate::parseable::{DEFAULT_TENANT, PARSEABLE}; use crate::storage::{ObjectStorageProvider, ObjectStoreFormat}; use crate::utils::time::TimeRange; -pub static QUERY_SESSION: Lazy = - Lazy::new(|| Query::create_session_context(PARSEABLE.storage())); +// pub static QUERY_SESSION: Lazy = +// Lazy::new(|| Query::create_session_context(PARSEABLE.storage())); pub static QUERY_SESSION_STATE: Lazy = Lazy::new(|| 
Query::create_session_state(PARSEABLE.storage())); @@ -83,11 +83,49 @@ pub static QUERY_SESSION_STATE: Lazy = pub static QUERY_RUNTIME: Lazy = Lazy::new(|| Runtime::new().expect("Runtime should be constructible")); +pub static QUERY_SESSION: Lazy = Lazy::new(|| { + let ctx = Query::create_session_context(PARSEABLE.storage()); + InMemorySessionContext { + session_context: Arc::new(RwLock::new(ctx)), + } +}); + +pub struct InMemorySessionContext { + session_context: Arc>, +} + +impl InMemorySessionContext { + pub fn get_ctx(&self) -> SessionContext { + let ctx = self + .session_context + .read() + .expect("SessionContext should be readable"); + ctx.clone() + } + + pub fn add_schema(&self, tenant_id: &str) { + self.session_context + .write() + .expect("SessionContext should be writeable") + .catalog("datafusion") + .expect("Default catalog should be available") + .register_schema( + tenant_id, + Arc::new(GlobalSchemaProvider { + storage: PARSEABLE.storage().get_object_store(), + tenant_id: Some(tenant_id.to_owned()), + }), + ) + .expect("Should be able to register new schema"); + } +} + /// This function executes a query on the dedicated runtime, ensuring that the query is not isolated to a single thread/CPU /// at a time and has access to the entire thread pool, enabling better concurrent processing, and thus quicker results. pub async fn execute( query: Query, is_streaming: bool, + tenant_id: &Option, ) -> Result< ( Either< @@ -115,8 +153,9 @@ pub async fn execute( ), ExecuteError, > { + let id = tenant_id.clone(); QUERY_RUNTIME - .spawn(async move { query.execute(is_streaming).await }) + .spawn(async move { query.execute(is_streaming, &id).await }) .await .expect("The Join should have been successful") } @@ -134,22 +173,54 @@ impl Query { pub fn create_session_context(storage: Arc) -> SessionContext { let state = Self::create_session_state(storage.clone()); - let schema_provider = Arc::new(GlobalSchemaProvider { - storage: storage.get_object_store(), - }); - state + let catalog = state .catalog_list() .catalog(&state.config_options().catalog.default_catalog) - .expect("default catalog is provided by datafusion") - .register_schema( + .expect("default catalog is provided by datafusion"); + + // create one schema for each tenant + if PARSEABLE.options.is_multi_tenant() { + // register multiple schemas + if let Some(tenants) = PARSEABLE.list_tenants() { + for t in tenants.iter() { + let schema_provider = Arc::new(GlobalSchemaProvider { + storage: storage.get_object_store(), + tenant_id: Some(t.clone()), + }); + // tracing::warn!("registering_schema- {schema_provider:?}\nwith tenant- {t}"); + let _ = catalog.register_schema(t, schema_provider); + // tracing::warn!("result=> {r:?}"); + } + } + } else { + // register just one schema + let schema_provider = Arc::new(GlobalSchemaProvider { + storage: storage.get_object_store(), + tenant_id: None, + }); + let _ = catalog.register_schema( &state.config_options().catalog.default_schema, schema_provider, - ) - .unwrap(); + ); + } + + // state + // .catalog_list() + // .catalog(&state.config_options().catalog.default_catalog) + // .expect("default catalog is provided by datafusion") + // .register_schema( + // &state.config_options().catalog.default_schema, + // schema_provider, + // ) + // .unwrap(); SessionContext::new_with_state(state) } + // pub fn add_schema(&self, tenant_id: String, storage: Arc) { + // self. 
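// Illustrative sketch with std types only (`Ctx` and `SharedCtx` are stand-ins): the
// wrapper above guards a clonable context behind an RwLock so readers hand out cheap
// snapshots while `add_schema` takes the write lock to register a newly created tenant.
use std::sync::{Arc, RwLock};

#[derive(Clone, Default)]
struct Ctx {
    schemas: Vec<String>,
}

struct SharedCtx {
    inner: Arc<RwLock<Ctx>>,
}

impl SharedCtx {
    fn get_ctx(&self) -> Ctx {
        // Readers clone a snapshot, mirroring `InMemorySessionContext::get_ctx`.
        self.inner.read().expect("not poisoned").clone()
    }

    fn add_schema(&self, tenant_id: &str) {
        // Writers mutate in place, mirroring per-tenant schema registration.
        self.inner
            .write()
            .expect("not poisoned")
            .schemas
            .push(tenant_id.to_owned());
    }
}

fn main() {
    let shared = SharedCtx { inner: Arc::new(RwLock::new(Ctx::default())) };
    shared.add_schema("acme");
    assert_eq!(shared.get_ctx().schemas, vec!["acme".to_string()]);
}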
+ // } + fn create_session_state(storage: Arc) -> SessionState { let runtime_config = storage .get_datafusion_runtime() @@ -209,6 +280,7 @@ impl Query { pub async fn execute( &self, is_streaming: bool, + tenant_id: &Option, ) -> Result< ( Either< @@ -237,9 +309,10 @@ impl Query { ExecuteError, > { let df = QUERY_SESSION - .execute_logical_plan(self.final_logical_plan()) + .get_ctx() + .execute_logical_plan(self.final_logical_plan(tenant_id)) .await?; - + let tenant = tenant_id.as_ref().map_or(DEFAULT_TENANT, |v| v); let fields = df .schema() .fields() @@ -253,12 +326,13 @@ impl Query { } let plan = QUERY_SESSION + .get_ctx() .state() .create_physical_plan(df.logical_plan()) .await?; let results = if !is_streaming { - let task_ctx = QUERY_SESSION.task_ctx(); + let task_ctx = QUERY_SESSION.get_ctx().task_ctx(); let batches = collect_partitioned(plan.clone(), task_ctx.clone()) .await? @@ -270,11 +344,11 @@ impl Query { // Track billing metrics for query scan let current_date = chrono::Utc::now().date_naive().to_string(); - increment_bytes_scanned_in_query_by_date(actual_io_bytes, ¤t_date); + increment_bytes_scanned_in_query_by_date(actual_io_bytes, ¤t_date, tenant); Either::Left(batches) } else { - let task_ctx = QUERY_SESSION.task_ctx(); + let task_ctx = QUERY_SESSION.get_ctx().task_ctx(); let output_partitions = plan.output_partitioning().partition_count(); @@ -286,7 +360,8 @@ impl Query { let streams = execute_stream_partitioned(plan.clone(), task_ctx.clone())? .into_iter() .map(|s| { - let wrapped = PartitionedMetricMonitor::new(s, monitor_state.clone()); + let wrapped = + PartitionedMetricMonitor::new(s, monitor_state.clone(), tenant_id.clone()); Box::pin(wrapped) as SendableRecordBatchStream }) .collect_vec(); @@ -300,16 +375,20 @@ impl Query { Ok((results, fields)) } - pub async fn get_dataframe(&self) -> Result { + pub async fn get_dataframe( + &self, + tenant_id: &Option, + ) -> Result { let df = QUERY_SESSION - .execute_logical_plan(self.final_logical_plan()) + .get_ctx() + .execute_logical_plan(self.final_logical_plan(tenant_id)) .await?; Ok(df) } /// return logical plan with all time filters applied through - fn final_logical_plan(&self) -> LogicalPlan { + fn final_logical_plan(&self, tenant_id: &Option) -> LogicalPlan { // see https://github.com/apache/arrow-datafusion/pull/8400 // this can be eliminated in later version of datafusion but with slight caveat // transform cannot modify stringified plans by itself @@ -321,6 +400,7 @@ impl Query { plan.plan.as_ref().clone(), self.time_range.start.naive_utc(), self.time_range.end.naive_utc(), + tenant_id, ); LogicalPlan::Explain(Explain { explain_format: plan.explain_format, @@ -340,6 +420,7 @@ impl Query { x, self.time_range.start.naive_utc(), self.time_range.end.naive_utc(), + tenant_id, ) .data } @@ -450,16 +531,18 @@ impl CountsRequest { /// This function is supposed to read maninfest files for the given stream, /// get the sum of `num_rows` between the `startTime` and `endTime`, /// divide that by number of bins and return in a manner acceptable for the console - pub async fn get_bin_density(&self) -> Result, QueryError> { + pub async fn get_bin_density( + &self, + tenant_id: &Option, + ) -> Result, QueryError> { let time_partition = PARSEABLE - .get_stream(&self.stream) + .get_stream(&self.stream, tenant_id) .map_err(|err| anyhow::Error::msg(err.to_string()))? 
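// Illustrative sketch (assumes chrono; `bin_bounds` is a hypothetical helper): the doc
// comment above describes splitting the requested time range into equal bins before
// summing `num_rows` from the matching manifests. The bin arithmetic on its own:
use chrono::{DateTime, Utc};

fn bin_bounds(
    start: DateTime<Utc>,
    end: DateTime<Utc>,
    bins: u32,
) -> Vec<(DateTime<Utc>, DateTime<Utc>)> {
    // Assumes bins > 0 and end >= start.
    let step = (end - start) / bins as i32;
    (0..bins)
        .map(|i| {
            let lower = start + step * i as i32;
            // Clamp the last bin to the requested end to avoid rounding drift.
            let upper = if i + 1 == bins { end } else { lower + step };
            (lower, upper)
        })
        .collect()
}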
.get_time_partition() .unwrap_or_else(|| DEFAULT_TIMESTAMP_KEY.to_owned()); - // get time range let time_range = TimeRange::parse_human_time(&self.start_time, &self.end_time)?; - let all_manifest_files = get_manifest_list(&self.stream, &time_range).await?; + let all_manifest_files = get_manifest_list(&self.stream, &time_range, tenant_id).await?; // get bounds let counts = self.get_bounds(&time_range); @@ -644,12 +727,13 @@ pub fn resolve_stream_names(sql: &str) -> Result, anyhow::Error> { pub async fn get_manifest_list( stream_name: &str, time_range: &TimeRange, + tenant_id: &Option, ) -> Result, QueryError> { // get object store let object_store_format: ObjectStoreFormat = serde_json::from_slice( &PARSEABLE .metastore - .get_stream_json(stream_name, false) + .get_stream_json(stream_name, false, tenant_id) .await?, )?; @@ -660,7 +744,7 @@ pub async fn get_manifest_list( if PARSEABLE.options.mode == Mode::Query || PARSEABLE.options.mode == Mode::Prism { let obs = PARSEABLE .metastore - .get_all_stream_jsons(stream_name, None) + .get_all_stream_jsons(stream_name, None, tenant_id) .await; if let Ok(obs) = obs { for ob in obs { @@ -691,6 +775,7 @@ pub async fn get_manifest_list( manifest_item.time_lower_bound, manifest_item.time_upper_bound, Some(manifest_item.manifest_path.clone()), + tenant_id, ) .await?; let manifest = manifest_opt.ok_or_else(|| { @@ -711,13 +796,14 @@ fn transform( plan: LogicalPlan, start_time: NaiveDateTime, end_time: NaiveDateTime, + tenant_id: &Option, ) -> Transformed { plan.transform_up_with_subqueries(&|plan| { match plan { LogicalPlan::TableScan(table) => { // Get the specific time partition for this stream let time_partition = PARSEABLE - .get_stream(&table.table_name.to_string()) + .get_stream(&table.table_name.to_string(), tenant_id) .ok() .and_then(|stream| stream.get_time_partition()); @@ -863,16 +949,23 @@ pub struct PartitionedMetricMonitor { inner: SendableRecordBatchStream, /// State of the streams state: Arc, - // Ensure we only emit metrics once even if polled after completion/error + /// Ensure we only emit metrics once even if polled after completion/error is_finished: bool, + /// tenant id + tenant_id: Option, } impl PartitionedMetricMonitor { - fn new(inner: SendableRecordBatchStream, state: Arc) -> Self { + fn new( + inner: SendableRecordBatchStream, + state: Arc, + tenant_id: Option, + ) -> Self { Self { inner, state, is_finished: false, + tenant_id, } } } @@ -922,7 +1015,11 @@ impl PartitionedMetricMonitor { if prev_count == 1 { let bytes = get_total_bytes_scanned(&self.state.plan); let current_date = chrono::Utc::now().date_naive().to_string(); - increment_bytes_scanned_in_query_by_date(bytes, ¤t_date); + increment_bytes_scanned_in_query_by_date( + bytes, + ¤t_date, + self.tenant_id.as_ref().map_or(DEFAULT_TENANT, |v| v), + ); } } } diff --git a/src/query/stream_schema_provider.rs b/src/query/stream_schema_provider.rs index d6c6eb1ff..ca2a915c9 100644 --- a/src/query/stream_schema_provider.rs +++ b/src/query/stream_schema_provider.rs @@ -58,7 +58,7 @@ use crate::{ hottier::HotTierManager, metrics::{QUERY_CACHE_HIT, increment_files_scanned_in_query_by_date}, option::Mode, - parseable::{PARSEABLE, STREAM_EXISTS}, + parseable::{DEFAULT_TENANT, PARSEABLE, STREAM_EXISTS}, storage::{ObjectStorage, ObjectStoreFormat}, }; @@ -68,6 +68,7 @@ use super::listing_table_builder::ListingTableBuilder; #[derive(Debug)] pub struct GlobalSchemaProvider { pub storage: Arc, + pub tenant_id: Option, } #[async_trait::async_trait] @@ -77,17 +78,18 @@ impl SchemaProvider 
for GlobalSchemaProvider { } fn table_names(&self) -> Vec { - PARSEABLE.streams.list() + PARSEABLE.streams.list(&self.tenant_id) } async fn table(&self, name: &str) -> DataFusionResult>> { if self.table_exist(name) { Ok(Some(Arc::new(StandardTableProvider { schema: PARSEABLE - .get_stream(name) + .get_stream(name, &self.tenant_id) .expect(STREAM_EXISTS) .get_schema(), stream: name.to_owned(), + tenant_id: self.tenant_id.clone(), }))) } else { Ok(None) @@ -95,7 +97,7 @@ impl SchemaProvider for GlobalSchemaProvider { } fn table_exist(&self, name: &str) -> bool { - PARSEABLE.streams.contains(name) + PARSEABLE.get_stream(name, &self.tenant_id).is_ok() } } @@ -104,6 +106,7 @@ struct StandardTableProvider { schema: SchemaRef, // prefix under which to find snapshot stream: String, + tenant_id: Option, } impl StandardTableProvider { @@ -218,9 +221,15 @@ impl StandardTableProvider { .collect(); let (partitioned_files, statistics) = self.partitioned_files(hot_tier_files); + // let object_store_url = if let Some(tenant_id) = self.tenant_id.as_ref() { + // &format!("file:///{tenant_id}/") + // } else { + // "file:///" + // }; + let object_store_url = "file:///"; self.create_parquet_physical_plan( execution_plans, - ObjectStoreUrl::parse("file:///").unwrap(), + ObjectStoreUrl::parse(object_store_url).unwrap(), partitioned_files, statistics, projection, @@ -244,7 +253,7 @@ impl StandardTableProvider { state: &dyn Session, time_partition: Option<&String>, ) -> Result<(), DataFusionError> { - let Ok(staging) = PARSEABLE.get_stream(&self.stream) else { + let Ok(staging) = PARSEABLE.get_stream(&self.stream, &self.tenant_id) else { return Ok(()); }; @@ -270,12 +279,18 @@ impl StandardTableProvider { partitioned_files.push(file) } - // NOTE: There is the possibility of a parquet file being pushed to object store - // and deleted from staging in the time it takes for datafusion to get to it. - // Staging parquet execution plan + // // NOTE: There is the possibility of a parquet file being pushed to object store + // // and deleted from staging in the time it takes for datafusion to get to it. 
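// Illustrative sketch with std types only (`Streams` is a stand-in): once the schema
// provider carries a tenant, listing and existence checks become "streams of *this*
// tenant" rather than a global lookup, which is what the changes above implement.
use std::collections::{HashMap, HashSet};

struct Streams {
    by_tenant: HashMap<String, HashSet<String>>,
}

impl Streams {
    fn list(&self, tenant_id: &Option<String>) -> Vec<String> {
        let tenant = tenant_id.as_deref().unwrap_or("default");
        self.by_tenant
            .get(tenant)
            .map(|streams| streams.iter().cloned().collect())
            .unwrap_or_default()
    }

    fn contains(&self, name: &str, tenant_id: &Option<String>) -> bool {
        let tenant = tenant_id.as_deref().unwrap_or("default");
        self.by_tenant
            .get(tenant)
            .is_some_and(|streams| streams.contains(name))
    }
}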
+ // // Staging parquet execution plan + // let object_store_url = if let Some(tenant_id) = self.tenant_id.as_ref() { + // &format!("file://{tenant_id}/") + // } else { + // "file:///" + // }; + let object_store_url = "file:///"; self.create_parquet_physical_plan( execution_plans, - ObjectStoreUrl::parse("file:///").unwrap(), + ObjectStoreUrl::parse(object_store_url).unwrap(), vec![partitioned_files], Statistics::new_unknown(&self.schema), projection, @@ -421,7 +436,11 @@ impl StandardTableProvider { // Track billing metrics for query scan let current_date = chrono::Utc::now().date_naive().to_string(); - increment_files_scanned_in_query_by_date(file_count, ¤t_date); + increment_files_scanned_in_query_by_date( + file_count, + ¤t_date, + self.tenant_id.as_ref().map_or(DEFAULT_TENANT, |v| v), + ); (partitioned_files, statistics) } @@ -433,6 +452,7 @@ async fn collect_from_snapshot( filters: &[Expr], limit: Option, stream_name: &str, + tenant_id: &Option, ) -> Result, DataFusionError> { let mut manifest_files = Vec::new(); @@ -444,6 +464,7 @@ async fn collect_from_snapshot( manifest_item.time_lower_bound, manifest_item.time_upper_bound, Some(manifest_item.manifest_path), + tenant_id, ) .await .map_err(|e| DataFusionError::Plan(e.to_string()))?; @@ -509,13 +530,19 @@ impl TableProvider for StandardTableProvider { filters: &[Expr], limit: Option, ) -> Result, DataFusionError> { + tracing::warn!( + "entered scan with\ntenant- {:?}\nschema- {:?}\nstream- {}", + self.tenant_id, + self.schema, + self.stream + ); let mut execution_plans = vec![]; let glob_storage = PARSEABLE.storage.get_object_store(); let object_store_format: ObjectStoreFormat = serde_json::from_slice( &PARSEABLE .metastore - .get_stream_json(&self.stream, false) + .get_stream_json(&self.stream, false, &self.tenant_id) .await .map_err(|e| DataFusionError::Plan(e.to_string()))?, ) @@ -538,7 +565,7 @@ impl TableProvider for StandardTableProvider { if PARSEABLE.options.mode == Mode::Query || PARSEABLE.options.mode == Mode::Prism { let obs = PARSEABLE .metastore - .get_all_stream_jsons(&self.stream, None) + .get_all_stream_jsons(&self.stream, None, &self.tenant_id) .await; if let Ok(obs) = obs { for ob in obs { @@ -583,6 +610,7 @@ impl TableProvider for StandardTableProvider { filters, limit, &self.stream, + &self.tenant_id, ) .await?; @@ -592,7 +620,7 @@ impl TableProvider for StandardTableProvider { // Hot tier data fetch if let Some(hot_tier_manager) = HotTierManager::global() - && hot_tier_manager.check_stream_hot_tier_exists(&self.stream) + && hot_tier_manager.check_stream_hot_tier_exists(&self.stream, &self.tenant_id) { self.get_hottier_exectuion_plan( &mut execution_plans, @@ -612,9 +640,16 @@ impl TableProvider for StandardTableProvider { } let (partitioned_files, statistics) = self.partitioned_files(manifest_files); + // let object_store_url = if let Some(tenant_id) = self.tenant_id.as_ref() { + // glob_storage.store_url().join(tenant_id).unwrap() + // } else { + // glob_storage.store_url() + // }; + let object_store_url = glob_storage.store_url(); + tracing::warn!(object_store_url=?object_store_url); self.create_parquet_physical_plan( &mut execution_plans, - ObjectStoreUrl::parse(glob_storage.store_url()).unwrap(), + ObjectStoreUrl::parse(object_store_url).unwrap(), partitioned_files, statistics, projection, diff --git a/src/rbac/map.rs b/src/rbac/map.rs index c53bab113..98962e5b4 100644 --- a/src/rbac/map.rs +++ b/src/rbac/map.rs @@ -16,11 +16,12 @@ * */ +use crate::parseable::DEFAULT_TENANT; use 
crate::rbac::role::ParseableResourceType; use crate::rbac::user::{User, UserGroup}; use crate::{parseable::PARSEABLE, storage::StorageMetadata}; +use std::collections::HashMap; use std::collections::HashSet; -use std::{collections::HashMap, sync::Mutex}; use super::Response; use super::{ @@ -33,11 +34,12 @@ use once_cell::sync::{Lazy, OnceCell}; use rayon::iter::{IntoParallelRefIterator, ParallelIterator}; use std::sync::{RwLock, RwLockReadGuard, RwLockWriteGuard}; -pub type Roles = HashMap>; +pub type Roles = HashMap>>; pub static USERS: OnceCell> = OnceCell::new(); pub static ROLES: OnceCell> = OnceCell::new(); -pub static DEFAULT_ROLE: Lazy>> = Lazy::new(|| Mutex::new(None)); +pub static DEFAULT_ROLE: Lazy>>> = + Lazy::new(|| RwLock::new(HashMap::new())); pub static SESSIONS: OnceCell> = OnceCell::new(); pub static USER_GROUPS: OnceCell> = OnceCell::new(); @@ -58,11 +60,13 @@ pub fn write_user_groups() -> RwLockWriteGuard<'static, UserGroups> { } pub fn users() -> RwLockReadGuard<'static, Users> { - USERS - .get() - .expect("map is set") - .read() - .expect("not poisoned") + { + USERS + .get() + .expect("map is set") + .read() + .expect("not poisoned") + } } pub fn mut_users() -> RwLockWriteGuard<'static, Users> { @@ -74,11 +78,13 @@ pub fn mut_users() -> RwLockWriteGuard<'static, Users> { } pub fn roles() -> RwLockReadGuard<'static, Roles> { - ROLES - .get() - .expect("map is set") - .read() - .expect("not poisoned") + { + ROLES + .get() + .expect("map is set") + .read() + .expect("not poisoned") + } } pub fn mut_roles() -> RwLockWriteGuard<'static, Roles> { @@ -115,35 +121,61 @@ pub fn init(metadata: &StorageMetadata) { let mut roles = metadata.roles.clone(); DEFAULT_ROLE - .lock() + .write() .unwrap() - .clone_from(&metadata.default_role); + .insert(DEFAULT_TENANT.to_owned(), metadata.default_role.clone()); + + // DEFAULT_ROLE + // .lock() + // .unwrap() + // .clone_from(&metadata.default_role); - let admin_privilege = DefaultPrivilege::Admin; + let admin_privilege = DefaultPrivilege::SuperAdmin; let admin_permissions = RoleBuilder::from(&admin_privilege).build(); - roles.insert("admin".to_string(), vec![admin_privilege]); + roles.insert("super-admin".to_string(), vec![admin_privilege]); let mut users = Users::from(users); - let admin = user::get_admin_user(); + let admin = user::get_super_admin_user(); let admin_username = admin.userid().to_owned(); users.insert(admin); + let key = SessionKey::BasicAuth { + username: PARSEABLE.options.username.clone(), + password: PARSEABLE.options.password.clone(), + }; let mut sessions = Sessions::default(); - sessions.track_new( - admin_username, - SessionKey::BasicAuth { - username: PARSEABLE.options.username.clone(), - password: PARSEABLE.options.password.clone(), - }, - chrono::DateTime::::MAX_UTC, - admin_permissions, + // sessions.track_new( + // admin_username.clone(), + // SessionKey::BasicAuth { + // username: PARSEABLE.options.username.clone(), + // password: PARSEABLE.options.password.clone(), + // }, + // chrono::DateTime::::MAX_UTC, + // admin_permissions, + // ); + + sessions + .user_sessions + .entry(DEFAULT_TENANT.to_owned()) + .or_default() + .insert( + admin_username.clone(), + vec![(key.clone(), chrono::DateTime::::MAX_UTC)], + ); + sessions.active_sessions.insert( + key, + (admin_username, DEFAULT_TENANT.to_owned(), admin_permissions), ); + let mut map = HashMap::new(); + map.insert(DEFAULT_TENANT.to_owned(), roles); + ROLES.set(RwLock::new(map)).expect("map is only set once"); - ROLES.set(RwLock::new(roles)).expect("map is 
only set once"); USERS.set(RwLock::new(users)).expect("map is only set once"); + SESSIONS .set(RwLock::new(sessions)) .expect("map is only set once"); + USER_GROUPS .set(RwLock::new(UserGroups::from(user_groups))) .expect("Unable to create UserGroups map from storage"); @@ -160,12 +192,12 @@ pub enum SessionKey { #[derive(Debug, Default)] pub struct Sessions { - // map session key to user and their permission - active_sessions: HashMap)>, - // map user to one or more session + // map session key to user, tenant, and their permission + active_sessions: HashMap)>, + // map (tenant, user) to one or more session // this tracks session based on session id. Not basic auth // Ulid time contains expiration datetime - user_sessions: HashMap)>>, + user_sessions: HashMap)>>>, } impl Sessions { @@ -173,17 +205,28 @@ impl Sessions { self.active_sessions.keys().cloned().collect_vec() } + pub fn remove_tenant_sessions(&mut self, tenant_id: &str) { + self.active_sessions + .retain(|_, (_, tenantid, _)| !tenant_id.eq(tenantid)); + self.user_sessions.remove(tenant_id); + } + // only checks if the session is expired or not pub fn is_session_expired(&self, key: &SessionKey) -> bool { // fetch userid from session key - let userid = if let Some((user, _)) = self.active_sessions.get(key) { - user + let (userid, tenant_id) = if let Some((user, tenant_id, _)) = self.active_sessions.get(key) + { + (user, tenant_id) } else { return false; }; // check against user sessions if this session is still valid - let Some(session) = self.user_sessions.get(userid) else { + let session = if let Some(tenant_sessions) = self.user_sessions.get(tenant_id) + && let Some(session) = tenant_sessions.get(userid) + { + session + } else { return false; }; @@ -200,19 +243,27 @@ impl Sessions { key: SessionKey, expiry: DateTime, permissions: Vec, + tenant_id: &Option, ) { - self.remove_expired_session(&user); - let sessions = self.user_sessions.entry(user.clone()).or_default(); - sessions.push((key.clone(), expiry)); - self.active_sessions.insert(key, (user, permissions)); + // let tenant_id = get_tenant_id_from_key(&key); + let tenant_id = tenant_id.as_ref().map_or(DEFAULT_TENANT, |v| v); + self.remove_expired_session(&user, &tenant_id); + + let sessions = self.user_sessions.entry(tenant_id.to_owned()).or_default(); + sessions.insert(user.clone(), vec![(key.clone(), expiry)]); + // sessions.push((key.clone(), expiry)); + self.active_sessions + .insert(key, (user, tenant_id.to_string(), permissions)); } // remove a specific session pub fn remove_session(&mut self, key: &SessionKey) -> Option { - let (user, _) = self.active_sessions.remove(key)?; + let (user, tenant_id, _) = self.active_sessions.remove(key)?; - if let Some(items) = self.user_sessions.get_mut(&user) { - items.retain(|(session, _)| session != key); + if let Some(tenant_sessions) = self.user_sessions.get_mut(&tenant_id) + && let Some(sessions) = tenant_sessions.get_mut(&user) + { + sessions.retain(|(session, _)| session != key); Some(user) } else { None @@ -220,18 +271,33 @@ impl Sessions { } // remove sessions related to a user - pub fn remove_user(&mut self, username: &str) { - let sessions = self.user_sessions.remove(username); + pub fn remove_user(&mut self, username: &str, tenant_id: &str) { + // tracing::warn!("removing user- {username}, tenant_id- {tenant_id}"); + // tracing::warn!("active sessions- {:?}", self.active_sessions); + // tracing::warn!("user sessions- {:?}", self.user_sessions); + let sessions = if let Some(tenant_sessions) = 
self.user_sessions.get_mut(tenant_id) { + // tracing::warn!("found session for tenant- {tenant_id}"); + tenant_sessions.remove(username) + } else { + // tracing::warn!("not found session for tenant- {tenant_id}"); + None + }; if let Some(sessions) = sessions { + // tracing::warn!("found active sessions for user {username}- {sessions:?}"); sessions.into_iter().for_each(|(key, _)| { self.active_sessions.remove(&key); }) } } - fn remove_expired_session(&mut self, user: &str) { + fn remove_expired_session(&mut self, user: &str, tenant_id: &str) { let now = Utc::now(); - let Some(sessions) = self.user_sessions.get_mut(user) else { + + let sessions = if let Some(tenant_sessions) = self.user_sessions.get_mut(tenant_id) + && let Some(sessions) = tenant_sessions.get_mut(user) + { + sessions + } else { return; }; sessions.retain(|(_, expiry)| expiry < &now); @@ -239,7 +305,7 @@ impl Sessions { // get permission related to this session pub fn get(&self, key: &SessionKey) -> Option<&Vec> { - self.active_sessions.get(key).map(|(_, perms)| perms) + self.active_sessions.get(key).map(|(_, _, perms)| perms) } // returns None if user is not in the map @@ -251,108 +317,185 @@ impl Sessions { context_resource: Option<&str>, context_user: Option<&str>, ) -> Option { - self.active_sessions.get(key).map(|(username, perms)| { - let mut perms: HashSet = HashSet::from_iter(perms.clone()); - perms.extend(aggregate_group_permissions(username)); - - if perms.iter().any(|user_perm| { - match *user_perm { - // if any action is ALL then we we authorize - Permission::Unit(action) => action == required_action || action == Action::All, - Permission::Resource(action, ref resource_type) => { - match resource_type { - ParseableResourceType::Stream(resource_id) - | ParseableResourceType::Llm(resource_id) => { + // tracing::warn!( + // "required_action- {required_action:?} context_resource- {context_resource:?}, context_user usr- {context_user:?}" + // ); + self.active_sessions + .get(key) + .map(|(username, tenant_id, perms)| { + let mut perms: HashSet = HashSet::from_iter(perms.clone()); + perms.extend(aggregate_group_permissions(username, tenant_id)); + + if perms.iter().any(|user_perm| { + // tracing::warn!("user-perm- {user_perm:?}"); + match *user_perm { + // if any action is ALL then we we authorize + Permission::Unit(action) => { + action == required_action || action == Action::All + } + Permission::Resource(action, ref resource_type) => { + if let Some(resource_type) = resource_type.as_ref() { + // default flow for all actions other than global-ingestion (ingestion action without any dataset restriction) + match resource_type { + ParseableResourceType::Stream(resource_id) + | ParseableResourceType::Llm(resource_id) => { + let ok_resource = + if let Some(context_resource_id) = context_resource { + let is_internal = PARSEABLE + .get_stream( + context_resource_id, + &Some(tenant_id.to_owned()), + ) + .is_ok_and(|stream| { + stream.get_stream_type().eq( + &crate::storage::StreamType::Internal, + ) + }); + resource_id == context_resource_id + || resource_id == "*" + || is_internal + } else { + // if no resource to match then resource check is not needed + // WHEN IS THIS VALID?? 
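// Illustrative sketch (local stand-in types; it ignores the internal-stream
// exclusion): the branch above distinguishes a resource-scoped permission from the new
// "global ingestion" case where `resource_type` is `None`. The decision logic, reduced:
#[derive(Clone, Copy, PartialEq)]
enum Action {
    Ingest,
    Query,
    All,
}

enum Perm {
    Unit(Action),
    Resource(Action, Option<String>),
}

fn allows(perm: &Perm, required: Action, stream: Option<&str>) -> bool {
    match perm {
        // Unit permissions match on the action alone.
        Perm::Unit(a) => *a == required || *a == Action::All,
        // Scoped permission: action must match and the stream must match (or be "*").
        Perm::Resource(a, Some(resource)) => {
            (*a == required || *a == Action::All)
                && stream.map_or(true, |s| resource == s || resource == "*")
        }
        // Unscoped permission: only plain ingestion qualifies, for any stream.
        Perm::Resource(a, None) => *a == Action::Ingest && required == Action::Ingest,
    }
}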
+ true + }; + (action == required_action || action == Action::All) + && ok_resource + } + ParseableResourceType::All => { + action == required_action || action == Action::All + } + } + } else if resource_type.is_none() && action.eq(&Action::Ingest) { + // tracing::warn!("resource_type is None"); + // flow for global-ingestion let ok_resource = if let Some(context_resource_id) = context_resource { let is_internal = PARSEABLE - .get_stream(context_resource_id) + .get_stream( + context_resource_id, + &Some(tenant_id.to_owned()), + ) .is_ok_and(|stream| { stream .get_stream_type() .eq(&crate::storage::StreamType::Internal) }); - resource_id == context_resource_id - || resource_id == "*" - || is_internal + !is_internal } else { // if no resource to match then resource check is not needed // WHEN IS THIS VALID?? true }; - (action == required_action || action == Action::All) && ok_resource - } - ParseableResourceType::All => { - action == required_action || action == Action::All + // tracing::warn!(ok_resource=?ok_resource); + action == required_action && ok_resource + } else { + // the default flow (some resource_type and an action) was covered in the first if + // if the resource type is also None and action is not ingest then return with false + false } } + Permission::SelfUser if required_action == Action::GetUserRoles => { + context_user.map(|x| x == username).unwrap_or_default() + } + _ => false, } - Permission::SelfUser if required_action == Action::GetUserRoles => { - context_user.map(|x| x == username).unwrap_or_default() - } - _ => false, + }) { + // tracing::warn!("Authorized"); + Response::Authorized + } else { + // tracing::warn!("UnAuthorized"); + Response::UnAuthorized } - }) { - Response::Authorized - } else { - Response::UnAuthorized - } - }) + }) } - pub fn get_userid(&self, key: &SessionKey) -> Option<&String> { - self.active_sessions.get(key).map(|(userid, _)| userid) + pub fn get_user_and_tenant_id(&self, key: &SessionKey) -> Option<(String, String)> { + self.active_sessions + .get(key) + .map(|(userid, tenant_id, _)| (userid.clone(), tenant_id.clone())) } } -// UserMap is a map of [username --> User] +// UserMap is a map of [(username, tenant_id) --> User] // This map is populated at startup with the list of users from parseable.json file #[derive(Debug, Default, Clone, derive_more::Deref, derive_more::DerefMut)] -pub struct Users(HashMap); +pub struct Users(HashMap>); impl Users { pub fn insert(&mut self, user: User) { - self.0.insert(user.userid().to_owned(), user); + // tracing::warn!("inserting user- {user:?}"); + let tenant_id = user.tenant.as_ref().map_or(DEFAULT_TENANT, |v| v); + self.0 + .entry(tenant_id.to_owned()) + .or_default() + .insert(user.userid().to_owned(), user); + // self.0.insert(user.userid().to_owned(), user); } } impl From> for Users { fn from(users: Vec) -> Self { let mut map = Self::default(); - map.extend( - users - .into_iter() - .map(|user| (user.userid().to_owned(), user)), - ); + for user in users { + let tenant_id = user.tenant.as_ref().map_or(DEFAULT_TENANT, |v| v); + map.entry(tenant_id.to_owned()) + .or_default() + .insert(user.userid().to_owned(), user); + } + // map.extend( + // users + // .into_iter() + // .map(|user| (user.userid().to_owned(), user)), + // ); map } } -fn aggregate_group_permissions(username: &str) -> HashSet { +fn aggregate_group_permissions(username: &str, tenant_id: &String) -> HashSet { let mut group_perms = HashSet::new(); - let Some(user) = users().get(username).cloned() else { + let user = if let 
Some(tenant_users) = users().get(tenant_id) + && let Some(user) = tenant_users.get(username) + { + user.to_owned() + } else { return group_perms; }; + // let Some(user) = users().get(username).cloned() else { + // return group_perms; + // }; if user.user_groups.is_empty() { return group_perms; } for group_name in &user.user_groups { - let Some(group) = read_user_groups().get(group_name).cloned() else { + if let Some(groups) = read_user_groups().get(tenant_id) + && let Some(group) = groups.get(group_name) + { + for role_name in group.roles.iter() { + let privileges = if let Some(roles) = roles().get(tenant_id) + && let Some(privileges) = roles.get(role_name) + { + privileges.clone() + } else { + continue; + }; + // let Some(privileges) = roles().get(role_name).cloned() else { + // continue; + // }; + + for privilege in privileges { + group_perms.extend(RoleBuilder::from(&privilege).build()); + } + } + } else { continue; }; - - for role_name in &group.roles { - let Some(privileges) = roles().get(role_name).cloned() else { - continue; - }; - - for privilege in privileges { - group_perms.extend(RoleBuilder::from(&privilege).build()); - } - } + // let Some(group) = read_user_groups().get(group_name).cloned() else { + // continue; + // }; } group_perms @@ -360,22 +503,30 @@ fn aggregate_group_permissions(username: &str) -> HashSet { // Map of [user group ID --> UserGroup] // This map is populated at startup with the list of user groups from parseable.json file #[derive(Debug, Default, Clone, derive_more::Deref, derive_more::DerefMut)] -pub struct UserGroups(HashMap); +pub struct UserGroups(HashMap>); impl UserGroups { - pub fn insert(&mut self, user_group: UserGroup) { - self.0.insert(user_group.name.clone(), user_group); + pub fn insert(&mut self, user_group: UserGroup, tenant_id: &str) { + self.0 + .entry(tenant_id.to_owned()) + .or_default() + .insert(user_group.name.clone(), user_group); + // self.0.insert(user_group.name.clone(), user_group); } } impl From> for UserGroups { + // only gets called for parseable metadata hence default tenant value fn from(user_groups: Vec) -> Self { let mut map = Self::default(); - map.extend( - user_groups - .into_iter() - .map(|group| (group.name.to_owned(), group)), - ); + for group in user_groups.into_iter() { + map.insert(group, DEFAULT_TENANT); + } + // map.extend( + // user_groups + // .into_iter() + // .map(|group| (group.name.to_owned(), group)), + // ); map } } diff --git a/src/rbac/mod.rs b/src/rbac/mod.rs index 703372e19..4fe67a73c 100644 --- a/src/rbac/mod.rs +++ b/src/rbac/mod.rs @@ -29,9 +29,11 @@ use role::model::DefaultPrivilege; use serde::Serialize; use url::Url; +use crate::parseable::DEFAULT_TENANT; use crate::rbac::map::{mut_sessions, mut_users, read_user_groups, roles, sessions, users}; use crate::rbac::role::Action; use crate::rbac::user::User; +use crate::utils::get_tenant_id_from_key; use self::map::SessionKey; use self::role::{Permission, RoleBuilder}; @@ -39,11 +41,12 @@ use self::user::UserType; pub const EXPIRY_DURATION: Duration = Duration::hours(1); -#[derive(PartialEq)] +#[derive(PartialEq, Debug)] pub enum Response { Authorized, UnAuthorized, ReloadRequired, + Suspended(String), } // This type encapsulates both the user_map and auth_map @@ -52,86 +55,137 @@ pub struct Users; impl Users { pub fn put_user(&self, user: User) { - mut_sessions().remove_user(user.userid()); + let tenant_id = user.tenant.as_ref().map_or(DEFAULT_TENANT, |v| v); + mut_sessions().remove_user(user.userid(), tenant_id); mut_users().insert(user); } - pub 
fn get_user_groups(&self, userid: &str) -> HashSet { + pub fn get_user_groups(&self, userid: &str, tenant_id: &Option) -> HashSet { + let tenant_id = tenant_id.as_ref().map_or(DEFAULT_TENANT, |v| v); users() - .get(userid) - .map(|user| user.user_groups.clone()) + .get(tenant_id) + .filter(|users| users.get(userid).is_some()) + .map(|users| users.get(userid).unwrap().user_groups.clone()) .unwrap_or_default() } - pub fn get_user(&self, userid: &str) -> Option { - users().get(userid).cloned() + pub fn get_user(&self, userid: &str, tenant_id: &Option) -> Option { + let tenant_id = tenant_id.as_ref().map_or(DEFAULT_TENANT, |v| v); + let u = users() + .get(tenant_id) + .filter(|users| users.get(userid).is_some()) + .map(|users| users.get(userid).unwrap().to_owned()); + u + // .get(userid).cloned() } - pub fn is_oauth(&self, userid: &str) -> Option { - users().get(userid).map(|user| user.is_oauth()) + pub fn is_oauth(&self, userid: &str, tenant_id: &Option) -> Option { + let tenant_id = tenant_id.as_ref().map_or(DEFAULT_TENANT, |v| v); + users() + .get(tenant_id) + .filter(|users| users.get(userid).is_some()) + .map(|users| users.get(userid).unwrap().is_oauth()) + // users().get(userid).map(|user| user.is_oauth()) } - pub fn collect_user From<&'a User> + 'static>(&self) -> Vec { - users().values().map(|user| user.into()).collect_vec() + pub fn collect_user From<&'a User> + 'static>( + &self, + tenant_id: &Option, + ) -> Vec { + let tenant_id = tenant_id.as_ref().map_or(DEFAULT_TENANT, |v| v); + match users().get(tenant_id) { + Some(users) => users.values().map(|user| user.into()).collect_vec(), + None => vec![], + } } - pub fn get_role(&self, userid: &str) -> Vec { + pub fn get_role(&self, userid: &str, tenant_id: &Option) -> Vec { + let tenant_id = tenant_id.as_ref().map_or(DEFAULT_TENANT, |v| v); users() - .get(userid) - .map(|user| user.roles.iter().cloned().collect()) + .get(tenant_id) + .filter(|users| users.get(userid).is_some()) + .map(|users| users.get(userid).unwrap().roles.iter().cloned().collect()) + // .get(userid) + // .map(|user| user.roles.iter().cloned().collect()) .unwrap_or_default() } - pub fn delete_user(&self, userid: &str) { - mut_users().remove(userid); - mut_sessions().remove_user(userid); + pub fn delete_user(&mut self, userid: &str, tenant_id: &Option) { + let tenant_id = tenant_id.as_ref().map_or(DEFAULT_TENANT, |v| v); + self.remove_user(userid, tenant_id); + mut_sessions().remove_user(userid, tenant_id); + } + + fn remove_user(&mut self, userid: &str, tenant_id: &str) { + match mut_users().get_mut(tenant_id) { + Some(users) => { + users.remove(userid); + } + None => {} + } } // caller ensures that this operation is valid for the user - pub fn change_password_hash(&self, userid: &str, hash: &String) { - if let Some(User { - ty: UserType::Native(user), - .. - }) = mut_users().get_mut(userid) + pub fn change_password_hash(&self, userid: &str, hash: &String, tenant_id: &Option) { + let tenant_id = tenant_id.as_ref().map_or(DEFAULT_TENANT, |v| v); + if let Some(users) = mut_users().get_mut(tenant_id) + && let Some(User { + ty: UserType::Native(user), + .. 
+ }) = users.get_mut(userid) { user.password_hash.clone_from(hash); - mut_sessions().remove_user(userid); + mut_sessions().remove_user(userid, tenant_id); }; } - pub fn add_roles(&self, userid: &str, roles: HashSet) { - if let Some(user) = mut_users().get_mut(userid) { + pub fn add_roles(&self, userid: &str, roles: HashSet, tenant_id: &Option) { + let tenant_id = tenant_id.as_ref().map_or(DEFAULT_TENANT, |v| v); + if let Some(users) = mut_users().get_mut(tenant_id) + && let Some(user) = users.get_mut(userid) + { user.roles.extend(roles); - mut_sessions().remove_user(userid) + mut_sessions().remove_user(userid, tenant_id) }; } - pub fn remove_roles(&self, userid: &str, roles: HashSet) { - if let Some(user) = mut_users().get_mut(userid) { + pub fn remove_roles(&self, userid: &str, roles: HashSet, tenant_id: &Option) { + let tenant_id = tenant_id.as_ref().map_or(DEFAULT_TENANT, |v| v); + if let Some(users) = mut_users().get_mut(tenant_id) + && let Some(user) = users.get_mut(userid) + { let diff = HashSet::from_iter(user.roles.difference(&roles).cloned()); user.roles = diff; - mut_sessions().remove_user(userid) + mut_sessions().remove_user(userid, tenant_id) }; } - pub fn contains(&self, userid: &str) -> bool { - users().contains_key(userid) + pub fn contains(&self, userid: &str, tenant_id: &Option) -> bool { + let tenant_id = tenant_id.as_ref().map_or(DEFAULT_TENANT, |v| v); + match users().get(tenant_id) { + Some(users) => users.contains_key(userid), + None => false, + } } pub fn get_permissions(&self, session: &SessionKey) -> Vec { let mut permissions = sessions().get(session).cloned().unwrap_or_default(); - let Some(userid) = self.get_userid_from_session(session) else { + let Some((userid, tenant_id)) = self.get_userid_from_session(session) else { return permissions.into_iter().collect_vec(); }; - let user_groups = self.get_user_groups(&userid); + let user_groups = self.get_user_groups(&userid, &Some(tenant_id.clone())); for group in user_groups { - if let Some(group) = read_user_groups().get(&group) { + if let Some(groups) = read_user_groups().get(&tenant_id) + && let Some(group) = groups.get(&group) + { let group_roles = &group.roles; for role in group_roles { - if let Some(privelege_list) = roles().get(role) { - for privelege in privelege_list { + if let Some(roles) = roles().get(&tenant_id) + && let Some(privilege_list) = roles.get(role) + { + for privelege in privilege_list { permissions.extend(RoleBuilder::from(privelege).build()); } } @@ -150,12 +204,15 @@ impl Users { } pub fn new_session(&self, user: &User, session: SessionKey, expires_in: TimeDelta) { + let tenant_id = &user.tenant; + let tenant = tenant_id.as_ref().map_or(DEFAULT_TENANT, |v| v); mut_sessions().track_new( user.userid().to_owned(), session, Utc::now() + expires_in, - roles_to_permission(user.roles()), - ) + roles_to_permission(user.roles(), tenant), + tenant_id, + ); } pub fn authorize( @@ -167,20 +224,28 @@ impl Users { ) -> Response { // try fetch from auth map for faster auth flow if let Some(res) = sessions().check_auth(&key, action, context_stream, context_user) { + // tracing::warn!("returning with res- {res:?}"); return res; } - // attempt reloading permissions into new session for basic auth user // id user will be reloaded only through login endpoint let SessionKey::BasicAuth { username, password } = &key else { return Response::ReloadRequired; }; - if let Some( - user @ User { - ty: UserType::Native(basic_user), - .. 
- }, - ) = users().get(username) + + let tenant_id = if let Some(user) = self.get_user_from_basic(username, password) { + user.tenant + } else { + get_tenant_id_from_key(&key) + }; + let tenant = tenant_id.as_ref().map_or(DEFAULT_TENANT, |v| v); + if let Some(users) = users().get(tenant) + && let Some( + user @ User { + ty: UserType::Native(basic_user), + .. + }, + ) = users.get(username) { // if user exists and password matches // add this user to auth map @@ -190,7 +255,8 @@ impl Users { username.clone(), key.clone(), DateTime::::MAX_UTC, - roles_to_permission(user.roles()), + roles_to_permission(user.roles(), tenant), + &user.tenant, ); return sessions .check_auth(&key, action, context_stream, context_user) @@ -201,8 +267,22 @@ impl Users { Response::UnAuthorized } - pub fn get_userid_from_session(&self, session: &SessionKey) -> Option { - sessions().get_userid(session).cloned() + pub fn get_userid_from_session(&self, session: &SessionKey) -> Option<(String, String)> { + sessions().get_user_and_tenant_id(session) + } + + pub fn get_user_from_basic(&self, username: &str, password: &str) -> Option { + for (_, usermap) in users().iter() { + for (_, user) in usermap.iter() { + if let UserType::Native(basic) = &user.ty + && basic.username.eq(username) + && basic.verify_password(password) + { + return Some(user.clone()); + } + } + } + None } } @@ -230,16 +310,19 @@ pub struct UsersPrism { pub user_groups: HashSet, } -pub fn roles_to_permission(roles: Vec) -> Vec { +pub fn roles_to_permission(roles: Vec, tenant_id: &str) -> Vec { let mut perms = HashSet::new(); for role in &roles { let role_map = &map::roles(); - let Some(privilege_list) = role_map.get(role) else { + if let Some(roles) = role_map.get(tenant_id) + && let Some(privilege_list) = roles.get(role) + { + for privs in privilege_list { + perms.extend(RoleBuilder::from(privs).build()) + } + } else { continue; }; - for privs in privilege_list { - perms.extend(RoleBuilder::from(privs).build()) - } } perms.into_iter().collect() } diff --git a/src/rbac/role.rs b/src/rbac/role.rs index 2bcfc5fd5..23fbbbe51 100644 --- a/src/rbac/role.rs +++ b/src/rbac/role.rs @@ -60,6 +60,7 @@ pub enum Action { ListClusterMetrics, DeleteNode, All, + SuperAdmin, GetAnalytics, ListDashboard, GetDashboard, @@ -90,7 +91,7 @@ pub enum ParseableResourceType { #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub enum Permission { Unit(Action), - Resource(Action, ParseableResourceType), + Resource(Action, Option), SelfUser, } @@ -166,7 +167,8 @@ impl RoleBuilder { | Action::GetStats | Action::GetRetention | Action::PutRetention - | Action::All => Permission::Resource(action, self.resource_type.clone().unwrap()), + | Action::All + | Action::SuperAdmin => Permission::Resource(action, self.resource_type.clone()), }; perms.push(perm); } @@ -186,16 +188,24 @@ pub mod model { #[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize, Hash)] #[serde(tag = "privilege", rename_all = "lowercase")] pub enum DefaultPrivilege { + SuperAdmin, Admin, Editor, - Writer { resource: ParseableResourceType }, - Ingestor { resource: ParseableResourceType }, - Reader { resource: ParseableResourceType }, + Writer { + resource: ParseableResourceType, + }, + Ingestor { + resource: Option, + }, + Reader { + resource: ParseableResourceType, + }, } impl From<&DefaultPrivilege> for RoleBuilder { fn from(value: &DefaultPrivilege) -> Self { match value { + DefaultPrivilege::SuperAdmin => super_admin_perm_builder(), DefaultPrivilege::Admin => admin_perm_builder(), 
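// Illustrative sketch (stand-in types, same serde conventions as above): making the
// ingestor's `resource` optional means a role entry may omit it entirely to grant
// ingestion into any stream, while a present value keeps it scoped.
use serde::{Deserialize, Serialize};

#[derive(Debug, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
enum Resource {
    Stream(String),
    All,
}

#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "privilege", rename_all = "lowercase")]
enum Privilege {
    SuperAdmin,
    Ingestor { resource: Option<Resource> },
}

fn main() -> Result<(), serde_json::Error> {
    // `resource` omitted -> global ingestion; present -> scoped to one stream.
    let global: Privilege = serde_json::from_str(r#"{ "privilege": "ingestor" }"#)?;
    let scoped: Privilege = serde_json::from_str(
        r#"{ "privilege": "ingestor", "resource": { "stream": "app_logs" } }"#,
    )?;
    println!("{global:?} / {scoped:?}");
    Ok(())
}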
DefaultPrivilege::Editor => editor_perm_builder(), DefaultPrivilege::Writer { resource } => { @@ -205,12 +215,23 @@ pub mod model { reader_perm_builder().with_resource(resource.to_owned()) } DefaultPrivilege::Ingestor { resource } => { - ingest_perm_builder().with_resource(resource.to_owned()) + if let Some(resource) = resource.as_ref() { + ingest_perm_builder().with_resource(resource.to_owned()) + } else { + ingest_perm_builder() + } } } } } + fn super_admin_perm_builder() -> RoleBuilder { + RoleBuilder { + actions: vec![Action::All], + resource_type: Some(ParseableResourceType::All), + } + } + fn admin_perm_builder() -> RoleBuilder { RoleBuilder { actions: vec![Action::All], diff --git a/src/rbac/user.rs b/src/rbac/user.rs index 72158ea53..8fffc9749 100644 --- a/src/rbac/user.rs +++ b/src/rbac/user.rs @@ -24,14 +24,14 @@ use argon2::{ }; use openid::Bearer; -use rand::distributions::{Alphanumeric, DistString}; +use rand::{ + RngCore, + distributions::{Alphanumeric, DistString}, +}; use crate::{ - handlers::http::{ - modal::utils::rbac_utils::{get_metadata, put_metadata}, - rbac::{InvalidUserGroupError, RBACError}, - }, - parseable::PARSEABLE, + handlers::http::rbac::{InvalidUserGroupError, RBACError}, + parseable::{DEFAULT_TENANT, PARSEABLE}, rbac::map::{mut_sessions, read_user_groups, roles, users}, }; @@ -48,11 +48,12 @@ pub struct User { pub ty: UserType, pub roles: HashSet, pub user_groups: HashSet, + pub tenant: Option, } impl User { // create a new User and return self with password generated for said user. - pub fn new_basic(username: String) -> (Self, String) { + pub fn new_basic(username: String, tenant: Option) -> (Self, String) { let PassCode { password, hash } = Basic::gen_new_password(); ( Self { @@ -62,6 +63,7 @@ impl User { }), roles: HashSet::new(), user_groups: HashSet::new(), + tenant, }, password, ) @@ -72,6 +74,7 @@ impl User { roles: HashSet, user_info: UserInfo, bearer: Option, + tenant: Option, ) -> Self { Self { ty: UserType::OAuth(Box::new(OAuth { @@ -81,6 +84,7 @@ impl User { })), roles, user_groups: HashSet::new(), + tenant, } } @@ -127,7 +131,7 @@ pub struct Basic { impl Basic { // generate a new password pub fn gen_new_password() -> PassCode { - let password = Alphanumeric.sample_string(&mut rand::thread_rng(), 16); + let password = Alphanumeric.sample_string(&mut rand::thread_rng(), 32); let hash = gen_hash(&password); PassCode { password, hash } } @@ -150,7 +154,11 @@ pub fn verify(password_hash: &str, password: &str) -> bool { // generate a one way hash for password to be stored in metadata file // ref https://github.com/P-H-C/phc-string-format/blob/master/phc-sf-spec.md fn gen_hash(password: &str) -> String { - let salt = SaltString::generate(&mut OsRng); + let mut bytes = [0u8; 32]; + let r = &mut OsRng; + r.fill_bytes(&mut bytes); + let salt = SaltString::encode_b64(&bytes).unwrap(); + // let salt = SaltString::generate(&mut OsRng); let argon2 = Argon2::default(); argon2 .hash_password(password.as_bytes(), &salt) @@ -163,7 +171,7 @@ pub struct PassCode { pub hash: String, } -pub fn get_admin_user() -> User { +pub fn get_super_admin_user() -> User { let username = PARSEABLE.options.username.clone(); let password = PARSEABLE.options.password.clone(); let hashcode = gen_hash(&password); @@ -173,8 +181,9 @@ pub fn get_admin_user() -> User { username, password_hash: hashcode, }), - roles: ["admin".to_string()].into(), + roles: ["super-admin".to_string()].into(), user_groups: HashSet::new(), + tenant: None, } } @@ -237,6 +246,7 @@ pub struct GroupUser { pub 
userid: String, pub username: String, pub method: String, + pub tenant_id: Option, } impl PartialEq for GroupUser { @@ -274,6 +284,7 @@ impl GroupUser { userid: username.clone(), // Same value for basic users username: username.clone(), method: "native".to_string(), + tenant_id: user.tenant.clone(), }, UserType::OAuth(oauth) => { // For OAuth users, derive the display username from user_info @@ -289,6 +300,7 @@ impl GroupUser { userid: userid.clone(), username: display_username, method: "oauth".to_string(), + tenant_id: user.tenant.clone(), } } } @@ -362,14 +374,14 @@ impl UserGroup { UserGroup { name, roles, users } } - pub fn add_roles(&mut self, roles: HashSet) -> Result<(), RBACError> { + pub fn add_roles(&mut self, roles: HashSet, tenant_id: &str) -> Result<(), RBACError> { if roles.is_empty() { return Ok(()); } self.roles.extend(roles); // also refresh all user sessions for group_user in &self.users { - mut_sessions().remove_user(group_user.userid()); + mut_sessions().remove_user(group_user.userid(), tenant_id); } Ok(()) } @@ -381,7 +393,10 @@ impl UserGroup { self.users.extend(users.clone()); // also refresh all user sessions for group_user in &users { - mut_sessions().remove_user(group_user.userid()); + mut_sessions().remove_user( + group_user.userid(), + group_user.tenant_id.as_ref().map_or(DEFAULT_TENANT, |v| v), + ); } Ok(()) } @@ -400,7 +415,10 @@ impl UserGroup { // also refresh all user sessions for group_user in &self.users { - mut_sessions().remove_user(group_user.userid()); + mut_sessions().remove_user( + group_user.userid(), + group_user.tenant_id.as_ref().map_or(DEFAULT_TENANT, |v| v), + ); } Ok(()) } @@ -416,7 +434,10 @@ impl UserGroup { } // also refresh all user sessions for group_user in &removed_users { - mut_sessions().remove_user(group_user.userid()); + mut_sessions().remove_user( + group_user.userid(), + group_user.tenant_id.as_ref().map_or(DEFAULT_TENANT, |v| v), + ); } self.users.clone_from(&new_users); @@ -451,11 +472,11 @@ impl UserGroup { self.remove_users(users_to_remove) } - pub async fn update_in_metadata(&self) -> Result<(), RBACError> { - let mut metadata = get_metadata().await?; - metadata.user_groups.retain(|x| x.name != self.name); - metadata.user_groups.push(self.clone()); - put_metadata(&metadata).await?; - Ok(()) - } + // pub async fn update_in_metadata(&self, tenant_id: &Option) -> Result<(), RBACError> { + // let mut metadata = get_metadata(tenant_id).await?; + // metadata.user_groups.retain(|x| x.name != self.name); + // metadata.user_groups.push(self.clone()); + // put_metadata(&metadata).await?; + // Ok(()) + // } } diff --git a/src/rbac/utils.rs b/src/rbac/utils.rs index f7369cecc..e52daf89d 100644 --- a/src/rbac/utils.rs +++ b/src/rbac/utils.rs @@ -19,7 +19,10 @@ use std::collections::{HashMap, HashSet}; use url::Url; -use crate::{parseable::PARSEABLE, rbac::map::read_user_groups}; +use crate::{ + parseable::{DEFAULT_TENANT, PARSEABLE}, + rbac::map::read_user_groups, +}; use super::{ Users, UsersPrism, @@ -29,6 +32,7 @@ use super::{ }; pub fn to_prism_user(user: &User) -> UsersPrism { + let tenant_id = user.tenant.as_ref().map_or(DEFAULT_TENANT, |v| v); let (id, username, method, email, picture) = match &user.ty { UserType::Native(_) => (user.userid(), user.userid(), "native", None, None), UserType::OAuth(oauth) => { @@ -44,27 +48,43 @@ pub fn to_prism_user(user: &User) -> UsersPrism { } }; let direct_roles: HashMap> = Users - .get_role(id) + .get_role(id, &user.tenant) .iter() .filter_map(|role_name| { roles() - .get(role_name) - 
.map(|role| (role_name.to_owned(), role.clone())) + .get(tenant_id) + .filter(|roles| roles.get(role_name).is_some()) + // .map(|roles| { + // if let Some(role) = roles.get(role_name) { + // (role_name.to_owned(), role.clone()) + // } + // }) + // .get(role_name) + .map(|roles| { + let role = roles.get(role_name).unwrap(); + (role_name.to_owned(), role.clone()) + }) }) .collect(); let mut group_roles: HashMap>> = HashMap::new(); let mut user_groups = HashSet::new(); // user might be part of some user groups, fetch the roles from there as well - for user_group in Users.get_user_groups(user.userid()) { - if let Some(group) = read_user_groups().get(&user_group) { + for user_group in Users.get_user_groups(user.userid(), &user.tenant) { + if let Some(groups) = read_user_groups().get(tenant_id) + && let Some(group) = groups.get(&user_group) + { let ug_roles: HashMap> = group .roles .iter() .filter_map(|role_name| { roles() - .get(role_name) - .map(|role| (role_name.to_owned(), role.clone())) + .get(tenant_id) + .filter(|roles| roles.get(role_name).is_some()) + .map(|roles| { + let role = roles.get(role_name).unwrap(); + (role_name.to_owned(), role.clone()) + }) }) .collect(); group_roles.insert(group.name.clone(), ug_roles); diff --git a/src/stats.rs b/src/stats.rs index a6826c2a7..da50f7300 100644 --- a/src/stats.rs +++ b/src/stats.rs @@ -31,6 +31,7 @@ use crate::metrics::{ EVENTS_STORAGE_SIZE_DATE, LIFETIME_EVENTS_INGESTED, LIFETIME_EVENTS_INGESTED_SIZE, LIFETIME_EVENTS_STORAGE_SIZE, STORAGE_SIZE, }; +use crate::parseable::DEFAULT_TENANT; use crate::storage::{ObjectStorage, ObjectStorageError, ObjectStoreFormat}; /// Helper struct type created by copying stats values from metadata @@ -48,9 +49,13 @@ pub struct FullStats { pub deleted_stats: Stats, } -pub fn get_current_stats(stream_name: &str, format: &'static str) -> Option { - let event_labels = event_labels(stream_name, format); - let storage_size_labels = storage_size_labels(stream_name); +pub fn get_current_stats( + stream_name: &str, + format: &'static str, + tenant_id: &Option, +) -> Option { + let event_labels = event_labels(stream_name, format, tenant_id); + let storage_size_labels = storage_size_labels(stream_name, tenant_id); let events_ingested = EVENTS_INGESTED .get_metric_with_label_values(&event_labels) @@ -109,32 +114,39 @@ pub fn get_current_stats(stream_name: &str, format: &'static str) -> Option, + storage: &Arc, stream_name: &str, meta: ObjectStoreFormat, dates: Vec, + tenant_id: &Option, ) -> Result<(), ObjectStorageError> { let mut num_row: i64 = 0; let mut storage_size: i64 = 0; let mut ingestion_size: i64 = 0; - + let tenant = tenant_id.as_ref().map_or(DEFAULT_TENANT, |v| v); let mut manifests = meta.snapshot.manifest_list; manifests.retain(|item| dates.iter().any(|date| item.manifest_path.contains(date))); if !manifests.is_empty() { for manifest in manifests { let manifest_date = manifest.time_lower_bound.date_naive().to_string(); - let _ = - EVENTS_INGESTED_DATE.remove_label_values(&[stream_name, "json", &manifest_date]); + let _ = EVENTS_INGESTED_DATE.remove_label_values(&[ + stream_name, + "json", + &manifest_date, + tenant, + ]); let _ = EVENTS_INGESTED_SIZE_DATE.remove_label_values(&[ stream_name, "json", &manifest_date, + tenant, ]); let _ = EVENTS_STORAGE_SIZE_DATE.remove_label_values(&[ "data", stream_name, "parquet", &manifest_date, + tenant, ]); num_row += manifest.events_ingested as i64; @@ -143,26 +155,26 @@ pub async fn update_deleted_stats( } } EVENTS_DELETED - .with_label_values(&[stream_name, "json"]) + 
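// Illustrative sketch (self-contained, using the prometheus crate; counter and label
// names are placeholders): every stats metric gains a trailing tenant label, with the
// default tenant substituted when no tenant id is set, as in the calls above and below.
use prometheus::{IntCounterVec, Opts};

const DEFAULT_TENANT: &str = "default";

fn main() -> Result<(), prometheus::Error> {
    let events_ingested = IntCounterVec::new(
        Opts::new("events_ingested", "events ingested per stream"),
        &["stream", "format", "tenant"],
    )?;

    let tenant_id: Option<String> = None;
    let tenant = tenant_id.as_deref().unwrap_or(DEFAULT_TENANT);

    // Same counter family, now partitioned by tenant as well as stream and format.
    events_ingested
        .with_label_values(&["app_logs", "json", tenant])
        .inc_by(42);
    Ok(())
}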
.with_label_values(&[stream_name, "json", tenant]) .add(num_row); EVENTS_DELETED_SIZE - .with_label_values(&[stream_name, "json"]) + .with_label_values(&[stream_name, "json", tenant]) .add(ingestion_size); DELETED_EVENTS_STORAGE_SIZE - .with_label_values(&["data", stream_name, "parquet"]) + .with_label_values(&["data", stream_name, "parquet", tenant]) .add(storage_size); EVENTS_INGESTED - .with_label_values(&[stream_name, "json"]) + .with_label_values(&[stream_name, "json", tenant]) .sub(num_row); EVENTS_INGESTED_SIZE - .with_label_values(&[stream_name, "json"]) + .with_label_values(&[stream_name, "json", tenant]) .sub(ingestion_size); STORAGE_SIZE - .with_label_values(&["data", stream_name, "parquet"]) + .with_label_values(&["data", stream_name, "parquet", tenant]) .sub(storage_size); - let stats = get_current_stats(stream_name, "json"); + let stats = get_current_stats(stream_name, "json", tenant_id); if let Some(stats) = stats - && let Err(e) = storage.put_stats(stream_name, &stats).await + && let Err(e) = storage.put_stats(stream_name, &stats, tenant_id).await { warn!("Error updating stats to objectstore due to error [{}]", e); } @@ -170,9 +182,13 @@ pub async fn update_deleted_stats( Ok(()) } -pub fn delete_stats(stream_name: &str, format: &'static str) -> prometheus::Result<()> { - let event_labels = event_labels(stream_name, format); - let storage_size_labels = storage_size_labels(stream_name); +pub fn delete_stats( + stream_name: &str, + format: &'static str, + tenant_id: &Option, +) -> prometheus::Result<()> { + let event_labels = event_labels(stream_name, format, tenant_id); + let storage_size_labels = storage_size_labels(stream_name, tenant_id); remove_label_values(&EVENTS_INGESTED, &event_labels); remove_label_values(&EVENTS_INGESTED_SIZE, &event_labels); @@ -216,22 +232,42 @@ fn delete_with_label_prefix(metrics: &IntCounterVec, prefix: &[&str]) { } } -pub fn event_labels<'a>(stream_name: &'a str, format: &'static str) -> [&'a str; 2] { - [stream_name, format] +pub fn event_labels<'a>( + stream_name: &'a str, + format: &'static str, + tenant_id: &'a Option, +) -> [&'a str; 3] { + if let Some(tenant_id) = tenant_id.as_ref() { + [stream_name, format, tenant_id] + } else { + [stream_name, format, DEFAULT_TENANT] + } } -pub fn storage_size_labels(stream_name: &str) -> [&str; 3] { - ["data", stream_name, "parquet"] +pub fn storage_size_labels<'a>( + stream_name: &'a str, + tenant_id: &'a Option, +) -> [&'a str; 4] { + if let Some(tenant_id) = tenant_id.as_ref() { + ["data", stream_name, "parquet", tenant_id] + } else { + ["data", stream_name, "parquet", DEFAULT_TENANT] + } } pub fn event_labels_date<'a>( stream_name: &'a str, format: &'static str, date: &'a str, -) -> [&'a str; 3] { - [stream_name, format, date] + tenant_id: &'a str, +) -> [&'a str; 4] { + [stream_name, format, date, tenant_id] } -pub fn storage_size_labels_date<'a>(stream_name: &'a str, date: &'a str) -> [&'a str; 4] { - ["data", stream_name, "parquet", date] +pub fn storage_size_labels_date<'a>( + stream_name: &'a str, + date: &'a str, + tenant_id: &'a str, +) -> [&'a str; 5] { + ["data", stream_name, "parquet", date, tenant_id] } diff --git a/src/storage/azure_blob.rs b/src/storage/azure_blob.rs index 4920c1094..972b1ed43 100644 --- a/src/storage/azure_blob.rs +++ b/src/storage/azure_blob.rs @@ -55,7 +55,7 @@ use crate::{ increment_files_scanned_in_object_store_calls_by_date, increment_object_store_calls_by_date, }, - parseable::LogStream, + parseable::{DEFAULT_TENANT, LogStream}, }; use super::{ @@ -212,9 +212,14 
@@ pub struct BlobStore { } impl BlobStore { - async fn _get_object(&self, path: &RelativePath) -> Result { + async fn _get_object( + &self, + path: &RelativePath, + tenant_id: &Option, + ) -> Result { + let tenant = tenant_id.as_ref().map_or(DEFAULT_TENANT, |v| v); let resp = self.client.get(&to_object_store_path(path)).await; - increment_object_store_calls_by_date("GET", &Utc::now().date_naive().to_string()); + increment_object_store_calls_by_date("GET", &Utc::now().date_naive().to_string(), tenant); match resp { Ok(resp) => { @@ -223,11 +228,13 @@ impl BlobStore { "GET", 1, &Utc::now().date_naive().to_string(), + tenant, ); increment_bytes_scanned_in_object_store_calls_by_date( "GET", body.len() as u64, &Utc::now().date_naive().to_string(), + tenant, ); Ok(body) } @@ -239,15 +246,18 @@ impl BlobStore { &self, path: &RelativePath, resource: PutPayload, + tenant_id: &Option, ) -> Result<(), ObjectStorageError> { + let tenant = tenant_id.as_ref().map_or(DEFAULT_TENANT, |v| v); let resp = self.client.put(&to_object_store_path(path), resource).await; - increment_object_store_calls_by_date("PUT", &Utc::now().date_naive().to_string()); + increment_object_store_calls_by_date("PUT", &Utc::now().date_naive().to_string(), tenant); match resp { Ok(_) => { increment_files_scanned_in_object_store_calls_by_date( "PUT", 1, &Utc::now().date_naive().to_string(), + tenant, ); Ok(()) } @@ -255,11 +265,16 @@ impl BlobStore { } } - async fn _delete_prefix(&self, key: &str) -> Result<(), ObjectStorageError> { + async fn _delete_prefix( + &self, + key: &str, + tenant_id: &Option, + ) -> Result<(), ObjectStorageError> { + let tenant = tenant_id.as_ref().map_or(DEFAULT_TENANT, |v| v); let files_scanned = Arc::new(AtomicU64::new(0)); let files_deleted = Arc::new(AtomicU64::new(0)); let object_stream = self.client.list(Some(&(key.into()))); - increment_object_store_calls_by_date("LIST", &Utc::now().date_naive().to_string()); + increment_object_store_calls_by_date("LIST", &Utc::now().date_naive().to_string(), tenant); object_stream .for_each_concurrent(None, |x| async { @@ -272,6 +287,7 @@ impl BlobStore { increment_object_store_calls_by_date( "DELETE", &Utc::now().date_naive().to_string(), + tenant, ); if delete_resp.is_err() { error!( @@ -291,21 +307,28 @@ impl BlobStore { "LIST", files_scanned.load(Ordering::Relaxed), &Utc::now().date_naive().to_string(), + tenant, ); increment_files_scanned_in_object_store_calls_by_date( "DELETE", files_deleted.load(Ordering::Relaxed), &Utc::now().date_naive().to_string(), + tenant, ); Ok(()) } - async fn _list_dates(&self, stream: &str) -> Result, ObjectStorageError> { + async fn _list_dates( + &self, + stream: &str, + tenant_id: &Option, + ) -> Result, ObjectStorageError> { let resp: Result = self .client .list_with_delimiter(Some(&(stream.into()))) .await; - increment_object_store_calls_by_date("LIST", &Utc::now().date_naive().to_string()); + let tenant = tenant_id.as_ref().map_or(DEFAULT_TENANT, |v| v); + increment_object_store_calls_by_date("LIST", &Utc::now().date_naive().to_string(), tenant); let resp = match resp { Ok(resp) => resp, @@ -320,6 +343,7 @@ impl BlobStore { "LIST", common_prefixes.len() as u64, &Utc::now().date_naive().to_string(), + tenant, ); // return prefixes at the root level @@ -332,17 +356,23 @@ impl BlobStore { Ok(dates) } - async fn _upload_file(&self, key: &str, path: &Path) -> Result<(), ObjectStorageError> { + async fn _upload_file( + &self, + key: &str, + path: &Path, + tenant_id: &Option, + ) -> Result<(), ObjectStorageError> { let bytes = 
tokio::fs::read(path).await?; - + let tenant = tenant_id.as_ref().map_or(DEFAULT_TENANT, |v| v); let result = self.client.put(&key.into(), bytes.into()).await; - increment_object_store_calls_by_date("PUT", &Utc::now().date_naive().to_string()); + increment_object_store_calls_by_date("PUT", &Utc::now().date_naive().to_string(), tenant); match result { Ok(_) => { increment_files_scanned_in_object_store_calls_by_date( "PUT", 1, &Utc::now().date_naive().to_string(), + tenant, ); Ok(()) } @@ -354,10 +384,11 @@ impl BlobStore { &self, key: &RelativePath, path: &Path, + tenant_id: &Option, ) -> Result<(), ObjectStorageError> { let mut file = OpenOptions::new().read(true).open(path).await?; let location = &to_object_store_path(key); - + let tenant = tenant_id.as_ref().map_or(DEFAULT_TENANT, |v| v); let async_writer = self.client.put_multipart(location).await; let mut async_writer = match async_writer { Ok(writer) => writer, @@ -372,7 +403,11 @@ impl BlobStore { let mut data = Vec::new(); file.read_to_end(&mut data).await?; let result = self.client.put(location, data.into()).await; - increment_object_store_calls_by_date("PUT", &Utc::now().date_naive().to_string()); + increment_object_store_calls_by_date( + "PUT", + &Utc::now().date_naive().to_string(), + tenant, + ); match result { Ok(_) => { @@ -380,6 +415,7 @@ impl BlobStore { "PUT", 1, &Utc::now().date_naive().to_string(), + tenant, ); } Err(err) => { @@ -420,6 +456,7 @@ impl BlobStore { increment_object_store_calls_by_date( "PUT_MULTIPART", &Utc::now().date_naive().to_string(), + tenant, ); } @@ -440,6 +477,7 @@ impl ObjectStorage for BlobStore { async fn get_buffered_reader( &self, _path: &RelativePath, + _tenant_id: &Option, ) -> Result { Err(ObjectStorageError::UnhandledError(Box::new( std::io::Error::new( @@ -453,39 +491,51 @@ impl ObjectStorage for BlobStore { &self, key: &RelativePath, path: &Path, + tenant_id: &Option, ) -> Result<(), ObjectStorageError> { - self._upload_multipart(key, path).await + self._upload_multipart(key, path, tenant_id).await } - async fn head(&self, path: &RelativePath) -> Result { + async fn head( + &self, + path: &RelativePath, + tenant_id: &Option, + ) -> Result { + let tenant = tenant_id.as_ref().map_or(DEFAULT_TENANT, |v| v); let result = self.client.head(&to_object_store_path(path)).await; - increment_object_store_calls_by_date("HEAD", &Utc::now().date_naive().to_string()); + increment_object_store_calls_by_date("HEAD", &Utc::now().date_naive().to_string(), tenant); if result.is_ok() { increment_files_scanned_in_object_store_calls_by_date( "HEAD", 1, &Utc::now().date_naive().to_string(), + tenant, ); } Ok(result?) } - async fn get_object(&self, path: &RelativePath) -> Result { - Ok(self._get_object(path).await?) + async fn get_object( + &self, + path: &RelativePath, + tenant_id: &Option, + ) -> Result { + Ok(self._get_object(path, tenant_id).await?) 
} async fn get_objects( &self, base_path: Option<&RelativePath>, filter_func: Box bool + Send>, + tenant_id: &Option, ) -> Result, ObjectStorageError> { let prefix = if let Some(base_path) = base_path { to_object_store_path(base_path) } else { self.root.clone() }; - + let tenant = tenant_id.as_ref().map_or(DEFAULT_TENANT, |v| v); let mut list_stream = self.client.list(Some(&prefix)); let mut res = vec![]; @@ -511,6 +561,7 @@ impl ObjectStorage for BlobStore { .get_object( RelativePath::from_path(meta.location.as_ref()) .map_err(ObjectStorageError::PathError)?, + tenant_id, ) .await?; res.push(byts); @@ -521,19 +572,21 @@ impl ObjectStorage for BlobStore { "LIST", files_scanned as u64, &Utc::now().date_naive().to_string(), + tenant, ); - increment_object_store_calls_by_date("LIST", &Utc::now().date_naive().to_string()); + increment_object_store_calls_by_date("LIST", &Utc::now().date_naive().to_string(), tenant); Ok(res) } async fn get_ingestor_meta_file_paths( &self, + tenant_id: &Option, ) -> Result, ObjectStorageError> { let mut path_arr = vec![]; let mut files_scanned = 0; - + let tenant = tenant_id.as_ref().map_or(DEFAULT_TENANT, |v| v); let mut object_stream = self.client.list(Some(&self.root)); - increment_object_store_calls_by_date("LIST", &Utc::now().date_naive().to_string()); + increment_object_store_calls_by_date("LIST", &Utc::now().date_naive().to_string(), tenant); while let Some(meta_result) = object_stream.next().await { let meta = match meta_result { @@ -555,6 +608,7 @@ impl ObjectStorage for BlobStore { "LIST", files_scanned as u64, &Utc::now().date_naive().to_string(), + tenant, ); Ok(path_arr) } @@ -563,68 +617,98 @@ impl ObjectStorage for BlobStore { &self, path: &RelativePath, resource: Bytes, + tenant_id: &Option, ) -> Result<(), ObjectStorageError> { - self._put_object(path, resource.into()) + self._put_object(path, resource.into(), tenant_id) .await .map_err(|err| ObjectStorageError::ConnectionError(Box::new(err)))?; Ok(()) } - async fn delete_prefix(&self, path: &RelativePath) -> Result<(), ObjectStorageError> { - self._delete_prefix(path.as_ref()).await?; + async fn delete_prefix( + &self, + path: &RelativePath, + tenant_id: &Option, + ) -> Result<(), ObjectStorageError> { + self._delete_prefix(path.as_ref(), tenant_id).await?; Ok(()) } - async fn delete_object(&self, path: &RelativePath) -> Result<(), ObjectStorageError> { + async fn delete_object( + &self, + path: &RelativePath, + tenant_id: &Option, + ) -> Result<(), ObjectStorageError> { let result = self.client.delete(&to_object_store_path(path)).await; - increment_object_store_calls_by_date("DELETE", &Utc::now().date_naive().to_string()); + let tenant = tenant_id.as_ref().map_or(DEFAULT_TENANT, |v| v); + increment_object_store_calls_by_date( + "DELETE", + &Utc::now().date_naive().to_string(), + tenant, + ); if result.is_ok() { increment_files_scanned_in_object_store_calls_by_date( "DELETE", 1, &Utc::now().date_naive().to_string(), + tenant, ); } Ok(result?) 
} - async fn check(&self) -> Result<(), ObjectStorageError> { + async fn check(&self, tenant_id: &Option) -> Result<(), ObjectStorageError> { let result = self .client .head(&to_object_store_path(&parseable_json_path())) .await; - increment_object_store_calls_by_date("HEAD", &Utc::now().date_naive().to_string()); + let tenant = tenant_id.as_ref().map_or(DEFAULT_TENANT, |v| v); + increment_object_store_calls_by_date("HEAD", &Utc::now().date_naive().to_string(), tenant); if result.is_ok() { increment_files_scanned_in_object_store_calls_by_date( "HEAD", 1, &Utc::now().date_naive().to_string(), + tenant, ); } Ok(result.map(|_| ())?) } - async fn delete_stream(&self, stream_name: &str) -> Result<(), ObjectStorageError> { - self._delete_prefix(stream_name).await?; + async fn delete_stream( + &self, + stream_name: &str, + tenant_id: &Option, + ) -> Result<(), ObjectStorageError> { + self._delete_prefix(stream_name, tenant_id).await?; Ok(()) } - async fn try_delete_node_meta(&self, node_filename: String) -> Result<(), ObjectStorageError> { + async fn try_delete_node_meta( + &self, + node_filename: String, + tenant_id: &Option, + ) -> Result<(), ObjectStorageError> { let file = RelativePathBuf::from(&node_filename); - + let tenant = tenant_id.as_ref().map_or(DEFAULT_TENANT, |v| v); let result = self.client.delete(&to_object_store_path(&file)).await; - increment_object_store_calls_by_date("DELETE", &Utc::now().date_naive().to_string()); + increment_object_store_calls_by_date( + "DELETE", + &Utc::now().date_naive().to_string(), + tenant, + ); match result { Ok(_) => { increment_files_scanned_in_object_store_calls_by_date( "DELETE", 1, &Utc::now().date_naive().to_string(), + tenant, ); Ok(()) } @@ -647,8 +731,13 @@ impl ObjectStorage for BlobStore { "LIST", common_prefixes.len() as u64, &Utc::now().date_naive().to_string(), + DEFAULT_TENANT, + ); + increment_object_store_calls_by_date( + "LIST", + &Utc::now().date_naive().to_string(), + DEFAULT_TENANT, ); - increment_object_store_calls_by_date("LIST", &Utc::now().date_naive().to_string()); // return prefixes at the root level let dirs: HashSet<_> = common_prefixes .iter() @@ -663,7 +752,11 @@ impl ObjectStorage for BlobStore { let key = format!("{dir}/{STREAM_METADATA_FILE_NAME}"); let task = async move { let result = self.client.head(&StorePath::from(key)).await; - increment_object_store_calls_by_date("HEAD", &Utc::now().date_naive().to_string()); + increment_object_store_calls_by_date( + "HEAD", + &Utc::now().date_naive().to_string(), + DEFAULT_TENANT, + ); result.map(|_| ()) }; stream_json_check.push(task); @@ -672,14 +765,19 @@ impl ObjectStorage for BlobStore { "HEAD", dirs.len() as u64, &Utc::now().date_naive().to_string(), + DEFAULT_TENANT, ); stream_json_check.try_collect::<()>().await?; Ok(dirs) } - async fn list_dates(&self, stream_name: &str) -> Result, ObjectStorageError> { - let streams = self._list_dates(stream_name).await?; + async fn list_dates( + &self, + stream_name: &str, + tenant_id: &Option, + ) -> Result, ObjectStorageError> { + let streams = self._list_dates(stream_name, tenant_id).await?; Ok(streams) } @@ -688,15 +786,18 @@ impl ObjectStorage for BlobStore { &self, stream_name: &str, date: &str, + tenant_id: &Option, ) -> Result, ObjectStorageError> { let pre = object_store::path::Path::from(format!("{}/{}/", stream_name, date)); let resp = self.client.list_with_delimiter(Some(&pre)).await?; + let tenant = tenant_id.as_ref().map_or(DEFAULT_TENANT, |v| v); increment_files_scanned_in_object_store_calls_by_date( "LIST", 
resp.common_prefixes.len() as u64, &Utc::now().date_naive().to_string(), + tenant, ); - increment_object_store_calls_by_date("LIST", &Utc::now().date_naive().to_string()); + increment_object_store_calls_by_date("LIST", &Utc::now().date_naive().to_string(), tenant); let hours: Vec = resp .common_prefixes @@ -723,15 +824,18 @@ impl ObjectStorage for BlobStore { stream_name: &str, date: &str, hour: &str, + tenant_id: &Option, ) -> Result, ObjectStorageError> { let pre = object_store::path::Path::from(format!("{}/{}/{}/", stream_name, date, hour)); let resp = self.client.list_with_delimiter(Some(&pre)).await?; + let tenant = tenant_id.as_ref().map_or(DEFAULT_TENANT, |v| v); increment_files_scanned_in_object_store_calls_by_date( "LIST", resp.common_prefixes.len() as u64, &Utc::now().date_naive().to_string(), + tenant, ); - increment_object_store_calls_by_date("LIST", &Utc::now().date_naive().to_string()); + increment_object_store_calls_by_date("LIST", &Utc::now().date_naive().to_string(), tenant); let minutes: Vec = resp .common_prefixes .iter() @@ -753,8 +857,13 @@ impl ObjectStorage for BlobStore { Ok(minutes) } - async fn upload_file(&self, key: &str, path: &Path) -> Result<(), ObjectStorageError> { - Ok(self._upload_file(key, path).await?) + async fn upload_file( + &self, + key: &str, + path: &Path, + tenant_id: &Option, + ) -> Result<(), ObjectStorageError> { + Ok(self._upload_file(key, path, tenant_id).await?) } fn absolute_url(&self, prefix: &RelativePath) -> object_store::path::Path { @@ -779,17 +888,21 @@ impl ObjectStorage for BlobStore { Url::parse(&url_string).unwrap() } - async fn list_dirs(&self) -> Result, ObjectStorageError> { + async fn list_dirs( + &self, + tenant_id: &Option, + ) -> Result, ObjectStorageError> { let pre = object_store::path::Path::from("/"); - + let tenant = tenant_id.as_ref().map_or(DEFAULT_TENANT, |v| v); let resp = self.client.list_with_delimiter(Some(&pre)).await; - increment_object_store_calls_by_date("LIST", &Utc::now().date_naive().to_string()); + increment_object_store_calls_by_date("LIST", &Utc::now().date_naive().to_string(), tenant); let resp = match resp { Ok(resp) => { increment_files_scanned_in_object_store_calls_by_date( "LIST", resp.common_prefixes.len() as u64, &Utc::now().date_naive().to_string(), + tenant, ); resp @@ -810,16 +923,19 @@ impl ObjectStorage for BlobStore { async fn list_dirs_relative( &self, relative_path: &RelativePath, + tenant_id: &Option, ) -> Result, ObjectStorageError> { + let tenant = tenant_id.as_ref().map_or(DEFAULT_TENANT, |v| v); let prefix = object_store::path::Path::from(relative_path.as_str()); let resp = self.client.list_with_delimiter(Some(&prefix)).await; - increment_object_store_calls_by_date("LIST", &Utc::now().date_naive().to_string()); + increment_object_store_calls_by_date("LIST", &Utc::now().date_naive().to_string(), tenant); let resp = match resp { Ok(resp) => { increment_files_scanned_in_object_store_calls_by_date( "LIST", resp.common_prefixes.len() as u64, &Utc::now().date_naive().to_string(), + tenant, ); resp diff --git a/src/storage/field_stats.rs b/src/storage/field_stats.rs index 817a66cd3..a7d47b1c3 100644 --- a/src/storage/field_stats.rs +++ b/src/storage/field_stats.rs @@ -90,6 +90,7 @@ pub async fn calculate_field_stats( parquet_path: &Path, schema: &Schema, max_field_statistics: usize, + tenant_id: &Option, ) -> Result { //create datetime from timestamp present in parquet path let parquet_ts = extract_datetime_from_parquet_path_regex(parquet_path).map_err(|e| { @@ -99,7 +100,13 @@ pub async 
fn calculate_field_stats( )) })?; let field_stats = { - let ctx = SessionContext::new_with_state(QUERY_SESSION_STATE.clone()); + let mut session_state = QUERY_SESSION_STATE.clone(); + session_state + .config_mut() + .options_mut() + .catalog + .default_schema = tenant_id.as_ref().map_or("public".into(), |v| v.to_owned()); + let ctx = SessionContext::new_with_state(session_state); let table_name = Ulid::new().to_string(); ctx.register_parquet( &table_name, @@ -132,6 +139,7 @@ pub async fn calculate_field_stats( Some(&DATASET_STATS_CUSTOM_PARTITION.to_string()), vec![log_source_entry], TelemetryType::Logs, + tenant_id, ) .await?; let vec_json = apply_generic_flattening_for_partition( @@ -145,7 +153,7 @@ pub async fn calculate_field_stats( for json in vec_json { let origin_size = serde_json::to_vec(&json).unwrap().len() as u64; // string length need not be the same as byte length let schema = PARSEABLE - .get_stream(DATASET_STATS_STREAM_NAME)? + .get_stream(DATASET_STATS_STREAM_NAME, tenant_id)? .get_schema_raw(); json::Event { json, @@ -162,6 +170,7 @@ pub async fn calculate_field_stats( StreamType::Internal, &p_custom_fields, TelemetryType::Logs, + tenant_id, )? .process()?; } diff --git a/src/storage/gcs.rs b/src/storage/gcs.rs index 6e6b7090b..c77ea9693 100644 --- a/src/storage/gcs.rs +++ b/src/storage/gcs.rs @@ -32,7 +32,7 @@ use crate::{ increment_files_scanned_in_object_store_calls_by_date, increment_object_store_calls_by_date, }, - parseable::LogStream, + parseable::{DEFAULT_TENANT, LogStream}, }; use async_trait::async_trait; use bytes::Bytes; @@ -177,9 +177,14 @@ pub struct Gcs { } impl Gcs { - async fn _get_object(&self, path: &RelativePath) -> Result { + async fn _get_object( + &self, + path: &RelativePath, + tenant_id: &Option, + ) -> Result { let resp = self.client.get(&to_object_store_path(path)).await; - increment_object_store_calls_by_date("GET", &Utc::now().date_naive().to_string()); + let tenant = tenant_id.as_ref().map_or(DEFAULT_TENANT, |v| v); + increment_object_store_calls_by_date("GET", &Utc::now().date_naive().to_string(), tenant); match resp { Ok(resp) => { let body: Bytes = resp.bytes().await?; @@ -187,11 +192,13 @@ impl Gcs { "GET", 1, &Utc::now().date_naive().to_string(), + tenant, ); increment_bytes_scanned_in_object_store_calls_by_date( "GET", body.len() as u64, &Utc::now().date_naive().to_string(), + tenant, ); Ok(body) } @@ -203,15 +210,18 @@ impl Gcs { &self, path: &RelativePath, resource: PutPayload, + tenant_id: &Option, ) -> Result<(), ObjectStorageError> { + let tenant = tenant_id.as_ref().map_or(DEFAULT_TENANT, |v| v); let resp = self.client.put(&to_object_store_path(path), resource).await; - increment_object_store_calls_by_date("PUT", &Utc::now().date_naive().to_string()); + increment_object_store_calls_by_date("PUT", &Utc::now().date_naive().to_string(), tenant); match resp { Ok(_) => { increment_files_scanned_in_object_store_calls_by_date( "PUT", 1, &Utc::now().date_naive().to_string(), + tenant, ); Ok(()) } @@ -219,12 +229,17 @@ impl Gcs { } } - async fn _delete_prefix(&self, key: &str) -> Result<(), ObjectStorageError> { + async fn _delete_prefix( + &self, + key: &str, + tenant_id: &Option, + ) -> Result<(), ObjectStorageError> { let files_scanned = Arc::new(AtomicU64::new(0)); let files_deleted = Arc::new(AtomicU64::new(0)); + let tenant = tenant_id.as_ref().map_or(DEFAULT_TENANT, |v| v); // Track LIST operation let object_stream = self.client.list(Some(&(key.into()))); - increment_object_store_calls_by_date("LIST", 
&Utc::now().date_naive().to_string()); + increment_object_store_calls_by_date("LIST", &Utc::now().date_naive().to_string(), tenant); object_stream .for_each_concurrent(None, |x| async { files_scanned.fetch_add(1, Ordering::Relaxed); @@ -236,6 +251,7 @@ impl Gcs { increment_object_store_calls_by_date( "DELETE", &Utc::now().date_naive().to_string(), + tenant, ); if delete_resp.is_err() { error!( @@ -255,21 +271,28 @@ impl Gcs { "LIST", files_scanned.load(Ordering::Relaxed), &Utc::now().date_naive().to_string(), + tenant, ); increment_files_scanned_in_object_store_calls_by_date( "DELETE", files_deleted.load(Ordering::Relaxed), &Utc::now().date_naive().to_string(), + tenant, ); Ok(()) } - async fn _list_dates(&self, stream: &str) -> Result, ObjectStorageError> { + async fn _list_dates( + &self, + stream: &str, + tenant_id: &Option, + ) -> Result, ObjectStorageError> { let resp: Result = self .client .list_with_delimiter(Some(&(stream.into()))) .await; - increment_object_store_calls_by_date("LIST", &Utc::now().date_naive().to_string()); + let tenant = tenant_id.as_ref().map_or(DEFAULT_TENANT, |v| v); + increment_object_store_calls_by_date("LIST", &Utc::now().date_naive().to_string(), tenant); let resp = match resp { Ok(resp) => resp, @@ -284,6 +307,7 @@ impl Gcs { "LIST", common_prefixes.len() as u64, &Utc::now().date_naive().to_string(), + tenant, ); // return prefixes at the root level @@ -296,17 +320,23 @@ impl Gcs { Ok(dates) } - async fn _upload_file(&self, key: &str, path: &Path) -> Result<(), ObjectStorageError> { + async fn _upload_file( + &self, + key: &str, + path: &Path, + tenant_id: &Option, + ) -> Result<(), ObjectStorageError> { let bytes = tokio::fs::read(path).await?; - + let tenant = tenant_id.as_ref().map_or(DEFAULT_TENANT, |v| v); let result = self.client.put(&key.into(), bytes.into()).await; - increment_object_store_calls_by_date("PUT", &Utc::now().date_naive().to_string()); + increment_object_store_calls_by_date("PUT", &Utc::now().date_naive().to_string(), tenant); match result { Ok(_) => { increment_files_scanned_in_object_store_calls_by_date( "PUT", 1, &Utc::now().date_naive().to_string(), + tenant, ); Ok(()) } @@ -318,10 +348,11 @@ impl Gcs { &self, key: &RelativePath, path: &Path, + tenant_id: &Option, ) -> Result<(), ObjectStorageError> { let mut file = OpenOptions::new().read(true).open(path).await?; let location = &to_object_store_path(key); - + let tenant = tenant_id.as_ref().map_or(DEFAULT_TENANT, |v| v); let async_writer = self.client.put_multipart(location).await; let mut async_writer = match async_writer { Ok(writer) => writer, @@ -338,13 +369,18 @@ impl Gcs { // Track single PUT operation for small files let result = self.client.put(location, data.into()).await; - increment_object_store_calls_by_date("PUT", &Utc::now().date_naive().to_string()); + increment_object_store_calls_by_date( + "PUT", + &Utc::now().date_naive().to_string(), + tenant, + ); match result { Ok(_) => { increment_files_scanned_in_object_store_calls_by_date( "PUT", 1, &Utc::now().date_naive().to_string(), + tenant, ); } Err(err) => { @@ -382,6 +418,7 @@ impl Gcs { increment_object_store_calls_by_date( "PUT_MULTIPART", &Utc::now().date_naive().to_string(), + tenant, ); } @@ -406,17 +443,19 @@ impl ObjectStorage for Gcs { async fn get_buffered_reader( &self, path: &RelativePath, + tenant_id: &Option, ) -> Result { let path = &to_object_store_path(path); - + let tenant = tenant_id.as_ref().map_or(DEFAULT_TENANT, |v| v); let meta = self.client.head(path).await; - 
increment_object_store_calls_by_date("HEAD", &Utc::now().date_naive().to_string()); + increment_object_store_calls_by_date("HEAD", &Utc::now().date_naive().to_string(), tenant); let meta = match meta { Ok(meta) => { increment_files_scanned_in_object_store_calls_by_date( "HEAD", 1, &Utc::now().date_naive().to_string(), + tenant, ); meta } @@ -434,39 +473,51 @@ impl ObjectStorage for Gcs { &self, key: &RelativePath, path: &Path, + tenant_id: &Option, ) -> Result<(), ObjectStorageError> { - self._upload_multipart(key, path).await + self._upload_multipart(key, path, tenant_id).await } - async fn head(&self, path: &RelativePath) -> Result { + async fn head( + &self, + path: &RelativePath, + tenant_id: &Option, + ) -> Result { let result = self.client.head(&to_object_store_path(path)).await; - increment_object_store_calls_by_date("HEAD", &Utc::now().date_naive().to_string()); + let tenant = tenant_id.as_ref().map_or(DEFAULT_TENANT, |v| v); + increment_object_store_calls_by_date("HEAD", &Utc::now().date_naive().to_string(), tenant); if result.is_ok() { increment_files_scanned_in_object_store_calls_by_date( "HEAD", 1, &Utc::now().date_naive().to_string(), + tenant, ); } Ok(result?) } - async fn get_object(&self, path: &RelativePath) -> Result { - Ok(self._get_object(path).await?) + async fn get_object( + &self, + path: &RelativePath, + tenant_id: &Option, + ) -> Result { + Ok(self._get_object(path, tenant_id).await?) } async fn get_objects( &self, base_path: Option<&RelativePath>, filter_func: Box bool + Send>, + tenant_id: &Option, ) -> Result, ObjectStorageError> { let prefix = if let Some(base_path) = base_path { to_object_store_path(base_path) } else { self.root.clone() }; - + let tenant = tenant_id.as_ref().map_or(DEFAULT_TENANT, |v| v); let mut list_stream = self.client.list(Some(&prefix)); let mut res = vec![]; @@ -492,6 +543,7 @@ impl ObjectStorage for Gcs { .get_object( RelativePath::from_path(meta.location.as_ref()) .map_err(ObjectStorageError::PathError)?, + tenant_id, ) .await?; res.push(byts); @@ -502,19 +554,21 @@ impl ObjectStorage for Gcs { "LIST", files_scanned as u64, &Utc::now().date_naive().to_string(), + tenant, ); - increment_object_store_calls_by_date("LIST", &Utc::now().date_naive().to_string()); + increment_object_store_calls_by_date("LIST", &Utc::now().date_naive().to_string(), tenant); Ok(res) } async fn get_ingestor_meta_file_paths( &self, + tenant_id: &Option, ) -> Result, ObjectStorageError> { let mut path_arr = vec![]; let mut files_scanned = 0; - + let tenant = tenant_id.as_ref().map_or(DEFAULT_TENANT, |v| v); let mut object_stream = self.client.list(Some(&self.root)); - increment_object_store_calls_by_date("LIST", &Utc::now().date_naive().to_string()); + increment_object_store_calls_by_date("LIST", &Utc::now().date_naive().to_string(), tenant); while let Some(meta_result) = object_stream.next().await { let meta = match meta_result { @@ -536,6 +590,7 @@ impl ObjectStorage for Gcs { "LIST", files_scanned as u64, &Utc::now().date_naive().to_string(), + tenant, ); Ok(path_arr) } @@ -544,69 +599,99 @@ impl ObjectStorage for Gcs { &self, path: &RelativePath, resource: Bytes, + tenant_id: &Option, ) -> Result<(), ObjectStorageError> { - self._put_object(path, resource.into()) + self._put_object(path, resource.into(), tenant_id) .await .map_err(|err| ObjectStorageError::ConnectionError(Box::new(err)))?; Ok(()) } - async fn delete_prefix(&self, path: &RelativePath) -> Result<(), ObjectStorageError> { - self._delete_prefix(path.as_ref()).await?; + async fn delete_prefix( + 
&self, + path: &RelativePath, + tenant_id: &Option, + ) -> Result<(), ObjectStorageError> { + self._delete_prefix(path.as_ref(), tenant_id).await?; Ok(()) } - async fn delete_object(&self, path: &RelativePath) -> Result<(), ObjectStorageError> { + async fn delete_object( + &self, + path: &RelativePath, + tenant_id: &Option, + ) -> Result<(), ObjectStorageError> { let result = self.client.delete(&to_object_store_path(path)).await; - increment_object_store_calls_by_date("DELETE", &Utc::now().date_naive().to_string()); + let tenant = tenant_id.as_ref().map_or(DEFAULT_TENANT, |v| v); + increment_object_store_calls_by_date( + "DELETE", + &Utc::now().date_naive().to_string(), + tenant, + ); if result.is_ok() { increment_files_scanned_in_object_store_calls_by_date( "DELETE", 1, &Utc::now().date_naive().to_string(), + tenant, ); } Ok(result?) } - async fn check(&self) -> Result<(), ObjectStorageError> { + async fn check(&self, tenant_id: &Option) -> Result<(), ObjectStorageError> { let result = self .client .head(&to_object_store_path(&parseable_json_path())) .await; - increment_object_store_calls_by_date("HEAD", &Utc::now().date_naive().to_string()); + let tenant = tenant_id.as_ref().map_or(DEFAULT_TENANT, |v| v); + increment_object_store_calls_by_date("HEAD", &Utc::now().date_naive().to_string(), tenant); if result.is_ok() { increment_files_scanned_in_object_store_calls_by_date( "HEAD", 1, &Utc::now().date_naive().to_string(), + tenant, ); } Ok(result.map(|_| ())?) } - async fn delete_stream(&self, stream_name: &str) -> Result<(), ObjectStorageError> { - self._delete_prefix(stream_name).await?; + async fn delete_stream( + &self, + stream_name: &str, + tenant_id: &Option, + ) -> Result<(), ObjectStorageError> { + self._delete_prefix(stream_name, tenant_id).await?; Ok(()) } - async fn try_delete_node_meta(&self, node_filename: String) -> Result<(), ObjectStorageError> { + async fn try_delete_node_meta( + &self, + node_filename: String, + tenant_id: &Option, + ) -> Result<(), ObjectStorageError> { let file = RelativePathBuf::from(&node_filename); - + let tenant = tenant_id.as_ref().map_or(DEFAULT_TENANT, |v| v); let result = self.client.delete(&to_object_store_path(&file)).await; - increment_object_store_calls_by_date("DELETE", &Utc::now().date_naive().to_string()); + increment_object_store_calls_by_date( + "DELETE", + &Utc::now().date_naive().to_string(), + tenant, + ); match result { Ok(_) => { increment_files_scanned_in_object_store_calls_by_date( "DELETE", 1, &Utc::now().date_naive().to_string(), + tenant, ); Ok(()) } @@ -628,8 +713,13 @@ impl ObjectStorage for Gcs { "LIST", common_prefixes.len() as u64, &Utc::now().date_naive().to_string(), + DEFAULT_TENANT, + ); + increment_object_store_calls_by_date( + "LIST", + &Utc::now().date_naive().to_string(), + DEFAULT_TENANT, ); - increment_object_store_calls_by_date("LIST", &Utc::now().date_naive().to_string()); // return prefixes at the root level let dirs: HashSet<_> = common_prefixes .iter() @@ -644,7 +734,11 @@ impl ObjectStorage for Gcs { let key = format!("{dir}/{STREAM_METADATA_FILE_NAME}"); let task = async move { let result = self.client.head(&StorePath::from(key)).await; - increment_object_store_calls_by_date("HEAD", &Utc::now().date_naive().to_string()); + increment_object_store_calls_by_date( + "HEAD", + &Utc::now().date_naive().to_string(), + DEFAULT_TENANT, + ); result.map(|_| ()) }; stream_json_check.push(task); @@ -653,14 +747,19 @@ impl ObjectStorage for Gcs { "HEAD", dirs.len() as u64, &Utc::now().date_naive().to_string(), + 
DEFAULT_TENANT, ); stream_json_check.try_collect::<()>().await?; Ok(dirs) } - async fn list_dates(&self, stream_name: &str) -> Result, ObjectStorageError> { - let streams = self._list_dates(stream_name).await?; + async fn list_dates( + &self, + stream_name: &str, + tenant_id: &Option, + ) -> Result, ObjectStorageError> { + let streams = self._list_dates(stream_name, tenant_id).await?; Ok(streams) } @@ -669,15 +768,18 @@ impl ObjectStorage for Gcs { &self, stream_name: &str, date: &str, + tenant_id: &Option, ) -> Result, ObjectStorageError> { let pre = object_store::path::Path::from(format!("{}/{}/", stream_name, date)); + let tenant = tenant_id.as_ref().map_or(DEFAULT_TENANT, |v| v); let resp = self.client.list_with_delimiter(Some(&pre)).await?; increment_files_scanned_in_object_store_calls_by_date( "LIST", resp.common_prefixes.len() as u64, &Utc::now().date_naive().to_string(), + tenant, ); - increment_object_store_calls_by_date("LIST", &Utc::now().date_naive().to_string()); + increment_object_store_calls_by_date("LIST", &Utc::now().date_naive().to_string(), tenant); let hours: Vec = resp .common_prefixes @@ -704,15 +806,18 @@ impl ObjectStorage for Gcs { stream_name: &str, date: &str, hour: &str, + tenant_id: &Option, ) -> Result, ObjectStorageError> { let pre = object_store::path::Path::from(format!("{}/{}/{}/", stream_name, date, hour)); let resp = self.client.list_with_delimiter(Some(&pre)).await?; + let tenant = tenant_id.as_ref().map_or(DEFAULT_TENANT, |v| v); increment_files_scanned_in_object_store_calls_by_date( "LIST", resp.common_prefixes.len() as u64, &Utc::now().date_naive().to_string(), + tenant, ); - increment_object_store_calls_by_date("LIST", &Utc::now().date_naive().to_string()); + increment_object_store_calls_by_date("LIST", &Utc::now().date_naive().to_string(), tenant); let minutes: Vec = resp .common_prefixes .iter() @@ -734,8 +839,13 @@ impl ObjectStorage for Gcs { Ok(minutes) } - async fn upload_file(&self, key: &str, path: &Path) -> Result<(), ObjectStorageError> { - Ok(self._upload_file(key, path).await?) + async fn upload_file( + &self, + key: &str, + path: &Path, + tenant_id: &Option, + ) -> Result<(), ObjectStorageError> { + Ok(self._upload_file(key, path, tenant_id).await?) 
} fn absolute_url(&self, prefix: &RelativePath) -> object_store::path::Path { @@ -756,17 +866,22 @@ impl ObjectStorage for Gcs { url::Url::parse(&format!("gs://{}", self.bucket)).unwrap() } - async fn list_dirs(&self) -> Result, ObjectStorageError> { + async fn list_dirs( + &self, + tenant_id: &Option, + ) -> Result, ObjectStorageError> { let pre = object_store::path::Path::from("/"); let resp = self.client.list_with_delimiter(Some(&pre)).await; - increment_object_store_calls_by_date("LIST", &Utc::now().date_naive().to_string()); + let tenant = tenant_id.as_ref().map_or(DEFAULT_TENANT, |v| v); + increment_object_store_calls_by_date("LIST", &Utc::now().date_naive().to_string(), tenant); let resp = match resp { Ok(resp) => { increment_files_scanned_in_object_store_calls_by_date( "LIST", resp.common_prefixes.len() as u64, &Utc::now().date_naive().to_string(), + tenant, ); resp @@ -787,17 +902,19 @@ impl ObjectStorage for Gcs { async fn list_dirs_relative( &self, relative_path: &RelativePath, + tenant_id: &Option, ) -> Result, ObjectStorageError> { let prefix = object_store::path::Path::from(relative_path.as_str()); - + let tenant = tenant_id.as_ref().map_or(DEFAULT_TENANT, |v| v); let resp = self.client.list_with_delimiter(Some(&prefix)).await; - increment_object_store_calls_by_date("LIST", &Utc::now().date_naive().to_string()); + increment_object_store_calls_by_date("LIST", &Utc::now().date_naive().to_string(), tenant); let resp = match resp { Ok(resp) => { increment_files_scanned_in_object_store_calls_by_date( "LIST", resp.common_prefixes.len() as u64, &Utc::now().date_naive().to_string(), + tenant, ); resp diff --git a/src/storage/localfs.rs b/src/storage/localfs.rs index 25f2b2c07..d8cdd6892 100644 --- a/src/storage/localfs.rs +++ b/src/storage/localfs.rs @@ -42,7 +42,7 @@ use crate::{ increment_files_scanned_in_object_store_calls_by_date, increment_object_store_calls_by_date, }, option::validation, - parseable::LogStream, + parseable::{DEFAULT_TENANT, LogStream}, storage::SETTINGS_ROOT_DIRECTORY, }; @@ -110,15 +110,17 @@ impl ObjectStorage for LocalFS { &self, key: &RelativePath, path: &Path, + tenant_id: &Option, ) -> Result<(), ObjectStorageError> { let mut file = OpenOptions::new().read(true).open(path).await?; let mut data = Vec::new(); file.read_to_end(&mut data).await?; - self.put_object(key, data.into()).await + self.put_object(key, data.into(), tenant_id).await } async fn get_buffered_reader( &self, _path: &RelativePath, + _tenant_id: &Option, ) -> Result { Err(ObjectStorageError::UnhandledError(Box::new( std::io::Error::new( @@ -127,7 +129,11 @@ impl ObjectStorage for LocalFS { ), ))) } - async fn head(&self, path: &RelativePath) -> Result { + async fn head( + &self, + path: &RelativePath, + _tenant_id: &Option, + ) -> Result { let file_path = self.path_in_root(path); // Check if file exists and get metadata @@ -153,8 +159,13 @@ impl ObjectStorage for LocalFS { Err(e) => Err(ObjectStorageError::IoError(e)), } } - async fn get_object(&self, path: &RelativePath) -> Result { + async fn get_object( + &self, + path: &RelativePath, + tenant_id: &Option, + ) -> Result { let file_path; + let tenant_str = tenant_id.as_deref().unwrap_or(DEFAULT_TENANT); // this is for the `get_manifest()` function because inside a snapshot, we store the absolute path (without `/`) on linux based OS // `home/user/.../manifest.json` @@ -185,8 +196,13 @@ impl ObjectStorage for LocalFS { "GET", 1, &Utc::now().date_naive().to_string(), + tenant_str, + ); + increment_object_store_calls_by_date( + "GET", + 
&Utc::now().date_naive().to_string(), + tenant_str, ); - increment_object_store_calls_by_date("GET", &Utc::now().date_naive().to_string()); Ok(x.into()) } Err(e) => { @@ -203,9 +219,11 @@ impl ObjectStorage for LocalFS { async fn get_ingestor_meta_file_paths( &self, + tenant_id: &Option, ) -> Result, ObjectStorageError> { let mut path_arr = vec![]; let mut files_scanned = 0u64; + let tenant_str = tenant_id.as_deref().unwrap_or(DEFAULT_TENANT); let entries_result = fs::read_dir(&self.root).await; let mut entries = match entries_result { @@ -238,8 +256,13 @@ impl ObjectStorage for LocalFS { "LIST", files_scanned, &Utc::now().date_naive().to_string(), + tenant_str, + ); + increment_object_store_calls_by_date( + "LIST", + &Utc::now().date_naive().to_string(), + tenant_str, ); - increment_object_store_calls_by_date("LIST", &Utc::now().date_naive().to_string()); Ok(path_arr) } @@ -248,12 +271,14 @@ impl ObjectStorage for LocalFS { &self, base_path: Option<&RelativePath>, filter_func: Box bool + std::marker::Send + 'static>, + tenant_id: &Option, ) -> Result, ObjectStorageError> { let prefix = if let Some(path) = base_path { path.to_path(&self.root) } else { self.root.clone() }; + let tenant_str = tenant_id.as_deref().unwrap_or(DEFAULT_TENANT); let entries_result = fs::read_dir(&prefix).await; let mut entries = match entries_result { @@ -291,10 +316,12 @@ impl ObjectStorage for LocalFS { "GET", 1, &Utc::now().date_naive().to_string(), + tenant_str, ); increment_object_store_calls_by_date( "GET", &Utc::now().date_naive().to_string(), + tenant_str, ); res.push(file.into()); } @@ -308,8 +335,13 @@ impl ObjectStorage for LocalFS { "LIST", files_scanned as u64, &Utc::now().date_naive().to_string(), + tenant_str, + ); + increment_object_store_calls_by_date( + "LIST", + &Utc::now().date_naive().to_string(), + tenant_str, ); - increment_object_store_calls_by_date("LIST", &Utc::now().date_naive().to_string()); Ok(res) } @@ -318,8 +350,10 @@ impl ObjectStorage for LocalFS { &self, path: &RelativePath, resource: Bytes, + tenant_id: &Option, ) -> Result<(), ObjectStorageError> { let path = self.path_in_root(path); + let tenant_str = tenant_id.as_deref().unwrap_or(DEFAULT_TENANT); if let Some(parent) = path.parent() { fs::create_dir_all(parent).await?; } @@ -331,26 +365,45 @@ impl ObjectStorage for LocalFS { "PUT", 1, &Utc::now().date_naive().to_string(), + tenant_str, + ); + increment_object_store_calls_by_date( + "PUT", + &Utc::now().date_naive().to_string(), + tenant_str, ); - increment_object_store_calls_by_date("PUT", &Utc::now().date_naive().to_string()); } res.map_err(Into::into) } - async fn delete_prefix(&self, path: &RelativePath) -> Result<(), ObjectStorageError> { + async fn delete_prefix( + &self, + path: &RelativePath, + tenant_id: &Option, + ) -> Result<(), ObjectStorageError> { let path = self.path_in_root(path); + let tenant_str = tenant_id.as_deref().unwrap_or(DEFAULT_TENANT); let result = tokio::fs::remove_dir_all(path).await; if result.is_ok() { - increment_object_store_calls_by_date("DELETE", &Utc::now().date_naive().to_string()); + increment_object_store_calls_by_date( + "DELETE", + &Utc::now().date_naive().to_string(), + tenant_str, + ); } result?; Ok(()) } - async fn delete_object(&self, path: &RelativePath) -> Result<(), ObjectStorageError> { + async fn delete_object( + &self, + path: &RelativePath, + tenant_id: &Option, + ) -> Result<(), ObjectStorageError> { let path = self.path_in_root(path); + let tenant_str = tenant_id.as_deref().unwrap_or(DEFAULT_TENANT); let result = 
tokio::fs::remove_file(path).await; if result.is_ok() { @@ -359,40 +412,68 @@ impl ObjectStorage for LocalFS { "DELETE", 1, &Utc::now().date_naive().to_string(), + tenant_str, + ); + increment_object_store_calls_by_date( + "DELETE", + &Utc::now().date_naive().to_string(), + tenant_str, ); - increment_object_store_calls_by_date("DELETE", &Utc::now().date_naive().to_string()); } result?; Ok(()) } - async fn check(&self) -> Result<(), ObjectStorageError> { + async fn check(&self, tenant_id: &Option) -> Result<(), ObjectStorageError> { let result = fs::create_dir_all(&self.root).await; + let tenant_str = tenant_id.as_deref().unwrap_or(DEFAULT_TENANT); if result.is_ok() { - increment_object_store_calls_by_date("HEAD", &Utc::now().date_naive().to_string()); + increment_object_store_calls_by_date( + "HEAD", + &Utc::now().date_naive().to_string(), + tenant_str, + ); } result.map_err(|e| ObjectStorageError::UnhandledError(e.into())) } - async fn delete_stream(&self, stream_name: &str) -> Result<(), ObjectStorageError> { + async fn delete_stream( + &self, + stream_name: &str, + tenant_id: &Option, + ) -> Result<(), ObjectStorageError> { let path = self.root.join(stream_name); + let tenant_str = tenant_id.as_deref().unwrap_or(DEFAULT_TENANT); let result = fs::remove_dir_all(path).await; if result.is_ok() { - increment_object_store_calls_by_date("DELETE", &Utc::now().date_naive().to_string()); + increment_object_store_calls_by_date( + "DELETE", + &Utc::now().date_naive().to_string(), + tenant_str, + ); } Ok(result?) } - async fn try_delete_node_meta(&self, node_filename: String) -> Result<(), ObjectStorageError> { + async fn try_delete_node_meta( + &self, + node_filename: String, + tenant_id: &Option, + ) -> Result<(), ObjectStorageError> { let path = self.root.join(node_filename); + let tenant_str = tenant_id.as_deref().unwrap_or(DEFAULT_TENANT); let result = fs::remove_file(path).await; if result.is_ok() { - increment_object_store_calls_by_date("DELETE", &Utc::now().date_naive().to_string()); + increment_object_store_calls_by_date( + "DELETE", + &Utc::now().date_naive().to_string(), + tenant_str, + ); } Ok(result?) 
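All of the backends above resolve the optional tenant to a metric label the same way: fall back to DEFAULT_TENANT when no tenant is set, then append it as the last label on the per-date counters. A minimal sketch of that pattern, with a local stand-in counter and constant in place of the project's registered metrics:

```rust
use chrono::Utc;
use prometheus::{IntCounterVec, Opts};

const DEFAULT_TENANT: &str = "default"; // assumption: the real constant lives in crate::parseable

// Mirrors the fallback used across the hunks above, which appears in two
// equivalent spellings:
//   tenant_id.as_ref().map_or(DEFAULT_TENANT, |v| v)
//   tenant_id.as_deref().unwrap_or(DEFAULT_TENANT)
fn tenant_label(tenant_id: &Option<String>) -> &str {
    tenant_id.as_deref().unwrap_or(DEFAULT_TENANT)
}

fn main() -> prometheus::Result<()> {
    // Local stand-in for the object-store call counter; the real metric now
    // carries ("method", "date", "tenant") instead of ("method", "date").
    let calls = IntCounterVec::new(
        Opts::new("object_store_calls_by_date", "object store calls per day"),
        &["method", "date", "tenant"],
    )?;

    let date = Utc::now().date_naive().to_string();

    calls
        .with_label_values(&["GET", &date, tenant_label(&Some("acme".to_string()))])
        .inc();

    // No tenant falls back to the default label, matching the diff's behaviour.
    calls
        .with_label_values(&["LIST", &date, tenant_label(&None)])
        .inc();

    Ok(())
}
```

Centralising the fallback in a single helper like this would keep the label set consistent across backends if the default tenant name ever changes.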
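The field_stats.rs hunk earlier in this section applies the same tenant scoping on the query side by cloning QUERY_SESSION_STATE and overwriting catalog.default_schema. For a context built from scratch, the effect can be sketched with SessionConfig instead (assuming a DataFusion version that exposes with_default_catalog_and_schema; "datafusion" and "public" are DataFusion's stock catalog and schema names):

```rust
use datafusion::prelude::{SessionConfig, SessionContext};

// Sketch only: the project mutates a shared session state rather than building
// a new config, but the resulting behaviour is the same — unqualified table
// names resolve under the tenant's schema, or under "public" with no tenant.
fn context_for_tenant(tenant_id: &Option<String>) -> SessionContext {
    let schema = tenant_id.as_deref().unwrap_or("public");
    let config = SessionConfig::new().with_default_catalog_and_schema("datafusion", schema);
    SessionContext::new_with_config(config)
}

fn main() {
    let _ctx = context_for_tenant(&Some("acme".to_string()));
}
```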
@@ -410,7 +491,11 @@ impl ObjectStorage for LocalFS { let result = fs::read_dir(&self.root).await; let directories = match result { Ok(read_dir) => { - increment_object_store_calls_by_date("LIST", &Utc::now().date_naive().to_string()); + increment_object_store_calls_by_date( + "LIST", + &Utc::now().date_naive().to_string(), + "default", + ); ReadDirStream::new(read_dir) } Err(err) => { @@ -442,7 +527,11 @@ impl ObjectStorage for LocalFS { let result = fs::read_dir(&self.root).await; let directories = match result { Ok(read_dir) => { - increment_object_store_calls_by_date("LIST", &Utc::now().date_naive().to_string()); + increment_object_store_calls_by_date( + "LIST", + &Utc::now().date_naive().to_string(), + "default", + ); ReadDirStream::new(read_dir) } Err(err) => { @@ -463,11 +552,19 @@ impl ObjectStorage for LocalFS { Ok(logstreams) } - async fn list_dirs(&self) -> Result, ObjectStorageError> { + async fn list_dirs( + &self, + tenant_id: &Option, + ) -> Result, ObjectStorageError> { + let tenant_str = tenant_id.as_deref().unwrap_or(DEFAULT_TENANT); let result = fs::read_dir(&self.root).await; let read_dir = match result { Ok(read_dir) => { - increment_object_store_calls_by_date("LIST", &Utc::now().date_naive().to_string()); + increment_object_store_calls_by_date( + "LIST", + &Utc::now().date_naive().to_string(), + tenant_str, + ); read_dir } Err(err) => { @@ -498,6 +595,7 @@ impl ObjectStorage for LocalFS { async fn list_dirs_relative( &self, relative_path: &RelativePath, + _tenant_id: &Option, ) -> Result, ObjectStorageError> { let root = self.root.join(relative_path.as_str()); @@ -529,13 +627,22 @@ impl ObjectStorage for LocalFS { Ok(dirs) } - async fn list_dates(&self, stream_name: &str) -> Result, ObjectStorageError> { + async fn list_dates( + &self, + stream_name: &str, + tenant_id: &Option, + ) -> Result, ObjectStorageError> { let path = self.root.join(stream_name); + let tenant_str = tenant_id.as_deref().unwrap_or(DEFAULT_TENANT); let result = fs::read_dir(&path).await; let read_dir = match result { Ok(read_dir) => { - increment_object_store_calls_by_date("LIST", &Utc::now().date_naive().to_string()); + increment_object_store_calls_by_date( + "LIST", + &Utc::now().date_naive().to_string(), + tenant_str, + ); read_dir } Err(err) => { @@ -559,6 +666,7 @@ impl ObjectStorage for LocalFS { &self, stream_name: &str, date: &str, + _tenant_id: &Option, ) -> Result, ObjectStorageError> { let path = self.root.join(stream_name).join(date); let directories = ReadDirStream::new(fs::read_dir(&path).await?); @@ -577,6 +685,7 @@ impl ObjectStorage for LocalFS { stream_name: &str, date: &str, hour: &str, + _tenant_id: &Option, ) -> Result, ObjectStorageError> { let path = self.root.join(stream_name).join(date).join(hour); // Propagate any read_dir errors instead of swallowing them @@ -592,13 +701,19 @@ impl ObjectStorage for LocalFS { .collect()) } - async fn upload_file(&self, key: &str, path: &Path) -> Result<(), ObjectStorageError> { + async fn upload_file( + &self, + key: &str, + path: &Path, + tenant_id: &Option, + ) -> Result<(), ObjectStorageError> { let op = CopyOptions { overwrite: true, skip_exist: true, ..CopyOptions::default() }; let to_path = self.root.join(key); + let tenant_str = tenant_id.as_deref().unwrap_or(DEFAULT_TENANT); if let Some(path) = to_path.parent() { fs::create_dir_all(path).await?; } @@ -606,7 +721,11 @@ impl ObjectStorage for LocalFS { let result = fs_extra::file::copy(path, to_path, &op); match result { Ok(_) => { - increment_object_store_calls_by_date("PUT", 
&Utc::now().date_naive().to_string()); + increment_object_store_calls_by_date( + "PUT", + &Utc::now().date_naive().to_string(), + tenant_str, + ); Ok(()) } Err(err) => Err(err.into()), diff --git a/src/storage/object_storage.rs b/src/storage/object_storage.rs index 3983beb6e..e70895e42 100644 --- a/src/storage/object_storage.rs +++ b/src/storage/object_storage.rs @@ -53,6 +53,7 @@ use crate::metrics::increment_parquets_stored_by_date; use crate::metrics::increment_parquets_stored_size_by_date; use crate::metrics::{EVENTS_STORAGE_SIZE_DATE, LIFETIME_EVENTS_STORAGE_SIZE, STORAGE_SIZE}; use crate::option::Mode; +use crate::parseable::DEFAULT_TENANT; use crate::parseable::{LogStream, PARSEABLE, Stream}; use crate::stats::FullStats; use crate::storage::SETTINGS_ROOT_DIRECTORY; @@ -99,6 +100,7 @@ async fn upload_single_parquet_file( stream_relative_path: String, stream_name: String, schema: Arc, + tenant_id: Option, ) -> Result { let filename = path .file_name() @@ -111,10 +113,16 @@ async fn upload_single_parquet_file( .metadata() .map_err(|e| ObjectStorageError::Custom(format!("Failed to get local file metadata: {e}")))? .len(); + // tracing::warn!("upload single stream_relative_path- {stream_relative_path:?}"); + // tracing::warn!("upload single path- {path:?}"); // Upload the file store - .upload_multipart(&RelativePathBuf::from(&stream_relative_path), &path) + .upload_multipart( + &RelativePathBuf::from(&stream_relative_path), + &path, + &tenant_id, + ) .await .map_err(|e| { error!("Failed to upload file {filename:?} to {stream_relative_path}: {e}"); @@ -127,13 +135,14 @@ async fn upload_single_parquet_file( &stream_relative_path, local_file_size, &stream_name, + &tenant_id, ) .await?; if !upload_is_valid { // Upload validation failed, clean up the uploaded file and return error let _ = store - .delete_object(&RelativePathBuf::from(&stream_relative_path)) + .delete_object(&RelativePathBuf::from(&stream_relative_path), &tenant_id) .await; error!("Upload size validation failed for file {filename:?}, deleted from object storage"); return Ok(UploadResult { @@ -143,7 +152,12 @@ async fn upload_single_parquet_file( } // Update storage metrics - update_storage_metrics(&path, &stream_name, filename)?; + update_storage_metrics( + &path, + &stream_name, + filename, + tenant_id.as_ref().map_or(DEFAULT_TENANT, |v| v), + )?; // Create manifest entry let absolute_path = store @@ -153,7 +167,7 @@ async fn upload_single_parquet_file( let manifest = catalog::create_from_parquet_file(absolute_path, &path)?; // Calculate field stats if enabled - calculate_stats_if_enabled(&stream_name, &path, &schema).await; + calculate_stats_if_enabled(&stream_name, &path, &schema, tenant_id).await; Ok(UploadResult { file_path: path, @@ -166,6 +180,7 @@ fn update_storage_metrics( path: &std::path::Path, stream_name: &str, filename: &str, + tenant_id: &str, ) -> Result<(), ObjectStorageError> { let mut file_date_part = filename.split('.').collect::>()[0]; file_date_part = file_date_part.split('=').collect::>()[1]; @@ -174,18 +189,18 @@ fn update_storage_metrics( .map(|m| m.len()) .map_err(|e| ObjectStorageError::Custom(format!("metadata failed for {filename}: {e}")))?; STORAGE_SIZE - .with_label_values(&["data", stream_name, "parquet"]) + .with_label_values(&["data", stream_name, "parquet", tenant_id]) .add(compressed_size as i64); EVENTS_STORAGE_SIZE_DATE - .with_label_values(&["data", stream_name, "parquet", file_date_part]) + .with_label_values(&["data", stream_name, "parquet", file_date_part, tenant_id]) 
.inc_by(compressed_size); LIFETIME_EVENTS_STORAGE_SIZE - .with_label_values(&["data", stream_name, "parquet"]) + .with_label_values(&["data", stream_name, "parquet", tenant_id]) .add(compressed_size as i64); // billing metrics for parquet storage - increment_parquets_stored_by_date(file_date_part); - increment_parquets_stored_size_by_date(compressed_size, file_date_part); + increment_parquets_stored_by_date(file_date_part, tenant_id); + increment_parquets_stored_size_by_date(compressed_size, file_date_part, tenant_id); Ok(()) } @@ -195,11 +210,12 @@ async fn calculate_stats_if_enabled( stream_name: &str, path: &std::path::Path, schema: &Arc, + tenant_id: Option, ) { if stream_name != DATASET_STATS_STREAM_NAME && PARSEABLE.options.collect_dataset_stats { let max_field_statistics = PARSEABLE.options.max_field_statistics; if let Err(err) = - calculate_field_stats(stream_name, path, schema, max_field_statistics).await + calculate_field_stats(stream_name, path, schema, max_field_statistics, &tenant_id).await { tracing::trace!( "Error calculating field stats for stream {}: {}", @@ -216,10 +232,11 @@ async fn validate_uploaded_parquet_file( stream_relative_path: &str, expected_size: u64, stream_name: &str, + tenant_id: &Option, ) -> Result { // Verify the file exists and has the expected size match store - .head(&RelativePathBuf::from(stream_relative_path)) + .head(&RelativePathBuf::from(stream_relative_path), tenant_id) .await { Ok(metadata) => { @@ -265,37 +282,65 @@ pub trait ObjectStorage: Debug + Send + Sync + 'static { async fn get_buffered_reader( &self, path: &RelativePath, + tenant_id: &Option, ) -> Result; - async fn head(&self, path: &RelativePath) -> Result; - async fn get_object(&self, path: &RelativePath) -> Result; + async fn head( + &self, + path: &RelativePath, + tenant_id: &Option, + ) -> Result; + async fn get_object( + &self, + path: &RelativePath, + tenant_id: &Option, + ) -> Result; // TODO: make the filter function optional as we may want to get all objects async fn get_objects( &self, base_path: Option<&RelativePath>, filter_fun: Box bool + Send>, + tenant_id: &Option, ) -> Result, ObjectStorageError>; async fn upload_multipart( &self, key: &RelativePath, path: &Path, + tenant_id: &Option, ) -> Result<(), ObjectStorageError>; async fn put_object( &self, path: &RelativePath, resource: Bytes, + tenant_id: &Option, + ) -> Result<(), ObjectStorageError>; + async fn delete_prefix( + &self, + path: &RelativePath, + tenant_id: &Option, + ) -> Result<(), ObjectStorageError>; + async fn check(&self, tenant_id: &Option) -> Result<(), ObjectStorageError>; + async fn delete_stream( + &self, + stream_name: &str, + tenant_id: &Option, ) -> Result<(), ObjectStorageError>; - async fn delete_prefix(&self, path: &RelativePath) -> Result<(), ObjectStorageError>; - async fn check(&self) -> Result<(), ObjectStorageError>; - async fn delete_stream(&self, stream_name: &str) -> Result<(), ObjectStorageError>; async fn list_streams(&self) -> Result, ObjectStorageError>; async fn list_old_streams(&self) -> Result, ObjectStorageError>; - async fn list_dirs(&self) -> Result, ObjectStorageError>; + async fn list_dirs( + &self, + tenant_id: &Option, + ) -> Result, ObjectStorageError>; async fn list_dirs_relative( &self, relative_path: &RelativePath, + tenant_id: &Option, ) -> Result, ObjectStorageError>; - async fn list_dates(&self, stream_name: &str) -> Result, ObjectStorageError>; + async fn list_dates( + &self, + stream_name: &str, + tenant_id: &Option, + ) -> Result, ObjectStorageError>; /// Lists 
the immediate “hour=” partition directories under the given date. /// Only immediate child entries named `hour=HH` should be returned (no trailing slash). /// `HH` must be zero-padded two-digit numerals (`"hour=00"` through `"hour=23"`). @@ -303,6 +348,7 @@ pub trait ObjectStorage: Debug + Send + Sync + 'static { &self, stream_name: &str, date: &str, + tenant_id: &Option, ) -> Result, ObjectStorageError>; /// Lists the immediate “minute=” partition directories under the given date/hour. @@ -313,25 +359,40 @@ pub trait ObjectStorage: Debug + Send + Sync + 'static { stream_name: &str, date: &str, hour: &str, + tenant_id: &Option, ) -> Result, ObjectStorageError>; // async fn list_manifest_files( // &self, // stream_name: &str, // ) -> Result>, ObjectStorageError>; - async fn upload_file(&self, key: &str, path: &Path) -> Result<(), ObjectStorageError>; - async fn delete_object(&self, path: &RelativePath) -> Result<(), ObjectStorageError>; + async fn upload_file( + &self, + key: &str, + path: &Path, + tenant_id: &Option, + ) -> Result<(), ObjectStorageError>; + async fn delete_object( + &self, + path: &RelativePath, + tenant_id: &Option, + ) -> Result<(), ObjectStorageError>; async fn get_ingestor_meta_file_paths( &self, + tenant_id: &Option, ) -> Result, ObjectStorageError>; - async fn try_delete_node_meta(&self, node_filename: String) -> Result<(), ObjectStorageError>; + async fn try_delete_node_meta( + &self, + node_filename: String, + tenant_id: &Option, + ) -> Result<(), ObjectStorageError>; /// Returns the amount of time taken by the `ObjectStore` to perform a get /// call. - async fn get_latency(&self) -> Duration { + async fn get_latency(&self, tenant_id: &Option) -> Duration { // It's Ok to `unwrap` here. The hardcoded value will always Result in // an `Ok`. 
let path = parseable_json_path(); let start = Instant::now(); - let _ = self.get_object(&path).await; + let _ = self.get_object(&path, tenant_id).await; start.elapsed() } @@ -349,17 +410,18 @@ pub trait ObjectStorage: Debug + Send + Sync + 'static { stream_name: &str, meta: ObjectStoreFormat, schema: Arc, + tenant_id: &Option, ) -> Result { let s: Schema = schema.as_ref().clone(); PARSEABLE .metastore - .put_schema(s.clone(), stream_name) + .put_schema(s.clone(), stream_name, tenant_id) .await .map_err(|e| ObjectStorageError::MetastoreError(Box::new(e.to_detail())))?; PARSEABLE .metastore - .put_stream_json(&meta, stream_name) + .put_stream_json(&meta, stream_name, tenant_id) .await .map_err(|e| ObjectStorageError::MetastoreError(Box::new(e.to_detail())))?; @@ -370,18 +432,19 @@ pub trait ObjectStorage: Debug + Send + Sync + 'static { &self, stream_name: &str, time_partition_limit: NonZeroU32, + tenant_id: &Option, ) -> Result<(), ObjectStorageError> { let mut format: ObjectStoreFormat = serde_json::from_slice( &PARSEABLE .metastore - .get_stream_json(stream_name, false) + .get_stream_json(stream_name, false, tenant_id) .await .map_err(|e| ObjectStorageError::MetastoreError(Box::new(e.to_detail())))?, )?; format.time_partition_limit = Some(time_partition_limit.to_string()); PARSEABLE .metastore - .put_stream_json(&format, stream_name) + .put_stream_json(&format, stream_name, tenant_id) .await .map_err(|e| ObjectStorageError::MetastoreError(Box::new(e.to_detail())))?; @@ -392,18 +455,19 @@ pub trait ObjectStorage: Debug + Send + Sync + 'static { &self, stream_name: &str, custom_partition: Option<&String>, + tenant_id: &Option, ) -> Result<(), ObjectStorageError> { let mut format: ObjectStoreFormat = serde_json::from_slice( &PARSEABLE .metastore - .get_stream_json(stream_name, false) + .get_stream_json(stream_name, false, tenant_id) .await .map_err(|e| ObjectStorageError::MetastoreError(Box::new(e.to_detail())))?, )?; format.custom_partition = custom_partition.cloned(); PARSEABLE .metastore - .put_stream_json(&format, stream_name) + .put_stream_json(&format, stream_name, tenant_id) .await .map_err(|e| ObjectStorageError::MetastoreError(Box::new(e.to_detail())))?; @@ -414,18 +478,19 @@ pub trait ObjectStorage: Debug + Send + Sync + 'static { &self, stream_name: &str, log_source: &[LogSourceEntry], + tenant_id: &Option, ) -> Result<(), ObjectStorageError> { let mut format: ObjectStoreFormat = serde_json::from_slice( &PARSEABLE .metastore - .get_stream_json(stream_name, false) + .get_stream_json(stream_name, false, tenant_id) .await .map_err(|e| ObjectStorageError::MetastoreError(Box::new(e.to_detail())))?, )?; format.log_source = log_source.to_owned(); PARSEABLE .metastore - .put_stream_json(&format, stream_name) + .put_stream_json(&format, stream_name, tenant_id) .await .map_err(|e| ObjectStorageError::MetastoreError(Box::new(e.to_detail())))?; @@ -458,18 +523,19 @@ pub trait ObjectStorage: Debug + Send + Sync + 'static { &self, stream_name: &str, first_event: &str, + tenant_id: &Option, ) -> Result<(), ObjectStorageError> { let mut format: ObjectStoreFormat = serde_json::from_slice( &PARSEABLE .metastore - .get_stream_json(stream_name, false) + .get_stream_json(stream_name, false, tenant_id) .await .map_err(|e| ObjectStorageError::MetastoreError(Box::new(e.to_detail())))?, )?; format.first_event_at = Some(first_event.to_string()); PARSEABLE .metastore - .put_stream_json(&format, stream_name) + .put_stream_json(&format, stream_name, tenant_id) .await .map_err(|e| 
ObjectStorageError::MetastoreError(Box::new(e.to_detail())))?; @@ -480,11 +546,12 @@ pub trait ObjectStorage: Debug + Send + Sync + 'static { &self, stream_name: &str, stats: &FullStats, + tenant_id: &Option, ) -> Result<(), ObjectStorageError> { let mut stream_metadata: ObjectStoreFormat = serde_json::from_slice( &PARSEABLE .metastore - .get_stream_json(stream_name, false) + .get_stream_json(stream_name, false, tenant_id) .await .map_err(|e| ObjectStorageError::MetastoreError(Box::new(e.to_detail())))?, )?; @@ -493,7 +560,7 @@ pub trait ObjectStorage: Debug + Send + Sync + 'static { Ok(PARSEABLE .metastore - .put_stream_json(&stream_metadata, stream_name) + .put_stream_json(&stream_metadata, stream_name, tenant_id) .await .map_err(|e| ObjectStorageError::MetastoreError(Box::new(e.to_detail())))?) } @@ -502,11 +569,12 @@ pub trait ObjectStorage: Debug + Send + Sync + 'static { &self, stream_name: &str, retention: &Retention, + tenant_id: &Option, ) -> Result<(), ObjectStorageError> { let mut stream_metadata: ObjectStoreFormat = serde_json::from_slice( &PARSEABLE .metastore - .get_stream_json(stream_name, false) + .get_stream_json(stream_name, false, tenant_id) .await .map_err(|e| ObjectStorageError::MetastoreError(Box::new(e.to_detail())))?, )?; @@ -514,7 +582,7 @@ pub trait ObjectStorage: Debug + Send + Sync + 'static { Ok(PARSEABLE .metastore - .put_stream_json(&stream_metadata, stream_name) + .put_stream_json(&stream_metadata, stream_name, tenant_id) .await .map_err(|e| ObjectStorageError::MetastoreError(Box::new(e.to_detail())))?) } @@ -522,10 +590,11 @@ pub trait ObjectStorage: Debug + Send + Sync + 'static { async fn upsert_stream_metadata( &self, stream_name: &str, + tenant_id: &Option, ) -> Result { let stream_metadata = match PARSEABLE .metastore - .get_stream_json(stream_name, false) + .get_stream_json(stream_name, false, tenant_id) .await { Ok(data) => data, @@ -533,7 +602,7 @@ pub trait ObjectStorage: Debug + Send + Sync + 'static { // get the base stream metadata let bytes = PARSEABLE .metastore - .get_stream_json(stream_name, true) + .get_stream_json(stream_name, true, tenant_id) .await .map_err(|e| ObjectStorageError::MetastoreError(Box::new(e.to_detail())))?; @@ -547,7 +616,7 @@ pub trait ObjectStorage: Debug + Send + Sync + 'static { PARSEABLE .metastore - .put_stream_json(&config, stream_name) + .put_stream_json(&config, stream_name, tenant_id) .await .map_err(|e| ObjectStorageError::MetastoreError(Box::new(e.to_detail())))?; @@ -562,12 +631,13 @@ pub trait ObjectStorage: Debug + Send + Sync + 'static { &self, stream: &str, snapshot: Snapshot, + tenant_id: &Option, ) -> Result<(), ObjectStorageError> { - let mut stream_meta = self.upsert_stream_metadata(stream).await?; + let mut stream_meta = self.upsert_stream_metadata(stream, tenant_id).await?; stream_meta.snapshot = snapshot; Ok(PARSEABLE .metastore - .put_stream_json(&stream_meta, stream) + .put_stream_json(&stream_meta, stream, tenant_id) .await .map_err(|e| ObjectStorageError::MetastoreError(Box::new(e.to_detail())))?) 
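Each metadata setter in this block (time-partition limit, custom partition, log source, first event, stats, retention, snapshot) repeats the same tenant-scoped read-modify-write round trip against the metastore. A condensed, synchronous sketch of that shape, where `Metastore` and `StreamMeta` are stand-ins for the real async metastore and `ObjectStoreFormat`:

```rust
use std::collections::HashMap;

/// Stand-in for the metastore; the real one is async and persists the
/// stream JSON under a tenant-aware object-store path.
#[derive(Default)]
struct Metastore {
    stream_json: HashMap<(String, Option<String>), StreamMeta>,
}

#[derive(Clone, Default, Debug, PartialEq)]
struct StreamMeta {
    retention_days: Option<u32>,
}

impl Metastore {
    fn get_stream_json(&self, stream: &str, tenant: &Option<String>) -> Option<StreamMeta> {
        self.stream_json.get(&(stream.to_owned(), tenant.clone())).cloned()
    }
    fn put_stream_json(&mut self, meta: StreamMeta, stream: &str, tenant: &Option<String>) {
        self.stream_json.insert((stream.to_owned(), tenant.clone()), meta);
    }
}

/// The read-modify-write shape shared by the update_* helpers above:
/// fetch the stream JSON for the tenant, mutate one field, write it back.
fn update_retention(store: &mut Metastore, stream: &str, tenant: &Option<String>, days: u32) {
    let mut meta = store.get_stream_json(stream, tenant).unwrap_or_default();
    meta.retention_days = Some(days);
    store.put_stream_json(meta, stream, tenant);
}

fn main() {
    let mut store = Metastore::default();
    let tenant = Some("acme".to_owned());
    update_retention(&mut store, "app_logs", &tenant, 30);
    assert_eq!(
        store.get_stream_json("app_logs", &tenant),
        Some(StreamMeta { retention_days: Some(30) })
    );
}
```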
} @@ -576,9 +646,12 @@ pub trait ObjectStorage: Debug + Send + Sync + 'static { async fn create_stream_from_querier( &self, stream_name: &str, + tenant_id: &Option, ) -> Result { - if let Ok(querier_stream_json_bytes) = - PARSEABLE.metastore.get_stream_json(stream_name, true).await + if let Ok(querier_stream_json_bytes) = PARSEABLE + .metastore + .get_stream_json(stream_name, true, tenant_id) + .await { let querier_stream_metadata = serde_json::from_slice::(&querier_stream_json_bytes)?; @@ -590,7 +663,7 @@ pub trait ObjectStorage: Debug + Send + Sync + 'static { let stream_metadata_bytes: Bytes = serde_json::to_vec(&stream_metadata)?.into(); PARSEABLE .metastore - .put_stream_json(&stream_metadata, stream_name) + .put_stream_json(&stream_metadata, stream_name, tenant_id) .await .map_err(|e| ObjectStorageError::MetastoreError(Box::new(e.to_detail())))?; return Ok(stream_metadata_bytes); @@ -603,21 +676,25 @@ pub trait ObjectStorage: Debug + Send + Sync + 'static { async fn create_stream_from_ingestor( &self, stream_name: &str, + tenant_id: &Option, ) -> Result { // create only when stream name not found in memory - if PARSEABLE.get_stream(stream_name).is_ok() { + if PARSEABLE.get_stream(stream_name, tenant_id).is_ok() { let stream_metadata_bytes = PARSEABLE .metastore - .get_stream_json(stream_name, false) + .get_stream_json(stream_name, false, tenant_id) .await .map_err(|e| ObjectStorageError::MetastoreError(Box::new(e.to_detail())))?; return Ok(stream_metadata_bytes); } + tracing::warn!( + "unable to find stream- {stream_name} with tenant- {tenant_id:?} in PARSEABLE.get_stream" + ); let mut all_log_sources: Vec = Vec::new(); if let Some(stream_metadata_obs) = PARSEABLE .metastore - .get_all_stream_jsons(stream_name, Some(Mode::Ingest)) + .get_all_stream_jsons(stream_name, Some(Mode::Ingest), tenant_id) .await .into_iter() .next() @@ -628,6 +705,7 @@ pub trait ObjectStorage: Debug + Send + Sync + 'static { serde_json::from_slice::(stream_metadata_bytes)?; all_log_sources.extend(stream_ob_metadata.log_source.clone()); } + tracing::warn!("inserted {} stream metadata", all_log_sources.len()); // Merge log sources let mut merged_log_sources: Vec = Vec::new(); @@ -662,12 +740,13 @@ pub trait ObjectStorage: Debug + Send + Sync + 'static { let stream_metadata_bytes: Bytes = serde_json::to_vec(&stream_metadata)?.into(); PARSEABLE .metastore - .put_stream_json(&stream_metadata, stream_name) + .put_stream_json(&stream_metadata, stream_name, tenant_id) .await .map_err(|e| ObjectStorageError::MetastoreError(Box::new(e.to_detail())))?; return Ok(stream_metadata_bytes); } + tracing::warn!("returning empty bytes"); Ok(Bytes::new()) } @@ -675,13 +754,15 @@ pub trait ObjectStorage: Debug + Send + Sync + 'static { async fn create_schema_from_metastore( &self, stream_name: &str, + tenant_id: &Option, ) -> Result { - let schema = fetch_schema(stream_name).await?; + let schema = fetch_schema(stream_name, tenant_id).await?; + tracing::warn!("fetched schema- {schema:?}"); let schema_bytes = Bytes::from(serde_json::to_vec(&schema)?); // convert to bytes PARSEABLE .metastore - .put_schema(schema, stream_name) + .put_schema(schema, stream_name, tenant_id) .await .map_err(|e| ObjectStorageError::MetastoreError(Box::new(e.to_detail())))?; Ok(schema_bytes) @@ -690,11 +771,12 @@ pub trait ObjectStorage: Debug + Send + Sync + 'static { async fn get_log_source_from_storage( &self, stream_name: &str, + tenant_id: &Option, ) -> Result, ObjectStorageError> { let mut all_log_sources: Vec = Vec::new(); let stream_metas = 
PARSEABLE .metastore - .get_all_stream_jsons(stream_name, None) + .get_all_stream_jsons(stream_name, None, tenant_id) .await; if let Ok(stream_metas) = stream_metas { for stream_meta in stream_metas.iter() { @@ -746,9 +828,10 @@ pub trait ObjectStorage: Debug + Send + Sync + 'static { async fn get_first_and_latest_event_from_storage( &self, stream_name: &str, + tenant_id: &Option, ) -> Result<(Option, Option), ObjectStorageError> { // Get all available dates for the stream - let dates = self.list_dates(stream_name).await?; + let dates = self.list_dates(stream_name, tenant_id).await?; if dates.is_empty() { return Ok((None, None)); } @@ -778,10 +861,10 @@ pub trait ObjectStorage: Debug + Send + Sync + 'static { // Extract timestamps for min and max dates let first_timestamp = self - .extract_timestamp_for_date(stream_name, min_date, true) + .extract_timestamp_for_date(stream_name, min_date, true, tenant_id) .await?; let latest_timestamp = self - .extract_timestamp_for_date(stream_name, max_date, false) + .extract_timestamp_for_date(stream_name, max_date, false, tenant_id) .await?; let first_event_at = first_timestamp.map(|ts| ts.to_rfc3339()); @@ -796,9 +879,10 @@ pub trait ObjectStorage: Debug + Send + Sync + 'static { stream_name: &str, date: &str, find_min: bool, + tenant_id: &Option, ) -> Result>, ObjectStorageError> { // Get all hours for this date - let hours = self.list_hours(stream_name, date).await?; + let hours = self.list_hours(stream_name, date, tenant_id).await?; if hours.is_empty() { return Ok(None); } @@ -824,7 +908,7 @@ pub trait ObjectStorage: Debug + Send + Sync + 'static { // Get all minutes for the target hour let minutes = self - .list_minutes(stream_name, date, target_hour_str) + .list_minutes(stream_name, date, target_hour_str, tenant_id) .await?; if minutes.is_empty() { return Ok(None); @@ -870,24 +954,29 @@ pub trait ObjectStorage: Debug + Send + Sync + 'static { // pick a better name fn get_bucket_name(&self) -> String; - async fn upload_files_from_staging(&self, stream_name: &str) -> Result<(), ObjectStorageError> { + async fn upload_files_from_staging( + &self, + stream_name: &str, + tenant_id: Option, + ) -> Result<(), ObjectStorageError> { if !PARSEABLE.options.staging_dir().exists() { return Ok(()); } info!("Starting object_store_sync for stream- {stream_name}"); - let stream = PARSEABLE.get_or_create_stream(stream_name); + let stream = PARSEABLE.get_or_create_stream(stream_name, &tenant_id); let upload_context = UploadContext::new(stream); // Process parquet files concurrently and collect results - let manifest_files = process_parquet_files(&upload_context, stream_name).await?; + let manifest_files = + process_parquet_files(&upload_context, stream_name, tenant_id.clone()).await?; // Update snapshot with collected manifest files - update_snapshot_with_manifests(stream_name, manifest_files).await?; + update_snapshot_with_manifests(stream_name, manifest_files, &tenant_id).await?; // Process schema files - process_schema_files(&upload_context, stream_name).await?; + process_schema_files(&upload_context, stream_name, &tenant_id).await?; Ok(()) } @@ -897,6 +986,7 @@ pub trait ObjectStorage: Debug + Send + Sync + 'static { async fn process_parquet_files( upload_context: &UploadContext, stream_name: &str, + tenant_id: Option, ) -> Result, ObjectStorageError> { let semaphore = Arc::new(tokio::sync::Semaphore::new(100)); let mut join_set = JoinSet::new(); @@ -904,6 +994,7 @@ async fn process_parquet_files( // Spawn upload tasks for each parquet file for path in 
upload_context.stream.parquet_files() { + tracing::warn!(process_parquet_files_path=?path); spawn_parquet_upload_task( &mut join_set, semaphore.clone(), @@ -911,6 +1002,7 @@ async fn process_parquet_files( upload_context, stream_name, path, + tenant_id.clone(), ) .await; } @@ -927,15 +1019,22 @@ async fn spawn_parquet_upload_task( upload_context: &UploadContext, stream_name: &str, path: std::path::PathBuf, + tenant_id: Option, ) { let filename = path .file_name() .expect("only parquet files are returned by iterator") .to_str() .expect("filename is valid string"); + // tracing::warn!("spawn parquet file name- {filename}"); - let stream_relative_path = - stream_relative_path(stream_name, filename, &upload_context.custom_partition); + let stream_relative_path = stream_relative_path( + stream_name, + filename, + &upload_context.custom_partition, + &tenant_id, + ); + // tracing::warn!("spawn parquet stream_relative_path- {stream_relative_path}"); let stream_name = stream_name.to_string(); let schema = upload_context.schema.clone(); @@ -943,7 +1042,15 @@ async fn spawn_parquet_upload_task( join_set.spawn(async move { let _permit = semaphore.acquire().await.expect("semaphore is not closed"); - upload_single_parquet_file(store, path, stream_relative_path, stream_name, schema).await + upload_single_parquet_file( + store, + path, + stream_relative_path, + stream_name, + schema, + tenant_id, + ) + .await }); } @@ -994,9 +1101,10 @@ async fn collect_upload_results( async fn update_snapshot_with_manifests( stream_name: &str, manifest_files: Vec, + tenant_id: &Option, ) -> Result<(), ObjectStorageError> { if !manifest_files.is_empty() { - catalog::update_snapshot(stream_name, manifest_files).await?; + catalog::update_snapshot(stream_name, manifest_files, tenant_id).await?; } Ok(()) } @@ -1005,11 +1113,13 @@ async fn update_snapshot_with_manifests( async fn process_schema_files( upload_context: &UploadContext, stream_name: &str, + tenant_id: &Option, ) -> Result<(), ObjectStorageError> { for path in upload_context.stream.schema_files() { + tracing::warn!(upload_context_schema_files=?path); let file = File::open(&path)?; let schema: Schema = serde_json::from_reader(file)?; - commit_schema_to_storage(stream_name, schema).await?; + commit_schema_to_storage(stream_name, schema, tenant_id).await?; if let Err(e) = remove_file(path) { warn!("Failed to remove staged file: {e}"); @@ -1023,6 +1133,7 @@ fn stream_relative_path( stream_name: &str, filename: &str, custom_partition: &Option, + tenant_id: &Option, ) -> String { let mut file_suffix = str::replacen(filename, ".", "/", 3); @@ -1030,38 +1141,53 @@ fn stream_relative_path( let custom_partition_list = custom_partition_fields.split(',').collect::>(); file_suffix = str::replacen(filename, ".", "/", 3 + custom_partition_list.len()); } - - format!("{stream_name}/{file_suffix}") + if let Some(tenant) = tenant_id + && !tenant.eq(DEFAULT_TENANT) + { + format!("{tenant}/{stream_name}/{file_suffix}") + } else { + format!("{stream_name}/{file_suffix}") + } } pub fn sync_all_streams(joinset: &mut JoinSet>) { let object_store = PARSEABLE.storage().get_object_store(); - for stream_name in PARSEABLE.streams.list() { - let object_store = object_store.clone(); - joinset.spawn(async move { - let start = Instant::now(); - info!("Starting object_store_sync for stream- {stream_name}"); - let result = object_store.upload_files_from_staging(&stream_name).await; - if let Err(ref e) = result { - error!("Failed to upload files from staging for stream {stream_name}: {e}"); - } else { 
- info!( - "Completed object_store_sync for stream- {stream_name} in {} ms", - start.elapsed().as_millis() - ); - } - result - }); + let tenants = if let Some(tenants) = PARSEABLE.list_tenants() { + tenants.into_iter().map(|v| Some(v)).collect() + } else { + vec![None] + }; + for tenant_id in tenants { + for stream_name in PARSEABLE.streams.list(&tenant_id) { + let object_store = object_store.clone(); + let id = tenant_id.clone(); + joinset.spawn(async move { + let start = Instant::now(); + let result = object_store + .upload_files_from_staging(&stream_name, id) + .await; + if let Err(ref e) = result { + error!("Failed to upload files from staging for stream {stream_name}: {e}"); + } else { + info!( + "Completed object_store_sync for stream- {stream_name} in {} ms", + start.elapsed().as_millis() + ); + } + result + }); + } } } pub async fn commit_schema_to_storage( stream_name: &str, schema: Schema, + tenant_id: &Option, ) -> Result<(), ObjectStorageError> { let stream_schema = PARSEABLE .metastore - .get_schema(stream_name) + .get_schema(stream_name, tenant_id) .await .map_err(|e| ObjectStorageError::MetastoreError(Box::new(e.to_detail())))?; @@ -1073,7 +1199,7 @@ pub async fn commit_schema_to_storage( PARSEABLE .metastore - .put_schema(new_schema, stream_name) + .put_schema(new_schema, stream_name, tenant_id) .await .map_err(|e| ObjectStorageError::MetastoreError(Box::new(e.to_detail()))) } @@ -1085,7 +1211,8 @@ pub fn to_bytes(any: &(impl ?Sized + serde::Serialize)) -> Bytes { .expect("serialize cannot fail") } -pub fn schema_path(stream_name: &str) -> RelativePathBuf { +pub fn schema_path(stream_name: &str, tenant_id: &Option) -> RelativePathBuf { + let tenant = tenant_id.as_ref().map_or("", |v| v); if PARSEABLE.options.mode == Mode::Ingest { let id = INGESTOR_META .get() @@ -1093,23 +1220,25 @@ pub fn schema_path(stream_name: &str) -> RelativePathBuf { .get_node_id(); let file_name = format!(".ingestor.{id}{SCHEMA_FILE_NAME}"); - RelativePathBuf::from_iter([stream_name, STREAM_ROOT_DIRECTORY, &file_name]) + RelativePathBuf::from_iter([tenant, stream_name, STREAM_ROOT_DIRECTORY, &file_name]) } else { - RelativePathBuf::from_iter([stream_name, STREAM_ROOT_DIRECTORY, SCHEMA_FILE_NAME]) + RelativePathBuf::from_iter([tenant, stream_name, STREAM_ROOT_DIRECTORY, SCHEMA_FILE_NAME]) } } #[inline(always)] -pub fn stream_json_path(stream_name: &str) -> RelativePathBuf { +pub fn stream_json_path(stream_name: &str, tenant_id: &Option) -> RelativePathBuf { + let tenant = tenant_id.as_ref().map_or("", |v| v); if PARSEABLE.options.mode == Mode::Ingest { let id = INGESTOR_META .get() .unwrap_or_else(|| panic!("{}", INGESTOR_EXPECT)) .get_node_id(); let file_name = format!(".ingestor.{id}{STREAM_METADATA_FILE_NAME}",); - RelativePathBuf::from_iter([stream_name, STREAM_ROOT_DIRECTORY, &file_name]) + RelativePathBuf::from_iter([tenant, stream_name, STREAM_ROOT_DIRECTORY, &file_name]) } else { RelativePathBuf::from_iter([ + tenant, stream_name, STREAM_ROOT_DIRECTORY, STREAM_METADATA_FILE_NAME, @@ -1137,8 +1266,16 @@ pub fn parseable_json_path() -> RelativePathBuf { /// TODO: Needs to be updated for distributed mode #[inline(always)] -pub fn alert_json_path(alert_id: Ulid) -> RelativePathBuf { - RelativePathBuf::from_iter([ALERTS_ROOT_DIRECTORY, &format!("{alert_id}.json")]) +pub fn alert_json_path(alert_id: Ulid, tenant_id: &Option) -> RelativePathBuf { + if let Some(tenant_id) = tenant_id.as_ref() { + RelativePathBuf::from_iter([ + tenant_id, + ALERTS_ROOT_DIRECTORY, + &format!("{alert_id}.json"), + ]) + 
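The path helpers touched here (`stream_relative_path`, `schema_path`, `stream_json_path`, `alert_json_path`, and `mttr_json_path` just below) share one convention: a key only gains a leading tenant segment when a non-default tenant is present, so existing single-tenant layouts stay unchanged. A sketch of that convention; the diff itself uses two slightly different forms (an empty segment for schema/stream paths, an explicit `Some`/`None` branch for alert paths), and `DEFAULT_TENANT` is again a stand-in:

```rust
/// Sketch of the tenant-prefixed key layout introduced by this change.
const DEFAULT_TENANT: &str = "default";

fn stream_relative_path(stream: &str, file_suffix: &str, tenant_id: &Option<String>) -> String {
    match tenant_id.as_deref() {
        // Non-default tenants get their own top-level prefix in object storage.
        Some(tenant) if tenant != DEFAULT_TENANT => format!("{tenant}/{stream}/{file_suffix}"),
        // No tenant (or the default tenant) keeps the legacy layout.
        _ => format!("{stream}/{file_suffix}"),
    }
}

fn main() {
    assert_eq!(
        stream_relative_path("app_logs", "date=2025-01-01/00.abc.parquet", &None),
        "app_logs/date=2025-01-01/00.abc.parquet"
    );
    assert_eq!(
        stream_relative_path("app_logs", "date=2025-01-01/00.abc.parquet", &Some("acme".into())),
        "acme/app_logs/date=2025-01-01/00.abc.parquet"
    );
}
```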
} else { + RelativePathBuf::from_iter([ALERTS_ROOT_DIRECTORY, &format!("{alert_id}.json")]) + } } /// TODO: Needs to be updated for distributed mode @@ -1164,8 +1301,12 @@ pub fn alert_state_json_path(alert_id: Ulid) -> RelativePathBuf { /// Constructs the path for storing MTTR history JSON file /// Format: ".alerts/mttr.json" #[inline(always)] -pub fn mttr_json_path() -> RelativePathBuf { - RelativePathBuf::from_iter([ALERTS_ROOT_DIRECTORY, "mttr.json"]) +pub fn mttr_json_path(tenant_id: &Option) -> RelativePathBuf { + if let Some(tenant) = tenant_id.as_ref() { + RelativePathBuf::from_iter([&tenant, ALERTS_ROOT_DIRECTORY, "mttr.json"]) + } else { + RelativePathBuf::from_iter([ALERTS_ROOT_DIRECTORY, "mttr.json"]) + } } #[inline(always)] diff --git a/src/storage/retention.rs b/src/storage/retention.rs index db4b6ca8b..c42b2253c 100644 --- a/src/storage/retention.rs +++ b/src/storage/retention.rs @@ -45,26 +45,34 @@ pub fn init_scheduler() { let mut scheduler = AsyncScheduler::new(); let func = move || async { //get retention every day at 12 am - for stream_name in PARSEABLE.streams.list() { - match PARSEABLE.get_stream(&stream_name) { - Ok(stream) => { - if let Some(config) = stream.get_retention() { - for Task { action, days, .. } in config.tasks.into_iter() { - match action { - Action::Delete => { - let stream_name = stream_name.clone(); - tokio::spawn(async move { - action::delete(stream_name, u32::from(days)).await; - }); - } - }; + let tenants = if let Some(tenants) = PARSEABLE.list_tenants() { + tenants.into_iter().map(|v| Some(v)).collect() + } else { + vec![None] + }; + for tenant_id in tenants { + for stream_name in PARSEABLE.streams.list(&tenant_id) { + match PARSEABLE.get_stream(&stream_name, &tenant_id) { + Ok(stream) => { + if let Some(config) = stream.get_retention() { + for Task { action, days, .. } in config.tasks.into_iter() { + match action { + Action::Delete => { + let stream_name = stream_name.clone(); + let id = tenant_id.clone(); + tokio::spawn(async move { + action::delete(stream_name, u32::from(days), &id).await; + }); + } + }; + } } } - } - Err(err) => { - warn!("failed to load retention config for {stream_name} due to {err:?}") - } - }; + Err(err) => { + warn!("failed to load retention config for {stream_name} due to {err:?}") + } + }; + } } }; @@ -177,13 +185,13 @@ mod action { use relative_path::RelativePathBuf; use tracing::{error, info}; - pub(super) async fn delete(stream_name: String, days: u32) { + pub(super) async fn delete(stream_name: String, days: u32, tenant_id: &Option) { info!("running retention task - delete for stream={stream_name}"); let store = PARSEABLE.storage.get_object_store(); let retain_until = get_retain_until(Utc::now().date_naive(), days as u64); - let Ok(mut dates) = store.list_dates(&stream_name).await else { + let Ok(mut dates) = store.list_dates(&stream_name, tenant_id).await else { return; }; dates.retain(|date| date.starts_with("date")); @@ -195,7 +203,7 @@ mod action { if !dates.is_empty() { let delete_tasks = FuturesUnordered::new(); if let Err(err) = - remove_manifest_from_snapshot(store.clone(), &stream_name, dates.clone()).await + remove_manifest_from_snapshot(&store, &stream_name, dates.clone(), tenant_id).await { error!( "Failed to update snapshot for retention cleanup (stream={}): {}. 
Aborting delete.", @@ -210,7 +218,7 @@ mod action { PARSEABLE .storage .get_object_store() - .delete_prefix(&path) + .delete_prefix(&path, tenant_id) .await }); } diff --git a/src/storage/s3.rs b/src/storage/s3.rs index 17c360251..5ec506769 100644 --- a/src/storage/s3.rs +++ b/src/storage/s3.rs @@ -56,7 +56,7 @@ use crate::{ increment_files_scanned_in_object_store_calls_by_date, increment_object_store_calls_by_date, }, - parseable::LogStream, + parseable::{DEFAULT_TENANT, LogStream}, }; use super::{ @@ -339,9 +339,18 @@ pub struct S3 { } impl S3 { - async fn _get_object(&self, path: &RelativePath) -> Result { + async fn _get_object( + &self, + path: &RelativePath, + tenant_id: &Option, + ) -> Result { + let tenant_str = tenant_id.as_deref().unwrap_or(DEFAULT_TENANT); let resp = self.client.get(&to_object_store_path(path)).await; - increment_object_store_calls_by_date("GET", &Utc::now().date_naive().to_string()); + increment_object_store_calls_by_date( + "GET", + &Utc::now().date_naive().to_string(), + tenant_str, + ); match resp { Ok(resp) => { @@ -350,11 +359,13 @@ impl S3 { "GET", 1, &Utc::now().date_naive().to_string(), + tenant_str, ); increment_bytes_scanned_in_object_store_calls_by_date( "GET", body.len() as u64, &Utc::now().date_naive().to_string(), + tenant_str, ); Ok(body) } @@ -366,15 +377,22 @@ impl S3 { &self, path: &RelativePath, resource: PutPayload, + tenant_id: &Option, ) -> Result<(), ObjectStorageError> { + let tenant_str = tenant_id.as_deref().unwrap_or(DEFAULT_TENANT); let resp = self.client.put(&to_object_store_path(path), resource).await; - increment_object_store_calls_by_date("PUT", &Utc::now().date_naive().to_string()); + increment_object_store_calls_by_date( + "PUT", + &Utc::now().date_naive().to_string(), + tenant_str, + ); match resp { Ok(_) => { increment_files_scanned_in_object_store_calls_by_date( "PUT", 1, &Utc::now().date_naive().to_string(), + tenant_str, ); Ok(()) } @@ -382,13 +400,23 @@ impl S3 { } } - async fn _delete_prefix(&self, key: &str) -> Result<(), ObjectStorageError> { + async fn _delete_prefix( + &self, + key: &str, + tenant_id: &Option, + ) -> Result<(), ObjectStorageError> { + let tenant_str = tenant_id.as_deref().unwrap_or(DEFAULT_TENANT); let files_scanned = Arc::new(AtomicU64::new(0)); let files_deleted = Arc::new(AtomicU64::new(0)); // Track LIST operation let object_stream = self.client.list(Some(&(key.into()))); - increment_object_store_calls_by_date("LIST", &Utc::now().date_naive().to_string()); + increment_object_store_calls_by_date( + "LIST", + &Utc::now().date_naive().to_string(), + tenant_str, + ); + let tenant_str_clone = tenant_str.to_string(); object_stream .for_each_concurrent(None, |x| async { files_scanned.fetch_add(1, Ordering::Relaxed); @@ -400,6 +428,7 @@ impl S3 { increment_object_store_calls_by_date( "DELETE", &Utc::now().date_naive().to_string(), + &tenant_str_clone, ); if delete_resp.is_err() { error!( @@ -419,21 +448,32 @@ impl S3 { "LIST", files_scanned.load(Ordering::Relaxed), &Utc::now().date_naive().to_string(), + tenant_str, ); increment_files_scanned_in_object_store_calls_by_date( "DELETE", files_deleted.load(Ordering::Relaxed), &Utc::now().date_naive().to_string(), + tenant_str, ); Ok(()) } - async fn _list_dates(&self, stream: &str) -> Result, ObjectStorageError> { + async fn _list_dates( + &self, + stream: &str, + tenant_id: &Option, + ) -> Result, ObjectStorageError> { + let tenant_str = tenant_id.as_deref().unwrap_or(DEFAULT_TENANT); let resp: Result = self .client 
.list_with_delimiter(Some(&(stream.into()))) .await; - increment_object_store_calls_by_date("LIST", &Utc::now().date_naive().to_string()); + increment_object_store_calls_by_date( + "LIST", + &Utc::now().date_naive().to_string(), + tenant_str, + ); let resp = match resp { Ok(resp) => resp, @@ -448,6 +488,7 @@ impl S3 { "LIST", common_prefixes.len() as u64, &Utc::now().date_naive().to_string(), + tenant_str, ); // return prefixes at the root level @@ -460,17 +501,28 @@ impl S3 { Ok(dates) } - async fn _upload_file(&self, key: &str, path: &Path) -> Result<(), ObjectStorageError> { + async fn _upload_file( + &self, + key: &str, + path: &Path, + tenant_id: &Option, + ) -> Result<(), ObjectStorageError> { + let tenant_str = tenant_id.as_deref().unwrap_or(DEFAULT_TENANT); let bytes = tokio::fs::read(path).await?; let result = self.client.put(&key.into(), bytes.into()).await; - increment_object_store_calls_by_date("PUT", &Utc::now().date_naive().to_string()); + increment_object_store_calls_by_date( + "PUT", + &Utc::now().date_naive().to_string(), + tenant_str, + ); match result { Ok(_) => { increment_files_scanned_in_object_store_calls_by_date( "PUT", 1, &Utc::now().date_naive().to_string(), + tenant_str, ); Ok(()) } @@ -482,7 +534,9 @@ impl S3 { &self, key: &RelativePath, path: &Path, + tenant_id: &Option, ) -> Result<(), ObjectStorageError> { + let tenant_str = tenant_id.as_deref().unwrap_or(DEFAULT_TENANT); let mut file = OpenOptions::new().read(true).open(path).await?; let location = &to_object_store_path(key); @@ -502,13 +556,18 @@ impl S3 { // Track single PUT operation for small files let result = self.client.put(location, data.into()).await; - increment_object_store_calls_by_date("PUT", &Utc::now().date_naive().to_string()); + increment_object_store_calls_by_date( + "PUT", + &Utc::now().date_naive().to_string(), + tenant_str, + ); match result { Ok(_) => { increment_files_scanned_in_object_store_calls_by_date( "PUT", 1, &Utc::now().date_naive().to_string(), + tenant_str, ); } Err(err) => { @@ -551,6 +610,7 @@ impl S3 { increment_object_store_calls_by_date( "PUT_MULTIPART", &Utc::now().date_naive().to_string(), + tenant_str, ); } @@ -571,16 +631,23 @@ impl ObjectStorage for S3 { async fn get_buffered_reader( &self, path: &RelativePath, + tenant_id: &Option, ) -> Result { + let tenant_str = tenant_id.as_deref().unwrap_or(DEFAULT_TENANT); let path = &to_object_store_path(path); let meta = self.client.head(path).await; - increment_object_store_calls_by_date("HEAD", &Utc::now().date_naive().to_string()); + increment_object_store_calls_by_date( + "HEAD", + &Utc::now().date_naive().to_string(), + tenant_str, + ); let meta = match meta { Ok(meta) => { increment_files_scanned_in_object_store_calls_by_date( "HEAD", 1, &Utc::now().date_naive().to_string(), + tenant_str, ); meta } @@ -598,33 +665,50 @@ impl ObjectStorage for S3 { &self, key: &RelativePath, path: &Path, + tenant_id: &Option, ) -> Result<(), ObjectStorageError> { - self._upload_multipart(key, path).await + self._upload_multipart(key, path, tenant_id).await } - async fn head(&self, path: &RelativePath) -> Result { + async fn head( + &self, + path: &RelativePath, + tenant_id: &Option, + ) -> Result { + let tenant_str = tenant_id.as_deref().unwrap_or(DEFAULT_TENANT); let result = self.client.head(&to_object_store_path(path)).await; - increment_object_store_calls_by_date("HEAD", &Utc::now().date_naive().to_string()); + increment_object_store_calls_by_date( + "HEAD", + &Utc::now().date_naive().to_string(), + tenant_str, + ); if 
result.is_ok() { increment_files_scanned_in_object_store_calls_by_date( "HEAD", 1, &Utc::now().date_naive().to_string(), + tenant_str, ); } Ok(result?) } - async fn get_object(&self, path: &RelativePath) -> Result { - Ok(self._get_object(path).await?) + async fn get_object( + &self, + path: &RelativePath, + tenant_id: &Option, + ) -> Result { + Ok(self._get_object(path, tenant_id).await?) } async fn get_objects( &self, base_path: Option<&RelativePath>, filter_func: Box bool + Send>, + tenant_id: &Option, ) -> Result, ObjectStorageError> { + let tenant_str = tenant_id.as_deref().unwrap_or(DEFAULT_TENANT); let prefix = if let Some(base_path) = base_path { to_object_store_path(base_path) } else { @@ -656,6 +740,7 @@ impl ObjectStorage for S3 { .get_object( RelativePath::from_path(meta.location.as_ref()) .map_err(ObjectStorageError::PathError)?, + tenant_id, ) .await?; res.push(byts); @@ -665,19 +750,30 @@ impl ObjectStorage for S3 { "LIST", files_scanned as u64, &Utc::now().date_naive().to_string(), + tenant_str, + ); + increment_object_store_calls_by_date( + "LIST", + &Utc::now().date_naive().to_string(), + tenant_str, ); - increment_object_store_calls_by_date("LIST", &Utc::now().date_naive().to_string()); Ok(res) } async fn get_ingestor_meta_file_paths( &self, + tenant_id: &Option, ) -> Result, ObjectStorageError> { + let tenant_str = tenant_id.as_deref().unwrap_or(DEFAULT_TENANT); let mut path_arr = vec![]; let mut files_scanned = 0; let mut object_stream = self.client.list(Some(&self.root)); - increment_object_store_calls_by_date("LIST", &Utc::now().date_naive().to_string()); + increment_object_store_calls_by_date( + "LIST", + &Utc::now().date_naive().to_string(), + tenant_str, + ); while let Some(meta_result) = object_stream.next().await { let meta = match meta_result { @@ -699,6 +795,7 @@ impl ObjectStorage for S3 { "LIST", files_scanned as u64, &Utc::now().date_naive().to_string(), + tenant_str, ); Ok(path_arr) } @@ -707,69 +804,104 @@ impl ObjectStorage for S3 { &self, path: &RelativePath, resource: Bytes, + tenant_id: &Option, ) -> Result<(), ObjectStorageError> { - self._put_object(path, resource.into()) + self._put_object(path, resource.into(), tenant_id) .await .map_err(|err| ObjectStorageError::ConnectionError(Box::new(err)))?; Ok(()) } - async fn delete_prefix(&self, path: &RelativePath) -> Result<(), ObjectStorageError> { - self._delete_prefix(path.as_ref()).await?; + async fn delete_prefix( + &self, + path: &RelativePath, + tenant_id: &Option, + ) -> Result<(), ObjectStorageError> { + self._delete_prefix(path.as_ref(), tenant_id).await?; Ok(()) } - async fn delete_object(&self, path: &RelativePath) -> Result<(), ObjectStorageError> { + async fn delete_object( + &self, + path: &RelativePath, + tenant_id: &Option, + ) -> Result<(), ObjectStorageError> { + let tenant_str = tenant_id.as_deref().unwrap_or(DEFAULT_TENANT); let result = self.client.delete(&to_object_store_path(path)).await; - increment_object_store_calls_by_date("DELETE", &Utc::now().date_naive().to_string()); + increment_object_store_calls_by_date( + "DELETE", + &Utc::now().date_naive().to_string(), + tenant_str, + ); if result.is_ok() { increment_files_scanned_in_object_store_calls_by_date( "DELETE", 1, &Utc::now().date_naive().to_string(), + tenant_str, ); } Ok(result?) 
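One recurring idiom worth calling out: both `sync_all_streams` and the retention scheduler earlier in this diff fan work out per tenant, and a deployment with no tenant registry degrades to a single `None` entry so the existing single-tenant path keeps running. A sketch of that enumeration, with `list_tenants` standing in for `PARSEABLE.list_tenants()`:

```rust
/// Stand-in for `PARSEABLE.list_tenants()`; returns `None` when the
/// deployment has no tenant registry at all.
fn list_tenants() -> Option<Vec<String>> {
    None
}

/// The fan-out used by the staging sync and the retention scheduler:
/// every tenant gets its own iteration, and "no tenants" means one `None` pass.
fn tenants_to_process() -> Vec<Option<String>> {
    match list_tenants() {
        Some(tenants) => tenants.into_iter().map(Some).collect(),
        None => vec![None],
    }
}

fn main() {
    // With no registry the loop still runs once, with `None`, which the
    // downstream helpers map to the default tenant.
    assert_eq!(tenants_to_process(), vec![None]);
}
```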
} - async fn check(&self) -> Result<(), ObjectStorageError> { + async fn check(&self, tenant_id: &Option) -> Result<(), ObjectStorageError> { + let tenant_str = tenant_id.as_deref().unwrap_or(DEFAULT_TENANT); let result = self .client .head(&to_object_store_path(&parseable_json_path())) .await; - increment_object_store_calls_by_date("HEAD", &Utc::now().date_naive().to_string()); + increment_object_store_calls_by_date( + "HEAD", + &Utc::now().date_naive().to_string(), + tenant_str, + ); if result.is_ok() { increment_files_scanned_in_object_store_calls_by_date( "HEAD", 1, &Utc::now().date_naive().to_string(), + tenant_str, ); } Ok(result.map(|_| ())?) } - async fn delete_stream(&self, stream_name: &str) -> Result<(), ObjectStorageError> { - self._delete_prefix(stream_name).await?; + async fn delete_stream( + &self, + stream_name: &str, + tenant_id: &Option, + ) -> Result<(), ObjectStorageError> { + self._delete_prefix(stream_name, tenant_id).await?; Ok(()) } - async fn try_delete_node_meta(&self, node_filename: String) -> Result<(), ObjectStorageError> { + async fn try_delete_node_meta( + &self, + node_filename: String, + tenant_id: &Option, + ) -> Result<(), ObjectStorageError> { + let tenant_str = tenant_id.as_deref().unwrap_or(DEFAULT_TENANT); let file = RelativePathBuf::from(&node_filename); let result = self.client.delete(&to_object_store_path(&file)).await; - increment_object_store_calls_by_date("DELETE", &Utc::now().date_naive().to_string()); + increment_object_store_calls_by_date( + "DELETE", + &Utc::now().date_naive().to_string(), + tenant_str, + ); match result { Ok(_) => { increment_files_scanned_in_object_store_calls_by_date( "DELETE", 1, &Utc::now().date_naive().to_string(), + tenant_str, ); Ok(()) } @@ -785,14 +917,20 @@ impl ObjectStorage for S3 { } async fn list_old_streams(&self) -> Result, ObjectStorageError> { + let tenant_str = DEFAULT_TENANT; let resp = self.client.list_with_delimiter(None).await?; let common_prefixes = resp.common_prefixes; // get all dirs increment_files_scanned_in_object_store_calls_by_date( "LIST", common_prefixes.len() as u64, &Utc::now().date_naive().to_string(), + tenant_str, + ); + increment_object_store_calls_by_date( + "LIST", + &Utc::now().date_naive().to_string(), + tenant_str, ); - increment_object_store_calls_by_date("LIST", &Utc::now().date_naive().to_string()); // return prefixes at the root level let dirs: HashSet<_> = common_prefixes .iter() @@ -807,7 +945,11 @@ impl ObjectStorage for S3 { let key = format!("{dir}/{STREAM_METADATA_FILE_NAME}"); let task = async move { let result = self.client.head(&StorePath::from(key)).await; - increment_object_store_calls_by_date("HEAD", &Utc::now().date_naive().to_string()); + increment_object_store_calls_by_date( + "HEAD", + &Utc::now().date_naive().to_string(), + "", + ); result.map(|_| ()) }; stream_json_check.push(task); @@ -816,14 +958,19 @@ impl ObjectStorage for S3 { "HEAD", dirs.len() as u64, &Utc::now().date_naive().to_string(), + tenant_str, ); stream_json_check.try_collect::<()>().await?; Ok(dirs) } - async fn list_dates(&self, stream_name: &str) -> Result, ObjectStorageError> { - let streams = self._list_dates(stream_name).await?; + async fn list_dates( + &self, + stream_name: &str, + tenant_id: &Option, + ) -> Result, ObjectStorageError> { + let streams = self._list_dates(stream_name, tenant_id).await?; Ok(streams) } @@ -832,15 +979,22 @@ impl ObjectStorage for S3 { &self, stream_name: &str, date: &str, + tenant_id: &Option, ) -> Result, ObjectStorageError> { + let tenant_str = 
tenant_id.as_deref().unwrap_or(DEFAULT_TENANT); let pre = object_store::path::Path::from(format!("{}/{}/", stream_name, date)); let resp = self.client.list_with_delimiter(Some(&pre)).await?; increment_files_scanned_in_object_store_calls_by_date( "LIST", resp.common_prefixes.len() as u64, &Utc::now().date_naive().to_string(), + tenant_str, + ); + increment_object_store_calls_by_date( + "LIST", + &Utc::now().date_naive().to_string(), + tenant_str, ); - increment_object_store_calls_by_date("LIST", &Utc::now().date_naive().to_string()); let hours: Vec = resp .common_prefixes @@ -867,15 +1021,22 @@ impl ObjectStorage for S3 { stream_name: &str, date: &str, hour: &str, + tenant_id: &Option, ) -> Result, ObjectStorageError> { + let tenant_str = tenant_id.as_deref().unwrap_or(DEFAULT_TENANT); let pre = object_store::path::Path::from(format!("{}/{}/{}/", stream_name, date, hour)); let resp = self.client.list_with_delimiter(Some(&pre)).await?; increment_files_scanned_in_object_store_calls_by_date( "LIST", resp.common_prefixes.len() as u64, &Utc::now().date_naive().to_string(), + tenant_str, + ); + increment_object_store_calls_by_date( + "LIST", + &Utc::now().date_naive().to_string(), + tenant_str, ); - increment_object_store_calls_by_date("LIST", &Utc::now().date_naive().to_string()); let minutes: Vec = resp .common_prefixes .iter() @@ -897,8 +1058,13 @@ impl ObjectStorage for S3 { Ok(minutes) } - async fn upload_file(&self, key: &str, path: &Path) -> Result<(), ObjectStorageError> { - Ok(self._upload_file(key, path).await?) + async fn upload_file( + &self, + key: &str, + path: &Path, + tenant_id: &Option, + ) -> Result<(), ObjectStorageError> { + Ok(self._upload_file(key, path, tenant_id).await?) } fn absolute_url(&self, prefix: &RelativePath) -> object_store::path::Path { @@ -919,16 +1085,25 @@ impl ObjectStorage for S3 { url::Url::parse(&format!("s3://{}", self.bucket)).unwrap() } - async fn list_dirs(&self) -> Result, ObjectStorageError> { + async fn list_dirs( + &self, + tenant_id: &Option, + ) -> Result, ObjectStorageError> { + let tenant_str = tenant_id.as_deref().unwrap_or(DEFAULT_TENANT); let pre = object_store::path::Path::from("/"); let resp = self.client.list_with_delimiter(Some(&pre)).await; - increment_object_store_calls_by_date("LIST", &Utc::now().date_naive().to_string()); + increment_object_store_calls_by_date( + "LIST", + &Utc::now().date_naive().to_string(), + tenant_str, + ); let resp = match resp { Ok(resp) => { increment_files_scanned_in_object_store_calls_by_date( "LIST", resp.common_prefixes.len() as u64, &Utc::now().date_naive().to_string(), + tenant_str, ); resp @@ -949,17 +1124,24 @@ impl ObjectStorage for S3 { async fn list_dirs_relative( &self, relative_path: &RelativePath, + tenant_id: &Option, ) -> Result, ObjectStorageError> { + let tenant_str = tenant_id.as_deref().unwrap_or(DEFAULT_TENANT); let prefix = object_store::path::Path::from(relative_path.as_str()); let resp = self.client.list_with_delimiter(Some(&prefix)).await; - increment_object_store_calls_by_date("LIST", &Utc::now().date_naive().to_string()); + increment_object_store_calls_by_date( + "LIST", + &Utc::now().date_naive().to_string(), + tenant_str, + ); let resp = match resp { Ok(resp) => { increment_files_scanned_in_object_store_calls_by_date( "LIST", resp.common_prefixes.len() as u64, &Utc::now().date_naive().to_string(), + tenant_str, ); resp diff --git a/src/storage/store_metadata.rs b/src/storage/store_metadata.rs index 5a7057cd8..042309408 100644 --- a/src/storage/store_metadata.rs +++ 
b/src/storage/store_metadata.rs @@ -17,7 +17,7 @@ */ use std::{ - collections::HashMap, + collections::{HashMap, HashSet}, fs::{self, OpenOptions, create_dir_all}, path::PathBuf, }; @@ -36,6 +36,7 @@ use crate::{ user::{User, UserGroup}, }, storage::{ObjectStorageError, object_storage::parseable_json_path}, + tenants::Service, utils::uid, }; @@ -68,6 +69,7 @@ pub struct StorageMetadata { pub roles: HashMap>, #[serde(default)] pub default_role: Option, + pub suspended_services: Option>, } impl Default for StorageMetadata { @@ -84,6 +86,7 @@ impl Default for StorageMetadata { streams: Vec::new(), roles: HashMap::default(), default_role: None, + suspended_services: None, } } } @@ -120,6 +123,7 @@ impl MetastoreObject for StorageMetadata { /// overwrites staging metadata while updating storage info pub async fn resolve_parseable_metadata( parseable_metadata: &Option, + tenant_id: &Option, ) -> Result { let staging_metadata = get_staging_metadata()?; let remote_metadata = parseable_metadata @@ -133,10 +137,10 @@ pub async fn resolve_parseable_metadata( metadata.server_mode = PARSEABLE.options.mode; if overwrite_remote { - put_remote_metadata(&metadata).await?; + put_remote_metadata(&metadata, tenant_id).await?; } if overwrite_staging { - put_staging_metadata(&metadata)?; + put_staging_metadata(&metadata, tenant_id)?; } Ok(metadata) @@ -289,22 +293,33 @@ pub fn get_staging_metadata() -> io::Result> { Ok(Some(meta)) } -pub async fn put_remote_metadata(metadata: &StorageMetadata) -> Result<(), ObjectStorageError> { +pub async fn put_remote_metadata( + metadata: &StorageMetadata, + tenant_id: &Option, +) -> Result<(), ObjectStorageError> { PARSEABLE .metastore - .put_parseable_metadata(metadata) + .put_parseable_metadata(metadata, tenant_id) .await .map_err(|e| ObjectStorageError::MetastoreError(Box::new(e.to_detail()))) } -pub fn put_staging_metadata(meta: &StorageMetadata) -> io::Result<()> { +pub fn put_staging_metadata(meta: &StorageMetadata, tenant_id: &Option) -> io::Result<()> { let mut staging_metadata = meta.clone(); staging_metadata.server_mode = PARSEABLE.options.mode; staging_metadata.staging = PARSEABLE.options.staging_dir().to_path_buf(); - let path = PARSEABLE - .options - .staging_dir() - .join(PARSEABLE_METADATA_FILE_NAME); + let path = if let Some(tenant_id) = tenant_id.as_ref() { + PARSEABLE + .options + .staging_dir() + .join(tenant_id) + .join(PARSEABLE_METADATA_FILE_NAME) + } else { + PARSEABLE + .options + .staging_dir() + .join(PARSEABLE_METADATA_FILE_NAME) + }; let mut file = OpenOptions::new() .create(true) .truncate(true) diff --git a/src/tenants/mod.rs b/src/tenants/mod.rs new file mode 100644 index 000000000..4afd216d4 --- /dev/null +++ b/src/tenants/mod.rs @@ -0,0 +1,158 @@ +/* + * Parseable Server (C) 2022 - 2025 Parseable, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as + * published by the Free Software Foundation, either version 3 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . 
+ * + */ + +use std::{collections::HashSet, sync::Arc}; + +use dashmap::DashMap; +use itertools::Itertools; +use once_cell::sync::Lazy; +use serde::{Deserialize, Serialize}; + +use crate::{rbac::role::Action, storage::StorageMetadata}; + +pub static TENANT_METADATA: Lazy> = + Lazy::new(|| Arc::new(TenantMetadata::default())); + +#[derive(Default)] +pub struct TenantMetadata { + tenants: DashMap, +} + +#[derive(Default, PartialEq, Eq)] +pub struct TenantOverview { + suspended_services: HashSet, + meta: StorageMetadata, +} + +#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, Hash)] +#[serde(rename_all = "camelCase")] +pub enum Service { + Ingest, + Query, + Workspace, +} + +impl TenantMetadata { + pub fn insert_tenant(&self, tenant_id: String, meta: StorageMetadata) { + let suspensions = meta.suspended_services.clone().unwrap_or_default(); + self.tenants.insert( + tenant_id, + TenantOverview { + suspended_services: suspensions, + meta, + }, + ); + } + + pub fn suspend_service(&self, tenant_id: &str, service: Service) { + if let Some(mut tenant) = self.tenants.get_mut(tenant_id) { + tenant.suspended_services.insert(service); + } + } + + pub fn resume_service(&self, tenant_id: &str, service: Service) { + if let Some(mut tenant) = self.tenants.get_mut(tenant_id) { + tenant.suspended_services.remove(&service); + } + } + + pub fn delete_tenant(&self, tenant_id: &str) { + self.tenants.remove(tenant_id); + } + + pub fn is_action_suspended( + &self, + tenant_id: &str, + action: &Action, + ) -> Result, TenantNotFound> { + if let Some(tenant) = self.tenants.get(tenant_id) { + let states = &tenant.value().suspended_services; + if states.contains(&Service::Ingest) && action.eq(&Action::Ingest) { + Ok(Some("Ingestion is suspended for your workspace".into())) + } else if states.contains(&Service::Query) && action.eq(&Action::Query) { + Ok(Some("Querying is suspended for your workspace".into())) + } else if states.contains(&Service::Workspace) { + Ok(Some("Your workspace is suspended".into())) + } else { + Ok(None) + } + } else { + return Err(TenantNotFound(tenant_id.to_owned())); + } + } + + pub fn get_tenants(&self) -> Vec<(String, StorageMetadata)> { + self.tenants + .iter() + .map(|k| (k.key().clone(), k.value().meta.clone())) + .collect_vec() + } +} + +#[derive(Debug, thiserror::Error)] +#[error("Tenant not found: {0}")] +pub struct TenantNotFound(pub String); + +// // Type for serialization and deserialization +// #[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)] +// pub struct TenantMetadata { +// pub tenant_id: String, +// pub version: String, +// pub mode: String, +// pub staging: PathBuf, +// pub storage: String, +// #[serde(default = "crate::utils::uid::generate_ulid")] +// pub deployment_id: uid::Uid, +// pub users: Vec, +// pub user_groups: Vec, +// pub streams: Vec, +// pub server_mode: Mode, +// #[serde(default)] +// pub roles: HashMap>, +// #[serde(default)] +// pub default_role: Option, +// } + +// impl TenantMetadata { +// pub fn from_storage_meta(meta: StorageMetadata, id: &str) -> Self { +// Self { +// tenant_id: id.to_owned(), +// version: meta.version, +// mode: meta.mode, +// staging: meta.staging, +// storage: meta.storage, +// deployment_id: meta.deployment_id, +// users: meta.users, +// user_groups: meta.user_groups, +// streams: meta.streams, +// server_mode: meta.server_mode, +// roles: meta.roles, +// default_role: meta.default_role, +// } +// } +// } + +// impl MetastoreObject for TenantMetadata { +// fn get_object_path(&self) -> String { 
+// format!("{}/.parseable.json", &self.tenant_id) +// } + +// fn get_object_id(&self) -> String { +// self.tenant_id.clone() +// } +// } diff --git a/src/users/dashboards.rs b/src/users/dashboards.rs index 8900096e2..68852169b 100644 --- a/src/users/dashboards.rs +++ b/src/users/dashboards.rs @@ -16,6 +16,8 @@ * */ +use std::collections::HashMap; + use chrono::{DateTime, Utc}; use once_cell::sync::Lazy; use relative_path::RelativePathBuf; @@ -27,7 +29,7 @@ use ulid::Ulid; use crate::{ handlers::http::users::{DASHBOARDS_DIR, USERS_ROOT_DIR, dashboards::DashboardError}, metastore::metastore_traits::MetastoreObject, - parseable::PARSEABLE, + parseable::{DEFAULT_TENANT, PARSEABLE}, }; pub static DASHBOARDS: Lazy = Lazy::new(Dashboards::default); @@ -170,40 +172,52 @@ pub fn validate_dashboard_id(dashboard_id: String) -> Result>); +pub struct Dashboards(RwLock>>); impl Dashboards { /// Load all dashboards from the object store /// and store them in memory /// This function is called on server start pub async fn load(&self) -> anyhow::Result<()> { - let mut this = vec![]; + let mut this: HashMap> = HashMap::new(); let all_dashboards = PARSEABLE.metastore.get_dashboards().await?; - for dashboard in all_dashboards { - if dashboard.is_empty() { + for (tenant_id, dashboards) in all_dashboards { + let mut tenant_dbs = Vec::new(); + + if dashboards.is_empty() { continue; } - let dashboard_value = match serde_json::from_slice::(&dashboard) { - Ok(value) => value, - Err(err) => { - tracing::warn!("Failed to parse dashboard JSON: {}", err); - continue; + for dashboard in dashboards { + let dashboard_value = match serde_json::from_slice::(&dashboard) + { + Ok(value) => value, + Err(err) => { + tracing::warn!("Failed to parse dashboard JSON: {}", err); + continue; + } + }; + + if let Ok(dashboard) = serde_json::from_value::(dashboard_value.clone()) + { + tenant_dbs.retain(|d: &Dashboard| d.dashboard_id != dashboard.dashboard_id); + tenant_dbs.push(dashboard); + + // this.retain(|d: &Dashboard| d.dashboard_id != dashboard.dashboard_id); + // this.push(dashboard); + } else { + tracing::warn!("Failed to deserialize dashboard: {:?}", dashboard_value); } - }; - - if let Ok(dashboard) = serde_json::from_value::(dashboard_value.clone()) { - this.retain(|d: &Dashboard| d.dashboard_id != dashboard.dashboard_id); - this.push(dashboard); - } else { - tracing::warn!("Failed to deserialize dashboard: {:?}", dashboard_value); } + + this.entry(tenant_id).or_default().extend(tenant_dbs); } let mut s = self.0.write().await; - s.append(&mut this); + // s.append(&mut this); + s.clone_from(&this); Ok(()) } @@ -214,8 +228,12 @@ impl Dashboards { &self, // user_id: &str, dashboard: &Dashboard, + tenant_id: &Option, ) -> Result<(), DashboardError> { - PARSEABLE.metastore.put_dashboard(dashboard).await?; + PARSEABLE + .metastore + .put_dashboard(dashboard, tenant_id) + .await?; Ok(()) } @@ -227,24 +245,25 @@ impl Dashboards { &self, user_id: &str, dashboard: &mut Dashboard, + tenant_id: &Option, ) -> Result<(), DashboardError> { dashboard.created = Some(Utc::now()); dashboard.set_metadata(user_id, None); - + let tenant = tenant_id.as_ref().map_or(DEFAULT_TENANT, |v| v); let mut dashboards = self.0.write().await; - let has_duplicate = dashboards - .iter() - .any(|d| d.title == dashboard.title && d.dashboard_id != dashboard.dashboard_id); + if let Some(dbs) = dashboards.get_mut(tenant) { + let has_duplicate = dbs + .iter() + .any(|d| d.title == dashboard.title && d.dashboard_id != dashboard.dashboard_id); + if has_duplicate { + 
return Err(DashboardError::Metadata("Dashboard title must be unique")); + } + self.save_dashboard(dashboard, tenant_id).await?; - if has_duplicate { - return Err(DashboardError::Metadata("Dashboard title must be unique")); + dbs.push(dashboard.clone()); } - self.save_dashboard(dashboard).await?; - - dashboards.push(dashboard.clone()); - Ok(()) } @@ -256,34 +275,52 @@ impl Dashboards { user_id: &str, dashboard_id: Ulid, dashboard: &mut Dashboard, + tenant_id: &Option, ) -> Result<(), DashboardError> { let mut dashboards = self.0.write().await; + let tenant = tenant_id.as_ref().map_or(DEFAULT_TENANT, |v| v); + if let Some(dbs) = dashboards.get_mut(tenant) { + let existing_dashboard = dbs + .iter() + .find(|d| { + d.dashboard_id == Some(dashboard_id) && d.author == Some(user_id.to_string()) + }) + .cloned() + .ok_or_else(|| { + DashboardError::Metadata( + "Dashboard does not exist or you do not have permission to access it", + ) + })?; + dashboard.set_metadata(user_id, Some(dashboard_id)); + dashboard.created = existing_dashboard.created; + + let has_duplicate = dbs + .iter() + .any(|d| d.title == dashboard.title && d.dashboard_id != dashboard.dashboard_id); + + if has_duplicate { + return Err(DashboardError::Metadata("Dashboard title must be unique")); + } - let existing_dashboard = dashboards - .iter() - .find(|d| d.dashboard_id == Some(dashboard_id) && d.author == Some(user_id.to_string())) - .cloned() - .ok_or_else(|| { - DashboardError::Metadata( - "Dashboard does not exist or you do not have permission to access it", - ) - })?; - - dashboard.set_metadata(user_id, Some(dashboard_id)); - dashboard.created = existing_dashboard.created; - - let has_duplicate = dashboards - .iter() - .any(|d| d.title == dashboard.title && d.dashboard_id != dashboard.dashboard_id); - - if has_duplicate { - return Err(DashboardError::Metadata("Dashboard title must be unique")); - } - - self.save_dashboard(dashboard).await?; - - dashboards.retain(|d| d.dashboard_id != Some(dashboard_id)); - dashboards.push(dashboard.clone()); + self.save_dashboard(dashboard, tenant_id).await?; + + dbs.retain(|d| d.dashboard_id != Some(dashboard_id)); + dbs.push(dashboard.clone()); + } else { + return Err(DashboardError::Metadata( + "Dashboard does not exist or you do not have permission to access it", + )); + }; + + // let existing_dashboard = dashboards + // .iter() + // .find(|d| d.dashboard_id == Some(dashboard_id) && d.author == Some(user_id.to_string())) + // .cloned() + // .ok_or_else(|| { + // DashboardError::Metadata( + // "Dashboard does not exist or you do not have permission to access it", + // ) + // })?; Ok(()) } @@ -296,38 +333,53 @@ impl Dashboards { user_id: &str, dashboard_id: Ulid, is_admin: bool, + tenant_id: &Option, ) -> Result<(), DashboardError> { let obj = self - .ensure_dashboard_ownership(dashboard_id, user_id, is_admin) + .ensure_dashboard_ownership(dashboard_id, user_id, is_admin, tenant_id) .await?; { // validation has happened, dashboard exists and can be deleted by the user - PARSEABLE.metastore.delete_dashboard(&obj).await?; + PARSEABLE + .metastore + .delete_dashboard(&obj, tenant_id) + .await?; } // delete from in-memory self.0 .write() .await - .retain(|d| d.dashboard_id != Some(dashboard_id)); + .get_mut(tenant_id.as_ref().map_or(DEFAULT_TENANT, |v| v)) + .map(|dashboards| dashboards.retain(|d| d.dashboard_id != Some(dashboard_id))) + .iter() + .for_each(drop); Ok(()) } /// Get a dashboard by ID /// fetch dashboard from memory - pub async fn get_dashboard(&self, dashboard_id: Ulid) -> Option 
{ - self.0 - .read() - .await - .iter() - .find(|d| { - d.dashboard_id - .as_ref() - .is_some_and(|id| *id == dashboard_id) - }) - .cloned() + pub async fn get_dashboard( + &self, + dashboard_id: Ulid, + tenant_id: &Option, + ) -> Option { + let tenant_id = tenant_id.as_ref().map_or(DEFAULT_TENANT, |v| v); + let dashboards = self.0.read().await; + if let Some(dashboards) = dashboards.get(tenant_id) { + dashboards + .iter() + .find(|d| { + d.dashboard_id + .as_ref() + .is_some_and(|id| *id == dashboard_id) + }) + .cloned() + } else { + None + } } /// Get a dashboard by ID and user ID @@ -337,31 +389,63 @@ impl Dashboards { dashboard_id: Ulid, user_id: &str, is_admin: bool, + tenant_id: &Option, ) -> Option { - self.0 - .read() - .await - .iter() - .find(|d| { - d.dashboard_id - .as_ref() - .is_some_and(|id| *id == dashboard_id) - && (d.author == Some(user_id.to_string()) || is_admin) - }) - .cloned() + let tenant_id = tenant_id.as_ref().map_or(DEFAULT_TENANT, |v| v); + let dashboards = self.0.read().await; + if let Some(dashboards) = dashboards.get(tenant_id) { + dashboards + .iter() + .find(|d| { + d.dashboard_id + .as_ref() + .is_some_and(|id| *id == dashboard_id) + && (d.author == Some(user_id.to_string()) || is_admin) + }) + .cloned() + } else { + None + } + // self.0 + // .read() + // .await + // .iter() + // .find(|d| { + // d.dashboard_id + // .as_ref() + // .is_some_and(|id| *id == dashboard_id) + // && (d.author == Some(user_id.to_string()) || is_admin) + // }) + // .cloned() } /// List all dashboards /// fetch all dashboards from memory - pub async fn list_dashboards(&self, limit: usize) -> Vec { + pub async fn list_dashboards( + &self, + limit: usize, + tenant_id: &Option, + ) -> Vec { // limit the number of dashboards returned in order of modified date // if limit is 0, return all dashboards + let tenant_id = tenant_id.as_ref().map_or(DEFAULT_TENANT, |v| v); let dashboards = self.0.read().await; - let mut sorted_dashboards = dashboards - .iter() - .filter(|d| d.dashboard_id.is_some()) - .cloned() - .collect::>(); + + let mut sorted_dashboards = if let Some(dashboards) = dashboards.get(tenant_id) { + dashboards + .iter() + .filter(|d| d.dashboard_id.is_some()) + .cloned() + .collect::>() + } else { + vec![] + }; + + // let mut sorted_dashboards = dashboards + // .iter() + // .filter(|d| d.dashboard_id.is_some()) + // .cloned() + // .collect::>(); sorted_dashboards.sort_by_key(|d| std::cmp::Reverse(d.modified)); if limit > 0 { sorted_dashboards.truncate(limit); @@ -371,13 +455,23 @@ impl Dashboards { /// List tags from all dashboards /// This function returns a list of unique tags from all dashboards - pub async fn list_tags(&self) -> Vec { + pub async fn list_tags(&self, tenant_id: &Option) -> Vec { + let tenant_id = tenant_id.as_ref().map_or(DEFAULT_TENANT, |v| v); let dashboards = self.0.read().await; - let mut tags = dashboards - .iter() - .filter_map(|d| d.tags.as_ref()) - .flat_map(|t| t.iter().cloned()) - .collect::>(); + let mut tags = if let Some(dashboards) = dashboards.get(tenant_id) { + dashboards + .iter() + .filter_map(|d| d.tags.as_ref()) + .flat_map(|t| t.iter().cloned()) + .collect::>() + } else { + vec![] + }; + // let mut tags = dashboards + // .iter() + // .filter_map(|d| d.tags.as_ref()) + // .flat_map(|t| t.iter().cloned()) + // .collect::>(); tags.sort(); tags.dedup(); tags @@ -386,21 +480,30 @@ impl Dashboards { /// List dashboards by tag /// This function returns a list of dashboards that match any of the provided tags /// If no tags are provided, it 
@@ -386,21 +480,30 @@ impl Dashboards {
     /// List dashboards by tag
     /// This function returns a list of dashboards that match any of the provided tags
     /// If no tags are provided, it returns an empty list
-    pub async fn list_dashboards_by_tags(&self, tags: Vec<String>) -> Vec<Dashboard> {
+    pub async fn list_dashboards_by_tags(
+        &self,
+        tags: Vec<String>,
+        tenant_id: &Option<String>,
+    ) -> Vec<Dashboard> {
+        let tenant_id = tenant_id.as_ref().map_or(DEFAULT_TENANT, |v| v);
         let dashboards = self.0.read().await;
-        dashboards
-            .iter()
-            .filter(|d| {
-                if let Some(dashboard_tags) = &d.tags {
-                    dashboard_tags
-                        .iter()
-                        .any(|dashboard_tag| tags.contains(dashboard_tag))
-                } else {
-                    false
-                }
-            })
-            .cloned()
-            .collect()
+        if let Some(dashboards) = dashboards.get(tenant_id) {
+            dashboards
+                .iter()
+                .filter(|d| {
+                    if let Some(dashboard_tags) = &d.tags {
+                        dashboard_tags
+                            .iter()
+                            .any(|dashboard_tag| tags.contains(dashboard_tag))
+                    } else {
+                        false
+                    }
+                })
+                .cloned()
+                .collect()
+        } else {
+            vec![]
+        }
     }
 
     /// Ensure the user is the owner of the dashboard
@@ -412,8 +515,9 @@ impl Dashboards {
         dashboard_id: Ulid,
         user_id: &str,
         is_admin: bool,
+        tenant_id: &Option<String>,
     ) -> Result<Dashboard, DashboardError> {
-        self.get_dashboard_by_user(dashboard_id, user_id, is_admin)
+        self.get_dashboard_by_user(dashboard_id, user_id, is_admin, tenant_id)
             .await
             .ok_or_else(|| {
                 DashboardError::Metadata(
diff --git a/src/users/filters.rs b/src/users/filters.rs
index 780386ceb..70e3448c6 100644
--- a/src/users/filters.rs
+++ b/src/users/filters.rs
@@ -16,6 +16,8 @@
  *
  */
 
+use std::collections::HashMap;
+
 use once_cell::sync::Lazy;
 use serde::{Deserialize, Serialize};
 use serde_json::Value;
@@ -24,10 +26,10 @@ use tokio::sync::RwLock;
 use super::TimeFilter;
 use crate::{
     metastore::metastore_traits::MetastoreObject,
-    parseable::PARSEABLE,
+    parseable::{DEFAULT_TENANT, PARSEABLE},
     rbac::{Users, map::SessionKey},
     storage::object_storage::filter_path,
-    utils::{get_hash, user_auth_for_datasets, user_auth_for_query},
+    utils::{get_hash, get_tenant_id_from_key, user_auth_for_datasets, user_auth_for_query},
 };
 
 pub static FILTERS: Lazy<Filters> = Lazy::new(Filters::default);
@@ -109,27 +111,34 @@ pub struct Rules {
 }
 
 #[derive(Debug, Default)]
-pub struct Filters(RwLock<Vec<Filter>>);
+pub struct Filters(RwLock<HashMap<String, Vec<Filter>>>);
 
 impl Filters {
     pub async fn load(&self) -> anyhow::Result<()> {
         let all_filters = PARSEABLE.metastore.get_filters().await.unwrap_or_default();
         let mut s = self.0.write().await;
-        s.extend(all_filters);
+        for (tenant_id, filters) in all_filters {
+            s.entry(tenant_id).or_default().extend(filters);
+        }
         Ok(())
     }
 
-    pub async fn update(&self, filter: &Filter) {
+    pub async fn update(&self, filter: &Filter, tenant_id: &Option<String>) {
         let mut s = self.0.write().await;
-        s.retain(|f| f.filter_id != filter.filter_id);
-        s.push(filter.clone());
+        if let Some(filters) = s.get_mut(tenant_id.as_ref().map_or(DEFAULT_TENANT, |v| v)) {
+            filters.retain(|f| f.filter_id != filter.filter_id);
+            filters.push(filter.clone());
+        }
     }
 
-    pub async fn delete_filter(&self, filter_id: &str) {
+    pub async fn delete_filter(&self, filter_id: &str, tenant_id: &Option<String>) {
+        let tenant_id = tenant_id.as_ref().map_or(DEFAULT_TENANT, |v| v);
         let mut s = self.0.write().await;
-        s.retain(|f| f.filter_id != Some(filter_id.to_string()));
+        if let Some(filters) = s.get_mut(tenant_id) {
+            filters.retain(|f| f.filter_id != Some(filter_id.to_string()));
+        }
     }
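For saved filters the in-memory layout changes the same way, from a flat Vec to a map keyed by tenant. The sketch below mirrors the load/update/delete flow against a plain HashMap with a stand-in Filter type and no persistence; note that, unlike the patch above, update here creates a missing tenant bucket with entry().or_default() instead of silently dropping the write, which may be worth considering for the real code as well.

use std::collections::HashMap;

const DEFAULT_TENANT: &str = "default";

#[derive(Clone, Debug)]
struct Filter {
    filter_id: Option<String>,
}

#[derive(Default)]
struct Filters(HashMap<String, Vec<Filter>>);

impl Filters {
    // load(): merge the per-tenant lists coming back from the metastore.
    fn load(&mut self, all_filters: HashMap<String, Vec<Filter>>) {
        for (tenant_id, filters) in all_filters {
            self.0.entry(tenant_id).or_default().extend(filters);
        }
    }

    // update(): replace any filter with the same id inside the tenant's bucket.
    // Creating the bucket on demand avoids dropping writes for brand-new tenants.
    fn update(&mut self, filter: &Filter, tenant_id: &Option<String>) {
        let tenant = tenant_id.as_deref().unwrap_or(DEFAULT_TENANT);
        let filters = self.0.entry(tenant.to_string()).or_default();
        filters.retain(|f| f.filter_id != filter.filter_id);
        filters.push(filter.clone());
    }

    // delete(): scoped to the tenant's bucket only.
    fn delete(&mut self, filter_id: &str, tenant_id: &Option<String>) {
        let tenant = tenant_id.as_deref().unwrap_or(DEFAULT_TENANT);
        if let Some(filters) = self.0.get_mut(tenant) {
            filters.retain(|f| f.filter_id != Some(filter_id.to_string()));
        }
    }
}

fn main() {
    let mut filters = Filters::default();
    filters.load(HashMap::from([(DEFAULT_TENANT.to_string(), Vec::new())]));
    let f = Filter { filter_id: Some("f1".into()) };
    filters.update(&f, &None);
    filters.delete("f1", &None);
    assert!(filters.0.get(DEFAULT_TENANT).is_some_and(|v| v.is_empty()));
}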
 
     pub async fn get_filter(
@@ -137,44 +146,53 @@
         filter_id: &str,
         user_id: &str,
         is_admin: bool,
+        tenant_id: &Option<String>,
     ) -> Option<Filter> {
-        self.0
-            .read()
-            .await
-            .iter()
-            .find(|f| {
-                f.filter_id == Some(filter_id.to_string())
-                    && (f.user_id == Some(user_id.to_string()) || is_admin)
-            })
-            .cloned()
+        let tenant_id = tenant_id.as_ref().map_or(DEFAULT_TENANT, |v| v);
+        if let Some(filters) = self.0.read().await.get(tenant_id) {
+            filters
+                .iter()
+                .find(|f| {
+                    f.filter_id == Some(filter_id.to_string())
+                        && (f.user_id == Some(user_id.to_string()) || is_admin)
+                })
+                .cloned()
+        } else {
+            None
+        }
     }
 
     pub async fn list_filters(&self, key: &SessionKey) -> Vec<Filter> {
         let read = self.0.read().await;
-
-        let mut filters = Vec::new();
+        let tenant_id = get_tenant_id_from_key(key);
+        let tenant = tenant_id.as_ref().map_or(DEFAULT_TENANT, |v| v);
+        let mut accessible_filters = Vec::new();
         let permissions = Users.get_permissions(key);
-        for f in read.iter() {
-            let query: &str = f.query.filter_query.as_deref().unwrap_or("");
-            let filter_type = &f.query.filter_type;
-
-            // if filter type is SQL, check if the user has access to the dataset based on the query string
-            // if filter type is search or filter, check if the user has access to the dataset based on the dataset name
-            if *filter_type == FilterType::SQL {
-                if (user_auth_for_query(key, query).await).is_ok() {
-                    filters.push(f.clone())
-                }
-            } else if *filter_type == FilterType::Search || *filter_type == FilterType::Filter {
-                let dataset_name = &f.stream_name;
-                if user_auth_for_datasets(&permissions, &[dataset_name.to_string()])
-                    .await
-                    .is_ok()
-                {
-                    filters.push(f.clone())
+        if let Some(filters) = read.get(tenant) {
+            for f in filters.iter() {
+                let query: &str = f.query.filter_query.as_deref().unwrap_or("");
+                let filter_type = &f.query.filter_type;
+
+                // if filter type is SQL, check if the user has access to the dataset based on the query string
+                // if filter type is search or filter, check if the user has access to the dataset based on the dataset name
+                if *filter_type == FilterType::SQL {
+                    if (user_auth_for_query(key, query).await).is_ok() {
+                        accessible_filters.push(f.clone())
+                    }
+                } else if *filter_type == FilterType::Search || *filter_type == FilterType::Filter {
+                    let dataset_name = &f.stream_name;
+                    if user_auth_for_datasets(&permissions, &[dataset_name.to_string()], &tenant_id)
+                        .await
+                        .is_ok()
+                    {
+                        accessible_filters.push(f.clone())
+                    }
                 }
             }
+            accessible_filters
+        } else {
+            vec![]
         }
-        filters
     }
 }
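The tenant-scoped list_filters above combines two checks: SQL filters are returned only if the caller may run the stored query, while search/filter entries are gated on dataset access. A simplified sketch of that branching, with hypothetical stand-in types and auth functions standing in for user_auth_for_query and user_auth_for_datasets:

#[allow(dead_code)]
#[derive(Clone, Debug)]
enum FilterType {
    Sql,
    Search,
    Filter,
}

#[derive(Clone, Debug)]
struct Filter {
    filter_type: FilterType,
    filter_query: Option<String>,
    stream_name: String,
}

// Stand-in for user_auth_for_query(key, query).
fn user_can_run_query(_query: &str) -> bool {
    true
}

// Stand-in for user_auth_for_datasets(&permissions, &[dataset], &tenant_id).
fn user_can_read_dataset(_dataset: &str) -> bool {
    true
}

// SQL filters are gated on the stored query; search/filter entries on the dataset name.
fn list_accessible(filters: &[Filter]) -> Vec<Filter> {
    let mut accessible = Vec::new();
    for f in filters {
        let allowed = match f.filter_type {
            FilterType::Sql => user_can_run_query(f.filter_query.as_deref().unwrap_or("")),
            FilterType::Search | FilterType::Filter => user_can_read_dataset(&f.stream_name),
        };
        if allowed {
            accessible.push(f.clone());
        }
    }
    accessible
}

fn main() {
    let filters = vec![Filter {
        filter_type: FilterType::Sql,
        filter_query: Some("select * from app_logs".into()),
        stream_name: "app_logs".into(),
    }];
    assert_eq!(list_accessible(&filters).len(), 1);
}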
diff --git a/src/utils/arrow/flight.rs b/src/utils/arrow/flight.rs
index b78d185c2..481f6c56a 100644
--- a/src/utils/arrow/flight.rs
+++ b/src/utils/arrow/flight.rs
@@ -97,7 +97,7 @@ pub async fn append_temporary_events(
     Status,
 > {
     let schema = PARSEABLE
-        .get_stream(stream_name)
+        .get_stream(stream_name, &None)
         .map_err(|err| Status::failed_precondition(format!("Metadata Error: {err}")))?
         .get_schema();
     let rb = concat_batches(&schema, minute_result)
diff --git a/src/utils/mod.rs b/src/utils/mod.rs
index 57cbe18fa..e9508db74 100644
--- a/src/utils/mod.rs
+++ b/src/utils/mod.rs
@@ -27,7 +27,7 @@ pub mod uid;
 pub mod update;
 
 use crate::handlers::http::rbac::RBACError;
-use crate::parseable::PARSEABLE;
+use crate::parseable::{DEFAULT_TENANT, PARSEABLE};
 use crate::query::resolve_stream_names;
 use crate::rbac::Users;
 use crate::rbac::map::SessionKey;
@@ -58,14 +58,45 @@ pub fn extract_datetime(path: &str) -> Option {
     }
 }
 
-pub fn get_user_from_request(req: &HttpRequest) -> Result<String, RBACError> {
+pub fn get_user_and_tenant_from_request(
+    req: &HttpRequest,
+) -> Result<(String, Option<String>), RBACError> {
     let session_key = extract_session_key_from_req(req).map_err(|_| RBACError::UserDoesNotExist)?;
-    let user_id = Users.get_userid_from_session(&session_key);
-    if user_id.is_none() {
+    match &session_key {
+        SessionKey::BasicAuth { username, password } => {
+            if let Some(user) = Users.get_user_from_basic(&username, &password) {
+                return Ok((username.clone(), user.tenant.clone()));
+            }
+        }
+        SessionKey::SessionId(_) => {}
+    }
+    let Some((user_id, tenant_id)) = Users.get_userid_from_session(&session_key) else {
         return Err(RBACError::UserDoesNotExist);
+    };
+    let tenant_id = if tenant_id.eq(DEFAULT_TENANT) {
+        None
+    } else {
+        Some(tenant_id)
+    };
+    Ok((user_id, tenant_id))
+}
+
+pub fn get_tenant_id_from_request(req: &HttpRequest) -> Option<String> {
+    req.headers()
+        .get("tenant")
+        .and_then(|tenant_value| tenant_value.to_str().ok())
+        .map(|tenant_value| tenant_value.to_owned())
+}
+
+pub fn get_tenant_id_from_key(key: &SessionKey) -> Option<String> {
+    if let Some((_, tenant_id)) = Users.get_userid_from_session(key)
+        && tenant_id.ne(DEFAULT_TENANT)
+    {
+        Some(tenant_id.clone())
+    } else {
+        None
     }
-    let user_id = user_id.unwrap();
-    Ok(user_id)
 }
 
 pub fn get_hash(key: &str) -> String {
@@ -82,13 +113,15 @@ pub async fn user_auth_for_query(
     let tables = resolve_stream_names(query).map_err(|e| {
         actix_web::error::ErrorBadRequest(format!("Failed to extract table names: {e}"))
     })?;
+    let tenant_id = get_tenant_id_from_key(session_key);
     let permissions = Users.get_permissions(session_key);
-    user_auth_for_datasets(&permissions, &tables).await
+    user_auth_for_datasets(&permissions, &tables, &tenant_id).await
 }
 
 pub async fn user_auth_for_datasets(
     permissions: &[Permission],
     tables: &[String],
+    tenant_id: &Option<String>,
 ) -> Result<(), actix_web::error::Error> {
     for table_name in tables {
         let mut authorized = false;
@@ -101,23 +134,29 @@
                     authorized = true;
                     break;
                 }
-                Permission::Resource(Action::Query, ParseableResourceType::Stream(stream)) => {
-                    if !PARSEABLE.check_or_load_stream(stream).await {
+                Permission::Resource(
+                    Action::Query,
+                    Some(ParseableResourceType::Stream(stream)),
+                ) => {
+                    if !PARSEABLE.check_or_load_stream(stream, tenant_id).await {
                         return Err(actix_web::error::ErrorUnauthorized(format!(
                             "Stream not found: {table_name}"
                         )));
                     }
-                    let is_internal = PARSEABLE.get_stream(table_name).is_ok_and(|stream| {
-                        stream
-                            .get_stream_type()
-                            .eq(&crate::storage::StreamType::Internal)
-                    });
+                    let is_internal =
+                        PARSEABLE
+                            .get_stream(table_name, tenant_id)
+                            .is_ok_and(|stream| {
+                                stream
+                                    .get_stream_type()
+                                    .eq(&crate::storage::StreamType::Internal)
+                            });
                     if stream == table_name || stream == "*" || is_internal {
                         authorized = true;
                     }
                 }
-                Permission::Resource(action, ParseableResourceType::All)
+                Permission::Resource(action, Some(ParseableResourceType::All))
                     if ![
                         Action::All,
                         Action::PutUser,
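The permission walk in user_auth_for_datasets now matches on an Option-wrapped resource type and threads the tenant into the stream lookup. A hedged, self-contained sketch of that matching logic; the enums and the stream-existence check below are simplified stand-ins, not the real RBAC types:

#[allow(dead_code)]
#[derive(Debug)]
enum Action {
    All,
    Query,
}

#[allow(dead_code)]
#[derive(Debug)]
enum ResourceType {
    All,
    Stream(String),
}

#[allow(dead_code)]
#[derive(Debug)]
enum Permission {
    Unit(Action),
    Resource(Action, Option<ResourceType>),
}

// Stand-in for PARSEABLE.check_or_load_stream(stream, tenant_id).
fn stream_exists(_stream: &str, _tenant_id: &Option<String>) -> bool {
    true
}

// A wildcard Query permission authorizes any table; a Stream permission authorizes
// its own stream (or "*"); everything else falls through unauthorized.
fn authorized_for_table(
    permissions: &[Permission],
    table_name: &str,
    tenant_id: &Option<String>,
) -> bool {
    for permission in permissions {
        match permission {
            Permission::Resource(Action::Query, Some(ResourceType::All)) => return true,
            Permission::Resource(Action::Query, Some(ResourceType::Stream(stream))) => {
                if stream_exists(stream, tenant_id) && (stream == table_name || stream == "*") {
                    return true;
                }
            }
            _ => {}
        }
    }
    false
}

fn main() {
    let perms = vec![Permission::Resource(
        Action::Query,
        Some(ResourceType::Stream("app_logs".into())),
    )];
    assert!(authorized_for_table(&perms, "app_logs", &None));
    assert!(!authorized_for_table(&perms, "other_stream", &None));
}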
@@ -156,7 +195,7 @@ pub fn is_admin(req: &HttpRequest) -> Result {
     // Check if user has admin permissions (Action::All on All resources)
     for permission in permissions.iter() {
         match permission {
-            Permission::Resource(Action::All, ParseableResourceType::All) => {
+            Permission::Resource(Action::All, Some(ParseableResourceType::All)) => {
                 return Ok(true);
             }
             _ => continue,
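The admin check reduces to scanning the caller's permissions for Action::All over Some(ResourceType::All). A small stand-alone sketch using the same simplified stand-in enums as the previous example:

#[allow(dead_code)]
enum Action {
    All,
    Query,
}

#[allow(dead_code)]
enum ResourceType {
    All,
    Stream(String),
}

enum Permission {
    Resource(Action, Option<ResourceType>),
}

// Admin means holding Action::All over Some(ResourceType::All).
fn is_admin(permissions: &[Permission]) -> bool {
    permissions
        .iter()
        .any(|p| matches!(p, Permission::Resource(Action::All, Some(ResourceType::All))))
}

fn main() {
    let perms = vec![Permission::Resource(Action::All, Some(ResourceType::All))];
    assert!(is_admin(&perms));
}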