Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/handlers/http/modal/utils/ingest_utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -267,7 +267,7 @@ fn verify_dataset_fields_count(
let dataset_fields_warn_threshold = 0.8 * PARSEABLE.options.dataset_fields_allowed_limit as f64;
// Check if the fields count exceeds the warn threshold
if fields_count > dataset_fields_warn_threshold as usize {
tracing::warn!(
tracing::trace!(
"Dataset {0} has {1} fields, which exceeds the warning threshold of {2}. Ingestion will not be possible after reaching {3} fields. We recommend creating a new dataset.",
stream_name,
fields_count,
Expand Down
2 changes: 1 addition & 1 deletion src/rbac/role.rs
Original file line number Diff line number Diff line change
Expand Up @@ -321,7 +321,7 @@ pub mod model {

fn super_admin_perm_builder() -> RoleBuilder {
RoleBuilder {
actions: vec![Action::SuperAdmin],
actions: vec![Action::SuperAdmin, Action::All],
resource_type: Some(ParseableResourceType::All),
}
}
Expand Down
1 change: 1 addition & 0 deletions src/rbac/user.rs
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ pub struct User {
pub roles: HashSet<String>,
pub user_groups: HashSet<String>,
pub tenant: Option<String>,
#[serde(default)]
pub protected: bool,
}

Expand Down
111 changes: 63 additions & 48 deletions src/storage/field_stats.rs
Original file line number Diff line number Diff line change
Expand Up @@ -640,7 +640,10 @@ pub fn build_stats_sql(
.iter()
.map(|f| format!("'{}'", f.replace('\'', "''")))
.collect();
format!("AND rv.field_name IN ({})", quoted_fields.join(", "))
format!(
"AND field_stats_field_name IN ({})",
quoted_fields.join(", ")
)
} else {
String::default()
}
Expand All @@ -654,53 +657,65 @@ pub fn build_stats_sql(
let rn_end = offset + limit;

format!(
"WITH field_totals AS (
SELECT
field_stats_field_name,
SUM(field_stats_count) as total_field_count
FROM (
SELECT DISTINCT
p_timestamp,
field_stats_field_name,
field_stats_count
FROM {DATASET_STATS_STREAM_NAME}
WHERE dataset_name = '{dataset_name}'
) deduped
GROUP BY field_stats_field_name
),
ranked_values AS (
SELECT
field_stats_field_name as field_name,
field_stats_distinct_stats_distinct_value as distinct_value,
SUM(field_stats_distinct_stats_count) as distinct_value_count,
ROW_NUMBER() OVER (
PARTITION BY field_stats_field_name
ORDER BY SUM(field_stats_distinct_stats_count) DESC
) as rn
FROM {DATASET_STATS_STREAM_NAME}
WHERE dataset_name = '{dataset_name}'
AND field_stats_distinct_stats_distinct_value IS NOT NULL
GROUP BY field_stats_field_name, field_stats_distinct_stats_distinct_value
),
field_distinct_counts AS (
SELECT
field_name,
COUNT(*) as distinct_count
FROM ranked_values
GROUP BY field_name
)
SELECT
rv.field_name,
ft.total_field_count as field_count,
fdc.distinct_count,
rv.distinct_value,
rv.distinct_value_count
FROM ranked_values rv
JOIN field_totals ft ON rv.field_name = ft.field_stats_field_name
JOIN field_distinct_counts fdc ON rv.field_name = fdc.field_name
WHERE rv.rn > {rn_start} AND rv.rn <= {rn_end}
{fields_filter}
ORDER BY rv.field_name, rv.distinct_value_count DESC"
"WITH
ranked_values AS (
SELECT
field_stats_field_name AS field_name,
field_stats_distinct_stats_distinct_value AS distinct_value,
SUM(field_stats_distinct_stats_count) AS distinct_value_count,
ROW_NUMBER() OVER (
PARTITION BY
field_stats_field_name
ORDER BY
SUM(field_stats_distinct_stats_count) DESC,
field_stats_distinct_stats_distinct_value ASC
) AS rn,
COUNT(*) OVER (
PARTITION BY
field_stats_field_name
) AS distinct_count
FROM
{DATASET_STATS_STREAM_NAME}
WHERE
dataset_name = '{dataset_name}'
AND field_stats_distinct_stats_distinct_value IS NOT NULL
{fields_filter}
GROUP BY
field_stats_field_name,
field_stats_distinct_stats_distinct_value
),
top_values AS (
SELECT
*
FROM
ranked_values
WHERE
rn > {rn_start}
AND rn <= {rn_end}
),
field_totals AS (
SELECT
field_stats_field_name,
SUM(field_stats_count) AS total_field_count
FROM
{DATASET_STATS_STREAM_NAME}
WHERE
dataset_name = '{dataset_name}'
GROUP BY
field_stats_field_name
)
SELECT
tv.field_name,
ft.total_field_count AS field_count,
tv.distinct_count,
tv.distinct_value,
tv.distinct_value_count
FROM
top_values tv
JOIN field_totals ft ON tv.field_name = ft.field_stats_field_name
ORDER BY
tv.field_name,
tv.distinct_value_count DESC"
)
}
#[cfg(test)]
Expand Down
Loading