diff --git a/src/AST/CommonAST.cs b/src/AST/CommonAST.cs index 735b7c6..6c561a9 100644 --- a/src/AST/CommonAST.cs +++ b/src/AST/CommonAST.cs @@ -16,7 +16,12 @@ public enum NodeKind ParenthesizedExpression, SpecialOperatorExpression, WildcardExpression, - PathExpression + PathExpression, + + // Phase 1: Aggregation support + FieldReference, + NamedExpression, + CompositeAggregation } /// @@ -309,6 +314,164 @@ public class PathExpression : Expression public required string Path { get; set; } } +#region Phase 1: Aggregation Support + +/// +/// Field categorization for different data sources +/// +public enum FieldType +{ + Intrinsic, // Built-in fields (duration, name, etc.) + Attribute, // Custom attributes + Metadata, // System metadata + Computed // Derived/computed fields +} + +/// +/// Data types for field validation +/// +public enum DataType +{ + String, + Integer, + Float, + Boolean, + Duration, + DateTime, + Array, + Object, + Unknown +} + +/// +/// Phase 1 aggregate function types (implementation priority) +/// +public enum AggregateFunction +{ + // Phase 1: Core 5 functions + Count, + Sum, + Average, + Minimum, + Maximum + + // Phase 2: Additional functions (design only, not implemented) + // StandardDeviation, Variance, Percentile, DistinctCount, + // MakeList, MakeSet, ArgumentMax, ArgumentMin, Any +} + +/// +/// Enhanced field reference with namespace support for cross-language compatibility +/// Inherits from Identifier and adds aggregation-specific metadata +/// +public class FieldReference : Identifier +{ + public override NodeKind NodeKind => NodeKind.FieldReference; + + /// + /// Field type for validation and optimization + /// + public FieldType FieldType { get; set; } = FieldType.Attribute; + + /// + /// Data type of the field + /// + public DataType? DataType { get; set; } + + /// + /// Whether this field is required for the operation + /// + public bool IsRequired { get; set; } = true; +} + +/// +/// Represents a named expression used in project, extend, summarize and other operators +/// Supports both single names and multiple names: name = expr, (name1, name2) = expr +/// +public class NamedExpression : Expression +{ + public override NodeKind NodeKind => NodeKind.NamedExpression; + + /// + /// Single result name (most common case) + /// + public string? Name { get; set; } + + /// + /// Multiple result names for tuple destructuring: (name1, name2) = expr + /// + public List? Names { get; set; } + + /// + /// The expression being named + /// + public required Expression Expression { get; set; } + + /// + /// True if this expression has explicit naming + /// + public bool IsNamed => Name != null || (Names != null && Names.Count > 0); +} + +/// +/// Individual aggregate operation within CompositeAggregationNode +/// +public class AggregateOperationNode +{ + /// Phase 1: Only these 5 functions + public required AggregateFunction Function { get; set; } + + /// Field to aggregate (null for count()) + public FieldReference? Field { get; set; } + + /// KQL-specific: Result column name + public string? ResultName { get; set; } + + /// Source expression for complex aggregations + public Expression? SourceExpression { get; set; } +} + +/// +/// Unified node for aggregation operations from both KQL and TraceQL +/// Handles grouping + aggregations in a single operation +/// Supports group-only, aggregate-only, and mixed operations +/// +public class CompositeAggregationNode : OperationNode +{ + public override NodeKind NodeKind => NodeKind.CompositeAggregation; + + /// + /// List of fields to group by (optional) + /// Empty list = no grouping + /// + public List GroupByFields { get; set; } = new List(); + + /// + /// List of aggregation operations (optional) + /// Empty list = group-only operation + /// + public List Aggregations { get; set; } = new List(); + + /// + /// Source language context for translation + /// + public string SourceLanguage { get; set; } = "Unknown"; + + /// + /// Validation: Must have either grouping OR aggregations (or both) + /// + public bool IsValid => GroupByFields.Count > 0 || Aggregations.Count > 0; + + /// + /// Operation classification + /// + public bool IsGroupOnly => Aggregations.Count == 0 && GroupByFields.Count > 0; + public bool IsAggregateOnly => Aggregations.Count > 0 && GroupByFields.Count == 0; + public bool IsMixed => Aggregations.Count > 0 && GroupByFields.Count > 0; +} + +#endregion + /// /// Builder for creating AST nodes /// @@ -443,6 +606,156 @@ public static SpecialOperatorExpression CreateSpecialOperatorExpression(Expressi Right = right }; } + + #region Phase 1: Aggregation Builder Methods + + /// + /// Creates a field reference with namespace support + /// + public static FieldReference CreateFieldReference( + string name, + string? nameSpace = null, + FieldType fieldType = FieldType.Attribute, + DataType? dataType = null) + { + return new FieldReference + { + Name = name, + Namespace = nameSpace, + FieldType = fieldType, + DataType = dataType + }; + } + + /// + /// Creates a named expression + /// + public static NamedExpression CreateNamedExpression( + Expression expression, + string? name = null, + List? names = null) + { + return new NamedExpression + { + Expression = expression, + Name = name, + Names = names + }; + } + + /// + /// Creates an aggregate operation node + /// + public static AggregateOperationNode CreateAggregateOperation( + AggregateFunction function, + FieldReference? field = null, + string? resultName = null, + Expression? sourceExpression = null) + { + return new AggregateOperationNode + { + Function = function, + Field = field, + ResultName = resultName, + SourceExpression = sourceExpression + }; + } + + /// + /// Creates a composite aggregation node (unified for both KQL and TraceQL) + /// + public static CompositeAggregationNode CreateCompositeAggregation( + List? groupByFields = null, + List? aggregations = null, + string sourceLanguage = "Unknown") + { + return new CompositeAggregationNode + { + GroupByFields = groupByFields ?? new List(), + Aggregations = aggregations ?? new List(), + SourceLanguage = sourceLanguage + }; + } + + /// + /// Creates a group-only composite aggregation (KQL: summarize by fields) + /// + public static CompositeAggregationNode CreateGroupOnlyAggregation( + List groupByFields, + string sourceLanguage = "KQL") + { + return new CompositeAggregationNode + { + GroupByFields = groupByFields, + Aggregations = new List(), // Empty = group-only + SourceLanguage = sourceLanguage + }; + } + + /// + /// Creates an aggregate-only composite aggregation (KQL: summarize aggregates) + /// + public static CompositeAggregationNode CreateAggregateOnlyAggregation( + List aggregations, + string sourceLanguage = "KQL") + { + return new CompositeAggregationNode + { + GroupByFields = new List(), // Empty = no grouping + Aggregations = aggregations, + SourceLanguage = sourceLanguage + }; + } + + /// + /// Creates a count aggregate operation + /// + public static AggregateOperationNode CreateCountOperation(string? resultName = null) + { + return CreateAggregateOperation(AggregateFunction.Count, null, resultName); + } + + /// + /// Creates a sum aggregate operation + /// + public static AggregateOperationNode CreateSumOperation( + FieldReference field, + string? resultName = null) + { + return CreateAggregateOperation(AggregateFunction.Sum, field, resultName); + } + + /// + /// Creates an average aggregate operation + /// + public static AggregateOperationNode CreateAverageOperation( + FieldReference field, + string? resultName = null) + { + return CreateAggregateOperation(AggregateFunction.Average, field, resultName); + } + + /// + /// Creates a minimum aggregate operation + /// + public static AggregateOperationNode CreateMinimumOperation( + FieldReference field, + string? resultName = null) + { + return CreateAggregateOperation(AggregateFunction.Minimum, field, resultName); + } + + /// + /// Creates a maximum aggregate operation + /// + public static AggregateOperationNode CreateMaximumOperation( + FieldReference field, + string? resultName = null) + { + return CreateAggregateOperation(AggregateFunction.Maximum, field, resultName); + } + + #endregion } /// @@ -587,4 +900,4 @@ public static QueryNode SpansOnlyFilterExample() return query; } -} \ No newline at end of file +} diff --git a/src/AST/KqlToCommonAstVisitor.cs b/src/AST/KqlToCommonAstVisitor.cs index 1d25f7b..a2bfa54 100644 --- a/src/AST/KqlToCommonAstVisitor.cs +++ b/src/AST/KqlToCommonAstVisitor.cs @@ -43,6 +43,9 @@ public void Visit(SyntaxNode node) case SyntaxKind.FilterOperator: VisitFilterOperator(node as FilterOperator); break; + case SyntaxKind.SummarizeOperator: + VisitSummarizeOperator(node as SummarizeOperator); + break; // Handle all binary expression types case SyntaxKind.EqualExpression: case SyntaxKind.NotEqualExpression: @@ -74,6 +77,9 @@ public void Visit(SyntaxNode node) case SyntaxKind.ParenthesizedExpression: VisitParenthesizedExpression(node as Kusto.Language.Syntax.ParenthesizedExpression); break; + case SyntaxKind.FunctionCallExpression: + VisitFunctionCallExpression(node as FunctionCallExpression); + break; default: // other node types are not yet implemented, just default to visiting the children for now VisitChildren(node); @@ -290,37 +296,37 @@ private void VisitParenthesizedExpression(Kusto.Language.Syntax.ParenthesizedExp // } // } - // private void VisitFunctionCallExpression(Kusto.Language.Syntax.FunctionCallExpression node) - // { - // if (node == null) return; + private void VisitFunctionCallExpression(FunctionCallExpression node) + { + if (node == null) return; - // string functionName = "unknown"; - // if (node.Name is Kusto.Language.Syntax.NameReference nameRef) - // { - // functionName = nameRef.Name.Text; - // } + string functionName = "unknown"; + if (node.Name is NameReference nameRef) + { + functionName = nameRef.Name.SimpleName; + } - // var arguments = new List(); + var arguments = new List(); - // // Process arguments - // if (node.ArgumentList != null) - // { - // foreach (var arg in node.ArgumentList.Expressions) - // { - // Visit(arg); - // if (_expressionStack.Count > 0) - // { - // arguments.Add(_expressionStack.Pop()); - // } - // } - // } + // Process arguments + if (node.ArgumentList != null) + { + foreach (var arg in node.ArgumentList.Expressions) + { + Visit(arg.Element); + if (_expressionStack.Count > 0) + { + arguments.Add(_expressionStack.Pop()); + } + } + } - // // Reverse the arguments since we process them in reverse order - // arguments.Reverse(); + // Reverse the arguments since we process them in reverse order + arguments.Reverse(); - // var callExpr = AstBuilder.CreateCallExpression(functionName, arguments); - // _expressionStack.Push(callExpr); - // } + var callExpr = AstBuilder.CreateCallExpression(functionName, arguments); + _expressionStack.Push(callExpr); + } // private void VisitInExpression(Kusto.Language.Syntax.InExpression node) // { @@ -358,6 +364,28 @@ private void VisitParenthesizedExpression(Kusto.Language.Syntax.ParenthesizedExp // } // } + #region Phase 1: Aggregation Support + + /// + /// Visits a KQL summarize operator and creates CompositeAggregationNode + /// TODO: Implement full summarize operator parsing once KQL syntax structure is verified + /// + private void VisitSummarizeOperator(SummarizeOperator node) + { + if (node == null) return; + + // For now, create a placeholder aggregation node + // TODO: Parse actual summarize structure from KQL syntax tree + var compositeAggregation = AstBuilder.CreateCompositeAggregation( + null, // groupByFields - to be implemented + null, // aggregations - to be implemented + "KQL"); + + _rootNode.Operations.Add(compositeAggregation); + } + + #endregion + // Helper method to map KQL operators to CommonAST BinaryOperatorKind private BinaryOperatorKind MapKqlOperatorToCommonAST(string op) { @@ -379,4 +407,4 @@ private BinaryOperatorKind MapKqlOperatorToCommonAST(string op) default: throw new NotSupportedException($"Unsupported binary operator: {op}"); } } -} \ No newline at end of file +} diff --git a/tests/CommonAST.Tests/AggregationTests.cs b/tests/CommonAST.Tests/AggregationTests.cs new file mode 100644 index 0000000..d8e1fa7 --- /dev/null +++ b/tests/CommonAST.Tests/AggregationTests.cs @@ -0,0 +1,274 @@ +using System; +using System.Collections.Generic; +using Microsoft.VisualStudio.TestTools.UnitTesting; +using CommonAST; + +namespace CommonAST.Tests +{ + [TestClass] + public class AggregationTests + { + #region Phase 1: Basic AST Node Tests + + [TestMethod] + public void CreateFieldReference_WithNamespace_CreatesCorrectly() + { + // Arrange & Act + var fieldRef = AstBuilder.CreateFieldReference("Duration", "event", FieldType.Attribute, DataType.Integer); + + // Assert + Assert.IsNotNull(fieldRef); + Assert.AreEqual(NodeKind.FieldReference, fieldRef.NodeKind); + Assert.AreEqual("Duration", fieldRef.Name); + Assert.AreEqual("event", fieldRef.Namespace); + Assert.AreEqual(FieldType.Attribute, fieldRef.FieldType); + Assert.AreEqual(DataType.Integer, fieldRef.DataType); + Assert.AreEqual(true, fieldRef.IsRequired); + } + + [TestMethod] + public void CreateNamedExpression_WithSingleName_CreatesCorrectly() + { + // Arrange + var expr = AstBuilder.CreateIdentifier("count"); + + // Act + var namedExpr = AstBuilder.CreateNamedExpression(expr, "TotalCount"); + + // Assert + Assert.IsNotNull(namedExpr); + Assert.AreEqual(NodeKind.NamedExpression, namedExpr.NodeKind); + Assert.AreEqual("TotalCount", namedExpr.Name); + Assert.IsNull(namedExpr.Names); + Assert.IsTrue(namedExpr.IsNamed); + Assert.AreSame(expr, namedExpr.Expression); + } + + [TestMethod] + public void CreateAggregateOperation_CountFunction_CreatesCorrectly() + { + // Arrange & Act + var aggOp = AstBuilder.CreateCountOperation("TotalCount"); + + // Assert + Assert.IsNotNull(aggOp); + Assert.AreEqual(AggregateFunction.Count, aggOp.Function); + Assert.IsNull(aggOp.Field); // Count doesn't require a field + Assert.AreEqual("TotalCount", aggOp.ResultName); + Assert.IsNull(aggOp.SourceExpression); + } + + [TestMethod] + public void CreateAggregateOperation_SumFunction_CreatesCorrectly() + { + // Arrange + var field = AstBuilder.CreateFieldReference("Duration", dataType: DataType.Integer); + + // Act + var aggOp = AstBuilder.CreateSumOperation(field, "TotalDuration"); + + // Assert + Assert.IsNotNull(aggOp); + Assert.AreEqual(AggregateFunction.Sum, aggOp.Function); + Assert.AreSame(field, aggOp.Field); + Assert.AreEqual("TotalDuration", aggOp.ResultName); + Assert.IsNull(aggOp.SourceExpression); + } + + [TestMethod] + public void CreateCompositeAggregation_GroupOnly_CreatesCorrectly() + { + // Arrange + var groupFields = new List + { + AstBuilder.CreateFieldReference("State"), + AstBuilder.CreateFieldReference("EventType") + }; + + // Act + var composite = AstBuilder.CreateGroupOnlyAggregation(groupFields, "KQL"); + + // Assert + Assert.IsNotNull(composite); + Assert.AreEqual(NodeKind.CompositeAggregation, composite.NodeKind); + Assert.AreEqual(2, composite.GroupByFields.Count); + Assert.AreEqual(0, composite.Aggregations.Count); + Assert.AreEqual("KQL", composite.SourceLanguage); + Assert.IsTrue(composite.IsValid); + Assert.IsTrue(composite.IsGroupOnly); + Assert.IsFalse(composite.IsAggregateOnly); + Assert.IsFalse(composite.IsMixed); + } + + [TestMethod] + public void CreateCompositeAggregation_AggregateOnly_CreatesCorrectly() + { + // Arrange + var aggregations = new List + { + AstBuilder.CreateCountOperation("TotalCount"), + AstBuilder.CreateSumOperation(AstBuilder.CreateFieldReference("Duration"), "TotalDuration") + }; + + // Act + var composite = AstBuilder.CreateAggregateOnlyAggregation(aggregations, "KQL"); + + // Assert + Assert.IsNotNull(composite); + Assert.AreEqual(NodeKind.CompositeAggregation, composite.NodeKind); + Assert.AreEqual(0, composite.GroupByFields.Count); + Assert.AreEqual(2, composite.Aggregations.Count); + Assert.AreEqual("KQL", composite.SourceLanguage); + Assert.IsTrue(composite.IsValid); + Assert.IsFalse(composite.IsGroupOnly); + Assert.IsTrue(composite.IsAggregateOnly); + Assert.IsFalse(composite.IsMixed); + } + + [TestMethod] + public void CreateCompositeAggregation_Mixed_CreatesCorrectly() + { + // Arrange + var groupFields = new List + { + AstBuilder.CreateFieldReference("State") + }; + + var aggregations = new List + { + AstBuilder.CreateCountOperation("TotalCount"), + AstBuilder.CreateAverageOperation(AstBuilder.CreateFieldReference("Duration"), "AvgDuration") + }; + + // Act + var composite = AstBuilder.CreateCompositeAggregation(groupFields, aggregations, "KQL"); + + // Assert + Assert.IsNotNull(composite); + Assert.AreEqual(NodeKind.CompositeAggregation, composite.NodeKind); + Assert.AreEqual(1, composite.GroupByFields.Count); + Assert.AreEqual(2, composite.Aggregations.Count); + Assert.AreEqual("KQL", composite.SourceLanguage); + Assert.IsTrue(composite.IsValid); + Assert.IsFalse(composite.IsGroupOnly); + Assert.IsFalse(composite.IsAggregateOnly); + Assert.IsTrue(composite.IsMixed); + } + + [TestMethod] + public void CreateCompositeAggregation_EmptyBoth_IsInvalid() + { + // Arrange & Act + var composite = AstBuilder.CreateCompositeAggregation(null, null, "KQL"); + + // Assert + Assert.IsNotNull(composite); + Assert.AreEqual(0, composite.GroupByFields.Count); + Assert.AreEqual(0, composite.Aggregations.Count); + Assert.IsFalse(composite.IsValid); // Must have either grouping OR aggregations + } + + [TestMethod] + public void CreateAllAggregateOperations_Phase1Functions_CreateCorrectly() + { + // Arrange + var field = AstBuilder.CreateFieldReference("Duration", dataType: DataType.Integer); + + // Act + var count = AstBuilder.CreateCountOperation("Count"); + var sum = AstBuilder.CreateSumOperation(field, "Sum"); + var avg = AstBuilder.CreateAverageOperation(field, "Average"); + var min = AstBuilder.CreateMinimumOperation(field, "Minimum"); + var max = AstBuilder.CreateMaximumOperation(field, "Maximum"); + + // Assert + Assert.AreEqual(AggregateFunction.Count, count.Function); + Assert.IsNull(count.Field); + + Assert.AreEqual(AggregateFunction.Sum, sum.Function); + Assert.AreSame(field, sum.Field); + + Assert.AreEqual(AggregateFunction.Average, avg.Function); + Assert.AreSame(field, avg.Field); + + Assert.AreEqual(AggregateFunction.Minimum, min.Function); + Assert.AreSame(field, min.Field); + + Assert.AreEqual(AggregateFunction.Maximum, max.Function); + Assert.AreSame(field, max.Field); + } + + [TestMethod] + public void CreateKqlMultiAggregationExample_MatchesDesignDoc() + { + // Example: | summarize TotalCount = count(), AvgDuration = avg(Duration) by State, EventType + + // Arrange + var groupFields = new List + { + AstBuilder.CreateFieldReference("State"), + AstBuilder.CreateFieldReference("EventType") + }; + + var aggregations = new List + { + AstBuilder.CreateCountOperation("TotalCount"), + AstBuilder.CreateAverageOperation( + AstBuilder.CreateFieldReference("Duration", dataType: DataType.Integer), + "AvgDuration") + }; + + // Act + var composite = AstBuilder.CreateCompositeAggregation(groupFields, aggregations, "KQL"); + + // Assert + Assert.IsNotNull(composite); + Assert.AreEqual("KQL", composite.SourceLanguage); + Assert.AreEqual(2, composite.GroupByFields.Count); + Assert.AreEqual(2, composite.Aggregations.Count); + Assert.IsTrue(composite.IsMixed); + + // Verify group fields + Assert.AreEqual("State", composite.GroupByFields[0].Name); + Assert.AreEqual("EventType", composite.GroupByFields[1].Name); + + // Verify aggregations + var count = composite.Aggregations[0]; + Assert.AreEqual(AggregateFunction.Count, count.Function); + Assert.AreEqual("TotalCount", count.ResultName); + Assert.IsNull(count.Field); + + var avg = composite.Aggregations[1]; + Assert.AreEqual(AggregateFunction.Average, avg.Function); + Assert.AreEqual("AvgDuration", avg.ResultName); + Assert.IsNotNull(avg.Field); + Assert.AreEqual("Duration", avg.Field.Name); + } + + [TestMethod] + public void CreateQueryWithCompositeAggregation_AddsToOperations() + { + // Arrange + var query = AstBuilder.CreateQuery("TestTable"); + var composite = AstBuilder.CreateCompositeAggregation( + new List { AstBuilder.CreateFieldReference("State") }, + new List { AstBuilder.CreateCountOperation("Count") }, + "KQL" + ); + + // Act + query.Operations.Add(composite); + + // Assert + Assert.AreEqual("TestTable", query.Source); + Assert.AreEqual(1, query.Operations.Count); + Assert.IsInstanceOfType(query.Operations[0], typeof(CompositeAggregationNode)); + + var aggNode = query.Operations[0] as CompositeAggregationNode; + Assert.IsNotNull(aggNode); + Assert.IsTrue(aggNode.IsMixed); + } + + #endregion + } +}