diff --git a/Cargo.toml b/Cargo.toml index a24a998..ee408c0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,6 +15,7 @@ edition = "2024" thiserror = "2" nom = "8" nom_locate = "5" +unicase = "2" [dependencies.serde] version = "1" diff --git a/src/analysis.rs b/src/analysis.rs index f3519b6..5e639d0 100644 --- a/src/analysis.rs +++ b/src/analysis.rs @@ -1,9 +1,11 @@ use std::{ - collections::{BTreeMap, HashMap, btree_map::Entry}, + borrow::Cow, + collections::{BTreeMap, HashMap, HashSet, btree_map::Entry}, mem, }; use serde::Serialize; +use unicase::Ascii; use crate::{ Attrs, Expr, Query, Raw, Source, SourceKind, Type, Value, error::AnalysisError, token::Operator, @@ -50,6 +52,55 @@ pub struct AnalysisOptions { pub default_scope: Scope, /// Type information for event records being queried. pub event_type_info: Type, + /// Custom types that are not defined in the EventQL reference. + /// + /// This set allows users to register custom type names that can be used + /// in type conversion expressions (e.g., `field AS CustomType`). Custom + /// type names are case-insensitive. + /// + /// # Examples + /// + /// ``` + /// use eventql_parser::prelude::AnalysisOptions; + /// + /// let options = AnalysisOptions::default() + /// .add_custom_type("Foobar"); + /// ``` + pub custom_types: HashSet<Ascii<String>>, +} + +impl AnalysisOptions { + /// Adds a custom type name to the analysis options. + /// + /// Custom types allow you to use type conversion syntax with types that are + /// not part of the standard EventQL type system. The type name is stored + /// case-insensitively. + /// + /// # Arguments + /// + /// * `value` - The custom type name to register + /// + /// # Returns + /// + /// Returns `self` to allow for method chaining. 
+ /// + /// # Examples + /// + /// ``` + /// use eventql_parser::prelude::AnalysisOptions; + /// + /// let options = AnalysisOptions::default() + /// .add_custom_type("Timestamp") + /// .add_custom_type("UUID"); + /// ``` + pub fn add_custom_type<'a>(mut self, value: impl Into<Cow<'a, str>>) -> Self { + match value.into() { + Cow::Borrowed(t) => self.custom_types.insert(Ascii::new(t.to_owned())), + Cow::Owned(t) => self.custom_types.insert(Ascii::new(t)), + }; + + self + } } impl Default for AnalysisOptions { @@ -347,6 +398,7 @@ impl Default for AnalysisOptions { ("tracestate".to_owned(), Type::String), ("signature".to_owned(), Type::String), ])), + custom_types: HashSet::default(), } } } @@ -743,6 +795,25 @@ impl<'a> Analysis<'a> { expect.check(attrs, Type::Bool) } + Operator::As => { + if let Value::Id(name) = &binary.rhs.value { + if let Some(tpe) = name_to_type(self.options, name) { + // NOTE - we could check if it's safe to convert the left branch to that type + return Ok(tpe); + } else { + return Err(AnalysisError::UnsupportedCustomType( + attrs.pos.line, + attrs.pos.col, + name.clone(), + )); + } + } + + unreachable!( + "we already made sure during parsing that we can only have an ID symbol at this point" + ) + } + Operator::Not => unreachable!(), }, @@ -997,6 +1068,15 @@ impl<'a> Analysis<'a> { .unwrap_or_default(), Value::Binary(binary) => match binary.operator { Operator::Add | Operator::Sub | Operator::Mul | Operator::Div => Type::Number, + Operator::As => { + if let Value::Id(n) = &binary.rhs.as_ref().value + && let Some(tpe) = name_to_type(self.options, n.as_str()) + { + tpe + } else { + Type::Unspecified + } + } Operator::Eq | Operator::Neq | Operator::Lt @@ -1023,9 +1103,31 @@ impl<'a> Analysis<'a> { | Operator::Or | Operator::Xor | Operator::Not - | Operator::Contains => unreachable!(), + | Operator::Contains + | Operator::As => unreachable!(), }, Value::Group(expr) => self.project_type(&expr.value), } } } + +fn name_to_type(opts: &AnalysisOptions, name: &str) 
-> Option<Type> { + if name.eq_ignore_ascii_case("string") { + Some(Type::String) + } else if name.eq_ignore_ascii_case("int") || name.eq_ignore_ascii_case("float64") { + Some(Type::Number) + } else if name.eq_ignore_ascii_case("boolean") { + Some(Type::Bool) + } else if name.eq_ignore_ascii_case("date") { + Some(Type::Date) + } else if name.eq_ignore_ascii_case("time") { + Some(Type::Time) + } else if name.eq_ignore_ascii_case("datetime") { + Some(Type::DateTime) + } else if opts.custom_types.contains(&Ascii::new(name.to_owned())) { + // ^ Sad we have to allocate here for no reason + Some(Type::Custom(name.to_owned())) + } else { + None + } +} diff --git a/src/ast.rs b/src/ast.rs index 02e0849..0bc56ae 100644 --- a/src/ast.rs +++ b/src/ast.rs @@ -73,6 +73,33 @@ pub enum Type { Subject, /// Function type App { args: Vec<Type>, result: Box<Type> }, + /// Date type (e.g., `2026-01-03`) + /// + /// Used when a field is explicitly converted to a date using the `AS DATE` syntax. + Date, + /// Time type (e.g., `13:45:39`) + /// + /// Used when a field is explicitly converted to a time using the `AS TIME` syntax. + Time, + /// DateTime type (e.g., `2026-01-01T13:45:39Z`) + /// + /// Used when a field is explicitly converted to a datetime using the `AS DATETIME` syntax. + DateTime, + /// Custom type not defined in the EventQL reference + /// + /// Used when a field is converted to a custom type registered in [`AnalysisOptions::custom_types`]. + /// The string contains the custom type name as it appears in the query. 
+ /// + /// # Examples + /// + /// ``` + /// use eventql_parser::prelude::{parse_query, AnalysisOptions}; + /// + /// let query = parse_query("FROM e IN events PROJECT INTO { ts: e.data.timestamp as CustomTimestamp }").unwrap(); + /// let options = AnalysisOptions::default().add_custom_type("CustomTimestamp"); + /// let typed_query = query.run_static_analysis(&options).unwrap(); + /// ``` + Custom(String), } impl Type { @@ -103,6 +130,12 @@ impl Type { (Self::Number, Self::Number) => Ok(Self::Number), (Self::String, Self::String) => Ok(Self::String), (Self::Bool, Self::Bool) => Ok(Self::Bool), + (Self::Date, Self::Date) => Ok(Self::Date), + (Self::Time, Self::Time) => Ok(Self::Time), + (Self::DateTime, Self::DateTime) => Ok(Self::DateTime), + (Self::Custom(a), Self::Custom(b)) if a.eq_ignore_ascii_case(b.as_str()) => { + Ok(Self::Custom(a)) + } (Self::Array(mut a), Self::Array(b)) => { *a = a.as_ref().clone().check(attrs, *b)?; Ok(Self::Array(a)) diff --git a/src/error.rs b/src/error.rs index 6d6de31..0416cca 100644 --- a/src/error.rs +++ b/src/error.rs @@ -83,6 +83,15 @@ pub enum ParserError { #[error("{0}:{1}: unexpected token {2}")] UnexpectedToken(u32, u32, String), + /// Expected a type name but found something else. + /// + /// Fields: `(line, column)` + /// + /// This occurs when defining a type conversion operation but the right side is + /// not a type. + #[error("{0}:{1}: expected a type")] + ExpectedType(u32, u32), + /// The input ended unexpectedly while parsing. /// /// This occurs when the parser expects more tokens but encounters @@ -178,4 +187,9 @@ pub enum AnalysisError { /// SELECT projection clause. #[error("{0}:{1}: expected a record")] ExpectRecordLiteral(u32, u32), + + /// When a custom type (meaning a type not supported by EventQL by default) is used but + /// not registered in the `AnalysisOptions` custom type set. 
+ #[error("{0}:{1}: unsupported custom type '{2}'")] + UnsupportedCustomType(u32, u32, String), } diff --git a/src/lexer.rs b/src/lexer.rs index dfc7ddb..777047d 100644 --- a/src/lexer.rs +++ b/src/lexer.rs @@ -147,6 +147,8 @@ fn ident(input: Text) -> IResult { Sym::Operator(Operator::Not) } else if value.fragment().eq_ignore_ascii_case("contains") { Sym::Operator(Operator::Contains) + } else if value.fragment().eq_ignore_ascii_case("as") { + Sym::Operator(Operator::As) } else { Sym::Id(value.fragment()) }; diff --git a/src/parser.rs b/src/parser.rs index 3aa14ac..e9df8f8 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -353,6 +353,13 @@ impl<'a> Parser<'a> { self.shift(); let rhs = self.parse_binary(rhs_bind)?; + if matches!(operator, Operator::As) && !matches!(rhs.value, Value::Id(_)) { + return Err(ParserError::ExpectedType( + rhs.attrs.pos.line, + rhs.attrs.pos.col, + )); + } + lhs = Expr { attrs: lhs.attrs, value: Value::Binary(Binary { @@ -477,6 +484,7 @@ fn binding_pow(op: Operator) -> (u64, u64) { Operator::Add | Operator::Sub => (20, 21), Operator::Mul | Operator::Div => (30, 31), Operator::Contains => (40, 39), + Operator::As => (50, 49), Operator::Eq | Operator::Neq | Operator::Gt diff --git a/src/tests/analysis.rs b/src/tests/analysis.rs index 7dc48bd..3b13176 100644 --- a/src/tests/analysis.rs +++ b/src/tests/analysis.rs @@ -1,4 +1,4 @@ -use crate::parse_query; +use crate::{parse_query, prelude::AnalysisOptions}; #[test] fn test_infer_wrong_where_clause_1() { @@ -44,3 +44,32 @@ fn test_analyze_invalid_type_contains() { let query = parse_query(include_str!("./resources/invalid_type_contains.eql")).unwrap(); insta::assert_yaml_snapshot!(query.run_static_analysis(&Default::default())); } + +#[test] +fn test_analyze_valid_type_conversion() { + let query = parse_query(include_str!("./resources/valid_type_conversion.eql")).unwrap(); + insta::assert_yaml_snapshot!(query.run_static_analysis(&Default::default())); +} + +#[test] +fn 
test_analyze_invalid_type_conversion_custom_type() { + let query = parse_query(include_str!("./resources/type_conversion_custom_type.eql")).unwrap(); + insta::assert_yaml_snapshot!(query.run_static_analysis(&Default::default())); +} + +#[test] +fn test_analyze_valid_type_conversion_custom_type() { + let query = parse_query(include_str!("./resources/type_conversion_custom_type.eql")).unwrap(); + insta::assert_yaml_snapshot!( + query.run_static_analysis(&AnalysisOptions::default().add_custom_type("Foobar")) + ); +} + +#[test] +fn test_analyze_valid_type_conversion_weird_case() { + let query = parse_query(include_str!( + "./resources/valid_type_conversion-weird-case.eql" + )) + .unwrap(); + insta::assert_yaml_snapshot!(query.run_static_analysis(&Default::default())); +} diff --git a/src/tests/parser.rs b/src/tests/parser.rs index 4e4fd88..a8c7a2d 100644 --- a/src/tests/parser.rs +++ b/src/tests/parser.rs @@ -72,3 +72,15 @@ fn test_parser_valid_contains() { let tokens = tokenize(include_str!("./resources/valid_contains.eql")).unwrap(); insta::assert_yaml_snapshot!(parse(tokens.as_slice()).unwrap()); } + +#[test] +fn test_parser_valid_type_conversion() { + let tokens = tokenize(include_str!("./resources/valid_type_conversion.eql")).unwrap(); + insta::assert_yaml_snapshot!(parse(tokens.as_slice()).unwrap()); +} + +#[test] +fn test_parser_invalid_type_conversion_expr() { + let tokens = tokenize(include_str!("./resources/invalid_type_conversion_expr.eql")).unwrap(); + insta::assert_yaml_snapshot!(parse(tokens.as_slice())); +} diff --git a/src/tests/resources/invalid_type_conversion_expr.eql b/src/tests/resources/invalid_type_conversion_expr.eql new file mode 100644 index 0000000..4899662 --- /dev/null +++ b/src/tests/resources/invalid_type_conversion_expr.eql @@ -0,0 +1,3 @@ +FROM e IN events +FROM f IN subjects +PROJECT INTO { date: e.data.date as f.type } diff --git a/src/tests/resources/type_conversion_custom_type.eql 
b/src/tests/resources/type_conversion_custom_type.eql new file mode 100644 index 0000000..594c367 --- /dev/null +++ b/src/tests/resources/type_conversion_custom_type.eql @@ -0,0 +1,2 @@ +FROM e IN events +PROJECT INTO { date: e.data.date as Foobar } diff --git a/src/tests/resources/valid_type_conversion-weird-case.eql b/src/tests/resources/valid_type_conversion-weird-case.eql new file mode 100644 index 0000000..a201016 --- /dev/null +++ b/src/tests/resources/valid_type_conversion-weird-case.eql @@ -0,0 +1,2 @@ +FROM e IN events +PROJECT INTO { date: e.data.date as DaTeTiMe } diff --git a/src/tests/resources/valid_type_conversion.eql b/src/tests/resources/valid_type_conversion.eql new file mode 100644 index 0000000..ddc6510 --- /dev/null +++ b/src/tests/resources/valid_type_conversion.eql @@ -0,0 +1,2 @@ +FROM e IN events +PROJECT INTO { date: e.data.date as DATETIME } diff --git a/src/tests/snapshots/eventql_parser__tests__analysis__analyze_invalid_type_conversion_custom_type.snap b/src/tests/snapshots/eventql_parser__tests__analysis__analyze_invalid_type_conversion_custom_type.snap new file mode 100644 index 0000000..3da6eae --- /dev/null +++ b/src/tests/snapshots/eventql_parser__tests__analysis__analyze_invalid_type_conversion_custom_type.snap @@ -0,0 +1,10 @@ +--- +source: src/tests/analysis.rs +expression: "query.run_static_analysis(&Default::default())" +--- +Err: + Analysis: + UnsupportedCustomType: + - 2 + - 22 + - Foobar diff --git a/src/tests/snapshots/eventql_parser__tests__analysis__analyze_valid_type_conversion.snap b/src/tests/snapshots/eventql_parser__tests__analysis__analyze_valid_type_conversion.snap new file mode 100644 index 0000000..bb45231 --- /dev/null +++ b/src/tests/snapshots/eventql_parser__tests__analysis__analyze_valid_type_conversion.snap @@ -0,0 +1,72 @@ +--- +source: src/tests/analysis.rs +expression: "query.run_static_analysis(&Default::default())" +--- +Ok: + attrs: + pos: + line: 1 + col: 1 + sources: + - binding: + name: e + pos: + 
line: 1 + col: 6 + kind: + Name: events + predicate: ~ + group_by: ~ + order_by: ~ + limit: ~ + projection: + attrs: + pos: + line: 2 + col: 14 + value: + Record: + - name: date + value: + attrs: + pos: + line: 2 + col: 22 + value: + Binary: + lhs: + attrs: + pos: + line: 2 + col: 22 + value: + Access: + target: + attrs: + pos: + line: 2 + col: 22 + value: + Access: + target: + attrs: + pos: + line: 2 + col: 22 + value: + Id: e + field: data + field: date + operator: As + rhs: + attrs: + pos: + line: 2 + col: 37 + value: + Id: DATETIME + distinct: false + meta: + project: + Record: + date: DateTime diff --git a/src/tests/snapshots/eventql_parser__tests__analysis__analyze_valid_type_conversion_custom_type.snap b/src/tests/snapshots/eventql_parser__tests__analysis__analyze_valid_type_conversion_custom_type.snap new file mode 100644 index 0000000..41bb68f --- /dev/null +++ b/src/tests/snapshots/eventql_parser__tests__analysis__analyze_valid_type_conversion_custom_type.snap @@ -0,0 +1,73 @@ +--- +source: src/tests/analysis.rs +expression: "query.run_static_analysis(&AnalysisOptions::default().add_custom_type(\"Foobar\"))" +--- +Ok: + attrs: + pos: + line: 1 + col: 1 + sources: + - binding: + name: e + pos: + line: 1 + col: 6 + kind: + Name: events + predicate: ~ + group_by: ~ + order_by: ~ + limit: ~ + projection: + attrs: + pos: + line: 2 + col: 14 + value: + Record: + - name: date + value: + attrs: + pos: + line: 2 + col: 22 + value: + Binary: + lhs: + attrs: + pos: + line: 2 + col: 22 + value: + Access: + target: + attrs: + pos: + line: 2 + col: 22 + value: + Access: + target: + attrs: + pos: + line: 2 + col: 22 + value: + Id: e + field: data + field: date + operator: As + rhs: + attrs: + pos: + line: 2 + col: 37 + value: + Id: Foobar + distinct: false + meta: + project: + Record: + date: + Custom: Foobar diff --git a/src/tests/snapshots/eventql_parser__tests__analysis__analyze_valid_type_conversion_weird_case.snap 
b/src/tests/snapshots/eventql_parser__tests__analysis__analyze_valid_type_conversion_weird_case.snap new file mode 100644 index 0000000..1539c1c --- /dev/null +++ b/src/tests/snapshots/eventql_parser__tests__analysis__analyze_valid_type_conversion_weird_case.snap @@ -0,0 +1,72 @@ +--- +source: src/tests/analysis.rs +expression: "query.run_static_analysis(&Default::default())" +--- +Ok: + attrs: + pos: + line: 1 + col: 1 + sources: + - binding: + name: e + pos: + line: 1 + col: 6 + kind: + Name: events + predicate: ~ + group_by: ~ + order_by: ~ + limit: ~ + projection: + attrs: + pos: + line: 2 + col: 14 + value: + Record: + - name: date + value: + attrs: + pos: + line: 2 + col: 22 + value: + Binary: + lhs: + attrs: + pos: + line: 2 + col: 22 + value: + Access: + target: + attrs: + pos: + line: 2 + col: 22 + value: + Access: + target: + attrs: + pos: + line: 2 + col: 22 + value: + Id: e + field: data + field: date + operator: As + rhs: + attrs: + pos: + line: 2 + col: 37 + value: + Id: DaTeTiMe + distinct: false + meta: + project: + Record: + date: DateTime diff --git a/src/tests/snapshots/eventql_parser__tests__parser__parser_invalid_type_conversion_expr.snap b/src/tests/snapshots/eventql_parser__tests__parser__parser_invalid_type_conversion_expr.snap new file mode 100644 index 0000000..112ebf3 --- /dev/null +++ b/src/tests/snapshots/eventql_parser__tests__parser__parser_invalid_type_conversion_expr.snap @@ -0,0 +1,8 @@ +--- +source: src/tests/parser.rs +expression: parse(tokens.as_slice()) +--- +Err: + ExpectedType: + - 3 + - 37 diff --git a/src/tests/snapshots/eventql_parser__tests__parser__parser_valid_type_conversion.snap b/src/tests/snapshots/eventql_parser__tests__parser__parser_valid_type_conversion.snap new file mode 100644 index 0000000..4466da2 --- /dev/null +++ b/src/tests/snapshots/eventql_parser__tests__parser__parser_valid_type_conversion.snap @@ -0,0 +1,68 @@ +--- +source: src/tests/parser.rs +expression: parse(tokens.as_slice()).unwrap() +--- 
+attrs: + pos: + line: 1 + col: 1 +sources: + - binding: + name: e + pos: + line: 1 + col: 6 + kind: + Name: events +predicate: ~ +group_by: ~ +order_by: ~ +limit: ~ +projection: + attrs: + pos: + line: 2 + col: 14 + value: + Record: + - name: date + value: + attrs: + pos: + line: 2 + col: 22 + value: + Binary: + lhs: + attrs: + pos: + line: 2 + col: 22 + value: + Access: + target: + attrs: + pos: + line: 2 + col: 22 + value: + Access: + target: + attrs: + pos: + line: 2 + col: 22 + value: + Id: e + field: data + field: date + operator: As + rhs: + attrs: + pos: + line: 2 + col: 37 + value: + Id: DATETIME +distinct: false +meta: ~ diff --git a/src/token.rs b/src/token.rs index 6539bc5..8d4ab60 100644 --- a/src/token.rs +++ b/src/token.rs @@ -96,6 +96,8 @@ pub enum Operator { Not, /// Containment (`array CONTAINS value`) Contains, + /// Type conversion (`e.foo as STRING`) + As, } impl Display for Operator { @@ -116,6 +118,7 @@ impl Display for Operator { Operator::Xor => write!(f, "XOR"), Operator::Not => write!(f, "NOT"), Operator::Contains => write!(f, "CONTAINS"), + Operator::As => write!(f, "AS"), } } }