diff --git a/Sources/DataTransferObjects/Query/CustomQuery+CompileDown.swift b/Sources/DataTransferObjects/Query/CustomQuery+CompileDown.swift index 5c06258..9f13262 100644 --- a/Sources/DataTransferObjects/Query/CustomQuery+CompileDown.swift +++ b/Sources/DataTransferObjects/Query/CustomQuery+CompileDown.swift @@ -48,6 +48,8 @@ public extension CustomQuery { query = try namespace == nil ? precompiledFunnelQuery() : precompiledFunnelQuery(accuracy: 65536) } else if query.queryType == .experiment { query = try precompiledExperimentQuery() + } else if query.queryType == .retention { + query = try precompiledRetentionQuery() } // Handle precompilable aggregators and post aggregators diff --git a/Sources/DataTransferObjects/Query/CustomQuery.swift b/Sources/DataTransferObjects/Query/CustomQuery.swift index 387c8d5..a51f5c4 100644 --- a/Sources/DataTransferObjects/Query/CustomQuery.swift +++ b/Sources/DataTransferObjects/Query/CustomQuery.swift @@ -147,7 +147,7 @@ public struct CustomQuery: Codable, Hashable, Equatable, Sendable { // derived types case funnel case experiment - // case retention + case retention } public enum Order: String, Codable, CaseIterable, Sendable { @@ -183,7 +183,7 @@ public struct CustomQuery: Codable, Hashable, Equatable, Sendable { /// If a relative intervals are set, their calculated output replaces the regular intervals public var relativeIntervals: [RelativeTimeInterval]? - public let granularity: QueryGranularity? + public var granularity: QueryGranularity? public var aggregations: [Aggregator]? public var postAggregations: [PostAggregator]? public var limit: Int? 
diff --git a/Sources/DataTransferObjects/QueryGeneration/CustomQuery+Retention.swift b/Sources/DataTransferObjects/QueryGeneration/CustomQuery+Retention.swift
new file mode 100644
index 0000000..8a24169
--- /dev/null
+++ b/Sources/DataTransferObjects/QueryGeneration/CustomQuery+Retention.swift
@@ -0,0 +1,205 @@
+import Foundation
+import DateOperations
+
+extension CustomQuery {
+    /// Compiles a `.retention` query down to a regular `groupBy` query.
+    ///
+    /// The first query interval is split into consecutive periods of the query's granularity
+    /// (month if none is set). Each period gets a filtered theta sketch aggregator over
+    /// `clientUser`, and each pair of periods where the second is not earlier than the first
+    /// gets a post-aggregator estimating the intersection of the two sketches.
+    ///
+    /// - Returns: A copy of this query with `queryType` set to `.groupBy`, `granularity` set to
+    ///   `.all`, and the generated aggregations and post-aggregations.
+    /// - Throws: `QueryGenerationError.keyMissing` if there is no interval to work with, and
+    ///   `QueryGenerationError.notImplemented` if the granularity is unsupported or the interval
+    ///   is shorter than one retention period.
+    func precompiledRetentionQuery() throws -> CustomQuery {
+        // Absolute intervals win over relative ones; only the first interval is used
+        guard
+            let queryIntervals = intervals ?? relativeIntervals?.map({ QueryTimeInterval.from(relativeTimeInterval: $0) }),
+            let firstInterval = queryIntervals.first
+        else {
+            throw QueryGenerationError.keyMissing(reason: "Missing intervals for retention query")
+        }
+
+        let beginDate = firstInterval.beginningDate
+        let endDate = firstInterval.endDate
+
+        // Use the query's granularity to determine retention period, defaulting to month if not specified
+        let retentionGranularity = granularity ?? .month
+
+        try validateMinimumInterval(from: beginDate, to: endDate, granularity: retentionGranularity)
+        let retentionIntervals = try splitIntoIntervals(from: beginDate, to: endDate, granularity: retentionGranularity)
+
+        // One user sketch per retention period
+        let aggregators = retentionIntervals.map { aggregator(for: $0) }
+
+        // One intersection estimate per (row, column) pair with column >= row
+        let postAggregators = retentionIntervals.flatMap { row in
+            retentionIntervals
+                .filter { $0 >= row }
+                .map { postAggregatorBetween(interval1: row, interval2: $0) }
+        }
+
+        var query = self
+        query.queryType = .groupBy
+        query.granularity = .all
+        query.aggregations = uniqued(aggregators)
+        query.postAggregations = uniqued(postAggregators)
+        return query
+    }
+
+    /// Returns `array` without duplicates, keeping the first occurrence of every element.
+    private func uniqued<T: Hashable>(_ array: [T]) -> [T] {
+        var seen = Set<T>()
+        return array.filter { seen.insert($0).inserted }
+    }
+
+    // MARK: - Helper Methods
+
+    /// Checks that at least one whole retention period lies between `beginDate` and `endDate`.
+    ///
+    /// - Throws: `QueryGenerationError.notImplemented` if the span is too short or `granularity`
+    ///   is not day, week, month, quarter, or year.
+    private func validateMinimumInterval(from beginDate: Date, to endDate: Date, granularity: QueryGranularity) throws {
+        let component: Calendar.Component
+        let tooShortReason: String
+
+        switch granularity {
+        case .day:
+            component = .day
+            tooShortReason = "Daily retention queries require at least one day between begin and end dates"
+        case .week:
+            component = .weekOfYear
+            tooShortReason = "Weekly retention queries require at least one week between begin and end dates"
+        case .month:
+            component = .month
+            tooShortReason = "Monthly retention queries require at least one month between begin and end dates"
+        case .quarter:
+            component = .quarter
+            tooShortReason = "Quarterly retention queries require at least one quarter between begin and end dates"
+        case .year:
+            component = .year
+            tooShortReason = "Yearly retention queries require at least one year between begin and end dates"
+        default:
+            throw QueryGenerationError.notImplemented(reason: "Retention queries support day, week, month, quarter, or year granularity")
+        }
+
+        guard numberOfUnitsBetween(beginDate: beginDate, endDate: endDate, component: component) >= 1 else {
+            throw QueryGenerationError.notImplemented(reason: tooShortReason)
+        }
+    }
+
+    /// Splits the span from `fromDate` to `toDate` into calendar-aligned retention periods.
+    ///
+    /// Period N is the whole day, week, month, quarter, or year containing `fromDate` advanced by
+    /// N periods, for N from 0 through the number of whole periods between the two dates.
+    ///
+    /// - Returns: The periods in ascending order; empty if `toDate` lies before `fromDate`.
+    /// - Throws: `QueryGenerationError.notImplemented` for any other granularity.
+    private func splitIntoIntervals(from fromDate: Date, to toDate: Date, granularity: QueryGranularity) throws -> [DateInterval] {
+        // `unit` is what periods are counted in and aligned to; advancing by one period means
+        // adding `stepSize` times `step`.
+        let plan: (unit: Calendar.Component, step: Calendar.Component, stepSize: Int)
+
+        switch granularity {
+        case .day:
+            plan = (.day, .day, 1)
+        case .week:
+            plan = (.weekOfYear, .weekOfYear, 1)
+        case .month:
+            plan = (.month, .month, 1)
+        case .quarter:
+            // Advance in three-month steps instead of relying on `.quarter` date arithmetic
+            plan = (.quarter, .month, 3)
+        case .year:
+            plan = (.year, .year, 1)
+        default:
+            throw QueryGenerationError.notImplemented(reason: "Retention queries support day, week, month, quarter, or year granularity")
+        }
+
+        let periodCount = numberOfUnitsBetween(beginDate: fromDate, endDate: toDate, component: plan.unit)
+
+        // `0 ... periodCount` traps for a negative upper bound
+        guard periodCount >= 0 else { return [] }
+
+        let calendar = Calendar.current
+        return (0 ... periodCount).compactMap { offset -> DateInterval? in
+            guard let date = calendar.date(byAdding: plan.step, value: offset * plan.stepSize, to: fromDate) else {
+                return nil
+            }
+            let start = date.beginning(of: plan.unit) ?? date
+            let end = start.end(of: plan.unit) ?? start
+            return DateInterval(start: start, end: end)
+        }
+    }
+
+    /// Counts the whole `component` units between two dates using the current calendar.
+    ///
+    /// Handles `.day`, `.weekOfYear`, `.month`, `.quarter`, and `.year`; returns 0 for any other
+    /// component or if the calendar reports no value.
+    private func numberOfUnitsBetween(beginDate: Date, endDate: Date, component: Calendar.Component) -> Int {
+        let calendar = Calendar.current
+
+        switch component {
+        case .day:
+            return calendar.dateComponents([.day], from: beginDate, to: endDate).day ?? 0
+        case .weekOfYear:
+            return calendar.dateComponents([.weekOfYear], from: beginDate, to: endDate).weekOfYear ?? 0
+        case .month:
+            return calendar.dateComponents([.month], from: beginDate, to: endDate).month ?? 0
+        case .quarter:
+            // Derived from whole months: Foundation's `.quarter` difference is known to be
+            // unreliable (it has historically come back as 0 regardless of the dates).
+            let months = calendar.dateComponents([.month], from: beginDate, to: endDate).month ?? 0
+            return months / 3
+        case .year:
+            return calendar.dateComponents([.year], from: beginDate, to: endDate).year ?? 0
+        default:
+            return 0
+        }
+    }
+
+    /// Builds the `<start>_<end>` label for an interval from the full dates of its bounds.
+    ///
+    /// NOTE(review): the formatter keeps its default time zone while the periods are computed
+    /// with `Calendar.current`; confirm the labels are as intended on hosts not running in UTC.
+    private func title(for interval: DateInterval) -> String {
+        let formatter = ISO8601DateFormatter()
+        formatter.formatOptions = [.withFullDate]
+        return "\(formatter.string(from: interval.start))_\(formatter.string(from: interval.end))"
+    }
+
+    /// Theta sketch over `clientUser`, restricted to events whose `__time` falls into `interval`.
+    private func aggregator(for interval: DateInterval) -> Aggregator {
+        .filtered(.init(
+            filter: .interval(.init(
+                dimension: "__time",
+                intervals: [.init(dateInterval: interval)]
+            )),
+            aggregator: .thetaSketch(.init(
+                name: "_\(title(for: interval))",
+                fieldName: "clientUser"
+            ))
+        ))
+    }
+
+    /// Estimate of the intersection of the sketches produced for `interval1` and `interval2`.
+    private func postAggregatorBetween(interval1: DateInterval, interval2: DateInterval) -> PostAggregator {
+        // Format each label once instead of once per use
+        let firstTitle = title(for: interval1)
+        let secondTitle = title(for: interval2)
+
+        return .thetaSketchEstimate(.init(
+            name: "retention_\(firstTitle)_\(secondTitle)",
+            field: .thetaSketchSetOp(.init(
+                func: .intersect,
+                fields: [
+                    .fieldAccess(.init(type: .fieldAccess, fieldName: "_\(firstTitle)")),
+                    .fieldAccess(.init(type: .fieldAccess, fieldName: "_\(secondTitle)")),
+                ]
+            ))
+        ))
+    }
+}
\ No newline at end of file
diff --git a/Sources/DataTransferObjects/QueryGeneration/RetentionQueryGenerator.swift b/Sources/DataTransferObjects/QueryGeneration/RetentionQueryGenerator.swift
deleted file mode 100644
index f303740..0000000
--- a/Sources/DataTransferObjects/QueryGeneration/RetentionQueryGenerator.swift
+++ /dev/null
@@ -1,132 +0,0 @@
-//
-// RetentionQueryGenerator.swift
-//
-//
-// Created by Daniel Jilg on 28.11.22.
-//
-
-import Foundation
-
-public enum RetentionQueryGenerator {
-    public enum RetentionQueryGeneratorErrors: Error {
-        /// beginDate and endDate are less than one month apart
-        case datesTooClose
-    }
-
-    public static func generateRetentionQuery(
-        dataSource: String,
-        appID: String,
-        testMode: Bool,
-        beginDate: Date,
-        endDate: Date
-    ) throws -> CustomQuery {
-        // If beginDate and endDate are less than 1m apart, this does not make sense as a query
-        let components = Calendar.current.dateComponents([.month], from: beginDate, to: endDate)
-        if (components.month ?? 0) < 1 {
-            throw RetentionQueryGeneratorErrors.datesTooClose
-        }
-
-        let months = splitIntoMonthLongIntervals(from: beginDate, to: endDate)
-
-        // Collect all Aggregators and PostAggregators
-        var aggregators = [Aggregator]()
-        var postAggregators = [PostAggregator]()
-
-        for month in months {
-            aggregators.append(aggregator(for: month))
-        }
-
-        for row in months {
-            for column in months where column >= row {
-                postAggregators.append(postAggregatorBetween(interval1: row, interval2: column))
-            }
-        }
-
-        // Combine query
-        return CustomQuery(
-            queryType: .groupBy,
-            dataSource: .init(dataSource),
-            filter: .and(.init(fields: [
-                .selector(.init(dimension: "appID", value: appID)),
-                .selector(.init(dimension: "isTestMode", value: testMode ? "true" : "false")),
-            ])),
-            intervals: [QueryTimeInterval(beginningDate: beginDate, endDate: endDate)],
-            granularity: .all,
-            aggregations: aggregators.uniqued(),
-            postAggregations: postAggregators.uniqued()
-        )
-    }
-
-    static func numberOfMonthsBetween(beginDate: Date, endDate: Date) -> Int {
-        let calendar = Calendar.current
-        let components = calendar.dateComponents([.month], from: beginDate, to: endDate)
-        return components.month ?? 0
-    }
-
-    static func splitIntoMonthLongIntervals(from fromDate: Date, to toDate: Date) -> [DateInterval] {
-        let calendar = Calendar.current
-        let numberOfMonths = numberOfMonthsBetween(beginDate: fromDate, endDate: toDate)
-        var intervals = [DateInterval]()
-        for month in 0 ... numberOfMonths {
-            let startOfMonth = calendar.date(byAdding: .month, value: month, to: fromDate)!.startOfMonth
-            let endOfMonth = startOfMonth.endOfMonth
-            let interval = DateInterval(start: startOfMonth, end: endOfMonth)
-            intervals.append(interval)
-        }
-        return intervals
-    }
-
-    // beginning of the month
-    static func beginningOfMonth(for date: Date) -> Date {
-        let calendar = Calendar.current
-        let components = calendar.dateComponents([.year, .month], from: date)
-        return calendar.date(from: components)!
-    }
-
-    // end of the month
-    static func endOfMonth(for date: Date) -> Date {
-        let calendar = Calendar.current
-        let components = calendar.dateComponents([.year, .month], from: date)
-        return calendar.date(byAdding: DateComponents(month: 1, day: -1), to: calendar.date(from: components)!)!
-    }
-
-    static func title(for interval: DateInterval) -> String {
-        "\(DateFormatter.iso8601.string(from: interval.start))_\(DateFormatter.iso8601.string(from: interval.end))"
-    }
-
-    static func aggregator(for interval: DateInterval) -> Aggregator {
-        .filtered(.init(
-            filter: .interval(.init(
-                dimension: "__time",
-                intervals: [.init(dateInterval: interval)]
-            )),
-            aggregator: .thetaSketch(.init(
-                name: "_\(title(for: interval))",
-                fieldName: "clientUser"
-            ))
-        )
-        )
-    }
-
-    static func postAggregatorBetween(interval1: DateInterval, interval2: DateInterval) -> PostAggregator {
-        .thetaSketchEstimate(.init(
-            name: "retention_\(title(for: interval1))_\(title(for: interval2))",
-            field: .thetaSketchSetOp(.init(
-                func: .intersect,
-                fields: [
-                    .fieldAccess(.init(type: .fieldAccess, fieldName: "_\(title(for: interval1))")),
-                    .fieldAccess(.init(type: .fieldAccess, fieldName: "_\(title(for: interval2))")),
-                ]
-            )
-            )
-        )
-        )
-    }
-}
-
-extension Sequence where Element: Hashable {
-    func uniqued() -> [Element] {
-        var set = Set<Element>()
-        return filter { set.insert($0).inserted }
-    }
-}
diff --git a/Tests/QueryGenerationTests/RetentionQueryGenerationTests.swift b/Tests/QueryGenerationTests/RetentionQueryGenerationTests.swift
index 777a3fc..31f9da1 100644
--- a/Tests/QueryGenerationTests/RetentionQueryGenerationTests.swift
+++ b/Tests/QueryGenerationTests/RetentionQueryGenerationTests.swift
@@ -121,58 +121,158 @@ final class RetentionQueryGenerationTests: XCTestCase {
         let end_august = Date(iso8601String: "2022-08-31T23:59:59.999Z")!
         let end_september = Date(iso8601String: "2022-09-30T23:59:59.999Z")!
 
-        XCTAssertThrowsError(try RetentionQueryGenerator.generateRetentionQuery(dataSource: "com.telemetrydeck.all", appID: "", testMode: false, beginDate: begin_august, endDate: mid_august))
-        XCTAssertThrowsError(try RetentionQueryGenerator.generateRetentionQuery(dataSource: "com.telemetrydeck.all", appID: "", testMode: false, beginDate: begin_august, endDate: end_august))
-        XCTAssertNoThrow(try RetentionQueryGenerator.generateRetentionQuery(dataSource: "com.telemetrydeck.all", appID: "", testMode: false, beginDate: begin_august, endDate: end_september))
-        XCTAssertThrowsError(try RetentionQueryGenerator.generateRetentionQuery(dataSource: "com.telemetrydeck.all", appID: "", testMode: false, beginDate: end_september, endDate: begin_august))
+        // Test monthly retention (default)
+        let monthQuery1 = CustomQuery(
+            queryType: .retention,
+            dataSource: "com.telemetrydeck.all",
+            intervals: [QueryTimeInterval(beginningDate: begin_august, endDate: mid_august)],
+            granularity: .month
+        )
+        XCTAssertThrowsError(try monthQuery1.precompile(namespace: nil, useNamespace: false, organizationAppIDs: [UUID()], isSuperOrg: false))
+
+        let monthQuery2 = CustomQuery(
+            queryType: .retention,
+            dataSource: "com.telemetrydeck.all",
+            intervals: [QueryTimeInterval(beginningDate: begin_august, endDate: end_august)],
+            granularity: .month
+        )
+        XCTAssertThrowsError(try monthQuery2.precompile(namespace: nil, useNamespace: false, organizationAppIDs: [UUID()], isSuperOrg: false))
+
+        let monthQuery3 = CustomQuery(
+            queryType: .retention,
+            dataSource: "com.telemetrydeck.all",
+            intervals: [QueryTimeInterval(beginningDate: begin_august, endDate: end_september)],
+            granularity: .month
+        )
+        XCTAssertNoThrow(try monthQuery3.precompile(namespace: nil, useNamespace: false, organizationAppIDs: [UUID()], isSuperOrg: false))
+
+        // Test daily retention
+        let startDate = Date(iso8601String: "2022-08-01T00:00:00.000Z")!
+        let sameDay = Date(iso8601String: "2022-08-01T12:00:00.000Z")!
+        let nextDay = Date(iso8601String: "2022-08-02T00:00:00.000Z")!
+
+        let dayQuery1 = CustomQuery(
+            queryType: .retention,
+            dataSource: "com.telemetrydeck.all",
+            intervals: [QueryTimeInterval(beginningDate: startDate, endDate: sameDay)],
+            granularity: .day
+        )
+        XCTAssertThrowsError(try dayQuery1.precompile(namespace: nil, useNamespace: false, organizationAppIDs: [UUID()], isSuperOrg: false))
+
+        let dayQuery2 = CustomQuery(
+            queryType: .retention,
+            dataSource: "com.telemetrydeck.all",
+            intervals: [QueryTimeInterval(beginningDate: startDate, endDate: nextDay)],
+            granularity: .day
+        )
+        XCTAssertNoThrow(try dayQuery2.precompile(namespace: nil, useNamespace: false, organizationAppIDs: [UUID()], isSuperOrg: false))
+
+        // Test weekly retention
+        let weekStart = Date(iso8601String: "2022-08-01T00:00:00.000Z")!
+        let weekMid = Date(iso8601String: "2022-08-05T00:00:00.000Z")!
+        let weekEnd = Date(iso8601String: "2022-08-08T00:00:00.000Z")!
+
+        let weekQuery1 = CustomQuery(
+            queryType: .retention,
+            dataSource: "com.telemetrydeck.all",
+            intervals: [QueryTimeInterval(beginningDate: weekStart, endDate: weekMid)],
+            granularity: .week
+        )
+        XCTAssertThrowsError(try weekQuery1.precompile(namespace: nil, useNamespace: false, organizationAppIDs: [UUID()], isSuperOrg: false))
+
+        let weekQuery2 = CustomQuery(
+            queryType: .retention,
+            dataSource: "com.telemetrydeck.all",
+            intervals: [QueryTimeInterval(beginningDate: weekStart, endDate: weekEnd)],
+            granularity: .week
+        )
+        XCTAssertNoThrow(try weekQuery2.precompile(namespace: nil, useNamespace: false, organizationAppIDs: [UUID()], isSuperOrg: false))
     }
 
     func testExample() throws {
-        let generatedTinyQuery = try RetentionQueryGenerator.generateRetentionQuery(
+        // Test with new compile-down approach
+        let appID = UUID(uuidString: "79167A27-EBBF-4012-9974-160624E5D07B")!
+        let query = CustomQuery(
+            queryType: .retention,
             dataSource: "com.telemetrydeck.all",
-            appID: "79167A27-EBBF-4012-9974-160624E5D07B",
+            appID: appID,
+            baseFilters: .thisApp,
             testMode: false,
-            beginDate: Date(iso8601String: "2022-08-01T00:00:00.000Z")!,
-            endDate: Date(iso8601String: "2022-09-30T00:00:00.000Z")!
+            intervals: [QueryTimeInterval(
+                beginningDate: Date(iso8601String: "2022-08-01T00:00:00.000Z")!,
+                endDate: Date(iso8601String: "2022-09-30T00:00:00.000Z")!
+            )],
+            granularity: .month // Explicitly set to month
         )
-
-        XCTAssertEqual(tinyQuery, generatedTinyQuery)
-
-        XCTAssertEqual(String(data: try! JSONEncoder.telemetryEncoder.encode(tinyQuery), encoding: .utf8), String(data: try! JSONEncoder.telemetryEncoder.encode(generatedTinyQuery), encoding: .utf8))
-
-//        let aggregationNames = generatedTinyQuery.aggregations!.map { agg in
-//            switch agg {
-//            case .filtered(let filteredAgg):
-//                switch filteredAgg.aggregator {
-//                case .thetaSketch(let genAgg):
-//                    return genAgg.name
-//                default:
-//                    fatalError()
-//                }
-//            default:
-//                fatalError()
-//            }
-//        }
-//
-//        let postAggregationNames = generatedTinyQuery.postAggregations!.map { postAgg in
-//            switch postAgg {
-//            case .thetaSketchEstimate(let thetaEstimateAgg):
-//                return thetaEstimateAgg.name ?? "Name not defined"
-//            default:
-//                fatalError()
-//            }
-//        }
-//
-//        print("Aggregations: ")
-//        for aggregationName in aggregationNames {
-//            print(aggregationName)
-//        }
-//
-//        print("Post-Aggregations: ")
-//        for aggregationName in postAggregationNames {
-//            print(aggregationName)
-//        }
-//
-//        print(String(data: try! JSONEncoder.telemetryEncoder.encode(generatedTinyQuery), encoding: .utf8)!)
+
+        let compiledQuery = try query.precompile(namespace: nil, useNamespace: false, organizationAppIDs: [appID], isSuperOrg: true)
+
+        // Verify the compiled query has the expected structure
+        XCTAssertEqual(compiledQuery.queryType, .groupBy)
+        XCTAssertEqual(compiledQuery.granularity, .all)
+        XCTAssertEqual(compiledQuery.aggregations?.count, 2) // August and September
+        XCTAssertEqual(compiledQuery.postAggregations?.count, 3) // 2*3/2 = 3
+
+        // The generated query should match the expected structure from tinyQuery
+        // (though the exact aggregator names might differ due to date formatting)
+    }
+
+    func testRetentionWithDifferentGranularities() throws {
+        let appID = UUID(uuidString: "79167A27-EBBF-4012-9974-160624E5D07B")!
+
+        // Test daily retention - 7 days generate 7 intervals (day offsets 0-6 inclusive)
+        let dailyQuery = CustomQuery(
+            queryType: .retention,
+            dataSource: "com.telemetrydeck.all",
+            appID: appID,
+            baseFilters: .thisApp,
+            testMode: false,
+            intervals: [QueryTimeInterval(
+                beginningDate: Date(iso8601String: "2022-08-01T00:00:00.000Z")!,
+                endDate: Date(iso8601String: "2022-08-07T23:59:59.000Z")!
+            )],
+            granularity: .day
+        )
+
+        let compiledDailyQuery = try dailyQuery.precompile(namespace: nil, useNamespace: false, organizationAppIDs: [appID], isSuperOrg: true)
+        XCTAssertEqual(compiledDailyQuery.aggregations?.count, 7) // 7 days
+        // Post-aggregations should be n*(n+1)/2 for n intervals
+        XCTAssertEqual(compiledDailyQuery.postAggregations?.count, 28) // 7*8/2 = 28
+
+        // Test weekly retention - 4 weeks
+        let weeklyQuery = CustomQuery(
+            queryType: .retention,
+            dataSource: "com.telemetrydeck.all",
+            appID: appID,
+            baseFilters: .thisApp,
+            testMode: false,
+            intervals: [QueryTimeInterval(
+                beginningDate: Date(iso8601String: "2022-08-01T00:00:00.000Z")!,
+                endDate: Date(iso8601String: "2022-08-29T00:00:00.000Z")!
+            )],
+            granularity: .week
+        )
+
+        let compiledWeeklyQuery = try weeklyQuery.precompile(namespace: nil, useNamespace: false, organizationAppIDs: [appID], isSuperOrg: true)
+        XCTAssertEqual(compiledWeeklyQuery.aggregations?.count, 5) // 5 weeks (spans into 5th week)
+        XCTAssertEqual(compiledWeeklyQuery.postAggregations?.count, 15) // 5*6/2 = 15
+
+        // Test monthly retention - 3 months
+        let monthlyQuery = CustomQuery(
+            queryType: .retention,
+            dataSource: "com.telemetrydeck.all",
+            appID: appID,
+            baseFilters: .thisApp,
+            testMode: false,
+            intervals: [QueryTimeInterval(
+                beginningDate: Date(iso8601String: "2022-08-01T00:00:00.000Z")!,
+                endDate: Date(iso8601String: "2022-10-31T00:00:00.000Z")!
+            )],
+            granularity: .month
+        )
+
+        let compiledMonthlyQuery = try monthlyQuery.precompile(namespace: nil, useNamespace: false, organizationAppIDs: [appID], isSuperOrg: true)
+        XCTAssertEqual(compiledMonthlyQuery.aggregations?.count, 3) // 3 months
+        XCTAssertEqual(compiledMonthlyQuery.postAggregations?.count, 6) // 3*4/2 = 6
     }
 }