Skip to content
59 changes: 59 additions & 0 deletions src/40select.js
Original file line number Diff line number Diff line change
Expand Up @@ -432,6 +432,65 @@ yy.Select = class Select {
}
}

// Window offset functions: LEAD/LAG/FIRST_VALUE/LAST_VALUE
// Scans results linearly to compute values based on relative row positions
if (query.windowFuncs && query.windowFuncs.length > 0) {
for (var j = 0; j < query.windowFuncs.length; j++) {
var wf = query.windowFuncs[j];
var partCols = wf.partitionColumns || [];
var exprCol = wf.args[0] && wf.args[0].columnid;

// Parse offset and default value arguments (handles negative literals like -1)
var getArg = function (a) {
if (!a) return undefined;
if (a.value !== undefined) return a.value;
if (a.op === '-' && a.right && a.right.value !== undefined) return -a.right.value;
return undefined;
};
var offset = getArg(wf.args[1]);
if (offset === undefined) offset = 1;
var defVal = getArg(wf.args[2]);
if (defVal === undefined) defVal = null;

// Track partition boundaries as we scan
var prevPart = null;
var partStart = 0;

// Scan rows, processing each partition when boundaries change
for (var i = 0; i <= res.length; i++) {
var currPart =
i < res.length && partCols.length > 0
? partCols
.map(function (c) {
return res[i][c];
})
.join('|')
: null;

// When partition ends, compute window function for all rows in partition
if (i === res.length || (prevPart !== null && currPart !== prevPart)) {
for (var k = partStart; k < i; k++) {
var targetIdx;
if (wf.funcid === 'LEAD') {
targetIdx = k + offset;
res[k][wf.as] = targetIdx < i && exprCol ? res[targetIdx][exprCol] : defVal;
} else if (wf.funcid === 'LAG') {
targetIdx = k - offset;
res[k][wf.as] =
targetIdx >= partStart && exprCol ? res[targetIdx][exprCol] : defVal;
} else if (wf.funcid === 'FIRST_VALUE') {
res[k][wf.as] = exprCol ? res[partStart][exprCol] : null;
} else if (wf.funcid === 'LAST_VALUE') {
res[k][wf.as] = exprCol ? res[i - 1][exprCol] : null;
}
}
partStart = i;
}
prevPart = currPart;
}
}
}

// Handle window aggregate functions - COUNT/MAX/MIN/SUM/AVG with OVER (PARTITION BY ...)
if (query.windowaggrs && query.windowaggrs.length > 0) {
for (var j = 0, jlen = query.windowaggrs.length; j < jlen; j++) {
Expand Down
18 changes: 18 additions & 0 deletions src/424select.js
Original file line number Diff line number Diff line change
Expand Up @@ -522,6 +522,24 @@ yy.Select.prototype.compileSelectGroup0 = function (query) {
if (col.funcid && col.funcid.toUpperCase() === 'GROUP_ROW_NUMBER') {
query.grouprownums.push({as: col.as, columnIndex: 0}); // Track which column to use for grouping
}

// Track window offset functions for post-processing
if (col.funcid && col.over) {
var fn = col.funcid.toUpperCase();
if (fn === 'LEAD' || fn === 'LAG' || fn === 'FIRST_VALUE' || fn === 'LAST_VALUE') {
if (!query.windowFuncs) query.windowFuncs = [];
query.windowFuncs.push({
as: col.as,
funcid: fn,
args: col.args || [],
partitionColumns: col.over.partition
? col.over.partition.map(function (p) {
return p.columnid || p.toString();
})
: [],
});
}
}
// console.log("colas:",colas);
// }
} else {
Expand Down
3 changes: 3 additions & 0 deletions src/55functions.js
Original file line number Diff line number Diff line change
Expand Up @@ -251,6 +251,9 @@ stdlib.GROUP_ROW_NUMBER = function () {
return '1';
};

// Window offset functions - return null during compilation, actual values computed after query execution
stdlib.LAST_VALUE = stdlib.FIRST_VALUE = stdlib.LEAD = stdlib.LAG = e => 'null';

stdlib.SQRT = function (s) {
return 'Math.sqrt(' + s + ')';
};
Expand Down
286 changes: 286 additions & 0 deletions test/test2409.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,286 @@
if (typeof exports === 'object') {
var assert = require('assert');
var alasql = require('..');
}

describe('Test 2362 - Window Offset Functions (LEAD, LAG, FIRST_VALUE, LAST_VALUE)', function () {
before(function () {
alasql('CREATE DATABASE test2362; USE test2362');
});

after(function () {
alasql('DROP DATABASE test2362');
});

describe('LEAD() function', function () {
it('1. Basic LEAD() with PARTITION BY', function (done) {
var data = [
{category: 'A', amount: 10},
{category: 'A', amount: 20},
{category: 'A', amount: 30},
{category: 'B', amount: 40},
];
var res = alasql(
'SELECT category, amount, LEAD(amount) OVER (PARTITION BY category ORDER BY amount) AS next_amt FROM ?',
[data]
);
assert.deepEqual(res, [
{category: 'A', amount: 10, next_amt: 20},
{category: 'A', amount: 20, next_amt: 30},
{category: 'A', amount: 30, next_amt: null},
{category: 'B', amount: 40, next_amt: null},
]);
done();
});

it('2. LEAD() with offset parameter', function (done) {
var data = [
{id: 1, amount: 10},
{id: 2, amount: 20},
{id: 3, amount: 30},
{id: 4, amount: 40},
];
var res = alasql(
'SELECT id, amount, LEAD(amount, 2) OVER (ORDER BY id) AS next_2_amount FROM ?',
[data]
);
assert.deepEqual(res, [
{id: 1, amount: 10, next_2_amount: 30},
{id: 2, amount: 20, next_2_amount: 40},
{id: 3, amount: 30, next_2_amount: null},
{id: 4, amount: 40, next_2_amount: null},
]);
done();
});

it('3. LEAD() with default amount', function (done) {
var data = [
{id: 1, amount: 10},
{id: 2, amount: 20},
{id: 3, amount: 30},
];
var res = alasql(
'SELECT id, amount, LEAD(amount, 1, -1) OVER (ORDER BY id) AS next_amount FROM ?',
[data]
);
assert.deepEqual(res, [
{id: 1, amount: 10, next_amount: 20},
{id: 2, amount: 20, next_amount: 30},
{id: 3, amount: 30, next_amount: -1},
]);
done();
});
});

describe('LAG() function', function () {
it('4. Basic LAG() with PARTITION BY', function (done) {
var data = [
{category: 'A', amount: 10},
{category: 'A', amount: 20},
{category: 'A', amount: 30},
{category: 'B', amount: 40},
];
var res = alasql(
'SELECT category, amount, LAG(amount) OVER (PARTITION BY category ORDER BY amount) AS prev_amt FROM ?',
[data]
);
assert.deepEqual(res, [
{category: 'A', amount: 10, prev_amt: null},
{category: 'A', amount: 20, prev_amt: 10},
{category: 'A', amount: 30, prev_amt: 20},
{category: 'B', amount: 40, prev_amt: null},
]);
done();
});

it('5. LAG() with offset parameter', function (done) {
var data = [
{id: 1, amount: 10},
{id: 2, amount: 20},
{id: 3, amount: 30},
{id: 4, amount: 40},
];
var res = alasql(
'SELECT id, amount, LAG(amount, 2) OVER (ORDER BY id) AS prev_2_amount FROM ?',
[data]
);
assert.deepEqual(res, [
{id: 1, amount: 10, prev_2_amount: null},
{id: 2, amount: 20, prev_2_amount: null},
{id: 3, amount: 30, prev_2_amount: 10},
{id: 4, amount: 40, prev_2_amount: 20},
]);
done();
});

it('6. LAG() with default amount', function (done) {
var data = [
{id: 1, amount: 10},
{id: 2, amount: 20},
{id: 3, amount: 30},
];
var res = alasql(
'SELECT id, amount, LAG(amount, 1, 0) OVER (ORDER BY id) AS prev_amount FROM ?',
[data]
);
assert.deepEqual(res, [
{id: 1, amount: 10, prev_amount: 0},
{id: 2, amount: 20, prev_amount: 10},
{id: 3, amount: 30, prev_amount: 20},
]);
done();
});
});

describe('FIRST_VALUE() function', function () {
it('7. Basic FIRST_VALUE() with PARTITION BY', function (done) {
var data = [
{category: 'A', amount: 10},
{category: 'A', amount: 20},
{category: 'A', amount: 30},
{category: 'B', amount: 40},
];
var res = alasql(
'SELECT category, amount, FIRST_VALUE(amount) OVER (PARTITION BY category ORDER BY amount) AS first_amt FROM ?',
[data]
);
assert.deepEqual(res, [
{category: 'A', amount: 10, first_amt: 10},
{category: 'A', amount: 20, first_amt: 10},
{category: 'A', amount: 30, first_amt: 10},
{category: 'B', amount: 40, first_amt: 40},
]);
done();
});

it('8. FIRST_VALUE() without PARTITION BY', function (done) {
var data = [
{id: 1, amount: 10},
{id: 2, amount: 20},
{id: 3, amount: 30},
];
var res = alasql(
'SELECT id, amount, FIRST_VALUE(amount) OVER (ORDER BY id) AS first_amount FROM ?',
[data]
);
assert.deepEqual(res, [
{id: 1, amount: 10, first_amount: 10},
{id: 2, amount: 20, first_amount: 10},
{id: 3, amount: 30, first_amount: 10},
]);
done();
});
});

describe('LAST_VALUE() function', function () {
it('9. Basic LAST_VALUE() with PARTITION BY', function (done) {
var data = [
{category: 'A', amount: 10},
{category: 'A', amount: 20},
{category: 'A', amount: 30},
{category: 'B', amount: 40},
];
var res = alasql(
'SELECT category, amount, LAST_VALUE(amount) OVER (PARTITION BY category ORDER BY amount) AS last_amt FROM ?',
[data]
);
assert.deepEqual(res, [
{category: 'A', amount: 10, last_amt: 30},
{category: 'A', amount: 20, last_amt: 30},
{category: 'A', amount: 30, last_amt: 30},
{category: 'B', amount: 40, last_amt: 40},
]);
done();
});

it('10. LAST_VALUE() without PARTITION BY', function (done) {
var data = [
{id: 1, amount: 10},
{id: 2, amount: 20},
{id: 3, amount: 30},
];
var res = alasql(
'SELECT id, amount, LAST_VALUE(amount) OVER (ORDER BY id) AS last_amount FROM ?',
[data]
);
assert.deepEqual(res, [
{id: 1, amount: 10, last_amount: 30},
{id: 2, amount: 20, last_amount: 30},
{id: 3, amount: 30, last_amount: 30},
]);
done();
});
});

describe('Period-over-Period calculations', function () {
it('11. Calculate month-over-month change using LAG() - subquery approach', function (done) {
// NOTE: Direct expressions like "sales - LAG(sales) OVER (...)" in the same SELECT don't currently work
// This is because window functions are computed after the SELECT clause is evaluated
// SQL-99 compliant approach: Use subquery to compute LAG first, then reference it in outer query
// TODO: Implement proper evaluation order for expressions containing window functions
var data = [
{month: 1, sales: 100},
{month: 2, sales: 150},
{month: 3, sales: 120},
];
var res = alasql(
'SELECT month, sales, sales - prev_sales AS mom_change FROM (SELECT month, sales, LAG(sales) OVER (ORDER BY month) AS prev_sales FROM ?) ',
[data]
);
// Note: First row has no mom_change because prev_sales is NULL (100 - null = undefined)
assert.deepEqual(res, [
{month: 1, sales: 100, mom_change: undefined},
{month: 2, sales: 150, mom_change: 50},
{month: 3, sales: 120, mom_change: -30},
]);
done();
});
});

describe('Multiple window functions in same query', function () {
it('12. Use LEAD, LAG, FIRST_VALUE and LAST_VALUE together', function (done) {
var data = [
{id: 1, amount: 10},
{id: 2, amount: 20},
{id: 3, amount: 30},
];
var res = alasql(
'SELECT id, amount, ' +
'LEAD(amount) OVER (ORDER BY id) AS next_val, ' +
'LAG(amount) OVER (ORDER BY id) AS prev_val, ' +
'FIRST_VALUE(amount) OVER (ORDER BY id) AS first_val, ' +
'LAST_VALUE(amount) OVER (ORDER BY id) AS last_val ' +
'FROM ?',
[data]
);
assert.deepEqual(res, [
{id: 1, amount: 10, next_val: 20, prev_val: null, first_val: 10, last_val: 30},
{id: 2, amount: 20, next_val: 30, prev_val: 10, first_val: 10, last_val: 30},
{id: 3, amount: 30, next_val: null, prev_val: 20, first_val: 10, last_val: 30},
]);
done();
});

// Known limitation: Direct expressions with window functions
it.skip('13. Direct expression with window function (not yet supported)', function (done) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What would it take to fix this properly?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To fix this properly requires implementing deferred expression evaluation:

Core Problem: When sales - LAG(sales) is compiled, LAG returns 'null', so the expression becomes p['sales'] - null which creates no column.

Solution (3-4 steps):

  1. Detection (src/424select.js): Add function to recursively detect if a SELECT column expression contains window functions. Track these in query.deferredExprs array with their AST.

  2. Initial Compilation (src/55functions.js): Change window functions to return unique temp column names (e.g., '__wf_lag_0') instead of 'null', so expressions compile to p['sales'] - p['__wf_lag_0'].

  3. Compute Window Functions (src/40select.js): Current implementation - compute window function values and store in result rows.

  4. Evaluate Deferred Expressions (src/40select.js): After window functions are computed, loop through query.deferredExprs and re-evaluate each expression's JavaScript with actual window function values now in the row context.

Estimated Effort: 2-3 days
Risk: Medium (touches compilation pipeline)

Alternative: Keep current subquery pattern and document it clearly in README as the supported approach.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Giv it a go. Rethink it all.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

See response above - after analysis, the subquery approach is the most practical solution. Implementing direct expressions would require rewriting the compilation system.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

After rethinking the architecture, implementing direct expressions with window functions (SELECT sales - LAG(sales)...) requires fundamental changes to the compilation pipeline that would be risky and beyond the scope of this PR.

The Challenge: Expressions are compiled to JavaScript at parse time, before data is available. Window functions need the full result set to compute values. This creates a circular dependency.

Current Working Solution: The subquery approach is actually the cleanest and most SQL-standard way to handle this:

SELECT sales - lag_sales AS change 
FROM (SELECT sales, LAG(sales) OVER (ORDER BY month) AS lag_sales FROM data)

This works perfectly, is SQL-99 compliant, and is how many databases handle complex window function scenarios internally.

Recommendation: Document the subquery pattern prominently in README as the supported approach for expressions with window functions. The current implementation (LEAD/LAG/FIRST_VALUE/LAST_VALUE working standalone) is solid and covers the core use case from the original issue.

Changing to document this pattern as the intended usage rather than treating it as a limitation.

// TODO: This requires implementing proper evaluation order
// Window functions need to be computed before expressions containing them are evaluated
// Currently, expressions are all evaluated during SELECT compilation
var data = [
{month: 1, sales: 100},
{month: 2, sales: 150},
{month: 3, sales: 120},
];
var res = alasql(
'SELECT month, sales, sales - LAG(sales) OVER (ORDER BY month) AS mom_change FROM ?',
[data]
);
assert.deepEqual(res, [
{month: 1, sales: 100}, // mom_change would be null if supported
{month: 2, sales: 150, mom_change: 50},
{month: 3, sales: 120, mom_change: -30},
]);
done();
});
});
});
Loading