5.1 Order-based merge
This section focuses on larger volume of data. We store data of Orders table in multiple tables by year. The table name format is Orders+year, such as Orders2021 and Orders2022. Each table has same structure as Orders table.
SQL
SELECT ProductID,City,sum(Amount) Amount
FROM Customers
INNER JOIN Orders2021
ON Customers.CustomerID= Orders2021.CustomerID
WHERE Orders2021.Quantity>5
GROUP BY ProductID,City
SPL
This task involves association between primary table and sub table. Both tables contain data that cannot fit into the memory and cursor is needed to retrieve data. In this case we should sort both tables by join field and associate them through order-based merge in order to get the highest efficiency.
In practice, we can store data according to join field in advance, like we did in the section “Storing Data in Account Order”. All the following examples in this section are assumed to have been ordered by the primary table’s primary key field.
A | |
---|---|
1 | =file(“Orders2021_Account.ctx”).open().cursor@x(CustomerID,ProductID,Amount;Quantity>5) |
2 | =file(“Customers.ctx”).open().cursor@x(CustomerID,City) |
3 | =joinx(A2:primary, CustomerID; A1:sub, CustomerID) |
4 | =A3.groups(sub.ProductID, primary.City; sum(sub.Amount):Amount) |
SQL
SELECT Customers.CustomerID,CustomerName,sum(Amount) Amount
FROM Customers
LEFT JOIN Orders2021
ON Customers.CustomerID= Orders2021.CustomerID
GROUP BY Customers.CustomerID,CustomerName
SPL
A | |
---|---|
1 | =file(“Orders2021_Account.ctx”).open().cursor@x(CustomerID,Amount).group@s(CustomerID;sum(Amount):Amount) |
2 | =file(“Customers.ctx”).open().cursor@x(CustomerID,CustomerName) |
3 | =joinx@1(A2:primary, CustomerID; A1:sub, CustomerID) |
4 | =A3.group(primary.CustomerID;primary.CustomerName,~.sum(sub.Amount):Amount) |
5 | =file(“result.btx”).export@b(A4) |
A4 Here we have a large grouping result set that cannot fit into memory. But as data is ordered by grouping field, group function is used to perform aggregation and return result as a cursor.
SQL
WITH a AS(
SELECT CustomerID,sum(Amount) Amount2020
FROM Orders2020
GROUP BY CustomerID),
b AS(
SELECT CustomerID,sum(Amount) Amount2021
FROM Orders2021
GROUP BY CustomerID)
SELECT
coalesce ( a.CustomerID, b.CustomerID ) CustomerID,
Amount2020, Amount2021,Amount2021/Amount2020-1 GrowthRate
FROM a FULL JOIN b ON a.CustomerID= b.CustomerID
SPL
A | |
---|---|
1 | =file(“Orders2020_Account.ctx”).open().cursor@x(CustomerID,Amount).group@s(CustomerID;sum(Amount):Amount2020) |
2 | =file(“Orders2021_Account.ctx”).open().cursor@x(CustomerID,Amount).group@s(CustomerID;sum(Amount):Amount2021) |
3 | =joinx@f(A1:a, CustomerID; A2:b, CustomerID) |
4 | =A3.new([a.CustomerID,b.CustomerID].nvl():CustomerID,a.Amount2020, b.Amount2021, b.Amount2021/a.Amount2020-1:GrowthRate) |
5 | =file(“result.btx”).export@b(A4) |
SQL
WITH a AS(
SELECT CustomerID,sum(Amount) Amount2020
FROM Orders2020
GROUP BY CustomerID),
b AS(
SELECT CustomerID,sum(Amount) Amount2021
FROM Orders2021
GROUP BY CustomerID)
SELECT City,sum(Amount2020) Amount2020,sum(Amount2021) Amount2021,
sum(Amount2021)/sum(Amount2020)-1 GrowthRate
FROM Customers
INNER JOIN a ON Customers.CustomerID= a.CustomerID
INNER JOIN b ON Customers.CustomerID= b.CustomerID
GROUP BY City
SPL
A | |
---|---|
1 | =file(“Orders2020_Account.ctx”).open().cursor@x(CustomerID,Amount).group@s(CustomerID;sum(Amount):Amount2020) |
2 | =file(“Orders2021_Account.ctx”).open().cursor@x(CustomerID,Amount).group@s(CustomerID;sum(Amount):Amount2021) |
3 | =file(“Customers.ctx”).open().cursor@x(CustomerID, City) |
4 | =joinx(A3:primary, CustomerID; A1:sub1, CustomerID; A2:sub2, CustomerID) |
5 | =A4.groups(primary.City;sum(sub1.Amount2020):Amount2020, sum(sub2.Amount2021):Amount2021, null:GrowthRate).run(GrowthRate=Amount2021/Amount2020-1) |
SQL
WITH a AS(
SELECT CustomerID,sum(Amount) Amount2020
FROM Orders2020
GROUP BY CustomerID),
b AS(
SELECT CustomerID,sum(Amount) Amount2021
FROM Orders2021
GROUP BY CustomerID)
SELECT City,sum(Amount2020) Amount2020,sum(Amount2021) Amount2021,
sum(Amount2021)/sum(Amount2020)-1 GrowthRate
FROM Customers
LEFT JOIN a ON Customers.CustomerID= a.CustomerID
LEFT JOIN b ON Customers.CustomerID= b.CustomerID
GROUP BY City
SPL
A | |
---|---|
1 | =file(“Orders2020_Account.ctx”).open().cursor@x(CustomerID,Amount).group@s(CustomerID;sum(Amount):Amount2020) |
2 | =file(“Orders2021_Account.ctx”).open().cursor@x(CustomerID,Amount).group@s(CustomerID;sum(Amount):Amount2021) |
3 | =file(“Customers.ctx”).open().cursor@x(CustomerID,City) |
4 | =joinx@1(A3:primary, CustomerID; A1:sub1, CustomerID; A2:sub2, CustomerID) |
5 | =A4.groups(primary.City;sum(sub1.Amount2020):Amount2020, sum(sub2.Amount2021):Amount2021, null:GrowthRate).run(GrowthRate=Amount2021/Amount2020-1) |
SQL
WITH a AS(
SELECT CustomerID,sum(Amount) Amount2019
FROM Orders2019
GROUP BY CustomerID),
b AS(
SELECT CustomerID,sum(Amount) Amount2020
FROM Orders2020
GROUP BY CustomerID),
c AS(
SELECT CustomerID,sum(Amount) Amount2021
FROM Orders2021
GROUP BY CustomerID)
SELECT coalesce (a.CustomerID, b.CustomerID, c.CustomerID ) CustomerID,
Amount2019, Amount2020, Amount2021
FROM a
FULL JOIN b ON b.CustomerID= a.CustomerID
FULL JOIN c ON c.CustomerID= b.CustomerID or c.CustomerID= a.CustomerID
GROUP BY CustomerID
SPL
A | |
---|---|
1 | =file(“Orders2019_Account.ctx”).open().cursor@x(CustomerID,Amount).group@s(CustomerID;sum(Amount):Amount2019) |
2 | =file(“Orders2020_Account.ctx”).open().cursor@x(CustomerID,Amount).group@s(CustomerID;sum(Amount):Amount2020) |
3 | =file(“Orders2021_Account.ctx”).open().cursor@x(CustomerID,Amount).group@s(CustomerID;sum(Amount):Amount2021) |
4 | =joinx@f(A1:a, CustomerID; A2:b, CustomerID; A3:c, CustomerID) |
5 | =A4.new([a.CustomerID,b.CustomerID,c.CustomerID].nvl():CustomerID, a.Amount2019, b.Amount2020, c.Amount2021) |
6 | =file(“result.btx”).export@b(A5) |
SPL Official Website 👉 https://www.scudata.com
SPL Feedback and Help 👉 https://www.reddit.com/r/esProcSPL
SPL Learning Material 👉 https://c.scudata.com
SPL Source Code and Package 👉 https://github.com/SPLWare/esProc
Discord 👉 https://discord.gg/cFTcUNs7
Youtube 👉 https://www.youtube.com/@esProc_SPL