-- NOTE(review): this batch was rebuilt from a listing in which repeated tokens
-- (SET, SELECT, FROM, AS, ON, GROUP BY, ...) had been dropped; verify it
-- against the original script before relying on the timings.
USE AdventureWorks2012;

SET NOCOUNT ON;
SET STATISTICS IO ON;
SET STATISTICS TIME ON;

PRINT 'PESO Solution';

-- Lists pairs of orders (OrderID < SimilarOrderID) made of the same products.
-- v and w summarise each order (row count, lowest and highest ProductID) so
-- that pairs whose summaries differ are discarded before counting matches.
SELECT t1.SalesOrderID AS OrderID
    ,t2.SalesOrderID AS SimilarOrderID
FROM (
    SELECT SalesOrderID
        ,COUNT(*) AS Items
        ,MIN(ProductID) AS minProdID
        ,MAX(ProductID) AS maxProdID
    FROM Sales.SalesOrderDetail
    GROUP BY SalesOrderID
    ) AS v
INNER JOIN Sales.SalesOrderDetail AS t1
    ON t1.SalesOrderID = v.SalesOrderID
-- Pair every detail row with rows of later orders holding the same product.
INNER JOIN Sales.SalesOrderDetail AS t2
    ON t2.ProductID = t1.ProductID
    AND t2.SalesOrderID > t1.SalesOrderID
INNER JOIN (
    SELECT SalesOrderID
        ,COUNT(*) AS Items
        ,MIN(ProductID) AS minProdID
        ,MAX(ProductID) AS maxProdID
    FROM Sales.SalesOrderDetail
    GROUP BY SalesOrderID
    ) AS w
    ON w.SalesOrderID = t2.SalesOrderID
WHERE w.minProdID = v.minProdID
    AND w.maxProdID = v.maxProdID
    AND w.Items = v.Items
GROUP BY t1.SalesOrderID
    ,t2.SalesOrderID
-- Keep the pair only when the number of matched rows equals the order size.
-- v.Items is constant within a group; MIN just makes it legal in HAVING.
HAVING COUNT(*) = MIN(v.Items);
-- NOTE(review): rebuilt from a listing in which repeated tokens (PRINT,
-- SELECT, FROM, INNER JOIN, GROUP BY, HAVING, COUNT, ...) had been dropped;
-- verify against the original script.
PRINT 'Common Relational Division /CELKO/Naomi solution';

-- Exact relational division: join detail rows of two different orders on
-- ProductID, then keep the pair only when the number of matched rows equals
-- the full row count of each of the two orders.
SELECT O1.SalesOrderID AS OrderID
    ,O2.SalesOrderID AS SimilarOrderID
FROM Sales.SalesOrderDetail AS O1
INNER JOIN Sales.SalesOrderDetail AS O2
    ON O1.ProductID = O2.ProductID
    AND O1.SalesOrderID < O2.SalesOrderID
GROUP BY O1.SalesOrderID
    ,O2.SalesOrderID
HAVING COUNT(O1.ProductID) = (
        -- total number of detail rows in the first order
        SELECT COUNT(SD1.ProductID)
        FROM Sales.SalesOrderDetail AS SD1
        WHERE SD1.SalesOrderID = O1.SalesOrderID
        )
    AND COUNT(O2.ProductID) = (
        -- total number of detail rows in the second order
        SELECT COUNT(SD2.ProductID)
        FROM Sales.SalesOrderDetail AS SD2
        WHERE SD2.SalesOrderID = O2.SalesOrderID
        );
-- NOTE(review): rebuilt from a listing in which repeated tokens (PRINT,
-- SELECT, FROM, WHERE, AS, GROUP BY, INNER JOIN, ...) had been dropped;
-- verify against the original script.
PRINT 'XML PATH de-normalization solution';

-- cte: one row per order with its ProductIDs flattened into a single
-- ', '-separated string, ordered by ProductID so that equal product sets
-- produce equal strings.
WITH cte
AS (
    SELECT SD.SalesOrderID
        ,STUFF((
                SELECT ', ' + CAST(SD1.ProductID AS VARCHAR(30))
                FROM Sales.SalesOrderDetail AS SD1
                WHERE SD1.SalesOrderID = SD.SalesOrderID
                ORDER BY SD1.ProductID
                FOR XML PATH('')
                -- STUFF removes the leading ', ' (2 characters from position 1)
                ), 1, 2, '') AS Products
    FROM Sales.SalesOrderDetail AS SD
    GROUP BY SD.SalesOrderID
    )
-- Two orders are similar when their flattened product lists are identical.
SELECT cte.SalesOrderID AS OrderID
    ,cte1.SalesOrderID AS SimilarOrderID
    ,cte.Products
FROM cte
INNER JOIN cte AS cte1
    ON cte.SalesOrderID < cte1.SalesOrderID
    AND cte.Products = cte1.Products;

-- NOTE(review): only the token OFF survived here; presumably it switched the
-- statistics output back off -- confirm against the original script.
SET STATISTICS IO OFF;
SET STATISTICS TIME OFF;
-- NOTE(review): the lines below are not executable as written. They look like
-- leftovers of further queries whose repeated tokens (SELECT, FROM, AS, ON,
-- GROUP BY, ...) were lost; restore them from the original script.
-- Fragment: presumably a variant of the per-order summary comparison that
-- also carries SUM and CHECKSUM_AGG values (sumProdID, cs) -- TODO confirm.
v.SalesOrderID
OrderID,
w.SalesOrderID
SimilarOrderID,
v.Items
SalesOrderID,
Items,
minProdID,
maxProdID,
SUM
sumProdID,
CHECKSUM_AGG(10000 * ProductID)
cs
w.cs = v.cs
w.sumProdID = v.sumProdID
w.SalesOrderID > v.SalesOrderID
-- Fragment: presumably a partial-match query built on a windowed ProductsCount,
-- a 2.0 / 3.0 threshold and a minimum of 3 products per order -- TODO confirm.
,ProductID
(ProductID) OVER (PARTITION
SalesOrderID)
ProductsCount
cte O1
cte O2
O1.ProductsCount > = 3
O2.ProductsCount >= 3
(O1.ProductID) >= (
) * 2.0
) / 3.0
(O2.ProductID) >= (
,SimilarOrderID;
-- Fragment: presumably a STUFF/CAST product list restricted by an IN list of
-- literal order numbers -- TODO confirm.
,stuff((
cast
IN
43659
,43913,
43659, 44528,
43659, 44566,
43659, 44761,
43659, 46077)
-- Fraction (0.75) that other parts of this script multiply item counts by.
-- The SET keyword was missing from the assignment and has been restored.
DECLARE @Percentage DECIMAL(10, 2);

SET @Percentage = 0.75;
-- NOTE(review): the lines below are not executable as written; repeated tokens
-- appear to have been lost. They look like the remains of a query that joins
-- cte as C1/C2 on SalesOrderID, compares a row count against the item counts
-- scaled by @Percentage, and requires at least 3 items per order -- TODO
-- confirm against the original script.
) * @Percentage
(C1.Items)
[Products 1]
(C2.Items)
[Products 2]
cte C1
O1.SalesOrderID = C1.SalesOrderID
cte C2
O2.SalesOrderID = C2.SalesOrderID
(*) >=
(C1.Items) * @Percentage
(C2.Items) * @Percentage
(C1.Items)>=3
(C2.Items) >=3
We could significantly improve the first of the last two solutions by extending its join condition with the following predicate:
-- Extra predicate: keep O1.ProductsCount inside the band around
-- O2.ProductsCount that @Percentage allows.
O1.ProductsCount BETWEEN FLOOR(O2.ProductsCount * @Percentage)
                     AND CEILING(O2.ProductsCount / @Percentage)
On my local machine this makes the query run about 2.5 times faster. I leave it to the readers of this article to test this suggestion.