2020年12月12日 星期六

[Applied]LINQ版本的視窗函數(SQL Window Function)

資料庫SQL中有一些配合Over字句Patition By群組後去處理集合
相當好用的視窗函數,例如編號用的Row_Number、Rank、Dense_Rank等等
似乎可以在LINQ中實現

因此在新版本的Applied套件中新增了此項功能

Nuget網址: https://www.nuget.org/packages/Applied/

底下為示範的程式

public class TestData
{
    public int Year { get; set; }
    public string Name { get; set; }
    public decimal Value { get; set; }
    public decimal Sum { get; set; }
    public decimal Average { get; set; }
    public int RowNumber { get; set; }
    public int Ntile { get; set; }
    public int DenseRank { get; set; }
    public int Rank { get; set; }
    public decimal FirstValue { get; set; }
    public decimal LastValue { get; set; }
    public decimal NthValue { get; set; }
    public decimal Lead { get; set; }
    public decimal Lag { get; set; }
    public decimal CumeDist { get; set; }
    public decimal PercentRank { get; set; }
    public decimal PercentileDisc { get; set; }
    public decimal PercentileCont { get; set; }
    public decimal KeepDenseRankFirst { get; set; }
    public decimal KeepDenseRankLast { get; set; }
    public decimal TeaTime { get; set; }
}
List<TestData> data = new List<TestData>();
data.Add(new TestData() { Year = 2019, Name = "A", Value = 111.1m });
data.Add(new TestData() { Year = 2019, Name = "B", Value = 333.3m });
data.Add(new TestData() { Year = 2019, Name = "C", Value = 333.3m });
data.Add(new TestData() { Year = 2019, Name = "A", Value = 222.2m });
data.Add(new TestData() { Year = 2019, Name = "C", Value = 444.4m });
data.Add(new TestData() { Year = 2019, Name = "A", Value = 222.2m });
data.Add(new TestData() { Year = 2019, Name = "B", Value = 333.3m });
data.Add(new TestData() { Year = 2019, Name = "C", Value = 555.5m });
data.Add(new TestData() { Year = 2020, Name = "A", Value = 111.1m });
data.Add(new TestData() { Year = 2020, Name = "B", Value = 333.3m });
data.Add(new TestData() { Year = 2020, Name = "A", Value = 222.2m });
data.Add(new TestData() { Year = 2020, Name = "C", Value = 333.3m });

data = data.GroupBy(a => new { a.Year }).AsPartition(p => p.OrderBy(a => a.Value).ThenBy(a => a.Name))
.Over(p => p.Sum(a => a.Value), (a, value) => a.Apply(() => new { Sum = value }))
.Over(p => p.Average(a => a.Value), (a, value) => a.Apply(() => new { Average = value }))
.Over(p => p.RowNumber(), (a, value) => a.Apply(() => new { RowNumber = value }))
.Over(p => p.Ntile(2), (a, value) => a.Apply(() => new { Ntile = value }))
.Over(p => p.DenseRank(), (a, value) => a.Apply(() => new { DenseRank = value }))
.Over(p => p.Rank(), (a, value) => a.Apply(() => new { Rank = value }))
.Over(p => p.FirstValue(a => a.Value), (a, value) => a.Apply(() => new { FirstValue = value }))
.Over(p => p.LastValue(a => a.Value), (a, value) => a.Apply(() => new { LastValue = value }))
.Over(p => p.NthValue(a => a.Value, 2), (a, value) => a.Apply(() => new { NthValue = value }))
.Over(p => p.Lead(a => a.Value), (a, value) => a.Apply(() => new { Lead = value }))
.Over(p => p.Lag(a => a.Value), (a, value) => a.Apply(() => new { Lag = value }))
.Over(p => p.CumeDist(), (a, value) => a.Apply(() => new { CumeDist = value }))
.Over(p => p.PercentRank(), (a, value) => a.Apply(() => new { PercentRank = value }))
.Over(p => p.PercentileDisc(0.5m, a => a.Value), (a, value) => a.Apply(() => new { PercentileDisc = value }))
.Over(p => p.PercentileCont(0.5m, a => a.Value), (a, value) => a.Apply(() => new { PercentileCont = value }))
.Over(p => p.KeepDenseRankFirst(g => g.Sum(a => a.Value)), (a, value) => a.Apply(() => new { KeepDenseRankFirst = value }))
.Over(p => p.KeepDenseRankLast(g => g.Sum(a => a.Value)), (a, value) => a.Apply(() => new { KeepDenseRankLast = value }))
.ToList();

執行結果


 

其中Over方法的第一個Func參數有兩種型態

一個是直接由IEnumerable<TSource>集合回傳單一值IElement的彙總方法
可以使用Linq原有的Sum、Average等等去計算集合部分的結果

另一個是IWindowFunctionFactory<TSource>
要回傳IWindowFunction<TSourceBase, IElement>的視窗函數物件,由該物件執行計算

目前已實現有15種一般的Window Function
不過也許可能有特殊需求情況,需要自行去實現自訂的Window Function
可以用IWindowFunction<TSourceBase, IElement>介面去達到自訂功能,如下

public class TeaTime<TSourceBase, IElement> : IWindowFunction<TSourceBase, IElement>
{
    private readonly Func<TSourceBase, IElement> _field;
    public TeaTime(Func<TSourceBase, IElement> field)
    {
        if (field == null)
        {
            throw new ArgumentNullException("field");
        }
        _field = field;
    }
    public IEnumerable<TResult> GetPartitionResults<TSource, TResult>(IRankEnumerable<TSource> elements
        , Func<TSource, IElement, TResult> selector) where TSource : TSourceBase
    {
        foreach (TSource element in elements)
        {
            IElement value = _field(element);
            yield return selector(element, value);
        }
    }
}
public static class MyWindowFunctions
{
    public static IWindowFunction<TSource, IElement> TeaTime<TSource, IElement>(this IWindowFunctionFactory<TSource> factory
        , Func<TSource, IElement> field)
    {
        return new TeaTime<TSource, IElement>(field);
    }
}
data = data.GroupBy(a => new { a.Year }).AsPartition(p => p.OrderBy(a => a.Value).ThenBy(a => a.Name))
.Over(p => p.TeaTime(a => a.Value), (a, value) => a.Apply(() => new { TeaTime = value }))
.ToList();