有没有可能使用 LINQ 来透视(pivot)数据?

我想知道是否有可能使用 LINQ 将以下布局的数据进行透视:

CustID | OrderDate | Qty
1      | 1/1/2008  | 100
2      | 1/2/2008  | 200
1      | 2/2/2008  | 350
2      | 2/28/2008 | 221
1      | 3/12/2008 | 250
2      | 3/15/2008 | 2150

变成这样:

CustID  | Jan- 2008 | Feb- 2008 | Mar - 2008 |
1       | 100       | 350       |  250
2       | 200       | 221       | 2150
88454 次浏览

按月份对数据进行分组,然后将其投影到一个新的数据表中,每个月对应一列。这个新表就是您的透视表。

像这样吗?

List<CustData> myList = GetCustData();

// Pivot: one result row per customer, one column per month.
// Each month column is the total Qty of that customer's orders in that month.
var query =
    from c in myList
    group c by c.CustId into g
    select new
    {
        CustId = g.Key,
        Jan = g.Where(o => o.OrderDate.Month == 1).Sum(o => o.Qty),
        Feb = g.Where(o => o.OrderDate.Month == 2).Sum(o => o.Qty),
        March = g.Where(o => o.OrderDate.Month == 3).Sum(o => o.Qty)
    };

LINQ 中的 GroupBy 与 SQL 中的工作方式不同。在 SQL 中,您得到的是键和聚合值(行/列形状);在 LINQ 中,您得到的是键以及属于该键的所有元素(分层形状)。要进行透视,必须将这个层次结构投影回您所需的行/列形式。

我用 linq 扩展方法回答了 类似的问题:

// Pivot3 is an extension method that is not defined in this file.
// NOTE(review): presumably its arguments are the row-key selector, the column-key
// selector and the per-cell aggregate, in that order - confirm against its definition.
// Order s(ource) by OrderDate beforehand to get the columns in the proper order.
var r = s.Pivot3(e => e.custID, e => e.OrderDate.ToString("MMM-yyyy")
, lst => lst.Sum(e => e.Qty));
// Order r(esult) by CustID afterwards if the rows need to be sorted.

(+) 通用实现
(-) 肯定比 Amy B 的方案慢

有人可以改进我的实现吗(例如,该方法可以对列和行进行排序) ?

下面是一种更通用的使用 LINQ 透视数据的方法:

// Source rows; must be assigned before the lookup below is built.
IEnumerable<CustData> s;

// Group every row under a two-dimensional key: (customer, month of the order).
// The month is stored as the first day of that month (a DateTime) rather than an
// "M-yyyy" string, so that sorting the column keys yields chronological order
// instead of lexicographic order (where "10-2008" would sort before "2-2008").
var groupedData = s.ToLookup(
    k => new ValueKey(
        k.CustID, // 1st dimension
        new DateTime(k.OrderDate.Year, k.OrderDate.Month, 1) // 2nd dimension
    ) );

var rowKeys = groupedData.Select(g => (int)g.Key.DimKeys[0]).Distinct().OrderBy(k => k);

// Materialized once: the column keys are enumerated again for every row.
var columnKeys = groupedData.Select(g => (DateTime)g.Key.DimKeys[1]).Distinct().OrderBy(k => k).ToList();

foreach (var row in rowKeys) {
    Console.Write("CustID {0}: ", row);
    foreach (var column in columnKeys) {
        // A key with no rows yields an empty sequence from the lookup, so the sum is 0.
        Console.Write("{0:####} ", groupedData[new ValueKey(row, column)].Sum(r => r.Qty) );
    }
    Console.WriteLine();
}

其中 ValueKey 是一个表示多维键的特殊类:

/// <summary>
/// Composite key made of an ordered list of dimension values, usable as a key
/// in lookups and dictionaries. Two keys are equal when they have the same
/// number of components and every pair of components at the same position is equal.
/// </summary>
public sealed class ValueKey {
    /// <summary>The dimension values, in order. May be null or contain null components.</summary>
    public readonly object[] DimKeys;

    /// <summary>Creates a key from the given dimension values.</summary>
    /// <param name="dimKeys">Dimension values in order; the array is stored as-is (not copied).</param>
    public ValueKey(params object[] dimKeys) {
        DimKeys = dimKeys;
    }

    /// <summary>
    /// Returns a hash code consistent with <see cref="Equals(object)"/>.
    /// </summary>
    /// <returns>0 when <see cref="DimKeys"/> is null; otherwise a position-sensitive combination of the component hashes.</returns>
    public override int GetHashCode() {
        if (DimKeys == null) {
            return 0;
        }
        int hashCode = DimKeys.Length;
        for (int i = 0; i < DimKeys.Length; i++) {
            object dimKey = DimKeys[i];
            // A null component contributes 0 instead of throwing.
            int componentHash = dimKey == null ? 0 : dimKey.GetHashCode();
            // Multiply-and-add keeps the hash order-sensitive; a plain XOR would make
            // (a, b) collide with (b, a) and cancel out repeated components.
            hashCode = unchecked(hashCode * 31 + componentHash);
        }
        return hashCode;
    }

    /// <summary>
    /// Compares this key with another object component by component.
    /// </summary>
    /// <param name="obj">The object to compare with.</param>
    /// <returns>True when <paramref name="obj"/> is a <see cref="ValueKey"/> with equal components in the same order.</returns>
    public override bool Equals(object obj) {
        var other = obj as ValueKey;
        if (other == null) {
            return false;
        }
        var x = DimKeys;
        var y = other.DimKeys;
        if (ReferenceEquals(x, y)) {
            return true; // same array, or both null
        }
        if (x == null || y == null) {
            return false; // exactly one side has no components
        }
        if (x.Length != y.Length) {
            return false;
        }
        for (int i = 0; i < x.Length; i++) {
            // The static Object.Equals is null-safe on both sides.
            if (!object.Equals(x[i], y[i])) {
                return false;
            }
        }
        return true;
    }
}

这种方法可以用于按 N 个维度(N > 2)进行分组,适用于较小的数据集。对于大型数据集(100 万条以上的记录),或者透视配置不能硬编码的情况,我编写了专门的数据透视库(它是免费的):

// Configure a pivot over the "CustID" and "OrderDate" dimensions with a SumAggregatorFactory on "Qty".
// NOTE(review): PivotData and SumAggregatorFactory come from the author's pivot library,
// which is not part of this file - confirm their exact semantics against its documentation.
var pvtData = new PivotData(new []{"CustID","OrderDate"}, new SumAggregatorFactory("Qty"));
// The callback receives a source object (o) and a field name (f) and returns that field's value.
pvtData.ProcessData(s, (o, f) => {
// NOTE(review): TT is not declared in this snippet; presumably it is the element type of s (CustData) - confirm.
var custData = (TT)o;
switch (f) {
case "CustID": return custData.CustID;
case "OrderDate":
// Month-year text key such as "1-2008"; it must match the key used when reading a cell below.
return String.Format("{0}-{1}", custData.OrderDate.Month, custData.OrderDate.Year);
case "Qty": return custData.Qty;
}
// Unknown field name.
return null;
} );
// Read the cell addressed by CustID 1 and the "1-2008" month key.
Console.WriteLine( pvtData[1, "1-2008"].Value );

我认为,最简单的方法是使用查找:

// Per customer, bucket the quantities by month in a lookup, then sum each bucket.
// Indexing a lookup with a month that has no orders yields an empty sequence.
var query = myList
    .GroupBy(c => c.CustId)
    .Select(gcs => new
    {
        gcs,
        lookup = gcs.ToLookup(y => y.OrderDate.Month, y => y.Qty)
    })
    .Select(x => new
    {
        CustId = x.gcs.Key,
        Jan = x.lookup[1].Sum(),
        Feb = x.lookup[2].Sum(),
        Mar = x.lookup[3].Sum(),
    });

这是最有效的方法:

请看下面的方法:它只遍历每个客户分组一次,而不是为每个月份各遍历一次。

var query = myList
    .GroupBy(c => c.CustId)
    .Select(g => {
        // Single pass over the customer's orders, adding each Qty to its month bucket.
        var totals = new CustomerStatistics();
        foreach (var order in g)
        {
            int month = order.OrderDate.Month;
            if (month == 1)
            {
                totals.Jan += order.Qty;
            }
            else if (month == 2)
            {
                totals.Feb += order.Qty;
            }
            else if (month == 3)
            {
                totals.March += order.Qty;
            }
            // Orders from any other month are ignored.
        }

        return new
        {
            CustId = g.Key,
            totals.Jan,
            totals.Feb,
            totals.March
        };
    });

或者这个:

var query = myList
    .GroupBy(c => c.CustId)
    .Select(g => {
        // Fold the customer's orders into one statistics object, then finalize it.
        var running = new CustomerStatistics();
        foreach (var customer in g)
        {
            running = running.Accumulate(customer);
        }
        var results = running.Compute();

        return new
        {
            CustId = g.Key,
            results.Jan,
            results.Feb,
            results.March
        };
    });

完整的解决方案:

using System;
using System.Collections.Generic;
using System.Linq;


namespace ConsoleApp
{
/// <summary>
/// Console demo: pivots randomly generated customer orders into one row per
/// customer with a quantity column for each of January, February and March.
/// </summary>
internal class Program
{
    /// <summary>Builds the pivot query, prints its rows and waits for a key press.</summary>
    /// <param name="args">Command-line arguments (unused).</param>
    private static void Main(string[] args)
    {
        // GetCustData is an endless generator, so the sample is capped at 100 rows.
        IEnumerable<CustData> myList = GetCustData().Take(100);

        var query = myList
            .GroupBy(c => c.CustId)
            .Select(g =>
            {
                // One pass over the group: Accumulate adds each order to its month bucket.
                CustomerStatistics results = g.Aggregate(new CustomerStatistics(), (result, customer) => result.Accumulate(customer), customerStatistics => customerStatistics.Compute());
                return new
                {
                    CustId = g.Key,
                    results.Jan,
                    results.Feb,
                    results.March
                };
            });

        // LINQ queries are deferred: nothing above runs until the query is enumerated.
        foreach (var row in query)
        {
            Console.WriteLine("CustId {0}: Jan={1} Feb={2} March={3}", row.CustId, row.Jan, row.Feb, row.March);
        }

        Console.ReadKey();
    }

    /// <summary>
    /// Generates an endless sequence of orders with increasing customer ids,
    /// an order date on the first day of a random month (January to March 2018)
    /// and a random quantity from 1 to 49.
    /// </summary>
    /// <returns>A lazily evaluated, never-ending sequence; callers must limit it.</returns>
    private static IEnumerable<CustData> GetCustData()
    {
        Random random = new Random();
        int custId = 0;
        while (true)
        {
            custId++;
            // Random.Next excludes its upper bound: months are 1..3 and quantities 1..49.
            yield return new CustData { CustId = custId, OrderDate = new DateTime(2018, random.Next(1, 4), 1), Qty = random.Next(1, 50) };
        }
    }
}
/// <summary>A single order row: which customer ordered, when, and how many units.</summary>
public class CustData
{
    /// <summary>Identifier of the customer who placed the order.</summary>
    public int CustId { get; set; }

    /// <summary>Date of the order.</summary>
    public DateTime OrderDate { get; set; }

    /// <summary>Ordered quantity.</summary>
    public int Qty { get; set; }
}
/// <summary>Running quantity totals for January, February and March.</summary>
public class CustomerStatistics
{
    /// <summary>Total quantity of orders dated in January.</summary>
    public int Jan { get; set; }

    /// <summary>Total quantity of orders dated in February.</summary>
    public int Feb { get; set; }

    /// <summary>Total quantity of orders dated in March.</summary>
    public int March { get; set; }

    /// <summary>
    /// Adds the order's quantity to the total of its month; orders from any
    /// month other than January to March leave the totals unchanged.
    /// </summary>
    /// <param name="customer">The order to add.</param>
    /// <returns>This same instance, so calls can be chained or folded.</returns>
    internal CustomerStatistics Accumulate(CustData customer)
    {
        int month = customer.OrderDate.Month;
        if (month == 1)
        {
            Jan += customer.Qty;
        }
        else if (month == 2)
        {
            Feb += customer.Qty;
        }
        else if (month == 3)
        {
            March += customer.Qty;
        }

        return this;
    }

    /// <summary>Finalization step of the fold; there is nothing left to compute.</summary>
    /// <returns>This same instance.</returns>
    public CustomerStatistics Compute()
    {
        return this;
    }
}
}
// LINQPad code for Amy B's answer.
void Main()
{
    List<CustData> myList = GetCustData();

    // One row per customer with the summed Qty of each month as a column.
    // Further months (April to December) follow the same pattern with Month == 4..12.
    var query =
        from c in myList
        group c by c.CustId into g
        select new
        {
            CustId = g.Key,
            Jan = g.Where(o => o.OrderDate.Month == 1).Sum(o => o.Qty),
            Feb = g.Where(o => o.OrderDate.Month == 2).Sum(o => o.Qty),
            March = g.Where(o => o.OrderDate.Month == 3).Sum(o => o.Qty),
        };

    // Dump() is provided by LINQPad and renders the query result.
    query.Dump();
}


/// <summary>
/// Builds the six sample orders used in the question:
/// <code>
/// CustID | OrderDate  | Qty
/// 1      | 2008-01-01 | 100
/// 2      | 2008-01-02 | 200
/// 1      | 2008-02-02 | 350
/// 2      | 2008-02-28 | 221
/// 1      | 2008-03-12 | 250
/// 2      | 2008-03-15 | 2150
/// </code>
/// </summary>
/// <returns>A new list holding the rows above, in that order.</returns>
public List<CustData> GetCustData()
{
    var rows = new List<CustData>();

    rows.Add(new CustData { CustId = 1, OrderDate = new DateTime(2008, 1, 1), Qty = 100 });
    rows.Add(new CustData { CustId = 2, OrderDate = new DateTime(2008, 1, 2), Qty = 200 });
    rows.Add(new CustData { CustId = 1, OrderDate = new DateTime(2008, 2, 2), Qty = 350 });
    rows.Add(new CustData { CustId = 2, OrderDate = new DateTime(2008, 2, 28), Qty = 221 });
    rows.Add(new CustData { CustId = 1, OrderDate = new DateTime(2008, 3, 12), Qty = 250 });
    rows.Add(new CustData { CustId = 2, OrderDate = new DateTime(2008, 3, 15), Qty = 2150 });

    return rows;
}


/// <summary>
/// A single order row: which customer ordered, when, and how many units.
/// The data is exposed through auto-properties rather than public fields, so
/// the type can later add validation without breaking its callers.
/// </summary>
public class CustData
{
    /// <summary>Identifier of the customer who placed the order.</summary>
    public int CustId { get; set; }

    /// <summary>Date of the order.</summary>
    public DateTime OrderDate { get; set; }

    /// <summary>Ordered quantity (unsigned).</summary>
    public uint Qty { get; set; }
}

enter image description here