score:204

Accepted answer

something like this?

list<custdata> mylist = getcustdata();

var query = mylist
    .groupby(c => c.custid)
    .select(g => new {
        custid = g.key,
        jan = g.where(c => c.orderdate.month == 1).sum(c => c.qty),
        feb = g.where(c => c.orderdate.month == 2).sum(c => c.qty),
        march = g.where(c => c.orderdate.month == 3).sum(c => c.qty)
    });

groupby in linq does not work the same as sql. in sql, you get the key and aggregates (row/column shape). in linq, you get the key and any elements as children of the key (hierarchical shape). to pivot, you must project the hierarchy back into a row/column form of your choosing.

score:-4

group your data on month, and then project it into a new datatable with columns for each month. the new table would be your pivot table.

score:0

this is most efficient way:

check the following approach. instead of iterating through the customers group each time for each month.

var query = mylist
    .groupby(c => c.custid)
    .select(g => {
        var results = new customerstatistics();
        foreach (var customer in g)
        {
            switch (customer.orderdate.month)
            {
                case 1:
                    results.jan += customer.qty;
                    break;
                case 2:
                    results.feb += customer.qty;
                    break;
                case 3:
                    results.march += customer.qty;
                    break;
                default:
                    break;
            }
        }
        return  new
        {
            custid = g.key,
            results.jan,
            results.feb,
            results.march
        };
    });

or this one :

var query = mylist
    .groupby(c => c.custid)
    .select(g => {
        var results = g.aggregate(new customerstatistics(), (result, customer) => result.accumulate(customer), customerstatistics => customerstatistics.compute());
        return  new
        {
            custid = g.key,
            results.jan,
            results.feb,
            results.march
        };
    });

complete solution:

using system;
using system.collections.generic;
using system.linq;

namespace consoleapp
{
    internal class program
    {
        private static void main(string[] args)
        {
            ienumerable<custdata> mylist = getcustdata().take(100);

            var query = mylist
                .groupby(c => c.custid)
                .select(g =>
                {
                    customerstatistics results = g.aggregate(new customerstatistics(), (result, customer) => result.accumulate(customer), customerstatistics => customerstatistics.compute());
                    return new
                    {
                        custid = g.key,
                        results.jan,
                        results.feb,
                        results.march
                    };
                });
            console.readkey();
        }

        private static ienumerable<custdata> getcustdata()
        {
            random random = new random();
            int custid = 0;
            while (true)
            {
                custid++;
                yield return new custdata { custid = custid, orderdate = new datetime(2018, random.next(1, 4), 1), qty = random.next(1, 50) };
            }
        }

    }
    public class custdata
    {
        public int custid { get; set; }
        public datetime orderdate { get; set; }
        public int qty { get; set; }
    }
    public class customerstatistics
    {
        public int jan { get; set; }
        public int feb { get; set; }
        public int march { get; set; }
        internal customerstatistics accumulate(custdata customer)
        {
            switch (customer.orderdate.month)
            {
                case 1:
                    jan += customer.qty;
                    break;
                case 2:
                    feb += customer.qty;
                    break;
                case 3:
                    march += customer.qty;
                    break;
                default:
                    break;
            }
            return this;
        }
        public customerstatistics compute()
        {
            return this;
        }
    }
}

score:0

// linqpad code for amy b answer
void main()
{
    list<custdata> mylist = getcustdata();
    
    var query = mylist
        .groupby(c => c.custid)
        .select(g => new
        {
            custid = g.key,
            jan = g.where(c => c.orderdate.month == 1).sum(c => c.qty),
            feb = g.where(c => c.orderdate.month == 2).sum(c => c.qty),
            march = g.where(c => c.orderdate.month == 3).sum(c => c.qty),
            //april = g.where(c => c.orderdate.month == 4).sum(c => c.qty),
            //may = g.where(c => c.orderdate.month == 5).sum(c => c.qty),
            //june = g.where(c => c.orderdate.month == 6).sum(c => c.qty),
            //july = g.where(c => c.orderdate.month == 7).sum(c => c.qty),
            //august = g.where(c => c.orderdate.month == 8).sum(c => c.qty),
            //september = g.where(c => c.orderdate.month == 9).sum(c => c.qty),
            //october = g.where(c => c.orderdate.month == 10).sum(c => c.qty),
            //november = g.where(c => c.orderdate.month == 11).sum(c => c.qty),
            //december = g.where(c => c.orderdate.month == 12).sum(c => c.qty)          
        });
        
    
    query.dump();
}

/// <summary>
/// --------------------------------
/// custid  | orderdate     | qty
/// --------------------------------
/// 1       | 1 / 1 / 2008  | 100
/// 2       | 1 / 2 / 2008  | 200
/// 1       | 2 / 2 / 2008  | 350
/// 2       | 2 / 28 / 2008 | 221
/// 1       | 3 / 12 / 2008 | 250
/// 2       | 3 / 15 / 2008 | 2150 
/// </ summary>
public list<custdata> getcustdata()
{
    list<custdata> custdata = new list<custdata>
    {
        new custdata
        {
            custid = 1,
            orderdate = new datetime(2008, 1, 1),
            qty = 100
        },

        new custdata
        {
            custid = 2,
            orderdate = new datetime(2008, 1, 2),
            qty = 200
        },

        new custdata
        {
            custid = 1,
            orderdate = new datetime(2008, 2, 2),
            qty = 350
        },

        new custdata
        {
            custid = 2,
            orderdate = new datetime(2008, 2, 28),
            qty = 221
        },

        new custdata
        {
            custid = 1,
            orderdate = new datetime(2008, 3, 12),
            qty = 250
        },

        new custdata
        {
            custid = 2,
            orderdate = new datetime(2008, 3, 15),
            qty = 2150
        },      
    };

    return custdata;
}

public class custdata
{
    public int custid;
    public datetime orderdate;
    public uint qty;
}

enter image description here

score:2

here is a bit more generic way how to pivot data using linq:

ienumerable<custdata> s;
var groupeddata = s.tolookup( 
        k => new valuekey(
            k.custid, // 1st dimension
            string.format("{0}-{1}", k.orderdate.month, k.orderdate.year // 2nd dimension
        ) ) );
var rowkeys = groupeddata.select(g => (int)g.key.dimkeys[0]).distinct().orderby(k=>k);
var columnkeys = groupeddata.select(g => (string)g.key.dimkeys[1]).distinct().orderby(k=>k);
foreach (var row in rowkeys) {
    console.write("custid {0}: ", row);
    foreach (var column in columnkeys) {
        console.write("{0:####} ", groupeddata[new valuekey(row,column)].sum(r=>r.qty) );
    }
    console.writeline();
}

where valuekey is a special class that represents multidimensional key:

public sealed class valuekey {
    public readonly object[] dimkeys;
    public valuekey(params object[] dimkeys) {
        dimkeys = dimkeys;
    }
    public override int gethashcode() {
        if (dimkeys==null) return 0;
        int hashcode = dimkeys.length;
        for (int i = 0; i < dimkeys.length; i++) { 
            hashcode ^= dimkeys[i].gethashcode();
        }
        return hashcode;
    }
    public override bool equals(object obj) {
        if ( obj==null || !(obj is valuekey))
            return false;
        var x = dimkeys;
        var y = ((valuekey)obj).dimkeys;
        if (referenceequals(x,y))
            return true;
        if (x.length!=y.length)
            return false;
        for (int i = 0; i < x.length; i++) {
            if (!x[i].equals(y[i]))
                return false;
        }
        return true;            
    }
}

this approach can be used for grouping by n-dimensions (n>2) and will work fine for rather small datasets. for large datasets (up to 1 mln of records and more) or for cases when pivot configuration cannot be hardcoded i've written special pivotdata library (it is free):

var pvtdata = new pivotdata(new []{"custid","orderdate"}, new sumaggregatorfactory("qty"));
pvtdata.processdata(s, (o, f) => {
    var custdata = (tt)o;
    switch (f) {
        case "custid": return custdata.custid;
        case "orderdate": 
        return string.format("{0}-{1}", custdata.orderdate.month, custdata.orderdate.year);
        case "qty": return custdata.qty;
    }
    return null;
} );
console.writeline( pvtdata[1, "1-2008"].value );  

score:8

the neatest approach for this, i think, is to use a lookup:

var query =
    from c in mylist
    group c by c.custid into gcs
    let lookup = gcs.tolookup(y => y.orderdate.month, y => y.qty)
    select new
    {
        custid = gcs.key,
        jan = lookup[1].sum(),
        feb = lookup[2].sum(),
        mar = lookup[3].sum(),
    };

score:13

i answered similar question using linq extension method:

// order s(ource) by orderdate to have proper column ordering
var r = s.pivot3(e => e.custid, e => e.orderdate.tostring("mmm-yyyy")
    , lst => lst.sum(e => e.qty));
// order r(esult) by custid

(+) generic implementation
(-) definitely slower than amy b's

can anyone improve my implementation (i.e. the method does the ordering of columns & rows)?


Related Query

More Query from same tag