none
Create A Daily Log Table from a Transaction Table RRS feed

  • Question

  • Hi guys

    I have the below table of transactions for which I've been tasked to create a daily log table from:

    Date Account Investment Cumulative Holding
    1/20/2015 John A $10
    1/25/2015 Sally D $5
    2/3/2015 Sally B $10
    2/22/2015 John C $30
    4/12/2015 John A $20
    4/13/2015 Sally B $20

    The daily log table would need to have continuous date ranges for each account and investment combination.  The cumulative holdings would simply be filled down for dates inbetween transactions.

    Thanks in advance!

    Friday, December 18, 2015 6:29 PM

Answers

  • Just making sure, that very large row count in the DateIndex table is still a lot less from your result table, right (because they will be multiplied with all AccountxInvestments)? In that case perf can be bad, but I still don't understand why you need to sort the date table, isn't it already sorted when you create it? (And sorting a 300k rows with date column still takes only a few seconds)

    By the way, I tried doing this myself, and looks like it'll be a lot more convenient if you use Group by(with AllRows) on the first table with using 2 columns, then applying the join for each table. So you can sort the table by = Table.Group(#"Changed Type", {"Account", "Investment"}, {{"Investments", each _, type table}})

    (In this case, the DateIndex table should have only the date column, not the Investment)

    This is the result I got:

    http://pastebin.com/46Qa5efb

    And this is my M code:

    let
        JoiningFunction = (investments as table) as table =>
           let   
               #"Sorted Rows" = Table.Sort(investments,{{"Date", Order.Ascending}}),
               #"Merged Queries" = Table.NestedJoin(#"Sorted Rows",{"Date"},DateIndex,{"Column1"},"NewColumn",JoinKind.RightOuter),
               #"Expanded NewColumn" = Table.ExpandTableColumn(#"Merged Queries", "NewColumn", {"Column1"}, {"NewColumn.Column1"}),
               #"Sorted Rows1" = Table.Sort(#"Expanded NewColumn",{{"NewColumn.Column1", Order.Ascending}}),
               #"Filled Down" = Table.FillDown(#"Sorted Rows1",{"Account", "Investment", "Cumulative Holding"}),
               #"Reordered Columns" = Table.ReorderColumns(#"Filled Down",{"NewColumn.Column1", "Date", "Account", "Investment", "Cumulative Holding"}),
               #"Removed Columns" = Table.RemoveColumns(#"Reordered Columns",{"Date"}),
               #"Renamed Columns" = Table.RenameColumns(#"Removed Columns",{{"NewColumn.Column1", "Date"}})
           in
               #"Renamed Columns",
        Source = Excel.CurrentWorkbook(){[Name="Table2"]}[Content],
        #"Changed Type" = Table.TransformColumnTypes(Source,{{"Date", type date}, {"Account", type text}, {"Investment", type text}, {"Cumulative Holding", Int64.Type}}),
        #"Grouped Rows" = Table.Group(#"Changed Type", {"Account", "Investment"}, {{"Investments", each _, type table}}),
        result = Table.TransformColumns(#"Grouped Rows",{"Investments", JoiningFunction}),
        #"Expanded Investments" = Table.ExpandTableColumn(result, "Investments", {"Account", "Cumulative Holding", "Date", "Investment"}, {"Investments.Account", "Cumulative Holding", "Date", "Investments.Investment"}),
        #"Removed Columns" = Table.RemoveColumns(#"Expanded Investments",{"Investments.Account", "Investments.Investment"}),
        #"Changed Type1" = Table.TransformColumnTypes(#"Removed Columns",{{"Date", type date}}),
        #"Filtered Rows" = Table.SelectRows(#"Changed Type1", each [Cumulative Holding] <> null)
    in
        #"Filtered Rows"

    Note: You might want to sort the table right after the group by

    Monday, December 21, 2015 10:45 PM
    Moderator
  • Thanks Oguz!

    Below is my solution which was the fastest in my testing (I haven't tested your solution yet). In my testing, this was also the least redundant - there's a simple join, a small amount of iteration and date gaps are calculated precisely to the scenario.

    let

        // Add indexes and self-join
        Source = Excel.CurrentWorkbook(){[Name="Table1"]}[Content],
        #"Changed Type" = Table.TransformColumnTypes(Source,{{"Date", type date}}),
        #"Sorted Rows" = Table.Sort(#"Changed Type",{{"Account", Order.Ascending}, {"Investment", Order.Ascending}, {"Date", Order.Ascending}}),
        #"Added Index" = Table.AddIndexColumn(#"Sorted Rows", "Index", 0, 1),
        #"Added Custom" = Table.AddColumn(#"Added Index", "Index - 1", each [Index] - 1),
        #"Merged Queries" = Table.NestedJoin(#"Added Custom",{"Index"},#"Added Custom",{"Index - 1"},"NewColumn",JoinKind.LeftOuter),
        #"Expanded NewColumn" = Table.ExpandTableColumn(#"Merged Queries", "NewColumn", {"Date", "Account", "Investment"}, {"Date.1", "Account.1", "Investment.1"}),

        maxDate = Date.From(Table.Max(#"Expanded NewColumn", "Date")[Date]),

        // This logic checks to see if the record is the last record for an Account/Investment combination.
        #"Added Custom1" = Table.AddColumn(#"Expanded NewColumn", "DateList", each
            if [Account] = [Account.1] and [Investment] = [Investment.1] then
                if [Date] = [Date.1] then
                    {[Date]} else // Mulitple records on same day. No "fill down".
                    List.Dates([Date], Duration.Days([Date.1] - [Date]), #duration(1, 0 ,0 ,0)) else // "Middle" record. "Fill down" to next record.
                List.Dates([Date], Duration.Days(maxDate - [Date]) + 1, #duration(1, 0 ,0 ,0))), // Final record. "Fill down" to last date of data.

        // Cleanup
        #"Expanded DateList" = Table.ExpandListColumn(#"Added Custom1", "DateList"),
        #"Removed Other Columns" = Table.SelectColumns(#"Expanded DateList",{"Account", "Investment", "Cumulative Holding", "DateList"}),
        #"Changed Type1" = Table.TransformColumnTypes(#"Removed Other Columns",{{"Account", type text}, {"Investment", type text}, {"Cumulative Holding", Int64.Type}, {"DateList", type date}})
    in
        #"Changed Type1"

    Tuesday, December 22, 2015 2:45 PM

All replies

  • Can you elaborate how should the result table(s) look like? Is it like this?

    Date Account Investment Cumulative Holding
    1/20/2015 John A $10
    1/21/2015 John A
    $10
    1/22/2015 John A
    $10
    ... (goes like this for a month)
    A
    $10
    2/21/2015 John A
    $10
    2/22/2015 John A
    $20

    And you want other separate tables like this for John & C, Sally & B, Sally & D ?

    Friday, December 18, 2015 7:30 PM
    Moderator
  • Hi Oguz

    The output should be a single (very long) table with duplicate dates.  If it helps, the goal is basically to fill in the gaps. 

    Your output is correct, that is how it would look.  Appended to that would also be John's C investments until the last date of his account which is 4/12/15 - the date of his last A transaction.  Appended to that would be Sally's D and B holdings until her last entry date. 

    As a test, if I were to query the table and filter it to "Sally" and "4/13/2015" then I would expect to see 2 rows returned: D for $5 and B for $20.

    Friday, December 18, 2015 10:22 PM
  • First, you would sort your table (or use groupby) with Account > Date (you might need to add other columns to the sort criteria too depending on your need)

    Then, you can create another table named DateIndex with only dates that goes from smallest date in the table to the biggest date (the table will only have that one date column). You can do this by using List.Dates function that already exists.

    And then, you can join your main table with that date index table with left outer join. then you can use Table.FillDown.

    Does this sound good?

    Friday, December 18, 2015 10:38 PM
    Moderator
  • That would work if there were 1 account and 1 investment. Unfortunately, there there are multiple accounts with multiple investments.  The approach is correct but it would need to be repeated and appended by (Count of Accounts x Count of Investment) times.

    In reality, my data set contains 1,000 accounts each with 1 -> 5  investments.  I understand that the row count will be large but I'll be loading it into the data model so that won't matter.

    Saturday, December 19, 2015 6:42 PM
  • If you add a column with a list of all Investments (List.Distinct(TransactionTable[Investment]) to the DateIndex Table as suggested by Oguz and expand it, you will get a date-table for each Investment (in one table).

    Then connect your transaction table by date and investment - and the technique from above should work.


    Imke Feldmann TheBIccountant.com

    Sunday, December 20, 2015 8:13 AM
    Moderator
  • I'll give it a go in the coming day, but my understanding is that fill down will not work in this instance.  Thanks guys!
    Sunday, December 20, 2015 11:06 PM
  • Hi guys

    My transaction table is made up of 1,000 accounts and 5 other columns containing funds, investments etc... So when I attempted to use the above approach I unfortunately experienced major performance issues - especially when trying to sort the large date table (20mins+ wait).

    At a high level, the fastest solution was to appropriately sort my transaction table and then add a list of dates between the current record and the next (self join on an index) and then expand it.  This method takes only a few seconds.


    • Edited by Simon Nuss Monday, December 21, 2015 4:42 PM
    Monday, December 21, 2015 4:41 PM
  • No need to sort transaction table if you start with the Investments, then merge with calendar and last merge the transaction table to it. Then the order of the date table(s) should be kept.

    Imke Feldmann TheBIccountant.com


    Monday, December 21, 2015 4:53 PM
    Moderator
  • Sorting the date table shouldn't be needed as Imke said, but I'm curious why the sorting was slow, how many rows were there, and was there only 1 column? I tried sorting a big date table and it sorted it instantly.
    Monday, December 21, 2015 7:12 PM
    Moderator
  • In my actual data set I have:

    1,000 accounts 

    40 funds

    10 investments

    8 classes

    5 types

    Each of the above has a varying date range of 1 -> 10 years.

    Based on this, creating a date table as Imke suggested results in a very large date table, i.e. the row count was the product of each distinct list:

    "If you add a column with a list of all Investments (List.Distinct(TransactionTable[Investment]) to the DateIndex Table as suggested by Oguz and expand it, you will get a date-table for each Investment (in one table)."

    Perhaps I missed something?


    • Edited by Simon Nuss Monday, December 21, 2015 7:38 PM
    Monday, December 21, 2015 7:38 PM
  • Just making sure, that very large row count in the DateIndex table is still a lot less from your result table, right (because they will be multiplied with all AccountxInvestments)? In that case perf can be bad, but I still don't understand why you need to sort the date table, isn't it already sorted when you create it? (And sorting a 300k rows with date column still takes only a few seconds)

    By the way, I tried doing this myself, and looks like it'll be a lot more convenient if you use Group by(with AllRows) on the first table with using 2 columns, then applying the join for each table. So you can sort the table by = Table.Group(#"Changed Type", {"Account", "Investment"}, {{"Investments", each _, type table}})

    (In this case, the DateIndex table should have only the date column, not the Investment)

    This is the result I got:

    http://pastebin.com/46Qa5efb

    And this is my M code:

    let
        JoiningFunction = (investments as table) as table =>
           let   
               #"Sorted Rows" = Table.Sort(investments,{{"Date", Order.Ascending}}),
               #"Merged Queries" = Table.NestedJoin(#"Sorted Rows",{"Date"},DateIndex,{"Column1"},"NewColumn",JoinKind.RightOuter),
               #"Expanded NewColumn" = Table.ExpandTableColumn(#"Merged Queries", "NewColumn", {"Column1"}, {"NewColumn.Column1"}),
               #"Sorted Rows1" = Table.Sort(#"Expanded NewColumn",{{"NewColumn.Column1", Order.Ascending}}),
               #"Filled Down" = Table.FillDown(#"Sorted Rows1",{"Account", "Investment", "Cumulative Holding"}),
               #"Reordered Columns" = Table.ReorderColumns(#"Filled Down",{"NewColumn.Column1", "Date", "Account", "Investment", "Cumulative Holding"}),
               #"Removed Columns" = Table.RemoveColumns(#"Reordered Columns",{"Date"}),
               #"Renamed Columns" = Table.RenameColumns(#"Removed Columns",{{"NewColumn.Column1", "Date"}})
           in
               #"Renamed Columns",
        Source = Excel.CurrentWorkbook(){[Name="Table2"]}[Content],
        #"Changed Type" = Table.TransformColumnTypes(Source,{{"Date", type date}, {"Account", type text}, {"Investment", type text}, {"Cumulative Holding", Int64.Type}}),
        #"Grouped Rows" = Table.Group(#"Changed Type", {"Account", "Investment"}, {{"Investments", each _, type table}}),
        result = Table.TransformColumns(#"Grouped Rows",{"Investments", JoiningFunction}),
        #"Expanded Investments" = Table.ExpandTableColumn(result, "Investments", {"Account", "Cumulative Holding", "Date", "Investment"}, {"Investments.Account", "Cumulative Holding", "Date", "Investments.Investment"}),
        #"Removed Columns" = Table.RemoveColumns(#"Expanded Investments",{"Investments.Account", "Investments.Investment"}),
        #"Changed Type1" = Table.TransformColumnTypes(#"Removed Columns",{{"Date", type date}}),
        #"Filtered Rows" = Table.SelectRows(#"Changed Type1", each [Cumulative Holding] <> null)
    in
        #"Filtered Rows"

    Note: You might want to sort the table right after the group by

    Monday, December 21, 2015 10:45 PM
    Moderator
  • Thanks Oguz!

    Below is my solution which was the fastest in my testing (I haven't tested your solution yet). In my testing, this was also the least redundant - there's a simple join, a small amount of iteration and date gaps are calculated precisely to the scenario.

    let

        // Add indexes and self-join
        Source = Excel.CurrentWorkbook(){[Name="Table1"]}[Content],
        #"Changed Type" = Table.TransformColumnTypes(Source,{{"Date", type date}}),
        #"Sorted Rows" = Table.Sort(#"Changed Type",{{"Account", Order.Ascending}, {"Investment", Order.Ascending}, {"Date", Order.Ascending}}),
        #"Added Index" = Table.AddIndexColumn(#"Sorted Rows", "Index", 0, 1),
        #"Added Custom" = Table.AddColumn(#"Added Index", "Index - 1", each [Index] - 1),
        #"Merged Queries" = Table.NestedJoin(#"Added Custom",{"Index"},#"Added Custom",{"Index - 1"},"NewColumn",JoinKind.LeftOuter),
        #"Expanded NewColumn" = Table.ExpandTableColumn(#"Merged Queries", "NewColumn", {"Date", "Account", "Investment"}, {"Date.1", "Account.1", "Investment.1"}),

        maxDate = Date.From(Table.Max(#"Expanded NewColumn", "Date")[Date]),

        // This logic checks to see if the record is the last record for an Account/Investment combination.
        #"Added Custom1" = Table.AddColumn(#"Expanded NewColumn", "DateList", each
            if [Account] = [Account.1] and [Investment] = [Investment.1] then
                if [Date] = [Date.1] then
                    {[Date]} else // Mulitple records on same day. No "fill down".
                    List.Dates([Date], Duration.Days([Date.1] - [Date]), #duration(1, 0 ,0 ,0)) else // "Middle" record. "Fill down" to next record.
                List.Dates([Date], Duration.Days(maxDate - [Date]) + 1, #duration(1, 0 ,0 ,0))), // Final record. "Fill down" to last date of data.

        // Cleanup
        #"Expanded DateList" = Table.ExpandListColumn(#"Added Custom1", "DateList"),
        #"Removed Other Columns" = Table.SelectColumns(#"Expanded DateList",{"Account", "Investment", "Cumulative Holding", "DateList"}),
        #"Changed Type1" = Table.TransformColumnTypes(#"Removed Other Columns",{{"Account", type text}, {"Investment", type text}, {"Cumulative Holding", Int64.Type}, {"DateList", type date}})
    in
        #"Changed Type1"

    Tuesday, December 22, 2015 2:45 PM