1

I'm trying to convert a multi-index df to a nested dictionary by aggregation sum of all index.

DataFrame is in the format

                    jan  feb
first second third          
bar   bar1   bar3     0    5
             bar4     4    4
      bar2   bar5     1    9
foo   foo1   foo3     7    2
             foo4     7    7
      foo2   foo5     0    3

I tried to aggregate with each index separately and convert to dict using .to_dict('records'). But couldn't able to achieve nested dictionary with aggregation of each index expected result is

[
    {'first': 'bar',
     'jan': 5,
     'feb': 18,
     'sub_rows': [{'second': 'bar1',
                   'jan': 4,
                   'feb': 9,
                   'sub_rows': [{'third': 'bar3', 'jan': 0, 'feb': 5}, {'third': 'bar4', 'jan': 4, 'feb': 4}]
                   },
                  {'second': 'bar2',
                   'jan': 1,
                   'feb': 9,
                   'sub_rows': [{'third': 'bar5', 'jan': 1, 'feb': 9}]
                   }]
     },
    {'first': 'foo',
     'jan': 14,
     'feb': 12,
     'sub_rows': [{'second': 'foo1',
                   'jan': 14,
                   'feb': 9,
                   'sub_rows': [{'third': 'foo3', 'jan': 7, 'feb': 2}, {'third': 'foo4', 'jan': 7, 'feb': 7}]
                   },
                  {'second': 'foo2',
                   'jan': 0,
                   'feb': 3,
                   'sub_rows': [{'third': 'foo5', 'jan': 0, 'feb': 3}]
                   }]
     }
]

Can you please guide me to achieve this format. Thanks

Dhaaz
  • 27
  • 4
  • You might try using the `xs()` method for slicing the data frame by each index level, as shown here: https://stackoverflow.com/questions/42150769/pandas-multi-index-dataframe-to-nested-dictionary – Chris Sears Feb 04 '22 at 16:58

2 Answers2

1
import pandas as pd


def multiindex_df_to_list(df):
    out_list = []
    nlevels = df.index.nlevels
    level_name = df.index.levels[0].name if nlevels > 1 else df.index.name
    for i, level_value in enumerate(df.groupby(level=0)):
        out_list.append({})
        out_list[i][level_name] = level_value[0]
        for col, sum_val in level_value[1].sum().iteritems():
            out_list[i][col] = sum_val
        if nlevels > 1:
            out_list[i]['sub_rows'] = multiindex_df_to_list(level_value[1].droplevel(level=0))
    return out_list


def main():
    idx = pd.MultiIndex.from_arrays(
        [
            ['bar', 'bar', 'bar', 'foo', 'foo', 'foo'],
            ['bar1', 'bar1', 'bar2', 'foo1', 'foo1', 'foo2'],
            ['bar3', 'bar4', 'bar5', 'foo3', 'foo4', 'foo5']
        ],
        names=('first', 'second', 'third'))
    col = ['jan', 'feb']
    data = [[0, 5], [4, 4], [1, 9], [7, 2], [7, 7], [0, 3]]
    df = pd.DataFrame(data, idx, columns=col)

    out_list = multiindex_df_to_list(df)
    return out_list


if __name__ == "__main__":
    main()
1

Another recursive answer using a list as parameter:

def to_nested_dict(df, data=[]):
    lvl_name = df.index.get_level_values(0).name

    for name, subdf in df.groupby(level=0):
        d = {lvl_name: name} | subdf.sum().to_dict()
        data.append(d)
        if df.index.nlevels > 1:
            l = d.setdefault('sub_rows', [])
            to_nested_dict(subdf.loc[name], l)
    return data

data = to_nested_dict(df)

Output:

>>> data
[{'first': 'bar',
  'jan': 5,
  'feb': 18,
  'sub_rows': [{'second': 'bar1',
    'jan': 4,
    'feb': 9,
    'sub_rows': [{'third': 'bar3', 'jan': 0, 'feb': 5},
     {'third': 'bar4', 'jan': 4, 'feb': 4}]},
   {'second': 'bar2',
    'jan': 1,
    'feb': 9,
    'sub_rows': [{'third': 'bar5', 'jan': 1, 'feb': 9}]}]},
 {'first': 'foo',
  'jan': 14,
  'feb': 12,
  'sub_rows': [{'second': 'foo1',
    'jan': 14,
    'feb': 9,
    'sub_rows': [{'third': 'foo3', 'jan': 7, 'feb': 2},
     {'third': 'foo4', 'jan': 7, 'feb': 7}]},
   {'second': 'foo2',
    'jan': 0,
    'feb': 3,
    'sub_rows': [{'third': 'foo5', 'jan': 0, 'feb': 3}]}]}]
Corralien
  • 70,617
  • 7
  • 16
  • 36