score:2

Accepted answer

Here's a solution I've been working on. It takes tabular input data and handles the general case of an arbitrary number of hierarchy levels.

import pandas as pd
import json

def find_element(children_list, name):
    """
    Look up a child node by its "name" key.

    Returns the first dictionary in children_list whose "name" entry
    equals name, or None when no entry matches.
    """
    matches = (child for child in children_list if child["name"] == name)
    # next() with a default stops at the first hit and yields None on a miss
    return next(matches, None)

def add_node(path, value, nest):
    """
    Insert one leaf into a nested {"name": ..., "children": [...]} tree.

    path  -- sequence of names, one per level, ordered from the level
             directly below nest down to the leaf.  It is read only and
             is never modified.
    value -- stored under the "value" key of the leaf that gets created.
    nest  -- dictionary with a "children" list; it is modified in place.

    Intermediate levels that do not exist yet are created as
    {"name": ..., "children": []}; the last name of the path becomes
    {"name": ..., "value": value}.  If a node for the complete path is
    already present it is left untouched and value is not stored.

    Raises IndexError when path is empty, and KeyError when the path has
    to descend through a node that has no "children" list (for example a
    leaf created by an earlier call).
    """
    # Work on a private copy so the caller's list is not consumed.
    names = list(path)
    if not names:
        raise IndexError("path must contain at least one name")

    last_depth = len(names) - 1
    node = nest
    for depth, this_name in enumerate(names):
        children = node["children"]

        # Reuse the node for this level when it already exists
        child = find_element(children, this_name)

        if child is None:
            if depth == last_depth:
                # End of the path: this is the leaf carrying the value
                child = {"name": this_name, "value": value}
            else:
                # More levels to come: create an empty branch
                child = {"name": this_name, "children": []}
            children.append(child)

        # Descend one level for the next name in the path
        node = child

And here is an example of how you can use it. You have to tell it which columns to use as the levels of the hierarchy and which column stores the values.

from io import StringIO

# Sample table: columns l1, l2, l3 hold the names for the three hierarchy
# levels and "val" holds the value attached to each row.  The JSON text is
# wrapped in StringIO because newer pandas releases deprecate passing a
# literal JSON string directly to read_json.
df = pd.read_json(StringIO('{"l1":{"0":"a","1":"a","2":"a","3":"a","4":"b","5":"b","6":"b","7":"b"},"l2":{"0":"a1","1":"a1","2":"a2","3":"a2","4":"b1","5":"b1","6":"b2","7":"b3"},"l3":{"0":"a11","1":"a12","2":"a21","3":"a22","4":"b11","5":"b12","6":"b22","7":"b34"},"val":{"0":1,"1":2,"2":3,"3":4,"4":5,"5":6,"6":7,"7":8}}'))


# Root of the nested structure; add_node fills in its "children" list
d = {"name": "root",
"children": []}

# Columns to use as hierarchy levels, outermost first
levels = ["l1","l2", "l3"]
for _, r in df.iterrows():
    path = list(r[levels])
    value = r["val"]
    add_node(path,value,d)

# print as a function call so the example also runs on Python 3
print(json.dumps(d, sort_keys=False,
              indent=2))

Related Query