`json_normalize` forventer en liste af ordbøger, men i tilfældet `Ebs` er der kun
en enkelt ordbog, så vi bør forbehandle JSON-dataene:
In [88]: with open(fn) as f:
...: data = json.load(f)
...:
In [89]: for r in data['Volumes']:
...: if 'Ebs' not in r: # add 'Ebs' dict if it's not in the record...
...: r['Ebs'] = []
...: if not isinstance(r['Ebs'], list): # wrap 'Ebs' in a list if it's not a list
...: r['Ebs'] = [r['Ebs']]
...:
In [90]: data
Out[90]:
{'Volumes': [{'Attachments': [{'AttachTime': '2013-12-18T22:35:00.000Z',
'DeleteOnTermination': True,
'Device': '/dev/sda1',
'InstanceId': 'i-1234567890abcdef0',
'State': 'attached',
'Tags': [{'Key': 'Name', 'Value': 'DBJanitor-Private'},
{'Key': 'Owner', 'Value': 'DBJanitor'},
{'Key': 'Product', 'Value': 'Database'},
{'Key': 'Portfolio', 'Value': 'DB Janitor'},
{'Key': 'Service', 'Value': 'DB Service'}],
'VolumeId': 'vol-049df61146c4d7901'}],
'AvailabilityZone': 'us-east-1a',
'Ebs': [{'AttachTime': '2016-09-14T19:49:11.000Z',
'DeleteOnTermination': True,
'Status': 'attached',
'VolumeId': 'vol-049df61146c4d7901'}],
'VolumeId': 'vol-049df61146c4d7901',
'VolumeType': 'standard'}]}
BEMÆRK: `'Ebs': {..}` er blevet erstattet med `'Ebs': [{..}]`.
In [91]: e = pd.io.json.json_normalize(data['Volumes'],
...: ['Ebs'],
...: ['VolumeId'],
...: meta_prefix='parent_')
...:
In [92]: e
Out[92]:
AttachTime DeleteOnTermination Status VolumeId parent_VolumeId
0 2016-09-14T19:49:11.000Z True attached vol-049df61146c4d7901 vol-049df61146c4d7901