我不知道你想怎么处理
$oid
和
gi
. 我假设这对列是一样的,所以你可能需要调整这个。您可能需要调整部分以获得所需的列名,但它或多或少地提供了您所需的内容。
但我所做的是将整个字典平展成一行数据帧,然后遍历列以创建单独的行。
test =[{
"_id": {
"$oid": "5bab"
},
"d": [
{
"d": [
17317,
16556,
9680,
55982,
45948
],
"h": 74.65,
"ts": "2018-09-26T21:02:19+00:00",
"p": [
61,
76,
137,
152,
122
],
"si": "9829563c95d0155f",
"t": 24.82,
"ti": "0000000000000000"
},
{
"d": [
17821,
17488,
9199,
56447,
44089
],
"h": 80.09,
"ts": "2018-09-26T21:02:19+00:00",
"p": [
61,
76,
137,
152,
122
],
"si": "a42fbc88a44a316f",
"t": 25.1,
"ti": "0000000000000000"
}
],
"gi": "GW-P1007"
}]
import json
import pandas as pd
import re
jsonStr = json.dumps(test[0])
def flatten_json(y):
out = {}
def flatten(x, name=''):
if type(x) is dict:
for a in x:
flatten(x[a], name + a + '_')
elif type(x) is list:
i = 0
for a in x:
flatten(a, name + str(i) + '_')
i += 1
else:
out[name[:-1]] = x
flatten(y)
return out
jsonObj = json.loads(jsonStr)
flat = flatten_json(jsonObj)
results = pd.DataFrame()
special_cols = []
columns_list = list(flat.keys())
for item in columns_list:
try:
row_idx = re.findall(r'\_(\d+)\_', item )[0]
except:
special_cols.append(item)
continue
column = re.findall(r'\_\d+\_(.*)', item )[0]
column = column.replace('_', '')
row_idx = int(row_idx)
value = flat[item]
results.loc[row_idx, column] = value
for item in special_cols:
results[item] = flat[item]
输出:
print (results.to_string())
d0 d1 d2 d3 d4 h ts p0 p1 p2 p3 p4 si t ti _id_$oid gi
0 17317.0 16556.0 9680.0 55982.0 45948.0 74.65 2018-09-26T21:02:19+00:00 61.0 76.0 137.0 152.0 122.0 9829563c95d0155f 24.82 0000000000000000 5bab GW-P1007
1 17821.0 17488.0 9199.0 56447.0 44089.0 80.09 2018-09-26T21:02:19+00:00 61.0 76.0 137.0 152.0 122.0 a42fbc88a44a316f 25.10 0000000000000000 5bab GW-P1007