我最终解决了这个问题:先对每一行进行切片,把得到的基础行(row_base)传入搜索函数;每当搜索命中时,用 deepcopy 复制这份基础行,再把命中的字典追加到副本末尾。其他想做类似事情的人,可以根据自己的数据调整下面的切片范围。
# Hand the source rows to Spark (``sc`` is presumably the SparkContext — confirm).
origin_rdd = sc.parallelize(origin)


def concept_lambda(r):
    """Search the last field of row ``r``, using its first nine fields as the base row."""
    return search(r[-1], r[:9], term, list())


# Each row yields a list of matches; flatMap is used so the per-row lists
# are merged into one collection of result rows.
results = origin_rdd.flatMap(concept_lambda)
搜索函数:
def search(input, row_base, search_key, results):
    """Recursively collect every dict under ``input`` that contains ``search_key``.

    For each dict that has ``search_key`` as one of its keys, a deep copy of
    ``row_base`` is made, the matching dict itself (not a copy) is appended to
    that copy, and the resulting row is appended to ``results``.

    Lists and dicts are descended into at any depth. The value stored under a
    matching key is not searched further; the other values of the same dict
    still are.

    Args:
        input: A list (possibly nested) of lists/dicts, a single dict, or a
            falsy value such as ``None`` or ``[]`` (in which case nothing is
            searched). The name shadows the builtin but is kept so existing
            callers are unaffected.
        row_base: List of leading fields copied into every result row.
        search_key: Dict key to look for.
        results: List that matches are appended to; it is mutated in place.

    Returns:
        The same ``results`` list that was passed in.
    """
    if isinstance(input, dict):
        # Iterating a dict yields only its keys, so a dict reached through
        # recursion would never be inspected. Wrap it so the dict itself is
        # examined by the loop below.
        input = [input]

    if not input:
        return results

    for item in input:
        if isinstance(item, list):
            search(item, row_base, search_key, results)
        elif isinstance(item, dict):
            for key, value in item.items():
                if key == search_key:
                    # Deep-copy so each result row owns its own base fields.
                    row = copy.deepcopy(row_base)
                    row.append(item)
                    results.append(row)
                elif isinstance(value, (list, dict)):
                    search(value, row_base, search_key, results)
    return results