从 Spark 2.4 起，可以先用 collect_list 分别收集 Value 和 Count 两列，再用 map_from_arrays 把这两个数组组合成 map，最后用 to_json 转成 JSON 字符串：
# Variant for a Count column that is not an integer type: the collected
# Count array is cast to array<int> before it is used as the map values.
# NOTE(review): collect_list skips nulls, so a null in only one of the two
# columns would leave the arrays misaligned - confirm the input has none.
value_keys = collect_list(col("Value"))
count_values = collect_list(col("Count")).cast("array<int>")
json_column = to_json(map_from_arrays(value_keys, count_values)).alias("json")
# Show up to 10 rows without truncating the JSON text.
df.agg(json_column).show(10, False)
# Variant for a Count column that is already an integer type: the collected
# array is passed to map_from_arrays as-is, so no cast is applied.
df.agg(
    to_json(
        map_from_arrays(collect_list(col("Value")), collect_list(col("Count")))
    ).alias("json")
).show(10, False)
# Sample output:
#+------------------------------+
#|json |
#+------------------------------+
#|{"Blue":10,"Green":5,"Red":21}|
#+------------------------------+
# Retrieve the JSON text as a Python string on the driver instead of
# printing it as a table.
json_df = df.agg(
    to_json(
        map_from_arrays(
            collect_list(col("Value")),
            collect_list(col("Count")).cast("array<int>"),
        )
    ).alias("json")
)
# Read the single aggregated row's only column by position ...
json_df.collect()[0][0]
# ... or, equivalently, by the column alias.
json_df.collect()[0]['json']
# Sample result: {"Blue":10,"Green":5,"Red":21}