Code like as below: #To get deal keys
schema of lt_online:
root
|-- FT/RT: string (nullable = true)
|-- Country: string (nullable = true)
|-- Charge_Type: string (nullable = true)
|-- Tariff_Loc: string (nullable = true)
|-- Charge_No: string (nullable = true)
|-- Status: string (nullable = true)
|-- Validity_from: string (nullable = true)
|-- Validity_to: string (nullable = true)
|-- Range_Basis: string (nullable = true)
|-- Limited_Parties: string (nullable = true)
|-- Charge_Detail: string (nullable = true)
|-- Freetime_Unit: string (nullable = true)
|-- Freetime: string (nullable = true)
|-- Count_Holidays: string (nullable = true)
|-- Majeure: string (nullable = true)
|-- Start_Event: string (nullable = true)
|-- Same/Next_Day: string (nullable = true)
|-- Next_Day_if_AFTER: string (nullable = true)
|-- Availability_Date: string (nullable = true)
|-- Route_Group: string (nullable = true)
|-- Route_Code: string (nullable = true)
|-- Origin: string (nullable = true)
|-- LoadZone: string (nullable = true)
|-- FDischZone: string (nullable = true)
|-- PODZone: string (nullable = true)
|-- FDestZone: string (nullable = true)
|-- Equipment_Group: string (nullable = true)
|-- Equipment_Type: string (nullable = true)
|-- Range_From: string (nullable = true)
|-- Range_To: void (nullable = true)
|-- Cargo_Type: string (nullable = true)
|-- Commodity: string (nullable = true)
|-- SC_Group: string (nullable = true)
|-- SC_Number: string (nullable = true)
|-- IMO: string (nullable = true)
|-- Shipper_Group: string (nullable = true)
|-- Cnee_Group: string (nullable = true)
|-- Direction: string (nullable = true)
|-- Service: string (nullable = true)
|-- Haulage: string (nullable = true)
|-- Transport_Type: string (nullable = true)
|-- Option1: string (nullable = true)
|-- Option2: string (nullable = true)
|-- 1st_of_Route_Group: string (nullable = true)
|-- 1st_of_LoadZone: string (nullable = true)
|-- 1st_of_FDischZone: string (nullable = true)
|-- 1st_of_PODZone: string (nullable = true)
|-- 1st_of_FDestZone: string (nullable = true)
|-- 1st_of_Equipment_Group: string (nullable = true)
|-- 1st_of_SC_Group: string (nullable = true)
|-- 1st_of_Shipper_Group: string (nullable = true)
|-- 1st_of_Cnee_Group: string (nullable = true)
pyspark code as below df=lt_online.withColumn("dealkeys",lit('')).withColumn("dealAttributes",lit(''))
start=[]
start_dict={}
dealatt=["Charge_No","Status","Validity_from","Validity_to"]
dealkeys=["Charge_Type","Direction"]
for index,row in lt_online.toPandas().iterrows():
start=[]
start_dict={}
key = row['Charge_No']
for i in dealatt:
#final = row[i]
start_dict[i]=row[i]
df_deal_att = df.withColumn('dealkeys', when(col('Charge_No') == key , str(start_dict)).otherwise(col('dealkeys')))
for i in dealkeys:
#key = row['Charge_No']
final = {"keyname" : i,"value" : row[i],"description":".."}
start.append(final)
#final_val= {"value" : row['Charge_Type']}
#start.append(final_val)
#df3=lt_online.withColumn("new_column",str(start))
print(key,start_dict)
df3 = df_deal_att.withColumn('dealAttributes', when(col('Charge_No') == key , str(start)).otherwise(col('dealAttributes')))
when i run DF3 dataframe dealAttributes and dealkeys old data got blank and latest record only inserted. Please see the screenshot