# imports
import csv
from os.path import realpath
from itertools import groupby
from qgis.core import QgsProject, QgsFeature, QgsVectorLayer, QgsField, QgsVectorLayerJoinInfo
from PyQt5.QtCore import QVariant
######################################################
# PART 1 : PROCESSING THE ORIGINAL CSV-FILE AND
# PROVIDING THE RESULT STORED AS A LIST WITH DICTS
######################################################

# Path to the original CSV file.
path_to_csv = realpath("D:/qgis_test/test.csv")

# Column the rows are grouped by, and column whose distinct values are spread
# over the generated columns ('month', 'month2', 'month3', ...).
grouping_field = "city"
working_field = "month"

with open(path_to_csv, 'r', newline='', encoding='utf-8') as csv_file:
    csv_reader = csv.reader(csv_file, delimiter=',')
    # The first row holds the original column names, e.g. 'id', 'city', 'month'.
    columns = next(csv_reader)
    # list.index raises ValueError when a field is missing from the header.
    ind_group = columns.index(grouping_field)
    ind_work = columns.index(working_field)
    # Remaining rows as a list of lists.
    original_data = list(csv_reader)

# Group the rows by the grouping field. itertools.groupby only merges
# *consecutive* rows with equal keys, so on a CSV that is not sorted by the
# grouping field each later run would replace the earlier one in the dict.
# Accumulating per key works for any row order and keeps first-seen key order.
data_grouped = {}
for row in original_data:
    data_grouped.setdefault(row[ind_group], []).append(row)

# Distinct working-field values per group, in first-seen order.
# dict.fromkeys de-duplicates while preserving order; a set would hand the
# values back in an arbitrary order that can differ between runs.
unique_by_group = {
    key: list(dict.fromkeys(row[ind_work] for row in rows))
    for key, rows in data_grouped.items()
}

# Largest number of distinct values found in any group (0 when the CSV has
# no data rows, instead of max() raising ValueError on an empty sequence).
n = max((len(values) for values in unique_by_group.values()), default=0)

# Generated column names: 'month', 'month2', 'month3', ...
new_columns = [working_field] + [f"{working_field}{i}" for i in range(2, n + 1)]

# One dict per group: the grouping value followed by its distinct values,
# padded with None up to n entries so every record has the same keys.
new_data = []
for key, values in unique_by_group.items():
    padded_values = values + [None] * (n - len(values))
    # ** unpacking yields the same key order as the 3.9+ "|" merge while also
    # running on older Python versions.
    new_data.append({grouping_field: key, **dict(zip(new_columns, padded_values))})
######################################################
# PART 2 : CREATING A TEMPORARY LAYER WITH ATTRIBUTE
# TABLE WHERE THE PROCESSED DATA WILL BE STORED
######################################################

# Temporary layer on the "memory" provider. The "None" URI is presumably the
# geometry-less form (attribute table only) -- confirm against the QGIS docs.
attr_layer = QgsVectorLayer("None", "attr_layer", "memory")

# Fields and features below are written through the layer's data provider.
provider = attr_layer.dataProvider()
attr_layer.startEditing()

# Fields to create: the grouping column followed by the generated columns.
relevant_columns = [columns[ind_group], *new_columns]

# Every field is created as a string field.
for field_name in relevant_columns:
    provider.addAttributes([QgsField(field_name, QVariant.String)])
attr_layer.updateFields()

# One feature per processed record. The attribute order relies on each
# record's key order matching relevant_columns.
for record in new_data:
    feature = QgsFeature()
    feature.setAttributes(list(record.values()))
    provider.addFeature(feature)
attr_layer.updateExtents()
attr_layer.commitChanges()

# If necessary, the 'attr_layer' layer can be added to the project.
QgsProject.instance().addMapLayer(attr_layer)
######################################################
# PART 3 : JOINING THE ORIGINAL VECTOR LAYER WITH THE
# TEMPORARY LAYER THAT CONTAINS THE PROCESSED DATA
######################################################

# The original vector layer: the first project layer named "points".
# Indexing with [0] raises IndexError when no layer has that name.
project = QgsProject.instance()
layers_named_points = project.mapLayersByName("points")
point_layer = layers_named_points[0]

# Parameters for the join: the layer to join, the field name used on both
# sides of the join, and an empty prefix for the joined field names.
target_layer_id = attr_layer.id()
joining_field = grouping_field
prefix = ""

# Build the join description.
# NOTE(review): the setter order is kept exactly as in the original; both
# setJoinLayerId and setJoinLayer identify the join layer -- confirm whether
# both calls are needed before reordering or removing either.
joinObject = QgsVectorLayerJoinInfo()
joinObject.setJoinFieldName(joining_field)
joinObject.setTargetFieldName(joining_field)
joinObject.setJoinLayerId(target_layer_id)
# Only the generated columns are passed as the joined-field subset.
joinObject.setJoinFieldNamesSubset(new_columns)
joinObject.setPrefix(prefix)
joinObject.setUsingMemoryCache(True)
joinObject.setJoinLayer(attr_layer)

# Attach the join to the original layer.
point_layer.addJoin(joinObject)
# Comment (eafwnrg, Oct 27 '23 at 09:44): the fid does not have to match, as I'm trying to join them by the attribute name.