How to do line based accuracy assessment with pyqgis

Question

I would like to have a line based accuracy assessment between two line vector layers. For this it might an idea to create buffers around all lines of each layer and compute the % of overlay and % of difference. Before starting to code this, I wanted to check if there is anything already available that would do something similar.

I also thought about creating a buffer around all lines, then converting the vector layers to raster layers and applying r.kappa...

Any other ideas? =)

I don't know exactly what the specs are for your assessment but my first thought was that your probably want to filter out the geometries that are equal. geopandas in combination with numpy seems like great tool to to do comparisons between datasets in general. Have look at this answer to get the idea: http://gis.stackexchange.com/questions/212273/how-to-check-what-changed-inside-the-updated-shapefile — LarsVegas, Oct 22 '16 at 05:09

score 1 · Accepted Answer · answered Dec 15 '17 at 16:50

I concluded that there is nothing already available yet and decided to go with the option of buffering both the lines to be evaluated and the reference lines and converting them to raster layers, overlaying the raster layers and applying r.kappa to calculate the confusion matrix to sum up the amounts of pixels being true positive, false negative, true negative and true negative.

I implemented this for a so-called detection quality and a localization quality. For the first, both the reference and the evaluation lines are buffered and overlaid. For the second, the reference is buffered with different distances and again overlaid with the evaluation lines. This allows to calculate the amount of true positive pixels per buffer distance.

Here is my code to this, to be executed as a PyQGIS script from the Python console in QGIS:

"""
!/bin/python
-*- coding: utf-8 -*
QGIS Version: QGIS 2.18

### Author ###
 S. Crommelinck, 2017

### Description ###
 This script merges calculates the detection quality (error of completeness and correctness) for vector files compared
 to a reference data. Both files should be shapefiles containing lines.


### Import script in QGIS Python console ###
# add directory with script to Python search path
import sys
sys.path.append(r"D:\path to script")

#import own module
import C1_acc_ass

# rerun module after changing the source code
reload(C1_acc_ass)
"""

### Predefine variables ###
# Make sure there exists a column DN set to 1 for all features in each layer to be evaluated
data_dir = r"D:\path to directory of shapefile lines to be evaluated"

# Ref_v should contain one column named DN set to 1 for all features
ref_v = r"D:\path to reference line shapefile"

# Number of pixels in width and height of evaluation raster 
raster_size = 20000

# buff size for input data
buff_dist = 0.05

# buff size for reference data
buff_ref = 0.05

# buff sizes for localization quality
buff_distances = "0.1,0.2,0.3,0.4,0.5"

### Import required modules ###
import os
from PyQt4.QtCore import *
from PyQt4.QtGui import *
from qgis.core import *
from qgis.utils import *
import processing

### Main processing part ###
# Change into data directory
os.chdir(data_dir)

# List all files in current directory
files = os.listdir(os.curdir)

# Loop over all .tif files in input directory
for f in files:
    if os.path.splitext(f)[1] == '.shp':
        ### Buffering ###
        # Define name of buffered vector
        input_v_buff = os.path.splitext(f)[0] + "_buffered.shp"

    # Buffer vector (a large value for segments results in a larger processing time and a rounder buffer output (x-sided polygon), which is not useful before rasterization
    if not os.path.isfile(input_v_buff):
        processing.runalg('qgis:fixeddistancebuffer',
                          {"INPUT": f,
                           "DISTANCE": buff_dist,
                           "SEGMENTS": 1,
                           "DISSOLVE": True,
                           "OUTPUT": input_v_buff})
        print "--> %s has been buffered with a distance of %.2f m to %s\n" % (f, buff_dist, input_v_buff)

    ### Rasterization ###
    # Read buffered layer as QGIS layer
    vlayer = QgsVectorLayer(input_v_buff, "vect", "ogr")

    # Define raster extent
    extent = vlayer.extent()
    xmin = extent.xMinimum()
    xmax = extent.xMaximum()
    ymin = extent.yMinimum()
    ymax = extent.yMaximum()

    # Define name of buffered vector
    input_r_buff = os.path.splitext(f)[0] + "_buffered.tif"

    # Run rasterization
    if not os.path.isfile(input_r_buff):
        processing.runalg('gdalogr:rasterize',
                          {"INPUT": input_v_buff,
                           "FIELD": "DN",
                           "DIMENSIONS": 0,
                           "WIDTH": raster_size,
                           "HEIGHT": raster_size,
                           "RAST_EXT": "%f,%f,%f,%f" % (xmin, xmax, ymin, ymax),
                           "OUTPUT": input_r_buff})
        print "--> %s has been rasterized to %s\n" % (input_v_buff, input_r_buff)

    #########################
    ### Detection Quality ###
    #########################
    ### Buffering of reference data ###
    # Define name of buffered vector
    ref_v_buff = str("ref_v_buffsize" + str(buff_ref) + ".shp")

    if not os.path.isfile(ref_v_buff):
        # Buffer vector (a large value for segments results in a larger processing time and a rounder buffer output (x-sided polygon), which is not useful before rasterization
        processing.runalg('qgis:fixeddistancebuffer',
                          {"INPUT": ref_v,
                           "DISTANCE": buff_ref,
                           "SEGMENTS": 1,
                           "DISSOLVE": True,
                           "OUTPUT": ref_v_buff})
        print "--> %s has been buffered with a distance of %.2f m to %s.\n" % (ref_v, buff_ref, ref_v_buff)

    ### Rasterization ###
    # Define name of buffered reference vector
    ref_r_buff = str("ref_r_buffsize" + str(buff_ref) + ".tif")

    if not os.path.isfile(ref_r_buff):
        # Run rasterization
        processing.runalg('gdalogr:rasterize',
                          {"INPUT": ref_v_buff,
                           "FIELD": "DN",
                           "DIMENSIONS": 0,
                           "WIDTH": raster_size,
                           "HEIGHT": raster_size,
                           "RAST_EXT": "%f,%f,%f,%f" % (xmin, xmax, ymin, ymax),
                           "OUTPUT": ref_r_buff})
        print "--> %s has been rasterized to %s.\n" % (ref_v_buff, ref_r_buff)

    ### Kappa calculation ###
    # Define name of accuraccy assessment file
    det_quality = os.path.splitext(f)[0] + "_det_quality.txt"

    if not os.path.isfile(det_quality):
        # Run accuracy assessment (The classified result map layer categories is arranged along the vertical
        # axis of the table, while the reference map layer categories along the horizontal axis)
        processing.runalg('grass:r.kappa', {"classification": input_r_buff,
                                            "reference": ref_r_buff,
                                            "GRASS_REGION_PARAMETER": "%f,%f,%f,%f" % (xmin, xmax, ymin, ymax),
                                            "output": det_quality})
        print "--> %s and reference layer %s have been compared for accuracy assessment. Results are stored in %s\n" % (
        input_r_buff, ref_r_buff, det_quality)

    ############################
    ### Localization Quality ###
    ############################

    ### Rasterization ###
    # Define name of buffered reference vector
    ref_r = str("ref_r.tif")

    if not os.path.isfile(ref_r):
        # Run rasterization
        processing.runalg('gdalogr:rasterize',
                          {"INPUT": ref_v,
                           "FIELD": "DN",
                           "DIMENSIONS": 0,
                           "WIDTH": raster_size,
                           "HEIGHT": raster_size,
                           "RAST_EXT": "%f,%f,%f,%f" % (xmin, xmax, ymin, ymax),
                           "OUTPUT": ref_r})
        print "--> %s has been rasterized to %s.\n" % (ref_v, ref_r)

    ### Buffering ###
    # Buffer rasterized reference data with distances from 0-0.5m
    ref_r_buffsizes = str("ref_r_buffsizes.tif")

    if not os.path.isfile(ref_r_buffsizes):
        processing.runalg('grass7:r.buffer',
                          {"input": ref_r,
                           "distances": "0.1,0.2,0.3,0.4,0.5",
                           "units": 0,
                           "-z": True,
                           "GRASS_REGION_PARAMETER": "%f,%f,%f,%f" % (xmin, xmax, ymin, ymax),
                           "GRASS_REGION_CELLSIZE_PARAMETER": 0.05,
                           "output": ref_r_buffsizes})
        print "--> %s has been buffered with distances from 0-1m to %s.\n" % (ref_r, ref_r_buffsizes)

    ### Rasterization ###
    # Define name of buffered vector
    input_r = os.path.splitext(f)[0] + ".tif"

    # Run rasterization
    if not os.path.isfile(input_r):
        processing.runalg('gdalogr:rasterize',
                          {"INPUT": f,
                           "FIELD": "DN",
                           "DIMENSIONS": 1,
                           "WIDTH": 0.05,
                           "HEIGHT": 0.05,
                           "RAST_EXT": "%f,%f,%f,%f" % (xmin, xmax, ymin, ymax),
                           "OUTPUT": input_r})
        print "--> %s has been rasterized to %s\n" % (f, input_r)

    ### Kappa calculation ###
    # Define name of accuraccy assessment file
    loc_quality = os.path.splitext(f)[0] + "_loc_quality.txt"

    if not os.path.isfile(loc_quality):
        # Run accuracy assessment
        processing.runalg('grass:r.kappa', {"classification": input_r,
                                            "reference": ref_r_buffsizes,
                                            "GRASS_REGION_PARAMETER": "%f,%f,%f,%f" % (xmin, xmax, ymin, ymax),
                                            "output": loc_quality})
        print "--> %s and reference layer %s.tif have been compared for accuracy assessment. Results are stored in %s\n" % (
            input_r, ref_r, loc_quality)

# Print final overall message
print "All processing has been finished."

How to do line based accuracy assessment with pyqgis

1 Answers1