8

I have been using this answer on GIS SE to split 3-band (RGB) imagery into 256x256 3-band image tiles:

import os, gdal

in_path = '/path/to/indata/'
input_filename = 'dtm_5.tif'

out_path = '/path/to/output_folder/'
output_filename = 'tile_'

tile_size_x = 256
tile_size_y = 256

ds = gdal.Open(in_path + input_filename)
band = ds.GetRasterBand(1)
xsize = band.XSize
ysize = band.YSize

for i in range(0, xsize, tile_size_x):
    for j in range(0, ysize, tile_size_y):
        com_string = "gdal_translate -of GTIFF -srcwin " + str(i)+ ", " + str(j) + ", " + str(tile_size_x) + ", " + str(tile_size_y) + " " + str(in_path) + str(input_filename) + " " + str(out_path) + str(output_filename) + str(i) + "_" + str(j) + ".tif"
        os.system(com_string)

Is there a comparable Rasterio or numpy approach?

Aaron
  • 51,658
  • 28
  • 154
  • 317
  • 2
    Just a suggestion but using gdal_translate with os.system will work much faster if you use subprocess.Popen(['gdal_translate','-of','GTIFF'.... you can generate a bunch of them and have them working simultaneously up to the bottleneck of HDD access. With trying to do this directly with GDAL you need to create a new dataset on each iteration, calculate the upper left and set the geotransform then read from source / write to target.. it's just easier to do it using gdal_translate. – Michael Stimson Jun 07 '18 at 05:04

3 Answers3

13

Below is a simple example (rasterio 1.0.0 or later, won't work in 0.3.6). There might be better/simpler ways (and there is an easier way if your raster is internally tiled and the tile block sizes match your desired output tile size).

The rasterio docs have some examples of concurrent processing if you want to go down that road.

import os
from itertools import product
import rasterio as rio
from rasterio import windows

in_path = '/path/to/indata/'
input_filename = 'dtm_5.tif'

out_path = '/path/to/output_folder/'
output_filename = 'tile_{}-{}.tif'

def get_tiles(ds, width=256, height=256):
    nols, nrows = ds.meta['width'], ds.meta['height']
    offsets = product(range(0, nols, width), range(0, nrows, height))
    big_window = windows.Window(col_off=0, row_off=0, width=nols, height=nrows)
    for col_off, row_off in  offsets:
        window =windows.Window(col_off=col_off, row_off=row_off, width=width, height=height).intersection(big_window)
        transform = windows.transform(window, ds.transform)
        yield window, transform


with rio.open(os.path.join(in_path, input_filename)) as inds:
    tile_width, tile_height = 256, 256

    meta = inds.meta.copy()

    for window, transform in get_tiles(inds):
        print(window)
        meta['transform'] = transform
        meta['width'], meta['height'] = window.width, window.height
        outpath = os.path.join(out_path,output_filename.format(int(window.col_off), int(window.row_off)))
        with rio.open(outpath, 'w', **meta) as outds:
            outds.write(inds.read(window=window))
user2856
  • 65,736
  • 6
  • 115
  • 196
2

I wanted an overlap of tiles for my application. Solved it by extending @user2856 to include 'overlap' as a parameter.

import os
import math
from itertools import product
import rasterio as rio
from rasterio import windows

def get_tiles(ds, tile_width, tile_height, overlap): nols, nrows = ds.meta['width'], ds.meta['height'] xstep = tile_width - overlap ystep = tile_height - overlap for x in range(0, nols, xstep): if x + tile_width > nols: x = nols - tile_width for y in range(0, nrows, ystep): if y + tile_height > nrows: y = nrows - tile_height window = windows.Window(x, y, tile_width, tile_height) transform = windows.transform(window, ds.transform) yield window, transform

in_path = '/content/drive/MyDrive/Raster_Dataset_Airports/Spacenet' input_filename = 'Minsk.tif'

out_path = 'Minsk_Tiles' output_filename = 'Minsk_{}-{}.tif'

tile_width = 512 tile_height = 512 overlap = 128

with rio.open(os.path.join(in_path, input_filename)) as src: metadata = src.meta.copy()

for window, transform in get_tiles(src, tile_width, tile_height, overlap):
    metadata['transform'] = transform
    metadata['width'], metadata['height'] = window.width, window.height
    out_filepath = os.path.join(out_path, output_filename.format(window.col_off, window.row_off))

    with rio.open(out_filepath, 'w', **metadata) as dst:
        dst.write(src.read(window=window))

Here is a verification mechanism to compare with the GDAL CLI command.

!gdal_retile.py  -ps 512 512 -overlap 128  -targetDir  Minsk_Tiles2 \
   /content/drive/MyDrive/Raster_Dataset_Airports/Spacenet/Minks.tif

Generate tile index files for both tiles folder and view them in GIS software.

!gdaltindex gdal_gen_index.shp /content/Minsk_Tiles/*.tif
!gdaltindex rasterio_gen_index.shp /content/Minsk_Tiles2 /*.tif

import geopandas as gpd

gdf_gdal=gpd.read_file('/content/gdal_gen_index.shp') gdf_rasterio=gpd.read_file('/content/rasterio_gen_index.shp')

addcolor
  • 1,282
  • 9
  • 19
0

This script efficiently tile a Raster by using GDAL instead of GdalTranslate or GdalWarp

https://gist.github.com/mustafateke/52f8b52804450cabd6bce7c7c9d49d21

# This script is created by Mustafa Teke (mustafa.teke@gmail.com)
# 17/08/2021

import math import os import gdal from datetime import datetime

#Save subset ROI to given path def subsetGeoTiff(ds, outFileName, arr_out, start, size, bands ):

driver = gdal.GetDriverByName("GTiff")
#set compression
outdata = driver.Create(outFileName, size[0], size[1], bands, gdal.GDT_UInt16)
newGeoTransform = list( ds.GetGeoTransform() )
newGeoTransform[0] = newGeoTransform[0] + start[0]*newGeoTransform[1] + start[1]*newGeoTransform[2]
newGeoTransform[3] = newGeoTransform[3] + start[0]*newGeoTransform[4] + start[1]*newGeoTransform[5]

outdata.SetGeoTransform( newGeoTransform )    
outdata.SetProjection(ds.GetProjection())

for i in range(0,bands) :
    outdata.GetRasterBand(i+1).WriteArray(arr_out[i, :, :])
    outdata.GetRasterBand(i+1).SetNoDataValue(0)

outdata.FlushCache()
outdata = None

startTime = datetime.now() print("Start" , datetime.now().strftime("%m/%d/%Y, %H:%M:%S"))

Path = "D:/Git/Scripts/T37SEB_20200825.tif" outDir, file_extension = os.path.splitext(Path) filename = os.path.split(outDir)[1] #Create a folder in the same name as the image if not os.path.exists(filename): os.makedirs(filename)

#Open dataset and get contents as a numpy array ds = gdal.Open(Path) image = ds.ReadAsArray()

imageWidth = ds.RasterXSize imageHeight = ds.RasterYSize

tileSizeX = 256 tileSizeY = 256

offsetX = int(tileSizeX/2) offsetY = int(tileSizeY/2)

tileSize = (tileSizeY, tileSizeX)

for startX in range(0, imageWidth, offsetX): for startY in range(0, imageHeight, offsetY): endX = startX + tileSizeX endY = startY + tileSizeY

    currentTile = image[:, startX:endX,startY:endY]
    #if you want to save save directly with opencv
    # However reverse order of data
    #cv2.imwrite(filename + '_%d_%d' % (nTileY, nTileX)  + file_extension, currentTile)
    start = (startY,startX)
    outFullFileName = os.path.join( outDir, filename + '_%d_%d' % (startY, startX)  + file_extension )
    subsetGeoTiff(ds, outFullFileName, currentTile, start, tileSize,  currentTile.shape[0] )


endTime = datetime.now() ds=None

print("Finished " , datetime.now().strftime("%m/%d/%Y, %H:%M:%S"), " in " , endTime-startTime )