Source code for museotoolbox.processing

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# =============================================================================
# ___  ___                       _____           _______
# |  \/  |                      |_   _|         | | ___ \
# | .  . |_   _ ___  ___  ___     | | ___   ___ | | |_/ / _____  __
# | |\/| | | | / __|/ _ \/ _ \    | |/ _ \ / _ \| | ___ \/ _ \ \/ /
# | |  | | |_| \__ \  __/ (_) |   | | (_) | (_) | | |_/ / (_) >  <
# \_|  |_/\__,_|___/\___|\___/    \_/\___/ \___/|_\____/ \___/_/\_\
#
# @author:  Nicolas Karasiak
# @site:    www.karasiak.net
# @git:     www.github.com/nkarasiak/MuseoToolBox
# =============================================================================
"""
The :mod:`museotoolbox.processing` module gathers raster and vector tools.
"""
# general libraries
import os
import numpy as np
import tempfile
from psutil import virtual_memory

# spatial libraries
from osgeo import __version__ as osgeo_version
from osgeo import gdal, ogr
from joblib import Parallel, delayed

from ..internal_tools import ProgressBar, push_feedback


[docs]def image_mask_from_vector( in_vector, in_image, out_image, invert=False, gdt=gdal.GDT_Byte): """ Create a image mask where polygons/points are the pixels to keep. Parameters ---------- in_vector : str. Path of the vector file to rasterize. in_image : str. Path of the raster file where the vector file will be rasterize. out_image : str. Path of the file (.tif) to create. invert : bool, optional (default=False). invert=True make polygons/points with 0 values in out_image. gdt : int, optional (default=gdal.GDT_Byte). The gdal datatype of the rasterized vector. """ rasterize( in_image, in_vector, None, out_image, invert=invert, gdt=gdt)
[docs]def get_gdt_from_minmax_values(max_value, min_value=0): """ Return the Gdal DataType according the minimum or the maximum value. Parameters ---------- max_value : int or float. The maximum value needed. min_value : int or float, optional (default=0). The minimum value needed. Returns ------- gdalDT : int. gdal datatype. Examples --------- >>> get_gdt_from_minmax_values(260) 2 >>> get_gdt_from_minmax_values(16) 1 >>> get_gdt_from_minmax_values(16,-260) 3 """ max_abs_value = np.amax(np.abs([max_value, min_value])) # if values are int if isinstance(max_abs_value, (int, np.integer)): if min_value >= 0: if max_value <= 255: gdalDT = gdal.GDT_Byte elif max_value > 255 and max_value <= 65535: gdalDT = gdal.GDT_UInt16 elif max_value >= 65535: gdalDT = gdal.GDT_UInt32 elif min_value < 0: if min_value > -65535: gdalDT = gdal.GDT_Int16 else: gdalDT = gdal.GDT_Int32 # if values are float if isinstance(max_abs_value, float): if max_abs_value > +3.4E+38: gdalDT = gdal.GDT_Float64 else: gdalDT = gdal.GDT_Float32 return gdalDT
[docs]def convert_dt(dt, to_otb_dt=False): """ Return the datatype from gdal to numpy or from numpy to gdal. Parameters ----------- dt : int or str gdal datatype from src_dataset.GetRasterBand(1).DataType. numpy datatype from np.array([]).dtype.name Returns -------- dt : int or data type - For gdal, the data type (int). - For numpy, the date type (type). Examples --------- >>> _convert_dt(gdal.GDT_Int16) numpy.int16 >>> _convert_dt(gdal.GDT_Float64) numpy.float64 >>> _convert_dt(numpyDT=np.array([],dtype=np.int16).dtype.name) 3 >>> _convert_dt(numpyDT=np.array([],dtype=np.float64).dtype.name) 7 """ from osgeo import gdal_array if isinstance(dt, int): is_gdal = True else: is_gdal = False if is_gdal is True: code = gdal_array.GDALTypeCodeToNumericTypeCode(dt) else: NP2GDAL_CONVERSION = { "uint8": 1, "int8": 3, "uint16": 2, "int16": 3, "uint32": 4, "int32": 5, "float32": 6, "float64": 7, "complex64": 10, "complex128": 11, "int64": 5, "uint64": 5 } try: code = NP2GDAL_CONVERSION[dt] if dt.endswith('int64'): push_feedback( 'Warning : Numpy type {} is not recognized by gdal. Will use int32 instead'.format(dt)) except BaseException: code = 7 push_feedback( 'Warning : Numpy type {} is not recognized by gdal. Will use float64 instead'.format(dt)) if to_otb_dt: if is_gdal: code = _convert_gdal_to_otb_dt(dt) else: code = _convert_gdal_to_otb_dt(code) return code
def _convert_gdal_to_otb_dt(dt): """ Convert Gdal DataType to OTB str format. Parameters ---------- dt : int gdal datatype from src_dataset.GetRasterBand(1).DataType. Returns ---------- otb_dt : str. The otb data type. Examples --------- >>> _convert_gdal_to_otb_dt(gdal.GDT_Float32) 'float' >>> _convert_gdal_to_otb_dt(gdal.GDT_Byte) 'uint8' >>> _convert_gdal_to_otb_dt(gdal.GDT_UInt32) 'uint32' >>> _convert_gdal_to_otb_dt(gdal.GDT_CFloat64) 'cdouble' """ # uint8/uint16/int16/uint32/int32/float/double/cint16/cint32/cfloat/cdouble code = [ 'uint8', 'uint8', 'uint16', 'int16', 'uint32', 'int32', 'float', 'double', 'cint16', 'cint32', 'cfloat', 'cdouble'] if dt > len(code): otb_dt = ('cdouble') else: otb_dt = code[dt] return otb_dt
[docs]def extract_ROI(in_image, in_vector, *fields, **kwargs): """ Extract raster values from Regions Of Interest in a vector file. Initially written by Mathieu Fauvel, improved by Nicolas Karasiak. Parameters ----------- in_image : str. the name or path of the raster file, could be any file that GDAL can open. in_vector : str. A filename or path corresponding to a vector file. It could be any file that GDAL/OGR can open. *fields : str. Each field to extract label/value from. **kwargs : list of kwargs. - get_pixel_position : bool, optional (default=False). If `get_pixel_position=True`, will return pixel position in the image for each point. - only_pixel_position : bool, optional (default=False). If `only_pixel_position=True`, with only return pixel position for each point. - prefer_memory : bool, optional (default=False). If `prefer_memory=False`, will write temporary raster on disk to extract ROI values. - verbose : bool or int, optional (default=True). The higher is the int verbose, the more it will returns informations. Returns -------- X : np.ndarray, size of (n_samples,n_features). The sample matrix. A n*d matrix, where n is the number of referenced pixels and d is the number of features. Each line of the matrix is a pixel. y : np.ndarray, size of (n_samples,). The label of each pixel. See also --------- museotoolbox.processing.read_vector_values : read field values from vector file. Examples --------- >>> from museotoolbox.datasets import load_historical_data >>> from museotoolbox.processing import extract_ROI >>> raster,vector= load_historical_data() >>> X,Y = extract_ROI(raster,vector,'Class') >>> X array([[ 213., 189., 151.], [ 223., 198., 158.], [ 212., 188., 150.], ..., [ 144., 140., 105.], [ 95., 92., 57.], [ 141., 137., 102.]]) >>> X.shape (12647,3) >>> Y [3 3 3 ..., 1 1 1] >>> Y.shape (12647,) """ # generate kwargs value if 'verbose' in kwargs.keys(): verbose = kwargs['verbose'] else: verbose = False if 'get_pixel_position' in kwargs: get_pixel_position = kwargs['get_pixel_position'] else: get_pixel_position = False if 'only_pixel_position' in kwargs: only_pixel_position = kwargs['only_pixel_position'] else: only_pixel_position = False if 'prefer_memory' in kwargs: prefer_memory = kwargs['prefer_memory'] else: prefer_memory = False # Open Raster raster = gdal.Open(in_image, gdal.GA_ReadOnly) if raster is None: raise ValueError('Impossible to open ' + in_image) # exit() # Convert vector to raster nFields = len(fields) if nFields == 0 or fields[0] == False: fields = [False] else: source = ogr.Open(in_vector) layer = source.GetLayer() np_dtypes = [] ldefn = layer.GetLayerDefn() for f in fields: idx = ldefn.GetFieldIndex(f) if idx == -1: listFields = [] for n in range(ldefn.GetFieldCount()): fdefn = ldefn.GetFieldDefn(n) if fdefn.name is not listFields: listFields.append('"' + fdefn.name + '"') raise ValueError( 'Sorry, field "{}" is not available.\nThese fields are available : {}.'.format( f, ', '.join(listFields))) fdefn = ldefn.GetFieldDefn(idx) fdefn_type = fdefn.type if fdefn_type < 4 or fdefn_type == 12: if fdefn_type > 1 and fdefn_type != 12: np_dtype = np.float64 else: np_dtype = np.int64 else: raise ValueError( 'Wrong type for field "{}" : {}. \nPlease use int or float.'.format( f, fdefn.GetFieldTypeName( fdefn.type))) np_dtypes.append(np_dtype) rois = [] temps = [] for field in fields: if prefer_memory: raster_in_mem = True image_field = 'MEM' data_src = rasterize(in_image, in_vector, field, out_image=image_field, gdt=gdal.GDT_Float64) else: raster_in_mem = False image_field = tempfile.mktemp('_roi.tif') rasterize(in_image, in_vector, field, out_image=image_field, gdt=gdal.GDT_Float64) data_src = gdal.Open(image_field, gdal.GA_ReadOnly) temps.append(image_field) if data_src is None: raise Exception( 'A problem occured when rasterizing {} with field {}'.format( in_vector, field)) if (raster.RasterXSize != data_src.RasterXSize) or ( raster.RasterYSize != data_src.RasterYSize): raise Exception('Raster and vector do not cover the same extent.') rois.append(data_src) # Get block size band = raster.GetRasterBand(1) block_sizes = band.GetBlockSize() x_block_size = block_sizes[0] y_block_size = block_sizes[1] gdalDT = band.DataType del band # Get the number of variables and the size of the images d = raster.RasterCount nc = raster.RasterXSize nl = raster.RasterYSize # ulx, xres, xskew, uly, yskew, yres = raster.GetGeoTransform() if get_pixel_position is True or only_pixel_position is True: coords = np.array([], dtype=np.int64).reshape(0, 2) xDataType = convert_dt(gdalDT) # Read block data X = np.array([], dtype=xDataType).reshape(0, d) F = np.array([], dtype=np.int64).reshape( 0, nFields) # now support multiple fields # for progress bar if verbose: total = 100 pb = ProgressBar(total, message='Reading raster values... ') for i in range(0, nl, y_block_size): if i + y_block_size < nl: # Check for size consistency in Y lines = y_block_size else: lines = nl - i for j in range(0, nc, x_block_size): # Check for size consistency in X if j + x_block_size < nc: cols = x_block_size else: cols = nc - j # for ProgressBar if verbose: currentPosition = (i / nl) * 100 pb.add_position(currentPosition) # Load the reference data ROI = rois[0].GetRasterBand(1).ReadAsArray(j, i, cols, lines) t = np.nonzero(ROI) if t[0].size > 0: if get_pixel_position or only_pixel_position: coordsTp = np.empty((t[0].shape[0], 2)) coordsTp[:, 0] = t[1] + j coordsTp[:, 1] = t[0] + i coords = np.concatenate((coords, coordsTp)) # Load the Variables if not only_pixel_position: # extract values from each field if nFields > 0: Ftemp = np.empty( (t[0].shape[0], nFields), dtype=np.int64) for idx, roiTemp in enumerate(rois): roiField = roiTemp.GetRasterBand( 1).ReadAsArray(j, i, cols, lines) Ftemp[:, idx] = roiField[t] F = np.concatenate((F, Ftemp)) # extract raster values (X) Xtp = np.empty((t[0].shape[0], d), dtype=xDataType) for k in range(d): band = raster.GetRasterBand( k + 1).ReadAsArray(j, i, cols, lines) Xtp[:, k] = band[t] X = np.concatenate((X, Xtp)) if verbose: pb.add_position(100) # Clean/Close variables # del Xtp,band roi = None # Close the roi file raster = None # Close the raster file # remove temp raster if raster_in_mem: for roi in temps: os.remove(roi) # generate returns if only_pixel_position: toReturn = coords else: if nFields > 0: toReturn = [X] + [F[:, f] for f in range(nFields)] else: toReturn = X if get_pixel_position: if nFields == 0: toReturn = [toReturn] + [coords] else: toReturn = toReturn + [coords] return toReturn
[docs]def rasterize(in_image, in_vector, in_field=False, out_image='MEM', gdt=gdal.GDT_Int16, invert=False): """ Rasterize vector to the size of data (raster) Parameters ----------- in_image : str. A filename or path corresponding to a raster image. in_vector : str. A filename or path corresponding to a vector file. in_field : str, optional (default=False). Name of the filed to rasteirze. If False, will rasterize the polygons or points with >0 value, and set the other values to 0. out_image : str, optional (default = 'MEM'). A filename or path corresponding to a geotiff (.tif) raster image to save. 'MEM' will store raster in memory. gdt : int, optional (default=gdal.GDT_Int16) gdal datatype. invert : bool, optional (default=False). if invert is True, polygons will have 0 values in the out_image. Returns -------- dst_ds : gdal object The open dataset with gdal (essential if out_image is set to 'MEM') """ data_src = gdal.Open(in_image) shp = ogr.Open(in_vector) lyr = shp.GetLayer() if out_image.upper() == 'MEM': driver = gdal.GetDriverByName('MEM') out_image = '' options = [] else: driver = gdal.GetDriverByName('GTiff') options = ['BIGTIFF=IF_SAFER'] dst_ds = driver.Create( out_image, data_src.RasterXSize, data_src.RasterYSize, 1, gdt, options=options) dst_ds.SetGeoTransform(data_src.GetGeoTransform()) dst_ds.SetProjection(data_src.GetProjection()) if in_field is False or in_field is None: if invert: try: options = gdal.RasterizeOptions(inverse=invert) gdal.Rasterize(dst_ds, in_vector, options=options) except BaseException: raise Exception( 'Version of gdal is too old : RasterizeOptions is not available.\nPlease update.') else: # gdal.Rasterize(dst_ds, vectorSrc) gdal.RasterizeLayer(dst_ds, [1], lyr, options=options) dst_ds.GetRasterBand(1).SetNoDataValue(0) else: options = ['ATTRIBUTE=' + in_field] gdal.RasterizeLayer(dst_ds, [1], lyr, None, options=options) data_src, shp, lyr = None, None, None return dst_ds
[docs]class RasterMath: """ Read one or multiple rasters per block, and perform one or many functions to one or many geotiff raster outputs. If you want a sample of your data, just call :func:`~museotoolbox.processing.RasterMath.get_random_block`. The default option of rasterMath will return in 2d the dataset : - each line is a pixel with in columns its differents values in bands so masked data will not be given to this user. If you want to have the data in 3d (X,Y,Z), masked data will be given too (using numpy.ma). Parameters ---------- in_image : str. Path of a gdal extension supported raster. in_image_mask : str or False, optional (default=False). If str, path of the raster mask. Value masked are 0, other are considered not masked. Use ``invert=True`` in :mod:`museotoolbox.processing.image_mask_from_vector` to mask only what is not in polygons. return_3d : bool, optional (default=False). Default will return a row per pixel (2 dimensions), and axis 2 (bands) are columns. If ``return_3d=True``, will return the block without reshape (not suitable to learn with `sklearn`). block_size : list or False, optional (default=[256,256]). Define the reading and writing block size. First element is the number of columns, second element the number of lines per block. If False, will use the block size as defined in in_image. To define later the block_size, use `custom_block_size`. n_jobs : int, optional, (default value : 1) Numbers of workers or process that will work in parallel. To use if your function are very time consumming. message : str, optional (default='rasterMath...'). If str, the message will be displayed before the progress bar. verbose : bool or int, optional (default=True). The higher is the int verbose, the more it will returns informations. Examples --------- >>> import museotoolbox as mtb >>> raster,_= mtb.datasets.load_historical_data() >>> rM = mtb.processing.RasterMath(raster) Total number of blocks : 15 >>> rM.add_function(np.mean,out_image='/tmp/test.tif',axis=1,dtype=np.int16) Using datatype from numpy table : int16. Detected 1 band for function mean. >>> rM.run() rasterMath... [########################################]100% Saved /tmp/test.tif using function mean """
[docs] def __init__( self, in_image, in_image_mask=False, return_3d=False, block_size=[ 256, 256], n_jobs=1, message='rasterMath...', verbose=True): self.verbose = verbose self.message = message self.driver = gdal.GetDriverByName('GTiff') self.itemsize = 0 # Load raster self.opened_images = [] self.add_image(in_image) if n_jobs < 0: self.n_jobs = os.cpu_count() else: self.n_jobs = n_jobs self.n_bands = self.opened_images[0].RasterCount self.n_columns = self.opened_images[0].RasterXSize self.n_lines = self.opened_images[0].RasterYSize # Get the geoinformation self.geo_transform = self.opened_images[0].GetGeoTransform() self.projection = self.opened_images[0].GetProjection() # Get block size and itemsize band = self.opened_images[0].GetRasterBand(1) self.input_block_sizes = band.GetBlockSize() # input block size if block_size is False: self.x_block_size = self.input_block_sizes[0] self.y_block_size = self.input_block_sizes[1] else: self.x_block_size = block_size[0] self.y_block_size = block_size[1] self.block_sizes = [self.x_block_size, self.y_block_size] self.custom_block_size() # set block size self.nodata = band.GetNoDataValue() self.dtype = band.DataType self.dtype_np = convert_dt(band.DataType) self.return_3d = return_3d del band # for memory purposes # Load in_image_mask if given self.mask = in_image_mask if self.mask: self.opened_mask = gdal.Open(in_image_mask) if self.opened_mask is None: raise ReferenceError( 'Impossible to open image ' + in_image_mask) # Initialize the output self.lastProgress = 0 self._outputs = [] self._raster_options = {}
[docs] def add_image( self, in_image): """ Add raster image. Parameters ----------- in_image : str. Path of a gdal extension supported raster. """ opened_raster = gdal.Open(in_image, gdal.GA_ReadOnly) if opened_raster is None: raise ReferenceError('Impossible to open image ' + in_image) is_same_size = True if len(self.opened_images) > 0: if opened_raster.RasterXSize != self.opened_images[ 0].RasterXSize or opened_raster.RasterYSize != self.opened_images[0].RasterYSize: is_same_size = False raise ValueError( "raster {} doesn't have the same size (X and Y) as the initial raster.\n \ Museotoolbox can't add it as an input raster.".format( os.path.basename(in_image))) n_bands = opened_raster.RasterCount band = opened_raster.GetRasterBand(1) mem_size = band.ReadAsArray(0, 0, 1, 1).itemsize * n_bands self.itemsize += mem_size del band if is_same_size: self.opened_images.append(opened_raster)
[docs] def add_function( self, function, out_image, out_n_bands=False, out_np_dt=False, out_nodata=False, compress=True, **kwargs): """ Add function to rasterMath. Parameters ---------- function : function. Function to parse where the first argument is a numpy array similar to what :mod:`museotoolbox.processing.RasterMath.get_random_block()` returns. out_image : str. A path to a geotiff extension filename corresponding to a raster image to create. out_n_bands : int or False, optional (default=False). If False, will run the given function to find the number of bands to define in the out_image. out_np_dt : int or False, optional (default=False). If False, will run the given function to get the datatype. out_nodata : int, float or False, optional (default=False). If True or if False (if nodata is present in the init raster), will use the minimum value available for the given or found datatype. compress: bool or str, optional (default=True). If True, will use PACKBITS. If 'high', will use DEFLATE with ZLEVEL = 9 and PREDICTOR=2. **kwargs : kwargs are keyword arguments you need for the given function. See also ---------- museotoolbox.processing.RasterMath.get_random_block : To test your function, parse the first argument with a random block museotoolbox.processing.convert_dt : To see conversion between numpy datatype to gdal datatype. museotoolbox.processing.get_dt_from_minmax_values : To get the gdal datatype according to a min/max value. """ if len(kwargs) > 0: randomBlock = function(self.get_random_block(), **kwargs) else: randomBlock = function(self.get_random_block()) if out_np_dt is False: np_type = randomBlock.dtype dtypeName = np_type.name out_np_dt = convert_dt(dtypeName) if self.verbose: push_feedback( 'Using datatype from numpy table : {}.'.format(dtypeName)) else: dtypeName = np.dtype(out_np_dt).name out_np_dt = convert_dt(dtypeName) # get number of bands randomBlock = self.reshape_ndim(randomBlock) out_n_bands = randomBlock.shape[-1] need_s = '' if out_n_bands > 1: need_s = 's' if self.verbose: push_feedback( 'Detected {} band{} for function {}.'.format( out_n_bands, need_s, function.__name__)) if self._raster_options == []: self._init_raster_parameters(compress=compress) else: params = self.get_raster_parameters() arg_pos = next( (x for x in params if x.startswith('compress')), None) if arg_pos: # remove old compress arg params.pop(params.index(arg_pos)) self.custom_raster_parameters(params) self._init_raster_parameters(compress=compress) self._add_output(out_image, out_n_bands, out_np_dt) if len(kwargs) == 0: kwargs = False if (out_nodata is True) or (self.nodata is not None) or ( self.mask is not False): if np.issubdtype(dtypeName, np.floating): minValue = float(np.finfo(dtypeName).min) else: minValue = np.iinfo(dtypeName).min if not isinstance(out_nodata, bool): if out_nodata < minValue: out_nodata = minValue else: out_nodata = minValue if self.verbose: push_feedback('No data is set to : {}.'.format(out_nodata)) self._outputs[-1]['gdal_type'] = out_np_dt self._outputs[-1]['np_type'] = dtypeName self._outputs[-1]['function'] = function self._outputs[-1]['n_bands'] = out_n_bands self._outputs[-1]['kwargs'] = kwargs self._outputs[-1]['nodata'] = out_nodata self._outputs[-1]['itemsize'] = randomBlock.itemsize * out_n_bands
def _init_raster_parameters(self, compress=True): self._raster_options = [] if compress: if compress == 'jpg': self._raster_options.append('COMPRESS=JPEG') self._raster_options.append('JPEG_QUALITY=80') else: self._raster_options.append('BIGTIFF=IF_SAFER') if osgeo_version >= '2.1': self._raster_options.append( 'NUM_THREADS={}'.format(self.n_jobs)) if compress == 'high': self._raster_options.append('COMPRESS=DEFLATE') self._raster_options.append('PREDICTOR=2') self._raster_options.append('ZLEVEL=9') else: self._raster_options.append('COMPRESS=PACKBITS') else: self._raster_options = ['BIGTIFF=IF_NEEDED']
[docs] def get_raster_parameters(self): """ Get raster parameters (compression, block size...) Returns -------- options : list. List of parameters for creating the geotiff raster. References ----------- As MuseoToolBox only saves in geotiff, parameters of gdal drivers for GeoTiff are here : https://gdal.org/drivers/raster/gtiff.html """ if self._raster_options == []: self._init_raster_parameters() return self._raster_options
[docs] def custom_raster_parameters(self, parameters_list): """ Parameters to custom raster creation. Do not enter here blockXsize and blockYsize parameters as it is directly managed by :mod:`custom_block_size` function. Parameters ----------- parameters_list : list. - example : ['BIGTIFF=IF_NEEDED','COMPRESS=DEFLATE'] - example : ['COMPRESS=JPEG','JPEG_QUALITY=80'] References ----------- As MuseoToolBox only saves in geotiff, parameters of gdal drivers for GeoTiff are here : https://gdal.org/drivers/raster/gtiff.html See also --------- museotoolbox.processing.RasterMath.custom_block_size : To custom the reading and writing block size. """ self._raster_options = parameters_list
def _managed_raster_parameters(self): # remove blockysize or blockxsize if already in options self._raster_options = [val for val in self._raster_options if not val.upper().startswith( 'BLOCKYSIZE') and not val.upper().startswith('BLOCKXSIZE') and not val.upper().startswith('TILED')] self._raster_options.extend(['BLOCKYSIZE={}'.format( self.y_block_size), 'BLOCKXSIZE={}'.format(self.x_block_size)]) if self.y_block_size == self.x_block_size: if self.y_block_size in [64, 128, 256, 512, 1024, 2048, 4096]: self._raster_options.extend(['TILED=YES']) def _add_output(self, out_image, out_n_bands, out_np_dt): if not os.path.exists(os.path.dirname(os.path.abspath(out_image))): os.makedirs(os.path.dirname(os.path.abspath(out_image))) self._managed_raster_parameters() dst_ds = self.driver.Create( out_image, self.n_columns, self.n_lines, out_n_bands, out_np_dt, options=self._raster_options ) dst_ds.SetGeoTransform(self.geo_transform) dst_ds.SetProjection(self.projection) self._outputs.append(dict(gdal_object=dst_ds)) def _iter_block(self, get_block=False, y_block_size=False, x_block_size=False): if not y_block_size: y_block_size = self.y_block_size if not x_block_size: x_block_size = self.x_block_size for row in range(0, self.n_lines, y_block_size): for col in range(0, self.n_columns, x_block_size): width = min(self.n_columns - col, x_block_size) height = min(self.n_lines - row, y_block_size) if get_block: X = self._generate_block_array( col, row, width, height, self.mask) yield X, col, row, width, height else: yield col, row, width, height def _generate_block_array(self, col, row, width, height, mask=False): """ Return block according to position and width/height of the raster. Parameters ---------- col : int. the col. row : int the line. width : int. the width. height : int. the height. mask : bool. Use the mask (only if a mask if given in parameter of `RasterMath`.) Returns ------- arr : numpy array with masked values. (`np.ma.masked_array`) """ arrs = [] if mask: bandMask = self.opened_mask.GetRasterBand(1) arrMask = bandMask.ReadAsArray( col, row, width, height).astype(np.bool) if self.return_3d is False: arrMask = arrMask.reshape(width * height) else: arrMask = None for nRaster in range(len(self.opened_images)): nb = self.opened_images[nRaster].RasterCount if self.return_3d: arr = np.empty((height, width, nb), dtype=self.dtype_np) else: arr = np.empty((height * width, nb), dtype=self.dtype_np) # for ind in range(nb): arr = self.opened_images[nRaster].ReadAsArray( col, row, width, height) if arr.ndim > 2: arr = np.moveaxis(arr, 0, -1) if not self.return_3d: if arr.ndim == 2: arr = arr.flatten() else: arr = arr.reshape(-1, arr.shape[-1]) arr = self._filter_nodata(arr, arrMask) arrs.append(arr) if len(arrs) == 1: arrs = arrs[0] return arrs def _filter_nodata(self, arr, mask=None): """ Filter no data according to a mask and to nodata value set in the raster. """ arr = self.reshape_ndim(arr) if mask is not None: mask = self.reshape_ndim(mask) arr_to_check = np.copy(arr) arr_shape = arr.shape out_arr = np.zeros((arr_shape), dtype=self.dtype_np) if self.nodata: out_arr[:] = self.nodata if self.mask: t = np.logical_or((mask == False), arr_to_check == self.nodata) else: t = np.where(arr_to_check == self.nodata) if self.return_3d: if arr.ndim == 2: arr = np.expand_dims(arr, 2) tmp_mask = np.zeros(arr_shape[:2], dtype=bool) if self.mask: t = t[...,0] else: t = t[:2] tmp_mask[t] = True tmp_mask = np.repeat(tmp_mask.reshape( *tmp_mask.shape, 1), arr.shape[-1], axis=2) out_arr = np.ma.masked_array(arr, tmp_mask) else: tmp_mask = np.zeros(arr_shape, dtype=bool) tmp_mask[t, ...] = True out_arr = np.ma.masked_array(arr, tmp_mask) return out_arr
[docs] def get_image_as_array(self): """ Return the input image(s) as an array. Be careful it can be huge to load in memory. """ arr = self._generate_block_array(0,0,self.n_columns,self.n_lines,mask=self.mask) if self.return_3d is False: if len(self.opened_images) == 1: arr = arr[~arr.mask[:,0],...].data else: arr = [tmp_arr[~arr.mask[:,0],...].data for tmp_arr in arr] if len(self.opened_images) > 1: arr = [self.reshape_ndim(tmp_arr) for tmp_arr in arr] else: arr = self.reshape_ndim(arr) return arr
[docs] def get_block(self, block_number=0, return_with_mask=False): """ Get a block by its position, ordered as follow : +-----------+-----------+ | block 0 | block 1 | +-----------+-----------+ | block 2 | block 3 | +-----------+-----------+ Parameters ----------- block_number : int, optional (default=0). Position of the desired block. return_with_mask : bool, optinal (default=False) If True and if return_3d is True, will return a numpy masked array. Returns -------- Block : np.ndarray or np.ma.MaskedArray """ if block_number > self.n_blocks: raise ValueError( 'There are only {} blocks in your image.'.format( self.n_blocks)) else: row = [l for l in range(0, self.n_lines, self.y_block_size)] col = [c for c in range(0, self.n_columns, self.x_block_size)] row_number = int(block_number / self.n_x_blocks) col_number = int(block_number % self.n_x_blocks) width = min(self.n_columns - col[col_number], self.x_block_size) height = min(self.n_lines - row[row_number], self.y_block_size) tmp = self._generate_block_array( col[col_number], row[row_number], width, height, self.mask) # return only available pixels when user ask and it is 2d if return_with_mask is False and self.return_3d is False: if len(self.opened_images) > 1: tmp = [np.ma.copy(t) for t in tmp] tmp = [t.data for t in tmp] else: tmp = np.ma.copy(tmp) tmp = tmp.data return tmp
[docs] def get_block_coords(self, block_number=0): """ Get position of a block : order as [x,y,width,height] Parameters ----------- block_number, int, optional (default=0). Position of the desired block. Returns -------- List of positions of the block [x,y,width,height] """ if block_number > self.n_blocks: raise ValueError( 'There are only {} blocks in your image.'.format( self.n_blocks)) else: row = [l for l in range(0, self.n_lines, self.y_block_size)] col = [c for c in range(0, self.n_columns, self.x_block_size)] row_number = int(block_number / self.n_x_blocks) col_number = int(block_number % self.n_x_blocks) width = min(self.n_columns - col[col_number], self.x_block_size) height = min(self.n_lines - row[row_number], self.y_block_size) return [col[col_number], row[row_number], width, height]
def _manage_block_mask(self, block): if isinstance(block, list): mask_block = block[0].mask else: mask_block = block.mask # if everything is masked if np.all(mask_block): size = 0 # if everything is not masked elif np.all(mask_block == False): size = 1 # if part masked, part unmasked elif np.any(mask_block == False): size = 1 if self.return_3d: if mask_block.ndim > 2: mask = mask_block[..., 0] elif mask_block.ndim == 1: mask = mask_block else: mask = mask_block[..., 0] if isinstance(block, list) and self.return_3d is False: block = [b[~mask, ...].data for b in block] return block
[docs] def get_random_block(self, random_state=None): """ Get a random block from the raster. Parameters ------------ random_state : int, optional (default=None) If int, random_state is the seed used by the random number generator. If None, the random number generator is the RandomState instance used by numpy np.random. """ # mask = np.array([True]) np.random.seed(random_state) rdm = np.random.permutation(np.arange(self.n_blocks)) idx = 0 size = 0 while size == 0: tmp = self.get_block(block_number=rdm[idx], return_with_mask=True) if isinstance(tmp, list): mask_block = tmp[0].mask else: mask_block = tmp.mask # if everything is masked if np.all(mask_block): size = 0 # if everything is not masked elif np.all(mask_block == False): size = 1 tmp = tmp # if part masked, part unmasked elif np.any(mask_block == False): size = 1 if self.return_3d: if mask_block.ndim > 2: mask = mask_block[..., 0] elif mask_block.ndim == 1: mask = mask_block else: mask = mask_block[..., 0] if isinstance(tmp, list) and self.return_3d is False: tmp = [b[~mask, ...].data for b in tmp] idx += 1 return tmp
[docs] def reshape_ndim(self, x): """ Reshape array with at least one band. Parameters ---------- x : numpy.ndarray, shape [n_pixels, n_features] or shape [n_pixels]. Returns ------- x : numpy.ndarray, shape [n_pixels, n_features]. """ if x.ndim == 0: x = x.reshape(-1, 1) if x.ndim == self.return_3d + 1: x = x.reshape(*x.shape, 1) return x
[docs] def read_band_per_band(self): """ Yields each whole band as np masked array (so with masked data) """ for nRaster in range(len(self.opened_images)): nb = self.opened_images[nRaster].RasterCount for n in range(1, nb + 1): band = self.opened_images[nRaster].GetRasterBand(n) band = band.ReadAsArray() if self.mask: mask = np.asarray( self.opened_mask.GetRasterBand(1).ReadAsArray(), dtype=bool) band = np.ma.MaskedArray(band, mask=~mask) else: band = np.ma.MaskedArray( band, mask=np.where( band == self.nodata, True, False)) yield band
[docs] def read_block_per_block(self): """ Yield each block. """ for block in range(self.n_blocks): yield self.get_block(block)
[docs] def custom_block_size(self, x_block_size=False, y_block_size=False): """ Define custom block size for reading and writing the raster. Parameters ---------- y_block_size : float or int, default False. IF int, number of rows per block. If -1, means all the rows. If float, value must be between 0 and 1, such as 1/3. x_block_size : float or int, default False. If int, number of columns per block. If -1, means all the columns. If float, value must be between 0 and 1, such as 1/3. """ if y_block_size: if y_block_size == -1: self.y_block_size = self.n_lines elif isinstance(y_block_size, float): self.y_block_size = int(np.ceil(self.n_lines * y_block_size)) else: self.y_block_size = y_block_size else: self.y_block_size = self.block_sizes[1] if x_block_size: if x_block_size == -1: self.x_block_size = self.n_columns elif isinstance(x_block_size, float): self.x_block_size = int(np.ceil(self.n_columns * x_block_size)) else: self.x_block_size = x_block_size else: self.x_block_size = self.block_sizes[0] if self.x_block_size == -1: self.x_block_size = self.n_columns if self.y_block_size == -1: self.y_block_size = self.n_lines self.n_blocks = np.ceil(self.n_lines / self.y_block_size).astype( int) * np.ceil(self.n_columns / self.x_block_size).astype(int) self.block_sizes = [self.x_block_size, self.y_block_size] self.n_y_blocks = len( [i for i in range(0, self.n_lines, self.y_block_size)]) self.n_x_blocks = len( [i for i in range(0, self.n_columns, self.x_block_size)]) # to compute memory size needed in run self.size = self.y_block_size * self.x_block_size if self.verbose: push_feedback('Total number of blocks : %s' % self.n_blocks)
# ============================================================================= # Begin_process_block function for RasterMath # Function is outside of class in order to be used with Parallel # ============================================================================= @staticmethod def _process_block(fun, kwargs, block, n_bands, nodata, np_dtype, return_3d, idx_block): """ Private function to compute external function per block. In order to save with the size as the input block, this function need the input mask. Parameters ---------- fun : function kwargs : kwargs False, or dict of kwargs. block : np.ndarray or np.ma.MaskedArray The function will compute using the given block nodata : nodata, int or float No Data value is some pixels are masked np_dtype : numpy datatype Numpy datatype for the output array return_3d : boolean 2d-array or 3d-array. Returns ------- res : np.ndarray The block to write """ tmp_arr = True if isinstance(block, list): mask_block = block[0].mask else: mask_block = block.mask # if everything is masked if np.all(mask_block): mask = True out_block = nodata # if everything is not masked elif np.all(mask_block == False): mask = False # if part masked, part unmasked elif np.any(mask_block == False): tmp_arr = mask_block.size if return_3d: if mask_block.ndim > 2: mask = mask_block[..., 0] elif mask_block.ndim == 1: mask = mask_block else: mask = mask_block[..., 0] # if block is not fully masked if mask is not True: # if no mask, we send the np.ndarray only if mask is False: if isinstance(block, list): block = [b.data for b in block] else: block = block.data if kwargs: out_block = fun(block, **kwargs) else: out_block = fun(block) # if part is masked else: # if 3d we send the np.ma.MaskedArray if return_3d: if kwargs: out_block = fun(block, **kwargs) else: out_block = fun(block) if out_block.ndim == 1: out_block = np.expand_dims(out_block, 2) out_block[mask, ...] = nodata # if 2d, we send only the np.ndarray without masked data else: # create empty array with nodata value if mask_block.ndim > 1: mbs = mask_block.shape[:-1] else: mbs = mask_block.shape out_block_shape = list(mbs) out_block_shape.append(n_bands) out_block = np.full(out_block_shape, nodata, np_dtype) if isinstance(block, list): block = [b[~mask, ...].data for b in block] if kwargs: tmp_arr = fun(block, **kwargs) else: tmp_arr = fun(block) else: if kwargs: tmp_arr = fun(block[~mask].data, **kwargs) else: tmp_arr = fun(block[~mask, ...].data) if tmp_arr.ndim == 1: tmp_arr = tmp_arr.reshape(-1, 1) out_block[~mask, ...] = tmp_arr return out_block # ============================================================================= # End of _process_block function # =============================================================================
[docs] def run(self, memory_size='1G'): """ Execute and Write output according to given functions. Parameters ---------- memory_size : str, optional (default='1G') Maximun size of ram the program can use to store blocks and results in memory. Support : M,G,T for Mo,Go, or To Example : '256M', '1G', '8G' or '1T'. Put -1 or False to use all the free memory. Returns ------- None """ # Initalize the run self._position = 0 if memory_size == -1 or memory_size is False: memory_to_use = virtual_memory().available else: try: size_value = memory_size[:-1] if memory_size[-1].capitalize() == 'K': memory_to_use = 1049 * int(size_value) if memory_size[-1].capitalize() == 'M': memory_to_use = 1048576 * int(size_value) elif memory_size[-1].capitalize() == 'G': memory_to_use = 1073741824 * int(size_value) elif memory_size[-1].capitalize() == 'T': memory_to_use = 1099511627776 * int(size_value) except BaseException: raise ValueError( ' {} is not a valid value. Use for example 100M, 10G or 1T.'.format(memory_size)) # Compute the size needed in memory for each output function and input # block items_size = self.itemsize for i in self._outputs: items_size += i['itemsize'] minimun_memory = items_size * self.x_block_size * self.y_block_size length = min(int(memory_to_use / minimun_memory), self.n_blocks) if length == 0: minimum_memory_kb = np.ceil( np.divide(minimun_memory, 1049)).astype(int) raise MemoryError( 'Not enought memory. For one block, you need at least {}{}'.format( minimum_memory_kb, 'K')) if self.verbose: if length > self.n_blocks: total = self.n_blocks else: total = length print('Batch processing ({} blocks using {}Mo of ram)'.format( int(total), np.ceil(minimun_memory * total / 1048576).astype(int))) self.pb = ProgressBar(self.n_blocks, message=self.message) self._position = 0 for i in range(0, self.n_blocks, length): if i <= self.n_blocks - length: idx_blocks = np.arange(i, i + length) else: idx_blocks = np.arange(i, self.n_blocks) for idx_output, output in enumerate(self._outputs): function = output['function'] kwargs = output['kwargs'] if self.n_jobs > 1: res = Parallel( self.n_jobs)( delayed( self._process_block)( function, kwargs, self.get_block( idx_block, return_with_mask=True), output['n_bands'], output['nodata'], output['np_type'], self.return_3d, idx_block) for idx, idx_block in enumerate(idx_blocks)) if self.verbose: self._position += total / len(self._outputs) self.pb.add_position(self._position) else: res = [] for idx, idx_block in enumerate(idx_blocks): res.append( self._process_block( function, kwargs, self.get_block( idx_block, return_with_mask=True), output['n_bands'], output['nodata'], output['np_type'], self.return_3d, idx_block)) if self.verbose: self._position += 1 / len(self._outputs) self.pb.add_position(self._position) for idx_block, block in enumerate(res): self.write_block(block, idx_blocks[idx_block], idx_output) self._outputs[idx_output]['gdal_object'].FlushCache() # delete output gdal object for fun in self._outputs: if fun['nodata'] is not False: band = fun['gdal_object'].GetRasterBand(1) band.SetNoDataValue(fun['nodata']) band.FlushCache() fun['gdal_object'] = None # no more thing to do if self.verbose: self.pb.add_position(self.n_blocks)
[docs] def write_block(self, block, idx_block, idx_func): """ Write a block at a position on a output image Parameters ---------- idx_block : int. List of indexes of all blocks tab_blocks : numpy tab. List of values or tab that will be written in the output image idx_func : int function's index Returns ------- None. """ coords = self.get_block_coords(idx_block) for ind in range(self._outputs[idx_func]['n_bands']): # write result band per band indGdal = ind + 1 curBand = self._outputs[idx_func]['gdal_object'].GetRasterBand( indGdal) # if int or float value, write it in each pixel of the block if isinstance(block, (float, int)): resToWrite = np.full((coords[3], coords[2]), block) else: # to be sure to have 2 or 3 dim resToWrite = self.reshape_ndim(block)[..., ind] if resToWrite.ndim <= 1: resToWrite = self.reshape_ndim( resToWrite).reshape(coords[3], coords[2]) curBand.WriteArray(resToWrite, coords[0], coords[1])
# rm FlushCache to speed process # curBand.FlushCache()
[docs]def sample_extraction( in_image, in_vector, out_vector, unique_fid=None, band_prefix=None, verbose=1): """ Extract centroid from shapefile according to the raster, and extract band value if band_prefix is given. This script is available via terminal by entering : `mtb_SampleExtraction`. Parameters ---------- in_image : str. A filename or path of a raster file. It could be any file that GDAL can open. in_vector : str. A filename or path corresponding to a vector file. It could be any file that GDAL/OGR can open. out_vector : str. Extension will be used to select driver. Please use one of them : ['gpkg','sqlite','shp','netcdf','gpx']. unique_fid : str, optional (default=None). If None, will add a field called 'uniquefid' in the output vector. band_prefix : str, optional (default=None). If band_prefix (e.g. 'band'), will extract values from raster. """ def _pixel_location_from_centroid(coords, geo_transform): """ Convert XY coords into the centroid of a pixel Parameters -------- coords : arr or list. X is coords[0], Y is coords[1]. geo_transform : list. List got from gdal.Open(inRaster).GetGeoTransform() . """ newX = geo_transform[1] * (coords[0] + 0.5) + \ geo_transform[0] newY = geo_transform[5] * (coords[1] + 0.5) + geo_transform[3] return [newX, newY] if unique_fid is None: unique_fid = 'uniquefid' if verbose: push_feedback("Adding 'uniquefid' field to the original vector.") _add_vector_unique_fid( in_vector, unique_fid, verbose=verbose) if verbose: push_feedback("Extract values from raster...") X, Y, coords = extract_ROI( in_image, in_vector, unique_fid, get_pixel_position=True, verbose=verbose) geo_transform = gdal.Open(in_image).GetGeoTransform() centroid = [_pixel_location_from_centroid( coord, geo_transform) for coord in coords] # init outLayer if np.issubdtype(X.dtype, np.integer): try: dtype = ogr.OFTInteger64 except BaseException: dtype = ogr.OFTInteger else: dtype = ogr.OFTReal outLayer = _create_point_layer( in_vector, out_vector, unique_fid, dtype=dtype, verbose=verbose) if verbose: outLayer._add_total_points(len(centroid)) if verbose: push_feedback("Adding each centroid to {}...".format(out_vector)) for idx, xy in enumerate(centroid): try: curY = Y[idx][0] except BaseException: curY = Y[idx] if curY != 0: if band_prefix is None: outLayer._add_point_to_layer(xy, curY) else: outLayer._add_point_to_layer(xy, curY, X[idx], band_prefix) outLayer.close_layer()
class _create_point_layer: def __init__(self, in_vector, out_vector, unique_id_field, dtype=ogr.OFTInteger, verbose=1): """ Create a vector layer as point type. Parameters ------------ in_vector : str. A filename or path corresponding to a vector file. It could be any file that GDAL/OGR can open. out_vector : str. Outvector. Extension will be used to select driver. Please use one of them : ['gpkg','sqlite','shp','netcdf','gpx']. unique_fid : str, optional (default=None). If None, will add a field called 'uniquefid' in the output vector. dtype : int, optional (default=ogr.OFTInteger) the ogr datatype. verbose : bool or int, optional (default=True). The higher is the int verbose, the more it will returns informations. Methods ---------- _add_total_points(nSamples): int. Will generate progress bar. _add_point_to_layer(coords): list,arr. coords[0] is X, coords[1] is Y. closeLayer(): Close the layer. """ self.verbose = verbose self._dtype = dtype # load inVector self.inData = ogr.Open(in_vector, 0) self.inLyr = self.inData.GetLayerByIndex(0) srs = self.inLyr.GetSpatialRef() # create outVector self.driver_name = get_ogr_driver_from_filename(out_vector) driver = ogr.GetDriverByName(self.driver_name) self.outData = driver.CreateDataSource(out_vector) # finish outVector creation self.outLyr = self.outData.CreateLayer('centroid', srs, ogr.wkbPoint) self.outLyrDefinition = self.outLyr.GetLayerDefn() # initialize variables self.idx = 0 self.lastPosition = 0 self.nSamples = None if self.driver_name == 'SQLITE' or self.driver_name == 'GPKG': self.outLyr.StartTransaction() self.unique_id_field = unique_id_field # Will generate unique_ID_and_FID when copying vector self.unique_ID_and_FID = False self.addBand = False def _add_band_value(self, bandPrefix, nBands): """ Parameters ------- bandPrefix : str. Prefix for each band (E.g. 'band') nBands : int. Number of band to save. """ self.nBandsFields = [] for b in range(nBands): field = bandPrefix + str(b) self.nBandsFields.append(field) self.outLyr.CreateField(ogr.FieldDefn(field, self._dtype)) self.addBand = True def _add_total_points(self, nSamples): """ Adding the total number of points will show a progress bar. Parameters -------- nSamples : int. The number of points to be added (in order to have a progress bar. Will not affect the processing if bad value is put here.) """ self.nSamples = nSamples self.pb = ProgressBar(nSamples, 'Adding points... ') def _add_point_to_layer( self, coords, uniqueIDValue, band_value=None, band_prefix=None): """ Parameters ------- coords : list, or arr. X is coords[0], Y is coords[1] uniqueIDValue : int. Unique ID Value to retrieve the value from fields band_value : None, or arr. If array, should have the same size as the number of bands defined in addBandsValue function. """ if self.verbose: if self.nSamples: currentPosition = int(self.idx + 1) if currentPosition != self.lastPosition: self.pb.add_position(self.idx + 1) self.lastPosition = currentPosition if self.unique_ID_and_FID is False: self._update_arr_according_to_vector() # add Band to list of fields if needed if band_value is not None and self.addBand is False: self._add_band_value(band_prefix, band_value.shape[0]) point = ogr.Geometry(ogr.wkbPoint) point.SetPoint(0, coords[0], coords[1]) featureIndex = self.idx feature = ogr.Feature(self.outLyrDefinition) feature.SetGeometry(point) feature.SetFID(featureIndex) # Retrieve inVector FID FID = self.uniqueFIDs[np.where(np.asarray( self.uniqueIDs, dtype=np.int) == int(uniqueIDValue))[0][0]] featUpdates = self.inLyr.GetFeature(int(FID)) for f in self.fields: if f != 'ogc_fid': feature.SetField(f, featUpdates.GetField(f)) if self.addBand is True: for idx, f in enumerate(self.nBandsFields): feature.SetField(f, int(band_value[idx])) self.outLyr.CreateFeature(feature) self.idx += 1 def _update_arr_according_to_vector(self): """ Update outVector layer by adding field from inVector. Store ID and FIDs to find the same value. """ self.uniqueIDs = [] self.uniqueFIDs = [] currentFeature = self.inLyr.GetNextFeature() self.fields = [ currentFeature.GetFieldDefnRef(i).GetName() for i in range( currentFeature.GetFieldCount())] # Add input Layer Fields to the output Layer layerDefinition = self.inLyr.GetLayerDefn() for i in range(len(self.fields)): fieldDefn = layerDefinition.GetFieldDefn(i) self.outLyr.CreateField(fieldDefn) self.inLyr.ResetReading() for feat in self.inLyr: uID = feat.GetField(self.unique_id_field) uFID = feat.GetFID() self.uniqueIDs.append(uID) self.uniqueFIDs.append(uFID) self.unique_ID_and_FID = True def close_layer(self): """ Once work is done, close all layers. """ if self.driver_name == 'SQLITE' or self.driver_name == 'GPKG': self.outLyr.CommitTransaction() self.inData.Destroy() self.outData.Destroy()
[docs]def get_distance_matrix(in_image, in_vector, field=False, verbose=False): """ Return for each pixel, the distance one-to-one to the other pixels listed in the vector. Parameters ---------- in_image : str. Path of the raster file where the vector file will be rasterize. in_vector : str. Path of the vector file to rasterize. field : str or False, optional (default=False). Name of the vector field to extract the value (must be float or integer). Returns -------- distance_matrix : array of shape (nSamples,nSamples) label : array of shape (nSamples) """ if field is not False: only_pixel_position = False else: only_pixel_position = True coords = extract_ROI( in_image, in_vector, field, get_pixel_position=True, only_pixel_position=only_pixel_position, verbose=verbose) from scipy.spatial import distance if field: label = coords[1] coords = coords[2] distance_matrix = np.asarray(distance.cdist( coords, coords, 'euclidean'), dtype=np.uint64) if field: return distance_matrix, label else: return distance_matrix
[docs]def get_ogr_driver_from_filename(fileName): """ Return driver name used in OGR accoriding to the extension of the vector. Parameters ---------- fileName : str. Path of the vector with extension. Returns ------- driverName : str 'SQLITE', 'GPKG', 'ESRI Shapefile'... Examples -------- >>> mtb.processing.get_ogr_driver_from_filename('goVegan.gpkg') 'GPKG' >>> mtb.processing.get_ogr_driver_from_filename('stopEatingAnimals.shp') 'ESRI Shapefile' """ extensions = ['sqlite', 'shp', 'netcdf', 'gpx', 'gpkg'] driversName = ['SQLITE', 'ESRI Shapefile', 'netCDF', 'GPX', 'GPKG'] fileName, ext = os.path.splitext(fileName) if ext[1:] not in extensions: msg = 'Your extension {} is not recognized as a valid extension for saving shape.\n'.format( ext) msg = msg + 'Supported extensions are ' + str(driversName) + '\n' msg = msg + 'We recommend you to use \'sqlite\' extension.' raise Warning(msg) else: driverIdx = [x for x, i in enumerate(extensions) if i == ext[1:]][0] driverName = driversName[driverIdx] return driverName
[docs]def read_vector_values(vector, *args, **kwargs): """ Read values from vector. Will list all fields beginning with the roiprefix 'band-' for example. Parameters ---------- vector : str Vector path ('myFolder/class.shp',str). *args : str Field name containing the field to extract values from (i.e. 'class', str). **kwargs : arg - band_prefix = 'band-' which is the common suffix listing the spectral values (i.e. band_prefix = 'band-'). - get_features = True, will return features in one list AND spatial Reference. Returns ------- List values, same length as number of parameters. If band_prefix as parameters, will return one array with n dimension. See also --------- museotoolbox.processing.extract_ROI: extract raster values from vector file. Examples --------- >>> from museotoolbox.datasets import load_historical_data >>> _,vector=load_historical_data() >>> Y = read_vector_values(vector,'Class') array([1, 1, 1, 1, 2, 2, 2, 1, 1, 2, 4, 5, 4, 5, 3, 3, 3], dtype=int32) >>> Y,fid = read_vector_values(vector,'Class','uniquefid') (array([1, 1, 1, 1, 2, 2, 2, 1, 1, 2, 4, 5, 4, 5, 3, 3, 3], dtype=int32), array([ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17], dtype=int32)) """ try: file = ogr.Open(vector) lyr = file.GetLayer() except BaseException: raise Exception("Can't open {} file".format(vector)) # get all fields and save only roiFields ldefn = lyr.GetLayerDefn() listFields = [] # add kwargs extractBands = False get_features = False if kwargs: # check if need to extract bands from vector if 'band_prefix' in kwargs.keys(): extractBands = True band_prefix = kwargs['band_prefix'] # check if need to extract features from vector if 'get_features' in kwargs.keys(): get_features = kwargs['get_features'] if extractBands: bandsFields = [] # if get_features, save Spatial Reference and Features if get_features: srs = lyr.GetSpatialRef() features = [] # List available fields for n in range(ldefn.GetFieldCount()): fdefn = ldefn.GetFieldDefn(n) if fdefn.name is not listFields: listFields.append(fdefn.name) if extractBands: if fdefn.name.startswith(band_prefix): bandsFields.append(fdefn.name) if len(kwargs) == 0 and len(args) == 0: raise ValueError('These fields are available : {}'.format(listFields)) else: if extractBands and len(bandsFields) == 0: raise ValueError( 'Band prefix field "{}" do not exists. These fields are available : {}'.format( band_prefix, listFields)) # Initialize empty arrays if len(args) > 0: # for single fields ROIlevels = [np.zeros(lyr.GetFeatureCount()) for i in args] if extractBands: # for band_prefix ROIvalues = np.zeros( [lyr.GetFeatureCount(), len(bandsFields)], dtype=np.int32) # Listing each feature and store to array for i, feature in enumerate(lyr): if extractBands: for j, band in enumerate(bandsFields): feat = feature.GetField(band) if i == 0: ROIvalues.astype(type(feat)) ROIvalues[i, j] = feat if len(args) > 0: try: for a in range(len(args)): feat = feature.GetField(args[a]) if i == 0: ROIlevels[a] = ROIlevels[a].astype(type(feat)) ROIlevels[a][i] = feature.GetField(args[a]) except BaseException: raise ValueError( "Field \"{}\" do not exists. These fields are available : {}".format( args[a], listFields)) if get_features: features.append(feature) # Initialize return fieldsToReturn = [] # if bandPrefix if extractBands: fieldsToReturn.append(ROIvalues) # if single fields if len(args) > 0: for i in range(len(args)): fieldsToReturn.append(ROIlevels[i]) # if features if get_features: fieldsToReturn.append(features) fieldsToReturn.append(srs) # if 1d, to turn single array if len(fieldsToReturn) == 1: fieldsToReturn = fieldsToReturn[0] return fieldsToReturn
def _add_vector_unique_fid(in_vector, unique_field='uniquefid', verbose=True): """ Add a field in the vector with an unique value for each of the feature. Parameters ----------- inVector : str Path of the vector file. uniqueField : str Name of the field to create verbose : bool or int, default True. Returns -------- None Examples --------- >>> _add_vector_unique_fid('myDB.gpkg',uniqueField='polygonid') Adding polygonid [########################################]100% """ if verbose: pB = ProgressBar(100, message='Adding ' + unique_field) driver_name = get_ogr_driver_from_filename(in_vector) inDriver = ogr.GetDriverByName(driver_name) inSrc = inDriver.Open(in_vector, 1) # 1 for writable inLyr = inSrc.GetLayer() # get the layer for this datasource inLyrDefn = inLyr.GetLayerDefn() if driver_name == 'SQLITE' or driver_name == 'GPKG': inLyr.StartTransaction() listFields = [] for n in range(inLyrDefn.GetFieldCount()): fdefn = inLyrDefn.GetFieldDefn(n) if fdefn.name is not listFields: listFields.append(fdefn.name) if unique_field in listFields: if verbose > 0: print( 'Field \'{}\' is already in {}'.format( unique_field, in_vector)) inSrc.Destroy() else: newField = ogr.FieldDefn(unique_field, ogr.OFTInteger) newField.SetWidth(20) inLyr.CreateField(newField) FIDs = [feat.GetFID() for feat in inLyr] ThisID = 1 for idx, FID in enumerate(FIDs): if verbose: pB.add_position(idx / len(FIDs) + 1 * 100) feat = inLyr.GetFeature(FID) #ThisID = int(feat.GetFGetFeature(feat)) # Write the FID to the ID field feat.SetField(unique_field, int(ThisID)) inLyr.SetFeature(feat) # update the feature # inLyr.CreateFeature(feat) ThisID += 1 if driver_name == 'SQLITE' or driver_name == 'GPKG': inLyr.CommitTransaction() inSrc.Destroy() def _reshape_ndim(X): """ Reshape ndim of X to have at least 2 dimensions Parameters ----------- X : np.ndarray array. Returns -------- X : np.ndarray Returns array with a least 2 dimensions. Examples --------- >>> X = np.arange(5,50) >>> X.shape (45,) >>> _reshape_ndim(X).shape (45, 1) """ if X.ndim == 1: X = X.reshape(-1, 1) return X