/****************************************************************************** * * Project: High Performance Image Reprojector * Purpose: Implementation of the GDALWarpKernel class. Implements the actual * image warping for a "chunk" of input and output imagery already * loaded into memory. * Author: Frank Warmerdam, warmerdam@pobox.com * ****************************************************************************** * Copyright (c) 2003, Frank Warmerdam * Copyright (c) 2008-2013, Even Rouault * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included * in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. ****************************************************************************/ #include "cpl_port.h" #include "gdalwarper.h" #include #include #include #include #include #include #include #include #include #include #include #include "cpl_atomic_ops.h" #include "cpl_conv.h" #include "cpl_error.h" #include "cpl_mask.h" #include "cpl_multiproc.h" #include "cpl_progress.h" #include "cpl_string.h" #include "cpl_vsi.h" #include "cpl_worker_thread_pool.h" #include "cpl_quad_tree.h" #include "gdal.h" #include "gdal_alg.h" #include "gdal_alg_priv.h" #include "gdal_thread_pool.h" #include "gdalwarpkernel_opencl.h" // #define CHECK_SUM_WITH_GEOS #ifdef CHECK_SUM_WITH_GEOS #include "ogr_geometry.h" #include "ogr_geos.h" #endif // We restrict to 64bit processors because they are guaranteed to have SSE2. // Could possibly be used too on 32bit, but we would need to check at runtime. #if defined(__x86_64) || defined(_M_X64) #include "gdalsse_priv.h" #if __SSE4_1__ #include #endif #if __SSE3__ #include #endif #endif CPL_CVSID("$Id$") constexpr double BAND_DENSITY_THRESHOLD = 0.0000000001; constexpr float SRC_DENSITY_THRESHOLD = 0.000000001f; // #define INSTANTIATE_FLOAT64_SSE2_IMPL static const int anGWKFilterRadius[] = { 0, // Nearest neighbour 1, // Bilinear 2, // Cubic Convolution (Catmull-Rom) 2, // Cubic B-Spline 3, // Lanczos windowed sinc 0, // Average 0, // Mode 0, // Reserved GRA_Gauss=7 0, // Max 0, // Min 0, // Med 0, // Q1 0, // Q3 0, // Sum 0, // RMS }; static double GWKBilinear(double dfX); static double GWKCubic(double dfX); static double GWKBSpline(double dfX); static double GWKLanczosSinc(double dfX); static const FilterFuncType apfGWKFilter[] = { nullptr, // Nearest neighbour GWKBilinear, // Bilinear GWKCubic, // Cubic Convolution (Catmull-Rom) GWKBSpline, // Cubic B-Spline GWKLanczosSinc, // Lanczos windowed sinc nullptr, // Average nullptr, // Mode nullptr, // Reserved GRA_Gauss=7 nullptr, // Max nullptr, // Min nullptr, // Med nullptr, // Q1 nullptr, // Q3 nullptr, // Sum nullptr, // RMS }; // TODO(schwehr): Can we make these functions have a const * const arg? static double GWKBilinear4Values(double *padfVals); static double GWKCubic4Values(double *padfVals); static double GWKBSpline4Values(double *padfVals); static double GWKLanczosSinc4Values(double *padfVals); static const FilterFunc4ValuesType apfGWKFilter4Values[] = { nullptr, // Nearest neighbour GWKBilinear4Values, // Bilinear GWKCubic4Values, // Cubic Convolution (Catmull-Rom) GWKBSpline4Values, // Cubic B-Spline GWKLanczosSinc4Values, // Lanczos windowed sinc nullptr, // Average nullptr, // Mode nullptr, // Reserved GRA_Gauss=7 nullptr, // Max nullptr, // Min nullptr, // Med nullptr, // Q1 nullptr, // Q3 nullptr, // Sum nullptr, // RMS }; int GWKGetFilterRadius(GDALResampleAlg eResampleAlg) { static_assert(CPL_ARRAYSIZE(anGWKFilterRadius) == GRA_LAST_VALUE + 1, "Bad size of anGWKFilterRadius"); return anGWKFilterRadius[eResampleAlg]; } FilterFuncType GWKGetFilterFunc(GDALResampleAlg eResampleAlg) { static_assert(CPL_ARRAYSIZE(apfGWKFilter) == GRA_LAST_VALUE + 1, "Bad size of apfGWKFilter"); return apfGWKFilter[eResampleAlg]; } FilterFunc4ValuesType GWKGetFilterFunc4Values(GDALResampleAlg eResampleAlg) { static_assert(CPL_ARRAYSIZE(apfGWKFilter4Values) == GRA_LAST_VALUE + 1, "Bad size of apfGWKFilter4Values"); return apfGWKFilter4Values[eResampleAlg]; } #ifdef HAVE_OPENCL static CPLErr GWKOpenCLCase(GDALWarpKernel *); #endif static CPLErr GWKGeneralCase(GDALWarpKernel *); static CPLErr GWKRealCase(GDALWarpKernel *poWK); static CPLErr GWKNearestNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK); static CPLErr GWKBilinearNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK); static CPLErr GWKCubicNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK); static CPLErr GWKCubicNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK); #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL static CPLErr GWKCubicNoMasksOrDstDensityOnlyDouble(GDALWarpKernel *poWK); #endif static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK); static CPLErr GWKNearestByte(GDALWarpKernel *poWK); static CPLErr GWKNearestNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK); static CPLErr GWKBilinearNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK); static CPLErr GWKBilinearNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK); #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL static CPLErr GWKBilinearNoMasksOrDstDensityOnlyDouble(GDALWarpKernel *poWK); #endif static CPLErr GWKCubicNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK); static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK); static CPLErr GWKNearestShort(GDALWarpKernel *poWK); static CPLErr GWKNearestNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK); static CPLErr GWKNearestFloat(GDALWarpKernel *poWK); static CPLErr GWKAverageOrMode(GDALWarpKernel *); static CPLErr GWKSumPreserving(GDALWarpKernel *); static CPLErr GWKCubicNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *); static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *); static CPLErr GWKBilinearNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *); /************************************************************************/ /* GWKJobStruct */ /************************************************************************/ struct GWKJobStruct { std::mutex &mutex; std::condition_variable &cv; int &counter; bool &stopFlag; GDALWarpKernel *poWK; int iYMin; int iYMax; int (*pfnProgress)(GWKJobStruct *psJob); void *pTransformerArg; void (*pfnFunc)( void *); // used by GWKRun() to assign the proper pTransformerArg GWKJobStruct(std::mutex &mutex_, std::condition_variable &cv_, int &counter_, bool &stopFlag_) : mutex(mutex_), cv(cv_), counter(counter_), stopFlag(stopFlag_), poWK(nullptr), iYMin(0), iYMax(0), pfnProgress(nullptr), pTransformerArg(nullptr), pfnFunc(nullptr) { } }; struct GWKThreadData { std::unique_ptr poJobQueue{}; std::unique_ptr> threadJobs{}; int nMaxThreads{0}; int counter{0}; bool stopFlag{false}; std::mutex mutex{}; std::condition_variable cv{}; bool bTransformerArgInputAssignedToThread{false}; void *pTransformerArgInput{ nullptr}; // owned by calling layer. Not to be destroyed std::map mapThreadToTransformerArg{}; int nTotalThreadCountForThisRun = 0; int nCurThreadCountForThisRun = 0; }; /************************************************************************/ /* GWKProgressThread() */ /************************************************************************/ // Return TRUE if the computation must be interrupted. static int GWKProgressThread(GWKJobStruct *psJob) { bool stop = false; { std::lock_guard lock(psJob->mutex); psJob->counter++; stop = psJob->stopFlag; } psJob->cv.notify_one(); return stop; } /************************************************************************/ /* GWKProgressMonoThread() */ /************************************************************************/ // Return TRUE if the computation must be interrupted. static int GWKProgressMonoThread(GWKJobStruct *psJob) { GDALWarpKernel *poWK = psJob->poWK; if (!poWK->pfnProgress( poWK->dfProgressBase + poWK->dfProgressScale * (++psJob->counter / static_cast(psJob->iYMax)), "", poWK->pProgress)) { CPLError(CE_Failure, CPLE_UserInterrupt, "User terminated"); psJob->stopFlag = true; return TRUE; } return FALSE; } /************************************************************************/ /* GWKGenericMonoThread() */ /************************************************************************/ static CPLErr GWKGenericMonoThread(GDALWarpKernel *poWK, void (*pfnFunc)(void *pUserData)) { GWKThreadData td; // NOTE: the mutex is not used. GWKJobStruct job(td.mutex, td.cv, td.counter, td.stopFlag); job.poWK = poWK; job.iYMin = 0; job.iYMax = poWK->nDstYSize; job.pfnProgress = GWKProgressMonoThread; job.pTransformerArg = poWK->pTransformerArg; pfnFunc(&job); return td.stopFlag ? CE_Failure : CE_None; } /************************************************************************/ /* GWKThreadsCreate() */ /************************************************************************/ void *GWKThreadsCreate(char **papszWarpOptions, GDALTransformerFunc /* pfnTransformer */, void *pTransformerArg) { const char *pszWarpThreads = CSLFetchNameValue(papszWarpOptions, "NUM_THREADS"); if (pszWarpThreads == nullptr) pszWarpThreads = CPLGetConfigOption("GDAL_NUM_THREADS", "1"); int nThreads = 0; if (EQUAL(pszWarpThreads, "ALL_CPUS")) nThreads = CPLGetNumCPUs(); else nThreads = atoi(pszWarpThreads); if (nThreads <= 1) nThreads = 0; if (nThreads > 128) nThreads = 128; GWKThreadData *psThreadData = new GWKThreadData(); auto poThreadPool = nThreads > 0 ? GDALGetGlobalThreadPool(nThreads) : nullptr; if (nThreads && poThreadPool) { psThreadData->nMaxThreads = nThreads; psThreadData->threadJobs.reset(new std::vector( nThreads, GWKJobStruct(psThreadData->mutex, psThreadData->cv, psThreadData->counter, psThreadData->stopFlag))); psThreadData->poJobQueue = poThreadPool->CreateJobQueue(); psThreadData->pTransformerArgInput = pTransformerArg; } return psThreadData; } /************************************************************************/ /* GWKThreadsEnd() */ /************************************************************************/ void GWKThreadsEnd(void *psThreadDataIn) { if (psThreadDataIn == nullptr) return; GWKThreadData *psThreadData = static_cast(psThreadDataIn); if (psThreadData->poJobQueue) { for (auto &pair : psThreadData->mapThreadToTransformerArg) { CPLAssert(pair.second != psThreadData->pTransformerArgInput); GDALDestroyTransformer(pair.second); } psThreadData->poJobQueue.reset(); } delete psThreadData; } /************************************************************************/ /* ThreadFuncAdapter() */ /************************************************************************/ static void ThreadFuncAdapter(void *pData) { GWKJobStruct *psJob = static_cast(pData); GWKThreadData *psThreadData = static_cast(psJob->poWK->psThreadData); // Look if we have already a per-thread transformer void *pTransformerArg = nullptr; const GIntBig nThreadId = CPLGetPID(); { std::lock_guard lock(psThreadData->mutex); ++psThreadData->nCurThreadCountForThisRun; auto oIter = psThreadData->mapThreadToTransformerArg.find(nThreadId); if (oIter != psThreadData->mapThreadToTransformerArg.end()) { pTransformerArg = oIter->second; } else if (!psThreadData->bTransformerArgInputAssignedToThread && psThreadData->nCurThreadCountForThisRun == psThreadData->nTotalThreadCountForThisRun) { // If we are the last thread to be started, temporarily borrow the // original transformer psThreadData->bTransformerArgInputAssignedToThread = true; pTransformerArg = psThreadData->pTransformerArgInput; psThreadData->mapThreadToTransformerArg[nThreadId] = pTransformerArg; } if (pTransformerArg == nullptr) { CPLAssert(psThreadData->pTransformerArgInput != nullptr); CPLAssert(!psThreadData->bTransformerArgInputAssignedToThread); } } // If no transformer assigned to current thread, instantiate one if (pTransformerArg == nullptr) { // This somehow assumes that GDALCloneTransformer() is thread-safe // which should normally be the case. pTransformerArg = GDALCloneTransformer(psThreadData->pTransformerArgInput); // Lock for the stop flag and the transformer map. std::lock_guard lock(psThreadData->mutex); if (!pTransformerArg) { psJob->stopFlag = true; return; } psThreadData->mapThreadToTransformerArg[nThreadId] = pTransformerArg; } psJob->pTransformerArg = pTransformerArg; psJob->pfnFunc(pData); // Give back original transformer, if borrowed. { std::lock_guard lock(psThreadData->mutex); if (psThreadData->bTransformerArgInputAssignedToThread && pTransformerArg == psThreadData->pTransformerArgInput) { psThreadData->mapThreadToTransformerArg.erase( psThreadData->mapThreadToTransformerArg.find(nThreadId)); psThreadData->bTransformerArgInputAssignedToThread = false; } } } /************************************************************************/ /* GWKRun() */ /************************************************************************/ static CPLErr GWKRun(GDALWarpKernel *poWK, const char *pszFuncName, void (*pfnFunc)(void *pUserData)) { const int nDstYSize = poWK->nDstYSize; CPLDebug("GDAL", "GDALWarpKernel()::%s() " "Src=%d,%d,%dx%d Dst=%d,%d,%dx%d", pszFuncName, poWK->nSrcXOff, poWK->nSrcYOff, poWK->nSrcXSize, poWK->nSrcYSize, poWK->nDstXOff, poWK->nDstYOff, poWK->nDstXSize, poWK->nDstYSize); if (!poWK->pfnProgress(poWK->dfProgressBase, "", poWK->pProgress)) { CPLError(CE_Failure, CPLE_UserInterrupt, "User terminated"); return CE_Failure; } GWKThreadData *psThreadData = static_cast(poWK->psThreadData); if (psThreadData == nullptr || psThreadData->poJobQueue == nullptr) { return GWKGenericMonoThread(poWK, pfnFunc); } int nThreads = std::min(psThreadData->nMaxThreads, nDstYSize / 2); // Config option mostly useful for tests to be able to test multithreading // with small rasters const int nWarpChunkSize = atoi(CPLGetConfigOption("WARP_THREAD_CHUNK_SIZE", "65536")); if (nWarpChunkSize > 0) { GIntBig nChunks = static_cast(nDstYSize) * poWK->nDstXSize / nWarpChunkSize; if (nThreads > nChunks) nThreads = static_cast(nChunks); } if (nThreads <= 0) nThreads = 1; CPLDebug("WARP", "Using %d threads", nThreads); auto &jobs = *psThreadData->threadJobs; CPLAssert(static_cast(jobs.size()) >= nThreads); // Fill-in job structures. for (int i = 0; i < nThreads; ++i) { auto &job = jobs[i]; job.poWK = poWK; job.iYMin = static_cast(static_cast(i) * nDstYSize / nThreads); job.iYMax = static_cast(static_cast(i + 1) * nDstYSize / nThreads); if (poWK->pfnProgress != GDALDummyProgress) job.pfnProgress = GWKProgressThread; job.pfnFunc = pfnFunc; } { std::unique_lock lock(psThreadData->mutex); psThreadData->nTotalThreadCountForThisRun = nThreads; // coverity[missing_lock] psThreadData->nCurThreadCountForThisRun = 0; // Start jobs. for (int i = 0; i < nThreads; ++i) { auto &job = jobs[i]; psThreadData->poJobQueue->SubmitJob(ThreadFuncAdapter, static_cast(&job)); } /* -------------------------------------------------------------------- */ /* Report progress. */ /* -------------------------------------------------------------------- */ if (poWK->pfnProgress != GDALDummyProgress) { int &counter = psThreadData->counter; while (counter < nDstYSize) { psThreadData->cv.wait(lock); if (!poWK->pfnProgress( poWK->dfProgressBase + poWK->dfProgressScale * (counter / static_cast(nDstYSize)), "", poWK->pProgress)) { CPLError(CE_Failure, CPLE_UserInterrupt, "User terminated"); psThreadData->stopFlag = true; break; } } } } /* -------------------------------------------------------------------- */ /* Wait for all jobs to complete. */ /* -------------------------------------------------------------------- */ psThreadData->poJobQueue->WaitCompletion(); return psThreadData->stopFlag ? CE_Failure : CE_None; } /************************************************************************/ /* ==================================================================== */ /* GDALWarpKernel */ /* ==================================================================== */ /************************************************************************/ /** * \class GDALWarpKernel "gdalwarper.h" * * Low level image warping class. * * This class is responsible for low level image warping for one * "chunk" of imagery. The class is essentially a structure with all * data members public - primarily so that new special-case functions * can be added without changing the class declaration. * * Applications are normally intended to interactive with warping facilities * through the GDALWarpOperation class, though the GDALWarpKernel can in * theory be used directly if great care is taken in setting up the * control data. * *

Design Issues

* * The intention is that PerformWarp() would analyze the setup in terms * of the datatype, resampling type, and validity/density mask usage and * pick one of many specific implementations of the warping algorithm over * a continuum of optimization vs. generality. At one end there will be a * reference general purpose implementation of the algorithm that supports * any data type (working internally in double precision complex), all three * resampling types, and any or all of the validity/density masks. At the * other end would be highly optimized algorithms for common cases like * nearest neighbour resampling on GDT_Byte data with no masks. * * The full set of optimized versions have not been decided but we should * expect to have at least: * - One for each resampling algorithm for 8bit data with no masks. * - One for each resampling algorithm for float data with no masks. * - One for each resampling algorithm for float data with any/all masks * (essentially the generic case for just float data). * - One for each resampling algorithm for 8bit data with support for * input validity masks (per band or per pixel). This handles the common * case of nodata masking. * - One for each resampling algorithm for float data with support for * input validity masks (per band or per pixel). This handles the common * case of nodata masking. * * Some of the specializations would operate on all bands in one pass * (especially the ones without masking would do this), while others might * process each band individually to reduce code complexity. * *

Masking Semantics

* * A detailed explanation of the semantics of the validity and density masks, * and their effects on resampling kernels is needed here. */ /************************************************************************/ /* GDALWarpKernel Data Members */ /************************************************************************/ /** * \var GDALResampleAlg GDALWarpKernel::eResample; * * Resampling algorithm. * * The resampling algorithm to use. One of GRA_NearestNeighbour, GRA_Bilinear, * GRA_Cubic, GRA_CubicSpline, GRA_Lanczos, GRA_Average, GRA_RMS, * GRA_Mode or GRA_Sum. * * This field is required. GDT_NearestNeighbour may be used as a default * value. */ /** * \var GDALDataType GDALWarpKernel::eWorkingDataType; * * Working pixel data type. * * The datatype of pixels in the source image (papabySrcimage) and * destination image (papabyDstImage) buffers. Note that operations on * some data types (such as GDT_Byte) may be much better optimized than other * less common cases. * * This field is required. It may not be GDT_Unknown. */ /** * \var int GDALWarpKernel::nBands; * * Number of bands. * * The number of bands (layers) of imagery being warped. Determines the * number of entries in the papabySrcImage, papanBandSrcValid, * and papabyDstImage arrays. * * This field is required. */ /** * \var int GDALWarpKernel::nSrcXSize; * * Source image width in pixels. * * This field is required. */ /** * \var int GDALWarpKernel::nSrcYSize; * * Source image height in pixels. * * This field is required. */ /** * \var double GDALWarpKernel::dfSrcXExtraSize; * * Number of pixels included in nSrcXSize that are present on the edges of * the area of interest to take into account the width of the kernel. * * This field is required. */ /** * \var double GDALWarpKernel::dfSrcYExtraSize; * * Number of pixels included in nSrcYExtraSize that are present on the edges of * the area of interest to take into account the height of the kernel. * * This field is required. */ /** * \var int GDALWarpKernel::papabySrcImage; * * Array of source image band data. * * This is an array of pointers (of size GDALWarpKernel::nBands) pointers * to image data. Each individual band of image data is organized as a single * block of image data in left to right, then bottom to top order. The actual * type of the image data is determined by GDALWarpKernel::eWorkingDataType. * * To access the pixel value for the (x=3, y=4) pixel (zero based) of * the second band with eWorkingDataType set to GDT_Float32 use code like * this: * * \code * float dfPixelValue; * int nBand = 2-1; // Band indexes are zero based. * int nPixel = 3; // Zero based. * int nLine = 4; // Zero based. * * assert( nPixel >= 0 && nPixel < poKern->nSrcXSize ); * assert( nLine >= 0 && nLine < poKern->nSrcYSize ); * assert( nBand >= 0 && nBand < poKern->nBands ); * dfPixelValue = ((float *) poKern->papabySrcImage[nBand]) * [nPixel + nLine * poKern->nSrcXSize]; * \endcode * * This field is required. */ /** * \var GUInt32 **GDALWarpKernel::papanBandSrcValid; * * Per band validity mask for source pixels. * * Array of pixel validity mask layers for each source band. Each of * the mask layers is the same size (in pixels) as the source image with * one bit per pixel. Note that it is legal (and common) for this to be * NULL indicating that none of the pixels are invalidated, or for some * band validity masks to be NULL in which case all pixels of the band are * valid. The following code can be used to test the validity of a particular * pixel. * * \code * int bIsValid = TRUE; * int nBand = 2-1; // Band indexes are zero based. * int nPixel = 3; // Zero based. * int nLine = 4; // Zero based. * * assert( nPixel >= 0 && nPixel < poKern->nSrcXSize ); * assert( nLine >= 0 && nLine < poKern->nSrcYSize ); * assert( nBand >= 0 && nBand < poKern->nBands ); * * if( poKern->papanBandSrcValid != NULL * && poKern->papanBandSrcValid[nBand] != NULL ) * { * GUInt32 *panBandMask = poKern->papanBandSrcValid[nBand]; * int iPixelOffset = nPixel + nLine * poKern->nSrcXSize; * * bIsValid = CPLMaskGet(panBandMask, iPixelOffset) * } * \endcode */ /** * \var GUInt32 *GDALWarpKernel::panUnifiedSrcValid; * * Per pixel validity mask for source pixels. * * A single validity mask layer that applies to the pixels of all source * bands. It is accessed similarly to papanBandSrcValid, but without the * extra level of band indirection. * * This pointer may be NULL indicating that all pixels are valid. * * Note that if both panUnifiedSrcValid, and papanBandSrcValid are available, * the pixel isn't considered to be valid unless both arrays indicate it is * valid. */ /** * \var float *GDALWarpKernel::pafUnifiedSrcDensity; * * Per pixel density mask for source pixels. * * A single density mask layer that applies to the pixels of all source * bands. It contains values between 0.0 and 1.0 indicating the degree to * which this pixel should be allowed to contribute to the output result. * * This pointer may be NULL indicating that all pixels have a density of 1.0. * * The density for a pixel may be accessed like this: * * \code * float fDensity = 1.0; * int nPixel = 3; // Zero based. * int nLine = 4; // Zero based. * * assert( nPixel >= 0 && nPixel < poKern->nSrcXSize ); * assert( nLine >= 0 && nLine < poKern->nSrcYSize ); * if( poKern->pafUnifiedSrcDensity != NULL ) * fDensity = poKern->pafUnifiedSrcDensity * [nPixel + nLine * poKern->nSrcXSize]; * \endcode */ /** * \var int GDALWarpKernel::nDstXSize; * * Width of destination image in pixels. * * This field is required. */ /** * \var int GDALWarpKernel::nDstYSize; * * Height of destination image in pixels. * * This field is required. */ /** * \var GByte **GDALWarpKernel::papabyDstImage; * * Array of destination image band data. * * This is an array of pointers (of size GDALWarpKernel::nBands) pointers * to image data. Each individual band of image data is organized as a single * block of image data in left to right, then bottom to top order. The actual * type of the image data is determined by GDALWarpKernel::eWorkingDataType. * * To access the pixel value for the (x=3, y=4) pixel (zero based) of * the second band with eWorkingDataType set to GDT_Float32 use code like * this: * * \code * float dfPixelValue; * int nBand = 2-1; // Band indexes are zero based. * int nPixel = 3; // Zero based. * int nLine = 4; // Zero based. * * assert( nPixel >= 0 && nPixel < poKern->nDstXSize ); * assert( nLine >= 0 && nLine < poKern->nDstYSize ); * assert( nBand >= 0 && nBand < poKern->nBands ); * dfPixelValue = ((float *) poKern->papabyDstImage[nBand]) * [nPixel + nLine * poKern->nSrcYSize]; * \endcode * * This field is required. */ /** * \var GUInt32 *GDALWarpKernel::panDstValid; * * Per pixel validity mask for destination pixels. * * A single validity mask layer that applies to the pixels of all destination * bands. It is accessed similarly to papanUnitifiedSrcValid, but based * on the size of the destination image. * * This pointer may be NULL indicating that all pixels are valid. */ /** * \var float *GDALWarpKernel::pafDstDensity; * * Per pixel density mask for destination pixels. * * A single density mask layer that applies to the pixels of all destination * bands. It contains values between 0.0 and 1.0. * * This pointer may be NULL indicating that all pixels have a density of 1.0. * * The density for a pixel may be accessed like this: * * \code * float fDensity = 1.0; * int nPixel = 3; // Zero based. * int nLine = 4; // Zero based. * * assert( nPixel >= 0 && nPixel < poKern->nDstXSize ); * assert( nLine >= 0 && nLine < poKern->nDstYSize ); * if( poKern->pafDstDensity != NULL ) * fDensity = poKern->pafDstDensity[nPixel + nLine * poKern->nDstXSize]; * \endcode */ /** * \var int GDALWarpKernel::nSrcXOff; * * X offset to source pixel coordinates for transformation. * * See pfnTransformer. * * This field is required. */ /** * \var int GDALWarpKernel::nSrcYOff; * * Y offset to source pixel coordinates for transformation. * * See pfnTransformer. * * This field is required. */ /** * \var int GDALWarpKernel::nDstXOff; * * X offset to destination pixel coordinates for transformation. * * See pfnTransformer. * * This field is required. */ /** * \var int GDALWarpKernel::nDstYOff; * * Y offset to destination pixel coordinates for transformation. * * See pfnTransformer. * * This field is required. */ /** * \var GDALTransformerFunc GDALWarpKernel::pfnTransformer; * * Source/destination location transformer. * * The function to call to transform coordinates between source image * pixel/line coordinates and destination image pixel/line coordinates. * See GDALTransformerFunc() for details of the semantics of this function. * * The GDALWarpKern algorithm will only ever use this transformer in * "destination to source" mode (bDstToSrc=TRUE), and will always pass * partial or complete scanlines of points in the destination image as * input. This means, among other things, that it is safe to the * approximating transform GDALApproxTransform() as the transformation * function. * * Source and destination images may be subsets of a larger overall image. * The transformation algorithms will expect and return pixel/line coordinates * in terms of this larger image, so coordinates need to be offset by * the offsets specified in nSrcXOff, nSrcYOff, nDstXOff, and nDstYOff before * passing to pfnTransformer, and after return from it. * * The GDALWarpKernel::pfnTransformerArg value will be passed as the callback * data to this function when it is called. * * This field is required. */ /** * \var void *GDALWarpKernel::pTransformerArg; * * Callback data for pfnTransformer. * * This field may be NULL if not required for the pfnTransformer being used. */ /** * \var GDALProgressFunc GDALWarpKernel::pfnProgress; * * The function to call to report progress of the algorithm, and to check * for a requested termination of the operation. It operates according to * GDALProgressFunc() semantics. * * Generally speaking the progress function will be invoked for each * scanline of the destination buffer that has been processed. * * This field may be NULL (internally set to GDALDummyProgress()). */ /** * \var void *GDALWarpKernel::pProgress; * * Callback data for pfnProgress. * * This field may be NULL if not required for the pfnProgress being used. */ /************************************************************************/ /* GDALWarpKernel() */ /************************************************************************/ GDALWarpKernel::GDALWarpKernel() : papszWarpOptions(nullptr), eResample(GRA_NearestNeighbour), eWorkingDataType(GDT_Unknown), nBands(0), nSrcXSize(0), nSrcYSize(0), dfSrcXExtraSize(0.0), dfSrcYExtraSize(0.0), papabySrcImage(nullptr), papanBandSrcValid(nullptr), panUnifiedSrcValid(nullptr), pafUnifiedSrcDensity(nullptr), nDstXSize(0), nDstYSize(0), papabyDstImage(nullptr), panDstValid(nullptr), pafDstDensity(nullptr), dfXScale(1.0), dfYScale(1.0), dfXFilter(0.0), dfYFilter(0.0), nXRadius(0), nYRadius(0), nFiltInitX(0), nFiltInitY(0), nSrcXOff(0), nSrcYOff(0), nDstXOff(0), nDstYOff(0), pfnTransformer(nullptr), pTransformerArg(nullptr), pfnProgress(GDALDummyProgress), pProgress(nullptr), dfProgressBase(0.0), dfProgressScale(1.0), padfDstNoDataReal(nullptr), psThreadData(nullptr) { } /************************************************************************/ /* ~GDALWarpKernel() */ /************************************************************************/ GDALWarpKernel::~GDALWarpKernel() { } /************************************************************************/ /* PerformWarp() */ /************************************************************************/ /** * \fn CPLErr GDALWarpKernel::PerformWarp(); * * This method performs the warp described in the GDALWarpKernel. * * @return CE_None on success or CE_Failure if an error occurs. */ CPLErr GDALWarpKernel::PerformWarp() { const CPLErr eErr = Validate(); if (eErr != CE_None) return eErr; // See #2445 and #3079. if (nSrcXSize <= 0 || nSrcYSize <= 0) { if (!pfnProgress(dfProgressBase + dfProgressScale, "", pProgress)) { CPLError(CE_Failure, CPLE_UserInterrupt, "User terminated"); return CE_Failure; } return CE_None; } /* -------------------------------------------------------------------- */ /* Pre-calculate resampling scales and window sizes for filtering. */ /* -------------------------------------------------------------------- */ dfXScale = static_cast(nDstXSize) / (nSrcXSize - dfSrcXExtraSize); dfYScale = static_cast(nDstYSize) / (nSrcYSize - dfSrcYExtraSize); if (nSrcXSize >= nDstXSize && nSrcXSize <= nDstXSize + dfSrcXExtraSize) dfXScale = 1.0; if (nSrcYSize >= nDstYSize && nSrcYSize <= nDstYSize + dfSrcYExtraSize) dfYScale = 1.0; if (dfXScale < 1.0) { double dfXReciprocalScale = 1.0 / dfXScale; const int nXReciprocalScale = static_cast(dfXReciprocalScale + 0.5); if (fabs(dfXReciprocalScale - nXReciprocalScale) < 0.05) dfXScale = 1.0 / nXReciprocalScale; } if (dfYScale < 1.0) { double dfYReciprocalScale = 1.0 / dfYScale; const int nYReciprocalScale = static_cast(dfYReciprocalScale + 0.5); if (fabs(dfYReciprocalScale - nYReciprocalScale) < 0.05) dfYScale = 1.0 / nYReciprocalScale; } // XSCALE and YSCALE undocumented for now. Can help in some cases. // Best would probably be a per-pixel scale computation. const char *pszXScale = CSLFetchNameValue(papszWarpOptions, "XSCALE"); if (pszXScale != nullptr && !EQUAL(pszXScale, "FROM_GRID_SAMPLING")) dfXScale = CPLAtof(pszXScale); const char *pszYScale = CSLFetchNameValue(papszWarpOptions, "YSCALE"); if (pszYScale != nullptr) dfYScale = CPLAtof(pszYScale); // If the xscale is significantly lower than the yscale, this is highly // suspicious of a situation of wrapping a very large virtual file in // geographic coordinates with left and right parts being close to the // antimeridian. In that situation, the xscale computed by the above method // is completely wrong. Prefer doing an average of a few sample points // instead if ((dfYScale / dfXScale > 100 || (pszXScale != nullptr && EQUAL(pszXScale, "FROM_GRID_SAMPLING")))) { // Sample points along a grid const int nPointsX = std::min(10, nDstXSize); const int nPointsY = std::min(10, nDstYSize); const int nPoints = 3 * nPointsX * nPointsY; std::vector padfX; std::vector padfY; std::vector padfZ(nPoints); std::vector pabSuccess(nPoints); for (int iY = 0; iY < nPointsY; iY++) { for (int iX = 0; iX < nPointsX; iX++) { const double dfX = nPointsX == 1 ? 0.0 : static_cast(iX) * nDstXSize / (nPointsX - 1); const double dfY = nPointsY == 1 ? 0.0 : static_cast(iY) * nDstYSize / (nPointsY - 1); // Reproject each destination sample point and its neighbours // at (x+1,y) and (x,y+1), so as to get the local scale. padfX.push_back(dfX); padfY.push_back(dfY); padfX.push_back((iX == nPointsX - 1) ? dfX - 1 : dfX + 1); padfY.push_back(dfY); padfX.push_back(dfX); padfY.push_back((iY == nPointsY - 1) ? dfY - 1 : dfY + 1); } } pfnTransformer(pTransformerArg, TRUE, nPoints, &padfX[0], &padfY[0], &padfZ[0], &pabSuccess[0]); // Compute the xscale at each sampling point std::vector adfXScales; for (int i = 0; i < nPoints; i += 3) { if (pabSuccess[i] && pabSuccess[i + 1] && pabSuccess[i + 2]) { const double dfPointXScale = 1.0 / std::max(std::abs(padfX[i + 1] - padfX[i]), std::abs(padfX[i + 2] - padfX[i])); adfXScales.push_back(dfPointXScale); } } // Sort by increasing xcale std::sort(adfXScales.begin(), adfXScales.end()); if (!adfXScales.empty()) { // Compute the average of scales, but eliminate outliers small // scales, if some samples are just along the discontinuity. const double dfMaxPointXScale = adfXScales.back(); double dfSumPointXScale = 0; int nCountPointScale = 0; for (double dfPointXScale : adfXScales) { if (dfPointXScale > dfMaxPointXScale / 10) { dfSumPointXScale += dfPointXScale; nCountPointScale++; } } if (nCountPointScale > 0) // should always be true { const double dfXScaleFromSampling = dfSumPointXScale / nCountPointScale; #if DEBUG_VERBOSE CPLDebug("WARP", "Correcting dfXScale from %f to %f", dfXScale, dfXScaleFromSampling); #endif dfXScale = dfXScaleFromSampling; } } } #if DEBUG_VERBOSE CPLDebug("WARP", "dfXScale = %f, dfYScale = %f", dfXScale, dfYScale); #endif const int bUse4SamplesFormula = dfXScale >= 0.95 && dfYScale >= 0.95; // Safety check for callers that would use GDALWarpKernel without using // GDALWarpOperation. if ((eResample == GRA_CubicSpline || eResample == GRA_Lanczos || ((eResample == GRA_Cubic || eResample == GRA_Bilinear) && !bUse4SamplesFormula)) && atoi(CSLFetchNameValueDef(papszWarpOptions, "EXTRA_ELTS", "0")) != WARP_EXTRA_ELTS) { CPLError(CE_Failure, CPLE_AppDefined, "Source arrays must have WARP_EXTRA_ELTS extra elements at " "their end. " "See GDALWarpKernel class definition. If this condition is " "fulfilled, define a EXTRA_ELTS=%d warp options", WARP_EXTRA_ELTS); return CE_Failure; } dfXFilter = anGWKFilterRadius[eResample]; dfYFilter = anGWKFilterRadius[eResample]; nXRadius = dfXScale < 1.0 ? static_cast(ceil(dfXFilter / dfXScale)) : static_cast(dfXFilter); nYRadius = dfYScale < 1.0 ? static_cast(ceil(dfYFilter / dfYScale)) : static_cast(dfYFilter); // Filter window offset depends on the parity of the kernel radius. nFiltInitX = ((anGWKFilterRadius[eResample] + 1) % 2) - nXRadius; nFiltInitY = ((anGWKFilterRadius[eResample] + 1) % 2) - nYRadius; bApplyVerticalShift = CPLFetchBool(papszWarpOptions, "APPLY_VERTICAL_SHIFT", false); dfMultFactorVerticalShift = CPLAtof(CSLFetchNameValueDef( papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT", "1.0")); /* -------------------------------------------------------------------- */ /* Set up resampling functions. */ /* -------------------------------------------------------------------- */ if (CPLFetchBool(papszWarpOptions, "USE_GENERAL_CASE", false)) return GWKGeneralCase(this); #if defined(HAVE_OPENCL) if ((eWorkingDataType == GDT_Byte || eWorkingDataType == GDT_CInt16 || eWorkingDataType == GDT_UInt16 || eWorkingDataType == GDT_Int16 || eWorkingDataType == GDT_CFloat32 || eWorkingDataType == GDT_Float32) && (eResample == GRA_Bilinear || eResample == GRA_Cubic || eResample == GRA_CubicSpline || eResample == GRA_Lanczos) && !bApplyVerticalShift && // OpenCL warping gives different results than the ones expected by autotest, // so disable it by default even if found. CPLTestBool( CSLFetchNameValueDef(papszWarpOptions, "USE_OPENCL", CPLGetConfigOption("GDAL_USE_OPENCL", "NO")))) { if (pafUnifiedSrcDensity != nullptr) { // If pafUnifiedSrcDensity is only set to 1.0, then we can // discard it. bool bFoundNotOne = false; for (GPtrDiff_t j = 0; j < static_cast(nSrcXSize) * nSrcYSize; j++) { if (pafUnifiedSrcDensity[j] != 1.0) { bFoundNotOne = true; break; } } if (!bFoundNotOne) { CPLFree(pafUnifiedSrcDensity); pafUnifiedSrcDensity = nullptr; } } if (pafUnifiedSrcDensity != nullptr) { // Typically if there's a cutline or an alpha band static bool bHasWarned = false; if (!bHasWarned) { bHasWarned = true; CPLDebug("WARP", "pafUnifiedSrcDensity is not null, " "hence OpenCL warper cannot be used"); } } else { const CPLErr eResult = GWKOpenCLCase(this); // CE_Warning tells us a suitable OpenCL environment was not available // so we fall through to other CPU based methods. if (eResult != CE_Warning) return eResult; } } #endif // defined HAVE_OPENCL const bool bNoMasksOrDstDensityOnly = papanBandSrcValid == nullptr && panUnifiedSrcValid == nullptr && pafUnifiedSrcDensity == nullptr && panDstValid == nullptr; if (eWorkingDataType == GDT_Byte && eResample == GRA_NearestNeighbour && bNoMasksOrDstDensityOnly) return GWKNearestNoMasksOrDstDensityOnlyByte(this); if (eWorkingDataType == GDT_Byte && eResample == GRA_Bilinear && bNoMasksOrDstDensityOnly) return GWKBilinearNoMasksOrDstDensityOnlyByte(this); if (eWorkingDataType == GDT_Byte && eResample == GRA_Cubic && bNoMasksOrDstDensityOnly) return GWKCubicNoMasksOrDstDensityOnlyByte(this); if (eWorkingDataType == GDT_Byte && eResample == GRA_CubicSpline && bNoMasksOrDstDensityOnly) return GWKCubicSplineNoMasksOrDstDensityOnlyByte(this); if (eWorkingDataType == GDT_Byte && eResample == GRA_NearestNeighbour) return GWKNearestByte(this); if ((eWorkingDataType == GDT_Int16 || eWorkingDataType == GDT_UInt16) && eResample == GRA_NearestNeighbour && bNoMasksOrDstDensityOnly) return GWKNearestNoMasksOrDstDensityOnlyShort(this); if ((eWorkingDataType == GDT_Int16) && eResample == GRA_Cubic && bNoMasksOrDstDensityOnly) return GWKCubicNoMasksOrDstDensityOnlyShort(this); if ((eWorkingDataType == GDT_Int16) && eResample == GRA_CubicSpline && bNoMasksOrDstDensityOnly) return GWKCubicSplineNoMasksOrDstDensityOnlyShort(this); if ((eWorkingDataType == GDT_Int16) && eResample == GRA_Bilinear && bNoMasksOrDstDensityOnly) return GWKBilinearNoMasksOrDstDensityOnlyShort(this); if ((eWorkingDataType == GDT_UInt16) && eResample == GRA_Cubic && bNoMasksOrDstDensityOnly) return GWKCubicNoMasksOrDstDensityOnlyUShort(this); if ((eWorkingDataType == GDT_UInt16) && eResample == GRA_CubicSpline && bNoMasksOrDstDensityOnly) return GWKCubicSplineNoMasksOrDstDensityOnlyUShort(this); if ((eWorkingDataType == GDT_UInt16) && eResample == GRA_Bilinear && bNoMasksOrDstDensityOnly) return GWKBilinearNoMasksOrDstDensityOnlyUShort(this); if ((eWorkingDataType == GDT_Int16 || eWorkingDataType == GDT_UInt16) && eResample == GRA_NearestNeighbour) return GWKNearestShort(this); if (eWorkingDataType == GDT_Float32 && eResample == GRA_NearestNeighbour && bNoMasksOrDstDensityOnly) return GWKNearestNoMasksOrDstDensityOnlyFloat(this); if (eWorkingDataType == GDT_Float32 && eResample == GRA_NearestNeighbour) return GWKNearestFloat(this); if (eWorkingDataType == GDT_Float32 && eResample == GRA_Bilinear && bNoMasksOrDstDensityOnly) return GWKBilinearNoMasksOrDstDensityOnlyFloat(this); if (eWorkingDataType == GDT_Float32 && eResample == GRA_Cubic && bNoMasksOrDstDensityOnly) return GWKCubicNoMasksOrDstDensityOnlyFloat(this); #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL if (eWorkingDataType == GDT_Float64 && eResample == GRA_Bilinear && bNoMasksOrDstDensityOnly) return GWKBilinearNoMasksOrDstDensityOnlyDouble(this); if (eWorkingDataType == GDT_Float64 && eResample == GRA_Cubic && bNoMasksOrDstDensityOnly) return GWKCubicNoMasksOrDstDensityOnlyDouble(this); #endif if (eResample == GRA_Average) return GWKAverageOrMode(this); if (eResample == GRA_RMS) return GWKAverageOrMode(this); if (eResample == GRA_Mode) return GWKAverageOrMode(this); if (eResample == GRA_Max) return GWKAverageOrMode(this); if (eResample == GRA_Min) return GWKAverageOrMode(this); if (eResample == GRA_Med) return GWKAverageOrMode(this); if (eResample == GRA_Q1) return GWKAverageOrMode(this); if (eResample == GRA_Q3) return GWKAverageOrMode(this); if (eResample == GRA_Sum) return GWKSumPreserving(this); if (!GDALDataTypeIsComplex(eWorkingDataType)) { return GWKRealCase(this); } return GWKGeneralCase(this); } /************************************************************************/ /* Validate() */ /************************************************************************/ /** * \fn CPLErr GDALWarpKernel::Validate() * * Check the settings in the GDALWarpKernel, and issue a CPLError() * (and return CE_Failure) if the configuration is considered to be * invalid for some reason. * * This method will also do some standard defaulting such as setting * pfnProgress to GDALDummyProgress() if it is NULL. * * @return CE_None on success or CE_Failure if an error is detected. */ CPLErr GDALWarpKernel::Validate() { if (static_cast(eResample) >= (sizeof(anGWKFilterRadius) / sizeof(anGWKFilterRadius[0]))) { CPLError(CE_Failure, CPLE_AppDefined, "Unsupported resampling method %d.", static_cast(eResample)); return CE_Failure; } return CE_None; } /************************************************************************/ /* GWKOverlayDensity() */ /* */ /* Compute the final density for the destination pixel. This */ /* is a function of the overlay density (passed in) and the */ /* original density. */ /************************************************************************/ static void GWKOverlayDensity(const GDALWarpKernel *poWK, GPtrDiff_t iDstOffset, double dfDensity) { if (dfDensity < 0.0001 || poWK->pafDstDensity == nullptr) return; poWK->pafDstDensity[iDstOffset] = static_cast( 1.0 - (1.0 - dfDensity) * (1.0 - poWK->pafDstDensity[iDstOffset])); } /************************************************************************/ /* GWKRoundValueT() */ /************************************************************************/ template struct sGWKRoundValueT { static T eval(double); }; template struct sGWKRoundValueT /* signed */ { static T eval(double dfValue) { return static_cast(floor(dfValue + 0.5)); } }; template struct sGWKRoundValueT /* unsigned */ { static T eval(double dfValue) { return static_cast(dfValue + 0.5); } }; template static T GWKRoundValueT(double dfValue) { return sGWKRoundValueT::is_signed>::eval(dfValue); } template <> float GWKRoundValueT(double dfValue) { return static_cast(dfValue); } #ifdef notused template <> double GWKRoundValueT(double dfValue) { return dfValue; } #endif /************************************************************************/ /* GWKClampValueT() */ /************************************************************************/ template static CPL_INLINE T GWKClampValueT(double dfValue) { if (dfValue < std::numeric_limits::min()) return std::numeric_limits::min(); else if (dfValue > std::numeric_limits::max()) return std::numeric_limits::max(); else return GWKRoundValueT(dfValue); } template <> float GWKClampValueT(double dfValue) { return static_cast(dfValue); } #ifdef notused template <> double GWKClampValueT(double dfValue) { return dfValue; } #endif /************************************************************************/ /* GWKSetPixelValueRealT() */ /************************************************************************/ template static bool GWKSetPixelValueRealT(const GDALWarpKernel *poWK, int iBand, GPtrDiff_t iDstOffset, double dfDensity, T value) { T *pDst = reinterpret_cast(poWK->papabyDstImage[iBand]); /* -------------------------------------------------------------------- */ /* If the source density is less than 100% we need to fetch the */ /* existing destination value, and mix it with the source to */ /* get the new "to apply" value. Also compute composite */ /* density. */ /* */ /* We avoid mixing if density is very near one or risk mixing */ /* in very extreme nodata values and causing odd results (#1610) */ /* -------------------------------------------------------------------- */ if (dfDensity < 0.9999) { if (dfDensity < 0.0001) return true; double dfDstDensity = 1.0; if (poWK->pafDstDensity != nullptr) dfDstDensity = poWK->pafDstDensity[iDstOffset]; else if (poWK->panDstValid != nullptr && !CPLMaskGet(poWK->panDstValid, iDstOffset)) dfDstDensity = 0.0; // It seems like we also ought to be testing panDstValid[] here! const double dfDstReal = pDst[iDstOffset]; // The destination density is really only relative to the portion // not occluded by the overlay. const double dfDstInfluence = (1.0 - dfDensity) * dfDstDensity; const double dfReal = (value * dfDensity + dfDstReal * dfDstInfluence) / (dfDensity + dfDstInfluence); /* -------------------------------------------------------------------- */ /* Actually apply the destination value. */ /* */ /* Avoid using the destination nodata value for integer datatypes */ /* if by chance it is equal to the computed pixel value. */ /* -------------------------------------------------------------------- */ pDst[iDstOffset] = GWKClampValueT(dfReal); } else { pDst[iDstOffset] = value; } if (poWK->padfDstNoDataReal != nullptr && poWK->padfDstNoDataReal[iBand] == static_cast(pDst[iDstOffset])) { if (pDst[iDstOffset] == std::numeric_limits::min()) pDst[iDstOffset] = std::numeric_limits::min() + 1; else pDst[iDstOffset]--; } return true; } /************************************************************************/ /* GWKSetPixelValue() */ /************************************************************************/ static bool GWKSetPixelValue(const GDALWarpKernel *poWK, int iBand, GPtrDiff_t iDstOffset, double dfDensity, double dfReal, double dfImag) { GByte *pabyDst = poWK->papabyDstImage[iBand]; /* -------------------------------------------------------------------- */ /* If the source density is less than 100% we need to fetch the */ /* existing destination value, and mix it with the source to */ /* get the new "to apply" value. Also compute composite */ /* density. */ /* */ /* We avoid mixing if density is very near one or risk mixing */ /* in very extreme nodata values and causing odd results (#1610) */ /* -------------------------------------------------------------------- */ if (dfDensity < 0.9999) { if (dfDensity < 0.0001) return true; double dfDstDensity = 1.0; if (poWK->pafDstDensity != nullptr) dfDstDensity = poWK->pafDstDensity[iDstOffset]; else if (poWK->panDstValid != nullptr && !CPLMaskGet(poWK->panDstValid, iDstOffset)) dfDstDensity = 0.0; double dfDstReal = 0.0; double dfDstImag = 0.0; // It seems like we also ought to be testing panDstValid[] here! // TODO(schwehr): Factor out this repreated type of set. switch (poWK->eWorkingDataType) { case GDT_Byte: dfDstReal = pabyDst[iDstOffset]; dfDstImag = 0.0; break; case GDT_Int8: dfDstReal = reinterpret_cast(pabyDst)[iDstOffset]; dfDstImag = 0.0; break; case GDT_Int16: dfDstReal = reinterpret_cast(pabyDst)[iDstOffset]; dfDstImag = 0.0; break; case GDT_UInt16: dfDstReal = reinterpret_cast(pabyDst)[iDstOffset]; dfDstImag = 0.0; break; case GDT_Int32: dfDstReal = reinterpret_cast(pabyDst)[iDstOffset]; dfDstImag = 0.0; break; case GDT_UInt32: dfDstReal = reinterpret_cast(pabyDst)[iDstOffset]; dfDstImag = 0.0; break; case GDT_Int64: dfDstReal = static_cast( reinterpret_cast(pabyDst)[iDstOffset]); dfDstImag = 0.0; break; case GDT_UInt64: dfDstReal = static_cast( reinterpret_cast(pabyDst)[iDstOffset]); dfDstImag = 0.0; break; case GDT_Float32: dfDstReal = reinterpret_cast(pabyDst)[iDstOffset]; dfDstImag = 0.0; break; case GDT_Float64: dfDstReal = reinterpret_cast(pabyDst)[iDstOffset]; dfDstImag = 0.0; break; case GDT_CInt16: dfDstReal = reinterpret_cast(pabyDst)[iDstOffset * 2]; dfDstImag = reinterpret_cast(pabyDst)[iDstOffset * 2 + 1]; break; case GDT_CInt32: dfDstReal = reinterpret_cast(pabyDst)[iDstOffset * 2]; dfDstImag = reinterpret_cast(pabyDst)[iDstOffset * 2 + 1]; break; case GDT_CFloat32: dfDstReal = reinterpret_cast(pabyDst)[iDstOffset * 2]; dfDstImag = reinterpret_cast(pabyDst)[iDstOffset * 2 + 1]; break; case GDT_CFloat64: dfDstReal = reinterpret_cast(pabyDst)[iDstOffset * 2]; dfDstImag = reinterpret_cast(pabyDst)[iDstOffset * 2 + 1]; break; case GDT_Unknown: case GDT_TypeCount: CPLAssert(false); return false; } // The destination density is really only relative to the portion // not occluded by the overlay. const double dfDstInfluence = (1.0 - dfDensity) * dfDstDensity; dfReal = (dfReal * dfDensity + dfDstReal * dfDstInfluence) / (dfDensity + dfDstInfluence); dfImag = (dfImag * dfDensity + dfDstImag * dfDstInfluence) / (dfDensity + dfDstInfluence); } /* -------------------------------------------------------------------- */ /* Actually apply the destination value. */ /* */ /* Avoid using the destination nodata value for integer datatypes */ /* if by chance it is equal to the computed pixel value. */ /* -------------------------------------------------------------------- */ // TODO(schwehr): Can we make this a template? #define CLAMP(type) \ do \ { \ type *_pDst = reinterpret_cast(pabyDst); \ if (dfReal < static_cast(std::numeric_limits::min())) \ _pDst[iDstOffset] = \ static_cast(std::numeric_limits::min()); \ else if (dfReal > \ static_cast(std::numeric_limits::max())) \ _pDst[iDstOffset] = \ static_cast(std::numeric_limits::max()); \ else \ _pDst[iDstOffset] = (std::numeric_limits::is_signed) \ ? static_cast(floor(dfReal + 0.5)) \ : static_cast(dfReal + 0.5); \ if (poWK->padfDstNoDataReal != nullptr && \ poWK->padfDstNoDataReal[iBand] == \ static_cast(_pDst[iDstOffset])) \ { \ if (_pDst[iDstOffset] == \ static_cast(std::numeric_limits::min())) \ _pDst[iDstOffset] = \ static_cast(std::numeric_limits::min() + 1); \ else \ _pDst[iDstOffset]--; \ } \ } while (false) switch (poWK->eWorkingDataType) { case GDT_Byte: CLAMP(GByte); break; case GDT_Int8: CLAMP(GInt8); break; case GDT_Int16: CLAMP(GInt16); break; case GDT_UInt16: CLAMP(GUInt16); break; case GDT_UInt32: CLAMP(GUInt32); break; case GDT_Int32: CLAMP(GInt32); break; case GDT_UInt64: CLAMP(std::uint64_t); break; case GDT_Int64: CLAMP(std::int64_t); break; case GDT_Float32: reinterpret_cast(pabyDst)[iDstOffset] = static_cast(dfReal); break; case GDT_Float64: reinterpret_cast(pabyDst)[iDstOffset] = dfReal; break; case GDT_CInt16: { typedef GInt16 T; if (dfReal < static_cast(std::numeric_limits::min())) reinterpret_cast(pabyDst)[iDstOffset * 2] = std::numeric_limits::min(); else if (dfReal > static_cast(std::numeric_limits::max())) reinterpret_cast(pabyDst)[iDstOffset * 2] = std::numeric_limits::max(); else reinterpret_cast(pabyDst)[iDstOffset * 2] = static_cast(floor(dfReal + 0.5)); if (dfImag < static_cast(std::numeric_limits::min())) reinterpret_cast(pabyDst)[iDstOffset * 2 + 1] = std::numeric_limits::min(); else if (dfImag > static_cast(std::numeric_limits::max())) reinterpret_cast(pabyDst)[iDstOffset * 2 + 1] = std::numeric_limits::max(); else reinterpret_cast(pabyDst)[iDstOffset * 2 + 1] = static_cast(floor(dfImag + 0.5)); break; } case GDT_CInt32: { typedef GInt32 T; if (dfReal < static_cast(std::numeric_limits::min())) reinterpret_cast(pabyDst)[iDstOffset * 2] = std::numeric_limits::min(); else if (dfReal > static_cast(std::numeric_limits::max())) reinterpret_cast(pabyDst)[iDstOffset * 2] = std::numeric_limits::max(); else reinterpret_cast(pabyDst)[iDstOffset * 2] = static_cast(floor(dfReal + 0.5)); if (dfImag < static_cast(std::numeric_limits::min())) reinterpret_cast(pabyDst)[iDstOffset * 2 + 1] = std::numeric_limits::min(); else if (dfImag > static_cast(std::numeric_limits::max())) reinterpret_cast(pabyDst)[iDstOffset * 2 + 1] = std::numeric_limits::max(); else reinterpret_cast(pabyDst)[iDstOffset * 2 + 1] = static_cast(floor(dfImag + 0.5)); break; } case GDT_CFloat32: reinterpret_cast(pabyDst)[iDstOffset * 2] = static_cast(dfReal); reinterpret_cast(pabyDst)[iDstOffset * 2 + 1] = static_cast(dfImag); break; case GDT_CFloat64: reinterpret_cast(pabyDst)[iDstOffset * 2] = dfReal; reinterpret_cast(pabyDst)[iDstOffset * 2 + 1] = dfImag; break; case GDT_Unknown: case GDT_TypeCount: return false; } return true; } /************************************************************************/ /* GWKSetPixelValueReal() */ /************************************************************************/ static bool GWKSetPixelValueReal(const GDALWarpKernel *poWK, int iBand, GPtrDiff_t iDstOffset, double dfDensity, double dfReal) { GByte *pabyDst = poWK->papabyDstImage[iBand]; /* -------------------------------------------------------------------- */ /* If the source density is less than 100% we need to fetch the */ /* existing destination value, and mix it with the source to */ /* get the new "to apply" value. Also compute composite */ /* density. */ /* */ /* We avoid mixing if density is very near one or risk mixing */ /* in very extreme nodata values and causing odd results (#1610) */ /* -------------------------------------------------------------------- */ if (dfDensity < 0.9999) { if (dfDensity < 0.0001) return true; double dfDstReal = 0.0; double dfDstDensity = 1.0; if (poWK->pafDstDensity != nullptr) dfDstDensity = poWK->pafDstDensity[iDstOffset]; else if (poWK->panDstValid != nullptr && !CPLMaskGet(poWK->panDstValid, iDstOffset)) dfDstDensity = 0.0; // It seems like we also ought to be testing panDstValid[] here! switch (poWK->eWorkingDataType) { case GDT_Byte: dfDstReal = pabyDst[iDstOffset]; break; case GDT_Int8: dfDstReal = reinterpret_cast(pabyDst)[iDstOffset]; break; case GDT_Int16: dfDstReal = reinterpret_cast(pabyDst)[iDstOffset]; break; case GDT_UInt16: dfDstReal = reinterpret_cast(pabyDst)[iDstOffset]; break; case GDT_Int32: dfDstReal = reinterpret_cast(pabyDst)[iDstOffset]; break; case GDT_UInt32: dfDstReal = reinterpret_cast(pabyDst)[iDstOffset]; break; case GDT_Int64: dfDstReal = static_cast( reinterpret_cast(pabyDst)[iDstOffset]); break; case GDT_UInt64: dfDstReal = static_cast( reinterpret_cast(pabyDst)[iDstOffset]); break; case GDT_Float32: dfDstReal = reinterpret_cast(pabyDst)[iDstOffset]; break; case GDT_Float64: dfDstReal = reinterpret_cast(pabyDst)[iDstOffset]; break; case GDT_CInt16: case GDT_CInt32: case GDT_CFloat32: case GDT_CFloat64: case GDT_Unknown: case GDT_TypeCount: CPLAssert(false); return false; } // The destination density is really only relative to the portion // not occluded by the overlay. const double dfDstInfluence = (1.0 - dfDensity) * dfDstDensity; dfReal = (dfReal * dfDensity + dfDstReal * dfDstInfluence) / (dfDensity + dfDstInfluence); } /* -------------------------------------------------------------------- */ /* Actually apply the destination value. */ /* */ /* Avoid using the destination nodata value for integer datatypes */ /* if by chance it is equal to the computed pixel value. */ /* -------------------------------------------------------------------- */ switch (poWK->eWorkingDataType) { case GDT_Byte: CLAMP(GByte); break; case GDT_Int8: CLAMP(GInt8); break; case GDT_Int16: CLAMP(GInt16); break; case GDT_UInt16: CLAMP(GUInt16); break; case GDT_UInt32: CLAMP(GUInt32); break; case GDT_Int32: CLAMP(GInt32); break; case GDT_UInt64: CLAMP(std::uint64_t); break; case GDT_Int64: CLAMP(std::int64_t); break; case GDT_Float32: reinterpret_cast(pabyDst)[iDstOffset] = static_cast(dfReal); break; case GDT_Float64: reinterpret_cast(pabyDst)[iDstOffset] = dfReal; break; case GDT_CInt16: case GDT_CInt32: case GDT_CFloat32: case GDT_CFloat64: return false; case GDT_Unknown: case GDT_TypeCount: CPLAssert(false); return false; } return true; } /************************************************************************/ /* GWKGetPixelValue() */ /************************************************************************/ /* It is assumed that panUnifiedSrcValid has been checked before */ static bool GWKGetPixelValue(const GDALWarpKernel *poWK, int iBand, GPtrDiff_t iSrcOffset, double *pdfDensity, double *pdfReal, double *pdfImag) { GByte *pabySrc = poWK->papabySrcImage[iBand]; if (poWK->papanBandSrcValid != nullptr && poWK->papanBandSrcValid[iBand] != nullptr && !CPLMaskGet(poWK->papanBandSrcValid[iBand], iSrcOffset)) { *pdfDensity = 0.0; return false; } *pdfReal = 0.0; *pdfImag = 0.0; // TODO(schwehr): Fix casting. switch (poWK->eWorkingDataType) { case GDT_Byte: *pdfReal = pabySrc[iSrcOffset]; *pdfImag = 0.0; break; case GDT_Int8: *pdfReal = reinterpret_cast(pabySrc)[iSrcOffset]; *pdfImag = 0.0; break; case GDT_Int16: *pdfReal = reinterpret_cast(pabySrc)[iSrcOffset]; *pdfImag = 0.0; break; case GDT_UInt16: *pdfReal = reinterpret_cast(pabySrc)[iSrcOffset]; *pdfImag = 0.0; break; case GDT_Int32: *pdfReal = reinterpret_cast(pabySrc)[iSrcOffset]; *pdfImag = 0.0; break; case GDT_UInt32: *pdfReal = reinterpret_cast(pabySrc)[iSrcOffset]; *pdfImag = 0.0; break; case GDT_Int64: *pdfReal = static_cast( reinterpret_cast(pabySrc)[iSrcOffset]); *pdfImag = 0.0; break; case GDT_UInt64: *pdfReal = static_cast( reinterpret_cast(pabySrc)[iSrcOffset]); *pdfImag = 0.0; break; case GDT_Float32: *pdfReal = reinterpret_cast(pabySrc)[iSrcOffset]; *pdfImag = 0.0; break; case GDT_Float64: *pdfReal = reinterpret_cast(pabySrc)[iSrcOffset]; *pdfImag = 0.0; break; case GDT_CInt16: *pdfReal = reinterpret_cast(pabySrc)[iSrcOffset * 2]; *pdfImag = reinterpret_cast(pabySrc)[iSrcOffset * 2 + 1]; break; case GDT_CInt32: *pdfReal = reinterpret_cast(pabySrc)[iSrcOffset * 2]; *pdfImag = reinterpret_cast(pabySrc)[iSrcOffset * 2 + 1]; break; case GDT_CFloat32: *pdfReal = reinterpret_cast(pabySrc)[iSrcOffset * 2]; *pdfImag = reinterpret_cast(pabySrc)[iSrcOffset * 2 + 1]; break; case GDT_CFloat64: *pdfReal = reinterpret_cast(pabySrc)[iSrcOffset * 2]; *pdfImag = reinterpret_cast(pabySrc)[iSrcOffset * 2 + 1]; break; case GDT_Unknown: case GDT_TypeCount: CPLAssert(false); *pdfDensity = 0.0; return false; } if (poWK->pafUnifiedSrcDensity != nullptr) *pdfDensity = poWK->pafUnifiedSrcDensity[iSrcOffset]; else *pdfDensity = 1.0; return *pdfDensity != 0.0; } /************************************************************************/ /* GWKGetPixelValueReal() */ /************************************************************************/ static bool GWKGetPixelValueReal(const GDALWarpKernel *poWK, int iBand, GPtrDiff_t iSrcOffset, double *pdfDensity, double *pdfReal) { GByte *pabySrc = poWK->papabySrcImage[iBand]; if (poWK->papanBandSrcValid != nullptr && poWK->papanBandSrcValid[iBand] != nullptr && !CPLMaskGet(poWK->papanBandSrcValid[iBand], iSrcOffset)) { *pdfDensity = 0.0; return false; } switch (poWK->eWorkingDataType) { case GDT_Byte: *pdfReal = pabySrc[iSrcOffset]; break; case GDT_Int8: *pdfReal = reinterpret_cast(pabySrc)[iSrcOffset]; break; case GDT_Int16: *pdfReal = reinterpret_cast(pabySrc)[iSrcOffset]; break; case GDT_UInt16: *pdfReal = reinterpret_cast(pabySrc)[iSrcOffset]; break; case GDT_Int32: *pdfReal = reinterpret_cast(pabySrc)[iSrcOffset]; break; case GDT_UInt32: *pdfReal = reinterpret_cast(pabySrc)[iSrcOffset]; break; case GDT_Int64: *pdfReal = static_cast( reinterpret_cast(pabySrc)[iSrcOffset]); break; case GDT_UInt64: *pdfReal = static_cast( reinterpret_cast(pabySrc)[iSrcOffset]); break; case GDT_Float32: *pdfReal = reinterpret_cast(pabySrc)[iSrcOffset]; break; case GDT_Float64: *pdfReal = reinterpret_cast(pabySrc)[iSrcOffset]; break; case GDT_CInt16: case GDT_CInt32: case GDT_CFloat32: case GDT_CFloat64: case GDT_Unknown: case GDT_TypeCount: CPLAssert(false); return false; } if (poWK->pafUnifiedSrcDensity != nullptr) *pdfDensity = poWK->pafUnifiedSrcDensity[iSrcOffset]; else *pdfDensity = 1.0; return *pdfDensity != 0.0; } /************************************************************************/ /* GWKGetPixelRow() */ /************************************************************************/ /* It is assumed that adfImag[] is set to 0 by caller code for non-complex */ /* data-types. */ static bool GWKGetPixelRow(const GDALWarpKernel *poWK, int iBand, GPtrDiff_t iSrcOffset, int nHalfSrcLen, double *padfDensity, double adfReal[], double *padfImag) { // We know that nSrcLen is even, so we can *always* unroll loops 2x. const int nSrcLen = nHalfSrcLen * 2; bool bHasValid = false; if (padfDensity != nullptr) { // Init the density. for (int i = 0; i < nSrcLen; i += 2) { padfDensity[i] = 1.0; padfDensity[i + 1] = 1.0; } if (poWK->panUnifiedSrcValid != nullptr) { for (int i = 0; i < nSrcLen; i += 2) { if (CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset + i)) bHasValid = true; else padfDensity[i] = 0.0; if (CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset + i + 1)) bHasValid = true; else padfDensity[i + 1] = 0.0; } // Reset or fail as needed. if (bHasValid) bHasValid = false; else return false; } if (poWK->papanBandSrcValid != nullptr && poWK->papanBandSrcValid[iBand] != nullptr) { for (int i = 0; i < nSrcLen; i += 2) { if (CPLMaskGet(poWK->papanBandSrcValid[iBand], iSrcOffset + i)) bHasValid = true; else padfDensity[i] = 0.0; if (CPLMaskGet(poWK->papanBandSrcValid[iBand], iSrcOffset + i + 1)) bHasValid = true; else padfDensity[i + 1] = 0.0; } // Reset or fail as needed. if (bHasValid) bHasValid = false; else return false; } } // TODO(schwehr): Fix casting. // Fetch data. switch (poWK->eWorkingDataType) { case GDT_Byte: { GByte *pSrc = reinterpret_cast(poWK->papabySrcImage[iBand]); pSrc += iSrcOffset; for (int i = 0; i < nSrcLen; i += 2) { adfReal[i] = pSrc[i]; adfReal[i + 1] = pSrc[i + 1]; } break; } case GDT_Int8: { GInt8 *pSrc = reinterpret_cast(poWK->papabySrcImage[iBand]); pSrc += iSrcOffset; for (int i = 0; i < nSrcLen; i += 2) { adfReal[i] = pSrc[i]; adfReal[i + 1] = pSrc[i + 1]; } break; } case GDT_Int16: { GInt16 *pSrc = reinterpret_cast(poWK->papabySrcImage[iBand]); pSrc += iSrcOffset; for (int i = 0; i < nSrcLen; i += 2) { adfReal[i] = pSrc[i]; adfReal[i + 1] = pSrc[i + 1]; } break; } case GDT_UInt16: { GUInt16 *pSrc = reinterpret_cast(poWK->papabySrcImage[iBand]); pSrc += iSrcOffset; for (int i = 0; i < nSrcLen; i += 2) { adfReal[i] = pSrc[i]; adfReal[i + 1] = pSrc[i + 1]; } break; } case GDT_Int32: { GInt32 *pSrc = reinterpret_cast(poWK->papabySrcImage[iBand]); pSrc += iSrcOffset; for (int i = 0; i < nSrcLen; i += 2) { adfReal[i] = pSrc[i]; adfReal[i + 1] = pSrc[i + 1]; } break; } case GDT_UInt32: { GUInt32 *pSrc = reinterpret_cast(poWK->papabySrcImage[iBand]); pSrc += iSrcOffset; for (int i = 0; i < nSrcLen; i += 2) { adfReal[i] = pSrc[i]; adfReal[i + 1] = pSrc[i + 1]; } break; } case GDT_Int64: { auto pSrc = reinterpret_cast(poWK->papabySrcImage[iBand]); pSrc += iSrcOffset; for (int i = 0; i < nSrcLen; i += 2) { adfReal[i] = static_cast(pSrc[i]); adfReal[i + 1] = static_cast(pSrc[i + 1]); } break; } case GDT_UInt64: { auto pSrc = reinterpret_cast(poWK->papabySrcImage[iBand]); pSrc += iSrcOffset; for (int i = 0; i < nSrcLen; i += 2) { adfReal[i] = static_cast(pSrc[i]); adfReal[i + 1] = static_cast(pSrc[i + 1]); } break; } case GDT_Float32: { float *pSrc = reinterpret_cast(poWK->papabySrcImage[iBand]); pSrc += iSrcOffset; for (int i = 0; i < nSrcLen; i += 2) { adfReal[i] = pSrc[i]; adfReal[i + 1] = pSrc[i + 1]; } break; } case GDT_Float64: { double *pSrc = reinterpret_cast(poWK->papabySrcImage[iBand]); pSrc += iSrcOffset; for (int i = 0; i < nSrcLen; i += 2) { adfReal[i] = pSrc[i]; adfReal[i + 1] = pSrc[i + 1]; } break; } case GDT_CInt16: { GInt16 *pSrc = reinterpret_cast(poWK->papabySrcImage[iBand]); pSrc += 2 * iSrcOffset; for (int i = 0; i < nSrcLen; i += 2) { adfReal[i] = pSrc[2 * i]; padfImag[i] = pSrc[2 * i + 1]; adfReal[i + 1] = pSrc[2 * i + 2]; padfImag[i + 1] = pSrc[2 * i + 3]; } break; } case GDT_CInt32: { GInt32 *pSrc = reinterpret_cast(poWK->papabySrcImage[iBand]); pSrc += 2 * iSrcOffset; for (int i = 0; i < nSrcLen; i += 2) { adfReal[i] = pSrc[2 * i]; padfImag[i] = pSrc[2 * i + 1]; adfReal[i + 1] = pSrc[2 * i + 2]; padfImag[i + 1] = pSrc[2 * i + 3]; } break; } case GDT_CFloat32: { float *pSrc = reinterpret_cast(poWK->papabySrcImage[iBand]); pSrc += 2 * iSrcOffset; for (int i = 0; i < nSrcLen; i += 2) { adfReal[i] = pSrc[2 * i]; padfImag[i] = pSrc[2 * i + 1]; adfReal[i + 1] = pSrc[2 * i + 2]; padfImag[i + 1] = pSrc[2 * i + 3]; } break; } case GDT_CFloat64: { double *pSrc = reinterpret_cast(poWK->papabySrcImage[iBand]); pSrc += 2 * iSrcOffset; for (int i = 0; i < nSrcLen; i += 2) { adfReal[i] = pSrc[2 * i]; padfImag[i] = pSrc[2 * i + 1]; adfReal[i + 1] = pSrc[2 * i + 2]; padfImag[i + 1] = pSrc[2 * i + 3]; } break; } case GDT_Unknown: case GDT_TypeCount: CPLAssert(false); if (padfDensity) memset(padfDensity, 0, nSrcLen * sizeof(double)); return false; } if (padfDensity == nullptr) return true; if (poWK->pafUnifiedSrcDensity == nullptr) { for (int i = 0; i < nSrcLen; i += 2) { // Take into account earlier calcs. if (padfDensity[i] > SRC_DENSITY_THRESHOLD) { padfDensity[i] = 1.0; bHasValid = true; } if (padfDensity[i + 1] > SRC_DENSITY_THRESHOLD) { padfDensity[i + 1] = 1.0; bHasValid = true; } } } else { for (int i = 0; i < nSrcLen; i += 2) { if (padfDensity[i] > SRC_DENSITY_THRESHOLD) padfDensity[i] = poWK->pafUnifiedSrcDensity[iSrcOffset + i]; if (padfDensity[i] > SRC_DENSITY_THRESHOLD) bHasValid = true; if (padfDensity[i + 1] > SRC_DENSITY_THRESHOLD) padfDensity[i + 1] = poWK->pafUnifiedSrcDensity[iSrcOffset + i + 1]; if (padfDensity[i + 1] > SRC_DENSITY_THRESHOLD) bHasValid = true; } } return bHasValid; } /************************************************************************/ /* GWKGetPixelT() */ /************************************************************************/ template static bool GWKGetPixelT(const GDALWarpKernel *poWK, int iBand, GPtrDiff_t iSrcOffset, double *pdfDensity, T *pValue) { T *pSrc = reinterpret_cast(poWK->papabySrcImage[iBand]); if ((poWK->panUnifiedSrcValid != nullptr && !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset)) || (poWK->papanBandSrcValid != nullptr && poWK->papanBandSrcValid[iBand] != nullptr && !CPLMaskGet(poWK->papanBandSrcValid[iBand], iSrcOffset))) { *pdfDensity = 0.0; return false; } *pValue = pSrc[iSrcOffset]; if (poWK->pafUnifiedSrcDensity == nullptr) *pdfDensity = 1.0; else *pdfDensity = poWK->pafUnifiedSrcDensity[iSrcOffset]; return *pdfDensity != 0.0; } /************************************************************************/ /* GWKBilinearResample() */ /* Set of bilinear interpolators */ /************************************************************************/ static bool GWKBilinearResample4Sample(const GDALWarpKernel *poWK, int iBand, double dfSrcX, double dfSrcY, double *pdfDensity, double *pdfReal, double *pdfImag) { // Save as local variables to avoid following pointers. const int nSrcXSize = poWK->nSrcXSize; const int nSrcYSize = poWK->nSrcYSize; int iSrcX = static_cast(floor(dfSrcX - 0.5)); int iSrcY = static_cast(floor(dfSrcY - 0.5)); double dfRatioX = 1.5 - (dfSrcX - iSrcX); double dfRatioY = 1.5 - (dfSrcY - iSrcY); bool bShifted = false; if (iSrcX == -1) { iSrcX = 0; dfRatioX = 1; } if (iSrcY == -1) { iSrcY = 0; dfRatioY = 1; } GPtrDiff_t iSrcOffset = iSrcX + static_cast(iSrcY) * nSrcXSize; // Shift so we don't overrun the array. if (static_cast(nSrcXSize) * nSrcYSize == iSrcOffset + 1 || static_cast(nSrcXSize) * nSrcYSize == iSrcOffset + nSrcXSize + 1) { bShifted = true; --iSrcOffset; } double adfDensity[2] = {0.0, 0.0}; double adfReal[2] = {0.0, 0.0}; double adfImag[2] = {0.0, 0.0}; double dfAccumulatorReal = 0.0; double dfAccumulatorImag = 0.0; double dfAccumulatorDensity = 0.0; double dfAccumulatorDivisor = 0.0; const GPtrDiff_t nSrcPixels = static_cast(nSrcXSize) * nSrcYSize; // Get pixel row. if (iSrcY >= 0 && iSrcY < nSrcYSize && iSrcOffset >= 0 && iSrcOffset < nSrcPixels && GWKGetPixelRow(poWK, iBand, iSrcOffset, 1, adfDensity, adfReal, adfImag)) { double dfMult1 = dfRatioX * dfRatioY; double dfMult2 = (1.0 - dfRatioX) * dfRatioY; // Shifting corrected. if (bShifted) { adfReal[0] = adfReal[1]; adfImag[0] = adfImag[1]; adfDensity[0] = adfDensity[1]; } // Upper Left Pixel. if (iSrcX >= 0 && iSrcX < nSrcXSize && adfDensity[0] > SRC_DENSITY_THRESHOLD) { dfAccumulatorDivisor += dfMult1; dfAccumulatorReal += adfReal[0] * dfMult1; dfAccumulatorImag += adfImag[0] * dfMult1; dfAccumulatorDensity += adfDensity[0] * dfMult1; } // Upper Right Pixel. if (iSrcX + 1 >= 0 && iSrcX + 1 < nSrcXSize && adfDensity[1] > SRC_DENSITY_THRESHOLD) { dfAccumulatorDivisor += dfMult2; dfAccumulatorReal += adfReal[1] * dfMult2; dfAccumulatorImag += adfImag[1] * dfMult2; dfAccumulatorDensity += adfDensity[1] * dfMult2; } } // Get pixel row. if (iSrcY + 1 >= 0 && iSrcY + 1 < nSrcYSize && iSrcOffset + nSrcXSize >= 0 && iSrcOffset + nSrcXSize < nSrcPixels && GWKGetPixelRow(poWK, iBand, iSrcOffset + nSrcXSize, 1, adfDensity, adfReal, adfImag)) { double dfMult1 = dfRatioX * (1.0 - dfRatioY); double dfMult2 = (1.0 - dfRatioX) * (1.0 - dfRatioY); // Shifting corrected if (bShifted) { adfReal[0] = adfReal[1]; adfImag[0] = adfImag[1]; adfDensity[0] = adfDensity[1]; } // Lower Left Pixel if (iSrcX >= 0 && iSrcX < nSrcXSize && adfDensity[0] > SRC_DENSITY_THRESHOLD) { dfAccumulatorDivisor += dfMult1; dfAccumulatorReal += adfReal[0] * dfMult1; dfAccumulatorImag += adfImag[0] * dfMult1; dfAccumulatorDensity += adfDensity[0] * dfMult1; } // Lower Right Pixel. if (iSrcX + 1 >= 0 && iSrcX + 1 < nSrcXSize && adfDensity[1] > SRC_DENSITY_THRESHOLD) { dfAccumulatorDivisor += dfMult2; dfAccumulatorReal += adfReal[1] * dfMult2; dfAccumulatorImag += adfImag[1] * dfMult2; dfAccumulatorDensity += adfDensity[1] * dfMult2; } } /* -------------------------------------------------------------------- */ /* Return result. */ /* -------------------------------------------------------------------- */ if (dfAccumulatorDivisor == 1.0) { *pdfReal = dfAccumulatorReal; *pdfImag = dfAccumulatorImag; *pdfDensity = dfAccumulatorDensity; return false; } else if (dfAccumulatorDivisor < 0.00001) { *pdfReal = 0.0; *pdfImag = 0.0; *pdfDensity = 0.0; return false; } else { *pdfReal = dfAccumulatorReal / dfAccumulatorDivisor; *pdfImag = dfAccumulatorImag / dfAccumulatorDivisor; *pdfDensity = dfAccumulatorDensity / dfAccumulatorDivisor; return true; } } template static bool GWKBilinearResampleNoMasks4SampleT(const GDALWarpKernel *poWK, int iBand, double dfSrcX, double dfSrcY, T *pValue) { const int iSrcX = static_cast(floor(dfSrcX - 0.5)); const int iSrcY = static_cast(floor(dfSrcY - 0.5)); GPtrDiff_t iSrcOffset = iSrcX + static_cast(iSrcY) * poWK->nSrcXSize; const double dfRatioX = 1.5 - (dfSrcX - iSrcX); const double dfRatioY = 1.5 - (dfSrcY - iSrcY); const T *const pSrc = reinterpret_cast(poWK->papabySrcImage[iBand]); if (iSrcX >= 0 && iSrcX + 1 < poWK->nSrcXSize && iSrcY >= 0 && iSrcY + 1 < poWK->nSrcYSize) { const double dfAccumulator = (pSrc[iSrcOffset] * dfRatioX + pSrc[iSrcOffset + 1] * (1.0 - dfRatioX)) * dfRatioY + (pSrc[iSrcOffset + poWK->nSrcXSize] * dfRatioX + pSrc[iSrcOffset + 1 + poWK->nSrcXSize] * (1.0 - dfRatioX)) * (1.0 - dfRatioY); *pValue = GWKRoundValueT(dfAccumulator); return true; } double dfAccumulatorDivisor = 0.0; double dfAccumulator = 0.0; // Upper Left Pixel. if (iSrcX >= 0 && iSrcX < poWK->nSrcXSize && iSrcY >= 0 && iSrcY < poWK->nSrcYSize) { const double dfMult = dfRatioX * dfRatioY; dfAccumulatorDivisor += dfMult; dfAccumulator += pSrc[iSrcOffset] * dfMult; } // Upper Right Pixel. if (iSrcX + 1 >= 0 && iSrcX + 1 < poWK->nSrcXSize && iSrcY >= 0 && iSrcY < poWK->nSrcYSize) { const double dfMult = (1.0 - dfRatioX) * dfRatioY; dfAccumulatorDivisor += dfMult; dfAccumulator += pSrc[iSrcOffset + 1] * dfMult; } // Lower Right Pixel. if (iSrcX + 1 >= 0 && iSrcX + 1 < poWK->nSrcXSize && iSrcY + 1 >= 0 && iSrcY + 1 < poWK->nSrcYSize) { const double dfMult = (1.0 - dfRatioX) * (1.0 - dfRatioY); dfAccumulatorDivisor += dfMult; dfAccumulator += pSrc[iSrcOffset + 1 + poWK->nSrcXSize] * dfMult; } // Lower Left Pixel. if (iSrcX >= 0 && iSrcX < poWK->nSrcXSize && iSrcY + 1 >= 0 && iSrcY + 1 < poWK->nSrcYSize) { const double dfMult = dfRatioX * (1.0 - dfRatioY); dfAccumulatorDivisor += dfMult; dfAccumulator += pSrc[iSrcOffset + poWK->nSrcXSize] * dfMult; } /* -------------------------------------------------------------------- */ /* Return result. */ /* -------------------------------------------------------------------- */ double dfValue = 0.0; if (dfAccumulatorDivisor < 0.00001) { *pValue = 0; return false; } else if (dfAccumulatorDivisor == 1.0) { dfValue = dfAccumulator; } else { dfValue = dfAccumulator / dfAccumulatorDivisor; } *pValue = GWKRoundValueT(dfValue); return true; } /************************************************************************/ /* GWKCubicResample() */ /* Set of bicubic interpolators using cubic convolution. */ /************************************************************************/ // http://verona.fi-p.unam.mx/boris/practicas/CubConvInterp.pdf Formula 18 // or http://en.wikipedia.org/wiki/Cubic_Hermite_spline : CINTx(p_1,p0,p1,p2) // http://en.wikipedia.org/wiki/Bicubic_interpolation: matrix notation // TODO(schwehr): Use an inline function. #define CubicConvolution(distance1, distance2, distance3, f0, f1, f2, f3) \ (f1 + 0.5 * (distance1 * (f2 - f0) + \ distance2 * (2.0 * f0 - 5.0 * f1 + 4.0 * f2 - f3) + \ distance3 * (3.0 * (f1 - f2) + f3 - f0))) /************************************************************************/ /* GWKCubicComputeWeights() */ /************************************************************************/ // adfCoeffs[2] = 1.0 - (adfCoeffs[0] + adfCoeffs[1] - adfCoeffs[3]); // TODO(schwehr): Use an inline function. #define GWKCubicComputeWeights(dfX_, adfCoeffs) \ { \ const double dfX = dfX_; \ const double dfHalfX = 0.5 * dfX; \ const double dfThreeX = 3.0 * dfX; \ const double dfHalfX2 = dfHalfX * dfX; \ \ adfCoeffs[0] = dfHalfX * (-1 + dfX * (2 - dfX)); \ adfCoeffs[1] = 1 + dfHalfX2 * (-5 + dfThreeX); \ adfCoeffs[2] = dfHalfX * (1 + dfX * (4 - dfThreeX)); \ adfCoeffs[3] = dfHalfX2 * (-1 + dfX); \ } // TODO(schwehr): Use an inline function. #define CONVOL4(v1, v2) \ ((v1)[0] * (v2)[0] + (v1)[1] * (v2)[1] + (v1)[2] * (v2)[2] + \ (v1)[3] * (v2)[3]) #if 0 // Optimal (in theory...) for max 2 convolutions: 14 multiplications // instead of 17. // TODO(schwehr): Use an inline function. #define GWKCubicComputeWeights_Optim2MAX(dfX_, adfCoeffs, dfHalfX) \ { \ const double dfX = dfX_; \ dfHalfX = 0.5 * dfX; \ const double dfThreeX = 3.0 * dfX; \ const double dfXMinus1 = dfX - 1; \ \ adfCoeffs[0] = -1 + dfX * (2 - dfX); \ adfCoeffs[1] = dfX * (-5 + dfThreeX); \ /*adfCoeffs[2] = 1 + dfX * (4 - dfThreeX);*/ \ adfCoeffs[2] = -dfXMinus1 - adfCoeffs[1]; \ /*adfCoeffs[3] = dfX * (-1 + dfX); */ \ adfCoeffs[3] = dfXMinus1 - adfCoeffs[0]; \ } // TODO(schwehr): Use an inline function. #define CONVOL4_Optim2MAX(adfCoeffs, v, dfHalfX) \ ((v)[1] + (dfHalfX) * ((adfCoeffs)[0] * (v)[0] + (adfCoeffs)[1] * (v)[1] + \ (adfCoeffs)[2] * (v)[2] + (adfCoeffs)[3] * (v)[3])) #endif static bool GWKCubicResample4Sample(const GDALWarpKernel *poWK, int iBand, double dfSrcX, double dfSrcY, double *pdfDensity, double *pdfReal, double *pdfImag) { const int iSrcX = static_cast(dfSrcX - 0.5); const int iSrcY = static_cast(dfSrcY - 0.5); GPtrDiff_t iSrcOffset = iSrcX + static_cast(iSrcY) * poWK->nSrcXSize; const double dfDeltaX = dfSrcX - 0.5 - iSrcX; const double dfDeltaY = dfSrcY - 0.5 - iSrcY; double adfDensity[4] = {}; double adfReal[4] = {}; double adfImag[4] = {}; // Get the bilinear interpolation at the image borders. if (iSrcX - 1 < 0 || iSrcX + 2 >= poWK->nSrcXSize || iSrcY - 1 < 0 || iSrcY + 2 >= poWK->nSrcYSize) return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY, pdfDensity, pdfReal, pdfImag); double adfValueDens[4] = {}; double adfValueReal[4] = {}; double adfValueImag[4] = {}; double adfCoeffsX[4] = {}; GWKCubicComputeWeights(dfDeltaX, adfCoeffsX); for (GPtrDiff_t i = -1; i < 3; i++) { if (!GWKGetPixelRow(poWK, iBand, iSrcOffset + i * poWK->nSrcXSize - 1, 2, adfDensity, adfReal, adfImag) || adfDensity[0] < SRC_DENSITY_THRESHOLD || adfDensity[1] < SRC_DENSITY_THRESHOLD || adfDensity[2] < SRC_DENSITY_THRESHOLD || adfDensity[3] < SRC_DENSITY_THRESHOLD) { return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY, pdfDensity, pdfReal, pdfImag); } adfValueDens[i + 1] = CONVOL4(adfCoeffsX, adfDensity); adfValueReal[i + 1] = CONVOL4(adfCoeffsX, adfReal); adfValueImag[i + 1] = CONVOL4(adfCoeffsX, adfImag); } /* -------------------------------------------------------------------- */ /* For now, if we have any pixels missing in the kernel area, */ /* we fallback on using bilinear interpolation. Ideally we */ /* should do "weight adjustment" of our results similarly to */ /* what is done for the cubic spline and lanc. interpolators. */ /* -------------------------------------------------------------------- */ double adfCoeffsY[4] = {}; GWKCubicComputeWeights(dfDeltaY, adfCoeffsY); *pdfDensity = CONVOL4(adfCoeffsY, adfValueDens); *pdfReal = CONVOL4(adfCoeffsY, adfValueReal); *pdfImag = CONVOL4(adfCoeffsY, adfValueImag); return true; } // We do not define USE_SSE_CUBIC_IMPL since in practice, it gives zero // perf benefit. #if defined(USE_SSE_CUBIC_IMPL) && (defined(__x86_64) || defined(_M_X64)) /************************************************************************/ /* XMMLoad4Values() */ /* */ /* Load 4 packed byte or uint16, cast them to float and put them in a */ /* m128 register. */ /************************************************************************/ static CPL_INLINE __m128 XMMLoad4Values(const GByte *ptr) { unsigned int i; memcpy(&i, ptr, 4); __m128i xmm_i = _mm_cvtsi32_si128(s); // Zero extend 4 packed unsigned 8-bit integers in a to packed // 32-bit integers. #if __SSE4_1__ xmm_i = _mm_cvtepu8_epi32(xmm_i); #else xmm_i = _mm_unpacklo_epi8(xmm_i, _mm_setzero_si128()); xmm_i = _mm_unpacklo_epi16(xmm_i, _mm_setzero_si128()); #endif return _mm_cvtepi32_ps(xmm_i); } static CPL_INLINE __m128 XMMLoad4Values(const GUInt16 *ptr) { GUInt64 i; memcpy(&i, ptr, 8); __m128i xmm_i = _mm_cvtsi64_si128(s); // Zero extend 4 packed unsigned 16-bit integers in a to packed // 32-bit integers. #if __SSE4_1__ xmm_i = _mm_cvtepu16_epi32(xmm_i); #else xmm_i = _mm_unpacklo_epi16(xmm_i, _mm_setzero_si128()); #endif return _mm_cvtepi32_ps(xmm_i); } /************************************************************************/ /* XMMHorizontalAdd() */ /* */ /* Return the sum of the 4 floating points of the register. */ /************************************************************************/ #if __SSE3__ static CPL_INLINE float XMMHorizontalAdd(__m128 v) { __m128 shuf = _mm_movehdup_ps(v); // (v3 , v3 , v1 , v1) __m128 sums = _mm_add_ps(v, shuf); // (v3+v3, v3+v2, v1+v1, v1+v0) shuf = _mm_movehl_ps(shuf, sums); // (v3 , v3 , v3+v3, v3+v2) sums = _mm_add_ss(sums, shuf); // (v1+v0)+(v3+v2) return _mm_cvtss_f32(sums); } #else static CPL_INLINE float XMMHorizontalAdd(__m128 v) { __m128 shuf = _mm_movehl_ps(v, v); // (v3 , v2 , v3 , v2) __m128 sums = _mm_add_ps(v, shuf); // (v3+v3, v2+v2, v3+v1, v2+v0) shuf = _mm_shuffle_ps(sums, sums, 1); // (v2+v0, v2+v0, v2+v0, v3+v1) sums = _mm_add_ss(sums, shuf); // (v2+v0)+(v3+v1) return _mm_cvtss_f32(sums); } #endif #endif // defined(USE_SSE_CUBIC_IMPL) && (defined(__x86_64) || defined(_M_X64)) /************************************************************************/ /* GWKCubicResampleSrcMaskIsDensity4SampleRealT() */ /************************************************************************/ // Note: if USE_SSE_CUBIC_IMPL, only instantiate that for Byte and UInt16, // because there are a few assumptions above those types. template static CPL_INLINE bool GWKCubicResampleSrcMaskIsDensity4SampleRealT( const GDALWarpKernel *poWK, int iBand, double dfSrcX, double dfSrcY, double *pdfDensity, double *pdfReal) { const int iSrcX = static_cast(dfSrcX - 0.5); const int iSrcY = static_cast(dfSrcY - 0.5); const GPtrDiff_t iSrcOffset = iSrcX + static_cast(iSrcY) * poWK->nSrcXSize; // Get the bilinear interpolation at the image borders. if (iSrcX - 1 < 0 || iSrcX + 2 >= poWK->nSrcXSize || iSrcY - 1 < 0 || iSrcY + 2 >= poWK->nSrcYSize) { double adfImagIgnored[4] = {}; return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY, pdfDensity, pdfReal, adfImagIgnored); } #if defined(USE_SSE_CUBIC_IMPL) && (defined(__x86_64) || defined(_M_X64)) const float fDeltaX = static_cast(dfSrcX) - 0.5f - iSrcX; const float fDeltaY = static_cast(dfSrcY) - 0.5f - iSrcY; // TODO(schwehr): Explain the magic numbers. float afTemp[4 + 4 + 4 + 1]; float *pafAligned = reinterpret_cast(afTemp + ((size_t)afTemp & 0xf)); float *pafCoeffs = pafAligned; float *pafDensity = pafAligned + 4; float *pafValue = pafAligned + 8; const float fHalfDeltaX = 0.5f * fDeltaX; const float fThreeDeltaX = 3.0f * fDeltaX; const float fHalfDeltaX2 = fHalfDeltaX * fDeltaX; pafCoeffs[0] = fHalfDeltaX * (-1 + fDeltaX * (2 - fDeltaX)); pafCoeffs[1] = 1 + fHalfDeltaX2 * (-5 + fThreeDeltaX); pafCoeffs[2] = fHalfDeltaX * (1 + fDeltaX * (4 - fThreeDeltaX)); pafCoeffs[3] = fHalfDeltaX2 * (-1 + fDeltaX); __m128 xmmCoeffs = _mm_load_ps(pafCoeffs); const __m128 xmmThreshold = _mm_load1_ps(&SRC_DENSITY_THRESHOLD); __m128 xmmMaskLowDensity = _mm_setzero_ps(); for (GPtrDiff_t i = -1, iOffset = iSrcOffset - poWK->nSrcXSize - 1; i < 3; i++, iOffset += poWK->nSrcXSize) { const __m128 xmmDensity = _mm_loadu_ps(poWK->pafUnifiedSrcDensity + iOffset); xmmMaskLowDensity = _mm_or_ps(xmmMaskLowDensity, _mm_cmplt_ps(xmmDensity, xmmThreshold)); pafDensity[i + 1] = XMMHorizontalAdd(_mm_mul_ps(xmmCoeffs, xmmDensity)); const __m128 xmmValues = XMMLoad4Values(((T *)poWK->papabySrcImage[iBand]) + iOffset); pafValue[i + 1] = XMMHorizontalAdd(_mm_mul_ps(xmmCoeffs, xmmValues)); } if (_mm_movemask_ps(xmmMaskLowDensity)) { double adfImagIgnored[4] = {}; return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY, pdfDensity, pdfReal, adfImagIgnored); } const float fHalfDeltaY = 0.5f * fDeltaY; const float fThreeDeltaY = 3.0f * fDeltaY; const float fHalfDeltaY2 = fHalfDeltaY * fDeltaY; pafCoeffs[0] = fHalfDeltaY * (-1 + fDeltaY * (2 - fDeltaY)); pafCoeffs[1] = 1 + fHalfDeltaY2 * (-5 + fThreeDeltaY); pafCoeffs[2] = fHalfDeltaY * (1 + fDeltaY * (4 - fThreeDeltaY)); pafCoeffs[3] = fHalfDeltaY2 * (-1 + fDeltaY); xmmCoeffs = _mm_load_ps(pafCoeffs); const __m128 xmmDensity = _mm_load_ps(pafDensity); const __m128 xmmValue = _mm_load_ps(pafValue); *pdfDensity = XMMHorizontalAdd(_mm_mul_ps(xmmCoeffs, xmmDensity)); *pdfReal = XMMHorizontalAdd(_mm_mul_ps(xmmCoeffs, xmmValue)); // We did all above computations on float32 whereas the general case is // float64. Not sure if one is fundamentally more correct than the other // one, but we want our optimization to give the same result as the // general case as much as possible, so if the resulting value is // close to some_int_value + 0.5, redo the computation with the general // case. // Note: If other types than Byte or UInt16, will need changes. if (fabs(*pdfReal - static_cast(*pdfReal) - 0.5) > .007) return true; #endif // defined(USE_SSE_CUBIC_IMPL) && (defined(__x86_64) || defined(_M_X64)) const double dfDeltaX = dfSrcX - 0.5 - iSrcX; const double dfDeltaY = dfSrcY - 0.5 - iSrcY; double adfValueDens[4] = {}; double adfValueReal[4] = {}; double adfCoeffsX[4] = {}; GWKCubicComputeWeights(dfDeltaX, adfCoeffsX); double adfCoeffsY[4] = {}; GWKCubicComputeWeights(dfDeltaY, adfCoeffsY); for (GPtrDiff_t i = -1; i < 3; i++) { const GPtrDiff_t iOffset = iSrcOffset + i * poWK->nSrcXSize - 1; #if !(defined(USE_SSE_CUBIC_IMPL) && (defined(__x86_64) || defined(_M_X64))) if (poWK->pafUnifiedSrcDensity[iOffset + 0] < SRC_DENSITY_THRESHOLD || poWK->pafUnifiedSrcDensity[iOffset + 1] < SRC_DENSITY_THRESHOLD || poWK->pafUnifiedSrcDensity[iOffset + 2] < SRC_DENSITY_THRESHOLD || poWK->pafUnifiedSrcDensity[iOffset + 3] < SRC_DENSITY_THRESHOLD) { double adfImagIgnored[4] = {}; return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY, pdfDensity, pdfReal, adfImagIgnored); } #endif adfValueDens[i + 1] = CONVOL4(adfCoeffsX, poWK->pafUnifiedSrcDensity + iOffset); adfValueReal[i + 1] = CONVOL4( adfCoeffsX, reinterpret_cast(poWK->papabySrcImage[iBand]) + iOffset); } *pdfDensity = CONVOL4(adfCoeffsY, adfValueDens); *pdfReal = CONVOL4(adfCoeffsY, adfValueReal); return true; } /************************************************************************/ /* GWKCubicResampleSrcMaskIsDensity4SampleReal() */ /* Bi-cubic when source has and only has pafUnifiedSrcDensity. */ /************************************************************************/ static bool GWKCubicResampleSrcMaskIsDensity4SampleReal( const GDALWarpKernel *poWK, int iBand, double dfSrcX, double dfSrcY, double *pdfDensity, double *pdfReal) { const int iSrcX = static_cast(dfSrcX - 0.5); const int iSrcY = static_cast(dfSrcY - 0.5); const GPtrDiff_t iSrcOffset = iSrcX + static_cast(iSrcY) * poWK->nSrcXSize; const double dfDeltaX = dfSrcX - 0.5 - iSrcX; const double dfDeltaY = dfSrcY - 0.5 - iSrcY; // Get the bilinear interpolation at the image borders. if (iSrcX - 1 < 0 || iSrcX + 2 >= poWK->nSrcXSize || iSrcY - 1 < 0 || iSrcY + 2 >= poWK->nSrcYSize) { double adfImagIgnored[4] = {}; return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY, pdfDensity, pdfReal, adfImagIgnored); } double adfCoeffsX[4] = {}; GWKCubicComputeWeights(dfDeltaX, adfCoeffsX); double adfCoeffsY[4] = {}; GWKCubicComputeWeights(dfDeltaY, adfCoeffsY); double adfValueDens[4] = {}; double adfValueReal[4] = {}; double adfDensity[4] = {}; double adfReal[4] = {}; double adfImagIgnored[4] = {}; for (GPtrDiff_t i = -1; i < 3; i++) { if (!GWKGetPixelRow(poWK, iBand, iSrcOffset + i * poWK->nSrcXSize - 1, 2, adfDensity, adfReal, adfImagIgnored) || adfDensity[0] < SRC_DENSITY_THRESHOLD || adfDensity[1] < SRC_DENSITY_THRESHOLD || adfDensity[2] < SRC_DENSITY_THRESHOLD || adfDensity[3] < SRC_DENSITY_THRESHOLD) { return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY, pdfDensity, pdfReal, adfImagIgnored); } adfValueDens[i + 1] = CONVOL4(adfCoeffsX, adfDensity); adfValueReal[i + 1] = CONVOL4(adfCoeffsX, adfReal); } *pdfDensity = CONVOL4(adfCoeffsY, adfValueDens); *pdfReal = CONVOL4(adfCoeffsY, adfValueReal); return true; } template static bool GWKCubicResampleNoMasks4SampleT(const GDALWarpKernel *poWK, int iBand, double dfSrcX, double dfSrcY, T *pValue) { const int iSrcX = static_cast(dfSrcX - 0.5); const int iSrcY = static_cast(dfSrcY - 0.5); const GPtrDiff_t iSrcOffset = iSrcX + static_cast(iSrcY) * poWK->nSrcXSize; const double dfDeltaX = dfSrcX - 0.5 - iSrcX; const double dfDeltaY = dfSrcY - 0.5 - iSrcY; const double dfDeltaY2 = dfDeltaY * dfDeltaY; const double dfDeltaY3 = dfDeltaY2 * dfDeltaY; // Get the bilinear interpolation at the image borders. if (iSrcX - 1 < 0 || iSrcX + 2 >= poWK->nSrcXSize || iSrcY - 1 < 0 || iSrcY + 2 >= poWK->nSrcYSize) return GWKBilinearResampleNoMasks4SampleT(poWK, iBand, dfSrcX, dfSrcY, pValue); double adfCoeffs[4] = {}; GWKCubicComputeWeights(dfDeltaX, adfCoeffs); double adfValue[4] = {}; for (GPtrDiff_t i = -1; i < 3; i++) { const GPtrDiff_t iOffset = iSrcOffset + i * poWK->nSrcXSize - 1; adfValue[i + 1] = CONVOL4( adfCoeffs, reinterpret_cast(poWK->papabySrcImage[iBand]) + iOffset); } const double dfValue = CubicConvolution(dfDeltaY, dfDeltaY2, dfDeltaY3, adfValue[0], adfValue[1], adfValue[2], adfValue[3]); *pValue = GWKClampValueT(dfValue); return true; } /************************************************************************/ /* GWKLanczosSinc() */ /************************************************************************/ /* * Lanczos windowed sinc interpolation kernel with radius r. * / * | sinc(x) * sinc(x/r), if |x| < r * L(x) = | 1, if x = 0 , * | 0, otherwise * \ * * where sinc(x) = sin(PI * x) / (PI * x). */ static double GWKLanczosSinc(double dfX) { if (dfX == 0.0) return 1.0; const double dfPIX = M_PI * dfX; const double dfPIXoverR = dfPIX / 3; const double dfPIX2overR = dfPIX * dfPIXoverR; return sin(dfPIX) * sin(dfPIXoverR) / dfPIX2overR; } static double GWKLanczosSinc4Values(double *padfValues) { for (int i = 0; i < 4; i++) { if (padfValues[i] == 0.0) { padfValues[i] = 1.0; } else { const double dfPIX = M_PI * padfValues[i]; const double dfPIXoverR = dfPIX / 3; const double dfPIX2overR = dfPIX * dfPIXoverR; padfValues[i] = sin(dfPIX) * sin(dfPIXoverR) / dfPIX2overR; } } return padfValues[0] + padfValues[1] + padfValues[2] + padfValues[3]; } /************************************************************************/ /* GWKBilinear() */ /************************************************************************/ static double GWKBilinear(double dfX) { double dfAbsX = fabs(dfX); if (dfAbsX <= 1.0) return 1 - dfAbsX; else return 0.0; } static double GWKBilinear4Values(double *padfValues) { double dfAbsX0 = fabs(padfValues[0]); double dfAbsX1 = fabs(padfValues[1]); double dfAbsX2 = fabs(padfValues[2]); double dfAbsX3 = fabs(padfValues[3]); if (dfAbsX0 <= 1.0) padfValues[0] = 1 - dfAbsX0; else padfValues[0] = 0.0; if (dfAbsX1 <= 1.0) padfValues[1] = 1 - dfAbsX1; else padfValues[1] = 0.0; if (dfAbsX2 <= 1.0) padfValues[2] = 1 - dfAbsX2; else padfValues[2] = 0.0; if (dfAbsX3 <= 1.0) padfValues[3] = 1 - dfAbsX3; else padfValues[3] = 0.0; return padfValues[0] + padfValues[1] + padfValues[2] + padfValues[3]; } /************************************************************************/ /* GWKCubic() */ /************************************************************************/ static double GWKCubic(double dfX) { // http://en.wikipedia.org/wiki/Bicubic_interpolation#Bicubic_convolution_algorithm // W(x) formula with a = -0.5 (cubic hermite spline ) // or // https://www.cs.utexas.edu/~fussell/courses/cs384g-fall2013/lectures/mitchell/Mitchell.pdf // k(x) (formula 8) with (B,C)=(0,0.5) the Catmull-Rom spline double dfAbsX = fabs(dfX); if (dfAbsX <= 1.0) { double dfX2 = dfX * dfX; return dfX2 * (1.5 * dfAbsX - 2.5) + 1; } else if (dfAbsX <= 2.0) { double dfX2 = dfX * dfX; return dfX2 * (-0.5 * dfAbsX + 2.5) - 4 * dfAbsX + 2; } else return 0.0; } static double GWKCubic4Values(double *padfValues) { const double dfAbsX_0 = fabs(padfValues[0]); const double dfAbsX_1 = fabs(padfValues[1]); const double dfAbsX_2 = fabs(padfValues[2]); const double dfAbsX_3 = fabs(padfValues[3]); const double dfX2_0 = padfValues[0] * padfValues[0]; const double dfX2_1 = padfValues[1] * padfValues[1]; const double dfX2_2 = padfValues[2] * padfValues[2]; const double dfX2_3 = padfValues[3] * padfValues[3]; double dfVal0 = 0.0; if (dfAbsX_0 <= 1.0) dfVal0 = dfX2_0 * (1.5 * dfAbsX_0 - 2.5) + 1.0; else if (dfAbsX_0 <= 2.0) dfVal0 = dfX2_0 * (-0.5 * dfAbsX_0 + 2.5) - 4.0 * dfAbsX_0 + 2.0; double dfVal1 = 0.0; if (dfAbsX_1 <= 1.0) dfVal1 = dfX2_1 * (1.5 * dfAbsX_1 - 2.5) + 1.0; else if (dfAbsX_1 <= 2.0) dfVal1 = dfX2_1 * (-0.5 * dfAbsX_1 + 2.5) - 4.0 * dfAbsX_1 + 2.0; double dfVal2 = 0.0; if (dfAbsX_2 <= 1.0) dfVal2 = dfX2_2 * (1.5 * dfAbsX_2 - 2.5) + 1.0; else if (dfAbsX_2 <= 2.0) dfVal2 = dfX2_2 * (-0.5 * dfAbsX_2 + 2.5) - 4.0 * dfAbsX_2 + 2.0; double dfVal3 = 0.0; if (dfAbsX_3 <= 1.0) dfVal3 = dfX2_3 * (1.5 * dfAbsX_3 - 2.5) + 1.0; else if (dfAbsX_3 <= 2.0) dfVal3 = dfX2_3 * (-0.5 * dfAbsX_3 + 2.5) - 4.0 * dfAbsX_3 + 2.0; padfValues[0] = dfVal0; padfValues[1] = dfVal1; padfValues[2] = dfVal2; padfValues[3] = dfVal3; return dfVal0 + dfVal1 + dfVal2 + dfVal3; } /************************************************************************/ /* GWKBSpline() */ /************************************************************************/ // https://www.cs.utexas.edu/~fussell/courses/cs384g-fall2013/lectures/mitchell/Mitchell.pdf // Equation 8 with (B,C)=(1,0) // 1/6 * ( 3 * |x|^3 - 6 * |x|^2 + 4) |x| < 1 // 1/6 * ( -|x|^3 + 6 |x|^2 - 12|x| + 8) |x| >= 1 and |x| < 2 static double GWKBSpline(double x) { const double xp2 = x + 2.0; const double xp1 = x + 1.0; const double xm1 = x - 1.0; // This will most likely be used, so we'll compute it ahead of time to // avoid stalling the processor. const double xp2c = xp2 * xp2 * xp2; // Note that the test is computed only if it is needed. // TODO(schwehr): Make this easier to follow. return xp2 > 0.0 ? ((xp1 > 0.0) ? ((x > 0.0) ? ((xm1 > 0.0) ? -4.0 * xm1 * xm1 * xm1 : 0.0) + 6.0 * x * x * x : 0.0) + -4.0 * xp1 * xp1 * xp1 : 0.0) + xp2c : 0.0; // * 0.166666666666666666666 } static double GWKBSpline4Values(double *padfValues) { for (int i = 0; i < 4; i++) { const double x = padfValues[i]; const double xp2 = x + 2.0; const double xp1 = x + 1.0; const double xm1 = x - 1.0; // This will most likely be used, so we'll compute it ahead of time to // avoid stalling the processor. const double xp2c = xp2 * xp2 * xp2; // Note that the test is computed only if it is needed. // TODO(schwehr): Make this easier to follow. padfValues[i] = (xp2 > 0.0) ? ((xp1 > 0.0) ? ((x > 0.0) ? ((xm1 > 0.0) ? -4.0 * xm1 * xm1 * xm1 : 0.0) + 6.0 * x * x * x : 0.0) + -4.0 * xp1 * xp1 * xp1 : 0.0) + xp2c : 0.0; // * 0.166666666666666666666 } return padfValues[0] + padfValues[1] + padfValues[2] + padfValues[3]; } /************************************************************************/ /* GWKResampleWrkStruct */ /************************************************************************/ typedef struct _GWKResampleWrkStruct GWKResampleWrkStruct; typedef bool (*pfnGWKResampleType)(const GDALWarpKernel *poWK, int iBand, double dfSrcX, double dfSrcY, double *pdfDensity, double *pdfReal, double *pdfImag, GWKResampleWrkStruct *psWrkStruct); struct _GWKResampleWrkStruct { pfnGWKResampleType pfnGWKResample; // Space for saved X weights. double *padfWeightsX; bool *pabCalcX; double *padfWeightsY; // Only used by GWKResampleOptimizedLanczos. int iLastSrcX; // Only used by GWKResampleOptimizedLanczos. int iLastSrcY; // Only used by GWKResampleOptimizedLanczos. double dfLastDeltaX; // Only used by GWKResampleOptimizedLanczos. double dfLastDeltaY; // Only used by GWKResampleOptimizedLanczos. // Space for saving a row of pixels. double *padfRowDensity; double *padfRowReal; double *padfRowImag; }; /************************************************************************/ /* GWKResampleCreateWrkStruct() */ /************************************************************************/ static bool GWKResample(const GDALWarpKernel *poWK, int iBand, double dfSrcX, double dfSrcY, double *pdfDensity, double *pdfReal, double *pdfImag, GWKResampleWrkStruct *psWrkStruct); static bool GWKResampleOptimizedLanczos(const GDALWarpKernel *poWK, int iBand, double dfSrcX, double dfSrcY, double *pdfDensity, double *pdfReal, double *pdfImag, GWKResampleWrkStruct *psWrkStruct); static GWKResampleWrkStruct *GWKResampleCreateWrkStruct(GDALWarpKernel *poWK) { const int nXDist = (poWK->nXRadius + 1) * 2; const int nYDist = (poWK->nYRadius + 1) * 2; GWKResampleWrkStruct *psWrkStruct = static_cast( CPLMalloc(sizeof(GWKResampleWrkStruct))); // Alloc space for saved X weights. psWrkStruct->padfWeightsX = static_cast(CPLCalloc(nXDist, sizeof(double))); psWrkStruct->pabCalcX = static_cast(CPLMalloc(nXDist * sizeof(bool))); psWrkStruct->padfWeightsY = static_cast(CPLCalloc(nYDist, sizeof(double))); psWrkStruct->iLastSrcX = -10; psWrkStruct->iLastSrcY = -10; psWrkStruct->dfLastDeltaX = -10; psWrkStruct->dfLastDeltaY = -10; // Alloc space for saving a row of pixels. if (poWK->pafUnifiedSrcDensity == nullptr && poWK->panUnifiedSrcValid == nullptr && poWK->papanBandSrcValid == nullptr) { psWrkStruct->padfRowDensity = nullptr; } else { psWrkStruct->padfRowDensity = static_cast(CPLCalloc(nXDist, sizeof(double))); } psWrkStruct->padfRowReal = static_cast(CPLCalloc(nXDist, sizeof(double))); psWrkStruct->padfRowImag = static_cast(CPLCalloc(nXDist, sizeof(double))); if (poWK->eResample == GRA_Lanczos) { psWrkStruct->pfnGWKResample = GWKResampleOptimizedLanczos; const double dfXScale = poWK->dfXScale; if (dfXScale < 1.0) { int iMin = poWK->nFiltInitX; int iMax = poWK->nXRadius; while (iMin * dfXScale < -3.0) iMin++; while (iMax * dfXScale > 3.0) iMax--; for (int i = iMin; i <= iMax; ++i) { psWrkStruct->padfWeightsX[i - poWK->nFiltInitX] = GWKLanczosSinc(i * dfXScale); } } const double dfYScale = poWK->dfYScale; if (dfYScale < 1.0) { int jMin = poWK->nFiltInitY; int jMax = poWK->nYRadius; while (jMin * dfYScale < -3.0) jMin++; while (jMax * dfYScale > 3.0) jMax--; for (int j = jMin; j <= jMax; ++j) { psWrkStruct->padfWeightsY[j - poWK->nFiltInitY] = GWKLanczosSinc(j * dfYScale); } } } else psWrkStruct->pfnGWKResample = GWKResample; return psWrkStruct; } /************************************************************************/ /* GWKResampleDeleteWrkStruct() */ /************************************************************************/ static void GWKResampleDeleteWrkStruct(GWKResampleWrkStruct *psWrkStruct) { CPLFree(psWrkStruct->padfWeightsX); CPLFree(psWrkStruct->padfWeightsY); CPLFree(psWrkStruct->pabCalcX); CPLFree(psWrkStruct->padfRowDensity); CPLFree(psWrkStruct->padfRowReal); CPLFree(psWrkStruct->padfRowImag); CPLFree(psWrkStruct); } /************************************************************************/ /* GWKResample() */ /************************************************************************/ static bool GWKResample(const GDALWarpKernel *poWK, int iBand, double dfSrcX, double dfSrcY, double *pdfDensity, double *pdfReal, double *pdfImag, GWKResampleWrkStruct *psWrkStruct) { // Save as local variables to avoid following pointers in loops. const int nSrcXSize = poWK->nSrcXSize; const int nSrcYSize = poWK->nSrcYSize; double dfAccumulatorReal = 0.0; double dfAccumulatorImag = 0.0; double dfAccumulatorDensity = 0.0; double dfAccumulatorWeight = 0.0; const int iSrcX = static_cast(floor(dfSrcX - 0.5)); const int iSrcY = static_cast(floor(dfSrcY - 0.5)); const GPtrDiff_t iSrcOffset = iSrcX + static_cast(iSrcY) * nSrcXSize; const double dfDeltaX = dfSrcX - 0.5 - iSrcX; const double dfDeltaY = dfSrcY - 0.5 - iSrcY; const double dfXScale = poWK->dfXScale; const double dfYScale = poWK->dfYScale; const int nXDist = (poWK->nXRadius + 1) * 2; // Space for saved X weights. double *padfWeightsX = psWrkStruct->padfWeightsX; bool *pabCalcX = psWrkStruct->pabCalcX; // Space for saving a row of pixels. double *padfRowDensity = psWrkStruct->padfRowDensity; double *padfRowReal = psWrkStruct->padfRowReal; double *padfRowImag = psWrkStruct->padfRowImag; // Mark as needing calculation (don't calculate the weights yet, // because a mask may render it unnecessary). memset(pabCalcX, false, nXDist * sizeof(bool)); FilterFuncType pfnGetWeight = apfGWKFilter[poWK->eResample]; CPLAssert(pfnGetWeight); // Skip sampling over edge of image. int j = poWK->nFiltInitY; int jMax = poWK->nYRadius; if (iSrcY + j < 0) j = -iSrcY; if (iSrcY + jMax >= nSrcYSize) jMax = nSrcYSize - iSrcY - 1; int iMin = poWK->nFiltInitX; int iMax = poWK->nXRadius; if (iSrcX + iMin < 0) iMin = -iSrcX; if (iSrcX + iMax >= nSrcXSize) iMax = nSrcXSize - iSrcX - 1; const int bXScaleBelow1 = (dfXScale < 1.0); const int bYScaleBelow1 = (dfYScale < 1.0); GPtrDiff_t iRowOffset = iSrcOffset + static_cast(j - 1) * nSrcXSize + iMin; // Loop over pixel rows in the kernel. for (; j <= jMax; ++j) { iRowOffset += nSrcXSize; // Get pixel values. // We can potentially read extra elements after the "normal" end of the // source arrays, but the contract of papabySrcImage[iBand], // papanBandSrcValid[iBand], panUnifiedSrcValid and pafUnifiedSrcDensity // is to have WARP_EXTRA_ELTS reserved at their end. if (!GWKGetPixelRow(poWK, iBand, iRowOffset, (iMax - iMin + 2) / 2, padfRowDensity, padfRowReal, padfRowImag)) continue; // Calculate the Y weight. double dfWeight1 = (bYScaleBelow1) ? pfnGetWeight((j - dfDeltaY) * dfYScale) : pfnGetWeight(j - dfDeltaY); // Iterate over pixels in row. double dfAccumulatorRealLocal = 0.0; double dfAccumulatorImagLocal = 0.0; double dfAccumulatorDensityLocal = 0.0; double dfAccumulatorWeightLocal = 0.0; for (int i = iMin; i <= iMax; ++i) { // Skip sampling if pixel has zero density. if (padfRowDensity != nullptr && padfRowDensity[i - iMin] < SRC_DENSITY_THRESHOLD) continue; double dfWeight2 = 0.0; // Make or use a cached set of weights for this row. if (pabCalcX[i - iMin]) { // Use saved weight value instead of recomputing it. dfWeight2 = padfWeightsX[i - iMin]; } else { // Calculate & save the X weight. padfWeightsX[i - iMin] = dfWeight2 = (bXScaleBelow1) ? pfnGetWeight((i - dfDeltaX) * dfXScale) : pfnGetWeight(i - dfDeltaX); pabCalcX[i - iMin] = true; } // Accumulate! dfAccumulatorRealLocal += padfRowReal[i - iMin] * dfWeight2; dfAccumulatorImagLocal += padfRowImag[i - iMin] * dfWeight2; if (padfRowDensity != nullptr) dfAccumulatorDensityLocal += padfRowDensity[i - iMin] * dfWeight2; dfAccumulatorWeightLocal += dfWeight2; } dfAccumulatorReal += dfAccumulatorRealLocal * dfWeight1; dfAccumulatorImag += dfAccumulatorImagLocal * dfWeight1; dfAccumulatorDensity += dfAccumulatorDensityLocal * dfWeight1; dfAccumulatorWeight += dfAccumulatorWeightLocal * dfWeight1; } if (dfAccumulatorWeight < 0.000001 || (padfRowDensity != nullptr && dfAccumulatorDensity < 0.000001)) { *pdfDensity = 0.0; return false; } // Calculate the output taking into account weighting. if (dfAccumulatorWeight < 0.99999 || dfAccumulatorWeight > 1.00001) { *pdfReal = dfAccumulatorReal / dfAccumulatorWeight; *pdfImag = dfAccumulatorImag / dfAccumulatorWeight; if (padfRowDensity != nullptr) *pdfDensity = dfAccumulatorDensity / dfAccumulatorWeight; else *pdfDensity = 1.0; } else { *pdfReal = dfAccumulatorReal; *pdfImag = dfAccumulatorImag; if (padfRowDensity != nullptr) *pdfDensity = dfAccumulatorDensity; else *pdfDensity = 1.0; } return true; } /************************************************************************/ /* GWKResampleOptimizedLanczos() */ /************************************************************************/ static bool GWKResampleOptimizedLanczos(const GDALWarpKernel *poWK, int iBand, double dfSrcX, double dfSrcY, double *pdfDensity, double *pdfReal, double *pdfImag, GWKResampleWrkStruct *psWrkStruct) { // Save as local variables to avoid following pointers in loops. const int nSrcXSize = poWK->nSrcXSize; const int nSrcYSize = poWK->nSrcYSize; double dfAccumulatorReal = 0.0; double dfAccumulatorImag = 0.0; double dfAccumulatorDensity = 0.0; double dfAccumulatorWeight = 0.0; const int iSrcX = static_cast(floor(dfSrcX - 0.5)); const int iSrcY = static_cast(floor(dfSrcY - 0.5)); const GPtrDiff_t iSrcOffset = iSrcX + static_cast(iSrcY) * nSrcXSize; const double dfDeltaX = dfSrcX - 0.5 - iSrcX; const double dfDeltaY = dfSrcY - 0.5 - iSrcY; const double dfXScale = poWK->dfXScale; const double dfYScale = poWK->dfYScale; // Space for saved X weights. double *padfWeightsX = psWrkStruct->padfWeightsX; double *padfWeightsY = psWrkStruct->padfWeightsY; // Space for saving a row of pixels. double *padfRowDensity = psWrkStruct->padfRowDensity; double *padfRowReal = psWrkStruct->padfRowReal; double *padfRowImag = psWrkStruct->padfRowImag; // Skip sampling over edge of image. int jMin = poWK->nFiltInitY; int jMax = poWK->nYRadius; if (iSrcY + jMin < 0) jMin = -iSrcY; if (iSrcY + jMax >= nSrcYSize) jMax = nSrcYSize - iSrcY - 1; int iMin = poWK->nFiltInitX; int iMax = poWK->nXRadius; if (iSrcX + iMin < 0) iMin = -iSrcX; if (iSrcX + iMax >= nSrcXSize) iMax = nSrcXSize - iSrcX - 1; if (dfXScale < 1.0) { while (iMin * dfXScale < -3.0) iMin++; while (iMax * dfXScale > 3.0) iMax--; // padfWeightsX computed in GWKResampleCreateWrkStruct. } else { while (iMin - dfDeltaX < -3.0) iMin++; while (iMax - dfDeltaX > 3.0) iMax--; if (iSrcX != psWrkStruct->iLastSrcX || dfDeltaX != psWrkStruct->dfLastDeltaX) { // Optimisation of GWKLanczosSinc(i - dfDeltaX) based on the // following trigonometric formulas. // TODO(schwehr): Move this somewhere where it can be rendered at // LaTeX. sin(M_PI * (dfBase + k)) = sin(M_PI * dfBase) * cos(M_PI * // k) + cos(M_PI * dfBase) * sin(M_PI * k) sin(M_PI * (dfBase + k)) // = dfSinPIBase * cos(M_PI * k) + dfCosPIBase * sin(M_PI * k) // sin(M_PI * (dfBase + k)) = dfSinPIBase * cos(M_PI * k) // sin(M_PI * (dfBase + k)) = dfSinPIBase * (((k % 2) == 0) ? 1 : // -1) // sin(M_PI / dfR * (dfBase + k)) = sin(M_PI / dfR * dfBase) * // cos(M_PI / dfR * k) + cos(M_PI / dfR * dfBase) * sin(M_PI / dfR * // k) sin(M_PI / dfR * (dfBase + k)) = dfSinPIBaseOverR * cos(M_PI / // dfR * k) + dfCosPIBaseOverR * sin(M_PI / dfR * k) const double dfSinPIDeltaXOver3 = sin((-M_PI / 3.0) * dfDeltaX); const double dfSin2PIDeltaXOver3 = dfSinPIDeltaXOver3 * dfSinPIDeltaXOver3; // Ok to use sqrt(1-sin^2) since M_PI / 3 * dfDeltaX < PI/2. const double dfCosPIDeltaXOver3 = sqrt(1.0 - dfSin2PIDeltaXOver3); const double dfSinPIDeltaX = (3.0 - 4 * dfSin2PIDeltaXOver3) * dfSinPIDeltaXOver3; const double dfInvPI2Over3 = 3.0 / (M_PI * M_PI); const double dfInvPI2Over3xSinPIDeltaX = dfInvPI2Over3 * dfSinPIDeltaX; const double dfInvPI2Over3xSinPIDeltaXxm0d5SinPIDeltaXOver3 = -0.5 * dfInvPI2Over3xSinPIDeltaX * dfSinPIDeltaXOver3; const double dfSinPIOver3 = 0.8660254037844386; const double dfInvPI2Over3xSinPIDeltaXxSinPIOver3xCosPIDeltaXOver3 = dfSinPIOver3 * dfInvPI2Over3xSinPIDeltaX * dfCosPIDeltaXOver3; const double padfCst[] = { dfInvPI2Over3xSinPIDeltaX * dfSinPIDeltaXOver3, dfInvPI2Over3xSinPIDeltaXxm0d5SinPIDeltaXOver3 - dfInvPI2Over3xSinPIDeltaXxSinPIOver3xCosPIDeltaXOver3, dfInvPI2Over3xSinPIDeltaXxm0d5SinPIDeltaXOver3 + dfInvPI2Over3xSinPIDeltaXxSinPIOver3xCosPIDeltaXOver3}; for (int i = iMin; i <= iMax; ++i) { const double dfX = i - dfDeltaX; if (dfX == 0.0) padfWeightsX[i - poWK->nFiltInitX] = 1.0; else padfWeightsX[i - poWK->nFiltInitX] = padfCst[(i + 3) % 3] / (dfX * dfX); #if DEBUG_VERBOSE // TODO(schwehr): AlmostEqual. // CPLAssert(fabs(padfWeightsX[i-poWK->nFiltInitX] - // GWKLanczosSinc(dfX, 3.0)) < 1e-10); #endif } psWrkStruct->iLastSrcX = iSrcX; psWrkStruct->dfLastDeltaX = dfDeltaX; } } if (dfYScale < 1.0) { while (jMin * dfYScale < -3.0) jMin++; while (jMax * dfYScale > 3.0) jMax--; // padfWeightsY computed in GWKResampleCreateWrkStruct. } else { while (jMin - dfDeltaY < -3.0) jMin++; while (jMax - dfDeltaY > 3.0) jMax--; if (iSrcY != psWrkStruct->iLastSrcY || dfDeltaY != psWrkStruct->dfLastDeltaY) { const double dfSinPIDeltaYOver3 = sin((-M_PI / 3.0) * dfDeltaY); const double dfSin2PIDeltaYOver3 = dfSinPIDeltaYOver3 * dfSinPIDeltaYOver3; // Ok to use sqrt(1-sin^2) since M_PI / 3 * dfDeltaY < PI/2. const double dfCosPIDeltaYOver3 = sqrt(1.0 - dfSin2PIDeltaYOver3); const double dfSinPIDeltaY = (3.0 - 4.0 * dfSin2PIDeltaYOver3) * dfSinPIDeltaYOver3; const double dfInvPI2Over3 = 3.0 / (M_PI * M_PI); const double dfInvPI2Over3xSinPIDeltaY = dfInvPI2Over3 * dfSinPIDeltaY; const double dfInvPI2Over3xSinPIDeltaYxm0d5SinPIDeltaYOver3 = -0.5 * dfInvPI2Over3xSinPIDeltaY * dfSinPIDeltaYOver3; const double dfSinPIOver3 = 0.8660254037844386; const double dfInvPI2Over3xSinPIDeltaYxSinPIOver3xCosPIDeltaYOver3 = dfSinPIOver3 * dfInvPI2Over3xSinPIDeltaY * dfCosPIDeltaYOver3; const double padfCst[] = { dfInvPI2Over3xSinPIDeltaY * dfSinPIDeltaYOver3, dfInvPI2Over3xSinPIDeltaYxm0d5SinPIDeltaYOver3 - dfInvPI2Over3xSinPIDeltaYxSinPIOver3xCosPIDeltaYOver3, dfInvPI2Over3xSinPIDeltaYxm0d5SinPIDeltaYOver3 + dfInvPI2Over3xSinPIDeltaYxSinPIOver3xCosPIDeltaYOver3}; for (int j = jMin; j <= jMax; ++j) { const double dfY = j - dfDeltaY; if (dfY == 0.0) padfWeightsY[j - poWK->nFiltInitY] = 1.0; else padfWeightsY[j - poWK->nFiltInitY] = padfCst[(j + 3) % 3] / (dfY * dfY); #if DEBUG_VERBOSE // TODO(schwehr): AlmostEqual. // CPLAssert(fabs(padfWeightsY[j-poWK->nFiltInitY] - // GWKLanczosSinc(dfY, 3.0)) < 1e-10); #endif } psWrkStruct->iLastSrcY = iSrcY; psWrkStruct->dfLastDeltaY = dfDeltaY; } } GPtrDiff_t iRowOffset = iSrcOffset + static_cast(jMin - 1) * nSrcXSize + iMin; // If we have no density information, we can simply compute the // accumulated weight. if (padfRowDensity == nullptr) { double dfRowAccWeight = 0.0; for (int i = iMin; i <= iMax; ++i) { dfRowAccWeight += padfWeightsX[i - poWK->nFiltInitX]; } double dfColAccWeight = 0.0; for (int j = jMin; j <= jMax; ++j) { dfColAccWeight += padfWeightsY[j - poWK->nFiltInitY]; } dfAccumulatorWeight = dfRowAccWeight * dfColAccWeight; } const bool bIsNonComplex = !GDALDataTypeIsComplex(poWK->eWorkingDataType); // Loop over pixel rows in the kernel. int nCountValid = 0; for (int j = jMin; j <= jMax; ++j) { iRowOffset += nSrcXSize; // Get pixel values. // We can potentially read extra elements after the "normal" end of the // source arrays, but the contract of papabySrcImage[iBand], // papanBandSrcValid[iBand], panUnifiedSrcValid and pafUnifiedSrcDensity // is to have WARP_EXTRA_ELTS reserved at their end. if (!GWKGetPixelRow(poWK, iBand, iRowOffset, (iMax - iMin + 2) / 2, padfRowDensity, padfRowReal, padfRowImag)) continue; const double dfWeight1 = padfWeightsY[j - poWK->nFiltInitY]; // Iterate over pixels in row. if (padfRowDensity != nullptr) { for (int i = iMin; i <= iMax; ++i) { // Skip sampling if pixel has zero density. if (padfRowDensity[i - iMin] < SRC_DENSITY_THRESHOLD) continue; nCountValid++; // Use a cached set of weights for this row. const double dfWeight2 = dfWeight1 * padfWeightsX[i - poWK->nFiltInitX]; // Accumulate! dfAccumulatorReal += padfRowReal[i - iMin] * dfWeight2; dfAccumulatorImag += padfRowImag[i - iMin] * dfWeight2; dfAccumulatorDensity += padfRowDensity[i - iMin] * dfWeight2; dfAccumulatorWeight += dfWeight2; } } else if (bIsNonComplex) { double dfRowAccReal = 0.0; for (int i = iMin; i <= iMax; ++i) { const double dfWeight2 = padfWeightsX[i - poWK->nFiltInitX]; // Accumulate! dfRowAccReal += padfRowReal[i - iMin] * dfWeight2; } dfAccumulatorReal += dfRowAccReal * dfWeight1; } else { double dfRowAccReal = 0.0; double dfRowAccImag = 0.0; for (int i = iMin; i <= iMax; ++i) { const double dfWeight2 = padfWeightsX[i - poWK->nFiltInitX]; // Accumulate! dfRowAccReal += padfRowReal[i - iMin] * dfWeight2; dfRowAccImag += padfRowImag[i - iMin] * dfWeight2; } dfAccumulatorReal += dfRowAccReal * dfWeight1; dfAccumulatorImag += dfRowAccImag * dfWeight1; } } if (dfAccumulatorWeight < 0.000001 || (padfRowDensity != nullptr && (dfAccumulatorDensity < 0.000001 || nCountValid < (jMax - jMin + 1) * (iMax - iMin + 1) / 2))) { *pdfDensity = 0.0; return false; } // Calculate the output taking into account weighting. if (dfAccumulatorWeight < 0.99999 || dfAccumulatorWeight > 1.00001) { const double dfInvAcc = 1.0 / dfAccumulatorWeight; *pdfReal = dfAccumulatorReal * dfInvAcc; *pdfImag = dfAccumulatorImag * dfInvAcc; if (padfRowDensity != nullptr) *pdfDensity = dfAccumulatorDensity * dfInvAcc; else *pdfDensity = 1.0; } else { *pdfReal = dfAccumulatorReal; *pdfImag = dfAccumulatorImag; if (padfRowDensity != nullptr) *pdfDensity = dfAccumulatorDensity; else *pdfDensity = 1.0; } return true; } /************************************************************************/ /* GWKResampleNoMasksT() */ /************************************************************************/ template static bool GWKResampleNoMasksT(const GDALWarpKernel *poWK, int iBand, double dfSrcX, double dfSrcY, T *pValue, double *padfWeight) { // Commonly used; save locally. const int nSrcXSize = poWK->nSrcXSize; const int nSrcYSize = poWK->nSrcYSize; const int iSrcX = static_cast(floor(dfSrcX - 0.5)); const int iSrcY = static_cast(floor(dfSrcY - 0.5)); const GPtrDiff_t iSrcOffset = iSrcX + static_cast(iSrcY) * nSrcXSize; const int nXRadius = poWK->nXRadius; const int nYRadius = poWK->nYRadius; // Politely refuse to process invalid coordinates or obscenely small image. if (iSrcX >= nSrcXSize || iSrcY >= nSrcYSize || nXRadius > nSrcXSize || nYRadius > nSrcYSize) return GWKBilinearResampleNoMasks4SampleT(poWK, iBand, dfSrcX, dfSrcY, pValue); T *pSrcBand = reinterpret_cast(poWK->papabySrcImage[iBand]); const double dfDeltaX = dfSrcX - 0.5 - iSrcX; const double dfDeltaY = dfSrcY - 0.5 - iSrcY; const FilterFuncType pfnGetWeight = apfGWKFilter[poWK->eResample]; CPLAssert(pfnGetWeight); const FilterFunc4ValuesType pfnGetWeight4Values = apfGWKFilter4Values[poWK->eResample]; CPLAssert(pfnGetWeight4Values); const double dfXScale = std::min(poWK->dfXScale, 1.0); const double dfYScale = std::min(poWK->dfYScale, 1.0); // Loop over all rows in the kernel. double dfAccumulatorWeightHorizontal = 0.0; double dfAccumulatorWeightVertical = 0.0; int iMin = 1 - nXRadius; if (iSrcX + iMin < 0) iMin = -iSrcX; int iMax = nXRadius; if (iSrcX + iMax >= nSrcXSize - 1) iMax = nSrcXSize - 1 - iSrcX; int i = iMin; // Used after for. int iC = 0; // Used after for. for (; i + 2 < iMax; i += 4, iC += 4) { padfWeight[iC] = (i - dfDeltaX) * dfXScale; padfWeight[iC + 1] = padfWeight[iC] + dfXScale; padfWeight[iC + 2] = padfWeight[iC + 1] + dfXScale; padfWeight[iC + 3] = padfWeight[iC + 2] + dfXScale; dfAccumulatorWeightHorizontal += pfnGetWeight4Values(padfWeight + iC); } for (; i <= iMax; ++i, ++iC) { const double dfWeight = pfnGetWeight((i - dfDeltaX) * dfXScale); padfWeight[iC] = dfWeight; dfAccumulatorWeightHorizontal += dfWeight; } int j = 1 - nYRadius; if (iSrcY + j < 0) j = -iSrcY; int jMax = nYRadius; if (iSrcY + jMax >= nSrcYSize - 1) jMax = nSrcYSize - 1 - iSrcY; double dfAccumulator = 0.0; for (; j <= jMax; ++j) { const GPtrDiff_t iSampJ = iSrcOffset + static_cast(j) * nSrcXSize; // Loop over all pixels in the row. double dfAccumulatorLocal = 0.0; double dfAccumulatorLocal2 = 0.0; iC = 0; i = iMin; // Process by chunk of 4 cols. for (; i + 2 < iMax; i += 4, iC += 4) { // Retrieve the pixel & accumulate. dfAccumulatorLocal += pSrcBand[i + iSampJ] * padfWeight[iC]; dfAccumulatorLocal += pSrcBand[i + 1 + iSampJ] * padfWeight[iC + 1]; dfAccumulatorLocal2 += pSrcBand[i + 2 + iSampJ] * padfWeight[iC + 2]; dfAccumulatorLocal2 += pSrcBand[i + 3 + iSampJ] * padfWeight[iC + 3]; } dfAccumulatorLocal += dfAccumulatorLocal2; if (i < iMax) { dfAccumulatorLocal += pSrcBand[i + iSampJ] * padfWeight[iC]; dfAccumulatorLocal += pSrcBand[i + 1 + iSampJ] * padfWeight[iC + 1]; i += 2; iC += 2; } if (i == iMax) { dfAccumulatorLocal += pSrcBand[i + iSampJ] * padfWeight[iC]; } // Calculate the Y weight. const double dfWeight = pfnGetWeight((j - dfDeltaY) * dfYScale); dfAccumulator += dfWeight * dfAccumulatorLocal; dfAccumulatorWeightVertical += dfWeight; } const double dfAccumulatorWeight = dfAccumulatorWeightHorizontal * dfAccumulatorWeightVertical; *pValue = GWKClampValueT(dfAccumulator / dfAccumulatorWeight); return true; } /* We restrict to 64bit processors because they are guaranteed to have SSE2 */ /* Could possibly be used too on 32bit, but we would need to check at runtime */ #if defined(__x86_64) || defined(_M_X64) /************************************************************************/ /* GWKResampleNoMasks_SSE2_T() */ /************************************************************************/ template static bool GWKResampleNoMasks_SSE2_T(const GDALWarpKernel *poWK, int iBand, double dfSrcX, double dfSrcY, T *pValue, double *padfWeight) { // Commonly used; save locally. const int nSrcXSize = poWK->nSrcXSize; const int nSrcYSize = poWK->nSrcYSize; const int iSrcX = static_cast(floor(dfSrcX - 0.5)); const int iSrcY = static_cast(floor(dfSrcY - 0.5)); const GPtrDiff_t iSrcOffset = iSrcX + static_cast(iSrcY) * nSrcXSize; const int nXRadius = poWK->nXRadius; const int nYRadius = poWK->nYRadius; // Politely refuse to process invalid coordinates or obscenely small image. if (iSrcX >= nSrcXSize || iSrcY >= nSrcYSize || nXRadius > nSrcXSize || nYRadius > nSrcYSize) return GWKBilinearResampleNoMasks4SampleT(poWK, iBand, dfSrcX, dfSrcY, pValue); const T *pSrcBand = reinterpret_cast(poWK->papabySrcImage[iBand]); const FilterFuncType pfnGetWeight = apfGWKFilter[poWK->eResample]; CPLAssert(pfnGetWeight); const FilterFunc4ValuesType pfnGetWeight4Values = apfGWKFilter4Values[poWK->eResample]; CPLAssert(pfnGetWeight4Values); const double dfDeltaX = dfSrcX - 0.5 - iSrcX; const double dfDeltaY = dfSrcY - 0.5 - iSrcY; const double dfXScale = std::min(poWK->dfXScale, 1.0); const double dfYScale = std::min(poWK->dfYScale, 1.0); // Loop over all rows in the kernel. double dfAccumulatorWeightHorizontal = 0.0; double dfAccumulatorWeightVertical = 0.0; double dfAccumulator = 0.0; int iMin = 1 - nXRadius; if (iSrcX + iMin < 0) iMin = -iSrcX; int iMax = nXRadius; if (iSrcX + iMax >= nSrcXSize - 1) iMax = nSrcXSize - 1 - iSrcX; int i, iC; for (iC = 0, i = iMin; i + 2 < iMax; i += 4, iC += 4) { padfWeight[iC] = (i - dfDeltaX) * dfXScale; padfWeight[iC + 1] = padfWeight[iC] + dfXScale; padfWeight[iC + 2] = padfWeight[iC + 1] + dfXScale; padfWeight[iC + 3] = padfWeight[iC + 2] + dfXScale; dfAccumulatorWeightHorizontal += pfnGetWeight4Values(padfWeight + iC); } for (; i <= iMax; ++i, ++iC) { double dfWeight = pfnGetWeight((i - dfDeltaX) * dfXScale); padfWeight[iC] = dfWeight; dfAccumulatorWeightHorizontal += dfWeight; } int j = 1 - nYRadius; if (iSrcY + j < 0) j = -iSrcY; int jMax = nYRadius; if (iSrcY + jMax >= nSrcYSize - 1) jMax = nSrcYSize - 1 - iSrcY; // Process by chunk of 4 rows. for (; j + 2 < jMax; j += 4) { const GPtrDiff_t iSampJ = iSrcOffset + static_cast(j) * nSrcXSize; // Loop over all pixels in the row. iC = 0; i = iMin; // Process by chunk of 4 cols. XMMReg4Double v_acc_1 = XMMReg4Double::Zero(); XMMReg4Double v_acc_2 = XMMReg4Double::Zero(); XMMReg4Double v_acc_3 = XMMReg4Double::Zero(); XMMReg4Double v_acc_4 = XMMReg4Double::Zero(); for (; i + 2 < iMax; i += 4, iC += 4) { // Retrieve the pixel & accumulate. XMMReg4Double v_pixels_1 = XMMReg4Double::Load4Val(pSrcBand + i + iSampJ); XMMReg4Double v_pixels_2 = XMMReg4Double::Load4Val(pSrcBand + i + iSampJ + nSrcXSize); XMMReg4Double v_pixels_3 = XMMReg4Double::Load4Val(pSrcBand + i + iSampJ + 2 * nSrcXSize); XMMReg4Double v_pixels_4 = XMMReg4Double::Load4Val(pSrcBand + i + iSampJ + 3 * nSrcXSize); XMMReg4Double v_padfWeight = XMMReg4Double::Load4Val(padfWeight + iC); v_acc_1 += v_pixels_1 * v_padfWeight; v_acc_2 += v_pixels_2 * v_padfWeight; v_acc_3 += v_pixels_3 * v_padfWeight; v_acc_4 += v_pixels_4 * v_padfWeight; } if (i < iMax) { XMMReg2Double v_pixels_1 = XMMReg2Double::Load2Val(pSrcBand + i + iSampJ); XMMReg2Double v_pixels_2 = XMMReg2Double::Load2Val(pSrcBand + i + iSampJ + nSrcXSize); XMMReg2Double v_pixels_3 = XMMReg2Double::Load2Val(pSrcBand + i + iSampJ + 2 * nSrcXSize); XMMReg2Double v_pixels_4 = XMMReg2Double::Load2Val(pSrcBand + i + iSampJ + 3 * nSrcXSize); XMMReg2Double v_padfWeight = XMMReg2Double::Load2Val(padfWeight + iC); v_acc_1.AddToLow(v_pixels_1 * v_padfWeight); v_acc_2.AddToLow(v_pixels_2 * v_padfWeight); v_acc_3.AddToLow(v_pixels_3 * v_padfWeight); v_acc_4.AddToLow(v_pixels_4 * v_padfWeight); i += 2; iC += 2; } double dfAccumulatorLocal_1 = v_acc_1.GetHorizSum(); double dfAccumulatorLocal_2 = v_acc_2.GetHorizSum(); double dfAccumulatorLocal_3 = v_acc_3.GetHorizSum(); double dfAccumulatorLocal_4 = v_acc_4.GetHorizSum(); if (i == iMax) { dfAccumulatorLocal_1 += static_cast(pSrcBand[i + iSampJ]) * padfWeight[iC]; dfAccumulatorLocal_2 += static_cast(pSrcBand[i + iSampJ + nSrcXSize]) * padfWeight[iC]; dfAccumulatorLocal_3 += static_cast(pSrcBand[i + iSampJ + 2 * nSrcXSize]) * padfWeight[iC]; dfAccumulatorLocal_4 += static_cast(pSrcBand[i + iSampJ + 3 * nSrcXSize]) * padfWeight[iC]; } // Calculate the Y weight. const double dfWeight0 = (j - dfDeltaY) * dfYScale; const double dfWeight1 = dfWeight0 + dfYScale; const double dfWeight2 = dfWeight1 + dfYScale; const double dfWeight3 = dfWeight2 + dfYScale; double adfWeight[4] = {dfWeight0, dfWeight1, dfWeight2, dfWeight3}; dfAccumulatorWeightVertical += pfnGetWeight4Values(adfWeight); dfAccumulator += adfWeight[0] * dfAccumulatorLocal_1; dfAccumulator += adfWeight[1] * dfAccumulatorLocal_2; dfAccumulator += adfWeight[2] * dfAccumulatorLocal_3; dfAccumulator += adfWeight[3] * dfAccumulatorLocal_4; } for (; j <= jMax; ++j) { const GPtrDiff_t iSampJ = iSrcOffset + static_cast(j) * nSrcXSize; // Loop over all pixels in the row. iC = 0; i = iMin; // Process by chunk of 4 cols. XMMReg4Double v_acc = XMMReg4Double::Zero(); for (; i + 2 < iMax; i += 4, iC += 4) { // Retrieve the pixel & accumulate. XMMReg4Double v_pixels = XMMReg4Double::Load4Val(pSrcBand + i + iSampJ); XMMReg4Double v_padfWeight = XMMReg4Double::Load4Val(padfWeight + iC); v_acc += v_pixels * v_padfWeight; } double dfAccumulatorLocal = v_acc.GetHorizSum(); if (i < iMax) { dfAccumulatorLocal += pSrcBand[i + iSampJ] * padfWeight[iC]; dfAccumulatorLocal += pSrcBand[i + 1 + iSampJ] * padfWeight[iC + 1]; i += 2; iC += 2; } if (i == iMax) { dfAccumulatorLocal += static_cast(pSrcBand[i + iSampJ]) * padfWeight[iC]; } // Calculate the Y weight. double dfWeight = pfnGetWeight((j - dfDeltaY) * dfYScale); dfAccumulator += dfWeight * dfAccumulatorLocal; dfAccumulatorWeightVertical += dfWeight; } const double dfAccumulatorWeight = dfAccumulatorWeightHorizontal * dfAccumulatorWeightVertical; *pValue = GWKClampValueT(dfAccumulator / dfAccumulatorWeight); return true; } /************************************************************************/ /* GWKResampleNoMasksT() */ /************************************************************************/ template <> bool GWKResampleNoMasksT(const GDALWarpKernel *poWK, int iBand, double dfSrcX, double dfSrcY, GByte *pValue, double *padfWeight) { return GWKResampleNoMasks_SSE2_T(poWK, iBand, dfSrcX, dfSrcY, pValue, padfWeight); } /************************************************************************/ /* GWKResampleNoMasksT() */ /************************************************************************/ template <> bool GWKResampleNoMasksT(const GDALWarpKernel *poWK, int iBand, double dfSrcX, double dfSrcY, GInt16 *pValue, double *padfWeight) { return GWKResampleNoMasks_SSE2_T(poWK, iBand, dfSrcX, dfSrcY, pValue, padfWeight); } /************************************************************************/ /* GWKResampleNoMasksT() */ /************************************************************************/ template <> bool GWKResampleNoMasksT(const GDALWarpKernel *poWK, int iBand, double dfSrcX, double dfSrcY, GUInt16 *pValue, double *padfWeight) { return GWKResampleNoMasks_SSE2_T(poWK, iBand, dfSrcX, dfSrcY, pValue, padfWeight); } /************************************************************************/ /* GWKResampleNoMasksT() */ /************************************************************************/ template <> bool GWKResampleNoMasksT(const GDALWarpKernel *poWK, int iBand, double dfSrcX, double dfSrcY, float *pValue, double *padfWeight) { return GWKResampleNoMasks_SSE2_T(poWK, iBand, dfSrcX, dfSrcY, pValue, padfWeight); } #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL /************************************************************************/ /* GWKResampleNoMasksT() */ /************************************************************************/ template <> bool GWKResampleNoMasksT(const GDALWarpKernel *poWK, int iBand, double dfSrcX, double dfSrcY, double *pValue, double *padfWeight) { return GWKResampleNoMasks_SSE2_T(poWK, iBand, dfSrcX, dfSrcY, pValue, padfWeight); } #endif /* INSTANTIATE_FLOAT64_SSE2_IMPL */ #endif /* defined(__x86_64) || defined(_M_X64) */ /************************************************************************/ /* GWKRoundSourceCoordinates() */ /************************************************************************/ static void GWKRoundSourceCoordinates( int nDstXSize, double *padfX, double *padfY, double *padfZ, int *pabSuccess, double dfSrcCoordPrecision, double dfErrorThreshold, GDALTransformerFunc pfnTransformer, void *pTransformerArg, double dfDstXOff, double dfDstY) { double dfPct = 0.8; if (dfErrorThreshold > 0 && dfSrcCoordPrecision / dfErrorThreshold >= 10.0) { dfPct = 1.0 - 2 * 1.0 / (dfSrcCoordPrecision / dfErrorThreshold); } const double dfExactTransformThreshold = 0.5 * dfPct * dfSrcCoordPrecision; for (int iDstX = 0; iDstX < nDstXSize; iDstX++) { const double dfXBefore = padfX[iDstX]; const double dfYBefore = padfY[iDstX]; padfX[iDstX] = floor(padfX[iDstX] / dfSrcCoordPrecision + 0.5) * dfSrcCoordPrecision; padfY[iDstX] = floor(padfY[iDstX] / dfSrcCoordPrecision + 0.5) * dfSrcCoordPrecision; // If we are in an uncertainty zone, go to non-approximated // transformation. // Due to the 80% of half-precision threshold, dfSrcCoordPrecision must // be at least 10 times greater than the approximation error. if (fabs(dfXBefore - padfX[iDstX]) > dfExactTransformThreshold || fabs(dfYBefore - padfY[iDstX]) > dfExactTransformThreshold) { padfX[iDstX] = iDstX + dfDstXOff; padfY[iDstX] = dfDstY; padfZ[iDstX] = 0.0; pfnTransformer(pTransformerArg, TRUE, 1, padfX + iDstX, padfY + iDstX, padfZ + iDstX, pabSuccess + iDstX); padfX[iDstX] = floor(padfX[iDstX] / dfSrcCoordPrecision + 0.5) * dfSrcCoordPrecision; padfY[iDstX] = floor(padfY[iDstX] / dfSrcCoordPrecision + 0.5) * dfSrcCoordPrecision; } } } /************************************************************************/ /* GWKOpenCLCase() */ /* */ /* This is identical to GWKGeneralCase(), but functions via */ /* OpenCL. This means we have vector optimization (SSE) and/or */ /* GPU optimization depending on our prefs. The code itself is */ /* general and not optimized, but by defining constants we can */ /* make some pretty darn good code on the fly. */ /************************************************************************/ #if defined(HAVE_OPENCL) static CPLErr GWKOpenCLCase(GDALWarpKernel *poWK) { const int nDstXSize = poWK->nDstXSize; const int nDstYSize = poWK->nDstYSize; const int nSrcXSize = poWK->nSrcXSize; const int nSrcYSize = poWK->nSrcYSize; const int nDstXOff = poWK->nDstXOff; const int nDstYOff = poWK->nDstYOff; const int nSrcXOff = poWK->nSrcXOff; const int nSrcYOff = poWK->nSrcYOff; bool bUseImag = false; cl_channel_type imageFormat; switch (poWK->eWorkingDataType) { case GDT_Byte: imageFormat = CL_UNORM_INT8; break; case GDT_UInt16: imageFormat = CL_UNORM_INT16; break; case GDT_CInt16: bUseImag = true; [[fallthrough]]; case GDT_Int16: imageFormat = CL_SNORM_INT16; break; case GDT_CFloat32: bUseImag = true; [[fallthrough]]; case GDT_Float32: imageFormat = CL_FLOAT; break; default: // No support for higher precision formats. CPLDebug("OpenCL", "Unsupported resampling OpenCL data type %d.", static_cast(poWK->eWorkingDataType)); return CE_Warning; } OCLResampAlg resampAlg; switch (poWK->eResample) { case GRA_Bilinear: resampAlg = OCL_Bilinear; break; case GRA_Cubic: resampAlg = OCL_Cubic; break; case GRA_CubicSpline: resampAlg = OCL_CubicSpline; break; case GRA_Lanczos: resampAlg = OCL_Lanczos; break; default: // No support for higher precision formats. CPLDebug("OpenCL", "Unsupported resampling OpenCL resampling alg %d.", static_cast(poWK->eResample)); return CE_Warning; } struct oclWarper *warper = nullptr; cl_int err; CPLErr eErr = CE_None; // TODO(schwehr): Fix indenting. try { // Using a factor of 2 or 4 seems to have much less rounding error // than 3 on the GPU. // Then the rounding error can cause strange artifacts under the // right conditions. warper = GDALWarpKernelOpenCL_createEnv( nSrcXSize, nSrcYSize, nDstXSize, nDstYSize, imageFormat, poWK->nBands, 4, bUseImag, poWK->papanBandSrcValid != nullptr, poWK->pafDstDensity, poWK->padfDstNoDataReal, resampAlg, &err); if (err != CL_SUCCESS || warper == nullptr) { eErr = CE_Warning; if (warper != nullptr) throw eErr; return eErr; } CPLDebug("GDAL", "GDALWarpKernel()::GWKOpenCLCase() " "Src=%d,%d,%dx%d Dst=%d,%d,%dx%d", nSrcXOff, nSrcYOff, nSrcXSize, nSrcYSize, nDstXOff, nDstYOff, nDstXSize, nDstYSize); if (!poWK->pfnProgress(poWK->dfProgressBase, "", poWK->pProgress)) { CPLError(CE_Failure, CPLE_UserInterrupt, "User terminated"); eErr = CE_Failure; throw eErr; } /* ==================================================================== */ /* Loop over bands. */ /* ==================================================================== */ for (int iBand = 0; iBand < poWK->nBands; iBand++) { if (poWK->papanBandSrcValid != nullptr && poWK->papanBandSrcValid[iBand] != nullptr) { GDALWarpKernelOpenCL_setSrcValid( warper, reinterpret_cast(poWK->papanBandSrcValid[iBand]), iBand); if (err != CL_SUCCESS) { CPLError( CE_Failure, CPLE_AppDefined, "OpenCL routines reported failure (%d) on line %d.", static_cast(err), __LINE__); eErr = CE_Failure; throw eErr; } } err = GDALWarpKernelOpenCL_setSrcImg( warper, poWK->papabySrcImage[iBand], iBand); if (err != CL_SUCCESS) { CPLError(CE_Failure, CPLE_AppDefined, "OpenCL routines reported failure (%d) on line %d.", static_cast(err), __LINE__); eErr = CE_Failure; throw eErr; } err = GDALWarpKernelOpenCL_setDstImg( warper, poWK->papabyDstImage[iBand], iBand); if (err != CL_SUCCESS) { CPLError(CE_Failure, CPLE_AppDefined, "OpenCL routines reported failure (%d) on line %d.", static_cast(err), __LINE__); eErr = CE_Failure; throw eErr; } } /* -------------------------------------------------------------------- */ /* Allocate x,y,z coordinate arrays for transformation ... one */ /* scanlines worth of positions. */ /* -------------------------------------------------------------------- */ // For x, 2 *, because we cache the precomputed values at the end. double *padfX = static_cast(CPLMalloc(2 * sizeof(double) * nDstXSize)); double *padfY = static_cast(CPLMalloc(sizeof(double) * nDstXSize)); double *padfZ = static_cast(CPLMalloc(sizeof(double) * nDstXSize)); int *pabSuccess = static_cast(CPLMalloc(sizeof(int) * nDstXSize)); const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef( poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0")); const double dfErrorThreshold = CPLAtof(CSLFetchNameValueDef( poWK->papszWarpOptions, "ERROR_THRESHOLD", "0")); // Precompute values. for (int iDstX = 0; iDstX < nDstXSize; iDstX++) padfX[nDstXSize + iDstX] = iDstX + 0.5 + poWK->nDstXOff; /* ==================================================================== */ /* Loop over output lines. */ /* ==================================================================== */ for (int iDstY = 0; iDstY < nDstYSize && eErr == CE_None; ++iDstY) { /* ---------------------------------------------------------------- */ /* Setup points to transform to source image space. */ /* ---------------------------------------------------------------- */ memcpy(padfX, padfX + nDstXSize, sizeof(double) * nDstXSize); const double dfYConst = iDstY + 0.5 + poWK->nDstYOff; for (int iDstX = 0; iDstX < nDstXSize; iDstX++) padfY[iDstX] = dfYConst; memset(padfZ, 0, sizeof(double) * nDstXSize); /* ---------------------------------------------------------------- */ /* Transform the points from destination pixel/line * coordinates*/ /* to source pixel/line coordinates. */ /* ---------------------------------------------------------------- */ poWK->pfnTransformer(poWK->pTransformerArg, TRUE, nDstXSize, padfX, padfY, padfZ, pabSuccess); if (dfSrcCoordPrecision > 0.0) { GWKRoundSourceCoordinates( nDstXSize, padfX, padfY, padfZ, pabSuccess, dfSrcCoordPrecision, dfErrorThreshold, poWK->pfnTransformer, poWK->pTransformerArg, 0.5 + nDstXOff, iDstY + 0.5 + nDstYOff); } err = GDALWarpKernelOpenCL_setCoordRow( warper, padfX, padfY, nSrcXOff, nSrcYOff, pabSuccess, iDstY); if (err != CL_SUCCESS) { CPLError(CE_Failure, CPLE_AppDefined, "OpenCL routines reported failure (%d) on line %d.", static_cast(err), __LINE__); eErr = CE_Failure; break; } // Update the valid & density masks because we don't do so in the // kernel. for (int iDstX = 0; iDstX < nDstXSize && eErr == CE_None; iDstX++) { const double dfX = padfX[iDstX]; const double dfY = padfY[iDstX]; const GPtrDiff_t iDstOffset = iDstX + static_cast(iDstY) * nDstXSize; // See GWKGeneralCase() for appropriate commenting. if (!pabSuccess[iDstX] || dfX < nSrcXOff || dfY < nSrcYOff) continue; int iSrcX = static_cast(dfX) - nSrcXOff; int iSrcY = static_cast(dfY) - nSrcYOff; if (iSrcX < 0 || iSrcX >= nSrcXSize || iSrcY < 0 || iSrcY >= nSrcYSize) continue; GPtrDiff_t iSrcOffset = iSrcX + static_cast(iSrcY) * nSrcXSize; double dfDensity = 1.0; if (poWK->pafUnifiedSrcDensity != nullptr && iSrcX >= 0 && iSrcY >= 0 && iSrcX < nSrcXSize && iSrcY < nSrcYSize) dfDensity = poWK->pafUnifiedSrcDensity[iSrcOffset]; GWKOverlayDensity(poWK, iDstOffset, dfDensity); // Because this is on the bit-wise level, it can't be done well // in OpenCL. if (poWK->panDstValid != nullptr) poWK->panDstValid[iDstOffset >> 5] |= 0x01 << (iDstOffset & 0x1f); } } CPLFree(padfX); CPLFree(padfY); CPLFree(padfZ); CPLFree(pabSuccess); if (eErr != CE_None) throw eErr; err = GDALWarpKernelOpenCL_runResamp( warper, poWK->pafUnifiedSrcDensity, poWK->panUnifiedSrcValid, poWK->pafDstDensity, poWK->panDstValid, poWK->dfXScale, poWK->dfYScale, poWK->dfXFilter, poWK->dfYFilter, poWK->nXRadius, poWK->nYRadius, poWK->nFiltInitX, poWK->nFiltInitY); if (err != CL_SUCCESS) { CPLError(CE_Failure, CPLE_AppDefined, "OpenCL routines reported failure (%d) on line %d.", static_cast(err), __LINE__); eErr = CE_Failure; throw eErr; } /* ==================================================================== */ /* Loop over output lines. */ /* ==================================================================== */ for (int iDstY = 0; iDstY < nDstYSize && eErr == CE_None; iDstY++) { for (int iBand = 0; iBand < poWK->nBands; iBand++) { void *rowReal = nullptr; void *rowImag = nullptr; GByte *pabyDst = poWK->papabyDstImage[iBand]; err = GDALWarpKernelOpenCL_getRow(warper, &rowReal, &rowImag, iDstY, iBand); if (err != CL_SUCCESS) { CPLError( CE_Failure, CPLE_AppDefined, "OpenCL routines reported failure (%d) on line %d.", static_cast(err), __LINE__); eErr = CE_Failure; throw eErr; } // Copy the data from the warper to GDAL's memory. switch (poWK->eWorkingDataType) { case GDT_Byte: memcpy(&(pabyDst[iDstY * nDstXSize]), rowReal, sizeof(GByte) * nDstXSize); break; case GDT_Int16: memcpy(&(reinterpret_cast( pabyDst)[iDstY * nDstXSize]), rowReal, sizeof(GInt16) * nDstXSize); break; case GDT_UInt16: memcpy(&(reinterpret_cast( pabyDst)[iDstY * nDstXSize]), rowReal, sizeof(GUInt16) * nDstXSize); break; case GDT_Float32: memcpy(&(reinterpret_cast( pabyDst)[iDstY * nDstXSize]), rowReal, sizeof(float) * nDstXSize); break; case GDT_CInt16: { GInt16 *pabyDstI16 = &(reinterpret_cast( pabyDst)[iDstY * nDstXSize]); for (int iDstX = 0; iDstX < nDstXSize; iDstX++) { pabyDstI16[iDstX * 2] = static_cast(rowReal)[iDstX]; pabyDstI16[iDstX * 2 + 1] = static_cast(rowImag)[iDstX]; } } break; case GDT_CFloat32: { float *pabyDstF32 = &(reinterpret_cast( pabyDst)[iDstY * nDstXSize]); for (int iDstX = 0; iDstX < nDstXSize; iDstX++) { pabyDstF32[iDstX * 2] = static_cast(rowReal)[iDstX]; pabyDstF32[iDstX * 2 + 1] = static_cast(rowImag)[iDstX]; } } break; default: // No support for higher precision formats. CPLError(CE_Failure, CPLE_AppDefined, "Unsupported resampling OpenCL data type %d.", static_cast(poWK->eWorkingDataType)); eErr = CE_Failure; throw eErr; } } } } catch (const CPLErr &) { } if ((err = GDALWarpKernelOpenCL_deleteEnv(warper)) != CL_SUCCESS) { CPLError(CE_Failure, CPLE_AppDefined, "OpenCL routines reported failure (%d) on line %d.", static_cast(err), __LINE__); return CE_Failure; } return eErr; } #endif /* defined(HAVE_OPENCL) */ /************************************************************************/ /* GWKCheckAndComputeSrcOffsets() */ /************************************************************************/ static CPL_INLINE bool GWKCheckAndComputeSrcOffsets(GWKJobStruct *psJob, int *_pabSuccess, int _iDstX, int _iDstY, double *_padfX, double *_padfY, int _nSrcXSize, int _nSrcYSize, GPtrDiff_t &iSrcOffset) { const GDALWarpKernel *_poWK = psJob->poWK; for (int iTry = 0; iTry < 2; ++iTry) { if (iTry == 1) { // If the source coordinate is slightly outside of the source raster // retry to transform it alone, so that the exact coordinate // transformer is used. _padfX[_iDstX] = _iDstX + 0.5 + _poWK->nDstXOff; _padfY[_iDstX] = _iDstY + 0.5 + _poWK->nDstYOff; double dfZ = 0; _poWK->pfnTransformer(psJob->pTransformerArg, TRUE, 1, _padfX + _iDstX, _padfY + _iDstX, &dfZ, _pabSuccess + _iDstX); } if (!_pabSuccess[_iDstX]) return false; // If this happens this is likely the symptom of a bug somewhere. if (CPLIsNan(_padfX[_iDstX]) || CPLIsNan(_padfY[_iDstX])) { static bool bNanCoordFound = false; if (!bNanCoordFound) { CPLDebug("WARP", "GWKCheckAndComputeSrcOffsets(): " "NaN coordinate found on point %d.", _iDstX); bNanCoordFound = true; } return false; } /* -------------------------------------------------------------------- */ /* Figure out what pixel we want in our source raster, and skip */ /* further processing if it is well off the source image. */ /* -------------------------------------------------------------------- */ /* We test against the value before casting to avoid the */ /* problem of asymmetric truncation effects around zero. That is */ /* -0.5 will be 0 when cast to an int. */ if (_padfX[_iDstX] < _poWK->nSrcXOff) { // If the source coordinate is slightly outside of the source raster // retry to transform it alone, so that the exact coordinate // transformer is used. if (iTry == 0 && _padfX[_iDstX] > _poWK->nSrcXOff - 1) continue; return false; } if (_padfY[_iDstX] < _poWK->nSrcYOff) { // If the source coordinate is slightly outside of the source raster // retry to transform it alone, so that the exact coordinate // transformer is used. if (iTry == 0 && _padfY[_iDstX] > _poWK->nSrcYOff - 1) continue; return false; } // Check for potential overflow when casting from float to int, (if // operating outside natural projection area, padfX/Y can be a very huge // positive number before doing the actual conversion), as such cast is // undefined behavior that can trigger exception with some compilers // (see #6753) if (_padfX[_iDstX] + 1e-10 > _nSrcXSize + _poWK->nSrcXOff) { // If the source coordinate is slightly outside of the source raster // retry to transform it alone, so that the exact coordinate // transformer is used. if (iTry == 0 && _padfX[_iDstX] < _nSrcXSize + _poWK->nSrcXOff + 1) continue; return false; } if (_padfY[_iDstX] + 1e-10 > _nSrcYSize + _poWK->nSrcYOff) { // If the source coordinate is slightly outside of the source raster // retry to transform it alone, so that the exact coordinate // transformer is used. if (iTry == 0 && _padfY[_iDstX] < _nSrcYSize + _poWK->nSrcYOff + 1) continue; return false; } break; } int iSrcX = static_cast(_padfX[_iDstX] + 1.0e-10) - _poWK->nSrcXOff; int iSrcY = static_cast(_padfY[_iDstX] + 1.0e-10) - _poWK->nSrcYOff; if (iSrcX == _nSrcXSize) iSrcX--; if (iSrcY == _nSrcYSize) iSrcY--; // Those checks should normally be OK given the previous ones. CPLAssert(iSrcX >= 0); CPLAssert(iSrcY >= 0); CPLAssert(iSrcX < _nSrcXSize); CPLAssert(iSrcY < _nSrcYSize); iSrcOffset = iSrcX + static_cast(iSrcY) * _nSrcXSize; return true; } /************************************************************************/ /* GWKGeneralCase() */ /* */ /* This is the most general case. It attempts to handle all */ /* possible features with relatively little concern for */ /* efficiency. */ /************************************************************************/ static void GWKGeneralCaseThread(void *pData) { GWKJobStruct *psJob = reinterpret_cast(pData); GDALWarpKernel *poWK = psJob->poWK; const int iYMin = psJob->iYMin; const int iYMax = psJob->iYMax; const double dfMultFactorVerticalShiftPipeline = poWK->bApplyVerticalShift ? CPLAtof(CSLFetchNameValueDef( poWK->papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT_PIPELINE", "1.0")) : 0.0; int nDstXSize = poWK->nDstXSize; int nSrcXSize = poWK->nSrcXSize; int nSrcYSize = poWK->nSrcYSize; /* -------------------------------------------------------------------- */ /* Allocate x,y,z coordinate arrays for transformation ... one */ /* scanlines worth of positions. */ /* -------------------------------------------------------------------- */ // For x, 2 *, because we cache the precomputed values at the end. double *padfX = static_cast(CPLMalloc(2 * sizeof(double) * nDstXSize)); double *padfY = static_cast(CPLMalloc(sizeof(double) * nDstXSize)); double *padfZ = static_cast(CPLMalloc(sizeof(double) * nDstXSize)); int *pabSuccess = static_cast(CPLMalloc(sizeof(int) * nDstXSize)); const bool bUse4SamplesFormula = poWK->dfXScale >= 0.95 && poWK->dfYScale >= 0.95; GWKResampleWrkStruct *psWrkStruct = nullptr; if (poWK->eResample != GRA_NearestNeighbour) { psWrkStruct = GWKResampleCreateWrkStruct(poWK); } const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef( poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0")); const double dfErrorThreshold = CPLAtof( CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0")); // Precompute values. for (int iDstX = 0; iDstX < nDstXSize; iDstX++) padfX[nDstXSize + iDstX] = iDstX + 0.5 + poWK->nDstXOff; /* ==================================================================== */ /* Loop over output lines. */ /* ==================================================================== */ for (int iDstY = iYMin; iDstY < iYMax; iDstY++) { /* -------------------------------------------------------------------- */ /* Setup points to transform to source image space. */ /* -------------------------------------------------------------------- */ memcpy(padfX, padfX + nDstXSize, sizeof(double) * nDstXSize); const double dfY = iDstY + 0.5 + poWK->nDstYOff; for (int iDstX = 0; iDstX < nDstXSize; iDstX++) padfY[iDstX] = dfY; memset(padfZ, 0, sizeof(double) * nDstXSize); /* -------------------------------------------------------------------- */ /* Transform the points from destination pixel/line coordinates */ /* to source pixel/line coordinates. */ /* -------------------------------------------------------------------- */ poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX, padfY, padfZ, pabSuccess); if (dfSrcCoordPrecision > 0.0) { GWKRoundSourceCoordinates( nDstXSize, padfX, padfY, padfZ, pabSuccess, dfSrcCoordPrecision, dfErrorThreshold, poWK->pfnTransformer, psJob->pTransformerArg, 0.5 + poWK->nDstXOff, iDstY + 0.5 + poWK->nDstYOff); } /* ==================================================================== */ /* Loop over pixels in output scanline. */ /* ==================================================================== */ for (int iDstX = 0; iDstX < nDstXSize; iDstX++) { GPtrDiff_t iSrcOffset = 0; if (!GWKCheckAndComputeSrcOffsets(psJob, pabSuccess, iDstX, iDstY, padfX, padfY, nSrcXSize, nSrcYSize, iSrcOffset)) continue; /* -------------------------------------------------------------------- */ /* Do not try to apply transparent/invalid source pixels to the */ /* destination. This currently ignores the multi-pixel input */ /* of bilinear and cubic resamples. */ /* -------------------------------------------------------------------- */ double dfDensity = 1.0; if (poWK->pafUnifiedSrcDensity != nullptr) { dfDensity = poWK->pafUnifiedSrcDensity[iSrcOffset]; if (dfDensity < SRC_DENSITY_THRESHOLD) continue; } if (poWK->panUnifiedSrcValid != nullptr && !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset)) continue; /* ==================================================================== */ /* Loop processing each band. */ /* ==================================================================== */ bool bHasFoundDensity = false; const GPtrDiff_t iDstOffset = iDstX + static_cast(iDstY) * nDstXSize; for (int iBand = 0; iBand < poWK->nBands; iBand++) { double dfBandDensity = 0.0; double dfValueReal = 0.0; double dfValueImag = 0.0; /* -------------------------------------------------------------------- */ /* Collect the source value. */ /* -------------------------------------------------------------------- */ if (poWK->eResample == GRA_NearestNeighbour || nSrcXSize == 1 || nSrcYSize == 1) { // FALSE is returned if dfBandDensity == 0, which is // checked below. CPL_IGNORE_RET_VAL(GWKGetPixelValue( poWK, iBand, iSrcOffset, &dfBandDensity, &dfValueReal, &dfValueImag)); } else if (poWK->eResample == GRA_Bilinear && bUse4SamplesFormula) { GWKBilinearResample4Sample( poWK, iBand, padfX[iDstX] - poWK->nSrcXOff, padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity, &dfValueReal, &dfValueImag); } else if (poWK->eResample == GRA_Cubic && bUse4SamplesFormula) { GWKCubicResample4Sample( poWK, iBand, padfX[iDstX] - poWK->nSrcXOff, padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity, &dfValueReal, &dfValueImag); } else #ifdef DEBUG // Only useful for clang static analyzer. if (psWrkStruct != nullptr) #endif { psWrkStruct->pfnGWKResample( poWK, iBand, padfX[iDstX] - poWK->nSrcXOff, padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity, &dfValueReal, &dfValueImag, psWrkStruct); } // If we didn't find any valid inputs skip to next band. if (dfBandDensity < BAND_DENSITY_THRESHOLD) continue; if (poWK->bApplyVerticalShift) { if (!std::isfinite(padfZ[iDstX])) continue; // Subtract padfZ[] since the coordinate transformation is // from target to source dfValueReal = dfValueReal * poWK->dfMultFactorVerticalShift - padfZ[iDstX] * dfMultFactorVerticalShiftPipeline; } bHasFoundDensity = true; /* -------------------------------------------------------------------- */ /* We have a computed value from the source. Now apply it * to */ /* the destination pixel. */ /* -------------------------------------------------------------------- */ GWKSetPixelValue(poWK, iBand, iDstOffset, dfBandDensity, dfValueReal, dfValueImag); } if (!bHasFoundDensity) continue; /* -------------------------------------------------------------------- */ /* Update destination density/validity masks. */ /* -------------------------------------------------------------------- */ GWKOverlayDensity(poWK, iDstOffset, dfDensity); if (poWK->panDstValid != nullptr) { CPLMaskSet(poWK->panDstValid, iDstOffset); } } /* Next iDstX */ /* -------------------------------------------------------------------- */ /* Report progress to the user, and optionally cancel out. */ /* -------------------------------------------------------------------- */ if (psJob->pfnProgress && psJob->pfnProgress(psJob)) break; } /* -------------------------------------------------------------------- */ /* Cleanup and return. */ /* -------------------------------------------------------------------- */ CPLFree(padfX); CPLFree(padfY); CPLFree(padfZ); CPLFree(pabSuccess); if (psWrkStruct) GWKResampleDeleteWrkStruct(psWrkStruct); } static CPLErr GWKGeneralCase(GDALWarpKernel *poWK) { return GWKRun(poWK, "GWKGeneralCase", GWKGeneralCaseThread); } /************************************************************************/ /* GWKRealCase() */ /* */ /* General case for non-complex data types. */ /************************************************************************/ static void GWKRealCaseThread(void *pData) { GWKJobStruct *psJob = static_cast(pData); GDALWarpKernel *poWK = psJob->poWK; const int iYMin = psJob->iYMin; const int iYMax = psJob->iYMax; const int nDstXSize = poWK->nDstXSize; const int nSrcXSize = poWK->nSrcXSize; const int nSrcYSize = poWK->nSrcYSize; const double dfMultFactorVerticalShiftPipeline = poWK->bApplyVerticalShift ? CPLAtof(CSLFetchNameValueDef( poWK->papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT_PIPELINE", "1.0")) : 0.0; /* -------------------------------------------------------------------- */ /* Allocate x,y,z coordinate arrays for transformation ... one */ /* scanlines worth of positions. */ /* -------------------------------------------------------------------- */ // For x, 2 *, because we cache the precomputed values at the end. double *padfX = static_cast(CPLMalloc(2 * sizeof(double) * nDstXSize)); double *padfY = static_cast(CPLMalloc(sizeof(double) * nDstXSize)); double *padfZ = static_cast(CPLMalloc(sizeof(double) * nDstXSize)); int *pabSuccess = static_cast(CPLMalloc(sizeof(int) * nDstXSize)); const bool bUse4SamplesFormula = poWK->dfXScale >= 0.95 && poWK->dfYScale >= 0.95; GWKResampleWrkStruct *psWrkStruct = nullptr; if (poWK->eResample != GRA_NearestNeighbour) { psWrkStruct = GWKResampleCreateWrkStruct(poWK); } const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef( poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0")); const double dfErrorThreshold = CPLAtof( CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0")); const bool bSrcMaskIsDensity = poWK->panUnifiedSrcValid == nullptr && poWK->papanBandSrcValid == nullptr && poWK->pafUnifiedSrcDensity != nullptr; // Precompute values. for (int iDstX = 0; iDstX < nDstXSize; iDstX++) padfX[nDstXSize + iDstX] = iDstX + 0.5 + poWK->nDstXOff; /* ==================================================================== */ /* Loop over output lines. */ /* ==================================================================== */ for (int iDstY = iYMin; iDstY < iYMax; iDstY++) { /* -------------------------------------------------------------------- */ /* Setup points to transform to source image space. */ /* -------------------------------------------------------------------- */ memcpy(padfX, padfX + nDstXSize, sizeof(double) * nDstXSize); const double dfY = iDstY + 0.5 + poWK->nDstYOff; for (int iDstX = 0; iDstX < nDstXSize; iDstX++) padfY[iDstX] = dfY; memset(padfZ, 0, sizeof(double) * nDstXSize); /* -------------------------------------------------------------------- */ /* Transform the points from destination pixel/line coordinates */ /* to source pixel/line coordinates. */ /* -------------------------------------------------------------------- */ poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX, padfY, padfZ, pabSuccess); if (dfSrcCoordPrecision > 0.0) { GWKRoundSourceCoordinates( nDstXSize, padfX, padfY, padfZ, pabSuccess, dfSrcCoordPrecision, dfErrorThreshold, poWK->pfnTransformer, psJob->pTransformerArg, 0.5 + poWK->nDstXOff, iDstY + 0.5 + poWK->nDstYOff); } /* ==================================================================== */ /* Loop over pixels in output scanline. */ /* ==================================================================== */ for (int iDstX = 0; iDstX < nDstXSize; iDstX++) { GPtrDiff_t iSrcOffset = 0; if (!GWKCheckAndComputeSrcOffsets(psJob, pabSuccess, iDstX, iDstY, padfX, padfY, nSrcXSize, nSrcYSize, iSrcOffset)) continue; /* -------------------------------------------------------------------- */ /* Do not try to apply transparent/invalid source pixels to the */ /* destination. This currently ignores the multi-pixel input */ /* of bilinear and cubic resamples. */ /* -------------------------------------------------------------------- */ double dfDensity = 1.0; if (poWK->pafUnifiedSrcDensity != nullptr) { dfDensity = poWK->pafUnifiedSrcDensity[iSrcOffset]; if (dfDensity < SRC_DENSITY_THRESHOLD) continue; } if (poWK->panUnifiedSrcValid != nullptr && !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset)) continue; /* ==================================================================== */ /* Loop processing each band. */ /* ==================================================================== */ bool bHasFoundDensity = false; const GPtrDiff_t iDstOffset = iDstX + static_cast(iDstY) * nDstXSize; for (int iBand = 0; iBand < poWK->nBands; iBand++) { double dfBandDensity = 0.0; double dfValueReal = 0.0; /* -------------------------------------------------------------------- */ /* Collect the source value. */ /* -------------------------------------------------------------------- */ if (poWK->eResample == GRA_NearestNeighbour || nSrcXSize == 1 || nSrcYSize == 1) { // FALSE is returned if dfBandDensity == 0, which is // checked below. CPL_IGNORE_RET_VAL(GWKGetPixelValueReal( poWK, iBand, iSrcOffset, &dfBandDensity, &dfValueReal)); } else if (poWK->eResample == GRA_Bilinear && bUse4SamplesFormula) { double dfValueImagIgnored = 0.0; GWKBilinearResample4Sample( poWK, iBand, padfX[iDstX] - poWK->nSrcXOff, padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity, &dfValueReal, &dfValueImagIgnored); } else if (poWK->eResample == GRA_Cubic && bUse4SamplesFormula) { if (bSrcMaskIsDensity) { if (poWK->eWorkingDataType == GDT_Byte) { GWKCubicResampleSrcMaskIsDensity4SampleRealT( poWK, iBand, padfX[iDstX] - poWK->nSrcXOff, padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity, &dfValueReal); } else if (poWK->eWorkingDataType == GDT_UInt16) { GWKCubicResampleSrcMaskIsDensity4SampleRealT< GUInt16>(poWK, iBand, padfX[iDstX] - poWK->nSrcXOff, padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity, &dfValueReal); } else { GWKCubicResampleSrcMaskIsDensity4SampleReal( poWK, iBand, padfX[iDstX] - poWK->nSrcXOff, padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity, &dfValueReal); } } else { double dfValueImagIgnored = 0.0; GWKCubicResample4Sample( poWK, iBand, padfX[iDstX] - poWK->nSrcXOff, padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity, &dfValueReal, &dfValueImagIgnored); } } else #ifdef DEBUG // Only useful for clang static analyzer. if (psWrkStruct != nullptr) #endif { double dfValueImagIgnored = 0.0; psWrkStruct->pfnGWKResample( poWK, iBand, padfX[iDstX] - poWK->nSrcXOff, padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity, &dfValueReal, &dfValueImagIgnored, psWrkStruct); } // If we didn't find any valid inputs skip to next band. if (dfBandDensity < BAND_DENSITY_THRESHOLD) continue; if (poWK->bApplyVerticalShift) { if (!std::isfinite(padfZ[iDstX])) continue; // Subtract padfZ[] since the coordinate transformation is // from target to source dfValueReal = dfValueReal * poWK->dfMultFactorVerticalShift - padfZ[iDstX] * dfMultFactorVerticalShiftPipeline; } bHasFoundDensity = true; /* -------------------------------------------------------------------- */ /* We have a computed value from the source. Now apply it * to */ /* the destination pixel. */ /* -------------------------------------------------------------------- */ GWKSetPixelValueReal(poWK, iBand, iDstOffset, dfBandDensity, dfValueReal); } if (!bHasFoundDensity) continue; /* -------------------------------------------------------------------- */ /* Update destination density/validity masks. */ /* -------------------------------------------------------------------- */ GWKOverlayDensity(poWK, iDstOffset, dfDensity); if (poWK->panDstValid != nullptr) { CPLMaskSet(poWK->panDstValid, iDstOffset); } } // Next iDstX. /* -------------------------------------------------------------------- */ /* Report progress to the user, and optionally cancel out. */ /* -------------------------------------------------------------------- */ if (psJob->pfnProgress && psJob->pfnProgress(psJob)) break; } /* -------------------------------------------------------------------- */ /* Cleanup and return. */ /* -------------------------------------------------------------------- */ CPLFree(padfX); CPLFree(padfY); CPLFree(padfZ); CPLFree(pabSuccess); if (psWrkStruct) GWKResampleDeleteWrkStruct(psWrkStruct); } static CPLErr GWKRealCase(GDALWarpKernel *poWK) { return GWKRun(poWK, "GWKRealCase", GWKRealCaseThread); } /************************************************************************/ /* GWKResampleNoMasksOrDstDensityOnlyThreadInternal() */ /************************************************************************/ template static void GWKResampleNoMasksOrDstDensityOnlyThreadInternal(void *pData) { GWKJobStruct *psJob = static_cast(pData); GDALWarpKernel *poWK = psJob->poWK; const int iYMin = psJob->iYMin; const int iYMax = psJob->iYMax; const double dfMultFactorVerticalShiftPipeline = poWK->bApplyVerticalShift ? CPLAtof(CSLFetchNameValueDef( poWK->papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT_PIPELINE", "1.0")) : 0.0; const int nDstXSize = poWK->nDstXSize; const int nSrcXSize = poWK->nSrcXSize; const int nSrcYSize = poWK->nSrcYSize; /* -------------------------------------------------------------------- */ /* Allocate x,y,z coordinate arrays for transformation ... one */ /* scanlines worth of positions. */ /* -------------------------------------------------------------------- */ // For x, 2 *, because we cache the precomputed values at the end. double *padfX = static_cast(CPLMalloc(2 * sizeof(double) * nDstXSize)); double *padfY = static_cast(CPLMalloc(sizeof(double) * nDstXSize)); double *padfZ = static_cast(CPLMalloc(sizeof(double) * nDstXSize)); int *pabSuccess = static_cast(CPLMalloc(sizeof(int) * nDstXSize)); const int nXRadius = poWK->nXRadius; double *padfWeight = static_cast(CPLCalloc(1 + nXRadius * 2, sizeof(double))); const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef( poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0")); const double dfErrorThreshold = CPLAtof( CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0")); // Precompute values. for (int iDstX = 0; iDstX < nDstXSize; iDstX++) padfX[nDstXSize + iDstX] = iDstX + 0.5 + poWK->nDstXOff; /* ==================================================================== */ /* Loop over output lines. */ /* ==================================================================== */ for (int iDstY = iYMin; iDstY < iYMax; iDstY++) { /* -------------------------------------------------------------------- */ /* Setup points to transform to source image space. */ /* -------------------------------------------------------------------- */ memcpy(padfX, padfX + nDstXSize, sizeof(double) * nDstXSize); const double dfY = iDstY + 0.5 + poWK->nDstYOff; for (int iDstX = 0; iDstX < nDstXSize; iDstX++) padfY[iDstX] = dfY; memset(padfZ, 0, sizeof(double) * nDstXSize); /* -------------------------------------------------------------------- */ /* Transform the points from destination pixel/line coordinates */ /* to source pixel/line coordinates. */ /* -------------------------------------------------------------------- */ poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX, padfY, padfZ, pabSuccess); if (dfSrcCoordPrecision > 0.0) { GWKRoundSourceCoordinates( nDstXSize, padfX, padfY, padfZ, pabSuccess, dfSrcCoordPrecision, dfErrorThreshold, poWK->pfnTransformer, psJob->pTransformerArg, 0.5 + poWK->nDstXOff, iDstY + 0.5 + poWK->nDstYOff); } /* ==================================================================== */ /* Loop over pixels in output scanline. */ /* ==================================================================== */ for (int iDstX = 0; iDstX < nDstXSize; iDstX++) { GPtrDiff_t iSrcOffset = 0; if (!GWKCheckAndComputeSrcOffsets(psJob, pabSuccess, iDstX, iDstY, padfX, padfY, nSrcXSize, nSrcYSize, iSrcOffset)) continue; /* ==================================================================== */ /* Loop processing each band. */ /* ==================================================================== */ const GPtrDiff_t iDstOffset = iDstX + static_cast(iDstY) * nDstXSize; for (int iBand = 0; iBand < poWK->nBands; iBand++) { T value = 0; if constexpr (eResample == GRA_NearestNeighbour) { value = reinterpret_cast( poWK->papabySrcImage[iBand])[iSrcOffset]; } else if constexpr (bUse4SamplesFormula) { if constexpr (eResample == GRA_Bilinear) GWKBilinearResampleNoMasks4SampleT( poWK, iBand, padfX[iDstX] - poWK->nSrcXOff, padfY[iDstX] - poWK->nSrcYOff, &value); else GWKCubicResampleNoMasks4SampleT( poWK, iBand, padfX[iDstX] - poWK->nSrcXOff, padfY[iDstX] - poWK->nSrcYOff, &value); } else { GWKResampleNoMasksT( poWK, iBand, padfX[iDstX] - poWK->nSrcXOff, padfY[iDstX] - poWK->nSrcYOff, &value, padfWeight); } if (poWK->bApplyVerticalShift) { if (!std::isfinite(padfZ[iDstX])) continue; // Subtract padfZ[] since the coordinate transformation is // from target to source value = GWKClampValueT( value * poWK->dfMultFactorVerticalShift - padfZ[iDstX] * dfMultFactorVerticalShiftPipeline); } if (poWK->pafDstDensity) poWK->pafDstDensity[iDstOffset] = 1.0f; reinterpret_cast(poWK->papabyDstImage[iBand])[iDstOffset] = value; } } /* -------------------------------------------------------------------- */ /* Report progress to the user, and optionally cancel out. */ /* -------------------------------------------------------------------- */ if (psJob->pfnProgress && psJob->pfnProgress(psJob)) break; } /* -------------------------------------------------------------------- */ /* Cleanup and return. */ /* -------------------------------------------------------------------- */ CPLFree(padfX); CPLFree(padfY); CPLFree(padfZ); CPLFree(pabSuccess); CPLFree(padfWeight); } template static void GWKResampleNoMasksOrDstDensityOnlyThread(void *pData) { GWKResampleNoMasksOrDstDensityOnlyThreadInternal( pData); } template static void GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread(void *pData) { GWKJobStruct *psJob = static_cast(pData); GDALWarpKernel *poWK = psJob->poWK; static_assert(eResample == GRA_Bilinear || eResample == GRA_Cubic); const bool bUse4SamplesFormula = poWK->dfXScale >= 0.95 && poWK->dfYScale >= 0.95; if (bUse4SamplesFormula) GWKResampleNoMasksOrDstDensityOnlyThreadInternal( pData); else GWKResampleNoMasksOrDstDensityOnlyThreadInternal( pData); } static CPLErr GWKNearestNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK) { return GWKRun( poWK, "GWKNearestNoMasksOrDstDensityOnlyByte", GWKResampleNoMasksOrDstDensityOnlyThread); } static CPLErr GWKBilinearNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK) { return GWKRun( poWK, "GWKBilinearNoMasksOrDstDensityOnlyByte", GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread); } static CPLErr GWKCubicNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK) { return GWKRun( poWK, "GWKCubicNoMasksOrDstDensityOnlyByte", GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread); } static CPLErr GWKCubicNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK) { return GWKRun( poWK, "GWKCubicNoMasksOrDstDensityOnlyFloat", GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread); } #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL static CPLErr GWKCubicNoMasksOrDstDensityOnlyDouble(GDALWarpKernel *poWK) { return GWKRun( poWK, "GWKCubicNoMasksOrDstDensityOnlyDouble", GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread); } #endif static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK) { return GWKRun( poWK, "GWKCubicSplineNoMasksOrDstDensityOnlyByte", GWKResampleNoMasksOrDstDensityOnlyThread); } /************************************************************************/ /* GWKNearestByte() */ /* */ /* Case for 8bit input data with nearest neighbour resampling */ /* using valid flags. Should be as fast as possible for this */ /* particular transformation type. */ /************************************************************************/ template static void GWKNearestThread(void *pData) { GWKJobStruct *psJob = static_cast(pData); GDALWarpKernel *poWK = psJob->poWK; const int iYMin = psJob->iYMin; const int iYMax = psJob->iYMax; const double dfMultFactorVerticalShiftPipeline = poWK->bApplyVerticalShift ? CPLAtof(CSLFetchNameValueDef( poWK->papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT_PIPELINE", "1.0")) : 0.0; const int nDstXSize = poWK->nDstXSize; const int nSrcXSize = poWK->nSrcXSize; const int nSrcYSize = poWK->nSrcYSize; /* -------------------------------------------------------------------- */ /* Allocate x,y,z coordinate arrays for transformation ... one */ /* scanlines worth of positions. */ /* -------------------------------------------------------------------- */ // For x, 2 *, because we cache the precomputed values at the end. double *padfX = static_cast(CPLMalloc(2 * sizeof(double) * nDstXSize)); double *padfY = static_cast(CPLMalloc(sizeof(double) * nDstXSize)); double *padfZ = static_cast(CPLMalloc(sizeof(double) * nDstXSize)); int *pabSuccess = static_cast(CPLMalloc(sizeof(int) * nDstXSize)); const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef( poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0")); const double dfErrorThreshold = CPLAtof( CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0")); // Precompute values. for (int iDstX = 0; iDstX < nDstXSize; iDstX++) padfX[nDstXSize + iDstX] = iDstX + 0.5 + poWK->nDstXOff; /* ==================================================================== */ /* Loop over output lines. */ /* ==================================================================== */ for (int iDstY = iYMin; iDstY < iYMax; iDstY++) { /* -------------------------------------------------------------------- */ /* Setup points to transform to source image space. */ /* -------------------------------------------------------------------- */ memcpy(padfX, padfX + nDstXSize, sizeof(double) * nDstXSize); const double dfY = iDstY + 0.5 + poWK->nDstYOff; for (int iDstX = 0; iDstX < nDstXSize; iDstX++) padfY[iDstX] = dfY; memset(padfZ, 0, sizeof(double) * nDstXSize); /* -------------------------------------------------------------------- */ /* Transform the points from destination pixel/line coordinates */ /* to source pixel/line coordinates. */ /* -------------------------------------------------------------------- */ poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX, padfY, padfZ, pabSuccess); if (dfSrcCoordPrecision > 0.0) { GWKRoundSourceCoordinates( nDstXSize, padfX, padfY, padfZ, pabSuccess, dfSrcCoordPrecision, dfErrorThreshold, poWK->pfnTransformer, psJob->pTransformerArg, 0.5 + poWK->nDstXOff, iDstY + 0.5 + poWK->nDstYOff); } /* ==================================================================== */ /* Loop over pixels in output scanline. */ /* ==================================================================== */ for (int iDstX = 0; iDstX < nDstXSize; iDstX++) { GPtrDiff_t iSrcOffset = 0; if (!GWKCheckAndComputeSrcOffsets(psJob, pabSuccess, iDstX, iDstY, padfX, padfY, nSrcXSize, nSrcYSize, iSrcOffset)) continue; /* -------------------------------------------------------------------- */ /* Do not try to apply invalid source pixels to the dest. */ /* -------------------------------------------------------------------- */ if (poWK->panUnifiedSrcValid != nullptr && !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset)) continue; /* -------------------------------------------------------------------- */ /* Do not try to apply transparent source pixels to the * destination.*/ /* -------------------------------------------------------------------- */ double dfDensity = 1.0; if (poWK->pafUnifiedSrcDensity != nullptr) { dfDensity = poWK->pafUnifiedSrcDensity[iSrcOffset]; if (dfDensity < SRC_DENSITY_THRESHOLD) continue; } /* ==================================================================== */ /* Loop processing each band. */ /* ==================================================================== */ const GPtrDiff_t iDstOffset = iDstX + static_cast(iDstY) * nDstXSize; for (int iBand = 0; iBand < poWK->nBands; iBand++) { T value = 0; double dfBandDensity = 0.0; /* -------------------------------------------------------------------- */ /* Collect the source value. */ /* -------------------------------------------------------------------- */ if (GWKGetPixelT(poWK, iBand, iSrcOffset, &dfBandDensity, &value)) { if (poWK->bApplyVerticalShift) { if (!std::isfinite(padfZ[iDstX])) continue; // Subtract padfZ[] since the coordinate transformation // is from target to source value = GWKClampValueT( value * poWK->dfMultFactorVerticalShift - padfZ[iDstX] * dfMultFactorVerticalShiftPipeline); } if (dfBandDensity < 1.0) { if (dfBandDensity == 0.0) { // Do nothing. } else { // Let the general code take care of mixing. GWKSetPixelValueRealT(poWK, iBand, iDstOffset, dfBandDensity, value); } } else { reinterpret_cast( poWK->papabyDstImage[iBand])[iDstOffset] = value; } } } /* -------------------------------------------------------------------- */ /* Mark this pixel valid/opaque in the output. */ /* -------------------------------------------------------------------- */ GWKOverlayDensity(poWK, iDstOffset, dfDensity); if (poWK->panDstValid != nullptr) { CPLMaskSet(poWK->panDstValid, iDstOffset); } } /* Next iDstX */ /* -------------------------------------------------------------------- */ /* Report progress to the user, and optionally cancel out. */ /* -------------------------------------------------------------------- */ if (psJob->pfnProgress && psJob->pfnProgress(psJob)) break; } /* -------------------------------------------------------------------- */ /* Cleanup and return. */ /* -------------------------------------------------------------------- */ CPLFree(padfX); CPLFree(padfY); CPLFree(padfZ); CPLFree(pabSuccess); } static CPLErr GWKNearestByte(GDALWarpKernel *poWK) { return GWKRun(poWK, "GWKNearestByte", GWKNearestThread); } static CPLErr GWKNearestNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK) { return GWKRun( poWK, "GWKNearestNoMasksOrDstDensityOnlyShort", GWKResampleNoMasksOrDstDensityOnlyThread); } static CPLErr GWKBilinearNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK) { return GWKRun( poWK, "GWKBilinearNoMasksOrDstDensityOnlyShort", GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread); } static CPLErr GWKBilinearNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *poWK) { return GWKRun( poWK, "GWKBilinearNoMasksOrDstDensityOnlyUShort", GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread); } static CPLErr GWKBilinearNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK) { return GWKRun( poWK, "GWKBilinearNoMasksOrDstDensityOnlyFloat", GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread); } #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL static CPLErr GWKBilinearNoMasksOrDstDensityOnlyDouble(GDALWarpKernel *poWK) { return GWKRun( poWK, "GWKBilinearNoMasksOrDstDensityOnlyDouble", GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread); } #endif static CPLErr GWKCubicNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK) { return GWKRun( poWK, "GWKCubicNoMasksOrDstDensityOnlyShort", GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread); } static CPLErr GWKCubicNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *poWK) { return GWKRun( poWK, "GWKCubicNoMasksOrDstDensityOnlyUShort", GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread); } static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK) { return GWKRun( poWK, "GWKCubicSplineNoMasksOrDstDensityOnlyShort", GWKResampleNoMasksOrDstDensityOnlyThread); } static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *poWK) { return GWKRun( poWK, "GWKCubicSplineNoMasksOrDstDensityOnlyUShort", GWKResampleNoMasksOrDstDensityOnlyThread); } static CPLErr GWKNearestShort(GDALWarpKernel *poWK) { return GWKRun(poWK, "GWKNearestShort", GWKNearestThread); } static CPLErr GWKNearestNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK) { return GWKRun( poWK, "GWKNearestNoMasksOrDstDensityOnlyFloat", GWKResampleNoMasksOrDstDensityOnlyThread); } static CPLErr GWKNearestFloat(GDALWarpKernel *poWK) { return GWKRun(poWK, "GWKNearestFloat", GWKNearestThread