clang-p2996/mlir/lib/Dialect/Quant/Utils/FakeQuantSupport.cpp
River Riddle 06e25d5645 [mlir][IR] Refactor the getChecked and verifyConstructionInvariants methods on Attributes/Types
`verifyConstructionInvariants` is intended to allow for verifying the invariants of an attribute/type on construction, and `getChecked` is intended to enable more graceful error handling than an assert. There are a few problems with the current implementation of these methods:
* `verifyConstructionInvariants` requires an mlir::Location for emitting errors, which is prohibitively costly in the situations most likely to use it, e.g. the parser.
This leads to unfortunate duplication between verifier code and parser code, because the parser operates on llvm::SMLoc and pre-emptively converting that to an mlir::Location is undesirable overhead.
* `getChecked` effectively requires duplicating the definition of the `get` method, creating a rather clunky workflow due to the subtle difference in its signature.

This revision aims to tackle the above problems by refactoring the implementation to use a callback for error emission. Using a callback allows the costly part of error emission to be deferred until it is actually necessary.

Because each instance of these methods needs a signature change anyway, this revision also takes the opportunity to clean up their definitions by:
* restructuring the signature of `getChecked` such that it can be generated from the same code block as the `get` method.
* renaming `verifyConstructionInvariants` to `verify` to match the naming scheme of the rest of the compiler (the resulting shape is sketched below).
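
A rough sketch of the resulting shape for a hypothetical `MyIntType` (illustrative only; not copied from the patch):

  // Before: verification had to be handed a fully materialized Location.
  static LogicalResult verifyConstructionInvariants(Location loc,
                                                    unsigned width);

  // After: an error-emission callback defers building the diagnostic until a
  // failure actually occurs, and `getChecked` mirrors the signature of `get`.
  static MyIntType get(MLIRContext *ctx, unsigned width);
  static MyIntType getChecked(function_ref<InFlightDiagnostic()> emitError,
                              MLIRContext *ctx, unsigned width);
  static LogicalResult verify(function_ref<InFlightDiagnostic()> emitError,
                              unsigned width);

Call sites that do have a Location, such as the `getChecked(loc, ...)` calls in the file below, presumably go through a convenience overload that wraps the Location in such a callback.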

Differential Revision: https://reviews.llvm.org/D97100
2021-02-22 17:37:49 -08:00


//===- FakeQuantSupport.cpp - Support utilities for FakeQuant ops ---------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "mlir/Dialect/Quant/FakeQuantSupport.h"
#include "mlir/Dialect/Quant/QuantTypes.h"
using namespace mlir;
using namespace mlir::quant;
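// Returns, through the output parameters, the default storage type and
// quantized range [qmin, qmax] for the requested bit width and signedness,
// widening to the nearest of i8/i16/i32. With `narrowRange`, qmin is bumped
// up by one (e.g. [-127, 127] instead of [-128, 127] for signed 8-bit).
// Returns true if the bit width is unsupported.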
static bool getDefaultStorageParams(unsigned numBits, bool narrowRange,
                                    bool isSigned, MLIRContext *ctx,
                                    Type &storageType, int64_t &qmin,
                                    int64_t &qmax) {
  // Hard-coded type mapping from TFLite.
  if (numBits <= 8) {
    storageType = IntegerType::get(ctx, 8);
    if (isSigned) {
      qmin = -128;
      qmax = 127;
    } else {
      qmin = 0;
      qmax = 255;
    }
  } else if (numBits <= 16) {
    storageType = IntegerType::get(ctx, 16);
    if (isSigned) {
      qmin = -32768;
      qmax = 32767;
    } else {
      qmin = 0;
      qmax = 65535;
    }
  } else if (numBits <= 32) {
    storageType = IntegerType::get(ctx, 32);
    if (isSigned) {
      qmin = std::numeric_limits<int32_t>::min();
      qmax = std::numeric_limits<int32_t>::max();
    } else {
      qmin = std::numeric_limits<uint32_t>::min();
      qmax = std::numeric_limits<uint32_t>::max();
    }
  } else {
    return true;
  }

  // Handle narrowRange.
  if (narrowRange) {
    qmin += 1;
  }

  return false;
}

// This is a specific implementation of nudging:
// If 0.0 < rmin < rmax or rmin < rmax < 0.0, the range will be shifted
// to include 0.0, but the range width (rmax - rmin) isn't changed. The zero
// point is derived from the shifted range, and the scale isn't changed. As
// a consequence, some values that were in the original [rmin, rmax] range
// will fall outside the shifted range and be clamped during quantization.
// TODO: we should nudge the scale as well, but that requires the
// fake quant op used in the training to use the nudged scale as well.
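// Example: with qmin = 0, qmax = 255 (unsigned 8-bit) and a real range of
// [0.5, 1.5], the scale stays at 1.0/255 and the ideal zero point works out
// to -127.5, which lies below qmin and is clamped to 0. The representable
// real range effectively becomes [0.0, 1.0], so original values in (1.0, 1.5]
// will be clamped when quantized.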
static void getNudgedScaleAndZeroPoint(int64_t qmin, int64_t qmax, double rmin,
                                       double rmax, double &scale,
                                       int64_t &nudgedZeroPoint) {
  // Determine the scale.
  const double qminDouble = qmin;
  const double qmaxDouble = qmax;
  scale = (rmax - rmin) / (qmaxDouble - qminDouble);

  // Zero point computation.
  // In float, solve the affine equation for any known pair
  // (real value, corresponding quantized value), of which, two such pairs
  // are known: (rmin, qmin), (rmax, qmax).
  // The arithmetic error on the zero point computed from either pair will be
  // roughly machine_epsilon * (sum of absolute values of terms).
  // Use the variant that adds the smaller error.
  const double zeroPointFromMin = qminDouble - rmin / scale;
  const double zeroPointFromMinError =
      std::abs(qminDouble) + std::abs(rmin / scale);
  const double zeroPointFromMax = qmaxDouble - rmax / scale;
  const double zeroPointFromMaxError =
      std::abs(qmaxDouble) + std::abs(rmax / scale);

  const double zeroPointDouble = (zeroPointFromMinError < zeroPointFromMaxError)
                                     ? zeroPointFromMin
                                     : zeroPointFromMax;

  // Now nudge the zero point to be an integer.
  nudgedZeroPoint = 0;
  if (zeroPointDouble < qminDouble) {
    nudgedZeroPoint = qmin;
  } else if (zeroPointDouble > qmaxDouble) {
    nudgedZeroPoint = qmax;
  } else {
    nudgedZeroPoint = round(zeroPointDouble);
  }

  // By construction, the nudged zero point should always be in range.
  assert(nudgedZeroPoint >= qmin);
  assert(nudgedZeroPoint <= qmax);
}

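// Converts per-tensor FakeQuant statistics (numBits, rmin, rmax) into a
// UniformQuantizedType with a nudged zero point. Emits an error at `loc` and
// returns null for unsupported bit widths; a near-equal rmin/rmax is treated
// as an all-zero tensor and maps to scale 1.0 with the zero point at qmin.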
UniformQuantizedType
mlir::quant::fakeQuantAttrsToType(Location loc, unsigned numBits, double rmin,
                                  double rmax, bool narrowRange,
                                  Type expressedType, bool isSigned) {
  MLIRContext *ctx = expressedType.getContext();
  unsigned flags = isSigned ? QuantizationFlags::Signed : 0;
  Type storageType;
  int64_t qmin;
  int64_t qmax;
  if (getDefaultStorageParams(numBits, narrowRange, isSigned, ctx, storageType,
                              qmin, qmax)) {
    return (emitError(loc, "unsupported FakeQuant number of bits: ") << numBits,
            nullptr);
  }

  // Special case where min/max is close enough. The tensor contents are all
  // 0.0s, so the scale is set to 1.0 and the tensor can be quantized to zero
  // points and dequantized to 0.0.
  if (std::fabs(rmax - rmin) < std::numeric_limits<double>::epsilon()) {
    return UniformQuantizedType::getChecked(
        loc, flags, storageType, expressedType, 1.0, qmin, qmin, qmax);
  }

  double scale;
  int64_t nudgedZeroPoint;
  getNudgedScaleAndZeroPoint(qmin, qmax, rmin, rmax, scale, nudgedZeroPoint);
  return UniformQuantizedType::getChecked(loc, flags, storageType,
                                          expressedType, scale, nudgedZeroPoint,
                                          qmin, qmax);
}

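// Per-axis variant: computes one scale and nudged zero point per entry of
// rmins/rmaxs along `quantizedDimension`, all sharing the same storage type
// and [qmin, qmax] range.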
UniformQuantizedPerAxisType mlir::quant::fakeQuantAttrsToType(
    Location loc, unsigned numBits, int32_t quantizedDimension,
    ArrayRef<double> rmins, ArrayRef<double> rmaxs, bool narrowRange,
    Type expressedType, bool isSigned) {
  size_t axis_size = rmins.size();
  if (axis_size != rmaxs.size()) {
    return (emitError(loc, "mismatched per-axis min and max size: ")
                << axis_size << " vs. " << rmaxs.size(),
            nullptr);
  }

  MLIRContext *ctx = expressedType.getContext();
  Type storageType;
  int64_t qmin;
  int64_t qmax;
  if (getDefaultStorageParams(numBits, narrowRange, isSigned, ctx, storageType,
                              qmin, qmax)) {
    return (emitError(loc, "unsupported FakeQuant number of bits: ") << numBits,
            nullptr);
  }

  SmallVector<double, 4> scales;
  SmallVector<int64_t, 4> zeroPoints;
  scales.reserve(axis_size);
  zeroPoints.reserve(axis_size);
  for (size_t axis = 0; axis != axis_size; ++axis) {
    double rmin = rmins[axis];
    double rmax = rmaxs[axis];
    if (std::fabs(rmax - rmin) < std::numeric_limits<double>::epsilon()) {
      scales.push_back(1.0);
      zeroPoints.push_back(qmin);
      continue;
    }

    double scale;
    int64_t nudgedZeroPoint;
    getNudgedScaleAndZeroPoint(qmin, qmax, rmin, rmax, scale, nudgedZeroPoint);
    scales.push_back(scale);
    zeroPoints.push_back(nudgedZeroPoint);
  }

  unsigned flags = isSigned ? QuantizationFlags::Signed : 0;
  return UniformQuantizedPerAxisType::getChecked(
      loc, flags, storageType, expressedType, scales, zeroPoints,
      quantizedDimension, qmin, qmax);
}
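
For orientation, a minimal usage sketch of the two helpers defined above (not part of the file; the function name is hypothetical, and it assumes an MLIRContext that already has the quant dialect loaded and a build linked against the Quant dialect library):

#include "mlir/Dialect/Quant/FakeQuantSupport.h"
#include "mlir/Dialect/Quant/QuantTypes.h"
#include "mlir/IR/Builders.h"
#include "mlir/IR/Location.h"
#include "mlir/IR/MLIRContext.h"

// Hypothetical helper; `ctx` must already have the quant dialect loaded so
// that the quantized types below can be constructed.
static void exampleFakeQuantToType(mlir::MLIRContext *ctx) {
  mlir::Builder b(ctx);
  mlir::Location loc = mlir::UnknownLoc::get(ctx);

  // Per-tensor: an 8-bit unsigned FakeQuant range of [-1.0, 1.0] expressed in
  // f32 gives scale 2/255 with the zero point nudged to 128, i.e. roughly
  // !quant.uniform<u8:f32, 0.00784:128>.
  mlir::quant::UniformQuantizedType perTensor =
      mlir::quant::fakeQuantAttrsToType(
          loc, /*numBits=*/8, /*rmin=*/-1.0, /*rmax=*/1.0,
          /*narrowRange=*/false, /*expressedType=*/b.getF32Type(),
          /*isSigned=*/false);
  if (perTensor)
    perTensor.dump();

  // Per-axis: one (scale, zero point) pair per slice along dimension 0.
  mlir::quant::UniformQuantizedPerAxisType perAxis =
      mlir::quant::fakeQuantAttrsToType(
          loc, /*numBits=*/8, /*quantizedDimension=*/0,
          /*rmins=*/{-1.0, -0.5}, /*rmaxs=*/{1.0, 0.5},
          /*narrowRange=*/false, /*expressedType=*/b.getF32Type(),
          /*isSigned=*/false);
  if (perAxis)
    perAxis.dump();
}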