mirror of
https://github.com/deepinsight/insightface.git
synced 2026-05-15 12:52:47 +00:00
Merge branch 'master' of https://github.com/deepinsight/insightface
This commit is contained in:
@@ -491,6 +491,12 @@ class FaceImageIter2(io.DataIter):
|
||||
else:
|
||||
label, fname, bbox, landmark = self.imglist[idx]
|
||||
return label, self.read_image(fname), bbox, landmark
|
||||
else:
|
||||
s = self.imgrec.read()
|
||||
if s is None:
|
||||
raise StopIteration
|
||||
header, img = recordio.unpack(s)
|
||||
return header.label, img, None, None
|
||||
|
||||
def brightness_aug(self, src, x):
|
||||
alpha = 1.0 + random.uniform(-x, x)
|
||||
|
||||
379
src/operator/lsoftmax-inl.h
Normal file
379
src/operator/lsoftmax-inl.h
Normal file
@@ -0,0 +1,379 @@
|
||||
/*!
|
||||
* Copyright (c) 2016 by Contributors
|
||||
* \file lsoftmax-inl.h
|
||||
* \brief LSoftmax from <Large-Margin Softmax Loss for Convolutional Neural Networks>
|
||||
* \author luoyetx
|
||||
*/
|
||||
#ifndef MXNET_OPERATOR_LSOFTMAX_INL_H_
|
||||
#define MXNET_OPERATOR_LSOFTMAX_INL_H_
|
||||
|
||||
#include <dmlc/logging.h>
|
||||
#include <dmlc/parameter.h>
|
||||
#include <mxnet/operator.h>
|
||||
#include <cmath>
|
||||
#include <map>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include "./operator_common.h"
|
||||
|
||||
namespace mxnet {
|
||||
namespace op {
|
||||
|
||||
// Index constants for the operator's blob vectors (inputs, outputs, resources).
namespace lsoftmax_enum {
// Inputs: feature matrix, classifier weight matrix, integer class labels.
enum LSoftmaxOpInputs {kData, kWeight, kLabel};
// Outputs: modified logits plus per-row L2 norms of data and weight
// (the norms are extra outputs so Backward can reuse them).
enum LSoftmaxOpOutputs {kOut, kDataNorm, kWeightNorm};
// Temp workspace requested for Backward.
enum LSoftmaxResource {kTempSpace};
}
|
||||
|
||||
// Hyper-parameters for the Large-Margin Softmax (L-Softmax) operator.
struct LSoftmaxParam : public dmlc::Parameter<LSoftmaxParam> {
  int margin;       // angular margin m of the L-Softmax loss (>= 1)
  float beta;       // lambda: weight of the original (unmodified) logit
  float beta_min;   // floor that beta is clamped to as it decays
  float scale;      // per-iteration multiplicative decay applied to beta
  int num_hidden;   // number of output classes (rows of the weight matrix)
  bool grad_norm;   // flag for gradient normalization (currently unused in the op body)
  int verbose;      // 0 = silent; otherwise log every `verbose` backward calls
  float eps;        // epsilon added before sqrt in GradNorm for numerical safety
  DMLC_DECLARE_PARAMETER(LSoftmaxParam) {
    DMLC_DECLARE_FIELD(margin).set_default(2).set_lower_bound(1)
    .describe("LSoftmax margin");
    DMLC_DECLARE_FIELD(beta).set_default(1).set_lower_bound(0)
    .describe("LSoftmax beta, same as lambda to weight original value");
    DMLC_DECLARE_FIELD(beta_min).set_default(0).set_lower_bound(0)
    .describe("Minimum beta");
    DMLC_DECLARE_FIELD(scale).set_default(1).set_range(0, 1)
    .describe("Scale of beta during training for every iteration");
    DMLC_DECLARE_FIELD(num_hidden).set_lower_bound(1)
    .describe("Number of hidden nodes of the output");
    DMLC_DECLARE_FIELD(grad_norm).set_default(false)
    .describe("do grad norm");
    DMLC_DECLARE_FIELD(verbose).set_default(0)
    .describe("Log for beta change");
    DMLC_DECLARE_FIELD(eps).set_default(1e-10f)
    .describe("l2 eps");
  }
};
|
||||
|
||||
// Operator implementing Large-Margin Softmax: an FC layer whose target-class
// logit is replaced by a margin-augmented value, blended with the plain logit
// via beta: out = (f + beta * fo) / (1 + beta).
template<typename xpu, typename DType>
class LSoftmaxOp : public Operator {
 public:
  explicit LSoftmaxOp(LSoftmaxParam param) {
    this->param_ = param;
    // setup global lookup table
    // k_table_[i] = cos(i*pi/margin): interval boundaries used to find k such
    // that theta is in [k*pi/m, (k+1)*pi/m].
    // c_table_[i] = C(margin, i): binomial coefficients for the cos(m*theta)
    // expansion, built incrementally via factor.
    k_table_.clear();
    c_table_.clear();
    k_table_.push_back(1);
    c_table_.push_back(1);
    const int margin = param.margin;
    const double pi = std::atan(1) * 4;
    double factor = 1;
    for (int i = 1; i <= margin; ++i) {
      factor = factor * (margin - i + 1) / i;
      k_table_.push_back(std::cos(i * pi / margin));
      c_table_.push_back(factor);
    }
    //next_beta_ = param.beta * 0.1f;
    count_ = 0;
    // Environment-variable overrides: BETA forces a fixed beta; otherwise
    // GLOBAL_STEP resumes the geometric beta decay at a given batch count
    // (useful when restarting training). NOTE(review): relies on the trainer
    // process exporting these variables — confirm against the training script.
    if(const char* env_p = std::getenv("BETA")) {
      float _beta = std::atof(env_p);
      if (param_.verbose) {
        LOG(INFO)<<"beta:"<<_beta;
      }
      param_.beta = _beta;
    }
    else if(const char* env_p = std::getenv("GLOBAL_STEP")) {
      int nbatch = std::atoi(env_p);
      if (param_.verbose) {
        LOG(INFO)<<"nbatch:"<<nbatch;
      }
      // beta after nbatch decay steps: beta * scale^nbatch, clamped below.
      float _beta = param.beta*std::pow((double)param.scale, (double)nbatch);
      param_.beta = std::max(_beta, param_.beta_min);
    }
    if (param_.verbose) {
      LOG(INFO)<<param_.margin<<","<<param_.beta<<","<<param_.beta_min<<","<<param_.scale;
    }
  }

  // Forward pass. Computes out = x * w^T, then (training only) overwrites the
  // target-class entry of each row with the margin-blended logit via
  // LSoftmaxForward, which also fills x_norm / w_norm.
  virtual void Forward(const OpContext &ctx,
                       const std::vector<TBlob> &in_data,
                       const std::vector<OpReqType> &req,
                       const std::vector<TBlob> &out_data,
                       const std::vector<TBlob> &aux_args) {
    using namespace mshadow;
    using namespace mshadow::expr;
    CHECK_EQ(in_data.size(), 3);
    CHECK_EQ(out_data.size(), 3);
    CHECK_EQ(req.size(), 3);
    CHECK_EQ(req[lsoftmax_enum::kOut], kWriteTo);
    CHECK(req[lsoftmax_enum::kDataNorm] == kNullOp ||
          req[lsoftmax_enum::kDataNorm] == kWriteTo);
    CHECK(req[lsoftmax_enum::kWeightNorm] == kNullOp ||
          req[lsoftmax_enum::kWeightNorm] == kWriteTo);
    Stream<xpu> *s = ctx.get_stream<xpu>();
    const int n = in_data[lsoftmax_enum::kData].size(0);   // batch size
    const int m = in_data[lsoftmax_enum::kWeight].size(0); // number of classes
    Tensor<xpu, 2, DType> x = in_data[lsoftmax_enum::kData].FlatTo2D<xpu, DType>(s);
    Tensor<xpu, 2, DType> w = in_data[lsoftmax_enum::kWeight].FlatTo2D<xpu, DType>(s);
    Tensor<xpu, 1, DType> label = in_data[lsoftmax_enum::kLabel].get_with_shape<xpu, 1, DType>(Shape1(n), s);
    Tensor<xpu, 2, DType> out = out_data[lsoftmax_enum::kOut].FlatTo2D<xpu, DType>(s);
    Tensor<xpu, 1, DType> x_norm = out_data[lsoftmax_enum::kDataNorm].get_with_shape<xpu, 1, DType>(Shape1(n), s);
    Tensor<xpu, 1, DType> w_norm = out_data[lsoftmax_enum::kWeightNorm].get_with_shape<xpu, 1, DType>(Shape1(m), s);
#if defined(__CUDACC__)
    CHECK_EQ(s->blas_handle_ownership_, Stream<xpu>::OwnHandle)
        << "Must init CuBLAS handle in stream";
#endif
    // original fully connected
    out = dot(x, w.T());
    if (ctx.is_train) {
      // large margin fully connected
      const int margin = param_.margin;
      // BETA env var re-read every forward so it can be changed mid-training.
      if(const char* env_p = std::getenv("BETA")) {
        float _beta = std::atof(env_p);
        param_.beta = _beta;
      }
      const DType beta = static_cast<DType>(param_.beta);
      //LOG(INFO)<<"beta:"<<beta<<std::endl;
      // Upload the cos(k*pi/m) / binomial lookup tables to the compute device.
      // NOTE(review): allocated and copied on every forward call — could be
      // cached per device, at the cost of extra state.
      Tensor<cpu, 1, DType> k_table_cpu(k_table_.data(), Shape1(k_table_.size()));
      Tensor<cpu, 1, DType> c_table_cpu(c_table_.data(), Shape1(c_table_.size()));
      Tensor<xpu, 1, DType> k_table_xpu(Shape1(k_table_.size()));
      Tensor<xpu, 1, DType> c_table_xpu(Shape1(c_table_.size()));
      k_table_xpu.set_stream(s);
      c_table_xpu.set_stream(s);
      AllocSpace(&k_table_xpu);
      AllocSpace(&c_table_xpu);
      Copy(k_table_xpu, k_table_cpu, s);
      Copy(c_table_xpu, c_table_cpu, s);
      LSoftmaxForward(x, w, label, out, x_norm, w_norm, k_table_xpu, c_table_xpu, margin, beta);
      FreeSpace(&k_table_xpu);
      FreeSpace(&c_table_xpu);
    }
  }

  //virtual void GradNorm(mshadow::Tensor<xpu, 2, DType> grad, mshadow::Stream<xpu>* s) {
  //  using namespace mshadow;
  //  using namespace mshadow::expr;
  //  Tensor<cpu, 2, DType> grad_cpu(grad.shape_);
  //  AllocSpace(&grad_cpu);
  //  Copy(grad_cpu, grad, s);
  //  DType grad_norm = param_.eps;
  //  for(uint32_t i=0;i<grad_cpu.shape_[0];i++) {
  //    for(uint32_t j=0;j<grad_cpu.shape_[1];j++) {
  //      grad_norm += grad_cpu[i][j]*grad_cpu[i][j];
  //    }
  //  }
  //  grad_norm = sqrt(grad_norm);
  //  grad_cpu /= grad_norm;
  //  Copy(grad, grad_cpu, s);
  //  FreeSpace(&grad_cpu);
  //}

  // Returns the Frobenius norm of `grad` (sqrt(eps + sum of squares)).
  // Copies the whole tensor to host memory to compute it — diagnostic only,
  // called from Backward when verbose logging is enabled.
  virtual DType GradNorm(mshadow::Tensor<xpu, 2, DType> grad, mshadow::Stream<xpu>* s) {
    using namespace mshadow;
    using namespace mshadow::expr;
    Tensor<cpu, 2, DType> grad_cpu(grad.shape_);
    AllocSpace(&grad_cpu);
    Copy(grad_cpu, grad, s);
    DType grad_norm = param_.eps;  // eps keeps sqrt well-defined for all-zero grads
    for(uint32_t i=0;i<grad_cpu.shape_[0];i++) {
      for(uint32_t j=0;j<grad_cpu.shape_[1];j++) {
        grad_norm += grad_cpu[i][j]*grad_cpu[i][j];
      }
    }
    grad_norm = sqrt(grad_norm);
    //grad_cpu /= grad_norm;
    //Copy(grad, grad_cpu, s);
    FreeSpace(&grad_cpu);
    return grad_norm;
  }

  // Backward pass. Starts from the plain FC gradients (dot products with
  // o_grad) and lets LSoftmaxBackward correct the target-class contributions
  // for the margin term. Also advances the beta decay schedule.
  virtual void Backward(const OpContext &ctx,
                        const std::vector<TBlob> &out_grad,
                        const std::vector<TBlob> &in_data,
                        const std::vector<TBlob> &out_data,
                        const std::vector<OpReqType> &req,
                        const std::vector<TBlob> &in_grad,
                        const std::vector<TBlob> &aux_args) {
    using namespace mshadow;
    using namespace mshadow::expr;
    CHECK_EQ(out_grad.size(), 1);
    CHECK_EQ(in_data.size(), 3);
    CHECK_EQ(out_data.size(), 3);
    CHECK_GE(in_grad.size(), 2);
    CHECK_GE(req.size(), 2);
    CHECK_EQ(req[lsoftmax_enum::kData], kWriteTo);
    CHECK_EQ(req[lsoftmax_enum::kWeight], kWriteTo);
    Stream<xpu> *s = ctx.get_stream<xpu>();
    const int n = in_data[lsoftmax_enum::kData].size(0);   // batch size
    const int m = in_data[lsoftmax_enum::kWeight].size(0); // number of classes
    Tensor<xpu, 2, DType> x = in_data[lsoftmax_enum::kData].FlatTo2D<xpu, DType>(s);
    Tensor<xpu, 2, DType> w = in_data[lsoftmax_enum::kWeight].FlatTo2D<xpu, DType>(s);
    Tensor<xpu, 1, DType> label = in_data[lsoftmax_enum::kLabel].get_with_shape<xpu, 1, DType>(Shape1(n), s);
    // x_norm / w_norm were produced by Forward and are reused here.
    Tensor<xpu, 1, DType> x_norm = out_data[lsoftmax_enum::kDataNorm].get_with_shape<xpu, 1, DType>(Shape1(n), s);
    Tensor<xpu, 1, DType> w_norm = out_data[lsoftmax_enum::kWeightNorm].get_with_shape<xpu, 1, DType>(Shape1(m), s);
    Tensor<xpu, 2, DType> o_grad = out_grad[lsoftmax_enum::kOut].FlatTo2D<xpu, DType>(s);
    Tensor<xpu, 2, DType> x_grad = in_grad[lsoftmax_enum::kData].FlatTo2D<xpu, DType>(s);
    Tensor<xpu, 2, DType> w_grad = in_grad[lsoftmax_enum::kWeight].FlatTo2D<xpu, DType>(s);
    // workspace is used for cos_t, cos_mt, k, sin2_t, fo and cos_t_m for every data point
    Tensor<xpu, 2, DType> workspace = ctx.requested[lsoftmax_enum::kTempSpace].get_space_typed<xpu, 2, DType>(Shape2(6, n), s);
#if defined(__CUDACC__)
    CHECK_EQ(s->blas_handle_ownership_, Stream<xpu>::OwnHandle)
        << "Must init CuBLAS handle in stream";
#endif
    // original fully connected
    x_grad = dot(o_grad, w);
    w_grad = dot(o_grad.T(), x);
    // large margin fully connected
    const int margin = param_.margin;
    const DType beta = static_cast<DType>(param_.beta);
    count_+=1;
    // Periodic diagnostics: current beta and gradient norms.
    if (param_.verbose) {
      if(count_%param_.verbose==0) {
        LOG(INFO)<<"["<<count_<<"]current beta:"<<beta;
        // NOTE(review): this local `n` shadows the batch-size `n` above.
        DType n = GradNorm(x_grad, s);
        LOG(INFO)<<"x_grad norm:"<<n;
        n = GradNorm(w_grad, s);
        LOG(INFO)<<"w_grad norm:"<<n;
      }
    }
    // Upload lookup tables (same pattern as Forward).
    Tensor<cpu, 1, DType> k_table_cpu(k_table_.data(), Shape1(k_table_.size()));
    Tensor<cpu, 1, DType> c_table_cpu(c_table_.data(), Shape1(c_table_.size()));
    Tensor<xpu, 1, DType> k_table_xpu(Shape1(k_table_.size()));
    Tensor<xpu, 1, DType> c_table_xpu(Shape1(c_table_.size()));
    k_table_xpu.set_stream(s);
    c_table_xpu.set_stream(s);
    AllocSpace(&k_table_xpu);
    AllocSpace(&c_table_xpu);
    Copy(k_table_xpu, k_table_cpu, s);
    Copy(c_table_xpu, c_table_cpu, s);
    LSoftmaxBackward(x, w, label, x_norm, w_norm, o_grad, x_grad, w_grad, workspace,
                     k_table_xpu, c_table_xpu, margin, beta);
    FreeSpace(&k_table_xpu);
    FreeSpace(&c_table_xpu);
    //if(param_.grad_norm) {
    //  GradNorm(x_grad, s);
    //  GradNorm(w_grad, s);
    //}
    // dirty hack, should also work for multi device
    // Decay beta once per backward call unless BETA pins it externally.
    if(std::getenv("BETA")==NULL) {
      param_.beta *= param_.scale;
      param_.beta = std::max(param_.beta, param_.beta_min);
    }
    //LOG(INFO)<<"w_grad:"<<w_grad.shape_[0]<<","<<w_grad.shape_[1];
    //norm = reduce_with_axis<red::sum, false>(F<mxnet::op::mshadow_op::square>(w_grad), 2);
    //norm = F<mxnet::op::mshadow_op::square_root>(norm + param_.eps);
    //out = data / broadcast_with_axis(norm, 1, dshape[2]);
    //if (param_.beta < next_beta_) {
    //  next_beta_ *= 0.1f;
    //  if (param_.verbose) {
    //    LOG(INFO) << "LSoftmax changes beta to " << param_.beta;
    //  }
    //}
  }

  //Tensor<xpu, 2, DType> grad_norm(const Tensor<xpu, 2, DType> grad) {
  //}

 private:
  LSoftmaxParam param_;
  // global lookup table
  std::vector<DType> k_table_;  // cos(i*pi/margin) interval boundaries
  std::vector<DType> c_table_;  // binomial coefficients C(margin, i)
  //float next_beta_;
  uint32_t count_;              // backward-call counter for verbose logging
}; // class LSoftmaxOp
|
||||
|
||||
// Factory, specialized per device in lsoftmax.cc (cpu) and lsoftmax.cu (gpu).
template<typename xpu>
Operator *CreateOp(LSoftmaxParam param, int dtype);
|
||||
|
||||
#if DMLC_USE_CXX11
|
||||
class LSoftmaxProp : public OperatorProperty {
|
||||
public:
|
||||
void Init(const std::vector<std::pair<std::string, std::string> > &kwargs) override {
|
||||
param_.Init(kwargs);
|
||||
}
|
||||
|
||||
std::map<std::string, std::string> GetParams() const override {
|
||||
return param_.__DICT__();
|
||||
}
|
||||
|
||||
std::vector<std::string> ListArguments() const override {
|
||||
return {"data", "weight", "label"};
|
||||
}
|
||||
|
||||
std::vector<std::string> ListOutputs() const override {
|
||||
return {"output", "data_norm", "weight_norm"};
|
||||
}
|
||||
|
||||
int NumOutputs() const override {
|
||||
return 3;
|
||||
}
|
||||
|
||||
int NumVisibleOutputs() const override {
|
||||
return 1;
|
||||
}
|
||||
|
||||
bool InferShape(std::vector<TShape> *in_shape,
|
||||
std::vector<TShape> *out_shape,
|
||||
std::vector<TShape> *aux_shape) const override {
|
||||
using namespace mshadow;
|
||||
CHECK_EQ(in_shape->size(), 3) << "Input:[data, label, weight]";
|
||||
const TShape &dshape = in_shape->at(lsoftmax_enum::kData);
|
||||
const TShape &lshape = in_shape->at(lsoftmax_enum::kLabel);
|
||||
CHECK_EQ(dshape.ndim(), 2) << "data shape should be (batch_size, feature_dim)";
|
||||
CHECK_EQ(lshape.ndim(), 1) << "label shape should be (batch_size,)";
|
||||
const int n = dshape[0];
|
||||
const int feature_dim = dshape[1];
|
||||
const int m = param_.num_hidden;
|
||||
SHAPE_ASSIGN_CHECK(*in_shape, lsoftmax_enum::kWeight, Shape2(m, feature_dim));
|
||||
out_shape->clear();
|
||||
out_shape->push_back(Shape2(n, m)); // output
|
||||
out_shape->push_back(Shape1(n)); // data norm
|
||||
out_shape->push_back(Shape1(m)); // weight norm
|
||||
aux_shape->clear();
|
||||
return true;
|
||||
}
|
||||
|
||||
std::vector<ResourceRequest> BackwardResource(
|
||||
const std::vector<TShape> &in_shape) const override {
|
||||
return {ResourceRequest::kTempSpace};
|
||||
}
|
||||
|
||||
std::vector<int> DeclareBackwardDependency(
|
||||
const std::vector<int> &out_grad,
|
||||
const std::vector<int> &in_data,
|
||||
const std::vector<int> &out_data) const override {
|
||||
return {out_grad[lsoftmax_enum::kOut], out_data[lsoftmax_enum::kDataNorm],
|
||||
out_data[lsoftmax_enum::kWeightNorm], in_data[lsoftmax_enum::kData],
|
||||
in_data[lsoftmax_enum::kWeight], in_data[lsoftmax_enum::kLabel]};
|
||||
}
|
||||
|
||||
std::string TypeString() const override {
|
||||
return "LSoftmax";
|
||||
}
|
||||
|
||||
OperatorProperty *Copy() const override {
|
||||
auto ptr = new LSoftmaxProp();
|
||||
ptr->param_ = param_;
|
||||
return ptr;
|
||||
}
|
||||
|
||||
Operator *CreateOperator(Context ctx) const override {
|
||||
LOG(FATAL) << "Not Implemented.";
|
||||
return NULL;
|
||||
}
|
||||
|
||||
Operator *CreateOperatorEx(Context ctx, std::vector<TShape> *in_shape,
|
||||
std::vector<int> *in_type) const override;
|
||||
|
||||
private:
|
||||
LSoftmaxParam param_;
|
||||
}; // class LSoftmaxProp
|
||||
#endif // DMLC_USE_CXX11
|
||||
|
||||
} // namespace op
|
||||
} // namespace mxnet
|
||||
|
||||
#endif // MXNET_OPERATOR_LSOFTMAX_INL_H_
|
||||
75
src/operator/lsoftmax.cc
Normal file
75
src/operator/lsoftmax.cc
Normal file
@@ -0,0 +1,75 @@
|
||||
/*!
|
||||
* Copyright (c) 2016 by Contributors
|
||||
* \file lsoftmax.cc
|
||||
* \brief LSoftmax from <Large-Margin Softmax Loss for Convolutional Neural Networks>
|
||||
* \author luoyetx
|
||||
*/
|
||||
#include "./lsoftmax-inl.h"
|
||||
|
||||
namespace mshadow {
|
||||
|
||||
// CPU implementation of the large-margin forward pass.
// Deliberately unimplemented: the operator is GPU-only (see lsoftmax.cu);
// creating it on a CPU context fails fast with a fatal log.
template <typename DType>
inline void LSoftmaxForward(const Tensor<cpu, 2, DType> &x,
                            const Tensor<cpu, 2, DType> &w,
                            const Tensor<cpu, 1, DType> &label,
                            const Tensor<cpu, 2, DType> &out,
                            const Tensor<cpu, 1, DType> &x_norm,
                            const Tensor<cpu, 1, DType> &w_norm,
                            const Tensor<cpu, 1, DType> &k_table,
                            const Tensor<cpu, 1, DType> &c_table,
                            const int margin,
                            const DType beta) {
  LOG(FATAL) << "Not Implemented.";
}
|
||||
|
||||
// CPU implementation of the large-margin backward pass.
// Deliberately unimplemented (GPU-only operator, see lsoftmax.cu).
template <typename DType>
inline void LSoftmaxBackward(const Tensor<cpu, 2, DType> &x,
                             const Tensor<cpu, 2, DType> &w,
                             const Tensor<cpu, 1, DType> &label,
                             const Tensor<cpu, 1, DType> &x_norm,
                             const Tensor<cpu, 1, DType> &w_norm,
                             const Tensor<cpu, 2, DType> &o_grad,
                             const Tensor<cpu, 2, DType> &x_grad,
                             const Tensor<cpu, 2, DType> &w_grad,
                             const Tensor<cpu, 2, DType> &workspace,
                             const Tensor<cpu, 1, DType> &k_table,
                             const Tensor<cpu, 1, DType> &c_table,
                             const int margin,
                             const DType beta) {
  LOG(FATAL) << "Not Implemented.";
}
|
||||
|
||||
} // namespace mshadow
|
||||
|
||||
namespace mxnet {
|
||||
namespace op {
|
||||
|
||||
// CPU factory specialization; dispatches on the runtime dtype.
// The created operator will LOG(FATAL) from its kernels — CPU is unsupported.
template<>
Operator *CreateOp<cpu>(LSoftmaxParam param, int dtype) {
  Operator *op = NULL;
  MSHADOW_REAL_TYPE_SWITCH(dtype, DType, {
    op = new LSoftmaxOp<cpu, DType>(param);
  })
  return op;
}
|
||||
|
||||
// Creates the operator after running shape/type inference, then dispatches to
// the device-specific CreateOp via DO_BIND_DISPATCH using the dtype of the
// first input (data).
Operator *LSoftmaxProp::CreateOperatorEx(Context ctx, std::vector<TShape> *in_shape,
                                         std::vector<int> *in_type) const {
  std::vector<TShape> out_shape, aux_shape;
  std::vector<int> out_type, aux_type;
  CHECK(InferType(in_type, &out_type, &aux_type));
  CHECK(InferShape(in_shape, &out_shape, &aux_shape));
  DO_BIND_DISPATCH(CreateOp, param_, in_type->at(0));
}
|
||||
|
||||
// Register the parameter struct and expose the operator as "LSoftmax"
// with arguments in the order expected by lsoftmax_enum (data, weight, label).
DMLC_REGISTER_PARAMETER(LSoftmaxParam);

MXNET_REGISTER_OP_PROPERTY(LSoftmax, LSoftmaxProp)
.describe("LSoftmax from <Large-Margin Softmax Loss for Convolutional Neural Networks>")
.add_argument("data", "Symbol", "data")
.add_argument("weight", "Symbol", "weight")
.add_argument("label", "Symbol", "label")
.add_arguments(LSoftmaxParam::__FIELDS__());
|
||||
|
||||
} // namespace op
|
||||
} // namespace mxnet
|
||||
322
src/operator/lsoftmax.cu
Normal file
322
src/operator/lsoftmax.cu
Normal file
@@ -0,0 +1,322 @@
|
||||
/*!
|
||||
* Copyright (c) 2016 by Contributors
|
||||
* \file lsoftmax.cu
|
||||
* \brief LSoftmax from <Large-Margin Softmax Loss for Convolutional Neural Networks>
|
||||
* \author luoyetx
|
||||
*/
|
||||
#include "./lsoftmax-inl.h"
|
||||
|
||||
namespace mshadow {
|
||||
namespace cuda {
|
||||
|
||||
namespace {
// workspace variables
// Row indices into the (6, n) backward workspace: per-sample cached values
// cos(theta), cos(m*theta), interval index k, sin^2(theta), the original
// target logit fo, and cos(theta)^(margin-1).
enum LSoftmaxTempSpaceType {kCost, kCosmt, kK, kSin2t, kFo, kCostM};
}
|
||||
|
||||
// Grid-stride loop: each thread starts at its global index and advances by
// the total thread count, so any n is covered by any launch configuration.
#define CUDA_KERNEL_LOOP(i, n) \
  for (int i = blockIdx.x * blockDim.x + threadIdx.x; \
       i < (n); \
       i += blockDim.x * gridDim.x)
|
||||
|
||||
// Returns (-1)^k: +1 for even k, -1 for odd k.
MSHADOW_XINLINE int LSPowOfMO(const int k) {
  return (k & 0x01) ? -1 : 1;
}
|
||||
|
||||
// Kernel: x_norm[i] = L2 norm of row i of x. One sample per loop iteration.
template<typename DType>
__global__ void LSCalcNorm(const Tensor<gpu, 2, DType> x,
                           Tensor<gpu, 1, DType> x_norm) {
  const int n = x.size(0);  // number of rows
  const int m = x.size(1);  // row length
  CUDA_KERNEL_LOOP(i, n) {
    DType norm = 0;
    for (int j = 0; j < m; ++j) {
      norm += x[i][j] * x[i][j];
    }
    // NOTE(review): no epsilon — a zero row yields norm 0 and later
    // divisions by x_norm produce NaN/Inf.
    x_norm[i] = sqrt(norm);
  }
}
|
||||
|
||||
template<typename DType>
|
||||
__device__ int LSFindK(const DType *k_table, const int n, const DType cos_t) {
|
||||
const DType eps = 1e-5;
|
||||
for (int i = 0; i < n; ++i) {
|
||||
if (((k_table[i+1] < cos_t) || (abs(k_table[i+1] - cos_t) < eps)) &&
|
||||
((k_table[i] > cos_t) || (abs(k_table[i] - cos_t) < eps))) {
|
||||
return i;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Computes cos(margin * theta) from cos_t = cos(theta) via the binomial
// expansion: sum_p (-1)^p * C(m, 2p) * cos^(m-2p)(t) * sin^(2p)(t),
// using c_table[2p] = C(margin, 2p).
template<typename DType>
__device__ DType LSCalcCosmt(const DType *c_table, const int n,
                             const DType cos_t, const int margin) {
  const DType sin2_t = 1 - cos_t * cos_t;
  DType cos_t_p = pow(cos_t, margin);  // cos^(m-2p)(t), updated in the loop
  DType sin2_t_p = 1;                  // sin^(2p)(t)
  DType cos_mt = cos_t_p; // p = 0
  for (int p = 1; p <= margin / 2; ++p) {
    cos_t_p /= cos_t * cos_t; // don't replace `cos_t*cos_t` with `1-sin2_t`, this can cause numeric issue if cos_t --> 0
    sin2_t_p *= sin2_t;
    cos_mt += LSPowOfMO(p) * c_table[2*p] * cos_t_p * sin2_t_p;
  }
  return cos_mt;
}
|
||||
|
||||
// Kernel: for each sample i, replaces the target-class logit out[i][yi]
// (already holding the plain FC value fo = w_yi . x_i) with the beta-blended
// large-margin value:
//   f = ((-1)^k * cos(m*theta) - 2k) * |w_yi| * |x_i|
//   out[i][yi] = (f + beta * fo) / (1 + beta)
// Non-target logits are left untouched.
template<typename DType>
__global__ void LSoftmaxForwardKernel(const Tensor<gpu, 2, DType> x,
                                      const Tensor<gpu, 2, DType> w,
                                      const Tensor<gpu, 1, DType> label,
                                      const Tensor<gpu, 1, DType> x_norm,
                                      const Tensor<gpu, 1, DType> w_norm,
                                      Tensor<gpu, 2, DType> out,
                                      const Tensor<gpu, 1, DType> k_table,
                                      const Tensor<gpu, 1, DType> c_table,
                                      const int margin,
                                      const DType beta) {
  const int n = x.size(0);
  const int feature_dim = x.size(1);  // unused here; kept for symmetry
  const int m = w.size(0);            // unused here
  CUDA_KERNEL_LOOP(i, n) {
    const int yi = static_cast<int>(label[i]);
    const DType fo_i_yi = out[i][yi];
    const DType cos_t = fo_i_yi / (x_norm[i] * w_norm[yi]);
    const int k = LSFindK(k_table.dptr_, k_table.size(0), cos_t);
    const DType cos_mt = LSCalcCosmt(c_table.dptr_, c_table.size(0), cos_t, margin);
    const DType f_i_yi = (LSPowOfMO(k) * cos_mt - 2*k) * (w_norm[yi] * x_norm[i]);
    out[i][yi] = (f_i_yi + beta * fo_i_yi) / (1 + beta);
  }
}
|
||||
|
||||
// Launcher: computes row norms of x and w, then applies the large-margin
// adjustment to the target logits in `out` (which must already hold x * w^T).
// All launches use the default stream configuration; no explicit sync —
// ordering is by launch order on the same stream.
template<typename DType>
inline void LSoftmaxForward(const Tensor<gpu, 2, DType> &x,
                            const Tensor<gpu, 2, DType> &w,
                            const Tensor<gpu, 1, DType> &label,
                            const Tensor<gpu, 2, DType> &out,
                            const Tensor<gpu, 1, DType> &x_norm,
                            const Tensor<gpu, 1, DType> &w_norm,
                            const Tensor<gpu, 1, DType> &k_table,
                            const Tensor<gpu, 1, DType> &c_table,
                            const int margin,
                            const DType beta) {
  const int n = x.size(0);  // batch size
  const int m = w.size(0);  // number of classes
  dim3 dimBlock(kBaseThreadNum);
  dim3 dimGrid((n + kBaseThreadNum - 1) / kBaseThreadNum);
  LSCalcNorm<<<dimGrid, dimBlock>>>(x, x_norm);
  dimGrid.x = ((m + kBaseThreadNum - 1) / kBaseThreadNum);
  LSCalcNorm<<<dimGrid, dimBlock>>>(w, w_norm);
  dimGrid.x = ((n + kBaseThreadNum - 1) / kBaseThreadNum);
  LSoftmaxForwardKernel<<<dimGrid, dimBlock>>>(x, w, label, x_norm, w_norm, out, k_table, c_table, margin, beta);
}
|
||||
|
||||
// Kernel: precomputes, per sample i, all quantities the two gradient kernels
// need (cos(theta), cos(m*theta), k, sin^2(theta), the raw target logit fo,
// and cos^(margin-1)(theta)), storing them in the (6, n) workspace rows
// indexed by LSoftmaxTempSpaceType.
template<typename DType>
__global__ void LSoftmaxBackwardRequired(const Tensor<gpu, 2, DType> x,
                                         const Tensor<gpu, 2, DType> w,
                                         const Tensor<gpu, 1, DType> label,
                                         const Tensor<gpu, 1, DType> x_norm,
                                         const Tensor<gpu, 1, DType> w_norm,
                                         Tensor<gpu, 2, DType> workspace,
                                         const Tensor<gpu, 1, DType> k_table,
                                         const Tensor<gpu, 1, DType> c_table,
                                         const int margin) {
  const int n = x.size(0);
  const int feature_dim = x.size(1);
  CUDA_KERNEL_LOOP(i, n) {
    const int yi = static_cast<int>(label[i]);
    // fo_i_yi = dot(w_yi, x_i)
    // Recomputed here rather than read from `out`, since Forward overwrote
    // the target entry of `out` with the blended value.
    DType fo_i_yi = 0;
    for (int p = 0; p < feature_dim; ++p) {
      fo_i_yi += w[yi][p] * x[i][p];
    }
    const DType cos_t = fo_i_yi / (x_norm[i] * w_norm[yi]);
    const int k = LSFindK(k_table.dptr_, k_table.size(0), cos_t);
    const DType cos_mt = LSCalcCosmt(c_table.dptr_, c_table.size(0), cos_t, margin);
    const DType sin2_t = 1 - cos_t * cos_t;
    workspace[kCost][i] = cos_t;
    workspace[kCosmt][i] = cos_mt;
    workspace[kK][i] = static_cast<DType>(k);  // stored as DType, cast back on read
    workspace[kSin2t][i] = sin2_t;
    workspace[kFo][i] = fo_i_yi;
    workspace[kCostM][i] = pow(cos_t, margin - 1);
  }
}
|
||||
|
||||
// Kernel: corrects x_grad for the large-margin target logits. x_grad already
// holds dot(o_grad, w) (the plain FC gradient). Since the forward output is
// (f + beta*fo)/(1+beta), each element adds
//   alpha * o_grad[i][yi] * (df/dx - w[yi][l]),   alpha = 1/(1+beta)
// which swaps the target column's plain contribution for the margin one.
// One thread per (sample, feature) element.
template<typename DType>
__global__ void LSoftmaxBackwardXKernel(const Tensor<gpu, 2, DType> x,
                                        const Tensor<gpu, 2, DType> w,
                                        const Tensor<gpu, 1, DType> label,
                                        const Tensor<gpu, 1, DType> x_norm,
                                        const Tensor<gpu, 1, DType> w_norm,
                                        const Tensor<gpu, 2, DType> o_grad,
                                        Tensor<gpu, 2, DType> x_grad,
                                        const Tensor<gpu, 2, DType> workspace,
                                        const Tensor<gpu, 1, DType> c_table,
                                        const int margin,
                                        const DType beta) {
  const int nthreads = x.size(0) * x.size(1);
  const int feature_dim = x.size(1);
  CUDA_KERNEL_LOOP(idx, nthreads) {
    const int i = idx / feature_dim;  // sample index
    const int l = idx % feature_dim;  // feature index
    const int yi = static_cast<int>(label[i]);
    // Cached per-sample values from LSoftmaxBackwardRequired.
    const DType cos_t = workspace[kCost][i];
    const DType cos_mt = workspace[kCosmt][i];
    const int k = static_cast<int>(workspace[kK][i]);
    const DType sin2_t = workspace[kSin2t][i];
    const DType fo_i_yi = workspace[kFo][i];
    const DType w_norm_yi = w_norm[yi];
    const DType x_norm_i = x_norm[i];

    // d cos(theta) / d x[i][l] and d sin^2(theta) / d x[i][l]
    const DType dcos_dx = w[yi][l] / (w_norm_yi * x_norm_i) - \
      fo_i_yi * x[i][l] / (w_norm_yi * x_norm_i * x_norm_i * x_norm_i);
    const DType dsin2_dx = -2 * cos_t * dcos_dx;
    DType cos_t_p = workspace[kCostM][i];  // cos^(margin-1)(t), stepped down in loop
    DType sin2_t_p = 1;
    // d cos(m*theta) / d x[i][l], term by term of the binomial expansion.
    DType dcosm_dx = margin * cos_t_p * dcos_dx; // p = 0
    for (int p = 1; p <= margin / 2; ++p) {
      cos_t_p /= cos_t * cos_t;
      dcosm_dx += LSPowOfMO(p) * c_table[2*p] * (p * cos_t * dsin2_dx + \
        (margin - 2*p) * sin2_t * dcos_dx) * cos_t_p * sin2_t_p;
      sin2_t_p *= sin2_t;
    }
    // d f / d x[i][l] with f = ((-1)^k cos(m t) - 2k) |w_yi| |x_i|.
    const DType df_dx = (LSPowOfMO(k) * cos_mt - 2*k) * w_norm_yi / x_norm_i * x[i][l] + \
      LSPowOfMO(k) * w_norm_yi * x_norm_i * dcosm_dx;
    const DType alpha = 1 / (1 + beta);
    x_grad[i][l] += alpha * o_grad[i][yi] * (df_dx - w[yi][l]);
  }
}
|
||||
|
||||
// Kernel: corrects w_grad for the large-margin target logits, mirroring
// LSoftmaxBackwardXKernel. One thread per (class, feature) element; each
// thread scans the whole batch and accumulates contributions only from
// samples whose label equals its class row j.
template<typename DType>
__global__ void LSoftmaxBackwardWKernel(const Tensor<gpu, 2, DType> x,
                                        const Tensor<gpu, 2, DType> w,
                                        const Tensor<gpu, 1, DType> label,
                                        const Tensor<gpu, 1, DType> x_norm,
                                        const Tensor<gpu, 1, DType> w_norm,
                                        const Tensor<gpu, 2, DType> o_grad,
                                        Tensor<gpu, 2, DType> w_grad,
                                        const Tensor<gpu, 2, DType> workspace,
                                        const Tensor<gpu, 1, DType> c_table,
                                        const int margin,
                                        const DType beta) {
  const int nthreads = w.size(0) * w.size(1);
  const int n = x.size(0);
  const int feature_dim = w.size(1);
  CUDA_KERNEL_LOOP(idx, nthreads) {
    const int j = idx / feature_dim;  // class index
    const int l = idx % feature_dim;  // feature index
    DType dw = 0;
    for (int i = 0; i < n; ++i) {
      const int yi = static_cast<int>(label[i]);
      if (yi == j) {
        // Same cached quantities and expansion as the X kernel, with the
        // roles of x and w swapped in the derivative of cos(theta).
        const DType cos_t = workspace[kCost][i];
        const DType cos_mt = workspace[kCosmt][i];
        const int k = static_cast<int>(workspace[kK][i]);
        const DType sin2_t = workspace[kSin2t][i];
        const DType fo_i_yi = workspace[kFo][i];
        const DType x_norm_i = x_norm[i];
        const DType w_norm_yi = w_norm[yi];

        const DType dcos_dw = x[i][l] / (w_norm_yi * x_norm_i) - \
          fo_i_yi * w[yi][l] / (x_norm_i * w_norm_yi * w_norm_yi * w_norm_yi);
        const DType dsin2_dw = -2 * cos_t * dcos_dw;
        DType cos_t_p = workspace[kCostM][i];
        DType sin2_t_p = 1;
        DType dcosm_dw = margin * cos_t_p * dcos_dw; // p = 0
        for (int p = 1; p <= margin / 2; ++p) {
          cos_t_p /= cos_t * cos_t;
          dcosm_dw += LSPowOfMO(p) * c_table[2*p] * (p * cos_t * dsin2_dw + \
            (margin - 2*p) * sin2_t * dcos_dw) * cos_t_p * sin2_t_p;
          sin2_t_p *= sin2_t;
        }
        const DType df_dw_j = (LSPowOfMO(k) * cos_mt - 2*k) * x_norm_i / w_norm_yi * w[yi][l] + \
          LSPowOfMO(k) * w_norm_yi * x_norm_i * dcosm_dw;
        dw += o_grad[i][yi] * (df_dw_j - x[i][l]);
      }
    }
    const DType alpha = 1 / (1 + beta);
    w_grad[j][l] += alpha * dw;
  }
}
|
||||
|
||||
// Launcher: fills the per-sample workspace, then corrects x_grad and w_grad
// (which must already hold the plain FC gradients). Kernels run in launch
// order on the default stream, so the Required pass completes before the
// gradient kernels read the workspace.
template<typename DType>
inline void LSoftmaxBackward(const Tensor<gpu, 2, DType> &x,
                             const Tensor<gpu, 2, DType> &w,
                             const Tensor<gpu, 1, DType> &label,
                             const Tensor<gpu, 1, DType> &x_norm,
                             const Tensor<gpu, 1, DType> &w_norm,
                             const Tensor<gpu, 2, DType> &o_grad,
                             const Tensor<gpu, 2, DType> &x_grad,
                             const Tensor<gpu, 2, DType> &w_grad,
                             const Tensor<gpu, 2, DType> &workspace,
                             const Tensor<gpu, 1, DType> &k_table,
                             const Tensor<gpu, 1, DType> &c_table,
                             const int margin,
                             const DType beta) {
  const int n = x.size(0);            // batch size
  const int feature_dim = x.size(1);
  const int m = w.size(0);            // number of classes
  dim3 dimBlock(kBaseThreadNum);
  dim3 dimGrid((n + kBaseThreadNum - 1) / kBaseThreadNum);
  LSoftmaxBackwardRequired<<<dimGrid, dimBlock>>>(x, w, label, x_norm, w_norm, workspace,
                                                  k_table, c_table, margin);
  dimGrid.x = ((n * feature_dim + kBaseThreadNum - 1) / kBaseThreadNum);
  LSoftmaxBackwardXKernel<<<dimGrid, dimBlock>>>(x, w, label, x_norm, w_norm, o_grad, x_grad, workspace,
                                                 c_table, margin, beta);
  dimGrid.x = ((m * feature_dim + kBaseThreadNum - 1) / kBaseThreadNum);
  LSoftmaxBackwardWKernel<<<dimGrid, dimBlock>>>(x, w, label, x_norm, w_norm, o_grad, w_grad, workspace,
                                                 c_table, margin, beta);
}
|
||||
|
||||
} // namespace cuda
|
||||
|
||||
// GPU dispatch shim: forwards to the cuda:: implementation. This is the
// overload the device-templated operator code resolves for xpu = gpu.
template<typename DType>
inline void LSoftmaxForward(const Tensor<gpu, 2, DType> &x,
                            const Tensor<gpu, 2, DType> &w,
                            const Tensor<gpu, 1, DType> &label,
                            const Tensor<gpu, 2, DType> &out,
                            const Tensor<gpu, 1, DType> &x_norm,
                            const Tensor<gpu, 1, DType> &w_norm,
                            const Tensor<gpu, 1, DType> &k_table,
                            const Tensor<gpu, 1, DType> &c_table,
                            const int margin,
                            const DType beta) {
  cuda::LSoftmaxForward(x, w, label, out, x_norm, w_norm,
                        k_table, c_table, margin, beta);
}
|
||||
|
||||
// GPU dispatch shim: forwards to the cuda:: implementation.
template<typename DType>
inline void LSoftmaxBackward(const Tensor<gpu, 2, DType> &x,
                             const Tensor<gpu, 2, DType> &w,
                             const Tensor<gpu, 1, DType> &label,
                             const Tensor<gpu, 1, DType> &x_norm,
                             const Tensor<gpu, 1, DType> &w_norm,
                             const Tensor<gpu, 2, DType> &o_grad,
                             const Tensor<gpu, 2, DType> &x_grad,
                             const Tensor<gpu, 2, DType> &w_grad,
                             const Tensor<gpu, 2, DType> &workspace,
                             const Tensor<gpu, 1, DType> &k_table,
                             const Tensor<gpu, 1, DType> &c_table,
                             const int margin,
                             const DType beta) {
  cuda::LSoftmaxBackward(x, w, label, x_norm, w_norm, o_grad, x_grad, w_grad, workspace,
                         k_table, c_table, margin, beta);
}
|
||||
|
||||
} // namespace mshadow
|
||||
|
||||
namespace mxnet {
|
||||
namespace op {
|
||||
|
||||
// GPU factory specialization; dispatches on the runtime dtype.
template<>
Operator *CreateOp<gpu>(LSoftmaxParam param, int dtype) {
  Operator *op = NULL;
  MSHADOW_REAL_TYPE_SWITCH(dtype, DType, {
    op = new LSoftmaxOp<gpu, DType>(param);
  })
  return op;
}
|
||||
|
||||
} // namespace op
|
||||
} // namespace mxnet
|
||||
64
src/train.sh
Executable file
64
src/train.sh
Executable file
@@ -0,0 +1,64 @@
|
||||
#!/usr/bin/env bash
|
||||
export MXNET_CPU_WORKER_NTHREADS=15
|
||||
export MXNET_CUDNN_AUTOTUNE_DEFAULT=0
|
||||
export MXNET_ENGINE_TYPE=ThreadedEnginePerDevice
|
||||
|
||||
#export CUDA_VISIBLE_DEVICES='4,5'
|
||||
#python -u train_softmax.py --retrain --pretrained '../model/sphereface-152-0-0' --load-epoch 8 --prefix '../model/sphereface-retrain-0' --loss-type 0
|
||||
export CUDA_VISIBLE_DEVICES='0,1,2,3,4,5,6,7'
|
||||
export CUDA_VISIBLE_DEVICES='0,1,2,3'
|
||||
export CUDA_VISIBLE_DEVICES='4,5,6,7'
|
||||
export CUDA_VISIBLE_DEVICES='4,5'
|
||||
export CUDA_VISIBLE_DEVICES='0,1'
|
||||
#python -u train_softmax.py --network 's60' --patch '16_0_96_112_0' --loss-type 1 > logs60_l1_v4 2>&1 &
|
||||
#python -u train_softmax.py --network 's60' --patch '0_0_96_95_0' --loss-type 1 --prefix '../model/spherefacex'
|
||||
#python -u train_softmax.py --network 's20' --patch '0_0_96_112_0' --loss-type 0
|
||||
#python -u train_softmax.py --network 'r50' --patch '0_0_96_112_0' --loss-type 0
|
||||
#python -u train_softmax.py --network 'm4' --patch '0_0_96_112_0' --loss-type 1 --prefix '../model/spherefacem' --per-batch-size 224 > celm.log 2>&1 &
|
||||
#python -u train_softmax.py --network 'm29' --patch '0_0_96_112_0' --loss-type 0 --lr 0.1 --prefix '../model/softmax' --verbose 2000 --per-batch-size 128 > sx_m29.log 2>&1 &
|
||||
#python -u train_softmax.py --network 'm29' --patch '0_0_96_112_0' --loss-type 1 --lr 0.1 --prefix '../model/sphere47' --verbose 2000 --per-batch-size 224 --beta-min 4.7 > sp_m29_47.log 2>&1 &
|
||||
export CUDA_VISIBLE_DEVICES='2,3'
|
||||
#python -u train_softmax.py --network 'm1' --patch '0_0_96_112_0' --loss-type 0 --lr 0.01 --prefix '../model/marginal0' --verbose 2000
|
||||
#python -u train_softmax.py --network 's60' --patch '0_0_96_95_0' --loss-type 1
|
||||
#python -u train_softmax.py --network 's20' --patch '0_0_96_95_0' --loss-type 1
|
||||
#python -u train_softmax.py --network 's60' --patch '0_0_96_112_0' --loss-type 1 --prefix '../model/spherefacec' > logs60_c 2>&1 &
|
||||
#python -u train_marginal.py --patch '0_0_96_112_0' --network 's36' --verbose 1000 --lr 0.01
|
||||
#python -u train_coco.py --patch '0_0_96_112_0' --images-per-identity 32
|
||||
#python -u train_softmax.py --network 's36' --patch '0_0_96_112_0' --loss-type 1 --prefix '../model/spherefacei36' --per-batch-size 256
|
||||
#python -u train_softmax.py --network 's36' --patch '0_0_96_112_0' --loss-type 1 --prefix '../model/spherefacei36' --per-batch-size 256 > cel4.log 2>&1 &
|
||||
#python -u train_softmax.py --network 'm28' --patch '0_0_96_112_0' --loss-type 11 --lr 0.1 --prefix '../model/L11' --verbose 500 --per-batch-size 128 --images-per-identity 4
|
||||
#python -u train_softmax.py --network 'm27' --patch '0_0_96_112_0' --loss-type 1 --lr 0.1 --prefix '../model/sphere' --verbose 2000 --per-batch-size 224 > sp_m27.log 2>&1 &
|
||||
#python -u train_softmax.py --network 'm27' --patch '0_0_96_112_0' --loss-type 0 --lr 0.1 --prefix '../model/softmax' --verbose 2000 --per-batch-size 128 > sx_m27.log 2>&1 &
|
||||
export CUDA_VISIBLE_DEVICES='4,5'
|
||||
#python -u train_softmax.py --network 'm29' --patch '0_0_96_112_0' --loss-type 0 --lr 0.1 --prefix '../model/softmax' --verbose 2000 --per-batch-size 128 > sx_m29.log 2>&1 &
|
||||
#python -u train_softmax.py --network 'm27' --patch '0_0_96_112_0' --loss-type 1 --lr 0.1 --prefix '../model/sphere' --verbose 2000 --per-batch-size 224 > sp_m27.log 2>&1 &
|
||||
#python -u train_softmax.py --network 'm28' --patch '0_0_96_112_0' --loss-type 1 --lr 0.1 --prefix '../model/sphere' --verbose 2000 --per-batch-size 224 > sp_m28.log 2>&1 &
|
||||
export CUDA_VISIBLE_DEVICES='6,7'
|
||||
#python -u train_softmax.py --network 'm29' --patch '0_0_96_112_0' --loss-type 1 --lr 0.1 --prefix '../model/spherem' --verbose 2000 --per-batch-size 224
|
||||
#python -u train_softmax.py --network 'm28' --patch '0_0_96_112_0' --loss-type 0 --lr 0.1 --prefix '../model/softmax' --verbose 2000 --per-batch-size 128 > sx_m28.log 2>&1 &
|
||||
#python -u train_marginal.py --patch '0_0_96_112_0' --network 'i4' --verbose 2000 --lr 0.01
|
||||
#python -u train_softmax.py --network 'i4' --patch '0_0_96_112_0' --loss-type 1 --gamma 0.06 --beta-min 4
|
||||
#python -u train_softmax.py --network 'x4' --patch '0_0_96_112_0' --loss-type 1 --gamma 0.09
|
||||
#python -u train_softmax.py --network 's60' --patch '0_0_80_95_0' --loss-type 1 > logs60_l1_v3 2>&1 &
|
||||
#python -u train_softmax.py --network 's60' --patch '0_0_96_95_0' --loss-type 1 > logs60_l1_v2 2>&1 &
|
||||
#python -u train_softmax.py --network 's20' --patch '0_0_96_112_0'
|
||||
export CUDA_VISIBLE_DEVICES='4,5,6,7'
|
||||
python -u train_softmax.py --network 'm29' --patch '0_0_96_112_0' --loss-type 0 --lr 0.1 --prefix '../model/softmax' --verbose 2000 --per-batch-size 128 > sx_m29.log 2>&1 &
|
||||
#python -u train_softmax.py --network 's60' --patch '0_0_96_95_0' --loss-type 1 --gamma 0.06 --beta-freeze 5000 --prefix '../model/spherefacei' > cel2.log 2>&1 &
|
||||
export CUDA_VISIBLE_DEVICES='0,1,2,3'
|
||||
#python -u train_softmax.py --network 'm29' --patch '0_0_96_112_0' --loss-type 1 --lr 0.1 --prefix '../model/spherem' --verbose 2000 --per-batch-size 224 --lr-steps '60000,80000,90000' > spm_m29.log 2>&1 &
|
||||
#python -u train_softmax.py --network 's60' --patch '0_15_96_112_0' --loss-type 1 --gamma 0.06 --beta-freeze 5000 --prefix '../model/spherefacei' > cel3.log 2>&1 &
|
||||
export CUDA_VISIBLE_DEVICES='2'
|
||||
#python -u train_marginal.py --patch '0_0_96_112_0' --network 's36' --verbose 2000 --lr 0.01 > mar_s36.log 2>&1 &
|
||||
export CUDA_VISIBLE_DEVICES='3'
|
||||
#python -u train_marginal.py --patch '0_0_96_112_0' --network 'i4' --verbose 2000 --lr 0.01 > mar_i4.log 2>&1 &
|
||||
#python -u train_softmax.py --network 'i4' --patch '0_0_96_112_0' --loss-type 1 --gamma 0.06 --beta-freeze 5000
|
||||
#python -u train_softmax.py --network 'r50' --patch '0_0_96_112_0' --loss-type 1 --gamma 0.24 > logr50_l1 2>&1 &
|
||||
#python -u train_softmax.py --network 'r50' --patch '0_0_96_112_0' --loss-type 2 --verbose 100
|
||||
#python -u train_softmax.py --network 'r50' --patch '0_0_96_95_0' > logr101_pu 2>&1 &
|
||||
#python -u train_softmax.py --network 'r50' --patch '0_0_96_112_0'
|
||||
#python -u train_softmax.py --network 'r101' --patch '0_0_96_95_0'
|
||||
#python -u train_softmax.py --loss-type 1 --num-layers 64 --patch '0_0_96_112_0'
|
||||
#python -u train_softmax.py --loss-type 1 --num-layers 36 --patch '0_0_96_95_0'
|
||||
#python -u train_softmax.py --loss-type 1 --num-layers 20 --patch '0_0_80_95_0'
|
||||
|
||||
@@ -17,7 +17,6 @@ from mxnet import ndarray as nd
|
||||
import argparse
|
||||
import mxnet.optimizer as optimizer
|
||||
sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'common'))
|
||||
import resnet_dcn
|
||||
import spherenet
|
||||
import marginalnet
|
||||
import inceptions
|
||||
@@ -26,7 +25,8 @@ import lfw
|
||||
import sklearn
|
||||
from sklearn.decomposition import PCA
|
||||
#from center_loss import *
|
||||
import asoftmax
|
||||
#import resnet_dcn
|
||||
#import asoftmax
|
||||
|
||||
|
||||
logger = logging.getLogger()
|
||||
@@ -105,6 +105,8 @@ def parse_args():
|
||||
help='')
|
||||
parser.add_argument('--loss-type', type=int, default=1,
|
||||
help='')
|
||||
parser.add_argument('--incay', action='store_true', default=False,
|
||||
help='feature incay')
|
||||
parser.add_argument('--use-deformable', type=int, default=0,
|
||||
help='')
|
||||
parser.add_argument('--patch', type=str, default='0_0_96_112_0',
|
||||
@@ -138,6 +140,7 @@ def get_symbol(args, arg_params, aux_params):
|
||||
_,_,embedding,_ = resnet_dcn.get_symbol(512, args.num_layers)
|
||||
gt_label = mx.symbol.Variable('softmax_label')
|
||||
assert args.loss_type>=0
|
||||
extra_loss = None
|
||||
if args.loss_type==0:
|
||||
_weight = mx.symbol.Variable('fc7_weight')
|
||||
_bias = mx.symbol.Variable('fc7_bias', lr_mult=2.0, wd_mult=0.0)
|
||||
@@ -206,12 +209,22 @@ def get_symbol(args, arg_params, aux_params):
|
||||
softmax = mx.symbol.SoftmaxOutput(data=fc7, label = gt_label, name='softmax', normalization='valid')
|
||||
else:
|
||||
softmax = mx.symbol.SoftmaxOutput(data=fc7, label = gt_label, name='softmax')
|
||||
if args.loss_type<=1 and args.incay:
|
||||
params = [1.e-10, 0.01]
|
||||
sel = mx.symbol.argmax(data = fc7, axis=1)
|
||||
sel = (sel==gt_label)
|
||||
norm = embedding*embedding
|
||||
norm = mx.symbol.sum(norm, axis=1)
|
||||
norm += params[0]
|
||||
feature_incay = sel/norm
|
||||
feature_incay = mx.symbol.mean(feature_incay) * params[1]
|
||||
extra_loss = mx.symbol.MakeLoss(feature_incay)
|
||||
#out = softmax
|
||||
#l2_embedding = mx.symbol.L2Normalization(embedding)
|
||||
|
||||
#ce = mx.symbol.softmax_cross_entropy(fc7, gt_label, name='softmax_ce')/args.per_batch_size
|
||||
#out = mx.symbol.Group([mx.symbol.BlockGrad(embedding), softmax, mx.symbol.BlockGrad(ce)])
|
||||
if args.loss_type>=10 and extra_loss is not None:
|
||||
if extra_loss is not None:
|
||||
out = mx.symbol.Group([mx.symbol.BlockGrad(embedding), softmax, extra_loss])
|
||||
else:
|
||||
out = mx.symbol.Group([mx.symbol.BlockGrad(embedding), softmax])
|
||||
@@ -277,7 +290,7 @@ def train_net(args):
|
||||
|
||||
path_imglist = "/raid5data/dplearn/faces_normed/train.lst"
|
||||
args.num_classes = 82395
|
||||
args.use_val = True
|
||||
args.use_val = False
|
||||
val_path = "/raid5data/dplearn/faces_normed/val.lst"
|
||||
path_imgrec = "/opt/jiaguo/faces_normed/train.rec"
|
||||
val_rec = "/opt/jiaguo/faces_normed/val.rec"
|
||||
@@ -291,7 +304,7 @@ def train_net(args):
|
||||
data_shape = (args.image_channel,112,96)
|
||||
mean = [127.5,127.5,127.5]
|
||||
|
||||
if args.use_val and args.loss_type<=1:
|
||||
if args.use_val:
|
||||
val_dataiter = FaceImageIter2(
|
||||
batch_size = args.batch_size,
|
||||
data_shape = data_shape,
|
||||
@@ -349,7 +362,7 @@ def train_net(args):
|
||||
)
|
||||
|
||||
|
||||
if args.loss_type<=1:
|
||||
if args.loss_type<=9:
|
||||
train_dataiter = FaceImageIter2(
|
||||
batch_size = args.batch_size,
|
||||
data_shape = data_shape,
|
||||
@@ -570,7 +583,8 @@ def train_net(args):
|
||||
global_step = [0]
|
||||
save_step = [0]
|
||||
if len(args.lr_steps)==0:
|
||||
lr_steps = [40000, 70000, 90000]
|
||||
#lr_steps = [40000, 70000, 90000]
|
||||
lr_steps = [30000, 50000, 70000, 90000]
|
||||
if args.loss_type==1:
|
||||
lr_steps = [70000, 100000]
|
||||
else:
|
||||
@@ -595,16 +609,21 @@ def train_net(args):
|
||||
acc, embeddings_list = lfw_test(mbatch)
|
||||
save_step[0]+=1
|
||||
msave = save_step[0]
|
||||
do_save = False
|
||||
if acc>=highest_acc[0]:
|
||||
highest_acc[0] = acc
|
||||
if acc>=0.992:
|
||||
print('saving', msave)
|
||||
arg, aux = model.get_params()
|
||||
mx.model.save_checkpoint(prefix, msave, model.symbol, arg, aux)
|
||||
lfw_npy = "%s-lfw-%04d" % (prefix, msave)
|
||||
X = np.concatenate(embeddings_list, axis=0)
|
||||
print(X.shape)
|
||||
np.save(lfw_npy, X)
|
||||
if acc>=0.995:
|
||||
do_save = True
|
||||
if mbatch>lr_steps[-1] and msave%5==0:
|
||||
do_save = True
|
||||
if do_save:
|
||||
print('saving', msave)
|
||||
arg, aux = model.get_params()
|
||||
mx.model.save_checkpoint(prefix, msave, model.symbol, arg, aux)
|
||||
#lfw_npy = "%s-lfw-%04d" % (prefix, msave)
|
||||
#X = np.concatenate(embeddings_list, axis=0)
|
||||
#print(X.shape)
|
||||
#np.save(lfw_npy, X)
|
||||
print('[%d]Accuracy-Highest: %1.5f'%(mbatch, highest_acc[0]))
|
||||
if mbatch<=args.beta_freeze:
|
||||
_beta = args.beta
|
||||
|
||||
Reference in New Issue
Block a user