mirror of https://github.com/Jittor/Jittor
Merge pull request #156 from Jittor/lxl
add misc and bcelogits with pos_weight
This commit is contained in:
commit
d11c3ad40b
|
@ -178,7 +178,7 @@ void CudnnConvBackwardXOp::jit_run() {
|
||||||
CUDNN_CONVOLUTION_BWD_DATA_ALGO_WINOGRAD,
|
CUDNN_CONVOLUTION_BWD_DATA_ALGO_WINOGRAD,
|
||||||
CUDNN_CONVOLUTION_BWD_DATA_ALGO_WINOGRAD_NONFUSED
|
CUDNN_CONVOLUTION_BWD_DATA_ALGO_WINOGRAD_NONFUSED
|
||||||
};
|
};
|
||||||
int num_algos = CUDNN_CONVOLUTION_BWD_FILTER_ALGO_COUNT;
|
int num_algos = CUDNN_CONVOLUTION_BWD_DATA_ALGO_COUNT;
|
||||||
int perf_count;
|
int perf_count;
|
||||||
cudnnConvolutionBwdDataAlgoPerf_t perf_results[num_algos];
|
cudnnConvolutionBwdDataAlgoPerf_t perf_results[num_algos];
|
||||||
cudnnConvolutionBwdDataAlgo_t algo;
|
cudnnConvolutionBwdDataAlgo_t algo;
|
||||||
|
|
|
@ -6,16 +6,12 @@
|
||||||
#include "var.h"
|
#include "var.h"
|
||||||
#include "cutt_transpose_op.h"
|
#include "cutt_transpose_op.h"
|
||||||
#include "ops/op_register.h"
|
#include "ops/op_register.h"
|
||||||
#include <iostream>
|
|
||||||
|
|
||||||
#ifdef JIT
|
|
||||||
#include "cutt.h"
|
#include "cutt.h"
|
||||||
#endif
|
|
||||||
#include "cutt_warper.h"
|
#include "cutt_warper.h"
|
||||||
|
#include "misc/stack_vector.h"
|
||||||
|
|
||||||
namespace jittor {
|
namespace jittor {
|
||||||
|
|
||||||
#ifndef JIT
|
|
||||||
static auto make_transpose = get_op_info("cutt_transpose")
|
static auto make_transpose = get_op_info("cutt_transpose")
|
||||||
.get_constructor<VarPtr, Var*, NanoVector>();
|
.get_constructor<VarPtr, Var*, NanoVector>();
|
||||||
|
|
||||||
|
@ -58,52 +54,49 @@ VarPtr CuttTransposeOp::grad(Var* out, Var* dout, Var* v, int v_index) {
|
||||||
return make_transpose(dout, reverse);
|
return make_transpose(dout, reverse);
|
||||||
}
|
}
|
||||||
|
|
||||||
void CuttTransposeOp::jit_prepare(JK& jk) {
|
|
||||||
jk << _CS("[Tx:") << x->dtype();
|
|
||||||
jk << _CS("][DIM=") << JK::hex1(axes.size());
|
|
||||||
for (uint i=0; i<axes.size(); i++)
|
|
||||||
jk << _CS("][AXES") << JK::hex1(axes[i]) << '=' << JK::hex1(i);
|
|
||||||
jk << ']';
|
|
||||||
}
|
|
||||||
unordered_map<string, unsigned int> cutt_plan_cache;
|
unordered_map<string, unsigned int> cutt_plan_cache;
|
||||||
|
|
||||||
#else // JIT
|
void CuttTransposeOp::run() {
|
||||||
#ifdef JIT_cuda
|
auto* __restrict__ xp = x->mem_ptr;
|
||||||
|
auto* __restrict__ yp = y->mem_ptr;
|
||||||
extern unordered_map<string, unsigned int> cutt_plan_cache;
|
StackVector<int> x_shape;
|
||||||
|
StackVector<int> new_shape, new_axes, trans, reverse;
|
||||||
void CuttTransposeOp::jit_run() {
|
int dim = x->shape.size();
|
||||||
auto* __restrict__ xp = x->ptr<Tx>();
|
for (int i=0; i<dim; i++) {
|
||||||
auto* __restrict__ yp = y->ptr<Tx>();
|
trans[i] = new_shape.size();
|
||||||
vector<int> permutation, permutation2;
|
if (x->shape[i] != 1)
|
||||||
vector<int> y_shape;
|
new_shape.push_back(x->shape[i]);
|
||||||
vector<int> x_shape;
|
}
|
||||||
@for(i, 0, DIM, permutation.push_back(DIM-1-AXES@i);)
|
for (int i = 0; i < dim; ++i) {
|
||||||
@for(i, 0, DIM, permutation2.push_back(permutation[DIM-1-@i@@]);)
|
if (x->shape[axes[i]] != 1) {
|
||||||
std::vector<int> reverse;
|
new_axes.push_back(trans[axes[i]]);
|
||||||
reverse.reserve(permutation2.size());
|
}
|
||||||
for (uint i=0; i<permutation2.size(); i++)
|
}
|
||||||
reverse[permutation2[i]] = i;
|
dim = new_shape.size();
|
||||||
|
for (int i=0; i<dim; i++)
|
||||||
@for(i, 0, DIM, x_shape.push_back(x->shape[DIM-1-@i@@]);)
|
reverse[i] = dim-1-new_axes[dim-1-i];
|
||||||
|
for (int i=0; i<dim; i++)
|
||||||
|
x_shape[i] = new_shape[dim-1-i];
|
||||||
|
if (dim == 1) {
|
||||||
|
checkCudaErrors(cudaMemcpyAsync(yp, xp, x->size, cudaMemcpyDefault, 0));
|
||||||
|
return;
|
||||||
|
}
|
||||||
jk.clear();
|
jk.clear();
|
||||||
jk << @DIM << ",";
|
jk << dim << ',';
|
||||||
for (uint i=0; i<@DIM; i++) jk << x_shape[i] << ",";
|
for (int i=0; i<dim; i++) jk << x_shape[i] << ',';
|
||||||
for (uint i=0; i<@DIM; i++) jk << reverse[i] << ",";
|
for (int i=0; i<dim; i++) jk << reverse[i] << ',';
|
||||||
jk << sizeof(Tx) << ".";
|
jk << x->dtype().dsize() << '.';
|
||||||
auto iter = cutt_plan_cache.find(jk.to_string());
|
auto iter = cutt_plan_cache.find(jk.to_string());
|
||||||
|
LOGvvv << "Run cutt_transpose with key:" << jk.to_string();
|
||||||
|
|
||||||
if (iter!=cutt_plan_cache.end()){
|
if (iter!=cutt_plan_cache.end()){
|
||||||
cuttExecute(iter->second, xp, yp);
|
cuttExecute(iter->second, xp, yp);
|
||||||
} else {
|
} else {
|
||||||
cuttHandle plan;
|
cuttHandle plan;
|
||||||
cuttPlan(&plan, @DIM, x_shape.data(), reverse.data(), sizeof(Tx), 0);
|
cuttPlan(&plan, dim, x_shape.data(), reverse.data(), x->dtype().dsize(), 0);
|
||||||
cutt_plan_cache[jk.to_string()] = plan;
|
cutt_plan_cache[jk.to_string()] = plan;
|
||||||
cuttExecute(plan, xp, yp);
|
cuttExecute(plan, xp, yp);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif // JIT_cuda
|
|
||||||
#endif // JIT
|
|
||||||
|
|
||||||
} // jittor
|
} // jittor
|
|
@ -19,7 +19,7 @@ struct CuttTransposeOp : Op {
|
||||||
const char* name() const override { return "cutt_transpose"; }
|
const char* name() const override { return "cutt_transpose"; }
|
||||||
VarPtr grad(Var* out, Var* dout, Var* v, int v_index) override;
|
VarPtr grad(Var* out, Var* dout, Var* v, int v_index) override;
|
||||||
void infer_shape() override;
|
void infer_shape() override;
|
||||||
DECLARE_jit_run;
|
void run() override;
|
||||||
};
|
};
|
||||||
|
|
||||||
} // jittor
|
} // jittor
|
|
@ -12,6 +12,35 @@ import numpy as np
|
||||||
import math
|
import math
|
||||||
from collections.abc import Sequence,Iterable
|
from collections.abc import Sequence,Iterable
|
||||||
|
|
||||||
|
def __copy__(x):
|
||||||
|
return x.copy().detach()
|
||||||
|
jt.Var.__copy__ = __copy__
|
||||||
|
|
||||||
|
def __deepcopy__(x,memo):
|
||||||
|
result = x.copy().detach()
|
||||||
|
memo[id(x)]=result
|
||||||
|
return result
|
||||||
|
jt.Var.__deepcopy__ = __deepcopy__
|
||||||
|
|
||||||
|
def __len__(x):
|
||||||
|
return x.shape[0]
|
||||||
|
jt.Var.__len__ = __len__
|
||||||
|
|
||||||
|
def __iter__(x):
|
||||||
|
result = []
|
||||||
|
for i in range(x.shape[0]):
|
||||||
|
result.append(x[i])
|
||||||
|
return result.__iter__()
|
||||||
|
jt.Var.__iter__ = __iter__
|
||||||
|
|
||||||
|
def all(x,dim):
|
||||||
|
return x.all_(dim).bool()
|
||||||
|
jt.Var.all = all
|
||||||
|
|
||||||
|
def any(x,dim):
|
||||||
|
return x.any_(dim).bool()
|
||||||
|
jt.Var.any = any
|
||||||
|
|
||||||
|
|
||||||
def repeat(x, *shape):
|
def repeat(x, *shape):
|
||||||
r'''
|
r'''
|
||||||
|
@ -47,10 +76,24 @@ def repeat(x, *shape):
|
||||||
x = x.broadcast(x_shape)
|
x = x.broadcast(x_shape)
|
||||||
elif len_x_shape > len_shape:
|
elif len_x_shape > len_shape:
|
||||||
rep_shape = (len_x_shape - len_shape) * [1] + shape
|
rep_shape = (len_x_shape - len_shape) * [1] + shape
|
||||||
|
|
||||||
|
reshape_shape = []
|
||||||
|
broadcast_shape = []
|
||||||
|
for x_s,r_s in zip(x_shape,rep_shape):
|
||||||
|
reshape_shape.append(1)
|
||||||
|
reshape_shape.append(x_s)
|
||||||
|
|
||||||
|
broadcast_shape.append(r_s)
|
||||||
|
broadcast_shape.append(1)
|
||||||
|
|
||||||
|
x = x.reshape(reshape_shape)
|
||||||
|
x = x.broadcast(broadcast_shape)
|
||||||
|
|
||||||
tar_shape = (np.array(x_shape) * np.array(rep_shape)).tolist()
|
tar_shape = (np.array(x_shape) * np.array(rep_shape)).tolist()
|
||||||
dims = []
|
|
||||||
for i in range(len(tar_shape)): dims.append(f"i{i}%{x_shape[i]}")
|
x = x.reshape(tar_shape)
|
||||||
return x.reindex(tar_shape, dims)
|
return x
|
||||||
|
|
||||||
jt.Var.repeat = repeat
|
jt.Var.repeat = repeat
|
||||||
|
|
||||||
def chunk(x, chunks, dim=0):
|
def chunk(x, chunks, dim=0):
|
||||||
|
@ -326,9 +369,8 @@ def unique(x):
|
||||||
'''
|
'''
|
||||||
x = x.reshape(-1)
|
x = x.reshape(-1)
|
||||||
_,x = jt.argsort(x)
|
_,x = jt.argsort(x)
|
||||||
index2 = [i for i in range(1,x.shape[0])]
|
index,= jt.index((x.shape[0],))
|
||||||
index1 = [i for i in range(x.shape[0]-1)]
|
y = x[1:][x[index[1:]] != x[index[:-1]]]
|
||||||
y = x[1:][x[index2] != x[index1]]
|
|
||||||
x = jt.contrib.concat([x[:1],y],dim=0)
|
x = jt.contrib.concat([x[:1],y],dim=0)
|
||||||
return x
|
return x
|
||||||
|
|
||||||
|
@ -401,12 +443,6 @@ def log2(x):
|
||||||
|
|
||||||
jt.Var.log2 = log2
|
jt.Var.log2 = log2
|
||||||
|
|
||||||
def item(x):
|
|
||||||
assert x.ndim==1 and x.shape[0]==1
|
|
||||||
return x.numpy().item()
|
|
||||||
|
|
||||||
jt.Var.item = item
|
|
||||||
|
|
||||||
def meshgrid(*tensors):
|
def meshgrid(*tensors):
|
||||||
r'''
|
r'''
|
||||||
Take N tensors, each of which can be 1-dimensional vector, and create N n-dimensional grids,
|
Take N tensors, each of which can be 1-dimensional vector, and create N n-dimensional grids,
|
||||||
|
|
|
@ -264,17 +264,29 @@ class L1Loss(Module):
|
||||||
def execute(self, output, target):
|
def execute(self, output, target):
|
||||||
return l1_loss(output, target)
|
return l1_loss(output, target)
|
||||||
|
|
||||||
class BCEWithLogitsLoss(Module):
|
def binary_cross_entropy_with_logits(output, target, weight=None, pos_weight=None, size_average=True):
|
||||||
def __init__(self, weight=None, size_average=True):
|
max_val = jt.clamp(-output,min_v=0)
|
||||||
self.sigmoid = Sigmoid()
|
if pos_weight is not None:
|
||||||
self.bce = BCELoss(weight, size_average)
|
log_weight = (pos_weight-1)*target + 1
|
||||||
def execute(self, output, target):
|
loss = (1-target)*output+(log_weight*(((-max_val).exp()+(-output - max_val).exp()).log()+max_val))
|
||||||
output = self.sigmoid(output)
|
else:
|
||||||
output = self.bce(output, target)
|
loss = (1-target)*output+max_val+((-max_val).exp()+(-output -max_val).exp()).log()
|
||||||
return output
|
if weight is not None:
|
||||||
|
loss *=weight
|
||||||
|
|
||||||
def binary_cross_entropy_with_logits(input, target, weight=None, size_average=True):
|
if size_average:
|
||||||
return BCEWithLogitsLoss(weight, size_average)(input, target)
|
return loss.mean()
|
||||||
|
else:
|
||||||
|
return loss.sum()
|
||||||
|
|
||||||
|
class BCEWithLogitsLoss(Module):
|
||||||
|
def __init__(self, weight=None, pos_weight=None, size_average=True):
|
||||||
|
self.pos_weight = pos_weight
|
||||||
|
self.weight = weight
|
||||||
|
self.size_average = size_average
|
||||||
|
|
||||||
|
def execute(self, output, target):
|
||||||
|
return binary_cross_entropy_with_logits(output,target,self.weight,self.pos_weight,self.size_average)
|
||||||
|
|
||||||
def softmax(x, dim = None):
|
def softmax(x, dim = None):
|
||||||
if dim is None:
|
if dim is None:
|
||||||
|
|
|
@ -210,3 +210,64 @@ class Adam(Optimizer):
|
||||||
v.update(b1 * v + (1-b1) * g * g)
|
v.update(b1 * v + (1-b1) * g * g)
|
||||||
step_size = lr * jt.sqrt(1-b1**n) / (1-b0 ** n)
|
step_size = lr * jt.sqrt(1-b1**n) / (1-b0 ** n)
|
||||||
p.update(p - m * step_size / (jt.sqrt(v) + eps))
|
p.update(p - m * step_size / (jt.sqrt(v) + eps))
|
||||||
|
|
||||||
|
|
||||||
|
class LRScheduler:
|
||||||
|
def __init__(self,optimizer, last_epoch=-1):
|
||||||
|
assert isinstance(optimizer,Optimizer)
|
||||||
|
self.optimizer = optimizer
|
||||||
|
|
||||||
|
if last_epoch==-1:
|
||||||
|
for gp in optimizer.param_groups:
|
||||||
|
gp.setdefault('initial_lr',gp.get('lr',optimizer.lr))
|
||||||
|
else:
|
||||||
|
for gp in optimizer.param_groups:
|
||||||
|
assert 'initial_lr' in gp
|
||||||
|
|
||||||
|
self.base_lrs = list(map(lambda group: group['initial_lr'], optimizer.param_groups))
|
||||||
|
self.last_epoch = last_epoch
|
||||||
|
self.optimizer._step_count = 0
|
||||||
|
self._step_count = 0
|
||||||
|
self.step()
|
||||||
|
|
||||||
|
def get_lr(self):
|
||||||
|
raise NotImplementedError
|
||||||
|
|
||||||
|
def get_last_lr(self):
|
||||||
|
return self._last_lr
|
||||||
|
|
||||||
|
def step(self,epoch=None):
|
||||||
|
self._step_count += 1
|
||||||
|
|
||||||
|
if epoch is None:
|
||||||
|
self.last_epoch += 1
|
||||||
|
values = self.get_lr()
|
||||||
|
else:
|
||||||
|
self.last_epoch = epoch
|
||||||
|
values = self.get_lr()
|
||||||
|
|
||||||
|
for i, data in enumerate(zip(self.optimizer.param_groups, values)):
|
||||||
|
param_group, lr = data
|
||||||
|
param_group['lr'] = lr
|
||||||
|
|
||||||
|
self._last_lr = [group['lr'] for group in self.optimizer.param_groups]
|
||||||
|
|
||||||
|
|
||||||
|
class LambdaLR(LRScheduler):
|
||||||
|
|
||||||
|
def __init__(self, optimizer, lr_lambda, last_epoch=-1):
|
||||||
|
if not isinstance(lr_lambda, list) and not isinstance(lr_lambda, tuple):
|
||||||
|
self.lr_lambdas = [lr_lambda] * len(optimizer.param_groups)
|
||||||
|
else:
|
||||||
|
if len(lr_lambda) != len(optimizer.param_groups):
|
||||||
|
raise ValueError("Expected {} lr_lambdas, but got {}".format(len(optimizer.param_groups), len(lr_lambda)))
|
||||||
|
|
||||||
|
self.lr_lambdas = list(lr_lambda)
|
||||||
|
|
||||||
|
super(LambdaLR, self).__init__(optimizer, last_epoch)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def get_lr(self):
|
||||||
|
return [base_lr * lmbda(self.last_epoch)
|
||||||
|
for lmbda, base_lr in zip(self.lr_lambdas, self.base_lrs)]
|
|
@ -30,7 +30,7 @@ class TestCuttTransposeOp(unittest.TestCase):
|
||||||
for perm in perms:
|
for perm in perms:
|
||||||
with jt.log_capture_scope(
|
with jt.log_capture_scope(
|
||||||
log_silent=1,
|
log_silent=1,
|
||||||
log_v=0, log_vprefix="op.cc=100"
|
log_v=0, log_vprefix="cutt=100"
|
||||||
) as raw_log:
|
) as raw_log:
|
||||||
if perm:
|
if perm:
|
||||||
x = np.transpose(a, perm)
|
x = np.transpose(a, perm)
|
||||||
|
@ -39,7 +39,7 @@ class TestCuttTransposeOp(unittest.TestCase):
|
||||||
x = np.transpose(a)
|
x = np.transpose(a)
|
||||||
y = jt.transpose(a).data
|
y = jt.transpose(a).data
|
||||||
self.assertEqual(x.shape, y.shape)
|
self.assertEqual(x.shape, y.shape)
|
||||||
logs = find_log_with_re(raw_log, "(Jit op key (not )?found: " + "cutt_transpose" + ".*)")
|
logs = find_log_with_re(raw_log, "(Run cutt_transpose with key.*)")
|
||||||
if perm is None:
|
if perm is None:
|
||||||
continue
|
continue
|
||||||
last = -1
|
last = -1
|
||||||
|
@ -53,7 +53,7 @@ class TestCuttTransposeOp(unittest.TestCase):
|
||||||
last = perm[i]
|
last = perm[i]
|
||||||
if not in_order:
|
if not in_order:
|
||||||
assert len(logs)==1
|
assert len(logs)==1
|
||||||
assert (x==y).all(), f"\n{x}\n{y}"
|
assert (x==y).all(), f"\n{x}\n{y}\n{perm}\n{a.shape}"
|
||||||
|
|
||||||
ia = [gen_data([5, 7]), gen_data([2,2,2]), gen_data([2,3,4,5]), gen_data([5,3]), gen_data([3,1,5,3,1])]
|
ia = [gen_data([5, 7]), gen_data([2,2,2]), gen_data([2,3,4,5]), gen_data([5,3]), gen_data([3,1,5,3,1])]
|
||||||
for a in ia: check(a)
|
for a in ia: check(a)
|
||||||
|
|
|
@ -177,7 +177,8 @@ vector<VarPtr> grad(Var* loss, vector<Var*> targets) {
|
||||||
Var* dout = grads[id];
|
Var* dout = grads[id];
|
||||||
trace_grad_op = op;
|
trace_grad_op = op;
|
||||||
VarPtr dvar = make_grad(op, out, dout, var, index);
|
VarPtr dvar = make_grad(op, out, dout, var, index);
|
||||||
if (dvar && dvar->num>=0 && var->num)
|
if (dvar && dvar->num>=0 && var->num>0)
|
||||||
|
// var->num == 0 represents a any match var
|
||||||
ASSERT(dvar->num==var->num && dvar->shape.size()==var->shape.size())
|
ASSERT(dvar->num==var->num && dvar->shape.size()==var->shape.size())
|
||||||
<< "dvar" << dvar << "var" << var;
|
<< "dvar" << dvar << "var" << var;
|
||||||
if (!grad)
|
if (!grad)
|
||||||
|
|
|
@ -17,6 +17,7 @@ struct StackVector {
|
||||||
inline T& front() { return a[0]; }
|
inline T& front() { return a[0]; }
|
||||||
inline T& back() { return a[n-1]; }
|
inline T& back() { return a[n-1]; }
|
||||||
inline int size() { return n;}
|
inline int size() { return n;}
|
||||||
|
inline T* data() { return a;}
|
||||||
inline StackVector(int n=0) : n(n) {}
|
inline StackVector(int n=0) : n(n) {}
|
||||||
|
|
||||||
struct Iter {
|
struct Iter {
|
||||||
|
|
|
@ -11,6 +11,7 @@
|
||||||
#ifdef HAS_CUDA
|
#ifdef HAS_CUDA
|
||||||
#include <cuda_runtime.h>
|
#include <cuda_runtime.h>
|
||||||
#include <helper_cuda.h>
|
#include <helper_cuda.h>
|
||||||
|
#include "misc/cuda_flags.h"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
namespace jittor {
|
namespace jittor {
|
||||||
|
@ -36,14 +37,14 @@ void CopyOp::run() {
|
||||||
auto size = x->size;
|
auto size = x->size;
|
||||||
auto x_ptr = x->mem_ptr;
|
auto x_ptr = x->mem_ptr;
|
||||||
auto y_ptr = outputs().front()->mem_ptr;
|
auto y_ptr = outputs().front()->mem_ptr;
|
||||||
if (flags.get(NodeFlags::_cpu)) {
|
#ifdef HAS_CUDA
|
||||||
|
if (flags.get(NodeFlags::_cuda)) {
|
||||||
|
checkCudaErrors(cudaMemcpyAsync(y_ptr, x_ptr, size, cudaMemcpyDefault, 0));
|
||||||
|
} else
|
||||||
|
#endif
|
||||||
|
{
|
||||||
std::memcpy(y_ptr, x_ptr, size);
|
std::memcpy(y_ptr, x_ptr, size);
|
||||||
}
|
}
|
||||||
#ifdef HAS_CUDA
|
|
||||||
else {
|
|
||||||
checkCudaErrors(cudaMemcpyAsync(y_ptr, x_ptr, size, cudaMemcpyDefault, 0));
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -34,9 +34,9 @@ unordered_set<string> reduce_ops = {
|
||||||
"add",
|
"add",
|
||||||
// @pybind(prod, product, reduce_multiply)
|
// @pybind(prod, product, reduce_multiply)
|
||||||
"multiply",
|
"multiply",
|
||||||
// @pybind(reduce_logical_and, all)
|
// @pybind(reduce_logical_and, all_)
|
||||||
"logical_and",
|
"logical_and",
|
||||||
// @pybind(reduce_logical_or, any)
|
// @pybind(reduce_logical_or, any_)
|
||||||
"logical_or",
|
"logical_or",
|
||||||
"logical_xor",
|
"logical_xor",
|
||||||
"bitwise_and",
|
"bitwise_and",
|
||||||
|
@ -65,7 +65,8 @@ ReduceOp::ReduceOp(Var* x, NanoString op, NanoVector dims, bool keepdims)
|
||||||
reduce_mask |= 1<<dim;
|
reduce_mask |= 1<<dim;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (x->dtype() == ns_bool && ns == ns_add)
|
// if (x->dtype() == ns_bool && ns == ns_add)
|
||||||
|
if (x->dtype() == ns_bool)
|
||||||
y = create_output(nullptr, ns_int32);
|
y = create_output(nullptr, ns_int32);
|
||||||
else
|
else
|
||||||
y = create_output(nullptr, binary_dtype_infer(ns, x, x));
|
y = create_output(nullptr, binary_dtype_infer(ns, x, x));
|
||||||
|
|
|
@ -69,7 +69,7 @@ void SetitemOp::infer_shape() {
|
||||||
for (int i=0; i<data_dim; i++) {
|
for (int i=0; i<data_dim; i++) {
|
||||||
int j = i - data_dim + out_shape.size();
|
int j = i - data_dim + out_shape.size();
|
||||||
if (!(data_shape[i]==1 && out_shape[j]!=-1)) {
|
if (!(data_shape[i]==1 && out_shape[j]!=-1)) {
|
||||||
CHECK(data_shape[i]<0 || data_shape[i]==out_shape[j])
|
CHECK(data_shape[i]<0 || out_shape[j]<0 || data_shape[i]==out_shape[j])
|
||||||
<< "Data shape not match" << data_shape << out_shape;
|
<< "Data shape not match" << data_shape << out_shape;
|
||||||
bmask |= 1<<j;
|
bmask |= 1<<j;
|
||||||
}
|
}
|
||||||
|
|
|
@ -40,38 +40,8 @@ TransposeOp::TransposeOp(Var* x, NanoVector axes_) : x(x), axes(axes_) {
|
||||||
.get_constructor<VarPtr, Var*, NanoVector>();
|
.get_constructor<VarPtr, Var*, NanoVector>();
|
||||||
}
|
}
|
||||||
if (cutt_transpose) {
|
if (cutt_transpose) {
|
||||||
bool need_reshape = false;
|
auto var = cutt_transpose(x, axes);
|
||||||
int dims = x->shape.size();
|
forward(var);
|
||||||
vector<int64> in_axes;
|
|
||||||
vector<int64> in_shape;
|
|
||||||
vector<int64> out_shape;
|
|
||||||
vector<int64> trans;
|
|
||||||
int cnt = 0;
|
|
||||||
for (int i = 0; i < dims; ++i) {
|
|
||||||
if (x->shape[i] == 1) {
|
|
||||||
need_reshape = true;
|
|
||||||
trans.push_back(-1);
|
|
||||||
} else {
|
|
||||||
trans.push_back(cnt);
|
|
||||||
cnt += 1;
|
|
||||||
in_shape.push_back(x->shape[i]);
|
|
||||||
}
|
|
||||||
out_shape.push_back(x->shape[axes[i]]);
|
|
||||||
}
|
|
||||||
for (int i = 0; i < dims; ++i) {
|
|
||||||
if (x->shape[axes[i]] != 1) {
|
|
||||||
in_axes.push_back(trans[axes[i]]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (need_reshape) {
|
|
||||||
auto x1 = make_reshape(x, NanoVector(in_shape));
|
|
||||||
auto x2 = cutt_transpose(x1, in_axes);
|
|
||||||
auto x3 = make_reshape(x2, NanoVector(out_shape));
|
|
||||||
forward(x3);
|
|
||||||
} else {
|
|
||||||
auto var = cutt_transpose(x, axes);
|
|
||||||
forward(var);
|
|
||||||
}
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -164,6 +164,19 @@ static vector<Stack> get_stack_info() {
|
||||||
(int)PyFrame_GetLineNumber(prev_f)});
|
(int)PyFrame_GetLineNumber(prev_f)});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if (stacks.size() == 0) {
|
||||||
|
auto m = std::min(3,n);
|
||||||
|
for (int i=0; i<m; i++) {
|
||||||
|
auto f = frames[n-m+i];
|
||||||
|
auto s = to_string(f->f_code->co_filename);
|
||||||
|
auto num = (int)PyFrame_GetLineNumber(f);
|
||||||
|
stacks.emplace_back(Stack{
|
||||||
|
s+":"+S(num),
|
||||||
|
"",
|
||||||
|
s,
|
||||||
|
num});
|
||||||
|
}
|
||||||
|
}
|
||||||
return stacks;
|
return stacks;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -23,7 +23,7 @@ static void push_py_object_pickle(RingBuffer* rb, PyObject* obj, uint64& __restr
|
||||||
ASSERT(0 == PyBytes_AsStringAndSize(ret.obj, &s, &size));
|
ASSERT(0 == PyBytes_AsStringAndSize(ret.obj, &s, &size));
|
||||||
rb->push_t<int64>(size, offset);
|
rb->push_t<int64>(size, offset);
|
||||||
rb->push(size, offset);
|
rb->push(size, offset);
|
||||||
LOGir << string(rb->get_ptr(size, offset), size);
|
// LOGir << string(rb->get_ptr(size, offset), size);
|
||||||
std::memcpy(rb->get_ptr(size, offset), s, size);
|
std::memcpy(rb->get_ptr(size, offset), s, size);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue