再探Tensor类

在之前,我们实现的张量初始化是这样的。

1
2
3
// Legacy constructor: backs the tensor with a 3-D arma::fcube.
// Note arma::fcube's argument order is (n_rows, n_cols, n_slices),
// so channels are mapped onto the slice dimension.
Tensor<float>::Tensor(uint32_t channels, uint32_t rows, uint32_t cols) {
data_ = arma::fcube(rows, cols, channels);
}

这个Tensor类其实并不能满足我们的使用需要,因为我们有些时候数据并不是三维的,
原来的Tensor不能在逻辑上区分当前的张量是三维的、二维的还是一维的,因为实际的数据存储类arma::fcube总是一个三维数据。
而且在之前我们也没有实现reshape。

所以,现在让我们一起来完善这个Tensor类吧。

1
2
3
4
5
6
7
8
9
10
11
// Construct a tensor backed by a 3-D arma::fcube while remembering the
// logical rank (1-D / 2-D / 3-D) separately in raw_shapes_.
// arma::fcube is laid out as (n_rows, n_cols, n_slices): channels -> slices.
Tensor<float>::Tensor(uint32_t channels, uint32_t rows, uint32_t cols) {
  data_ = arma::fcube(rows, cols, channels);
  // Record the logical shape: drop leading size-1 dimensions so that
  // Reshape/View and Flatten layers can tell the real dimensionality apart
  // even though the physical storage is always three-dimensional.
  if (channels != 1) {
    this->raw_shapes_ = std::vector<uint32_t>{channels, rows, cols};  // 3-D
  } else if (rows != 1) {
    this->raw_shapes_ = std::vector<uint32_t>{rows, cols};  // 2-D
  } else {
    this->raw_shapes_ = std::vector<uint32_t>{cols};  // 1-D
  }
}

在这里,我们调用arma::fcube来初始化data_;
同时raw_shapes_记录的是另外一个方面的形状信息,主要用于后续的View(Reshape)和Flatten层中。
尽管实际的数据存储类arma::fcube总是一个三维数据,但是逻辑上用raw_shapes
来记录当前的张量是三维的、二维的还是一维的。

列优先的Reshape

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
// Column-major reshape: delegates to arma::Cube::reshape, which refills the
// cube in armadillo's native column-major element order.
void Tensor<float>::ReRawshape(const std::vector<uint32_t>& shapes) {
  CHECK(!this->data_.empty());
  CHECK(!shapes.empty());
  CHECK(shapes.size() <= 3);
  // A reshape must preserve the total number of elements.
  uint32_t new_size = 1;
  for (const uint32_t dim : shapes) {
    new_size *= dim;
  }
  CHECK(new_size == this->size());

  // cube.reshape(n_rows, n_cols, n_slices)
  switch (shapes.size()) {
    case 3:
      // shapes = {channels, rows, cols}
      this->data_.reshape(shapes.at(1), shapes.at(2), shapes.at(0));
      break;
    case 2:
      // shapes = {rows, cols}
      this->data_.reshape(shapes.at(0), shapes.at(1), 1);
      break;
    default:
      // shapes = {cols}
      this->data_.reshape(shapes.at(0), 1, 1);
      break;
  }
  // The requested shape is exactly the new logical shape.
  this->raw_shapes_ = shapes;
}

在这里调用了arma::Cube::reshape。
由于arma::Cube是一个列优先(column-major)的容器,所以这种Reshape的元素重排方式是列优先的。

行优先的Reshape

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
// Row-major reshape: re-orders elements as if the tensor were stored
// row-major, unlike ReRawshape which keeps armadillo's column-major order.
void Tensor<float>::ReView(const std::vector<uint32_t>& shapes) {
  CHECK(!this->data_.empty());
  // shapes = {channels, rows, cols}
  CHECK(shapes.size() == 3) << "ReView expects a 3-d target shape";
  const uint32_t target_channels = shapes.at(0);
  const uint32_t target_rows = shapes.at(1);
  const uint32_t target_cols = shapes.at(2);
  // arma::Cube::at() is an *unchecked* accessor, so a mismatched element
  // count would silently write out of bounds; reject it up front.
  CHECK(target_channels * target_rows * target_cols == this->size())
      << "ReView target shape has a different number of elements";
  // new_data holds the reshaped result
  arma::fcube new_data(target_rows, target_cols, target_channels);

  const uint32_t plane_size = target_rows * target_cols;
  for (uint32_t c = 0; c < this->data_.n_slices; ++c) {
    // Visit the source data channel by channel.
    const arma::fmat& channel = this->data_.slice(c);
    for (uint32_t c_ = 0; c_ < this->data_.n_cols; ++c_) {
      // Walk each column through its raw pointer (column-major storage).
      const float* colptr = channel.colptr(c_);
      for (uint32_t r = 0; r < this->data_.n_rows; ++r) {
        // Row-major linear index of the current source element:
        // index = c * rows * cols + r * cols + c_
        const uint32_t pos_index =
            c * data_.n_rows * data_.n_cols + r * data_.n_cols + c_;
        // Map the linear index back to (channel, row, col) in the new shape.
        const uint32_t ch = pos_index / plane_size;
        const uint32_t row = (pos_index - ch * plane_size) / target_cols;
        const uint32_t col = (pos_index - ch * plane_size - row * target_cols);
        // Copy the element into its new position.
        new_data.at(row, col, ch) = *(colptr + r);
      }
    }
  }
  this->data_ = std::move(new_data);
}

构建计算图的图关系

在之前的计算图初始化中RuntimeGraph::Init(),我们并没有构建计算图的图关系。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
// Build the edges of the computation graph.
for (const auto &current_op : this->operators_) {
// Names of the operators that consume current_op's outputs.
const std::vector<std::string> &output_names = current_op->output_names;
// Scan all operators looking for successors of current_op.
for (const auto &next_op : this->operators_) {
// Skip the operator itself.
if (next_op == current_op) {
continue;
}
// next_op is a successor if its name appears in current_op's output list.
if (std::find(output_names.begin(), output_names.end(), next_op->name) !=
output_names.end()) {
// Record next_op as a downstream node of current_op.
current_op->output_operators.insert({next_op->name, next_op});
}
}
}

计算图初始化完成后,接下来我们需要做的事情是找到op list(this->operators_)中的输入和输出节点。
众所周知,一个图一定有输入和输出。打个比方,
图的执行就像走迷宫,我们在走迷宫之前需要先确定迷宫的入口和出口位置。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
// Build the runtime graph: ensure it is initialized, locate the graph's
// input/output operators, allocate per-operator tensors, and record the
// names of the graph-level input and output operands.
void RuntimeGraph::Build(const std::string &input_name, const std::string &output_name) {
  // Lazily initialize the graph if that has not happened yet.
  if (graph_state_ == GraphState::NeedInit) {
    const bool init_success = Init();
    LOG_IF(FATAL, !init_success) << "Init graph failed!";
  }

  CHECK(graph_state_ >= GraphState::NeedBuild)
      << "Graph status error, current state is " << int(graph_state_);
  LOG_IF(FATAL, this->operators_.empty())
      << "Graph operators is empty, may be no init";

  this->input_operators_maps_.clear();
  this->output_operators_maps_.clear();

  // Collect the graph's entry and exit nodes from the operator list.
  for (const auto &op : this->operators_) {
    if (op->type == "pnnx.Input") {
      // Input node of the operator list.
      this->input_operators_maps_.insert({op->name, op});
    } else if (op->type == "pnnx.Output") {
      // Output node; only a single output is supported for now.
      this->output_operators_maps_.insert({op->name, op});
    }
    // Other operator types are turned into layers in later lessons.
  }

  // Allocate the input and output tensor space of every operator.
  RuntimeGraphShape::InitOperatorInputTensor(operators_);
  RuntimeGraphShape::InitOperatorOutputTensor(graph_->ops, operators_);

  graph_state_ = GraphState::Complete;
  input_name_ = input_name;
  output_name_ = output_name;
}

初始化各算子的输入和输出空间

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
class RuntimeGraphShape {
public:
/**
* On the graph's first run, allocate the tensors needed by later Layer
* computations according to the shape of each node's input operand.
* On subsequent runs, verify that the input operand shapes still match
* the shapes of the tensors held inside the operands.
* @param operators Compute nodes of the graph
*/
static void InitOperatorInputTensor(const std::vector<std::shared_ptr<RuntimeOperator>> &operators);

/**
* On the graph's first run, allocate the tensors needed by later Layer
* computations according to the shape of each node's output operand.
* On subsequent runs, verify that the output operand shapes still match
* the shapes of the tensors held inside the operands.
* @param pnnx_operators pnnx graph nodes
* @param operators Compute nodes of the KuiperInfer graph
*/
static void InitOperatorOutputTensor(const std::vector<pnnx::Operator *> &pnnx_operators,
const std::vector<std::shared_ptr<RuntimeOperator>> &operators);
};
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
// On the first run, allocate input tensors for every operator according to
// its input operand shapes; on later runs, only shape-check the existing
// tensors so they are not re-allocated. Only float32 operands are supported.
void RuntimeGraphShape::InitOperatorInputTensor(
    const std::vector<std::shared_ptr<RuntimeOperator>> &operators) {
  if (operators.empty()) {
    LOG(ERROR) << "Operators for init input shapes is empty!";
    return;
  }
  for (const auto &op : operators) {
    // Operators without inputs (e.g. graph entry nodes) need no preparation.
    if (op->input_operands.empty()) {
      continue;
    }
    const std::map<std::string, std::shared_ptr<RuntimeOperand>>
        &input_operands_map = op->input_operands;
    for (const auto &input_operand_iter : input_operands_map) {
      // input_operand is the value of the name -> operand map entry.
      const auto &input_operand = input_operand_iter.second;

      const auto &type = input_operand->type;
      CHECK(type == RuntimeDataType::kTypeFloat32)
          << "The graph only support float32 yet!";
      const auto &input_operand_shape = input_operand->shapes;
      auto &input_datas = input_operand->datas;

      CHECK(!input_operand_shape.empty());
      // The first dimension is always the batch size.
      const int32_t batch = input_operand_shape.at(0);
      CHECK(batch >= 0) << "Dynamic batch size is not supported!";
      CHECK(input_operand_shape.size() == 2 ||
            input_operand_shape.size() == 4 ||
            input_operand_shape.size() == 3)
          << "Unsupported tensor shape sizes: " << input_operand_shape.size();

      if (!input_datas.empty()) {
        // Second (or later) run: tensors already exist, so only verify that
        // their physical shapes still match the operand's declared shape,
        // avoiding a second allocation.
        CHECK(input_datas.size() == static_cast<size_t>(batch))
            << "Batch size is wrong!";
        for (int32_t i = 0; i < batch; ++i) {
          // Tensor::shapes() always reports the physical 3-d shape.
          const std::vector<uint32_t> &input_data_shape =
              input_datas.at(i)->shapes();
          CHECK(input_data_shape.size() == 3)
              << "The origin shape size of operator input data does not "
                 "equal to three";
          if (input_operand_shape.size() == 4) {
            // operand shape = {batch, channels, rows, cols}
            CHECK(input_data_shape.at(0) == input_operand_shape.at(1) &&
                  input_data_shape.at(1) == input_operand_shape.at(2) &&
                  input_data_shape.at(2) == input_operand_shape.at(3));
          } else if (input_operand_shape.size() == 2) {
            // operand shape = {batch, cols}: stored as (1, cols, 1)
            CHECK(input_data_shape.at(1) == input_operand_shape.at(1) &&
                  input_data_shape.at(0) == 1 && input_data_shape.at(2) == 1);
          } else {
            // operand shape = {batch, rows, cols}: stored as (1, rows, cols)
            CHECK(input_data_shape.at(1) == input_operand_shape.at(1) &&
                  input_data_shape.at(0) == 1 &&
                  input_data_shape.at(2) == input_operand_shape.at(2));
          }
        }
      } else {
        // First run: allocate one tensor per batch element.
        input_datas.resize(batch);
        for (int32_t i = 0; i < batch; ++i) {
          if (input_operand_shape.size() == 4) {
            input_datas.at(i) = std::make_shared<Tensor<float>>(
                input_operand_shape.at(1), input_operand_shape.at(2),
                input_operand_shape.at(3));
          } else if (input_operand_shape.size() == 2) {
            input_datas.at(i) = std::make_shared<Tensor<float>>(
                1, input_operand_shape.at(1), 1);
          } else {
            // operand shape size is 3
            input_datas.at(i) = std::make_shared<Tensor<float>>(
                1, input_operand_shape.at(1), input_operand_shape.at(2));
          }
        }
      }
    }
  }
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
// On the first run, allocate output tensors for every operator according to
// the pnnx operand shapes; on later runs, shape-check the existing tensors
// and ReRawshape any whose shape has drifted.
void RuntimeGraphShape::InitOperatorOutputTensor(
const std::vector<pnnx::Operator *> &pnnx_operators,
const std::vector<std::shared_ptr<RuntimeOperator>> &operators) {
CHECK(!pnnx_operators.empty() && !operators.empty());
// operators.at(i) is the runtime counterpart of pnnx_operators.at(i).
CHECK(pnnx_operators.size() == operators.size());
for (uint32_t i = 0; i < pnnx_operators.size(); ++i) {
// Walk every pnnx operator and inspect its output operands.
const std::vector<pnnx::Operand *> operands = pnnx_operators.at(i)->outputs;
CHECK(operands.size() <= 1) << "Only support one node one output yet!";
if (operands.empty()) {
continue;
}
// operands is non-empty, so it holds exactly one output operand.
CHECK(operands.size() == 1) << "Only support one output in the KuiperInfer";
pnnx::Operand *operand = operands.front();
const auto &runtime_op = operators.at(i);
CHECK(operand != nullptr) << "Operand output is null";
const std::vector<int32_t> &operand_shapes = operand->shape;
const auto &output_tensors = runtime_op->output_operands;

// The first dimension is always the batch size.
const int32_t batch = operand_shapes.at(0);
CHECK(batch >= 0) << "Dynamic batch size is not supported!";
CHECK(operand_shapes.size() == 2 || operand_shapes.size() == 4 ||
operand_shapes.size() == 3)
<< "Unsupported shape sizes: " << operand_shapes.size();

if (!output_tensors) {
// output_operands is not allocated yet (first run): create the tensors.
std::shared_ptr<RuntimeOperand> output_operand =
std::make_shared<RuntimeOperand>();
output_operand->shapes = operand_shapes;
output_operand->type = RuntimeDataType::kTypeFloat32;
output_operand->name = operand->name + "_output";
for (int j = 0; j < batch; ++j) {
// Allocate one tensor per batch element and store it in output_operand.
if (operand_shapes.size() == 4) {
// operand shape = {batch, channels, rows, cols}
output_operand->datas.push_back(std::make_shared<Tensor<float>>(
operand_shapes.at(1), operand_shapes.at(2),
operand_shapes.at(3)));
} else if (operand_shapes.size() == 2) {
// operand shape = {batch, cols}: stored as (1, cols, 1)
output_operand->datas.push_back(
std::make_shared<Tensor<float>>(1, operand_shapes.at(1), 1));
} else {
// operand shape = {batch, rows, cols}: stored as (1, rows, cols)
output_operand->datas.push_back(std::make_shared<Tensor<float>>(
1, operand_shapes.at(1), operand_shapes.at(2)));
}
}
runtime_op->output_operands = std::move(output_operand);
} else {
// output_operands already exists (second or later run): validate it.
CHECK(batch == output_tensors->datas.size());
// The operand's type and declared shape must be unchanged.
CHECK(output_tensors->type == RuntimeDataType::kTypeFloat32);
CHECK(output_tensors->shapes == operand_shapes);
for (uint32_t b = 0; b < batch; ++b) {
// Shape-check each batch element; ReRawshape it if the shape drifted.
const std::vector<uint32_t> &tensor_shapes =
output_tensors->datas.at(b)->shapes();
if (operand_shapes.size() == 4) {
if (tensor_shapes.at(0) != operand_shapes.at(1) ||
tensor_shapes.at(1) != operand_shapes.at(2) ||
tensor_shapes.at(2) != operand_shapes.at(3)) {
DLOG(WARNING) << "The shape of tensor do not adapting with output operand";
const auto &target_shapes = std::vector<uint32_t>{(uint32_t) operand_shapes.at(1),
(uint32_t) operand_shapes.at(2),
(uint32_t) operand_shapes.at(3)};
output_tensors->datas.at(b)->ReRawshape(target_shapes);
}
} else if (operand_shapes.size() == 2) {
if (tensor_shapes.at(0) != 1 ||
tensor_shapes.at(1) != operand_shapes.at(1) ||
tensor_shapes.at(2) != 1) {
DLOG(WARNING) << "The shape of tensor do not adapting with output operand";
const auto &target_shapes = std::vector<uint32_t>{1, (uint32_t) operand_shapes.at(1), 1};
output_tensors->datas.at(b)->ReRawshape(target_shapes);
}
} else {
// operand shape size is 3
if (tensor_shapes.at(0) != 1 ||
tensor_shapes.at(1) != operand_shapes.at(1) ||
tensor_shapes.at(2) != operand_shapes.at(2)) {
DLOG(WARNING) << "The shape of tensor do not adapting with output operand";
const auto &target_shapes =
std::vector<uint32_t>{1, (uint32_t) operand_shapes.at(1), (uint32_t) operand_shapes.at(2)};
output_tensors->datas.at(b)->ReRawshape(target_shapes);
}
}
}
}
}
}