再探Tensor类 在之前,我们实现的张量初始化是这样的:`Tensor<float>::Tensor(uint32_t channels, uint32_t rows, uint32_t cols) { data_ = arma::fcube(rows, cols, channels); }`
这个Tensor类其实并不能满足我们的使用需要,因为我们有些时候数据并不是三维的, 原来的Tensor不能在逻辑上区分当前的张量是三维的、二维的还是一维的,因为实际的数据存储类arma::fcube总是一个三维数据。 而且在之前我们也没有实现reshape。
所以,现在让我们一起来完善这个Tensor类吧。
1 2 3 4 5 6 7 8 9 10 11 Tensor<float >::Tensor (uint32_t channels, uint32_t rows, uint32_t cols) { data_ = arma::fcube (rows, cols, channels); if (channels == 1 && rows == 1 ) { this ->raw_shapes_ = std::vector<uint32_t >{cols}; } else if (channels == 1 ) { this ->raw_shapes_ = std::vector<uint32_t >{rows, cols}; } else { this ->raw_shapes_ = std::vector<uint32_t >{channels, rows, cols}; } }
在这里,我们调用arma::fcube 来初始化data, 同时raw_shape记录的是另外一个方面的形状信息,主要用于review和flatten层中。 尽管实际的数据存储类arma::fcube总是一个三维数据,但是逻辑上用raw_shapes 来记录当前的张量是三维的、二维的还是一维的。
列优先的Reshape 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 void Tensor<float >::ReRawshape (const std::vector<uint32_t >& shapes) { CHECK (!this ->data_.empty ()); CHECK (!shapes.empty ()); const uint32_t origin_size = this ->size (); uint32_t current_size = 1 ; for (uint32_t s : shapes) { current_size *= s; } CHECK (shapes.size () <= 3 ); CHECK (current_size == origin_size); if (shapes.size () == 3 ) { this ->data_.reshape (shapes.at (1 ), shapes.at (2 ), shapes.at (0 )); this ->raw_shapes_ = {shapes.at (0 ), shapes.at (1 ), shapes.at (2 )}; } else if (shapes.size () == 2 ) { this ->data_.reshape (shapes.at (0 ), shapes.at (1 ), 1 ); this ->raw_shapes_ = {shapes.at (0 ), shapes.at (1 )}; } else { this ->data_.reshape (shapes.at (0 ), 1 , 1 ); this ->raw_shapes_ = {shapes.at (0 )}; } }
在这里调用了armadillo::cube.reshape , 由于armadillo::cube是一个列优先的容器,所以Reshape的方式是列优先的。
行优先的Reshape 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 void Tensor<float >::ReView (const std::vector<uint32_t >& shapes) { CHECK (!this ->data_.empty ()); const uint32_t target_channels = shapes.at (0 ); const uint32_t target_rows = shapes.at (1 ); const uint32_t target_cols = shapes.at (2 ); arma::fcube new_data (target_rows, target_cols, target_channels) ; const uint32_t plane_size = target_rows * target_cols; for (uint32_t c = 0 ; c < this ->data_.n_slices; ++c) { const arma::fmat& channel = this ->data_.slice (c); for (uint32_t c_ = 0 ; c_ < this ->data_.n_cols; ++c_) { const float * colptr = channel.colptr (c_); for (uint32_t r = 0 ; r < this ->data_.n_rows; ++r) { const uint32_t pos_index = c * data_.n_rows * data_.n_cols + r * data_.n_cols + c_; const uint32_t ch = pos_index / plane_size; const uint32_t row = (pos_index - ch * plane_size) / target_cols; const uint32_t col = (pos_index - ch * plane_size - row * target_cols); new_data.at (row, col, ch) = *(colptr + r); } } } this ->data_ = new_data; }
构建计算图的图关系 在之前的计算图初始化中RuntimeGraph::Init(),我们并没有构建计算图的图关系。for (const auto &current_op : this->operators_) { const std::vector<std::string> &output_names = current_op->output_names; for (const auto &next_op : this->operators_) { if (next_op == current_op) { continue; } if (std::find(output_names.begin(), output_names.end(), next_op->name) != output_names.end()) { current_op->output_operators.insert({next_op->name, next_op}); } } }
计算图初始化完成后,接下来我们需要做的事情是找到op list(this->operators)中的输入和输出节点。 众所周知,一个图一定有一个输入和输出。打个比方, 图的执行好像在走迷宫,就好像我们走迷宫之前需要先指定迷宫的输入输出位置。
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 void RuntimeGraph::Build (const std::string &input_name, const std::string &output_name) { if (graph_state_ == GraphState::NeedInit) { bool init_graph = Init (); LOG_IF (FATAL, !init_graph) << "Init graph failed!" ; } CHECK (graph_state_ >= GraphState::NeedBuild) << "Graph status error, current state is " << int (graph_state_); LOG_IF (FATAL, this ->operators_.empty ()) << "Graph operators is empty, may be no init" ; this ->input_operators_maps_.clear (); this ->output_operators_maps_.clear (); for (const auto &kOperator : this ->operators_) { if (kOperator->type == "pnnx.Input" ) { this ->input_operators_maps_.insert ({kOperator->name, kOperator}); } else if (kOperator->type == "pnnx.Output" ) { this ->output_operators_maps_.insert ({kOperator->name, kOperator}); } else { } } RuntimeGraphShape::InitOperatorInputTensor (operators_); RuntimeGraphShape::InitOperatorOutputTensor (graph_->ops, operators_); graph_state_ = GraphState::Complete; input_name_ = input_name; output_name_ = output_name; }
初始化各算子的输入和输出空间 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 class RuntimeGraphShape { public : static void InitOperatorInputTensor (const std::vector<std::shared_ptr<RuntimeOperator>> &operators) ; static void InitOperatorOutputTensor (const std::vector<pnnx::Operator *> &pnnx_operators, const std::vector<std::shared_ptr<RuntimeOperator>> &operators) ;};
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 void RuntimeGraphShape::InitOperatorInputTensor ( const std::vector<std::shared_ptr<RuntimeOperator>> &operators) { if (operators.empty ()) { LOG (ERROR) << "Operators for init input shapes is empty!" ; return ; } for (const auto &op : operators) { if (op->input_operands.empty ()) { continue ; } else { const std::map<std::string, std::shared_ptr<RuntimeOperand>> & input_operands_map = op->input_operands; for (const auto &input_operand_iter : input_operands_map) { const auto &input_operand = input_operand_iter.second; const auto &type = input_operand->type; CHECK (type == RuntimeDataType::kTypeFloat32) << "The graph only support float32 yet!" ; const auto &input_operand_shape = input_operand->shapes; auto &input_datas = input_operand->datas; CHECK (!input_operand_shape.empty ()); const int32_t batch = input_operand_shape.at (0 ); CHECK (batch >= 0 ) << "Dynamic batch size is not supported!" ; CHECK (input_operand_shape.size () == 2 || input_operand_shape.size () == 4 || input_operand_shape.size () == 3 ) << "Unsupported tensor shape sizes: " << input_operand_shape.size (); if (!input_datas.empty ()) { CHECK (input_datas.size () == batch) << "Batch size is wrong!" 
; for (int32_t i = 0 ; i < batch; ++i) { const std::vector<uint32_t > &input_data_shape = input_datas.at (i)->shapes (); CHECK (input_data_shape.size () == 3 ) << "THe origin shape size of operator input data do not equals " "to three" ; if (input_operand_shape.size () == 4 ) { CHECK (input_data_shape.at (0 ) == input_operand_shape.at (1 ) && input_data_shape.at (1 ) == input_operand_shape.at (2 ) && input_data_shape.at (2 ) == input_operand_shape.at (3 )); } else if (input_operand_shape.size () == 2 ) { CHECK (input_data_shape.at (1 ) == input_operand_shape.at (1 ) && input_data_shape.at (0 ) == 1 && input_data_shape.at (2 ) == 1 ); } else { CHECK (input_data_shape.at (1 ) == input_operand_shape.at (1 ) && input_data_shape.at (0 ) == 1 && input_data_shape.at (2 ) == input_operand_shape.at (2 )); } } } else { input_datas.resize (batch); for (int32_t i = 0 ; i < batch; ++i) { if (input_operand_shape.size () == 4 ) { input_datas.at (i) = std::make_shared<Tensor<float >>( input_operand_shape.at (1 ), input_operand_shape.at (2 ), input_operand_shape.at (3 )); } else if (input_operand_shape.size () == 2 ) { input_datas.at (i) = std::make_shared<Tensor<float >>( 1 , input_operand_shape.at (1 ), 1 ); } else { input_datas.at (i) = std::make_shared<Tensor<float >>( 1 , input_operand_shape.at (1 ), input_operand_shape.at (2 )); } } } } } } }
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 void RuntimeGraphShape::InitOperatorOutputTensor ( const std::vector<pnnx::Operator *> &pnnx_operators, const std::vector<std::shared_ptr<RuntimeOperator>> &operators) { CHECK (!pnnx_operators.empty () && !operators.empty ()); CHECK (pnnx_operators.size () == operators.size ()); for (uint32_t i = 0 ; i < pnnx_operators.size (); ++i) { const std::vector<pnnx::Operand *> operands = pnnx_operators.at (i)->outputs; CHECK (operands.size () <= 1 ) << "Only support one node one output yet!" ; if (operands.empty ()) { continue ; } CHECK (operands.size () == 1 ) << "Only support one output in the KuiperInfer" ; pnnx::Operand *operand = operands.front (); const auto &runtime_op = operators.at (i); CHECK (operand != nullptr ) << "Operand output is null" ; const std::vector<int32_t > &operand_shapes = operand->shape; const auto &output_tensors = runtime_op->output_operands; const int32_t batch = operand_shapes.at (0 ); CHECK (batch >= 0 ) << "Dynamic batch size is not supported!" 
; CHECK (operand_shapes.size () == 2 || operand_shapes.size () == 4 || operand_shapes.size () == 3 ) << "Unsupported shape sizes: " << operand_shapes.size (); if (!output_tensors) { std::shared_ptr<RuntimeOperand> output_operand = std::make_shared <RuntimeOperand>(); output_operand->shapes = operand_shapes; output_operand->type = RuntimeDataType::kTypeFloat32; output_operand->name = operand->name + "_output" ; for (int j = 0 ; j < batch; ++j) { if (operand_shapes.size () == 4 ) { output_operand->datas.push_back (std::make_shared<Tensor<float >>( operand_shapes.at (1 ), operand_shapes.at (2 ), operand_shapes.at (3 ))); } else if (operand_shapes.size () == 2 ) { output_operand->datas.push_back ( std::make_shared<Tensor<float >>(1 , operand_shapes.at (1 ), 1 )); } else { output_operand->datas.push_back (std::make_shared<Tensor<float >>( 1 , operand_shapes.at (1 ), operand_shapes.at (2 ))); } } runtime_op->output_operands = std::move (output_operand); } else { CHECK (batch == output_tensors->datas.size ()); CHECK (output_tensors->type == RuntimeDataType::kTypeFloat32); CHECK (output_tensors->shapes == operand_shapes); for (uint32_t b = 0 ; b < batch; ++b) { const std::vector<uint32_t > &tensor_shapes = output_tensors->datas.at (b)->shapes (); if (operand_shapes.size () == 4 ) { if (tensor_shapes.at (0 ) != operand_shapes.at (1 ) || tensor_shapes.at (1 ) != operand_shapes.at (2 ) || tensor_shapes.at (2 ) != operand_shapes.at (3 )) { DLOG (WARNING) << "The shape of tensor do not adapting with output operand" ; const auto &target_shapes = std::vector<uint32_t >{(uint32_t ) operand_shapes.at (1 ), (uint32_t ) operand_shapes.at (2 ), (uint32_t ) operand_shapes.at (3 )}; output_tensors->datas.at (b)->ReRawshape (target_shapes); } } else if (operand_shapes.size () == 2 ) { if (tensor_shapes.at (0 ) != 1 || tensor_shapes.at (1 ) != operand_shapes.at (1 ) || tensor_shapes.at (2 ) != 1 ) { DLOG (WARNING) << "The shape of tensor do not adapting with output operand" ; const auto 
&target_shapes = std::vector<uint32_t >{1 , (uint32_t ) operand_shapes.at (1 ), 1 }; output_tensors->datas.at (b)->ReRawshape (target_shapes); } } else { if (tensor_shapes.at (0 ) != 1 || tensor_shapes.at (1 ) != operand_shapes.at (1 ) || tensor_shapes.at (2 ) != operand_shapes.at (2 )) { DLOG (WARNING) << "The shape of tensor do not adapting with output operand" ; const auto &target_shapes = std::vector<uint32_t >{1 , (uint32_t ) operand_shapes.at (1 ), (uint32_t ) operand_shapes.at (2 )}; output_tensors->datas.at (b)->ReRawshape (target_shapes); } } } } } }