From d94eb5ae3712bbe15204767f9898c1045ae8d39e Mon Sep 17 00:00:00 2001 From: liujiaqi06 Date: Fri, 2 Dec 2022 09:24:02 +0000 Subject: [PATCH 1/4] support trainer label-names --- paddlenlp/trainer/trainer.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/paddlenlp/trainer/trainer.py b/paddlenlp/trainer/trainer.py index 6921f27b44ee..9931842fea46 100644 --- a/paddlenlp/trainer/trainer.py +++ b/paddlenlp/trainer/trainer.py @@ -1204,6 +1204,11 @@ def compute_loss(self, model, inputs, return_outputs=False): """ if self.criterion is not None and "labels" in inputs: labels = inputs.pop("labels") + elif self.label_names is not None: + labels = [] + for label in self.label_names: + labels.append(inputs.pop(label)) + labels = tuple(labels) elif self.criterion is not None and "start_positions" in inputs and "end_positions" in inputs: labels = (inputs.pop("start_positions"), inputs.pop("end_positions")) elif self.criterion is not None and "generator_labels" in inputs: From 56f1116faeae69eee2a547b250154a187f236c57 Mon Sep 17 00:00:00 2001 From: liujiaqi06 Date: Thu, 8 Dec 2022 03:37:34 +0000 Subject: [PATCH 2/4] fix model config of ernie 3.0 tiny v2 --- paddlenlp/transformers/ernie/modeling.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/paddlenlp/transformers/ernie/modeling.py b/paddlenlp/transformers/ernie/modeling.py index e183c8721592..8bc813501300 100644 --- a/paddlenlp/transformers/ernie/modeling.py +++ b/paddlenlp/transformers/ernie/modeling.py @@ -821,7 +821,7 @@ class ErniePretrainedModel(PretrainedModel): "num_hidden_layers": 12, "task_type_vocab_size": 3, "type_vocab_size": 4, - "use_task_id": True, + "use_task_id": False, "vocab_size": 40000, }, "ernie-3.0-tiny-medium-v2": { @@ -836,7 +836,7 @@ class ErniePretrainedModel(PretrainedModel): "num_hidden_layers": 6, "task_type_vocab_size": 16, "type_vocab_size": 4, - "use_task_id": True, + "use_task_id": False, "vocab_size": 40000, }, "ernie-3.0-tiny-mini-v2": { @@ -851,7 
+851,7 @@ class ErniePretrainedModel(PretrainedModel): "num_hidden_layers": 6, "task_type_vocab_size": 16, "type_vocab_size": 4, - "use_task_id": True, + "use_task_id": False, "vocab_size": 40000, }, "ernie-3.0-tiny-micro-v2": { @@ -866,7 +866,7 @@ class ErniePretrainedModel(PretrainedModel): "num_hidden_layers": 4, "task_type_vocab_size": 16, "type_vocab_size": 4, - "use_task_id": True, + "use_task_id": False, "vocab_size": 40000, }, "ernie-3.0-tiny-nano-v2": { @@ -881,7 +881,7 @@ class ErniePretrainedModel(PretrainedModel): "num_hidden_layers": 4, "task_type_vocab_size": 16, "type_vocab_size": 4, - "use_task_id": True, + "use_task_id": False, "vocab_size": 40000, }, "ernie-3.0-tiny-pico-v2": { @@ -889,14 +889,14 @@ class ErniePretrainedModel(PretrainedModel): "hidden_act": "gelu", "hidden_dropout_prob": 0.1, "hidden_size": 128, - "intermediate_size": 1248, + "intermediate_size": 512, "initializer_range": 0.02, "max_position_embeddings": 2048, "num_attention_heads": 2, "num_hidden_layers": 3, "task_type_vocab_size": 16, "type_vocab_size": 4, - "use_task_id": True, + "use_task_id": False, "vocab_size": 40000, }, } From b8b65967103b8fb358e73e93731a1c79b3e2146e Mon Sep 17 00:00:00 2001 From: LiuChiachi Date: Thu, 2 Feb 2023 12:12:20 +0000 Subject: [PATCH 3/4] support compression --- paddlenlp/trainer/trainer_compress.py | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/paddlenlp/trainer/trainer_compress.py b/paddlenlp/trainer/trainer_compress.py index be1eea552ff1..3a1c6f640d69 100644 --- a/paddlenlp/trainer/trainer_compress.py +++ b/paddlenlp/trainer/trainer_compress.py @@ -160,7 +160,11 @@ def _dynabert(self, model): # TODO: args.gradient_accumulation_steps if args.max_steps > 0: args.num_training_steps = args.max_steps - args.num_train_epochs = math.ceil(args.num_training_steps / len(train_dataloader)) + args.num_train_epochs = math.ceil(args.num_train_epochs) + # args.num_train_epochs = args.nun_train_epoh + # import pdb; 
pdb.set_trace() + # if args. + # args.num_train_epochs = math.ceil(args.num_training_steps / len(train_dataloader)) else: args.num_training_steps = len(train_dataloader) * args.num_train_epochs args.num_train_epochs = math.ceil(args.num_train_epochs) @@ -329,6 +333,8 @@ def check_dynabert_config(net_config, width_mult): # before. elif "out_proj" in key or "linear2" in key: net_config[key]["expand_ratio"] = 1.0 + elif "classifier" in key: + net_config[key]["expand_ratio"] = 1.0 return net_config @@ -717,7 +723,7 @@ def _quant_aware_training_dynamic(self, input_dir): "dtype": "int8", # window size for 'range_abs_max' quantization. defaulf is 10000 "window_size": 10000, - "quantizable_layer_type": ["Linear", "Conv2D"], + "quantizable_layer_type": ["Linear", "Conv2D", "Matmul"], "moving_rate": args.moving_rate, "onnx_format": args.onnx_format, } @@ -733,7 +739,7 @@ def _quant_aware_training_dynamic(self, input_dir): # TODO: args.gradient_accumulation_steps if args.max_steps > 0: args.num_training_steps = args.max_steps - args.num_train_epochs = math.ceil(args.num_training_steps / len(train_dataloader)) + args.num_train_epochs = math.ceil(args.num_train_epochs) else: args.num_training_steps = len(train_dataloader) * args.num_train_epochs args.num_train_epochs = math.ceil(args.num_train_epochs) @@ -810,11 +816,18 @@ def _quant_aware_training_dynamic(self, input_dir): ) paddle.save(model_to_save.state_dict(), output_param_path) logger.info("eval done total: %s s" % (time.time() - tic_eval)) + if global_step >= args.num_training_steps: + break + if global_step >= args.num_training_steps: + break logger.info("Best result: %.4f" % best_acc) self.model.set_state_dict(paddle.load(output_param_path)) - input_spec = generate_input_spec(self.model, self.train_dataset, self.args.input_dtype) + # input_spec = generate_input_spec(self.model, self.train_dataset, self.args.input_dtype) + # input_spec = [paddle.static.InputSpec(shape=[None, None], dtype="int32", name="input_ids"), + # 
paddle.static.InputSpec(shape=[None, None], dtype="int32", name="short_session_input_ids")] + input_spec = [paddle.static.InputSpec(shape=[None, None], dtype="int32", name="input_ids")] quanter.save_quantized_model( self.model, os.path.join(input_dir, args.output_filename_prefix), input_spec=input_spec ) @@ -1018,6 +1031,7 @@ def cut_embeddings(model, tokenizer, config, word_emb_index, max_seq_length, max # Rewrites config config["max_position_embeddings"] = max_seq_length config["vocab_size"] = max_vocab_size + config["hidden_act"] = "relu6" config.save_pretrained(output_dir) # Rewrites vocab file From 259a0907b97c53458d3f052603a923de71f22114 Mon Sep 17 00:00:00 2001 From: LiuChiachi Date: Thu, 16 Feb 2023 02:13:31 +0000 Subject: [PATCH 4/4] fix input spec --- paddlenlp/trainer/trainer_compress.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/paddlenlp/trainer/trainer_compress.py b/paddlenlp/trainer/trainer_compress.py index 3a1c6f640d69..b790c288151d 100644 --- a/paddlenlp/trainer/trainer_compress.py +++ b/paddlenlp/trainer/trainer_compress.py @@ -821,13 +821,14 @@ def _quant_aware_training_dynamic(self, input_dir): if global_step >= args.num_training_steps: break logger.info("Best result: %.4f" % best_acc) - self.model.set_state_dict(paddle.load(output_param_path)) - # input_spec = generate_input_spec(self.model, self.train_dataset, self.args.input_dtype) - # input_spec = [paddle.static.InputSpec(shape=[None, None], dtype="int32", name="input_ids"), - # paddle.static.InputSpec(shape=[None, None], dtype="int32", name="short_session_input_ids")] + logger.info("Load parameters from: %s" % output_param_path) + self.model.set_state_dict(paddle.load(output_param_path)) - input_spec = [paddle.static.InputSpec(shape=[None, None], dtype="int32", name="input_ids")] + input_spec = [ + paddle.static.InputSpec(shape=[None, None], dtype="int32", name="input_ids"), + paddle.static.InputSpec(shape=[None, None], dtype="int32", 
name="short_session_input_ids"), + ] quanter.save_quantized_model( self.model, os.path.join(input_dir, args.output_filename_prefix), input_spec=input_spec )