diff --git a/optim.py b/optim.py index 5d2e9c3..f4a74b8 100644 --- a/optim.py +++ b/optim.py @@ -151,8 +151,8 @@ def optim4GPU(cfg, model): param_optimizer = list(model.named_parameters()) no_decay = ['bias', 'gamma', 'beta'] optimizer_grouped_parameters = [ - {'params': [p for n, p in param_optimizer if n not in no_decay], 'weight_decay_rate': 0.01}, - {'params': [p for n, p in param_optimizer if n in no_decay], 'weight_decay_rate': 0.0}] + {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)], 'weight_decay_rate': 0.01}, + {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)], 'weight_decay_rate': 0.0}] return BertAdam(optimizer_grouped_parameters, lr=cfg.lr, warmup=cfg.warmup,