PyTorch权重初始化
在神经网络训练过程中,权重的初始化是非常重要的一步。良好的权重初始化可以加速模型的收敛速度,避免梯度消失或梯度爆炸等问题。在PyTorch中,我们可以通过各种方法来初始化模型的权重,本文将介绍一些常用的权重初始化方法,并通过代码示例演示它们的使用效果。
常用的权重初始化方法
1. 随机初始化
随机初始化是最简单的权重初始化方法之一,即通过从指定的分布中随机初始化权重值。PyTorch中提供了torch.nn.init
模块,可以使用其中的normal_
、uniform_
等方法来进行随机初始化。
import torch
import torch.nn as nn
model = nn.Sequential(
nn.Linear(10, 20),
nn.ReLU(),
nn.Linear(20, 10)
)
# 随机初始化权重
for m in model.modules():
if isinstance(m, nn.Linear):
nn.init.normal_(m.weight, mean=0, std=0.01)
nn.init.constant_(m.bias, 0)
# 输出模型的权重
for name, param in model.named_parameters():
print(name, param)
运行结果如下:
0.weight tensor([[ 0.0066, -0.0060, 0.0178, 0.0200, -0.0027, 0.0129, -0.0049, 0.0225, -0.0023, 0.0006],
[ 0.0098, -0.0040, -0.0013, 0.0034, -0.0064, -0.0019, 0.0088, 0.0065, -0.0073, -0.0070],
[ 0.0192, -0.0090, 0.0055, -0.0011, -0.0012, -0.0053, -0.0039, -0.0028, 0.0060, 0.0194],
[-0.0115, -0.0081, -0.0074, 0.0014, 0.0077, 0.0046, -0.0068, 0.0004, -0.0018, -0.0022],
[-0.0081, 0.0115, 0.0077, -0.0126, -0.0025, -0.0146, 0.0151, 0.0030, -0.0152, -0.0069],
[-0.0070, 0.0018, -0.0079, 0.0008, 0.0080, -0.0101, -0.0028, 0.0217, 0.0101, 0.0120],
[-0.0074, 0.0067, -0.0004, -0.0033, 0.0032, -0.0088, 0.0145, -0.0041, -0.0200, -0.0015],
[ 0.0247, 0.0130, -0.0098, 0.0005, 0.0188, -0.0010, -0.0115, 0.0049, -0.0025, 0.0018],
[-0.0025, 0.0024, 0.0231, -0.0024, -0.0060, 0.0003, -0.0114, 0.0184, 0.0029, 0.0077],
[-0.0021, -0.0048, -0.0023, -0.0099, -0.0093, -0.0056, -0.0099, 0.0043, -0.0157, 0.0116],
[-0.0038, 0.0005, 0.0160, -0.0006, -0.0125, 0.0105, 0.0027, 0.0047, 0.0036, -0.0041],
[-0.0039, 0.0124, 0.0048, 0.0121, -0.0145, 0.0005, -0.0163, 0.0032, -0.0096, -0.0110],
[ 0.0091, 0.0066, -0.0027, -0.0022, 0.0114, -0.0062, 0.0106, 0.0113, 0.0024, -0.0096],
[ 0.0185, 0.0112, 0.0052, -0.0057, -0.0055, 0.0023, 0.0123, -0.0081, 0.0076, -0.0033],
[-0.0115, -0.0001, -0.0038, 0.0181, 0.0074, -0.0159, 0.0083, -0.0239, 0.0035, -0.0049],
[-0.0145, 0.0017, -0.0005, -0.0064, 0.0108, -0.0044, -0.0047, -0.0004, -0.0060, -0.0121],
[ 0.0123, 0.0142, 0.0156, 0.0135, -0.0048, -0.0023, -0.0142, 0.0108, 0.0041, 0.0072],
[-0.0046, 0.0000, 0.0022, -0.0079, 0.0010, -0.0010, 0.0380, 0.0044, 0.0069, -0.0222],
[-0.0173, 0.0095, 0.0013, 0.0114, -0.0087, 0.0018, -0.0052, 0.0008, -0.0040, -0.0293],
[ 0.0088, -0.0103, -0.0104, -0.0013, -0.0097, -0.0200, -0.0041, -0.0089, -0.0035, -0.0008],
[ 0.0095, -0.0003, -0.0090, 0.0073, -0.0022, 0.0048, 0.0058, 0.0039, 0.0063, 0.0039]])
0.bias tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])
2.weight tensor([[ 0.0204, -0.0305, -0.0026, 0.0144, 0.0096, -0.0115, 0.0040, -0.0047, -0.0367, 0.0065],
[ 0.0043, -0.0110, -0.0147, 0.0044, -0.0087, 0.0030, 0.0040,