PyTorch discriminator model produces incorrect values

import torch.nn as nn
from torchvision.models import convnext_tiny, ConvNeXt_Tiny_Weights

weights = ConvNeXt_Tiny_Weights.DEFAULT
model = convnext_tiny(weights=weights)

class _netD(nn.Module):
    def __init__(self, ngpu):
        super(_netD, self).__init__()
        self.ngpu = ngpu
        self.main = nn.Sequential(
            model,
            nn.Sigmoid()
            # state size. 1
        )
    def forward(self, input):
        return self.main(input)
# Create the Discriminator
netD = _netD(ngpu).to(device)

# Handle multi-GPU if desired
if (device.type == 'cuda') and (ngpu > 1):
    netD = nn.DataParallel(netD, list(range(ngpu)))

# Apply the ``weights_init`` function to randomly initialize all weights
# like this: ``to mean=0, stdev=0.2``.
#netD.apply(weights_init)

# Print the model
print(netD)

Model structure:

_netD(
  (main): Sequential(
    (0): ConvNeXt(
      (features): Sequential(
        (0): Conv2dNormActivation(
          (0): Conv2d(3, 96, kernel_size=(4, 4), stride=(4, 4))
          (1): LayerNorm2d((96,), eps=1e-06, elementwise_affine=True)
        )
        (1): Sequential(
          (0): CNBlock(
            (block): Sequential(
              (0): Conv2d(96, 96, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=96)
              (1): Permute()
              (2): LayerNorm((96,), eps=1e-06, elementwise_affine=True)
              (3): Linear(in_features=96, out_features=384, bias=True)
              (4): GELU(approximate='none')
              (5): Linear(in_features=384, out_features=96, bias=True)
              (6): Permute()
            )
            (stochastic_depth): StochasticDepth(p=0.0, mode=row)
          )
          (1): CNBlock(
            (block): Sequential(
              (0): Conv2d(96, 96, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=96)
              (1): Permute()
              (2): LayerNorm((96,), eps=1e-06, elementwise_affine=True)
              (3): Linear(in_features=96, out_features=384, bias=True)
              (4): GELU(approximate='none')
              (5): Linear(in_features=384, out_features=96, bias=True)
              (6): Permute()
            )
            (stochastic_depth): StochasticDepth(p=0.0058823529411764705, mode=row)
          )
          (2): CNBlock(
            (block): Sequential(
              (0): Conv2d(96, 96, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=96)
              (1): Permute()
              (2): LayerNorm((96,), eps=1e-06, elementwise_affine=True)
              (3): Linear(in_features=96, out_features=384, bias=True)
              (4): GELU(approximate='none')
              (5): Linear(in_features=384, out_features=96, bias=True)
              (6): Permute()
            )
            (stochastic_depth): StochasticDepth(p=0.011764705882352941, mode=row)
          )
        )
        (2): Sequential(
          (0): LayerNorm2d((96,), eps=1e-06, elementwise_affine=True)
          (1): Conv2d(96, 192, kernel_size=(2, 2), stride=(2, 2))
        )
        (3): Sequential(
          (0): CNBlock(
            (block): Sequential(
              (0): Conv2d(192, 192, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=192)
              (1): Permute()
              (2): LayerNorm((192,), eps=1e-06, elementwise_affine=True)
              (3): Linear(in_features=192, out_features=768, bias=True)
              (4): GELU(approximate='none')
              (5): Linear(in_features=768, out_features=192, bias=True)
              (6): Permute()
            )
            (stochastic_depth): StochasticDepth(p=0.017647058823529415, mode=row)
          )
          (1): CNBlock(
            (block): Sequential(
              (0): Conv2d(192, 192, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=192)
              (1): Permute()
              (2): LayerNorm((192,), eps=1e-06, elementwise_affine=True)
              (3): Linear(in_features=192, out_features=768, bias=True)
              (4): GELU(approximate='none')
              (5): Linear(in_features=768, out_features=192, bias=True)
              (6): Permute()
            )
            (stochastic_depth): StochasticDepth(p=0.023529411764705882, mode=row)
          )
          (2): CNBlock(
            (block): Sequential(
              (0): Conv2d(192, 192, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=192)
              (1): Permute()
              (2): LayerNorm((192,), eps=1e-06, elementwise_affine=True)
              (3): Linear(in_features=192, out_features=768, bias=True)
              (4): GELU(approximate='none')
              (5): Linear(in_features=768, out_features=192, bias=True)
              (6): Permute()
            )
            (stochastic_depth): StochasticDepth(p=0.029411764705882353, mode=row)
          )
        )
        (4): Sequential(
          (0): LayerNorm2d((192,), eps=1e-06, elementwise_affine=True)
          (1): Conv2d(192, 384, kernel_size=(2, 2), stride=(2, 2))
        )
        (5): Sequential(
          (0): CNBlock(
            (block): Sequential(
              (0): Conv2d(384, 384, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=384)
              (1): Permute()
              (2): LayerNorm((384,), eps=1e-06, elementwise_affine=True)
              (3): Linear(in_features=384, out_features=1536, bias=True)
              (4): GELU(approximate='none')
              (5): Linear(in_features=1536, out_features=384, bias=True)
              (6): Permute()
            )
            (stochastic_depth): StochasticDepth(p=0.03529411764705883, mode=row)
          )
          (1): CNBlock(
            (block): Sequential(
              (0): Conv2d(384, 384, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=384)
              (1): Permute()
              (2): LayerNorm((384,), eps=1e-06, elementwise_affine=True)
              (3): Linear(in_features=384, out_features=1536, bias=True)
              (4): GELU(approximate='none')
              (5): Linear(in_features=1536, out_features=384, bias=True)
              (6): Permute()
            )
            (stochastic_depth): StochasticDepth(p=0.0411764705882353, mode=row)
          )
          (2): CNBlock(
            (block): Sequential(
              (0): Conv2d(384, 384, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=384)
              (1): Permute()
              (2): LayerNorm((384,), eps=1e-06, elementwise_affine=True)
              (3): Linear(in_features=384, out_features=1536, bias=True)
              (4): GELU(approximate='none')
              (5): Linear(in_features=1536, out_features=384, bias=True)
              (6): Permute()
            )
            (stochastic_depth): StochasticDepth(p=0.047058823529411764, mode=row)
          )
          (3): CNBlock(
            (block): Sequential(
              (0): Conv2d(384, 384, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=384)
              (1): Permute()
              (2): LayerNorm((384,), eps=1e-06, elementwise_affine=True)
              (3): Linear(in_features=384, out_features=1536, bias=True)
              (4): GELU(approximate='none')
              (5): Linear(in_features=1536, out_features=384, bias=True)
              (6): Permute()
            )
            (stochastic_depth): StochasticDepth(p=0.052941176470588235, mode=row)
          )
          (4): CNBlock(
            (block): Sequential(
              (0): Conv2d(384, 384, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=384)
              (1): Permute()
              (2): LayerNorm((384,), eps=1e-06, elementwise_affine=True)
              (3): Linear(in_features=384, out_features=1536, bias=True)
              (4): GELU(approximate='none')
              (5): Linear(in_features=1536, out_features=384, bias=True)
              (6): Permute()
            )
            (stochastic_depth): StochasticDepth(p=0.058823529411764705, mode=row)
          )
          (5): CNBlock(
            (block): Sequential(
              (0): Conv2d(384, 384, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=384)
              (1): Permute()
              (2): LayerNorm((384,), eps=1e-06, elementwise_affine=True)
              (3): Linear(in_features=384, out_features=1536, bias=True)
              (4): GELU(approximate='none')
              (5): Linear(in_features=1536, out_features=384, bias=True)
              (6): Permute()
            )
            (stochastic_depth): StochasticDepth(p=0.06470588235294118, mode=row)
          )
          (6): CNBlock(
            (block): Sequential(
              (0): Conv2d(384, 384, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=384)
              (1): Permute()
              (2): LayerNorm((384,), eps=1e-06, elementwise_affine=True)
              (3): Linear(in_features=384, out_features=1536, bias=True)
              (4): GELU(approximate='none')
              (5): Linear(in_features=1536, out_features=384, bias=True)
              (6): Permute()
            )
            (stochastic_depth): StochasticDepth(p=0.07058823529411766, mode=row)
          )
          (7): CNBlock(
            (block): Sequential(
              (0): Conv2d(384, 384, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=384)
              (1): Permute()
              (2): LayerNorm((384,), eps=1e-06, elementwise_affine=True)
              (3): Linear(in_features=384, out_features=1536, bias=True)
              (4): GELU(approximate='none')
              (5): Linear(in_features=1536, out_features=384, bias=True)
              (6): Permute()
            )
            (stochastic_depth): StochasticDepth(p=0.07647058823529412, mode=row)
          )
          (8): CNBlock(
            (block): Sequential(
              (0): Conv2d(384, 384, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=384)
              (1): Permute()
              (2): LayerNorm((384,), eps=1e-06, elementwise_affine=True)
              (3): Linear(in_features=384, out_features=1536, bias=True)
              (4): GELU(approximate='none')
              (5): Linear(in_features=1536, out_features=384, bias=True)
              (6): Permute()
            )
            (stochastic_depth): StochasticDepth(p=0.0823529411764706, mode=row)
          )
        )
        (6): Sequential(
          (0): LayerNorm2d((384,), eps=1e-06, elementwise_affine=True)
          (1): Conv2d(384, 768, kernel_size=(2, 2), stride=(2, 2))
        )
        (7): Sequential(
          (0): CNBlock(
            (block): Sequential(
              (0): Conv2d(768, 768, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=768)
              (1): Permute()
              (2): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
              (3): Linear(in_features=768, out_features=3072, bias=True)
              (4): GELU(approximate='none')
              (5): Linear(in_features=3072, out_features=768, bias=True)
              (6): Permute()
            )
            (stochastic_depth): StochasticDepth(p=0.08823529411764706, mode=row)
          )
          (1): CNBlock(
            (block): Sequential(
              (0): Conv2d(768, 768, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=768)
              (1): Permute()
              (2): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
              (3): Linear(in_features=768, out_features=3072, bias=True)
              (4): GELU(approximate='none')
              (5): Linear(in_features=3072, out_features=768, bias=True)
              (6): Permute()
            )
            (stochastic_depth): StochasticDepth(p=0.09411764705882353, mode=row)
          )
          (2): CNBlock(
            (block): Sequential(
              (0): Conv2d(768, 768, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=768)
              (1): Permute()
              (2): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
              (3): Linear(in_features=768, out_features=3072, bias=True)
              (4): GELU(approximate='none')
              (5): Linear(in_features=3072, out_features=768, bias=True)
              (6): Permute()
            )
            (stochastic_depth): StochasticDepth(p=0.1, mode=row)
          )
        )
      )
      (avgpool): AdaptiveAvgPool2d(output_size=1)
      (classifier): Sequential(
        (0): LayerNorm2d((768,), eps=1e-06, elementwise_affine=True)
        (1): Flatten(start_dim=1, end_dim=-1)
        (2): Linear(in_features=768, out_features=1000, bias=True)
      )
    )
    (1): Sigmoid()
  )
)

Training code:

# Training Loop

# Lists to keep track of progress
img_list = []
G_losses = []
D_losses = []
iters = 0

print("Starting Training Loop...")
# For each epoch
for epoch in range(num_epochs):
    # For each batch in the dataloader
    for i, data in enumerate(dataloader, 0):

        ############################
        # (1) Update D network: maximize log(D(x)) + log(1 - D(G(z)))
        ###########################
        ## Train with all-real batch
        netD.zero_grad()
        # Format batch
        real_cpu = data[0].to(device)
        b_size = real_cpu.size(0)
        label = torch.full((b_size,), real_label, dtype=torch.float, device=device)
        # Forward pass real batch through D
        output = netD(real_cpu).view(-1)
        # Calculate loss on all-real batch
        errD_real = criterion(output, label)
        # Calculate gradients for D in backward pass
        errD_real.backward()
        D_x = output.mean().item()

        ## Train with all-fake batch
        # Generate batch of latent vectors
        noise = torch.randn(b_size, nz, 1, 1, device=device)
        # Generate fake image batch with G
        fake = netG(noise)
        label.fill_(fake_label)
        # Classify all fake batch with D
        output = netD(fake.detach()).view(-1)
        # Calculate D's loss on the all-fake batch
        errD_fake = criterion(output, label)
        # Calculate the gradients for this batch, accumulated (summed) with previous gradients
        errD_fake.backward()
        D_G_z1 = output.mean().item()
        # Compute error of D as sum over the fake and the real batches
        errD = errD_real + errD_fake
        # Update D
        optimizerD.step()

        ############################
        # (2) Update G network: maximize log(D(G(z)))
        ###########################
        netG.zero_grad()
        label.fill_(real_label)  # fake labels are real for generator cost
        # Since we just updated D, perform another forward pass of all-fake batch through D
        output = netD(fake).view(-1)
        # Calculate G's loss based on this output
        errG = criterion(output, label)
        # Calculate gradients for G
        errG.backward()
        D_G_z2 = output.mean().item()
        # Update G
        optimizerG.step()

        # Output training stats
        if i % 50 == 0:
            print('[%d/%d][%d/%d]\tLoss_D: %.4f\tLoss_G: %.4f\tD(x): %.4f\tD(G(z)): %.4f / %.4f'
                  % (epoch, num_epochs, i, len(dataloader),
                     errD.item(), errG.item(), D_x, D_G_z1, D_G_z2))


        # Check how the generator is doing by saving G's output on fixed_noise
        if (iters % 2500 == 0) or ((epoch == num_epochs-1) and (i == len(dataloader)-1)):
            with torch.no_grad():
                fake = netG(fixed_noise).detach().cpu()
            img_list.append(vutils.make_grid(fake, padding=2, normalize=True))
        iters += 1
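    # Record this epoch's final loss values, preview the latest image grid, and checkpoint both models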
    G_losses.append(errG.item())
    D_losses.append(errD.item())
    plt.imshow(np.transpose(img_list[-1], (1, 2, 0)))
    plt.show()
    torch.save(netG, '/content/drive/MyDrive/face/netG')
    torch.save(netD, '/content/drive/MyDrive/face/netD')

And the error:

ValueError                                Traceback (most recent call last)
<ipython-input-41-8998c0d16cb1> in <cell line: 11>()
     27         print((output[110]))
     28         print((label[0]))
---> 29         errD_real = criterion(output, label)
     30         # Calculate gradients for D in backward pass
     31         errD_real.backward()

3 frames
/usr/local/lib/python3.10/dist-packages/torch/nn/functional.py in binary_cross_entropy(input, target, weight, size_average, reduce, reduction)
   3111         reduction_enum = _Reduction.get_enum(reduction)
   3112     if target.size() != input.size():
-> 3113         raise ValueError(
   3114             "Using a target size ({}) that is different to the input size ({}) is deprecated. "
   3115             "Please ensure they have the same size.".format(target.size(), input.size())

ValueError: Using a target size (torch.Size([10])) that is different to the input size (torch.Size([10000])) is deprecated. Please ensure they have the same size.

I don't know why this happens; I tried to find a solution online but found nothing similar.


Answers (1):

Answer author: Саша Торовин

Adding nn.Linear(1000, 1) helped. The pretrained convnext_tiny still carries its ImageNet classification head (Linear(768, 1000)), so the discriminator emits 1000 logits per image: for a batch of 10 images, output.view(-1) has 10000 elements while label has only 10, which is exactly the size mismatch in the traceback. Projecting those 1000 logits down to a single score makes the output (N, 1), and view(-1) then yields the expected (N,).
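A quick shape check (a minimal sketch, not from the original answer; the 224×224 input size and the variable names are assumptions) shows both the mismatch and the fix:

import torch
import torch.nn as nn
from torchvision.models import convnext_tiny, ConvNeXt_Tiny_Weights

# hypothetical standalone check, separate from the GAN code
backbone = convnext_tiny(weights=ConvNeXt_Tiny_Weights.DEFAULT)
x = torch.randn(10, 3, 224, 224)   # batch of 10 dummy images

print(backbone(x).shape)           # torch.Size([10, 1000]) -> .view(-1) gives 10000 values

fixed = nn.Sequential(backbone, nn.Linear(1000, 1), nn.Sigmoid())
print(fixed(x).shape)              # torch.Size([10, 1])    -> .view(-1) gives 10 values

With the extra projection in place, the full discriminator becomes: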

import torch.nn as nn
from torchvision.models import convnext_tiny, ConvNeXt_Tiny_Weights

weights = ConvNeXt_Tiny_Weights.DEFAULT
model = convnext_tiny(weights=weights)

class _netD(nn.Module):
    def __init__(self, ngpu):
        super(_netD, self).__init__()
        self.ngpu = ngpu
        self.main = nn.Sequential(
            model,
            nn.Linear(1000, 1),  # project the 1000 ImageNet logits down to a single score
            nn.Sigmoid()
            # state size. 1
        )

    def forward(self, input):
        return self.main(input)

# Create the Discriminator
netD = _netD(ngpu).to(device)

# Handle multi-GPU if desired
if (device.type == 'cuda') and (ngpu > 1):
    netD = nn.DataParallel(netD, list(range(ngpu)))

# Print the model
print(netD)
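An equivalent approach (a sketch, not from the original answer) is to replace the pretrained head in place instead of stacking an extra layer. In torchvision's ConvNeXt the classifier is Sequential(LayerNorm2d, Flatten, Linear(768, 1000)), so the final Linear can be swapped for a single-output one:

import torch.nn as nn
from torchvision.models import convnext_tiny, ConvNeXt_Tiny_Weights

model = convnext_tiny(weights=ConvNeXt_Tiny_Weights.DEFAULT)
# classifier = Sequential(LayerNorm2d, Flatten, Linear(768, 1000));
# swap the last layer so the backbone emits one logit per image
model.classifier[2] = nn.Linear(model.classifier[2].in_features, 1)

netD = nn.Sequential(model, nn.Sigmoid())  # output shape: (N, 1)

This also discards the unused 768×1000 weights of the old head; either way the new layer starts from random weights.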