Source code for torchattacks.attacks.deepfool

import torch
import torch.nn as nn

from ..attack import Attack


class DeepFool(Attack):
    r"""
    'DeepFool: A Simple and Accurate Method to Fool Deep Neural Networks'
    [https://arxiv.org/abs/1511.04599]

    Distance Measure : L2

    Arguments:
        model (nn.Module): model to attack.
        steps (int): number of steps. (Default: 50)
        overshoot (float): parameter for enhancing the noise. (Default: 0.02)

    Shape:
        - images: :math:`(N, C, H, W)` where `N = batch size`, `C = number of channels`, `H = height` and `W = width`. It must have a range [0, 1].
        - labels: :math:`(N)` where each value :math:`y_i` is :math:`0 \leq y_i <` `number of labels`.
        - output: :math:`(N, C, H, W)`.

    Examples::
        >>> attack = torchattacks.DeepFool(model, steps=50, overshoot=0.02)
        >>> adv_images = attack(images, labels)

    """

    def __init__(self, model, steps=50, overshoot=0.02):
        super().__init__("DeepFool", model)
        self.steps = steps
        self.overshoot = overshoot
        self.supported_mode = ["default"]

    def forward(self, images, labels):
        r"""
        Overridden.

        Returns only the adversarial images; the labels the images were
        pushed towards are discarded (use
        :meth:`forward_return_target_labels` to obtain them).
        """
        adv_images, _ = self.forward_return_target_labels(images, labels)
        return adv_images

    def forward_return_target_labels(self, images, labels):
        r"""
        Run the attack and also report the label each sample ended up at.

        Every sample is attacked independently, one linearised step at a
        time, until the model's prediction differs from its label or
        ``self.steps`` rounds have been performed.

        Arguments:
            images (torch.Tensor): batch of inputs, values in [0, 1].
            labels (torch.Tensor): label of every input.

        Returns:
            tuple: ``(adv_images, target_labels)`` where ``adv_images`` is the
            detached batch of perturbed images and ``target_labels`` holds,
            per sample, the last class reported by :meth:`_forward_indiv`
            (the original label if the sample was never processed).
        """
        # `self.device` is expected to be provided by the Attack base class.
        images = images.clone().detach().to(self.device)
        labels = labels.clone().detach().to(self.device)
        target_labels = labels.clone().detach().to(self.device)

        batch_size = len(images)
        if batch_size == 0:
            # torch.cat() rejects an empty list, so there is nothing to do.
            return images, target_labels

        # still_correct[i] stays True while sample i is classified as its label.
        still_correct = [True] * batch_size
        adv_images = [
            images[idx : idx + 1].clone().detach() for idx in range(batch_size)
        ]

        curr_steps = 0
        while any(still_correct) and curr_steps < self.steps:
            for idx in range(batch_size):
                if not still_correct[idx]:
                    continue
                early_stop, pre, adv_image = self._forward_indiv(
                    adv_images[idx], labels[idx]
                )
                adv_images[idx] = adv_image
                target_labels[idx] = pre
                if early_stop:
                    still_correct[idx] = False
            curr_steps += 1

        adv_images = torch.cat(adv_images).detach()
        return adv_images, target_labels

    def _forward_indiv(self, image, label):
        r"""
        Perform one DeepFool step on a single image.

        Arguments:
            image (torch.Tensor): one image with a leading batch axis of 1.
            label: class index of ``image`` (0-dim tensor or int).

        Returns:
            tuple: ``(early_stop, label, image)``. ``early_stop`` is True when
            the model already predicts something other than ``label``; the
            prediction and the unchanged image are returned in that case.
            Otherwise ``early_stop`` is False and the tuple carries the class
            whose linearised decision boundary is closest together with the
            image moved (with overshoot) towards that boundary and clamped
            to [0, 1].
        """
        image.requires_grad = True
        # `self.get_logits` is expected to be provided by the Attack base class.
        fs = self.get_logits(image)[0]
        _, pre = torch.max(fs, dim=0)
        if pre != label:
            return (True, pre, image)

        ws = self._construct_jacobian(fs, image)
        image = image.detach()
        fs = fs.detach()

        # Convert once so the class filter below compares plain ints instead
        # of doing one tensor comparison per class.
        label_idx = int(label)
        f_0 = fs[label_idx]
        w_0 = ws[label_idx]

        wrong_classes = [i for i in range(len(fs)) if i != label_idx]
        f_k = fs[wrong_classes]
        w_k = ws[wrong_classes]

        f_prime = f_k - f_0
        w_prime = w_k - w_0

        # A vanishing gradient difference would otherwise divide by zero and
        # produce NaN/inf, which clamp() does not remove from the image.
        eps = 1e-12
        distances = torch.abs(f_prime) / torch.clamp(
            torch.norm(nn.Flatten()(w_prime), p=2, dim=1), min=eps
        )
        _, hat_L = torch.min(distances, 0)

        delta = (
            torch.abs(f_prime[hat_L])
            * w_prime[hat_L]
            / (torch.clamp(torch.norm(w_prime[hat_L], p=2), min=eps) ** 2)
        )

        # hat_L indexes the list with the true class removed; shift it back.
        target_label = hat_L if hat_L < label_idx else hat_L + 1

        adv_image = image + (1 + self.overshoot) * delta
        adv_image = torch.clamp(adv_image, min=0, max=1).detach()
        return (False, target_label, adv_image)

    # https://stackoverflow.com/questions/63096122/pytorch-is-it-possible-to-differentiate-a-matrix
    # torch.autograd.functional.jacobian is only for torch >= 1.5.1
    def _construct_jacobian(self, y, x):
        r"""
        Build the Jacobian of ``y`` with respect to ``x`` row by row.

        Arguments:
            y (torch.Tensor): outputs computed from ``x``; iterated element
                by element, each element must be a scalar.
            x (torch.Tensor): input tensor that requires grad.

        Returns:
            torch.Tensor: gradients stacked to shape ``(*y.shape, *x.shape)``.
        """
        num_outputs = len(y)
        x_grads = []
        for idx, y_element in enumerate(y):
            # autograd.grad differentiates w.r.t. `x` only, so the model's
            # parameter .grad buffers are left untouched (backward() would
            # accumulate into them). The graph is kept until the last row.
            (grad,) = torch.autograd.grad(
                y_element, x, retain_graph=idx + 1 < num_outputs
            )
            x_grads.append(grad.detach())
        return torch.stack(x_grads).reshape(*y.shape, *x.shape)