Source code for torchattacks.attacks.jsma

import torch
import numpy as np

from ..attack import Attack


class JSMA(Attack):
    r"""
    Jacobian Saliency Map Attack in the paper 'The Limitations of Deep Learning in Adversarial Settings'
    [https://arxiv.org/abs/1511.07528v1]

    Distance Measure : L0

    Arguments:
        model (nn.Module): model to attack.
        theta (float): perturbation length; the perturbation applied to each selected pixel lies in [theta, 0] or [0, theta]. (Default: 1.0)
        gamma (float): highest percentage of pixels that can be modified. (Default: 0.1)

    Shape:
        - images: :math:`(N, C, H, W)` where `N = number of batches`, `C = number of channels`, `H = height` and `W = width`. It must have a range [0, 1].
        - labels: :math:`(N)` where each value :math:`y_i` satisfies :math:`0 \leq y_i \leq` `number of labels`.
        - output: :math:`(N, C, H, W)`.

    Examples::
        >>> attack = torchattacks.JSMA(model, theta=1.0, gamma=0.1)
        >>> adv_images = attack(images, labels)
    """

    def __init__(self, model, theta=1.0, gamma=0.1):
        super().__init__("JSMA", model)
        self.theta = theta
        self.gamma = gamma
        self.supported_mode = ["default", "targeted"]
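Because JSMA is an L0 attack, gamma caps the fraction of pixels the attack may change (two pixels are perturbed per iteration, so the cap holds up to rounding). A minimal sanity-check sketch; the helper l0_ratio below is hypothetical and not part of torchattacks:

import torch

def l0_ratio(adv, orig):
    # Fraction of pixels that differ between adversarial and clean images
    changed = (adv != orig).flatten(1).float().sum(dim=1)
    return changed / orig[0].numel()

# With gamma=0.1, each ratio should stay at or just below 0.1:
# print(l0_ratio(adv_images, images))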
    def forward(self, images, labels):
        r"""
        Overridden.
        """
        images = images.clone().detach().to(self.device)
        labels = labels.clone().detach().to(self.device)

        if self.targeted:
            target_labels = self.get_target_label(images, labels)
        else:
            # JSMA does not use a loss function, so it cannot perform a true
            # untargeted attack: there is no way to control which class, other
            # than the original one, the attack converges to. When the user has
            # not set a target label, we therefore default to a right circular
            # shift of the true label. Note that the modulus of 10 assumes a
            # 10-class problem such as MNIST or CIFAR10.
            target_labels = (labels + 1) % 10

        adv_image_list = []
        for im, tl in zip(images, target_labels):
            # The attack builds a full Jacobian matrix, which is costly for a
            # large batch, so we process one image at a time.
            # The shape of MNIST is [-1, 1, 28, 28] and that of CIFAR10 is
            # [-1, 3, 32, 32].
            pert_image = self.perturbation_single(
                torch.unsqueeze(im, 0), torch.unsqueeze(tl, 0)
            )
            adv_image_list.append(pert_image)

        adv_images = torch.clamp(torch.cat(adv_image_list, 0), min=0, max=1)
        return adv_images
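When the dataset does not have exactly 10 classes, the fallback above is inappropriate and targeted mode should be used instead. A hedged sketch, assuming torchattacks' set_mode_targeted_by_label helper (the exact targeted-mode helper can differ between torchattacks versions) and a dataset-specific num_classes:

attack = torchattacks.JSMA(model, theta=1.0, gamma=0.1)
attack.set_mode_targeted_by_label()  # labels passed to the call are treated as targets
target_labels = (labels + 1) % num_classes  # num_classes: e.g. 100 for CIFAR100
adv_images = attack(images, target_labels)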
    def compute_jacobian(self, image):
        var_image = image.clone().detach()
        var_image.requires_grad = True
        output = self.get_logits(var_image)

        num_features = int(np.prod(var_image.shape[1:]))
        jacobian = torch.zeros([output.shape[1], num_features], device=self.device)
        for i in range(output.shape[1]):
            if var_image.grad is not None:
                var_image.grad.zero_()
            output[0][i].backward(retain_graph=True)
            # Copy the derivative of class i into the corresponding row
            jacobian[i] = var_image.grad.squeeze().view(-1, num_features).clone()

        return jacobian

    @torch.no_grad()
    def saliency_map(self, jacobian, target_label, increasing, search_space, nb_features):
        # The search domain
        domain = torch.eq(search_space, 1).float()
        # The sum of all features' derivatives with respect to each class
        all_sum = torch.sum(jacobian, dim=0, keepdim=True)
        # The forward derivative of the target class
        target_grad = jacobian[target_label]
        # The sum of the forward derivatives of the other classes
        others_grad = all_sum - target_grad

        # These coefficients blank out features that are not in the search domain
        if increasing:
            increase_coef = 2 * (torch.eq(domain, 0)).float().to(self.device)
        else:
            increase_coef = -1 * 2 * (torch.eq(domain, 0)).float().to(self.device)
        increase_coef = increase_coef.view(-1, nb_features)

        # Calculate the sum of the target forward derivatives of any two features
        target_tmp = target_grad.clone()
        target_tmp -= increase_coef * torch.max(torch.abs(target_grad))
        # PyTorch broadcasts the two views into an (N, nb_features, nb_features) matrix
        alpha = target_tmp.view(-1, 1, nb_features) + target_tmp.view(-1, nb_features, 1)
        # Calculate the sum of the other forward derivatives of any two features
        others_tmp = others_grad.clone()
        others_tmp += increase_coef * torch.max(torch.abs(others_grad))
        beta = others_tmp.view(-1, 1, nb_features) + others_tmp.view(-1, nb_features, 1)

        # Zero out the diagonal so a feature is never paired with itself
        tmp = np.ones((nb_features, nb_features), int)
        np.fill_diagonal(tmp, 0)
        zero_diagonal = torch.from_numpy(tmp).bool().to(self.device)

        # According to the definition of the saliency map in the paper
        # (formulas 8 and 9), elements that do not satisfy the sign
        # requirements are blanked out.
        if increasing:
            mask1 = torch.gt(alpha, 0.0)
            mask2 = torch.lt(beta, 0.0)
        else:
            mask1 = torch.lt(alpha, 0.0)
            mask2 = torch.gt(beta, 0.0)

        # Apply the mask to the saliency map
        mask = torch.mul(torch.mul(mask1, mask2), zero_diagonal.view_as(mask1))
        # Do the multiplication according to formula 10 in the paper
        saliency_map = torch.mul(torch.mul(alpha, torch.abs(beta)), mask.float())
        # Get the most significant pair of pixels
        max_idx = torch.argmax(saliency_map.view(-1, nb_features * nb_features), dim=1)
        # p = max_idx // nb_features
        p = torch.div(max_idx, nb_features, rounding_mode="floor")
        # q = max_idx % nb_features
        q = max_idx - p * nb_features
        return p, q
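The per-class backward loop in compute_jacobian builds the Jacobian one row at a time. As a cross-check, the same matrix can be obtained from torch.autograd.functional.jacobian; this is a sketch assuming no input normalization has been set on the attack, so get_logits reduces to a plain forward pass:

import torch

def full_jacobian(model, image):
    # image: (1, C, H, W) -> Jacobian of shape (num_classes, num_features)
    jac = torch.autograd.functional.jacobian(lambda x: model(x)[0], image)
    return jac.reshape(jac.shape[0], -1)

# Expected to match up to numerical noise:
# torch.allclose(full_jacobian(model, image).to(attack.device), attack.compute_jacobian(image))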
    def perturbation_single(self, image, target_label):
        """
        Perturb a single image; both ``image`` and ``target_label``
        contain exactly one element.
        """
        var_image = image.to(self.device)
        var_label = target_label.to(self.device)

        increasing = self.theta > 0

        num_features = int(np.prod(var_image.shape[1:]))
        shape = var_image.shape

        # Two pixels are perturbed per iteration, so max_iters is divided by 2
        max_iters = int(np.ceil(num_features * self.gamma / 2.0))

        # Masked search domain: pixels that have already reached the top or
        # bottom of the valid range are excluded from further modification
        if increasing:
            search_domain = torch.lt(var_image, 0.99)
        else:
            search_domain = torch.gt(var_image, 0.01)
        search_domain = search_domain.view(num_features)

        output = self.get_logits(var_image)
        current_pred = torch.argmax(output.data, 1)

        iteration = 0
        while (
            (iteration < max_iters)
            and (current_pred != target_label)
            and (search_domain.sum() != 0)
        ):
            # Calculate the Jacobian matrix of forward derivatives
            jacobian = self.compute_jacobian(var_image)
            # Get the saliency map and the two pixels with the greatest influence
            p1, p2 = self.saliency_map(
                jacobian, var_label, increasing, search_domain, num_features
            )
            # Apply the modifications
            var_sample_flatten = var_image.view(-1, num_features)
            var_sample_flatten[0, p1] += self.theta
            var_sample_flatten[0, p2] += self.theta

            new_image = torch.clamp(var_sample_flatten, min=0.0, max=1.0)
            new_image = new_image.view(shape)
            # Remove the two perturbed pixels from the search domain
            search_domain[p1] = 0
            search_domain[p2] = 0
            var_image = new_image.to(self.device)

            output = self.get_logits(var_image)
            current_pred = torch.argmax(output.data, 1)
            iteration += 1

        adv_image = var_image
        return adv_image
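Putting the pieces together, a minimal end-to-end sketch; the linear model and random data below are placeholders for illustration only:

import torch
import torch.nn as nn
import torchattacks

model = nn.Sequential(nn.Flatten(), nn.Linear(28 * 28, 10)).eval()
images = torch.rand(4, 1, 28, 28)    # inputs must already lie in [0, 1]
labels = torch.randint(0, 10, (4,))

attack = torchattacks.JSMA(model, theta=1.0, gamma=0.1)
adv_images = attack(images, labels)  # untargeted call falls back to (labels + 1) % 10 targets
print((adv_images != images).flatten(1).sum(dim=1))  # number of changed pixels per image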