提问 发文

Swin Transformer各层特征可视化

微微菌

| 2024-03-12 14:18 272 0 0

一、寻找可视化的目标层

1.把模型各层打印出来

# Build the model from the config, then print it so that every layer is listed
# and the layer to visualise can be located by name.
model = build_model(config)
print(model)


示例:我的Swin模型各层(简化后):

请添加图片描述

例如:

## Keep only ONE of the assignments below: each one rebinds `target_layer`,
## so the last assignment executed wins.
## Target layer is stage 1
target_layer = [ model.layers[0].downsample.norm ]
## Or: target layer is stage 2
target_layer = [ model.layers[1].downsample.norm ]
## Or: target layer is stage 3
target_layer = [ model.layers[-2].downsample.norm ]
## Target layer is stage 4
target_layer = [ model.norm ]

二、完整代码

代码如下(示例):

import argparse
import cv2
import numpy as np
import torch
import timm
from collections import OrderedDict
from build import build_model
from config import get_config
from swin_transformer import SwinTransformer
from pytorch_grad_cam import GradCAM, \
ScoreCAM, \
GradCAMPlusPlus, \
AblationCAM, \
XGradCAM, \
EigenCAM, \
EigenGradCAM, \
LayerCAM, \
FullGrad

from pytorch_grad_cam.utils.image import show_cam_on_image, \
preprocess_image


def get_args(argv=None):
    """Parse the command-line options of the Swin Transformer CAM demo.

    Args:
        argv: Optional list of argument strings to parse. ``None`` (the
            default) lets argparse read ``sys.argv[1:]``, which is what the
            original zero-argument call did.

    Returns:
        The parsed ``argparse.Namespace``. ``use_cuda`` is forced to ``False``
        when ``torch.cuda.is_available()`` reports no usable GPU.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--use-cuda', action='store_true', default=False,
                        help='Use NVIDIA GPU acceleration')
    parser.add_argument(
        '--image-path',
        type=str,
        default='E:/dataset/NUS-WIDE/images/0001_14016746.jpg',
        help='Input image path')
    parser.add_argument(
        '--imagename',
        type=str,
        default='0001_14016746',
        help='Image name used in the output file name')
    # Raw string: the backslashes of the Windows path must not be read as
    # escape sequences (the runtime value is unchanged).
    parser.add_argument(
        '--cfg',
        type=str,
        default=r"F:\pytorch_grad_cam_master\configs\swin_tiny_patch4_window7_224.yaml",
        metavar="FILE",
        help='path to config file')
    parser.add_argument('--aug_smooth', action='store_true',
                        help='Apply test time augmentation to smooth the CAM')
    parser.add_argument(
        '--eigen_smooth',
        action='store_true',
        help='Reduce noise by taking the first principal component '
             'of cam_weights*activations')

    parser.add_argument(
        '--method',
        type=str,
        default='scorecam',
        help='Can be gradcam/gradcam++/scorecam/xgradcam/ablationcam/'
             'eigencam/eigengradcam/layercam/fullgrad')

    # The options below are not read by this function; presumably they are
    # consumed by get_config(args) -- TODO confirm against config.py.
    parser.add_argument(
        "--opts",
        help="Modify config options by adding 'KEY VALUE' pairs. ",
        default=None,
        nargs='+',
    )
    parser.add_argument('--batch_size', default=4, type=int,
                        help='Batch size.(default: 4)')
    parser.add_argument('--data_path', default="E:/dataset/NUS-WIDE",
                        help='Path of dataset')
    parser.add_argument('--zip', action='store_true',
                        help='use zipped dataset instead of folder dataset')
    parser.add_argument('--cache-mode', type=str, default='part',
                        choices=['no', 'full', 'part'],
                        help='no: no cache, '
                             'full: cache all data, '
                             'part: sharding the dataset into nonoverlapping pieces and only cache one piece')
    parser.add_argument('--resume', help='resume from checkpoint')
    parser.add_argument('--accumulation-steps', type=int,
                        help="gradient accumulation steps")
    parser.add_argument('--use-checkpoint', action='store_true',
                        help="whether to use gradient checkpointing to save memory")
    parser.add_argument('--amp-opt-level', type=str, default='O1',
                        choices=['O0', 'O1', 'O2'],
                        help='mixed precision opt level, if O0, no amp is used')
    parser.add_argument('--output', default='output', type=str, metavar='PATH',
                        help='root of output folder, the full path is <output>/<model_name>/<tag> (default: output)')
    parser.add_argument('--tag', help='tag of experiment')
    parser.add_argument('--eval', action='store_true',
                        help='Perform evaluation only')
    parser.add_argument('--throughput', action='store_true',
                        help='Test throughput only')
    parser.add_argument("--local_rank", type=int, default=-1,
                        help='local rank for DistributedDataParallel')

    args = parser.parse_args(argv)

    # Only keep the CUDA flag when a GPU is actually available.
    args.use_cuda = args.use_cuda and torch.cuda.is_available()
    if args.use_cuda:
        print('Using GPU for acceleration')
    else:
        print('Using CPU for computation')

    return args


def reshape_transform(tensor, height=7, width=7):
    """Turn a token sequence into a CNN-style feature map.

    The input is reshaped to ``(tensor.size(0), height, width, tensor.size(2))``
    and the last axis is then moved to position 1.

    Args:
        tensor: Activation of the target layer, indexed as
            ``(batch, tokens, channels)``.
        height: Number of rows of the token grid. For the Swin example this is
            28 for stage 1, 14 for stage 2 and 7 for stages 3 and 4.
        width: Number of columns of the token grid (same values as ``height``).

    Returns:
        A tensor laid out as ``(batch, channels, height, width)``.

    Raises:
        RuntimeError: If the tensor fits neither the requested grid nor a
            square grid inferred from its token count.
    """
    batch, channels = tensor.size(0), tensor.size(2)
    try:
        result = tensor.reshape(batch, height, width, channels)
    except RuntimeError:
        # The requested grid does not match this layer (e.g. the 7x7 default
        # used on a 28x28 or 14x14 stage). Fall back to a square grid derived
        # from the token count instead of failing outright.
        side = int(round(tensor.size(1) ** 0.5))
        if side * side != tensor.size(1):
            raise
        result = tensor.reshape(batch, side, side, channels)

    # Bring the channels to the first dimension,
    # like in CNNs.
    result = result.transpose(2, 3).transpose(1, 2)
    return result


if __name__ == '__main__':
    # Usage: python swinT_example.py --image-path <path_to_image>
    # Example of applying the CAM methods to a Swin Transformer network.

    args = get_args()
    config = get_config(args)

    # Map the --method value to the CAM implementation to instantiate.
    methods = {
        "gradcam": GradCAM,
        "scorecam": ScoreCAM,
        "gradcam++": GradCAMPlusPlus,
        "ablationcam": AblationCAM,
        "xgradcam": XGradCAM,
        "eigencam": EigenCAM,
        "eigengradcam": EigenGradCAM,
        "layercam": LayerCAM,
        "fullgrad": FullGrad,
    }

    if args.method not in methods:
        raise Exception(f"method should be one of {list(methods.keys())}")

    model = build_model(config)

    # The checkpoint may be saved as a .pth or a .pt file.
    # NOTE(review): torch.load unpickles the file -- only load checkpoints
    # that come from a trusted source.
    state_dict = torch.load("F:/pytorch_grad_cam_master/nopre-cifar-model.t",
                            map_location='cpu')

    # Drop the 'module.' prefix from the parameter names before loading.
    model.load_state_dict(
        {k.replace('module.', ''): v for k, v in state_dict.items()})

    # Switch to inference mode on every device, not only when CUDA is used.
    model.eval()
    if args.use_cuda:
        model = model.cuda()

    # Layer to visualise, together with the side length of its token grid.
    # Alternatives:
    #   [model.norm]                        last stage,  grid_size = 7
    #   [model.layers[-2].downsample.norm]  third stage, grid_size = 7
    #   [model.layers[0].downsample.norm]   first stage, grid_size = 28
    target_layer = [model.layers[1].downsample.norm]  # second stage
    grid_size = 14

    # NOTE(review): `use_cuda` here and `target_category` below follow the
    # pytorch-grad-cam API this example was written for; newer releases may
    # have changed these arguments -- confirm against the installed version.
    cam = methods[args.method](
        model=model,
        target_layers=target_layer,
        use_cuda=args.use_cuda,
        # Pass the grid size explicitly so the reshape matches the chosen layer.
        reshape_transform=lambda t: reshape_transform(
            t, height=grid_size, width=grid_size))

    bgr_img = cv2.imread(args.image_path, 1)
    if bgr_img is None:
        # cv2.imread returns None instead of raising when it cannot read a file.
        raise FileNotFoundError(f"Could not read image: {args.image_path}")
    rgb_img = bgr_img[:, :, ::-1]  # BGR -> RGB
    rgb_img = cv2.resize(rgb_img, (224, 224))
    rgb_img = np.float32(rgb_img) / 255
    input_tensor = preprocess_image(rgb_img, mean=[0.5, 0.5, 0.5],
                                    std=[0.5, 0.5, 0.5])

    # If None, returns the map for the highest scoring category.
    # Otherwise, targets the requested category.
    target_category = None

    # AblationCAM and ScoreCAM have batched implementations.
    # You can override the internal batch size for faster computation.
    cam.batch_size = 32

    grayscale_cam = cam(input_tensor=input_tensor,
                        target_category=target_category,
                        eigen_smooth=args.eigen_smooth,
                        aug_smooth=args.aug_smooth)

    # Here grayscale_cam has only one image in the batch
    grayscale_cam = grayscale_cam[0, :]

    cam_image = show_cam_on_image(rgb_img, grayscale_cam)
    cv2.imwrite(f'{args.method}_{args.imagename}_cam.jpg', cam_image)

三、可视化各层特征出现RuntimeError: shape ‘[1, 7, 7, 768]’ is invalid for input of size 150528
(示例):在可视化stage1和stage2时会出现以上报错。这是因为目标层stage1和stage2输出的tensor大小分别为(1,28×28,384)和(1,14×14,768),而reshape_transform()方法中参数height和width默认固定为7,无法把这些tensor reshape成(1,7,7,C),所以产生报错。

##解决方法
def reshape_transform(tensor, height=7, width=7):
    """Reshape a ``(batch, tokens, channels)`` activation to ``(batch, channels, height, width)``.

    Taking Swin as the example: use ``height = width = 28`` when the target
    layer is stage 1, 14 for stage 2 and 7 for stages 3 and 4. When the
    requested grid does not fit the tensor, a square grid is inferred from the
    token count so that the default arguments work for every stage.

    Args:
        tensor: Activation of the target layer, indexed as
            ``(batch, tokens, channels)``.
        height: Number of rows of the token grid.
        width: Number of columns of the token grid.

    Returns:
        The activation with the channel axis moved to position 1.

    Raises:
        RuntimeError: If neither the requested grid nor an inferred square
            grid matches the number of tokens.
    """
    n_batch = tensor.size(0)
    n_channels = tensor.size(2)
    try:
        result = tensor.reshape(n_batch, height, width, n_channels)
    except RuntimeError:
        # Hard-coding 7x7 is what produces "shape '[1, 7, 7, 768]' is invalid
        # for input of size 150528"; recover by deriving the grid side from
        # the token count when it is a perfect square.
        n_tokens = tensor.size(1)
        side = int(round(n_tokens ** 0.5))
        if side * side != n_tokens:
            raise
        result = tensor.reshape(n_batch, side, side, n_channels)

    # Bring the channels to the first dimension,
    # like in CNNs.
    result = result.transpose(2, 3).transpose(1, 2)
    return result


四、各层特征图可视化结果

在这里插入图片描述


————————————————

版权声明:本文为博主原创文章,遵循 CC 4.0 BY-SA 版权协议,转载请附上原文出处链接和本声明。

原文链接:https://blog.csdn.net/weixin_46425667/article/details/122250073



收藏 0
分享
分享方式
微信

评论

游客

全部 0条评论

10603

文章

10.52W+

人气

19

粉丝

1

关注

官方媒体

轻松设计高效搭建,减少3倍设计改稿与开发运维工作量

开始免费试用 预约演示

扫一扫关注公众号 扫一扫联系客服

©Copyrights 2016-2022 杭州易知微科技有限公司 浙ICP备2021017017号-3 浙公网安备33011002011932号

互联网信息服务业务 合字B2-20220090

400-8505-905 复制
免费试用
微信社区
易知微-数据可视化
微信扫一扫入群