卷积学习笔记

发布于 2023-08-24  274 次阅读


nn.Conv2d

不知道为啥,调库的卷积和手搓的卷积效果不一样。。。

艹,发现了更离谱的现象,用tensor和numpy搓出来的卷积效果也不同。。。

代码和效果放在文章最后给出,希望路过的大佬能指出问题

卷积层的卷积操作和我想象的不同,故记录在笔记本

image-20230824194923251

  • 记住padding是在卷积前padding
  • 每个输出通道的像素值,由该输出通道的各个kernel分别与对应的输入通道做卷积后,再把所有结果相加得到
import torch
import torch.nn as nn

# Hand-written 3x3 kernels, one per input channel.
# Each gets a leading axis of size 1 (shape (1, 3, 3)) so the three can be
# concatenated along dim 0 into a single (3, 3, 3) bank of per-channel kernels.
custom_kernels = [
    torch.tensor(rows, dtype=torch.float32).unsqueeze(0)
    for rows in (
        [[1, 0, -1], [2, 0, -2], [1, 0, -1]],
        [[0, 1, 0], [1, -4, 1], [0, 1, 0]],
        [[-1, -1, -1], [-1, 8, -1], [-1, -1, -1]],
    )
]

# 定义自定义卷积层类
class CustomConv2d(nn.Module):
    """Convolution layer whose weights are fixed to hand-specified kernels.

    The per-input-channel kernels are concatenated into one
    ``(in_channels, kH, kW)`` bank, and that same bank is reused for each of
    the three output channels. The resulting weight therefore follows
    PyTorch's ``(out_channels, in_channels, kH, kW)`` layout, and all three
    output channels produce identical values.

    Args:
        kernels: Optional sequence of ``(1, kH, kW)`` tensors, one per input
            channel. Defaults to the module-level ``custom_kernels``.
    """

    def __init__(self, kernels=None):
        super().__init__()
        if kernels is None:
            kernels = custom_kernels

        # Build the per-input-channel bank once instead of re-concatenating it
        # for every output channel.
        kernel_bank = torch.cat(list(kernels), dim=0)
        in_channels, k_height, k_width = kernel_bank.shape
        out_channels = 3
        custom_weights = torch.stack([kernel_bank] * out_channels)
        print(custom_weights.shape)

        # Declare the layer with the channel counts of the weight that is
        # actually installed, so its metadata matches its behaviour.
        self.conv = nn.Conv2d(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=(k_height, k_width),
            padding=0,
            bias=False,
        )
        self.conv.weight = nn.Parameter(custom_weights, requires_grad=False)

    def forward(self, x):
        """Apply the fixed-weight convolution to ``x``."""
        return self.conv(x)

# Build the fixed-weight convolution layer
custom_conv_layer = CustomConv2d()

# All-ones input laid out as (batch_size, channels, height, width)
input_data = torch.ones((1, 3, 5, 5))

# Forward pass through the convolution
output = custom_conv_layer(input_data)

# Show the first input channel and the third output channel
print("输入数据:\n", input_data[0, 0])
print("输出数据:\n", output[0, 2])

以该段代码为例,我们可以发现卷积核(weight)的shape为 [out_channels, in_channels, kernel_size[0], kernel_size[1]],也即 [3, 3, 3, 3](实际的输出通道数由替换后的weight形状决定),也就是说out_channels会有3个,分别是怎么得到的呢?

第 i 个输出通道由卷积核 weight[i][0]、weight[i][1]、weight[i][2] 分别与输入的第 0、1、2 个通道进行卷积操作,然后把这三个结果相加得到,即 out[i] = conv(input[0], weight[i][0]) + conv(input[1], weight[i][1]) + conv(input[2], weight[i][2])。

可以修改卷积核的值自行尝试:由于这里3个输出通道用的是同一组卷积核,会发现3个输出通道得到的像素值完全一样,并且都等于3个输入通道各自卷积结果相加之和

  • tensor扔进nn里的数据结构为 [batch_size, channels, height, width]

  • 因为我自己试过一些卷积,我觉得对图片实施卷积完全可以RGB每个通道一个卷积核,然后每个通道的值直接获得,于是利用 nn.functional 的 conv2d 来实现:

    import torch
    import torch.nn.functional as F
    from torchvision import transforms
    from PIL import Image
    
    # Load the image as RGB and convert it to a tensor with a leading batch axis
    image_path = '../../img/3.png'
    image = Image.open(image_path).convert('RGB')
    image_tensor = transforms.ToTensor()(image).unsqueeze(0)

    # One 3x3 mean filter per colour channel, stacked as (num_kernels, kH, kW)
    kernel = torch.tensor([
      [[1/9]*3]*3,

      [[1/9]*3]*3,

      [[1/9]*3]*3
    ], dtype=torch.float32)

    num_kernels, k_height, k_width = kernel.shape
    num_channels = image_tensor.shape[1]
    if num_kernels != num_channels:
        raise ValueError("expected exactly one kernel per image channel")

    # F.conv2d expects weight shaped (out_channels, in_channels / groups, kH, kW).
    # kernel.unsqueeze(1) gives (3, 1, 3, 3); with groups=num_channels every
    # channel is filtered only by its own full 3x3 kernel. Reshaping a kernel
    # to (3, 1, 1, 3) instead would be interpreted as three separate 1x3
    # filters, which is not a 3x3 convolution.
    # Padding by half the (odd) kernel size keeps the output the same H x W.
    output_tensor = F.conv2d(
        image_tensor,
        weight=kernel.unsqueeze(1),
        padding=(k_height // 2, k_width // 2),
        groups=num_channels,
    )

    print("image_tensor.shape: ", image_tensor.shape)
    print("kernel.shape: ", kernel.shape)
    print("output_tensor.shape: ", output_tensor.shape)

    # Display the result
    # output_image = output_tensor[0].clamp(0, 1)  # clamp pixel values to [0, 1]
    # output_image = transforms.ToPILImage()(output_image)
    # NOTE: show_tensor_img is a helper defined outside this snippet.
    show_tensor_img(output_tensor[0])
    

测试

调包实现卷积

优点:快,方便

缺点:学习过程慢,自定义参数麻烦

代码:

import torch
import torch.nn.functional as F
from torchvision import transforms
from PIL import Image

# Load the image as RGB and convert it to a tensor with a leading batch axis
image_path = '../../img/3.png'
image = Image.open(image_path).convert('RGB')
image_tensor = transforms.ToTensor()(image).unsqueeze(0)

# One 3x4 kernel per colour channel (all weights 1/16), stacked as (num_kernels, kH, kW)
kernel = torch.tensor([
    [[1/16]*4]*3,

    [[1/16]*4]*3,

    [[1/16]*4]*3
], dtype=torch.float32)

num_kernels = kernel.shape[0]
num_channels = image_tensor.shape[1]
if num_kernels != num_channels:
    raise ValueError("expected exactly one kernel per image channel")

# F.conv2d expects weight shaped (out_channels, in_channels / groups, kH, kW).
# kernel.unsqueeze(1) gives (3, 1, 3, 4); with groups=num_channels every
# channel is filtered only by its own full 3x4 kernel. Reshaping a kernel to
# (3, 1, 1, 4) instead would be interpreted as three separate 1x4 filters,
# which gives a different (much darker) result than a 3x4 sliding window.
# No padding: the output is (H - kH + 1) x (W - kW + 1), the same size a plain
# sliding-window loop over the image produces.
output_tensor = F.conv2d(
    image_tensor,
    weight=kernel.unsqueeze(1),
    padding=0,
    groups=num_channels,
)

print("image_tensor.shape: ", image_tensor.shape)
print("kernel.shape: ", kernel.shape)
print("output_tensor.shape: ", output_tensor.shape)

# Display the result
# output_image = output_tensor[0].clamp(0, 1)  # clamp pixel values to [0, 1]
# output_image = transforms.ToPILImage()(output_image)

# This snippet's own imports do not include matplotlib, so bring it in here.
import matplotlib.pyplot as plt

# Original on the left, convolved result on the right
plt.figure(figsize=(10, 5))

plt.subplot(1, 2, 1)
plt.imshow(image)
plt.title('Original Image')
plt.axis('off')
plt.subplot(1, 2, 2)
plt.title('Convolved Image')
# NOTE: show_tensor_img is a helper defined outside this snippet.
show_tensor_img(output_tensor[0])

运行时间:0.5s

效果:

diaobao

numpy手搓卷积

代码:

import cv2
import numpy as np
import matplotlib.pyplot as plt
# 卷积

def custom_convolution(image, kernel):
    """Slide a 2-D kernel over every channel of an image, without padding.

    The kernel is applied as-is (no flip) with stride 1, and the same kernel
    is used for every channel.

    Args:
        image: Array indexed as (height, width, channels).
        kernel: 2-D array indexed as (k_height, k_width).

    Returns:
        float32 array of shape
        (height - k_height + 1, width - k_width + 1, channels).
    """
    height, width, channels = image.shape
    k_height, k_width = kernel.shape
    out_h = height - k_height + 1
    out_w = width - k_width + 1
    result = np.zeros((out_h, out_w, channels), dtype=np.float32)

    # Visit every (channel, row, column) output position.
    for ch, top, left in np.ndindex(channels, out_h, out_w):
        window = image[top:top + k_height, left:left + k_width, ch]
        result[top, left, ch] = (window * kernel).sum()

    return result

# Load the image; OpenCV reads channels as BGR, so convert to RGB for matplotlib
image_path = '../img/3.png'  # replace with the path to your image file
image = cv2.imread(image_path)
if image is None:
    # cv2.imread reports a missing/unreadable file by returning None rather
    # than raising, which would otherwise surface as a confusing cvtColor error.
    raise FileNotFoundError(f"could not read image: {image_path}")
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

# 3x4 kernel with every weight equal to 1/16
kernel = np.array([[1/16]*4,
                   [1/16]*4,
                   [1/16]*4])

# Run the hand-written convolution
convolved_image = custom_convolution(image, kernel)

# Min-max rescale to [0, 255] for display only. This stretches the contrast,
# so the displayed brightness is relative to this image's own value range
# rather than showing the raw convolution values.
value_min = np.min(convolved_image)
value_range = np.max(convolved_image) - value_min
if value_range > 0:
    convolved_image_display = (convolved_image - value_min) / value_range * 255
else:
    # Constant image: avoid 0/0 and show it as all zeros.
    convolved_image_display = convolved_image - value_min

# Original on the left, convolved result on the right
plt.figure(figsize=(10, 5))

plt.subplot(1, 2, 1)
plt.imshow(image)
plt.title('Original Image')
plt.axis('off')

plt.subplot(1, 2, 2)
plt.imshow(convolved_image_display.astype(np.uint8))
plt.title('Convolved Image')
plt.axis('off')

plt.tight_layout()
plt.show()

运行时间 23.8s

效果:

numpy

torch.tensor手搓卷积

# Hand-written convolution on torch tensors
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

def custom_convolution(image, kernel, device=device):
    """Slide a 2-D kernel over every channel of an image tensor, without padding.

    The kernel is applied as-is (no flip) with stride 1, and the same kernel
    is used for every channel. Both inputs are moved to ``device`` first.

    Args:
        image: Tensor indexed as (channels, height, width).
        kernel: 2-D tensor indexed as (k_height, k_width).
        device: Device the computation runs on; defaults to the module-level
            ``device``.

    Returns:
        float32 tensor on ``device`` of shape
        (channels, height - k_height + 1, width - k_width + 1).
    """
    image = image.to(device)
    kernel = kernel.to(device)
    channels, height, width = image.shape
    k_height, k_width = kernel.shape
    out_h = height - k_height + 1
    out_w = width - k_width + 1
    result = torch.zeros((channels, out_h, out_w), dtype=torch.float32, device=device)
    print(image.shape, result.shape)

    for ch in range(channels):
        plane = image[ch]
        for top in range(out_h):
            rows = plane[top:top + k_height]
            for left in range(out_w):
                window = rows[:, left:left + k_width]
                result[ch, top, left] = torch.sum(window * kernel)

    return result

# Run the hand-written convolution with a 3x4 kernel whose weights are all 1/16
img_tensor_conv_cus = custom_convolution(
    image_tensor[0],
    torch.tensor([[1/16] * 4 for _ in range(3)]),
)

# Original on the left, convolved result on the right
plt.figure(figsize=(10, 5))

plt.subplot(1, 2, 1)
plt.imshow(image)
plt.title('Original Image')
plt.axis('off')

plt.subplot(1, 2, 2)
plt.title('Convolved Image')
show_tensor_img(img_tensor_conv_cus)

运行时间:2min

效果:

torch_conv


整天不想事儿,就想着干饭