卷积学习笔记

nn.Conv2d

不知道为啥，调库的卷积和手搓的卷积效果不一样。。。

艹，发现了更离谱的现象，用tensor和numpy搓出来的卷积效果也不同。。。

代码和效果放在文章最后给出，希望路过的大佬能指出问题

卷积层的卷积操作和我想象的不同，故记录在笔记本

记住padding是在卷积前padding
每个输出通道的像素值，由该输出通道的各个卷积核分别与对应的输入通道做卷积，再把所有输入通道的结果相加得到

import torch
import torch.nn as nn

# Hand-written 3x3 filters, one per input channel. Every entry is created with
# a leading singleton axis (shape (1, 3, 3)) so the filters can be concatenated
# along dim 0 when the convolution weight is assembled.
custom_kernels = [
    torch.tensor(rows, dtype=torch.float32).unsqueeze(0)
    for rows in (
        # horizontal-gradient (Sobel-style) filter
        [[1, 0, -1],
         [2, 0, -2],
         [1, 0, -1]],
        # 4-neighbour Laplacian
        [[0, 1, 0],
         [1, -4, 1],
         [0, 1, 0]],
        # 8-neighbour Laplacian (outline) filter
        [[-1, -1, -1],
         [-1, 8, -1],
         [-1, -1, -1]],
    )
]

# Convolution layer whose weights are fixed to the hand-written kernels
class CustomConv2d(nn.Module):
    """3x3 convolution with frozen weights built from ``custom_kernels``.

    The weight tensor has shape (out_channels, in_channels, kH, kW) =
    (3, 3, 3, 3). Every output channel holds the same bank of kernels, where
    kernel ``i`` is applied to input channel ``i``; the per-input-channel
    results are summed, so all three output channels are identical.
    """

    def __init__(self):
        super().__init__()

        # (in_channels, kH, kW): one kernel per input channel.
        kernel_bank = torch.cat(custom_kernels, dim=0)
        # Repeat the bank once per output channel -> (out, in, kH, kW).
        custom_weights = torch.stack([kernel_bank, kernel_bank, kernel_bank])
        print(custom_weights.shape)

        # Derive the layer dimensions from the weight tensor so the module's
        # declared out_channels / in_channels agree with the weights it uses.
        out_channels, in_channels, k_height, k_width = custom_weights.shape
        self.conv = nn.Conv2d(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=(k_height, k_width),
            padding=0,
            bias=False,
        )
        # Frozen: the kernels are fixed filters, not trainable parameters.
        self.conv.weight = nn.Parameter(custom_weights, requires_grad=False)

    def forward(self, x):
        """Apply the fixed convolution to ``x`` and return the result."""
        return self.conv(x)

# Build the fixed-weight convolution layer
custom_conv_layer = CustomConv2d()

# All-ones dummy batch, laid out as (batch_size, channels, height, width)
input_data = torch.ones((1, 3, 5, 5))

# Forward pass
output = custom_conv_layer(input_data)

# Show the first input channel and the third output channel of sample 0
print("输入数据:\n", input_data[0, 0])
print("输出数据:\n", output[0, 2])

以该段代码为例，我们可以发现卷积核（weight）的shape为 [out_channels, in_channels, kernel_size[0], kernel_size[1]]，也即 [3, 3, 3, 3] ，也就是说out_channels会有3个，分别是怎么得到的呢？

out_channels[i] 是由卷积核 weight[i][0]、weight[i][1]、weight[i][2] 分别与输入通道 0、1、2 进行卷积操作，然后把三个操作得到的值相加得到的。

可以修改卷积核的值自行尝试，会发现3个通道的值得到的像素一样，并且为3个通道的卷积之和相加的结果

tensor扔进nn里的数据结构为 [batch_size, channels, height, width]

因为我自己试过一些卷积，我觉得对图片做卷积完全可以让RGB每个通道各用一个卷积核，每个输出通道的值直接由对应输入通道卷积得到（不做跨通道相加），于是利用 nn.functional 的 conv2d 来实现：

import torch
import torch.nn.functional as F
from torchvision import transforms
from PIL import Image

# Load the image as RGB and convert it to a batched tensor (1, C, H, W)
image_path = '../../img/3.png'
image = Image.open(image_path).convert('RGB')
image_tensor = transforms.ToTensor()(image).unsqueeze(0)

# One 3x3 box filter per colour channel; kernel has shape (3, 3, 3)
kernel = torch.tensor([
    [[1/9]*3]*3,

    [[1/9]*3]*3,

    [[1/9]*3]*3
], dtype=torch.float32)

num_kernels = kernel.shape[0]
num_channels = image_tensor.shape[1]

# Depthwise convolution: with groups == num_channels, F.conv2d expects a
# weight of shape (out_channels, in_channels / groups, kH, kW) = (3, 1, 3, 3),
# so filter i is applied to channel i only and nothing is summed across
# channels. (Reshaping a single 3x3 filter with unsqueeze(1).unsqueeze(2)
# would instead yield (3, 1, 1, 3), i.e. three 1x3 row filters.)
# padding=1 keeps the output the same height and width as the input.
output_tensor = F.conv2d(
    image_tensor,
    weight=kernel.unsqueeze(1),
    padding=1,
    groups=num_channels,
)

print("image_tensor.shape: ", image_tensor.shape)
print("kernel.shape: ", kernel.shape)
print("output_tensor.shape: ", output_tensor.shape)

# Display the output image
# output_image = output_tensor[0].clamp(0, 1)  # clamp pixel values to [0, 1]
# output_image = transforms.ToPILImage()(output_image)
show_tensor_img(output_tensor[0])

测试

调包实现卷积

优点：快，方便

缺点：学习过程慢，自定义参数麻烦

代码：

import torch
import torch.nn.functional as F
from torchvision import transforms
from PIL import Image

# Load the image as RGB and convert it to a batched tensor (1, C, H, W)
image_path = '../../img/3.png'
image = Image.open(image_path).convert('RGB')
image_tensor = transforms.ToTensor()(image).unsqueeze(0)

# One 3x4 constant filter per colour channel; kernel has shape (3, 3, 4).
# NOTE(review): the 12 weights of 1/16 sum to 0.75, so the result is a
# darkened blur; use 1/12 if a true mean filter is wanted.
kernel = torch.tensor([
    [[1/16]*4]*3,

    [[1/16]*4]*3,

    [[1/16]*4]*3
], dtype=torch.float32)

num_kernels = kernel.shape[0]
num_channels = image_tensor.shape[1]

# Depthwise convolution: with groups == num_channels, F.conv2d expects a
# weight of shape (out_channels, in_channels / groups, kH, kW) = (3, 1, 3, 4),
# so filter i is applied to channel i only. (Reshaping a single 3x4 filter
# with unsqueeze(1).unsqueeze(2) would instead yield (3, 1, 1, 4), i.e. three
# 1x4 row filters, which is not the same operation as the hand-written loops.)
# padding=0 gives an output of (H - 2, W - 3), the same size the hand-written
# NumPy / tensor convolutions in this note produce.
output_tensor = F.conv2d(
    image_tensor,
    weight=kernel.unsqueeze(1),
    padding=0,
    groups=num_channels,
)

print("image_tensor.shape: ", image_tensor.shape)
print("kernel.shape: ", kernel.shape)
print("output_tensor.shape: ", output_tensor.shape)

# Display the output image
# output_image = output_tensor[0].clamp(0, 1)  # clamp pixel values to [0, 1]
# output_image = transforms.ToPILImage()(output_image)

# Show the original and the convolved image side by side
plt.figure(figsize=(10, 5))

plt.subplot(1, 2, 1)
plt.imshow(image)
plt.title('Original Image')
plt.axis('off')
plt.subplot(1, 2, 2)
plt.title('Convolved Image')
show_tensor_img(output_tensor[0])

运行时间：0.5s

效果：

diaobao

numpy手搓卷积

代码：

import cv2
import numpy as np
import matplotlib.pyplot as plt
# 卷积

def custom_convolution(image, kernel):
    """Filter every channel of ``image`` with the 2-D ``kernel`` (no padding).

    The kernel is slid over the image without being flipped and without any
    padding, so the output is smaller than the input.

    Args:
        image: Array of shape (height, width, channels).
        kernel: Array of shape (k_height, k_width).

    Returns:
        A float32 array of shape
        (height - k_height + 1, width - k_width + 1, channels).

    Raises:
        ValueError: If ``image`` is not 3-D, ``kernel`` is not 2-D, or the
            kernel is more than one row/column larger than the image (the
            output size would be negative).
    """
    # Promote both operands to float64 up front so integer images (e.g. the
    # uint8 arrays returned by cv2.imread) cannot wrap around.
    image = np.asarray(image, dtype=np.float64)
    kernel = np.asarray(kernel, dtype=np.float64)

    height, width, channels = image.shape
    k_height, k_width = kernel.shape
    out_height = height - k_height + 1
    out_width = width - k_width + 1

    accumulator = np.zeros((out_height, out_width, channels), dtype=np.float64)

    # Loop over the few kernel taps instead of every output pixel: each tap
    # contributes its weight times the image shifted by the tap's offset.
    for di in range(k_height):
        for dj in range(k_width):
            shifted = image[di:di + out_height, dj:dj + out_width, :]
            accumulator += kernel[di, dj] * shifted

    return accumulator.astype(np.float32)

# Load the image
image_path = '../img/3.png'  # replace with the path to your image file
image = cv2.imread(image_path)
if image is None:
    # cv2.imread signals a missing/unreadable file by returning None
    raise FileNotFoundError(f"cv2.imread could not read image: {image_path}")
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

# Define the 3x4 filter (every weight is 1/16)
kernel = np.array([[1/16]*4,
                   [1/16]*4,
                   [1/16]*4])

# Run the convolution
convolved_image = custom_convolution(image, kernel)

# Min-max rescale the result to [0, 255] for display. Note that this stretches
# the contrast, so the picture is not directly comparable with an un-rescaled
# result.
value_min = np.min(convolved_image)
value_range = np.max(convolved_image) - value_min
if value_range > 0:
    convolved_image_display = (convolved_image - value_min) / value_range * 255
else:
    # Constant image: avoid dividing by zero and show it as all black
    convolved_image_display = np.zeros_like(convolved_image)

# Show the original and the convolved image side by side
plt.figure(figsize=(10, 5))

plt.subplot(1, 2, 1)
plt.imshow(image)
plt.title('Original Image')
plt.axis('off')

plt.subplot(1, 2, 2)
plt.imshow(convolved_image_display.astype(np.uint8))
plt.title('Convolved Image')
plt.axis('off')

plt.tight_layout()
plt.show()

运行时间 23.8s

效果：

numpy

torch.tensor手搓卷积

# Hand-written convolution on torch tensors
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

def custom_convolution(image, kernel, device=device):
    """Filter every channel of ``image`` with the 2-D ``kernel`` (no padding).

    The kernel is slid over the image without being flipped and without any
    padding, so the output is smaller than the input.

    Args:
        image: Tensor of shape (channels, height, width).
        kernel: Tensor of shape (k_height, k_width).
        device: Device the computation runs on; defaults to the module-level
            ``device`` as it was when this function was defined.

    Returns:
        A float32 tensor on ``device`` of shape
        (channels, height - k_height + 1, width - k_width + 1).
    """
    # Move the operands once; everything derived from them stays on `device`.
    image = image.to(device=device, dtype=torch.float32)
    kernel = kernel.to(device=device, dtype=torch.float32)

    channels, height, width = image.shape
    k_height, k_width = kernel.shape
    out_height = height - k_height + 1
    out_width = width - k_width + 1

    convolved_image = torch.zeros(
        (channels, out_height, out_width), dtype=torch.float32, device=device
    )
    print(image.shape, convolved_image.shape)

    # Loop over the few kernel taps instead of every output pixel: each tap
    # contributes its weight times the image shifted by the tap's offset.
    # This avoids launching one tiny tensor op per pixel.
    for di in range(k_height):
        for dj in range(k_width):
            shifted = image[:, di:di + out_height, dj:dj + out_width]
            convolved_image += kernel[di, dj] * shifted

    return convolved_image

# Run the hand-written convolution on the un-batched image tensor with a
# 3x4 filter whose weights are all 1/16
img_tensor_conv_cus = custom_convolution(
    image_tensor[0],
    torch.tensor([[1/16] * 4 for _ in range(3)]),
)

# Show the original and the convolved image side by side
plt.figure(figsize=(10, 5))

# Left panel: original image
plt.subplot(1, 2, 1)
plt.imshow(image)
plt.title('Original Image')
plt.axis('off')

# Right panel: convolved result
plt.subplot(1, 2, 2)
plt.title('Convolved Image')
show_tensor_img(img_tensor_conv_cus)

运行时间：2min

效果：

torch_conv

Jerry的小屋

nn.Conv2d

测试

调包实现卷积

numpy手搓卷积

torch.tensor手搓卷积

利用Openai_api制作聊天机器人

konachan爬取图片

Jerry

叨叨几句... NOTHING

取消回复

Jerry的小屋