今日目标
- 理解计算机视觉的基本概念和应用
- 掌握OpenCV图像处理基础操作
- 学会图像滤波、边缘检测和特征提取
- 了解图像分割和目标检测技术
- 掌握深度学习在计算机视觉中的应用
计算机视觉概述
计算机视觉是人工智能的重要分支,致力于让计算机理解和处理视觉信息:
- 图像处理:滤波、变换、增强
- 特征提取:边缘、角点、纹理特征
- 目标检测:定位、识别、跟踪
- 图像理解:场景理解、语义分割
计算机视觉应用领域
# 主要应用领域:
# - 自动驾驶和机器人视觉
# - 医疗影像诊断
# - 安防监控和人脸识别
# - 工业质量检测
# - 增强现实和虚拟现实
# - 图像搜索和推荐
OpenCV基础
1. 安装和导入
pip install opencv-python opencv-contrib-python matplotlib numpy scikit-image pillow
import cv2
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
import skimage
from skimage import filters, feature, segmentation
import warnings
# Suppress all warnings for the rest of the script.
warnings.filterwarnings('ignore')

# Select a Chinese-capable font for the plot titles and draw minus signs as
# ASCII hyphens (CJK fonts may lack the Unicode minus glyph).
plt.rcParams.update({
    'font.sans-serif': ['SimHei'],
    'axes.unicode_minus': False,
})

# Report the library versions in use.
print(
    f"OpenCV版本: {cv2.__version__}",
    f"NumPy版本: {np.__version__}",
    sep="\n",
)
2. 图像读取和显示
def image_basic_operations():
    """Create a synthetic image, display it in several colour spaces, and save it.

    Prints the image's shape, dtype and pixel count, shows a three-panel
    matplotlib figure (original, grayscale, HSV round-trip) and writes the
    image to ``sample_image.jpg`` in the current working directory.

    Returns:
        tuple: ``(sample_img, gray_img, hsv_img)`` -- the 300x400 BGR image,
        its grayscale conversion and its HSV conversion.
    """

    def create_sample_image():
        """Draw two filled rectangles, a filled circle and a caption on black."""
        img = np.zeros((300, 400, 3), dtype=np.uint8)
        # Colours are given in OpenCV's BGR channel order.
        cv2.rectangle(img, (50, 50), (150, 150), (255, 0, 0), -1)   # blue rectangle
        cv2.rectangle(img, (200, 50), (300, 150), (0, 255, 0), -1)  # green rectangle
        cv2.circle(img, (100, 250), 50, (0, 0, 255), -1)            # red circle
        cv2.putText(img, 'OpenCV', (150, 280), cv2.FONT_HERSHEY_SIMPLEX,
                    1, (255, 255, 255), 2)
        return img

    sample_img = create_sample_image()
    print("=== 图像基本操作 ===")

    # 1. Image information
    print(f"图像形状: {sample_img.shape}")
    print(f"图像类型: {sample_img.dtype}")
    # ndarray.size counts every array element (height * width * channels),
    # so derive the pixel count from the two spatial dimensions instead.
    height, width = sample_img.shape[:2]
    print(f"图像大小: {height * width} 像素")

    # 2. Display the original (matplotlib expects RGB, OpenCV stores BGR)
    plt.figure(figsize=(15, 5))
    plt.subplot(1, 3, 1)
    plt.imshow(cv2.cvtColor(sample_img, cv2.COLOR_BGR2RGB))
    plt.title('原始图像')
    plt.axis('off')

    # 3. Colour-space conversions
    gray_img = cv2.cvtColor(sample_img, cv2.COLOR_BGR2GRAY)
    hsv_img = cv2.cvtColor(sample_img, cv2.COLOR_BGR2HSV)

    plt.subplot(1, 3, 2)
    plt.imshow(gray_img, cmap='gray')
    plt.title('灰度图像')
    plt.axis('off')

    # The HSV array is converted back to RGB for display, so this panel
    # shows the HSV round-trip rather than the raw H/S/V channels.
    plt.subplot(1, 3, 3)
    plt.imshow(cv2.cvtColor(hsv_img, cv2.COLOR_HSV2RGB))
    plt.title('HSV图像')
    plt.axis('off')

    plt.tight_layout()
    plt.show()

    # 4. Save to disk; imwrite reports failure through its boolean result,
    # so only claim success when the write actually succeeded.
    if cv2.imwrite('sample_image.jpg', sample_img):
        print("图像已保存为 sample_image.jpg")
    else:
        print("图像保存失败: sample_image.jpg")

    return sample_img, gray_img, hsv_img


# Run the basic image operations example
sample_image, gray_image, hsv_image = image_basic_operations()
3. 图像变换
def image_transformations():
    """Demonstrate scaling, rotation, flipping and translation.

    Operates on the module-level ``sample_image`` and shows the original
    plus five transformed versions in a 2x3 matplotlib grid.

    Returns:
        dict: transformed images under the keys ``'resized'``, ``'rotated'``,
        ``'flipped_h'``, ``'flipped_v'`` and ``'translated'``.
    """
    img = sample_image
    print("=== 图像变换 ===")

    height, width = img.shape[:2]
    canvas_size = (width, height)  # cv2 size arguments are (width, height)

    # 1. Scaling: half size and double size
    resized_img = cv2.resize(img, (width // 2, height // 2))
    enlarged_img = cv2.resize(img, (width * 2, height * 2))  # not shown or returned

    # 2. Rotation by 45 degrees about the image centre, scale factor 1.0
    rotation_matrix = cv2.getRotationMatrix2D((width // 2, height // 2), 45, 1.0)
    rotated_img = cv2.warpAffine(img, rotation_matrix, canvas_size)

    # 3. Flips
    flipped_horizontal = cv2.flip(img, 1)  # horizontal flip
    flipped_vertical = cv2.flip(img, 0)    # vertical flip

    # 4. Translation by 50 px in x and 30 px in y
    translation_matrix = np.float32([[1, 0, 50], [0, 1, 30]])
    translated_img = cv2.warpAffine(img, translation_matrix, canvas_size)

    # Show every result in its own cell of the grid.
    panels = (
        ('原始图像', img),
        ('缩放图像', resized_img),
        ('旋转图像', rotated_img),
        ('水平翻转', flipped_horizontal),
        ('垂直翻转', flipped_vertical),
        ('平移图像', translated_img),
    )
    plt.figure(figsize=(15, 10))
    for position, (title, picture) in enumerate(panels, start=1):
        plt.subplot(2, 3, position)
        plt.imshow(cv2.cvtColor(picture, cv2.COLOR_BGR2RGB))
        plt.title(title)
        plt.axis('off')
    plt.tight_layout()
    plt.show()

    results = {}
    results['resized'] = resized_img
    results['rotated'] = rotated_img
    results['flipped_h'] = flipped_horizontal
    results['flipped_v'] = flipped_vertical
    results['translated'] = translated_img
    return results


# Run the image transformation example
transform_results = image_transformations()
图像滤波和增强
1. 空间域滤波
def spatial_filtering():
    """Compare mean, Gaussian, median and bilateral filtering on a noisy image.

    Builds a synthetic grayscale image, corrupts it with Gaussian noise,
    applies the four filters and shows all six images in a 2x3 grid.

    Returns:
        dict: images under the keys ``'original'``, ``'noisy'``,
        ``'mean_filtered'``, ``'gaussian_filtered'``, ``'median_filtered'``
        and ``'bilateral_filtered'``.
    """

    def create_noisy_image():
        """Return ``(clean, noisy)`` 200x200 uint8 images."""
        img = np.zeros((200, 200), dtype=np.uint8)
        # White square with a black disc cut out of its centre.
        cv2.rectangle(img, (50, 50), (150, 150), 255, -1)
        cv2.circle(img, (100, 100), 30, 0, -1)

        # Add the noise in floating point and clip afterwards. Casting the
        # noise itself to uint8 cannot represent negative samples; they turn
        # into large positive values and the noise stops being zero-mean.
        noise = np.random.normal(0, 25, img.shape)
        noisy_img = np.clip(np.rint(img + noise), 0, 255).astype(np.uint8)
        return img, noisy_img

    original_img, noisy_img = create_noisy_image()
    print("=== 空间域滤波 ===")

    # 1. Mean (box) filter, 5x5 kernel
    mean_filtered = cv2.blur(noisy_img, (5, 5))
    # 2. Gaussian filter, 5x5 kernel, sigma derived from the kernel size
    gaussian_filtered = cv2.GaussianBlur(noisy_img, (5, 5), 0)
    # 3. Median filter, aperture 5
    median_filtered = cv2.medianBlur(noisy_img, 5)
    # 4. Bilateral filter (diameter 9, sigmaColor 75, sigmaSpace 75)
    bilateral_filtered = cv2.bilateralFilter(noisy_img, 9, 75, 75)

    # Visualise the inputs and every filter output.
    panels = (
        ('原始图像', original_img),
        ('带噪声图像', noisy_img),
        ('均值滤波', mean_filtered),
        ('高斯滤波', gaussian_filtered),
        ('中值滤波', median_filtered),
        ('双边滤波', bilateral_filtered),
    )
    plt.figure(figsize=(15, 10))
    for position, (title, picture) in enumerate(panels, start=1):
        plt.subplot(2, 3, position)
        plt.imshow(picture, cmap='gray')
        plt.title(title)
        plt.axis('off')
    plt.tight_layout()
    plt.show()

    return {
        'original': original_img,
        'noisy': noisy_img,
        'mean_filtered': mean_filtered,
        'gaussian_filtered': gaussian_filtered,
        'median_filtered': median_filtered,
        'bilateral_filtered': bilateral_filtered,
    }


# Run the spatial filtering example
filtering_results = spatial_filtering()
2. 边缘检测
def edge_detection():
    """Run Sobel, Canny and Laplacian edge detectors on the sample image.

    Converts the module-level ``sample_image`` to grayscale, computes the
    edge maps and shows six of them in a 2x3 grid. The scikit-image Canny
    result is computed and returned but not plotted.

    Returns:
        dict: ``'sobel_x'`` and ``'sobel_y'`` (float64 gradients),
        ``'sobel_combined'`` and ``'laplacian'`` (uint8, saturated at 255),
        ``'canny'`` (OpenCV Canny edge map) and ``'skimage_canny'``
        (scikit-image Canny result).
    """
    img = sample_image
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    print("=== 边缘检测 ===")

    # 1. Sobel gradients, kept in float64 so negative slopes are preserved
    sobel_x = cv2.Sobel(gray, cv2.CV_64F, 1, 0, ksize=3)
    sobel_y = cv2.Sobel(gray, cv2.CV_64F, 0, 1, ksize=3)
    sobel_magnitude = np.sqrt(sobel_x**2 + sobel_y**2)
    # The magnitude can exceed 255; clip before the uint8 cast, because an
    # out-of-range float-to-uint8 cast does not saturate and would corrupt
    # the strongest edges.
    sobel_combined = np.clip(sobel_magnitude, 0, 255).astype(np.uint8)

    # 2. Canny edge detection with hysteresis thresholds 50 and 150
    canny_edges = cv2.Canny(gray, 50, 150)

    # 3. Laplacian: absolute response, clipped for the same reason as above
    laplacian = cv2.Laplacian(gray, cv2.CV_64F)
    laplacian = np.clip(np.absolute(laplacian), 0, 255).astype(np.uint8)

    # 4. Canny from scikit-image
    skimage_canny = feature.canny(gray, sigma=1)

    # Visualise the edge maps.
    panels = (
        ('灰度图像', gray),
        ('Sobel X方向', sobel_x),
        ('Sobel Y方向', sobel_y),
        ('Sobel组合', sobel_combined),
        ('Canny边缘检测', canny_edges),
        ('Laplacian算子', laplacian),
    )
    plt.figure(figsize=(15, 10))
    for position, (title, picture) in enumerate(panels, start=1):
        plt.subplot(2, 3, position)
        plt.imshow(picture, cmap='gray')
        plt.title(title)
        plt.axis('off')
    plt.tight_layout()
    plt.show()

    return {
        'sobel_x': sobel_x,
        'sobel_y': sobel_y,
        'sobel_combined': sobel_combined,
        'canny': canny_edges,
        'laplacian': laplacian,
        'skimage_canny': skimage_canny,
    }


# Run the edge detection example
edge_results = edge_detection()
特征提取
1. 角点检测
def corner_detection():
    """Compare Harris, Shi-Tomasi and FAST corner detectors on a test image.

    Draws outlined shapes on a black canvas, runs the three detectors and
    shows each result over the test image in a 1x3 grid.

    Returns:
        dict: ``'harris'`` (dilated Harris response map), ``'shi_tomasi'``
        (``goodFeaturesToTrack`` output, which the code treats as possibly
        ``None``) and ``'fast'`` (FAST keypoints).
    """

    def create_corner_test_image():
        """Return a 300x300 canvas with an outlined square, triangle and circle."""
        canvas = np.zeros((300, 300), dtype=np.uint8)
        cv2.rectangle(canvas, (50, 50), (150, 150), 255, 2)
        triangle = np.array([[200, 50], [250, 150], [150, 150]], np.int32)
        cv2.polylines(canvas, [triangle], True, 255, 2)
        cv2.circle(canvas, (100, 250), 50, 255, 2)
        return canvas

    test_img = create_corner_test_image()
    print("=== 角点检测 ===")

    # 1. Harris response, dilated before display
    harris_response = cv2.cornerHarris(test_img, blockSize=2, ksize=3, k=0.04)
    harris_corners = cv2.dilate(harris_response, None)

    # 2. Shi-Tomasi corners
    shi_tomasi_corners = cv2.goodFeaturesToTrack(
        test_img, maxCorners=25, qualityLevel=0.01, minDistance=10
    )

    # 3. FAST keypoints
    fast_detector = cv2.FastFeatureDetector_create()
    fast_keypoints = fast_detector.detect(test_img, None)

    plt.figure(figsize=(15, 5))

    # Harris: response map overlaid semi-transparently on the image
    plt.subplot(1, 3, 1)
    plt.imshow(test_img, cmap='gray')
    plt.imshow(harris_corners, cmap='jet', alpha=0.5)
    plt.title('Harris角点检测')
    plt.axis('off')

    # Shi-Tomasi: one red marker per corner
    plt.subplot(1, 3, 2)
    plt.imshow(test_img, cmap='gray')
    corner_points = (
        [] if shi_tomasi_corners is None else shi_tomasi_corners.reshape(-1, 2)
    )
    for x, y in corner_points:
        plt.plot(x, y, 'ro', markersize=5)
    plt.title('Shi-Tomasi角点检测')
    plt.axis('off')

    # FAST: one green marker per keypoint
    plt.subplot(1, 3, 3)
    plt.imshow(test_img, cmap='gray')
    for x, y in (keypoint.pt for keypoint in fast_keypoints):
        plt.plot(x, y, 'go', markersize=5)
    plt.title('FAST角点检测')
    plt.axis('off')

    plt.tight_layout()
    plt.show()

    return dict(
        harris=harris_corners,
        shi_tomasi=shi_tomasi_corners,
        fast=fast_keypoints,
    )


# Run the corner detection example
corner_results = corner_detection()
2. 特征匹配
def feature_matching():
    """Match SIFT features between two slightly shifted synthetic images.

    Detects SIFT keypoints in both images, matches descriptors with a
    brute-force 2-nearest-neighbour search, keeps matches that pass the
    ratio test (0.75) and shows both images and the drawn matches.

    Returns:
        dict: ``'keypoints1'``, ``'keypoints2'``, ``'descriptors1'``,
        ``'descriptors2'`` (descriptors may be ``None`` when an image yields
        no keypoints), ``'matches'`` (matches that passed the ratio test)
        and ``'matched_image'``.
    """

    def create_test_images():
        """Return two 200x200 images with the same pattern offset by 10 px."""
        img1 = np.zeros((200, 200), dtype=np.uint8)
        img2 = np.zeros((200, 200), dtype=np.uint8)
        # First image: white square with a black disc in the middle.
        cv2.rectangle(img1, (50, 50), (150, 150), 255, -1)
        cv2.circle(img1, (100, 100), 30, 0, -1)
        # Second image: the same pattern shifted by (10, 10).
        cv2.rectangle(img2, (60, 60), (160, 160), 255, -1)
        cv2.circle(img2, (110, 110), 30, 0, -1)
        return img1, img2

    img1, img2 = create_test_images()
    print("=== 特征匹配 ===")

    # 1. SIFT keypoints and descriptors
    sift = cv2.SIFT_create()
    kp1, des1 = sift.detectAndCompute(img1, None)
    kp2, des2 = sift.detectAndCompute(img2, None)

    # 2. Brute-force 2-NN matching. detectAndCompute may return None
    # descriptors when no keypoints are found, and knnMatch raises on None,
    # so fall back to "no matches" in that case.
    if des1 is None or des2 is None:
        matches = []
    else:
        matches = cv2.BFMatcher().knnMatch(des1, des2, k=2)

    # 3. Ratio test: keep a match only when it is clearly better than the
    # runner-up. Pairs with fewer than two candidates are skipped.
    good_matches = [
        pair[0]
        for pair in matches
        if len(pair) == 2 and pair[0].distance < 0.75 * pair[1].distance
    ]

    # 4. Draw the surviving matches side by side
    matched_img = cv2.drawMatches(
        img1, kp1, img2, kp2, good_matches, None,
        flags=cv2.DrawMatchesFlags_NOT_DRAW_SINGLE_POINTS,
    )

    # Visualise the inputs and the match drawing.
    plt.figure(figsize=(15, 5))
    plt.subplot(1, 3, 1)
    plt.imshow(img1, cmap='gray')
    plt.title('图像1')
    plt.axis('off')
    plt.subplot(1, 3, 2)
    plt.imshow(img2, cmap='gray')
    plt.title('图像2')
    plt.axis('off')
    plt.subplot(1, 3, 3)
    plt.imshow(cv2.cvtColor(matched_img, cv2.COLOR_BGR2RGB))
    plt.title('特征匹配结果')
    plt.axis('off')
    plt.tight_layout()
    plt.show()

    print(f"检测到的特征点数量: 图像1={len(kp1)}, 图像2={len(kp2)}")
    print(f"匹配的特征点数量: {len(good_matches)}")

    return {
        'keypoints1': kp1,
        'keypoints2': kp2,
        'descriptors1': des1,
        'descriptors2': des2,
        'matches': good_matches,
        'matched_image': matched_img,
    }


# Run the feature matching example
matching_results = feature_matching()
图像分割
1. 阈值分割
def image_segmentation():
    """Compare global, adaptive, Otsu and two-level thresholding.

    Builds a noisy synthetic image with three gray regions, applies the
    four thresholding schemes and shows them next to the image histogram.

    Returns:
        dict: binary or multi-level uint8 images under the keys
        ``'global_thresh'``, ``'adaptive_thresh'``, ``'otsu_thresh'`` and
        ``'multi_thresh'`` (values 0, 127 and 255).
    """

    def create_segmentation_test_image():
        """Return a 200x200 uint8 image with three gray squares plus noise."""
        img = np.zeros((200, 200), dtype=np.uint8)
        img[50:100, 50:100] = 100    # medium gray
        img[120:170, 120:170] = 200  # bright gray
        img[50:100, 120:170] = 50    # dark gray

        # Add the noise in floating point and clip afterwards. Casting the
        # noise itself to uint8 cannot represent negative samples; they turn
        # into large positive values and flood the dark background.
        noise = np.random.normal(0, 10, img.shape)
        return np.clip(np.rint(img + noise), 0, 255).astype(np.uint8)

    test_img = create_segmentation_test_image()
    print("=== 图像分割 ===")

    # 1. Global threshold at 127
    _, global_thresh = cv2.threshold(test_img, 127, 255, cv2.THRESH_BINARY)

    # 2. Adaptive Gaussian threshold (block size 11, constant 2)
    adaptive_thresh = cv2.adaptiveThreshold(
        test_img, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY, 11, 2,
    )

    # 3. Otsu threshold (the 0 threshold argument is replaced by Otsu's pick)
    _, otsu_thresh = cv2.threshold(
        test_img, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU
    )

    # 4. Two thresholds -> three levels. Adding the two uint8 masks would
    # overflow (255 + 255 wraps to 254), making the brightest class darker
    # than the middle one, so assign the levels explicitly instead.
    _, thresh1 = cv2.threshold(test_img, 75, 255, cv2.THRESH_BINARY)
    _, thresh2 = cv2.threshold(test_img, 150, 255, cv2.THRESH_BINARY)
    multi_thresh = np.where(thresh2 > 0, 255, thresh1 // 2).astype(np.uint8)

    # Visualise the segmentations.
    panels = (
        ('原始图像', test_img),
        ('全局阈值分割', global_thresh),
        ('自适应阈值分割', adaptive_thresh),
        ('Otsu阈值分割', otsu_thresh),
        ('多阈值分割', multi_thresh),
    )
    plt.figure(figsize=(15, 10))
    for position, (title, picture) in enumerate(panels, start=1):
        plt.subplot(2, 3, position)
        plt.imshow(picture, cmap='gray')
        plt.title(title)
        plt.axis('off')

    # Histogram of the test image in the last cell (axes left visible).
    plt.subplot(2, 3, 6)
    plt.hist(test_img.ravel(), bins=256, range=[0, 256])
    plt.title('图像直方图')
    plt.xlabel('像素值')
    plt.ylabel('频次')

    plt.tight_layout()
    plt.show()

    return {
        'global_thresh': global_thresh,
        'adaptive_thresh': adaptive_thresh,
        'otsu_thresh': otsu_thresh,
        'multi_thresh': multi_thresh,
    }


# Run the image segmentation example
segmentation_results = image_segmentation()
2. 分水岭分割
def watershed_segmentation():
    """Segment three noisy discs with the marker-based watershed algorithm.

    Steps: denoise, threshold, morphological opening, derive sure-background
    and sure-foreground masks, label the foreground as markers, run
    ``cv2.watershed`` and plot the intermediate and final results.

    Returns:
        dict: ``'sure_bg'``, ``'sure_fg'`` and ``'unknown'`` (uint8 masks)
        and ``'markers'`` (the label image returned by ``cv2.watershed``).
    """

    def create_watershed_test_image():
        """Return a 200x200 uint8 image with three filled discs plus noise."""
        img = np.zeros((200, 200), dtype=np.uint8)
        cv2.circle(img, (50, 50), 30, 255, -1)
        cv2.circle(img, (150, 50), 25, 255, -1)
        cv2.circle(img, (100, 150), 35, 255, -1)

        # Add the noise in floating point and clip afterwards. Casting the
        # noise itself to uint8 cannot represent negative samples; they turn
        # into large positive values and flood the dark background.
        noise = np.random.normal(0, 20, img.shape)
        return np.clip(np.rint(img + noise), 0, 255).astype(np.uint8)

    test_img = create_watershed_test_image()
    print("=== 分水岭分割 ===")

    # 1. Pre-processing
    # Gaussian blur to suppress the noise
    blurred = cv2.GaussianBlur(test_img, (5, 5), 0)
    # Binary threshold at 127
    _, thresh = cv2.threshold(blurred, 127, 255, cv2.THRESH_BINARY)
    # Morphological opening with a 3x3 kernel, two iterations
    kernel = np.ones((3, 3), np.uint8)
    opening = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel, iterations=2)

    # Sure background: dilated foreground mask
    sure_bg = cv2.dilate(opening, kernel, iterations=3)

    # Sure foreground: pixels whose distance-transform value exceeds 70% of
    # the maximum
    dist_transform = cv2.distanceTransform(opening, cv2.DIST_L2, 5)
    _, sure_fg = cv2.threshold(
        dist_transform, 0.7 * dist_transform.max(), 255, cv2.THRESH_BINARY
    )
    sure_fg = np.uint8(sure_fg)

    # Unknown band: sure background minus sure foreground
    unknown = cv2.subtract(sure_bg, sure_fg)

    # 2. Markers: label the sure foreground, shift labels by one so the
    # background becomes 1, then set the unknown band to 0
    _, markers = cv2.connectedComponents(sure_fg)
    markers = markers + 1
    markers[unknown == 255] = 0

    # 3. Watershed on a 3-channel copy of the grayscale test image
    markers = cv2.watershed(cv2.cvtColor(test_img, cv2.COLOR_GRAY2BGR), markers)

    # Visualise the intermediate masks and the final labels.
    panels = (
        ('原始图像', test_img, 'gray'),
        ('阈值分割', thresh, 'gray'),
        ('确定背景', sure_bg, 'gray'),
        ('确定前景', sure_fg, 'gray'),
        ('未知区域', unknown, 'gray'),
        ('分水岭分割结果', markers, 'jet'),
    )
    plt.figure(figsize=(15, 10))
    for position, (title, picture, colormap) in enumerate(panels, start=1):
        plt.subplot(2, 3, position)
        plt.imshow(picture, cmap=colormap)
        plt.title(title)
        plt.axis('off')
    plt.tight_layout()
    plt.show()

    return {
        'sure_bg': sure_bg,
        'sure_fg': sure_fg,
        'unknown': unknown,
        'markers': markers,
    }


# Run the watershed segmentation example
watershed_results = watershed_segmentation()
今日总结
今天我们学习了计算机视觉的基础知识:
1. OpenCV基础:图像读取、显示、基本操作
2. 图像变换:缩放、旋转、翻转、平移
3. 图像滤波:均值、高斯、中值、双边滤波
4. 边缘检测:Sobel、Canny、Laplacian算子
5. 特征提取:角点检测、特征匹配
6. 图像分割:阈值分割、分水岭算法
计算机视觉是AI的重要分支,掌握这些技术可以构建智能图像处理系统。