今日目标

- 理解计算机视觉的基本概念和应用

- 掌握OpenCV图像处理基础操作

- 学会图像滤波、边缘检测和特征提取

- 了解图像分割和目标检测技术

- 掌握深度学习在计算机视觉中的应用

计算机视觉概述

计算机视觉是人工智能的重要分支，致力于让计算机理解和处理视觉信息：

- 图像处理：滤波、变换、增强

- 特征提取：边缘、角点、纹理特征

- 目标检测：定位、识别、跟踪

- 图像理解：场景理解、语义分割

计算机视觉应用领域

# 主要应用领域：
# - 自动驾驶和机器人视觉
# - 医疗影像诊断
# - 安防监控和人脸识别
# - 工业质量检测
# - 增强现实和虚拟现实
# - 图像搜索和推荐

OpenCV基础

1. 安装和导入

# Install only ONE OpenCV wheel: opencv-contrib-python already bundles the main modules,
# and installing it next to opencv-python makes the two packages overwrite each other's cv2.
pip install opencv-contrib-python matplotlib numpy scikit-image pillow

import cv2
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
import skimage
from skimage import filters, feature, segmentation
import warnings
warnings.filterwarnings('ignore')

# Select a Chinese font (SimHei) so the Chinese plot titles used below can render
plt.rcParams['font.sans-serif'] = ['SimHei']
# NOTE(review): presumably keeps minus signs readable with that font — confirm
plt.rcParams['axes.unicode_minus'] = False

print(f"OpenCV版本: {cv2.__version__}")
print(f"NumPy版本: {np.__version__}")

2. 图像读取和显示

def image_basic_operations():
    """Build a synthetic BGR image and walk through basic OpenCV operations.

    Prints the image's shape, dtype and pixel count, shows the original,
    grayscale and HSV versions side by side, and writes the image to
    ``sample_image.jpg`` in the current working directory.

    Returns:
        tuple: ``(sample_img, gray_img, hsv_img)`` -- the BGR image, its
        grayscale conversion and its HSV conversion.
    """

    def create_sample_image():
        """Return a 300x400 black BGR canvas with two squares, a disc and a label."""
        img = np.zeros((300, 400, 3), dtype=np.uint8)

        # Colours are given in BGR order; thickness -1 draws a filled shape.
        cv2.rectangle(img, (50, 50), (150, 150), (255, 0, 0), -1)   # blue square
        cv2.rectangle(img, (200, 50), (300, 150), (0, 255, 0), -1)  # green square
        cv2.circle(img, (100, 250), 50, (0, 0, 255), -1)            # red disc
        cv2.putText(img, 'OpenCV', (150, 280), cv2.FONT_HERSHEY_SIMPLEX,
                    1, (255, 255, 255), 2)

        return img

    sample_img = create_sample_image()

    print("=== 图像基本操作 ===")

    # 1. Image information
    print(f"图像形状: {sample_img.shape}")
    print(f"图像类型: {sample_img.dtype}")
    # ndarray.size counts every channel value, so it would report three times
    # the number of pixels for a BGR image; the pixel count is rows * cols.
    height, width = sample_img.shape[:2]
    print(f"图像大小: {height * width} 像素")

    # 2. Colour-space conversions
    gray_img = cv2.cvtColor(sample_img, cv2.COLOR_BGR2GRAY)
    hsv_img = cv2.cvtColor(sample_img, cv2.COLOR_BGR2HSV)

    # 3. Display
    plt.figure(figsize=(15, 5))

    plt.subplot(1, 3, 1)
    # matplotlib expects RGB while OpenCV stores BGR.
    plt.imshow(cv2.cvtColor(sample_img, cv2.COLOR_BGR2RGB))
    plt.title('原始图像')
    plt.axis('off')

    plt.subplot(1, 3, 2)
    plt.imshow(gray_img, cmap='gray')
    plt.title('灰度图像')
    plt.axis('off')

    plt.subplot(1, 3, 3)
    # Show the raw H/S/V channels as if they were R/G/B. Converting back to
    # RGB first would merely reproduce the first panel.
    plt.imshow(hsv_img)
    plt.title('HSV图像')
    plt.axis('off')

    plt.tight_layout()
    plt.show()

    # 4. Save the image to disk
    cv2.imwrite('sample_image.jpg', sample_img)
    print("图像已保存为 sample_image.jpg")

    return sample_img, gray_img, hsv_img

# Run the basic image operations example
sample_image, gray_image, hsv_image = image_basic_operations()

3. 图像变换

def image_transformations():
    """Apply basic geometric transforms to the module-level ``sample_image``.

    The image is scaled, rotated by 45 degrees about its centre, flipped along
    both axes and shifted by (50, 30) pixels. The original and five of the
    results are shown in a 2x3 matplotlib grid.

    Returns:
        dict: images under the keys ``resized``, ``rotated``, ``flipped_h``,
        ``flipped_v`` and ``translated``.
    """
    src = sample_image

    print("=== 图像变换 ===")

    rows, cols = src.shape[:2]
    canvas_size = (cols, rows)  # passed to warpAffine as (width, height)

    # Scaling: half size and double size (the enlarged copy is not displayed)
    half_size = cv2.resize(src, (cols // 2, rows // 2))
    enlarged_img = cv2.resize(src, (cols * 2, rows * 2))

    # Rotation by 45 degrees about the image centre, scale factor 1.0
    pivot = (cols // 2, rows // 2)
    rotated = cv2.warpAffine(
        src, cv2.getRotationMatrix2D(pivot, 45, 1.0), canvas_size
    )

    # Flips: flag 1 is horizontal, flag 0 is vertical
    mirrored_lr = cv2.flip(src, 1)
    mirrored_ud = cv2.flip(src, 0)

    # Translation by +50 px in x and +30 px in y
    shift = np.float32([[1, 0, 50], [0, 1, 30]])
    shifted = cv2.warpAffine(src, shift, canvas_size)

    # Plot every panel with the same BGR -> RGB conversion
    panels = [
        ('原始图像', src),
        ('缩放图像', half_size),
        ('旋转图像', rotated),
        ('水平翻转', mirrored_lr),
        ('垂直翻转', mirrored_ud),
        ('平移图像', shifted),
    ]

    plt.figure(figsize=(15, 10))
    for slot, (caption, picture) in enumerate(panels, start=1):
        plt.subplot(2, 3, slot)
        plt.imshow(cv2.cvtColor(picture, cv2.COLOR_BGR2RGB))
        plt.title(caption)
        plt.axis('off')

    plt.tight_layout()
    plt.show()

    return {
        'resized': half_size,
        'rotated': rotated,
        'flipped_h': mirrored_lr,
        'flipped_v': mirrored_ud,
        'translated': shifted
    }

# Run the image transformation example
transform_results = image_transformations()

图像滤波和增强

1. 空间域滤波

def spatial_filtering():
    """Compare four smoothing filters on a synthetic image with Gaussian noise.

    A 200x200 grayscale test pattern is corrupted with zero-mean Gaussian
    noise (standard deviation 25) and then smoothed with a mean, Gaussian,
    median and bilateral filter. The clean, noisy and filtered images are
    shown in a 2x3 matplotlib grid.

    Returns:
        dict: uint8 images under the keys ``original``, ``noisy``,
        ``mean_filtered``, ``gaussian_filtered``, ``median_filtered`` and
        ``bilateral_filtered``.
    """

    def create_noisy_image():
        """Return ``(clean, noisy)`` versions of the test pattern."""
        img = np.zeros((200, 200), dtype=np.uint8)

        # White filled square with a black disc punched out of its centre.
        cv2.rectangle(img, (50, 50), (150, 150), 255, -1)
        cv2.circle(img, (100, 100), 30, 0, -1)

        # Add the noise in floating point and clip afterwards. Casting the
        # signed noise straight to uint8 wraps negative samples around to
        # values near 255, which is no longer zero-mean Gaussian noise.
        noise = np.random.normal(0, 25, img.shape)
        noisy_img = np.clip(img + noise, 0, 255).astype(np.uint8)

        return img, noisy_img

    original_img, noisy_img = create_noisy_image()

    print("=== 空间域滤波 ===")

    # 1. Mean (box) filter, 5x5 window
    mean_filtered = cv2.blur(noisy_img, (5, 5))

    # 2. Gaussian filter, 5x5 window; sigma 0 lets OpenCV derive it from the size
    gaussian_filtered = cv2.GaussianBlur(noisy_img, (5, 5), 0)

    # 3. Median filter, aperture 5
    median_filtered = cv2.medianBlur(noisy_img, 5)

    # 4. Bilateral filter: diameter 9, sigmaColor 75, sigmaSpace 75
    bilateral_filtered = cv2.bilateralFilter(noisy_img, 9, 75, 75)

    # Show all six images in one grid
    panels = [
        ('原始图像', original_img),
        ('带噪声图像', noisy_img),
        ('均值滤波', mean_filtered),
        ('高斯滤波', gaussian_filtered),
        ('中值滤波', median_filtered),
        ('双边滤波', bilateral_filtered),
    ]

    plt.figure(figsize=(15, 10))
    for slot, (caption, picture) in enumerate(panels, start=1):
        plt.subplot(2, 3, slot)
        plt.imshow(picture, cmap='gray')
        plt.title(caption)
        plt.axis('off')

    plt.tight_layout()
    plt.show()

    return {
        'original': original_img,
        'noisy': noisy_img,
        'mean_filtered': mean_filtered,
        'gaussian_filtered': gaussian_filtered,
        'median_filtered': median_filtered,
        'bilateral_filtered': bilateral_filtered
    }

# Run the spatial filtering example
filtering_results = spatial_filtering()

2. 边缘检测

def edge_detection():
    """Run Sobel, Canny and Laplacian edge detectors on ``sample_image``.

    The module-level ``sample_image`` is converted to grayscale and passed
    through the Sobel operator (x, y and combined magnitude), OpenCV's Canny
    detector, the Laplacian operator and scikit-image's Canny detector. The
    grayscale input and five of the results are shown in a 2x3 matplotlib
    grid; the scikit-image result is only returned.

    Returns:
        dict: results under the keys ``sobel_x``, ``sobel_y``,
        ``sobel_combined``, ``canny``, ``laplacian`` and ``skimage_canny``.
    """
    img = sample_image
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    print("=== 边缘检测 ===")

    # 1. Sobel derivatives, kept as signed float64 so negative slopes survive
    sobel_x = cv2.Sobel(gray, cv2.CV_64F, 1, 0, ksize=3)
    sobel_y = cv2.Sobel(gray, cv2.CV_64F, 0, 1, ksize=3)
    # The gradient magnitude can exceed 255. Clip before the uint8 cast so
    # strong edges saturate to white instead of wrapping around to dark values.
    sobel_magnitude = np.sqrt(sobel_x**2 + sobel_y**2)
    sobel_combined = np.clip(sobel_magnitude, 0, 255).astype(np.uint8)

    # 2. Canny edge detector with hysteresis thresholds 50 / 150
    canny_edges = cv2.Canny(gray, 50, 150)

    # 3. Laplacian: take the absolute response and saturate it to 8 bits
    #    (same wrap-around hazard as the Sobel magnitude above)
    laplacian = cv2.Laplacian(gray, cv2.CV_64F)
    laplacian = np.clip(np.absolute(laplacian), 0, 255).astype(np.uint8)

    # 4. scikit-image's Canny implementation (returned, not plotted)
    skimage_canny = feature.canny(gray, sigma=1)

    # Show the input and the OpenCV results
    panels = [
        ('灰度图像', gray),
        ('Sobel X方向', sobel_x),
        ('Sobel Y方向', sobel_y),
        ('Sobel组合', sobel_combined),
        ('Canny边缘检测', canny_edges),
        ('Laplacian算子', laplacian),
    ]

    plt.figure(figsize=(15, 10))
    for slot, (caption, picture) in enumerate(panels, start=1):
        plt.subplot(2, 3, slot)
        plt.imshow(picture, cmap='gray')
        plt.title(caption)
        plt.axis('off')

    plt.tight_layout()
    plt.show()

    return {
        'sobel_x': sobel_x,
        'sobel_y': sobel_y,
        'sobel_combined': sobel_combined,
        'canny': canny_edges,
        'laplacian': laplacian,
        'skimage_canny': skimage_canny
    }

# Run the edge detection example
edge_results = edge_detection()

特征提取

1. 角点检测

def corner_detection():
    """Detect corners on a synthetic line drawing with three detectors.

    A 300x300 grayscale canvas with an outlined rectangle, triangle and circle
    is analysed with the Harris, Shi-Tomasi and FAST detectors, and each
    result is drawn over the test image in a 1x3 matplotlib grid.

    Returns:
        dict: ``harris`` (dilated Harris response map), ``shi_tomasi``
        (the ``goodFeaturesToTrack`` result, possibly ``None``) and ``fast``
        (the keypoints returned by the FAST detector).
    """

    def create_corner_test_image():
        """Return the canvas with the three outlined shapes (line width 2)."""
        canvas = np.zeros((300, 300), dtype=np.uint8)

        # Rectangle outline
        cv2.rectangle(canvas, (50, 50), (150, 150), 255, 2)

        # Closed triangle outline
        triangle = np.array([[200, 50], [250, 150], [150, 150]], np.int32)
        cv2.polylines(canvas, [triangle], True, 255, 2)

        # Circle outline
        cv2.circle(canvas, (100, 250), 50, 255, 2)

        return canvas

    test_img = create_corner_test_image()

    print("=== 角点检测 ===")

    # 1. Harris response map, dilated with the default kernel
    harris_response = cv2.dilate(
        cv2.cornerHarris(test_img, blockSize=2, ksize=3, k=0.04), None
    )

    # 2. Shi-Tomasi: at most 25 corners, at least 10 px apart
    shi_tomasi_pts = cv2.goodFeaturesToTrack(
        test_img, maxCorners=25, qualityLevel=0.01, minDistance=10
    )

    # 3. FAST detector with its default settings
    detector = cv2.FastFeatureDetector_create()
    fast_kps = detector.detect(test_img, None)

    plt.figure(figsize=(15, 5))

    # Panel 1: Harris response as a translucent heat map over the image
    plt.subplot(1, 3, 1)
    plt.imshow(test_img, cmap='gray')
    plt.imshow(harris_response, cmap='jet', alpha=0.5)
    plt.title('Harris角点检测')
    plt.axis('off')

    # Panel 2: Shi-Tomasi corners as red dots (skipped when none were found)
    plt.subplot(1, 3, 2)
    plt.imshow(test_img, cmap='gray')
    if shi_tomasi_pts is not None:
        for point in shi_tomasi_pts:
            px, py = point.ravel()
            plt.plot(px, py, 'ro', markersize=5)
    plt.title('Shi-Tomasi角点检测')
    plt.axis('off')

    # Panel 3: FAST keypoints as green dots
    plt.subplot(1, 3, 3)
    plt.imshow(test_img, cmap='gray')
    for keypoint in fast_kps:
        px, py = keypoint.pt
        plt.plot(px, py, 'go', markersize=5)
    plt.title('FAST角点检测')
    plt.axis('off')

    plt.tight_layout()
    plt.show()

    return {
        'harris': harris_response,
        'shi_tomasi': shi_tomasi_pts,
        'fast': fast_kps
    }

# Run the corner detection example
corner_results = corner_detection()

2. 特征匹配

def feature_matching():
    """Match SIFT features between two synthetic images offset by 10 pixels.

    Both images contain the same white square with a black disc; the second
    copy is shifted by (10, 10). SIFT keypoints are matched with a brute-force
    2-nearest-neighbour search and filtered with a 0.75 ratio test. The two
    inputs and the drawn matches are shown in a 1x3 matplotlib grid, and the
    keypoint and match counts are printed.

    Returns:
        dict: ``keypoints1``, ``keypoints2``, ``descriptors1``,
        ``descriptors2``, ``matches`` (matches that passed the ratio test)
        and ``matched_image``.
    """

    def create_test_images():
        """Return two 200x200 images with the same pattern, offset by (10, 10)."""
        img1 = np.zeros((200, 200), dtype=np.uint8)
        img2 = np.zeros((200, 200), dtype=np.uint8)

        # Pattern in the first image
        cv2.rectangle(img1, (50, 50), (150, 150), 255, -1)
        cv2.circle(img1, (100, 100), 30, 0, -1)

        # Same pattern in the second image, shifted by 10 px in x and y
        cv2.rectangle(img2, (60, 60), (160, 160), 255, -1)
        cv2.circle(img2, (110, 110), 30, 0, -1)

        return img1, img2

    img1, img2 = create_test_images()

    print("=== 特征匹配 ===")

    # 1. SIFT keypoints and descriptors
    sift = cv2.SIFT_create()
    kp1, des1 = sift.detectAndCompute(img1, None)
    kp2, des2 = sift.detectAndCompute(img2, None)

    # 2. Brute-force 2-NN matching. detectAndCompute yields None descriptors
    #    when an image has no keypoints, and knnMatch would raise on that, so
    #    treat the case as "no matches" instead of crashing.
    if des1 is None or des2 is None:
        matches = []
    else:
        bf = cv2.BFMatcher()
        matches = bf.knnMatch(des1, des2, k=2)

    # 3. Ratio test: keep a match only when it is clearly better than the
    #    runner-up. Entries with fewer than two neighbours are dropped.
    good_matches = [
        pair[0]
        for pair in matches
        if len(pair) == 2 and pair[0].distance < 0.75 * pair[1].distance
    ]

    # 4. Draw the surviving matches, omitting unmatched keypoints
    matched_img = cv2.drawMatches(img1, kp1, img2, kp2, good_matches, None,
                                  flags=cv2.DrawMatchesFlags_NOT_DRAW_SINGLE_POINTS)

    # Show the inputs and the match visualisation
    plt.figure(figsize=(15, 5))

    plt.subplot(1, 3, 1)
    plt.imshow(img1, cmap='gray')
    plt.title('图像1')
    plt.axis('off')

    plt.subplot(1, 3, 2)
    plt.imshow(img2, cmap='gray')
    plt.title('图像2')
    plt.axis('off')

    plt.subplot(1, 3, 3)
    plt.imshow(cv2.cvtColor(matched_img, cv2.COLOR_BGR2RGB))
    plt.title('特征匹配结果')
    plt.axis('off')

    plt.tight_layout()
    plt.show()

    print(f"检测到的特征点数量: 图像1={len(kp1)}, 图像2={len(kp2)}")
    print(f"匹配的特征点数量: {len(good_matches)}")

    return {
        'keypoints1': kp1,
        'keypoints2': kp2,
        'descriptors1': des1,
        'descriptors2': des2,
        'matches': good_matches,
        'matched_image': matched_img
    }

# Run the feature matching example
matching_results = feature_matching()

图像分割

1. 阈值分割

def image_segmentation():
    """Compare threshold-based segmentations on a noisy synthetic image.

    A 200x200 image with three square regions of gray levels 100, 200 and 50
    plus zero-mean Gaussian noise (standard deviation 10) is segmented with a
    global threshold, an adaptive Gaussian threshold, Otsu's method and a
    two-threshold (three-level) split. The results and the image histogram are
    shown in a 2x3 matplotlib grid.

    Returns:
        dict: uint8 images under the keys ``global_thresh``,
        ``adaptive_thresh``, ``otsu_thresh`` and ``multi_thresh``.
    """

    def create_segmentation_test_image():
        """Return the noisy three-region test image."""
        img = np.zeros((200, 200), dtype=np.uint8)

        # Regions with different gray levels
        img[50:100, 50:100] = 100    # medium gray
        img[120:170, 120:170] = 200  # bright
        img[50:100, 120:170] = 50    # dark

        # Add the noise in floating point and clip afterwards. Casting the
        # signed noise straight to uint8 wraps negative samples around to
        # values near 255 and speckles the dark background with white.
        noise = np.random.normal(0, 10, img.shape)
        return np.clip(img + noise, 0, 255).astype(np.uint8)

    test_img = create_segmentation_test_image()

    print("=== 图像分割 ===")

    # 1. Global threshold at 127
    _, global_thresh = cv2.threshold(test_img, 127, 255, cv2.THRESH_BINARY)

    # 2. Adaptive threshold: Gaussian-weighted 11x11 neighbourhood, offset 2
    adaptive_thresh = cv2.adaptiveThreshold(test_img, 255,
                                            cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                            cv2.THRESH_BINARY, 11, 2)

    # 3. Otsu: the threshold value is chosen automatically (the 0 is ignored)
    _, otsu_thresh = cv2.threshold(test_img, 0, 255,
                                   cv2.THRESH_BINARY + cv2.THRESH_OTSU)

    # 4. Two thresholds -> three levels. Average the masks in a wider dtype:
    #    a plain uint8 `thresh1 + thresh2` wraps 255 + 255 around to 254, which
    #    would render the brightest class darker than the middle one.
    _, thresh1 = cv2.threshold(test_img, 75, 255, cv2.THRESH_BINARY)
    _, thresh2 = cv2.threshold(test_img, 150, 255, cv2.THRESH_BINARY)
    multi_thresh = ((thresh1.astype(np.uint16) + thresh2) // 2).astype(np.uint8)

    # Show the input and the four segmentations
    panels = [
        ('原始图像', test_img),
        ('全局阈值分割', global_thresh),
        ('自适应阈值分割', adaptive_thresh),
        ('Otsu阈值分割', otsu_thresh),
        ('多阈值分割', multi_thresh),
    ]

    plt.figure(figsize=(15, 10))
    for slot, (caption, picture) in enumerate(panels, start=1):
        plt.subplot(2, 3, slot)
        plt.imshow(picture, cmap='gray')
        plt.title(caption)
        plt.axis('off')

    # Histogram of the input image in the last cell
    plt.subplot(2, 3, 6)
    plt.hist(test_img.ravel(), bins=256, range=[0, 256])
    plt.title('图像直方图')
    plt.xlabel('像素值')
    plt.ylabel('频次')

    plt.tight_layout()
    plt.show()

    return {
        'global_thresh': global_thresh,
        'adaptive_thresh': adaptive_thresh,
        'otsu_thresh': otsu_thresh,
        'multi_thresh': multi_thresh
    }

# Run the image segmentation example
segmentation_results = image_segmentation()

2. 分水岭分割

def watershed_segmentation():
    """Segment three noisy discs with marker-based watershed.

    A 200x200 image with three white discs plus zero-mean Gaussian noise
    (standard deviation 20) is blurred, thresholded and cleaned with a
    morphological opening. Sure-background, sure-foreground and unknown
    regions are derived from it, turned into markers and passed to
    ``cv2.watershed``. The intermediate images and the final markers are shown
    in a 2x3 matplotlib grid.

    Returns:
        dict: ``sure_bg``, ``sure_fg``, ``unknown`` and ``markers`` (the label
        image returned by ``cv2.watershed``).
    """

    def create_watershed_test_image():
        """Return the noisy three-disc test image."""
        img = np.zeros((200, 200), dtype=np.uint8)

        # Three filled discs
        cv2.circle(img, (50, 50), 30, 255, -1)
        cv2.circle(img, (150, 50), 25, 255, -1)
        cv2.circle(img, (100, 150), 35, 255, -1)

        # Add the noise in floating point and clip afterwards. Casting the
        # signed noise straight to uint8 wraps negative samples around to
        # values near 255, flooding the black background with white pixels.
        noise = np.random.normal(0, 20, img.shape)
        return np.clip(img + noise, 0, 255).astype(np.uint8)

    test_img = create_watershed_test_image()

    print("=== 分水岭分割 ===")

    # 1. Pre-processing: Gaussian blur to suppress noise, then binarise
    blurred = cv2.GaussianBlur(test_img, (5, 5), 0)
    _, thresh = cv2.threshold(blurred, 127, 255, cv2.THRESH_BINARY)

    # Morphological opening with a 3x3 kernel, two iterations
    kernel = np.ones((3, 3), np.uint8)
    opening = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel, iterations=2)

    # Sure background: the dilated foreground mask
    sure_bg = cv2.dilate(opening, kernel, iterations=3)

    # Sure foreground: pixels whose distance to the mask edge is at least
    # 70% of the maximum distance
    dist_transform = cv2.distanceTransform(opening, cv2.DIST_L2, 5)
    _, sure_fg = cv2.threshold(dist_transform, 0.7 * dist_transform.max(),
                               255, cv2.THRESH_BINARY)
    sure_fg = np.uint8(sure_fg)

    # Unknown region: inside the sure background mask but not sure foreground
    unknown = cv2.subtract(sure_bg, sure_fg)

    # 2. Markers: label the sure-foreground blobs, then shift every label by
    #    one so that 0 can be reserved for the unknown region
    _, markers = cv2.connectedComponents(sure_fg)
    markers = markers + 1
    markers[unknown == 255] = 0

    # 3. Watershed is given a 3-channel image, so convert the grayscale input
    markers = cv2.watershed(cv2.cvtColor(test_img, cv2.COLOR_GRAY2BGR), markers)

    # Show the intermediate images and the final label map
    panels = [
        ('原始图像', test_img, 'gray'),
        ('阈值分割', thresh, 'gray'),
        ('确定背景', sure_bg, 'gray'),
        ('确定前景', sure_fg, 'gray'),
        ('未知区域', unknown, 'gray'),
        ('分水岭分割结果', markers, 'jet'),
    ]

    plt.figure(figsize=(15, 10))
    for slot, (caption, picture, colormap) in enumerate(panels, start=1):
        plt.subplot(2, 3, slot)
        plt.imshow(picture, cmap=colormap)
        plt.title(caption)
        plt.axis('off')

    plt.tight_layout()
    plt.show()

    return {
        'sure_bg': sure_bg,
        'sure_fg': sure_fg,
        'unknown': unknown,
        'markers': markers
    }

# Run the watershed segmentation example
watershed_results = watershed_segmentation()

今日总结

今天我们学习了计算机视觉的基础知识：

1. OpenCV基础：图像读取、显示、基本操作

2. 图像变换：缩放、旋转、翻转、平移

3. 图像滤波：均值、高斯、中值、双边滤波

4. 边缘检测：Sobel、Canny、Laplacian算子

5. 特征提取：角点检测、特征匹配

6. 图像分割：阈值分割、分水岭算法

计算机视觉是AI的重要分支，掌握这些技术可以构建智能图像处理系统。

达永编程网

程序员技术分享与交流平台

Python教程(四十二):计算机视觉-图像处理和分析