Datawhale AI夏令营第五期CV方向-城市管理违规行为智能识别-Task1

赛题解析

城市管理违规行为智能识别
初赛任务是根据给定的城管视频监控数据集，进行城市违规行为的检测。违规行为主要包括垃圾桶满溢、机动车违停、非机动车违停等。
选手需要能够从视频中分析并标记出违规行为，提供违规行为发生的时间和位置信息。

数据可视化

首先对现有数据可视化，直观地了解数据集。

import json
import os

import cv2
import numpy as np
from PIL import Image, ImageDraw, ImageFont
# Folder locations: raw videos, their JSON annotations, and the folder that
# receives the annotated copies.
video_folder = r"D:\Illegal_behavior_detection\训练集(有标注第一批)\视频"
annotation_folder = r"D:\Illegal_behavior_detection\训练集(有标注第一批)\标注"
output_folder = r"D:\Illegal_behavior_detection\训练集(有标注第一批)\标注视频"

# Colour used for each category. The boxes are drawn with PIL on an RGB image,
# so these tuples are in RGB order (not OpenCV's BGR order).
category_colors = {
    "非机动车违停": (255, 0, 0),    # red
    "机动车违停": (255, 255, 0),    # yellow
    "垃圾桶满溢": (0, 0, 255),      # blue
    "违法经营": (0, 255, 0),        # green
}
# Fallback colour (white) for categories that are not in category_colors.
default_color = (255, 255, 255)


def group_annotations_by_frame(annotations):
    """Group annotation records by their ``frame_id``.

    Args:
        annotations: iterable of dicts, each with at least a ``frame_id`` key.

    Returns:
        A dict mapping every frame id to the list of records for that frame,
        in their original order.
    """
    grouped = {}
    for annotation in annotations:
        grouped.setdefault(annotation['frame_id'], []).append(annotation)
    return grouped


def draw_annotations(frame, frame_annotations, font):
    """Return a BGR copy of ``frame`` with boxes and category labels drawn.

    Args:
        frame: BGR image array as returned by ``cv2.VideoCapture.read``.
        frame_annotations: records for this frame; each provides ``bbox``
            (indexed 0..3 as the two rectangle corners) and ``category``.
        font: PIL font used for the label; it must support Chinese glyphs
            because the category names are Chinese.
    """
    # PIL is used for drawing because cv2.putText cannot render Chinese text.
    frame_pil = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
    draw = ImageDraw.Draw(frame_pil)
    for ann in frame_annotations:
        bbox = ann['bbox']
        category = ann['category']
        color = category_colors.get(category, default_color)
        draw.rectangle([bbox[0], bbox[1], bbox[2], bbox[3]], outline=color, width=2)
        # Put the label just above the box, but keep it inside the image when
        # the box touches the top edge.
        draw.text((bbox[0], max(bbox[1] - 25, 0)), category, font=font, fill=color)
    return cv2.cvtColor(np.array(frame_pil), cv2.COLOR_RGB2BGR)


def annotate_video(video_file, font):
    """Write an annotated copy of one video into ``output_folder``.

    Args:
        video_file: file name (not path) of a video inside ``video_folder``.
        font: PIL font forwarded to ``draw_annotations``.

    Returns:
        True when an output video was written, False when the video was
        skipped because its annotation file is missing or it cannot be opened.
    """
    video_path = os.path.join(video_folder, video_file)
    annotation_path = os.path.join(
        annotation_folder, os.path.splitext(video_file)[0] + '.json')

    # Load the annotations before creating the writer: otherwise a missing
    # file would leave an empty output video behind, and that video would be
    # treated as "already labelled" on the next run.
    if not os.path.isfile(annotation_path):
        print(f"找不到标注文件: {annotation_path}")
        return False
    with open(annotation_path, 'r', encoding='utf-8') as f:
        frame_id_to_annotations = group_annotations_by_frame(json.load(f))

    cap = cv2.VideoCapture(video_path)
    out = None
    try:
        if not cap.isOpened():
            print(f"无法打开视频文件: {video_file}")
            return False

        # The output keeps the source resolution and frame rate.
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = cap.get(cv2.CAP_PROP_FPS)

        output_path = os.path.join(output_folder, video_file)
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

        frame_id = 0
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            frame_annotations = frame_id_to_annotations.get(frame_id)
            if frame_annotations:
                frame = draw_annotations(frame, frame_annotations, font)
            # Every frame is written, annotated or not, so the output has the
            # same length as the input.
            out.write(frame)
            frame_id += 1
    finally:
        # Release the handles even if decoding or drawing fails part-way.
        cap.release()
        if out is not None:
            out.release()
    return True


def main():
    """Annotate every ``.mp4`` in ``video_folder`` that has no output yet."""
    os.makedirs(output_folder, exist_ok=True)
    video_files = [f for f in os.listdir(video_folder) if f.endswith('.mp4')]
    # Videos already present in the output folder are skipped.
    labeled_video_files = {f for f in os.listdir(output_folder) if f.endswith('.mp4')}

    # A font file with Chinese glyphs is required for the category labels.
    font = ImageFont.truetype("simhei.ttf", 24)

    for video_file in video_files:
        if video_file in labeled_video_files:
            continue
        annotate_video(video_file, font)

    print("标注视频已成功生成并保存到输出文件夹。")


if __name__ == "__main__":
    main()

在这里插入图片描述
在观看标注后的视频时，发现部分数据的标注存在严重错误，需要用标注软件进行修正。

标注格式转换

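由于各视频内容趋于同质化，且相邻帧之间差异很小，将所有帧(60000+)都用于训练意义不大，故进行抽帧：每10帧保留1帧，将数据集规模减小至约6000帧。抽帧的同时，把标注由像素坐标的 [x_min, y_min, x_max, y_max] 转换为 YOLO 训练所需的归一化 (x_center, y_center, width, height) 格式，每张图片对应一个同名的 .txt 标签文件。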

def bbox_to_yolo(bbox, img_width, img_height):
    """Convert a corner-format box to normalised YOLO centre format.

    Args:
        bbox: sequence ``(x_min, y_min, x_max, y_max)`` in pixels.
        img_width: image width in pixels.
        img_height: image height in pixels.

    Returns:
        Tuple ``(x_center, y_center, width, height)``, each divided by the
        corresponding image dimension.
    """
    x_min, y_min, x_max, y_max = bbox
    x_center = (x_min + x_max) / 2 / img_width
    y_center = (y_min + y_max) / 2 / img_height
    width = (x_max - x_min) / img_width
    height = (y_max - y_min) / img_height
    return x_center, y_center, width, height


def extract_frames(anno_path, video_path, img_dir, label_dir, labels, frame_stride=10):
    """Save every ``frame_stride``-th frame of a video plus its YOLO labels.

    Each sampled frame is written as ``<img_dir><stem>_<frame_idx>.jpg``. When
    the frame has annotations, a label file ``<label_dir><stem>_<frame_idx>.txt``
    is written with one ``class x_center y_center width height`` line per box.
    ``stem`` is the annotation file name up to its first dot.

    Args:
        anno_path: path of the JSON annotation file for the video.
        video_path: path of the video file.
        img_dir: prefix for image paths; it is concatenated as-is, so it
            should end with a path separator.
        label_dir: prefix for label paths; concatenated the same way.
        labels: list of category names; a box's class index is the position
            of its category in this list.
        frame_stride: keep one frame out of this many (default 10).
    """
    print(video_path)
    anno_df = pd.read_json(anno_path)
    # An empty annotation file yields a DataFrame without columns; treat that
    # as "no boxes in any frame" instead of failing on the column lookup.
    has_annotations = 'frame_id' in anno_df.columns
    stem = os.path.basename(anno_path).split('.')[0]

    cap = cv2.VideoCapture(video_path)
    try:
        frame_idx = 0
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            if frame_idx % frame_stride == 0:
                img_height, img_width = frame.shape[:2]
                sample_name = stem + '_' + str(frame_idx)
                cv2.imwrite(img_dir + sample_name + '.jpg', frame)

                if has_annotations:
                    frame_anno = anno_df[anno_df['frame_id'] == frame_idx]
                else:
                    frame_anno = anno_df

                # A label file is only created for frames that have boxes.
                if len(frame_anno) != 0:
                    with open(label_dir + sample_name + '.txt', 'w') as up:
                        for category, bbox in zip(frame_anno['category'].values,
                                                  frame_anno['bbox'].values):
                            category_idx = labels.index(category)
                            x_center, y_center, width, height = bbox_to_yolo(
                                bbox, img_width, img_height)
                            # Diagnostic only: report boxes whose centre lies
                            # outside the image; the line is still written.
                            if x_center > 1 or y_center > 1:
                                print(bbox)
                            up.write(f'{category_idx} {x_center} {y_center} {width} {height}\n')

            frame_idx += 1
    finally:
        # Release the decoder handle for every video, even on failure.
        cap.release()


if __name__ == "__main__":
    for anno_path, video_path in zip(annos, videos):
        extract_frames(anno_path, video_path, img_path, label_path, category_labels)