【python实战】-- 解压提取所有指定文件的指定内容

系列文章目录

文章目录

系列文章目录
前言
一、解压并提取指定内容
- 1、需求
- 2、程序
总结
前言

一、解压并提取指定内容

1、需求

指定目录下有若干 .zip 压缩文件（压缩包内可能还嵌套压缩包）
批量解压这些压缩文件，解压完成后删除原压缩包
汇总文件路径中包含指定字符的所有 .csv 文件中的指定数据列，并输出到 Excel 汇总表
2、程序

import os
import shutil
import zipfile
import pandas as pd
import xlrd
import xlwt
import csv
from xlutils.copy import copy
from openpyxl import Workbook
from openpyxl import load_workbook
from os.path import dirname
from decimal import Decimal
from openpyxl.utils.dataframe import dataframe_to_rows
# 读写2007 excel
import openpyxl
from openpyxl.styles import numbers
from openpyxl.styles import Alignment
import glob
import tkinter as tk
from tkinter import messagebox
from tkinter import simpledialog

# Directory entered by the user; the script walks it recursively.
zippath = input("请输入需解压的文件路径：\n")
parent_path = zippath
# File-name suffix that marks a file as an archive to be extracted.
file_flag = '.zip'
def del_old_zip(file_path):
    """Delete the file at ``file_path`` from disk.

    Args:
        file_path: Path of the file to remove.

    Raises:
        OSError: Propagated from ``os.remove`` (for example when the file
            does not exist).
    """
    os.remove(file_path)
def decompress(file_path, root):
    """Extract the zip archive ``file_path`` into the directory ``root``.

    Args:
        file_path: Path of the archive to extract.
        root: Directory that receives the extracted members.

    Returns:
        1 if any member name ends with the module-level ``file_flag``
        suffix, otherwise 0.
    """
    # The context manager closes the archive even if extraction raises.
    with zipfile.ZipFile(f"{file_path}", "r") as archive:
        archive.extractall(path=f"{root}")
        has_nested = any(
            member.endswith(file_flag) for member in archive.namelist()
        )
    return 1 if has_nested else 0


def start_dir_make(root, dirname):
    """Create the directory ``dirname`` inside ``root`` and return its path.

    The directory is created through its joined path instead of changing the
    working directory first, so a relative ``root`` keeps working and the
    process working directory is left untouched.

    Raises:
        FileExistsError: If the directory already exists.
    """
    target = os.path.join(root, dirname)
    os.mkdir(target)
    return target


def rem_dir_extra(root, father_dir_name):
    """Collapse redundant ``name/name`` nesting below ``root``.

    When ``root/father_dir_name`` contains nothing but a single directory of
    the same name, that inner directory is moved up to replace the outer one.
    Every other sub-directory is processed recursively. File-system errors
    are reported on stdout and do not propagate.

    Args:
        root: Directory that contains ``father_dir_name``.
        father_dir_name: Name of the directory to inspect.
    """
    parent = os.path.join(root, father_dir_name)
    try:
        for item in os.listdir(parent):
            if not os.path.isdir(os.path.join(parent, item)):
                continue
            if item == father_dir_name and len(os.listdir(parent)) == 1:
                # Park the outer directory under a temporary name, lift the
                # inner one into its place, then drop the empty outer shell.
                holding = os.path.join(root, father_dir_name + '-old')
                os.rename(parent, holding)
                # An empty ``root`` means "current directory".
                shutil.move(os.path.join(holding, item), root or os.curdir)
                os.rmdir(holding)
                rem_dir_extra(root, item)
            else:
                rem_dir_extra(parent, item)
    except OSError as e:
        print("清除文件夹出错" + str(e))


def get_allfile_msg(file_dir):
    """Describe the top level of ``file_dir``.

    Returns:
        A ``(root, dirs, files)`` tuple for the first entry produced by
        ``os.walk``; ``files`` only keeps names ending in ``.xls``, ``.xlsx``
        or ``.csv``. When ``os.walk`` yields nothing (for example because the
        directory does not exist) ``(file_dir, [], [])`` is returned so that
        callers can always unpack three values.
    """
    top_level = next(os.walk(file_dir), None)
    if top_level is None:
        return file_dir, [], []
    root, dirs, files = top_level
    wanted = ('.xls', '.xlsx', '.csv')
    return root, dirs, [name for name in files if name.endswith(wanted)]


def get_allfile_url(root, files):
    """Return ``root + "/" + name`` for every name in ``files``."""
    return [root + "/" + file_name for file_name in files]


def get_file_name(path, suffix=('.xlsx', '.xls', '.csv')):
    """Recursively list the files below ``path`` whose names end in ``suffix``.

    Args:
        path: Directory to walk.
        suffix: A single suffix string or an iterable of suffixes. The
            default is an immutable tuple rather than a shared list.

    Returns:
        A list of joined ``root``/``file`` paths in ``os.walk`` order.
    """
    wanted = (suffix,) if isinstance(suffix, str) else tuple(suffix)
    matches = []
    for root, _dirs, files in os.walk(path):
        matches.extend(
            os.path.join(root, file) for file in files if file.endswith(wanted)
        )
    return matches


def extract_last_part_of_path(path):
    """Return the final component of ``path`` (``os.path.basename``)."""
    return os.path.basename(path)


# CSV reading through pandas is defined below.
def read_csv_file(file_path):
    """Load one CSV file into a DataFrame of unconverted (object) values.

    The file is decoded as latin1, split on commas, uses ``'`` as the quote
    character and takes its column names from the second line of the file.

    Only ``sep`` is passed: current pandas raises ``ValueError`` when both
    ``sep`` and its alias ``delimiter`` are supplied.

    Args:
        file_path: Path of the CSV file.

    Returns:
        pandas.DataFrame with ``dtype=object`` columns.
    """
    return pd.read_csv(
        file_path,
        encoding='latin1',
        sep=',',
        dtype=object,
        quotechar="'",
        doublequote=True,
        engine="python",
        header=1,  # the second line of the file is the header row
    )


# Summary keyword typed by the user -> (DataFrame column slice, column titles).
_SUMMARY_MODES = {
    "S1": (slice(1, 4), ['data1', 'data2', 'data3']),
    "S2": (slice(4, 7), ['data4', 'data5', 'data6']),
    "S1S2": (slice(1, 7),
             ['data1', 'data2', 'data3', 'data4', 'data5', 'data6']),
}

# Reflectance limits as (start wavelength, end wavelength, maximum allowed).
# The first band is reported in sheet row 2, the second in sheet row 3.
_WAVE_BANDS = ((430, 530, 1.5), (550, 780, 1.1))

_FIRST_WAVE = 380      # first wavelength label written to column 1
_LAST_WAVE = 1050      # last wavelength label written to column 1
_DATA_START_ROW = 7    # sheet row that receives DataFrame row 0 / label 380


def _extract_all_archives(top_dir):
    """Extract every archive below ``top_dir`` until no archive is left.

    Each pass walks the tree, extracts every file ending in ``file_flag``
    into a sibling directory named after the archive, and deletes the
    archive. Archives unpacked from other archives are picked up by the next
    pass. The loop ends after the first pass that finds no archive, so it
    also terminates when the tree contains no archive at all.
    """
    while True:
        found_archive = False
        for root, _dirs, files in os.walk(top_dir):
            for name in files:
                if not name.endswith(file_flag):
                    continue
                found_archive = True
                # Strip only the trailing suffix, not every occurrence.
                stem = name[:-len(file_flag)]
                new_ws = start_dir_make(root, stem)
                zip_path = os.path.join(root, name)
                decompress(zip_path, new_ws)
                del_old_zip(zip_path)
                rem_dir_extra(root, stem)
                print(f'{root}\\{name}'.join(['文件：', '\n解压完成\n']))
        if not found_archive:
            break


def _init_summary_sheet(ws, alignment):
    """Write the fixed first column: labels, band limits and wavelengths."""
    for row, label in ((1, '判定'), (5, '文件名'), (6, 'wave')):
        cell = ws.cell(row=row, column=1)
        cell.value = label
        cell.alignment = alignment
    for band_row, (start, end, limit) in enumerate(_WAVE_BANDS, start=2):
        ws.cell(row=band_row, column=1).value = f'{start},{end},{limit}'
    for wave in range(_FIRST_WAVE, _LAST_WAVE + 1):
        cell = ws.cell(row=wave - _FIRST_WAVE + _DATA_START_ROW, column=1)
        cell.value = wave
        cell.alignment = alignment


def _write_file_summary(ws, alignment, df, titles, first_col, file_label):
    """Write verdicts, band maxima, titles and raw data for one file.

    ``df`` holds one float column per entry in ``titles``; the columns are
    written side by side starting at sheet column ``first_col``.
    """
    for offset in range(len(titles)):
        col = first_col + offset
        passed = True
        for band_row, (start, end, limit) in enumerate(_WAVE_BANDS, start=2):
            # NOTE(review): the +2 row offset and the exclusive slice end
            # (the end wavelength itself is not evaluated) are kept exactly
            # as in the original; the raw-data dump below places DataFrame
            # row 0 next to the 380 label, so confirm the offset against a
            # real CSV.
            peak = df.iloc[start - _FIRST_WAVE + 2:end - _FIRST_WAVE + 2,
                           offset].max()
            ws.cell(row=band_row, column=col).value = peak
            # "not <=" rather than ">" so that a NaN maximum counts as NG.
            if not peak <= limit:
                passed = False
        verdict = ws.cell(row=1, column=col)
        verdict.value = "OK" if passed else "NG"
        verdict.alignment = alignment

    ws.cell(row=5, column=first_col).value = file_label

    for offset, title in enumerate(titles):
        header = ws.cell(row=6, column=first_col + offset)
        header.value = title
        header.alignment = alignment

    for i, row in df.iterrows():
        for j, value in enumerate(row):
            ws.cell(row=i + _DATA_START_ROW, column=first_col + j).value = value


def main():
    """Extract all archives, then summarise the matching CSV files in Excel."""
    # Resolve the script directory before any file-system work so that a
    # relative __file__ is interpreted against the launch directory.
    current_path = os.path.dirname(os.path.abspath(__file__))

    _extract_all_archives(parent_path)
    rem_dir_extra(os.path.split(parent_path)[0], os.path.split(parent_path)[1])
    print("解压完成，请检查！！")
    print("请输入汇总需求，S1或S2或S1S2")

    wb = Workbook()
    ws = wb.active
    ws.title = "Summary"
    # Every labelled cell is centred horizontally and vertically.
    alignment = Alignment(horizontal='center', vertical='center')
    _init_summary_sheet(ws, alignment)

    file_dir = parent_path
    user_input = input("请输入S1或S2或S1S2\n")
    mode = _SUMMARY_MODES.get(user_input)
    if mode is None:
        print("非指定指令")
    else:
        column_slice, titles = mode
        width = len(titles)
        count = 0
        for root, _dirs, _files in os.walk(file_dir):
            # Escape the directory part so names containing [, ] , * or ?
            # are matched literally.
            pattern = os.path.join(glob.escape(root), '*.csv')
            for file_path in glob.glob(pattern):
                if '39526A-905' not in file_path or 'Add' in file_path:
                    continue
                print(file_path)
                slot = count  # zero-based column group for this file
                count += 1
                # Deliberate best effort: one unreadable or malformed file is
                # reported and must not abort the whole summary.
                try:
                    df = read_csv_file(file_path)
                    df = df.iloc[:, column_slice].astype(float)
                    _write_file_summary(
                        ws,
                        alignment,
                        df,
                        titles,
                        2 + width * slot,
                        extract_last_part_of_path(file_path),
                    )
                except Exception as e:
                    print(e)
        print(f"共发现 {count} 个文件！")

    output_file_path = os.path.join(current_path, 'SummaryoutS1S2.xlsx')
    wb.save(output_file_path)


if __name__ == '__main__':
    main()