"""Utilities for scanning, reading and writing files on an SMB share.

The module wraps the ``smbclient`` high-level API in :class:`SMBScanner`
and adds a few helpers that parse a share URL and build lookup tables
from files stored on the share.

All user-facing log messages are printed in Chinese and are part of the
program's runtime behaviour.
"""
import errno
import fnmatch
import io
import ntpath
import os
from datetime import datetime
from stat import S_ISDIR  # stdlib helper; the name ``stat`` below is smbclient's

import cv2
import numpy as np
import pandas as pd
from smbclient import (
    listdir,
    makedirs,  # creates directories recursively
    open_file,
    register_session,
    scandir,
    stat,
)

# File extensions (lower-case, with dot) treated as images.
_IMAGE_EXTENSIONS = frozenset(['.jpg', '.jpeg', '.png', '.bmp', '.tiff'])


class SMBScanner:
    """Thin convenience wrapper around ``smbclient`` for one SMB server.

    Every public method calls :meth:`connect` first, reports problems by
    printing a message, and returns a sentinel (``False``, ``None`` or an
    empty list) instead of raising.
    """

    def __init__(self, ip, username, password, domain=''):
        """Store the connection settings; no network traffic happens here.

        Args:
            ip: Server address used both for the session and in UNC paths.
            username: Account name passed to ``register_session``.
            password: Password passed to ``register_session``.
            domain: Stored on the instance but not used by ``connect``.
        """
        self.ip = ip
        self.username = username
        self.password = password
        self.domain = domain

    def connect(self):
        """Register an SMB session for ``self.ip``.

        Returns:
            bool: True when ``register_session`` succeeded, False otherwise.
        """
        try:
            register_session(
                self.ip,
                username=self.username,
                password=self.password,
            )
            print(f"成功连接到 {self.ip}")
            return True
        except Exception as e:
            print(f"连接失败: {e}")
            return False

    def directory_exists(self, full_path):
        """Check whether ``full_path`` can be stat-ed on the share.

        NOTE(review): the check only verifies that ``stat`` succeeds, so it
        is also True for an existing regular file.

        Args:
            full_path: Full UNC path.

        Returns:
            bool: True when the path exists, False on any error.
        """
        if not self.connect():
            return False

        try:
            stat(full_path)
            return True
        except Exception as e:
            print(f"未知错误: {e}")
            return False

    def read_excel(self, smb_path, sheet_name=0):
        """Read an Excel file from the share into a DataFrame.

        Args:
            smb_path: Full UNC path of the workbook.
            sheet_name: Forwarded to ``pandas.read_excel``.

        Returns:
            The parsed DataFrame, or None when connecting or reading fails.
        """
        if not self.connect():
            # Return None (not False) so callers can use one failure check.
            return None
        try:
            with open_file(smb_path, mode='rb') as smb_file:
                file_content = smb_file.read()
            return pd.read_excel(io.BytesIO(file_content), sheet_name=sheet_name)
        except Exception as e:
            print(f"读取Excel失败: {e}")
            return None

    def process_all_rows(self, df):
        """Convert every DataFrame row into a small result dictionary.

        Args:
            df: DataFrame to process; may be None or empty.

        Returns:
            list[dict]: One dict per row with the keys ``row_number``
            (1-based), ``original_index``, ``data`` (``row.to_dict()``) and
            ``summary``. An empty list when there is nothing to process.
        """
        if df is None or df.empty:
            print("没有数据可处理")
            return []

        print("开始处理每行数据:")
        print("=" * 60)

        total = len(df)
        column_count = len(df.columns)
        results = []
        for row_number, (index, row) in enumerate(df.iterrows(), 1):
            results.append({
                'row_number': row_number,
                'original_index': index,
                'data': row.to_dict(),
                'summary': f"处理了 {column_count} 个字段",
            })

            # Report progress every 10 rows and on the final row.
            if row_number % 10 == 0 or row_number == total:
                print(f"\n 进度: {row_number}/{total} ({row_number / total * 100:.1f}%)")

        print(f"处理完成!共处理 {len(results)} 行数据")
        return results

    def get_smb_images(self, full_path):
        """Recursively collect the paths of image files below ``full_path``.

        Args:
            full_path: Full UNC path of the directory to scan.

        Returns:
            list: Paths whose extension is one of .jpg/.jpeg/.png/.bmp/.tiff.
            Entries found before an error occurred are still returned.
        """
        image_files = []
        try:
            for entry in scandir(full_path):
                if entry.is_file():
                    _, ext = os.path.splitext(entry.name)
                    if ext.lower() in _IMAGE_EXTENSIONS:
                        image_files.append(entry.path)
                elif entry.is_dir():
                    image_files.extend(self.get_smb_images(entry.path))
        except Exception as e:
            print(f"错误: {e}")
        return image_files

    def build_full_path(self, share_path, file_path):
        r"""Build ``\\<ip>\<share_path>\<file_path>``.

        Backslashes surrounding ``share_path`` and leading ``file_path`` are
        stripped so that no doubled separators are produced.
        """
        share_path = share_path.strip('\\')
        file_path = file_path.lstrip('\\')
        return f"\\\\{self.ip}\\{share_path}\\{file_path}"

    def read_txt_by_line(self, full_path):
        """Read a text file from the share and return its stripped lines.

        The whole file is read into memory, decoded as UTF-8 (undecodable
        bytes are dropped, a leading BOM is removed) and split on ``\\n``.

        Args:
            full_path: Full UNC path of the text file.

        Returns:
            list[str] of stripped lines, or None on failure.
        """
        if not self.connect():
            return None

        print(f"读取 TXT 文件: {full_path}")

        try:
            with open_file(full_path, mode='rb') as file_obj:
                content_bytes = file_obj.read()

            # utf-8-sig keeps a BOM out of the first line.
            text_content = content_bytes.decode('utf-8-sig', errors='ignore')
            lines = [line.strip() for line in io.StringIO(text_content)]

            print(f"总共读取 {len(lines)} 行")
            return lines
        except Exception as e:
            print(f"读取文件时出错: {e}")
            return None

    def read_img_file(self, full_path):
        """Read a file from the share and decode it as an OpenCV image.

        Args:
            full_path: Full UNC path of the image file.

        Returns:
            The image decoded with ``cv2.IMREAD_COLOR``, or None when the
            connection fails, the file is empty, or decoding fails.
        """
        if not self.connect():
            return None

        print(f"读取文件: {full_path}")

        try:
            with open_file(full_path, mode='rb') as file_obj:
                content = file_obj.read()

            print(f"成功读取 {len(content)} 字节")

            if not content:
                print("文件为空")
                return None

            image_array = np.frombuffer(content, np.uint8)
            image = cv2.imdecode(image_array, cv2.IMREAD_COLOR)

            if image is None:
                print("图像解码失败 - 可能不是有效的图像文件")
                return None

            print(f"图像解码成功: {image.shape}")
            return image
        except Exception as e:
            print(f"读取文件失败: {e}")
            return None

    def writeFile(self, share_path, file_path, data, chunk_size=8192):
        """Write ``data`` to a file on the share, creating parent folders.

        Args:
            share_path: Share name.
            file_path: Path of the target file inside the share.
            data: ``bytes``/``bytearray`` (written in ``chunk_size`` pieces),
                ``str`` (encoded as UTF-8 first), an iterable of
                ``bytes``/``str`` chunks, or any object accepted by
                ``bytes()``.
            chunk_size: Number of bytes per write for bytes-like data.

        Returns:
            bool: True on success, False on any failure.
        """
        if not self.connect():
            return False

        full_path = self.build_full_path(share_path, file_path)

        try:
            # ntpath understands backslash-separated UNC paths on every OS.
            dir_path = ntpath.dirname(full_path)
            try:
                makedirs(dir_path, exist_ok=True)
            except Exception as e:
                # Best effort: opening the file below reports a real failure.
                print(f"创建目录失败,继续尝试写入: {e}")

            if isinstance(data, str):
                data = data.encode('utf-8')

            with open_file(full_path, mode='wb') as file_obj:
                if isinstance(data, (bytes, bytearray)):
                    total_size = len(data)
                    written = 0
                    for start in range(0, total_size, chunk_size):
                        chunk = data[start:start + chunk_size]
                        file_obj.write(chunk)
                        written += len(chunk)
                        print(f"写入进度: {written}/{total_size} 字节 ({written / total_size * 100:.1f}%)")
                elif hasattr(data, '__iter__'):
                    total_written = 0
                    for chunk in data:
                        if isinstance(chunk, str):
                            chunk = chunk.encode('utf-8')
                        file_obj.write(chunk)
                        total_written += len(chunk)
                        print(f"已写入: {total_written} 字节")
                else:
                    file_obj.write(bytes(data))

            print(f"文件写入完成: {full_path}")
            return True
        except Exception as e:
            print(f"写入文件失败: {e}")
            return False

    def writeImageToFile(self, share_path, file_path, image, image_format='.jpg', quality=95):
        """Encode an OpenCV image and write it to the share.

        The target name is ``file_path`` with ``image_format`` appended, so
        ``file_path`` should not already carry the extension.

        Args:
            share_path: Share name.
            file_path: Target path inside the share, without extension.
            image: Image array accepted by ``cv2.imencode``.
            image_format: Extension including the dot, e.g. ``'.jpg'``.
            quality: JPEG quality; only used for ``.jpg``/``.jpeg``.

        Returns:
            bool: True on success, False on any failure.
        """
        if not self.connect():
            return False

        try:
            if image_format.lower() in ('.jpg', '.jpeg'):
                encode_params = [cv2.IMWRITE_JPEG_QUALITY, quality]
                success, encoded_image = cv2.imencode(image_format, image, encode_params)
            else:
                success, encoded_image = cv2.imencode(image_format, image)

            if not success:
                print("图像编码失败")
                return False

            return self.writeFile(
                share_path, f"{file_path}{image_format}", encoded_image.tobytes()
            )
        except Exception as e:
            print(f"写入图像失败: {e}")
            return False

    def _ensure_remote_directory(self, share_name, remote_dir):
        """Create ``remote_dir`` (and parents) on the share if missing.

        Does nothing for an empty ``remote_dir``. Errors are printed and
        re-raised so the caller can abort the upload.
        """
        if not remote_dir:
            return

        try:
            full_remote_path = self.build_full_path(share_name, remote_dir)
            makedirs(full_remote_path, exist_ok=True)
            print(f"确保远程目录存在: {remote_dir}")
        except Exception as e:
            print(f"创建远程目录失败: {e}")
            raise

    def _remote_exists(self, full_remote_path):
        """Return True if the remote path exists, False if it is missing.

        "Missing" is detected through ``errno.ENOENT`` so that both
        ``FileNotFoundError`` and other ``OSError`` subclasses carrying that
        errno are recognised; any other error propagates.
        """
        try:
            stat(full_remote_path)
        except OSError as e:
            if e.errno == errno.ENOENT:
                return False
            raise
        return True

    def upload_directory(self, local_dir, share_name, remote_dir="", overwrite=True):
        """Upload a local directory tree to the share.

        Args:
            local_dir: Existing local directory.
            share_name: Share name.
            remote_dir: Target directory inside the share ("" for the root).
            overwrite: When False, files that already exist remotely are
                skipped.

        Returns:
            bool: True when every file and sub-directory was uploaded.
        """
        if not self.connect():
            return False

        print(f"开始上传目录: {local_dir} -> {share_name}/{remote_dir}")

        if not os.path.isdir(local_dir):
            print(f"本地目录不存在: {local_dir}")
            return False

        try:
            self._ensure_remote_directory(share_name, remote_dir)
            success = self._upload_directory_recursive(
                local_dir, share_name, remote_dir, overwrite
            )

            if success:
                print("目录上传完成")
            else:
                print("目录上传过程中出现错误")
            return success
        except Exception as e:
            print(f"上传目录失败: {e}")
            return False

    def _upload_directory_recursive(self, local_path, share_name, remote_path, overwrite):
        """Upload the contents of ``local_path``, descending into folders.

        A failing item does not stop the walk; the method keeps going and
        returns False at the end if anything failed.
        """
        try:
            success = True

            for item_name in os.listdir(local_path):
                local_item_path = os.path.join(local_path, item_name)
                remote_item_path = f"{remote_path}/{item_name}" if remote_path else item_name

                if os.path.isdir(local_item_path):
                    print(f"上传子目录: {item_name}")
                    self._ensure_remote_directory(share_name, remote_item_path)
                    item_ok = self._upload_directory_recursive(
                        local_item_path, share_name, remote_item_path, overwrite
                    )
                else:
                    item_ok = self._upload_single_file(
                        local_item_path, share_name, remote_item_path, overwrite
                    )

                if not item_ok:
                    success = False

            return success
        except Exception as e:
            print(f"上传目录内容失败 {local_path}: {e}")
            return False

    def _upload_single_file(self, local_file_path, share_name, remote_file_path, overwrite):
        """Upload one file; the caller is expected to have connected already.

        Returns:
            bool: True when the file was uploaded or skipped because it
            already exists and ``overwrite`` is False; False on error.
        """
        try:
            full_remote_path = self.build_full_path(share_name, remote_file_path)

            if not overwrite and self._remote_exists(full_remote_path):
                print(f"文件已存在,跳过: {remote_file_path}")
                return True

            print(f"上传文件: {os.path.basename(local_file_path)}")

            with open(local_file_path, 'rb') as local_file:
                local_content = local_file.read()

            with open_file(full_remote_path, mode='wb') as remote_file:
                remote_file.write(local_content)

            print(f"文件上传成功: {remote_file_path} ({len(local_content)} 字节)")
            return True
        except Exception as e:
            print(f"上传文件失败 {local_file_path}: {e}")
            return False

    def upload_file(self, local_file_path, share_name, remote_file_path, overwrite=True):
        """Upload a single local file to the share.

        Args:
            local_file_path: Path of the local file.
            share_name: Share name.
            remote_file_path: Target path inside the share.
            overwrite: When False, an existing remote file is left untouched
                and the call still returns True.

        Returns:
            bool: True on success or skip; False on error or when the local
            file is empty.
        """
        if not self.connect():
            return False

        print(f"上传文件: {local_file_path} -> {share_name}/{remote_file_path}")

        try:
            full_remote_path = self.build_full_path(share_name, remote_file_path)

            if not overwrite and self._remote_exists(full_remote_path):
                print(f"文件已存在,跳过: {remote_file_path}")
                return True

            with open(local_file_path, 'rb') as local_file:
                content = local_file.read()

            print(f"成功读取 {len(content)} 字节")

            if not content:
                print("文件为空")
                return False

            with open_file(full_remote_path, mode='wb') as remote_file:
                remote_file.write(content)

            print("文件上传成功")
            return True
        except Exception as e:
            print(f"上传文件失败: {e}")
            return False

    def find_folders_by_name(self, share_path, folder_name, start_dir="", max_depth=10):
        """Find folders by name; see :meth:`find_items_by_name`."""
        return self.find_items_by_name(
            share_path=share_path,
            target_name=folder_name,
            item_type="folder",
            start_dir=start_dir,
            max_depth=max_depth,
        )

    def find_files_by_name(self, share_path, file_name, start_dir="", max_depth=10):
        """Find files by name; see :meth:`find_items_by_name`."""
        return self.find_items_by_name(
            share_path=share_path,
            target_name=file_name,
            item_type="file",
            start_dir=start_dir,
            max_depth=max_depth,
        )

    def find_items_by_name(self, share_path, target_name, item_type="both", start_dir="", max_depth=10):
        """Recursively search the share for folders and/or files by name.

        Args:
            share_path: Share name.
            target_name: Name to look for; ``*`` and ``?`` wildcards are
                supported and matching is case-insensitive.
            item_type: ``"folder"``, ``"file"`` or ``"both"``.
            start_dir: Directory inside the share where the search starts.
            max_depth: Maximum directory depth below ``start_dir``.

        Returns:
            list: Full paths of the matches (empty when connecting fails).
        """
        if not self.connect():
            return []

        found_paths = []
        start_path = self.build_full_path(share_path, start_dir)

        try:
            self._search_recursive(
                share_path=share_path,
                current_path=start_path,
                target_name=target_name,
                item_type=item_type,
                found_paths=found_paths,
                current_depth=0,
                max_depth=max_depth,
            )
        except Exception as e:
            print(f"搜索过程中出错: {e}")

        return found_paths

    def _search_recursive(self, share_path, current_path, target_name, item_type,
                          found_paths, current_depth, max_depth):
        """Depth-limited search; matches are appended to ``found_paths``.

        Errors on a single entry or directory are printed and skipped so
        the rest of the tree is still searched.
        """
        if current_depth > max_depth:
            return

        try:
            for entry in scandir(current_path):
                try:
                    if entry.is_dir():
                        if self._is_match(entry.name, target_name) and item_type in ["both", "folder"]:
                            found_paths.append(entry.path)
                            print(f"找到目标文件夹: {entry.path}")

                        # Always descend, whether or not the folder matched.
                        self._search_recursive(
                            share_path=share_path,
                            current_path=entry.path,
                            target_name=target_name,
                            item_type=item_type,
                            found_paths=found_paths,
                            current_depth=current_depth + 1,
                            max_depth=max_depth,
                        )
                    elif entry.is_file():
                        if self._is_match(entry.name, target_name) and item_type in ["both", "file"]:
                            found_paths.append(entry.path)
                            print(f"找到目标文件: {entry.path}")
                except Exception as e:
                    print(f"处理条目 {entry.path} 时出错: {e}")
        except Exception as e:
            print(f"搜索目录 {current_path} 时出错: {e}")

    def _is_match(self, name, pattern):
        """Case-insensitively compare ``name`` with ``pattern``.

        Args:
            name: Actual entry name.
            pattern: Plain name, or a pattern containing ``*`` / ``?``.

        Returns:
            bool: True when the name matches.
        """
        # Without wildcards a plain comparison is enough.
        if '*' not in pattern and '?' not in pattern:
            return name.lower() == pattern.lower()

        return fnmatch.fnmatch(name.lower(), pattern.lower())

    def list_directory(self, share_path, dir, recursive=False, max_depth=3):
        """List the entries of a directory on the share.

        Args:
            share_path: Share name.
            dir: Directory inside the share.
            recursive: Descend into sub-directories when True.
            max_depth: Maximum depth used when ``recursive`` is True.

        Returns:
            list[dict]: One dict per entry (see :meth:`_walk_directory`);
            empty when connecting fails.
        """
        if not self.connect():
            return []

        result = []
        try:
            full_path = self.build_full_path(share_path, dir)
            print(f"开始遍历: {full_path}")
            self._walk_directory(full_path, recursive, max_depth, 0, result)
        except Exception as e:
            print(f"遍历失败: {e}")
        return result

    def _walk_directory(self, path, recursive, max_depth, current_depth, result):
        """Append one info dict per entry of ``path`` to ``result``.

        Each dict has the keys ``name``, ``path``, ``depth``, ``indent``,
        ``is_dir``, ``size`` (0 for directories) and ``modified_time``;
        files additionally get ``formatted_size``.

        Returns:
            The same ``result`` list, or None when ``current_depth`` already
            exceeds ``max_depth``.
        """
        if current_depth > max_depth:
            return

        # Computed up front so the error message below can always use it.
        indent = " " * current_depth

        try:
            for entry in scandir(path):
                try:
                    file_stat = stat(entry.path)
                    is_dir = entry.is_dir()

                    item = {
                        'name': entry.name,
                        'path': entry.path,
                        'depth': current_depth,
                        'indent': indent,
                        'is_dir': is_dir,
                        'size': file_stat.st_size if not is_dir else 0,
                        'modified_time': datetime.fromtimestamp(
                            file_stat.st_mtime
                        ).strftime('%Y-%m-%d %H:%M:%S'),
                    }

                    if is_dir:
                        result.append(item)
                        if recursive and current_depth < max_depth:
                            # The shared list is filled in place by the call.
                            self._walk_directory(
                                entry.path,
                                recursive,
                                max_depth,
                                current_depth + 1,
                                result,
                            )
                    else:
                        item['formatted_size'] = self._format_size(file_stat.st_size)
                        result.append(item)
                except Exception as e:
                    print(f"{indent} 无法访问: {entry.name} - {e}")
        except Exception as e:
            print(f"无法读取目录 {path}: {e}")
        return result

    def _format_size(self, size_bytes):
        """Format a byte count as a string such as ``"1.5 KB"``.

        Units go from B to TB in steps of 1024; zero yields ``"0 B"``.
        """
        if size_bytes == 0:
            return "0 B"

        size_names = ["B", "KB", "MB", "GB", "TB"]
        i = 0
        while size_bytes >= 1024 and i < len(size_names) - 1:
            size_bytes /= 1024.0
            i += 1

        return f"{size_bytes:.1f} {size_names[i]}"

    def get_file_info(self, share_path, file_path):
        """Return stat information for a path on the share.

        Args:
            share_path: Share name.
            file_path: Path inside the share.

        Returns:
            dict with ``name``, ``path``, ``size``, ``size_formatted``,
            ``create_time``, ``modify_time``, ``access_time`` and ``is_dir``,
            or None on failure.
        """
        if not self.connect():
            return None

        try:
            full_path = f"\\\\{self.ip}\\{share_path}\\{file_path}"
            file_stat = stat(full_path)

            return {
                # ntpath splits on both "\\" and "/" regardless of the host OS.
                'name': ntpath.basename(file_path),
                'path': full_path,
                'size': file_stat.st_size,
                'size_formatted': self._format_size(file_stat.st_size),
                'create_time': datetime.fromtimestamp(file_stat.st_ctime),
                'modify_time': datetime.fromtimestamp(file_stat.st_mtime),
                'access_time': datetime.fromtimestamp(file_stat.st_atime),
                'is_dir': S_ISDIR(file_stat.st_mode),
            }
        except Exception as e:
            print(f"获取文件信息失败: {e}")
            return None

    def display_image(self, image, window_name="Image"):
        """Show an image in an OpenCV window until a key is pressed.

        Images larger than 1920x1080 are scaled down to fit.

        Args:
            image: OpenCV image; None is reported and ignored.
            window_name: Title of the window.
        """
        if image is None:
            print("图像为空,无法显示")
            return

        cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)

        # Assumed screen size; adjust to the actual display if needed.
        screen_width = 1920
        screen_height = 1080

        img_height, img_width = image.shape[:2]

        # Never enlarge: the scale is capped at 1.0.
        scale = min(screen_width / img_width, screen_height / img_height, 1.0)

        if scale < 1.0:
            new_width = int(img_width * scale)
            new_height = int(img_height * scale)
            image = cv2.resize(image, (new_width, new_height))
            print(f"图像已缩放: {img_width}x{img_height} -> {new_width}x{new_height}")

        cv2.imshow(window_name, image)
        print("图像显示中... 按任意键关闭窗口")

        cv2.waitKey(0)
        cv2.destroyAllWindows()
        print("窗口已关闭")


def get_conf(zip_url, user_name, pwd):
    """Split a share URL into server IP, share name and directory.

    Both ``\\\\ip\\share\\dir`` and ``ip/share/dir`` forms are accepted.

    Args:
        zip_url: Share URL.
        user_name: User name stored in the result.
        pwd: Password stored in the result.

    Returns:
        dict with the keys ``ip``, ``username``, ``password``, ``domain``,
        ``share`` and ``dir``. When the URL has fewer than two components
        the legacy failure value ``("", "fail")`` is returned instead.
    """
    zip_url = zip_url.replace('\\\\', '/')
    zip_url = zip_url.replace('\\', '/')
    # Drop every leading slash so "//ip/share" does not yield an empty IP.
    zip_url = zip_url.lstrip('/')

    parts = zip_url.split('/')
    if len(parts) < 2:
        print(f"传入的共享目录格式错误: {zip_url}")
        return "", "fail"

    dir = '/'.join(parts[2:]) if len(parts) > 2 else ''

    return {
        'ip': parts[0],
        'username': user_name,
        'password': pwd,
        'domain': '',  # left empty for a workgroup
        'share': parts[1],
        'dir': dir,
    }


def get_scanner(zip_url, user_name, pwd):
    """Create an :class:`SMBScanner` for the server named in ``zip_url``.

    Raises:
        ValueError: If ``zip_url`` cannot be parsed by :func:`get_conf`.
    """
    config = get_conf(zip_url, user_name, pwd)
    if not isinstance(config, dict):
        raise ValueError(f"invalid share url: {zip_url!r}")

    return SMBScanner(
        ip=config['ip'],
        username=config['username'],
        password=config['password'],
        domain=config['domain'],
    )


def get_road_dict(dir, user_name, pwd):
    """Build a mapping from route key to the rows of the per-km index sheet.

    The first file matching ``每公里指标明细表*.xls`` directly below ``dir``
    is read. The key is the route code, the integer division code and
    ``'A'`` or ``'B'`` (``'B'`` when the direction column equals ``下行``).

    Args:
        dir: Share URL of the directory to search.
        user_name: SMB user name.
        pwd: SMB password.

    Returns:
        dict[str, list[dict]]: Rows as produced by
        ``SMBScanner.process_all_rows``; empty when nothing was found.
    """
    config = get_conf(dir, user_name, pwd)
    scanner = get_scanner(dir, user_name=user_name, pwd=pwd)

    found_paths = scanner.find_files_by_name(
        share_path=config['share'],
        file_name='每公里指标明细表*.xls',
        start_dir=config['dir'],
        max_depth=1,
    )
    print(f"\n找到 {len(found_paths)} 个 '每公里指标明细表*.xls' 文件:")
    for i, path in enumerate(found_paths, 1):
        print(f"{i}. {path}")

    road_dict = {}
    if found_paths:
        df = scanner.read_excel(found_paths[0])
        for row in scanner.process_all_rows(df):
            data = row['data']
            # Rows without a route code are skipped.
            if pd.notna(data['线路编码']):
                up_or_down = 'B' if data['方向(上行/下行)'] == '下行' else 'A'
                key = f"{data['线路编码']}{str(int(data['区划代码']))}{up_or_down}"
                road_dict.setdefault(key, []).append(row)

    return road_dict


def get_pile_dict(dir, user_name, pwd):
    """Build a mapping from the 4th ``->`` field to all fields of a line.

    The first ``fileindex.txt`` found below ``dir`` (up to 8 levels deep) is
    read; every line is split on ``->`` and lines with fewer than four
    fields are ignored.

    Args:
        dir: Share URL of the directory to search.
        user_name: SMB user name.
        pwd: SMB password.

    Returns:
        dict[str, list[str]]: Empty when no index file could be read.
    """
    config = get_conf(dir, user_name, pwd)
    scanner = get_scanner(dir, user_name=user_name, pwd=pwd)

    found_paths = scanner.find_files_by_name(
        share_path=config['share'],
        file_name='fileindex.txt',
        start_dir=config['dir'],
        max_depth=8,
    )
    print(f"\n找到 {len(found_paths)} 个 'fileindex.txt' 文件:")
    for i, path in enumerate(found_paths, 1):
        print(f"{i}. {path}")

    pile_dict = {}
    if found_paths:
        # read_txt_by_line returns None on failure.
        lines = scanner.read_txt_by_line(full_path=found_paths[0]) or []
        for line in lines:
            parts = line.strip().split("->")
            if len(parts) >= 4:
                pile_dict[parts[3]] = parts

    return pile_dict


def write_to_excel_pandas(data, filename, sheet_name='Sheet1'):
    """Write rows to an Excel file with pandas, without index or header.

    Args:
        data: List of rows accepted by ``pandas.DataFrame``.
        filename: Output file name.
        sheet_name: Name of the worksheet.
    """
    df = pd.DataFrame(data)
    df.to_excel(filename, sheet_name=sheet_name, index=False, header=False)
    print(f"数据已写入 {filename}")


def main():
    """Manual test entry point; the usage examples below are disabled."""
    # NOTE(review): credentials are hard-coded here; move them to
    # environment variables or a config file before sharing this script.
    config = {
        'ip': '192.168.110.114',
        'username': 'administrator',
        'password': 'abc@1234',
        'domain': '',  # left empty for a workgroup
        'share': 'share_File',
        'dir': '西南计算机',
    }

    scanner = SMBScanner(
        ip=config['ip'],
        username=config['username'],
        password=config['password'],
        domain=config['domain'],
    )

    # Walk the shared directory
    # scanner.list_directory(
    #     share_path=config['share'],
    #     dir=config['dir'],
    #     recursive=True,  # recurse into sub-directories
    #     max_depth=9      # maximum depth
    # )

    # Read an image file
    # full_path = scanner.build_full_path(
    #     share_path=config['share'],
    #     file_path=f"{config['dir']}/AA县/报送数据/图像类/CD45500155A/Images/20250508131651/01/20250508-131712-644.jpg"
    # )
    # image = scanner.read_img_file(full_path=full_path)
    # scanner.display_image(image)

    # Write an image file
    # scanner.writeImageToFile(
    #     share_path=config['share'],
    #     file_path=f"{config['dir']}/AA县/报送数据/图像类_识别/CD45500155A/Images/20250508131651/01/20250508-131712-644.jpg",
    #     image=image
    # )

    # Find folders named 报送数据
    # found_paths = scanner.find_folders_by_name(
    #     share_path=config['share'],
    #     folder_name='报送数据'
    # )
    # print(f"\n找到 {len(found_paths)} 个 '报送数据' 文件夹:")
    # for i, path in enumerate(found_paths, 1):
    #     print(f"{i}. {path}")

    # Find files by pattern
    # found_paths = scanner.find_files_by_name(
    #     share_path=config['share'],
    #     file_name='每公里指标明细表*.xls'
    # )
    # print(f"\n找到 {len(found_paths)} 个")
    # for i, path in enumerate(found_paths, 1):
    #     print(f"{i}. {path}")

    # Find every image below a directory
    # full_path = scanner.build_full_path(share_path=config['share'], file_path='西南计算机\\AA县\\报送数据')
    # imgPaths = scanner.get_smb_images(full_path)
    # for i, path in enumerate(imgPaths, 1):
    #     print(f"{i}. {path}")

    # Read an Excel workbook and group its rows by route key
    # full_path = scanner.build_full_path(share_path=config['share'], file_path='西南计算机\\AA县\\每公里指标明细表(北碚).xls')
    # df = scanner.read_excel(full_path)
    # rows = scanner.process_all_rows(df)
    # road_dict = {}
    # for i, row in enumerate(rows, 1):
    #     data = row['data']
    #     if pd.notna(data['线路编码']):
    #         up_or_down = 'A'
    #         if data['方向(上行/下行)'] == '下行':
    #             up_or_down = 'B'
    #         key = f"{data['线路编码']}{str(int(data['区划代码']))}{up_or_down}"
    #         if road_dict.get(key):
    #             road_dict[key].append(row)
    #         else:
    #             road_dict[key] = [row]  # route key -> route rows

    # Locate the text index file
    # found_paths = scanner.find_files_by_name(
    #     share_path=config['share'],
    #     file_name='fileindex.txt'
    # )
    # print(f"\n找到 {len(found_paths)} 个 'fileindex.txt' 文件:")
    # for i, path in enumerate(found_paths, 1):
    #     print(f"{i}. {path}")

    # Read the text index file
    # lines = scanner.read_txt_by_line(full_path=found_paths[0])
    # for i, line in enumerate(lines, 1):
    #     print(f"{i}. {line}")

    # Upload a directory
    # output_dir = "D:/devForBdzlWork/ai-train_platform/predictions"
    # scanner.upload_directory(output_dir, config['share'], remote_dir="西南计算机/AA县/报送数据_识别")

    # get_pile_dict
    # input_dir = "192.168.110.114/share_File/西南计算机/AA县/报送数据/图像类/CD45500155A/Images"
    # pile_dict = get_pile_dict(input_dir, config['username'], config['password'])
    # print("-------------------------------------------")


if __name__ == "__main__":
    main()