ai_project_v1/b3dm/data_3dtiles_manager.py
2026-01-14 11:37:35 +08:00

566 lines
20 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

from minio import Minio
from minio.error import S3Error
import json
import os
import numpy as np
from urllib.parse import urlparse
import hashlib
import time
import re
import pickle
from datetime import datetime
class MinIO3DTilesManager:
    """Download, cache and upload 3D Tiles datasets stored in a MinIO server.

    The manager keeps a persistent "path mapping" (a pickled dict stored next
    to this script) that records, for every downloaded object, where it was
    saved locally. A tileset whose root ``tileset.json`` is recorded in the
    mapping and still exists on disk is treated as fully cached.
    """

    # Directory (relative to this script) used when no save_dir is supplied.
    DEFAULT_SAVE_DIRNAME = "data_3dtiles"

    def __init__(self, endpoint_url, access_key, secret_key, secure=False,
                 mapping_file="minio_path_mapping.pkl"):
        """Create the MinIO client and load the persisted path mapping.

        Args:
            endpoint_url: MinIO service address, e.g. ``host:9000``. A leading
                ``http://`` or ``https://`` scheme is stripped; ``https://``
                also forces ``secure=True``.
            access_key: MinIO access key.
            secret_key: MinIO secret key.
            secure: Whether to use HTTPS.
            mapping_file: File name of the path-mapping pickle, stored in the
                directory that contains this script.
        """
        if endpoint_url.startswith('http://'):
            endpoint_url = endpoint_url[len('http://'):]
        elif endpoint_url.startswith('https://'):
            endpoint_url = endpoint_url[len('https://'):]
            secure = True
        # The client expects a bare "host[:port]"; drop a trailing slash left
        # over from a pasted URL.
        endpoint_url = endpoint_url.rstrip('/')

        self.endpoint_url = endpoint_url
        self.access_key = access_key
        self.secret_key = secret_key
        self.minio_client = Minio(
            endpoint_url,
            access_key=access_key,
            secret_key=secret_key,
            secure=secure
        )
        # Directory that contains this script.
        self.script_dir = os.path.dirname(os.path.abspath(__file__))
        # Location of the persisted path mapping.
        self.mapping_file = os.path.join(self.script_dir, mapping_file)
        # Mapping of "bucket/object" -> info dict for every downloaded file.
        self.path_mapping = self.load_path_mapping()

    def load_path_mapping(self):
        """Load the path mapping from disk.

        Returns:
            dict: The stored mapping, or an empty dict when the file is
            missing, unreadable, or does not contain a dict.
        """
        if not os.path.exists(self.mapping_file):
            return {}
        try:
            # SECURITY: pickle.load can execute arbitrary code. This is only
            # acceptable because the file is written by save_path_mapping();
            # never point mapping_file at data from an untrusted source.
            with open(self.mapping_file, 'rb') as f:
                mapping = pickle.load(f)
        except Exception as e:
            # Best effort: a corrupt mapping only costs a re-download.
            print(f"加载路径映射失败: {e}")
            return {}
        return mapping if isinstance(mapping, dict) else {}

    def save_path_mapping(self):
        """Persist the path mapping to disk.

        Returns:
            bool: True on success, False if the file could not be written.
        """
        try:
            with open(self.mapping_file, 'wb') as f:
                pickle.dump(self.path_mapping, f)
            return True
        except Exception as e:
            print(f"保存路径映射失败: {e}")
            return False

    def _resolve_save_dir(self, save_dir):
        """Apply the default save directory and sanitise it.

        Every public entry point funnels ``save_dir`` through this helper so
        that they all derive the same cache key for the same directory.
        """
        if save_dir is None:
            save_dir = os.path.join(self.script_dir, self.DEFAULT_SAVE_DIRNAME)
        return self.clean_file_path(save_dir)

    def get_cache_key(self, tileset_url, save_dir=None):
        """Return the cache key (MD5 hex digest) for a URL / save-dir pair."""
        cache_data = f"{tileset_url}|{save_dir}"
        return hashlib.md5(cache_data.encode()).hexdigest()

    def get_cached_tileset_info(self, tileset_url, save_dir=None):
        """Look up the local entry file of a cached tileset.

        Args:
            tileset_url: URL of the tileset.
            save_dir: Save directory exactly as it was used for the cache key.

        Returns:
            str | None: Local path of the tileset root file if it is recorded
            in the mapping and still exists on disk, otherwise None.
        """
        cache_key = self.get_cache_key(tileset_url, save_dir)
        for info in self.path_mapping.values():
            if info.get('cache_key') == cache_key and info.get('is_tileset_root'):
                # Only the entry file is checked; the rest of the tileset is
                # assumed to be present when the entry file is.
                local_path = info.get('local_path')
                if local_path and os.path.exists(local_path):
                    return local_path
        return None

    def update_tileset_cache(self, tileset_url, save_dir, local_path):
        """Mark the mapping entry of a tileset root file as a cached tileset.

        Does nothing when the root file has no mapping entry yet (entries are
        created by download_file).

        Args:
            tileset_url: URL of the tileset root file.
            save_dir: Save directory used for the cache key.
            local_path: Local path of the root file. Currently unused; the
                path already stored in the mapping entry is authoritative.
        """
        cache_key = self.get_cache_key(tileset_url, save_dir)
        entry_bucket, entry_path = self.parse_minio_url(tileset_url)
        file_id = f"{entry_bucket}/{entry_path}"
        entry = self.path_mapping.get(file_id)
        if entry is None:
            return
        entry['cache_key'] = cache_key
        entry['is_tileset_root'] = True
        entry['tileset_url'] = tileset_url
        entry['save_dir'] = save_dir
        entry['cache_time'] = datetime.now().isoformat()

    def download_full_tileset(self, tileset_url, save_dir=None, region_filter=None, use_cache=True):
        """Download a complete 3D Tiles dataset, with optional caching.

        Args:
            tileset_url: URL of the tileset.json stored in MinIO.
            save_dir: Local directory to save into (defaults to
                ``data_3dtiles`` next to this script).
            region_filter: Optional object exposing
                ``check_tile_bounding_volume(bounding_volume)``; the content of
                tiles for which it returns a falsy value is not downloaded.
            use_cache: When True, return immediately if the tileset root is
                already cached. NOTE: with ``use_cache=False`` only this
                tileset-level shortcut is skipped; individual files already
                recorded in the path mapping and present on disk are still
                reused by download_file.

        Returns:
            tuple: ``(success, result)``. On failure ``result`` is an error
            message. On success ``result`` is the local path of the entry
            file when ``use_cache`` is True, otherwise ``True``.
        """
        save_dir = self._resolve_save_dir(save_dir)

        # Cache check: only the existence of the entry file is verified.
        if use_cache:
            cached_path = self.get_cached_tileset_info(tileset_url, save_dir)
            if cached_path:
                return True, cached_path

        entry_bucket, entry_path = self.parse_minio_url(tileset_url)
        if not entry_bucket or not entry_path:
            return False, "无法解析URL"
        entry_dir = os.path.dirname(entry_path)

        os.makedirs(save_dir, exist_ok=True)
        visited = set()

        # Download the entry file first.
        entry_local_path = self.get_local_path(
            entry_bucket, entry_path,
            entry_bucket, entry_dir,
            save_dir
        )
        success, result = self.download_file(entry_bucket, entry_path, entry_local_path)
        if not success:
            return False, f"入口文件下载失败: {result}"
        # download_file may return a different path (sanitised, or a
        # previously mapped location); report the file that really exists.
        entry_local_path = result
        visited.add(f"{entry_bucket}/{entry_path}")

        tileset_data = self.load_json_from_minio(entry_bucket, entry_path)
        if not tileset_data or "root" not in tileset_data:
            return False, "无效的tileset.json文件"

        # Walk the tile tree and download every referenced file.
        self.traverse_and_download_tileset(
            tileset_data["root"],
            entry_bucket,
            entry_dir,
            entry_bucket,
            entry_dir,
            save_dir,
            region_filter,
            None,
            visited
        )

        self.update_tileset_cache(tileset_url, save_dir, entry_local_path)
        self.save_path_mapping()

        if use_cache:
            return True, entry_local_path
        return True, True

    def get_tileset_local_path(self, tileset_url, save_dir=None):
        """Return the local entry path of an already cached tileset.

        Args:
            tileset_url: URL of the tileset.
            save_dir: Save directory (same default as download_full_tileset).

        Returns:
            str | None: Local path, or None if the tileset is not cached.
        """
        # Resolve exactly like download_full_tileset so the cache keys match.
        return self.get_cached_tileset_info(tileset_url, self._resolve_save_dir(save_dir))

    def clear_tileset_cache(self, tileset_url=None, save_dir=None):
        """Clear cached tileset information.

        Only mapping entries are removed; downloaded files stay on disk.

        Args:
            tileset_url: Tileset whose cache entry should be removed. When
                None (or empty) the whole mapping is cleared and the mapping
                file is deleted.
            save_dir: Save directory the tileset was downloaded to (same
                default as download_full_tileset).

        Returns:
            bool: True on success, False if an error occurred.
        """
        try:
            if tileset_url:
                cache_key = self.get_cache_key(tileset_url, self._resolve_save_dir(save_dir))
                stale_ids = [
                    file_id for file_id, info in self.path_mapping.items()
                    if info.get('cache_key') == cache_key
                ]
                for file_id in stale_ids:
                    del self.path_mapping[file_id]
                if stale_ids:
                    # Persist, otherwise the entry reappears on next start.
                    self.save_path_mapping()
                print(f"已清除tileset缓存: {tileset_url}")
            else:
                self.path_mapping = {}
                if os.path.exists(self.mapping_file):
                    os.remove(self.mapping_file)
                print("已清除所有缓存")
            return True
        except Exception as e:
            print(f"清除缓存失败: {e}")
            return False

    # Helper methods

    def clean_filename(self, filename):
        """Replace characters that are unsafe in file names.

        Reserved characters and control characters become ``_``, runs of
        ``_`` are collapsed, and leading/trailing spaces and underscores are
        stripped. Returns "" for empty input.
        """
        if not filename:
            return ""
        cleaned = re.sub(r'[<>:"/\\|?*\x00-\x1F]', '_', filename)
        cleaned = re.sub(r'_+', '_', cleaned)
        return cleaned.strip(' _')

    def parse_minio_url(self, url):
        """Split a MinIO URL or ``bucket/key`` string into (bucket, key).

        For ``http(s)://`` URLs the first path segment is the bucket and the
        remainder is the object key. ``key`` is "" when there is no ``/``
        after the bucket.
        """
        if url.startswith(('http://', 'https://')):
            path = urlparse(url).path.lstrip('/')
        else:
            path = url
        bucket, _, key = path.partition('/')
        return bucket, key

    def download_file(self, bucket_name, object_name, file_path):
        """Download one object from MinIO, reusing an earlier download if any.

        Args:
            bucket_name: Source bucket.
            object_name: Object key inside the bucket.
            file_path: Desired local path (sanitised before use).

        Returns:
            tuple: ``(True, local_path)`` on success, where ``local_path`` is
            the file that exists on disk, or ``(False, error_message)``.
        """
        try:
            target_path = self.clean_file_path(file_path)

            # Reuse a previous download if its file is still on disk.
            file_id = f"{bucket_name}/{object_name}"
            cached = self.path_mapping.get(file_id)
            if cached:
                mapped_path = cached.get('local_path')
                if mapped_path and os.path.exists(mapped_path):
                    return True, mapped_path

            # Create the directory of the path that is actually written to;
            # a bare file name has no directory component to create.
            target_dir = os.path.dirname(target_path)
            if target_dir:
                os.makedirs(target_dir, exist_ok=True)

            self.minio_client.fget_object(
                bucket_name,
                object_name,
                target_path
            )

            self.path_mapping[file_id] = {
                'local_path': target_path,
                'bucket': bucket_name,
                'object': object_name,
                'download_time': datetime.now().isoformat(),
                'size': os.path.getsize(target_path)
            }
            return True, target_path
        except Exception as e:
            # Covers MinIO errors as well as local filesystem errors.
            return False, str(e)

    def clean_file_path(self, file_path):
        """Sanitise every component of a file path.

        Each directory component and the file name are passed through
        clean_filename and empty components are dropped. The drive and
        leading separator of an absolute path are preserved so that an
        absolute path stays absolute.
        """
        drive, tail = os.path.splitdrive(file_path)
        separators = (os.sep,) if os.altsep is None else (os.sep, os.altsep)
        anchor = drive + (os.sep if tail.startswith(separators) else "")

        dir_name, file_name = os.path.split(tail)
        cleaned_parts = []
        for part in dir_name.split(os.sep):
            cleaned_part = self.clean_filename(part)
            if cleaned_part:
                cleaned_parts.append(cleaned_part)
        cleaned_dir = os.sep.join(cleaned_parts)
        cleaned_file = self.clean_filename(file_name)

        if cleaned_dir:
            relative = os.path.join(cleaned_dir, cleaned_file)
        else:
            relative = cleaned_file
        return anchor + relative

    def load_json_from_minio(self, bucket_name, object_name):
        """Fetch an object from MinIO and parse it as UTF-8 JSON.

        Returns:
            The parsed JSON value, or None if the object cannot be fetched
            or parsed.
        """
        response = None
        try:
            response = self.minio_client.get_object(bucket_name, object_name)
            return json.loads(response.read().decode('utf-8'))
        except Exception as e:
            print(f"加载JSON失败: {bucket_name}/{object_name}: {e}")
            return None
        finally:
            # Always hand the connection back, even when reading or parsing
            # failed.
            if response is not None:
                response.close()
                response.release_conn()

    def get_local_path(self, bucket_name, object_name, base_bucket, base_object, save_dir):
        """Build the local path for an object, preserving directory structure.

        The result is ``save_dir/<bucket>/<relative path>``. When the object
        lives in ``base_bucket`` below ``dirname(base_object)``, that prefix
        is removed from the object key; otherwise the full key is used.
        Every component is sanitised, and ``.`` / ``..`` components are
        dropped so the result can never leave ``save_dir``.
        """
        bucket_dir = self.clean_filename(bucket_name)

        relative_path = object_name
        if bucket_name == base_bucket and base_object:
            base_dir = os.path.dirname(base_object)
            if base_dir:
                if object_name == base_dir:
                    relative_path = ""
                elif object_name.startswith(base_dir + '/'):
                    # Require a "/" boundary so "model/abc" is not treated as
                    # living under "model/ab".
                    relative_path = object_name[len(base_dir) + 1:].lstrip('/\\')

        cleaned_parts = []
        for part in relative_path.split('/'):
            cleaned_part = self.clean_filename(part)
            if cleaned_part and cleaned_part not in ('.', '..'):
                cleaned_parts.append(cleaned_part)

        local_path = os.path.join(save_dir, bucket_dir, *cleaned_parts)
        return os.path.normpath(local_path)

    @staticmethod
    def _combine_transforms(parent_transform, local_transform):
        """Return ``parent * local`` for two flat 16-element 4x4 matrices.

        3D Tiles stores ``transform`` in column-major order, so the arrays
        are reshaped and flattened with Fortran order.
        """
        parent = np.array(parent_transform).reshape(4, 4, order='F')
        local = np.array(local_transform).reshape(4, 4, order='F')
        return np.dot(parent, local).flatten(order='F').tolist()

    def _resolve_tile_object(self, tile_uri, current_bucket, current_dir):
        """Resolve a tile content URI to ``(bucket, object_key)``.

        Absolute http(s) URLs are parsed as MinIO URLs. Relative URIs are
        resolved against ``current_dir`` with ``./`` and ``../`` collapsed.
        The key is "" when nothing usable can be derived.
        """
        if tile_uri.startswith(('http://', 'https://')):
            parsed_bucket, parsed_path = self.parse_minio_url(tile_uri)
            if not parsed_bucket:
                return current_bucket, ""
            return parsed_bucket, parsed_path.lstrip('/')

        joined = os.path.join(current_dir, tile_uri) if current_dir else tile_uri
        # Object keys always use "/" and are matched literally by the server,
        # so "a/./b" and "a/x/../b" must be collapsed before the request.
        normalized = os.path.normpath(joined).replace('\\', '/')
        if normalized == '.':
            return current_bucket, ""
        return current_bucket, normalized.lstrip('/')

    def traverse_and_download_tileset(self, tile_obj, current_bucket, current_dir,
                                      base_bucket, base_dir, save_dir,
                                      region_filter=None, parent_transform=None,
                                      visited=None):
        """Recursively walk a tile tree and download every referenced file.

        Args:
            tile_obj: Tile dict (a tileset ``root`` or one of its children).
            current_bucket: Bucket that relative content URIs refer to.
            current_dir: Object-key directory that relative URIs resolve
                against.
            base_bucket: Bucket of the top-level tileset (for local layout).
            base_dir: Directory of the top-level tileset (for local layout).
            save_dir: Local root directory for downloads.
            region_filter: Optional filter; see download_full_tileset. A
                filtered-out tile only skips its own content, its children
                are still visited.
            parent_transform: Accumulated transform of the parent tile as a
                flat 16-element list, or None.
            visited: Set of ``bucket/key`` ids already handled; shared across
                the recursion to avoid duplicate downloads and cycles.
        """
        if visited is None:
            visited = set()

        # Accumulate the transform so nested tiles receive their ancestors'
        # combined transform.
        current_transform = parent_transform
        if "transform" in tile_obj:
            tile_mat = tile_obj["transform"]
            if current_transform is None:
                current_transform = tile_mat
            else:
                current_transform = self._combine_transforms(current_transform, tile_mat)

        skip_current_tile = False
        if region_filter and "boundingVolume" in tile_obj:
            if not region_filter.check_tile_bounding_volume(tile_obj["boundingVolume"]):
                skip_current_tile = True

        content = tile_obj.get("content")
        tile_uri = None
        if isinstance(content, dict):
            # "uri" is the current key; "url" is the legacy spelling used by
            # older tilesets.
            tile_uri = content.get("uri") or content.get("url")

        if not skip_current_tile and tile_uri:
            file_bucket, file_path = self._resolve_tile_object(
                tile_uri, current_bucket, current_dir
            )
            file_id = f"{file_bucket}/{file_path}"
            if file_path and file_id not in visited:
                print(f"下载文件:{file_id}")
                visited.add(file_id)
                local_path = self.get_local_path(
                    file_bucket, file_path,
                    base_bucket, base_dir,
                    save_dir
                )
                ok, detail = self.download_file(file_bucket, file_path, local_path)
                if not ok:
                    # Keep going: one missing tile should not abort the rest.
                    print(f"下载失败: {file_id}: {detail}")

                # Content that is itself JSON is an external tileset; descend
                # into it with its own directory as the new URI base.
                if file_path.lower().endswith('.json'):
                    sub_tileset = self.load_json_from_minio(file_bucket, file_path)
                    if sub_tileset and "root" in sub_tileset:
                        self.traverse_and_download_tileset(
                            sub_tileset["root"],
                            file_bucket,
                            os.path.dirname(file_path),
                            base_bucket,
                            base_dir,
                            save_dir,
                            region_filter,
                            current_transform,
                            visited
                        )

        for child_tile in tile_obj.get("children") or []:
            self.traverse_and_download_tileset(
                child_tile,
                current_bucket,
                current_dir,
                base_bucket,
                base_dir,
                save_dir,
                region_filter,
                current_transform,
                visited
            )

    def upload_file(self, bucket_name, object_name, file_path):
        """Upload a local file to MinIO.

        Returns:
            tuple: ``(True, "bucket/object")`` on success, otherwise
            ``(False, error_message)``.
        """
        try:
            if not os.path.exists(file_path):
                return False, f"文件不存在: {file_path}"
            self.minio_client.fput_object(bucket_name, object_name, file_path)
            return True, f"{bucket_name}/{object_name}"
        except S3Error as e:
            return False, f"MinIO上传错误: {e}"
        except Exception as e:
            return False, f"上传失败: {str(e)}"

    def upload_directory(self, bucket_name, local_dir, remote_prefix=""):
        """Upload every file below a local directory to MinIO.

        Args:
            bucket_name: Destination bucket.
            local_dir: Local directory to walk recursively.
            remote_prefix: Optional key prefix prepended to each relative
                path.

        Returns:
            tuple: ``(uploaded, failed)`` where ``uploaded`` is a list of
            object keys and ``failed`` is a list of ``(object_key, message)``
            tuples. If ``local_dir`` does not exist, ``failed`` holds a single
            message string instead.
        """
        if not os.path.exists(local_dir):
            return [], [f"目录不存在: {local_dir}"]

        uploaded_files = []
        failed_files = []
        for root, _dirs, files in os.walk(local_dir):
            for file_name in files:
                local_path = os.path.join(root, file_name)
                rel_path = os.path.relpath(local_path, local_dir)
                if remote_prefix:
                    remote_path = os.path.join(remote_prefix, rel_path).replace('\\', '/')
                else:
                    remote_path = rel_path.replace('\\', '/')
                success, message = self.upload_file(bucket_name, remote_path, local_path)
                if success:
                    uploaded_files.append(remote_path)
                else:
                    failed_files.append((remote_path, message))
        return uploaded_files, failed_files

    def check_and_create_bucket(self, bucket_name):
        """Create the bucket if it does not exist yet.

        Returns:
            tuple: ``(True, message)`` when the bucket exists or was created,
            ``(False, message)`` when MinIO reported an error.
        """
        try:
            if not self.minio_client.bucket_exists(bucket_name):
                self.minio_client.make_bucket(bucket_name)
                return True, f"创建bucket: {bucket_name}"
            return True, f"bucket已存在: {bucket_name}"
        except S3Error as e:
            return False, f"创建bucket失败: {e}"
# Usage example
if __name__ == "__main__":
    # Connection settings.
    # NOTE(security): the endpoint and credentials below are committed to the
    # repository. They are kept only as fallbacks so the example still runs
    # unchanged; supply MINIO_ENDPOINT / MINIO_ACCESS_KEY / MINIO_SECRET_KEY
    # via the environment instead, and rotate and remove the hard-coded
    # values.
    ENDPOINT_URL = os.environ.get("MINIO_ENDPOINT", "222.212.85.86:9000")
    ACCESS_KEY = os.environ.get("MINIO_ACCESS_KEY", "WuRenJi")
    SECRET_KEY = os.environ.get("MINIO_SECRET_KEY", "WRJ@2024")

    # Create the manager.
    manager = MinIO3DTilesManager(
        endpoint_url=ENDPOINT_URL,
        access_key=ACCESS_KEY,
        secret_key=SECRET_KEY,
        secure=False
    )

    # Tileset to download.
    tileset_url = "http://222.212.85.86:9000/300bdf2b-a150-406e-be63-d28bd29b409f/model/石棉0908/terra_b3dms/tileset.json"

    # First call: downloads the tileset to the local save directory.
    success, result = manager.download_full_tileset(tileset_url, use_cache=True)
    if success:
        print(f"下载成功,本地路径: {result}")
    else:
        print(f"下载失败: {result}")

    # Second call with the same URL: answered from the cache.
    success, result = manager.download_full_tileset(tileset_url, use_cache=True)
    if success:
        print(f"从缓存获取,本地路径: {result}")
    else:
        print(f"下载失败: {result}")

    # Skip the tileset-level cache lookup and walk the tileset again. Files
    # already recorded in the path mapping and present on disk are still
    # reused, so this is not a byte-for-byte re-download.
    success, result = manager.download_full_tileset(tileset_url, use_cache=False)
    if success:
        print("强制重新下载成功")
    else:
        print(f"下载失败: {result}")

    # Look up the cached local path without downloading anything.
    local_path = manager.get_tileset_local_path(tileset_url)
    if local_path:
        print(f"缓存的本地路径: {local_path}")