# ai_project_v1/b3dm/data_3dtiles_manager.py

import hashlib
import json
import os
import pickle
import posixpath
import re
import time
from datetime import datetime
from urllib.parse import urlparse

import numpy as np
from minio import Minio
from minio.error import S3Error

class MinIO3DTilesManager:
    def __init__(self, endpoint_url, access_key, secret_key, secure=False,
                 mapping_file="minio_path_mapping.pkl"):
        """
        Create the MinIO client and load the persisted path mapping.

        Args:
            endpoint_url: MinIO server address (e.g. 222.212.85.86:9001).
                A leading ``http://`` or ``https://`` is accepted and
                stripped; ``https://`` additionally forces ``secure=True``.
            access_key: Access key.
            secret_key: Secret key.
            secure: Whether to use HTTPS.
            mapping_file: File name of the path-mapping pickle; it is stored
                in the directory that contains this script.
        """
        # Strip only a *leading* scheme; str.replace() would rewrite every
        # occurrence of the substring anywhere in the value.
        if endpoint_url.startswith('http://'):
            endpoint_url = endpoint_url[len('http://'):]
        elif endpoint_url.startswith('https://'):
            endpoint_url = endpoint_url[len('https://'):]
            secure = True

        # A URL copied from a browser often ends in "/"; the client is given
        # a bare host[:port].
        endpoint_url = endpoint_url.rstrip('/')

        self.endpoint_url = endpoint_url
        self.access_key = access_key
        self.secret_key = secret_key

        self.minio_client = Minio(
            endpoint_url,
            access_key=access_key,
            secret_key=secret_key,
            secure=secure
        )

        # Directory containing this script
        self.script_dir = os.path.dirname(os.path.abspath(__file__))

        # Full path of the mapping file
        self.mapping_file = os.path.join(self.script_dir, mapping_file)

        # Load any previously saved path mapping
        self.path_mapping = self.load_path_mapping()

    def load_path_mapping(self):
        """
        Load the persisted path mapping from ``self.mapping_file``.

        Returns:
            dict: The stored mapping, or an empty dict when the file does not
            exist, cannot be read/unpickled, or does not contain a dict.
        """
        if not os.path.exists(self.mapping_file):
            return {}

        try:
            # NOTE(security): pickle can execute arbitrary code while loading.
            # Only load mapping files that were written by this tool.
            with open(self.mapping_file, 'rb') as f:
                mapping = pickle.load(f)
        except Exception as e:
            # Best effort: a broken mapping only costs re-downloads, but say so
            # instead of failing silently.
            print(f"加载路径映射失败，将使用空映射: {e}")
            return {}

        # Every other method calls dict methods on the mapping, so reject
        # anything else here rather than failing later.
        if not isinstance(mapping, dict):
            print(f"路径映射文件格式无效，将使用空映射: {self.mapping_file}")
            return {}

        return mapping

    def save_path_mapping(self):
        """
        Persist ``self.path_mapping`` to ``self.mapping_file``.

        The data is written to a temporary sibling file first and then moved
        into place, so a failure while pickling does not destroy the
        previously saved mapping.

        Returns:
            bool: True on success, False if the mapping could not be written.
        """
        tmp_path = f"{self.mapping_file}.tmp"
        try:
            with open(tmp_path, 'wb') as f:
                pickle.dump(self.path_mapping, f)
            # os.replace overwrites the destination in a single step.
            os.replace(tmp_path, self.mapping_file)
            return True
        except Exception as e:
            print(f"保存路径映射失败: {e}")
            # Do not leave a half-written temporary file behind.
            try:
                os.remove(tmp_path)
            except OSError:
                pass
            return False

    def get_cache_key(self, tileset_url, save_dir=None):
        """
        Build the cache key for a tileset.

        The key is the hex MD5 digest of ``"<tileset_url>|<save_dir>"``; it is
        used only as a lookup identifier.
        """
        raw_key = "{}|{}".format(tileset_url, save_dir)
        digest = hashlib.md5()
        digest.update(raw_key.encode())
        return digest.hexdigest()

    def get_cached_tileset_info(self, tileset_url, save_dir=None):
        """
        Look up the local entry file of a previously cached tileset.

        Returns:
            The ``local_path`` of the first mapping entry that carries the
            matching cache key, is flagged as a tileset root and still exists
            on disk; None when there is no such entry.
        """
        wanted_key = self.get_cache_key(tileset_url, save_dir)

        for entry in self.path_mapping.values():
            if entry.get('cache_key') != wanted_key:
                continue
            if not entry.get('is_tileset_root'):
                continue
            # Only the entry file is checked for existence.
            candidate = entry.get('local_path')
            if candidate and os.path.exists(candidate):
                return candidate

        return None

    def update_tileset_cache(self, tileset_url, save_dir, local_path):
        """
        Flag the entry file of a tileset as a cached tileset root.

        Args:
            tileset_url: URL (or ``bucket/key``) of the tileset entry file.
            save_dir: Directory the tileset was saved to; part of the cache key.
            local_path: Local path of the entry file. Used to create the
                mapping entry when the file has no entry yet; an existing
                entry keeps its recorded ``local_path``.
        """
        cache_key = self.get_cache_key(tileset_url, save_dir)

        entry_bucket, entry_path = self.parse_minio_url(tileset_url)
        file_id = f"{entry_bucket}/{entry_path}"

        info = self.path_mapping.get(file_id)
        if info is None:
            # Without a known location there is nothing a later cache lookup
            # could return, so do not create an entry.
            if not local_path:
                return
            info = {
                'local_path': local_path,
                'bucket': entry_bucket,
                'object': entry_path,
            }
            self.path_mapping[file_id] = info

        info.update({
            'cache_key': cache_key,
            'is_tileset_root': True,
            'tileset_url': tileset_url,
            'save_dir': save_dir,
            'cache_time': datetime.now().isoformat(),
        })

    def download_full_tileset(self, tileset_url, save_dir=None, region_filter=None, use_cache=True):
        """
        Download a complete 3D Tiles dataset, with tileset-level caching.

        Args:
            tileset_url: URL (or ``bucket/key``) of the tileset.json on MinIO.
            save_dir: Local target directory; defaults to ``data_3dtiles``
                next to this script.
            region_filter: Optional filter object, handed unchanged to
                traverse_and_download_tileset.
            use_cache: When True, return immediately if this tileset was
                already recorded as cached and its entry file still exists.
                False only skips that shortcut: individual files that are
                already recorded in the path mapping are still reused by
                download_file.

        Returns:
            tuple: (success, result)
            - failure: (False, error message)
            - success with use_cache=True: (True, local path of the entry file)
            - success with use_cache=False: (True, True)

        NOTE: the cache key is built from tileset_url and save_dir only, so a
        cached result does not distinguish between different region filters.
        """
        if save_dir is None:
            save_dir = os.path.join(self.script_dir, "data_3dtiles")

        # Sanitise the target directory name
        save_dir = self.clean_file_path(save_dir)

        # Cache check: only the existence of the entry file is verified
        if use_cache:
            cached_path = self.get_cached_tileset_info(tileset_url, save_dir)
            if cached_path:
                return True, cached_path

        # Split the URL into bucket and object key
        entry_bucket, entry_path = self.parse_minio_url(tileset_url)
        if not entry_bucket or not entry_path:
            return False, "无法解析URL"

        entry_dir = os.path.dirname(entry_path)

        os.makedirs(save_dir, exist_ok=True)

        visited = set()

        # Download the entry file
        entry_local_path = self.get_local_path(
            entry_bucket, entry_path,
            entry_bucket, entry_dir,
            save_dir
        )

        success, result = self.download_file(entry_bucket, entry_path, entry_local_path)
        if not success:
            return False, f"入口文件下载失败: {result}"

        # download_file reports where the file actually lives (it may reuse an
        # earlier download); hand that path to the caller, not the computed one.
        entry_local_path = result

        entry_id = f"{entry_bucket}/{entry_path}"
        visited.add(entry_id)

        # Load the tileset description
        tileset_data = self.load_json_from_minio(entry_bucket, entry_path)
        if not tileset_data or "root" not in tileset_data:
            return False, "无效的tileset.json文件"

        # Walk the tile tree and download every referenced file
        self.traverse_and_download_tileset(
            tileset_data["root"],
            entry_bucket,
            entry_dir,
            entry_bucket,
            entry_dir,
            save_dir,
            region_filter,
            None,
            visited
        )

        # A visited file counts as downloaded only when it is mapped to a file
        # that exists on disk. Marking an incomplete tileset as cached would
        # make every later call return it without retrying.
        missing = [
            file_id for file_id in visited
            if not os.path.exists(
                self.path_mapping.get(file_id, {}).get('local_path') or ''
            )
        ]
        if missing:
            print(f"警告: {len(missing)} 个文件下载失败，本次不标记tileset缓存")
        else:
            self.update_tileset_cache(tileset_url, save_dir, entry_local_path)

        # Persist the path mapping (successful files are kept either way)
        self.save_path_mapping()

        if use_cache:
            return True, entry_local_path
        else:
            return True, True

    def get_tileset_local_path(self, tileset_url, save_dir=None):
        """
        Return the local entry-file path of an already cached tileset.

        Args:
            tileset_url: URL of the tileset.
            save_dir: Directory the tileset was saved to; defaults to
                ``data_3dtiles`` next to this script.

        Returns:
            str: The local path, or None when the tileset is not cached.
        """
        if save_dir is None:
            save_dir = os.path.join(self.script_dir, "data_3dtiles")

        # download_full_tileset sanitises save_dir before building the cache
        # key; do the same here so that both sides compute the same key.
        save_dir = self.clean_file_path(save_dir)

        return self.get_cached_tileset_info(tileset_url, save_dir)

    def clear_tileset_cache(self, tileset_url=None, save_dir=None):
        """
        Remove tileset cache records from the path mapping.

        Only mapping entries are removed; downloaded files stay on disk.

        Args:
            tileset_url: Tileset whose cache record should be removed. When
                None (or empty) the whole mapping is cleared and the mapping
                file is deleted.
            save_dir: Directory the tileset was saved to; defaults to
                ``data_3dtiles`` next to this script, as in
                download_full_tileset.

        Returns:
            bool: True on success, False if an error occurred.
        """
        try:
            if tileset_url:
                # Build the key the same way download_full_tileset does,
                # otherwise it can never match a stored entry.
                if save_dir is None:
                    save_dir = os.path.join(self.script_dir, "data_3dtiles")
                save_dir = self.clean_file_path(save_dir)
                cache_key = self.get_cache_key(tileset_url, save_dir)

                # Collect first, delete afterwards: the dict must not change
                # size while it is being iterated.
                to_remove = [
                    file_id
                    for file_id, info in self.path_mapping.items()
                    if info.get('cache_key') == cache_key
                ]
                for file_id in to_remove:
                    del self.path_mapping[file_id]

                # Persist the removal so it survives a restart, as the
                # clear-all branch does by deleting the mapping file.
                if to_remove:
                    self.save_path_mapping()

                print(f"已清除tileset缓存: {tileset_url}")
            else:
                # Clear everything
                self.path_mapping = {}
                if os.path.exists(self.mapping_file):
                    os.remove(self.mapping_file)
                print("已清除所有缓存")

            return True
        except Exception as e:
            print(f"清除缓存失败: {e}")
            return False

    # 以下是原有的辅助方法
    def clean_filename(self, filename):
        """
        Sanitise a single path component.

        Characters ``< > : " / \\ | ? *`` and control characters are replaced
        by ``_``, runs of ``_`` are collapsed to one, and leading/trailing
        spaces and underscores are removed. Falsy input yields ``""``.
        """
        if not filename:
            return ""

        forbidden = re.compile(r'[<>:"/\\|?*\x00-\x1F]')
        underscore_runs = re.compile(r'_+')

        result = forbidden.sub('_', filename)
        result = underscore_runs.sub('_', result)
        return result.strip(' _')

    def parse_minio_url(self, url):
        """
        Split a MinIO location into ``(bucket, key)``.

        For ``http://`` / ``https://`` URLs the URL path (without leading
        slashes) is used; any other string is taken as ``bucket/key``. The
        part before the first ``/`` is the bucket, the rest is the key
        (``""`` when there is none).
        """
        if url.startswith(('http://', 'https://')):
            location = urlparse(url).path.lstrip('/')
        else:
            location = url

        bucket, _slash, key = location.partition('/')
        return bucket, key

    def download_file(self, bucket_name, object_name, file_path):
        """
        Download one object from MinIO unless it was downloaded before.

        The path mapping is keyed by ``bucket/object`` only: if the object is
        already mapped to a file that still exists, that file is reused even
        when it lives somewhere other than ``file_path``.

        Args:
            bucket_name: Source bucket.
            object_name: Object key inside the bucket.
            file_path: Desired local path; it is sanitised with
                clean_file_path before use.

        Returns:
            tuple: (True, local path of the file) on success,
            (False, error message) on failure.
        """
        file_id = f"{bucket_name}/{object_name}"
        try:
            # Reuse an earlier download if its file is still on disk.
            known = self.path_mapping.get(file_id)
            if known:
                mapped_path = known.get('local_path')
                if mapped_path and os.path.exists(mapped_path):
                    return True, mapped_path

            # Create the directory of the path that is actually written to
            # (the sanitised one). A bare file name has no directory part and
            # os.makedirs('') would raise.
            target_path = self.clean_file_path(file_path)
            target_dir = os.path.dirname(target_path)
            if target_dir:
                os.makedirs(target_dir, exist_ok=True)

            self.minio_client.fget_object(
                bucket_name,
                object_name,
                target_path
            )

            # Record where the object ended up
            self.path_mapping[file_id] = {
                'local_path': target_path,
                'bucket': bucket_name,
                'object': object_name,
                'download_time': datetime.now().isoformat(),
                'size': os.path.getsize(target_path)
            }

            return True, target_path

        except Exception as e:
            # Covers S3 errors as well as local I/O problems; both are
            # reported to the caller in the same way.
            return False, str(e)

    def clean_file_path(self, file_path):
        """
        Sanitise every component of a file path with clean_filename.

        A drive prefix and a leading path separator are kept as they are, so
        an absolute path stays absolute. Directory components that become
        empty after cleaning are dropped.

        Args:
            file_path: Path to sanitise.

        Returns:
            str: The sanitised path.
        """
        # Set the drive and root aside first: cleaning them like ordinary
        # components would remove them and make the path relative.
        drive, tail = os.path.splitdrive(file_path)
        separators = (os.sep, os.altsep) if os.altsep else (os.sep,)
        root = os.sep if tail.startswith(separators) else ""

        dir_name = os.path.dirname(tail)
        file_name = os.path.basename(tail)

        cleaned_parts = []
        if dir_name:
            for part in dir_name.split(os.sep):
                cleaned_part = self.clean_filename(part)
                if cleaned_part:
                    cleaned_parts.append(cleaned_part)
        cleaned_dir = os.sep.join(cleaned_parts)

        cleaned_file = self.clean_filename(file_name)

        if cleaned_dir:
            cleaned_path = os.path.join(cleaned_dir, cleaned_file)
        else:
            cleaned_path = cleaned_file

        return f"{drive}{root}{cleaned_path}"

    def load_json_from_minio(self, bucket_name, object_name):
        """
        Fetch an object from MinIO and parse it as UTF-8 JSON.

        Args:
            bucket_name: Source bucket.
            object_name: Object key inside the bucket.

        Returns:
            The parsed JSON value, or None if the object cannot be fetched,
            decoded or parsed.
        """
        response = None
        try:
            # get_object itself fails for a missing object, so no separate
            # stat_object round trip is made.
            response = self.minio_client.get_object(bucket_name, object_name)
            content = response.read().decode('utf-8')
            return json.loads(content)
        except Exception as e:
            # Covers S3 errors as well as decode/parse errors; callers treat
            # None as "not a usable tileset".
            print(f"加载JSON失败: {bucket_name}/{object_name} ({e})")
            return None
        finally:
            # Release the connection even when reading or parsing failed.
            if response is not None:
                response.close()
                response.release_conn()

    def get_local_path(self, bucket_name, object_name, base_bucket, base_object, save_dir):
        """
        Map an object key to a local path that keeps the directory structure.

        The result is ``save_dir/<bucket>/<relative key>`` with every
        component sanitised by clean_filename. When the object is in the base
        bucket and lies under ``dirname(base_object)``, that prefix is removed
        from the key; otherwise the full key is used.

        Args:
            bucket_name: Bucket of the object.
            object_name: Object key.
            base_bucket: Bucket of the tileset the download started from.
            base_object: Base key; its parent directory is the prefix that is
                stripped.
            save_dir: Local root directory.

        Returns:
            str: Normalised local path.
        """
        relative_path = object_name

        if bucket_name == base_bucket and base_object:
            base_dir = os.path.dirname(base_object)
            if base_dir and object_name.startswith(base_dir):
                remainder = object_name[len(base_dir):]
                # Strip the prefix only on a whole-segment match: "model/ab/x"
                # is not inside "model/a".
                if not remainder or remainder[0] in '/\\':
                    relative_path = remainder.lstrip('/\\')

        cleaned_parts = []
        for part in relative_path.split('/'):
            cleaned_part = self.clean_filename(part)
            if cleaned_part:
                cleaned_parts.append(cleaned_part)

        local_path = os.path.join(
            save_dir, self.clean_filename(bucket_name), *cleaned_parts
        )
        return os.path.normpath(local_path)

    def traverse_and_download_tileset(self, tile_obj, current_bucket, current_dir,
                                     base_bucket, base_dir, save_dir,
                                     region_filter=None, parent_transform=None,
                                     visited=None):
        """
        Recursively walk a tile tree and download every referenced content file.

        Args:
            tile_obj: Tile dict taken from a tileset JSON.
            current_bucket: Bucket that relative content URIs are resolved in.
            current_dir: Object-key directory that relative content URIs are
                resolved against.
            base_bucket: Passed to get_local_path to derive the local layout.
            base_dir: Passed to get_local_path to derive the local layout.
            save_dir: Local root directory.
            region_filter: Optional object with
                ``check_tile_bounding_volume(bounding_volume)``. When it
                returns a falsy value the tile's own content is skipped; its
                children are still traversed.
            parent_transform: Accumulated transform of the parent tile as a
                flat list of 16 numbers, or None.
            visited: Set of ``bucket/key`` ids already handled; it is updated
                in place so that each file is processed once.
        """
        if visited is None:
            visited = set()

        # Accumulate the transform down the tree. The value is only handed on
        # to child calls; nothing in this method consumes it.
        # NOTE(review): 3D Tiles stores "transform" column-major while
        # reshape(4, 4) reads it row-major - confirm the multiplication order
        # before using the accumulated value anywhere.
        current_transform = parent_transform
        if "transform" in tile_obj:
            tile_mat = tile_obj["transform"]
            if current_transform is None:
                current_transform = tile_mat
            else:
                mat1 = np.array(current_transform).reshape(4, 4)
                mat2 = np.array(tile_mat).reshape(4, 4)
                combined_mat = np.dot(mat1, mat2).flatten().tolist()
                current_transform = combined_mat

        skip_current_tile = False
        if region_filter and "boundingVolume" in tile_obj:
            if not region_filter.check_tile_bounding_volume(tile_obj["boundingVolume"]):
                skip_current_tile = True

        if not skip_current_tile and "content" in tile_obj and "uri" in tile_obj["content"]:
            tile_uri = tile_obj["content"]["uri"]

            file_bucket = current_bucket
            file_path = ""

            if tile_uri.startswith(('http://', 'https://')):
                parsed_bucket, parsed_path = self.parse_minio_url(tile_uri)
                if parsed_bucket:
                    file_bucket = parsed_bucket
                    file_path = parsed_path
            elif tile_uri:
                if current_dir:
                    joined = os.path.join(current_dir, tile_uri).replace('\\', '/')
                else:
                    joined = tile_uri
                # Collapse "./" and "../" segments: an object key is a plain
                # string, so "a/../b/x" would not address "b/x".
                file_path = posixpath.normpath(joined)

            file_path = file_path.lstrip('/')

            file_id = f"{file_bucket}/{file_path}"

            # An empty key (empty or unparsable URI) cannot be downloaded.
            if file_path and file_id not in visited:
                print(f"下载文件:{file_id}")
                visited.add(file_id)

                local_path = self.get_local_path(
                    file_bucket, file_path,
                    base_bucket, base_dir,
                    save_dir
                )

                ok, detail = self.download_file(file_bucket, file_path, local_path)
                if not ok:
                    # Keep going (best effort) but do not hide the failure.
                    print(f"下载失败:{file_id} ({detail})")

                # A JSON content file may be a nested tileset: walk it too,
                # resolving its relative URIs against its own directory.
                if file_path.lower().endswith('.json'):
                    sub_tileset = self.load_json_from_minio(file_bucket, file_path)
                    if sub_tileset and "root" in sub_tileset:
                        sub_dir = os.path.dirname(file_path)
                        self.traverse_and_download_tileset(
                            sub_tileset["root"],
                            file_bucket,
                            sub_dir,
                            base_bucket,
                            base_dir,
                            save_dir,
                            region_filter,
                            current_transform,
                            visited
                        )

        for child_tile in tile_obj.get("children") or []:
            self.traverse_and_download_tileset(
                child_tile,
                current_bucket,
                current_dir,
                base_bucket,
                base_dir,
                save_dir,
                region_filter,
                current_transform,
                visited
            )

    def upload_file(self, bucket_name, object_name, file_path):
        """
        Upload one local file to MinIO.

        Args:
            bucket_name: Target bucket.
            object_name: Object key to store the file under.
            file_path: Local file to upload.

        Returns:
            tuple: (True, "bucket/object") on success,
            (False, error message) on failure.
        """
        try:
            if not os.path.exists(file_path):
                return False, f"文件不存在: {file_path}"

            # The previously computed file size was never used, so that extra
            # stat call is gone.
            self.minio_client.fput_object(bucket_name, object_name, file_path)

            return True, f"{bucket_name}/{object_name}"

        except S3Error as e:
            return False, f"MinIO上传错误: {e}"
        except Exception as e:
            return False, f"上传失败: {str(e)}"

    def upload_directory(self, bucket_name, local_dir, remote_prefix=""):
        """
        Upload every file below a local directory to MinIO.

        Object keys are the paths relative to ``local_dir`` (with ``/`` as
        separator), prefixed by ``remote_prefix`` when one is given.

        Returns:
            tuple: (uploaded, failed). ``uploaded`` lists the object keys that
            were stored; ``failed`` lists ``(object key, message)`` pairs. If
            ``local_dir`` does not exist, ``failed`` holds a single message
            string instead.
        """
        if not os.path.exists(local_dir):
            return [], [f"目录不存在: {local_dir}"]

        succeeded, failures = [], []

        for current_root, _subdirs, file_names in os.walk(local_dir):
            for file_name in file_names:
                source_path = os.path.join(current_root, file_name)
                relative = os.path.relpath(source_path, local_dir)

                object_key = os.path.join(remote_prefix, relative) if remote_prefix else relative
                object_key = object_key.replace('\\', '/')

                ok, detail = self.upload_file(bucket_name, object_key, source_path)
                if ok:
                    succeeded.append(object_key)
                else:
                    failures.append((object_key, detail))

        return succeeded, failures

    def check_and_create_bucket(self, bucket_name):
        """
        Make sure a bucket exists, creating it when necessary.

        Returns:
            tuple: (True, message) if the bucket exists or was created,
            (False, message) if MinIO reported an S3 error.
        """
        try:
            already_there = self.minio_client.bucket_exists(bucket_name)
            if already_there:
                return True, f"bucket已存在: {bucket_name}"

            self.minio_client.make_bucket(bucket_name)
            return True, f"创建bucket: {bucket_name}"
        except S3Error as e:
            return False, f"创建bucket失败: {e}"


# 使用示例
if __name__ == "__main__":
    # Connection settings are read from the environment so that credentials
    # are not committed to source control.
    ENDPOINT_URL = os.environ.get("MINIO_ENDPOINT", "222.212.85.86:9000")
    ACCESS_KEY = os.environ.get("MINIO_ACCESS_KEY")
    SECRET_KEY = os.environ.get("MINIO_SECRET_KEY")
    if not ACCESS_KEY or not SECRET_KEY:
        raise SystemExit("请设置环境变量 MINIO_ACCESS_KEY 和 MINIO_SECRET_KEY")

    # Create the manager
    manager = MinIO3DTilesManager(
        endpoint_url=ENDPOINT_URL,
        access_key=ACCESS_KEY,
        secret_key=SECRET_KEY,
        secure=False
    )

    # Tileset to download (with caching)
    tileset_url = "http://222.212.85.86:9000/300bdf2b-a150-406e-be63-d28bd29b409f/model/石棉0908/terra_b3dms/tileset.json"

    # First call: downloads to the local disk
    success, result = manager.download_full_tileset(tileset_url, use_cache=True)
    if success:
        print(f"下载成功，本地路径: {result}")

    # Second call with the same URL: answered from the cache
    success, result = manager.download_full_tileset(tileset_url, use_cache=True)
    if success:
        print(f"从缓存获取，本地路径: {result}")

    # use_cache=False skips the tileset-level cache shortcut and walks the
    # tileset again; files that are already recorded in the path mapping and
    # still exist on disk are reused by download_file, not fetched again.
    success, result = manager.download_full_tileset(tileset_url, use_cache=False)
    if success:
        print("强制重新下载成功")

    # Look up the cached local path
    local_path = manager.get_tileset_local_path(tileset_url)
    if local_path:
        print(f"缓存的本地路径: {local_path}")