anon-cmevs-2026 committed on
Commit
5c1bb37
·
verified ·
1 Parent(s): d835437

Initial code release for NeurIPS 2026 D&B reviewer reference

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +1 -0
  2. .gitignore +31 -0
  3. CITATION.cff +9 -0
  4. GITHUB_UPLOAD.md +31 -0
  5. LICENSE +22 -0
  6. README.md +157 -0
  7. README_REPRODUCE.md +95 -0
  8. SHA256SUMS +60 -0
  9. SUBMISSION_CHECKLIST.md +29 -0
  10. configs/base_erpt.yaml +142 -0
  11. configs/blender_indoor.yaml +17 -0
  12. configs/blender_outdoor.yaml +17 -0
  13. configs/default.yaml +22 -0
  14. configs/hm3d.yaml +17 -0
  15. configs/scannetpp.yaml +17 -0
  16. configs/tiny.yaml +15 -0
  17. core/__init__.py +35 -0
  18. core/coordinate.py +191 -0
  19. core/depth_estimation.py +185 -0
  20. core/depth_fusion.py +769 -0
  21. core/erp_projection.py +277 -0
  22. core/erp_warp.py +591 -0
  23. core/tangent_extraction.py +566 -0
  24. data/README.md +16 -0
  25. dataset_metadata/croissant.json +414 -0
  26. dataset_metadata/manifests_h100/ARCHIVE_DIGESTS.txt +6 -0
  27. dataset_metadata/manifests_h100/README.md +104 -0
  28. dataset_metadata/manifests_h100/SHA256SUMS_HM3D.txt +0 -0
  29. dataset_metadata/manifests_h100/SHA256SUMS_OB3D.txt +0 -0
  30. dataset_metadata/manifests_h100/SHA256SUMS_blender_indoor_round1+2.txt +0 -0
  31. dataset_metadata/manifests_h100/SHA256SUMS_blender_indoor_round2.txt +0 -0
  32. dataset_metadata/manifests_h100/SHA256SUMS_scannetpp.txt +0 -0
  33. dataset_metadata/manifests_h100/SHA256SUMS_tartanground.txt +3 -0
  34. environment.yml +25 -0
  35. examples/metadata/candidates.jsonl +7 -0
  36. examples/tiny_blender_scene/README.md +5 -0
  37. metadata_examples/candidates.schema.json +38 -0
  38. metadata_examples/per_step_log.schema.json +15 -0
  39. metadata_examples/selected_viewpoints.schema.json +32 -0
  40. pipelines/get_blend_bounds.py +199 -0
  41. pipelines/render_erp_blender.py +1015 -0
  42. pipelines/run_blend_pipeline.py +1860 -0
  43. pipelines/run_full_pipeline.py +1036 -0
  44. pipelines/run_hm3d_pipeline.py +0 -0
  45. pipelines/run_pipeline.py +500 -0
  46. pipelines/run_ply_pipeline.py +1967 -0
  47. requirements.txt +15 -0
  48. results/README.md +13 -0
  49. scripts/_common.py +228 -0
  50. scripts/audit_quality.py +62 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ dataset_metadata/manifests_h100/SHA256SUMS_tartanground.txt filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ __pycache__/
2
+ *.py[cod]
3
+ .DS_Store
4
+ .env
5
+ .venv/
6
+ venv/
7
+ *.egg-info/
8
+
9
+ # Local data, generated outputs, and large third-party artifacts.
10
+ data/*
11
+ !data/README.md
12
+ outputs/
13
+ logs/
14
+ checkpoints/
15
+ third_party/*
16
+ !third_party/README.md
17
+ *.blend
18
+ *.glb
19
+ *.gltf
20
+ *.ply
21
+ *.obj
22
+ *.fbx
23
+ *.exr
24
+ *.npy
25
+ *.npz
26
+ *.pt
27
+ *.pth
28
+ *.ckpt
29
+
30
+ # Keep checked-in result templates, but ignore local regenerated variants.
31
+ results/*.local.csv
CITATION.cff ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ cff-version: 1.2.0
2
+ message: "If you use this code, please cite the associated anonymous NeurIPS submission during review and the camera-ready paper after publication."
3
+ title: "CM-EVS: Conflict-Minimized Efficient View Selection for Scalable 3D Scene Data Acquisition"
4
+ authors:
5
+ - family-names: "Anonymous"
6
+ given-names: "Authors"
7
+ date-released: 2026-04-27
8
+ license: MIT
9
+
GITHUB_UPLOAD.md ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Anonymous GitHub Upload Guide
2
+
3
+ The NeurIPS review policy requires the code URL to be accessible and anonymized at submission time. Use an anonymous repository service or a repository that does not reveal author identity.
4
+
5
+ ## Final Local Check
6
+
7
+ Run from the repository root:
8
+
9
+ ```bash
10
+ bash scripts/run_tiny.sh
11
+ rm -rf outputs
12
+ bash scripts/check_anonymity.sh
13
+ ```
14
+
15
+ ## Initialize and Push
16
+
17
+ ```bash
18
+ git init
19
+ git add .
20
+ git commit -m "Anonymous CM-EVS code release"
21
+ git branch -M main
22
+ git remote add origin <anonymous-repository-url>
23
+ git push -u origin main
24
+ ```
25
+
26
+ Before pushing, verify that `git status --short` does not include local scene data, rendered outputs, checkpoints, or third-party dataset assets.
27
+
28
+ ## OpenReview Field
29
+
30
+ Paste the anonymous repository URL into the **Code URL** field. If the repository is mirrored through an anonymous hosting service, use the anonymized URL rather than a personal GitHub URL.
31
+
LICENSE ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Anonymous Authors
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
22
+
README.md ADDED
@@ -0,0 +1,157 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # CM-EVS Anonymous Code Release
2
+
3
+ This repository contains the anonymous code release for **CM-EVS: Conflict-Minimized Efficient View Selection for Scalable 3D Scene Data Acquisition**.
4
+
5
+ The release is intentionally organized around one primary review path:
6
+
7
+ ```text
8
+ Blender indoor .blend scenes
9
+ -> candidate generation
10
+ -> conflict-minimized view selection
11
+ -> selected ERP rendering
12
+ -> coverage, oracle-gap, and quality-audit outputs
13
+ ```
14
+
15
+ HM3D/GLB and ScanNet++/PLY sources are supported through secondary adapters, but reviewers should inspect the Blender-indoor path first.
16
+
17
+ ## Review-Ready Entry Points
18
+
19
+ | Purpose | Command |
20
+ | --- | --- |
21
+ | No-data smoke test | `bash scripts/run_tiny.sh` |
22
+ | Blender-indoor dry run | `DRY_RUN=1 BLENDER=/path/to/blender INPUT_DIR=/path/to/blend_scenes bash scripts/run_blender_indoor.sh` |
23
+ | Full Blender-indoor run | `BLENDER=/path/to/blender INPUT_DIR=/path/to/blend_scenes bash scripts/run_blender_indoor.sh` |
24
+ | Summarize Blender-indoor run | `python3 scripts/summarize_blender_indoor_run.py --output-root outputs/blender_indoor` |
25
+ | Anonymity check | `bash scripts/check_anonymity.sh` |
26
+
27
+ The smoke test is designed to run without private assets. The full Blender-indoor run requires local `.blend` scenes and a Blender executable.
28
+
29
+ ## Repository Layout
30
+
31
+ ```text
32
+ .
33
+ ├── pipelines/ # full scene pipelines; Blender indoor is the primary path
34
+ ├── scripts/ # review and reproduction entry points
35
+ ├── configs/ # default and source-specific configs
36
+ ├── core/ # ERP projection, tangent extraction, depth, and warping modules
37
+ ├── tools/ # semantic and navigability helpers
38
+ ├── utils/ # IO and pose utilities
39
+ ├── examples/ # tiny Blender-indoor-style metadata example
40
+ ├── metadata_examples/ # JSON schemas for candidate/selection logs
41
+ ├── data/ # local data mount point, not tracked
42
+ ├── third_party/ # optional external dependencies, not tracked
43
+ └── results/ # generated result CSVs
44
+ ```
45
+
46
+ ## Environment
47
+
48
+ ```bash
49
+ conda env create -f environment.yml
50
+ conda activate cmevs
51
+ ```
52
+
53
+ If Conda is unavailable:
54
+
55
+ ```bash
56
+ python3 -m venv .venv
57
+ source .venv/bin/activate
58
+ pip install -r requirements.txt
59
+ ```
60
+
61
+ ## Minimal Smoke Test
62
+
63
+ ```bash
64
+ bash scripts/run_tiny.sh
65
+ ```
66
+
67
+ Expected outputs:
68
+
69
+ ```text
70
+ outputs/tiny/metadata/candidates.jsonl
71
+ outputs/tiny/metadata/selected_viewpoints.json
72
+ outputs/tiny/metadata/per_step_log.jsonl
73
+ outputs/tiny/renders/
74
+ outputs/tiny/results/coverage_main.csv
75
+ outputs/tiny/results/oracle_validation.csv
76
+ outputs/tiny/results/audit_50_frames.csv
77
+ ```
78
+
79
+ This test validates the repository wiring and metadata contracts. It is not intended to reproduce paper-scale numbers.
80
+
81
+ ## Paper Experiments
82
+
83
+ The driver scripts for the §6 evaluation experiments (fixed-budget coverage, oracle-gain validation, λ sweep, cross-source robustness, downstream depth) are scheduled to be released alongside the camera-ready paper. The current release ships the algorithmic core (`scripts/build_candidates.py`, `scripts/select_views.py`, `scripts/render_selected.py`), the per-stage evaluation building blocks (`scripts/evaluate_coverage.py`, `scripts/evaluate_oracle_gap.py`, `scripts/audit_quality.py`), and the metadata-contract example through the smoke test. Reviewers can verify the algorithmic core end-to-end via the smoke test above.
84
+
85
+ ## Primary Full Run: Blender Indoor
86
+
87
+ Put `.blend` scenes under `data/blender_indoor/`, or point `INPUT_DIR` to another directory. Nested layouts are supported; the first subdirectory under `INPUT_DIR` is used as the scene name.
88
+
89
+ Dry run:
90
+
91
+ ```bash
92
+ DRY_RUN=1 \
93
+ BLENDER=/path/to/blender \
94
+ INPUT_DIR=data/blender_indoor \
95
+ OUTPUT_ROOT=outputs/blender_indoor \
96
+ bash scripts/run_blender_indoor.sh
97
+ ```
98
+
99
+ Full run:
100
+
101
+ ```bash
102
+ BLENDER=/path/to/blender \
103
+ INPUT_DIR=data/blender_indoor \
104
+ OUTPUT_ROOT=outputs/blender_indoor \
105
+ NUM_FRAMES=30 \
106
+ RESOLUTION=2048,1024 \
107
+ bash scripts/run_blender_indoor.sh
108
+ ```
109
+
110
+ Equivalent direct CLI:
111
+
112
+ ```bash
113
+ export PYTHONPATH="$PWD:$PWD/pipelines:${PYTHONPATH:-}"
114
+
115
+ python3 pipelines/run_full_pipeline.py \
116
+ --blender /path/to/blender \
117
+ --input-dir data/blender_indoor \
118
+ --output-root outputs/blender_indoor \
119
+ --num-frames 30 \
120
+ --resolution 2048,1024 \
121
+ --grid-spacing 0.5 \
122
+ --min-frames 5 \
123
+ --stop-gain 0.08
124
+ ```
125
+
126
+ ## Secondary Adapters
127
+
128
+ The repository also includes adapters for additional sources used in robustness analyses:
129
+
130
+ - `configs/blender_outdoor.yaml`: generic `.glb` / `.gltf` scenes.
131
+ - `configs/hm3d.yaml`: HM3D-style `.glb` / `.gltf` scenes.
132
+ - `configs/scannetpp.yaml`: ScanNet++-style `.ply` scenes.
133
+
134
+ These adapters are provided for completeness, but the Blender-indoor route is the recommended first reviewer path.
135
+
136
+ ## Data and Checkpoints
137
+
138
+ This repository does not redistribute third-party scene assets, dataset files, or model checkpoints. Put local assets under `data/` or pass absolute paths via CLI. The `data/` directory is ignored by git.
139
+
140
+ Depth Pro is optional for ERPT-style depth fusion. If used, place it under:
141
+
142
+ ```text
143
+ third_party/ml-depth-pro/
144
+ third_party/ml-depth-pro/checkpoints/depth_pro.pt
145
+ ```
146
+
147
+ ## Final Submission Check
148
+
149
+ Before uploading the code URL or zip:
150
+
151
+ ```bash
152
+ bash scripts/run_tiny.sh
153
+ rm -rf outputs
154
+ bash scripts/check_anonymity.sh
155
+ ```
156
+
157
+ The code is released under the MIT License for review. Dataset assets remain governed by their original licenses.
README_REPRODUCE.md ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Reproducibility Guide
2
+
3
+ This guide maps the code release to the experiments in the paper. The primary reproducibility path is Blender indoor; other sources are retained as secondary adapters for robustness checks.
4
+
5
+ ## Primary Asset Requirement
6
+
7
+ | Source | Expected Input | Primary Command |
8
+ | --- | --- | --- |
9
+ | Blender indoor | `.blend` scenes | `scripts/run_blender_indoor.sh` |
10
+
11
+ The repository does not redistribute scene assets. Reviewers can run the no-data smoke test immediately, and can run the full path after mounting local `.blend` scenes.
12
+
13
+ ## Secondary Assets
14
+
15
+ | Source | Expected Input | Config |
16
+ | --- | --- | --- |
17
+ | Blender outdoor / generic meshes | `.glb` or `.gltf` | `configs/blender_outdoor.yaml` |
18
+ | HM3D | `.glb` or `.gltf` plus optional semantic/navmesh files | `configs/hm3d.yaml` |
19
+ | ScanNet++ | `.ply` | `configs/scannetpp.yaml` |
20
+
21
+ ## Building Blocks Available in This Release
22
+
23
+ | Module | Purpose | Entry Point |
24
+ | --- | --- | --- |
25
+ | Candidate generation | Phase 1 of §3 — produce \(\mathcal{P}_\varphi\) | `scripts/build_candidates.py` |
26
+ | Conflict-aware selection | Phase 2 of §3 — greedy with \(s_t = G_t - \lambda L_t + \beta B_t\) | `scripts/select_views.py` |
27
+ | Selected-view rendering | Phase 3 — final ERP render from chosen candidates | `scripts/render_selected.py` |
28
+ | Coverage metric | §6.1 high-resolution oracle coverage | `scripts/evaluate_coverage.py` |
29
+ | Oracle-gain validation | §6.2 warping vs. pre-render-all comparison | `scripts/evaluate_oracle_gap.py` |
30
+ | Quality audit | Appendix F.2 50-frame audit | `scripts/audit_quality.py` |
31
+ | Run summarization | Aggregate per-scene `selected_frames.json` into a CSV | `scripts/summarize_blender_indoor_run.py` |
32
+ | Audit summarization | Aggregate per-frame audit results into a CSV | `scripts/summarize_quality_audit.py` |
33
+
34
+ The §6 driver scripts that orchestrate these building blocks across an entire baseline sweep (e.g., the \(K \in \{8,16,24,32\}\) table of §6.1, the \(\lambda\) sweep of §6.5, and the four-source benchmark of §6.6) are scheduled to be released alongside the camera-ready paper.
35
+
36
+ ## Minimal Review Run
37
+
38
+ ```bash
39
+ bash scripts/run_tiny.sh
40
+ ```
41
+
42
+ This validates the Blender-indoor-style metadata format, greedy selection loop, render-output contract, coverage metric, oracle-gap script, and quality audit script — end-to-end without any private scene assets.
43
+
44
+ ## Blender-Indoor Full Run
45
+
46
+ ```bash
47
+ DRY_RUN=1 \
48
+ BLENDER=/path/to/blender \
49
+ INPUT_DIR=/path/to/blend_scenes \
50
+ OUTPUT_ROOT=outputs/blender_indoor \
51
+ bash scripts/run_blender_indoor.sh
52
+ ```
53
+
54
+ After confirming the detected scene list, remove `DRY_RUN=1`:
55
+
56
+ ```bash
57
+ BLENDER=/path/to/blender \
58
+ INPUT_DIR=/path/to/blend_scenes \
59
+ OUTPUT_ROOT=outputs/blender_indoor \
60
+ NUM_FRAMES=30 \
61
+ RESOLUTION=2048,1024 \
62
+ GRID_SPACING=0.5 \
63
+ bash scripts/run_blender_indoor.sh
64
+ ```
65
+
66
+ ## Metric Scripts
67
+
68
+ The native Blender-indoor pipeline emits `selected_frames.json` under each scene output directory. Summarize a completed run with:
69
+
70
+ ```bash
71
+ python3 scripts/summarize_blender_indoor_run.py \
72
+ --output-root outputs/blender_indoor \
73
+ --output outputs/blender_indoor/results/coverage_main.csv
74
+ ```
75
+
76
+ If you have consolidated candidate and selection metadata into the normalized JSONL/JSON contract used by the smoke test, use:
77
+
78
+ ```bash
79
+ python3 scripts/evaluate_coverage.py \
80
+ --candidates outputs/blender_indoor/metadata/candidates.jsonl \
81
+ --selected outputs/blender_indoor/metadata/selected_viewpoints.json \
82
+ --output outputs/blender_indoor/results/coverage_main.csv
83
+
84
+ python3 scripts/evaluate_oracle_gap.py \
85
+ --candidates outputs/blender_indoor/metadata/candidates.jsonl \
86
+ --selected outputs/blender_indoor/metadata/selected_viewpoints.json \
87
+ --output outputs/blender_indoor/results/oracle_validation.csv
88
+
89
+ python3 scripts/audit_quality.py \
90
+ --render-dir outputs/blender_indoor/renders \
91
+ --metadata outputs/blender_indoor/metadata/selected_viewpoints.json \
92
+ --output outputs/blender_indoor/results/audit_50_frames.csv
93
+ ```
94
+
95
+ The exact dataset paths should be adapted to the local machine. Do not commit generated data, logs, checkpoints, third-party repositories, or scene assets to the anonymous code repository.
SHA256SUMS ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 7c9e65d5cbe4069429eeb1b091992bb662d402f05fcc6689010af6d40a4b2170 .gitignore
2
+ 56bad452228b39e39bc3b441c1451eae2e314a6e9b025063beb1a9475c93ade8 CITATION.cff
3
+ f6afa50672dccfb3c42c2468e6bbd6430bf3dd63a2a945ef356c82871b8b56b3 GITHUB_UPLOAD.md
4
+ 2c14d7ac4ef207357073eabef1bc7f65853ab248237550a5542a59ba677f8ade LICENSE
5
+ 53f3e568310533e684abb1260e61a3102e1361d7cf4cabb0cc6047c311deb31c README.md
6
+ efb670c7a9eef10060a1d50f36be3670402e20716f1c01d7ed70964ae1879e14 README_REPRODUCE.md
7
+ 4cd137b698a2087a9d3a57c848a5484154324eb79139dfdd523a84b29b34eadd SUBMISSION_CHECKLIST.md
8
+ a195a9cdf86c7c2d23d5259fa0cb3cc75dd327411be3b3f5d78f227f36ad4a8c configs/base_erpt.yaml
9
+ 0086890995745e230a23ef766795af44e5cd633f792d9e9386594d6b1a339586 configs/blender_indoor.yaml
10
+ 3147f7c85a712e6f02f17330640ecb8c6bda0ff5b9994f22e90f95a53633adfb configs/blender_outdoor.yaml
11
+ 4e03f04feaee0a3d0d355e6a260c93d0d0ecd44e3b58b7ee6c23a710fcefcfd9 configs/default.yaml
12
+ 93ddb9458d7caaa9898df553a1a9f756b5fca0cdc6514edff226b13176c0577e configs/hm3d.yaml
13
+ 00cd245e4e590b8f7f649cfa87170099d758c0f166970755270b2f752a478666 configs/scannetpp.yaml
14
+ f41e12d62f7dc5f0fa6259d8d35dfc8ea20f2fdd94e1bed3dcbbe07ae856dd48 configs/tiny.yaml
15
+ 9b3d00ec613ce18b4eeadbc9af0053251e074c61e8937062d79442fe9282b48f core/__init__.py
16
+ 3fad91fe28ebd11a98178179622a2cee9ad2dfb0f057ae62eae5f3821f0e985e core/coordinate.py
17
+ 06b214daacd64b0de9747abc2d50d0318ac7ce122f315d48c5e05d891a78d221 core/depth_estimation.py
18
+ 31fbacffa8b2b0aa40eddfea6d7ba53c28137d9ee6ad91298771f66c98e8cf73 core/depth_fusion.py
19
+ ee596d36829271f164a4e1d6a297aaa42ef8eaaa0339d3b4b14f3b9089b084c7 core/erp_projection.py
20
+ f714d3d887b142cb63f0c1fc23dde0949b1e179330964edb6216bc06d5d318ab core/erp_warp.py
21
+ a8a1ba38ffde69fd0c07f653fc388dc9fc53eb5aa2b95a3ed58d54848714e03e core/tangent_extraction.py
22
+ 21e7c6c68fd31d5c63f6cb9f949e38adf78a5660f83650481afccc418b9390ab data/README.md
23
+ e5261ad1221380fad1174aeb931355679f419f8bfa8266f3bff06d9b2b917f75 dataset_metadata/croissant.json
24
+ c9e7127fc7c6de554516ee219e8aeaa2fca1b00951099d973c7e8af8db32ea95 environment.yml
25
+ 4b6766691ae074a066ed06919cc372a2849b435e372e6a24849154d0b09c1195 examples/metadata/candidates.jsonl
26
+ 71c290335857fd7b4a53e507bc94fa6e6c20d1fbdc768fed0e9de20e11229e65 examples/tiny_blender_scene/README.md
27
+ ae204fa937a7a427e8bc06fe204ecc16e26eba191a0187b98e6ed20e0a9d7ace metadata_examples/candidates.schema.json
28
+ 6839cf7fd0bf08de9006ae76b007843fd6ceb997a83a7e632b0b051f903ce789 metadata_examples/per_step_log.schema.json
29
+ 23832a8e7d3b0b7e52d0beb7d71f45c210b558d336f7e48bf6b81fbb3a1d8388 metadata_examples/selected_viewpoints.schema.json
30
+ bddf800be3a0e5046fdf8a5435733d19199e16d63df25a2ba62de9c1c50f46ba pipelines/get_blend_bounds.py
31
+ 4de927c2825256599466766e323d3530eb10b55ea9b618953660bbff9b8ee4d1 pipelines/render_erp_blender.py
32
+ 7313dc7cd2fb4a03b9fdfe19503511ac7551a2c231a2e83f66b02fcbdcd55798 pipelines/run_blend_pipeline.py
33
+ eff64bfb9b1a5d8fa4add9fa7e006b750c3c029220c9ea3cb2e39f735c92a9a6 pipelines/run_full_pipeline.py
34
+ 1008e53f357b0cb3126139b682ceede7d329b6e3dfa2f178b4cf62d7eda591f9 pipelines/run_hm3d_pipeline.py
35
+ 0e72c1081ff0f541a552731d7e86eda3380f652e8866f6bb612f64ba8c57cd5b pipelines/run_pipeline.py
36
+ 8c03c3f667f9ee1466d1c183359bf9a4d182473675d07515f8dfcbecfca39928 pipelines/run_ply_pipeline.py
37
+ b853a2be65e05a38435cadda54c29926a5c8d6c9a96b68752478a24bda42ca76 requirements.txt
38
+ 72dbcd63d46db055eaf12d5afc7a5bdd402833ef03c886db570d47fde476efdf results/README.md
39
+ dd7ed0dc4e17143b0cb2be9269035cc67a6f0d2292992426d4362091143c14f3 scripts/_common.py
40
+ 3c41a31d1723ce8bcba2a619ee79ee986ddb51e368664c073b97359feda45b48 scripts/audit_quality.py
41
+ d6fbe64e1cf56d7e3d487e108207e47797cbc1e17b2063786b965674e4d831fa scripts/build_candidates.py
42
+ 9c7cff2bbfaaf0bb52d8ed9b604f3a7167885d1e514a3ab6469f3909f5d1a7e5 scripts/check_anonymity.sh
43
+ d52befd9b56357de17dbe33ef840a352d6152695319a9446dc5c71bfca6d70f6 scripts/evaluate_coverage.py
44
+ bd2c6c8fb8869ca34608254c2719b9484a9817628e82e3802b0d4efd13d5c568 scripts/evaluate_oracle_gap.py
45
+ 21adb3f4cde65b77387873555f16df4a8b260bc0901bc555a2eab9b7db85930c scripts/render_selected.py
46
+ e9392972fb7937bc42c493cb18b96bba4f5bf2cad207e07bf30750083e9bd710 scripts/run_blender_indoor.sh
47
+ 54f6a5af30881ac32f867a8d22437c999f390871cb162319d79e77e2d96b1e4f scripts/run_tiny.sh
48
+ 00c330c04a8ca40e30251efccda7e716ae3a8876056a5688d34279c0fb02029f scripts/select_views.py
49
+ 90a20ddfd55c434dd9df4496dd757f00ccfd18f76c24affd6b3fe67e466df1a3 scripts/selection_metrics.py
50
+ 8ce9e1d8d99ab19b899eae3abfcd345cfb2405ede6541db97884526a3e8173c6 scripts/summarize_blender_indoor_run.py
51
+ a03153d20d38fb6805aaa980b659c1964d312f209605fbdbd692ef3b3db44232 scripts/summarize_quality_audit.py
52
+ e4cfbffc9ce620f47f977aa6a4cc78f546125ac33189948f5da8a41ac3bf5690 third_party/README.md
53
+ e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855 tools/__init__.py
54
+ 043e8fc09641621a207543857f621195b9365e2da9ce9d8f6cfd6517cdcd99d9 tools/make_sha256sums.sh
55
+ 1f28cb01c320379ce4138842f848fdcae2db224b805942d0e17c20d132c74609 tools/navmesh_utils.py
56
+ 08dc49bc2f8bf272274625235f7a9eeec99f94bb3e011778731cc87f28157552 tools/semantic_utils.py
57
+ 80b87f28df042b2789fd650aec2bbf97a29b6d3a20bb2b13b7dba3c64ec8e06a tools/update_croissant_with_real_hashes.py
58
+ 1a930f2a701e468558e3953ca94c744c68747b7a0e25fe92c736a80b2876839e utils/__init__.py
59
+ 4c06aec86567d13a8bb54a3fcb9e4826c57635fbc04c3ae49e3f9c43202cf88d utils/io_utils.py
60
+ 6a6b8e7e61b3cad791e6337c2247a9986416250bd4a3ee2eaf2a5c42a9e0215d utils/pose_utils.py
SUBMISSION_CHECKLIST.md ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Code Submission Checklist
2
+
3
+ Use this checklist before pasting the Code URL into OpenReview.
4
+
5
+ | Requirement | Status | Where to Check |
6
+ | --- | --- | --- |
7
+ | Anonymous repository content | Ready | `bash scripts/check_anonymity.sh` |
8
+ | Executable smoke test | Ready | `bash scripts/run_tiny.sh` |
9
+ | Primary Blender-indoor entry point | Ready | `scripts/run_blender_indoor.sh` |
10
+ | Native Blender-indoor result summarizer | Ready | `scripts/summarize_blender_indoor_run.py` |
11
+ | Per-stage evaluation building blocks | Ready | `scripts/evaluate_coverage.py`, `scripts/evaluate_oracle_gap.py`, `scripts/audit_quality.py` |
12
+ | Clear README | Ready | `README.md` |
13
+ | Reproducibility guide | Ready | `README_REPRODUCE.md` |
14
+ | Environment specification | Ready | `environment.yml`, `requirements.txt` |
15
+ | Dataset paths excluded | Ready | `.gitignore`, `data/README.md` |
16
+ | Third-party checkpoints excluded | Ready | `.gitignore`, `third_party/README.md` |
17
+ | License included | Ready | `LICENSE` |
18
+ | Metadata schemas included | Ready | `metadata_examples/` |
19
+ | Generated outputs excluded before upload | Check manually | `find . -maxdepth 2 -type d -name outputs` |
20
+
21
+ Recommended final commands:
22
+
23
+ ```bash
24
+ bash scripts/run_tiny.sh
25
+ rm -rf outputs
26
+ bash scripts/check_anonymity.sh
27
+ ```
28
+
29
+ Then upload the repository or zip archive.
configs/base_erpt.yaml ADDED
@@ -0,0 +1,142 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # =============================================================================
2
+ # ERPT Pipeline Configuration (ported from the original ERPT)
3
+ # Strictly follows the ERPT_native coordinate convention: right-handed system [X right, Y up, Z forward]
4
+ # =============================================================================
5
+
6
+ # --- 数据路径 ---
7
+ data:
8
+ data_dir: "inputs"
9
+ output_dir: "outputs"
10
+ depth_dir: null # 可选:外部深度目录
11
+
12
+ # --- ERP 参数 ---
13
+ erp:
14
+ auto_size: true # 自动从图像检测尺寸
15
+ width: 4096 # 参考宽度
16
+ height: 2048 # 参考高度
17
+
18
+ # --- Tangent 切片参数(原版配置) ---
19
+ tangent:
20
+ scheme: "icosahedron"
21
+ num_faces: 20
22
+ add_poles: true
23
+ face_resolution: 768 # 每个 face 的分辨率(像素)
24
+ fov_deg: 90.0 # 普通 face 的基础 FOV(度)
25
+ padding_factor: 1.3 # 有效 FOV = 90 * 1.3 = 117°
26
+ pole_fov_deg: 160.0 # 极区切片使用更大 FOV
27
+ pole_resolution: 768 # 极区分辨率
28
+ pole_extra_rings: 2 # 额外极区密采样环数
29
+ seam_wrap: true
30
+
31
+ # --- Depth Pro 参数(原版配置) ---
32
+ depth_pro:
33
+ enabled: true
34
+ repo_dir: "third_party/ml-depth-pro"
35
+ checkpoint_path: "third_party/ml-depth-pro/checkpoints/depth_pro.pt"
36
+ precision: "fp16" # "fp32" | "fp16" | "bf16"
37
+ depth_def: "z" # "z" (z-depth) | "ray" (ray-depth)
38
+ pass_f_px: true # 传递已知焦距
39
+
40
+ # --- 深度融合参数(原版配置) ---
41
+ fusion:
42
+ blend_mode: "multiband" # "softmin_invdepth" | "multiband"
43
+ output_scale: 1.10 # 全局尺度校正
44
+
45
+ # 权重模式
46
+ weight_mode: "cosine"
47
+ k: 4 # cosine 权重指数
48
+
49
+ # 深度竞争
50
+ depth_competition: "softmin_invdepth"
51
+ softmin_alpha: 10.0
52
+
53
+ # 极区处理
54
+ pole_boost: false
55
+ pole_boost_factor: 1.5
56
+ pole_latitude_deg: 75.0
57
+ pole_ramp_deg: 10.0
58
+
59
+ pole_ring:
60
+ enabled: false
61
+ min_latitude_deg: 60.0
62
+ ramp_deg: 10.0
63
+
64
+ face_pole_suppress:
65
+ enabled: false
66
+ min_latitude_deg: 70.0
67
+ ramp_deg: 10.0
68
+ min_scale: 0.4
69
+
70
+ # 极区一致性校正
71
+ pole_consistency:
72
+ enabled: true
73
+ min_latitude_deg: 60.0
74
+ min_overlap_pixels: 4000
75
+ max_abs_log_shift: 0.7
76
+ ref_slice_types: ["face", "pole_ring"]
77
+ target_slice_types: ["pole_north", "pole_south"]
78
+
79
+ # Z-buffer 门限
80
+ project_zbuffer_eps_abs_m: 0.02
81
+ project_zbuffer_eps_rel: 0.02
82
+
83
+ # Multiband 金字塔
84
+ multiband:
85
+ levels: 6
86
+ highfreq_levels: 2
87
+ eps: 1.0e-6
88
+
89
+ # 有效性
90
+ min_weight_sum: 1.0e-6
91
+
92
+ # --- Warp 参数(原版配置) ---
93
+ warp:
94
+ enabled: true
95
+ center_frame: 0
96
+ target_frames: "auto" # "auto" 自动识别所有非中心帧;或指定列表如 [1, 2, 3]
97
+
98
+ # Splatting 方法
99
+ method: "softmax_splatting" # "softmax_splatting" | "zbuffer_splatting" | "zbuffer_point"
100
+ alpha: 2.0 # softmax 温度
101
+
102
+ # 自适应 Splat 半径
103
+ splat_radius_px: 1.5 # 基础半径
104
+ radius_min_px: 0.6 # 半径下限
105
+ radius_max_px: 2.2 # 中纬度上限
106
+ radius_max_pole_px: 3.4 # 极区上限
107
+ pole_radius_scale: 3.0 # 极区放大因子
108
+ pole_lat_threshold: 60.0 # 极区纬度阈值(度)
109
+ depth_radius_scale: false # 深度缩放
110
+ depth_ref_m: 2.0 # 深度参考值
111
+ depth_scale_factor: 1.0 # 深度已在 fusion.output_scale 烘焙
112
+ depth_edge_aware: true # 深度边缘感知
113
+ depth_edge_threshold: 0.3 # 深度梯度阈值
114
+ depth_edge_min_scale: 0.12 # 边缘处最小半径缩放
115
+
116
+ # 遮挡门控
117
+ occlusion_gate:
118
+ enabled: true
119
+ abs_eps_m: 0.05
120
+ rel_eps: 0.05
121
+
122
+ # Z-buffer 参数
123
+ zbuffer_eps_abs_m: 0.03
124
+ zbuffer_eps_rel: 0.03
125
+ zbuffer_min_weight: 0.001
126
+
127
+ # 空洞填充
128
+ hole_fill_enabled: false
129
+ max_hole_px: 16
130
+
131
+ # 有效性
132
+ min_weight_sum: 1.0e-4
133
+ min_hit_sum: 1.0e-6
134
+
135
+ # 输出控制
136
+ output_flow: true
137
+ output_depth: true
138
+
139
+ # --- 运行参数 ---
140
+ run:
141
+ device: "cuda"
142
+ save_intermediates: true
configs/blender_indoor.yaml ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ experiment:
2
+ source: blender_indoor
3
+ input_kind: blend
4
+
5
+ pipeline:
6
+ blender: /path/to/blender
7
+ input_dir: data/blender_indoor
8
+ output_root: outputs/blender_indoor
9
+ num_frames: 30
10
+ resolution: "2048,1024"
11
+ grid_spacing: 0.5
12
+ camera_height: null
13
+ min_frames: 5
14
+ stop_gain: 0.08
15
+ stop_score: -0.3
16
+ stop_delta: 0.08
17
+
configs/blender_outdoor.yaml ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ experiment:
2
+ source: blender_outdoor
3
+ input_kind: glb
4
+
5
+ pipeline:
6
+ blender: /path/to/blender
7
+ input_dir: data/blender_outdoor
8
+ output_root: outputs/blender_outdoor
9
+ num_frames: 30
10
+ resolution: "2048,1024"
11
+ grid_spacing: 1.0
12
+ camera_height: null
13
+ min_frames: 5
14
+ stop_gain: 0.08
15
+ stop_score: -0.3
16
+ stop_delta: 0.08
17
+
configs/default.yaml ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Default review configuration.
2
+ # The anonymous release is organized around the Blender-indoor path.
3
+
4
+ experiment:
5
+ source: blender_indoor
6
+ input_kind: blend
7
+ review_path: primary
8
+
9
+ pipeline:
10
+ blender: /path/to/blender
11
+ input_dir: data/blender_indoor
12
+ output_root: outputs/blender_indoor
13
+ num_frames: 30
14
+ resolution: "2048,1024"
15
+ grid_spacing: 0.5
16
+ camera_height: null
17
+ min_frames: 5
18
+ stop_gain: 0.08
19
+ stop_score: -0.3
20
+ stop_delta: 0.08
21
+ rotation_type: random_yaw
22
+
configs/hm3d.yaml ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ experiment:
2
+ source: hm3d
3
+ input_kind: glb
4
+
5
+ pipeline:
6
+ blender: /path/to/blender
7
+ input_dir: data/hm3d
8
+ output_root: outputs/hm3d
9
+ num_frames: 30
10
+ resolution: "2048,1024"
11
+ grid_spacing: 0.5
12
+ camera_height: null
13
+ min_frames: 5
14
+ stop_gain: 0.08
15
+ stop_score: -0.3
16
+ stop_delta: 0.08
17
+
configs/scannetpp.yaml ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ experiment:
2
+ source: scannetpp
3
+ input_kind: ply
4
+
5
+ pipeline:
6
+ input_dir: data/scannetpp
7
+ output_root: outputs/scannetpp
8
+ num_frames: 30
9
+ resolution: "2048,1024"
10
+ grid_spacing: 0.5
11
+ point_size: 2.0
12
+ z_up: true
13
+ min_frames: 5
14
+ stop_gain: 0.08
15
+ stop_score: -0.3
16
+ stop_delta: 0.08
17
+
configs/tiny.yaml ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ experiment:
2
+ name: blender_indoor_tiny_smoke_test
3
+ mode: blender_indoor_tiny
4
+ random_seed: 2026
5
+
6
+ selection:
7
+ budget: 4
8
+ lambda_conflict: 0.35
9
+ min_gain: 0.01
10
+
11
+ outputs:
12
+ root: outputs/tiny
13
+ metadata_dir: outputs/tiny/metadata
14
+ render_dir: outputs/tiny/renders
15
+ result_dir: outputs/tiny/results
core/__init__.py ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
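"""
ERPT core package.

Contains:
- tangent_extraction: ERP -> tangent slice generation
- depth_estimation: Depth Pro depth estimation
- depth_fusion: tangent depth -> ERP depth fusion
- erp_warp: ERP warping utilities (warp_erp_to_target, create_comparison_image)
- coordinate: coordinate-system definitions (locked)
- erp_projection: ERP projection (locked)
"""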
11
+ from .tangent_extraction import (
12
+ TangentSlice,
13
+ build_icosahedron_slices,
14
+ extract_all_tangents,
15
+ extract_tangent_from_erp,
16
+ compute_coverage_mask,
17
+ compute_ray_directions_for_slice,
18
+ )
19
+
20
+ from .depth_estimation import (
21
+ DepthEstimator,
22
+ estimate_all_tangent_depths,
23
+ )
24
+
25
+ from .depth_fusion import (
26
+ fuse_tangent_depths_to_erp,
27
+ visualize_depth,
28
+ save_depth_visualization,
29
+ )
30
+
31
+ from .erp_warp import (
32
+ WarpResult,
33
+ warp_erp_to_target,
34
+ create_comparison_image,
35
+ )
core/coordinate.py ADDED
@@ -0,0 +1,191 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ 坐标系约定和四元数工具
3
+
4
+ ERPT_native 坐标系标准:
5
+ - 世界坐标系:右手系 [X右, Y上, Z前]
6
+ - 满足:X × Y = Z(右手法则)
7
+ - ERP投影约定:
8
+ - lon = atan2(x, z):经度,范围 [-π, π]
9
+ - lat = asin(y):纬度,范围 [-π/2, π/2]
10
+ - 图像中心(u=W/2, v=H/2)看向 +Z 方向
11
+ - 图像顶部是 +Y 方向(上)
12
+
13
+ 位姿格式:
14
+ - position: [x, y, z],相机中心在世界坐标系的位置(米)
15
+ - rotation_quaternion: [w, x, y, z],表示 camera->world 旋转 (R_cw)
16
+
17
+ 数学约定:
18
+ - P_world = R_cw @ P_cam + t(相机坐标系到世界坐标系)
19
+ - P_cam = R_wc @ (P_world - t)(世界坐标系到相机坐标系)
20
+ - R_wc = R_cw^T
21
+ """
22
+
23
+ import numpy as np
24
+ from typing import Tuple
25
+
26
+
27
def quat_wxyz_to_rotation_matrix(q: np.ndarray) -> np.ndarray:
    """
    Convert a [w, x, y, z] quaternion into a 3x3 rotation matrix.

    Under this module's pose convention the quaternion encodes the
    camera->world rotation, so the returned matrix is R_cw.

    Args:
        q: Array-like with exactly four components [w, x, y, z]. It is
            flattened and normalised internally, so it does not have to be
            a unit quaternion on entry.

    Returns:
        R: (3, 3) float64 rotation matrix R_cw.

    Raises:
        ValueError: If ``q`` does not contain exactly four components, if it
            contains non-finite values, or if its norm is below 1e-9.
    """
    q = np.asarray(q, dtype=np.float64).flatten()
    # Explicit raise instead of ``assert``: asserts vanish under ``python -O``
    # and a wrongly sized input would then be silently mis-unpacked below.
    if q.shape != (4,):
        raise ValueError(f"Expected shape (4,), got {q.shape}")

    norm = np.linalg.norm(q)
    # NaN compares False against any threshold, so it must be rejected
    # separately or it would propagate into a NaN-filled matrix.
    if not np.isfinite(norm):
        raise ValueError(f"Quaternion has non-finite components: {q}")
    if norm < 1e-9:
        raise ValueError(f"Quaternion norm too small: {norm}")
    q = q / norm

    w, x, y, z = q[0], q[1], q[2], q[3]

    # Standard unit-quaternion -> rotation-matrix formula.
    R = np.array([
        [1 - 2*(y*y + z*z), 2*(x*y - w*z), 2*(x*z + w*y)],
        [2*(x*y + w*z), 1 - 2*(x*x + z*z), 2*(y*z - w*x)],
        [2*(x*z - w*y), 2*(y*z + w*x), 1 - 2*(x*x + y*y)]
    ], dtype=np.float64)

    return R
58
+
59
+
60
def rotation_matrix_to_quat_wxyz(R: np.ndarray) -> np.ndarray:
    """
    Convert a rotation matrix into a unit quaternion [w, x, y, z].

    The input is first projected onto the nearest proper rotation via SVD,
    then converted with Shepperd's method. The result is normalised and its
    sign is chosen so that w >= 0.

    Args:
        R: (3, 3) rotation matrix (anything reshapeable to 3x3).

    Returns:
        q: (4,) float64 quaternion [w, x, y, z].
    """
    mat = np.asarray(R, dtype=np.float64).reshape(3, 3)

    # Re-orthogonalise; flip the last left-singular vector if the projection
    # turned out to be a reflection (det < 0).
    left, _, right_t = np.linalg.svd(mat)
    mat = left @ right_t
    if np.linalg.det(mat) < 0:
        left[:, -1] *= -1
        mat = left @ right_t

    m00, m01, m02 = mat[0, 0], mat[0, 1], mat[0, 2]
    m10, m11, m12 = mat[1, 0], mat[1, 1], mat[1, 2]
    m20, m21, m22 = mat[2, 0], mat[2, 1], mat[2, 2]
    trace = np.trace(mat)

    # Shepperd's method: branch on the largest of (trace, m00, m11, m22) so
    # the square root argument stays well away from zero.
    if trace > 0:
        s = 2.0 * np.sqrt(trace + 1.0)
        components = (
            0.25 * s,
            (m21 - m12) / s,
            (m02 - m20) / s,
            (m10 - m01) / s,
        )
    elif m00 > m11 and m00 > m22:
        s = 2.0 * np.sqrt(1.0 + m00 - m11 - m22)
        components = (
            (m21 - m12) / s,
            0.25 * s,
            (m01 + m10) / s,
            (m02 + m20) / s,
        )
    elif m11 > m22:
        s = 2.0 * np.sqrt(1.0 + m11 - m00 - m22)
        components = (
            (m02 - m20) / s,
            (m01 + m10) / s,
            0.25 * s,
            (m12 + m21) / s,
        )
    else:
        s = 2.0 * np.sqrt(1.0 + m22 - m00 - m11)
        components = (
            (m10 - m01) / s,
            (m02 + m20) / s,
            (m12 + m21) / s,
            0.25 * s,
        )

    quat = np.array(components, dtype=np.float64)
    quat = quat / np.linalg.norm(quat)

    # q and -q describe the same rotation; keep the representative with w >= 0.
    return -quat if quat[0] < 0 else quat
117
+
118
+
119
def R_cw_to_R_wc(R_cw: np.ndarray) -> np.ndarray:
    """
    Turn a camera->world rotation into the matching world->camera rotation.

    For a rotation matrix the inverse is the transpose: R_wc = R_cw^T.

    Args:
        R_cw: (3, 3) camera->world rotation matrix.

    Returns:
        R_wc: (3, 3) world->camera rotation matrix (the transpose of the input).
    """
    R_wc = R_cw.T
    return R_wc
132
+
133
+
134
def R_wc_to_R_cw(R_wc: np.ndarray) -> np.ndarray:
    """
    Turn a world->camera rotation into the matching camera->world rotation.

    For a rotation matrix the inverse is the transpose: R_cw = R_wc^T.

    Args:
        R_wc: (3, 3) world->camera rotation matrix.

    Returns:
        R_cw: (3, 3) camera->world rotation matrix (the transpose of the input).
    """
    R_cw = R_wc.T
    return R_cw
147
+
148
+
149
def validate_rotation_matrix(R: np.ndarray, tol: float = 1e-5) -> Tuple[bool, str]:
    """
    Check whether a matrix is a proper rotation.

    Two conditions are tested in order: orthogonality (R^T R == I) and
    a determinant of +1. The first failing condition is reported.

    Args:
        R: (3, 3) matrix to check (anything reshapeable to 3x3).
        tol: Absolute tolerance applied to both checks.

    Returns:
        (is_valid, message): ``is_valid`` is True when both checks pass;
        ``message`` describes the outcome.
    """
    mat = np.asarray(R, dtype=np.float64).reshape(3, 3)

    # Orthogonality: largest absolute deviation of R^T R from the identity.
    gram = mat.T @ mat
    orth_err = np.max(np.abs(gram - np.eye(3)))
    if orth_err > tol:
        return False, f"Orthogonality error: {orth_err:.6e} > {tol}"

    # Handedness: a determinant of -1 would be a reflection, not a rotation.
    det = np.linalg.det(mat)
    if np.abs(det - 1.0) > tol:
        return False, f"Determinant error: det(R)={det:.6f}, expected 1.0"

    return True, "Valid rotation matrix"
174
+
175
+
176
def orthonormalize_rotation(R: np.ndarray) -> np.ndarray:
    """
    Project an approximate rotation matrix onto a proper rotation using SVD.

    Args:
        R: (3, 3) approximately orthonormal matrix.

    Returns:
        R_orth: (3, 3) orthonormal matrix with determinant +1.
    """
    left, _, right_t = np.linalg.svd(R)
    projected = left @ right_t
    if np.linalg.det(projected) < 0:
        # The projection is a reflection: negate the last left-singular
        # vector and rebuild to obtain a proper rotation.
        left[:, -1] = left[:, -1] * -1
        projected = left @ right_t
    return projected
core/depth_estimation.py ADDED
@@ -0,0 +1,185 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Depth Pro Wrapper(移植自原版 ERPT)
3
+
4
+ 封装 Apple Depth Pro 单目深度估计模型。
5
+
6
+ API 使用说明:
7
+ 1. 使用 depth_pro.create_model_and_transforms() 创建模型和预处理 transforms
8
+ 2. 输入 RGB 图像 (PIL Image 或 numpy array)
9
+ 3. 调用 model.infer(image, f_px=focal_length) 得到深度
10
+ 4. 输出 depth 单位为米 (m)
11
+
12
+ 深度定义:
13
+ - Depth Pro 输出的是透视相机的 z-depth (沿相机前向轴的深度)
14
+ """
15
+ from __future__ import annotations
16
+
17
+ import os
18
+ import sys
19
+ from pathlib import Path
20
+ from typing import Any, Dict, List, Optional, Tuple
21
+
22
+ import numpy as np
23
+ import torch
24
+ from PIL import Image
25
+
26
+ from .tangent_extraction import TangentSlice
27
+
28
+ # 模型缓存,避免重复加载
29
+ _MODEL_CACHE: Dict[str, Tuple[torch.nn.Module, Any]] = {}
30
+
31
+
32
+ def _get_precision(cfg: Dict[str, Any]) -> torch.dtype:
33
+ """获取计算精度"""
34
+ prec = cfg.get("depth_pro", {}).get("precision", "fp16")
35
+ if prec == "fp16":
36
+ return torch.float16
37
+ elif prec == "bf16":
38
+ return torch.bfloat16
39
+ return torch.float32
40
+
41
+
42
def _load_depthpro_model(
    cfg: Dict[str, Any],
    device: torch.device,
) -> Tuple[torch.nn.Module, Any]:
    """
    Load the Depth Pro model and its preprocessing transform, with caching.

    Depth Pro loads its weights from ./checkpoints/depth_pro.pt by default,
    so the working directory is temporarily switched to the Depth Pro repo
    while the model is created and restored afterwards.

    Args:
        cfg: Configuration dict. Reads ``cfg["depth_pro"]["repo_dir"]``
            (default "third_party/ml-depth-pro"), the precision via
            ``_get_precision`` and, for relative repo paths,
            ``cfg["_project_root"]`` (default: current working directory).
        device: Device passed through to ``create_model_and_transforms``.

    Returns:
        (model, transform) as returned by
        ``depth_pro.create_model_and_transforms``; the model is put in eval
        mode. Results are memoised in ``_MODEL_CACHE``.

    Raises:
        RuntimeError: If the ``depth_pro`` module cannot be imported.
        FileNotFoundError: If the checkpoint file does not exist.
    """
    dcfg = cfg.get("depth_pro", {})

    # Resolve the Depth Pro repository directory (relative paths are taken
    # relative to the project root).
    repo_dir = Path(dcfg.get("repo_dir", "third_party/ml-depth-pro"))
    if not repo_dir.is_absolute():
        root = Path(str(cfg.get("_project_root", Path.cwd())))
        repo_dir = root / repo_dir

    checkpoint_path = repo_dir / "checkpoints" / "depth_pro.pt"

    # One cache entry per (checkpoint, device, precision) combination.
    precision = _get_precision(cfg)
    cache_key = f"{checkpoint_path}_{device}_{precision}"

    if cache_key in _MODEL_CACHE:
        return _MODEL_CACHE[cache_key]

    # Make the Depth Pro sources importable by prepending them to sys.path.
    if repo_dir.exists():
        src_path = str(repo_dir / "src")
        if src_path not in sys.path:
            sys.path.insert(0, src_path)
        if str(repo_dir) not in sys.path:
            sys.path.insert(0, str(repo_dir))

    try:
        import depth_pro
    except ImportError as e:
        raise RuntimeError(
            f"Failed to import depth_pro module. "
            f"Please ensure ml-depth-pro is installed at {repo_dir}\n"
            f"Error: {e}"
        ) from e

    if not checkpoint_path.exists():
        raise FileNotFoundError(
            f"Depth Pro checkpoint not found: {checkpoint_path}\n"
            f"Please place depth_pro.pt in {checkpoint_path.parent}"
        )

    print(f"[DepthPro] Loading model from {checkpoint_path}")
    print(f"[DepthPro] Device: {device}, Precision: {precision}")

    # Remember the current directory and switch to repo_dir (Depth Pro loads
    # from ./checkpoints by default); the finally clause always switches back.
    original_cwd = os.getcwd()
    try:
        os.chdir(repo_dir)

        # Create the model through the official API.
        model, transform = depth_pro.create_model_and_transforms(
            device=device,
            precision=precision,
        )

        model.eval()
        print(f"[DepthPro] Model loaded successfully")

    finally:
        os.chdir(original_cwd)

    _MODEL_CACHE[cache_key] = (model, transform)
    return model, transform
113
+
114
+
115
class DepthEstimator:
    """
    Thin wrapper around a loaded Depth Pro model.

    Holds the model and its preprocessing transform and exposes a single
    per-image prediction method for use in batch loops.
    """

    def __init__(self, cfg: Dict[str, Any], device: torch.device):
        self.cfg = cfg
        self.device = device
        loaded = _load_depthpro_model(cfg, device)
        self.model, self.transform = loaded
        depth_pro_cfg = cfg.get("depth_pro", {})
        # When False, a supplied focal length is not forwarded to the model.
        self.pass_f_px = bool(depth_pro_cfg.get("pass_f_px", True))

    @torch.no_grad()
    def predict_single(self, rgb: np.ndarray, f_px: Optional[float] = None) -> np.ndarray:
        """
        Predict depth for one image.

        Args:
            rgb: (H, W, 3) uint8 numpy array.
            f_px: Optional focal length in pixels; forwarded to the model
                only when ``pass_f_px`` is enabled.

        Returns:
            (H, W) float32 numpy array, in metres.
        """
        image = Image.fromarray(rgb.astype(np.uint8))
        model_input = self.transform(image)

        forward_focal = f_px is not None and self.pass_f_px
        focal = torch.tensor([f_px], device=self.device) if forward_focal else None

        output = self.model.infer(model_input, f_px=focal)
        depth = output["depth"].detach().cpu().float().numpy()
        return depth.astype(np.float32)
149
+
150
+
151
def estimate_all_tangent_depths(
    tangent_rgbs: Dict[str, np.ndarray],
    slices: List[TangentSlice],
    cfg: Dict[str, Any],
    device: torch.device,
) -> Dict[str, np.ndarray]:
    """
    Run depth estimation on every tangent slice image.

    Args:
        tangent_rgbs: Mapping {slice_id: rgb_array}.
        slices: Slice specifications; each one supplies ``slice_id`` and ``f_px``.
        cfg: Configuration dict passed to ``DepthEstimator``.
        device: Compute device.

    Returns:
        tangent_depths: Mapping {slice_id: depth_array}, in the iteration
        order of ``tangent_rgbs``.
    """
    estimator = DepthEstimator(cfg, device)

    # Focal length lookup; ids without a matching slice resolve to None.
    focal_by_id = {}
    for spec in slices:
        focal_by_id[spec.slice_id] = spec.f_px

    total = len(tangent_rgbs)
    results = {}

    for i, slice_id in enumerate(tangent_rgbs):
        rgb = tangent_rgbs[slice_id]
        depth = estimator.predict_single(rgb, f_px=focal_by_id.get(slice_id))
        results[slice_id] = depth

        print(f" [{i+1}/{total}] {slice_id}: "
              f"depth range [{depth.min():.2f}, {depth.max():.2f}] m")

    return results
core/depth_fusion.py ADDED
@@ -0,0 +1,769 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Tangent Depth -> ERP Depth 融合模块(完整移植自原版 ERPT)
3
+
4
+ 核心功能:
5
+ 1. 将每个切片的深度回投影到 ERP
6
+ 2. 使用 cosine 权重实现平滑融合(无块状边界)
7
+ 3. 使用 softmin(1/depth) 处理重叠区深度竞争
8
+ 4. 极区增强处理
9
+ 5. Multiband 金字塔融合(消除接缝)
10
+ 6. Pole consistency 极区深度对齐
11
+ 7. Z-buffer 门控投影(保持边缘锐利)
12
+
13
+ 关键算法:
14
+ - Cosine 权重: w_face = max(0, dot(ray, face_center))^k
15
+ - Depth 竞争: softmin(1/depth) 确保近处优先且平滑过渡
16
+ - Forward splatting 将切片像素投影到 ERP
17
+ - Multiband: Gaussian/Laplacian 金字塔融合
18
+
19
+ 输出:
20
+ - depth_range: ERP range depth (float32, meters)
21
+ - weight_sum: 权重和(用于 debug)
22
+ - valid_mask: 有效掩码
23
+ """
24
+ from __future__ import annotations
25
+
26
+ import math
27
+ from pathlib import Path
28
+ from typing import Any, Dict, List, Optional, Tuple
29
+
30
+ import numpy as np
31
+ import torch
32
+
33
+ from .tangent_extraction import TangentSlice
34
+
35
+
36
+ # =============================================================================
37
+ # 基础工具函数
38
+ # =============================================================================
39
+
40
def compute_cosine_weight(
    ray_dirs: torch.Tensor,
    face_center: torch.Tensor,
    k: float = 4.0,
) -> torch.Tensor:
    """
    Cosine falloff weight: w = max(0, dot(ray, face_center)) ** k.

    ``face_center`` is viewed as (1, 1, 3) and broadcast against
    ``ray_dirs``; the dot product is taken over the last axis.
    """
    center = face_center.view(1, 1, 3)
    cosine = (ray_dirs * center).sum(dim=-1)
    # Directions facing away from the face centre get zero weight.
    return cosine.clamp(min=0.0) ** k
51
+
52
+
53
+ def _dirs_to_erp_uv(
54
+ dirs_world: torch.Tensor,
55
+ erp_h: int,
56
+ erp_w: int,
57
+ ) -> Tuple[torch.Tensor, torch.Tensor]:
58
+ """将世界坐标方向转换为 ERP 像素坐标"""
59
+ x = dirs_world[..., 0]
60
+ y = dirs_world[..., 1]
61
+ z = dirs_world[..., 2]
62
+
63
+ lon = torch.atan2(x, z)
64
+ lat = torch.asin(torch.clamp(y, -1.0, 1.0))
65
+
66
+ u = (lon + math.pi) / (2.0 * math.pi) * float(erp_w)
67
+ u = torch.remainder(u, float(erp_w))
68
+ v = (math.pi / 2.0 - lat) / math.pi * float(erp_h - 1)
69
+ v = torch.clamp(v, 0.0, float(erp_h - 1))
70
+
71
+ return u, v
72
+
73
+
74
+ # =============================================================================
75
+ # 极区权重处理
76
+ # =============================================================================
77
+
78
+ def _apply_pole_weights(
79
+ slice_type: str,
80
+ dirs_world: torch.Tensor,
81
+ base_weight: torch.Tensor,
82
+ fusion_cfg: Dict[str, Any],
83
+ ) -> torch.Tensor:
84
+ """极区权重门控与增强"""
85
+ # pole_ring gating
86
+ pole_ring_cfg = fusion_cfg.get("pole_ring", {})
87
+ pole_ring_enabled = bool(pole_ring_cfg.get("enabled", True))
88
+ pole_ring_min_lat_deg = float(pole_ring_cfg.get("min_latitude_deg", 60.0))
89
+ pole_ring_ramp_deg = float(pole_ring_cfg.get("ramp_deg", 10.0))
90
+
91
+ if slice_type == "pole_ring":
92
+ if not pole_ring_enabled:
93
+ return torch.zeros_like(base_weight)
94
+ lat = torch.asin(torch.clamp(dirs_world[..., 1], -1.0, 1.0)) * (180.0 / math.pi)
95
+ abs_lat = torch.abs(lat)
96
+ ramp = torch.clamp(
97
+ (abs_lat - pole_ring_min_lat_deg) / max(pole_ring_ramp_deg, 1e-3),
98
+ min=0.0, max=1.0,
99
+ )
100
+ return base_weight * ramp
101
+
102
+ # pole caps progressive boost
103
+ pole_boost = bool(fusion_cfg.get("pole_boost", True))
104
+ pole_boost_factor = float(fusion_cfg.get("pole_boost_factor", 1.5))
105
+ pole_latitude_deg = float(fusion_cfg.get("pole_latitude_deg", 75.0))
106
+ pole_ramp_deg = float(fusion_cfg.get("pole_ramp_deg", 10.0))
107
+
108
+ if pole_boost and slice_type in ("pole_north", "pole_south"):
109
+ lat = torch.asin(torch.clamp(dirs_world[..., 1], -1.0, 1.0)) * (180.0 / math.pi)
110
+ abs_lat = torch.abs(lat)
111
+ ramp = torch.clamp(
112
+ (abs_lat - pole_latitude_deg) / max(pole_ramp_deg, 1e-3),
113
+ min=0.0, max=1.0,
114
+ )
115
+ mult = 1.0 + ramp * (pole_boost_factor - 1.0)
116
+ return base_weight * mult
117
+
118
+ # faces 在极区衰减
119
+ face_pole_cfg = fusion_cfg.get("face_pole_suppress", {})
120
+ if slice_type == "face" and bool(face_pole_cfg.get("enabled", True)):
121
+ min_lat = float(face_pole_cfg.get("min_latitude_deg", 70.0))
122
+ ramp_deg = float(face_pole_cfg.get("ramp_deg", 10.0))
123
+ min_scale = float(face_pole_cfg.get("min_scale", 0.4))
124
+ lat = torch.asin(torch.clamp(dirs_world[..., 1], -1.0, 1.0)) * (180.0 / math.pi)
125
+ abs_lat = torch.abs(lat)
126
+ t = torch.clamp((abs_lat - min_lat) / max(ramp_deg, 1e-3), 0.0, 1.0)
127
+ scale = 1.0 - t * (1.0 - min_scale)
128
+ return base_weight * scale
129
+
130
+ return base_weight
131
+
132
+
133
+ # =============================================================================
134
+ # Forward splatting(softmin_invdepth 模式用)
135
+ # =============================================================================
136
+
137
+ def _forward_splat(
138
+ erp_h: int,
139
+ erp_w: int,
140
+ u: torch.Tensor,
141
+ v: torch.Tensor,
142
+ range_depth: torch.Tensor,
143
+ weight: torch.Tensor,
144
+ accum_weighted_invdepth: torch.Tensor,
145
+ accum_weight: torch.Tensor,
146
+ depth_competition: str,
147
+ softmin_alpha: float,
148
+ pole_boost: bool,
149
+ pole_boost_factor: float,
150
+ pole_latitude_deg: float,
151
+ ) -> None:
152
+ """Forward splatting with bilinear interpolation"""
153
+ u_flat = u.reshape(-1)
154
+ v_flat = v.reshape(-1)
155
+ d_flat = range_depth.reshape(-1)
156
+ w_flat = weight.reshape(-1)
157
+
158
+ valid = torch.isfinite(d_flat) & (d_flat > 0.0) & torch.isfinite(w_flat) & (w_flat > 0.0)
159
+
160
+ u0 = torch.floor(u_flat).to(torch.int64)
161
+ v0 = torch.floor(v_flat).to(torch.int64)
162
+ du = (u_flat - u0.to(u_flat.dtype)).clamp(0.0, 1.0)
163
+ dv = (v_flat - v0.to(v_flat.dtype)).clamp(0.0, 1.0)
164
+
165
+ u0_wrap = torch.remainder(u0, erp_w)
166
+ u1_wrap = torch.remainder(u0 + 1, erp_w)
167
+ v1 = v0 + 1
168
+
169
+ w00 = (1.0 - du) * (1.0 - dv)
170
+ w10 = du * (1.0 - dv)
171
+ w01 = (1.0 - du) * dv
172
+ w11 = du * dv
173
+
174
+ if depth_competition == "softmin_invdepth":
175
+ inv_depth = 1.0 / torch.clamp(d_flat, min=1e-6)
176
+ value_to_splat = inv_depth
177
+ elif depth_competition == "softmax_negdepth":
178
+ exp_weight = torch.exp(-softmin_alpha * d_flat)
179
+ w_flat = w_flat * exp_weight
180
+ value_to_splat = d_flat
181
+ else:
182
+ value_to_splat = d_flat
183
+
184
+ def _add(u_idx, v_idx, bilinear_w):
185
+ v_ok = (v_idx >= 0) & (v_idx < erp_h)
186
+ m = valid & v_ok
187
+ u_safe = torch.where(m, u_idx, torch.zeros_like(u_idx))
188
+ v_safe = torch.where(m, v_idx, torch.zeros_like(v_idx))
189
+ idx = v_safe * erp_w + u_safe
190
+ final_w = torch.where(m, bilinear_w * w_flat, torch.zeros_like(bilinear_w))
191
+ final_val = torch.where(m, bilinear_w * w_flat * value_to_splat, torch.zeros_like(bilinear_w))
192
+ accum_weight.scatter_add_(0, idx, final_w)
193
+ accum_weighted_invdepth.scatter_add_(0, idx, final_val)
194
+
195
+ _add(u0_wrap, v0, w00)
196
+ _add(u1_wrap, v0, w10)
197
+ _add(u0_wrap, v1, w01)
198
+ _add(u1_wrap, v1, w11)
199
+
200
+
201
+ # =============================================================================
202
+ # Z-buffer 门控投影(multiband 模式用)
203
+ # =============================================================================
204
+
205
def _project_slice_to_erp_disp_weight_zbuffer(
    depth_t: torch.Tensor,
    slice_spec: TangentSlice,
    cfg: Dict[str, Any],
    erp_h: int,
    erp_w: int,
    depth_def: str,
    k: float,
    device: torch.device,
) -> Tuple[torch.Tensor, torch.Tensor]:
    """
    Project a single slice onto the ERP grid, returning disparity (1/range)
    and weight maps.

    A per-slice z-buffer (minimum depth per ERP pixel) is used so that depth
    edges inside one slice are not averaged away: only samples whose range is
    close to the per-pixel minimum contribute to the disparity.

    Args:
        depth_t: Depth tensor for the slice; combined element-wise with a
            (res, res) ray grid, so presumably (res, res) — TODO confirm.
        slice_spec: Slice specification; ``resolution``, ``K``, ``R_cw``,
            ``center_dir`` and ``slice_type`` are read.
        cfg: Configuration dict; the ``"fusion"`` section is read.
        erp_h: ERP height in pixels.
        erp_w: ERP width in pixels.
        depth_def: ``"z"`` means ``depth_t`` is z-depth and is converted to
            range depth; any other value treats it as range depth already.
        k: Exponent for the cosine weight.
        device: Device on which intermediate tensors are created.

    Returns:
        (disp_map, w_map): both (erp_h, erp_w) float32. ``disp_map`` is the
        weighted mean of 1/range where the accumulated weight exceeds 1e-9
        and zero elsewhere; ``w_map`` is the accumulated weight.
    """
    fusion_cfg = cfg.get("fusion", {})
    weight_mode = str(fusion_cfg.get("weight_mode", "cosine"))

    res = slice_spec.resolution
    K = slice_spec.K
    R_cw = slice_spec.R_cw

    fx, fy = float(K[0, 0]), float(K[1, 1])
    cx, cy = float(K[0, 2]), float(K[1, 2])

    # Pixel grid of the (square) tangent image.
    xs = torch.arange(res, device=device, dtype=torch.float32)
    ys = torch.arange(res, device=device, dtype=torch.float32)
    yv, xv = torch.meshgrid(ys, xs, indexing="ij")

    # Back-project pixels to camera rays on the z = 1 plane. The row axis is
    # negated (presumably because image rows grow downward while camera +Y
    # points up — confirm against the coordinate convention).
    x_cam = (xv - cx) / fx
    y_cam = -(yv - cy) / fy
    z_cam = torch.ones_like(x_cam)

    dirs_cam = torch.stack([x_cam, y_cam, z_cam], dim=-1)
    # ray_len is kept: it is the z-depth -> range-depth conversion factor.
    ray_len = torch.norm(dirs_cam, dim=-1, keepdim=True).clamp(min=1e-9)
    dirs_cam = dirs_cam / ray_len

    # Rotate the unit rays into world space.
    R = torch.tensor(R_cw, device=device, dtype=torch.float32)
    dirs_world = torch.einsum("ij,hwj->hwi", R, dirs_cam)

    # Range depth (distance along the ray).
    if depth_def == "z":
        range_depth = depth_t * ray_len.squeeze(-1)
    else:
        range_depth = depth_t

    u, v = _dirs_to_erp_uv(dirs_world, erp_h, erp_w)

    if weight_mode == "cosine":
        face_center = torch.tensor(slice_spec.center_dir, device=device, dtype=torch.float32)
        base_w = compute_cosine_weight(dirs_world, face_center, k=k)
    else:
        base_w = torch.ones_like(range_depth)

    base_w = _apply_pole_weights(slice_spec.slice_type, dirs_world, base_w, fusion_cfg)

    u_flat = u.reshape(-1)
    v_flat = v.reshape(-1)
    d_flat = range_depth.reshape(-1)
    w_flat = base_w.reshape(-1)

    # Only samples with a finite positive depth and weight take part.
    valid = torch.isfinite(d_flat) & (d_flat > 0.0) & torch.isfinite(w_flat) & (w_flat > 0.0)

    # Integer corner indices and fractional offsets for bilinear splatting.
    u0 = torch.floor(u_flat).to(torch.int64)
    v0 = torch.floor(v_flat).to(torch.int64)
    du = (u_flat - u0.float()).clamp(0.0, 1.0)
    dv = (v_flat - v0.float()).clamp(0.0, 1.0)

    # Columns wrap around horizontally; rows are bounds-checked later.
    u0w = torch.remainder(u0, erp_w)
    u1w = torch.remainder(u0 + 1, erp_w)
    v1 = v0 + 1

    bw00 = (1.0 - du) * (1.0 - dv)
    bw10 = du * (1.0 - dv)
    bw01 = (1.0 - du) * dv
    bw11 = du * dv

    # Pass A: per-ERP-pixel minimum range depth over all four neighbours.
    min_depth = torch.full((erp_h * erp_w,), float("inf"), device=device, dtype=torch.float32)

    def _amin(ui, vi, bw):
        # bw is accepted for symmetry with _acc but is not used here.
        m = valid & (vi >= 0) & (vi < erp_h)
        ui_safe = torch.where(m, ui, torch.zeros_like(ui))
        vi_safe = torch.where(m, vi, torch.zeros_like(vi))
        idx = vi_safe * erp_w + ui_safe
        # Rejected samples land on index 0 with +inf, which never wins amin.
        cand = torch.where(m, d_flat, torch.full_like(d_flat, float("inf")))
        min_depth.scatter_reduce_(0, idx, cand, reduce="amin", include_self=True)

    _amin(u0w, v0, bw00)
    _amin(u1w, v0, bw10)
    _amin(u0w, v1, bw01)
    _amin(u1w, v1, bw11)

    # Pass B: accumulate disparity only from samples near the minimum depth.
    disp_acc = torch.zeros(erp_h * erp_w, device=device, dtype=torch.float32)
    w_acc = torch.zeros(erp_h * erp_w, device=device, dtype=torch.float32)

    # Gate tolerance: a sample passes if d <= min * (1 + eps_rel) + eps_abs.
    eps_abs = float(fusion_cfg.get("project_zbuffer_eps_abs_m", 0.02))
    eps_rel = float(fusion_cfg.get("project_zbuffer_eps_rel", 0.02))

    inv_d = 1.0 / torch.clamp(d_flat, min=1e-6)

    def _acc(ui, vi, bw):
        m = valid & (vi >= 0) & (vi < erp_h)
        ui_safe = torch.where(m, ui, torch.zeros_like(ui))
        vi_safe = torch.where(m, vi, torch.zeros_like(vi))
        idx = vi_safe * erp_w + ui_safe
        md = min_depth.gather(0, idx)
        gate = d_flat <= (md * (1.0 + eps_rel) + eps_abs)
        mm = m & gate
        w_here = torch.where(mm, bw * w_flat, torch.zeros_like(bw))
        disp_here = torch.where(mm, w_here * inv_d, torch.zeros_like(w_here))
        w_acc.scatter_add_(0, idx, w_here)
        disp_acc.scatter_add_(0, idx, disp_here)

    _acc(u0w, v0, bw00)
    _acc(u1w, v0, bw10)
    _acc(u0w, v1, bw01)
    _acc(u1w, v1, bw11)

    # Normalise accumulated disparity by weight where any weight was received.
    w_map = w_acc.view(erp_h, erp_w)
    disp_map = torch.zeros_like(w_map)
    m = w_map > 1e-9
    disp_map[m] = disp_acc.view(erp_h, erp_w)[m] / w_map[m]
    return disp_map, w_map
329
+
330
+
331
+ # =============================================================================
332
+ # Multiband 金字塔工具
333
+ # =============================================================================
334
+
335
+ def _pad_circular_w(x: torch.Tensor, pad: int) -> torch.Tensor:
336
+ if pad <= 0:
337
+ return x
338
+ return torch.cat([x[..., -pad:], x, x[..., :pad]], dim=-1)
339
+
340
+
341
+ def _gauss5_kernel(device: torch.device, dtype: torch.dtype) -> torch.Tensor:
342
+ k1 = torch.tensor([1.0, 4.0, 6.0, 4.0, 1.0], device=device, dtype=dtype)
343
+ k1 = k1 / k1.sum()
344
+ k2 = (k1[:, None] * k1[None, :]).view(1, 1, 5, 5)
345
+ return k2
346
+
347
+
348
def _blur_circular_w(x: torch.Tensor, kernel: torch.Tensor) -> torch.Tensor:
    """Convolve ``x`` with ``kernel`` using wrap padding along the last axis
    and reflect padding along the second-to-last axis.

    The padding on each side is half the kernel's last dimension.
    """
    import torch.nn.functional as F
    half = kernel.shape[-1] // 2
    wrapped = _pad_circular_w(x, half)
    # F.pad order is (left, right, top, bottom): only rows are padded here,
    # columns were already wrap-padded above.
    padded = F.pad(wrapped, (0, 0, half, half), mode="reflect")
    return F.conv2d(padded, kernel)
354
+
355
+
356
+ def _down2(x: torch.Tensor) -> torch.Tensor:
357
+ return x[..., ::2, ::2]
358
+
359
+
360
+ def _upsample2_circular_w(x: torch.Tensor, out_h: int, out_w: int) -> torch.Tensor:
361
+ import torch.nn.functional as F
362
+ x3 = torch.cat([x, x, x], dim=-1)
363
+ y3 = F.interpolate(x3, size=(out_h, out_w * 3), mode="bilinear", align_corners=False)
364
+ return y3[..., out_w: 2 * out_w]
365
+
366
+
367
+ # =============================================================================
368
+ # 主融合函数
369
+ # =============================================================================
370
+
371
@torch.no_grad()
def fuse_tangent_depths_to_erp(
    tangent_depths: Dict[str, np.ndarray],
    slices: List[TangentSlice],
    cfg: Dict[str, Any],
    device: torch.device,
    debug_dir: Optional[Path] = None,
) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
    """
    Fuse all tangent-slice depth maps into one ERP range-depth map.

    The fusion strategy is chosen by ``cfg["fusion"]["blend_mode"]``:
    ``"multiband"`` uses the pyramid blend, anything else (default
    ``"softmin_invdepth"``) uses the splatting-based blend. Valid depths are
    finally multiplied by ``cfg["fusion"]["output_scale"]`` when it is not 1.0.

    Args:
        tangent_depths: Mapping {slice_id: depth_array}.
        slices: Slice specifications.
        cfg: Configuration dict (``"erp"`` and ``"fusion"`` sections are read).
        device: Compute device.
        debug_dir: Optional directory forwarded to the multiband path only.

    Returns:
        depth_range: (erp_h, erp_w) ERP range depth, float32.
        weight_sum: (erp_h, erp_w) accumulated weights.
        valid_mask: (erp_h, erp_w) uint8.
    """
    erp_cfg = cfg.get("erp", {})
    erp_h = int(erp_cfg.get("height", 1024))
    erp_w = int(erp_cfg.get("width", 2048))

    fusion_cfg = cfg.get("fusion", {})
    blend_mode = str(fusion_cfg.get("blend_mode", "softmin_invdepth"))

    if blend_mode != "multiband":
        fused = _fuse_softmin(
            tangent_depths, slices, cfg, device, erp_h, erp_w,
        )
    else:
        fused = _fuse_multiband(
            tangent_depths, slices, cfg, device, erp_h, erp_w, debug_dir,
        )
    depth_np, weight_np, valid_np = fused

    # Optional global scale correction, applied in place to valid pixels only
    # (finite and strictly positive), so NaN markers are left untouched.
    output_scale = float(fusion_cfg.get("output_scale", 1.0))
    if output_scale != 1.0:
        scalable = np.isfinite(depth_np) & (depth_np > 0)
        depth_np[scalable] *= output_scale

    return depth_np, weight_np, valid_np
416
+
417
+
418
def _fuse_softmin(
    tangent_depths: Dict[str, np.ndarray],
    slices: List[TangentSlice],
    cfg: Dict[str, Any],
    device: torch.device,
    erp_h: int,
    erp_w: int,
) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
    """Fuse slice depths via forward splatting (softmin_invdepth mode).

    Every slice present in ``tangent_depths`` is back-projected to world
    rays, weighted, and splatted into two flat ERP accumulators with
    ``_forward_splat``. The accumulators are then normalised into a depth map.

    Args:
        tangent_depths: Mapping {slice_id: depth_array}; slices without an
            entry are skipped.
        slices: Slice specifications (``slice_id``, ``resolution``, ``K``,
            ``R_cw``, ``center_dir``, ``slice_type`` are read).
        cfg: Configuration dict; the ``"fusion"`` section and
            ``cfg["depth_pro"]["depth_def"]`` are read.
        device: Compute device.
        erp_h: ERP height in pixels.
        erp_w: ERP width in pixels.

    Returns:
        depth: (erp_h, erp_w) float32 range depth, NaN where invalid.
        weight_sum: (erp_h, erp_w) float32 accumulated weight.
        valid_mask: (erp_h, erp_w) uint8, 1 where the accumulated weight
            exceeds ``min_weight_sum``.
    """
    fusion_cfg = cfg.get("fusion", {})
    weight_mode = str(fusion_cfg.get("weight_mode", "cosine"))
    k = float(fusion_cfg.get("k", 4.0))
    depth_competition = str(fusion_cfg.get("depth_competition", "softmin_invdepth"))
    softmin_alpha = float(fusion_cfg.get("softmin_alpha", 10.0))
    min_weight_sum = float(fusion_cfg.get("min_weight_sum", 1e-6))
    pole_boost = bool(fusion_cfg.get("pole_boost", True))
    pole_boost_factor = float(fusion_cfg.get("pole_boost_factor", 1.5))
    pole_latitude_deg = float(fusion_cfg.get("pole_latitude_deg", 75.0))
    pole_ring_cfg = fusion_cfg.get("pole_ring", {})
    pole_ring_enabled = bool(pole_ring_cfg.get("enabled", True))
    pole_ring_min_lat_deg = float(pole_ring_cfg.get("min_latitude_deg", 60.0))
    # NOTE(review): the ramp default here is 5.0, whereas _apply_pole_weights
    # defaults the same key to 10.0 — confirm which is intended.
    pole_ring_ramp_deg = float(pole_ring_cfg.get("ramp_deg", 5.0))
    depth_def = str(cfg.get("depth_pro", {}).get("depth_def", "z"))

    # Flat accumulators over all ERP pixels. Despite its name, the first one
    # holds weighted depth (not inverse depth) for non-softmin competition modes.
    accum_weighted_invdepth = torch.zeros(erp_h * erp_w, device=device, dtype=torch.float32)
    accum_weight = torch.zeros(erp_h * erp_w, device=device, dtype=torch.float32)

    for s in slices:
        if s.slice_id not in tangent_depths:
            continue
        depth_np = tangent_depths[s.slice_id]
        depth_t = torch.from_numpy(depth_np.astype(np.float32)).to(device)

        res = s.resolution
        K = s.K
        R_cw = s.R_cw
        fx, fy = float(K[0, 0]), float(K[1, 1])
        cx, cy = float(K[0, 2]), float(K[1, 2])

        # Pixel grid of the (square) tangent image.
        xs = torch.arange(res, device=device, dtype=torch.float32)
        ys = torch.arange(res, device=device, dtype=torch.float32)
        yv, xv = torch.meshgrid(ys, xs, indexing="ij")

        # Back-project to camera rays on the z = 1 plane (row axis negated).
        x_cam = (xv - cx) / fx
        y_cam = -(yv - cy) / fy
        z_cam = torch.ones_like(x_cam)

        dirs_cam = torch.stack([x_cam, y_cam, z_cam], dim=-1)
        dirs_cam = dirs_cam / torch.clamp(torch.norm(dirs_cam, dim=-1, keepdim=True), min=1e-9)

        # Rotate unit rays into world space.
        R = torch.tensor(R_cw, device=device, dtype=torch.float32)
        dirs_world = torch.einsum("ij,hwj->hwi", R, dirs_cam)

        # Convert z-depth to range depth by the un-normalised ray length.
        if depth_def == "z":
            ray_length = torch.sqrt(x_cam ** 2 + y_cam ** 2 + 1.0)
            range_depth = depth_t * ray_length
        else:
            range_depth = depth_t

        u, v = _dirs_to_erp_uv(dirs_world, erp_h, erp_w)

        if weight_mode == "cosine":
            face_center = torch.tensor(s.center_dir, device=device, dtype=torch.float32)
            base_weight = compute_cosine_weight(dirs_world, face_center, k=k)
        else:
            base_weight = torch.ones_like(range_depth)

        # Pole-ring slices fade in above the configured latitude (or are
        # disabled entirely).
        if s.slice_type == "pole_ring":
            if not pole_ring_enabled:
                base_weight = torch.zeros_like(base_weight)
            else:
                lat = torch.asin(torch.clamp(dirs_world[..., 1], -1.0, 1.0)) * (180.0 / math.pi)
                abs_lat = torch.abs(lat)
                ramp = torch.clamp(
                    (abs_lat - pole_ring_min_lat_deg) / max(pole_ring_ramp_deg, 1e-3),
                    min=0.0, max=1.0,
                )
                base_weight = base_weight * ramp

        # NOTE(review): the pole-cap boost here is a flat multiplier, unlike
        # the latitude-ramped boost in _apply_pole_weights, and no face
        # suppression near the poles is applied in this path.
        if pole_boost and s.slice_type in ("pole_north", "pole_south"):
            base_weight = base_weight * pole_boost_factor

        _forward_splat(
            erp_h, erp_w, u, v, range_depth, base_weight,
            accum_weighted_invdepth, accum_weight,
            depth_competition, softmin_alpha,
            pole_boost, pole_boost_factor, pole_latitude_deg,
        )

    valid_mask_t = accum_weight > min_weight_sum

    if depth_competition == "softmin_invdepth":
        # Weighted mean of inverse depth, then invert back to depth.
        avg_invdepth = torch.zeros_like(accum_weighted_invdepth)
        avg_invdepth[valid_mask_t] = accum_weighted_invdepth[valid_mask_t] / accum_weight[valid_mask_t]
        depth_out = torch.zeros_like(avg_invdepth)
        depth_out[valid_mask_t] = 1.0 / torch.clamp(avg_invdepth[valid_mask_t], min=1e-6)
    else:
        # Accumulator holds weighted depth directly: plain weighted mean.
        depth_out = torch.zeros_like(accum_weighted_invdepth)
        depth_out[valid_mask_t] = accum_weighted_invdepth[valid_mask_t] / accum_weight[valid_mask_t]

    # Pixels without enough weight are marked NaN.
    depth_out[~valid_mask_t] = float("nan")

    depth_out = depth_out.reshape(erp_h, erp_w)
    weight_sum = accum_weight.reshape(erp_h, erp_w)
    valid_mask = valid_mask_t.reshape(erp_h, erp_w)

    return (
        depth_out.cpu().numpy().astype(np.float32),
        weight_sum.cpu().numpy().astype(np.float32),
        valid_mask.cpu().numpy().astype(np.uint8),
    )
529
+
530
+
531
def _fuse_multiband(
    tangent_depths: Dict[str, np.ndarray],
    slices: List[TangentSlice],
    cfg: Dict[str, Any],
    device: torch.device,
    erp_h: int,
    erp_w: int,
    debug_dir: Optional[Path] = None,
) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
    """Fuse tangent-slice depth maps into one ERP depth map by multiband blending.

    Every slice is projected to an ERP disparity/weight pair and decomposed
    into a weight-normalised Laplacian pyramid. The finest ``highfreq_levels``
    levels keep, per pixel, the coefficient of the slice with the largest
    weight; the remaining levels are weight-averaged across slices. The fused
    pyramid is collapsed back to disparity and inverted to depth.

    When ``fusion.pole_consistency.enabled`` is set, slices whose type is in
    ``target_slice_types`` are deferred, and each one is rescaled by a clamped
    median log-depth shift against the already-projected reference slices
    inside the polar bands before it is blended.

    Args:
        tangent_depths: Depth map per slice, keyed by ``slice_id``. Slices
            without an entry are skipped.
        slices: Tangent slices to fuse.
        cfg: Configuration dictionary. Reads ``fusion`` (``multiband``,
            ``min_weight_sum``, ``k``, ``pole_consistency``) and
            ``depth_pro.depth_def``.
        device: Torch device used for all intermediate tensors.
        erp_h: Output ERP height in pixels.
        erp_w: Output ERP width in pixels.
        debug_dir: Accepted for interface compatibility; not used here.

    Returns:
        A tuple ``(depth, weight_sum, valid)`` of ``(erp_h, erp_w)`` arrays:
        float32 depth (NaN where the fused disparity is not above ``eps``),
        float32 accumulated level-0 weight, and a uint8 mask that is 1 only
        where the weight exceeds ``min_weight_sum`` and the depth is finite.
    """
    # Function-scope import: the module's import block is outside this block.
    import logging

    fusion_cfg = cfg.get("fusion", {})
    mb_cfg = fusion_cfg.get("multiband", {})
    # At least one pyramid level is required to reconstruct anything, and the
    # number of "winner-takes-all" levels must lie inside [0, levels];
    # out-of-range settings previously ended in an IndexError.
    levels = max(1, int(mb_cfg.get("levels", 6)))
    highfreq_levels = min(max(int(mb_cfg.get("highfreq_levels", 2)), 0), levels)
    eps = float(mb_cfg.get("eps", 1e-6))
    min_weight_sum = float(fusion_cfg.get("min_weight_sum", 1e-6))

    depth_def = str(cfg.get("depth_pro", {}).get("depth_def", "z"))
    k = float(fusion_cfg.get("k", 4.0))

    # Pole-consistency configuration.
    pole_cons_cfg = fusion_cfg.get("pole_consistency", {})
    if not isinstance(pole_cons_cfg, dict):
        pole_cons_cfg = {}
    pole_cons_enabled = bool(pole_cons_cfg.get("enabled", False))
    pole_cons_min_lat_deg = float(pole_cons_cfg.get("min_latitude_deg", 60.0))
    pole_cons_min_overlap = int(pole_cons_cfg.get("min_overlap_pixels", 4000))
    pole_cons_max_abs_log_shift = float(pole_cons_cfg.get("max_abs_log_shift", 0.7))
    pole_cons_ref_types = [str(x) for x in pole_cons_cfg.get("ref_slice_types", ["face", "pole_ring"])]
    pole_cons_target_types = [str(x) for x in pole_cons_cfg.get("target_slice_types", ["pole_north", "pole_south"])]

    # Row ranges of the two polar bands: rows [0, top_v_max] and [bot_v_min, erp_h).
    top_v_max = int(math.floor((90.0 - pole_cons_min_lat_deg) / 180.0 * float(max(erp_h - 1, 1))))
    bot_v_min = int(math.ceil((90.0 + pole_cons_min_lat_deg) / 180.0 * float(max(erp_h - 1, 1))))
    top_v_max = max(0, min(erp_h - 1, top_v_max))
    bot_v_min = max(0, min(erp_h - 1, bot_v_min))

    ref_num_top = ref_den_top = ref_num_bot = ref_den_bot = None
    pole_pending: List[TangentSlice] = []
    if pole_cons_enabled:
        ref_num_top = torch.zeros((top_v_max + 1, erp_w), device=device, dtype=torch.float32)
        ref_den_top = torch.zeros_like(ref_num_top)
        ref_num_bot = torch.zeros((erp_h - bot_v_min, erp_w), device=device, dtype=torch.float32)
        ref_den_bot = torch.zeros_like(ref_num_bot)

    # Per-level accumulators.
    kernel = _gauss5_kernel(device=device, dtype=torch.float32)

    # Pyramid resolutions: each level halves the previous one (never below 1).
    Hs = [erp_h]
    Ws = [erp_w]
    for _ in range(1, levels):
        Hs.append(max(1, Hs[-1] // 2))
        Ws.append(max(1, Ws[-1] // 2))

    fused_lap: List[torch.Tensor] = []
    best_w: List[torch.Tensor] = []      # indexed by level, high-frequency levels only
    sum_w: List[torch.Tensor] = []       # indexed by (level - highfreq_levels)
    sum_w_lap: List[torch.Tensor] = []   # indexed by (level - highfreq_levels)

    for lvl in range(levels):
        shape = (1, 1, Hs[lvl], Ws[lvl])
        fused_lap.append(torch.zeros(shape, device=device, dtype=torch.float32))
        if lvl < highfreq_levels:
            best_w.append(torch.zeros(shape, device=device, dtype=torch.float32))
        else:
            sum_w.append(torch.zeros(shape, device=device, dtype=torch.float32))
            sum_w_lap.append(torch.zeros(shape, device=device, dtype=torch.float32))

    weight_sum0 = torch.zeros(erp_h, erp_w, device=device, dtype=torch.float32)

    def _process_one_slice(s: TangentSlice, depth_np: np.ndarray):
        """Project one slice's depth map to an ERP (disparity, weight) pair."""
        depth_t = torch.from_numpy(depth_np.astype(np.float32)).to(device)

        disp0, w0 = _project_slice_to_erp_disp_weight_zbuffer(
            depth_t, s, cfg, erp_h, erp_w, depth_def, k, device,
        )
        return disp0, w0

    def _blend_into_pyramid(disp0: torch.Tensor, w0: torch.Tensor):
        """Decompose one projected slice and merge it into the fused pyramid."""
        nonlocal weight_sum0
        weight_sum0 += w0

        disp_pyr = [disp0.unsqueeze(0).unsqueeze(0)]
        w_pyr = [w0.unsqueeze(0).unsqueeze(0)]

        # Weight-normalised downsampling so zero-weight pixels do not bleed
        # into the coarser disparity levels.
        for lvl in range(1, levels):
            num = _blur_circular_w(disp_pyr[lvl - 1] * w_pyr[lvl - 1], kernel)
            den = _blur_circular_w(w_pyr[lvl - 1], kernel)
            num_ds = _down2(num)
            den_ds = _down2(den)
            disp_ds = num_ds / torch.clamp(den_ds, min=eps)
            disp_pyr.append(disp_ds)
            w_pyr.append(den_ds)

        # Laplacian levels: detail relative to the upsampled coarser level;
        # the coarsest level keeps the disparity itself.
        lap_pyr: List[torch.Tensor] = []
        for lvl in range(levels - 1):
            up = _upsample2_circular_w(disp_pyr[lvl + 1], Hs[lvl], Ws[lvl])
            lap_pyr.append(disp_pyr[lvl] - up)
        lap_pyr.append(disp_pyr[-1])

        for lvl in range(levels):
            wl = w_pyr[lvl]
            Ll = lap_pyr[lvl]
            if lvl < highfreq_levels:
                # High frequencies: keep the coefficient with the largest weight.
                better = wl > best_w[lvl]
                fused_lap[lvl] = torch.where(better, Ll, fused_lap[lvl])
                best_w[lvl] = torch.where(better, wl, best_w[lvl])
            else:
                # Low frequencies: accumulate for a weighted average.
                idx = lvl - highfreq_levels
                sum_w_lap[idx] += wl * Ll
                sum_w[idx] += wl

    # Process non-pole slices first.
    for s in slices:
        if s.slice_id not in tangent_depths:
            continue
        if pole_cons_enabled and (s.slice_type in pole_cons_target_types):
            pole_pending.append(s)
            continue

        disp0, w0 = _process_one_slice(s, tangent_depths[s.slice_id])

        # Reference accumulation for pole consistency.
        if pole_cons_enabled and (s.slice_type in pole_cons_ref_types):
            if ref_num_top is not None and top_v_max >= 0:
                ref_num_top += disp0[:top_v_max + 1] * w0[:top_v_max + 1]
                ref_den_top += w0[:top_v_max + 1]
            if ref_num_bot is not None and bot_v_min < erp_h:
                ref_num_bot += disp0[bot_v_min:] * w0[bot_v_min:]
                ref_den_bot += w0[bot_v_min:]

        _blend_into_pyramid(disp0, w0)

    # Pole consistency pass.
    if pole_cons_enabled and pole_pending and ref_num_top is not None:
        logger = logging.getLogger(__name__)
        ref_disp_top = ref_num_top / torch.clamp(ref_den_top, min=eps)
        ref_disp_bot = ref_num_bot / torch.clamp(ref_den_bot, min=eps)

        for s in pole_pending:
            disp0, w0 = _process_one_slice(s, tangent_depths[s.slice_id])

            try:
                if s.slice_type == "pole_north":
                    disp_other = disp0[:top_v_max + 1]
                    w_other = w0[:top_v_max + 1]
                    disp_ref = ref_disp_top
                    den_ref = ref_den_top
                else:
                    disp_other = disp0[bot_v_min:]
                    w_other = w0[bot_v_min:]
                    disp_ref = ref_disp_bot
                    den_ref = ref_den_bot

                overlap = (w_other > 1e-9) & (den_ref > 1e-9) & (disp_other > eps) & (disp_ref > eps)
                n_overlap = int(overlap.sum().item())
                if n_overlap >= pole_cons_min_overlap:
                    # -log(disparity) is log-depth; the median difference is
                    # the log scale factor that maps this slice onto the reference.
                    log_ref = -torch.log(disp_ref[overlap].clamp(min=eps))
                    log_other = -torch.log(disp_other[overlap].clamp(min=eps))
                    shift = float(torch.median(log_ref - log_other).item())
                    shift = max(-pole_cons_max_abs_log_shift, min(pole_cons_max_abs_log_shift, shift))
                    disp0 = disp0 * float(math.exp(-shift))
            except Exception:
                # Alignment is best-effort: the slice is still blended, just
                # unaligned. Report the failure instead of hiding it.
                logger.warning(
                    "Pole consistency alignment failed for slice %s; blending it unaligned.",
                    s.slice_id,
                    exc_info=True,
                )

            _blend_into_pyramid(disp0, w0)

    # Finalize low-frequency levels.
    for lvl in range(highfreq_levels, levels):
        idx = lvl - highfreq_levels
        fused_lap[lvl] = sum_w_lap[idx] / torch.clamp(sum_w[idx], min=eps)

    # Reconstruct fused disparity from coarse to fine.
    disp = fused_lap[-1]
    for lvl in range(levels - 2, -1, -1):
        disp = _upsample2_circular_w(disp, Hs[lvl], Ws[lvl]) + fused_lap[lvl]

    disp0_fused = disp.squeeze(0).squeeze(0)
    depth = torch.zeros_like(disp0_fused)
    m = disp0_fused > eps
    depth[m] = 1.0 / disp0_fused[m]
    depth[~m] = float("nan")

    weight_np = weight_sum0.detach().cpu().numpy().astype(np.float32)
    depth_np = depth.detach().cpu().numpy().astype(np.float32)
    # A pixel is valid only if it received enough weight AND has a finite
    # depth; otherwise the mask could mark NaN depths as valid.
    valid_np = ((weight_np > min_weight_sum) & np.isfinite(depth_np)).astype(np.uint8)

    return depth_np, weight_np, valid_np
718
+
719
+
720
+ # =============================================================================
721
+ # 可视化函数
722
+ # =============================================================================
723
+
724
+ def visualize_depth(
725
+ depth: np.ndarray,
726
+ vmin: Optional[float] = None,
727
+ vmax: Optional[float] = None,
728
+ ) -> np.ndarray:
729
+ """
730
+ 可视化深度图(percentile + TURBO colormap)
731
+
732
+ Returns:
733
+ vis: (H, W, 3) uint8 RGB
734
+ """
735
+ d = depth.astype(np.float32).copy()
736
+ valid = np.isfinite(d) & (d > 0)
737
+
738
+ if not np.any(valid):
739
+ return np.zeros((d.shape[0], d.shape[1], 3), dtype=np.uint8)
740
+
741
+ if vmin is None:
742
+ vmin = float(np.percentile(d[valid], 2))
743
+ if vmax is None:
744
+ vmax = float(np.percentile(d[valid], 98))
745
+ vmax = max(vmax, vmin + 1e-6)
746
+
747
+ d_norm = (np.clip(d, vmin, vmax) - vmin) / (vmax - vmin)
748
+ d_norm[~valid] = 0.0
749
+ d_u8 = (d_norm * 255).astype(np.uint8)
750
+
751
+ try:
752
+ import cv2
753
+ cm = cv2.applyColorMap(d_u8, cv2.COLORMAP_TURBO)
754
+ return cv2.cvtColor(cm, cv2.COLOR_BGR2RGB)
755
+ except ImportError:
756
+ return np.stack([d_u8, d_u8, d_u8], axis=-1)
757
+
758
+
759
+ def save_depth_visualization(
760
+ depth: np.ndarray,
761
+ output_path: Path,
762
+ vmin: Optional[float] = None,
763
+ vmax: Optional[float] = None,
764
+ ) -> None:
765
+ """保存深度可视化图像"""
766
+ import cv2
767
+ vis = visualize_depth(depth, vmin=vmin, vmax=vmax)
768
+ Path(output_path).parent.mkdir(parents=True, exist_ok=True)
769
+ cv2.imwrite(str(output_path), cv2.cvtColor(vis, cv2.COLOR_RGB2BGR))
core/erp_projection.py ADDED
@@ -0,0 +1,277 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ ERP 投影模块
3
+
4
+ ERPT_native ERP投影约定:
5
+ - 经度:lon = atan2(x, z),范围 [-π, π]
6
+ - 纬度:lat = asin(y),范围 [-π/2, π/2]
7
+ - 像素坐标:u ∈ [0, W), v ∈ [0, H)
8
+ - 图像中心 (u=W/2, v=(H-1)/2) 对应 (lon=0, lat=0),看向 +Z
9
+ - 图像顶部 (v=0) 对应 lat=+π/2,看向 +Y(上)
10
+ - 图像底部 (v=H-1) 对应 lat=-π/2,看向 -Y(下)
11
+ - 图像右边 lon增加,对应 +X 方向
12
+
13
+ 像素到经纬度映射:
14
+ lon = (u / W) * 2π - π
15
+ lat = π/2 - (v / (H-1)) * π
16
+
17
+ 方向向量(相机坐标系,也是世界坐标系当无旋转时):
18
+ x = sin(lon) * cos(lat) # 右
19
+ y = sin(lat) # 上
20
+ z = cos(lon) * cos(lat) # 前
21
+ """
22
+
23
+ import math
24
+ import numpy as np
25
+ import torch
26
+ from typing import Tuple, Union
27
+
28
+
29
def erp_to_lonlat(
    u: Union[np.ndarray, torch.Tensor],
    v: Union[np.ndarray, torch.Tensor],
    H: int,
    W: int,
) -> Tuple[Union[np.ndarray, torch.Tensor], Union[np.ndarray, torch.Tensor]]:
    """Convert ERP pixel coordinates to longitude/latitude in radians.

    The mapping is ``lon = (u / W) * 2π - π`` and
    ``lat = π/2 - (v / (H - 1)) * π``, so ``v = 0`` is the top row
    (``lat = +π/2``) and ``v = H - 1`` the bottom row (``lat = -π/2``).

    Args:
        u: Horizontal pixel coordinate(s), nominally in ``[0, W)``.
        v: Vertical pixel coordinate(s), nominally in ``[0, H - 1]``.
        H: Image height in pixels.
        W: Image width in pixels.

    Returns:
        ``(lon, lat)`` with the same container type and shape as the inputs.
    """
    # A one-row image has no vertical extent; clamp the divisor so v = 0 maps
    # to a finite latitude instead of 0/0 (NaN) or a ZeroDivisionError.
    row_span = float(max(H - 1, 1))

    lon = (u / float(W)) * (2.0 * math.pi) - math.pi
    lat = (math.pi / 2.0) - (v / row_span) * math.pi

    return lon, lat
55
+
56
+
57
def lonlat_to_erp(
    lon: Union[np.ndarray, torch.Tensor],
    lat: Union[np.ndarray, torch.Tensor],
    H: int,
    W: int,
) -> Tuple[Union[np.ndarray, torch.Tensor], Union[np.ndarray, torch.Tensor]]:
    """Convert longitude/latitude (radians) to continuous ERP pixel coordinates.

    Uses ``u = (lon + π) / (2π) * W`` and ``v = (π/2 - lat) / π * (H - 1)``.
    The result is neither wrapped horizontally nor clamped vertically.

    Args:
        lon: Longitude, nominally in ``[-π, π]``.
        lat: Latitude, nominally in ``[-π/2, π/2]``.
        H: Image height in pixels.
        W: Image width in pixels.

    Returns:
        ``(u, v)`` horizontal and vertical pixel coordinates.
    """
    full_turn = 2.0 * math.pi
    width = float(W)
    row_span = float(H - 1)

    # Longitude -pi lands on column 0; latitude +pi/2 lands on row 0.
    u = (lon + math.pi) / full_turn * width
    v = (math.pi / 2.0 - lat) / math.pi * row_span
    return u, v
83
+
84
+
85
def lonlat_to_direction(
    lon: Union[np.ndarray, torch.Tensor],
    lat: Union[np.ndarray, torch.Tensor],
) -> Union[np.ndarray, torch.Tensor]:
    """Convert longitude/latitude to unit direction vectors.

    Axes are ``[X right, Y up, Z forward]``:
    ``x = sin(lon)·cos(lat)``, ``y = sin(lat)``, ``z = cos(lon)·cos(lat)``.

    Args:
        lon: Longitude in radians.
        lat: Latitude in radians.

    Returns:
        Directions of shape ``(..., 3)``; a torch tensor when ``lon`` is a
        tensor, otherwise a NumPy array.
    """
    if isinstance(lon, torch.Tensor):
        horizontal = torch.cos(lat)
        return torch.stack(
            [torch.sin(lon) * horizontal, torch.sin(lat), torch.cos(lon) * horizontal],
            dim=-1,
        )

    horizontal = np.cos(lat)
    components = [np.sin(lon) * horizontal, np.sin(lat), np.cos(lon) * horizontal]
    return np.stack(components, axis=-1)
115
+
116
+
117
def direction_to_lonlat(
    dirs: Union[np.ndarray, torch.Tensor],
) -> Tuple[Union[np.ndarray, torch.Tensor], Union[np.ndarray, torch.Tensor]]:
    """Convert direction vectors to longitude/latitude in radians.

    Input vectors need not be unit length: latitude is computed from
    ``y / ‖dirs‖`` (norm floored at 1e-9), and longitude from
    ``atan2(x, z)``, which is scale-invariant.

    Args:
        dirs: Direction vectors of shape ``(..., 3)`` ordered ``[x, y, z]``.

    Returns:
        ``(lon, lat)`` with shape ``dirs.shape[:-1]``.
    """
    x = dirs[..., 0]
    y = dirs[..., 1]
    z = dirs[..., 2]

    if isinstance(dirs, torch.Tensor):
        # torch.norm is deprecated; vector_norm is its maintained replacement
        # for per-vector 2-norms.
        norm = torch.linalg.vector_norm(dirs, dim=-1)
        # Floor the norm so zero vectors give lat = 0 instead of NaN.
        norm = torch.clamp(norm, min=1e-9)

        lon = torch.atan2(x, z)

        # Clamp guards asin against round-off pushing |y / norm| above 1.
        y_normalized = torch.clamp(y / norm, -1.0, 1.0)
        lat = torch.asin(y_normalized)
    else:
        norm = np.linalg.norm(dirs, axis=-1)
        norm = np.maximum(norm, 1e-9)

        lon = np.arctan2(x, z)
        y_normalized = np.clip(y / norm, -1.0, 1.0)
        lat = np.arcsin(y_normalized)

    return lon, lat
154
+
155
+
156
def erp_to_direction(
    u: Union[np.ndarray, torch.Tensor],
    v: Union[np.ndarray, torch.Tensor],
    H: int,
    W: int,
) -> Union[np.ndarray, torch.Tensor]:
    """Convert ERP pixel coordinates to unit direction vectors.

    Composition of ``erp_to_lonlat`` followed by ``lonlat_to_direction``.

    Args:
        u: Horizontal pixel coordinate(s).
        v: Vertical pixel coordinate(s).
        H: Image height in pixels.
        W: Image width in pixels.

    Returns:
        Directions of shape ``(..., 3)`` ordered ``[x, y, z]``.
    """
    return lonlat_to_direction(*erp_to_lonlat(u, v, H, W))
176
+
177
+
178
def direction_to_erp(
    dirs: Union[np.ndarray, torch.Tensor],
    H: int,
    W: int,
) -> Tuple[Union[np.ndarray, torch.Tensor], Union[np.ndarray, torch.Tensor]]:
    """Convert direction vectors to ERP pixel coordinates.

    Composition of ``direction_to_lonlat`` followed by ``lonlat_to_erp``.

    Args:
        dirs: Direction vectors of shape ``(..., 3)`` ordered ``[x, y, z]``.
        H: Image height in pixels.
        W: Image width in pixels.

    Returns:
        ``(u, v)`` horizontal and vertical pixel coordinates.
    """
    longitude, latitude = direction_to_lonlat(dirs)
    pixel_uv = lonlat_to_erp(longitude, latitude, H, W)
    return pixel_uv
197
+
198
+
199
def create_erp_grid(
    H: int,
    W: int,
    device: torch.device = None,
) -> Tuple[torch.Tensor, torch.Tensor]:
    """Build the integer-valued pixel coordinate grid of an ERP image.

    Args:
        H: Image height in pixels.
        W: Image width in pixels.
        device: Device for the grid tensors; CPU when ``None``.

    Returns:
        ``(uu, vv)``: two float32 tensors of shape ``(H, W)`` holding the
        column index and the row index of every pixel.
    """
    target = torch.device("cpu") if device is None else device

    columns = torch.arange(W, device=target, dtype=torch.float32)
    rows = torch.arange(H, device=target, dtype=torch.float32)

    # "ij" indexing: first output varies along rows, second along columns.
    row_grid, col_grid = torch.meshgrid(rows, columns, indexing="ij")
    return col_grid, row_grid
224
+
225
+
226
def create_direction_grid(
    H: int,
    W: int,
    device: torch.device = None,
) -> torch.Tensor:
    """Build the per-pixel direction vectors of an ERP image.

    Args:
        H: Image height in pixels.
        W: Image width in pixels.
        device: Device for the result, forwarded to ``create_erp_grid``.

    Returns:
        Tensor of shape ``(H, W, 3)`` with one direction per pixel.
    """
    cols, rows = create_erp_grid(H, W, device)
    directions = erp_to_direction(cols, rows, H, W)
    return directions
244
+
245
+
246
def wrap_u(u: Union[np.ndarray, torch.Tensor], W: int) -> Union[np.ndarray, torch.Tensor]:
    """Wrap horizontal coordinates into ``[0, W)``.

    ERP images are periodic horizontally, so columns outside the image map
    back onto it modulo the width.

    Args:
        u: Horizontal pixel coordinate(s).
        W: Image width in pixels.

    Returns:
        Wrapped coordinates, same container type as ``u``.
    """
    period = float(W)
    modulo = torch.remainder if isinstance(u, torch.Tensor) else np.mod
    return modulo(u, period)
261
+
262
+
263
def clamp_v(v: Union[np.ndarray, torch.Tensor], H: int) -> Union[np.ndarray, torch.Tensor]:
    """Clamp vertical coordinates into ``[0, H - 1]``.

    Args:
        v: Vertical pixel coordinate(s).
        H: Image height in pixels.

    Returns:
        Clamped coordinates, same container type as ``v``.
    """
    first_row, last_row = 0.0, float(H - 1)
    if not isinstance(v, torch.Tensor):
        return np.clip(v, first_row, last_row)
    return torch.clamp(v, first_row, last_row)
core/erp_warp.py ADDED
@@ -0,0 +1,591 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ ERP Forward Warp 模块(移植自原版 ERPT erp_softsplat.py)
3
+
4
+ 使用锁定的投影/坐标系接口:
5
+ - core.erp_projection: erp_to_direction, direction_to_erp, wrap_u, clamp_v
6
+ - utils.pose_utils: Pose (R_cw, R_wc, position)
7
+
8
+ 算法流程:
9
+ 1. 对每个 src ERP 像素,通过 erp_to_direction 获取射线方向
10
+ 2. 根据深度计算 3D 点,变换到目标相机坐标系
11
+ 3. 通过 direction_to_erp 投影到目标 ERP
12
+ 4. Forward splatting 累积 RGB(softmax / zbuffer / point)
13
+
14
+ 支持的 splatting 方法:
15
+ - softmax_splatting(默认):自适应半径 + 高斯核 + softmax 深度竞争
16
+ - zbuffer_splatting:两遍 z-buffer 硬遮挡
17
+ - zbuffer_point:最近邻投影
18
+ """
19
+ from __future__ import annotations
20
+
21
+ import math
22
+ from dataclasses import dataclass
23
+ from typing import Any, Dict, Optional, Tuple
24
+
25
+ import cv2
26
+ import numpy as np
27
+ import torch
28
+
29
+ from .erp_projection import (
30
+ erp_to_direction,
31
+ direction_to_erp,
32
+ wrap_u,
33
+ create_erp_grid,
34
+ )
35
+
36
+ import sys
37
+ from pathlib import Path
38
+ sys.path.insert(0, str(Path(__file__).parent.parent))
39
+ from utils.pose_utils import Pose
40
+
41
+
42
@dataclass
class WarpResult:
    """Outputs of one ERP forward warp."""

    # Warped colour image, (H, W, 3) uint8.
    warped_rgb: np.ndarray
    # Coverage mask, (H, W) uint8: 1 = valid, 0 = invalid.
    valid_mask: np.ndarray
    # Optical flow, (H, W, 2) float32, or None.
    flow: Optional[np.ndarray]
    # Accumulated splat weight per pixel, (H, W) float32.
    weight_sum: np.ndarray
    # Warped depth, (H, W) float32 with NaN marking invalid pixels, or None.
    warped_depth: Optional[np.ndarray] = None
50
+
51
+
52
+ # =============================================================================
53
+ # Forward Projection(坐标变换)
54
+ # =============================================================================
55
+
56
@torch.no_grad()
def _forward_project(
    src_depth_t: torch.Tensor,
    src_pose: Pose,
    tgt_pose: Pose,
    erp_h: int,
    erp_w: int,
    device: torch.device,
    uu: Optional[torch.Tensor] = None,
    vv: Optional[torch.Tensor] = None,
) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
    """Project every source ERP pixel into the target ERP view.

    Each source pixel is lifted to a 3D point along its viewing ray using the
    source depth, moved into world space with the source pose, into the
    target camera frame with the target pose, and re-projected to ERP
    coordinates.

    Args:
        src_depth_t: Source depth per pixel, ``(H, W)``; it scales the ray
            directions, i.e. it is used as distance along the ray.
        src_pose: Source pose; ``R_cw`` and ``position`` are read.
        tgt_pose: Target pose; ``R_wc`` and ``position`` are read.
        erp_h: ERP height in pixels.
        erp_w: ERP width in pixels.
        device: Device for the pose tensors (and the grid, if built here).
        uu: Optional precomputed column grid; built together with ``vv``
            when either is ``None``.
        vv: Optional precomputed row grid.

    Returns:
        ``(u_tgt, v_tgt, range_tgt, dirs_tgt)``: target pixel coordinates
        (``u`` wrapped horizontally, ``v`` left unclamped), distance from the
        target camera, and ``(H, W, 3)`` target-frame ray directions.
    """
    if uu is None or vv is None:
        uu, vv = create_erp_grid(erp_h, erp_w, device)

    def _to_f32(values) -> torch.Tensor:
        return torch.tensor(values, device=device, dtype=torch.float32)

    # Source pixel -> ray -> 3D point in the source camera frame.
    rays_src = erp_to_direction(uu, vv, erp_h, erp_w)
    pts_src = rays_src * src_depth_t.unsqueeze(-1)

    # Source camera frame -> world frame.
    rot_src = _to_f32(src_pose.R_cw)
    origin_src = _to_f32(src_pose.position)
    pts_world = torch.einsum("ij,hwj->hwi", rot_src, pts_src) + origin_src

    # World frame -> target camera frame.
    rot_tgt = _to_f32(tgt_pose.R_wc)
    origin_tgt = _to_f32(tgt_pose.position)
    pts_tgt = torch.einsum("ij,hwj->hwi", rot_tgt, pts_world - origin_tgt)

    # Distance and direction in the target frame; the floor avoids 0/0 for
    # points that coincide with the target camera centre.
    range_tgt = torch.norm(pts_tgt, dim=-1)
    dirs_tgt = pts_tgt / torch.clamp(range_tgt.unsqueeze(-1), min=1e-9)

    # Direction -> target ERP pixel, wrapped across the horizontal seam.
    u_tgt, v_tgt = direction_to_erp(dirs_tgt, erp_h, erp_w)
    u_tgt = wrap_u(u_tgt, erp_w)

    return u_tgt, v_tgt, range_tgt, dirs_tgt
105
+
106
+
107
+ # =============================================================================
108
+ # Adaptive Softmax Splatting
109
+ # =============================================================================
110
+
111
def _adaptive_splat_rgb(
    erp_h: int,
    erp_w: int,
    u: torch.Tensor,
    v: torch.Tensor,
    rgb: torch.Tensor,
    depth_compete: torch.Tensor,
    valid: torch.Tensor,
    alpha: float,
    radius: torch.Tensor,
    occlusion_gate: Optional[Dict[str, Any]] = None,
) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
    """Forward-splat colours with per-point radius and soft depth competition.

    Every point spreads over a square neighbourhood of its floor pixel with a
    Gaussian falloff (sigma = radius, floored at 0.5 px). Contributions are
    additionally scaled by ``exp(alpha * (1/depth - max valid 1/depth))`` so
    nearer points dominate. With ``occlusion_gate["enabled"]``, a point only
    contributes to a pixel if its depth is within a tolerance
    (``rel_eps``, ``abs_eps_m``) of the nearest depth that rounds to that
    pixel.

    Args:
        erp_h: Output height in pixels.
        erp_w: Output width in pixels; columns wrap around.
        u: Target column per point.
        v: Target row per point; rows outside ``[0, erp_h)`` are dropped.
        rgb: Colour per point; last dimension 3.
        depth_compete: Depth per point used for competition and gating.
        valid: Boolean mask of points to splat.
        alpha: Sharpness of the inverse-depth competition.
        radius: Splat radius per point in pixels.
        occlusion_gate: Optional gate settings (``enabled``, ``abs_eps_m``,
            ``rel_eps``).

    Returns:
        ``(accum_rgb, accum_w, accum_hit, accum_d)``: weighted colour sum
        ``(erp_h, erp_w, 3)``, weight sum, kernel-only weight sum (no depth
        competition factor), and weighted depth sum, each ``(erp_h, erp_w)``.
    """
    device = u.device
    cols_f = u.reshape(-1)
    rows_f = v.reshape(-1)
    colors = rgb.reshape(-1, 3)
    depths = depth_compete.reshape(-1)
    keep = valid.reshape(-1)
    radii = radius.reshape(-1)

    # Depth used only for the competition weight: unusable depths become 1.
    safe_depth = torch.where(
        keep & torch.isfinite(depths) & (depths > 0),
        depths, torch.ones_like(depths),
    )

    # Competition weight exp(alpha * inv_depth), shifted by the largest valid
    # inverse depth so the exponent never exceeds zero for valid points.
    inv_depth = 1.0 / torch.clamp(safe_depth, min=0.1)
    inv_valid = inv_depth[keep]
    if len(inv_valid) > 0:
        inv_ref = inv_valid.max()
    else:
        inv_ref = inv_depth.max()
    soft_w = torch.exp(alpha * (inv_depth - inv_ref))

    # Optional occlusion gate: nearest depth per pixel by nearest-neighbour
    # projection. ``nearest_depth`` stays None when the gate is off.
    nearest_depth: Optional[torch.Tensor] = None
    abs_tol = 0.0
    rel_tol = 0.0
    if occlusion_gate and bool(occlusion_gate.get("enabled", False)):
        abs_tol = float(occlusion_gate.get("abs_eps_m", 0.05))
        rel_tol = float(occlusion_gate.get("rel_eps", 0.05))
        col_nn = torch.remainder(torch.round(cols_f).to(torch.long), erp_w)
        row_nn = torch.round(rows_f).to(torch.long)
        row_inside = (row_nn >= 0) & (row_nn < erp_h)
        cell_nn = torch.clamp(row_nn, 0, erp_h - 1) * erp_w + col_nn
        nearest_depth = torch.full((erp_h * erp_w,), float("inf"), device=device)
        candidates = torch.where(
            keep & row_inside & torch.isfinite(depths),
            depths, torch.full_like(depths, float("inf")),
        )
        nearest_depth.scatter_reduce_(0, cell_nn, candidates, reduce="amin", include_self=True)

    accum_rgb = torch.zeros(erp_h, erp_w, 3, device=device, dtype=torch.float32)
    accum_w = torch.zeros(erp_h, erp_w, device=device, dtype=torch.float32)
    accum_hit = torch.zeros(erp_h, erp_w, device=device, dtype=torch.float32)
    accum_d = torch.zeros(erp_h, erp_w, device=device, dtype=torch.float32)
    # Flat views share storage with the accumulators above.
    rgb_lin = accum_rgb.view(-1, 3)
    w_lin = accum_w.view(-1)
    hit_lin = accum_hit.view(-1)
    d_lin = accum_d.view(-1)

    col0 = torch.floor(cols_f).to(torch.int64)
    row0 = torch.floor(rows_f).to(torch.int64)
    frac_u = (cols_f - col0.float()).clamp(0, 1)
    frac_v = (rows_f - row0.float()).clamp(0, 1)

    # Neighbourhood half-size: largest valid radius + 1, capped at 5 px.
    usable_radii = radii[keep & torch.isfinite(radii)]
    if len(usable_radii) > 0:
        max_r = min(int(usable_radii.max().item()) + 1, 5)
    else:
        max_r = 2

    # Loop-invariant pieces of the per-offset masks and kernel.
    splattable = keep & torch.isfinite(depths)
    reach = radii + 0.5
    sigma = radii.clamp(min=0.5)

    for di in range(-max_r, max_r + 1):
        for dj in range(-max_r, max_r + 1):
            if math.sqrt(di * di + dj * dj) > max_r + 0.5:
                continue

            # Distance from the sub-pixel point to this neighbour pixel.
            off_x = float(di) - frac_u
            off_y = float(dj) - frac_v
            dist = torch.sqrt(off_x * off_x + off_y * off_y)
            kernel_w = torch.where(
                dist <= reach,
                torch.exp(-0.5 * (dist / sigma) ** 2),
                torch.zeros_like(radii),
            )

            col = torch.remainder(col0 + di, erp_w)
            row = row0 + dj
            hit = splattable & (row >= 0) & (row < erp_h)
            # Rejected points are redirected to cell 0 and carry zero weight.
            cell = (
                torch.where(hit, row, torch.zeros_like(row)) * erp_w
                + torch.where(hit, col, torch.zeros_like(col))
            )

            if nearest_depth is not None:
                front = depths <= (nearest_depth.gather(0, cell) * (1.0 + rel_tol) + abs_tol)
                hit = hit & front

            no_weight = torch.zeros_like(kernel_w)
            contrib = torch.where(hit, kernel_w * soft_w, no_weight)
            touched = torch.where(hit, kernel_w, no_weight)

            w_lin.scatter_add_(0, cell, contrib)
            hit_lin.scatter_add_(0, cell, touched)
            rgb_lin.scatter_add_(
                0, cell.unsqueeze(-1).expand(-1, 3),
                (contrib.unsqueeze(-1) * colors).float(),
            )
            d_lin.scatter_add_(0, cell, (contrib * depths).float())

    return accum_rgb, accum_w, accum_hit, accum_d
227
+
228
+
229
+ # =============================================================================
230
+ # Z-Buffer Splatting
231
+ # =============================================================================
232
+
233
def _zbuffer_splat_rgb(
    erp_h: int, erp_w: int,
    u: torch.Tensor, v: torch.Tensor,
    rgb: torch.Tensor, depth_compete: torch.Tensor, valid: torch.Tensor,
    eps_abs_m: float, eps_rel: float, min_w: float,
) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
    """Bilinear forward splatting with a hard two-pass z-buffer.

    Pass A records, per output pixel, the smallest depth among the points
    whose bilinear footprint touches it. Pass B accumulates only points whose
    depth is within ``min_depth * (1 + eps_rel) + eps_abs_m`` of that minimum.

    Args:
        erp_h: Output height in pixels.
        erp_w: Output width in pixels; columns wrap around.
        u: Target column per point.
        v: Target row per point; rows outside ``[0, erp_h)`` are dropped.
        rgb: Colour per point; last dimension 3.
        depth_compete: Depth per point used for the z-test.
        valid: Boolean mask of points to splat.
        eps_abs_m: Absolute z-test tolerance.
        eps_rel: Relative z-test tolerance.
        min_w: Bilinear corner weights below this value are ignored.

    Returns:
        ``(accum_rgb, accum_w, accum_hit, accum_d)``: weighted colour sum
        ``(erp_h, erp_w, 3)``, weight sum, hit weight sum (identical to the
        weight sum here), and weighted depth sum, each ``(erp_h, erp_w)``.
    """
    device = u.device
    cols_f, rows_f = u.reshape(-1), v.reshape(-1)
    depths = depth_compete.reshape(-1)
    colors = rgb.reshape(-1, 3)

    usable = valid.reshape(-1) & torch.isfinite(cols_f) & torch.isfinite(rows_f) & \
        torch.isfinite(depths) & (depths > 0.0)

    col0 = torch.floor(cols_f).to(torch.int64)
    row0 = torch.floor(rows_f).to(torch.int64)
    fx = (cols_f - col0.float()).clamp(0, 1)
    fy = (rows_f - row0.float()).clamp(0, 1)
    left = torch.remainder(col0, erp_w)
    right = torch.remainder(col0 + 1, erp_w)
    row1 = row0 + 1

    # The four bilinear corners, in the order they are splatted.
    corner_specs = (
        (left, row0, (1 - fx) * (1 - fy)),
        (right, row0, fx * (1 - fy)),
        (left, row1, (1 - fx) * fy),
        (right, row1, fx * fy),
    )
    corners = []
    for col, row, weight in corner_specs:
        ok = usable & (row >= 0) & (row < erp_h) & (weight >= min_w)
        # Rejected points are redirected to cell 0; they never win the
        # min-depth test (depth = inf) and add zero weight.
        cell = (
            torch.where(ok, row, torch.zeros_like(row)) * erp_w
            + torch.where(ok, col, torch.zeros_like(col))
        )
        corners.append((cell, ok, weight))

    # Pass A: nearest depth per output pixel.
    min_depth = torch.full((erp_h * erp_w,), float("inf"), device=device)
    for cell, ok, _ in corners:
        candidate = torch.where(ok, depths, torch.full_like(depths, float("inf")))
        min_depth.scatter_reduce_(0, cell, candidate, reduce="amin", include_self=True)

    # Pass B: accumulate only the near-front points.
    accum_rgb = torch.zeros(erp_h, erp_w, 3, device=device)
    accum_w = torch.zeros(erp_h, erp_w, device=device)
    accum_hit = torch.zeros(erp_h, erp_w, device=device)
    accum_d = torch.zeros(erp_h, erp_w, device=device)

    for cell, ok, weight in corners:
        front = depths <= (min_depth.gather(0, cell) * (1 + eps_rel) + eps_abs_m)
        gated_w = torch.where(ok & front, weight, torch.zeros_like(weight))
        accum_w.view(-1).scatter_add_(0, cell, gated_w)
        accum_hit.view(-1).scatter_add_(0, cell, gated_w)
        accum_rgb.view(-1, 3).scatter_add_(
            0, cell.unsqueeze(-1).expand(-1, 3),
            (gated_w.unsqueeze(-1) * colors).float(),
        )
        accum_d.view(-1).scatter_add_(0, cell, (gated_w * depths).float())

    return accum_rgb, accum_w, accum_hit, accum_d
302
+
303
+
304
+ # =============================================================================
305
+ # Z-Buffer Point
306
+ # =============================================================================
307
+
308
def _zbuffer_point_rgb(
    erp_h: int, erp_w: int,
    u: torch.Tensor, v: torch.Tensor,
    rgb: torch.Tensor, depth_compete: torch.Tensor, valid: torch.Tensor,
    eps_abs_m: float, eps_rel: float,
) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
    """Nearest-pixel point rendering with a hard two-pass z-buffer.

    Every point is rounded to its nearest output pixel. Only points whose
    depth is within ``min_depth * (1 + eps_rel) + eps_abs_m`` of the pixel's
    nearest depth contribute, each with unit weight.

    Args:
        erp_h: Output height in pixels.
        erp_w: Output width in pixels; columns wrap around.
        u: Target column per point.
        v: Target row per point; rows rounding outside ``[0, erp_h)`` are
            dropped.
        rgb: Colour per point; last dimension 3.
        depth_compete: Depth per point used for the z-test.
        valid: Boolean mask of points to render.
        eps_abs_m: Absolute z-test tolerance.
        eps_rel: Relative z-test tolerance.

    Returns:
        ``(accum_rgb, accum_w, accum_hit, accum_d)``: colour sum
        ``(erp_h, erp_w, 3)``, point count, hit count (identical to the point
        count here), and depth sum, each ``(erp_h, erp_w)``.
    """
    device = u.device
    cols_f, rows_f = u.reshape(-1), v.reshape(-1)
    depths = depth_compete.reshape(-1)
    colors = rgb.reshape(-1, 3)

    usable = valid.reshape(-1) & torch.isfinite(cols_f) & torch.isfinite(rows_f) & \
        torch.isfinite(depths) & (depths > 0.0)

    col = torch.remainder(torch.round(cols_f).to(torch.int64), erp_w)
    row = torch.round(rows_f).to(torch.int64)
    inside = usable & (row >= 0) & (row < erp_h)
    # Rejected points are redirected to cell 0 and carry zero weight.
    cell = (
        torch.where(inside, row, torch.zeros_like(row)) * erp_w
        + torch.where(inside, col, torch.zeros_like(col))
    )

    # Pass A: nearest depth per output pixel.
    min_depth = torch.full((erp_h * erp_w,), float("inf"), device=device)
    candidate = torch.where(inside, depths, torch.full_like(depths, float("inf")))
    min_depth.scatter_reduce_(0, cell, candidate, reduce="amin", include_self=True)

    # Pass B: unit weight for points that pass the z-test.
    front = depths <= (min_depth.gather(0, cell) * (1 + eps_rel) + eps_abs_m)
    unit_w = torch.where(inside & front, torch.ones_like(depths), torch.zeros_like(depths))

    accum_rgb = torch.zeros(erp_h, erp_w, 3, device=device)
    accum_w = torch.zeros(erp_h, erp_w, device=device)
    accum_hit = torch.zeros(erp_h, erp_w, device=device)
    accum_d = torch.zeros(erp_h, erp_w, device=device)

    accum_w.view(-1).scatter_add_(0, cell, unit_w)
    accum_hit.view(-1).scatter_add_(0, cell, unit_w)
    accum_rgb.view(-1, 3).scatter_add_(
        0, cell.unsqueeze(-1).expand(-1, 3),
        (unit_w.unsqueeze(-1) * colors).float(),
    )
    accum_d.view(-1).scatter_add_(0, cell, (unit_w * depths).float())

    return accum_rgb, accum_w, accum_hit, accum_d
357
+
358
+
359
+ # =============================================================================
360
+ # Hole Fill
361
+ # =============================================================================
362
+
363
def _edge_aware_hole_fill(
    rgb: np.ndarray, mask: np.ndarray,
    max_hole_px: int = 5,
    inpaint_radius: int = 2,
) -> Tuple[np.ndarray, np.ndarray]:
    """Inpaint only very small holes, leaving larger gaps untouched.

    Holes are the 8-connected components of ``mask == 0``. Components with at
    most ``max_hole_px ** 2`` pixels are filled with Telea inpainting; larger
    ones (e.g. disocclusions) are left as they are so they are not filled
    with invented content.

    Args:
        rgb: Colour image, ``(H, W, 3)``; passed to ``cv2.inpaint``.
        mask: Validity mask, ``(H, W)``; 0 marks a hole.
        max_hole_px: Side length of the largest hole (as a square) to fill.
        inpaint_radius: Neighbourhood radius passed to ``cv2.inpaint``.

    Returns:
        ``(rgb_out, mask_out)``. The inputs themselves are returned when
        nothing is filled; otherwise copies with the small holes filled and
        marked valid.
    """
    holes = (mask == 0).astype(np.uint8)
    if not holes.any():
        return rgb, mask

    _, labels, stats, _ = cv2.connectedComponentsWithStats(holes, connectivity=8)

    # Per-label lookup table instead of one full-image comparison per
    # component: a single indexing pass regardless of how many holes exist.
    fillable = stats[:, cv2.CC_STAT_AREA] <= max_hole_px * max_hole_px
    fillable[0] = False  # label 0 is the background (non-hole pixels)
    fill_bool = fillable[labels]

    if not fill_bool.any():
        return rgb, mask

    fill_mask = fill_bool.astype(np.uint8)

    # The conversions are kept so the call into cv2.inpaint is unchanged.
    rgb_bgr = cv2.cvtColor(rgb, cv2.COLOR_RGB2BGR)
    filled = cv2.inpaint(rgb_bgr, fill_mask, inpaint_radius, cv2.INPAINT_TELEA)
    filled_rgb = cv2.cvtColor(filled, cv2.COLOR_BGR2RGB)

    rgb_out = rgb.copy()
    mask_out = mask.copy()
    rgb_out[fill_bool] = filled_rgb[fill_bool]
    mask_out[fill_bool] = 1

    return rgb_out, mask_out
396
+
397
+
398
+ # =============================================================================
399
+ # 主函数
400
+ # =============================================================================
401
+
402
@torch.no_grad()
def warp_erp_to_target(
    src_rgb: np.ndarray,
    src_depth: np.ndarray,
    src_pose: Pose,
    tgt_pose: Pose,
    cfg: Dict[str, Any],
    device: torch.device,
) -> WarpResult:
    """
    Forward-warp a source ERP view into a target ERP view.

    Source pixels are projected into the target panorama with
    ``_forward_project`` and accumulated by the splatting routine selected
    through ``cfg["warp"]["method"]`` (``"zbuffer_splatting"``,
    ``"zbuffer_point"``, anything else -> adaptive splatting).

    Args:
        src_rgb: (H, W, 3) uint8 source RGB.
        src_depth: (H, W) float32 source range depth (metres). Non-finite or
            non-positive entries are treated as invalid.
        src_pose: source camera pose (Pose instance).
        tgt_pose: target camera pose (Pose instance).
        cfg: configuration dict; all options are read from ``cfg["warp"]``.
        device: torch device used for the computation.

    Returns:
        WarpResult with ``warped_rgb`` (uint8), ``valid_mask`` (uint8, 0/1),
        ``flow`` (float32 or None), ``weight_sum`` (accumulated hit weights)
        and ``warped_depth`` (float32 with NaN where invalid, or None).
    """
    warp_cfg = cfg.get("warp", {})
    method = str(warp_cfg.get("method", "softmax_splatting"))
    alpha = float(warp_cfg.get("alpha", 2.0))
    output_flow = bool(warp_cfg.get("output_flow", True))
    output_depth = bool(warp_cfg.get("output_depth", True))
    depth_scale_factor = float(warp_cfg.get("depth_scale_factor", 1.0))

    # Z-buffer parameters
    z_eps_abs = float(warp_cfg.get("zbuffer_eps_abs_m", 0.03))
    z_eps_rel = float(warp_cfg.get("zbuffer_eps_rel", 0.03))
    z_min_w = float(warp_cfg.get("zbuffer_min_weight", 1e-3))

    # Adaptive splat-radius parameters
    base_radius = float(warp_cfg.get("splat_radius_px", 1.5))
    radius_min = float(warp_cfg.get("radius_min_px", 0.6))
    radius_max_eq = float(warp_cfg.get("radius_max_px", 2.2))
    radius_max_pole = float(warp_cfg.get("radius_max_pole_px", 3.4))
    pole_radius_scale = float(warp_cfg.get("pole_radius_scale", 3.0))
    pole_lat_threshold = float(warp_cfg.get("pole_lat_threshold", 60.0)) * math.pi / 180.0
    depth_radius_scale = bool(warp_cfg.get("depth_radius_scale", False))
    depth_ref = float(warp_cfg.get("depth_ref_m", 2.0))
    depth_edge_aware = bool(warp_cfg.get("depth_edge_aware", True))
    depth_edge_threshold = float(warp_cfg.get("depth_edge_threshold", 0.3))
    depth_edge_min_scale = float(warp_cfg.get("depth_edge_min_scale", 0.12))

    # Hole fill is never applied to the z-buffer methods.
    hole_fill = (
        bool(warp_cfg.get("hole_fill_enabled", False))
        and method not in ("zbuffer_splatting", "zbuffer_point")
    )
    max_hole_px = int(warp_cfg.get("max_hole_px", 16))

    erp_h, erp_w = src_rgb.shape[:2]

    # Convert inputs to tensors. ``astype`` copies, so the in-place scaling
    # below never writes into the caller's depth array.
    src_rgb_t = torch.from_numpy(src_rgb.astype(np.float32)).to(device) / 255.0
    src_depth_t = torch.from_numpy(src_depth.astype(np.float32)).to(device)
    if depth_scale_factor != 1.0:
        src_depth_t *= depth_scale_factor

    valid = torch.isfinite(src_depth_t) & (src_depth_t > 0.0)

    # --- Depth-edge scale: shrink the splat radius near depth discontinuities ---
    depth_edge_scale = torch.ones_like(src_depth_t)
    if depth_edge_aware:
        from torch.nn.functional import conv2d
        sobel_x = torch.tensor([[-1, 0, 1], [-2, 0, 2], [-1, 0, 1]],
                               dtype=torch.float32, device=device).view(1, 1, 3, 3)
        sobel_y = torch.tensor([[-1, -2, -1], [0, 0, 0], [1, 2, 1]],
                               dtype=torch.float32, device=device).view(1, 1, 3, 3)
        # Invalid depths are replaced by the median of the valid ones (or 1.0
        # when there are none) so they do not create artificial edges.
        safe_d = torch.where(
            valid,
            src_depth_t,
            src_depth_t[valid].median() if valid.any() else torch.ones_like(src_depth_t),
        )
        log_d = torch.log(torch.clamp(safe_d, min=0.1)).unsqueeze(0).unsqueeze(0)
        # NOTE(review): conv2d zero-pads the log-depth map, so border pixels may
        # register spurious gradients even though the ERP is horizontally
        # periodic - confirm this is intended before changing it.
        gx = conv2d(log_d, sobel_x, padding=1).squeeze()
        gy = conv2d(log_d, sobel_y, padding=1).squeeze()
        grad = torch.sqrt(gx ** 2 + gy ** 2)
        gmax = grad.max()
        if gmax > 1e-6:
            gnorm = grad / gmax
        else:
            gnorm = torch.zeros_like(grad)
        depth_edge_scale = torch.clamp(
            1.0 - gnorm / max(depth_edge_threshold, 1e-6),
            min=depth_edge_min_scale, max=1.0,
        )
        depth_edge_scale = torch.where(torch.isfinite(depth_edge_scale),
                                       depth_edge_scale, torch.ones_like(depth_edge_scale))

    # --- ERP pixel grid ---
    uu, vv = create_erp_grid(erp_h, erp_w, device)

    # --- Forward projection into the target view ---
    u_tgt, v_tgt, range_tgt, dirs_tgt = _forward_project(
        src_depth_t, src_pose, tgt_pose, erp_h, erp_w, device, uu, vv,
    )

    # --- Adaptive radius ---
    lat_tgt = torch.asin(torch.clamp(dirs_tgt[..., 1], -1.0, 1.0))
    abs_lat = torch.abs(lat_tgt)
    # Guard the denominator: a threshold of exactly 90 degrees would divide by
    # zero and turn every radius into NaN.
    pole_span = max(0.5 * math.pi - pole_lat_threshold, 1e-6)
    pole_factor = torch.clamp(
        (abs_lat - pole_lat_threshold) / pole_span,
        min=0.0, max=1.0,
    )
    lat_scale = 1.0 + pole_factor * (pole_radius_scale - 1.0)

    if depth_radius_scale:
        safe_range = torch.where(valid, range_tgt, torch.full_like(range_tgt, depth_ref))
        d_scale = 1.0 / (1.0 + safe_range / depth_ref)
    else:
        d_scale = torch.ones_like(range_tgt)

    adaptive_radius = base_radius * lat_scale * d_scale * depth_edge_scale
    adaptive_radius = torch.where(valid, adaptive_radius, torch.full_like(adaptive_radius, base_radius))
    # The upper bound is interpolated between the equator and pole limits.
    radius_max_local = radius_max_eq + pole_factor * (radius_max_pole - radius_max_eq)
    adaptive_radius = torch.clamp(adaptive_radius, min=radius_min)
    adaptive_radius = torch.minimum(adaptive_radius, radius_max_local)

    # --- Splatting ---
    if method == "zbuffer_splatting":
        _rgb, _w, _hit, _d = _zbuffer_splat_rgb(
            erp_h, erp_w, u_tgt, v_tgt, src_rgb_t, range_tgt, valid,
            z_eps_abs, z_eps_rel, z_min_w,
        )
    elif method == "zbuffer_point":
        _rgb, _w, _hit, _d = _zbuffer_point_rgb(
            erp_h, erp_w, u_tgt, v_tgt, src_rgb_t, range_tgt, valid,
            z_eps_abs, z_eps_rel,
        )
    else:
        _rgb, _w, _hit, _d = _adaptive_splat_rgb(
            erp_h, erp_w, u_tgt, v_tgt, src_rgb_t, range_tgt, valid,
            alpha, adaptive_radius, warp_cfg.get("occlusion_gate", None),
        )

    # --- Normalisation by the accumulated weight ---
    denom = _w > 0.0
    out_rgb = torch.zeros_like(_rgb)
    out_rgb[denom] = _rgb[denom] / _w[denom].unsqueeze(-1)

    min_hit = float(warp_cfg.get("min_hit_sum", 1e-6))
    valid_mask = _hit > min_hit

    # NOTE(review): ``.byte()`` truncates rather than rounds - confirm that is
    # the intended quantisation.
    warped_np = (out_rgb.clamp(0, 1) * 255).byte().cpu().numpy()
    mask_np = valid_mask.cpu().numpy().astype(np.uint8)
    weight_np = _hit.cpu().numpy().astype(np.float32)

    # --- Warped depth (NaN wherever the pixel is not valid) ---
    warped_depth_np = None
    if output_depth:
        out_d = torch.full((erp_h, erp_w), float("nan"), device=device)
        out_d[denom] = _d[denom] / torch.clamp(_w[denom], min=1e-9)
        out_d[~valid_mask] = float("nan")
        warped_depth_np = out_d.cpu().numpy().astype(np.float32)

    # --- Hole fill (RGB and mask only; the warped depth is left unchanged) ---
    if hole_fill:
        warped_np, mask_np = _edge_aware_hole_fill(warped_np, mask_np, max_hole_px)

    # --- Optical flow (source pixel -> target pixel) ---
    flow_np = None
    if output_flow:
        du = u_tgt - uu
        # Wrap the horizontal displacement into [-W/2, W/2) across the seam.
        du = (du + 0.5 * erp_w) % erp_w - 0.5 * erp_w
        dv = v_tgt - vv
        flow_np = torch.stack([du, dv], dim=-1).cpu().numpy().astype(np.float32)

    return WarpResult(
        warped_rgb=warped_np,
        valid_mask=mask_np,
        flow=flow_np,
        weight_sum=weight_np,
        warped_depth=warped_depth_np,
    )
578
+
579
+
580
def create_comparison_image(
    warped_rgb: np.ndarray,
    valid_mask: np.ndarray,
    gt_rgb: Optional[np.ndarray] = None,
) -> np.ndarray:
    """Build a visual comparison of the warped image against ground truth.

    Pixels whose ``valid_mask`` entry is 0 are blacked out in a copy of
    ``warped_rgb`` (the input is not modified). When ``gt_rgb`` is given, the
    result is the masked warp stacked on top of it along axis 0; otherwise
    only the masked warp is returned.
    """
    invalid = valid_mask == 0
    canvas = warped_rgb.copy()
    canvas[invalid] = 0

    if gt_rgb is None:
        return canvas
    return np.concatenate([canvas, gt_rgb], axis=0)
core/tangent_extraction.py ADDED
@@ -0,0 +1,566 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ ERP -> Tangent 切片生成模块(移植自原版 ERPT)
3
+
4
+ 功能:
5
+ 1. 生成 icosahedron 20 面的相机朝向
6
+ 2. 生成 north/south pole 额外切片(使用更大 FOV)
7
+ 3. 从 ERP 采样生成透视切片(支持 seam wrap)
8
+ 4. 输出切片 RGB 和元数据
9
+
10
+ 关键算法:
11
+ - icosahedron 面法向计算
12
+ - 相机坐标系构建(look-at)
13
+ - ERP -> 透视投影(grid_sample with seam wrap)
14
+ """
15
+ from __future__ import annotations
16
+
17
+ import math
18
+ from dataclasses import dataclass
19
+ from pathlib import Path
20
+ from typing import Any, Dict, List, Optional, Tuple
21
+
22
+ import numpy as np
23
+ import torch
24
+ import torch.nn.functional as F
25
+
26
+
27
@dataclass
class TangentSlice:
    """Specification of a single perspective (tangent) slice."""
    slice_id: str            # slice identifier, e.g. "face_00", "north", "south"
    slice_type: str          # "face" | "pole_north" | "pole_south" | "pole_ring"
    center_dir: np.ndarray   # slice centre direction (world frame, unit vector)
    R_cw: np.ndarray         # camera-to-world rotation matrix (3, 3)
    fov_deg: float           # field of view in degrees
    resolution: int          # output resolution in pixels (square image)
    K: np.ndarray            # camera intrinsics (3, 3)
    f_px: float              # focal length in pixels

    def to_dict(self) -> Dict[str, Any]:
        """Return a JSON-serialisable dict (arrays become nested lists)."""
        return dict(
            slice_id=self.slice_id,
            slice_type=self.slice_type,
            center_dir=self.center_dir.tolist(),
            R_cw=self.R_cw.tolist(),
            fov_deg=float(self.fov_deg),
            resolution=int(self.resolution),
            K=self.K.tolist(),
            f_px=float(self.f_px),
        )
51
+
52
+
53
+ def _compute_icosahedron_face_centers() -> List[np.ndarray]:
54
+ """
55
+ 计算正二十面体 20 个面的中心方向(单位向量)
56
+
57
+ 正二十面体有 12 个顶点、20 个面、30 条边。
58
+ 每个面是等边三角形,面中心 = (v0 + v1 + v2) / 3 归一化
59
+
60
+ Returns:
61
+ 20 个单位向量的列表,每个指向一个面的中心
62
+ """
63
+ # 黄金比例
64
+ phi = (1.0 + math.sqrt(5.0)) / 2.0
65
+
66
+ # 正二十面体 12 个顶点(坐标已归一化)
67
+ vertices = np.array([
68
+ [-1, phi, 0],
69
+ [ 1, phi, 0],
70
+ [-1, -phi, 0],
71
+ [ 1, -phi, 0],
72
+ [0, -1, phi],
73
+ [0, 1, phi],
74
+ [0, -1, -phi],
75
+ [0, 1, -phi],
76
+ [ phi, 0, -1],
77
+ [ phi, 0, 1],
78
+ [-phi, 0, -1],
79
+ [-phi, 0, 1],
80
+ ], dtype=np.float64)
81
+
82
+ # 归一化顶点
83
+ vertices = vertices / np.linalg.norm(vertices, axis=1, keepdims=True)
84
+
85
+ # 20 个面的顶点索引
86
+ faces = [
87
+ (0, 11, 5), (0, 5, 1), (0, 1, 7), (0, 7, 10), (0, 10, 11),
88
+ (1, 5, 9), (5, 11, 4), (11, 10, 2), (10, 7, 6), (7, 1, 8),
89
+ (3, 9, 4), (3, 4, 2), (3, 2, 6), (3, 6, 8), (3, 8, 9),
90
+ (4, 9, 5), (2, 4, 11), (6, 2, 10), (8, 6, 7), (9, 8, 1),
91
+ ]
92
+
93
+ centers = []
94
+ for i0, i1, i2 in faces:
95
+ center = vertices[i0] + vertices[i1] + vertices[i2]
96
+ center = center / np.linalg.norm(center)
97
+ centers.append(center.astype(np.float32))
98
+
99
+ return centers
100
+
101
+
102
+ def _look_at_rotation(forward: np.ndarray, up_hint: Optional[np.ndarray] = None) -> np.ndarray:
103
+ """
104
+ 构建从相机坐标系到世界坐标系的旋转矩阵
105
+
106
+ 相机坐标系约定:
107
+ - +Z: 前向(forward)
108
+ - +Y: 上方(up)
109
+ - +X: 右方(right = up × forward)
110
+
111
+ Args:
112
+ forward: 相机前向方向(世界坐标,单位向量)
113
+ up_hint: 上方提示(默认世界 Y 轴)
114
+
115
+ Returns:
116
+ R_cw: (3,3) 旋转矩阵,v_world = R_cw @ v_cam
117
+ """
118
+ f = np.asarray(forward, dtype=np.float64).reshape(3)
119
+ f = f / (np.linalg.norm(f) + 1e-12)
120
+
121
+ if up_hint is None:
122
+ up_hint = np.array([0.0, 1.0, 0.0], dtype=np.float64)
123
+ u = np.asarray(up_hint, dtype=np.float64).reshape(3)
124
+ u = u / (np.linalg.norm(u) + 1e-12)
125
+
126
+ # 如果 forward 与 up_hint 几乎平行,换一个 up_hint
127
+ if abs(np.dot(f, u)) > 0.95:
128
+ u = np.array([0.0, 0.0, 1.0], dtype=np.float64)
129
+
130
+ # 右方向 = up × forward
131
+ r = np.cross(u, f)
132
+ r = r / (np.linalg.norm(r) + 1e-12)
133
+
134
+ # 真正的上方向 = forward × right
135
+ u2 = np.cross(f, r)
136
+ u2 = u2 / (np.linalg.norm(u2) + 1e-12)
137
+
138
+ # 旋转矩阵的列是相机坐标轴在世界坐标系中的表示
139
+ R_cw = np.stack([r, u2, f], axis=1)
140
+ return R_cw.astype(np.float32)
141
+
142
+
143
+ def _compute_intrinsics(resolution: int, fov_deg: float) -> Tuple[np.ndarray, float]:
144
+ """
145
+ 计算针孔相机内参
146
+
147
+ Args:
148
+ resolution: 图像分辨率(正方形)
149
+ fov_deg: 水平视场角(度)
150
+
151
+ Returns:
152
+ K: (3,3) 内参矩阵
153
+ f_px: 焦距(像素)
154
+ """
155
+ fov_rad = np.deg2rad(fov_deg)
156
+ f_px = 0.5 * resolution / np.tan(0.5 * fov_rad)
157
+
158
+ cx = (resolution - 1) * 0.5
159
+ cy = (resolution - 1) * 0.5
160
+
161
+ K = np.array([
162
+ [f_px, 0.0, cx],
163
+ [0.0, f_px, cy],
164
+ [0.0, 0.0, 1.0]
165
+ ], dtype=np.float32)
166
+
167
+ return K, float(f_px)
168
+
169
+
170
def build_icosahedron_slices(cfg: Dict[str, Any]) -> List[TangentSlice]:
    """
    Build the icosahedron + pole slice list described by the configuration.

    360MonoDepth-style padding: when ``padding_factor`` is greater than 1 the
    face FOV is ``fov_deg * padding_factor``; otherwise the legacy
    ``overlap_pad_deg`` is added to ``fov_deg``. The face FOV is capped at
    170 degrees.

    Args:
        cfg: configuration dict; options are read from ``cfg["tangent"]``.

    Returns:
        List of slice specifications: 20 faces, then (optionally) the north
        and south pole slices, then (optionally) extra polar ring slices.
    """
    tcfg = cfg.get("tangent", {})

    # Basic parameters
    face_resolution = int(tcfg.get("face_resolution", 768))
    fov_deg = float(tcfg.get("fov_deg", 90.0))

    # Padding: padding_factor takes priority, overlap_pad_deg is kept for
    # backward compatibility.
    padding_factor = float(tcfg.get("padding_factor", 1.3))
    overlap_pad_deg = float(tcfg.get("overlap_pad_deg", 0.0))
    padded_fov = fov_deg * padding_factor if padding_factor > 1.0 else fov_deg + overlap_pad_deg

    # Cap the FOV to avoid extreme perspective distortion.
    effective_fov = min(padded_fov, 170.0)

    # Polar-region parameters
    add_poles = bool(tcfg.get("add_poles", True))
    pole_fov_deg = float(tcfg.get("pole_fov_deg", 150.0))
    pole_resolution = int(tcfg.get("pole_resolution", face_resolution))
    pole_extra_rings = int(tcfg.get("pole_extra_rings", 0))

    def _make_slice(slice_id, slice_type, direction, rotation, resolution, fov):
        # Each slice gets its own intrinsics array.
        K, f_px = _compute_intrinsics(resolution, fov)
        return TangentSlice(
            slice_id=slice_id,
            slice_type=slice_type,
            center_dir=direction,
            R_cw=rotation,
            fov_deg=fov,
            resolution=resolution,
            K=K,
            f_px=f_px,
        )

    # 1. The 20 icosahedron faces
    slices = [
        _make_slice(
            f"face_{i:02d}", "face", center, _look_at_rotation(center),
            face_resolution, effective_fov,
        )
        for i, center in enumerate(_compute_icosahedron_face_centers())
    ]

    # 2. Pole slices
    if add_poles:
        # North pole (+Y)
        north_dir = np.array([0.0, 1.0, 0.0], dtype=np.float32)
        north_rot = _look_at_rotation(north_dir, up_hint=np.array([0.0, 0.0, -1.0]))
        slices.append(_make_slice(
            "north", "pole_north", north_dir, north_rot, pole_resolution, pole_fov_deg,
        ))

        # South pole (-Y)
        south_dir = np.array([0.0, -1.0, 0.0], dtype=np.float32)
        south_rot = _look_at_rotation(south_dir, up_hint=np.array([0.0, 0.0, 1.0]))
        slices.append(_make_slice(
            "south", "pole_south", south_dir, south_rot, pole_resolution, pole_fov_deg,
        ))

    # 3. Optional extra dense-sampling rings around the poles
    if pole_extra_rings > 0:
        _add_polar_ring_slices(
            slices, pole_extra_rings, pole_resolution, pole_fov_deg * 0.8
        )

    return slices
268
+
269
+
270
def _add_polar_ring_slices(
    slices: List[TangentSlice],
    num_rings: int,
    resolution: int,
    fov_deg: float,
) -> None:
    """
    Append extra slices arranged in rings around both poles (in place).

    With a single ring the ring latitude is 75 degrees; with several rings
    the latitudes are spread evenly from 60 to 85 degrees. Every ring holds
    6 equally spaced longitudes, and for each longitude one slice is appended
    for the northern hemisphere followed by its mirror in the southern one.
    All appended slices share the same intrinsics.
    """
    if num_rings > 1:
        ring_lats = [math.radians(60 + 25 * i / (num_rings - 1)) for i in range(num_rings)]
    else:
        ring_lats = [math.radians(75)]

    K, f_px = _compute_intrinsics(resolution, fov_deg)
    per_ring = 6

    def _append(slice_id: str, x: float, y: float, z: float) -> None:
        direction = np.array([x, y, z], dtype=np.float32)
        slices.append(TangentSlice(
            slice_id=slice_id,
            slice_type="pole_ring",
            center_dir=direction,
            R_cw=_look_at_rotation(direction),
            fov_deg=fov_deg,
            resolution=resolution,
            K=K,
            f_px=f_px,
        ))

    for ring_idx, lat in enumerate(ring_lats):
        for lon_idx in range(per_ring):
            lon = lon_idx * 2 * math.pi / per_ring
            x = math.cos(lat) * math.sin(lon)
            z = math.cos(lat) * math.cos(lon)

            # Near the north pole
            _append(f"pole_ring_n_{ring_idx}_{lon_idx}", x, math.sin(lat), z)
            # Near the south pole (mirrored in Y)
            _append(f"pole_ring_s_{ring_idx}_{lon_idx}", x, -math.sin(lat), z)
323
+
324
+
325
+ def _build_sample_grid(
326
+ slice_spec: TangentSlice,
327
+ erp_h: int,
328
+ erp_w: int,
329
+ device: torch.device,
330
+ ) -> torch.Tensor:
331
+ """
332
+ 构建从 ERP 采样到切片的网格
333
+
334
+ 对于切片的每个像素 (u, v):
335
+ 1. 反投影到相机坐标系射线方向
336
+ 2. 旋转到世界坐标系
337
+ 3. 计算球面经纬度
338
+ 4. 映射到 ERP 像素坐标
339
+ """
340
+ res = slice_spec.resolution
341
+ K = slice_spec.K
342
+ R_cw = slice_spec.R_cw
343
+
344
+ fx, fy = float(K[0, 0]), float(K[1, 1])
345
+ cx, cy = float(K[0, 2]), float(K[1, 2])
346
+
347
+ # 切片像素坐标
348
+ xs = torch.arange(res, device=device, dtype=torch.float32)
349
+ ys = torch.arange(res, device=device, dtype=torch.float32)
350
+ yv, xv = torch.meshgrid(ys, xs, indexing="ij") # (H, W)
351
+
352
+ # 反投影到相机坐标系
353
+ x_cam = (xv - cx) / fx
354
+ y_cam = -(yv - cy) / fy # 图像 y 向下,相机 y 向上
355
+ z_cam = torch.ones_like(x_cam)
356
+
357
+ # 归一化射线方向
358
+ dirs_cam = torch.stack([x_cam, y_cam, z_cam], dim=-1) # (H, W, 3)
359
+ dirs_cam = dirs_cam / torch.clamp(torch.norm(dirs_cam, dim=-1, keepdim=True), min=1e-9)
360
+
361
+ # 旋转到世界坐标系
362
+ R = torch.tensor(R_cw, device=device, dtype=torch.float32)
363
+ dirs_world = torch.einsum("ij,hwj->hwi", R, dirs_cam) # (H, W, 3)
364
+
365
+ # 计算球面坐标
366
+ x = dirs_world[..., 0]
367
+ y = dirs_world[..., 1]
368
+ z = dirs_world[..., 2]
369
+
370
+ lon = torch.atan2(x, z)
371
+ lat = torch.asin(torch.clamp(y, -1.0, 1.0))
372
+
373
+ # 映射到 ERP 像素坐标
374
+ u = (lon + math.pi) / (2.0 * math.pi) * float(erp_w)
375
+ v = (math.pi / 2.0 - lat) / math.pi * float(erp_h - 1)
376
+
377
+ # Seam wrap: ERP 在 x 方向扩展 3 倍,采样时从中间段采样
378
+ u_padded = u + float(erp_w)
379
+ erp_w_padded = erp_w * 3
380
+
381
+ x_norm = (u_padded / float(erp_w_padded - 1)) * 2.0 - 1.0
382
+ y_norm = (v / float(erp_h - 1)) * 2.0 - 1.0
383
+
384
+ grid = torch.stack([x_norm, y_norm], dim=-1).unsqueeze(0) # (1, H, W, 2)
385
+ return grid
386
+
387
+
388
@torch.no_grad()
def extract_tangent_from_erp(
    erp_rgb: torch.Tensor,
    slice_spec: TangentSlice,
    device: torch.device,
) -> np.ndarray:
    """
    Extract a single perspective slice from an ERP image.

    Args:
        erp_rgb: (1, 3, H, W) ERP image; values are clamped to [0, 1] before
            conversion to uint8.
        slice_spec: slice specification.
        device: device on which the sampling grid is built.

    Returns:
        tangent_rgb: (res, res, 3) uint8 numpy array.
    """
    erp_h, erp_w = erp_rgb.shape[2], erp_rgb.shape[3]

    # Seam wrap: tile the ERP three times along the width -> (1, 3, H, 3W).
    tiled = torch.cat((erp_rgb,) * 3, dim=-1)

    sample_grid = _build_sample_grid(slice_spec, erp_h, erp_w, device)

    sampled = F.grid_sample(
        tiled,
        sample_grid,
        mode="bilinear",
        padding_mode="border",
        align_corners=True,
    )  # (1, 3, res, res)

    # Channels-last, then quantise to uint8 on the host.
    hwc = sampled.squeeze(0).permute(1, 2, 0)
    return (hwc.clamp(0, 1) * 255.0).byte().cpu().numpy()
425
+
426
+
427
@torch.no_grad()
def extract_all_tangents(
    erp_rgb_np: np.ndarray,
    slices: List[TangentSlice],
    device: torch.device,
) -> Dict[str, np.ndarray]:
    """
    Extract every slice in ``slices`` from one ERP image.

    Args:
        erp_rgb_np: (H, W, 3) ERP image as a numpy array; it is divided by
            255 before sampling.
        slices: slice specifications.
        device: compute device.

    Returns:
        Dict ``{slice_id: tangent_rgb}`` in the order of ``slices``.
    """
    # (H, W, 3) -> (1, 3, H, W) float in [0, 1]
    erp_chw = torch.from_numpy(erp_rgb_np).to(device).permute(2, 0, 1).float() / 255.0
    erp_batch = erp_chw.unsqueeze(0)

    return {
        spec.slice_id: extract_tangent_from_erp(erp_batch, spec, device)
        for spec in slices
    }
453
+
454
+
455
def compute_ray_directions_for_slice(
    slice_spec: TangentSlice,
    device: torch.device,
) -> torch.Tensor:
    """
    Compute the world-frame ray direction of every slice pixel.

    Each pixel is back-projected with the slice intrinsics (image y down,
    camera y up, camera +Z forward), normalised, and rotated by ``R_cw``.

    Returns:
        dirs_world: (res, res, 3) unit direction vectors.
    """
    intr = slice_spec.K
    fx, fy = float(intr[0, 0]), float(intr[1, 1])
    cx, cy = float(intr[0, 2]), float(intr[1, 2])

    side = slice_spec.resolution
    pix = torch.arange(side, device=device, dtype=torch.float32)
    rows, cols = torch.meshgrid(pix, pix, indexing="ij")

    ray_x = (cols - cx) / fx
    ray_y = -(rows - cy) / fy
    rays_cam = torch.stack([ray_x, ray_y, torch.ones_like(ray_x)], dim=-1)
    rays_cam = rays_cam / torch.clamp(torch.norm(rays_cam, dim=-1, keepdim=True), min=1e-9)

    rot = torch.tensor(slice_spec.R_cw, device=device, dtype=torch.float32)
    return torch.einsum("ij,hwj->hwi", rot, rays_cam)
487
+
488
+
489
@torch.no_grad()
def compute_coverage_mask(
    slices: List[TangentSlice],
    erp_h: int,
    erp_w: int,
    device: torch.device,
) -> Tuple[np.ndarray, Dict[str, float]]:
    """
    Compute which ERP pixels are covered by the slices (pure geometry).

    Every slice pixel is cast as a ray, converted to ERP coordinates and
    rounded to the nearest ERP pixel (columns wrap around, rows are clamped).
    An ERP pixel counts as covered when at least one slice pixel lands on it.

    Returns:
        coverage_mask: (H, W) uint8, 255 = covered, 0 = uncovered.
        stats: dict with ``total_coverage`` (percent), ``uncovered_pixels``
            (count), and ``north_pole_coverage`` / ``south_pole_coverage``
            (percent of the top / bottom 10% of rows, at least one row).
    """
    coverage = torch.zeros(erp_h, erp_w, device=device, dtype=torch.float32)
    # ``coverage`` is contiguous, so this is a view and scatter_add_ below
    # writes straight into it.
    coverage_flat = coverage.reshape(-1)

    for s in slices:
        res = s.resolution
        K = s.K
        R_cw = s.R_cw

        fx, fy = float(K[0, 0]), float(K[1, 1])
        cx, cy = float(K[0, 2]), float(K[1, 2])

        xs = torch.arange(res, device=device, dtype=torch.float32)
        ys = torch.arange(res, device=device, dtype=torch.float32)
        yv, xv = torch.meshgrid(ys, xs, indexing="ij")

        # Back-project; image y points down while camera y points up.
        x_cam = (xv - cx) / fx
        y_cam = -(yv - cy) / fy
        z_cam = torch.ones_like(x_cam)

        dirs_cam = torch.stack([x_cam, y_cam, z_cam], dim=-1)
        dirs_cam = dirs_cam / torch.clamp(torch.norm(dirs_cam, dim=-1, keepdim=True), min=1e-9)

        R = torch.tensor(R_cw, device=device, dtype=torch.float32)
        dirs_world = torch.einsum("ij,hwj->hwi", R, dirs_cam)

        x = dirs_world[..., 0]
        y = dirs_world[..., 1]
        z = dirs_world[..., 2]

        lon = torch.atan2(x, z)
        lat = torch.asin(torch.clamp(y, -1.0, 1.0))

        u = (lon + math.pi) / (2.0 * math.pi) * float(erp_w)
        v = (math.pi / 2.0 - lat) / math.pi * float(erp_h - 1)

        u_int = torch.round(u).to(torch.int64)
        v_int = torch.round(v).to(torch.int64)

        # Columns wrap around the seam; rows are clamped at the poles.
        u_int = torch.clamp(u_int % erp_w, 0, erp_w - 1)
        v_int = torch.clamp(v_int, 0, erp_h - 1)

        idx = (v_int * erp_w + u_int).reshape(-1)
        coverage_flat.scatter_add_(0, idx, torch.ones_like(idx, dtype=torch.float32))

    covered = coverage > 0
    coverage_mask = (covered.float() * 255).byte().cpu().numpy()

    total_pixels = erp_h * erp_w
    covered_pixels = int(covered.sum().item())

    # At least one row per pole band: with zero rows the north slice would be
    # empty (mean -> NaN) and ``covered[-0:]`` would select the whole image.
    pole_rows = max(1, int(erp_h * 0.1))
    north_covered = covered[:pole_rows, :].float().mean().item()
    south_covered = covered[-pole_rows:, :].float().mean().item()

    stats = {
        "total_coverage": covered_pixels / total_pixels * 100,
        "uncovered_pixels": total_pixels - covered_pixels,
        "north_pole_coverage": north_covered * 100,
        "south_pole_coverage": south_covered * 100,
    }

    return coverage_mask, stats
data/README.md ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Data Directory
2
+
3
+ Place local scene assets here when running experiments. Do not commit dataset files to the anonymous repository.
4
+
5
+ Recommended layout:
6
+
7
+ ```text
8
+ data/
9
+ ├── blender_indoor/
10
+ ├── blender_outdoor/
11
+ ├── hm3d/
12
+ └── scannetpp/
13
+ ```
14
+
15
+ You may also pass absolute paths directly to the pipeline CLI.
16
+
dataset_metadata/croissant.json ADDED
@@ -0,0 +1,414 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "@context": {
3
+ "@language": "en",
4
+ "@vocab": "https://schema.org/",
5
+ "citeAs": "cr:citeAs",
6
+ "column": "cr:column",
7
+ "conformsTo": "dct:conformsTo",
8
+ "cr": "http://mlcommons.org/croissant/",
9
+ "rai": "http://mlcommons.org/croissant/RAI/",
10
+ "data": {
11
+ "@id": "cr:data",
12
+ "@type": "@json"
13
+ },
14
+ "dataType": {
15
+ "@id": "cr:dataType",
16
+ "@type": "@vocab"
17
+ },
18
+ "dct": "http://purl.org/dc/terms/",
19
+ "examples": {
20
+ "@id": "cr:examples",
21
+ "@type": "@json"
22
+ },
23
+ "extract": "cr:extract",
24
+ "field": "cr:field",
25
+ "fileProperty": "cr:fileProperty",
26
+ "fileObject": "cr:fileObject",
27
+ "fileSet": "cr:fileSet",
28
+ "format": "cr:format",
29
+ "includes": "cr:includes",
30
+ "isLiveDataset": "cr:isLiveDataset",
31
+ "jsonPath": "cr:jsonPath",
32
+ "key": "cr:key",
33
+ "md5": "cr:md5",
34
+ "parentField": "cr:parentField",
35
+ "path": "cr:path",
36
+ "recordSet": "cr:recordSet",
37
+ "references": "cr:references",
38
+ "regex": "cr:regex",
39
+ "repeated": "cr:repeated",
40
+ "replace": "cr:replace",
41
+ "samplingRate": "cr:samplingRate",
42
+ "sc": "https://schema.org/",
43
+ "separator": "cr:separator",
44
+ "source": "cr:source",
45
+ "subField": "cr:subField",
46
+ "transform": "cr:transform"
47
+ },
48
+ "@type": "sc:Dataset",
49
+ "conformsTo": "http://mlcommons.org/croissant/1.0",
50
+ "name": "CM-EVS",
51
+ "description": "CM-EVS is a curated panoramic RGB-D dataset built under a single principle: maximize the geometric coverage of a 3D scene with the fewest equirectangular (ERP) frames possible. The headline release contains 11,583 ERP RGB-depth-pose frames over 326 Blender indoor scenes (CC-BY 4.0), each paired with the per-step provenance log of the depth-conflict-aware curator that selected it. The full v1.0 release additionally provides 786,344 frames re-encoded from TartanGround (783,944 frames over 63 environments) and OB3D (2,400 frames over 12 scenes) outdoor sources into the same ERP and world-to-camera pose schema, plus license-aware adapter packages for HM3D (14,475 frames over 401 rooms after local regeneration) and ScanNet++ (8,267 frames over 500 scans after local regeneration) that produce matched frames locally without redistributing licensed assets.",
52
+ "version": "1.0.0",
53
+ "license": "https://creativecommons.org/licenses/by/4.0/",
54
+ "url": "https://huggingface.co/datasets/anon-cmevs-2026/cmevs-erp-eval",
55
+ "citeAs": "@inproceedings{cmevs2026, title={{CM-EVS}: A Coverage-Curated Panoramic {RGB-D} Dataset for Indoor Scene Understanding}, author={Anonymous Author(s)}, booktitle={NeurIPS 2026 Datasets and Benchmarks Track (under review)}, year={2026}}",
56
+ "creator": {
57
+ "@type": "Organization",
58
+ "name": "Anonymous (double-blind submission)"
59
+ },
60
+ "datePublished": "2026-05-01",
61
+ "keywords": [
62
+ "panoramic",
63
+ "equirectangular",
64
+ "ERP",
65
+ "RGB-D",
66
+ "view planning",
67
+ "fixed-budget",
68
+ "data-centric",
69
+ "viewpoint provenance",
70
+ "indoor scene understanding",
71
+ "panoramic depth estimation",
72
+ "novel view synthesis",
73
+ "world model pretraining"
74
+ ],
75
+ "isLiveDataset": false,
76
+ "distribution": [
77
+ {
78
+ "@type": "cr:FileObject",
79
+ "@id": "blender-indoor-archive.tar",
80
+ "name": "blender-indoor-archive.tar",
81
+ "contentUrl": "https://huggingface.co/datasets/anon-cmevs-2026/cmevs-erp-eval/resolve/main/blender_indoor.tar",
82
+ "encodingFormat": "application/x-tar",
83
+ "sha256": "TODO_SHA256"
84
+ },
85
+ {
86
+ "@type": "cr:FileSet",
87
+ "@id": "blender-indoor-rgb",
88
+ "name": "blender-indoor-rgb",
89
+ "containedIn": {
90
+ "@id": "blender-indoor-archive.tar"
91
+ },
92
+ "encodingFormat": "image/png",
93
+ "includes": "rgb/*.png"
94
+ },
95
+ {
96
+ "@type": "cr:FileSet",
97
+ "@id": "blender-indoor-depth",
98
+ "name": "blender-indoor-depth",
99
+ "containedIn": {
100
+ "@id": "blender-indoor-archive.tar"
101
+ },
102
+ "encodingFormat": "application/octet-stream",
103
+ "includes": "depth/*.npy"
104
+ },
105
+ {
106
+ "@type": "cr:FileSet",
107
+ "@id": "blender-indoor-pose",
108
+ "name": "blender-indoor-pose",
109
+ "containedIn": {
110
+ "@id": "blender-indoor-archive.tar"
111
+ },
112
+ "encodingFormat": "application/json",
113
+ "includes": "pose/*.json"
114
+ },
115
+ {
116
+ "@type": "cr:FileSet",
117
+ "@id": "blender-indoor-metadata",
118
+ "name": "blender-indoor-metadata",
119
+ "containedIn": {
120
+ "@id": "blender-indoor-archive.tar"
121
+ },
122
+ "encodingFormat": "application/json",
123
+ "includes": "metadata/*.json*"
124
+ },
125
+ {
126
+ "@type": "cr:FileObject",
127
+ "@id": "outdoor-tartanground-adapter.tar",
128
+ "name": "outdoor-tartanground-adapter.tar",
129
+ "contentUrl": "https://huggingface.co/datasets/anon-cmevs-2026/cmevs-erp-eval/resolve/main/outdoor_tartanground_adapter.tar",
130
+ "encodingFormat": "application/x-tar",
131
+ "sha256": "TODO_SHA256"
132
+ },
133
+ {
134
+ "@type": "cr:FileObject",
135
+ "@id": "outdoor-ob3d-adapter.tar",
136
+ "name": "outdoor-ob3d-adapter.tar",
137
+ "contentUrl": "https://huggingface.co/datasets/anon-cmevs-2026/cmevs-erp-eval/resolve/main/outdoor_ob3d_adapter.tar",
138
+ "encodingFormat": "application/x-tar",
139
+ "sha256": "TODO_SHA256"
140
+ },
141
+ {
142
+ "@type": "cr:FileObject",
143
+ "@id": "hm3d-adapter.tar",
144
+ "name": "hm3d-adapter.tar",
145
+ "contentUrl": "https://huggingface.co/datasets/anon-cmevs-2026/cmevs-erp-eval/resolve/main/hm3d_adapter.tar",
146
+ "encodingFormat": "application/x-tar",
147
+ "sha256": "TODO_SHA256"
148
+ },
149
+ {
150
+ "@type": "cr:FileObject",
151
+ "@id": "scannetpp-adapter.tar",
152
+ "name": "scannetpp-adapter.tar",
153
+ "contentUrl": "https://huggingface.co/datasets/anon-cmevs-2026/cmevs-erp-eval/resolve/main/scannetpp_adapter.tar",
154
+ "encodingFormat": "application/x-tar",
155
+ "sha256": "TODO_SHA256"
156
+ },
157
+ {
158
+ "@type": "cr:FileObject",
159
+ "@id": "curator-source-code.tar",
160
+ "name": "curator-source-code.tar",
161
+ "contentUrl": "https://huggingface.co/datasets/anon-cmevs-2026/cmevs-erp-eval/resolve/main/code.tar",
162
+ "encodingFormat": "application/x-tar",
163
+ "sha256": "TODO_SHA256"
164
+ },
165
+ {
166
+ "@type": "cr:FileObject",
167
+ "@id": "documentation.tar",
168
+ "name": "documentation.tar",
169
+ "contentUrl": "https://huggingface.co/datasets/anon-cmevs-2026/cmevs-erp-eval/resolve/main/docs.tar",
170
+ "encodingFormat": "application/x-tar",
171
+ "sha256": "TODO_SHA256"
172
+ },
173
+ {
174
+ "@type": "cr:FileObject",
175
+ "@id": "frame-manifest.csv",
176
+ "name": "frame-manifest.csv",
177
+ "contentUrl": "https://huggingface.co/datasets/anon-cmevs-2026/cmevs-erp-eval/resolve/main/frame_manifest.csv",
178
+ "encodingFormat": "text/csv",
179
+ "sha256": "TODO_SHA256"
180
+ }
181
+ ],
182
+ "recordSet": [
183
+ {
184
+ "@type": "cr:RecordSet",
185
+ "@id": "erp-frame-records",
186
+ "name": "erp-frame-records",
187
+ "description": "One record per released ERP frame. Curator-only fields (viewpoint_score, coverage_gain, conflict_ratio, candidate_id) are populated only for frames produced by the depth-conflict-aware curator; outdoor re-encoded frames carry the schema fields without per-step provenance.",
188
+ "field": [
189
+ {
190
+ "@type": "cr:Field",
191
+ "@id": "erp-frame-records/frame_id",
192
+ "name": "frame_id",
193
+ "dataType": "sc:Text",
194
+ "source": {
195
+ "fileObject": {
196
+ "@id": "frame-manifest.csv"
197
+ },
198
+ "extract": {
199
+ "column": "frame_id"
200
+ }
201
+ }
202
+ },
203
+ {
204
+ "@type": "cr:Field",
205
+ "@id": "erp-frame-records/source",
206
+ "name": "source",
207
+ "dataType": "sc:Text",
208
+ "source": {
209
+ "fileObject": {
210
+ "@id": "frame-manifest.csv"
211
+ },
212
+ "extract": {
213
+ "column": "source"
214
+ }
215
+ }
216
+ },
217
+ {
218
+ "@type": "cr:Field",
219
+ "@id": "erp-frame-records/scene_id",
220
+ "name": "scene_id",
221
+ "dataType": "sc:Text",
222
+ "source": {
223
+ "fileObject": {
224
+ "@id": "frame-manifest.csv"
225
+ },
226
+ "extract": {
227
+ "column": "scene_id"
228
+ }
229
+ }
230
+ },
231
+ {
232
+ "@type": "cr:Field",
233
+ "@id": "erp-frame-records/room_id",
234
+ "name": "room_id",
235
+ "dataType": "sc:Text",
236
+ "source": {
237
+ "fileObject": {
238
+ "@id": "frame-manifest.csv"
239
+ },
240
+ "extract": {
241
+ "column": "room_id"
242
+ }
243
+ }
244
+ },
245
+ {
246
+ "@type": "cr:Field",
247
+ "@id": "erp-frame-records/split",
248
+ "name": "split",
249
+ "dataType": "sc:Text",
250
+ "source": {
251
+ "fileObject": {
252
+ "@id": "frame-manifest.csv"
253
+ },
254
+ "extract": {
255
+ "column": "split"
256
+ }
257
+ }
258
+ },
259
+ {
260
+ "@type": "cr:Field",
261
+ "@id": "erp-frame-records/rgb",
262
+ "name": "rgb",
263
+ "dataType": "sc:ImageObject",
264
+ "source": {
265
+ "fileObject": {
266
+ "@id": "frame-manifest.csv"
267
+ },
268
+ "extract": {
269
+ "column": "rgb_path"
270
+ }
271
+ }
272
+ },
273
+ {
274
+ "@type": "cr:Field",
275
+ "@id": "erp-frame-records/depth",
276
+ "name": "depth",
277
+ "dataType": "sc:Text",
278
+ "source": {
279
+ "fileObject": {
280
+ "@id": "frame-manifest.csv"
281
+ },
282
+ "extract": {
283
+ "column": "depth_path"
284
+ }
285
+ }
286
+ },
287
+ {
288
+ "@type": "cr:Field",
289
+ "@id": "erp-frame-records/pose_quaternion",
290
+ "name": "pose_quaternion",
291
+ "dataType": "sc:Text",
292
+ "source": {
293
+ "fileObject": {
294
+ "@id": "frame-manifest.csv"
295
+ },
296
+ "extract": {
297
+ "column": "pose_quaternion"
298
+ }
299
+ }
300
+ },
301
+ {
302
+ "@type": "cr:Field",
303
+ "@id": "erp-frame-records/pose_position",
304
+ "name": "pose_position",
305
+ "dataType": "sc:Text",
306
+ "source": {
307
+ "fileObject": {
308
+ "@id": "frame-manifest.csv"
309
+ },
310
+ "extract": {
311
+ "column": "pose_position"
312
+ }
313
+ }
314
+ },
315
+ {
316
+ "@type": "cr:Field",
317
+ "@id": "erp-frame-records/camera_type",
318
+ "name": "camera_type",
319
+ "dataType": "sc:Text",
320
+ "source": {
321
+ "fileObject": {
322
+ "@id": "frame-manifest.csv"
323
+ },
324
+ "extract": {
325
+ "column": "camera_type"
326
+ }
327
+ }
328
+ },
329
+ {
330
+ "@type": "cr:Field",
331
+ "@id": "erp-frame-records/viewpoint_score",
332
+ "name": "viewpoint_score",
333
+ "dataType": "sc:Float",
334
+ "source": {
335
+ "fileObject": {
336
+ "@id": "frame-manifest.csv"
337
+ },
338
+ "extract": {
339
+ "column": "viewpoint_score"
340
+ }
341
+ }
342
+ },
343
+ {
344
+ "@type": "cr:Field",
345
+ "@id": "erp-frame-records/coverage_gain",
346
+ "name": "coverage_gain",
347
+ "dataType": "sc:Float",
348
+ "source": {
349
+ "fileObject": {
350
+ "@id": "frame-manifest.csv"
351
+ },
352
+ "extract": {
353
+ "column": "coverage_gain"
354
+ }
355
+ }
356
+ },
357
+ {
358
+ "@type": "cr:Field",
359
+ "@id": "erp-frame-records/conflict_ratio",
360
+ "name": "conflict_ratio",
361
+ "dataType": "sc:Float",
362
+ "source": {
363
+ "fileObject": {
364
+ "@id": "frame-manifest.csv"
365
+ },
366
+ "extract": {
367
+ "column": "conflict_ratio"
368
+ }
369
+ }
370
+ },
371
+ {
372
+ "@type": "cr:Field",
373
+ "@id": "erp-frame-records/candidate_id",
374
+ "name": "candidate_id",
375
+ "dataType": "sc:Text",
376
+ "source": {
377
+ "fileObject": {
378
+ "@id": "frame-manifest.csv"
379
+ },
380
+ "extract": {
381
+ "column": "candidate_id"
382
+ }
383
+ }
384
+ }
385
+ ]
386
+ }
387
+ ],
388
+ "rai:dataCollection": "Indoor data is produced by the CM-EVS pipeline (asset loading, coordinate normalization, candidate generation, 26-direction geometric-validity filtering, conflict-aware greedy selection, 2048x1024 high-resolution Cycles ERP rendering, export under the unified schema). Outdoor data is sourced from TartanGround and OB3D and re-encoded into the unified schema; the curator is not run on outdoor sources in v1.0. HM3D and ScanNet++ frames are not redistributed; the release ships adapter regeneration scripts.",
389
+ "rai:dataPreprocessingProtocol": "Coordinate normalization to a right-handed +X-right, +Y-up, +Z-forward world frame with the OpenCV-style camera frame; pose stored as a scalar-first world-to-camera quaternion plus a position relative to the scene's first selected frame. AABB computation; source-specific candidate generation; 26-direction geometric-validity filter. Cubemap-to-ERP re-encoding at native resolution for outdoor sources; optional exposure adjustment for Blender; output schema conversion. Candidate probes, intermediate caches, pre-render-all oracle frames, and locally regenerated HM3D / ScanNet++ outputs are excluded from the public frame count F_pub.",
390
+ "rai:dataAnnotationProtocol": "No human annotation is performed. All labels (split, source, scene id, viewpoint score, coverage gain, conflict ratio) are produced automatically by the curator pipeline and recorded in metadata/per_step_log.jsonl and metadata/selected_viewpoints.json.",
391
+ "rai:dataReleaseMaintenancePlan": "Versioned releases on a 6-month cadence. Errata tracked via the project repository; SHA256 manifests refreshed at every release; HM3D and ScanNet++ regeneration scripts updated when upstream APIs, file layouts, or access terms change.",
392
+ "rai:dataUseCases": [
393
+ "Panoramic depth estimation",
394
+ "ERP novel-view synthesis",
395
+ "Panoramic Gaussian-splatting reconstruction",
396
+ "Panoramic world-model pretraining",
397
+ "Fixed-budget viewpoint policy evaluation"
398
+ ],
399
+ "rai:dataLimitations": [
400
+ "Real-scan derived frames (HM3D, ScanNet++) are not redistributed; users must accept upstream license terms and regenerate locally.",
401
+ "Outdoor frames are re-encoded source trajectories rather than curator-selected subsets and therefore do not carry per-step provenance.",
402
+ "Synthetic-real transfer must be validated separately by source; we do not claim Blender-only gains imply real-scan gains.",
403
+ "Geometry-validity filters may fail in atria, semi-outdoor spaces, narrow transitions, noisy scans, or pure point-cloud scenes."
404
+ ],
405
+ "rai:personalSensitiveInformation": "No new personal data is collected. Real-scan sources (HM3D, ScanNet++) may depict private indoor layouts and are not redistributed as derived frames. Even regeneration scripts and viewpoint metadata can reveal where observations would be sampled within a private space; users must comply with upstream source access terms.",
406
+ "rai:dataBiases": [
407
+ "Source assets inherit geographic, architectural, and scanning biases.",
408
+ "HM3D and ScanNet++ are skewed toward scanned residential indoor spaces.",
409
+ "Blender assets are skewed toward staged residential, office, and architectural scenes.",
410
+ "Outdoor sources (TartanGround, OB3D) are skewed toward simulator-generated terrain along circular trajectories.",
411
+ "Synthetic Blender materials may not match real-scan sensor noise."
412
+ ],
413
+ "rai:dataSocialImpact": "CM-EVS lowers the engineering cost of producing auditable panoramic RGB-D resources from existing 3D scenes. Positive uses include panoramic perception, data-centric evaluation, view-planning research, and 3D-consistent world-model pretraining. Potential harms include over-trusting synthetic data, obscuring upstream dataset bias, and using real indoor scans in privacy-sensitive settings. The release therefore separates public synthetic frames from licensed real-scan regeneration and documents intended uses, non-uses, and source licenses."
414
+ }
dataset_metadata/manifests_h100/ARCHIVE_DIGESTS.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ 74e54654e43a6faa89ca4e50aec00a8e0366cdb3207ae16ee2c5efb1d2be70c1 SHA256SUMS_blender_indoor_round1+2.txt
2
+ 8cda976ce5a9180cf03bda4408ccf921d4ba295eb6ea1b31d66f76bdde398679 SHA256SUMS_blender_indoor_round2.txt
3
+ 7b7a42ab4aae6c22764d1447bfe35cb7210fd11d1b306497131058736f05e20e SHA256SUMS_HM3D.txt
4
+ 60ecb00e2063f6efacc9408137fda5a73a4ac61647bd3cf8c32cc1e31839bc6f SHA256SUMS_scannetpp.txt
5
+ 5e0285d4a3c472fee245501abb9be92ce0dada5f361cad1253bb51ded6e89f99 SHA256SUMS_OB3D.txt
6
+ b13e68a7e6ff7ced448e841649f84d0fc6d7a4fe96c120150e42bffbdf2f06bf SHA256SUMS_tartanground.txt
dataset_metadata/manifests_h100/README.md ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # H100 数据快照 SHA256 清单
2
+
3
+ > 生成时间:2026-05-02 06:48 UTC(H100 节点 `node96`)
4
+ > 数据根:`/data/data_wr/data_shushu/` (7 TB 数据盘)
5
+ > **范围:仅包含本论文相关的 6 个数据子目录**
6
+
7
+ ---
8
+
9
+ ## 这些清单是什么
10
+
11
+ 这是 H100 上**本论文相关数据**的逐文件 SHA-256 留底。每行格式:
12
+
13
+ ```
14
+ <sha256-hex> <relative-path-from-data-root>
15
+ ```
16
+
17
+ 可用 `shasum -a 256 -c` / `sha256sum -c` 在数据所在机器上原地校验。
18
+
19
+ `ARCHIVE_DIGESTS.txt` 是每份 `SHA256SUMS_*.txt` 自身的 SHA-256,作为各 archive 的"内容指纹"。
20
+
21
+ ---
22
+
23
+ ## 论文 → H100 数据映射
24
+
25
+ V4 §4.3 表 4 列出 5 个数据来源;H100 上 Blender indoor 因为采样轮次不同分成两个目录存放:
26
+
27
+ | 论文来源(V4 §4.3) | V4 声称规模 | H100 子目录 | 实际数量(manifest 行数 = 文件数)|
28
+ | --- | --- | --- | --- |
29
+ | **Blender indoor**(CC-BY 4.0)| 326 场景 / 11,583 帧 | `combined/blender_indoor_FOU_threshold-0.2_round1+2`(第 1 轮 + 第 1+2 轮合并)| 19,271 |
30
+ | | | `combined/blender_indoor_FOU_threshold-0.2_round2`(第 2 轮独立提取)| 20,673 |
31
+ | **HM3D**(adapter,不再分发)| 401 rooms / 14,475 帧 | `combined/HM3D` | 43,431 |
32
+ | **ScanNet++**(adapter,不再分发)| 500 scans / 8,267 帧 | `combined/scannetpp_threshold0.2` | 24,801 |
33
+ | **OB3D**(户外,按上游许可)| 12 场景 / 2,400 帧 | `continuous/OB3D_shushu`(12 场景 × 2 视角 = 24 份)| 7,246 |
34
+ | **TartanGround**(户外,**不再分发**)| 63 environments / 783,944 帧 | `continuous/Tartanground`(11 个环境,762 parts;其余环境仍在用户本地)| 342,064 |
35
+
36
+ **Blender 只有室内**,没有 outdoor — 论文中的 outdoor 部分由 TartanGround + OB3D 提供。
37
+
38
+ ---
39
+
40
+ ## 为什么没有写到 `croissant.json`
41
+
42
+ `croissant.json` 里 `cr:FileObject.sha256` 字段的语义是:**用户从 `contentUrl` 下载到的 .tar 文件的 SHA-256**。我们目前还**没有**把数据打包成最终发布 archive,因此那些字段保持 `TODO_SHA256` 占位。
43
+
44
+ H100 上的目录结构与 `croissant.json` 里的发布产物**不是一一对应**:
45
+
46
+ - `blender-indoor-archive.tar` 应是 **curator 筛选过的 11,583 帧 / 326 场景**,需要从 `round1+2` 与 `round2` 两个 H100 子目录中合并去重 + curator 选帧后再 tar,**这一步还没做**。
47
+ - `*-adapter.tar`(HM3D / ScanNet++ / TartanGround / OB3D)应是 **adapter 脚本 + 元数据包**,**不**包含数据帧。脚本目前在 `cmevs_anonymous_code_release/scripts/` + `adapters/` 下,也还没单独打包成 tar。
48
+
49
+ 这里的 SHA256SUMS 只是当前 H100 数据状态的"内部留底",**不会上传到 HF / OpenReview**,仅供你后续:
50
+ - 增量补 TartanGround 时比对哪些 part 已处理
51
+ - 重组 / 重新打包发布 archive 时确认源数据没被改动
52
+
53
+ ---
54
+
55
+ ## 何时把哈希填进 `croissant.json`
56
+
57
+ 当你**实际打包发布 archive** 后:
58
+
59
+ ```bash
60
+ # 1. 在打包好的 .tar 上重算
61
+ shasum -a 256 blender_indoor_v1.tar
62
+ # 2. 把哈希写入 croissant.json 中 blender-indoor-archive.tar 的 sha256 字段
63
+ # 或运行:tools/update_croissant_with_real_hashes.py
64
+ ```
65
+
66
+ reviewer 下载验证时用的就是这个哈希。
67
+
68
+ ---
69
+
70
+ ## 文件清单
71
+
72
+ | 文件 | 行数(=文件数)| 大小 | H100 路径 |
73
+ | --- | ---: | ---: | --- |
74
+ | `ARCHIVE_DIGESTS.txt` | 6 | 568 B | `~/cmevs_release_hashing/manifests/` |
75
+ | `SHA256SUMS_blender_indoor_round1+2.txt` | 19,271 | 2.9 MB | `combined/blender_indoor_FOU_threshold-0.2_round1+2` |
76
+ | `SHA256SUMS_blender_indoor_round2.txt` | 20,673 | 3.0 MB | `combined/blender_indoor_FOU_threshold-0.2_round2` |
77
+ | `SHA256SUMS_HM3D.txt` | 43,431 | 5.2 MB | `combined/HM3D` |
78
+ | `SHA256SUMS_scannetpp.txt` | 24,801 | 3.0 MB | `combined/scannetpp_threshold0.2` |
79
+ | `SHA256SUMS_OB3D.txt` | 7,246 | 930 KB | `continuous/OB3D_shushu` |
80
+ | `SHA256SUMS_tartanground.txt` | 342,064 | 47 MB | `continuous/Tartanground` |
81
+
82
+ 合计:**457,486 行**(一行 = 一个文件的 sha256),约 **62 MB**(不含 README)。
83
+
84
+ ---
85
+
86
+ ## 排除的目录
87
+
88
+ H100 数据根下还有以下子目录,但**与本论文无关**,因此**未生成 manifest**:
89
+
90
+ - `combined/data_离散_1m间距` — 1m 间距实验性采样,未进入论文最终发布
91
+ - `continuous/shushu_line_连续` — 室内连续直线轨迹,未进入论文
92
+ - `continuous/shushu_circle_连续` — 室内连续圆形轨迹,未进入论文
93
+ - `Robustness/` — 鲁棒性评估数据(洋红坏帧筛选用),未进入论文
94
+
95
+ 如果未来这些目录进入发布范围,再单独跑 `~/cmevs_release_hashing/run_hashes.sh` 补哈希即可。
96
+
97
+ ---
98
+
99
+ ## 上游许可重要说明
100
+
101
+ - **TartanGround 不再分发为 ERP 重新编码帧**——与 V4 §4.2 表 3 `Direct release: no` 一致。这里的 sha256 仅作 H100 内部留底;最终发布只发 adapter 脚本 + 场景 id + 元数据。
102
+ - **HM3D / ScanNet++ 同理**:上游 EULA / ToS 不允许再分发衍生 RGB-D 帧。
103
+ - **OB3D**:在上游许可范围内可再分发;具体由发布前的许可核查决定。
104
+ - **可直接发布的部分**:仅 Blender indoor(CC-BY 4.0 资产渲染)。
dataset_metadata/manifests_h100/SHA256SUMS_HM3D.txt ADDED
The diff for this file is too large to render. See raw diff
 
dataset_metadata/manifests_h100/SHA256SUMS_OB3D.txt ADDED
The diff for this file is too large to render. See raw diff
 
dataset_metadata/manifests_h100/SHA256SUMS_blender_indoor_round1+2.txt ADDED
The diff for this file is too large to render. See raw diff
 
dataset_metadata/manifests_h100/SHA256SUMS_blender_indoor_round2.txt ADDED
The diff for this file is too large to render. See raw diff
 
dataset_metadata/manifests_h100/SHA256SUMS_scannetpp.txt ADDED
The diff for this file is too large to render. See raw diff
 
dataset_metadata/manifests_h100/SHA256SUMS_tartanground.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b13e68a7e6ff7ced448e841649f84d0fc6d7a4fe96c120150e42bffbdf2f06bf
3
+ size 48934340
environment.yml ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: cmevs
2
+ channels:
3
+ - pytorch
4
+ - nvidia
5
+ - conda-forge
6
+ dependencies:
7
+ - python=3.10
8
+ - pip
9
+ - numpy>=1.24
10
+ - scipy
11
+ - pandas
12
+ - pyyaml
13
+ - pillow
14
+ - matplotlib
15
+ - scikit-learn
16
+ - pytorch
17
+ - torchvision
18
+ - cudatoolkit
19
+ - pip:
20
+ - opencv-python
21
+ - open3d
22
+ - trimesh
23
+ - tqdm
24
+ - jsonschema
25
+
examples/metadata/candidates.jsonl ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {"candidate_id":"tiny_000","source":"blender_indoor_tiny","scene_id":"tiny_blend_scene","position":[0.0,1.6,0.0],"yaw_deg":0.0,"valid":true,"rejection_layer":"accepted","single_view_probe_coverage":0.31,"conflict_prior":0.05,"covered_cells":["a","b","c","d"],"oracle_gain":0.30,"runtime_s":0.02}
2
+ {"candidate_id":"tiny_001","source":"blender_indoor_tiny","scene_id":"tiny_blend_scene","position":[1.0,1.6,0.0],"yaw_deg":45.0,"valid":true,"rejection_layer":"accepted","single_view_probe_coverage":0.34,"conflict_prior":0.12,"covered_cells":["c","d","e","f","g"],"oracle_gain":0.28,"runtime_s":0.02}
3
+ {"candidate_id":"tiny_002","source":"blender_indoor_tiny","scene_id":"tiny_blend_scene","position":[2.0,1.6,0.5],"yaw_deg":90.0,"valid":true,"rejection_layer":"accepted","single_view_probe_coverage":0.27,"conflict_prior":0.02,"covered_cells":["h","i","j"],"oracle_gain":0.24,"runtime_s":0.02}
4
+ {"candidate_id":"tiny_003","source":"blender_indoor_tiny","scene_id":"tiny_blend_scene","position":[0.5,1.6,1.5],"yaw_deg":135.0,"valid":true,"rejection_layer":"accepted","single_view_probe_coverage":0.22,"conflict_prior":0.18,"covered_cells":["a","k","l"],"oracle_gain":0.16,"runtime_s":0.02}
5
+ {"candidate_id":"tiny_004","source":"blender_indoor_tiny","scene_id":"tiny_blend_scene","position":[1.5,1.6,1.5],"yaw_deg":180.0,"valid":true,"rejection_layer":"accepted","single_view_probe_coverage":0.29,"conflict_prior":0.06,"covered_cells":["m","n","o","p"],"oracle_gain":0.26,"runtime_s":0.02}
6
+ {"candidate_id":"tiny_005","source":"blender_indoor_tiny","scene_id":"tiny_blend_scene","position":[3.0,1.6,1.5],"yaw_deg":270.0,"valid":false,"rejection_layer":"visibility","single_view_probe_coverage":0.04,"conflict_prior":0.35,"covered_cells":[],"oracle_gain":0.01,"runtime_s":0.01}
7
+
examples/tiny_blender_scene/README.md ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ # Tiny Example
2
+
3
+ This directory intentionally contains no Blender asset. The tiny smoke test uses Blender-indoor-style metadata in `examples/metadata/candidates.jsonl` so that reviewers can run the repository without private or third-party scene data.
4
+
5
+ For real rendering, place `.blend`, `.glb`, `.gltf`, or `.ply` scene files under `data/` and use the commands in the top-level README.
metadata_examples/candidates.schema.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "$schema": "https://json-schema.org/draft/2020-12/schema",
3
+ "title": "CM-EVS candidate metadata",
4
+ "type": "object",
5
+ "required": [
6
+ "candidate_id",
7
+ "source",
8
+ "scene_id",
9
+ "position",
10
+ "valid",
11
+ "single_view_probe_coverage",
12
+ "conflict_prior",
13
+ "covered_cells"
14
+ ],
15
+ "properties": {
16
+ "candidate_id": {"type": "string"},
17
+ "source": {"type": "string"},
18
+ "scene_id": {"type": "string"},
19
+ "position": {
20
+ "type": "array",
21
+ "items": {"type": "number"},
22
+ "minItems": 3,
23
+ "maxItems": 3
24
+ },
25
+ "yaw_deg": {"type": "number"},
26
+ "valid": {"type": "boolean"},
27
+ "rejection_layer": {"type": "string"},
28
+ "single_view_probe_coverage": {"type": "number"},
29
+ "conflict_prior": {"type": "number"},
30
+ "covered_cells": {
31
+ "type": "array",
32
+ "items": {"type": "string"}
33
+ },
34
+ "oracle_gain": {"type": "number"},
35
+ "runtime_s": {"type": "number"}
36
+ }
37
+ }
38
+
metadata_examples/per_step_log.schema.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "$schema": "https://json-schema.org/draft/2020-12/schema",
3
+ "title": "CM-EVS per-step selection log",
4
+ "type": "object",
5
+ "required": ["step", "candidate_id", "score", "marginal_gain", "coverage_after"],
6
+ "properties": {
7
+ "step": {"type": "integer"},
8
+ "candidate_id": {"type": "string"},
9
+ "score": {"type": "number"},
10
+ "marginal_gain": {"type": "number"},
11
+ "coverage_after": {"type": "number"},
12
+ "conflict_prior": {"type": "number"}
13
+ }
14
+ }
15
+
metadata_examples/selected_viewpoints.schema.json ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "$schema": "https://json-schema.org/draft/2020-12/schema",
3
+ "title": "CM-EVS selected viewpoints",
4
+ "type": "object",
5
+ "required": ["scene_id", "method", "selected_viewpoints", "summary"],
6
+ "properties": {
7
+ "scene_id": {"type": "string"},
8
+ "method": {"type": "string"},
9
+ "selected_viewpoints": {
10
+ "type": "array",
11
+ "items": {
12
+ "type": "object",
13
+ "required": ["candidate_id", "rank", "position", "yaw_deg"],
14
+ "properties": {
15
+ "candidate_id": {"type": "string"},
16
+ "rank": {"type": "integer"},
17
+ "position": {
18
+ "type": "array",
19
+ "items": {"type": "number"},
20
+ "minItems": 3,
21
+ "maxItems": 3
22
+ },
23
+ "yaw_deg": {"type": "number"},
24
+ "score": {"type": "number"},
25
+ "marginal_gain": {"type": "number"}
26
+ }
27
+ }
28
+ },
29
+ "summary": {"type": "object"}
30
+ }
31
+ }
32
+
pipelines/get_blend_bounds.py ADDED
@@ -0,0 +1,199 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ 获取.blend文件的场景信息(相机位置、边界框)
4
+
5
+ 此脚本在Blender内部执行,用于获取.blend文件的场景信息。
6
+ 输出JSON格式的数据供batch_render.py使用。
7
+
8
+ 智能检测策略:
9
+ 1. 如果场景中有相机,使用相机位置作为第一帧参考
10
+ 2. 如果没有相机,使用室内对象(Floor、Wall、Room等)的边界框计算中心
11
+
12
+ 使用方法:
13
+ blender --background --python get_blend_bounds.py -- \
14
+ --blend "path/to/scene.blend"
15
+
16
+ 输出格式:
17
+ [BOUNDS] {"bounds_min": [...], "bounds_max": [...], "center": [...], "camera": {...}}
18
+ """
19
+
20
+ import bpy
21
+ import sys
22
+ import json
23
+ import argparse
24
+ from mathutils import Vector
25
+
26
+
27
# Lower-case keywords that mark an object as "indoor".
# is_indoor_object() matches them as substrings of the lower-cased object name.
INDOOR_KEYWORDS = [
    'room', 'interior', 'floor', 'wall', 'ceiling', 'indoor',
    'kitchen', 'bathroom', 'bedroom', 'living', 'dining',
    'door', 'window', 'lamp', 'light', 'sofa', 'table', 'chair',
    'bed', 'desk', 'cabinet', 'shelf', 'carpet', 'curtain',
    'stair', 'corridor', 'hallway', 'closet', 'wardrobe'
]
35
+
36
+
37
def parse_args():
    """Parse this script's options from the Blender command line.

    Only the tokens that follow the first ``--`` in ``sys.argv`` are given to
    argparse. When there is no ``--`` the argument list is empty, so argparse
    reports the missing required ``--blend`` option.

    Returns:
        argparse.Namespace with a single attribute ``blend`` (str).
    """
    try:
        separator = sys.argv.index("--")
    except ValueError:
        # No separator: nothing on the command line belongs to this script.
        script_args = []
    else:
        script_args = sys.argv[separator + 1:]

    cli = argparse.ArgumentParser(description="获取.blend文件场景信息")
    cli.add_argument("--blend", type=str, required=True, help=".blend文件路径")
    return cli.parse_args(script_args)
50
+
51
+
52
def is_indoor_object(obj_name):
    """Return True when *obj_name* contains any indoor keyword.

    The comparison is a case-insensitive substring match against
    INDOOR_KEYWORDS.
    """
    lowered = obj_name.lower()
    for keyword in INDOOR_KEYWORDS:
        if keyword in lowered:
            return True
    return False
56
+
57
+
58
def get_scene_cameras():
    """Collect every camera object of the active scene.

    Returns:
        A list with one dict per object whose ``type`` is ``'CAMERA'``, in
        scene iteration order. Each dict holds ``"name"``, ``"position"``
        (``[x, y, z]`` read from ``obj.location``) and ``"rotation"``
        (``[x, y, z]`` read from ``obj.rotation_euler``).
    """
    camera_objects = (
        obj for obj in bpy.context.scene.objects if obj.type == 'CAMERA'
    )
    return [
        {
            "name": cam.name,
            "position": [cam.location.x, cam.location.y, cam.location.z],
            "rotation": [
                cam.rotation_euler.x,
                cam.rotation_euler.y,
                cam.rotation_euler.z,
            ],
        }
        for cam in camera_objects
    ]
76
+
77
+
78
def get_scene_bounds(indoor_only=True):
    """Compute the world-space bounding box of the scene's mesh objects.

    Coordinates are in Blender's frame (X right, Y forward, Z up). Each mesh
    contributes the eight corners of ``obj.bound_box`` transformed by
    ``obj.matrix_world``.

    Args:
        indoor_only: When True (default), only meshes whose name passes
            is_indoor_object() are considered. If none qualifies, the
            function retries once with ``indoor_only=False``.

    Returns:
        A tuple ``(bounds_min, bounds_max, center, indoor_count)``:
        ``bounds_min`` / ``bounds_max`` are ``[x, y, z]`` lists, ``center`` is
        their midpoint, and ``indoor_count`` is the number of meshes that
        contributed. After the full-scene fallback this is every mesh; when
        the scene has no mesh at all, a default box
        ``[-5, -5, 0]``..``[5, 5, 3]`` is used and the count is 0.
    """
    inf = float('inf')
    lower = [inf, inf, inf]
    upper = [-inf, -inf, -inf]

    total_meshes = 0
    used_meshes = 0

    for obj in bpy.context.scene.objects:
        if obj.type != 'MESH':
            continue
        total_meshes += 1

        # With the indoor filter on, skip meshes whose name has no indoor keyword.
        if indoor_only and not is_indoor_object(obj.name):
            continue
        used_meshes += 1

        # bound_box is in object space; move each corner into world space.
        for corner in obj.bound_box:
            point = obj.matrix_world @ Vector(corner)
            for axis in range(3):
                lower[axis] = min(lower[axis], point[axis])
                upper[axis] = max(upper[axis], point[axis])

    # An untouched lower bound means no mesh contributed any corner.
    if lower[0] == inf:
        if indoor_only:
            print("[WARN] 未找到室内对象,回退到全场景边界框", file=sys.stderr)
            return get_scene_bounds(indoor_only=False)
        print("[WARN] 未找到任何mesh对象,使用默认边界框", file=sys.stderr)
        lower = [-5, -5, 0]
        upper = [5, 5, 3]
        used_meshes = 0

    # Midpoint of the box on every axis.
    center = [(lo + hi) / 2 for lo, hi in zip(lower, upper)]

    mode_label = "室内对象" if indoor_only else "全场景"
    lower_text = [f'{value:.2f}' for value in lower]
    upper_text = [f'{value:.2f}' for value in upper]
    print(f"[INFO] 边界框模式: {mode_label}", file=sys.stderr)
    print(f"[INFO] 找到 {used_meshes}/{total_meshes} 个对象", file=sys.stderr)
    print(f"[INFO] 边界框: min={lower_text}, max={upper_text}", file=sys.stderr)
    print(f"[INFO] 几何中心: [{center[0]:.2f}, {center[1]:.2f}, {center[2]:.2f}]", file=sys.stderr)

    return lower, upper, center, used_meshes
139
+
140
+
141
def main():
    """Open the .blend file, gather scene information and print it as JSON.

    Diagnostics go to stderr. The result is written to stdout as a single
    line prefixed with ``[BOUNDS] `` so that a calling process can pick it
    out of Blender's own output.

    The first-frame reference position is the first scene camera's location
    when a camera exists; otherwise it is the centre returned by
    get_scene_bounds().
    """
    args = parse_args()

    print(f"[INFO] 打开.blend文件: {args.blend}", file=sys.stderr)
    bpy.ops.wm.open_mainfile(filepath=args.blend)

    # Step 1: look for cameras; the first one (if any) is the reference.
    cameras = get_scene_cameras()
    print(f"[INFO] 检测到 {len(cameras)} 个相机", file=sys.stderr)

    camera_info = cameras[0] if cameras else None
    if camera_info is not None:
        px, py, pz = camera_info["position"]
        print(f"[INFO] 使用相机 '{camera_info['name']}' 的位置作为第一帧参考", file=sys.stderr)
        print(f"[INFO] 相机位置: [{px:.2f}, {py:.2f}, {pz:.2f}]", file=sys.stderr)

    # Step 2: bounding box of the indoor objects.
    bounds_min, bounds_max, center, indoor_count = get_scene_bounds(indoor_only=True)

    if camera_info is None:
        # No camera: use the bounding-box centre as the first-frame position.
        first_frame_position = center
        position_source = "indoor_bounds_center"
        print("[INFO] 无相机,使用室内边界框中心作为第一帧位置", file=sys.stderr)
    else:
        first_frame_position = camera_info["position"]
        position_source = "camera"

    # Step 3: scene unit settings (scale is exported for metre -> scene-unit conversion).
    unit_settings = bpy.context.scene.unit_settings
    unit_scale = unit_settings.scale_length
    unit_system = unit_settings.system
    print(f"[INFO] 场景单位: scale={unit_scale}, system={unit_system}", file=sys.stderr)

    # Key order is kept as-is; it determines the order in the emitted JSON.
    result = {
        "bounds_min": bounds_min,
        "bounds_max": bounds_max,
        "center": center,
        "first_frame_position": first_frame_position,
        "position_source": position_source,
        "camera": camera_info,
        "cameras_count": len(cameras),
        "indoor_objects_count": indoor_count,
        "unit_scale": unit_scale,  # for metre -> scene-unit conversion
        "coordinate_system": "blender_z_up"  # X right, Y forward, Z up
    }

    # The special prefix makes the line easy to find for the caller.
    print(f"[BOUNDS] {json.dumps(result)}")
196
+
197
+
198
+ if __name__ == "__main__":
199
+ main()
pipelines/render_erp_blender.py ADDED
@@ -0,0 +1,1015 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Blender ERP全景图渲染脚本
4
+
5
+ 此脚本在Blender内部执行,用于渲染ERP(等距圆柱投影)全景图。
6
+
7
+ 使用方法:
8
+ blender --background --python render_erp_blender.py -- \
9
+ --mesh "path/to/mesh.glb" \
10
+ --output "output/panorama_0000.png" \
11
+ --camera-pos "0.0,0.5,0.0" \
12
+ --camera-rot "0.0,0.0,0.0" \
13
+ --resolution "1024,512"
14
+ """
15
+
16
+ import bpy
17
+ import sys
18
+ import os
19
+ import json
20
+ import math
21
+ import argparse
22
+ from mathutils import Vector, Euler, Quaternion
23
+
24
+
25
def parse_args():
    """Parse the render script's options (the tokens after Blender's ``--``).

    Everything up to and including the first ``--`` in ``sys.argv`` is
    ignored. Without a ``--`` the argument list is empty, so argparse reports
    the missing required options.

    Returns:
        argparse.Namespace with attributes mesh, output, pose_output,
        camera_pos, camera_rot, camera_rot_quat, resolution, samples, engine,
        frame_id, ref_position, ref_quaternion, render_depth, depth_output.
    """
    argv = sys.argv
    script_args = argv[argv.index("--") + 1:] if "--" in argv else []

    # (flag, add_argument keyword arguments), in registration order.
    option_specs = [
        ("--mesh", dict(type=str, required=True,
                        help="输入mesh文件路径(GLB/GLTF/OBJ)")),
        ("--output", dict(type=str, required=True,
                          help="输出图像路径")),
        ("--pose-output", dict(type=str, default=None,
                               help="输出位姿JSON路径(默认与图像同名)")),
        ("--camera-pos", dict(type=str, default="0.0,0.0,0.0",
                              help="相机位置 x,y,z")),
        ("--camera-rot", dict(type=str, default="0.0,0.0,0.0",
                              help="相机旋转 roll,pitch,yaw(弧度)")),
        ("--camera-rot-quat", dict(type=str, default=None,
                                   help="(已废弃)相机旋转四元数 w,x,y,z。现在主路径使用 --camera-rot Euler角")),
        ("--resolution", dict(type=str, default="1024,512",
                              help="渲染分辨率 width,height")),
        ("--samples", dict(type=int, default=16,
                           help="渲染采样数(默认16,Emission材质不需要高采样,可大幅提升渲染速度)")),
        ("--engine", dict(type=str, default="CYCLES",
                          choices=["BLENDER_EEVEE", "CYCLES"],
                          help="渲染引擎(全景图必须使用CYCLES)")),
        ("--frame-id", dict(type=int, default=0,
                            help="帧序号")),
        ("--ref-position", dict(type=str, default=None,
                                help="参考帧位置 x,y,z(Y-up坐标系),None表示第一帧")),
        ("--ref-quaternion", dict(type=str, default=None,
                                  help="参考帧四元数 w,x,y,z,None表示第一帧")),
        ("--render-depth", dict(action="store_true",
                                help="是否渲染深度图(保存为.npy格式)")),
        ("--depth-output", dict(type=str, default=None,
                                help="深度图输出路径(默认与图像同目录,后缀_depth.npy)")),
    ]

    parser = argparse.ArgumentParser(description="Blender ERP渲染脚本")
    for flag, options in option_specs:
        parser.add_argument(flag, **options)

    return parser.parse_args(script_args)
66
+
67
+
68
def clear_scene():
    """Delete every object in the current scene, then purge orphaned data.

    After the objects are removed, mesh, material, texture and image
    data-blocks with zero users are removed from ``bpy.data``, in that order,
    so that data released by an earlier purge can be collected by a later one.
    """
    # Select everything, then delete the selection.
    bpy.ops.object.select_all(action='SELECT')
    bpy.ops.object.delete(use_global=False)

    orphan_sources = (
        bpy.data.meshes,
        bpy.data.materials,
        bpy.data.textures,
        bpy.data.images,
    )
    for collection in orphan_sources:
        # Iterate over a snapshot: removing entries from the live bpy
        # collection while looping over it can skip items or invalidate
        # the iterator.
        for block in list(collection):
            if block.users == 0:
                collection.remove(block)
88
+
89
+
90
def import_mesh(mesh_path):
    """Import a mesh file into the current scene.

    The importer is chosen from the lower-cased file extension:
    ``.glb`` / ``.gltf``, ``.obj``, ``.fbx`` and ``.ply`` are handled. After
    the import, the objects left selected are passed to
    apply_procedural_textures().

    Args:
        mesh_path: Path of the mesh file to import.

    Returns:
        List of the objects selected after the import.

    Raises:
        ValueError: If the file extension is not one of the handled formats.
    """
    ext = os.path.splitext(mesh_path)[1].lower()

    # Lazy dispatch: only the operator for the matching extension is touched.
    importers = {
        '.glb': lambda: bpy.ops.import_scene.gltf(filepath=mesh_path),
        '.gltf': lambda: bpy.ops.import_scene.gltf(filepath=mesh_path),
        '.obj': lambda: bpy.ops.wm.obj_import(filepath=mesh_path),
        '.fbx': lambda: bpy.ops.import_scene.fbx(filepath=mesh_path),
        '.ply': lambda: bpy.ops.wm.ply_import(filepath=mesh_path),
    }
    run_import = importers.get(ext)
    if run_import is None:
        raise ValueError(f"不支持的文件格式: {ext}")
    run_import()

    print(f"[INFO] 导入mesh: {mesh_path}")

    # The objects selected at this point are treated as the imported ones.
    imported_objects = list(bpy.context.selected_objects)
    print(f"[INFO] 导入了 {len(imported_objects)} 个对象")

    # Material post-processing for the imported objects.
    apply_procedural_textures(imported_objects)

    return imported_objects
115
+
116
+
117
def is_room_structure(obj_name):
    """Return True if an object name looks like room structure (wall/floor/ceiling).

    All three naming patterns are matched case-insensitively:

    1. The name contains ``none`` (e.g. ``None.obj``).
    2. The name starts with ``geometry_`` or ``geometry.``.
    3. The name is purely numeric once every ``.obj`` fragment is removed
       (e.g. ``12670.obj``).

    Args:
        obj_name: Object name to classify.

    Returns:
        bool: True when any of the patterns matches.
    """
    name_lower = obj_name.lower()

    # Pattern 1: contains "none".
    if 'none' in name_lower:
        return True

    # Pattern 2: "geometry_" / "geometry." prefix.
    if name_lower.startswith(('geometry_', 'geometry.')):
        return True

    # Pattern 3: numeric ID. Stripping from the lowercased name also handles
    # mixed-case extensions such as ".Obj", consistent with patterns 1 and 2.
    return name_lower.replace('.obj', '').isdigit()
142
+
143
+
144
def apply_procedural_textures(objects):
    """Give every mesh object an Emission-based material.

    Meshes with no usable material in slot 0, or whose name matches
    ``is_room_structure``, receive the procedural room material. All other
    meshes have their existing material converted to Emission. Non-mesh
    objects are ignored.

    Args:
        objects: Iterable of Blender objects to process.
    """
    mesh_objects = [candidate for candidate in objects if candidate.type == 'MESH']

    for mesh_obj in mesh_objects:
        slots = mesh_obj.data.materials
        # "No material" covers an empty slot list as well as an empty first slot.
        lacks_material = not slots or len(slots) == 0 or slots[0] is None

        if lacks_material or is_room_structure(mesh_obj.name):
            # No material, or room structure: use the procedural room material.
            print(f"[INFO] 为对象添加Emission材质: {mesh_obj.name}")
            apply_room_material(mesh_obj)
        else:
            # Existing material: rewire it to an Emission shader.
            print(f"[INFO] 将对象材质转换为Emission: {mesh_obj.name}")
            convert_to_emission_material(mesh_obj)

    # Every mesh takes exactly one of the two branches above.
    processed = len(mesh_objects)
    if processed == 0:
        print("[WARN] 未找到需要添加材质的对象")
    else:
        print(f"[INFO] 共为 {processed} 个对象添加了Emission材质(材质预览模式)")
171
+
172
+
173
def apply_room_material(obj):
    """Assign a procedural floor/wall/ceiling Emission material to ``obj``.

    A new material named "RoomProceduralMaterial" is built. The Z component
    of the geometry normal selects the shader per face: Z > 0.5 uses the
    wood-floor shader, Z < -0.5 uses the grid-ceiling shader, and everything
    else uses the brick-wall shader. The material replaces slot 0 of the
    object, or is appended when the object has no slots.

    Args:
        obj: Blender mesh object that receives the material.
    """
    material = bpy.data.materials.new(name="RoomProceduralMaterial")
    material.use_nodes = True

    tree_nodes = material.node_tree.nodes
    tree_links = material.node_tree.links

    # Start from an empty node tree.
    tree_nodes.clear()

    material_output = tree_nodes.new('ShaderNodeOutputMaterial')
    material_output.location = (800, 0)

    # Geometry normal -> Separate XYZ, so its Z component can be thresholded.
    geometry_node = tree_nodes.new('ShaderNodeNewGeometry')
    geometry_node.location = (-600, 0)

    normal_parts = tree_nodes.new('ShaderNodeSeparateXYZ')
    normal_parts.location = (-400, 0)
    tree_links.new(geometry_node.outputs['Normal'], normal_parts.inputs['Vector'])

    def normal_z_test(operation, threshold, location):
        """Create a Math node comparing the normal's Z against ``threshold``."""
        check = tree_nodes.new('ShaderNodeMath')
        check.operation = operation
        check.inputs[1].default_value = threshold
        check.location = location
        tree_links.new(normal_parts.outputs['Z'], check.inputs[0])
        return check

    # Ceiling: faces pointing down (normal Z < -0.5).
    is_ceiling = normal_z_test('LESS_THAN', -0.5, (-200, 100))
    # Floor: faces pointing up (normal Z > 0.5).
    is_floor = normal_z_test('GREATER_THAN', 0.5, (-200, -100))

    # The three Emission shaders.
    floor_emission = create_wood_floor_material(tree_nodes, tree_links)
    floor_emission.location = (0, 300)

    wall_emission = create_brick_wall_material(tree_nodes, tree_links)
    wall_emission.location = (0, 0)

    ceiling_emission = create_grid_ceiling_material(tree_nodes, tree_links)
    ceiling_emission.location = (0, -300)

    # First mix: wall by default, floor where the floor test passes.
    wall_or_floor = tree_nodes.new('ShaderNodeMixShader')
    wall_or_floor.location = (300, 100)
    tree_links.new(is_floor.outputs['Value'], wall_or_floor.inputs['Fac'])
    tree_links.new(wall_emission.outputs['Emission'], wall_or_floor.inputs[1])
    tree_links.new(floor_emission.outputs['Emission'], wall_or_floor.inputs[2])

    # Second mix: previous result by default, ceiling where the ceiling test passes.
    with_ceiling = tree_nodes.new('ShaderNodeMixShader')
    with_ceiling.location = (500, 0)
    tree_links.new(is_ceiling.outputs['Value'], with_ceiling.inputs['Fac'])
    tree_links.new(wall_or_floor.outputs['Shader'], with_ceiling.inputs[1])
    tree_links.new(ceiling_emission.outputs['Emission'], with_ceiling.inputs[2])

    tree_links.new(with_ceiling.outputs['Shader'], material_output.inputs['Surface'])

    # Put the material on the object (first slot only).
    slots = obj.data.materials
    if slots:
        slots[0] = material
    else:
        slots.append(material)

    print("[INFO] Emission材质已应用(地板+砖墙+网格天花板,材质预览模式)")
246
+
247
+
248
def convert_to_emission_material(obj):
    """Rewire the object's first material so its surface is a pure Emission shader.

    When the material in slot 0 uses nodes and contains both a Principled
    BSDF with a "Base Color" input and a material output node, a new Emission
    node (strength 1.0) takes over the Base Color (its link source, or its
    default value) and is connected to the output's Surface socket. In every
    other case the procedural room material is applied via
    ``apply_room_material``.

    Only material slot 0 is touched. The existing material data-block is
    modified in place.

    Args:
        obj: Blender mesh object whose material should be converted.
    """
    materials = obj.data.materials
    existing_mat = materials[0] if materials and len(materials) > 0 else None
    if existing_mat is None:
        # No usable material: fall back to the procedural room material.
        apply_room_material(obj)
        return

    if existing_mat.use_nodes:
        nodes = existing_mat.node_tree.nodes
        links = existing_mat.node_tree.links

        # Locate both required nodes *before* creating anything, so a material
        # without an output node does not end up with an orphan Emission node.
        bsdf_node = next((n for n in nodes if n.type == 'BSDF_PRINCIPLED'), None)
        output_node = next((n for n in nodes if n.type == 'OUTPUT_MATERIAL'), None)

        if (
            bsdf_node is not None
            and output_node is not None
            and 'Base Color' in bsdf_node.inputs
        ):
            base_color_input = bsdf_node.inputs['Base Color']

            emission = nodes.new('ShaderNodeEmission')
            emission.name = "Emission"
            emission.location = bsdf_node.location

            if base_color_input.is_linked:
                # Re-use whatever drives Base Color (e.g. an image texture).
                links.new(base_color_input.links[0].from_socket, emission.inputs['Color'])
            else:
                # Unlinked: copy the constant colour.
                emission.inputs['Color'].default_value = base_color_input.default_value

            emission.inputs['Strength'].default_value = 1.0

            surface_input = output_node.inputs['Surface']
            # Snapshot the links first: removing links while iterating the
            # socket's own link collection mutates what is being iterated.
            for link in list(surface_input.links):
                links.remove(link)
            links.new(emission.outputs['Emission'], surface_input)

            print(f"[INFO] 已将材质转换为Emission: {obj.name}")
            return

    # Conversion not possible: apply the procedural room material instead.
    apply_room_material(obj)
313
+
314
+
315
def create_wood_floor_material(nodes, links):
    """Build a procedural wood-floor Emission shader inside a node tree.

    A noise texture and a banded wave texture are blended 50/50; the blended
    scalar drives a dark-brown to light-brown colour ramp, which feeds an
    Emission node with strength 1.0.

    Args:
        nodes: Node collection of the target material node tree.
        links: Link collection of the same node tree.

    Returns:
        The Emission node; its 'Emission' output is the shader result.
    """
    emission = nodes.new('ShaderNodeEmission')
    emission.name = "FloorEmission"

    # Noise texture for the wood grain.
    noise = nodes.new('ShaderNodeTexNoise')
    noise.inputs['Scale'].default_value = 20.0
    noise.inputs['Detail'].default_value = 8.0
    noise.inputs['Roughness'].default_value = 0.6
    noise.location = (-600, 200)

    # Wave texture for the plank stripes.
    wave = nodes.new('ShaderNodeTexWave')
    wave.wave_type = 'BANDS'
    wave.bands_direction = 'X'
    wave.inputs['Scale'].default_value = 3.0
    wave.inputs['Distortion'].default_value = 5.0
    wave.inputs['Detail'].default_value = 3.0
    wave.location = (-600, 0)

    # Colour ramp with the wood colours.
    color_ramp = nodes.new('ShaderNodeValToRGB')
    color_ramp.color_ramp.elements[0].color = (0.15, 0.08, 0.04, 1.0)  # dark brown
    color_ramp.color_ramp.elements[1].color = (0.35, 0.20, 0.10, 1.0)  # light brown
    color_ramp.location = (-400, 100)

    # Blend the two scalar factors. The Mix node exposes several sockets that
    # share the names 'A', 'B' and 'Result' (float, vector, colour); a lookup
    # by name returns the float ones. Both inputs here are scalar 'Fac'
    # outputs, so the node runs in FLOAT mode, where those float sockets are
    # the active ones. In RGBA mode the name lookups would hit inactive sockets.
    mix_fac = nodes.new('ShaderNodeMix')
    mix_fac.data_type = 'FLOAT'
    mix_fac.inputs['Factor'].default_value = 0.5
    mix_fac.location = (-400, 0)

    links.new(noise.outputs['Fac'], mix_fac.inputs['A'])
    links.new(wave.outputs['Fac'], mix_fac.inputs['B'])
    links.new(mix_fac.outputs['Result'], color_ramp.inputs['Fac'])

    # Emission shows the colour directly, independent of scene lighting.
    links.new(color_ramp.outputs['Color'], emission.inputs['Color'])
    emission.inputs['Strength'].default_value = 1.0

    return emission
358
+
359
+
360
def create_brick_wall_material(nodes, links):
    """Build a procedural brick-wall Emission shader inside a node tree.

    Generated texture coordinates are scaled and fed to a brick texture and a
    fine noise texture; the two colours are mixed (factor 0.1) and drive an
    Emission node with strength 2.0.

    Args:
        nodes: Node collection of the target material node tree.
        links: Link collection of the same node tree.

    Returns:
        The Emission node; its 'Emission' output is the shader result.
    """
    emission = nodes.new('ShaderNodeEmission')
    emission.name = "BrickWallEmission"

    # Texture coordinates.
    tex_coord = nodes.new('ShaderNodeTexCoord')
    tex_coord.location = (-800, 0)

    # Scale mapping that controls the brick size.
    mapping = nodes.new('ShaderNodeMapping')
    mapping.inputs['Scale'].default_value = (4.0, 8.0, 1.0)
    mapping.location = (-600, 0)
    links.new(tex_coord.outputs['Generated'], mapping.inputs['Vector'])

    # Brick texture.
    brick = nodes.new('ShaderNodeTexBrick')
    brick.inputs['Color1'].default_value = (0.6, 0.3, 0.2, 1.0)     # brick red
    brick.inputs['Color2'].default_value = (0.5, 0.25, 0.15, 1.0)   # darker brick red
    brick.inputs['Mortar'].default_value = (0.85, 0.85, 0.8, 1.0)   # off-white mortar
    brick.inputs['Scale'].default_value = 3.0
    brick.inputs['Mortar Size'].default_value = 0.02
    brick.inputs['Mortar Smooth'].default_value = 0.1
    brick.inputs['Bias'].default_value = 0.0
    brick.inputs['Brick Width'].default_value = 0.5
    brick.inputs['Row Height'].default_value = 0.25
    brick.location = (-400, 0)
    links.new(mapping.outputs['Vector'], brick.inputs['Vector'])

    # Fine noise to break up the flat brick colours.
    noise = nodes.new('ShaderNodeTexNoise')
    noise.inputs['Scale'].default_value = 50.0
    noise.inputs['Detail'].default_value = 3.0
    noise.location = (-400, -200)
    links.new(mapping.outputs['Vector'], noise.inputs['Vector'])

    # Mix brick colour with the noise colour. The Mix node has float, vector
    # and colour sockets that all share the names 'A', 'B' and 'Result';
    # a lookup by name returns the float variants, which are inactive in
    # RGBA mode. The colour sockets are therefore addressed by index:
    # inputs[6] = colour A, inputs[7] = colour B, outputs[2] = colour result.
    mix_color = nodes.new('ShaderNodeMix')
    mix_color.data_type = 'RGBA'
    mix_color.inputs['Factor'].default_value = 0.1
    mix_color.location = (-200, 0)
    links.new(brick.outputs['Color'], mix_color.inputs[6])
    links.new(noise.outputs['Color'], mix_color.inputs[7])

    # Emission shows the colour directly, independent of scene lighting.
    links.new(mix_color.outputs[2], emission.inputs['Color'])
    emission.inputs['Strength'].default_value = 2.0

    return emission
410
+
411
+
412
def create_grid_ceiling_material(nodes, links):
    """Build a procedural grid-ceiling Emission shader inside a node tree.

    Scaled generated coordinates are split into X and Y. For each axis the
    graph computes ``abs(sin(6.28 * coord))``; the minimum of the two values
    drives a colour ramp (dark grey at 0.0, near-white from 0.15), which feeds
    an Emission node with strength 2.0.

    Args:
        nodes: Node collection of the target material node tree.
        links: Link collection of the same node tree.

    Returns:
        The Emission node; its 'Emission' output is the shader result.
    """
    grid_emission = nodes.new('ShaderNodeEmission')
    grid_emission.name = "GridCeilingEmission"

    # Generated coordinates, scaled, then split into components.
    coords = nodes.new('ShaderNodeTexCoord')
    coords.location = (-800, -400)

    scale_map = nodes.new('ShaderNodeMapping')
    scale_map.inputs['Scale'].default_value = (5.0, 5.0, 1.0)
    scale_map.location = (-600, -400)
    links.new(coords.outputs['Generated'], scale_map.inputs['Vector'])

    components = nodes.new('ShaderNodeSeparateXYZ')
    components.location = (-400, -400)
    links.new(scale_map.outputs['Vector'], components.inputs['Vector'])

    def math_node(operation, location):
        """Create a Math node with the given operation at ``location``."""
        node = nodes.new('ShaderNodeMath')
        node.operation = operation
        node.location = location
        return node

    def axis_lines(axis, row_y):
        """Return a node producing abs(sin(6.28 * axis coordinate))."""
        scaled = math_node('MULTIPLY', (-300, row_y))
        scaled.inputs[1].default_value = 6.28  # approximately 2*PI
        links.new(components.outputs[axis], scaled.inputs[0])

        sine = math_node('SINE', (-200, row_y))
        links.new(scaled.outputs['Value'], sine.inputs[0])

        magnitude = math_node('ABSOLUTE', (-100, row_y))
        links.new(sine.outputs['Value'], magnitude.inputs[0])
        return magnitude

    lines_x = axis_lines('X', -350)
    lines_y = axis_lines('Y', -500)

    # Combine both directions with a minimum to form the grid.
    grid_value = math_node('MINIMUM', (0, -425))
    links.new(lines_x.outputs['Value'], grid_value.inputs[0])
    links.new(lines_y.outputs['Value'], grid_value.inputs[1])

    # Colour ramp: dark grid lines, light cells.
    ramp = nodes.new('ShaderNodeValToRGB')
    line_stop = ramp.color_ramp.elements[0]
    line_stop.color = (0.3, 0.3, 0.35, 1.0)    # dark grey grid lines
    line_stop.position = 0.0
    cell_stop = ramp.color_ramp.elements[1]
    cell_stop.color = (0.95, 0.95, 0.95, 1.0)  # near-white cells
    cell_stop.position = 0.15
    ramp.location = (150, -425)
    links.new(grid_value.outputs['Value'], ramp.inputs['Fac'])

    # Emission shows the colour directly, independent of scene lighting.
    links.new(ramp.outputs['Color'], grid_emission.inputs['Color'])
    grid_emission.inputs['Strength'].default_value = 2.0

    return grid_emission
489
+
490
+
491
def get_scene_bounds():
    """Compute the world-space axis-aligned bounds of all mesh objects.

    Each mesh object's eight ``bound_box`` corners are transformed by its
    ``matrix_world`` and folded into per-axis minima and maxima.

    Returns:
        tuple: ``(min_coords, max_coords)``, each a 3-element list. When the
        scene has no mesh corners, ``([-5, -5, 0], [5, 5, 3])`` is returned.
    """
    corners = []
    for scene_obj in bpy.context.scene.objects:
        if scene_obj.type != 'MESH':
            continue
        to_world = scene_obj.matrix_world
        # bound_box is in local space; move each corner into world space.
        corners.extend(to_world @ Vector(local_corner) for local_corner in scene_obj.bound_box)

    # Nothing to measure: return the default box.
    if not corners:
        return ([-5, -5, 0], [5, 5, 3])

    lower = [min(corner[axis] for corner in corners) for axis in range(3)]
    upper = [max(corner[axis] for corner in corners) for axis in range(3)]
    return (lower, upper)
510
+
511
+
512
def create_erp_camera(name="ERP_Camera"):
    """Create an equirectangular panoramic camera and link it to the scene.

    Args:
        name: Name used for both the camera data-block and the object.

    Returns:
        The new camera object, already linked to the scene's root collection.
    """
    cam_data = bpy.data.cameras.new(name=name)
    cam_data.type = 'PANO'

    # The panorama type lives in different places depending on the Blender
    # version, so set it wherever the attribute exists.
    if hasattr(cam_data, 'panorama_type'):
        cam_data.panorama_type = 'EQUIRECTANGULAR'
    if hasattr(cam_data, 'cycles'):
        cam_data.cycles.panorama_type = 'EQUIRECTANGULAR'

    cam_object = bpy.data.objects.new(name, cam_data)
    bpy.context.scene.collection.objects.link(cam_object)

    print(f"[INFO] 创建ERP相机: {name}")

    return cam_object
537
+
538
+
539
def setup_camera(camera_object, position, rotation_euler=None, rotation_quat=None):
    """Place the camera and set its orientation.

    When ``rotation_quat`` is given it takes precedence and the camera is put
    into 'QUATERNION' rotation mode; otherwise ``rotation_euler`` is applied
    in 'XYZ' Euler mode.

    Args:
        camera_object: Blender camera object to configure.
        position: Camera location ``(x, y, z)``.
        rotation_euler: Euler angles in radians, XYZ order.
        rotation_quat: Quaternion as ``(w, x, y, z)``, or None.
    """
    camera_object.location = Vector(position)

    if rotation_quat is None:
        # Euler angles, XYZ order.
        camera_object.rotation_mode = 'XYZ'
        camera_object.rotation_euler = Euler(rotation_euler, 'XYZ')
        rotation_report = f"[INFO] 相机旋转(Euler XYZ, rad): {rotation_euler}"
    else:
        # Quaternion input avoids Euler-order ambiguity.
        camera_object.rotation_mode = 'QUATERNION'
        camera_object.rotation_quaternion = Quaternion(rotation_quat)
        rotation_report = f"[INFO] 相机旋转(Quaternion wxyz): {list(rotation_quat)}"

    print(f"[INFO] 相机位置: {position}")
    print(rotation_report)
557
+
558
+
559
def setup_render_settings(resolution, engine, samples):
    """Configure render engine, resolution, output format and sampling.

    Output is always 8-bit RGB PNG at 100% of the requested resolution.
    For Cycles the sample count, denoising and bounce limits are set and GPU
    (CUDA) rendering is requested on a best-effort basis.

    Args:
        resolution: ``(width, height)`` in pixels.
        engine: Render engine identifier, 'CYCLES' or 'BLENDER_EEVEE'.
        samples: Number of render samples.
    """
    scene = bpy.context.scene

    # Render engine.
    scene.render.engine = engine
    print(f"[INFO] 渲染引擎: {engine}")

    # Resolution.
    scene.render.resolution_x = resolution[0]
    scene.render.resolution_y = resolution[1]
    scene.render.resolution_percentage = 100
    print(f"[INFO] 分辨率: {resolution[0]}x{resolution[1]}")

    # Output format.
    scene.render.image_settings.file_format = 'PNG'
    scene.render.image_settings.color_mode = 'RGB'
    scene.render.image_settings.color_depth = '8'

    # Engine-specific settings.
    if engine == 'BLENDER_EEVEE':
        # The EEVEE attributes differ between Blender versions, so each one
        # is only set when it exists.
        if hasattr(scene, 'eevee'):
            if hasattr(scene.eevee, 'taa_render_samples'):
                scene.eevee.taa_render_samples = samples
            if hasattr(scene.eevee, 'use_soft_shadows'):
                scene.eevee.use_soft_shadows = True
    elif engine == 'CYCLES':
        scene.cycles.samples = samples
        scene.cycles.use_denoising = True

        # Emission materials light themselves, so few bounces are needed.
        scene.cycles.max_bounces = 4
        scene.cycles.diffuse_bounces = 2
        scene.cycles.glossy_bounces = 2
        scene.cycles.transmission_bounces = 2

        # Best-effort GPU selection. Only ordinary errors are caught, so
        # KeyboardInterrupt/SystemExit still propagate, and the reason for
        # falling back to CPU is reported instead of being swallowed.
        try:
            bpy.context.preferences.addons['cycles'].preferences.compute_device_type = 'CUDA'
            bpy.context.scene.cycles.device = 'GPU'
            print("[INFO] 使用GPU渲染")
        except Exception as exc:
            print("[INFO] 使用CPU渲染")
            print(f"[INFO] GPU 设置失败原因: {exc}")
607
+
608
+
609
def setup_lighting(camera_position=None, scene_bounds=None):
    """Install a uniform white world background as the only light source.

    A new world named "World" is created and assigned to the scene. Its node
    tree is reduced to a Background node (white, strength 1.0) connected to a
    World Output node. No lamps are added.

    Args:
        camera_position: Camera position ``(x, y, z)``; not used.
        scene_bounds: Scene bounds ``(min, max)``; not used.
    """
    ambient_world = bpy.data.worlds.new("World")
    bpy.context.scene.world = ambient_world
    ambient_world.use_nodes = True

    world_nodes = ambient_world.node_tree.nodes
    world_links = ambient_world.node_tree.links

    # Drop the default nodes and build the two-node graph from scratch.
    world_nodes.clear()

    ambient = world_nodes.new('ShaderNodeBackground')
    ambient.inputs['Color'].default_value = (1.0, 1.0, 1.0, 1.0)  # white
    ambient.inputs['Strength'].default_value = 1.0

    world_output = world_nodes.new('ShaderNodeOutputWorld')
    world_links.new(ambient.outputs['Background'], world_output.inputs['Surface'])

    # Deliberately no other lights: the environment is the only illumination.
    print("[INFO] 设置照明完成(材质预览模式:仅强环境光,无其他光源,显示材质原始颜色和亮度)")
648
+
649
+
650
def setup_depth_pass():
    """Enable the Z pass and build a compositor graph that writes depth to EXR.

    A new compositor node group "DepthCompositor" is assigned to the scene
    via ``compositing_node_group``. The render image is routed to a group
    output socket named "Image", and the Depth output is routed to a File
    Output node configured for 32-bit ZIP-compressed OpenEXR with a single
    item named "depth".

    Returns:
        The File Output node. Its ``directory`` is left empty here.
    """
    # Enable the Z pass on the active view layer.
    bpy.context.view_layer.use_pass_z = True

    # New compositor tree, assigned through compositing_node_group.
    comp_tree = bpy.data.node_groups.new("DepthCompositor", "CompositorNodeTree")
    bpy.context.scene.compositing_node_group = comp_tree
    comp_nodes = comp_tree.nodes
    comp_links = comp_tree.links

    layers = comp_nodes.new('CompositorNodeRLayers')
    layers.location = (0, 300)

    # Group output with an "Image" colour socket receives the RGB result.
    group_out = comp_nodes.new('NodeGroupOutput')
    group_out.location = (400, 300)
    comp_tree.interface.new_socket(name="Image", in_out="OUTPUT", socket_type="NodeSocketColor")
    comp_links.new(layers.outputs['Image'], group_out.inputs['Image'])

    # File Output node for the depth EXR.
    depth_writer = comp_nodes.new('CompositorNodeOutputFile')
    depth_writer.location = (400, 0)
    depth_writer.directory = ""
    exr_format = depth_writer.format
    exr_format.media_type = 'IMAGE'  # set before file_format, as in the original order
    exr_format.file_format = 'OPEN_EXR'
    exr_format.color_depth = '32'
    exr_format.exr_codec = 'ZIP'

    # Exactly one float item named "depth".
    depth_writer.file_output_items.clear()
    depth_writer.file_output_items.new('FLOAT', "depth")

    comp_links.new(layers.outputs['Depth'], depth_writer.inputs['depth'])

    print("[INFO] 深度 pass 已启用(Blender 5.0 API)")

    return depth_writer
701
+
702
+
703
def _convert_depth_exr_via_blender_api(exr_path, npy_path):
    """Convert a depth EXR to NPY using Blender's image API (no OpenEXR needed).

    The first channel of the image is taken as depth, flipped vertically,
    multiplied by the scene's ``unit_settings.scale_length``, and saved as
    float32. Values above 1000 or not greater than 0 become NaN. The EXR file
    is deleted after the NPY has been written.

    Args:
        exr_path: Path of the EXR file to read (removed on success).
        npy_path: Path of the NPY file to write.
    """
    import numpy as np

    img = bpy.data.images.load(exr_path)
    try:
        width = img.size[0]
        height = img.size[1]
        # np.array copies the pixel data, so the image can be released
        # right after this statement.
        pixels = np.array(img.pixels[:]).reshape(height, width, -1)
    finally:
        # Always release the image data-block, even if reading/reshaping
        # fails; otherwise every failed conversion leaks an image.
        bpy.data.images.remove(img)

    depth = np.flipud(pixels[:, :, 0])
    unit_scale = bpy.context.scene.unit_settings.scale_length
    depth_meters = depth * unit_scale

    max_valid_depth = 1000.0
    depth_meters[depth_meters > max_valid_depth] = np.nan
    depth_meters[depth_meters <= 0] = np.nan

    np.save(npy_path, depth_meters.astype(np.float32))
    os.remove(exr_path)
    print(f"[OK] 深度图保存(备用方法): {npy_path}")
722
+
723
+
724
def convert_depth_exr_to_npy(exr_path, npy_path):
    """Convert a Blender depth EXR into a float32 NPY file.

    The OpenEXR library is tried first. If it is missing, or anything in that
    path fails, the conversion is retried through Blender's image API
    (``_convert_depth_exr_via_blender_api``). Depth is multiplied by the
    scene's ``unit_settings.scale_length``; values above 1000 or not greater
    than 0 become NaN. On success the EXR file is deleted. If both paths
    fail, errors are printed and the EXR file is left in place.

    Args:
        exr_path: Path of the EXR file to read.
        npy_path: Path of the NPY file to write.
    """
    import numpy as np

    use_fallback = False
    try:
        import OpenEXR
        import Imath

        exr_file = OpenEXR.InputFile(exr_path)
        try:
            # Image size from the data window.
            header = exr_file.header()
            dw = header['dataWindow']
            width = dw.max.x - dw.min.x + 1
            height = dw.max.y - dw.min.y + 1

            pt = Imath.PixelType(Imath.PixelType.FLOAT)
            channels = header['channels']

            # Try the known depth channel names in priority order.
            depth_str = None
            for ch in ('depth.R', 'R', 'V', 'Z', 'depth.V'):
                if ch in channels:
                    depth_str = exr_file.channel(ch, pt)
                    print(f"[INFO] 使用深度通道: {ch}")
                    break

            if depth_str is None:
                # Unknown layout: report what exists and use the first channel.
                available_channels = list(channels.keys())
                print(f"[WARN] 可用通道: {available_channels}")
                if available_channels:
                    depth_str = exr_file.channel(available_channels[0], pt)
                    print(f"[INFO] 使用通道: {available_channels[0]}")
                else:
                    raise ValueError("无法找到深度通道")
        finally:
            # Close the handle before the file is deleted below; an open
            # handle is a leak and makes os.remove fail on Windows.
            exr_file.close()

        depth = np.frombuffer(depth_str, dtype=np.float32).reshape(height, width)

        # Scene units -> metres (the multiplication yields a writable copy).
        unit_scale = bpy.context.scene.unit_settings.scale_length
        depth_meters = depth * unit_scale

        # Mask invalid depth: beyond 1000 m, or not positive.
        max_valid_depth = 1000.0
        depth_meters[depth_meters > max_valid_depth] = np.nan
        depth_meters[depth_meters <= 0] = np.nan

        np.save(npy_path, depth_meters.astype(np.float32))

        # The temporary EXR is no longer needed.
        os.remove(exr_path)

        # Summary statistics.
        valid_mask = np.isfinite(depth_meters)
        if np.any(valid_mask):
            print(f"[OK] 深度图保存: {npy_path}")
            print(f" 形状: {depth_meters.shape}")
            print(f" 深度范围: {np.nanmin(depth_meters):.3f} - {np.nanmax(depth_meters):.3f} 米")
            print(f" 有效像素: {np.sum(valid_mask)} / {depth_meters.size} ({100*np.sum(valid_mask)/depth_meters.size:.1f}%)")
        else:
            print(f"[WARN] 深度图全部无效!")

    except ImportError:
        print("[ERROR] 需要安装 OpenEXR 库: pip install OpenEXR")
        print("[INFO] 尝试使用 Blender 内置方法...")
        use_fallback = True
    except Exception as e:
        print(f"[WARN] OpenEXR 读取失败,尝试 Blender 内置方法: {e}")
        use_fallback = True

    if use_fallback:
        # Single shared fallback for both failure modes above.
        try:
            _convert_depth_exr_via_blender_api(exr_path, npy_path)
        except Exception as e:
            print(f"[ERROR] 备用方法也失败: {e}")
            print(f"[INFO] EXR 文件保留在: {exr_path}")
820
+
821
+
822
def render_and_save(output_path, render_depth=False, depth_output=None):
    """Render the current scene and write the RGB image to ``output_path``.

    Args:
        output_path: Path of the RGB image to write.
        render_depth: Accepted for interface compatibility. Depth output is
            not produced by this function; a warning is printed when it is
            requested.
        depth_output: Depth output path (.npy). Not used by this function;
            see ``render_depth``.

    Raises:
        RuntimeError: If the render finishes but the output image is missing
            or empty.
    """
    # Do not silently ignore an explicit depth request.
    if render_depth or depth_output is not None:
        print("[WARN] render_depth/depth_output 当前未实现,本次仅输出 RGB 图像")

    # Make sure the output directory exists. exist_ok avoids the
    # check-then-create race when several renders run in parallel.
    output_dir = os.path.dirname(output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    bpy.context.scene.render.filepath = output_path

    print(f"[INFO] 开始渲染...")
    bpy.ops.render.render(write_still=True)

    # Strict check: a render that leaves no image on disk must fail loudly,
    # so callers never see "return code 0 but no file".
    if (not os.path.exists(output_path)) or os.path.getsize(output_path) <= 0:
        raise RuntimeError(f"渲染完成但输出图像不存在或为空: {output_path}")

    print(f"[OK] 渲染完成: {output_path}")
848
+
849
+
850
def save_pose(camera_object, output_path, frame_id=0, ref_position=None, ref_quaternion=None):
    """Write the camera's absolute pose to a JSON file (ERPT-compatible).

    Output fields:
        - position: camera centre, axes [X right, Y up, Z forward].
        - rotation_quaternion: [w, x, y, z], camera-to-world rotation.

    Core formula: ``R_cw_erpt = T @ R_blender_obj @ M``
        - T: Blender world (Y forward, Z up) -> unified world (Y up,
          Z forward), i.e. a Y/Z axis swap.
        - R_blender_obj: the camera object's rotation matrix.
        - M: Blender camera local (-Z forward) -> ERPT camera (+Z forward),
          i.e. a Z-axis flip.

    The rotation is read from whichever property matches the object's
    ``rotation_mode``: ``setup_camera`` may configure the camera in
    'QUATERNION' mode, in which case ``rotation_euler`` does not reflect the
    actual orientation.

    Args:
        camera_object: Blender camera object.
        output_path: Path of the JSON file to write.
        frame_id: Frame index stored in the file.
        ref_position: Reserved; currently unused.
        ref_quaternion: Reserved; currently unused.

    Returns:
        tuple: ``(position, quaternion)`` as written to the file.

    Raises:
        RuntimeError: If the pose file is missing or empty after writing.
    """
    from mathutils import Matrix, Quaternion

    # Position in Blender world coordinates (X right, Y forward, Z up).
    abs_position_blender = list(camera_object.location)

    # Orientation from the property that is active for the rotation mode.
    rotation_mode = camera_object.rotation_mode
    if rotation_mode == 'QUATERNION':
        abs_quat_blender = camera_object.rotation_quaternion.normalized()
    elif rotation_mode == 'AXIS_ANGLE':
        angle, axis_x, axis_y, axis_z = camera_object.rotation_axis_angle
        abs_quat_blender = Quaternion((axis_x, axis_y, axis_z), angle)
    else:
        # Euler modes; the Euler value carries its own axis order.
        abs_quat_blender = camera_object.rotation_euler.to_quaternion()

    # Position: swap Y and Z to reach the unified (Y-up, Z-forward) frame.
    abs_position_unified = [
        abs_position_blender[0],  # X_unified = X_blender
        abs_position_blender[2],  # Y_unified = Z_blender (up)
        abs_position_blender[1],  # Z_unified = Y_blender (forward)
    ]

    # Object rotation matrix in Blender coordinates.
    R_obj_blender = abs_quat_blender.to_matrix()

    # T: Blender world -> unified world (swap Y and Z).
    T_blender_to_unified = Matrix([
        [1, 0, 0],
        [0, 0, 1],
        [0, 1, 0],
    ])

    # M: ERPT camera axes -> Blender camera local axes (flip Z), because the
    # Blender camera looks along -Z while the ERPT camera looks along +Z.
    M_cam = Matrix([
        [1, 0, 0],
        [0, 1, 0],
        [0, 0, -1],
    ])

    # ERPT camera -> (M) -> Blender local -> (R_obj) -> Blender world
    # -> (T) -> unified world.
    R_cw_erpt = T_blender_to_unified @ R_obj_blender @ M_cam

    quat_cw = R_cw_erpt.to_quaternion()
    abs_quaternion_cw = [quat_cw.w, quat_cw.x, quat_cw.y, quat_cw.z]

    pose_data = {
        "frame_id": frame_id,
        "position": abs_position_unified,
        "rotation_quaternion": abs_quaternion_cw,
        "camera_type": "erp_ray",
        "coordinate_system": "right-handed, Y-up, Z-forward (cam_to_world)",
        "render_method": "blender_cycles"
    }

    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(pose_data, f, indent=2)

    # Strict check so a failed write cannot go unnoticed by callers.
    if (not os.path.exists(output_path)) or os.path.getsize(output_path) <= 0:
        raise RuntimeError(f"位姿文件写入失败或为空: {output_path}")

    print(f"[OK] 位姿保存: {output_path}")
    print(f" Position (absolute, meters): {abs_position_unified}")
    print(f" Rotation (cam_to_world): {abs_quaternion_cw}")

    return abs_position_unified, abs_quaternion_cw
936
+
937
+
938
def _parse_components(text, cast, count, option):
    """Split a comma-separated option value into exactly ``count`` numbers."""
    values = [cast(part) for part in text.split(',')]
    if len(values) != count:
        raise ValueError(
            f"{option} 需要 {count} 个逗号分隔的数值,实际得到 {len(values)} 个: {text!r}"
        )
    return values


def main():
    """Run the ERP rendering pipeline for one camera pose.

    Steps: clear the scene, import the mesh, create and place the ERP camera,
    configure rendering and lighting, render the image, and save the
    camera's absolute pose. A ``[ABS_POSE]`` line is printed at the end for
    batch scripts to parse.

    Raises:
        ValueError: If a vector-valued option has the wrong number of
            components or a component cannot be parsed as a number.
    """
    args = parse_args()

    # Vector-valued options, validated up front so a malformed value fails
    # with a clear message instead of deep inside Blender.
    camera_pos = _parse_components(args.camera_pos, float, 3, "--camera-pos")
    camera_rot = _parse_components(args.camera_rot, float, 3, "--camera-rot")
    camera_rot_quat = None
    if args.camera_rot_quat:
        camera_rot_quat = _parse_components(args.camera_rot_quat, float, 4, "--camera-rot-quat")
    resolution = _parse_components(args.resolution, int, 2, "--resolution")

    # Reference-frame pose: parsed and passed through, but save_pose
    # currently does not use it.
    ref_position = None
    ref_quaternion = None
    if args.ref_position:
        ref_position = [float(x) for x in args.ref_position.split(',')]
    if args.ref_quaternion:
        ref_quaternion = [float(x) for x in args.ref_quaternion.split(',')]

    # Pose output path defaults to "<image stem>_pose.json".
    if args.pose_output:
        pose_output = args.pose_output
    else:
        pose_output = os.path.splitext(args.output)[0] + '_pose.json'

    print("=" * 60)
    print("Blender ERP渲染")
    print("=" * 60)

    # 1. Clear the scene.
    print("\n[1/6] 清空场景...")
    clear_scene()

    # 2. Import the mesh.
    print("\n[2/6] 导入mesh...")
    import_mesh(args.mesh)

    # 3. Create the ERP camera.
    print("\n[3/6] 创建ERP相机...")
    camera = create_erp_camera()
    setup_camera(camera, camera_pos, rotation_euler=camera_rot, rotation_quat=camera_rot_quat)
    bpy.context.scene.camera = camera

    # Scene bounds are handed to the lighting setup.
    scene_bounds = get_scene_bounds()
    print(f"[INFO] 场景边界: min={scene_bounds[0]}, max={scene_bounds[1]}")

    # 4. Render settings and lighting.
    print("\n[4/6] 设置渲染参数...")
    setup_render_settings(resolution, args.engine, args.samples)
    setup_lighting(camera_position=camera_pos, scene_bounds=scene_bounds)

    # 5. Render. The depth options are forwarded so the CLI flags reach the
    # function that declares them instead of being dropped here.
    print("\n[5/6] 渲染中...")
    render_and_save(
        args.output,
        render_depth=args.render_depth,
        depth_output=args.depth_output,
    )

    # 6. Save the pose (absolute pose, not relative to the first frame).
    print("\n[6/6] 保存位姿...")
    abs_pos, abs_quat = save_pose(
        camera,
        pose_output,
        frame_id=args.frame_id,
        ref_position=ref_position,
        ref_quaternion=ref_quaternion
    )

    # Machine-readable absolute pose for batch scripts.
    print(f"[ABS_POSE] {abs_pos[0]},{abs_pos[1]},{abs_pos[2]}|{abs_quat[0]},{abs_quat[1]},{abs_quat[2]},{abs_quat[3]}")

    print("\n" + "=" * 60)
    print("渲染完成!")
    print("=" * 60)


if __name__ == "__main__":
    main()
1015
+
pipelines/run_blend_pipeline.py ADDED
@@ -0,0 +1,1860 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Blend 全流程 Pipeline v5(单 Blender 进程)
4
+
5
+ v3 → v4 改进:
6
+ - Phase 0+1+2 全部在一个 Blender 进程内完成
7
+ - 不再导出 GLB(用 Blender scene.ray_cast 替代 trimesh)
8
+ - 不再每帧重启 Blender(同进程内移动相机 + 渲染)
9
+ - 去掉 trimesh 外部依赖
10
+
11
+ v5 → v6 改进:
12
+ - 新增 GLB/GLTF 格式支持(--glb 参数)
13
+ - --blend / --glb 二选一,支持 .blend .glb .gltf 三种格式
14
+ - GLB 导入后与 .blend 流程完全统一
15
+
16
+ 对外接口 100% 兼容 v3/v5:
17
+ - 命令行参数完全一致(新增 --glb 为可选补充)
18
+ - 输出文件名完全一致(panorama_XXXX.png / _depth.npy / pose_XXXX.json)
19
+ - run_full_pipeline.py 零改动
20
+
21
+ 双模式运行:
22
+ 1) python run_blend_pipeline.py --blender X --blend Y ...
23
+ python run_blend_pipeline.py --blender X --glb Y ...
24
+ → 检测到 --blender → 启动 blender --python THIS_FILE -- --blend/--glb Y ...
25
+ 2) Blender 内部自动进入 in-process 模式
26
+ → Phase 0 (边界) + Phase 1 (撒点+过滤) + Phase 2 (边渲边选)
27
+ """
28
+
29
+ # =====================================================================
30
+ # 检测运行环境
31
+ # =====================================================================
32
+ try:
33
+ import bpy
34
+ from mathutils import Vector, Euler, Matrix
35
+ IN_BLENDER = True
36
+ except ImportError:
37
+ IN_BLENDER = False
38
+
39
+ import argparse
40
+ import json
41
+ import math
42
+ import os
43
+ import subprocess
44
+ import sys
45
+ import time
46
+ import random as _random
47
+ from pathlib import Path
48
+
49
+ import numpy as np
50
+
51
+
52
+ # =====================================================================
53
+ # 常量
54
+ # =====================================================================
55
+
56
# Default equirectangular (ERP) grid size, rows x columns; these are the
# default H / W arguments of build_ray_directions() and get_ray_dirs().
WARP_H, WARP_W = 128, 256
# Minimum safety distance kept from walls (guards against clipping through geometry).
MARGIN = 0.5

# Default tuning values. DEFAULT_STOP_GAIN and DEFAULT_MIN_FRAMES are the
# argparse defaults of --stop-gain and --min-frames.
DEFAULT_STOP_GAIN = 0.08
DEFAULT_OVERLAP_PENALTY = 0.5
DEFAULT_MIN_DIST = 0.6
DEFAULT_MIN_FRAMES = 5

# Fixed camera rotations as [x, y, z] angles in radians, keyed by the
# --rotation-type value ("random_yaw" is handled inside get_camera_rot()).
ROTATION_TYPES = dict(
    none=[0.0, 0.0, 0.0],
    rotate_x_90=[math.pi / 2, 0.0, 0.0],
    rotate_x_180=[math.pi, 0.0, 0.0],
    rotate_z_90=[0.0, 0.0, math.pi / 2],
)
71
+
72
+
73
def get_camera_rot(rotation_type: str, frame_id: int):
    """Return the camera rotation as a fresh ``[x, y, z]`` list of radians.

    For ``"random_yaw"`` the camera is tilted by pi/2 about X and given a yaw
    about Z: exactly 0 for frame 0, otherwise drawn uniformly from [0, 2*pi].
    Any other ``rotation_type`` is looked up in ``ROTATION_TYPES`` and copied,
    so callers may mutate the result; an unknown key raises ``KeyError``.
    """
    if rotation_type != "random_yaw":
        return list(ROTATION_TYPES[rotation_type])

    if frame_id == 0:
        yaw = 0.0
    else:
        yaw = _random.uniform(0, 2 * math.pi)
    return [math.pi / 2, 0.0, yaw]
78
+
79
+
80
+ # =====================================================================
81
+ # 参数解析(兼容两种模式)
82
+ # =====================================================================
83
+
84
def parse_args_python():
    """Parse ``sys.argv`` for the plain-Python launcher mode.

    ``--blender`` and ``--output-dir`` are required, and exactly one of
    ``--blend`` / ``--glb`` must be supplied.

    Returns:
        argparse.Namespace holding the parsed options.
    """
    cli = argparse.ArgumentParser(description="Blend Pipeline v5(边渲边选)")
    cli.add_argument("--blender", type=str, required=True)

    # Exactly one scene source is accepted.
    source = cli.add_mutually_exclusive_group(required=True)
    for flag, help_text in (("--blend", ".blend 场景文件路径"),
                            ("--glb", ".glb / .gltf 场景文件路径")):
        source.add_argument(flag, type=str, default=None, help=help_text)

    cli.add_argument("--output-dir", type=str, required=True)
    cli.add_argument("--num-frames", type=int, default=30)
    cli.add_argument("--render-depth", action="store_true")

    # Plain "typed value with a default" options, registered in help order.
    typed_options = (
        ("--resolution", str, "2048,1024"),
        ("--samples", int, 128),
        ("--engine", str, "CYCLES"),
        ("--exposure", float, 0.0),
        ("--grid-spacing", float, 0.5),
        ("--camera-height", float, None),
        ("--stop-gain", float, DEFAULT_STOP_GAIN),
        ("--stop-score", float, -0.3),
        ("--stop-delta", float, 0.08),
        ("--min-frames", int, DEFAULT_MIN_FRAMES),
    )
    for flag, kind, default in typed_options:
        cli.add_argument(flag, type=kind, default=default)

    cli.add_argument("--rotation-type", type=str, default="random_yaw",
                     choices=["none", "rotate_x_90", "rotate_x_180",
                              "rotate_z_90", "random_yaw"])
    # --gain-curve is on by default; --no-gain-curve switches the same flag off.
    cli.add_argument("--gain-curve", action="store_true", default=True)
    cli.add_argument("--no-gain-curve", dest="gain_curve", action="store_false")
    return cli.parse_args()
112
+
113
+
114
def parse_args_blender():
    """Parse the script arguments when running inside Blender.

    Only the part of ``sys.argv`` after the first ``--`` is parsed; when there
    is no ``--`` an empty argument list is parsed (so the required options
    make argparse exit with an error).  Unlike the launcher parser there is
    no ``--blender`` / ``--render-depth`` option here.

    Returns:
        argparse.Namespace holding the parsed options.
    """
    try:
        separator = sys.argv.index("--")
    except ValueError:
        script_args = []
    else:
        script_args = sys.argv[separator + 1:]

    cli = argparse.ArgumentParser()

    # Exactly one scene source is accepted.
    source = cli.add_mutually_exclusive_group(required=True)
    for flag, help_text in (("--blend", ".blend 场景文件路径"),
                            ("--glb", ".glb / .gltf 场景文件路径")):
        source.add_argument(flag, type=str, default=None, help=help_text)

    cli.add_argument("--output-dir", type=str, required=True)

    # Plain "typed value with a default" options, registered in help order.
    typed_options = (
        ("--num-frames", int, 30),
        ("--resolution", str, "2048,1024"),
        ("--samples", int, 128),
        ("--engine", str, "CYCLES"),
        ("--exposure", float, 0.0),
        ("--grid-spacing", float, 0.5),
        ("--camera-height", float, None),
        ("--stop-gain", float, DEFAULT_STOP_GAIN),
        ("--stop-score", float, -0.3),
        ("--stop-delta", float, 0.08),
        ("--min-frames", int, DEFAULT_MIN_FRAMES),
    )
    for flag, kind, default in typed_options:
        cli.add_argument(flag, type=kind, default=default)

    cli.add_argument("--rotation-type", type=str, default="random_yaw",
                     choices=["none", "rotate_x_90", "rotate_x_180",
                              "rotate_z_90", "random_yaw"])
    # --gain-curve is on by default; --no-gain-curve switches the same flag off.
    cli.add_argument("--gain-curve", action="store_true", default=True)
    cli.add_argument("--no-gain-curve", dest="gain_curve", action="store_false")
    return cli.parse_args(script_args)
145
+
146
+
147
+ # #####################################################################
148
+ #
149
+ # Python 模式入口: 启动单个 Blender 进程
150
+ #
151
+ # #####################################################################
152
+
153
def main_python():
    """Launch one background Blender process that runs this same script.

    Parses the launcher command line, resolves the scene and output paths to
    absolute paths, creates the output directory, and forwards the options
    after ``--`` so the in-Blender side can parse them.  ``--blender`` and
    ``--render-depth`` are consumed here and are not forwarded.

    The function does not return: it calls ``sys.exit`` with Blender's return
    code.  Blender is started with ``--python-exit-code 1`` so that an
    uncaught exception in the script yields a non-zero exit status instead of
    being reported as success.
    """
    args = parse_args_python()

    # Work out which scene format was requested.
    if args.blend:
        scene_path = str(Path(args.blend).resolve())
        scene_flag = "--blend"
        scene_label = f"Blend: {scene_path}"
    else:
        scene_path = str(Path(args.glb).resolve())
        scene_flag = "--glb"
        scene_label = f"GLB: {scene_path}"

    output_dir = str(Path(args.output_dir).resolve())
    os.makedirs(output_dir, exist_ok=True)

    this_script = str(Path(__file__).resolve())

    # Build the Blender command line; everything after "--" is passed through
    # to the script untouched by Blender.
    cmd = [
        args.blender, "--background",
        # Must precede --python: Blender handles its arguments in order, and
        # without this flag a Python exception still exits with status 0.
        "--python-exit-code", "1",
        "--python", this_script,
        "--",
        scene_flag, scene_path,
        "--output-dir", output_dir,
        "--num-frames", str(args.num_frames),
        "--resolution", args.resolution,
        "--samples", str(args.samples),
        "--engine", args.engine,
        "--exposure", str(args.exposure),
        "--grid-spacing", str(args.grid_spacing),
        "--stop-gain", str(args.stop_gain),
        "--stop-score", str(args.stop_score),
        "--stop-delta", str(args.stop_delta),
        "--min-frames", str(args.min_frames),
        "--rotation-type", args.rotation_type,
    ]
    if args.camera_height is not None:
        cmd += ["--camera-height", str(args.camera_height)]
    if not args.gain_curve:
        cmd += ["--no-gain-curve"]

    print("=" * 60)
    print("ERPT Blend Pipeline v5(单进程边渲边选)")
    print("=" * 60)
    print(f" {scene_label}")
    print(f" Output: {output_dir}")
    print(f" Max frames: {args.num_frames}")

    # No timeout on purpose: render time of large scenes is unpredictable.
    proc = subprocess.run(cmd, text=True)
    sys.exit(proc.returncode)
206
+
207
+
208
+ # #####################################################################
209
+ #
210
+ # Blender 模式: Phase 0 + 1 + 2 全部在 Blender 内部执行
211
+ #
212
+ # #####################################################################
213
+
214
+ # =====================================================================
215
+ # Phase 0: 加载场景 + 获取边界
216
+ # =====================================================================
217
+
218
def load_scene(scene_path):
    """Load a scene file and return the axis-aligned bounds of its meshes.

    Supported formats are ``.blend`` (opened as the main file, after which
    every layer collection is included and un-hidden) and ``.glb`` / ``.gltf``
    (imported into an empty factory-settings scene).  In both cases all mesh
    objects are made visible.

    Returns:
        ``(bmin, bmax)``: two 3-element lists with the world-space minimum and
        maximum over the bounding-box corners of every mesh object.  If the
        scene has no mesh, the placeholder ``[-5, -5, 0]`` / ``[5, 5, 3]`` is
        returned.

    Raises:
        RuntimeError: the glTF importer operator is not available.
        ValueError: the file extension is not one of the supported formats.
    """
    ext = Path(scene_path).suffix.lower()
    print(f"\n[Phase 0] 加载场景: {scene_path} (格式: {ext})")

    def include_layer_tree(layer_coll):
        # Recursively include and un-hide a layer collection and its children.
        layer_coll.exclude = False
        layer_coll.hide_viewport = False
        for child in layer_coll.children:
            include_layer_tree(child)

    def reveal_meshes():
        # Make sure every mesh object in the scene is visible.
        for obj in bpy.context.scene.objects:
            if obj.type == 'MESH':
                obj.hide_viewport = False
                obj.hide_set(False)

    if ext == ".blend":
        bpy.ops.wm.open_mainfile(filepath=scene_path)
        include_layer_tree(bpy.context.view_layer.layer_collection)
        reveal_meshes()
    elif ext in (".glb", ".gltf"):
        # Start from an empty scene (no default cube / lamp / camera).
        bpy.ops.wm.read_factory_settings(use_empty=True)
        if not hasattr(bpy.ops.import_scene, 'gltf'):
            raise RuntimeError(
                "当前 Blender 版本不支持 import_scene.gltf,"
                "请升级到 Blender 3.0 及以上版本。"
            )
        bpy.ops.import_scene.gltf(filepath=scene_path)
        reveal_meshes()
    else:
        raise ValueError(
            f"不支持的场景格式: {ext},"
            f"支持的格式: .blend / .glb / .gltf"
        )

    bpy.context.view_layer.update()

    # World-space AABB over all mesh bounding boxes (shared by both formats).
    bmin = [float('inf')] * 3
    bmax = [float('-inf')] * 3
    meshes = [obj for obj in bpy.context.scene.objects if obj.type == 'MESH']
    for obj in meshes:
        for corner in obj.bound_box:
            world_corner = obj.matrix_world @ Vector(corner)
            for axis in range(3):
                bmin[axis] = min(bmin[axis], world_corner[axis])
                bmax[axis] = max(bmax[axis], world_corner[axis])
    n_mesh = len(meshes)

    if bmin[0] == float('inf'):
        # Nothing contributed a corner: fall back to a placeholder box.
        bmin, bmax = [-5, -5, 0], [5, 5, 3]

    print(f" Mesh 数量: {n_mesh}")
    print(f" 边界 (Z-up): min=[{bmin[0]:.1f}, {bmin[1]:.1f}, {bmin[2]:.1f}] "
          f"max=[{bmax[0]:.1f}, {bmax[1]:.1f}, {bmax[2]:.1f}]")
    return bmin, bmax
291
+
292
+
293
+ # =====================================================================
294
+ # Phase 1: 撒点 + 4 层 Blender ray_cast 过滤
295
+ # =====================================================================
296
+
297
def compute_camera_heights(floor_z, ceiling_z, manual_height=None, bmin=None, bmax=None):
    """Compute the list of camera heights (world Z) to place candidates at.

    Strategy:
      * ``manual_height`` given: only that height is used.
      * ``ceiling_z <= floor_z``: a single height ``floor_z + 1.5``.
      * Otherwise floor slabs are looked up with ``_detect_floor_levels``; for
        every detected floor (its ceiling being the next floor, or
        ``ceiling_z`` for the last one) heights are generated as:
          - the fixed eye heights 0.5 / 0.8 / 1.2 / 1.7 / 2.1 m above the floor,
          - for storeys taller than 3 m, extra levels above 2.1 m with a step
            that starts at 1.0 m and grows by 0.5 m up to 3.0 m,
          - a top level 0.3 m below the ceiling, when that is more than
            0.5 m above the highest level generated so far.
        Every generated level stays below ``ceiling - 0.3``.
      * If detection finds nothing, yields no height, or raises, the same
        per-floor rule is applied once to ``floor_z`` .. ``ceiling_z``.

    Returns:
        Sorted list of unique heights rounded to 2 decimals (the manual and
        ``floor_z + 1.5`` fallbacks are returned unrounded).
    """
    CEIL_CLEARANCE = 0.3                        # highest level stays this far below the ceiling
    FIXED_HEIGHTS = [0.5, 0.8, 1.2, 1.7, 2.1]   # eye heights used below 2.5 m

    if manual_height is not None:
        return [manual_height]
    if ceiling_z - floor_z <= 0:
        return [floor_z + 1.5]

    def _stepped_heights_for_floor(fz, local_ceil):
        """Heights for one storey: fixed levels, growing steps, dynamic top."""
        limit = local_ceil - CEIL_CLEARANCE
        levels = [fz + eye_h for eye_h in FIXED_HEIGHTS if fz + eye_h < limit]

        # Tall storey: keep adding levels above the last fixed height.
        if local_ceil - fz > 3.0:
            step = 1.0
            reach = FIXED_HEIGHTS[-1] + step
            while fz + reach < limit:
                levels.append(fz + reach)
                step = min(step + 0.5, 3.0)
                reach += step

        # Dynamic top level, only if clearly above what is already there.
        reference = max(levels) if levels else fz
        if limit > reference + 0.5:
            levels.append(limit)
        return levels

    def _unique_sorted(values):
        return sorted({round(v, 2) for v in values})

    # First try to locate the floor slabs with Blender ray casts.
    try:
        floors = _detect_floor_levels(floor_z, ceiling_z, bmin, bmax)
        if not floors:
            print(f" [楼层检测] 未检测到楼板,使用启发式")
        else:
            labels = [f'{z:.2f}m' for z in floors]
            print(f" [楼层检测] 发现 {len(floors)} 个楼层: "
                  f"{labels}")
            # Ceiling of each floor = next floor slab, or the global ceiling.
            local_ceilings = list(floors[1:]) + [ceiling_z]
            stacked = []
            for fz, local_ceil in zip(floors, local_ceilings):
                stacked.extend(_stepped_heights_for_floor(fz, local_ceil))

            if stacked:
                result = _unique_sorted(stacked)
                for h in result:
                    print(f" 高度 Z={h:.2f}m (离地 {h - floors[0]:.2f}m)")
                return result
    except Exception as e:
        print(f" [楼层检测] 异常: {e},使用启发式")

    # Fallback: treat the whole floor_z..ceiling_z range as a single storey.
    fallback = _stepped_heights_for_floor(floor_z, ceiling_z)
    return _unique_sorted(fallback) if fallback else [floor_z + 1.5]
383
+
384
+
385
def _detect_floor_levels(floor_z, ceiling_z, bmin=None, bmax=None):
    """Detect floor-slab heights by casting rays downwards through the scene.

    A 3x3 grid of XY sample points is placed around the scene centre.  From
    each point a ray is cast straight down from ``ceiling_z + 1.0``; after
    every hit the next ray starts 0.05 below the hit point, up to 10 hits per
    sample.  Hits whose surface normal has ``z > 0.5`` are kept as floor
    candidates.  The candidate heights are sorted and split into clusters
    wherever two consecutive heights differ by more than 1.5.

    Each cluster is represented by its median height and accepted only if
      * that height lies in ``[floor_z - 0.5, ceiling_z - 1.0]``, and
      * for at least half of the cluster's sample points an upward ray from
        1.0 above the slab hits something closer than 30.0 above the slab
        (this rejects open-air surfaces such as the outside of a roof).

    Args:
        floor_z, ceiling_z: vertical extent used to place and validate rays.
        bmin, bmax: optional scene AABB; when given, the sample grid is
            centred on its XY centre and spans a quarter of each XY extent,
            clamped to [2, 20].  Otherwise the grid is centred on the origin
            with a spacing of 2.

    Returns:
        Sorted list of accepted floor heights (possibly empty).
    """
    scene = bpy.context.scene
    depsgraph = bpy.context.evaluated_depsgraph_get()
    dir_down = Vector((0, 0, -1))
    dir_up = Vector((0, 0, 1))

    # XY centre and sampling radius derived from the scene AABB.
    if bmin is not None and bmax is not None:
        cx = (bmin[0] + bmax[0]) / 2
        cy = (bmin[1] + bmax[1]) / 2
        rx = min(20.0, max(2.0, (bmax[0] - bmin[0]) * 0.25))
        ry = min(20.0, max(2.0, (bmax[1] - bmin[1]) * 0.25))
    else:
        cx, cy = 0.0, 0.0
        rx, ry = 2.0, 2.0

    # 3x3 sample grid scaled with the scene size.
    offsets = [(fx * rx, fy * ry) for fx in (-1, 0, 1) for fy in (-1, 0, 1)]

    hit_zs = []
    for dx, dy in offsets:
        px, py = cx + dx, cy + dy
        # Penetrating ray cast: after each hit continue from just below it.
        cur_z = ceiling_z + 1.0
        for _attempt in range(10):
            hit, loc, norm, *_rest = scene.ray_cast(
                depsgraph, Vector((px, py, cur_z)), dir_down)
            if not hit:
                break
            # Upward-facing normal: this is a floor / slab surface.
            if norm.z > 0.5:
                hit_zs.append((loc.z, px, py))
            cur_z = loc.z - 0.05

    if not hit_zs:
        return []

    # Cluster the sorted heights; a gap above 1.5 starts a new floor.
    hit_zs.sort(key=lambda t: t[0])
    clusters = [[hit_zs[0]]]
    for item in hit_zs[1:]:
        if item[0] - clusters[-1][-1][0] > 1.5:
            clusters.append([item])
        else:
            clusters[-1].append(item)

    MAX_CEILING_DIST = 30.0  # anything farther counts as open air / roof exterior
    floors = []
    for cluster in clusters:
        # Clusters are built from the sorted list, so each one is already in
        # ascending order and the middle element is the median height.
        fz = cluster[len(cluster) // 2][0]
        if not (floor_z - 0.5 <= fz <= ceiling_z - 1.0):
            continue

        # Check for a ceiling above the slab at every sample point of the cluster.
        n_tested = len(cluster)
        n_has_ceiling = 0
        for _z, px, py in cluster:
            hit_ceil, loc_ceil, *_rest = scene.ray_cast(
                depsgraph, Vector((px, py, fz + 1.0)), dir_up)
            if hit_ceil and (loc_ceil.z - fz) < MAX_CEILING_DIST:
                n_has_ceiling += 1

        # At least half of the samples have a ceiling: a real floor slab.
        if n_tested > 0 and n_has_ceiling / n_tested >= 0.5:
            floors.append(fz)
        else:
            print(f" [楼层检测] Z={fz:.2f}m 上方无天花板"
                  f"({n_has_ceiling}/{n_tested}),排除(可能是屋顶外表面)")

    return sorted(floors)
471
+
472
+
473
def generate_candidate_grid(bmin, bmax, x_spacing, y_spacing, heights):
    """Build the candidate camera positions on a regular XY grid.

    The grid is centred on the XY centre of the box ``bmin``..``bmax``, uses
    ``x_spacing`` / ``y_spacing`` as cell size, and keeps only points at least
    ``MARGIN`` inside the box.  Grid points are ordered by increasing squared
    grid-index distance from the centre, and that ordered list is repeated for
    every entry of ``heights``.

    Returns:
        List of ``[x, y, z]`` float triples, grouped by height in the order of
        ``heights``.
    """
    cx = (bmin[0] + bmax[0]) / 2
    cy = (bmin[1] + bmax[1]) / 2
    x_half = int((bmax[0] - cx - MARGIN) / x_spacing)
    y_half = int((bmax[1] - cy - MARGIN) / y_spacing)

    x_lo, x_hi = bmin[0] + MARGIN, bmax[0] - MARGIN
    y_lo, y_hi = bmin[1] + MARGIN, bmax[1] - MARGIN

    xy_offsets = []
    for ix in range(-x_half, x_half + 1):
        x = cx + ix * x_spacing
        if not (x_lo <= x <= x_hi):
            continue
        for iy in range(-y_half, y_half + 1):
            y = cy + iy * y_spacing
            if y_lo <= y <= y_hi:
                xy_offsets.append((ix * ix + iy * iy, x, y))
    # Stable sort: centre-most points first, ties keep generation order.
    xy_offsets.sort(key=lambda entry: entry[0])

    candidates = [[float(x), float(y), float(z)]
                  for z in heights
                  for _, x, y in xy_offsets]

    n_xy = len(xy_offsets)
    print(f" 网格: {n_xy}点/层 x {len(heights)}层 = {len(candidates)} 个候选")
    print(f" 中心: ({cx:.1f}, {cy:.1f}), X间距={x_spacing:.1f}m, Y间距={y_spacing:.1f}m")
    for i, z in enumerate(heights):
        print(f" 第{i+1}层: Z={z:.2f}m")
    return candidates
500
+
501
+
502
def _build_26_directions():
    """Return 26 sampling directions as ``mathutils.Vector`` objects.

    Order of the result:
      * indices 0-15: horizontal ring, 16 directions evenly spaced in azimuth;
      * indices 16-20: 5 directions at +45 degrees elevation;
      * indices 21-25: 5 directions at -45 degrees elevation.
    """
    horizontal_step = 2 * math.pi / 16
    dirs = []
    for i in range(16):
        azimuth = i * horizontal_step
        dirs.append(Vector((math.cos(azimuth), math.sin(azimuth), 0.0)))

    elev = math.pi / 4
    cos_e, sin_e = math.cos(elev), math.sin(elev)
    # Upper ring first, then the mirrored lower ring.
    for z_component in (sin_e, -sin_e):
        for i in range(5):
            azimuth = i * (2 * math.pi / 5)
            dirs.append(Vector((math.cos(azimuth) * cos_e,
                                math.sin(azimuth) * cos_e,
                                z_component)))
    return dirs
520
+
521
+
522
def raycast_6layer_filter(candidates, room_height, min_wall_dist=1.0):
    """Filter candidate camera positions with Blender ``scene.ray_cast``.

    Despite the name, seven checks are applied in order; a candidate is dropped
    at the first check it fails and counted under that reason:

      1. Indoor: rays straight up and straight down must both hit, within
         ``max(5.0, room_height)`` and ``max(3.0, room_height)`` respectively.
      2. Inside geometry: 2 or more of the 26 directions hit closer than 0.2.
      3. Corner: more than 8 of the 16 horizontal directions hit closer than
         ``min_wall_dist``.
      4. Enclosed: at least 90% of directions hit, the coefficient of variation
         of the hit distances is below 0.30 and the farthest hit is below 8.
      5. Wall clearance: the nearest horizontal hit is closer than 0.3.
      6. View quality: fewer than 35% of directions hit within [0.5, 20].
      7. Narrow slit: for some pair of opposite horizontal directions the two
         distances add up to less than 1.5.

    Checks 2-7 all reuse the same 26 ray casts per candidate.

    Args:
        candidates: iterable of ``[x, y, z]`` positions.
        room_height: scene height used for the up / down distance limits.
        min_wall_dist: distance threshold of the corner check.

    Returns:
        List of the candidates (same objects) that passed every check.
    """
    scene = bpy.context.scene
    depsgraph = bpy.context.evaluated_depsgraph_get()

    total = len(candidates)
    max_up = max(5.0, room_height)
    max_down = max(3.0, room_height)
    straight_up = Vector((0, 0, 1))
    straight_down = Vector((0, 0, -1))
    directions = _build_26_directions()
    n_dirs = len(directions)
    inf = float('inf')

    MIN_WALL_CLEARANCE = 0.3   # check 5: nearest horizontal surface
    VIEW_GOOD_MIN = 0.5        # check 6: lower bound of a useful hit distance
    VIEW_GOOD_MAX = 20.0       # check 6: upper bound of a useful hit distance
    VIEW_GOOD_RATIO = 0.35     # check 6: minimum share of useful directions
    MIN_SLIT_WIDTH = 1.5       # check 7: minimum sum of opposite distances

    def rejection_reason(origin):
        """Return the stats key of the first failed check, or None if all pass."""
        # Check 1: one ray up, one ray down.
        hit, loc, *_ = scene.ray_cast(depsgraph, origin, straight_up)
        if not hit or (loc - origin).length > max_up:
            return "无天花板"
        hit, loc, *_ = scene.ray_cast(depsgraph, origin, straight_down)
        if not hit or (loc - origin).length > max_down:
            return "无地板"

        # Distances along the 26 directions; a miss counts as infinity.
        dists = []
        for direction in directions:
            hit, loc, *_ = scene.ray_cast(depsgraph, origin, direction)
            dists.append((loc - origin).length if hit else inf)
        horizontal = dists[:16]

        # Check 2: inside an object.
        if sum(1 for d in dists if d < 0.2) >= 2:
            return "穿模"

        # Check 3: stuck in a corner.
        if sum(1 for d in horizontal if d < min_wall_dist) > 8:
            return "角落"

        # Check 4: enclosed in a small, uniform cavity.
        finite = [d for d in dists if d < inf]
        hit_rate = len(finite) / n_dirs
        if hit_rate >= 0.90 and len(finite) >= 2:
            mean_d = sum(finite) / len(finite)
            max_d = max(finite)
            if mean_d > 0:
                var = sum((d - mean_d) ** 2 for d in finite) / len(finite)
                cv = var ** 0.5 / mean_d
                if cv < 0.30 and max_d < 8.0:
                    return "包裹"

        # Check 5: too close to a wall.
        horiz_finite = [d for d in horizontal if d < inf]
        if horiz_finite and min(horiz_finite) < MIN_WALL_CLEARANCE:
            return "贴墙"

        # Check 6: too few directions with a useful hit distance.
        n_good = sum(1 for d in dists if VIEW_GOOD_MIN <= d <= VIEW_GOOD_MAX)
        if n_good / n_dirs < VIEW_GOOD_RATIO:
            return "视野差"

        # Check 7: horizontal direction i and i + 8 are opposite; a miss is
        # treated as distance 999.
        for d_fwd, d_bwd in zip(dists[:8], dists[8:16]):
            fwd = d_fwd if d_fwd < inf else 999
            bwd = d_bwd if d_bwd < inf else 999
            if fwd + bwd < MIN_SLIT_WIDTH:
                return "窄缝"

        return None

    passed = []
    stats = {"无天花板": 0, "无地板": 0, "穿模": 0, "角落": 0,
             "包裹": 0, "贴墙": 0, "视野差": 0, "窄缝": 0}

    t_start = time.time()
    report_every = max(1, total // 10)

    for idx, pos in enumerate(candidates):
        if idx > 0 and idx % report_every == 0:
            print(f" 过滤进度: {idx}/{total} ({idx*100//total}%)", flush=True)

        reason = rejection_reason(Vector(pos))
        if reason is None:
            passed.append(pos)
        else:
            stats[reason] += 1

    elapsed = time.time() - t_start
    print(f" 过滤统计 ({elapsed:.1f}s): 总计={total}, 通过={len(passed)}")
    for reason, count in stats.items():
        print(f" ❌ {reason}: {count} ({count * 100 // max(total, 1)}%)")
    print(f" 阈值: 天花板<{max_up:.1f}m, 地板<{max_down:.1f}m, "
          f"穿模<0.2m, 角落<{min_wall_dist:.1f}m, "
          f"包裹: hit≥90%+cv<0.3+max<8m, "
          f"贴墙<{MIN_WALL_CLEARANCE}m, "
          f"视野: ≥{VIEW_GOOD_RATIO:.0%}方向 {VIEW_GOOD_MIN}-{VIEW_GOOD_MAX}m, "
          f"窄缝<{MIN_SLIT_WIDTH}m")

    if len(passed) < 5 and total > 20:
        print(f" [诊断] 通过率低 ({len(passed)}/{total})")

    return passed
656
+
657
+
658
def setup_erp_camera():
    """Replace all cameras in the scene with one equirectangular panorama camera.

    Every existing CAMERA object of the current scene is removed, a new
    ``ERP_Camera`` of type ``PANO`` is created, linked to the scene's root
    collection and made the active scene camera.  The equirectangular
    panorama type is set on the camera data when it has a ``panorama_type``
    attribute, and on its ``cycles`` settings when those exist.

    Returns:
        The newly created camera object.
    """
    scene = bpy.context.scene

    old_cameras = [obj for obj in scene.objects if obj.type == 'CAMERA']
    for old in old_cameras:
        bpy.data.objects.remove(old, do_unlink=True)

    cam_data = bpy.data.cameras.new("ERP_Camera")
    cam_data.type = 'PANO'
    if hasattr(cam_data, 'panorama_type'):
        cam_data.panorama_type = 'EQUIRECTANGULAR'
    if hasattr(cam_data, 'cycles'):
        cam_data.cycles.panorama_type = 'EQUIRECTANGULAR'

    cam_obj = bpy.data.objects.new("ERP_Camera", cam_data)
    bpy.context.scene.collection.objects.link(cam_obj)
    bpy.context.scene.camera = cam_obj
    print(f" 创建 ERP 相机: {cam_obj.name}")
    return cam_obj
676
+
677
+
678
def enable_gpu():
    """Switch Cycles to GPU rendering if an OPTIX or CUDA device is available.

    The backends are tried in the order OPTIX, CUDA.  For the first backend
    that reports at least one device of its type, exactly the devices of that
    type are enabled and the scene is set to render on the GPU.  A backend
    that raises is skipped.  If no backend works the scene is set to CPU
    rendering; if the Cycles preferences cannot be accessed at all, only an
    error is printed.

    Returns:
        True when GPU rendering was enabled, False otherwise.
    """
    try:
        cycles_prefs = bpy.context.preferences.addons['cycles'].preferences
        for backend in ('OPTIX', 'CUDA'):
            try:
                cycles_prefs.compute_device_type = backend
                cycles_prefs.get_devices()
                gpus = [dev for dev in cycles_prefs.devices if dev.type == backend]
                if not gpus:
                    continue
                # Enable devices of this backend only, disable everything else.
                for dev in cycles_prefs.devices:
                    dev.use = (dev.type == backend)
                bpy.context.scene.cycles.device = 'GPU'
                print(f" GPU 渲染: {gpus[0].name} ({backend})")
                return True
            except Exception:
                continue
        print(" [WARN] 无可用 GPU,使用 CPU 渲染")
        bpy.context.scene.cycles.device = 'CPU'
    except Exception as e:
        print(f" [ERROR] GPU 设置异常: {e}")
    return False
699
+
700
+
701
def setup_render_settings(resolution, engine, samples, exposure):
    """Configure the render output, colour management and Cycles options.

    Args:
        resolution: indexable pair ``(width, height)`` in pixels.
        engine: render engine identifier assigned to ``scene.render.engine``.
        samples: Cycles sample count (only applied when engine is 'CYCLES').
        exposure: value assigned to the view-settings exposure.

    The output is an 8-bit RGB PNG at 100% resolution.  The view transform is
    forced to 'Standard' with look 'None' so that brightness is controlled by
    ``exposure`` alone (the original note says AgX, the Blender 4+ default,
    darkens indoor scenes too much).  For Cycles the bounce limits and
    denoising are set and GPU rendering is attempted via ``enable_gpu``.
    """
    scene = bpy.context.scene
    render = scene.render
    render.engine = engine
    render.resolution_x = resolution[0]
    render.resolution_y = resolution[1]
    render.resolution_percentage = 100

    image = render.image_settings
    image.file_format = 'PNG'
    image.color_mode = 'RGB'
    image.color_depth = '8'

    view = scene.view_settings
    view.exposure = exposure
    view.view_transform = 'Standard'
    view.look = 'None'

    if engine == 'CYCLES':
        cycles_options = (
            ("samples", samples),
            ("use_denoising", True),
            ("max_bounces", 12),
            ("diffuse_bounces", 4),
            ("glossy_bounces", 4),
            ("transmission_bounces", 12),
            ("transparent_max_bounces", 8),
        )
        for attr, value in cycles_options:
            setattr(scene.cycles, attr, value)
        enable_gpu()

    print(f" 渲染设置: {engine} {resolution[0]}x{resolution[1]} "
          f"samples={samples} exposure={exposure} view_transform=Standard")
728
+
729
+
730
def _world_has_effective_light(world) -> bool:
    """Tell whether a World can contribute usable ambient light.

    Rules, in order:
      * ``None`` -> False.
      * A world that does not use nodes (or has no node tree) -> True; it is
        treated as a plain colour environment.
      * Otherwise True as soon as one BACKGROUND node has a 'Strength' input
        that is linked (for example to an HDR texture) or whose default value
        is greater than 0.05; False if no such node exists.

    The original note points out that scenes imported from GLB usually carry
    a World whose background strength may be 0.
    """
    if world is None:
        return False
    if not world.use_nodes or world.node_tree is None:
        return True

    for node in world.node_tree.nodes:
        if node.type != 'BACKGROUND':
            continue
        strength = node.inputs.get('Strength')
        if strength is None:
            continue
        value = strength.default_value
        if strength.is_linked or float(value) > 0.05:
            return True
    return False
748
+
749
+
750
def setup_lighting():
    """Add a uniform ambient light only when the scene has no usable lighting.

    The scene is left untouched when it contains at least one visible LIGHT
    object, or when ``_world_has_effective_light`` accepts its World.
    Otherwise the World (created if missing) gets a fresh node tree made of a
    white Background shader with Strength 1.0 wired to the World output.
    """
    scene = bpy.context.scene

    visible_objects = (obj for obj in bpy.data.objects if obj.visible_get())
    has_lights = any(obj.type == 'LIGHT' for obj in visible_objects)
    has_world = _world_has_effective_light(scene.world)

    if has_lights or has_world:
        print(" [光照] 保留场景原始光照")
        return

    print(" [光照] 场景无有效灯光,注入均匀环境光 (Strength=1.0)")
    world = scene.world
    if world is None:
        world = bpy.data.worlds.new("World")
        scene.world = world
    world.use_nodes = True

    # Rebuild the node tree from scratch: Background -> World Output.
    node_tree = world.node_tree
    node_tree.nodes.clear()
    background = node_tree.nodes.new('ShaderNodeBackground')
    background.inputs['Color'].default_value = (1.0, 1.0, 1.0, 1.0)
    background.inputs['Strength'].default_value = 1.0
    world_output = node_tree.nodes.new('ShaderNodeOutputWorld')
    node_tree.links.new(background.outputs['Background'],
                        world_output.inputs['Surface'])
779
+
780
+
781
def setup_depth_pass():
    """Set up compositor depth output (written against the Blender 5.0 API).

    Enables the Z pass on the current view layer and installs a new compositor
    node group ``DepthComp`` on the scene containing:
      * a Render Layers node,
      * a group output with an ``Image`` colour socket fed by the rendered image,
      * a File Output node writing one 32-bit ZIP-compressed OpenEXR item named
        ``depth`` fed by the Depth output.

    Returns:
        The File Output node; its ``directory`` is left empty here for the
        caller to fill in.
    """
    scene = bpy.context.scene
    bpy.context.view_layer.use_pass_z = True

    comp_tree = bpy.data.node_groups.new("DepthComp", "CompositorNodeTree")
    scene.compositing_node_group = comp_tree
    new_node = comp_tree.nodes.new
    connect = comp_tree.links.new

    render_layers = new_node('CompositorNodeRLayers')
    render_layers.location = (0, 300)

    # Pass the colour image straight through to the group output.
    image_out = new_node('NodeGroupOutput')
    image_out.location = (400, 300)
    comp_tree.interface.new_socket(name="Image", in_out="OUTPUT",
                                   socket_type="NodeSocketColor")
    connect(render_layers.outputs['Image'], image_out.inputs['Image'])

    # Depth goes to a separate EXR file.
    depth_out = new_node('CompositorNodeOutputFile')
    depth_out.location = (400, 0)
    depth_out.directory = ""
    exr_format = depth_out.format
    exr_format.media_type = 'IMAGE'
    exr_format.file_format = 'OPEN_EXR'
    exr_format.color_depth = '32'
    exr_format.exr_codec = 'ZIP'
    depth_out.file_output_items.clear()
    depth_out.file_output_items.new('FLOAT', "depth")
    connect(render_layers.outputs['Depth'], depth_out.inputs['depth'])

    print(f" 深度 pass 已配置")
    return depth_out
813
+
814
+
815
+ # =====================================================================
816
+ # 渲染 + 深度转换 + 位姿保存(同进程,只移动相机)
817
+ # =====================================================================
818
+
819
def convert_depth_exr_to_npy(exr_path, npy_path):
    """Convert a depth EXR into a float32 ``.npy`` file using Blender's image API.

    The EXR is loaded through ``bpy.data.images`` (no OpenEXR library needed),
    its first channel is taken, the rows are flipped vertically, and the values
    are multiplied by the scene's ``unit_settings.scale_length``.  Values above
    1000.0 or not greater than 0 are set to 0.0.  After saving, the image
    datablock is removed and the EXR file is deleted; a failure to delete is
    ignored.
    """
    image = bpy.data.images.load(exr_path)
    width, height = image.size[0], image.size[1]
    channels = np.array(image.pixels[:]).reshape(height, width, -1)
    first_channel = np.flipud(channels[:, :, 0])

    scale = bpy.context.scene.unit_settings.scale_length
    depth_m = first_channel * scale
    invalid = (depth_m > 1000.0) | (depth_m <= 0)
    depth_m[invalid] = 0.0

    np.save(npy_path, depth_m.astype(np.float32))
    bpy.data.images.remove(image)
    try:
        os.remove(exr_path)
    except OSError:
        # Best effort: a leftover EXR is harmless.
        pass
836
+
837
+
838
def render_frame_inprocess(cam_obj, frame_id, camera_pos, camera_rot,
                           output_dir, depth_fo):
    """Render one frame in the running Blender process.

    Moves ``cam_obj`` to ``camera_pos`` with XYZ Euler rotation ``camera_rot``,
    renders ``panorama_XXXX.png`` into ``output_dir``, converts the depth EXR
    written through the file-output node ``depth_fo`` into
    ``panorama_XXXX_depth.npy`` and writes ``pose_XXXX.json``.

    Returns:
        ``(rgb_path, depth_npy, pose_path)``; ``depth_npy`` is ``None`` when no
        depth EXR could be found after rendering.
    """
    import glob

    cam_obj.location = Vector(camera_pos)
    cam_obj.rotation_euler = Euler(camera_rot, 'XYZ')

    stem = f"panorama_{frame_id:04d}"
    rgb_path = os.path.join(output_dir, f"{stem}.png")
    depth_npy = os.path.join(output_dir, f"{stem}_depth.npy")
    pose_path = os.path.join(output_dir, f"pose_{frame_id:04d}.json")

    scene = bpy.context.scene
    scene.render.filepath = rgb_path

    # Point the depth file-output node at the absolute output directory.
    abs_dir = os.path.abspath(output_dir)
    depth_fo.directory = abs_dir
    depth_fo.file_name = stem + "_"
    depth_exr = os.path.join(abs_dir, stem + "_depth.exr")

    scene.frame_set(frame_id)
    bpy.ops.render.render(write_still=True)

    # Prefer the expected EXR name; otherwise fall back to a wildcard search.
    if os.path.exists(depth_exr):
        exr_files = [depth_exr]
    else:
        exr_files = glob.glob(os.path.join(abs_dir, f"*{stem}*depth*.exr"))

    if exr_files:
        convert_depth_exr_to_npy(exr_files[0], depth_npy)
    else:
        print(f" [WARN] 未找到深度 EXR: {depth_exr}")
        depth_npy = None

    # Pose file; the original note says the format matches save_pose in
    # render_erp_blender.py.
    save_pose(cam_obj, pose_path, frame_id)

    return rgb_path, depth_npy, pose_path
875
+
876
+
877
def save_pose(camera_object, output_path, frame_id):
    """Write the camera's absolute cam-to-world pose as a JSON file.

    The position is the object's ``location`` scaled by the scene unit scale
    and re-ordered from Blender axes (X right, Y forward, Z up) to the unified
    convention (X right, Y up, Z forward), i.e. Y and Z are swapped.

    The rotation is ``R_cw = T @ R_obj @ M``, where ``R_obj`` comes from the
    object's Euler rotation, ``T`` swaps the Y and Z axes and ``M`` negates Z;
    it is stored as a quaternion ``[w, x, y, z]``.  The original note says
    this format matches ``save_pose`` in render_erp_blender.py.
    """
    scale = bpy.context.scene.unit_settings.scale_length

    bx, by, bz = list(camera_object.location)
    blender_quat = camera_object.rotation_euler.to_quaternion()

    # Blender (x, y, z) -> unified (x, z, y).
    position = [bx * scale, bz * scale, by * scale]

    obj_rotation = blender_quat.to_matrix()
    swap_yz = Matrix([[1, 0, 0], [0, 0, 1], [0, 1, 0]])
    flip_z = Matrix([[1, 0, 0], [0, 1, 0], [0, 0, -1]])
    cam_to_world = swap_yz @ obj_rotation @ flip_z
    quat = cam_to_world.to_quaternion()

    pose_data = {
        "frame_id": frame_id,
        "position": position,
        "rotation_quaternion": [quat.w, quat.x, quat.y, quat.z],
        "camera_type": "erp_ray",
        "coordinate_system": "right-handed, Y-up, Z-forward (cam_to_world)",
        "render_method": "blender_cycles",
    }
    with open(output_path, 'w') as handle:
        json.dump(pose_data, handle, indent=2)
911
+
912
+
913
+ # =====================================================================
914
+ # 选帧核心(向量化,内嵌)
915
+ # =====================================================================
916
+
917
def build_ray_directions(H=WARP_H, W=WARP_W):
    """Return the (H, W, 3) unit ray directions of an ERP image (Z-up).

    Row ``i`` maps to latitude ``pi/2 - pi * (i + 0.5) / H`` and column ``j``
    to longitude ``2*pi * (j + 0.5) / W`` (pixel centres). The direction is
    ``(cos(lat)cos(lon), cos(lat)sin(lon), sin(lat))``.
    """
    rows = np.arange(H, dtype=np.float64)
    cols = np.arange(W, dtype=np.float64)

    # Column vector of latitudes and row vector of longitudes; broadcasting
    # produces the same grid as an 'ij' meshgrid.
    lat = (np.pi / 2 - np.pi * (rows + 0.5) / H)[:, np.newaxis]
    lon = (2 * np.pi * (cols + 0.5) / W)[np.newaxis, :]

    cos_lat = np.cos(lat)
    dir_x = cos_lat * np.cos(lon)
    dir_y = cos_lat * np.sin(lon)
    dir_z = np.broadcast_to(np.sin(lat), dir_x.shape)
    return np.stack((dir_x, dir_y, dir_z), axis=-1)
929
+
930
+
931
# Memo of ray-direction grids, keyed by (H, W).
_ray_dirs_cache = {}


def get_ray_dirs(H=WARP_H, W=WARP_W):
    """Return the cached ray-direction grid for (H, W), building it on first use."""
    key = (H, W)
    try:
        return _ray_dirs_cache[key]
    except KeyError:
        grid = build_ray_directions(H, W)
        _ray_dirs_cache[key] = grid
        return grid
938
+
939
+
940
def depth_to_3d_points(position, depth, ray_dirs, max_depth=None):
    """Back-project a depth map into 3D points.

    Args:
        position: camera position, length-3 sequence.
        depth: per-pixel depth array; values ``<= 0`` mark invalid pixels.
        ray_dirs: per-pixel ray directions, shape ``depth.shape + (3,)``.
        max_depth: optional upper bound; pixels with a larger depth are
            dropped. ``None`` disables the bound.

    Returns:
        ``(N, 3)`` float64 array of ``position + ray_dir * depth`` for every
        valid pixel, in row-major pixel order; shape ``(0, 3)`` when no pixel
        is valid.
    """
    depth = np.asarray(depth)
    # Non-finite depths are rejected explicitly: ``inf > 0`` is True, and
    # ``0 * inf`` would otherwise inject NaN coordinates into the cloud.
    valid = np.isfinite(depth) & (depth > 0)
    if max_depth is not None:
        valid &= (depth <= max_depth)
    if not np.any(valid):
        return np.empty((0, 3), dtype=np.float64)

    origin = np.asarray(position, dtype=np.float64)
    # Mask first so only the valid pixels are multiplied, instead of building
    # the full (H, W, 3) product and discarding most of it.
    return origin + ray_dirs[valid] * depth[valid][:, np.newaxis]
948
+
949
+
950
def project_points_to_coverage(pts, tgt_pos, H=WARP_H, W=WARP_W):
    """Project the accumulated point cloud into the panorama at ``tgt_pos``.

    Each point is mapped to its ERP pixel as seen from ``tgt_pos`` (Z-up),
    and the resulting mask is grown by one pixel in the four axis directions
    (no wrap-around at the image borders).

    Returns:
        ``(H, W)`` boolean coverage mask.
    """
    hit = np.zeros((H, W), dtype=bool)
    if len(pts) == 0:
        return hit

    rel = pts - np.array(tgt_pos, dtype=np.float64)
    dx, dy, dz = rel[:, 0], rel[:, 1], rel[:, 2]

    lat = np.arctan2(dz, np.sqrt(dx ** 2 + dy ** 2))
    lon = np.arctan2(dy, dx) % (2 * np.pi)

    rows = np.clip(((np.pi / 2 - lat) / np.pi * H).astype(np.int32), 0, H - 1)
    cols = np.clip((lon / (2 * np.pi) * W).astype(np.int32), 0, W - 1)
    hit[rows, cols] = True

    # 4-neighbour dilation; every shift reads from the undilated mask.
    grown = hit.copy()
    grown[:, :-1] |= hit[:, 1:]
    grown[:, 1:] |= hit[:, :-1]
    grown[:-1, :] |= hit[1:, :]
    grown[1:, :] |= hit[:-1, :]
    return grown
970
+
971
+
972
+ # ---- GPU 加速(延迟初始化,Phase 2 第一次选帧时检测)----
973
# Selected GPU backend name ("torch", "cupy") or None for CPU.
_GPU_BACKEND = None
# The imported backend module, or None.
_gpu_lib = None
# True once detection has run; detection happens at most once per process.
_gpu_checked = False


def _init_gpu():
    """Detect a usable GPU backend on first call (lazy initialisation).

    Tries torch with CUDA first, then cupy. On success sets the module
    globals ``_GPU_BACKEND`` / ``_gpu_lib``; otherwise leaves them ``None`` so
    callers use the CPU path. Later calls return immediately.

    Detection never raises: a torch installation that fails to import or to
    query CUDA for any reason is reported and skipped, matching how the cupy
    probe already behaves.
    """
    global _GPU_BACKEND, _gpu_lib, _gpu_checked
    if _gpu_checked:
        return
    _gpu_checked = True

    try:
        import torch
        if torch.cuda.is_available():
            _GPU_BACKEND = "torch"
            _gpu_lib = torch
            print(f"[GPU] torch {torch.__version__} (CUDA),选帧将使用 GPU 加速")
            return
    except ImportError:
        # torch is simply not installed; try cupy next.
        pass
    except Exception as e:
        # Broken install (e.g. missing shared libraries) must not abort
        # frame selection; fall through to cupy / CPU.
        print(f"[Warning] torch 初始化失败: {e}")

    try:
        import cupy as cp
        try:
            # Best-effort release of cached blocks before the first allocation.
            cp.get_default_memory_pool().free_all_blocks()
            cp.get_default_pinned_memory_pool().free_all_blocks()
        except Exception:
            pass
        cp.zeros(1)  # forces a real device allocation; raises if unusable
        _GPU_BACKEND = "cupy"
        _gpu_lib = cp
        print(f"[GPU] cupy {cp.__version__},选帧将使用 GPU 加速")
        return
    except Exception as e:
        print(f"[Warning] cupy 初始化失败: {e}")

    print("[CPU] 未检测到 torch/cupy,选帧使用 CPU")
1010
+
1011
+
1012
def _batch_coverage_gpu(pts_np, candidate_positions, remaining_indices, H, W):
    """Count covered ERP pixels for each remaining candidate on the GPU.

    The point cloud is uploaded once; each candidate is then processed in
    turn with the same projection and 4-neighbour dilation as
    ``project_points_to_coverage``. The backend is chosen by the module
    global ``_GPU_BACKEND``; with any other value the result is empty.

    Returns:
        dict mapping candidate index -> number of covered pixels (int).
    """
    n_pixels = H * W
    counts = {}

    if _GPU_BACKEND == "torch":
        import torch

        dev = torch.device("cuda")
        cloud = torch.from_numpy(pts_np).double().to(dev)
        pi = torch.pi
        two_pi = 2 * torch.pi

        for idx in remaining_indices:
            origin = torch.tensor(candidate_positions[idx],
                                  dtype=torch.float64, device=dev)
            rel = cloud - origin
            dx, dy, dz = rel[:, 0], rel[:, 1], rel[:, 2]
            lat = torch.atan2(dz, torch.sqrt(dx ** 2 + dy ** 2))
            lon = torch.atan2(dy, dx) % two_pi
            rows = torch.clamp(((pi / 2 - lat) / pi * H).long(), 0, H - 1)
            cols = torch.clamp((lon / two_pi * W).long(), 0, W - 1)

            flat_hit = torch.zeros(n_pixels, dtype=torch.bool, device=dev)
            flat_hit[rows * W + cols] = True
            hit = flat_hit.view(H, W)

            # 4-neighbour dilation, reading from the undilated mask.
            grown = hit.clone()
            grown[1:, :] |= hit[:-1, :]
            grown[:-1, :] |= hit[1:, :]
            grown[:, 1:] |= hit[:, :-1]
            grown[:, :-1] |= hit[:, 1:]
            counts[idx] = int(grown.sum().item())

    elif _GPU_BACKEND == "cupy":
        import cupy as cp

        cloud = cp.asarray(pts_np, dtype=cp.float64)
        pi = cp.pi
        two_pi = 2 * cp.pi

        for idx in remaining_indices:
            origin = cp.array(candidate_positions[idx], dtype=cp.float64)
            rel = cloud - origin
            dx, dy, dz = rel[:, 0], rel[:, 1], rel[:, 2]
            lat = cp.arctan2(dz, cp.sqrt(dx ** 2 + dy ** 2))
            lon = cp.arctan2(dy, dx) % two_pi
            rows = cp.clip(((pi / 2 - lat) / pi * H).astype(cp.int32), 0, H - 1)
            cols = cp.clip((lon / two_pi * W).astype(cp.int32), 0, W - 1)

            flat_hit = cp.zeros(n_pixels, dtype=cp.bool_)
            flat_hit[rows * W + cols] = True
            hit = flat_hit.reshape(H, W)

            grown = hit.copy()
            grown[1:, :] |= hit[:-1, :]
            grown[:-1, :] |= hit[1:, :]
            grown[:, 1:] |= hit[:, :-1]
            grown[:, :-1] |= hit[:, 1:]
            counts[idx] = int(cp.sum(grown))

    return counts
1076
+
1077
+
1078
def trim_depth(new_depth, new_pos, existing_pts, ray_dirs):
    """Zero out the pixels of ``new_depth`` already covered by ``existing_pts``.

    ``ray_dirs`` is accepted for interface compatibility and is not used.

    Returns:
        ``(trimmed_depth, n_valid_before, n_valid_after)`` where the counts
        are the numbers of pixels with depth > 0. ``trimmed_depth`` is always
        a copy of the input.
    """
    rows, cols = new_depth.shape
    valid_before = int(np.count_nonzero(new_depth > 0))
    result = new_depth.copy()

    if len(existing_pts) == 0:
        # Nothing seen yet: every valid pixel is new.
        return result, valid_before, valid_before

    seen = project_points_to_coverage(existing_pts, new_pos, rows, cols)
    result[seen] = 0
    valid_after = int(np.count_nonzero(result > 0))
    return result, valid_before, valid_after
1087
+
1088
+
1089
def load_depth_downsampled(path, H=WARP_H, W=WARP_W):
    """Load a depth ``.npy`` file and resample it to ``(H, W)`` float32.

    Non-finite values (NaN and +/-inf) are replaced by 0, the value the rest
    of the pipeline treats as "invalid" (all consumers test ``depth > 0``).
    Without this, ``inf`` would become the largest float32 and be averaged
    into neighbouring pixels as if it were a real measurement.

    Resampling uses ``cv2.resize(..., INTER_AREA)`` when OpenCV is available.
    The NumPy fallback block-averages by the integer factors
    ``h // H`` x ``w // W`` (cropping any remainder at the bottom/right); if
    the input is smaller than the target in either axis it is copied into the
    top-left corner of a zero array instead.
    """
    d = np.load(path).astype(np.float32)
    d = np.nan_to_num(d, nan=0.0, posinf=0.0, neginf=0.0)
    if d.shape == (H, W):
        return d
    try:
        import cv2
        return cv2.resize(d, (W, H), interpolation=cv2.INTER_AREA)
    except ImportError:
        h, w = d.shape
        bh, bw = h // H, w // W
        if bh < 1 or bw < 1:
            # Input smaller than target: no upsampling, just zero-pad.
            r = np.zeros((H, W), dtype=np.float32)
            r[:min(h, H), :min(w, W)] = d[:min(h, H), :min(w, W)]
            return r
        return d[:bh * H, :bw * W].reshape(H, bh, W, bw).mean(axis=(1, 3))
1105
+
1106
+
1107
def select_next_frame(candidates, selected_idx, selected_pos,
                      all_pts, reachable=None):
    """Pick the next viewpoint greedily: the candidate with the highest score.

    For every eligible candidate the accumulated cloud ``all_pts`` is
    projected into its panorama; with ``covered`` pixels out of ``H * W``:

        gain    = (total - covered) / total
        overlap = covered / total
        score   = gain - DEFAULT_OVERLAP_PENALTY * overlap

    Args:
        candidates: candidate positions, indexable by candidate index.
        selected_idx: indices already used; these are excluded.
        selected_pos: not used by this function.
        all_pts: accumulated point cloud, ``(N, 3)``.
        reachable: optional set of allowed candidate indices; ``None`` means
            no restriction.

    Returns:
        ``(best_index, best_gain, best_score, n_eligible)``; when nothing is
        eligible, ``(-1, 0.0, -999.0, 0)``. Ties keep the lowest index.

    The GPU path is used when a backend was detected and the cloud is
    non-empty.
    """
    H, W = WARP_H, WARP_W
    total_px = H * W
    overlap_penalty = DEFAULT_OVERLAP_PENALTY

    remaining = [
        i for i in range(len(candidates))
        if i not in selected_idx and (reachable is None or i in reachable)
    ]
    if not remaining:
        return -1, 0.0, -999.0, 0

    _init_gpu()
    if _GPU_BACKEND and len(all_pts) > 0:
        # ---- GPU path: all counts in one batch ----
        gpu_counts = _batch_coverage_gpu(all_pts, candidates, remaining, H, W)

        def covered_pixels(ci):
            return gpu_counts.get(ci, 0)
    else:
        # ---- CPU path: one projection per candidate ----
        def covered_pixels(ci):
            mask = project_points_to_coverage(all_pts, candidates[ci], H, W)
            return int(np.sum(mask))

    best_ci, best_sc, best_g = -1, -999.0, 0.0
    for ci in remaining:
        covered = covered_pixels(ci)
        new_r = (total_px - covered) / total_px
        ovl_r = covered / total_px
        sc = new_r - overlap_penalty * ovl_r
        if sc > best_sc:
            best_ci, best_sc, best_g = ci, sc, new_r

    return best_ci, best_g, best_sc, len(remaining)
1167
+
1168
+
1169
def compute_max_depth(candidates):
    """Return the depth cut-off used when back-projecting depth maps.

    The cut-off is 1.5x the diagonal of the axis-aligned bounding box of the
    candidate positions.

    When the box is degenerate (a single candidate, or all candidates at the
    same point) the diagonal is 0; returning 0 would make every depth pixel
    fail the ``depth <= max_depth`` test, so ``inf`` (no cut-off) is returned
    instead.

    Raises:
        ValueError: if ``candidates`` is empty.
    """
    positions = np.asarray(candidates, dtype=np.float64)
    if positions.size == 0:
        raise ValueError("compute_max_depth() requires at least one candidate")

    extent = positions.max(axis=0) - positions.min(axis=0)
    diag = float(np.linalg.norm(extent))
    if diag <= 0.0:
        return float("inf")
    return diag * 1.5
1173
+
1174
+
1175
+ # =====================================================================
1176
+ # Phase 2: 边渲边选主循环
1177
+ # =====================================================================
1178
+
1179
+ def run_phase2(cam_obj, candidates, mesh_center, output_dir,
1180
+ max_frames, resolution, depth_fo, args):
1181
+
# Phase 2 main loop ("render while selecting"). For each frame: pick a
# candidate position, render it in-process, back-project the new depth into
# the accumulated point cloud, update which candidates are reachable by ray
# casting from the rendered position, then evaluate the stop conditions.
# Candidates are grouped into floors by Z and floors are processed in order.
# After the loop, extra frames are rendered until (len(results) - 1) % 4 == 0.
# Returns `results`, a list with one dict per frame (keys: frame_id,
# candidate_idx, position, gain, actual_gain, delta_ratio, score, plus
# "skipped"/"skip_reason" or "supplementary" where applicable).
# Reads args.stop_score, args.stop_delta, args.min_frames, args.stop_gain and
# args.rotation_type. `resolution` is not referenced in this function.
1182
+ ray_dirs = get_ray_dirs(WARP_H, WARP_W)
1183
+ max_depth = compute_max_depth(candidates)
1184
+
1185
# NOTE(review): scene_diag is computed but never read below.
+ scene_diag = float(np.linalg.norm(
1186
+ np.array(candidates).max(0) - np.array(candidates).min(0)))
1187
+
1188
+ selected_idx = set()
1189
+ selected_pos = []
1190
+ all_pts = np.empty((0, 3), dtype=np.float64)
1191
+ pts_chunks = []
1192
+ results = []
1193
+
1194
+ # Reachability: candidate indices with line of sight from a rendered position
1195
+ reachable = set()
1196
+
1197
+ stop_score = args.stop_score
1198
+ stop_delta = args.stop_delta
1199
+ min_frames = args.min_frames
1200
+
1201
+ # actual-gain history
1202
+ ACTUAL_GAIN_WINDOW = 3
1203
+ ACTUAL_GAIN_FLOOR = args.stop_gain
1204
+ actual_gain_history = []
1205
+ delta_history = []
1206
+ consecutive_skips = 0
1207
+ MAX_CONSECUTIVE_SKIPS = 3
1208
+
1209
+ # ======== Floor grouping (cluster candidates by Z) ========
# Distinct Z values (rounded to 2 decimals) are sorted; a gap larger than 1.0
# between consecutive values starts a new floor.
1210
+ z_vals = sorted(set(round(c[2], 2) for c in candidates))
1211
+ floors = [[z_vals[0]]]
1212
+ for z in z_vals[1:]:
1213
+ if z - floors[-1][-1] > 1.0:
1214
+ floors.append([z])
1215
+ else:
1216
+ floors[-1].append(z)
1217
+
1218
+ # Tag each candidate with a floor (the floor whose Z centre is nearest)
1219
+ n_floors = len(floors)
1220
+ floor_mids = [sum(f) / len(f) for f in floors] # Z centre of each floor
1221
+ candidate_floor = []
1222
+ for c in candidates:
1223
+ cz = c[2]
1224
+ fi = min(range(n_floors), key=lambda i: abs(cz - floor_mids[i]))
1225
+ candidate_floor.append(fi)
1226
+
1227
+ current_floor = 0
1228
+
1229
+ # Set of candidate indices on a given floor
1230
+ def floor_set(fi):
1231
+ return set(i for i, f in enumerate(candidate_floor) if f == fi)
1232
+
1233
+ floor_names = [f"楼层{i+1}(Z={min(f):.1f}~{max(f):.1f})" for i, f in enumerate(floors)]
1234
+
1235
+ print(f"\n{'='*60}")
1236
+ print(f"[Phase 2] 边渲边选 (候选={len(candidates)}, 最多={max_frames}帧)")
1237
+ print(f"{'='*60}")
1238
+ print(f" 停止条件:")
1239
+ print(f" - 连续 {ACTUAL_GAIN_WINDOW} 帧 actual_gain < {ACTUAL_GAIN_FLOOR:.0%}")
1240
+ print(f" - predicted gain < {ACTUAL_GAIN_FLOOR:.0%} 且 score < {stop_score}")
1241
+ print(f" - (至少 {min_frames} 帧后才检查)")
1242
+ print(f" {n_floors} 个楼层: {floor_names}")
1243
+ print(f" 高度层: {['%.2f' % z for z in z_vals]}")
1244
+ print(f" 选帧策略: 楼层顺序 + 层内全局最优 (可达优先)")
1245
+
1246
+ t_total = time.time()
1247
+
1248
+ # Timing accumulators
1249
+ time_select = 0.0
1250
+ time_render = 0.0
1251
+ time_depth = 0.0
1252
+ time_reach = 0.0
1253
+
# frame_count doubles as the frame id used in file names. An iteration that
# only switches floors (`continue` below) still consumes a frame_count value.
1254
+ for frame_count in range(max_frames):
1255
+
1256
+ # ======== Select position ========
1257
+ t_sel = time.time()
1258
+ if frame_count == 0:
1259
+ # F0: XY = centroid of the first-floor candidates, Z = target height on that floor
1260
+ floor0_candidates = [(i, c) for i, c in enumerate(candidates)
1261
+ if candidate_floor[i] == 0]
1262
+ if floor0_candidates:
1263
+ f0_pts = np.array([c for _, c in floor0_candidates])
1264
+ xy_center = f0_pts[:, :2].mean(axis=0) # XY centroid
1265
+ floor0_zs = sorted(set(c[2] for _, c in floor0_candidates))
1266
+ z_target = min(floor0_zs) + 1.2 # lowest floor-0 candidate Z + 1.2 (the original note said "+1.7m ~ eye height"; the code adds 1.2)
1267
+ target = np.array([xy_center[0], xy_center[1], z_target])
1268
+ dists_to_target = [np.linalg.norm(np.array(c) - target)
1269
+ for _, c in floor0_candidates]
1270
+ best_idx = int(np.argmin(dists_to_target))
1271
+ ci = floor0_candidates[best_idx][0]
1272
+ else:
1273
+ mc = np.array(mesh_center, dtype=np.float64)
1274
+ ci = int(np.argmin([np.linalg.norm(np.array(c) - mc)
1275
+ for c in candidates]))
1276
+ gain, score = 1.0, 1.0
1277
+ print(f"\n F{frame_count}: 选候选[{ci}] "
1278
+ f"(楼层中心, Z={candidates[ci][2]:.2f}m) "
1279
+ f"[{floor_names[current_floor]}]")
1280
+ else:
1281
+ # ---- Best candidate within the current floor (all heights compete) ----
1282
+ cur_floor_ids = floor_set(current_floor)
1283
+ # Restrict the reachable set to the current floor
1284
+ floor_reachable = reachable & cur_floor_ids if reachable else set()
1285
+
# First pass: reachable candidates on this floor, or the whole floor when
# none is reachable yet.
1286
+ ci, gain, score, n_remain = select_next_frame(
1287
+ candidates, selected_idx, selected_pos, all_pts,
1288
+ reachable=floor_reachable if floor_reachable else cur_floor_ids)
1289
+
1290
+ expand = False
1291
+ if ci < 0 or score < stop_score:
1292
+ # Reachable ones are not good enough -> whole current floor (incl. unreachable)
1293
+ ci2, gain2, score2, n2 = select_next_frame(
1294
+ candidates, selected_idx, selected_pos, all_pts,
1295
+ reachable=cur_floor_ids)
1296
+ if ci2 >= 0 and (ci < 0 or score2 > score):
1297
+ ci, gain, score, n_remain = ci2, gain2, score2, n2
1298
+ expand = True
1299
+
1300
+ if ci < 0 or (score < stop_score and gain < ACTUAL_GAIN_FLOOR):
1301
+ # Current floor exhausted -> move to the next floor
1302
+ if ci >= 0:
1303
+ reason = f"predicted gain={gain:.1%} score={score:.3f}"
1304
+ else:
1305
+ reason = "无可选候选"
1306
+ current_floor += 1
1307
+ if current_floor < n_floors:
1308
+ print(f"\n F{frame_count}: {reason}"
1309
+ f" → {floor_names[current_floor-1]} 拍满,"
1310
+ f" 切换到 {floor_names[current_floor]}")
1311
+ continue
1312
+ else:
1313
+ print(f"\n F{frame_count}: {reason}"
1314
+ f" → 所有楼层拍满,停止")
1315
+ break
1316
+
1317
+ tag = "[扩展]" if expand else ""
1318
+ print(f"\n F{frame_count}: 选候选[{ci}] "
1319
+ f"gain={gain:.1%} score={score:.3f} 剩余={n_remain}"
1320
+ f" [Z={candidates[ci][2]:.2f} {floor_names[current_floor]}"
1321
+ f" 可达={len(floor_reachable)}]{tag}")
1322
+
1323
+ pos = candidates[ci]
1324
+ selected_idx.add(ci)
1325
+ selected_pos.append(pos)
1326
+ dt_sel = time.time() - t_sel
1327
+ time_select += dt_sel
1328
+ if frame_count > 0:
1329
+ print(f" [选帧 {dt_sel:.1f}s]")
1330
+
1331
+ # ======== Render ========
1332
+ cam_rot = get_camera_rot(args.rotation_type, frame_count)
1333
+ print(f" 位置: [{pos[0]:.2f}, {pos[1]:.2f}, {pos[2]:.2f}]")
1334
+ print(f" 渲染...", end="", flush=True)
1335
+ t_r = time.time()
1336
+
1337
+ rgb_path, depth_path, pose_path = render_frame_inprocess(
1338
+ cam_obj, frame_count, pos, cam_rot, output_dir, depth_fo)
1339
+ dt_r = time.time() - t_r
1340
+ time_render += dt_r
1341
+ print(f" {dt_r:.1f}s")
1342
+
1343
+ # ======== depth -> 3D point cloud ========
1344
+ t_dep = time.time()
1345
+ actual_gain = 1.0
1346
+ delta_ratio = 1.0
1347
+
1348
+ if depth_path and os.path.exists(depth_path):
1349
+ depth = load_depth_downsampled(depth_path, WARP_H, WARP_W)
1350
+ total_px = WARP_H * WARP_W
1351
+ n_valid = int(np.sum(depth > 0))
1352
+ valid_ratio = n_valid / total_px
1353
+
1354
+ if frame_count == 0:
1355
+ new_pts = depth_to_3d_points(pos, depth, ray_dirs, max_depth)
1356
+ pts_chunks.append(new_pts)
1357
+ all_pts = new_pts
1358
+ actual_gain = valid_ratio
1359
+ print(f" depth: {n_valid}px ({valid_ratio:.0%} 有效)"
1360
+ f" → {len(new_pts)} 个 3D 点 (全部)")
1361
+ else:
1362
+ # ---- Quality check ----
# A frame with fewer than 30% valid depth pixels is recorded as skipped;
# its RGB and depth files are deleted (the pose file is left in place).
1363
+ MIN_VALID_RATIO = 0.30
1364
+ if valid_ratio < MIN_VALID_RATIO:
1365
+ print(f" depth: {n_valid}px ({valid_ratio:.0%} 有效)"
1366
+ f" < {MIN_VALID_RATIO:.0%} → 室外/空壳,跳过此帧")
1367
+ results.append({
1368
+ "frame_id": frame_count,
1369
+ "candidate_idx": ci,
1370
+ "position": pos,
1371
+ "gain": float(gain),
1372
+ "actual_gain": 0.0,
1373
+ "delta_ratio": 0.0,
1374
+ "score": float(score),
1375
+ "skipped": True,
1376
+ "skip_reason": f"valid_ratio={valid_ratio:.1%}",
1377
+ })
1378
+ for fp in [rgb_path, depth_path]:
1379
+ if fp and os.path.exists(fp):
1380
+ try:
1381
+ os.remove(fp)
1382
+ except OSError:
1383
+ pass
1384
+ consecutive_skips += 1
1385
+ if consecutive_skips >= MAX_CONSECUTIVE_SKIPS:
1386
+ # Consecutive empty frames -> the current floor is probably bad, switch floor
1387
+ current_floor += 1
1388
+ consecutive_skips = 0
1389
+ if current_floor < n_floors:
1390
+ print(f" 连续 {MAX_CONSECUTIVE_SKIPS} 帧室外/空壳"
1391
+ f" → 切换到 {floor_names[current_floor]}")
1392
+ else:
1393
+ print(f" 连续 {MAX_CONSECUTIVE_SKIPS} 帧室外/空壳"
1394
+ f" → 所有楼层完成,停止")
1395
+ break
1396
+ time_depth += time.time() - t_dep
1397
+ continue
1398
+
# Keep only pixels not already covered by the accumulated cloud, then add
# their 3D points; all_pts is rebuilt from every chunk on each frame.
1399
+ trimmed, n_orig, n_new = trim_depth(
1400
+ depth, pos, all_pts, ray_dirs)
1401
+ new_pts = depth_to_3d_points(pos, trimmed, ray_dirs, max_depth)
1402
+ pts_chunks.append(new_pts)
1403
+ all_pts = np.concatenate(pts_chunks)
1404
+ actual_gain = n_new / total_px
1405
+ delta_ratio = (len(new_pts) / len(all_pts)
1406
+ if len(all_pts) > 0 else 1.0)
1407
+ print(f" depth: {n_valid}px ({valid_ratio:.0%} 有效)"
1408
+ f" → trim → {n_new}px 新增"
1409
+ f" → {len(new_pts)} 个新 3D 点 (delta)")
1410
+ print(f" 累积点云: {len(all_pts)}")
1411
+ print(f" 实际gain: {actual_gain:.1%}, "
1412
+ f"点云增量: {delta_ratio:.1%}")
1413
+ consecutive_skips = 0
1414
+ else:
1415
+ print(f" [Error] 无 depth 文件!")
1416
+ break
1417
+
1418
+ results.append({
1419
+ "frame_id": frame_count,
1420
+ "candidate_idx": ci,
1421
+ "position": pos,
1422
+ "gain": float(gain),
1423
+ "actual_gain": float(actual_gain),
1424
+ "delta_ratio": float(delta_ratio),
1425
+ "score": float(score),
1426
+ })
1427
+ time_depth += time.time() - t_dep
1428
+
1429
+ # ======== Update reachability ========
# A candidate becomes reachable when it is closer than 0.1 to the rendered
# position, or when a ray cast towards it hits nothing before 95% of the
# distance. Reachability is only ever added, never removed.
1430
+ if IN_BLENDER:
1431
+ t_reach = time.time()
1432
+ scene = bpy.context.scene
1433
+ depsgraph = bpy.context.evaluated_depsgraph_get()
1434
+ n_new_reachable = 0
1435
+ for ci_check in range(len(candidates)):
1436
+ if ci_check in selected_idx or ci_check in reachable:
1437
+ continue
1438
+ origin = Vector(pos)
1439
+ target = Vector(candidates[ci_check])
1440
+ direction = (target - origin).normalized()
1441
+ dist_to_target = (target - origin).length
1442
+
1443
+ if dist_to_target < 0.1:
1444
+ reachable.add(ci_check)
1445
+ n_new_reachable += 1
1446
+ continue
1447
+
1448
+ hit, loc, *_ = scene.ray_cast(depsgraph, origin, direction)
1449
+ if not hit or (loc - origin).length >= dist_to_target * 0.95:
1450
+ reachable.add(ci_check)
1451
+ n_new_reachable += 1
1452
+
1453
+ dt_reach = time.time() - t_reach
1454
+ time_reach += dt_reach
1455
+ print(f" [可达性] 新增 {n_new_reachable} 个可达候选, "
1456
+ f"总可达 {len(reachable)} / {len(candidates)} "
1457
+ f"({dt_reach:.1f}s)")
1458
+
1459
+ # ======== Stop conditions ========
# The last ACTUAL_GAIN_WINDOW frames are checked once min_frames is reached.
# NOTE(review): the histories are not cleared on a floor switch, so the
# window can span two floors - confirm this is intended.
1460
+ if frame_count > 0:
1461
+ actual_gain_history.append(actual_gain)
1462
+ delta_history.append(delta_ratio)
1463
+
1464
+ if frame_count > 0 and frame_count >= min_frames:
1465
+ if len(actual_gain_history) >= ACTUAL_GAIN_WINDOW:
1466
+ recent_gain = actual_gain_history[-ACTUAL_GAIN_WINDOW:]
1467
+ recent_delta = delta_history[-ACTUAL_GAIN_WINDOW:]
1468
+ gain_exhausted = all(g < ACTUAL_GAIN_FLOOR for g in recent_gain)
1469
+ delta_exhausted = all(d < stop_delta for d in recent_delta)
1470
+
1471
+ if gain_exhausted or delta_exhausted:
1472
+ avg_g = sum(recent_gain) / len(recent_gain)
1473
+ avg_d = sum(recent_delta) / len(recent_delta)
1474
+ reason = ""
1475
+ if gain_exhausted:
1476
+ reason += f"actual_gain < {ACTUAL_GAIN_FLOOR:.0%} (平均 {avg_g:.1%})"
1477
+ if delta_exhausted:
1478
+ if reason:
1479
+ reason += " + "
1480
+ reason += f"delta < {stop_delta:.1%} (平均 {avg_d:.1%})"
1481
+ # Current floor exhausted -> switch floor
1482
+ current_floor += 1
1483
+ if current_floor < n_floors:
1484
+ print(f" 连续 {ACTUAL_GAIN_WINDOW} 帧 {reason}"
1485
+ f" → {floor_names[current_floor-1]} 拍满,"
1486
+ f" 切换到 {floor_names[current_floor]}")
1487
+ else:
1488
+ print(f" 连续 {ACTUAL_GAIN_WINDOW} 帧 {reason}"
1489
+ f" → 所有楼层拍满,停止")
1490
+ break
1491
+
1492
+ # ======== Padding: make the frame count 4n+1 ========
# NOTE(review): len(results) also counts entries flagged "skipped", whose
# image and depth files were deleted above, so the number of frames on disk
# may not be 4n+1 even when this check passes - confirm intended.
# Padding frames ignore floors and reachability (reachable=None) and stop
# once the next frame id reaches max_frames + 3.
1493
+ while len(results) > 1 and (len(results) - 1) % 4 != 0:
1494
+ need = 4 - (len(results) - 1) % 4
1495
+ frame_count = results[-1]["frame_id"] + 1
1496
+ if frame_count >= max_frames + 3:
1497
+ break
1498
+ print(f"\n [补帧] 当前 {len(results)} 帧,不满足 4n+1,需补 {need} 帧")
1499
+
1500
+ ci, gain, score, n_remain = select_next_frame(
1501
+ candidates, selected_idx, selected_pos, all_pts, reachable=None)
1502
+ if ci < 0:
1503
+ print(f" 无可选候选,无法补帧")
1504
+ break
1505
+
1506
+ pos = candidates[ci]
1507
+ selected_idx.add(ci)
1508
+ selected_pos.append(pos)
1509
+
1510
+ cam_rot = get_camera_rot(args.rotation_type, frame_count)
1511
+ print(f" 补帧 F{frame_count}: 候选[{ci}] Z={pos[2]:.2f}m"
1512
+ f" gain={gain:.1%} score={score:.3f}")
1513
+ print(f" 渲染...", end="", flush=True)
1514
+ t_r = time.time()
1515
+ rgb_path, depth_path, pose_path = render_frame_inprocess(
1516
+ cam_obj, frame_count, pos, cam_rot, output_dir, depth_fo)
1517
+ dt_r = time.time() - t_r
1518
+ time_render += dt_r
1519
+ print(f" {dt_r:.1f}s")
1520
+
# Unlike the main loop, a padding frame is recorded even when its depth
# file is missing (gain and delta stay 0.0) and no valid-ratio check is made.
1521
+ actual_gain = 0.0
1522
+ delta_ratio = 0.0
1523
+ if depth_path and os.path.exists(depth_path):
1524
+ depth = load_depth_downsampled(depth_path, WARP_H, WARP_W)
1525
+ total_px = WARP_H * WARP_W
1526
+ trimmed, n_orig, n_new = trim_depth(depth, pos, all_pts, ray_dirs)
1527
+ new_pts = depth_to_3d_points(pos, trimmed, ray_dirs, max_depth)
1528
+ pts_chunks.append(new_pts)
1529
+ all_pts = np.concatenate(pts_chunks)
1530
+ actual_gain = n_new / total_px
1531
+ delta_ratio = len(new_pts) / len(all_pts) if len(all_pts) > 0 else 0
1532
+ print(f" depth: {n_new}px 新增, gain={actual_gain:.1%}")
1533
+
1534
+ results.append({
1535
+ "frame_id": frame_count,
1536
+ "candidate_idx": ci,
1537
+ "position": pos,
1538
+ "gain": float(gain),
1539
+ "actual_gain": float(actual_gain),
1540
+ "delta_ratio": float(delta_ratio),
1541
+ "score": float(score),
1542
+ "supplementary": True,
1543
+ })
1544
+
1545
+ if len(results) > 1:
1546
+ is_4n1 = (len(results) - 1) % 4 == 0
1547
+ print(f"\n 帧数检查: {len(results)} 帧"
1548
+ f" {'✓ 满足 4n+1' if is_4n1 else '✗ 不满足 4n+1'}")
1549
+
# Timing report; percentages are relative to max(dt, 1) seconds.
1550
+ dt = time.time() - t_total
1551
+ time_other = dt - time_select - time_render - time_depth - time_reach
1552
+ print(f"\n {'─'*50}")
1553
+ print(f" 共 {len(results)} 帧, {dt:.1f}s ({dt/60:.1f}min)")
1554
+ print(f" 耗时分布:")
1555
+ print(f" 选帧: {time_select:.1f}s ({time_select/max(dt,1)*100:.0f}%)"
1556
+ f" — 点云投影评估候选")
1557
+ print(f" 渲染: {time_render:.1f}s ({time_render/max(dt,1)*100:.0f}%)"
1558
+ f" — Blender Cycles GPU")
1559
+ print(f" 深度: {time_depth:.1f}s ({time_depth/max(dt,1)*100:.0f}%)"
1560
+ f" — depth→点云+trim")
1561
+ print(f" 可达性: {time_reach:.1f}s ({time_reach/max(dt,1)*100:.0f}%)"
1562
+ f" — raycast 扫描")
1563
+ if time_other > 1:
1564
+ print(f" 其他: {time_other:.1f}s ({time_other/max(dt,1)*100:.0f}%)")
1565
+
1566
+ return results
1567
+
1568
+
1569
+ # =====================================================================
1570
+ # 自动曝光
1571
+ # =====================================================================
1572
+
1573
def auto_adjust_exposure(cam_obj, test_pos, output_dir, depth_fo, initial_exposure):
    """Render one low-sample test frame and derive a view exposure from it.

    The camera is placed at ``test_pos`` with rotation ``(pi/2, 0, 0)``, a
    16-sample still is rendered to ``<output_dir>/_exposure_test.png``, and
    the mean luma (0-255 scale, Rec.601 weights) of pixels brighter than 1.0
    is analysed:

    * mean < 1       -> +4.0 EV
    * mean < 40      -> +min(4, log2(120 / mean)) + 1.0 EV
    * mean > 200     -> +max(-4, log2(120 / mean)) EV
    * > 30% of valid pixels above 250 -> -1.5 EV
    * otherwise      -> unchanged

    The result is clamped to [-2, 12], written to
    ``scene.view_settings.exposure`` and returned.

    The scene's sample count is restored and the temporary image released
    even if rendering or loading fails. ``depth_fo`` is accepted for
    interface compatibility and is not used here.
    """
    TARGET_MEAN = 120.0
    scene = bpy.context.scene
    original_samples = scene.cycles.samples
    test_path = os.path.join(output_dir, "_exposure_test.png")

    # Fast low-sample test render.
    scene.cycles.samples = 16
    try:
        scene.render.filepath = test_path

        cam_obj.location = Vector(test_pos)
        cam_obj.rotation_euler = Euler((math.pi / 2, 0, 0), 'XYZ')

        print(f"\n[自动曝光] 测试渲染 (16 samples, exposure={initial_exposure:.1f})...",
              end="", flush=True)
        t0 = time.time()
        bpy.ops.render.render(write_still=True)
        print(f" {time.time() - t0:.1f}s")

        img = bpy.data.images.load(test_path)
        try:
            w, h = img.size[0], img.size[1]
            pixels = np.array(img.pixels[:]).reshape(h, w, -1)
        finally:
            bpy.data.images.remove(img)
    finally:
        # Always put the production sample count back and drop the test file.
        scene.cycles.samples = original_samples
        try:
            os.remove(test_path)
        except OSError:
            pass

    # Brightness analysis.
    rgb = pixels[:, :, :3]
    brightness = (0.299 * rgb[:, :, 0] + 0.587 * rgb[:, :, 1]
                  + 0.114 * rgb[:, :, 2]) * 255

    # Only look at pixels that are not pure black (sky / invalid regions).
    valid_mask = brightness > 1.0
    n_valid = int(np.sum(valid_mask))
    if n_valid > 0:
        valid_brightness = brightness[valid_mask]
        mean_b = float(np.mean(valid_brightness))
        overexposed = float(np.sum(valid_brightness > 250)) / n_valid
        underexposed = float(np.sum(valid_brightness < 10)) / n_valid
    else:
        mean_b = 0.0
        overexposed = 0.0
        underexposed = 1.0

    print(f" 亮度分析: 平均={mean_b:.0f}/255, "
          f"过曝={overexposed:.0%}, 欠曝={underexposed:.0%}, "
          f"有效像素={n_valid}/{h*w}")

    # Adjustment.
    new_exposure = initial_exposure
    if mean_b < 1.0:
        new_exposure = initial_exposure + 4.0
        print(f" [严重欠曝] exposure: {initial_exposure:.1f} → {new_exposure:.1f} (+4.0 EV)")
    elif mean_b < 40:
        ev_adj = min(4.0, math.log2(TARGET_MEAN / max(mean_b, 1.0)))
        new_exposure = initial_exposure + ev_adj + 1.0  # extra +1 EV on top
        # Report the step that is actually applied (ev_adj plus the extra 1.0).
        print(f" [欠曝] exposure: {initial_exposure:.1f} → {new_exposure:.1f} "
              f"(+{new_exposure - initial_exposure:.1f} EV)")
    elif mean_b > 200:
        ev_adj = max(-4.0, math.log2(TARGET_MEAN / mean_b))
        new_exposure = initial_exposure + ev_adj
        print(f" [过曝] exposure: {initial_exposure:.1f} → {new_exposure:.1f} ({ev_adj:.1f} EV)")
    elif overexposed > 0.3:
        # Mean is acceptable but a large area is blown out.
        new_exposure = initial_exposure - 1.5
        print(f" [局部过曝 {overexposed:.0%}] exposure: {initial_exposure:.1f} → {new_exposure:.1f} (-1.5 EV)")
    else:
        print(f" [正常] 曝光无需调整")

    # Clamp.
    new_exposure = max(-2.0, min(12.0, new_exposure))

    scene.view_settings.exposure = new_exposure
    return new_exposure
1656
+
1657
+
1658
+ # =====================================================================
1659
+ # 有效天花板检测(忽略塔尖/天线等异常高点)
1660
+ # =====================================================================
1661
+
1662
def _detect_effective_ceiling(bmin, bmax, floor_z, ceiling_z_raw):
    """Estimate the ceiling height by casting rays upward on a 5x5 XY grid.

    Rays start 0.5 above ``floor_z`` at the cell centres of a 5x5 grid over
    the XY bounding box. The result is the largest of: the hit height at
    sorted index ``int(n * 0.75)``, the median hit height, and
    ``floor_z + 2.5``. When no ray hits anything, ``ceiling_z_raw`` is
    returned unchanged.
    """
    scene = bpy.context.scene
    depsgraph = bpy.context.evaluated_depsgraph_get()
    up = Vector((0, 0, 1))

    span_x = bmax[0] - bmin[0]
    span_y = bmax[1] - bmin[1]
    sample_xs = [bmin[0] + span_x * (k + 0.5) / 5 for k in range(5)]
    sample_ys = [bmin[1] + span_y * (k + 0.5) / 5 for k in range(5)]

    # 5x5 grid sampling.
    hit_heights = []
    for x in sample_xs:
        for y in sample_ys:
            hit, loc, *_ = scene.ray_cast(
                depsgraph, Vector((x, y, floor_z + 0.5)), up)
            if hit:
                hit_heights.append(loc.z)

    if not hit_heights:
        print(f" [天花板] 无 hit,使用 AABB: {ceiling_z_raw:.2f}m")
        return ceiling_z_raw

    ordered = sorted(hit_heights)
    n_hits = len(ordered)
    # 75th-percentile hit: the highest quarter (spires / antennas) is ignored.
    upper_quartile = ordered[min(int(n_hits * 0.75), n_hits - 1)]
    median = ordered[n_hits // 2]
    # Never below the median hit, and never below floor_z + 2.5.
    effective_ceil = max(upper_quartile, median, floor_z + 2.5)

    if effective_ceil < ceiling_z_raw - 1.0:
        print(f" [天花板] AABB={ceiling_z_raw:.2f}m → 有效={effective_ceil:.2f}m"
              f" (忽略 {ceiling_z_raw - effective_ceil:.1f}m 塔尖/天线)")
    else:
        print(f" [天花板] {effective_ceil:.2f}m")

    return effective_ceil
1710
+
1711
+
1712
+ # =====================================================================
1713
+ # Blender 模式主函数
1714
+ # =====================================================================
1715
+
1716
def main_blender():
    """Entry point when running inside Blender.

    Pipeline:
      0. load the scene (``args.blend`` if given, otherwise ``args.glb``);
      1. configure camera, render settings, lighting and the depth pass once;
      2. Phase 1 - generate the multi-layer candidate grid and filter it by
         ray casting (falls back to the bounding-box centre when every
         candidate is filtered out);
      3. auto-exposure test render at the candidate closest to the
         bounding-box centre;
      4. Phase 2 - render-while-selecting loop (``run_phase2``);
      5. write ``frame_selection/selected_frames.json`` and print a summary.

    Frames that ``run_phase2`` flagged as skipped had their RGB/depth files
    deleted; they are marked as such in the JSON summary and in the final
    listing rather than being advertised as delivered outputs. The
    ``skipped`` / ``skip_reason`` / ``supplementary`` keys are only written
    for frames that carry them.
    """
    args = parse_args_blender()

    # Unified scene path.
    scene_path = os.path.abspath(args.blend if args.blend else args.glb)

    output_dir = os.path.abspath(args.output_dir)
    resolution = tuple(int(x) for x in args.resolution.split(","))
    os.makedirs(output_dir, exist_ok=True)
    sel_dir = os.path.join(output_dir, "frame_selection")
    os.makedirs(sel_dir, exist_ok=True)

    scene_ext = Path(scene_path).suffix.lower()

    print("=" * 60)
    print("ERPT Blend Pipeline v5(单进程边渲边选)")
    print("=" * 60)
    print(f" Scene: {scene_path} [{scene_ext}]")
    print(f" Output: {output_dir}")
    print(f" Max frames: {args.num_frames}")
    print(f" Resolution: {resolution[0]}x{resolution[1]}")
    t_start = time.time()

    # ===== Phase 0: load scene =====
    bmin, bmax = load_scene(scene_path)

    # ===== Render setup (done once) =====
    print(f"\n[Setup] 渲染配置")
    cam_obj = setup_erp_camera()
    setup_render_settings(resolution, args.engine, args.samples, args.exposure)
    setup_lighting()
    depth_fo = setup_depth_pass()

    # ===== Phase 1: candidate grid + filtering =====
    print(f"\n{'='*60}")
    print("[Phase 1] 多层撒点 + 4层过滤")
    print(f"{'='*60}")

    floor_z_raw, ceiling_z_raw = bmin[2], bmax[2]

    # Effective ceiling: ray casting ignores outliers such as spires.
    ceiling_z = _detect_effective_ceiling(bmin, bmax, floor_z_raw, ceiling_z_raw)
    floor_z = floor_z_raw

    heights = compute_camera_heights(floor_z, ceiling_z, args.camera_height,
                                     bmin=bmin, bmax=bmax)
    print(f" 场景 Z 范围: {floor_z:.2f} ~ {ceiling_z:.2f}m (总高 {ceiling_z - floor_z:.2f}m)")
    print(f" 相机层数: {len(heights)}")
    for i, z in enumerate(heights):
        print(f" 第{i+1}层: Z={z:.2f}m (离地 {z - floor_z:.2f}m)")

    x_range = bmax[0] - bmin[0]
    y_range = bmax[1] - bmin[1]
    n_layers = len(heights)

    # Use the user's grid spacing unless it would yield more than 10000
    # candidates; otherwise fall back to 1/20 of each extent (min 0.5).
    x_sp = max(0.5, x_range / 20)
    y_sp = max(0.5, y_range / 20)
    nx = max(1, int((x_range - 2 * MARGIN) / args.grid_spacing))
    ny = max(1, int((y_range - 2 * MARGIN) / args.grid_spacing))
    total_user = nx * ny * n_layers

    if total_user <= 10000:
        x_sp = args.grid_spacing
        y_sp = args.grid_spacing
        print(f" 间距: {args.grid_spacing}m (候选≈{total_user}个)")
    else:
        nx_auto = max(1, int((x_range - 2 * MARGIN) / x_sp))
        ny_auto = max(1, int((y_range - 2 * MARGIN) / y_sp))
        total_auto = nx_auto * ny_auto * n_layers
        print(f" [自适应] 场景 {x_range:.0f}x{y_range:.0f}m, "
              f"X间距={x_sp:.1f}m, Y间距={y_sp:.1f}m "
              f"(候选≈{total_auto})")

    candidates = generate_candidate_grid(bmin, bmax, x_sp, y_sp, heights)
    if not candidates:
        print(" [Error] 没有候选点")
        sys.exit(1)

    room_height = ceiling_z - floor_z
    candidates = raycast_6layer_filter(candidates, room_height)
    if not candidates:
        print(" [Warning] 全部被过滤,使用 mesh 中心")
        cx = (bmin[0] + bmax[0]) / 2
        cy = (bmin[1] + bmax[1]) / 2
        candidates = [[cx, cy, heights[0]]]

    np.save(os.path.join(sel_dir, "candidates_filtered.npy"),
            np.array(candidates))

    # ===== Auto exposure: quick test at the candidate nearest the centre =====
    mesh_center = [(bmin[0] + bmax[0]) / 2,
                   (bmin[1] + bmax[1]) / 2,
                   (bmin[2] + bmax[2]) / 2]
    mc = np.array(mesh_center)
    test_dists = [np.linalg.norm(np.array(c) - mc) for c in candidates]
    test_pos = candidates[int(np.argmin(test_dists))]
    # Called for its side effect on scene.view_settings.exposure.
    auto_adjust_exposure(cam_obj, test_pos, output_dir, depth_fo, args.exposure)

    # ===== Phase 2: render while selecting =====
    results = run_phase2(
        cam_obj, candidates, mesh_center, output_dir,
        args.num_frames, resolution, depth_fo, args)

    # ===== Save the selection summary =====
    frames = []
    for r in results:
        entry = {
            "frame_id": r["frame_id"],
            "position": r["position"],
            "gain": r["gain"],
            "actual_gain": r["actual_gain"],
            "delta_ratio": r["delta_ratio"],
            "score": r["score"],
        }
        # Carry the optional status flags through so consumers can tell
        # skipped or padding frames from regular ones.
        for flag in ("skipped", "skip_reason", "supplementary"):
            if flag in r:
                entry[flag] = r[flag]
        frames.append(entry)

    summary = {
        "scene": os.path.basename(scene_path),
        "scene_format": scene_ext,
        "total_frames": len(results),
        "candidates_count": len(candidates),
        "frames": frames,
    }
    with open(os.path.join(sel_dir, "selected_frames.json"), "w") as f:
        json.dump(summary, f, indent=2, ensure_ascii=False)

    dt = time.time() - t_start
    print(f"\n{'='*60}")
    print(f"完成! {len(results)} 帧, {dt:.1f}s ({dt/60:.1f}min)")
    print(f"{'='*60}")
    print(f"输出目录: {output_dir}/")
    for r in results:
        fid = r["frame_id"]
        if r.get("skipped"):
            # The RGB/depth files of skipped frames were removed in Phase 2.
            print(f" [跳过] 帧 {fid:04d} ({r.get('skip_reason', '')}): RGB/深度已删除")
            continue
        print(f" panorama_{fid:04d}.png + _depth.npy + pose_{fid:04d}.json")
1850
+
1851
+
1852
+ # =====================================================================
1853
+ # 入口
1854
+ # =====================================================================
1855
+
1856
# Script entry: Blender's embedded interpreter runs the render pipeline,
# a plain Python interpreter runs the launcher path.
if __name__ == "__main__":
    if not IN_BLENDER:
        main_python()
    else:
        main_blender()
pipelines/run_full_pipeline.py ADDED
@@ -0,0 +1,1036 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ 全流程 Pipeline: .blend/.glb/.gltf/.ply → 边渲边选 → ERPT Warp
4
+
5
+ 支持六种模式:
6
+
7
+ 1. 单 Blend 场景:
8
+ python run_full_pipeline.py \
9
+ --blender /path/to/blender \
10
+ --blend /path/to/scene.blend \
11
+ --scene-name my_scene \
12
+ --output-root ./dataset
13
+
14
+ 2. 批量 Blend(扫描 input-dir 下所有 .blend):
15
+ python run_full_pipeline.py \
16
+ --blender /path/to/blender \
17
+ --input-dir /path/to/blend_files/ \
18
+ --output-root ./dataset
19
+
20
+ 3. 单 GLB/GLTF 场景:
21
+ python run_full_pipeline.py \
22
+ --blender /path/to/blender \
23
+ --glb /path/to/scene.glb \
24
+ --scene-name my_scene \
25
+ --output-root ./dataset
26
+
27
+ 4. 批量 GLB(扫描 input-dir 下所有 .glb/.gltf):
28
+ python run_full_pipeline.py \
29
+ --blender /path/to/blender \
30
+ --input-dir /path/to/glb_files/ \
31
+ --output-root ./dataset
32
+
33
+ 5. 单 PLY 场景(无需 Blender):
34
+ python run_full_pipeline.py \
35
+ --ply /path/to/scene.ply \
36
+ --scene-name my_scene \
37
+ --output-root ./dataset
38
+
39
+ 6. 批量 PLY(扫描 input-dir 下所有 .ply):
40
+ python run_full_pipeline.py \
41
+ --input-dir /path/to/ply_files/ \
42
+ --output-root ./dataset
43
+
44
+ 加 --dry-run 预览要跑哪些场景
45
+ 已跑完的场景自动跳过(--no-skip-done 强制重跑)
46
+ """
47
+
48
+ import argparse
49
+ import json
50
+ import os
51
+ import shutil
52
+ import subprocess
53
+ import sys
54
+ import time
55
+ from pathlib import Path
56
+
57
+
58
def run_step1_blend_pipeline(
    blender_exe: str,
    scene_path: str,
    temp_dir: str,
    num_frames: int,
    resolution: str,
    samples: int,
    engine: str,
    exposure: float,
    grid_spacing: float,
    camera_height,
    stop_gain: float,
    stop_score: float,
    stop_delta: float,
    min_frames: int,
    rotation_type: str = "random_yaw",
    gain_curve: bool = True,
    scene_flag: str = "--blend",
) -> int:
    """Step 1 (Blend/GLB): run ``run_blend_pipeline.py`` as a subprocess.

    The helper script is looked up next to this file and launched with the
    current Python interpreter. Each argument is forwarded as the
    command-line option of the same name; ``--render-depth`` is always set.

    Args:
        blender_exe: Value passed to ``--blender``.
        scene_path: Scene file passed after ``scene_flag``.
        temp_dir: Value passed to ``--output-dir``; rendered panoramas are
            counted there afterwards.
        camera_height: Forwarded as ``--camera-height`` only when not None.
        gain_curve: When false, ``--no-gain-curve`` is appended.
        scene_flag: Option that introduces ``scene_path``: "--blend" or "--glb".

    Returns:
        0 on success, otherwise the non-zero exit code of the subprocess.

    Raises:
        FileNotFoundError: If ``run_blend_pipeline.py`` is not next to this file.
    """
    pipeline_script = Path(__file__).parent / "run_blend_pipeline.py"
    if not pipeline_script.exists():
        raise FileNotFoundError(f"找不到 run_blend_pipeline.py: {pipeline_script}")

    command = [
        sys.executable, str(pipeline_script),
        "--blender", blender_exe,
        scene_flag, scene_path,
        "--output-dir", temp_dir,
        "--num-frames", str(num_frames),
        "--render-depth",
        "--resolution", resolution,
        "--samples", str(samples),
        "--engine", engine,
    ]
    # Numeric options are stringified one by one, in the original order.
    numeric_options = (
        ("--exposure", exposure),
        ("--grid-spacing", grid_spacing),
        ("--stop-gain", stop_gain),
        ("--stop-score", stop_score),
        ("--stop-delta", stop_delta),
        ("--min-frames", min_frames),
    )
    for flag, number in numeric_options:
        command += [flag, str(number)]
    command += ["--rotation-type", rotation_type]

    if camera_height is not None:
        command += ["--camera-height", str(camera_height)]
    if not gain_curve:
        command.append("--no-gain-curve")

    rule = "=" * 60
    print(f"\n{rule}")
    print("[Step 1] 边渲边选 (Blender Cycles)")
    print(rule)

    completed = subprocess.run(command, text=True)
    if completed.returncode != 0:
        print(f" [Error] run_blend_pipeline 退出码: {completed.returncode}")
        return completed.returncode

    frame_count = len(list(Path(temp_dir).glob("panorama_*.png")))
    print(f" 渲染完成: {frame_count} 帧")
    return 0
119
+
120
+
121
def run_step1_ply_pipeline(
    ply_path: str,
    temp_dir: str,
    num_frames: int,
    resolution: str,
    grid_spacing: float,
    camera_height,
    stop_gain: float,
    stop_score: float,
    stop_delta: float,
    min_frames: int,
    rotation_type: str = "random_yaw",
    point_size: float = 2.0,
    z_up: bool = True,
) -> int:
    """Step 1 (PLY): run ``run_ply_pipeline.py`` as a subprocess (no Blender).

    The helper script is looked up next to this file and launched with the
    current Python interpreter. Each argument is forwarded as the
    command-line option of the same name.

    Args:
        ply_path: Value passed to ``--ply``.
        temp_dir: Value passed to ``--output-dir``; rendered panoramas are
            counted there afterwards.
        camera_height: Forwarded as ``--camera-height`` only when not None.
        z_up: When false, ``--no-z-up`` is appended.

    Returns:
        0 on success, otherwise the non-zero exit code of the subprocess.

    Raises:
        FileNotFoundError: If ``run_ply_pipeline.py`` is not next to this file.
    """
    pipeline_script = Path(__file__).parent / "run_ply_pipeline.py"
    if not pipeline_script.exists():
        raise FileNotFoundError(f"找不到 run_ply_pipeline.py: {pipeline_script}")

    command = [
        sys.executable, str(pipeline_script),
        "--ply", ply_path,
        "--output-dir", temp_dir,
        "--num-frames", str(num_frames),
        "--resolution", resolution,
    ]
    # Numeric options are stringified one by one, in the original order.
    numeric_options = (
        ("--grid-spacing", grid_spacing),
        ("--stop-gain", stop_gain),
        ("--stop-score", stop_score),
        ("--stop-delta", stop_delta),
        ("--min-frames", min_frames),
    )
    for flag, number in numeric_options:
        command += [flag, str(number)]
    command += ["--rotation-type", rotation_type, "--point-size", str(point_size)]

    if camera_height is not None:
        command += ["--camera-height", str(camera_height)]
    if not z_up:
        command.append("--no-z-up")

    rule = "=" * 60
    print(f"\n{rule}")
    print("[Step 1] 边渲边选 (PLY 点云)")
    print(rule)

    completed = subprocess.run(command, text=True)
    if completed.returncode != 0:
        print(f" [Error] run_ply_pipeline 退出码: {completed.returncode}")
        return completed.returncode

    frame_count = len(list(Path(temp_dir).glob("panorama_*.png")))
    print(f" 渲染完成: {frame_count} 帧")
    return 0
172
+
173
+
174
def run_step1_hm3d_pipeline(
    blender_exe: str,
    scene_path: str,
    temp_dir: str,
    num_frames: int,
    resolution: str,
    samples: int,
    engine: str,
    exposure: float,
    grid_spacing: float,
    camera_height,
    stop_gain: float,
    stop_score: float,
    stop_delta: float,
    min_frames: int,
    rotation_type: str = "random_yaw",
    gain_curve: bool = True,
) -> int:
    """Step 1 (HM3D GLB): run ``run_hm3d_pipeline.py`` as a subprocess.

    The helper script is looked up next to this file and launched with the
    current Python interpreter. Each argument is forwarded as the
    command-line option of the same name; ``--render-depth`` and
    ``--hm3d True`` are always set.

    Args:
        blender_exe: Value passed to ``--blender``.
        scene_path: Scene file passed to ``--glb``.
        temp_dir: Value passed to ``--output-dir``; rendered panoramas are
            counted recursively below it afterwards.
        camera_height: Forwarded as ``--camera-height`` only when not None.
        gain_curve: When false, ``--no-gain-curve`` is appended.

    Returns:
        0 on success, otherwise the non-zero exit code of the subprocess.

    Raises:
        FileNotFoundError: If ``run_hm3d_pipeline.py`` is not next to this file.
    """
    pipeline_script = Path(__file__).parent / "run_hm3d_pipeline.py"
    if not pipeline_script.exists():
        raise FileNotFoundError(f"找不到 run_hm3d_pipeline.py: {pipeline_script}")

    command = [
        sys.executable, str(pipeline_script),
        "--blender", blender_exe,
        "--glb", scene_path,
        "--output-dir", temp_dir,
        "--num-frames", str(num_frames),
        "--render-depth",
        "--resolution", resolution,
        "--samples", str(samples),
        "--engine", engine,
    ]
    # Numeric options are stringified one by one, in the original order.
    numeric_options = (
        ("--exposure", exposure),
        ("--grid-spacing", grid_spacing),
        ("--stop-gain", stop_gain),
        ("--stop-score", stop_score),
        ("--stop-delta", stop_delta),
        ("--min-frames", min_frames),
    )
    for flag, number in numeric_options:
        command += [flag, str(number)]
    command += ["--rotation-type", rotation_type, "--hm3d", "True"]

    if camera_height is not None:
        command += ["--camera-height", str(camera_height)]
    if not gain_curve:
        command.append("--no-gain-curve")

    rule = "=" * 60
    print(f"\n{rule}")
    print("[Step 1] 边渲边选 (HM3D GLB)")
    print(rule)

    completed = subprocess.run(command, text=True)
    if completed.returncode != 0:
        print(f" [Error] run_hm3d_pipeline 退出码: {completed.returncode}")
        return completed.returncode

    # Frames live in per-space subdirectories, hence the recursive glob.
    frame_count = len(list(Path(temp_dir).rglob("panorama_*.png")))
    print(f" 渲染完成: {frame_count} 帧")
    return 0
233
+
234
+
235
def run_step2_organize_hm3d(temp_dir: str, scene_dir: str) -> int:
    """Step 2 (HM3D): arrange the multi-space render output into scene_dir.

    Expected layout under ``temp_dir``:
        space_00/ (panorama_*.png, *_depth.npy, pose_*.json)
        space_01/
        ...
        frame_selection/ (selected_frames.json, candidates_filtered.npy)

    Resulting layout (one directory per space that has panoramas):
        scene_dir/space_XX/input/   frame 0000 RGB + depth, plus every pose
        scene_dir/space_XX/output/  every frame's RGB + depth (ground truth)
        scene_dir/frame_selection/  frame-selection files, when present

    Returns:
        0 on success, 1 when ``temp_dir`` contains no ``space_*`` directory.
    """
    render_root = Path(temp_dir)
    scene_root = Path(scene_dir)

    def copy_if_present(src: Path, dst_dir: Path) -> None:
        # Copy src into dst_dir under the same name; silently skip if missing.
        if src.exists():
            shutil.copy2(str(src), str(dst_dir / src.name))

    rule = "=" * 60
    print(f"\n{rule}")
    print("[Step 2] 整理目录结构 (HM3D 多空间)")
    print(rule)

    spaces = sorted(
        entry for entry in render_root.iterdir()
        if entry.is_dir() and entry.name.startswith("space_")
    )
    if not spaces:
        print(" [Error] 没有找到 space_XX 目录")
        return 1

    print(f" 共 {len(spaces)} 个空间")

    for space in spaces:
        panoramas = sorted(space.glob("panorama_*.png"))
        if not panoramas:
            print(f" {space.name}: 无渲染结果,跳过")
            continue

        input_dir = scene_root / space.name / "input"
        output_dir = scene_root / space.name / "output"
        for target in (input_dir, output_dir):
            target.mkdir(parents=True, exist_ok=True)

        # output/: every frame's RGB plus its depth map when one exists.
        for panorama in panoramas:
            shutil.copy2(str(panorama), str(output_dir / panorama.name))
            copy_if_present(
                space / panorama.name.replace(".png", "_depth.npy"), output_dir)

        # input/: only frame 0000 (the centre frame) ...
        copy_if_present(space / "panorama_0000.png", input_dir)
        copy_if_present(space / "panorama_0000_depth.npy", input_dir)

        # ... together with the poses of all frames.
        poses = sorted(space.glob("pose_*.json"))
        for pose in poses:
            shutil.copy2(str(pose), str(input_dir / pose.name))

        print(f" {space.name}: {len(panoramas)} 帧 → output/, 中心帧 + {len(poses)} pose → input/")

    selection_dir = scene_root / "frame_selection"
    selection_dir.mkdir(parents=True, exist_ok=True)
    for filename in ("selected_frames.json", "candidates_filtered.npy"):
        copy_if_present(render_root / "frame_selection" / filename, selection_dir)

    return 0
314
+
315
+
316
def run_step2_organize(temp_dir: str, scene_dir: str) -> int:
    """Step 2: arrange the flat render output into scene_dir.

    Expected content of ``temp_dir``:
        panorama_0000.png, panorama_0000_depth.npy, pose_0000.json, ...

    Resulting layout:
        scene_dir/input/   frame 0000 RGB + depth, plus every pose
                           (consumed by the ERPT warp step)
        scene_dir/output/  every frame's RGB + depth (ground truth)
        scene_dir/input/frame_selection/  selection files and gain curve

    Returns:
        0 on success, 1 when ``temp_dir`` holds no ``panorama_*.png``.
    """
    render_root = Path(temp_dir)
    input_dir = Path(scene_dir) / "input"
    output_dir = Path(scene_dir) / "output"
    for target in (input_dir, output_dir):
        target.mkdir(parents=True, exist_ok=True)

    rule = "=" * 60
    print(f"\n{rule}")
    print("[Step 2] 整理目录结构")
    print(rule)

    panoramas = sorted(render_root.glob("panorama_*.png"))
    if not panoramas:
        print(" [Error] 没有找到渲染的全景图")
        return 1

    frame_total = len(panoramas)
    print(f" 共 {frame_total} 帧")

    # output/: every frame's RGB plus its depth map when one exists.
    for panorama in panoramas:
        shutil.copy2(str(panorama), str(output_dir / panorama.name))
        depth = render_root / panorama.name.replace(".png", "_depth.npy")
        if depth.exists():
            shutil.copy2(str(depth), str(output_dir / depth.name))

    print(f" output/: {frame_total} 帧 RGB + depth")

    # input/: only frame 0000 (the centre frame) ...
    for centre_name in ("panorama_0000.png", "panorama_0000_depth.npy"):
        centre_file = render_root / centre_name
        if centre_file.exists():
            shutil.copy2(str(centre_file), str(input_dir / centre_name))

    # ... together with the poses of all frames.
    poses = sorted(render_root.glob("pose_*.json"))
    for pose in poses:
        shutil.copy2(str(pose), str(input_dir / pose.name))

    print(f" input/: 中心帧 + {len(poses)} 个 pose")

    # Keep the frame-selection artefacts alongside the inputs for reference.
    selection_dir = input_dir / "frame_selection"
    selection_dir.mkdir(parents=True, exist_ok=True)

    selection_json = render_root / "frame_selection" / "selected_frames.json"
    for artefact in (selection_json,
                     render_root / "frame_selection" / "candidates_filtered.npy"):
        if artefact.exists():
            shutil.copy2(str(artefact), str(selection_dir / artefact.name))

    # Gain curve is best effort: a plotting failure must not fail the step.
    if selection_json.exists():
        try:
            draw_gain_curve(str(selection_dir / "selected_frames.json"),
                            str(selection_dir / "gain_curve.jpg"))
            print(f" 增益曲线: {selection_dir}/gain_curve.jpg")
        except Exception as e:
            print(f" [跳过] 画增益曲线失败: {e}")

    return 0
394
+
395
+
396
def draw_gain_curve(json_path, output_path, stop_gain=0.05, stop_score=-0.33,
                    stop_delta=0.01):
    """Plot the frame-selection gain curve (matplotlib first, PIL fallback).

    Reads the ``frames`` list from the JSON summary, ignores entries flagged
    ``skipped`` and plots predicted/actual gain in the upper panel and
    score/delta in the lower one. Nothing is written when fewer than two
    frames remain, or when neither matplotlib nor PIL can be imported.

    Args:
        json_path: Path of the selection summary (``selected_frames.json``).
        output_path: Image file to write.
        stop_gain: y-position and label of the gain guide line (matplotlib only).
        stop_score: y-position and label of the score guide line (matplotlib only).
        stop_delta: y-position and label of the delta guide line (matplotlib only).

    Returns:
        None.
    """
    with open(json_path) as f:
        data = json.load(f)

    frames = [fr for fr in data["frames"] if not fr.get("skipped")]
    if len(frames) < 2:
        return

    fids = [fr["frame_id"] for fr in frames]
    pred_gains = [fr["gain"] for fr in frames]
    actual_gains = [fr["actual_gain"] for fr in frames]
    scores = [fr["score"] for fr in frames]
    deltas = [fr["delta_ratio"] for fr in frames]

    try:
        import matplotlib
        matplotlib.use('Agg')
        import matplotlib.pyplot as plt

        fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(10, 6), sharex=True)
        try:
            ax1.plot(fids, pred_gains, 'o-', color='#2196F3', label='predicted', markersize=3, linewidth=1.5)
            ax1.plot(fids, actual_gains, 'o-', color='#FF9800', label='actual', markersize=3, linewidth=1.5)
            ax1.axhline(y=stop_gain, color='red', linestyle='--', alpha=0.5,
                        label=f'stop_gain={stop_gain:.0%}')
            ax1.set_ylabel('gain')
            ax1.set_ylim(-0.05, 1.05)
            ax1.legend(loc='upper right', fontsize=9)
            ax1.set_title(f'Gain Curve ({len(frames)} frames)', fontsize=11)
            ax1.grid(True, alpha=0.3)
            # Annotate the first and last actual-gain values.
            ax1.annotate(f'{actual_gains[0]:.0%}', (fids[0], actual_gains[0]),
                         textcoords="offset points", xytext=(5, 5), fontsize=7, color='#FF9800')
            ax1.annotate(f'{actual_gains[-1]:.0%}', (fids[-1], actual_gains[-1]),
                         textcoords="offset points", xytext=(-25, 5), fontsize=7, color='#FF9800')

            ax2.plot(fids, scores, 'D-', color='#4CAF50', label='score', markersize=3, linewidth=1.5)
            ax2.plot(fids, deltas, 's-', color='#9C27B0', label='delta', markersize=2, linewidth=1.2)
            ax2.axhline(y=stop_score, color='red', linestyle='--', alpha=0.5,
                        label=f'stop_score={stop_score}')
            ax2.axhline(y=stop_delta, color='#9C27B0', linestyle=':', alpha=0.4,
                        label=f'stop_delta={stop_delta:.0%}')
            ax2.set_ylabel('value')
            ax2.set_xlabel('frame')
            ax2.legend(loc='upper right', fontsize=9)
            ax2.grid(True, alpha=0.3)
            # Annotate the first and last delta values.
            ax2.annotate(f'{deltas[0]:.1%}', (fids[0], deltas[0]),
                         textcoords="offset points", xytext=(5, 5), fontsize=7, color='#9C27B0')
            ax2.annotate(f'{deltas[-1]:.1%}', (fids[-1], deltas[-1]),
                         textcoords="offset points", xytext=(-25, -10), fontsize=7, color='#9C27B0')

            fig.tight_layout()
            fig.savefig(output_path, dpi=150, bbox_inches='tight')
        finally:
            # Always release the figure, even when plotting/saving fails;
            # otherwise a batch run accumulates one open figure per scene.
            plt.close(fig)
        return

    except ImportError:
        pass

    # ---- fallback: PIL ----
    try:
        from PIL import Image, ImageDraw, ImageFont

        W, H = 800, 500
        ML, MR, MT, MB = 50, 20, 30, 25  # left / right / top / bottom margins
        MID = H // 2                     # boundary between the two panels
        pw = W - ML - MR                 # plot width in pixels

        img = Image.new("RGB", (W, H), "white")
        draw = ImageDraw.Draw(img)
        try:
            font = ImageFont.truetype("/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf", 9)
        except Exception:
            # Font file unavailable: fall back to PIL's built-in font.
            font = ImageFont.load_default()

        n = len(frames)

        def px(i, v, y0, y1, vmin, vmax):
            # Map sample index i / value v to a pixel inside rows [y0, y1].
            x = ML + int(i / max(n - 1, 1) * pw)
            y = y0 + int((1 - (v - vmin) / (vmax - vmin)) * (y1 - y0))
            return x, max(y0, min(y1, y))

        def line(pts, color, y0, y1, vmin, vmax):
            # Draw a polyline through consecutive samples.
            for j in range(len(pts) - 1):
                draw.line([px(j, pts[j], y0, y1, vmin, vmax),
                           px(j + 1, pts[j + 1], y0, y1, vmin, vmax)],
                          fill=color, width=2)

        line(pred_gains, "#2196F3", MT, MID - 5, 0, 1.05)
        line(actual_gains, "#FF9800", MT, MID - 5, 0, 1.05)
        line(scores, "#4CAF50", MID + 10, H - MB, -0.6, 1.05)
        line(deltas, "#9C27B0", MID + 10, H - MB, -0.6, 1.05)

        draw.text((ML, MT - 12), f"Gain ({n} frames)", fill="black", font=font)
        draw.text((ML, MID + 2), "Score / Delta", fill="black", font=font)
        img.save(output_path, quality=90)

    except ImportError:
        # Neither plotting backend is installed: skip the curve silently.
        pass
492
+
493
+
494
def run_step3_erpt_warp_hm3d(scene_dir: str, device: str = "cuda") -> int:
    """Step 3 (HM3D): run the ERPT warp once per space via ``run_pipeline.py``.

    Every ``scene_dir/space_XX/input/`` holding at least two pose files is
    warped in place. Afterwards the ``_rgb.png``, ``_mask.png`` and
    ``_depth_range.npy`` files are lifted out of ``warp_rgb/`` and
    ``warp_depth/`` into ``input/`` and those two subdirectories are removed.

    Returns:
        1 when no ``space_*`` directory exists; otherwise the exit code of
        the last failing warp subprocess, or 0 when none failed.

    Raises:
        FileNotFoundError: If ``run_pipeline.py`` is not next to this file.
    """
    warp_script = Path(__file__).parent / "run_pipeline.py"
    if not warp_script.exists():
        raise FileNotFoundError(f"找不到 run_pipeline.py: {warp_script}")

    rule = "=" * 60
    print(f"\n{rule}")
    print("[Step 3] ERPT Warp (HM3D 多空间)")
    print(rule)

    spaces = sorted(
        entry for entry in Path(scene_dir).iterdir()
        if entry.is_dir() and entry.name.startswith("space_")
    )
    if not spaces:
        print(" [Error] 没有找到 space_XX 目录")
        return 1

    wanted_suffixes = ("_rgb.png", "_mask.png", "_depth_range.npy")
    last_failure = 0
    warped_count = 0
    skipped_count = 0

    for space in spaces:
        inputs = space / "input"
        if not inputs.exists():
            continue

        pose_count = sum(1 for _ in inputs.glob("pose_*.json"))
        if pose_count < 2:
            print(f" {space.name}: {pose_count} pose,跳过 warp")
            skipped_count += 1
            continue

        print(f"\n [{space.name}] ERPT Warp ({pose_count} poses)...")

        completed = subprocess.run(
            [
                sys.executable, str(warp_script),
                "--stage", "warp_only",
                "--data_dir", str(inputs),
                "--output_dir", str(inputs),
                "--device", device,
                "--center_frame", "0",
            ],
            text=True,
        )
        if completed.returncode != 0:
            print(f" [Error] {space.name} warp 失败 (退出码: {completed.returncode})")
            last_failure = completed.returncode
            continue

        moved_count = 0
        for staging_name in ("warp_rgb", "warp_depth"):
            staging = inputs / staging_name
            if not staging.exists():
                continue
            for entry in staging.iterdir():
                if entry.is_file() and entry.name.endswith(wanted_suffixes):
                    shutil.move(str(entry), str(inputs / entry.name))
                    moved_count += 1
            # Whatever is left in the staging directory is not needed.
            shutil.rmtree(str(staging), ignore_errors=True)

        warped_count += 1
        print(f" warp 文件: {moved_count} 个")

    print(f"\n Warp 完成: {warped_count} 个空间, 跳过 {skipped_count} 个")
    return last_failure
567
+
568
+
569
def run_step3_erpt_warp(scene_dir: str, device: str = "cuda") -> int:
    """Step 3: run the ERPT warp on ``scene_dir/input`` via ``run_pipeline.py``.

    The subprocess reads and writes ``scene_dir/input/``. Afterwards the
    ``_rgb.png``, ``_mask.png`` and ``_depth_range.npy`` files are lifted out
    of ``warp_rgb/`` and ``warp_depth/`` into ``input/`` and those two
    subdirectories are removed.

    Returns:
        0 on success, otherwise the non-zero exit code of the subprocess.

    Raises:
        FileNotFoundError: If ``run_pipeline.py`` is not next to this file.
    """
    warp_script = Path(__file__).parent / "run_pipeline.py"
    if not warp_script.exists():
        raise FileNotFoundError(f"找不到 run_pipeline.py: {warp_script}")

    inputs = Path(scene_dir) / "input"

    rule = "=" * 60
    print(f"\n{rule}")
    print("[Step 3] ERPT Warp")
    print(rule)

    completed = subprocess.run(
        [
            sys.executable, str(warp_script),
            "--stage", "warp_only",
            "--data_dir", str(inputs),
            "--output_dir", str(inputs),
            "--device", device,
            "--center_frame", "0",
        ],
        text=True,
    )
    if completed.returncode != 0:
        print(f" [Error] run_pipeline 退出码: {completed.returncode}")
        return completed.returncode

    # Lift only the files we keep out of the warp subdirectories.
    wanted_suffixes = ("_rgb.png", "_mask.png", "_depth_range.npy")
    moved_count = 0
    for staging_name in ("warp_rgb", "warp_depth"):
        staging = inputs / staging_name
        if not staging.exists():
            continue
        for entry in staging.iterdir():
            if entry.is_file() and entry.name.endswith(wanted_suffixes):
                shutil.move(str(entry), str(inputs / entry.name))
                moved_count += 1
        # Drop the directory with the leftovers (flow / weight_sum /
        # comparison outputs and the like).
        shutil.rmtree(str(staging), ignore_errors=True)

    print(f" warp 文件已移到 input/: {moved_count} 个")
    return 0
617
+
618
+
619
def is_already_done(output_root, scene_name):
    """Return True when a finished selection summary exists for the scene.

    The first existing ``selected_frames.json`` among these locations under
    ``output_root/scene_name`` is inspected:

    1. ``input/frame_selection/`` (written by ``run_step2_organize``)
    2. ``input/``                 (summary placed directly under input/)
    3. ``frame_selection/``       (written by ``run_step2_organize_hm3d``)

    The scene counts as done when that file is a JSON object whose
    ``total_frames`` is greater than zero. A missing, unreadable or
    malformed summary yields False.
    """
    scene_dir = os.path.join(output_root, scene_name)
    candidates = (
        os.path.join(scene_dir, "input", "frame_selection", "selected_frames.json"),
        os.path.join(scene_dir, "input", "selected_frames.json"),
        # HM3D scenes keep the summary at scene level; without this entry
        # skip-done could never recognise a finished HM3D scene.
        # NOTE(review): assumes the HM3D summary also carries "total_frames"
        # - confirm against run_hm3d_pipeline.py.
        os.path.join(scene_dir, "frame_selection", "selected_frames.json"),
    )
    sel_path = next((p for p in candidates if os.path.exists(p)), None)
    if sel_path is None:
        return False
    try:
        with open(sel_path) as f:
            data = json.load(f)
        return data.get("total_frames", 0) > 0
    except (OSError, ValueError, AttributeError, TypeError):
        # Unreadable file, invalid JSON/encoding, or unexpected structure.
        return False
636
+
637
+
638
def find_glb_files(input_dir):
    """Recursively collect .glb / .gltf files as ``[(path, scene_name), ...]``.

    The extension match is case-insensitive. ``scene_name`` is the file stem
    for files directly in ``input_dir``; for nested files it is the name of
    the first-level subdirectory below ``input_dir``. The result is sorted by
    ``scene_name``.
    """
    base = os.path.abspath(input_dir)
    found = []
    for folder, _, filenames in os.walk(base):
        relative = os.path.relpath(folder, base)
        for filename in filenames:
            if not filename.lower().endswith((".glb", ".gltf")):
                continue
            if relative == ".":
                name = os.path.splitext(filename)[0]
            else:
                name = relative.split(os.sep)[0]
            found.append((os.path.join(folder, filename), name))
    return sorted(found, key=lambda item: item[1])
657
+
658
+
659
def find_blend_files(input_dir):
    """Recursively collect .blend files as ``[(blend_path, scene_name), ...]``.

    ``scene_name`` is the first-level subdirectory of ``input_dir`` that
    contains the file, however deeply the .blend is nested, e.g.:

        input_dir = /path/to/dataset/indoor
        file      = /path/to/dataset/indoor/scene_indoor_0001/1407m1/xxx.blend
        -> scene_name = scene_indoor_0001

    A .blend placed directly in ``input_dir`` is named after its file stem,
    like ``find_glb_files`` / ``find_ply_files`` do. The extension match is
    case-insensitive and the result is sorted by ``scene_name``.
    """
    input_dir = os.path.abspath(input_dir)
    blend_files = []
    for root, _, files in os.walk(input_dir):
        rel = os.path.relpath(root, input_dir)
        for f in files:
            if not f.lower().endswith(".blend"):
                continue
            if rel == ".":
                # Without this, a top-level file would get scene name ".",
                # i.e. its output would be written into output_root itself
                # and all top-level files would share one scene directory.
                scene_name = os.path.splitext(f)[0]
            else:
                scene_name = rel.split(os.sep)[0]
            blend_files.append((os.path.join(root, f), scene_name))
    blend_files.sort(key=lambda x: x[1])
    return blend_files
681
+
682
+
683
def find_ply_files(input_dir):
    """Recursively collect .ply files as ``[(ply_path, scene_name), ...]``.

    The extension match is case-insensitive. ``scene_name`` is the file stem
    for files directly in ``input_dir``; for nested files it is the name of
    the first-level subdirectory below ``input_dir``. The result is sorted by
    ``scene_name``.
    """
    base = os.path.abspath(input_dir)
    found = []
    for folder, _, filenames in os.walk(base):
        relative = os.path.relpath(folder, base)
        for filename in filenames:
            if not filename.lower().endswith(".ply"):
                continue
            if relative == ".":
                name = os.path.splitext(filename)[0]
            else:
                name = relative.split(os.sep)[0]
            found.append((os.path.join(folder, filename), name))
    return sorted(found, key=lambda item: item[1])
702
+
703
+
704
def run_single_scene(args, scene_path, scene_name, scene_type="blend"):
    """Run the three pipeline steps for one scene.

    Step 1 renders and selects frames into ``<scene_dir>/_render_temp``,
    step 2 arranges them under ``<scene_dir>``, step 3 (unless
    ``args.skip_warp``) runs the ERPT warp. The temporary render directory is
    removed once steps 1 and 2 have succeeded, whether or not the warp did.

    Args:
        args: Parsed command-line namespace carrying the pipeline options.
        scene_path: Path of the scene file.
        scene_name: Name of the output directory below ``args.output_root``.
        scene_type: "blend" | "glb" | "hm3d" | "ply"; any other value is
            handled like "ply".

    Returns:
        0 on success, otherwise the non-zero code of the first failing step.
        A failing warp step is reported too (it used to be swallowed, so
        batch mode counted such scenes as successful).
    """
    output_root = str(Path(args.output_root).resolve())
    scene_dir = os.path.join(output_root, scene_name)
    temp_dir = os.path.join(scene_dir, "_render_temp")
    os.makedirs(scene_dir, exist_ok=True)

    type_labels = {
        "blend": ".blend (Blender Cycles)",
        "glb": ".glb/.gltf (Blender Cycles)",
        "hm3d": ".glb/.gltf (HM3D 渲染)",
        "ply": ".ply (点云渲染)",
    }
    type_label = type_labels.get(scene_type, scene_type)
    print("=" * 60)
    print(f"全流程 Pipeline: {type_label} → 边渲边选 → ERPT Warp")
    print("=" * 60)
    print(f" Scene: {scene_name}")
    print(f" Input: {scene_path}")
    print(f" Output: {scene_dir}/")
    t_start = time.time()

    is_hm3d = scene_type == "hm3d"

    # Step 1: options shared by every renderer.
    selection_opts = dict(
        temp_dir=temp_dir,
        num_frames=args.num_frames,
        resolution=args.resolution,
        grid_spacing=args.grid_spacing,
        camera_height=args.camera_height,
        stop_gain=args.stop_gain,
        stop_score=args.stop_score,
        stop_delta=args.stop_delta,
        min_frames=args.min_frames,
        rotation_type=args.rotation_type,
    )
    if scene_type in ("hm3d", "blend", "glb"):
        # Blender-only options are read only on this path, so PLY runs do
        # not depend on them.
        render_opts = dict(
            blender_exe=args.blender,
            scene_path=scene_path,
            samples=args.samples,
            engine=args.engine,
            exposure=args.exposure,
            gain_curve=getattr(args, "gain_curve", True),
            **selection_opts,
        )
        if is_hm3d:
            ret = run_step1_hm3d_pipeline(**render_opts)
        else:
            scene_flag = "--blend" if scene_type == "blend" else "--glb"
            ret = run_step1_blend_pipeline(scene_flag=scene_flag, **render_opts)
    else:
        ret = run_step1_ply_pipeline(
            ply_path=scene_path,
            point_size=getattr(args, "point_size", 2.0),
            z_up=getattr(args, "z_up", True),
            **selection_opts,
        )

    if ret != 0:
        print("[Error] Step 1 失败")
        return ret

    # Step 2
    if is_hm3d:
        ret = run_step2_organize_hm3d(temp_dir, scene_dir)
    else:
        ret = run_step2_organize(temp_dir, scene_dir)
    if ret != 0:
        print("[Error] Step 2 失败")
        return ret

    # Step 3
    warp_ret = 0
    if not args.skip_warp:
        if is_hm3d:
            warp_ret = run_step3_erpt_warp_hm3d(scene_dir, device=args.device)
        else:
            warp_ret = run_step3_erpt_warp(scene_dir, device=args.device)
        if warp_ret != 0:
            print("[Error] Step 3 失败")

    # Cleanup: the organised copies exist by now, so the temporary renders
    # can go even when the warp failed.
    if os.path.exists(temp_dir):
        shutil.rmtree(temp_dir, ignore_errors=True)

    if warp_ret != 0:
        # NOTE(review): selected_frames.json already exists at this point,
        # so a later skip-done run will still treat the scene as finished.
        return warp_ret

    dt = time.time() - t_start
    print(f"\n{'='*60}")
    print(f"完成! {scene_name}, {dt:.1f}s ({dt/60:.1f}min)")
    print(f"{'='*60}")
    return 0
818
+
819
+
820
def run_single(args):
    """Single-scene mode: run exactly one blend, glb, hm3d or ply scene.

    The input option is chosen in the order ``--ply``, ``--hm3d``, ``--glb``
    and finally ``--blend``. The scene name defaults to the file stem when
    ``args.scene_name`` is empty. Exits the process with status 1 when the
    scene fails.
    """
    if args.ply:
        source, kind = args.ply, "ply"
    elif args.hm3d:
        source, kind = args.hm3d, "hm3d"
    elif args.glb:
        source, kind = args.glb, "glb"
    else:
        source, kind = args.blend, "blend"

    resolved = str(Path(source).resolve())
    name = args.scene_name or Path(source).stem
    if run_single_scene(args, resolved, name, scene_type=kind) != 0:
        sys.exit(1)
840
+
841
+
842
def run_batch(args):
    """Batch mode: find scene files under ``args.input_dir`` and run each one.

    Without ``--blender`` only .ply files are considered. With it, the first
    non-empty result of .blend, then .glb/.gltf (run through the HM3D
    pipeline), then .ply is used. Scenes already finished are skipped when
    ``args.skip_done`` is set, and ``args.dry_run`` only prints the plan.

    Exits the process with status 1 when the input directory does not exist
    or contains no scene file.
    NOTE(review): failed scenes are only listed in the summary; the function
    still returns normally, unlike ``run_single`` which exits with 1.
    """
    input_dir_abs = os.path.abspath(args.input_dir)
    if not os.path.isdir(input_dir_abs):
        print(f"[Error] --input-dir 目录不存在: {input_dir_abs}")
        print(f" (原始参数: {args.input_dir})")
        sys.exit(1)

    if not getattr(args, "blender", None):
        # No --blender: only PLY scenes can be rendered.
        searched = ".ply"
        scene_files = find_ply_files(args.input_dir)
        scene_type = "ply"
        ext_label = ".ply"
    else:
        # With Blender: prefer .blend, then .glb/.gltf (HM3D), finally .ply.
        searched = ".blend/.glb/.gltf/.ply"
        finders = (
            (find_blend_files, "blend", ".blend"),
            (find_glb_files, "hm3d", ".glb/.gltf (HM3D)"),
            (find_ply_files, "ply", ".ply"),
        )
        for finder, scene_type, ext_label in finders:
            scene_files = finder(args.input_dir)
            if scene_files:
                break

    if not scene_files:
        # Name every extension that was searched, not just the last one.
        print(f"[Error] 在 {args.input_dir} 下没找到 {searched} 文件")
        sys.exit(1)

    output_root = str(Path(args.output_root).resolve())

    print(f"{'='*60}")
    print(f"批量处理模式 ({ext_label})")
    print(f"{'='*60}")
    print(f" 输入目录: {args.input_dir}")
    print(f" 输出目录: {output_root}")
    print(f" 找到 {len(scene_files)} 个 {ext_label} 文件")

    to_run = []
    skipped = []
    for scene_path, scene_name in scene_files:
        if args.skip_done and is_already_done(output_root, scene_name):
            skipped.append((scene_path, scene_name))
        else:
            to_run.append((scene_path, scene_name))

    if skipped:
        print(f" 跳过 {len(skipped)} 个已完成:")
        for _, sn in skipped:
            print(f" ✓ {sn}")

    print(f" 待处理 {len(to_run)} 个:")
    for bp, sn in to_run:
        print(f" → {sn} ({os.path.basename(bp)})")

    if args.dry_run:
        print("\n[Dry run] 不实际运行")
        return

    if not to_run:
        print("\n全部已完成!")
        return

    t_all = time.time()
    success = []
    failed = []

    for idx, (scene_path, scene_name) in enumerate(to_run):
        print(f"\n{'='*60}")
        print(f"[{idx+1}/{len(to_run)}] {scene_name}")
        print(f"{'='*60}")

        t_scene = time.time()
        try:
            ret = run_single_scene(args, scene_path, scene_name, scene_type)
            dt = time.time() - t_scene
            if ret == 0:
                success.append((scene_name, dt))
                print(f"\n ✓ {scene_name} ({dt:.0f}s)")
            else:
                failed.append((scene_name, f"exit code {ret}"))
                print(f"\n ✗ {scene_name} 失败 ({dt:.0f}s)")
        except Exception as e:
            # One broken scene must not abort the rest of the batch.
            dt = time.time() - t_scene
            failed.append((scene_name, str(e)))
            print(f"\n ✗ {scene_name} 异常: {e} ({dt:.0f}s)")

    dt_all = time.time() - t_all
    print(f"\n{'='*60}")
    print("批量处理完成")
    print(f"{'='*60}")
    print(f" 总耗时: {dt_all:.0f}s ({dt_all/60:.1f}min = {dt_all/3600:.1f}h)")
    print(f" 成功: {len(success)} 个")
    for sn, dt in success:
        print(f" ✓ {sn} ({dt:.0f}s)")
    if failed:
        print(f" 失败: {len(failed)} 个")
        for sn, reason in failed:
            print(f" ✗ {sn}: {reason}")
    if skipped:
        print(f" 跳过: {len(skipped)} 个 (已完成)")
946
+
947
+
948
def main():
    """Command-line entry point: parse options and dispatch to a run mode.

    ``--input-dir`` selects batch mode (``run_batch``) and is checked first.
    Otherwise one of ``--blend`` / ``--glb`` / ``--hm3d`` / ``--ply`` selects
    single-scene mode (``run_single``).  If neither is given, the parser
    exits with a usage error.
    """
    cli = argparse.ArgumentParser(
        description="全流程: .blend/.glb/.ply → 边渲边选 → ERPT Warp"
    )
    add = cli.add_argument

    # Scene input: the four single-scene options are mutually exclusive.
    scene_src = cli.add_mutually_exclusive_group()
    scene_src.add_argument("--blend", type=str, default=None,
                           help=".blend 场景文件路径(单 Blend 场景模式)")
    scene_src.add_argument("--glb", type=str, default=None,
                           help=".glb / .gltf 场景文件路径(单 GLB 场景模式)")
    scene_src.add_argument("--hm3d", type=str, default=None,
                           help=".glb / .gltf 场景文件路径(单 HM3D 场景模式)")
    scene_src.add_argument("--ply", type=str, default=None,
                           help=".ply 场景文件路径(单 PLY 场景模式)")

    add("--input-dir", type=str, default=None,
        help="包含场景文件的根目录(批量模式,自动检测 .blend/.glb/.ply)")
    add("--scene-name", type=str, default=None,
        help="场景名(默认从文件名提取)")
    add("--output-root", type=str, default="./dataset",
        help="输出根目录(默认 ./dataset)")

    # Blender options (only needed for the Blend / GLB / HM3D modes).
    add("--blender", type=str, default=None,
        help="Blender 可执行文件路径(Blend/GLB/HM3D 模式必须)")
    add("--samples", type=int, default=128)
    add("--engine", type=str, default="CYCLES")
    add("--exposure", type=float, default=0.0)
    add("--gain-curve", action="store_true", default=True,
        help="画增益曲线 (默认开启)")
    add("--no-gain-curve", dest="gain_curve", action="store_false")

    # PLY options (PLY mode only).
    add("--point-size", type=float, default=2.0,
        help="点云渲染点径(像素),PLY 模式有效(默认 2.0)")
    add("--z-up", action="store_true", default=True,
        help="PLY 坐标系为 Z-up(默认 True)")
    add("--no-z-up", dest="z_up", action="store_false",
        help="PLY 坐标系为 Y-up(已是 ERPT_native,不转换)")

    # Rendering options shared by all modes.
    add("--num-frames", type=int, default=30)
    add("--resolution", type=str, default="2048,1024")

    # View-selection options.
    add("--grid-spacing", type=float, default=0.5)
    add("--camera-height", type=float, default=None)
    add("--stop-gain", type=float, default=0.08)
    add("--stop-score", type=float, default=-0.3)
    add("--stop-delta", type=float, default=0.08)
    add("--min-frames", type=int, default=5)
    add("--rotation-type", type=str, default="random_yaw",
        choices=["none", "rotate_x_90", "rotate_x_180",
                 "rotate_z_90", "random_yaw"])

    # ERPT options.
    add("--device", type=str, default="cuda")
    add("--skip-warp", action="store_true",
        help="只做步骤 1+2,跳过 ERPT warp")

    # Batch-mode options.
    add("--skip-done", action="store_true", default=True,
        help="跳过已跑完的场景(默认开启)")
    add("--no-skip-done", action="store_true",
        help="强制重跑所有场景")
    add("--dry-run", action="store_true",
        help="只列出要跑的场景,不实际运行")

    args = cli.parse_args()

    # --skip-done is on by default; --no-skip-done is the switch that turns it off.
    if args.no_skip_done:
        args.skip_done = False

    # The Blend / GLB / HM3D modes cannot run without a Blender executable.
    needs_blender = args.blend or args.glb or args.hm3d
    if needs_blender and not args.blender:
        cli.error("--blend / --glb / --hm3d 模式必须同时提供 --blender 可执行文件路径")

    # Mode dispatch: batch first, then single scene, otherwise a usage error.
    if args.input_dir:
        run_batch(args)
    elif needs_blender or args.ply:
        run_single(args)
    else:
        cli.error("必须指定 --blend / --glb / --hm3d / --ply(单场景)或 --input-dir(批量)")
1033
+
1034
+
1035
+ if __name__ == "__main__":
1036
+ main()
pipelines/run_hm3d_pipeline.py ADDED
The diff for this file is too large to render. See raw diff
 
pipelines/run_pipeline.py ADDED
@@ -0,0 +1,500 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ ERPT Pipeline 主入口(Forward Warp,深度估计可选)
4
+
5
+ 使用方法:
6
+ # 默认:使用已有深度真值做 warp(不加载深度估计权重)
7
+ python run_pipeline.py --stage warp_only --data_dir /path/to/scene
8
+
9
+ # 强制完整流程(深度估计 + warp)
10
+ python run_pipeline.py --stage all
11
+
12
+ # 仅深度估计
13
+ python run_pipeline.py --stage depth_only
14
+ """
15
+
16
+ import argparse
17
+ import re
18
+ import time
19
+ from pathlib import Path
20
+ from typing import Dict, Any, List, Optional
21
+
22
+ import yaml
23
+ import numpy as np
24
+ import torch
25
+ import cv2
26
+
27
+ # 添加模块路径
28
+ import sys
29
+ sys.path.insert(0, str(Path(__file__).parent))
30
+
31
+ # Warp 相关(始终加载)
32
+ from core.erp_warp import warp_erp_to_target, WarpResult, create_comparison_image
33
+ from utils.io_utils import load_image, save_image, load_json, save_json, save_depth
34
+ from utils.pose_utils import Pose, load_pose
35
+
36
+ # 深度估计相关(延迟加载,仅 depth_only / all 模式才 import)
37
+ _depth_modules_loaded = False
38
+
39
+
40
def _load_depth_modules():
    """Import the depth-estimation modules on first use.

    The imports are deferred so that the ``warp_only`` stage never pays for
    loading the depth-estimation stack.  The imported callables are published
    as module-level globals; later calls are no-ops.
    """
    global _depth_modules_loaded
    global build_icosahedron_slices, extract_all_tangents, compute_coverage_mask
    global estimate_all_tangent_depths
    global fuse_tangent_depths_to_erp, save_depth_visualization, visualize_depth

    if not _depth_modules_loaded:
        from core.tangent_extraction import (
            build_icosahedron_slices,
            extract_all_tangents,
            compute_coverage_mask,
        )
        from core.depth_estimation import estimate_all_tangent_depths
        from core.depth_fusion import (
            fuse_tangent_depths_to_erp,
            save_depth_visualization,
            visualize_depth,
        )

        # Flip the flag only after every import succeeded.
        _depth_modules_loaded = True
        print("[Depth] 深度估计模块已加载")
62
+
63
+
64
+ # =============================================================================
65
+ # 数据发现
66
+ # =============================================================================
67
+
68
def discover_image_files(directory: Path) -> dict:
    """Discover the panorama images stored directly inside *directory*.

    A file counts as an image when its extension is ``.png``, ``.jpg`` or
    ``.jpeg`` in any letter case.  Each file is listed exactly once, also on
    case-insensitive filesystems, and sub-directories are ignored.

    The frame id is the run of digits at the end of the file stem, either the
    whole stem (``12.png``) or the part after a final ``_`` / ``-``
    (``panorama_0012.png``).  If no file name carries a frame id, the images
    are numbered ``0..n-1`` in sorted path order instead.

    Args:
        directory: Folder to scan (not recursive).

    Returns:
        ``{frame_id: Path}``; empty when the folder is missing or holds no
        images.  When two files share a frame id, the later one in sorted
        order wins.
    """
    directory = Path(directory)
    if not directory.is_dir():
        return {}

    image_suffixes = {".png", ".jpg", ".jpeg"}
    image_files = sorted(
        entry for entry in directory.iterdir()
        if entry.is_file() and entry.suffix.lower() in image_suffixes
    )
    if not image_files:
        return {}

    # Trailing digits that form the whole stem or follow a final "_" / "-".
    frame_id_pattern = re.compile(r'(?:^|[_-])(\d+)$')

    result = {}
    for img_path in image_files:
        match = frame_id_pattern.search(img_path.stem)
        if match:
            result[int(match.group(1))] = img_path

    # No numbered file names at all: fall back to positional ids.
    if not result:
        result = dict(enumerate(image_files))

    return result
93
+
94
+
95
def discover_pose_files(directory: Path) -> dict:
    """Discover the per-frame pose JSON files stored directly inside *directory*.

    Well-known non-pose JSON files (``meta``, ``config``, ``stats``,
    ``cameras``, ``render_meta``, ``description``) are ignored.  The frame id
    is the number after a final ``_`` / ``-`` in the stem, or the whole stem
    when it is purely numeric; files without a frame id are skipped.

    Returns:
        ``{frame_id: Path}`` for every pose file found.
    """
    non_pose_stems = ('meta', 'config', 'stats', 'cameras', 'render_meta', 'description')

    poses = {}
    for candidate in sorted(directory.glob("*.json")):
        name = candidate.stem
        if name in non_pose_stems:
            continue

        numbered = re.search(r'[_-](\d+)$', name)
        if numbered is not None:
            poses[int(numbered.group(1))] = candidate
        elif name.isdigit():
            poses[int(name)] = candidate

    return poses
111
+
112
+
113
+ # =============================================================================
114
+ # 配置加载
115
+ # =============================================================================
116
+
117
def load_config(config_path: Path) -> Dict[str, Any]:
    """Load the YAML configuration file at *config_path*.

    Args:
        config_path: Path of a UTF-8 encoded YAML file.

    Returns:
        The parsed top-level mapping.  An empty file yields ``{}``, because
        ``yaml.safe_load`` returns ``None`` for an empty document and callers
        use the result as a dict.

    Raises:
        ValueError: If the top-level YAML value is not a mapping.
    """
    with open(config_path, "r", encoding="utf-8") as f:
        loaded = yaml.safe_load(f)

    if loaded is None:
        return {}
    if not isinstance(loaded, dict):
        raise ValueError(
            f"Config file must contain a mapping at top level: {config_path}"
        )
    return loaded
120
+
121
+
122
def resolve_paths(cfg: Dict[str, Any], config_dir: Path) -> Dict[str, Any]:
    """Turn relative paths in the config into paths relative to *config_dir*.

    Rewrites, in place, ``data.data_dir`` / ``data.output_dir`` /
    ``data.depth_dir`` and ``depth_pro.repo_dir`` when they are set and not
    absolute.  Also records ``config_dir`` under ``cfg["_project_root"]``.
    Sections that are missing or left null in the YAML (``data:``) are
    treated as empty instead of raising.

    Args:
        cfg: Parsed configuration mapping; modified in place.
        config_dir: Directory that relative paths are interpreted against.

    Returns:
        The same ``cfg`` object.
    """
    data_cfg = cfg.get("data") or {}
    for key in ("data_dir", "output_dir", "depth_dir"):
        value = data_cfg.get(key)
        if value:
            path = Path(value)
            if not path.is_absolute():
                data_cfg[key] = str(config_dir / path)

    depth_pro_cfg = cfg.get("depth_pro") or {}
    repo_dir = depth_pro_cfg.get("repo_dir")
    if repo_dir:
        rp = Path(repo_dir)
        if not rp.is_absolute():
            depth_pro_cfg["repo_dir"] = str(config_dir / rp)

    cfg["_project_root"] = str(config_dir)
    return cfg
139
+
140
+
141
+ # =============================================================================
142
+ # 深度估计流程(仅 all / depth_only 模式调用)
143
+ # =============================================================================
144
+
145
def run_depth_pipeline(
    center_rgb: np.ndarray,
    cfg: Dict[str, Any],
    device: torch.device,
    output_dir: Path,
    erp_h: int,
    erp_w: int,
    frame_id: int = 0,
) -> np.ndarray:
    """Estimate an ERP depth map for one panorama: slice -> infer -> fuse -> save.

    Args:
        center_rgb: Panorama image passed to ``extract_all_tangents``.
        cfg: Pipeline configuration; ``cfg["erp"]["height"/"width"]`` are
            overwritten with ``erp_h`` / ``erp_w``.
        device: Torch device handed to the slicing, inference and fusion steps.
        output_dir: Root output folder; results go to ``depth_erp/`` and
            ``debug/`` below it (plus ``tangents/`` when
            ``run.save_intermediates`` is set).
        erp_h: ERP height in pixels.
        erp_w: ERP width in pixels.
        frame_id: Frame number used in log lines and output file names.

    Returns:
        The fused ERP depth map returned by ``fuse_tangent_depths_to_erp``.
    """
    _load_depth_modules()

    bar = "=" * 60

    def announce(title):
        # Three-line section banner shared by every step.
        print(f"\n{bar}")
        print(title)
        print(f"{bar}")

    def keep_intermediates():
        # Looked up each time it is needed rather than cached.
        return cfg.get("run", {}).get("save_intermediates", False)

    depth_out_dir = output_dir / "depth_erp"
    depth_out_dir.mkdir(parents=True, exist_ok=True)

    # --- Step 1: build the tangent-slice specifications ---
    announce(f"[Step 1] Building tangent slices (frame {frame_id})")

    erp_section = cfg.setdefault("erp", {})
    erp_section["height"] = erp_h
    erp_section["width"] = erp_w

    slices = build_icosahedron_slices(cfg)
    print(f" Total slices: {len(slices)}")
    for spec in slices:
        if spec.slice_type == "face":
            continue
        print(f" {spec.slice_id}: type={spec.slice_type}, fov={spec.fov_deg:.1f}°")

    coverage_mask, coverage_stats = compute_coverage_mask(slices, erp_h, erp_w, device)
    print(f" Coverage: {coverage_stats['total_coverage']:.2f}%")

    dbg_dir = output_dir / "debug"
    dbg_dir.mkdir(parents=True, exist_ok=True)
    coverage_rgb = np.stack([coverage_mask] * 3, axis=-1)
    save_image(coverage_rgb, dbg_dir / "coverage_mask.png")

    # --- Step 2: extract the tangent slices ---
    announce(f"[Step 2] Extracting tangent slices (frame {frame_id})")

    started = time.time()
    tangent_rgbs = extract_all_tangents(center_rgb, slices, device)
    print(f" Extracted {len(tangent_rgbs)} slices in {time.time()-started:.2f}s")

    if keep_intermediates():
        tangent_dir = output_dir / "tangents"
        tangent_dir.mkdir(parents=True, exist_ok=True)
        for slice_id, rgb in tangent_rgbs.items():
            save_image(rgb, tangent_dir / f"{slice_id}_rgb.png")

    # --- Step 3: Depth Pro inference ---
    announce(f"[Step 3] Running Depth Pro inference (frame {frame_id})")

    dp_cfg = cfg.get("depth_pro", {})
    if bool(dp_cfg.get("enabled", True)):
        started = time.time()
        tangent_depths = estimate_all_tangent_depths(
            tangent_rgbs, slices, cfg, device,
        )
        print(f" Estimated {len(tangent_depths)} depths in {time.time()-started:.2f}s")
    else:
        # Inference disabled: substitute a constant depth of 5.0 per slice.
        print(" [Warning] Depth Pro disabled, using dummy depth")
        tangent_depths = {
            sid: np.full(rgb.shape[:2], 5.0, dtype=np.float32)
            for sid, rgb in tangent_rgbs.items()
        }

    if keep_intermediates():
        tangent_dir = output_dir / "tangents"
        for sid, depth in tangent_depths.items():
            save_depth(depth, tangent_dir / f"{sid}_depth.npy")

    # --- Step 4: fuse the tangent depths into one ERP map ---
    announce(f"[Step 4] Fusing tangent depths to ERP (frame {frame_id})")

    started = time.time()
    fusion_debug_dir = dbg_dir if keep_intermediates() else None
    depth_erp, weight_sum, valid_mask = fuse_tangent_depths_to_erp(
        tangent_depths, slices, cfg, device,
        debug_dir=fusion_debug_dir,
    )
    print(f" Fused in {time.time()-started:.2f}s")

    valid_ratio = np.sum(valid_mask > 0) / (erp_h * erp_w)
    valid_depths = depth_erp[np.isfinite(depth_erp) & (depth_erp > 0)]
    if len(valid_depths) > 0:
        print(f" Valid depth ratio: {valid_ratio * 100:.2f}%")
        print(f" Depth range: [{valid_depths.min():.2f}, {valid_depths.max():.2f}] m")

    # --- Step 5: save the results ---
    stem = f"depth_{frame_id:04d}"
    save_depth(depth_erp, depth_out_dir / f"{stem}.npy")
    save_depth_visualization(depth_erp, depth_out_dir / f"{stem}_vis.png")
    cv2.imwrite(str(depth_out_dir / f"{stem}_valid_mask.png"), valid_mask * 255)

    return depth_erp
245
+
246
+
247
+ # =============================================================================
248
+ # Warp 流程
249
+ # =============================================================================
250
+
251
def run_warp_pipeline(
    center_rgb: np.ndarray,
    depth_erp: np.ndarray,
    center_frame: int,
    image_files: dict,
    pose_files: dict,
    cfg: Dict[str, Any],
    device: torch.device,
    output_dir: Path,
    erp_h: int,
    erp_w: int,
) -> None:
    """Forward-warp the center panorama to every selected target frame.

    Target frames are the frames that have a pose file, excluding the center
    frame.  ``warp.target_frames`` in the config may restrict them to a list
    of ids (or a single id); ``None`` or ``"auto"`` selects all of them.
    Requested ids without a pose file are reported and skipped.

    For each target the warped RGB and valid mask are written to
    ``<output_dir>/warp_rgb`` and, when ``warp.output_depth`` is true and the
    warp produced one, the warped depth to ``<output_dir>/warp_depth``.

    Args:
        center_rgb: Source panorama.
        depth_erp: Source depth map.
        center_frame: Frame id of the source panorama.
        image_files: Unused; kept for interface compatibility.
        pose_files: ``{frame_id: pose_path}`` mapping.
        cfg: Pipeline configuration (the ``warp`` section is read here).
        device: Torch device handed to ``warp_erp_to_target``.
        output_dir: Root output folder.
        erp_h: Unused; kept for interface compatibility.
        erp_w: Unused; kept for interface compatibility.

    Returns:
        None.  Returns early, after logging an error, when the center frame
        has no pose file.
    """
    warp_cfg = cfg.get("warp", {})
    output_depth = bool(warp_cfg.get("output_depth", True))

    # Candidate targets: every frame with a pose except the center frame.
    available_targets = sorted(fid for fid in pose_files if fid != center_frame)

    cfg_targets = warp_cfg.get("target_frames", None)
    missing_targets = []
    if cfg_targets is not None and cfg_targets != "auto":
        if isinstance(cfg_targets, (int, str)):
            # A single id is accepted as shorthand for a one-element list.
            cfg_targets = [cfg_targets]
        requested = {int(t) for t in cfg_targets}
        target_frames = [fid for fid in available_targets if fid in requested]
        # The center frame is never a target, so it does not count as missing.
        missing_targets = sorted(requested - set(available_targets) - {center_frame})
    else:
        target_frames = available_targets

    print(f"\n{'='*60}")
    print(f"[Warp] Forward splatting from frame {center_frame}")
    print(f"{'='*60}")
    print(f" Method: {warp_cfg.get('method', 'softmax_splatting')}")
    print(f" Available targets with pose: {available_targets}")
    print(f" Will warp: {target_frames}")
    if missing_targets:
        print(f" [Warning] Requested target frames without pose, skipped: {missing_targets}")

    # The source pose is mandatory; without it nothing can be warped.
    if center_frame not in pose_files:
        print(f" [Error] Center pose not found for frame {center_frame}")
        return
    src_pose = load_pose(pose_files[center_frame])
    print(f" Source pose: position={src_pose.position.tolist()}")

    warp_rgb_dir = output_dir / "warp_rgb"
    warp_rgb_dir.mkdir(parents=True, exist_ok=True)
    if output_depth:
        warp_depth_dir = output_dir / "warp_depth"
        warp_depth_dir.mkdir(parents=True, exist_ok=True)

    total_warp = len(target_frames)
    for idx, tgt_id in enumerate(target_frames):
        # Every target id comes from pose_files, so its pose always exists.
        tgt_pose = load_pose(pose_files[tgt_id])
        print(f" [{idx+1}/{total_warp}] Frame {center_frame} -> {tgt_id} ...", end="", flush=True)

        t0 = time.time()
        result = warp_erp_to_target(
            src_rgb=center_rgb,
            src_depth=depth_erp,
            src_pose=src_pose,
            tgt_pose=tgt_pose,
            cfg=cfg,
            device=device,
        )
        dt = time.time() - t0

        valid_pct = result.valid_mask.sum() / result.valid_mask.size * 100
        print(f" done ({dt:.2f}s, valid={valid_pct:.1f}%)")

        prefix = f"pano{center_frame:04d}_to_pano{tgt_id:04d}"

        # Warped RGB and its valid mask (mask is scaled by 255 for the PNG).
        save_image(result.warped_rgb, warp_rgb_dir / f"{prefix}_rgb.png")
        cv2.imwrite(str(warp_rgb_dir / f"{prefix}_mask.png"), result.valid_mask * 255)

        # Warped depth, when requested and produced.
        if output_depth and result.warped_depth is not None:
            save_depth(result.warped_depth, warp_depth_dir / f"{prefix}_depth_range.npy")

    print(f" Warp complete. Output saved to: {warp_rgb_dir}")
334
+
335
+
336
+ # =============================================================================
337
+ # 主函数
338
+ # =============================================================================
339
+
340
def main():
    """Command-line entry point of the ERPT pipeline.

    Stages (``--stage``):
        * ``warp_only`` (default): load an existing depth map for the center
          frame and warp it to the target frames.  Depth is never estimated
          in this stage; a missing depth file raises ``FileNotFoundError``.
        * ``depth_only``: run depth estimation for the center frame only.
        * ``all``: run depth estimation, then warp.

    Raises:
        FileNotFoundError: If the data directory holds no images, or if
            ``warp_only`` finds no depth file for the center frame.
    """
    _script_dir = Path(__file__).parent
    _default_config = _script_dir / "config.yaml"

    parser = argparse.ArgumentParser(description="ERPT Pipeline")
    parser.add_argument("--config", type=str,
                        default=str(_default_config) if _default_config.exists() else None,
                        help="Config file path")
    parser.add_argument("--data_dir", type=str, default=None,
                        help="Data directory (overrides config)")
    parser.add_argument("--output_dir", type=str, default=None,
                        help="Output directory (overrides config)")
    parser.add_argument("--device", type=str, default="cuda")
    parser.add_argument("--stage", type=str, default="warp_only",
                        choices=["all", "depth_only", "warp_only"])
    parser.add_argument("--center_frame", type=int, default=None,
                        help="Center frame ID (overrides config)")
    args = parser.parse_args()

    # Load the configuration, or fall back to built-in defaults.
    if args.config:
        config_path = Path(args.config)
        # An empty YAML file parses to None; treat it as an empty config.
        cfg = load_config(config_path) or {}
        cfg = resolve_paths(cfg, config_path.parent)
    else:
        cfg = {
            "data": {},
            "erp": {"auto_size": True},
            "tangent": {},
            "depth_pro": {"enabled": True, "precision": "fp16", "pass_f_px": True},
            "fusion": {"blend_mode": "multiband", "output_scale": 1.10, "k": 4},
            "run": {"save_intermediates": False},
        }

    # A config file may omit the "data" section or leave it null; the
    # command-line overrides below need a dict to write into.
    if not isinstance(cfg.get("data"), dict):
        cfg["data"] = {}

    # Command-line overrides.
    if args.data_dir:
        cfg["data"]["data_dir"] = str(Path(args.data_dir).resolve())
    if args.output_dir:
        cfg["data"]["output_dir"] = args.output_dir

    data_dir = Path(cfg["data"].get("data_dir", "inputs"))
    output_dir = Path(cfg["data"].get("output_dir", "outputs"))

    # Use the requested device only if CUDA is available or CPU was asked for.
    device = torch.device(args.device if torch.cuda.is_available() or args.device == "cpu" else "cpu")
    print(f"Using device: {device}")

    # Compare against None so that an explicit "--center_frame 0" is
    # honoured instead of being overridden by the config value.
    if args.center_frame is not None:
        center_frame = args.center_frame
    else:
        center_frame = int(cfg.get("warp", {}).get("center_frame", 0))

    print(f"\n{'='*60}")
    print("ERPT Pipeline")
    print(f"{'='*60}")
    print(f"Stage: {args.stage}")
    print(f"Data dir: {data_dir}")
    print(f"Output dir: {output_dir}")

    t_start = time.time()

    # --- Load data ---
    print(f"\n{'='*60}")
    print("[Loading data]")
    print(f"{'='*60}")

    image_files = discover_image_files(data_dir)
    pose_files = discover_pose_files(data_dir)
    print(f" Found {len(image_files)} images, {len(pose_files)} poses")

    if not image_files:
        raise FileNotFoundError(f"No image files found in: {data_dir}")

    # Fall back to the lowest available frame id when the requested center
    # frame has no image.
    if center_frame not in image_files:
        center_frame = sorted(image_files.keys())[0]
        print(f" Using frame {center_frame} as center")

    center_rgb = load_image(image_files[center_frame])
    print(f" Center image: {image_files[center_frame].name}")
    print(f" Shape: {center_rgb.shape}")

    erp_cfg = cfg.get("erp", {})
    if bool(erp_cfg.get("auto_size", True)):
        erp_h, erp_w = center_rgb.shape[:2]
        print(f" Auto size: {erp_w}x{erp_h}")
    else:
        erp_h = int(erp_cfg.get("height", 2048))
        erp_w = int(erp_cfg.get("width", 4096))

    # --- Depth: estimate it or load an existing map ---
    depth_erp = None

    if args.stage in ("all", "depth_only"):
        if args.stage == "all":
            print(f"\n [Stage: all] 强制执行深度估计")
        depth_erp = run_depth_pipeline(
            center_rgb, cfg, device, output_dir, erp_h, erp_w, center_frame,
        )

    elif args.stage == "warp_only":
        # Search for an existing depth map.  Files next to the image come
        # first, a previously estimated map in the output folder comes last.
        # There is no fallback to depth estimation.
        stem = image_files[center_frame].stem
        depth_candidates = [
            data_dir / f"{stem}_depth.npy",
            data_dir / f"{stem}_depth.exr",
            data_dir / f"{stem}.npy",
            data_dir / f"depth_{center_frame:04d}.npy",
            output_dir / "depth_erp" / f"depth_{center_frame:04d}.npy",
        ]

        for dp in depth_candidates:
            if dp.exists():
                if dp.suffix == ".exr":
                    depth_erp = cv2.imread(str(dp), cv2.IMREAD_ANYCOLOR | cv2.IMREAD_ANYDEPTH)
                    # Multi-channel EXR: keep only the first channel.
                    if depth_erp is not None and depth_erp.ndim == 3:
                        depth_erp = depth_erp[:, :, 0]
                    depth_erp = depth_erp.astype(np.float32) if depth_erp is not None else None
                else:
                    depth_erp = np.load(str(dp)).astype(np.float32)
                # An unreadable EXR leaves depth_erp as None; try the next one.
                if depth_erp is not None:
                    print(f" Loaded depth from {dp}")
                    break

        # Size check: bring the depth map to the ERP resolution.
        if depth_erp is not None and depth_erp.shape != (erp_h, erp_w):
            old_shape = depth_erp.shape
            depth_erp = cv2.resize(depth_erp, (erp_w, erp_h), interpolation=cv2.INTER_LINEAR)
            print(f" [Warning] Depth resized: {old_shape} -> ({erp_h}, {erp_w})")

        # No depth found: fail loudly instead of estimating depth.
        if depth_erp is None:
            tried = "\n ".join(str(p) for p in depth_candidates)
            raise FileNotFoundError(
                f"[warp_only] 未找到深度文件,无法执行 warp。\n"
                f"已搜索路径:\n {tried}\n"
                f"如需深度估计请使用 --stage all"
            )

    # --- Warp stage ---
    warp_cfg = cfg.get("warp", {})
    warp_enabled = bool(warp_cfg.get("enabled", True))

    if args.stage in ("all", "warp_only") and warp_enabled:
        run_warp_pipeline(
            center_rgb, depth_erp, center_frame,
            image_files, pose_files,
            cfg, device, output_dir, erp_h, erp_w,
        )

    # --- Done ---
    total_time = time.time() - t_start
    print(f"\n{'='*60}")
    print("Pipeline Complete")
    print(f"{'='*60}")
    print(f"Total time: {total_time:.2f}s")
    print(f"Output saved to: {output_dir}")
497
+
498
+
499
+ if __name__ == "__main__":
500
+ main()
pipelines/run_ply_pipeline.py ADDED
@@ -0,0 +1,1967 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ PLY 全流程 Pipeline(纯 Python,无需 Blender)
4
+
5
+ 从 .ply 点云/网格文件出发,完成:
6
+ Phase 0: 加载场景 + 获取 AABB 边界
7
+ Phase 1: 多高度层撒点 + 7 层过滤(trimesh ray_cast 替代 bpy)
8
+ Phase 2: 边渲边选(Open3D ERP 点云渲染 + 深度图)
9
+
10
+ 输出格式与 run_blend_pipeline.py 完全一致:
11
+ panorama_XXXX.png + panorama_XXXX_depth.npy + pose_XXXX.json
12
+
13
+ 坐标系: ERPT_native 右手系 [X右, Y上, Z前]
14
+ PLY 坐标系通常为 Z-up,渲染前统一转换为 Y-up。
15
+
16
+ 运行:
17
+ python run_ply_pipeline.py \\
18
+ --ply /path/to/scene.ply \\
19
+ --output-dir /path/to/output \\
20
+ --num-frames 30 \\
21
+ --resolution 2048,1024
22
+
23
+ 依赖:
24
+ pip install open3d trimesh numpy opencv-python pillow
25
+ """
26
+
27
+ import argparse
28
+ import json
29
+ import math
30
+ import os
31
+ import random as _random
32
+ import sys
33
+ import time
34
+ from concurrent.futures import ThreadPoolExecutor
35
+ from pathlib import Path
36
+ from typing import Optional
37
+
38
+ import numpy as np
39
+ import trimesh
40
+ import open3d as o3d
41
+ import cv2
42
+
43
+ # ── GPU 支持检测 ──────────────────────────────────────────────────────────────
44
+ try:
45
+ import torch
46
+ import torch.nn.functional as _F
47
+ _CUDA_AVAILABLE = torch.cuda.is_available()
48
+ _TORCH_DEVICE = torch.device("cuda") if _CUDA_AVAILABLE else torch.device("cpu")
49
+ if _CUDA_AVAILABLE:
50
+ print(f"[GPU] CUDA 可用: {torch.cuda.get_device_name(0)}")
51
+ else:
52
+ print("[GPU] CUDA 不可用,使用 CPU 渲染")
53
+ except ImportError:
54
+ torch = None
55
+ _CUDA_AVAILABLE = False
56
+ _TORCH_DEVICE = None
57
+ print("[GPU] torch 未安装,使用 CPU 渲染")
58
+
59
+
60
# NOTE(review): presumably the (height, width) of a low-resolution warp grid;
# the code that reads these is outside this excerpt — confirm.
WARP_H = 128
WARP_W = 256
MARGIN = 0.2  # Minimum safe distance from the scene boundary (PLY scenes are usually less precise than .blend scenes, so this is slightly relaxed)

# Default values for the view-selection options.  DEFAULT_STOP_GAIN and
# DEFAULT_MIN_FRAMES are the defaults of --stop-gain / --min-frames in
# parse_args.
# NOTE(review): where the other two are used is not visible here — confirm.
DEFAULT_STOP_GAIN = 0.08
DEFAULT_OVERLAP_PENALTY = 0.5
DEFAULT_MIN_DIST = 0.6
DEFAULT_MIN_FRAMES = 5

# Fixed camera rotations selectable by name, each a three-component list of
# multiples of pi (presumably XYZ Euler angles in radians — confirm).
# "random_yaw" is not listed here; get_camera_rot handles it separately.
ROTATION_TYPES = {
    "none": [0.0, 0.0, 0.0],
    "rotate_x_90": [math.pi / 2, 0.0, 0.0],
    "rotate_x_180": [math.pi, 0.0, 0.0],
    "rotate_z_90": [0.0, 0.0, math.pi / 2],
}
75
+
76
+
77
def get_camera_rot(rotation_type: str, frame_id: int):
    """Return the three-component camera rotation for one frame.

    For ``"random_yaw"`` the result is ``[pi/2, 0, yaw]``, where ``yaw`` is
    0 for frame 0 and a uniform random value in ``[0, 2*pi]`` for any other
    frame.  Every other rotation type returns a fresh copy of its
    ``ROTATION_TYPES`` entry.

    Raises:
        KeyError: If ``rotation_type`` is neither ``"random_yaw"`` nor a key
            of ``ROTATION_TYPES``.
    """
    if rotation_type != "random_yaw":
        # Copy so callers can modify the result without touching the table.
        return list(ROTATION_TYPES[rotation_type])

    if frame_id == 0:
        yaw = 0.0
    else:
        yaw = _random.uniform(0, 2 * math.pi)
    return [math.pi / 2, 0.0, yaw]
82
+
83
+
84
def parse_args():
    """Parse the command-line options of the PLY pipeline.

    ``--ply`` and ``--output-dir`` are required.  ``--z-up`` is on by default
    and ``--no-z-up`` turns it off.

    Returns:
        The ``argparse.Namespace`` produced from ``sys.argv``.
    """
    cli = argparse.ArgumentParser(description="PLY Pipeline(边渲边选)")
    add = cli.add_argument

    # Input and output.
    add("--ply", type=str, required=True,
        help=".ply 文件路径")
    add("--output-dir", type=str, required=True,
        help="输出目录")

    # Rendering.
    add("--num-frames", type=int, default=30)
    add("--resolution", type=str, default="2048,1024",
        help="渲染分辨率 width,height")

    # Candidate sampling and view selection.
    add("--grid-spacing", type=float, default=0.5,
        help="候选点网格间距(米)")
    add("--camera-height", type=float, default=None,
        help="固定相机高度(米),None=自动多层")
    add("--stop-gain", type=float, default=DEFAULT_STOP_GAIN)
    add("--stop-score", type=float, default=-0.3)
    add("--stop-delta", type=float, default=0.08)
    add("--min-frames", type=int, default=DEFAULT_MIN_FRAMES)
    add("--rotation-type", type=str, default="random_yaw",
        choices=["none", "rotate_x_90", "rotate_x_180",
                 "rotate_z_90", "random_yaw"])

    # Point-cloud specifics.
    add("--point-size", type=float, default=2.0,
        help="点云渲染点径(像素)")
    add("--z-up", action="store_true", default=True,
        help="PLY 坐标系为 Z-up(默认 True,转为 Y-up)")
    add("--no-z-up", dest="z_up", action="store_false")

    return cli.parse_args()
110
+
111
+
112
def load_ply_scene(ply_path: str, z_up: bool = True):
    """Load a PLY file and return its points in the Y-up (ERPT_native) frame.

    Coordinate conventions handled:
        Z-up: X right, Y forward, Z up -> converted with X'=X, Y'=Z, Z'=Y
        Y-up: X right, Y up, Z forward -> used as is

    NOTE(review): swapping two axes is a reflection, and the face winding is
    not flipped here — confirm that this is intended.

    Args:
        ply_path: Path of the ``.ply`` file.
        z_up: When true, swap the Y and Z coordinates of every vertex.

    Returns:
        A tuple ``(mesh_or_pc, pts_world, bmin, bmax, is_mesh, faces)``:
            mesh_or_pc: The loaded trimesh object (mesh or point cloud), with
                its vertices still in the file's own frame.
            pts_world: ``(N, 3)`` float64 array of vertices in the Y-up frame.
            bmin, bmax: Axis-aligned bounding box corners of ``pts_world``.
            is_mesh: True when the geometry is a triangle mesh.
            faces: ``(F, 3)`` int32 face array, or None for a point cloud.

    Raises:
        ValueError: If the file is a scene without any triangle mesh, or if
            the loaded geometry has no vertices.
    """
    print(f"\n[Phase 0] 加载场景: {ply_path}")
    scene_or_mesh = trimesh.load(ply_path, process=False, force=None)

    if isinstance(scene_or_mesh, trimesh.Scene):
        # A Scene may hold several geometries; merge its triangle meshes.
        parts = [g for g in scene_or_mesh.geometry.values()
                 if isinstance(g, trimesh.Trimesh)]
        if not parts:
            raise ValueError(f"No triangle mesh found in scene: {ply_path}")
        mesh = trimesh.util.concatenate(parts)
        is_mesh = True
    elif isinstance(scene_or_mesh, trimesh.Trimesh):
        mesh = scene_or_mesh
        is_mesh = True
    elif isinstance(scene_or_mesh, trimesh.PointCloud):
        mesh = scene_or_mesh
        is_mesh = False
    else:
        # Unrecognised container type: reload, forcing a mesh.
        mesh = trimesh.load(ply_path, process=False, force='mesh')
        is_mesh = isinstance(mesh, trimesh.Trimesh)

    # Vertex coordinates and, for meshes, the face index array.
    pts_raw = np.array(mesh.vertices, dtype=np.float64)
    if pts_raw.size == 0:
        # Fail with a clear message instead of an error from min()/max().
        raise ValueError(f"Scene contains no vertices: {ply_path}")
    faces = np.array(mesh.faces, dtype=np.int32) if is_mesh else None

    print(f" 点数: {len(pts_raw)}, 面数: {len(faces) if faces is not None else 0}, is_mesh={is_mesh}")

    # Z-up -> Y-up (ERPT_native): swap the second and third coordinates.
    if z_up:
        pts_world = pts_raw[:, [0, 2, 1]].copy()
    else:
        pts_world = pts_raw.copy()

    bmin = pts_world.min(axis=0)
    bmax = pts_world.max(axis=0)

    print(f" AABB (Y-up): min=[{bmin[0]:.2f}, {bmin[1]:.2f}, {bmin[2]:.2f}] "
          f"max=[{bmax[0]:.2f}, {bmax[1]:.2f}, {bmax[2]:.2f}]")

    return mesh, pts_world, bmin, bmax, is_mesh, faces
166
+
167
+
168
class RayCaster:
    """Ray-casting facade over a trimesh ``RayMeshIntersector``.

    When the scene is not a triangle mesh no intersector is built and the
    caster runs in a degraded mode in which every ray reports "no hit"
    (distance ``inf``); callers can then only rely on the stored AABB.
    """

    def __init__(self, mesh, pts_world: np.ndarray, bmin, bmax,
                 is_mesh: bool, z_up: bool = True):
        """Build the intersector.

        Args:
            mesh: trimesh object as loaded from disk (file axes).
            pts_world: (N, 3) vertex array, stored unchanged on the instance.
            bmin, bmax: AABB corners, stored as numpy arrays.
            is_mesh: whether ``mesh`` should be treated as a triangle mesh.
            z_up: when True the mesh vertices are converted to Y-up (Y/Z
                column swap) on a private copy before the intersector is built.
        """
        self.is_mesh = is_mesh
        self.pts_world = pts_world
        self.bmin = np.array(bmin)
        self.bmax = np.array(bmax)
        self.z_up = z_up
        self._intersector = None

        if is_mesh and isinstance(mesh, trimesh.Trimesh):
            target = mesh
            if z_up:
                # Work on a copy so the caller's mesh keeps its original axes.
                import copy
                target = copy.deepcopy(mesh)
                target.vertices = np.array(
                    mesh.vertices, dtype=np.float64)[:, [0, 2, 1]]
            # Prefer the embree backend when trimesh exposes it.
            backend = (trimesh.ray.ray_pyembree
                       if hasattr(trimesh.ray, 'ray_pyembree')
                       else trimesh.ray.ray_triangle)
            self._intersector = backend.RayMeshIntersector(target)
            print(" [RayCaster] 使用 trimesh RayMeshIntersector")
        else:
            print(" [RayCaster] 非 Mesh 场景,使用 AABB 降级模式")

    def cast_ray(self, origin: np.ndarray, direction: np.ndarray):
        """Cast a single ray.

        Args:
            origin: 3-vector (array-like) ray origin.
            direction: 3-vector (array-like); it is normalised internally.

        Returns:
            ``(hit, dist)``: ``hit`` is a bool and ``dist`` the distance to the
            nearest intersection farther than 1e-4; ``(False, inf)`` when
            nothing is hit or no intersector is available.
        """
        if self._intersector is None:
            return False, float('inf')

        single = np.asarray(direction, dtype=np.float64).reshape(1, 3)
        nearest = float(self.cast_rays_batch(origin, single)[0])
        return bool(np.isfinite(nearest)), nearest

    def cast_rays_batch(self, origin: np.ndarray,
                        directions: np.ndarray) -> np.ndarray:
        """Cast many rays from one origin.

        Args:
            origin: 3-vector (array-like) shared by all rays.
            directions: (N, 3) array-like of directions, normalised internally.

        Returns:
            (N,) float array with the nearest hit distance per ray
            (hits closer than 1e-4 are ignored); ``inf`` where nothing is hit.
        """
        directions = np.asarray(directions, dtype=np.float64).reshape(-1, 3)
        n_rays = len(directions)
        dists_out = np.full(n_rays, float('inf'))
        if self._intersector is None or n_rays == 0:
            return dists_out

        # asarray lets callers pass plain lists, like cast_ray always allowed.
        origin = np.asarray(origin, dtype=np.float64).reshape(3)
        origins = np.tile(origin, (n_rays, 1))
        dirs = directions / (
            np.linalg.norm(directions, axis=1, keepdims=True) + 1e-12)

        try:
            locs, idx_ray, _ = self._intersector.intersects_location(
                origins, dirs, multiple_hits=True)
        except Exception as err:
            # Best effort: a failed query is treated as "no hit", but the
            # first failure is reported so it does not go unnoticed.
            if not getattr(self, '_query_warned', False):
                self._query_warned = True
                print(f" [RayCaster] [WARN] ray query failed, "
                      f"treating as no hit: {err}")
            return dists_out

        if len(locs) == 0:
            return dists_out

        hit_dist = np.linalg.norm(np.asarray(locs) - origin, axis=1)
        # Drop hits at (almost) zero distance to avoid self-intersection.
        keep = hit_dist > 1e-4
        # Unbuffered minimum: several hits may map to the same ray index.
        np.minimum.at(dists_out, np.asarray(idx_ray)[keep], hit_dist[keep])
        return dists_out
260
+
261
+
262
def compute_camera_heights(floor_y: float, ceiling_y: float,
                           manual_height=None):
    """Pick the camera height layers (Y-up frame, Y is the height axis).

    Args:
        floor_y: Y coordinate of the floor.
        ceiling_y: Y coordinate of the ceiling.
        manual_height: if given, it is returned as the single layer.

    Returns:
        Sorted list of unique heights rounded to 2 decimals. Falls back to
        ``[floor_y + 1.5]`` when the room height is not positive or no layer
        fits below the ceiling clearance.
    """
    if manual_height is not None:
        return [manual_height]

    fallback = [floor_y + 1.5]
    room_h = ceiling_y - floor_y
    if room_h <= 0:
        return fallback

    clearance = 0.3                       # gap kept below the ceiling
    eye_levels = (0.5, 0.8, 1.2, 1.7, 2.1)  # fixed offsets above the floor
    limit = ceiling_y - clearance

    levels = [floor_y + eye for eye in eye_levels if floor_y + eye < limit]

    if room_h > 3.0:
        # Tall rooms: keep stacking layers with a stride that grows by 0.5
        # each time (capped at 3.0) until the clearance limit is reached.
        stride = 1.0
        offset = eye_levels[-1] + stride
        while floor_y + offset < limit:
            levels.append(floor_y + offset)
            stride = min(stride + 0.5, 3.0)
            offset += stride

    # Add a layer right under the ceiling when it is far enough from the rest.
    if levels:
        if limit > max(levels) + 0.5:
            levels.append(limit)
    elif limit > floor_y + 0.5:
        levels.append(limit)

    if not levels:
        return fallback
    return sorted({round(level, 2) for level in levels})
299
+
300
+
301
def generate_candidate_grid(bmin, bmax, x_spacing, z_spacing, heights):
    """Build the candidate camera grid (Y-up frame: X right, Y up, Z forward).

    A regular XZ lattice centred on the AABB centre is generated, keeping only
    the nodes that stay ``MARGIN`` away from the AABB sides. The nodes are
    ordered by squared lattice distance from the centre and repeated for
    every entry of ``heights``.

    Args:
        bmin, bmax: AABB corners, indexable as [x, y, z].
        x_spacing, z_spacing: lattice pitch along X and Z.
        heights: list of Y values, one grid layer per value.

    Returns:
        List of ``[x, y, z]`` float triples, layer by layer.
    """
    centre_x = (bmin[0] + bmax[0]) / 2
    centre_z = (bmin[2] + bmax[2]) / 2

    steps_x = int((bmax[0] - centre_x - MARGIN) / x_spacing)
    steps_z = int((bmax[2] - centre_z - MARGIN) / z_spacing)

    nodes = []
    for ix in range(-steps_x, steps_x + 1):
        for iz in range(-steps_z, steps_z + 1):
            x = centre_x + ix * x_spacing
            z = centre_z + iz * z_spacing
            inside_x = bmin[0] + MARGIN <= x <= bmax[0] - MARGIN
            inside_z = bmin[2] + MARGIN <= z <= bmax[2] - MARGIN
            if inside_x and inside_z:
                nodes.append((ix * ix + iz * iz, x, z))
    # Stable sort: nodes closest to the centre come first.
    nodes.sort(key=lambda node: node[0])

    candidates = [[float(x), float(y), float(z)]
                  for y in heights
                  for _, x, z in nodes]

    n_xz = len(nodes)
    print(f" 网格: {n_xz}点/层 x {len(heights)}层 = {len(candidates)} 个候选")
    return candidates
330
+
331
+
332
+ def _build_26_directions_yup():
333
+ """26 方向球面采样(Y-up 坐标系:水平面=XZ,竖直=Y)"""
334
+ dirs = []
335
+ # 水平 16 方向(XZ 平面)
336
+ for i in range(16):
337
+ a = i * (2 * math.pi / 16)
338
+ dirs.append(np.array([math.cos(a), 0.0, math.sin(a)]))
339
+ # 上方 5 方向
340
+ elev = math.pi / 4
341
+ for i in range(5):
342
+ a = i * (2 * math.pi / 5)
343
+ dirs.append(np.array([
344
+ math.cos(a) * math.cos(elev),
345
+ math.sin(elev),
346
+ math.sin(a) * math.cos(elev),
347
+ ]))
348
+ # 下方 5 方向
349
+ for i in range(5):
350
+ a = i * (2 * math.pi / 5)
351
+ dirs.append(np.array([
352
+ math.cos(a) * math.cos(elev),
353
+ -math.sin(elev),
354
+ math.sin(a) * math.cos(elev),
355
+ ]))
356
+ return dirs
357
+
358
+
359
def raycast_filter(candidates, raycaster: RayCaster, room_height: float,
                   min_wall_dist: float = 1.0):
    """Filter candidate camera positions with seven ray-cast tests (Y-up).

    Layers, applied in order; the first failing layer rejects the candidate:
        1. Indoor: one ray up (+Y) and one down (-Y) must hit within range.
        2. Clipping: 2 or more of the 26 directions hit closer than 0.2.
        3. Corner: more than 8 of the 16 horizontal directions hit closer
           than ``min_wall_dist``.
        4. Enclosed: hit rate >= 90%, coefficient of variation < 0.30 and
           farthest hit < 8.0.
        5. Wall-hugging: nearest horizontal hit closer than 0.3.
        6. View quality: fewer than 35% of directions hit within [0.5, 20].
        7. Slit: an opposite pair of horizontal hits sums to less than 1.5.

    When the ray caster has no intersector (non-mesh scene) the ray tests are
    skipped and only the XZ margin check against the AABB is applied.

    Args:
        candidates: list of ``[x, y, z]`` positions.
        raycaster: ``RayCaster`` used for every query.
        room_height: used to widen the layer-1 distance limits.
        min_wall_dist: distance threshold of the corner test.

    Returns:
        The candidates that passed, in their original order.
    """
    if raycaster._intersector is None:
        print(" [过滤] 非 Mesh 场景,跳过射线过滤,返回所有 AABB 内候选")
        lo, hi = raycaster.bmin, raycaster.bmax
        kept = [cand for cand in candidates
                if all(lo[ax] + MARGIN <= cand[ax] <= hi[ax] - MARGIN
                       for ax in (0, 2))]
        print(f" 过滤统计: 总计={len(candidates)}, 通过={len(kept)}")
        return kept

    # The direction table never changes, so it is converted to an array once.
    sample_dirs = np.array(_build_26_directions_yup())
    n_dirs = len(sample_dirs)

    up = np.array([0.0, 1.0, 0.0])
    down = np.array([0.0, -1.0, 0.0])
    up_limit = max(5.0, room_height)
    down_limit = max(3.0, room_height)

    wall_clearance = 0.3
    view_near, view_far = 0.5, 20.0
    view_ratio = 0.35
    slit_width = 1.5

    def classify(dists):
        """Return the stats key of the first failed layer 2-7, or None."""
        # Layer 2: clipping into geometry.
        if int(np.sum(dists < 0.2)) >= 2:
            return "穿模"

        # Layer 3: corner (first 16 entries are the horizontal directions).
        horizontal = dists[:16]
        if int(np.sum(horizontal < min_wall_dist)) > 8:
            return "角落"

        # Layer 4: enclosed in a small, uniform cavity.
        finite = dists[np.isfinite(dists)]
        if len(finite) / n_dirs >= 0.90 and len(finite) >= 2:
            mean_d = float(finite.mean())
            max_d = float(finite.max())
            if mean_d > 0:
                cv = float(finite.std()) / mean_d
                if cv < 0.30 and max_d < 8.0:
                    return "包裹"

        # Layer 5: too close to a wall.
        horizontal_hits = horizontal[np.isfinite(horizontal)]
        if len(horizontal_hits) > 0 and float(horizontal_hits.min()) < wall_clearance:
            return "贴墙"

        # Layer 6: too few directions with a useful viewing distance.
        usable = int(np.sum((dists >= view_near) & (dists <= view_far)))
        if usable / n_dirs < view_ratio:
            return "视野差"

        # Layer 7: directions i and i + 8 point opposite ways; misses count
        # as a very large distance.
        for i in range(8):
            ahead = dists[i] if np.isfinite(dists[i]) else 999
            behind = dists[i + 8] if np.isfinite(dists[i + 8]) else 999
            if ahead + behind < slit_width:
                return "窄缝"
        return None

    total = len(candidates)
    passed = []
    stats = {"无天花板": 0, "无地板": 0, "穿模": 0, "角落": 0,
             "包裹": 0, "贴墙": 0, "视野差": 0, "窄缝": 0}

    started = time.time()
    report_every = max(1, total // 10)

    for idx, pos in enumerate(candidates):
        if idx > 0 and idx % report_every == 0:
            print(f" 过滤进度: {idx}/{total} ({idx*100//total}%)", flush=True)

        origin = np.array(pos, dtype=np.float64)

        # Layer 1: a ceiling above and a floor below, both within range.
        hit_up, d_up = raycaster.cast_ray(origin, up)
        if not hit_up or d_up > up_limit:
            stats["无天花板"] += 1
            continue

        hit_dn, d_dn = raycaster.cast_ray(origin, down)
        if not hit_dn or d_dn > down_limit:
            stats["无地板"] += 1
            continue

        # Layers 2-7 share one batch of 26 rays.
        reason = classify(raycaster.cast_rays_batch(origin, sample_dirs))
        if reason is not None:
            stats[reason] += 1
            continue

        passed.append(pos)

    elapsed = time.time() - started
    print(f" 过滤统计 ({elapsed:.1f}s): 总计={total}, 通过={len(passed)}")
    for key, count in stats.items():
        if count > 0:
            print(f" ❌ {key}: {count} ({count * 100 // max(total, 1)}%)")

    return passed
481
+
482
+
483
+ def _euler_to_rotation_matrix(rx: float, ry: float, rz: float) -> np.ndarray:
484
+ """XYZ 欧拉角 → 旋转矩阵(用于 ERP 相机朝向,Y-up 坐标系)"""
485
+ cx, sx = math.cos(rx), math.sin(rx)
486
+ cy, sy = math.cos(ry), math.sin(ry)
487
+ cz, sz = math.cos(rz), math.sin(rz)
488
+
489
+ Rx = np.array([[1, 0, 0], [0, cx, -sx], [0, sx, cx]])
490
+ Ry = np.array([[cy, 0, sy], [0, 1, 0], [-sy, 0, cy]])
491
+ Rz = np.array([[cz, -sz, 0], [sz, cz, 0], [0, 0, 1]])
492
+ return Rz @ Ry @ Rx
493
+
494
+
495
def render_erp_pointcloud(pts_world: np.ndarray,
                          cam_pos: np.ndarray,
                          cam_rot_euler: list,
                          width: int,
                          height: int,
                          point_size: float = 2.0):
    """Render a point cloud as an ERP (equirectangular) panorama.

    Steps:
        1. Transform all world points into the camera frame.
        2. Compute longitude / latitude per point (Y-up, right-handed).
        3. Project to ERP pixel coordinates.
        4. Keep the nearest point per pixel (Z-buffer) and shade it with a
           depth-based grey level.

    Frame convention (ERPT_native, Y-up):
        camera forward = +Z_cam, up = +Y_cam, right = +X_cam
        lon = atan2(x_cam, z_cam)             (0 straight ahead, + to the right)
        lat = atan2(y_cam, sqrt(x^2 + z^2))   (+pi/2 is straight up)

    Args:
        pts_world: (N, 3) world points.
        cam_pos: camera position ``[x, y, z]``.
        cam_rot_euler: ``[rx, ry, rz]`` Euler angles in radians (camera to
            world, as consumed by ``_euler_to_rotation_matrix``).
        width, height: output resolution in pixels.
        point_size: accepted for interface compatibility; this renderer
            writes exactly one pixel per point and does not splat.

    Returns:
        rgb: (H, W, 3) uint8 grey image, ``255 * (1 - depth / 20)`` clipped
            to [0, 255]; pixels without any point stay 0.
        depth: (H, W) float32 range depth (ray length); 0 marks "no data".
    """
    # NOTE: the previous body started with ``o3d = o3d``, which made ``o3d``
    # a local name and raised UnboundLocalError on every call. Nothing in
    # this function needs Open3D, so the statement was dropped.
    cam_pos = np.array(cam_pos, dtype=np.float64)
    R_cw = _euler_to_rotation_matrix(*cam_rot_euler)   # camera -> world
    R_wc = R_cw.T                                      # world -> camera

    pts_cam = (R_wc @ (np.asarray(pts_world) - cam_pos).T).T    # (N, 3)
    x_c, y_c, z_c = pts_cam[:, 0], pts_cam[:, 1], pts_cam[:, 2]

    # Range depth = Euclidean distance from the camera centre.
    dist = np.sqrt(x_c ** 2 + y_c ** 2 + z_c ** 2).astype(np.float32)

    # Points at the camera centre or with non-finite coordinates are skipped
    # before any pixel index is computed.
    usable = np.isfinite(dist) & (dist > 0)
    x_c, y_c, z_c, dist = x_c[usable], y_c[usable], z_c[usable], dist[usable]

    lon = np.arctan2(x_c, z_c)                           # [-pi, pi]
    lat = np.arctan2(y_c, np.sqrt(x_c ** 2 + z_c ** 2))  # [-pi/2, pi/2]

    u = ((lon / (2 * math.pi) + 0.5) * width).astype(np.float32)
    v = ((0.5 - lat / math.pi) * height).astype(np.float32)
    u = np.clip(u, 0, width - 1).astype(np.int64)
    v = np.clip(v, 0, height - 1).astype(np.int64)

    # Z-buffer: unbuffered minimum so that several points landing on the
    # same pixel all take part in the reduction.
    depth_flat = np.full(height * width, np.inf, dtype=np.float32)
    np.minimum.at(depth_flat, v * width + u, dist)
    depth_buf = depth_flat.reshape(height, width)

    covered = np.isfinite(depth_buf)
    depth_out = np.where(covered, depth_buf, 0.0).astype(np.float32)

    # Pseudo-colour: nearer points are brighter.
    grey = np.clip(255.0 * (1.0 - depth_out / 20.0), 0, 255).astype(np.uint8)
    rgb_buf = np.zeros((height, width, 3), dtype=np.uint8)
    rgb_buf[covered] = grey[covered][:, np.newaxis]

    return rgb_buf, depth_out
579
+
580
+
581
+ def _gpu_align_u(u_ref, u_other, W: int):
582
+ """把 u_other 对齐到与 u_ref 最近的 ERP 循环副本(GPU tensor)"""
583
+ half_w = float(W) / 2.0
584
+ diff = u_other - u_ref
585
+ u_other = torch.where(diff > half_w, u_other - W, u_other)
586
+ u_other = torch.where(diff < -half_w, u_other + W, u_other)
587
+ return u_other
588
+
589
+
590
+ def _gpu_raster_batch(u0, v0, u1, v1, u2, v2,
591
+ c0, c1, c2, d0b, d1b, d2b,
592
+ rgb_flat, depth_flat, H: int, W: int):
593
+ """完全向量化批量光栅化(无 Python for 循环)。
594
+
595
+ 将所有三角面的包围盒像素展开成一个大 tensor,一次性完成
596
+ 重心坐标计算和 scatter_reduce Z-buffer 写入。
597
+
598
+ Args:
599
+ u0/v0, u1/v1, u2/v2: (B,) float32,三顶点 ERP 像素坐标
600
+ c0/c1/c2: (B,3) float32,三顶点颜色 [0,255]
601
+ d0b/d1b/d2b: (B,) float32,三顶点距离
602
+ rgb_flat: (H*W, 3) 输出颜色缓冲(就地修改)
603
+ depth_flat: (H*W,) 输出深度缓冲(就地修改)
604
+ """
605
+ dev = u0.device
606
+
607
+ # ── 包围盒 ──────────────────────────────────────────────────────────────
608
+ u_lo = torch.clamp(torch.floor(torch.minimum(torch.minimum(u0, u1), u2)).long(), 0, W - 1)
609
+ u_hi = torch.clamp(torch.ceil (torch.maximum(torch.maximum(u0, u1), u2)).long(), 0, W - 1)
610
+ v_lo = torch.clamp(torch.floor(torch.minimum(torch.minimum(v0, v1), v2)).long(), 0, H - 1)
611
+ v_hi = torch.clamp(torch.ceil (torch.maximum(torch.maximum(v0, v1), v2)).long(), 0, H - 1)
612
+
613
+ du = u_hi - u_lo + 1
614
+ dv = v_hi - v_lo + 1
615
+ bbox_px = du * dv
616
+
617
+ # 过滤退化面 & 超大面
618
+ valid = (u_hi >= u_lo) & (v_hi >= v_lo) & (bbox_px <= 128 * 128)
619
+ if not valid.any():
620
+ return
621
+
622
+ idx = valid.nonzero(as_tuple=False).squeeze(1)
623
+ u0v = u0[idx]; v0v = v0[idx]
624
+ u1v = u1[idx]; v1v = v1[idx]
625
+ u2v = u2[idx]; v2v = v2[idx]
626
+ c0v = c0[idx]; c1v = c1[idx]; c2v = c2[idx]
627
+ d0v = d0b[idx]; d1v = d1b[idx]; d2v = d2b[idx]
628
+ u_lv = u_lo[idx]; u_hv = u_hi[idx]
629
+ v_lv = v_lo[idx]; v_hv = v_hi[idx]
630
+ duv = u_hv - u_lv + 1
631
+ dvv = v_hv - v_lv + 1
632
+ npx = duv * dvv
633
+
634
+ Bp = int(npx.sum().item())
635
+ if Bp == 0:
636
+ return
637
+
638
+ # ── 展开:repeat_interleave 把面 id 重复 npx[i] 次 ──────────────────────
639
+ face_id = torch.repeat_interleave(
640
+ torch.arange(len(idx), device=dev, dtype=torch.long), npx)
641
+
642
+ cumsum = torch.zeros(len(idx) + 1, dtype=torch.long, device=dev)
643
+ cumsum[1:] = torch.cumsum(npx, 0)
644
+ local_flat = torch.arange(Bp, device=dev, dtype=torch.long) - cumsum[face_id]
645
+
646
+ local_u = local_flat % duv[face_id]
647
+ local_v = local_flat // duv[face_id]
648
+
649
+ uu = (u_lv[face_id] + local_u).float()
650
+ vv = (v_lv[face_id] + local_v).float()
651
+
652
+ # ── 重心坐标(完全向量化) ───────────────────────────────────────────────
653
+ ax = u0v[face_id]; ay = v0v[face_id]
654
+ bx = u1v[face_id]; by = v1v[face_id]
655
+ cx = u2v[face_id]; cy = v2v[face_id]
656
+
657
+ denom = (by - cy) * (ax - cx) + (cx - bx) * (ay - cy)
658
+ safe = denom.abs() > 1e-8
659
+ inv_d = torch.where(safe, 1.0 / denom, torch.zeros_like(denom))
660
+ w0 = ((by - cy) * (uu - cx) + (cx - bx) * (vv - cy)) * inv_d
661
+ w1 = ((cy - ay) * (uu - cx) + (ax - cx) * (vv - cy)) * inv_d
662
+ w2 = 1.0 - w0 - w1
663
+
664
+ inside = safe & (w0 >= -0.01) & (w1 >= -0.01) & (w2 >= -0.01)
665
+ if not inside.any():
666
+ return
667
+
668
+ fi = face_id[inside]
669
+ uui = uu[inside].long()
670
+ vvi = vv[inside].long()
671
+ w0i = w0[inside]; w1i = w1[inside]; w2i = w2[inside]
672
+
673
+ # ── Z-buffer scatter_reduce(amin) ───────────────────────────────────────
674
+ di = w0i * d0v[fi] + w1i * d1v[fi] + w2i * d2v[fi]
675
+ lin = vvi * W + uui
676
+ depth_flat.scatter_reduce_(0, lin, di, reduce='amin', include_self=True)
677
+
678
+ # ── 颜色写入(near-wins) ────────────────────────────────────────────────
679
+ cur_d = depth_flat[lin]
680
+ winner = (di - cur_d).abs() < 1e-4
681
+ w0e = w0i[winner].unsqueeze(1)
682
+ w1e = w1i[winner].unsqueeze(1)
683
+ w2e = w2i[winner].unsqueeze(1)
684
+ fie = fi[winner]
685
+ col_i = torch.clamp(w0e * c0v[fie] + w1e * c1v[fie] + w2e * c2v[fie], 0, 255)
686
+ rgb_flat.scatter_(0, lin[winner].unsqueeze(1).expand(-1, 3), col_i)
687
+
688
+
689
def _gpu_raster_mesh(u_f, v_f, dist, col_t, f_t,
                     rgb_flat, depth_flat, H: int, W: int,
                     batch_size: int = 65536):
    """Rasterise an ERP mesh (three seam copies, automatic OOM fallback).

    Each triangle is submitted three times - as is, shifted by ``-W`` and
    shifted by ``+W`` - so that triangles crossing the horizontal seam are
    drawn on both sides. ``rgb_flat`` and ``depth_flat`` are modified in place.

    Args:
        u_f, v_f: per-vertex ERP pixel coordinates.
        dist: per-vertex distance to the camera.
        col_t: per-vertex colours, one row per vertex.
        f_t: (F, 3) integer tensor of vertex indices.
        rgb_flat, depth_flat: flat output buffers.
        H, W: image size in pixels.
        batch_size: number of triangles per chunk, used only after a CUDA
            out-of-memory error on the single-shot attempt.
    """
    # Order the faces far-to-near by their mean vertex distance.
    mean_dist = (dist[f_t[:, 0]] + dist[f_t[:, 1]] + dist[f_t[:, 2]]) / 3.0
    faces_sorted = f_t[torch.argsort(mean_dist, descending=True)]
    ia, ib, ic = faces_sorted[:, 0], faces_sorted[:, 1], faces_sorted[:, 2]

    dep_a, dep_b, dep_c = dist[ia], dist[ib], dist[ic]
    col_a, col_b, col_c = col_t[ia], col_t[ib], col_t[ic]
    u_a, u_b, u_c = u_f[ia], u_f[ib], u_f[ic]
    v_a, v_b, v_c = v_f[ia], v_f[ib], v_f[ic]

    # Faces with a vertex (almost) at the camera centre are dropped.
    usable = (dep_a > 1e-4) & (dep_b > 1e-4) & (dep_c > 1e-4)
    face_ids = usable.nonzero(as_tuple=False).squeeze(1)

    def seam_copies(u):
        """Main copy plus the copies shifted one image width left / right."""
        return torch.cat([u, u - W, u + W])

    def thrice(t):
        """Repeat a per-face tensor for the three seam copies."""
        return torch.cat([t, t, t])

    def rasterise(sel):
        # Vertices 1 and 2 are moved next to vertex 0 across the seam first.
        ua = u_a[sel]
        ub = _gpu_align_u(ua, u_b[sel], W)
        uc = _gpu_align_u(ua, u_c[sel], W)
        # One concatenated call for all three copies keeps kernel launches low.
        _gpu_raster_batch(
            seam_copies(ua), thrice(v_a[sel]),
            seam_copies(ub), thrice(v_b[sel]),
            seam_copies(uc), thrice(v_c[sel]),
            thrice(col_a[sel]), thrice(col_b[sel]), thrice(col_c[sel]),
            thrice(dep_a[sel]), thrice(dep_b[sel]), thrice(dep_c[sel]),
            rgb_flat, depth_flat, H, W,
        )

    try:
        rasterise(face_ids)
    except torch.cuda.OutOfMemoryError:
        torch.cuda.empty_cache()
        print(f" [WARN] OOM({len(face_ids)} 面),自动降级分批 batch={batch_size}")
        for begin in range(0, len(face_ids), batch_size):
            rasterise(face_ids[begin: begin + batch_size])
739
+
740
+
741
+ def _gpu_splat_pointcloud(u_f, v_f, dist, col_t,
742
+ rgb_flat, depth_flat, H: int, W: int,
743
+ point_size: float = 2.0):
744
+ """点云 scatter Z-buffer splatting(就地修改 rgb_flat / depth_flat)"""
745
+ valid = dist > 1e-4
746
+ u_i = torch.clamp(u_f[valid].long(), 0, W - 1)
747
+ v_i = torch.clamp(v_f[valid].long(), 0, H - 1)
748
+ dist_v = dist[valid]
749
+ col_v = col_t[valid]
750
+ lin = v_i * W + u_i
751
+ radius = max(0, int(round(point_size / 2)) - 1)
752
+
753
+ if radius == 0:
754
+ depth_flat.scatter_reduce_(0, lin, dist_v, reduce='amin', include_self=True)
755
+ sort_idx = torch.argsort(dist_v)
756
+ rgb_flat.scatter_(0, lin[sort_idx].unsqueeze(1).expand(-1, 3), col_v[sort_idx])
757
+ else:
758
+ for dr in range(-radius, radius + 1):
759
+ for dc in range(-radius, radius + 1):
760
+ v_nb = torch.clamp(v_i + dr, 0, H - 1)
761
+ u_nb = torch.clamp(u_i + dc, 0, W - 1)
762
+ depth_flat.scatter_reduce_(0, v_nb * W + u_nb, dist_v,
763
+ reduce='amin', include_self=True)
764
+ sort_idx = torch.argsort(dist_v, descending=True)
765
+ for dr in range(-radius, radius + 1):
766
+ for dc in range(-radius, radius + 1):
767
+ v_nb = torch.clamp(v_i[sort_idx] + dr, 0, H - 1)
768
+ u_nb = torch.clamp(u_i[sort_idx] + dc, 0, W - 1)
769
+ rgb_flat.scatter_(0, (v_nb * W + u_nb).unsqueeze(1).expand(-1, 3),
770
+ col_v[sort_idx])
771
+
772
+
773
+ def _gpu_fill_holes(rgb_2d, depth_out):
774
+ """GPU 空洞填充:大核 max_pool2d + 残余小孔迭代收尾。
775
+
776
+ Args:
777
+ rgb_2d: (H,W,3) float32 GPU tensor
778
+ depth_out: (H,W) float32 GPU tensor(0=空洞)
779
+
780
+ Returns:
781
+ rgb_2d: (H,W,3) float32,填充后
782
+ """
783
+ hole = (depth_out == 0)
784
+ if not hole.any():
785
+ return rgb_2d
786
+
787
+ fill_radius = 32
788
+ k = fill_radius * 2 + 1 # 65
789
+
790
+ rgb_f = rgb_2d.permute(2, 0, 1).unsqueeze(0).float() # (1,3,H,W)
791
+ valid_m = (~hole).float().unsqueeze(0).unsqueeze(0) # (1,1,H,W)
792
+
793
+ rgb_masked = rgb_f * valid_m
794
+ expanded = _F.max_pool2d(rgb_masked, kernel_size=k, stride=1, padding=fill_radius)
795
+ valid_exp = _F.max_pool2d(valid_m, kernel_size=k, stride=1, padding=fill_radius) > 0
796
+
797
+ fill_mask = hole.unsqueeze(0).unsqueeze(0) & valid_exp
798
+ rgb_f = torch.where(fill_mask.expand_as(rgb_f), expanded, rgb_f)
799
+
800
+ # 残余大孔洞:最多 8 轮 3×3 迭代收尾
801
+ hole2 = hole & ~fill_mask.squeeze(0).squeeze(0)
802
+ if hole2.any():
803
+ valid_f2 = (~hole2).float().unsqueeze(0).unsqueeze(0)
804
+ rgb_f2 = rgb_f
805
+ for _ in range(8):
806
+ if not hole2.any():
807
+ break
808
+ r2m = rgb_f2 * valid_f2
809
+ exp2 = _F.max_pool2d(r2m, kernel_size=3, stride=1, padding=1)
810
+ vd2 = _F.max_pool2d(valid_f2, kernel_size=3, stride=1, padding=1) > 0
811
+ nw2 = hole2.unsqueeze(0).unsqueeze(0) & vd2
812
+ if not nw2.any():
813
+ break
814
+ rgb_f2 = torch.where(nw2.expand_as(rgb_f2), exp2, rgb_f2)
815
+ valid_f2 = torch.where(nw2, torch.ones_like(valid_f2), valid_f2)
816
+ hole2 = hole2 & ~nw2.squeeze(0).squeeze(0)
817
+ rgb_f = rgb_f2
818
+
819
+ return rgb_f.squeeze(0).permute(1, 2, 0) # (H,W,3)
820
+
821
+
822
def _render_erp_gpu(pts_world: np.ndarray,
                    colors_world,
                    cam_pos,
                    R_wc: np.ndarray,
                    width: int,
                    height: int,
                    faces: np.ndarray = None,
                    point_size: float = 2.0,
                    batch_size: int = 65536) -> tuple:
    """Render one ERP panorama with torch tensors on ``_TORCH_DEVICE``.

    Pipeline:
        1. vertex transform and ERP projection, fully vectorised;
        2. mesh rasterisation when ``faces`` is non-empty, point splatting
           otherwise (both delegated to the shared helpers);
        3. hole filling by max-pool dilation (mesh mode only).

    Args:
        pts_world: (N, 3) numpy array of world points.
        colors_world: (N, 3) numpy colours; uint8 is used as-is, any other
            dtype is clipped to [0, 1] and scaled by 255. None selects a
            distance-based grey shading.
        cam_pos: numpy camera position.
        R_wc: 3x3 numpy world-to-camera rotation.
        width, height: output resolution.
        faces: (F, 3) vertex indices or None.
        point_size: splat diameter for the point-cloud path.
        batch_size: chunk size forwarded to the mesh rasteriser.

    Returns:
        rgb: (H, W, 3) uint8 numpy array.
        depth: (H, W) float32 numpy array, 0 where nothing was drawn.
    """
    device = _TORCH_DEVICE

    def upload(array):
        """Copy a numpy array to the device as float32."""
        return torch.from_numpy(array.astype(np.float32)).to(device)

    # -- 1. world -> camera ---------------------------------------------------
    points = upload(pts_world)
    centre = upload(cam_pos)
    rotation = upload(R_wc)

    in_cam = (rotation @ (points - centre).T).T
    x_c, y_c, z_c = in_cam[:, 0], in_cam[:, 1], in_cam[:, 2]

    # -- 2. ERP projection ----------------------------------------------------
    lon = torch.atan2(x_c, z_c)
    lat = torch.atan2(y_c, torch.sqrt(x_c ** 2 + z_c ** 2))
    u_f = (lon / (2 * math.pi) + 0.5) * width
    v_f = (0.5 - lat / math.pi) * height
    dist = torch.sqrt(x_c ** 2 + y_c ** 2 + z_c ** 2)

    # -- 3. per-vertex colour -------------------------------------------------
    if colors_world is None:
        # No colours supplied: grey level that fades with distance.
        fraction = torch.clamp(dist / max(float(dist.max()), 1.0), 0, 1)
        grey = torch.clamp((1.0 - fraction) * 200 + 30, 0, 255)
        col_t = grey.unsqueeze(1).expand(-1, 3)
    else:
        if colors_world.dtype == np.uint8:
            col_np = colors_world
        else:
            col_np = (np.clip(colors_world, 0, 1) * 255).astype(np.uint8)
        col_t = upload(col_np)

    H, W = height, width
    INF = 1e9
    rgb_flat = torch.zeros(H * W, 3, dtype=torch.float32, device=device)
    depth_flat = torch.full((H * W,), INF, dtype=torch.float32, device=device)

    # -- 4. rasterise ---------------------------------------------------------
    has_faces = faces is not None and len(faces) > 0
    if has_faces:
        f_t = torch.from_numpy(faces.astype(np.int64)).to(device)
        _gpu_raster_mesh(u_f, v_f, dist, col_t, f_t,
                         rgb_flat, depth_flat, H, W, batch_size)
    else:
        _gpu_splat_pointcloud(u_f, v_f, dist, col_t,
                              rgb_flat, depth_flat, H, W, point_size)

    # -- 5. back to 2-D; untouched pixels become depth 0 ----------------------
    depth_2d = depth_flat.reshape(H, W)
    rgb_2d = rgb_flat.reshape(H, W, 3)
    depth_out = torch.where(depth_2d < INF / 2, depth_2d, torch.zeros_like(depth_2d))

    # -- 6. hole filling (mesh mode only) -------------------------------------
    if has_faces:
        rgb_2d = _gpu_fill_holes(rgb_2d, depth_out)

    # -- 7. download ----------------------------------------------------------
    rgb_np = rgb_2d.clamp(0, 255).byte().cpu().numpy()
    depth_np = depth_out.cpu().numpy().astype(np.float32)
    return rgb_np, depth_np
897
+
898
+
899
def render_erp_batch_gpu(pts_world: np.ndarray,
                         colors_world,
                         cam_poses: list,
                         cam_rots: list,
                         width: int,
                         height: int,
                         faces: np.ndarray = None,
                         point_size: float = 2.0) -> list:
    """Render several camera views of one scene, uploading the scene once.

    Points, colours and faces are copied to the device a single time; the
    views are then rendered one after another, so only the camera pose is
    transferred per frame. Without CUDA / torch every view is rendered
    through ``render_erp_from_ply`` instead.

    Args:
        pts_world: (N, 3) numpy array of world points.
        colors_world: (N, 3) numpy colours or None (distance-based grey).
        cam_poses: list of ``[x, y, z]`` camera positions, one per frame.
        cam_rots: list of ``[rx, ry, rz]`` Euler angles, one per frame.
        width, height: output resolution.
        faces: (F, 3) vertex indices or None.
        point_size: splat diameter for the point-cloud path.

    Returns:
        List of ``(rgb_np, depth_np)`` tuples in input order.
    """
    if not (_CUDA_AVAILABLE and torch is not None):
        return [
            render_erp_from_ply(pts_world, colors_world, pos, rot,
                                width, height, point_size, faces)
            for pos, rot in zip(cam_poses, cam_rots)
        ]

    device = _TORCH_DEVICE

    # -- scene upload (done once) --------------------------------------------
    scene_pts = torch.from_numpy(pts_world.astype(np.float32)).to(device)

    scene_cols = None
    if colors_world is not None:
        if colors_world.dtype == np.uint8:
            col_np = colors_world
        else:
            col_np = (np.clip(colors_world, 0, 1) * 255).astype(np.uint8)
        scene_cols = torch.from_numpy(col_np.astype(np.float32)).to(device)

    scene_faces = None
    if faces is not None and len(faces) > 0:
        scene_faces = torch.from_numpy(faces.astype(np.int64)).to(device)

    # -- per-frame rendering, reusing the uploaded scene ---------------------
    frames = []
    for pose, euler in zip(cam_poses, cam_rots):
        cam_pos_np = np.array(pose, dtype=np.float64)
        R_wc = _euler_to_rotation_matrix(*euler).T

        centre = torch.from_numpy(cam_pos_np.astype(np.float32)).to(device)
        rotation = torch.from_numpy(R_wc.astype(np.float32)).to(device)

        in_cam = (rotation @ (scene_pts - centre).T).T
        x_c, y_c, z_c = in_cam[:, 0], in_cam[:, 1], in_cam[:, 2]
        lon = torch.atan2(x_c, z_c)
        lat = torch.atan2(y_c, torch.sqrt(x_c ** 2 + z_c ** 2))
        u_f = (lon / (2 * math.pi) + 0.5) * width
        v_f = (0.5 - lat / math.pi) * height
        dist = torch.sqrt(x_c ** 2 + y_c ** 2 + z_c ** 2)

        if scene_cols is not None:
            frame_cols = scene_cols
        else:
            # The grey shading depends on this frame's distances.
            fraction = torch.clamp(dist / max(float(dist.max()), 1.0), 0, 1)
            grey = torch.clamp((1.0 - fraction) * 200 + 30, 0, 255)
            frame_cols = grey.unsqueeze(1).expand(-1, 3)

        try:
            rgb_np, depth_np = _render_erp_gpu_from_projected(
                u_f, v_f, dist, frame_cols, scene_faces,
                width, height, point_size)
        except Exception as e:
            # Fall back to the self-contained single-frame renderer.
            print(f" [WARN] batch GPU 渲染帧失败,回退单帧: {e}")
            rgb_np, depth_np = _render_erp_gpu(
                pts_world, colors_world, cam_pos_np, R_wc,
                width, height, faces=faces, point_size=point_size)
        frames.append((rgb_np, depth_np))

    return frames
978
+
979
+
980
def _render_erp_gpu_from_projected(
        u_f, v_f, dist, col_t, f_t,
        width: int, height: int, point_size: float = 2.0) -> tuple:
    """Rasterise already-projected vertices, skipping the vertex transform.

    Args:
        u_f, v_f: (N,) float tensors, ERP pixel coordinates.
        dist: (N,) float tensor, vertex distance to the camera.
        col_t: (N, 3) float tensor of vertex colours.
        f_t: (F, 3) integer tensor of faces, or None for point splatting.
        width, height: output resolution.
        point_size: splat diameter for the point-cloud path.

    Returns:
        rgb: (H, W, 3) uint8 numpy array.
        depth: (H, W) float32 numpy array, 0 where nothing was drawn.
    """
    device = u_f.device
    H, W = height, width
    INF = 1e9
    use_mesh = f_t is not None and len(f_t) > 0

    rgb_flat = torch.zeros(H * W, 3, dtype=torch.float32, device=device)
    depth_flat = torch.full((H * W,), INF, dtype=torch.float32, device=device)

    if use_mesh:
        _gpu_raster_mesh(u_f, v_f, dist, col_t, f_t,
                         rgb_flat, depth_flat, H, W)
    else:
        _gpu_splat_pointcloud(u_f, v_f, dist, col_t,
                              rgb_flat, depth_flat, H, W, point_size)

    depth_2d = depth_flat.reshape(H, W)
    rgb_2d = rgb_flat.reshape(H, W, 3)
    # Pixels never written still hold INF; they are reported as depth 0.
    depth_out = torch.where(depth_2d < INF / 2, depth_2d, torch.zeros_like(depth_2d))

    # Hole filling is applied to mesh renders only.
    if use_mesh:
        rgb_2d = _gpu_fill_holes(rgb_2d, depth_out)

    rgb_np = rgb_2d.clamp(0, 255).byte().cpu().numpy()
    depth_np = depth_out.cpu().numpy().astype(np.float32)
    return rgb_np, depth_np
1016
+
1017
+
1018
+ def render_erp_from_ply(pts_world: np.ndarray,
1019
+ colors_world,
1020
+ cam_pos: list,
1021
+ cam_rot_euler: list,
1022
+ width: int,
1023
+ height: int,
1024
+ point_size: float = 2.0,
1025
+ faces: np.ndarray = None):
1026
+ """带颜色的 ERP 全景图渲染(自动 dispatch 到 GPU/CPU)
1027
+
1028
+ Args:
1029
+ pts_world: (N,3) float64,Y-up 世界坐标
1030
+ colors_world: (N,3) uint8 或 float32,RGB 颜色,None=伪彩
1031
+ cam_pos: [x, y, z] 相机位置(Y-up)
1032
+ cam_rot_euler: [rx, ry, rz] 相机欧拉角(弧度)
1033
+ width, height: 输出分辨率
1034
+ point_size: splat 直径(像素),仅纯点云模式有效
1035
+ faces: (F,3) int,三角面顶点索引;有面时走面光栅化
1036
+
1037
+ Returns:
1038
+ rgb: (H, W, 3) uint8
1039
+ depth: (H, W) float32,range depth(米),0=无效
1040
+ """
1041
+ cam_pos = np.array(cam_pos, dtype=np.float64)
1042
+ R_cw = _euler_to_rotation_matrix(*cam_rot_euler)
1043
+ R_wc = R_cw.T
1044
+
1045
+ # ── GPU dispatch ──────────────────────────────────────────────────────
1046
+ if _CUDA_AVAILABLE and torch is not None:
1047
+ try:
1048
+ return _render_erp_gpu(pts_world, colors_world,
1049
+ cam_pos, R_wc,
1050
+ width, height,
1051
+ faces=faces, point_size=point_size)
1052
+ except Exception as _gpu_err:
1053
+ print(f" [WARN] GPU 渲染失败,回退 CPU: {_gpu_err}")
1054
+
1055
+ # ── CPU 路径 ──────────────────────────────────────────────────────────
1056
+ # 所有顶点变换到相机坐标系
1057
+ vecs = pts_world - cam_pos
1058
+ pts_cam = (R_wc @ vecs.T).T # (N, 3)
1059
+
1060
+ x_c = pts_cam[:, 0].astype(np.float32)
1061
+ y_c = pts_cam[:, 1].astype(np.float32)
1062
+ z_c = pts_cam[:, 2].astype(np.float32)
1063
+
1064
+ # ERP 投影:每顶点 → (u_f, v_f, dist)
1065
+ lon = np.arctan2(x_c, z_c)
1066
+ r_xz = np.sqrt(x_c ** 2 + z_c ** 2)
1067
+ lat = np.arctan2(y_c, r_xz)
1068
+ u_f = (lon / (2 * math.pi) + 0.5) * width # float 像素坐标
1069
+ v_f = (0.5 - lat / math.pi) * height
1070
+ dist = np.sqrt(x_c ** 2 + y_c ** 2 + z_c ** 2)
1071
+
1072
+ # 顶点颜色
1073
+ if colors_world is not None:
1074
+ col_all = colors_world
1075
+ if col_all.dtype != np.uint8:
1076
+ col_all = (np.clip(col_all, 0, 1) * 255).astype(np.uint8)
1077
+ else:
1078
+ d_norm = np.clip(dist / max(float(dist.max()), 1.0), 0, 1)
1079
+ g = np.clip((1.0 - d_norm) * 200 + 30, 0, 255).astype(np.uint8)
1080
+ col_all = np.stack([g, g, g], axis=1)
1081
+
1082
+ rgb_buf = np.zeros((height, width, 3), dtype=np.uint8)
1083
+ depth_buf = np.full((height, width), np.inf, dtype=np.float32)
1084
+
1085
+ if faces is not None and len(faces) > 0:
1086
+ # ── 网格模式:三角面光栅化 ──────────────────────────────────────
1087
+ f = faces.astype(np.int32)
1088
+ d0, d1, d2 = dist[f[:, 0]], dist[f[:, 1]], dist[f[:, 2]]
1089
+ face_dist = (d0 + d1 + d2) / 3.0
1090
+ valid_face = (d0 > 1e-4) & (d1 > 1e-4) & (d2 > 1e-4)
1091
+ f = f[valid_face]
1092
+ face_dist = face_dist[valid_face]
1093
+
1094
+ # 从远到近排序,近面覆盖远面
1095
+ order = np.argsort(face_dist)[::-1]
1096
+ f = f[order]
1097
+ face_dist = face_dist[order]
1098
+
1099
+ c0s = col_all[f[:, 0]].astype(np.float32)
1100
+ c1s = col_all[f[:, 1]].astype(np.float32)
1101
+ c2s = col_all[f[:, 2]].astype(np.float32)
1102
+ d0s = dist[f[:, 0]]
1103
+ d1s = dist[f[:, 1]]
1104
+ d2s = dist[f[:, 2]]
1105
+
1106
+ # 三顶点的 ERP 浮点坐标
1107
+ u0s_raw = u_f[f[:, 0]]
1108
+ u1s_raw = u_f[f[:, 1]]
1109
+ u2s_raw = u_f[f[:, 2]]
1110
+ v0s = v_f[f[:, 0]]
1111
+ v1s = v_f[f[:, 1]]
1112
+ v2s = v_f[f[:, 2]]
1113
+
1114
+ def _raster_triangle(u0v, v0v, u1v, v1v, u2v, v2v,
1115
+ c0, c1, c2, dep0, dep1, dep2):
1116
+ """将单个三角面光栅化写入 rgb_buf / depth_buf(闭包)"""
1117
+ v_min = max(0, int(math.floor(min(v0v, v1v, v2v))))
1118
+ v_max = min(height - 1, int(math.ceil(max(v0v, v1v, v2v))))
1119
+ u_min = max(0, int(math.floor(min(u0v, u1v, u2v))))
1120
+ u_max = min(width - 1, int(math.ceil(max(u0v, u1v, u2v))))
1121
+ if v_max < v_min or u_max < u_min:
1122
+ return
1123
+ vs_arr = np.arange(v_min, v_max + 1)
1124
+ us_arr = np.arange(u_min, u_max + 1)
1125
+ uu, vv = np.meshgrid(us_arr, vs_arr)
1126
+ uu = uu.flatten().astype(np.float32)
1127
+ vv = vv.flatten().astype(np.float32)
1128
+ denom = ((v1v - v2v) * (u0v - u2v) + (u2v - u1v) * (v0v - v2v))
1129
+ if abs(denom) < 1e-8:
1130
+ return
1131
+ inv_d = 1.0 / denom
1132
+ w0 = ((v1v - v2v) * (uu - u2v) + (u2v - u1v) * (vv - v2v)) * inv_d
1133
+ w1 = ((v2v - v0v) * (uu - u2v) + (u0v - u2v) * (vv - v2v)) * inv_d
1134
+ w2 = 1.0 - w0 - w1
1135
+ inside = (w0 >= -0.01) & (w1 >= -0.01) & (w2 >= -0.01)
1136
+ if not inside.any():
1137
+ return
1138
+ uu_in = uu[inside].astype(np.int32)
1139
+ vv_in = vv[inside].astype(np.int32)
1140
+ w0_in = w0[inside][:, None]
1141
+ w1_in = w1[inside][:, None]
1142
+ w2_in = w2[inside][:, None]
1143
+ di = w0_in[:, 0] * dep0 + w1_in[:, 0] * dep1 + w2_in[:, 0] * dep2
1144
+ ci_rgb = np.clip(w0_in * c0 + w1_in * c1 + w2_in * c2, 0, 255).astype(np.uint8)
1145
+ closer = di < depth_buf[vv_in, uu_in]
1146
+ if closer.any():
1147
+ depth_buf[vv_in[closer], uu_in[closer]] = di[closer]
1148
+ rgb_buf[vv_in[closer], uu_in[closer]] = ci_rgb[closer]
1149
+
1150
+ half_w = width / 2.0
1151
+
1152
+ for i in range(len(f)):
1153
+ u0v, v0v = float(u0s_raw[i]), float(v0s[i])
1154
+ u1v, v1v = float(u1s_raw[i]), float(v1s[i])
1155
+ u2v, v2v = float(u2s_raw[i]), float(v2s[i])
1156
+ c0, c1, c2 = c0s[i], c1s[i], c2s[i]
1157
+ dep0, dep1, dep2 = float(d0s[i]), float(d1s[i]), float(d2s[i])
1158
+
1159
+ # 检测是否跨越 ERP 左右边界(u 坐标差 > width/2)
1160
+ us_tri = np.array([u0v, u1v, u2v])
1161
+ u_span = float(us_tri.max() - us_tri.min())
1162
+
1163
+ if u_span > half_w:
1164
+ # 跨边界:以 u0 为基准,把 u1/u2 对齐到与 u0 最近的循环副本
1165
+ def _align(u_ref, u_other):
1166
+ diff = u_other - u_ref
1167
+ if diff > half_w:
1168
+ return u_other - width
1169
+ elif diff < -half_w:
1170
+ return u_other + width
1171
+ return u_other
1172
+
1173
+ u1_a = _align(u0v, u1v)
1174
+ u2_a = _align(u0v, u2v)
1175
+
1176
+ # 主渲染(对齐后坐标,_raster_triangle 内部 clip 到 [0, width-1])
1177
+ _raster_triangle(u0v, v0v, u1_a, v1v, u2_a, v2v, c0, c1, c2, dep0, dep1, dep2)
1178
+ # 循环副本(偏移 ±width 处理左右两侧黑边)
1179
+ _raster_triangle(u0v - width, v0v, u1_a - width, v1v, u2_a - width, v2v,
1180
+ c0, c1, c2, dep0, dep1, dep2)
1181
+ _raster_triangle(u0v + width, v0v, u1_a + width, v1v, u2_a + width, v2v,
1182
+ c0, c1, c2, dep0, dep1, dep2)
1183
+ else:
1184
+ _raster_triangle(u0v, v0v, u1v, v1v, u2v, v2v, c0, c1, c2, dep0, dep1, dep2)
1185
+
1186
+ else:
1187
+ # ── 纯点云模式:Z-buffer Splatting ────────────────────────────
1188
+ valid_mask = dist > 1e-4
1189
+ u_i = np.clip(u_f.astype(np.int32), 0, width - 1)[valid_mask]
1190
+ v_i = np.clip(v_f.astype(np.int32), 0, height - 1)[valid_mask]
1191
+ dist_v = dist[valid_mask]
1192
+ col_v = col_all[valid_mask]
1193
+
1194
+ order = np.argsort(dist_v)[::-1]
1195
+ radius = max(0, int(round(point_size / 2)) - 1)
1196
+
1197
+ if radius == 0:
1198
+ near_order = np.argsort(dist_v)
1199
+ for idx in near_order:
1200
+ vi, ui, di = v_i[idx], u_i[idx], dist_v[idx]
1201
+ if di < depth_buf[vi, ui]:
1202
+ depth_buf[vi, ui] = di
1203
+ rgb_buf[vi, ui] = col_v[idx]
1204
+ else:
1205
+ for idx in order:
1206
+ vi, ui, di = int(v_i[idx]), int(u_i[idx]), float(dist_v[idx])
1207
+ if not np.isfinite(di):
1208
+ continue
1209
+ v0 = max(0, vi - radius)
1210
+ v1 = min(height, vi + radius + 1)
1211
+ u0 = max(0, ui - radius)
1212
+ u1 = min(width, ui + radius + 1)
1213
+ region = depth_buf[v0:v1, u0:u1]
1214
+ mask = di < region
1215
+ region[mask] = di
1216
+ depth_buf[v0:v1, u0:u1] = region
1217
+ rgb_region = rgb_buf[v0:v1, u0:u1]
1218
+ rgb_region[mask] = col_v[idx]
1219
+ rgb_buf[v0:v1, u0:u1] = rgb_region
1220
+
1221
+ depth_out = np.where(np.isfinite(depth_buf), depth_buf, 0.0).astype(np.float32)
1222
+
1223
+ # ── 空洞填充:迭代最近邻复制,每轮向外扩 1 像素,不修改已有有效像素 ──────
1224
+ if faces is not None and len(faces) > 0:
1225
+ hole_mask = (depth_out == 0) # True=空洞
1226
+ if hole_mask.any():
1227
+ kernel = np.ones((3, 3), np.uint8)
1228
+ valid_u8 = (~hole_mask).astype(np.uint8)
1229
+ for _ in range(32):
1230
+ if not hole_mask.any():
1231
+ break
1232
+ dilated_valid = cv2.dilate(valid_u8, kernel)
1233
+ newly = hole_mask & (dilated_valid > 0)
1234
+ if not newly.any():
1235
+ break
1236
+ for c in range(3):
1237
+ src = rgb_buf[:, :, c]
1238
+ # dilate 在 valid 区域的颜色,传播到邻近空洞(取邻域最大值近似最近邻)
1239
+ expanded = cv2.dilate(src * valid_u8, kernel)
1240
+ rgb_buf[:, :, c] = np.where(newly, expanded, src)
1241
+ hole_mask[newly] = False
1242
+ valid_u8[newly] = 1
1243
+
1244
+ return rgb_buf, depth_out
1245
+
1246
+
1247
def _extract_ply_colors(mesh_or_pc) -> Optional[np.ndarray]:
    """Try to pull per-vertex colours out of a trimesh object.

    Args:
        mesh_or_pc: a ``trimesh.Trimesh`` (colours read from
            ``visual.vertex_colors``) or a ``trimesh.PointCloud`` (colours
            read from ``colors``). Any other type yields ``None``.

    Returns:
        A uint8 array holding the first three colour columns of every vertex,
        or ``None`` when no colours are available or extraction raised.
    """
    try:
        if isinstance(mesh_or_pc, trimesh.Trimesh):
            visual = mesh_or_pc.visual
            raw = None if visual is None else getattr(visual, 'vertex_colors', None)
        elif isinstance(mesh_or_pc, trimesh.PointCloud):
            raw = mesh_or_pc.colors
        else:
            raw = None

        if raw is not None and len(raw) > 0:
            # Drop any alpha channel: keep only the leading RGB columns.
            return np.array(raw[:, :3], dtype=np.uint8)
    except Exception as e:
        # Best effort: a failure here only means the caller gets no colours.
        print(f" [WARN] 提取颜色失败: {e}")
    return None
1262
+
1263
+
1264
def save_pose(cam_pos_yup: list, cam_rot_euler: list,
              output_path: str, frame_id: int):
    """Write one camera pose to a JSON file (ERPT format, Y-up, cam_to_world).

    Args:
        cam_pos_yup: [x, y, z] camera position in Y-up world coordinates.
        cam_rot_euler: [rx, ry, rz] Euler angles in radians, XYZ order.
        output_path: path of the JSON file to write.
        frame_id: frame index stored alongside the pose.

    The rotation is stored as a quaternion in [w, x, y, z] order.
    """
    rot = _euler_to_rotation_matrix(*cam_rot_euler)

    # Rotation matrix -> quaternion (Shepperd's method): branch on the largest
    # of the trace / diagonal entries so the divisor stays well away from zero.
    trace = rot[0, 0] + rot[1, 1] + rot[2, 2]
    if trace > 0:
        s = 0.5 / math.sqrt(trace + 1.0)
        quat_wxyz = (
            0.25 / s,
            (rot[2, 1] - rot[1, 2]) * s,
            (rot[0, 2] - rot[2, 0]) * s,
            (rot[1, 0] - rot[0, 1]) * s,
        )
    elif rot[0, 0] > rot[1, 1] and rot[0, 0] > rot[2, 2]:
        s = 2.0 * math.sqrt(1.0 + rot[0, 0] - rot[1, 1] - rot[2, 2])
        quat_wxyz = (
            (rot[2, 1] - rot[1, 2]) / s,
            0.25 * s,
            (rot[0, 1] + rot[1, 0]) / s,
            (rot[0, 2] + rot[2, 0]) / s,
        )
    elif rot[1, 1] > rot[2, 2]:
        s = 2.0 * math.sqrt(1.0 + rot[1, 1] - rot[0, 0] - rot[2, 2])
        quat_wxyz = (
            (rot[0, 2] - rot[2, 0]) / s,
            (rot[0, 1] + rot[1, 0]) / s,
            0.25 * s,
            (rot[1, 2] + rot[2, 1]) / s,
        )
    else:
        s = 2.0 * math.sqrt(1.0 + rot[2, 2] - rot[0, 0] - rot[1, 1])
        quat_wxyz = (
            (rot[1, 0] - rot[0, 1]) / s,
            (rot[0, 2] + rot[2, 0]) / s,
            (rot[1, 2] + rot[2, 1]) / s,
            0.25 * s,
        )

    pose_data = {
        "frame_id": frame_id,
        "position": [float(v) for v in cam_pos_yup],
        "rotation_quaternion": [float(q) for q in quat_wxyz],
        "camera_type": "erp_ray",
        "coordinate_system": "right-handed, Y-up, Z-forward (cam_to_world)",
        "render_method": "ply_erp",
    }
    with open(output_path, 'w') as f:
        json.dump(pose_data, f, indent=2)
1312
+
1313
+
1314
def build_ray_directions(H=WARP_H, W=WARP_W):
    """Build the unit view ray of every pixel centre of an H x W ERP image (Y-up).

    Row ``i`` maps to latitude ``pi/2 - pi * (i + 0.5) / H`` (top row is near
    +pi/2) and column ``j`` to longitude ``2 * pi * (j + 0.5) / W``.

    Returns:
        Float64 array of shape (H, W, 3) holding
        ``(cos(lat) * sin(lon), sin(lat), cos(lat) * cos(lon))`` per pixel,
        i.e. X, Y (up), Z (front).

    NOTE(review): with this mapping column 0 looks along +Z (lon ~ 0), whereas
    the CPU rasteriser in this file places lon = 0 at the image centre
    (``u = (lon / 2pi + 0.5) * width``). Confirm the depth maps combined with
    these rays use the same column convention.
    """
    row_idx = np.arange(H, dtype=np.float64)
    col_idx = np.arange(W, dtype=np.float64)
    lat = (np.pi / 2 - np.pi * (row_idx + 0.5) / H)[:, np.newaxis]   # (H, 1)
    lon = (2 * np.pi * (col_idx + 0.5) / W)[np.newaxis, :]           # (1, W)

    horiz = np.cos(lat)  # length of the ray's projection onto the XZ plane
    dirs = np.empty((H, W, 3), dtype=np.float64)
    dirs[..., 0] = horiz * np.sin(lon)   # X
    dirs[..., 1] = np.sin(lat)           # Y (up)
    dirs[..., 2] = horiz * np.cos(lon)   # Z (front)
    return dirs
1327
+
1328
+
1329
# Ray-direction grids already built, keyed by (H, W).
_ray_dirs_cache = {}


def get_ray_dirs(H=WARP_H, W=WARP_W):
    """Return the ray-direction grid for (H, W), building and caching it on first use.

    The cached array itself is returned (not a copy), so callers share it.
    """
    key = (H, W)
    try:
        return _ray_dirs_cache[key]
    except KeyError:
        grid = build_ray_directions(H, W)
        _ray_dirs_cache[key] = grid
        return grid
1336
+
1337
+
1338
def depth_to_3d_points(position, depth, ray_dirs, max_depth=None):
    """Unproject the valid pixels of a depth map into 3D points.

    Args:
        position: camera position (3 values) added to every point.
        depth: per-pixel distance along the ray; values <= 0 are ignored.
        ray_dirs: per-pixel ray directions with a trailing axis of size 3,
            used as given (no camera rotation is applied here).
        max_depth: if not None, pixels with depth above it are ignored too.

    Returns:
        Float64 array of shape (K, 3), one row per kept pixel; (0, 3) when no
        pixel is kept.
    """
    keep = depth > 0
    if max_depth is not None:
        keep = keep & (depth <= max_depth)
    if not keep.any():
        return np.empty((0, 3), dtype=np.float64)

    origin = np.array(position, dtype=np.float64)
    offsets = ray_dirs * depth[..., np.newaxis]
    return (origin + offsets)[keep]
1346
+
1347
+
1348
def project_points_to_coverage(pts, tgt_pos, H=WARP_H, W=WARP_W):
    """Mark which pixels of an H x W ERP image at ``tgt_pos`` are hit by ``pts``.

    Each point is projected by its direction from ``tgt_pos`` (longitude taken
    modulo 2*pi, so column 0 is lon = 0). The hit mask is then grown by one
    pixel up, down, left and right; the growth does not wrap across the
    left/right image border.

    Returns:
        Boolean array of shape (H, W); all False when ``pts`` is empty.
    """
    if len(pts) == 0:
        return np.zeros((H, W), dtype=bool)

    origin = np.array(tgt_pos, dtype=np.float64)
    rel = pts - origin
    dx, dy, dz = rel[:, 0], rel[:, 1], rel[:, 2]

    lat = np.arctan2(dy, np.sqrt(dx ** 2 + dz ** 2))
    lon = np.arctan2(dx, dz) % (2 * np.pi)

    rows = np.clip(((np.pi / 2 - lat) / np.pi * H).astype(np.int32), 0, H - 1)
    cols = np.clip((lon / (2 * np.pi) * W).astype(np.int32), 0, W - 1)

    hit = np.zeros((H, W), dtype=bool)
    hit[rows, cols] = True

    # 4-neighbour dilation; every shift reads the un-grown mask.
    grown = hit.copy()
    grown[:, :-1] |= hit[:, 1:]
    grown[:, 1:] |= hit[:, :-1]
    grown[:-1, :] |= hit[1:, :]
    grown[1:, :] |= hit[:-1, :]
    return grown
1367
+
1368
+
1369
def select_next_frame(candidates, selected_idx, selected_pos, all_pts,
                      reachable=None):
    """Pick the unselected candidate with the best coverage score.

    For each eligible candidate the accumulated points ``all_pts`` are
    projected into a WARP_H x WARP_W coverage mask. ``gain`` is the fraction
    of pixels left uncovered, ``overlap`` the covered fraction, and
    ``score = gain - DEFAULT_OVERLAP_PENALTY * overlap``.

    Args:
        candidates: candidate camera positions.
        selected_idx: indices already used; these are never returned.
        selected_pos: not used by this function.
        all_pts: accumulated 3D points.
        reachable: optional container of allowed indices; None allows all.

    Returns:
        ``(index, gain, score, n_eligible)``. When nothing is eligible the
        result is ``(-1, 0.0, -999.0, 0)``. Ties keep the lowest index.
    """
    H, W = WARP_H, WARP_W
    total_px = H * W
    overlap_penalty = DEFAULT_OVERLAP_PENALTY

    remaining = [
        i for i in range(len(candidates))
        if i not in selected_idx and (reachable is None or i in reachable)
    ]
    if not remaining:
        return -1, 0.0, -999.0, 0

    best_ci, best_sc, best_g = -1, -999.0, 0.0
    for ci in remaining:
        mask = project_points_to_coverage(all_pts, candidates[ci], H, W)
        covered = int(np.sum(mask))
        gain = (total_px - covered) / total_px
        overlap = covered / total_px
        score = gain - overlap_penalty * overlap
        # Strict comparison: the first candidate reaching a score wins.
        if score > best_sc:
            best_ci, best_sc, best_g = ci, score, gain

    return best_ci, best_g, best_sc, len(remaining)
1407
+
1408
+
1409
def compute_max_depth(candidates):
    """Return 1.5x the diagonal of the candidates' axis-aligned bounding box."""
    positions = np.array(candidates)
    lower = positions.min(0)
    upper = positions.max(0)
    diagonal = float(np.linalg.norm(upper - lower))
    return diagonal * 1.5
1413
+
1414
+
1415
def load_depth_downsampled(path, H=WARP_H, W=WARP_W):
    """Load a depth ``.npy`` file and bring it to (H, W) as float32.

    NaNs are replaced by 0. If the stored map is not already (H, W) it is
    resized with OpenCV area interpolation. Should that call raise, a NumPy
    fallback is used: block averaging when the source is at least as large as
    the target in both dimensions, otherwise a zero-padded copy of the
    overlapping top-left region.
    """
    depth = np.nan_to_num(np.load(path).astype(np.float32), nan=0.0)
    if depth.shape == (H, W):
        return depth
    try:
        return cv2.resize(depth, (W, H), interpolation=cv2.INTER_AREA)
    except Exception:
        src_h, src_w = depth.shape
        block_h, block_w = src_h // H, src_w // W
        if block_h >= 1 and block_w >= 1:
            # Average non-overlapping block_h x block_w tiles; leftover
            # rows/columns beyond a whole number of tiles are dropped.
            cropped = depth[:block_h * H, :block_w * W]
            return cropped.reshape(H, block_h, W, block_w).mean(axis=(1, 3))
        padded = np.zeros((H, W), dtype=np.float32)
        n_rows, n_cols = min(src_h, H), min(src_w, W)
        padded[:n_rows, :n_cols] = depth[:n_rows, :n_cols]
        return padded
1430
+
1431
+
1432
def trim_depth(new_depth, new_pos, existing_pts, ray_dirs):
    """Zero out the pixels of a new depth map that existing points already cover.

    Args:
        new_depth: depth map of the new frame; not modified.
        new_pos: camera position of the new frame.
        existing_pts: accumulated 3D points; may be empty.
        ray_dirs: not used by this function.

    Returns:
        ``(trimmed, n_before, n_after)``: a trimmed copy of the depth map and
        the number of pixels with depth > 0 before and after trimming.
    """
    n_before = int(np.sum(new_depth > 0))
    result = new_depth.copy()
    if len(existing_pts) == 0:
        return result, n_before, n_before

    n_rows, n_cols = new_depth.shape[0], new_depth.shape[1]
    already_seen = project_points_to_coverage(existing_pts, new_pos, n_rows, n_cols)
    result[already_seen] = 0
    return result, n_before, int(np.sum(result > 0))
1441
+
1442
+
1443
def update_reachability(current_pos: np.ndarray, candidates, selected_idx,
                        reachable: set, raycaster: "RayCaster"):
    """Add to ``reachable`` every candidate with a clear line of sight from ``current_pos``.

    Args:
        current_pos: position the sight lines start from.
        candidates: candidate positions, indexed like ``reachable``.
        selected_idx: indices already used; they are never added.
        reachable: set of reachable indices, updated in place.
        raycaster: object exposing ``_intersector`` and
            ``cast_ray(origin, direction) -> (hit, hit_distance)``.

    Returns:
        The number of indices newly added to ``reachable`` by this call.
    """
    if raycaster._intersector is None:
        # No mesh intersector available: treat every unselected candidate as
        # reachable, and report how many of them were actually new.
        before = len(reachable)
        reachable.update(
            ci for ci in range(len(candidates)) if ci not in selected_idx)
        return len(reachable) - before

    n_new = 0
    for ci, cand in enumerate(candidates):
        if ci in selected_idx or ci in reachable:
            continue
        target = np.array(cand, dtype=np.float64)
        dist_to_target = float(np.linalg.norm(target - current_pos))
        if dist_to_target < 0.1:
            # Practically the same spot: reachable, and avoids dividing by ~0.
            reachable.add(ci)
            n_new += 1
            continue
        direction = (target - current_pos) / dist_to_target
        hit, hit_dist = raycaster.cast_ray(current_pos, direction)
        # A hit at or beyond 95% of the way to the target is not an occluder.
        if not hit or hit_dist >= dist_to_target * 0.95:
            reachable.add(ci)
            n_new += 1

    return n_new
1470
+
1471
+
1472
def run_phase2(pts_world: np.ndarray,
               colors_world,
               faces,
               candidates,
               mesh_center,
               raycaster: RayCaster,
               output_dir: str,
               max_frames: int,
               resolution,
               args):
    """Interleaved select-and-render main loop (PLY version, mirroring the Blender version).

    Each iteration picks a camera position on the current floor, renders an
    ERP RGB + depth frame, queues it for writing on a small thread pool,
    unprojects the not-yet-covered depth pixels into the accumulated point
    cloud and updates candidate reachability. Floors (clusters of candidate Y
    values) are visited in ascending order; the loop moves to the next floor
    when scores/gains dry up and stops after the last one.

    Args:
        pts_world, colors_world, faces: scene geometry handed to the renderer.
        candidates: non-empty list of candidate camera positions [x, y, z].
        mesh_center: fallback target for the first frame.
        raycaster: used for line-of-sight reachability updates.
        output_dir: directory receiving ``panorama_XXXX.png``,
            ``panorama_XXXX_depth.npy`` and ``pose_XXXX.json``.
        max_frames: iteration budget of the main loop.
        resolution: ``(width, height)`` of the rendered frames.
        args: namespace providing ``stop_score``, ``stop_delta``,
            ``min_frames``, ``stop_gain``, ``rotation_type`` and ``point_size``.

    Returns:
        List of per-frame result dicts. Frames rejected for too little valid
        depth are included with ``"skipped": True``; padding frames carry
        ``"supplementary": True``.
    """
    W_render, H_render = resolution
    ray_dirs = get_ray_dirs(WARP_H, WARP_W)
    max_depth = compute_max_depth(candidates)

    selected_idx = set()
    selected_pos = []
    all_pts = np.empty((0, 3), dtype=np.float64)
    pts_chunks = []
    results = []
    reachable = set()

    stop_score = args.stop_score
    stop_delta = args.stop_delta
    min_frames = args.min_frames

    ACTUAL_GAIN_WINDOW = 3
    ACTUAL_GAIN_FLOOR = args.stop_gain
    actual_gain_history = []
    delta_history = []

    # ── Async I/O pool: disk writes overlap with rendering ──────────────────
    _io_executor = ThreadPoolExecutor(max_workers=2)
    _pending_io = []  # futures of frame writes that have not been awaited yet

    def _save_frame_async(rgb, depth, rgb_path, depth_npy, pos, cam_rot,
                          pose_path, frame_id):
        """Queue writing of PNG + npy + pose JSON on the I/O pool; return the future."""
        def _do_save():
            cv2.imwrite(rgb_path, cv2.cvtColor(rgb, cv2.COLOR_RGB2BGR))
            np.save(depth_npy, depth.astype(np.float32))
            save_pose(pos, cam_rot, pose_path, frame_id)
        return _io_executor.submit(_do_save)

    consecutive_skips = 0
    MAX_CONSECUTIVE_SKIPS = 3

    # Group candidate heights into floors: sorted distinct Y values (rounded to
    # 2 decimals) start a new floor whenever the gap to the previous exceeds 1.0.
    y_vals = sorted(set(round(c[1], 2) for c in candidates))
    floors = [[y_vals[0]]]
    for y in y_vals[1:]:
        if y - floors[-1][-1] > 1.0:
            floors.append([y])
        else:
            floors[-1].append(y)

    n_floors = len(floors)
    floor_mids = [sum(f) / len(f) for f in floors]
    # Each candidate belongs to the floor whose mean height is closest.
    candidate_floor = [
        min(range(n_floors), key=lambda i: abs(c[1] - floor_mids[i]))
        for c in candidates
    ]
    current_floor = 0

    def floor_set(fi):
        """Return the set of candidate indices assigned to floor ``fi``."""
        return set(i for i, f in enumerate(candidate_floor) if f == fi)

    floor_names = [f"楼层{i+1}(Y={min(f):.1f}~{max(f):.1f})" for i, f in enumerate(floors)]

    print(f"\n{'='*60}")
    print(f"[Phase 2] 边渲边选 (候选={len(candidates)}, 最多={max_frames}帧)")
    print(f"{'='*60}")
    print(f" {n_floors} 个楼层: {floor_names}")

    t_total = time.time()
    time_select = time_render = time_depth = time_reach = 0.0

    for frame_count in range(max_frames):

        # ---- Choose the camera position ----
        t_sel = time.time()
        if frame_count == 0:
            # First frame: the floor-0 candidate closest to the floor's XZ
            # centroid at 1.2 above the lowest floor-0 candidate height.
            floor0_cands = [(i, c) for i, c in enumerate(candidates)
                            if candidate_floor[i] == 0]
            if floor0_cands:
                f0_pts = np.array([c for _, c in floor0_cands])
                xz_center = np.array([f0_pts[:, 0].mean(), f0_pts[:, 2].mean()])
                floor0_ys = sorted(set(c[1] for _, c in floor0_cands))
                y_target = min(floor0_ys) + 1.2
                target = np.array([xz_center[0], y_target, xz_center[1]])
                dists = [np.linalg.norm(np.array(c) - target)
                         for _, c in floor0_cands]
                ci = floor0_cands[int(np.argmin(dists))][0]
            else:
                mc = np.array(mesh_center, dtype=np.float64)
                ci = int(np.argmin([np.linalg.norm(np.array(c) - mc)
                                    for c in candidates]))
            gain, score = 1.0, 1.0
            print(f"\n F{frame_count}: 选候选[{ci}] (楼层中心, Y={candidates[ci][1]:.2f}m) "
                  f"[{floor_names[current_floor]}]")
        else:
            cur_floor_ids = floor_set(current_floor)
            floor_reachable = reachable & cur_floor_ids if reachable else set()

            # Prefer reachable candidates on this floor; with none known, use
            # the whole floor.
            ci, gain, score, n_remain = select_next_frame(
                candidates, selected_idx, selected_pos, all_pts,
                reachable=floor_reachable if floor_reachable else cur_floor_ids)

            expand = False
            if ci < 0 or score < stop_score:
                # Weak or no result: retry over the whole floor and keep the
                # better of the two.
                ci2, gain2, score2, n2 = select_next_frame(
                    candidates, selected_idx, selected_pos, all_pts,
                    reachable=cur_floor_ids)
                if ci2 >= 0 and (ci < 0 or score2 > score):
                    ci, gain, score, n_remain = ci2, gain2, score2, n2
                    expand = True

            if ci < 0 or (score < stop_score and gain < ACTUAL_GAIN_FLOOR):
                reason = "无可选候选" if ci < 0 else f"gain={gain:.1%} score={score:.3f}"
                current_floor += 1
                if current_floor < n_floors:
                    print(f"\n F{frame_count}: {reason}"
                          f" → 切换到 {floor_names[current_floor]}")
                    continue
                else:
                    print(f"\n F{frame_count}: {reason} → 所有楼层拍满,停止")
                    break

            tag = "[扩展]" if expand else ""
            print(f"\n F{frame_count}: 选候选[{ci}] "
                  f"gain={gain:.1%} score={score:.3f} 剩余={n_remain}"
                  f" [Y={candidates[ci][1]:.2f} {floor_names[current_floor]}]{tag}")

        pos = candidates[ci]
        selected_idx.add(ci)
        selected_pos.append(pos)
        dt_sel = time.time() - t_sel
        time_select += dt_sel

        # ---- Render ----
        cam_rot = get_camera_rot(args.rotation_type, frame_count)
        base = f"panorama_{frame_count:04d}"
        rgb_path = os.path.join(output_dir, f"{base}.png")
        depth_npy = os.path.join(output_dir, f"{base}_depth.npy")
        pose_path = os.path.join(output_dir, f"pose_{frame_count:04d}.json")

        print(f" 位置: [{pos[0]:.2f}, {pos[1]:.2f}, {pos[2]:.2f}]")
        print(f" 渲染...", end="", flush=True)
        t_r = time.time()

        # Back-pressure: never keep more than two frame writes outstanding.
        while len(_pending_io) >= 2:
            _pending_io.pop(0).result()

        batch_results = render_erp_batch_gpu(
            pts_world, colors_world,
            cam_poses=[pos],
            cam_rots=[cam_rot],
            width=W_render,
            height=H_render,
            faces=faces,
            point_size=args.point_size,
        )
        rgb, depth = batch_results[0]

        # Write asynchronously so the next frame's work overlaps the disk I/O.
        fut = _save_frame_async(
            rgb, depth, rgb_path, depth_npy,
            pos, cam_rot, pose_path, frame_count)
        _pending_io.append(fut)

        dt_r = time.time() - t_r
        time_render += dt_r
        print(f" {dt_r:.1f}s")

        # ---- depth -> 3D point cloud ----
        t_dep = time.time()
        actual_gain = 1.0
        delta_ratio = 1.0

        # Use the in-memory depth: the .npy is written by the I/O pool and may
        # not be on disk yet.
        # NOTE(review): the unprojection below uses ray_dirs without applying
        # cam_rot — confirm that is intended for every rotation_type.
        _d = depth.astype(np.float32)
        _d = np.nan_to_num(_d, nan=0.0)
        if _d.shape == (WARP_H, WARP_W):
            depth_small = _d
        else:
            try:
                depth_small = cv2.resize(_d, (WARP_W, WARP_H), interpolation=cv2.INTER_AREA)
            except Exception:
                # NumPy fallback: block average, or zero-padded crop when the
                # source is smaller than the target.
                h, w = _d.shape
                bh, bw = h // WARP_H, w // WARP_W
                if bh < 1 or bw < 1:
                    depth_small = np.zeros((WARP_H, WARP_W), dtype=np.float32)
                    depth_small[:min(h, WARP_H), :min(w, WARP_W)] = _d[:min(h, WARP_H), :min(w, WARP_W)]
                else:
                    depth_small = _d[:bh*WARP_H, :bw*WARP_W].reshape(WARP_H, bh, WARP_W, bw).mean(axis=(1, 3))
        total_px = WARP_H * WARP_W
        n_valid = int(np.sum(depth_small > 0))
        valid_ratio = n_valid / total_px

        if frame_count == 0:
            new_pts = depth_to_3d_points(pos, depth_small, ray_dirs, max_depth)
            pts_chunks.append(new_pts)
            all_pts = new_pts
            actual_gain = valid_ratio
            print(f" depth: {n_valid}px ({valid_ratio:.0%} 有效)"
                  f" → {len(new_pts)} 个 3D 点 (全部)")
        else:
            MIN_VALID_RATIO = 0.10  # lowered because PLY renders contain many holes
            if valid_ratio < MIN_VALID_RATIO:
                print(f" depth: {n_valid}px ({valid_ratio:.0%} 有效) < "
                      f"{MIN_VALID_RATIO:.0%} → 跳过此帧")
                results.append({
                    "frame_id": frame_count,
                    "candidate_idx": ci,
                    "position": pos,
                    "gain": float(gain),
                    "actual_gain": 0.0,
                    "delta_ratio": 0.0,
                    "score": float(score),
                    "skipped": True,
                    "skip_reason": f"valid_ratio={valid_ratio:.1%}",
                })
                # This frame's files are produced by the I/O pool. Wait for that
                # job first; otherwise the files may not exist yet when checked
                # and would be written after the "deletion".
                _pending_io.remove(fut)
                try:
                    fut.result()
                except Exception as e:
                    print(f" [WARN] 异步写盘失败: {e}")
                # Remove everything written for the rejected frame, pose included.
                for fp in [rgb_path, depth_npy, pose_path]:
                    if os.path.exists(fp):
                        try:
                            os.remove(fp)
                        except OSError:
                            pass
                consecutive_skips += 1
                if consecutive_skips >= MAX_CONSECUTIVE_SKIPS:
                    current_floor += 1
                    consecutive_skips = 0
                    if current_floor < n_floors:
                        print(f" 连续 {MAX_CONSECUTIVE_SKIPS} 帧空洞"
                              f" → 切换到 {floor_names[current_floor]}")
                    else:
                        print(f" 连续 {MAX_CONSECUTIVE_SKIPS} 帧空洞,停止")
                        break
                time_depth += time.time() - t_dep
                continue

            # Keep only pixels not already covered by the accumulated cloud.
            trimmed, n_orig, n_new = trim_depth(depth_small, pos, all_pts, ray_dirs)
            new_pts = depth_to_3d_points(pos, trimmed, ray_dirs, max_depth)
            pts_chunks.append(new_pts)
            all_pts = np.concatenate(pts_chunks)
            actual_gain = n_new / total_px
            delta_ratio = len(new_pts) / len(all_pts) if len(all_pts) > 0 else 1.0
            print(f" depth: {n_valid}px ({valid_ratio:.0%} 有效)"
                  f" → trim → {n_new}px 新增 → {len(new_pts)} 个新 3D 点")
            print(f" 累积点云: {len(all_pts)}, 实际gain: {actual_gain:.1%}")
            consecutive_skips = 0

        time_depth += time.time() - t_dep

        results.append({
            "frame_id": frame_count,
            "candidate_idx": ci,
            "position": pos,
            "gain": float(gain),
            "actual_gain": float(actual_gain),
            "delta_ratio": float(delta_ratio),
            "score": float(score),
        })

        # ---- Reachability update ----
        t_reach = time.time()
        n_new_r = update_reachability(
            np.array(pos), candidates, selected_idx, reachable, raycaster)
        dt_reach = time.time() - t_reach
        time_reach += dt_reach
        print(f" [可达性] 新增 {n_new_r} 个,总 {len(reachable)}/{len(candidates)} "
              f"({dt_reach:.1f}s)")

        # ---- Stop / floor-switch condition ----
        if frame_count > 0:
            actual_gain_history.append(actual_gain)
            delta_history.append(delta_ratio)

        if frame_count > 0 and frame_count >= min_frames:
            if len(actual_gain_history) >= ACTUAL_GAIN_WINDOW:
                recent_gain = actual_gain_history[-ACTUAL_GAIN_WINDOW:]
                recent_delta = delta_history[-ACTUAL_GAIN_WINDOW:]
                gain_exhausted = all(g < ACTUAL_GAIN_FLOOR for g in recent_gain)
                delta_exhausted = all(d < stop_delta for d in recent_delta)

                if gain_exhausted or delta_exhausted:
                    current_floor += 1
                    if current_floor < n_floors:
                        reason = (f"gain<{ACTUAL_GAIN_FLOOR:.0%}" if gain_exhausted
                                  else f"delta<{stop_delta:.0%}")
                        print(f" 连续 {ACTUAL_GAIN_WINDOW} 帧 {reason}"
                              f" → 切换到 {floor_names[current_floor]}")
                    else:
                        print(f" 所有楼层拍满,停止")
                        break

    # Padding frames: extend the result list to a length of the form 4n+1.
    # NOTE(review): len(results) also counts skipped frames, whose files were
    # deleted — confirm whether 4n+1 should apply to kept frames only.
    while len(results) > 1 and (len(results) - 1) % 4 != 0:
        frame_count = results[-1]["frame_id"] + 1
        if frame_count >= max_frames + 3:
            break
        print(f"\n [补帧] 当前 {len(results)} 帧,需补至 4n+1")
        ci, gain, score, n_remain = select_next_frame(
            candidates, selected_idx, selected_pos, all_pts, reachable=None)
        if ci < 0:
            break

        pos = candidates[ci]
        selected_idx.add(ci)
        selected_pos.append(pos)

        cam_rot = get_camera_rot(args.rotation_type, frame_count)
        base = f"panorama_{frame_count:04d}"
        rgb_path = os.path.join(output_dir, f"{base}.png")
        depth_npy = os.path.join(output_dir, f"{base}_depth.npy")

        batch_res = render_erp_batch_gpu(
            pts_world, colors_world,
            cam_poses=[pos], cam_rots=[cam_rot],
            width=W_render, height=H_render,
            faces=faces, point_size=args.point_size,
        )
        rgb, depth = batch_res[0]
        # Padding frames are written synchronously.
        try:
            cv2.imwrite(rgb_path, cv2.cvtColor(rgb, cv2.COLOR_RGB2BGR))
        except Exception:
            from PIL import Image
            Image.fromarray(rgb).save(rgb_path)
        np.save(depth_npy, depth.astype(np.float32))
        save_pose(pos, cam_rot, os.path.join(output_dir, f"pose_{frame_count:04d}.json"),
                  frame_count)

        depth_small = load_depth_downsampled(depth_npy, WARP_H, WARP_W)
        trimmed, n_orig, n_new = trim_depth(depth_small, pos, all_pts, ray_dirs)
        new_pts = depth_to_3d_points(pos, trimmed, ray_dirs, max_depth)
        pts_chunks.append(new_pts)
        all_pts = np.concatenate(pts_chunks)
        actual_gain = n_new / total_px
        results.append({
            "frame_id": frame_count,
            "candidate_idx": ci,
            "position": pos,
            "gain": float(gain),
            "actual_gain": float(actual_gain),
            "delta_ratio": float(len(new_pts) / max(len(all_pts), 1)),
            "score": float(score),
            "supplementary": True,
        })
        print(f" 补帧 F{frame_count}: gain={actual_gain:.1%}")

    # ── Wait for all outstanding writes, then release the pool ──────────────
    for fut in _pending_io:
        try:
            fut.result()
        except Exception as e:
            print(f" [WARN] 异步写盘失败: {e}")
    _io_executor.shutdown(wait=False)

    dt = time.time() - t_total
    print(f"\n {'─'*50}")
    print(f" 共 {len(results)} 帧, {dt:.1f}s ({dt/60:.1f}min)")
    print(f" 耗时: 选帧={time_select:.1f}s 渲染={time_render:.1f}s "
          f"深度={time_depth:.1f}s 可达={time_reach:.1f}s")
    return results
1837
+
1838
+
1839
def main():
    """Command-line entry point of the PLY pipeline.

    Loads the PLY scene, generates and filters candidate camera positions
    (Phase 1), runs the interleaved select-and-render loop (Phase 2) and
    writes ``frame_selection/candidates_filtered.npy`` and
    ``frame_selection/selected_frames.json`` under the output directory.
    Exits with status 1 when no candidate can be generated.
    """
    args = parse_args()

    ply_path = str(Path(args.ply).resolve())
    output_dir = str(Path(args.output_dir).resolve())
    resolution = tuple(int(x) for x in args.resolution.split(","))
    os.makedirs(output_dir, exist_ok=True)
    sel_dir = os.path.join(output_dir, "frame_selection")
    os.makedirs(sel_dir, exist_ok=True)

    print("=" * 60)
    print("ERPT PLY Pipeline(边渲边选)")
    print("=" * 60)
    print(f" PLY: {ply_path}")
    print(f" Output: {output_dir}")
    print(f" Max frames: {args.num_frames}")
    print(f" Resolution: {resolution[0]}x{resolution[1]}")
    t_start = time.time()

    # ===== Phase 0: load the scene =====
    mesh_obj, pts_world, bmin, bmax, is_mesh, faces = load_ply_scene(
        ply_path, z_up=args.z_up)

    # Vertex colours, if the file has any.
    colors_world = _extract_ply_colors(mesh_obj)
    if colors_world is not None:
        print(f" 颜色: {len(colors_world)} 个顶点颜色")
    else:
        print(f" 颜色: 无顶点颜色,使用深度伪彩")

    # Ray caster used for candidate filtering and reachability.
    raycaster = RayCaster(mesh_obj, pts_world, bmin, bmax, is_mesh, z_up=args.z_up)

    # ===== Phase 1: candidate sampling + filtering =====
    print(f"\n{'='*60}")
    print("[Phase 1] 多层撒点 + 7 层过滤")
    print(f"{'='*60}")

    # Y-up convention: Y is height, so floor = bmin[1] and ceiling = bmax[1].
    floor_y = float(bmin[1])
    ceiling_y = float(bmax[1])
    print(f" 场景 Y 范围: {floor_y:.2f} ~ {ceiling_y:.2f}m (总高 {ceiling_y-floor_y:.2f}m)")

    heights = compute_camera_heights(floor_y, ceiling_y, args.camera_height)
    print(f" 相机高度层: {[f'{h:.2f}m' for h in heights]}")

    x_range = float(bmax[0] - bmin[0])
    z_range = float(bmax[2] - bmin[2])
    x_sp = args.grid_spacing
    z_sp = args.grid_spacing

    # Too many candidates: widen the grid spacing so the estimate drops to
    # about 10000, but never beyond a quarter of the scene extent.
    n_xy = max(1, int((x_range - 2 * MARGIN) / x_sp)) * \
        max(1, int((z_range - 2 * MARGIN) / z_sp))
    total_est = n_xy * len(heights)
    if total_est > 10000:
        scale = math.sqrt(total_est / 10000)
        x_sp = min(x_sp * scale, x_range / 4)
        z_sp = min(z_sp * scale, z_range / 4)
        print(f" [自适应] 候选≈{total_est}个,间距调整为 X={x_sp:.1f}m Z={z_sp:.1f}m")

    candidates = generate_candidate_grid(bmin, bmax, x_sp, z_sp, heights)
    if not candidates:
        print(" [Error] 没有候选点")
        sys.exit(1)

    room_height = ceiling_y - floor_y
    candidates = raycast_filter(candidates, raycaster, room_height)
    if not candidates:
        # Everything was filtered out: fall back to the bounding-box centre in
        # XZ at the first camera height.
        print(" [Warning] 全部被过滤,使用 AABB 中心")
        cx = float((bmin[0] + bmax[0]) / 2)
        cy = float(heights[0])
        cz = float((bmin[2] + bmax[2]) / 2)
        candidates = [[cx, cy, cz]]

    np.save(os.path.join(sel_dir, "candidates_filtered.npy"),
            np.array(candidates))
    print(f" 最终候选点: {len(candidates)} 个")

    mesh_center = [
        float((bmin[0] + bmax[0]) / 2),
        float((bmin[1] + bmax[1]) / 2),
        float((bmin[2] + bmax[2]) / 2),
    ]

    # ===== Phase 2: interleaved selection and rendering =====
    results = run_phase2(
        pts_world=pts_world,
        colors_world=colors_world,
        faces=faces,
        candidates=candidates,
        mesh_center=mesh_center,
        raycaster=raycaster,
        output_dir=output_dir,
        max_frames=args.num_frames,
        resolution=resolution,
        args=args,
    )

    # ===== Save the frame-selection summary =====
    # Skipped frames have no files on disk, so counts and listings are based
    # on the kept frames only and stay consistent with each other.
    kept_frames = [r for r in results if not r.get("skipped")]
    summary = {
        "scene": os.path.basename(ply_path),
        "total_frames": len(kept_frames),
        "candidates_count": len(candidates),
        "frames": [{
            "frame_id": r["frame_id"],
            "position": r["position"],
            "gain": r["gain"],
            "actual_gain": r["actual_gain"],
            "delta_ratio": r["delta_ratio"],
            "score": r["score"],
        } for r in kept_frames],
    }
    # ensure_ascii=False can emit non-ASCII text (e.g. from the scene file
    # name), so the encoding is fixed instead of left to the platform default.
    with open(os.path.join(sel_dir, "selected_frames.json"), "w",
              encoding="utf-8") as f:
        json.dump(summary, f, indent=2, ensure_ascii=False)

    dt = time.time() - t_start
    print(f"\n{'='*60}")
    print(f"完成! {len(kept_frames)} 帧, {dt:.1f}s ({dt/60:.1f}min)")
    print(f"{'='*60}")
    print(f"输出目录: {output_dir}/")
    for r in kept_frames:
        fid = r["frame_id"]
        print(f" panorama_{fid:04d}.png + _depth.npy + pose_{fid:04d}.json")


if __name__ == "__main__":
    main()
requirements.txt ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ numpy>=1.24
2
+ scipy
3
+ pandas
4
+ pyyaml
5
+ pillow
6
+ matplotlib
7
+ scikit-learn
8
+ torch
9
+ torchvision
10
+ opencv-python
11
+ open3d
12
+ trimesh
13
+ tqdm
14
+ jsonschema
15
+
results/README.md ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Results Directory
2
+
3
+ Generated CSV summaries should be written here or under `outputs/<run_name>/results/`.
4
+
5
+ Recommended filenames:
6
+
7
+ - `coverage_main.csv`
8
+ - `oracle_validation.csv`
9
+ - `lambda_sweep.csv`
10
+ - `cross_source.csv`
11
+ - `audit_50_frames.csv`
12
+ - `runtime_scaling.csv`
13
+
scripts/_common.py ADDED
@@ -0,0 +1,228 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import binascii
4
+ import csv
5
+ import json
6
+ import math
7
+ import shutil
8
+ import struct
9
+ import zlib
10
+ from pathlib import Path
11
+ from typing import Any, Iterable
12
+
13
+
14
+ REPO_ROOT = Path(__file__).resolve().parents[1]
15
+
16
+
17
def ensure_dir(path: str | Path) -> Path:
    """Create ``path`` as a directory, including missing parents, and return it.

    A directory that already exists is left alone rather than treated as an
    error. The return value is always a ``Path``, even for a string argument.
    """
    directory = Path(path)
    directory.mkdir(parents=True, exist_ok=True)
    return directory
21
+
22
+
23
def read_json(path: str | Path) -> Any:
    """Load and return the JSON document stored at ``path`` (read as UTF-8)."""
    source = Path(path)
    with source.open("r", encoding="utf-8") as handle:
        document = json.load(handle)
    return document
26
+
27
+
28
def write_json(path: str | Path, obj: Any) -> None:
    """Serialize ``obj`` to ``path`` as indented, key-sorted UTF-8 JSON.

    The parent directory is created when missing, and the file is terminated
    with a single trailing newline.
    """
    target = Path(path)
    ensure_dir(target.parent)
    with target.open("w", encoding="utf-8") as handle:
        json.dump(obj, handle, indent=2, sort_keys=True)
        handle.write("\n")
34
+
35
+
36
def read_jsonl(path: str | Path) -> list[dict[str, Any]]:
    """Read a JSON Lines file and return one parsed value per non-blank line.

    Lines are stripped of surrounding whitespace before parsing, and lines
    that are empty after stripping are ignored.

    Raises:
        ValueError: if a line is not valid JSON. The message names the file
            and the 1-based line number, and the original decode error is
            chained as the cause.
    """
    records: list[dict[str, Any]] = []
    with Path(path).open("r", encoding="utf-8") as handle:
        for number, raw in enumerate(handle, start=1):
            text = raw.strip()
            if text:
                try:
                    record = json.loads(text)
                except json.JSONDecodeError as exc:
                    raise ValueError(f"Invalid JSONL at {path}:{number}: {exc}") from exc
                records.append(record)
    return records
48
+
49
+
50
def write_jsonl(path: str | Path, rows: Iterable[dict[str, Any]]) -> None:
    """Write ``rows`` to ``path`` as JSON Lines, one key-sorted object per line.

    The parent directory is created when missing. ``rows`` is consumed lazily,
    so any iterable (including a generator) is accepted.
    """
    target = Path(path)
    ensure_dir(target.parent)
    with target.open("w", encoding="utf-8") as handle:
        for record in rows:
            handle.write(json.dumps(record, sort_keys=True) + "\n")
57
+
58
+
59
def write_csv(path: str | Path, rows: list[dict[str, Any]], fieldnames: list[str] | None = None) -> None:
    """Write ``rows`` to ``path`` as a UTF-8 CSV file with a header row.

    Args:
        path: Destination file; its parent directory is created when missing.
        rows: One mapping per CSV record.
        fieldnames: Column order. When ``None``, the columns are every key
            seen across ``rows`` in first-seen order, as computed by
            ``_table_fieldnames``, so all table writers in this module agree
            on column discovery.

    Raises:
        ValueError: raised by ``csv.DictWriter`` when a row contains a key
            that is not in ``fieldnames``.
    """
    path = Path(path)
    ensure_dir(path.parent)
    # Share the column-discovery helper instead of duplicating its loop here.
    fieldnames = _table_fieldnames(rows, fieldnames)
    # newline="" lets the csv module control line endings itself.
    with path.open("w", encoding="utf-8", newline="") as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()
        for row in rows:
            writer.writerow(row)
74
+
75
+
76
def _table_fieldnames(rows: list[dict[str, Any]], fieldnames: list[str] | None = None) -> list[str]:
    """Return the column names to use for a table built from ``rows``.

    Args:
        rows: One mapping per table record.
        fieldnames: Explicit column order; returned unchanged when given.

    Returns:
        ``fieldnames`` if it is not ``None``, otherwise every key that appears
        in ``rows``, de-duplicated in first-seen order.
    """
    if fieldnames is not None:
        return fieldnames
    # dict preserves insertion order, giving an ordered de-duplication in
    # linear time instead of a list-membership test per key.
    return list(dict.fromkeys(key for row in rows for key in row))
85
+
86
+
87
def _markdown_cell(value: Any) -> str:
    """Render ``value`` as text that stays inside a single Markdown table cell."""
    text = str(value).replace("|", "\\|")
    # A raw line break would terminate the table row, so keep the break visible
    # with an inline HTML tag instead.
    return text.replace("\r\n", "<br>").replace("\n", "<br>").replace("\r", "<br>")


def write_markdown_table(path: str | Path, rows: list[dict[str, Any]], fieldnames: list[str] | None = None) -> None:
    """Write ``rows`` to ``path`` as a pipe-delimited Markdown table.

    Args:
        path: Destination file; its parent directory is created when missing.
        rows: One mapping per table row. Keys missing from a row render as an
            empty cell.
        fieldnames: Column order. When ``None``, columns are discovered from
            ``rows`` by ``_table_fieldnames``.

    Header and cell text is escaped so that a literal ``|`` or a line break in
    a value cannot split a cell or end a row. Values without those characters
    are written exactly as before.
    """
    path = Path(path)
    ensure_dir(path.parent)
    fieldnames = _table_fieldnames(rows, fieldnames)
    with path.open("w", encoding="utf-8") as f:
        f.write("| " + " | ".join(_markdown_cell(name) for name in fieldnames) + " |\n")
        f.write("| " + " | ".join(["---"] * len(fieldnames)) + " |\n")
        for row in rows:
            f.write("| " + " | ".join(_markdown_cell(row.get(name, "")) for name in fieldnames) + " |\n")
96
+
97
+
98
# Single-pass translation table for characters that are special in LaTeX text.
# Using one pass matters: chained str.replace calls would re-escape the braces
# of an already inserted "\textbackslash{}".
_LATEX_ESCAPES = str.maketrans(
    {
        "\\": "\\textbackslash{}",
        "&": "\\&",
        "%": "\\%",
        "$": "\\$",
        "#": "\\#",
        "_": "\\_",
        "{": "\\{",
        "}": "\\}",
        "~": "\\textasciitilde{}",
        "^": "\\textasciicircum{}",
    }
)


def _latex_escape(value: Any) -> str:
    """Return ``str(value)`` with LaTeX special characters escaped.

    Each input character is translated exactly once, so text produced for one
    character (for example the braces in ``\\textbackslash{}``) is never
    escaped again. Covers ``\\ & % $ # _ { } ~ ^``.
    """
    return str(value).translate(_LATEX_ESCAPES)
110
+
111
+
112
def write_latex_table(
    path: str | Path,
    rows: list[dict[str, Any]],
    fieldnames: list[str] | None = None,
    caption: str = "Table-ready experiment results.",
    label: str = "tab:cmevs_results",
) -> None:
    """Write ``rows`` to ``path`` as a LaTeX ``table``/``tabular`` environment.

    Args:
        path: Destination file; its parent directory is created when missing.
        rows: One mapping per table row. Keys missing from a row render as an
            empty cell.
        fieldnames: Column order. When ``None``, columns are discovered from
            ``rows`` by ``_table_fieldnames``.
        caption: Caption text; escaped with ``_latex_escape``.
        label: Label for cross-references; braces are stripped, nothing else
            is escaped.

    Every column is left-aligned. The output uses ``\\toprule``, ``\\midrule``
    and ``\\bottomrule``; presumably the including document loads ``booktabs``
    to provide them (not established by this file).
    """
    target = Path(path)
    ensure_dir(target.parent)
    columns = _table_fieldnames(rows, fieldnames)
    column_spec = "l" * len(columns)
    row_end = " \\\\\n"
    with target.open("w", encoding="utf-8") as out:
        out.write("\\begin{table}[t]\n")
        out.write("\\centering\n")
        out.write(f"\\caption{{{_latex_escape(caption)}}}\n")
        # Braces in the label would unbalance the \label{...} argument.
        safe_label = str(label).replace("{", "").replace("}", "")
        out.write(f"\\label{{{safe_label}}}\n")
        out.write(f"\\begin{{tabular}}{{{column_spec}}}\n")
        out.write("\\toprule\n")
        header_cells = [_latex_escape(name) for name in columns]
        out.write(" & ".join(header_cells) + row_end)
        out.write("\\midrule\n")
        for record in rows:
            body_cells = [_latex_escape(record.get(name, "")) for name in columns]
            out.write(" & ".join(body_cells) + row_end)
        out.write("\\bottomrule\n")
        out.write("\\end{tabular}\n")
        out.write("\\end{table}\n")
138
+
139
+
140
def copy_file(src: str | Path, dst: str | Path) -> None:
    """Copy ``src`` to ``dst`` with ``shutil.copy2``, creating ``dst``'s parent directory first."""
    target = Path(dst)
    ensure_dir(target.parent)
    shutil.copy2(src, target)
144
+
145
+
146
def candidate_by_id(candidates: Iterable[dict[str, Any]]) -> dict[str, dict[str, Any]]:
    """Index candidate records by the string form of their ``candidate_id``.

    When several records share an id, the last one wins.

    Raises:
        KeyError: if a record has no ``candidate_id`` key.
    """
    lookup: dict[str, dict[str, Any]] = {}
    for entry in candidates:
        lookup[str(entry["candidate_id"])] = entry
    return lookup
148
+
149
+
150
def valid_candidates(candidates: Iterable[dict[str, Any]]) -> list[dict[str, Any]]:
    """Return the candidates whose ``valid`` field is truthy.

    A candidate without a ``valid`` key counts as valid. Input order is kept.
    """
    kept: list[dict[str, Any]] = []
    for entry in candidates:
        if entry.get("valid", True):
            kept.append(entry)
    return kept
152
+
153
+
154
def cell_set(candidate: dict[str, Any]) -> set[str]:
    """Return the candidate's ``covered_cells`` as a set of strings.

    Every cell is converted with ``str``; a missing ``covered_cells`` key
    gives an empty set.
    """
    cells = candidate.get("covered_cells", [])
    return set(map(str, cells))
156
+
157
+
158
def universe_cells(candidates: Iterable[dict[str, Any]]) -> set[str]:
    """Return the union of the cells covered by every valid candidate.

    A candidate is skipped when its ``valid`` field is present and falsy;
    candidates without that key are included.
    """
    per_candidate = (
        cell_set(entry) for entry in candidates if entry.get("valid", True)
    )
    return set().union(*per_candidate)
164
+
165
+
166
def selected_ids(selected_doc: dict[str, Any]) -> list[str]:
    """Return the ``candidate_id`` of each selected viewpoint, as strings.

    Entries are read from ``selected_doc["selected_viewpoints"]`` in order; a
    missing key gives an empty list.

    Raises:
        KeyError: if an entry has no ``candidate_id`` key.
    """
    ids: list[str] = []
    for entry in selected_doc.get("selected_viewpoints", []):
        ids.append(str(entry["candidate_id"]))
    return ids
168
+
169
+
170
def safe_div(num: float, den: float) -> float:
    """Return ``num / den``, or ``0.0`` when ``den`` equals zero."""
    if den == 0:
        return 0.0
    return num / den
172
+
173
+
174
def pearson(xs: list[float], ys: list[float]) -> float:
    """Return the Pearson correlation coefficient of ``xs`` and ``ys``.

    Returns:
        The coefficient, or ``nan`` when the inputs differ in length, have
        fewer than two samples, or either one has zero variance.
    """
    if len(xs) != len(ys) or len(xs) < 2:
        return float("nan")
    mx = sum(xs) / len(xs)
    my = sum(ys) / len(ys)
    num = sum((x - mx) * (y - my) for x, y in zip(xs, ys))
    vx = sum((x - mx) ** 2 for x in xs)
    vy = sum((y - my) ** 2 for y in ys)
    if vx <= 0.0 or vy <= 0.0:
        return float("nan")
    # Take the roots separately: the product vx * vy can underflow to 0.0
    # (giving ZeroDivisionError) or overflow to inf for very small or very
    # large variances, even though each root is well within float range.
    return num / (math.sqrt(vx) * math.sqrt(vy))
185
+
186
+
187
def _png_chunk(kind: bytes, payload: bytes) -> bytes:
    """Assemble one PNG chunk from its type code and data.

    The result is the big-endian 32-bit payload length, the chunk type, the
    payload, and a big-endian CRC-32 computed over type plus payload.
    """
    body = kind + payload
    checksum = binascii.crc32(body) & 0xFFFFFFFF
    length_field = struct.pack(">I", len(payload))
    crc_field = struct.pack(">I", checksum)
    return length_field + body + crc_field
194
+
195
+
196
def write_solid_png(path: str | Path, width: int, height: int, rgb: tuple[int, int, int]) -> None:
    """Write a ``width`` x ``height`` PNG filled with the single color ``rgb``.

    The image is encoded with only ``struct`` and ``zlib``: 8-bit depth, color
    type 2 (truecolor), one IDAT chunk. The parent directory of ``path`` is
    created when missing.
    """
    target = Path(path)
    ensure_dir(target.parent)
    # Each scanline is a filter-type byte (0 = no filter) followed by the pixels.
    scanline = b"\x00" + bytes(rgb) * width
    pixels = scanline * height
    # IHDR fields: width, height, bit depth, color type, compression, filter, interlace.
    header = struct.pack(">IIBBBBB", width, height, 8, 2, 0, 0, 0)
    compressed = zlib.compress(pixels, level=9)
    with target.open("wb") as out:
        out.write(b"\x89PNG\r\n\x1a\n")
        out.write(_png_chunk(b"IHDR", header))
        out.write(_png_chunk(b"IDAT", compressed))
        out.write(_png_chunk(b"IEND", b""))
211
+
212
+
213
def write_npy_f4(path: str | Path, height: int, width: int, value: float) -> None:
    """Write a ``(height, width)`` little-endian float32 ``.npy`` file filled with ``value``.

    The file is produced with ``struct`` alone: the ``\\x93NUMPY`` magic,
    format version 1.0, a space-padded header dictionary, then the row-major
    data. The parent directory of ``path`` is created when missing.
    """
    target = Path(path)
    ensure_dir(target.parent)
    header_text = "{'descr': '<f4', 'fortran_order': False, 'shape': (%d, %d), }" % (height, width)
    encoded = header_text.encode("latin1")
    # Magic string (6 bytes) + version (2 bytes) + header-length field (2 bytes).
    preamble_size = 6 + 2 + 2
    # Pad so that preamble + header + trailing newline is a multiple of 16 bytes.
    pad_count = 16 - ((preamble_size + len(encoded) + 1) % 16)
    encoded = encoded + b" " * pad_count + b"\n"
    fill = float(value)
    one_row = struct.pack("<" + "f" * width, *([fill] * width))
    with target.open("wb") as out:
        out.write(b"\x93NUMPY")
        out.write(b"\x01\x00")
        out.write(struct.pack("<H", len(encoded)))
        out.write(encoded)
        out.writelines(one_row for _ in range(height))
scripts/audit_quality.py ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ from __future__ import annotations
3
+
4
+ import argparse
5
+ from pathlib import Path
6
+
7
+ from _common import read_json, write_csv
8
+
9
+
10
def parse_args() -> argparse.Namespace:
    """Parse the audit script's command-line options.

    ``--render-dir``, ``--metadata`` and ``--output`` are required and parsed
    as ``Path``; ``--limit`` is an optional integer defaulting to 50.
    """
    cli = argparse.ArgumentParser(description="Audit generated CM-EVS render artifacts.")
    for flag in ("--render-dir", "--metadata", "--output"):
        cli.add_argument(flag, type=Path, required=True)
    cli.add_argument("--limit", type=int, default=50)
    return cli.parse_args()
17
+
18
+
19
def has_magic(path: Path, magic: bytes) -> bool:
    """Return True when the file at ``path`` starts with the bytes ``magic``.

    Returns False when the file is shorter than ``magic``, does not exist, or
    cannot be opened or read (for example because ``path`` is a directory).
    Opening directly instead of testing existence first avoids both the race
    between the two steps and an exception on non-file paths.
    """
    try:
        with path.open("rb") as f:
            return f.read(len(magic)) == magic
    except OSError:
        return False
24
+
25
+
26
def main() -> None:
    """Audit the rendered artifacts of the selected viewpoints and write a CSV report.

    For each of the first ``--limit`` entries under ``selected_viewpoints`` in
    the metadata file, the files ``<rank:03d>_<candidate_id>_rgb.png``,
    ``..._depth.npy`` and ``..._pose.json`` are looked up in ``--render-dir``.
    One CSV row per entry records whether each file exists, whether the RGB
    and depth files start with the PNG / NPY magic bytes, and whether the pose
    file parses as JSON.

    ``passed`` requires both magic checks and a parseable pose file, so an
    empty or corrupt file with the expected name does not pass the audit.
    """
    args = parse_args()
    selected_doc = read_json(args.metadata)
    rows = []
    for item in selected_doc.get("selected_viewpoints", [])[: args.limit]:
        cid = str(item["candidate_id"])
        rank = int(item["rank"])
        stem = f"{rank:03d}_{cid}"
        rgb = args.render_dir / f"{stem}_rgb.png"
        depth = args.render_dir / f"{stem}_depth.npy"
        pose = args.render_dir / f"{stem}_pose.json"
        # A missing or unreadable file raises OSError; malformed JSON or bad
        # UTF-8 raises a ValueError subclass. Anything else is a real bug and
        # is allowed to propagate.
        try:
            read_json(pose)
        except (OSError, ValueError):
            pose_ok = False
        else:
            pose_ok = True
        rgb_magic_ok = has_magic(rgb, b"\x89PNG\r\n\x1a\n")
        depth_magic_ok = has_magic(depth, b"\x93NUMPY")
        rows.append(
            {
                "candidate_id": cid,
                "rank": rank,
                "rgb_exists": rgb.exists(),
                "rgb_png_magic": rgb_magic_ok,
                "depth_exists": depth.exists(),
                "depth_npy_magic": depth_magic_ok,
                "pose_exists": pose.exists(),
                "pose_json_valid": pose_ok,
                # A correct magic number implies the file exists and is readable.
                "passed": rgb_magic_ok and depth_magic_ok and pose_ok,
            }
        )
    write_csv(args.output, rows)
    print(f"Wrote {args.output}")
59
+
60
+
61
+ if __name__ == "__main__":
62
+ main()