diff --git a/code/download_checkpoints.py b/code/download_checkpoints.py index a94cd2e..e1aa327 100644 --- a/code/download_checkpoints.py +++ b/code/download_checkpoints.py @@ -1,7 +1,10 @@ +import argparse import os # os.environ["HF_ENDPOINT"] = 'https://hf-mirror.com' -from huggingface_hub import hf_hub_download, snapshot_download +from huggingface_hub import hf_hub_download, snapshot_download, login +# Set HF_TOKEN environment variable before running this script, e.g.: +# export HF_TOKEN=hf_... def download_ckpt(local_dir, repo_id, filename): os.makedirs(local_dir, exist_ok=True) @@ -15,15 +18,61 @@ def download_ckpt(local_dir, repo_id, filename): print(f"File has been downloaded to: {file_path}") else: print(f"File exists already: {local_path}") + + +def download_snapshot(local_dir, repo_id): + if os.path.isdir(local_dir) and os.listdir(local_dir): + print(f"Already exists: {local_dir}") + return + os.makedirs(local_dir, exist_ok=True) + print(f"\nDownloading snapshot {repo_id} → {local_dir}...\n") + snapshot_download(repo_id, local_dir=local_dir) + print(f"Done: {local_dir}") + + +parser = argparse.ArgumentParser() +parser.add_argument("--resolution", choices=["720p", "480p", "both"], default="720p", + help="Which Wan2.1-I2V model to download (720p, 480p, or both)") +parser.add_argument("--skip_wan", action="store_true", + help="Skip the large Wan2.1-I2V model download") +parser.add_argument("--token", default=os.environ.get("HF_TOKEN"), + help="HuggingFace token (falls back to HF_TOKEN env var)") +args = parser.parse_args() + +if args.token: + login(token=args.token) + os.makedirs("./checkpoints", exist_ok=True) -repo_id_list = ["Ruicheng/moge-vitl","Iceclear/StableSR","Iceclear/StableSR","Skywork/Matrix-3D","Skywork/Matrix-3D","Skywork/Matrix-3D","Skywork/Matrix-3D","Skywork/Matrix-3D"] -filename_list = 
["model.pt","stablesr_turbo.ckpt","vqgan_cfw_00011.ckpt","checkpoints/text2panoimage_lora.safetensors","checkpoints/pano_lrm_480p.pt","checkpoints/pano_video_gen_480p.ckpt","checkpoints/pano_video_gen_720p.bin","checkpoints/pano_video_gen_720p_5b.safetensors"] -local_dir_list = ["./checkpoints/moge","./checkpoints/StableSR","./checkpoints/StableSR","./checkpoints/flux_lora","./checkpoints/pano_lrm","./checkpoints/Wan-AI/wan_lora","./checkpoints/Wan-AI/wan_lora","./checkpoints/Wan-AI/wan_lora"] - -N = len(repo_id_list) -for i in range(N): - repo_id = repo_id_list[i] - filename = filename_list[i] - local_dir = local_dir_list[i] - print(f"\nDownloading {filename} from {repo_id} to local folder {local_dir}...\n") + +# Small checkpoints from HF +repo_id_list = ["Ruicheng/moge-vitl", "Iceclear/StableSR", "Iceclear/StableSR", + "Skywork/Matrix-3D", "Skywork/Matrix-3D", "Skywork/Matrix-3D", + "Skywork/Matrix-3D", "Skywork/Matrix-3D"] +filename_list = ["model.pt", "stablesr_turbo.ckpt", "vqgan_cfw_00011.ckpt", + "checkpoints/text2panoimage_lora.safetensors", "checkpoints/pano_lrm_480p.pt", + "checkpoints/pano_video_gen_480p.ckpt", "checkpoints/pano_video_gen_720p.bin", + "checkpoints/pano_video_gen_720p_5b.safetensors"] +local_dir_list = ["./checkpoints/moge", "./checkpoints/StableSR", "./checkpoints/StableSR", + "./checkpoints/flux_lora", "./checkpoints/pano_lrm", + "./checkpoints/Wan-AI/wan_lora", "./checkpoints/Wan-AI/wan_lora", + "./checkpoints/Wan-AI/wan_lora"] + +for repo_id, filename, local_dir in zip(repo_id_list, filename_list, local_dir_list): + print(f"\nDownloading {filename} from {repo_id} → {local_dir}...\n") download_ckpt(local_dir, repo_id, filename) + +# VideoLLaMA3-7B (prompt generation for i2p mode) — to HF cache +print("\nDownloading DAMO-NLP-SG/VideoLLaMA3-7B to HF cache...\n") +snapshot_download("DAMO-NLP-SG/VideoLLaMA3-7B") + +# FLUX.1-Fill-dev (gated — requires HF token + accepted terms) +# Downloaded to HF cache (not local_dir) so from_pretrained() 
finds it automatically +print("\nDownloading black-forest-labs/FLUX.1-Fill-dev to HF cache...\n") +snapshot_download("black-forest-labs/FLUX.1-Fill-dev") + +# Large Wan2.1-I2V base model +if not args.skip_wan: + if args.resolution in ("720p", "both"): + download_snapshot("./checkpoints/Wan-AI/Wan2.1-I2V-14B-720P", "Wan-AI/Wan2.1-I2V-14B-720P") + if args.resolution in ("480p", "both"): + download_snapshot("./checkpoints/Wan-AI/Wan2.1-I2V-14B-480P", "Wan-AI/Wan2.1-I2V-14B-480P") diff --git a/code/pano_init/src/worldgen/pano_gen.py b/code/pano_init/src/worldgen/pano_gen.py index e3c6a06..0e9d79f 100644 --- a/code/pano_init/src/worldgen/pano_gen.py +++ b/code/pano_init/src/worldgen/pano_gen.py @@ -20,7 +20,11 @@ def build_pano_gen_model(lora_path=None, device="cuda"): def build_pano_fill_model(lora_path=None, device="cuda:0"): if lora_path is None: lora_path = hf_hub_download(repo_id="LeoXie/WorldGen", filename=f"models--WorldGen-Flux-Lora/worldgen_img2scene.safetensors") - pipe = FluxFillPipeline.from_pretrained("black-forest-labs/FLUX.1-Fill-dev", torch_dtype=torch.bfloat16, device=device) + pipe = FluxFillPipeline.from_pretrained( + "black-forest-labs/FLUX.1-Fill-dev", + torch_dtype=torch.bfloat16, + low_cpu_mem_usage=True, + ) print(f"Loading LoRA weights from: {lora_path}") pipe.load_lora_weights(lora_path) diff --git a/code/panoramic_image_generation.py b/code/panoramic_image_generation.py index 73d2ca0..5633318 100644 --- a/code/panoramic_image_generation.py +++ b/code/panoramic_image_generation.py @@ -13,7 +13,7 @@ def create_output_dir(base_path: str, prefix: str = "example") -> str: os.makedirs(base_path, exist_ok=True) max_num = 0 for dirname in os.listdir(base_path): - match = re.match(f"{prefix}(\d+)", dirname) + match = re.match(f"{prefix}(\\d+)", dirname) if match: max_num = max(max_num, int(match.group(1))) new_dir = f"{prefix}{max_num + 1}" diff --git a/code/panoramic_image_to_video.py b/code/panoramic_image_to_video.py index 78d6f77..fe93cac 100644 --- 
a/code/panoramic_image_to_video.py +++ b/code/panoramic_image_to_video.py @@ -360,7 +360,7 @@ def main(args): if not use_5b_model: #vid_path, mask_path,text, - tgt_resolution = (1440,720) if is_720p else (960,480) + tgt_resolution = (960,720) if is_720p else (960,480) #dset = TextVideoDataset(vid_path = os.path.join(condition_dir,"rendered_rgb.mp4"), mask_path = os.path.join(condition_dir,"rendered_mask.mp4"), text=prompt) # (self, vid_path, mask_path,text, max_num_frames=81, frame_interval=1, num_frames=81, height=720, width=1440, is_i2v=True): dset = TextVideoDataset(vid_path = os.path.join(condition_dir,"rendered_rgb.mp4"), mask_path = os.path.join(condition_dir,"rendered_mask.mp4"), text=prompt, height=tgt_resolution[1],width=tgt_resolution[0]) @@ -373,8 +373,8 @@ def main(args): prompt=prompt+" The video is of high quality, and the view is very clear. High quality, masterpiece, best quality, highres, ultra-detailed, fantastic.", negative_prompt="The video is not of a high quality, it has a low resolution. Distortion. strange artifacts.", cfg_scale=5.0, - num_frames=81, - num_inference_steps=50, + num_frames=161, + num_inference_steps=5, seed=seed, tiled=True, height=tgt_resolution[1], width=tgt_resolution[0], @@ -397,7 +397,7 @@ def main(args): dset = VideoDataset( #base_path="/", metadata_path="/datasets_3d/zhongqi.yang/matrix3d_inference/dataset/metadata_1k.csv", base_path="/", metadata_path=None, - num_frames=81, + num_frames=161, time_division_factor=4, time_division_remainder=1, max_pixels=height*width, height=height, width=width, height_division_factor=16, width_division_factor=16, @@ -416,12 +416,12 @@ def main(args): seed=120, tiled=True, height=height, width=width, input_image=cases["video"][0], - num_frames=81, + num_frames=161, cond_video = (cases["cond_video"]), cond_mask = (cases["cond_mask"]), ) - # the original resolution of 5b model is actually [704,1408], in order to be unified with latter steps, we resize the output to [720,1440]. 
- video = [img.resize((1440,720)) for img in video_ori] + # the original resolution of 5b model is actually [704,1408], in order to be unified with latter steps, we resize the output to [960,720]. + video = [img.resize((960,720)) for img in video_ori] if dist.get_rank() == 0: generated_dir = os.path.join(case_dir,"generated") diff --git a/code/vbench_batch.py b/code/vbench_batch.py new file mode 100644 index 0000000..fe7eb7f --- /dev/null +++ b/code/vbench_batch.py @@ -0,0 +1,233 @@ +""" +VBench batch generation for Matrix-3D. +Runs the two-step pipeline (i2p panorama → video) for all scenery/indoor prompts. + +Usage (from Matrix-3D root): + python code/vbench_batch.py [--output_dir output/vbench/videos] [--num_samples 5] [--seed 0] [--resolution 720] +""" +import argparse +import csv +import json +import os +import random +import glob as _glob +import re +import shutil +import subprocess +import sys +import time + +import psutil +import torch + +_SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__)) +_ROOT_DIR = os.path.dirname(_SCRIPT_DIR) +_VBENCH_DATA = os.path.join(_ROOT_DIR, "..", "VBench", "vbench2_beta_i2v", "vbench2_beta_i2v", "data") +_DEFAULT_JSON = os.path.join(_VBENCH_DATA, "i2v-bench-info.json") +_DEFAULT_CROP = os.path.join(_VBENCH_DATA, "crop", "1-1") +_CATEGORIES = {"scenery", "indoor"} + + +def _safe(text): + return re.sub(r'[<>:"/\\|?*]', "_", text)[:150] + + +def _fmt_duration(secs): + h, m, s = int(secs // 3600), int(secs % 3600 // 60), int(secs % 60) + return f"{h:02d}h{m:02d}m{s:02d}s" + + +def _sys_stats(): + vm = psutil.virtual_memory() + ram_used = vm.used / 1024**3 + ram_total = vm.total / 1024**3 + if torch.cuda.is_available(): + gpu_used = torch.cuda.memory_allocated() / 1024**3 + gpu_total = torch.cuda.get_device_properties(0).total_memory / 1024**3 + else: + gpu_used = gpu_total = 0.0 + return ram_used, ram_total, gpu_used, gpu_total + + +def run(cmd, cwd=None): + result = subprocess.run(cmd, cwd=cwd or _ROOT_DIR) + return 
result.returncode + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("--output_dir", default=os.path.join(_ROOT_DIR, "output", "vbench", "videos")) + parser.add_argument("--num_samples", type=int, default=5) + parser.add_argument("--seed", type=int, default=0) + parser.add_argument("--resolution", type=int, default=720) + parser.add_argument("--vbench_json", default=_DEFAULT_JSON) + parser.add_argument("--crop_dir", default=_DEFAULT_CROP) + args = parser.parse_args() + + info_json = os.path.abspath(args.vbench_json) + crop_dir = os.path.abspath(args.crop_dir) + out_dir = os.path.abspath(args.output_dir) + os.makedirs(out_dir, exist_ok=True) + + stats_path = os.path.join(os.path.dirname(out_dir), "vbench_gen_stats.csv") + stats_is_new = not os.path.exists(stats_path) + stats_f = open(stats_path, "a", newline="", encoding="utf-8") + stats_w = csv.writer(stats_f) + if stats_is_new: + stats_w.writerow(["timestamp", "task_idx", "prompt", "sample_idx", "seed", "duration_s", + "video_count", "total_elapsed_s", "avg_s_per_video", + "ram_used_gb", "ram_total_gb", "gpu_used_gb", "gpu_total_gb", + "out_path", "status"]) + + ram_total_gb = psutil.virtual_memory().total / 1024**3 + gpu_total_gb = torch.cuda.get_device_properties(0).total_memory / 1024**3 if torch.cuda.is_available() else 0.0 + + with open(info_json, encoding="utf-8") as f: + entries = json.load(f) + + seen, prompts = set(), [] + for e in entries: + name = e["file_name"] + if name in seen: + continue + if e.get("type") not in _CATEGORIES: + continue + seen.add(name) + caption = e.get("caption", os.path.splitext(name)[0]) + prompts.append((name, caption)) + + total = len(prompts) * args.num_samples + print(f"{'='*70}") + print(f"[vbench] Matrix-3D VBench batch") + print(f"[vbench] {len(prompts)} prompts x {args.num_samples} samples = {total} videos") + print(f"[vbench] categories: {sorted(_CATEGORIES)} resolution: {args.resolution}p") + print(f"[vbench] output → {out_dir}") + 
print(f"[vbench] stats → {stats_path}") + print(f"{'='*70}") + + done = skipped = generated = errors = 0 + ok_total_s = 0.0 + t_start = time.time() + + for task_idx, (image_name, caption) in enumerate(prompts): + image_path = os.path.join(crop_dir, image_name) + if not os.path.isfile(image_path): + print(f"[vbench] SKIP: image not found — {image_path}") + continue + + for sample_idx in range(args.num_samples): + seed = random.Random(f"{args.seed}-{task_idx}-{sample_idx}").randint(0, 2**31 - 1) + out_path = os.path.join(out_dir, f"{_safe(caption)}-{sample_idx}-{seed}.mp4") + + # ── header ────────────────────────────────────────────────────── + elapsed = time.time() - t_start + pct = 100 * done / total if total else 0 + eta_str = "" + avg_str = "" + if generated > 0: + avg_s = ok_total_s / generated + remaining = (total - done) * avg_s + eta_str = f" ETA {_fmt_duration(remaining)}" + avg_str = f" avg {avg_s/60:.1f} min/video" + print(f"\n{'─'*70}") + print(f"[vbench] [{done+1}/{total} {pct:.0f}%{eta_str}{avg_str}] elapsed {_fmt_duration(elapsed)}") + print(f"[vbench] prompt {task_idx+1}/{len(prompts)} sample {sample_idx+1}/{args.num_samples} seed {seed}") + print(f"[vbench] {caption[:70]}") + + existing = _glob.glob(os.path.join(_glob.escape(out_dir), f"{_glob.escape(_safe(caption))}-{sample_idx}-*.mp4")) + if existing: + out_path = existing[0] + print(f"[vbench] → SKIP (already exists)") + skipped += 1 + done += 1 + stats_w.writerow([time.strftime("%Y-%m-%dT%H:%M:%S"), task_idx, caption, sample_idx, seed, + "", generated, f"{elapsed:.1f}", "", + "", ram_total_gb, "", gpu_total_gb, out_path, "skipped"]) + stats_f.flush() + continue + + work_dir = os.path.join(_ROOT_DIR, "output", "vbench", "_work", f"{task_idx}_{sample_idx}") + os.makedirs(work_dir, exist_ok=True) + + try: + st = time.time() + + # Step 1: image → panorama + print(f"[vbench] step 1/2 panorama generation …") + t1 = time.time() + rc = run([ + sys.executable, "code/panoramic_image_generation.py", + "--mode=i2p", + f"--input_image_path={image_path}", + 
f"--output_path={work_dir}", + f"--seed={seed}", + ]) + if rc != 0: + raise RuntimeError(f"panoramic_image_generation.py exited with code {rc}") + print(f"[vbench] step 1/2 done ({time.time()-t1:.0f}s)") + + # Step 2: panorama → video + print(f"[vbench] step 2/2 video generation …") + t2 = time.time() + rc = run([ + sys.executable, "code/panoramic_image_to_video.py", + f"--inout_dir={work_dir}", + f"--resolution={args.resolution}", + f"--seed={seed}", + ]) + if rc != 0: + raise RuntimeError(f"panoramic_image_to_video.py exited with code {rc}") + print(f"[vbench] step 2/2 done ({time.time()-t2:.0f}s)") + + generated_mp4 = os.path.join(work_dir, "generated", "generated.mp4") + if not os.path.exists(generated_mp4): + raise RuntimeError(f"output video not found: {generated_mp4}") + + shutil.copy2(generated_mp4, out_path) + shutil.rmtree(work_dir, ignore_errors=True) + + ed = time.time() + duration = ed - st + ok_total_s += duration + generated += 1 + + ram_used, _, gpu_used, _ = _sys_stats() + total_elapsed = ed - t_start + avg_s_per = ok_total_s / generated + + print(f"[vbench] ✓ saved {os.path.basename(out_path)}") + print(f"[vbench] duration {_fmt_duration(duration)} | avg {avg_s_per/60:.1f} min/video") + print(f"[vbench] RAM {ram_used:.1f}/{ram_total_gb:.0f} GB | GPU {gpu_used:.1f}/{gpu_total_gb:.0f} GB") + + stats_w.writerow([time.strftime("%Y-%m-%dT%H:%M:%S"), task_idx, caption, sample_idx, seed, + f"{duration:.1f}", generated, f"{total_elapsed:.1f}", f"{avg_s_per:.1f}", + f"{ram_used:.2f}", f"{ram_total_gb:.2f}", f"{gpu_used:.2f}", f"{gpu_total_gb:.2f}", + out_path, "ok"]) + stats_f.flush() + + except Exception as exc: + print(f"[vbench] ✗ ERROR: {exc}") + ram_used, _, gpu_used, _ = _sys_stats() + stats_w.writerow([time.strftime("%Y-%m-%dT%H:%M:%S"), task_idx, caption, sample_idx, seed, + "", generated, f"{time.time()-t_start:.1f}", "", + f"{ram_used:.2f}", f"{ram_total_gb:.2f}", f"{gpu_used:.2f}", f"{gpu_total_gb:.2f}", + out_path, "error"]) + stats_f.flush() + 
errors += 1 + + done += 1 + + elapsed_total = time.time() - t_start + stats_f.close() + print(f"\n{'='*70}") + print(f"[vbench] DONE generated={generated} skipped={skipped} errors={errors}") + print(f"[vbench] total elapsed: {_fmt_duration(elapsed_total)}") + if generated: + print(f"[vbench] avg per video: {ok_total_s/generated/60:.1f} min") + print(f"[vbench] videos → {out_dir}") + print(f"[vbench] stats → {stats_path}") + print(f"{'='*70}") + + +if __name__ == "__main__": + main() diff --git a/generate.bat b/generate.bat new file mode 100644 index 0000000..f3405a5 --- /dev/null +++ b/generate.bat @@ -0,0 +1,33 @@ +@echo off +setlocal +set "CUDA_HOME=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.8" +set "PATH=%CUDA_HOME%\bin;%CUDA_HOME%\libnvvp;%PATH%" +set "USE_LIBUV=0" + +set "output_dir=output\example1" + +REM Step1: text to panorama image +python code/panoramic_image_generation.py ^ + --mode=t2p ^ + --prompt="a medieval village, half-timbered houses, cobblestone streets, lush greenery, clear blue sky, detailed textures, vibrant colors, high resolution" ^ + --output_path="%output_dir%" + +REM Or you can choose image to panorama image generation +REM python code/panoramic_image_generation.py ^ +REM --mode=i2p ^ +REM --input_image_path="./data/image2.jpg" ^ +REM --output_path="%output_dir%" + +REM Step2: panorama image to video generation +set "VISIBLE_GPU_NUM=1" +torchrun --nproc_per_node %VISIBLE_GPU_NUM% code/panoramic_image_to_video.py ^ + --inout_dir="%output_dir%" ^ + --resolution=720 + +REM Step3: 3d scene extraction +python code/panoramic_video_to_3DScene.py ^ + --inout_dir="%output_dir%" ^ + --resolution=720 + +echo ✅ Generation pipeline commands completed. 
+endlocal diff --git a/generate_vbench.bat b/generate_vbench.bat new file mode 100644 index 0000000..67b19db --- /dev/null +++ b/generate_vbench.bat @@ -0,0 +1,15 @@ +@echo off +setlocal +set "CUDA_HOME=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.8" +set "PATH=%CUDA_HOME%\bin;%CUDA_HOME%\libnvvp;%PATH%" +set "USE_LIBUV=0" +if not defined HF_TOKEN echo [WARN] HF_TOKEN is not set in the environment. Gated Hugging Face downloads may fail. + +python code/vbench_batch.py ^ + --output_dir=output\vbench\videos ^ + --num_samples=5 ^ + --seed=0 ^ + --resolution=720 + +echo Generation completed. +endlocal diff --git a/install.bat b/install.bat new file mode 100644 index 0000000..456363d --- /dev/null +++ b/install.bat @@ -0,0 +1,86 @@ +@echo off +setlocal +set "CUDA_HOME=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.8" +set "PATH=%CUDA_HOME%\bin;%CUDA_HOME%\libnvvp;%PATH%" +set "PYTHONNOUSERSITE=1" +set "PIP_USE_PEP517=0" + +echo ✅ Installing Submodules... + +pushd submodules\nvdiffrast || goto :error +pip install . || goto :error +popd + +pushd submodules\simple-knn || goto :error +python setup.py install +if errorlevel 1 ( + echo [WARN] simple-knn native build failed on this Windows environment. Continuing without it. +) +popd + +pip install git+https://github.com/rmurai0610/diff-gaussian-rasterization-w-pose.git +if errorlevel 1 ( + echo [WARN] diff-gaussian-rasterization-w-pose build failed on this environment. Continuing without it. +) + +if not exist submodules\ODGS ( + git clone https://github.com/esw0116/ODGS.git submodules\ODGS || goto :error +) + +pushd submodules\ODGS || goto :error +pip install submodules/odgs-gaussian-rasterization +if errorlevel 1 ( + echo [WARN] odgs-gaussian-rasterization build failed. Continuing without ODGS native rasterization. +) +popd + +pushd code\DiffSynth-Studio || goto :error +echo ✅ Installing DiffSynth-Studio... +python setup.py develop +if errorlevel 1 ( + echo [WARN] setup.py develop failed, trying setup.py install... 
+ python setup.py install || goto :error +) +popd + +echo ✅ Installing Python dependencies... +pip install "numpy<2" "opencv-python<4.11" || goto :error +pip install plyfile decord ffmpeg trimesh pyrender xfuser diffusers open3d py360convert || goto :error +pip install "git+https://github.com/facebookresearch/pytorch3d.git@v0.7.7" || goto :error +pip install peft easydict torchsde "open-clip-torch==2.7.0" fairscale natsort || goto :error +pip install realesrgan || goto :error +pip install "flash-attn==2.7.4.post1" --no-build-isolation || goto :error +pip install git+https://github.com/EasternJournalist/utils3d.git#egg=utils3d || goto :error +pip install "xformers==0.0.31" || goto :error +pip install "jaxtyping==0.3.2" || goto :error +pip install "modelscope==1.28.2" || goto :error +pip install "diffusers==0.35.1" || goto :error +pip install "matplotlib==3.8.4" || goto :error +pip install "transformers==4.56.0" || goto :error +pip install "torchmetrics==0.7.0" || goto :error +pip install "OmegaConf==2.1.1" || goto :error +pip install "imageio-ffmpeg==0.6.0" || goto :error +pip install "pytorch-lightning==1.4.2" || goto :error +pip install "omegaconf==2.1.1" || goto :error +pip install "webdataset==0.2.5" || goto :error +pip install "kornia==0.6" || goto :error +pip install "streamlit==1.12.1" || goto :error +pip install "einops==0.8.0" || goto :error +pip install open_clip_torch || goto :error +pip install "SwissArmyTransformer==0.4.12" || goto :error +pip install "wandb==0.21.1" || goto :error +pip install -e git+https://github.com/CompVis/taming-transformers.git@master#egg=taming-transformers || goto :error +pip uninstall -y basicsr || goto :error +pip install openai-clip || goto :error + +echo ✅ All dependencies installed successfully. + +echo ✅ Downloading model checkpoints... +python code\download_checkpoints.py || goto :error + +echo ✅ All done. +goto :eof + +:error +echo ❌ Installation failed. Aborting. 
+exit /b 1 diff --git a/install.sh b/install.sh index a292fd6..b2f8c52 100644 --- a/install.sh +++ b/install.sh @@ -20,28 +20,28 @@ cd .. echo "✅ Installing Python dependencies..." pip install plyfile decord ffmpeg trimesh pyrender xfuser diffusers open3d py360convert pip install "git+https://github.com/facebookresearch/pytorch3d.git@v0.7.7" -pip install peft easydict torchsde open-clip-torch==2.7.0 fairscale natsort +pip install peft easydict torchsde "open-clip-torch>=2.7.0" fairscale natsort pip install realesrgan #Version >3.7 and <3.9 -pip install flash-attn==2.7.4.post1 --no-build-isolation +pip install "flash-attn>=2.7.4.post1" --no-build-isolation pip install git+https://github.com/EasternJournalist/utils3d.git#egg=utils3d -pip install xformers==0.0.31 -pip install jaxtyping==0.3.2 -pip install modelscope==1.28.2 -pip install diffusers==0.34.0 -pip install matplotlib==3.8.4 -pip install transformers==4.56.0 -pip install torchmetrics==0.7.0 -pip install OmegaConf==2.1.1 -pip install imageio-ffmpeg==0.6.0 -pip install pytorch-lightning==1.4.2 -pip install omegaconf==2.1.1 -pip install webdataset==0.2.5 -pip install kornia==0.6 -pip install streamlit==1.12.1 -pip install einops==0.8.0 +pip install "xformers>=0.0.31" +pip install "jaxtyping>=0.3.2" +pip install "modelscope>=1.28.2" +pip install "diffusers>=0.34.0" +pip install "matplotlib>=3.8.4" +pip install "transformers>=4.56.0" +pip install "torchmetrics>=0.7.0" +pip install "OmegaConf>=2.1.1" +pip install "imageio-ffmpeg>=0.6.0" +pip install "pytorch-lightning>=1.4.2" +pip install "omegaconf>=2.1.1" +pip install "webdataset>=0.2.5" +pip install "kornia>=0.6" +pip install "streamlit>=1.12.1" +pip install "einops>=0.8.0" pip install open_clip_torch -pip install SwissArmyTransformer==0.4.12 -pip install wandb==0.21.1 +pip install "SwissArmyTransformer>=0.4.12" +pip install "wandb>=0.21.1" pip install -e git+https://github.com/CompVis/taming-transformers.git@master#egg=taming-transformers pip uninstall basicsr pip install openai-clip diff --git 
a/submodules/simple-knn/setup.py b/submodules/simple-knn/setup.py index 580d2bd..3926212 100644 --- a/submodules/simple-knn/setup.py +++ b/submodules/simple-knn/setup.py @@ -12,11 +12,68 @@ from setuptools import setup from torch.utils.cpp_extension import CUDAExtension, BuildExtension import os +import glob cxx_compiler_flags = [] +nvcc_flags = [] if os.name == 'nt': - cxx_compiler_flags.append("/wd4624") + # setuptools/distutils on Windows uses its own registry-based MSVC detection + # (ignoring PATH) and picks x86 cl.exe because plat_name='win32'. This causes + # ptr_size=4, missing _WIN64/_M_X64, broken intrinsics, and linker x86 lib paths. + # + # Fix: + # DISTUTILS_USE_SDK=1 + MSSdk=1 → distutils uses cl.exe from PATH instead of + # its own vcvars-based detection. + # x64 cl.exe prepended to PATH → both distutils and NVCC find the right binary. + # --compiler-bindir in nvcc_flags → NVCC uses x64 cl.exe explicitly (belt+suspenders). + # LIB env var prepended with x64 paths → linker finds x64 CRT/UCRT/UM symbols. + + _x64_cl_dir = None + for pattern in [ + "C:/Program Files/Microsoft Visual Studio/*/Community/VC/Tools/MSVC/*/bin/HostX64/x64", + "C:/Program Files (x86)/Microsoft Visual Studio/*/BuildTools/VC/Tools/MSVC/*/bin/HostX64/x64", + ]: + cl_dirs = sorted(glob.glob(pattern)) + if cl_dirs: + _x64_cl_dir = cl_dirs[-1] + break + + if _x64_cl_dir: + # Make distutils use our PATH cl.exe, not registry-detected x86. + os.environ['DISTUTILS_USE_SDK'] = '1' + os.environ['MSSdk'] = '1' + os.environ['PATH'] = _x64_cl_dir + os.pathsep + os.environ.get('PATH', '') + + # Prepend x64 lib dirs so linker finds x64 CRT/UCRT/UM (distutils injects x86 ones). 
+ msvc_x64_lib = _x64_cl_dir.split('bin')[0] + 'lib/x64' + x64_libs = [msvc_x64_lib] + for sdk_pattern in ["C:/Program Files (x86)/Windows Kits/10/lib/*/ucrt/x64"]: + sdk_ucrt_dirs = sorted(glob.glob(sdk_pattern)) + if sdk_ucrt_dirs: + ucrt_x64 = sdk_ucrt_dirs[-1] + um_x64 = os.path.join(os.path.dirname(os.path.dirname(ucrt_x64)), 'um', 'x64') + x64_libs += [ucrt_x64, um_x64] + break + existing_lib = os.environ.get('LIB', '') + os.environ['LIB'] = os.pathsep.join(x64_libs) + (os.pathsep + existing_lib if existing_lib else '') + + cxx_compiler_flags = [ + "/wd4624", + "/Zc:alignedNew-", + "/std:c++17", + "/Zc:__cplusplus", + ] + + nvcc_flags = [ + # Explicitly point NVCC at the x64 cl.exe so it doesn't pick an x86 one from PATH. + # _WIN64 and _M_X64 are then auto-defined by cl.exe itself; we do NOT add -D_WIN64 + # manually because doing so without _M_X64 triggers __faststorefence undefined errors. + *([f"--compiler-bindir={_x64_cl_dir}"] if _x64_cl_dir else []), + "-Xcompiler", "/Zc:alignedNew-", + "-Xcompiler", "/std:c++17", + "-Xcompiler", "/Zc:__cplusplus", + ] setup( name="simple_knn", @@ -24,10 +81,10 @@ CUDAExtension( name="simple_knn._C", sources=[ - "spatial.cu", + "spatial.cu", "simple_knn.cu", "ext.cpp"], - extra_compile_args={"nvcc": [], "cxx": cxx_compiler_flags}) + extra_compile_args={"nvcc": nvcc_flags, "cxx": cxx_compiler_flags}) ], cmdclass={ 'build_ext': BuildExtension