diff --git a/Cargo.lock b/Cargo.lock index 4c745f0..a42f6e4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1479,6 +1479,7 @@ dependencies = [ "textwrap", "time", "url", + "wheel", "which", "windows-registry", ] @@ -2078,6 +2079,7 @@ dependencies = [ "python-platform", "rayon", "regex", + "resolver", "rfc2047-decoder", "rstest", "rust-ini", @@ -2091,6 +2093,7 @@ dependencies = [ "url", "version-ranges", "walkdir", + "wheel", "zip", ] @@ -2119,13 +2122,17 @@ dependencies = [ "itertools 0.14.0", "log", "logging", + "logging_timer", "owo-colors", + "pep508_rs", "pex", "platform", "python-platform", "python-proxy", "rayon", + "repackage", "request", + "resolver", "scripts", "serde_json", "sha2 0.11.0", @@ -2134,6 +2141,7 @@ dependencies = [ "tempfile", "url", "walkdir", + "wheel", "zip", "zstd", ] @@ -2565,6 +2573,25 @@ version = "1.9.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ba39f3699c378cd8970968dcbff9c43159ea4cfbd88d43c00b22f2ef10a435d2" +[[package]] +name = "repackage" +version = "0.0.0" +dependencies = [ + "anyhow", + "chrono", + "fs-err", + "logging_timer", + "ouroboros", + "pex", + "platform", + "rayon", + "serde", + "serde_json", + "walkdir", + "wheel", + "zip", +] + [[package]] name = "request" version = "0.0.0" @@ -2615,6 +2642,23 @@ dependencies = [ "web-sys", ] +[[package]] +name = "resolver" +version = "0.0.0" +dependencies = [ + "anyhow", + "dashmap", + "indexmap 2.14.0", + "logging_timer", + "pep440_rs", + "pep508_rs", + "python-platform", + "regex", + "url", + "version-ranges", + "wheel", +] + [[package]] name = "rfc2047-decoder" version = "1.1.2" @@ -3472,6 +3516,8 @@ dependencies = [ "python-platform", "rayon", "regex", + "repackage", + "resolver", "scripts", "serde", "serde_json", @@ -3664,6 +3710,7 @@ dependencies = [ "python-platform", "python-proxy", "rayon", + "resolver", "rstest", "rust-ini", "scripts", @@ -3672,6 +3719,7 @@ dependencies = [ "tempfile", "testing", "walkdir", + "wheel", "zip", ] @@ -3851,6 +3899,31 @@ dependencies = [ "rustls-pki-types", ] +[[package]] +name = "wheel" +version = "0.0.0" +dependencies = [ + "anyhow", + "chrono", + "csv", + "fs-err", + "glob", + "mailparse", + "ouroboros", + "pep440_rs", + "pep508_rs", + "python-pkginfo", + "regex", + "rfc2047-decoder", + "rstest", + "rust-ini", + "serde", + "serde_json", + "testing", + "url", + "zip", +] + [[package]] name = "which" version = "8.0.2" diff --git a/Cargo.toml b/Cargo.toml index 4221965..46c4439 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -25,12 +25,15 @@ members = [ "crates/platform", "crates/python-platform", "crates/python-proxy", + "crates/repackage", "crates/request", + "crates/resolver", "crates/scripts", "crates/target", "crates/testing", "crates/tools", - "crates/venv" + "crates/venv", + "crates/wheel" ] [package.metadata.build] @@ -245,13 +248,17 @@ interpreter = { path = "crates/interpreter" } itertools = { workspace = true } log = { workspace = true } logging = { path = "crates/logging" } +logging_timer = { workspace = true } owo-colors = { workspace = true } +pep508_rs = { workspace = true } pex = { path = "crates/pex" } platform = { path = "crates/platform" } python-platform = { path = "crates/python-platform" } python-proxy = { path = "crates/python-proxy" } rayon = { workspace = true } request = { path = "crates/request" } +repackage = { path = "crates/repackage" } +resolver = { path = "crates/resolver" } scripts = { path = "crates/scripts", features = ["embedded"] } serde_json = { workspace = true } sha2 = { workspace = true } @@ -260,5 +267,6 @@ target = { path = "crates/target" } tempfile = { workspace = true } url = { workspace = true } walkdir = { workspace = true } +wheel = { path = "crates/wheel" } zip = { workspace = true } zstd = { workspace = true } diff --git a/crates/interpreter/Cargo.toml b/crates/interpreter/Cargo.toml index ecf3928..dec6888 100644 --- a/crates/interpreter/Cargo.toml +++ b/crates/interpreter/Cargo.toml @@ -25,6 +25,7 @@ serde_json = { workspace = true } tempfile = { workspace = true } time = { workspace = true } url = { workspace = true } +wheel = { path = "../wheel" } which = { workspace = true } [target.'cfg(windows)'.dependencies] diff --git a/crates/interpreter/src/lib.rs b/crates/interpreter/src/lib.rs index 4535840..ce1b155 100644 --- a/crates/interpreter/src/lib.rs +++ b/crates/interpreter/src/lib.rs @@ -14,7 +14,6 @@ mod interpreter; mod platform; mod pyenv; mod search_path; -mod tag; mod version; pub use constraints::unix::calculate_compatible_binary_names as calculate_compatible_unix_binary_names; @@ -27,5 +26,5 @@ pub use constraints::{ pub use interpreter::{Interpreter, InterpreterDetails}; pub use platform::Platform; pub use search_path::SearchPath; -pub use tag::Tag; pub use version::{LATEST_STABLE, OLDEST_SUPPORTED_STABLE}; +pub use wheel::Tag; diff --git a/crates/pex/Cargo.toml b/crates/pex/Cargo.toml index 64f8fff..ced841f 100644 --- a/crates/pex/Cargo.toml +++ b/crates/pex/Cargo.toml @@ -26,6 +26,7 @@ python-pkginfo = { workspace = true } python-platform = { path = "../python-platform" } rayon = { workspace = true } regex = { workspace = true } +resolver = { path = "../resolver" } rfc2047-decoder = { workspace = true } rust-ini = { workspace = true } scripts = { path = "../scripts" } @@ -37,6 +38,7 @@ tempfile = { workspace = true } url = { workspace = true } version-ranges = { workspace = true } walkdir = { workspace = true } +wheel = { path = "../wheel" } zip = { workspace = true } [dev-dependencies] diff --git a/crates/pex/src/lib.rs b/crates/pex/src/lib.rs index cc04a38..02db4ad 100644 --- a/crates/pex/src/lib.rs +++ b/crates/pex/src/lib.rs @@ -2,22 +2,15 @@ // SPDX-License-Identifier: Apache-2.0 #![deny(clippy::all)] -#![feature(str_as_str)] -mod dependency_configuration; mod pex; mod pex_info; mod pex_path; -mod wheel; -pub use dependency_configuration::DependencyConfiguration; pub use pex::{ - CollectWheelMetadata, Layout, Pex, - Resolve, ResolveError, - ResolvedWheel, ResolvedWheels, collect_loose_user_source, collect_zipped_user_source_indexes, @@ -25,16 +18,3 @@ pub use pex::{ }; pub use pex_info::{BinPath, InheritPath, InterpreterSelectionStrategy, PexInfo, RawPexInfo}; pub use pex_path::PexPath; -pub use wheel::{ - EntryPoint, - EntryPoints, - MetadataDirs, - Record, - WheelDir, - WheelFile, - WheelLayout, - WheelMetadata, - WheelOptions, - recompress_zipped_whl, - repackage_wheels, -}; diff --git a/crates/pex/src/pex.rs b/crates/pex/src/pex.rs index 53ea8df..396afbb 100644 --- a/crates/pex/src/pex.rs +++ b/crates/pex/src/pex.rs @@ -2,36 +2,35 @@ // SPDX-License-Identifier: Apache-2.0 use std::borrow::Cow; -use std::collections::{BTreeSet, HashMap, HashSet, VecDeque}; +use std::collections::HashSet; use std::ffi::OsStr; use std::fs::FileType; use std::io; use std::io::{BufReader, Read, Seek}; use std::path::{Path, PathBuf}; -use std::str::FromStr; use std::sync::{Arc, Mutex}; use anyhow::{anyhow, bail}; -use dashmap::DashMap; use fs_err as fs; use fs_err::File; use indexmap::IndexMap; -use interpreter::{Interpreter, InterpreterConstraints, SearchPath, Tag}; +use interpreter::{Interpreter, InterpreterConstraints, SearchPath}; use itertools::Itertools; use log::{Level, debug, warn}; use logging_timer::{time, timer}; -use pep440_rs::{Version, VersionSpecifiers}; -use pep508_rs::{ExtraName, PackageName, Requirement, VersionOrUrl}; +use pep508_rs::Requirement; use python_platform::PythonPlatform; use rayon::prelude::*; +use resolver::dependency_configuration::DependencyConfiguration; +use resolver::{CollectWheelMetadata, ResolvedWheel}; use scripts::{IdentifyInterpreter, Scripts}; use strum_macros::{AsRefStr, EnumString}; use url::Url; use walkdir::{DirEntry, WalkDir}; +use wheel::{MetadataDirs, MetadataReader, WheelFile}; use zip::ZipArchive; -use crate::wheel::{MetadataDirs, MetadataReader, WheelFile, WheelMetadata}; -use crate::{DependencyConfiguration, InterpreterSelectionStrategy, PexInfo, WheelDir}; +use crate::{InterpreterSelectionStrategy, PexInfo}; #[derive(AsRefStr, EnumString)] pub enum Layout { @@ -129,54 +128,6 @@ pub struct ResolveError { pub err: anyhow::Error, } -pub struct ResolvedWheel<'a> { - file_name: &'a str, - pub project_name: &'a str, - pub version: &'a str, - pub root_is_purelib: bool, - pub metadata_dirs: MetadataDirs, -} - -impl<'a> ResolvedWheel<'a> { - pub fn data_dir(&'a self) -> WheelDir<'a> { - self.metadata_dirs.data_dir() - } - - pub fn dist_info_dir(&'a self) -> WheelDir<'a> { - self.metadata_dirs.dist_info_dir() - } - - pub fn pex_info_dir(&'a self) -> WheelDir<'a> { - self.metadata_dirs.pex_info_dir() - } -} - -#[derive(Clone)] -pub struct CollectWheelMetadata<'a>(Arc>>); - -impl<'a> Default for CollectWheelMetadata<'a> { - fn default() -> Self { - Self::new() - } -} - -impl<'a> CollectWheelMetadata<'a> { - pub fn new() -> Self { - Self(Arc::new(DashMap::new())) - } - - pub fn into_collected(self) -> anyhow::Result>> { - let metadata = Arc::try_unwrap(self.0) - .ok() - .ok_or_else(|| anyhow!("Metadata is still being collected."))?; - Ok(metadata.into_iter().map(|(_, metadata)| metadata).collect()) - } - - fn collect(&self, file_name: &'a str, metadata_func: impl FnOnce() -> WheelMetadata<'a>) { - self.0.entry(file_name).or_insert_with(metadata_func); - } -} - pub struct Resolve<'a> { pub interpreter: Interpreter, pub wheels: IndexMap<&'a str, ResolvedWheel<'a>>, @@ -184,25 +135,6 @@ pub struct Resolve<'a> { pub additional_wheels: Vec<(&'a Pex<'a>, IndexMap<&'a str, ResolvedWheel<'a>>)>, } -#[derive(Hash, Eq, PartialEq)] -struct RequirementKey { - package_name: PackageName, - extras: BTreeSet, -} - -impl RequirementKey { - fn of(requirement: &Requirement) -> Self { - Self { - package_name: requirement.name.clone(), - extras: requirement.extras.iter().cloned().collect(), - } - } - - fn satisfies(&self, requested: &RequirementKey) -> bool { - self.package_name == requested.package_name && requested.extras.is_subset(&self.extras) - } -} - impl<'a> Pex<'a> { #[time("debug", "Pex.{}")] pub fn load(path: &'a Path) -> anyhow::Result { @@ -252,8 +184,10 @@ impl<'a> Pex<'a> { Layout::ZipApp => Ok(Scripts::Zipped(ZipArchive::new(File::open(&path)?)?)), } } + pub fn dependency_configuration(&self) -> anyhow::Result { - DependencyConfiguration::load(&self.info) + let pex_info = self.info.raw(); + DependencyConfiguration::parse(pex_info.excluded.as_slice(), pex_info.overridden.as_slice()) } #[time("debug", "Pex.{}")] @@ -263,233 +197,55 @@ impl<'a> Pex<'a> { dependency_configuration: &DependencyConfiguration, collect_extra_metadata: Option>, ) -> anyhow::Result>> { - let supported_tags: HashMap = target - .supported_tags() - .enumerate() - .map(|(idx, tag)| Tag::parse(tag).map(|tag| (tag, idx))) - .collect::>()?; - - let wheel_files = self - .info - .parse_distributions() - .collect::, _>>()?; - - let ranked_wheel_files = wheel_files - .into_iter() - .filter_map(|wheel_file| { - for tag in &wheel_file.tags { - if let Some(rank) = supported_tags.get(tag) { - return Some(RankedWheelFile { - wheel_file, - rank: *rank, - }); - } - } - None - }) - .collect::>(); - - let ranked_wheels = self.load_wheel_metadata(target, ranked_wheel_files)?; - - struct WheelInfo<'b> { - file_name: &'b str, - raw_project_name: &'b str, - raw_version: &'b str, - version: Version, - requires_dists: Vec>, - requires_python: Option, - root_is_purelib: bool, - rank: usize, - metadata_dirs: MetadataDirs, - } - - let mut wheels_by_project_name: HashMap> = - HashMap::with_capacity(ranked_wheels.len()); - for ranked_wheel in ranked_wheels { - wheels_by_project_name - .entry(ranked_wheel.metadata.project_name) - .or_default() - .push(WheelInfo { - file_name: ranked_wheel.metadata.file_name, - raw_project_name: ranked_wheel.metadata.raw_project_name, - raw_version: ranked_wheel.metadata.raw_version, - version: ranked_wheel.metadata.version, - requires_dists: ranked_wheel.metadata.requires_dists, - requires_python: ranked_wheel.metadata.requires_python, - root_is_purelib: ranked_wheel.metadata.root_is_purelib, - rank: ranked_wheel.rank, - metadata_dirs: ranked_wheel.metadata.metadata_dirs, - }) - } - for wheels in wheels_by_project_name.values_mut() { - wheels.sort_by_key(|WheelInfo { rank, .. }| *rank); - } - - let mut resolved_by_project_name: IndexMap = - IndexMap::with_capacity(wheels_by_project_name.len()); - let mut indexed_extras: Vec> = vec![vec![]]; - let mut to_resolve: VecDeque<(Requirement, usize)> = self + let requirements: Vec> = self .info .raw() .requirements .iter() - .map(|requirement| { - Requirement::from_str(requirement).map(|requirement| (requirement, 0)) - }) - .filter_map(|result| match result { - Ok((requirement, extras_index)) => { - if dependency_configuration.excluded(&requirement) { - None - } else { - Some(Ok((requirement, extras_index))) - } - } - Err(err) => Some(Err(err)), - }) - .collect::>()?; - let marker_env = target.marker_env(); - let no_wheels: Vec = vec![]; - while let Some((requirement, extras_index)) = to_resolve.pop_front() { - let requirement_key = RequirementKey::of(&requirement); - - // Already processed. - if resolved_by_project_name.contains_key(&requirement_key) { - continue; - } - if resolved_by_project_name - .keys() - .any(|key| key.satisfies(&requirement_key)) - { - continue; - } + .map(|requirement| Ok(requirement.as_ref().parse()?)) + .collect::>>()?; - // Does not apply. - if !requirement - .marker - .evaluate(marker_env, &indexed_extras[extras_index]) - { - continue; - } + let parse_wheel_files = || { + self.info + .parse_distributions() + .collect::>>() + }; - let wheels = wheels_by_project_name - .get(&requirement.name) - .or_else(|| { - if self.info.raw().ignore_errors { - Some(&no_wheels) - } else { - None - } - }) - .ok_or_else(|| { - let inapplicable_wheels = self - .info - .parse_distributions() - .filter_map(|result| match result { - Ok(wheel_file) if wheel_file.project_name == requirement.name => { - Some(wheel_file.file_name) - } - _ => None, - }) - .collect::>(); - let count = inapplicable_wheels.len(); - let wheels = if count == 1 { "wheel" } else { "wheels" }; - let reason = if inapplicable_wheels.is_empty() { - format_args!( - "The PEX contains {count} embedded {wheels} for project: {project}", - project = requirement.name - ) - } else { - format_args!( - "The PEX contains {count} inapplicable {wheels} for project: \ - {project}\n\ - {inapplicable_wheels}", - project = requirement.name, - inapplicable_wheels = inapplicable_wheels.join("\n") - ) - }; - anyhow!( - "The PEX at {path} has requirement {requirement} that cannot be satisfied \ - for {target}.\n\ - {reason}", - path = self.path.display(), - target = target.description(), - reason = reason, - ) - })?; - for WheelInfo { - file_name, - raw_project_name, - raw_version, - version, - requires_dists, - requires_python, - root_is_purelib, - metadata_dirs, - .. - } in wheels - { - if let Some(version_or_url) = requirement.version_or_url.as_ref() { - match version_or_url { - VersionOrUrl::VersionSpecifier(version_specifier) => { - if !version_specifier.contains(version) { - continue; - } - } - VersionOrUrl::Url(url) => bail!( - "A PEX should never contain an URL requirement.\ - The PEX at {path} requires: {url}", - path = self.path.display() - ), - } - } - let extras_index = if requirement.extras.is_empty() { - 0 - } else { - let idx = indexed_extras.len(); - indexed_extras.push(requirement.extras); - idx - }; - if let Some(extra_metadata) = collect_extra_metadata.as_ref() { - extra_metadata.collect(file_name, || WheelMetadata { - file_name, - raw_project_name, - project_name: requirement.name.clone(), - raw_version, - version: version.clone(), - requires_dists: requires_dists.clone(), - requires_python: requires_python.clone(), - root_is_purelib: *root_is_purelib, - metadata_dirs: metadata_dirs.clone(), - }) - } - resolved_by_project_name.insert( - requirement_key, - ResolvedWheel { - file_name, - project_name: raw_project_name, - version: raw_version, - root_is_purelib: *root_is_purelib, - metadata_dirs: metadata_dirs.clone(), - }, - ); - for req in requires_dists { - if dependency_configuration.excluded(req) { - continue; - } - to_resolve.push_back(( - dependency_configuration - .overridden(req, target, &indexed_extras[extras_index])? - .unwrap_or_else(|| req.clone()), - extras_index, - )) - } - break; - } + let ignore_errors = self.info.raw().ignore_errors; + match self.layout { + // N.B.: When deps_are_wheel_files for a `--layout loose` PEX, our layout detection + // detects as `--layout packed`, which properly handles the .whl zips. + Layout::Loose => resolver::resolve_wheels( + target, + requirements, + parse_wheel_files, + &mut LoosePexMetadataReader(self.path), + dependency_configuration, + collect_extra_metadata, + ignore_errors, + ), + // N.B.: When deps_are_wheel_files for a `--layout packed` PEX, the packed wheel chroot + // zips and normal .whl zips have the same for code and metadata; so no differentiation + // in behavior is needed. + Layout::Packed => resolver::resolve_wheels( + target, + requirements, + parse_wheel_files, + &mut PackedPexMetadataReader(self.path), + dependency_configuration, + collect_extra_metadata, + ignore_errors, + ), + Layout::ZipApp => resolver::resolve_wheels( + target, + requirements, + parse_wheel_files, + &mut ZipAppPexMetadataReader::new(self.path, self.info.raw().deps_are_wheel_files)?, + dependency_configuration, + collect_extra_metadata, + ignore_errors, + ), } - Ok(resolved_by_project_name - .into_values() - .map(|resolved_wheel| (resolved_wheel.file_name, resolved_wheel)) - .collect()) } pub fn resolve_all( @@ -682,45 +438,6 @@ impl<'a> Pex<'a> { .join("\n") ) } - - fn load_wheel_metadata( - &'a self, - target: &impl PythonPlatform<'a>, - wheel_files: Vec>, - ) -> anyhow::Result>> { - let python_version = target.version(); - match self.layout { - // N.B.: When deps_are_wheel_files for a `--layout loose` PEX, our layout detection - // detects as `--layout packed`, which properly handles the .whl zips. - Layout::Loose => read_wheel_metadata( - python_version.as_ref(), - wheel_files, - &mut LoosePexMetadataReader(self.path), - ), - // N.B.: When deps_are_wheel_files for a `--layout packed` PEX, the packed wheel chroot - // zips and normal .whl zips have the same for code and metadata; so no differentiation - // in behavior is needed. - Layout::Packed => read_wheel_metadata( - python_version.as_ref(), - wheel_files, - &mut PackedPexMetadataReader(self.path), - ), - Layout::ZipApp => read_wheel_metadata( - python_version.as_ref(), - wheel_files, - &mut ZipAppPexMetadataReader::new(self.path, self.info.raw().deps_are_wheel_files)?, - ), - } - } -} -struct RankedWheelFile<'a> { - wheel_file: WheelFile<'a>, - rank: usize, -} - -struct RankedWheel<'a> { - metadata: WheelMetadata<'a>, - rank: usize, } struct ZipAppPexMetadataReader<'a> { @@ -829,29 +546,6 @@ impl<'a> MetadataReader for PackedPexMetadataReader<'a> { } } -fn read_wheel_metadata<'a>( - python_version: &Version, - ranked_wheel_files: Vec>, - metadata_reader: &mut impl MetadataReader, -) -> anyhow::Result>> { - let mut ranked_wheels = Vec::with_capacity(ranked_wheel_files.len()); - for ranked_wheel_file in ranked_wheel_files { - let metadata_dirs = metadata_reader.locate_dirs(&ranked_wheel_file.wheel_file)?; - let metadata = - WheelMetadata::parse(ranked_wheel_file.wheel_file, metadata_dirs, metadata_reader)?; - if let Some(requires_python) = &metadata.requires_python - && !requires_python.contains(python_version) - { - continue; - } - ranked_wheels.push(RankedWheel { - metadata, - rank: ranked_wheel_file.rank, - }); - } - Ok(ranked_wheels) -} - #[cfg(test)] mod tests { use std::path::{Path, PathBuf}; @@ -863,16 +557,16 @@ mod tests { use interpreter::{Interpreter, SearchPath}; use pep440_rs::VersionSpecifiers; use pep508_rs::{Requirement, VersionOrUrl}; + use resolver::ResolvedWheel; use rstest::{fixture, rstest}; use scripts::{IdentifyInterpreter, Scripts}; use testing::{embedded_scripts, interpreter_identification_script, python_exe, tmp_dir}; use url::Url; use version_ranges::Ranges; + use wheel::WheelFile; use zip::write::SimpleFileOptions; use zip::{CompressionMethod, ZipWriter}; - use crate::pex::ResolvedWheel; - use crate::wheel::WheelFile; use crate::{Pex, PexPath}; const EXPECTED_ANSICOLORS_PEX_WHEELS: [&str; 1] = ["ansicolors==1.1.8"]; diff --git a/crates/pex/src/pex_info.rs b/crates/pex/src/pex_info.rs index d8503bd..0ac4484 100644 --- a/crates/pex/src/pex_info.rs +++ b/crates/pex/src/pex_info.rs @@ -12,8 +12,7 @@ use logging_timer::time; use ouroboros::self_referencing; use serde::{Deserialize, Serialize}; use serde_json::Value; - -use crate::wheel::WheelFile; +use wheel::WheelFile; #[derive(Copy, Clone, Debug, Deserialize, Serialize)] pub enum BinPath { @@ -62,7 +61,7 @@ impl From for SelectionStrategy { } } -#[derive(Debug, Deserialize, Serialize)] +#[derive(Debug, Default, Deserialize, Serialize)] pub struct RawPexInfo<'a> { pub bind_resource_paths: Option>, pub build_properties: IndexMap<&'a str, Value>, @@ -87,7 +86,8 @@ pub struct RawPexInfo<'a> { pub pex_paths: Vec>, #[serde(borrow)] pub pex_root: Option>, - pub requirements: Vec<&'a str>, + #[serde(borrow)] + pub requirements: Vec>, pub script: Option<&'a str>, pub strip_pex_env: Option, pub venv: bool, @@ -123,9 +123,7 @@ impl PexInfo { }) } - pub(crate) fn parse_distributions( - &self, - ) -> impl Iterator>> { + pub fn parse_distributions(&self) -> impl Iterator>> { self.borrow_info() .distributions .keys() diff --git a/crates/repackage/Cargo.toml b/crates/repackage/Cargo.toml new file mode 100644 index 0000000..9f00006 --- /dev/null +++ b/crates/repackage/Cargo.toml @@ -0,0 +1,22 @@ +# Copyright 2026 Pex project contributors. +# SPDX-License-Identifier: Apache-2.0 + +[package] +name = "repackage" +edition = { workspace = true } +publish = false + +[dependencies] +anyhow = { workspace = true } +chrono = { workspace = true } +fs-err = { workspace = true } +logging_timer = { workspace = true } +ouroboros = { workspace = true } +pex = { path = "../pex" } +platform = { path = "../platform" } +rayon = { workspace = true } +serde = { workspace = true } +serde_json = { workspace = true } +walkdir = { workspace = true } +wheel = { path = "../wheel" } +zip = { workspace = true } diff --git a/crates/repackage/src/lib.rs b/crates/repackage/src/lib.rs new file mode 100644 index 0000000..d39fe86 --- /dev/null +++ b/crates/repackage/src/lib.rs @@ -0,0 +1,721 @@ +// Copyright 2026 Pex project contributors. +// SPDX-License-Identifier: Apache-2.0 + +#![deny(clippy::all)] +#![feature(str_as_str)] + +use std::borrow::Cow; +use std::ffi::OsStr; +use std::fmt::Display; +use std::io; +use std::io::{Cursor, Read, Seek, Write}; +use std::ops::{Deref, DerefMut}; +use std::path::{Component, Path, PathBuf}; +use std::sync::Arc; + +use anyhow::bail; +use chrono::{DateTime, Utc}; +use fs_err as fs; +use fs_err::File; +use logging_timer::time; +use pex::{Layout, Pex}; +use platform::PosixPath; +use rayon::iter::{IntoParallelIterator, ParallelIterator}; +use walkdir::WalkDir; +use wheel::{Record, WheelFile, WheelLayout}; +use zip::read::ZipArchiveMetadata; +use zip::result::ZipError; +use zip::write::SimpleFileOptions; +use zip::{CompressionMethod, ZipArchive, ZipWriter}; + +use crate::original_wheel_info::{OriginalWheelInfo, ZipFileName}; + +pub mod original_wheel_info; + +#[derive(Copy, Clone)] +enum DirPexDepType { + Chroot, + OriginalWhl, + ZippedChroot, +} + +pub struct WheelOptions { + compression_method: CompressionMethod, + compression_level: Option, + timestamp: Option>, +} + +impl WheelOptions { + pub fn new( + compression_method: CompressionMethod, + compression_level: Option, + timestamp: Option>, + ) -> Self { + Self { + compression_method, + compression_level, + timestamp, + } + } + + pub fn file_options(&self) -> anyhow::Result { + self.add_timestamp( + SimpleFileOptions::default() + .compression_method(self.compression_method) + .compression_level(self.compression_level), + ) + } + + fn add_timestamp(&self, options: SimpleFileOptions) -> anyhow::Result { + Ok(if let Some(timestamp) = self.timestamp { + options.last_modified_time(zip::DateTime::try_from(timestamp.naive_utc())?) + } else { + options + }) + } +} + +#[time("debug", "{}")] +pub fn repackage_wheels( + pex: &Pex, + options: &WheelOptions, + dest_dir: &Path, +) -> anyhow::Result> { + let wheel_files = pex + .info + .parse_distributions() + .collect::, _>>()?; + match pex.layout { + Layout::Loose | Layout::Packed => { + let dep_type = if pex.info.raw().deps_are_wheel_files { + DirPexDepType::OriginalWhl + } else if matches!(pex.layout, Layout::Packed) { + DirPexDepType::ZippedChroot + } else { + DirPexDepType::Chroot + }; + wheel_files + .into_par_iter() + .map(|wheel_file: WheelFile| { + repackage_directory_pex_wheel( + pex.path, + &wheel_file, + dep_type, + options, + dest_dir, + ) + }) + .collect::>>() + } + Layout::ZipApp => { + let pex_zip = ZipArchive::new(File::open(pex.path)?)?; + let zip_metadata = pex_zip.metadata(); + wheel_files + .into_par_iter() + .map(|wheel_file: WheelFile| { + repackage_zipapp_pex_wheel( + pex.path, + zip_metadata.clone(), + &wheel_file, + pex.info.raw().deps_are_wheel_files, + options, + dest_dir, + ) + }) + .collect::>>() + } + } +} + +fn repackage_zipapp_pex_wheel( + pex_zip: &Path, + zip_metadata: Arc, + wheel_file: &WheelFile, + is_whl_zip: bool, + options: &WheelOptions, + dest_dir: &Path, +) -> anyhow::Result { + let mut pex_zip_fp = + unsafe { ZipArchive::unsafe_new_with_metadata(File::open(pex_zip)?, zip_metadata.clone()) }; + if is_whl_zip { + let wheel_prefix = format!( + ".deps/{wheel_file_name}", + wheel_file_name = wheel_file.file_name + ); + recompress_zipped_whl( + ZipArchive::new(pex_zip_fp.by_name_seek(&wheel_prefix)?)?, + wheel_file, + options, + dest_dir, + ) + } else { + recompress_zipped_whl_chroot( + pex_zip_fp, + pex_zip.display(), + wheel_file, + options, + dest_dir, + true, + ) + } +} + +fn repackage_directory_pex_wheel( + pex_dir: &Path, + wheel_file: &WheelFile, + dep_type: DirPexDepType, + options: &WheelOptions, + dest_dir: &Path, +) -> anyhow::Result { + let wheel_path = pex_dir.join(".deps").join(wheel_file.file_name); + match dep_type { + DirPexDepType::Chroot => compress_whl_chroot(&wheel_path, wheel_file, options, dest_dir), + DirPexDepType::OriginalWhl => recompress_zipped_whl( + ZipArchive::new(File::open(wheel_path)?)?, + wheel_file, + options, + dest_dir, + ), + DirPexDepType::ZippedChroot => recompress_zipped_whl_chroot( + ZipArchive::new(File::open(&wheel_path)?)?, + wheel_path.display(), + wheel_file, + options, + dest_dir, + false, + ), + } +} + +pub fn recompress_zipped_whl( + mut wheel: ZipArchive, + wheel_file: &WheelFile, + options: &WheelOptions, + dest_dir: &Path, +) -> anyhow::Result { + fs::create_dir_all(dest_dir)?; + let dest_wheel = dest_dir.join(wheel_file.file_name); + let compressed = File::create(&dest_wheel)?; + let mut compressed_whl = ZipWriter::new(compressed); + for index in 0..wheel.len() { + let entry = wheel.by_index_raw(index)?; + if entry.name().ends_with(".pyc") { + continue; + } + if entry.compression() == options.compression_method && options.timestamp.is_none() { + compressed_whl.raw_copy_file(entry)?; + } else if entry.is_dir() { + compressed_whl.add_directory(entry.name(), options.add_timestamp(entry.options())?)?; + } else { + drop(entry); + let mut entry = wheel.by_index(index)?; + // N.B.: entry.options is actually lossy (loses high bits); so we can't round-trip + // more exotic permissions faithfully currently. An example of this is the cowsay 6.1 + // wheel on PyPi whose RECORD has 0o100664 which gets truncated to 0o644. Note also + // though that `raw_copy_file` (used above when no transcoding is needed) _does_ + // preserve these bits. + // See: https://github.com/zip-rs/zip2/issues/433 + compressed_whl.start_file( + entry.name(), + options.add_timestamp( + entry + .options() + .compression_method(options.compression_method) + .compression_level(options.compression_level), + )?, + )?; + + io::copy(&mut entry, &mut compressed_whl)?; + } + } + compressed_whl.finish()?; + Ok(File::open(dest_wheel)?) +} + +fn recompress_zipped_whl_chroot( + mut zipped_wheel_chroot: ZipArchive, + zip_source: impl Display, + wheel_file: &WheelFile, + options: &WheelOptions, + dest_dir: &Path, + prefixed: bool, +) -> anyhow::Result { + fs::create_dir_all(dest_dir)?; + let file_options = options.file_options()?; + let wheel_prefix = if prefixed { + Some(format!( + ".deps/{wheel_file_name}/", + wheel_file_name = wheel_file.file_name + )) + } else { + None + }; + let prefix = wheel_prefix.as_deref().unwrap_or_default(); + + let metadata_dirs = wheel_file.metadata_dirs_from_zip( + &zipped_wheel_chroot, + zip_source, + wheel_prefix.as_deref(), + )?; + let dist_info_dir = metadata_dirs.dist_info_dir(); + let record_name = format!("{prefix}{dist_info_dir}/RECORD"); + let record = Record::read(Cursor::new(io::read_to_string( + zipped_wheel_chroot.by_name(&record_name)?, + )?))?; + + let (stash_dir, legacy_bin_dir) = 'result: { + let layout_json_name = if prefixed { + Cow::Owned(format!( + "{prefix}{file_name}", + file_name = WheelLayout::file_name() + )) + } else { + Cow::Borrowed(WheelLayout::file_name()) + }; + match zipped_wheel_chroot.by_name(layout_json_name.as_ref()) { + Ok(zip_file) => { + let layout = WheelLayout::read(zip_file)?; + break 'result (Some(layout.stash_dir), false); + } + Err(ZipError::FileNotFound) => {} + Err(err) => bail!("{err}"), + } + let legacy_bin_dir_name = if prefixed { + Cow::Owned(format!("{prefix}bin")) + } else { + Cow::Borrowed("bin") + }; + let has_legacy_bin_dir = !record.wheel_has_bin_dir() + && zipped_wheel_chroot + .by_name(legacy_bin_dir_name.as_ref()) + .ok() + .map(|entry| entry.is_dir()) + .unwrap_or_default(); + (None, has_legacy_bin_dir) + }; + + let original_wheel_info = format!( + "{prefix}{pex_info_dir}/{file_name}", + pex_info_dir = metadata_dirs.pex_info_dir(), + file_name = OriginalWheelInfo::file_name() + ); + + let wheel_info = if let Ok(wheel_info) = zipped_wheel_chroot.by_name(&original_wheel_info) { + let size = wheel_info.size(); + Some(OriginalWheelInfo::read(wheel_info, size)?) + } else { + None + }; + + let data_dir = metadata_dirs.data_dir().as_path(); + let mut zip_finder = ZipPathFinder { + zip: zipped_wheel_chroot, + prefix: wheel_prefix.as_deref(), + }; + let (dest_wheel, compressed_whl) = if let Some(wheel_info) = wheel_info { + let dest_wheel = dest_dir.join(wheel_info.filename()); + let mut compressed_whl = ZipWriter::new(File::create(&dest_wheel)?); + for (zip_file_name, options) in + wheel_info.iter_file_options(file_options, options.timestamp)? + { + if zip_file_name.ends_with(".pyc") { + continue; + } + let name = 'result: { + if let Ok(data_dir_rel_path) = zip_file_name.as_path().strip_prefix(&data_dir) { + if let Some(stash_dir) = stash_dir.as_deref() { + break 'result format!( + "{prefix}{stash_dir}/{rel_path}", + stash_dir = stash_dir.display(), + rel_path = PosixPath::relpath( + normalized_data_dir_relpath( + stash_dir, + data_dir_rel_path, + wheel_file, + &zip_finder + )? + .as_ref() + )? + ); + } + if legacy_bin_dir { + let rel_path = normalized_data_dir_relpath( + Path::new("bin"), + data_dir_rel_path, + wheel_file, + &zip_finder, + )?; + assert!(starts_with(rel_path.as_ref(), "bin")); + break 'result format!( + "{prefix}{rel_path}", + rel_path = PosixPath::relpath(rel_path.as_ref())? + ); + } + } + format!("{prefix}{zip_file_name}") + }; + let mut src = match zip_finder.by_name(&name) { + Ok(src) => src, + Err(_) if zip_file_name.ends_with("/") => { + // N.B.: Pex can omit original directory entries when those directories are + // empty. + compressed_whl.add_directory(zip_file_name.to_string(), options)?; + continue; + } + Err(err) => bail!( + "Mapped {zip_file_name} in {file_name} to {name} which was not found: {err}", + file_name = wheel_file.file_name + ), + }; + if src.is_dir() { + compressed_whl.add_directory(zip_file_name.to_string(), options)?; + } else { + compressed_whl.start_file(zip_file_name, options)?; + if src.name() == record_name { + compressed_whl.write_all( + record + .filtered( + &metadata_dirs, + stash_dir.as_deref(), + if legacy_bin_dir { + Some(Path::new("bin")) + } else { + None + }, + )? + .as_slice(), + )?; + } else { + io::copy(&mut src, &mut compressed_whl)?; + } + } + } + (dest_wheel, compressed_whl) + } else { + let dest_wheel = dest_dir.join(wheel_file.file_name); + let mut compressed_whl = ZipWriter::new(File::create(&dest_wheel)?); + for entry in record.entries() { + let dst_rel_path = entry.path.as_ref(); + let name = 'result: { + if let Ok(data_dir_rel_path) = dst_rel_path.strip_prefix(&data_dir) { + if let Some(stash_dir) = stash_dir.as_deref() { + break 'result format!( + "{prefix}{stash_dir}/{rel_path}", + stash_dir = stash_dir.display(), + rel_path = PosixPath::relpath( + normalized_data_dir_relpath( + stash_dir, + data_dir_rel_path, + wheel_file, + &zip_finder + )? + .as_ref() + )? + ); + } + if legacy_bin_dir { + let rel_path = normalized_data_dir_relpath( + Path::new("bin"), + data_dir_rel_path, + wheel_file, + &zip_finder, + )?; + assert!(starts_with(rel_path.as_ref(), "bin")); + break 'result format!( + "{prefix}{rel_path}", + rel_path = PosixPath::relpath(rel_path.as_ref())? + ); + } + } + format!( + "{prefix}{rel_path}", + rel_path = PosixPath::relpath(dst_rel_path)? + ) + }; + let mut src = zip_finder.by_name(&name)?; + compressed_whl.start_file_from_path(dst_rel_path, file_options)?; + io::copy(&mut src, &mut compressed_whl)?; + } + (dest_wheel, compressed_whl) + }; + + compressed_whl.finish()?; + Ok(File::open(dest_wheel)?) +} + +fn compress_whl_chroot( + wheel_dir: &Path, + wheel_file: &WheelFile, + options: &WheelOptions, + dest_dir: &Path, +) -> anyhow::Result { + fs::create_dir_all(dest_dir)?; + let file_options = options.file_options()?; + + let metadata_dirs = wheel_file.metadata_dirs(wheel_dir)?; + let (record, record_rel_path) = Record::parse(wheel_dir, &metadata_dirs)?; + + let (stash_dir, legacy_bin_dir) = 'result: { + if let Some(layout) = WheelLayout::load_from_dir(wheel_dir)? { + let stash_dir = wheel_dir.join(layout.stash_dir); + if stash_dir.exists() { + break 'result (Some(stash_dir), None); + } + } + let bin_dir = wheel_dir.join("bin"); + if bin_dir.is_dir() && !record.wheel_has_bin_dir() { + break 'result (None, Some(bin_dir)); + } + (None, None) + }; + + let data_dir = metadata_dirs.data_dir().as_path(); + let pex_info_dir = wheel_dir.join(metadata_dirs.pex_info_dir().to_string()); + let (dest_wheel, compressed_whl) = if let Some(wheel_info) = + OriginalWheelInfo::load_from_dir(pex_info_dir)? + { + let dest_wheel = dest_dir.join(wheel_info.filename()); + let mut compressed_whl = ZipWriter::new(File::create(&dest_wheel)?); + for (zip_file_name, options) in + wheel_info.iter_file_options(file_options, options.timestamp)? + { + if zip_file_name.ends_with(".pyc") { + continue; + } + let dst_rel_path = zip_file_name.as_path(); + let dst_rel_path = dst_rel_path.as_ref(); + let mut src = wheel_dir.join(dst_rel_path); + if let Ok(data_dir_rel_path) = dst_rel_path.strip_prefix(&data_dir) { + if let Some(stash_dir) = stash_dir.as_deref() { + src = stash_dir.join(normalized_data_dir_relpath( + stash_dir, + data_dir_rel_path, + wheel_file, + &LoosePathFinder, + )?) + } else if let Some(bin_dir) = legacy_bin_dir.as_deref() { + let rel_path = normalized_data_dir_relpath( + bin_dir, + data_dir_rel_path, + wheel_file, + &LoosePathFinder, + )?; + assert!(starts_with(rel_path.as_ref(), "bin")); + src = bin_dir.join(rel_path) + } + } + if src.is_dir() { + compressed_whl.add_directory_from_path(dst_rel_path, options)?; + } else { + compressed_whl.start_file_from_path(dst_rel_path, options)?; + if dst_rel_path == record_rel_path { + compressed_whl.write_all( + record + .filtered( + &metadata_dirs, + stash_dir.as_deref().map(|dir| { + dir.strip_prefix(wheel_dir) + .expect("We appended the stash dir to the wheel dir above.") + }), + legacy_bin_dir.as_deref().map(|dir| { + dir.strip_prefix(wheel_dir).expect( + "We appended the legacy bin dir to the wheel dir above.", + ) + }), + )? + .as_slice(), + )?; + } else { + io::copy(&mut File::open(src)?, &mut compressed_whl)?; + } + } + } + (dest_wheel, compressed_whl) + } else { + let dest_wheel = dest_dir.join(wheel_file.file_name); + let mut compressed_whl = ZipWriter::new(File::create(&dest_wheel)?); + for entry in record.entries() { + let dst_rel_path = entry.path.as_ref(); + let mut src = wheel_dir.join(dst_rel_path); + if let Ok(data_dir_rel_path) = dst_rel_path.strip_prefix(&data_dir) { + if let Some(stash_dir) = stash_dir.as_deref() { + src = stash_dir.join(normalized_data_dir_relpath( + stash_dir, + data_dir_rel_path, + wheel_file, + &LoosePathFinder, + )?) + } else if let Some(bin_dir) = legacy_bin_dir.as_deref() { + let rel_path = normalized_data_dir_relpath( + bin_dir, + data_dir_rel_path, + wheel_file, + &LoosePathFinder, + )?; + assert!(starts_with(rel_path.as_ref(), "bin")); + src = bin_dir.join(rel_path) + } + } + compressed_whl.start_file_from_path(dst_rel_path, file_options)?; + io::copy(&mut File::open(src)?, &mut compressed_whl)?; + } + (dest_wheel, compressed_whl) + }; + + compressed_whl.finish()?; + Ok(File::open(dest_wheel)?) +} + +fn starts_with(path: &Path, name: impl AsRef) -> bool { + matches!(path.components().next(), Some(Component::Normal(named)) if named == name.as_ref()) +} + +trait ProjectPathFinder<'a> { + fn find( + &'a self, + strip_prefix: &Path, + prefix: PathBuf, + project: &WheelFile, + suffix: PathBuf, + ) -> anyhow::Result>; +} + +struct LoosePathFinder; + +impl<'a> ProjectPathFinder<'a> for LoosePathFinder { + fn find( + &'a self, + strip_prefix: &Path, + prefix: PathBuf, + project: &WheelFile, + suffix: PathBuf, + ) -> anyhow::Result> { + for entry in WalkDir::new(strip_prefix.join(&prefix)).min_depth(1) { + let entry = entry?; + let prefix_rel_path = entry + .path() + .strip_prefix(strip_prefix) + .expect("We walked the prefix; so we can always safely strip it."); + if prefix_rel_path.ends_with(&suffix) { + for component in prefix_rel_path.components() { + if let Some(name) = component.as_os_str().to_str() + && (name == project.raw_project_name + || name == project.project_name.as_ref()) + { + return Ok(Cow::Owned(prefix_rel_path.to_owned())); + } + } + } + } + bail!( + "Failed to find path in wheel {wheel} rooted at {root} with prefix {prefix} and \ + suffix {suffix}", + wheel = project.file_name, + root = strip_prefix.display(), + prefix = prefix.display(), + suffix = suffix.display() + ) + } +} + +struct ZipPathFinder<'a, R: Read + Seek> { + zip: ZipArchive, + prefix: Option<&'a str>, +} + +impl<'a, R: Read + Seek> Deref for ZipPathFinder<'a, R> { + type Target = ZipArchive; + + fn deref(&self) -> &::Target { + &self.zip + } +} + +impl<'a, R: Read + Seek> DerefMut for ZipPathFinder<'a, R> { + fn deref_mut(&mut self) -> &mut ::Target { + &mut self.zip + } +} + +impl<'a, R: Read + Seek> ProjectPathFinder<'a> for ZipPathFinder<'a, R> { + fn find( + &'a self, + strip_prefix: &Path, + prefix: PathBuf, + project: &WheelFile, + suffix: PathBuf, + ) -> anyhow::Result> { + let (strip_prefix, prefix) = if let Some(zip_prefix) = self.prefix { + let strip_prefix = Path::new(zip_prefix).join(strip_prefix); + let zip_file_name_path = strip_prefix.join(prefix); + ( + Cow::Owned(strip_prefix), + ZipFileName::from(zip_file_name_path)?, + ) + } else { + ( + Cow::Borrowed(strip_prefix), + ZipFileName::from(strip_prefix.join(prefix))?, + ) + }; + let suffix = ZipFileName::from(suffix)?; + for file_name in self.zip.file_names() { + if let Some(rel_path) = file_name.strip_prefix(prefix.as_str()) + && let Some(rel_path) = rel_path.strip_suffix(suffix.as_str()) + { + for component in rel_path.split("/") { + if component == project.raw_project_name + || component == project.project_name.as_ref() + { + return Ok(Cow::Borrowed( + Path::new(file_name).strip_prefix(strip_prefix)?, + )); + } + } + } + } + bail!( + "Failed to find path in wheel {wheel} zip with prefix {prefix} and suffix {suffix}", + wheel = project.file_name, + ) + } +} + +fn normalized_data_dir_relpath<'a>( + prefix: &Path, + path: &'a Path, + wheel_file: &WheelFile, + project_path_finder: &'a impl ProjectPathFinder<'a>, +) -> anyhow::Result> { + let mut components = path.components(); + let start = components.next(); + if let Some(start) = start + && matches!(start, Component::Normal(name) if name == "scripts") + { + Ok(Cow::Owned( + [Component::Normal(OsStr::new("bin"))] + .into_iter() + .chain(components) + .collect(), + )) + } else if let Some(start) = start + && matches!(start, Component::Normal(name) if name == "headers") + { + // N.B.: You'd think sysconfig_paths["include"] would be the right answer here but both + // `pip`, and by emulation, `uv pip`, map `*.data/headers` to + // `/include/site/pythonX.Y/`. Traditional PEXes honors this; so we + // need to as well. + // + // The "mess" is admitted and described at length here: + // + https://discuss.python.org/t/clarification-on-a-wheels-header-data/9305 + // + https://discuss.python.org/t/deprecating-the-headers-wheel-data-key/23712 + Ok(project_path_finder.find( + prefix, + Path::new("include").join("site"), + wheel_file, + components.collect(), + )?) + } else { + Ok(Cow::Borrowed(path)) + } +} diff --git a/crates/pex/src/wheel/original_wheel_info.rs b/crates/repackage/src/original_wheel_info.rs similarity index 84% rename from crates/pex/src/wheel/original_wheel_info.rs rename to crates/repackage/src/original_wheel_info.rs index 2c45398..2ca7690 100644 --- a/crates/pex/src/wheel/original_wheel_info.rs +++ b/crates/repackage/src/original_wheel_info.rs @@ -28,10 +28,10 @@ impl DateTime { } #[derive(Deserialize)] -pub(crate) struct ZipFileName<'a>(#[serde(borrow)] Cow<'a, str>); +pub struct ZipFileName<'a>(#[serde(borrow)] Cow<'a, str>); impl<'a> ZipFileName<'a> { - pub(crate) fn from(path: PathBuf) -> anyhow::Result { + pub fn from(path: PathBuf) -> anyhow::Result { Ok(Self(Cow::Owned( path.into_os_string() .into_string() @@ -40,12 +40,12 @@ impl<'a> ZipFileName<'a> { } #[cfg(unix)] - pub(crate) fn as_path(&self) -> Cow<'_, Path> { + pub fn as_path(&self) -> Cow<'_, Path> { Cow::Borrowed(Path::new(self.0.as_ref())) } #[cfg(windows)] - pub(crate) fn as_path(&self) -> Cow<'_, Path> { + pub fn as_path(&self) -> Cow<'_, Path> { Cow::Owned(self.0.split("/").collect()) } } @@ -71,7 +71,7 @@ struct RawOriginalWheelInfo<'a> { } #[self_referencing] -pub(crate) struct OriginalWheelInfo { +pub struct OriginalWheelInfo { data: Vec, #[borrows(data)] #[covariant] @@ -79,11 +79,11 @@ pub(crate) struct OriginalWheelInfo { } impl OriginalWheelInfo { - pub(crate) const fn file_name() -> &'static str { + pub const fn file_name() -> &'static str { "original-whl-info.json" } - pub(crate) fn load_from_dir(dir: impl AsRef) -> anyhow::Result> { + pub fn load_from_dir(dir: impl AsRef) -> anyhow::Result> { let path = dir.as_ref().join(Self::file_name()); Ok(if path.exists() { let mut file = File::open(path)?; @@ -94,17 +94,17 @@ impl OriginalWheelInfo { }) } - pub(crate) fn read(contents: impl Read, size: u64) -> anyhow::Result { + pub fn read(contents: impl Read, size: u64) -> anyhow::Result { let mut data = Vec::with_capacity(usize::try_from(size)?); BufReader::new(contents).read_to_end(&mut data)?; Ok(Self::try_new(data, |data| serde_json::from_slice(data))?) } - pub(crate) fn filename(&self) -> &str { + pub fn filename(&self) -> &str { self.borrow_info().filename } - pub(crate) fn iter_file_options( + pub fn iter_file_options( &self, base_options: SimpleFileOptions, timestamp: Option>, diff --git a/crates/resolver/Cargo.toml b/crates/resolver/Cargo.toml new file mode 100644 index 0000000..7ea6b01 --- /dev/null +++ b/crates/resolver/Cargo.toml @@ -0,0 +1,17 @@ +[package] +name = "resolver" +edition = { workspace = true } +publish = false + +[dependencies] +anyhow = { workspace = true } +dashmap = { workspace = true } +indexmap = { workspace = true } +logging_timer = { workspace = true } +pep440_rs = { workspace = true } +pep508_rs = { workspace = true } +python-platform = { path = "../python-platform" } +regex = { workspace = true } +url = { workspace = true } +version-ranges = { workspace = true } +wheel = { path = "../wheel" } diff --git a/crates/pex/src/dependency_configuration.rs b/crates/resolver/src/dependency_configuration.rs similarity index 89% rename from crates/pex/src/dependency_configuration.rs rename to crates/resolver/src/dependency_configuration.rs index 4cb31b8..16e3afb 100644 --- a/crates/pex/src/dependency_configuration.rs +++ b/crates/resolver/src/dependency_configuration.rs @@ -15,8 +15,6 @@ use regex::{Regex, RegexBuilder}; use url::Url; use version_ranges::Ranges; -use crate::PexInfo; - enum ExcludeConstraint { None, VersionRanges(Ranges), @@ -35,10 +33,8 @@ static OVERRIDE_REPLACE: LazyLock = LazyLock::new(|| { }); impl DependencyConfiguration { - pub(crate) fn load(pex_info: &PexInfo) -> anyhow::Result { - let excluded = pex_info - .raw() - .excluded + pub fn parse(excluded: &[&str], overridden: &[&str]) -> anyhow::Result { + let parsed_excludes = excluded .iter() .map(|excluded| { match Requirement::::from_str(excluded).map_err(|err| anyhow!("{err}")) { @@ -57,8 +53,8 @@ impl DependencyConfiguration { }) .collect::>>()?; - let mut overridden: HashMap>> = HashMap::new(); - for override_spec in &pex_info.raw().overridden { + let mut parsed_overrides: HashMap>> = HashMap::new(); + for override_spec in overridden { let (name, requirement) = if let Some(captures) = OVERRIDE_REPLACE.captures(override_spec) && let Some(name) = captures.name("project") @@ -72,16 +68,19 @@ impl DependencyConfiguration { let requirement = Requirement::from_str(override_spec)?; (requirement.name.clone(), requirement) }; - overridden.entry(name).or_default().insert(requirement); + parsed_overrides + .entry(name) + .or_default() + .insert(requirement); } Ok(Self { - excluded, - overridden, + excluded: parsed_excludes, + overridden: parsed_overrides, }) } - pub(crate) fn excluded(&self, requirement: &Requirement) -> bool { + pub fn excluded(&self, requirement: &Requirement) -> bool { if let Some(constraint) = self.excluded.get(&requirement.name) { match constraint { ExcludeConstraint::None => true, @@ -101,7 +100,7 @@ impl DependencyConfiguration { } } - pub(crate) fn overridden<'a>( + pub fn overridden<'a>( &self, requirement: &Requirement, target: &impl PythonPlatform<'a>, diff --git a/crates/resolver/src/lib.rs b/crates/resolver/src/lib.rs new file mode 100644 index 0000000..6f105ba --- /dev/null +++ b/crates/resolver/src/lib.rs @@ -0,0 +1,349 @@ +// Copyright 2026 Pex project contributors. +// SPDX-License-Identifier: Apache-2.0 + +#![deny(clippy::all)] + +use std::collections::{BTreeSet, HashMap, VecDeque}; +use std::sync::Arc; + +use anyhow::{anyhow, bail}; +use dashmap::DashMap; +use indexmap::IndexMap; +use logging_timer::time; +use pep440_rs::{Version, VersionSpecifiers}; +use pep508_rs::{ExtraName, PackageName, Requirement, VersionOrUrl}; +use python_platform::PythonPlatform; +use url::Url; +use wheel::{MetadataDirs, MetadataReader, Tag, WheelDir, WheelFile, WheelMetadata}; + +use crate::dependency_configuration::DependencyConfiguration; + +pub mod dependency_configuration; + +pub struct ResolvedWheel<'a> { + file_name: &'a str, + pub project_name: &'a str, + pub version: &'a str, + pub root_is_purelib: bool, + pub metadata_dirs: MetadataDirs, +} + +impl<'a> ResolvedWheel<'a> { + pub fn data_dir(&'a self) -> WheelDir<'a> { + self.metadata_dirs.data_dir() + } + + pub fn dist_info_dir(&'a self) -> WheelDir<'a> { + self.metadata_dirs.dist_info_dir() + } + + pub fn pex_info_dir(&'a self) -> WheelDir<'a> { + self.metadata_dirs.pex_info_dir() + } +} + +#[derive(Clone)] +pub struct CollectWheelMetadata<'a>(Arc>>); + +impl<'a> Default for CollectWheelMetadata<'a> { + fn default() -> Self { + Self::new() + } +} + +impl<'a> CollectWheelMetadata<'a> { + pub fn new() -> Self { + Self(Arc::new(DashMap::new())) + } + + pub fn into_collected(self) -> anyhow::Result>> { + let metadata = Arc::try_unwrap(self.0) + .ok() + .ok_or_else(|| anyhow!("Metadata is still being collected."))?; + Ok(metadata.into_iter().map(|(_, metadata)| metadata).collect()) + } + + fn collect(&self, file_name: &'a str, metadata_func: impl FnOnce() -> WheelMetadata<'a>) { + self.0.entry(file_name).or_insert_with(metadata_func); + } +} + +#[time("debug", "{}")] +pub fn resolve_wheels<'a>( + target: &impl PythonPlatform<'a>, + requirements: Vec>, + wheel_files: impl Fn() -> anyhow::Result>>, + metadata_reader: &mut impl MetadataReader, + dependency_configuration: &DependencyConfiguration, + collect_extra_metadata: Option>, + ignore_errors: bool, +) -> anyhow::Result>> { + let supported_tags: HashMap = target + .supported_tags() + .enumerate() + .map(|(idx, tag)| Tag::parse(tag).map(|tag| (tag, idx))) + .collect::>()?; + + let ranked_wheel_files = wheel_files()? + .into_iter() + .filter_map(|wheel_file| { + for tag in &wheel_file.tags { + if let Some(rank) = supported_tags.get(tag) { + return Some(RankedWheelFile { + wheel_file, + rank: *rank, + }); + } + } + None + }) + .collect::>(); + + let ranked_wheels = read_wheel_metadata( + target.version().as_ref(), + ranked_wheel_files, + metadata_reader, + )?; + + struct WheelInfo<'b> { + file_name: &'b str, + raw_project_name: &'b str, + raw_version: &'b str, + version: Version, + requires_dists: Vec>, + requires_python: Option, + root_is_purelib: bool, + rank: usize, + metadata_dirs: MetadataDirs, + } + + let mut wheels_by_project_name: HashMap> = + HashMap::with_capacity(ranked_wheels.len()); + for ranked_wheel in ranked_wheels { + wheels_by_project_name + .entry(ranked_wheel.metadata.project_name) + .or_default() + .push(WheelInfo { + file_name: ranked_wheel.metadata.file_name, + raw_project_name: ranked_wheel.metadata.raw_project_name, + raw_version: ranked_wheel.metadata.raw_version, + version: ranked_wheel.metadata.version, + requires_dists: ranked_wheel.metadata.requires_dists, + requires_python: ranked_wheel.metadata.requires_python, + root_is_purelib: ranked_wheel.metadata.root_is_purelib, + rank: ranked_wheel.rank, + metadata_dirs: ranked_wheel.metadata.metadata_dirs, + }) + } + for wheels in wheels_by_project_name.values_mut() { + wheels.sort_by_key(|WheelInfo { rank, .. }| *rank); + } + + let mut resolved_by_project_name: IndexMap = + IndexMap::with_capacity(wheels_by_project_name.len()); + let mut indexed_extras: Vec> = vec![vec![]]; + let mut to_resolve: VecDeque<(Requirement, usize)> = requirements + .into_iter() + .filter_map(|requirement| { + if dependency_configuration.excluded(&requirement) { + None + } else { + Some((requirement, 0)) + } + }) + .collect::>(); + + let marker_env = target.marker_env(); + let no_wheels: Vec = vec![]; + while let Some((requirement, extras_index)) = to_resolve.pop_front() { + let requirement_key = RequirementKey::of(&requirement); + + // Already processed. + if resolved_by_project_name.contains_key(&requirement_key) { + continue; + } + if resolved_by_project_name + .keys() + .any(|key| key.satisfies(&requirement_key)) + { + continue; + } + + // Does not apply. + if !requirement + .marker + .evaluate(marker_env, &indexed_extras[extras_index]) + { + continue; + } + + let wheels = wheels_by_project_name + .get(&requirement.name) + .or({ + if ignore_errors { + Some(&no_wheels) + } else { + None + } + }) + .ok_or_else(|| { + let inapplicable_wheels = wheel_files() + .expect( + "We already parsed wheel files once successfully and parsing is \ + deterministic.", + ) + .into_iter() + .filter_map(|wheel_file| { + if wheel_file.project_name == requirement.name { + Some(wheel_file.file_name) + } else { + None + } + }) + .collect::>(); + let count = inapplicable_wheels.len(); + let wheels = if count == 1 { "wheel" } else { "wheels" }; + let reason = if inapplicable_wheels.is_empty() { + format_args!( + "The PEX contains {count} embedded {wheels} for project: {project}", + project = requirement.name + ) + } else { + format_args!( + "The PEX contains {count} inapplicable {wheels} for project: \ + {project}\n\ + {inapplicable_wheels}", + project = requirement.name, + inapplicable_wheels = inapplicable_wheels.join("\n") + ) + }; + anyhow!( + "The requirement {requirement} cannot be satisfied for {target}.\n\ + {reason}", + target = target.description(), + reason = reason, + ) + })?; + for WheelInfo { + file_name, + raw_project_name, + raw_version, + version, + requires_dists, + requires_python, + root_is_purelib, + metadata_dirs, + .. + } in wheels + { + if let Some(version_or_url) = requirement.version_or_url.as_ref() { + match version_or_url { + VersionOrUrl::VersionSpecifier(version_specifier) => { + if !version_specifier.contains(version) { + continue; + } + } + VersionOrUrl::Url(url) => bail!("URL requirements are not supported: {url}"), + } + } + let extras_index = if requirement.extras.is_empty() { + 0 + } else { + let idx = indexed_extras.len(); + indexed_extras.push(requirement.extras); + idx + }; + if let Some(extra_metadata) = collect_extra_metadata.as_ref() { + extra_metadata.collect(file_name, || WheelMetadata { + file_name, + raw_project_name, + project_name: requirement.name.clone(), + raw_version, + version: version.clone(), + requires_dists: requires_dists.clone(), + requires_python: requires_python.clone(), + root_is_purelib: *root_is_purelib, + metadata_dirs: metadata_dirs.clone(), + }) + } + resolved_by_project_name.insert( + requirement_key, + ResolvedWheel { + file_name, + project_name: raw_project_name, + version: raw_version, + root_is_purelib: *root_is_purelib, + metadata_dirs: metadata_dirs.clone(), + }, + ); + for req in requires_dists { + if dependency_configuration.excluded(req) { + continue; + } + to_resolve.push_back(( + dependency_configuration + .overridden(req, target, &indexed_extras[extras_index])? + .unwrap_or_else(|| req.clone()), + extras_index, + )) + } + break; + } + } + Ok(resolved_by_project_name + .into_values() + .map(|resolved_wheel| (resolved_wheel.file_name, resolved_wheel)) + .collect()) +} + +fn read_wheel_metadata<'a>( + python_version: &Version, + ranked_wheel_files: Vec>, + metadata_reader: &mut impl MetadataReader, +) -> anyhow::Result>> { + let mut ranked_wheels = Vec::with_capacity(ranked_wheel_files.len()); + for ranked_wheel_file in ranked_wheel_files { + let metadata_dirs = metadata_reader.locate_dirs(&ranked_wheel_file.wheel_file)?; + let metadata = + WheelMetadata::parse(ranked_wheel_file.wheel_file, metadata_dirs, metadata_reader)?; + if let Some(requires_python) = &metadata.requires_python + && !requires_python.contains(python_version) + { + continue; + } + ranked_wheels.push(RankedWheel { + metadata, + rank: ranked_wheel_file.rank, + }); + } + Ok(ranked_wheels) +} + +struct RankedWheelFile<'a> { + wheel_file: WheelFile<'a>, + rank: usize, +} + +struct RankedWheel<'a> { + metadata: WheelMetadata<'a>, + rank: usize, +} + +#[derive(Hash, Eq, PartialEq)] +struct RequirementKey { + package_name: PackageName, + extras: BTreeSet, +} + +impl RequirementKey { + fn of(requirement: &Requirement) -> Self { + Self { + package_name: requirement.name.clone(), + extras: requirement.extras.iter().cloned().collect(), + } + } + + fn satisfies(&self, requested: &RequirementKey) -> bool { + self.package_name == requested.package_name && requested.extras.is_subset(&self.extras) + } +} diff --git a/crates/tools/Cargo.toml b/crates/tools/Cargo.toml index 4117fb6..46af5cf 100644 --- a/crates/tools/Cargo.toml +++ b/crates/tools/Cargo.toml @@ -34,6 +34,8 @@ platform = { path = "../platform" } python-platform = { path = "../python-platform" } rayon = { workspace = true } regex = { workspace = true } +repackage = { path = "../repackage" } +resolver = { path = "../resolver" } scripts = { path = "../scripts" } shell-quote = { workspace = true } serde = { workspace = true } diff --git a/crates/tools/src/commands/repository/extract.rs b/crates/tools/src/commands/repository/extract.rs index cda6ffc..b87df1c 100644 --- a/crates/tools/src/commands/repository/extract.rs +++ b/crates/tools/src/commands/repository/extract.rs @@ -25,10 +25,10 @@ use pex::{ Pex, PexPath, RawPexInfo, - WheelOptions, collect_loose_user_source, collect_zipped_user_source_indexes, }; +use repackage::{WheelOptions, repackage_wheels}; use scripts::IdentifyInterpreter; use tar::Header; use zip::{CompressionMethod, ZipArchive}; @@ -83,10 +83,10 @@ pub(crate) fn extract(python: &Path, pex: Pex, args: ExtractArgs) -> anyhow::Res ) }; let options = WheelOptions::new(CompressionMethod::Deflated, None, timestamp); - pex::repackage_wheels(&pex, &options, &args.dest_dir)?; + repackage_wheels(&pex, &options, &args.dest_dir)?; let pex_path = PexPath::from_pex_info(&pex.info, true); for additional_pex in pex_path.load_pexes()? { - pex::repackage_wheels(&additional_pex, &options, &args.dest_dir)?; + repackage_wheels(&additional_pex, &options, &args.dest_dir)?; } if args.sources || args.serve { @@ -253,15 +253,7 @@ build-backend = "setuptools.build_meta" "packages", sources.packages.into_iter().map(Cow::Owned).collect(), ); - let install_requires = IniList( - "install_requires", - pex_info - .requirements - .iter() - .copied() - .map(Cow::Borrowed) - .collect(), - ); + let install_requires = IniList("install_requires", pex_info.requirements.to_vec()); let mut console_scripts = Vec::with_capacity(1); if let Some(entry_point) = pex_info.entry_point diff --git a/crates/tools/src/commands/venv.rs b/crates/tools/src/commands/venv.rs index dfcfa10..6d990d7 100644 --- a/crates/tools/src/commands/venv.rs +++ b/crates/tools/src/commands/venv.rs @@ -17,7 +17,8 @@ use fs_err as fs; use interpreter::SearchPath; use log::warn; use pep508_rs::PackageName; -use pex::{CollectWheelMetadata, Layout, Pex, PexPath}; +use pex::{Layout, Pex, PexPath}; +use resolver::CollectWheelMetadata; use shell_quote::Quote; use venv::virtualenv::FileSystemLinker; use venv::{Provenance, Virtualenv, venv_pex}; diff --git a/crates/tools/src/resolve.rs b/crates/tools/src/resolve.rs index b75e638..1b88b40 100644 --- a/crates/tools/src/resolve.rs +++ b/crates/tools/src/resolve.rs @@ -7,7 +7,8 @@ use indexmap::IndexMap; use interpreter::{Interpreter, SearchPath}; use pep440_rs::{Version, VersionSpecifiers}; use pep508_rs::{PackageName, Requirement}; -use pex::{CollectWheelMetadata, Pex}; +use pex::Pex; +use resolver::CollectWheelMetadata; use url::Url; pub(crate) struct WheeInfo<'a> { diff --git a/crates/venv/Cargo.toml b/crates/venv/Cargo.toml index 43ccd52..8a2e1f1 100644 --- a/crates/venv/Cargo.toml +++ b/crates/venv/Cargo.toml @@ -20,12 +20,14 @@ pex = { path = "../pex" } platform = { path = "../platform" } python-platform = { path = "../python-platform" } rayon = { workspace = true } +resolver = { path = "../resolver" } rust-ini = { workspace = true } scripts = { path = "../scripts" } serde_json = { workspace = true } target-lexicon = { workspace = true } tempfile = { workspace = true } walkdir = { workspace = true } +wheel = { path = "../wheel" } zip = { workspace = true } [target.'cfg(windows)'.dependencies] diff --git a/crates/venv/src/venv_pex.rs b/crates/venv/src/venv_pex.rs index e3d766e..7d5b3eb 100644 --- a/crates/venv/src/venv_pex.rs +++ b/crates/venv/src/venv_pex.rs @@ -1,6 +1,7 @@ // Copyright 2026 Pex project contributors. // SPDX-License-Identifier: Apache-2.0 +use std::borrow::Cow; use std::collections::HashMap; use std::fmt::{Display, Formatter}; use std::io::{BufReader, Cursor, ErrorKind, Read, Seek, Write}; @@ -16,16 +17,9 @@ use indexmap::IndexMap; use logging_timer::time; use pex::{ BinPath, - EntryPoint, - EntryPoints, Layout, - MetadataDirs, Pex, RawPexInfo, - Record, - ResolvedWheel, - WheelDir, - WheelLayout, collect_loose_user_source, collect_zipped_user_source_indexes, filter_zipped_user_source, @@ -33,6 +27,7 @@ use pex::{ use platform::{Perms, mark_executable, path_as_bytes, path_as_str, symlink_or_link_or_copy}; use python_platform::PythonVersion; use rayon::iter::{IntoParallelIterator, ParallelIterator}; +use resolver::ResolvedWheel; use scripts::{ Scripts, VenvPex, @@ -42,6 +37,7 @@ use scripts::{ VenvPexRepl, }; use serde_json::Value; +use wheel::{EntryPoint, EntryPoints, MetadataDirs, Record, WheelDir, WheelLayout}; use zip::ZipArchive; use crate::Provenance; @@ -1201,7 +1197,7 @@ fn write_repl( }; struct ActivationDetails<'a> { - requirements: &'a Vec<&'a str>, + requirements: &'a Vec>, selected_wheels: &'a Vec<&'a str>, } diff --git a/crates/wheel/Cargo.toml b/crates/wheel/Cargo.toml new file mode 100644 index 0000000..61c5e0d --- /dev/null +++ b/crates/wheel/Cargo.toml @@ -0,0 +1,27 @@ +[package] +name = "wheel" +edition = { workspace = true } +publish = false + +[dependencies] +anyhow = { workspace = true } +chrono = { workspace = true } +csv = { workspace = true } +fs-err = { workspace = true } +mailparse = { workspace = true } +ouroboros = { workspace = true } +pep440_rs = { workspace = true } +pep508_rs = { workspace = true } +python-pkginfo = { workspace = true } +regex = { workspace = true } +rfc2047-decoder = { workspace = true } +rust-ini = { workspace = true } +serde = { workspace = true } +serde_json = { workspace = true } +url = { workspace = true } +zip = { workspace = true } + +[dev-dependencies] +glob = { workspace = true } +rstest = { workspace = true } +testing = { path = "../testing" } \ No newline at end of file diff --git a/crates/pex/src/wheel/entry_points.rs b/crates/wheel/src/entry_points.rs similarity index 100% rename from crates/pex/src/wheel/entry_points.rs rename to crates/wheel/src/entry_points.rs diff --git a/crates/pex/src/wheel/file.rs b/crates/wheel/src/file.rs similarity index 97% rename from crates/pex/src/wheel/file.rs rename to crates/wheel/src/file.rs index b0e8c3f..4da87bb 100644 --- a/crates/pex/src/wheel/file.rs +++ b/crates/wheel/src/file.rs @@ -10,12 +10,13 @@ use std::path::{Component, Path, PathBuf}; use std::str::FromStr; use anyhow::{anyhow, bail}; -use interpreter::Tag; use ouroboros::self_referencing; use pep440_rs::Version; use pep508_rs::PackageName; use zip::ZipArchive; +use crate::Tag; + pub struct WheelDir<'a> { project_name: &'a str, version: &'a str, @@ -23,7 +24,7 @@ pub struct WheelDir<'a> { } impl<'a> WheelDir<'a> { - pub(crate) fn contains(&self, path: &Path) -> bool { + pub fn contains(&self, path: &Path) -> bool { if let Some(Component::Normal(start)) = path.components().next() { let start = start.as_encoded_bytes(); if start.starts_with(self.project_name.as_bytes()) { @@ -126,7 +127,7 @@ pub struct MetadataDirs { } impl MetadataDirs { - pub(crate) fn locate_in_dir( + pub fn locate_in_dir( wheel_dir: &Path, project_name: &PackageName, version: &Version, @@ -150,7 +151,7 @@ impl MetadataDirs { Self::locate(project_name, version, listing, wheel_dir.display()) } - pub(crate) fn locate_in_zip( + pub fn locate_in_zip( zip: &ZipArchive, zip_source: impl Display, prefix: Option<&str>, @@ -234,10 +235,10 @@ impl Clone for MetadataDirs { pub struct WheelFile<'a> { pub file_name: &'a str, - pub(crate) raw_project_name: &'a str, + pub raw_project_name: &'a str, pub project_name: PackageName, - pub(crate) raw_version: &'a str, - pub(crate) version: Version, + pub raw_version: &'a str, + pub version: Version, _build_tag: Option<&'a str>, pub tags: Vec>, } @@ -310,11 +311,11 @@ impl<'a> WheelFile<'a> { }) } - pub(crate) fn metadata_dirs(&self, wheel_dir: &Path) -> anyhow::Result { + pub fn metadata_dirs(&self, wheel_dir: &Path) -> anyhow::Result { MetadataDirs::locate_in_dir(wheel_dir, &self.project_name, &self.version) } - pub(crate) fn metadata_dirs_from_zip( + pub fn metadata_dirs_from_zip( &self, zip: &ZipArchive, zip_source: impl Display, @@ -335,11 +336,11 @@ mod tests { use std::borrow::Cow; use std::str::FromStr; - use interpreter::Tag; use pep440_rs::Version; use pep508_rs::PackageName; - use crate::wheel::file::{WheelFile, locate_metadata_dir}; + use crate::file::locate_metadata_dir; + use crate::{Tag, WheelFile}; #[test] fn test_parse_wheel_file_name_simple() { diff --git a/crates/pex/src/wheel/layout.rs b/crates/wheel/src/layout.rs similarity index 100% rename from crates/pex/src/wheel/layout.rs rename to crates/wheel/src/layout.rs diff --git a/crates/pex/src/wheel/mod.rs b/crates/wheel/src/lib.rs similarity index 62% rename from crates/pex/src/wheel/mod.rs rename to crates/wheel/src/lib.rs index 1ba8001..123976e 100644 --- a/crates/pex/src/wheel/mod.rs +++ b/crates/wheel/src/lib.rs @@ -1,18 +1,18 @@ // Copyright 2026 Pex project contributors. // SPDX-License-Identifier: Apache-2.0 +#![deny(clippy::all)] + mod entry_points; mod file; mod layout; mod metadata; -mod original_wheel_info; -mod package; mod record; +mod tag; pub use entry_points::{EntryPoint, EntryPoints}; pub use file::{MetadataDirs, WheelDir, WheelFile}; pub use layout::WheelLayout; -pub(crate) use metadata::MetadataReader; -pub use metadata::WheelMetadata; -pub use package::{WheelOptions, recompress_zipped_whl, repackage_wheels}; +pub use metadata::{MetadataReader, WheelMetadata}; pub use record::Record; +pub use tag::Tag; diff --git a/crates/pex/src/wheel/metadata.rs b/crates/wheel/src/metadata.rs similarity index 96% rename from crates/pex/src/wheel/metadata.rs rename to crates/wheel/src/metadata.rs index dd3c4af..f254545 100644 --- a/crates/pex/src/wheel/metadata.rs +++ b/crates/wheel/src/metadata.rs @@ -10,7 +10,7 @@ use pep508_rs::{PackageName, Requirement}; use python_pkginfo::Metadata; use url::Url; -use crate::wheel::file::{MetadataDirs, WheelFile}; +use crate::file::{MetadataDirs, WheelFile}; pub struct WheelMetadata<'a> { pub file_name: &'a str, @@ -24,7 +24,7 @@ pub struct WheelMetadata<'a> { pub metadata_dirs: MetadataDirs, } -pub(crate) trait MetadataReader { +pub trait MetadataReader { fn locate_dirs(&mut self, wheel_file: &WheelFile) -> anyhow::Result; fn read( &mut self, @@ -47,7 +47,7 @@ fn parse_root_is_purelib_from_wheel(content: &[u8]) -> anyhow::Result { } impl<'a> WheelMetadata<'a> { - pub(crate) fn parse( + pub fn parse( wheel_file: WheelFile<'a>, metadata_dirs: MetadataDirs, metadata_reader: &mut impl MetadataReader, @@ -104,9 +104,7 @@ mod tests { use testing::{tmp_dir, venv_python_exe}; use zip::ZipArchive; - use crate::wheel::MetadataDirs; - use crate::wheel::file::WheelFile; - use crate::wheel::metadata::{MetadataReader, WheelMetadata}; + use crate::{MetadataDirs, MetadataReader, WheelFile, WheelMetadata}; #[fixture] #[once] diff --git a/crates/pex/src/wheel/record.rs b/crates/wheel/src/record.rs similarity index 94% rename from crates/pex/src/wheel/record.rs rename to crates/wheel/src/record.rs index 0914ab6..c803d06 100644 --- a/crates/pex/src/wheel/record.rs +++ b/crates/wheel/src/record.rs @@ -10,13 +10,13 @@ use csv::{StringRecord, Terminator}; use fs_err::File; use ouroboros::self_referencing; -use crate::wheel::file::MetadataDirs; +use crate::file::MetadataDirs; -pub(crate) struct Entry<'a> { - pub(crate) path: Cow<'a, Path>, - pub(crate) raw_path: &'a str, - pub(crate) hash: &'a str, - pub(crate) size: &'a str, +pub struct Entry<'a> { + pub path: Cow<'a, Path>, + pub raw_path: &'a str, + pub hash: &'a str, + pub size: &'a str, } fn parse_entry_record<'a>( @@ -71,7 +71,7 @@ pub struct Record { } impl Record { - pub(crate) fn parse( + pub fn parse( wheel_dir: &Path, metadata_dirs: &MetadataDirs, ) -> anyhow::Result<(Self, PathBuf)> { @@ -107,7 +107,7 @@ impl Record { }) } - pub(crate) fn entries(&self) -> &[Entry<'_>] { + pub fn entries(&self) -> &[Entry<'_>] { self.borrow_entries().as_slice() } @@ -117,7 +117,7 @@ impl Record { }) } - pub(crate) fn filtered( + pub fn filtered( &self, metadata_dirs: &MetadataDirs, stash_dir: Option<&Path>, diff --git a/crates/interpreter/src/tag.rs b/crates/wheel/src/tag.rs similarity index 100% rename from crates/interpreter/src/tag.rs rename to crates/wheel/src/tag.rs diff --git a/src/commands/build.rs b/src/commands/build.rs index ef64ff6..096743b 100644 --- a/src/commands/build.rs +++ b/src/commands/build.rs @@ -1,30 +1,189 @@ // Copyright 2026 Pex project contributors. // SPDX-License-Identifier: Apache-2.0 +use std::borrow::Cow; +use std::io::BufReader; use std::path::PathBuf; use clap::{ArgAction, Args}; +use fs_err::File; +use pep508_rs::Requirement; +use pex::{PexInfo, RawPexInfo}; +use resolver::dependency_configuration::DependencyConfiguration; +use scripts::Scripts; +use url::Url; + +use crate::target::{PYTHON_PLATFORM_LONG_HELP, PythonPlatform}; #[derive(Args, Debug)] #[group(skip)] pub struct Build { /// Requirements to include in the PEX. - #[arg(value_name = "REQUIREMENT", help_heading = "Contents")] - requirements: Vec, + /// + /// If no requirements are specified, an empty hermetic PEX will be generated. + #[arg( + value_name = "REQUIREMENT", + help_heading = "Contents", + verbatim_doc_comment + )] + requirements: Vec>, /// Wheels (or directories containing wheels) to include in the PEX. + /// + /// There must be at least one wheel satisfying each direct requirement. If no targets are + /// specified, that is the only check performed; otherwise a full transitive closure is + /// confirmed for each specified target. #[arg( long, visible_alias = "wheel", value_name = "PATH", action = ArgAction::Append, - help_heading = "Contents" + help_heading = "Contents", + verbatim_doc_comment )] wheels: Vec, + + /// The Python platforms the built PEX will target at runtime. + /// + /// If specified, the targets will be used to resolve any specified requirements from the + /// configured wheels. If required wheels are not present, the build will error. + #[arg( + long = "target", + action = ArgAction::Append, + help_heading = "Targets", + value_parser = PythonPlatform::parse, + long_help=PYTHON_PLATFORM_LONG_HELP, + verbatim_doc_comment + )] + targets: Vec, + + /// Existing PEX-INFO to use for the built PEX. + /// + /// If the PEX-INFO is from a traditional PEX it may be edited minimally to conform to the PEXrc + /// runtime and any specified requirements. If no PEX-INFO is supplied, it will be created from + /// the other given inputs. + #[arg(long, help_heading = "Contents", verbatim_doc_comment)] + pex_info: Option, + + /// Instead of building a zipapp PEX, build a packed PEX. + /// + /// A Packed PEX is a directory containing a top-level `pex` script / `__main__.py` with wheels + /// and other needed assets as-is under that. This can be useful in situations where using + /// rsync-style transfer to ship incremental updates to large PEXes as opposed to having to ship + /// the whole PEX. + #[arg( + long, + help_heading = "Layout", + default_value_t = false, + verbatim_doc_comment + )] + packed: bool, + + /// Instead of booting via a Python shebang, boot via a Posix `sh` shebang. + /// + /// When running the PEX file directly (on Unix), instead of using a `#!/usr/bin/env python` + /// style shebang, use a specially crafted `#!/bin/sh ...` shebang header that performs initial + /// boot interpreter discovery smartly. If your PEX will target systems with a Posix shell at + /// `/bin/sh` (overwhelmingly common on unix systems), this is the most robust and + /// lowest-latency boot mode for repeated runs (at ~O(1ms)). + /// + /// N.B.: Both the Python and `sh` shebang headers are safe, but ignored on Windows systems. + /// For those, you must run the PEX via Python (`python PEX`, `py PEX`, etc.) or else use an + /// extension scheme you register with windows (Setting up a `.pyz` association is common). + #[arg( + long, + help_heading = "Boot Mode", + default_value_t = false, + verbatim_doc_comment + )] + sh_boot: bool, + + /// The name of the generated PEX file. + /// + /// Omitting this will run PEX immediately and not save it to a file. + /// + /// If the name contains the {platform} placeholder, the most-specific platform tags supported + /// by the PEX will be substituted. For example, for a multi-platform Linux x86-64, Mac ARM PEX + /// containing platform-specific wheels, `-o 'example-{platform}.pex'` might expand to a PEX + /// filename of `example-cp314-cp314-macosx_11_0_arm64.manylinux2014_x86_64.pex`. + #[arg( + short = 'o', + long, + visible_alias = "output-file", + help_heading = "Output", + verbatim_doc_comment + )] + output: Option, } impl Build { pub fn execute(self) -> anyhow::Result<()> { - todo!("Creating a PEX from sources and requirements is coming soon: {self:#?}") + if let Some(pex_info) = self.pex_info { + let pex_info_file = File::open(&pex_info)?; + let size = pex_info_file.metadata()?.len(); + let pex_info = PexInfo::parse( + BufReader::new(pex_info_file), + size, + Some(|| Cow::Owned(pex_info.display().to_string())), + )?; + let requirements = if self.requirements.is_empty() { + pex_info + .raw() + .requirements + .iter() + .map(|requirement| Ok(requirement.parse::>()?)) + .collect::>>()? + } else { + self.requirements + }; + create_pex( + self.targets, + requirements.as_slice(), + self.wheels, + pex_info.raw(), + self.packed, + self.sh_boot, + self.output, + ) + } else { + let pex_info = RawPexInfo { + requirements: self + .requirements + .iter() + .map(ToString::to_string) + .map(Cow::Owned) + .collect(), + ..Default::default() + }; + + create_pex( + self.targets, + self.requirements.as_slice(), + self.wheels, + &pex_info, + self.packed, + self.sh_boot, + self.output, + ) + } } } + +fn create_pex( + _targets: Vec, + _requirements: &[Requirement], + _wheels: Vec, + pex_info: &RawPexInfo, + _packed: bool, + _sh_boot: bool, + _output: Option, +) -> anyhow::Result<()> { + let _dependency_configuration = DependencyConfiguration::parse( + pex_info.excluded.as_slice(), + pex_info.overridden.as_slice(), + )?; + // 1. Resolve wheels (targets, requirements, wheels, dependency_configuration) + let _scripts = Scripts::Embedded; + // 2. Call create_packed_pex or create_zipapp (resolve, pex_info, scripts, sh_boot, output) + todo!("Creating a PEX from sources and requirements is coming soon.") +} diff --git a/src/commands/extract.rs b/src/commands/extract.rs index 683619a..c99b250 100644 --- a/src/commands/extract.rs +++ b/src/commands/extract.rs @@ -8,9 +8,10 @@ use std::path::{Path, PathBuf}; use cache::Fingerprint; use clap::Args; use owo_colors::OwoColorize; -use pex::{Pex, WheelOptions}; +use pex::Pex; use crate::compression_method::CompressionArgs; +use crate::package::{WheelOptions, repackage_wheels}; use crate::source; #[derive(Args)] @@ -38,7 +39,7 @@ impl Extract { } fn to_dir(dest_dir: &Path, pex: Pex, options: &WheelOptions) -> anyhow::Result<()> { - let wheels = pex::repackage_wheels(&pex, options, dest_dir)?; + let wheels = repackage_wheels(&pex, options, dest_dir)?; let count = wheels.len(); let mut wheel_info = Vec::with_capacity(count); diff --git a/src/commands/inject.rs b/src/commands/inject.rs index b6d3689..1043cbb 100644 --- a/src/commands/inject.rs +++ b/src/commands/inject.rs @@ -19,17 +19,19 @@ use indexmap::IndexSet; use interpreter::Interpreter; use log::info; use owo_colors::OwoColorize; -use pex::{Layout, Pex, WheelFile, WheelOptions}; +use pex::{Layout, Pex}; use platform::mark_executable; use rayon::iter::{IntoParallelIterator, ParallelIterator}; use scripts::{IdentifyInterpreter, Scripts}; use target::SimplifiedTarget; use tempfile::NamedTempFile; +use wheel::WheelFile; use zip::write::SimpleFileOptions; use zip::{CompressionMethod, ZipArchive, ZipWriter}; use crate::compression_method::CompressionArgs; use crate::embeds::{Binary, CLIB_BY_TARGET, PROXY_BY_TARGET, PROXYW_BY_TARGET}; +use crate::package::{WheelOptions, repackage_wheels}; use crate::source; #[derive(Args)] @@ -300,7 +302,7 @@ fn inject_pex_dir( } } let deps_dir = dest_pex.path().join(".deps"); - pex::repackage_wheels(&pex, options, &deps_dir)?; + repackage_wheels(&pex, options, &deps_dir)?; let wheel_file_names = pex .info .raw() @@ -456,7 +458,7 @@ fn inject_pex_zip( let deps_dir = tempfile::tempdir_in(pex.path.parent().unwrap_or_else(|| Path::new(".")))?; let stored_file_options = SimpleFileOptions::default().compression_method(CompressionMethod::Stored); - pex::repackage_wheels(&pex, options, deps_dir.path())?; + repackage_wheels(&pex, options, deps_dir.path())?; let mut fingerprints = Vec::with_capacity(pex_info.distributions.len()); for wheel_file_name in pex_info.distributions.keys().copied() { dst_zip.start_file(format!(".deps/{wheel_file_name}"), stored_file_options)?; diff --git a/src/commands/platform/python.rs b/src/commands/platform/python.rs index f8bca70..891b80e 100644 --- a/src/commands/platform/python.rs +++ b/src/commands/platform/python.rs @@ -1,29 +1,12 @@ // Copyright 2026 Pex project contributors. // SPDX-License-Identifier: Apache-2.0 -use std::path::{Path, PathBuf}; - use clap::Args; use cli::{Json, Output}; use interpreter::Interpreter; use scripts::{IdentifyInterpreter, Scripts}; -#[derive(Clone)] -enum PythonPlatform { - Spec(String), - Interpreter(PathBuf), -} - -impl PythonPlatform { - fn parse(value: &str) -> anyhow::Result { - let interpreter = Path::new(value); - if interpreter.is_file() && platform::is_executable(interpreter)? { - Ok(Self::Interpreter(interpreter.to_owned())) - } else { - Ok(Self::Spec(value.to_owned())) - } - } -} +use crate::target::{PYTHON_PLATFORM_LONG_HELP, PythonPlatform}; #[derive(Args)] #[group(skip)] @@ -35,80 +18,7 @@ pub struct Python { output: Output, /// The Python platform to inspect. - /// - /// Can be either the path to a local Python executable or else a Python platform spec. - /// In its simplest form, the spec can be just a Python version number; and CPython will be - /// assumed. The version number must be in .(.) form. If the micro version - /// is not specified, 0 is used. For example: - /// + 3.14 - /// + 3.14.5 - /// - /// The Python implementation can be selected by prefixing the version with cpython or pypy: - /// + cpython-3.14.5 - /// + pypy-3.11 - /// - /// Cpython versions can be further suffixed with the following abi flags: - /// + t: A free-threaded build (Only applies to CPython 3.13 and newer). - /// + d: A debug build. - /// + m: A pymalloc build (Only applies to CPython 3.7 and older). - /// + u: A ucs4 Unicode build (Only applies to CPython 3.2 and older). - /// - /// For example: - /// + cpython-3.14t - /// + 3.14.5td - /// + 2.7mu - /// - /// PyPy versions can be suffixed by the PyPy release following an underscore: - /// + pypy-3.11_7.3 - /// + pypy-2.7.18_7.3 - /// - /// In the preceding forms, the Python platform spec is rendered for the current operating - /// system and chip architecture. You can further refine the spec by specifying these as - /// suffixes. - /// - /// The basic operating system suffixes are: - /// + 3.14.5-linux - /// + 3.14.5-macos - /// + 3.14.5-windows - /// - /// When using these, defaults for each operating system are chosen: - /// + linux: 4.4.302-cip103 (January 2016) & glibc 2.17 (December 2012) & x86_64 - /// + macos: 11.3 (Big Sur April 2021) & aarch64 - /// + windows: 10 (first released July 2015) & x86_64 - /// - /// Linux can be further refined by using the manylinux and musllinux standards; for example: - /// + 3.14.5-manylinux1 - /// + 3.14.5-manylinux2014 - /// + 3.14.5-manylinux_2_43 - /// + 3.14.5-musllinux_1_2 - /// - /// macOS can be further refined by specifying the release in _(_) form: - /// + 3.14.5-macos_10_6 - /// + 3.14.5-macos_11_7_11 - /// + 3.14.5-macos_26_5 - /// - /// Windows can be further refined by specifying the release as well: - /// + 3.14.5-windows_11 - /// - /// Finally, when specifying an operating system, an explicit chip architecture suffix can be - /// selected from among the following: - /// + aarch64 (or arm64) - /// + armv7 [^1] - /// + ppc64le [^1] - /// + riscv64 [^1] - /// + s390x [^1] - /// + x86_64 (or x64 or amd64) - /// - /// With this, you have a full [^2] specification Python platform specification. For example: - /// + pypy-3.11_7.3-manylinux_2_17-aarch64 - /// + cpython-3.14.5-macos_26_5-arm64 - /// + cpython-3.14.5-windows_11-amd64 - /// - /// [^1]: These chip architectures are only supported for Linux. - /// [^2]: The derived Python platform specification is complete save for the platform_version - /// environment marker that appears to be unused in the wild. Its value is defaulted to - /// "". - #[arg(value_parser = PythonPlatform::parse, verbatim_doc_comment)] + #[arg(value_parser = PythonPlatform::parse, long_help=PYTHON_PLATFORM_LONG_HELP)] python_platform: PythonPlatform, } diff --git a/src/compression_method.rs b/src/compression_method.rs index a0e8c58..eb85d49 100644 --- a/src/compression_method.rs +++ b/src/compression_method.rs @@ -3,7 +3,8 @@ use chrono::{DateTime, Utc}; use clap::{Args, ValueEnum}; -use pex::WheelOptions; + +use crate::package::WheelOptions; #[derive(Clone, ValueEnum)] pub enum CompressionMethod { diff --git a/src/lib.rs b/src/lib.rs index 6e1b8d3..49793df 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -2,9 +2,12 @@ // SPDX-License-Identifier: Apache-2.0 #![deny(clippy::all)] +#![feature(str_as_str)] pub mod commands; pub mod compression_method; pub mod embeds; +pub mod package; pub mod simplified_target; pub mod source; +pub mod target; diff --git a/crates/pex/src/wheel/package.rs b/src/package.rs similarity index 99% rename from crates/pex/src/wheel/package.rs rename to src/package.rs index c942df1..c8e0dd2 100644 --- a/crates/pex/src/wheel/package.rs +++ b/src/package.rs @@ -15,20 +15,17 @@ use chrono::{DateTime, Utc}; use fs_err as fs; use fs_err::File; use logging_timer::time; +use pex::{Layout, Pex}; use platform::PosixPath; use rayon::iter::{IntoParallelIterator, ParallelIterator}; +use repackage::original_wheel_info::{OriginalWheelInfo, ZipFileName}; use walkdir::WalkDir; +use wheel::{Record, WheelFile, WheelLayout}; use zip::read::ZipArchiveMetadata; use zip::result::ZipError; use zip::write::SimpleFileOptions; use zip::{CompressionMethod, ZipArchive, ZipWriter}; -use crate::wheel::WheelFile; -use crate::wheel::layout::WheelLayout; -use crate::wheel::original_wheel_info::{OriginalWheelInfo, ZipFileName}; -use crate::wheel::record::Record; -use crate::{Layout, Pex}; - #[derive(Copy, Clone)] enum DirPexDepType { Chroot, diff --git a/src/target.rs b/src/target.rs new file mode 100644 index 0000000..17c5398 --- /dev/null +++ b/src/target.rs @@ -0,0 +1,105 @@ +// Copyright 2026 Pex project contributors. +// SPDX-License-Identifier: Apache-2.0 + +use std::path::{Path, PathBuf}; +use std::str::FromStr; + +pub const PYTHON_PLATFORM_LONG_HELP: &str = r#" +Can be either the path to a local Python executable or else a Python platform spec. +In its simplest form, the spec can be just a Python version number; and CPython will be +assumed. The version number must be in .(.) form. If the micro version +is not specified, 0 is used. For example: ++ 3.14 ++ 3.14.5 + +The Python implementation can be selected by prefixing the version with cpython or pypy: ++ cpython-3.14.5 ++ pypy-3.11 + +Cpython versions can be further suffixed with the following abi flags: ++ t: A free-threaded build (Only applies to CPython 3.13 and newer). ++ d: A debug build. ++ m: A pymalloc build (Only applies to CPython 3.7 and older). ++ u: A ucs4 Unicode build (Only applies to CPython 3.2 and older). + +For example: ++ cpython-3.14t ++ 3.14.5td ++ 2.7mu + +PyPy versions can be suffixed by the PyPy release following an underscore: ++ pypy-3.11_7.3 ++ pypy-2.7.18_7.3 + +In the preceding forms, the Python platform spec is rendered for the current operating +system and chip architecture. You can further refine the spec by specifying these as +suffixes. + +The basic operating system suffixes are: ++ 3.14.5-linux ++ 3.14.5-macos ++ 3.14.5-windows + +When using these, defaults for each operating system are chosen: ++ linux: 4.4.302-cip103 (January 2016) & glibc 2.17 (December 2012) & x86_64 ++ macos: 11.3 (Big Sur April 2021) & aarch64 ++ windows: 10 (first released July 2015) & x86_64 + +Linux can be further refined by using the manylinux and musllinux standards; for example: ++ 3.14.5-manylinux1 ++ 3.14.5-manylinux2014 ++ 3.14.5-manylinux_2_43 ++ 3.14.5-musllinux_1_2 + +macOS can be further refined by specifying the release in _(_) form: ++ 3.14.5-macos_10_6 ++ 3.14.5-macos_11_7_11 ++ 3.14.5-macos_26_5 + +Windows can be further refined by specifying the release as well: ++ 3.14.5-windows_11 + +Finally, when specifying an operating system, an explicit chip architecture suffix can be +selected from among the following: ++ aarch64 (or arm64) ++ armv7 [^1] ++ ppc64le [^1] ++ riscv64 [^1] ++ s390x [^1] ++ x86_64 (or x64 or amd64) + +With this, you have a full [^2] specification Python platform specification. For example: ++ pypy-3.11_7.3-manylinux_2_17-aarch64 ++ cpython-3.14.5-macos_26_5-arm64 ++ cpython-3.14.5-windows_11-amd64 + +[^1]: These chip architectures are only supported for Linux. +[^2]: The derived Python platform specification is complete save for the platform_version + environment marker that appears to be unused in the wild. Its value is defaulted to + "". +"#; + +#[derive(Clone, Debug)] +pub enum PythonPlatform { + Spec(String), + Interpreter(PathBuf), +} + +impl PythonPlatform { + pub fn parse(value: &str) -> anyhow::Result { + let interpreter = Path::new(value); + if interpreter.is_file() && platform::is_executable(interpreter)? { + Ok(Self::Interpreter(interpreter.to_owned())) + } else { + Ok(Self::Spec(value.to_owned())) + } + } +} + +impl FromStr for PythonPlatform { + type Err = anyhow::Error; + + fn from_str(s: &str) -> Result { + Self::parse(s) + } +}