bandersnatch package

Package contents

Submodules

bandersnatch.configuration module

Module containing classes to access the bandersnatch configuration file

class bandersnatch.configuration.BandersnatchConfig(*args: Any, **kwargs: Any)[source]

Bases: ConfigParser

Configuration singleton. Provides global access to loaded configuration options as a ConfigParser subclass. Always reads default mirror options when initialized. If given a file path, that file is loaded second so its values overwrite corresponding defaults.

SHOWN_DEPRECATIONS = False
check_for_deprecations() None[source]
optionxform(optionstr: str) str[source]
class bandersnatch.configuration.SetConfigValues(json_save, root_uri, diff_file_path, diff_append_epoch, digest_name, storage_backend_name, cleanup, release_files_save, compare_method, download_mirror, download_mirror_no_fallback, simple_format)[source]

Bases: NamedTuple

cleanup: bool

Alias for field number 6

compare_method: str

Alias for field number 8

diff_append_epoch: bool

Alias for field number 3

diff_file_path: str

Alias for field number 2

digest_name: str

Alias for field number 4

download_mirror: str

Alias for field number 9

download_mirror_no_fallback: bool

Alias for field number 10

json_save: bool

Alias for field number 0

release_files_save: bool

Alias for field number 7

root_uri: str

Alias for field number 1

simple_format: SimpleFormat

Alias for field number 11

storage_backend_name: str

Alias for field number 5

class bandersnatch.configuration.Singleton[source]

Bases: type

bandersnatch.configuration.create_example_config(dest: Path) None[source]

Create an example configuration file at the specified location.

Parameters:

dest (Path) – destination path for the configuration file.

bandersnatch.configuration.validate_config_values(config: ConfigParser) SetConfigValues[source]

bandersnatch.delete module

async bandersnatch.delete.delete_packages(config: ConfigParser, args: Namespace, master: Master) int[source]
async bandersnatch.delete.delete_path(blob_path: Path, dry_run: bool = False) int[source]
async bandersnatch.delete.delete_simple_page(simple_base_path: Path, package: str, hash_index: bool = False, dry_run: bool = True) int[source]

bandersnatch.filter module

Blocklist management

class bandersnatch.filter.Filter(*args: Any, **kwargs: Any)[source]

Bases: object

Base Filter class

property allowlist: SectionProxy
property blocklist: SectionProxy
check_match(**kwargs: Any) bool[source]

Check if the plugin matches based on the arguments provided.

Returns:

True if the values match a filter rule, False otherwise

Return type:

bool

deprecated_name: str = ''
filter(metadata: dict) bool[source]

Check if the plugin matches based on the package’s metadata.

Returns:

True if the values match a filter rule, False otherwise

Return type:

bool

initialize_plugin() None[source]

Code to initialize the plugin

name = 'filter'
pinned_version_exists(metadata: dict) bool[source]

Check if a version specifier exists.

Returns:

True if a version specifier exists, False otherwise

Return type:

bool

class bandersnatch.filter.FilterMetadataPlugin(*args: Any, **kwargs: Any)[source]

Bases: Filter

Plugin that blocks sync operations for an entire project based on info fields.

name = 'metadata_plugin'
class bandersnatch.filter.FilterProjectPlugin(*args: Any, **kwargs: Any)[source]

Bases: Filter

Plugin that blocks sync operations for an entire project

name = 'project_plugin'
class bandersnatch.filter.FilterReleaseFilePlugin(*args: Any, **kwargs: Any)[source]

Bases: Filter

Plugin that modifies the download of specific release or dist files

name = 'release_file_plugin'
class bandersnatch.filter.FilterReleasePlugin(*args: Any, **kwargs: Any)[source]

Bases: Filter

Plugin that modifies the download of specific releases or dist files

name = 'release_plugin'
class bandersnatch.filter.LoadedFilters(load_all: bool = False)[source]

Bases: object

A class to load all of the filters enabled

ENTRYPOINT_GROUPS = ['bandersnatch_filter_plugins.v2.project', 'bandersnatch_filter_plugins.v2.metadata', 'bandersnatch_filter_plugins.v2.release', 'bandersnatch_filter_plugins.v2.release_file']
filter_metadata_plugins() list[Filter][source]

Load and return the metadata filtering plugin objects

Returns:

List of objects derived from the bandersnatch.filter.Filter class

Return type:

list of bandersnatch.filter.Filter

filter_project_plugins() list[Filter][source]

Load and return the project filtering plugin objects

Returns:

List of objects derived from the bandersnatch.filter.Filter class

Return type:

list of bandersnatch.filter.Filter

filter_release_file_plugins() list[Filter][source]

Load and return the release file filtering plugin objects

Returns:

List of objects derived from the bandersnatch.filter.Filter class

Return type:

list of bandersnatch.filter.Filter

filter_release_plugins() list[Filter][source]

Load and return the release filtering plugin objects

Returns:

List of objects derived from the bandersnatch.filter.Filter class

Return type:

list of bandersnatch.filter.Filter

loaded_filter_plugins: dict[str, list[Filter]]

bandersnatch.log module

bandersnatch.log.setup_logging(args: Any) StreamHandler[source]

bandersnatch.main module

async bandersnatch.main.async_main(args: Namespace, config: ConfigParser) int[source]
bandersnatch.main.main(loop: AbstractEventLoop | None = None) int[source]

bandersnatch.master module

class bandersnatch.master.Master(url: str, timeout: float = 10.0, global_timeout: float | None = 18000.0, proxy: str | None = None, allow_non_https: bool = False)[source]

Bases: object

async all_packages() Any[source]
async changed_packages(last_serial: int) dict[str, int][source]
async check_for_stale_cache(path: str, required_serial: int | None, got_serial: int | None) None[source]
async fetch_simple_index() Any[source]

Return a mapping of all project data from the PyPI Index API

async get(path: str, required_serial: int | None, **kw: Any) AsyncGenerator[ClientResponse, None][source]
async get_package_metadata(package_name: str, serial: int = 0) Any[source]
property simple_url: str
async url_fetch(url: str, file_path: Path, executor: ProcessPoolExecutor | ThreadPoolExecutor | None = None, chunk_size: int = 65536) None[source]
exception bandersnatch.master.StalePage[source]

Bases: Exception

We got a page back from PyPI that doesn’t meet our expected serial.

bandersnatch.mirror module

class bandersnatch.mirror.BandersnatchMirror(homedir: Path, master: Master, storage_backend: str | None = None, stop_on_error: bool = False, workers: int = 3, hash_index: bool = False, json_save: bool = False, digest_name: str | None = None, root_uri: str | None = None, keep_index_versions: int = 0, diff_append_epoch: bool = False, diff_full_path: Path | str | None = None, flock_timeout: int = 1, diff_file_list: list[Path] | None = None, *, cleanup: bool = False, release_files_save: bool = True, compare_method: str | None = None, download_mirror: str | None = None, download_mirror_no_fallback: bool | None = False, simple_format: SimpleFormat | str = 'ALL')[source]

Bases: Mirror

async cleanup_non_pep_503_paths(package: Package) None[source]

Before 4.0 we used to store backwards-compatible named dirs for older pip. This function checks for them and cleans them up.

async determine_packages_to_sync() None[source]

Update the self.packages_to_sync to contain packages that need to be synced.

async download_file(url: str, file_size: str, upload_time: datetime, sha256sum: str, chunk_size: int = 65536, urlpath: str = '') Path | None[source]
errors = False
finalize_sync(sync_index_page: bool = True) None[source]
find_target_serial() int[source]
property generationfile: Path
json_file(package_name: str) Path[source]
need_index_sync = True
need_wrapup = False
on_error(exception: BaseException, **kwargs: dict) None[source]
populate_download_urls(release_file: dict[str, str]) tuple[str, list[str]][source]

Populate download URLs for a certain file, possible combinations are:

  • download_mirror is not set: return “url” attribute from release_file

  • download_mirror is set, no_fallback is false: prepend “download_mirror + path” before “url”

  • download_mirror is set, no_fallback is true: return only “download_mirror + path”

Theoretically we are able to support multiple download mirrors by prepending more urls in the list.

async process_package(package: Package) None[source]
record_finished_package(name: str) None[source]
save_json_metadata(package_info: dict, name: str) bool[source]

Take the JSON metadata we just fetched and save to disk

simple_directory(package: Package) Path[source]
property statusfile: Path
async sync_release_files(package: Package) None[source]

Purge + download files returning files removed + added

sync_simple_pages(package: Package) None[source]
property todolist: Path
property webdir: Path
wrapup_successful_sync() None[source]
write_simple_pages(package: Package, content: SimpleFormats) None[source]
class bandersnatch.mirror.Mirror(master: Master, workers: int = 3)[source]

Bases: object

altered_packages: dict[str, set[str]]
async determine_packages_to_sync() None[source]

Update the self.packages_to_sync to contain packages that need to be synced.

finalize_sync(sync_index_page: bool = True) None[source]
now = None
on_error(exception: BaseException, **kwargs: dict) None[source]
async package_syncer(idx: int) None[source]
packages_to_sync: dict[str, int | str] = {}
async process_package(package: Package) None[source]
async sync_packages() None[source]
synced_serial: int | None = 0
async synchronize(specific_packages: list[str] | None = None, sync_simple_index: bool = True) dict[str, set[str]][source]
target_serial: int | None = None
async bandersnatch.mirror.fetch_and_store(master: Master, storage_backend: Storage, url: str, path: Path | str, digest: str, upload_time: datetime, chunk_size: int = 65536, digest_name: str = 'sha256', return_size: bool = False) int | None[source]

Fetch from url and store in path.

async bandersnatch.mirror.mirror(config: ConfigParser, specific_packages: list[str] | None = None, sync_simple_index: bool = True) int[source]

bandersnatch.package module

class bandersnatch.package.Package(name: str, serial: int = 0)[source]

Bases: object

filter_all_releases(release_filters: list[Filter]) bool[source]

Filter releases and remove releases that fail the filters

filter_all_releases_files(release_file_filters: list[Filter]) bool[source]

Filter release files and remove empty releases after doing so.

filter_metadata(metadata_filters: list[Filter]) bool[source]

Run the metadata filtering plugins

classmethod from_metadata(metadata: dict[str, Any]) Package[source]
property info: Any
property last_serial: int
property metadata: dict[str, Any]
property release_files: list
property releases: Any
async update_metadata(master: Master, attempts: int = 3) None[source]

bandersnatch.storage module

Storage management

class bandersnatch.storage.FileSpec(path: Path | str, url: str, filename: str, size: int, digests: dict[str, str], upload_time: datetime)[source]

Bases: object

Describes a single expected release file for integrity verification.

digests: dict[str, str]
filename: str
path: Path | str
size: int
upload_time: datetime
url: str
class bandersnatch.storage.Storage(*args: Any, config: ConfigParser | None = None, **kwargs: Any)[source]

Bases: object

Base Storage class

PATH_BACKEND

alias of Path

static canonicalize_package(name: str) str[source]
compare_files(file1: Path | str, file2: Path | str) bool[source]

Compare two files and determine whether they contain the same data. Return True if they match

copy_file(source: Path | str, dest: Path | str) None[source]

Copy a file from source to dest

delete(path: Path | str, dry_run: bool = False) int[source]

Delete the provided path.

delete_file(path: Path | str, dry_run: bool = False) int[source]

Delete the provided path, recursively if necessary.

delete_package_file(path: Path | str) None[source]

Domain specific implementation of deleting a package file. (Filesystem backend overrides this to also remove empty parent directories)

property directory: str
exists(path: Path | str) bool[source]

Check whether the provided path exists

find(root: Path | str, dirs: bool = True) str[source]

A test helper simulating ‘find’.

Iterates over directories and filenames, given as relative paths to the root.

flock_path: Path | str
get_file_size(path: Path | str) int[source]

Get the size of a given path in bytes

get_hash(path: Path | str, function: str = 'sha256') str[source]

Get the hash digest of a given path (sha256 by default)

get_json_paths(name: str) Sequence[Path | str][source]
get_lock(path: str) BaseFileLock[source]

Retrieve the appropriate FileLock backend for this storage plugin

Parameters:

path (str) – The path to use for locking

Returns:

A FileLock backend for obtaining locks

Return type:

filelock.BaseFileLock

get_upload_time(path: Path | str) datetime[source]

Get the upload time of a given path

hash_file(path: Path | str, function: str = 'sha256') str[source]
initialize_plugin() None[source]

Code to initialize the plugin

is_dir(path: Path | str) bool[source]

Check whether the provided path is a directory.

is_file(path: Path | str) bool[source]

Check whether the provided path is a file.

iter_dir(path: Path | str) Generator[Path | str, None, None][source]

Iterate over the path, returning the sub-paths

iter_package_files(packages_path: Path | str) Iterator[Path | str][source]

Iterates through all the files in the packages path. This is the default implementation for any backend type.

mkdir(path: Path | str, exist_ok: bool = False, parents: bool = False) None[source]

Create the provided directory

move_file(source: Path | str, dest: Path | str) None[source]

Move a file from source to dest

name = 'storage'
open_file(path: Path | str, text: bool = True) Generator[IO, None, None][source]

Yield a file context to iterate over. If text is false, open the file with ‘rb’ mode specified.

read_file(path: Path | str, text: bool = True, encoding: str = 'utf-8', errors: str | None = None) str | bytes[source]

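Return the contents of the file at the given path. If text is true, the contents are returned as a string decoded with the given encoding; otherwise the file is read in ‘rb’ mode and bytes are returned.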

rewrite(filepath: Path | str, mode: str = 'w', **kw: Any) Generator[IO, None, None][source]

Rewrite an existing file atomically so that programs running in parallel do not hit race conditions while reading.

rmdir(path: Path | str, recurse: bool = False, force: bool = False, ignore_errors: bool = False, dry_run: bool = False) int[source]

Remove the directory. If recurse is True, allow removing empty children. If force is true, remove contents destructively.

scandir(path: Path | str) Generator[StorageDirEntry, None, None][source]

Read entries from the provided directory

set_hash(path: Path | str, digest: str, function: str = 'sha256') None[source]

Store a hash digest for the given path as metadata. (for backend specific optimizations)

set_upload_time(path: Path | str, time: datetime) None[source]

Set the upload time of a given path

stamp_file_metadata(path: Path | str, digest: str, upload_time: datetime, function: str = 'sha256') None[source]

Store both the hash digest and upload time for a path in one operation.

symlink(source: Path | str, dest: Path | str) None[source]

Create a symlink at dest that points back at source

update_safe(filename: Path | str, **kw: Any) Generator[IO, None, None][source]

Rewrite a file atomically.

Clients are allowed to delete the tmpfile to signal that they don’t want to have it updated.

async verify_files(expected: Iterable[FileSpec], dry_run: bool = False) AsyncIterator[FileSpec][source]

Iterates through all the expected files and yields those that are missing or corrupt. This is the default implementation for any backend type. (dry_run is included as some backends may set metadata if it’s not a dry run)

write_file(path: Path | str, contents: str | bytes) None[source]

Write data to the provided path. If contents is a string, the file will be opened and written in “w” + “utf-8” mode; if bytes are supplied, it will be accessed using “wb” mode (i.e. binary write).

class bandersnatch.storage.StorageDirEntry(*args, **kwargs)[source]

Bases: Protocol

is_dir() bool[source]
is_file() bool[source]
property name: str | bytes
property path: str | bytes
class bandersnatch.storage.StoragePlugin(*args: Any, config: ConfigParser | None = None, **kwargs: Any)[source]

Bases: Storage

Plugin that provides a storage backend for bandersnatch

name = 'storage_plugin'
bandersnatch.storage.load_storage_plugins(entrypoint_group: str, enabled_plugin: str | None = None, config: ConfigParser | None = None, clear_cache: bool = False) set[Storage][source]

Load all storage plugins that are registered with importlib

Parameters:
  • entrypoint_group (str) – The entrypoint group name to load plugins from

  • enabled_plugin (str) – The optional enabled storage plugin to search for

  • config (configparser.ConfigParser) – The optional configparser instance to pass in

  • clear_cache (bool) – Whether to clear the plugin cache

Returns:

A set of objects derived from the Storage class

Return type:

set of Storage

bandersnatch.storage.storage_backend_plugins(backend: str | None = 'filesystem', config: ConfigParser | None = None, clear_cache: bool = False) Iterable[Storage][source]

Load and return the storage backend plugin objects

Parameters:
  • backend (str) – The optional enabled storage plugin to search for

  • config (configparser.ConfigParser) – The optional configparser instance to pass in

  • clear_cache (bool) – Whether to clear the plugin cache

Returns:

List of objects derived from the bandersnatch.storage.Storage class

Return type:

list of bandersnatch.storage.Storage

bandersnatch.utils module

class bandersnatch.utils.StrEnum(new_class_name, /, names, *, module=None, qualname=None, type=None, start=1, boundary=None)[source]

Bases: str, Enum

Enumeration class where members can be treated as strings.

value: str
bandersnatch.utils.bandersnatch_safe_name(name: str) str[source]

Convert an arbitrary string to a standard distribution name. Any runs of non-alphanumeric/. characters are replaced with a single ‘-’.

  • This was copied from pkg_resources (part of setuptools)

bandersnatch also lower cases the returned name

bandersnatch.utils.convert_url_to_path(url: str) str[source]
bandersnatch.utils.find(root: Path | str, dirs: bool = True) str[source]

A test helper simulating ‘find’.

Iterates over directories and filenames, given as relative paths to the root.

bandersnatch.utils.find_all_files(files: set[Path], base_dir: Path) None[source]
bandersnatch.utils.hash(path: Path, function: str = 'sha256') str[source]
bandersnatch.utils.make_time_stamp() str[source]

Helper function that returns a timestamp suitable for use in a filename on any OS

bandersnatch.utils.parse_version(version: str) list[str][source]

Converts a version string to a list of strings to check the 1st part of build tags. See PEP 425 (https://peps.python.org/pep-0425/#python-tag) for details.

Parameters:

version (str) – string in the form of ‘{major}.{minor}’ e.g. ‘3.6’

Returns:

list of 1st element strings from build tag tuples

See https://peps.python.org/pep-0425/#python-tag for details. Some Windows binaries have only the 1st part before the file extension. e.g. [‘-cp36-’, ‘-pp36-’, ‘-ip36-’, ‘-jy36-’, ‘-py3.6-’, ‘-py3.6.’]

Return type:

List[str]

bandersnatch.utils.removeprefix(original: str, prefix: str) str[source]
Return a string with the given prefix string removed if present.

If the string starts with the prefix string, return string[len(prefix):]. Otherwise, return the original string.

Parameters:
  • original (str) – string to remove the prefix (e.g. ‘py3.6’)

  • prefix (str) – the prefix to remove (e.g. ‘py’)

Returns:

either the modified or the original string (e.g. ‘3.6’)

Return type:

str

bandersnatch.utils.rewrite(filepath: str | Path, mode: str = 'w', **kw: Any) Generator[IO, None, None][source]

Rewrite an existing file atomically so that programs running in parallel do not hit race conditions while reading.

bandersnatch.utils.unlink_parent_dir(path: Path) None[source]

Remove a file and, if its parent directory is then empty, remove the directory too

bandersnatch.utils.user_agent() str[source]

bandersnatch.verify module

class bandersnatch.verify.DownloadStats(file_count: int = 0, total_bytes: int = 0, unknown_size_count: int = 0)[source]

Bases: object

file_count: int = 0
record_size(size: int | None) None[source]
total_bytes: int = 0
unknown_size_count: int = 0
async bandersnatch.verify.delete_unowned_files(storage_backend: Storage, mirror_base: Path, executor: ThreadPoolExecutor, all_package_files: Sequence[Path | str], dry_run: bool) int[source]

Calculates difference in expected files and stored files. Deletes them using the storage backend implementation

async bandersnatch.verify.get_latest_json(master: Master, json_path: Path, executor: ThreadPoolExecutor | None = None, delete_removed_packages: bool = False) None[source]
async bandersnatch.verify.load_package(master: Master, storage_backend: Storage, json_file: str, mirror_base_path: Path, args: Namespace, executor: ThreadPoolExecutor | None = None, stop_on_error: bool = False) Package | None[source]
bandersnatch.verify.log_download_summary(stats: DownloadStats, dry_run: bool = False) None[source]
async bandersnatch.verify.metadata_verify(config: ConfigParser, args: Namespace) int[source]

Crawl all saved JSON metadata or online to check we have all packages. If --delete is given, also remove files not referenced by any package.

bandersnatch.verify.on_error(stop_on_error: bool, exception: BaseException, package: str) None[source]
async bandersnatch.verify.verify(master: Master, config: ConfigParser, storage_backend: Storage, json_file: str, mirror_base_path: Path, all_package_files: list[Path | str], args: Namespace, executor: ThreadPoolExecutor | None = None, stats: DownloadStats | None = None) None[source]

Verify a single package JSON file and remediate any missing/corrupt files.

  1. Calculates expected release files from the JSON file

  2. Calls storage backend to verify the files and returns any missing or corrupt files

  3. Downloads those files and stores them using the storage backend

async bandersnatch.verify.verify_producer(master: Master, config: ConfigParser, storage_backend: Storage, all_package_files: list[Path | str], mirror_base_path: Path, json_files: list[str], args: Namespace, executor: ThreadPoolExecutor | None = None) DownloadStats[source]